Index: include/polly/ScopDetection.h =================================================================== --- include/polly/ScopDetection.h +++ include/polly/ScopDetection.h @@ -136,6 +136,12 @@ AliasAnalysis *AA; //@} + /// @brief Set to remeber non-affine branches in regions. + using NonAffineSubRegionSetTy = RegionSet; + using NonAffineSubRegionMapTy = + DenseMap; + NonAffineSubRegionMapTy NonAffineSubRegionMap; + /// @brief Context variables for SCoP detection. struct DetectionContext { Region &CurRegion; // The region to check. @@ -162,9 +168,13 @@ /// @brief The region has at least one store instruction. bool hasStores; - DetectionContext(Region &R, AliasAnalysis &AA, bool Verify) + /// @brief The set of non-affine subregions in the region we analyze. + NonAffineSubRegionSetTy &NonAffineSubRegionSet; + + DetectionContext(Region &R, AliasAnalysis &AA, + NonAffineSubRegionSetTy &NABS, bool Verify) : CurRegion(R), AST(AA), Verifying(Verify), Log(&R), hasLoads(false), - hasStores(false) {} + hasStores(false), NonAffineSubRegionSet(NABS) {} }; // Remember the valid regions @@ -300,6 +310,9 @@ /// @return Return true if R is the maximum Region in a Scop, false otherwise. bool isMaxRegionInScop(const Region &R, bool Verify = true) const; + /// @brief Return true if @p SubR is a non-affine subregion in @p ScopR. + bool isNonAffineSubRegion(const Region *SubR, const Region *ScopR) const; + /// @brief Get a message why a region is invalid /// /// @param R The region for which we get the error message Index: lib/Analysis/ScopDetection.cpp =================================================================== --- lib/Analysis/ScopDetection.cpp +++ lib/Analysis/ScopDetection.cpp @@ -126,6 +126,11 @@ cl::Hidden, cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); +static cl::opt AllowNonAffineSubRegions( + "polly-allow-nonaffine-branches", + cl::desc("Allow non affine conditions for branches"), cl::Hidden, + cl::init(false), cl::ZeroOrMore, cl::cat(PollyCategory)); + static cl::opt AllowUnsigned("polly-allow-unsigned", cl::desc("Allow unsigned expressions"), cl::Hidden, cl::init(false), cl::ZeroOrMore, @@ -252,7 +257,9 @@ return false; if (Verify) { - DetectionContext Context(const_cast(R), *AA, false /*verifying*/); + NonAffineSubRegionSetTy DummyNonAffineSubRegionSet; + DetectionContext Context(const_cast(R), *AA, + DummyNonAffineSubRegionSet, false /*verifying*/); return isValidRegion(Context); } @@ -276,6 +283,15 @@ return RR->getMessage(); } +static bool containsLoop(Region *R, LoopInfo *LI) { + for (BasicBlock *BB : R->blocks()) { + Loop *L = LI->getLoopFor(BB); + if (R->contains(L)) + return true; + } + return false; +} + bool ScopDetection::isValidCFG(BasicBlock &BB, DetectionContext &Context) const { Region &CurRegion = Context.CurRegion; @@ -301,8 +317,12 @@ return invalid(Context, /*Assert=*/true, Br, &BB); // Only Constant and ICmpInst are allowed as condition. - if (!(isa(Condition) || isa(Condition))) - return invalid(Context, /*Assert=*/true, Br, &BB); + if (!(isa(Condition) || isa(Condition))) { + if (AllowNonAffineSubRegions && !containsLoop(RI->getRegionFor(&BB), LI)) + Context.NonAffineSubRegionSet.insert(RI->getRegionFor(&BB)); + else + return invalid(Context, /*Assert=*/true, Br, &BB); + } // Allow perfectly nested conditions. assert(Br->getNumSuccessors() == 2 && "Unexpected number of successors"); @@ -326,9 +346,13 @@ const SCEV *RHS = SE->getSCEVAtScope(ICmp->getOperand(1), L); if (!isAffineExpr(&CurRegion, LHS, *SE) || - !isAffineExpr(&CurRegion, RHS, *SE)) - return invalid(Context, /*Assert=*/true, &BB, LHS, - RHS, ICmp); + !isAffineExpr(&CurRegion, RHS, *SE)) { + if (AllowNonAffineSubRegions && !containsLoop(RI->getRegionFor(&BB), LI)) + Context.NonAffineSubRegionSet.insert(RI->getRegionFor(&BB)); + else + return invalid(Context, /*Assert=*/true, &BB, LHS, + RHS, ICmp); + } } // Allow loop exit conditions. @@ -648,10 +672,10 @@ bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) const { // Is the loop count affine? const SCEV *LoopCount = SE->getBackedgeTakenCount(L); - if (!isAffineExpr(&Context.CurRegion, LoopCount, *SE)) - return invalid(Context, /*Assert=*/true, L, LoopCount); + if (isAffineExpr(&Context.CurRegion, LoopCount, *SE)) + return true; - return true; + return invalid(Context, /*Assert=*/true, L, LoopCount); } Region *ScopDetection::expandRegion(Region &R) { @@ -662,7 +686,9 @@ DEBUG(dbgs() << "\tExpanding " << R.getNameStr() << "\n"); while (ExpandedRegion) { - DetectionContext Context(*ExpandedRegion, *AA, false /* verifying */); + DetectionContext Context(*ExpandedRegion, *AA, + NonAffineSubRegionMap[ExpandedRegion], + false /* verifying */); DEBUG(dbgs() << "\t\tTrying " << ExpandedRegion->getNameStr() << "\n"); // Only expand when we did not collect errors. @@ -738,7 +764,8 @@ if (!DetectRegionsWithoutLoops && regionWithoutLoops(R, LI)) return; - DetectionContext Context(R, *AA, false /*verifying*/); + DetectionContext Context(R, *AA, NonAffineSubRegionMap[&R], + false /*verifying*/); bool RegionIsValid = isValidRegion(Context); bool HasErrors = !RegionIsValid || Context.Log.size() > 0; @@ -748,6 +775,7 @@ if (!HasErrors) { ++ValidRegion; ValidRegions.insert(&R); + return; } @@ -965,9 +993,16 @@ return false; } +bool ScopDetection::isNonAffineSubRegion(const Region *SubR, + const Region *ScopR) const { + return NonAffineSubRegionMap.lookup(ScopR).count(SubR); +} + void polly::ScopDetection::verifyRegion(const Region &R) const { assert(isMaxRegionInScop(R) && "Expect R is a valid region."); - DetectionContext Context(const_cast(R), *AA, true /*verifying*/); + NonAffineSubRegionSetTy DummyNonAffineSubRegionSet; + DetectionContext Context(const_cast(R), *AA, + DummyNonAffineSubRegionSet, true /*verifying*/); isValidRegion(Context); } @@ -1000,6 +1035,7 @@ void ScopDetection::releaseMemory() { ValidRegions.clear(); RejectLogs.clear(); + NonAffineSubRegionMap.clear(); // Do not clear the invalid function set. } Index: test/ScopDetect/non-affine-conditional.ll =================================================================== --- /dev/null +++ test/ScopDetect/non-affine-conditional.ll @@ -0,0 +1,42 @@ +; RUN: opt %loadPolly -polly-allow-nonaffine-branches -polly-detect -analyze < %s | FileCheck %s +; +; void f(int *A) { +; for (int i = 0; i < 1024; i++) +; if (A[i]) +; A[i] = 0; +; } +; +; CHECK: Valid Region for Scop: bb1 => bb9 +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(i32* %A) { +bb: + br label %bb1 + +bb1: ; preds = %bb8, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb8 ], [ 0, %bb ] + %exitcond = icmp ne i64 %indvars.iv, 1024 + br i1 %exitcond, label %bb2, label %bb9 + +bb2: ; preds = %bb1 + %tmp = getelementptr inbounds i32* %A, i64 %indvars.iv + %tmp3 = load i32* %tmp, align 4 + %tmp4 = icmp eq i32 %tmp3, 0 + br i1 %tmp4, label %bb7, label %bb5 + +bb5: ; preds = %bb2 + %tmp6 = getelementptr inbounds i32* %A, i64 %indvars.iv + store i32 0, i32* %tmp6, align 4 + br label %bb7 + +bb7: ; preds = %bb2, %bb5 + br label %bb8 + +bb8: ; preds = %bb7 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %bb1 + +bb9: ; preds = %bb1 + ret void +} Index: test/ScopDetect/non-affine-float-compare.ll =================================================================== --- /dev/null +++ test/ScopDetect/non-affine-float-compare.ll @@ -0,0 +1,46 @@ +; RUN: opt %loadPolly -polly-detect -polly-allow-nonaffine-branches -analyze < %s | FileCheck %s +; +; void f(float *A) { +; for (int i = 0; i < 1024; i++) +; if (A[i] == A[i - 1]) +; A[i]++; +; } +; +; CHECK: Valid Region for Scop: bb1 => bb14 +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @f(float* %A) { +bb: + br label %bb1 + +bb1: ; preds = %bb13, %bb + %indvars.iv = phi i64 [ %indvars.iv.next, %bb13 ], [ 0, %bb ] + %exitcond = icmp ne i64 %indvars.iv, 1024 + br i1 %exitcond, label %bb2, label %bb14 + +bb2: ; preds = %bb1 + %tmp = getelementptr inbounds float* %A, i64 %indvars.iv + %tmp3 = load float* %tmp, align 4 + %tmp4 = add nsw i64 %indvars.iv, -1 + %tmp5 = getelementptr inbounds float* %A, i64 %tmp4 + %tmp6 = load float* %tmp5, align 4 + %tmp7 = fcmp oeq float %tmp3, %tmp6 + br i1 %tmp7, label %bb8, label %bb12 + +bb8: ; preds = %bb2 + %tmp9 = getelementptr inbounds float* %A, i64 %indvars.iv + %tmp10 = load float* %tmp9, align 4 + %tmp11 = fadd float %tmp10, 1.000000e+00 + store float %tmp11, float* %tmp9, align 4 + br label %bb12 + +bb12: ; preds = %bb8, %bb2 + br label %bb13 + +bb13: ; preds = %bb12 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + br label %bb1 + +bb14: ; preds = %bb1 + ret void +}