Index: include/polly/ScopDetection.h
===================================================================
--- include/polly/ScopDetection.h
+++ include/polly/ScopDetection.h
@@ -341,6 +341,13 @@
   /// @brief Return true if @p SubR is a non-affine subregion in @p ScopR.
   bool isNonAffineSubRegion(const Region *SubR, const Region *ScopR) const;
 
+  /// @brief Check if the block is a valid exception block.
+  ///
+  /// @param BB The block to check.
+  ///
+  /// @return True if the block is a valid exception block, false otherwise.
+  static bool isValidExceptionBlock(BasicBlock &BB);
+
   /// @brief Get a message why a region is invalid
   ///
   /// @param R The region for which we get the error message
Index: lib/Analysis/ScopDetection.cpp
===================================================================
--- lib/Analysis/ScopDetection.cpp
+++ lib/Analysis/ScopDetection.cpp
@@ -296,6 +296,32 @@
   return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty());
 }
 
+bool ScopDetection::isValidExceptionBlock(BasicBlock &BB) {
+
+  // Calls to the ubsan out-of-bounds handler always mark an exception block.
+  for (Instruction &Inst : BB)
+    if (CallInst *CI = dyn_cast<CallInst>(&Inst))
+      if (Function *F = CI->getCalledFunction())
+        if (F->getName().equals("__ubsan_handle_out_of_bounds"))
+          return true;
+
+  // Exception blocks have to be terminated by an unreachable instruction.
+  if (!isa<UnreachableInst>(BB.getTerminator()))
+    return false;
+
+  for (auto *SuccBB : successors(&BB))
+    if (SuccBB->getTerminator()->getNumSuccessors() == 1)
+      return false;
+
+  if (BB.size() > 2)
+    return false;
+
+  // A two-instruction block is only valid if it is a no-return call
+  // followed by the unreachable terminator.
+  if (BB.size() == 2) {
+    CallInst *CI = dyn_cast<CallInst>(BB.begin());
+    return CI && CI->doesNotReturn();
+  }
+
+  return true;
+}
+
 bool ScopDetection::isValidCFG(BasicBlock &BB,
                                DetectionContext &Context) const {
   Region &CurRegion = Context.CurRegion;
@@ -306,6 +332,10 @@
   if (isa<ReturnInst>(TI) && !CurRegion.getExit() && TI->getNumOperands() == 0)
     return true;
 
+  // Allow unreachable instructions as terminators of exception blocks.
+  if (isa<UnreachableInst>(TI))
+    return isValidExceptionBlock(BB);
+
   BranchInst *Br = dyn_cast<BranchInst>(TI);
   if (!Br)
     return false;
@@ -368,7 +398,7 @@
 bool ScopDetection::isValidCallInst(CallInst &CI) {
   if (CI.doesNotReturn())
-    return false;
+    return isValidExceptionBlock(*CI.getParent());
 
   if (CI.doesNotAccessMemory())
     return true;
@@ -382,7 +412,7 @@
   if (isIgnoredIntrinsic(&CI))
     return true;
 
-  return false;
+  return isValidExceptionBlock(*CI.getParent());
 }
 
 bool ScopDetection::isInvariant(const Value &Val, const Region &Reg) const {
@@ -707,10 +737,9 @@
   // overapproximate it as a boxed loop.
   SmallVector<BasicBlock *, 4> ExitingBlocks;
   L->getExitingBlocks(ExitingBlocks);
-  for (BasicBlock *ExitingBB : ExitingBlocks) {
+  for (BasicBlock *ExitingBB : ExitingBlocks)
     if (!isValidCFG(*ExitingBB, Context))
       return false;
-  }
 
   // We can use ISL to compute the trip count of L.
   return true;
Index: lib/Analysis/ScopInfo.cpp
===================================================================
--- lib/Analysis/ScopInfo.cpp
+++ lib/Analysis/ScopInfo.cpp
@@ -1554,6 +1554,8 @@
   }
 
   BasicBlock *BB = getRegionNodeBasicBlock(RN);
+  TerminatorInst *TI = BB->getTerminator();
+
   isl_set *Domain = DomainMap[BB];
   DEBUG(dbgs() << "\tVisit: " << BB->getName() << " : " << Domain << "\n");
   assert(Domain && "Due to reverse post order traversal of the region all "
@@ -1561,6 +1563,13 @@
                    "visited and a domain for this region node should have "
                    "been set.");
 
+  // Unreachable instructions do not have successors so we can skip them.
+  // However, such blocks need to be valid exception blocks.
+  if (isa<UnreachableInst>(TI)) {
+    assert(ScopDetection::isValidExceptionBlock(*BB));
+    continue;
+  }
+
   Loop *BBLoop = getRegionNodeLoop(RN, LI);
   int BBLoopDepth = getRelativeLoopDepth(BBLoop);
 
@@ -1569,7 +1578,7 @@
   // exit node, hence the single entry node domain is the condition set. For
   // basic blocks we use the helper function buildConditionSets.
   SmallVector<isl_set *, 8> ConditionSets;
-  BranchInst *BI = cast<BranchInst>(BB->getTerminator());
+  BranchInst *BI = cast<BranchInst>(TI);
   if (RN->isSubRegion())
     ConditionSets.push_back(isl_set_copy(Domain));
   else
@@ -1738,6 +1747,13 @@
 
       // Under the union of all predecessor conditions we can reach this block.
       Domain = isl_set_intersect(Domain, PredDom);
+
+      // Add assumptions for exception blocks.
+      if (ScopDetection::isValidExceptionBlock(*BB)) {
+        IsOptimized = true;
+        isl_set *DomPar = isl_set_params(isl_set_copy(Domain));
+        addAssumption(isl_set_complement(DomPar));
+      }
     }
   }
 
@@ -2435,7 +2451,8 @@
 ScalarEvolution *Scop::getSE() const { return SE; }
 
 bool Scop::isTrivialBB(BasicBlock *BB, TempScop &tempScop) {
-  if (tempScop.getAccessFunctions(BB))
+  if (tempScop.getAccessFunctions(BB) &&
+      !ScopDetection::isValidExceptionBlock(*BB))
     return false;
 
   return true;
Index: test/ScopInfo/BoundChecks/single-loop.ll
===================================================================
--- test/ScopInfo/BoundChecks/single-loop.ll
+++ test/ScopInfo/BoundChecks/single-loop.ll
@@ -1,5 +1,5 @@
-; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
-; XFAIL: *
+; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s
+; RUN: opt %loadPolly -polly-detect-unprofitable -polly-ast -analyze < %s | FileCheck %s --check-prefix=AST
 ;
 ; void exception() __attribute__((noreturn));
 ;
@@ -18,7 +18,18 @@
 ; We should detect this kernel as a SCoP and derive run-time conditions such
 ; that the bound-checked blocks are not part of the optimized SCoP.
 
-; CHECK: Assumed Context
+; CHECK: Assumed Context:
+; CHECK:   [n] -> { : n <= 100 }
+
+; AST: if (n <= 100)
+; AST:   for (int c0 = 0; c0 <= min(99, n - 1); c0 += 1)
+; AST:     Stmt_if_end_4(c0);
+;
+; AST-NOT: for
+; AST-NOT: Stmt
+;
+; AST: else
+; AST:   {  /* original code */ }
 
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
Index: test/ScopInfo/BoundChecks/two-loops.ll
===================================================================
--- test/ScopInfo/BoundChecks/two-loops.ll
+++ test/ScopInfo/BoundChecks/two-loops.ll
@@ -1,53 +1,70 @@
-; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
-; XFAIL: *
+; RUN: opt %loadPolly -polly-detect-unprofitable -polly-scops -analyze < %s | FileCheck %s
+; RUN: opt %loadPolly -polly-detect-unprofitable -polly-ast -analyze < %s | FileCheck %s --check-prefix=AST
 ;
 ; void exception() __attribute__((noreturn));
 ;
 ; void foo(long n, float A[100]) {
-;   for (long i = 0; i < n; i++) {
-;     if (i < 0)
-;       exception();
+;   for (long j = 0; j < n; j++) {
+;     for (long i = j; i < n; i++) {
+;       if (i < 0)
+;         exception();
 ;
-;     if (i >= 100)
-;       exception();
+;       if (i >= 100)
+;         exception();
 ;
-;     A[i] += i;
+;       A[i] += i;
+;     }
 ;   }
 ; }
+;
+; CHECK: Assumed Context:
+; CHECK:   [n] -> { : n <= 100 }
 
-; We should detect this kernel as a SCoP and derive run-time conditions such
-; that the bound-checked blocks are not part of the optimized SCoP.
-
-; CHECK: Assumed Context
-
+; AST: if (n <= 100)
+; AST:   for (int c0 = 0; c0 <= min(99, n - 1); c0 += 1)
+; AST:     for (int c1 = 0; c1 <= min(n - c0 - 1, -c0 + 99); c1 += 1)
+; AST:       Stmt_if_end_7(c0, c1);
+;
+; AST-NOT: for
+; AST-NOT: Stmt
+;
+; AST: else
+; AST:   {  /* original code */ }
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
-; Function Attrs: nounwind uwtable
 define void @foo(i64 %n, float* %A) #0 {
 entry:
   br label %for.cond
 
-for.cond:                                         ; preds = %for.inc, %entry
-  %i.0 = phi i64 [ 0, %entry ], [ %inc, %for.inc ]
-  %cmp = icmp slt i64 %i.0, %n
-  br i1 %cmp, label %for.body, label %for.end
+for.cond:                                         ; preds = %for.inc.8, %entry
+  %j.0 = phi i64 [ 0, %entry ], [ %inc9, %for.inc.8 ]
+  %cmp = icmp slt i64 %j.0, %n
+  br i1 %cmp, label %for.body, label %for.end.10
 
 for.body:                                         ; preds = %for.cond
+  br label %for.cond.1
+
+for.cond.1:                                       ; preds = %for.inc, %for.body
+  %i.0 = phi i64 [ %j.0, %for.body ], [ %inc, %for.inc ]
+  %cmp2 = icmp slt i64 %i.0, %n
+  br i1 %cmp2, label %for.body.3, label %for.end
+
+for.body.3:                                       ; preds = %for.cond.1
   br i1 false, label %if.then, label %if.end
 
-if.then:                                          ; preds = %for.body
+if.then:                                          ; preds = %for.body.3
   call void (...) @exception() #2
   unreachable
 
-if.end:                                           ; preds = %for.body
-  %cmp2 = icmp sgt i64 %i.0, 99
-  br i1 %cmp2, label %if.then.3, label %if.end.4
+if.end:                                           ; preds = %for.body.3
+  %cmp5 = icmp sgt i64 %i.0, 99
+  br i1 %cmp5, label %if.then.6, label %if.end.7
 
-if.then.3:                                        ; preds = %if.end
+if.then.6:                                        ; preds = %if.end
   call void (...) @exception() #2
   unreachable
 
-if.end.4:                                         ; preds = %if.end
+if.end.7:                                         ; preds = %if.end
   %conv = sitofp i64 %i.0 to float
   %arrayidx = getelementptr inbounds float, float* %A, i64 %i.0
   %tmp = load float, float* %arrayidx, align 4
@@ -55,15 +72,21 @@
   store float %add, float* %arrayidx, align 4
   br label %for.inc
 
-for.inc:                                          ; preds = %if.end.4
+for.inc:                                          ; preds = %if.end.7
   %inc = add nuw nsw i64 %i.0, 1
+  br label %for.cond.1
+
+for.end:                                          ; preds = %for.cond.1
+  br label %for.inc.8
+
+for.inc.8:                                        ; preds = %for.end
+  %inc9 = add nuw nsw i64 %j.0, 1
   br label %for.cond
 
-for.end:                                          ; preds = %for.cond
+for.end.10:                                       ; preds = %for.cond
   ret void
 }
 
-; Function Attrs: noreturn
 declare void @exception(...) #1
 
 attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }