Index: include/polly/ScopDetectionDiagnostic.h =================================================================== --- include/polly/ScopDetectionDiagnostic.h +++ include/polly/ScopDetectionDiagnostic.h @@ -84,7 +84,7 @@ rrkLastAffFunc, rrkLoopBound, - rrkLoopOverlapWithNonAffineSubRegion, + rrkLoopHasNoExit, rrkFuncCall, rrkNonSimpleMemoryAccess, @@ -511,22 +511,18 @@ }; //===----------------------------------------------------------------------===// -/// Captures errors when loop overlap with nonaffine subregion. -class ReportLoopOverlapWithNonAffineSubRegion : public RejectReason { +/// Captures errors when loop has no exit. +class ReportLoopHasNoExit : public RejectReason { //===--------------------------------------------------------------------===// - /// If L and R are set then L and R overlap. - - /// The loop contains stmt overlapping nonaffine subregion. + /// The loop that has no exit. Loop *L; - /// The nonaffine subregion that contains infinite loop. - Region *R; - const DebugLoc Loc; public: - ReportLoopOverlapWithNonAffineSubRegion(Loop *L, Region *R); + ReportLoopHasNoExit(Loop *L) + : RejectReason(rrkLoopHasNoExit), L(L), Loc(L->getStartLoc()) {} /// @name LLVM-RTTI interface //@{ Index: lib/Analysis/ScopDetection.cpp =================================================================== --- lib/Analysis/ScopDetection.cpp +++ lib/Analysis/ScopDetection.cpp @@ -297,56 +297,10 @@ // All loops in the region have to be overapproximated too if there // are accesses that depend on the iteration count. - BoxedLoopsSetTy ARBoxedLoopsSet; - for (BasicBlock *BB : AR->blocks()) { Loop *L = LI->getLoopFor(BB); - if (AR->contains(L)) { + if (AR->contains(L)) Context.BoxedLoopsSet.insert(L); - ARBoxedLoopsSet.insert(L); - } - } - - // Reject if the surrounding loop does not entirely contain the nonaffine - // subregion. - // This can happen because a region can contain BBs that have no path to the - // exit block (Infinite loops, UnreachableInst), but such blocks are never - // part of a loop. - // - // _______________ - // | Loop Header | <-----------. - // --------------- | - // | | - // _______________ ______________ - // | RegionEntry |-----> | RegionExit |-----> - // --------------- -------------- - // | - // _______________ - // | EndlessLoop | <--. - // --------------- | - // | | - // \------------/ - // - // In the example above, the loop (LoopHeader,RegionEntry,RegionExit) is - // neither entirely contained in the region RegionEntry->RegionExit - // (containing RegionEntry,EndlessLoop) nor is the region entirely contained - // in the loop. - // The block EndlessLoop is contained is in the region because - // Region::contains tests whether it is not dominated by RegionExit. This is - // probably to not having to query the PostdominatorTree. - // Instead of an endless loop, a dead end can also be formed by - // UnreachableInst. This case is already caught by isErrorBlock(). We hence - // only have to test whether there is an endless loop not contained in the - // surrounding loop. - BasicBlock *BBEntry = AR->getEntry(); - Loop *L = LI->getLoopFor(BBEntry); - while (L && AR->contains(L)) - L = L->getParentLoop(); - if (L) { - for (const auto *ARBoxedLoop : ARBoxedLoopsSet) - if (!L->contains(ARBoxedLoop)) - return invalid( - Context, /*Assert=*/true, L, AR); } return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty()); @@ -1057,18 +1011,23 @@ return invalid(Context, /*Assert=*/true, &Inst); } +/// Check whether @p L has exiting blocks. +/// +/// @param L The loop of interest +/// +/// @return True if the loop has exiting blocks, false otherwise. +static bool hasExitingBlocks(Loop *L) { + SmallVector ExitingBlocks; + L->getExitingBlocks(ExitingBlocks); + return !ExitingBlocks.empty(); +} + bool ScopDetection::canUseISLTripCount(Loop *L, DetectionContext &Context) const { // Ensure the loop has valid exiting blocks as well as latches, otherwise we // need to overapproximate it as a boxed loop. SmallVector LoopControlBlocks; L->getExitingBlocks(LoopControlBlocks); - - // Loops without exiting blocks cannot be handled by the schedule generation - // as it depends on a region covering that is not given. - if (LoopControlBlocks.empty()) - return false; - L->getLoopLatches(LoopControlBlocks); for (BasicBlock *ControlBB : LoopControlBlocks) { if (!isValidCFG(*ControlBB, true, false, Context)) @@ -1080,6 +1039,38 @@ } bool ScopDetection::isValidLoop(Loop *L, DetectionContext &Context) const { + // Loops that contain part but not all of the blocks of a region cannot be + // handled by the schedule generation. Such loop constructs can happen + // because a region can contain BBs that have no path to the exit block + // (Infinite loops, UnreachableInst), but such blocks are never part of a + // loop. + // + // _______________ + // | Loop Header | <-----------. + // --------------- | + // | | + // _______________ ______________ + // | RegionEntry |-----> | RegionExit |-----> + // --------------- -------------- + // | + // _______________ + // | EndlessLoop | <--. + // --------------- | + // | | + // \------------/ + // + // In the example above, the loop (LoopHeader,RegionEntry,RegionExit) is + // neither entirely contained in the region RegionEntry->RegionExit + // (containing RegionEntry,EndlessLoop) nor is the region entirely contained + // in the loop. + // The block EndlessLoop is contained in the region because Region::contains + // tests whether it is not dominated by RegionExit. This is probably to not + // having to query the PostdominatorTree. Instead of an endless loop, a dead + // end can also be formed by an UnreachableInst. This case is already caught + // by isErrorBlock(). We hence only have to reject endless loops here. + if (!hasExitingBlocks(L)) + return invalid(Context, /*Assert=*/true, L); + if (canUseISLTripCount(L, Context)) return true; Index: lib/Analysis/ScopDetectionDiagnostic.cpp =================================================================== --- lib/Analysis/ScopDetectionDiagnostic.cpp +++ lib/Analysis/ScopDetectionDiagnostic.cpp @@ -43,8 +43,6 @@ BADSCOP_STAT(CFG, "CFG too complex"); BADSCOP_STAT(LoopBound, "Loop bounds can not be computed"); -BADSCOP_STAT(LoopOverlapWithNonAffineSubRegion, - "Loop overlap with nonaffine subregion"); BADSCOP_STAT(FuncCall, "Function call with side effects appeared"); BADSCOP_STAT(AffFunc, "Expression not affine"); BADSCOP_STAT(Alias, "Found base address alias"); @@ -330,30 +328,20 @@ } //===----------------------------------------------------------------------===// -// ReportLoopOverlapWithNonAffineSubRegion. +// ReportLoopHasNoExit. -ReportLoopOverlapWithNonAffineSubRegion:: - ReportLoopOverlapWithNonAffineSubRegion(Loop *L, Region *R) - : RejectReason(rrkLoopOverlapWithNonAffineSubRegion), L(L), R(R), - Loc(L->getStartLoc()) { - ++BadLoopOverlapWithNonAffineSubRegionForScop; +std::string ReportLoopHasNoExit::getMessage() const { + return "Loop " + L->getHeader()->getName() + " has no exit."; } -std::string ReportLoopOverlapWithNonAffineSubRegion::getMessage() const { - return "Non affine subregion: " + R->getNameStr() + " overlaps Loop " + - L->getHeader()->getName(); +bool ReportLoopHasNoExit::classof(const RejectReason *RR) { + return RR->getKind() == rrkLoopHasNoExit; } -const DebugLoc &ReportLoopOverlapWithNonAffineSubRegion::getDebugLoc() const { - return Loc; -} - -bool ReportLoopOverlapWithNonAffineSubRegion::classof(const RejectReason *RR) { - return RR->getKind() == rrkLoopOverlapWithNonAffineSubRegion; -} +const DebugLoc &ReportLoopHasNoExit::getDebugLoc() const { return Loc; } -std::string ReportLoopOverlapWithNonAffineSubRegion::getEndUserMessage() const { - return "Loop overlaps with nonaffine subregion."; +std::string ReportLoopHasNoExit::getEndUserMessage() const { + return "Loop cannot be handled because it has no exit."; } //===----------------------------------------------------------------------===// Index: test/ScopDetectionDiagnostics/ReportLoopHasNoExit.ll =================================================================== --- /dev/null +++ test/ScopDetectionDiagnostics/ReportLoopHasNoExit.ll @@ -0,0 +1,101 @@ +; RUN: opt %loadPolly -pass-remarks-missed="polly-detect" -polly-allow-nonaffine-loops -analyze -polly-detect < %s 2>&1 | FileCheck %s +; RUN: opt %loadPolly -pass-remarks-missed="polly-detect" -polly-allow-nonaffine-loops=false -analyze -polly-detect < %s 2>&1 | FileCheck %s + +; void func (int param0, int N, int *A) +; { +; for (int i = 0; i < N; i++) +; if (param0) +; while (1) +; A[i] = 1; +; else +; A[i] = 2; +; } + +; CHECK: remark: ReportLoopHasNoExit.c:7:7: Loop cannot be handled because it has no exit. + + + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; Function Attrs: nounwind uwtable +define void @func(i32 %param0, i32 %N, i32* %A) #0 !dbg !6 { +entry: + %param0.addr = alloca i32, align 4 + %N.addr = alloca i32, align 4 + %A.addr = alloca i32*, align 8 + %i = alloca i32, align 4 + store i32 %param0, i32* %param0.addr, align 4 + store i32 %N, i32* %N.addr, align 4 + store i32* %A, i32** %A.addr, align 8 + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %0 = load i32, i32* %i, align 4 + %1 = load i32, i32* %N.addr, align 4 + %cmp = icmp slt i32 %0, %1 + br i1 %cmp, label %for.body, label %for.end, !dbg !27 + +for.body: ; preds = %for.cond + %2 = load i32, i32* %param0.addr, align 4 + %tobool = icmp ne i32 %2, 0 + br i1 %tobool, label %if.then, label %if.else + +if.then: ; preds = %for.body + br label %while.body + +while.body: ; preds = %if.then, %while.body + %3 = load i32, i32* %i, align 4 + %idxprom = sext i32 %3 to i64 + %4 = load i32*, i32** %A.addr, align 8 + %arrayidx = getelementptr inbounds i32, i32* %4, i64 %idxprom + store i32 1, i32* %arrayidx, align 4 + br label %while.body, !dbg !37 + +if.else: ; preds = %for.body + %5 = load i32, i32* %i, align 4 + %idxprom1 = sext i32 %5 to i64 + %6 = load i32*, i32** %A.addr, align 8 + %arrayidx2 = getelementptr inbounds i32, i32* %6, i64 %idxprom1 + store i32 2, i32* %arrayidx2, align 4 + br label %if.end + +if.end: ; preds = %if.else + br label %for.inc + +for.inc: ; preds = %if.end + %7 = load i32, i32* %i, align 4 + %inc = add nsw i32 %7, 1 + store i32 %inc, i32* %i, align 4 + br label %for.cond + +for.end: ; preds = %for.cond + ret void +} + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 + +attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} +!llvm.ident = !{!5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 ", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "ReportLoopHasNoExit.c", directory: "test/ScopDetectionDiagnostics/") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{!"clang version 3.9.0 "} +!6 = distinct !DISubprogram(name: "func", scope: !1, file: !1, line: 1, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2) +!19 = distinct !DILexicalBlock(scope: !6, file: !1, line: 3, column: 3) +!23 = !DILexicalBlockFile(scope: !24, file: !1, discriminator: 1) +!24 = distinct !DILexicalBlock(scope: !19, file: !1, line: 3, column: 3) +!27 = !DILocation(line: 3, column: 3, scope: !23) +!29 = distinct !DILexicalBlock(scope: !30, file: !1, line: 5, column: 9) +!30 = distinct !DILexicalBlock(scope: !24, file: !1, line: 4, column: 3) +!33 = distinct !DILexicalBlock(scope: !29, file: !1, line: 6, column: 5) +!37 = !DILocation(line: 7, column: 7, scope: !38) +!38 = !DILexicalBlockFile(scope: !33, file: !1, discriminator: 1) Index: test/ScopDetectionDiagnostics/ReportLoopOverlapWithNonAffineSubRegion.ll =================================================================== --- test/ScopDetectionDiagnostics/ReportLoopOverlapWithNonAffineSubRegion.ll +++ /dev/null @@ -1,112 +0,0 @@ -; RUN: opt %loadPolly -pass-remarks-missed="polly-detect" -polly-allow-nonaffine-loops -analyze -polly-detect < %s 2>&1 | FileCheck %s --check-prefix=REJECTLOOPOVERLAPREGION -; RUN: opt %loadPolly -pass-remarks-missed="polly-detect" -polly-allow-nonaffine-loops=false -analyze -polly-detect < %s 2>&1 | FileCheck %s --check-prefix=REJECTNONAFFINELOOPS - -; void func (int param0, int N, int *A) -; { -; for (int i = 0; i < N; i++) -; if (param0) -; while (1) -; A[i] = 1; -; else -; A[i] = 2; -; } - -; If we reject non-affine region and loop will be reported: -; -; REJECTLOOPOVERLAPREGION: remark: ReportLoopOverlapWithNonAffineRegion.c:3:3: The following errors keep this region from being a Scop. -; REJECTLOOPOVERLAPREGION: remark: ReportLoopOverlapWithNonAffineRegion.c:3:3: Loop overlaps with nonaffine subregion. -; REJECTLOOPOVERLAPREGION: remark: ReportLoopOverlapWithNonAffineRegion.c:7:7: Failed to derive an affine function from the loop bounds. -; REJECTLOOPOVERLAPREGION: remark: ReportLoopOverlapWithNonAffineRegion.c:7:7: Invalid Scop candidate ends here. -; -; If we reject non-affine loops the non-affine loop bound will be reported: -; -; REJECTNONAFFINELOOPS: remark: ReportLoopOverlapWithNonAffineRegion.c:7:7: The following errors keep this region from being a Scop. -; REJECTNONAFFINELOOPS: remark: ReportLoopOverlapWithNonAffineRegion.c:7:7: Failed to derive an affine function from the loop bounds. -; REJECTNONAFFINELOOPS: remark: ReportLoopOverlapWithNonAffineRegion.c:7:7: Invalid Scop candidate ends here. - - - -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" - -; Function Attrs: nounwind uwtable -define void @func(i32 %param0, i32 %N, i32* %A) #0 !dbg !6 { -entry: - %param0.addr = alloca i32, align 4 - %N.addr = alloca i32, align 4 - %A.addr = alloca i32*, align 8 - %i = alloca i32, align 4 - store i32 %param0, i32* %param0.addr, align 4 - store i32 %N, i32* %N.addr, align 4 - store i32* %A, i32** %A.addr, align 8 - store i32 0, i32* %i, align 4 - br label %for.cond - -for.cond: ; preds = %for.inc, %entry - %0 = load i32, i32* %i, align 4 - %1 = load i32, i32* %N.addr, align 4 - %cmp = icmp slt i32 %0, %1 - br i1 %cmp, label %for.body, label %for.end, !dbg !27 - -for.body: ; preds = %for.cond - %2 = load i32, i32* %param0.addr, align 4 - %tobool = icmp ne i32 %2, 0 - br i1 %tobool, label %if.then, label %if.else - -if.then: ; preds = %for.body - br label %while.body - -while.body: ; preds = %if.then, %while.body - %3 = load i32, i32* %i, align 4 - %idxprom = sext i32 %3 to i64 - %4 = load i32*, i32** %A.addr, align 8 - %arrayidx = getelementptr inbounds i32, i32* %4, i64 %idxprom - store i32 1, i32* %arrayidx, align 4 - br label %while.body, !dbg !37 - -if.else: ; preds = %for.body - %5 = load i32, i32* %i, align 4 - %idxprom1 = sext i32 %5 to i64 - %6 = load i32*, i32** %A.addr, align 8 - %arrayidx2 = getelementptr inbounds i32, i32* %6, i64 %idxprom1 - store i32 2, i32* %arrayidx2, align 4 - br label %if.end - -if.end: ; preds = %if.else - br label %for.inc - -for.inc: ; preds = %if.end - %7 = load i32, i32* %i, align 4 - %inc = add nsw i32 %7, 1 - store i32 %inc, i32* %i, align 4 - br label %for.cond - -for.end: ; preds = %for.cond - ret void -} - -; Function Attrs: nounwind readnone -declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 - -attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } -attributes #1 = { nounwind readnone } - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!3, !4} -!llvm.ident = !{!5} - -!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 ", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) -!1 = !DIFile(filename: "ReportLoopOverlapWithNonAffineRegion.c", directory: "test/ScopDetectionDiagnostics/") -!2 = !{} -!3 = !{i32 2, !"Dwarf Version", i32 4} -!4 = !{i32 2, !"Debug Info Version", i32 3} -!5 = !{!"clang version 3.9.0 "} -!6 = distinct !DISubprogram(name: "func", scope: !1, file: !1, line: 1, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2) -!19 = distinct !DILexicalBlock(scope: !6, file: !1, line: 3, column: 3) -!23 = !DILexicalBlockFile(scope: !24, file: !1, discriminator: 1) -!24 = distinct !DILexicalBlock(scope: !19, file: !1, line: 3, column: 3) -!27 = !DILocation(line: 3, column: 3, scope: !23) -!29 = distinct !DILexicalBlock(scope: !30, file: !1, line: 5, column: 9) -!30 = distinct !DILexicalBlock(scope: !24, file: !1, line: 4, column: 3) -!33 = distinct !DILexicalBlock(scope: !29, file: !1, line: 6, column: 5) -!37 = !DILocation(line: 7, column: 7, scope: !38) -!38 = !DILexicalBlockFile(scope: !33, file: !1, discriminator: 1)