Index: include/polly/ScopDetectionDiagnostic.h =================================================================== --- include/polly/ScopDetectionDiagnostic.h +++ include/polly/ScopDetectionDiagnostic.h @@ -84,6 +84,7 @@ rrkLastAffFunc, rrkLoopBound, + rrkLoopOverlapWithNonAffineRegion, rrkFuncCall, rrkNonSimpleMemoryAccess, @@ -512,6 +513,34 @@ }; //===----------------------------------------------------------------------===// +/// @brief Captures errors when a loop overlaps with a non-affine region. +class ReportLoopOverlapWithNonAffineRegion : public RejectReason { + //===--------------------------------------------------------------------===// + + // L is the loop and R the non-affine region that overlap. + Loop *L; + + Region *R; + + const DebugLoc Loc; + +public: + ReportLoopOverlapWithNonAffineRegion(Loop *L, Region *R); + + /// @name LLVM-RTTI interface + //@{ + static bool classof(const RejectReason *RR); + //@} + + /// @name RejectReason interface + //@{ + virtual std::string getMessage() const override; + virtual const DebugLoc &getDebugLoc() const override; + virtual std::string getEndUserMessage() const override; + //@} +}; + +//===----------------------------------------------------------------------===// /// @brief Captures errors with non-side-effect-known function calls. class ReportFuncCall : public RejectReason { //===--------------------------------------------------------------------===// Index: lib/Analysis/ScopDetection.cpp =================================================================== --- lib/Analysis/ScopDetection.cpp +++ lib/Analysis/ScopDetection.cpp @@ -294,12 +294,28 @@ if (!Context.NonAffineSubRegionSet.insert(AR)) return true; + bool ContainsNonAffineLoops = false; // All loops in the region have to be overapproximated too if there // are accesses that depend on the iteration count.
for (BasicBlock *BB : AR->blocks()) { Loop *L = LI->getLoopFor(BB); - if (AR->contains(L)) + if (AR->contains(L)) { Context.BoxedLoopsSet.insert(L); + ContainsNonAffineLoops = true; + } + } + + // Ensure that the non-affine region is entirely contained in the innermost + // surrounding loop that is not itself part of the region. + if (AllowNonAffineSubLoops && ContainsNonAffineLoops) { + BasicBlock *BBEntry = AR->getEntry(); + Loop *L = LI->getLoopFor(BBEntry); + while (L && AR->contains(L)) + L = L->getParentLoop(); + if (L) + for (auto *BB : AR->blocks()) + if (!L->contains(BB)) + return false; } return (AllowNonAffineSubLoops || Context.BoxedLoopsSet.empty()); @@ -1041,6 +1057,10 @@ if (addOverApproximatedRegion(R, Context)) return true; + else { + return invalid<ReportLoopOverlapWithNonAffineRegion>( + Context, /*Assert=*/true, L, R); + } } const SCEV *LoopCount = SE->getBackedgeTakenCount(L); Index: lib/Analysis/ScopDetectionDiagnostic.cpp =================================================================== --- lib/Analysis/ScopDetectionDiagnostic.cpp +++ lib/Analysis/ScopDetectionDiagnostic.cpp @@ -43,6 +43,8 @@ BADSCOP_STAT(CFG, "CFG too complex"); BADSCOP_STAT(LoopBound, "Loop bounds can not be computed"); +BADSCOP_STAT(LoopOverlapWithNonAffineRegion, + "Loop overlap with nonaffine region"); BADSCOP_STAT(FuncCall, "Function call with side effects appeared"); BADSCOP_STAT(AffFunc, "Expression not affine"); BADSCOP_STAT(Alias, "Found base address alias"); @@ -328,6 +330,33 @@ } //===----------------------------------------------------------------------===// +// ReportLoopOverlapWithNonAffineRegion.
+ +ReportLoopOverlapWithNonAffineRegion::ReportLoopOverlapWithNonAffineRegion( + Loop *L, Region *R) + : RejectReason(rrkLoopOverlapWithNonAffineRegion), L(L), R(R), + Loc(L->getStartLoc()) { + ++BadLoopOverlapWithNonAffineRegionForScop; +} + +std::string ReportLoopOverlapWithNonAffineRegion::getMessage() const { + return "Non affine region: " + R->getNameStr() + " overlap Loop " + + L->getHeader()->getName(); +} + +const DebugLoc &ReportLoopOverlapWithNonAffineRegion::getDebugLoc() const { + return Loc; +} + +bool ReportLoopOverlapWithNonAffineRegion::classof(const RejectReason *RR) { + return RR->getKind() == rrkLoopOverlapWithNonAffineRegion; +} + +std::string ReportLoopOverlapWithNonAffineRegion::getEndUserMessage() const { + return "Loop does contain stmt overlap with nonaffine region."; +} + +//===----------------------------------------------------------------------===// // ReportFuncCall. ReportFuncCall::ReportFuncCall(Instruction *Inst) Index: test/ScopDetectionDiagnostics/ReportLoopOverlapWithNonAffineRegion.ll =================================================================== --- /dev/null +++ test/ScopDetectionDiagnostics/ReportLoopOverlapWithNonAffineRegion.ll @@ -0,0 +1,150 @@ +; RUN: opt -pass-remarks-missed="polly-detect" -polly-allow-nonaffine-loops -polly-process-unprofitable -analyze -polly-detect < %s 2>&1 | FileCheck %s --check-prefix=REJECTLOOPOVERLAPREGION +; RUN: opt -pass-remarks-missed="polly-detect" -polly-allow-nonaffine-loops=false -polly-process-unprofitable -analyze -polly-detect < %s 2>&1 | FileCheck %s --check-prefix=REJECTNONAFFINELOOPS + +; void func (int param0, int N, int *A) +; { +; for (int i = 0; i < N; i++) +; if (param0) +; while (1) +; A[i] = 1; +; else +; A[i] = 2; +; } + +; If we allow non-affine loops, the loop overlapping the non-affine region will be reported: +; +; REJECTLOOPOVERLAPREGION: remark: ReportLoopOverlapWithNonAffineRegion.c:5:9: The following errors keep this region from being a Scop.
+; REJECTLOOPOVERLAPREGION: remark: ReportLoopOverlapWithNonAffineRegion.c:7:7: Loop does contain stmt overlap with nonaffine region. +; REJECTLOOPOVERLAPREGION: remark: ReportLoopOverlapWithNonAffineRegion.c:8:14: Invalid Scop candidate ends here. +; +; If we reject non-affine loops the non-affine loop bound will be reported: +; +; REJECTNONAFFINELOOPS: remark: ReportLoopOverlapWithNonAffineRegion.c:5:9: The following errors keep this region from being a Scop. +; REJECTNONAFFINELOOPS: remark: ReportLoopOverlapWithNonAffineRegion.c:7:7: Failed to derive an affine function from the loop bounds. +; REJECTNONAFFINELOOPS: remark: ReportLoopOverlapWithNonAffineRegion.c:8:14: Invalid Scop candidate ends here. + + + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; Function Attrs: nounwind uwtable +define void @func(i32 %param0, i32 %N, i32* %A) #0 !dbg !6 { +entry: + %param0.addr = alloca i32, align 4 + %N.addr = alloca i32, align 4 + %A.addr = alloca i32*, align 8 + %i = alloca i32, align 4 + store i32 %param0, i32* %param0.addr, align 4 + call void @llvm.dbg.declare(metadata i32* %param0.addr, metadata !11, metadata !12), !dbg !13 + store i32 %N, i32* %N.addr, align 4 + call void @llvm.dbg.declare(metadata i32* %N.addr, metadata !14, metadata !12), !dbg !15 + store i32* %A, i32** %A.addr, align 8 + call void @llvm.dbg.declare(metadata i32** %A.addr, metadata !16, metadata !12), !dbg !17 + call void @llvm.dbg.declare(metadata i32* %i, metadata !18, metadata !12), !dbg !20 + store i32 0, i32* %i, align 4, !dbg !20 + br label %for.cond, !dbg !21 + +for.cond: ; preds = %for.inc, %entry + %0 = load i32, i32* %i, align 4, !dbg !22 + %1 = load i32, i32* %N.addr, align 4, !dbg !25 + %cmp = icmp slt i32 %0, %1, !dbg !26 + br i1 %cmp, label %for.body, label %for.end, !dbg !27 + +for.body: ; preds = %for.cond + %2 = load i32, i32* %param0.addr, align 4, !dbg !28 + %tobool = icmp ne i32 %2, 0, !dbg !28 + br i1 %tobool, label %if.then, label %if.else, !dbg !31 + 
+if.then: ; preds = %for.body + br label %while.body, !dbg !32 + +while.body: ; preds = %if.then, %while.body + %3 = load i32, i32* %i, align 4, !dbg !34 + %idxprom = sext i32 %3 to i64, !dbg !35 + %4 = load i32*, i32** %A.addr, align 8, !dbg !35 + %arrayidx = getelementptr inbounds i32, i32* %4, i64 %idxprom, !dbg !35 + store i32 1, i32* %arrayidx, align 4, !dbg !36 + br label %while.body, !dbg !37, !llvm.loop !39 + +if.else: ; preds = %for.body + %5 = load i32, i32* %i, align 4, !dbg !40 + %idxprom1 = sext i32 %5 to i64, !dbg !42 + %6 = load i32*, i32** %A.addr, align 8, !dbg !42 + %arrayidx2 = getelementptr inbounds i32, i32* %6, i64 %idxprom1, !dbg !42 + store i32 2, i32* %arrayidx2, align 4, !dbg !43 + br label %if.end + +if.end: ; preds = %if.else + br label %for.inc, !dbg !44 + +for.inc: ; preds = %if.end + %7 = load i32, i32* %i, align 4, !dbg !45 + %inc = add nsw i32 %7, 1, !dbg !45 + store i32 %inc, i32* %i, align 4, !dbg !45 + br label %for.cond, !dbg !47, !llvm.loop !48 + +for.end: ; preds = %for.cond + ret void, !dbg !50 +} + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 + +attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} +!llvm.ident = !{!5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 ", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "ReportLoopOverlapWithNonAffineRegion.c", directory: "test/ScopDetectionDiagnostics/") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} 
+!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{!"clang version 3.9.0 "} +!6 = distinct !DISubprogram(name: "func", scope: !1, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2) +!7 = !DISubroutineType(types: !8) +!8 = !{null, !9, !9, !10} +!9 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) +!10 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !9, size: 64, align: 64) +!11 = !DILocalVariable(name: "param0", arg: 1, scope: !6, file: !1, line: 1, type: !9) +!12 = !DIExpression() +!13 = !DILocation(line: 1, column: 16, scope: !6) +!14 = !DILocalVariable(name: "N", arg: 2, scope: !6, file: !1, line: 1, type: !9) +!15 = !DILocation(line: 1, column: 28, scope: !6) +!16 = !DILocalVariable(name: "A", arg: 3, scope: !6, file: !1, line: 1, type: !10) +!17 = !DILocation(line: 1, column: 36, scope: !6) +!18 = !DILocalVariable(name: "i", scope: !19, file: !1, line: 3, type: !9) +!19 = distinct !DILexicalBlock(scope: !6, file: !1, line: 3, column: 3) +!20 = !DILocation(line: 3, column: 12, scope: !19) +!21 = !DILocation(line: 3, column: 8, scope: !19) +!22 = !DILocation(line: 3, column: 19, scope: !23) +!23 = !DILexicalBlockFile(scope: !24, file: !1, discriminator: 1) +!24 = distinct !DILexicalBlock(scope: !19, file: !1, line: 3, column: 3) +!25 = !DILocation(line: 3, column: 23, scope: !23) +!26 = !DILocation(line: 3, column: 21, scope: !23) +!27 = !DILocation(line: 3, column: 3, scope: !23) +!28 = !DILocation(line: 5, column: 9, scope: !29) +!29 = distinct !DILexicalBlock(scope: !30, file: !1, line: 5, column: 9) +!30 = distinct !DILexicalBlock(scope: !24, file: !1, line: 4, column: 3) +!31 = !DILocation(line: 5, column: 9, scope: !30) +!32 = !DILocation(line: 7, column: 7, scope: !33) +!33 = distinct !DILexicalBlock(scope: !29, file: !1, line: 6, column: 5) +!34 = !DILocation(line: 8, column: 11, scope: !33) +!35 = !DILocation(line: 8, column: 
9, scope: !33) +!36 = !DILocation(line: 8, column: 14, scope: !33) +!37 = !DILocation(line: 7, column: 7, scope: !38) +!38 = !DILexicalBlockFile(scope: !33, file: !1, discriminator: 1) +!39 = distinct !{!39, !32} +!40 = !DILocation(line: 12, column: 9, scope: !41) +!41 = distinct !DILexicalBlock(scope: !29, file: !1, line: 11, column: 5) +!42 = !DILocation(line: 12, column: 7, scope: !41) +!43 = !DILocation(line: 12, column: 12, scope: !41) +!44 = !DILocation(line: 14, column: 3, scope: !30) +!45 = !DILocation(line: 3, column: 27, scope: !46) +!46 = !DILexicalBlockFile(scope: !24, file: !1, discriminator: 2) +!47 = !DILocation(line: 3, column: 3, scope: !46) +!48 = distinct !{!48, !49} +!49 = !DILocation(line: 3, column: 3, scope: !6) +!50 = !DILocation(line: 15, column: 1, scope: !6)