Index: include/polly/ScopDetection.h =================================================================== --- include/polly/ScopDetection.h +++ include/polly/ScopDetection.h @@ -52,6 +52,8 @@ #include "polly/ScopDetectionDiagnostic.h" +#include "llvm/ADT/SetVector.h" + #include #include @@ -100,8 +102,8 @@ }; typedef std::map MapInsnToMemAcc; -typedef std::pair PairInsnAddRec; -typedef std::vector AFs; +typedef std::pair PairInstSCEV; +typedef std::vector AFs; typedef std::map BaseToAFs; typedef std::map BaseToElSize; @@ -134,8 +136,17 @@ bool Verifying; // If we are in the verification phase? RejectLog Log; - // Map a base pointer to all access functions accessing it. - BaseToAFs NonAffineAccesses, AffineAccesses; + /// @brief Map a base pointer to all access functions accessing it. + /// + /// This map is indexed by the base pointer. Each element of the map + /// is a list of memory accesses that reference this base pointer. + BaseToAFs Accesses; + + /// @brief The set of base pointers with non-affine accesses. + /// + /// This set contains all base pointers which are used in memory accesses + /// that can not be detected as affine accesses. + SetVector NonAffineAccesses; BaseToElSize ElementSize; DetectionContext(Region &R, AliasAnalysis &AA, bool Verify) Index: lib/Analysis/ScopDetection.cpp =================================================================== --- lib/Analysis/ScopDetection.cpp +++ lib/Analysis/ScopDetection.cpp @@ -359,47 +359,79 @@ MapInsnToMemAcc InsnToMemAcc; bool ScopDetection::hasAffineMemoryAccesses(DetectionContext &Context) const { - for (auto P : Context.NonAffineAccesses) { - const SCEVUnknown *BasePointer = P.first; + for (const SCEVUnknown *BasePointer : Context.NonAffineAccesses) { Value *BaseValue = BasePointer->getValue(); ArrayShape *Shape = new ArrayShape(BasePointer); + bool IsNonAffine = false; // First step: collect parametric terms in all array references. SmallVector Terms; - for (PairInsnAddRec PIAF : Context.NonAffineAccesses[BasePointer]) - PIAF.second->collectParametricTerms(*SE, Terms); + for (const auto &PIAF : Context.Accesses[BasePointer]) { + const SCEVAddRecExpr *AccessFunction = + dyn_cast(PIAF.second); + const Instruction *Insn = PIAF.first; + + if (!AccessFunction) { + if (AllowNonAffine) + continue; + invalid(Context, /*Assert=*/true, AccessFunction, + Insn, BaseValue); + if (!KeepGoing) + return false; + + continue; + } - // Also collect terms from the affine memory accesses. - for (PairInsnAddRec PIAF : Context.AffineAccesses[BasePointer]) - PIAF.second->collectParametricTerms(*SE, Terms); + AccessFunction->collectParametricTerms(*SE, Terms); + } + + if (IsNonAffine) + continue; // Second step: find array shape. SE->findArrayDimensions(Terms, Shape->DelinearizedSizes, Context.ElementSize[BasePointer]); // Third step: compute the access functions for each subscript. - for (PairInsnAddRec PIAF : Context.NonAffineAccesses[BasePointer]) { - const SCEVAddRecExpr *AF = PIAF.second; + // + // We first store the resulting memory accesses in TempMemoryAccesses. Only + // if the access functions for all memory accesses have been successfully + // delinearized we continue. Otherwise, we either report a failure or, if + // non-affine accesses are allowed, we drop the information. In case the + // information is dropped the memory accesses need to be overapproximated + // when translated to a polyhedral representation. + MapInsnToMemAcc TempMemoryAccesses; + for (const auto &PIAF : Context.Accesses[BasePointer]) { + const SCEVAddRecExpr *AF = dyn_cast(PIAF.second); const Instruction *Insn = PIAF.first; - if (Shape->DelinearizedSizes.empty()) - return invalid(Context, /*Assert=*/true, AF, - Insn, BaseValue); - - MemAcc *Acc = new MemAcc(Insn, Shape); - InsnToMemAcc.insert({Insn, Acc}); - AF->computeAccessFunctions(*SE, Acc->DelinearizedSubscripts, - Shape->DelinearizedSizes); - if (Shape->DelinearizedSizes.empty() || - Acc->DelinearizedSubscripts.empty()) - return invalid(Context, /*Assert=*/true, AF, - Insn, BaseValue); - - // Check that the delinearized subscripts are affine. - for (const SCEV *S : Acc->DelinearizedSubscripts) - if (!isAffineExpr(&Context.CurRegion, S, *SE, BaseValue)) - return invalid(Context, /*Assert=*/true, AF, - Insn, BaseValue); + if (!Shape->DelinearizedSizes.empty()) { + MemAcc *Acc = new MemAcc(Insn, Shape); + TempMemoryAccesses.insert({Insn, Acc}); + AF->computeAccessFunctions(*SE, Acc->DelinearizedSubscripts, + Shape->DelinearizedSizes); + if (!Acc->DelinearizedSubscripts.empty()) { + for (const SCEV *S : Acc->DelinearizedSubscripts) + if (!isAffineExpr(&Context.CurRegion, S, *SE, BaseValue)) + IsNonAffine = true; + } else { + IsNonAffine = true; + } + } else { + IsNonAffine = true; + } + + if (IsNonAffine) { + if (!AllowNonAffine) + invalid(Context, /*Assert=*/true, AF, Insn, + BaseValue); + if (!KeepGoing && !AllowNonAffine) + return false; + continue; + } } + + if (!IsNonAffine) + InsnToMemAcc.insert(TempMemoryAccesses.begin(), TempMemoryAccesses.end()); } return true; } @@ -433,26 +465,17 @@ AccessFunction = SE->getMinusSCEV(AccessFunction, BasePointer); - if (AllowNonAffine) { - // Do not check whether AccessFunction is affine. - } else if (!isAffineExpr(&Context.CurRegion, AccessFunction, *SE, - BaseValue)) { - const SCEVAddRecExpr *AF = dyn_cast(AccessFunction); + if (PollyDelinearize) { + // FIXME: We do not yet handle inconsistent element sizes. + Context.ElementSize[BasePointer] = SE->getElementSize(&Inst); + Context.Accesses[BasePointer].push_back({&Inst, AccessFunction}); - if (!PollyDelinearize || !AF) + if (!isAffineExpr(&Context.CurRegion, AccessFunction, *SE, BaseValue)) + Context.NonAffineAccesses.insert(BasePointer); + } else if (!AllowNonAffine) { + if (!isAffineExpr(&Context.CurRegion, AccessFunction, *SE, BaseValue)) return invalid(Context, /*Assert=*/true, AccessFunction, &Inst, BaseValue); - - const SCEV *ElementSize = SE->getElementSize(&Inst); - Context.ElementSize[BasePointer] = ElementSize; - - // Collect all non affine memory accesses, and check whether they are linear - // at the end of scop detection. That way we can delinearize all the memory - // accesses to the same array in a unique step. - Context.NonAffineAccesses[BasePointer].push_back({&Inst, AF}); - } else if (const SCEVAddRecExpr *AF = - dyn_cast(AccessFunction)) { - Context.AffineAccesses[BasePointer].push_back({&Inst, AF}); } // FIXME: Alias Analysis thinks IntToPtrInst aliases with alloca instructions Index: test/ScopDetectionDiagnostics/ReportMultipleNonAffineAccesses.ll =================================================================== --- /dev/null +++ test/ScopDetectionDiagnostics/ReportMultipleNonAffineAccesses.ll @@ -0,0 +1,147 @@ +; RUN: opt %loadPolly -basicaa -pass-remarks-missed="polly-detect" -polly-detect-track-failures -polly-detect -analyze < %s 2>&1| FileCheck %s +; RUN: opt %loadPolly -basicaa -pass-remarks-missed="polly-detect" -polly-detect-track-failures -polly-detect -polly-detect-keep-going -analyze < %s 2>&1| FileCheck %s -check-prefix=ALL +; RUN: opt %loadPolly -basicaa -pass-remarks-missed="polly-detect" -polly-detect-track-failures -polly-detect -polly-delinearize -analyze < %s 2>&1| FileCheck %s -check-prefix=DELIN +; RUN: opt %loadPolly -basicaa -pass-remarks-missed="polly-detect" -polly-detect-track-failures -polly-detect -polly-delinearize -polly-detect-keep-going -analyze < %s 2>&1| FileCheck %s -check-prefix=DELIN-ALL +; RUN: opt %loadPolly -basicaa -pass-remarks-missed="polly-detect" -polly-detect-track-failures -polly-detect -polly-allow-nonaffine -analyze < %s 2>&1| FileCheck %s -check-prefix=NONAFFINE +; RUN: opt %loadPolly -basicaa -pass-remarks-missed="polly-detect" -polly-detect-track-failures -polly-detect -polly-delinearize -polly-allow-nonaffine -analyze < %s 2>&1| FileCheck %s -check-prefix=NONAFFINE + +; 1 void manyaccesses(float A[restrict], long n, float B[restrict][n]) +; 2 { +; 3 for (long i = 0; i < 1024; ++i) { +; 4 float a1 = A[2 * i * i]; +; 5 float a2 = A[2 * i * i + 1]; +; 6 float b1 = B[i][i]; +; 7 float b2 = B[i * i][i]; +; 8 float b3 = B[i][0]; +; 9 float b4 = B[0][i]; +; 10 float b5 = B[0][i*i]; +; 11 +; 12 A[i * i] = a1 + a2 + b1 + b2 + b3 + b4 + b5; +; 13 } +; 14 } + +; CHECK: remark: /tmp/test.c:3:20: The following errors keep this region from being a Scop. +; CHECK: remark: /tmp/test.c:4:16: The array subscript of "A" is not affine +; CHECK: remark: /tmp/test.c:12:46: Invalid Scop candidate ends here. + +; ALL: remark: /tmp/test.c:3:20: The following errors keep this region from being a Scop. +; ALL: remark: /tmp/test.c:4:16: The array subscript of "A" is not affine +; ALL: remark: /tmp/test.c:5:16: The array subscript of "A" is not affine +; ALL: remark: /tmp/test.c:6:16: The array subscript of "B" is not affine +; ALL: remark: /tmp/test.c:7:16: The array subscript of "B" is not affine +; ALL: remark: /tmp/test.c:8:16: The array subscript of "B" is not affine +; -> B[0][i] is affine +; ALL: remark: /tmp/test.c:10:16: The array subscript of "B" is not affine +; ALL: remark: /tmp/test.c:12:5: The array subscript of "A" is not affine +; ALL: remark: /tmp/test.c:12:46: Invalid Scop candidate ends here. + +; DELIN: remark: /tmp/test.c:3:20: The following errors keep this region from being a Scop. +; DELIN: remark: /tmp/test.c:4:16: The array subscript of "A" is not affine +; DELIN: remark: /tmp/test.c:12:46: Invalid Scop candidate ends here. + +; DELIN-ALL: remark: /tmp/test.c:3:20: The following errors keep this region from being a Scop. +; DELIN-ALL: remark: /tmp/test.c:4:16: The array subscript of "A" is not affine +; DELIN-ALL: remark: /tmp/test.c:5:16: The array subscript of "A" is not affine +; DELIN-ALL: remark: /tmp/test.c:12:5: The array subscript of "A" is not affine +; DELIN-ALL: remark: /tmp/test.c:7:16: The array subscript of "B" is not affine +; B[i][0] is affine if delinearized; +; B[0][i] is affine if delinearized; +; DELIN-ALL: remark: /tmp/test.c:10:16: The array subscript of "B" is not affine +; DELIN-ALL: remark: /tmp/test.c:12:46: Invalid Scop candidate ends here. + +; NONAFFINE-NOT: remark: The following errors keep this region from being a Scop. + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @manyaccesses(float* noalias %A, i64 %n, float* noalias %B) { +entry: + br label %entry.split + +entry.split: ; preds = %entry + %tmp = add i64 %n, 1, !dbg !10 + br label %for.body, !dbg !10 + +for.body: ; preds = %entry.split, %for.body + %tmp3 = phi i64 [ 0, %entry.split ], [ %tmp13, %for.body ], !dbg !15 + %mul = mul i64 %tmp3, 2, !dbg !17 + %tmp4 = mul i64 %tmp, %tmp3, !dbg !18 + %arrayidx6 = getelementptr float* %B, i64 %tmp4, !dbg !19 + %mul7 = mul i64 %n, %tmp3, !dbg !15 + %arrayidx10 = getelementptr float* %B, i64 %mul7, !dbg !20 + %arrayidx13 = getelementptr float* %B, i64 %tmp3, !dbg !21 + %mul1 = mul nsw i64 %mul, %tmp3, !dbg !17 + %arrayidx = getelementptr inbounds float* %A, i64 %mul1, !dbg !22 + %tmp5 = load float* %arrayidx, align 4, !dbg !22 + %mul3 = mul nsw i64 %mul, %tmp3, !dbg !27 + %add1 = or i64 %mul3, 1, !dbg !27 + %arrayidx4 = getelementptr inbounds float* %A, i64 %add1, !dbg !28 + %tmp6 = load float* %arrayidx4, align 4, !dbg !28 + %tmp7 = load float* %arrayidx6, align 4, !dbg !19 + %tmp8 = mul i64 %mul7, %tmp3, !dbg !15 + %arrayidx8.sum = add i64 %tmp8, %tmp3, !dbg !15 + %arrayidx9 = getelementptr inbounds float* %B, i64 %arrayidx8.sum, !dbg !15 + %tmp9 = load float* %arrayidx9, align 4, !dbg !15 + %tmp10 = load float* %arrayidx10, align 4, !dbg !20 + %tmp11 = load float* %arrayidx13, align 4, !dbg !21 + %mul14 = mul nsw i64 %tmp3, %tmp3, !dbg !29 + %arrayidx16 = getelementptr inbounds float* %B, i64 %mul14, !dbg !30 + %tmp12 = load float* %arrayidx16, align 4, !dbg !30 + %add17 = fadd float %tmp5, %tmp6, !dbg !31 + %add18 = fadd float %add17, %tmp7, !dbg !32 + %add19 = fadd float %add18, %tmp9, !dbg !33 + %add20 = fadd float %add19, %tmp10, !dbg !34 + %add21 = fadd float %add20, %tmp11, !dbg !35 + %add22 = fadd float %add21, %tmp12, !dbg !36 + %mul23 = mul nsw i64 %tmp3, %tmp3, !dbg !37 + %arrayidx24 = getelementptr inbounds float* %A, i64 %mul23, !dbg !38 + store float %add22, float* %arrayidx24, align 4, !dbg !38 + %tmp13 = add nsw i64 %tmp3, 1, !dbg !39 + %exitcond = icmp ne i64 %tmp13, 1024, !dbg !10 + br i1 %exitcond, label %for.body, label %for.end, !dbg !10 + +for.end: ; preds = %for.body + ret void, !dbg !40 +} + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!7, !8} +!llvm.ident = !{!9} + +!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.6.0 ", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !"", i32 2} ; [ DW_TAG_compile_unit ] [/home/grosser/Projects/polly/git/tools/polly/test/ScopDetectionDiagnostics//tmp/test.c] [DW_LANG_C99] +!1 = metadata !{metadata !"/tmp/test.c", metadata !"/home/grosser/Projects/polly/git/tools/polly/test/ScopDetectionDiagnostics"} +!2 = metadata !{} +!3 = metadata !{metadata !4} +!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"manyaccesses", metadata !"manyaccesses", metadata !"", i32 1, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (float*, i64, float*)* @manyaccesses, null, null, metadata !2, i32 2} ; [ DW_TAG_subprogram ] [line 1] [def] [scope 2] [manyaccesses] +!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/home/grosser/Projects/polly/git/tools/polly/test/ScopDetectionDiagnostics//tmp/test.c] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} +!8 = metadata !{i32 2, metadata !"Debug Info Version", i32 1} +!9 = metadata !{metadata !"clang version 3.6.0 "} +!10 = metadata !{i32 3, i32 20, metadata !11, null} +!11 = metadata !{i32 786443, metadata !1, metadata !12, i32 2} ; [ DW_TAG_lexical_block ] [/home/grosser/Projects/polly/git/tools/polly/test/ScopDetectionDiagnostics//tmp/test.c] +!12 = metadata !{i32 786443, metadata !1, metadata !13, i32 1} ; [ DW_TAG_lexical_block ] [/home/grosser/Projects/polly/git/tools/polly/test/ScopDetectionDiagnostics//tmp/test.c] +!13 = metadata !{i32 786443, metadata !1, metadata !14, i32 3, i32 3, i32 1} ; [ DW_TAG_lexical_block ] [/home/grosser/Projects/polly/git/tools/polly/test/ScopDetectionDiagnostics//tmp/test.c] +!14 = metadata !{i32 786443, metadata !1, metadata !4, i32 3, i32 3, i32 0} ; [ DW_TAG_lexical_block ] [/home/grosser/Projects/polly/git/tools/polly/test/ScopDetectionDiagnostics//tmp/test.c] +!15 = metadata !{i32 7, i32 16, metadata !16, null} +!16 = metadata !{i32 786443, metadata !1, metadata !13, i32 3, i32 35, i32 2} ; [ DW_TAG_lexical_block ] [/home/grosser/Projects/polly/git/tools/polly/test/ScopDetectionDiagnostics//tmp/test.c] +!17 = metadata !{i32 4, i32 26, metadata !16, null} +!18 = metadata !{i32 4, i32 22, metadata !16, null} +!19 = metadata !{i32 6, i32 16, metadata !16, null} +!20 = metadata !{i32 8, i32 16, metadata !16, null} ; [ DW_TAG_imported_declaration ] +!21 = metadata !{i32 9, i32 16, metadata !16, null} +!22 = metadata !{i32 4, i32 16, metadata !16, null} +!27 = metadata !{i32 5, i32 26, metadata !16, null} +!28 = metadata !{i32 5, i32 16, metadata !16, null} +!29 = metadata !{i32 10, i32 23, metadata !16, null} +!30 = metadata !{i32 10, i32 16, metadata !16, null} +!31 = metadata !{i32 12, i32 21, metadata !16, null} +!32 = metadata !{i32 12, i32 26, metadata !16, null} +!33 = metadata !{i32 12, i32 31, metadata !16, null} +!34 = metadata !{i32 12, i32 36, metadata !16, null} +!35 = metadata !{i32 12, i32 41, metadata !16, null} +!36 = metadata !{i32 12, i32 46, metadata !16, null} +!37 = metadata !{i32 12, i32 11, metadata !16, null} +!38 = metadata !{i32 12, i32 5, metadata !16, null} +!39 = metadata !{i32 3, i32 30, metadata !13, null} +!40 = metadata !{i32 14, i32 1, metadata !4, null} Index: test/ScopInfo/multidim_single_and_multidim_array.ll =================================================================== --- /dev/null +++ test/ScopInfo/multidim_single_and_multidim_array.ll @@ -0,0 +1,69 @@ +; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s +; RUN: opt %loadPolly -polly-scops -polly-allow-nonaffine -analyze < %s | FileCheck %s --check-prefix=NONAFFINE +; RUN: opt %loadPolly -polly-scops -polly-delinearize -analyze < %s | FileCheck %s --check-prefix=DELIN +; RUN: opt %loadPolly -polly-scops -polly-delinearize -polly-allow-nonaffine -analyze < %s | FileCheck %s --check-prefix=DELIN + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; void single-and-multi-dimensional-array(long n,float X[n][n]) { +; for (long i1 = 0; i1 < n; i1++) +; X[i1][0] = 1; +; +; for (long i2 = 0; i2 < n; i2++) +; X[n-1][i2] = 1; +; } +; +; In previous versions of Polly, the second access was detected as single +; dimensional access whereas the first one was detected as multi-dimensional. +; This test case checks that we now consistently delinearize the array accesses. + +; CHECK-NOT: Stmt_for_i_1 + +; NONAFFINE: p0: %n +; NONAFFINE: p1: (4 * (-1 + %n) * %n) +; NONAFFINE: Statements { +; NONAFFINE: Stmt_for_i_1 +; NONAFFINE: MayWriteAccess := [Reduction Type: NONE] +; NONAFFINE: [n, p_1] -> { Stmt_for_i_1[i0] -> MemRef_X[o0] }; +; NONAFFINE: Stmt_for_i_2 +; NONAFFINE: MustWriteAccess := [Reduction Type: NONE] +; NONAFFINE: [n, p_1] -> { Stmt_for_i_2[i0] -> MemRef_X[o0] : 4o0 = p_1 + 4i0 }; + +; DELIN: Stmt_for_i_1 +; DELIN: MustWriteAccess := +; DELIN: [n] -> { Stmt_for_i_1[i0] -> MemRef_X[i0, 0] }; +; DELIN: Stmt_for_i_2 +; DELIN: MustWriteAccess := +; DELIN: [n] -> { Stmt_for_i_2[i0] -> MemRef_X[-1 + n, i0] }; + +define void @single-and-multi-dimensional-array(i64 %n, float* %X) { +entry: + br label %for.i.1 + +for.i.1: + %indvar.1 = phi i64 [ 0, %entry ], [ %indvar.next.1, %for.i.1 ] + %offset.1 = mul i64 %n, %indvar.1 + %arrayidx.1 = getelementptr float* %X, i64 %offset.1 + store float 1.000000e+00, float* %arrayidx.1 + %indvar.next.1 = add nsw i64 %indvar.1, 1 + %exitcond.1 = icmp ne i64 %indvar.next.1, %n + br i1 %exitcond.1, label %for.i.1, label %next + +next: + br label %for.i.2 + +for.i.2: + %indvar.2 = phi i64 [ 0, %next ], [ %indvar.next.2, %for.i.2 ] + %offset.2.a = add i64 %n, -1 + %offset.2.b = mul i64 %n, %offset.2.a + %offset.2.c = add i64 %offset.2.b, %indvar.2 + %arrayidx.2 = getelementptr float* %X, i64 %offset.2.c + store float 1.000000e+00, float* %arrayidx.2 + %indvar.next.2 = add nsw i64 %indvar.2, 1 + %exitcond.2 = icmp ne i64 %indvar.next.2, %n + br i1 %exitcond.2, label %for.i.2, label %exit + +exit: + ret void +}