diff --git a/lib/Analysis/ScopInfo.cpp b/lib/Analysis/ScopInfo.cpp --- a/lib/Analysis/ScopInfo.cpp +++ b/lib/Analysis/ScopInfo.cpp @@ -531,15 +531,19 @@ LS = isl::local_space(Map.get_space()); int Num = ElemBytes / getScopArrayInfo()->getElemSizeInBytes(); + // The subscripts built by ScopBuilder::buildAccessMultiDim* are in units + // of the access's element type, so for multi-dimensional arrays the input + // coefficient is scaled by Num; for DimsArray <= 1 it stays 1. + int Coef = DimsArray > 1 ? Num : 1; C = isl::constraint::alloc_inequality(LS); C = C.set_constant_val(isl::val(Ctx, Num - 1)); - C = C.set_coefficient_si(isl::dim::in, DimsArray - 1, 1); + C = C.set_coefficient_si(isl::dim::in, DimsArray - 1, Coef); C = C.set_coefficient_si(isl::dim::out, DimsArray - 1, -1); Map = Map.add_constraint(C); C = isl::constraint::alloc_inequality(LS); - C = C.set_coefficient_si(isl::dim::in, DimsArray - 1, -1); + C = C.set_coefficient_si(isl::dim::in, DimsArray - 1, -Coef); C = C.set_coefficient_si(isl::dim::out, DimsArray - 1, 1); C = C.set_constant_val(isl::val(Ctx, 0)); Map = Map.add_constraint(C); diff --git a/test/ScopInfo/multiple-types-in-multiple-dimensions.ll b/test/ScopInfo/multiple-types-in-multiple-dimensions.ll new file mode 100644 --- /dev/null +++ b/test/ScopInfo/multiple-types-in-multiple-dimensions.ll @@ -0,0 +1,118 @@ +; RUN: opt %loadPolly -polly-scops -analyze \ +; RUN: -polly-process-unprofitable -S < %s | FileCheck %s +; Checks the access relations Polly derives when 64-bit and 32-bit stores hit the same two-dimensional array. C source: +; #define N (1024) +; uint64_t A[1][N] = {0}; +; uint64_t foo(int n) { +; for (int i = 0; i < n; i += 1) +; A[0][i] = 42; +; +; for (int i = 0; i < n; i += 1) +; ((uint32_t*)&A[0][0])[65 + i*2] = 21; +; +; for (int i = 0; i < n; i += 1) +; A[0][i*2] = 2; +; +; return A[0][64]; +; } + +; A[0][i] = 42; +; CHECK: Stmt_for_body +; CHECK: Domain := +; CHECK: [n] -> { Stmt_for_body[i0] : 0 <= i0 < n }; +; CHECK: Schedule := +; CHECK: [n] -> { Stmt_for_body[i0] -> [0, i0] }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: 
[n] -> { Stmt_for_body[i0] -> MemRef_A[0, o1] : 2i0 <= o1 <= 1 + 2i0 }; + +; ((uint32_t*)&A[0][0])[65 + i*2] = 21; +; CHECK: Stmt_for_body4 +; CHECK: Domain := +; CHECK: [n] -> { Stmt_for_body4[i0] : 0 <= i0 < n }; +; CHECK: Schedule := +; CHECK: [n] -> { Stmt_for_body4[i0] -> [1, i0] }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: [n] -> { Stmt_for_body4[i0] -> MemRef_A[o0, 65 + 2i0 - 1024o0] : -479 + i0 <= 512o0 <= 32 + i0 }; + +; A[0][i*2] = 2; +; CHECK: Stmt_for_body14 +; CHECK: Domain := +; CHECK: [n] -> { Stmt_for_body14[i0] : 0 <= i0 < n }; +; CHECK: Schedule := +; CHECK: [n] -> { Stmt_for_body14[i0] -> [2, i0] }; +; CHECK: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK: [n] -> { Stmt_for_body14[i0] -> MemRef_A[0, o1] : 4i0 <= o1 <= 1 + 4i0 }; + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +@A = dso_local global [1 x [1024 x i64]] zeroinitializer, align 16 + +; Function Attrs: noinline nounwind uwtable +define dso_local i64 @foo(i32 %n) { +entry: + br label %entry.split + +entry.split: ; preds = %entry + %cmp5 = icmp sgt i32 %n, 0 + br i1 %cmp5, label %for.body.lr.ph, label %for.cond2.preheader + +for.body.lr.ph: ; preds = %entry.split + %wide.trip.count16 = zext i32 %n to i64 + br label %for.body + +for.cond.for.cond2.preheader_crit_edge: ; preds = %for.body + br label %for.cond2.preheader + +for.cond2.preheader: ; preds = %for.cond.for.cond2.preheader_crit_edge, %entry.split + %cmp33 = icmp sgt i32 %n, 0 + br i1 %cmp33, label %for.body4.lr.ph, label %for.cond12.preheader + +for.body4.lr.ph: ; preds = %for.cond2.preheader + %wide.trip.count12 = zext i32 %n to i64 + br label %for.body4 + +for.body: ; preds = %for.body.lr.ph, %for.body + %indvars.iv14 = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next15, %for.body ] + %arrayidx = getelementptr inbounds [1 x [1024 x i64]], [1 x [1024 x i64]]* @A, i64 0, i64 0, i64 %indvars.iv14 + store i64 42, i64* %arrayidx, align 8 + %indvars.iv.next15 = add 
nuw nsw i64 %indvars.iv14, 1 + %exitcond17 = icmp ne i64 %indvars.iv.next15, %wide.trip.count16 + br i1 %exitcond17, label %for.body, label %for.cond.for.cond2.preheader_crit_edge + +for.cond2.for.cond12.preheader_crit_edge: ; preds = %for.body4 + br label %for.cond12.preheader + +for.cond12.preheader: ; preds = %for.cond2.for.cond12.preheader_crit_edge, %for.cond2.preheader + %cmp131 = icmp sgt i32 %n, 0 + br i1 %cmp131, label %for.body14.lr.ph, label %for.end20 + +for.body14.lr.ph: ; preds = %for.cond12.preheader + %wide.trip.count = zext i32 %n to i64 + br label %for.body14 + +for.body4: ; preds = %for.body4.lr.ph, %for.body4 + %indvars.iv8 = phi i64 [ 0, %for.body4.lr.ph ], [ %indvars.iv.next9, %for.body4 ] + %0 = shl nuw nsw i64 %indvars.iv8, 1 + %1 = add nuw nsw i64 %0, 65 + %arrayidx7 = getelementptr inbounds i32, i32* bitcast ([1 x [1024 x i64]]* @A to i32*), i64 %1 + store i32 21, i32* %arrayidx7, align 4 + %indvars.iv.next9 = add nuw nsw i64 %indvars.iv8, 1 + %exitcond13 = icmp ne i64 %indvars.iv.next9, %wide.trip.count12 + br i1 %exitcond13, label %for.body4, label %for.cond2.for.cond12.preheader_crit_edge + +for.body14: ; preds = %for.body14.lr.ph, %for.body14 + %indvars.iv = phi i64 [ 0, %for.body14.lr.ph ], [ %indvars.iv.next, %for.body14 ] + %2 = shl nuw nsw i64 %indvars.iv, 1 + %arrayidx17 = getelementptr inbounds [1 x [1024 x i64]], [1 x [1024 x i64]]* @A, i64 0, i64 0, i64 %2 + store i64 2, i64* %arrayidx17, align 16 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count + br i1 %exitcond, label %for.body14, label %for.cond12.for.end20_crit_edge + +for.cond12.for.end20_crit_edge: ; preds = %for.body14 + br label %for.end20 + +for.end20: ; preds = %for.cond12.for.end20_crit_edge, %for.cond12.preheader + %3 = load i64, i64* getelementptr inbounds ([1 x [1024 x i64]], [1 x [1024 x i64]]* @A, i64 0, i64 0, i64 64), align 16 + ret i64 %3 +}