Index: lib/CodeGen/PPCGCodeGeneration.cpp =================================================================== --- lib/CodeGen/PPCGCodeGeneration.cpp +++ lib/CodeGen/PPCGCodeGeneration.cpp @@ -736,6 +736,7 @@ Value *ArraySize = getArraySize(Array); Value *Offset = getArrayOffset(Array); + if (Offset) ArraySize = Builder.CreateSub( ArraySize, @@ -1007,14 +1008,35 @@ return String.find(Prefix) == 0; } +/// Check whether @p aff is zero everywhere, i.e. its non-zero set is empty. +/// @p aff is not consumed; an isl error is conservatively reported as non-zero. +static bool isPwAffZero(__isl_keep isl_pw_aff *aff) { + isl_set *NonZeroSet = isl_pw_aff_non_zero_set(isl_pw_aff_copy(aff)); + // isl_set_is_empty yields an isl_bool; compare against isl_bool_true so that + // isl_bool_error is not implicitly converted to `true`. + bool ZeroEverywhere = isl_set_is_empty(NonZeroSet) == isl_bool_true; + isl_set_free(NonZeroSet); + return ZeroEverywhere; +} + Value *GPUNodeBuilder::getArraySize(gpu_array_info *Array) { isl_ast_build *Build = isl_ast_build_from_context(S.getContext()); Value *ArraySize = ConstantInt::get(Builder.getInt64Ty(), Array->size); if (!gpu_array_is_scalar(Array)) { auto OffsetDimZero = isl_multi_pw_aff_get_pw_aff(Array->bound, 0); - isl_ast_expr *Res = isl_ast_build_expr_from_pw_aff(Build, OffsetDimZero); + // If the array is a pointer to a scalar, then it will be of the form: + // Number of dimensions: 1 + // 0th dimension: [params] -> { [(0)] } + // In that case, just return the array size. 
+ if (Array->n_index == 1 && isPwAffZero(OffsetDimZero)) { + isl_pw_aff_free(OffsetDimZero); + isl_ast_build_free(Build); + return ArraySize; + } + + isl_ast_expr *Res = isl_ast_build_expr_from_pw_aff(Build, OffsetDimZero); for (unsigned int i = 1; i < Array->n_index; i++) { isl_pw_aff *Bound_I = isl_multi_pw_aff_get_pw_aff(Array->bound, i); isl_ast_expr *Expr = isl_ast_build_expr_from_pw_aff(Build, Bound_I); Index: test/GPGPU/size-of-pointer-to-scalar.ll =================================================================== --- /dev/null +++ test/GPGPU/size-of-pointer-to-scalar.ll @@ -0,0 +1,52 @@ +; RUN: opt %loadPolly -polly-codegen-ppcg -polly-invariant-load-hoisting \ +; RUN: -S < %s | \ +; RUN: FileCheck -check-prefix=HOST-IR %s + +; REQUIRES: pollyacc + +; HOST-IR: %p_dev_array_MemRef_begin = call i8* @polly_allocateMemoryForDevice(i64 4) +; HOST-IR: %p_dev_array_MemRef_end = call i8* @polly_allocateMemoryForDevice(i64 4) + +; Check that we do derive the correct size for `begin`, `end`. +; These are 1 dimensional arrays with 1 element / pointers to scalars. 
+ +; void checkPrivatization(int A[], int *begin, int *end) { +; for(int i = *begin; i < *end; i++) { +; A[i] = 10; +; } +; } + +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.12.0" + +define void @checkPrivatization(i32* %A, i32* %begin, i32* %end) { +entry: + br label %entry.split + +entry.split: ; preds = %entry + %tmp = load i32, i32* %begin, align 4 + %tmp21 = load i32, i32* %end, align 4 + %cmp3 = icmp slt i32 %tmp, %tmp21 + br i1 %cmp3, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry.split + %tmp1 = sext i32 %tmp to i64 + br label %for.body + +for.body: ; preds = %for.body.lr.ph, %for.body + %indvars.iv4 = phi i64 [ %tmp1, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv4 + store i32 10, i32* %arrayidx, align 4 + %indvars.iv.next = add i64 %indvars.iv4, 1 + %tmp2 = load i32, i32* %end, align 4 + %tmp3 = sext i32 %tmp2 to i64 + %cmp = icmp slt i64 %indvars.iv.next, %tmp3 + br i1 %cmp, label %for.body, label %for.cond.for.end_crit_edge + +for.cond.for.end_crit_edge: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.cond.for.end_crit_edge, %entry.split + ret void +} +