Index: include/polly/ScopInfo.h =================================================================== --- include/polly/ScopInfo.h +++ include/polly/ScopInfo.h @@ -282,6 +282,9 @@ /// For indirect accesses return the origin SAI of the BP, else null. const ScopArrayInfo *getBasePtrOriginSAI() const { return BasePtrOriginSAI; } + /// Return whether the ScopArrayInfo models a Fortran array. + bool isFortranArray() const { return FAD != nullptr; } + /// The set of derived indirect SAIs for this origin SAI. const SmallSetVector &getDerivedSAIs() const { return DerivedSAIs; @@ -2721,6 +2724,16 @@ /// that has name @p Name. ScopArrayInfo *getArrayInfoByName(const std::string BaseName); + // Return whether this Scop contains a Fortran array. + bool hasFortranArrays() const { + for (auto &S : *this) { + for (auto MemAcc : S) { + if (MemAcc->getLatestScopArrayInfo()->isFortranArray()) + return true; + } + } + return false; + } /// Check whether @p Schedule contains extension nodes. /// /// @return true if @p Schedule contains extension nodes. Index: lib/CodeGen/IslNodeBuilder.cpp =================================================================== --- lib/CodeGen/IslNodeBuilder.cpp +++ lib/CodeGen/IslNodeBuilder.cpp @@ -1436,7 +1436,7 @@ // Materialize values for the parameters of the SCoP. materializeParameters(); - // materialize the outermost dimension parameters for a Fortran array. + // Materialize the outermost dimension parameters for a Fortran array. // NOTE: materializeParameters() does not work since it looks through // the SCEVs. 
We don't have a corresponding SCEV for the array size // parameter Index: lib/CodeGen/PPCGCodeGeneration.cpp =================================================================== --- lib/CodeGen/PPCGCodeGeneration.cpp +++ lib/CodeGen/PPCGCodeGeneration.cpp @@ -127,8 +127,24 @@ isl_id_to_ast_expr *RefToExpr = isl_id_to_ast_expr_alloc(Ctx, 0); for (MemoryAccess *Acc : *Stmt) { - isl_map *AddrFunc = Acc->getAddressFunction(); - AddrFunc = isl_map_intersect_domain(AddrFunc, Stmt->getDomain()); + isl_map *AddrFunc = nullptr; + + if (Acc->getLatestScopArrayInfo()->isFortranArray()) { + // If we have a Fortran array, then the access relation could + // be unbounded in the outermost dimension. However, `getAddressFunction` + // directly calls `lexmin` on the access relation which will naturally + // fail. Hence, we choose to lower bound the outermost dimension manually + // and then invoke lexmin. + AddrFunc = Acc->getAccessRelation(); + AddrFunc = isl_map_intersect_domain(AddrFunc, Stmt->getDomain()); + AddrFunc = isl_map_lower_bound_si(AddrFunc, isl_dim_out, 0, 0); + AddrFunc = isl_map_lexmin(AddrFunc); + } else { + AddrFunc = Acc->getAddressFunction(); + AddrFunc = isl_map_intersect_domain(AddrFunc, Stmt->getDomain()); + } + + assert(AddrFunc && "expected AddrFunc to be initialized."); isl_id *RefId = Acc->getId(); isl_pw_multi_aff *PMA = isl_pw_multi_aff_from_map(AddrFunc); isl_multi_pw_aff *MPA = isl_multi_pw_aff_from_pw_multi_aff(PMA); @@ -138,7 +154,6 @@ Access = FunctionExpr(Access, RefId, UserExpr); RefToExpr = isl_id_to_ast_expr_set(RefToExpr, RefId, Access); } - return RefToExpr; } @@ -2134,33 +2149,39 @@ return isl_set_universe(Array->getSpace()); } - isl_set *AccessSet = - isl_union_set_extract_set(AccessUSet, Array->getSpace()); - - isl_union_set_free(AccessUSet); - isl_local_space *LS = isl_local_space_from_space(Array->getSpace()); - - isl_pw_aff *Val = - isl_pw_aff_from_aff(isl_aff_var_on_domain(LS, isl_dim_set, 0)); + isl_set *Extent = 
isl_set_universe(Array->getSpace()); - isl_pw_aff *OuterMin = isl_set_dim_min(isl_set_copy(AccessSet), 0); - isl_pw_aff *OuterMax = isl_set_dim_max(AccessSet, 0); - OuterMin = isl_pw_aff_add_dims(OuterMin, isl_dim_in, - isl_pw_aff_dim(Val, isl_dim_in)); - OuterMax = isl_pw_aff_add_dims(OuterMax, isl_dim_in, - isl_pw_aff_dim(Val, isl_dim_in)); - OuterMin = - isl_pw_aff_set_tuple_id(OuterMin, isl_dim_in, Array->getBasePtrId()); - OuterMax = - isl_pw_aff_set_tuple_id(OuterMax, isl_dim_in, Array->getBasePtrId()); + if (!Array->isFortranArray()) { + isl_set *AccessSet = + isl_union_set_extract_set(AccessUSet, Array->getSpace()); - isl_set *Extent = isl_set_universe(Array->getSpace()); + isl_union_set_free(AccessUSet); + isl_local_space *LS = isl_local_space_from_space(Array->getSpace()); + + isl_pw_aff *Val = + isl_pw_aff_from_aff(isl_aff_var_on_domain(LS, isl_dim_set, 0)); + + isl_pw_aff *OuterMin = isl_set_dim_min(isl_set_copy(AccessSet), 0); + isl_pw_aff *OuterMax = isl_set_dim_max(AccessSet, 0); + OuterMin = isl_pw_aff_add_dims(OuterMin, isl_dim_in, + isl_pw_aff_dim(Val, isl_dim_in)); + OuterMax = isl_pw_aff_add_dims(OuterMax, isl_dim_in, + isl_pw_aff_dim(Val, isl_dim_in)); + OuterMin = + isl_pw_aff_set_tuple_id(OuterMin, isl_dim_in, Array->getBasePtrId()); + OuterMax = + isl_pw_aff_set_tuple_id(OuterMax, isl_dim_in, Array->getBasePtrId()); + + Extent = isl_set_intersect( + Extent, isl_pw_aff_le_set(OuterMin, isl_pw_aff_copy(Val))); + Extent = isl_set_intersect(Extent, isl_pw_aff_ge_set(OuterMax, Val)); + } else { + isl_union_set_free(AccessUSet); + } - Extent = isl_set_intersect( - Extent, isl_pw_aff_le_set(OuterMin, isl_pw_aff_copy(Val))); - Extent = isl_set_intersect(Extent, isl_pw_aff_ge_set(OuterMax, Val)); + int StartLowerBoundDim = Array->isFortranArray() ? 
0 : 1; - for (unsigned i = 1; i < NumDims; ++i) + for (unsigned i = StartLowerBoundDim; i < NumDims; ++i) Extent = isl_set_lower_bound_si(Extent, isl_dim_set, i, 0); for (unsigned i = 0; i < NumDims; ++i) { @@ -2424,7 +2445,8 @@ // We do not use here the Polly ScheduleOptimizer, as the schedule optimizer // is mostly CPU specific. Instead, we use PPCG's GPU code generation // strategy directly from this pass. - gpu_gen *generateGPU(ppcg_scop *PPCGScop, gpu_prog *PPCGProg) { + gpu_gen *generateGPU(bool HasFortranArrays, ppcg_scop *PPCGScop, + gpu_prog *PPCGProg) { auto PPCGGen = isl_calloc_type(S->getIslCtx(), struct gpu_gen); @@ -2450,7 +2472,9 @@ int has_permutable = has_any_permutable_node(Schedule); - if (!has_permutable || has_permutable < 0) { + // TODO: We bypass the permutable-band rejection for Fortran arrays so their + // codegen can be exercised; revisit whether this is sound in general. + if ((!has_permutable || has_permutable < 0) && !HasFortranArrays) { Schedule = isl_schedule_free(Schedule); } else { Schedule = map_to_device(PPCGGen, Schedule); @@ -2683,7 +2707,8 @@ auto PPCGScop = createPPCGScop(); auto PPCGProg = createPPCGProg(PPCGScop); - auto PPCGGen = generateGPU(PPCGScop, PPCGProg); + auto PPCGGen = + generateGPU(CurrentScop.hasFortranArrays(), PPCGScop, PPCGProg); if (PPCGGen->tree) generateCode(isl_ast_node_copy(PPCGGen->tree), PPCGProg); Index: test/GPGPU/fortran-copy-kernel-affine.ll =================================================================== --- /dev/null +++ test/GPGPU/fortran-copy-kernel-affine.ll @@ -0,0 +1,129 @@ +; RUN: opt -analyze %loadPolly -polly-allow-nonaffine -polly-ignore-aliasing -polly-scops -polly-detect-fortran-arrays \ +; RUN: < %s | FileCheck %s -check-prefix=SCOPS + +; RUN: opt %loadPolly -polly-allow-nonaffine -polly-ignore-aliasing -polly-detect-fortran-arrays -polly-codegen-ppcg -polly-acc-dump-code \ +; RUN: -disable-output < %s | FileCheck -check-prefix=CODE %s + +; REQUIRES: pollyacc + +; Check that Fortran arrays are detected. 
+; SCOPS: ReadAccess := [Reduction Type: NONE] [Fortran array descriptor: xs] [Scalar: 0] +; SCOPS-NEXT: [p_0_loaded_from_n, MemRef0_fortranarr_size, MemRef1_fortranarr_size] -> { Stmt_9[i0] -> MemRef0[o0] }; +; SCOPS-NEXT: MayWriteAccess := [Reduction Type: NONE] [Fortran array descriptor: ys] [Scalar: 0] +; SCOPS-NEXT: [p_0_loaded_from_n, MemRef0_fortranarr_size, MemRef1_fortranarr_size] -> { Stmt_9[i0] -> MemRef1[o0] }; + +; Check that we generate CUDA calls +; CODE: Code +; CODE-NEXT: ==== +; CODE-NEXT: # host +; CODE-NEXT: { +; CODE-NEXT: if (MemRef0_fortranarr_size >= 1) +; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef0, MemRef0, (MemRef0_fortranarr_size) * sizeof(i32), cudaMemcpyHostToDevice)); +; CODE-NEXT: if (MemRef1_fortranarr_size >= 1) +; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef1, MemRef1, (MemRef1_fortranarr_size) * sizeof(i32), cudaMemcpyHostToDevice)); +; CODE-NEXT: for (int c0 = 0; c0 < p_0_loaded_from_n; c0 += 1) +; CODE-NEXT: { +; CODE-NEXT: dim3 k0_dimBlock; +; CODE-NEXT: dim3 k0_dimGrid; +; CODE-NEXT: kernel0 <<>> (dev_MemRef0, dev_MemRef1, p_0_loaded_from_n, MemRef0_fortranarr_size, MemRef1_fortranarr_size, c0); +; CODE-NEXT: cudaCheckKernel(); +; CODE-NEXT: } +; CODE: if (MemRef1_fortranarr_size >= 1) +; CODE-NEXT: cudaCheckReturn(cudaMemcpy(MemRef1, dev_MemRef1, (MemRef1_fortranarr_size) * sizeof(i32), cudaMemcpyDeviceToHost)); +; CODE-NEXT: } + +; PROGRAM main +; INTEGER, DIMENSION(100) :: xs +; INTEGER, DIMENSION(100) :: ys +; +; DO i = 1, 100 +; xs (i) = i +; ys(i) = 0 +; END DO +; +; CALL copy(xs, ys, 10) +; +; PRINT *, ys +; CONTAINS +; SUBROUTINE copy(xs, ys, n) +; IMPLICIT NONE +; INTEGER, DIMENSION(:), INTENT(INOUT) :: xs, ys +; INTEGER, INTENT(IN) :: n +; INTEGER :: i +; +; DO i = 1, n +; ys(i * i) = xs(i * i) +; END DO +; +; END SUBROUTINE copy +; END PROGRAM + +target datalayout = 
"e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +module asm "\09.ident\09\22GCC: (GNU) 4.6.4 LLVM: 3.3.1\22" + +%"struct.array1_integer(kind=4)" = type { i8*, i64, i64, [1 x %struct.descriptor_dimension] } +%struct.descriptor_dimension = type { i64, i64, i64 } +%"struct.array1_integer(kind=4).0" = type { i8*, i64, i64, [1 x %struct.descriptor_dimension] } +%struct.__st_parameter_dt = type { %struct.__st_parameter_common, i64, i64*, i64*, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i8*, [256 x i8], i32*, i64, i8*, i32, i32, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i8*, i8*, i32, [4 x i8] } +%struct.__st_parameter_common = type { i32, i32, i8*, i32, i32, i8*, i32* } +%"struct.array1_integer(kind=4).1" = type { i8*, i64, i64, [1 x %struct.descriptor_dimension] } +%"struct.array1_integer(kind=4).2" = type { i8*, i64, i64, [1 x %struct.descriptor_dimension] } +%"struct.array1_integer(kind=4).3" = type { i8*, i64, i64, [1 x %struct.descriptor_dimension] } +%"struct.array1_integer(kind=4).4" = type { i8*, i64, i64, [1 x %struct.descriptor_dimension] } + +@.cst = private constant [12 x i8] c"program.f90\00", align 8 +@0 = internal constant i32 10 +@options.14.1603 = internal constant [8 x i32] [i32 68, i32 511, i32 0, i32 0, i32 0, i32 1, i32 0, i32 1], align 32 + +define internal void @copy.1550(%"struct.array1_integer(kind=4)"* noalias %xs, %"struct.array1_integer(kind=4).0"* noalias %ys, i32* noalias %n) { +entry: + br label %entry.split + +entry.split: ; preds = %entry + %0 = getelementptr inbounds %"struct.array1_integer(kind=4).0", %"struct.array1_integer(kind=4).0"* %ys, i64 0, i32 3, i64 0, i32 0 + %1 = load i64, i64* %0, align 8 + %2 = icmp eq i64 %1, 0 + %3 = select i1 %2, i64 1, i64 %1 + %4 = bitcast %"struct.array1_integer(kind=4).0"* %ys to i32** + %5 = load i32*, i32** %4, align 8 + %6 = 
getelementptr inbounds %"struct.array1_integer(kind=4)", %"struct.array1_integer(kind=4)"* %xs, i64 0, i32 3, i64 0, i32 0 + %7 = load i64, i64* %6, align 8 + %8 = icmp eq i64 %7, 0 + %. = select i1 %8, i64 1, i64 %7 + %9 = bitcast %"struct.array1_integer(kind=4)"* %xs to i32** + %10 = load i32*, i32** %9, align 8 + %11 = load i32, i32* %n, align 4 + %12 = icmp sgt i32 %11, 0 + br i1 %12, label %"9.preheader", label %return + +"9.preheader": ; preds = %entry.split + br label %"9" + +"9": ; preds = %"9.preheader", %"9" + %13 = phi i32 [ %24, %"9" ], [ 1, %"9.preheader" ] + %14 = sext i32 %13 to i64 + %15 = mul i64 %3, %14 + %16 = sub i64 %15, %3 + %17 = sext i32 %13 to i64 + %18 = mul i64 %., %17 + %19 = sub i64 %18, %. + %20 = getelementptr i32, i32* %10, i64 %19 + %21 = load i32, i32* %20, align 4 + %22 = getelementptr i32, i32* %5, i64 %16 + store i32 %21, i32* %22, align 4 + %23 = icmp eq i32 %13, %11 + %24 = add i32 %13, 1 + br i1 %23, label %return.loopexit, label %"9" + +return.loopexit: ; preds = %"9" + br label %return + +return: ; preds = %return.loopexit, %entry.split + ret void +} + +declare void @_gfortran_set_args(i32, i8**) + +declare void @_gfortran_set_options(i32, i32*) Index: test/GPGPU/fortran-copy-kernel-nonaffine.ll =================================================================== --- /dev/null +++ test/GPGPU/fortran-copy-kernel-nonaffine.ll @@ -0,0 +1,140 @@ +; RUN: opt -analyze %loadPolly -polly-allow-nonaffine -polly-ignore-aliasing -polly-scops -polly-detect-fortran-arrays \ +; RUN: < %s | \ +; RUN: FileCheck %s -check-prefix=SCOPS + +; RUN: opt %loadPolly -polly-allow-nonaffine -polly-ignore-aliasing -polly-detect-fortran-arrays -polly-codegen-ppcg -polly-acc-dump-code \ +; RUN: -disable-output < %s | \ +; RUN: FileCheck -check-prefix=CODE %s + +; REQUIRES: pollyacc + +; Check that Fortran arrays are detected. 
+; SCOPS: ReadAccess := [Reduction Type: NONE] [Fortran array descriptor: xs] [Scalar: 0] +; SCOPS-NEXT: [p_0_loaded_from_n, MemRef0_fortranarr_size, MemRef1_fortranarr_size] -> { Stmt_9[i0] -> MemRef0[o0] }; +; SCOPS-NEXT: MayWriteAccess := [Reduction Type: NONE] [Fortran array descriptor: ys] [Scalar: 0] +; SCOPS-NEXT: [p_0_loaded_from_n, MemRef0_fortranarr_size, MemRef1_fortranarr_size] -> { Stmt_9[i0] -> MemRef1[o0] }; + +; Check that we generate CUDA calls +; CODE: Code +; CODE-NEXT: ==== +; CODE-NEXT: # host +; CODE-NEXT: { +; CODE-NEXT: if (MemRef0_fortranarr_size >= 1) +; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef0, MemRef0, (MemRef0_fortranarr_size) * sizeof(i32), cudaMemcpyHostToDevice)); +; CODE-NEXT: if (MemRef1_fortranarr_size >= 1) +; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef1, MemRef1, (MemRef1_fortranarr_size) * sizeof(i32), cudaMemcpyHostToDevice)); +; CODE-NEXT: for (int c0 = 0; c0 < p_0_loaded_from_n; c0 += 1) +; CODE-NEXT: { +; CODE-NEXT: dim3 k0_dimBlock; +; CODE-NEXT: dim3 k0_dimGrid; +; CODE-NEXT: kernel0 <<>> (dev_MemRef0, dev_MemRef1, p_0_loaded_from_n, MemRef0_fortranarr_size, MemRef1_fortranarr_size, c0); +; CODE-NEXT: cudaCheckKernel(); +; CODE-NEXT: } + +; CODE: if (MemRef1_fortranarr_size >= 1) +; CODE-NEXT: cudaCheckReturn(cudaMemcpy(MemRef1, dev_MemRef1, (MemRef1_fortranarr_size) * sizeof(i32), cudaMemcpyDeviceToHost)); +; CODE-NEXT: } + +; CODE: kernel0 +; CODE-NEXT: Stmt_9(c0); + +; PROGRAM main +; INTEGER, DIMENSION(100) :: xs +; INTEGER, DIMENSION(100) :: ys +; +; DO i = 1, 100 +; xs (i) = i +; ys(i) = 0 +; END DO +; +; CALL copy(xs, ys, 10) +; +; PRINT *, ys +; CONTAINS +; SUBROUTINE copy(xs, ys, n) +; IMPLICIT NONE +; INTEGER, DIMENSION(:), INTENT(INOUT) :: xs, ys +; INTEGER, INTENT(IN) :: n +; INTEGER :: i +; +; DO i = 1, n +; ys(i * i) = xs(i * i) +; END DO +; +; END SUBROUTINE copy +; END PROGRAM +; ModuleID = 'program.bc' + +; ModuleID = 'program.bc' +source_filename = "program.bc" +target datalayout = 
"e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-unknown-linux-gnu" + +module asm "\09.ident\09\22GCC: (GNU) 4.6.4 LLVM: 3.3.1\22" + +%"struct.array1_integer(kind=4)" = type { i8*, i64, i64, [1 x %struct.descriptor_dimension] } +%struct.descriptor_dimension = type { i64, i64, i64 } +%"struct.array1_integer(kind=4).0" = type { i8*, i64, i64, [1 x %struct.descriptor_dimension] } +%"struct.array1_integer(kind=4).1" = type { i8*, i64, i64, [1 x %struct.descriptor_dimension] } +%"struct.array1_integer(kind=4).2" = type { i8*, i64, i64, [1 x %struct.descriptor_dimension] } +%struct.__st_parameter_dt = type { %struct.__st_parameter_common, i64, i64*, i64*, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i8*, [256 x i8], i32*, i64, i8*, i32, i32, i8*, i8*, i32, i32, i8*, i8*, i32, i32, i8*, i8*, i32, [4 x i8] } +%struct.__st_parameter_common = type { i32, i32, i8*, i32, i32, i8*, i32* } +%"struct.array1_integer(kind=4).3" = type { i8*, i64, i64, [1 x %struct.descriptor_dimension] } + +@0 = internal constant i32 10 +@.cst = private constant [12 x i8] c"program.f90\00", align 8 +@options.12.1603 = internal constant [8 x i32] [i32 68, i32 511, i32 0, i32 0, i32 0, i32 1, i32 0, i32 1], align 32 + +; Function Attrs: nounwind uwtable +define internal void @copy.1550(%"struct.array1_integer(kind=4)"* noalias %xs, %"struct.array1_integer(kind=4).0"* noalias %ys, i32* noalias %n) { +entry: + br label %entry.split + +entry.split: ; preds = %entry + %0 = getelementptr inbounds %"struct.array1_integer(kind=4).0", %"struct.array1_integer(kind=4).0"* %ys, i64 0, i32 3, i64 0, i32 0 + %1 = load i64, i64* %0, align 8 + %2 = icmp eq i64 %1, 0 + %3 = select i1 %2, i64 1, i64 %1 + %4 = bitcast %"struct.array1_integer(kind=4).0"* %ys to i32** + %5 = load i32*, i32** %4, align 8 + %6 = getelementptr inbounds %"struct.array1_integer(kind=4)", 
%"struct.array1_integer(kind=4)"* %xs, i64 0, i32 3, i64 0, i32 0 + %7 = load i64, i64* %6, align 8 + %8 = icmp eq i64 %7, 0 + %. = select i1 %8, i64 1, i64 %7 + %9 = bitcast %"struct.array1_integer(kind=4)"* %xs to i32** + %10 = load i32*, i32** %9, align 8 + %11 = load i32, i32* %n, align 4 + %12 = icmp sgt i32 %11, 0 + br i1 %12, label %"9.preheader", label %return + +"9.preheader": ; preds = %entry.split + br label %"9" + +"9": ; preds = %"9", %"9.preheader" + %13 = phi i32 [ %26, %"9" ], [ 1, %"9.preheader" ] + %14 = mul i32 %13, %13 + %15 = sext i32 %14 to i64 + %16 = mul i64 %3, %15 + %17 = sub i64 %16, %3 + %18 = mul i32 %13, %13 + %19 = sext i32 %18 to i64 + %20 = mul i64 %., %19 + %21 = sub i64 %20, %. + %22 = getelementptr i32, i32* %10, i64 %21 + %23 = load i32, i32* %22, align 4 + %24 = getelementptr i32, i32* %5, i64 %17 + store i32 %23, i32* %24, align 4 + %25 = icmp eq i32 %13, %11 + %26 = add i32 %13, 1 + br i1 %25, label %return.loopexit, label %"9" + +return.loopexit: ; preds = %"9" + br label %return + +return: ; preds = %return.loopexit, %entry.split + ret void +} + +declare void @_gfortran_set_args(i32, i8**) + +declare void @_gfortran_set_options(i32, i32*) \ No newline at end of file Index: test/Isl/CodeGen/fortran_array_runtime_size_generation.ll =================================================================== --- test/Isl/CodeGen/fortran_array_runtime_size_generation.ll +++ test/Isl/CodeGen/fortran_array_runtime_size_generation.ll @@ -1,5 +1,12 @@ ; Check that the runtime size computation is generated for Fortran arrays. +; REQUIRES: pollyacc + +; PPCG code generation backend: +; RUN: opt %loadPolly -S -polly-detect-fortran-arrays \ +; RUN: -polly-target=gpu -polly-acc-mincompute=0 \ +; RUN: -polly-codegen-ppcg < %s | FileCheck %s + ; Regular code generation backend: ; RUN: opt %loadPolly -S -polly-detect-fortran-arrays \ ; RUN: -polly-codegen < %s | FileCheck %s