Index: include/polly/ScopInfo.h
===================================================================
--- include/polly/ScopInfo.h
+++ include/polly/ScopInfo.h
@@ -282,6 +282,9 @@
   /// For indirect accesses return the origin SAI of the BP, else null.
   const ScopArrayInfo *getBasePtrOriginSAI() const { return BasePtrOriginSAI; }
 
+  /// Return whether the ScopArrayInfo models a Fortran array.
+  bool isFortranArray() const { return FAD != nullptr; }
+
   /// The set of derived indirect SAIs for this origin SAI.
   const SmallSetVector<ScopArrayInfo *, 2> &getDerivedSAIs() const {
     return DerivedSAIs;
@@ -2731,6 +2734,18 @@
   ///        that has name @p Name.
   ScopArrayInfo *getArrayInfoByName(const std::string BaseName);
 
+  /// Return whether this Scop contains a Fortran array.
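+  /// An array counts as a Fortran array if a Fortran array descriptor is
+  /// attached to its ScopArrayInfo (see ScopArrayInfo::isFortranArray()).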
+  bool hasFortranArrays() const {
+    for (auto &S : *this) {
+      for (auto MemAcc : S) {
+        if (MemAcc->getLatestScopArrayInfo()->isFortranArray())
+          return true;
+      }
+    }
+    return false;
+  }
   /// Check whether @p Schedule contains extension nodes.
   ///
   /// @return true if @p Schedule contains extension nodes.
Index: lib/Analysis/ScopInfo.cpp
===================================================================
--- lib/Analysis/ScopInfo.cpp
+++ lib/Analysis/ScopInfo.cpp
@@ -4645,7 +4645,7 @@
 static isl::multi_union_pw_aff mapToDimension(isl::union_set USet, int N) {
   assert(N >= 0);
   assert(USet);
-  assert(!USet.is_empty());
+  assert(!bool(USet.is_empty()));
 
   auto Result = isl::union_pw_multi_aff::empty(USet.get_space());
 
Index: lib/CodeGen/IslNodeBuilder.cpp
===================================================================
--- lib/CodeGen/IslNodeBuilder.cpp
+++ lib/CodeGen/IslNodeBuilder.cpp
@@ -1436,7 +1436,7 @@
   // Materialize values for the parameters of the SCoP.
   materializeParameters();
 
-  // materialize the outermost dimension parameters for a Fortran array.
+  // Materialize the outermost dimension parameters for a Fortran array.
   // NOTE: materializeParameters() does not work since it looks through
   // the SCEVs. We don't have a corresponding SCEV for the array size
   // parameter
Index: lib/CodeGen/PPCGCodeGeneration.cpp
===================================================================
--- lib/CodeGen/PPCGCodeGeneration.cpp
+++ lib/CodeGen/PPCGCodeGeneration.cpp
@@ -127,8 +127,20 @@
   isl_id_to_ast_expr *RefToExpr = isl_id_to_ast_expr_alloc(Ctx, 0);
 
   for (MemoryAccess *Acc : *Stmt) {
-    isl_map *AddrFunc = Acc->getAddressFunction();
-    AddrFunc = isl_map_intersect_domain(AddrFunc, Stmt->getDomain());
+    isl_map *AddrFunc = nullptr;
+
+    if (Acc->isAffine()) {
+      AddrFunc = Acc->getAddressFunction();
+      AddrFunc = isl_map_intersect_domain(AddrFunc, Stmt->getDomain());
+    } else {
+      // The GPU backend cannot generate code for non-affine accesses; print
+      // the offending access before aborting.
+      errs() << "Offending access: ";
+      Acc->dump();
+      llvm_unreachable("Cannot codegen for GPU backend with non-affine access");
+    }
+
+    assert(AddrFunc && "expected AddrFunc to be initialized.");
     isl_id *RefId = Acc->getId();
     isl_pw_multi_aff *PMA = isl_pw_multi_aff_from_map(AddrFunc);
     isl_multi_pw_aff *MPA = isl_multi_pw_aff_from_pw_multi_aff(PMA);
@@ -138,7 +150,6 @@
     Access = FunctionExpr(Access, RefId, UserExpr);
     RefToExpr = isl_id_to_ast_expr_set(RefToExpr, RefId, Access);
   }
-
   return RefToExpr;
 }
 
@@ -2134,6 +2145,7 @@
       return isl_set_universe(Array->getSpace());
     }
 
+    isl_set *Extent = isl_set_universe(Array->getSpace());
     isl_set *AccessSet =
         isl_union_set_extract_set(AccessUSet, Array->getSpace());
 
@@ -2143,24 +2155,34 @@
     isl_pw_aff *Val =
         isl_pw_aff_from_aff(isl_aff_var_on_domain(LS, isl_dim_set, 0));
 
-    isl_pw_aff *OuterMin = isl_set_dim_min(isl_set_copy(AccessSet), 0);
-    isl_pw_aff *OuterMax = isl_set_dim_max(AccessSet, 0);
-    OuterMin = isl_pw_aff_add_dims(OuterMin, isl_dim_in,
-                                   isl_pw_aff_dim(Val, isl_dim_in));
-    OuterMax = isl_pw_aff_add_dims(OuterMax, isl_dim_in,
-                                   isl_pw_aff_dim(Val, isl_dim_in));
-    OuterMin =
-        isl_pw_aff_set_tuple_id(OuterMin, isl_dim_in, Array->getBasePtrId());
-    OuterMax =
-        isl_pw_aff_set_tuple_id(OuterMax, isl_dim_in, Array->getBasePtrId());
+    // In case the array is a Fortran array, we allow unbounded
+    // outermost dimensions, since we can load the outermost dimension
+    // information at runtime.
+    if (Array->isFortranArray() &&
+        !isl_set_dim_is_bounded(AccessSet, isl_dim_set, 0)) {
+      // Do not add min/max bounds for dimension 0; free the unused objects.
+      isl_set_free(AccessSet);
+      isl_pw_aff_free(Val);
+    } else {
-    isl_set *Extent = isl_set_universe(Array->getSpace());
+      isl_pw_aff *OuterMin = isl_set_dim_min(isl_set_copy(AccessSet), 0);
+      isl_pw_aff *OuterMax = isl_set_dim_max(AccessSet, 0);
+      OuterMin = isl_pw_aff_add_dims(OuterMin, isl_dim_in,
+                                     isl_pw_aff_dim(Val, isl_dim_in));
+      OuterMax = isl_pw_aff_add_dims(OuterMax, isl_dim_in,
+                                     isl_pw_aff_dim(Val, isl_dim_in));
+      OuterMin =
+          isl_pw_aff_set_tuple_id(OuterMin, isl_dim_in, Array->getBasePtrId());
+      OuterMax =
+          isl_pw_aff_set_tuple_id(OuterMax, isl_dim_in, Array->getBasePtrId());
+
+      Extent = isl_set_intersect(
+          Extent, isl_pw_aff_le_set(OuterMin, isl_pw_aff_copy(Val)));
+      Extent = isl_set_intersect(Extent, isl_pw_aff_ge_set(OuterMax, Val));
+    }
 
-    Extent = isl_set_intersect(
-        Extent, isl_pw_aff_le_set(OuterMin, isl_pw_aff_copy(Val)));
-    Extent = isl_set_intersect(Extent, isl_pw_aff_ge_set(OuterMax, Val));
+    int StartLowerBoundDim = Array->isFortranArray() ? 0 : 1;
 
-    for (unsigned i = 1; i < NumDims; ++i)
+    for (unsigned i = StartLowerBoundDim; i < NumDims; ++i)
       Extent = isl_set_lower_bound_si(Extent, isl_dim_set, i, 0);
 
     for (unsigned i = 0; i < NumDims; ++i) {
@@ -2424,7 +2446,8 @@
   // We do not use here the Polly ScheduleOptimizer, as the schedule optimizer
   // is mostly CPU specific. Instead, we use PPCG's GPU code generation
   // strategy directly from this pass.
-  gpu_gen *generateGPU(ppcg_scop *PPCGScop, gpu_prog *PPCGProg) {
+  gpu_gen *generateGPU(bool HasFortranArrays, ppcg_scop *PPCGScop,
+                       gpu_prog *PPCGProg) {
 
     auto PPCGGen = isl_calloc_type(S->getIslCtx(), struct gpu_gen);
 
@@ -2450,7 +2473,9 @@
 
     int has_permutable = has_any_permutable_node(Schedule);
 
-    if (!has_permutable || has_permutable < 0) {
+    // TODO: Check whether it is sound to generate code without a permutable
+    // band; it is allowed for SCoPs with Fortran arrays to exercise codegen.
+    if ((!has_permutable || has_permutable < 0) && !HasFortranArrays) {
       Schedule = isl_schedule_free(Schedule);
     } else {
       Schedule = map_to_device(PPCGGen, Schedule);
@@ -2683,7 +2708,9 @@
 
     auto PPCGScop = createPPCGScop();
     auto PPCGProg = createPPCGProg(PPCGScop);
-    auto PPCGGen = generateGPU(PPCGScop, PPCGProg);
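+    // generateGPU relaxes the permutable-band check for Fortran arrays.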
+    auto PPCGGen =
+        generateGPU(CurrentScop.hasFortranArrays(), PPCGScop, PPCGProg);
 
     if (PPCGGen->tree)
       generateCode(isl_ast_node_copy(PPCGGen->tree), PPCGProg);
Index: test/GPGPU/fortran-copy-kernel-affine.ll
===================================================================
--- /dev/null
+++ test/GPGPU/fortran-copy-kernel-affine.ll
@@ -0,0 +1,136 @@
+; RUN: opt -analyze %loadPolly -polly-allow-nonaffine -polly-ignore-aliasing -polly-scops -polly-detect-fortran-arrays \
+; RUN:  < %s | FileCheck %s -check-prefix=SCOPS
+
+; RUN: opt %loadPolly -polly-allow-nonaffine -polly-ignore-aliasing -polly-detect-fortran-arrays -polly-codegen-ppcg -polly-acc-dump-code \
+; RUN: -disable-output < %s | FileCheck -check-prefix=CODE %s
+
+; REQUIRES: pollyacc
+
+; Check that Fortran arrays are detected.
+; SCOPS:       ReadAccess :=	[Reduction Type: NONE] [Fortran array descriptor: xs] [Scalar: 0]
+; SCOPS-NEXT:     [tmp11, tmp7, p_2, tmp1, p_4, MemRef_tmp10_fortranarr_size, MemRef_tmp5_fortranarr_size] -> { Stmt_9[i0] -> MemRef_tmp10[1 + i0, -p_2] };
+; SCOPS-NEXT: MustWriteAccess :=	[Reduction Type: NONE] [Fortran array descriptor: ys] [Scalar: 0]
+; SCOPS-NEXT:     [tmp11, tmp7, p_2, tmp1, p_4, MemRef_tmp10_fortranarr_size, MemRef_tmp5_fortranarr_size] -> { Stmt_9[i0] -> MemRef_tmp5[1 + i0, -p_4] };
+
+; Check that we generate CUDA calls.
+; CODE:      Code
+; CODE-NEXT: ====
+; CODE-NEXT: # host
+; CODE-NEXT: if (tmp11 >= 1) {
+; CODE-NEXT:   if (tmp7 >= 1 && MemRef_tmp10_fortranarr_size >= 2)
+; CODE-NEXT:     cudaCheckReturn(cudaMemcpy(dev_MemRef_tmp10, MemRef_tmp10, (MemRef_tmp10_fortranarr_size >= tmp11 + 1 ? tmp11 + 1 : MemRef_tmp10_fortranarr_size) * (tmp7) * sizeof(i32), cudaMemcpyHostToDevice));
+; CODE-NEXT:   if (tmp1 >= 1 && MemRef_tmp5_fortranarr_size >= 2)
+; CODE-NEXT:     cudaCheckReturn(cudaMemcpy(dev_MemRef_tmp5, MemRef_tmp5, (MemRef_tmp5_fortranarr_size >= tmp11 + 1 ? tmp11 + 1 : MemRef_tmp5_fortranarr_size) * (tmp1) * sizeof(i32), cudaMemcpyHostToDevice));
+; CODE-NEXT:   {
+; CODE-NEXT:     dim3 k0_dimBlock(32);
+; CODE-NEXT:     dim3 k0_dimGrid(tmp11 >= 1048546 ? 32768 : floord(tmp11 + 31, 32));
+; CODE-NEXT:     kernel0 <<<k0_dimGrid, k0_dimBlock>>> (dev_MemRef_tmp10, dev_MemRef_tmp5, tmp11, tmp7, p_2, tmp1, p_4, MemRef_tmp10_fortranarr_size, MemRef_tmp5_fortranarr_size);
+; CODE-NEXT:     cudaCheckKernel();
+; CODE-NEXT:   }
+
+; CODE:        if (tmp1 >= 1 && MemRef_tmp5_fortranarr_size >= 2)
+; CODE-NEXT:     cudaCheckReturn(cudaMemcpy(MemRef_tmp5, dev_MemRef_tmp5, (MemRef_tmp5_fortranarr_size >= tmp11 + 1 ? tmp11 + 1 : MemRef_tmp5_fortranarr_size) * (tmp1) * sizeof(i32), cudaMemcpyDeviceToHost));
+; CODE-NEXT: }
+; This is not the exact code that is generated from dragonegg. A non-affine access that is
+; created due to error checking is manually made linear.
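+; Concretely, in the multiplications below, the selects %tmp3 and %. were
+; replaced by the loaded values %tmp1 and %tmp7, making the accesses affine.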
+;
+; PROGRAM main
+;     INTEGER, DIMENSION(100) :: xs
+;     INTEGER, DIMENSION(100) :: ys
+; 
+;     DO i = 1, 100
+;         xs (i) = i
+;         ys(i) = 0
+;     END DO
+; 
+;     CALL copy(xs, ys, 10)
+; 
+;     PRINT *, ys
+; CONTAINS
+;     SUBROUTINE copy(xs, ys, n)
+;         IMPLICIT NONE
+;         INTEGER, DIMENSION(:), INTENT(INOUT) :: xs, ys
+;         INTEGER, INTENT(IN) :: n
+;         INTEGER :: i
+; 
+;         DO i = 1, n
+;             ys(i) = xs(i)
+;         END DO
+; 
+;     END SUBROUTINE copy
+; END PROGRAM
+
+
+; ModuleID = 'test/GPGPU/fortran-copy-kernel-affine.ll'
+source_filename = "test/GPGPU/fortran-copy-kernel-affine.ll"
+target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+module asm "\09.ident\09\22GCC: (GNU) 4.6.4 LLVM: 3.3.1\22"
+
+%"struct.array1_integer(kind=4)" = type { i8*, i64, i64, [1 x %struct.descriptor_dimension] }
+%struct.descriptor_dimension = type { i64, i64, i64 }
+%"struct.array1_integer(kind=4).0" = type { i8*, i64, i64, [1 x %struct.descriptor_dimension] }
+
+@.cst = private constant [12 x i8] c"program.f90\00", align 8
+@0 = internal constant i32 10
+@options.14.1603 = internal constant [8 x i32] [i32 68, i32 511, i32 0, i32 0, i32 0, i32 1, i32 0, i32 1], align 32
+
+define internal void @copy.1550(%"struct.array1_integer(kind=4)"* noalias %xs, %"struct.array1_integer(kind=4).0"* noalias %ys, i32* noalias %n) {
+entry:
+  br label %entry.split
+
+entry.split:                                      ; preds = %entry
+  %tmp = getelementptr inbounds %"struct.array1_integer(kind=4).0", %"struct.array1_integer(kind=4).0"* %ys, i64 0, i32 3, i64 0, i32 0
+  %tmp1 = load i64, i64* %tmp, align 8
+  %tmp2 = icmp eq i64 %tmp1, 0
+  %tmp3 = select i1 %tmp2, i64 1, i64 %tmp1
+  %tmp4 = bitcast %"struct.array1_integer(kind=4).0"* %ys to i32**
+  %tmp5 = load i32*, i32** %tmp4, align 8
+  %tmp6 = getelementptr inbounds %"struct.array1_integer(kind=4)", %"struct.array1_integer(kind=4)"* %xs, i64 0, i32 3, i64 0, i32 0
+  %tmp7 = load i64, i64* %tmp6, align 8
+  %tmp8 = icmp eq i64 %tmp7, 0
+  %. = select i1 %tmp8, i64 1, i64 %tmp7
+  %tmp9 = bitcast %"struct.array1_integer(kind=4)"* %xs to i32**
+  %tmp10 = load i32*, i32** %tmp9, align 8
+  %tmp11 = load i32, i32* %n, align 4
+  %tmp12 = icmp sgt i32 %tmp11, 0
+  br i1 %tmp12, label %"9.preheader", label %return
+
+"9.preheader":                                    ; preds = %entry.split
+  br label %"9"
+
+"9":                                              ; preds = %"9", %"9.preheader"
+  %tmp13 = phi i32 [ %tmp24, %"9" ], [ 1, %"9.preheader" ]
+  %tmp14 = sext i32 %tmp13 to i64
+  ; Manually replaced %tmp3 with %tmp1 so that the access is affine.
+  ; Original: %tmp15 = mul i64 %tmp3, %tmp14
+  %tmp15 = mul i64 %tmp1, %tmp14
+
+  %tmp16 = sub i64 %tmp15, %tmp3
+  %tmp17 = sext i32 %tmp13 to i64
+  ; Manually replaced %. with %tmp7 so that the access is affine.
+  ; Original: %tmp18 = mul i64 %., %tmp17
+  %tmp18 = mul i64 %tmp7, %tmp17
+
+  %tmp19 = sub i64 %tmp18, %.
+  %tmp20 = getelementptr i32, i32* %tmp10, i64 %tmp19
+  %tmp21 = load i32, i32* %tmp20, align 4
+  %tmp22 = getelementptr i32, i32* %tmp5, i64 %tmp16
+  store i32 %tmp21, i32* %tmp22, align 4
+  %tmp23 = icmp eq i32 %tmp13, %tmp11
+  %tmp24 = add i32 %tmp13, 1
+  br i1 %tmp23, label %return.loopexit, label %"9"
+
+return.loopexit:                                  ; preds = %"9"
+  br label %return
+
+return:                                           ; preds = %return.loopexit, %entry.split
+  ret void
+}
+
+declare void @_gfortran_set_args(i32, i8**)
+
+declare void @_gfortran_set_options(i32, i32*)
Index: test/Isl/CodeGen/fortran_array_runtime_size_generation.ll
===================================================================
--- test/Isl/CodeGen/fortran_array_runtime_size_generation.ll
+++ test/Isl/CodeGen/fortran_array_runtime_size_generation.ll
@@ -1,5 +1,13 @@
 ; Check that the runtime size computation is generated for Fortran arrays.
 
+; REQUIRES: pollyacc
+
+; PPCG code generation backend:
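+; -polly-acc-mincompute=0 makes sure that even this small kernel is offloaded.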
+; RUN: opt %loadPolly -S -polly-detect-fortran-arrays \
+; RUN: -polly-target=gpu  -polly-acc-mincompute=0 \
+; RUN: -polly-codegen-ppcg < %s | FileCheck %s 
+
 ; Regular code generation backend:
 ; RUN: opt %loadPolly -S -polly-detect-fortran-arrays \
 ; RUN: -polly-codegen < %s | FileCheck %s