Index: lib/CodeGen/PPCGCodeGeneration.cpp
===================================================================
--- lib/CodeGen/PPCGCodeGeneration.cpp
+++ lib/CodeGen/PPCGCodeGeneration.cpp
@@ -822,13 +822,6 @@
       HostPtr = ScopArray->getBasePtr();
     HostPtr = getLatestValue(HostPtr);
 
-    Value *Offset = getArrayOffset(Array);
-    if (Offset) {
-      HostPtr = Builder.CreatePointerCast(
-          HostPtr, ScopArray->getElementType()->getPointerTo());
-      HostPtr = Builder.CreateGEP(HostPtr, Offset);
-    }
-
     HostPtr = Builder.CreatePointerCast(HostPtr, Builder.getInt8PtrTy());
     DeviceAllocations[ScopArray] = HostPtr;
   }
Index: managed-mem-with-offset.ll
===================================================================
--- /dev/null
+++ managed-mem-with-offset.ll
@@ -0,0 +1,79 @@
+; RUN: opt %loadPolly -polly-scops \
+; RUN: -analyze < %s | FileCheck %s --check-prefix=SCOP
+
+; RUN: opt %loadPolly -polly-codegen-ppcg \
+; RUN: -S -polly-acc-codegen-managed-memory \
+; RUN: < %s | FileCheck %s --check-prefix=HOST-IR
+;
+; REQUIRES: pollyacc
+
+; Regression test: the array offset computation used to be generated twice,
+; creating incorrect IR. The offset must now be computed only once.
+
+; SCOP: Function: f
+; SCOP-NEXT: Region: %entry.split---%for.end
+; SCOP-NEXT: Max Loop Depth:  1
+
+; The host IR must NOT contain a GEP that applies the offset followed by a GEP that undoes it.
+; HOST-IR-NOT:  %9 = getelementptr i32, i32* %arr, i64 100
+; HOST-IR-NOT:  %10 = bitcast i32* %9 to i8*
+; HOST-IR-NOT:  %11 = bitcast i8* %10 to i32*
+; HOST-IR-NOT:  %12 = getelementptr i32, i32* %11, i64 -100
+; HOST-IR-NOT:  %13 = bitcast i32* %12 to i8*
+
+; HOST-IR:  %9 = bitcast i32* %arr to i8*
+; HOST-IR:  %10 = bitcast i8* %9 to i32*
+; HOST-IR:  %11 = getelementptr i32, i32* %10, i64 -100
+; HOST-IR:  %12 = bitcast i32* %11 to i8*
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.12.0"
+
+define void @f(i32* %arr, i32 %N) {   ; C equivalent: for (i = 0; i < N; i++) arr[i + 100] += 42;
+entry:
+  br label %entry.split
+
+entry.split:                                      ; preds = %entry
+  %tmp = sext i32 %N to i64   ; N widened to i64 so it can be compared with the i64 induction variable
+  %cmp1 = icmp sgt i32 %N, 0   ; guard: the loop is entered only when N > 0
+  br i1 %cmp1, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph:                                   ; preds = %entry.split
+  br label %for.body
+
+for.body:                                         ; preds = %for.body.lr.ph, %for.body
+  %indvars.iv2 = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]   ; i, starting at 0
+  %tmp3 = add nuw nsw i64 %indvars.iv2, 100   ; i + 100: every access is shifted by a constant 100 elements
+  %arrayidx = getelementptr inbounds i32, i32* %arr, i64 %tmp3   ; &arr[i + 100]
+  %tmp4 = load i32, i32* %arrayidx, align 4, !tbaa !3   ; read arr[i + 100]
+  %add1 = add nsw i32 %tmp4, 42
+  store i32 %add1, i32* %arrayidx, align 4, !tbaa !3   ; write the sum back to the same element
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv2, 1   ; i + 1
+  %exitcond = icmp ne i64 %indvars.iv.next, %tmp   ; keep looping while i + 1 != N
+  br i1 %exitcond, label %for.body, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge:                       ; preds = %for.body
+  br label %for.end
+
+for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry.split
+  ret void
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #0   ; declared but not called by @f
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #0   ; declared but not called by @f
+
+attributes #0 = { argmemonly nounwind }   ; attribute group shared by the two declarations above
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 7, !"PIC Level", i32 2}
+!2 = !{!"clang version 6.0.0 (http://llvm.org/git/clang.git a2ce4ccdf0d85af0048a3d80644af4fcee34c007) (http://llvm.org/git/llvm.git 75f9fda9e19662a3f06732843da79e8a5b9c448c)"}
+!3 = !{!4, !4, i64 0}
+!4 = !{!"int", !5, i64 0}
+!5 = !{!"omnipotent char", !6, i64 0}
+!6 = !{!"Simple C/C++ TBAA"}