Index: lib/Target/AArch64/AArch64TargetMachine.cpp
===================================================================
--- lib/Target/AArch64/AArch64TargetMachine.cpp
+++ lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -232,6 +232,7 @@
     // and lower a GEP with multiple indices to either arithmetic operations or
     // multiple GEPs with single index.
     addPass(createSeparateConstOffsetFromGEPPass(TM, true));
+    addPass(createReassociatePass());
     // Call EarlyCSE pass to find and remove subexpressions in the lowered
     // result.
     addPass(createEarlyCSEPass());
Index: test/CodeGen/AArch64/aarch64-gep-opt.ll
===================================================================
--- test/CodeGen/AArch64/aarch64-gep-opt.ll
+++ test/CodeGen/AArch64/aarch64-gep-opt.ll
@@ -30,21 +30,20 @@
 }
 
 ; CHECK-LABEL: test_GEP_CSE:
-; CHECK: madd
+; CHECK: {{madd|mul}}
 ; CHECK: ldr
-; CHECK-NOT: madd
+; CHECK-NOT: {{madd|mul}}
 ; CHECK:ldr
 
 ; CHECK-NoAA-LABEL: @test_GEP_CSE(
 ; CHECK-NoAA: [[PTR0:%[a-zA-Z0-9]+]] = ptrtoint [240 x %struct]* %string to i64
 ; CHECK-NoAA: [[PTR1:%[a-zA-Z0-9]+]] = mul i64 %idxprom, 96
-; CHECK-NoAA: [[PTR2:%[a-zA-Z0-9]+]] = add i64 [[PTR0]], [[PTR1]]
-; CHECK-NoAA: add i64 [[PTR2]], 23052
+; CHECK-NoAA: [[PTR2:%[a-zA-Z0-9]+]] = add i64 [[PTR0]], 23052
+; CHECK-NoAA: [[PTR3:%[a-zA-Z0-9]+]] = add i64 [[PTR2]], [[PTR1]]
 ; CHECK-NoAA: inttoptr
 ; CHECK-NoAA: if.then:
-; CHECK-NoAA-NOT: ptrtoint
 ; CHECK-NoAA-NOT: mul
-; CHECK-NoAA: add i64 [[PTR2]], 23048
+; CHECK-NoAA: add i64 {{%[a-zA-Z0-9]+}}, [[PTR1]]
 ; CHECK-NoAA: inttoptr
 
 ; CHECK-UseAA-LABEL: @test_GEP_CSE(
@@ -83,19 +82,18 @@
   ret void
 }
 ; CHECK-LABEL: test_GEP_across_BB:
-; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #528]
-; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #532]
+; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}
+; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}
 ; CHECK-NOT: add
-; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, #532]
-; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, #528]
+; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}
+; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}
 
 ; CHECK-NoAA-LABEL: test_GEP_across_BB(
-; CHECK-NoAA: add i64 [[TMP:%[a-zA-Z0-9]+]], 528
-; CHECK-NoAA: add i64 [[TMP]], 532
+; CHECK-NoAA: [[TMP:%[a-zA-Z0-9]+]] = mul i64 %idx, 16
 ; CHECK-NoAA: if.true:
-; CHECK-NoAA: {{%sunk[a-zA-Z0-9]+}} = add i64 [[TMP]], 532
+; CHECK-NoAA: {{%sunk[a-zA-Z0-9]+}} = add i64 {{.*}}[[TMP]]
 ; CHECK-NoAA: exit:
-; CHECK-NoAA: {{%sunk[a-zA-Z0-9]+}} = add i64 [[TMP]], 528
+; CHECK-NoAA: {{%sunk[a-zA-Z0-9]+}} = add i64 {{.*}}[[TMP]]
 
 ; CHECK-UseAA-LABEL: test_GEP_across_BB(
 ; CHECK-UseAA: [[PTR0:%[a-zA-Z0-9]+]] = getelementptr
@@ -123,7 +121,7 @@
 }
 ; CHECK-NoAA-LABEL: @test-struct_1(
 ; CHECK-NoAA-NOT: getelementptr
-; CHECK-NoAA: add i64 %{{[a-zA-Z0-9]+}}, 88
+; CHECK-NoAA: }
 
 ; CHECK-UseAA-LABEL: @test-struct_1(
 ; CHECK-UseAA: getelementptr i8, i8* %{{[a-zA-Z0-9]+}}, i64 88
Index: test/CodeGen/AArch64/arm64-dagcombiner-load-slicing.ll
===================================================================
--- test/CodeGen/AArch64/arm64-dagcombiner-load-slicing.ll
+++ test/CodeGen/AArch64/arm64-dagcombiner-load-slicing.ll
@@ -41,8 +41,8 @@
 ; CHECK: add [[BASE:x[0-9]+]], x0, x1, lsl #3
 ; CHECK: ldp [[CPLX1_I:w[0-9]+]], [[CPLX1_R:w[0-9]+]], {{\[}}[[BASE]]]
 ; CHECK: ldp [[CPLX2_I:w[0-9]+]], [[CPLX2_R:w[0-9]+]], {{\[}}[[BASE]], #64]
-; CHECK: add {{w[0-9]+}}, [[CPLX2_I]], [[CPLX1_I]]
-; CHECK: add {{w[0-9]+}}, [[CPLX2_R]], [[CPLX1_R]]
+; CHECK: add {{w[0-9]+}}, [[CPLX1_I]], [[CPLX2_I]]
+; CHECK: add {{w[0-9]+}}, [[CPLX1_R]], [[CPLX2_R]]
 ; CHECK: ret
 define void @test_int(%class.Complex_int* nocapture %out, i64 %out_start) {
 entry:
@@ -73,8 +73,8 @@
 ; CHECK: add [[BASE:x[0-9]+]], x0, x1, lsl #4
 ; CHECK: ldp [[CPLX1_I:x[0-9]+]], [[CPLX1_R:x[0-9]+]], {{\[}}[[BASE]]]
 ; CHECK: ldp [[CPLX2_I:x[0-9]+]], [[CPLX2_R:x[0-9]+]], {{\[}}[[BASE]], #128]
-; CHECK: add {{x[0-9]+}}, [[CPLX2_I]], [[CPLX1_I]]
-; CHECK: add {{x[0-9]+}}, [[CPLX2_R]], [[CPLX1_R]]
+; CHECK: add {{x[0-9]+}}, [[CPLX1_I]], [[CPLX2_I]]
+; CHECK: add {{x[0-9]+}}, [[CPLX1_R]], [[CPLX2_R]]
 ; CHECK: ret
 define void @test_long(%class.Complex_long* nocapture %out, i64 %out_start) {
 entry:
Index: test/CodeGen/AArch64/arm64-gep-reassoc.ll
===================================================================
--- /dev/null
+++ test/CodeGen/AArch64/arm64-gep-reassoc.ll
@@ -0,0 +1,24 @@
+; RUN: llc -mtriple aarch64-linux-gnu -O3 -aarch64-gep-opt=false < %s | FileCheck %s
+; RUN: llc -mtriple aarch64-linux-gnu -O3 -aarch64-gep-opt=true < %s | FileCheck %s
+
+%struct.S = type { i64, [4 x [256 x i32]] }
+
+; The constant offset (#8) should be computed once, by a single add, and then
+; reused by all three ldr's through their addressing modes (no add in between).
+
+; CHECK-LABEL: f:
+; CHECK: add
+; CHECK-NEXT: ldr
+; CHECK-NEXT: ldr
+; CHECK-NEXT: ldr
+define i32 @f(%struct.S* %a, i64 %b) { ; chain of three loads from field 1, row 0 of %a; each loaded value indexes the next
+  %x3 = getelementptr inbounds %struct.S, %struct.S* %a, i64 0, i32 1, i64 0, i64 %b ; address of field 1, row 0, element %b
+  %x4 = load i32, i32* %x3, align 4 ; first load, indexed by the argument %b
+  %x5 = zext i32 %x4 to i64 ; widen the loaded value so it can serve as the next i64 index
+  %x6 = getelementptr inbounds %struct.S, %struct.S* %a, i64 0, i32 1, i64 0, i64 %x5 ; same base and constant indices, element %x5
+  %x7 = load i32, i32* %x6, align 4 ; second load, indexed by the first result
+  %x8 = zext i32 %x7 to i64 ; widen again for the third index
+  %x9 = getelementptr inbounds %struct.S, %struct.S* %a, i64 0, i32 1, i64 0, i64 %x8 ; same base and constant indices, element %x8
+  %x10 = load i32, i32* %x9, align 4 ; third load, indexed by the second result
+  ret i32 %x10 ; return the last loaded value
+}