Index: lib/Target/AArch64/AArch64TargetMachine.cpp =================================================================== --- lib/Target/AArch64/AArch64TargetMachine.cpp +++ lib/Target/AArch64/AArch64TargetMachine.cpp @@ -232,6 +232,7 @@ // and lower a GEP with multiple indices to either arithmetic operations or // multiple GEPs with single index. addPass(createSeparateConstOffsetFromGEPPass(TM, true)); + addPass(createReassociatePass()); // Call EarlyCSE pass to find and remove subexpressions in the lowered // result. addPass(createEarlyCSEPass()); Index: test/CodeGen/AArch64/aarch64-gep-opt.ll =================================================================== --- test/CodeGen/AArch64/aarch64-gep-opt.ll +++ test/CodeGen/AArch64/aarch64-gep-opt.ll @@ -30,21 +30,20 @@ } ; CHECK-LABEL: test_GEP_CSE: -; CHECK: madd +; CHECK: {{madd|mul}} ; CHECK: ldr -; CHECK-NOT: madd +; CHECK-NOT: {{madd|mul}} ; CHECK:ldr ; CHECK-NoAA-LABEL: @test_GEP_CSE( ; CHECK-NoAA: [[PTR0:%[a-zA-Z0-9]+]] = ptrtoint [240 x %struct]* %string to i64 ; CHECK-NoAA: [[PTR1:%[a-zA-Z0-9]+]] = mul i64 %idxprom, 96 -; CHECK-NoAA: [[PTR2:%[a-zA-Z0-9]+]] = add i64 [[PTR0]], [[PTR1]] -; CHECK-NoAA: add i64 [[PTR2]], 23052 +; CHECK-NoAA: [[PTR2:%[a-zA-Z0-9]+]] = add i64 [[PTR0]], 23052 +; CHECK-NoAA: [[PTR3:%[a-zA-Z0-9]+]] = add i64 [[PTR2]], [[PTR1]] ; CHECK-NoAA: inttoptr ; CHECK-NoAA: if.then: -; CHECK-NoAA-NOT: ptrtoint ; CHECK-NoAA-NOT: mul -; CHECK-NoAA: add i64 [[PTR2]], 23048 +; CHECK-NoAA: add i64 {{%[a-zA-Z0-9]+}}, [[PTR1]] ; CHECK-NoAA: inttoptr ; CHECK-UseAA-LABEL: @test_GEP_CSE( @@ -83,19 +82,18 @@ ret void } ; CHECK-LABEL: test_GEP_across_BB: -; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #528] -; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}}, #532] +; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}} +; CHECK: ldr {{w[0-9]+}}, [{{x[0-9]+}} ; CHECK-NOT: add -; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, #532] -; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}}, #528] +; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}} +; CHECK: str {{w[0-9]+}}, [{{x[0-9]+}} ; CHECK-NoAA-LABEL: test_GEP_across_BB( -; CHECK-NoAA: add i64 [[TMP:%[a-zA-Z0-9]+]], 528 -; CHECK-NoAA: add i64 [[TMP]], 532 +; CHECK-NoAA: [[TMP:%[a-zA-Z0-9]+]] = mul i64 %idx, 16 ; CHECK-NoAA: if.true: -; CHECK-NoAA: {{%sunk[a-zA-Z0-9]+}} = add i64 [[TMP]], 532 +; CHECK-NoAA: {{%sunk[a-zA-Z0-9]+}} = add i64 {{.*}}[[TMP]] ; CHECK-NoAA: exit: -; CHECK-NoAA: {{%sunk[a-zA-Z0-9]+}} = add i64 [[TMP]], 528 +; CHECK-NoAA: {{%sunk[a-zA-Z0-9]+}} = add i64 {{.*}}[[TMP]] ; CHECK-UseAA-LABEL: test_GEP_across_BB( ; CHECK-UseAA: [[PTR0:%[a-zA-Z0-9]+]] = getelementptr @@ -123,7 +121,7 @@ } ; CHECK-NoAA-LABEL: @test-struct_1( ; CHECK-NoAA-NOT: getelementptr -; CHECK-NoAA: add i64 %{{[a-zA-Z0-9]+}}, 88 +; CHECK-NoAA: } ; CHECK-UseAA-LABEL: @test-struct_1( ; CHECK-UseAA: getelementptr i8, i8* %{{[a-zA-Z0-9]+}}, i64 88 Index: test/CodeGen/AArch64/arm64-dagcombiner-load-slicing.ll =================================================================== --- test/CodeGen/AArch64/arm64-dagcombiner-load-slicing.ll +++ test/CodeGen/AArch64/arm64-dagcombiner-load-slicing.ll @@ -41,8 +41,8 @@ ; CHECK: add [[BASE:x[0-9]+]], x0, x1, lsl #3 ; CHECK: ldp [[CPLX1_I:w[0-9]+]], [[CPLX1_R:w[0-9]+]], {{\[}}[[BASE]]] ; CHECK: ldp [[CPLX2_I:w[0-9]+]], [[CPLX2_R:w[0-9]+]], {{\[}}[[BASE]], #64] -; CHECK: add {{w[0-9]+}}, [[CPLX2_I]], [[CPLX1_I]] -; CHECK: add {{w[0-9]+}}, [[CPLX2_R]], [[CPLX1_R]] +; CHECK: add {{w[0-9]+}}, [[CPLX1_I]], [[CPLX2_I]] +; CHECK: add {{w[0-9]+}}, [[CPLX1_R]], [[CPLX2_R]] ; CHECK: ret define void @test_int(%class.Complex_int* nocapture %out, i64 %out_start) { entry: @@ -73,8 +73,8 @@ ; CHECK: add [[BASE:x[0-9]+]], x0, x1, lsl #4 ; CHECK: ldp [[CPLX1_I:x[0-9]+]], [[CPLX1_R:x[0-9]+]], {{\[}}[[BASE]]] ; CHECK: ldp [[CPLX2_I:x[0-9]+]], [[CPLX2_R:x[0-9]+]], {{\[}}[[BASE]], #128] -; CHECK: add {{x[0-9]+}}, [[CPLX2_I]], [[CPLX1_I]] -; CHECK: add {{x[0-9]+}}, [[CPLX2_R]], [[CPLX1_R]] +; CHECK: add {{x[0-9]+}}, [[CPLX1_I]], [[CPLX2_I]] +; CHECK: add {{x[0-9]+}}, [[CPLX1_R]], [[CPLX2_R]] ; CHECK: ret define void @test_long(%class.Complex_long* nocapture %out, i64 %out_start) { entry: Index: test/CodeGen/AArch64/arm64-gep-reassoc.ll =================================================================== --- /dev/null +++ test/CodeGen/AArch64/arm64-gep-reassoc.ll @@ -0,0 +1,24 @@ +; RUN: llc -mtriple aarch64-linux-gnu -O3 -aarch64-gep-opt=false < %s | FileCheck %s +; RUN: llc -mtriple aarch64-linux-gnu -O3 -aarch64-gep-opt=true < %s | FileCheck %s + +%struct.S = type { i64, [4 x [256 x i32]] } + +; The offset (#8) should be calculated once and applied to all ldr's in the addressing +; mode. + +; CHECK-LABEL: f: +; CHECK: add +; CHECK-NEXT: ldr +; CHECK-NEXT: ldr +; CHECK-NEXT: ldr +define i32 @f(%struct.S* %a, i64 %b) { + %x3 = getelementptr inbounds %struct.S, %struct.S* %a, i64 0, i32 1, i64 0, i64 %b + %x4 = load i32, i32* %x3, align 4 + %x5 = zext i32 %x4 to i64 + %x6 = getelementptr inbounds %struct.S, %struct.S* %a, i64 0, i32 1, i64 0, i64 %x5 + %x7 = load i32, i32* %x6, align 4 + %x8 = zext i32 %x7 to i64 + %x9 = getelementptr inbounds %struct.S, %struct.S* %a, i64 0, i32 1, i64 0, i64 %x8 + %x10 = load i32, i32* %x9, align 4 + ret i32 %x10 +}