Index: llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -128,7 +128,7 @@
 static cl::opt<bool>
     EnableGEPOpt("aarch64-enable-gep-opt", cl::Hidden,
                  cl::desc("Enable optimizations on complex GEPs"),
-                 cl::init(false));
+                 cl::init(true));
 
 static cl::opt<bool>
     BranchRelaxation("aarch64-enable-branch-relax", cl::Hidden, cl::init(true),
@@ -563,17 +563,6 @@
     addPass(createFalkorMarkStridedAccessesPass());
   }
 
-  TargetPassConfig::addIRPasses();
-
-  addPass(createAArch64StackTaggingPass(
-      /*IsOptNone=*/TM->getOptLevel() == CodeGenOpt::None));
-
-  // Match interleaved memory accesses to ldN/stN intrinsics.
-  if (TM->getOptLevel() != CodeGenOpt::None) {
-    addPass(createInterleavedLoadCombinePass());
-    addPass(createInterleavedAccessPass());
-  }
-
   if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) {
     // Call SeparateConstOffsetFromGEP pass to extract constants within indices
     // and lower a GEP with multiple indices to either arithmetic operations or
@@ -587,6 +576,17 @@
     addPass(createLICMPass());
   }
 
+  TargetPassConfig::addIRPasses();
+
+  addPass(createAArch64StackTaggingPass(
+      /*IsOptNone=*/TM->getOptLevel() == CodeGenOpt::None));
+
+  // Match interleaved memory accesses to ldN/stN intrinsics.
+  if (TM->getOptLevel() != CodeGenOpt::None) {
+    addPass(createInterleavedLoadCombinePass());
+    addPass(createInterleavedAccessPass());
+  }
+
   // Add Control Flow Guard checks.
   if (TM->getTargetTriple().isOSWindows())
     addPass(createCFGuardCheckPass());
Index: llvm/test/CodeGen/AArch64/cond-br-tuning.ll
===================================================================
--- llvm/test/CodeGen/AArch64/cond-br-tuning.ll
+++ llvm/test/CodeGen/AArch64/cond-br-tuning.ll
@@ -27,9 +27,7 @@
 define void @test_add_cbz_multiple_use(i32 %a, i32 %b, i32* %ptr) {
 ; CHECK-LABEL: test_add_cbz_multiple_use:
 ; CHECK:       // %bb.0: // %common.ret
-; CHECK-NEXT:    adds w8, w0, w1
-; CHECK-NEXT:    csel w8, wzr, w8, ne
-; CHECK-NEXT:    str w8, [x2]
+; CHECK-NEXT:    str wzr, [x2]
 ; CHECK-NEXT:    ret
   %c = add nsw i32 %a, %b
   %d = icmp ne i32 %c, 0
Index: llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep.ll
===================================================================
--- /dev/null
+++ llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -O3 -mtriple=aarch64-linux-gnu -aarch64-enable-gep-opt | FileCheck %s
+
+%struct = type { i32, i32, i32 }
+
+define i32 @test1(%struct* %ptr, i64 %idx) {
+; CHECK-LABEL: test1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    mov w8, #12
+; CHECK-NEXT:    madd x8, x1, x8, x0
+; CHECK-NEXT:    ldr w9, [x8, #4]
+; CHECK-NEXT:    tbnz w9, #31, .LBB0_2
+; CHECK-NEXT:  // %bb.1:
+; CHECK-NEXT:    mov w0, wzr
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB0_2: // %then
+; CHECK-NEXT:    ldr w8, [x8, #8]
+; CHECK-NEXT:    add w0, w9, w8
+; CHECK-NEXT:    ret
+  %gep.1 = getelementptr %struct, %struct* %ptr, i64 %idx, i32 1
+  %lv.1 = load i32, i32* %gep.1
+  %c = icmp slt i32 %lv.1, 0
+  br i1 %c, label %then, label %else
+
+then:
+  %gep.2 = getelementptr %struct, %struct* %ptr, i64 %idx, i32 2
+  %lv.2 = load i32, i32* %gep.2
+  %res = add i32 %lv.1, %lv.2
+  ret i32 %res
+
+
+else:
+  ret i32 0
+}