Index: llvm/lib/Target/AArch64/AArch64TargetMachine.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -128,7 +128,7 @@ static cl::opt<bool> EnableGEPOpt("aarch64-enable-gep-opt", cl::Hidden, cl::desc("Enable optimizations on complex GEPs"), - cl::init(false)); + cl::init(true)); static cl::opt<bool> BranchRelaxation("aarch64-enable-branch-relax", cl::Hidden, cl::init(true), @@ -563,17 +563,6 @@ addPass(createFalkorMarkStridedAccessesPass()); } - TargetPassConfig::addIRPasses(); - - addPass(createAArch64StackTaggingPass( - /*IsOptNone=*/TM->getOptLevel() == CodeGenOpt::None)); - - // Match interleaved memory accesses to ldN/stN intrinsics. - if (TM->getOptLevel() != CodeGenOpt::None) { - addPass(createInterleavedLoadCombinePass()); - addPass(createInterleavedAccessPass()); - } - if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) { // Call SeparateConstOffsetFromGEP pass to extract constants within indices // and lower a GEP with multiple indices to either arithmetic operations or @@ -587,6 +576,17 @@ addPass(createLICMPass()); } + TargetPassConfig::addIRPasses(); + + addPass(createAArch64StackTaggingPass( + /*IsOptNone=*/TM->getOptLevel() == CodeGenOpt::None)); + + // Match interleaved memory accesses to ldN/stN intrinsics. + if (TM->getOptLevel() != CodeGenOpt::None) { + addPass(createInterleavedLoadCombinePass()); + addPass(createInterleavedAccessPass()); + } + // Add Control Flow Guard checks. 
if (TM->getTargetTriple().isOSWindows()) addPass(createCFGuardCheckPass()); Index: llvm/test/CodeGen/AArch64/O3-pipeline.ll =================================================================== --- llvm/test/CodeGen/AArch64/O3-pipeline.ll +++ llvm/test/CodeGen/AArch64/O3-pipeline.ll @@ -33,9 +33,20 @@ ; CHECK-NEXT: Scalar Evolution Analysis ; CHECK-NEXT: Loop Data Prefetch ; CHECK-NEXT: Falkor HW Prefetch Fix -; CHECK-NEXT: Module Verifier +; CHECK-NEXT: Split GEPs to a variadic base and a constant offset for better CSE +; CHECK-NEXT: Early CSE ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Memory SSA ; CHECK-NEXT: Canonicalize natural loops +; CHECK-NEXT: LCSSA Verifier +; CHECK-NEXT: Loop-Closed SSA Form Pass +; CHECK-NEXT: Scalar Evolution Analysis +; CHECK-NEXT: Lazy Branch Probability Analysis +; CHECK-NEXT: Lazy Block Frequency Analysis +; CHECK-NEXT: Loop Pass Manager +; CHECK-NEXT: Loop Invariant Code Motion +; CHECK-NEXT: Module Verifier ; CHECK-NEXT: Loop Pass Manager ; CHECK-NEXT: Canonicalize Freeze Instructions in Loops ; CHECK-NEXT: Induction Variable Users Index: llvm/test/CodeGen/AArch64/cond-br-tuning.ll =================================================================== --- llvm/test/CodeGen/AArch64/cond-br-tuning.ll +++ llvm/test/CodeGen/AArch64/cond-br-tuning.ll @@ -27,12 +27,13 @@ define void @test_add_cbz_multiple_use(i32 %a, i32 %b, i32* %ptr) { ; CHECK-LABEL: test_add_cbz_multiple_use: ; CHECK: // %bb.0: // %common.ret -; CHECK-NEXT: adds w8, w0, w1 +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: cmp w8, #10 ; CHECK-NEXT: csel w8, wzr, w8, ne ; CHECK-NEXT: str w8, [x2] ; CHECK-NEXT: ret %c = add nsw i32 %a, %b - %d = icmp ne i32 %c, 0 + %d = icmp ne i32 %c, 10 br i1 %d, label %L1, label %L2 L1: store i32 0, i32* %ptr, align 4 Index: llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep.ll =================================================================== --- /dev/null +++ 
llvm/test/Transforms/SeparateConstOffsetFromGEP/split-gep.ll @@ -0,0 +1,32 @@ +; RUN: llc < %s -O3 -mtriple=aarch64-linux-gnu | FileCheck %s + +%struct = type { i32, i32, i32 } + +define i32 @test1(%struct* %ptr, i64 %idx) { +; CHECK-LABEL: test1: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #12 +; CHECK-NEXT: madd x8, x1, x8, x0 +; CHECK-NEXT: ldr w9, [x8, #4] +; CHECK-NEXT: tbnz w9, #31, .LBB0_2 +; CHECK-NEXT: // %bb.1: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB0_2: // %then +; CHECK-NEXT: ldr w8, [x8, #8] +; CHECK-NEXT: add w0, w9, w8 +; CHECK-NEXT: ret + %gep.1 = getelementptr %struct, %struct* %ptr, i64 %idx, i32 1 + %lv.1 = load i32, i32* %gep.1 + %c = icmp slt i32 %lv.1, 0 + br i1 %c, label %then, label %else + +then: + %gep.2 = getelementptr %struct, %struct* %ptr, i64 %idx, i32 2 + %lv.2 = load i32, i32* %gep.2 + %res = add i32 %lv.1, %lv.2 + ret i32 %res + +else: + ret i32 0 +}