Index: llvm/lib/Target/AArch64/AArch64TargetMachine.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -128,7 +128,7 @@ static cl::opt<bool> EnableGEPOpt("aarch64-enable-gep-opt", cl::Hidden, cl::desc("Enable optimizations on complex GEPs"), - cl::init(false)); + cl::init(true)); static cl::opt<bool> BranchRelaxation("aarch64-enable-branch-relax", cl::Hidden, cl::init(true), @@ -563,17 +563,6 @@ addPass(createFalkorMarkStridedAccessesPass()); } - TargetPassConfig::addIRPasses(); - - addPass(createAArch64StackTaggingPass( - /*IsOptNone=*/TM->getOptLevel() == CodeGenOpt::None)); - - // Match interleaved memory accesses to ldN/stN intrinsics. - if (TM->getOptLevel() != CodeGenOpt::None) { - addPass(createInterleavedLoadCombinePass()); - addPass(createInterleavedAccessPass()); - } - if (TM->getOptLevel() == CodeGenOpt::Aggressive && EnableGEPOpt) { // Call SeparateConstOffsetFromGEP pass to extract constants within indices // and lower a GEP with multiple indices to either arithmetic operations or @@ -587,6 +576,17 @@ addPass(createLICMPass()); } + TargetPassConfig::addIRPasses(); + + addPass(createAArch64StackTaggingPass( + /*IsOptNone=*/TM->getOptLevel() == CodeGenOpt::None)); + + // Match interleaved memory accesses to ldN/stN intrinsics. + if (TM->getOptLevel() != CodeGenOpt::None) { + addPass(createInterleavedLoadCombinePass()); + addPass(createInterleavedAccessPass()); + } + // Add Control Flow Guard checks. 
if (TM->getTargetTriple().isOSWindows()) addPass(createCFGuardCheckPass()); Index: llvm/test/CodeGen/AArch64/O3-pipeline.ll =================================================================== --- llvm/test/CodeGen/AArch64/O3-pipeline.ll +++ llvm/test/CodeGen/AArch64/O3-pipeline.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc --debugify-and-strip-all-safe=0 -mtriple=arm64-- -O3 -debug-pass=Structure < %s -o /dev/null 2>&1 | \ ; RUN: grep -v "Verify generated machine code" | FileCheck %s @@ -33,9 +34,20 @@ ; CHECK-NEXT: Scalar Evolution Analysis ; CHECK-NEXT: Loop Data Prefetch ; CHECK-NEXT: Falkor HW Prefetch Fix -; CHECK-NEXT: Module Verifier +; CHECK-NEXT: Split GEPs to a variadic base and a constant offset for better CSE +; CHECK-NEXT: Early CSE ; CHECK-NEXT: Basic Alias Analysis (stateless AA impl) +; CHECK-NEXT: Function Alias Analysis Results +; CHECK-NEXT: Memory SSA ; CHECK-NEXT: Canonicalize natural loops +; CHECK-NEXT: LCSSA Verifier +; CHECK-NEXT: Loop-Closed SSA Form Pass +; CHECK-NEXT: Scalar Evolution Analysis +; CHECK-NEXT: Lazy Branch Probability Analysis +; CHECK-NEXT: Lazy Block Frequency Analysis +; CHECK-NEXT: Loop Pass Manager +; CHECK-NEXT: Loop Invariant Code Motion +; CHECK-NEXT: Module Verifier ; CHECK-NEXT: Loop Pass Manager ; CHECK-NEXT: Canonicalize Freeze Instructions in Loops ; CHECK-NEXT: Induction Variable Users Index: llvm/test/CodeGen/AArch64/cond-br-tuning.ll =================================================================== --- llvm/test/CodeGen/AArch64/cond-br-tuning.ll +++ llvm/test/CodeGen/AArch64/cond-br-tuning.ll @@ -27,12 +27,13 @@ define void @test_add_cbz_multiple_use(i32 %a, i32 %b, i32* %ptr) { ; CHECK-LABEL: test_add_cbz_multiple_use: ; CHECK: // %bb.0: // %common.ret -; CHECK-NEXT: adds w8, w0, w1 +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: cmp w8, #10 ; CHECK-NEXT: csel w8, wzr, w8, ne ; CHECK-NEXT: str w8, [x2] ; CHECK-NEXT: ret %c = add nsw i32 %a, %b - 
%d = icmp ne i32 %c, 0 + %d = icmp ne i32 %c, 10 br i1 %d, label %L1, label %L2 L1: store i32 0, i32* %ptr, align 4