diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp --- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -79,14 +79,17 @@ CacheLineSize = 64; break; case CortexA35: - break; case CortexA53: case CortexA55: PrefFunctionLogAlignment = 4; + PrefLoopLogAlignment = 4; + MaxBytesForLoopAlignment = 8; break; case CortexA57: MaxInterleaveFactor = 4; PrefFunctionLogAlignment = 4; + PrefLoopLogAlignment = 4; + MaxBytesForLoopAlignment = 8; break; case CortexA65: PrefFunctionLogAlignment = 3; @@ -94,6 +97,10 @@ case CortexA72: case CortexA73: case CortexA75: + PrefFunctionLogAlignment = 4; + PrefLoopLogAlignment = 4; + MaxBytesForLoopAlignment = 8; + break; case CortexA76: case CortexA77: case CortexA78: @@ -102,12 +109,21 @@ case CortexX1: case CortexX1C: PrefFunctionLogAlignment = 4; + PrefLoopLogAlignment = 5; + MaxBytesForLoopAlignment = 16; break; case CortexA510: + PrefFunctionLogAlignment = 4; + VScaleForTuning = 1; + PrefLoopLogAlignment = 4; + MaxBytesForLoopAlignment = 8; + break; case CortexA710: case CortexX2: PrefFunctionLogAlignment = 4; VScaleForTuning = 1; + PrefLoopLogAlignment = 5; + MaxBytesForLoopAlignment = 16; break; case A64FX: CacheLineSize = 256; diff --git a/llvm/test/CodeGen/AArch64/aarch64-p2align-max-bytes-neoverse.ll b/llvm/test/CodeGen/AArch64/aarch64-p2align-max-bytes-neoverse.ll --- a/llvm/test/CodeGen/AArch64/aarch64-p2align-max-bytes-neoverse.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-p2align-max-bytes-neoverse.ll @@ -1,12 +1,25 @@ -; RUN: llc -mtriple=aarch64-none-linux-gnu -align-loops=32 < %s -o -| FileCheck %s --check-prefixes=CHECK,CHECK-DEFAULT -; RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=neoverse-n1 < %s -o -| FileCheck %s --check-prefixes=CHECK,CHECK-N1 +; RUN: llc -mtriple=aarch64-none-linux-gnu -align-loops=32 < %s -o -| FileCheck %s --check-prefixes=CHECK,CHECK-DEFAULT +; RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=neoverse-n1 < %s -o -| FileCheck %s --check-prefixes=CHECK,CHECK-16 +; RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=neoverse-n2 < %s -o -| FileCheck %s --check-prefixes=CHECK,CHECK-16 +; RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=neoverse-v1 < %s -o -| FileCheck %s --check-prefixes=CHECK,CHECK-16 +; RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-x1 < %s -o -| FileCheck %s --check-prefixes=CHECK,CHECK-16 +; RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-x2 < %s -o -| FileCheck %s --check-prefixes=CHECK,CHECK-16 +; RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a35 < %s -o -| FileCheck %s --check-prefixes=CHECK,CHECK-8 +; RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a53 < %s -o -| FileCheck %s --check-prefixes=CHECK,CHECK-8 +; RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a55 < %s -o -| FileCheck %s --check-prefixes=CHECK,CHECK-8 +; RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a57 < %s -o -| FileCheck %s --check-prefixes=CHECK,CHECK-8 +; RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a510 < %s -o -| FileCheck %s --check-prefixes=CHECK,CHECK-8 +; RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a75 < %s -o -| FileCheck %s --check-prefixes=CHECK,CHECK-8 +; RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a710 < %s -o -| FileCheck %s --check-prefixes=CHECK,CHECK-16 define i32 @a(i32 %x, i32* nocapture readonly %y, i32* nocapture readonly %z) { ; CHECK-DEFAULT: .p2align 5 -; CHECK-N1: .p2align 5, 0x0, 16 +; CHECK-8: .p2align 4, 0x0, 8 +; CHECK-16: .p2align 5, 0x0, 16 ; CHECK-NEXT: .LBB0_5: // %vector.body ; CHECK-DEFAULT: .p2align 5 -; CHECK-N1: .p2align 5, 0x0, 16 +; CHECK-8: .p2align 4, 0x0, 8 +; CHECK-16: .p2align 5, 0x0, 16 ; CHECK-NEXT: .LBB0_8: // %for.body entry: %cmp10 = icmp sgt i32 %x, 0 diff --git a/llvm/test/CodeGen/AArch64/merge-store-dependency.ll b/llvm/test/CodeGen/AArch64/merge-store-dependency.ll --- a/llvm/test/CodeGen/AArch64/merge-store-dependency.ll +++ b/llvm/test/CodeGen/AArch64/merge-store-dependency.ll @@ -35,6 +35,7 @@ ; A53-NEXT: // %bb.1: ; A53-NEXT: ldr w8, [x19] ; A53-NEXT: ldr w9, [x9] +; A53-NEXT: .p2align 4, 0x0, 8 ; A53-NEXT: .LBB0_2: // %while.body.i.split.ver.us ; A53-NEXT: // =>This Inner Loop Header: Depth=1 ; A53-NEXT: lsl w9, w9, #1 @@ -46,6 +47,7 @@ ; A53-NEXT: str x0, [x8, :lo12:gv1] ; A53-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload ; A53-NEXT: ret +; A53-NEXT: .p2align 4, 0x0, 8 ; A53-NEXT: .LBB0_4: // %while.body.i.split ; A53-NEXT: // =>This Inner Loop Header: Depth=1 ; A53-NEXT: b .LBB0_4 diff --git a/llvm/test/CodeGen/AArch64/preferred-function-alignment.ll b/llvm/test/CodeGen/AArch64/preferred-function-alignment.ll --- a/llvm/test/CodeGen/AArch64/preferred-function-alignment.ll +++ b/llvm/test/CodeGen/AArch64/preferred-function-alignment.ll @@ -1,5 +1,5 @@ ; RUN: llc -mtriple=aarch64-unknown-linux -mcpu=generic < %s | FileCheck --check-prefixes=ALIGN2,CHECK %s -; RUN: llc -mtriple=aarch64-unknown-linux -mcpu=cortex-a35 < %s | FileCheck --check-prefixes=ALIGN2,CHECK %s +; RUN: llc -mtriple=aarch64-unknown-linux -mcpu=cortex-a35 < %s | FileCheck --check-prefixes=ALIGN4,CHECK %s ; RUN: llc -mtriple=aarch64-unknown-linux -mcpu=cortex-a53 < %s | FileCheck --check-prefixes=ALIGN4,CHECK %s ; RUN: llc -mtriple=aarch64-unknown-linux -mcpu=cortex-a55 < %s | FileCheck --check-prefixes=ALIGN4,CHECK %s ; RUN: llc -mtriple=aarch64-unknown-linux -mcpu=cortex-a57 < %s | FileCheck --check-prefixes=ALIGN4,CHECK %s