diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -174,9 +174,6 @@
     def FeatureCallSavedX#i : SubtargetFeature<"call-saved-x"#i,
         "CustomCallSavedXRegs["#i#"]", "true", "Make X"#i#" callee saved.">;
 
-def FeatureUseAA : SubtargetFeature<"use-aa", "UseAA", "true",
-                                    "Use alias analysis during codegen">;
-
 def FeatureBalanceFPOps : SubtargetFeature<"balance-fp-ops", "BalanceFPOps",
     "true",
     "balance mix of odd and even D-registers for fp multiply(-accumulate) ops">;
@@ -581,7 +578,6 @@
                                    FeatureNEON,
                                    FeaturePerfMon,
                                    FeaturePostRAScheduler,
-                                   FeatureUseAA
                                    ]>;
 
 def ProcA55     : SubtargetFeature<"a55", "ARMProcFamily", "CortexA55",
@@ -596,7 +592,6 @@
                                    FeatureRCPC,
                                    FeaturePerfMon,
                                    FeaturePostRAScheduler,
-                                   FeatureUseAA
                                    ]>;
 
 def ProcA57     : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57",
@@ -730,7 +725,6 @@
                                    "CortexR82",
                                    "Cortex-R82 ARM Processors", [
                                    FeaturePostRAScheduler,
-                                   FeatureUseAA,
                                    // All other features are implied by v8_0r ops:
                                    HasV8_0rOps,
                                    ]>;
@@ -979,7 +973,6 @@
                                    FeatureRCPC,
                                    FeatureSSBS,
                                    FeaturePostRAScheduler,
-                                   FeatureUseAA,
                                    FeatureFuseAES,
                                    ]>;
 
@@ -996,7 +989,6 @@
                                    FeatureSPE,
                                    FeatureSSBS,
                                    FeaturePostRAScheduler,
-                                   FeatureUseAA,
                                    FeatureFuseAES,
                                    ]>;
 
@@ -1012,7 +1004,6 @@
                                    FeatureSVE2BitPerm,
                                    FeatureTRBE,
                                    FeaturePostRAScheduler,
-                                   FeatureUseAA,
                                    FeatureCrypto,
                                    FeatureFuseAES,
                                    ]>;
@@ -1032,7 +1023,6 @@
                                    FeatureNEON,
                                    FeaturePerfMon,
                                    FeaturePostRAScheduler,
-                                   FeatureUseAA,
                                    FeatureRandGen,
                                    FeatureSPE,
                                    FeatureSSBS,
@@ -1079,7 +1069,6 @@
                                    FeaturePredictableSelectIsExpensive,
                                    FeatureLSE,
                                    FeaturePAuth,
-                                   FeatureUseAA,
                                    FeatureBalanceFPOps,
                                    FeaturePerfMon,
                                    FeatureStrictAlign,
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -214,7 +214,6 @@
   unsigned MinVectorRegisterBitWidth = 64;
 
   bool OutlineAtomics = false;
-  bool UseAA = false;
   bool PredictableSelectIsExpensive = false;
   bool BalanceFPOps = false;
   bool CustomAsCheapAsMove = false;
@@ -494,7 +493,7 @@
            TargetTriple.getEnvironment() == Triple::GNUILP32;
   }
 
-  bool useAA() const override { return UseAA; }
+  bool useAA() const override;
 
   bool outlineAtomics() const { return OutlineAtomics; }
 
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -59,6 +59,9 @@
                           "with zero meaning no minimum size is assumed."),
                  cl::init(0), cl::Hidden);
 
+static cl::opt<bool> UseAA("aarch64-use-aa", cl::init(true),
+                           cl::desc("Enable the use of AA during codegen."));
+
 AArch64Subtarget &
 AArch64Subtarget::initializeSubtargetDependencies(StringRef FS,
                                                   StringRef CPUString) {
@@ -380,3 +383,5 @@
   // Prefer NEON unless larger SVE registers are available.
   return hasSVE() && getMinSVEVectorSizeInBits() >= 256;
 }
+
+bool AArch64Subtarget::useAA() const { return UseAA; }
diff --git a/llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll b/llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll
--- a/llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-gep-opt.ll
@@ -1,8 +1,8 @@
 ; RUN: llc -O3 -aarch64-enable-gep-opt=true -verify-machineinstrs %s -o - | FileCheck %s
-; RUN: llc -O3 -aarch64-enable-gep-opt=true -mattr=-use-aa -print-after=codegenprepare < %s >%t 2>&1 && FileCheck --check-prefix=CHECK-NoAA <%t %s
-; RUN: llc -O3 -aarch64-enable-gep-opt=true -mattr=+use-aa -print-after=codegenprepare < %s >%t 2>&1 && FileCheck --check-prefix=CHECK-UseAA <%t %s
-; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare -mcpu=cyclone < %s >%t 2>&1 && FileCheck --check-prefix=CHECK-NoAA <%t %s
-; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare -mcpu=cortex-a53 < %s >%t 2>&1 && FileCheck --check-prefix=CHECK-UseAA <%t %s
+; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare < %s 2>&1 | FileCheck --check-prefix=CHECK-UseAA %s
+; RUN: llc -O3 -aarch64-enable-gep-opt=true -aarch64-use-aa=false -print-after=codegenprepare < %s 2>&1 | FileCheck --check-prefix=CHECK-NoAA %s
+; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare -mcpu=cyclone < %s 2>&1 | FileCheck --check-prefix=CHECK-UseAA %s
+; RUN: llc -O3 -aarch64-enable-gep-opt=true -print-after=codegenprepare -mcpu=cortex-a53 < %s 2>&1 | FileCheck --check-prefix=CHECK-UseAA %s
 
 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64-linux-gnueabi"
@@ -15,7 +15,7 @@
 %struct = type { i32, i32, i32, i32, [20 x i32] }
 
 ; Check that when two complex GEPs are used in two basic blocks, LLVM can
-; elimilate the common subexpression for the second use.
+; eliminate the common subexpression for the second use.
 define void @test_GEP_CSE([240 x %struct]* %string, i32* %adj, i32 %lib, i64 %idxprom) {
   %liberties = getelementptr [240 x %struct], [240 x %struct]* %string, i64 1, i64 %idxprom, i32 3
   %1 = load i32, i32* %liberties, align 4
diff --git a/llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll b/llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll
--- a/llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-variadic-aapcs.ll
@@ -132,9 +132,10 @@
 ; CHECK: add x[[SRC:[0-9]+]], {{x[0-9]+}}, :lo12:var
 
-; CHECK: ldp [[BLOCK:q[0-9]+]], [[BLOCK:q[0-9]+]], [x[[SRC]]]
+; CHECK: ldr [[BLOCKB:q[0-9]+]], [x[[SRC]], #16]
 ; CHECK: add x[[DST:[0-9]+]], {{x[0-9]+}}, :lo12:second_list
-; CHECK: stp [[BLOCK:q[0-9]+]], [[BLOCK:q[0-9]+]], [x[[DST]]]
+; CHECK: ldr [[BLOCKA:q[0-9]+]], [x[[SRC]]]
+; CHECK: stp [[BLOCKA]], [[BLOCKB]], [x[[DST]]]
 
   ret void
 ; CHECK: ret
 }
diff --git a/llvm/test/CodeGen/AArch64/arm64-virtual_base.ll b/llvm/test/CodeGen/AArch64/arm64-virtual_base.ll
--- a/llvm/test/CodeGen/AArch64/arm64-virtual_base.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-virtual_base.ll
@@ -33,10 +33,10 @@
 
 define void @Precompute_Patch_Values(%struct.Bicubic_Patch_Struct* %Shape) {
 ; CHECK: Precompute_Patch_Values
-; CHECK: ldr [[VAL:x[0-9]+]], [x0, #288]
-; CHECK-NEXT: ldr [[VAL2:q[0-9]+]], [x0, #272]
-; CHECK-NEXT: str [[VAL]], [sp, #232]
+; CHECK: ldr [[VAL2:q[0-9]+]], [x0, #272]
+; CHECK-NEXT: ldr [[VAL:x[0-9]+]], [x0, #288]
 ; CHECK-NEXT: stur [[VAL2]], {{\[}}sp, #216]
+; CHECK-NEXT: str [[VAL]], [sp, #232]
 entry:
   %Control_Points = alloca [16 x [3 x double]], align 8
   %arraydecay5.3.1 = getelementptr inbounds [16 x [3 x double]], [16 x [3 x double]]* %Control_Points, i64 0, i64 9, i64 0
diff --git a/llvm/test/CodeGen/AArch64/ilp32-va.ll b/llvm/test/CodeGen/AArch64/ilp32-va.ll
--- a/llvm/test/CodeGen/AArch64/ilp32-va.ll
+++ b/llvm/test/CodeGen/AArch64/ilp32-va.ll
@@ -132,11 +132,11 @@
 ; CHECK: add x[[SRC:[0-9]+]], {{x[0-9]+}}, :lo12:var
 
-; CHECK: ldr [[BLOCK:q[0-9]+]], [x[[SRC]]]
-; CHECK: add x[[DST:[0-9]+]], {{x[0-9]+}}, :lo12:second_list
 ; CHECK: ldr [[BLOCK:w[0-9]+]], [x[[SRC]], #16]
-; CHECK: str [[BLOCK:q[0-9]+]], [x[[DST]]]
+; CHECK: add x[[DST:[0-9]+]], {{x[0-9]+}}, :lo12:second_list
 ; CHECK: str [[BLOCK:w[0-9]+]], [x[[DST]], #16]
+; CHECK: ldr [[BLOCK:q[0-9]+]], [x[[SRC]]]
+; CHECK: str [[BLOCK:q[0-9]+]], [x[[DST]]]
 
   ret void
 ; CHECK: ret
 }
diff --git a/llvm/test/CodeGen/AArch64/misched-stp.ll b/llvm/test/CodeGen/AArch64/misched-stp.ll
--- a/llvm/test/CodeGen/AArch64/misched-stp.ll
+++ b/llvm/test/CodeGen/AArch64/misched-stp.ll
@@ -1,5 +1,5 @@
 ; REQUIRES: asserts
-; RUN: llc < %s -mtriple=aarch64 -mcpu=cyclone -mattr=+use-aa,+slow-misaligned-128store -enable-misched -verify-misched -o - | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64 -mcpu=cyclone -mattr=+slow-misaligned-128store -enable-misched -verify-misched -o - | FileCheck %s
 
 ; Tests to check that the scheduler dependencies derived from alias analysis are
 ; correct when we have loads that have been split up so that they can later be
diff --git a/llvm/test/CodeGen/AArch64/seh-finally.ll b/llvm/test/CodeGen/AArch64/seh-finally.ll
--- a/llvm/test/CodeGen/AArch64/seh-finally.ll
+++ b/llvm/test/CodeGen/AArch64/seh-finally.ll
@@ -66,8 +66,8 @@
 ; CHECK-LABEL: fin_simple_seh
 ; CHECK: movz x8, #:abs_g1_s:.Lsimple_seh$frame_escape_0
 ; CHECK: movk x8, #:abs_g0_nc:.Lsimple_seh$frame_escape_0
+; CHECK: ldr w8, [x1, x8]
 ; CHECK: strb w0, [sp, #15]
-; CHECK: ldr w0, [x1, x8]
 ; CHECK: bl foo
 
   %frame_pointer.addr = alloca i8*, align 8
@@ -120,8 +120,8 @@
 ; CHECK-LABEL: fin_stack_realign
 ; CHECK: movz x8, #:abs_g1_s:.Lstack_realign$frame_escape_0
 ; CHECK: movk x8, #:abs_g0_nc:.Lstack_realign$frame_escape_0
+; CHECK: ldr w8, [x1, x8]
 ; CHECK: strb w0, [sp, #15]
-; CHECK: ldr w0, [x1, x8]
 ; CHECK: bl foo
 
   %frame_pointer.addr = alloca i8*, align 8
@@ -186,8 +186,8 @@
 ; CHECK-LABEL: fin_vla_present
 ; CHECK: movz x8, #:abs_g1_s:.Lvla_present$frame_escape_0
 ; CHECK: movk x8, #:abs_g0_nc:.Lvla_present$frame_escape_0
+; CHECK: ldr w8, [x1, x8]
 ; CHECK: strb w0, [sp, #15]
-; CHECK: ldr w0, [x1, x8]
 ; CHECK: bl foo
 
   %frame_pointer.addr = alloca i8*, align 8
@@ -256,8 +256,8 @@
 ; CHECK-LABEL: fin_vla_and_realign
 ; CHECK: movz x8, #:abs_g1_s:.Lvla_and_realign$frame_escape_0
 ; CHECK: movk x8, #:abs_g0_nc:.Lvla_and_realign$frame_escape_0
+; CHECK: ldr w8, [x1, x8]
 ; CHECK: strb w0, [sp, #15]
-; CHECK: ldr w0, [x1, x8]
 ; CHECK: bl foo
 
   %frame_pointer.addr = alloca i8*, align 8