diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -295,6 +295,10 @@ "disable-latency-sched-heuristic", "DisableLatencySchedHeuristic", "true", "Disable latency scheduling heuristic">; +def FeatureStorePairSuppress : SubtargetFeature< + "store-pair-suppress", "EnableStorePairSuppress", "true", + "Enable Store Pair Suppression heuristics">; + def FeatureForce32BitJumpTables : SubtargetFeature<"force-32bit-jump-tables", "Force32BitJumpTables", "true", "Force jump table entries to be 32-bits wide except at MinSize">; @@ -952,8 +956,8 @@ FeaturePostRAScheduler, FeatureAggressiveFMA, FeatureArithmeticBccFusion, - FeaturePredictableSelectIsExpensive - ]>; + FeatureStorePairSuppress, + FeaturePredictableSelectIsExpensive]>; def TuneCarmel : SubtargetFeature<"carmel", "ARMProcFamily", "Carmel", "Nvidia Carmel processors">; @@ -967,10 +971,10 @@ FeatureArithmeticCbzFusion, FeatureDisableLatencySchedHeuristic, FeatureFuseAES, FeatureFuseCryptoEOR, + FeatureStorePairSuppress, FeatureZCRegMove, FeatureZCZeroing, - FeatureZCZeroingFPWorkaround] - >; + FeatureZCZeroingFPWorkaround]>; def TuneAppleA10 : SubtargetFeature<"apple-a10", "ARMProcFamily", "AppleA10", "Apple A10", [ @@ -980,9 +984,9 @@ FeatureDisableLatencySchedHeuristic, FeatureFuseAES, FeatureFuseCryptoEOR, + FeatureStorePairSuppress, FeatureZCRegMove, - FeatureZCZeroing] - >; + FeatureZCZeroing]>; def TuneAppleA11 : SubtargetFeature<"apple-a11", "ARMProcFamily", "AppleA11", "Apple A11", [ @@ -992,9 +996,9 @@ FeatureDisableLatencySchedHeuristic, FeatureFuseAES, FeatureFuseCryptoEOR, + FeatureStorePairSuppress, FeatureZCRegMove, - FeatureZCZeroing] - >; + FeatureZCZeroing]>; def TuneAppleA12 : SubtargetFeature<"apple-a12", "ARMProcFamily", "AppleA12", "Apple A12", [ @@ -1004,9 +1008,9 @@ FeatureDisableLatencySchedHeuristic, FeatureFuseAES, FeatureFuseCryptoEOR, + FeatureStorePairSuppress, 
FeatureZCRegMove, - FeatureZCZeroing] - >; + FeatureZCZeroing]>; def TuneAppleA13 : SubtargetFeature<"apple-a13", "ARMProcFamily", "AppleA13", "Apple A13", [ @@ -1016,9 +1020,9 @@ FeatureDisableLatencySchedHeuristic, FeatureFuseAES, FeatureFuseCryptoEOR, + FeatureStorePairSuppress, FeatureZCRegMove, - FeatureZCZeroing] - >; + FeatureZCZeroing]>; def TuneAppleA14 : SubtargetFeature<"apple-a14", "ARMProcFamily", "AppleA14", "Apple A14", [ @@ -1034,6 +1038,7 @@ FeatureFuseCryptoEOR, FeatureFuseAdrpAdd, FeatureFuseLiterals, + FeatureStorePairSuppress, FeatureZCRegMove, FeatureZCZeroing]>; @@ -1049,9 +1054,9 @@ FeatureFuseCCSelect, FeatureFuseCryptoEOR, FeatureFuseLiterals, + FeatureStorePairSuppress, FeatureZCRegMove, - FeatureZCZeroing - ]>; + FeatureZCZeroing]>; def TuneAppleA16 : SubtargetFeature<"apple-a16", "ARMProcFamily", "AppleA16", "Apple A16", [ @@ -1065,9 +1070,9 @@ FeatureFuseCCSelect, FeatureFuseCryptoEOR, FeatureFuseLiterals, + FeatureStorePairSuppress, FeatureZCRegMove, - FeatureZCZeroing - ]>; + FeatureZCZeroing]>; def TuneExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3", "Samsung Exynos-M3 processors", @@ -1078,6 +1083,7 @@ FeatureFuseCCSelect, FeatureFuseAdrpAdd, FeatureFuseLiterals, + FeatureStorePairSuppress, FeatureAddrLSLFast, FeatureALULSLFast, FeaturePostRAScheduler, @@ -1096,6 +1102,7 @@ FeatureFuseCCSelect, FeatureFuseAdrpAdd, FeatureFuseLiterals, + FeatureStorePairSuppress, FeatureAddrLSLFast, FeatureALULSLFast, FeaturePostRAScheduler, @@ -1107,18 +1114,18 @@ FeaturePredictableSelectIsExpensive, FeatureZCZeroing, FeatureAddrLSLFast, - FeatureALULSLFast] - >; + FeatureALULSLFast, + FeatureStorePairSuppress]>; def TuneFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor", "Qualcomm Falkor processors", [ FeaturePostRAScheduler, FeaturePredictableSelectIsExpensive, FeatureZCZeroing, + FeatureStorePairSuppress, FeatureAddrLSLFast, FeatureALULSLFast, - FeatureSlowSTRQro - ]>; + FeatureSlowSTRQro]>; def TuneNeoverseE1 : 
SubtargetFeature<"neoversee1", "ARMProcFamily", "NeoverseE1", "Neoverse E1 ARM processors", [ @@ -1182,6 +1189,7 @@ FeaturePostRAScheduler, FeaturePredictableSelectIsExpensive, FeatureZCZeroing, + FeatureStorePairSuppress, FeatureAddrLSLFast, FeatureALULSLFast]>; @@ -1190,6 +1198,7 @@ FeatureAggressiveFMA, FeatureArithmeticBccFusion, FeaturePostRAScheduler, + FeatureStorePairSuppress, FeaturePredictableSelectIsExpensive]>; def TuneThunderX3T110 : SubtargetFeature<"thunderx3t110", "ARMProcFamily", @@ -1200,34 +1209,40 @@ FeaturePostRAScheduler, FeaturePredictableSelectIsExpensive, FeatureBalanceFPOps, + FeatureStorePairSuppress, FeatureStrictAlign]>; def TuneThunderX : SubtargetFeature<"thunderx", "ARMProcFamily", "ThunderX", "Cavium ThunderX processors", [ FeaturePostRAScheduler, + FeatureStorePairSuppress, FeaturePredictableSelectIsExpensive]>; def TuneThunderXT88 : SubtargetFeature<"thunderxt88", "ARMProcFamily", "ThunderXT88", "Cavium ThunderX processors", [ FeaturePostRAScheduler, + FeatureStorePairSuppress, FeaturePredictableSelectIsExpensive]>; def TuneThunderXT81 : SubtargetFeature<"thunderxt81", "ARMProcFamily", "ThunderXT81", "Cavium ThunderX processors", [ FeaturePostRAScheduler, + FeatureStorePairSuppress, FeaturePredictableSelectIsExpensive]>; def TuneThunderXT83 : SubtargetFeature<"thunderxt83", "ARMProcFamily", "ThunderXT83", "Cavium ThunderX processors", [ FeaturePostRAScheduler, + FeatureStorePairSuppress, FeaturePredictableSelectIsExpensive]>; def TuneTSV110 : SubtargetFeature<"tsv110", "ARMProcFamily", "TSV110", "HiSilicon TS-V110 processors", [ FeatureFuseAES, + FeatureStorePairSuppress, FeaturePostRAScheduler]>; def TuneAmpere1 : SubtargetFeature<"ampere1", "ARMProcFamily", "Ampere1", @@ -1241,7 +1256,8 @@ FeatureCmpBccFusion, FeatureFuseAddress, FeatureFuseLiterals, - FeatureLdpAlignedOnly, + FeatureStorePairSuppress, + FeatureLdpAlignedOnly, FeatureStpAlignedOnly]>; def TuneAmpere1A : SubtargetFeature<"ampere1a", "ARMProcFamily", "Ampere1A", 
@@ -1256,6 +1272,7 @@ FeatureFuseAddress, FeatureFuseLiterals, FeatureFuseLiterals, + FeatureStorePairSuppress, FeatureLdpAlignedOnly, FeatureStpAlignedOnly]>; diff --git a/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp b/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp --- a/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp +++ b/llvm/lib/Target/AArch64/AArch64StorePairSuppress.cpp @@ -11,6 +11,7 @@ // ===---------------------------------------------------------------------===// #include "AArch64InstrInfo.h" +#include "AArch64Subtarget.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" @@ -122,7 +123,10 @@ if (skipFunction(MF.getFunction()) || MF.getFunction().hasOptSize()) return false; - const TargetSubtargetInfo &ST = MF.getSubtarget(); + const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>(); + if (!ST.enableStorePairSuppress()) + return false; + TII = static_cast<const AArch64InstrInfo *>(ST.getInstrInfo()); TRI = ST.getRegisterInfo(); MRI = &MF.getRegInfo(); diff --git a/llvm/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll b/llvm/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll --- a/llvm/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll @@ -1,13 +1,13 @@ -; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-BALFP --check-prefix CHECK-EVEN -; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-BALFP --check-prefix CHECK-ODD -; RUN: llc < %s -mcpu=cortex-a53 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | 
FileCheck %s --check-prefix CHECK --check-prefix CHECK-A53 --check-prefix CHECK-EVEN -; RUN: llc < %s -mcpu=cortex-a53 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A53 --check-prefix CHECK-ODD +; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-EVEN +; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-ODD +; RUN: llc < %s -mcpu=cortex-a53 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-EVEN +; RUN: llc < %s -mcpu=cortex-a53 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-ODD ; The following tests use the balance-fp-ops feature, and should be independent of ; the target cpu. 
-; RUN: llc < %s -mtriple=aarch64-linux-gnueabi -mattr=+balance-fp-ops -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-EVEN --check-prefix CHECK-BALFP -; RUN: llc < %s -mtriple=aarch64-linux-gnueabi -mattr=+balance-fp-ops -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-ODD --check-prefix CHECK-BALFP +; RUN: llc < %s -mtriple=aarch64-linux-gnueabi -mattr=+balance-fp-ops -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-EVEN +; RUN: llc < %s -mtriple=aarch64-linux-gnueabi -mattr=+balance-fp-ops -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-ODD ; Test the AArch64A57FPLoadBalancing pass. 
This pass relies heavily on register allocation, so ; our test strategy is to: @@ -81,9 +81,7 @@ ; CHECK: fmsub [[x]] ; CHECK: fmadd [[y]] ; CHECK: fmadd [[x]] -; CHECK-BALFP: stp [[x]], [[y]] -; CHECK-A53-DAG: str [[x]] -; CHECK-A53-DAG: str [[y]] +; CHECK: stp [[x]], [[y]] define void @f2(ptr nocapture readonly %p, ptr nocapture %q) #0 { entry: @@ -176,9 +174,7 @@ ; CHECK: fmsub [[x]] ; CHECK: fmadd [[y]] ; CHECK: fmadd [[x]] -; CHECK-BALFP: stp [[x]], [[y]] -; CHECK-A53-DAG: str [[x]] -; CHECK-A53-DAG: str [[y]] +; CHECK: stp [[x]], [[y]] define void @f4(ptr nocapture readonly %p, ptr nocapture %q) #0 { entry: diff --git a/llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll b/llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll --- a/llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll +++ b/llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll @@ -385,12 +385,9 @@ ; CHECK-NEXT: bl return_in_block ; CHECK-NEXT: adrp x8, in_block_store ; CHECK-NEXT: add x8, x8, :lo12:in_block_store -; CHECK-NEXT: str d0, [x8] -; CHECK-NEXT: str d1, [x8, #8] -; CHECK-NEXT: str d2, [x8, #16] -; CHECK-NEXT: str d3, [x8, #24] -; CHECK-NEXT: str d4, [x8, #32] -; CHECK-NEXT: str d5, [x8, #40] +; CHECK-NEXT: stp d0, d1, [x8] +; CHECK-NEXT: stp d2, d3, [x8, #16] +; CHECK-NEXT: stp d4, d5, [x8, #32] ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %1 = call %T_IN_BLOCK @return_in_block() @@ -403,12 +400,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, in_block_store ; CHECK-NEXT: add x8, x8, :lo12:in_block_store -; CHECK-NEXT: str d5, [x8, #40] -; CHECK-NEXT: str d4, [x8, #32] -; CHECK-NEXT: str d3, [x8, #24] -; CHECK-NEXT: str d2, [x8, #16] -; CHECK-NEXT: str d1, [x8, #8] -; CHECK-NEXT: str d0, [x8] +; CHECK-NEXT: stp d4, d5, [x8, #32] +; CHECK-NEXT: stp d2, d3, [x8, #16] +; CHECK-NEXT: stp d0, d1, [x8] ; CHECK-NEXT: ret store %T_IN_BLOCK %a, ptr @in_block_store ret void diff --git a/llvm/test/CodeGen/AArch64/arm64-windows-calls.ll 
b/llvm/test/CodeGen/AArch64/arm64-windows-calls.ll --- a/llvm/test/CodeGen/AArch64/arm64-windows-calls.ll +++ b/llvm/test/CodeGen/AArch64/arm64-windows-calls.ll @@ -152,8 +152,7 @@ ; CHECK-NEXT: add x19, x19, :lo12:Pod ; CHECK-NEXT: mov x0, x19 ; CHECK-NEXT: bl copy_pod -; CHECK-NEXT: str d0, [x19] -; CHECK-NEXT: str d1, [x19, #8] +; CHECK-NEXT: stp d0, d1, [x19] ; CHECK-NEXT: .seh_startepilogue ; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: .seh_save_reg x30, 8 diff --git a/llvm/test/CodeGen/AArch64/ldrpre-ldr-merge.mir b/llvm/test/CodeGen/AArch64/ldrpre-ldr-merge.mir --- a/llvm/test/CodeGen/AArch64/ldrpre-ldr-merge.mir +++ b/llvm/test/CodeGen/AArch64/ldrpre-ldr-merge.mir @@ -1,3 +1,4 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 # RUN: llc -o - %s -mtriple=aarch64 -mcpu=cortex-a55 -lsr-preferred-addressing-mode=preindexed -stop-after=aarch64-ldst-opt | FileCheck %s --- @@ -14,9 +15,10 @@ liveins: $w0, $w1, $x1 ; CHECK-LABEL: name: 1-ldrwpre-ldrwui-merge ; CHECK: liveins: $w0, $w1, $x1 - ; CHECK: early-clobber $x1, renamable $w0, renamable $w2 = LDPWpre renamable $x1, 5 :: (load (s32)) - ; CHECK: STPWi renamable $w0, renamable $w2, renamable $x1, 0 :: (store (s32)) - ; CHECK: RET undef $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber $x1, renamable $w0, renamable $w2 = LDPWpre renamable $x1, 5 :: (load (s32)) + ; CHECK-NEXT: STPWi renamable $w0, renamable $w2, renamable $x1, 0 :: (store (s32)) + ; CHECK-NEXT: RET undef $lr early-clobber renamable $x1, renamable $w0 = LDRWpre killed renamable $x1, 20 :: (load (s32)) renamable $w2 = LDRWui renamable $x1, 1 :: (load (s32)) STRWui killed renamable $w0, renamable $x1, 0 :: (store (s32)) @@ -39,9 +41,10 @@ liveins: $x2, $x3, $x1 ; CHECK-LABEL: name: 2-ldrxpre-ldrxui-merge ; CHECK: liveins: $x1, $x2, $x3 - ; CHECK: early-clobber $x1, renamable $x2, renamable $x3 = LDPXpre renamable $x1, 3 :: (load (s64)) - ; CHECK: STPXi renamable 
$x2, renamable $x3, renamable $x1, 0 :: (store (s64)) - ; CHECK: RET undef $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber $x1, renamable $x2, renamable $x3 = LDPXpre renamable $x1, 3 :: (load (s64)) + ; CHECK-NEXT: STPXi renamable $x2, renamable $x3, renamable $x1, 0 :: (store (s64)) + ; CHECK-NEXT: RET undef $lr early-clobber renamable $x1, renamable $x2 = LDRXpre killed renamable $x1, 24 :: (load (s64)) renamable $x3 = LDRXui renamable $x1, 1 :: (load (s64)) STRXui killed renamable $x2, renamable $x1, 0 :: (store (s64)) @@ -64,10 +67,10 @@ liveins: $s0, $s1, $x1 ; CHECK-LABEL: name: 3-ldrspre-ldrsui-merge ; CHECK: liveins: $s0, $s1, $x1 - ; CHECK: early-clobber $x1, renamable $s0, renamable $s1 = LDPSpre renamable $x1, 3 :: (load (s32)) - ; CHECK: STRSui renamable $s0, renamable $x1, 0 :: (store (s32)) - ; CHECK: STRSui renamable $s1, renamable $x1, 1 :: ("aarch64-suppress-pair" store (s32)) - ; CHECK: RET undef $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber $x1, renamable $s0, renamable $s1 = LDPSpre renamable $x1, 3 :: (load (s32)) + ; CHECK-NEXT: STPSi renamable $s0, renamable $s1, renamable $x1, 0 :: (store (s32)) + ; CHECK-NEXT: RET undef $lr early-clobber renamable $x1, renamable $s0 = LDRSpre killed renamable $x1, 12 :: (load (s32)) renamable $s1 = LDRSui renamable $x1, 1 :: (load (s32)) STRSui killed renamable $s0, renamable $x1, 0 :: (store (s32)) @@ -90,10 +93,10 @@ liveins: $d0, $d1, $x1 ; CHECK-LABEL: name: 4-ldrqdre-ldrdui-merge ; CHECK: liveins: $d0, $d1, $x1 - ; CHECK: early-clobber $x1, renamable $d0, renamable $d1 = LDPDpre renamable $x1, 16 :: (load (s64)) - ; CHECK: STRDui renamable $d0, renamable $x1, 0 :: (store (s64)) - ; CHECK: STRDui renamable $d1, renamable $x1, 1 :: ("aarch64-suppress-pair" store (s64)) - ; CHECK: RET undef $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber $x1, renamable $d0, renamable $d1 = LDPDpre renamable $x1, 16 :: (load (s64)) + ; CHECK-NEXT: STPDi renamable $d0, renamable $d1, renamable 
$x1, 0 :: (store (s64)) + ; CHECK-NEXT: RET undef $lr early-clobber renamable $x1, renamable $d0 = LDRDpre killed renamable $x1, 128 :: (load (s64)) renamable $d1 = LDRDui renamable $x1, 1 :: (load (s64)) STRDui killed renamable $d0, renamable $x1, 0 :: (store (s64)) @@ -120,9 +123,10 @@ liveins: $q0, $q1, $x1 ; CHECK-LABEL: name: 5-ldrqpre-ldrqui-merge ; CHECK: liveins: $q0, $q1, $x1 - ; CHECK: early-clobber $x1, renamable $q0, renamable $q1 = LDPQpre renamable $x1, 3 :: (load (s128)) - ; CHECK: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store (s128)) - ; CHECK: RET undef $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber $x1, renamable $q0, renamable $q1 = LDPQpre renamable $x1, 3 :: (load (s128)) + ; CHECK-NEXT: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store (s128)) + ; CHECK-NEXT: RET undef $lr early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 48 :: (load (s128)) renamable $q1 = LDRQui renamable $x1, 1 :: (load (s128)) STRQui killed renamable $q0, renamable $x1, 0 :: (store (s128)) @@ -149,10 +153,11 @@ liveins: $q0, $q1, $x1 ; CHECK-LABEL: name: 6-ldrqui-ldrqpre-no-merge ; CHECK: liveins: $q0, $q1, $x1 - ; CHECK: renamable $q1 = LDRQui renamable $x1, 1 :: (load (s128)) - ; CHECK: early-clobber renamable $x1, renamable $q0 = LDRQpre renamable $x1, 48, implicit $w1 :: (load (s128)) - ; CHECK: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store (s128)) - ; CHECK: RET undef $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $q1 = LDRQui renamable $x1, 1 :: (load (s128)) + ; CHECK-NEXT: early-clobber renamable $x1, renamable $q0 = LDRQpre renamable $x1, 48, implicit $w1 :: (load (s128)) + ; CHECK-NEXT: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store (s128)) + ; CHECK-NEXT: RET undef $lr renamable $q1 = LDRQui renamable $x1, 1 :: (load (s128)) early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 48 :: (load (s128)) STRQui killed renamable $q0, renamable 
$x1, 0 :: (store (s128)) @@ -179,9 +184,10 @@ liveins: $q0, $q1, $x1 ; CHECK-LABEL: name: 7-ldrqpre-ldrqui-max-offset-merge ; CHECK: liveins: $q0, $q1, $x1 - ; CHECK: early-clobber $x1, renamable $q0, renamable $q1 = LDPQpre renamable $x1, 15 :: (load (s128)) - ; CHECK: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store (s128)) - ; CHECK: RET undef $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber $x1, renamable $q0, renamable $q1 = LDPQpre renamable $x1, 15 :: (load (s128)) + ; CHECK-NEXT: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store (s128)) + ; CHECK-NEXT: RET undef $lr early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 240 :: (load (s128)) renamable $q1 = LDRQui renamable $x1, 1 :: (load (s128)) STRQui killed renamable $q0, renamable $x1, 0 :: (store (s128)) @@ -208,9 +214,10 @@ liveins: $q0, $q1, $x1 ; CHECK-LABEL: name: 8-ldrqpre-ldrqui-min-offset-merge ; CHECK: liveins: $q0, $q1, $x1 - ; CHECK: early-clobber $x1, renamable $q0, renamable $q1 = LDPQpre renamable $x1, -16 :: (load (s128)) - ; CHECK: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store (s128)) - ; CHECK: RET undef $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber $x1, renamable $q0, renamable $q1 = LDPQpre renamable $x1, -16 :: (load (s128)) + ; CHECK-NEXT: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store (s128)) + ; CHECK-NEXT: RET undef $lr early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, -256 :: (load (s128)) renamable $q1 = LDRQui renamable $x1, 1 :: (load (s128)) STRQui killed renamable $q0, renamable $x1, 0 :: (store (s128)) @@ -238,11 +245,12 @@ liveins: $s0, $s1, $x0, $x1 ; CHECK-LABEL: name: 9-ldrspre-ldrsui-mod-base-reg-no-merge ; CHECK: liveins: $s0, $s1, $x0, $x1 - ; CHECK: dead early-clobber renamable $x1, renamable $s0 = LDRSpre renamable $x1, 12, implicit $w1 :: (load (s32)) - ; CHECK: renamable $x1 = LDRXui renamable $x0, 1 :: (load (s64)) - ; CHECK: renamable $s1 
= LDRSui renamable $x1, 1 :: (load (s32)) - ; CHECK: STPSi renamable $s0, renamable $s1, renamable $x1, 0 :: (store (s32)) - ; CHECK: RET undef $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead early-clobber renamable $x1, renamable $s0 = LDRSpre renamable $x1, 12, implicit $w1 :: (load (s32)) + ; CHECK-NEXT: renamable $x1 = LDRXui renamable $x0, 1 :: (load (s64)) + ; CHECK-NEXT: renamable $s1 = LDRSui renamable $x1, 1 :: (load (s32)) + ; CHECK-NEXT: STPSi renamable $s0, renamable $s1, renamable $x1, 0 :: (store (s32)) + ; CHECK-NEXT: RET undef $lr early-clobber renamable $x1, renamable $s0 = LDRSpre killed renamable $x1, 12 :: (load (s32)) renamable $x1 = LDRXui renamable $x0, 1 :: (load (s64)) renamable $s1 = LDRSui renamable $x1, 1 :: (load (s32)) @@ -271,13 +279,13 @@ liveins: $s0, $s1, $x0, $x1 ; CHECK-LABEL: name: 10-ldrspre-ldrsui-used-base-reg-no-merge ; CHECK: liveins: $s0, $s1, $x0, $x1 - ; CHECK: early-clobber renamable $x1, renamable $s0 = LDRSpre renamable $x1, 12, implicit $w1 :: (load (s32)) - ; CHECK: renamable $x0 = LDRXui renamable $x1, 1 :: (load (s64)) - ; CHECK: STRXui renamable $x0, renamable $x0, 1 :: (store (s64)) - ; CHECK: renamable $s1 = LDRSui renamable $x1, 1 :: (load (s32)) - ; CHECK: STRSui renamable $s0, renamable $x1, 0 :: (store (s32)) - ; CHECK: STRSui renamable $s1, renamable $x1, 1 :: ("aarch64-suppress-pair" store (s32)) - ; CHECK: RET undef $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber renamable $x1, renamable $s0 = LDRSpre renamable $x1, 12, implicit $w1 :: (load (s32)) + ; CHECK-NEXT: renamable $x0 = LDRXui renamable $x1, 1 :: (load (s64)) + ; CHECK-NEXT: STRXui renamable $x0, renamable $x0, 1 :: (store (s64)) + ; CHECK-NEXT: renamable $s1 = LDRSui renamable $x1, 1 :: (load (s32)) + ; CHECK-NEXT: STPSi renamable $s0, renamable $s1, renamable $x1, 0 :: (store (s32)) + ; CHECK-NEXT: RET undef $lr early-clobber renamable $x1, renamable $s0 = LDRSpre killed renamable $x1, 12 :: (load (s32)) renamable $x0 = LDRXui 
renamable $x1, 1 :: (load (s64)) STRXui killed renamable $x0, renamable $x0, 1 :: (store (s64)) @@ -306,14 +314,15 @@ liveins: $q0, $q1, $x1 ; CHECK-LABEL: name: 11-ldrqpre-ldrqpre-no-merge ; CHECK: liveins: $q0, $q1, $x1 - ; CHECK: early-clobber renamable $x1, dead renamable $q0 = LDRQpre renamable $x1, 48, implicit $w1 :: (load (s128)) - ; CHECK: early-clobber renamable $x1, dead renamable $q1 = LDRQpre renamable $x1, 1, implicit $w1 :: (load (s128)) - ; CHECK: early-clobber renamable $x1, dead renamable $q0 = LDRQpre renamable $x1, 16, implicit $w1 :: (load (s128)) - ; CHECK: early-clobber renamable $x1, dead renamable $q1 = LDRQpre renamable $x1, 12, implicit $w1 :: (load (s128)) - ; CHECK: early-clobber renamable $x1, renamable $q0 = LDRQpre renamable $x1, 16, implicit $w1 :: (load (s128)) - ; CHECK: early-clobber renamable $x1, renamable $q1 = LDRQpre renamable $x1, 16, implicit $w1 :: (load (s128)) - ; CHECK: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store (s128)) - ; CHECK: RET undef $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber renamable $x1, dead renamable $q0 = LDRQpre renamable $x1, 48, implicit $w1 :: (load (s128)) + ; CHECK-NEXT: early-clobber renamable $x1, dead renamable $q1 = LDRQpre renamable $x1, 1, implicit $w1 :: (load (s128)) + ; CHECK-NEXT: early-clobber renamable $x1, dead renamable $q0 = LDRQpre renamable $x1, 16, implicit $w1 :: (load (s128)) + ; CHECK-NEXT: early-clobber renamable $x1, dead renamable $q1 = LDRQpre renamable $x1, 12, implicit $w1 :: (load (s128)) + ; CHECK-NEXT: early-clobber renamable $x1, renamable $q0 = LDRQpre renamable $x1, 16, implicit $w1 :: (load (s128)) + ; CHECK-NEXT: early-clobber renamable $x1, renamable $q1 = LDRQpre renamable $x1, 16, implicit $w1 :: (load (s128)) + ; CHECK-NEXT: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store (s128)) + ; CHECK-NEXT: RET undef $lr early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 48 :: (load (s128)) 
early-clobber renamable $x1, renamable $q1 = LDRQpre killed renamable $x1, 1 :: (load (s128)) early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 16 :: (load (s128)) @@ -344,11 +353,11 @@ ; CHECK-LABEL: name: 12-ldrspre-ldrsui-no-merge ; CHECK: liveins: $s0, $s1, $x1 - ; CHECK: early-clobber renamable $x1, renamable $s0 = LDRSpre renamable $x1, 12, implicit $w1 :: (load (s32)) - ; CHECK: renamable $s1 = LDRSui renamable $x1, 2 :: (load (s32)) - ; CHECK: STRSui renamable $s0, renamable $x1, 0 :: (store (s32)) - ; CHECK: STRSui renamable $s1, renamable $x1, 1 :: ("aarch64-suppress-pair" store (s32)) - ; CHECK: RET undef $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber renamable $x1, renamable $s0 = LDRSpre renamable $x1, 12, implicit $w1 :: (load (s32)) + ; CHECK-NEXT: renamable $s1 = LDRSui renamable $x1, 2 :: (load (s32)) + ; CHECK-NEXT: STPSi renamable $s0, renamable $s1, renamable $x1, 0 :: (store (s32)) + ; CHECK-NEXT: RET undef $lr early-clobber renamable $x1, renamable $s0 = LDRSpre killed renamable $x1, 12 :: (load (s32)) renamable $s1 = LDRSui renamable $x1, 2 :: (load (s32)) STRSui killed renamable $s0, renamable $x1, 0 :: (store (s32)) @@ -375,11 +384,12 @@ liveins: $q0, $d1, $x1 ; CHECK-LABEL: name: 13-ldrqpre-ldrdui-no-merge ; CHECK: liveins: $d1, $q0, $x1 - ; CHECK: early-clobber renamable $x1, renamable $q0 = LDRQpre renamable $x1, 32, implicit $w1 :: (load (s128)) - ; CHECK: renamable $d1 = LDRDui renamable $x1, 1 :: (load (s64)) - ; CHECK: STRQui renamable $q0, renamable $x1, 0 :: (store (s128)) - ; CHECK: STRDui renamable $d1, renamable $x1, 1 :: (store (s64)) - ; CHECK: RET undef $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber renamable $x1, renamable $q0 = LDRQpre renamable $x1, 32, implicit $w1 :: (load (s128)) + ; CHECK-NEXT: renamable $d1 = LDRDui renamable $x1, 1 :: (load (s64)) + ; CHECK-NEXT: STRQui renamable $q0, renamable $x1, 0 :: (store (s128)) + ; CHECK-NEXT: STRDui renamable $d1, renamable $x1, 1 
:: (store (s64)) + ; CHECK-NEXT: RET undef $lr early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 32 :: (load (s128)) renamable $d1 = LDRDui renamable $x1, 1 :: (load (s64)) STRQui killed renamable $q0, renamable $x1, 0 :: (store (s128)) @@ -406,9 +416,10 @@ liveins: $q0, $q1, $x1 ; CHECK-LABEL: name: 14-ldrqpre-strqui-no-merge ; CHECK: liveins: $q0, $q1, $x1 - ; CHECK: early-clobber renamable $x1, renamable $q0 = LDRQpre renamable $x1, 32, implicit $w1 :: (load (s128)) - ; CHECK: STRQui renamable $q0, renamable $x1, 0 :: (store (s128)) - ; CHECK: RET undef $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber renamable $x1, renamable $q0 = LDRQpre renamable $x1, 32, implicit $w1 :: (load (s128)) + ; CHECK-NEXT: STRQui renamable $q0, renamable $x1, 0 :: (store (s128)) + ; CHECK-NEXT: RET undef $lr early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 32 :: (load (s128)) STRQui killed renamable $q0, renamable $x1, 0 :: (store (s128)) RET undef $lr @@ -433,10 +444,11 @@ liveins: $q0, $x1 ; CHECK-LABEL: name: 15-ldrqpre-ldrqui-same-dst-reg-no-merge ; CHECK: liveins: $q0, $q1, $x1 - ; CHECK: early-clobber renamable $x1, dead renamable $q0 = LDRQpre renamable $x1, 32, implicit $w1 :: (load (s128)) - ; CHECK: renamable $q0 = LDRQui renamable $x1, 1 :: (load (s128)) - ; CHECK: STRQui renamable $q0, renamable $x1, 0 :: (store (s128)) - ; CHECK: RET undef $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber renamable $x1, dead renamable $q0 = LDRQpre renamable $x1, 32, implicit $w1 :: (load (s128)) + ; CHECK-NEXT: renamable $q0 = LDRQui renamable $x1, 1 :: (load (s128)) + ; CHECK-NEXT: STRQui renamable $q0, renamable $x1, 0 :: (store (s128)) + ; CHECK-NEXT: RET undef $lr early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 32 :: (load (s128)) renamable $q0 = LDRQui renamable $x1, 1 :: (load (s128)) STRQui killed renamable $q0, renamable $x1, 0 :: (store (s128)) @@ -463,10 +475,11 @@ liveins: $q0, $q1, 
$x1, $x2 ; CHECK-LABEL: name: 16-ldrqpre-ldrqui-diff-base-reg-no-merge ; CHECK: liveins: $q0, $q1, $x1, $x2 - ; CHECK: early-clobber renamable $x1, renamable $q0 = LDRQpre renamable $x1, 32, implicit $w1 :: (load (s128)) - ; CHECK: renamable $q1 = LDRQui renamable $x2, 1 :: (load (s128)) - ; CHECK: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store (s128)) - ; CHECK: RET undef $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber renamable $x1, renamable $q0 = LDRQpre renamable $x1, 32, implicit $w1 :: (load (s128)) + ; CHECK-NEXT: renamable $q1 = LDRQui renamable $x2, 1 :: (load (s128)) + ; CHECK-NEXT: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store (s128)) + ; CHECK-NEXT: RET undef $lr early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 32 :: (load (s128)) renamable $q1 = LDRQui renamable $x2, 1 :: (load (s128)) STRQui killed renamable $q0, renamable $x1, 0 :: (store (s128)) @@ -493,9 +506,10 @@ liveins: $q0, $q1, $x1 ; CHECK-LABEL: name: 17-ldrqpre-ldurqi-merge ; CHECK: liveins: $q0, $q1, $x1 - ; CHECK: early-clobber $x1, renamable $q0, renamable $q1 = LDPQpre renamable $x1, 2 :: (load (s128)) - ; CHECK: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store (s128)) - ; CHECK: RET undef $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber $x1, renamable $q0, renamable $q1 = LDPQpre renamable $x1, 2 :: (load (s128)) + ; CHECK-NEXT: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store (s128)) + ; CHECK-NEXT: RET undef $lr early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 32 :: (load (s128)) renamable $q1 = LDURQi renamable $x1, 16 :: (load (s128)) STRQui killed renamable $q0, renamable $x1, 0 :: (store (s128)) @@ -522,10 +536,11 @@ liveins: $q0, $q1, $x1 ; CHECK-LABEL: name: 18-ldrqpre-ldurqi-no-merge ; CHECK: liveins: $q0, $q1, $x1 - ; CHECK: early-clobber renamable $x1, renamable $q0 = LDRQpre renamable $x1, 32, implicit $w1 :: (load (s128)) - ; CHECK: renamable 
$q1 = LDURQi renamable $x1, 1 :: (load (s128)) - ; CHECK: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store (s128)) - ; CHECK: RET undef $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber renamable $x1, renamable $q0 = LDRQpre renamable $x1, 32, implicit $w1 :: (load (s128)) + ; CHECK-NEXT: renamable $q1 = LDURQi renamable $x1, 1 :: (load (s128)) + ; CHECK-NEXT: STPQi renamable $q0, renamable $q1, renamable $x1, 0 :: (store (s128)) + ; CHECK-NEXT: RET undef $lr early-clobber renamable $x1, renamable $q0 = LDRQpre killed renamable $x1, 32 :: (load (s128)) renamable $q1 = LDURQi renamable $x1, 1 :: (load (s128)) STRQui killed renamable $q0, renamable $x1, 0 :: (store (s128)) @@ -548,10 +563,10 @@ liveins: $s0, $s1, $x1 ; CHECK-LABEL: name: 19-ldrspre-ldrsui-max-merge ; CHECK: liveins: $s0, $s1, $x1 - ; CHECK: early-clobber $x1, renamable $s0, renamable $s1 = LDPSpre renamable $x1, 63 :: (load (s32)) - ; CHECK: STRSui renamable $s0, renamable $x1, 0 :: (store (s32)) - ; CHECK: STRSui renamable $s1, renamable $x1, 1 :: ("aarch64-suppress-pair" store (s32)) - ; CHECK: RET undef $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber $x1, renamable $s0, renamable $s1 = LDPSpre renamable $x1, 63 :: (load (s32)) + ; CHECK-NEXT: STPSi renamable $s0, renamable $s1, renamable $x1, 0 :: (store (s32)) + ; CHECK-NEXT: RET undef $lr early-clobber renamable $x1, renamable $s0 = LDRSpre killed renamable $x1, 252 :: (load (s32)) renamable $s1 = LDRSui renamable $x1, 1 :: (load (s32)) STRSui killed renamable $s0, renamable $x1, 0 :: (store (s32)) @@ -574,11 +589,11 @@ liveins: $s0, $s1, $x1 ; CHECK-LABEL: name: 20-ldrspre-ldrsui-unaligned-no-merge ; CHECK: liveins: $s0, $s1, $x1 - ; CHECK: early-clobber renamable $x1, renamable $s0 = LDRSpre renamable $x1, 251, implicit $w1 :: (load (s32)) - ; CHECK: renamable $s1 = LDRSui renamable $x1, 1 :: (load (s32)) - ; CHECK: STRSui renamable $s0, renamable $x1, 0 :: (store (s32)) - ; CHECK: STRSui renamable $s1, renamable 
$x1, 1 :: ("aarch64-suppress-pair" store (s32)) - ; CHECK: RET undef $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber renamable $x1, renamable $s0 = LDRSpre renamable $x1, 251, implicit $w1 :: (load (s32)) + ; CHECK-NEXT: renamable $s1 = LDRSui renamable $x1, 1 :: (load (s32)) + ; CHECK-NEXT: STPSi renamable $s0, renamable $s1, renamable $x1, 0 :: (store (s32)) + ; CHECK-NEXT: RET undef $lr early-clobber renamable $x1, renamable $s0 = LDRSpre killed renamable $x1, 251 :: (load (s32)) renamable $s1 = LDRSui renamable $x1, 1 :: (load (s32)) STRSui killed renamable $s0, renamable $x1, 0 :: (store (s32)) @@ -601,9 +616,10 @@ liveins: $x0, $x1, $x2 ; CHECK-LABEL: name: 21-ldrswpre-ldrswui-merge ; CHECK: liveins: $x0, $x1, $x2 - ; CHECK: early-clobber $x1, renamable $x0, renamable $x2 = LDPSWpre renamable $x1, 10 :: (load (s32)) - ; CHECK: STPXi renamable $x0, renamable $x2, renamable $x1, 0 :: (store (s64)) - ; CHECK: RET undef $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber $x1, renamable $x0, renamable $x2 = LDPSWpre renamable $x1, 10 :: (load (s32)) + ; CHECK-NEXT: STPXi renamable $x0, renamable $x2, renamable $x1, 0 :: (store (s64)) + ; CHECK-NEXT: RET undef $lr early-clobber renamable $x1, renamable $x0 = LDRSWpre killed renamable $x1, 40 :: (load (s32)) renamable $x2 = LDRSWui renamable $x1, 1 :: (load (s32)) STRXui killed renamable $x0, renamable $x1, 0 :: (store (s64)) @@ -626,9 +642,10 @@ liveins: $x0, $x1, $x2 ; CHECK-LABEL: name: 22-ldrswpre-ldurswi-merge ; CHECK: liveins: $x0, $x1, $x2 - ; CHECK: early-clobber $x1, renamable $x0, renamable $x2 = LDPSWpre renamable $x1, 10 :: (load (s32)) - ; CHECK: STPXi renamable $x0, renamable $x2, renamable $x1, 0 :: (store (s64)) - ; CHECK: RET undef $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber $x1, renamable $x0, renamable $x2 = LDPSWpre renamable $x1, 10 :: (load (s32)) + ; CHECK-NEXT: STPXi renamable $x0, renamable $x2, renamable $x1, 0 :: (store (s64)) + ; CHECK-NEXT: RET undef $lr 
early-clobber renamable $x1, renamable $x0 = LDRSWpre killed renamable $x1, 40 :: (load (s32)) renamable $x2 = LDURSWi renamable $x1, 4 :: (load (s32)) STRXui killed renamable $x0, renamable $x1, 0 :: (store (s64)) @@ -651,10 +668,11 @@ liveins: $x0, $x1, $x2 ; CHECK-LABEL: name: 23-ldrswui-ldrswpre-no-merge ; CHECK: liveins: $x0, $x1, $x2 - ; CHECK: renamable $x2 = LDRSWui renamable $x1, 1 :: (load (s32)) - ; CHECK: early-clobber renamable $x1, renamable $x0 = LDRSWpre renamable $x1, 40, implicit $w1 :: (load (s32)) - ; CHECK: STPXi renamable $x0, renamable $x2, renamable $x1, 0 :: (store (s64)) - ; CHECK: RET undef $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $x2 = LDRSWui renamable $x1, 1 :: (load (s32)) + ; CHECK-NEXT: early-clobber renamable $x1, renamable $x0 = LDRSWpre renamable $x1, 40, implicit $w1 :: (load (s32)) + ; CHECK-NEXT: STPXi renamable $x0, renamable $x2, renamable $x1, 0 :: (store (s64)) + ; CHECK-NEXT: RET undef $lr renamable $x2 = LDRSWui renamable $x1, 1 :: (load (s32)) early-clobber renamable $x1, renamable $x0 = LDRSWpre killed renamable $x1, 40 :: (load (s32)) STRXui killed renamable $x0, renamable $x1, 0 :: (store (s64)) @@ -677,10 +695,11 @@ liveins: $x0, $x1, $x2 ; CHECK-LABEL: name: 24-ldurswi-ldrswpre-no-merge ; CHECK: liveins: $x0, $x1, $x2 - ; CHECK: renamable $x2 = LDURSWi renamable $x1, 4 :: (load (s32)) - ; CHECK: early-clobber renamable $x1, renamable $x0 = LDRSWpre renamable $x1, 40, implicit $w1 :: (load (s32)) - ; CHECK: STPXi renamable $x0, renamable $x2, renamable $x1, 0 :: (store (s64)) - ; CHECK: RET undef $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $x2 = LDURSWi renamable $x1, 4 :: (load (s32)) + ; CHECK-NEXT: early-clobber renamable $x1, renamable $x0 = LDRSWpre renamable $x1, 40, implicit $w1 :: (load (s32)) + ; CHECK-NEXT: STPXi renamable $x0, renamable $x2, renamable $x1, 0 :: (store (s64)) + ; CHECK-NEXT: RET undef $lr renamable $x2 = LDURSWi renamable $x1, 4 :: (load (s32)) early-clobber renamable 
$x1, renamable $x0 = LDRSWpre killed renamable $x1, 40 :: (load (s32)) STRXui killed renamable $x0, renamable $x1, 0 :: (store (s64)) @@ -703,14 +722,15 @@ liveins: $x0, $x1, $x2 ; CHECK-LABEL: name: 25-ldrswpre-ldrswpre-no-merge ; CHECK: liveins: $x0, $x1, $x2 - ; CHECK: early-clobber renamable $x1, dead renamable $x0 = LDRSWpre renamable $x1, 48, implicit $w1 :: (load (s32)) - ; CHECK: early-clobber renamable $x1, dead renamable $x2 = LDRSWpre renamable $x1, 1, implicit $w1 :: (load (s32)) - ; CHECK: early-clobber renamable $x1, dead renamable $x0 = LDRSWpre renamable $x1, 16, implicit $w1 :: (load (s32)) - ; CHECK: early-clobber renamable $x1, dead renamable $x2 = LDRSWpre renamable $x1, 12, implicit $w1 :: (load (s32)) - ; CHECK: early-clobber renamable $x1, renamable $x0 = LDRSWpre renamable $x1, 16, implicit $w1 :: (load (s32)) - ; CHECK: early-clobber renamable $x1, renamable $x2 = LDRSWpre renamable $x1, 16, implicit $w1 :: (load (s32)) - ; CHECK: STPXi renamable $x0, renamable $x2, renamable $x1, 0 :: (store (s64)) - ; CHECK: RET undef $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber renamable $x1, dead renamable $x0 = LDRSWpre renamable $x1, 48, implicit $w1 :: (load (s32)) + ; CHECK-NEXT: early-clobber renamable $x1, dead renamable $x2 = LDRSWpre renamable $x1, 1, implicit $w1 :: (load (s32)) + ; CHECK-NEXT: early-clobber renamable $x1, dead renamable $x0 = LDRSWpre renamable $x1, 16, implicit $w1 :: (load (s32)) + ; CHECK-NEXT: early-clobber renamable $x1, dead renamable $x2 = LDRSWpre renamable $x1, 12, implicit $w1 :: (load (s32)) + ; CHECK-NEXT: early-clobber renamable $x1, renamable $x0 = LDRSWpre renamable $x1, 16, implicit $w1 :: (load (s32)) + ; CHECK-NEXT: early-clobber renamable $x1, renamable $x2 = LDRSWpre renamable $x1, 16, implicit $w1 :: (load (s32)) + ; CHECK-NEXT: STPXi renamable $x0, renamable $x2, renamable $x1, 0 :: (store (s64)) + ; CHECK-NEXT: RET undef $lr early-clobber renamable $x1, renamable $x0 = LDRSWpre killed 
renamable $x1, 48 :: (load (s32)) early-clobber renamable $x1, renamable $x2 = LDRSWpre killed renamable $x1, 1 :: (load (s32)) early-clobber renamable $x1, renamable $x0 = LDRSWpre killed renamable $x1, 16 :: (load (s32)) @@ -737,10 +757,11 @@ liveins: $x0, $x1, $x2 ; CHECK-LABEL: name: 26-ldrswpre-ldrwui-no-merge ; CHECK: liveins: $x0, $x1, $x2 - ; CHECK: early-clobber renamable $x1, renamable $x0 = LDRSWpre renamable $x1, 40, implicit $w1 :: (load (s32)) - ; CHECK: renamable $w2 = LDRWui renamable $x1, 1, implicit-def $x2 :: (load (s32)) - ; CHECK: STPXi renamable $x0, renamable $x2, renamable $x1, 0 :: (store (s64)) - ; CHECK: RET undef $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber renamable $x1, renamable $x0 = LDRSWpre renamable $x1, 40, implicit $w1 :: (load (s32)) + ; CHECK-NEXT: renamable $w2 = LDRWui renamable $x1, 1, implicit-def $x2 :: (load (s32)) + ; CHECK-NEXT: STPXi renamable $x0, renamable $x2, renamable $x1, 0 :: (store (s64)) + ; CHECK-NEXT: RET undef $lr early-clobber renamable $x1, renamable $x0 = LDRSWpre killed renamable $x1, 40 :: (load (s32)) renamable $w2 = LDRWui renamable $x1, 1 :: (load (s32)) STRXui killed renamable $x0, renamable $x1, 0 :: (store (s64)) @@ -763,10 +784,11 @@ liveins: $x0, $x1, $x2 ; CHECK-LABEL: name: 27-ldrwpre-ldrswui-no-merge ; CHECK: liveins: $x0, $x1, $x2 - ; CHECK: early-clobber renamable $x1, renamable $w0 = LDRWpre renamable $x1, 40, implicit $w1 :: (load (s32)) - ; CHECK: renamable $x2 = LDRSWui renamable $x1, 1 :: (load (s32)) - ; CHECK: STPXi renamable $x0, renamable $x2, renamable $x1, 0 :: (store (s64)) - ; CHECK: RET undef $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber renamable $x1, renamable $w0 = LDRWpre renamable $x1, 40, implicit $w1 :: (load (s32)) + ; CHECK-NEXT: renamable $x2 = LDRSWui renamable $x1, 1 :: (load (s32)) + ; CHECK-NEXT: STPXi renamable $x0, renamable $x2, renamable $x1, 0 :: (store (s64)) + ; CHECK-NEXT: RET undef $lr early-clobber renamable $x1, renamable $w0 = 
LDRWpre killed renamable $x1, 40 :: (load (s32)) renamable $x2 = LDRSWui renamable $x1, 1 :: (load (s32)) STRXui killed renamable $x0, renamable $x1, 0 :: (store (s64)) @@ -788,12 +810,13 @@ liveins: $x11, $x13 ; CHECK-LABEL: name: 28-ldrswpre-ldrwpre-no-merge ; CHECK: liveins: $x11, $x13 - ; CHECK: early-clobber renamable $x11, dead renamable $x10 = LDRSWpre renamable $x11, 8, implicit $w11 :: (load (s32), align 8) - ; CHECK: $x14 = EORXrs renamable $x11, renamable $x13, 0 - ; CHECK: early-clobber renamable $x11, dead renamable $w12 = LDRWpre renamable $x11, 4, implicit $w11 :: (load (s32)) - ; CHECK: $x13 = EORXrs renamable $x11, renamable $x13, 0 - ; CHECK: STPXi renamable $x13, renamable $x14, renamable $x11, 0 :: (store (s64)) - ; CHECK: RET undef $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber renamable $x11, dead renamable $x10 = LDRSWpre renamable $x11, 8, implicit $w11 :: (load (s32), align 8) + ; CHECK-NEXT: $x14 = EORXrs renamable $x11, renamable $x13, 0 + ; CHECK-NEXT: early-clobber renamable $x11, dead renamable $w12 = LDRWpre renamable $x11, 4, implicit $w11 :: (load (s32)) + ; CHECK-NEXT: $x13 = EORXrs renamable $x11, renamable $x13, 0 + ; CHECK-NEXT: STPXi renamable $x13, renamable $x14, renamable $x11, 0 :: (store (s64)) + ; CHECK-NEXT: RET undef $lr early-clobber renamable $x11, renamable $x10 = LDRSWpre killed renamable $x11, 8 :: (load (s32), align 8) $x14 = EORXrs renamable $x11, renamable $x13, 0 early-clobber renamable $x11, renamable $w12 = LDRWpre killed renamable $x11, 4 :: (load (s32)) @@ -817,12 +840,13 @@ liveins: $x11, $x13 ; CHECK-LABEL: name: 29-ldrwpre-ldrswpre-no-merge ; CHECK: liveins: $x11, $x13 - ; CHECK: early-clobber renamable $x11, dead renamable $w12 = LDRWpre renamable $x11, 8, implicit $w11 :: (load (s32)) - ; CHECK: $x14 = EORXrs renamable $x11, renamable $x13, 0 - ; CHECK: early-clobber renamable $x11, dead renamable $x10 = LDRSWpre renamable $x11, 4, implicit $w11 :: (load (s32), align 8) - ; CHECK: $x13 = 
EORXrs renamable $x11, renamable $x13, 0 - ; CHECK: STPXi renamable $x13, renamable $x14, renamable $x11, 0 :: (store (s64)) - ; CHECK: RET undef $lr + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: early-clobber renamable $x11, dead renamable $w12 = LDRWpre renamable $x11, 8, implicit $w11 :: (load (s32)) + ; CHECK-NEXT: $x14 = EORXrs renamable $x11, renamable $x13, 0 + ; CHECK-NEXT: early-clobber renamable $x11, dead renamable $x10 = LDRSWpre renamable $x11, 4, implicit $w11 :: (load (s32), align 8) + ; CHECK-NEXT: $x13 = EORXrs renamable $x11, renamable $x13, 0 + ; CHECK-NEXT: STPXi renamable $x13, renamable $x14, renamable $x11, 0 :: (store (s64)) + ; CHECK-NEXT: RET undef $lr early-clobber renamable $x11, renamable $w12 = LDRWpre killed renamable $x11, 8 :: (load (s32)) $x14 = EORXrs renamable $x11, renamable $x13, 0 early-clobber renamable $x11, renamable $x10 = LDRSWpre killed renamable $x11, 4 :: (load (s32), align 8) diff --git a/llvm/test/CodeGen/AArch64/no-sve-no-neon.ll b/llvm/test/CodeGen/AArch64/no-sve-no-neon.ll --- a/llvm/test/CodeGen/AArch64/no-sve-no-neon.ll +++ b/llvm/test/CodeGen/AArch64/no-sve-no-neon.ll @@ -14,33 +14,26 @@ ; CHECK-NEXT: ldp x10, x9, [sp, #16] ; CHECK-NEXT: ucvtf s3, x11 ; CHECK-NEXT: str s0, [x8, #60] -; CHECK-NEXT: str s1, [x8, #56] ; CHECK-NEXT: ucvtf s4, x9 -; CHECK-NEXT: ucvtf s0, x10 ; CHECK-NEXT: ldp x11, x9, [sp] -; CHECK-NEXT: str s2, [x8, #52] -; CHECK-NEXT: str s3, [x8, #48] -; CHECK-NEXT: ucvtf s3, x7 +; CHECK-NEXT: ucvtf s0, x10 +; CHECK-NEXT: stp s2, s1, [x8, #52] ; CHECK-NEXT: ucvtf s1, x9 ; CHECK-NEXT: ucvtf s2, x11 -; CHECK-NEXT: str s4, [x8, #44] +; CHECK-NEXT: stp s4, s3, [x8, #44] +; CHECK-NEXT: ucvtf s3, x7 ; CHECK-NEXT: ucvtf s4, x6 -; CHECK-NEXT: str s0, [x8, #40] +; CHECK-NEXT: stp s1, s0, [x8, #36] ; CHECK-NEXT: ucvtf s0, x5 -; CHECK-NEXT: str s3, [x8, #28] -; CHECK-NEXT: ucvtf s3, x2 -; CHECK-NEXT: str s1, [x8, #36] ; CHECK-NEXT: ucvtf s1, x4 -; CHECK-NEXT: str s2, [x8, #32] +; CHECK-NEXT: stp s3, s2, [x8, #28] 
; CHECK-NEXT: ucvtf s2, x3 -; CHECK-NEXT: str s4, [x8, #24] +; CHECK-NEXT: ucvtf s3, x2 +; CHECK-NEXT: stp s0, s4, [x8, #20] ; CHECK-NEXT: ucvtf s4, x1 -; CHECK-NEXT: str s0, [x8, #20] ; CHECK-NEXT: ucvtf s0, x0 -; CHECK-NEXT: str s1, [x8, #16] -; CHECK-NEXT: str s2, [x8, #12] -; CHECK-NEXT: str s3, [x8, #8] -; CHECK-NEXT: str s4, [x8, #4] +; CHECK-NEXT: stp s2, s1, [x8, #12] +; CHECK-NEXT: stp s4, s3, [x8, #4] ; CHECK-NEXT: str s0, [x8] ; CHECK-NEXT: ret %conv1 = uitofp <16 x i64> %a to <16 x float> diff --git a/llvm/test/CodeGen/AArch64/storepairsuppress_minsize.ll b/llvm/test/CodeGen/AArch64/storepairsuppress_minsize.ll --- a/llvm/test/CodeGen/AArch64/storepairsuppress_minsize.ll +++ b/llvm/test/CodeGen/AArch64/storepairsuppress_minsize.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64 -mcpu=cortex-a55 -o - %s | FileCheck %s +; RUN: llc -mtriple=aarch64 -mcpu=cortex-a55 -mattr=+store-pair-suppress -o - %s | FileCheck %s ; Check that stp are not suppressed at minsize.