diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td
--- a/llvm/lib/Target/AArch64/AArch64.td
+++ b/llvm/lib/Target/AArch64/AArch64.td
@@ -1372,7 +1372,7 @@
 // FeatureFuseAdrpAdd is enabled under Generic to allow linker merging
 // optimizations.
-def : ProcessorModel<"generic", CortexA55Model, ProcessorFeatures.Generic,
+def : ProcessorModel<"generic", CortexA510Model, ProcessorFeatures.Generic,
                      [FeatureFuseAES, FeatureFuseAdrpAdd, FeaturePostRAScheduler,
                       FeatureEnableSelectOptimize]>;
 def : ProcessorModel<"cortex-a35", CortexA53Model, ProcessorFeatures.A53,
diff --git a/llvm/test/Analysis/CostModel/AArch64/vector-select.ll b/llvm/test/Analysis/CostModel/AArch64/vector-select.ll
--- a/llvm/test/Analysis/CostModel/AArch64/vector-select.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/vector-select.ll
@@ -125,9 +125,9 @@
 ; CODE: mov
 ; CODE: mov
 ; CODE: mov
+; CODE: ldr
 ; CODE: cmge
 ; CODE: cmge
-; CODE: ldr
 ; CODE: bif
 ; CODE: bif
 ; CODE: ext
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2.ll
@@ -1129,7 +1129,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i16_unaligned_monotonic:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw add ptr %ptr, i16 %value monotonic, align 1
   ret i16 %r
@@ -1141,7 +1141,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i16_unaligned_acquire:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw add ptr %ptr, i16 %value acquire, align 1
   ret i16 %r
@@ -1153,7 +1153,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i16_unaligned_release:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw add ptr %ptr, i16 %value release, align 1
   ret i16 %r
@@ -1165,7 +1165,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw add ptr %ptr, i16 %value acq_rel, align 1
   ret i16 %r
@@ -1177,7 +1177,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw add ptr %ptr, i16 %value seq_cst, align 1
   ret i16 %r
@@ -1189,7 +1189,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i32_unaligned_monotonic:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw add ptr %ptr, i32 %value monotonic, align 1
   ret i32 %r
@@ -1201,7 +1201,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i32_unaligned_acquire:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw add ptr %ptr, i32 %value acquire, align 1
   ret i32 %r
@@ -1213,7 +1213,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i32_unaligned_release:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw add ptr %ptr, i32 %value release, align 1
   ret i32 %r
@@ -1225,7 +1225,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw add ptr %ptr, i32 %value acq_rel, align 1
   ret i32 %r
@@ -1237,7 +1237,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw add ptr %ptr, i32 %value seq_cst, align 1
   ret i32 %r
@@ -1249,7 +1249,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i64_unaligned_monotonic:
-; -O1: add x8, x0, x19
+; -O1: add x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw add ptr %ptr, i64 %value monotonic, align 1
   ret i64 %r
@@ -1261,7 +1261,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i64_unaligned_acquire:
-; -O1: add x8, x0, x19
+; -O1: add x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw add ptr %ptr, i64 %value acquire, align 1
   ret i64 %r
@@ -1273,7 +1273,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i64_unaligned_release:
-; -O1: add x8, x0, x19
+; -O1: add x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw add ptr %ptr, i64 %value release, align 1
   ret i64 %r
@@ -1285,7 +1285,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
-; -O1: add x8, x0, x19
+; -O1: add x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw add ptr %ptr, i64 %value acq_rel, align 1
   ret i64 %r
@@ -1297,7 +1297,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
-; -O1: add x8, x0, x19
+; -O1: add x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw add ptr %ptr, i64 %value seq_cst, align 1
   ret i64 %r
@@ -1312,7 +1312,7 @@
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
 ; -O1: ldp x0, x1, [x0]
-; -O1: adds x8, x0, x20
+; -O1: adds x8, x0, x21
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw add ptr %ptr, i128 %value monotonic, align 1
   ret i128 %r
@@ -1327,7 +1327,7 @@
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
 ; -O1: ldp x0, x1, [x0]
-; -O1: adds x8, x0, x20
+; -O1: adds x8, x0, x21
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw add ptr %ptr, i128 %value acquire, align 1
   ret i128 %r
@@ -1342,7 +1342,7 @@
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
 ; -O1: ldp x0, x1, [x0]
-; -O1: adds x8, x0, x20
+; -O1: adds x8, x0, x21
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw add ptr %ptr, i128 %value release, align 1
   ret i128 %r
@@ -1357,7 +1357,7 @@
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
 ; -O1: ldp x0, x1, [x0]
-; -O1: adds x8, x0, x20
+; -O1: adds x8, x0, x21
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw add ptr %ptr, i128 %value acq_rel, align 1
   ret i128 %r
@@ -1372,7 +1372,7 @@
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
 ; -O1: ldp x0, x1, [x0]
-; -O1: adds x8, x0, x20
+; -O1: adds x8, x0, x21
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw add ptr %ptr, i128 %value seq_cst, align 1
   ret i128 %r
@@ -1904,7 +1904,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw sub ptr %ptr, i16 %value monotonic, align 1
   ret i16 %r
@@ -1916,7 +1916,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_unaligned_acquire:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw sub ptr %ptr, i16 %value acquire, align 1
   ret i16 %r
@@ -1928,7 +1928,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_unaligned_release:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw sub ptr %ptr, i16 %value release, align 1
   ret i16 %r
@@ -1940,7 +1940,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw sub ptr %ptr, i16 %value acq_rel, align 1
   ret i16 %r
@@ -1952,7 +1952,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw sub ptr %ptr, i16 %value seq_cst, align 1
   ret i16 %r
@@ -1964,7 +1964,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw sub ptr %ptr, i32 %value monotonic, align 1
   ret i32 %r
@@ -1976,7 +1976,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_unaligned_acquire:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw sub ptr %ptr, i32 %value acquire, align 1
   ret i32 %r
@@ -1988,7 +1988,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_unaligned_release:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw sub ptr %ptr, i32 %value release, align 1
   ret i32 %r
@@ -2000,7 +2000,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw sub ptr %ptr, i32 %value acq_rel, align 1
   ret i32 %r
@@ -2012,7 +2012,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw sub ptr %ptr, i32 %value seq_cst, align 1
   ret i32 %r
@@ -2024,7 +2024,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
-; -O1: sub x8, x0, x19
+; -O1: sub x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw sub ptr %ptr, i64 %value monotonic, align 1
   ret i64 %r
@@ -2036,7 +2036,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_unaligned_acquire:
-; -O1: sub x8, x0, x19
+; -O1: sub x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw sub ptr %ptr, i64 %value acquire, align 1
   ret i64 %r
@@ -2048,7 +2048,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_unaligned_release:
-; -O1: sub x8, x0, x19
+; -O1: sub x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw sub ptr %ptr, i64 %value release, align 1
   ret i64 %r
@@ -2060,7 +2060,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
-; -O1: sub x8, x0, x19
+; -O1: sub x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw sub ptr %ptr, i64 %value acq_rel, align 1
   ret i64 %r
@@ -2072,7 +2072,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
-; -O1: sub x8, x0, x19
+; -O1: sub x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw sub ptr %ptr, i64 %value seq_cst, align 1
   ret i64 %r
@@ -2086,7 +2086,7 @@
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
 ; -O1: ldp x0, x1, [x0]
-; -O1: subs x8, x0, x20
+; -O1: subs x8, x0, x21
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw sub ptr %ptr, i128 %value monotonic, align 1
   ret i128 %r
@@ -2100,7 +2100,7 @@
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
 ; -O1: ldp x0, x1, [x0]
-; -O1: subs x8, x0, x20
+; -O1: subs x8, x0, x21
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw sub ptr %ptr, i128 %value acquire, align 1
   ret i128 %r
@@ -2114,7 +2114,7 @@
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
 ; -O1: ldp x0, x1, [x0]
-; -O1: subs x8, x0, x20
+; -O1: subs x8, x0, x21
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw sub ptr %ptr, i128 %value release, align 1
   ret i128 %r
@@ -2128,7 +2128,7 @@
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
 ; -O1: ldp x0, x1, [x0]
-; -O1: subs x8, x0, x20
+; -O1: subs x8, x0, x21
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw sub ptr %ptr, i128 %value acq_rel, align 1
   ret i128 %r
@@ -2142,7 +2142,7 @@
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
 ; -O1: ldp x0, x1, [x0]
-; -O1: subs x8, x0, x20
+; -O1: subs x8, x0, x21
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw sub ptr %ptr, i128 %value seq_cst, align 1
   ret i128 %r
@@ -2489,9 +2489,9 @@
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_monotonic:
 ; -O1: ldxp x0, x1, [x8]
-; -O1: and x9, x0, x2
-; -O1: and x10, x1, x3
-; -O1: stxp w11, x9, x10, [x8]
+; -O1: and x9, x1, x3
+; -O1: and x10, x0, x2
+; -O1: stxp w11, x10, x9, [x8]
   %r = atomicrmw and ptr %ptr, i128 %value monotonic, align 16
   ret i128 %r
 }
@@ -2512,9 +2512,9 @@
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_acquire:
 ; -O1: ldaxp x0, x1, [x8]
-; -O1: and x9, x0, x2
-; -O1: and x10, x1, x3
-; -O1: stxp w11, x9, x10, [x8]
+; -O1: and x9, x1, x3
+; -O1: and x10, x0, x2
+; -O1: stxp w11, x10, x9, [x8]
   %r = atomicrmw and ptr %ptr, i128 %value acquire, align 16
   ret i128 %r
 }
@@ -2535,9 +2535,9 @@
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_release:
 ; -O1: ldxp x0, x1, [x8]
-; -O1: and x9, x0, x2
-; -O1: and x10, x1, x3
-; -O1: stlxp w11, x9, x10, [x8]
+; -O1: and x9, x1, x3
+; -O1: and x10, x0, x2
+; -O1: stlxp w11, x10, x9, [x8]
   %r = atomicrmw and ptr %ptr, i128 %value release, align 16
   ret i128 %r
 }
@@ -2558,9 +2558,9 @@
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_acq_rel:
 ; -O1: ldaxp x0, x1, [x8]
-; -O1: and x9, x0, x2
-; -O1: and x10, x1, x3
-; -O1: stlxp w11, x9, x10, [x8]
+; -O1: and x9, x1, x3
+; -O1: and x10, x0, x2
+; -O1: stlxp w11, x10, x9, [x8]
   %r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 16
   ret i128 %r
 }
@@ -2581,9 +2581,9 @@
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_seq_cst:
 ; -O1: ldaxp x0, x1, [x8]
-; -O1: and x9, x0, x2
-; -O1: and x10, x1, x3
-; -O1: stlxp w11, x9, x10, [x8]
+; -O1: and x9, x1, x3
+; -O1: and x10, x0, x2
+; -O1: stlxp w11, x10, x9, [x8]
   %r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 16
   ret i128 %r
 }
@@ -2679,7 +2679,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i16_unaligned_monotonic:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw and ptr %ptr, i16 %value monotonic, align 1
   ret i16 %r
@@ -2691,7 +2691,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i16_unaligned_acquire:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw and ptr %ptr, i16 %value acquire, align 1
   ret i16 %r
@@ -2703,7 +2703,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i16_unaligned_release:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw and ptr %ptr, i16 %value release, align 1
   ret i16 %r
@@ -2715,7 +2715,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw and ptr %ptr, i16 %value acq_rel, align 1
   ret i16 %r
@@ -2727,7 +2727,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw and ptr %ptr, i16 %value seq_cst, align 1
   ret i16 %r
@@ -2739,7 +2739,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i32_unaligned_monotonic:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw and ptr %ptr, i32 %value monotonic, align 1
   ret i32 %r
@@ -2751,7 +2751,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i32_unaligned_acquire:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw and ptr %ptr, i32 %value acquire, align 1
   ret i32 %r
@@ -2763,7 +2763,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i32_unaligned_release:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw and ptr %ptr, i32 %value release, align 1
   ret i32 %r
@@ -2775,7 +2775,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw and ptr %ptr, i32 %value acq_rel, align 1
   ret i32 %r
@@ -2787,7 +2787,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw and ptr %ptr, i32 %value seq_cst, align 1
   ret i32 %r
@@ -2799,7 +2799,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i64_unaligned_monotonic:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw and ptr %ptr, i64 %value monotonic, align 1
   ret i64 %r
@@ -2811,7 +2811,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i64_unaligned_acquire:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw and ptr %ptr, i64 %value acquire, align 1
   ret i64 %r
@@ -2823,7 +2823,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i64_unaligned_release:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw and ptr %ptr, i64 %value release, align 1
   ret i64 %r
@@ -2835,7 +2835,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw and ptr %ptr, i64 %value acq_rel, align 1
   ret i64 %r
@@ -2847,7 +2847,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw and ptr %ptr, i64 %value seq_cst, align 1
   ret i64 %r
@@ -2862,7 +2862,7 @@
 ; -O1-LABEL: atomicrmw_and_i128_unaligned_monotonic:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw and ptr %ptr, i128 %value monotonic, align 1
   ret i128 %r
@@ -2877,7 +2877,7 @@
 ; -O1-LABEL: atomicrmw_and_i128_unaligned_acquire:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw and ptr %ptr, i128 %value acquire, align 1
   ret i128 %r
@@ -2892,7 +2892,7 @@
 ; -O1-LABEL: atomicrmw_and_i128_unaligned_release:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw and ptr %ptr, i128 %value release, align 1
   ret i128 %r
@@ -2907,7 +2907,7 @@
 ; -O1-LABEL: atomicrmw_and_i128_unaligned_acq_rel:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 1
   ret i128 %r
@@ -2922,7 +2922,7 @@
 ; -O1-LABEL: atomicrmw_and_i128_unaligned_seq_cst:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 1
   ret i128 %r
@@ -3312,9 +3312,9 @@
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic:
 ; -O1: ldxp x0, x1, [x8]
 ; -O1: and x9, x0, x2
-; -O1: mvn x9, x9
 ; -O1: and x10, x1, x3
 ; -O1: mvn x10, x10
+; -O1: mvn x9, x9
 ; -O1: stxp w11, x9, x10, [x8]
   %r = atomicrmw nand ptr %ptr, i128 %value monotonic, align 16
   ret i128 %r
@@ -3339,9 +3339,9 @@
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire:
 ; -O1: ldaxp x0, x1, [x8]
 ; -O1: and x9, x0, x2
-; -O1: mvn x9, x9
 ; -O1: and x10, x1, x3
 ; -O1: mvn x10, x10
+; -O1: mvn x9, x9
 ; -O1: stxp w11, x9, x10, [x8]
   %r = atomicrmw nand ptr %ptr, i128 %value acquire, align 16
   ret i128 %r
@@ -3366,9 +3366,9 @@
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_release:
 ; -O1: ldxp x0, x1, [x8]
 ; -O1: and x9, x0, x2
-; -O1: mvn x9, x9
 ; -O1: and x10, x1, x3
 ; -O1: mvn x10, x10
+; -O1: mvn x9, x9
 ; -O1: stlxp w11, x9, x10, [x8]
   %r = atomicrmw nand ptr %ptr, i128 %value release, align 16
   ret i128 %r
@@ -3393,9 +3393,9 @@
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel:
 ; -O1: ldaxp x0, x1, [x8]
 ; -O1: and x9, x0, x2
-; -O1: mvn x9, x9
 ; -O1: and x10, x1, x3
 ; -O1: mvn x10, x10
+; -O1: mvn x9, x9
 ; -O1: stlxp w11, x9, x10, [x8]
   %r = atomicrmw nand ptr %ptr, i128 %value acq_rel, align 16
   ret i128 %r
@@ -3420,9 +3420,9 @@
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst:
 ; -O1: ldaxp x0, x1, [x8]
 ; -O1: and x9, x0, x2
-; -O1: mvn x9, x9
 ; -O1: and x10, x1, x3
 ; -O1: mvn x10, x10
+; -O1: mvn x9, x9
 ; -O1: stlxp w11, x9, x10, [x8]
   %r = atomicrmw nand ptr %ptr, i128 %value seq_cst, align 16
   ret i128 %r
@@ -3530,7 +3530,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_unaligned_monotonic:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: mvn w8, w8
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw nand ptr %ptr, i16 %value monotonic, align 1
@@ -3544,7 +3544,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_unaligned_acquire:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: mvn w8, w8
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw nand ptr %ptr, i16 %value acquire, align 1
@@ -3558,7 +3558,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_unaligned_release:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: mvn w8, w8
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw nand ptr %ptr, i16 %value release, align 1
@@ -3572,7 +3572,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_unaligned_acq_rel:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: mvn w8, w8
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw nand ptr %ptr, i16 %value acq_rel, align 1
@@ -3586,7 +3586,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_unaligned_seq_cst:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: mvn w8, w8
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw nand ptr %ptr, i16 %value seq_cst, align 1
@@ -3600,7 +3600,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_unaligned_monotonic:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: mvn w8, w8
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw nand ptr %ptr, i32 %value monotonic, align 1
@@ -3614,7 +3614,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_unaligned_acquire:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: mvn w8, w8
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw nand ptr %ptr, i32 %value acquire, align 1
@@ -3628,7 +3628,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_unaligned_release:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: mvn w8, w8
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw nand ptr %ptr, i32 %value release, align 1
@@ -3642,7 +3642,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_unaligned_acq_rel:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: mvn w8, w8
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw nand ptr %ptr, i32 %value acq_rel, align 1
@@ -3656,7 +3656,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_unaligned_seq_cst:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: mvn w8, w8
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw nand ptr %ptr, i32 %value seq_cst, align 1
@@ -3670,7 +3670,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_unaligned_monotonic:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
 ; -O1: mvn x8, x8
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw nand ptr %ptr, i64 %value monotonic, align 1
@@ -3684,7 +3684,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_unaligned_acquire:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
 ; -O1: mvn x8, x8
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw nand ptr %ptr, i64 %value acquire, align 1
@@ -3698,7 +3698,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_unaligned_release:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
 ; -O1: mvn x8, x8
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw nand ptr %ptr, i64 %value release, align 1
@@ -3712,7 +3712,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_unaligned_acq_rel:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
 ; -O1: mvn x8, x8
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw nand ptr %ptr, i64 %value acq_rel, align 1
@@ -3726,7 +3726,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_unaligned_seq_cst:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
 ; -O1: mvn x8, x8
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw nand ptr %ptr, i64 %value seq_cst, align 1
@@ -3744,7 +3744,7 @@
 ; -O1-LABEL: atomicrmw_nand_i128_unaligned_monotonic:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
 ; -O1: mvn x8, x8
 ; -O1: mvn x9, x9
 ; -O1: bl __atomic_compare_exchange
@@ -3763,7 +3763,7 @@
 ; -O1-LABEL: atomicrmw_nand_i128_unaligned_acquire:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
 ; -O1: mvn x8, x8
 ; -O1: mvn x9, x9
 ; -O1: bl __atomic_compare_exchange
@@ -3782,7 +3782,7 @@
 ; -O1-LABEL: atomicrmw_nand_i128_unaligned_release:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
 ; -O1: mvn x8, x8
 ; -O1: mvn x9, x9
 ; -O1: bl __atomic_compare_exchange
@@ -3801,7 +3801,7 @@
 ; -O1-LABEL: atomicrmw_nand_i128_unaligned_acq_rel:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
 ; -O1: mvn x8, x8
 ; -O1: mvn x9, x9
 ; -O1: bl __atomic_compare_exchange
@@ -3820,7 +3820,7 @@
 ; -O1-LABEL: atomicrmw_nand_i128_unaligned_seq_cst:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
 ; -O1: mvn x8, x8
 ; -O1: mvn x9, x9
 ; -O1: bl __atomic_compare_exchange
@@ -4169,9 +4169,9 @@
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_monotonic:
 ; -O1: ldxp x0, x1, [x8]
-; -O1: orr x9, x0, x2
-; -O1: orr x10, x1, x3
-; -O1: stxp w11, x9, x10, [x8]
+; -O1: orr x9, x1, x3
+; -O1: orr x10, x0, x2
+; -O1: stxp w11, x10, x9, [x8]
   %r = atomicrmw or ptr %ptr, i128 %value monotonic, align 16
   ret i128 %r
 }
@@ -4192,9 +4192,9 @@
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_acquire:
 ; -O1: ldaxp x0, x1, [x8]
-; -O1: orr x9, x0, x2
-; -O1: orr x10, x1, x3
-; -O1: stxp w11, x9, x10, [x8]
+; -O1: orr x9, x1, x3
+; -O1: orr x10, x0, x2
+; -O1: stxp w11, x10, x9, [x8]
   %r = atomicrmw or ptr %ptr, i128 %value acquire, align 16
   ret i128 %r
 }
@@ -4215,9 +4215,9 @@
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_release:
 ; -O1: ldxp x0, x1, [x8]
-; -O1: orr x9, x0, x2
-; -O1: orr x10, x1, x3
-; -O1: stlxp w11, x9, x10, [x8]
+; -O1: orr x9, x1, x3
+; -O1: orr x10, x0, x2
+; -O1: stlxp w11, x10, x9, [x8]
   %r = atomicrmw or ptr %ptr, i128 %value release, align 16
   ret i128 %r
 }
@@ -4238,9 +4238,9 @@
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_acq_rel:
 ; -O1: ldaxp x0, x1, [x8]
-; -O1: orr x9, x0, x2
-; -O1: orr x10, x1, x3
-; -O1: stlxp w11, x9, x10, [x8]
+; -O1: orr x9, x1, x3
+; -O1: orr x10, x0, x2
+; -O1: stlxp w11, x10, x9, [x8]
   %r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 16
   ret i128 %r
 }
@@ -4261,9 +4261,9 @@
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_seq_cst:
 ; -O1: ldaxp x0, x1, [x8]
-; -O1: orr x9, x0, x2
-; -O1: orr x10, x1, x3
-; -O1: stlxp w11, x9, x10, [x8]
+; -O1: orr x9, x1, x3
+; -O1: orr x10, x0, x2
+; -O1: stlxp w11, x10, x9, [x8]
   %r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 16
   ret i128 %r
 }
@@ -4359,7 +4359,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i16_unaligned_monotonic:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw or ptr %ptr, i16 %value monotonic, align 1
   ret i16 %r
@@ -4371,7 +4371,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i16_unaligned_acquire:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw or ptr %ptr, i16 %value acquire, align 1
   ret i16 %r
@@ -4383,7 +4383,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i16_unaligned_release:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw or ptr %ptr, i16 %value release, align 1
   ret i16 %r
@@ -4395,7 +4395,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw or ptr %ptr, i16 %value acq_rel, align 1
   ret i16 %r
@@ -4407,7 +4407,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw or ptr %ptr, i16 %value seq_cst, align 1
   ret i16 %r
@@ -4419,7 +4419,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i32_unaligned_monotonic:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw or ptr %ptr, i32 %value monotonic, align 1
   ret i32 %r
@@ -4431,7 +4431,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i32_unaligned_acquire:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw or ptr %ptr, i32 %value acquire, align 1
   ret i32 %r
@@ -4443,7 +4443,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i32_unaligned_release:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw or ptr %ptr, i32 %value release, align 1
   ret i32 %r
@@ -4455,7 +4455,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw or ptr %ptr, i32 %value acq_rel, align 1
   ret i32 %r
@@ -4467,7 +4467,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw or ptr %ptr, i32 %value seq_cst, align 1
   ret i32 %r
@@ -4479,7 +4479,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i64_unaligned_monotonic:
-; -O1: orr x8, x0, x19
+; -O1: orr x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw or ptr %ptr, i64 %value monotonic, align 1
   ret i64 %r
@@ -4491,7 +4491,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i64_unaligned_acquire:
-; -O1: orr x8, x0, x19
+; -O1: orr x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw or ptr %ptr, i64 %value acquire, align 1
   ret i64 %r
@@ -4503,7 +4503,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i64_unaligned_release:
-; -O1: orr x8, x0, x19
+; -O1: orr x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw or ptr %ptr, i64 %value release, align 1
   ret i64 %r
@@ -4515,7 +4515,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
-; -O1: orr x8, x0, x19
+; -O1: orr x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw or ptr %ptr, i64 %value acq_rel, align 1
   ret i64 %r
@@ -4527,7 +4527,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
-; -O1: orr x8, x0, x19
+; -O1: orr x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw or ptr %ptr, i64 %value seq_cst, align 1
   ret i64 %r
@@ -4542,7 +4542,7 @@
 ; -O1-LABEL: atomicrmw_or_i128_unaligned_monotonic:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x20
+; -O1: orr x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw or ptr %ptr, i128 %value monotonic, align 1
   ret i128 %r
@@ -4557,7 +4557,7 @@
 ; -O1-LABEL: atomicrmw_or_i128_unaligned_acquire:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x20
+; -O1: orr x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw or ptr %ptr, i128 %value acquire, align 1
   ret i128 %r
@@ -4572,7 +4572,7 @@
 ; -O1-LABEL: atomicrmw_or_i128_unaligned_release:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x20
+; -O1: orr x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw or ptr %ptr, i128 %value release, align 1
   ret i128 %r
@@ -4587,7 +4587,7 @@
 ; -O1-LABEL: atomicrmw_or_i128_unaligned_acq_rel:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x20
+; -O1: orr x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 1
   ret i128 %r
@@ -4602,7 +4602,7 @@
 ; -O1-LABEL: atomicrmw_or_i128_unaligned_seq_cst:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x20
+; -O1: orr x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 1
   ret i128 %r
@@ -4949,9 +4949,9 @@
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic:
 ; -O1: ldxp x0, x1, [x8]
-; -O1: eor x9, x0, x2
-; -O1: eor x10, x1, x3
-; -O1: stxp w11, x9, x10, [x8]
+; -O1: eor x9, x1, x3
+; -O1: eor x10, x0, x2
+; -O1: stxp w11, x10, x9, [x8]
   %r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 16
   ret i128 %r
 }
@@ -4972,9 +4972,9 @@
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire:
 ; -O1: ldaxp x0, x1, [x8]
-; -O1: eor x9, x0, x2
-; -O1: eor x10, x1, x3
-; -O1: stxp w11, x9, x10, [x8]
+; -O1: eor x9, x1, x3
+; -O1: eor x10, x0, x2
+; -O1: stxp w11, x10, x9, [x8]
   %r = atomicrmw xor ptr %ptr, i128 %value acquire, align 16
   ret i128 %r
 }
@@ -4995,9 +4995,9 @@
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_release:
 ; -O1: ldxp x0, x1, [x8]
-; -O1: eor x9, x0, x2
-; -O1: eor x10, x1, x3
-; -O1: stlxp w11, x9, x10, [x8]
+; -O1: eor x9, x1, x3
+; -O1: eor x10, x0, x2
+; -O1: stlxp w11, x10, x9, [x8]
   %r = atomicrmw xor ptr %ptr, i128 %value release, align 16
   ret i128 %r
 }
@@ -5018,9 +5018,9 @@
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel:
 ; -O1: ldaxp x0, x1, [x8]
-; -O1: eor x9, x0, x2
-; -O1: eor x10, x1, x3
-; -O1: stlxp w11, x9, x10, [x8]
+; -O1: eor x9, x1, x3
+; -O1: eor x10, x0, x2
+; -O1: stlxp w11, x10, x9, [x8]
   %r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 16
   ret i128 %r
 }
@@ -5041,9 +5041,9 @@
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst:
 ; -O1: ldaxp x0, x1, [x8]
-; -O1: eor x9, x0, x2
-; -O1: eor x10, x1, x3
-; -O1: stlxp w11, x9, x10, [x8]
+; -O1: eor x9, x1, x3
+; -O1: eor x10, x0, x2
+; -O1: stlxp w11, x10, x9, [x8]
   %r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 16
   ret i128 %r
 }
@@ -5139,7 +5139,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw xor ptr %ptr, i16 %value monotonic, align 1
   ret i16 %r
@@ -5151,7 +5151,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_unaligned_acquire:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw xor ptr %ptr, i16 %value acquire, align 1
   ret i16 %r
@@ -5163,7 +5163,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_unaligned_release:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw xor ptr %ptr, i16 %value release, align 1
   ret i16 %r
@@ -5175,7 +5175,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw xor ptr %ptr, i16 %value acq_rel, align 1
   ret i16 %r
@@ -5187,7 +5187,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw xor ptr %ptr, i16 %value seq_cst, align 1
   ret i16 %r
@@ -5199,7 +5199,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw xor ptr %ptr, i32 %value monotonic, align 1
   ret i32 %r
@@ -5211,7 +5211,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_unaligned_acquire:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw xor ptr %ptr, i32 %value acquire, align 1
   ret i32 %r
@@ -5223,7 +5223,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_unaligned_release:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw xor ptr %ptr, i32 %value release, align 1
   ret i32 %r
@@ -5235,7 +5235,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw xor ptr %ptr, i32 %value acq_rel, align 1
   ret i32 %r
@@ -5247,7 +5247,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw xor ptr %ptr, i32 %value seq_cst, align 1
   ret i32 %r
@@ -5259,7 +5259,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
-; -O1: eor x8, x0, x19
+; -O1: eor x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw xor ptr %ptr, i64 %value monotonic, align 1
   ret i64 %r
@@ -5271,7 +5271,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_unaligned_acquire:
-; -O1: eor x8, x0, x19
+; -O1: eor x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw xor ptr %ptr, i64 %value acquire, align 1
   ret i64 %r
@@ -5283,7 +5283,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_unaligned_release:
-; -O1: eor x8, x0, x19
+; -O1: eor x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw xor ptr %ptr, i64 %value release, align 1
   ret i64 %r
@@ -5295,7 +5295,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
-; -O1: eor x8, x0, x19
+; -O1: eor x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw xor ptr %ptr, i64 %value acq_rel, align 1
   ret i64 %r
@@ -5307,7 +5307,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
-; -O1: eor x8, x0, x19
+; -O1: eor x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw xor ptr %ptr, i64 %value seq_cst, align 1
   ret i64 %r
@@ -5322,7 +5322,7 @@
 ; -O1-LABEL: atomicrmw_xor_i128_unaligned_monotonic:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x20
+; -O1: eor x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 1
   ret i128 %r
@@ -5337,7 +5337,7 @@
 ; -O1-LABEL: atomicrmw_xor_i128_unaligned_acquire:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x20
+; -O1: eor x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw xor ptr %ptr, i128 %value acquire, align 1
   ret i128 %r
@@ -5352,7 +5352,7 @@
 ; -O1-LABEL: atomicrmw_xor_i128_unaligned_release:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x20
+; -O1: eor x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw xor ptr %ptr, i128 %value release, align 1
   ret i128 %r
@@ -5367,7 +5367,7 @@
 ; -O1-LABEL: atomicrmw_xor_i128_unaligned_acq_rel:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x20
+; -O1: eor x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 1
   ret i128 %r
@@ -5382,7 +5382,7 @@
 ; -O1-LABEL: atomicrmw_xor_i128_unaligned_seq_cst:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x20
+; -O1: eor x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 1
   ret i128 %r
@@ -6109,8 +6109,8 @@
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, gt
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw max ptr %ptr, i16 %value monotonic, align 1
   ret i16 %r
@@ -6127,8 +6127,8 @@
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, gt
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw max ptr %ptr, i16 %value acquire, align 1
   ret i16 %r
@@ -6145,8 +6145,8 @@
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, gt
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw max ptr %ptr, i16 %value release, align 1
   ret i16 %r
@@ -6163,8 +6163,8 @@
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, gt
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw max ptr %ptr, i16 %value acq_rel, align 1
   ret i16 %r
@@ -6181,8 +6181,8 @@
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, gt
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw max ptr %ptr, i16 %value seq_cst, align 1
   ret i16 %r
@@ -6197,8 +6197,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, gt
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw max ptr %ptr, i32 %value monotonic, align 1
   ret i32 %r
@@ -6213,8 +6213,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, gt
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw max ptr %ptr, i32 %value acquire, align 1
   ret i32 %r
@@ -6229,8 +6229,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, gt
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw max ptr %ptr, i32 %value release, align 1
   ret i32 %r
@@ -6245,8 +6245,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, gt
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw max ptr %ptr, i32 %value acq_rel, align 1
   ret i32 %r
@@ -6261,8 +6261,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, gt
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw max ptr %ptr, i32 %value seq_cst, align 1
   ret i32 %r
@@ -6277,8 +6277,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, gt
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, gt
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw max ptr %ptr, i64 %value monotonic, align 1
   ret i64 %r
@@ -6293,8 +6293,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, gt
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, gt
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw max ptr %ptr, i64 %value acquire, align 1
   ret i64 %r
@@ -6309,8 +6309,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, gt
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, gt
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw max ptr %ptr, i64 %value release, align 1
   ret i64 %r
@@ -6325,8 +6325,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, gt
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, gt
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw max ptr %ptr, i64 %value acq_rel, align 1
   ret i64 %r
@@ -6341,8 +6341,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, gt
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, gt
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw max ptr %ptr, i64 %value seq_cst, align 1
   ret i64 %r
@@ -6366,9 +6366,9 @@
 ;
 ; -O1-LABEL: atomicrmw_max_i128_unaligned_monotonic:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, lt
-; -O1: csel x9, x0, x20, lt
+; -O1: csel x9, x0, x21, lt
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw max ptr %ptr, i128 %value monotonic, align 1
   ret i128 %r
@@ -6392,9 +6392,9 @@
 ;
 ; -O1-LABEL: atomicrmw_max_i128_unaligned_acquire:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, lt
-; -O1: csel x9, x0, x20, lt
+; -O1: csel x9, x0, x21, lt
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw max ptr %ptr, i128 %value acquire, align 1
   ret i128 %r
@@ -6418,9 +6418,9 @@
 ;
 ; -O1-LABEL: atomicrmw_max_i128_unaligned_release:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, lt
-; -O1: csel x9, x0, x20, lt
+; -O1: csel x9, x0, x21, lt
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw max ptr %ptr, i128 %value release, align 1
   ret i128 %r
@@ -6444,9 +6444,9 @@
 ;
 ; -O1-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, lt
-; -O1: csel x9, x0, x20, lt
+; -O1: csel x9, x0, x21, lt
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw max ptr %ptr, i128 %value acq_rel, align 1
   ret i128 %r
@@ -6470,9 +6470,9 @@
 ;
 ; -O1-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, lt
-; -O1: csel x9, x0, x20, lt
+; -O1: csel x9, x0, x21, lt
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw max ptr %ptr, i128 %value seq_cst, align 1
   ret i128 %r
@@ -7199,8 +7199,8 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw min ptr %ptr, i16 %value monotonic, align 1
   ret i16 %r
@@ -7217,8 +7217,8 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw min ptr %ptr, i16 %value acquire, align 1
   ret i16 %r
@@ -7235,8 +7235,8 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw min ptr %ptr, i16 %value release, align 1
   ret i16 %r
@@ -7253,8 +7253,8 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw min ptr %ptr, i16 %value acq_rel, align 1
   ret i16 %r
@@ -7271,8 +7271,8 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw min ptr %ptr, i16 %value seq_cst, align 1
   ret i16 %r
@@ -7287,8 +7287,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw min ptr %ptr, i32 %value monotonic, align 1
   ret i32 %r
@@ -7303,8 +7303,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw min ptr %ptr, i32 %value acquire, align 1
   ret i32 %r
@@ -7319,8 +7319,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw min ptr %ptr, i32 %value release, align 1
   ret i32 %r
@@ -7335,8 +7335,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw min ptr %ptr, i32 %value acq_rel, align 1
   ret i32 %r
@@ -7351,8 +7351,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw min ptr %ptr, i32 %value seq_cst, align 1
   ret i32 %r
@@ -7367,8 +7367,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, le
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, le
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw min ptr %ptr, i64 %value monotonic, align 1
   ret i64 %r
@@ -7383,8 +7383,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, le
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, le
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw min ptr %ptr, i64 %value acquire, align 1
   ret i64 %r
@@ -7399,8 +7399,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, le
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, le
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw min ptr %ptr, i64 %value release, align 1
   ret i64 %r
@@ -7415,8 +7415,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, le
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, le
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw min ptr %ptr, i64 %value acq_rel, align 1
   ret i64 %r
@@ -7431,8 +7431,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, le
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, le
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw min ptr %ptr, i64 %value seq_cst, align 1
   ret i64 %r
@@ -7456,9 +7456,9 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i128_unaligned_monotonic:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, ge
-; -O1: csel x9, x0, x20, ge
+; -O1: csel x9, x0, x21, ge
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw min ptr %ptr, i128 %value monotonic, align 1
   ret i128 %r
@@ -7482,9 +7482,9 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i128_unaligned_acquire:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, ge
-; -O1: csel x9, x0, x20, ge
+; -O1: csel x9, x0, x21, ge
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw min ptr %ptr, i128 %value acquire, align 1
   ret i128 %r
@@ -7508,9 +7508,9 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i128_unaligned_release:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, ge
-; -O1: csel x9, x0, x20, ge
+; -O1: csel x9, x0, x21, ge
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw min ptr %ptr, i128 %value release, align 1
   ret i128 %r
@@ -7534,9 +7534,9 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, ge
-; -O1: csel x9, x0, x20, ge
+; -O1: csel x9, x0, x21, ge
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw min ptr %ptr, i128 %value acq_rel, align 1
   ret i128 %r
@@ -7560,9 +7560,9 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, ge
-; -O1: csel x9, x0, x20, ge
+; -O1: csel x9, x0, x21, ge
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw min ptr %ptr, i128 %value seq_cst, align 1
   ret i128 %r
@@ -8283,8 +8283,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw umax ptr %ptr, i16 %value monotonic, align 1
   ret i16 %r
@@ -8300,8 +8300,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw umax ptr %ptr, i16 %value acquire, align 1
   ret i16 %r
@@ -8317,8 +8317,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw umax ptr %ptr, i16 %value release, align 1
   ret i16 %r
@@ -8334,8 +8334,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw umax ptr %ptr, i16 %value acq_rel, align 1
   ret i16 %r
@@ -8351,8 +8351,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw umax ptr %ptr, i16 %value seq_cst, align 1
   ret i16 %r
@@ -8367,8 +8367,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw umax ptr %ptr, i32 %value monotonic, align 1
   ret i32 %r
@@ -8383,8 +8383,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw umax ptr %ptr, i32 %value acquire, align 1
   ret i32 %r
@@ -8399,8 +8399,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw umax ptr %ptr, i32 %value release, align 1
   ret i32 %r
@@ -8415,8 +8415,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw umax ptr %ptr, i32 %value acq_rel, align 1
   ret i32 %r
@@ -8431,8 +8431,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw umax ptr %ptr, i32 %value seq_cst, align 1
   ret i32 %r
@@ -8447,8 +8447,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw umax ptr %ptr, i64 %value monotonic, align 1
   ret i64 %r
@@ -8463,8 +8463,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw umax ptr %ptr, i64 %value acquire, align 1
   ret i64 %r
@@ -8479,8 +8479,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw umax ptr %ptr, i64 %value release, align 1
   ret i64 %r
@@ -8495,8 +8495,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw umax ptr %ptr, i64 %value acq_rel, align 1
   ret i64 %r
@@ -8511,8 +8511,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw umax ptr %ptr, i64 %value seq_cst, align 1
   ret i64 %r
@@ -8536,9 +8536,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw umax ptr %ptr, i128 %value monotonic, align 1
   ret i128 %r
@@ -8562,9 +8562,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_unaligned_acquire:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw umax ptr %ptr, i128 %value acquire, align 1
   ret i128 %r
@@ -8588,9 +8588,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_unaligned_release:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw umax ptr %ptr, i128 %value release, align 1
   ret i128 %r
@@ -8614,9 +8614,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw umax ptr %ptr, i128 %value acq_rel, align 1
   ret i128 %r
@@ -8640,9 +8640,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw umax ptr %ptr, i128 %value seq_cst, align 1
   ret i128 %r
@@ -9363,8 +9363,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw umin ptr %ptr, i16 %value monotonic, align 1
   ret i16 %r
@@ -9380,8 +9380,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw umin ptr %ptr, i16 %value acquire, align 1
   ret i16 %r
@@ -9397,8 +9397,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw umin ptr %ptr, i16 %value release, align 1
   ret i16 %r
@@ -9414,8 +9414,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw umin ptr %ptr, i16 %value acq_rel, align 1
   ret i16 %r
@@ -9431,8 +9431,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw umin ptr %ptr, i16 %value seq_cst, align 1
   ret i16 %r
@@ -9447,8 +9447,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw umin ptr %ptr, i32 %value monotonic, align 1
   ret i32 %r
@@ -9463,8 +9463,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw umin ptr %ptr, i32 %value acquire, align 1
   ret i32 %r
@@ -9479,8 +9479,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw umin ptr %ptr, i32 %value release, align 1
   ret i32 %r
@@ -9495,8 +9495,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw umin ptr %ptr, i32 %value acq_rel, align 1
   ret i32 %r
@@ -9511,8 +9511,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw umin ptr %ptr, i32 %value seq_cst, align 1
   ret i32 %r
@@ -9527,8 +9527,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw umin ptr %ptr, i64 %value monotonic, align 1
   ret i64 %r
@@ -9543,8 +9543,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw umin ptr %ptr, i64 %value acquire, align 1
   ret i64 %r
@@ -9559,8 +9559,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw umin ptr %ptr, i64 %value release, align 1
   ret i64 %r
@@ -9575,8 +9575,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw umin ptr %ptr, i64 %value acq_rel, align 1
   ret i64 %r
@@ -9591,8 +9591,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw umin ptr %ptr, i64 %value seq_cst, align 1
   ret i64 %r
@@ -9616,9 +9616,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw umin ptr %ptr, i128 %value monotonic, align 1
   ret i128 %r
@@ -9642,9 +9642,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_unaligned_acquire:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw umin ptr %ptr, i128 %value acquire, align 1
   ret i128 %r
@@ -9668,9 +9668,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_unaligned_release:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw umin ptr %ptr, i128 %value release, align 1
   ret i128 %r
@@ -9694,9 +9694,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw umin ptr %ptr, i128 %value acq_rel, align 1
   ret i128 %r
@@ -9720,9 +9720,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw umin ptr %ptr, i128 %value seq_cst, align 1
   ret i128 %r
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-lse2_lse128.ll
@@ -654,7 +654,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i16_unaligned_monotonic:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw add ptr %ptr, i16 %value monotonic, align 1
   ret i16 %r
@@ -666,7 +666,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i16_unaligned_acquire:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw add ptr %ptr, i16 %value acquire, align 1
   ret i16 %r
@@ -678,7 +678,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i16_unaligned_release:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw add ptr %ptr, i16 %value release, align 1
   ret i16 %r
@@ -690,7 +690,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw add ptr %ptr, i16 %value acq_rel, align 1
   ret i16 %r
@@ -702,7 +702,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw add ptr %ptr, i16 %value seq_cst, align 1
   ret i16 %r
@@ -714,7 +714,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i32_unaligned_monotonic:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw add ptr %ptr, i32 %value monotonic, align 1
   ret i32 %r
@@ -726,7 +726,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i32_unaligned_acquire:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw add ptr %ptr, i32 %value acquire, align 1
   ret i32 %r
@@ -738,7 +738,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i32_unaligned_release:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw add ptr %ptr, i32 %value release, align 1
   ret i32 %r
@@ -750,7 +750,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
   %r = atomicrmw
add ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -762,7 +762,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i32_unaligned_seq_cst: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -774,7 +774,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i64_unaligned_monotonic: -; -O1: add x8, x0, x19 +; -O1: add x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -786,7 +786,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i64_unaligned_acquire: -; -O1: add x8, x0, x19 +; -O1: add x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -798,7 +798,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i64_unaligned_release: -; -O1: add x8, x0, x19 +; -O1: add x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -810,7 +810,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i64_unaligned_acq_rel: -; -O1: add x8, x0, x19 +; -O1: add x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -822,7 +822,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i64_unaligned_seq_cst: -; -O1: add x8, x0, x19 +; -O1: add x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -837,7 +837,7 @@ ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] -; -O1: adds x8, x0, x20 +; -O1: adds x8, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -852,7 +852,7 @@ ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] -; -O1: adds x8, x0, x20 +; -O1: adds x8, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -867,7 +867,7 @@ ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] -; -O1: adds x8, x0, x20 +; -O1: adds x8, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -882,7 +882,7 @@ ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] -; -O1: adds x8, x0, x20 +; -O1: adds x8, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -897,7 +897,7 @@ ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] -; -O1: adds x8, x0, x20 +; -O1: adds x8, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -1184,7 +1184,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i16_unaligned_monotonic: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -1196,7 +1196,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i16_unaligned_acquire: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -1208,7 +1208,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i16_unaligned_release: -; -O1: sub w8, w0, w19 +; 
-O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -1220,7 +1220,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i16_unaligned_acq_rel: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -1232,7 +1232,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i16_unaligned_seq_cst: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -1244,7 +1244,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i32_unaligned_monotonic: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -1256,7 +1256,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i32_unaligned_acquire: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -1268,7 +1268,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i32_unaligned_release: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -1280,7 +1280,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i32_unaligned_acq_rel: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -1292,7 +1292,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i32_unaligned_seq_cst: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -1304,7 +1304,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_monotonic: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -1316,7 +1316,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_acquire: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -1328,7 +1328,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_release: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -1340,7 +1340,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_acq_rel: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -1352,7 +1352,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_seq_cst: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -1366,7 +1366,7 @@ ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] -; -O1: subs x8, x0, x20 +; -O1: subs x8, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -1380,7 +1380,7 
@@ ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] -; -O1: subs x8, x0, x20 +; -O1: subs x8, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -1394,7 +1394,7 @@ ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] -; -O1: subs x8, x0, x20 +; -O1: subs x8, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -1408,7 +1408,7 @@ ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] -; -O1: subs x8, x0, x20 +; -O1: subs x8, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -1422,7 +1422,7 @@ ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] -; -O1: subs x8, x0, x20 +; -O1: subs x8, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -1704,7 +1704,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i16_unaligned_monotonic: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -1716,7 +1716,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i16_unaligned_acquire: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -1728,7 +1728,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i16_unaligned_release: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -1740,7 +1740,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i16_unaligned_acq_rel: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -1752,7 +1752,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i16_unaligned_seq_cst: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -1764,7 +1764,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i32_unaligned_monotonic: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -1776,7 +1776,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i32_unaligned_acquire: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -1788,7 +1788,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i32_unaligned_release: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -1800,7 +1800,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i32_unaligned_acq_rel: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -1812,7 +1812,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i32_unaligned_seq_cst: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl 
__atomic_compare_exchange %r = atomicrmw and ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -1824,7 +1824,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i64_unaligned_monotonic: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -1836,7 +1836,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i64_unaligned_acquire: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -1848,7 +1848,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i64_unaligned_release: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -1860,7 +1860,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i64_unaligned_acq_rel: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -1872,7 +1872,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i64_unaligned_seq_cst: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -1887,7 +1887,7 @@ ; -O1-LABEL: atomicrmw_and_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -1902,7 +1902,7 @@ ; -O1-LABEL: atomicrmw_and_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -1917,7 +1917,7 @@ ; -O1-LABEL: atomicrmw_and_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -1932,7 +1932,7 @@ ; -O1-LABEL: atomicrmw_and_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -1947,7 +1947,7 @@ ; -O1-LABEL: atomicrmw_and_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -2495,7 +2495,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i16_unaligned_monotonic: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i16 %value monotonic, align 1 @@ -2509,7 +2509,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i16_unaligned_acquire: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i16 %value acquire, align 1 @@ -2523,7 +2523,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i16_unaligned_release: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = 
atomicrmw nand ptr %ptr, i16 %value release, align 1 @@ -2537,7 +2537,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i16_unaligned_acq_rel: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i16 %value acq_rel, align 1 @@ -2551,7 +2551,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i16_unaligned_seq_cst: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i16 %value seq_cst, align 1 @@ -2565,7 +2565,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i32_unaligned_monotonic: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i32 %value monotonic, align 1 @@ -2579,7 +2579,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i32_unaligned_acquire: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i32 %value acquire, align 1 @@ -2593,7 +2593,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i32_unaligned_release: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i32 %value release, align 1 @@ -2607,7 +2607,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i32_unaligned_acq_rel: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i32 %value acq_rel, align 1 @@ -2621,7 +2621,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i32_unaligned_seq_cst: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i32 %value seq_cst, align 1 @@ -2635,7 +2635,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i64_unaligned_monotonic: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: mvn x8, x8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i64 %value monotonic, align 1 @@ -2649,7 +2649,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i64_unaligned_acquire: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: mvn x8, x8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i64 %value acquire, align 1 @@ -2663,7 +2663,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i64_unaligned_release: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: mvn x8, x8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i64 %value release, align 1 @@ -2677,7 +2677,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i64_unaligned_acq_rel: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: mvn x8, x8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i64 %value acq_rel, align 1 @@ -2691,7 +2691,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i64_unaligned_seq_cst: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: mvn x8, x8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i64 %value seq_cst, align 1 @@ -2709,7 +2709,7 @@ ; -O1-LABEL: atomicrmw_nand_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: mvn x8, x8 ; -O1: mvn x9, x9 ; -O1: bl 
__atomic_compare_exchange @@ -2728,7 +2728,7 @@ ; -O1-LABEL: atomicrmw_nand_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: mvn x8, x8 ; -O1: mvn x9, x9 ; -O1: bl __atomic_compare_exchange @@ -2747,7 +2747,7 @@ ; -O1-LABEL: atomicrmw_nand_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: mvn x8, x8 ; -O1: mvn x9, x9 ; -O1: bl __atomic_compare_exchange @@ -2766,7 +2766,7 @@ ; -O1-LABEL: atomicrmw_nand_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: mvn x8, x8 ; -O1: mvn x9, x9 ; -O1: bl __atomic_compare_exchange @@ -2785,7 +2785,7 @@ ; -O1-LABEL: atomicrmw_nand_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: mvn x8, x8 ; -O1: mvn x9, x9 ; -O1: bl __atomic_compare_exchange @@ -3024,7 +3024,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i16_unaligned_monotonic: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -3036,7 +3036,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i16_unaligned_acquire: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -3048,7 +3048,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i16_unaligned_release: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -3060,7 +3060,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i16_unaligned_acq_rel: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -3072,7 +3072,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i16_unaligned_seq_cst: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -3084,7 +3084,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i32_unaligned_monotonic: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -3096,7 +3096,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i32_unaligned_acquire: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -3108,7 +3108,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i32_unaligned_release: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -3120,7 +3120,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i32_unaligned_acq_rel: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -3132,7 +3132,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i32_unaligned_seq_cst: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr 
%ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -3144,7 +3144,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i64_unaligned_monotonic: -; -O1: orr x8, x0, x19 +; -O1: orr x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -3156,7 +3156,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i64_unaligned_acquire: -; -O1: orr x8, x0, x19 +; -O1: orr x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -3168,7 +3168,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i64_unaligned_release: -; -O1: orr x8, x0, x19 +; -O1: orr x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -3180,7 +3180,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i64_unaligned_acq_rel: -; -O1: orr x8, x0, x19 +; -O1: orr x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -3192,7 +3192,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i64_unaligned_seq_cst: -; -O1: orr x8, x0, x19 +; -O1: orr x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -3207,7 +3207,7 @@ ; -O1-LABEL: atomicrmw_or_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] ; -O1: orr x8, x1, x19 -; -O1: orr x9, x0, x20 +; -O1: orr x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -3222,7 +3222,7 @@ ; -O1-LABEL: atomicrmw_or_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] ; -O1: orr x8, x1, x19 -; -O1: orr x9, x0, x20 +; -O1: orr x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -3237,7 +3237,7 @@ ; -O1-LABEL: atomicrmw_or_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] ; -O1: orr x8, x1, x19 -; -O1: orr x9, x0, x20 +; -O1: orr x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -3252,7 +3252,7 @@ ; -O1-LABEL: atomicrmw_or_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] ; -O1: orr x8, x1, x19 -; -O1: orr x9, x0, x20 +; -O1: orr x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -3267,7 +3267,7 @@ ; -O1-LABEL: atomicrmw_or_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] ; -O1: orr x8, x1, x19 -; -O1: orr x9, x0, x20 +; -O1: orr x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -3559,7 +3559,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i16_unaligned_monotonic: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -3571,7 +3571,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i16_unaligned_acquire: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -3583,7 +3583,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i16_unaligned_release: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -3595,7 +3595,7 @@ ; -O0: bl 
__atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i16_unaligned_acq_rel: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -3607,7 +3607,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i16_unaligned_seq_cst: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -3619,7 +3619,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i32_unaligned_monotonic: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -3631,7 +3631,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i32_unaligned_acquire: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -3643,7 +3643,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i32_unaligned_release: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -3655,7 +3655,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i32_unaligned_acq_rel: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -3667,7 +3667,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i32_unaligned_seq_cst: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -3679,7 +3679,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i64_unaligned_monotonic: -; -O1: eor x8, x0, x19 +; -O1: eor x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -3691,7 +3691,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i64_unaligned_acquire: -; -O1: eor x8, x0, x19 +; -O1: eor x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -3703,7 +3703,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i64_unaligned_release: -; -O1: eor x8, x0, x19 +; -O1: eor x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -3715,7 +3715,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i64_unaligned_acq_rel: -; -O1: eor x8, x0, x19 +; -O1: eor x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -3727,7 +3727,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i64_unaligned_seq_cst: -; -O1: eor x8, x0, x19 +; -O1: eor x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -3742,7 +3742,7 @@ ; -O1-LABEL: atomicrmw_xor_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] ; -O1: eor x8, x1, x19 -; -O1: eor x9, x0, x20 +; -O1: eor x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -3757,7 +3757,7 @@ ; -O1-LABEL: atomicrmw_xor_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] ; -O1: eor x8, x1, x19 -; -O1: eor x9, x0, x20 +; -O1: eor 
x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -3772,7 +3772,7 @@ ; -O1-LABEL: atomicrmw_xor_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] ; -O1: eor x8, x1, x19 -; -O1: eor x9, x0, x20 +; -O1: eor x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -3787,7 +3787,7 @@ ; -O1-LABEL: atomicrmw_xor_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] ; -O1: eor x8, x1, x19 -; -O1: eor x9, x0, x20 +; -O1: eor x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -3802,7 +3802,7 @@ ; -O1-LABEL: atomicrmw_xor_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] ; -O1: eor x8, x1, x19 -; -O1: eor x9, x0, x20 +; -O1: eor x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -4154,8 +4154,8 @@ ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, gt +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -4172,8 +4172,8 @@ ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, gt +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -4190,8 +4190,8 @@ ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_release: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, gt +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -4208,8 +4208,8 @@ ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, gt +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -4226,8 +4226,8 @@ ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, gt +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -4242,8 +4242,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, gt +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -4258,8 +4258,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, gt +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -4274,8 +4274,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_release: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, gt +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -4290,8 +4290,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel: -; -O1: cmp w0, w19 -; 
-O1: csel w8, w0, w19, gt +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -4306,8 +4306,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, gt +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -4322,8 +4322,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, gt +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -4338,8 +4338,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, gt +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -4354,8 +4354,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_release: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, gt +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -4370,8 +4370,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, gt +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -4386,8 +4386,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, gt +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -4411,9 +4411,9 @@ ; ; -O1-LABEL: atomicrmw_max_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, lt -; -O1: csel x9, x0, x20, lt +; -O1: csel x9, x0, x21, lt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -4437,9 +4437,9 @@ ; ; -O1-LABEL: atomicrmw_max_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, lt -; -O1: csel x9, x0, x20, lt +; -O1: csel x9, x0, x21, lt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -4463,9 +4463,9 @@ ; ; -O1-LABEL: atomicrmw_max_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, lt -; -O1: csel x9, x0, x20, lt +; -O1: csel x9, x0, x21, lt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -4489,9 +4489,9 @@ ; ; -O1-LABEL: atomicrmw_max_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, lt -; -O1: csel x9, x0, x20, lt +; -O1: csel x9, x0, x21, lt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -4515,9 +4515,9 @@ ; ; -O1-LABEL: atomicrmw_max_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, lt -; 
-O1: csel x9, x0, x20, lt +; -O1: csel x9, x0, x21, lt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -4869,8 +4869,8 @@ ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, le +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -4887,8 +4887,8 @@ ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, le +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -4905,8 +4905,8 @@ ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_release: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, le +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -4923,8 +4923,8 @@ ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, le +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -4941,8 +4941,8 @@ ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, le +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -4957,8 +4957,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, le +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -4973,8 +4973,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, le +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -4989,8 +4989,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_release: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, le +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -5005,8 +5005,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, le +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -5021,8 +5021,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, le +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -5037,8 +5037,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, le +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i64 %value 
monotonic, align 1 ret i64 %r @@ -5053,8 +5053,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, le +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -5069,8 +5069,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_release: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, le +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -5085,8 +5085,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, le +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -5101,8 +5101,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, le +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -5126,9 +5126,9 @@ ; ; -O1-LABEL: atomicrmw_min_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, ge -; -O1: csel x9, x0, x20, ge +; -O1: csel x9, x0, x21, ge ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -5152,9 +5152,9 @@ ; ; -O1-LABEL: atomicrmw_min_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, ge -; -O1: csel x9, x0, x20, ge +; -O1: csel x9, x0, x21, ge ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -5178,9 +5178,9 @@ ; ; -O1-LABEL: atomicrmw_min_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, ge -; -O1: csel x9, x0, x20, ge +; -O1: csel x9, x0, x21, ge ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -5204,9 +5204,9 @@ ; ; -O1-LABEL: atomicrmw_min_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, ge -; -O1: csel x9, x0, x20, ge +; -O1: csel x9, x0, x21, ge ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -5230,9 +5230,9 @@ ; ; -O1-LABEL: atomicrmw_min_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, ge -; -O1: csel x9, x0, x20, ge +; -O1: csel x9, x0, x21, ge ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -5583,8 +5583,8 @@ ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, hi +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -5600,8 +5600,8 @@ ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, hi +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i16 %value 
acquire, align 1 ret i16 %r @@ -5617,8 +5617,8 @@ ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_release: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, hi +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -5634,8 +5634,8 @@ ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, hi +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -5651,8 +5651,8 @@ ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, hi +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -5667,8 +5667,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, hi +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -5683,8 +5683,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, hi +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -5699,8 +5699,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_release: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, hi +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -5715,8 +5715,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, hi +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -5731,8 +5731,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, hi +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -5747,8 +5747,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, hi +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -5763,8 +5763,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, hi +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -5779,8 +5779,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_release: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, hi +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -5795,8 +5795,8 @@ ; -O0: bl 
__atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i64 %value acq_rel, align 1
 ret i64 %r
@@ -5811,8 +5811,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i64 %value seq_cst, align 1
 ret i64 %r
@@ -5836,9 +5836,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i128 %value monotonic, align 1
 ret i128 %r
@@ -5862,9 +5862,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_unaligned_acquire:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i128 %value acquire, align 1
 ret i128 %r
@@ -5888,9 +5888,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_unaligned_release:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i128 %value release, align 1
 ret i128 %r
@@ -5914,9 +5914,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i128 %value acq_rel, align 1
 ret i128 %r
@@ -5940,9 +5940,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i128 %value seq_cst, align 1
 ret i128 %r
@@ -6293,8 +6293,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i16 %value monotonic, align 1
 ret i16 %r
@@ -6310,8 +6310,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i16 %value acquire, align 1
 ret i16 %r
@@ -6327,8 +6327,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i16 %value release, align 1
 ret i16 %r
@@ -6344,8 +6344,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i16 %value acq_rel, align 1
 ret i16 %r
@@ -6361,8 +6361,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i16 %value seq_cst, align 1
 ret i16 %r
@@ -6377,8 +6377,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i32 %value monotonic, align 1
 ret i32 %r
@@ -6393,8 +6393,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i32 %value acquire, align 1
 ret i32 %r
@@ -6409,8 +6409,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i32 %value release, align 1
 ret i32 %r
@@ -6425,8 +6425,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i32 %value acq_rel, align 1
 ret i32 %r
@@ -6441,8 +6441,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i32 %value seq_cst, align 1
 ret i32 %r
@@ -6457,8 +6457,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i64 %value monotonic, align 1
 ret i64 %r
@@ -6473,8 +6473,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i64 %value acquire, align 1
 ret i64 %r
@@ -6489,8 +6489,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i64 %value release, align 1
 ret i64 %r
@@ -6505,8 +6505,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i64 %value acq_rel, align 1
 ret i64 %r
@@ -6521,8 +6521,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i64 %value seq_cst, align 1
 ret i64 %r
@@ -6546,9 +6546,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i128 %value monotonic, align 1
 ret i128 %r
@@ -6572,9 +6572,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_unaligned_acquire:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i128 %value acquire, align 1
 ret i128 %r
@@ -6598,9 +6598,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_unaligned_release:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i128 %value release, align 1
 ret i128 %r
@@ -6624,9 +6624,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i128 %value acq_rel, align 1
 ret i128 %r
@@ -6650,9 +6650,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i128 %value seq_cst, align 1
 ret i128 %r
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-outline_atomics.ll
@@ -709,7 +709,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i16_unaligned_monotonic:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i16 %value monotonic, align 1
 ret i16 %r
@@ -721,7 +721,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i16_unaligned_acquire:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i16 %value acquire, align 1
 ret i16 %r
@@ -733,7 +733,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i16_unaligned_release:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i16 %value release, align 1
 ret i16 %r
@@ -745,7 +745,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i16 %value acq_rel, align 1
 ret i16 %r
@@ -757,7 +757,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i16 %value seq_cst, align 1
 ret i16 %r
@@ -769,7 +769,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i32_unaligned_monotonic:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i32 %value monotonic, align 1
 ret i32 %r
@@ -781,7 +781,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i32_unaligned_acquire:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i32 %value acquire, align 1
 ret i32 %r
@@ -793,7 +793,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i32_unaligned_release:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i32 %value release, align 1
 ret i32 %r
@@ -805,7 +805,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i32 %value acq_rel, align 1
 ret i32 %r
@@ -817,7 +817,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i32 %value seq_cst, align 1
 ret i32 %r
@@ -829,7 +829,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i64_unaligned_monotonic:
-; -O1: add x8, x0, x19
+; -O1: add x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i64 %value monotonic, align 1
 ret i64 %r
@@ -841,7 +841,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i64_unaligned_acquire:
-; -O1: add x8, x0, x19
+; -O1: add x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i64 %value acquire, align 1
 ret i64 %r
@@ -853,7 +853,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i64_unaligned_release:
-; -O1: add x8, x0, x19
+; -O1: add x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i64 %value release, align 1
 ret i64 %r
@@ -865,7 +865,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
-; -O1: add x8, x0, x19
+; -O1: add x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i64 %value acq_rel, align 1
 ret i64 %r
@@ -877,7 +877,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
-; -O1: add x8, x0, x19
+; -O1: add x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i64 %value seq_cst, align 1
 ret i64 %r
@@ -892,7 +892,7 @@
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
 ; -O1: ldp x0, x1, [x0]
-; -O1: adds x8, x0, x20
+; -O1: adds x8, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i128 %value monotonic, align 1
 ret i128 %r
@@ -907,7 +907,7 @@
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
 ; -O1: ldp x0, x1, [x0]
-; -O1: adds x8, x0, x20
+; -O1: adds x8, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i128 %value acquire, align 1
 ret i128 %r
@@ -922,7 +922,7 @@
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
 ; -O1: ldp x0, x1, [x0]
-; -O1: adds x8, x0, x20
+; -O1: adds x8, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i128 %value release, align 1
 ret i128 %r
@@ -937,7 +937,7 @@
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
 ; -O1: ldp x0, x1, [x0]
-; -O1: adds x8, x0, x20
+; -O1: adds x8, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i128 %value acq_rel, align 1
 ret i128 %r
@@ -952,7 +952,7 @@
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
 ; -O1: ldp x0, x1, [x0]
-; -O1: adds x8, x0, x20
+; -O1: adds x8, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i128 %value seq_cst, align 1
 ret i128 %r
@@ -1349,7 +1349,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw sub ptr %ptr, i16 %value monotonic, align 1
 ret i16 %r
@@ -1361,7 +1361,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_unaligned_acquire:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw sub ptr %ptr, i16 %value acquire, align 1
 ret i16 %r
@@ -1373,7 +1373,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_unaligned_release:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw sub ptr %ptr, i16 %value release, align 1
 ret i16 %r
@@ -1385,7 +1385,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw sub ptr %ptr, i16 %value acq_rel, align 1
 ret i16 %r
@@ -1397,7 +1397,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw sub ptr %ptr, i16 %value seq_cst, align 1
 ret i16 %r
@@ -1409,7 +1409,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw sub ptr %ptr, i32 %value monotonic, align 1
 ret i32 %r
@@ -1421,7 +1421,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_unaligned_acquire:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw sub ptr %ptr, i32 %value acquire, align 1
 ret i32 %r
@@ -1433,7 +1433,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_unaligned_release:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw sub ptr %ptr, i32 %value release, align 1
 ret i32 %r
@@ -1445,7 +1445,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw sub ptr %ptr, i32 %value acq_rel, align 1
 ret i32 %r
@@ -1457,7 +1457,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw sub ptr %ptr, i32 %value seq_cst, align 1
 ret i32 %r
@@ -1469,7 +1469,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
-; -O1: sub x8, x0, x19
+; -O1: sub x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw sub ptr %ptr, i64 %value monotonic, align 1
 ret i64 %r
@@ -1481,7 +1481,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_unaligned_acquire:
-; -O1: sub x8, x0, x19
+; -O1: sub x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw sub ptr %ptr, i64 %value acquire, align 1
 ret i64 %r
@@ -1493,7 +1493,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_unaligned_release:
-; -O1: sub x8, x0, x19
+; -O1: sub x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw sub ptr %ptr, i64 %value release, align 1
 ret i64 %r
@@ -1505,7 +1505,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
-; -O1: sub x8, x0, x19
+; -O1: sub x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw sub ptr %ptr, i64 %value acq_rel, align 1
 ret i64 %r
@@ -1517,7 +1517,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
-; -O1: sub x8, x0, x19
+; -O1: sub x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw sub ptr %ptr, i64 %value seq_cst, align 1
 ret i64 %r
@@ -1531,7 +1531,7 @@
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
 ; -O1: ldp x0, x1, [x0]
-; -O1: subs x8, x0, x20
+; -O1: subs x8, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw sub ptr %ptr, i128 %value monotonic, align 1
 ret i128 %r
@@ -1545,7 +1545,7 @@
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
 ; -O1: ldp x0, x1, [x0]
-; -O1: subs x8, x0, x20
+; -O1: subs x8, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw sub ptr %ptr, i128 %value acquire, align 1
 ret i128 %r
@@ -1559,7 +1559,7 @@
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
 ; -O1: ldp x0, x1, [x0]
-; -O1: subs x8, x0, x20
+; -O1: subs x8, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw sub ptr %ptr, i128 %value release, align 1
 ret i128 %r
@@ -1573,7 +1573,7 @@
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
 ; -O1: ldp x0, x1, [x0]
-; -O1: subs x8, x0, x20
+; -O1: subs x8, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw sub ptr %ptr, i128 %value acq_rel, align 1
 ret i128 %r
@@ -1587,7 +1587,7 @@
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
 ; -O1: ldp x0, x1, [x0]
-; -O1: subs x8, x0, x20
+; -O1: subs x8, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw sub ptr %ptr, i128 %value seq_cst, align 1
 ret i128 %r
@@ -1849,9 +1849,9 @@
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_monotonic:
 ; -O1: ldxp x0, x1, [x8]
-; -O1: and x9, x0, x2
-; -O1: and x10, x1, x3
-; -O1: stxp w11, x9, x10, [x8]
+; -O1: and x9, x1, x3
+; -O1: and x10, x0, x2
+; -O1: stxp w11, x10, x9, [x8]
 %r = atomicrmw and ptr %ptr, i128 %value monotonic, align 16
 ret i128 %r
 }
@@ -1872,9 +1872,9 @@
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_acquire:
 ; -O1: ldaxp x0, x1, [x8]
-; -O1: and x9, x0, x2
-; -O1: and x10, x1, x3
-; -O1: stxp w11, x9, x10, [x8]
+; -O1: and x9, x1, x3
+; -O1: and x10, x0, x2
+; -O1: stxp w11, x10, x9, [x8]
 %r = atomicrmw and ptr %ptr, i128 %value acquire, align 16
 ret i128 %r
 }
@@ -1895,9 +1895,9 @@
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_release:
 ; -O1: ldxp x0, x1, [x8]
-; -O1: and x9, x0, x2
-; -O1: and x10, x1, x3
-; -O1: stlxp w11, x9, x10, [x8]
+; -O1: and x9, x1, x3
+; -O1: and x10, x0, x2
+; -O1: stlxp w11, x10, x9, [x8]
 %r = atomicrmw and ptr %ptr, i128 %value release, align 16
 ret i128 %r
 }
@@ -1918,9 +1918,9 @@
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_acq_rel:
 ; -O1: ldaxp x0, x1, [x8]
-; -O1: and x9, x0, x2
-; -O1: and x10, x1, x3
-; -O1: stlxp w11, x9, x10, [x8]
+; -O1: and x9, x1, x3
+; -O1: and x10, x0, x2
+; -O1: stlxp w11, x10, x9, [x8]
 %r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 16
 ret i128 %r
 }
@@ -1941,9 +1941,9 @@
 ;
 ; -O1-LABEL: atomicrmw_and_i128_aligned_seq_cst:
 ; -O1: ldaxp x0, x1, [x8]
-; -O1: and x9, x0, x2
-; -O1: and x10, x1, x3
-; -O1: stlxp w11, x9, x10, [x8]
+; -O1: and x9, x1, x3
+; -O1: and x10, x0, x2
+; -O1: stlxp w11, x10, x9, [x8]
 %r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 16
 ret i128 %r
 }
@@ -2014,7 +2014,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i16_unaligned_monotonic:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i16 %value monotonic, align 1
 ret i16 %r
@@ -2026,7 +2026,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i16_unaligned_acquire:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i16 %value acquire, align 1
 ret i16 %r
@@ -2038,7 +2038,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i16_unaligned_release:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i16 %value release, align 1
 ret i16 %r
@@ -2050,7 +2050,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i16 %value acq_rel, align 1
 ret i16 %r
@@ -2062,7 +2062,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i16 %value seq_cst, align 1
 ret i16 %r
@@ -2074,7 +2074,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i32_unaligned_monotonic:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i32 %value monotonic, align 1
 ret i32 %r
@@ -2086,7 +2086,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i32_unaligned_acquire:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i32 %value acquire, align 1
 ret i32 %r
@@ -2098,7 +2098,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i32_unaligned_release:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i32 %value release, align 1
 ret i32 %r
@@ -2110,7 +2110,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i32 %value acq_rel, align 1
 ret i32 %r
@@ -2122,7 +2122,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i32 %value seq_cst, align 1
 ret i32 %r
@@ -2134,7 +2134,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i64_unaligned_monotonic:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i64 %value monotonic, align 1
 ret i64 %r
@@ -2146,7 +2146,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i64_unaligned_acquire:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i64 %value acquire, align 1
 ret i64 %r
@@ -2158,7 +2158,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i64_unaligned_release:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i64 %value release, align 1
 ret i64 %r
@@ -2170,7 +2170,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i64 %value acq_rel, align 1
 ret i64 %r
@@ -2182,7 +2182,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i64 %value seq_cst, align 1
 ret i64 %r
@@ -2197,7 +2197,7 @@
 ; -O1-LABEL: atomicrmw_and_i128_unaligned_monotonic:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i128 %value monotonic, align 1
 ret i128 %r
@@ -2212,7 +2212,7 @@
 ; -O1-LABEL: atomicrmw_and_i128_unaligned_acquire:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i128 %value acquire, align 1
 ret i128 %r
@@ -2227,7 +2227,7 @@
 ; -O1-LABEL: atomicrmw_and_i128_unaligned_release:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i128 %value release, align 1
 ret i128 %r
@@ -2242,7 +2242,7 @@
 ; -O1-LABEL: atomicrmw_and_i128_unaligned_acq_rel:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 1
 ret i128 %r
@@ -2257,7 +2257,7 @@
 ; -O1-LABEL: atomicrmw_and_i128_unaligned_seq_cst:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 1
 ret i128 %r
@@ -2647,9 +2647,9 @@
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic:
 ; -O1: ldxp x0, x1, [x8]
 ; -O1: and x9, x0, x2
-; -O1: mvn x9, x9
 ; -O1: and x10, x1, x3
 ; -O1: mvn x10, x10
+; -O1: mvn x9, x9
 ; -O1: stxp w11, x9, x10, [x8]
 %r = atomicrmw nand ptr %ptr, i128 %value monotonic, align 16
 ret i128 %r
@@ -2674,9 +2674,9 @@
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire:
 ; -O1: ldaxp x0, x1, [x8]
 ; -O1: and x9, x0, x2
-; -O1: mvn x9, x9
 ; -O1: and x10, x1, x3
 ; -O1: mvn x10, x10
+; -O1: mvn x9, x9
 ; -O1: stxp w11, x9, x10, [x8]
 %r = atomicrmw nand ptr %ptr, i128 %value acquire, align 16
 ret i128 %r
@@ -2701,9 +2701,9 @@
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_release:
 ; -O1: ldxp x0, x1, [x8]
 ; -O1: and x9, x0, x2
-; -O1: mvn x9, x9
 ; -O1: and x10, x1, x3
 ; -O1: mvn x10, x10
+; -O1: mvn x9, x9
 ; -O1: stlxp w11, x9, x10, [x8]
 %r = atomicrmw nand ptr %ptr, i128 %value release, align 16
 ret i128 %r
@@ -2728,9 +2728,9 @@
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel:
 ; -O1: ldaxp x0, x1, [x8]
 ; -O1: and x9, x0, x2
-; -O1: mvn x9, x9
 ; -O1: and x10, x1, x3
 ; -O1: mvn x10, x10
+; -O1: mvn x9, x9
 ; -O1: stlxp w11, x9, x10, [x8]
 %r = atomicrmw nand ptr %ptr, i128 %value acq_rel, align 16
 ret i128 %r
@@ -2755,9 +2755,9 @@
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst:
 ; -O1: ldaxp x0, x1, [x8]
 ; -O1: and x9, x0, x2
-; -O1: mvn x9, x9
 ; -O1: and x10, x1, x3
 ; -O1: mvn x10, x10
+; -O1: mvn x9, x9
 ; -O1: stlxp w11, x9, x10, [x8]
 %r = atomicrmw nand ptr %ptr, i128 %value seq_cst, align 16
 ret i128 %r
@@ -2865,7 +2865,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_unaligned_monotonic:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: mvn w8, w8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i16 %value monotonic, align 1
@@ -2879,7 +2879,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_unaligned_acquire:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: mvn w8, w8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i16 %value acquire, align 1
@@ -2893,7 +2893,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_unaligned_release:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: mvn w8, w8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i16 %value release, align 1
@@ -2907,7 +2907,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_unaligned_acq_rel:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: mvn w8, w8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i16 %value acq_rel, align 1
@@ -2921,7 +2921,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_unaligned_seq_cst:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: mvn w8, w8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i16 %value seq_cst, align 1
@@ -2935,7 +2935,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_unaligned_monotonic:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: mvn w8, w8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i32 %value monotonic, align 1
@@ -2949,7 +2949,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_unaligned_acquire:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: mvn w8, w8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i32 %value acquire, align 1
@@ -2963,7 +2963,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_unaligned_release:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: mvn w8, w8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i32 %value release, align 1
@@ -2977,7 +2977,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_unaligned_acq_rel:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: mvn w8, w8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i32 %value acq_rel, align 1
@@ -2991,7 +2991,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_unaligned_seq_cst:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: mvn w8, w8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i32 %value seq_cst, align 1
@@ -3005,7 +3005,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_unaligned_monotonic:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
 ; -O1: mvn x8, x8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i64 %value monotonic, align 1
@@ -3019,7 +3019,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_unaligned_acquire:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
 ; -O1: mvn x8, x8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i64 %value acquire, align 1
@@ -3033,7 +3033,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_unaligned_release:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
 ; -O1: mvn x8, x8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i64 %value release, align 1
@@ -3047,7 +3047,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_unaligned_acq_rel:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
 ; -O1: mvn x8, x8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i64 %value acq_rel, align 1
@@ -3061,7 +3061,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_unaligned_seq_cst:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
 ; -O1: mvn x8, x8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i64 %value seq_cst, align 1
@@ -3079,7 +3079,7 @@
 ; -O1-LABEL: atomicrmw_nand_i128_unaligned_monotonic:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
 ; -O1: mvn x8, x8
 ; -O1: mvn x9, x9
 ; -O1: bl __atomic_compare_exchange
@@ -3098,7 +3098,7 @@
 ; -O1-LABEL: atomicrmw_nand_i128_unaligned_acquire:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
 ; -O1: mvn x8, x8
 ; -O1: mvn x9, x9
 ; -O1: bl __atomic_compare_exchange
@@ -3117,7 +3117,7 @@
 ; -O1-LABEL: atomicrmw_nand_i128_unaligned_release:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
 ; -O1: mvn x8, x8
 ; -O1: mvn x9, x9
 ; -O1: bl __atomic_compare_exchange
@@ -3136,7 +3136,7 @@
 ; -O1-LABEL: atomicrmw_nand_i128_unaligned_acq_rel:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
 ; -O1: mvn x8, x8
 ; -O1: mvn x9, x9
 ; -O1: bl __atomic_compare_exchange
@@ -3155,7 +3155,7 @@
 ; -O1-LABEL: atomicrmw_nand_i128_unaligned_seq_cst:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
 ; -O1: mvn x8, x8
 ; -O1: mvn x9, x9
 ; -O1: bl __atomic_compare_exchange
@@ -3319,9 +3319,9 @@
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_monotonic:
 ; -O1: ldxp x0, x1, [x8]
-; -O1: orr x9, x0, x2
-; -O1: orr x10, x1, x3
-; -O1: stxp w11, x9, x10, [x8]
+; -O1: orr x9, x1, x3
+; -O1: orr x10, x0, x2
+; -O1: stxp w11, x10, x9, [x8]
 %r = atomicrmw or ptr %ptr, i128 %value monotonic, align 16
 ret i128 %r
 }
@@ -3342,9 +3342,9 @@
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_acquire:
 ; -O1: ldaxp x0, x1, [x8]
-; -O1: orr x9, x0, x2
-; -O1: orr x10, x1, x3
-; -O1: stxp w11, x9, x10, [x8]
+; -O1: orr x9, x1, x3
+; -O1: orr x10, x0, x2
+; -O1: stxp w11, x10, x9, [x8]
 %r = atomicrmw or ptr %ptr, i128 %value acquire, align 16
 ret i128 %r
 }
@@ -3365,9 +3365,9 @@
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_release:
 ; -O1: ldxp x0, x1, [x8]
-; -O1: orr x9, x0, x2
-; -O1: orr x10, x1, x3
-; -O1: stlxp w11, x9, x10, [x8]
+; -O1: orr x9, x1, x3
+; -O1: orr x10, x0, x2
+; -O1: stlxp w11, x10, x9, [x8]
 %r = atomicrmw or ptr %ptr, i128 %value release, align 16
 ret i128 %r
 }
@@ -3388,9 +3388,9 @@
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_acq_rel:
 ; -O1: ldaxp x0, x1, [x8]
-; -O1: orr x9, x0, x2
-; -O1: orr x10, x1, x3
-; -O1: stlxp w11, x9, x10, [x8]
+; -O1: orr x9, x1, x3
+; -O1: orr x10, x0, x2
+; -O1: stlxp w11, x10, x9, [x8]
 %r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 16
 ret i128 %r
 }
@@ -3411,9 +3411,9 @@
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_seq_cst:
 ; -O1: ldaxp x0, x1, [x8]
-; -O1: orr x9, x0, x2
-; -O1: orr x10, x1, x3
-; -O1: stlxp w11, x9, x10, [x8]
+; -O1: orr x9, x1, x3
+; -O1: orr x10, x0, x2
+; -O1: stlxp w11, x10, x9, [x8]
 %r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 16
 ret i128 %r
 }
@@ -3459,7 +3459,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i16_unaligned_monotonic:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i16 %value monotonic, align 1
 ret i16 %r
@@ -3471,7 +3471,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i16_unaligned_acquire:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i16 %value acquire, align 1
 ret i16 %r
@@ -3483,7 +3483,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i16_unaligned_release:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i16 %value release, align 1
 ret i16 %r
@@ -3495,7 +3495,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i16 %value acq_rel, align 1
 ret i16 %r
@@ -3507,7 +3507,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i16 %value seq_cst, align 1
 ret i16 %r
@@ -3519,7 +3519,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i32_unaligned_monotonic:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i32 %value monotonic, align 1
 ret i32 %r
@@ -3531,7 +3531,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i32_unaligned_acquire:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i32 %value acquire, align 1
 ret i32 %r
@@ -3543,7 +3543,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i32_unaligned_release:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i32 %value release, align 1
 ret i32 %r
@@ -3555,7 +3555,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i32 %value acq_rel, align 1
 ret i32 %r
@@ -3567,7 +3567,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i32 %value seq_cst, align 1
 ret i32 %r
@@ -3579,7 +3579,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i64_unaligned_monotonic:
-; -O1: orr x8, x0, x19
+; -O1: orr x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i64 %value monotonic, align 1
 ret i64 %r
@@ -3591,7 +3591,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i64_unaligned_acquire:
-; -O1: orr x8, x0, x19
+; -O1: orr x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i64 %value acquire, align 1
 ret i64 %r
@@ -3603,7 +3603,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i64_unaligned_release:
-; -O1: orr x8, x0, x19
+; -O1: orr x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i64 %value release, align 1
 ret i64 %r
@@ -3615,7 +3615,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
-; -O1: orr x8, x0, x19
+; -O1: orr x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i64 %value acq_rel, align 1
 ret i64 %r
@@ -3627,7 +3627,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
-; -O1: orr x8, x0, x19
+; -O1: orr x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i64 %value seq_cst, align 1
 ret i64 %r
@@ -3642,7 +3642,7 @@
 ; -O1-LABEL: atomicrmw_or_i128_unaligned_monotonic:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x20
+; -O1: orr x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i128 %value monotonic, align 1
 ret i128 %r
@@ -3657,7 +3657,7 @@
 ; -O1-LABEL: atomicrmw_or_i128_unaligned_acquire:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x20
+; -O1: orr x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i128 %value acquire, align 1
 ret i128 %r
@@ -3672,7 +3672,7 @@
 ; -O1-LABEL: atomicrmw_or_i128_unaligned_release:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x20
+; -O1: orr x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i128 %value release, align 1
 ret i128 %r
@@ -3687,7 +3687,7 @@
 ; -O1-LABEL: atomicrmw_or_i128_unaligned_acq_rel:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x20
+; -O1: orr x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 1
 ret i128 %r
@@ -3702,7 +3702,7 @@
 ; -O1-LABEL: atomicrmw_or_i128_unaligned_seq_cst:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x20
+; -O1: orr x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 1
 ret i128 %r
@@ -3864,9 +3864,9 @@
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic:
 ; -O1: ldxp x0, x1, [x8]
-; -O1: eor x9, x0, x2
-; -O1: eor x10, x1, x3
-; -O1: stxp w11, x9, x10, [x8]
+; -O1: eor x9, x1, x3
+; -O1: eor x10, x0, x2
+; -O1: stxp w11, x10, x9, [x8]
 %r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 16
 ret i128 %r
 }
@@ -3887,9 +3887,9 @@
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire:
 ; -O1: ldaxp x0, x1, [x8]
-; -O1: eor x9, x0, x2
-; -O1: eor x10, x1, x3
-; -O1: stxp w11, x9, x10, [x8]
+; -O1: eor x9, x1, x3
+; -O1: eor x10, x0, x2
+; -O1: stxp w11, x10, x9, [x8]
 %r = atomicrmw xor ptr %ptr, i128 %value acquire, align 16
 ret i128 %r
 }
@@ -3910,9 +3910,9 @@
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_release:
 ; -O1: ldxp x0, x1, [x8]
-; -O1: eor x9, x0, x2
-; -O1: eor x10, x1, x3
-; -O1: stlxp w11, x9, x10, [x8]
+; -O1: eor x9, x1, x3
+; -O1: eor x10, x0, x2
+; -O1: stlxp w11, x10, x9, [x8]
 %r = atomicrmw xor ptr %ptr, i128 %value release, align 16
 ret i128 %r
 }
@@ -3933,9 +3933,9 @@
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel:
 ; -O1: ldaxp x0, x1, [x8]
-; -O1: eor x9, x0, x2
-; -O1: eor x10, x1, x3
-; -O1: stlxp w11, x9, x10, [x8]
+; -O1: eor x9, x1, x3
+; -O1: eor x10, x0, x2
+; -O1: stlxp w11, x10, x9, [x8]
 %r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 16
 ret i128 %r
 }
@@ -3956,9 +3956,9 @@
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst:
 ; -O1: ldaxp x0, x1, [x8]
-; -O1: eor x9, x0, x2
-; -O1: eor x10, x1, x3
-; -O1: stlxp w11, x9, x10, [x8]
+; -O1: eor x9, x1, x3
+; -O1: eor x10, x0, x2
+; -O1: stlxp w11, x10, x9, [x8]
 %r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 16
 ret i128 %r
 }
@@ -4004,7 +4004,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i16 %value monotonic, align 1
 ret i16 %r
@@ -4016,7 +4016,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_unaligned_acquire:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i16 %value acquire, align 1
 ret i16 %r
@@ -4028,7 +4028,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_unaligned_release:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i16 %value release, align 1
 ret i16 %r
@@ -4040,7 +4040,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i16 %value acq_rel, align 1
 ret i16 %r
@@ -4052,7 +4052,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i16 %value seq_cst, align 1
 ret i16 %r
@@ -4064,7 +4064,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i32 %value monotonic, align 1
 ret i32 %r
@@ -4076,7 +4076,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_unaligned_acquire:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i32 %value acquire, align 1
 ret i32 %r
@@ -4088,7 +4088,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_unaligned_release:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i32 %value release, align 1
 ret i32 %r
@@ -4100,7 +4100,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i32 %value acq_rel, align 1
 ret i32 %r
@@ -4112,7 +4112,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i32 %value seq_cst, align 1
 ret i32 %r
@@ -4124,7 +4124,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
-; -O1: eor x8, x0, x19
+; -O1: eor x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i64 %value monotonic, align 1
 ret i64 %r
@@ -4136,7 +4136,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_unaligned_acquire:
-; -O1: eor x8, x0, x19
+; -O1: eor x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i64 %value acquire, align 1
 ret i64 %r
@@ -4148,7 +4148,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_unaligned_release:
-; -O1: eor x8, x0, x19
+; -O1: eor x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i64 %value release, align 1
 ret i64 %r
@@ -4160,7 +4160,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
-; -O1: eor x8, x0, x19
+; -O1: eor x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i64 %value acq_rel, align 1
 ret i64 %r
@@ -4172,7 +4172,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
-; -O1: eor x8, x0, x19
+; -O1: eor x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i64 %value seq_cst, align 1
 ret i64 %r
@@ -4187,7 +4187,7 @@
 ; -O1-LABEL: atomicrmw_xor_i128_unaligned_monotonic:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x20
+; -O1: eor x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 1
 ret i128 %r
@@ -4202,7 +4202,7 @@
 ; -O1-LABEL: atomicrmw_xor_i128_unaligned_acquire:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x20
+; -O1: eor x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i128 %value acquire, align 1
 ret i128 %r
@@ -4217,7 +4217,7 @@
 ; -O1-LABEL: atomicrmw_xor_i128_unaligned_release:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x20
+; -O1: eor x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i128 %value release, align 1
 ret i128 %r
@@ -4232,7 +4232,7 @@
 ; -O1-LABEL: atomicrmw_xor_i128_unaligned_acq_rel:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x20
+; -O1: eor x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 1
 ret i128 %r
@@ -4247,7 +4247,7 @@
 ; -O1-LABEL: atomicrmw_xor_i128_unaligned_seq_cst:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x20
+; -O1: eor x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 1
 ret i128 %r
@@ -4974,8 +4974,8 @@
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i16 %value monotonic, align 1
 ret i16 %r
@@ -4992,8 +4992,8 @@
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i16 %value acquire, align 1
 ret i16 %r
@@ -5010,8 +5010,8 @@
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i16 %value release, align 1
 ret i16 %r
@@ -5028,8 +5028,8 @@
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i16 %value acq_rel, align 1
 ret i16 %r
@@ -5046,8 +5046,8 @@
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i16 %value seq_cst, align 1
 ret i16 %r
@@ -5062,8 +5062,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i32 %value monotonic, align 1
 ret i32 %r
@@ -5078,8 +5078,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i32 %value acquire, align 1
 ret i32 %r
@@ -5094,8 +5094,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i32 %value release, align 1
 ret i32 %r
@@ -5110,8 +5110,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i32 %value acq_rel, align 1
 ret i32 %r
@@ -5126,8 +5126,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i32 %value seq_cst, align 1
 ret i32 %r
@@ -5142,8 +5142,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, gt
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i64 %value monotonic, align 1
 ret i64 %r
@@ -5158,8 +5158,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, gt
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i64 %value acquire, align 1
 ret i64 %r
@@ -5174,8 +5174,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, gt
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i64 %value release, align 1
 ret i64 %r
@@ -5190,8 +5190,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, gt
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i64 %value acq_rel, align 1
 ret i64 %r
@@ -5206,8 +5206,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, gt
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i64 %value seq_cst, align 1
 ret i64 %r
@@ -5231,9 +5231,9 @@
 ;
 ; -O1-LABEL: atomicrmw_max_i128_unaligned_monotonic:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, lt
-; -O1: csel x9, x0, x20, lt
+; -O1: csel x9, x0, x21, lt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i128 %value monotonic, align 1
 ret i128 %r
@@ -5257,9 +5257,9 @@
 ;
 ; -O1-LABEL: atomicrmw_max_i128_unaligned_acquire:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, lt
-; -O1: csel x9, x0, x20, lt
+; -O1: csel x9, x0, x21, lt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i128 %value acquire, align 1
 ret i128 %r
@@ -5283,9 +5283,9 @@
 ;
 ; -O1-LABEL: atomicrmw_max_i128_unaligned_release:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, lt
-; -O1: csel x9, x0, x20, lt
+; -O1: csel x9, x0, x21, lt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i128 %value release, align 1
 ret i128 %r
@@ -5309,9 +5309,9 @@
 ;
 ; -O1-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, lt
-; -O1: csel x9, x0, x20, lt
+; -O1: csel x9, x0, x21, lt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i128 %value acq_rel, align 1
 ret i128 %r
@@ -5335,9 +5335,9 @@
 ;
 ; -O1-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, lt
-; -O1: csel x9, x0, x20, lt
+; -O1: csel x9, x0, x21, lt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i128 %value seq_cst, align 1
 ret i128 %r
@@ -6064,8 +6064,8 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i16 %value monotonic, align 1
 ret i16 %r
@@ -6082,8 +6082,8 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i16 %value acquire, align 1
 ret i16 %r
@@ -6100,8 +6100,8 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i16 %value release, align 1
 ret i16 %r
@@ -6118,8 +6118,8 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i16 %value acq_rel, align 1
 ret i16 %r
@@ -6136,8 +6136,8 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i16 %value seq_cst, align 1
 ret i16 %r
@@ -6152,8 +6152,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i32 %value monotonic, align 1
 ret i32 %r
@@ -6168,8 +6168,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i32 %value acquire, align 1
 ret i32 %r
@@ -6184,8 +6184,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i32 %value release, align 1
 ret i32 %r
@@ -6200,8 +6200,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i32 %value acq_rel, align 1
 ret i32 %r
@@ -6216,8 +6216,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i32 %value seq_cst, align 1
 ret i32 %r
@@ -6232,8 +6232,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, le
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i64 %value monotonic, align 1
 ret i64 %r
@@ -6248,8 +6248,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, le
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i64 %value acquire, align 1
 ret i64 %r
@@ -6264,8 +6264,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, le
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i64 %value release, align 1
 ret i64 %r
@@ -6280,8 +6280,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, le
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i64 %value acq_rel, align 1
 ret i64 %r
@@ -6296,8 +6296,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, le
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i64 %value seq_cst, align 1
 ret i64 %r
@@ -6321,9 +6321,9 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i128_unaligned_monotonic:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, ge
-; -O1: csel x9, x0, x20, ge
+; -O1: csel x9, x0, x21, ge
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i128 %value monotonic, align 1
 ret i128 %r
@@ -6347,9 +6347,9 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i128_unaligned_acquire:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, ge
-; -O1: csel x9, x0, x20, ge
+; -O1: csel x9, x0, x21, ge
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i128 %value acquire, align 1
 ret i128 %r
@@ -6373,9 +6373,9 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i128_unaligned_release:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, ge
-; -O1: csel x9, x0, x20, ge
+; -O1: csel x9, x0, x21, ge
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i128 %value release, align 1
 ret i128 %r
@@ -6399,9 +6399,9 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, ge
-; -O1: csel x9, x0, x20, ge
+; -O1: csel x9, x0, x21, ge
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i128 %value acq_rel, align 1
 ret i128 %r
@@ -6425,9 +6425,9 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, ge
-; -O1: csel x9, x0, x20, ge
+; -O1: csel x9, x0, x21, ge
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i128 %value seq_cst, align 1
 ret i128 %r
@@ -7148,8 +7148,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i16 %value monotonic, align 1
 ret i16 %r
@@ -7165,8 +7165,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i16 %value acquire, align 1
 ret i16 %r
@@ -7182,8 +7182,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i16 %value release, align 1
 ret i16 %r
@@ -7199,8 +7199,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i16 %value acq_rel, align 1
 ret i16 %r
@@ -7216,8 +7216,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i16 %value seq_cst, align 1
 ret i16 %r
@@ -7232,8 +7232,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i32 %value monotonic, align 1
 ret i32 %r
@@ -7248,8 +7248,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i32 %value acquire, align 1
 ret i32 %r
@@ -7264,8 +7264,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i32 %value release, align 1
 ret i32 %r
@@ -7280,8 +7280,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i32 %value acq_rel, align 1
 ret i32 %r
@@ -7296,8 +7296,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i32 %value seq_cst, align 1
 ret i32 %r
@@ -7312,8 +7312,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i64 %value monotonic, align 1
 ret i64 %r
@@ -7328,8 +7328,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i64 %value acquire, align 1
 ret i64 %r
@@ -7344,8 +7344,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i64 %value release, align 1
 ret i64 %r
@@ -7360,8 +7360,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i64 %value acq_rel, align 1
 ret i64 %r
@@ -7376,8 +7376,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i64 %value seq_cst, align 1
 ret i64 %r
@@ -7401,9 +7401,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i128 %value monotonic, align 1
 ret i128 %r
@@ -7427,9 +7427,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_unaligned_acquire:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i128 %value acquire, align 1
 ret i128 %r
@@ -7453,9 +7453,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_unaligned_release:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i128 %value release, align 1
 ret i128 %r
@@ -7479,9 +7479,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i128 %value acq_rel, align 1
 ret i128 %r
@@ -7505,9 +7505,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i128 %value seq_cst, align 1
 ret i128 %r
@@ -8228,8 +8228,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i16 %value monotonic, align 1
 ret i16 %r
@@ -8245,8 +8245,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i16 %value acquire, align 1
 ret i16 %r
@@ -8262,8 +8262,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i16 %value release, align 1
 ret i16 %r
@@ -8279,8 +8279,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i16 %value acq_rel, align 1
 ret i16 %r
@@ -8296,8 +8296,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i16 %value seq_cst, align 1
 ret i16 %r
@@ -8312,8 +8312,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i32 %value monotonic, align 1
 ret i32 %r
@@ -8328,8 +8328,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i32 %value acquire, align 1
 ret i32 %r
@@ -8344,8 +8344,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i32 %value release, align 1
 ret i32 %r
@@ -8360,8 +8360,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i32 %value acq_rel, align 1
 ret i32 %r
@@ -8376,8 +8376,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i32 %value seq_cst, align 1
 ret i32 %r
@@ -8392,8 +8392,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i64 %value monotonic, align 1
 ret i64 %r
@@ -8408,8 +8408,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i64 %value acquire, align 1
 ret i64 %r
@@ -8424,8 +8424,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i64 %value release, align 1
 ret i64 %r
@@ -8440,8 +8440,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i64 %value acq_rel, align 1
 ret i64 %r
@@ -8456,8 +8456,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i64 %value seq_cst, align 1
 ret i64 %r
@@ -8481,9 +8481,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i128 %value monotonic, align 1
 ret i128 %r
@@ -8507,9 +8507,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_unaligned_acquire:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i128 %value acquire, align 1
 ret i128 %r
@@ -8533,9 +8533,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_unaligned_release:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i128 %value release, align 1
 ret i128 %r
@@ -8559,9 +8559,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i128 %value acq_rel, align 1
 ret i128 %r
@@ -8585,9 +8585,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i128 %value seq_cst, align 1
 ret i128 %r
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc.ll
@@ -1129,7 +1129,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i16_unaligned_monotonic:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i16 %value monotonic, align 1
 ret i16 %r
@@ -1141,7 +1141,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i16_unaligned_acquire:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i16 %value acquire, align 1
 ret i16 %r
@@ -1153,7 +1153,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i16_unaligned_release:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i16 %value release, align 1
 ret i16 %r
@@ -1165,7 +1165,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i16 %value acq_rel, align 1
 ret i16 %r
@@ -1177,7 +1177,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i16 %value seq_cst, align 1
 ret i16 %r
@@ -1189,7 +1189,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i32_unaligned_monotonic:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i32 %value monotonic, align 1
 ret i32 %r
@@ -1201,7 +1201,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i32_unaligned_acquire:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -1213,7 +1213,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i32_unaligned_release: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -1225,7 +1225,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i32_unaligned_acq_rel: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -1237,7 +1237,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i32_unaligned_seq_cst: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -1249,7 +1249,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i64_unaligned_monotonic: -; -O1: add x8, x0, x19 +; -O1: add x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -1261,7 +1261,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i64_unaligned_acquire: -; -O1: add x8, x0, x19 +; -O1: add x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -1273,7 +1273,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i64_unaligned_release: -; -O1: add x8, x0, x19 +; -O1: add x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -1285,7 +1285,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i64_unaligned_acq_rel: -; -O1: add x8, x0, x19 +; -O1: add x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -1297,7 +1297,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i64_unaligned_seq_cst: -; -O1: add x8, x0, x19 +; -O1: add x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -1312,7 +1312,7 @@ ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] -; -O1: adds x8, x0, x20 +; -O1: adds x8, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -1327,7 +1327,7 @@ ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] -; -O1: adds x8, x0, x20 +; -O1: adds x8, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -1342,7 +1342,7 @@ ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] -; -O1: adds x8, x0, x20 +; -O1: adds x8, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -1357,7 +1357,7 @@ ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] -; -O1: adds x8, x0, x20 +; -O1: adds x8, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -1372,7 +1372,7 @@ ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] -; -O1: adds x8, x0, x20 +; -O1: adds x8, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -1904,7 +1904,7 @@ ; -O0: bl 
__atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i16_unaligned_monotonic: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -1916,7 +1916,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i16_unaligned_acquire: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -1928,7 +1928,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i16_unaligned_release: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -1940,7 +1940,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i16_unaligned_acq_rel: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -1952,7 +1952,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i16_unaligned_seq_cst: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -1964,7 +1964,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i32_unaligned_monotonic: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -1976,7 +1976,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i32_unaligned_acquire: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -1988,7 +1988,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i32_unaligned_release: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -2000,7 +2000,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i32_unaligned_acq_rel: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -2012,7 +2012,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i32_unaligned_seq_cst: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -2024,7 +2024,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_monotonic: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -2036,7 +2036,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_acquire: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -2048,7 +2048,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_release: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -2060,7 +2060,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_acq_rel: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl 
__atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -2072,7 +2072,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_seq_cst: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -2086,7 +2086,7 @@ ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] -; -O1: subs x8, x0, x20 +; -O1: subs x8, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -2100,7 +2100,7 @@ ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] -; -O1: subs x8, x0, x20 +; -O1: subs x8, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -2114,7 +2114,7 @@ ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] -; -O1: subs x8, x0, x20 +; -O1: subs x8, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -2128,7 +2128,7 @@ ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] -; -O1: subs x8, x0, x20 +; -O1: subs x8, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -2142,7 +2142,7 @@ ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] -; -O1: subs x8, x0, x20 +; -O1: subs x8, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -2489,9 +2489,9 @@ ; ; -O1-LABEL: atomicrmw_and_i128_aligned_monotonic: ; -O1: ldxp x0, x1, [x8] -; -O1: and x9, x0, x2 -; -O1: and x10, x1, x3 -; -O1: stxp w11, x9, x10, [x8] +; -O1: and x9, x1, x3 +; -O1: and x10, x0, x2 +; -O1: stxp w11, x10, x9, [x8] %r = atomicrmw and ptr %ptr, i128 %value monotonic, align 16 ret i128 %r } @@ -2512,9 +2512,9 @@ ; ; -O1-LABEL: atomicrmw_and_i128_aligned_acquire: ; -O1: ldaxp x0, x1, [x8] -; -O1: and x9, x0, x2 -; -O1: and x10, x1, x3 -; -O1: stxp w11, x9, x10, [x8] +; -O1: and x9, x1, x3 +; -O1: and x10, x0, x2 +; -O1: stxp w11, x10, x9, [x8] %r = atomicrmw and ptr %ptr, i128 %value acquire, align 16 ret i128 %r } @@ -2535,9 +2535,9 @@ ; ; -O1-LABEL: atomicrmw_and_i128_aligned_release: ; -O1: ldxp x0, x1, [x8] -; -O1: and x9, x0, x2 -; -O1: and x10, x1, x3 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: and x9, x1, x3 +; -O1: and x10, x0, x2 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw and ptr %ptr, i128 %value release, align 16 ret i128 %r } @@ -2558,9 +2558,9 @@ ; ; -O1-LABEL: atomicrmw_and_i128_aligned_acq_rel: ; -O1: ldaxp x0, x1, [x8] -; -O1: and x9, x0, x2 -; -O1: and x10, x1, x3 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: and x9, x1, x3 +; -O1: and x10, x0, x2 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 16 ret i128 %r } @@ -2581,9 +2581,9 @@ ; ; -O1-LABEL: atomicrmw_and_i128_aligned_seq_cst: ; -O1: ldaxp x0, x1, [x8] -; -O1: and x9, x0, x2 -; -O1: and x10, x1, x3 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: and x9, x1, x3 +; -O1: and x10, x0, x2 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 16 ret i128 %r } @@ -2679,7 +2679,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i16_unaligned_monotonic: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i16 %value monotonic, align 1 ret 
i16 %r @@ -2691,7 +2691,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i16_unaligned_acquire: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -2703,7 +2703,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i16_unaligned_release: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -2715,7 +2715,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i16_unaligned_acq_rel: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -2727,7 +2727,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i16_unaligned_seq_cst: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -2739,7 +2739,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i32_unaligned_monotonic: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -2751,7 +2751,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i32_unaligned_acquire: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -2763,7 +2763,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i32_unaligned_release: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -2775,7 +2775,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i32_unaligned_acq_rel: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -2787,7 +2787,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i32_unaligned_seq_cst: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -2799,7 +2799,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i64_unaligned_monotonic: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -2811,7 +2811,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i64_unaligned_acquire: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -2823,7 +2823,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i64_unaligned_release: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -2835,7 +2835,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i64_unaligned_acq_rel: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -2847,7 +2847,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i64_unaligned_seq_cst: -; -O1: and x8, x0, x19 
+; -O1: and x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -2862,7 +2862,7 @@ ; -O1-LABEL: atomicrmw_and_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -2877,7 +2877,7 @@ ; -O1-LABEL: atomicrmw_and_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -2892,7 +2892,7 @@ ; -O1-LABEL: atomicrmw_and_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -2907,7 +2907,7 @@ ; -O1-LABEL: atomicrmw_and_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -2922,7 +2922,7 @@ ; -O1-LABEL: atomicrmw_and_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -3312,9 +3312,9 @@ ; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic: ; -O1: ldxp x0, x1, [x8] ; -O1: and x9, x0, x2 -; -O1: mvn x9, x9 ; -O1: and x10, x1, x3 ; -O1: mvn x10, x10 +; -O1: mvn x9, x9 ; -O1: stxp w11, x9, x10, [x8] %r = atomicrmw nand ptr %ptr, i128 %value monotonic, align 16 ret i128 %r @@ -3339,9 +3339,9 @@ ; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire: ; -O1: ldaxp x0, x1, [x8] ; -O1: and x9, x0, x2 -; -O1: mvn x9, x9 ; -O1: and x10, x1, x3 ; -O1: mvn x10, x10 +; -O1: mvn x9, x9 ; -O1: stxp w11, x9, x10, [x8] %r = atomicrmw nand ptr %ptr, i128 %value acquire, align 16 ret i128 %r @@ -3366,9 +3366,9 @@ ; -O1-LABEL: atomicrmw_nand_i128_aligned_release: ; -O1: ldxp x0, x1, [x8] ; -O1: and x9, x0, x2 -; -O1: mvn x9, x9 ; -O1: and x10, x1, x3 ; -O1: mvn x10, x10 +; -O1: mvn x9, x9 ; -O1: stlxp w11, x9, x10, [x8] %r = atomicrmw nand ptr %ptr, i128 %value release, align 16 ret i128 %r @@ -3393,9 +3393,9 @@ ; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel: ; -O1: ldaxp x0, x1, [x8] ; -O1: and x9, x0, x2 -; -O1: mvn x9, x9 ; -O1: and x10, x1, x3 ; -O1: mvn x10, x10 +; -O1: mvn x9, x9 ; -O1: stlxp w11, x9, x10, [x8] %r = atomicrmw nand ptr %ptr, i128 %value acq_rel, align 16 ret i128 %r @@ -3420,9 +3420,9 @@ ; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst: ; -O1: ldaxp x0, x1, [x8] ; -O1: and x9, x0, x2 -; -O1: mvn x9, x9 ; -O1: and x10, x1, x3 ; -O1: mvn x10, x10 +; -O1: mvn x9, x9 ; -O1: stlxp w11, x9, x10, [x8] %r = atomicrmw nand ptr %ptr, i128 %value seq_cst, align 16 ret i128 %r @@ -3530,7 +3530,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i16_unaligned_monotonic: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i16 %value monotonic, align 1 @@ -3544,7 +3544,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i16_unaligned_acquire: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i16 
%value acquire, align 1 @@ -3558,7 +3558,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i16_unaligned_release: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i16 %value release, align 1 @@ -3572,7 +3572,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i16_unaligned_acq_rel: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i16 %value acq_rel, align 1 @@ -3586,7 +3586,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i16_unaligned_seq_cst: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i16 %value seq_cst, align 1 @@ -3600,7 +3600,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i32_unaligned_monotonic: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i32 %value monotonic, align 1 @@ -3614,7 +3614,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i32_unaligned_acquire: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i32 %value acquire, align 1 @@ -3628,7 +3628,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i32_unaligned_release: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i32 %value release, align 1 @@ -3642,7 +3642,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i32_unaligned_acq_rel: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i32 %value acq_rel, align 1 @@ -3656,7 +3656,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i32_unaligned_seq_cst: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i32 %value seq_cst, align 1 @@ -3670,7 +3670,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i64_unaligned_monotonic: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: mvn x8, x8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i64 %value monotonic, align 1 @@ -3684,7 +3684,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i64_unaligned_acquire: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: mvn x8, x8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i64 %value acquire, align 1 @@ -3698,7 +3698,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i64_unaligned_release: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: mvn x8, x8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i64 %value release, align 1 @@ -3712,7 +3712,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i64_unaligned_acq_rel: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: mvn x8, x8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i64 %value acq_rel, align 1 @@ -3726,7 +3726,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i64_unaligned_seq_cst: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: mvn x8, x8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i64 %value seq_cst, align 1 
@@ -3744,7 +3744,7 @@ ; -O1-LABEL: atomicrmw_nand_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: mvn x8, x8 ; -O1: mvn x9, x9 ; -O1: bl __atomic_compare_exchange @@ -3763,7 +3763,7 @@ ; -O1-LABEL: atomicrmw_nand_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: mvn x8, x8 ; -O1: mvn x9, x9 ; -O1: bl __atomic_compare_exchange @@ -3782,7 +3782,7 @@ ; -O1-LABEL: atomicrmw_nand_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: mvn x8, x8 ; -O1: mvn x9, x9 ; -O1: bl __atomic_compare_exchange @@ -3801,7 +3801,7 @@ ; -O1-LABEL: atomicrmw_nand_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: mvn x8, x8 ; -O1: mvn x9, x9 ; -O1: bl __atomic_compare_exchange @@ -3820,7 +3820,7 @@ ; -O1-LABEL: atomicrmw_nand_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: mvn x8, x8 ; -O1: mvn x9, x9 ; -O1: bl __atomic_compare_exchange @@ -4169,9 +4169,9 @@ ; ; -O1-LABEL: atomicrmw_or_i128_aligned_monotonic: ; -O1: ldxp x0, x1, [x8] -; -O1: orr x9, x0, x2 -; -O1: orr x10, x1, x3 -; -O1: stxp w11, x9, x10, [x8] +; -O1: orr x9, x1, x3 +; -O1: orr x10, x0, x2 +; -O1: stxp w11, x10, x9, [x8] %r = atomicrmw or ptr %ptr, i128 %value monotonic, align 16 ret i128 %r } @@ -4192,9 +4192,9 @@ ; ; -O1-LABEL: atomicrmw_or_i128_aligned_acquire: ; -O1: ldaxp x0, x1, [x8] -; -O1: orr x9, x0, x2 -; -O1: orr x10, x1, x3 -; -O1: stxp w11, x9, x10, [x8] +; -O1: orr x9, x1, x3 +; -O1: orr x10, x0, x2 +; -O1: stxp w11, x10, x9, [x8] %r = atomicrmw or ptr %ptr, i128 %value acquire, align 16 ret i128 %r } @@ -4215,9 +4215,9 @@ ; ; -O1-LABEL: atomicrmw_or_i128_aligned_release: ; -O1: ldxp x0, x1, [x8] -; -O1: orr x9, x0, x2 -; -O1: orr x10, x1, x3 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: orr x9, x1, x3 +; -O1: orr x10, x0, x2 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw or ptr %ptr, i128 %value release, align 16 ret i128 %r } @@ -4238,9 +4238,9 @@ ; ; -O1-LABEL: atomicrmw_or_i128_aligned_acq_rel: ; -O1: ldaxp x0, x1, [x8] -; -O1: orr x9, x0, x2 -; -O1: orr x10, x1, x3 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: orr x9, x1, x3 +; -O1: orr x10, x0, x2 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 16 ret i128 %r } @@ -4261,9 +4261,9 @@ ; ; -O1-LABEL: atomicrmw_or_i128_aligned_seq_cst: ; -O1: ldaxp x0, x1, [x8] -; -O1: orr x9, x0, x2 -; -O1: orr x10, x1, x3 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: orr x9, x1, x3 +; -O1: orr x10, x0, x2 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 16 ret i128 %r } @@ -4359,7 +4359,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i16_unaligned_monotonic: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -4371,7 +4371,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i16_unaligned_acquire: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -4383,7 +4383,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i16_unaligned_release: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl 
__atomic_compare_exchange %r = atomicrmw or ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -4395,7 +4395,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i16_unaligned_acq_rel: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -4407,7 +4407,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i16_unaligned_seq_cst: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -4419,7 +4419,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i32_unaligned_monotonic: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -4431,7 +4431,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i32_unaligned_acquire: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -4443,7 +4443,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i32_unaligned_release: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -4455,7 +4455,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i32_unaligned_acq_rel: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -4467,7 +4467,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i32_unaligned_seq_cst: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -4479,7 +4479,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i64_unaligned_monotonic: -; -O1: orr x8, x0, x19 +; -O1: orr x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -4491,7 +4491,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i64_unaligned_acquire: -; -O1: orr x8, x0, x19 +; -O1: orr x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -4503,7 +4503,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i64_unaligned_release: -; -O1: orr x8, x0, x19 +; -O1: orr x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -4515,7 +4515,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i64_unaligned_acq_rel: -; -O1: orr x8, x0, x19 +; -O1: orr x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -4527,7 +4527,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i64_unaligned_seq_cst: -; -O1: orr x8, x0, x19 +; -O1: orr x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -4542,7 +4542,7 @@ ; -O1-LABEL: atomicrmw_or_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] ; -O1: orr x8, x1, x19 -; -O1: orr x9, x0, x20 +; -O1: orr x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -4557,7 +4557,7 @@ ; -O1-LABEL: 
atomicrmw_or_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] ; -O1: orr x8, x1, x19 -; -O1: orr x9, x0, x20 +; -O1: orr x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -4572,7 +4572,7 @@ ; -O1-LABEL: atomicrmw_or_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] ; -O1: orr x8, x1, x19 -; -O1: orr x9, x0, x20 +; -O1: orr x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -4587,7 +4587,7 @@ ; -O1-LABEL: atomicrmw_or_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] ; -O1: orr x8, x1, x19 -; -O1: orr x9, x0, x20 +; -O1: orr x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -4602,7 +4602,7 @@ ; -O1-LABEL: atomicrmw_or_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] ; -O1: orr x8, x1, x19 -; -O1: orr x9, x0, x20 +; -O1: orr x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -4949,9 +4949,9 @@ ; ; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic: ; -O1: ldxp x0, x1, [x8] -; -O1: eor x9, x0, x2 -; -O1: eor x10, x1, x3 -; -O1: stxp w11, x9, x10, [x8] +; -O1: eor x9, x1, x3 +; -O1: eor x10, x0, x2 +; -O1: stxp w11, x10, x9, [x8] %r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 16 ret i128 %r } @@ -4972,9 +4972,9 @@ ; ; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire: ; -O1: ldaxp x0, x1, [x8] -; -O1: eor x9, x0, x2 -; -O1: eor x10, x1, x3 -; -O1: stxp w11, x9, x10, [x8] +; -O1: eor x9, x1, x3 +; -O1: eor x10, x0, x2 +; -O1: stxp w11, x10, x9, [x8] %r = atomicrmw xor ptr %ptr, i128 %value acquire, align 16 ret i128 %r } @@ -4995,9 +4995,9 @@ ; ; -O1-LABEL: atomicrmw_xor_i128_aligned_release: ; -O1: ldxp x0, x1, [x8] -; -O1: eor x9, x0, x2 -; -O1: eor x10, x1, x3 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: eor x9, x1, x3 +; -O1: eor x10, x0, x2 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw xor ptr %ptr, i128 %value release, align 16 ret i128 %r } @@ -5018,9 +5018,9 @@ ; ; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel: ; -O1: ldaxp x0, x1, [x8] -; -O1: eor x9, x0, x2 -; -O1: eor x10, x1, x3 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: eor x9, x1, x3 +; -O1: eor x10, x0, x2 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 16 ret i128 %r } @@ -5041,9 +5041,9 @@ ; ; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst: ; -O1: ldaxp x0, x1, [x8] -; -O1: eor x9, x0, x2 -; -O1: eor x10, x1, x3 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: eor x9, x1, x3 +; -O1: eor x10, x0, x2 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 16 ret i128 %r } @@ -5139,7 +5139,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i16_unaligned_monotonic: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -5151,7 +5151,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i16_unaligned_acquire: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -5163,7 +5163,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i16_unaligned_release: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -5175,7 +5175,7 @@ ; -O0: bl 
__atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i16_unaligned_acq_rel: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -5187,7 +5187,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i16_unaligned_seq_cst: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -5199,7 +5199,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i32_unaligned_monotonic: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -5211,7 +5211,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i32_unaligned_acquire: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -5223,7 +5223,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i32_unaligned_release: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -5235,7 +5235,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i32_unaligned_acq_rel: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -5247,7 +5247,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i32_unaligned_seq_cst: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -5259,7 +5259,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i64_unaligned_monotonic: -; -O1: eor x8, x0, x19 +; -O1: eor x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -5271,7 +5271,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i64_unaligned_acquire: -; -O1: eor x8, x0, x19 +; -O1: eor x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -5283,7 +5283,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i64_unaligned_release: -; -O1: eor x8, x0, x19 +; -O1: eor x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -5295,7 +5295,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i64_unaligned_acq_rel: -; -O1: eor x8, x0, x19 +; -O1: eor x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -5307,7 +5307,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i64_unaligned_seq_cst: -; -O1: eor x8, x0, x19 +; -O1: eor x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -5322,7 +5322,7 @@ ; -O1-LABEL: atomicrmw_xor_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] ; -O1: eor x8, x1, x19 -; -O1: eor x9, x0, x20 +; -O1: eor x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -5337,7 +5337,7 @@ ; -O1-LABEL: atomicrmw_xor_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] ; -O1: eor x8, x1, x19 -; -O1: eor x9, x0, x20 +; -O1: eor 
x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -5352,7 +5352,7 @@ ; -O1-LABEL: atomicrmw_xor_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] ; -O1: eor x8, x1, x19 -; -O1: eor x9, x0, x20 +; -O1: eor x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -5367,7 +5367,7 @@ ; -O1-LABEL: atomicrmw_xor_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] ; -O1: eor x8, x1, x19 -; -O1: eor x9, x0, x20 +; -O1: eor x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -5382,7 +5382,7 @@ ; -O1-LABEL: atomicrmw_xor_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] ; -O1: eor x8, x1, x19 -; -O1: eor x9, x0, x20 +; -O1: eor x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -6109,8 +6109,8 @@ ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, gt +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -6127,8 +6127,8 @@ ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, gt +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -6145,8 +6145,8 @@ ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_release: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, gt +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -6163,8 +6163,8 @@ ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, gt +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -6181,8 +6181,8 @@ ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, gt +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -6197,8 +6197,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, gt +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -6213,8 +6213,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, gt +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -6229,8 +6229,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_release: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, gt +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -6245,8 +6245,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel: -; -O1: cmp w0, w19 -; 
-O1: csel w8, w0, w19, gt +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -6261,8 +6261,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, gt +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -6277,8 +6277,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, gt +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -6293,8 +6293,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, gt +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -6309,8 +6309,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_release: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, gt +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -6325,8 +6325,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, gt +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -6341,8 +6341,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, gt +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -6366,9 +6366,9 @@ ; ; -O1-LABEL: atomicrmw_max_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, lt -; -O1: csel x9, x0, x20, lt +; -O1: csel x9, x0, x21, lt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -6392,9 +6392,9 @@ ; ; -O1-LABEL: atomicrmw_max_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, lt -; -O1: csel x9, x0, x20, lt +; -O1: csel x9, x0, x21, lt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -6418,9 +6418,9 @@ ; ; -O1-LABEL: atomicrmw_max_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, lt -; -O1: csel x9, x0, x20, lt +; -O1: csel x9, x0, x21, lt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -6444,9 +6444,9 @@ ; ; -O1-LABEL: atomicrmw_max_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, lt -; -O1: csel x9, x0, x20, lt +; -O1: csel x9, x0, x21, lt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -6470,9 +6470,9 @@ ; ; -O1-LABEL: atomicrmw_max_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, lt -; 
-O1: csel x9, x0, x20, lt +; -O1: csel x9, x0, x21, lt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -7199,8 +7199,8 @@ ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, le +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -7217,8 +7217,8 @@ ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, le +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -7235,8 +7235,8 @@ ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_release: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, le +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -7253,8 +7253,8 @@ ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, le +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -7271,8 +7271,8 @@ ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, le +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -7287,8 +7287,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, le +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -7303,8 +7303,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, le +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -7319,8 +7319,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_release: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, le +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -7335,8 +7335,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, le +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -7351,8 +7351,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, le +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -7367,8 +7367,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, le +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i64 %value 
monotonic, align 1 ret i64 %r @@ -7383,8 +7383,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, le +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -7399,8 +7399,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_release: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, le +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -7415,8 +7415,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, le +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -7431,8 +7431,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, le +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -7456,9 +7456,9 @@ ; ; -O1-LABEL: atomicrmw_min_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, ge -; -O1: csel x9, x0, x20, ge +; -O1: csel x9, x0, x21, ge ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -7482,9 +7482,9 @@ ; ; -O1-LABEL: atomicrmw_min_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, ge -; -O1: csel x9, x0, x20, ge +; -O1: csel x9, x0, x21, ge ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -7508,9 +7508,9 @@ ; ; -O1-LABEL: atomicrmw_min_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, ge -; -O1: csel x9, x0, x20, ge +; -O1: csel x9, x0, x21, ge ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -7534,9 +7534,9 @@ ; ; -O1-LABEL: atomicrmw_min_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, ge -; -O1: csel x9, x0, x20, ge +; -O1: csel x9, x0, x21, ge ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -7560,9 +7560,9 @@ ; ; -O1-LABEL: atomicrmw_min_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, ge -; -O1: csel x9, x0, x20, ge +; -O1: csel x9, x0, x21, ge ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -8283,8 +8283,8 @@ ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, hi +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -8300,8 +8300,8 @@ ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, hi +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i16 %value 
acquire, align 1 ret i16 %r @@ -8317,8 +8317,8 @@ ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_release: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, hi +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -8334,8 +8334,8 @@ ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, hi +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -8351,8 +8351,8 @@ ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, hi +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -8367,8 +8367,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, hi +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -8383,8 +8383,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, hi +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -8399,8 +8399,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_release: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, hi +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -8415,8 +8415,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, hi +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -8431,8 +8431,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, hi +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -8447,8 +8447,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, hi +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -8463,8 +8463,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, hi +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -8479,8 +8479,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_release: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, hi +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -8495,8 +8495,8 @@ ; -O0: bl 
__atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, hi +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -8511,8 +8511,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, hi +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -8536,9 +8536,9 @@ ; ; -O1-LABEL: atomicrmw_umax_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, lo -; -O1: csel x9, x0, x20, lo +; -O1: csel x9, x0, x21, lo ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -8562,9 +8562,9 @@ ; ; -O1-LABEL: atomicrmw_umax_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, lo -; -O1: csel x9, x0, x20, lo +; -O1: csel x9, x0, x21, lo ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -8588,9 +8588,9 @@ ; ; -O1-LABEL: atomicrmw_umax_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, lo -; -O1: csel x9, x0, x20, lo +; -O1: csel x9, x0, x21, lo ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -8614,9 +8614,9 @@ ; ; -O1-LABEL: atomicrmw_umax_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, lo -; -O1: csel x9, x0, x20, lo +; -O1: csel x9, x0, x21, lo ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -8640,9 +8640,9 @@ ; ; -O1-LABEL: atomicrmw_umax_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, lo -; -O1: csel x9, x0, x20, lo +; -O1: csel x9, x0, x21, lo ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -9363,8 +9363,8 @@ ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, ls +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -9380,8 +9380,8 @@ ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, ls +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -9397,8 +9397,8 @@ ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_release: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, ls +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -9414,8 +9414,8 @@ ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, ls +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -9431,8 
+9431,8 @@ ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, ls +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -9447,8 +9447,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, ls +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -9463,8 +9463,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, ls +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -9479,8 +9479,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_release: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, ls +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -9495,8 +9495,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, ls +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -9511,8 +9511,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, ls +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -9527,8 +9527,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, ls +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -9543,8 +9543,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, ls +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -9559,8 +9559,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_release: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, ls +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -9575,8 +9575,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, ls +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -9591,8 +9591,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, ls +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -9616,9 +9616,9 @@ ; ; -O1-LABEL: atomicrmw_umin_i128_unaligned_monotonic: ; -O1: ldp x0, x1, 
[x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, hs -; -O1: csel x9, x0, x20, hs +; -O1: csel x9, x0, x21, hs ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -9642,9 +9642,9 @@ ; ; -O1-LABEL: atomicrmw_umin_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, hs -; -O1: csel x9, x0, x20, hs +; -O1: csel x9, x0, x21, hs ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -9668,9 +9668,9 @@ ; ; -O1-LABEL: atomicrmw_umin_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, hs -; -O1: csel x9, x0, x20, hs +; -O1: csel x9, x0, x21, hs ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -9694,9 +9694,9 @@ ; ; -O1-LABEL: atomicrmw_umin_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, hs -; -O1: csel x9, x0, x20, hs +; -O1: csel x9, x0, x21, hs ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -9720,9 +9720,9 @@ ; ; -O1-LABEL: atomicrmw_umin_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, hs -; -O1: csel x9, x0, x20, hs +; -O1: csel x9, x0, x21, hs ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-rcpc3.ll @@ -1129,7 +1129,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i16_unaligned_monotonic: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -1141,7 +1141,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i16_unaligned_acquire: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -1153,7 +1153,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i16_unaligned_release: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -1165,7 +1165,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i16_unaligned_acq_rel: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -1177,7 +1177,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i16_unaligned_seq_cst: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -1189,7 +1189,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i32_unaligned_monotonic: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -1201,7 +1201,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i32_unaligned_acquire: -; -O1: add w8, w0, 
w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -1213,7 +1213,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i32_unaligned_release: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -1225,7 +1225,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i32_unaligned_acq_rel: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -1237,7 +1237,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i32_unaligned_seq_cst: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -1249,7 +1249,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i64_unaligned_monotonic: -; -O1: add x8, x0, x19 +; -O1: add x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -1261,7 +1261,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i64_unaligned_acquire: -; -O1: add x8, x0, x19 +; -O1: add x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -1273,7 +1273,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i64_unaligned_release: -; -O1: add x8, x0, x19 +; -O1: add x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -1285,7 +1285,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i64_unaligned_acq_rel: -; -O1: add x8, x0, x19 +; -O1: add x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -1297,7 +1297,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i64_unaligned_seq_cst: -; -O1: add x8, x0, x19 +; -O1: add x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -1312,7 +1312,7 @@ ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] -; -O1: adds x8, x0, x20 +; -O1: adds x8, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -1327,7 +1327,7 @@ ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] -; -O1: adds x8, x0, x20 +; -O1: adds x8, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -1342,7 +1342,7 @@ ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] -; -O1: adds x8, x0, x20 +; -O1: adds x8, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -1357,7 +1357,7 @@ ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] -; -O1: adds x8, x0, x20 +; -O1: adds x8, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -1372,7 +1372,7 @@ ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] -; -O1: adds x8, x0, x20 +; -O1: adds x8, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -1904,7 +1904,7 @@ ; -O0: bl 
__atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i16_unaligned_monotonic: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -1916,7 +1916,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i16_unaligned_acquire: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -1928,7 +1928,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i16_unaligned_release: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -1940,7 +1940,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i16_unaligned_acq_rel: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -1952,7 +1952,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i16_unaligned_seq_cst: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -1964,7 +1964,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i32_unaligned_monotonic: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -1976,7 +1976,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i32_unaligned_acquire: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -1988,7 +1988,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i32_unaligned_release: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -2000,7 +2000,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i32_unaligned_acq_rel: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -2012,7 +2012,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i32_unaligned_seq_cst: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -2024,7 +2024,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_monotonic: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -2036,7 +2036,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_acquire: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -2048,7 +2048,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_release: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -2060,7 +2060,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_acq_rel: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl 
__atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -2072,7 +2072,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_seq_cst: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -2086,7 +2086,7 @@ ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] -; -O1: subs x8, x0, x20 +; -O1: subs x8, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -2100,7 +2100,7 @@ ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] -; -O1: subs x8, x0, x20 +; -O1: subs x8, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -2114,7 +2114,7 @@ ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] -; -O1: subs x8, x0, x20 +; -O1: subs x8, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -2128,7 +2128,7 @@ ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] -; -O1: subs x8, x0, x20 +; -O1: subs x8, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -2142,7 +2142,7 @@ ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] -; -O1: subs x8, x0, x20 +; -O1: subs x8, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -2489,9 +2489,9 @@ ; ; -O1-LABEL: atomicrmw_and_i128_aligned_monotonic: ; -O1: ldxp x0, x1, [x8] -; -O1: and x9, x0, x2 -; -O1: and x10, x1, x3 -; -O1: stxp w11, x9, x10, [x8] +; -O1: and x9, x1, x3 +; -O1: and x10, x0, x2 +; -O1: stxp w11, x10, x9, [x8] %r = atomicrmw and ptr %ptr, i128 %value monotonic, align 16 ret i128 %r } @@ -2512,9 +2512,9 @@ ; ; -O1-LABEL: atomicrmw_and_i128_aligned_acquire: ; -O1: ldaxp x0, x1, [x8] -; -O1: and x9, x0, x2 -; -O1: and x10, x1, x3 -; -O1: stxp w11, x9, x10, [x8] +; -O1: and x9, x1, x3 +; -O1: and x10, x0, x2 +; -O1: stxp w11, x10, x9, [x8] %r = atomicrmw and ptr %ptr, i128 %value acquire, align 16 ret i128 %r } @@ -2535,9 +2535,9 @@ ; ; -O1-LABEL: atomicrmw_and_i128_aligned_release: ; -O1: ldxp x0, x1, [x8] -; -O1: and x9, x0, x2 -; -O1: and x10, x1, x3 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: and x9, x1, x3 +; -O1: and x10, x0, x2 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw and ptr %ptr, i128 %value release, align 16 ret i128 %r } @@ -2558,9 +2558,9 @@ ; ; -O1-LABEL: atomicrmw_and_i128_aligned_acq_rel: ; -O1: ldaxp x0, x1, [x8] -; -O1: and x9, x0, x2 -; -O1: and x10, x1, x3 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: and x9, x1, x3 +; -O1: and x10, x0, x2 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 16 ret i128 %r } @@ -2581,9 +2581,9 @@ ; ; -O1-LABEL: atomicrmw_and_i128_aligned_seq_cst: ; -O1: ldaxp x0, x1, [x8] -; -O1: and x9, x0, x2 -; -O1: and x10, x1, x3 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: and x9, x1, x3 +; -O1: and x10, x0, x2 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 16 ret i128 %r } @@ -2679,7 +2679,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i16_unaligned_monotonic: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i16 %value monotonic, align 1 ret 
i16 %r @@ -2691,7 +2691,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i16_unaligned_acquire: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -2703,7 +2703,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i16_unaligned_release: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -2715,7 +2715,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i16_unaligned_acq_rel: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -2727,7 +2727,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i16_unaligned_seq_cst: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -2739,7 +2739,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i32_unaligned_monotonic: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -2751,7 +2751,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i32_unaligned_acquire: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -2763,7 +2763,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i32_unaligned_release: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -2775,7 +2775,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i32_unaligned_acq_rel: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -2787,7 +2787,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i32_unaligned_seq_cst: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -2799,7 +2799,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i64_unaligned_monotonic: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -2811,7 +2811,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i64_unaligned_acquire: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -2823,7 +2823,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i64_unaligned_release: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -2835,7 +2835,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i64_unaligned_acq_rel: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -2847,7 +2847,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i64_unaligned_seq_cst: -; -O1: and x8, x0, x19 
+; -O1: and x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -2862,7 +2862,7 @@ ; -O1-LABEL: atomicrmw_and_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -2877,7 +2877,7 @@ ; -O1-LABEL: atomicrmw_and_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -2892,7 +2892,7 @@ ; -O1-LABEL: atomicrmw_and_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -2907,7 +2907,7 @@ ; -O1-LABEL: atomicrmw_and_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -2922,7 +2922,7 @@ ; -O1-LABEL: atomicrmw_and_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -3312,9 +3312,9 @@ ; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic: ; -O1: ldxp x0, x1, [x8] ; -O1: and x9, x0, x2 -; -O1: mvn x9, x9 ; -O1: and x10, x1, x3 ; -O1: mvn x10, x10 +; -O1: mvn x9, x9 ; -O1: stxp w11, x9, x10, [x8] %r = atomicrmw nand ptr %ptr, i128 %value monotonic, align 16 ret i128 %r @@ -3339,9 +3339,9 @@ ; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire: ; -O1: ldaxp x0, x1, [x8] ; -O1: and x9, x0, x2 -; -O1: mvn x9, x9 ; -O1: and x10, x1, x3 ; -O1: mvn x10, x10 +; -O1: mvn x9, x9 ; -O1: stxp w11, x9, x10, [x8] %r = atomicrmw nand ptr %ptr, i128 %value acquire, align 16 ret i128 %r @@ -3366,9 +3366,9 @@ ; -O1-LABEL: atomicrmw_nand_i128_aligned_release: ; -O1: ldxp x0, x1, [x8] ; -O1: and x9, x0, x2 -; -O1: mvn x9, x9 ; -O1: and x10, x1, x3 ; -O1: mvn x10, x10 +; -O1: mvn x9, x9 ; -O1: stlxp w11, x9, x10, [x8] %r = atomicrmw nand ptr %ptr, i128 %value release, align 16 ret i128 %r @@ -3393,9 +3393,9 @@ ; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel: ; -O1: ldaxp x0, x1, [x8] ; -O1: and x9, x0, x2 -; -O1: mvn x9, x9 ; -O1: and x10, x1, x3 ; -O1: mvn x10, x10 +; -O1: mvn x9, x9 ; -O1: stlxp w11, x9, x10, [x8] %r = atomicrmw nand ptr %ptr, i128 %value acq_rel, align 16 ret i128 %r @@ -3420,9 +3420,9 @@ ; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst: ; -O1: ldaxp x0, x1, [x8] ; -O1: and x9, x0, x2 -; -O1: mvn x9, x9 ; -O1: and x10, x1, x3 ; -O1: mvn x10, x10 +; -O1: mvn x9, x9 ; -O1: stlxp w11, x9, x10, [x8] %r = atomicrmw nand ptr %ptr, i128 %value seq_cst, align 16 ret i128 %r @@ -3530,7 +3530,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i16_unaligned_monotonic: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i16 %value monotonic, align 1 @@ -3544,7 +3544,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i16_unaligned_acquire: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i16 
%value acquire, align 1 @@ -3558,7 +3558,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i16_unaligned_release: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i16 %value release, align 1 @@ -3572,7 +3572,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i16_unaligned_acq_rel: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i16 %value acq_rel, align 1 @@ -3586,7 +3586,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i16_unaligned_seq_cst: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i16 %value seq_cst, align 1 @@ -3600,7 +3600,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i32_unaligned_monotonic: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i32 %value monotonic, align 1 @@ -3614,7 +3614,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i32_unaligned_acquire: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i32 %value acquire, align 1 @@ -3628,7 +3628,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i32_unaligned_release: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i32 %value release, align 1 @@ -3642,7 +3642,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i32_unaligned_acq_rel: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i32 %value acq_rel, align 1 @@ -3656,7 +3656,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i32_unaligned_seq_cst: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i32 %value seq_cst, align 1 @@ -3670,7 +3670,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i64_unaligned_monotonic: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: mvn x8, x8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i64 %value monotonic, align 1 @@ -3684,7 +3684,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i64_unaligned_acquire: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: mvn x8, x8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i64 %value acquire, align 1 @@ -3698,7 +3698,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i64_unaligned_release: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: mvn x8, x8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i64 %value release, align 1 @@ -3712,7 +3712,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i64_unaligned_acq_rel: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: mvn x8, x8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i64 %value acq_rel, align 1 @@ -3726,7 +3726,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i64_unaligned_seq_cst: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: mvn x8, x8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i64 %value seq_cst, align 1 
@@ -3744,7 +3744,7 @@ ; -O1-LABEL: atomicrmw_nand_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: mvn x8, x8 ; -O1: mvn x9, x9 ; -O1: bl __atomic_compare_exchange @@ -3763,7 +3763,7 @@ ; -O1-LABEL: atomicrmw_nand_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: mvn x8, x8 ; -O1: mvn x9, x9 ; -O1: bl __atomic_compare_exchange @@ -3782,7 +3782,7 @@ ; -O1-LABEL: atomicrmw_nand_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: mvn x8, x8 ; -O1: mvn x9, x9 ; -O1: bl __atomic_compare_exchange @@ -3801,7 +3801,7 @@ ; -O1-LABEL: atomicrmw_nand_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: mvn x8, x8 ; -O1: mvn x9, x9 ; -O1: bl __atomic_compare_exchange @@ -3820,7 +3820,7 @@ ; -O1-LABEL: atomicrmw_nand_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: mvn x8, x8 ; -O1: mvn x9, x9 ; -O1: bl __atomic_compare_exchange @@ -4169,9 +4169,9 @@ ; ; -O1-LABEL: atomicrmw_or_i128_aligned_monotonic: ; -O1: ldxp x0, x1, [x8] -; -O1: orr x9, x0, x2 -; -O1: orr x10, x1, x3 -; -O1: stxp w11, x9, x10, [x8] +; -O1: orr x9, x1, x3 +; -O1: orr x10, x0, x2 +; -O1: stxp w11, x10, x9, [x8] %r = atomicrmw or ptr %ptr, i128 %value monotonic, align 16 ret i128 %r } @@ -4192,9 +4192,9 @@ ; ; -O1-LABEL: atomicrmw_or_i128_aligned_acquire: ; -O1: ldaxp x0, x1, [x8] -; -O1: orr x9, x0, x2 -; -O1: orr x10, x1, x3 -; -O1: stxp w11, x9, x10, [x8] +; -O1: orr x9, x1, x3 +; -O1: orr x10, x0, x2 +; -O1: stxp w11, x10, x9, [x8] %r = atomicrmw or ptr %ptr, i128 %value acquire, align 16 ret i128 %r } @@ -4215,9 +4215,9 @@ ; ; -O1-LABEL: atomicrmw_or_i128_aligned_release: ; -O1: ldxp x0, x1, [x8] -; -O1: orr x9, x0, x2 -; -O1: orr x10, x1, x3 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: orr x9, x1, x3 +; -O1: orr x10, x0, x2 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw or ptr %ptr, i128 %value release, align 16 ret i128 %r } @@ -4238,9 +4238,9 @@ ; ; -O1-LABEL: atomicrmw_or_i128_aligned_acq_rel: ; -O1: ldaxp x0, x1, [x8] -; -O1: orr x9, x0, x2 -; -O1: orr x10, x1, x3 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: orr x9, x1, x3 +; -O1: orr x10, x0, x2 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 16 ret i128 %r } @@ -4261,9 +4261,9 @@ ; ; -O1-LABEL: atomicrmw_or_i128_aligned_seq_cst: ; -O1: ldaxp x0, x1, [x8] -; -O1: orr x9, x0, x2 -; -O1: orr x10, x1, x3 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: orr x9, x1, x3 +; -O1: orr x10, x0, x2 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 16 ret i128 %r } @@ -4359,7 +4359,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i16_unaligned_monotonic: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -4371,7 +4371,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i16_unaligned_acquire: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -4383,7 +4383,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i16_unaligned_release: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl 
__atomic_compare_exchange %r = atomicrmw or ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -4395,7 +4395,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i16_unaligned_acq_rel: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -4407,7 +4407,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i16_unaligned_seq_cst: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -4419,7 +4419,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i32_unaligned_monotonic: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -4431,7 +4431,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i32_unaligned_acquire: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -4443,7 +4443,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i32_unaligned_release: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -4455,7 +4455,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i32_unaligned_acq_rel: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -4467,7 +4467,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i32_unaligned_seq_cst: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -4479,7 +4479,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i64_unaligned_monotonic: -; -O1: orr x8, x0, x19 +; -O1: orr x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -4491,7 +4491,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i64_unaligned_acquire: -; -O1: orr x8, x0, x19 +; -O1: orr x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -4503,7 +4503,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i64_unaligned_release: -; -O1: orr x8, x0, x19 +; -O1: orr x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -4515,7 +4515,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i64_unaligned_acq_rel: -; -O1: orr x8, x0, x19 +; -O1: orr x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -4527,7 +4527,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i64_unaligned_seq_cst: -; -O1: orr x8, x0, x19 +; -O1: orr x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -4542,7 +4542,7 @@ ; -O1-LABEL: atomicrmw_or_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] ; -O1: orr x8, x1, x19 -; -O1: orr x9, x0, x20 +; -O1: orr x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -4557,7 +4557,7 @@ ; -O1-LABEL: 
atomicrmw_or_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] ; -O1: orr x8, x1, x19 -; -O1: orr x9, x0, x20 +; -O1: orr x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -4572,7 +4572,7 @@ ; -O1-LABEL: atomicrmw_or_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] ; -O1: orr x8, x1, x19 -; -O1: orr x9, x0, x20 +; -O1: orr x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -4587,7 +4587,7 @@ ; -O1-LABEL: atomicrmw_or_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] ; -O1: orr x8, x1, x19 -; -O1: orr x9, x0, x20 +; -O1: orr x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -4602,7 +4602,7 @@ ; -O1-LABEL: atomicrmw_or_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] ; -O1: orr x8, x1, x19 -; -O1: orr x9, x0, x20 +; -O1: orr x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -4949,9 +4949,9 @@ ; ; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic: ; -O1: ldxp x0, x1, [x8] -; -O1: eor x9, x0, x2 -; -O1: eor x10, x1, x3 -; -O1: stxp w11, x9, x10, [x8] +; -O1: eor x9, x1, x3 +; -O1: eor x10, x0, x2 +; -O1: stxp w11, x10, x9, [x8] %r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 16 ret i128 %r } @@ -4972,9 +4972,9 @@ ; ; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire: ; -O1: ldaxp x0, x1, [x8] -; -O1: eor x9, x0, x2 -; -O1: eor x10, x1, x3 -; -O1: stxp w11, x9, x10, [x8] +; -O1: eor x9, x1, x3 +; -O1: eor x10, x0, x2 +; -O1: stxp w11, x10, x9, [x8] %r = atomicrmw xor ptr %ptr, i128 %value acquire, align 16 ret i128 %r } @@ -4995,9 +4995,9 @@ ; ; -O1-LABEL: atomicrmw_xor_i128_aligned_release: ; -O1: ldxp x0, x1, [x8] -; -O1: eor x9, x0, x2 -; -O1: eor x10, x1, x3 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: eor x9, x1, x3 +; -O1: eor x10, x0, x2 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw xor ptr %ptr, i128 %value release, align 16 ret i128 %r } @@ -5018,9 +5018,9 @@ ; ; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel: ; -O1: ldaxp x0, x1, [x8] -; -O1: eor x9, x0, x2 -; -O1: eor x10, x1, x3 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: eor x9, x1, x3 +; -O1: eor x10, x0, x2 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 16 ret i128 %r } @@ -5041,9 +5041,9 @@ ; ; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst: ; -O1: ldaxp x0, x1, [x8] -; -O1: eor x9, x0, x2 -; -O1: eor x10, x1, x3 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: eor x9, x1, x3 +; -O1: eor x10, x0, x2 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 16 ret i128 %r } @@ -5139,7 +5139,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i16_unaligned_monotonic: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -5151,7 +5151,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i16_unaligned_acquire: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -5163,7 +5163,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i16_unaligned_release: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -5175,7 +5175,7 @@ ; -O0: bl 
__atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i16_unaligned_acq_rel: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -5187,7 +5187,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i16_unaligned_seq_cst: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -5199,7 +5199,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i32_unaligned_monotonic: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -5211,7 +5211,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i32_unaligned_acquire: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -5223,7 +5223,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i32_unaligned_release: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -5235,7 +5235,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i32_unaligned_acq_rel: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -5247,7 +5247,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i32_unaligned_seq_cst: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -5259,7 +5259,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i64_unaligned_monotonic: -; -O1: eor x8, x0, x19 +; -O1: eor x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -5271,7 +5271,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i64_unaligned_acquire: -; -O1: eor x8, x0, x19 +; -O1: eor x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -5283,7 +5283,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i64_unaligned_release: -; -O1: eor x8, x0, x19 +; -O1: eor x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -5295,7 +5295,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i64_unaligned_acq_rel: -; -O1: eor x8, x0, x19 +; -O1: eor x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -5307,7 +5307,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i64_unaligned_seq_cst: -; -O1: eor x8, x0, x19 +; -O1: eor x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -5322,7 +5322,7 @@ ; -O1-LABEL: atomicrmw_xor_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] ; -O1: eor x8, x1, x19 -; -O1: eor x9, x0, x20 +; -O1: eor x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -5337,7 +5337,7 @@ ; -O1-LABEL: atomicrmw_xor_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] ; -O1: eor x8, x1, x19 -; -O1: eor x9, x0, x20 +; -O1: eor 
x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -5352,7 +5352,7 @@ ; -O1-LABEL: atomicrmw_xor_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] ; -O1: eor x8, x1, x19 -; -O1: eor x9, x0, x20 +; -O1: eor x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -5367,7 +5367,7 @@ ; -O1-LABEL: atomicrmw_xor_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] ; -O1: eor x8, x1, x19 -; -O1: eor x9, x0, x20 +; -O1: eor x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -5382,7 +5382,7 @@ ; -O1-LABEL: atomicrmw_xor_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] ; -O1: eor x8, x1, x19 -; -O1: eor x9, x0, x20 +; -O1: eor x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -6109,8 +6109,8 @@ ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, gt +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -6127,8 +6127,8 @@ ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, gt +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -6145,8 +6145,8 @@ ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_release: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, gt +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -6163,8 +6163,8 @@ ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, gt +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -6181,8 +6181,8 @@ ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, gt +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -6197,8 +6197,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, gt +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -6213,8 +6213,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, gt +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -6229,8 +6229,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_release: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, gt +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -6245,8 +6245,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel: -; -O1: cmp w0, w19 -; 
-O1: csel w8, w0, w19, gt +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -6261,8 +6261,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, gt +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -6277,8 +6277,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, gt +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -6293,8 +6293,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, gt +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -6309,8 +6309,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_release: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, gt +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -6325,8 +6325,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, gt +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -6341,8 +6341,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, gt +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -6366,9 +6366,9 @@ ; ; -O1-LABEL: atomicrmw_max_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, lt -; -O1: csel x9, x0, x20, lt +; -O1: csel x9, x0, x21, lt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -6392,9 +6392,9 @@ ; ; -O1-LABEL: atomicrmw_max_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, lt -; -O1: csel x9, x0, x20, lt +; -O1: csel x9, x0, x21, lt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -6418,9 +6418,9 @@ ; ; -O1-LABEL: atomicrmw_max_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, lt -; -O1: csel x9, x0, x20, lt +; -O1: csel x9, x0, x21, lt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -6444,9 +6444,9 @@ ; ; -O1-LABEL: atomicrmw_max_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, lt -; -O1: csel x9, x0, x20, lt +; -O1: csel x9, x0, x21, lt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -6470,9 +6470,9 @@ ; ; -O1-LABEL: atomicrmw_max_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, lt -; 
-O1: csel x9, x0, x20, lt
+; -O1: csel x9, x0, x21, lt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i128 %value seq_cst, align 1
 ret i128 %r
@@ -7199,8 +7199,8 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i16 %value monotonic, align 1
 ret i16 %r
@@ -7217,8 +7217,8 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i16 %value acquire, align 1
 ret i16 %r
@@ -7235,8 +7235,8 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i16 %value release, align 1
 ret i16 %r
@@ -7253,8 +7253,8 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i16 %value acq_rel, align 1
 ret i16 %r
@@ -7271,8 +7271,8 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i16 %value seq_cst, align 1
 ret i16 %r
@@ -7287,8 +7287,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i32 %value monotonic, align 1
 ret i32 %r
@@ -7303,8 +7303,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i32 %value acquire, align 1
 ret i32 %r
@@ -7319,8 +7319,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i32 %value release, align 1
 ret i32 %r
@@ -7335,8 +7335,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i32 %value acq_rel, align 1
 ret i32 %r
@@ -7351,8 +7351,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i32 %value seq_cst, align 1
 ret i32 %r
@@ -7367,8 +7367,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, le
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i64 %value monotonic, align 1
 ret i64 %r
@@ -7383,8 +7383,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, le
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i64 %value acquire, align 1
 ret i64 %r
@@ -7399,8 +7399,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, le
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i64 %value release, align 1
 ret i64 %r
@@ -7415,8 +7415,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, le
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i64 %value acq_rel, align 1
 ret i64 %r
@@ -7431,8 +7431,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, le
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i64 %value seq_cst, align 1
 ret i64 %r
@@ -7456,9 +7456,9 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i128_unaligned_monotonic:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, ge
-; -O1: csel x9, x0, x20, ge
+; -O1: csel x9, x0, x21, ge
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i128 %value monotonic, align 1
 ret i128 %r
@@ -7482,9 +7482,9 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i128_unaligned_acquire:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, ge
-; -O1: csel x9, x0, x20, ge
+; -O1: csel x9, x0, x21, ge
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i128 %value acquire, align 1
 ret i128 %r
@@ -7508,9 +7508,9 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i128_unaligned_release:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, ge
-; -O1: csel x9, x0, x20, ge
+; -O1: csel x9, x0, x21, ge
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i128 %value release, align 1
 ret i128 %r
@@ -7534,9 +7534,9 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, ge
-; -O1: csel x9, x0, x20, ge
+; -O1: csel x9, x0, x21, ge
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i128 %value acq_rel, align 1
 ret i128 %r
@@ -7560,9 +7560,9 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, ge
-; -O1: csel x9, x0, x20, ge
+; -O1: csel x9, x0, x21, ge
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i128 %value seq_cst, align 1
 ret i128 %r
@@ -8283,8 +8283,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i16 %value monotonic, align 1
 ret i16 %r
@@ -8300,8 +8300,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i16 %value acquire, align 1
 ret i16 %r
@@ -8317,8 +8317,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i16 %value release, align 1
 ret i16 %r
@@ -8334,8 +8334,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i16 %value acq_rel, align 1
 ret i16 %r
@@ -8351,8 +8351,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i16 %value seq_cst, align 1
 ret i16 %r
@@ -8367,8 +8367,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i32 %value monotonic, align 1
 ret i32 %r
@@ -8383,8 +8383,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i32 %value acquire, align 1
 ret i32 %r
@@ -8399,8 +8399,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i32 %value release, align 1
 ret i32 %r
@@ -8415,8 +8415,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i32 %value acq_rel, align 1
 ret i32 %r
@@ -8431,8 +8431,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i32 %value seq_cst, align 1
 ret i32 %r
@@ -8447,8 +8447,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i64 %value monotonic, align 1
 ret i64 %r
@@ -8463,8 +8463,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i64 %value acquire, align 1
 ret i64 %r
@@ -8479,8 +8479,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i64 %value release, align 1
 ret i64 %r
@@ -8495,8 +8495,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i64 %value acq_rel, align 1
 ret i64 %r
@@ -8511,8 +8511,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i64 %value seq_cst, align 1
 ret i64 %r
@@ -8536,9 +8536,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i128 %value monotonic, align 1
 ret i128 %r
@@ -8562,9 +8562,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_unaligned_acquire:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i128 %value acquire, align 1
 ret i128 %r
@@ -8588,9 +8588,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_unaligned_release:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i128 %value release, align 1
 ret i128 %r
@@ -8614,9 +8614,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i128 %value acq_rel, align 1
 ret i128 %r
@@ -8640,9 +8640,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i128 %value seq_cst, align 1
 ret i128 %r
@@ -9363,8 +9363,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i16 %value monotonic, align 1
 ret i16 %r
@@ -9380,8 +9380,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i16 %value acquire, align 1
 ret i16 %r
@@ -9397,8 +9397,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i16 %value release, align 1
 ret i16 %r
@@ -9414,8 +9414,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i16 %value acq_rel, align 1
 ret i16 %r
@@ -9431,8 +9431,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i16 %value seq_cst, align 1
 ret i16 %r
@@ -9447,8 +9447,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i32 %value monotonic, align 1
 ret i32 %r
@@ -9463,8 +9463,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i32 %value acquire, align 1
 ret i32 %r
@@ -9479,8 +9479,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i32 %value release, align 1
 ret i32 %r
@@ -9495,8 +9495,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i32 %value acq_rel, align 1
 ret i32 %r
@@ -9511,8 +9511,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i32 %value seq_cst, align 1
 ret i32 %r
@@ -9527,8 +9527,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i64 %value monotonic, align 1
 ret i64 %r
@@ -9543,8 +9543,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i64 %value acquire, align 1
 ret i64 %r
@@ -9559,8 +9559,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i64 %value release, align 1
 ret i64 %r
@@ -9575,8 +9575,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i64 %value acq_rel, align 1
 ret i64 %r
@@ -9591,8 +9591,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i64 %value seq_cst, align 1
 ret i64 %r
@@ -9616,9 +9616,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_unaligned_monotonic:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i128 %value monotonic, align 1
 ret i128 %r
@@ -9642,9 +9642,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_unaligned_acquire:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i128 %value acquire, align 1
 ret i128 %r
@@ -9668,9 +9668,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_unaligned_release:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i128 %value release, align 1
 ret i128 %r
@@ -9694,9 +9694,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_unaligned_acq_rel:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i128 %value acq_rel, align 1
 ret i128 %r
@@ -9720,9 +9720,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i128_unaligned_seq_cst:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i128 %value seq_cst, align 1
 ret i128 %r
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8_1a.ll
@@ -689,7 +689,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i16_unaligned_monotonic:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i16 %value monotonic, align 1
 ret i16 %r
@@ -701,7 +701,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i16_unaligned_acquire:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i16 %value acquire, align 1
 ret i16 %r
@@ -713,7 +713,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i16_unaligned_release:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i16 %value release, align 1
 ret i16 %r
@@ -725,7 +725,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i16 %value acq_rel, align 1
 ret i16 %r
@@ -737,7 +737,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i16 %value seq_cst, align 1
 ret i16 %r
@@ -749,7 +749,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i32_unaligned_monotonic:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i32 %value monotonic, align 1
 ret i32 %r
@@ -761,7 +761,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i32_unaligned_acquire:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i32 %value acquire, align 1
 ret i32 %r
@@ -773,7 +773,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i32_unaligned_release:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i32 %value release, align 1
 ret i32 %r
@@ -785,7 +785,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i32 %value acq_rel, align 1
 ret i32 %r
@@ -797,7 +797,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i32 %value seq_cst, align 1
 ret i32 %r
@@ -809,7 +809,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i64_unaligned_monotonic:
-; -O1: add x8, x0, x19
+; -O1: add x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i64 %value monotonic, align 1
 ret i64 %r
@@ -821,7 +821,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i64_unaligned_acquire:
-; -O1: add x8, x0, x19
+; -O1: add x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i64 %value acquire, align 1
 ret i64 %r
@@ -833,7 +833,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i64_unaligned_release:
-; -O1: add x8, x0, x19
+; -O1: add x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i64 %value release, align 1
 ret i64 %r
@@ -845,7 +845,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
-; -O1: add x8, x0, x19
+; -O1: add x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i64 %value acq_rel, align 1
 ret i64 %r
@@ -857,7 +857,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
-; -O1: add x8, x0, x19
+; -O1: add x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i64 %value seq_cst, align 1
 ret i64 %r
@@ -872,7 +872,7 @@
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic:
 ; -O1: ldp x0, x1, [x0]
-; -O1: adds x8, x0, x20
+; -O1: adds x8, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i128 %value monotonic, align 1
 ret i128 %r
@@ -887,7 +887,7 @@
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire:
 ; -O1: ldp x0, x1, [x0]
-; -O1: adds x8, x0, x20
+; -O1: adds x8, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i128 %value acquire, align 1
 ret i128 %r
@@ -902,7 +902,7 @@
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_release:
 ; -O1: ldp x0, x1, [x0]
-; -O1: adds x8, x0, x20
+; -O1: adds x8, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i128 %value release, align 1
 ret i128 %r
@@ -917,7 +917,7 @@
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel:
 ; -O1: ldp x0, x1, [x0]
-; -O1: adds x8, x0, x20
+; -O1: adds x8, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i128 %value acq_rel, align 1
 ret i128 %r
@@ -932,7 +932,7 @@
 ;
 ; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst:
 ; -O1: ldp x0, x1, [x0]
-; -O1: adds x8, x0, x20
+; -O1: adds x8, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i128 %value seq_cst, align 1
 ret i128 %r
@@ -1219,7 +1219,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw sub ptr %ptr, i16 %value monotonic, align 1
 ret i16 %r
@@ -1231,7 +1231,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_unaligned_acquire:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw sub ptr %ptr, i16 %value acquire, align 1
 ret i16 %r
@@ -1243,7 +1243,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_unaligned_release:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw sub ptr %ptr, i16 %value release, align 1
 ret i16 %r
@@ -1255,7 +1255,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw sub ptr %ptr, i16 %value acq_rel, align 1
 ret i16 %r
@@ -1267,7 +1267,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw sub ptr %ptr, i16 %value seq_cst, align 1
 ret i16 %r
@@ -1279,7 +1279,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw sub ptr %ptr, i32 %value monotonic, align 1
 ret i32 %r
@@ -1291,7 +1291,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_unaligned_acquire:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw sub ptr %ptr, i32 %value acquire, align 1
 ret i32 %r
@@ -1303,7 +1303,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_unaligned_release:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw sub ptr %ptr, i32 %value release, align 1
 ret i32 %r
@@ -1315,7 +1315,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw sub ptr %ptr, i32 %value acq_rel, align 1
 ret i32 %r
@@ -1327,7 +1327,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw sub ptr %ptr, i32 %value seq_cst, align 1
 ret i32 %r
@@ -1339,7 +1339,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
-; -O1: sub x8, x0, x19
+; -O1: sub x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw sub ptr %ptr, i64 %value monotonic, align 1
 ret i64 %r
@@ -1351,7 +1351,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_unaligned_acquire:
-; -O1: sub x8, x0, x19
+; -O1: sub x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw sub ptr %ptr, i64 %value acquire, align 1
 ret i64 %r
@@ -1363,7 +1363,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_unaligned_release:
-; -O1: sub x8, x0, x19
+; -O1: sub x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw sub ptr %ptr, i64 %value release, align 1
 ret i64 %r
@@ -1375,7 +1375,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
-; -O1: sub x8, x0, x19
+; -O1: sub x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw sub ptr %ptr, i64 %value acq_rel, align 1
 ret i64 %r
@@ -1387,7 +1387,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
-; -O1: sub x8, x0, x19
+; -O1: sub x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw sub ptr %ptr, i64 %value seq_cst, align 1
 ret i64 %r
@@ -1401,7 +1401,7 @@
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic:
 ; -O1: ldp x0, x1, [x0]
-; -O1: subs x8, x0, x20
+; -O1: subs x8, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw sub ptr %ptr, i128 %value monotonic, align 1
 ret i128 %r
@@ -1415,7 +1415,7 @@
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire:
 ; -O1: ldp x0, x1, [x0]
-; -O1: subs x8, x0, x20
+; -O1: subs x8, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw sub ptr %ptr, i128 %value acquire, align 1
 ret i128 %r
@@ -1429,7 +1429,7 @@
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release:
 ; -O1: ldp x0, x1, [x0]
-; -O1: subs x8, x0, x20
+; -O1: subs x8, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw sub ptr %ptr, i128 %value release, align 1
 ret i128 %r
@@ -1443,7 +1443,7 @@
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel:
 ; -O1: ldp x0, x1, [x0]
-; -O1: subs x8, x0, x20
+; -O1: subs x8, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw sub ptr %ptr, i128 %value acq_rel, align 1
 ret i128 %r
@@ -1457,7 +1457,7 @@
 ;
 ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst:
 ; -O1: ldp x0, x1, [x0]
-; -O1: subs x8, x0, x20
+; -O1: subs x8, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw sub ptr %ptr, i128 %value seq_cst, align 1
 ret i128 %r
@@ -1774,7 +1774,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i16_unaligned_monotonic:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i16 %value monotonic, align 1
 ret i16 %r
@@ -1786,7 +1786,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i16_unaligned_acquire:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i16 %value acquire, align 1
 ret i16 %r
@@ -1798,7 +1798,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i16_unaligned_release:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i16 %value release, align 1
 ret i16 %r
@@ -1810,7 +1810,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i16 %value acq_rel, align 1
 ret i16 %r
@@ -1822,7 +1822,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i16 %value seq_cst, align 1
 ret i16 %r
@@ -1834,7 +1834,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i32_unaligned_monotonic:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i32 %value monotonic, align 1
 ret i32 %r
@@ -1846,7 +1846,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i32_unaligned_acquire:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i32 %value acquire, align 1
 ret i32 %r
@@ -1858,7 +1858,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i32_unaligned_release:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i32 %value release, align 1
 ret i32 %r
@@ -1870,7 +1870,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i32 %value acq_rel, align 1
 ret i32 %r
@@ -1882,7 +1882,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i32 %value seq_cst, align 1
 ret i32 %r
@@ -1894,7 +1894,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i64_unaligned_monotonic:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i64 %value monotonic, align 1
 ret i64 %r
@@ -1906,7 +1906,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i64_unaligned_acquire:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i64 %value acquire, align 1
 ret i64 %r
@@ -1918,7 +1918,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i64_unaligned_release:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i64 %value release, align 1
 ret i64 %r
@@ -1930,7 +1930,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i64 %value acq_rel, align 1
 ret i64 %r
@@ -1942,7 +1942,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i64 %value seq_cst, align 1
 ret i64 %r
@@ -1957,7 +1957,7 @@
 ; -O1-LABEL: atomicrmw_and_i128_unaligned_monotonic:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i128 %value monotonic, align 1
 ret i128 %r
@@ -1972,7 +1972,7 @@
 ; -O1-LABEL: atomicrmw_and_i128_unaligned_acquire:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i128 %value acquire, align 1
 ret i128 %r
@@ -1987,7 +1987,7 @@
 ; -O1-LABEL: atomicrmw_and_i128_unaligned_release:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i128 %value release, align 1
 ret i128 %r
@@ -2002,7 +2002,7 @@
 ; -O1-LABEL: atomicrmw_and_i128_unaligned_acq_rel:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 1
 ret i128 %r
@@ -2017,7 +2017,7 @@
 ; -O1-LABEL: atomicrmw_and_i128_unaligned_seq_cst:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 1
 ret i128 %r
@@ -2565,7 +2565,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_unaligned_monotonic:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: mvn w8, w8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i16 %value monotonic, align 1
@@ -2579,7 +2579,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_unaligned_acquire:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: mvn w8, w8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i16 %value acquire, align 1
@@ -2593,7 +2593,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_unaligned_release:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: mvn w8, w8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i16 %value release, align 1
@@ -2607,7 +2607,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_unaligned_acq_rel:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: mvn w8, w8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i16 %value acq_rel, align 1
@@ -2621,7 +2621,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_unaligned_seq_cst:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: mvn w8, w8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i16 %value seq_cst, align 1
@@ -2635,7 +2635,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_unaligned_monotonic:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: mvn w8, w8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i32 %value monotonic, align 1
@@ -2649,7 +2649,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_unaligned_acquire:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: mvn w8, w8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i32 %value acquire, align 1
@@ -2663,7 +2663,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_unaligned_release:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: mvn w8, w8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i32 %value release, align 1
@@ -2677,7 +2677,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_unaligned_acq_rel:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: mvn w8, w8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i32 %value acq_rel, align 1
@@ -2691,7 +2691,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_unaligned_seq_cst:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: mvn w8, w8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i32 %value seq_cst, align 1
@@ -2705,7 +2705,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_unaligned_monotonic:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
 ; -O1: mvn x8, x8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i64 %value monotonic, align 1
@@ -2719,7 +2719,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_unaligned_acquire:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
 ; -O1: mvn x8, x8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i64 %value acquire, align 1
@@ -2733,7 +2733,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_unaligned_release:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
 ; -O1: mvn x8, x8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i64 %value release, align 1
@@ -2747,7 +2747,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_unaligned_acq_rel:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
 ; -O1: mvn x8, x8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i64 %value acq_rel, align 1
@@ -2761,7 +2761,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_unaligned_seq_cst:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
 ; -O1: mvn x8, x8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i64 %value seq_cst, align 1
@@ -2779,7 +2779,7 @@
 ; -O1-LABEL: atomicrmw_nand_i128_unaligned_monotonic:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
 ; -O1: mvn x8, x8
 ; -O1: mvn x9, x9
 ; -O1: bl __atomic_compare_exchange
@@ -2798,7 +2798,7 @@
 ; -O1-LABEL: atomicrmw_nand_i128_unaligned_acquire:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
 ; -O1: mvn x8, x8
 ; -O1: mvn x9, x9
 ; -O1: bl __atomic_compare_exchange
@@ -2817,7 +2817,7 @@
 ; -O1-LABEL: atomicrmw_nand_i128_unaligned_release:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
 ; -O1: mvn x8, x8
 ; -O1: mvn x9, x9
 ; -O1: bl __atomic_compare_exchange
@@ -2836,7 +2836,7 @@
 ; -O1-LABEL: atomicrmw_nand_i128_unaligned_acq_rel:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
 ; -O1: mvn x8, x8
 ; -O1: mvn x9, x9
 ; -O1: bl __atomic_compare_exchange
@@ -2855,7 +2855,7 @@
 ; -O1-LABEL: atomicrmw_nand_i128_unaligned_seq_cst:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
 ; -O1: mvn x8, x8
 ; -O1: mvn x9, x9
 ; -O1: bl __atomic_compare_exchange
@@ -3149,7 +3149,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i16_unaligned_monotonic:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i16 %value monotonic, align 1
 ret i16 %r
@@ -3161,7 +3161,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i16_unaligned_acquire:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i16 %value acquire, align 1
 ret i16 %r
@@ -3173,7 +3173,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i16_unaligned_release:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i16 %value release, align 1
 ret i16 %r
@@ -3185,7 +3185,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i16 %value acq_rel, align 1
 ret i16 %r
@@ -3197,7 +3197,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i16 %value seq_cst, align 1
 ret i16 %r
@@ -3209,7 +3209,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i32_unaligned_monotonic:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i32 %value monotonic, align 1
 ret i32 %r
@@ -3221,7 +3221,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i32_unaligned_acquire:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i32 %value acquire, align 1
 ret i32 %r
@@ -3233,7 +3233,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i32_unaligned_release:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i32 %value release, align 1
 ret i32 %r
@@ -3245,7 +3245,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i32 %value acq_rel, align 1
 ret i32 %r
@@ -3257,7 +3257,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i32 %value seq_cst, align 1
 ret i32 %r
@@ -3269,7 +3269,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i64_unaligned_monotonic:
-; -O1: orr x8, x0, x19
+; -O1: orr x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i64 %value monotonic, align 1
 ret i64 %r
@@ -3281,7 +3281,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i64_unaligned_acquire:
-; -O1: orr x8, x0, x19
+; -O1: orr x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i64 %value acquire, align 1
 ret i64 %r
@@ -3293,7 +3293,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i64_unaligned_release:
-; -O1: orr x8, x0, x19
+; -O1: orr x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i64 %value release, align 1
 ret i64 %r
@@ -3305,7 +3305,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
-; -O1: orr x8, x0, x19
+; -O1: orr x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i64 %value acq_rel, align 1
 ret i64 %r
@@ -3317,7 +3317,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
-; -O1: orr x8, x0, x19
+; -O1: orr x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i64 %value seq_cst, align 1
 ret i64 %r
@@ -3332,7 +3332,7 @@
 ; -O1-LABEL: atomicrmw_or_i128_unaligned_monotonic:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x20
+; -O1: orr x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i128 %value monotonic, align 1
 ret i128 %r
@@ -3347,7 +3347,7 @@
 ; -O1-LABEL: atomicrmw_or_i128_unaligned_acquire:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x20
+; -O1: orr x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i128 %value acquire, align 1
 ret i128 %r
@@ -3362,7 +3362,7 @@
 ; -O1-LABEL: atomicrmw_or_i128_unaligned_release:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x20
+; -O1: orr x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i128 %value release, align 1
 ret i128 %r
@@ -3377,7 +3377,7 @@
 ; -O1-LABEL: atomicrmw_or_i128_unaligned_acq_rel:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x20
+; -O1: orr x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 1
 ret i128 %r
@@ -3392,7 +3392,7 @@
 ; -O1-LABEL: atomicrmw_or_i128_unaligned_seq_cst:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x20
+; -O1: orr x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 1
 ret i128 %r
@@ -3684,7 +3684,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i16 %value monotonic, align 1
 ret i16 %r
@@ -3696,7 +3696,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_unaligned_acquire:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i16 %value acquire, align 1
 ret i16 %r
@@ -3708,7 +3708,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_unaligned_release:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i16 %value release, align 1
 ret i16 %r
@@ -3720,7 +3720,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i16 %value acq_rel, align 1
 ret i16 %r
@@ -3732,7 +3732,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i16 %value seq_cst, align 1
 ret i16 %r
@@ -3744,7 +3744,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i32 %value monotonic, align 1
 ret i32 %r
@@ -3756,7 +3756,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_unaligned_acquire:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i32 %value acquire, align 1
 ret i32 %r
@@ -3768,7 +3768,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_unaligned_release:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i32 %value release, align 1
 ret i32 %r
@@ -3780,7 +3780,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i32 %value acq_rel, align 1
 ret i32 %r
@@ -3792,7 +3792,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i32 %value seq_cst, align 1
 ret i32 %r
@@ -3804,7 +3804,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
-; -O1: eor x8, x0, x19
+; -O1: eor x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i64 %value monotonic, align 1
 ret i64 %r
@@ -3816,7 +3816,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_unaligned_acquire:
-; -O1: eor x8, x0, x19
+; -O1: eor x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i64 %value acquire, align 1
 ret i64 %r
@@ -3828,7 +3828,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_unaligned_release:
-; -O1: eor x8, x0, x19
+; -O1: eor x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i64 %value release, align 1
 ret i64 %r
@@ -3840,7 +3840,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
-; -O1: eor x8, x0, x19
+; -O1: eor x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i64 %value acq_rel, align 1
 ret i64 %r
@@ -3852,7 +3852,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
-; -O1: eor x8, x0, x19
+; -O1: eor x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i64 %value seq_cst, align 1
 ret i64 %r
@@ -3867,7 +3867,7 @@
 ; -O1-LABEL: atomicrmw_xor_i128_unaligned_monotonic:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x20
+; -O1: eor x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 1
 ret i128 %r
@@ -3882,7 +3882,7 @@
 ; -O1-LABEL: atomicrmw_xor_i128_unaligned_acquire:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x20
+; -O1: eor x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i128 %value acquire, align 1
 ret i128 %r
@@ -3897,7 +3897,7 @@
 ; -O1-LABEL: atomicrmw_xor_i128_unaligned_release:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x20
+; -O1: eor x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i128 %value release, align 1
 ret i128 %r
@@ -3912,7 +3912,7 @@
 ; -O1-LABEL: atomicrmw_xor_i128_unaligned_acq_rel:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x20
+; -O1: eor x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 1
 ret i128 %r
@@ -3927,7 +3927,7 @@
 ; -O1-LABEL: atomicrmw_xor_i128_unaligned_seq_cst:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x20
+; -O1: eor x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 1
 ret i128 %r
@@ -4279,8 +4279,8 @@
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i16 %value monotonic, align 1
 ret i16 %r
@@ -4297,8 +4297,8 @@
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i16 %value acquire, align 1
 ret i16 %r
@@ -4315,8 +4315,8 @@
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i16 %value release, align 1
 ret i16 %r
@@ -4333,8 +4333,8 @@
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i16 %value acq_rel, align 1
 ret i16 %r
@@ -4351,8 +4351,8 @@
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i16 %value seq_cst, align 1
 ret i16 %r
@@ -4367,8 +4367,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i32 %value monotonic, align 1
 ret i32 %r
@@ -4383,8 +4383,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i32 %value acquire, align 1
 ret i32 %r
@@ -4399,8 +4399,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i32 %value release, align 1
 ret i32 %r
@@ -4415,8 +4415,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i32 %value acq_rel, align 1
 ret i32 %r
@@ -4431,8 +4431,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i32 %value seq_cst, align 1
 ret i32 %r
@@ -4447,8 +4447,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, gt
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i64 %value monotonic, align 1
 ret i64 %r
@@ -4463,8 +4463,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, gt
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i64 %value acquire, align 1
 ret i64 %r
@@ -4479,8 +4479,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, gt
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i64 %value release, align 1
 ret i64 %r
@@ -4495,8 +4495,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, gt
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i64 %value acq_rel, align 1
 ret i64 %r
@@ -4511,8 +4511,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, gt
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i64 %value seq_cst, align 1
 ret i64 %r
@@ -4536,9 +4536,9 @@
 ;
 ; -O1-LABEL: atomicrmw_max_i128_unaligned_monotonic:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, lt
-; -O1: csel x9, x0, x20, lt
+; -O1: csel x9, x0, x21, lt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i128 %value monotonic, align 1
 ret i128 %r
@@ -4562,9 +4562,9 @@
 ;
 ; -O1-LABEL: atomicrmw_max_i128_unaligned_acquire:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, lt
-; -O1: csel x9, x0, x20, lt
+; -O1: csel x9, x0, x21, lt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i128 %value acquire, align 1
 ret i128 %r
@@ -4588,9 +4588,9 @@
 ;
 ; -O1-LABEL: atomicrmw_max_i128_unaligned_release:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, lt
-; -O1: csel x9, x0, x20, lt
+; -O1: csel x9, x0, x21, lt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i128 %value release, align 1
 ret i128 %r
@@ -4614,9 +4614,9 @@
 ;
 ; -O1-LABEL: atomicrmw_max_i128_unaligned_acq_rel:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, lt
-; -O1: csel x9, x0, x20, lt
+; -O1: csel x9, x0, x21, lt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i128 %value acq_rel, align 1
 ret i128 %r
@@ -4640,9 +4640,9 @@
 ;
 ; -O1-LABEL: atomicrmw_max_i128_unaligned_seq_cst:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, lt
-; -O1: csel x9, x0, x20, lt
+; -O1: csel x9, x0, x21, lt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i128 %value seq_cst, align 1
 ret i128 %r
@@ -4994,8 +4994,8 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i16 %value monotonic, align 1
 ret i16 %r
@@ -5012,8 +5012,8 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i16 %value acquire, align 1
 ret i16 %r
@@ -5030,8 +5030,8 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i16 %value release, align 1
 ret i16 %r
@@ -5048,8 +5048,8 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i16 %value acq_rel, align 1
 ret i16 %r
@@ -5066,8 +5066,8 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i16 %value seq_cst, align 1
 ret i16 %r
@@ -5082,8 +5082,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i32 %value monotonic, align 1
 ret i32 %r
@@ -5098,8 +5098,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i32 %value acquire, align 1
 ret i32 %r
@@ -5114,8 +5114,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i32 %value release, align 1
 ret i32 %r
@@ -5130,8 +5130,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i32 %value acq_rel, align 1
 ret i32 %r
@@ -5146,8 +5146,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i32 %value seq_cst, align 1
 ret i32 %r
@@ -5162,8 +5162,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, le
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i64 %value monotonic, align 1
 ret i64 %r
@@ -5178,8 +5178,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, le
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i64 %value acquire, align 1
 ret i64 %r
@@ -5194,8 +5194,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, le
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i64 %value release, align 1
 ret i64 %r
@@ -5210,8 +5210,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, le
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i64 %value acq_rel, align 1
 ret i64 %r
@@ -5226,8 +5226,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, le
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i64 %value seq_cst, align 1
 ret i64 %r
@@ -5251,9 +5251,9 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i128_unaligned_monotonic:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, ge
-; -O1: csel x9, x0, x20, ge
+; -O1: csel x9, x0, x21, ge
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i128 %value monotonic, align 1
 ret i128 %r
@@ -5277,9 +5277,9 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i128_unaligned_acquire:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, ge
-; -O1: csel x9, x0, x20, ge
+; -O1: csel x9, x0, x21, ge
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i128 %value acquire, align 1
 ret i128 %r
@@ -5303,9 +5303,9 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i128_unaligned_release:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, ge
-; -O1: csel x9, x0, x20, ge
+; -O1: csel x9, x0, x21, ge
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i128 %value release, align 1
 ret i128 %r
@@ -5329,9 +5329,9 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i128_unaligned_acq_rel:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, ge
-; -O1: csel x9, x0, x20, ge
+; -O1: csel x9, x0, x21, ge
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i128 %value acq_rel, align 1
 ret i128 %r
@@ -5355,9 +5355,9 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i128_unaligned_seq_cst:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, ge
-; -O1: csel x9, x0, x20, ge
+; -O1: csel x9, x0, x21, ge
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i128 %value seq_cst, align 1
 ret i128 %r
@@ -5708,8 +5708,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i16 %value monotonic, align 1
 ret i16 %r
@@ -5725,8 +5725,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i16 %value acquire, align 1
 ret i16 %r
@@ -5742,8 +5742,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i16 %value release, align 1
 ret i16 %r
@@ -5759,8 +5759,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i16 %value acq_rel, align 1
 ret i16 %r
@@ -5776,8 +5776,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i16 %value seq_cst, align 1
 ret i16 %r
@@ -5792,8 +5792,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i32 %value monotonic, align 1
 ret i32 %r
@@ -5808,8 +5808,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i32 %value acquire, align 1
 ret i32 %r
@@ -5824,8 +5824,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i32 %value release, align 1
 ret i32 %r
@@ -5840,8 +5840,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i32 %value acq_rel, align 1
 ret i32 %r
@@ -5856,8 +5856,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i32 %value seq_cst, align 1
 ret i32 %r
@@ -5872,8 +5872,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i64 %value monotonic, align 1
 ret i64 %r
@@ -5888,8 +5888,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i64 %value acquire, align 1
 ret i64 %r
@@ -5904,8 +5904,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i64 %value release, align 1
 ret i64 %r
@@ -5920,8 +5920,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i64 %value acq_rel, align 1
 ret i64 %r
@@ -5936,8 +5936,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i64 %value seq_cst, align 1
 ret i64 %r
@@ -5961,9 +5961,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_unaligned_monotonic:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i128 %value monotonic, align 1
 ret i128 %r
@@ -5987,9 +5987,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_unaligned_acquire:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i128 %value acquire, align 1
 ret i128 %r
@@ -6013,9 +6013,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_unaligned_release:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i128 %value release, align 1
 ret i128 %r
@@ -6039,9 +6039,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_unaligned_acq_rel:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i128 %value acq_rel, align 1
 ret i128 %r
@@ -6065,9 +6065,9 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i128_unaligned_seq_cst:
 ; -O1: ldp x0, x1, [x0]
-; -O1: cmp x20, x0
+; -O1: cmp x21, x0
 ; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i128 %value seq_cst, align 1
 ret i128 %r
@@ -6418,8 +6418,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i16 %value monotonic, align 1
 ret i16 %r
@@ -6435,8 +6435,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i16 %value acquire, align 1
 ret i16 %r
@@ -6452,8 +6452,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, ls +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -6469,8 +6469,8 @@ ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, ls +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -6486,8 +6486,8 @@ ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, ls +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -6502,8 +6502,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, ls +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -6518,8 +6518,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, ls +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -6534,8 +6534,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_release: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, ls +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -6550,8 +6550,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, ls +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -6566,8 +6566,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, ls +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -6582,8 +6582,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, ls +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -6598,8 +6598,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, ls +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -6614,8 +6614,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_release: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, ls +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -6630,8 +6630,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, ls +; -O1: cmp x0, x20 +; 
-O1: csel x8, x0, x20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -6646,8 +6646,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, ls +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -6671,9 +6671,9 @@ ; ; -O1-LABEL: atomicrmw_umin_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, hs -; -O1: csel x9, x0, x20, hs +; -O1: csel x9, x0, x21, hs ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -6697,9 +6697,9 @@ ; ; -O1-LABEL: atomicrmw_umin_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, hs -; -O1: csel x9, x0, x20, hs +; -O1: csel x9, x0, x21, hs ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -6723,9 +6723,9 @@ ; ; -O1-LABEL: atomicrmw_umin_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, hs -; -O1: csel x9, x0, x20, hs +; -O1: csel x9, x0, x21, hs ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -6749,9 +6749,9 @@ ; ; -O1-LABEL: atomicrmw_umin_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, hs -; -O1: csel x9, x0, x20, hs +; -O1: csel x9, x0, x21, hs ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -6775,9 +6775,9 @@ ; ; -O1-LABEL: atomicrmw_umin_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, hs -; -O1: csel x9, x0, x20, hs +; -O1: csel x9, x0, x21, hs ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64-atomicrmw-v8a.ll @@ -1129,7 +1129,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i16_unaligned_monotonic: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -1141,7 +1141,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i16_unaligned_acquire: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -1153,7 +1153,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i16_unaligned_release: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -1165,7 +1165,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i16_unaligned_acq_rel: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -1177,7 +1177,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i16_unaligned_seq_cst: -; -O1: add w8, 
w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -1189,7 +1189,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i32_unaligned_monotonic: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -1201,7 +1201,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i32_unaligned_acquire: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -1213,7 +1213,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i32_unaligned_release: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -1225,7 +1225,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i32_unaligned_acq_rel: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -1237,7 +1237,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i32_unaligned_seq_cst: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -1249,7 +1249,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i64_unaligned_monotonic: -; -O1: add x8, x0, x19 +; -O1: add x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -1261,7 +1261,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i64_unaligned_acquire: -; -O1: add x8, x0, x19 +; -O1: add x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -1273,7 +1273,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i64_unaligned_release: -; -O1: add x8, x0, x19 +; -O1: add x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -1285,7 +1285,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i64_unaligned_acq_rel: -; -O1: add x8, x0, x19 +; -O1: add x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -1297,7 +1297,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i64_unaligned_seq_cst: -; -O1: add x8, x0, x19 +; -O1: add x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -1312,7 +1312,7 @@ ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] -; -O1: adds x8, x0, x20 +; -O1: adds x8, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -1327,7 +1327,7 @@ ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] -; -O1: adds x8, x0, x20 +; -O1: adds x8, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -1342,7 +1342,7 @@ ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] -; -O1: adds x8, x0, x20 +; -O1: adds x8, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -1357,7 +1357,7 @@ 
; ; -O1-LABEL: atomicrmw_add_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] -; -O1: adds x8, x0, x20 +; -O1: adds x8, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -1372,7 +1372,7 @@ ; ; -O1-LABEL: atomicrmw_add_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] -; -O1: adds x8, x0, x20 +; -O1: adds x8, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -1904,7 +1904,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i16_unaligned_monotonic: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -1916,7 +1916,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i16_unaligned_acquire: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -1928,7 +1928,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i16_unaligned_release: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -1940,7 +1940,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i16_unaligned_acq_rel: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -1952,7 +1952,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i16_unaligned_seq_cst: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -1964,7 +1964,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i32_unaligned_monotonic: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -1976,7 +1976,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i32_unaligned_acquire: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -1988,7 +1988,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i32_unaligned_release: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -2000,7 +2000,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i32_unaligned_acq_rel: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -2012,7 +2012,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i32_unaligned_seq_cst: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -2024,7 +2024,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_monotonic: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -2036,7 +2036,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_acquire: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl 
__atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -2048,7 +2048,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_release: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -2060,7 +2060,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_acq_rel: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -2072,7 +2072,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_seq_cst: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -2086,7 +2086,7 @@ ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] -; -O1: subs x8, x0, x20 +; -O1: subs x8, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -2100,7 +2100,7 @@ ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] -; -O1: subs x8, x0, x20 +; -O1: subs x8, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -2114,7 +2114,7 @@ ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] -; -O1: subs x8, x0, x20 +; -O1: subs x8, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -2128,7 +2128,7 @@ ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] -; -O1: subs x8, x0, x20 +; -O1: subs x8, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -2142,7 +2142,7 @@ ; ; -O1-LABEL: atomicrmw_sub_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] -; -O1: subs x8, x0, x20 +; -O1: subs x8, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -2489,9 +2489,9 @@ ; ; -O1-LABEL: atomicrmw_and_i128_aligned_monotonic: ; -O1: ldxp x0, x1, [x8] -; -O1: and x9, x0, x2 -; -O1: and x10, x1, x3 -; -O1: stxp w11, x9, x10, [x8] +; -O1: and x9, x1, x3 +; -O1: and x10, x0, x2 +; -O1: stxp w11, x10, x9, [x8] %r = atomicrmw and ptr %ptr, i128 %value monotonic, align 16 ret i128 %r } @@ -2512,9 +2512,9 @@ ; ; -O1-LABEL: atomicrmw_and_i128_aligned_acquire: ; -O1: ldaxp x0, x1, [x8] -; -O1: and x9, x0, x2 -; -O1: and x10, x1, x3 -; -O1: stxp w11, x9, x10, [x8] +; -O1: and x9, x1, x3 +; -O1: and x10, x0, x2 +; -O1: stxp w11, x10, x9, [x8] %r = atomicrmw and ptr %ptr, i128 %value acquire, align 16 ret i128 %r } @@ -2535,9 +2535,9 @@ ; ; -O1-LABEL: atomicrmw_and_i128_aligned_release: ; -O1: ldxp x0, x1, [x8] -; -O1: and x9, x0, x2 -; -O1: and x10, x1, x3 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: and x9, x1, x3 +; -O1: and x10, x0, x2 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw and ptr %ptr, i128 %value release, align 16 ret i128 %r } @@ -2558,9 +2558,9 @@ ; ; -O1-LABEL: atomicrmw_and_i128_aligned_acq_rel: ; -O1: ldaxp x0, x1, [x8] -; -O1: and x9, x0, x2 -; -O1: and x10, x1, x3 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: and x9, x1, x3 +; -O1: and x10, x0, x2 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 16 ret i128 %r } @@ -2581,9 +2581,9 @@ ; ; -O1-LABEL: 
atomicrmw_and_i128_aligned_seq_cst: ; -O1: ldaxp x0, x1, [x8] -; -O1: and x9, x0, x2 -; -O1: and x10, x1, x3 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: and x9, x1, x3 +; -O1: and x10, x0, x2 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 16 ret i128 %r } @@ -2679,7 +2679,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i16_unaligned_monotonic: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -2691,7 +2691,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i16_unaligned_acquire: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -2703,7 +2703,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i16_unaligned_release: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -2715,7 +2715,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i16_unaligned_acq_rel: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -2727,7 +2727,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i16_unaligned_seq_cst: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -2739,7 +2739,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i32_unaligned_monotonic: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -2751,7 +2751,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i32_unaligned_acquire: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -2763,7 +2763,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i32_unaligned_release: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -2775,7 +2775,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i32_unaligned_acq_rel: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -2787,7 +2787,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i32_unaligned_seq_cst: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -2799,7 +2799,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i64_unaligned_monotonic: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -2811,7 +2811,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i64_unaligned_acquire: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -2823,7 +2823,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: 
atomicrmw_and_i64_unaligned_release: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -2835,7 +2835,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i64_unaligned_acq_rel: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -2847,7 +2847,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i64_unaligned_seq_cst: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -2862,7 +2862,7 @@ ; -O1-LABEL: atomicrmw_and_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -2877,7 +2877,7 @@ ; -O1-LABEL: atomicrmw_and_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -2892,7 +2892,7 @@ ; -O1-LABEL: atomicrmw_and_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -2907,7 +2907,7 @@ ; -O1-LABEL: atomicrmw_and_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -2922,7 +2922,7 @@ ; -O1-LABEL: atomicrmw_and_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -3312,9 +3312,9 @@ ; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic: ; -O1: ldxp x0, x1, [x8] ; -O1: and x9, x0, x2 -; -O1: mvn x9, x9 ; -O1: and x10, x1, x3 ; -O1: mvn x10, x10 +; -O1: mvn x9, x9 ; -O1: stxp w11, x9, x10, [x8] %r = atomicrmw nand ptr %ptr, i128 %value monotonic, align 16 ret i128 %r @@ -3339,9 +3339,9 @@ ; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire: ; -O1: ldaxp x0, x1, [x8] ; -O1: and x9, x0, x2 -; -O1: mvn x9, x9 ; -O1: and x10, x1, x3 ; -O1: mvn x10, x10 +; -O1: mvn x9, x9 ; -O1: stxp w11, x9, x10, [x8] %r = atomicrmw nand ptr %ptr, i128 %value acquire, align 16 ret i128 %r @@ -3366,9 +3366,9 @@ ; -O1-LABEL: atomicrmw_nand_i128_aligned_release: ; -O1: ldxp x0, x1, [x8] ; -O1: and x9, x0, x2 -; -O1: mvn x9, x9 ; -O1: and x10, x1, x3 ; -O1: mvn x10, x10 +; -O1: mvn x9, x9 ; -O1: stlxp w11, x9, x10, [x8] %r = atomicrmw nand ptr %ptr, i128 %value release, align 16 ret i128 %r @@ -3393,9 +3393,9 @@ ; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel: ; -O1: ldaxp x0, x1, [x8] ; -O1: and x9, x0, x2 -; -O1: mvn x9, x9 ; -O1: and x10, x1, x3 ; -O1: mvn x10, x10 +; -O1: mvn x9, x9 ; -O1: stlxp w11, x9, x10, [x8] %r = atomicrmw nand ptr %ptr, i128 %value acq_rel, align 16 ret i128 %r @@ -3420,9 +3420,9 @@ ; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst: ; -O1: ldaxp x0, x1, [x8] ; -O1: and x9, x0, x2 -; -O1: mvn x9, x9 ; -O1: and x10, x1, x3 ; -O1: mvn x10, x10 +; -O1: mvn x9, x9 ; -O1: stlxp w11, x9, x10, [x8] %r = atomicrmw 
nand ptr %ptr, i128 %value seq_cst, align 16 ret i128 %r @@ -3530,7 +3530,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i16_unaligned_monotonic: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i16 %value monotonic, align 1 @@ -3544,7 +3544,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i16_unaligned_acquire: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i16 %value acquire, align 1 @@ -3558,7 +3558,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i16_unaligned_release: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i16 %value release, align 1 @@ -3572,7 +3572,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i16_unaligned_acq_rel: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i16 %value acq_rel, align 1 @@ -3586,7 +3586,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i16_unaligned_seq_cst: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i16 %value seq_cst, align 1 @@ -3600,7 +3600,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i32_unaligned_monotonic: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i32 %value monotonic, align 1 @@ -3614,7 +3614,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i32_unaligned_acquire: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i32 %value acquire, align 1 @@ -3628,7 +3628,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i32_unaligned_release: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i32 %value release, align 1 @@ -3642,7 +3642,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i32_unaligned_acq_rel: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i32 %value acq_rel, align 1 @@ -3656,7 +3656,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i32_unaligned_seq_cst: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i32 %value seq_cst, align 1 @@ -3670,7 +3670,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i64_unaligned_monotonic: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: mvn x8, x8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i64 %value monotonic, align 1 @@ -3684,7 +3684,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i64_unaligned_acquire: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: mvn x8, x8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i64 %value acquire, align 1 @@ -3698,7 +3698,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i64_unaligned_release: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: mvn x8, x8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand 
ptr %ptr, i64 %value release, align 1 @@ -3712,7 +3712,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i64_unaligned_acq_rel: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: mvn x8, x8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i64 %value acq_rel, align 1 @@ -3726,7 +3726,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i64_unaligned_seq_cst: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: mvn x8, x8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i64 %value seq_cst, align 1 @@ -3744,7 +3744,7 @@ ; -O1-LABEL: atomicrmw_nand_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: mvn x8, x8 ; -O1: mvn x9, x9 ; -O1: bl __atomic_compare_exchange @@ -3763,7 +3763,7 @@ ; -O1-LABEL: atomicrmw_nand_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: mvn x8, x8 ; -O1: mvn x9, x9 ; -O1: bl __atomic_compare_exchange @@ -3782,7 +3782,7 @@ ; -O1-LABEL: atomicrmw_nand_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: mvn x8, x8 ; -O1: mvn x9, x9 ; -O1: bl __atomic_compare_exchange @@ -3801,7 +3801,7 @@ ; -O1-LABEL: atomicrmw_nand_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: mvn x8, x8 ; -O1: mvn x9, x9 ; -O1: bl __atomic_compare_exchange @@ -3820,7 +3820,7 @@ ; -O1-LABEL: atomicrmw_nand_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: mvn x8, x8 ; -O1: mvn x9, x9 ; -O1: bl __atomic_compare_exchange @@ -4169,9 +4169,9 @@ ; ; -O1-LABEL: atomicrmw_or_i128_aligned_monotonic: ; -O1: ldxp x0, x1, [x8] -; -O1: orr x9, x0, x2 -; -O1: orr x10, x1, x3 -; -O1: stxp w11, x9, x10, [x8] +; -O1: orr x9, x1, x3 +; -O1: orr x10, x0, x2 +; -O1: stxp w11, x10, x9, [x8] %r = atomicrmw or ptr %ptr, i128 %value monotonic, align 16 ret i128 %r } @@ -4192,9 +4192,9 @@ ; ; -O1-LABEL: atomicrmw_or_i128_aligned_acquire: ; -O1: ldaxp x0, x1, [x8] -; -O1: orr x9, x0, x2 -; -O1: orr x10, x1, x3 -; -O1: stxp w11, x9, x10, [x8] +; -O1: orr x9, x1, x3 +; -O1: orr x10, x0, x2 +; -O1: stxp w11, x10, x9, [x8] %r = atomicrmw or ptr %ptr, i128 %value acquire, align 16 ret i128 %r } @@ -4215,9 +4215,9 @@ ; ; -O1-LABEL: atomicrmw_or_i128_aligned_release: ; -O1: ldxp x0, x1, [x8] -; -O1: orr x9, x0, x2 -; -O1: orr x10, x1, x3 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: orr x9, x1, x3 +; -O1: orr x10, x0, x2 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw or ptr %ptr, i128 %value release, align 16 ret i128 %r } @@ -4238,9 +4238,9 @@ ; ; -O1-LABEL: atomicrmw_or_i128_aligned_acq_rel: ; -O1: ldaxp x0, x1, [x8] -; -O1: orr x9, x0, x2 -; -O1: orr x10, x1, x3 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: orr x9, x1, x3 +; -O1: orr x10, x0, x2 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 16 ret i128 %r } @@ -4261,9 +4261,9 @@ ; ; -O1-LABEL: atomicrmw_or_i128_aligned_seq_cst: ; -O1: ldaxp x0, x1, [x8] -; -O1: orr x9, x0, x2 -; -O1: orr x10, x1, x3 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: orr x9, x1, x3 +; -O1: orr x10, x0, x2 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 16 ret i128 %r } @@ -4359,7 +4359,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i16_unaligned_monotonic: -; -O1: 
orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -4371,7 +4371,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i16_unaligned_acquire: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -4383,7 +4383,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i16_unaligned_release: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -4395,7 +4395,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i16_unaligned_acq_rel: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -4407,7 +4407,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i16_unaligned_seq_cst: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -4419,7 +4419,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i32_unaligned_monotonic: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -4431,7 +4431,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i32_unaligned_acquire: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -4443,7 +4443,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i32_unaligned_release: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -4455,7 +4455,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i32_unaligned_acq_rel: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -4467,7 +4467,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i32_unaligned_seq_cst: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -4479,7 +4479,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i64_unaligned_monotonic: -; -O1: orr x8, x0, x19 +; -O1: orr x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -4491,7 +4491,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i64_unaligned_acquire: -; -O1: orr x8, x0, x19 +; -O1: orr x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -4503,7 +4503,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i64_unaligned_release: -; -O1: orr x8, x0, x19 +; -O1: orr x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -4515,7 +4515,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i64_unaligned_acq_rel: -; -O1: orr x8, x0, x19 +; -O1: orr x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -4527,7 +4527,7 @@ 
; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i64_unaligned_seq_cst: -; -O1: orr x8, x0, x19 +; -O1: orr x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -4542,7 +4542,7 @@ ; -O1-LABEL: atomicrmw_or_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] ; -O1: orr x8, x1, x19 -; -O1: orr x9, x0, x20 +; -O1: orr x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -4557,7 +4557,7 @@ ; -O1-LABEL: atomicrmw_or_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] ; -O1: orr x8, x1, x19 -; -O1: orr x9, x0, x20 +; -O1: orr x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -4572,7 +4572,7 @@ ; -O1-LABEL: atomicrmw_or_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] ; -O1: orr x8, x1, x19 -; -O1: orr x9, x0, x20 +; -O1: orr x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -4587,7 +4587,7 @@ ; -O1-LABEL: atomicrmw_or_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] ; -O1: orr x8, x1, x19 -; -O1: orr x9, x0, x20 +; -O1: orr x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -4602,7 +4602,7 @@ ; -O1-LABEL: atomicrmw_or_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] ; -O1: orr x8, x1, x19 -; -O1: orr x9, x0, x20 +; -O1: orr x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -4949,9 +4949,9 @@ ; ; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic: ; -O1: ldxp x0, x1, [x8] -; -O1: eor x9, x0, x2 -; -O1: eor x10, x1, x3 -; -O1: stxp w11, x9, x10, [x8] +; -O1: eor x9, x1, x3 +; -O1: eor x10, x0, x2 +; -O1: stxp w11, x10, x9, [x8] %r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 16 ret i128 %r } @@ -4972,9 +4972,9 @@ ; ; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire: ; -O1: ldaxp x0, x1, [x8] -; -O1: eor x9, x0, x2 -; -O1: eor x10, x1, x3 -; -O1: stxp w11, x9, x10, [x8] +; -O1: eor x9, x1, x3 +; -O1: eor x10, x0, x2 +; -O1: stxp w11, x10, x9, [x8] %r = atomicrmw xor ptr %ptr, i128 %value acquire, align 16 ret i128 %r } @@ -4995,9 +4995,9 @@ ; ; -O1-LABEL: atomicrmw_xor_i128_aligned_release: ; -O1: ldxp x0, x1, [x8] -; -O1: eor x9, x0, x2 -; -O1: eor x10, x1, x3 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: eor x9, x1, x3 +; -O1: eor x10, x0, x2 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw xor ptr %ptr, i128 %value release, align 16 ret i128 %r } @@ -5018,9 +5018,9 @@ ; ; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel: ; -O1: ldaxp x0, x1, [x8] -; -O1: eor x9, x0, x2 -; -O1: eor x10, x1, x3 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: eor x9, x1, x3 +; -O1: eor x10, x0, x2 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 16 ret i128 %r } @@ -5041,9 +5041,9 @@ ; ; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst: ; -O1: ldaxp x0, x1, [x8] -; -O1: eor x9, x0, x2 -; -O1: eor x10, x1, x3 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: eor x9, x1, x3 +; -O1: eor x10, x0, x2 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 16 ret i128 %r } @@ -5139,7 +5139,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i16_unaligned_monotonic: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -5151,7 +5151,7 @@ ; -O0: 
bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i16_unaligned_acquire: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -5163,7 +5163,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i16_unaligned_release: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -5175,7 +5175,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i16_unaligned_acq_rel: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -5187,7 +5187,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i16_unaligned_seq_cst: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -5199,7 +5199,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i32_unaligned_monotonic: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -5211,7 +5211,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i32_unaligned_acquire: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -5223,7 +5223,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i32_unaligned_release: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -5235,7 +5235,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i32_unaligned_acq_rel: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -5247,7 +5247,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i32_unaligned_seq_cst: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -5259,7 +5259,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i64_unaligned_monotonic: -; -O1: eor x8, x0, x19 +; -O1: eor x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -5271,7 +5271,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i64_unaligned_acquire: -; -O1: eor x8, x0, x19 +; -O1: eor x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -5283,7 +5283,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i64_unaligned_release: -; -O1: eor x8, x0, x19 +; -O1: eor x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -5295,7 +5295,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i64_unaligned_acq_rel: -; -O1: eor x8, x0, x19 +; -O1: eor x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -5307,7 +5307,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i64_unaligned_seq_cst: -; -O1: eor x8, x0, x19 +; -O1: eor x8, x0, x20 ; -O1: bl 
__atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -5322,7 +5322,7 @@ ; -O1-LABEL: atomicrmw_xor_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] ; -O1: eor x8, x1, x19 -; -O1: eor x9, x0, x20 +; -O1: eor x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -5337,7 +5337,7 @@ ; -O1-LABEL: atomicrmw_xor_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] ; -O1: eor x8, x1, x19 -; -O1: eor x9, x0, x20 +; -O1: eor x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -5352,7 +5352,7 @@ ; -O1-LABEL: atomicrmw_xor_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] ; -O1: eor x8, x1, x19 -; -O1: eor x9, x0, x20 +; -O1: eor x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -5367,7 +5367,7 @@ ; -O1-LABEL: atomicrmw_xor_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] ; -O1: eor x8, x1, x19 -; -O1: eor x9, x0, x20 +; -O1: eor x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -5382,7 +5382,7 @@ ; -O1-LABEL: atomicrmw_xor_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] ; -O1: eor x8, x1, x19 -; -O1: eor x9, x0, x20 +; -O1: eor x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -6109,8 +6109,8 @@ ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, gt +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -6127,8 +6127,8 @@ ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, gt +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -6145,8 +6145,8 @@ ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_release: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, gt +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -6163,8 +6163,8 @@ ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, gt +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -6181,8 +6181,8 @@ ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, gt +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -6197,8 +6197,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, gt +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -6213,8 +6213,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, gt +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, gt ; -O1: bl 
__atomic_compare_exchange %r = atomicrmw max ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -6229,8 +6229,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_release: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, gt +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -6245,8 +6245,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, gt +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -6261,8 +6261,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, gt +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -6277,8 +6277,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, gt +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -6293,8 +6293,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, gt +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -6309,8 +6309,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_release: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, gt +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -6325,8 +6325,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, gt +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -6341,8 +6341,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, gt +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -6366,9 +6366,9 @@ ; ; -O1-LABEL: atomicrmw_max_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, lt -; -O1: csel x9, x0, x20, lt +; -O1: csel x9, x0, x21, lt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -6392,9 +6392,9 @@ ; ; -O1-LABEL: atomicrmw_max_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, lt -; -O1: csel x9, x0, x20, lt +; -O1: csel x9, x0, x21, lt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -6418,9 +6418,9 @@ ; ; -O1-LABEL: atomicrmw_max_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, lt -; -O1: csel x9, x0, x20, lt +; -O1: csel x9, x0, x21, lt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i128 
%value release, align 1 ret i128 %r @@ -6444,9 +6444,9 @@ ; ; -O1-LABEL: atomicrmw_max_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, lt -; -O1: csel x9, x0, x20, lt +; -O1: csel x9, x0, x21, lt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -6470,9 +6470,9 @@ ; ; -O1-LABEL: atomicrmw_max_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, lt -; -O1: csel x9, x0, x20, lt +; -O1: csel x9, x0, x21, lt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -7199,8 +7199,8 @@ ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, le +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -7217,8 +7217,8 @@ ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, le +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -7235,8 +7235,8 @@ ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_release: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, le +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -7253,8 +7253,8 @@ ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, le +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -7271,8 +7271,8 @@ ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, le +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -7287,8 +7287,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, le +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -7303,8 +7303,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, le +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -7319,8 +7319,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_release: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, le +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -7335,8 +7335,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, le +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -7351,8 +7351,8 @@ ; -O0: bl __atomic_compare_exchange ; ; 
-O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, le +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -7367,8 +7367,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, le +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -7383,8 +7383,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, le +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -7399,8 +7399,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_release: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, le +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -7415,8 +7415,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, le +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -7431,8 +7431,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, le +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -7456,9 +7456,9 @@ ; ; -O1-LABEL: atomicrmw_min_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, ge -; -O1: csel x9, x0, x20, ge +; -O1: csel x9, x0, x21, ge ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -7482,9 +7482,9 @@ ; ; -O1-LABEL: atomicrmw_min_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, ge -; -O1: csel x9, x0, x20, ge +; -O1: csel x9, x0, x21, ge ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -7508,9 +7508,9 @@ ; ; -O1-LABEL: atomicrmw_min_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, ge -; -O1: csel x9, x0, x20, ge +; -O1: csel x9, x0, x21, ge ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -7534,9 +7534,9 @@ ; ; -O1-LABEL: atomicrmw_min_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, ge -; -O1: csel x9, x0, x20, ge +; -O1: csel x9, x0, x21, ge ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -7560,9 +7560,9 @@ ; ; -O1-LABEL: atomicrmw_min_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, ge -; -O1: csel x9, x0, x20, ge +; -O1: csel x9, x0, x21, ge ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -8283,8 +8283,8 @@ ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic: ; 
-O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, hi +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -8300,8 +8300,8 @@ ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, hi +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -8317,8 +8317,8 @@ ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_release: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, hi +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -8334,8 +8334,8 @@ ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, hi +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -8351,8 +8351,8 @@ ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, hi +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -8367,8 +8367,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, hi +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -8383,8 +8383,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, hi +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -8399,8 +8399,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_release: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, hi +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -8415,8 +8415,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, hi +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -8431,8 +8431,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, hi +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -8447,8 +8447,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, hi +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -8463,8 +8463,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, 
x19, hi +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -8479,8 +8479,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_release: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, hi +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -8495,8 +8495,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, hi +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -8511,8 +8511,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, hi +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -8536,9 +8536,9 @@ ; ; -O1-LABEL: atomicrmw_umax_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, lo -; -O1: csel x9, x0, x20, lo +; -O1: csel x9, x0, x21, lo ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -8562,9 +8562,9 @@ ; ; -O1-LABEL: atomicrmw_umax_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, lo -; -O1: csel x9, x0, x20, lo +; -O1: csel x9, x0, x21, lo ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -8588,9 +8588,9 @@ ; ; -O1-LABEL: atomicrmw_umax_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, lo -; -O1: csel x9, x0, x20, lo +; -O1: csel x9, x0, x21, lo ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -8614,9 +8614,9 @@ ; ; -O1-LABEL: atomicrmw_umax_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, lo -; -O1: csel x9, x0, x20, lo +; -O1: csel x9, x0, x21, lo ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -8640,9 +8640,9 @@ ; ; -O1-LABEL: atomicrmw_umax_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, lo -; -O1: csel x9, x0, x20, lo +; -O1: csel x9, x0, x21, lo ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -9363,8 +9363,8 @@ ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, ls +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -9380,8 +9380,8 @@ ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, ls +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -9397,8 +9397,8 @@ ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_release: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel 
w8, w0, w19, ls +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -9414,8 +9414,8 @@ ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, ls +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -9431,8 +9431,8 @@ ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, ls +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -9447,8 +9447,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, ls +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -9463,8 +9463,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, ls +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -9479,8 +9479,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_release: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, ls +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -9495,8 +9495,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, ls +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -9511,8 +9511,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, ls +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -9527,8 +9527,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, ls +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -9543,8 +9543,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, ls +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -9559,8 +9559,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_release: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, ls +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -9575,8 +9575,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, ls +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, ls ; -O1: bl 
__atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -9591,8 +9591,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, ls +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -9616,9 +9616,9 @@ ; ; -O1-LABEL: atomicrmw_umin_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, hs -; -O1: csel x9, x0, x20, hs +; -O1: csel x9, x0, x21, hs ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -9642,9 +9642,9 @@ ; ; -O1-LABEL: atomicrmw_umin_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, hs -; -O1: csel x9, x0, x20, hs +; -O1: csel x9, x0, x21, hs ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -9668,9 +9668,9 @@ ; ; -O1-LABEL: atomicrmw_umin_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, hs -; -O1: csel x9, x0, x20, hs +; -O1: csel x9, x0, x21, hs ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -9694,9 +9694,9 @@ ; ; -O1-LABEL: atomicrmw_umin_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, hs -; -O1: csel x9, x0, x20, hs +; -O1: csel x9, x0, x21, hs ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -9720,9 +9720,9 @@ ; ; -O1-LABEL: atomicrmw_umin_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] -; -O1: cmp x20, x0 +; -O1: cmp x21, x0 ; -O1: csel x8, x1, x19, hs -; -O1: csel x9, x0, x20, hs +; -O1: csel x9, x0, x21, hs ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2.ll --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2.ll @@ -1129,7 +1129,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i16_unaligned_monotonic: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -1141,7 +1141,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i16_unaligned_acquire: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -1153,7 +1153,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i16_unaligned_release: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -1165,7 +1165,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i16_unaligned_acq_rel: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -1177,7 +1177,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i16_unaligned_seq_cst: -; -O1: add w8, w0, w19 +; -O1: add 
w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -1189,7 +1189,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i32_unaligned_monotonic: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -1201,7 +1201,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i32_unaligned_acquire: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -1213,7 +1213,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i32_unaligned_release: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -1225,7 +1225,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i32_unaligned_acq_rel: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -1237,7 +1237,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i32_unaligned_seq_cst: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -1249,7 +1249,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i64_unaligned_monotonic: -; -O1: add x8, x0, x19 +; -O1: add x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -1261,7 +1261,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i64_unaligned_acquire: -; -O1: add x8, x0, x19 +; -O1: add x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -1273,7 +1273,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i64_unaligned_release: -; -O1: add x8, x0, x19 +; -O1: add x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -1285,7 +1285,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i64_unaligned_acq_rel: -; -O1: add x8, x0, x19 +; -O1: add x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -1297,7 +1297,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i64_unaligned_seq_cst: -; -O1: add x8, x0, x19 +; -O1: add x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -1894,7 +1894,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i16_unaligned_monotonic: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -1906,7 +1906,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i16_unaligned_acquire: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -1918,7 +1918,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i16_unaligned_release: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -1930,7 +1930,7 
@@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i16_unaligned_acq_rel: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -1942,7 +1942,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i16_unaligned_seq_cst: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -1954,7 +1954,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i32_unaligned_monotonic: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -1966,7 +1966,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i32_unaligned_acquire: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -1978,7 +1978,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i32_unaligned_release: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -1990,7 +1990,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i32_unaligned_acq_rel: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -2002,7 +2002,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i32_unaligned_seq_cst: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -2014,7 +2014,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_monotonic: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -2026,7 +2026,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_acquire: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -2038,7 +2038,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_release: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -2050,7 +2050,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_acq_rel: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -2062,7 +2062,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_seq_cst: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -2487,9 +2487,9 @@ ; ; -O1-LABEL: atomicrmw_and_i128_aligned_monotonic: ; -O1: ldxp x1, x0, [x8] -; -O1: and x9, x1, x3 -; -O1: and x10, x0, x2 -; -O1: stxp w11, x9, x10, [x8] +; -O1: and x9, x0, x2 +; -O1: and x10, x1, x3 +; -O1: stxp w11, x10, x9, [x8] %r = atomicrmw and ptr %ptr, i128 %value monotonic, align 16 ret i128 %r } @@ -2508,9 +2508,9 @@ ; ; -O1-LABEL: atomicrmw_and_i128_aligned_acquire: ; -O1: ldaxp 
x1, x0, [x8] -; -O1: and x9, x1, x3 -; -O1: and x10, x0, x2 -; -O1: stxp w11, x9, x10, [x8] +; -O1: and x9, x0, x2 +; -O1: and x10, x1, x3 +; -O1: stxp w11, x10, x9, [x8] %r = atomicrmw and ptr %ptr, i128 %value acquire, align 16 ret i128 %r } @@ -2529,9 +2529,9 @@ ; ; -O1-LABEL: atomicrmw_and_i128_aligned_release: ; -O1: ldxp x1, x0, [x8] -; -O1: and x9, x1, x3 -; -O1: and x10, x0, x2 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: and x9, x0, x2 +; -O1: and x10, x1, x3 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw and ptr %ptr, i128 %value release, align 16 ret i128 %r } @@ -2550,9 +2550,9 @@ ; ; -O1-LABEL: atomicrmw_and_i128_aligned_acq_rel: ; -O1: ldaxp x1, x0, [x8] -; -O1: and x9, x1, x3 -; -O1: and x10, x0, x2 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: and x9, x0, x2 +; -O1: and x10, x1, x3 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 16 ret i128 %r } @@ -2571,9 +2571,9 @@ ; ; -O1-LABEL: atomicrmw_and_i128_aligned_seq_cst: ; -O1: ldaxp x1, x0, [x8] -; -O1: and x9, x1, x3 -; -O1: and x10, x0, x2 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: and x9, x0, x2 +; -O1: and x10, x1, x3 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 16 ret i128 %r } @@ -2669,7 +2669,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i16_unaligned_monotonic: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -2681,7 +2681,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i16_unaligned_acquire: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -2693,7 +2693,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i16_unaligned_release: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -2705,7 +2705,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i16_unaligned_acq_rel: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -2717,7 +2717,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i16_unaligned_seq_cst: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -2729,7 +2729,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i32_unaligned_monotonic: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -2741,7 +2741,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i32_unaligned_acquire: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -2753,7 +2753,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i32_unaligned_release: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -2765,7 +2765,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i32_unaligned_acq_rel: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw 
and ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -2777,7 +2777,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i32_unaligned_seq_cst: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -2789,7 +2789,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i64_unaligned_monotonic: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -2801,7 +2801,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i64_unaligned_acquire: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -2813,7 +2813,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i64_unaligned_release: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -2825,7 +2825,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i64_unaligned_acq_rel: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -2837,7 +2837,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i64_unaligned_seq_cst: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -2852,7 +2852,7 @@ ; -O1-LABEL: atomicrmw_and_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -2867,7 +2867,7 @@ ; -O1-LABEL: atomicrmw_and_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -2882,7 +2882,7 @@ ; -O1-LABEL: atomicrmw_and_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -2897,7 +2897,7 @@ ; -O1-LABEL: atomicrmw_and_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -2912,7 +2912,7 @@ ; -O1-LABEL: atomicrmw_and_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -3315,9 +3315,9 @@ ; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic: ; -O1: ldxp x1, x0, [x8] ; -O1: and x9, x1, x3 -; -O1: mvn x9, x9 ; -O1: and x10, x0, x2 ; -O1: mvn x10, x10 +; -O1: mvn x9, x9 ; -O1: stxp w11, x9, x10, [x8] %r = atomicrmw nand ptr %ptr, i128 %value monotonic, align 16 ret i128 %r @@ -3340,9 +3340,9 @@ ; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire: ; -O1: ldaxp x1, x0, [x8] ; -O1: and x9, x1, x3 -; -O1: mvn x9, x9 ; -O1: and x10, x0, x2 ; -O1: mvn x10, x10 +; -O1: mvn x9, x9 ; -O1: stxp w11, x9, x10, [x8] %r = 
atomicrmw nand ptr %ptr, i128 %value acquire, align 16 ret i128 %r @@ -3365,9 +3365,9 @@ ; -O1-LABEL: atomicrmw_nand_i128_aligned_release: ; -O1: ldxp x1, x0, [x8] ; -O1: and x9, x1, x3 -; -O1: mvn x9, x9 ; -O1: and x10, x0, x2 ; -O1: mvn x10, x10 +; -O1: mvn x9, x9 ; -O1: stlxp w11, x9, x10, [x8] %r = atomicrmw nand ptr %ptr, i128 %value release, align 16 ret i128 %r @@ -3390,9 +3390,9 @@ ; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel: ; -O1: ldaxp x1, x0, [x8] ; -O1: and x9, x1, x3 -; -O1: mvn x9, x9 ; -O1: and x10, x0, x2 ; -O1: mvn x10, x10 +; -O1: mvn x9, x9 ; -O1: stlxp w11, x9, x10, [x8] %r = atomicrmw nand ptr %ptr, i128 %value acq_rel, align 16 ret i128 %r @@ -3415,9 +3415,9 @@ ; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst: ; -O1: ldaxp x1, x0, [x8] ; -O1: and x9, x1, x3 -; -O1: mvn x9, x9 ; -O1: and x10, x0, x2 ; -O1: mvn x10, x10 +; -O1: mvn x9, x9 ; -O1: stlxp w11, x9, x10, [x8] %r = atomicrmw nand ptr %ptr, i128 %value seq_cst, align 16 ret i128 %r @@ -3525,7 +3525,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i16_unaligned_monotonic: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i16 %value monotonic, align 1 @@ -3539,7 +3539,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i16_unaligned_acquire: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i16 %value acquire, align 1 @@ -3553,7 +3553,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i16_unaligned_release: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i16 %value release, align 1 @@ -3567,7 +3567,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i16_unaligned_acq_rel: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i16 %value acq_rel, align 1 @@ -3581,7 +3581,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i16_unaligned_seq_cst: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i16 %value seq_cst, align 1 @@ -3595,7 +3595,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i32_unaligned_monotonic: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i32 %value monotonic, align 1 @@ -3609,7 +3609,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i32_unaligned_acquire: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i32 %value acquire, align 1 @@ -3623,7 +3623,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i32_unaligned_release: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i32 %value release, align 1 @@ -3637,7 +3637,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i32_unaligned_acq_rel: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i32 %value acq_rel, align 1 @@ -3651,7 +3651,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i32_unaligned_seq_cst: -; -O1: and w8, 
w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i32 %value seq_cst, align 1 @@ -3665,7 +3665,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i64_unaligned_monotonic: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: mvn x8, x8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i64 %value monotonic, align 1 @@ -3679,7 +3679,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i64_unaligned_acquire: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: mvn x8, x8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i64 %value acquire, align 1 @@ -3693,7 +3693,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i64_unaligned_release: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: mvn x8, x8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i64 %value release, align 1 @@ -3707,7 +3707,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i64_unaligned_acq_rel: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: mvn x8, x8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i64 %value acq_rel, align 1 @@ -3721,7 +3721,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i64_unaligned_seq_cst: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: mvn x8, x8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i64 %value seq_cst, align 1 @@ -3739,7 +3739,7 @@ ; -O1-LABEL: atomicrmw_nand_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: mvn x8, x8 ; -O1: mvn x9, x9 ; -O1: bl __atomic_compare_exchange @@ -3758,7 +3758,7 @@ ; -O1-LABEL: atomicrmw_nand_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: mvn x8, x8 ; -O1: mvn x9, x9 ; -O1: bl __atomic_compare_exchange @@ -3777,7 +3777,7 @@ ; -O1-LABEL: atomicrmw_nand_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: mvn x8, x8 ; -O1: mvn x9, x9 ; -O1: bl __atomic_compare_exchange @@ -3796,7 +3796,7 @@ ; -O1-LABEL: atomicrmw_nand_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: mvn x8, x8 ; -O1: mvn x9, x9 ; -O1: bl __atomic_compare_exchange @@ -3815,7 +3815,7 @@ ; -O1-LABEL: atomicrmw_nand_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: mvn x8, x8 ; -O1: mvn x9, x9 ; -O1: bl __atomic_compare_exchange @@ -4177,9 +4177,9 @@ ; ; -O1-LABEL: atomicrmw_or_i128_aligned_monotonic: ; -O1: ldxp x1, x0, [x8] -; -O1: orr x9, x1, x3 -; -O1: orr x10, x0, x2 -; -O1: stxp w11, x9, x10, [x8] +; -O1: orr x9, x0, x2 +; -O1: orr x10, x1, x3 +; -O1: stxp w11, x10, x9, [x8] %r = atomicrmw or ptr %ptr, i128 %value monotonic, align 16 ret i128 %r } @@ -4198,9 +4198,9 @@ ; ; -O1-LABEL: atomicrmw_or_i128_aligned_acquire: ; -O1: ldaxp x1, x0, [x8] -; -O1: orr x9, x1, x3 -; -O1: orr x10, x0, x2 -; -O1: stxp w11, x9, x10, [x8] +; -O1: orr x9, x0, x2 +; -O1: orr x10, x1, x3 +; -O1: stxp w11, x10, x9, [x8] %r = atomicrmw or ptr %ptr, i128 %value acquire, align 16 ret i128 %r } @@ -4219,9 +4219,9 @@ ; ; -O1-LABEL: atomicrmw_or_i128_aligned_release: ; -O1: ldxp x1, x0, [x8] -; -O1: orr x9, x1, x3 -; -O1: orr x10, x0, x2 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: 
orr x9, x0, x2 +; -O1: orr x10, x1, x3 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw or ptr %ptr, i128 %value release, align 16 ret i128 %r } @@ -4240,9 +4240,9 @@ ; ; -O1-LABEL: atomicrmw_or_i128_aligned_acq_rel: ; -O1: ldaxp x1, x0, [x8] -; -O1: orr x9, x1, x3 -; -O1: orr x10, x0, x2 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: orr x9, x0, x2 +; -O1: orr x10, x1, x3 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 16 ret i128 %r } @@ -4261,9 +4261,9 @@ ; ; -O1-LABEL: atomicrmw_or_i128_aligned_seq_cst: ; -O1: ldaxp x1, x0, [x8] -; -O1: orr x9, x1, x3 -; -O1: orr x10, x0, x2 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: orr x9, x0, x2 +; -O1: orr x10, x1, x3 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 16 ret i128 %r } @@ -4359,7 +4359,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i16_unaligned_monotonic: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -4371,7 +4371,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i16_unaligned_acquire: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -4383,7 +4383,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i16_unaligned_release: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -4395,7 +4395,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i16_unaligned_acq_rel: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -4407,7 +4407,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i16_unaligned_seq_cst: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -4419,7 +4419,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i32_unaligned_monotonic: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -4431,7 +4431,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i32_unaligned_acquire: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -4443,7 +4443,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i32_unaligned_release: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -4455,7 +4455,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i32_unaligned_acq_rel: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -4467,7 +4467,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i32_unaligned_seq_cst: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -4479,7 +4479,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i64_unaligned_monotonic: -; -O1: orr x8, x0, x19 +; -O1: 
orr x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -4491,7 +4491,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i64_unaligned_acquire: -; -O1: orr x8, x0, x19 +; -O1: orr x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -4503,7 +4503,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i64_unaligned_release: -; -O1: orr x8, x0, x19 +; -O1: orr x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -4515,7 +4515,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i64_unaligned_acq_rel: -; -O1: orr x8, x0, x19 +; -O1: orr x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -4527,7 +4527,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i64_unaligned_seq_cst: -; -O1: orr x8, x0, x19 +; -O1: orr x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -4542,7 +4542,7 @@ ; -O1-LABEL: atomicrmw_or_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] ; -O1: orr x8, x1, x19 -; -O1: orr x9, x0, x20 +; -O1: orr x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -4557,7 +4557,7 @@ ; -O1-LABEL: atomicrmw_or_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] ; -O1: orr x8, x1, x19 -; -O1: orr x9, x0, x20 +; -O1: orr x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -4572,7 +4572,7 @@ ; -O1-LABEL: atomicrmw_or_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] ; -O1: orr x8, x1, x19 -; -O1: orr x9, x0, x20 +; -O1: orr x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -4587,7 +4587,7 @@ ; -O1-LABEL: atomicrmw_or_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] ; -O1: orr x8, x1, x19 -; -O1: orr x9, x0, x20 +; -O1: orr x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -4602,7 +4602,7 @@ ; -O1-LABEL: atomicrmw_or_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] ; -O1: orr x8, x1, x19 -; -O1: orr x9, x0, x20 +; -O1: orr x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -4962,9 +4962,9 @@ ; ; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic: ; -O1: ldxp x1, x0, [x8] -; -O1: eor x9, x1, x3 -; -O1: eor x10, x0, x2 -; -O1: stxp w11, x9, x10, [x8] +; -O1: eor x9, x0, x2 +; -O1: eor x10, x1, x3 +; -O1: stxp w11, x10, x9, [x8] %r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 16 ret i128 %r } @@ -4983,9 +4983,9 @@ ; ; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire: ; -O1: ldaxp x1, x0, [x8] -; -O1: eor x9, x1, x3 -; -O1: eor x10, x0, x2 -; -O1: stxp w11, x9, x10, [x8] +; -O1: eor x9, x0, x2 +; -O1: eor x10, x1, x3 +; -O1: stxp w11, x10, x9, [x8] %r = atomicrmw xor ptr %ptr, i128 %value acquire, align 16 ret i128 %r } @@ -5004,9 +5004,9 @@ ; ; -O1-LABEL: atomicrmw_xor_i128_aligned_release: ; -O1: ldxp x1, x0, [x8] -; -O1: eor x9, x1, x3 -; -O1: eor x10, x0, x2 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: eor x9, x0, x2 +; -O1: eor x10, x1, x3 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw xor ptr %ptr, i128 %value release, align 16 ret i128 %r } @@ -5025,9 +5025,9 @@ ; ; 
-O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel: ; -O1: ldaxp x1, x0, [x8] -; -O1: eor x9, x1, x3 -; -O1: eor x10, x0, x2 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: eor x9, x0, x2 +; -O1: eor x10, x1, x3 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 16 ret i128 %r } @@ -5046,9 +5046,9 @@ ; ; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst: ; -O1: ldaxp x1, x0, [x8] -; -O1: eor x9, x1, x3 -; -O1: eor x10, x0, x2 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: eor x9, x0, x2 +; -O1: eor x10, x1, x3 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 16 ret i128 %r } @@ -5144,7 +5144,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i16_unaligned_monotonic: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -5156,7 +5156,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i16_unaligned_acquire: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -5168,7 +5168,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i16_unaligned_release: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -5180,7 +5180,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i16_unaligned_acq_rel: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -5192,7 +5192,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i16_unaligned_seq_cst: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -5204,7 +5204,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i32_unaligned_monotonic: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -5216,7 +5216,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i32_unaligned_acquire: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -5228,7 +5228,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i32_unaligned_release: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -5240,7 +5240,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i32_unaligned_acq_rel: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -5252,7 +5252,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i32_unaligned_seq_cst: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -5264,7 +5264,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i64_unaligned_monotonic: -; -O1: eor x8, x0, x19 +; -O1: eor x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -5276,7 +5276,7 @@ 
; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i64_unaligned_acquire: -; -O1: eor x8, x0, x19 +; -O1: eor x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -5288,7 +5288,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i64_unaligned_release: -; -O1: eor x8, x0, x19 +; -O1: eor x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -5300,7 +5300,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i64_unaligned_acq_rel: -; -O1: eor x8, x0, x19 +; -O1: eor x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -5312,7 +5312,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i64_unaligned_seq_cst: -; -O1: eor x8, x0, x19 +; -O1: eor x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -5327,7 +5327,7 @@ ; -O1-LABEL: atomicrmw_xor_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] ; -O1: eor x8, x1, x19 -; -O1: eor x9, x0, x20 +; -O1: eor x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -5342,7 +5342,7 @@ ; -O1-LABEL: atomicrmw_xor_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] ; -O1: eor x8, x1, x19 -; -O1: eor x9, x0, x20 +; -O1: eor x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -5357,7 +5357,7 @@ ; -O1-LABEL: atomicrmw_xor_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] ; -O1: eor x8, x1, x19 -; -O1: eor x9, x0, x20 +; -O1: eor x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -5372,7 +5372,7 @@ ; -O1-LABEL: atomicrmw_xor_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] ; -O1: eor x8, x1, x19 -; -O1: eor x9, x0, x20 +; -O1: eor x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -5387,7 +5387,7 @@ ; -O1-LABEL: atomicrmw_xor_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] ; -O1: eor x8, x1, x19 -; -O1: eor x9, x0, x20 +; -O1: eor x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -6022,8 +6022,8 @@ ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, gt +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -6038,8 +6038,8 @@ ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, gt +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -6054,8 +6054,8 @@ ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_release: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, gt +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -6070,8 +6070,8 @@ ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, gt +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, gt ; -O1: bl 
__atomic_compare_exchange %r = atomicrmw max ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -6086,8 +6086,8 @@ ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, gt +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -6100,8 +6100,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, gt +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -6114,8 +6114,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, gt +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -6128,8 +6128,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_release: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, gt +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -6142,8 +6142,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, gt +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -6156,8 +6156,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, gt +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -6170,8 +6170,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, gt +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -6184,8 +6184,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, gt +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -6198,8 +6198,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_release: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, gt +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -6212,8 +6212,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, gt +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -6226,8 +6226,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, gt +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -6244,7 +6244,7 @@ ; 
-O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, lt -; -O1: csel x9, x0, x20, lt +; -O1: csel x9, x0, x21, lt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -6261,7 +6261,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, lt -; -O1: csel x9, x0, x20, lt +; -O1: csel x9, x0, x21, lt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -6278,7 +6278,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, lt -; -O1: csel x9, x0, x20, lt +; -O1: csel x9, x0, x21, lt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -6295,7 +6295,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, lt -; -O1: csel x9, x0, x20, lt +; -O1: csel x9, x0, x21, lt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -6312,7 +6312,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, lt -; -O1: csel x9, x0, x20, lt +; -O1: csel x9, x0, x21, lt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -6947,8 +6947,8 @@ ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, le +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -6963,8 +6963,8 @@ ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, le +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -6979,8 +6979,8 @@ ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_release: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, le +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -6995,8 +6995,8 @@ ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, le +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -7011,8 +7011,8 @@ ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, le +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -7025,8 +7025,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, le +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -7039,8 +7039,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, le +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -7053,8 +7053,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_release: -; -O1: cmp 
w0, w19 -; -O1: csel w8, w0, w19, le +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -7067,8 +7067,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, le +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -7081,8 +7081,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, le +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -7095,8 +7095,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, le +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -7109,8 +7109,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, le +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -7123,8 +7123,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_release: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, le +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -7137,8 +7137,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, le +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -7151,8 +7151,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, le +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -7169,7 +7169,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, ge -; -O1: csel x9, x0, x20, ge +; -O1: csel x9, x0, x21, ge ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -7186,7 +7186,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, ge -; -O1: csel x9, x0, x20, ge +; -O1: csel x9, x0, x21, ge ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -7203,7 +7203,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, ge -; -O1: csel x9, x0, x20, ge +; -O1: csel x9, x0, x21, ge ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -7220,7 +7220,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, ge -; -O1: csel x9, x0, x20, ge +; -O1: csel x9, x0, x21, ge ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -7237,7 +7237,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, ge -; -O1: csel x9, x0, x20, ge +; -O1: csel x9, x0, 
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -7872,8 +7872,8 @@
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i16 %value monotonic, align 1
ret i16 %r
@@ -7888,8 +7888,8 @@
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i16 %value acquire, align 1
ret i16 %r
@@ -7904,8 +7904,8 @@
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i16 %value release, align 1
ret i16 %r
@@ -7920,8 +7920,8 @@
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i16 %value acq_rel, align 1
ret i16 %r
@@ -7936,8 +7936,8 @@
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i16 %value seq_cst, align 1
ret i16 %r
@@ -7950,8 +7950,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i32 %value monotonic, align 1
ret i32 %r
@@ -7964,8 +7964,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i32 %value acquire, align 1
ret i32 %r
@@ -7978,8 +7978,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i32 %value release, align 1
ret i32 %r
@@ -7992,8 +7992,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i32 %value acq_rel, align 1
ret i32 %r
@@ -8006,8 +8006,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i32 %value seq_cst, align 1
ret i32 %r
@@ -8020,8 +8020,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i64 %value monotonic, align 1
ret i64 %r
@@ -8034,8 +8034,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i64 %value acquire, align 1
ret i64 %r
@@ -8048,8 +8048,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i64 %value release, align 1
ret i64 %r
@@ -8062,8 +8062,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i64 %value acq_rel, align 1
ret i64 %r
@@ -8076,8 +8076,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i64 %value seq_cst, align 1
ret i64 %r
@@ -8094,7 +8094,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -8111,7 +8111,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -8128,7 +8128,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -8145,7 +8145,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -8162,7 +8162,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -8797,8 +8797,8 @@
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i16 %value monotonic, align 1
ret i16 %r
@@ -8813,8 +8813,8 @@
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i16 %value acquire, align 1
ret i16 %r
@@ -8829,8 +8829,8 @@
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i16 %value release, align 1
ret i16 %r
@@ -8845,8 +8845,8 @@
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i16 %value acq_rel, align 1
ret i16 %r
@@ -8861,8 +8861,8 @@
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i16 %value seq_cst, align 1
ret i16 %r
@@ -8875,8 +8875,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i32 %value monotonic, align 1
ret i32 %r
@@ -8889,8 +8889,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i32 %value acquire, align 1
ret i32 %r
@@ -8903,8 +8903,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i32 %value release, align 1
ret i32 %r
@@ -8917,8 +8917,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i32 %value acq_rel, align 1
ret i32 %r
@@ -8931,8 +8931,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i32 %value seq_cst, align 1
ret i32 %r
@@ -8945,8 +8945,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i64 %value monotonic, align 1
ret i64 %r
@@ -8959,8 +8959,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i64 %value acquire, align 1
ret i64 %r
@@ -8973,8 +8973,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i64 %value release, align 1
ret i64 %r
@@ -8987,8 +8987,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i64 %value acq_rel, align 1
ret i64 %r
@@ -9001,8 +9001,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i64 %value seq_cst, align 1
ret i64 %r
@@ -9019,7 +9019,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -9036,7 +9036,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -9053,7 +9053,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -9070,7 +9070,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -9087,7 +9087,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2_lse128.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2_lse128.ll
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2_lse128.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-lse2_lse128.ll
@@ -634,7 +634,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_monotonic:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i16 %value monotonic, align 1
ret i16 %r
@@ -646,7 +646,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acquire:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i16 %value acquire, align 1
ret i16 %r
@@ -658,7 +658,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_release:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i16 %value release, align 1
ret i16 %r
@@ -670,7 +670,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i16 %value acq_rel, align 1
ret i16 %r
@@ -682,7 +682,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i16 %value seq_cst, align 1
ret i16 %r
@@ -694,7 +694,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_monotonic:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i32 %value monotonic, align 1
ret i32 %r
@@ -706,7 +706,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acquire:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i32 %value acquire, align 1
ret i32 %r
@@ -718,7 +718,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_release:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i32 %value release, align 1
ret i32 %r
@@ -730,7 +730,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i32 %value acq_rel, align 1
ret i32 %r
@@ -742,7 +742,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i32 %value seq_cst, align 1
ret i32 %r
@@ -754,7 +754,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_monotonic:
-; -O1: add x8, x0, x19
+; -O1: add x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i64 %value monotonic, align 1
ret i64 %r
@@ -766,7 +766,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acquire:
-; -O1: add x8, x0, x19
+; -O1: add x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i64 %value acquire, align 1
ret i64 %r
@@ -778,7 +778,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_release:
-; -O1: add x8, x0, x19
+; -O1: add x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i64 %value release, align 1
ret i64 %r
@@ -790,7 +790,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
-; -O1: add x8, x0, x19
+; -O1: add x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i64 %value acq_rel, align 1
ret i64 %r
@@ -802,7 +802,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
-; -O1: add x8, x0, x19
+; -O1: add x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i64 %value seq_cst, align 1
ret i64 %r
@@ -1239,7 +1239,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw sub ptr %ptr, i16 %value monotonic, align 1
ret i16 %r
@@ -1251,7 +1251,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acquire:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw sub ptr %ptr, i16 %value acquire, align 1
ret i16 %r
@@ -1263,7 +1263,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_release:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw sub ptr %ptr, i16 %value release, align 1
ret i16 %r
@@ -1275,7 +1275,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw sub ptr %ptr, i16 %value acq_rel, align 1
ret i16 %r
@@ -1287,7 +1287,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw sub ptr %ptr, i16 %value seq_cst, align 1
ret i16 %r
@@ -1299,7 +1299,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw sub ptr %ptr, i32 %value monotonic, align 1
ret i32 %r
@@ -1311,7 +1311,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acquire:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw sub ptr %ptr, i32 %value acquire, align 1
ret i32 %r
@@ -1323,7 +1323,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_release:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw sub ptr %ptr, i32 %value release, align 1
ret i32 %r
@@ -1335,7 +1335,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw sub ptr %ptr, i32 %value acq_rel, align 1
ret i32 %r
@@ -1347,7 +1347,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw sub ptr %ptr, i32 %value seq_cst, align 1
ret i32 %r
@@ -1359,7 +1359,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
-; -O1: sub x8, x0, x19
+; -O1: sub x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw sub ptr %ptr, i64 %value monotonic, align 1
ret i64 %r
@@ -1371,7 +1371,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acquire:
-; -O1: sub x8, x0, x19
+; -O1: sub x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw sub ptr %ptr, i64 %value acquire, align 1
ret i64 %r
@@ -1383,7 +1383,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_release:
-; -O1: sub x8, x0, x19
+; -O1: sub x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw sub ptr %ptr, i64 %value release, align 1
ret i64 %r
@@ -1395,7 +1395,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
-; -O1: sub x8, x0, x19
+; -O1: sub x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw sub ptr %ptr, i64 %value acq_rel, align 1
ret i64 %r
@@ -1407,7 +1407,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
-; -O1: sub x8, x0, x19
+; -O1: sub x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw sub ptr %ptr, i64 %value seq_cst, align 1
ret i64 %r
@@ -1754,7 +1754,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_monotonic:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i16 %value monotonic, align 1
ret i16 %r
@@ -1766,7 +1766,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acquire:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i16 %value acquire, align 1
ret i16 %r
@@ -1778,7 +1778,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_release:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i16 %value release, align 1
ret i16 %r
@@ -1790,7 +1790,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i16 %value acq_rel, align 1
ret i16 %r
@@ -1802,7 +1802,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i16 %value seq_cst, align 1
ret i16 %r
@@ -1814,7 +1814,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_monotonic:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i32 %value monotonic, align 1
ret i32 %r
@@ -1826,7 +1826,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acquire:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i32 %value acquire, align 1
ret i32 %r
@@ -1838,7 +1838,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_release:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i32 %value release, align 1
ret i32 %r
@@ -1850,7 +1850,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i32 %value acq_rel, align 1
ret i32 %r
@@ -1862,7 +1862,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i32 %value seq_cst, align 1
ret i32 %r
@@ -1874,7 +1874,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_monotonic:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i64 %value monotonic, align 1
ret i64 %r
@@ -1886,7 +1886,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acquire:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i64 %value acquire, align 1
ret i64 %r
@@ -1898,7 +1898,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_release:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i64 %value release, align 1
ret i64 %r
@@ -1910,7 +1910,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i64 %value acq_rel, align 1
ret i64 %r
@@ -1922,7 +1922,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i64 %value seq_cst, align 1
ret i64 %r
@@ -1937,7 +1937,7 @@
; -O1-LABEL: atomicrmw_and_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -1952,7 +1952,7 @@
; -O1-LABEL: atomicrmw_and_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -1967,7 +1967,7 @@
; -O1-LABEL: atomicrmw_and_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -1982,7 +1982,7 @@
; -O1-LABEL: atomicrmw_and_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -1997,7 +1997,7 @@
; -O1-LABEL: atomicrmw_and_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -2550,7 +2550,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_monotonic:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
; -O1: mvn w8, w8
; -O1: bl __atomic_compare_exchange
%r = atomicrmw nand ptr %ptr, i16 %value monotonic, align 1
@@ -2564,7 +2564,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acquire:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
; -O1: mvn w8, w8
; -O1: bl __atomic_compare_exchange
%r = atomicrmw nand ptr %ptr, i16 %value acquire, align 1
@@ -2578,7 +2578,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_release:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
; -O1: mvn w8, w8
; -O1: bl __atomic_compare_exchange
%r = atomicrmw nand ptr %ptr, i16 %value release, align 1
@@ -2592,7 +2592,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acq_rel:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
; -O1: mvn w8, w8
; -O1: bl __atomic_compare_exchange
%r = atomicrmw nand ptr %ptr, i16 %value acq_rel, align 1
@@ -2606,7 +2606,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_seq_cst:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
; -O1: mvn w8, w8
; -O1: bl __atomic_compare_exchange
%r = atomicrmw nand ptr %ptr, i16 %value seq_cst, align 1
@@ -2620,7 +2620,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_monotonic:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
; -O1: mvn w8, w8
; -O1: bl __atomic_compare_exchange
%r = atomicrmw nand ptr %ptr, i32 %value monotonic, align 1
@@ -2634,7 +2634,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acquire:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
; -O1: mvn w8, w8
; -O1: bl __atomic_compare_exchange
%r = atomicrmw nand ptr %ptr, i32 %value acquire, align 1
@@ -2648,7 +2648,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_release:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
; -O1: mvn w8, w8
; -O1: bl __atomic_compare_exchange
%r = atomicrmw nand ptr %ptr, i32 %value release, align 1
@@ -2662,7 +2662,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acq_rel:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
; -O1: mvn w8, w8
; -O1: bl __atomic_compare_exchange
%r = atomicrmw nand ptr %ptr, i32 %value acq_rel, align 1
@@ -2676,7 +2676,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_seq_cst:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
; -O1: mvn w8, w8
; -O1: bl __atomic_compare_exchange
%r = atomicrmw nand ptr %ptr, i32 %value seq_cst, align 1
@@ -2690,7 +2690,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_monotonic:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
; -O1: mvn x8, x8
; -O1: bl __atomic_compare_exchange
%r = atomicrmw nand ptr %ptr, i64 %value monotonic, align 1
@@ -2704,7 +2704,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acquire:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
; -O1: mvn x8, x8
; -O1: bl __atomic_compare_exchange
%r = atomicrmw nand ptr %ptr, i64 %value acquire, align 1
@@ -2718,7 +2718,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_release:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
; -O1: mvn x8, x8
; -O1: bl __atomic_compare_exchange
%r = atomicrmw nand ptr %ptr, i64 %value release, align 1
@@ -2732,7 +2732,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acq_rel:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
; -O1: mvn x8, x8
; -O1: bl __atomic_compare_exchange
%r = atomicrmw nand ptr %ptr, i64 %value acq_rel, align 1
@@ -2746,7 +2746,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_seq_cst:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
; -O1: mvn x8, x8
; -O1: bl __atomic_compare_exchange
%r = atomicrmw nand ptr %ptr, i64 %value seq_cst, align 1
@@ -2764,7 +2764,7 @@
; -O1-LABEL: atomicrmw_nand_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2783,7 +2783,7 @@
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2802,7 +2802,7 @@
; -O1-LABEL: atomicrmw_nand_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2821,7 +2821,7 @@
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -2840,7 +2840,7 @@
; -O1-LABEL: atomicrmw_nand_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3079,7 +3079,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_monotonic:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i16 %value monotonic, align 1
ret i16 %r
@@ -3091,7 +3091,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acquire:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i16 %value acquire, align 1
ret i16 %r
@@ -3103,7 +3103,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_release:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i16 %value release, align 1
ret i16 %r
@@ -3115,7 +3115,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i16 %value acq_rel, align 1
ret i16 %r
@@ -3127,7 +3127,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i16 %value seq_cst, align 1
ret i16 %r
@@ -3139,7 +3139,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_monotonic:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i32 %value monotonic, align 1
ret i32 %r
@@ -3151,7 +3151,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acquire:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i32 %value acquire, align 1
ret i32 %r
@@ -3163,7 +3163,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_release:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i32 %value release, align 1
ret i32 %r
@@ -3175,7 +3175,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i32 %value acq_rel, align 1
ret i32 %r
@@ -3187,7 +3187,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i32 %value seq_cst, align 1
ret i32 %r
@@ -3199,7 +3199,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_monotonic:
-; -O1: orr x8, x0, x19
+; -O1: orr x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i64 %value monotonic, align 1
ret i64 %r
@@ -3211,7 +3211,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acquire:
-; -O1: orr x8, x0, x19
+; -O1: orr x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i64 %value acquire, align 1
ret i64 %r
@@ -3223,7 +3223,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_release:
-; -O1: orr x8, x0, x19
+; -O1: orr x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i64 %value release, align 1
ret i64 %r
@@ -3235,7 +3235,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
-; -O1: orr x8, x0, x19
+; -O1: orr x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i64 %value acq_rel, align 1
ret i64 %r
@@ -3247,7 +3247,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
-; -O1: orr x8, x0, x19
+; -O1: orr x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i64 %value seq_cst, align 1
ret i64 %r
@@ -3262,7 +3262,7 @@
; -O1-LABEL: atomicrmw_or_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x20
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -3277,7 +3277,7 @@
; -O1-LABEL: atomicrmw_or_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x20
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -3292,7 +3292,7 @@
; -O1-LABEL: atomicrmw_or_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x20
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -3307,7 +3307,7 @@
; -O1-LABEL: atomicrmw_or_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x20
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -3322,7 +3322,7 @@
; -O1-LABEL: atomicrmw_or_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x20
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -3604,7 +3604,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i16 %value monotonic, align 1
ret i16 %r
@@ -3616,7 +3616,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acquire:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i16 %value acquire, align 1
ret i16 %r
@@ -3628,7 +3628,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_release:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i16 %value release, align 1
ret i16 %r
@@ -3640,7 +3640,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i16 %value acq_rel, align 1
ret i16 %r
@@ -3652,7 +3652,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i16 %value seq_cst, align 1
ret i16 %r
@@ -3664,7 +3664,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i32 %value monotonic, align 1
ret i32 %r
@@ -3676,7 +3676,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acquire:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i32 %value acquire, align 1
ret i32 %r
@@ -3688,7 +3688,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_release:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i32 %value release, align 1
ret i32 %r
@@ -3700,7 +3700,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i32 %value acq_rel, align 1
ret i32 %r
@@ -3712,7 +3712,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i32 %value seq_cst, align 1
ret i32 %r
@@ -3724,7 +3724,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
-; -O1: eor x8, x0, x19
+; -O1: eor x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i64 %value monotonic, align 1
ret i64 %r
@@ -3736,7 +3736,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acquire:
-; -O1: eor x8, x0, x19
+; -O1: eor x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i64 %value acquire, align 1
ret i64 %r
@@ -3748,7 +3748,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_release:
-; -O1: eor x8, x0, x19
+; -O1: eor x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i64 %value release, align 1
ret i64 %r
@@ -3760,7 +3760,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
-; -O1: eor x8, x0, x19
+; -O1: eor x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i64 %value acq_rel, align 1
ret i64 %r
@@ -3772,7 +3772,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
-; -O1: eor x8, x0, x19
+; -O1: eor x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i64 %value seq_cst, align 1
ret i64 %r
@@ -3787,7 +3787,7 @@
; -O1-LABEL: atomicrmw_xor_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x20
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -3802,7 +3802,7 @@
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x20
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -3817,7 +3817,7 @@
; -O1-LABEL: atomicrmw_xor_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x20
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -3832,7 +3832,7 @@
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x20
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -3847,7 +3847,7 @@
; -O1-LABEL: atomicrmw_xor_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x20
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -4142,8 +4142,8 @@
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i16 %value monotonic, align 1
ret i16 %r
@@ -4158,8 +4158,8 @@
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i16 %value acquire, align 1
ret i16 %r
@@ -4174,8 +4174,8 @@
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i16 %value release, align 1
ret i16 %r
@@ -4190,8 +4190,8 @@
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i16 %value acq_rel, align 1
ret i16 %r
@@ -4206,8 +4206,8 @@
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i16 %value seq_cst, align 1
ret i16 %r
@@ -4220,8 +4220,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i32 %value monotonic, align 1
ret i32 %r
@@ -4234,8 +4234,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i32 %value acquire, align 1
ret i32 %r
@@ -4248,8 +4248,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i32 %value release, align 1
ret i32 %r
@@ -4262,8 +4262,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i32 %value acq_rel, align 1
ret i32 %r
@@ -4276,8 +4276,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i32 %value seq_cst, align 1
ret i32 %r
@@ -4290,8 +4290,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, gt
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i64 %value monotonic, align 1
ret i64 %r
@@ -4304,8 +4304,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, gt
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i64 %value acquire, align 1
ret i64 %r
@@ -4318,8 +4318,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, gt
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i64 %value release, align 1
ret i64 %r
@@ -4332,8 +4332,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, gt
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i64 %value acq_rel, align 1
ret i64 %r
@@ -4346,8 +4346,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, gt
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i64 %value seq_cst, align 1
ret i64 %r
@@ -4364,7 +4364,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, lt
-; -O1: csel x9, x0, x20, lt
+; -O1: csel x9, x0, x21, lt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -4381,7 +4381,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, lt
-; -O1: csel x9, x0, x20, lt
+; -O1: csel x9, x0, x21, lt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -4398,7 +4398,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, lt
-; -O1: csel x9, x0, x20, lt
+; -O1: csel x9, x0, x21, lt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -4415,7 +4415,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, lt
-; -O1: csel x9, x0, x20, lt
+; -O1: csel x9, x0, x21, lt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -4432,7 +4432,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, lt
-; -O1: csel x9, x0, x20, lt
+; -O1: csel x9, x0, x21, lt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -4727,8 +4727,8 @@
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i16 %value monotonic, align 1
ret i16 %r
@@ -4743,8 +4743,8 @@
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i16 %value acquire, align 1
ret i16 %r
@@ -4759,8 +4759,8 @@
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i16 %value release, align 1
ret i16 %r
@@ -4775,8 +4775,8 @@
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i16 %value acq_rel, align 1
ret i16 %r
@@ -4791,8 +4791,8 @@
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i16 %value seq_cst, align 1
ret i16 %r
@@ -4805,8 +4805,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i32 %value monotonic, align 1
ret i32 %r
@@ -4819,8 +4819,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i32 %value acquire, align 1
ret i32 %r
@@ -4833,8 +4833,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i32 %value release, align 1
ret i32 %r
@@ -4847,8 +4847,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i32 %value acq_rel, align 1
ret i32 %r
@@ -4861,8 +4861,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i32 %value seq_cst, align 1
ret i32 %r
@@ -4875,8 +4875,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, le
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i64 %value monotonic, align 1
ret i64 %r
@@ -4889,8 +4889,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, le
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i64 %value acquire, align 1
ret i64 %r
@@ -4903,8 +4903,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, le
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i64 %value release, align 1
ret i64 %r
@@ -4917,8 +4917,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, le
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i64 %value acq_rel, align 1
ret i64 %r
@@ -4931,8 +4931,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, le
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i64 %value seq_cst, align 1
ret i64 %r
@@ -4949,7 +4949,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, ge
-; -O1: csel x9, x0, x20, ge
+; -O1: csel x9, x0, x21, ge
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -4966,7 +4966,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, ge
-; -O1: csel x9, x0, x20, ge
+; -O1: csel x9, x0, x21, ge
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -4983,7 +4983,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, ge
-; -O1: csel x9, x0, x20, ge
+; -O1: csel x9, x0, x21, ge
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -5000,7 +5000,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, ge
-; -O1: csel x9, x0, x20, ge
+; -O1: csel x9, x0, x21, ge
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -5017,7 +5017,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, ge
-; -O1: csel x9, x0, x20, ge
+; -O1: csel x9, x0, x21, ge
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -5312,8 +5312,8 @@
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i16 %value monotonic, align 1
ret i16 %r
@@ -5328,8 +5328,8 @@
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i16 %value acquire, align 1
ret i16 %r
@@ -5344,8 +5344,8 @@
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i16 %value release, align 1
ret i16 %r
@@ -5360,8 +5360,8 @@
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i16 %value acq_rel, align 1
ret i16 %r
@@ -5376,8 +5376,8 @@
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i16 %value seq_cst, align 1
ret i16 %r
@@ -5390,8 +5390,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i32 %value monotonic, align 1
ret i32 %r
@@ -5404,8 +5404,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i32 %value acquire, align 1
ret i32 %r
@@ -5418,8 +5418,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i32 %value release, align 1
ret i32 %r
@@ -5432,8 +5432,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i32 %value acq_rel, align 1
ret i32 %r
@@ -5446,8 +5446,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i32 %value seq_cst, align 1
ret i32 %r
@@ -5460,8 +5460,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i64 %value monotonic, align 1
ret i64 %r
@@ -5474,8 +5474,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i64 %value acquire, align 1
ret i64 %r
@@ -5488,8 +5488,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i64 %value release, align 1
ret i64 %r
@@ -5502,8 +5502,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i64 %value acq_rel, align 1
ret i64 %r
@@ -5516,8 +5516,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i64 %value seq_cst, align 1
ret i64 %r
@@ -5534,7 +5534,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -5551,7 +5551,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -5568,7 +5568,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -5585,7 +5585,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -5602,7 +5602,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -5897,8 +5897,8 @@
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i16 %value monotonic, align 1
ret i16 %r
@@ -5913,8 +5913,8 @@
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i16 %value acquire, align 1
ret i16 %r
@@ -5929,8 +5929,8 @@
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i16 %value release, align 1
%ptr, i16 %value release, align 1 ret i16 %r @@ -5945,8 +5945,8 @@ ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, ls +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -5961,8 +5961,8 @@ ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, ls +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -5975,8 +5975,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, ls +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -5989,8 +5989,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, ls +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -6003,8 +6003,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_release: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, ls +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -6017,8 +6017,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, ls +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -6031,8 +6031,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, ls +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -6045,8 +6045,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, ls +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -6059,8 +6059,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, ls +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -6073,8 +6073,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_release: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, ls +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -6087,8 +6087,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, ls +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -6101,8 +6101,8 @@ ; -O0: bl 
__atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, ls +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -6119,7 +6119,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, hs -; -O1: csel x9, x0, x20, hs +; -O1: csel x9, x0, x21, hs ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -6136,7 +6136,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, hs -; -O1: csel x9, x0, x20, hs +; -O1: csel x9, x0, x21, hs ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -6153,7 +6153,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, hs -; -O1: csel x9, x0, x20, hs +; -O1: csel x9, x0, x21, hs ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -6170,7 +6170,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, hs -; -O1: csel x9, x0, x20, hs +; -O1: csel x9, x0, x21, hs ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -6187,7 +6187,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, hs -; -O1: csel x9, x0, x20, hs +; -O1: csel x9, x0, x21, hs ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-outline_atomics.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-outline_atomics.ll --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-outline_atomics.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-outline_atomics.ll @@ -639,7 +639,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i16_unaligned_monotonic: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -651,7 +651,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i16_unaligned_acquire: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -663,7 +663,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i16_unaligned_release: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -675,7 +675,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i16_unaligned_acq_rel: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -687,7 +687,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i16_unaligned_seq_cst: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -699,7 +699,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i32_unaligned_monotonic: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -711,7 +711,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: 
atomicrmw_add_i32_unaligned_acquire: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -723,7 +723,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i32_unaligned_release: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -735,7 +735,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i32_unaligned_acq_rel: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -747,7 +747,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i32_unaligned_seq_cst: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -759,7 +759,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i64_unaligned_monotonic: -; -O1: add x8, x0, x19 +; -O1: add x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -771,7 +771,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i64_unaligned_acquire: -; -O1: add x8, x0, x19 +; -O1: add x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -783,7 +783,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i64_unaligned_release: -; -O1: add x8, x0, x19 +; -O1: add x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -795,7 +795,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i64_unaligned_acq_rel: -; -O1: add x8, x0, x19 +; -O1: add x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -807,7 +807,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i64_unaligned_seq_cst: -; -O1: add x8, x0, x19 +; -O1: add x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -1234,7 +1234,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i16_unaligned_monotonic: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -1246,7 +1246,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i16_unaligned_acquire: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -1258,7 +1258,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i16_unaligned_release: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -1270,7 +1270,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i16_unaligned_acq_rel: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -1282,7 +1282,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i16_unaligned_seq_cst: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i16 
%value seq_cst, align 1 ret i16 %r @@ -1294,7 +1294,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i32_unaligned_monotonic: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -1306,7 +1306,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i32_unaligned_acquire: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -1318,7 +1318,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i32_unaligned_release: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -1330,7 +1330,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i32_unaligned_acq_rel: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -1342,7 +1342,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i32_unaligned_seq_cst: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -1354,7 +1354,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_monotonic: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -1366,7 +1366,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_acquire: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -1378,7 +1378,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_release: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -1390,7 +1390,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_acq_rel: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -1402,7 +1402,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_seq_cst: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -1723,9 +1723,9 @@ ; ; -O1-LABEL: atomicrmw_and_i128_aligned_monotonic: ; -O1: ldxp x1, x0, [x8] -; -O1: and x9, x1, x3 -; -O1: and x10, x0, x2 -; -O1: stxp w11, x9, x10, [x8] +; -O1: and x9, x0, x2 +; -O1: and x10, x1, x3 +; -O1: stxp w11, x10, x9, [x8] %r = atomicrmw and ptr %ptr, i128 %value monotonic, align 16 ret i128 %r } @@ -1740,9 +1740,9 @@ ; ; -O1-LABEL: atomicrmw_and_i128_aligned_acquire: ; -O1: ldaxp x1, x0, [x8] -; -O1: and x9, x1, x3 -; -O1: and x10, x0, x2 -; -O1: stxp w11, x9, x10, [x8] +; -O1: and x9, x0, x2 +; -O1: and x10, x1, x3 +; -O1: stxp w11, x10, x9, [x8] %r = atomicrmw and ptr %ptr, i128 %value acquire, align 16 ret i128 %r } @@ -1757,9 +1757,9 @@ ; ; -O1-LABEL: atomicrmw_and_i128_aligned_release: ; -O1: ldxp x1, x0, [x8] -; -O1: and x9, x1, x3 -; -O1: and x10, x0, x2 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: and x9, x0, x2 +; -O1: and x10, x1, x3 +; 
-O1: stlxp w11, x10, x9, [x8] %r = atomicrmw and ptr %ptr, i128 %value release, align 16 ret i128 %r } @@ -1774,9 +1774,9 @@ ; ; -O1-LABEL: atomicrmw_and_i128_aligned_acq_rel: ; -O1: ldaxp x1, x0, [x8] -; -O1: and x9, x1, x3 -; -O1: and x10, x0, x2 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: and x9, x0, x2 +; -O1: and x10, x1, x3 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 16 ret i128 %r } @@ -1791,9 +1791,9 @@ ; ; -O1-LABEL: atomicrmw_and_i128_aligned_seq_cst: ; -O1: ldaxp x1, x0, [x8] -; -O1: and x9, x1, x3 -; -O1: and x10, x0, x2 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: and x9, x0, x2 +; -O1: and x10, x1, x3 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 16 ret i128 %r } @@ -1864,7 +1864,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i16_unaligned_monotonic: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -1876,7 +1876,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i16_unaligned_acquire: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -1888,7 +1888,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i16_unaligned_release: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -1900,7 +1900,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i16_unaligned_acq_rel: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -1912,7 +1912,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i16_unaligned_seq_cst: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -1924,7 +1924,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i32_unaligned_monotonic: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -1936,7 +1936,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i32_unaligned_acquire: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -1948,7 +1948,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i32_unaligned_release: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -1960,7 +1960,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i32_unaligned_acq_rel: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -1972,7 +1972,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i32_unaligned_seq_cst: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -1984,7 +1984,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i64_unaligned_monotonic: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 
; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -1996,7 +1996,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i64_unaligned_acquire: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -2008,7 +2008,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i64_unaligned_release: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -2020,7 +2020,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i64_unaligned_acq_rel: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -2032,7 +2032,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i64_unaligned_seq_cst: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -2047,7 +2047,7 @@ ; -O1-LABEL: atomicrmw_and_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -2062,7 +2062,7 @@ ; -O1-LABEL: atomicrmw_and_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -2077,7 +2077,7 @@ ; -O1-LABEL: atomicrmw_and_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -2092,7 +2092,7 @@ ; -O1-LABEL: atomicrmw_and_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -2107,7 +2107,7 @@ ; -O1-LABEL: atomicrmw_and_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -2466,9 +2466,9 @@ ; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic: ; -O1: ldxp x1, x0, [x8] ; -O1: and x9, x1, x3 -; -O1: mvn x9, x9 ; -O1: and x10, x0, x2 ; -O1: mvn x10, x10 +; -O1: mvn x9, x9 ; -O1: stxp w11, x9, x10, [x8] %r = atomicrmw nand ptr %ptr, i128 %value monotonic, align 16 ret i128 %r @@ -2487,9 +2487,9 @@ ; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire: ; -O1: ldaxp x1, x0, [x8] ; -O1: and x9, x1, x3 -; -O1: mvn x9, x9 ; -O1: and x10, x0, x2 ; -O1: mvn x10, x10 +; -O1: mvn x9, x9 ; -O1: stxp w11, x9, x10, [x8] %r = atomicrmw nand ptr %ptr, i128 %value acquire, align 16 ret i128 %r @@ -2508,9 +2508,9 @@ ; -O1-LABEL: atomicrmw_nand_i128_aligned_release: ; -O1: ldxp x1, x0, [x8] ; -O1: and x9, x1, x3 -; -O1: mvn x9, x9 ; -O1: and x10, x0, x2 ; -O1: mvn x10, x10 +; -O1: mvn x9, x9 ; -O1: stlxp w11, x9, x10, [x8] %r = atomicrmw nand ptr %ptr, i128 %value release, align 16 ret i128 %r @@ -2529,9 +2529,9 @@ ; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel: ; -O1: ldaxp x1, x0, [x8] ; -O1: and 
x9, x1, x3 -; -O1: mvn x9, x9 ; -O1: and x10, x0, x2 ; -O1: mvn x10, x10 +; -O1: mvn x9, x9 ; -O1: stlxp w11, x9, x10, [x8] %r = atomicrmw nand ptr %ptr, i128 %value acq_rel, align 16 ret i128 %r @@ -2550,9 +2550,9 @@ ; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst: ; -O1: ldaxp x1, x0, [x8] ; -O1: and x9, x1, x3 -; -O1: mvn x9, x9 ; -O1: and x10, x0, x2 ; -O1: mvn x10, x10 +; -O1: mvn x9, x9 ; -O1: stlxp w11, x9, x10, [x8] %r = atomicrmw nand ptr %ptr, i128 %value seq_cst, align 16 ret i128 %r @@ -2650,7 +2650,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i16_unaligned_monotonic: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i16 %value monotonic, align 1 @@ -2664,7 +2664,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i16_unaligned_acquire: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i16 %value acquire, align 1 @@ -2678,7 +2678,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i16_unaligned_release: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i16 %value release, align 1 @@ -2692,7 +2692,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i16_unaligned_acq_rel: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i16 %value acq_rel, align 1 @@ -2706,7 +2706,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i16_unaligned_seq_cst: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i16 %value seq_cst, align 1 @@ -2720,7 +2720,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i32_unaligned_monotonic: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i32 %value monotonic, align 1 @@ -2734,7 +2734,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i32_unaligned_acquire: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i32 %value acquire, align 1 @@ -2748,7 +2748,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i32_unaligned_release: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i32 %value release, align 1 @@ -2762,7 +2762,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i32_unaligned_acq_rel: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i32 %value acq_rel, align 1 @@ -2776,7 +2776,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i32_unaligned_seq_cst: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i32 %value seq_cst, align 1 @@ -2790,7 +2790,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i64_unaligned_monotonic: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: mvn x8, x8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i64 %value monotonic, align 1 @@ -2804,7 +2804,7 @@ ; -O0: bl __atomic_compare_exchange ; 
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acquire: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: mvn x8, x8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i64 %value acquire, align 1 @@ -2818,7 +2818,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i64_unaligned_release: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: mvn x8, x8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i64 %value release, align 1 @@ -2832,7 +2832,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i64_unaligned_acq_rel: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: mvn x8, x8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i64 %value acq_rel, align 1 @@ -2846,7 +2846,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i64_unaligned_seq_cst: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: mvn x8, x8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i64 %value seq_cst, align 1 @@ -2864,7 +2864,7 @@ ; -O1-LABEL: atomicrmw_nand_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: mvn x8, x8 ; -O1: mvn x9, x9 ; -O1: bl __atomic_compare_exchange @@ -2883,7 +2883,7 @@ ; -O1-LABEL: atomicrmw_nand_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: mvn x8, x8 ; -O1: mvn x9, x9 ; -O1: bl __atomic_compare_exchange @@ -2902,7 +2902,7 @@ ; -O1-LABEL: atomicrmw_nand_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: mvn x8, x8 ; -O1: mvn x9, x9 ; -O1: bl __atomic_compare_exchange @@ -2921,7 +2921,7 @@ ; -O1-LABEL: atomicrmw_nand_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: mvn x8, x8 ; -O1: mvn x9, x9 ; -O1: bl __atomic_compare_exchange @@ -2940,7 +2940,7 @@ ; -O1-LABEL: atomicrmw_nand_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: mvn x8, x8 ; -O1: mvn x9, x9 ; -O1: bl __atomic_compare_exchange @@ -3098,9 +3098,9 @@ ; ; -O1-LABEL: atomicrmw_or_i128_aligned_monotonic: ; -O1: ldxp x1, x0, [x8] -; -O1: orr x9, x1, x3 -; -O1: orr x10, x0, x2 -; -O1: stxp w11, x9, x10, [x8] +; -O1: orr x9, x0, x2 +; -O1: orr x10, x1, x3 +; -O1: stxp w11, x10, x9, [x8] %r = atomicrmw or ptr %ptr, i128 %value monotonic, align 16 ret i128 %r } @@ -3115,9 +3115,9 @@ ; ; -O1-LABEL: atomicrmw_or_i128_aligned_acquire: ; -O1: ldaxp x1, x0, [x8] -; -O1: orr x9, x1, x3 -; -O1: orr x10, x0, x2 -; -O1: stxp w11, x9, x10, [x8] +; -O1: orr x9, x0, x2 +; -O1: orr x10, x1, x3 +; -O1: stxp w11, x10, x9, [x8] %r = atomicrmw or ptr %ptr, i128 %value acquire, align 16 ret i128 %r } @@ -3132,9 +3132,9 @@ ; ; -O1-LABEL: atomicrmw_or_i128_aligned_release: ; -O1: ldxp x1, x0, [x8] -; -O1: orr x9, x1, x3 -; -O1: orr x10, x0, x2 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: orr x9, x0, x2 +; -O1: orr x10, x1, x3 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw or ptr %ptr, i128 %value release, align 16 ret i128 %r } @@ -3149,9 +3149,9 @@ ; ; -O1-LABEL: atomicrmw_or_i128_aligned_acq_rel: ; -O1: ldaxp x1, x0, [x8] -; -O1: orr x9, x1, x3 -; -O1: orr x10, x0, x2 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: orr x9, x0, x2 +; -O1: orr x10, x1, x3 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 16 ret i128 %r } @@ 
-3166,9 +3166,9 @@ ; ; -O1-LABEL: atomicrmw_or_i128_aligned_seq_cst: ; -O1: ldaxp x1, x0, [x8] -; -O1: orr x9, x1, x3 -; -O1: orr x10, x0, x2 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: orr x9, x0, x2 +; -O1: orr x10, x1, x3 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 16 ret i128 %r } @@ -3214,7 +3214,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i16_unaligned_monotonic: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -3226,7 +3226,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i16_unaligned_acquire: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -3238,7 +3238,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i16_unaligned_release: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -3250,7 +3250,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i16_unaligned_acq_rel: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -3262,7 +3262,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i16_unaligned_seq_cst: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -3274,7 +3274,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i32_unaligned_monotonic: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -3286,7 +3286,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i32_unaligned_acquire: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -3298,7 +3298,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i32_unaligned_release: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -3310,7 +3310,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i32_unaligned_acq_rel: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -3322,7 +3322,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i32_unaligned_seq_cst: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -3334,7 +3334,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i64_unaligned_monotonic: -; -O1: orr x8, x0, x19 +; -O1: orr x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -3346,7 +3346,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i64_unaligned_acquire: -; -O1: orr x8, x0, x19 +; -O1: orr x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -3358,7 +3358,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: 
atomicrmw_or_i64_unaligned_release: -; -O1: orr x8, x0, x19 +; -O1: orr x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -3370,7 +3370,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i64_unaligned_acq_rel: -; -O1: orr x8, x0, x19 +; -O1: orr x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -3382,7 +3382,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i64_unaligned_seq_cst: -; -O1: orr x8, x0, x19 +; -O1: orr x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -3397,7 +3397,7 @@ ; -O1-LABEL: atomicrmw_or_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] ; -O1: orr x8, x1, x19 -; -O1: orr x9, x0, x20 +; -O1: orr x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -3412,7 +3412,7 @@ ; -O1-LABEL: atomicrmw_or_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] ; -O1: orr x8, x1, x19 -; -O1: orr x9, x0, x20 +; -O1: orr x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -3427,7 +3427,7 @@ ; -O1-LABEL: atomicrmw_or_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] ; -O1: orr x8, x1, x19 -; -O1: orr x9, x0, x20 +; -O1: orr x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -3442,7 +3442,7 @@ ; -O1-LABEL: atomicrmw_or_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] ; -O1: orr x8, x1, x19 -; -O1: orr x9, x0, x20 +; -O1: orr x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -3457,7 +3457,7 @@ ; -O1-LABEL: atomicrmw_or_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] ; -O1: orr x8, x1, x19 -; -O1: orr x9, x0, x20 +; -O1: orr x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -3613,9 +3613,9 @@ ; ; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic: ; -O1: ldxp x1, x0, [x8] -; -O1: eor x9, x1, x3 -; -O1: eor x10, x0, x2 -; -O1: stxp w11, x9, x10, [x8] +; -O1: eor x9, x0, x2 +; -O1: eor x10, x1, x3 +; -O1: stxp w11, x10, x9, [x8] %r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 16 ret i128 %r } @@ -3630,9 +3630,9 @@ ; ; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire: ; -O1: ldaxp x1, x0, [x8] -; -O1: eor x9, x1, x3 -; -O1: eor x10, x0, x2 -; -O1: stxp w11, x9, x10, [x8] +; -O1: eor x9, x0, x2 +; -O1: eor x10, x1, x3 +; -O1: stxp w11, x10, x9, [x8] %r = atomicrmw xor ptr %ptr, i128 %value acquire, align 16 ret i128 %r } @@ -3647,9 +3647,9 @@ ; ; -O1-LABEL: atomicrmw_xor_i128_aligned_release: ; -O1: ldxp x1, x0, [x8] -; -O1: eor x9, x1, x3 -; -O1: eor x10, x0, x2 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: eor x9, x0, x2 +; -O1: eor x10, x1, x3 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw xor ptr %ptr, i128 %value release, align 16 ret i128 %r } @@ -3664,9 +3664,9 @@ ; ; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel: ; -O1: ldaxp x1, x0, [x8] -; -O1: eor x9, x1, x3 -; -O1: eor x10, x0, x2 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: eor x9, x0, x2 +; -O1: eor x10, x1, x3 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 16 ret i128 %r } @@ -3681,9 +3681,9 @@ ; ; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst: ; -O1: ldaxp x1, x0, [x8] -; -O1: eor x9, x1, x3 -; -O1: eor x10, x0, x2 -; -O1: 
stlxp w11, x9, x10, [x8] +; -O1: eor x9, x0, x2 +; -O1: eor x10, x1, x3 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 16 ret i128 %r } @@ -3729,7 +3729,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i16_unaligned_monotonic: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -3741,7 +3741,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i16_unaligned_acquire: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -3753,7 +3753,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i16_unaligned_release: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -3765,7 +3765,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i16_unaligned_acq_rel: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -3777,7 +3777,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i16_unaligned_seq_cst: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -3789,7 +3789,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i32_unaligned_monotonic: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -3801,7 +3801,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i32_unaligned_acquire: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -3813,7 +3813,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i32_unaligned_release: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -3825,7 +3825,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i32_unaligned_acq_rel: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -3837,7 +3837,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i32_unaligned_seq_cst: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -3849,7 +3849,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i64_unaligned_monotonic: -; -O1: eor x8, x0, x19 +; -O1: eor x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -3861,7 +3861,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i64_unaligned_acquire: -; -O1: eor x8, x0, x19 +; -O1: eor x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -3873,7 +3873,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i64_unaligned_release: -; -O1: eor x8, x0, x19 +; -O1: eor x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr 
%ptr, i64 %value release, align 1 ret i64 %r @@ -3885,7 +3885,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i64_unaligned_acq_rel: -; -O1: eor x8, x0, x19 +; -O1: eor x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -3897,7 +3897,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i64_unaligned_seq_cst: -; -O1: eor x8, x0, x19 +; -O1: eor x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -3912,7 +3912,7 @@ ; -O1-LABEL: atomicrmw_xor_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] ; -O1: eor x8, x1, x19 -; -O1: eor x9, x0, x20 +; -O1: eor x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -3927,7 +3927,7 @@ ; -O1-LABEL: atomicrmw_xor_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] ; -O1: eor x8, x1, x19 -; -O1: eor x9, x0, x20 +; -O1: eor x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -3942,7 +3942,7 @@ ; -O1-LABEL: atomicrmw_xor_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] ; -O1: eor x8, x1, x19 -; -O1: eor x9, x0, x20 +; -O1: eor x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -3957,7 +3957,7 @@ ; -O1-LABEL: atomicrmw_xor_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] ; -O1: eor x8, x1, x19 -; -O1: eor x9, x0, x20 +; -O1: eor x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -3972,7 +3972,7 @@ ; -O1-LABEL: atomicrmw_xor_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] ; -O1: eor x8, x1, x19 -; -O1: eor x9, x0, x20 +; -O1: eor x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -4537,8 +4537,8 @@ ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, gt +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -4553,8 +4553,8 @@ ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, gt +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -4569,8 +4569,8 @@ ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_release: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, gt +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -4585,8 +4585,8 @@ ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, gt +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -4601,8 +4601,8 @@ ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, gt +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -4615,8 +4615,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: 
atomicrmw_max_i32_unaligned_monotonic: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, gt +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -4629,8 +4629,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, gt +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -4643,8 +4643,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_release: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, gt +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -4657,8 +4657,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, gt +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -4671,8 +4671,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, gt +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -4685,8 +4685,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, gt +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -4699,8 +4699,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, gt +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -4713,8 +4713,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_release: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, gt +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -4727,8 +4727,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, gt +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -4741,8 +4741,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, gt +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -4759,7 +4759,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, lt -; -O1: csel x9, x0, x20, lt +; -O1: csel x9, x0, x21, lt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -4776,7 +4776,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, lt -; -O1: csel x9, x0, x20, lt +; -O1: csel x9, x0, x21, lt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i128 %value acquire, 
align 1 ret i128 %r @@ -4793,7 +4793,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, lt -; -O1: csel x9, x0, x20, lt +; -O1: csel x9, x0, x21, lt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -4810,7 +4810,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, lt -; -O1: csel x9, x0, x20, lt +; -O1: csel x9, x0, x21, lt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -4827,7 +4827,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, lt -; -O1: csel x9, x0, x20, lt +; -O1: csel x9, x0, x21, lt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -5392,8 +5392,8 @@ ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, le +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -5408,8 +5408,8 @@ ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, le +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -5424,8 +5424,8 @@ ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_release: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, le +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -5440,8 +5440,8 @@ ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, le +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -5456,8 +5456,8 @@ ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, le +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -5470,8 +5470,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, le +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -5484,8 +5484,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, le +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -5498,8 +5498,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_release: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, le +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -5512,8 +5512,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, le +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i32 %value 
acq_rel, align 1 ret i32 %r @@ -5526,8 +5526,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, le +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -5540,8 +5540,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, le +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -5554,8 +5554,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, le +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -5568,8 +5568,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_release: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, le +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -5582,8 +5582,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, le +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -5596,8 +5596,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, le +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -5614,7 +5614,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, ge -; -O1: csel x9, x0, x20, ge +; -O1: csel x9, x0, x21, ge ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -5631,7 +5631,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, ge -; -O1: csel x9, x0, x20, ge +; -O1: csel x9, x0, x21, ge ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -5648,7 +5648,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, ge -; -O1: csel x9, x0, x20, ge +; -O1: csel x9, x0, x21, ge ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -5665,7 +5665,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, ge -; -O1: csel x9, x0, x20, ge +; -O1: csel x9, x0, x21, ge ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -5682,7 +5682,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, ge -; -O1: csel x9, x0, x20, ge +; -O1: csel x9, x0, x21, ge ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -6247,8 +6247,8 @@ ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, hi +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -6263,8 +6263,8 @@ ; ; -O1-LABEL: 
atomicrmw_umax_i16_unaligned_acquire: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, hi +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -6279,8 +6279,8 @@ ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_release: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, hi +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -6295,8 +6295,8 @@ ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, hi +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -6311,8 +6311,8 @@ ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, hi +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -6325,8 +6325,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, hi +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -6339,8 +6339,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, hi +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -6353,8 +6353,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_release: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, hi +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -6367,8 +6367,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, hi +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -6381,8 +6381,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, hi +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -6395,8 +6395,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, hi +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -6409,8 +6409,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, hi +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -6423,8 +6423,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_release: -; 
-O1: cmp x0, x19 -; -O1: csel x8, x0, x19, hi +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -6437,8 +6437,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, hi +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -6451,8 +6451,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, hi +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -6469,7 +6469,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, lo -; -O1: csel x9, x0, x20, lo +; -O1: csel x9, x0, x21, lo ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -6486,7 +6486,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, lo -; -O1: csel x9, x0, x20, lo +; -O1: csel x9, x0, x21, lo ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -6503,7 +6503,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, lo -; -O1: csel x9, x0, x20, lo +; -O1: csel x9, x0, x21, lo ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -6520,7 +6520,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, lo -; -O1: csel x9, x0, x20, lo +; -O1: csel x9, x0, x21, lo ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -6537,7 +6537,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, lo -; -O1: csel x9, x0, x20, lo +; -O1: csel x9, x0, x21, lo ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -7102,8 +7102,8 @@ ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, ls +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -7118,8 +7118,8 @@ ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, ls +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -7134,8 +7134,8 @@ ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_release: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, ls +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -7150,8 +7150,8 @@ ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, ls +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -7166,8 +7166,8 @@ ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, ls +; 
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i16 %value seq_cst, align 1
ret i16 %r
@@ -7180,8 +7180,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i32 %value monotonic, align 1
ret i32 %r
@@ -7194,8 +7194,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i32 %value acquire, align 1
ret i32 %r
@@ -7208,8 +7208,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i32 %value release, align 1
ret i32 %r
@@ -7222,8 +7222,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i32 %value acq_rel, align 1
ret i32 %r
@@ -7236,8 +7236,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i32 %value seq_cst, align 1
ret i32 %r
@@ -7250,8 +7250,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i64 %value monotonic, align 1
ret i64 %r
@@ -7264,8 +7264,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i64 %value acquire, align 1
ret i64 %r
@@ -7278,8 +7278,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i64 %value release, align 1
ret i64 %r
@@ -7292,8 +7292,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i64 %value acq_rel, align 1
ret i64 %r
@@ -7306,8 +7306,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i64 %value seq_cst, align 1
ret i64 %r
@@ -7324,7 +7324,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -7341,7 +7341,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -7358,7 +7358,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -7375,7 +7375,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -7392,7 +7392,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc.ll
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc.ll
@@ -1129,7 +1129,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_monotonic:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i16 %value monotonic, align 1
ret i16 %r
@@ -1141,7 +1141,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acquire:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i16 %value acquire, align 1
ret i16 %r
@@ -1153,7 +1153,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_release:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i16 %value release, align 1
ret i16 %r
@@ -1165,7 +1165,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i16 %value acq_rel, align 1
ret i16 %r
@@ -1177,7 +1177,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i16 %value seq_cst, align 1
ret i16 %r
@@ -1189,7 +1189,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_monotonic:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i32 %value monotonic, align 1
ret i32 %r
@@ -1201,7 +1201,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acquire:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i32 %value acquire, align 1
ret i32 %r
@@ -1213,7 +1213,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_release:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i32 %value release, align 1
ret i32 %r
@@ -1225,7 +1225,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i32 %value acq_rel, align 1
ret i32 %r
@@ -1237,7 +1237,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i32 %value seq_cst, align 1
ret i32 %r
@@ -1249,7 +1249,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_monotonic:
-; -O1: add x8, x0, x19
+; -O1: add x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i64 %value monotonic, align 1
ret i64 %r
@@ -1261,7 +1261,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acquire:
-; -O1: add x8, x0, x19
+; -O1: add x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i64 %value acquire, align 1
ret i64 %r
@@ -1273,7 +1273,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_release:
-; -O1: add x8, x0, x19
+; -O1: add x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i64 %value release, align 1
ret i64 %r
@@ -1285,7 +1285,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
-; -O1: add x8, x0, x19
+; -O1: add x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i64 %value acq_rel, align 1
ret i64 %r
@@ -1297,7 +1297,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
-; -O1: add x8, x0, x19
+; -O1: add x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i64 %value seq_cst, align 1
ret i64 %r
@@ -1894,7 +1894,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw sub ptr %ptr, i16 %value monotonic, align 1
ret i16 %r
@@ -1906,7 +1906,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acquire:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw sub ptr %ptr, i16 %value acquire, align 1
ret i16 %r
@@ -1918,7 +1918,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_release:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw sub ptr %ptr, i16 %value release, align 1
ret i16 %r
@@ -1930,7 +1930,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw sub ptr %ptr, i16 %value acq_rel, align 1
ret i16 %r
@@ -1942,7 +1942,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw sub ptr %ptr, i16 %value seq_cst, align 1
ret i16 %r
@@ -1954,7 +1954,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw sub ptr %ptr, i32 %value monotonic, align 1
ret i32 %r
@@ -1966,7 +1966,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acquire:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw sub ptr %ptr, i32 %value acquire, align 1
ret i32 %r
@@ -1978,7 +1978,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_release:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw sub ptr %ptr, i32 %value release, align 1
ret i32 %r
@@ -1990,7 +1990,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw sub ptr %ptr, i32 %value acq_rel, align 1
ret i32 %r
@@ -2002,7 +2002,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw sub ptr %ptr, i32 %value seq_cst, align 1
ret i32 %r
@@ -2014,7 +2014,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
-; -O1: sub x8, x0, x19
+; -O1: sub x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw sub ptr %ptr, i64 %value monotonic, align 1
ret i64 %r
@@ -2026,7 +2026,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acquire:
-; -O1: sub x8, x0, x19
+; -O1: sub x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw sub ptr %ptr, i64 %value acquire, align 1
ret i64 %r
@@ -2038,7 +2038,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_release:
-; -O1: sub x8, x0, x19
+; -O1: sub x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw sub ptr %ptr, i64 %value release, align 1
ret i64 %r
@@ -2050,7 +2050,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_acq_rel:
-; -O1: sub x8, x0, x19
+; -O1: sub x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw sub ptr %ptr, i64 %value acq_rel, align 1
ret i64 %r
@@ -2062,7 +2062,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_seq_cst:
-; -O1: sub x8, x0, x19
+; -O1: sub x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw sub ptr %ptr, i64 %value seq_cst, align 1
ret i64 %r
@@ -2487,9 +2487,9 @@
;
; -O1-LABEL: atomicrmw_and_i128_aligned_monotonic:
; -O1: ldxp x1, x0, [x8]
-; -O1: and x9, x1, x3
-; -O1: and x10, x0, x2
-; -O1: stxp w11, x9, x10, [x8]
+; -O1: and x9, x0, x2
+; -O1: and x10, x1, x3
+; -O1: stxp w11, x10, x9, [x8]
%r = atomicrmw and ptr %ptr, i128 %value monotonic, align 16
ret i128 %r
}
@@ -2508,9 +2508,9 @@
;
; -O1-LABEL: atomicrmw_and_i128_aligned_acquire:
; -O1: ldaxp x1, x0, [x8]
-; -O1: and x9, x1, x3
-; -O1: and x10, x0, x2
-; -O1: stxp w11, x9, x10, [x8]
+; -O1: and x9, x0, x2
+; -O1: and x10, x1, x3
+; -O1: stxp w11, x10, x9, [x8]
%r = atomicrmw and ptr %ptr, i128 %value acquire, align 16
ret i128 %r
}
@@ -2529,9 +2529,9 @@
;
; -O1-LABEL: atomicrmw_and_i128_aligned_release:
; -O1: ldxp x1, x0, [x8]
-; -O1: and x9, x1, x3
-; -O1: and x10, x0, x2
-; -O1: stlxp w11, x9, x10, [x8]
+; -O1: and x9, x0, x2
+; -O1: and x10, x1, x3
+; -O1: stlxp w11, x10, x9, [x8]
%r = atomicrmw and ptr %ptr, i128 %value release, align 16
ret i128 %r
}
@@ -2550,9 +2550,9 @@
;
; -O1-LABEL: atomicrmw_and_i128_aligned_acq_rel:
; -O1: ldaxp x1, x0, [x8]
-; -O1: and x9, x1, x3
-; -O1: and x10, x0, x2
-; -O1: stlxp w11, x9, x10, [x8]
+; -O1: and x9, x0, x2
+; -O1: and x10, x1, x3
+; -O1: stlxp w11, x10, x9, [x8]
%r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 16
ret i128 %r
}
@@ -2571,9 +2571,9 @@
;
; -O1-LABEL: atomicrmw_and_i128_aligned_seq_cst:
; -O1: ldaxp x1, x0, [x8]
-; -O1: and x9, x1, x3
-; -O1: and x10, x0, x2
-; -O1: stlxp w11, x9, x10, [x8]
+; -O1: and x9, x0, x2
+; -O1: and x10, x1, x3
+; -O1: stlxp w11, x10, x9, [x8]
%r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 16
ret i128 %r
}
@@ -2669,7 +2669,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_monotonic:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i16 %value monotonic, align 1
ret i16 %r
@@ -2681,7 +2681,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acquire:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i16 %value acquire, align 1
ret i16 %r
@@ -2693,7 +2693,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_release:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i16 %value release, align 1
ret i16 %r
@@ -2705,7 +2705,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i16 %value acq_rel, align 1
ret i16 %r
@@ -2717,7 +2717,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i16 %value seq_cst, align 1
ret i16 %r
@@ -2729,7 +2729,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_monotonic:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i32 %value monotonic, align 1
ret i32 %r
@@ -2741,7 +2741,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acquire:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i32 %value acquire, align 1
ret i32 %r
@@ -2753,7 +2753,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_release:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i32 %value release, align 1
ret i32 %r
@@ -2765,7 +2765,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i32 %value acq_rel, align 1
ret i32 %r
@@ -2777,7 +2777,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i32 %value seq_cst, align 1
ret i32 %r
@@ -2789,7 +2789,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_monotonic:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i64 %value monotonic, align 1
ret i64 %r
@@ -2801,7 +2801,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acquire:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i64 %value acquire, align 1
ret i64 %r
@@ -2813,7 +2813,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_release:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i64 %value release, align 1
ret i64 %r
@@ -2825,7 +2825,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i64 %value acq_rel, align 1
ret i64 %r
@@ -2837,7 +2837,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i64 %value seq_cst, align 1
ret i64 %r
@@ -2852,7 +2852,7 @@
; -O1-LABEL: atomicrmw_and_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -2867,7 +2867,7 @@
; -O1-LABEL: atomicrmw_and_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -2882,7 +2882,7 @@
; -O1-LABEL: atomicrmw_and_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -2897,7 +2897,7 @@
; -O1-LABEL: atomicrmw_and_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -2912,7 +2912,7 @@
; -O1-LABEL: atomicrmw_and_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -3315,9 +3315,9 @@
; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic:
; -O1: ldxp x1, x0, [x8]
; -O1: and x9, x1, x3
-; -O1: mvn x9, x9
; -O1: and x10, x0, x2
; -O1: mvn x10, x10
+; -O1: mvn x9, x9
; -O1: stxp w11, x9, x10, [x8]
%r = atomicrmw nand ptr %ptr, i128 %value monotonic, align 16
ret i128 %r
@@ -3340,9 +3340,9 @@
; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire:
; -O1: ldaxp x1, x0, [x8]
; -O1: and x9, x1, x3
-; -O1: mvn x9, x9
; -O1: and x10, x0, x2
; -O1: mvn x10, x10
+; -O1: mvn x9, x9
; -O1: stxp w11, x9, x10, [x8]
%r = atomicrmw nand ptr %ptr, i128 %value acquire, align 16
ret i128 %r
@@ -3365,9 +3365,9 @@
; -O1-LABEL: atomicrmw_nand_i128_aligned_release:
; -O1: ldxp x1, x0, [x8]
; -O1: and x9, x1, x3
-; -O1: mvn x9, x9
; -O1: and x10, x0, x2
; -O1: mvn x10, x10
+; -O1: mvn x9, x9
; -O1: stlxp w11, x9, x10, [x8]
%r = atomicrmw nand ptr %ptr, i128 %value release, align 16
ret i128 %r
@@ -3390,9 +3390,9 @@
; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel:
; -O1: ldaxp x1, x0, [x8]
; -O1: and x9, x1, x3
-; -O1: mvn x9, x9
; -O1: and x10, x0, x2
; -O1: mvn x10, x10
+; -O1: mvn x9, x9
; -O1: stlxp w11, x9, x10, [x8]
%r = atomicrmw nand ptr %ptr, i128 %value acq_rel, align 16
ret i128 %r
@@ -3415,9 +3415,9 @@
; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst:
; -O1: ldaxp x1, x0, [x8]
; -O1: and x9, x1, x3
-; -O1: mvn x9, x9
; -O1: and x10, x0, x2
; -O1: mvn x10, x10
+; -O1: mvn x9, x9
; -O1: stlxp w11, x9, x10, [x8]
%r = atomicrmw nand ptr %ptr, i128 %value seq_cst, align 16
ret i128 %r
@@ -3525,7 +3525,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_monotonic:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
; -O1: mvn w8, w8
; -O1: bl __atomic_compare_exchange
%r = atomicrmw nand ptr %ptr, i16 %value monotonic, align 1
@@ -3539,7 +3539,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acquire:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
; -O1: mvn w8, w8
; -O1: bl __atomic_compare_exchange
%r = atomicrmw nand ptr %ptr, i16 %value acquire, align 1
@@ -3553,7 +3553,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_release:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
; -O1: mvn w8, w8
; -O1: bl __atomic_compare_exchange
%r = atomicrmw nand ptr %ptr, i16 %value release, align 1
@@ -3567,7 +3567,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_acq_rel:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
; -O1: mvn w8, w8
; -O1: bl __atomic_compare_exchange
%r = atomicrmw nand ptr %ptr, i16 %value acq_rel, align 1
@@ -3581,7 +3581,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_nand_i16_unaligned_seq_cst:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
; -O1: mvn w8, w8
; -O1: bl __atomic_compare_exchange
%r = atomicrmw nand ptr %ptr, i16 %value seq_cst, align 1
@@ -3595,7 +3595,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_monotonic:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
; -O1: mvn w8, w8
; -O1: bl __atomic_compare_exchange
%r = atomicrmw nand ptr %ptr, i32 %value monotonic, align 1
@@ -3609,7 +3609,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acquire:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
; -O1: mvn w8, w8
; -O1: bl __atomic_compare_exchange
%r = atomicrmw nand ptr %ptr, i32 %value acquire, align 1
@@ -3623,7 +3623,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_release:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
; -O1: mvn w8, w8
; -O1: bl __atomic_compare_exchange
%r = atomicrmw nand ptr %ptr, i32 %value release, align 1
@@ -3637,7 +3637,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_acq_rel:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
; -O1: mvn w8, w8
; -O1: bl __atomic_compare_exchange
%r = atomicrmw nand ptr %ptr, i32 %value acq_rel, align 1
@@ -3651,7 +3651,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_nand_i32_unaligned_seq_cst:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
; -O1: mvn w8, w8
; -O1: bl __atomic_compare_exchange
%r = atomicrmw nand ptr %ptr, i32 %value seq_cst, align 1
@@ -3665,7 +3665,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_monotonic:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
; -O1: mvn x8, x8
; -O1: bl __atomic_compare_exchange
%r = atomicrmw nand ptr %ptr, i64 %value monotonic, align 1
@@ -3679,7 +3679,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acquire:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
; -O1: mvn x8, x8
; -O1: bl __atomic_compare_exchange
%r = atomicrmw nand ptr %ptr, i64 %value acquire, align 1
@@ -3693,7 +3693,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_release:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
; -O1: mvn x8, x8
; -O1: bl __atomic_compare_exchange
%r = atomicrmw nand ptr %ptr, i64 %value release, align 1
@@ -3707,7 +3707,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_acq_rel:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
; -O1: mvn x8, x8
; -O1: bl __atomic_compare_exchange
%r = atomicrmw nand ptr %ptr, i64 %value acq_rel, align 1
@@ -3721,7 +3721,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_nand_i64_unaligned_seq_cst:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
; -O1: mvn x8, x8
; -O1: bl __atomic_compare_exchange
%r = atomicrmw nand ptr %ptr, i64 %value seq_cst, align 1
@@ -3739,7 +3739,7 @@
; -O1-LABEL: atomicrmw_nand_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3758,7 +3758,7 @@
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3777,7 +3777,7 @@
; -O1-LABEL: atomicrmw_nand_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3796,7 +3796,7 @@
; -O1-LABEL: atomicrmw_nand_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -3815,7 +3815,7 @@
; -O1-LABEL: atomicrmw_nand_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
; -O1: mvn x8, x8
; -O1: mvn x9, x9
; -O1: bl __atomic_compare_exchange
@@ -4177,9 +4177,9 @@
;
; -O1-LABEL: atomicrmw_or_i128_aligned_monotonic:
; -O1: ldxp x1, x0, [x8]
-; -O1: orr x9, x1, x3
-; -O1: orr x10, x0, x2
-; -O1: stxp w11, x9, x10, [x8]
+; -O1: orr x9, x0, x2
+; -O1: orr x10, x1, x3
+; -O1: stxp w11, x10, x9, [x8]
%r = atomicrmw or ptr %ptr, i128 %value monotonic, align 16
ret i128 %r
}
@@ -4198,9 +4198,9 @@
;
; -O1-LABEL: atomicrmw_or_i128_aligned_acquire:
; -O1: ldaxp x1, x0, [x8]
-; -O1: orr x9, x1, x3
-; -O1: orr x10, x0, x2
-; -O1: stxp w11, x9, x10, [x8]
+; -O1: orr x9, x0, x2
+; -O1: orr x10, x1, x3
+; -O1: stxp w11, x10, x9, [x8]
%r = atomicrmw or ptr %ptr, i128 %value acquire, align 16
ret i128 %r
}
@@ -4219,9 +4219,9 @@
;
; -O1-LABEL: atomicrmw_or_i128_aligned_release:
; -O1: ldxp x1, x0, [x8]
-; -O1: orr x9, x1, x3
-; -O1: orr x10, x0, x2
-; -O1: stlxp w11, x9, x10, [x8]
+; -O1: orr x9, x0, x2
+; -O1: orr x10, x1, x3
+; -O1: stlxp w11, x10, x9, [x8]
%r = atomicrmw or ptr %ptr, i128 %value release, align 16
ret i128 %r
}
@@ -4240,9 +4240,9 @@
;
; -O1-LABEL: atomicrmw_or_i128_aligned_acq_rel:
; -O1: ldaxp x1, x0, [x8]
-; -O1: orr x9, x1, x3
-; -O1: orr x10, x0, x2
-; -O1: stlxp w11, x9, x10, [x8]
+; -O1: orr x9, x0, x2
+; -O1: orr x10, x1, x3
+; -O1: stlxp w11, x10, x9, [x8]
%r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 16
ret i128 %r
}
@@ -4261,9 +4261,9 @@
;
; -O1-LABEL: atomicrmw_or_i128_aligned_seq_cst:
; -O1: ldaxp x1, x0, [x8]
-; -O1: orr x9, x1, x3
-; -O1: orr x10, x0, x2
-; -O1: stlxp w11, x9, x10, [x8]
+; -O1: orr x9, x0, x2
+; -O1: orr x10, x1, x3
+; -O1: stlxp w11, x10, x9, [x8]
%r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 16
ret i128 %r
}
@@ -4359,7 +4359,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_monotonic:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i16 %value monotonic, align 1
ret i16 %r
@@ -4371,7 +4371,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acquire:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i16 %value acquire, align 1
ret i16 %r
@@ -4383,7 +4383,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_release:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i16 %value release, align 1
ret i16 %r
@@ -4395,7 +4395,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i16 %value acq_rel, align 1
ret i16 %r
@@ -4407,7 +4407,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i16 %value seq_cst, align 1
ret i16 %r
@@ -4419,7 +4419,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_monotonic:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i32 %value monotonic, align 1
ret i32 %r
@@ -4431,7 +4431,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acquire:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i32 %value acquire, align 1
ret i32 %r
@@ -4443,7 +4443,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_release:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i32 %value release, align 1
ret i32 %r
@@ -4455,7 +4455,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i32 %value acq_rel, align 1
ret i32 %r
@@ -4467,7 +4467,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i32 %value seq_cst, align 1
ret i32 %r
@@ -4479,7 +4479,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_monotonic:
-; -O1: orr x8, x0, x19
+; -O1: orr x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i64 %value monotonic, align 1
ret i64 %r
@@ -4491,7 +4491,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acquire:
-; -O1: orr x8, x0, x19
+; -O1: orr x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i64 %value acquire, align 1
ret i64 %r
@@ -4503,7 +4503,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_release:
-; -O1: orr x8, x0, x19
+; -O1: orr x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i64 %value release, align 1
ret i64 %r
@@ -4515,7 +4515,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
-; -O1: orr x8, x0, x19
+; -O1: orr x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i64 %value acq_rel, align 1
ret i64 %r
@@ -4527,7 +4527,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
-; -O1: orr x8, x0, x19
+; -O1: orr x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i64 %value seq_cst, align 1
ret i64 %r
@@ -4542,7 +4542,7 @@
; -O1-LABEL: atomicrmw_or_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x20
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -4557,7 +4557,7 @@
; -O1-LABEL: atomicrmw_or_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x20
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -4572,7 +4572,7 @@
; -O1-LABEL: atomicrmw_or_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x20
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -4587,7 +4587,7 @@
; -O1-LABEL: atomicrmw_or_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x20
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -4602,7 +4602,7 @@
; -O1-LABEL: atomicrmw_or_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x20
+; -O1: orr x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -4962,9 +4962,9 @@
;
; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic:
; -O1: ldxp x1, x0, [x8]
-; -O1: eor x9, x1, x3
-; -O1: eor x10, x0, x2
-; -O1: stxp w11, x9, x10, [x8]
+; -O1: eor x9, x0, x2
+; -O1: eor x10, x1, x3
+; -O1: stxp w11, x10, x9, [x8]
%r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 16
ret i128 %r
}
@@ -4983,9 +4983,9 @@
;
; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire:
; -O1: ldaxp x1, x0, [x8]
-; -O1: eor x9, x1, x3
-; -O1: eor x10, x0, x2
-; -O1: stxp w11, x9, x10, [x8]
+; -O1: eor x9, x0, x2
+; -O1: eor x10, x1, x3
+; -O1: stxp w11, x10, x9, [x8]
%r = atomicrmw xor ptr %ptr, i128 %value acquire, align 16
ret i128 %r
}
@@ -5004,9 +5004,9 @@
;
; -O1-LABEL: atomicrmw_xor_i128_aligned_release:
; -O1: ldxp x1, x0, [x8]
-; -O1: eor x9, x1, x3
-; -O1: eor x10, x0, x2
-; -O1: stlxp w11, x9, x10, [x8]
+; -O1: eor x9, x0, x2
+; -O1: eor x10, x1, x3
+; -O1: stlxp w11, x10, x9, [x8]
%r = atomicrmw xor ptr %ptr, i128 %value release, align 16
ret i128 %r
}
@@ -5025,9 +5025,9 @@
;
; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel:
; -O1: ldaxp x1, x0, [x8]
-; -O1: eor x9, x1, x3
-; -O1: eor x10, x0, x2
-; -O1: stlxp w11, x9, x10, [x8]
+; -O1: eor x9, x0, x2
+; -O1: eor x10, x1, x3
+; -O1: stlxp w11, x10, x9, [x8]
%r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 16
ret i128 %r
}
@@ -5046,9 +5046,9 @@
;
; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst:
; -O1: ldaxp x1, x0, [x8]
-; -O1: eor x9, x1, x3
-; -O1: eor x10, x0, x2
-; -O1: stlxp w11, x9, x10, [x8]
+; -O1: eor x9, x0, x2
+; -O1: eor x10, x1, x3
+; -O1: stlxp w11, x10, x9, [x8]
%r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 16
ret i128 %r
}
@@ -5144,7 +5144,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i16 %value monotonic, align 1
ret i16 %r
@@ -5156,7 +5156,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acquire:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i16 %value acquire, align 1
ret i16 %r
@@ -5168,7 +5168,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_release:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i16 %value release, align 1
ret i16 %r
@@ -5180,7 +5180,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i16 %value acq_rel, align 1
ret i16 %r
@@ -5192,7 +5192,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i16 %value seq_cst, align 1
ret i16 %r
@@ -5204,7 +5204,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i32 %value monotonic, align 1
ret i32 %r
@@ -5216,7 +5216,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acquire:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i32 %value acquire, align 1
ret i32 %r
@@ -5228,7 +5228,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_release:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i32 %value release, align 1
ret i32 %r
@@ -5240,7 +5240,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i32 %value acq_rel, align 1
ret i32 %r
@@ -5252,7 +5252,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i32 %value seq_cst, align 1
ret i32 %r
@@ -5264,7 +5264,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
-; -O1: eor x8, x0, x19
+; -O1: eor x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i64 %value monotonic, align 1
ret i64 %r
@@ -5276,7 +5276,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acquire:
-; -O1: eor x8, x0, x19
+; -O1: eor x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i64 %value acquire, align 1
ret i64 %r
@@ -5288,7 +5288,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_release:
-; -O1: eor x8, x0, x19
+; -O1: eor x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i64 %value release, align 1
ret i64 %r
@@ -5300,7 +5300,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
-; -O1: eor x8, x0, x19
+; -O1: eor x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i64 %value acq_rel, align 1
ret i64 %r
@@ -5312,7 +5312,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
-; -O1: eor x8, x0, x19
+; -O1: eor x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i64 %value seq_cst, align 1
ret i64 %r
@@ -5327,7 +5327,7 @@
; -O1-LABEL: atomicrmw_xor_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x20
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -5342,7 +5342,7 @@
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x20
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -5357,7 +5357,7 @@
; -O1-LABEL: atomicrmw_xor_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x20
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -5372,7 +5372,7 @@
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x20
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -5387,7 +5387,7 @@
; -O1-LABEL: atomicrmw_xor_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x20
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -6022,8 +6022,8 @@
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i16 %value monotonic, align 1
ret i16 %r
@@ -6038,8 +6038,8 @@
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i16 %value acquire, align 1
ret i16 %r
@@ -6054,8 +6054,8 @@
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i16 %value release, align 1
ret i16 %r
@@ -6070,8 +6070,8 @@
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i16 %value acq_rel, align 1
ret i16 %r
@@ -6086,8 +6086,8 @@
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i16 %value seq_cst, align 1
ret i16 %r
@@ -6100,8 +6100,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i32 %value monotonic, align 1
ret i32 %r
@@ -6114,8 +6114,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i32 %value acquire, align 1
ret i32 %r
@@ -6128,8 +6128,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i32 %value release, align 1
ret i32 %r
@@ -6142,8 +6142,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i32 %value acq_rel, align 1
ret i32 %r
@@ -6156,8 +6156,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i32 %value seq_cst, align 1
ret i32 %r
@@ -6170,8 +6170,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, gt
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i64 %value monotonic, align 1
ret i64 %r
@@ -6184,8 +6184,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, gt
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i64 %value acquire, align 1
ret i64 %r
@@ -6198,8 +6198,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, gt
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i64 %value release, align 1
ret i64 %r
@@ -6212,8 +6212,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, gt
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i64 %value acq_rel, align 1
ret i64 %r
@@ -6226,8 +6226,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, gt
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i64 %value seq_cst, align 1
ret i64 %r
@@ -6244,7 +6244,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, lt
-; -O1: csel x9, x0, x20, lt
+; -O1: csel x9, x0, x21, lt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -6261,7 +6261,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, lt
-; -O1: csel x9, x0, x20, lt
+; -O1: csel x9, x0, x21, lt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -6278,7 +6278,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, lt
-; -O1: csel x9, x0, x20, lt
+; -O1: csel x9, x0, x21, lt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -6295,7 +6295,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, lt
-; -O1: csel x9, x0, x20, lt
+; -O1: csel x9, x0, x21, lt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -6312,7 +6312,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, lt
-; -O1: csel x9, x0, x20, lt
+; -O1: csel x9, x0, x21, lt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -6947,8 +6947,8 @@
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i16 %value monotonic, align 1
ret i16 %r
@@ -6963,8 +6963,8 @@
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i16 %value acquire, align 1
ret i16 %r
@@ -6979,8 +6979,8 @@
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i16 %value release, align 1
ret i16 %r
@@ -6995,8 +6995,8 @@
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i16 %value acq_rel, align 1
ret i16 %r
@@ -7011,8 +7011,8 @@
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i16 %value seq_cst, align 1
ret i16 %r
@@ -7025,8 +7025,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i32 %value monotonic, align 1
ret i32 %r
@@ -7039,8 +7039,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i32 %value acquire, align 1
ret i32 %r
@@ -7053,8 +7053,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i32 %value release, align 1
ret i32 %r
@@ -7067,8 +7067,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i32 %value acq_rel, align 1
ret i32 %r
@@ -7081,8 +7081,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i32 %value seq_cst, align 1
ret i32 %r
@@ -7095,8 +7095,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, le
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i64 %value monotonic, align 1
ret i64 %r
@@ -7109,8 +7109,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, le
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i64 %value acquire, align 1
ret i64 %r
@@ -7123,8 +7123,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, le
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i64 %value release, align 1
ret i64 %r
@@ -7137,8 +7137,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, le
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i64 %value acq_rel, align 1
ret i64 %r
@@ -7151,8 +7151,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, le
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i64 %value seq_cst, align 1
ret i64 %r
@@ -7169,7 +7169,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, ge
-; -O1: csel x9, x0, x20, ge
+; -O1: csel x9, x0, x21, ge
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -7186,7 +7186,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, ge
-; -O1: csel x9, x0, x20, ge
+; -O1: csel x9, x0, x21, ge
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -7203,7 +7203,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, ge
-; -O1: csel x9, x0, x20, ge
+; -O1: csel x9, x0, x21, ge
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -7220,7 +7220,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, ge
-; -O1: csel x9, x0, x20, ge
+; -O1: csel x9, x0, x21, ge
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -7237,7 +7237,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, ge
-; -O1: csel x9, x0, x20, ge
+; -O1: csel x9, x0, x21, ge
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -7872,8 +7872,8 @@
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i16 %value monotonic, align 1
ret i16 %r
@@ -7888,8 +7888,8 @@
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i16 %value acquire, align 1
ret i16 %r
@@ -7904,8 +7904,8 @@
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i16 %value release, align 1
ret i16 %r
@@ -7920,8 +7920,8 @@
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i16 %value acq_rel, align 1
ret i16 %r
@@ -7936,8 +7936,8 @@
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i16 %value seq_cst, align 1
ret i16 %r
@@ -7950,8 +7950,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i32 %value monotonic, align 1
ret i32 %r
@@ -7964,8 +7964,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i32 %value acquire, align 1
ret i32 %r
@@ -7978,8 +7978,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i32 %value release, align 1
ret i32 %r
@@ -7992,8 +7992,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i32 %value acq_rel, align 1
ret i32 %r
@@ -8006,8 +8006,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i32 %value seq_cst, align 1
ret i32 %r
@@ -8020,8 +8020,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i64 %value monotonic, align 1
ret i64 %r
@@ -8034,8 +8034,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i64 %value acquire, align 1
ret i64 %r
@@ -8048,8 +8048,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i64 %value release, align 1
ret i64 %r
@@ -8062,8 +8062,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i64 %value acq_rel, align 1
ret i64 %r
@@ -8076,8 +8076,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i64 %value seq_cst, align 1
ret i64 %r
@@ -8094,7 +8094,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -8111,7 +8111,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -8128,7 +8128,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -8145,7 +8145,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -8162,7 +8162,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -8797,8 +8797,8 @@
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i16 %value monotonic, align 1
ret i16 %r
@@ -8813,8 +8813,8 @@
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i16 %value acquire, align 1
ret i16 %r
@@ -8829,8 +8829,8 @@
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i16 %value release, align 1
ret i16 %r
@@ -8845,8 +8845,8 @@
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i16 %value acq_rel, align 1
ret i16 %r
@@ -8861,8 +8861,8 @@
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i16 %value seq_cst, align 1
ret i16 %r
@@ -8875,8 +8875,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i32 %value monotonic, align 1
ret i32 %r
@@ -8889,8 +8889,8 @@
bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, ls +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -8903,8 +8903,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_release: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, ls +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -8917,8 +8917,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, ls +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -8931,8 +8931,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, ls +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -8945,8 +8945,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, ls +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -8959,8 +8959,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, ls +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -8973,8 +8973,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_release: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, ls +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -8987,8 +8987,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, ls +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -9001,8 +9001,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, ls +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -9019,7 +9019,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, hs -; -O1: csel x9, x0, x20, hs +; -O1: csel x9, x0, x21, hs ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -9036,7 +9036,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, hs -; -O1: csel x9, x0, x20, hs +; -O1: csel x9, x0, x21, hs ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -9053,7 +9053,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, hs -; -O1: csel x9, x0, x20, hs +; -O1: csel x9, x0, x21, hs ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i128 %value 
release, align 1 ret i128 %r @@ -9070,7 +9070,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, hs -; -O1: csel x9, x0, x20, hs +; -O1: csel x9, x0, x21, hs ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -9087,7 +9087,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, hs -; -O1: csel x9, x0, x20, hs +; -O1: csel x9, x0, x21, hs ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc3.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc3.ll --- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc3.ll +++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-rcpc3.ll @@ -1129,7 +1129,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i16_unaligned_monotonic: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -1141,7 +1141,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i16_unaligned_acquire: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -1153,7 +1153,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i16_unaligned_release: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -1165,7 +1165,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i16_unaligned_acq_rel: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -1177,7 +1177,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i16_unaligned_seq_cst: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -1189,7 +1189,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i32_unaligned_monotonic: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -1201,7 +1201,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i32_unaligned_acquire: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -1213,7 +1213,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i32_unaligned_release: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -1225,7 +1225,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i32_unaligned_acq_rel: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -1237,7 +1237,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i32_unaligned_seq_cst: -; -O1: add w8, w0, w19 +; -O1: add w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -1249,7 +1249,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: 
atomicrmw_add_i64_unaligned_monotonic: -; -O1: add x8, x0, x19 +; -O1: add x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -1261,7 +1261,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i64_unaligned_acquire: -; -O1: add x8, x0, x19 +; -O1: add x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -1273,7 +1273,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i64_unaligned_release: -; -O1: add x8, x0, x19 +; -O1: add x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -1285,7 +1285,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i64_unaligned_acq_rel: -; -O1: add x8, x0, x19 +; -O1: add x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -1297,7 +1297,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i64_unaligned_seq_cst: -; -O1: add x8, x0, x19 +; -O1: add x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -1894,7 +1894,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i16_unaligned_monotonic: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -1906,7 +1906,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i16_unaligned_acquire: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -1918,7 +1918,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i16_unaligned_release: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -1930,7 +1930,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i16_unaligned_acq_rel: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -1942,7 +1942,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i16_unaligned_seq_cst: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -1954,7 +1954,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i32_unaligned_monotonic: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -1966,7 +1966,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i32_unaligned_acquire: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -1978,7 +1978,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i32_unaligned_release: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -1990,7 +1990,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i32_unaligned_acq_rel: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw 
sub ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -2002,7 +2002,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i32_unaligned_seq_cst: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -2014,7 +2014,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_monotonic: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -2026,7 +2026,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_acquire: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -2038,7 +2038,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_release: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -2050,7 +2050,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_acq_rel: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -2062,7 +2062,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_seq_cst: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -2487,9 +2487,9 @@ ; ; -O1-LABEL: atomicrmw_and_i128_aligned_monotonic: ; -O1: ldxp x1, x0, [x8] -; -O1: and x9, x1, x3 -; -O1: and x10, x0, x2 -; -O1: stxp w11, x9, x10, [x8] +; -O1: and x9, x0, x2 +; -O1: and x10, x1, x3 +; -O1: stxp w11, x10, x9, [x8] %r = atomicrmw and ptr %ptr, i128 %value monotonic, align 16 ret i128 %r } @@ -2508,9 +2508,9 @@ ; ; -O1-LABEL: atomicrmw_and_i128_aligned_acquire: ; -O1: ldaxp x1, x0, [x8] -; -O1: and x9, x1, x3 -; -O1: and x10, x0, x2 -; -O1: stxp w11, x9, x10, [x8] +; -O1: and x9, x0, x2 +; -O1: and x10, x1, x3 +; -O1: stxp w11, x10, x9, [x8] %r = atomicrmw and ptr %ptr, i128 %value acquire, align 16 ret i128 %r } @@ -2529,9 +2529,9 @@ ; ; -O1-LABEL: atomicrmw_and_i128_aligned_release: ; -O1: ldxp x1, x0, [x8] -; -O1: and x9, x1, x3 -; -O1: and x10, x0, x2 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: and x9, x0, x2 +; -O1: and x10, x1, x3 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw and ptr %ptr, i128 %value release, align 16 ret i128 %r } @@ -2550,9 +2550,9 @@ ; ; -O1-LABEL: atomicrmw_and_i128_aligned_acq_rel: ; -O1: ldaxp x1, x0, [x8] -; -O1: and x9, x1, x3 -; -O1: and x10, x0, x2 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: and x9, x0, x2 +; -O1: and x10, x1, x3 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 16 ret i128 %r } @@ -2571,9 +2571,9 @@ ; ; -O1-LABEL: atomicrmw_and_i128_aligned_seq_cst: ; -O1: ldaxp x1, x0, [x8] -; -O1: and x9, x1, x3 -; -O1: and x10, x0, x2 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: and x9, x0, x2 +; -O1: and x10, x1, x3 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 16 ret i128 %r } @@ -2669,7 +2669,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i16_unaligned_monotonic: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i16 %value monotonic, align 1 ret i16 %r 
@@ -2681,7 +2681,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i16_unaligned_acquire:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i16 %value acquire, align 1
 ret i16 %r
@@ -2693,7 +2693,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i16_unaligned_release:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i16 %value release, align 1
 ret i16 %r
@@ -2705,7 +2705,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i16_unaligned_acq_rel:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i16 %value acq_rel, align 1
 ret i16 %r
@@ -2717,7 +2717,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i16_unaligned_seq_cst:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i16 %value seq_cst, align 1
 ret i16 %r
@@ -2729,7 +2729,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i32_unaligned_monotonic:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i32 %value monotonic, align 1
 ret i32 %r
@@ -2741,7 +2741,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i32_unaligned_acquire:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i32 %value acquire, align 1
 ret i32 %r
@@ -2753,7 +2753,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i32_unaligned_release:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i32 %value release, align 1
 ret i32 %r
@@ -2765,7 +2765,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i32_unaligned_acq_rel:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i32 %value acq_rel, align 1
 ret i32 %r
@@ -2777,7 +2777,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i32_unaligned_seq_cst:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i32 %value seq_cst, align 1
 ret i32 %r
@@ -2789,7 +2789,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i64_unaligned_monotonic:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i64 %value monotonic, align 1
 ret i64 %r
@@ -2801,7 +2801,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i64_unaligned_acquire:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i64 %value acquire, align 1
 ret i64 %r
@@ -2813,7 +2813,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i64_unaligned_release:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i64 %value release, align 1
 ret i64 %r
@@ -2825,7 +2825,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i64_unaligned_acq_rel:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i64 %value acq_rel, align 1
 ret i64 %r
@@ -2837,7 +2837,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_and_i64_unaligned_seq_cst:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i64 %value seq_cst, align 1
 ret i64 %r
@@ -2852,7 +2852,7 @@
 ; -O1-LABEL: atomicrmw_and_i128_unaligned_monotonic:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i128 %value monotonic, align 1
 ret i128 %r
@@ -2867,7 +2867,7 @@
 ; -O1-LABEL: atomicrmw_and_i128_unaligned_acquire:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i128 %value acquire, align 1
 ret i128 %r
@@ -2882,7 +2882,7 @@
 ; -O1-LABEL: atomicrmw_and_i128_unaligned_release:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i128 %value release, align 1
 ret i128 %r
@@ -2897,7 +2897,7 @@
 ; -O1-LABEL: atomicrmw_and_i128_unaligned_acq_rel:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 1
 ret i128 %r
@@ -2912,7 +2912,7 @@
 ; -O1-LABEL: atomicrmw_and_i128_unaligned_seq_cst:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 1
 ret i128 %r
@@ -3315,9 +3315,9 @@
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic:
 ; -O1: ldxp x1, x0, [x8]
 ; -O1: and x9, x1, x3
-; -O1: mvn x9, x9
 ; -O1: and x10, x0, x2
 ; -O1: mvn x10, x10
+; -O1: mvn x9, x9
 ; -O1: stxp w11, x9, x10, [x8]
 %r = atomicrmw nand ptr %ptr, i128 %value monotonic, align 16
 ret i128 %r
@@ -3340,9 +3340,9 @@
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire:
 ; -O1: ldaxp x1, x0, [x8]
 ; -O1: and x9, x1, x3
-; -O1: mvn x9, x9
 ; -O1: and x10, x0, x2
 ; -O1: mvn x10, x10
+; -O1: mvn x9, x9
 ; -O1: stxp w11, x9, x10, [x8]
 %r = atomicrmw nand ptr %ptr, i128 %value acquire, align 16
 ret i128 %r
@@ -3365,9 +3365,9 @@
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_release:
 ; -O1: ldxp x1, x0, [x8]
 ; -O1: and x9, x1, x3
-; -O1: mvn x9, x9
 ; -O1: and x10, x0, x2
 ; -O1: mvn x10, x10
+; -O1: mvn x9, x9
 ; -O1: stlxp w11, x9, x10, [x8]
 %r = atomicrmw nand ptr %ptr, i128 %value release, align 16
 ret i128 %r
@@ -3390,9 +3390,9 @@
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel:
 ; -O1: ldaxp x1, x0, [x8]
 ; -O1: and x9, x1, x3
-; -O1: mvn x9, x9
 ; -O1: and x10, x0, x2
 ; -O1: mvn x10, x10
+; -O1: mvn x9, x9
 ; -O1: stlxp w11, x9, x10, [x8]
 %r = atomicrmw nand ptr %ptr, i128 %value acq_rel, align 16
 ret i128 %r
@@ -3415,9 +3415,9 @@
 ; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst:
 ; -O1: ldaxp x1, x0, [x8]
 ; -O1: and x9, x1, x3
-; -O1: mvn x9, x9
 ; -O1: and x10, x0, x2
 ; -O1: mvn x10, x10
+; -O1: mvn x9, x9
 ; -O1: stlxp w11, x9, x10, [x8]
 %r = atomicrmw nand ptr %ptr, i128 %value seq_cst, align 16
 ret i128 %r
@@ -3525,7 +3525,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_unaligned_monotonic:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: mvn w8, w8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i16 %value monotonic, align 1
@@ -3539,7 +3539,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_unaligned_acquire:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: mvn w8, w8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i16 %value acquire, align 1
@@ -3553,7 +3553,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_unaligned_release:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: mvn w8, w8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i16 %value release, align 1
@@ -3567,7 +3567,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_unaligned_acq_rel:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: mvn w8, w8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i16 %value acq_rel, align 1
@@ -3581,7 +3581,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i16_unaligned_seq_cst:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: mvn w8, w8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i16 %value seq_cst, align 1
@@ -3595,7 +3595,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_unaligned_monotonic:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: mvn w8, w8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i32 %value monotonic, align 1
@@ -3609,7 +3609,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_unaligned_acquire:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: mvn w8, w8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i32 %value acquire, align 1
@@ -3623,7 +3623,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_unaligned_release:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: mvn w8, w8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i32 %value release, align 1
@@ -3637,7 +3637,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_unaligned_acq_rel:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: mvn w8, w8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i32 %value acq_rel, align 1
@@ -3651,7 +3651,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i32_unaligned_seq_cst:
-; -O1: and w8, w0, w19
+; -O1: and w8, w0, w20
 ; -O1: mvn w8, w8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i32 %value seq_cst, align 1
@@ -3665,7 +3665,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_unaligned_monotonic:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
 ; -O1: mvn x8, x8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i64 %value monotonic, align 1
@@ -3679,7 +3679,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_unaligned_acquire:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
 ; -O1: mvn x8, x8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i64 %value acquire, align 1
@@ -3693,7 +3693,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_unaligned_release:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
 ; -O1: mvn x8, x8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i64 %value release, align 1
@@ -3707,7 +3707,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_unaligned_acq_rel:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
 ; -O1: mvn x8, x8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i64 %value acq_rel, align 1
@@ -3721,7 +3721,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_nand_i64_unaligned_seq_cst:
-; -O1: and x8, x0, x19
+; -O1: and x8, x0, x20
 ; -O1: mvn x8, x8
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw nand ptr %ptr, i64 %value seq_cst, align 1
@@ -3739,7 +3739,7 @@
 ; -O1-LABEL: atomicrmw_nand_i128_unaligned_monotonic:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
 ; -O1: mvn x8, x8
 ; -O1: mvn x9, x9
 ; -O1: bl __atomic_compare_exchange
@@ -3758,7 +3758,7 @@
 ; -O1-LABEL: atomicrmw_nand_i128_unaligned_acquire:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
 ; -O1: mvn x8, x8
 ; -O1: mvn x9, x9
 ; -O1: bl __atomic_compare_exchange
@@ -3777,7 +3777,7 @@
 ; -O1-LABEL: atomicrmw_nand_i128_unaligned_release:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
 ; -O1: mvn x8, x8
 ; -O1: mvn x9, x9
 ; -O1: bl __atomic_compare_exchange
@@ -3796,7 +3796,7 @@
 ; -O1-LABEL: atomicrmw_nand_i128_unaligned_acq_rel:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
 ; -O1: mvn x8, x8
 ; -O1: mvn x9, x9
 ; -O1: bl __atomic_compare_exchange
@@ -3815,7 +3815,7 @@
 ; -O1-LABEL: atomicrmw_nand_i128_unaligned_seq_cst:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: and x8, x1, x19
-; -O1: and x9, x0, x20
+; -O1: and x9, x0, x21
 ; -O1: mvn x8, x8
 ; -O1: mvn x9, x9
 ; -O1: bl __atomic_compare_exchange
@@ -4177,9 +4177,9 @@
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_monotonic:
 ; -O1: ldxp x1, x0, [x8]
-; -O1: orr x9, x1, x3
-; -O1: orr x10, x0, x2
-; -O1: stxp w11, x9, x10, [x8]
+; -O1: orr x9, x0, x2
+; -O1: orr x10, x1, x3
+; -O1: stxp w11, x10, x9, [x8]
 %r = atomicrmw or ptr %ptr, i128 %value monotonic, align 16
 ret i128 %r
 }
@@ -4198,9 +4198,9 @@
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_acquire:
 ; -O1: ldaxp x1, x0, [x8]
-; -O1: orr x9, x1, x3
-; -O1: orr x10, x0, x2
-; -O1: stxp w11, x9, x10, [x8]
+; -O1: orr x9, x0, x2
+; -O1: orr x10, x1, x3
+; -O1: stxp w11, x10, x9, [x8]
 %r = atomicrmw or ptr %ptr, i128 %value acquire, align 16
 ret i128 %r
 }
@@ -4219,9 +4219,9 @@
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_release:
 ; -O1: ldxp x1, x0, [x8]
-; -O1: orr x9, x1, x3
-; -O1: orr x10, x0, x2
-; -O1: stlxp w11, x9, x10, [x8]
+; -O1: orr x9, x0, x2
+; -O1: orr x10, x1, x3
+; -O1: stlxp w11, x10, x9, [x8]
 %r = atomicrmw or ptr %ptr, i128 %value release, align 16
 ret i128 %r
 }
@@ -4240,9 +4240,9 @@
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_acq_rel:
 ; -O1: ldaxp x1, x0, [x8]
-; -O1: orr x9, x1, x3
-; -O1: orr x10, x0, x2
-; -O1: stlxp w11, x9, x10, [x8]
+; -O1: orr x9, x0, x2
+; -O1: orr x10, x1, x3
+; -O1: stlxp w11, x10, x9, [x8]
 %r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 16
 ret i128 %r
 }
@@ -4261,9 +4261,9 @@
 ;
 ; -O1-LABEL: atomicrmw_or_i128_aligned_seq_cst:
 ; -O1: ldaxp x1, x0, [x8]
-; -O1: orr x9, x1, x3
-; -O1: orr x10, x0, x2
-; -O1: stlxp w11, x9, x10, [x8]
+; -O1: orr x9, x0, x2
+; -O1: orr x10, x1, x3
+; -O1: stlxp w11, x10, x9, [x8]
 %r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 16
 ret i128 %r
 }
@@ -4359,7 +4359,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i16_unaligned_monotonic:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i16 %value monotonic, align 1
 ret i16 %r
@@ -4371,7 +4371,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i16_unaligned_acquire:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i16 %value acquire, align 1
 ret i16 %r
@@ -4383,7 +4383,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i16_unaligned_release:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i16 %value release, align 1
 ret i16 %r
@@ -4395,7 +4395,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i16_unaligned_acq_rel:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i16 %value acq_rel, align 1
 ret i16 %r
@@ -4407,7 +4407,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i16_unaligned_seq_cst:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i16 %value seq_cst, align 1
 ret i16 %r
@@ -4419,7 +4419,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i32_unaligned_monotonic:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i32 %value monotonic, align 1
 ret i32 %r
@@ -4431,7 +4431,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i32_unaligned_acquire:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i32 %value acquire, align 1
 ret i32 %r
@@ -4443,7 +4443,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i32_unaligned_release:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i32 %value release, align 1
 ret i32 %r
@@ -4455,7 +4455,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i32_unaligned_acq_rel:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i32 %value acq_rel, align 1
 ret i32 %r
@@ -4467,7 +4467,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i32_unaligned_seq_cst:
-; -O1: orr w8, w0, w19
+; -O1: orr w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i32 %value seq_cst, align 1
 ret i32 %r
@@ -4479,7 +4479,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i64_unaligned_monotonic:
-; -O1: orr x8, x0, x19
+; -O1: orr x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i64 %value monotonic, align 1
 ret i64 %r
@@ -4491,7 +4491,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i64_unaligned_acquire:
-; -O1: orr x8, x0, x19
+; -O1: orr x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i64 %value acquire, align 1
 ret i64 %r
@@ -4503,7 +4503,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i64_unaligned_release:
-; -O1: orr x8, x0, x19
+; -O1: orr x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i64 %value release, align 1
 ret i64 %r
@@ -4515,7 +4515,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i64_unaligned_acq_rel:
-; -O1: orr x8, x0, x19
+; -O1: orr x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i64 %value acq_rel, align 1
 ret i64 %r
@@ -4527,7 +4527,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_or_i64_unaligned_seq_cst:
-; -O1: orr x8, x0, x19
+; -O1: orr x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i64 %value seq_cst, align 1
 ret i64 %r
@@ -4542,7 +4542,7 @@
 ; -O1-LABEL: atomicrmw_or_i128_unaligned_monotonic:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x20
+; -O1: orr x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i128 %value monotonic, align 1
 ret i128 %r
@@ -4557,7 +4557,7 @@
 ; -O1-LABEL: atomicrmw_or_i128_unaligned_acquire:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x20
+; -O1: orr x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i128 %value acquire, align 1
 ret i128 %r
@@ -4572,7 +4572,7 @@
 ; -O1-LABEL: atomicrmw_or_i128_unaligned_release:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x20
+; -O1: orr x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i128 %value release, align 1
 ret i128 %r
@@ -4587,7 +4587,7 @@
 ; -O1-LABEL: atomicrmw_or_i128_unaligned_acq_rel:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x20
+; -O1: orr x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 1
 ret i128 %r
@@ -4602,7 +4602,7 @@
 ; -O1-LABEL: atomicrmw_or_i128_unaligned_seq_cst:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: orr x8, x1, x19
-; -O1: orr x9, x0, x20
+; -O1: orr x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 1
 ret i128 %r
@@ -4962,9 +4962,9 @@
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic:
 ; -O1: ldxp x1, x0, [x8]
-; -O1: eor x9, x1, x3
-; -O1: eor x10, x0, x2
-; -O1: stxp w11, x9, x10, [x8]
+; -O1: eor x9, x0, x2
+; -O1: eor x10, x1, x3
+; -O1: stxp w11, x10, x9, [x8]
 %r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 16
 ret i128 %r
 }
@@ -4983,9 +4983,9 @@
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire:
 ; -O1: ldaxp x1, x0, [x8]
-; -O1: eor x9, x1, x3
-; -O1: eor x10, x0, x2
-; -O1: stxp w11, x9, x10, [x8]
+; -O1: eor x9, x0, x2
+; -O1: eor x10, x1, x3
+; -O1: stxp w11, x10, x9, [x8]
 %r = atomicrmw xor ptr %ptr, i128 %value acquire, align 16
 ret i128 %r
 }
@@ -5004,9 +5004,9 @@
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_release:
 ; -O1: ldxp x1, x0, [x8]
-; -O1: eor x9, x1, x3
-; -O1: eor x10, x0, x2
-; -O1: stlxp w11, x9, x10, [x8]
+; -O1: eor x9, x0, x2
+; -O1: eor x10, x1, x3
+; -O1: stlxp w11, x10, x9, [x8]
 %r = atomicrmw xor ptr %ptr, i128 %value release, align 16
 ret i128 %r
 }
@@ -5025,9 +5025,9 @@
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel:
 ; -O1: ldaxp x1, x0, [x8]
-; -O1: eor x9, x1, x3
-; -O1: eor x10, x0, x2
-; -O1: stlxp w11, x9, x10, [x8]
+; -O1: eor x9, x0, x2
+; -O1: eor x10, x1, x3
+; -O1: stlxp w11, x10, x9, [x8]
 %r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 16
 ret i128 %r
 }
@@ -5046,9 +5046,9 @@
 ;
 ; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst:
 ; -O1: ldaxp x1, x0, [x8]
-; -O1: eor x9, x1, x3
-; -O1: eor x10, x0, x2
-; -O1: stlxp w11, x9, x10, [x8]
+; -O1: eor x9, x0, x2
+; -O1: eor x10, x1, x3
+; -O1: stlxp w11, x10, x9, [x8]
 %r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 16
 ret i128 %r
 }
@@ -5144,7 +5144,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_unaligned_monotonic:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i16 %value monotonic, align 1
 ret i16 %r
@@ -5156,7 +5156,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_unaligned_acquire:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i16 %value acquire, align 1
 ret i16 %r
@@ -5168,7 +5168,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_unaligned_release:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i16 %value release, align 1
 ret i16 %r
@@ -5180,7 +5180,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i16 %value acq_rel, align 1
 ret i16 %r
@@ -5192,7 +5192,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i16 %value seq_cst, align 1
 ret i16 %r
@@ -5204,7 +5204,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i32 %value monotonic, align 1
 ret i32 %r
@@ -5216,7 +5216,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_unaligned_acquire:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i32 %value acquire, align 1
 ret i32 %r
@@ -5228,7 +5228,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_unaligned_release:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i32 %value release, align 1
 ret i32 %r
@@ -5240,7 +5240,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i32 %value acq_rel, align 1
 ret i32 %r
@@ -5252,7 +5252,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i32 %value seq_cst, align 1
 ret i32 %r
@@ -5264,7 +5264,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
-; -O1: eor x8, x0, x19
+; -O1: eor x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i64 %value monotonic, align 1
 ret i64 %r
@@ -5276,7 +5276,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_unaligned_acquire:
-; -O1: eor x8, x0, x19
+; -O1: eor x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i64 %value acquire, align 1
 ret i64 %r
@@ -5288,7 +5288,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_unaligned_release:
-; -O1: eor x8, x0, x19
+; -O1: eor x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i64 %value release, align 1
 ret i64 %r
@@ -5300,7 +5300,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
-; -O1: eor x8, x0, x19
+; -O1: eor x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i64 %value acq_rel, align 1
 ret i64 %r
@@ -5312,7 +5312,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
-; -O1: eor x8, x0, x19
+; -O1: eor x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i64 %value seq_cst, align 1
 ret i64 %r
@@ -5327,7 +5327,7 @@
 ; -O1-LABEL: atomicrmw_xor_i128_unaligned_monotonic:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x20
+; -O1: eor x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 1
 ret i128 %r
@@ -5342,7 +5342,7 @@
 ; -O1-LABEL: atomicrmw_xor_i128_unaligned_acquire:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x20
+; -O1: eor x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i128 %value acquire, align 1
 ret i128 %r
@@ -5357,7 +5357,7 @@
 ; -O1-LABEL: atomicrmw_xor_i128_unaligned_release:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x20
+; -O1: eor x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i128 %value release, align 1
 ret i128 %r
@@ -5372,7 +5372,7 @@
 ; -O1-LABEL: atomicrmw_xor_i128_unaligned_acq_rel:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x20
+; -O1: eor x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 1
 ret i128 %r
@@ -5387,7 +5387,7 @@
 ; -O1-LABEL: atomicrmw_xor_i128_unaligned_seq_cst:
 ; -O1: ldp x0, x1, [x0]
 ; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x20
+; -O1: eor x9, x0, x21
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 1
 ret i128 %r
@@ -6022,8 +6022,8 @@
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i16 %value monotonic, align 1
 ret i16 %r
@@ -6038,8 +6038,8 @@
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i16 %value acquire, align 1
 ret i16 %r
@@ -6054,8 +6054,8 @@
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i16 %value release, align 1
 ret i16 %r
@@ -6070,8 +6070,8 @@
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i16 %value acq_rel, align 1
 ret i16 %r
@@ -6086,8 +6086,8 @@
 ;
 ; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i16 %value seq_cst, align 1
 ret i16 %r
@@ -6100,8 +6100,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i32 %value monotonic, align 1
 ret i32 %r
@@ -6114,8 +6114,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i32 %value acquire, align 1
 ret i32 %r
@@ -6128,8 +6128,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i32 %value release, align 1
 ret i32 %r
@@ -6142,8 +6142,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i32 %value acq_rel, align 1
 ret i32 %r
@@ -6156,8 +6156,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i32 %value seq_cst, align 1
 ret i32 %r
@@ -6170,8 +6170,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, gt
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i64 %value monotonic, align 1
 ret i64 %r
@@ -6184,8 +6184,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, gt
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i64 %value acquire, align 1
 ret i64 %r
@@ -6198,8 +6198,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, gt
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i64 %value release, align 1
 ret i64 %r
@@ -6212,8 +6212,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, gt
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i64 %value acq_rel, align 1
 ret i64 %r
@@ -6226,8 +6226,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, gt
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, gt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i64 %value seq_cst, align 1
 ret i64 %r
@@ -6244,7 +6244,7 @@
 ; -O1: ldp x0, x1, [x0]
 ; -O1: cmp x19, x1
 ; -O1: csel x8, x1, x19, lt
-; -O1: csel x9, x0, x20, lt
+; -O1: csel x9, x0, x21, lt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i128 %value monotonic, align 1
 ret i128 %r
@@ -6261,7 +6261,7 @@
 ; -O1: ldp x0, x1, [x0]
 ; -O1: cmp x19, x1
 ; -O1: csel x8, x1, x19, lt
-; -O1: csel x9, x0, x20, lt
+; -O1: csel x9, x0, x21, lt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i128 %value acquire, align 1
 ret i128 %r
@@ -6278,7 +6278,7 @@
 ; -O1: ldp x0, x1, [x0]
 ; -O1: cmp x19, x1
 ; -O1: csel x8, x1, x19, lt
-; -O1: csel x9, x0, x20, lt
+; -O1: csel x9, x0, x21, lt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i128 %value release, align 1
 ret i128 %r
@@ -6295,7 +6295,7 @@
 ; -O1: ldp x0, x1, [x0]
 ; -O1: cmp x19, x1
 ; -O1: csel x8, x1, x19, lt
-; -O1: csel x9, x0, x20, lt
+; -O1: csel x9, x0, x21, lt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i128 %value acq_rel, align 1
 ret i128 %r
@@ -6312,7 +6312,7 @@
 ; -O1: ldp x0, x1, [x0]
 ; -O1: cmp x19, x1
 ; -O1: csel x8, x1, x19, lt
-; -O1: csel x9, x0, x20, lt
+; -O1: csel x9, x0, x21, lt
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw max ptr %ptr, i128 %value seq_cst, align 1
 ret i128 %r
@@ -6947,8 +6947,8 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i16 %value monotonic, align 1
 ret i16 %r
@@ -6963,8 +6963,8 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i16 %value acquire, align 1
 ret i16 %r
@@ -6979,8 +6979,8 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i16 %value release, align 1
 ret i16 %r
@@ -6995,8 +6995,8 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i16 %value acq_rel, align 1
 ret i16 %r
@@ -7011,8 +7011,8 @@
 ;
 ; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
 ; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i16 %value seq_cst, align 1
 ret i16 %r
@@ -7025,8 +7025,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i32 %value monotonic, align 1
 ret i32 %r
@@ -7039,8 +7039,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i32 %value acquire, align 1
 ret i32 %r
@@ -7053,8 +7053,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i32 %value release, align 1
 ret i32 %r
@@ -7067,8 +7067,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i32 %value acq_rel, align 1
 ret i32 %r
@@ -7081,8 +7081,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i32 %value seq_cst, align 1
 ret i32 %r
@@ -7095,8 +7095,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, le
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i64 %value monotonic, align 1
 ret i64 %r
@@ -7109,8 +7109,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, le
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i64 %value acquire, align 1
 ret i64 %r
@@ -7123,8 +7123,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, le
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i64 %value release, align 1
 ret i64 %r
@@ -7137,8 +7137,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, le
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i64 %value acq_rel, align 1
 ret i64 %r
@@ -7151,8 +7151,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, le
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, le
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i64 %value seq_cst, align 1
 ret i64 %r
@@ -7169,7 +7169,7 @@
 ; -O1: ldp x0, x1, [x0]
 ; -O1: cmp x19, x1
 ; -O1: csel x8, x1, x19, ge
-; -O1: csel x9, x0, x20, ge
+; -O1: csel x9, x0, x21, ge
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i128 %value monotonic, align 1
 ret i128 %r
@@ -7186,7 +7186,7 @@
 ; -O1: ldp x0, x1, [x0]
 ; -O1: cmp x19, x1
 ; -O1: csel x8, x1, x19, ge
-; -O1: csel x9, x0, x20, ge
+; -O1: csel x9, x0, x21, ge
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i128 %value acquire, align 1
 ret i128 %r
@@ -7203,7 +7203,7 @@
 ; -O1: ldp x0, x1, [x0]
 ; -O1: cmp x19, x1
 ; -O1: csel x8, x1, x19, ge
-; -O1: csel x9, x0, x20, ge
+; -O1: csel x9, x0, x21, ge
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i128 %value release, align 1
 ret i128 %r
@@ -7220,7 +7220,7 @@
 ; -O1: ldp x0, x1, [x0]
 ; -O1: cmp x19, x1
 ; -O1: csel x8, x1, x19, ge
-; -O1: csel x9, x0, x20, ge
+; -O1: csel x9, x0, x21, ge
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i128 %value acq_rel, align 1
 ret i128 %r
@@ -7237,7 +7237,7 @@
 ; -O1: ldp x0, x1, [x0]
 ; -O1: cmp x19, x1
 ; -O1: csel x8, x1, x19, ge
-; -O1: csel x9, x0, x20, ge
+; -O1: csel x9, x0, x21, ge
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw min ptr %ptr, i128 %value seq_cst, align 1
 ret i128 %r
@@ -7872,8 +7872,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i16 %value monotonic, align 1
 ret i16 %r
@@ -7888,8 +7888,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i16 %value acquire, align 1
 ret i16 %r
@@ -7904,8 +7904,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i16 %value release, align 1
 ret i16 %r
@@ -7920,8 +7920,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i16 %value acq_rel, align 1
 ret i16 %r
@@ -7936,8 +7936,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i16 %value seq_cst, align 1
 ret i16 %r
@@ -7950,8 +7950,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i32 %value monotonic, align 1
 ret i32 %r
@@ -7964,8 +7964,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i32 %value acquire, align 1
 ret i32 %r
@@ -7978,8 +7978,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i32 %value release, align 1
 ret i32 %r
@@ -7992,8 +7992,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i32 %value acq_rel, align 1
 ret i32 %r
@@ -8006,8 +8006,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i32 %value seq_cst, align 1
 ret i32 %r
@@ -8020,8 +8020,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i64 %value monotonic, align 1
 ret i64 %r
@@ -8034,8 +8034,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i64 %value acquire, align 1
 ret i64 %r
@@ -8048,8 +8048,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i64 %value release, align 1
 ret i64 %r
@@ -8062,8 +8062,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i64 %value acq_rel, align 1
 ret i64 %r
@@ -8076,8 +8076,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i64 %value seq_cst, align 1
 ret i64 %r
@@ -8094,7 +8094,7 @@
 ; -O1: ldp x0, x1, [x0]
 ; -O1: cmp x19, x1
 ; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i128 %value monotonic, align 1
 ret i128 %r
@@ -8111,7 +8111,7 @@
 ; -O1: ldp x0, x1, [x0]
 ; -O1: cmp x19, x1
 ; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i128 %value acquire, align 1
 ret i128 %r
@@ -8128,7 +8128,7 @@
 ; -O1: ldp x0, x1, [x0]
 ; -O1: cmp x19, x1
 ; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i128 %value release, align 1
 ret i128 %r
@@ -8145,7 +8145,7 @@
 ; -O1: ldp x0, x1, [x0]
 ; -O1: cmp x19, x1
 ; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i128 %value acq_rel, align 1
 ret i128 %r
@@ -8162,7 +8162,7 @@
 ; -O1: ldp x0, x1, [x0]
 ; -O1: cmp x19, x1
 ; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umax ptr %ptr, i128 %value seq_cst, align 1
 ret i128 %r
@@ -8797,8 +8797,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i16 %value monotonic, align 1
 ret i16 %r
@@ -8813,8 +8813,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i16 %value acquire, align 1
 ret i16 %r
@@ -8829,8 +8829,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i16 %value release, align 1
 ret i16 %r
@@ -8845,8 +8845,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i16 %value acq_rel, align 1
 ret i16 %r
@@ -8861,8 +8861,8 @@
 ;
 ; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
 ; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i16 %value seq_cst, align 1
 ret i16 %r
@@ -8875,8 +8875,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i32 %value monotonic, align 1
 ret i32 %r
@@ -8889,8 +8889,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i32 %value acquire, align 1
 ret i32 %r
@@ -8903,8 +8903,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i32 %value release, align 1
 ret i32 %r
@@ -8917,8 +8917,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i32 %value acq_rel, align 1
 ret i32 %r
@@ -8931,8 +8931,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i32 %value seq_cst, align 1
 ret i32 %r
@@ -8945,8 +8945,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i64 %value monotonic, align 1
 ret i64 %r
@@ -8959,8 +8959,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i64 %value acquire, align 1
 ret i64 %r
@@ -8973,8 +8973,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i64 %value release, align 1
 ret i64 %r
@@ -8987,8 +8987,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i64 %value acq_rel, align 1
 ret i64 %r
@@ -9001,8 +9001,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i64 %value seq_cst, align 1
 ret i64 %r
@@ -9019,7 +9019,7 @@
 ; -O1: ldp x0, x1, [x0]
 ; -O1: cmp x19, x1
 ; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i128 %value monotonic, align 1
 ret i128 %r
@@ -9036,7 +9036,7 @@
 ; -O1: ldp x0, x1, [x0]
 ; -O1: cmp x19, x1
 ; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i128 %value acquire, align 1
 ret i128 %r
@@ -9053,7 +9053,7 @@
 ; -O1: ldp x0, x1, [x0]
 ; -O1: cmp x19, x1
 ; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i128 %value release, align 1
 ret i128 %r
@@ -9070,7 +9070,7 @@
 ; -O1: ldp x0, x1, [x0]
 ; -O1: cmp x19, x1
 ; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i128 %value acq_rel, align 1
 ret i128 %r
@@ -9087,7 +9087,7 @@
 ; -O1: ldp x0, x1, [x0]
 ; -O1: cmp x19, x1
 ; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i128 %value seq_cst, align 1
 ret i128 %r
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8_1a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8_1a.ll
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8_1a.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8_1a.ll
@@ -659,7 +659,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i16_unaligned_monotonic:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i16 %value monotonic, align 1
 ret i16 %r
@@ -671,7 +671,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i16_unaligned_acquire:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i16 %value acquire, align 1
 ret i16 %r
@@ -683,7 +683,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i16_unaligned_release:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i16 %value release, align 1
 ret i16 %r
@@ -695,7 +695,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i16 %value acq_rel, align 1
 ret i16 %r
@@ -707,7 +707,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i16 %value seq_cst, align 1
 ret i16 %r
@@ -719,7 +719,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i32_unaligned_monotonic:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i32 %value monotonic, align 1
 ret i32 %r
@@ -731,7 +731,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i32_unaligned_acquire:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i32 %value acquire, align 1
 ret i32 %r
@@ -743,7 +743,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i32_unaligned_release:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i32 %value release, align 1
 ret i32 %r
@@ -755,7 +755,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i32 %value acq_rel, align 1
 ret i32 %r
@@ -767,7 +767,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i32 %value seq_cst, align 1
 ret i32 %r
@@ -779,7 +779,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i64_unaligned_monotonic:
-; -O1: add x8, x0, x19
+; -O1: add x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i64 %value monotonic, align 1
 ret i64 %r
@@ -791,7 +791,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_add_i64_unaligned_acquire:
-; -O1: add x8, x0, x19
+; -O1: add x8, x0, x20
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw add ptr %ptr, i64 %value acquire, align 1
 ret i64 %r
@@ -803,7 +803,7 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL:
atomicrmw_add_i64_unaligned_release: -; -O1: add x8, x0, x19 +; -O1: add x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -815,7 +815,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i64_unaligned_acq_rel: -; -O1: add x8, x0, x19 +; -O1: add x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -827,7 +827,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_add_i64_unaligned_seq_cst: -; -O1: add x8, x0, x19 +; -O1: add x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw add ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -1264,7 +1264,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i16_unaligned_monotonic: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -1276,7 +1276,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i16_unaligned_acquire: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -1288,7 +1288,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i16_unaligned_release: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -1300,7 +1300,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i16_unaligned_acq_rel: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -1312,7 +1312,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i16_unaligned_seq_cst: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -1324,7 +1324,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i32_unaligned_monotonic: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -1336,7 +1336,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i32_unaligned_acquire: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -1348,7 +1348,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i32_unaligned_release: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -1360,7 +1360,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i32_unaligned_acq_rel: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -1372,7 +1372,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i32_unaligned_seq_cst: -; -O1: sub w8, w0, w19 +; -O1: sub w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -1384,7 +1384,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_monotonic: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub 
ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -1396,7 +1396,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_acquire: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -1408,7 +1408,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_release: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -1420,7 +1420,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_acq_rel: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -1432,7 +1432,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_seq_cst: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -1804,7 +1804,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i16_unaligned_monotonic: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -1816,7 +1816,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i16_unaligned_acquire: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -1828,7 +1828,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i16_unaligned_release: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -1840,7 +1840,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i16_unaligned_acq_rel: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -1852,7 +1852,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i16_unaligned_seq_cst: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -1864,7 +1864,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i32_unaligned_monotonic: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -1876,7 +1876,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i32_unaligned_acquire: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -1888,7 +1888,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i32_unaligned_release: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -1900,7 +1900,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i32_unaligned_acq_rel: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -1912,7 +1912,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: 
atomicrmw_and_i32_unaligned_seq_cst: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -1924,7 +1924,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i64_unaligned_monotonic: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -1936,7 +1936,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i64_unaligned_acquire: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -1948,7 +1948,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i64_unaligned_release: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -1960,7 +1960,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i64_unaligned_acq_rel: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -1972,7 +1972,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i64_unaligned_seq_cst: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -1987,7 +1987,7 @@ ; -O1-LABEL: atomicrmw_and_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -2002,7 +2002,7 @@ ; -O1-LABEL: atomicrmw_and_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -2017,7 +2017,7 @@ ; -O1-LABEL: atomicrmw_and_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -2032,7 +2032,7 @@ ; -O1-LABEL: atomicrmw_and_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -2047,7 +2047,7 @@ ; -O1-LABEL: atomicrmw_and_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -2600,7 +2600,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i16_unaligned_monotonic: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i16 %value monotonic, align 1 @@ -2614,7 +2614,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i16_unaligned_acquire: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i16 %value acquire, align 1 @@ -2628,7 +2628,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i16_unaligned_release: -; -O1: and w8, w0, 
w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i16 %value release, align 1 @@ -2642,7 +2642,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i16_unaligned_acq_rel: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i16 %value acq_rel, align 1 @@ -2656,7 +2656,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i16_unaligned_seq_cst: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i16 %value seq_cst, align 1 @@ -2670,7 +2670,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i32_unaligned_monotonic: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i32 %value monotonic, align 1 @@ -2684,7 +2684,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i32_unaligned_acquire: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i32 %value acquire, align 1 @@ -2698,7 +2698,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i32_unaligned_release: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i32 %value release, align 1 @@ -2712,7 +2712,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i32_unaligned_acq_rel: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i32 %value acq_rel, align 1 @@ -2726,7 +2726,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i32_unaligned_seq_cst: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i32 %value seq_cst, align 1 @@ -2740,7 +2740,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i64_unaligned_monotonic: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: mvn x8, x8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i64 %value monotonic, align 1 @@ -2754,7 +2754,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i64_unaligned_acquire: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: mvn x8, x8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i64 %value acquire, align 1 @@ -2768,7 +2768,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i64_unaligned_release: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: mvn x8, x8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i64 %value release, align 1 @@ -2782,7 +2782,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i64_unaligned_acq_rel: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: mvn x8, x8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i64 %value acq_rel, align 1 @@ -2796,7 +2796,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i64_unaligned_seq_cst: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: mvn x8, x8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i64 %value seq_cst, align 1 @@ -2814,7 +2814,7 @@ ; -O1-LABEL: atomicrmw_nand_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: 
and x9, x0, x21 ; -O1: mvn x8, x8 ; -O1: mvn x9, x9 ; -O1: bl __atomic_compare_exchange @@ -2833,7 +2833,7 @@ ; -O1-LABEL: atomicrmw_nand_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: mvn x8, x8 ; -O1: mvn x9, x9 ; -O1: bl __atomic_compare_exchange @@ -2852,7 +2852,7 @@ ; -O1-LABEL: atomicrmw_nand_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: mvn x8, x8 ; -O1: mvn x9, x9 ; -O1: bl __atomic_compare_exchange @@ -2871,7 +2871,7 @@ ; -O1-LABEL: atomicrmw_nand_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: mvn x8, x8 ; -O1: mvn x9, x9 ; -O1: bl __atomic_compare_exchange @@ -2890,7 +2890,7 @@ ; -O1-LABEL: atomicrmw_nand_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: mvn x8, x8 ; -O1: mvn x9, x9 ; -O1: bl __atomic_compare_exchange @@ -3174,7 +3174,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i16_unaligned_monotonic: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -3186,7 +3186,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i16_unaligned_acquire: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -3198,7 +3198,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i16_unaligned_release: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -3210,7 +3210,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i16_unaligned_acq_rel: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -3222,7 +3222,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i16_unaligned_seq_cst: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -3234,7 +3234,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i32_unaligned_monotonic: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -3246,7 +3246,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i32_unaligned_acquire: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -3258,7 +3258,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i32_unaligned_release: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -3270,7 +3270,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i32_unaligned_acq_rel: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -3282,7 +3282,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i32_unaligned_seq_cst: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, 
w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -3294,7 +3294,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i64_unaligned_monotonic: -; -O1: orr x8, x0, x19 +; -O1: orr x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -3306,7 +3306,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i64_unaligned_acquire: -; -O1: orr x8, x0, x19 +; -O1: orr x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -3318,7 +3318,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i64_unaligned_release: -; -O1: orr x8, x0, x19 +; -O1: orr x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -3330,7 +3330,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i64_unaligned_acq_rel: -; -O1: orr x8, x0, x19 +; -O1: orr x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -3342,7 +3342,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i64_unaligned_seq_cst: -; -O1: orr x8, x0, x19 +; -O1: orr x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -3357,7 +3357,7 @@ ; -O1-LABEL: atomicrmw_or_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] ; -O1: orr x8, x1, x19 -; -O1: orr x9, x0, x20 +; -O1: orr x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -3372,7 +3372,7 @@ ; -O1-LABEL: atomicrmw_or_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] ; -O1: orr x8, x1, x19 -; -O1: orr x9, x0, x20 +; -O1: orr x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -3387,7 +3387,7 @@ ; -O1-LABEL: atomicrmw_or_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] ; -O1: orr x8, x1, x19 -; -O1: orr x9, x0, x20 +; -O1: orr x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -3402,7 +3402,7 @@ ; -O1-LABEL: atomicrmw_or_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] ; -O1: orr x8, x1, x19 -; -O1: orr x9, x0, x20 +; -O1: orr x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -3417,7 +3417,7 @@ ; -O1-LABEL: atomicrmw_or_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] ; -O1: orr x8, x1, x19 -; -O1: orr x9, x0, x20 +; -O1: orr x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -3699,7 +3699,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i16_unaligned_monotonic: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -3711,7 +3711,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i16_unaligned_acquire: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -3723,7 +3723,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i16_unaligned_release: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i16 %value release, align 1 
ret i16 %r
@@ -3735,7 +3735,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_acq_rel:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i16 %value acq_rel, align 1
ret i16 %r
@@ -3747,7 +3747,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_xor_i16_unaligned_seq_cst:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i16 %value seq_cst, align 1
ret i16 %r
@@ -3759,7 +3759,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_monotonic:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i32 %value monotonic, align 1
ret i32 %r
@@ -3771,7 +3771,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acquire:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i32 %value acquire, align 1
ret i32 %r
@@ -3783,7 +3783,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_release:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i32 %value release, align 1
ret i32 %r
@@ -3795,7 +3795,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_acq_rel:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i32 %value acq_rel, align 1
ret i32 %r
@@ -3807,7 +3807,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_xor_i32_unaligned_seq_cst:
-; -O1: eor w8, w0, w19
+; -O1: eor w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i32 %value seq_cst, align 1
ret i32 %r
@@ -3819,7 +3819,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_monotonic:
-; -O1: eor x8, x0, x19
+; -O1: eor x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i64 %value monotonic, align 1
ret i64 %r
@@ -3831,7 +3831,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acquire:
-; -O1: eor x8, x0, x19
+; -O1: eor x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i64 %value acquire, align 1
ret i64 %r
@@ -3843,7 +3843,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_release:
-; -O1: eor x8, x0, x19
+; -O1: eor x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i64 %value release, align 1
ret i64 %r
@@ -3855,7 +3855,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_acq_rel:
-; -O1: eor x8, x0, x19
+; -O1: eor x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i64 %value acq_rel, align 1
ret i64 %r
@@ -3867,7 +3867,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_xor_i64_unaligned_seq_cst:
-; -O1: eor x8, x0, x19
+; -O1: eor x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i64 %value seq_cst, align 1
ret i64 %r
@@ -3882,7 +3882,7 @@
; -O1-LABEL: atomicrmw_xor_i128_unaligned_monotonic:
; -O1: ldp x0, x1, [x0]
; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x20
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -3897,7 +3897,7 @@
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acquire:
; -O1: ldp x0, x1, [x0]
; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x20
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -3912,7 +3912,7 @@
; -O1-LABEL: atomicrmw_xor_i128_unaligned_release:
; -O1: ldp x0, x1, [x0]
; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x20
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -3927,7 +3927,7 @@
; -O1-LABEL: atomicrmw_xor_i128_unaligned_acq_rel:
; -O1: ldp x0, x1, [x0]
; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x20
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -3942,7 +3942,7 @@
; -O1-LABEL: atomicrmw_xor_i128_unaligned_seq_cst:
; -O1: ldp x0, x1, [x0]
; -O1: eor x8, x1, x19
-; -O1: eor x9, x0, x20
+; -O1: eor x9, x0, x21
; -O1: bl __atomic_compare_exchange
%r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -4237,8 +4237,8 @@
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic:
; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i16 %value monotonic, align 1
ret i16 %r
@@ -4253,8 +4253,8 @@
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire:
; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i16 %value acquire, align 1
ret i16 %r
@@ -4269,8 +4269,8 @@
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_release:
; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i16 %value release, align 1
ret i16 %r
@@ -4285,8 +4285,8 @@
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i16 %value acq_rel, align 1
ret i16 %r
@@ -4301,8 +4301,8 @@
;
; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i16 %value seq_cst, align 1
ret i16 %r
@@ -4315,8 +4315,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i32 %value monotonic, align 1
ret i32 %r
@@ -4329,8 +4329,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i32 %value acquire, align 1
ret i32 %r
@@ -4343,8 +4343,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_release:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i32 %value release, align 1
ret i32 %r
@@ -4357,8 +4357,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i32 %value acq_rel, align 1
ret i32 %r
@@ -4371,8 +4371,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, gt
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i32 %value seq_cst, align 1
ret i32 %r
@@ -4385,8 +4385,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, gt
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i64 %value monotonic, align 1
ret i64 %r
@@ -4399,8 +4399,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, gt
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i64 %value acquire, align 1
ret i64 %r
@@ -4413,8 +4413,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_release:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, gt
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i64 %value release, align 1
ret i64 %r
@@ -4427,8 +4427,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, gt
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i64 %value acq_rel, align 1
ret i64 %r
@@ -4441,8 +4441,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, gt
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, gt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i64 %value seq_cst, align 1
ret i64 %r
@@ -4459,7 +4459,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, lt
-; -O1: csel x9, x0, x20, lt
+; -O1: csel x9, x0, x21, lt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -4476,7 +4476,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, lt
-; -O1: csel x9, x0, x20, lt
+; -O1: csel x9, x0, x21, lt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -4493,7 +4493,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, lt
-; -O1: csel x9, x0, x20, lt
+; -O1: csel x9, x0, x21, lt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -4510,7 +4510,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, lt
-; -O1: csel x9, x0, x20, lt
+; -O1: csel x9, x0, x21, lt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -4527,7 +4527,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, lt
-; -O1: csel x9, x0, x20, lt
+; -O1: csel x9, x0, x21, lt
; -O1: bl __atomic_compare_exchange
%r = atomicrmw max ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -4822,8 +4822,8 @@
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic:
; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i16 %value monotonic, align 1
ret i16 %r
@@ -4838,8 +4838,8 @@
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire:
; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i16 %value acquire, align 1
ret i16 %r
@@ -4854,8 +4854,8 @@
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_release:
; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i16 %value release, align 1
ret i16 %r
@@ -4870,8 +4870,8 @@
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel:
; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i16 %value acq_rel, align 1
ret i16 %r
@@ -4886,8 +4886,8 @@
;
; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst:
; -O1: sxth w8, w0
-; -O1: cmp w8, w19, sxth
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w8, w20, sxth
+; -O1: csel w8, w0, w20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i16 %value seq_cst, align 1
ret i16 %r
@@ -4900,8 +4900,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i32 %value monotonic, align 1
ret i32 %r
@@ -4914,8 +4914,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i32 %value acquire, align 1
ret i32 %r
@@ -4928,8 +4928,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_release:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i32 %value release, align 1
ret i32 %r
@@ -4942,8 +4942,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i32 %value acq_rel, align 1
ret i32 %r
@@ -4956,8 +4956,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, le
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i32 %value seq_cst, align 1
ret i32 %r
@@ -4970,8 +4970,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, le
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i64 %value monotonic, align 1
ret i64 %r
@@ -4984,8 +4984,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, le
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i64 %value acquire, align 1
ret i64 %r
@@ -4998,8 +4998,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_release:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, le
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i64 %value release, align 1
ret i64 %r
@@ -5012,8 +5012,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, le
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i64 %value acq_rel, align 1
ret i64 %r
@@ -5026,8 +5026,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, le
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, le
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i64 %value seq_cst, align 1
ret i64 %r
@@ -5044,7 +5044,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, ge
-; -O1: csel x9, x0, x20, ge
+; -O1: csel x9, x0, x21, ge
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -5061,7 +5061,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, ge
-; -O1: csel x9, x0, x20, ge
+; -O1: csel x9, x0, x21, ge
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -5078,7 +5078,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, ge
-; -O1: csel x9, x0, x20, ge
+; -O1: csel x9, x0, x21, ge
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -5095,7 +5095,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, ge
-; -O1: csel x9, x0, x20, ge
+; -O1: csel x9, x0, x21, ge
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -5112,7 +5112,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, ge
-; -O1: csel x9, x0, x20, ge
+; -O1: csel x9, x0, x21, ge
; -O1: bl __atomic_compare_exchange
%r = atomicrmw min ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -5407,8 +5407,8 @@
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i16 %value monotonic, align 1
ret i16 %r
@@ -5423,8 +5423,8 @@
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i16 %value acquire, align 1
ret i16 %r
@@ -5439,8 +5439,8 @@
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i16 %value release, align 1
ret i16 %r
@@ -5455,8 +5455,8 @@
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i16 %value acq_rel, align 1
ret i16 %r
@@ -5471,8 +5471,8 @@
;
; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i16 %value seq_cst, align 1
ret i16 %r
@@ -5485,8 +5485,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i32 %value monotonic, align 1
ret i32 %r
@@ -5499,8 +5499,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i32 %value acquire, align 1
ret i32 %r
@@ -5513,8 +5513,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_release:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i32 %value release, align 1
ret i32 %r
@@ -5527,8 +5527,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i32 %value acq_rel, align 1
ret i32 %r
@@ -5541,8 +5541,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, hi
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i32 %value seq_cst, align 1
ret i32 %r
@@ -5555,8 +5555,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i64 %value monotonic, align 1
ret i64 %r
@@ -5569,8 +5569,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i64 %value acquire, align 1
ret i64 %r
@@ -5583,8 +5583,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_release:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i64 %value release, align 1
ret i64 %r
@@ -5597,8 +5597,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i64 %value acq_rel, align 1
ret i64 %r
@@ -5611,8 +5611,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, hi
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, hi
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i64 %value seq_cst, align 1
ret i64 %r
@@ -5629,7 +5629,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -5646,7 +5646,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -5663,7 +5663,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -5680,7 +5680,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -5697,7 +5697,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, lo
-; -O1: csel x9, x0, x20, lo
+; -O1: csel x9, x0, x21, lo
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umax ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
@@ -5992,8 +5992,8 @@
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic:
; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i16 %value monotonic, align 1
ret i16 %r
@@ -6008,8 +6008,8 @@
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire:
; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i16 %value acquire, align 1
ret i16 %r
@@ -6024,8 +6024,8 @@
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_release:
; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i16 %value release, align 1
ret i16 %r
@@ -6040,8 +6040,8 @@
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel:
; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i16 %value acq_rel, align 1
ret i16 %r
@@ -6056,8 +6056,8 @@
;
; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst:
; -O1: and w8, w0, #0xffff
-; -O1: cmp w8, w19, uxth
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w8, w20, uxth
+; -O1: csel w8, w0, w20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i16 %value seq_cst, align 1
ret i16 %r
@@ -6070,8 +6070,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i32 %value monotonic, align 1
ret i32 %r
@@ -6084,8 +6084,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i32 %value acquire, align 1
ret i32 %r
@@ -6098,8 +6098,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_release:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i32 %value release, align 1
ret i32 %r
@@ -6112,8 +6112,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i32 %value acq_rel, align 1
ret i32 %r
@@ -6126,8 +6126,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umin_i32_unaligned_seq_cst:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i32 %value seq_cst, align 1
ret i32 %r
@@ -6140,8 +6140,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i64 %value monotonic, align 1
ret i64 %r
@@ -6154,8 +6154,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i64 %value acquire, align 1
ret i64 %r
@@ -6168,8 +6168,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i64 %value release, align 1
ret i64 %r
@@ -6182,8 +6182,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i64 %value acq_rel, align 1
ret i64 %r
@@ -6196,8 +6196,8 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i64 %value seq_cst, align 1
ret i64 %r
@@ -6214,7 +6214,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i128 %value monotonic, align 1
ret i128 %r
@@ -6231,7 +6231,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i128 %value acquire, align 1
ret i128 %r
@@ -6248,7 +6248,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i128 %value release, align 1
ret i128 %r
@@ -6265,7 +6265,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i128 %value acq_rel, align 1
ret i128 %r
@@ -6282,7 +6282,7 @@
; -O1: ldp x0, x1, [x0]
; -O1: cmp x19, x1
; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
; -O1: bl __atomic_compare_exchange
%r = atomicrmw umin ptr %ptr, i128 %value seq_cst, align 1
ret i128 %r
diff --git a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8a.ll b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8a.ll
--- a/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8a.ll
+++ b/llvm/test/CodeGen/AArch64/Atomics/aarch64_be-atomicrmw-v8a.ll
@@ -1129,7 +1129,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_monotonic:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i16 %value monotonic, align 1
ret i16 %r
@@ -1141,7 +1141,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acquire:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i16 %value acquire, align 1
ret i16 %r
@@ -1153,7 +1153,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_release:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i16 %value release, align 1
ret i16 %r
@@ -1165,7 +1165,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_acq_rel:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i16 %value acq_rel, align 1
ret i16 %r
@@ -1177,7 +1177,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i16_unaligned_seq_cst:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i16 %value seq_cst, align 1
ret i16 %r
@@ -1189,7 +1189,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_monotonic:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i32 %value monotonic, align 1
ret i32 %r
@@ -1201,7 +1201,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acquire:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i32 %value acquire, align 1
ret i32 %r
@@ -1213,7 +1213,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_release:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i32 %value release, align 1
ret i32 %r
@@ -1225,7 +1225,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_acq_rel:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i32 %value acq_rel, align 1
ret i32 %r
@@ -1237,7 +1237,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i32_unaligned_seq_cst:
-; -O1: add w8, w0, w19
+; -O1: add w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i32 %value seq_cst, align 1
ret i32 %r
@@ -1249,7 +1249,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_monotonic:
-; -O1: add x8, x0, x19
+; -O1: add x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i64 %value monotonic, align 1
ret i64 %r
@@ -1261,7 +1261,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acquire:
-; -O1: add x8, x0, x19
+; -O1: add x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i64 %value acquire, align 1
ret i64 %r
@@ -1273,7 +1273,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_release:
-; -O1: add x8, x0, x19
+; -O1: add x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i64 %value release, align 1
ret i64 %r
@@ -1285,7 +1285,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_acq_rel:
-; -O1: add x8, x0, x19
+; -O1: add x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i64 %value acq_rel, align 1
ret i64 %r
@@ -1297,7 +1297,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_add_i64_unaligned_seq_cst:
-; -O1: add x8, x0, x19
+; -O1: add x8, x0, x20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw add ptr %ptr, i64 %value seq_cst, align 1
ret i64 %r
@@ -1894,7 +1894,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_monotonic:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw sub ptr %ptr, i16 %value monotonic, align 1
ret i16 %r
@@ -1906,7 +1906,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acquire:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw sub ptr %ptr, i16 %value acquire, align 1
ret i16 %r
@@ -1918,7 +1918,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_release:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw sub ptr %ptr, i16 %value release, align 1
ret i16 %r
@@ -1930,7 +1930,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_acq_rel:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw sub ptr %ptr, i16 %value acq_rel, align 1
ret i16 %r
@@ -1942,7 +1942,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i16_unaligned_seq_cst:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw sub ptr %ptr, i16 %value seq_cst, align 1
ret i16 %r
@@ -1954,7 +1954,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_monotonic:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw sub ptr %ptr, i32 %value monotonic, align 1
ret i32 %r
@@ -1966,7 +1966,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acquire:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw sub ptr %ptr, i32 %value acquire, align 1
ret i32 %r
@@ -1978,7 +1978,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_release:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw sub ptr %ptr, i32 %value release, align 1
ret i32 %r
@@ -1990,7 +1990,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_acq_rel:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw sub ptr %ptr, i32 %value acq_rel, align 1
ret i32 %r
@@ -2002,7 +2002,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i32_unaligned_seq_cst:
-; -O1: sub w8, w0, w19
+; -O1: sub w8, w0, w20
; -O1: bl __atomic_compare_exchange
%r = atomicrmw sub ptr %ptr, i32 %value seq_cst, align 1
ret i32 %r
@@ -2014,7 +2014,7 @@
; -O0: bl __atomic_compare_exchange
;
; -O1-LABEL: atomicrmw_sub_i64_unaligned_monotonic:
-; -O1: sub x8, x0, x19
+; -O1: sub x8, x0, x20
; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -2026,7 +2026,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_acquire: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -2038,7 +2038,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_release: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -2050,7 +2050,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_acq_rel: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -2062,7 +2062,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_sub_i64_unaligned_seq_cst: -; -O1: sub x8, x0, x19 +; -O1: sub x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw sub ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -2487,9 +2487,9 @@ ; ; -O1-LABEL: atomicrmw_and_i128_aligned_monotonic: ; -O1: ldxp x1, x0, [x8] -; -O1: and x9, x1, x3 -; -O1: and x10, x0, x2 -; -O1: stxp w11, x9, x10, [x8] +; -O1: and x9, x0, x2 +; -O1: and x10, x1, x3 +; -O1: stxp w11, x10, x9, [x8] %r = atomicrmw and ptr %ptr, i128 %value monotonic, align 16 ret i128 %r } @@ -2508,9 +2508,9 @@ ; ; -O1-LABEL: atomicrmw_and_i128_aligned_acquire: ; -O1: ldaxp x1, x0, [x8] -; -O1: and x9, x1, x3 -; -O1: and x10, x0, x2 -; -O1: stxp w11, x9, x10, [x8] +; -O1: and x9, x0, x2 +; -O1: and x10, x1, x3 +; -O1: stxp w11, x10, x9, [x8] %r = atomicrmw and ptr %ptr, i128 %value acquire, align 16 ret i128 %r } @@ -2529,9 +2529,9 @@ ; ; -O1-LABEL: atomicrmw_and_i128_aligned_release: ; -O1: ldxp x1, x0, [x8] -; -O1: and x9, x1, x3 -; -O1: and x10, x0, x2 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: and x9, x0, x2 +; -O1: and x10, x1, x3 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw and ptr %ptr, i128 %value release, align 16 ret i128 %r } @@ -2550,9 +2550,9 @@ ; ; -O1-LABEL: atomicrmw_and_i128_aligned_acq_rel: ; -O1: ldaxp x1, x0, [x8] -; -O1: and x9, x1, x3 -; -O1: and x10, x0, x2 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: and x9, x0, x2 +; -O1: and x10, x1, x3 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 16 ret i128 %r } @@ -2571,9 +2571,9 @@ ; ; -O1-LABEL: atomicrmw_and_i128_aligned_seq_cst: ; -O1: ldaxp x1, x0, [x8] -; -O1: and x9, x1, x3 -; -O1: and x10, x0, x2 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: and x9, x0, x2 +; -O1: and x10, x1, x3 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 16 ret i128 %r } @@ -2669,7 +2669,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i16_unaligned_monotonic: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -2681,7 +2681,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i16_unaligned_acquire: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -2693,7 +2693,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i16_unaligned_release: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and 
ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -2705,7 +2705,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i16_unaligned_acq_rel: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -2717,7 +2717,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i16_unaligned_seq_cst: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -2729,7 +2729,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i32_unaligned_monotonic: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -2741,7 +2741,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i32_unaligned_acquire: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -2753,7 +2753,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i32_unaligned_release: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -2765,7 +2765,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i32_unaligned_acq_rel: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -2777,7 +2777,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i32_unaligned_seq_cst: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -2789,7 +2789,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i64_unaligned_monotonic: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -2801,7 +2801,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i64_unaligned_acquire: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -2813,7 +2813,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i64_unaligned_release: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -2825,7 +2825,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i64_unaligned_acq_rel: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -2837,7 +2837,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_and_i64_unaligned_seq_cst: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -2852,7 +2852,7 @@ ; -O1-LABEL: atomicrmw_and_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -2867,7 +2867,7 @@ ; -O1-LABEL: atomicrmw_and_i128_unaligned_acquire: ; 
-O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -2882,7 +2882,7 @@ ; -O1-LABEL: atomicrmw_and_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -2897,7 +2897,7 @@ ; -O1-LABEL: atomicrmw_and_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -2912,7 +2912,7 @@ ; -O1-LABEL: atomicrmw_and_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw and ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -3315,9 +3315,9 @@ ; -O1-LABEL: atomicrmw_nand_i128_aligned_monotonic: ; -O1: ldxp x1, x0, [x8] ; -O1: and x9, x1, x3 -; -O1: mvn x9, x9 ; -O1: and x10, x0, x2 ; -O1: mvn x10, x10 +; -O1: mvn x9, x9 ; -O1: stxp w11, x9, x10, [x8] %r = atomicrmw nand ptr %ptr, i128 %value monotonic, align 16 ret i128 %r @@ -3340,9 +3340,9 @@ ; -O1-LABEL: atomicrmw_nand_i128_aligned_acquire: ; -O1: ldaxp x1, x0, [x8] ; -O1: and x9, x1, x3 -; -O1: mvn x9, x9 ; -O1: and x10, x0, x2 ; -O1: mvn x10, x10 +; -O1: mvn x9, x9 ; -O1: stxp w11, x9, x10, [x8] %r = atomicrmw nand ptr %ptr, i128 %value acquire, align 16 ret i128 %r @@ -3365,9 +3365,9 @@ ; -O1-LABEL: atomicrmw_nand_i128_aligned_release: ; -O1: ldxp x1, x0, [x8] ; -O1: and x9, x1, x3 -; -O1: mvn x9, x9 ; -O1: and x10, x0, x2 ; -O1: mvn x10, x10 +; -O1: mvn x9, x9 ; -O1: stlxp w11, x9, x10, [x8] %r = atomicrmw nand ptr %ptr, i128 %value release, align 16 ret i128 %r @@ -3390,9 +3390,9 @@ ; -O1-LABEL: atomicrmw_nand_i128_aligned_acq_rel: ; -O1: ldaxp x1, x0, [x8] ; -O1: and x9, x1, x3 -; -O1: mvn x9, x9 ; -O1: and x10, x0, x2 ; -O1: mvn x10, x10 +; -O1: mvn x9, x9 ; -O1: stlxp w11, x9, x10, [x8] %r = atomicrmw nand ptr %ptr, i128 %value acq_rel, align 16 ret i128 %r @@ -3415,9 +3415,9 @@ ; -O1-LABEL: atomicrmw_nand_i128_aligned_seq_cst: ; -O1: ldaxp x1, x0, [x8] ; -O1: and x9, x1, x3 -; -O1: mvn x9, x9 ; -O1: and x10, x0, x2 ; -O1: mvn x10, x10 +; -O1: mvn x9, x9 ; -O1: stlxp w11, x9, x10, [x8] %r = atomicrmw nand ptr %ptr, i128 %value seq_cst, align 16 ret i128 %r @@ -3525,7 +3525,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i16_unaligned_monotonic: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i16 %value monotonic, align 1 @@ -3539,7 +3539,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i16_unaligned_acquire: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i16 %value acquire, align 1 @@ -3553,7 +3553,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i16_unaligned_release: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i16 %value release, align 1 @@ -3567,7 +3567,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i16_unaligned_acq_rel: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl 
__atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i16 %value acq_rel, align 1 @@ -3581,7 +3581,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i16_unaligned_seq_cst: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i16 %value seq_cst, align 1 @@ -3595,7 +3595,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i32_unaligned_monotonic: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i32 %value monotonic, align 1 @@ -3609,7 +3609,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i32_unaligned_acquire: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i32 %value acquire, align 1 @@ -3623,7 +3623,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i32_unaligned_release: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i32 %value release, align 1 @@ -3637,7 +3637,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i32_unaligned_acq_rel: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i32 %value acq_rel, align 1 @@ -3651,7 +3651,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i32_unaligned_seq_cst: -; -O1: and w8, w0, w19 +; -O1: and w8, w0, w20 ; -O1: mvn w8, w8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i32 %value seq_cst, align 1 @@ -3665,7 +3665,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i64_unaligned_monotonic: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: mvn x8, x8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i64 %value monotonic, align 1 @@ -3679,7 +3679,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i64_unaligned_acquire: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: mvn x8, x8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i64 %value acquire, align 1 @@ -3693,7 +3693,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i64_unaligned_release: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: mvn x8, x8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i64 %value release, align 1 @@ -3707,7 +3707,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i64_unaligned_acq_rel: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: mvn x8, x8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i64 %value acq_rel, align 1 @@ -3721,7 +3721,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_nand_i64_unaligned_seq_cst: -; -O1: and x8, x0, x19 +; -O1: and x8, x0, x20 ; -O1: mvn x8, x8 ; -O1: bl __atomic_compare_exchange %r = atomicrmw nand ptr %ptr, i64 %value seq_cst, align 1 @@ -3739,7 +3739,7 @@ ; -O1-LABEL: atomicrmw_nand_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: mvn x8, x8 ; -O1: mvn x9, x9 ; -O1: bl __atomic_compare_exchange @@ -3758,7 +3758,7 @@ ; -O1-LABEL: atomicrmw_nand_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: mvn x8, x8 ; -O1: mvn x9, x9 ; -O1: bl 
__atomic_compare_exchange @@ -3777,7 +3777,7 @@ ; -O1-LABEL: atomicrmw_nand_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: mvn x8, x8 ; -O1: mvn x9, x9 ; -O1: bl __atomic_compare_exchange @@ -3796,7 +3796,7 @@ ; -O1-LABEL: atomicrmw_nand_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: mvn x8, x8 ; -O1: mvn x9, x9 ; -O1: bl __atomic_compare_exchange @@ -3815,7 +3815,7 @@ ; -O1-LABEL: atomicrmw_nand_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] ; -O1: and x8, x1, x19 -; -O1: and x9, x0, x20 +; -O1: and x9, x0, x21 ; -O1: mvn x8, x8 ; -O1: mvn x9, x9 ; -O1: bl __atomic_compare_exchange @@ -4177,9 +4177,9 @@ ; ; -O1-LABEL: atomicrmw_or_i128_aligned_monotonic: ; -O1: ldxp x1, x0, [x8] -; -O1: orr x9, x1, x3 -; -O1: orr x10, x0, x2 -; -O1: stxp w11, x9, x10, [x8] +; -O1: orr x9, x0, x2 +; -O1: orr x10, x1, x3 +; -O1: stxp w11, x10, x9, [x8] %r = atomicrmw or ptr %ptr, i128 %value monotonic, align 16 ret i128 %r } @@ -4198,9 +4198,9 @@ ; ; -O1-LABEL: atomicrmw_or_i128_aligned_acquire: ; -O1: ldaxp x1, x0, [x8] -; -O1: orr x9, x1, x3 -; -O1: orr x10, x0, x2 -; -O1: stxp w11, x9, x10, [x8] +; -O1: orr x9, x0, x2 +; -O1: orr x10, x1, x3 +; -O1: stxp w11, x10, x9, [x8] %r = atomicrmw or ptr %ptr, i128 %value acquire, align 16 ret i128 %r } @@ -4219,9 +4219,9 @@ ; ; -O1-LABEL: atomicrmw_or_i128_aligned_release: ; -O1: ldxp x1, x0, [x8] -; -O1: orr x9, x1, x3 -; -O1: orr x10, x0, x2 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: orr x9, x0, x2 +; -O1: orr x10, x1, x3 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw or ptr %ptr, i128 %value release, align 16 ret i128 %r } @@ -4240,9 +4240,9 @@ ; ; -O1-LABEL: atomicrmw_or_i128_aligned_acq_rel: ; -O1: ldaxp x1, x0, [x8] -; -O1: orr x9, x1, x3 -; -O1: orr x10, x0, x2 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: orr x9, x0, x2 +; -O1: orr x10, x1, x3 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 16 ret i128 %r } @@ -4261,9 +4261,9 @@ ; ; -O1-LABEL: atomicrmw_or_i128_aligned_seq_cst: ; -O1: ldaxp x1, x0, [x8] -; -O1: orr x9, x1, x3 -; -O1: orr x10, x0, x2 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: orr x9, x0, x2 +; -O1: orr x10, x1, x3 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 16 ret i128 %r } @@ -4359,7 +4359,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i16_unaligned_monotonic: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -4371,7 +4371,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i16_unaligned_acquire: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -4383,7 +4383,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i16_unaligned_release: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -4395,7 +4395,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i16_unaligned_acq_rel: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -4407,7 +4407,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i16_unaligned_seq_cst: -; 
-O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -4419,7 +4419,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i32_unaligned_monotonic: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -4431,7 +4431,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i32_unaligned_acquire: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -4443,7 +4443,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i32_unaligned_release: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -4455,7 +4455,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i32_unaligned_acq_rel: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -4467,7 +4467,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i32_unaligned_seq_cst: -; -O1: orr w8, w0, w19 +; -O1: orr w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -4479,7 +4479,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i64_unaligned_monotonic: -; -O1: orr x8, x0, x19 +; -O1: orr x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -4491,7 +4491,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i64_unaligned_acquire: -; -O1: orr x8, x0, x19 +; -O1: orr x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -4503,7 +4503,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i64_unaligned_release: -; -O1: orr x8, x0, x19 +; -O1: orr x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -4515,7 +4515,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i64_unaligned_acq_rel: -; -O1: orr x8, x0, x19 +; -O1: orr x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -4527,7 +4527,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_or_i64_unaligned_seq_cst: -; -O1: orr x8, x0, x19 +; -O1: orr x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -4542,7 +4542,7 @@ ; -O1-LABEL: atomicrmw_or_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] ; -O1: orr x8, x1, x19 -; -O1: orr x9, x0, x20 +; -O1: orr x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -4557,7 +4557,7 @@ ; -O1-LABEL: atomicrmw_or_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] ; -O1: orr x8, x1, x19 -; -O1: orr x9, x0, x20 +; -O1: orr x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -4572,7 +4572,7 @@ ; -O1-LABEL: atomicrmw_or_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] ; -O1: orr x8, x1, x19 -; -O1: orr x9, x0, x20 +; -O1: orr x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i128 %value 
release, align 1 ret i128 %r @@ -4587,7 +4587,7 @@ ; -O1-LABEL: atomicrmw_or_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] ; -O1: orr x8, x1, x19 -; -O1: orr x9, x0, x20 +; -O1: orr x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -4602,7 +4602,7 @@ ; -O1-LABEL: atomicrmw_or_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] ; -O1: orr x8, x1, x19 -; -O1: orr x9, x0, x20 +; -O1: orr x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw or ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -4962,9 +4962,9 @@ ; ; -O1-LABEL: atomicrmw_xor_i128_aligned_monotonic: ; -O1: ldxp x1, x0, [x8] -; -O1: eor x9, x1, x3 -; -O1: eor x10, x0, x2 -; -O1: stxp w11, x9, x10, [x8] +; -O1: eor x9, x0, x2 +; -O1: eor x10, x1, x3 +; -O1: stxp w11, x10, x9, [x8] %r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 16 ret i128 %r } @@ -4983,9 +4983,9 @@ ; ; -O1-LABEL: atomicrmw_xor_i128_aligned_acquire: ; -O1: ldaxp x1, x0, [x8] -; -O1: eor x9, x1, x3 -; -O1: eor x10, x0, x2 -; -O1: stxp w11, x9, x10, [x8] +; -O1: eor x9, x0, x2 +; -O1: eor x10, x1, x3 +; -O1: stxp w11, x10, x9, [x8] %r = atomicrmw xor ptr %ptr, i128 %value acquire, align 16 ret i128 %r } @@ -5004,9 +5004,9 @@ ; ; -O1-LABEL: atomicrmw_xor_i128_aligned_release: ; -O1: ldxp x1, x0, [x8] -; -O1: eor x9, x1, x3 -; -O1: eor x10, x0, x2 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: eor x9, x0, x2 +; -O1: eor x10, x1, x3 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw xor ptr %ptr, i128 %value release, align 16 ret i128 %r } @@ -5025,9 +5025,9 @@ ; ; -O1-LABEL: atomicrmw_xor_i128_aligned_acq_rel: ; -O1: ldaxp x1, x0, [x8] -; -O1: eor x9, x1, x3 -; -O1: eor x10, x0, x2 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: eor x9, x0, x2 +; -O1: eor x10, x1, x3 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 16 ret i128 %r } @@ -5046,9 +5046,9 @@ ; ; -O1-LABEL: atomicrmw_xor_i128_aligned_seq_cst: ; -O1: ldaxp x1, x0, [x8] -; -O1: eor x9, x1, x3 -; -O1: eor x10, x0, x2 -; -O1: stlxp w11, x9, x10, [x8] +; -O1: eor x9, x0, x2 +; -O1: eor x10, x1, x3 +; -O1: stlxp w11, x10, x9, [x8] %r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 16 ret i128 %r } @@ -5144,7 +5144,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i16_unaligned_monotonic: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -5156,7 +5156,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i16_unaligned_acquire: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -5168,7 +5168,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i16_unaligned_release: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -5180,7 +5180,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i16_unaligned_acq_rel: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -5192,7 +5192,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i16_unaligned_seq_cst: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ 
-5204,7 +5204,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i32_unaligned_monotonic: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -5216,7 +5216,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i32_unaligned_acquire: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -5228,7 +5228,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i32_unaligned_release: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -5240,7 +5240,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i32_unaligned_acq_rel: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -5252,7 +5252,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i32_unaligned_seq_cst: -; -O1: eor w8, w0, w19 +; -O1: eor w8, w0, w20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -5264,7 +5264,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i64_unaligned_monotonic: -; -O1: eor x8, x0, x19 +; -O1: eor x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -5276,7 +5276,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i64_unaligned_acquire: -; -O1: eor x8, x0, x19 +; -O1: eor x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -5288,7 +5288,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i64_unaligned_release: -; -O1: eor x8, x0, x19 +; -O1: eor x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -5300,7 +5300,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i64_unaligned_acq_rel: -; -O1: eor x8, x0, x19 +; -O1: eor x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -5312,7 +5312,7 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_xor_i64_unaligned_seq_cst: -; -O1: eor x8, x0, x19 +; -O1: eor x8, x0, x20 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -5327,7 +5327,7 @@ ; -O1-LABEL: atomicrmw_xor_i128_unaligned_monotonic: ; -O1: ldp x0, x1, [x0] ; -O1: eor x8, x1, x19 -; -O1: eor x9, x0, x20 +; -O1: eor x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -5342,7 +5342,7 @@ ; -O1-LABEL: atomicrmw_xor_i128_unaligned_acquire: ; -O1: ldp x0, x1, [x0] ; -O1: eor x8, x1, x19 -; -O1: eor x9, x0, x20 +; -O1: eor x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -5357,7 +5357,7 @@ ; -O1-LABEL: atomicrmw_xor_i128_unaligned_release: ; -O1: ldp x0, x1, [x0] ; -O1: eor x8, x1, x19 -; -O1: eor x9, x0, x20 +; -O1: eor x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -5372,7 +5372,7 @@ ; -O1-LABEL: atomicrmw_xor_i128_unaligned_acq_rel: ; -O1: ldp x0, x1, [x0] ; -O1: 
eor x8, x1, x19 -; -O1: eor x9, x0, x20 +; -O1: eor x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -5387,7 +5387,7 @@ ; -O1-LABEL: atomicrmw_xor_i128_unaligned_seq_cst: ; -O1: ldp x0, x1, [x0] ; -O1: eor x8, x1, x19 -; -O1: eor x9, x0, x20 +; -O1: eor x9, x0, x21 ; -O1: bl __atomic_compare_exchange %r = atomicrmw xor ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -6022,8 +6022,8 @@ ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_monotonic: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, gt +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -6038,8 +6038,8 @@ ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_acquire: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, gt +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -6054,8 +6054,8 @@ ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_release: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, gt +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -6070,8 +6070,8 @@ ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_acq_rel: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, gt +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -6086,8 +6086,8 @@ ; ; -O1-LABEL: atomicrmw_max_i16_unaligned_seq_cst: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, gt +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -6100,8 +6100,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_monotonic: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, gt +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -6114,8 +6114,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_acquire: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, gt +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -6128,8 +6128,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_release: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, gt +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -6142,8 +6142,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_acq_rel: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, gt +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -6156,8 +6156,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i32_unaligned_seq_cst: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, gt +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ 
-6170,8 +6170,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_monotonic: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, gt +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -6184,8 +6184,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_acquire: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, gt +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -6198,8 +6198,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_release: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, gt +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -6212,8 +6212,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_acq_rel: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, gt +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -6226,8 +6226,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_max_i64_unaligned_seq_cst: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, gt +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, gt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -6244,7 +6244,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, lt -; -O1: csel x9, x0, x20, lt +; -O1: csel x9, x0, x21, lt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -6261,7 +6261,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, lt -; -O1: csel x9, x0, x20, lt +; -O1: csel x9, x0, x21, lt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -6278,7 +6278,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, lt -; -O1: csel x9, x0, x20, lt +; -O1: csel x9, x0, x21, lt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -6295,7 +6295,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, lt -; -O1: csel x9, x0, x20, lt +; -O1: csel x9, x0, x21, lt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -6312,7 +6312,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, lt -; -O1: csel x9, x0, x20, lt +; -O1: csel x9, x0, x21, lt ; -O1: bl __atomic_compare_exchange %r = atomicrmw max ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -6947,8 +6947,8 @@ ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_monotonic: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, le +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -6963,8 +6963,8 @@ ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_acquire: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, le +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -6979,8 +6979,8 @@ ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_release: ; -O1: sxth 
w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, le +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -6995,8 +6995,8 @@ ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_acq_rel: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, le +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -7011,8 +7011,8 @@ ; ; -O1-LABEL: atomicrmw_min_i16_unaligned_seq_cst: ; -O1: sxth w8, w0 -; -O1: cmp w8, w19, sxth -; -O1: csel w8, w0, w19, le +; -O1: cmp w8, w20, sxth +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -7025,8 +7025,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_monotonic: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, le +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -7039,8 +7039,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_acquire: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, le +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -7053,8 +7053,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_release: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, le +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -7067,8 +7067,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_acq_rel: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, le +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -7081,8 +7081,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i32_unaligned_seq_cst: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, le +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -7095,8 +7095,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_monotonic: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, le +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -7109,8 +7109,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_acquire: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, le +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -7123,8 +7123,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_release: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, le +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -7137,8 +7137,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_acq_rel: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, le +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, le ; -O1: 
bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -7151,8 +7151,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_min_i64_unaligned_seq_cst: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, le +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, le ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -7169,7 +7169,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, ge -; -O1: csel x9, x0, x20, ge +; -O1: csel x9, x0, x21, ge ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -7186,7 +7186,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, ge -; -O1: csel x9, x0, x20, ge +; -O1: csel x9, x0, x21, ge ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -7203,7 +7203,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, ge -; -O1: csel x9, x0, x20, ge +; -O1: csel x9, x0, x21, ge ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -7220,7 +7220,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, ge -; -O1: csel x9, x0, x20, ge +; -O1: csel x9, x0, x21, ge ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -7237,7 +7237,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, ge -; -O1: csel x9, x0, x20, ge +; -O1: csel x9, x0, x21, ge ; -O1: bl __atomic_compare_exchange %r = atomicrmw min ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -7872,8 +7872,8 @@ ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_monotonic: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, hi +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -7888,8 +7888,8 @@ ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acquire: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, hi +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -7904,8 +7904,8 @@ ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_release: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, hi +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -7920,8 +7920,8 @@ ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_acq_rel: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, hi +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -7936,8 +7936,8 @@ ; ; -O1-LABEL: atomicrmw_umax_i16_unaligned_seq_cst: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, hi +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -7950,8 +7950,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_monotonic: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, hi +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax 
ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -7964,8 +7964,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acquire: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, hi +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -7978,8 +7978,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_release: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, hi +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -7992,8 +7992,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_acq_rel: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, hi +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -8006,8 +8006,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i32_unaligned_seq_cst: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, hi +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i32 %value seq_cst, align 1 ret i32 %r @@ -8020,8 +8020,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_monotonic: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, hi +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i64 %value monotonic, align 1 ret i64 %r @@ -8034,8 +8034,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acquire: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, hi +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i64 %value acquire, align 1 ret i64 %r @@ -8048,8 +8048,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_release: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, hi +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i64 %value release, align 1 ret i64 %r @@ -8062,8 +8062,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_acq_rel: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, hi +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i64 %value acq_rel, align 1 ret i64 %r @@ -8076,8 +8076,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umax_i64_unaligned_seq_cst: -; -O1: cmp x0, x19 -; -O1: csel x8, x0, x19, hi +; -O1: cmp x0, x20 +; -O1: csel x8, x0, x20, hi ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i64 %value seq_cst, align 1 ret i64 %r @@ -8094,7 +8094,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, lo -; -O1: csel x9, x0, x20, lo +; -O1: csel x9, x0, x21, lo ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i128 %value monotonic, align 1 ret i128 %r @@ -8111,7 +8111,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, lo -; -O1: csel x9, x0, x20, lo +; -O1: csel x9, x0, x21, lo ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i128 %value acquire, align 1 ret i128 %r @@ -8128,7 +8128,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, lo -; -O1: csel x9, x0, x20, lo +; -O1: csel x9, x0, x21, lo ; 
-O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i128 %value release, align 1 ret i128 %r @@ -8145,7 +8145,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, lo -; -O1: csel x9, x0, x20, lo +; -O1: csel x9, x0, x21, lo ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i128 %value acq_rel, align 1 ret i128 %r @@ -8162,7 +8162,7 @@ ; -O1: ldp x0, x1, [x0] ; -O1: cmp x19, x1 ; -O1: csel x8, x1, x19, lo -; -O1: csel x9, x0, x20, lo +; -O1: csel x9, x0, x21, lo ; -O1: bl __atomic_compare_exchange %r = atomicrmw umax ptr %ptr, i128 %value seq_cst, align 1 ret i128 %r @@ -8797,8 +8797,8 @@ ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_monotonic: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, ls +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i16 %value monotonic, align 1 ret i16 %r @@ -8813,8 +8813,8 @@ ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acquire: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, ls +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i16 %value acquire, align 1 ret i16 %r @@ -8829,8 +8829,8 @@ ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_release: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, ls +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i16 %value release, align 1 ret i16 %r @@ -8845,8 +8845,8 @@ ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_acq_rel: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, ls +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i16 %value acq_rel, align 1 ret i16 %r @@ -8861,8 +8861,8 @@ ; ; -O1-LABEL: atomicrmw_umin_i16_unaligned_seq_cst: ; -O1: and w8, w0, #0xffff -; -O1: cmp w8, w19, uxth -; -O1: csel w8, w0, w19, ls +; -O1: cmp w8, w20, uxth +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i16 %value seq_cst, align 1 ret i16 %r @@ -8875,8 +8875,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_monotonic: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, ls +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i32 %value monotonic, align 1 ret i32 %r @@ -8889,8 +8889,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acquire: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, ls +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i32 %value acquire, align 1 ret i32 %r @@ -8903,8 +8903,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_release: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, ls +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i32 %value release, align 1 ret i32 %r @@ -8917,8 +8917,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: atomicrmw_umin_i32_unaligned_acq_rel: -; -O1: cmp w0, w19 -; -O1: csel w8, w0, w19, ls +; -O1: cmp w0, w20 +; -O1: csel w8, w0, w20, ls ; -O1: bl __atomic_compare_exchange %r = atomicrmw umin ptr %ptr, i32 %value acq_rel, align 1 ret i32 %r @@ -8931,8 +8931,8 @@ ; -O0: bl __atomic_compare_exchange ; ; -O1-LABEL: 
atomicrmw_umin_i32_unaligned_seq_cst:
-; -O1: cmp w0, w19
-; -O1: csel w8, w0, w19, ls
+; -O1: cmp w0, w20
+; -O1: csel w8, w0, w20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i32 %value seq_cst, align 1
 ret i32 %r
@@ -8945,8 +8945,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_monotonic:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i64 %value monotonic, align 1
 ret i64 %r
@@ -8959,8 +8959,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acquire:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i64 %value acquire, align 1
 ret i64 %r
@@ -8973,8 +8973,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_release:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i64 %value release, align 1
 ret i64 %r
@@ -8987,8 +8987,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_acq_rel:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i64 %value acq_rel, align 1
 ret i64 %r
@@ -9001,8 +9001,8 @@
 ; -O0: bl __atomic_compare_exchange
 ;
 ; -O1-LABEL: atomicrmw_umin_i64_unaligned_seq_cst:
-; -O1: cmp x0, x19
-; -O1: csel x8, x0, x19, ls
+; -O1: cmp x0, x20
+; -O1: csel x8, x0, x20, ls
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i64 %value seq_cst, align 1
 ret i64 %r
@@ -9019,7 +9019,7 @@
 ; -O1: ldp x0, x1, [x0]
 ; -O1: cmp x19, x1
 ; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i128 %value monotonic, align 1
 ret i128 %r
@@ -9036,7 +9036,7 @@
 ; -O1: ldp x0, x1, [x0]
 ; -O1: cmp x19, x1
 ; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i128 %value acquire, align 1
 ret i128 %r
@@ -9053,7 +9053,7 @@
 ; -O1: ldp x0, x1, [x0]
 ; -O1: cmp x19, x1
 ; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i128 %value release, align 1
 ret i128 %r
@@ -9070,7 +9070,7 @@
 ; -O1: ldp x0, x1, [x0]
 ; -O1: cmp x19, x1
 ; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i128 %value acq_rel, align 1
 ret i128 %r
@@ -9087,7 +9087,7 @@
 ; -O1: ldp x0, x1, [x0]
 ; -O1: cmp x19, x1
 ; -O1: csel x8, x1, x19, hs
-; -O1: csel x9, x0, x20, hs
+; -O1: csel x9, x0, x21, hs
 ; -O1: bl __atomic_compare_exchange
 %r = atomicrmw umin ptr %ptr, i128 %value seq_cst, align 1
 ret i128 %r
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/aapcs_vararg_frame.ll b/llvm/test/CodeGen/AArch64/GlobalISel/aapcs_vararg_frame.ll
--- a/llvm/test/CodeGen/AArch64/GlobalISel/aapcs_vararg_frame.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/aapcs_vararg_frame.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; RUN: llc < %s --global-isel=0 -mtriple=aarch64-linux-gnu -mattr=+fp-armv8 | FileCheck %s
 ; RUN: llc < %s --global-isel=1 -mtriple=aarch64-linux-gnu -mattr=+fp-armv8 | FileCheck %s --check-prefix=GISEL
@@ -5,9 +6,9 @@
 ; CHECK-LABEL: va:
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: sub sp, sp, #176
-; CHECK-NEXT: stp x4, x5, [sp, #144]
 ; CHECK-NEXT: stp x2, x3, [sp, #128]
 ; CHECK-NEXT: str x1, [sp, #120]
+; CHECK-NEXT: stp x4, x5, [sp, #144]
 ; CHECK-NEXT: stp x6, x7, [sp, #160]
 ; CHECK-NEXT: stp q1, q2, [sp]
 ; CHECK-NEXT: stp q3, q4, [sp, #32]
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-atomic.ll
@@ -502,7 +502,7 @@
 define i32 @fetch_and_or(ptr %p) #0 {
 ; CHECK-NOLSE-O1-LABEL: fetch_and_or:
 ; CHECK-NOLSE-O1: ; %bb.0:
-; CHECK-NOLSE-O1-NEXT: mov w9, #5
+; CHECK-NOLSE-O1-NEXT: mov w9, #5 ; =0x5
 ; CHECK-NOLSE-O1-NEXT: LBB8_1: ; %atomicrmw.start
 ; CHECK-NOLSE-O1-NEXT: ; =>This Inner Loop Header: Depth=1
 ; CHECK-NOLSE-O1-NEXT: ldaxr w8, [x0]
@@ -525,7 +525,7 @@
 ; CHECK-NOLSE-O0-NEXT: ; Child Loop BB8_2 Depth 2
 ; CHECK-NOLSE-O0-NEXT: ldr w8, [sp, #28] ; 4-byte Folded Reload
 ; CHECK-NOLSE-O0-NEXT: ldr x11, [sp, #16] ; 8-byte Folded Reload
-; CHECK-NOLSE-O0-NEXT: mov w9, #5
+; CHECK-NOLSE-O0-NEXT: mov w9, #5 ; =0x5
 ; CHECK-NOLSE-O0-NEXT: orr w12, w8, w9
 ; CHECK-NOLSE-O0-NEXT: LBB8_2: ; %atomicrmw.start
 ; CHECK-NOLSE-O0-NEXT: ; Parent Loop BB8_1 Depth=1
@@ -552,13 +552,13 @@
 ;
 ; CHECK-LSE-O1-LABEL: fetch_and_or:
 ; CHECK-LSE-O1: ; %bb.0:
-; CHECK-LSE-O1-NEXT: mov w8, #5
+; CHECK-LSE-O1-NEXT: mov w8, #5 ; =0x5
 ; CHECK-LSE-O1-NEXT: ldsetal w8, w0, [x0]
 ; CHECK-LSE-O1-NEXT: ret
 ;
 ; CHECK-LSE-O0-LABEL: fetch_and_or:
 ; CHECK-LSE-O0: ; %bb.0:
-; CHECK-LSE-O0-NEXT: mov w8, #5
+; CHECK-LSE-O0-NEXT: mov w8, #5 ; =0x5
 ; CHECK-LSE-O0-NEXT: ldsetal w8, w0, [x0]
 ; CHECK-LSE-O0-NEXT: ret
 %val = atomicrmw or ptr %p, i32 5 seq_cst
@@ -616,13 +616,13 @@
 ;
 ; CHECK-LSE-O1-LABEL: fetch_and_or_64:
 ; CHECK-LSE-O1: ; %bb.0:
-; CHECK-LSE-O1-NEXT: mov w8, #7
+; CHECK-LSE-O1-NEXT: mov w8, #7 ; =0x7
 ; CHECK-LSE-O1-NEXT: ldset x8, x0, [x0]
 ; CHECK-LSE-O1-NEXT: ret
 ;
 ; CHECK-LSE-O0-LABEL: fetch_and_or_64:
 ; CHECK-LSE-O0: ; %bb.0:
-; CHECK-LSE-O0-NEXT: mov w8, #7
+; CHECK-LSE-O0-NEXT: mov w8, #7 ; =0x7
 ; CHECK-LSE-O0-NEXT: ; kill: def $x8 killed $w8
 ; CHECK-LSE-O0-NEXT: ldset x8, x0, [x0]
 ; CHECK-LSE-O0-NEXT: ret
@@ -709,14 +709,14 @@
 define i8 @atomic_load_relaxed_8(ptr %p, i32 %off32) #0 {
 ; CHECK-NOLSE-O1-LABEL: atomic_load_relaxed_8:
 ; CHECK-NOLSE-O1: ; %bb.0:
-; CHECK-NOLSE-O1-NEXT: add x8, x0, #291, lsl #12 ; =1191936
-; CHECK-NOLSE-O1-NEXT: ldrb w9, [x0, #4095]
-; CHECK-NOLSE-O1-NEXT: ldrb w10, [x0, w1, sxtw]
-; CHECK-NOLSE-O1-NEXT: ldurb w11, [x0, #-256]
-; CHECK-NOLSE-O1-NEXT: ldrb w8, [x8]
-; CHECK-NOLSE-O1-NEXT: add w9, w9, w11
-; CHECK-NOLSE-O1-NEXT: add w9, w9, w10
-; CHECK-NOLSE-O1-NEXT: add w0, w9, w8
+; CHECK-NOLSE-O1-NEXT: ldrb w8, [x0, #4095]
+; CHECK-NOLSE-O1-NEXT: ldrb w9, [x0, w1, sxtw]
+; CHECK-NOLSE-O1-NEXT: add x11, x0, #291, lsl #12 ; =1191936
+; CHECK-NOLSE-O1-NEXT: ldurb w10, [x0, #-256]
+; CHECK-NOLSE-O1-NEXT: add w8, w8, w9
+; CHECK-NOLSE-O1-NEXT: ldrb w9, [x11]
+; CHECK-NOLSE-O1-NEXT: add w8, w8, w10
+; CHECK-NOLSE-O1-NEXT: add w0, w8, w9
 ; CHECK-NOLSE-O1-NEXT: ret
 ;
 ; CHECK-NOLSE-O0-LABEL: atomic_load_relaxed_8:
@@ -779,14 +779,14 @@
 define i16 @atomic_load_relaxed_16(ptr %p, i32 %off32) #0 {
 ; CHECK-NOLSE-O1-LABEL: atomic_load_relaxed_16:
 ; CHECK-NOLSE-O1: ; %bb.0:
-; CHECK-NOLSE-O1-NEXT: add x8, x0, #291, lsl #12 ; =1191936
-; CHECK-NOLSE-O1-NEXT: ldrh w9, [x0, #8190]
-; CHECK-NOLSE-O1-NEXT: ldrh w10, [x0, w1, sxtw #1]
-; CHECK-NOLSE-O1-NEXT: ldurh w11, [x0, #-256]
-; CHECK-NOLSE-O1-NEXT: ldrh w8, [x8]
-; CHECK-NOLSE-O1-NEXT: add w9, w9, w11
-; CHECK-NOLSE-O1-NEXT: add w9, w9, w10
-; CHECK-NOLSE-O1-NEXT: add w0, w9, w8
+; CHECK-NOLSE-O1-NEXT: ldrh w8, [x0, #8190]
+; CHECK-NOLSE-O1-NEXT: ldrh w9, [x0, w1, sxtw #1]
+; CHECK-NOLSE-O1-NEXT: add x11, x0, #291, lsl #12 ; =1191936
+; CHECK-NOLSE-O1-NEXT: ldurh w10, [x0, #-256]
+; CHECK-NOLSE-O1-NEXT: add w8, w8, w9
+; CHECK-NOLSE-O1-NEXT: ldrh w9, [x11]
+; CHECK-NOLSE-O1-NEXT: add w8, w8, w10
+; CHECK-NOLSE-O1-NEXT: add w0, w8, w9
 ; CHECK-NOLSE-O1-NEXT: ret
 ;
 ; CHECK-NOLSE-O0-LABEL: atomic_load_relaxed_16:
@@ -849,14 +849,14 @@
 define i32 @atomic_load_relaxed_32(ptr %p, i32 %off32) #0 {
 ; CHECK-NOLSE-O1-LABEL: atomic_load_relaxed_32:
 ; CHECK-NOLSE-O1: ; %bb.0:
-; CHECK-NOLSE-O1-NEXT: add x8, x0, #291, lsl #12 ; =1191936
-; CHECK-NOLSE-O1-NEXT: ldr w9, [x0, #16380]
-; CHECK-NOLSE-O1-NEXT: ldr w10, [x0, w1, sxtw #2]
-; CHECK-NOLSE-O1-NEXT: ldur w11, [x0, #-256]
-; CHECK-NOLSE-O1-NEXT: ldr w8, [x8]
-; CHECK-NOLSE-O1-NEXT: add w9, w9, w11
-; CHECK-NOLSE-O1-NEXT: add w9, w9, w10
-; CHECK-NOLSE-O1-NEXT: add w0, w9, w8
+; CHECK-NOLSE-O1-NEXT: ldr w8, [x0, #16380]
+; CHECK-NOLSE-O1-NEXT: ldr w9, [x0, w1, sxtw #2]
+; CHECK-NOLSE-O1-NEXT: add x11, x0, #291, lsl #12 ; =1191936
+; CHECK-NOLSE-O1-NEXT: ldur w10, [x0, #-256]
+; CHECK-NOLSE-O1-NEXT: add w8, w8, w9
+; CHECK-NOLSE-O1-NEXT: ldr w9, [x11]
+; CHECK-NOLSE-O1-NEXT: add w8, w8, w10
+; CHECK-NOLSE-O1-NEXT: add w0, w8, w9
 ; CHECK-NOLSE-O1-NEXT: ret
 ;
 ; CHECK-NOLSE-O0-LABEL: atomic_load_relaxed_32:
@@ -915,14 +915,14 @@
 define i64 @atomic_load_relaxed_64(ptr %p, i32 %off32) #0 {
 ; CHECK-NOLSE-O1-LABEL: atomic_load_relaxed_64:
 ; CHECK-NOLSE-O1: ; %bb.0:
-; CHECK-NOLSE-O1-NEXT: add x8, x0, #291, lsl #12 ; =1191936
-; CHECK-NOLSE-O1-NEXT: ldr x9, [x0, #32760]
-; CHECK-NOLSE-O1-NEXT: ldr x10, [x0, w1, sxtw #3]
-; CHECK-NOLSE-O1-NEXT: ldur x11, [x0, #-256]
-; CHECK-NOLSE-O1-NEXT: ldr x8, [x8]
-; CHECK-NOLSE-O1-NEXT: add x9, x9, x11
-; CHECK-NOLSE-O1-NEXT: add x9, x9, x10
-; CHECK-NOLSE-O1-NEXT: add x0, x9, x8
+; CHECK-NOLSE-O1-NEXT: ldr x8, [x0, #32760]
+; CHECK-NOLSE-O1-NEXT: ldr x9, [x0, w1, sxtw #3]
+; CHECK-NOLSE-O1-NEXT: add x11, x0, #291, lsl #12 ; =1191936
+; CHECK-NOLSE-O1-NEXT: ldur x10, [x0, #-256]
+; CHECK-NOLSE-O1-NEXT: add x8, x8, x9
+; CHECK-NOLSE-O1-NEXT: ldr x9, [x11]
+; CHECK-NOLSE-O1-NEXT: add x8, x8, x10
+; CHECK-NOLSE-O1-NEXT: add x0, x8, x9
 ; CHECK-NOLSE-O1-NEXT: ret
 ;
 ; CHECK-NOLSE-O0-LABEL: atomic_load_relaxed_64:
@@ -982,19 +982,19 @@
 define void @atomc_store(ptr %p) #0 {
 ; CHECK-NOLSE-LABEL: atomc_store:
 ; CHECK-NOLSE: ; %bb.0:
-; CHECK-NOLSE-NEXT: mov w8, #4
+; CHECK-NOLSE-NEXT: mov w8, #4 ; =0x4
 ; CHECK-NOLSE-NEXT: stlr w8, [x0]
 ; CHECK-NOLSE-NEXT: ret
 ;
 ; CHECK-LSE-O1-LABEL: atomc_store:
 ; CHECK-LSE-O1: ; %bb.0:
-; CHECK-LSE-O1-NEXT: mov w8, #4
+; CHECK-LSE-O1-NEXT: mov w8, #4 ; =0x4
 ; CHECK-LSE-O1-NEXT: stlr w8, [x0]
 ; CHECK-LSE-O1-NEXT: ret
 ;
 ; CHECK-LSE-O0-LABEL: atomc_store:
 ; CHECK-LSE-O0: ; %bb.0:
-; CHECK-LSE-O0-NEXT: mov w8, #4
+; CHECK-LSE-O0-NEXT: mov w8, #4 ; =0x4
 ; CHECK-LSE-O0-NEXT: stlr w8, [x0]
 ; CHECK-LSE-O0-NEXT: ret
 store atomic i32 4, ptr %p seq_cst, align 4
@@ -2743,7 +2743,7 @@
 ; CHECK-NOLSE-O1-NEXT: stxrb w10, w2, [x8]
 ; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB47_1
 ; CHECK-NOLSE-O1-NEXT: ; %bb.3:
-; CHECK-NOLSE-O1-NEXT: mov w1, #1
+; CHECK-NOLSE-O1-NEXT: mov w1, #1 ; =0x1
 ; CHECK-NOLSE-O1-NEXT: ; kill: def $w0 killed $w0 killed $x0
 ; CHECK-NOLSE-O1-NEXT: ret
 ; CHECK-NOLSE-O1-NEXT: LBB47_4: ; %cmpxchg.nostore
@@ -2810,7 +2810,7 @@
 ; CHECK-NOLSE-O1-NEXT: stxrh w10, w2, [x8]
 ; CHECK-NOLSE-O1-NEXT: cbnz w10, LBB48_1
 ; CHECK-NOLSE-O1-NEXT: ; %bb.3:
-; CHECK-NOLSE-O1-NEXT: mov w1, #1
+; CHECK-NOLSE-O1-NEXT: mov w1, #1 ; =0x1
 ; CHECK-NOLSE-O1-NEXT: ; kill: def $w0 killed $w0 killed $x0
 ; CHECK-NOLSE-O1-NEXT: ret
 ; CHECK-NOLSE-O1-NEXT: LBB48_4: ; %cmpxchg.nostore
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
@@ -384,14 +384,14 @@
 ; CHECK: bb.0 (%ir-block.0):
 ; CHECK-NEXT: liveins: $w1, $x0
 ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $x8 = ADDXri renamable $x0, 291, 12
- ; CHECK-NEXT: renamable $w9 = LDRBBui renamable $x0, 4095, pcsections !0 :: (load monotonic (s8) from %ir.ptr_unsigned)
- ; CHECK-NEXT: renamable $w10 = LDRBBroW renamable $x0, killed renamable $w1, 1, 0, pcsections !0 :: (load unordered (s8) from %ir.ptr_regoff)
- ; CHECK-NEXT: renamable $w11 = LDURBBi killed renamable $x0, -256, pcsections !0 :: (load monotonic (s8) from %ir.ptr_unscaled)
- ; CHECK-NEXT: renamable $w8 = LDRBBui killed renamable $x8, 0, pcsections !0 :: (load unordered (s8) from %ir.ptr_random)
- ; CHECK-NEXT: $w9 = ADDWrs killed renamable $w9, killed renamable $w11, 0, pcsections !0
- ; CHECK-NEXT: $w9 = ADDWrs killed renamable $w9, killed renamable $w10, 0, pcsections !0
- ; CHECK-NEXT: $w0 = ADDWrs killed renamable $w9, killed renamable $w8, 0, pcsections !0
+ ; CHECK-NEXT: renamable $w8 = LDRBBui renamable $x0, 4095, pcsections !0 :: (load monotonic (s8) from %ir.ptr_unsigned)
+ ; CHECK-NEXT: renamable $w9 = LDRBBroW renamable $x0, killed renamable $w1, 1, 0, pcsections !0 :: (load unordered (s8) from %ir.ptr_regoff)
+ ; CHECK-NEXT: renamable $w10 = LDURBBi renamable $x0, -256, pcsections !0 :: (load monotonic (s8) from %ir.ptr_unscaled)
+ ; CHECK-NEXT: renamable $x11 = ADDXri killed renamable $x0, 291, 12
+ ; CHECK-NEXT: $w8 = ADDWrs killed renamable $w8, killed renamable $w9, 0, pcsections !0
+ ; CHECK-NEXT: renamable $w9 = LDRBBui killed renamable $x11, 0, pcsections !0 :: (load unordered (s8) from %ir.ptr_random)
+ ; CHECK-NEXT: $w8 = ADDWrs killed renamable $w8, killed renamable $w10, 0, pcsections !0
+ ; CHECK-NEXT: $w0 = ADDWrs killed renamable $w8, killed renamable $w9, 0, pcsections !0
 ; CHECK-NEXT: RET undef $lr, implicit $w0
 %ptr_unsigned = getelementptr i8, ptr %p, i32 4095
 %val_unsigned = load atomic i8, ptr %ptr_unsigned monotonic, align 1, !pcsections !0
@@ -416,14 +416,14 @@
 ; CHECK: bb.0 (%ir-block.0):
 ; CHECK-NEXT: liveins: $w1, $x0
 ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $x8 = ADDXri renamable $x0, 291, 12
- ; CHECK-NEXT: renamable $w9 = LDRHHui renamable $x0, 4095, pcsections !0 :: (load monotonic (s16) from %ir.ptr_unsigned)
- ; CHECK-NEXT: renamable $w10 = LDRHHroW renamable $x0, killed renamable $w1, 1, 1, pcsections !0 :: (load unordered (s16) from %ir.ptr_regoff)
- ; CHECK-NEXT: renamable $w11 = LDURHHi killed renamable $x0, -256, pcsections !0 :: (load monotonic (s16) from %ir.ptr_unscaled)
- ; CHECK-NEXT: renamable $w8 = LDRHHui killed renamable $x8, 0, pcsections !0 :: (load unordered (s16) from %ir.ptr_random)
- ; CHECK-NEXT: $w9 = ADDWrs killed renamable $w9, killed renamable $w11, 0, pcsections !0
- ; CHECK-NEXT: $w9 = ADDWrs killed renamable $w9, killed renamable $w10, 0, pcsections !0
- ; CHECK-NEXT: $w0 = ADDWrs killed renamable $w9, killed renamable $w8, 0, pcsections !0
+ ; CHECK-NEXT: renamable $w8 = LDRHHui renamable $x0, 4095, pcsections !0 :: (load monotonic (s16) from %ir.ptr_unsigned)
+ ; CHECK-NEXT: renamable $w9 = LDRHHroW renamable $x0, killed renamable $w1, 1, 1, pcsections !0 :: (load unordered (s16) from %ir.ptr_regoff)
+ ; CHECK-NEXT: renamable $w10 = LDURHHi renamable $x0, -256, pcsections !0 :: (load monotonic (s16) from %ir.ptr_unscaled)
+ ; CHECK-NEXT: renamable $x11 = ADDXri killed renamable $x0, 291, 12
+ ; CHECK-NEXT: $w8 = ADDWrs killed renamable $w8, killed renamable $w9, 0, pcsections !0
+ ; CHECK-NEXT: renamable $w9 = LDRHHui killed renamable $x11, 0, pcsections !0 :: (load unordered (s16) from %ir.ptr_random)
+ ; CHECK-NEXT: $w8 = ADDWrs killed renamable $w8, killed renamable $w10, 0, pcsections !0
+ ; CHECK-NEXT: $w0 = ADDWrs killed renamable $w8, killed renamable $w9, 0, pcsections !0
 ; CHECK-NEXT: RET undef $lr, implicit $w0
 %ptr_unsigned = getelementptr i16, ptr %p, i32 4095
 %val_unsigned = load atomic i16, ptr %ptr_unsigned monotonic, align 2, !pcsections !0
@@ -448,14 +448,14 @@
 ; CHECK: bb.0 (%ir-block.0):
 ; CHECK-NEXT: liveins: $w1, $x0
 ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $x8 = ADDXri renamable $x0, 291, 12
- ; CHECK-NEXT: renamable $w9 = LDRWui renamable $x0, 4095, pcsections !0 :: (load monotonic (s32) from %ir.ptr_unsigned)
- ; CHECK-NEXT: renamable $w10 = LDRWroW renamable $x0, killed renamable $w1, 1, 1, pcsections !0 :: (load unordered (s32) from %ir.ptr_regoff)
- ; CHECK-NEXT: renamable $w11 = LDURWi killed renamable $x0, -256, pcsections !0 :: (load monotonic (s32) from %ir.ptr_unscaled)
- ; CHECK-NEXT: renamable $w8 = LDRWui killed renamable $x8, 0, pcsections !0 :: (load unordered (s32) from %ir.ptr_random)
- ; CHECK-NEXT: $w9 = ADDWrs killed renamable $w9, killed renamable $w11, 0, pcsections !0
- ; CHECK-NEXT: $w9 = ADDWrs killed renamable $w9, killed renamable $w10, 0, pcsections !0
- ; CHECK-NEXT: $w0 = ADDWrs killed renamable $w9, killed renamable $w8, 0, pcsections !0
+ ; CHECK-NEXT: renamable $w8 = LDRWui renamable $x0, 4095, pcsections !0 :: (load monotonic (s32) from %ir.ptr_unsigned)
+ ; CHECK-NEXT: renamable $w9 = LDRWroW renamable $x0, killed renamable $w1, 1, 1, pcsections !0 :: (load unordered (s32) from %ir.ptr_regoff)
+ ; CHECK-NEXT: renamable $w10 = LDURWi renamable $x0, -256, pcsections !0 :: (load monotonic (s32) from %ir.ptr_unscaled)
+ ; CHECK-NEXT: renamable $x11 = ADDXri killed renamable $x0, 291, 12
+ ; CHECK-NEXT: $w8 = ADDWrs killed renamable $w8, killed renamable $w9, 0, pcsections !0
+ ; CHECK-NEXT: renamable $w9 = LDRWui killed renamable $x11, 0, pcsections !0 :: (load unordered (s32) from %ir.ptr_random)
+ ; CHECK-NEXT: $w8 = ADDWrs killed renamable $w8, killed renamable $w10, 0, pcsections !0
+ ; CHECK-NEXT: $w0 = ADDWrs killed renamable $w8, killed renamable $w9, 0, pcsections !0
 ; CHECK-NEXT: RET undef $lr, implicit $w0
 %ptr_unsigned = getelementptr i32, ptr %p, i32 4095
 %val_unsigned = load atomic i32, ptr %ptr_unsigned monotonic, align 4, !pcsections !0
@@ -480,14 +480,14 @@
 ; CHECK: bb.0 (%ir-block.0):
 ; CHECK-NEXT: liveins: $w1, $x0
 ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $x8 = ADDXri renamable $x0, 291, 12
- ; CHECK-NEXT: renamable $x9 = LDRXui renamable $x0, 4095, pcsections !0 :: (load monotonic (s64) from %ir.ptr_unsigned)
- ; CHECK-NEXT: renamable $x10 = LDRXroW renamable $x0, killed renamable $w1, 1, 1, pcsections !0 :: (load unordered (s64) from %ir.ptr_regoff)
- ; CHECK-NEXT: renamable $x11 = LDURXi killed renamable $x0, -256, pcsections !0 :: (load monotonic (s64) from %ir.ptr_unscaled)
- ; CHECK-NEXT: renamable $x8 = LDRXui killed renamable $x8, 0, pcsections !0 :: (load unordered (s64) from %ir.ptr_random)
- ; CHECK-NEXT: $x9 = ADDXrs killed renamable $x9, killed renamable $x11, 0, pcsections !0
- ; CHECK-NEXT: $x9 = ADDXrs killed renamable $x9, killed renamable $x10, 0, pcsections !0
- ; CHECK-NEXT: $x0 = ADDXrs killed renamable $x9, killed renamable $x8, 0, pcsections !0
+ ; CHECK-NEXT: renamable $x8 = LDRXui renamable $x0, 4095, pcsections !0 :: (load monotonic (s64) from %ir.ptr_unsigned)
+ ; CHECK-NEXT: renamable $x9 = LDRXroW renamable $x0, killed renamable $w1, 1, 1, pcsections !0 :: (load unordered (s64) from %ir.ptr_regoff)
+ ; CHECK-NEXT: renamable $x10 = LDURXi renamable $x0, -256, pcsections !0 :: (load monotonic (s64) from %ir.ptr_unscaled)
+ ; CHECK-NEXT: renamable $x11 = ADDXri killed renamable $x0, 291, 12
+ ; CHECK-NEXT: $x8 = ADDXrs killed renamable $x8, killed renamable $x9, 0, pcsections !0
+ ; CHECK-NEXT: renamable $x9 = LDRXui killed renamable $x11, 0, pcsections !0 :: (load unordered (s64) from %ir.ptr_random)
+ ; CHECK-NEXT: $x8 = ADDXrs killed renamable $x8, killed renamable $x10, 0, pcsections !0
+ ; CHECK-NEXT: $x0 = ADDXrs killed renamable $x8, killed renamable $x9, 0, pcsections !0
 ; CHECK-NEXT: RET undef $lr, implicit $x0
 %ptr_unsigned = getelementptr i64, ptr %p, i32 4095
 %val_unsigned = load atomic i64, ptr %ptr_unsigned monotonic, align 8, !pcsections !0
@@ -525,10 +525,10 @@
 ; CHECK: bb.0 (%ir-block.0):
 ; CHECK-NEXT: liveins: $w1, $w2, $x0
 ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $x8 = ADDXri renamable $x0, 291, 12
 ; CHECK-NEXT: STRBBui renamable $w2, renamable $x0, 4095, pcsections !0 :: (store monotonic (s8) into %ir.ptr_unsigned)
 ; CHECK-NEXT: STRBBroW renamable $w2, renamable $x0, killed renamable $w1, 1, 0, pcsections !0 :: (store unordered (s8) into %ir.ptr_regoff)
- ; CHECK-NEXT: STURBBi renamable $w2, killed renamable $x0, -256, pcsections !0 :: (store monotonic (s8) into %ir.ptr_unscaled)
+ ; CHECK-NEXT: STURBBi renamable $w2, renamable $x0, -256, pcsections !0 :: (store monotonic (s8) into %ir.ptr_unscaled)
+ ; CHECK-NEXT: renamable $x8 = ADDXri killed renamable $x0, 291, 12
 ; CHECK-NEXT: STRBBui killed renamable $w2, killed renamable $x8, 0, pcsections !0 :: (store unordered (s8) into %ir.ptr_random)
 ; CHECK-NEXT: RET undef $lr
 %ptr_unsigned = getelementptr i8, ptr %p, i32 4095
@@ -551,10 +551,10 @@
 ; CHECK: bb.0 (%ir-block.0):
 ; CHECK-NEXT: liveins: $w1, $w2, $x0
 ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $x8 = ADDXri renamable $x0, 291, 12
 ; CHECK-NEXT: STRHHui renamable $w2, renamable $x0, 4095, pcsections !0 :: (store monotonic (s16) into %ir.ptr_unsigned)
 ; CHECK-NEXT: STRHHroW renamable $w2, renamable $x0, killed renamable $w1, 1, 1, pcsections !0 :: (store unordered (s16) into %ir.ptr_regoff)
- ; CHECK-NEXT: STURHHi renamable $w2, killed renamable $x0, -256, pcsections !0 :: (store monotonic (s16) into %ir.ptr_unscaled)
+ ; CHECK-NEXT: STURHHi renamable $w2, renamable $x0, -256, pcsections !0 :: (store monotonic (s16) into %ir.ptr_unscaled)
+ ; CHECK-NEXT: renamable $x8 = ADDXri killed renamable $x0, 291, 12
 ; CHECK-NEXT: STRHHui killed renamable $w2, killed renamable $x8, 0, pcsections !0 :: (store unordered (s16) into %ir.ptr_random)
 ; CHECK-NEXT: RET undef $lr
 %ptr_unsigned = getelementptr i16, ptr %p, i32 4095
@@ -577,10 +577,10 @@
 ; CHECK: bb.0 (%ir-block.0):
 ; CHECK-NEXT: liveins: $w1, $w2, $x0
 ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $x8 = ADDXri renamable $x0, 291, 12
 ; CHECK-NEXT: STRWui renamable $w2, renamable $x0, 4095, pcsections !0 :: (store monotonic (s32) into %ir.ptr_unsigned)
 ; CHECK-NEXT: STRWroW renamable $w2, renamable $x0, killed renamable $w1, 1, 1, pcsections !0 :: (store unordered (s32) into %ir.ptr_regoff)
- ; CHECK-NEXT: STURWi renamable $w2, killed renamable $x0, -256, pcsections !0 :: (store monotonic (s32) into %ir.ptr_unscaled)
+ ; CHECK-NEXT: STURWi renamable $w2, renamable $x0, -256, pcsections !0 :: (store monotonic (s32) into %ir.ptr_unscaled)
+ ; CHECK-NEXT: renamable $x8 = ADDXri killed renamable $x0, 291, 12
 ; CHECK-NEXT: STRWui killed renamable $w2, killed renamable $x8, 0, pcsections !0 :: (store unordered (s32) into %ir.ptr_random)
 ; CHECK-NEXT: RET undef $lr
 %ptr_unsigned = getelementptr i32, ptr %p, i32 4095
@@ -603,10 +603,10 @@
 ; CHECK: bb.0 (%ir-block.0):
 ; CHECK-NEXT: liveins: $w1, $x0, $x2
 ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $x8 = ADDXri renamable $x0, 291, 12
 ; CHECK-NEXT: STRXui renamable $x2, renamable $x0, 4095, pcsections !0 :: (store monotonic (s64) into %ir.ptr_unsigned)
 ; CHECK-NEXT: STRXroW renamable $x2, renamable $x0, killed renamable $w1, 1, 1, pcsections !0 :: (store unordered (s64) into %ir.ptr_regoff)
- ; CHECK-NEXT: STURXi renamable $x2, killed renamable $x0, -256, pcsections !0 :: (store monotonic (s64) into %ir.ptr_unscaled)
+ ; CHECK-NEXT: STURXi renamable $x2, renamable $x0, -256, pcsections !0 :: (store monotonic (s64) into %ir.ptr_unscaled)
+ ; CHECK-NEXT: renamable $x8 = ADDXri killed renamable $x0, 291, 12
 ; CHECK-NEXT: STRXui killed renamable $x2, killed renamable $x8, 0, pcsections !0 :: (store unordered (s64) into %ir.ptr_random)
 ; CHECK-NEXT: RET undef $lr
 %ptr_unsigned = getelementptr i64, ptr %p, i32 4095
@@ -633,6 +633,7 @@
 ; CHECK-NOLSE-NEXT: renamable $w9 = LDRHHui killed renamable $x1, 0, pcsections !0 :: (load unordered (s16) from %ir.p16)
 ; CHECK-NOLSE-NEXT: renamable $w0 = ADDWrx killed renamable $w9, killed renamable $w8, 0, pcsections !0
 ; CHECK-NOLSE-NEXT: RET undef $lr, implicit $w0
+ ;
 ; CHECK-LDAPR-LABEL: name: load_zext
 ; CHECK-LDAPR: bb.0 (%ir-block.0):
 ; CHECK-LDAPR-NEXT: liveins: $x0, $x1
@@ -659,6 +660,7 @@
 ; CHECK-NOLSE-NEXT: renamable $w0 = LDARW killed renamable $x0, pcsections !0 :: (load seq_cst (s32) from %ir.p32)
 ; CHECK-NOLSE-NEXT: renamable $x1 = LDARX killed renamable $x1, pcsections !0 :: (load acquire (s64) from %ir.p64)
 ; CHECK-NOLSE-NEXT: RET undef $lr, implicit $w0, implicit $x1
+ ;
 ; CHECK-LDAPR-LABEL: name: load_acq
 ; CHECK-LDAPR: bb.0 (%ir-block.0):
 ; CHECK-LDAPR-NEXT: liveins: $x0, $x1
@@ -685,6 +687,7 @@
 ; CHECK-NOLSE-NEXT: renamable $w9 = SBFMWri killed renamable $w9, 0, 15
 ; CHECK-NOLSE-NEXT: renamable $w0 = ADDWrx killed renamable $w9, killed renamable $w8, 32, pcsections !0
 ; CHECK-NOLSE-NEXT: RET undef $lr, implicit $w0
+ ;
 ; CHECK-LDAPR-LABEL: name: load_sext
 ; CHECK-LDAPR: bb.0 (%ir-block.0):
 ; CHECK-LDAPR-NEXT: liveins: $x0, $x1
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll
--- a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll
@@ -70,17 +70,17 @@
 ; CHECK-NEXT: bl _puts
 ; CHECK-NEXT: ldp q1, q0, [sp, #96] ; 32-byte Folded Reload
 ; CHECK-NEXT: mov w0, w19
+; CHECK-NEXT: ldp q3, q2, [sp, #64] ; 32-byte Folded Reload
 ; CHECK-NEXT: mov x1, x20
+; CHECK-NEXT: ldp q5, q4, [sp, #32] ; 32-byte Folded Reload
 ; CHECK-NEXT: mov x2, x21
+; CHECK-NEXT: ldp q7, q6, [sp] ; 32-byte Folded Reload
 ; CHECK-NEXT: mov x3, x22
 ; CHECK-NEXT: mov x4, x23
 ; CHECK-NEXT: mov x5, x24
 ; CHECK-NEXT: mov x6, x25
 ; CHECK-NEXT: mov x7, x26
 ; CHECK-NEXT: mov x8, x27
-; CHECK-NEXT: ldp q3, q2, [sp, #64] ; 32-byte Folded Reload
-; CHECK-NEXT: ldp q5, q4, [sp, #32] ; 32-byte Folded Reload
-; CHECK-NEXT: ldp q7, q6, [sp] ; 32-byte Folded Reload
 ; CHECK-NEXT: ldp x29, x30, [sp, #208] ; 16-byte Folded Reload
 ; CHECK-NEXT: ldp x20, x19, [sp, #192] ; 16-byte Folded Reload
 ; CHECK-NEXT: ldp x22, x21, [sp, #176] ; 16-byte Folded Reload
@@ -122,8 +122,9 @@
 ; CHECK-NEXT: .cfi_offset w26, -80
 ; CHECK-NEXT: .cfi_offset w27, -88
 ; CHECK-NEXT: .cfi_offset w28, -96
-; CHECK-NEXT: add x9, sp, #128
-; CHECK-NEXT: add x10, sp, #256
+; CHECK-NEXT: mov x27, x8
+; CHECK-NEXT: add x8, sp, #128
+; CHECK-NEXT: add x9, sp, #256
 ; CHECK-NEXT: mov x19, x0
 ; CHECK-NEXT: mov x20, x1
 ; CHECK-NEXT: mov x21, x2
@@ -133,16 +134,18 @@
 ; CHECK-NEXT: mov x25, x6
 ; CHECK-NEXT: mov x26, x7
 ; CHECK-NEXT: stp q7, q6, [sp] ; 32-byte Folded Spill
-; CHECK-NEXT: mov x27, x8
 ; CHECK-NEXT: stp q5, q4, [sp, #32] ; 32-byte Folded Spill
 ; CHECK-NEXT: stp q3, q2, [sp, #64] ; 32-byte Folded Spill
 ; CHECK-NEXT: stp q1, q0, [sp, #96] ; 32-byte Folded Spill
-; CHECK-NEXT: str x10, [x9]
+; CHECK-NEXT: str x9, [x8]
 ; CHECK-NEXT: bl _get_f
-; CHECK-NEXT: ldp q1, q0, [sp, #96] ; 32-byte Folded Reload
 ; CHECK-NEXT: mov x9, x0
+; CHECK-NEXT: ldp q1, q0, [sp, #96] ; 32-byte Folded Reload
+; CHECK-NEXT: ldp q3, q2, [sp, #64] ; 32-byte Folded Reload
 ; CHECK-NEXT: mov x0, x19
+; CHECK-NEXT: ldp q5, q4, [sp, #32] ; 32-byte Folded Reload
 ; CHECK-NEXT: mov x1, x20
+; CHECK-NEXT: ldp q7, q6, [sp] ; 32-byte Folded Reload
 ; CHECK-NEXT: mov x2, x21
 ; CHECK-NEXT: mov x3, x22
 ; CHECK-NEXT: mov x4, x23
@@ -150,9 +153,6 @@
 ; CHECK-NEXT: mov x6, x25
 ; CHECK-NEXT: mov x7, x26
 ; CHECK-NEXT: mov x8, x27
-; CHECK-NEXT: ldp q3, q2, [sp, #64] ; 32-byte Folded Reload
-; CHECK-NEXT: ldp q5, q4, [sp, #32] ; 32-byte Folded Reload
-; CHECK-NEXT: ldp q7, q6, [sp] ; 32-byte Folded Reload
 ; CHECK-NEXT: ldp x29, x30, [sp, #240] ; 16-byte Folded Reload
 ; CHECK-NEXT: ldp x20, x19, [sp, #224] ; 16-byte Folded Reload
 ; CHECK-NEXT: ldp x22, x21, [sp, #208] ; 16-byte Folded Reload
@@ -193,7 +193,7 @@
 ; CHECK-NEXT: Lloh2:
 ; CHECK-NEXT: adrp x10, _g@GOTPAGE
 ; CHECK-NEXT: ldr x9, [x0, #16]
-; CHECK-NEXT: mov w11, #42
+; CHECK-NEXT: mov w11, #42 ; =0x2a
 ; CHECK-NEXT: Lloh3:
 ; CHECK-NEXT: ldr x10, [x10, _g@GOTPAGEOFF]
 ; CHECK-NEXT: Lloh4:
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll
@@ -35,10 +35,10 @@
 ; SDAG-LABEL: combine_vec_udiv_nonuniform:
 ; SDAG: // %bb.0:
 ; SDAG-NEXT: adrp x8, .LCPI1_0
-; SDAG-NEXT: adrp x9, .LCPI1_1
 ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI1_0]
+; SDAG-NEXT: adrp x8, .LCPI1_1
+; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI1_1]
 ; SDAG-NEXT: adrp x8, .LCPI1_2
-; SDAG-NEXT: ldr q2, [x9, :lo12:.LCPI1_1]
 ; SDAG-NEXT: ushl v1.8h, v0.8h, v1.8h
 ; SDAG-NEXT: umull2 v3.4s, v1.8h, v2.8h
 ; SDAG-NEXT: umull v1.4s, v1.4h, v2.4h
@@ -48,39 +48,39 @@
 ; SDAG-NEXT: sub v0.8h, v0.8h, v1.8h
 ; SDAG-NEXT: umull2 v3.4s, v0.8h, v2.8h
 ; SDAG-NEXT: umull v0.4s, v0.4h, v2.4h
-; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI1_3]
 ; SDAG-NEXT: uzp2 v0.8h, v0.8h, v3.8h
 ; SDAG-NEXT: add v0.8h, v0.8h, v1.8h
-; SDAG-NEXT: ushl v0.8h, v0.8h, v2.8h
+; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI1_3]
+; SDAG-NEXT: ushl v0.8h, v0.8h, v1.8h
 ; SDAG-NEXT: ret
 ;
 ; GISEL-LABEL: combine_vec_udiv_nonuniform:
 ; GISEL: // %bb.0:
 ; GISEL-NEXT: adrp x8, .LCPI1_4
-; GISEL-NEXT: adrp x9, .LCPI1_5
+; GISEL-NEXT: adrp x9, .LCPI1_0
 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI1_4]
 ; GISEL-NEXT: adrp x8, .LCPI1_3
-; GISEL-NEXT: neg v1.8h, v1.8h
 ; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI1_3]
 ; GISEL-NEXT: adrp x8, .LCPI1_2
+; GISEL-NEXT: neg v1.8h, v1.8h
 ; GISEL-NEXT: ushl v1.8h, v0.8h, v1.8h
 ; GISEL-NEXT: umull2 v3.4s, v1.8h, v2.8h
 ; GISEL-NEXT: umull v1.4s, v1.4h, v2.4h
-; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI1_2]
-; GISEL-NEXT: adrp x8, .LCPI1_1
 ; GISEL-NEXT: uzp2 v1.8h, v1.8h, v3.8h
-; GISEL-NEXT: sub v3.8h, v0.8h, v1.8h
-; GISEL-NEXT: umull2 v4.4s, v3.8h, v2.8h
-; GISEL-NEXT: umull v2.4s, v3.4h, v2.4h
+; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI1_2]
+; GISEL-NEXT: adrp x8, .LCPI1_1
+; GISEL-NEXT: sub v2.8h, v0.8h, v1.8h
+; GISEL-NEXT: umull2 v4.4s, v2.8h, v3.8h
+; GISEL-NEXT: umull v2.4s, v2.4h, v3.4h
 ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI1_1]
-; GISEL-NEXT: adrp x8, .LCPI1_0
-; GISEL-NEXT: neg v3.8h, v3.8h
+; GISEL-NEXT: adrp x8, .LCPI1_5
 ; GISEL-NEXT: uzp2 v2.8h, v2.8h, v4.8h
-; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI1_5]
-; GISEL-NEXT: ldr q5, [x8, :lo12:.LCPI1_0]
+; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI1_0]
 ; GISEL-NEXT: add v1.8h, v2.8h, v1.8h
-; GISEL-NEXT: cmeq v2.8h, v4.8h, v5.8h
-; GISEL-NEXT: ushl v1.8h, v1.8h, v3.8h
+; GISEL-NEXT: neg v2.8h, v3.8h
+; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI1_5]
+; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h
+; GISEL-NEXT: cmeq v2.8h, v3.8h, v4.8h
 ; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
 ; GISEL-NEXT: ret
 %1 = udiv <8 x i16> %x,
@@ -91,38 +91,38 @@
 ; SDAG-LABEL: combine_vec_udiv_nonuniform2:
 ; SDAG: // %bb.0:
 ; SDAG-NEXT: adrp x8, .LCPI2_0
-; SDAG-NEXT: adrp x9, .LCPI2_1
 ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI2_0]
-; SDAG-NEXT: adrp x8, .LCPI2_2
-; SDAG-NEXT: ldr q2, [x9, :lo12:.LCPI2_1]
+; SDAG-NEXT: adrp x8, .LCPI2_1
 ; SDAG-NEXT: ushl v0.8h, v0.8h, v1.8h
-; SDAG-NEXT: umull2 v1.4s, v0.8h, v2.8h
-; SDAG-NEXT: umull v0.4s, v0.4h, v2.4h
-; SDAG-NEXT: uzp2 v0.8h, v0.8h, v1.8h
+; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI2_1]
+; SDAG-NEXT: adrp x8, .LCPI2_2
+; SDAG-NEXT: umull2 v2.4s, v0.8h, v1.8h
+; SDAG-NEXT: umull v0.4s, v0.4h, v1.4h
 ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI2_2]
+; SDAG-NEXT: uzp2 v0.8h, v0.8h, v2.8h
 ; SDAG-NEXT: ushl v0.8h, v0.8h, v1.8h
 ; SDAG-NEXT: ret
 ;
 ; GISEL-LABEL: combine_vec_udiv_nonuniform2:
 ; GISEL: // %bb.0:
 ; GISEL-NEXT: adrp x8, .LCPI2_3
-; GISEL-NEXT: adrp x9, .LCPI2_1
+; GISEL-NEXT: adrp x9, .LCPI2_0
 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI2_3]
 ; GISEL-NEXT: adrp x8, .LCPI2_2
-; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI2_1]
-; GISEL-NEXT: neg v1.8h, v1.8h
+; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI2_0]
 ; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI2_2]
-; GISEL-NEXT: adrp x8, .LCPI2_4
+; GISEL-NEXT: adrp x8, .LCPI2_1
+; GISEL-NEXT: neg v1.8h, v1.8h
 ; GISEL-NEXT: ushl v1.8h, v0.8h, v1.8h
-; GISEL-NEXT: neg v4.8h, v4.8h
 ; GISEL-NEXT: umull2 v3.4s, v1.8h, v2.8h
 ; GISEL-NEXT: umull v1.4s, v1.4h, v2.4h
-; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI2_4]
-; GISEL-NEXT: adrp x8, .LCPI2_0
+; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI2_1]
+; GISEL-NEXT: adrp x8, .LCPI2_4
+; GISEL-NEXT: neg v2.8h, v2.8h
 ; GISEL-NEXT: uzp2 v1.8h, v1.8h, v3.8h
-; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI2_0]
-; GISEL-NEXT: cmeq v2.8h, v2.8h, v3.8h
-; GISEL-NEXT: ushl v1.8h, v1.8h, v4.8h
+; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI2_4]
+; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h
+; GISEL-NEXT: cmeq v2.8h, v3.8h, v4.8h
 ; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
 ; GISEL-NEXT: ret
 %1 = udiv <8 x i16> %x,
@@ -147,21 +147,21 @@
 ; GISEL-LABEL: combine_vec_udiv_nonuniform3:
 ; GISEL: // %bb.0:
 ; GISEL-NEXT: adrp x8, .LCPI3_2
-; GISEL-NEXT: adrp x9, .LCPI3_1
+; GISEL-NEXT: adrp x9, .LCPI3_0
 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI3_2]
-; GISEL-NEXT: adrp x8, .LCPI3_3
-; GISEL-NEXT: ldr q3, [x9, :lo12:.LCPI3_1]
+; GISEL-NEXT: adrp x8, .LCPI3_1
+; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI3_0]
 ; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h
 ; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h
-; GISEL-NEXT: neg v3.8h, v3.8h
 ; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h
-; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI3_3]
-; GISEL-NEXT: adrp x8, .LCPI3_0
-; GISEL-NEXT: sub v4.8h, v0.8h, v1.8h
-; GISEL-NEXT: ldr q5, [x8, :lo12:.LCPI3_0]
-; GISEL-NEXT: usra v1.8h, v4.8h, #1
-; GISEL-NEXT: cmeq v2.8h, v2.8h, v5.8h
-; GISEL-NEXT: ushl v1.8h, v1.8h, v3.8h
+; GISEL-NEXT: sub v2.8h, v0.8h, v1.8h
+; GISEL-NEXT: usra v1.8h, v2.8h, #1
+; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI3_1]
+; GISEL-NEXT: adrp x8, .LCPI3_3
+; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI3_3]
+; GISEL-NEXT: neg v2.8h, v2.8h
+; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h
+; GISEL-NEXT: cmeq v2.8h, v3.8h, v4.8h
 ; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
 ; GISEL-NEXT: ret
 %1 = udiv <8 x i16> %x,
@@ -174,8 +174,8 @@
 ; SDAG-NEXT: movi v1.16b, #171
 ; SDAG-NEXT: adrp x8, .LCPI4_0
 ; SDAG-NEXT: adrp x9, .LCPI4_1
-; SDAG-NEXT: umull2 v2.8h, v0.16b, v1.16b
 ; SDAG-NEXT: ldr q3, [x9, :lo12:.LCPI4_1]
+; SDAG-NEXT: umull2 v2.8h, v0.16b, v1.16b
 ; SDAG-NEXT: umull v1.8h, v0.8b, v1.8b
 ; SDAG-NEXT: and v0.16b, v0.16b, v3.16b
 ; SDAG-NEXT: uzp2 v1.16b, v1.16b, v2.16b
@@ -188,19 +188,19 @@
 ; GISEL-LABEL: combine_vec_udiv_nonuniform4:
 ; GISEL: // %bb.0:
 ; GISEL-NEXT: adrp x8, .LCPI4_2
-; GISEL-NEXT: adrp x9, .LCPI4_1
+; GISEL-NEXT: adrp x9, .LCPI4_0
 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI4_2]
+; GISEL-NEXT: adrp x8, .LCPI4_1
+; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI4_0]
+; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI4_1]
 ; GISEL-NEXT: adrp x8, .LCPI4_3
-; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI4_1]
 ; GISEL-NEXT: umull2 v2.8h, v0.16b, v1.16b
-; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI4_3]
 ; GISEL-NEXT: umull v1.8h, v0.8b, v1.8b
-; GISEL-NEXT: adrp x8, .LCPI4_0
-; GISEL-NEXT: neg v4.16b, v4.16b
 ; GISEL-NEXT: uzp2 v1.16b, v1.16b, v2.16b
-; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI4_0]
-; GISEL-NEXT: cmeq v2.16b, v3.16b, v2.16b
-; GISEL-NEXT: ushl v1.16b, v1.16b, v4.16b
+; GISEL-NEXT: neg v2.16b, v3.16b
+; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI4_3]
+; GISEL-NEXT: ushl v1.16b, v1.16b, v2.16b
+; GISEL-NEXT: cmeq v2.16b, v3.16b, v4.16b
 ; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
 ; GISEL-NEXT: ret
 %div = udiv <16 x i8> %x,
@@ -211,52 +211,52 @@
 ; SDAG-LABEL: pr38477:
 ; SDAG: // %bb.0:
 ; SDAG-NEXT: adrp x8, .LCPI5_0
-; SDAG-NEXT: adrp x9, .LCPI5_3
+; SDAG-NEXT: adrp x9, .LCPI5_4
 ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI5_0]
 ; SDAG-NEXT: adrp x8, .LCPI5_1
+; SDAG-NEXT: ldr q3, [x8, :lo12:.LCPI5_1]
+; SDAG-NEXT: adrp x8, .LCPI5_2
 ; SDAG-NEXT: umull2 v2.4s, v0.8h, v1.8h
 ; SDAG-NEXT: umull v1.4s, v0.4h, v1.4h
 ; SDAG-NEXT: uzp2 v1.8h, v1.8h, v2.8h
-; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI5_1]
-; SDAG-NEXT: adrp x8, .LCPI5_2
-; SDAG-NEXT: sub v3.8h, v0.8h, v1.8h
-; SDAG-NEXT: umull2 v4.4s, v3.8h, v2.8h
-; SDAG-NEXT: umull v2.4s, v3.4h, v2.4h
-; SDAG-NEXT: ldr q3, [x8, :lo12:.LCPI5_2]
-; SDAG-NEXT: adrp x8, .LCPI5_4
+; SDAG-NEXT: sub v2.8h, v0.8h, v1.8h
+; SDAG-NEXT: umull2 v4.4s, v2.8h, v3.8h
+; SDAG-NEXT: umull v2.4s, v2.4h, v3.4h
+; SDAG-NEXT: ldr q3, [x9, :lo12:.LCPI5_4]
+; SDAG-NEXT: and v0.16b, v0.16b, v3.16b
 ; SDAG-NEXT: uzp2 v2.8h, v2.8h, v4.8h
-; SDAG-NEXT: ldr q4, [x9, :lo12:.LCPI5_3]
 ; SDAG-NEXT: add v1.8h, v2.8h, v1.8h
-; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI5_4]
-; SDAG-NEXT: ushl v1.8h, v1.8h, v3.8h
-; SDAG-NEXT: and v0.16b, v0.16b, v2.16b
-; SDAG-NEXT: and v1.16b, v1.16b, v4.16b
+; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI5_2]
+; SDAG-NEXT: adrp x8, .LCPI5_3
+; SDAG-NEXT: ushl v1.8h, v1.8h, v2.8h
+; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI5_3]
+; SDAG-NEXT: and v1.16b, v1.16b, v2.16b
 ; SDAG-NEXT: orr v0.16b, v0.16b, v1.16b
 ; SDAG-NEXT: ret
 ;
 ; GISEL-LABEL: pr38477:
 ; GISEL: // %bb.0:
 ; GISEL-NEXT: adrp x8, .LCPI5_3
-; GISEL-NEXT: adrp x9, .LCPI5_4
+; GISEL-NEXT: adrp x9, .LCPI5_0
 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI5_3]
 ; GISEL-NEXT: adrp x8, .LCPI5_2
+; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI5_2]
+; GISEL-NEXT: adrp x8, .LCPI5_1
 ; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h
 ; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h
 ; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h
-; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI5_2]
-; GISEL-NEXT: adrp x8, .LCPI5_1
-; GISEL-NEXT: sub v3.8h, v0.8h, v1.8h
-; GISEL-NEXT: umull2 v4.4s, v3.8h, v2.8h
-; GISEL-NEXT: umull v2.4s, v3.4h, v2.4h
+; GISEL-NEXT: sub v2.8h, v0.8h, v1.8h
+; GISEL-NEXT: umull2 v4.4s, v2.8h, v3.8h
+; GISEL-NEXT: umull v2.4s, v2.4h, v3.4h
 ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI5_1]
-; GISEL-NEXT: adrp x8, .LCPI5_0
-; GISEL-NEXT: neg v3.8h, v3.8h
+; GISEL-NEXT: adrp x8, .LCPI5_4
 ; GISEL-NEXT: uzp2 v2.8h, v2.8h, v4.8h
-; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI5_4]
-; GISEL-NEXT: ldr q5, [x8, :lo12:.LCPI5_0]
+; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI5_0]
 ; GISEL-NEXT: add v1.8h, v2.8h, v1.8h
-; GISEL-NEXT: cmeq v2.8h, v4.8h, v5.8h
-; GISEL-NEXT: ushl v1.8h, v1.8h, v3.8h
+; GISEL-NEXT: neg v2.8h, v3.8h
+; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI5_4]
+; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h
+; GISEL-NEXT: cmeq v2.8h, v3.8h, v4.8h
 ; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
 ; GISEL-NEXT: ret
 %1 = udiv <8 x i16> %a0,
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/select-bitfield-insert.ll b/llvm/test/CodeGen/AArch64/GlobalISel/select-bitfield-insert.ll
--- a/llvm/test/CodeGen/AArch64/GlobalISel/select-bitfield-insert.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/select-bitfield-insert.ll
@@ -145,8 +145,8 @@
 ; SDAG: ; %bb.0: ; %bb
 ; SDAG-NEXT: and x8, x1, #0x1
 ; SDAG-NEXT: bfi x1, x0, #1, #63
-; SDAG-NEXT: mov x0, x1
 ; SDAG-NEXT: str x8, [x2]
+; SDAG-NEXT: mov x0, x1
 ; SDAG-NEXT: ret
 bb:
 %tmp3 = shl i64 %in1, 1
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll b/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll
--- a/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/store-merging.ll
@@ -4,8 +4,8 @@
 define void @test_simple_2xs8(ptr %ptr) {
 ; CHECK-LABEL: test_simple_2xs8:
 ; CHECK: ; %bb.0:
-; CHECK-NEXT: mov w8, #4
-; CHECK-NEXT: mov w9, #5
+; CHECK-NEXT: mov w8, #4 ; =0x4
+; CHECK-NEXT: mov w9, #5 ; =0x5
 ; CHECK-NEXT: strb w8, [x0]
 ; CHECK-NEXT: strb w9, [x0, #1]
 ; CHECK-NEXT: ret
@@ -18,7 +18,7 @@
 define void @test_simple_2xs16(ptr %ptr) {
 ; CHECK-LABEL: test_simple_2xs16:
 ; CHECK: ; %bb.0:
-; CHECK-NEXT: mov w8, #4
+; CHECK-NEXT: mov w8, #4 ; =0x4
 ; CHECK-NEXT: movk w8, #5, lsl #16
 ; CHECK-NEXT: str w8, [x0]
 ; CHECK-NEXT: ret
@@ -31,7 +31,7 @@
 define void @test_simple_4xs16(ptr %ptr) {
 ; CHECK-LABEL: test_simple_4xs16:
 ; CHECK: ; %bb.0:
-; CHECK-NEXT: mov x8, #4
+; CHECK-NEXT: mov x8, #4 ; =0x4
 ; CHECK-NEXT: movk x8, #5, lsl #16
 ; CHECK-NEXT: movk x8, #9, lsl #32
 ; CHECK-NEXT: movk x8, #14, lsl #48
@@ -50,7 +50,7 @@
 define void @test_simple_2xs32(ptr %ptr) {
 ; CHECK-LABEL: test_simple_2xs32:
 ; CHECK: ; %bb.0:
-; CHECK-NEXT: mov x8, #4
+; CHECK-NEXT: mov x8, #4 ; =0x4
 ; CHECK-NEXT: movk x8, #5, lsl #32
 ; CHECK-NEXT: str x8, [x0]
 ; CHECK-NEXT: ret
@@ -63,8 +63,8 @@
 define void @test_simple_2xs64_illegal(ptr %ptr) {
 ; CHECK-LABEL: test_simple_2xs64_illegal:
 ; CHECK: ; %bb.0:
-; CHECK-NEXT: mov w8, #4
-; CHECK-NEXT: mov w9, #5
+; CHECK-NEXT: mov w8, #4 ; =0x4
+; CHECK-NEXT: mov w9, #5 ; =0x5
 ; CHECK-NEXT: stp x8, x9, [x0]
 ; CHECK-NEXT: ret
 store i64 4, ptr %ptr
@@ -77,14 +77,14 @@
 define void @test_simple_vector(ptr %ptr) {
 ; CHECK-LABEL: test_simple_vector:
 ; CHECK: ; %bb.0:
-; CHECK-NEXT: mov w8, #4
-; CHECK-NEXT: mov w9, #7
-; CHECK-NEXT: mov w10, #5
-; CHECK-NEXT: mov w11, #8
+; CHECK-NEXT: mov w8, #4 ; =0x4
+; CHECK-NEXT: mov w9, #7 ; =0x7
 ; CHECK-NEXT: strh w8, [x0]
+; CHECK-NEXT: mov w8, #5 ; =0x5
 ; CHECK-NEXT: strh w9, [x0, #2]
-; CHECK-NEXT: strh w10, [x0, #4]
-; CHECK-NEXT: strh w11, [x0, #6]
+; CHECK-NEXT: mov w9, #8 ; =0x8
+; CHECK-NEXT: strh w8, [x0, #4]
+; CHECK-NEXT: strh w9, [x0, #6]
 ; CHECK-NEXT: ret
 store <2 x i16> , ptr %ptr
 %addr2 = getelementptr <2 x i16>, ptr %ptr, i64 1
@@ -95,10 +95,10 @@
 define i32 @test_unknown_alias(ptr %ptr, ptr %aliasptr) {
 ; CHECK-LABEL: test_unknown_alias:
 ; CHECK: ; %bb.0:
-; CHECK-NEXT: mov w9, #4
+; CHECK-NEXT: mov w9, #4 ; =0x4
 ; CHECK-NEXT: mov x8, x0
 ; CHECK-NEXT: str w9, [x0]
-; CHECK-NEXT: mov w9, #5
+; CHECK-NEXT: mov w9, #5 ; =0x5
 ; CHECK-NEXT: ldr w0, [x1]
 ; CHECK-NEXT: str w9, [x8, #4]
 ; CHECK-NEXT: ret
@@ -112,12 +112,12 @@
 define void @test_2x_2xs32(ptr %ptr, ptr %ptr2) {
 ; CHECK-LABEL: test_2x_2xs32:
 ; CHECK: ; %bb.0:
-; CHECK-NEXT: mov x10, #9
-; CHECK-NEXT: mov w8, #4
-; CHECK-NEXT: mov w9, #5
-; CHECK-NEXT: movk x10, #17, lsl #32
+; CHECK-NEXT: mov w8, #4 ; =0x4
+; CHECK-NEXT: mov w9, #5 ; =0x5
 ; CHECK-NEXT: stp w8, w9, [x0]
-; CHECK-NEXT: str x10, [x1]
+; CHECK-NEXT: mov x8, #9 ; =0x9
+; CHECK-NEXT: movk x8, #17, lsl #32
+; CHECK-NEXT: str x8, [x1]
 ; CHECK-NEXT: ret
 store i32 4, ptr %ptr
 %addr2 = getelementptr i32, ptr %ptr, i64 1
@@ -170,14 +170,14 @@
 define void @test_alias_4xs16(ptr %ptr, ptr %ptr2) {
 ; CHECK-LABEL: test_alias_4xs16:
 ; CHECK: ; %bb.0:
-; CHECK-NEXT: mov w8, #4
-; CHECK-NEXT: mov w9, #9
+; CHECK-NEXT: mov w8, #4 ; =0x4
+; CHECK-NEXT: mov w9, #9 ; =0x9
 ; CHECK-NEXT: movk w8, #5, lsl #16
-; CHECK-NEXT: mov w10, #14
 ; CHECK-NEXT: strh w9, [x0, #4]
 ; CHECK-NEXT: str w8, [x0]
+; CHECK-NEXT: mov w8, #14 ; =0xe
 ; CHECK-NEXT: strh wzr, [x1]
-; CHECK-NEXT: strh w10, [x0, #6]
+; CHECK-NEXT: strh w8, [x0, #6]
 ; CHECK-NEXT: ret
 store i16 4, ptr %ptr
 %addr2 = getelementptr i16, ptr %ptr, i64 1
@@ -194,13 +194,13 @@
 define void @test_alias2_4xs16(ptr %ptr, ptr %ptr2, ptr %ptr3) {
 ; CHECK-LABEL: test_alias2_4xs16:
 ; CHECK: ; %bb.0:
-; CHECK-NEXT: mov w8, #4
-; CHECK-NEXT: mov w9, #5
-; CHECK-NEXT: movk w9, #9, lsl #16
+; CHECK-NEXT: mov w8, #4 ; =0x4
 ; CHECK-NEXT: strh w8, [x0]
-; CHECK-NEXT: mov w8, #14
+; CHECK-NEXT: mov w8, #5 ; =0x5
+; CHECK-NEXT: movk w8, #9, lsl #16
 ; CHECK-NEXT: strh wzr, [x2]
-; CHECK-NEXT: stur w9, [x0, #2]
+; CHECK-NEXT: stur w8, [x0, #2]
+; CHECK-NEXT: mov w8, #14 ; =0xe
 ; CHECK-NEXT: strh wzr, [x1]
 ; CHECK-NEXT: strh w8, [x0, #6]
 ; CHECK-NEXT: ret
@@ -220,17 +220,17 @@
 define void @test_alias3_4xs16(ptr %ptr, ptr %ptr2, ptr %ptr3, ptr %ptr4) {
 ; CHECK-LABEL: test_alias3_4xs16:
 ; CHECK: ; %bb.0:
-; CHECK-NEXT: mov w8, #4
-; CHECK-NEXT: mov w9, #5
+; CHECK-NEXT: mov w8, #4 ; =0x4
 ; CHECK-NEXT: strh w8, [x0]
-; CHECK-NEXT: mov w8, #9
+; CHECK-NEXT: mov w8, #5 ; =0x5
 ; CHECK-NEXT: strh wzr, [x2]
-; CHECK-NEXT: strh w9, [x0, #2]
-; CHECK-NEXT: mov w9, #14
+; CHECK-NEXT: strh w8, [x0, #2]
+; CHECK-NEXT: mov w8, #9 ; =0x9
 ; CHECK-NEXT: strh wzr, [x3]
 ; CHECK-NEXT: strh w8, [x0, #4]
+; CHECK-NEXT: mov w8, #14 ; =0xe
 ; CHECK-NEXT: strh wzr, [x1]
-; CHECK-NEXT: strh w9, [x0, #6]
+; CHECK-NEXT: strh w8, [x0, #6]
 ; CHECK-NEXT: ret
 store i16 4, ptr %ptr
 %addr2 = getelementptr i16, ptr %ptr, i64 1
@@ -251,7 +251,7 @@
 ; CHECK: ; %bb.0:
 ; CHECK-NEXT: sub sp, sp, #32
 ; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: mov x8, #4
+; CHECK-NEXT: mov x8, #4 ; =0x4
 ; CHECK-NEXT: ldr w0, [sp, #4]
 ; CHECK-NEXT: movk x8, #5, lsl #32
 ; CHECK-NEXT: str x8, [sp, #8]
@@ -285,9 +285,9 @@
 ; CHECK-LABEL: test_atomic:
 ; CHECK: ; %bb.0: ; %entry
 ; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: add x9, x8, #4
 ; CHECK-NEXT: stlr wzr, [x8]
-; CHECK-NEXT: stlr wzr, [x9]
+; CHECK-NEXT: add x8, x8, #4
+; CHECK-NEXT: stlr wzr, [x8]
 ; CHECK-NEXT: ret
 entry:
 %0 = load ptr, ptr %ptr, align 8
@@ -304,14 +304,14 @@
 define i32 @test_alias_3xs16(ptr %ptr, ptr %ptr2, ptr %ptr3, ptr noalias %safe_ptr) {
 ; CHECK-LABEL: test_alias_3xs16:
 ; CHECK: ; %bb.0:
-; CHECK-NEXT: mov x10, #9
 ; CHECK-NEXT: mov x8, x0
+; CHECK-NEXT: mov w9, #5 ; =0x5
 ; CHECK-NEXT: ldr w0, [x3]
-; CHECK-NEXT: mov w9, #5
-; CHECK-NEXT: movk x10, #14, lsl #32
 ; CHECK-NEXT: str w9, [x8, #4]
+; CHECK-NEXT: mov x9, #9 ; =0x9
+; CHECK-NEXT: movk x9, #14, lsl #32
 ; CHECK-NEXT: strh wzr, [x8, #4]
-; CHECK-NEXT: str x10, [x8, #8]
+; CHECK-NEXT: str x9, [x8, #8]
 ; CHECK-NEXT: ret
 %safeld = load i32, ptr %safe_ptr
 %addr2 = getelementptr i32, ptr %ptr, i64 1
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/swifterror.ll b/llvm/test/CodeGen/AArch64/GlobalISel/swifterror.ll
--- a/llvm/test/CodeGen/AArch64/GlobalISel/swifterror.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/swifterror.ll
@@ -55,8 +55,8 @@
 ; "caller2" is the caller of "foo", it calls "foo" inside a loop.
 define float @caller2(ptr %error_ref) {
 ; CHECK-LABEL: caller2:
-; CHECK: mov [[ID:x[0-9]+]], x0
 ; CHECK: fmov [[CMP:s[0-9]+]], #1.0
+; CHECK: mov [[ID:x[0-9]+]], x0
 ; CHECK: mov x21, xzr
 ; CHECK: bl {{.*}}foo
 ; CHECK: cbnz x21
@@ -160,8 +160,8 @@
 ; CHECK-DAG: mov w0, #16
 ; CHECK: malloc
 ; CHECK: mov [[ID:w[0-9]+]], #1
-; CHECK: mov x21, x0
 ; CHECK: strb [[ID]], [x0, #8]
+; CHECK: mov x21, x0
 ; CHECK: str w{{.*}}, [{{.*}}[[SRET]], #4]
 ; CHECK-NOT: x21
@@ -214,9 +214,8 @@
 ; CHECK: mov w0, #16
 ; CHECK: malloc
 ; CHECK: mov [[ID:w[0-9]+]], #1
-; CHECK: mov x21, x0
-; CHECK-NOT: x21
 ; CHECK: strb [[ID]], [x0, #8]
+; CHECK: mov x21, x0
 ; CHECK-NOT: x21
 ; First vararg
@@ -336,6 +335,7 @@
 ; CHECK: str xzr, [sp]
 ; CHECK: bl _params_in_reg2
 ; Restore original arguments for next call.
+; CHECK: ldr x8, [sp, #24]
 ; CHECK: mov x1, x20
 ; CHECK: mov x2, x22
 ; CHECK: mov x3, x23
@@ -345,12 +345,13 @@
 ; CHECK: mov x7, x27
 ; Restore original swiftself argument and swifterror %err.
 ; CHECK: mov x21, x28
-; CHECK: ldr x8, [sp
 ; CHECK: bl _params_in_reg2
 ; Restore calle save registers but don't clober swifterror x21.
 ; CHECK-NOT: x21
 ; CHECK: ldp x29, x30, [sp
 ; CHECK-NOT: x21
+; CHECK: ldr x28, [sp
+; CHECK-NOT: x21
 ; CHECK: ldp x20, x19, [sp
 ; CHECK-NOT: x21
 ; CHECK: ldp x23, x22, [sp
@@ -359,8 +360,6 @@
 ; CHECK-NOT: x21
 ; CHECK: ldp x27, x26, [sp
 ; CHECK-NOT: x21
-; CHECK: ldr x28, [sp
-; CHECK-NOT: x21
 ; CHECK: ret
 define swiftcc void @params_in_reg(i64, i64, i64, i64, i64, i64, i64, i64, ptr, ptr nocapture swifterror %err) {
 %error_ptr_ref = alloca swifterror ptr, align 8
@@ -373,7 +372,7 @@
 ; CHECK-LABEL: params_and_return_in_reg
 ; Store callee saved registers.
-; CHECK: stp x28, x21, [sp, #16
+; CHECK: stp x28, x0, [sp, #16
 ; CHECK: stp x27, x26, [sp
 ; CHECK: stp x25, x24, [sp
 ; CHECK: stp x23, x22, [sp
@@ -399,9 +398,9 @@
 ; CHECK: mov x21, xzr
 ; CHECK: bl _params_in_reg2
 ; Store swifterror %error_ptr_ref.
+; CHECK: ldr x0, [sp, #24]
 ; CHECK: stp {{x[0-9]+}}, x21, [sp]
 ; Setup call arguments from original arguments.
-; CHECK: mov x0, x19
 ; CHECK: mov x1, x20
 ; CHECK: mov x2, x22
 ; CHECK: mov x3, x23
@@ -409,19 +408,19 @@
 ; CHECK: mov x5, x25
 ; CHECK: mov x6, x26
 ; CHECK: mov x7, x27
-; CHECK: ldr x21, [sp, #24
+; CHECK: mov x21, x28
 ; CHECK: bl _params_and_return_in_reg2
+; CHECK: mov x19, x21
+; CHECK: ldr x21, [sp, #8
 ; Store return values.
-; CHECK: mov x19, x0
-; CHECK: mov x20, x1
-; CHECK: mov x22, x2
-; CHECK: mov x23, x3
-; CHECK: mov x24, x4
-; CHECK: mov x25, x5
-; CHECK: mov x26, x6
-; CHECK: mov x27, x7
-; Save swifterror %err.
-; CHECK: mov x28, x21
+; CHECK: mov x20, x0
+; CHECK: mov x22, x1
+; CHECK: mov x23, x2
+; CHECK: mov x24, x3
+; CHECK: mov x25, x4
+; CHECK: mov x26, x5
+; CHECK: mov x27, x6
+; CHECK: mov x28, x7
 ; Setup call.
 ; CHECK: mov w0, #1
 ; CHECK: mov w1, #2
@@ -431,26 +430,25 @@
 ; CHECK: mov w5, #6
 ; CHECK: mov w6, #7
 ; CHECK: mov w7, #8
-; ... setup call with swiferror %error_ptr_ref.
-; CHECK: ldr x21, [sp, #8]
+; CHECK: str xzr, [sp]
 ; CHECK: bl _params_in_reg2
 ; Restore return values for return from this function.
-; CHECK: mov x0, x19
-; CHECK: mov x1, x20
-; CHECK: mov x2, x22
-; CHECK: mov x3, x23
-; CHECK: mov x4, x24
-; CHECK: mov x5, x25
-; CHECK: mov x6, x26
-; CHECK: mov x7, x27
-; CHECK: ldp x29, x30, [sp
-; CHECK: mov x21, x28
-; Restore callee save registers.
-; CHECK: ldp x20, x19, [sp
-; CHECK: ldp x23, x22, [sp
-; CHECK: ldp x25, x24, [sp
-; CHECK: ldp x27, x26, [sp
-; CHECK: ldr x28, [sp
+; CHECK: mov x0, x20
+; CHECK: mov x1, x22
+; CHECK: mov x2, x23
+; CHECK: mov x3, x24
+; CHECK: mov x4, x25
+; CHECK: mov x5, x26
+; CHECK: mov x6, x27
+; CHECK: mov x21, x19
+; CHECK: mov x7, x28
+; CHECK: ldp x29, x30, [sp, #96] ; 16-byte Folded Reload
+; CHECK: ldr x28, [sp, #16] ; 8-byte Folded Reload
+; CHECK: ldp x20, x19, [sp, #80] ; 16-byte Folded Reload
+; CHECK: ldp x23, x22, [sp, #64] ; 16-byte Folded Reload
+; CHECK: ldp x25, x24, [sp, #48] ; 16-byte Folded Reload
+; CHECK: ldp x27, x26, [sp, #32] ; 16-byte Folded Reload
+; CHECK: add sp, sp, #112
 ; CHECK: ret
 define swiftcc { i64, i64, i64, i64, i64, i64, i64, i64 } @params_and_return_in_reg(i64, i64, i64, i64, i64, i64, i64, i64, ptr , ptr nocapture swifterror %err) {
 %error_ptr_ref = alloca swifterror ptr, align 8
diff --git a/llvm/test/CodeGen/AArch64/a57-csel.ll b/llvm/test/CodeGen/AArch64/a57-csel.ll
--- a/llvm/test/CodeGen/AArch64/a57-csel.ll
+++ b/llvm/test/CodeGen/AArch64/a57-csel.ll
@@ -1,9 +1,16 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; RUN: llc -mtriple=aarch64-none-linux-gnu < %s -mcpu=cortex-a57 -aarch64-enable-early-ifcvt=false | FileCheck %s
 ; Check that the select isn't expanded into a branch sequence
 ; when the icmp's first operand %x0 is from load.
 define i64 @f(i64 %a, i64 %b, ptr %c, i64 %d, i64 %e) {
- ; CHECK: csel
+; CHECK-LABEL: f:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x2]
+; CHECK-NEXT: cmp x8, #0
+; CHECK-NEXT: csel x8, x0, x1, eq
+; CHECK-NEXT: add x0, x8, x3
+; CHECK-NEXT: ret
 %x0 = load i64, ptr %c
 %x1 = icmp eq i64 %x0, 0
 %x2 = select i1 %x1, i64 %a, i64 %b
diff --git a/llvm/test/CodeGen/AArch64/aarch64-addv.ll b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
--- a/llvm/test/CodeGen/AArch64/aarch64-addv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-addv.ll
@@ -93,16 +93,16 @@
 declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
 define i32 @oversized_ADDV_512(ptr %arr) {
-; SDAG-LABEL: oversized_ADDV_512:
-; SDAG: // %bb.0:
-; SDAG-NEXT: ldp q0, q1, [x0, #32]
-; SDAG-NEXT: ldp q3, q2, [x0]
-; SDAG-NEXT: add v0.4s, v3.4s, v0.4s
-; SDAG-NEXT: add v1.4s, v2.4s, v1.4s
-; SDAG-NEXT: add v0.4s, v0.4s, v1.4s
-; SDAG-NEXT: addv s0, v0.4s
-; SDAG-NEXT: fmov w0, s0
-; SDAG-NEXT: ret
+; SDAG-LABEL: oversized_ADDV_512:
+; SDAG: // %bb.0:
+; SDAG-NEXT: ldp q0, q1, [x0, #32]
+; SDAG-NEXT: ldp q2, q3, [x0]
+; SDAG-NEXT: add v1.4s, v3.4s, v1.4s
+; SDAG-NEXT: add v0.4s, v2.4s, v0.4s
+; SDAG-NEXT: add v0.4s, v0.4s, v1.4s
+; SDAG-NEXT: addv s0, v0.4s
+; SDAG-NEXT: fmov w0, s0
+; SDAG-NEXT: ret
 ;
 ; GISEL-LABEL: oversized_ADDV_512:
 ; GISEL: // %bb.0:
@@ -148,19 +148,19 @@
 }
 define i32 @addv_combine_i32(<4 x i32> %a1, <4 x i32> %a2) {
-; SDAG-LABEL: addv_combine_i32:
-; SDAG: // %bb.0: // %entry
-; SDAG-NEXT: add v0.4s, v0.4s, v1.4s
-; SDAG-NEXT: addv s0, v0.4s
-; SDAG-NEXT: fmov w0, s0
-; SDAG-NEXT: ret
+; SDAG-LABEL: addv_combine_i32:
+; SDAG: // %bb.0: // %entry
+; SDAG-NEXT: add v0.4s, v0.4s, v1.4s
+; SDAG-NEXT: addv s0, v0.4s
+; SDAG-NEXT: fmov w0, s0
+; SDAG-NEXT: ret
 ;
 ; GISEL-LABEL: addv_combine_i32:
 ; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: addv s0, v0.4s
-; GISEL-NEXT: addv s1, v1.4s
-; GISEL-NEXT: fmov w8, s0
-; GISEL-NEXT: fmov w9, s1
+; GISEL-NEXT: addv s0, v0.4s
+; GISEL-NEXT: addv s1, v1.4s
+; GISEL-NEXT: fmov w8, s0
+; GISEL-NEXT: fmov w9, s1
 ; GISEL-NEXT: add w0, w8, w9
 ; GISEL-NEXT: ret
 entry:
@@ -171,19 +171,19 @@
 }
 define i64 @addv_combine_i64(<2 x i64> %a1, <2 x i64> %a2) {
-; SDAG-LABEL: addv_combine_i64:
-; SDAG: // %bb.0: // %entry
-; SDAG-NEXT: add v0.2d, v0.2d, v1.2d
-; SDAG-NEXT: addp d0, v0.2d
-; SDAG-NEXT: fmov x0, d0
-; SDAG-NEXT: ret
+; SDAG-LABEL: addv_combine_i64:
+; SDAG: // %bb.0: // %entry
+; SDAG-NEXT: add v0.2d, v0.2d, v1.2d
+; SDAG-NEXT: addp d0, v0.2d
+; SDAG-NEXT: fmov x0, d0
+; SDAG-NEXT: ret
 ;
 ; GISEL-LABEL: addv_combine_i64:
 ; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: addp d0, v0.2d
-; GISEL-NEXT: addp d1, v1.2d
-; GISEL-NEXT: fmov x8, d0
-; GISEL-NEXT: fmov x9, d1
+; GISEL-NEXT: addp d0, v0.2d
+; GISEL-NEXT: addp d1, v1.2d
+; GISEL-NEXT: fmov x8, d0
+; GISEL-NEXT: fmov x9, d1
 ; GISEL-NEXT: add x0, x8, x9
 ; GISEL-NEXT: ret
 entry:
diff --git a/llvm/test/CodeGen/AArch64/aarch64-be-bv.ll b/llvm/test/CodeGen/AArch64/aarch64-be-bv.ll
--- a/llvm/test/CodeGen/AArch64/aarch64-be-bv.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-be-bv.ll
@@ -6,11 +6,11 @@
 define dso_local void @movi_modimm_t1() nounwind {
 ; CHECK-LABEL: movi_modimm_t1:
 ; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.4s, #1
 ; CHECK-NEXT: adrp x8, vec_v8i16
 ; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16
-; CHECK-NEXT: movi v1.4s, #1
-; CHECK-NEXT: ld1 { v0.8h }, [x8]
-; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ld1 { v1.8h }, [x8]
+; CHECK-NEXT: add v0.8h, v1.8h, v0.8h
 ; CHECK-NEXT: st1 { v0.8h }, [x8]
 ; CHECK-NEXT: ret
 %in = load <8 x i16>, ptr @vec_v8i16
@@ -22,11 +22,11 @@
 define dso_local void @movi_modimm_t2() nounwind {
 ; CHECK-LABEL: movi_modimm_t2:
 ; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.4s, #1, lsl #8
 ; CHECK-NEXT: adrp x8, vec_v8i16
 ; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16
-; CHECK-NEXT: movi v1.4s, #1, lsl #8
-; CHECK-NEXT: ld1 { v0.8h }, [x8]
-; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ld1 { v1.8h }, [x8]
+; CHECK-NEXT: add v0.8h, v1.8h, v0.8h
 ; CHECK-NEXT: st1 { v0.8h }, [x8]
 ; CHECK-NEXT: ret
 %in = load <8 x i16>, ptr @vec_v8i16
@@ -38,11 +38,11 @@
 define dso_local void @movi_modimm_t3() nounwind {
 ; CHECK-LABEL: movi_modimm_t3:
 ; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.4s, #1, lsl #16
 ; CHECK-NEXT: adrp x8, vec_v8i16
 ; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16
-; CHECK-NEXT: movi v1.4s, #1, lsl #16
-; CHECK-NEXT: ld1 { v0.8h }, [x8]
-; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ld1 { v1.8h }, [x8]
+; CHECK-NEXT: add v0.8h, v1.8h, v0.8h
 ; CHECK-NEXT: st1 { v0.8h }, [x8]
 ; CHECK-NEXT: ret
 %in = load <8 x i16>, ptr @vec_v8i16
@@ -54,11 +54,11 @@
 define dso_local void @movi_modimm_t4() nounwind {
 ; CHECK-LABEL: movi_modimm_t4:
 ; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.4s, #1, lsl #24
 ; CHECK-NEXT: adrp x8, vec_v8i16
 ; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16
-; CHECK-NEXT: movi v1.4s, #1, lsl #24
-; CHECK-NEXT: ld1 { v0.8h }, [x8]
-; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ld1 { v1.8h }, [x8]
+; CHECK-NEXT: add v0.8h, v1.8h, v0.8h
 ; CHECK-NEXT: st1 { v0.8h }, [x8]
 ; CHECK-NEXT: ret
 %in = load <8 x i16>, ptr @vec_v8i16
@@ -70,11 +70,11 @@
 define dso_local void @movi_modimm_t5() nounwind {
 ; CHECK-LABEL: movi_modimm_t5:
 ; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.8h, #1
 ; CHECK-NEXT: adrp x8, vec_v8i16
 ; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16
-; CHECK-NEXT: movi v1.8h, #1
-; CHECK-NEXT: ld1 { v0.8h }, [x8]
-; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ld1 { v1.8h }, [x8]
+; CHECK-NEXT: add v0.8h, v1.8h, v0.8h
 ; CHECK-NEXT: st1 { v0.8h }, [x8]
 ; CHECK-NEXT: ret
 %in = load <8 x i16>, ptr @vec_v8i16
@@ -86,11 +86,11 @@
 define dso_local void @movi_modimm_t6() nounwind {
 ; CHECK-LABEL: movi_modimm_t6:
 ; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.8h, #1, lsl #8
 ; CHECK-NEXT: adrp x8, vec_v8i16
 ; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16
-; CHECK-NEXT: movi v1.8h, #1, lsl #8
-; CHECK-NEXT: ld1 { v0.8h }, [x8]
-; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ld1 { v1.8h }, [x8]
+; CHECK-NEXT: add v0.8h, v1.8h, v0.8h
 ; CHECK-NEXT: st1 { v0.8h }, [x8]
 ; CHECK-NEXT: ret
 %in = load <8 x i16>, ptr @vec_v8i16
@@ -102,11 +102,11 @@
 define dso_local void @movi_modimm_t7() nounwind {
 ; CHECK-LABEL: movi_modimm_t7:
 ; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.4s, #1, msl #8
 ; CHECK-NEXT: adrp x8, vec_v8i16
 ; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16
-; CHECK-NEXT: movi v1.4s, #1, msl #8
-; CHECK-NEXT: ld1 { v0.8h }, [x8]
-; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ld1 { v1.8h }, [x8]
+; CHECK-NEXT: add v0.8h, v1.8h, v0.8h
 ; CHECK-NEXT: st1 { v0.8h }, [x8]
 ; CHECK-NEXT: ret
 %in = load <8 x i16>, ptr @vec_v8i16
@@ -118,11 +118,11 @@
 define dso_local void @movi_modimm_t8() nounwind {
 ; CHECK-LABEL: movi_modimm_t8:
 ; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.4s, #1, msl #16
 ; CHECK-NEXT: adrp x8, vec_v8i16
 ; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16
-; CHECK-NEXT: movi v1.4s, #1, msl #16
-; CHECK-NEXT: ld1 { v0.8h }, [x8]
-; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ld1 { v1.8h }, [x8]
+; CHECK-NEXT: add v0.8h, v1.8h, v0.8h
 ; CHECK-NEXT: st1 { v0.8h }, [x8]
 ; CHECK-NEXT: ret
 %in = load <8 x i16>, ptr @vec_v8i16
@@ -134,11 +134,11 @@
 define dso_local void @movi_modimm_t9() nounwind {
 ; CHECK-LABEL: movi_modimm_t9:
 ; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.16b, #1
 ; CHECK-NEXT: adrp x8, vec_v8i16
 ; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16
-; CHECK-NEXT: movi v1.16b, #1
-; CHECK-NEXT: ld1 { v0.8h }, [x8]
-; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ld1 { v1.8h }, [x8]
+; CHECK-NEXT: add v0.8h, v1.8h, v0.8h
 ; CHECK-NEXT: st1 { v0.8h }, [x8]
 ; CHECK-NEXT: ret
 %in = load <8 x i16>, ptr @vec_v8i16
@@ -150,11 +150,11 @@
 define dso_local void @movi_modimm_t10() nounwind {
 ; CHECK-LABEL: movi_modimm_t10:
 ; CHECK: // %bb.0:
+; CHECK-NEXT: movi v0.2d, #0x00ffff0000ffff
 ; CHECK-NEXT: adrp x8, vec_v8i16
 ; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16
-; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
-; CHECK-NEXT: ld1 { v0.8h }, [x8]
-; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ld1 { v1.8h }, [x8]
+; CHECK-NEXT: add v0.8h, v1.8h, v0.8h
 ; CHECK-NEXT: st1 { v0.8h }, [x8]
 ; CHECK-NEXT: ret
 %in = load <8 x i16>, ptr @vec_v8i16
@@ -166,11 +166,11 @@
 define dso_local void @fmov_modimm_t11() nounwind {
 ; CHECK-LABEL: fmov_modimm_t11:
 ; CHECK: // %bb.0:
+; CHECK-NEXT: fmov v0.4s, #3.00000000
 ; CHECK-NEXT: adrp x8, vec_v8i16
 ; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16
-; CHECK-NEXT: fmov v1.4s, #3.00000000
-; CHECK-NEXT: ld1 { v0.8h }, [x8]
-; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ld1 { v1.8h }, [x8]
+; CHECK-NEXT: add v0.8h, v1.8h, v0.8h
 ; CHECK-NEXT: st1 { v0.8h }, [x8]
 ; CHECK-NEXT: ret
 %in = load <8 x i16>, ptr @vec_v8i16
@@ -182,11 +182,11 @@
 define dso_local void @fmov_modimm_t12() nounwind {
 ; CHECK-LABEL: fmov_modimm_t12:
 ; CHECK: // %bb.0:
+; CHECK-NEXT: fmov v0.2d, #0.17968750
 ; CHECK-NEXT: adrp x8, vec_v8i16
 ; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16
-; CHECK-NEXT: fmov v1.2d, #0.17968750
-; CHECK-NEXT: ld1 { v0.8h }, [x8]
-; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: ld1 { v1.8h }, [x8]
+; CHECK-NEXT: add v0.8h, v1.8h, v0.8h
 ; CHECK-NEXT: st1 { v0.8h }, [x8]
 ; CHECK-NEXT: ret
 %in = load <8 x i16>, ptr @vec_v8i16
diff --git a/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll b/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll
--- a/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-bit-gen.ll
@@ -151,23 +151,23 @@
 ; CHECK-NEXT: sub sp, sp, #32
 ; CHECK-NEXT: .cfi_def_cfa_offset 32
 ; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: cinc w9, w0, lt
-; CHECK-NEXT: asr w9, w9, #1
+; CHECK-NEXT: mov w9, wzr
+; CHECK-NEXT: cinc w8, w0, lt
+; CHECK-NEXT: asr w8, w8, #1
 ; CHECK-NEXT: .LBB11_1: // %do.body
 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT: bit v1.16b, v0.16b, v2.16b
 ; CHECK-NEXT: add x10, sp, #16
-; CHECK-NEXT: bfi x10, x8, #2, #2
 ; CHECK-NEXT: mov x11, sp
-; CHECK-NEXT: bfi x11, x8, #2, #2
-; CHECK-NEXT: add w8, w8, #1
-; CHECK-NEXT: cmp w8, #5
+; CHECK-NEXT: bfi x10, x9, #2, #2
+; CHECK-NEXT: bfi x11, x9, #2, #2
+; CHECK-NEXT: add w9, w9, #1
+; CHECK-NEXT: cmp w9, #5
 ; CHECK-NEXT: str q1, [sp, #16]
 ; CHECK-NEXT: str w0, [x10]
 ; CHECK-NEXT: ldr q1, [sp, #16]
 ; CHECK-NEXT: str q0, [sp]
-; CHECK-NEXT: str w9, [x11]
+; CHECK-NEXT: str w8, [x11]
 ; CHECK-NEXT: ldr q0, [sp]
 ; CHECK-NEXT: b.ne .LBB11_1
 ; CHECK-NEXT: // %bb.2: // %do.end
diff --git a/llvm/test/CodeGen/AArch64/aarch64-combine-add-sub-mul.ll b/llvm/test/CodeGen/AArch64/aarch64-combine-add-sub-mul.ll
--- a/llvm/test/CodeGen/AArch64/aarch64-combine-add-sub-mul.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-combine-add-sub-mul.ll
@@ -4,8 +4,8 @@
 define <2 x i64> @test_mul_add_2x64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
 ; CHECK-LABEL: test_mul_add_2x64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2
 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT: mla z0.d, p0/m, z1.d, z2.d
@@ -19,8 +19,8 @@
 define <1 x i64> @test_mul_add_1x64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) {
 ; CHECK-LABEL: test_mul_add_1x64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.d, vl1
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2
 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT: mla z0.d, p0/m, z1.d, z2.d
@@ -34,8 +34,8 @@
 define <2 x i64> @test_mul_sub_2x64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
 ; CHECK-LABEL: test_mul_sub_2x64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2
 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT: mls z0.d, p0/m, z1.d, z2.d
@@ -49,14 +49,15 @@
 define <2 x i64> @test_mul_sub_2x64_2(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %d) {
 ; CHECK-LABEL: test_mul_sub_2x64_2:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: // kill: def $q3 killed $q3 def $z3
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT: mul z2.d, p0/m, z2.d, z3.d
+; CHECK-NEXT: // kill: def $q3 killed $q3 def $z3
+; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2
 ; CHECK-NEXT: sdiv z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: sub v0.2d, v2.2d, v0.2d
+; CHECK-NEXT: movprfx z1, z2
+; CHECK-NEXT: mul z1.d, p0/m, z1.d, z3.d
+; CHECK-NEXT: sub v0.2d, v1.2d, v0.2d
 ; CHECK-NEXT: ret
 %div = sdiv <2 x i64> %a, %b
%mul = mul <2 x i64> %c, %d @@ -67,8 +68,8 @@ define <2 x i64> @test_mul_sub_2x64_3(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, <2 x i64> %d) { ; CHECK-LABEL: test_mul_sub_2x64_3: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: // kill: def $q3 killed $q3 def $z3 ; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2 @@ -85,8 +86,8 @@ define <1 x i64> @test_mul_sub_1x64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) { ; CHECK-LABEL: test_mul_sub_1x64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: ptrue p0.d, vl1 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 ; CHECK-NEXT: mul z1.d, p0/m, z1.d, z2.d ; CHECK-NEXT: sub d0, d1, d0 diff --git a/llvm/test/CodeGen/AArch64/aarch64-dup-ext-scalable.ll b/llvm/test/CodeGen/AArch64/aarch64-dup-ext-scalable.ll --- a/llvm/test/CodeGen/AArch64/aarch64-dup-ext-scalable.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-dup-ext-scalable.ll @@ -4,8 +4,8 @@ define @dupsext_v2i8_v2i16(i8 %src, %b) { ; CHECK-LABEL: dupsext_v2i8_v2i16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sxtb w8, w0 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sxtb w8, w0 ; CHECK-NEXT: mov z1.d, x8 ; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret @@ -20,8 +20,8 @@ define @dupsext_v4i8_v4i16(i8 %src, %b) { ; CHECK-LABEL: dupsext_v4i8_v4i16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sxtb w8, w0 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: sxtb w8, w0 ; CHECK-NEXT: mov z1.s, w8 ; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret @@ -36,8 +36,8 @@ define @dupsext_v8i8_v8i16(i8 %src, %b) { ; CHECK-LABEL: dupsext_v8i8_v8i16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sxtb w8, w0 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: sxtb w8, w0 ; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret @@ -52,8 +52,8 @@ define @dupsext_v2i8_v2i32(i8 %src, %b) { ; CHECK-LABEL: dupsext_v2i8_v2i32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sxtb w8, w0 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sxtb w8, w0 ; CHECK-NEXT: mov z1.d, x8 ; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret @@ -68,8 +68,8 @@ define @dupsext_v4i8_v4i32(i8 %src, %b) { ; CHECK-LABEL: dupsext_v4i8_v4i32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sxtb w8, w0 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: sxtb w8, w0 ; CHECK-NEXT: mov z1.s, w8 ; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret @@ -84,9 +84,9 @@ define @dupsext_v2i8_v2i64(i8 %src, %b) { ; CHECK-LABEL: dupsext_v2i8_v2i64: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-NEXT: sxtb x8, w0 -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov z1.d, x8 ; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret @@ -101,8 +101,8 @@ define @dupsext_v2i16_v2i32(i16 %src, %b) { ; CHECK-LABEL: dupsext_v2i16_v2i32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sxth w8, w0 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sxth w8, w0 ; CHECK-NEXT: mov z1.d, x8 ; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret @@ -117,8 +117,8 @@ define @dupsext_v4i16_v4i32(i16 %src, %b) { ; CHECK-LABEL: dupsext_v4i16_v4i32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sxth w8, w0 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: sxth w8, w0 ; CHECK-NEXT: mov z1.s, w8 ; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret @@ -133,9 +133,9 @@ define @dupsext_v2i16_v2i64(i16 %src, 
<vscale x 2 x i16> %b) { ; CHECK-LABEL: dupsext_v2i16_v2i64: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-NEXT: sxth x8, w0 -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov z1.d, x8 ; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret @@ -150,9 +150,9 @@ define <vscale x 2 x i64> @dupsext_v2i32_v2i64(i32 %src, <vscale x 2 x i32> %b) { ; CHECK-LABEL: dupsext_v2i32_v2i64: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-NEXT: sxtw x8, w0 -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov z1.d, x8 ; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret @@ -167,8 +167,8 @@ define <vscale x 2 x i16> @dupzext_v2i8_v2i16(i8 %src, <vscale x 2 x i8> %b) { ; CHECK-LABEL: dupzext_v2i8_v2i16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: and w8, w0, #0xff ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: and w8, w0, #0xff ; CHECK-NEXT: mov z1.d, x8 ; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret @@ -183,8 +183,8 @@ define <vscale x 4 x i16> @dupzext_v4i8_v4i16(i8 %src, <vscale x 4 x i8> %b) { ; CHECK-LABEL: dupzext_v4i8_v4i16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: and w8, w0, #0xff ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: and w8, w0, #0xff ; CHECK-NEXT: mov z1.s, w8 ; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret @@ -199,8 +199,8 @@ define <vscale x 8 x i16> @dupzext_v8i8_v8i16(i8 %src, <vscale x 8 x i8> %b) { ; CHECK-LABEL: dupzext_v8i8_v8i16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: and w8, w0, #0xff ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: and w8, w0, #0xff ; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret @@ -215,8 +215,8 @@ define <vscale x 2 x i32> @dupzext_v2i8_v2i32(i8 %src, <vscale x 2 x i8> %b) { ; CHECK-LABEL: dupzext_v2i8_v2i32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: and w8, w0, #0xff ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: and w8, w0, #0xff ; CHECK-NEXT: mov z1.d, x8 ; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret @@ -231,8 +231,8 @@ define <vscale x 4 x i32> @dupzext_v4i8_v4i32(i8 %src, <vscale x 4 x i8> %b) { ; CHECK-LABEL: dupzext_v4i8_v4i32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: and w8, w0, #0xff ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: and w8, w0, #0xff ; CHECK-NEXT: mov z1.s, w8 ; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret @@ -247,9 +247,9 @@ define <vscale x 2 x i64> @dupzext_v2i8_v2i64(i8 %src, <vscale x 2 x i8> %b) { ; CHECK-LABEL: dupzext_v2i8_v2i64: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-NEXT: and x8, x0, #0xff -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov z1.d, x8 ; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret @@ -264,8 +264,8 @@ define <vscale x 2 x i32> @dupzext_v2i16_v2i32(i16 %src, <vscale x 2 x i16> %b) { ; CHECK-LABEL: dupzext_v2i16_v2i32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: and w8, w0, #0xffff ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: and w8, w0, #0xffff ; CHECK-NEXT: mov z1.d, x8 ; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret @@ -280,8 +280,8 @@ define <vscale x 4 x i32> @dupzext_v4i16_v4i32(i16 %src, <vscale x 4 x i16> %b) { ; CHECK-LABEL: dupzext_v4i16_v4i32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: and w8, w0, #0xffff ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: and w8, w0, #0xffff ; CHECK-NEXT: mov z1.s, w8 ; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret @@ -296,9 +296,9 @@ define <vscale x 2 x i64> @dupzext_v2i16_v2i64(i16 %src, <vscale x 2 x i16> %b) { ; CHECK-LABEL: dupzext_v2i16_v2i64: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-NEXT: and x8, x0, #0xffff -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov z1.d, x8 ; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret @@ -313,8 +313,8 @@ define <vscale x 2 x i64> @dupzext_v2i32_v2i64(i32 %src, <vscale x 2 x i32> %b) { ; CHECK-LABEL:
dupzext_v2i32_v2i64: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z1.d, x8 ; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll b/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll --- a/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll @@ -100,8 +100,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sxtb w8, w0 ; CHECK-NEXT: shl v0.2s, v0.2s, #24 -; CHECK-NEXT: sshr v0.2s, v0.2s, #24 ; CHECK-NEXT: dup v1.2s, w8 +; CHECK-NEXT: sshr v0.2s, v0.2s, #24 ; CHECK-NEXT: mul v0.2s, v1.2s, v0.2s ; CHECK-NEXT: ret entry: @@ -116,8 +116,8 @@ define <2 x i64> @dupzext_v2i16_v2i64(i16 %src, <2 x i16> %b) { ; CHECK-LABEL: dupzext_v2i16_v2i64: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: and w8, w0, #0xffff ; CHECK-NEXT: movi d1, #0x00ffff0000ffff +; CHECK-NEXT: and w8, w0, #0xffff ; CHECK-NEXT: dup v2.2s, w8 ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b ; CHECK-NEXT: umull v0.2d, v2.2s, v0.2s @@ -191,8 +191,8 @@ ; CHECK-NEXT: cmp x0, #0 ; CHECK-NEXT: ldr q0, [x2] ; CHECK-NEXT: cset w8, gt -; CHECK-NEXT: cmtst v0.8h, v0.8h, v0.8h ; CHECK-NEXT: dup v1.8h, w8 +; CHECK-NEXT: cmtst v0.8h, v0.8h, v0.8h ; CHECK-NEXT: cmeq v1.8h, v1.8h, #0 ; CHECK-NEXT: bic v0.16b, v0.16b, v1.16b ; CHECK-NEXT: xtn v0.8b, v0.8h @@ -217,8 +217,8 @@ define <8 x i16> @typei1_v8i1_v8i16(i1 %src, <8 x i1> %b) { ; CHECK-LABEL: typei1_v8i1_v8i16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: and w8, w0, #0x1 ; CHECK-NEXT: movi v1.8b, #1 +; CHECK-NEXT: and w8, w0, #0x1 ; CHECK-NEXT: dup v2.8b, w8 ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b ; CHECK-NEXT: umull v0.8h, v2.8b, v0.8b diff --git a/llvm/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll b/llvm/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll --- a/llvm/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll @@ -1,6 +1,6 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -disable-post-ra < %s | FileCheck %s ; RUN: llc -verify-machineinstrs -mtriple=arm64-apple-ios -frame-pointer=all -disable-post-ra < %s | FileCheck %s --check-prefix=CHECK-MACHO - ; This test aims to check basic correctness of frame layout & ; frame access code. 
There are 8 functions in this test file, ; each function implements one element in the cartesian product @@ -81,10 +81,50 @@ ; volatile int vla[i1]; ; return i10 + (int)d10 + l1 + vla[0]; ;} - - - define i32 @novla_nodynamicrealign_call(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #0 { +; +; CHECK-LABEL: novla_nodynamicrealign_call: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #32 +; CHECK-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w30, -16 +; CHECK-NEXT: ldr d0, [sp, #56] +; CHECK-NEXT: fcvtzs w8, d0 +; CHECK-NEXT: ldr w9, [sp, #40] +; CHECK-NEXT: ldr w10, [sp, #12] +; CHECK-NEXT: add w8, w8, w9 +; CHECK-NEXT: orr w19, w8, w10 +; CHECK-NEXT: bl g +; CHECK-NEXT: add w0, w19, w0 +; CHECK-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: ret +; +; CHECK-MACHO-LABEL: novla_nodynamicrealign_call: +; CHECK-MACHO: ; %bb.0: ; %entry +; CHECK-MACHO-NEXT: sub sp, sp, #48 +; CHECK-MACHO-NEXT: stp x20, x19, [sp, #16] ; 16-byte Folded Spill +; CHECK-MACHO-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill +; CHECK-MACHO-NEXT: add x29, sp, #32 +; CHECK-MACHO-NEXT: .cfi_def_cfa w29, 16 +; CHECK-MACHO-NEXT: .cfi_offset w30, -8 +; CHECK-MACHO-NEXT: .cfi_offset w29, -16 +; CHECK-MACHO-NEXT: .cfi_offset w19, -24 +; CHECK-MACHO-NEXT: .cfi_offset w20, -32 +; CHECK-MACHO-NEXT: ldr d0, [x29, #32] +; CHECK-MACHO-NEXT: fcvtzs w8, d0 +; CHECK-MACHO-NEXT: ldr w9, [x29, #20] +; CHECK-MACHO-NEXT: ldr w10, [sp, #12] +; CHECK-MACHO-NEXT: add w8, w8, w9 +; CHECK-MACHO-NEXT: orr w19, w8, w10 +; CHECK-MACHO-NEXT: bl _g +; CHECK-MACHO-NEXT: add w0, w19, w0 +; CHECK-MACHO-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload +; CHECK-MACHO-NEXT: ldp x20, x19, [sp, #16] ; 16-byte Folded Reload +; CHECK-MACHO-NEXT: add sp, sp, #48 +; CHECK-MACHO-NEXT: ret entry: %l1 = alloca i32, align 4 %conv = fptosi double %d10 to i32 @@ -95,55 +135,47 @@ %add2 = add nsw i32 %add1, %call ret i32 %add2 } -; CHECK-LABEL: novla_nodynamicrealign_call -; CHECK: .cfi_startproc ; Check that used callee-saved registers are saved -; CHECK: sub sp, sp, #32 -; CHECK: stp x30, x19, [sp, #16] ; Check correctness of cfi pseudo-instructions -; CHECK: .cfi_def_cfa_offset 32 -; CHECK: .cfi_offset w19, -8 -; CHECK: .cfi_offset w30, -16 ; Check correct access to arguments passed on the stack, through stack pointer -; CHECK: ldr d[[DARG:[0-9]+]], [sp, #56] -; CHECK: ldr w[[IARG:[0-9]+]], [sp, #40] ; Check correct access to local variable on the stack, through stack pointer -; CHECK: ldr w[[ILOC:[0-9]+]], [sp, #12] ; Check epilogue: -; CHECK: ldp x30, x19, [sp, #16] -; CHECK: ret -; CHECK: .cfi_endproc - -; CHECK-MACHO-LABEL: _novla_nodynamicrealign_call: -; CHECK-MACHO: .cfi_startproc ; Check that used callee-saved registers are saved -; CHECK-MACHO: sub sp, sp, #48 -; CHECK-MACHO: stp x20, x19, [sp, #16] ; Check that the frame pointer is created: -; CHECK-MACHO: stp x29, x30, [sp, #32] -; CHECK-MACHO: add x29, sp, #32 ; Check correctness of cfi pseudo-instructions -; CHECK-MACHO: .cfi_def_cfa w29, 16 -; CHECK-MACHO: .cfi_offset w30, -8 -; CHECK-MACHO: .cfi_offset w29, -16 -; CHECK-MACHO: .cfi_offset w19, -24 -; CHECK-MACHO: .cfi_offset w20, -32 ; Check correct access to arguments passed on the stack, through frame pointer 
-; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32] -; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #20] ; Check correct access to local variable on the stack, through stack pointer -; CHECK-MACHO: ldr w[[ILOC:[0-9]+]], [sp, #12] ; Check epilogue: -; CHECK-MACHO: ldp x29, x30, [sp, #32] -; CHECK-MACHO: ldp x20, x19, [sp, #16] -; CHECK-MACHO: ret -; CHECK-MACHO: .cfi_endproc - - declare i32 @g() #0 - ; Function Attrs: nounwind define i32 @novla_nodynamicrealign_nocall(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 { +; +; CHECK-LABEL: novla_nodynamicrealign_nocall: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: ldr d0, [sp, #40] +; CHECK-NEXT: fcvtzs w8, d0 +; CHECK-NEXT: ldr w9, [sp, #24] +; CHECK-NEXT: ldr w10, [sp, #12] +; CHECK-NEXT: add w9, w9, w10 +; CHECK-NEXT: add w0, w8, w9 +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret +; +; CHECK-MACHO-LABEL: novla_nodynamicrealign_nocall: +; CHECK-MACHO: ; %bb.0: ; %entry +; CHECK-MACHO-NEXT: sub sp, sp, #32 +; CHECK-MACHO-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; CHECK-MACHO-NEXT: add x29, sp, #16 +; CHECK-MACHO-NEXT: ldr d0, [x29, #32] +; CHECK-MACHO-NEXT: fcvtzs w8, d0 +; CHECK-MACHO-NEXT: ldr w9, [x29, #20] +; CHECK-MACHO-NEXT: ldur w10, [x29, #-4] +; CHECK-MACHO-NEXT: add w9, w9, w10 +; CHECK-MACHO-NEXT: add w0, w8, w9 +; CHECK-MACHO-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; CHECK-MACHO-NEXT: add sp, sp, #32 +; CHECK-MACHO-NEXT: ret entry: %l1 = alloca i32, align 4 %conv = fptosi double %d10 to i32 @@ -152,20 +184,60 @@ %add1 = add nsw i32 %add, %l1.0.l1.0. ret i32 %add1 } -; CHECK-LABEL: novla_nodynamicrealign_nocall ; Check that space is reserved for one local variable on the stack. -; CHECK: sub sp, sp, #16 ; Check correct access to arguments passed on the stack, through stack pointer -; CHECK: ldr d[[DARG:[0-9]+]], [sp, #40] -; CHECK: ldr w[[IARG:[0-9]+]], [sp, #24] ; Check correct access to local variable on the stack, through stack pointer -; CHECK: ldr w[[ILOC:[0-9]+]], [sp, #12] ; Check epilogue: -; CHECK: add sp, sp, #16 -; CHECK: ret - - define i32 @novla_dynamicrealign_call(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #0 { +; +; CHECK-LABEL: novla_dynamicrealign_call: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: sub x9, sp, #96 +; CHECK-NEXT: and sp, x9, #0xffffffffffffff80 +; CHECK-NEXT: .cfi_def_cfa w29, 32 +; CHECK-NEXT: .cfi_offset w19, -16 +; CHECK-NEXT: .cfi_offset w30, -24 +; CHECK-NEXT: .cfi_offset w29, -32 +; CHECK-NEXT: ldr d0, [x29, #56] +; CHECK-NEXT: fcvtzs w8, d0 +; CHECK-NEXT: ldr w9, [x29, #40] +; CHECK-NEXT: ldr w10, [sp] +; CHECK-NEXT: add w8, w8, w9 +; CHECK-NEXT: orr w19, w8, w10 +; CHECK-NEXT: bl g +; CHECK-NEXT: add w0, w19, w0 +; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: ret +; +; CHECK-MACHO-LABEL: novla_dynamicrealign_call: +; CHECK-MACHO: ; %bb.0: ; %entry +; CHECK-MACHO-NEXT: stp x20, x19, [sp, #-32]! 
; 16-byte Folded Spill +; CHECK-MACHO-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; CHECK-MACHO-NEXT: add x29, sp, #16 +; CHECK-MACHO-NEXT: sub x9, sp, #96 +; CHECK-MACHO-NEXT: and sp, x9, #0xffffffffffffff80 +; CHECK-MACHO-NEXT: .cfi_def_cfa w29, 16 +; CHECK-MACHO-NEXT: .cfi_offset w30, -8 +; CHECK-MACHO-NEXT: .cfi_offset w29, -16 +; CHECK-MACHO-NEXT: .cfi_offset w19, -24 +; CHECK-MACHO-NEXT: .cfi_offset w20, -32 +; CHECK-MACHO-NEXT: ldr d0, [x29, #32] +; CHECK-MACHO-NEXT: fcvtzs w8, d0 +; CHECK-MACHO-NEXT: ldr w9, [x29, #20] +; CHECK-MACHO-NEXT: ldr w10, [sp] +; CHECK-MACHO-NEXT: add w8, w8, w9 +; CHECK-MACHO-NEXT: orr w19, w8, w10 +; CHECK-MACHO-NEXT: bl _g +; CHECK-MACHO-NEXT: add w0, w19, w0 +; CHECK-MACHO-NEXT: sub sp, x29, #16 +; CHECK-MACHO-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; CHECK-MACHO-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; CHECK-MACHO-NEXT: ret entry: %l1 = alloca i32, align 128 %conv = fptosi double %d10 to i32 @@ -176,67 +248,56 @@ %add2 = add nsw i32 %add1, %call ret i32 %add2 } - -; CHECK-LABEL: novla_dynamicrealign_call -; CHECK: .cfi_startproc ; Check that used callee-saved registers are saved -; CHECK: stp x29, x30, [sp, #-32]! ; Check that the frame pointer is created: -; CHECK: str x19, [sp, #16] -; CHECK: mov x29, sp ; Check the dynamic realignment of the stack pointer to a 128-byte boundary -; CHECK: sub x9, sp, #96 -; CHECK: and sp, x9, #0xffffffffffffff80 ; Check correctness of cfi pseudo-instructions -; CHECK: .cfi_def_cfa w29, 32 -; CHECK: .cfi_offset w19, -16 -; CHECK: .cfi_offset w30, -24 -; CHECK: .cfi_offset w29, -32 ; Check correct access to arguments passed on the stack, through frame pointer -; CHECK: ldr d[[DARG:[0-9]+]], [x29, #56] -; CHECK: ldr w[[IARG:[0-9]+]], [x29, #40] ; Check correct access to local variable on the stack, through re-aligned stack pointer -; CHECK: ldr w[[ILOC:[0-9]+]], [sp] ; Check epilogue: ; Check that stack pointer get restored from frame pointer. -; CHECK: mov sp, x29 -; CHECK: ldr x19, [sp, #16] -; CHECK: ldp x29, x30, [sp], #32 -; CHECK: ret -; CHECK: .cfi_endproc - -; CHECK-MACHO-LABEL: _novla_dynamicrealign_call: -; CHECK-MACHO: .cfi_startproc ; Check that used callee-saved registers are saved -; CHECK-MACHO: stp x20, x19, [sp, #-32]! ; Check that the frame pointer is created: -; CHECK-MACHO: stp x29, x30, [sp, #16] -; CHECK-MACHO: add x29, sp, #16 ; Check the dynamic realignment of the stack pointer to a 128-byte boundary -; CHECK-MACHO: sub x9, sp, #96 -; CHECK-MACHO: and sp, x9, #0xffffffffffffff80 ; Check correctness of cfi pseudo-instructions -; CHECK-MACHO: .cfi_def_cfa w29, 16 -; CHECK-MACHO: .cfi_offset w30, -8 -; CHECK-MACHO: .cfi_offset w29, -16 -; CHECK-MACHO: .cfi_offset w19, -24 -; CHECK-MACHO: .cfi_offset w20, -32 ; Check correct access to arguments passed on the stack, through frame pointer -; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32] -; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #20] ; Check correct access to local variable on the stack, through re-aligned stack pointer -; CHECK-MACHO: ldr w[[ILOC:[0-9]+]], [sp] ; Check epilogue: ; Check that stack pointer get restored from frame pointer. 
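; (Illustrative aside, not one of the original assertions: once the stack has
; been dynamically realigned, the final sp is no longer at a compile-time
; constant offset from the incoming sp, so the epilogue cannot simply do
; "add sp, sp, #N"; it must rebuild sp from the frame pointer, e.g.
; "mov sp, x29" above for ELF and "sub sp, x29, #16" below for MachO, before
; reloading the callee-saved registers.)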
-; CHECK-MACHO: sub sp, x29, #16 -; CHECK-MACHO: ldp x29, x30, [sp, #16] -; CHECK-MACHO: ldp x20, x19, [sp], #32 -; CHECK-MACHO: ret -; CHECK-MACHO: .cfi_endproc - - ; Function Attrs: nounwind define i32 @novla_dynamicrealign_nocall(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 { +; +; CHECK-LABEL: novla_dynamicrealign_nocall: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: sub x9, sp, #112 +; CHECK-NEXT: and sp, x9, #0xffffffffffffff80 +; CHECK-NEXT: ldr d0, [x29, #40] +; CHECK-NEXT: fcvtzs w8, d0 +; CHECK-NEXT: ldr w9, [x29, #24] +; CHECK-NEXT: ldr w10, [sp] +; CHECK-NEXT: add w9, w9, w10 +; CHECK-NEXT: add w0, w8, w9 +; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ret +; +; CHECK-MACHO-LABEL: novla_dynamicrealign_nocall: +; CHECK-MACHO: ; %bb.0: ; %entry +; CHECK-MACHO-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-MACHO-NEXT: mov x29, sp +; CHECK-MACHO-NEXT: sub x9, sp, #112 +; CHECK-MACHO-NEXT: and sp, x9, #0xffffffffffffff80 +; CHECK-MACHO-NEXT: ldr d0, [x29, #32] +; CHECK-MACHO-NEXT: fcvtzs w8, d0 +; CHECK-MACHO-NEXT: ldr w9, [x29, #20] +; CHECK-MACHO-NEXT: ldr w10, [sp] +; CHECK-MACHO-NEXT: add w9, w9, w10 +; CHECK-MACHO-NEXT: add w0, w8, w9 +; CHECK-MACHO-NEXT: mov sp, x29 +; CHECK-MACHO-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-MACHO-NEXT: ret entry: %l1 = alloca i32, align 128 %conv = fptosi double %d10 to i32 @@ -245,27 +306,79 @@ %add1 = add nsw i32 %add, %l1.0.l1.0. ret i32 %add1 } - -; CHECK-LABEL: novla_dynamicrealign_nocall ; Check that the frame pointer is created: -; CHECK: stp x29, x30, [sp, #-16]! -; CHECK: mov x29, sp ; Check the dynamic realignment of the stack pointer to a 128-byte boundary -; CHECK: sub x9, sp, #112 -; CHECK: and sp, x9, #0xffffffffffffff80 ; Check correct access to arguments passed on the stack, through frame pointer -; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40] -; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24] ; Check correct access to local variable on the stack, through re-aligned stack pointer -; CHECK: ldr w[[ILOC:[0-9]+]], [sp] ; Check epilogue: ; Check that stack pointer get restored from frame pointer. -; CHECK: mov sp, x29 -; CHECK: ldp x29, x30, [sp], #16 -; CHECK: ret - - define i32 @vla_nodynamicrealign_call(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #0 { +; +; CHECK-LABEL: vla_nodynamicrealign_call: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa w29, 32 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w30, -24 +; CHECK-NEXT: .cfi_offset w29, -32 +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: ubfiz x8, x0, #2, #32 +; CHECK-NEXT: ldr w9, [x29, #40] +; CHECK-NEXT: ldr d0, [x29, #56] +; CHECK-NEXT: add x8, x8, #15 +; CHECK-NEXT: and x8, x8, #0x7fffffff0 +; CHECK-NEXT: mov x10, sp +; CHECK-NEXT: sub x19, x10, x8 +; CHECK-NEXT: mov sp, x19 +; CHECK-NEXT: fcvtzs w8, d0 +; CHECK-NEXT: ldur w10, [x29, #-4] +; CHECK-NEXT: add w8, w8, w9 +; CHECK-NEXT: orr w20, w8, w10 +; CHECK-NEXT: bl g +; CHECK-NEXT: ldr w8, [x19] +; CHECK-NEXT: add w8, w0, w8 +; CHECK-NEXT: add w0, w20, w8 +; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: ret +; +; CHECK-MACHO-LABEL: vla_nodynamicrealign_call: +; CHECK-MACHO: ; %bb.0: ; %entry +; CHECK-MACHO-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill +; CHECK-MACHO-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; CHECK-MACHO-NEXT: add x29, sp, #16 +; CHECK-MACHO-NEXT: sub sp, sp, #16 +; CHECK-MACHO-NEXT: .cfi_def_cfa w29, 16 +; CHECK-MACHO-NEXT: .cfi_offset w30, -8 +; CHECK-MACHO-NEXT: .cfi_offset w29, -16 +; CHECK-MACHO-NEXT: .cfi_offset w19, -24 +; CHECK-MACHO-NEXT: .cfi_offset w20, -32 +; CHECK-MACHO-NEXT: ; kill: def $w0 killed $w0 def $x0 +; CHECK-MACHO-NEXT: ubfiz x8, x0, #2, #32 +; CHECK-MACHO-NEXT: ldr w9, [x29, #20] +; CHECK-MACHO-NEXT: ldr d0, [x29, #32] +; CHECK-MACHO-NEXT: add x8, x8, #15 +; CHECK-MACHO-NEXT: and x8, x8, #0x7fffffff0 +; CHECK-MACHO-NEXT: mov x10, sp +; CHECK-MACHO-NEXT: sub x19, x10, x8 +; CHECK-MACHO-NEXT: mov sp, x19 +; CHECK-MACHO-NEXT: fcvtzs w8, d0 +; CHECK-MACHO-NEXT: ldur w10, [x29, #-20] +; CHECK-MACHO-NEXT: add w8, w8, w9 +; CHECK-MACHO-NEXT: orr w20, w8, w10 +; CHECK-MACHO-NEXT: bl _g +; CHECK-MACHO-NEXT: ldr w8, [x19] +; CHECK-MACHO-NEXT: add w8, w0, w8 +; CHECK-MACHO-NEXT: add w0, w20, w8 +; CHECK-MACHO-NEXT: sub sp, x29, #16 +; CHECK-MACHO-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; CHECK-MACHO-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; CHECK-MACHO-NEXT: ret entry: %l1 = alloca i32, align 4 %0 = zext i32 %i1 to i64 @@ -280,48 +393,67 @@ %add3 = add nsw i32 %add2, %1 ret i32 %add3 } - -; CHECK-LABEL: vla_nodynamicrealign_call -; CHECK: .cfi_startproc ; Check that used callee-saved registers are saved -; CHECK: stp x29, x30, [sp, #-32]! ; Check that the frame pointer is created: -; CHECK: stp x20, x19, [sp, #16] -; CHECK: mov x29, sp ; Check that space is reserved on the stack for the local variable, ; rounded up to a multiple of 16 to keep the stack pointer 16-byte aligned. 
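; (Illustrative aside, not one of the original assertions: the only local here
; is a single 4-byte "int l1", yet 16 bytes are reserved, i.e.
;   space = (sizeof(int) + 15) & ~15 = 16
; because AAPCS64 requires sp to stay 16-byte aligned.)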
-; CHECK: sub sp, sp, #16 ; Check correctness of cfi pseudo-instructions -; CHECK: .cfi_def_cfa w29, 32 -; CHECK: .cfi_offset w19, -8 -; CHECK: .cfi_offset w20, -16 -; CHECK: .cfi_offset w30, -24 -; CHECK: .cfi_offset w29, -32 ; Check correct access to arguments passed on the stack, through frame pointer -; CHECK: ldr w[[IARG:[0-9]+]], [x29, #40] ; Check correct reservation of 16-byte aligned VLA (size in w0) on stack -; CHECK: ubfiz x9, x0, #2, #32 -; CHECK: add x9, x9, #15 -; CHECK: ldr d[[DARG:[0-9]+]], [x29, #56] -; CHECK: and x9, x9, #0x7fffffff0 -; CHECK: mov x10, sp -; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9 -; CHECK: mov sp, x[[VLASPTMP]] ; Check correct access to local variable, through frame pointer -; CHECK: ldur w[[ILOC:[0-9]+]], [x29, #-4] ; Check correct accessing of the VLA variable through the base pointer -; CHECK: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]] ; Check epilogue: ; Check that stack pointer get restored from frame pointer. -; CHECK: mov sp, x29 -; CHECK: ldp x20, x19, [sp, #16] -; CHECK: ldp x29, x30, [sp], #32 -; CHECK: ret -; CHECK: .cfi_endproc - - ; Function Attrs: nounwind define i32 @vla_nodynamicrealign_nocall(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 { +; +; CHECK-LABEL: vla_nodynamicrealign_nocall: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: ubfiz x8, x0, #2, #32 +; CHECK-NEXT: ldr w9, [x29, #24] +; CHECK-NEXT: ldr d0, [x29, #40] +; CHECK-NEXT: add x8, x8, #15 +; CHECK-NEXT: and x8, x8, #0x7fffffff0 +; CHECK-NEXT: mov x10, sp +; CHECK-NEXT: sub x8, x10, x8 +; CHECK-NEXT: mov sp, x8 +; CHECK-NEXT: fcvtzs w10, d0 +; CHECK-NEXT: ldur w11, [x29, #-4] +; CHECK-NEXT: add w9, w9, w11 +; CHECK-NEXT: ldr w8, [x8] +; CHECK-NEXT: add w9, w10, w9 +; CHECK-NEXT: add w0, w9, w8 +; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-NEXT: ret +; +; CHECK-MACHO-LABEL: vla_nodynamicrealign_nocall: +; CHECK-MACHO: ; %bb.0: ; %entry +; CHECK-MACHO-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-MACHO-NEXT: mov x29, sp +; CHECK-MACHO-NEXT: sub sp, sp, #16 +; CHECK-MACHO-NEXT: ; kill: def $w0 killed $w0 def $x0 +; CHECK-MACHO-NEXT: ubfiz x8, x0, #2, #32 +; CHECK-MACHO-NEXT: ldr w9, [x29, #20] +; CHECK-MACHO-NEXT: ldr d0, [x29, #32] +; CHECK-MACHO-NEXT: add x8, x8, #15 +; CHECK-MACHO-NEXT: and x8, x8, #0x7fffffff0 +; CHECK-MACHO-NEXT: mov x10, sp +; CHECK-MACHO-NEXT: sub x8, x10, x8 +; CHECK-MACHO-NEXT: mov sp, x8 +; CHECK-MACHO-NEXT: fcvtzs w10, d0 +; CHECK-MACHO-NEXT: ldur w11, [x29, #-4] +; CHECK-MACHO-NEXT: add w9, w9, w11 +; CHECK-MACHO-NEXT: ldr w8, [x8] +; CHECK-MACHO-NEXT: add w9, w10, w9 +; CHECK-MACHO-NEXT: add w0, w9, w8 +; CHECK-MACHO-NEXT: mov sp, x29 +; CHECK-MACHO-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-MACHO-NEXT: ret entry: %l1 = alloca i32, align 4 %0 = zext i32 %i1 to i64 @@ -334,37 +466,94 @@ %add2 = add nsw i32 %add1, %1 ret i32 %add2 } - -; CHECK-LABEL: vla_nodynamicrealign_nocall ; Check that the frame pointer is created: -; CHECK: stp x29, x30, [sp, #-16]! -; CHECK: mov x29, sp ; Check that space is reserved on the stack for the local variable, ; rounded up to a multiple of 16 to keep the stack pointer 16-byte aligned. 
-; CHECK: sub sp, sp, #16 ; Check correctness of cfi pseudo-instructions ; Check correct access to arguments passed on the stack, through frame pointer -; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24] ; Check correct reservation of 16-byte aligned VLA (size in w0) on stack -; CHECK: ubfiz x9, x0, #2, #32 -; CHECK: add x9, x9, #15 -; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40] -; CHECK: and x9, x9, #0x7fffffff0 -; CHECK: mov x10, sp -; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9 -; CHECK: mov sp, x[[VLASPTMP]] ; Check correct access to local variable, through frame pointer -; CHECK: ldur w[[ILOC:[0-9]+]], [x29, #-4] ; Check correct accessing of the VLA variable through the base pointer -; CHECK: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]] ; Check epilogue: ; Check that stack pointer get restored from frame pointer. -; CHECK: mov sp, x29 -; CHECK: ldp x29, x30, [sp], #16 -; CHECK: ret - - define i32 @vla_dynamicrealign_call(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #0 { +; +; CHECK-LABEL: vla_dynamicrealign_call: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: stp x29, x30, [sp, #-48]! // 16-byte Folded Spill +; CHECK-NEXT: str x21, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: sub x9, sp, #80 +; CHECK-NEXT: and sp, x9, #0xffffffffffffff80 +; CHECK-NEXT: mov x19, sp +; CHECK-NEXT: .cfi_def_cfa w29, 48 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w21, -32 +; CHECK-NEXT: .cfi_offset w30, -40 +; CHECK-NEXT: .cfi_offset w29, -48 +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: ubfiz x8, x0, #2, #32 +; CHECK-NEXT: ldr w9, [x29, #56] +; CHECK-NEXT: ldr d0, [x29, #72] +; CHECK-NEXT: add x8, x8, #15 +; CHECK-NEXT: and x8, x8, #0x7fffffff0 +; CHECK-NEXT: mov x10, sp +; CHECK-NEXT: sub x20, x10, x8 +; CHECK-NEXT: mov sp, x20 +; CHECK-NEXT: fcvtzs w8, d0 +; CHECK-NEXT: ldr w10, [x19] +; CHECK-NEXT: add w8, w8, w9 +; CHECK-NEXT: orr w21, w8, w10 +; CHECK-NEXT: bl g +; CHECK-NEXT: ldr w8, [x20] +; CHECK-NEXT: add w8, w0, w8 +; CHECK-NEXT: add w0, w21, w8 +; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldr x21, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldp x29, x30, [sp], #48 // 16-byte Folded Reload +; CHECK-NEXT: ret +; +; CHECK-MACHO-LABEL: vla_dynamicrealign_call: +; CHECK-MACHO: ; %bb.0: ; %entry +; CHECK-MACHO-NEXT: stp x22, x21, [sp, #-48]! 
; 16-byte Folded Spill +; CHECK-MACHO-NEXT: stp x20, x19, [sp, #16] ; 16-byte Folded Spill +; CHECK-MACHO-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill +; CHECK-MACHO-NEXT: add x29, sp, #32 +; CHECK-MACHO-NEXT: sub x9, sp, #80 +; CHECK-MACHO-NEXT: and sp, x9, #0xffffffffffffff80 +; CHECK-MACHO-NEXT: mov x19, sp +; CHECK-MACHO-NEXT: .cfi_def_cfa w29, 16 +; CHECK-MACHO-NEXT: .cfi_offset w30, -8 +; CHECK-MACHO-NEXT: .cfi_offset w29, -16 +; CHECK-MACHO-NEXT: .cfi_offset w19, -24 +; CHECK-MACHO-NEXT: .cfi_offset w20, -32 +; CHECK-MACHO-NEXT: .cfi_offset w21, -40 +; CHECK-MACHO-NEXT: .cfi_offset w22, -48 +; CHECK-MACHO-NEXT: ; kill: def $w0 killed $w0 def $x0 +; CHECK-MACHO-NEXT: ubfiz x8, x0, #2, #32 +; CHECK-MACHO-NEXT: ldr w9, [x29, #20] +; CHECK-MACHO-NEXT: ldr d0, [x29, #32] +; CHECK-MACHO-NEXT: add x8, x8, #15 +; CHECK-MACHO-NEXT: and x8, x8, #0x7fffffff0 +; CHECK-MACHO-NEXT: mov x10, sp +; CHECK-MACHO-NEXT: sub x20, x10, x8 +; CHECK-MACHO-NEXT: mov sp, x20 +; CHECK-MACHO-NEXT: fcvtzs w8, d0 +; CHECK-MACHO-NEXT: ldr w10, [x19] +; CHECK-MACHO-NEXT: add w8, w8, w9 +; CHECK-MACHO-NEXT: orr w21, w8, w10 +; CHECK-MACHO-NEXT: bl _g +; CHECK-MACHO-NEXT: ldr w8, [x20] +; CHECK-MACHO-NEXT: add w8, w0, w8 +; CHECK-MACHO-NEXT: add w0, w21, w8 +; CHECK-MACHO-NEXT: sub sp, x29, #32 +; CHECK-MACHO-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload +; CHECK-MACHO-NEXT: ldp x20, x19, [sp, #16] ; 16-byte Folded Reload +; CHECK-MACHO-NEXT: ldp x22, x21, [sp], #48 ; 16-byte Folded Reload +; CHECK-MACHO-NEXT: ret entry: %l1 = alloca i32, align 128 %0 = zext i32 %i1 to i64 @@ -379,101 +568,90 @@ %add3 = add nsw i32 %add2, %1 ret i32 %add3 } - -; CHECK-LABEL: vla_dynamicrealign_call -; CHECK: .cfi_startproc ; Check that used callee-saved registers are saved -; CHECK: stp x29, x30, [sp, #-48]! -; CHECK: str x21, [sp, #16] -; CHECK: stp x20, x19, [sp, #32] ; Check that the frame pointer is created: -; CHECK: mov x29, sp ; Check that the stack pointer gets re-aligned to 128 ; bytes & the base pointer (x19) gets initialized to ; this 128-byte aligned area for local variables & ; spill slots -; CHECK: sub x9, sp, #80 -; CHECK: and sp, x9, #0xffffffffffffff80 -; CHECK: mov x19, sp ; Check correctness of cfi pseudo-instructions -; CHECK: .cfi_def_cfa w29, 48 -; CHECK: .cfi_offset w19, -8 -; CHECK: .cfi_offset w20, -16 -; CHECK: .cfi_offset w21, -32 -; CHECK: .cfi_offset w30, -40 -; CHECK: .cfi_offset w29, -48 ; Check correct access to arguments passed on the stack, through frame pointer -; CHECK: ldr w[[IARG:[0-9]+]], [x29, #56] ; Check correct reservation of 16-byte aligned VLA (size in w0) on stack ; and set-up of base pointer (x19). -; CHECK: ubfiz x9, x0, #2, #32 -; CHECK: add x9, x9, #15 -; CHECK: ldr d[[DARG:[0-9]+]], [x29, #72] -; CHECK: and x9, x9, #0x7fffffff0 -; CHECK: mov x10, sp -; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9 -; CHECK: mov sp, x[[VLASPTMP]] ; Check correct access to local variable, through base pointer -; CHECK: ldr w[[ILOC:[0-9]+]], [x19] -; CHECK: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]] ; Check epilogue: ; Check that stack pointer get restored from frame pointer. -; CHECK: mov sp, x29 -; CHECK: ldp x20, x19, [sp, #32] -; CHECK: ldr x21, [sp, #16] -; CHECK: ldp x29, x30, [sp], #48 -; CHECK: ret -; CHECK: .cfi_endproc - -; CHECK-MACHO-LABEL: _vla_dynamicrealign_call: -; CHECK-MACHO: .cfi_startproc ; Check that used callee-saved registers are saved -; CHECK-MACHO: stp x22, x21, [sp, #-48]! 
-; CHECK-MACHO: stp x20, x19, [sp, #16] ; Check that the frame pointer is created: -; CHECK-MACHO: stp x29, x30, [sp, #32] -; CHECK-MACHO: add x29, sp, #32 ; Check that the stack pointer gets re-aligned to 128 ; bytes & the base pointer (x19) gets initialized to ; this 128-byte aligned area for local variables & ; spill slots -; CHECK-MACHO: sub x9, sp, #80 -; CHECK-MACHO: and sp, x9, #0xffffffffffffff80 -; CHECK-MACHO: mov x19, sp ; Check correctness of cfi pseudo-instructions -; CHECK-MACHO: .cfi_def_cfa w29, 16 -; CHECK-MACHO: .cfi_offset w30, -8 -; CHECK-MACHO: .cfi_offset w29, -16 -; CHECK-MACHO: .cfi_offset w19, -24 -; CHECK-MACHO: .cfi_offset w20, -32 -; CHECK-MACHO: .cfi_offset w21, -40 -; CHECK-MACHO: .cfi_offset w22, -48 ; Check correct access to arguments passed on the stack, through frame pointer -; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #20] ; Check correct reservation of 16-byte aligned VLA (size in w0) on stack ; and set-up of base pointer (x19). -; CHECK-MACHO: ubfiz x9, x0, #2, #32 -; CHECK-MACHO: add x9, x9, #15 -; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32] -; CHECK-MACHO: and x9, x9, #0x7fffffff0 -; CHECK-MACHO: mov x10, sp -; CHECK-MACHO: sub x[[VLASPTMP:[0-9]+]], x10, x9 -; CHECK-MACHO: mov sp, x[[VLASPTMP]] ; Check correct access to local variable, through base pointer -; CHECK-MACHO: ldr w[[ILOC:[0-9]+]], [x19] -; CHECK-MACHO: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]] ; Check epilogue: ; Check that stack pointer get restored from frame pointer. -; CHECK-MACHO: sub sp, x29, #32 -; CHECK-MACHO: ldp x29, x30, [sp, #32] -; CHECK-MACHO: ldp x20, x19, [sp, #16] -; CHECK-MACHO: ldp x22, x21, [sp], #48 -; CHECK-MACHO: ret -; CHECK-MACHO: .cfi_endproc - - ; Function Attrs: nounwind define i32 @vla_dynamicrealign_nocall(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 { +; +; CHECK-LABEL: vla_dynamicrealign_nocall: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: sub x9, sp, #96 +; CHECK-NEXT: and sp, x9, #0xffffffffffffff80 +; CHECK-NEXT: mov x19, sp +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: ubfiz x8, x0, #2, #32 +; CHECK-NEXT: ldr w9, [x29, #40] +; CHECK-NEXT: ldr d0, [x29, #56] +; CHECK-NEXT: add x8, x8, #15 +; CHECK-NEXT: and x8, x8, #0x7fffffff0 +; CHECK-NEXT: mov x10, sp +; CHECK-NEXT: sub x8, x10, x8 +; CHECK-NEXT: mov sp, x8 +; CHECK-NEXT: fcvtzs w10, d0 +; CHECK-NEXT: ldr w11, [x19] +; CHECK-NEXT: add w9, w9, w11 +; CHECK-NEXT: ldr w8, [x8] +; CHECK-NEXT: add w9, w10, w9 +; CHECK-NEXT: add w0, w9, w8 +; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: ret +; +; CHECK-MACHO-LABEL: vla_dynamicrealign_nocall: +; CHECK-MACHO: ; %bb.0: ; %entry +; CHECK-MACHO-NEXT: stp x20, x19, [sp, #-32]! 
; 16-byte Folded Spill +; CHECK-MACHO-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; CHECK-MACHO-NEXT: add x29, sp, #16 +; CHECK-MACHO-NEXT: sub x9, sp, #96 +; CHECK-MACHO-NEXT: and sp, x9, #0xffffffffffffff80 +; CHECK-MACHO-NEXT: mov x19, sp +; CHECK-MACHO-NEXT: ; kill: def $w0 killed $w0 def $x0 +; CHECK-MACHO-NEXT: ubfiz x8, x0, #2, #32 +; CHECK-MACHO-NEXT: ldr w9, [x29, #20] +; CHECK-MACHO-NEXT: ldr d0, [x29, #32] +; CHECK-MACHO-NEXT: add x8, x8, #15 +; CHECK-MACHO-NEXT: and x8, x8, #0x7fffffff0 +; CHECK-MACHO-NEXT: mov x10, sp +; CHECK-MACHO-NEXT: sub x8, x10, x8 +; CHECK-MACHO-NEXT: mov sp, x8 +; CHECK-MACHO-NEXT: fcvtzs w10, d0 +; CHECK-MACHO-NEXT: ldr w11, [x19] +; CHECK-MACHO-NEXT: add w9, w9, w11 +; CHECK-MACHO-NEXT: ldr w8, [x8] +; CHECK-MACHO-NEXT: add w9, w10, w9 +; CHECK-MACHO-NEXT: add w0, w9, w8 +; CHECK-MACHO-NEXT: sub sp, x29, #16 +; CHECK-MACHO-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; CHECK-MACHO-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; CHECK-MACHO-NEXT: ret entry: %l1 = alloca i32, align 128 %0 = zext i32 %i1 to i64 @@ -486,78 +664,90 @@ %add2 = add nsw i32 %add1, %1 ret i32 %add2 } - -; CHECK-LABEL: vla_dynamicrealign_nocall ; Check that used callee-saved registers are saved -; CHECK: stp x29, x30, [sp, #-32]! -; CHECK: str x19, [sp, #16] ; Check that the frame pointer is created: -; CHECK: mov x29, sp ; Check that the stack pointer gets re-aligned to 128 ; bytes & the base pointer (x19) gets initialized to ; this 128-byte aligned area for local variables & ; spill slots -; CHECK: sub x9, sp, #96 -; CHECK: and sp, x9, #0xffffffffffffff80 -; CHECK: mov x19, sp ; Check correct access to arguments passed on the stack, through frame pointer -; CHECK: ldr w[[IARG:[0-9]+]], [x29, #40] ; Check correct reservation of 16-byte aligned VLA (size in w0) on stack ; and set-up of base pointer (x19). -; CHECK: ubfiz x9, x0, #2, #32 -; CHECK: add x9, x9, #15 -; CHECK: ldr d[[DARG:[0-9]+]], [x29, #56] -; CHECK: and x9, x9, #0x7fffffff0 -; CHECK: mov x10, sp -; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9 -; CHECK: mov sp, x[[VLASPTMP]] ; Check correct access to local variable, through base pointer -; CHECK: ldr w[[ILOC:[0-9]+]], [x19] -; CHECK: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]] ; Check epilogue: ; Check that stack pointer get restored from frame pointer. -; CHECK: mov sp, x29 -; CHECK: ldr x19, [sp, #16] -; CHECK: ldp x29, x30, [sp], #32 -; CHECK: ret - -; CHECK-MACHO-LABEL: _vla_dynamicrealign_nocall: ; Check that used callee-saved registers are saved -; CHECK-MACHO: stp x20, x19, [sp, #-32]! ; Check that the frame pointer is created: -; CHECK-MACHO: stp x29, x30, [sp, #16] -; CHECK-MACHO: add x29, sp, #16 ; Check that the stack pointer gets re-aligned to 128 ; bytes & the base pointer (x19) gets initialized to ; this 128-byte aligned area for local variables & ; spill slots -; CHECK-MACHO: sub x9, sp, #96 -; CHECK-MACHO: and sp, x9, #0xffffffffffffff80 -; CHECK-MACHO: mov x19, sp ; Check correct access to arguments passed on the stack, through frame pointer -; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #20] ; Check correct reservation of 16-byte aligned VLA (size in w0) on stack ; and set-up of base pointer (x19). 
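; (Illustrative aside, not one of the original assertions: the ubfiz/add/and
; sequence below computes the 16-byte-aligned VLA size from the i32 element
; count in w0, roughly
;   bytes = ((zext64(w0) << 2) + 15) & 0x7fffffff0
; so e.g. w0 == 5 gives (20 + 15) & ~15 = 32 bytes of stack.)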
-; CHECK-MACHO: ubfiz x9, x0, #2, #32 -; CHECK-MACHO: add x9, x9, #15 -; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32] -; CHECK-MACHO: and x9, x9, #0x7fffffff0 -; CHECK-MACHO: mov x10, sp -; CHECK-MACHO: sub x[[VLASPTMP:[0-9]+]], x10, x9 -; CHECK-MACHO: mov sp, x[[VLASPTMP]] ; Check correct access to local variable, through base pointer -; CHECK-MACHO: ldr w[[ILOC:[0-9]+]], [x19] -; CHECK-MACHO: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]] ; Check epilogue: ; Check that stack pointer get restored from frame pointer. -; CHECK-MACHO: sub sp, x29, #16 -; CHECK-MACHO: ldp x29, x30, [sp, #16] -; CHECK-MACHO: ldp x20, x19, [sp], #32 -; CHECK-MACHO: ret - - ; Function Attrs: nounwind define i32 @vla_dynamicrealign_nocall_large_align(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 { +; +; CHECK-LABEL: vla_dynamicrealign_nocall_large_align: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: sub x9, sp, #7, lsl #12 // =28672 +; CHECK-NEXT: sub x9, x9, #4064 +; CHECK-NEXT: and sp, x9, #0xffffffffffff8000 +; CHECK-NEXT: mov x19, sp +; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: ubfiz x8, x0, #2, #32 +; CHECK-NEXT: ldr w9, [x29, #40] +; CHECK-NEXT: ldr d0, [x29, #56] +; CHECK-NEXT: add x8, x8, #15 +; CHECK-NEXT: and x8, x8, #0x7fffffff0 +; CHECK-NEXT: mov x10, sp +; CHECK-NEXT: sub x8, x10, x8 +; CHECK-NEXT: mov sp, x8 +; CHECK-NEXT: fcvtzs w10, d0 +; CHECK-NEXT: ldr w11, [x19] +; CHECK-NEXT: add w9, w9, w11 +; CHECK-NEXT: ldr w8, [x8] +; CHECK-NEXT: add w9, w10, w9 +; CHECK-NEXT: add w0, w9, w8 +; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: ret +; +; CHECK-MACHO-LABEL: vla_dynamicrealign_nocall_large_align: +; CHECK-MACHO: ; %bb.0: ; %entry +; CHECK-MACHO-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill +; CHECK-MACHO-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; CHECK-MACHO-NEXT: add x29, sp, #16 +; CHECK-MACHO-NEXT: sub x9, sp, #7, lsl #12 ; =28672 +; CHECK-MACHO-NEXT: sub x9, x9, #4064 +; CHECK-MACHO-NEXT: and sp, x9, #0xffffffffffff8000 +; CHECK-MACHO-NEXT: mov x19, sp +; CHECK-MACHO-NEXT: ; kill: def $w0 killed $w0 def $x0 +; CHECK-MACHO-NEXT: ubfiz x8, x0, #2, #32 +; CHECK-MACHO-NEXT: ldr w9, [x29, #20] +; CHECK-MACHO-NEXT: ldr d0, [x29, #32] +; CHECK-MACHO-NEXT: add x8, x8, #15 +; CHECK-MACHO-NEXT: and x8, x8, #0x7fffffff0 +; CHECK-MACHO-NEXT: mov x10, sp +; CHECK-MACHO-NEXT: sub x8, x10, x8 +; CHECK-MACHO-NEXT: mov sp, x8 +; CHECK-MACHO-NEXT: fcvtzs w10, d0 +; CHECK-MACHO-NEXT: ldr w11, [x19] +; CHECK-MACHO-NEXT: add w9, w9, w11 +; CHECK-MACHO-NEXT: ldr w8, [x8] +; CHECK-MACHO-NEXT: add w9, w10, w9 +; CHECK-MACHO-NEXT: add w0, w9, w8 +; CHECK-MACHO-NEXT: sub sp, x29, #16 +; CHECK-MACHO-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; CHECK-MACHO-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; CHECK-MACHO-NEXT: ret entry: %l1 = alloca i32, align 32768 %0 = zext i32 %i1 to i64 @@ -570,133 +760,152 @@ %add2 = add nsw i32 %add1, %1 ret i32 %add2 } - -; CHECK-LABEL: vla_dynamicrealign_nocall_large_align ; Check that used callee-saved registers are saved -; CHECK: stp x29, x30, [sp, #-32]! 
-; CHECK: str x19, [sp, #16] ; Check that the frame pointer is created: -; CHECK: mov x29, sp ; Check that the stack pointer gets re-aligned to 128 ; bytes & the base pointer (x19) gets initialized to ; this 128-byte aligned area for local variables & ; spill slots -; CHECK: sub x9, sp, #7, lsl #12 -; CHECK: and sp, x9, #0xffffffffffff8000 -; CHECK: mov x19, sp ; Check correct access to arguments passed on the stack, through frame pointer -; CHECK: ldr w[[IARG:[0-9]+]], [x29, #40] ; Check correct reservation of 16-byte aligned VLA (size in w0) on stack ; and set-up of base pointer (x19). -; CHECK: ubfiz x9, x0, #2, #32 -; CHECK: add x9, x9, #15 -; CHECK: ldr d[[DARG:[0-9]+]], [x29, #56] -; CHECK: and x9, x9, #0x7fffffff0 -; CHECK: mov x10, sp -; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9 -; CHECK: mov sp, x[[VLASPTMP]] ; Check correct access to local variable, through base pointer -; CHECK: ldr w[[ILOC:[0-9]+]], [x19] -; CHECK: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]] ; Check epilogue: ; Check that stack pointer get restored from frame pointer. -; CHECK: mov sp, x29 -; CHECK: ldr x19, [sp, #16] -; CHECK: ldp x29, x30, [sp], #32 -; CHECK: ret - -; CHECK-MACHO-LABEL: _vla_dynamicrealign_nocall_large_align: ; Check that used callee-saved registers are saved -; CHECK-MACHO: stp x20, x19, [sp, #-32]! ; Check that the frame pointer is created: -; CHECK-MACHO: stp x29, x30, [sp, #16] -; CHECK-MACHO: add x29, sp, #16 ; Check that the stack pointer gets re-aligned to 128 ; bytes & the base pointer (x19) gets initialized to ; this 128-byte aligned area for local variables & ; spill slots -; CHECK-MACHO: sub x9, sp, #7, lsl #12 -; CHECK-MACHO: and sp, x9, #0xffffffffffff8000 -; CHECK-MACHO: mov x19, sp ; Check correct access to arguments passed on the stack, through frame pointer -; CHECK-MACHO: ldr w[[IARG:[0-9]+]], [x29, #20] ; Check correct reservation of 16-byte aligned VLA (size in w0) on stack ; and set-up of base pointer (x19). -; CHECK-MACHO: ubfiz x9, x0, #2, #32 -; CHECK-MACHO: add x9, x9, #15 -; CHECK-MACHO: ldr d[[DARG:[0-9]+]], [x29, #32] -; CHECK-MACHO: and x9, x9, #0x7fffffff0 -; CHECK-MACHO: mov x10, sp -; CHECK-MACHO: sub x[[VLASPTMP:[0-9]+]], x10, x9 -; CHECK-MACHO: mov sp, x[[VLASPTMP]] ; Check correct access to local variable, through base pointer -; CHECK-MACHO: ldr w[[ILOC:[0-9]+]], [x19] -; CHECK-MACHO: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]] ; Check epilogue: ; Check that stack pointer get restored from frame pointer. -; CHECK-MACHO: sub sp, x29, #16 -; CHECK-MACHO: ldp x29, x30, [sp, #16] -; CHECK-MACHO: ldp x20, x19, [sp], #32 -; CHECK-MACHO: ret - declare void @use(ptr) - define void @realign_conditional(i1 %b, ptr %p) { +; +; CHECK-LABEL: realign_conditional: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: tbz w0, #0, .LBB9_2 +; CHECK-NEXT: // %bb.1: // %bb0 +; CHECK-NEXT: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: mov x19, sp +; CHECK-NEXT: .cfi_def_cfa w29, 32 +; CHECK-NEXT: .cfi_offset w19, -16 +; CHECK-NEXT: .cfi_offset w30, -24 +; CHECK-NEXT: .cfi_offset w29, -32 +; CHECK-NEXT: sub x8, sp, #64 +; CHECK-NEXT: and x8, x8, #0xffffffffffffffe0 +; CHECK-NEXT: mov sp, x8 +; CHECK-NEXT: str x8, [x1] +; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: .LBB9_2: // %bb1 +; CHECK-NEXT: ret +; +; CHECK-MACHO-LABEL: realign_conditional: +; CHECK-MACHO: ; %bb.0: ; %entry +; CHECK-MACHO-NEXT: tbz w0, #0, LBB9_2 +; CHECK-MACHO-NEXT: ; %bb.1: ; %bb0 +; CHECK-MACHO-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill +; CHECK-MACHO-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; CHECK-MACHO-NEXT: add x29, sp, #16 +; CHECK-MACHO-NEXT: mov x19, sp +; CHECK-MACHO-NEXT: .cfi_def_cfa w29, 16 +; CHECK-MACHO-NEXT: .cfi_offset w30, -8 +; CHECK-MACHO-NEXT: .cfi_offset w29, -16 +; CHECK-MACHO-NEXT: .cfi_offset w19, -24 +; CHECK-MACHO-NEXT: .cfi_offset w20, -32 +; CHECK-MACHO-NEXT: sub x8, sp, #64 +; CHECK-MACHO-NEXT: and x8, x8, #0xffffffffffffffe0 +; CHECK-MACHO-NEXT: mov sp, x8 +; CHECK-MACHO-NEXT: str x8, [x1] +; CHECK-MACHO-NEXT: sub sp, x29, #16 +; CHECK-MACHO-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; CHECK-MACHO-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; CHECK-MACHO-NEXT: LBB9_2: ; %bb1 +; CHECK-MACHO-NEXT: ret entry: br i1 %b, label %bb0, label %bb1 - bb0: %MyAlloca = alloca i8, i64 64, align 32 store ptr %MyAlloca, ptr %p br label %bb1 - bb1: ret void } - -; CHECK-LABEL: realign_conditional ; No realignment in the prologue. -; CHECK-NOT: and -; CHECK-NOT: 0xffffffffffffffe0 -; CHECK: tbz {{.*}} .[[LABEL:.*]] ; Stack is realigned in a non-entry BB. -; CHECK: sub [[REG:x[01-9]+]], sp, #64 -; CHECK: and [[REG]], [[REG]], #0xffffffffffffffe0 -; CHECK: mov sp, [[REG]] -; CHECK: .[[LABEL]]: -; CHECK: ret - - define void @realign_conditional2(i1 %b, ptr %p) { +; +; CHECK-LABEL: realign_conditional2: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: tbz w0, #0, .LBB10_2 +; CHECK-NEXT: // %bb.1: // %bb0 +; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: sub x9, sp, #32 +; CHECK-NEXT: and sp, x9, #0xffffffffffffffe0 +; CHECK-NEXT: mov x19, sp +; CHECK-NEXT: .cfi_def_cfa w29, 32 +; CHECK-NEXT: .cfi_offset w19, -16 +; CHECK-NEXT: .cfi_offset w30, -24 +; CHECK-NEXT: .cfi_offset w29, -32 +; CHECK-NEXT: sub x8, sp, #64 +; CHECK-NEXT: and x8, x8, #0xffffffffffffffe0 +; CHECK-NEXT: mov sp, x8 +; CHECK-NEXT: str x8, [x1] +; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: .LBB10_2: // %bb1 +; CHECK-NEXT: ret +; +; CHECK-MACHO-LABEL: realign_conditional2: +; CHECK-MACHO: ; %bb.0: ; %entry +; CHECK-MACHO-NEXT: tbz w0, #0, LBB10_2 +; CHECK-MACHO-NEXT: ; %bb.1: ; %bb0 +; CHECK-MACHO-NEXT: stp x20, x19, [sp, #-32]! 
; 16-byte Folded Spill +; CHECK-MACHO-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; CHECK-MACHO-NEXT: add x29, sp, #16 +; CHECK-MACHO-NEXT: sub x9, sp, #32 +; CHECK-MACHO-NEXT: and sp, x9, #0xffffffffffffffe0 +; CHECK-MACHO-NEXT: mov x19, sp +; CHECK-MACHO-NEXT: .cfi_def_cfa w29, 16 +; CHECK-MACHO-NEXT: .cfi_offset w30, -8 +; CHECK-MACHO-NEXT: .cfi_offset w29, -16 +; CHECK-MACHO-NEXT: .cfi_offset w19, -24 +; CHECK-MACHO-NEXT: .cfi_offset w20, -32 +; CHECK-MACHO-NEXT: sub x8, sp, #64 +; CHECK-MACHO-NEXT: and x8, x8, #0xffffffffffffffe0 +; CHECK-MACHO-NEXT: mov sp, x8 +; CHECK-MACHO-NEXT: str x8, [x1] +; CHECK-MACHO-NEXT: sub sp, x29, #16 +; CHECK-MACHO-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; CHECK-MACHO-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; CHECK-MACHO-NEXT: LBB10_2: ; %bb1 +; CHECK-MACHO-NEXT: ret entry: %tmp = alloca i8, i32 16 br i1 %b, label %bb0, label %bb1 - bb0: %MyAlloca = alloca i8, i64 64, align 32 store ptr %MyAlloca, ptr %p br label %bb1 - bb1: ret void } - -; CHECK-LABEL: realign_conditional2 ; Extra realignment in the prologue (performance issue). -; CHECK: tbz {{.*}} .[[LABEL:.*]] -; CHECK: sub x9, sp, #32 -; CHECK: and sp, x9, #0xffffffffffffffe0 -; CHECK: mov x19, sp ; Stack is realigned in a non-entry BB. -; CHECK: sub [[REG:x[01-9]+]], sp, #64 -; CHECK: and [[REG]], [[REG]], #0xffffffffffffffe0 -; CHECK: mov sp, [[REG]] -; CHECK: .[[LABEL]]: -; CHECK: ret - attributes #0 = { "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } attributes #1 = { nounwind "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } - !1 = !{!2, !2, i64 0} !2 = !{!"int", !3, i64 0} !3 = !{!"omnipotent char", !4, i64 0} diff --git a/llvm/test/CodeGen/AArch64/aarch64-fixup-statepoint-regs-crash.ll b/llvm/test/CodeGen/AArch64/aarch64-fixup-statepoint-regs-crash.ll --- a/llvm/test/CodeGen/AArch64/aarch64-fixup-statepoint-regs-crash.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-fixup-statepoint-regs-crash.ll @@ -14,8 +14,8 @@ ; CHECK-NEXT: .cfi_offset w19, -8 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: str q0, [sp, #16] +; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: bl baz // 8-byte Folded Reload ; CHECK-NEXT: .Ltmp0: ; CHECK-NEXT: ldp x19, x0, [sp, #8] // 8-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll b/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll --- a/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll @@ -14,8 +14,8 @@ ; CHECK0-NEXT: // kill: def $w1 killed $w1 def $x1 ; CHECK0-NEXT: ubfx x8, x1, #9, #8 ; CHECK0-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill -; CHECK0-NEXT: lsl x21, x8, #1 ; CHECK0-NEXT: mov x19, x0 +; CHECK0-NEXT: lsl x21, x8, #1 ; CHECK0-NEXT: ldrh w20, [x0, x21] ; CHECK0-NEXT: bl foo ; CHECK0-NEXT: mov w0, w20 @@ -55,8 +55,8 @@ ; CHECK0-NEXT: // kill: def $w1 killed $w1 def $x1 ; CHECK0-NEXT: ubfx x8, x1, #9, #8 ; CHECK0-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill -; CHECK0-NEXT: lsl x21, x8, #2 ; CHECK0-NEXT: mov x19, x0 +; CHECK0-NEXT: lsl x21, x8, #2 ; CHECK0-NEXT: ldr w20, [x0, x21] ; CHECK0-NEXT: bl foo ; CHECK0-NEXT: mov w0, w20 @@ -96,8 +96,8 @@ ; CHECK0-NEXT: // kill: def $w1 killed $w1 
def $x1 ; CHECK0-NEXT: ubfx x8, x1, #9, #8 ; CHECK0-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill -; CHECK0-NEXT: lsl x21, x8, #3 ; CHECK0-NEXT: mov x19, x0 +; CHECK0-NEXT: lsl x21, x8, #3 ; CHECK0-NEXT: ldr x20, [x0, x21] ; CHECK0-NEXT: bl foo ; CHECK0-NEXT: mov x0, x20 diff --git a/llvm/test/CodeGen/AArch64/aarch64-interleaved-access-w-undef.ll b/llvm/test/CodeGen/AArch64/aarch64-interleaved-access-w-undef.ll --- a/llvm/test/CodeGen/AArch64/aarch64-interleaved-access-w-undef.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-interleaved-access-w-undef.ll @@ -27,14 +27,14 @@ define void @f_undef_15(<8 x i64> %a, ptr %dst) { ; CHECK-LABEL: f_undef_15: ; CHECK: // %bb.0: // %BB -; CHECK-NEXT: mov x9, x0 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $q0_q1 -; CHECK-NEXT: add x8, x0, #64 +; CHECK-NEXT: mov x8, x0 ; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: st2 { v0.2d, v1.2d }, [x9], #32 +; CHECK-NEXT: st2 { v0.2d, v1.2d }, [x8], #32 +; CHECK-NEXT: st2 { v0.2d, v1.2d }, [x8] +; CHECK-NEXT: add x8, x0, #64 ; CHECK-NEXT: st2 { v0.2d, v1.2d }, [x8] ; CHECK-NEXT: add x8, x0, #96 -; CHECK-NEXT: st2 { v0.2d, v1.2d }, [x9] ; CHECK-NEXT: st2 { v0.2d, v1.2d }, [x8] ; CHECK-NEXT: ret BB: @@ -46,20 +46,20 @@ define void @f_undef_1(<8 x i64> %a, ptr %dst) { ; CHECK-LABEL: f_undef_1: ; CHECK: // %bb.0: // %BB -; CHECK-NEXT: mov x9, x0 -; CHECK-NEXT: add x8, x0, #64 ; CHECK-NEXT: mov v16.16b, v0.16b -; CHECK-NEXT: // kill: def $q3 killed $q3 def $q3_q4 -; CHECK-NEXT: mov v17.16b, v16.16b +; CHECK-NEXT: mov x8, x0 ; CHECK-NEXT: mov v5.16b, v2.16b ; CHECK-NEXT: // kill: def $q1 killed $q1 def $q1_q2 -; CHECK-NEXT: st2 { v16.2d, v17.2d }, [x9], #32 -; CHECK-NEXT: mov v6.16b, v5.16b +; CHECK-NEXT: // kill: def $q3 killed $q3 def $q3_q4 ; CHECK-NEXT: mov v2.16b, v1.16b ; CHECK-NEXT: mov v4.16b, v3.16b +; CHECK-NEXT: mov v17.16b, v16.16b +; CHECK-NEXT: mov v6.16b, v5.16b +; CHECK-NEXT: st2 { v16.2d, v17.2d }, [x8], #32 +; CHECK-NEXT: st2 { v1.2d, v2.2d }, [x8] +; CHECK-NEXT: add x8, x0, #64 ; CHECK-NEXT: st2 { v5.2d, v6.2d }, [x8] ; CHECK-NEXT: add x8, x0, #96 -; CHECK-NEXT: st2 { v1.2d, v2.2d }, [x9] ; CHECK-NEXT: st2 { v3.2d, v4.2d }, [x8] ; CHECK-NEXT: ret BB: @@ -75,8 +75,8 @@ ; CHECK-NEXT: mov v5.16b, v2.16b ; CHECK-NEXT: // kill: def $q3 killed $q3 def $q2_q3 ; CHECK-NEXT: mov v4.16b, v0.16b -; CHECK-NEXT: st2 { v4.4s, v5.4s }, [x0], #32 ; CHECK-NEXT: mov v2.16b, v1.16b +; CHECK-NEXT: st2 { v4.4s, v5.4s }, [x0], #32 ; CHECK-NEXT: st2 { v2.4s, v3.4s }, [x0] ; CHECK-NEXT: ret BB: @@ -91,8 +91,8 @@ ; CHECK-NEXT: mov v5.16b, v2.16b ; CHECK-NEXT: // kill: def $q3 killed $q3 def $q2_q3 ; CHECK-NEXT: mov v4.16b, v0.16b -; CHECK-NEXT: st2 { v4.4s, v5.4s }, [x0], #32 ; CHECK-NEXT: mov v2.16b, v1.16b +; CHECK-NEXT: st2 { v4.4s, v5.4s }, [x0], #32 ; CHECK-NEXT: st2 { v2.4s, v3.4s }, [x0] ; CHECK-NEXT: ret BB: diff --git a/llvm/test/CodeGen/AArch64/aarch64-isel-csinc.ll b/llvm/test/CodeGen/AArch64/aarch64-isel-csinc.ll --- a/llvm/test/CodeGen/AArch64/aarch64-isel-csinc.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-isel-csinc.ll @@ -101,8 +101,8 @@ define i32 @csinc7(i32 %a, i32 %b) { ; CHECK-LABEL: csinc7: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov w8, #-4097 // =0xffffefff ; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: mov w8, #-4097 ; CHECK-NEXT: csinc w8, w8, wzr, eq ; CHECK-NEXT: add w0, w8, w1 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll b/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll --- a/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll +++ 
b/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll @@ -106,18 +106,18 @@ ; CHECK-LE-LABEL: fsext_v2i32: ; CHECK-LE: // %bb.0: ; CHECK-LE-NEXT: ldrsb w8, [x0] +; CHECK-LE-NEXT: ldrsb w9, [x0, #1] ; CHECK-LE-NEXT: fmov s0, w8 -; CHECK-LE-NEXT: ldrsb w8, [x0, #1] -; CHECK-LE-NEXT: mov v0.s[1], w8 +; CHECK-LE-NEXT: mov v0.s[1], w9 ; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-LE-NEXT: ret ; ; CHECK-BE-LABEL: fsext_v2i32: ; CHECK-BE: // %bb.0: ; CHECK-BE-NEXT: ldrsb w8, [x0] +; CHECK-BE-NEXT: ldrsb w9, [x0, #1] ; CHECK-BE-NEXT: fmov s0, w8 -; CHECK-BE-NEXT: ldrsb w8, [x0, #1] -; CHECK-BE-NEXT: mov v0.s[1], w8 +; CHECK-BE-NEXT: mov v0.s[1], w9 ; CHECK-BE-NEXT: rev64 v0.2s, v0.2s ; CHECK-BE-NEXT: ret %x = load <2 x i8>, ptr %a @@ -187,12 +187,12 @@ ; CHECK-BE: // %bb.0: ; CHECK-BE-NEXT: ld1 { v0.8b }, [x0] ; CHECK-BE-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-BE-NEXT: sshll2 v1.4s, v0.8h, #0 -; CHECK-BE-NEXT: sshll v0.4s, v0.4h, #0 -; CHECK-BE-NEXT: rev64 v1.4s, v1.4s +; CHECK-BE-NEXT: sshll v1.4s, v0.4h, #0 +; CHECK-BE-NEXT: sshll2 v0.4s, v0.8h, #0 ; CHECK-BE-NEXT: rev64 v0.4s, v0.4s -; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 -; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-BE-NEXT: rev64 v2.4s, v1.4s +; CHECK-BE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-BE-NEXT: ext v0.16b, v2.16b, v2.16b, #8 ; CHECK-BE-NEXT: ret %x = load <8 x i8>, ptr %a %y = sext <8 x i8> %x to <8 x i32> @@ -251,18 +251,18 @@ ; CHECK-LE-LABEL: fsext_v2i16: ; CHECK-LE: // %bb.0: ; CHECK-LE-NEXT: ldrsb w8, [x0] +; CHECK-LE-NEXT: ldrsb w9, [x0, #1] ; CHECK-LE-NEXT: fmov s0, w8 -; CHECK-LE-NEXT: ldrsb w8, [x0, #1] -; CHECK-LE-NEXT: mov v0.s[1], w8 +; CHECK-LE-NEXT: mov v0.s[1], w9 ; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-LE-NEXT: ret ; ; CHECK-BE-LABEL: fsext_v2i16: ; CHECK-BE: // %bb.0: ; CHECK-BE-NEXT: ldrsb w8, [x0] +; CHECK-BE-NEXT: ldrsb w9, [x0, #1] ; CHECK-BE-NEXT: fmov s0, w8 -; CHECK-BE-NEXT: ldrsb w8, [x0, #1] -; CHECK-BE-NEXT: mov v0.s[1], w8 +; CHECK-BE-NEXT: mov v0.s[1], w9 ; CHECK-BE-NEXT: rev64 v0.2s, v0.2s ; CHECK-BE-NEXT: ret %x = load <2 x i8>, ptr %a @@ -344,12 +344,12 @@ ; CHECK-BE-LABEL: fsext_v16i16: ; CHECK-BE: // %bb.0: ; CHECK-BE-NEXT: ld1 { v0.16b }, [x0] -; CHECK-BE-NEXT: sshll2 v1.8h, v0.16b, #0 -; CHECK-BE-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-BE-NEXT: rev64 v1.8h, v1.8h +; CHECK-BE-NEXT: sshll v1.8h, v0.8b, #0 +; CHECK-BE-NEXT: sshll2 v0.8h, v0.16b, #0 ; CHECK-BE-NEXT: rev64 v0.8h, v0.8h -; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 -; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-BE-NEXT: rev64 v2.8h, v1.8h +; CHECK-BE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-BE-NEXT: ext v0.16b, v2.16b, v2.16b, #8 ; CHECK-BE-NEXT: ret %x = load <16 x i8>, ptr %a %y = sext <16 x i8> %x to <16 x i16> diff --git a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll --- a/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-matrix-umull-smull.ll @@ -193,18 +193,18 @@ ; CHECK-NEXT: b.lt .LBB3_8 ; CHECK-NEXT: // %bb.1: // %for.body.preheader ; CHECK-NEXT: sxth w8, w1 -; CHECK-NEXT: mov w9, w3 ; CHECK-NEXT: cmp w3, #15 +; CHECK-NEXT: mov w9, w3 ; CHECK-NEXT: b.hi .LBB3_3 ; CHECK-NEXT: // %bb.2: ; CHECK-NEXT: mov x10, xzr ; CHECK-NEXT: b .LBB3_6 ; CHECK-NEXT: .LBB3_3: // %vector.ph +; CHECK-NEXT: dup v0.8h, w8 ; CHECK-NEXT: and x10, x9, #0xfffffff0 ; CHECK-NEXT: add x11, x2, #32 ; CHECK-NEXT: add x12, x0, #16 ; CHECK-NEXT: mov x13, x10 -; CHECK-NEXT: 
dup v0.8h, w8 ; CHECK-NEXT: .LBB3_4: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldp q1, q2, [x12, #-16] @@ -221,15 +221,15 @@ ; CHECK-NEXT: cmp x10, x9 ; CHECK-NEXT: b.eq .LBB3_8 ; CHECK-NEXT: .LBB3_6: // %for.body.preheader1 -; CHECK-NEXT: sub x9, x9, x10 ; CHECK-NEXT: add x11, x2, x10, lsl #2 -; CHECK-NEXT: add x10, x0, x10, lsl #1 +; CHECK-NEXT: add x12, x0, x10, lsl #1 +; CHECK-NEXT: sub x9, x9, x10 ; CHECK-NEXT: .LBB3_7: // %for.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldrsh w12, [x10], #2 +; CHECK-NEXT: ldrsh w10, [x12], #2 ; CHECK-NEXT: subs x9, x9, #1 -; CHECK-NEXT: mul w12, w12, w8 -; CHECK-NEXT: str w12, [x11], #4 +; CHECK-NEXT: mul w10, w10, w8 +; CHECK-NEXT: str w10, [x11], #4 ; CHECK-NEXT: b.ne .LBB3_7 ; CHECK-NEXT: .LBB3_8: // %for.cond.cleanup ; CHECK-NEXT: ret @@ -304,19 +304,19 @@ ; CHECK-NEXT: cmp w3, #1 ; CHECK-NEXT: b.lt .LBB4_8 ; CHECK-NEXT: // %bb.1: // %for.body.preheader +; CHECK-NEXT: cmp w3, #15 ; CHECK-NEXT: and w8, w1, #0xffff ; CHECK-NEXT: mov w9, w3 -; CHECK-NEXT: cmp w3, #15 ; CHECK-NEXT: b.hi .LBB4_3 ; CHECK-NEXT: // %bb.2: ; CHECK-NEXT: mov x10, xzr ; CHECK-NEXT: b .LBB4_6 ; CHECK-NEXT: .LBB4_3: // %vector.ph +; CHECK-NEXT: dup v0.8h, w8 ; CHECK-NEXT: and x10, x9, #0xfffffff0 ; CHECK-NEXT: add x11, x2, #32 ; CHECK-NEXT: add x12, x0, #16 ; CHECK-NEXT: mov x13, x10 -; CHECK-NEXT: dup v0.8h, w8 ; CHECK-NEXT: .LBB4_4: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldp q1, q2, [x12, #-16] @@ -333,15 +333,15 @@ ; CHECK-NEXT: cmp x10, x9 ; CHECK-NEXT: b.eq .LBB4_8 ; CHECK-NEXT: .LBB4_6: // %for.body.preheader1 -; CHECK-NEXT: sub x9, x9, x10 ; CHECK-NEXT: add x11, x2, x10, lsl #2 -; CHECK-NEXT: add x10, x0, x10, lsl #1 +; CHECK-NEXT: add x12, x0, x10, lsl #1 +; CHECK-NEXT: sub x9, x9, x10 ; CHECK-NEXT: .LBB4_7: // %for.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldrh w12, [x10], #2 +; CHECK-NEXT: ldrh w10, [x12], #2 ; CHECK-NEXT: subs x9, x9, #1 -; CHECK-NEXT: mul w12, w12, w8 -; CHECK-NEXT: str w12, [x11], #4 +; CHECK-NEXT: mul w10, w10, w8 +; CHECK-NEXT: str w10, [x11], #4 ; CHECK-NEXT: b.ne .LBB4_7 ; CHECK-NEXT: .LBB4_8: // %for.cond.cleanup ; CHECK-NEXT: ret @@ -416,8 +416,8 @@ ; CHECK-NEXT: cbz w2, .LBB5_3 ; CHECK-NEXT: // %bb.1: // %for.body.preheader ; CHECK-NEXT: sxtb w9, w1 -; CHECK-NEXT: mov w10, w2 ; CHECK-NEXT: cmp w2, #15 +; CHECK-NEXT: mov w10, w2 ; CHECK-NEXT: b.hi .LBB5_4 ; CHECK-NEXT: // %bb.2: ; CHECK-NEXT: mov x11, xzr @@ -428,12 +428,12 @@ ; CHECK-NEXT: mov w0, w8 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB5_4: // %vector.ph -; CHECK-NEXT: and x11, x10, #0xfffffff0 -; CHECK-NEXT: add x8, x0, #8 ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: mov x12, x11 ; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: and x11, x10, #0xfffffff0 ; CHECK-NEXT: dup v2.8h, w9 +; CHECK-NEXT: add x8, x0, #8 +; CHECK-NEXT: mov x12, x11 ; CHECK-NEXT: .LBB5_5: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldp d3, d4, [x8, #-8] @@ -536,8 +536,8 @@ ; CHECK-NEXT: .LBB6_1: // %loop ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: add x8, x8, #8 ; CHECK-NEXT: subs x2, x2, #8 +; CHECK-NEXT: add x8, x8, #8 ; CHECK-NEXT: umull v1.2d, v1.2s, v0.s[1] ; CHECK-NEXT: shrn v1.2s, v1.2d, #15 ; CHECK-NEXT: str d1, [x0], #32 @@ -577,8 +577,8 @@ ; CHECK-NEXT: .LBB7_1: // %loop ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr q1, [x0] -; CHECK-NEXT: 
add x8, x8, #8 ; CHECK-NEXT: subs x2, x2, #8 +; CHECK-NEXT: add x8, x8, #8 ; CHECK-NEXT: smull v2.2d, v1.2s, v0.s[1] ; CHECK-NEXT: smull2 v1.2d, v1.4s, v0.s[1] ; CHECK-NEXT: shrn v2.2s, v2.2d, #15 @@ -620,8 +620,8 @@ ; CHECK-NEXT: .LBB8_1: // %loop ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: add x8, x8, #8 ; CHECK-NEXT: subs x2, x2, #8 +; CHECK-NEXT: add x8, x8, #8 ; CHECK-NEXT: umull v1.8h, v1.8b, v0.8b ; CHECK-NEXT: cmlt v1.8h, v1.8h, #0 ; CHECK-NEXT: xtn v1.8b, v1.8h @@ -657,18 +657,18 @@ define void @sink_v16s16_8(i32 *%p, i32 *%d, i64 %n, <16 x i8> %a) { ; CHECK-LABEL: sink_v16s16_8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: dup v0.16b, v0.b[10] +; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: .LBB9_1: // %loop ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr q1, [x0] -; CHECK-NEXT: add x8, x8, #8 ; CHECK-NEXT: subs x2, x2, #8 -; CHECK-NEXT: smull2 v2.8h, v1.16b, v0.16b -; CHECK-NEXT: smull v1.8h, v1.8b, v0.8b -; CHECK-NEXT: cmlt v2.8h, v2.8h, #0 +; CHECK-NEXT: add x8, x8, #8 +; CHECK-NEXT: smull v2.8h, v1.8b, v0.8b +; CHECK-NEXT: smull2 v1.8h, v1.16b, v0.16b ; CHECK-NEXT: cmlt v1.8h, v1.8h, #0 -; CHECK-NEXT: uzp1 v1.16b, v1.16b, v2.16b +; CHECK-NEXT: cmlt v2.8h, v2.8h, #0 +; CHECK-NEXT: uzp1 v1.16b, v2.16b, v1.16b ; CHECK-NEXT: str q1, [x0], #32 ; CHECK-NEXT: b.ne .LBB9_1 ; CHECK-NEXT: // %bb.2: // %exit @@ -765,24 +765,24 @@ define void @matrix_mul_unsigned_and_double(i32 %N, i32* nocapture %C, i16* nocapture readonly %A, i32 %val) { ; CHECK-LABEL: matrix_mul_unsigned_and_double: ; CHECK: // %bb.0: // %vector.header -; CHECK-NEXT: and w9, w3, #0xffff +; CHECK-NEXT: and w8, w3, #0xffff ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: dup v0.8h, w8 ; CHECK-NEXT: and x8, x0, #0xfffffff0 -; CHECK-NEXT: dup v0.8h, w9 ; CHECK-NEXT: .LBB11_1: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add x9, x2, w0, uxtw #1 -; CHECK-NEXT: add x10, x1, w0, uxtw #2 ; CHECK-NEXT: subs x8, x8, #16 -; CHECK-NEXT: add w0, w0, #16 ; CHECK-NEXT: ldr q1, [x9] ; CHECK-NEXT: ldur q2, [x9, #8] +; CHECK-NEXT: add x9, x1, w0, uxtw #2 +; CHECK-NEXT: add w0, w0, #16 ; CHECK-NEXT: umull2 v3.4s, v0.8h, v1.8h ; CHECK-NEXT: umull v1.4s, v0.4h, v1.4h ; CHECK-NEXT: umull2 v4.4s, v0.8h, v2.8h ; CHECK-NEXT: umull v2.4s, v0.4h, v2.4h -; CHECK-NEXT: stp q1, q3, [x10] -; CHECK-NEXT: stp q2, q4, [x10, #32] +; CHECK-NEXT: stp q1, q3, [x9] +; CHECK-NEXT: stp q2, q4, [x9, #32] ; CHECK-NEXT: b.ne .LBB11_1 ; CHECK-NEXT: // %bb.2: // %for.end12 ; CHECK-NEXT: ret @@ -833,10 +833,10 @@ define void @matrix_mul_signed_and(i32 %N, i32* nocapture %C, i16* nocapture readonly %A, i32 %val) { ; CHECK-LABEL: matrix_mul_signed_and: ; CHECK: // %bb.0: // %vector.header -; CHECK-NEXT: and w9, w3, #0xffff +; CHECK-NEXT: and w8, w3, #0xffff ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: dup v0.4s, w8 ; CHECK-NEXT: and x8, x0, #0xfffffff8 -; CHECK-NEXT: dup v0.4s, w9 ; CHECK-NEXT: .LBB12_1: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add x9, x2, w0, uxtw #1 @@ -899,10 +899,10 @@ define void @matrix_mul_signed_and_double(i32 %N, i32* nocapture %C, i16* nocapture readonly %A, i32 %val) { ; CHECK-LABEL: matrix_mul_signed_and_double: ; CHECK: // %bb.0: // %vector.header -; CHECK-NEXT: and w9, w3, #0xffff +; CHECK-NEXT: and w8, w3, #0xffff ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK-NEXT: dup v0.4s, w8 ; CHECK-NEXT: and x8, x0, #0xfffffff0 -; 
CHECK-NEXT: dup v0.4s, w9 ; CHECK-NEXT: .LBB13_1: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add x9, x2, w0, uxtw #1 diff --git a/llvm/test/CodeGen/AArch64/aarch64-mops-consecutive.ll b/llvm/test/CodeGen/AArch64/aarch64-mops-consecutive.ll --- a/llvm/test/CodeGen/AArch64/aarch64-mops-consecutive.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-mops-consecutive.ll @@ -14,23 +14,23 @@ ; CHECK-MOPS-NEXT: .cfi_def_cfa_offset 2032 ; CHECK-MOPS-NEXT: .cfi_offset w30, -8 ; CHECK-MOPS-NEXT: .cfi_offset w29, -16 -; CHECK-MOPS-NEXT: mov w8, #1000 +; CHECK-MOPS-NEXT: mov w8, #1000 // =0x3e8 ; CHECK-MOPS-NEXT: add x9, sp, #8 ; CHECK-MOPS-NEXT: adrp x10, .LCPI0_0 ; CHECK-MOPS-NEXT: adrp x11, .LCPI0_1 -; CHECK-MOPS-NEXT: mov w12, #6424 -; CHECK-MOPS-NEXT: mov w13, #7452 ; CHECK-MOPS-NEXT: setp [x9]!, x8!, xzr ; CHECK-MOPS-NEXT: setm [x9]!, x8!, xzr ; CHECK-MOPS-NEXT: sete [x9]!, x8!, xzr -; CHECK-MOPS-NEXT: movk w12, #6938, lsl #16 +; CHECK-MOPS-NEXT: mov w12, #6424 // =0x1918 ; CHECK-MOPS-NEXT: ldr q0, [x10, :lo12:.LCPI0_0] -; CHECK-MOPS-NEXT: mov w8, #30 ; CHECK-MOPS-NEXT: ldr d1, [x11, :lo12:.LCPI0_1] +; CHECK-MOPS-NEXT: mov w8, #7452 // =0x1d1c +; CHECK-MOPS-NEXT: movk w12, #6938, lsl #16 +; CHECK-MOPS-NEXT: strh w8, [sp, #1036] +; CHECK-MOPS-NEXT: mov w8, #30 // =0x1e ; CHECK-MOPS-NEXT: add x0, sp, #1008 ; CHECK-MOPS-NEXT: add x1, sp, #8 ; CHECK-MOPS-NEXT: str w12, [sp, #1032] -; CHECK-MOPS-NEXT: strh w13, [sp, #1036] ; CHECK-MOPS-NEXT: str q0, [sp, #1008] ; CHECK-MOPS-NEXT: str d1, [sp, #1024] ; CHECK-MOPS-NEXT: strb w8, [sp, #1038] diff --git a/llvm/test/CodeGen/AArch64/aarch64-mops.ll b/llvm/test/CodeGen/AArch64/aarch64-mops.ll --- a/llvm/test/CodeGen/AArch64/aarch64-mops.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-mops.ll @@ -92,7 +92,7 @@ ; GISel-WITHOUT-MOPS-O0-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_def_cfa_offset 16 ; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_offset w30, -16 -; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, #10 +; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, #10 // =0xa ; GISel-WITHOUT-MOPS-O0-NEXT: mov w2, w8 ; GISel-WITHOUT-MOPS-O0-NEXT: mov w1, wzr ; GISel-WITHOUT-MOPS-O0-NEXT: bl memset @@ -105,14 +105,14 @@ ; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_def_cfa_offset 16 ; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_offset w30, -16 ; GISel-WITHOUT-MOPS-O3-NEXT: mov w1, wzr -; GISel-WITHOUT-MOPS-O3-NEXT: mov w2, #10 +; GISel-WITHOUT-MOPS-O3-NEXT: mov w2, #10 // =0xa ; GISel-WITHOUT-MOPS-O3-NEXT: bl memset ; GISel-WITHOUT-MOPS-O3-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; GISel-WITHOUT-MOPS-O3-NEXT: ret ; ; GISel-MOPS-O0-LABEL: memset_10_zeroval_volatile: ; GISel-MOPS-O0: // %bb.0: // %entry -; GISel-MOPS-O0-NEXT: mov w8, #10 +; GISel-MOPS-O0-NEXT: mov w8, #10 // =0xa ; GISel-MOPS-O0-NEXT: // kill: def $x8 killed $w8 ; GISel-MOPS-O0-NEXT: mov x9, xzr ; GISel-MOPS-O0-NEXT: setp [x0]!, x8!, x9 @@ -122,7 +122,7 @@ ; ; GISel-MOPS-O3-LABEL: memset_10_zeroval_volatile: ; GISel-MOPS-O3: // %bb.0: // %entry -; GISel-MOPS-O3-NEXT: mov w8, #10 +; GISel-MOPS-O3-NEXT: mov w8, #10 // =0xa ; GISel-MOPS-O3-NEXT: setp [x0]!, x8!, xzr ; GISel-MOPS-O3-NEXT: setm [x0]!, x8!, xzr ; GISel-MOPS-O3-NEXT: sete [x0]!, x8!, xzr @@ -150,7 +150,7 @@ ; GISel-WITHOUT-MOPS-O0-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_def_cfa_offset 16 ; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_offset w30, -16 -; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, #10000 +; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, #10000 // =0x2710 ; GISel-WITHOUT-MOPS-O0-NEXT: mov w2, w8 ; GISel-WITHOUT-MOPS-O0-NEXT: mov w1, wzr ; GISel-WITHOUT-MOPS-O0-NEXT: bl memset @@ -163,14 +163,14 @@ ; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_def_cfa_offset 16 ; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_offset w30, -16 ; GISel-WITHOUT-MOPS-O3-NEXT: mov w1, wzr -; GISel-WITHOUT-MOPS-O3-NEXT: mov w2, #10000 +; GISel-WITHOUT-MOPS-O3-NEXT: mov w2, #10000 // =0x2710 ; GISel-WITHOUT-MOPS-O3-NEXT: bl memset ; GISel-WITHOUT-MOPS-O3-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; GISel-WITHOUT-MOPS-O3-NEXT: ret ; ; GISel-MOPS-O0-LABEL: memset_10000_zeroval: ; GISel-MOPS-O0: // %bb.0: // %entry -; GISel-MOPS-O0-NEXT: mov w8, #10000 +; GISel-MOPS-O0-NEXT: mov w8, #10000 // =0x2710 ; GISel-MOPS-O0-NEXT: // kill: def $x8 killed $w8 ; GISel-MOPS-O0-NEXT: mov x9, xzr ; GISel-MOPS-O0-NEXT: setp [x0]!, x8!, x9 @@ -180,7 +180,7 @@ ; ; GISel-MOPS-O3-LABEL: memset_10000_zeroval: ; GISel-MOPS-O3: // %bb.0: // %entry -; GISel-MOPS-O3-NEXT: mov w8, #10000 +; GISel-MOPS-O3-NEXT: mov w8, #10000 // =0x2710 ; GISel-MOPS-O3-NEXT: setp [x0]!, x8!, xzr ; GISel-MOPS-O3-NEXT: setm [x0]!, x8!, xzr ; GISel-MOPS-O3-NEXT: sete [x0]!, x8!, xzr @@ -192,14 +192,14 @@ ; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_def_cfa_offset 16 ; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_offset w30, -16 ; SDAG-WITHOUT-MOPS-O2-NEXT: mov w1, wzr -; SDAG-WITHOUT-MOPS-O2-NEXT: mov w2, #10000 +; SDAG-WITHOUT-MOPS-O2-NEXT: mov w2, #10000 // =0x2710 ; SDAG-WITHOUT-MOPS-O2-NEXT: bl memset ; SDAG-WITHOUT-MOPS-O2-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; SDAG-WITHOUT-MOPS-O2-NEXT: ret ; ; SDAG-MOPS-O2-LABEL: memset_10000_zeroval: ; SDAG-MOPS-O2: // %bb.0: // %entry -; SDAG-MOPS-O2-NEXT: mov w8, #10000 +; SDAG-MOPS-O2-NEXT: mov w8, #10000 // =0x2710 ; SDAG-MOPS-O2-NEXT: setp [x0]!, x8!, xzr ; SDAG-MOPS-O2-NEXT: setm [x0]!, x8!, xzr ; SDAG-MOPS-O2-NEXT: sete [x0]!, x8!, xzr @@ -215,7 +215,7 @@ ; GISel-WITHOUT-MOPS-O0-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_def_cfa_offset 16 ; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_offset w30, -16 -; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, #10000 +; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, #10000 // =0x2710 ; GISel-WITHOUT-MOPS-O0-NEXT: mov w2, w8 ; GISel-WITHOUT-MOPS-O0-NEXT: mov w1, wzr ; GISel-WITHOUT-MOPS-O0-NEXT: bl memset @@ -228,14 +228,14 @@ ; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_def_cfa_offset 16 ; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_offset w30, -16 ; GISel-WITHOUT-MOPS-O3-NEXT: mov w1, wzr -; GISel-WITHOUT-MOPS-O3-NEXT: mov w2, #10000 +; GISel-WITHOUT-MOPS-O3-NEXT: mov w2, #10000 // =0x2710 ; GISel-WITHOUT-MOPS-O3-NEXT: bl memset ; GISel-WITHOUT-MOPS-O3-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; GISel-WITHOUT-MOPS-O3-NEXT: ret ; ; GISel-MOPS-O0-LABEL: memset_10000_zeroval_volatile: ; GISel-MOPS-O0: // %bb.0: // %entry -; GISel-MOPS-O0-NEXT: mov w8, #10000 +; GISel-MOPS-O0-NEXT: mov w8, #10000 // =0x2710 ; GISel-MOPS-O0-NEXT: // kill: def $x8 killed $w8 ; GISel-MOPS-O0-NEXT: mov x9, xzr ; GISel-MOPS-O0-NEXT: setp [x0]!, x8!, x9 @@ -245,7 +245,7 @@ ; ; GISel-MOPS-O3-LABEL: memset_10000_zeroval_volatile: ; GISel-MOPS-O3: // %bb.0: // %entry -; GISel-MOPS-O3-NEXT: mov w8, #10000 +; GISel-MOPS-O3-NEXT: mov w8, #10000 // =0x2710 ; GISel-MOPS-O3-NEXT: setp [x0]!, x8!, xzr ; GISel-MOPS-O3-NEXT: setm [x0]!, x8!, xzr ; GISel-MOPS-O3-NEXT: sete [x0]!, x8!, xzr @@ -257,14 +257,14 @@ ; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_def_cfa_offset 16 ; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_offset w30, -16 ; SDAG-WITHOUT-MOPS-O2-NEXT: mov w1, wzr -; SDAG-WITHOUT-MOPS-O2-NEXT: mov w2, #10000 +; SDAG-WITHOUT-MOPS-O2-NEXT: mov w2, #10000 // =0x2710 ; SDAG-WITHOUT-MOPS-O2-NEXT: bl memset ; SDAG-WITHOUT-MOPS-O2-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; SDAG-WITHOUT-MOPS-O2-NEXT: ret ; ; SDAG-MOPS-O2-LABEL: memset_10000_zeroval_volatile: ; SDAG-MOPS-O2: // %bb.0: // %entry -; SDAG-MOPS-O2-NEXT: mov w8, #10000 +; SDAG-MOPS-O2-NEXT: mov w8, #10000 // =0x2710 ; SDAG-MOPS-O2-NEXT: setp [x0]!, x8!, xzr ; SDAG-MOPS-O2-NEXT: setm [x0]!, x8!, xzr ; SDAG-MOPS-O2-NEXT: sete [x0]!, x8!, xzr @@ -423,7 +423,7 @@ ; GISel-WITHOUT-MOPS-O0-NEXT: // implicit-def: $x8 ; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, w1 ; GISel-WITHOUT-MOPS-O0-NEXT: and x8, x8, #0xff -; GISel-WITHOUT-MOPS-O0-NEXT: mov x9, #72340172838076673 +; GISel-WITHOUT-MOPS-O0-NEXT: mov x9, #72340172838076673 // =0x101010101010101 ; GISel-WITHOUT-MOPS-O0-NEXT: mul x8, x8, x9 ; GISel-WITHOUT-MOPS-O0-NEXT: str x8, [x0] ; GISel-WITHOUT-MOPS-O0-NEXT: // kill: def $w8 killed $w8 killed $x8 @@ -433,7 +433,7 @@ ; GISel-WITHOUT-MOPS-O3-LABEL: memset_10: ; GISel-WITHOUT-MOPS-O3: // %bb.0: // %entry ; GISel-WITHOUT-MOPS-O3-NEXT: // kill: def $w1 killed $w1 def $x1 -; GISel-WITHOUT-MOPS-O3-NEXT: mov x8, #72340172838076673 +; GISel-WITHOUT-MOPS-O3-NEXT: mov x8, #72340172838076673 // =0x101010101010101 ; GISel-WITHOUT-MOPS-O3-NEXT: and x9, x1, #0xff ; GISel-WITHOUT-MOPS-O3-NEXT: mul x8, x9, x8 ; GISel-WITHOUT-MOPS-O3-NEXT: str x8, [x0] @@ -445,7 +445,7 @@ ; GISel-MOPS-O0-NEXT: // implicit-def: $x8 ; GISel-MOPS-O0-NEXT: mov w8, w1 ; GISel-MOPS-O0-NEXT: and x8, x8, #0xff -; GISel-MOPS-O0-NEXT: mov x9, #72340172838076673 +; GISel-MOPS-O0-NEXT: mov x9, #72340172838076673 // =0x101010101010101 ; GISel-MOPS-O0-NEXT: mul x8, x8, x9 ; GISel-MOPS-O0-NEXT: str x8, [x0] ; GISel-MOPS-O0-NEXT: // kill: def $w8 killed $w8 killed $x8 @@ -455,7 +455,7 @@ ; GISel-MOPS-O3-LABEL: memset_10: ; GISel-MOPS-O3: // %bb.0: // %entry ; GISel-MOPS-O3-NEXT: // kill: def $w1 killed $w1 
def $x1 -; GISel-MOPS-O3-NEXT: mov x8, #72340172838076673 +; GISel-MOPS-O3-NEXT: mov x8, #72340172838076673 // =0x101010101010101 ; GISel-MOPS-O3-NEXT: and x9, x1, #0xff ; GISel-MOPS-O3-NEXT: mul x8, x9, x8 ; GISel-MOPS-O3-NEXT: str x8, [x0] @@ -465,7 +465,7 @@ ; SDAG-WITHOUT-MOPS-O2-LABEL: memset_10: ; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry ; SDAG-WITHOUT-MOPS-O2-NEXT: // kill: def $w1 killed $w1 def $x1 -; SDAG-WITHOUT-MOPS-O2-NEXT: mov x8, #72340172838076673 +; SDAG-WITHOUT-MOPS-O2-NEXT: mov x8, #72340172838076673 // =0x101010101010101 ; SDAG-WITHOUT-MOPS-O2-NEXT: and x9, x1, #0xff ; SDAG-WITHOUT-MOPS-O2-NEXT: mul x8, x9, x8 ; SDAG-WITHOUT-MOPS-O2-NEXT: str x8, [x0] @@ -475,7 +475,7 @@ ; SDAG-MOPS-O2-LABEL: memset_10: ; SDAG-MOPS-O2: // %bb.0: // %entry ; SDAG-MOPS-O2-NEXT: // kill: def $w1 killed $w1 def $x1 -; SDAG-MOPS-O2-NEXT: mov x8, #72340172838076673 +; SDAG-MOPS-O2-NEXT: mov x8, #72340172838076673 // =0x101010101010101 ; SDAG-MOPS-O2-NEXT: and x9, x1, #0xff ; SDAG-MOPS-O2-NEXT: mul x8, x9, x8 ; SDAG-MOPS-O2-NEXT: str x8, [x0] @@ -493,7 +493,7 @@ ; GISel-WITHOUT-MOPS-O0-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_def_cfa_offset 16 ; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_offset w30, -16 -; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, #10 +; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, #10 // =0xa ; GISel-WITHOUT-MOPS-O0-NEXT: mov w2, w8 ; GISel-WITHOUT-MOPS-O0-NEXT: bl memset ; GISel-WITHOUT-MOPS-O0-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload @@ -504,14 +504,14 @@ ; GISel-WITHOUT-MOPS-O3-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_def_cfa_offset 16 ; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_offset w30, -16 -; GISel-WITHOUT-MOPS-O3-NEXT: mov w2, #10 +; GISel-WITHOUT-MOPS-O3-NEXT: mov w2, #10 // =0xa ; GISel-WITHOUT-MOPS-O3-NEXT: bl memset ; GISel-WITHOUT-MOPS-O3-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; GISel-WITHOUT-MOPS-O3-NEXT: ret ; ; GISel-MOPS-O0-LABEL: memset_10_volatile: ; GISel-MOPS-O0: // %bb.0: // %entry -; GISel-MOPS-O0-NEXT: mov w8, #10 +; GISel-MOPS-O0-NEXT: mov w8, #10 // =0xa ; GISel-MOPS-O0-NEXT: // kill: def $x8 killed $w8 ; GISel-MOPS-O0-NEXT: // implicit-def: $x9 ; GISel-MOPS-O0-NEXT: mov w9, w1 @@ -522,7 +522,7 @@ ; ; GISel-MOPS-O3-LABEL: memset_10_volatile: ; GISel-MOPS-O3: // %bb.0: // %entry -; GISel-MOPS-O3-NEXT: mov w8, #10 +; GISel-MOPS-O3-NEXT: mov w8, #10 // =0xa ; GISel-MOPS-O3-NEXT: // kill: def $w1 killed $w1 def $x1 ; GISel-MOPS-O3-NEXT: setp [x0]!, x8!, x1 ; GISel-MOPS-O3-NEXT: setm [x0]!, x8!, x1 @@ -532,7 +532,7 @@ ; SDAG-WITHOUT-MOPS-O2-LABEL: memset_10_volatile: ; SDAG-WITHOUT-MOPS-O2: // %bb.0: // %entry ; SDAG-WITHOUT-MOPS-O2-NEXT: // kill: def $w1 killed $w1 def $x1 -; SDAG-WITHOUT-MOPS-O2-NEXT: mov x8, #72340172838076673 +; SDAG-WITHOUT-MOPS-O2-NEXT: mov x8, #72340172838076673 // =0x101010101010101 ; SDAG-WITHOUT-MOPS-O2-NEXT: and x9, x1, #0xff ; SDAG-WITHOUT-MOPS-O2-NEXT: mul x8, x9, x8 ; SDAG-WITHOUT-MOPS-O2-NEXT: str x8, [x0] @@ -542,7 +542,7 @@ ; SDAG-MOPS-O2-LABEL: memset_10_volatile: ; SDAG-MOPS-O2: // %bb.0: // %entry ; SDAG-MOPS-O2-NEXT: // kill: def $w1 killed $w1 def $x1 -; SDAG-MOPS-O2-NEXT: mov x8, #72340172838076673 +; SDAG-MOPS-O2-NEXT: mov x8, #72340172838076673 // =0x101010101010101 ; SDAG-MOPS-O2-NEXT: and x9, x1, #0xff ; SDAG-MOPS-O2-NEXT: mul x8, x9, x8 ; SDAG-MOPS-O2-NEXT: str x8, [x0] @@ -560,7 +560,7 @@ ; GISel-WITHOUT-MOPS-O0-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_def_cfa_offset 16 ; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_offset w30, -16 -; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, #10000 +; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, #10000 // =0x2710 ; GISel-WITHOUT-MOPS-O0-NEXT: mov w2, w8 ; GISel-WITHOUT-MOPS-O0-NEXT: bl memset ; GISel-WITHOUT-MOPS-O0-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload @@ -571,14 +571,14 @@ ; GISel-WITHOUT-MOPS-O3-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_def_cfa_offset 16 ; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_offset w30, -16 -; GISel-WITHOUT-MOPS-O3-NEXT: mov w2, #10000 +; GISel-WITHOUT-MOPS-O3-NEXT: mov w2, #10000 // =0x2710 ; GISel-WITHOUT-MOPS-O3-NEXT: bl memset ; GISel-WITHOUT-MOPS-O3-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; GISel-WITHOUT-MOPS-O3-NEXT: ret ; ; GISel-MOPS-O0-LABEL: memset_10000: ; GISel-MOPS-O0: // %bb.0: // %entry -; GISel-MOPS-O0-NEXT: mov w8, #10000 +; GISel-MOPS-O0-NEXT: mov w8, #10000 // =0x2710 ; GISel-MOPS-O0-NEXT: // kill: def $x8 killed $w8 ; GISel-MOPS-O0-NEXT: // implicit-def: $x9 ; GISel-MOPS-O0-NEXT: mov w9, w1 @@ -589,7 +589,7 @@ ; ; GISel-MOPS-O3-LABEL: memset_10000: ; GISel-MOPS-O3: // %bb.0: // %entry -; GISel-MOPS-O3-NEXT: mov w8, #10000 +; GISel-MOPS-O3-NEXT: mov w8, #10000 // =0x2710 ; GISel-MOPS-O3-NEXT: // kill: def $w1 killed $w1 def $x1 ; GISel-MOPS-O3-NEXT: setp [x0]!, x8!, x1 ; GISel-MOPS-O3-NEXT: setm [x0]!, x8!, x1 @@ -601,14 +601,14 @@ ; SDAG-WITHOUT-MOPS-O2-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_def_cfa_offset 16 ; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_offset w30, -16 -; SDAG-WITHOUT-MOPS-O2-NEXT: mov w2, #10000 +; SDAG-WITHOUT-MOPS-O2-NEXT: mov w2, #10000 // =0x2710 ; SDAG-WITHOUT-MOPS-O2-NEXT: bl memset ; SDAG-WITHOUT-MOPS-O2-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; SDAG-WITHOUT-MOPS-O2-NEXT: ret ; ; SDAG-MOPS-O2-LABEL: memset_10000: ; SDAG-MOPS-O2: // %bb.0: // %entry -; SDAG-MOPS-O2-NEXT: mov w8, #10000 +; SDAG-MOPS-O2-NEXT: mov w8, #10000 // =0x2710 ; SDAG-MOPS-O2-NEXT: // kill: def $w1 killed $w1 def $x1 ; SDAG-MOPS-O2-NEXT: setp [x0]!, x8!, x1 ; SDAG-MOPS-O2-NEXT: setm [x0]!, x8!, x1 @@ -626,7 +626,7 @@ ; GISel-WITHOUT-MOPS-O0-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_def_cfa_offset 16 ; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_offset w30, -16 -; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, #10000 +; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, #10000 // =0x2710 ; GISel-WITHOUT-MOPS-O0-NEXT: mov w2, w8 ; GISel-WITHOUT-MOPS-O0-NEXT: bl memset ; GISel-WITHOUT-MOPS-O0-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload @@ -637,14 +637,14 @@ ; GISel-WITHOUT-MOPS-O3-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_def_cfa_offset 16 ; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_offset w30, -16 -; GISel-WITHOUT-MOPS-O3-NEXT: mov w2, #10000 +; GISel-WITHOUT-MOPS-O3-NEXT: mov w2, #10000 // =0x2710 ; GISel-WITHOUT-MOPS-O3-NEXT: bl memset ; GISel-WITHOUT-MOPS-O3-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; GISel-WITHOUT-MOPS-O3-NEXT: ret ; ; GISel-MOPS-O0-LABEL: memset_10000_volatile: ; GISel-MOPS-O0: // %bb.0: // %entry -; GISel-MOPS-O0-NEXT: mov w8, #10000 +; GISel-MOPS-O0-NEXT: mov w8, #10000 // =0x2710 ; GISel-MOPS-O0-NEXT: // kill: def $x8 killed $w8 ; GISel-MOPS-O0-NEXT: // implicit-def: $x9 ; GISel-MOPS-O0-NEXT: mov w9, w1 @@ -655,7 +655,7 @@ ; ; GISel-MOPS-O3-LABEL: memset_10000_volatile: ; GISel-MOPS-O3: // %bb.0: // %entry -; GISel-MOPS-O3-NEXT: mov w8, #10000 +; GISel-MOPS-O3-NEXT: mov w8, #10000 // =0x2710 ; GISel-MOPS-O3-NEXT: // kill: def $w1 killed $w1 def $x1 ; GISel-MOPS-O3-NEXT: setp [x0]!, x8!, x1 ; GISel-MOPS-O3-NEXT: setm [x0]!, x8!, x1 @@ -667,14 +667,14 @@ ; SDAG-WITHOUT-MOPS-O2-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_def_cfa_offset 16 ; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_offset w30, -16 -; SDAG-WITHOUT-MOPS-O2-NEXT: mov w2, #10000 +; SDAG-WITHOUT-MOPS-O2-NEXT: mov w2, #10000 // =0x2710 ; SDAG-WITHOUT-MOPS-O2-NEXT: bl memset ; SDAG-WITHOUT-MOPS-O2-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; SDAG-WITHOUT-MOPS-O2-NEXT: ret ; ; SDAG-MOPS-O2-LABEL: memset_10000_volatile: ; SDAG-MOPS-O2: // %bb.0: // %entry -; SDAG-MOPS-O2-NEXT: mov w8, #10000 +; SDAG-MOPS-O2-NEXT: mov w8, #10000 // =0x2710 ; SDAG-MOPS-O2-NEXT: // kill: def $w1 killed $w1 def $x1 ; SDAG-MOPS-O2-NEXT: setp [x0]!, x8!, x1 ; SDAG-MOPS-O2-NEXT: setm [x0]!, x8!, x1 @@ -910,7 +910,7 @@ ; GISel-WITHOUT-MOPS-O0-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_def_cfa_offset 16 ; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_offset w30, -16 -; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, #10 +; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, #10 // =0xa ; GISel-WITHOUT-MOPS-O0-NEXT: mov w2, w8 ; GISel-WITHOUT-MOPS-O0-NEXT: bl memcpy ; GISel-WITHOUT-MOPS-O0-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload @@ -921,14 +921,14 @@ ; GISel-WITHOUT-MOPS-O3-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_def_cfa_offset 16 ; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_offset w30, -16 -; GISel-WITHOUT-MOPS-O3-NEXT: mov w2, #10 +; GISel-WITHOUT-MOPS-O3-NEXT: mov w2, #10 // =0xa ; GISel-WITHOUT-MOPS-O3-NEXT: bl memcpy ; GISel-WITHOUT-MOPS-O3-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; GISel-WITHOUT-MOPS-O3-NEXT: ret ; ; GISel-MOPS-O0-LABEL: memcpy_10_volatile: ; GISel-MOPS-O0: // %bb.0: // %entry -; GISel-MOPS-O0-NEXT: mov w8, #10 +; GISel-MOPS-O0-NEXT: mov w8, #10 // =0xa ; GISel-MOPS-O0-NEXT: // kill: def $x8 killed $w8 ; GISel-MOPS-O0-NEXT: cpyfp [x0]!, [x1]!, x8! ; GISel-MOPS-O0-NEXT: cpyfm [x0]!, [x1]!, x8! @@ -937,7 +937,7 @@ ; ; GISel-MOPS-O3-LABEL: memcpy_10_volatile: ; GISel-MOPS-O3: // %bb.0: // %entry -; GISel-MOPS-O3-NEXT: mov w8, #10 +; GISel-MOPS-O3-NEXT: mov w8, #10 // =0xa ; GISel-MOPS-O3-NEXT: cpyfp [x0]!, [x1]!, x8! ; GISel-MOPS-O3-NEXT: cpyfm [x0]!, [x1]!, x8! ; GISel-MOPS-O3-NEXT: cpyfe [x0]!, [x1]!, x8! @@ -969,7 +969,7 @@ ; GISel-WITHOUT-MOPS-O0-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_def_cfa_offset 16 ; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_offset w30, -16 -; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, #1000 +; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, #1000 // =0x3e8 ; GISel-WITHOUT-MOPS-O0-NEXT: mov w2, w8 ; GISel-WITHOUT-MOPS-O0-NEXT: bl memcpy ; GISel-WITHOUT-MOPS-O0-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload @@ -980,14 +980,14 @@ ; GISel-WITHOUT-MOPS-O3-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_def_cfa_offset 16 ; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_offset w30, -16 -; GISel-WITHOUT-MOPS-O3-NEXT: mov w2, #1000 +; GISel-WITHOUT-MOPS-O3-NEXT: mov w2, #1000 // =0x3e8 ; GISel-WITHOUT-MOPS-O3-NEXT: bl memcpy ; GISel-WITHOUT-MOPS-O3-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; GISel-WITHOUT-MOPS-O3-NEXT: ret ; ; GISel-MOPS-O0-LABEL: memcpy_1000: ; GISel-MOPS-O0: // %bb.0: // %entry -; GISel-MOPS-O0-NEXT: mov w8, #1000 +; GISel-MOPS-O0-NEXT: mov w8, #1000 // =0x3e8 ; GISel-MOPS-O0-NEXT: // kill: def $x8 killed $w8 ; GISel-MOPS-O0-NEXT: cpyfp [x0]!, [x1]!, x8! ; GISel-MOPS-O0-NEXT: cpyfm [x0]!, [x1]!, x8! @@ -996,7 +996,7 @@ ; ; GISel-MOPS-O3-LABEL: memcpy_1000: ; GISel-MOPS-O3: // %bb.0: // %entry -; GISel-MOPS-O3-NEXT: mov w8, #1000 +; GISel-MOPS-O3-NEXT: mov w8, #1000 // =0x3e8 ; GISel-MOPS-O3-NEXT: cpyfp [x0]!, [x1]!, x8! ; GISel-MOPS-O3-NEXT: cpyfm [x0]!, [x1]!, x8! ; GISel-MOPS-O3-NEXT: cpyfe [x0]!, [x1]!, x8! @@ -1007,14 +1007,14 @@ ; SDAG-WITHOUT-MOPS-O2-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_def_cfa_offset 16 ; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_offset w30, -16 -; SDAG-WITHOUT-MOPS-O2-NEXT: mov w2, #1000 +; SDAG-WITHOUT-MOPS-O2-NEXT: mov w2, #1000 // =0x3e8 ; SDAG-WITHOUT-MOPS-O2-NEXT: bl memcpy ; SDAG-WITHOUT-MOPS-O2-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; SDAG-WITHOUT-MOPS-O2-NEXT: ret ; ; SDAG-MOPS-O2-LABEL: memcpy_1000: ; SDAG-MOPS-O2: // %bb.0: // %entry -; SDAG-MOPS-O2-NEXT: mov w8, #1000 +; SDAG-MOPS-O2-NEXT: mov w8, #1000 // =0x3e8 ; SDAG-MOPS-O2-NEXT: cpyfp [x0]!, [x1]!, x8! ; SDAG-MOPS-O2-NEXT: cpyfm [x0]!, [x1]!, x8! ; SDAG-MOPS-O2-NEXT: cpyfe [x0]!, [x1]!, x8! @@ -1030,7 +1030,7 @@ ; GISel-WITHOUT-MOPS-O0-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_def_cfa_offset 16 ; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_offset w30, -16 -; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, #1000 +; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, #1000 // =0x3e8 ; GISel-WITHOUT-MOPS-O0-NEXT: mov w2, w8 ; GISel-WITHOUT-MOPS-O0-NEXT: bl memcpy ; GISel-WITHOUT-MOPS-O0-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload @@ -1041,14 +1041,14 @@ ; GISel-WITHOUT-MOPS-O3-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_def_cfa_offset 16 ; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_offset w30, -16 -; GISel-WITHOUT-MOPS-O3-NEXT: mov w2, #1000 +; GISel-WITHOUT-MOPS-O3-NEXT: mov w2, #1000 // =0x3e8 ; GISel-WITHOUT-MOPS-O3-NEXT: bl memcpy ; GISel-WITHOUT-MOPS-O3-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; GISel-WITHOUT-MOPS-O3-NEXT: ret ; ; GISel-MOPS-O0-LABEL: memcpy_1000_volatile: ; GISel-MOPS-O0: // %bb.0: // %entry -; GISel-MOPS-O0-NEXT: mov w8, #1000 +; GISel-MOPS-O0-NEXT: mov w8, #1000 // =0x3e8 ; GISel-MOPS-O0-NEXT: // kill: def $x8 killed $w8 ; GISel-MOPS-O0-NEXT: cpyfp [x0]!, [x1]!, x8! ; GISel-MOPS-O0-NEXT: cpyfm [x0]!, [x1]!, x8! 
@@ -1057,7 +1057,7 @@ ; ; GISel-MOPS-O3-LABEL: memcpy_1000_volatile: ; GISel-MOPS-O3: // %bb.0: // %entry -; GISel-MOPS-O3-NEXT: mov w8, #1000 +; GISel-MOPS-O3-NEXT: mov w8, #1000 // =0x3e8 ; GISel-MOPS-O3-NEXT: cpyfp [x0]!, [x1]!, x8! ; GISel-MOPS-O3-NEXT: cpyfm [x0]!, [x1]!, x8! ; GISel-MOPS-O3-NEXT: cpyfe [x0]!, [x1]!, x8! @@ -1068,14 +1068,14 @@ ; SDAG-WITHOUT-MOPS-O2-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_def_cfa_offset 16 ; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_offset w30, -16 -; SDAG-WITHOUT-MOPS-O2-NEXT: mov w2, #1000 +; SDAG-WITHOUT-MOPS-O2-NEXT: mov w2, #1000 // =0x3e8 ; SDAG-WITHOUT-MOPS-O2-NEXT: bl memcpy ; SDAG-WITHOUT-MOPS-O2-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; SDAG-WITHOUT-MOPS-O2-NEXT: ret ; ; SDAG-MOPS-O2-LABEL: memcpy_1000_volatile: ; SDAG-MOPS-O2: // %bb.0: // %entry -; SDAG-MOPS-O2-NEXT: mov w8, #1000 +; SDAG-MOPS-O2-NEXT: mov w8, #1000 // =0x3e8 ; SDAG-MOPS-O2-NEXT: cpyfp [x0]!, [x1]!, x8! ; SDAG-MOPS-O2-NEXT: cpyfm [x0]!, [x1]!, x8! ; SDAG-MOPS-O2-NEXT: cpyfe [x0]!, [x1]!, x8! @@ -1461,29 +1461,29 @@ ; SDAG-WITHOUT-MOPS-O2-NEXT: stp q1, q0, [x0, #16] ; SDAG-WITHOUT-MOPS-O2-NEXT: str q2, [x0] ; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q1, q0, [x1, #80] -; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q3, q2, [x1, #48] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q2, q3, [x1, #48] ; SDAG-WITHOUT-MOPS-O2-NEXT: stp q1, q0, [x0, #80] -; SDAG-WITHOUT-MOPS-O2-NEXT: stp q3, q2, [x0, #48] +; SDAG-WITHOUT-MOPS-O2-NEXT: stp q2, q3, [x0, #48] ; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q1, q0, [x1, #144] -; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q3, q2, [x1, #112] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q2, q3, [x1, #112] ; SDAG-WITHOUT-MOPS-O2-NEXT: stp q1, q0, [x0, #144] -; SDAG-WITHOUT-MOPS-O2-NEXT: stp q3, q2, [x0, #112] +; SDAG-WITHOUT-MOPS-O2-NEXT: stp q2, q3, [x0, #112] ; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q1, q0, [x1, #208] -; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q3, q2, [x1, #176] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q2, q3, [x1, #176] ; SDAG-WITHOUT-MOPS-O2-NEXT: stp q1, q0, [x0, #208] -; SDAG-WITHOUT-MOPS-O2-NEXT: stp q3, q2, [x0, #176] -; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q2, q1, [x1, #256] +; SDAG-WITHOUT-MOPS-O2-NEXT: stp q2, q3, [x0, #176] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldp q3, q1, [x1, #256] ; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q0, [x8] +; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q2, [x1, #240] ; SDAG-WITHOUT-MOPS-O2-NEXT: add x8, x0, #284 -; SDAG-WITHOUT-MOPS-O2-NEXT: ldr q3, [x1, #240] ; SDAG-WITHOUT-MOPS-O2-NEXT: str q0, [x8] -; SDAG-WITHOUT-MOPS-O2-NEXT: stp q2, q1, [x0, #256] -; SDAG-WITHOUT-MOPS-O2-NEXT: str q3, [x0, #240] +; SDAG-WITHOUT-MOPS-O2-NEXT: stp q3, q1, [x0, #256] +; SDAG-WITHOUT-MOPS-O2-NEXT: str q2, [x0, #240] ; SDAG-WITHOUT-MOPS-O2-NEXT: ret ; ; SDAG-MOPS-O2-LABEL: memcpy_inline_300: ; SDAG-MOPS-O2: // %bb.0: // %entry -; SDAG-MOPS-O2-NEXT: mov w8, #300 +; SDAG-MOPS-O2-NEXT: mov w8, #300 // =0x12c ; SDAG-MOPS-O2-NEXT: cpyfp [x0]!, [x1]!, x8! ; SDAG-MOPS-O2-NEXT: cpyfm [x0]!, [x1]!, x8! ; SDAG-MOPS-O2-NEXT: cpyfe [x0]!, [x1]!, x8! @@ -1628,7 +1628,7 @@ ; ; SDAG-MOPS-O2-LABEL: memcpy_inline_300_volatile: ; SDAG-MOPS-O2: // %bb.0: // %entry -; SDAG-MOPS-O2-NEXT: mov w8, #300 +; SDAG-MOPS-O2-NEXT: mov w8, #300 // =0x12c ; SDAG-MOPS-O2-NEXT: cpyfp [x0]!, [x1]!, x8! ; SDAG-MOPS-O2-NEXT: cpyfm [x0]!, [x1]!, x8! ; SDAG-MOPS-O2-NEXT: cpyfe [x0]!, [x1]!, x8! @@ -1739,7 +1739,7 @@ ; GISel-WITHOUT-MOPS-O0-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_def_cfa_offset 16 ; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_offset w30, -16 -; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, #10 +; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, #10 // =0xa ; GISel-WITHOUT-MOPS-O0-NEXT: mov w2, w8 ; GISel-WITHOUT-MOPS-O0-NEXT: bl memmove ; GISel-WITHOUT-MOPS-O0-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload @@ -1750,14 +1750,14 @@ ; GISel-WITHOUT-MOPS-O3-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_def_cfa_offset 16 ; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_offset w30, -16 -; GISel-WITHOUT-MOPS-O3-NEXT: mov w2, #10 +; GISel-WITHOUT-MOPS-O3-NEXT: mov w2, #10 // =0xa ; GISel-WITHOUT-MOPS-O3-NEXT: bl memmove ; GISel-WITHOUT-MOPS-O3-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; GISel-WITHOUT-MOPS-O3-NEXT: ret ; ; GISel-MOPS-O0-LABEL: memmove_10_volatile: ; GISel-MOPS-O0: // %bb.0: // %entry -; GISel-MOPS-O0-NEXT: mov w8, #10 +; GISel-MOPS-O0-NEXT: mov w8, #10 // =0xa ; GISel-MOPS-O0-NEXT: // kill: def $x8 killed $w8 ; GISel-MOPS-O0-NEXT: cpyp [x0]!, [x1]!, x8! ; GISel-MOPS-O0-NEXT: cpym [x0]!, [x1]!, x8! @@ -1766,7 +1766,7 @@ ; ; GISel-MOPS-O3-LABEL: memmove_10_volatile: ; GISel-MOPS-O3: // %bb.0: // %entry -; GISel-MOPS-O3-NEXT: mov w8, #10 +; GISel-MOPS-O3-NEXT: mov w8, #10 // =0xa ; GISel-MOPS-O3-NEXT: cpyp [x0]!, [x1]!, x8! ; GISel-MOPS-O3-NEXT: cpym [x0]!, [x1]!, x8! ; GISel-MOPS-O3-NEXT: cpye [x0]!, [x1]!, x8! @@ -1798,7 +1798,7 @@ ; GISel-WITHOUT-MOPS-O0-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_def_cfa_offset 16 ; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_offset w30, -16 -; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, #1000 +; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, #1000 // =0x3e8 ; GISel-WITHOUT-MOPS-O0-NEXT: mov w2, w8 ; GISel-WITHOUT-MOPS-O0-NEXT: bl memmove ; GISel-WITHOUT-MOPS-O0-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload @@ -1809,14 +1809,14 @@ ; GISel-WITHOUT-MOPS-O3-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_def_cfa_offset 16 ; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_offset w30, -16 -; GISel-WITHOUT-MOPS-O3-NEXT: mov w2, #1000 +; GISel-WITHOUT-MOPS-O3-NEXT: mov w2, #1000 // =0x3e8 ; GISel-WITHOUT-MOPS-O3-NEXT: bl memmove ; GISel-WITHOUT-MOPS-O3-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; GISel-WITHOUT-MOPS-O3-NEXT: ret ; ; GISel-MOPS-O0-LABEL: memmove_1000: ; GISel-MOPS-O0: // %bb.0: // %entry -; GISel-MOPS-O0-NEXT: mov w8, #1000 +; GISel-MOPS-O0-NEXT: mov w8, #1000 // =0x3e8 ; GISel-MOPS-O0-NEXT: // kill: def $x8 killed $w8 ; GISel-MOPS-O0-NEXT: cpyp [x0]!, [x1]!, x8! ; GISel-MOPS-O0-NEXT: cpym [x0]!, [x1]!, x8! @@ -1825,7 +1825,7 @@ ; ; GISel-MOPS-O3-LABEL: memmove_1000: ; GISel-MOPS-O3: // %bb.0: // %entry -; GISel-MOPS-O3-NEXT: mov w8, #1000 +; GISel-MOPS-O3-NEXT: mov w8, #1000 // =0x3e8 ; GISel-MOPS-O3-NEXT: cpyp [x0]!, [x1]!, x8! ; GISel-MOPS-O3-NEXT: cpym [x0]!, [x1]!, x8! ; GISel-MOPS-O3-NEXT: cpye [x0]!, [x1]!, x8! @@ -1836,14 +1836,14 @@ ; SDAG-WITHOUT-MOPS-O2-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_def_cfa_offset 16 ; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_offset w30, -16 -; SDAG-WITHOUT-MOPS-O2-NEXT: mov w2, #1000 +; SDAG-WITHOUT-MOPS-O2-NEXT: mov w2, #1000 // =0x3e8 ; SDAG-WITHOUT-MOPS-O2-NEXT: bl memmove ; SDAG-WITHOUT-MOPS-O2-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; SDAG-WITHOUT-MOPS-O2-NEXT: ret ; ; SDAG-MOPS-O2-LABEL: memmove_1000: ; SDAG-MOPS-O2: // %bb.0: // %entry -; SDAG-MOPS-O2-NEXT: mov w8, #1000 +; SDAG-MOPS-O2-NEXT: mov w8, #1000 // =0x3e8 ; SDAG-MOPS-O2-NEXT: cpyp [x0]!, [x1]!, x8! ; SDAG-MOPS-O2-NEXT: cpym [x0]!, [x1]!, x8! ; SDAG-MOPS-O2-NEXT: cpye [x0]!, [x1]!, x8! @@ -1859,7 +1859,7 @@ ; GISel-WITHOUT-MOPS-O0-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_def_cfa_offset 16 ; GISel-WITHOUT-MOPS-O0-NEXT: .cfi_offset w30, -16 -; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, #1000 +; GISel-WITHOUT-MOPS-O0-NEXT: mov w8, #1000 // =0x3e8 ; GISel-WITHOUT-MOPS-O0-NEXT: mov w2, w8 ; GISel-WITHOUT-MOPS-O0-NEXT: bl memmove ; GISel-WITHOUT-MOPS-O0-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload @@ -1870,14 +1870,14 @@ ; GISel-WITHOUT-MOPS-O3-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_def_cfa_offset 16 ; GISel-WITHOUT-MOPS-O3-NEXT: .cfi_offset w30, -16 -; GISel-WITHOUT-MOPS-O3-NEXT: mov w2, #1000 +; GISel-WITHOUT-MOPS-O3-NEXT: mov w2, #1000 // =0x3e8 ; GISel-WITHOUT-MOPS-O3-NEXT: bl memmove ; GISel-WITHOUT-MOPS-O3-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; GISel-WITHOUT-MOPS-O3-NEXT: ret ; ; GISel-MOPS-O0-LABEL: memmove_1000_volatile: ; GISel-MOPS-O0: // %bb.0: // %entry -; GISel-MOPS-O0-NEXT: mov w8, #1000 +; GISel-MOPS-O0-NEXT: mov w8, #1000 // =0x3e8 ; GISel-MOPS-O0-NEXT: // kill: def $x8 killed $w8 ; GISel-MOPS-O0-NEXT: cpyp [x0]!, [x1]!, x8! ; GISel-MOPS-O0-NEXT: cpym [x0]!, [x1]!, x8! @@ -1886,7 +1886,7 @@ ; ; GISel-MOPS-O3-LABEL: memmove_1000_volatile: ; GISel-MOPS-O3: // %bb.0: // %entry -; GISel-MOPS-O3-NEXT: mov w8, #1000 +; GISel-MOPS-O3-NEXT: mov w8, #1000 // =0x3e8 ; GISel-MOPS-O3-NEXT: cpyp [x0]!, [x1]!, x8! ; GISel-MOPS-O3-NEXT: cpym [x0]!, [x1]!, x8! ; GISel-MOPS-O3-NEXT: cpye [x0]!, [x1]!, x8! @@ -1897,14 +1897,14 @@ ; SDAG-WITHOUT-MOPS-O2-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_def_cfa_offset 16 ; SDAG-WITHOUT-MOPS-O2-NEXT: .cfi_offset w30, -16 -; SDAG-WITHOUT-MOPS-O2-NEXT: mov w2, #1000 +; SDAG-WITHOUT-MOPS-O2-NEXT: mov w2, #1000 // =0x3e8 ; SDAG-WITHOUT-MOPS-O2-NEXT: bl memmove ; SDAG-WITHOUT-MOPS-O2-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; SDAG-WITHOUT-MOPS-O2-NEXT: ret ; ; SDAG-MOPS-O2-LABEL: memmove_1000_volatile: ; SDAG-MOPS-O2: // %bb.0: // %entry -; SDAG-MOPS-O2-NEXT: mov w8, #1000 +; SDAG-MOPS-O2-NEXT: mov w8, #1000 // =0x3e8 ; SDAG-MOPS-O2-NEXT: cpyp [x0]!, [x1]!, x8! ; SDAG-MOPS-O2-NEXT: cpym [x0]!, [x1]!, x8! ; SDAG-MOPS-O2-NEXT: cpye [x0]!, [x1]!, x8! 
diff --git a/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll b/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll --- a/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-mull-masks.ll @@ -80,8 +80,8 @@ define i64 @smull_ldrsb_b(ptr %x0, i8 %x1) { ; CHECK-LABEL: smull_ldrsb_b: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldrsb x8, [x0] ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: ldrsb x8, [x0] ; CHECK-NEXT: sxtb x9, w1 ; CHECK-NEXT: smull x0, w8, w9 ; CHECK-NEXT: ret @@ -96,8 +96,8 @@ define i64 @smull_ldrsb_b_commuted(ptr %x0, i8 %x1) { ; CHECK-LABEL: smull_ldrsb_b_commuted: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldrsb x8, [x0] ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: ldrsb x8, [x0] ; CHECK-NEXT: sxtb x9, w1 ; CHECK-NEXT: smull x0, w9, w8 ; CHECK-NEXT: ret @@ -112,8 +112,8 @@ define i64 @smull_ldrsb_h(ptr %x0, i16 %x1) { ; CHECK-LABEL: smull_ldrsb_h: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldrsb x8, [x0] ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: ldrsb x8, [x0] ; CHECK-NEXT: sxth x9, w1 ; CHECK-NEXT: smull x0, w8, w9 ; CHECK-NEXT: ret @@ -142,8 +142,8 @@ define i64 @smull_ldrsh_b(ptr %x0, i8 %x1) { ; CHECK-LABEL: smull_ldrsh_b: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldrsh x8, [x0] ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: ldrsh x8, [x0] ; CHECK-NEXT: sxtb x9, w1 ; CHECK-NEXT: smull x0, w8, w9 ; CHECK-NEXT: ret @@ -158,8 +158,8 @@ define i64 @smull_ldrsh_h(ptr %x0, i16 %x1) { ; CHECK-LABEL: smull_ldrsh_h: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldrsh x8, [x0] ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: ldrsh x8, [x0] ; CHECK-NEXT: sxth x9, w1 ; CHECK-NEXT: smull x0, w8, w9 ; CHECK-NEXT: ret @@ -174,8 +174,8 @@ define i64 @smull_ldrsh_h_commuted(ptr %x0, i16 %x1) { ; CHECK-LABEL: smull_ldrsh_h_commuted: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldrsh x8, [x0] ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: ldrsh x8, [x0] ; CHECK-NEXT: sxth x9, w1 ; CHECK-NEXT: smull x0, w9, w8 ; CHECK-NEXT: ret @@ -204,8 +204,8 @@ define i64 @smull_ldrsw_b(ptr %x0, i8 %x1) { ; CHECK-LABEL: smull_ldrsw_b: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldrsw x8, [x0] ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: ldrsw x8, [x0] ; CHECK-NEXT: sxtb x9, w1 ; CHECK-NEXT: smull x0, w8, w9 ; CHECK-NEXT: ret @@ -220,8 +220,8 @@ define i64 @smull_ldrsw_h(ptr %x0, i16 %x1) { ; CHECK-LABEL: smull_ldrsw_h: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldrsw x8, [x0] ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: ldrsw x8, [x0] ; CHECK-NEXT: sxth x9, w1 ; CHECK-NEXT: smull x0, w8, w9 ; CHECK-NEXT: ret @@ -359,8 +359,8 @@ define i64 @smaddl_ldrsb_h(ptr %x0, i16 %x1, i64 %x2) { ; CHECK-LABEL: smaddl_ldrsb_h: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldrsb x8, [x0] ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: ldrsb x8, [x0] ; CHECK-NEXT: sxth x9, w1 ; CHECK-NEXT: smaddl x0, w8, w9, x2 ; CHECK-NEXT: ret @@ -376,8 +376,8 @@ define i64 @smaddl_ldrsb_h_commuted(ptr %x0, i16 %x1, i64 %x2) { ; CHECK-LABEL: smaddl_ldrsb_h_commuted: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldrsb x8, [x0] ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: ldrsb x8, [x0] ; CHECK-NEXT: sxth x9, w1 ; CHECK-NEXT: smaddl x0, w9, w8, x2 ; CHECK-NEXT: ret @@ -423,8 +423,8 @@ define i64 @smaddl_ldrsw_b(ptr %x0, i8 %x1, i64 %x2) { ; CHECK-LABEL: smaddl_ldrsw_b: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrsw x8, [x0] ; 
CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: ldrsw x8, [x0] ; CHECK-NEXT: sxtb x9, w1 ; CHECK-NEXT: smaddl x0, w8, w9, x2 ; CHECK-NEXT: ret @@ -439,8 +439,8 @@ define i64 @smaddl_ldrsw_b_commuted(ptr %x0, i8 %x1, i64 %x2) { ; CHECK-LABEL: smaddl_ldrsw_b_commuted: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrsw x8, [x0] ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: ldrsw x8, [x0] ; CHECK-NEXT: sxtb x9, w1 ; CHECK-NEXT: smaddl x0, w9, w8, x2 ; CHECK-NEXT: ret @@ -523,8 +523,8 @@ define i64 @smnegl_ldrsb_h(ptr %x0, i16 %x1) { ; CHECK-LABEL: smnegl_ldrsb_h: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldrsb x8, [x0] ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: ldrsb x8, [x0] ; CHECK-NEXT: sxth x9, w1 ; CHECK-NEXT: smnegl x0, w8, w9 ; CHECK-NEXT: ret @@ -540,8 +540,8 @@ define i64 @smnegl_ldrsb_h_commuted(ptr %x0, i16 %x1) { ; CHECK-LABEL: smnegl_ldrsb_h_commuted: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldrsb x8, [x0] ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: ldrsb x8, [x0] ; CHECK-NEXT: sxth x9, w1 ; CHECK-NEXT: smnegl x0, w9, w8 ; CHECK-NEXT: ret @@ -587,8 +587,8 @@ define i64 @smnegl_ldrsw_b(ptr %x0, i8 %x1) { ; CHECK-LABEL: smnegl_ldrsw_b: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrsw x8, [x0] ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: ldrsw x8, [x0] ; CHECK-NEXT: sxtb x9, w1 ; CHECK-NEXT: smnegl x0, w8, w9 ; CHECK-NEXT: ret @@ -603,8 +603,8 @@ define i64 @smnegl_ldrsw_b_commuted(ptr %x0, i8 %x1) { ; CHECK-LABEL: smnegl_ldrsw_b_commuted: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrsw x8, [x0] ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: ldrsw x8, [x0] ; CHECK-NEXT: sxtb x9, w1 ; CHECK-NEXT: smnegl x0, w9, w8 ; CHECK-NEXT: ret @@ -687,8 +687,8 @@ define i64 @smsubl_ldrsb_h(ptr %x0, i16 %x1, i64 %x2) { ; CHECK-LABEL: smsubl_ldrsb_h: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldrsb x8, [x0] ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: ldrsb x8, [x0] ; CHECK-NEXT: sxth x9, w1 ; CHECK-NEXT: smsubl x0, w8, w9, x2 ; CHECK-NEXT: ret @@ -704,8 +704,8 @@ define i64 @smsubl_ldrsb_h_commuted(ptr %x0, i16 %x1, i64 %x2) { ; CHECK-LABEL: smsubl_ldrsb_h_commuted: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldrsb x8, [x0] ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: ldrsb x8, [x0] ; CHECK-NEXT: sxth x9, w1 ; CHECK-NEXT: smsubl x0, w9, w8, x2 ; CHECK-NEXT: ret @@ -751,8 +751,8 @@ define i64 @smsubl_ldrsw_b(ptr %x0, i8 %x1, i64 %x2) { ; CHECK-LABEL: smsubl_ldrsw_b: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrsw x8, [x0] ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: ldrsw x8, [x0] ; CHECK-NEXT: sxtb x9, w1 ; CHECK-NEXT: smsubl x0, w8, w9, x2 ; CHECK-NEXT: ret @@ -767,8 +767,8 @@ define i64 @smsubl_ldrsw_b_commuted(ptr %x0, i8 %x1, i64 %x2) { ; CHECK-LABEL: smsubl_ldrsw_b_commuted: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrsw x8, [x0] ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: ldrsw x8, [x0] ; CHECK-NEXT: sxtb x9, w1 ; CHECK-NEXT: smsubl x0, w9, w8, x2 ; CHECK-NEXT: ret @@ -1372,10 +1372,10 @@ define i64 @umull_ldr2_w_cc2(ptr %x0, i32 %x1) { ; CHECK-LABEL: umull_ldr2_w_cc2: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldr x9, [x0] -; CHECK-NEXT: mov w8, w1 -; CHECK-NEXT: and x9, x9, #0x1ffffffff -; CHECK-NEXT: mul x0, x9, x8 +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: mov w9, w1 +; CHECK-NEXT: and x8, x8, #0x1ffffffff +; CHECK-NEXT: mul x0, x8, x9 ; CHECK-NEXT: ret entry: %ext64 = load i64, ptr %x0 diff --git 
a/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll b/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll --- a/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-neon-vector-insert-uaddlv.ll @@ -48,12 +48,12 @@ ; CHECK-LABEL: insert_vec_v16i32_uaddlv_from_v8i16: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: movi.2d v0, #0000000000000000 -; CHECK-NEXT: movi.2d v1, #0000000000000000 -; CHECK-NEXT: uaddlv.8h s2, v0 +; CHECK-NEXT: movi.2d v2, #0000000000000000 +; CHECK-NEXT: uaddlv.8h s1, v0 ; CHECK-NEXT: stp q0, q0, [x0, #32] -; CHECK-NEXT: mov.s v1[0], v2[0] -; CHECK-NEXT: ucvtf.4s v1, v1 -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: mov.s v2[0], v1[0] +; CHECK-NEXT: ucvtf.4s v2, v2 +; CHECK-NEXT: stp q2, q0, [x0] ; CHECK-NEXT: ret entry: @@ -68,16 +68,16 @@ ; CHECK-LABEL: insert_vec_v23i32_uaddlv_from_v8i16: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: movi.2d v0, #0000000000000000 +; CHECK-NEXT: movi.2d v2, #0000000000000000 ; CHECK-NEXT: add x8, x0, #88 -; CHECK-NEXT: movi.2d v1, #0000000000000000 -; CHECK-NEXT: uaddlv.8h s2, v0 +; CHECK-NEXT: uaddlv.8h s1, v0 ; CHECK-NEXT: stp q0, q0, [x0, #16] ; CHECK-NEXT: stp q0, q0, [x0, #48] ; CHECK-NEXT: st1.s { v0 }[2], [x8] -; CHECK-NEXT: mov.s v1[0], v2[0] ; CHECK-NEXT: str d0, [x0, #80] -; CHECK-NEXT: ucvtf.4s v1, v1 -; CHECK-NEXT: str q1, [x0] +; CHECK-NEXT: mov.s v2[0], v1[0] +; CHECK-NEXT: ucvtf.4s v2, v2 +; CHECK-NEXT: str q2, [x0] ; CHECK-NEXT: ret entry: @@ -207,8 +207,8 @@ ; CHECK-LABEL: insert_vec_v8i16_uaddlv_from_v8i16: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: movi.2d v0, #0000000000000000 -; CHECK-NEXT: stp xzr, xzr, [x0, #16] ; CHECK-NEXT: movi.2d v1, #0000000000000000 +; CHECK-NEXT: stp xzr, xzr, [x0, #16] ; CHECK-NEXT: uaddlv.8h s0, v0 ; CHECK-NEXT: mov.h v1[0], v0[0] ; CHECK-NEXT: ushll.4s v1, v1, #0 @@ -229,8 +229,8 @@ ; CHECK-LABEL: insert_vec_v3i16_uaddlv_from_v8i16: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: movi.2d v0, #0000000000000000 -; CHECK-NEXT: add x8, x0, #8 ; CHECK-NEXT: movi.2d v1, #0000000000000000 +; CHECK-NEXT: add x8, x0, #8 ; CHECK-NEXT: uaddlv.8h s0, v0 ; CHECK-NEXT: mov.h v1[0], v0[0] ; CHECK-NEXT: ushll.4s v1, v1, #0 @@ -252,13 +252,13 @@ ; CHECK-LABEL: insert_vec_v16i64_uaddlv_from_v4i16: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: movi.2d v0, #0000000000000000 -; CHECK-NEXT: movi.2d v1, #0000000000000000 -; CHECK-NEXT: uaddlv.4h s2, v0 +; CHECK-NEXT: movi.2d v2, #0000000000000000 +; CHECK-NEXT: uaddlv.4h s1, v0 ; CHECK-NEXT: stp q0, q0, [x0, #32] -; CHECK-NEXT: mov.s v1[0], v2[0] -; CHECK-NEXT: ucvtf.2d v1, v1 -; CHECK-NEXT: fcvtn v1.2s, v1.2d -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: mov.s v2[0], v1[0] +; CHECK-NEXT: ucvtf.2d v2, v2 +; CHECK-NEXT: fcvtn v2.2s, v2.2d +; CHECK-NEXT: stp q2, q0, [x0] ; CHECK-NEXT: ret entry: @@ -274,14 +274,14 @@ ; CHECK-LABEL: insert_vec_v16i8_uaddlv_from_v8i8: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: movi.2d v0, #0000000000000000 -; CHECK-NEXT: movi.2d v1, #0000000000000000 -; CHECK-NEXT: uaddlv.8b h2, v0 +; CHECK-NEXT: movi.2d v2, #0000000000000000 +; CHECK-NEXT: uaddlv.8b h1, v0 ; CHECK-NEXT: stp q0, q0, [x0, #32] -; CHECK-NEXT: mov.h v1[0], v2[0] -; CHECK-NEXT: bic.4h v1, #255, lsl #8 -; CHECK-NEXT: ushll.4s v1, v1, #0 -; CHECK-NEXT: ucvtf.4s v1, v1 -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: mov.h v2[0], v1[0] +; CHECK-NEXT: bic.4h v2, #255, lsl #8 +; CHECK-NEXT: ushll.4s v2, v2, #0 +; CHECK-NEXT: ucvtf.4s v2, v2 +; CHECK-NEXT: stp q2, q0, [x0] ; CHECK-NEXT: ret entry: @@ -361,11 +361,11 @@ ; 
CHECK-LABEL: insert_vec_v16i32_uaddlv_from_v4i32: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: movi.2d v0, #0000000000000000 -; CHECK-NEXT: movi.2d v1, #0000000000000000 -; CHECK-NEXT: uaddlv.4s d2, v0 +; CHECK-NEXT: movi.2d v2, #0000000000000000 +; CHECK-NEXT: uaddlv.4s d1, v0 ; CHECK-NEXT: stp q0, q0, [x0, #32] -; CHECK-NEXT: mov.s v1[0], v2[0] -; CHECK-NEXT: ucvtf.4s v1, v1 +; CHECK-NEXT: mov.s v2[0], v1[0] +; CHECK-NEXT: ucvtf.4s v1, v2 ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret @@ -426,8 +426,8 @@ ; CHECK-LABEL: insert_vec_v8i8_uaddlv_from_v4i32: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: movi.2d v0, #0000000000000000 -; CHECK-NEXT: stp xzr, xzr, [x0, #16] ; CHECK-NEXT: movi.2d v1, #0000000000000000 +; CHECK-NEXT: stp xzr, xzr, [x0, #16] ; CHECK-NEXT: uaddlv.4s d0, v0 ; CHECK-NEXT: mov.h v1[0], v0[0] ; CHECK-NEXT: bic.4h v1, #255, lsl #8 @@ -454,8 +454,8 @@ ; CHECK-NEXT: mov.h v1[0], v0[0] ; CHECK-NEXT: movi.2d v0, #0000000000000000 ; CHECK-NEXT: bic.4h v1, #255, lsl #8 -; CHECK-NEXT: ushll.4s v1, v1, #0 ; CHECK-NEXT: stp q0, q0, [x0, #32] +; CHECK-NEXT: ushll.4s v1, v1, #0 ; CHECK-NEXT: ucvtf.4s v1, v1 ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/aarch64-pmull2.ll b/llvm/test/CodeGen/AArch64/aarch64-pmull2.ll --- a/llvm/test/CodeGen/AArch64/aarch64-pmull2.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-pmull2.ll @@ -8,19 +8,19 @@ define void @test1(ptr %0, ptr %1) { ; CHECK-LABEL: test1: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w9, #61186 -; CHECK-NEXT: mov w8, #56824 -; CHECK-NEXT: movk w9, #29710, lsl #16 +; CHECK-NEXT: mov w8, #56824 // =0xddf8 +; CHECK-NEXT: mov w9, #61186 // =0xef02 ; CHECK-NEXT: movk w8, #40522, lsl #16 +; CHECK-NEXT: movk w9, #29710, lsl #16 ; CHECK-NEXT: ldp q0, q1, [x1] -; CHECK-NEXT: fmov d3, x9 ; CHECK-NEXT: dup v2.2d, x8 -; CHECK-NEXT: pmull2 v4.1q, v0.2d, v2.2d -; CHECK-NEXT: pmull v0.1q, v0.1d, v3.1d -; CHECK-NEXT: pmull2 v2.1q, v1.2d, v2.2d -; CHECK-NEXT: pmull v1.1q, v1.1d, v3.1d -; CHECK-NEXT: eor v0.16b, v0.16b, v4.16b -; CHECK-NEXT: eor v1.16b, v1.16b, v2.16b +; CHECK-NEXT: fmov d3, x9 +; CHECK-NEXT: pmull v4.1q, v0.1d, v3.1d +; CHECK-NEXT: pmull v3.1q, v1.1d, v3.1d +; CHECK-NEXT: pmull2 v0.1q, v0.2d, v2.2d +; CHECK-NEXT: pmull2 v1.1q, v1.2d, v2.2d +; CHECK-NEXT: eor v0.16b, v4.16b, v0.16b +; CHECK-NEXT: eor v1.16b, v3.16b, v1.16b ; CHECK-NEXT: stp q0, q1, [x1] ; CHECK-NEXT: ret %3 = load <2 x i64>, ptr %1 diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll --- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll @@ -87,12 +87,12 @@ ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: orr v0.8h, #128, lsl #8 -; CHECK-NEXT: sshll v2.4s, v1.4h, #0 +; CHECK-NEXT: sshll v3.4s, v1.4h, #0 ; CHECK-NEXT: sshll2 v1.4s, v1.8h, #0 -; CHECK-NEXT: ushll2 v3.4s, v0.8h, #0 -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-NEXT: mul v1.4s, v3.4s, v1.4s -; CHECK-NEXT: mul v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ushll v2.4s, v0.4h, #0 +; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0 +; CHECK-NEXT: mul v1.4s, v0.4s, v1.4s +; CHECK-NEXT: mul v0.4s, v2.4s, v3.4s ; CHECK-NEXT: ret %load.A = load <8 x i16>, ptr %A %or.A = or <8 x i16> %load.A, @@ -123,26 +123,26 @@ ; CHECK-NEON-LABEL: smull_zext_v2i32_v2i64: ; CHECK-NEON: // %bb.0: ; CHECK-NEON-NEXT: ldr d0, [x1] -; CHECK-NEON-NEXT: ldrh w8, [x0] -; CHECK-NEON-NEXT: ldrh w11, [x0, #2] +; CHECK-NEON-NEXT: ldrh w9, [x0] +; CHECK-NEON-NEXT: ldrh w10, [x0, #2] ; CHECK-NEON-NEXT: sshll v0.2d, v0.2s, #0 -; 
CHECK-NEON-NEXT: fmov x9, d0
-; CHECK-NEON-NEXT: mov x10, v0.d[1]
-; CHECK-NEON-NEXT: smull x8, w8, w9
-; CHECK-NEON-NEXT: smull x9, w11, w10
-; CHECK-NEON-NEXT: fmov d0, x8
-; CHECK-NEON-NEXT: mov v0.d[1], x9
+; CHECK-NEON-NEXT: fmov x11, d0
+; CHECK-NEON-NEXT: mov x8, v0.d[1]
+; CHECK-NEON-NEXT: smull x9, w9, w11
+; CHECK-NEON-NEXT: smull x8, w10, w8
+; CHECK-NEON-NEXT: fmov d0, x9
+; CHECK-NEON-NEXT: mov v0.d[1], x8
 ; CHECK-NEON-NEXT: ret
 ;
 ; CHECK-SVE-LABEL: smull_zext_v2i32_v2i64:
 ; CHECK-SVE: // %bb.0:
 ; CHECK-SVE-NEXT: ldrh w8, [x0]
 ; CHECK-SVE-NEXT: ptrue p0.d, vl2
-; CHECK-SVE-NEXT: ldr d1, [x1]
-; CHECK-SVE-NEXT: fmov d0, x8
-; CHECK-SVE-NEXT: ldrh w8, [x0, #2]
-; CHECK-SVE-NEXT: sshll v1.2d, v1.2s, #0
-; CHECK-SVE-NEXT: mov v0.d[1], x8
+; CHECK-SVE-NEXT: ldrh w9, [x0, #2]
+; CHECK-SVE-NEXT: ldr d0, [x1]
+; CHECK-SVE-NEXT: fmov d1, x8
+; CHECK-SVE-NEXT: sshll v0.2d, v0.2s, #0
+; CHECK-SVE-NEXT: mov v1.d[1], x9
 ; CHECK-SVE-NEXT: mul z0.d, p0/m, z0.d, z1.d
 ; CHECK-SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
 ; CHECK-SVE-NEXT: ret
@@ -272,8 +272,8 @@
 define <8 x i16> @smlal_v8i8_v8i16(ptr %A, ptr %B, ptr %C) nounwind {
 ; CHECK-LABEL: smlal_v8i8_v8i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d1, [x1]
 ; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
 ; CHECK-NEXT: ldr d2, [x2]
 ; CHECK-NEXT: smlal v0.8h, v1.8b, v2.8b
 ; CHECK-NEXT: ret
@@ -290,8 +290,8 @@
 define <4 x i32> @smlal_v4i16_v4i32(ptr %A, ptr %B, ptr %C) nounwind {
 ; CHECK-LABEL: smlal_v4i16_v4i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d1, [x1]
 ; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
 ; CHECK-NEXT: ldr d2, [x2]
 ; CHECK-NEXT: smlal v0.4s, v1.4h, v2.4h
 ; CHECK-NEXT: ret
@@ -308,8 +308,8 @@
 define <2 x i64> @smlal_v2i32_v2i64(ptr %A, ptr %B, ptr %C) nounwind {
 ; CHECK-LABEL: smlal_v2i32_v2i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d1, [x1]
 ; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
 ; CHECK-NEXT: ldr d2, [x2]
 ; CHECK-NEXT: smlal v0.2d, v1.2s, v2.2s
 ; CHECK-NEXT: ret
@@ -326,8 +326,8 @@
 define <8 x i16> @umlal_v8i8_v8i16(ptr %A, ptr %B, ptr %C) nounwind {
 ; CHECK-LABEL: umlal_v8i8_v8i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d1, [x1]
 ; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
 ; CHECK-NEXT: ldr d2, [x2]
 ; CHECK-NEXT: umlal v0.8h, v1.8b, v2.8b
 ; CHECK-NEXT: ret
@@ -344,8 +344,8 @@
 define <4 x i32> @umlal_v4i16_v4i32(ptr %A, ptr %B, ptr %C) nounwind {
 ; CHECK-LABEL: umlal_v4i16_v4i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d1, [x1]
 ; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
 ; CHECK-NEXT: ldr d2, [x2]
 ; CHECK-NEXT: umlal v0.4s, v1.4h, v2.4h
 ; CHECK-NEXT: ret
@@ -362,8 +362,8 @@
 define <2 x i64> @umlal_v2i32_v2i64(ptr %A, ptr %B, ptr %C) nounwind {
 ; CHECK-LABEL: umlal_v2i32_v2i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d1, [x1]
 ; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
 ; CHECK-NEXT: ldr d2, [x2]
 ; CHECK-NEXT: umlal v0.2d, v1.2s, v2.2s
 ; CHECK-NEXT: ret
@@ -380,8 +380,8 @@
 define <8 x i16> @amlal_v8i8_v8i16(ptr %A, ptr %B, ptr %C) nounwind {
 ; CHECK-LABEL: amlal_v8i8_v8i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d1, [x1]
 ; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
 ; CHECK-NEXT: ldr d2, [x2]
 ; CHECK-NEXT: smlal v0.8h, v1.8b, v2.8b
 ; CHECK-NEXT: bic v0.8h, #255, lsl #8
@@ -400,12 +400,12 @@
 define <4 x i32> @amlal_v4i16_v4i32(ptr %A, ptr %B, ptr %C) nounwind {
 ; CHECK-LABEL: amlal_v4i16_v4i32:
 ; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: ldr d1, [x1]
-; CHECK-NEXT: ldr q2, [x0]
-; CHECK-NEXT: ldr d3, [x2]
-; CHECK-NEXT: movi v0.2d, #0x00ffff0000ffff
-; CHECK-NEXT: smlal v2.4s, v1.4h, v3.4h
-; CHECK-NEXT: and v0.16b, v2.16b, v0.16b
+; CHECK-NEXT: ldr d2, [x2]
+; CHECK-NEXT: smlal v0.4s, v1.4h, v2.4h
+; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT: ret
 %tmp1 = load <4 x i32>, ptr %A
 %tmp2 = load <4 x i16>, ptr %B
@@ -421,12 +421,12 @@
 define <2 x i64> @amlal_v2i32_v2i64(ptr %A, ptr %B, ptr %C) nounwind {
 ; CHECK-LABEL: amlal_v2i32_v2i64:
 ; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: ldr d1, [x1]
-; CHECK-NEXT: ldr q2, [x0]
-; CHECK-NEXT: ldr d3, [x2]
-; CHECK-NEXT: movi v0.2d, #0x000000ffffffff
-; CHECK-NEXT: smlal v2.2d, v1.2s, v3.2s
-; CHECK-NEXT: and v0.16b, v2.16b, v0.16b
+; CHECK-NEXT: ldr d2, [x2]
+; CHECK-NEXT: smlal v0.2d, v1.2s, v2.2s
+; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT: ret
 %tmp1 = load <2 x i64>, ptr %A
 %tmp2 = load <2 x i32>, ptr %B
@@ -442,8 +442,8 @@
 define <8 x i16> @smlsl_v8i8_v8i16(ptr %A, ptr %B, ptr %C) nounwind {
 ; CHECK-LABEL: smlsl_v8i8_v8i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d1, [x1]
 ; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
 ; CHECK-NEXT: ldr d2, [x2]
 ; CHECK-NEXT: smlsl v0.8h, v1.8b, v2.8b
 ; CHECK-NEXT: ret
@@ -460,8 +460,8 @@
 define <4 x i32> @smlsl_v4i16_v4i32(ptr %A, ptr %B, ptr %C) nounwind {
 ; CHECK-LABEL: smlsl_v4i16_v4i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d1, [x1]
 ; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
 ; CHECK-NEXT: ldr d2, [x2]
 ; CHECK-NEXT: smlsl v0.4s, v1.4h, v2.4h
 ; CHECK-NEXT: ret
@@ -478,8 +478,8 @@
 define <2 x i64> @smlsl_v2i32_v2i64(ptr %A, ptr %B, ptr %C) nounwind {
 ; CHECK-LABEL: smlsl_v2i32_v2i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d1, [x1]
 ; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
 ; CHECK-NEXT: ldr d2, [x2]
 ; CHECK-NEXT: smlsl v0.2d, v1.2s, v2.2s
 ; CHECK-NEXT: ret
@@ -496,8 +496,8 @@
 define <8 x i16> @umlsl_v8i8_v8i16(ptr %A, ptr %B, ptr %C) nounwind {
 ; CHECK-LABEL: umlsl_v8i8_v8i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d1, [x1]
 ; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
 ; CHECK-NEXT: ldr d2, [x2]
 ; CHECK-NEXT: umlsl v0.8h, v1.8b, v2.8b
 ; CHECK-NEXT: ret
@@ -514,8 +514,8 @@
 define <4 x i32> @umlsl_v4i16_v4i32(ptr %A, ptr %B, ptr %C) nounwind {
 ; CHECK-LABEL: umlsl_v4i16_v4i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d1, [x1]
 ; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
 ; CHECK-NEXT: ldr d2, [x2]
 ; CHECK-NEXT: umlsl v0.4s, v1.4h, v2.4h
 ; CHECK-NEXT: ret
@@ -532,8 +532,8 @@
 define <2 x i64> @umlsl_v2i32_v2i64(ptr %A, ptr %B, ptr %C) nounwind {
 ; CHECK-LABEL: umlsl_v2i32_v2i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d1, [x1]
 ; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
 ; CHECK-NEXT: ldr d2, [x2]
 ; CHECK-NEXT: umlsl v0.2d, v1.2s, v2.2s
 ; CHECK-NEXT: ret
@@ -550,8 +550,8 @@
 define <8 x i16> @amlsl_v8i8_v8i16(ptr %A, ptr %B, ptr %C) nounwind {
 ; CHECK-LABEL: amlsl_v8i8_v8i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d1, [x1]
 ; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
 ; CHECK-NEXT: ldr d2, [x2]
 ; CHECK-NEXT: smlsl v0.8h, v1.8b, v2.8b
 ; CHECK-NEXT: bic v0.8h, #255, lsl #8
@@ -570,12 +570,12 @@
 define <4 x i32> @amlsl_v4i16_v4i32(ptr %A, ptr %B, ptr %C) nounwind {
 ; CHECK-LABEL: amlsl_v4i16_v4i32:
 ; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: ldr d1, [x1]
-; CHECK-NEXT: ldr q2, [x0]
-; CHECK-NEXT: ldr d3, [x2]
-; CHECK-NEXT: movi v0.2d, #0x00ffff0000ffff
-; CHECK-NEXT: smlsl v2.4s, v1.4h, v3.4h
-; CHECK-NEXT: and v0.16b, v2.16b, v0.16b
+; CHECK-NEXT: ldr d2, [x2]
+; CHECK-NEXT: smlsl v0.4s, v1.4h, v2.4h
+; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT: ret
 %tmp1 = load <4 x i32>, ptr %A
 %tmp2 = load <4 x i16>, ptr %B
@@ -591,12 +591,12 @@
 define <2 x i64> @amlsl_v2i32_v2i64(ptr %A, ptr %B, ptr %C) nounwind {
 ; CHECK-LABEL: amlsl_v2i32_v2i64:
 ; CHECK: // %bb.0:
+; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: ldr d1, [x1]
-; CHECK-NEXT: ldr q2, [x0]
-; CHECK-NEXT: ldr d3, [x2]
-; CHECK-NEXT: movi v0.2d, #0x000000ffffffff
-; CHECK-NEXT: smlsl v2.2d, v1.2s, v3.2s
-; CHECK-NEXT: and v0.16b, v2.16b, v0.16b
+; CHECK-NEXT: ldr d2, [x2]
+; CHECK-NEXT: smlsl v0.2d, v1.2s, v2.2s
+; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT: ret
 %tmp1 = load <2 x i64>, ptr %A
 %tmp2 = load <2 x i32>, ptr %B
@@ -724,9 +724,9 @@
 ; CHECK-LABEL: amull_extvec_v4i16_v4i32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov w8, #1234 // =0x4d2
+; CHECK-NEXT: dup v1.4h, w8
+; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h
 ; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
-; CHECK-NEXT: dup v2.4h, w8
-; CHECK-NEXT: smull v0.4s, v0.4h, v2.4h
 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT: ret
 %tmp3 = zext <4 x i16> %arg to <4 x i32>
@@ -739,9 +739,9 @@
 ; CHECK-LABEL: amull_extvec_v2i32_v2i64:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov w8, #1234 // =0x4d2
+; CHECK-NEXT: dup v1.2s, w8
+; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s
 ; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
-; CHECK-NEXT: dup v2.2s, w8
-; CHECK-NEXT: smull v0.2d, v0.2s, v2.2s
 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT: ret
 %tmp3 = zext <2 x i32> %arg to <2 x i64>
@@ -897,11 +897,11 @@
 define <16 x i16> @amull2_i8(<16 x i8> %arg1, <16 x i8> %arg2) {
 ; CHECK-LABEL: amull2_i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: smull2 v2.8h, v0.16b, v1.16b
-; CHECK-NEXT: smull v0.8h, v0.8b, v1.8b
+; CHECK-NEXT: smull v2.8h, v0.8b, v1.8b
+; CHECK-NEXT: smull2 v1.8h, v0.16b, v1.16b
 ; CHECK-NEXT: bic v2.8h, #255, lsl #8
-; CHECK-NEXT: bic v0.8h, #255, lsl #8
-; CHECK-NEXT: mov v1.16b, v2.16b
+; CHECK-NEXT: bic v1.8h, #255, lsl #8
+; CHECK-NEXT: mov v0.16b, v2.16b
 ; CHECK-NEXT: ret
 %arg1_ext = zext <16 x i8> %arg1 to <16 x i16>
 %arg2_ext = zext <16 x i8> %arg2 to <16 x i16>
@@ -914,10 +914,10 @@
 ; CHECK-LABEL: amull2_i16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: movi v2.2d, #0x00ffff0000ffff
-; CHECK-NEXT: smull2 v3.4s, v0.8h, v1.8h
-; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h
-; CHECK-NEXT: and v1.16b, v3.16b, v2.16b
-; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT: smull v3.4s, v0.4h, v1.4h
+; CHECK-NEXT: smull2 v0.4s, v0.8h, v1.8h
+; CHECK-NEXT: and v1.16b, v0.16b, v2.16b
+; CHECK-NEXT: and v0.16b, v3.16b, v2.16b
 ; CHECK-NEXT: ret
 %arg1_ext = zext <8 x i16> %arg1 to <8 x i32>
 %arg2_ext = zext <8 x i16> %arg2 to <8 x i32>
@@ -930,10 +930,10 @@
 ; CHECK-LABEL: amull2_i32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: movi v2.2d, #0x000000ffffffff
-; CHECK-NEXT: smull2 v3.2d, v0.4s, v1.4s
-; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s
-; CHECK-NEXT: and v1.16b, v3.16b, v2.16b
-; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
+; CHECK-NEXT: smull v3.2d, v0.2s, v1.2s
+; CHECK-NEXT: smull2 v0.2d, v0.4s, v1.4s
+; CHECK-NEXT: and v1.16b, v0.16b, v2.16b
+; CHECK-NEXT: and v0.16b, v3.16b, v2.16b
 ; CHECK-NEXT: ret
 %arg1_ext = zext <4 x i32> %arg1 to <4 x i64>
 %arg2_ext = zext <4 x i32> %arg2 to <4 x i64>
diff --git a/llvm/test/CodeGen/AArch64/aarch64-uzp1-combine.ll b/llvm/test/CodeGen/AArch64/aarch64-uzp1-combine.ll
--- a/llvm/test/CodeGen/AArch64/aarch64-uzp1-combine.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-uzp1-combine.ll
@@ -13,10 +13,10 @@
 ; CHECK-BE: // %bb.0:
 ; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
 ; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-BE-NEXT: xtn v1.2s, v1.2d
 ; CHECK-BE-NEXT: xtn v0.2s, v0.2d
-; CHECK-BE-NEXT: rev32 v1.4h, v1.4h
+; CHECK-BE-NEXT: xtn v1.2s, v1.2d
 ; CHECK-BE-NEXT: rev32 v0.4h, v0.4h
+; CHECK-BE-NEXT: rev32 v1.4h, v1.4h
 ; CHECK-BE-NEXT: uzp1 v0.4h, v0.4h, v1.4h
 ; CHECK-BE-NEXT: rev64 v0.4h, v0.4h
 ; CHECK-BE-NEXT: ret
@@ -43,8 +43,8 @@
 ; CHECK-BE-NEXT: rev64 v0.4s, v0.4s
 ; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
 ; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-BE-NEXT: xtn v1.4h, v1.4s
 ; CHECK-BE-NEXT: xtn v0.4h, v0.4s
+; CHECK-BE-NEXT: xtn v1.4h, v1.4s
 ; CHECK-BE-NEXT: uzp1 v0.4h, v0.4h, v1.4h
 ; CHECK-BE-NEXT: rev64 v0.4h, v0.4h
 ; CHECK-BE-NEXT: ret
@@ -68,10 +68,10 @@
 ; CHECK-BE-NEXT: rev64 v0.8h, v0.8h
 ; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
 ; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-BE-NEXT: xtn v1.8b, v1.8h
 ; CHECK-BE-NEXT: xtn v0.8b, v0.8h
-; CHECK-BE-NEXT: rev16 v1.8b, v1.8b
+; CHECK-BE-NEXT: xtn v1.8b, v1.8h
 ; CHECK-BE-NEXT: rev16 v0.8b, v0.8b
+; CHECK-BE-NEXT: rev16 v1.8b, v1.8b
 ; CHECK-BE-NEXT: uzp1 v0.4h, v0.4h, v1.4h
 ; CHECK-BE-NEXT: rev64 v0.4h, v0.4h
 ; CHECK-BE-NEXT: ret
@@ -97,10 +97,10 @@
 ; CHECK-BE: // %bb.0:
 ; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
 ; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-BE-NEXT: xtn v1.2s, v1.2d
 ; CHECK-BE-NEXT: xtn v0.2s, v0.2d
-; CHECK-BE-NEXT: rev32 v1.8b, v1.8b
+; CHECK-BE-NEXT: xtn v1.2s, v1.2d
 ; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
+; CHECK-BE-NEXT: rev32 v1.8b, v1.8b
 ; CHECK-BE-NEXT: uzp1 v0.8b, v0.8b, v1.8b
 ; CHECK-BE-NEXT: rev64 v0.8b, v0.8b
 ; CHECK-BE-NEXT: ret
@@ -127,10 +127,10 @@
 ; CHECK-BE-NEXT: rev64 v0.4s, v0.4s
 ; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
 ; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-BE-NEXT: xtn v1.4h, v1.4s
 ; CHECK-BE-NEXT: xtn v0.4h, v0.4s
-; CHECK-BE-NEXT: rev16 v1.8b, v1.8b
+; CHECK-BE-NEXT: xtn v1.4h, v1.4s
 ; CHECK-BE-NEXT: rev16 v0.8b, v0.8b
+; CHECK-BE-NEXT: rev16 v1.8b, v1.8b
 ; CHECK-BE-NEXT: uzp1 v0.8b, v0.8b, v1.8b
 ; CHECK-BE-NEXT: rev64 v0.8b, v0.8b
 ; CHECK-BE-NEXT: ret
@@ -157,8 +157,8 @@
 ; CHECK-BE-NEXT: rev64 v0.8h, v0.8h
 ; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
 ; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-BE-NEXT: xtn v1.8b, v1.8h
 ; CHECK-BE-NEXT: xtn v0.8b, v0.8h
+; CHECK-BE-NEXT: xtn v1.8b, v1.8h
 ; CHECK-BE-NEXT: uzp1 v0.8b, v0.8b, v1.8b
 ; CHECK-BE-NEXT: rev64 v0.8b, v0.8b
 ; CHECK-BE-NEXT: ret
@@ -181,8 +181,8 @@
 ; CHECK-BE: // %bb.0:
 ; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
 ; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-BE-NEXT: xtn v1.2s, v1.2d
 ; CHECK-BE-NEXT: xtn v0.2s, v0.2d
+; CHECK-BE-NEXT: xtn v1.2s, v1.2d
 ; CHECK-BE-NEXT: zip1 v0.2s, v0.2s, v1.2s
 ; CHECK-BE-NEXT: rev64 v0.2s, v0.2s
 ; CHECK-BE-NEXT: ret
@@ -207,10 +207,10 @@
 ; CHECK-BE-NEXT: rev64 v0.4s, v0.4s
 ; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
 ; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-BE-NEXT: xtn v1.4h, v1.4s
 ; CHECK-BE-NEXT: xtn v0.4h, v0.4s
-; CHECK-BE-NEXT: rev32 v1.4h, v1.4h
+; CHECK-BE-NEXT: xtn v1.4h, v1.4s
 ; CHECK-BE-NEXT: rev32 v0.4h, v0.4h
+; CHECK-BE-NEXT: rev32 v1.4h, v1.4h
 ; CHECK-BE-NEXT: zip1 v0.2s, v0.2s, v1.2s
 ; CHECK-BE-NEXT: rev64 v0.2s, v0.2s
 ; CHECK-BE-NEXT: ret
@@ -238,10 +238,10 @@
 ; CHECK-BE-NEXT: rev64 v0.8h, v0.8h
 ; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
 ; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-BE-NEXT: xtn v1.8b, v1.8h
 ; CHECK-BE-NEXT: xtn v0.8b, v0.8h
-; CHECK-BE-NEXT: rev32 v1.8b, v1.8b
+; CHECK-BE-NEXT: xtn v1.8b, v1.8h
 ; CHECK-BE-NEXT: rev32 v0.8b, v0.8b
+; CHECK-BE-NEXT: rev32 v1.8b, v1.8b
 ; CHECK-BE-NEXT: zip1 v0.2s, v0.2s, v1.2s
 ; CHECK-BE-NEXT: rev64 v0.2s, v0.2s
 ; CHECK-BE-NEXT: ret
@@ -268,10 +268,10 @@
 ; CHECK-BE: // %bb.0:
 ; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
 ; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
-; CHECK-BE-NEXT: xtn v0.2s, v0.2d
 ; CHECK-BE-NEXT: xtn v1.2s, v1.2d
-; CHECK-BE-NEXT: rev32 v0.4h, v0.4h
+; CHECK-BE-NEXT: xtn v0.2s, v0.2d
 ; CHECK-BE-NEXT: rev32 v1.4h, v1.4h
+; CHECK-BE-NEXT: rev32 v0.4h, v0.4h
 ; CHECK-BE-NEXT: uzp1 v0.4h, v0.4h, v1.4h
 ; CHECK-BE-NEXT: addv h0, v0.4h
 ; CHECK-BE-NEXT: fmov w0, s0
diff --git a/llvm/test/CodeGen/AArch64/aarch64-wide-mul.ll b/llvm/test/CodeGen/AArch64/aarch64-wide-mul.ll
--- a/llvm/test/CodeGen/AArch64/aarch64-wide-mul.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64-wide-mul.ll
@@ -21,13 +21,13 @@
 ; CHECK-LABEL: mul_i32:
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: ushll v2.8h, v0.8b, #0
-; CHECK-NEXT: ushll2 v4.8h, v0.16b, #0
-; CHECK-NEXT: ushll2 v5.8h, v1.16b, #0
-; CHECK-NEXT: ushll v0.8h, v1.8b, #0
-; CHECK-NEXT: umull2 v3.4s, v4.8h, v5.8h
-; CHECK-NEXT: umull2 v1.4s, v2.8h, v0.8h
-; CHECK-NEXT: umull v0.4s, v2.4h, v0.4h
-; CHECK-NEXT: umull v2.4s, v4.4h, v5.4h
+; CHECK-NEXT: ushll v4.8h, v1.8b, #0
+; CHECK-NEXT: ushll2 v5.8h, v0.16b, #0
+; CHECK-NEXT: ushll2 v6.8h, v1.16b, #0
+; CHECK-NEXT: umull v0.4s, v2.4h, v4.4h
+; CHECK-NEXT: umull2 v1.4s, v2.8h, v4.8h
+; CHECK-NEXT: umull2 v3.4s, v5.8h, v6.8h
+; CHECK-NEXT: umull v2.4s, v5.4h, v6.4h
 ; CHECK-NEXT: ret
 entry:
 %ea = zext <16 x i8> %a to <16 x i32>
@@ -41,24 +41,24 @@
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: ushll v2.8h, v0.8b, #0
 ; CHECK-NEXT: ushll2 v0.8h, v0.16b, #0
-; CHECK-NEXT: ushll v4.4s, v2.4h, #0
-; CHECK-NEXT: ushll v6.4s, v0.4h, #0
-; CHECK-NEXT: ushll2 v16.4s, v0.8h, #0
-; CHECK-NEXT: ushll v0.8h, v1.8b, #0
+; CHECK-NEXT: ushll v3.8h, v1.8b, #0
 ; CHECK-NEXT: ushll2 v1.8h, v1.16b, #0
-; CHECK-NEXT: ushll2 v2.4s, v2.8h, #0
+; CHECK-NEXT: ushll v4.4s, v2.4h, #0
 ; CHECK-NEXT: ushll v5.4s, v0.4h, #0
-; CHECK-NEXT: ushll v17.4s, v1.4h, #0
+; CHECK-NEXT: ushll v6.4s, v3.4h, #0
+; CHECK-NEXT: ushll2 v2.4s, v2.8h, #0
+; CHECK-NEXT: ushll v16.4s, v1.4h, #0
+; CHECK-NEXT: ushll2 v7.4s, v3.8h, #0
+; CHECK-NEXT: ushll2 v17.4s, v0.8h, #0
 ; CHECK-NEXT: ushll2 v18.4s, v1.8h, #0
-; CHECK-NEXT: ushll2 v19.4s, v0.8h, #0
-; CHECK-NEXT: umull2 v7.2d, v16.4s, v18.4s
-; CHECK-NEXT: umull2 v3.2d, v2.4s, v19.4s
-; CHECK-NEXT: umull2 v1.2d, v4.4s, v5.4s
-; CHECK-NEXT: umull v0.2d, v4.2s, v5.2s
-; CHECK-NEXT: umull2 v5.2d, v6.4s, v17.4s
-; CHECK-NEXT: umull v2.2d, v2.2s, v19.2s
-; CHECK-NEXT: umull v4.2d, v6.2s, v17.2s
-; CHECK-NEXT: umull v6.2d, v16.2s, v18.2s
+; CHECK-NEXT: umull2 v1.2d, v4.4s, v6.4s
+; CHECK-NEXT: umull v0.2d, v4.2s, v6.2s
+; CHECK-NEXT: umull2 v3.2d, v2.4s, v7.4s
+; CHECK-NEXT: umull v2.2d, v2.2s, v7.2s
+; CHECK-NEXT: umull v4.2d, v5.2s, v16.2s
+; CHECK-NEXT: umull2 v7.2d, v17.4s, v18.4s
+; CHECK-NEXT: umull2 v5.2d, v5.4s, v16.4s
+; CHECK-NEXT: umull v6.2d, v17.2s, v18.2s
 ; CHECK-NEXT: ret
 entry:
 %ea = zext <16 x i8> %a to <16 x i64>
@@ -73,8 +73,8 @@
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: umlal2 v3.8h, v0.16b, v1.16b
 ; CHECK-NEXT: umlal v2.8h, v0.8b, v1.8b
-; CHECK-NEXT: mov v1.16b, v3.16b
 ; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: mov v1.16b, v3.16b
 ; CHECK-NEXT: ret
 entry:
 %ea = zext <16 x i8> %a to <16 x i16>
@@ -88,15 +88,15 @@
 ; CHECK-LABEL: mla_i32:
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: ushll v6.8h, v0.8b, #0
+; CHECK-NEXT: ushll v7.8h, v1.8b, #0
 ; CHECK-NEXT: ushll2 v0.8h, v0.16b, #0
-; CHECK-NEXT: ushll2 v7.8h, v1.16b, #0
-; CHECK-NEXT: ushll v1.8h, v1.8b, #0
-; CHECK-NEXT: umlal2 v5.4s, v0.8h, v7.8h
-; CHECK-NEXT: umlal2 v3.4s, v6.8h, v1.8h
-; CHECK-NEXT: umlal v2.4s, v6.4h, v1.4h
-; CHECK-NEXT: umlal v4.4s, v0.4h, v7.4h
-; CHECK-NEXT: mov v1.16b, v3.16b
+; CHECK-NEXT: ushll2 v1.8h, v1.16b, #0
+; CHECK-NEXT: umlal v2.4s, v6.4h, v7.4h
+; CHECK-NEXT: umlal2 v3.4s, v6.8h, v7.8h
+; CHECK-NEXT: umlal2 v5.4s, v0.8h, v1.8h
+; CHECK-NEXT: umlal v4.4s, v0.4h, v1.4h
 ; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: mov v1.16b, v3.16b
 ; CHECK-NEXT: mov v2.16b, v4.16b
 ; CHECK-NEXT: mov v3.16b, v5.16b
 ; CHECK-NEXT: ret
@@ -113,25 +113,25 @@
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: mov v17.16b, v7.16b
 ; CHECK-NEXT: mov v16.16b, v6.16b
-; CHECK-NEXT: ldp q6, q7, [sp]
-; CHECK-NEXT: ushll v18.8h, v0.8b, #0
+; CHECK-NEXT: ushll v6.8h, v0.8b, #0
 ; CHECK-NEXT: ushll2 v0.8h, v0.16b, #0
-; CHECK-NEXT: ushll v21.8h, v1.8b, #0
+; CHECK-NEXT: ushll v7.8h, v1.8b, #0
 ; CHECK-NEXT: ushll2 v1.8h, v1.16b, #0
-; CHECK-NEXT: ushll v19.4s, v18.4h, #0
-; CHECK-NEXT: ushll v20.4s, v0.4h, #0
-; CHECK-NEXT: ushll2 v18.4s, v18.8h, #0
-; CHECK-NEXT: ushll v22.4s, v21.4h, #0
-; CHECK-NEXT: ushll v23.4s, v1.4h, #0
-; CHECK-NEXT: ushll2 v21.4s, v21.8h, #0
+; CHECK-NEXT: ushll v18.4s, v6.4h, #0
+; CHECK-NEXT: ushll2 v21.4s, v6.8h, #0
+; CHECK-NEXT: ushll v19.4s, v0.4h, #0
+; CHECK-NEXT: ushll v20.4s, v7.4h, #0
+; CHECK-NEXT: ushll v22.4s, v1.4h, #0
+; CHECK-NEXT: ushll2 v23.4s, v7.8h, #0
+; CHECK-NEXT: ldp q6, q7, [sp]
 ; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0
 ; CHECK-NEXT: ushll2 v1.4s, v1.8h, #0
-; CHECK-NEXT: umlal2 v5.2d, v18.4s, v21.4s
-; CHECK-NEXT: umlal2 v17.2d, v20.4s, v23.4s
-; CHECK-NEXT: umlal2 v3.2d, v19.4s, v22.4s
-; CHECK-NEXT: umlal v2.2d, v19.2s, v22.2s
-; CHECK-NEXT: umlal v4.2d, v18.2s, v21.2s
-; CHECK-NEXT: umlal v16.2d, v20.2s, v23.2s
+; CHECK-NEXT: umlal2 v3.2d, v18.4s, v20.4s
+; CHECK-NEXT: umlal v2.2d, v18.2s, v20.2s
+; CHECK-NEXT: umlal v16.2d, v19.2s, v22.2s
+; CHECK-NEXT: umlal2 v5.2d, v21.4s, v23.4s
+; CHECK-NEXT: umlal v4.2d, v21.2s, v23.2s
+; CHECK-NEXT: umlal2 v17.2d, v19.4s, v22.4s
 ; CHECK-NEXT: umlal2 v7.2d, v0.4s, v1.4s
 ; CHECK-NEXT: umlal v6.2d, v0.2s, v1.2s
 ; CHECK-NEXT: mov v0.16b, v2.16b
diff --git a/llvm/test/CodeGen/AArch64/aarch64_fnmadd.ll b/llvm/test/CodeGen/AArch64/aarch64_fnmadd.ll
--- a/llvm/test/CodeGen/AArch64/aarch64_fnmadd.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64_fnmadd.ll
@@ -4,10 +4,10 @@
 define void @fnmaddd(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: fnmaddd:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldr d0, [x1]
-; CHECK-NEXT: ldr d1, [x0]
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
 ; CHECK-NEXT: ldr d2, [x2]
-; CHECK-NEXT: fnmadd d0, d0, d1, d2
+; CHECK-NEXT: fnmadd d0, d1, d0, d2
 ; CHECK-NEXT: str d0, [x0]
 ; CHECK-NEXT: ret
 entry:
@@ -47,10 +47,10 @@
 define void @fnmadds(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: fnmadds:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldr s0, [x1]
-; CHECK-NEXT: ldr s1, [x0]
+; CHECK-NEXT: ldr s0, [x0]
+; CHECK-NEXT: ldr s1, [x1]
 ; CHECK-NEXT: ldr s2, [x2]
-; CHECK-NEXT: fnmadd s0, s0, s1, s2
+; CHECK-NEXT: fnmadd s0, s1, s0, s2
 ; CHECK-NEXT: str s0, [x0]
 ; CHECK-NEXT: ret
 entry:
@@ -67,10 +67,10 @@
 define void @fnmadds_nsz_contract(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: fnmadds_nsz_contract:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldr s0, [x1]
-; CHECK-NEXT: ldr s1, [x0]
+; CHECK-NEXT: ldr s0, [x0]
+; CHECK-NEXT: ldr s1, [x1]
 ; CHECK-NEXT: ldr s2, [x2]
-; CHECK-NEXT: fnmadd s0, s0, s1, s2
+; CHECK-NEXT: fnmadd s0, s1, s0, s2
 ; CHECK-NEXT: str s0, [x0]
 ; CHECK-NEXT: ret
 entry:
@@ -88,10 +88,10 @@
 define void @fnmadds_contract(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: fnmadds_contract:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldr s0, [x1]
-; CHECK-NEXT: ldr s1, [x0]
+; CHECK-NEXT: ldr s0, [x0]
+; CHECK-NEXT: ldr s1, [x1]
 ; CHECK-NEXT: ldr s2, [x2]
-; CHECK-NEXT: fmadd s0, s0, s1, s2
+; CHECK-NEXT: fmadd s0, s1, s0, s2
 ; CHECK-NEXT: fneg s0, s0
 ; CHECK-NEXT: str s0, [x0]
 ; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll b/llvm/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll
--- a/llvm/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll
+++ b/llvm/test/CodeGen/AArch64/aarch64_win64cc_vararg.ll
@@ -9,8 +9,8 @@
 ; CHECK-NEXT: add x8, sp, #40
 ; CHECK-NEXT: add x0, sp, #40
 ; CHECK-NEXT: stp x30, x18, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: stp x3, x4, [sp, #56]
 ; CHECK-NEXT: stp x1, x2, [sp, #40]
+; CHECK-NEXT: stp x3, x4, [sp, #56]
 ; CHECK-NEXT: stp x5, x6, [sp, #72]
 ; CHECK-NEXT: str x7, [sp, #88]
 ; CHECK-NEXT: str x8, [sp, #8]
@@ -19,11 +19,22 @@
 ; CHECK-NEXT: add sp, sp, #96
 ; CHECK-NEXT: ret
 ;
+; DARWIN-LABEL: pass_va:
 ; DARWIN: ; %bb.0: ; %entry
-; DARWIN-DAG: stp x3, x4, [sp, #56]
-; DARWIN-DAG: stp x1, x2, [sp, #40]
-; DARWIN-DAG: stp x5, x6, [sp, #72]
-; DARWIN-DAG: str x7, [sp, #88]
+; DARWIN-NEXT: str x18, [sp, #-96]! ; 8-byte Folded Spill
+; DARWIN-NEXT: add x8, sp, #8
+; DARWIN-NEXT: add x9, sp, #40
+; DARWIN-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; DARWIN-NEXT: str x9, [x8]
+; DARWIN-NEXT: ldr x0, [sp, #8]
+; DARWIN-NEXT: stp x1, x2, [sp, #40]
+; DARWIN-NEXT: stp x3, x4, [sp, #56]
+; DARWIN-NEXT: stp x5, x6, [sp, #72]
+; DARWIN-NEXT: str x7, [sp, #88]
+; DARWIN-NEXT: bl _other_func
+; DARWIN-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; DARWIN-NEXT: ldr x18, [sp], #96 ; 8-byte Folded Reload
+; DARWIN-NEXT: ret
 entry:
 %ap = alloca ptr, align 8
 call void @llvm.va_start(ptr %ap)
@@ -47,15 +58,15 @@
 ; CHECK-NEXT: ldr x18, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT: ret
 ;
-; DARWIN-LABEL: _f9:
-; DARWIN: ; %bb.0: ; %entry
-; DARWIN-NEXT: str x18, [sp, #-16]! ; 8-byte Folded Spill
-; DARWIN-NEXT: add x8, sp, #8
-; DARWIN-NEXT: add x9, sp, #24
-; DARWIN-NEXT: str x9, [x8]
-; DARWIN-NEXT: ldr x0, [sp, #8]
-; DARWIN-NEXT: ldr x18, [sp], #16 ; 8-byte Folded Reload
-; DARWIN-NEXT: ret
+; DARWIN-LABEL: f9:
+; DARWIN: ; %bb.0: ; %entry
+; DARWIN-NEXT: str x18, [sp, #-16]! ; 8-byte Folded Spill
+; DARWIN-NEXT: add x8, sp, #8
+; DARWIN-NEXT: add x9, sp, #24
+; DARWIN-NEXT: str x9, [x8]
+; DARWIN-NEXT: ldr x0, [sp, #8]
+; DARWIN-NEXT: ldr x18, [sp], #16 ; 8-byte Folded Reload
+; DARWIN-NEXT: ret
 entry:
 %ap = alloca ptr, align 8
 call void @llvm.va_start(ptr %ap)
@@ -73,15 +84,15 @@
 ; CHECK-NEXT: ldr x18, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT: ret
 ;
-; DARWIN-LABEL: _f8:
-; DARWIN: ; %bb.0: ; %entry
-; DARWIN-NEXT: str x18, [sp, #-16]! ; 8-byte Folded Spill
-; DARWIN-NEXT: add x8, sp, #8
-; DARWIN-NEXT: add x9, sp, #16
-; DARWIN-NEXT: str x9, [x8]
-; DARWIN-NEXT: ldr x0, [sp, #8]
-; DARWIN-NEXT: ldr x18, [sp], #16 ; 8-byte Folded Reload
-; DARWIN-NEXT: ret
+; DARWIN-LABEL: f8:
+; DARWIN: ; %bb.0: ; %entry
+; DARWIN-NEXT: str x18, [sp, #-16]! ; 8-byte Folded Spill
+; DARWIN-NEXT: add x8, sp, #8
+; DARWIN-NEXT: add x9, sp, #16
+; DARWIN-NEXT: str x9, [x8]
+; DARWIN-NEXT: ldr x0, [sp, #8]
+; DARWIN-NEXT: ldr x18, [sp], #16 ; 8-byte Folded Reload
+; DARWIN-NEXT: ret
 entry:
 %ap = alloca ptr, align 8
 call void @llvm.va_start(ptr %ap)
@@ -100,16 +111,16 @@
 ; CHECK-NEXT: ldr x18, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT: ret
 ;
-; DARWIN-LABEL: _f7:
-; DARWIN: ; %bb.0: ; %entry
-; DARWIN-NEXT: str x18, [sp, #-32]! ; 8-byte Folded Spill
-; DARWIN-NEXT: add x8, sp, #8
-; DARWIN-NEXT: add x9, sp, #24
-; DARWIN-NEXT: str x7, [sp, #24]
-; DARWIN-NEXT: str x9, [x8]
-; DARWIN-NEXT: ldr x0, [sp, #8]
-; DARWIN-NEXT: ldr x18, [sp], #32 ; 8-byte Folded Reload
-; DARWIN-NEXT: ret
+; DARWIN-LABEL: f7:
+; DARWIN: ; %bb.0: ; %entry
+; DARWIN-NEXT: str x18, [sp, #-32]! ; 8-byte Folded Spill
+; DARWIN-NEXT: add x8, sp, #8
+; DARWIN-NEXT: add x9, sp, #24
+; DARWIN-NEXT: str x7, [sp, #24]
+; DARWIN-NEXT: str x9, [x8]
+; DARWIN-NEXT: ldr x0, [sp, #8]
+; DARWIN-NEXT: ldr x18, [sp], #32 ; 8-byte Folded Reload
+; DARWIN-NEXT: ret
 entry:
 %ap = alloca ptr, align 8
 call void @llvm.va_start(ptr %ap)
diff --git a/llvm/test/CodeGen/AArch64/abd-combine.ll b/llvm/test/CodeGen/AArch64/abd-combine.ll
--- a/llvm/test/CodeGen/AArch64/abd-combine.ll
+++ b/llvm/test/CodeGen/AArch64/abd-combine.ll
@@ -20,9 +20,9 @@
 ; CHECK-NEXT: movi v1.4s, #1
 ; CHECK-NEXT: ushll2 v2.4s, v0.8h, #0
 ; CHECK-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-NEXT: sub v2.4s, v2.4s, v1.4s
 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: abs v1.4s, v2.4s
+; CHECK-NEXT: sub v1.4s, v2.4s, v1.4s
+; CHECK-NEXT: abs v1.4s, v1.4s
 ; CHECK-NEXT: abs v0.4s, v0.4s
 ; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
 ; CHECK-NEXT: ret
@@ -37,11 +37,11 @@
 ; CHECK-LABEL: abdu_const_lhs:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: movi v1.4s, #1
-; CHECK-NEXT: usubw2 v2.4s, v1.4s, v0.8h
-; CHECK-NEXT: usubw v0.4s, v1.4s, v0.4h
-; CHECK-NEXT: abs v1.4s, v2.4s
+; CHECK-NEXT: usubw v2.4s, v1.4s, v0.4h
+; CHECK-NEXT: usubw2 v0.4s, v1.4s, v0.8h
 ; CHECK-NEXT: abs v0.4s, v0.4s
-; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: abs v1.4s, v2.4s
+; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
 ; CHECK-NEXT: ret
 %zextsrc1 = zext <8 x i16> %src1 to <8 x i32>
 %sub = sub <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %zextsrc1
@@ -330,9 +330,9 @@
 ; CHECK-NEXT: movi v1.4s, #1
 ; CHECK-NEXT: sshll2 v2.4s, v0.8h, #0
 ; CHECK-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-NEXT: sub v2.4s, v2.4s, v1.4s
 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: abs v1.4s, v2.4s
+; CHECK-NEXT: sub v1.4s, v2.4s, v1.4s
+; CHECK-NEXT: abs v1.4s, v1.4s
 ; CHECK-NEXT: abs v0.4s, v0.4s
 ; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
 ; CHECK-NEXT: ret
@@ -347,11 +347,11 @@
 ; CHECK-LABEL: abds_const_lhs:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: movi v1.4s, #1
-; CHECK-NEXT: ssubw2 v2.4s, v1.4s, v0.8h
-; CHECK-NEXT: ssubw v0.4s, v1.4s, v0.4h
-; CHECK-NEXT: abs v1.4s, v2.4s
+; CHECK-NEXT: ssubw v2.4s, v1.4s, v0.4h
+; CHECK-NEXT: ssubw2 v0.4s, v1.4s, v0.8h
 ; CHECK-NEXT: abs v0.4s, v0.4s
-; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: abs v1.4s, v2.4s
+; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
 ; CHECK-NEXT: ret
 %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
 %sub = sub <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, %zextsrc1
@@ -405,11 +405,11 @@
 define <8 x i16> @abds_undef(<8 x i16> %src1) {
 ; CHECK-LABEL: abds_undef:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: sshll2 v1.4s, v0.8h, #0
-; CHECK-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-NEXT: abs v1.4s, v1.4s
+; CHECK-NEXT: sshll v1.4s, v0.4h, #0
+; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0
 ; CHECK-NEXT: abs v0.4s, v0.4s
-; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: abs v1.4s, v1.4s
+; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
 ; CHECK-NEXT: ret
 %zextsrc1 = sext <8 x i16> %src1 to <8 x i32>
 %zextsrc2 = sext <8 x i16> undef to <8 x i32>
@@ -530,10 +530,10 @@
 define <1 x i64> @recursive() {
 ; CHECK-LABEL: recursive:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: movi v0.8b, #1
-; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff
-; CHECK-NEXT: uabd v2.8b, v0.8b, v1.8b
-; CHECK-NEXT: uabdl v0.8h, v0.8b, v1.8b
+; CHECK-NEXT: movi v0.2d, #0xffffffffffffffff
+; CHECK-NEXT: movi v1.8b, #1
+; CHECK-NEXT: uabd v2.8b, v1.8b, v0.8b
+; CHECK-NEXT: uabdl v0.8h, v1.8b, v0.8b
 ; CHECK-NEXT: dup v1.8b, v2.b[0]
 ; CHECK-NEXT: saddlp v0.1d, v0.2s
 ; CHECK-NEXT: orr v0.8b, v1.8b, v0.8b
diff --git a/llvm/test/CodeGen/AArch64/active_lane_mask.ll b/llvm/test/CodeGen/AArch64/active_lane_mask.ll
--- a/llvm/test/CodeGen/AArch64/active_lane_mask.ll
+++ b/llvm/test/CodeGen/AArch64/active_lane_mask.ll
@@ -80,9 +80,9 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: index z0.b, #0, #1
 ; CHECK-NEXT: mov z1.b, w0
+; CHECK-NEXT: ptrue p0.b
 ; CHECK-NEXT: uqadd z0.b, z0.b, z1.b
 ; CHECK-NEXT: mov z1.b, w1
-; CHECK-NEXT: ptrue p0.b
 ; CHECK-NEXT: cmphi p0.b, p0/z, z1.b, z0.b
 ; CHECK-NEXT: ret
 %active.lane.mask = call <vscale x 16 x i1> @llvm.get.active.lane.mask.nxv16i1.i8(i8 %index, i8 %TC)
@@ -94,13 +94,13 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: index z0.h, #0, #1
 ; CHECK-NEXT: mov z1.h, w0
-; CHECK-NEXT: and z0.h, z0.h, #0xff
+; CHECK-NEXT: ptrue p0.h
 ; CHECK-NEXT: and z1.h, z1.h, #0xff
+; CHECK-NEXT: and z0.h, z0.h, #0xff
 ; CHECK-NEXT: add z0.h, z0.h, z1.h
 ; CHECK-NEXT: mov z1.h, w1
 ; CHECK-NEXT: umin z0.h, z0.h, #255
 ; CHECK-NEXT: and z1.h, z1.h, #0xff
-; CHECK-NEXT: ptrue p0.h
 ; CHECK-NEXT: cmphi p0.h, p0/z, z1.h, z0.h
 ; CHECK-NEXT: ret
 %active.lane.mask = call <vscale x 8 x i1> @llvm.get.active.lane.mask.nxv8i1.i8(i8 %index, i8 %TC)
@@ -110,14 +110,14 @@
 define <vscale x 4 x i1> @lane_mask_nxv4i1_i8(i8 %index, i8 %TC) {
 ; CHECK-LABEL: lane_mask_nxv4i1_i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xff
 ; CHECK-NEXT: index z0.s, #0, #1
-; CHECK-NEXT: and w9, w1, #0xff
-; CHECK-NEXT: and z0.s, z0.s, #0xff
+; CHECK-NEXT: and w8, w0, #0xff
 ; CHECK-NEXT: ptrue p0.s
 ; CHECK-NEXT: mov z1.s, w8
+; CHECK-NEXT: and w8, w1, #0xff
+; CHECK-NEXT: and z0.s, z0.s, #0xff
 ; CHECK-NEXT: add z0.s, z0.s, z1.s
-; CHECK-NEXT: mov z1.s, w9
+; CHECK-NEXT: mov z1.s, w8
 ; CHECK-NEXT: umin z0.s, z0.s, #255
 ; CHECK-NEXT: cmphi p0.s, p0/z, z1.s, z0.s
 ; CHECK-NEXT: ret
@@ -128,18 +128,18 @@
 define <vscale x 2 x i1> @lane_mask_nxv2i1_i8(i8 %index, i8 %TC) {
 ; CHECK-LABEL: lane_mask_nxv2i1_i8:
 ; CHECK: // %bb.0:
+; CHECK-NEXT: index z0.d, #0, #1
 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
 ; CHECK-NEXT: and x8, x0, #0xff
-; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: and x9, x1, #0xff
-; CHECK-NEXT: index z0.d, #0, #1
 ; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: and z0.d, z0.d, #0xff
 ; CHECK-NEXT: mov z1.d, x8
-; CHECK-NEXT: mov z2.d, x9
+; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: and x8, x1, #0xff
+; CHECK-NEXT: and z0.d, z0.d, #0xff
 ; CHECK-NEXT: add z0.d, z0.d, z1.d
+; CHECK-NEXT: mov z1.d, x8
 ; CHECK-NEXT: umin z0.d, z0.d, #255
-; CHECK-NEXT: cmphi p0.d, p0/z, z2.d, z0.d
+; CHECK-NEXT: cmphi p0.d, p0/z, z1.d, z0.d
 ; CHECK-NEXT: ret
 %active.lane.mask = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i8(i8 %index, i8 %TC)
 ret <vscale x 2 x i1> %active.lane.mask
@@ -153,47 +153,49 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
 ; CHECK-NEXT: .cfi_offset w29, -16
 ; CHECK-NEXT: index z0.s, #0, #1
-; CHECK-NEXT: mov z3.s, w0
-; CHECK-NEXT: mov z1.d, z0.d
+; CHECK-NEXT: mov z1.s, w0
 ; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: mov z25.s, w1
 ; CHECK-NEXT: mov z2.d, z0.d
-; CHECK-NEXT: mov z4.s, w1
-; CHECK-NEXT: incw z1.s
-; CHECK-NEXT: uqadd z5.s, z0.s, z3.s
-; CHECK-NEXT: incw z2.s, all, mul #2
-; CHECK-NEXT: mov z6.d, z1.d
-; CHECK-NEXT: cmphi p1.s, p0/z, z4.s, z5.s
-; CHECK-NEXT: uqadd z5.s, z1.s, z3.s
-; CHECK-NEXT: cmphi p2.s, p0/z, z4.s, z5.s
-; CHECK-NEXT: uqadd z5.s, z2.s, z3.s
-; CHECK-NEXT: incw z6.s, all, mul #2
+; CHECK-NEXT: mov z3.d, z0.d
+; CHECK-NEXT: uqadd z6.s, z0.s, z1.s
 ; CHECK-NEXT: incw z0.s, all, mul #4
-; CHECK-NEXT: cmphi p3.s, p0/z, z4.s, z5.s
-; CHECK-NEXT: uqadd z5.s, z6.s, z3.s
-; CHECK-NEXT: incw z1.s, all, mul #4
-; CHECK-NEXT: cmphi p4.s, p0/z, z4.s, z5.s
-; CHECK-NEXT: uqadd z0.s, z0.s, z3.s
-; CHECK-NEXT: uqadd z1.s, z1.s, z3.s
+; CHECK-NEXT: incw z2.s
+; CHECK-NEXT: incw z3.s, all, mul #2
+; CHECK-NEXT: cmphi p2.s, p0/z, z25.s, z6.s
+; CHECK-NEXT: uqadd z0.s, z0.s, z1.s
+; CHECK-NEXT: mov z4.d, z2.d
+; CHECK-NEXT: uqadd z5.s, z2.s, z1.s
+; CHECK-NEXT: uqadd z7.s, z3.s, z1.s
 ; CHECK-NEXT: incw z2.s, all, mul #4
-; CHECK-NEXT: incw z6.s, all, mul #4
-; CHECK-NEXT: uzp1 p1.h, p1.h, p2.h
-; CHECK-NEXT: uzp1 p2.h, p3.h, p4.h
-; CHECK-NEXT: cmphi p3.s, p0/z, z4.s, z0.s
-; CHECK-NEXT: cmphi p4.s, p0/z, z4.s, z1.s
-; CHECK-NEXT: uqadd z0.s, z2.s, z3.s
-; CHECK-NEXT: uqadd z1.s, z6.s, z3.s
-; CHECK-NEXT: cmphi p5.s, p0/z, z4.s, z0.s
-; CHECK-NEXT: cmphi p0.s, p0/z, z4.s, z1.s
+; CHECK-NEXT: incw z3.s, all, mul #4
+; CHECK-NEXT: cmphi p5.s, p0/z, z25.s, z0.s
+; CHECK-NEXT: incw z4.s, all, mul #2
+; CHECK-NEXT: cmphi p1.s, p0/z, z25.s, z5.s
+; CHECK-NEXT: cmphi p3.s, p0/z, z25.s, z7.s
+; CHECK-NEXT: uqadd z2.s, z2.s, z1.s
+; CHECK-NEXT: uqadd z3.s, z3.s, z1.s
+; CHECK-NEXT: uqadd z24.s, z4.s, z1.s
+; CHECK-NEXT: incw z4.s, all, mul #4
+; CHECK-NEXT: uzp1 p1.h, p2.h, p1.h
+; CHECK-NEXT: cmphi p6.s, p0/z, z25.s, z2.s
+; CHECK-NEXT: cmphi p2.s, p0/z, z25.s, z3.s
+; CHECK-NEXT: uqadd z1.s, z4.s, z1.s
+; CHECK-NEXT: cmphi p4.s, p0/z, z25.s, z24.s
 ; CHECK-NEXT: uzp1 p3.h, p3.h, p4.h
-; CHECK-NEXT: uzp1 p4.h, p5.h, p0.h
-; CHECK-NEXT: uzp1 p0.b, p1.b, p2.b
-; CHECK-NEXT: uzp1 p1.b, p3.b, p4.b
+; CHECK-NEXT: cmphi p0.s, p0/z, z25.s, z1.s
+; CHECK-NEXT: uzp1 p4.h, p5.h, p6.h
+; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: uzp1 p2.h, p2.h, p0.h
+; CHECK-NEXT: uzp1 p0.b, p1.b, p3.b
+; CHECK-NEXT: uzp1 p1.b, p4.b, p2.b
 ; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT: addvl sp, sp, #1
 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -207,87 +209,93 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: str p10, [sp, #1, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p9, [sp, #2, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
 ; CHECK-NEXT: .cfi_offset w29, -16
-; CHECK-NEXT: index z0.d, #0, #1
-; CHECK-NEXT: mov z3.d, x0
-; CHECK-NEXT: mov z1.d, z0.d
+; CHECK-NEXT: index z1.d, #0, #1
+; CHECK-NEXT: mov z0.d, x0
 ; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z2.d, z0.d
-; CHECK-NEXT: mov z4.d, x1
-; CHECK-NEXT: incd z1.d
-; CHECK-NEXT: uqadd z5.d, z0.d, z3.d
-; CHECK-NEXT: uqadd z6.d, z1.d, z3.d
-; CHECK-NEXT: cmphi p1.d, p0/z, z4.d, z5.d
-; CHECK-NEXT: mov z5.d, z1.d
-; CHECK-NEXT: incd z2.d, all, mul #2
-; CHECK-NEXT: cmphi p2.d, p0/z, z4.d, z6.d
-; CHECK-NEXT: uqadd z6.d, z2.d, z3.d
-; CHECK-NEXT: mov z7.d, z0.d
-; CHECK-NEXT: incd z5.d, all, mul #2
-; CHECK-NEXT: uzp1 p1.s, p1.s, p2.s
-; CHECK-NEXT: cmphi p2.d, p0/z, z4.d, z6.d
-; CHECK-NEXT: uqadd z6.d, z5.d, z3.d
-; CHECK-NEXT: mov z24.d, z1.d
-; CHECK-NEXT: incd z7.d, all, mul #4
-; CHECK-NEXT: cmphi p3.d, p0/z, z4.d, z6.d
-; CHECK-NEXT: uqadd z6.d, z7.d, z3.d
-; CHECK-NEXT: mov z25.d, z2.d
-; CHECK-NEXT: incd z24.d, all, mul #4
-; CHECK-NEXT: mov z26.d, z5.d
-; CHECK-NEXT: cmphi p4.d, p0/z, z4.d, z6.d
-; CHECK-NEXT: uqadd z6.d, z24.d, z3.d
-; CHECK-NEXT: incd z25.d, all, mul #4
-; CHECK-NEXT: cmphi p5.d, p0/z, z4.d, z6.d
-; CHECK-NEXT: uqadd z6.d, z25.d, z3.d
-; CHECK-NEXT: incd z26.d, all, mul #4
-; CHECK-NEXT: cmphi p6.d, p0/z, z4.d, z6.d
-; CHECK-NEXT: uqadd z6.d, z26.d, z3.d
-; CHECK-NEXT: uzp1 p2.s, p2.s, p3.s
-; CHECK-NEXT: cmphi p3.d, p0/z, z4.d, z6.d
-; CHECK-NEXT: incd z0.d, all, mul #8
+; CHECK-NEXT: mov z7.d, x1
+; CHECK-NEXT: mov z2.d, z1.d
+; CHECK-NEXT: mov z3.d, z1.d
+; CHECK-NEXT: mov z6.d, z1.d
+; CHECK-NEXT: uqadd z5.d, z1.d, z0.d
 ; CHECK-NEXT: incd z1.d, all, mul #8
-; CHECK-NEXT: uzp1 p4.s, p4.s, p5.s
-; CHECK-NEXT: uzp1 p3.s, p6.s, p3.s
-; CHECK-NEXT: uqadd z0.d, z0.d, z3.d
-; CHECK-NEXT: uqadd z1.d, z1.d, z3.d
+; CHECK-NEXT: incd z2.d
+; CHECK-NEXT: incd z3.d, all, mul #2
+; CHECK-NEXT: incd z6.d, all, mul #4
+; CHECK-NEXT: cmphi p1.d, p0/z, z7.d, z5.d
+; CHECK-NEXT: uqadd z1.d, z1.d, z0.d
+; CHECK-NEXT: mov z4.d, z2.d
+; CHECK-NEXT: uqadd z24.d, z2.d, z0.d
+; CHECK-NEXT: mov z25.d, z2.d
+; CHECK-NEXT: mov z27.d, z3.d
+; CHECK-NEXT: uqadd z26.d, z3.d, z0.d
+; CHECK-NEXT: uqadd z28.d, z6.d, z0.d
 ; CHECK-NEXT: incd z2.d, all, mul #8
-; CHECK-NEXT: incd z5.d, all, mul #8
-; CHECK-NEXT: uzp1 p1.h, p1.h, p2.h
-; CHECK-NEXT: uzp1 p2.h, p4.h, p3.h
-; CHECK-NEXT: cmphi p3.d, p0/z, z4.d, z0.d
-; CHECK-NEXT: cmphi p4.d, p0/z, z4.d, z1.d
-; CHECK-NEXT: uqadd z0.d, z2.d, z3.d
-; CHECK-NEXT: uqadd z1.d, z5.d, z3.d
-; CHECK-NEXT: incd z7.d, all, mul #8
-; CHECK-NEXT: incd z24.d, all, mul #8
-; CHECK-NEXT: cmphi p5.d, p0/z, z4.d, z0.d
-; CHECK-NEXT: cmphi p6.d, p0/z, z4.d, z1.d
-; CHECK-NEXT: uqadd z0.d, z7.d, z3.d
-; CHECK-NEXT: uqadd z1.d, z24.d, z3.d
+; CHECK-NEXT: incd z3.d, all, mul #8
+; CHECK-NEXT: incd z6.d, all, mul #8
+; CHECK-NEXT: incd z4.d, all, mul #2
+; CHECK-NEXT: incd z25.d, all, mul #4
+; CHECK-NEXT: cmphi p2.d, p0/z, z7.d, z24.d
+; CHECK-NEXT: incd z27.d, all, mul #4
+; CHECK-NEXT: cmphi p3.d, p0/z, z7.d, z26.d
+; CHECK-NEXT: cmphi p5.d, p0/z, z7.d, z28.d
+; CHECK-NEXT: uqadd z2.d, z2.d, z0.d
+; CHECK-NEXT: uqadd z3.d, z3.d, z0.d
+; CHECK-NEXT: mov z24.d, z4.d
+; CHECK-NEXT: uqadd z5.d, z4.d, z0.d
+; CHECK-NEXT: uqadd z26.d, z25.d, z0.d
+; CHECK-NEXT: incd z4.d, all, mul #8
 ; CHECK-NEXT: incd z25.d, all, mul #8
-; CHECK-NEXT: incd z26.d, all, mul #8
+; CHECK-NEXT: uzp1 p1.s, p1.s, p2.s
+; CHECK-NEXT: incd z24.d, all, mul #4
+; CHECK-NEXT: cmphi p8.d, p0/z, z7.d, z2.d
+; CHECK-NEXT: cmphi p4.d, p0/z, z7.d, z5.d
+; CHECK-NEXT: uqadd z5.d, z27.d, z0.d
+; CHECK-NEXT: incd z27.d, all, mul #8
+; CHECK-NEXT: uqadd z4.d, z4.d, z0.d
+; CHECK-NEXT: cmphi p6.d, p0/z, z7.d, z26.d
+; CHECK-NEXT: uqadd z28.d, z24.d, z0.d
+; CHECK-NEXT: incd z24.d, all, mul #8
 ; CHECK-NEXT: uzp1 p3.s, p3.s, p4.s
-; CHECK-NEXT: uzp1 p4.s, p5.s, p6.s
-; CHECK-NEXT: cmphi p5.d, p0/z, z4.d, z0.d
-; CHECK-NEXT: cmphi p6.d, p0/z, z4.d, z1.d
-; CHECK-NEXT: uqadd z0.d, z25.d, z3.d
-; CHECK-NEXT: uqadd z1.d, z26.d, z3.d
-; CHECK-NEXT: cmphi p7.d, p0/z, z4.d, z0.d
-; CHECK-NEXT: cmphi p0.d, p0/z, z4.d, z1.d
+; CHECK-NEXT: cmphi p7.d, p0/z, z7.d, z5.d
+; CHECK-NEXT: uqadd z5.d, z6.d, z0.d
+; CHECK-NEXT: uqadd z6.d, z25.d, z0.d
+; CHECK-NEXT: uqadd z25.d, z27.d, z0.d
+; CHECK-NEXT: cmphi p4.d, p0/z, z7.d, z1.d
 ; CHECK-NEXT: uzp1 p5.s, p5.s, p6.s
-; CHECK-NEXT: uzp1 p0.s, p7.s, p0.s
-; CHECK-NEXT: uzp1 p3.h, p3.h, p4.h
-; CHECK-NEXT: uzp1 p4.h, p5.h, p0.h
-; CHECK-NEXT: uzp1 p0.b, p1.b, p2.b
-; CHECK-NEXT: uzp1 p1.b, p3.b, p4.b
-; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: cmphi p6.d, p0/z, z7.d, z3.d
+; CHECK-NEXT: cmphi p9.d, p0/z, z7.d, z4.d
+; CHECK-NEXT: uqadd z0.d, z24.d, z0.d
+; CHECK-NEXT: cmphi p2.d, p0/z, z7.d, z28.d
+; CHECK-NEXT: cmphi p10.d, p0/z, z7.d, z6.d
+; CHECK-NEXT: uzp1 p4.s, p4.s, p8.s
+; CHECK-NEXT: cmphi p8.d, p0/z, z7.d, z25.d
+; CHECK-NEXT: uzp1 p6.s, p6.s, p9.s
+; CHECK-NEXT: ldr p9, [sp, #2, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: uzp1 p2.s, p7.s, p2.s
+; CHECK-NEXT: cmphi p7.d, p0/z, z7.d, z5.d
+; CHECK-NEXT: cmphi p0.d, p0/z, z7.d, z0.d
+; CHECK-NEXT: uzp1 p1.h, p1.h, p3.h
+; CHECK-NEXT: uzp1 p7.s, p7.s, p10.s
+; CHECK-NEXT: ldr p10, [sp, #1, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: uzp1 p0.s, p8.s, p0.s
+; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: uzp1 p3.h, p4.h, p6.h
 ; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: uzp1 p2.h, p5.h, p2.h
 ; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: uzp1 p4.h, p7.h, p0.h
+; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: uzp1 p0.b, p1.b, p2.b
+; CHECK-NEXT: uzp1 p1.b, p3.b, p4.b
 ; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT: addvl sp, sp, #1
 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -299,17 +307,17 @@
 define <vscale x 32 x i1> @lane_mask_nxv32i1_i8(i8 %index, i8 %TC) {
 ; CHECK-LABEL: lane_mask_nxv32i1_i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: rdvl x8, #1
 ; CHECK-NEXT: index z0.b, #0, #1
-; CHECK-NEXT: mov z1.b, w8
+; CHECK-NEXT: rdvl x8, #1
 ; CHECK-NEXT: mov z2.b, w0
+; CHECK-NEXT: mov z1.b, w8
+; CHECK-NEXT: ptrue p1.b
 ; CHECK-NEXT: add z1.b, z0.b, z1.b
-; CHECK-NEXT: mov z3.b, w1
 ; CHECK-NEXT: uqadd z0.b, z0.b, z2.b
-; CHECK-NEXT: ptrue p1.b
 ; CHECK-NEXT: uqadd z1.b, z1.b, z2.b
-; CHECK-NEXT: cmphi p0.b, p1/z, z3.b, z0.b
-; CHECK-NEXT: cmphi p1.b, p1/z, z3.b, z1.b
+; CHECK-NEXT: mov z2.b, w1
+; CHECK-NEXT: cmphi p0.b, p1/z, z2.b, z0.b
+; CHECK-NEXT: cmphi p1.b, p1/z, z2.b, z1.b
 ; CHECK-NEXT: ret
 %active.lane.mask = call <vscale x 32 x i1> @llvm.get.active.lane.mask.nxv32i1.i8(i8 %index, i8 %TC)
 ret <vscale x 32 x i1> %active.lane.mask
@@ -410,9 +418,9 @@
 ; CHECK-LABEL: lane_mask_v16i1_i8:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: adrp x8, .LCPI23_0
-; CHECK-NEXT: dup v1.16b, w0
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI23_0]
-; CHECK-NEXT: uqadd v0.16b, v1.16b, v0.16b
+; CHECK-NEXT: dup v0.16b, w0
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI23_0]
+; CHECK-NEXT: uqadd v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT: dup v1.16b, w1
 ; CHECK-NEXT: cmhi v0.16b, v1.16b, v0.16b
 ; CHECK-NEXT: ret
@@ -423,12 +431,12 @@
 define <8 x i1> @lane_mask_v8i1_i8(i8 %index, i8 %TC) {
 ; CHECK-LABEL: lane_mask_v8i1_i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI24_0
 ; CHECK-NEXT: dup v0.8b, w0
-; CHECK-NEXT: dup v2.8b, w1
+; CHECK-NEXT: adrp x8, .LCPI24_0
 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI24_0]
 ; CHECK-NEXT: uqadd v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: cmhi v0.8b, v2.8b, v0.8b
+; CHECK-NEXT: dup v1.8b, w1
+; CHECK-NEXT: cmhi v0.8b, v1.8b, v0.8b
 ; CHECK-NEXT: ret
 %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i8(i8 %index, i8 %TC)
 ret <8 x i1> %active.lane.mask
@@ -437,16 +445,16 @@
 define <4 x i1> @lane_mask_v4i1_i8(i8 %index, i8 %TC) {
 ; CHECK-LABEL: lane_mask_v4i1_i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI25_0
 ; CHECK-NEXT: dup v0.4h, w0
+; CHECK-NEXT: adrp x8, .LCPI25_0
 ; CHECK-NEXT: movi d2, #0xff00ff00ff00ff
-; CHECK-NEXT: dup v3.4h, w1
 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI25_0]
 ; CHECK-NEXT: bic v0.4h, #255, lsl #8
-; CHECK-NEXT: bic v3.4h, #255, lsl #8
 ; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: dup v1.4h, w1
 ; CHECK-NEXT: umin v0.4h, v0.4h, v2.4h
-; CHECK-NEXT: cmhi v0.4h, v3.4h, v0.4h
+; CHECK-NEXT: bic v1.4h, #255, lsl #8
+; CHECK-NEXT: cmhi v0.4h, v1.4h, v0.4h
 ; CHECK-NEXT: ret
 %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i8(i8 %index, i8 %TC)
 ret <4 x i1> %active.lane.mask
@@ -455,11 +463,11 @@
 define <2 x i1> @lane_mask_v2i1_i8(i8 %index, i8 %TC) {
 ; CHECK-LABEL: lane_mask_v2i1_i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI26_0
 ; CHECK-NEXT: movi d0, #0x0000ff000000ff
 ; CHECK-NEXT: dup v1.2s, w0
-; CHECK-NEXT: dup v3.2s, w1
+; CHECK-NEXT: adrp x8, .LCPI26_0
 ; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI26_0]
+; CHECK-NEXT: dup v3.2s, w1
 ; CHECK-NEXT: and v1.8b, v1.8b, v0.8b
 ; CHECK-NEXT: add v1.2s, v1.2s, v2.2s
 ; CHECK-NEXT: umin v1.2s, v1.2s, v0.2s
@@ -483,7 +491,7 @@
 define <vscale x 4 x i1> @lane_mask_nxv4i1_imm5() {
 ; CHECK-LABEL: lane_mask_nxv4i1_imm5:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #5
+; CHECK-NEXT: mov w8, #5 // =0x5
 ; CHECK-NEXT: whilelo p0.s, xzr, x8
 ; CHECK-NEXT: ret
 entry:
@@ -504,7 +512,7 @@
 define <vscale x 16 x i1> @lane_mask_nxv16i1_imm10() {
 ; CHECK-LABEL: lane_mask_nxv16i1_imm10:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #10
+; CHECK-NEXT: mov w8, #10 // =0xa
 ; CHECK-NEXT: whilelo p0.b, xzr, x8
 ; CHECK-NEXT: ret
 entry:
diff --git a/llvm/test/CodeGen/AArch64/add-extract.ll b/llvm/test/CodeGen/AArch64/add-extract.ll
--- a/llvm/test/CodeGen/AArch64/add-extract.ll
+++ b/llvm/test/CodeGen/AArch64/add-extract.ll
@@ -44,8 +44,8 @@
 define i64 @add_v2i64_ext_load(<2 x i64> %A, ptr %B) nounwind {
 ; CHECK-LABEL: add_v2i64_ext_load:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
 ; CHECK-NEXT: fmov x9, d0
+; CHECK-NEXT: ldr x8, [x0]
 ; CHECK-NEXT: add x0, x9, x8
 ; CHECK-NEXT: ret
 %a = extractelement <2 x i64> %A, i32 0
@@ -70,8 +70,8 @@
 ; CHECK-LABEL: add_i32_ext_load:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: ldr w8, [x0]
 ; CHECK-NEXT: fmov w9, s0
+; CHECK-NEXT: ldr w8, [x0]
 ; CHECK-NEXT: add w0, w9, w8
 ; CHECK-NEXT: ret
 %a = extractelement <1 x i32> %A, i32 0
diff --git a/llvm/test/CodeGen/AArch64/addcarry-crash.ll b/llvm/test/CodeGen/AArch64/addcarry-crash.ll
--- a/llvm/test/CodeGen/AArch64/addcarry-crash.ll
+++ b/llvm/test/CodeGen/AArch64/addcarry-crash.ll
@@ -5,10 +5,10 @@
 define i64 @foo(ptr nocapture readonly %ptr, i64 %a, i64 %b, i64 %c) local_unnamed_addr #0 {
 ; CHECK-LABEL: foo:
 ; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: ldr w8, [x0, #4]
-; CHECK-NEXT: lsr x9, x1, #32
+; CHECK-NEXT: lsr x8, x1, #32
+; CHECK-NEXT: ldr w9, [x0, #4]
 ; CHECK-NEXT: cmn x3, x2
-; CHECK-NEXT: umull x8, w8, w9
+; CHECK-NEXT: umull x8, w9, w8
 ; CHECK-NEXT: cinc x0, x8, hs
 ; CHECK-NEXT: ret
 entry:
diff --git a/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll b/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll
--- a/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll
+++ b/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll
@@ -213,9 +213,9 @@
 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: bl vec_use
+; CHECK-NEXT: mvni v0.4s, #5
 ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT: mvni v0.4s, #5
 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: add sp, sp, #32
 ; CHECK-NEXT: ret
@@ -290,9 +290,9 @@
 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: bl vec_use
+; CHECK-NEXT: mvni v0.4s, #5
 ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT: mvni v0.4s, #5
 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT: add sp, sp, #32
 ; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/addsub.ll b/llvm/test/CodeGen/AArch64/addsub.ll
--- a/llvm/test/CodeGen/AArch64/addsub.ll
+++ b/llvm/test/CodeGen/AArch64/addsub.ll
@@ -232,7 +232,7 @@
 define i32 @add_27962026(i32 %a) {
 ; CHECK-LABEL: add_27962026:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #43690
+; CHECK-NEXT: mov w8, #43690 // =0xaaaa
 ; CHECK-NEXT: movk w8, #426, lsl #16
 ; CHECK-NEXT: add w0, w0, w8
 ; CHECK-NEXT: ret
@@ -243,7 +243,7 @@
 define i32 @add_65534(i32 %a) {
 ; CHECK-LABEL: add_65534:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #65534
+; CHECK-NEXT: mov w8, #65534 // =0xfffe
 ; CHECK-NEXT: add w0, w0, w8
 ; CHECK-NEXT: ret
 %b = add i32 %a, 65534
@@ -259,7 +259,7 @@
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: .cfi_offset w19, -8
 ; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: mov w19, #43690
+; CHECK-NEXT: mov w19, #43690 // =0xaaaa
 ; CHECK-NEXT: movk w19, #170, lsl #16
 ; CHECK-NEXT: .LBB15_1: // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT: add w0, w0, w19
@@ -373,7 +373,7 @@
 define i1 @uadd_add(i8 %a, i8 %b, ptr %p) {
 ; CHECK-LABEL: uadd_add:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #255
+; CHECK-NEXT: mov w8, #255 // =0xff
 ; CHECK-NEXT: bic w8, w8, w0
 ; CHECK-NEXT: add w8, w8, w1, uxtb
 ; CHECK-NEXT: lsr w0, w8, #8
@@ -398,7 +398,7 @@
 define i64 @addl_0x80000000(i64 %a) {
 ; CHECK-LABEL: addl_0x80000000:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #48576
+; CHECK-NEXT: mov w8, #48576 // =0xbdc0
 ; CHECK-NEXT: movk w8, #65520, lsl #16
 ; CHECK-NEXT: add x0, x0, x8
 ; CHECK-NEXT: ret
@@ -499,7 +499,7 @@
 define i1 @reject_eq(i32 %0) {
 ; CHECK-LABEL: reject_eq:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #51712
+; CHECK-NEXT: mov w8, #51712 // =0xca00
 ; CHECK-NEXT: movk w8, #15258, lsl #16
 ; CHECK-NEXT: cmp w0, w8
 ; CHECK-NEXT: cset w0, eq
@@ -511,7 +511,7 @@
 define i1 @reject_non_eqne_csinc(i32 %0) {
 ; CHECK-LABEL: reject_non_eqne_csinc:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #4369
+; CHECK-NEXT: mov w8, #4369 // =0x1111
 ; CHECK-NEXT: movk w8, #17, lsl #16
 ; CHECK-NEXT: cmp w0, w8
 ; CHECK-NEXT: cset w0, lo
@@ -524,9 +524,9 @@
 ; CHECK-LABEL: accept_csel:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: sub w9, w0, #273, lsl #12 // =1118208
-; CHECK-NEXT: mov w8, #17
+; CHECK-NEXT: mov w8, #17 // =0x11
 ; CHECK-NEXT: cmp w9, #273
-; CHECK-NEXT: mov w9, #11
+; CHECK-NEXT: mov w9, #11 // =0xb
 ; CHECK-NEXT: csel w0, w9, w8, eq
 ; CHECK-NEXT: ret
 %2 = icmp eq i32 %0, 1118481
@@ -537,11 +537,11 @@
 define i32 @reject_non_eqne_csel(i32 %0) {
 ; CHECK-LABEL: reject_non_eqne_csel:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #4369
-; CHECK-NEXT: mov w9, #11
+; CHECK-NEXT: mov w8, #4369 // =0x1111
+; CHECK-NEXT: mov w9, #11 // =0xb
 ; CHECK-NEXT: movk w8, #17, lsl #16
 ; CHECK-NEXT: cmp w0, w8
-; CHECK-NEXT: mov w8, #17
+; CHECK-NEXT: mov w8, #17 // =0x11
 ; CHECK-NEXT: csel w0, w9, w8, lo
 ; CHECK-NEXT: ret
 %2 = icmp ult i32 %0, 1118481
@@ -573,7 +573,7 @@
 define void @reject_non_eqne_branch(i32 %0) {
 ; CHECK-LABEL: reject_non_eqne_branch:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #13398
+; CHECK-NEXT: mov w8, #13398 // =0x3456
 ; CHECK-NEXT: movk w8, #18, lsl #16
 ; CHECK-NEXT: cmp w0, w8
 ; CHECK-NEXT: b.le .LBB33_2
@@ -593,20 +593,20 @@
 define i32 @reject_multiple_usages(i32 %0) {
 ; CHECK-LABEL: reject_multiple_usages:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #4369
-; CHECK-NEXT: mov w9, #3
+; CHECK-NEXT: mov w8, #4369 // =0x1111
+; CHECK-NEXT: mov w9, #3 // =0x3
+; CHECK-NEXT: mov w10, #17 // =0x11
 ; CHECK-NEXT: movk w8, #17, lsl #16
-; CHECK-NEXT: mov w10, #17
+; CHECK-NEXT: mov w11, #12 // =0xc
 ; CHECK-NEXT: cmp w0, w8
-; CHECK-NEXT: mov w8, #9
-; CHECK-NEXT: mov w11, #12
+; CHECK-NEXT: mov w8, #9 // =0x9
 ; CHECK-NEXT: csel w8, w8, w9, eq
 ; CHECK-NEXT: csel w9, w11, w10, hi
+; CHECK-NEXT: mov w10, #53312 // =0xd040
+; CHECK-NEXT: movk w10, #2, lsl #16
 ; CHECK-NEXT: add w8, w8, w9
-; CHECK-NEXT: mov w9, #53312
-; CHECK-NEXT: movk w9, #2, lsl #16
-; CHECK-NEXT: cmp w0, w9
-; CHECK-NEXT: mov w9, #26304
+; CHECK-NEXT: mov w9, #26304 // =0x66c0
+; CHECK-NEXT: cmp w0, w10
 ; CHECK-NEXT: movk w9, #1433, lsl #16
 ; CHECK-NEXT: csel w0, w8, w9, hi
 ; CHECK-NEXT: ret
@@ -666,11 +666,11 @@
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: .cfi_offset w30, -16
 ; CHECK-NEXT: adrp x8, primary_crng
-; CHECK-NEXT: adrp x9, input_pool
-; CHECK-NEXT: add x9, x9, :lo12:input_pool
 ; CHECK-NEXT: ldr w8, [x8, :lo12:primary_crng]
 ; CHECK-NEXT: cmp w8, #0
-; CHECK-NEXT: csel x0, xzr, x9, eq
+; CHECK-NEXT: adrp x8, input_pool
+; CHECK-NEXT: add x8, x8, :lo12:input_pool
+; CHECK-NEXT: csel x0, xzr, x8, eq
 ; CHECK-NEXT: bl crng_reseed
 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT: .LBB36_3: // %if.end
@@ -778,7 +778,7 @@
 define i8 @commute_subop0_anyext(i16 %a, i16 %b, i32 %c) {
 ; CHECK-LABEL: commute_subop0_anyext:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #111
+; CHECK-NEXT: mov w8, #111 // =0x6f
 ; CHECK-NEXT: sub w9, w2, w1
 ; CHECK-NEXT: madd w8, w0, w8, w9
 ; CHECK-NEXT: lsl w8, w8, #3
diff --git a/llvm/test/CodeGen/AArch64/align-down.ll b/llvm/test/CodeGen/AArch64/align-down.ll
--- a/llvm/test/CodeGen/AArch64/align-down.ll
+++ b/llvm/test/CodeGen/AArch64/align-down.ll
@@ -84,8 +84,8 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: sub w8, w1, #1
 ; CHECK-NEXT: and w9, w0, w8
-; CHECK-NEXT: sub w0, w0, w9
 ; CHECK-NEXT: str w8, [x2]
+; CHECK-NEXT: sub w0, w0, w9
 ; CHECK-NEXT: str w9, [x3]
 ; CHECK-NEXT: ret
 %mask = add i32 %alignment, -1
diff --git a/llvm/test/CodeGen/AArch64/and-mask-removal.ll b/llvm/test/CodeGen/AArch64/and-mask-removal.ll
--- a/llvm/test/CodeGen/AArch64/and-mask-removal.ll
+++ b/llvm/test/CodeGen/AArch64/and-mask-removal.ll
@@ -22,8 +22,8 @@
 ; CHECK-SD-NEXT: adrp x9, _next_string@GOTPAGE
 ; CHECK-SD-NEXT: adrp x10, _string_number@GOTPAGE
 ; CHECK-SD-NEXT: ldr x9, [x9, _next_string@GOTPAGEOFF]
-; CHECK-SD-NEXT: ldr w9, [x9]
 ; CHECK-SD-NEXT: ldr x10, [x10, _string_number@GOTPAGEOFF]
+; CHECK-SD-NEXT: ldr w9, [x9]
 ; CHECK-SD-NEXT: str w9, [x10, x8, lsl #2]
 ; CHECK-SD-NEXT: LBB0_2: ; %if.end
 ; CHECK-SD-NEXT: ret
@@ -40,8 +40,8 @@
 ; CHECK-GI-NEXT: adrp x8, _next_string@GOTPAGE
 ; CHECK-GI-NEXT: adrp x9, _string_number@GOTPAGE
 ; CHECK-GI-NEXT: ldr x8, [x8, _next_string@GOTPAGEOFF]
-; CHECK-GI-NEXT: ldr w8, [x8]
 ; CHECK-GI-NEXT: ldr x9, [x9, _string_number@GOTPAGEOFF]
+; CHECK-GI-NEXT: ldr w8, [x8]
 ; CHECK-GI-NEXT: str w8, [x9, w0, sxtw #2]
 ; CHECK-GI-NEXT: LBB0_2: ; %if.end
 ; CHECK-GI-NEXT: ret
@@ -270,15 +270,15 @@
 define zeroext i1 @test16_0(i16 zeroext %x) align 2 {
 ; CHECK-SD-LABEL: test16_0:
 ; CHECK-SD: ; %bb.0: ; %entry
-; CHECK-SD-NEXT: mov w8, #5086
+; CHECK-SD-NEXT: mov w8, #5086 ; =0x13de
 ; CHECK-SD-NEXT: cmp w0, w8
 ; CHECK-SD-NEXT: cset w0, ne
 ; CHECK-SD-NEXT: ret
 ;
 ; CHECK-GI-LABEL: test16_0:
 ; CHECK-GI: ; %bb.0: ; %entry
-; CHECK-GI-NEXT: mov w8, #18547
-; CHECK-GI-NEXT: mov w9, #23633
+; CHECK-GI-NEXT: mov w8, #18547 ; =0x4873
+; CHECK-GI-NEXT: mov w9, #23633 ; =0x5c51
 ; CHECK-GI-NEXT: add w8, w0, w8
 ; CHECK-GI-NEXT: cmp w9, w8, uxth
 ; CHECK-GI-NEXT: cset w0, ne
@@ -296,8 +296,8 @@
 define zeroext i1 @test16_2(i16 zeroext %x) align 2 {
 ; CHECK-SD-LABEL: test16_2:
 ; CHECK-SD: ; %bb.0: ; %entry
-; CHECK-SD-NEXT: mov w8, #16882
-; CHECK-SD-NEXT: mov w9, #40700
+; CHECK-SD-NEXT: mov w8, #16882 ; =0x41f2
+; CHECK-SD-NEXT: mov w9, #40700 ; =0x9efc
 ; CHECK-SD-NEXT: add w8, w0, w8
 ; CHECK-SD-NEXT: cmp w9, w8, uxth
 ; CHECK-SD-NEXT: cset w0, hi
@@ -305,8 +305,8 @@
 ;
 ; CHECK-GI-LABEL: test16_2:
 ; CHECK-GI: ; %bb.0: ; %entry
-; CHECK-GI-NEXT: mov w8, #16882
-; CHECK-GI-NEXT: mov w9, #40699
+; CHECK-GI-NEXT: mov w8, #16882 ; =0x41f2
+; CHECK-GI-NEXT: mov w9, #40699 ; =0x9efb
 ; CHECK-GI-NEXT: add w8, w0, w8
 ; CHECK-GI-NEXT: cmp w9, w8, uxth
 ; CHECK-GI-NEXT: cset w0, hs
@@ -324,15 +324,15 @@
 define zeroext i1 @test16_3(i16 zeroext %x) align 2 {
 ; CHECK-SD-LABEL: test16_3:
 ; CHECK-SD: ; %bb.0: ; %entry
-; CHECK-SD-NEXT: mov w8, #53200
+; CHECK-SD-NEXT: mov w8, #53200 ; =0xcfd0
 ; CHECK-SD-NEXT: cmp w0, w8
 ; CHECK-SD-NEXT: cset w0, ne
 ; CHECK-SD-NEXT: ret
 ;
 ; CHECK-GI-LABEL: test16_3:
 ; CHECK-GI: ; %bb.0: ; %entry
-; CHECK-GI-NEXT: mov w8, #29283
-; CHECK-GI-NEXT: mov w9, #16947
+; CHECK-GI-NEXT: mov w8, #29283 ; =0x7263
+; CHECK-GI-NEXT: mov w9, #16947 ; =0x4233
 ; CHECK-GI-NEXT: add w8, w0, w8
 ; CHECK-GI-NEXT: cmp w9, w8, uxth
 ; CHECK-GI-NEXT: cset w0, ne
@@ -350,8 +350,8 @@
 define zeroext i1 @test16_4(i16 zeroext %x) align 2 {
 ; CHECK-SD-LABEL: test16_4:
 ; CHECK-SD: ; %bb.0: ; %entry
-; CHECK-SD-NEXT: mov w8, #29985
-; CHECK-SD-NEXT: mov w9, #15676
+; CHECK-SD-NEXT: mov w8, #29985 ; =0x7521
+; CHECK-SD-NEXT: mov w9, #15676 ; =0x3d3c
 ; CHECK-SD-NEXT: add w8, w0, w8
 ; CHECK-SD-NEXT: cmp w9, w8, uxth
 ; CHECK-SD-NEXT: cset w0, lo
@@ -359,8 +359,8 @@
 ;
 ; CHECK-GI-LABEL: test16_4:
 ; CHECK-GI: ; %bb.0: ; %entry
-; CHECK-GI-NEXT: mov w8, #29985
-; CHECK-GI-NEXT: mov w9, #15677
+; CHECK-GI-NEXT: mov w8, #29985 ; =0x7521
+; CHECK-GI-NEXT: mov w9, #15677 ; =0x3d3d
 ; CHECK-GI-NEXT: add w8, w0, w8
 ; CHECK-GI-NEXT: cmp w9, w8, uxth
 ; CHECK-GI-NEXT: cset w0, ls
@@ -378,15 +378,15 @@
 define zeroext i1 @test16_5(i16 zeroext %x) align 2 {
 ; CHECK-SD-LABEL: test16_5:
 ; CHECK-SD: ; %bb.0: ; %entry
-; CHECK-SD-NEXT: mov w8, #23282
+; CHECK-SD-NEXT: mov w8, #23282 ; =0x5af2
 ; CHECK-SD-NEXT: cmp w0, w8
 ; CHECK-SD-NEXT: cset w0, ne
 ; CHECK-SD-NEXT: ret
 ;
 ; CHECK-GI-LABEL: test16_5:
 ; CHECK-GI: ; %bb.0: ; %entry
-; CHECK-GI-NEXT: mov w8, #-25214
-; CHECK-GI-NEXT: mov w9, #63604
+; CHECK-GI-NEXT: mov w8, #-25214 ; =0xffff9d82
+; CHECK-GI-NEXT: mov w9, #63604 ; =0xf874
 ; CHECK-GI-NEXT: add w8, w0, w8
 ; CHECK-GI-NEXT: cmp w9, w8, uxth
 ; CHECK-GI-NEXT: cset w0, ne
@@ -404,8 +404,8 @@
 define zeroext i1 @test16_6(i16 zeroext %x) align 2 {
 ; CHECK-SD-LABEL: test16_6:
 ; CHECK-SD: ; %bb.0: ; %entry
-; CHECK-SD-NEXT: mov w8, #-32194
-; CHECK-SD-NEXT: mov w9, #24320
+; CHECK-SD-NEXT: mov w8, #-32194 ; =0xffff823e
+; CHECK-SD-NEXT: mov w9, #24320 ; =0x5f00
 ; CHECK-SD-NEXT: add w8, w0, w8
 ; CHECK-SD-NEXT: cmp w8, w9
 ; CHECK-SD-NEXT: cset w0, hi
@@ -413,8 +413,8 @@
 ;
 ; CHECK-GI-LABEL: test16_6:
 ; CHECK-GI: ; %bb.0: ; %entry
-; CHECK-GI-NEXT: mov w8, #-32194
-; CHECK-GI-NEXT: mov w9, #24321
+; CHECK-GI-NEXT: mov w8, #-32194 ; =0xffff823e
+; CHECK-GI-NEXT: mov w9, #24321 ; =0x5f01
 ; CHECK-GI-NEXT: add w8, w0, w8
 ; CHECK-GI-NEXT: cmp w8, w9
 ; CHECK-GI-NEXT: cset w0, hs
@@ -432,8 +432,8 @@
 define zeroext i1 @test16_7(i16 zeroext %x) align 2 {
 ; CHECK-SD-LABEL: test16_7:
 ; CHECK-SD: ; %bb.0: ; %entry
-; CHECK-SD-NEXT: mov w8, #9272
-; CHECK-SD-NEXT: mov w9, #22619
+; CHECK-SD-NEXT: mov w8, #9272 ; =0x2438
+; CHECK-SD-NEXT: mov w9, #22619 ; =0x585b
 ; CHECK-SD-NEXT: add w8, w0, w8
 ; CHECK-SD-NEXT: cmp w9, w8, uxth
 ; CHECK-SD-NEXT: cset w0, lo
@@ -441,8 +441,8 @@
 ;
 ; CHECK-GI-LABEL: test16_7:
 ; CHECK-GI: ; %bb.0: ; %entry
-; CHECK-GI-NEXT: mov w8, #9272
-; CHECK-GI-NEXT: mov w9, #22620
+; CHECK-GI-NEXT: mov w8, #9272 ; =0x2438
+; CHECK-GI-NEXT: mov w9, #22620 ; =0x585c
 ; CHECK-GI-NEXT: add w8, w0, w8
 ; CHECK-GI-NEXT: cmp w9, w8, uxth
 ; CHECK-GI-NEXT: cset w0, ls
@@ -460,16 +460,16 @@
 define zeroext i1 @test16_8(i16 zeroext %x) align 2 {
 ; CHECK-SD-LABEL: test16_8:
 ; CHECK-SD: ; %bb.0: ; %entry
-; CHECK-SD-NEXT: mov w8, #4919
+; CHECK-SD-NEXT: mov w8, #4919 ; =0x1337
 ; CHECK-SD-NEXT: cmp w0, w8
 ; CHECK-SD-NEXT: cset w0, ne
 ; CHECK-SD-NEXT: ret
 ;
 ; CHECK-GI-LABEL: test16_8:
 ; CHECK-GI: ; %bb.0: ; %entry
-; CHECK-GI-NEXT: add w8, w0, #1787
-; CHECK-GI-NEXT: mov w9, #6706
-; CHECK-GI-NEXT: cmp w9, w8, uxth
+; CHECK-GI-NEXT: mov w8, #6706 ; =0x1a32
+; CHECK-GI-NEXT: add w9, w0, #1787
+; CHECK-GI-NEXT: cmp w8, w9, uxth
 ; CHECK-GI-NEXT: cset w0, ne
 ; CHECK-GI-NEXT: ret
 entry:
diff --git a/llvm/test/CodeGen/AArch64/andorbrcompare.ll b/llvm/test/CodeGen/AArch64/andorbrcompare.ll
--- a/llvm/test/CodeGen/AArch64/andorbrcompare.ll
+++ b/llvm/test/CodeGen/AArch64/andorbrcompare.ll
@@ -9,15 +9,17 @@
 ; SDISEL: // %bb.0: // %entry
 ; SDISEL-NEXT: cmp w2, w3
 ; SDISEL-NEXT: ccmp w0, w1, #0, ne
-; SDISEL-NEXT: ccmp w4, w5, #0, ne
-; SDISEL-NEXT: b.hs .LBB0_2
-; SDISEL-NEXT: // %bb.1: // %if
-; SDISEL-NEXT: mov w0, #1
-; SDISEL-NEXT: str w0, [x6]
-; SDISEL-NEXT: ret
-; SDISEL-NEXT: .LBB0_2:
+; SDISEL-NEXT: b.eq .LBB0_3
+; SDISEL-NEXT: // %bb.1: // %entry
+; SDISEL-NEXT: cmp w4, w5
+; SDISEL-NEXT: b.lo .LBB0_3
+; SDISEL-NEXT: // %bb.2:
 ; SDISEL-NEXT: mov w0, wzr
 ; SDISEL-NEXT: ret
+; SDISEL-NEXT: .LBB0_3: // %if
+; SDISEL-NEXT: mov w0, #1 // =0x1
+; SDISEL-NEXT: str w0, [x6]
+; SDISEL-NEXT: ret
 ;
 ; GISEL-LABEL: and_eq_ne_ult:
 ; GISEL: // %bb.0: // %entry
@@ -28,13 +30,13 @@
 ; GISEL-NEXT: and w8, w8, w9
 ; GISEL-NEXT: tbnz w8, #0, .LBB0_3
 ; GISEL-NEXT: // %bb.1: // %entry
-; GISEL-NEXT: mov w0, wzr
 ; GISEL-NEXT: cmp w4, w5
+; GISEL-NEXT: mov w0, wzr
 ; GISEL-NEXT: b.lo .LBB0_3
 ; GISEL-NEXT: // %bb.2: // %common.ret
 ; GISEL-NEXT: ret
 ; GISEL-NEXT: .LBB0_3: // %if
-; GISEL-NEXT: mov w0, #1
+; GISEL-NEXT: mov w0, #1 // =0x1
 ; GISEL-NEXT: str w0, [x6]
 ; GISEL-NEXT: ret
 entry:
@@ -58,15 +60,17 @@
 ; SDISEL: // %bb.0: // %entry
 ; SDISEL-NEXT: cmp w2, w3
 ; SDISEL-NEXT: ccmp w0, w1, #4, lo
-; SDISEL-NEXT: ccmp w4, w5, #0, eq
-; SDISEL-NEXT: b.hi .LBB1_2
-; SDISEL-NEXT: // %bb.1: // %if
-; SDISEL-NEXT: mov w0, #1
-; SDISEL-NEXT: str w0, [x6]
-; SDISEL-NEXT: ret
-; SDISEL-NEXT: .LBB1_2:
+; SDISEL-NEXT: b.ne .LBB1_3
+; SDISEL-NEXT: // %bb.1: // %entry
+; SDISEL-NEXT: cmp w4, w5
+; SDISEL-NEXT: b.ls .LBB1_3
+; SDISEL-NEXT: // %bb.2:
 ; SDISEL-NEXT: mov w0, wzr
 ; SDISEL-NEXT: ret
+; SDISEL-NEXT: .LBB1_3: // %if
+; SDISEL-NEXT: mov w0, #1 // =0x1
+; SDISEL-NEXT: str w0, [x6]
+; SDISEL-NEXT: ret
 ;
 ; GISEL-LABEL: and_ne_ult_ule:
 ; GISEL: // %bb.0: // %entry
@@ -77,13 +81,13 @@
 ; GISEL-NEXT: and w8, w8, w9
 ; GISEL-NEXT: tbnz w8, #0, .LBB1_3
 ; GISEL-NEXT: // %bb.1: // %entry
-; GISEL-NEXT: mov w0, wzr
 ; GISEL-NEXT: cmp w4, w5
+; GISEL-NEXT: mov w0, wzr
 ; GISEL-NEXT: b.ls .LBB1_3
 ; GISEL-NEXT: // %bb.2: // %common.ret
 ; GISEL-NEXT: ret
 ; GISEL-NEXT: .LBB1_3: // %if
-; GISEL-NEXT: mov w0, #1
+; GISEL-NEXT: mov w0, #1 // =0x1
 ; GISEL-NEXT: str w0, [x6]
 ; GISEL-NEXT: ret
 entry:
@@ -107,15 +111,17 @@
 ; SDISEL: // %bb.0: // %entry
 ; SDISEL-NEXT: cmp w2, w3
 ; SDISEL-NEXT: ccmp w0, w1, #2, ls
-; SDISEL-NEXT: ccmp w4, w5, #2, hs
-; SDISEL-NEXT: b.ls .LBB2_2
-; SDISEL-NEXT: // %bb.1: // %if
-; SDISEL-NEXT: mov w0, #1
-; SDISEL-NEXT: str w0, [x6]
-; SDISEL-NEXT: ret
-; SDISEL-NEXT: .LBB2_2:
+; SDISEL-NEXT: b.lo .LBB2_3
+; SDISEL-NEXT: // %bb.1: // %entry
+; SDISEL-NEXT: cmp w4, w5
+; SDISEL-NEXT: b.hi .LBB2_3
+; SDISEL-NEXT: // %bb.2:
 ; SDISEL-NEXT: mov w0, wzr
 ; SDISEL-NEXT: ret
+; SDISEL-NEXT: .LBB2_3: // %if
+; SDISEL-NEXT: mov w0, #1 // =0x1
+; SDISEL-NEXT: str w0, [x6]
+; SDISEL-NEXT: ret
 ;
 ; GISEL-LABEL: and_ult_ule_ugt:
 ; GISEL: // %bb.0: // %entry
@@ -126,13 +132,13 @@
 ; GISEL-NEXT: and w8, w8, w9
 ; GISEL-NEXT: tbnz w8, #0, .LBB2_3
 ; GISEL-NEXT: // %bb.1: // %entry
-; GISEL-NEXT: mov w0, wzr
 ; GISEL-NEXT: cmp w4, w5
+; GISEL-NEXT: mov w0, wzr
 ; GISEL-NEXT: b.hi .LBB2_3
 ; GISEL-NEXT: // %bb.2: // %common.ret
 ; GISEL-NEXT: ret
 ; GISEL-NEXT: .LBB2_3: // %if
-; GISEL-NEXT: mov w0, #1
+; GISEL-NEXT: mov w0, #1 // =0x1
 ; GISEL-NEXT: str w0, [x6]
 ; GISEL-NEXT: ret
 entry:
@@ -156,15 +162,17 @@
 ; SDISEL: // %bb.0: // %entry
 ; SDISEL-NEXT: cmp w2, w3
 ; SDISEL-NEXT: ccmp w0, w1, #2, hi
-; SDISEL-NEXT: ccmp w4, w5, #2, hi
-; SDISEL-NEXT: b.lo .LBB3_2
-; SDISEL-NEXT: // %bb.1: // %if
-; SDISEL-NEXT: mov w0, #1
-; SDISEL-NEXT: str w0, [x6]
-; SDISEL-NEXT: ret
-; SDISEL-NEXT: .LBB3_2:
+; SDISEL-NEXT: b.ls .LBB3_3
+; SDISEL-NEXT: // %bb.1: // %entry
+; SDISEL-NEXT: cmp w4, w5
+; SDISEL-NEXT: b.hs .LBB3_3
+; SDISEL-NEXT: // %bb.2:
 ; SDISEL-NEXT: mov w0, wzr
 ; SDISEL-NEXT: ret
+; SDISEL-NEXT: .LBB3_3: // %if
+; SDISEL-NEXT: mov w0, #1 // =0x1
+; SDISEL-NEXT: str w0, [x6]
+; SDISEL-NEXT: ret
 ;
 ; GISEL-LABEL: and_ule_ugt_uge:
 ; GISEL: // %bb.0: // %entry
@@ -175,13 +183,13 @@
 ; GISEL-NEXT: and w8, w8, w9
 ; GISEL-NEXT: tbnz w8, #0, .LBB3_3
 ; GISEL-NEXT: // %bb.1: // %entry
-; GISEL-NEXT: mov w0, wzr
 ; GISEL-NEXT: cmp w4, w5
+; GISEL-NEXT: mov w0, wzr
 ; GISEL-NEXT: b.hs .LBB3_3
 ; GISEL-NEXT: // %bb.2: // %common.ret
 ; GISEL-NEXT: ret
 ; GISEL-NEXT: .LBB3_3: // %if
-; GISEL-NEXT: mov w0, #1
+; GISEL-NEXT: mov w0, #1 // =0x1
 ; GISEL-NEXT: str w0, [x6]
 ; GISEL-NEXT: ret
 entry:
@@ -205,15 +213,17 @@
 ; SDISEL: // %bb.0: // %entry
 ; SDISEL-NEXT: cmp w2, w3
 ; SDISEL-NEXT: ccmp w0, w1, #0, hs
-; SDISEL-NEXT: ccmp w4, w5, #8, ls
-; SDISEL-NEXT: b.ge .LBB4_2
-; SDISEL-NEXT: // %bb.1: // %if
-; SDISEL-NEXT: mov w0, #1
-; SDISEL-NEXT: str w0, [x6]
-; SDISEL-NEXT: ret
-; SDISEL-NEXT: .LBB4_2:
+; SDISEL-NEXT: b.hi .LBB4_3
+; SDISEL-NEXT: // %bb.1: // %entry
+; SDISEL-NEXT: cmp w4, w5
+; SDISEL-NEXT: b.lt .LBB4_3
+; SDISEL-NEXT: // %bb.2:
 ; SDISEL-NEXT: mov w0, wzr
 ; SDISEL-NEXT: ret
+; SDISEL-NEXT: .LBB4_3: // %if
+; SDISEL-NEXT: mov w0, #1 // =0x1
+; SDISEL-NEXT: str w0, [x6]
+; SDISEL-NEXT: ret
 ;
 ; GISEL-LABEL: and_ugt_uge_slt:
 ; GISEL: // %bb.0: // %entry
@@ -224,13 +234,13 @@
 ; GISEL-NEXT: and w8, w8, w9
 ; GISEL-NEXT: tbnz w8, #0, .LBB4_3
 ; GISEL-NEXT: // %bb.1: // %entry
-; GISEL-NEXT: mov w0, wzr
 ; GISEL-NEXT: cmp w4, w5
+; GISEL-NEXT: mov w0, wzr
 ; GISEL-NEXT: b.lt .LBB4_3
 ; GISEL-NEXT: // %bb.2: // %common.ret
 ; GISEL-NEXT: ret
 ; GISEL-NEXT: .LBB4_3: // %if
-; GISEL-NEXT: mov w0, #1
+; GISEL-NEXT: mov w0, #1 // =0x1
 ; GISEL-NEXT: str w0, [x6]
 ; GISEL-NEXT: ret
 entry:
@@ -254,15 +264,17 @@
 ; SDISEL: // %bb.0: // %entry
 ; SDISEL-NEXT: cmp w2, w3
 ; SDISEL-NEXT: ccmp w0, w1, #0, lt
-; SDISEL-NEXT: ccmp w4, w5, #4, lo
-; SDISEL-NEXT: b.gt .LBB5_2
-; SDISEL-NEXT: // %bb.1: // %if
-; SDISEL-NEXT: mov w0, #1
-; SDISEL-NEXT: str w0, [x6]
-; SDISEL-NEXT: ret
-; SDISEL-NEXT: .LBB5_2:
+; SDISEL-NEXT: b.hs .LBB5_3
+; SDISEL-NEXT: // %bb.1: // %entry
+; SDISEL-NEXT: cmp w4, w5
+; SDISEL-NEXT: b.le .LBB5_3
+; SDISEL-NEXT: // %bb.2:
 ; SDISEL-NEXT: mov w0, wzr
 ; SDISEL-NEXT: ret
+; SDISEL-NEXT: .LBB5_3: // %if
+; SDISEL-NEXT: mov w0, #1 // =0x1
+; SDISEL-NEXT: str w0, [x6]
+; SDISEL-NEXT: ret
 ;
 ; GISEL-LABEL: and_uge_slt_sle:
 ; GISEL: // %bb.0: // %entry
@@ -273,13 +285,13 @@
 ; GISEL-NEXT: and w8, w8, w9
 ; GISEL-NEXT: tbnz w8, #0, .LBB5_3
 ; GISEL-NEXT: // %bb.1: // %entry
-; GISEL-NEXT: mov w0, wzr
 ; GISEL-NEXT: cmp w4, w5
+; GISEL-NEXT: mov w0, wzr
 ; GISEL-NEXT: b.le .LBB5_3
 ; GISEL-NEXT: // %bb.2: // %common.ret
 ; GISEL-NEXT: ret
 ; GISEL-NEXT: .LBB5_3: // %if
-; GISEL-NEXT: mov w0, #1
+; GISEL-NEXT: mov w0, #1 // =0x1
 ; GISEL-NEXT: str w0, [x6]
 ; GISEL-NEXT: ret
 entry:
@@ -303,15 +315,17 @@
 ; SDISEL: // %bb.0: // %entry
 ; SDISEL-NEXT: cmp w2, w3
 ; SDISEL-NEXT: ccmp w0, w1, #0, le
-; SDISEL-NEXT: ccmp w4, w5, #0, ge
-; SDISEL-NEXT: b.le .LBB6_2
-; SDISEL-NEXT: // %bb.1: // %if
-; SDISEL-NEXT: mov w0, #1
-; SDISEL-NEXT: str w0, [x6]
-; SDISEL-NEXT: ret
-; SDISEL-NEXT: .LBB6_2:
+; SDISEL-NEXT: b.lt .LBB6_3
+; SDISEL-NEXT: // %bb.1: // %entry
+; SDISEL-NEXT: cmp w4, w5
+; SDISEL-NEXT: b.gt .LBB6_3
+; SDISEL-NEXT: // %bb.2:
 ; SDISEL-NEXT: mov w0, wzr
 ; SDISEL-NEXT: ret
+; SDISEL-NEXT: .LBB6_3: // %if
+; SDISEL-NEXT: mov w0, #1 // =0x1
+; SDISEL-NEXT: str w0, [x6]
+; SDISEL-NEXT: ret
 ;
 ; GISEL-LABEL: and_slt_sle_sgt:
 ; GISEL: // %bb.0: // %entry
@@ -322,13 +336,13 @@
 ; GISEL-NEXT: and w8, w8, w9
 ; GISEL-NEXT: tbnz w8, #0, .LBB6_3
 ; GISEL-NEXT: // %bb.1: // %entry
-; GISEL-NEXT: mov w0, wzr
 ; GISEL-NEXT: cmp w4, w5
+; GISEL-NEXT: mov w0, wzr
 ; GISEL-NEXT: b.gt .LBB6_3
 ; GISEL-NEXT: // %bb.2: // %common.ret
 ; GISEL-NEXT: ret
 ; GISEL-NEXT: .LBB6_3: // %if
-; GISEL-NEXT: mov w0, #1
+; GISEL-NEXT: mov w0, #1 // =0x1
 ; GISEL-NEXT: str w0, [x6]
 ; GISEL-NEXT: ret
 entry:
@@ -352,15 +366,17 @@
 ; SDISEL: // %bb.0: // %entry
 ; SDISEL-NEXT: cmp w2, w3
 ; SDISEL-NEXT: ccmp w0, w1, #0, gt
-; SDISEL-NEXT: ccmp w4, w5, #0, gt
-; SDISEL-NEXT: b.lt .LBB7_2
-; SDISEL-NEXT: // %bb.1: // %if
-; SDISEL-NEXT: mov w0, #1
-; SDISEL-NEXT: str w0, [x6]
-; SDISEL-NEXT: ret
-; SDISEL-NEXT: .LBB7_2:
+; SDISEL-NEXT: b.le .LBB7_3
+; SDISEL-NEXT: // %bb.1: // %entry
+; SDISEL-NEXT: cmp w4, w5
+; SDISEL-NEXT: b.ge .LBB7_3
+; SDISEL-NEXT: // %bb.2:
 ; SDISEL-NEXT: mov w0, wzr
 ; SDISEL-NEXT: ret
+; SDISEL-NEXT: .LBB7_3: // %if
+; SDISEL-NEXT: mov w0, #1 // =0x1
+; SDISEL-NEXT: str w0, [x6]
+; SDISEL-NEXT: ret
 ;
 ; GISEL-LABEL: and_sle_sgt_sge:
 ; GISEL: // %bb.0: // %entry
@@ -371,13 +387,13 @@
 ; GISEL-NEXT: and w8, w8, w9
 ; GISEL-NEXT: tbnz w8, #0, .LBB7_3
 ; GISEL-NEXT: // %bb.1: // %entry
-; GISEL-NEXT: mov w0, wzr
 ; GISEL-NEXT: cmp w4, w5
+; GISEL-NEXT: mov w0, wzr
 ; GISEL-NEXT: b.ge .LBB7_3
 ; GISEL-NEXT: // %bb.2: // %common.ret
 ; GISEL-NEXT: ret
 ; GISEL-NEXT: .LBB7_3: // %if
-; GISEL-NEXT: mov w0, #1
+; GISEL-NEXT: mov w0, #1 // =0x1
 ; GISEL-NEXT: str w0, [x6]
 ; GISEL-NEXT: ret
 entry:
diff --git a/llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll b/llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll
--- a/llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll
+++ b/llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll
@@ -283,8 +283,8 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: movi d0, #0000000000000000
 ; CHECK-NEXT: movi d1, #0000000000000000
-; CHECK-NEXT: movi d2, #0000000000000000
 ; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: movi d2, #0000000000000000
 ; CHECK-NEXT: ret
 ret %T_NESTED_STRUCT_DIFFM zeroinitializer
 }
@@ -294,8 +294,8 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: movi d0, #0000000000000000
 ; CHECK-NEXT: movi d1, #0000000000000000
-; CHECK-NEXT: movi d2, #0000000000000000
 ; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: movi d2, #0000000000000000
 ; CHECK-NEXT: ret
 ret [ 1 x %T_NESTED_STRUCT_DIFFM ] zeroinitializer
 }
@@ -305,12 +305,12 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: movi d0, #0000000000000000
 ; CHECK-NEXT: movi d1, #0000000000000000
+; CHECK-NEXT: mov w0, wzr
 ; CHECK-NEXT: movi d2, #0000000000000000
 ; CHECK-NEXT: movi d3, #0000000000000000
+; CHECK-NEXT: mov w1, wzr
 ; CHECK-NEXT: movi d4, #0000000000000000
 ; CHECK-NEXT: movi d5, #0000000000000000
-; CHECK-NEXT: mov w0, wzr
-; CHECK-NEXT: mov w1, wzr
 ; CHECK-NEXT: ret
 ret [ 2 x %T_NESTED_STRUCT_DIFFM ] zeroinitializer
 }
@@ -458,16 +458,16 @@
 ; CHECK-NEXT: add x8, sp, #8
 ; CHECK-NEXT: bl return_in_memory
 ; CHECK-NEXT: ldur q0, [sp, #24]
+; CHECK-NEXT: ldur q1, [sp, #8]
 ; CHECK-NEXT: adrp x8, in_memory_store
 ; CHECK-NEXT: add x8, x8, :lo12:in_memory_store
-; CHECK-NEXT: ldur q1, [sp, #8]
-; CHECK-NEXT: ldur q2, [sp, #56]
-; CHECK-NEXT: ldur q3, [sp, #40]
-; CHECK-NEXT: ldr d4, [sp, #72]
-; CHECK-NEXT: stp q1, q0, [x8]
+; CHECK-NEXT: ldr d2, [sp, #72]
+; CHECK-NEXT: ldur q3, [sp, #56]
+; CHECK-NEXT: ldur q4, [sp, #40]
 ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
-; CHECK-NEXT: stp q3, q2, [x8, #32]
-; CHECK-NEXT: str d4, [x8, #64]
+; CHECK-NEXT: stp q1, q0, [x8]
+; CHECK-NEXT: str d2, [x8, #64]
+; CHECK-NEXT: stp q4, q3, [x8, #32]
 ; CHECK-NEXT: add sp, sp, #96
 ; CHECK-NEXT: ret
 %1 = call %T_IN_MEMORY @return_in_memory()
@@ -478,15 +478,16 @@
 define void @callee_in_memory(%T_IN_MEMORY %a) {
 ; CHECK-LABEL: callee_in_memory:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr
d0, [sp, #64] +; CHECK-NEXT: ldp q1, q2, [sp, #32] ; CHECK-NEXT: adrp x8, in_memory_store ; CHECK-NEXT: add x8, x8, :lo12:in_memory_store -; CHECK-NEXT: ldr q3, [sp, #16] -; CHECK-NEXT: ldp q1, q2, [sp, #32] +; CHECK-NEXT: ldr d0, [sp, #64] ; CHECK-NEXT: str d0, [x8, #64] -; CHECK-NEXT: ldr q0, [sp] -; CHECK-NEXT: stp q1, q2, [x8, #32] -; CHECK-NEXT: stp q0, q3, [x8] +; CHECK-NEXT: ldr q0, [sp, #16] +; CHECK-NEXT: str q2, [x8, #48] +; CHECK-NEXT: ldr q2, [sp] +; CHECK-NEXT: stp q0, q1, [x8, #16] +; CHECK-NEXT: str q2, [x8] ; CHECK-NEXT: ret store %T_IN_MEMORY %a, ptr @in_memory_store ret void @@ -502,11 +503,11 @@ ; CHECK-NEXT: adrp x8, in_memory_store ; CHECK-NEXT: add x8, x8, :lo12:in_memory_store ; CHECK-NEXT: ldp q0, q1, [x8] -; CHECK-NEXT: ldp q2, q3, [x8, #32] ; CHECK-NEXT: ldr d4, [x8, #64] +; CHECK-NEXT: ldp q2, q3, [x8, #32] +; CHECK-NEXT: str d4, [sp, #64] ; CHECK-NEXT: stp q0, q1, [sp] ; CHECK-NEXT: stp q2, q3, [sp, #32] -; CHECK-NEXT: str d4, [sp, #64] ; CHECK-NEXT: bl callee_in_memory ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 diff --git a/llvm/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll b/llvm/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll --- a/llvm/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll +++ b/llvm/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll @@ -68,16 +68,16 @@ define double @add_sub_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone { ; CHECK-LABEL: add_sub_su64: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov d2, xzr ; CHECK-NEXT: add d0, d1, d0 -; CHECK-NEXT: sub d0, d2, d0 +; CHECK-NEXT: fmov d1, xzr +; CHECK-NEXT: sub d0, d1, d0 ; CHECK-NEXT: ret ; ; GENERIC-LABEL: add_sub_su64: ; GENERIC: // %bb.0: -; GENERIC-NEXT: fmov d2, xzr ; GENERIC-NEXT: add d0, d1, d0 -; GENERIC-NEXT: sub d0, d2, d0 +; GENERIC-NEXT: fmov d1, xzr +; GENERIC-NEXT: sub d0, d1, d0 ; GENERIC-NEXT: ret %vecext = extractelement <2 x i64> %a, i32 0 %vecext1 = extractelement <2 x i64> %b, i32 0 diff --git a/llvm/test/CodeGen/AArch64/arm64-addr-type-promotion.ll b/llvm/test/CodeGen/AArch64/arm64-addr-type-promotion.ll --- a/llvm/test/CodeGen/AArch64/arm64-addr-type-promotion.ll +++ b/llvm/test/CodeGen/AArch64/arm64-addr-type-promotion.ll @@ -10,28 +10,28 @@ define zeroext i8 @fullGtU(i32 %i1, i32 %i2) { ; CHECK-LABEL: fullGtU: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: adrp x10, _block@GOTPAGE +; CHECK-NEXT: adrp x8, _block@GOTPAGE ; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: ; kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: sxtw x8, w0 -; CHECK-NEXT: sxtw x9, w1 -; CHECK-NEXT: ldr x10, [x10, _block@GOTPAGEOFF] -; CHECK-NEXT: ldr x10, [x10] -; CHECK-NEXT: ldrb w11, [x10, x8] -; CHECK-NEXT: ldrb w12, [x10, x9] +; CHECK-NEXT: sxtw x9, w0 +; CHECK-NEXT: sxtw x10, w1 +; CHECK-NEXT: ldr x8, [x8, _block@GOTPAGEOFF] +; CHECK-NEXT: ldr x8, [x8] +; CHECK-NEXT: ldrb w11, [x8, x9] +; CHECK-NEXT: ldrb w12, [x8, x10] ; CHECK-NEXT: cmp w11, w12 ; CHECK-NEXT: b.ne LBB0_3 ; CHECK-NEXT: ; %bb.1: ; %if.end -; CHECK-NEXT: add x8, x8, x10 -; CHECK-NEXT: add x9, x9, x10 -; CHECK-NEXT: ldrb w10, [x8, #1] -; CHECK-NEXT: ldrb w11, [x9, #1] +; CHECK-NEXT: add x9, x9, x8 +; CHECK-NEXT: add x8, x10, x8 +; CHECK-NEXT: ldrb w10, [x9, #1] +; CHECK-NEXT: ldrb w11, [x8, #1] ; CHECK-NEXT: cmp w10, w11 ; CHECK-NEXT: b.ne LBB0_3 ; CHECK-NEXT: ; %bb.2: ; %if.end25 -; CHECK-NEXT: ldrb w8, [x8, #2] ; CHECK-NEXT: ldrb w9, [x9, #2] -; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: ldrb w8, [x8, #2] +; CHECK-NEXT: cmp w9, w8 ; CHECK-NEXT: cset w8, hi ; CHECK-NEXT: csel w0, wzr, w8, eq ; 
CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/arm64-addrmode.ll b/llvm/test/CodeGen/AArch64/arm64-addrmode.ll --- a/llvm/test/CodeGen/AArch64/arm64-addrmode.ll +++ b/llvm/test/CodeGen/AArch64/arm64-addrmode.ll @@ -42,7 +42,7 @@ define void @t4(ptr %object) { ; CHECK-LABEL: t4: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32768 +; CHECK-NEXT: mov w8, #32768 // =0x8000 ; CHECK-NEXT: ldr xzr, [x0, x8] ; CHECK-NEXT: ret %incdec.ptr = getelementptr inbounds i64, ptr %object, i64 4096 @@ -67,9 +67,9 @@ define void @t6(i64 %a, ptr %object) { ; CHECK-LABEL: t6: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32768 -; CHECK-NEXT: add x9, x1, x0, lsl #3 -; CHECK-NEXT: ldr xzr, [x9, x8] +; CHECK-NEXT: add x8, x1, x0, lsl #3 +; CHECK-NEXT: mov w9, #32768 // =0x8000 +; CHECK-NEXT: ldr xzr, [x8, x9] ; CHECK-NEXT: ret %tmp1 = getelementptr inbounds i64, ptr %object, i64 %a %incdec.ptr = getelementptr inbounds i64, ptr %tmp1, i64 4096 @@ -81,7 +81,7 @@ define void @t7(i64 %a) { ; CHECK-LABEL: t7: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #65535 +; CHECK-NEXT: mov w8, #65535 // =0xffff ; CHECK-NEXT: ldr xzr, [x0, x8] ; CHECK-NEXT: ret %1 = add i64 %a, 65535 ;0xffff @@ -93,7 +93,7 @@ define void @t8(i64 %a) { ; CHECK-LABEL: t8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-4662 +; CHECK-NEXT: mov x8, #-4662 // =0xffffffffffffedca ; CHECK-NEXT: ldr xzr, [x0, x8] ; CHECK-NEXT: ret %1 = sub i64 %a, 4662 ;-4662 is 0xffffffffffffedca @@ -105,7 +105,7 @@ define void @t9(i64 %a) { ; CHECK-LABEL: t9: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-305463297 +; CHECK-NEXT: mov x8, #-305463297 // =0xffffffffedcaffff ; CHECK-NEXT: ldr xzr, [x0, x8] ; CHECK-NEXT: ret %1 = add i64 -305463297, %a ;-305463297 is 0xffffffffedcaffff @@ -117,7 +117,7 @@ define void @t10(i64 %a) { ; CHECK-LABEL: t10: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #81909218222800896 +; CHECK-NEXT: mov x8, #81909218222800896 // =0x123000000000000 ; CHECK-NEXT: ldr xzr, [x0, x8] ; CHECK-NEXT: ret %1 = add i64 %a, 81909218222800896 ;0x123000000000000 @@ -129,7 +129,7 @@ define void @t11(i64 %a) { ; CHECK-LABEL: t11: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #17767 +; CHECK-NEXT: mov w8, #17767 // =0x4567 ; CHECK-NEXT: movk w8, #291, lsl #16 ; CHECK-NEXT: ldr xzr, [x0, x8] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll b/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll --- a/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll +++ b/llvm/test/CodeGen/AArch64/arm64-bitfield-extract.ll @@ -22,6 +22,7 @@ ; OPT-NEXT: [[FROMBOOL:%.*]] = trunc i32 [[BF_CLEAR_LOBIT]] to i8 ; OPT-NEXT: store i8 [[FROMBOOL]], ptr [[B]], align 1 ; OPT-NEXT: ret void +; %tmp1 = load i32, ptr %x, align 4 %b = getelementptr inbounds %struct.Y, ptr %y, i64 0, i32 1 %bf.clear = lshr i32 %tmp1, 3 @@ -41,6 +42,7 @@ ; OPT-NEXT: [[TMP1:%.*]] = shl i32 [[TMP]], 28 ; OPT-NEXT: [[BF_VAL_SEXT:%.*]] = ashr exact i32 [[TMP1]], 28 ; OPT-NEXT: ret i32 [[BF_VAL_SEXT]] +; %tmp = trunc i64 %cav1.coerce to i32 %tmp1 = shl i32 %tmp, 28 %bf.val.sext = ashr exact i32 %tmp1, 28 @@ -57,6 +59,7 @@ ; OPT-NEXT: [[CAV1_SROA_0_1_INSERT:%.*]] = shl i32 [[TMP]], 22 ; OPT-NEXT: [[TMP1:%.*]] = ashr i32 [[CAV1_SROA_0_1_INSERT]], 26 ; OPT-NEXT: ret i32 [[TMP1]] +; %tmp = trunc i64 %cav1.coerce to i32 %cav1.sroa.0.1.insert = shl i32 %tmp, 22 %tmp1 = ashr i32 %cav1.sroa.0.1.insert, 26 @@ -76,6 +79,7 @@ ; OPT-NEXT: [[BF_CLEAR_LOBIT:%.*]] = and i64 [[BF_CLEAR]], 1 ; OPT-NEXT: store i64 [[BF_CLEAR_LOBIT]], ptr [[Y:%.*]], align 8 ; OPT-NEXT: ret void +; %tmp1 = load i64, ptr %x, align 4 
%bf.clear = lshr i64 %tmp1, 3 %bf.clear.lobit = and i64 %bf.clear, 1 @@ -92,6 +96,7 @@ ; OPT-NEXT: [[TMP:%.*]] = shl i64 [[CAV1_COERCE:%.*]], 28 ; OPT-NEXT: [[BF_VAL_SEXT:%.*]] = ashr exact i64 [[TMP]], 28 ; OPT-NEXT: ret i64 [[BF_VAL_SEXT]] +; %tmp = shl i64 %cav1.coerce, 28 %bf.val.sext = ashr exact i64 %tmp, 28 ret i64 %bf.val.sext @@ -106,6 +111,7 @@ ; OPT-NEXT: [[CAV1_SROA_0_1_INSERT:%.*]] = shl i64 [[CAV1_COERCE:%.*]], 22 ; OPT-NEXT: [[TMP1:%.*]] = ashr i64 [[CAV1_SROA_0_1_INSERT]], 26 ; OPT-NEXT: ret i64 [[TMP1]] +; %cav1.sroa.0.1.insert = shl i64 %cav1.coerce, 22 %tmp1 = ashr i64 %cav1.sroa.0.1.insert, 26 ret i64 %tmp1 @@ -127,6 +133,7 @@ ; OPT-NEXT: [[OR:%.*]] = or i64 [[AND]], [[AND1]] ; OPT-NEXT: store i64 [[OR]], ptr [[Y]], align 8 ; OPT-NEXT: ret void +; entry: %0 = load i64, ptr %y, align 8 %and = and i64 %0, -16777216 @@ -153,6 +160,7 @@ ; OPT-NEXT: [[OR:%.*]] = or i32 [[AND]], [[AND1]] ; OPT-NEXT: store i32 [[OR]], ptr [[Y]], align 8 ; OPT-NEXT: ret void +; entry: %0 = load i32, ptr %y, align 8 %and = and i32 %0, -8 @@ -182,6 +190,7 @@ ; OPT-NEXT: [[SHR1:%.*]] = lshr i32 [[OR]], 2 ; OPT-NEXT: store i32 [[SHR1]], ptr [[Y]], align 8 ; OPT-NEXT: ret void +; entry: ; lsr is an alias of ubfm %0 = load i32, ptr %y, align 8 @@ -214,6 +223,7 @@ ; OPT-NEXT: [[SHL:%.*]] = shl i32 [[OR]], 2 ; OPT-NEXT: store i32 [[SHL]], ptr [[Y]], align 8 ; OPT-NEXT: ret void +; entry: ; lsl is an alias of ubfm %0 = load i32, ptr %y, align 8 @@ -247,6 +257,7 @@ ; OPT-NEXT: [[SHR1:%.*]] = lshr i64 [[OR]], 2 ; OPT-NEXT: store i64 [[SHR1]], ptr [[Y]], align 8 ; OPT-NEXT: ret void +; entry: ; lsr is an alias of ubfm %0 = load i64, ptr %y, align 8 @@ -280,6 +291,7 @@ ; OPT-NEXT: [[SHL:%.*]] = shl i64 [[OR]], 2 ; OPT-NEXT: store i64 [[SHL]], ptr [[Y]], align 8 ; OPT-NEXT: ret void +; entry: ; lsr is an alias of ubfm %0 = load i64, ptr %y, align 8 @@ -311,6 +323,7 @@ ; OPT-NEXT: [[SHL:%.*]] = shl i32 [[OR]], 2 ; OPT-NEXT: store i32 [[SHL]], ptr [[Y]], align 8 ; OPT-NEXT: ret void +; entry: ; lsl is an alias of ubfm %0 = load i32, ptr %y, align 8 @@ -341,6 +354,7 @@ ; OPT-NEXT: [[SHL:%.*]] = shl i64 [[OR]], 2 ; OPT-NEXT: store i64 [[SHL]], ptr [[Y]], align 8 ; OPT-NEXT: ret void +; entry: ; lsl is an alias of ubfm %0 = load i64, ptr %y, align 8 @@ -361,6 +375,7 @@ ; OPT-NEXT: [[AND_I_I:%.*]] = and i32 [[TMP2:%.*]], 2048 ; OPT-NEXT: [[TOBOOL_I_I:%.*]] = icmp ne i32 [[AND_I_I]], 0 ; OPT-NEXT: ret i1 [[TOBOOL_I_I]] +; %and.i.i = and i32 %tmp2, 2048 %tobool.i.i = icmp ne i32 %and.i.i, 0 ret i1 %tobool.i.i @@ -387,6 +402,7 @@ ; OPT-NEXT: [[SHR2:%.*]] = lshr i32 [[SHL]], 4 ; OPT-NEXT: store i32 [[SHR2]], ptr [[Y]], align 8 ; OPT-NEXT: ret void +; entry: ; lsr is an alias of ubfm %0 = load i32, ptr %y, align 8 @@ -419,6 +435,7 @@ ; OPT-NEXT: [[MASK:%.*]] = and i32 [[LSHR]], 268435455 ; OPT-NEXT: store i32 [[MASK]], ptr [[Y]], align 8 ; OPT-NEXT: ret void +; entry: ; lsr is an alias of ubfm %0 = load i32, ptr %y, align 8 @@ -454,6 +471,7 @@ ; OPT-NEXT: [[SHR2:%.*]] = lshr i64 [[SHL]], 4 ; OPT-NEXT: store i64 [[SHR2]], ptr [[Y]], align 8 ; OPT-NEXT: ret void +; entry: ; lsr is an alias of ubfm %0 = load i64, ptr %y, align 8 @@ -486,6 +504,7 @@ ; OPT-NEXT: [[MASK:%.*]] = and i64 [[LSHR]], 1152921504606846975 ; OPT-NEXT: store i64 [[MASK]], ptr [[Y]], align 8 ; OPT-NEXT: ret void +; entry: ; lsr is an alias of ubfm %0 = load i64, ptr %y, align 8 @@ -527,6 +546,7 @@ ; OPT-NEXT: [[SHL1:%.*]] = shl i32 [[OR1]], 2 ; OPT-NEXT: store i32 [[SHL1]], ptr [[Y]], align 8 ; OPT-NEXT: ret void +; entry: ; lsr is an alias of ubfm ; 
lsl is an alias of ubfm @@ -573,6 +593,7 @@ ; OPT-NEXT: [[SHL1:%.*]] = shl i64 [[OR1]], 2 ; OPT-NEXT: store i64 [[SHL1]], ptr [[Y]], align 8 ; OPT-NEXT: ret void +; entry: ; lsr is an alias of ubfm ; lsl is an alias of ubfm @@ -615,6 +636,7 @@ ; OPT-NEXT: [[SHR2:%.*]] = lshr i32 [[SHL]], 4 ; OPT-NEXT: store i32 [[SHR2]], ptr [[Y]], align 8 ; OPT-NEXT: ret void +; entry: ; Create the constant ; Do the masking @@ -651,6 +673,7 @@ ; OPT-NEXT: [[MASK:%.*]] = and i32 [[LSHR]], 268435455 ; OPT-NEXT: store i32 [[MASK]], ptr [[Y]], align 8 ; OPT-NEXT: ret void +; entry: ; Create the constant ; Do the masking @@ -692,6 +715,7 @@ ; OPT-NEXT: [[SHR2:%.*]] = lshr i64 [[SHL]], 4 ; OPT-NEXT: store i64 [[SHR2]], ptr [[Y]], align 8 ; OPT-NEXT: ret void +; entry: ; Create the constant ; Do the masking @@ -728,6 +752,7 @@ ; OPT-NEXT: [[MASK:%.*]] = and i64 [[LSHR]], 1152921504606846975 ; OPT-NEXT: store i64 [[MASK]], ptr [[Y]], align 8 ; OPT-NEXT: ret void +; entry: ; Create the constant ; Do the masking @@ -754,6 +779,7 @@ ; OPT-NEXT: [[CONV82:%.*]] = zext i32 [[SHR81]] to i64 ; OPT-NEXT: [[RESULT:%.*]] = and i64 [[CONV82]], 255 ; OPT-NEXT: ret i64 [[RESULT]] +; %shr81 = lshr i32 %xor72, 9 %conv82 = zext i32 %shr81 to i64 %result = and i64 %conv82, 255 @@ -836,6 +862,7 @@ ; OPT: return: ; OPT-NEXT: [[RETVAL_0:%.*]] = phi i32 [ [[CONV]], [[IF_THEN]] ], [ [[ADD]], [[IF_THEN7]] ], [ [[ADD23]], [[IF_THEN17]] ], [ 64, [[IF_END13]] ] ; OPT-NEXT: ret i32 [[RETVAL_0]] +; entry: %x.sroa.1.0.extract.shift = lshr i64 %arg1, 16 %x.sroa.1.0.extract.trunc = trunc i64 %x.sroa.1.0.extract.shift to i16 @@ -889,20 +916,20 @@ define i80 @fct20(i128 %a, i128 %b) { ; LLC-LABEL: fct20: ; LLC: // %bb.0: // %entry -; LLC-NEXT: mov x12, #11776 -; LLC-NEXT: extr x9, x1, x0, #18 -; LLC-NEXT: movk x12, #25856, lsl #16 -; LLC-NEXT: lsr x8, x1, #18 -; LLC-NEXT: movk x12, #11077, lsl #32 -; LLC-NEXT: orr x10, x2, x3 -; LLC-NEXT: mov w11, #26220 -; LLC-NEXT: movk x12, #45, lsl #48 -; LLC-NEXT: and x11, x8, x11 -; LLC-NEXT: and x12, x9, x12 -; LLC-NEXT: cmp x10, #0 -; LLC-NEXT: csel x0, x12, x9, eq -; LLC-NEXT: csel x1, x11, x8, eq -; LLC-NEXT: ret +; LLC-NEXT: mov x12, #11776 // =0x2e00 +; LLC-NEXT: lsr x8, x1, #18 +; LLC-NEXT: extr x9, x1, x0, #18 +; LLC-NEXT: movk x12, #25856, lsl #16 +; LLC-NEXT: orr x10, x2, x3 +; LLC-NEXT: mov w11, #26220 // =0x666c +; LLC-NEXT: movk x12, #11077, lsl #32 +; LLC-NEXT: and x11, x8, x11 +; LLC-NEXT: cmp x10, #0 +; LLC-NEXT: movk x12, #45, lsl #48 +; LLC-NEXT: csel x1, x11, x8, eq +; LLC-NEXT: and x12, x9, x12 +; LLC-NEXT: csel x0, x12, x9, eq +; LLC-NEXT: ret ; OPT-LABEL: @fct20( ; OPT-NEXT: entry: ; OPT-NEXT: [[SHR:%.*]] = lshr i128 [[A:%.*]], 18 @@ -916,6 +943,7 @@ ; OPT: end: ; OPT-NEXT: [[CONV3:%.*]] = phi i80 [ [[CONV]], [[ENTRY:%.*]] ], [ [[CONV2]], [[THEN]] ] ; OPT-NEXT: ret i80 [[CONV3]] +; entry: %shr = lshr i128 %a, 18 %conv = trunc i128 %shr to i80 @@ -947,6 +975,7 @@ ; OPT-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [8 x [64 x i64]], ptr @arr, i64 0, i64 0, i64 [[AND]] ; OPT-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX]], align 8 ; OPT-NEXT: ret i64 [[TMP0]] +; entry: %shr = lshr i64 %x, 4 %and = and i64 %shr, 15 @@ -971,6 +1000,7 @@ ; OPT-NEXT: [[OR18:%.*]] = or i32 [[SHL16]], [[INSERTION]] ; OPT-NEXT: [[CONV19:%.*]] = trunc i32 [[OR18]] to i16 ; OPT-NEXT: ret i16 [[CONV19]] +; %positioned_field = shl i32 %in, 3 %positioned_masked_field = and i32 %positioned_field, 120 %masked_dst = and i32 %dst, 7 @@ -1016,6 +1046,7 @@ ; OPT-NEXT: br label [[END]] ; OPT: end: ; OPT-NEXT: ret void +; 
entry: %shr47 = lshr i64 %src, 47 %src2.trunc = trunc i64 %src2 to i32 diff --git a/llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll b/llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll --- a/llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll +++ b/llvm/test/CodeGen/AArch64/arm64-convert-v4f64.ll @@ -6,10 +6,10 @@ ; CHECK-LABEL: fptosi_v4f64_to_v4i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: fcvtzs v0.2d, v0.2d ; CHECK-NEXT: fcvtzs v1.2d, v1.2d -; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d ; CHECK-NEXT: xtn v1.2s, v1.2d +; CHECK-NEXT: xtn v0.2s, v0.2d ; CHECK-NEXT: uzp1 v0.4h, v0.4h, v1.4h ; CHECK-NEXT: ret %tmp1 = load <4 x double>, ptr %ptr @@ -20,17 +20,17 @@ define <8 x i8> @fptosi_v4f64_to_v4i8(ptr %ptr) { ; CHECK-LABEL: fptosi_v4f64_to_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0, #32] -; CHECK-NEXT: fcvtzs v0.2d, v0.2d +; CHECK-NEXT: ldp q1, q0, [x0, #32] ; CHECK-NEXT: ldp q2, q3, [x0] +; CHECK-NEXT: fcvtzs v0.2d, v0.2d ; CHECK-NEXT: fcvtzs v1.2d, v1.2d -; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: fcvtzs v3.2d, v3.2d ; CHECK-NEXT: fcvtzs v2.2d, v2.2d +; CHECK-NEXT: xtn v0.2s, v0.2d ; CHECK-NEXT: xtn v1.2s, v1.2d -; CHECK-NEXT: fcvtzs v3.2d, v3.2d -; CHECK-NEXT: uzp1 v0.4h, v0.4h, v1.4h -; CHECK-NEXT: xtn v2.2s, v2.2d ; CHECK-NEXT: xtn v3.2s, v3.2d +; CHECK-NEXT: xtn v2.2s, v2.2d +; CHECK-NEXT: uzp1 v0.4h, v1.4h, v0.4h ; CHECK-NEXT: uzp1 v1.4h, v2.4h, v3.4h ; CHECK-NEXT: uzp1 v0.8b, v1.8b, v0.8b ; CHECK-NEXT: ret @@ -70,10 +70,10 @@ ; CHECK-LABEL: fptoui_v4f64_to_v4i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: fcvtzs v0.2d, v0.2d ; CHECK-NEXT: fcvtzs v1.2d, v1.2d -; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: fcvtzs v0.2d, v0.2d ; CHECK-NEXT: xtn v1.2s, v1.2d +; CHECK-NEXT: xtn v0.2s, v0.2d ; CHECK-NEXT: uzp1 v0.4h, v0.4h, v1.4h ; CHECK-NEXT: ret %tmp1 = load <4 x double>, ptr %ptr diff --git a/llvm/test/CodeGen/AArch64/arm64-cse.ll b/llvm/test/CodeGen/AArch64/arm64-cse.ll --- a/llvm/test/CodeGen/AArch64/arm64-cse.ll +++ b/llvm/test/CodeGen/AArch64/arm64-cse.ll @@ -15,8 +15,8 @@ ; CHECK-NEXT: mov x0, xzr ; CHECK-NEXT: ret ; CHECK-NEXT: LBB0_2: ; %if.end -; CHECK-NEXT: sub w9, w9, w8 ; CHECK-NEXT: add x0, x0, w8, sxtw +; CHECK-NEXT: sub w9, w9, w8 ; CHECK-NEXT: str w9, [x1] ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/arm64-csel.ll b/llvm/test/CodeGen/AArch64/arm64-csel.ll --- a/llvm/test/CodeGen/AArch64/arm64-csel.ll +++ b/llvm/test/CodeGen/AArch64/arm64-csel.ll @@ -214,8 +214,8 @@ define i32 @foo15(i32 %a, i32 %b) nounwind readnone optsize ssp { ; CHECK-LABEL: foo15: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: cmp w0, w1 ; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: cmp w0, w1 ; CHECK-NEXT: cinc w0, w8, gt ; CHECK-NEXT: ret entry: @@ -227,8 +227,8 @@ define i32 @foo16(i32 %a, i32 %b) nounwind readnone optsize ssp { ; CHECK-LABEL: foo16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: cmp w0, w1 ; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: cmp w0, w1 ; CHECK-NEXT: cinc w0, w8, le ; CHECK-NEXT: ret entry: @@ -240,8 +240,8 @@ define i64 @foo17(i64 %a, i64 %b) nounwind readnone optsize ssp { ; CHECK-LABEL: foo17: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: cmp x0, x1 ; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: cmp x0, x1 ; CHECK-NEXT: cinc x0, x8, gt ; CHECK-NEXT: ret entry: @@ -253,8 +253,8 @@ define i64 @foo18(i64 %a, i64 %b) nounwind readnone optsize ssp { ; CHECK-LABEL: foo18: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: cmp x0, x1 ; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: cmp 
x0, x1 ; CHECK-NEXT: cinc x0, x8, le ; CHECK-NEXT: ret entry: @@ -267,8 +267,8 @@ define i64 @foo18_overflow1(i64 %a, i64 %b) nounwind readnone optsize ssp { ; CHECK-LABEL: foo18_overflow1: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: cmp x0, x1 ; CHECK-NEXT: mov x8, #9223372036854775807 // =0x7fffffffffffffff +; CHECK-NEXT: cmp x0, x1 ; CHECK-NEXT: csel x0, x8, xzr, gt ; CHECK-NEXT: ret entry: @@ -281,8 +281,8 @@ define i64 @foo18_overflow2(i64 %a, i64 %b) nounwind readnone optsize ssp { ; CHECK-LABEL: foo18_overflow2: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: cmp x0, x1 ; CHECK-NEXT: mov x8, #9223372036854775807 // =0x7fffffffffffffff +; CHECK-NEXT: cmp x0, x1 ; CHECK-NEXT: csel x0, xzr, x8, gt ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/arm64-fcopysign.ll b/llvm/test/CodeGen/AArch64/arm64-fcopysign.ll --- a/llvm/test/CodeGen/AArch64/arm64-fcopysign.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fcopysign.ll @@ -6,8 +6,8 @@ define float @test1(float %x, float %y) nounwind { ; CHECK-LABEL: test1: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: ; kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: mvni.4s v2, #128, lsl #24 +; CHECK-NEXT: ; kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: ; kill: def $s1 killed $s1 def $q1 ; CHECK-NEXT: bif.16b v0, v1, v2 ; CHECK-NEXT: ; kill: def $s0 killed $s0 killed $q0 @@ -37,8 +37,8 @@ ; CHECK-LABEL: test3: ; CHECK: ; %bb.0: ; CHECK-NEXT: movi.2d v3, #0xffffffffffffffff -; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: fadd s1, s1, s2 +; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: fneg.2d v2, v3 ; CHECK-NEXT: fcvt d1, s1 ; CHECK-NEXT: bif.16b v0, v1, v2 diff --git a/llvm/test/CodeGen/AArch64/arm64-fmadd.ll b/llvm/test/CodeGen/AArch64/arm64-fmadd.ll --- a/llvm/test/CodeGen/AArch64/arm64-fmadd.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fmadd.ll @@ -222,11 +222,11 @@ define float @negated_constant(float %x) { ; CHECK-LABEL: negated_constant: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-1037565952 +; CHECK-NEXT: mov w8, #-1037565952 // =0xc2280000 ; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: mov w8, #1109917696 -; CHECK-NEXT: fmul s1, s0, s1 +; CHECK-NEXT: mov w8, #1109917696 // =0x42280000 ; CHECK-NEXT: fmov s2, w8 +; CHECK-NEXT: fmul s1, s0, s1 ; CHECK-NEXT: fmadd s0, s0, s2, s1 ; CHECK-NEXT: ret %m = fmul float %x, 42.0 diff --git a/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-no-helper.ll b/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-no-helper.ll --- a/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-no-helper.ll +++ b/llvm/test/CodeGen/AArch64/arm64-homogeneous-prolog-epilog-no-helper.ll @@ -20,12 +20,12 @@ ; CHECK-NEXT: .cfi_offset b10, -56 ; CHECK-NEXT: .cfi_offset b11, -64 ; CHECK-NEXT: fmov s3, #1.00000000 -; CHECK-NEXT: scvtf s4, w0 ; CHECK-NEXT: sub w19, w0, #1 ; CHECK-NEXT: fadd s8, s0, s3 ; CHECK-NEXT: fadd s0, s8, s1 +; CHECK-NEXT: scvtf s1, w0 ; CHECK-NEXT: fadd s0, s0, s2 -; CHECK-NEXT: fsub s9, s0, s4 +; CHECK-NEXT: fsub s9, s0, s1 ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __Z3goof ; CHECK-NEXT: fmov s10, s0 @@ -59,12 +59,12 @@ ; CHECK-LINUX-NEXT: .cfi_offset b10, -56 ; CHECK-LINUX-NEXT: .cfi_offset b11, -64 ; CHECK-LINUX-NEXT: fmov s3, #1.00000000 -; CHECK-LINUX-NEXT: scvtf s4, w0 ; CHECK-LINUX-NEXT: sub w19, w0, #1 ; CHECK-LINUX-NEXT: fadd s8, s0, s3 ; CHECK-LINUX-NEXT: fadd s0, s8, s1 +; CHECK-LINUX-NEXT: scvtf s1, w0 ; CHECK-LINUX-NEXT: fadd s0, s0, s2 -; CHECK-LINUX-NEXT: fsub s9, s0, s4 +; CHECK-LINUX-NEXT: fsub s9, s0, s1 ; CHECK-LINUX-NEXT: fmov s0, s8 ; 
CHECK-LINUX-NEXT: bl _Z3goof ; CHECK-LINUX-NEXT: fmov s10, s0 diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll --- a/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll +++ b/llvm/test/CodeGen/AArch64/arm64-indexed-vector-ldst.ll @@ -593,7 +593,7 @@ define ptr @test_v16i8_post_reg_st1_lane(<16 x i8> %in, ptr %addr) { ; CHECK-LABEL: test_v16i8_post_reg_st1_lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: mov w8, #2 +; CHECK-NEXT: mov w8, #2 ; =0x2 ; CHECK-NEXT: st1.b { v0 }[3], [x0], x8 ; CHECK-NEXT: ret %elt = extractelement <16 x i8> %in, i32 3 @@ -619,7 +619,7 @@ define ptr @test_v8i16_post_reg_st1_lane(<8 x i16> %in, ptr %addr) { ; CHECK-LABEL: test_v8i16_post_reg_st1_lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: mov w8, #4 +; CHECK-NEXT: mov w8, #4 ; =0x4 ; CHECK-NEXT: st1.h { v0 }[3], [x0], x8 ; CHECK-NEXT: ret %elt = extractelement <8 x i16> %in, i32 3 @@ -644,7 +644,7 @@ define ptr @test_v4i32_post_reg_st1_lane(<4 x i32> %in, ptr %addr) { ; CHECK-LABEL: test_v4i32_post_reg_st1_lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: mov w8, #8 +; CHECK-NEXT: mov w8, #8 ; =0x8 ; CHECK-NEXT: st1.s { v0 }[3], [x0], x8 ; CHECK-NEXT: ret %elt = extractelement <4 x i32> %in, i32 3 @@ -669,7 +669,7 @@ define ptr @test_v4f32_post_reg_st1_lane(<4 x float> %in, ptr %addr) { ; CHECK-LABEL: test_v4f32_post_reg_st1_lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: mov w8, #8 +; CHECK-NEXT: mov w8, #8 ; =0x8 ; CHECK-NEXT: st1.s { v0 }[3], [x0], x8 ; CHECK-NEXT: ret %elt = extractelement <4 x float> %in, i32 3 @@ -694,7 +694,7 @@ define ptr @test_v2i64_post_reg_st1_lane(<2 x i64> %in, ptr %addr) { ; CHECK-LABEL: test_v2i64_post_reg_st1_lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: mov w8, #16 +; CHECK-NEXT: mov w8, #16 ; =0x10 ; CHECK-NEXT: st1.d { v0 }[1], [x0], x8 ; CHECK-NEXT: ret %elt = extractelement <2 x i64> %in, i64 1 @@ -719,7 +719,7 @@ define ptr @test_v2f64_post_reg_st1_lane(<2 x double> %in, ptr %addr) { ; CHECK-LABEL: test_v2f64_post_reg_st1_lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: mov w8, #16 +; CHECK-NEXT: mov w8, #16 ; =0x10 ; CHECK-NEXT: st1.d { v0 }[1], [x0], x8 ; CHECK-NEXT: ret %elt = extractelement <2 x double> %in, i32 1 @@ -745,7 +745,7 @@ define ptr @test_v8i8_post_reg_st1_lane(<8 x i8> %in, ptr %addr) { ; CHECK-LABEL: test_v8i8_post_reg_st1_lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: mov w8, #2 +; CHECK-NEXT: mov w8, #2 ; =0x2 ; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: st1.b { v0 }[3], [x0], x8 ; CHECK-NEXT: ret @@ -772,7 +772,7 @@ define ptr @test_v4i16_post_reg_st1_lane(<4 x i16> %in, ptr %addr) { ; CHECK-LABEL: test_v4i16_post_reg_st1_lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: mov w8, #4 +; CHECK-NEXT: mov w8, #4 ; =0x4 ; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: st1.h { v0 }[3], [x0], x8 ; CHECK-NEXT: ret @@ -799,7 +799,7 @@ define ptr @test_v2i32_post_reg_st1_lane(<2 x i32> %in, ptr %addr) { ; CHECK-LABEL: test_v2i32_post_reg_st1_lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: mov w8, #8 +; CHECK-NEXT: mov w8, #8 ; =0x8 ; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: st1.s { v0 }[1], [x0], x8 ; CHECK-NEXT: ret @@ -826,7 +826,7 @@ define ptr @test_v2f32_post_reg_st1_lane(<2 x float> %in, ptr %addr) { ; CHECK-LABEL: test_v2f32_post_reg_st1_lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: mov w8, #8 +; CHECK-NEXT: mov w8, #8 ; =0x8 ; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: st1.s { v0 }[1], [x0], x8 ; CHECK-NEXT: ret @@ -3909,8 +3909,8 @@ define { <8 x i16>, <8 x i16> } @test_v8i16_post_reg_ld2lane(ptr 
%A, ptr %ptr, i64 %inc, <8 x i16> %B, <8 x i16> %C) nounwind { ; CHECK-LABEL: test_v8i16_post_reg_ld2lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #1 ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ld2.h { v0, v1 }[0], [x0], x8 ; CHECK-NEXT: str x0, [x1] @@ -3941,8 +3941,8 @@ define { <4 x i16>, <4 x i16> } @test_v4i16_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <4 x i16> %B, <4 x i16> %C) nounwind { ; CHECK-LABEL: test_v4i16_post_reg_ld2lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 ; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #1 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ld2.h { v0, v1 }[0], [x0], x8 ; CHECK-NEXT: str x0, [x1] @@ -3973,8 +3973,8 @@ define { <4 x i32>, <4 x i32> } @test_v4i32_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <4 x i32> %B, <4 x i32> %C) nounwind { ; CHECK-LABEL: test_v4i32_post_reg_ld2lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ld2.s { v0, v1 }[0], [x0], x8 ; CHECK-NEXT: str x0, [x1] @@ -4005,8 +4005,8 @@ define { <2 x i32>, <2 x i32> } @test_v2i32_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <2 x i32> %B, <2 x i32> %C) nounwind { ; CHECK-LABEL: test_v2i32_post_reg_ld2lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ld2.s { v0, v1 }[0], [x0], x8 ; CHECK-NEXT: str x0, [x1] @@ -4037,8 +4037,8 @@ define { <2 x i64>, <2 x i64> } @test_v2i64_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <2 x i64> %B, <2 x i64> %C) nounwind { ; CHECK-LABEL: test_v2i64_post_reg_ld2lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ld2.d { v0, v1 }[0], [x0], x8 ; CHECK-NEXT: str x0, [x1] @@ -4069,8 +4069,8 @@ define { <1 x i64>, <1 x i64> } @test_v1i64_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <1 x i64> %B, <1 x i64> %C) nounwind { ; CHECK-LABEL: test_v1i64_post_reg_ld2lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ld2.d { v0, v1 }[0], [x0], x8 ; CHECK-NEXT: str x0, [x1] @@ -4101,8 +4101,8 @@ define { <4 x float>, <4 x float> } @test_v4f32_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <4 x float> %B, <4 x float> %C) nounwind { ; CHECK-LABEL: test_v4f32_post_reg_ld2lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ld2.s { v0, v1 }[0], [x0], x8 ; CHECK-NEXT: str x0, [x1] @@ -4133,8 +4133,8 @@ define { <2 x float>, <2 x float> } @test_v2f32_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <2 x float> %B, <2 x float> %C) nounwind { ; CHECK-LABEL: test_v2f32_post_reg_ld2lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def 
$q0_q1 +; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ld2.s { v0, v1 }[0], [x0], x8 ; CHECK-NEXT: str x0, [x1] @@ -4165,8 +4165,8 @@ define { <2 x double>, <2 x double> } @test_v2f64_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <2 x double> %B, <2 x double> %C) nounwind { ; CHECK-LABEL: test_v2f64_post_reg_ld2lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ld2.d { v0, v1 }[0], [x0], x8 ; CHECK-NEXT: str x0, [x1] @@ -4197,8 +4197,8 @@ define { <1 x double>, <1 x double> } @test_v1f64_post_reg_ld2lane(ptr %A, ptr %ptr, i64 %inc, <1 x double> %B, <1 x double> %C) nounwind { ; CHECK-LABEL: test_v1f64_post_reg_ld2lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ld2.d { v0, v1 }[0], [x0], x8 ; CHECK-NEXT: str x0, [x1] @@ -5456,8 +5456,8 @@ define ptr @test_v8i16_post_reg_st3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v8i16_post_reg_st3: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 ; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #1 ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: st3.8h { v0, v1, v2 }, [x0], x8 @@ -5486,8 +5486,8 @@ define ptr @test_v4i16_post_reg_st3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v4i16_post_reg_st3: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 ; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: lsl x8, x2, #1 ; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-NEXT: st3.4h { v0, v1, v2 }, [x0], x8 @@ -5516,8 +5516,8 @@ define ptr @test_v4i32_post_reg_st3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v4i32_post_reg_st3: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: st3.4s { v0, v1, v2 }, [x0], x8 @@ -5546,8 +5546,8 @@ define ptr @test_v2i32_post_reg_st3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v2i32_post_reg_st3: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-NEXT: st3.2s { v0, v1, v2 }, [x0], x8 @@ -5576,8 +5576,8 @@ define ptr @test_v2i64_post_reg_st3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v2i64_post_reg_st3: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #3 ; 
CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: st3.2d { v0, v1, v2 }, [x0], x8 @@ -5606,8 +5606,8 @@ define ptr @test_v1i64_post_reg_st3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v1i64_post_reg_st3: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-NEXT: st1.1d { v0, v1, v2 }, [x0], x8 @@ -5636,8 +5636,8 @@ define ptr @test_v4f32_post_reg_st3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v4f32_post_reg_st3: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: st3.4s { v0, v1, v2 }, [x0], x8 @@ -5666,8 +5666,8 @@ define ptr @test_v2f32_post_reg_st3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v2f32_post_reg_st3: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-NEXT: st3.2s { v0, v1, v2 }, [x0], x8 @@ -5696,8 +5696,8 @@ define ptr @test_v2f64_post_reg_st3(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v2f64_post_reg_st3: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: st3.2d { v0, v1, v2 }, [x0], x8 @@ -5726,8 +5726,8 @@ define ptr @test_v1f64_post_reg_st3(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v1f64_post_reg_st3: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-NEXT: st1.1d { v0, v1, v2 }, [x0], x8 @@ -6530,8 +6530,8 @@ define ptr @test_v8i16_post_reg_st1x3(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v8i16_post_reg_st1x3: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 ; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #1 ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: st1.8h { v0, v1, v2 }, [x0], x8 @@ -6560,8 +6560,8 @@ define ptr @test_v4i16_post_reg_st1x3(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v4i16_post_reg_st1x3: ; CHECK: ; %bb.0: -; CHECK-NEXT: 
lsl x8, x2, #1 ; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: lsl x8, x2, #1 ; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-NEXT: st1.4h { v0, v1, v2 }, [x0], x8 @@ -6590,8 +6590,8 @@ define ptr @test_v4i32_post_reg_st1x3(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v4i32_post_reg_st1x3: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: st1.4s { v0, v1, v2 }, [x0], x8 @@ -6620,8 +6620,8 @@ define ptr @test_v2i32_post_reg_st1x3(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v2i32_post_reg_st1x3: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-NEXT: st1.2s { v0, v1, v2 }, [x0], x8 @@ -6650,8 +6650,8 @@ define ptr @test_v2i64_post_reg_st1x3(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v2i64_post_reg_st1x3: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: st1.2d { v0, v1, v2 }, [x0], x8 @@ -6680,8 +6680,8 @@ define ptr @test_v1i64_post_reg_st1x3(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v1i64_post_reg_st1x3: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-NEXT: st1.1d { v0, v1, v2 }, [x0], x8 @@ -6710,8 +6710,8 @@ define ptr @test_v4f32_post_reg_st1x3(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v4f32_post_reg_st1x3: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: st1.4s { v0, v1, v2 }, [x0], x8 @@ -6740,8 +6740,8 @@ define ptr @test_v2f32_post_reg_st1x3(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v2f32_post_reg_st1x3: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-NEXT: st1.2s { v0, v1, v2 }, [x0], x8 @@ -6770,8 +6770,8 @@ define ptr @test_v2f64_post_reg_st1x3(ptr %A, ptr %ptr, <2 x double> %B, <2 
x double> %C, <2 x double> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v2f64_post_reg_st1x3: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: st1.2d { v0, v1, v2 }, [x0], x8 @@ -6800,8 +6800,8 @@ define ptr @test_v1f64_post_reg_st1x3(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v1f64_post_reg_st1x3: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $d0_d1_d2 def $d0_d1_d2 +; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $d0_d1_d2 def $d0_d1_d2 ; CHECK-NEXT: st1.1d { v0, v1, v2 }, [x0], x8 @@ -7603,8 +7603,8 @@ define ptr @test_v8i16_post_reg_st3lane(ptr %A, ptr %ptr, <8 x i16> %B, <8 x i16> %C, <8 x i16> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v8i16_post_reg_st3lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 ; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #1 ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: st3.h { v0, v1, v2 }[0], [x0], x8 @@ -7633,8 +7633,8 @@ define ptr @test_v4i16_post_reg_st3lane(ptr %A, ptr %ptr, <4 x i16> %B, <4 x i16> %C, <4 x i16> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v4i16_post_reg_st3lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #1 ; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #1 ; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: st3.h { v0, v1, v2 }[0], [x0], x8 @@ -7663,8 +7663,8 @@ define ptr @test_v4i32_post_reg_st3lane(ptr %A, ptr %ptr, <4 x i32> %B, <4 x i32> %C, <4 x i32> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v4i32_post_reg_st3lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: st3.s { v0, v1, v2 }[0], [x0], x8 @@ -7693,8 +7693,8 @@ define ptr @test_v2i32_post_reg_st3lane(ptr %A, ptr %ptr, <2 x i32> %B, <2 x i32> %C, <2 x i32> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v2i32_post_reg_st3lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: st3.s { v0, v1, v2 }[0], [x0], x8 @@ -7723,8 +7723,8 @@ define ptr @test_v2i64_post_reg_st3lane(ptr %A, ptr %ptr, <2 x i64> %B, <2 x i64> %C, <2 x i64> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v2i64_post_reg_st3lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def 
$q0_q1_q2 ; CHECK-NEXT: st3.d { v0, v1, v2 }[0], [x0], x8 @@ -7753,8 +7753,8 @@ define ptr @test_v1i64_post_reg_st3lane(ptr %A, ptr %ptr, <1 x i64> %B, <1 x i64> %C, <1 x i64> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v1i64_post_reg_st3lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: st3.d { v0, v1, v2 }[0], [x0], x8 @@ -7783,8 +7783,8 @@ define ptr @test_v4f32_post_reg_st3lane(ptr %A, ptr %ptr, <4 x float> %B, <4 x float> %C, <4 x float> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v4f32_post_reg_st3lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: st3.s { v0, v1, v2 }[0], [x0], x8 @@ -7813,8 +7813,8 @@ define ptr @test_v2f32_post_reg_st3lane(ptr %A, ptr %ptr, <2 x float> %B, <2 x float> %C, <2 x float> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v2f32_post_reg_st3lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #2 ; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: st3.s { v0, v1, v2 }[0], [x0], x8 @@ -7843,8 +7843,8 @@ define ptr @test_v2f64_post_reg_st3lane(ptr %A, ptr %ptr, <2 x double> %B, <2 x double> %C, <2 x double> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v2f64_post_reg_st3lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $q2 killed $q2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $q1 killed $q1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: ; kill: def $q0 killed $q0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: st3.d { v0, v1, v2 }[0], [x0], x8 @@ -7873,8 +7873,8 @@ define ptr @test_v1f64_post_reg_st3lane(ptr %A, ptr %ptr, <1 x double> %B, <1 x double> %C, <1 x double> %D, i64 %inc) nounwind { ; CHECK-LABEL: test_v1f64_post_reg_st3lane: ; CHECK: ; %bb.0: -; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2 +; CHECK-NEXT: lsl x8, x2, #3 ; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2 ; CHECK-NEXT: st3.d { v0, v1, v2 }[0], [x0], x8 @@ -8910,10 +8910,10 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: ldr s1, [x0] ; CHECK-NEXT: str q0, [x3] -; CHECK-NEXT: ldr q0, [x4] ; CHECK-NEXT: add x8, x0, x2, lsl #2 -; CHECK-NEXT: mov.s v0[1], v1[0] +; CHECK-NEXT: ldr q0, [x4] ; CHECK-NEXT: str x8, [x1] +; CHECK-NEXT: mov.s v0[1], v1[0] ; CHECK-NEXT: ret %tmp1 = load float, ptr %bar store <4 x float> %vec, ptr %dep_ptr_1, align 16 @@ -9071,10 +9071,10 @@ ; CHECK-LABEL: test_inc_cycle: ; CHECK: ; %bb.0: ; CHECK-NEXT: ld1.s { v0 }[0], [x0] -; CHECK-NEXT: adrp x8, _var@PAGE -; CHECK-NEXT: fmov x9, d0 -; CHECK-NEXT: add x9, x0, x9, lsl #2 -; CHECK-NEXT: str x9, [x8, _var@PAGEOFF] +; CHECK-NEXT: adrp x9, _var@PAGE +; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: add x8, x0, x8, lsl #2 +; CHECK-NEXT: str x8, [x9, _var@PAGEOFF] ; CHECK-NEXT: ret %elt = load i32, ptr %in %newvec 
= insertelement <4 x i32> %vec, i32 %elt, i32 0 @@ -9143,7 +9143,7 @@ ; CHECK-LABEL: load_single_extract_variable_index_v3i32_small_align: ; CHECK: ; %bb.0: ; CHECK-NEXT: mov w9, w1 -; CHECK-NEXT: mov w8, #2 +; CHECK-NEXT: mov w8, #2 ; =0x2 ; CHECK-NEXT: cmp x9, #2 ; CHECK-NEXT: csel x8, x9, x8, lo ; CHECK-NEXT: ldr w0, [x0, x8, lsl #2] @@ -9157,7 +9157,7 @@ ; CHECK-LABEL: load_single_extract_variable_index_v3i32_default_align: ; CHECK: ; %bb.0: ; CHECK-NEXT: mov w9, w1 -; CHECK-NEXT: mov w8, #2 +; CHECK-NEXT: mov w8, #2 ; =0x2 ; CHECK-NEXT: cmp x9, #2 ; CHECK-NEXT: csel x8, x9, x8, lo ; CHECK-NEXT: ldr w0, [x0, x8, lsl #2] diff --git a/llvm/test/CodeGen/AArch64/arm64-inline-asm.ll b/llvm/test/CodeGen/AArch64/arm64-inline-asm.ll --- a/llvm/test/CodeGen/AArch64/arm64-inline-asm.ll +++ b/llvm/test/CodeGen/AArch64/arm64-inline-asm.ll @@ -367,13 +367,13 @@ define void @test_zero_reg(ptr %addr) { ; CHECK-LABEL: test_zero_reg: ; CHECK: ; %bb.0: -; CHECK-NEXT: mov w8, #1 ; CHECK-NEXT: ; InlineAsm Start ; CHECK-NEXT: USE(xzr) ; CHECK-NEXT: ; InlineAsm End ; CHECK-NEXT: ; InlineAsm Start ; CHECK-NEXT: USE(wzr) ; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: mov w8, #1 ; =0x1 ; CHECK-NEXT: ; InlineAsm Start ; CHECK-NEXT: USE(w8) ; CHECK-NEXT: ; InlineAsm End @@ -487,8 +487,8 @@ ; CHECK-NEXT: ldp q2, q1, [x0] ; CHECK-NEXT: mov x8, sp ; CHECK-NEXT: ldr s0, [x0, #32] -; CHECK-NEXT: stp q2, q1, [sp] ; CHECK-NEXT: str s0, [sp, #32] +; CHECK-NEXT: stp q2, q1, [sp] ; CHECK-NEXT: ; InlineAsm Start ; CHECK-NEXT: ; InlineAsm End ; CHECK-NEXT: add sp, sp, #64 diff --git a/llvm/test/CodeGen/AArch64/arm64-instruction-mix-remarks.ll b/llvm/test/CodeGen/AArch64/arm64-instruction-mix-remarks.ll --- a/llvm/test/CodeGen/AArch64/arm64-instruction-mix-remarks.ll +++ b/llvm/test/CodeGen/AArch64/arm64-instruction-mix-remarks.ll @@ -36,9 +36,9 @@ ; CHECK-NEXT: b.eq LBB0_2 ; CHECK-NEXT: ; %bb.1: ; %else ; CHECK-NEXT: mul w9, w0, w1 -; CHECK-NEXT: mov w10, #10 ; CHECK-NEXT: mul w0, w9, w1 -; CHECK-NEXT: str w10, [x8] +; CHECK-NEXT: mov w9, #10 ; =0xa +; CHECK-NEXT: str w9, [x8] ; CHECK-NEXT: LBB0_2: ; %common.ret ; CHECK-NEXT: ; kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/arm64-ld1.ll b/llvm/test/CodeGen/AArch64/arm64-ld1.ll --- a/llvm/test/CodeGen/AArch64/arm64-ld1.ll +++ b/llvm/test/CodeGen/AArch64/arm64-ld1.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc < %s -mtriple=arm64-eabi -aarch64-neon-syntax=apple -verify-machineinstrs -asm-verbose=false | FileCheck %s %struct.__neon_int8x8x2_t = type { <8 x i8>, <8 x i8> } @@ -5,29 +6,20 @@ %struct.__neon_int8x8x4_t = type { <8 x i8>, <8 x i8>, <8 x i8>, <8 x i8> } define %struct.__neon_int8x8x2_t @ld2_8b(ptr %A) nounwind { -; CHECK-LABEL: ld2_8b ; Make sure we are loading into the results defined by the ABI (i.e., v0, v1) ; and from the argument of the function also defined by ABI (i.e., x0) -; CHECK: ld2.8b { v0, v1 }, [x0] -; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2.v8i8.p0(ptr %A) ret %struct.__neon_int8x8x2_t %tmp2 } define %struct.__neon_int8x8x3_t @ld3_8b(ptr %A) nounwind { -; CHECK-LABEL: ld3_8b ; Make sure we are using the operands defined by the ABI -; CHECK: ld3.8b { v0, v1, v2 }, [x0] -; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3.v8i8.p0(ptr %A) ret %struct.__neon_int8x8x3_t %tmp2 } define %struct.__neon_int8x8x4_t @ld4_8b(ptr %A) nounwind { -; CHECK-LABEL: ld4_8b ; 
Make sure we are using the operands defined by the ABI -; CHECK: ld4.8b { v0, v1, v2, v3 }, [x0] -; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4.v8i8.p0(ptr %A) ret %struct.__neon_int8x8x4_t %tmp2 } @@ -41,28 +33,19 @@ %struct.__neon_int8x16x4_t = type { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } define %struct.__neon_int8x16x2_t @ld2_16b(ptr %A) nounwind { -; CHECK-LABEL: ld2_16b ; Make sure we are using the operands defined by the ABI -; CHECK: ld2.16b { v0, v1 }, [x0] -; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2.v16i8.p0(ptr %A) ret %struct.__neon_int8x16x2_t %tmp2 } define %struct.__neon_int8x16x3_t @ld3_16b(ptr %A) nounwind { -; CHECK-LABEL: ld3_16b ; Make sure we are using the operands defined by the ABI -; CHECK: ld3.16b { v0, v1, v2 }, [x0] -; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3.v16i8.p0(ptr %A) ret %struct.__neon_int8x16x3_t %tmp2 } define %struct.__neon_int8x16x4_t @ld4_16b(ptr %A) nounwind { -; CHECK-LABEL: ld4_16b ; Make sure we are using the operands defined by the ABI -; CHECK: ld4.16b { v0, v1, v2, v3 }, [x0] -; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4.v16i8.p0(ptr %A) ret %struct.__neon_int8x16x4_t %tmp2 } @@ -76,28 +59,19 @@ %struct.__neon_int16x4x4_t = type { <4 x i16>, <4 x i16>, <4 x i16>, <4 x i16> } define %struct.__neon_int16x4x2_t @ld2_4h(ptr %A) nounwind { -; CHECK-LABEL: ld2_4h ; Make sure we are using the operands defined by the ABI -; CHECK: ld2.4h { v0, v1 }, [x0] -; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2.v4i16.p0(ptr %A) ret %struct.__neon_int16x4x2_t %tmp2 } define %struct.__neon_int16x4x3_t @ld3_4h(ptr %A) nounwind { -; CHECK-LABEL: ld3_4h ; Make sure we are using the operands defined by the ABI -; CHECK: ld3.4h { v0, v1, v2 }, [x0] -; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3.v4i16.p0(ptr %A) ret %struct.__neon_int16x4x3_t %tmp2 } define %struct.__neon_int16x4x4_t @ld4_4h(ptr %A) nounwind { -; CHECK-LABEL: ld4_4h ; Make sure we are using the operands defined by the ABI -; CHECK: ld4.4h { v0, v1, v2, v3 }, [x0] -; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4.v4i16.p0(ptr %A) ret %struct.__neon_int16x4x4_t %tmp2 } @@ -111,28 +85,19 @@ %struct.__neon_int16x8x4_t = type { <8 x i16>, <8 x i16>, <8 x i16>, <8 x i16> } define %struct.__neon_int16x8x2_t @ld2_8h(ptr %A) nounwind { -; CHECK-LABEL: ld2_8h ; Make sure we are using the operands defined by the ABI -; CHECK: ld2.8h { v0, v1 }, [x0] -; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2.v8i16.p0(ptr %A) ret %struct.__neon_int16x8x2_t %tmp2 } define %struct.__neon_int16x8x3_t @ld3_8h(ptr %A) nounwind { -; CHECK-LABEL: ld3_8h ; Make sure we are using the operands defined by the ABI -; CHECK: ld3.8h { v0, v1, v2 }, [x0] -; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3.v8i16.p0(ptr %A) ret %struct.__neon_int16x8x3_t %tmp2 } define %struct.__neon_int16x8x4_t @ld4_8h(ptr %A) nounwind { -; CHECK-LABEL: ld4_8h ; Make sure we are using the operands defined by the ABI -; CHECK: ld4.8h { v0, v1, v2, v3 }, [x0] -; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4.v8i16.p0(ptr %A) ret %struct.__neon_int16x8x4_t %tmp2 } @@ -146,28 +111,19 @@ %struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> } define %struct.__neon_int32x2x2_t @ld2_2s(ptr %A) 
nounwind { -; CHECK-LABEL: ld2_2s ; Make sure we are using the operands defined by the ABI -; CHECK: ld2.2s { v0, v1 }, [x0] -; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2.v2i32.p0(ptr %A) ret %struct.__neon_int32x2x2_t %tmp2 } define %struct.__neon_int32x2x3_t @ld3_2s(ptr %A) nounwind { -; CHECK-LABEL: ld3_2s ; Make sure we are using the operands defined by the ABI -; CHECK: ld3.2s { v0, v1, v2 }, [x0] -; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3.v2i32.p0(ptr %A) ret %struct.__neon_int32x2x3_t %tmp2 } define %struct.__neon_int32x2x4_t @ld4_2s(ptr %A) nounwind { -; CHECK-LABEL: ld4_2s ; Make sure we are using the operands defined by the ABI -; CHECK: ld4.2s { v0, v1, v2, v3 }, [x0] -; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4.v2i32.p0(ptr %A) ret %struct.__neon_int32x2x4_t %tmp2 } @@ -181,28 +137,19 @@ %struct.__neon_int32x4x4_t = type { <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32> } define %struct.__neon_int32x4x2_t @ld2_4s(ptr %A) nounwind { -; CHECK-LABEL: ld2_4s ; Make sure we are using the operands defined by the ABI -; CHECK: ld2.4s { v0, v1 }, [x0] -; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2.v4i32.p0(ptr %A) ret %struct.__neon_int32x4x2_t %tmp2 } define %struct.__neon_int32x4x3_t @ld3_4s(ptr %A) nounwind { -; CHECK-LABEL: ld3_4s ; Make sure we are using the operands defined by the ABI -; CHECK: ld3.4s { v0, v1, v2 }, [x0] -; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3.v4i32.p0(ptr %A) ret %struct.__neon_int32x4x3_t %tmp2 } define %struct.__neon_int32x4x4_t @ld4_4s(ptr %A) nounwind { -; CHECK-LABEL: ld4_4s ; Make sure we are using the operands defined by the ABI -; CHECK: ld4.4s { v0, v1, v2, v3 }, [x0] -; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4.v4i32.p0(ptr %A) ret %struct.__neon_int32x4x4_t %tmp2 } @@ -216,28 +163,19 @@ %struct.__neon_int64x2x4_t = type { <2 x i64>, <2 x i64>, <2 x i64>, <2 x i64> } define %struct.__neon_int64x2x2_t @ld2_2d(ptr %A) nounwind { -; CHECK-LABEL: ld2_2d ; Make sure we are using the operands defined by the ABI -; CHECK: ld2.2d { v0, v1 }, [x0] -; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2.v2i64.p0(ptr %A) ret %struct.__neon_int64x2x2_t %tmp2 } define %struct.__neon_int64x2x3_t @ld3_2d(ptr %A) nounwind { -; CHECK-LABEL: ld3_2d ; Make sure we are using the operands defined by the ABI -; CHECK: ld3.2d { v0, v1, v2 }, [x0] -; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3.v2i64.p0(ptr %A) ret %struct.__neon_int64x2x3_t %tmp2 } define %struct.__neon_int64x2x4_t @ld4_2d(ptr %A) nounwind { -; CHECK-LABEL: ld4_2d ; Make sure we are using the operands defined by the ABI -; CHECK: ld4.2d { v0, v1, v2, v3 }, [x0] -; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4.v2i64.p0(ptr %A) ret %struct.__neon_int64x2x4_t %tmp2 } @@ -252,28 +190,19 @@ define %struct.__neon_int64x1x2_t @ld2_1di64(ptr %A) nounwind { -; CHECK-LABEL: ld2_1di64 ; Make sure we are using the operands defined by the ABI -; CHECK: ld1.1d { v0, v1 }, [x0] -; CHECK-NEXT: ret %tmp2 = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2.v1i64.p0(ptr %A) ret %struct.__neon_int64x1x2_t %tmp2 } define %struct.__neon_int64x1x3_t @ld3_1di64(ptr %A) nounwind { -; CHECK-LABEL: ld3_1di64 ; Make sure we are using the operands defined by the ABI -; CHECK: ld1.1d { v0, v1, v2 }, [x0] -; CHECK-NEXT: ret 
  %tmp2 = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3.v1i64.p0(ptr %A)
  ret %struct.__neon_int64x1x3_t %tmp2
 }
 define %struct.__neon_int64x1x4_t @ld4_1di64(ptr %A) nounwind {
-; CHECK-LABEL: ld4_1di64
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.1d { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4.v1i64.p0(ptr %A)
  ret %struct.__neon_int64x1x4_t %tmp2
 }
@@ -289,28 +218,19 @@
 define %struct.__neon_float64x1x2_t @ld2_1df64(ptr %A) nounwind {
-; CHECK-LABEL: ld2_1df64
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.1d { v0, v1 }, [x0]
-; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld2.v1f64.p0(ptr %A)
  ret %struct.__neon_float64x1x2_t %tmp2
 }
 define %struct.__neon_float64x1x3_t @ld3_1df64(ptr %A) nounwind {
-; CHECK-LABEL: ld3_1df64
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.1d { v0, v1, v2 }, [x0]
-; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld3.v1f64.p0(ptr %A)
  ret %struct.__neon_float64x1x3_t %tmp2
 }
 define %struct.__neon_float64x1x4_t @ld4_1df64(ptr %A) nounwind {
-; CHECK-LABEL: ld4_1df64
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.1d { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld4.v1f64.p0(ptr %A)
  ret %struct.__neon_float64x1x4_t %tmp2
 }
@@ -322,27 +242,18 @@
 define %struct.__neon_int8x16x2_t @ld2lane_16b(<16 x i8> %L1, <16 x i8> %L2, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld2lane_16b
-; CHECK: ld2.b { v0, v1 }[1], [x0]
-; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2lane.v16i8.p0(<16 x i8> %L1, <16 x i8> %L2, i64 1, ptr %A)
  ret %struct.__neon_int8x16x2_t %tmp2
 }
 define %struct.__neon_int8x16x3_t @ld3lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld3lane_16b
-; CHECK: ld3.b { v0, v1, v2 }[1], [x0]
-; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3lane.v16i8.p0(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, i64 1, ptr %A)
  ret %struct.__neon_int8x16x3_t %tmp2
 }
 define %struct.__neon_int8x16x4_t @ld4lane_16b(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, <16 x i8> %L4, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld4lane_16b
-; CHECK: ld4.b { v0, v1, v2, v3 }[1], [x0]
-; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4lane.v16i8.p0(<16 x i8> %L1, <16 x i8> %L2, <16 x i8> %L3, <16 x i8> %L4, i64 1, ptr %A)
  ret %struct.__neon_int8x16x4_t %tmp2
 }
@@ -353,27 +264,18 @@
 define %struct.__neon_int16x8x2_t @ld2lane_8h(<8 x i16> %L1, <8 x i16> %L2, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld2lane_8h
-; CHECK: ld2.h { v0, v1 }[1], [x0]
-; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2lane.v8i16.p0(<8 x i16> %L1, <8 x i16> %L2, i64 1, ptr %A)
  ret %struct.__neon_int16x8x2_t %tmp2
 }
 define %struct.__neon_int16x8x3_t @ld3lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld3lane_8h
-; CHECK: ld3.h { v0, v1, v2 }[1], [x0]
-; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3lane.v8i16.p0(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, i64 1, ptr %A)
  ret %struct.__neon_int16x8x3_t %tmp2
 }
 define %struct.__neon_int16x8x4_t @ld4lane_8h(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, <8 x i16> %L4, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld4lane_8h
-; CHECK: ld4.h { v0, v1, v2, v3 }[1], [x0]
-; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4lane.v8i16.p0(<8 x i16> %L1, <8 x i16> %L2, <8 x i16> %L3, <8 x i16> %L4, i64 1, ptr %A)
  ret %struct.__neon_int16x8x4_t %tmp2
 }
@@ -384,27 +286,18 @@
 define %struct.__neon_int32x4x2_t @ld2lane_4s(<4 x i32> %L1, <4 x i32> %L2, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld2lane_4s
-; CHECK: ld2.s { v0, v1 }[1], [x0]
-; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2lane.v4i32.p0(<4 x i32> %L1, <4 x i32> %L2, i64 1, ptr %A)
  ret %struct.__neon_int32x4x2_t %tmp2
 }
 define %struct.__neon_int32x4x3_t @ld3lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld3lane_4s
-; CHECK: ld3.s { v0, v1, v2 }[1], [x0]
-; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3lane.v4i32.p0(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, i64 1, ptr %A)
  ret %struct.__neon_int32x4x3_t %tmp2
 }
 define %struct.__neon_int32x4x4_t @ld4lane_4s(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, <4 x i32> %L4, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld4lane_4s
-; CHECK: ld4.s { v0, v1, v2, v3 }[1], [x0]
-; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4lane.v4i32.p0(<4 x i32> %L1, <4 x i32> %L2, <4 x i32> %L3, <4 x i32> %L4, i64 1, ptr %A)
  ret %struct.__neon_int32x4x4_t %tmp2
 }
@@ -415,27 +308,18 @@
 define %struct.__neon_int64x2x2_t @ld2lane_2d(<2 x i64> %L1, <2 x i64> %L2, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld2lane_2d
-; CHECK: ld2.d { v0, v1 }[1], [x0]
-; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2lane.v2i64.p0(<2 x i64> %L1, <2 x i64> %L2, i64 1, ptr %A)
  ret %struct.__neon_int64x2x2_t %tmp2
 }
 define %struct.__neon_int64x2x3_t @ld3lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld3lane_2d
-; CHECK: ld3.d { v0, v1, v2 }[1], [x0]
-; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3lane.v2i64.p0(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, i64 1, ptr %A)
  ret %struct.__neon_int64x2x3_t %tmp2
 }
 define %struct.__neon_int64x2x4_t @ld4lane_2d(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, <2 x i64> %L4, ptr %A) nounwind {
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld4lane_2d
-; CHECK: ld4.d { v0, v1, v2, v3 }[1], [x0]
-; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64> %L1, <2 x i64> %L2, <2 x i64> %L3, <2 x i64> %L4, i64 1, ptr %A)
  ret %struct.__neon_int64x2x4_t %tmp2
 }
@@ -445,10 +329,7 @@
 declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4lane.v2i64.p0(<2 x i64>, <2 x i64>, <2 x i64>, <2 x i64>, i64, ptr) nounwind readonly
 define <8 x i8> @ld1r_8b(ptr %bar) {
-; CHECK: ld1r_8b
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1r.8b { v0 }, [x0]
-; CHECK-NEXT: ret
  %tmp1 = load i8, ptr %bar
  %tmp2 = insertelement <8 x i8> undef, i8 %tmp1, i32 0
  %tmp3 = insertelement <8 x i8> %tmp2, i8 %tmp1, i32 1
@@ -462,10 +343,7 @@
 }
 define <16 x i8> @ld1r_16b(ptr %bar) {
-; CHECK: ld1r_16b
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1r.16b { v0 }, [x0]
-; CHECK-NEXT: ret
  %tmp1 = load i8, ptr %bar
  %tmp2 = insertelement <16 x i8> undef, i8 %tmp1, i32 0
  %tmp3 = insertelement <16 x i8> %tmp2, i8 %tmp1, i32 1
@@ -487,10 +365,7 @@
 }
 define <4 x i16> @ld1r_4h(ptr %bar) {
-; CHECK: ld1r_4h
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1r.4h { v0 }, [x0]
-; CHECK-NEXT: ret
  %tmp1 = load i16, ptr %bar
  %tmp2 = insertelement <4 x i16> undef, i16 %tmp1, i32 0
  %tmp3 = insertelement <4 x i16> %tmp2, i16 %tmp1, i32 1
@@ -500,10 +375,7 @@
 }
 define <8 x i16> @ld1r_8h(ptr %bar) {
-; CHECK: ld1r_8h
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1r.8h { v0 }, [x0]
-; CHECK-NEXT: ret
  %tmp1 = load i16, ptr %bar
  %tmp2 = insertelement <8 x i16> undef, i16 %tmp1, i32 0
  %tmp3 = insertelement <8 x i16> %tmp2, i16 %tmp1, i32 1
@@ -517,10 +389,7 @@
 }
 define <2 x i32> @ld1r_2s(ptr %bar) {
-; CHECK: ld1r_2s
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1r.2s { v0 }, [x0]
-; CHECK-NEXT: ret
  %tmp1 = load i32, ptr %bar
  %tmp2 = insertelement <2 x i32> undef, i32 %tmp1, i32 0
  %tmp3 = insertelement <2 x i32> %tmp2, i32 %tmp1, i32 1
@@ -528,10 +397,7 @@
 }
 define <4 x i32> @ld1r_4s(ptr %bar) {
-; CHECK: ld1r_4s
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1r.4s { v0 }, [x0]
-; CHECK-NEXT: ret
  %tmp1 = load i32, ptr %bar
  %tmp2 = insertelement <4 x i32> undef, i32 %tmp1, i32 0
  %tmp3 = insertelement <4 x i32> %tmp2, i32 %tmp1, i32 1
@@ -541,10 +407,7 @@
 }
 define <2 x i64> @ld1r_2d(ptr %bar) {
-; CHECK: ld1r_2d
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1r.2d { v0 }, [x0]
-; CHECK-NEXT: ret
  %tmp1 = load i64, ptr %bar
  %tmp2 = insertelement <2 x i64> undef, i64 %tmp1, i32 0
  %tmp3 = insertelement <2 x i64> %tmp2, i64 %tmp1, i32 1
@@ -552,28 +415,19 @@
 }
 define %struct.__neon_int8x8x2_t @ld2r_8b(ptr %A) nounwind {
-; CHECK: ld2r_8b
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld2r.8b { v0, v1 }, [x0]
-; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld2r.v8i8.p0(ptr %A)
  ret %struct.__neon_int8x8x2_t %tmp2
 }
 define %struct.__neon_int8x8x3_t @ld3r_8b(ptr %A) nounwind {
-; CHECK: ld3r_8b
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld3r.8b { v0, v1, v2 }, [x0]
-; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld3r.v8i8.p0(ptr %A)
  ret %struct.__neon_int8x8x3_t %tmp2
 }
 define %struct.__neon_int8x8x4_t @ld4r_8b(ptr %A) nounwind {
-; CHECK: ld4r_8b
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld4r.8b { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4r.v8i8.p0(ptr %A)
  ret %struct.__neon_int8x8x4_t %tmp2
 }
@@ -583,28 +437,19 @@
 declare %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld4r.v8i8.p0(ptr) nounwind readonly
 define %struct.__neon_int8x16x2_t @ld2r_16b(ptr %A) nounwind {
-; CHECK: ld2r_16b
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld2r.16b { v0, v1 }, [x0]
-; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld2r.v16i8.p0(ptr %A)
  ret %struct.__neon_int8x16x2_t %tmp2
 }
 define %struct.__neon_int8x16x3_t @ld3r_16b(ptr %A) nounwind {
-; CHECK: ld3r_16b
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld3r.16b { v0, v1, v2 }, [x0]
-; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld3r.v16i8.p0(ptr %A)
  ret %struct.__neon_int8x16x3_t %tmp2
 }
 define %struct.__neon_int8x16x4_t @ld4r_16b(ptr %A) nounwind {
-; CHECK: ld4r_16b
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld4r.16b { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4r.v16i8.p0(ptr %A)
  ret %struct.__neon_int8x16x4_t %tmp2
 }
@@ -614,28 +459,19 @@
 declare %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld4r.v16i8.p0(ptr) nounwind readonly
 define %struct.__neon_int16x4x2_t @ld2r_4h(ptr %A) nounwind {
-; CHECK: ld2r_4h
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld2r.4h { v0, v1 }, [x0]
-; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld2r.v4i16.p0(ptr %A)
  ret %struct.__neon_int16x4x2_t %tmp2
 }
 define %struct.__neon_int16x4x3_t @ld3r_4h(ptr %A) nounwind {
-; CHECK: ld3r_4h
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld3r.4h { v0, v1, v2 }, [x0]
-; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld3r.v4i16.p0(ptr %A)
  ret %struct.__neon_int16x4x3_t %tmp2
 }
 define %struct.__neon_int16x4x4_t @ld4r_4h(ptr %A) nounwind {
-; CHECK: ld4r_4h
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld4r.4h { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4r.v4i16.p0(ptr %A)
  ret %struct.__neon_int16x4x4_t %tmp2
 }
@@ -645,28 +481,19 @@
 declare %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld4r.v4i16.p0(ptr) nounwind readonly
 define %struct.__neon_int16x8x2_t @ld2r_8h(ptr %A) nounwind {
-; CHECK: ld2r_8h
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld2r.8h { v0, v1 }, [x0]
-; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld2r.v8i16.p0(ptr %A)
  ret %struct.__neon_int16x8x2_t %tmp2
 }
 define %struct.__neon_int16x8x3_t @ld3r_8h(ptr %A) nounwind {
-; CHECK: ld3r_8h
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld3r.8h { v0, v1, v2 }, [x0]
-; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld3r.v8i16.p0(ptr %A)
  ret %struct.__neon_int16x8x3_t %tmp2
 }
 define %struct.__neon_int16x8x4_t @ld4r_8h(ptr %A) nounwind {
-; CHECK: ld4r_8h
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld4r.8h { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4r.v8i16.p0(ptr %A)
  ret %struct.__neon_int16x8x4_t %tmp2
 }
@@ -676,28 +503,19 @@
 declare %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld4r.v8i16.p0(ptr) nounwind readonly
 define %struct.__neon_int32x2x2_t @ld2r_2s(ptr %A) nounwind {
-; CHECK: ld2r_2s
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld2r.2s { v0, v1 }, [x0]
-; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld2r.v2i32.p0(ptr %A)
  ret %struct.__neon_int32x2x2_t %tmp2
 }
 define %struct.__neon_int32x2x3_t @ld3r_2s(ptr %A) nounwind {
-; CHECK: ld3r_2s
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld3r.2s { v0, v1, v2 }, [x0]
-; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld3r.v2i32.p0(ptr %A)
  ret %struct.__neon_int32x2x3_t %tmp2
 }
 define %struct.__neon_int32x2x4_t @ld4r_2s(ptr %A) nounwind {
-; CHECK: ld4r_2s
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld4r.2s { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4r.v2i32.p0(ptr %A)
  ret %struct.__neon_int32x2x4_t %tmp2
 }
@@ -707,28 +525,19 @@
 declare %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld4r.v2i32.p0(ptr) nounwind readonly
 define %struct.__neon_int32x4x2_t @ld2r_4s(ptr %A) nounwind {
-; CHECK: ld2r_4s
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld2r.4s { v0, v1 }, [x0]
-; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld2r.v4i32.p0(ptr %A)
  ret %struct.__neon_int32x4x2_t %tmp2
 }
 define %struct.__neon_int32x4x3_t @ld3r_4s(ptr %A) nounwind {
-; CHECK: ld3r_4s
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld3r.4s { v0, v1, v2 }, [x0]
-; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld3r.v4i32.p0(ptr %A)
  ret %struct.__neon_int32x4x3_t %tmp2
 }
 define %struct.__neon_int32x4x4_t @ld4r_4s(ptr %A) nounwind {
-; CHECK: ld4r_4s
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld4r.4s { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4r.v4i32.p0(ptr %A)
  ret %struct.__neon_int32x4x4_t %tmp2
 }
@@ -738,28 +547,19 @@
 declare %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld4r.v4i32.p0(ptr) nounwind readonly
 define %struct.__neon_int64x1x2_t @ld2r_1d(ptr %A) nounwind {
-; CHECK: ld2r_1d
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld2r.1d { v0, v1 }, [x0]
-; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld2r.v1i64.p0(ptr %A)
  ret %struct.__neon_int64x1x2_t %tmp2
 }
 define %struct.__neon_int64x1x3_t @ld3r_1d(ptr %A) nounwind {
-; CHECK: ld3r_1d
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld3r.1d { v0, v1, v2 }, [x0]
-; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld3r.v1i64.p0(ptr %A)
  ret %struct.__neon_int64x1x3_t %tmp2
 }
 define %struct.__neon_int64x1x4_t @ld4r_1d(ptr %A) nounwind {
-; CHECK: ld4r_1d
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld4r.1d { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4r.v1i64.p0(ptr %A)
  ret %struct.__neon_int64x1x4_t %tmp2
 }
@@ -769,28 +569,19 @@
 declare %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld4r.v1i64.p0(ptr) nounwind readonly
 define %struct.__neon_int64x2x2_t @ld2r_2d(ptr %A) nounwind {
-; CHECK: ld2r_2d
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld2r.2d { v0, v1 }, [x0]
-; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld2r.v2i64.p0(ptr %A)
  ret %struct.__neon_int64x2x2_t %tmp2
 }
 define %struct.__neon_int64x2x3_t @ld3r_2d(ptr %A) nounwind {
-; CHECK: ld3r_2d
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld3r.2d { v0, v1, v2 }, [x0]
-; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld3r.v2i64.p0(ptr %A)
  ret %struct.__neon_int64x2x3_t %tmp2
 }
 define %struct.__neon_int64x2x4_t @ld4r_2d(ptr %A) nounwind {
-; CHECK: ld4r_2d
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld4r.2d { v0, v1, v2, v3 }, [x0]
-; CHECK-NEXT: ret
  %tmp2 = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4r.v2i64.p0(ptr %A)
  ret %struct.__neon_int64x2x4_t %tmp2
 }
@@ -800,109 +591,76 @@
 declare %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld4r.v2i64.p0(ptr) nounwind readonly
 define <16 x i8> @ld1_16b(<16 x i8> %V, ptr %bar) {
-; CHECK-LABEL: ld1_16b
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.b { v0 }[0], [x0]
-; CHECK-NEXT: ret
  %tmp1 = load i8, ptr %bar
  %tmp2 = insertelement <16 x i8> %V, i8 %tmp1, i32 0
  ret <16 x i8> %tmp2
 }
 define <8 x i16> @ld1_8h(<8 x i16> %V, ptr %bar) {
-; CHECK-LABEL: ld1_8h
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.h { v0 }[0], [x0]
-; CHECK-NEXT: ret
  %tmp1 = load i16, ptr %bar
  %tmp2 = insertelement <8 x i16> %V, i16 %tmp1, i32 0
  ret <8 x i16> %tmp2
 }
 define <4 x i32> @ld1_4s(<4 x i32> %V, ptr %bar) {
-; CHECK-LABEL: ld1_4s
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.s { v0 }[0], [x0]
-; CHECK-NEXT: ret
  %tmp1 = load i32, ptr %bar
  %tmp2 = insertelement <4 x i32> %V, i32 %tmp1, i32 0
  ret <4 x i32> %tmp2
 }
 define <4 x float> @ld1_4s_float(<4 x float> %V, ptr %bar) {
-; CHECK-LABEL: ld1_4s_float:
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.s { v0 }[0], [x0]
-; CHECK-NEXT: ret
  %tmp1 = load float, ptr %bar
  %tmp2 = insertelement <4 x float> %V, float %tmp1, i32 0
  ret <4 x float> %tmp2
 }
 define <2 x i64> @ld1_2d(<2 x i64> %V, ptr %bar) {
-; CHECK-LABEL: ld1_2d
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.d { v0 }[0], [x0]
-; CHECK-NEXT: ret
  %tmp1 = load i64, ptr %bar
  %tmp2 = insertelement <2 x i64> %V, i64 %tmp1, i32 0
  ret <2 x i64> %tmp2
 }
 define <2 x double> @ld1_2d_double(<2 x double> %V, ptr %bar) {
-; CHECK-LABEL: ld1_2d_double:
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.d { v0 }[0], [x0]
-; CHECK-NEXT: ret
  %tmp1 = load double, ptr %bar
  %tmp2 = insertelement <2 x double> %V, double %tmp1, i32 0
  ret <2 x double> %tmp2
 }
 define <1 x i64> @ld1_1d(ptr %p) {
-; CHECK-LABEL: ld1_1d
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ldr [[REG:d[0-9]+]], [x0]
-; CHECK-NEXT: ret
  %tmp = load <1 x i64>, ptr %p, align 8
  ret <1 x i64> %tmp
 }
 define <8 x i8> @ld1_8b(<8 x i8> %V, ptr %bar) {
-; CHECK-LABEL: ld1_8b
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.b { v0 }[0], [x0]
-; CHECK-NEXT: ret
  %tmp1 = load i8, ptr %bar
  %tmp2 = insertelement <8 x i8> %V, i8 %tmp1, i32 0
  ret <8 x i8> %tmp2
 }
 define <4 x i16> @ld1_4h(<4 x i16> %V, ptr %bar) {
-; CHECK-LABEL: ld1_4h
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.h { v0 }[0], [x0]
-; CHECK-NEXT: ret
  %tmp1 = load i16, ptr %bar
  %tmp2 = insertelement <4 x i16> %V, i16 %tmp1, i32 0
  ret <4 x i16> %tmp2
 }
 define <2 x i32> @ld1_2s(<2 x i32> %V, ptr %bar) {
-; CHECK-LABEL: ld1_2s:
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.s { v0 }[0], [x0]
-; CHECK-NEXT: ret
  %tmp1 = load i32, ptr %bar
  %tmp2 = insertelement <2 x i32> %V, i32 %tmp1, i32 0
  ret <2 x i32> %tmp2
 }
 define <2 x float> @ld1_2s_float(<2 x float> %V, ptr %bar) {
-; CHECK-LABEL: ld1_2s_float:
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1.s { v0 }[0], [x0]
-; CHECK-NEXT: ret
  %tmp1 = load float, ptr %bar
  %tmp2 = insertelement <2 x float> %V, float %tmp1, i32 0
  ret <2 x float> %tmp2
@@ -912,12 +670,6 @@
 ; Add rdar://13098923 test case: vld1_dup_u32 doesn't generate ld1r.2s
 define void @ld1r_2s_from_dup(ptr nocapture %a, ptr nocapture %b, ptr nocapture %diff) nounwind ssp {
 entry:
-; CHECK: ld1r_2s_from_dup
-; CHECK: ld1r.2s { [[ARG2:v[0-9]+]] }, [x1]
-; CHECK-NEXT: ld1r.2s { [[ARG1:v[0-9]+]] }, [x0]
-; CHECK-NEXT: usubl.8h v[[RESREGNUM:[0-9]+]], [[ARG1]], [[ARG2]]
-; CHECK-NEXT: str d[[RESREGNUM]], [x2]
-; CHECK-NEXT: ret
  %tmp1 = load i32, ptr %a, align 4
  %tmp2 = insertelement <2 x i32> undef, i32 %tmp1, i32 0
  %lane = shufflevector <2 x i32> %tmp2, <2 x i32> undef, <2 x i32> zeroinitializer
@@ -939,10 +691,7 @@
 ; Tests for rdar://11947069: vld1_dup_* and vld1q_dup_* code gen is suboptimal
 define <4 x float> @ld1r_4s_float(ptr nocapture %x) {
 entry:
-; CHECK-LABEL: ld1r_4s_float
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1r.4s { v0 }, [x0]
-; CHECK-NEXT: ret
  %tmp = load float, ptr %x, align 4
  %tmp1 = insertelement <4 x float> undef, float %tmp, i32 0
  %tmp2 = insertelement <4 x float> %tmp1, float %tmp, i32 1
@@ -953,10 +702,7 @@
 define <2 x float> @ld1r_2s_float(ptr nocapture %x) {
 entry:
-; CHECK-LABEL: ld1r_2s_float
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1r.2s { v0 }, [x0]
-; CHECK-NEXT: ret
  %tmp = load float, ptr %x, align 4
  %tmp1 = insertelement <2 x float> undef, float %tmp, i32 0
  %tmp2 = insertelement <2 x float> %tmp1, float %tmp, i32 1
@@ -965,10 +711,7 @@
 define <2 x double> @ld1r_2d_double(ptr nocapture %x) {
 entry:
-; CHECK-LABEL: ld1r_2d_double
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1r.2d { v0 }, [x0]
-; CHECK-NEXT: ret
  %tmp = load double, ptr %x, align 4
  %tmp1 = insertelement <2 x double> undef, double %tmp, i32 0
  %tmp2 = insertelement <2 x double> %tmp1, double %tmp, i32 1
@@ -977,10 +720,7 @@
 define <1 x double> @ld1r_1d_double(ptr nocapture %x) {
 entry:
-; CHECK-LABEL: ld1r_1d_double
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ldr d0, [x0]
-; CHECK-NEXT: ret
  %tmp = load double, ptr %x, align 4
  %tmp1 = insertelement <1 x double> undef, double %tmp, i32 0
  ret <1 x double> %tmp1
@@ -988,10 +728,7 @@
 define <4 x float> @ld1r_4s_float_shuff(ptr nocapture %x) {
 entry:
-; CHECK-LABEL: ld1r_4s_float_shuff
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1r.4s { v0 }, [x0]
-; CHECK-NEXT: ret
  %tmp = load float, ptr %x, align 4
  %tmp1 = insertelement <4 x float> undef, float %tmp, i32 0
  %lane = shufflevector <4 x float> %tmp1, <4 x float> undef, <4 x i32> zeroinitializer
@@ -1000,10 +737,7 @@
 define <2 x float> @ld1r_2s_float_shuff(ptr nocapture %x) {
 entry:
-; CHECK-LABEL: ld1r_2s_float_shuff
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1r.2s { v0 }, [x0]
-; CHECK-NEXT: ret
  %tmp = load float, ptr %x, align 4
  %tmp1 = insertelement <2 x float> undef, float %tmp, i32 0
  %lane = shufflevector <2 x float> %tmp1, <2 x float> undef, <2 x i32> zeroinitializer
@@ -1012,10 +746,7 @@
 define <2 x double> @ld1r_2d_double_shuff(ptr nocapture %x) {
 entry:
-; CHECK-LABEL: ld1r_2d_double_shuff
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ld1r.2d { v0 }, [x0]
-; CHECK-NEXT: ret
  %tmp = load double, ptr %x, align 4
  %tmp1 = insertelement <2 x double> undef, double %tmp, i32 0
  %lane = shufflevector <2 x double> %tmp1, <2 x double> undef, <2 x i32> zeroinitializer
@@ -1024,10 +755,7 @@
 define <1 x double> @ld1r_1d_double_shuff(ptr nocapture %x) {
 entry:
-; CHECK-LABEL: ld1r_1d_double_shuff
 ; Make sure we are using the operands defined by the ABI
-; CHECK: ldr d0, [x0]
-; CHECK-NEXT: ret
  %tmp = load double, ptr %x, align 4
  %tmp1 = insertelement <1 x double> undef, double %tmp, i32 0
  %lane = shufflevector <1 x double> %tmp1, <1 x double> undef, <1 x i32> zeroinitializer
@@ -1046,43 +774,31 @@
 declare %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld1x2.v1f64.p0(ptr) nounwind readonly
 define %struct.__neon_int8x8x2_t @ld1_x2_v8i8(ptr %addr) {
-; CHECK-LABEL: ld1_x2_v8i8:
-; CHECK: ld1.8b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int8x8x2_t @llvm.aarch64.neon.ld1x2.v8i8.p0(ptr %addr)
  ret %struct.__neon_int8x8x2_t %val
 }
 define %struct.__neon_int16x4x2_t @ld1_x2_v4i16(ptr %addr) {
-; CHECK-LABEL: ld1_x2_v4i16:
-; CHECK: ld1.4h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int16x4x2_t @llvm.aarch64.neon.ld1x2.v4i16.p0(ptr %addr)
  ret %struct.__neon_int16x4x2_t %val
 }
 define %struct.__neon_int32x2x2_t @ld1_x2_v2i32(ptr %addr) {
-; CHECK-LABEL: ld1_x2_v2i32:
-; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int32x2x2_t @llvm.aarch64.neon.ld1x2.v2i32.p0(ptr %addr)
  ret %struct.__neon_int32x2x2_t %val
 }
 define %struct.__neon_float32x2x2_t @ld1_x2_v2f32(ptr %addr) {
-; CHECK-LABEL: ld1_x2_v2f32:
-; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_float32x2x2_t @llvm.aarch64.neon.ld1x2.v2f32.p0(ptr %addr)
  ret %struct.__neon_float32x2x2_t %val
 }
 define %struct.__neon_int64x1x2_t @ld1_x2_v1i64(ptr %addr) {
-; CHECK-LABEL: ld1_x2_v1i64:
-; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int64x1x2_t @llvm.aarch64.neon.ld1x2.v1i64.p0(ptr %addr)
  ret %struct.__neon_int64x1x2_t %val
 }
 define %struct.__neon_float64x1x2_t @ld1_x2_v1f64(ptr %addr) {
-; CHECK-LABEL: ld1_x2_v1f64:
-; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_float64x1x2_t @llvm.aarch64.neon.ld1x2.v1f64.p0(ptr %addr)
  ret %struct.__neon_float64x1x2_t %val
 }
@@ -1104,43 +820,31 @@
 declare %struct.__neon_float64x2x2_t @llvm.aarch64.neon.ld1x2.v2f64.p0(ptr) nounwind readonly
 define %struct.__neon_int8x16x2_t @ld1_x2_v16i8(ptr %addr) {
-; CHECK-LABEL: ld1_x2_v16i8:
-; CHECK: ld1.16b { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int8x16x2_t @llvm.aarch64.neon.ld1x2.v16i8.p0(ptr %addr)
  ret %struct.__neon_int8x16x2_t %val
 }
 define %struct.__neon_int16x8x2_t @ld1_x2_v8i16(ptr %addr) {
-; CHECK-LABEL: ld1_x2_v8i16:
-; CHECK: ld1.8h { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int16x8x2_t @llvm.aarch64.neon.ld1x2.v8i16.p0(ptr %addr)
  ret %struct.__neon_int16x8x2_t %val
 }
 define %struct.__neon_int32x4x2_t @ld1_x2_v4i32(ptr %addr) {
-; CHECK-LABEL: ld1_x2_v4i32:
-; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int32x4x2_t @llvm.aarch64.neon.ld1x2.v4i32.p0(ptr %addr)
  ret %struct.__neon_int32x4x2_t %val
 }
 define %struct.__neon_float32x4x2_t @ld1_x2_v4f32(ptr %addr) {
-; CHECK-LABEL: ld1_x2_v4f32:
-; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_float32x4x2_t @llvm.aarch64.neon.ld1x2.v4f32.p0(ptr %addr)
  ret %struct.__neon_float32x4x2_t %val
 }
 define %struct.__neon_int64x2x2_t @ld1_x2_v2i64(ptr %addr) {
-; CHECK-LABEL: ld1_x2_v2i64:
-; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int64x2x2_t @llvm.aarch64.neon.ld1x2.v2i64.p0(ptr %addr)
  ret %struct.__neon_int64x2x2_t %val
 }
 define %struct.__neon_float64x2x2_t @ld1_x2_v2f64(ptr %addr) {
-; CHECK-LABEL: ld1_x2_v2f64:
-; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_float64x2x2_t @llvm.aarch64.neon.ld1x2.v2f64.p0(ptr %addr)
  ret %struct.__neon_float64x2x2_t %val
 }
@@ -1153,43 +857,31 @@
 declare %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld1x3.v1f64.p0(ptr) nounwind readonly
 define %struct.__neon_int8x8x3_t @ld1_x3_v8i8(ptr %addr) {
-; CHECK-LABEL: ld1_x3_v8i8:
-; CHECK: ld1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int8x8x3_t @llvm.aarch64.neon.ld1x3.v8i8.p0(ptr %addr)
  ret %struct.__neon_int8x8x3_t %val
 }
 define %struct.__neon_int16x4x3_t @ld1_x3_v4i16(ptr %addr) {
-; CHECK-LABEL: ld1_x3_v4i16:
-; CHECK: ld1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int16x4x3_t @llvm.aarch64.neon.ld1x3.v4i16.p0(ptr %addr)
  ret %struct.__neon_int16x4x3_t %val
 }
 define %struct.__neon_int32x2x3_t @ld1_x3_v2i32(ptr %addr) {
-; CHECK-LABEL: ld1_x3_v2i32:
-; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int32x2x3_t @llvm.aarch64.neon.ld1x3.v2i32.p0(ptr %addr)
  ret %struct.__neon_int32x2x3_t %val
 }
 define %struct.__neon_float32x2x3_t @ld1_x3_v2f32(ptr %addr) {
-; CHECK-LABEL: ld1_x3_v2f32:
-; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_float32x2x3_t @llvm.aarch64.neon.ld1x3.v2f32.p0(ptr %addr)
  ret %struct.__neon_float32x2x3_t %val
 }
 define %struct.__neon_int64x1x3_t @ld1_x3_v1i64(ptr %addr) {
-; CHECK-LABEL: ld1_x3_v1i64:
-; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int64x1x3_t @llvm.aarch64.neon.ld1x3.v1i64.p0(ptr %addr)
  ret %struct.__neon_int64x1x3_t %val
 }
 define %struct.__neon_float64x1x3_t @ld1_x3_v1f64(ptr %addr) {
-; CHECK-LABEL: ld1_x3_v1f64:
-; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_float64x1x3_t @llvm.aarch64.neon.ld1x3.v1f64.p0(ptr %addr)
  ret %struct.__neon_float64x1x3_t %val
 }
@@ -1202,43 +894,31 @@
 declare %struct.__neon_float64x2x3_t @llvm.aarch64.neon.ld1x3.v2f64.p0(ptr) nounwind readonly
 define %struct.__neon_int8x16x3_t @ld1_x3_v16i8(ptr %addr) {
-; CHECK-LABEL: ld1_x3_v16i8:
-; CHECK: ld1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int8x16x3_t @llvm.aarch64.neon.ld1x3.v16i8.p0(ptr %addr)
  ret %struct.__neon_int8x16x3_t %val
 }
 define %struct.__neon_int16x8x3_t @ld1_x3_v8i16(ptr %addr) {
-; CHECK-LABEL: ld1_x3_v8i16:
-; CHECK: ld1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int16x8x3_t @llvm.aarch64.neon.ld1x3.v8i16.p0(ptr %addr)
  ret %struct.__neon_int16x8x3_t %val
 }
 define %struct.__neon_int32x4x3_t @ld1_x3_v4i32(ptr %addr) {
-; CHECK-LABEL: ld1_x3_v4i32:
-; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int32x4x3_t @llvm.aarch64.neon.ld1x3.v4i32.p0(ptr %addr)
  ret %struct.__neon_int32x4x3_t %val
 }
 define %struct.__neon_float32x4x3_t @ld1_x3_v4f32(ptr %addr) {
-; CHECK-LABEL: ld1_x3_v4f32:
-; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_float32x4x3_t @llvm.aarch64.neon.ld1x3.v4f32.p0(ptr %addr)
  ret %struct.__neon_float32x4x3_t %val
 }
 define %struct.__neon_int64x2x3_t @ld1_x3_v2i64(ptr %addr) {
-; CHECK-LABEL: ld1_x3_v2i64:
-; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int64x2x3_t @llvm.aarch64.neon.ld1x3.v2i64.p0(ptr %addr)
  ret %struct.__neon_int64x2x3_t %val
 }
 define %struct.__neon_float64x2x3_t @ld1_x3_v2f64(ptr %addr) {
-; CHECK-LABEL: ld1_x3_v2f64:
-; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_float64x2x3_t @llvm.aarch64.neon.ld1x3.v2f64.p0(ptr %addr)
  ret %struct.__neon_float64x2x3_t %val
 }
@@ -1251,43 +931,31 @@
 declare %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld1x4.v1f64.p0(ptr) nounwind readonly
 define %struct.__neon_int8x8x4_t @ld1_x4_v8i8(ptr %addr) {
-; CHECK-LABEL: ld1_x4_v8i8:
-; CHECK: ld1.8b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int8x8x4_t @llvm.aarch64.neon.ld1x4.v8i8.p0(ptr %addr)
  ret %struct.__neon_int8x8x4_t %val
 }
 define %struct.__neon_int16x4x4_t @ld1_x4_v4i16(ptr %addr) {
-; CHECK-LABEL: ld1_x4_v4i16:
-; CHECK: ld1.4h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int16x4x4_t @llvm.aarch64.neon.ld1x4.v4i16.p0(ptr %addr)
  ret %struct.__neon_int16x4x4_t %val
 }
 define %struct.__neon_int32x2x4_t @ld1_x4_v2i32(ptr %addr) {
-; CHECK-LABEL: ld1_x4_v2i32:
-; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int32x2x4_t @llvm.aarch64.neon.ld1x4.v2i32.p0(ptr %addr)
  ret %struct.__neon_int32x2x4_t %val
 }
 define %struct.__neon_float32x2x4_t @ld1_x4_v2f32(ptr %addr) {
-; CHECK-LABEL: ld1_x4_v2f32:
-; CHECK: ld1.2s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_float32x2x4_t @llvm.aarch64.neon.ld1x4.v2f32.p0(ptr %addr)
  ret %struct.__neon_float32x2x4_t %val
 }
 define %struct.__neon_int64x1x4_t @ld1_x4_v1i64(ptr %addr) {
-; CHECK-LABEL: ld1_x4_v1i64:
-; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int64x1x4_t @llvm.aarch64.neon.ld1x4.v1i64.p0(ptr %addr)
  ret %struct.__neon_int64x1x4_t %val
 }
 define %struct.__neon_float64x1x4_t @ld1_x4_v1f64(ptr %addr) {
-; CHECK-LABEL: ld1_x4_v1f64:
-; CHECK: ld1.1d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_float64x1x4_t @llvm.aarch64.neon.ld1x4.v1f64.p0(ptr %addr)
  ret %struct.__neon_float64x1x4_t %val
 }
@@ -1300,43 +968,33 @@
 declare %struct.__neon_float64x2x4_t @llvm.aarch64.neon.ld1x4.v2f64.p0(ptr) nounwind readonly
 define %struct.__neon_int8x16x4_t @ld1_x4_v16i8(ptr %addr) {
-; CHECK-LABEL: ld1_x4_v16i8:
-; CHECK: ld1.16b { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int8x16x4_t @llvm.aarch64.neon.ld1x4.v16i8.p0(ptr %addr)
  ret %struct.__neon_int8x16x4_t %val
 }
 define %struct.__neon_int16x8x4_t @ld1_x4_v8i16(ptr %addr) {
-; CHECK-LABEL: ld1_x4_v8i16:
-; CHECK: ld1.8h { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int16x8x4_t @llvm.aarch64.neon.ld1x4.v8i16.p0(ptr %addr)
  ret %struct.__neon_int16x8x4_t %val
 }
 define %struct.__neon_int32x4x4_t @ld1_x4_v4i32(ptr %addr) {
-; CHECK-LABEL: ld1_x4_v4i32:
-; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int32x4x4_t @llvm.aarch64.neon.ld1x4.v4i32.p0(ptr %addr)
  ret %struct.__neon_int32x4x4_t %val
 }
 define %struct.__neon_float32x4x4_t @ld1_x4_v4f32(ptr %addr) {
-; CHECK-LABEL: ld1_x4_v4f32:
-; CHECK: ld1.4s { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_float32x4x4_t @llvm.aarch64.neon.ld1x4.v4f32.p0(ptr %addr)
  ret %struct.__neon_float32x4x4_t %val
 }
 define %struct.__neon_int64x2x4_t @ld1_x4_v2i64(ptr %addr) {
-; CHECK-LABEL: ld1_x4_v2i64:
-; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_int64x2x4_t @llvm.aarch64.neon.ld1x4.v2i64.p0(ptr %addr)
  ret %struct.__neon_int64x2x4_t %val
 }
 define %struct.__neon_float64x2x4_t @ld1_x4_v2f64(ptr %addr) {
-; CHECK-LABEL: ld1_x4_v2f64:
-; CHECK: ld1.2d { {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}}, {{v[0-9]+}} }, [x0]
  %val = call %struct.__neon_float64x2x4_t @llvm.aarch64.neon.ld1x4.v2f64.p0(ptr %addr)
  ret %struct.__neon_float64x2x4_t %val
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/arm64-ldp.ll b/llvm/test/CodeGen/AArch64/arm64-ldp.ll
--- a/llvm/test/CodeGen/AArch64/arm64-ldp.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ldp.ll
@@ -422,10 +422,8 @@
 ; CHECK-LABEL: ldp_sext_int_post:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill
-; CHECK-NEXT: add x8, x0, #8
 ; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: ldpsw x19, x20, [x0]
-; CHECK-NEXT: mov x0, x8
+; CHECK-NEXT: ldpsw x19, x20, [x0], #8
 ; CHECK-NEXT: bl "use-ptr"
 ; CHECK-NEXT: add x0, x20, x19
 ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll b/llvm/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll
--- a/llvm/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-misaligned-memcpy-inline.ll
@@ -10,7 +10,7 @@
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: .cfi_offset w30, -8
 ; CHECK-NEXT: .cfi_offset w29, -16
-; CHECK-NEXT: mov w2, #16
+; CHECK-NEXT: mov w2, #16 ; =0x10
 ; CHECK-NEXT: bl _memcpy
 ; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
 ; CHECK-NEXT: ret
@@ -39,12 +39,12 @@
 ; CHECK: ; %bb.0: ; %entry
 ; CHECK-NEXT: ldrb w8, [x1, #3]
 ; CHECK-NEXT: ldrb w9, [x1, #2]
-; CHECK-NEXT: ldrb w10, [x1, #1]
-; CHECK-NEXT: ldrb w11, [x1]
+; CHECK-NEXT: ldrb w10, [x1]
+; CHECK-NEXT: ldrb w11, [x1, #1]
 ; CHECK-NEXT: strb w8, [x0, #3]
 ; CHECK-NEXT: strb w9, [x0, #2]
-; CHECK-NEXT: strb w10, [x0, #1]
-; CHECK-NEXT: strb w11, [x0]
+; CHECK-NEXT: strb w11, [x0, #1]
+; CHECK-NEXT: strb w10, [x0]
 ; CHECK-NEXT: ret
 entry:
  call void @llvm.memcpy.p0.p0.i64(ptr %out, ptr %in, i64 4, i1 false)
diff --git a/llvm/test/CodeGen/AArch64/arm64-mul.ll b/llvm/test/CodeGen/AArch64/arm64-mul.ll
--- a/llvm/test/CodeGen/AArch64/arm64-mul.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-mul.ll
@@ -111,7 +111,7 @@
 define i64 @t9(i32 %a) nounwind {
 ; CHECK-LABEL: t9:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #8896
+; CHECK-NEXT: mov w8, #8896 // =0x22c0
 ; CHECK-NEXT: movk w8, #2, lsl #16
 ; CHECK-NEXT: umull x0, w0, w8
 ; CHECK-NEXT: ret
@@ -125,11 +125,11 @@
 define i64 @t10(i32 %a) nounwind {
 ; CHECK-LABEL: t10:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #2
 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
-; CHECK-NEXT: sxtw x9, w0
-; CHECK-NEXT: movk w8, #32768, lsl #16
-; CHECK-NEXT: mul x0, x9, x8
+; CHECK-NEXT: sxtw x8, w0
+; CHECK-NEXT: mov w9, #2 // =0x2
+; CHECK-NEXT: movk w9, #32768, lsl #16
+; CHECK-NEXT: mul x0, x8, x9
 ; CHECK-NEXT: ret
 entry:
  %tmp1 = sext i32 %a to i64
@@ -141,7 +141,7 @@
 define i64 @t11(i64 %a) nounwind {
 ; CHECK-LABEL: t11:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #29594
+; CHECK-NEXT: mov w8, #29594 // =0x739a
 ; CHECK-NEXT: movk w8, #65499, lsl #16
 ; CHECK-NEXT: smnegl x0, w0, w8
 ; CHECK-NEXT: ret
@@ -156,7 +156,7 @@
 define i64 @t12(i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: t12:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #35118
+; CHECK-NEXT: mov w8, #35118 // =0x892e
 ; CHECK-NEXT: movk w8, #65008, lsl #16
 ; CHECK-NEXT: smaddl x0, w0, w8, x1
 ; CHECK-NEXT: ret
@@ -171,7 +171,7 @@
 define i64 @t13(i32 %a, i64 %b) nounwind {
 ; CHECK-LABEL: t13:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #24910
+; CHECK-NEXT: mov w8, #24910 // =0x614e
 ; CHECK-NEXT: movk w8, #188, lsl #16
 ; CHECK-NEXT: umsubl x0, w0, w8, x1
 ; CHECK-NEXT: ret
@@ -185,7 +185,7 @@
 define i64 @t14(i32 %a, i64 %b) nounwind {
 ; CHECK-LABEL: t14:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov w8, #40626
+; CHECK-NEXT: mov w8, #40626 // =0x9eb2
 ; CHECK-NEXT: movk w8, #65347, lsl #16
 ; CHECK-NEXT: smsubl x0, w0, w8, x1
 ; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
--- a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll
@@ -2537,18 +2537,18 @@
 define <8 x i16> @cmplx_mul_combined_re_im(<8 x i16> noundef %a, i64 %scale.coerce) {
 ; CHECK-LABEL: cmplx_mul_combined_re_im:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: lsr x8, x0, #16
-; CHECK-NEXT: adrp x9, .LCPI196_0
-; CHECK-NEXT: fmov d4, x0
-; CHECK-NEXT: rev32 v5.8h, v0.8h
-; CHECK-NEXT: dup v1.8h, w8
-; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI196_0]
+; CHECK-NEXT: lsr x9, x0, #16
+; CHECK-NEXT: adrp x8, .LCPI196_0
+; CHECK-NEXT: fmov d5, x0
+; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI196_0]
+; CHECK-NEXT: rev32 v4.8h, v0.8h
+; CHECK-NEXT: dup v1.8h, w9
 ; CHECK-NEXT: sqneg v2.8h, v1.8h
 ; CHECK-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v3.16b
-; CHECK-NEXT: sqdmull v2.4s, v0.4h, v4.h[0]
-; CHECK-NEXT: sqdmull2 v0.4s, v0.8h, v4.h[0]
-; CHECK-NEXT: sqdmlal v2.4s, v5.4h, v1.4h
-; CHECK-NEXT: sqdmlal2 v0.4s, v5.8h, v1.8h
+; CHECK-NEXT: sqdmull v2.4s, v0.4h, v5.h[0]
+; CHECK-NEXT: sqdmull2 v0.4s, v0.8h, v5.h[0]
+; CHECK-NEXT: sqdmlal v2.4s, v4.4h, v1.4h
+; CHECK-NEXT: sqdmlal2 v0.4s, v4.8h, v1.8h
 ; CHECK-NEXT: uzp2 v0.8h, v2.8h, v0.8h
 ; CHECK-NEXT: ret
 entry:
diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll b/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll
--- a/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll
@@ -73,13 +73,13 @@
 define <2 x i64> @mul2xi64(<2 x i64> %A, <2 x i64> %B) {
 ; CHECK-LABEL: mul2xi64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: fmov x9, d1
-; CHECK-NEXT: fmov x10, d0
+; CHECK-NEXT: fmov x10, d1
+; CHECK-NEXT: fmov x11, d0
 ; CHECK-NEXT: mov x8, v1.d[1]
-; CHECK-NEXT: mov x11, v0.d[1]
-; CHECK-NEXT: mul x9, x10, x9
-; CHECK-NEXT: mul x8, x11, x8
-; CHECK-NEXT: fmov d0, x9
+; CHECK-NEXT: mov x9, v0.d[1]
+; CHECK-NEXT: mul x10, x11, x10
+; CHECK-NEXT: mul x8, x9, x8
+; CHECK-NEXT: fmov d0, x10
 ; CHECK-NEXT: mov v0.d[1], x8
 ; CHECK-NEXT: ret
  %tmp3 = mul <2 x i64> %A, %B;
@@ -164,6 +164,7 @@
 ; CHECK-NEXT: smov w11, v0.b[2]
 ; CHECK-NEXT: smov w12, v0.b[3]
 ; CHECK-NEXT: smov w13, v0.b[4]
+; CHECK-NEXT: smov w14, v0.b[5]
 ; CHECK-NEXT: sdiv w8, w9, w8
 ; CHECK-NEXT: smov w9, v1.b[0]
 ; CHECK-NEXT: sdiv w9, w10, w9
@@ -171,18 +172,17 @@
 ; CHECK-NEXT: sdiv w10, w11, w10
 ; CHECK-NEXT: smov w11, v1.b[3]
 ; CHECK-NEXT: fmov s2, w9
-; CHECK-NEXT: smov w9, v1.b[5]
+; CHECK-NEXT: smov w9, v1.b[6]
 ; CHECK-NEXT: mov v2.b[1], w8
 ; CHECK-NEXT: sdiv w11, w12, w11
 ; CHECK-NEXT: smov w12, v1.b[4]
 ; CHECK-NEXT: mov v2.b[2], w10
 ; CHECK-NEXT: smov w10, v0.b[6]
 ; CHECK-NEXT: sdiv w12, w13, w12
-; CHECK-NEXT: smov w13, v0.b[5]
+; CHECK-NEXT: smov w13, v1.b[5]
 ; CHECK-NEXT: mov v2.b[3], w11
 ; CHECK-NEXT: smov w11, v0.b[7]
-; CHECK-NEXT: sdiv w8, w13, w9
-; CHECK-NEXT: smov w9, v1.b[6]
+; CHECK-NEXT: sdiv w8, w14, w13
 ; CHECK-NEXT: mov v2.b[4], w12
 ; CHECK-NEXT: sdiv w9, w10, w9
 ; CHECK-NEXT: smov w10, v1.b[7]
@@ -207,16 +207,17 @@
 ; CHECK-NEXT: smov w13, v0.b[4]
 ; CHECK-NEXT: smov w14, v0.b[5]
 ; CHECK-NEXT: smov w15, v0.b[6]
-; CHECK-NEXT: sdiv w8, w9, w8
-; CHECK-NEXT: smov w9, v1.b[0]
 ; CHECK-NEXT: smov w16, v0.b[7]
 ; CHECK-NEXT: smov w17, v0.b[8]
+; CHECK-NEXT: smov w18, v0.b[9]
+; CHECK-NEXT: sdiv w8, w9, w8
+; CHECK-NEXT: smov w9, v1.b[0]
 ; CHECK-NEXT: sdiv w9, w10, w9
 ; CHECK-NEXT: smov w10, v1.b[2]
 ; CHECK-NEXT: sdiv w10, w11, w10
 ; CHECK-NEXT: smov w11, v1.b[3]
 ; CHECK-NEXT: fmov s2, w9
-; CHECK-NEXT: smov w9, v1.b[9]
+; CHECK-NEXT: smov w9, v1.b[10]
 ; CHECK-NEXT: mov v2.b[1], w8
 ; CHECK-NEXT: sdiv w11, w12, w11
 ; CHECK-NEXT: smov w12, v1.b[4]
@@ -238,10 +239,9 @@
 ; CHECK-NEXT: smov w16, v1.b[8]
 ; CHECK-NEXT: mov v2.b[6], w14
 ; CHECK-NEXT: sdiv w16, w17, w16
-; CHECK-NEXT: smov w17, v0.b[9]
+; CHECK-NEXT: smov w17, v1.b[9]
 ; CHECK-NEXT: mov v2.b[7], w15
-; CHECK-NEXT: sdiv w8, w17, w9
-; CHECK-NEXT: smov w9, v1.b[10]
+; CHECK-NEXT: sdiv w8, w18, w17
 ; CHECK-NEXT: mov v2.b[8], w16
 ; CHECK-NEXT: sdiv w9, w10, w9
 ; CHECK-NEXT: smov w10, v1.b[11]
@@ -319,6 +319,7 @@
 ; CHECK-NEXT: smov w11, v0.h[2]
 ; CHECK-NEXT: smov w12, v0.h[3]
 ; CHECK-NEXT: smov w13, v0.h[4]
+; CHECK-NEXT: smov w14, v0.h[5]
 ; CHECK-NEXT: sdiv w8, w9, w8
 ; CHECK-NEXT: smov w9, v1.h[0]
 ; CHECK-NEXT: sdiv w9, w10, w9
@@ -326,18 +327,17 @@
 ; CHECK-NEXT: sdiv w10, w11, w10
 ; CHECK-NEXT: smov w11, v1.h[3]
 ; CHECK-NEXT: fmov s2, w9
-; CHECK-NEXT: smov w9, v1.h[5]
+; CHECK-NEXT: smov w9, v1.h[6]
 ; CHECK-NEXT: mov v2.h[1], w8
 ; CHECK-NEXT: sdiv w11, w12, w11
 ; CHECK-NEXT: smov w12, v1.h[4]
 ; CHECK-NEXT: mov v2.h[2], w10
 ; CHECK-NEXT: smov w10, v0.h[6]
 ; CHECK-NEXT: sdiv w12, w13, w12
-; CHECK-NEXT: smov w13, v0.h[5]
+; CHECK-NEXT: smov w13, v1.h[5]
 ; CHECK-NEXT: mov v2.h[3], w11
 ; CHECK-NEXT: smov w11, v0.h[7]
-; CHECK-NEXT: sdiv w8, w13, w9
-; CHECK-NEXT: smov w9, v1.h[6]
+; CHECK-NEXT: sdiv w8, w14, w13
 ; CHECK-NEXT: mov v2.h[4], w12
 ; CHECK-NEXT: sdiv w9, w10, w9
 ; CHECK-NEXT: smov w10, v1.h[7]
@@ -463,6 +463,7 @@
 ; CHECK-NEXT: umov w11, v0.b[2]
 ; CHECK-NEXT: umov w12, v0.b[3]
 ; CHECK-NEXT: umov w13, v0.b[4]
+; CHECK-NEXT: umov w14, v0.b[5]
 ; CHECK-NEXT: udiv w8, w9, w8
 ; CHECK-NEXT: umov w9, v1.b[0]
 ; CHECK-NEXT: udiv w9, w10, w9
@@ -470,18 +471,17 @@
 ; CHECK-NEXT: udiv w10, w11, w10
 ; CHECK-NEXT: umov w11, v1.b[3]
 ; CHECK-NEXT: fmov s2, w9
-; CHECK-NEXT: umov w9, v1.b[5]
+; CHECK-NEXT: umov w9, v1.b[6]
 ; CHECK-NEXT: mov v2.b[1], w8
 ; CHECK-NEXT: udiv w11, w12, w11
 ; CHECK-NEXT: umov w12, v1.b[4]
 ; CHECK-NEXT: mov v2.b[2], w10
 ; CHECK-NEXT: umov w10, v0.b[6]
 ; CHECK-NEXT: udiv w12, w13, w12
-; CHECK-NEXT: umov w13, v0.b[5]
+; CHECK-NEXT: umov w13, v1.b[5]
 ; CHECK-NEXT: mov v2.b[3], w11
 ; CHECK-NEXT: umov w11, v0.b[7]
-; CHECK-NEXT: udiv w8, w13, w9
-; CHECK-NEXT: umov w9, v1.b[6]
+; CHECK-NEXT: udiv w8, w14, w13
 ; CHECK-NEXT: mov v2.b[4], w12
 ; CHECK-NEXT: udiv w9, w10, w9
 ; CHECK-NEXT: umov w10, v1.b[7]
@@ -506,16 +506,17 @@
 ; CHECK-NEXT: umov w13, v0.b[4]
 ; CHECK-NEXT: umov w14, v0.b[5]
 ; CHECK-NEXT: umov w15, v0.b[6]
-; CHECK-NEXT: udiv w8, w9, w8
-; CHECK-NEXT: umov w9, v1.b[0]
 ; CHECK-NEXT: umov w16, v0.b[7]
 ; CHECK-NEXT: umov w17, v0.b[8]
+; CHECK-NEXT: umov w18, v0.b[9]
+; CHECK-NEXT: udiv w8, w9, w8
+; CHECK-NEXT: umov w9, v1.b[0]
 ; CHECK-NEXT: udiv w9, w10, w9
 ; CHECK-NEXT: umov w10, v1.b[2]
 ; CHECK-NEXT: udiv w10, w11, w10
 ; CHECK-NEXT: umov w11, v1.b[3]
 ; CHECK-NEXT: fmov s2, w9
-; CHECK-NEXT: umov w9, v1.b[9]
+; CHECK-NEXT: umov w9, v1.b[10]
 ; CHECK-NEXT: mov v2.b[1], w8
 ; CHECK-NEXT: udiv w11, w12, w11
 ; CHECK-NEXT: umov w12, v1.b[4]
@@ -537,10 +538,9 @@
 ; CHECK-NEXT: umov w16, v1.b[8]
 ; CHECK-NEXT: mov v2.b[6], w14
 ; CHECK-NEXT: udiv w16, w17, w16
-; CHECK-NEXT: umov w17, v0.b[9]
+; CHECK-NEXT: umov w17, v1.b[9]
 ; CHECK-NEXT: mov v2.b[7], w15
-; CHECK-NEXT: udiv w8, w17, w9
-; CHECK-NEXT: umov w9, v1.b[10]
+; CHECK-NEXT: udiv w8, w18, w17
 ; CHECK-NEXT: mov v2.b[8], w16
 ; CHECK-NEXT: udiv w9, w10, w9
 ; CHECK-NEXT: umov w10, v1.b[11]
@@ -618,6 +618,7 @@
 ; CHECK-NEXT: umov w11, v0.h[2]
 ; CHECK-NEXT: umov w12, v0.h[3]
 ; CHECK-NEXT: umov w13, v0.h[4]
+; CHECK-NEXT: umov w14, v0.h[5]
 ; CHECK-NEXT: udiv w8, w9, w8
 ; CHECK-NEXT: umov w9, v1.h[0]
 ; CHECK-NEXT: udiv w9, w10, w9
@@ -625,18 +626,17 @@
 ; CHECK-NEXT: udiv w10, w11, w10
 ; CHECK-NEXT: umov w11, v1.h[3]
 ; CHECK-NEXT: fmov s2, w9
-; CHECK-NEXT: umov w9, v1.h[5]
+; CHECK-NEXT: umov w9, v1.h[6]
 ; CHECK-NEXT: mov v2.h[1], w8
 ; CHECK-NEXT: udiv w11, w12, w11
 ; CHECK-NEXT: umov w12, v1.h[4]
 ; CHECK-NEXT: mov v2.h[2], w10
 ; CHECK-NEXT: umov w10, v0.h[6]
 ; CHECK-NEXT: udiv w12, w13, w12
-; CHECK-NEXT: umov w13, v0.h[5]
+; CHECK-NEXT: umov w13, v1.h[5]
 ; CHECK-NEXT: mov v2.h[3], w11
 ; CHECK-NEXT: umov w11, v0.h[7]
-; CHECK-NEXT: udiv w8, w13, w9
-; CHECK-NEXT: umov w9, v1.h[6]
+; CHECK-NEXT: udiv w8, w14, w13
 ; CHECK-NEXT: mov v2.h[4], w12
 ; CHECK-NEXT: udiv w9, w10, w9
 ; CHECK-NEXT: umov w10, v1.h[7]
@@ -765,37 +765,37 @@
 ; CHECK-NEXT: smov w15, v0.b[2]
 ; CHECK-NEXT: smov w17, v1.b[3]
 ; CHECK-NEXT: smov w18, v0.b[3]
-; CHECK-NEXT: sdiv w13, w12, w11
 ; CHECK-NEXT: smov w1, v1.b[4]
 ; CHECK-NEXT: smov w2, v0.b[4]
-; CHECK-NEXT: msub w11, w13, w11, w12
-; CHECK-NEXT: smov w12, v1.b[5]
+; CHECK-NEXT: smov w4, v1.b[5]
+; CHECK-NEXT: smov w5, v0.b[5]
+; CHECK-NEXT: sdiv w13, w12, w11
 ; CHECK-NEXT: sdiv w10, w9, w8
-; CHECK-NEXT: smov w13, v0.b[5]
+; CHECK-NEXT: msub w11, w13, w11, w12
+; CHECK-NEXT: smov w13, v1.b[7]
 ; CHECK-NEXT: fmov s2, w11
 ; CHECK-NEXT: smov w11, v0.b[6]
+; CHECK-NEXT: sdiv w16, w15, w14
 ; CHECK-NEXT: msub w8, w10, w8, w9
 ; CHECK-NEXT: smov w10, v1.b[6]
-; CHECK-NEXT: sdiv w16, w15, w14
 ; CHECK-NEXT: mov v2.b[1], w8
-; CHECK-NEXT: msub w8, w16, w14, w15
-; CHECK-NEXT: smov w15, v1.b[7]
 ; CHECK-NEXT: sdiv w0, w18, w17
-; CHECK-NEXT: smov w16, v0.b[7]
+; CHECK-NEXT: msub w8, w16, w14, w15
+; CHECK-NEXT: smov w14, v0.b[7]
 ; CHECK-NEXT: mov v2.b[2], w8
-; CHECK-NEXT: msub w14, w0, w17, w18
 ; CHECK-NEXT: sdiv w3, w2, w1
-; CHECK-NEXT: mov v2.b[3], w14
-; CHECK-NEXT: msub w14, w3, w1, w2
-; CHECK-NEXT: sdiv w9, w13, w12
-; CHECK-NEXT: mov v2.b[4], w14
-; CHECK-NEXT: msub w9, w9, w12, w13
-; CHECK-NEXT: sdiv w8, w11, w10
-; CHECK-NEXT: mov v2.b[5], w9
-; CHECK-NEXT: msub w8, w8, w10, w11
-; CHECK-NEXT: sdiv w12, w16, w15
+; CHECK-NEXT: msub w8, w0, w17, w18
+; CHECK-NEXT: mov v2.b[3], w8
+; CHECK-NEXT: sdiv w9, w5, w4
+; CHECK-NEXT: msub w8, w3, w1, w2
+; CHECK-NEXT: mov v2.b[4], w8
+; CHECK-NEXT: sdiv w12, w11, w10
+; CHECK-NEXT: msub w8, w9, w4, w5
+; CHECK-NEXT: mov v2.b[5], w8
+; CHECK-NEXT: sdiv w9, w14, w13
+; CHECK-NEXT: msub w8, w12, w10, w11
 ; CHECK-NEXT: mov v2.b[6], w8
-; CHECK-NEXT: msub w8, w12, w15, w16
+; CHECK-NEXT: msub w8, w9, w13, w14
 ; CHECK-NEXT: mov v2.b[7], w8
 ; CHECK-NEXT: fmov d0, d2
 ; CHECK-NEXT: ret
@@ -806,11 +806,12 @@
 define <16 x i8> @srem16x8(<16 x i8> %A, <16 x i8> %B) {
 ; CHECK-LABEL: srem16x8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: stp x26, x25, [sp, #-64]! // 16-byte Folded Spill
-; CHECK-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: stp x28, x27, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NEXT: stp x26, x25, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp x24, x23, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 80
 ; CHECK-NEXT: .cfi_offset w19, -8
 ; CHECK-NEXT: .cfi_offset w20, -16
 ; CHECK-NEXT: .cfi_offset w21, -24
@@ -819,6 +820,8 @@
 ; CHECK-NEXT: .cfi_offset w24, -48
 ; CHECK-NEXT: .cfi_offset w25, -56
 ; CHECK-NEXT: .cfi_offset w26, -64
+; CHECK-NEXT: .cfi_offset w27, -72
+; CHECK-NEXT: .cfi_offset w28, -80
 ; CHECK-NEXT: smov w11, v1.b[0]
 ; CHECK-NEXT: smov w12, v0.b[0]
 ; CHECK-NEXT: smov w8, v1.b[1]
@@ -827,83 +830,84 @@
 ; CHECK-NEXT: smov w15, v0.b[2]
 ; CHECK-NEXT: smov w17, v1.b[3]
 ; CHECK-NEXT: smov w18, v0.b[3]
-; CHECK-NEXT: sdiv w13, w12, w11
 ; CHECK-NEXT: smov w1, v1.b[4]
 ; CHECK-NEXT: smov w2, v0.b[4]
 ; CHECK-NEXT: smov w4, v1.b[5]
 ; CHECK-NEXT: smov w5, v0.b[5]
+; CHECK-NEXT: sdiv w13, w12, w11
 ; CHECK-NEXT: smov w7, v1.b[6]
 ; CHECK-NEXT: smov w19, v0.b[6]
 ; CHECK-NEXT: smov w21, v1.b[7]
 ; CHECK-NEXT: smov w22, v0.b[7]
 ; CHECK-NEXT: smov w24, v1.b[8]
 ; CHECK-NEXT: smov w25, v0.b[8]
-; CHECK-NEXT: msub w11, w13, w11, w12
-; CHECK-NEXT: smov w12, v1.b[9]
+; CHECK-NEXT: smov w27, v1.b[9]
+; CHECK-NEXT: smov w28, v0.b[9]
 ; CHECK-NEXT: sdiv w10, w9, w8
-; CHECK-NEXT: smov w13, v0.b[9]
+; CHECK-NEXT: msub w11, w13, w11, w12
+; CHECK-NEXT: smov w13, v1.b[11]
 ; CHECK-NEXT: fmov s2, w11
 ; CHECK-NEXT: smov w11, v0.b[10]
+; CHECK-NEXT: sdiv w16, w15, w14
 ; CHECK-NEXT: msub w8, w10, w8, w9
 ; CHECK-NEXT: smov w10, v1.b[10]
-; CHECK-NEXT: sdiv w16, w15, w14
 ; CHECK-NEXT: mov v2.b[1], w8
-; CHECK-NEXT: msub w8, w16, w14, w15
-; CHECK-NEXT: smov w15, v1.b[11]
 ; CHECK-NEXT: sdiv w0, w18, w17
-; CHECK-NEXT: smov w16, v0.b[11]
+; CHECK-NEXT: msub w8, w16, w14, w15
+; CHECK-NEXT: smov w14, v0.b[11]
+; CHECK-NEXT: smov w16, v1.b[12]
 ; CHECK-NEXT: mov v2.b[2], w8
-; CHECK-NEXT: msub w14, w0, w17, w18
-; CHECK-NEXT: smov w18, v1.b[12]
 ; CHECK-NEXT: sdiv w3, w2, w1
-; CHECK-NEXT: smov w0, v0.b[12]
-; CHECK-NEXT: mov v2.b[3], w14
-; CHECK-NEXT: msub w14, w3, w1, w2
-; CHECK-NEXT: smov w2, v1.b[13]
+; CHECK-NEXT: msub w8, w0, w17, w18
+; CHECK-NEXT: smov w17, v0.b[12]
+; CHECK-NEXT: smov w0, v1.b[13]
+; CHECK-NEXT: mov v2.b[3], w8
 ; CHECK-NEXT: sdiv w6, w5, w4
-; CHECK-NEXT: smov w3, v0.b[13]
-; CHECK-NEXT: mov v2.b[4], w14
-; CHECK-NEXT: msub w17, w6, w4, w5
+; CHECK-NEXT: msub w8, w3, w1, w2
+; CHECK-NEXT: smov w1, v0.b[13]
+; CHECK-NEXT: mov v2.b[4], w8
 ; CHECK-NEXT: sdiv w20, w19, w7
-; CHECK-NEXT: mov v2.b[5], w17
-; CHECK-NEXT: msub w17, w20, w7, w19
+; CHECK-NEXT: msub w8, w6, w4, w5
+; CHECK-NEXT: mov v2.b[5], w8
 ; CHECK-NEXT: sdiv w23, w22, w21
-; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: mov v2.b[6], w17
-; CHECK-NEXT: msub w1, w23, w21, w22
+; CHECK-NEXT: msub w8, w20, w7, w19
+; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: mov v2.b[6], w8
 ; CHECK-NEXT: sdiv w26, w25, w24
-; CHECK-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: mov v2.b[7], w1
-; CHECK-NEXT: msub w1, w26, w24, w25
-; CHECK-NEXT: sdiv w9, w13, w12
-; CHECK-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: mov v2.b[8], w1
-; CHECK-NEXT: msub w9, w9, w12, w13
-; CHECK-NEXT: smov w13, v1.b[15]
-; CHECK-NEXT: sdiv w8, w11, w10
-; CHECK-NEXT: mov v2.b[9], w9
-; CHECK-NEXT: smov w9, v1.b[14]
-; CHECK-NEXT: msub w8, w8, w10, w11
-; CHECK-NEXT: smov w10, v0.b[14]
-; CHECK-NEXT: sdiv w14, w16, w15
+; CHECK-NEXT: msub w8, w23, w21, w22
+; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: mov v2.b[7], w8
+; CHECK-NEXT: sdiv w9, w28, w27
+; CHECK-NEXT: msub w8, w26, w24, w25
+; CHECK-NEXT: ldp x24, x23, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x26, x25, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: mov v2.b[8], w8
+; CHECK-NEXT: sdiv w12, w11, w10
+; CHECK-NEXT: msub w8, w9, w27, w28
+; CHECK-NEXT: mov v2.b[9], w8
+; CHECK-NEXT: sdiv w15, w14, w13
+; CHECK-NEXT: msub w8, w12, w10, w11
+; CHECK-NEXT: smov w10, v1.b[14]
+; CHECK-NEXT: smov w11, v0.b[14]
 ; CHECK-NEXT: mov v2.b[10], w8
-; CHECK-NEXT: msub w11, w14, w15, w16
+; CHECK-NEXT: sdiv w18, w17, w16
+; CHECK-NEXT: msub w8, w15, w13, w14
+; CHECK-NEXT: smov w13, v1.b[15]
 ; CHECK-NEXT: smov w14, v0.b[15]
-; CHECK-NEXT: sdiv w17, w0, w18
-; CHECK-NEXT: mov v2.b[11], w11
-; CHECK-NEXT: msub w11, w17, w18, w0
-; CHECK-NEXT: sdiv w12, w3, w2
-; CHECK-NEXT: mov v2.b[12], w11
-; CHECK-NEXT: msub w12, w12, w2, w3
-; CHECK-NEXT: sdiv w8, w10, w9
-; CHECK-NEXT: mov v2.b[13], w12
-; CHECK-NEXT: msub w8, w8, w9, w10
-; CHECK-NEXT: sdiv w11, w14, w13
+; CHECK-NEXT: mov v2.b[11], w8
+; CHECK-NEXT: sdiv w9, w1, w0
+; CHECK-NEXT: msub w8, w18, w16, w17
+; CHECK-NEXT: mov v2.b[12], w8
+; CHECK-NEXT: sdiv w12, w11, w10
+; CHECK-NEXT: msub w8, w9, w0, w1
+; CHECK-NEXT: mov v2.b[13], w8
+; CHECK-NEXT: sdiv w9, w14, w13
+; CHECK-NEXT: msub w8, w12, w10, w11
 ; CHECK-NEXT: mov v2.b[14], w8
-; CHECK-NEXT: msub w8, w11, w13, w14
+; CHECK-NEXT: msub w8, w9, w13, w14
 ; CHECK-NEXT: mov v2.b[15], w8
 ; CHECK-NEXT: mov v0.16b, v2.16b
-; CHECK-NEXT: ldp x26, x25, [sp], #64 // 16-byte Folded Reload
+; CHECK-NEXT: ldp x28, x27, [sp], #80 // 16-byte Folded Reload
 ; CHECK-NEXT: ret
  %tmp3 = srem <16 x i8> %A, %B;
  ret <16 x i8> %tmp3
@@ -935,19 +939,19 @@
 ; CHECK-NEXT: smov w9, v0.h[1]
 ; CHECK-NEXT: smov w14, v1.h[2]
 ; CHECK-NEXT: smov w15, v0.h[2]
+; CHECK-NEXT: smov w17, v1.h[3]
+; CHECK-NEXT: smov w18, v0.h[3]
 ; CHECK-NEXT: sdiv w13, w12, w11
-; CHECK-NEXT: msub w11, w13, w11, w12
-; CHECK-NEXT: smov w12, v1.h[3]
 ; CHECK-NEXT: sdiv w10, w9, w8
-; CHECK-NEXT: smov w13, v0.h[3]
+; CHECK-NEXT: msub w11, w13, w11, w12
 ; CHECK-NEXT: fmov s0, w11
-; CHECK-NEXT: msub w8, w10, w8, w9
 ; CHECK-NEXT: sdiv w16, w15, w14
+; CHECK-NEXT: msub w8, w10, w8, w9
 ; CHECK-NEXT: mov v0.h[1], w8
-; CHECK-NEXT: msub w10, w16, w14, w15
-; CHECK-NEXT: sdiv w9, w13, w12
-; CHECK-NEXT: mov v0.h[2], w10
-; CHECK-NEXT: msub w8, w9, w12, w13
+; CHECK-NEXT: sdiv w9, w18, w17
+; CHECK-NEXT: msub w8, w16, w14, w15
+; CHECK-NEXT: mov v0.h[2], w8
+; CHECK-NEXT: msub w8, w9, w17, w18
 ; CHECK-NEXT: mov v0.h[3], w8
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
 ; CHECK-NEXT: ret
@@ -966,37 +970,37 @@
 ; CHECK-NEXT: smov w15, v0.h[2]
 ; CHECK-NEXT: smov w17, v1.h[3]
 ; CHECK-NEXT: smov w18, v0.h[3]
-; CHECK-NEXT: sdiv w13, w12, w11
 ; CHECK-NEXT: smov w1, v1.h[4]
 ; CHECK-NEXT: smov w2, v0.h[4]
-; CHECK-NEXT: msub w11, w13, w11, w12
-; CHECK-NEXT: smov w12, v1.h[5]
+; CHECK-NEXT: smov w4, v1.h[5]
+; CHECK-NEXT: smov w5, v0.h[5]
+; CHECK-NEXT: sdiv w13, w12, w11
 ; CHECK-NEXT: sdiv w10, w9, w8
-; CHECK-NEXT: smov w13, v0.h[5]
+; CHECK-NEXT: msub w11, w13, w11, w12
+; CHECK-NEXT: smov w13, v1.h[7]
 ; CHECK-NEXT: fmov s2, w11
 ; CHECK-NEXT: smov w11, v0.h[6]
+; CHECK-NEXT: sdiv w16, w15, w14
 ; CHECK-NEXT: msub w8, w10, w8, w9
 ; CHECK-NEXT: smov w10, v1.h[6]
-; CHECK-NEXT: sdiv w16, w15, w14
 ; CHECK-NEXT: mov v2.h[1], w8
-; CHECK-NEXT: msub w8, w16, w14, w15
-; CHECK-NEXT: smov w15, v1.h[7]
 ; CHECK-NEXT: sdiv w0, w18, w17
-; CHECK-NEXT: smov w16, v0.h[7]
+; CHECK-NEXT: msub w8, w16, w14, w15
+; CHECK-NEXT: smov w14, v0.h[7]
 ; CHECK-NEXT: mov v2.h[2], w8
-; CHECK-NEXT: msub w14, w0, w17, w18
 ; CHECK-NEXT: sdiv w3, w2, w1
-; CHECK-NEXT: mov v2.h[3], w14
-; CHECK-NEXT: msub w14, w3, w1, w2
-; CHECK-NEXT: sdiv w9, w13, w12
-; CHECK-NEXT: mov v2.h[4], w14
-; CHECK-NEXT: msub w9, w9, w12, w13
-; CHECK-NEXT: sdiv w8, w11, w10
-; CHECK-NEXT: mov v2.h[5], w9
-; CHECK-NEXT: msub w8, w8, w10, w11
-; CHECK-NEXT: sdiv w12, w16, w15
+; CHECK-NEXT: msub w8, w0, w17, w18
+; CHECK-NEXT: mov v2.h[3], w8
+; CHECK-NEXT: sdiv w9, w5, w4
+; CHECK-NEXT: msub w8, w3, w1, w2
+; CHECK-NEXT: mov v2.h[4], w8
+; CHECK-NEXT: sdiv w12, w11, w10
+; CHECK-NEXT: msub w8, w9, w4, w5
+; CHECK-NEXT: mov v2.h[5], w8
+; CHECK-NEXT: sdiv w9, w14, w13
+; CHECK-NEXT: msub w8, w12, w10, w11
 ; CHECK-NEXT: mov v2.h[6], w8
-; CHECK-NEXT: msub w8, w12, w15, w16
+; CHECK-NEXT: msub w8, w9, w13, w14
 ; CHECK-NEXT: mov v2.h[7], w8
 ; CHECK-NEXT: mov v0.16b, v2.16b
 ; CHECK-NEXT: ret
@@ -1029,8 +1033,8 @@
 ; CHECK-NEXT: mov w11, v1.s[1]
 ; CHECK-NEXT: mov w12, v0.s[1]
 ; CHECK-NEXT: sdiv w10, w9, w8
-; CHECK-NEXT: msub w8, w10, w8, w9
 ; CHECK-NEXT: sdiv w13, w12, w11
+; CHECK-NEXT: msub w8, w10, w8, w9
 ; CHECK-NEXT: fmov s0, w8
 ; CHECK-NEXT: msub w9, w13, w11, w12
 ; CHECK-NEXT: mov v0.s[1], w9
@@ -1049,18 +1053,18 @@
 ; CHECK-NEXT: mov w9, v0.s[1]
 ; CHECK-NEXT: mov w14, v1.s[2]
 ; CHECK-NEXT: mov w15, v0.s[2]
-; CHECK-NEXT: sdiv w13, w12, w11
 ; CHECK-NEXT: mov w17, v1.s[3]
 ; CHECK-NEXT: mov w18, v0.s[3]
-; CHECK-NEXT: msub w11, w13, w11, w12
+; CHECK-NEXT: sdiv w13, w12, w11
 ; CHECK-NEXT: sdiv w10, w9, w8
+; CHECK-NEXT: msub w11, w13, w11, w12
 ; CHECK-NEXT: fmov s0, w11
-; CHECK-NEXT: msub w8, w10, w8, w9
 ; CHECK-NEXT: sdiv w16, w15, w14
+; CHECK-NEXT: msub w8, w10, w8, w9
 ; CHECK-NEXT: mov v0.s[1], w8
-; CHECK-NEXT: msub w10, w16, w14, w15
 ; CHECK-NEXT: sdiv w9, w18, w17
-; CHECK-NEXT: mov v0.s[2], w10
+; CHECK-NEXT: msub w8, w16, w14, w15
+; CHECK-NEXT: mov v0.s[2], w8
 ; CHECK-NEXT: msub w8, w9, w17, w18
 ; CHECK-NEXT: mov v0.s[3], w8
 ; CHECK-NEXT: ret
@@ -1091,8 +1095,8 @@
 ; CHECK-NEXT: mov x11, v1.d[1]
 ; CHECK-NEXT: mov x12, v0.d[1]
 ; CHECK-NEXT: sdiv x10, x9, x8
-; CHECK-NEXT: msub x8, x10, x8, x9
 ; CHECK-NEXT: sdiv x13, x12, x11
+; CHECK-NEXT: msub x8, x10, x8, x9
 ; CHECK-NEXT: fmov d0, x8
 ; CHECK-NEXT: msub x9, x13, x11, x12
 ; CHECK-NEXT: mov v0.d[1], x9
@@ -1129,37 +1133,37 @@
 ; CHECK-NEXT: umov w15, v0.b[2]
 ; CHECK-NEXT: umov w17, v1.b[3]
 ; CHECK-NEXT: umov w18, v0.b[3]
-; CHECK-NEXT: udiv w13, w12, w11
 ; CHECK-NEXT: umov w1, v1.b[4]
 ; CHECK-NEXT: umov w2, v0.b[4]
-; CHECK-NEXT: msub w11, w13, w11, w12
-; CHECK-NEXT: umov w12, v1.b[5]
+; CHECK-NEXT: umov w4, v1.b[5]
+; CHECK-NEXT: umov w5, v0.b[5]
+; CHECK-NEXT: udiv w13, w12, w11
 ; CHECK-NEXT: udiv w10, w9, w8
-; CHECK-NEXT: umov w13, v0.b[5]
+; CHECK-NEXT: msub w11, w13, w11, w12
+; CHECK-NEXT: umov w13, v1.b[7]
 ; CHECK-NEXT: fmov s2, w11
 ; CHECK-NEXT: umov w11, v0.b[6]
+; CHECK-NEXT: udiv w16, w15, w14
 ; CHECK-NEXT: msub w8, w10, w8, w9
 ; CHECK-NEXT: umov w10, v1.b[6]
-; CHECK-NEXT: udiv w16, w15, w14
 ; CHECK-NEXT: mov v2.b[1], w8
-; CHECK-NEXT: msub w8, w16, w14, w15
-; CHECK-NEXT: umov w15, v1.b[7]
 ; CHECK-NEXT: udiv w0, w18, w17
-; CHECK-NEXT: umov w16, v0.b[7]
+; CHECK-NEXT: msub w8, w16, w14, w15
+; CHECK-NEXT: umov w14, v0.b[7]
 ; CHECK-NEXT: mov v2.b[2], w8
CHECK-NEXT: msub w14, w0, w17, w18 ; CHECK-NEXT: udiv w3, w2, w1 -; CHECK-NEXT: mov v2.b[3], w14 -; CHECK-NEXT: msub w14, w3, w1, w2 -; CHECK-NEXT: udiv w9, w13, w12 -; CHECK-NEXT: mov v2.b[4], w14 -; CHECK-NEXT: msub w9, w9, w12, w13 -; CHECK-NEXT: udiv w8, w11, w10 -; CHECK-NEXT: mov v2.b[5], w9 -; CHECK-NEXT: msub w8, w8, w10, w11 -; CHECK-NEXT: udiv w12, w16, w15 +; CHECK-NEXT: msub w8, w0, w17, w18 +; CHECK-NEXT: mov v2.b[3], w8 +; CHECK-NEXT: udiv w9, w5, w4 +; CHECK-NEXT: msub w8, w3, w1, w2 +; CHECK-NEXT: mov v2.b[4], w8 +; CHECK-NEXT: udiv w12, w11, w10 +; CHECK-NEXT: msub w8, w9, w4, w5 +; CHECK-NEXT: mov v2.b[5], w8 +; CHECK-NEXT: udiv w9, w14, w13 +; CHECK-NEXT: msub w8, w12, w10, w11 ; CHECK-NEXT: mov v2.b[6], w8 -; CHECK-NEXT: msub w8, w12, w15, w16 +; CHECK-NEXT: msub w8, w9, w13, w14 ; CHECK-NEXT: mov v2.b[7], w8 ; CHECK-NEXT: fmov d0, d2 ; CHECK-NEXT: ret @@ -1170,11 +1174,12 @@ define <16 x i8> @urem16x8(<16 x i8> %A, <16 x i8> %B) { ; CHECK-LABEL: urem16x8: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x26, x25, [sp, #-64]! // 16-byte Folded Spill -; CHECK-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: stp x28, x27, [sp, #-80]! // 16-byte Folded Spill +; CHECK-NEXT: stp x26, x25, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp x24, x23, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp x22, x21, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 80 ; CHECK-NEXT: .cfi_offset w19, -8 ; CHECK-NEXT: .cfi_offset w20, -16 ; CHECK-NEXT: .cfi_offset w21, -24 @@ -1183,6 +1188,8 @@ ; CHECK-NEXT: .cfi_offset w24, -48 ; CHECK-NEXT: .cfi_offset w25, -56 ; CHECK-NEXT: .cfi_offset w26, -64 +; CHECK-NEXT: .cfi_offset w27, -72 +; CHECK-NEXT: .cfi_offset w28, -80 ; CHECK-NEXT: umov w11, v1.b[0] ; CHECK-NEXT: umov w12, v0.b[0] ; CHECK-NEXT: umov w8, v1.b[1] @@ -1191,83 +1198,84 @@ ; CHECK-NEXT: umov w15, v0.b[2] ; CHECK-NEXT: umov w17, v1.b[3] ; CHECK-NEXT: umov w18, v0.b[3] -; CHECK-NEXT: udiv w13, w12, w11 ; CHECK-NEXT: umov w1, v1.b[4] ; CHECK-NEXT: umov w2, v0.b[4] ; CHECK-NEXT: umov w4, v1.b[5] ; CHECK-NEXT: umov w5, v0.b[5] +; CHECK-NEXT: udiv w13, w12, w11 ; CHECK-NEXT: umov w7, v1.b[6] ; CHECK-NEXT: umov w19, v0.b[6] ; CHECK-NEXT: umov w21, v1.b[7] ; CHECK-NEXT: umov w22, v0.b[7] ; CHECK-NEXT: umov w24, v1.b[8] ; CHECK-NEXT: umov w25, v0.b[8] -; CHECK-NEXT: msub w11, w13, w11, w12 -; CHECK-NEXT: umov w12, v1.b[9] +; CHECK-NEXT: umov w27, v1.b[9] +; CHECK-NEXT: umov w28, v0.b[9] ; CHECK-NEXT: udiv w10, w9, w8 -; CHECK-NEXT: umov w13, v0.b[9] +; CHECK-NEXT: msub w11, w13, w11, w12 +; CHECK-NEXT: umov w13, v1.b[11] ; CHECK-NEXT: fmov s2, w11 ; CHECK-NEXT: umov w11, v0.b[10] +; CHECK-NEXT: udiv w16, w15, w14 ; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: umov w10, v1.b[10] -; CHECK-NEXT: udiv w16, w15, w14 ; CHECK-NEXT: mov v2.b[1], w8 -; CHECK-NEXT: msub w8, w16, w14, w15 -; CHECK-NEXT: umov w15, v1.b[11] ; CHECK-NEXT: udiv w0, w18, w17 -; CHECK-NEXT: umov w16, v0.b[11] +; CHECK-NEXT: msub w8, w16, w14, w15 +; CHECK-NEXT: umov w14, v0.b[11] +; CHECK-NEXT: umov w16, v1.b[12] ; CHECK-NEXT: mov v2.b[2], w8 -; CHECK-NEXT: msub w14, w0, w17, w18 -; CHECK-NEXT: umov w18, v1.b[12] ; CHECK-NEXT: udiv w3, w2, w1 -; CHECK-NEXT: umov w0, v0.b[12] -; CHECK-NEXT: mov v2.b[3], w14 -; CHECK-NEXT: msub w14, w3, w1, w2 -; CHECK-NEXT: 
umov w2, v1.b[13] +; CHECK-NEXT: msub w8, w0, w17, w18 +; CHECK-NEXT: umov w17, v0.b[12] +; CHECK-NEXT: umov w0, v1.b[13] +; CHECK-NEXT: mov v2.b[3], w8 ; CHECK-NEXT: udiv w6, w5, w4 -; CHECK-NEXT: umov w3, v0.b[13] -; CHECK-NEXT: mov v2.b[4], w14 -; CHECK-NEXT: msub w17, w6, w4, w5 +; CHECK-NEXT: msub w8, w3, w1, w2 +; CHECK-NEXT: umov w1, v0.b[13] +; CHECK-NEXT: mov v2.b[4], w8 ; CHECK-NEXT: udiv w20, w19, w7 -; CHECK-NEXT: mov v2.b[5], w17 -; CHECK-NEXT: msub w17, w20, w7, w19 +; CHECK-NEXT: msub w8, w6, w4, w5 +; CHECK-NEXT: mov v2.b[5], w8 ; CHECK-NEXT: udiv w23, w22, w21 -; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: mov v2.b[6], w17 -; CHECK-NEXT: msub w1, w23, w21, w22 +; CHECK-NEXT: msub w8, w20, w7, w19 +; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: mov v2.b[6], w8 ; CHECK-NEXT: udiv w26, w25, w24 -; CHECK-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: mov v2.b[7], w1 -; CHECK-NEXT: msub w1, w26, w24, w25 -; CHECK-NEXT: udiv w9, w13, w12 -; CHECK-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: mov v2.b[8], w1 -; CHECK-NEXT: msub w9, w9, w12, w13 -; CHECK-NEXT: umov w13, v1.b[15] -; CHECK-NEXT: udiv w8, w11, w10 -; CHECK-NEXT: mov v2.b[9], w9 -; CHECK-NEXT: umov w9, v1.b[14] -; CHECK-NEXT: msub w8, w8, w10, w11 -; CHECK-NEXT: umov w10, v0.b[14] -; CHECK-NEXT: udiv w14, w16, w15 +; CHECK-NEXT: msub w8, w23, w21, w22 +; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: mov v2.b[7], w8 +; CHECK-NEXT: udiv w9, w28, w27 +; CHECK-NEXT: msub w8, w26, w24, w25 +; CHECK-NEXT: ldp x24, x23, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp x26, x25, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: mov v2.b[8], w8 +; CHECK-NEXT: udiv w12, w11, w10 +; CHECK-NEXT: msub w8, w9, w27, w28 +; CHECK-NEXT: mov v2.b[9], w8 +; CHECK-NEXT: udiv w15, w14, w13 +; CHECK-NEXT: msub w8, w12, w10, w11 +; CHECK-NEXT: umov w10, v1.b[14] +; CHECK-NEXT: umov w11, v0.b[14] ; CHECK-NEXT: mov v2.b[10], w8 -; CHECK-NEXT: msub w11, w14, w15, w16 +; CHECK-NEXT: udiv w18, w17, w16 +; CHECK-NEXT: msub w8, w15, w13, w14 +; CHECK-NEXT: umov w13, v1.b[15] ; CHECK-NEXT: umov w14, v0.b[15] -; CHECK-NEXT: udiv w17, w0, w18 -; CHECK-NEXT: mov v2.b[11], w11 -; CHECK-NEXT: msub w11, w17, w18, w0 -; CHECK-NEXT: udiv w12, w3, w2 -; CHECK-NEXT: mov v2.b[12], w11 -; CHECK-NEXT: msub w12, w12, w2, w3 -; CHECK-NEXT: udiv w8, w10, w9 -; CHECK-NEXT: mov v2.b[13], w12 -; CHECK-NEXT: msub w8, w8, w9, w10 -; CHECK-NEXT: udiv w11, w14, w13 +; CHECK-NEXT: mov v2.b[11], w8 +; CHECK-NEXT: udiv w9, w1, w0 +; CHECK-NEXT: msub w8, w18, w16, w17 +; CHECK-NEXT: mov v2.b[12], w8 +; CHECK-NEXT: udiv w12, w11, w10 +; CHECK-NEXT: msub w8, w9, w0, w1 +; CHECK-NEXT: mov v2.b[13], w8 +; CHECK-NEXT: udiv w9, w14, w13 +; CHECK-NEXT: msub w8, w12, w10, w11 ; CHECK-NEXT: mov v2.b[14], w8 -; CHECK-NEXT: msub w8, w11, w13, w14 +; CHECK-NEXT: msub w8, w9, w13, w14 ; CHECK-NEXT: mov v2.b[15], w8 ; CHECK-NEXT: mov v0.16b, v2.16b -; CHECK-NEXT: ldp x26, x25, [sp], #64 // 16-byte Folded Reload +; CHECK-NEXT: ldp x28, x27, [sp], #80 // 16-byte Folded Reload ; CHECK-NEXT: ret %tmp3 = urem <16 x i8> %A, %B; ret <16 x i8> %tmp3 @@ -1299,19 +1307,19 @@ ; CHECK-NEXT: umov w9, v0.h[1] ; CHECK-NEXT: umov w14, v1.h[2] ; CHECK-NEXT: umov w15, v0.h[2] +; CHECK-NEXT: umov w17, v1.h[3] +; CHECK-NEXT: umov w18, v0.h[3] ; CHECK-NEXT: udiv w13, w12, w11 -; CHECK-NEXT: msub w11, w13, w11, w12 -; CHECK-NEXT: umov w12, v1.h[3] ; CHECK-NEXT: udiv 
w10, w9, w8 -; CHECK-NEXT: umov w13, v0.h[3] +; CHECK-NEXT: msub w11, w13, w11, w12 ; CHECK-NEXT: fmov s0, w11 -; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: udiv w16, w15, w14 +; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: msub w10, w16, w14, w15 -; CHECK-NEXT: udiv w9, w13, w12 -; CHECK-NEXT: mov v0.h[2], w10 -; CHECK-NEXT: msub w8, w9, w12, w13 +; CHECK-NEXT: udiv w9, w18, w17 +; CHECK-NEXT: msub w8, w16, w14, w15 +; CHECK-NEXT: mov v0.h[2], w8 +; CHECK-NEXT: msub w8, w9, w17, w18 ; CHECK-NEXT: mov v0.h[3], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret @@ -1330,37 +1338,37 @@ ; CHECK-NEXT: umov w15, v0.h[2] ; CHECK-NEXT: umov w17, v1.h[3] ; CHECK-NEXT: umov w18, v0.h[3] -; CHECK-NEXT: udiv w13, w12, w11 ; CHECK-NEXT: umov w1, v1.h[4] ; CHECK-NEXT: umov w2, v0.h[4] -; CHECK-NEXT: msub w11, w13, w11, w12 -; CHECK-NEXT: umov w12, v1.h[5] +; CHECK-NEXT: umov w4, v1.h[5] +; CHECK-NEXT: umov w5, v0.h[5] +; CHECK-NEXT: udiv w13, w12, w11 ; CHECK-NEXT: udiv w10, w9, w8 -; CHECK-NEXT: umov w13, v0.h[5] +; CHECK-NEXT: msub w11, w13, w11, w12 +; CHECK-NEXT: umov w13, v1.h[7] ; CHECK-NEXT: fmov s2, w11 ; CHECK-NEXT: umov w11, v0.h[6] +; CHECK-NEXT: udiv w16, w15, w14 ; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: umov w10, v1.h[6] -; CHECK-NEXT: udiv w16, w15, w14 ; CHECK-NEXT: mov v2.h[1], w8 -; CHECK-NEXT: msub w8, w16, w14, w15 -; CHECK-NEXT: umov w15, v1.h[7] ; CHECK-NEXT: udiv w0, w18, w17 -; CHECK-NEXT: umov w16, v0.h[7] +; CHECK-NEXT: msub w8, w16, w14, w15 +; CHECK-NEXT: umov w14, v0.h[7] ; CHECK-NEXT: mov v2.h[2], w8 -; CHECK-NEXT: msub w14, w0, w17, w18 ; CHECK-NEXT: udiv w3, w2, w1 -; CHECK-NEXT: mov v2.h[3], w14 -; CHECK-NEXT: msub w14, w3, w1, w2 -; CHECK-NEXT: udiv w9, w13, w12 -; CHECK-NEXT: mov v2.h[4], w14 -; CHECK-NEXT: msub w9, w9, w12, w13 -; CHECK-NEXT: udiv w8, w11, w10 -; CHECK-NEXT: mov v2.h[5], w9 -; CHECK-NEXT: msub w8, w8, w10, w11 -; CHECK-NEXT: udiv w12, w16, w15 +; CHECK-NEXT: msub w8, w0, w17, w18 +; CHECK-NEXT: mov v2.h[3], w8 +; CHECK-NEXT: udiv w9, w5, w4 +; CHECK-NEXT: msub w8, w3, w1, w2 +; CHECK-NEXT: mov v2.h[4], w8 +; CHECK-NEXT: udiv w12, w11, w10 +; CHECK-NEXT: msub w8, w9, w4, w5 +; CHECK-NEXT: mov v2.h[5], w8 +; CHECK-NEXT: udiv w9, w14, w13 +; CHECK-NEXT: msub w8, w12, w10, w11 ; CHECK-NEXT: mov v2.h[6], w8 -; CHECK-NEXT: msub w8, w12, w15, w16 +; CHECK-NEXT: msub w8, w9, w13, w14 ; CHECK-NEXT: mov v2.h[7], w8 ; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret @@ -1393,8 +1401,8 @@ ; CHECK-NEXT: mov w11, v1.s[1] ; CHECK-NEXT: mov w12, v0.s[1] ; CHECK-NEXT: udiv w10, w9, w8 -; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: udiv w13, w12, w11 +; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: fmov s0, w8 ; CHECK-NEXT: msub w9, w13, w11, w12 ; CHECK-NEXT: mov v0.s[1], w9 @@ -1413,18 +1421,18 @@ ; CHECK-NEXT: mov w9, v0.s[1] ; CHECK-NEXT: mov w14, v1.s[2] ; CHECK-NEXT: mov w15, v0.s[2] -; CHECK-NEXT: udiv w13, w12, w11 ; CHECK-NEXT: mov w17, v1.s[3] ; CHECK-NEXT: mov w18, v0.s[3] -; CHECK-NEXT: msub w11, w13, w11, w12 +; CHECK-NEXT: udiv w13, w12, w11 ; CHECK-NEXT: udiv w10, w9, w8 +; CHECK-NEXT: msub w11, w13, w11, w12 ; CHECK-NEXT: fmov s0, w11 -; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: udiv w16, w15, w14 +; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: msub w10, w16, w14, w15 ; CHECK-NEXT: udiv w9, w18, w17 -; CHECK-NEXT: mov v0.s[2], w10 +; CHECK-NEXT: msub w8, w16, w14, w15 +; CHECK-NEXT: mov v0.s[2], w8 ; CHECK-NEXT: msub w8, w9, w17, w18 ; 
CHECK-NEXT: mov v0.s[3], w8 ; CHECK-NEXT: ret @@ -1455,8 +1463,8 @@ ; CHECK-NEXT: mov x11, v1.d[1] ; CHECK-NEXT: mov x12, v0.d[1] ; CHECK-NEXT: udiv x10, x9, x8 -; CHECK-NEXT: msub x8, x10, x8, x9 ; CHECK-NEXT: udiv x13, x12, x11 +; CHECK-NEXT: msub x8, x10, x8, x9 ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: msub x9, x13, x11, x12 ; CHECK-NEXT: mov v0.d[1], x9 diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll b/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll --- a/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-v8.1a.ll @@ -614,11 +614,11 @@ define i16 @test_vqrdmlahh_s16(i16 %a, i16 %b, i16 %c) { ; CHECK-LABEL: test_vqrdmlahh_s16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov s0, w1 -; CHECK-NEXT: fmov s1, w0 +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: fmov s1, w1 ; CHECK-NEXT: fmov s2, w2 -; CHECK-NEXT: sqrdmlah v1.4h, v0.4h, v2.4h -; CHECK-NEXT: umov w0, v1.h[0] +; CHECK-NEXT: sqrdmlah v0.4h, v1.4h, v2.4h +; CHECK-NEXT: umov w0, v0.h[0] ; CHECK-NEXT: ret entry: %0 = insertelement <4 x i16> undef, i16 %a, i64 0 @@ -632,11 +632,11 @@ define i32 @test_vqrdmlahs_s32(i32 %a, i32 %b, i32 %c) { ; CHECK-LABEL: test_vqrdmlahs_s32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov s0, w1 -; CHECK-NEXT: fmov s1, w0 +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: fmov s1, w1 ; CHECK-NEXT: fmov s2, w2 -; CHECK-NEXT: sqrdmlah s1, s0, s2 -; CHECK-NEXT: fmov w0, s1 +; CHECK-NEXT: sqrdmlah s0, s1, s2 +; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret entry: %vqrdmlahs_s32.i = tail call i32 @llvm.aarch64.neon.sqrdmlah.i32(i32 %a, i32 %b, i32 %c) #4 @@ -646,11 +646,11 @@ define i16 @test_vqrdmlahh_lane_s16(i16 %a, i16 %b, <4 x i16> %c) { ; CHECK-LABEL: test_vqrdmlahh_lane_s16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov s1, w1 -; CHECK-NEXT: fmov s2, w0 +; CHECK-NEXT: fmov s1, w0 +; CHECK-NEXT: fmov s2, w1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: sqrdmlah v2.4h, v1.4h, v0.h[3] -; CHECK-NEXT: umov w0, v2.h[0] +; CHECK-NEXT: sqrdmlah v1.4h, v2.4h, v0.h[3] +; CHECK-NEXT: umov w0, v1.h[0] ; CHECK-NEXT: ret entry: %0 = insertelement <4 x i16> undef, i16 %a, i64 0 @@ -664,11 +664,11 @@ define i32 @test_vqrdmlahs_lane_s32(i32 %a, i32 %b, <2 x i32> %c) { ; CHECK-LABEL: test_vqrdmlahs_lane_s32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov s1, w1 -; CHECK-NEXT: fmov s2, w0 +; CHECK-NEXT: fmov s1, w0 +; CHECK-NEXT: fmov s2, w1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: sqrdmlah s2, s1, v0.s[1] -; CHECK-NEXT: fmov w0, s2 +; CHECK-NEXT: sqrdmlah s1, s2, v0.s[1] +; CHECK-NEXT: fmov w0, s1 ; CHECK-NEXT: ret entry: %vget_lane = extractelement <2 x i32> %c, i64 1 @@ -679,10 +679,10 @@ define i16 @test_vqrdmlahh_laneq_s16(i16 %a, i16 %b, <8 x i16> %c) { ; CHECK-LABEL: test_vqrdmlahh_laneq_s16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov s1, w1 -; CHECK-NEXT: fmov s2, w0 -; CHECK-NEXT: sqrdmlah v2.4h, v1.4h, v0.h[7] -; CHECK-NEXT: umov w0, v2.h[0] +; CHECK-NEXT: fmov s1, w0 +; CHECK-NEXT: fmov s2, w1 +; CHECK-NEXT: sqrdmlah v1.4h, v2.4h, v0.h[7] +; CHECK-NEXT: umov w0, v1.h[0] ; CHECK-NEXT: ret entry: %0 = insertelement <4 x i16> undef, i16 %a, i64 0 @@ -696,10 +696,10 @@ define i32 @test_vqrdmlahs_laneq_s32(i32 %a, i32 %b, <4 x i32> %c) { ; CHECK-LABEL: test_vqrdmlahs_laneq_s32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov s1, w1 -; CHECK-NEXT: fmov s2, w0 -; CHECK-NEXT: sqrdmlah s2, s1, v0.s[3] -; CHECK-NEXT: fmov w0, s2 +; CHECK-NEXT: fmov s1, w0 +; CHECK-NEXT: fmov s2, w1 +; CHECK-NEXT: sqrdmlah s1, s2, v0.s[3] +; CHECK-NEXT: 
fmov w0, s1 ; CHECK-NEXT: ret entry: %vgetq_lane = extractelement <4 x i32> %c, i64 3 @@ -754,11 +754,11 @@ define i16 @test_vqrdmlshh_s16(i16 %a, i16 %b, i16 %c) { ; CHECK-LABEL: test_vqrdmlshh_s16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov s0, w1 -; CHECK-NEXT: fmov s1, w0 +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: fmov s1, w1 ; CHECK-NEXT: fmov s2, w2 -; CHECK-NEXT: sqrdmlsh v1.4h, v0.4h, v2.4h -; CHECK-NEXT: umov w0, v1.h[0] +; CHECK-NEXT: sqrdmlsh v0.4h, v1.4h, v2.4h +; CHECK-NEXT: umov w0, v0.h[0] ; CHECK-NEXT: ret entry: %0 = insertelement <4 x i16> undef, i16 %a, i64 0 @@ -772,11 +772,11 @@ define i32 @test_vqrdmlshs_s32(i32 %a, i32 %b, i32 %c) { ; CHECK-LABEL: test_vqrdmlshs_s32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov s0, w1 -; CHECK-NEXT: fmov s1, w0 +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: fmov s1, w1 ; CHECK-NEXT: fmov s2, w2 -; CHECK-NEXT: sqrdmlsh s1, s0, s2 -; CHECK-NEXT: fmov w0, s1 +; CHECK-NEXT: sqrdmlsh s0, s1, s2 +; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret entry: %vqrdmlshs_s32.i = tail call i32 @llvm.aarch64.neon.sqrdmlsh.i32(i32 %a, i32 %b, i32 %c) #4 @@ -786,11 +786,11 @@ define i16 @test_vqrdmlshh_lane_s16(i16 %a, i16 %b, <4 x i16> %c) { ; CHECK-LABEL: test_vqrdmlshh_lane_s16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov s1, w1 -; CHECK-NEXT: fmov s2, w0 +; CHECK-NEXT: fmov s1, w0 +; CHECK-NEXT: fmov s2, w1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: sqrdmlsh v2.4h, v1.4h, v0.h[3] -; CHECK-NEXT: umov w0, v2.h[0] +; CHECK-NEXT: sqrdmlsh v1.4h, v2.4h, v0.h[3] +; CHECK-NEXT: umov w0, v1.h[0] ; CHECK-NEXT: ret entry: %0 = insertelement <4 x i16> undef, i16 %a, i64 0 @@ -804,11 +804,11 @@ define i32 @test_vqrdmlshs_lane_s32(i32 %a, i32 %b, <2 x i32> %c) { ; CHECK-LABEL: test_vqrdmlshs_lane_s32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov s1, w1 -; CHECK-NEXT: fmov s2, w0 +; CHECK-NEXT: fmov s1, w0 +; CHECK-NEXT: fmov s2, w1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: sqrdmlsh s2, s1, v0.s[1] -; CHECK-NEXT: fmov w0, s2 +; CHECK-NEXT: sqrdmlsh s1, s2, v0.s[1] +; CHECK-NEXT: fmov w0, s1 ; CHECK-NEXT: ret entry: %vget_lane = extractelement <2 x i32> %c, i64 1 @@ -819,10 +819,10 @@ define i16 @test_vqrdmlshh_laneq_s16(i16 %a, i16 %b, <8 x i16> %c) { ; CHECK-LABEL: test_vqrdmlshh_laneq_s16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov s1, w1 -; CHECK-NEXT: fmov s2, w0 -; CHECK-NEXT: sqrdmlsh v2.4h, v1.4h, v0.h[7] -; CHECK-NEXT: umov w0, v2.h[0] +; CHECK-NEXT: fmov s1, w0 +; CHECK-NEXT: fmov s2, w1 +; CHECK-NEXT: sqrdmlsh v1.4h, v2.4h, v0.h[7] +; CHECK-NEXT: umov w0, v1.h[0] ; CHECK-NEXT: ret entry: %0 = insertelement <4 x i16> undef, i16 %a, i64 0 @@ -836,10 +836,10 @@ define i32 @test_vqrdmlshs_laneq_s32(i32 %a, i32 %b, <4 x i32> %c) { ; CHECK-LABEL: test_vqrdmlshs_laneq_s32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov s1, w1 -; CHECK-NEXT: fmov s2, w0 -; CHECK-NEXT: sqrdmlsh s2, s1, v0.s[3] -; CHECK-NEXT: fmov w0, s2 +; CHECK-NEXT: fmov s1, w0 +; CHECK-NEXT: fmov s2, w1 +; CHECK-NEXT: sqrdmlsh s1, s2, v0.s[3] +; CHECK-NEXT: fmov w0, s1 ; CHECK-NEXT: ret entry: %vgetq_lane = extractelement <4 x i32> %c, i64 3 diff --git a/llvm/test/CodeGen/AArch64/arm64-non-pow2-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-non-pow2-ldst.ll --- a/llvm/test/CodeGen/AArch64/arm64-non-pow2-ldst.ll +++ b/llvm/test/CodeGen/AArch64/arm64-non-pow2-ldst.ll @@ -17,9 +17,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldrb w8, [x0, #6] ; CHECK-NEXT: ldrh w9, [x0, #4] -; CHECK-NEXT: ldr w10, [x0] ; CHECK-NEXT: orr w8, w9, w8, lsl #16 -; 
CHECK-NEXT: orr x0, x10, x8, lsl #32 +; CHECK-NEXT: ldr w9, [x0] +; CHECK-NEXT: orr x0, x9, x8, lsl #32 ; CHECK-NEXT: ret %r = load i56, i56* %p ret i56 %r @@ -41,10 +41,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldrb w8, [x0, #14] ; CHECK-NEXT: ldrh w9, [x0, #12] -; CHECK-NEXT: ldr w10, [x0, #8] -; CHECK-NEXT: ldr x0, [x0] ; CHECK-NEXT: orr w8, w9, w8, lsl #16 -; CHECK-NEXT: orr x1, x10, x8, lsl #32 +; CHECK-NEXT: ldr w9, [x0, #8] +; CHECK-NEXT: ldr x0, [x0] +; CHECK-NEXT: orr x1, x9, x8, lsl #32 ; CHECK-NEXT: ret %r = load i120, i120* %p ret i120 %r @@ -53,12 +53,12 @@ define i280 @ldi280(ptr %p) nounwind { ; CHECK-LABEL: ldi280: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp x8, x1, [x0] ; CHECK-NEXT: ldrb w9, [x0, #34] ; CHECK-NEXT: ldrh w10, [x0, #32] +; CHECK-NEXT: ldp x8, x1, [x0] ; CHECK-NEXT: ldp x2, x3, [x0, #16] -; CHECK-NEXT: mov x0, x8 ; CHECK-NEXT: orr x4, x10, x9, lsl #16 +; CHECK-NEXT: mov x0, x8 ; CHECK-NEXT: ret %r = load i280, i280* %p ret i280 %r @@ -128,15 +128,15 @@ define void @i56_or(ptr %a) { ; CHECK-LABEL: i56_or: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, x0 -; CHECK-NEXT: ldr w9, [x0] -; CHECK-NEXT: ldrh w10, [x8, #4]! -; CHECK-NEXT: ldrb w11, [x8, #2] -; CHECK-NEXT: orr w9, w9, #0x180 -; CHECK-NEXT: orr w10, w10, w11, lsl #16 -; CHECK-NEXT: str w9, [x0] -; CHECK-NEXT: strb w11, [x8, #2] -; CHECK-NEXT: strh w10, [x8] +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: mov x9, x0 +; CHECK-NEXT: ldrh w10, [x9, #4]! +; CHECK-NEXT: ldrb w11, [x9, #2] +; CHECK-NEXT: orr w8, w8, #0x180 +; CHECK-NEXT: str w8, [x0] +; CHECK-NEXT: orr w8, w10, w11, lsl #16 +; CHECK-NEXT: strb w11, [x9, #2] +; CHECK-NEXT: strh w8, [x9] ; CHECK-NEXT: ret %aa = load i56, ptr %a, align 1 %b = or i56 %aa, 384 @@ -147,16 +147,16 @@ define void @i56_and_or(ptr %a) { ; CHECK-LABEL: i56_and_or: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, x0 -; CHECK-NEXT: ldr w9, [x0] -; CHECK-NEXT: ldrh w10, [x8, #4]! -; CHECK-NEXT: ldrb w11, [x8, #2] -; CHECK-NEXT: orr w9, w9, #0x180 -; CHECK-NEXT: and w9, w9, #0xffffff80 -; CHECK-NEXT: orr w10, w10, w11, lsl #16 -; CHECK-NEXT: strb w11, [x8, #2] -; CHECK-NEXT: str w9, [x0] -; CHECK-NEXT: strh w10, [x8] +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: mov x9, x0 +; CHECK-NEXT: ldrh w10, [x9, #4]! 
+; CHECK-NEXT: ldrb w11, [x9, #2] +; CHECK-NEXT: orr w8, w8, #0x180 +; CHECK-NEXT: and w8, w8, #0xffffff80 +; CHECK-NEXT: strb w11, [x9, #2] +; CHECK-NEXT: str w8, [x0] +; CHECK-NEXT: orr w8, w10, w11, lsl #16 +; CHECK-NEXT: strh w8, [x9] ; CHECK-NEXT: ret %b = load i56, ptr %a, align 1 %c = and i56 %b, -128 @@ -175,8 +175,8 @@ ; CHECK-NEXT: orr w9, w9, w10, lsl #16 ; CHECK-NEXT: strb w10, [x8, #2] ; CHECK-NEXT: orr x11, x11, x9, lsl #32 -; CHECK-NEXT: and x11, x11, #0xffffffffffffdfff ; CHECK-NEXT: strh w9, [x8] +; CHECK-NEXT: and x11, x11, #0xffffffffffffdfff ; CHECK-NEXT: orr w11, w11, w1, lsl #13 ; CHECK-NEXT: str w11, [x0] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/arm64-nvcast.ll b/llvm/test/CodeGen/AArch64/arm64-nvcast.ll --- a/llvm/test/CodeGen/AArch64/arm64-nvcast.ll +++ b/llvm/test/CodeGen/AArch64/arm64-nvcast.ll @@ -51,8 +51,8 @@ define internal void @nvcast_f32_v8i8() { ; CHECK-LABEL: nvcast_f32_v8i8: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: adrp x8, __gv@GOTPAGE ; CHECK-NEXT: movi.8b v0, #254 +; CHECK-NEXT: adrp x8, __gv@GOTPAGE ; CHECK-NEXT: ldr x8, [x8, __gv@GOTPAGEOFF] ; CHECK-NEXT: str d0, [x8] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/arm64-promote-const-complex-initializers.ll b/llvm/test/CodeGen/AArch64/arm64-promote-const-complex-initializers.ll --- a/llvm/test/CodeGen/AArch64/arm64-promote-const-complex-initializers.ll +++ b/llvm/test/CodeGen/AArch64/arm64-promote-const-complex-initializers.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -o - %s | FileCheck %s ; AsmPrinter cannot lower floating point constant expressions in global @@ -8,54 +9,37 @@ target triple = "arm64-apple-ios14.0.0" define [1 x <4 x float>] @test1() { -; CHECK-LABEL: .p2align 4, 0x0 ; -- Begin function test1 -; CHECK-NEXT: lCPI0_0: -; CHECK-NEXT: .quad 0 ; 0x0 -; CHECK-NEXT: .quad 4575657221408423936 ; 0x3f80000000000000 -; CHECK-NEXT: .section __TEXT,__text,regular,pure_instructions -; CHECK-NEXT: .globl _test1 -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: _test1: ; @test1 -; CHECK-NEXT: .cfi_startproc -; CHECK-NEXT: ; %bb.0: -; CHECK-NEXT: Lloh0: -; CHECK-NEXT: adrp x8, lCPI0_0@PAGE -; CHECK-NEXT: Lloh1: -; CHECK-NEXT: ldr q0, [x8, lCPI0_0@PAGEOFF] -; CHECK-NEXT: ret +; CHECK-LABEL: test1: +; CHECK: ; %bb.0: +; CHECK-NEXT: Lloh0: +; CHECK-NEXT: adrp x8, lCPI0_0@PAGE +; CHECK-NEXT: Lloh1: +; CHECK-NEXT: ldr q0, [x8, lCPI0_0@PAGEOFF] +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdr Lloh0, Lloh1 ret [1 x <4 x float>] [<4 x float> bitcast (<1 x i128> to <4 x float>)] } define [1 x <4 x float>] @test2() { -; CHECK-LABEL: .p2align 4, 0x0 ; -- Begin function test2 -; CHECK-NEXT: lCPI1_0: -; CHECK-NEXT: .long 0x00000000 ; float 0 -; CHECK-NEXT: .long 0x00000000 ; float 0 -; CHECK-NEXT: .long 0x00000000 ; float 0 -; CHECK-NEXT: .long 0x3f800000 ; float 1 -; CHECK-NEXT: .section __TEXT,__text,regular,pure_instructions -; CHECK-NEXT: .globl _test2 -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: _test2: ; @test2 -; CHECK-NEXT: .cfi_startproc -; CHECK-NEXT: ; %bb.0: -; CHECK-NEXT: Lloh2: -; CHECK-NEXT: adrp x8, lCPI1_0@PAGE -; CHECK-NEXT: Lloh3: -; CHECK-NEXT: ldr q1, [x8, lCPI1_0@PAGEOFF] -; CHECK-NEXT: mov s2, v1[1] -; CHECK-NEXT: mov s3, v1[2] -; CHECK-NEXT: fneg s0, s1 -; CHECK-NEXT: mov s1, v1[3] -; CHECK-NEXT: fneg s2, s2 -; CHECK-NEXT: fneg s3, s3 -; CHECK-NEXT: fneg s1, s1 -; CHECK-NEXT: mov.s v0[1], v2[0] -; CHECK-NEXT: mov.s v0[2], v3[0] -; CHECK-NEXT: mov.s v0[3], v1[0] -; CHECK-NEXT: ret -; +; 
CHECK-LABEL: test2: +; CHECK: ; %bb.0: +; CHECK-NEXT: Lloh2: +; CHECK-NEXT: adrp x8, lCPI1_0@PAGE +; CHECK-NEXT: Lloh3: +; CHECK-NEXT: ldr q1, [x8, lCPI1_0@PAGEOFF] +; CHECK-NEXT: mov s2, v1[1] +; CHECK-NEXT: fneg s0, s1 +; CHECK-NEXT: mov s3, v1[2] +; CHECK-NEXT: mov s1, v1[3] +; CHECK-NEXT: fneg s2, s2 +; CHECK-NEXT: fneg s3, s3 +; CHECK-NEXT: fneg s1, s1 +; CHECK-NEXT: mov.s v0[1], v2[0] +; CHECK-NEXT: mov.s v0[2], v3[0] +; CHECK-NEXT: mov.s v0[3], v1[0] +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdr Lloh2, Lloh3 %constexpr = fneg float extractelement (<4 x float> bitcast (<1 x i128> to <4 x float>), i32 0) %constexpr1 = fneg float extractelement (<4 x float> bitcast (<1 x i128> to <4 x float>), i32 1) %constexpr2 = fneg float extractelement (<4 x float> bitcast (<1 x i128> to <4 x float>), i32 2) diff --git a/llvm/test/CodeGen/AArch64/arm64-register-pairing.ll b/llvm/test/CodeGen/AArch64/arm64-register-pairing.ll --- a/llvm/test/CodeGen/AArch64/arm64-register-pairing.ll +++ b/llvm/test/CodeGen/AArch64/arm64-register-pairing.ll @@ -17,7 +17,7 @@ ; CHECK-NEXT: stp x22, x21, [sp, #112] ; 16-byte Folded Spill ; CHECK-NEXT: stp x20, x19, [sp, #128] ; 16-byte Folded Spill ; CHECK-NEXT: ; InlineAsm Start -; CHECK-NEXT: mov x0, #42 +; CHECK-NEXT: mov x0, #42 ; =0x2a ; CHECK-NEXT: ; InlineAsm End ; CHECK-NEXT: ldp x20, x19, [sp, #128] ; 16-byte Folded Reload ; CHECK-NEXT: ldp x22, x21, [sp, #112] ; 16-byte Folded Reload @@ -38,12 +38,12 @@ ; CHECK-NOTMACHO-NEXT: stp x25, x23, [sp, #48] // 16-byte Folded Spill ; CHECK-NOTMACHO-NEXT: stp x21, x19, [sp, #64] // 16-byte Folded Spill ; CHECK-NOTMACHO-NEXT: //APP -; CHECK-NOTMACHO-NEXT: mov x0, #42 +; CHECK-NOTMACHO-NEXT: mov x0, #42 // =0x2a ; CHECK-NOTMACHO-NEXT: //NO_APP ; CHECK-NOTMACHO-NEXT: ldp x21, x19, [sp, #64] // 16-byte Folded Reload +; CHECK-NOTMACHO-NEXT: ldr x27, [sp, #32] // 8-byte Folded Reload ; CHECK-NOTMACHO-NEXT: ldp x25, x23, [sp, #48] // 16-byte Folded Reload ; CHECK-NOTMACHO-NEXT: ldp d10, d8, [sp, #16] // 16-byte Folded Reload -; CHECK-NOTMACHO-NEXT: ldr x27, [sp, #32] // 8-byte Folded Reload ; CHECK-NOTMACHO-NEXT: ldp d14, d12, [sp], #80 // 16-byte Folded Reload ; CHECK-NOTMACHO-NEXT: ret @@ -64,7 +64,7 @@ ; CHECK-NEXT: stp x22, x21, [sp, #112] ; 16-byte Folded Spill ; CHECK-NEXT: stp x20, x19, [sp, #128] ; 16-byte Folded Spill ; CHECK-NEXT: ; InlineAsm Start -; CHECK-NEXT: mov x0, #42 +; CHECK-NEXT: mov x0, #42 ; =0x2a ; CHECK-NEXT: ; InlineAsm End ; CHECK-NEXT: ldp x20, x19, [sp, #128] ; 16-byte Folded Reload ; CHECK-NEXT: ldp x22, x21, [sp, #112] ; 16-byte Folded Reload @@ -85,12 +85,12 @@ ; CHECK-NOTMACHO-NEXT: stp x26, x24, [sp, #48] // 16-byte Folded Spill ; CHECK-NOTMACHO-NEXT: stp x22, x20, [sp, #64] // 16-byte Folded Spill ; CHECK-NOTMACHO-NEXT: //APP -; CHECK-NOTMACHO-NEXT: mov x0, #42 +; CHECK-NOTMACHO-NEXT: mov x0, #42 // =0x2a ; CHECK-NOTMACHO-NEXT: //NO_APP ; CHECK-NOTMACHO-NEXT: ldp x22, x20, [sp, #64] // 16-byte Folded Reload +; CHECK-NOTMACHO-NEXT: ldr x28, [sp, #32] // 8-byte Folded Reload ; CHECK-NOTMACHO-NEXT: ldp x26, x24, [sp, #48] // 16-byte Folded Reload ; CHECK-NOTMACHO-NEXT: ldp d11, d9, [sp, #16] // 16-byte Folded Reload -; CHECK-NOTMACHO-NEXT: ldr x28, [sp, #32] // 8-byte Folded Reload ; CHECK-NOTMACHO-NEXT: ldp d15, d13, [sp], #80 // 16-byte Folded Reload ; CHECK-NOTMACHO-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/arm64-rev.ll b/llvm/test/CodeGen/AArch64/arm64-rev.ll --- a/llvm/test/CodeGen/AArch64/arm64-rev.ll +++ b/llvm/test/CodeGen/AArch64/arm64-rev.ll @@ -530,16 +530,16 @@ define void 
@test_vrev64(ptr nocapture %source, ptr nocapture %dst) nounwind ssp { ; CHECK-LABEL: test_vrev64: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: add x8, x1, #2 ; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: add x8, x1, #2 ; CHECK-NEXT: st1.h { v0 }[5], [x8] ; CHECK-NEXT: st1.h { v0 }[6], [x1] ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_vrev64: ; GISEL: // %bb.0: // %entry -; GISEL-NEXT: add x8, x1, #2 ; GISEL-NEXT: ldr q0, [x0] +; GISEL-NEXT: add x8, x1, #2 ; GISEL-NEXT: st1.h { v0 }[6], [x1] ; GISEL-NEXT: st1.h { v0 }[5], [x8] ; GISEL-NEXT: ret @@ -566,8 +566,8 @@ ; ; GISEL-LABEL: float_vrev64: ; GISEL: // %bb.0: // %entry -; GISEL-NEXT: adrp x8, .LCPI28_0 ; GISEL-NEXT: movi d0, #0000000000000000 +; GISEL-NEXT: adrp x8, .LCPI28_0 ; GISEL-NEXT: ldr q1, [x0] ; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI28_0] ; GISEL-NEXT: tbl.16b v0, { v0, v1 }, v2 @@ -704,19 +704,19 @@ define i64 @test_rev16_x_hwbyteswaps_complex1(i64 %a) nounwind { ; CHECK-LABEL: test_rev16_x_hwbyteswaps_complex1: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: lsr x8, x0, #48 -; CHECK-NEXT: lsr x9, x0, #8 -; CHECK-NEXT: lsr x10, x0, #32 -; CHECK-NEXT: and x11, x9, #0xff000000000000 -; CHECK-NEXT: lsr x12, x0, #16 -; CHECK-NEXT: bfi x11, x8, #56, #8 -; CHECK-NEXT: and x8, x9, #0xff00000000 -; CHECK-NEXT: orr x8, x11, x8 -; CHECK-NEXT: and x9, x9, #0xff0000 -; CHECK-NEXT: bfi x8, x10, #40, #8 -; CHECK-NEXT: orr x8, x8, x9 +; CHECK-NEXT: lsr x8, x0, #8 +; CHECK-NEXT: lsr x9, x0, #48 +; CHECK-NEXT: and x10, x8, #0xff000000000000 +; CHECK-NEXT: and x11, x8, #0xff00000000 +; CHECK-NEXT: and x8, x8, #0xff0000 +; CHECK-NEXT: bfi x10, x9, #56, #8 +; CHECK-NEXT: lsr x9, x0, #32 +; CHECK-NEXT: orr x10, x10, x11 +; CHECK-NEXT: bfi x10, x9, #40, #8 +; CHECK-NEXT: lsr x9, x0, #16 +; CHECK-NEXT: orr x8, x10, x8 +; CHECK-NEXT: bfi x8, x9, #24, #8 ; CHECK-NEXT: ubfiz x9, x0, #8, #8 -; CHECK-NEXT: bfi x8, x12, #24, #8 ; CHECK-NEXT: bfxil x8, x0, #8, #8 ; CHECK-NEXT: orr x0, x8, x9 ; CHECK-NEXT: ret @@ -729,16 +729,16 @@ ; GISEL-NEXT: and x11, x9, #0xff00000000000000 ; GISEL-NEXT: and x12, x8, #0xff00000000 ; GISEL-NEXT: and x13, x9, #0xff0000000000 +; GISEL-NEXT: and x14, x8, #0xff0000 ; GISEL-NEXT: orr x10, x10, x11 -; GISEL-NEXT: orr x11, x12, x13 -; GISEL-NEXT: and x12, x8, #0xff0000 -; GISEL-NEXT: and x13, x9, #0xff000000 +; GISEL-NEXT: and x11, x9, #0xff000000 ; GISEL-NEXT: orr x12, x12, x13 ; GISEL-NEXT: and x8, x8, #0xff -; GISEL-NEXT: orr x10, x10, x11 -; GISEL-NEXT: orr x8, x12, x8 -; GISEL-NEXT: orr x8, x10, x8 +; GISEL-NEXT: orr x11, x14, x11 +; GISEL-NEXT: orr x10, x10, x12 ; GISEL-NEXT: and x9, x9, #0xff00 +; GISEL-NEXT: orr x8, x11, x8 +; GISEL-NEXT: orr x8, x10, x8 ; GISEL-NEXT: orr x0, x8, x9 ; GISEL-NEXT: ret entry: @@ -765,14 +765,14 @@ define i64 @test_rev16_x_hwbyteswaps_complex2(i64 %a) nounwind { ; CHECK-LABEL: test_rev16_x_hwbyteswaps_complex2: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: lsr x8, x0, #8 ; CHECK-NEXT: lsr x9, x0, #48 ; CHECK-NEXT: lsr x10, x0, #32 -; CHECK-NEXT: lsr x8, x0, #8 -; CHECK-NEXT: lsr x11, x0, #16 ; CHECK-NEXT: and x8, x8, #0xff00ff00ff00ff ; CHECK-NEXT: bfi x8, x9, #56, #8 +; CHECK-NEXT: lsr x9, x0, #16 ; CHECK-NEXT: bfi x8, x10, #40, #8 -; CHECK-NEXT: bfi x8, x11, #24, #8 +; CHECK-NEXT: bfi x8, x9, #24, #8 ; CHECK-NEXT: bfi x8, x0, #8, #8 ; CHECK-NEXT: mov x0, x8 ; CHECK-NEXT: ret @@ -785,16 +785,16 @@ ; GISEL-NEXT: and x11, x8, #0xff00000000 ; GISEL-NEXT: and x12, x8, #0xff0000 ; GISEL-NEXT: and x8, x8, #0xff +; GISEL-NEXT: and x13, x9, #0xff00000000000000 ; GISEL-NEXT: orr x10, x10, x11 +; GISEL-NEXT: and 
x11, x9, #0xff0000000000 ; GISEL-NEXT: orr x8, x12, x8 -; GISEL-NEXT: and x11, x9, #0xff00000000000000 -; GISEL-NEXT: and x12, x9, #0xff0000000000 -; GISEL-NEXT: orr x11, x11, x12 ; GISEL-NEXT: and x12, x9, #0xff000000 +; GISEL-NEXT: orr x11, x13, x11 ; GISEL-NEXT: orr x8, x10, x8 +; GISEL-NEXT: and x9, x9, #0xff00 ; GISEL-NEXT: orr x10, x11, x12 ; GISEL-NEXT: orr x8, x8, x10 -; GISEL-NEXT: and x9, x9, #0xff00 ; GISEL-NEXT: orr x0, x8, x9 ; GISEL-NEXT: ret entry: @@ -822,19 +822,19 @@ define i64 @test_rev16_x_hwbyteswaps_complex3(i64 %a) nounwind { ; CHECK-LABEL: test_rev16_x_hwbyteswaps_complex3: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: lsr x8, x0, #48 -; CHECK-NEXT: lsr x9, x0, #8 -; CHECK-NEXT: lsr x10, x0, #32 -; CHECK-NEXT: and x11, x9, #0xff000000000000 -; CHECK-NEXT: lsr x12, x0, #16 -; CHECK-NEXT: bfi x11, x8, #56, #8 -; CHECK-NEXT: and x8, x9, #0xff00000000 -; CHECK-NEXT: orr x8, x8, x11 -; CHECK-NEXT: and x9, x9, #0xff0000 -; CHECK-NEXT: bfi x8, x10, #40, #8 -; CHECK-NEXT: orr x8, x9, x8 +; CHECK-NEXT: lsr x8, x0, #8 +; CHECK-NEXT: lsr x9, x0, #48 +; CHECK-NEXT: and x10, x8, #0xff000000000000 +; CHECK-NEXT: and x11, x8, #0xff00000000 +; CHECK-NEXT: and x8, x8, #0xff0000 +; CHECK-NEXT: bfi x10, x9, #56, #8 +; CHECK-NEXT: lsr x9, x0, #32 +; CHECK-NEXT: orr x10, x11, x10 +; CHECK-NEXT: bfi x10, x9, #40, #8 +; CHECK-NEXT: lsr x9, x0, #16 +; CHECK-NEXT: orr x8, x8, x10 +; CHECK-NEXT: bfi x8, x9, #24, #8 ; CHECK-NEXT: ubfiz x9, x0, #8, #8 -; CHECK-NEXT: bfi x8, x12, #24, #8 ; CHECK-NEXT: bfxil x8, x0, #8, #8 ; CHECK-NEXT: orr x0, x9, x8 ; CHECK-NEXT: ret @@ -847,16 +847,16 @@ ; GISEL-NEXT: and x11, x9, #0xff00000000000000 ; GISEL-NEXT: and x12, x8, #0xff00000000 ; GISEL-NEXT: and x13, x9, #0xff0000000000 +; GISEL-NEXT: and x14, x8, #0xff0000 ; GISEL-NEXT: orr x10, x11, x10 -; GISEL-NEXT: orr x11, x13, x12 -; GISEL-NEXT: and x12, x8, #0xff0000 -; GISEL-NEXT: and x13, x9, #0xff000000 +; GISEL-NEXT: and x11, x9, #0xff000000 ; GISEL-NEXT: orr x12, x13, x12 ; GISEL-NEXT: and x8, x8, #0xff -; GISEL-NEXT: orr x10, x11, x10 -; GISEL-NEXT: orr x8, x8, x12 -; GISEL-NEXT: orr x8, x8, x10 +; GISEL-NEXT: orr x11, x11, x14 +; GISEL-NEXT: orr x10, x12, x10 ; GISEL-NEXT: and x9, x9, #0xff00 +; GISEL-NEXT: orr x8, x8, x11 +; GISEL-NEXT: orr x8, x8, x10 ; GISEL-NEXT: orr x0, x9, x8 ; GISEL-NEXT: ret entry: @@ -883,11 +883,11 @@ define i64 @test_or_and_combine1(i64 %a) nounwind { ; CHECK-LABEL: test_or_and_combine1: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: lsr x8, x0, #24 -; CHECK-NEXT: lsr x9, x0, #8 -; CHECK-NEXT: and x10, x9, #0xff000000000000 -; CHECK-NEXT: bfi x10, x8, #32, #8 -; CHECK-NEXT: and x8, x9, #0xff0000 +; CHECK-NEXT: lsr x8, x0, #8 +; CHECK-NEXT: lsr x9, x0, #24 +; CHECK-NEXT: and x10, x8, #0xff000000000000 +; CHECK-NEXT: and x8, x8, #0xff0000 +; CHECK-NEXT: bfi x10, x9, #32, #8 ; CHECK-NEXT: orr x0, x10, x8 ; CHECK-NEXT: ret ; @@ -897,8 +897,8 @@ ; GISEL-NEXT: lsl x9, x0, #8 ; GISEL-NEXT: and x10, x8, #0xff000000000000 ; GISEL-NEXT: and x9, x9, #0xff00000000 -; GISEL-NEXT: orr x9, x10, x9 ; GISEL-NEXT: and x8, x8, #0xff0000 +; GISEL-NEXT: orr x9, x10, x9 ; GISEL-NEXT: orr x0, x9, x8 ; GISEL-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll b/llvm/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll --- a/llvm/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll +++ b/llvm/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll @@ -40,9 +40,9 @@ define <4 x float> @foo2(<4 x float> %val, <4 x float> %test) nounwind { ; CHECK-LABEL: foo2: ; 
CHECK: ; %bb.0: +; CHECK-NEXT: fcmeq.4s v0, v0, v1 ; CHECK-NEXT: Lloh0: ; CHECK-NEXT: adrp x8, lCPI2_0@PAGE -; CHECK-NEXT: fcmeq.4s v0, v0, v1 ; CHECK-NEXT: Lloh1: ; CHECK-NEXT: ldr q1, [x8, lCPI2_0@PAGEOFF] ; CHECK-NEXT: and.16b v0, v0, v1 diff --git a/llvm/test/CodeGen/AArch64/arm64-setcc-swap-infloop.ll b/llvm/test/CodeGen/AArch64/arm64-setcc-swap-infloop.ll --- a/llvm/test/CodeGen/AArch64/arm64-setcc-swap-infloop.ll +++ b/llvm/test/CodeGen/AArch64/arm64-setcc-swap-infloop.ll @@ -12,14 +12,14 @@ define <16 x i1> @setcc_swap_infloop(ptr %arg) { ; CHECK-LABEL: setcc_swap_infloop: ; CHECK: ; %bb.0: -; CHECK-NEXT: mov x8, xzr -; CHECK-NEXT: mov w9, #16 ; =0x10 ; CHECK-NEXT: movi.16b v1, #1 +; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: ldr q0, [x8] +; CHECK-NEXT: cmeq.16b v0, v0, #0 ; CHECK-NEXT: cmeq.16b v2, v1, #0 ; CHECK-NEXT: str q1, [x8] -; CHECK-NEXT: cmeq.16b v0, v0, #0 -; CHECK-NEXT: str q1, [x9] +; CHECK-NEXT: mov w8, #16 ; =0x10 +; CHECK-NEXT: str q1, [x8] ; CHECK-NEXT: orr.16b v0, v0, v2 ; CHECK-NEXT: ret call void @llvm.memset.p0.i64(ptr nonnull null, i8 1, i64 32, i1 false) diff --git a/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll b/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll --- a/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll +++ b/llvm/test/CodeGen/AArch64/arm64-shrink-wrapping.ll @@ -80,7 +80,7 @@ ; ENABLE-NEXT: .cfi_offset w19, -24 ; ENABLE-NEXT: .cfi_offset w20, -32 ; ENABLE-NEXT: mov w19, wzr -; ENABLE-NEXT: mov w20, #10 +; ENABLE-NEXT: mov w20, #10 ; =0xa ; ENABLE-NEXT: LBB1_2: ; %for.body ; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1 ; ENABLE-NEXT: bl _something @@ -109,7 +109,7 @@ ; DISABLE-NEXT: cbz w0, LBB1_4 ; DISABLE-NEXT: ; %bb.1: ; %for.body.preheader ; DISABLE-NEXT: mov w19, wzr -; DISABLE-NEXT: mov w20, #10 +; DISABLE-NEXT: mov w20, #10 ; =0xa ; DISABLE-NEXT: LBB1_2: ; %for.body ; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1 ; DISABLE-NEXT: bl _something @@ -167,7 +167,7 @@ ; ENABLE-NEXT: .cfi_offset w19, -24 ; ENABLE-NEXT: .cfi_offset w20, -32 ; ENABLE-NEXT: mov w19, wzr -; ENABLE-NEXT: mov w20, #10 +; ENABLE-NEXT: mov w20, #10 ; =0xa ; ENABLE-NEXT: LBB2_1: ; %for.body ; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1 ; ENABLE-NEXT: bl _something @@ -191,7 +191,7 @@ ; DISABLE-NEXT: .cfi_offset w19, -24 ; DISABLE-NEXT: .cfi_offset w20, -32 ; DISABLE-NEXT: mov w19, wzr -; DISABLE-NEXT: mov w20, #10 +; DISABLE-NEXT: mov w20, #10 ; =0xa ; DISABLE-NEXT: LBB2_1: ; %for.body ; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1 ; DISABLE-NEXT: bl _something @@ -235,7 +235,7 @@ ; ENABLE-NEXT: .cfi_offset w19, -24 ; ENABLE-NEXT: .cfi_offset w20, -32 ; ENABLE-NEXT: mov w19, wzr -; ENABLE-NEXT: mov w20, #10 +; ENABLE-NEXT: mov w20, #10 ; =0xa ; ENABLE-NEXT: LBB3_2: ; %for.body ; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1 ; ENABLE-NEXT: bl _something @@ -265,7 +265,7 @@ ; DISABLE-NEXT: cbz w0, LBB3_4 ; DISABLE-NEXT: ; %bb.1: ; %for.body.preheader ; DISABLE-NEXT: mov w19, wzr -; DISABLE-NEXT: mov w20, #10 +; DISABLE-NEXT: mov w20, #10 ; =0xa ; DISABLE-NEXT: LBB3_2: ; %for.body ; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1 ; DISABLE-NEXT: bl _something @@ -329,7 +329,7 @@ ; ENABLE-NEXT: .cfi_offset w20, -32 ; ENABLE-NEXT: bl _somethingElse ; ENABLE-NEXT: mov w19, wzr -; ENABLE-NEXT: mov w20, #10 +; ENABLE-NEXT: mov w20, #10 ; =0xa ; ENABLE-NEXT: LBB4_2: ; %for.body ; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1 ; ENABLE-NEXT: bl _something @@ -366,7 +366,7 @@ ; DISABLE-NEXT: ; %bb.1: ; %if.then ; DISABLE-NEXT: bl _somethingElse ; 
DISABLE-NEXT: mov w19, wzr -; DISABLE-NEXT: mov w20, #10 +; DISABLE-NEXT: mov w20, #10 ; =0xa ; DISABLE-NEXT: LBB4_2: ; %for.body ; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1 ; DISABLE-NEXT: bl _something @@ -452,8 +452,8 @@ ; ENABLE-NEXT: add x9, x8, #8 ; ENABLE-NEXT: str x9, [sp, #8] ; ENABLE-NEXT: ldr w8, [x8] -; ENABLE-NEXT: add w0, w0, w8 ; ENABLE-NEXT: subs w1, w1, #1 +; ENABLE-NEXT: add w0, w0, w8 ; ENABLE-NEXT: b.ne LBB6_2 ; ENABLE-NEXT: LBB6_3: ; %for.end ; ENABLE-NEXT: add sp, sp, #16 @@ -480,8 +480,8 @@ ; DISABLE-NEXT: add x9, x8, #8 ; DISABLE-NEXT: str x9, [sp, #8] ; DISABLE-NEXT: ldr w8, [x8] -; DISABLE-NEXT: add w0, w0, w8 ; DISABLE-NEXT: subs w1, w1, #1 +; DISABLE-NEXT: add w0, w0, w8 ; DISABLE-NEXT: b.ne LBB6_2 ; DISABLE-NEXT: b LBB6_4 ; DISABLE-NEXT: LBB6_3: ; %if.else @@ -537,7 +537,7 @@ ; ENABLE-NEXT: .cfi_def_cfa_offset 16 ; ENABLE-NEXT: .cfi_offset w19, -8 ; ENABLE-NEXT: .cfi_offset w20, -16 -; ENABLE-NEXT: mov w8, #10 +; ENABLE-NEXT: mov w8, #10 ; =0xa ; ENABLE-NEXT: LBB7_2: ; %for.body ; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1 ; ENABLE-NEXT: subs w8, w8, #1 @@ -561,7 +561,7 @@ ; DISABLE-NEXT: .cfi_offset w20, -16 ; DISABLE-NEXT: cbz w0, LBB7_4 ; DISABLE-NEXT: ; %bb.1: ; %for.body.preheader -; DISABLE-NEXT: mov w8, #10 +; DISABLE-NEXT: mov w8, #10 ; =0xa ; DISABLE-NEXT: LBB7_2: ; %for.body ; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1 ; DISABLE-NEXT: subs w8, w8, #1 @@ -612,8 +612,8 @@ ; ENABLE-NEXT: .cfi_offset w29, -16 ; ENABLE-NEXT: stp x1, x1, [sp, #32] ; ENABLE-NEXT: stp x1, x1, [sp, #16] -; ENABLE-NEXT: mov w0, w1 ; ENABLE-NEXT: stp x1, x1, [sp] +; ENABLE-NEXT: mov w0, w1 ; ENABLE-NEXT: bl _someVariadicFunc ; ENABLE-NEXT: lsl w0, w0, #3 ; ENABLE-NEXT: ldp x29, x30, [sp, #48] ; 16-byte Folded Reload @@ -636,8 +636,8 @@ ; DISABLE-NEXT: ; %bb.1: ; %if.then ; DISABLE-NEXT: stp x1, x1, [sp, #32] ; DISABLE-NEXT: stp x1, x1, [sp, #16] -; DISABLE-NEXT: mov w0, w1 ; DISABLE-NEXT: stp x1, x1, [sp] +; DISABLE-NEXT: mov w0, w1 ; DISABLE-NEXT: bl _someVariadicFunc ; DISABLE-NEXT: lsl w0, w0, #3 ; DISABLE-NEXT: b LBB8_3 @@ -676,7 +676,7 @@ ; ENABLE: ; %bb.0: ; %entry ; ENABLE-NEXT: cbnz w0, LBB9_2 ; ENABLE-NEXT: ; %bb.1: ; %if.end -; ENABLE-NEXT: mov w0, #42 +; ENABLE-NEXT: mov w0, #42 ; =0x2a ; ENABLE-NEXT: ret ; ENABLE-NEXT: LBB9_2: ; %if.abort ; ENABLE-NEXT: stp x29, x30, [sp, #-16]! 
; 16-byte Folded Spill @@ -695,7 +695,7 @@ ; DISABLE-NEXT: .cfi_offset w29, -16 ; DISABLE-NEXT: cbnz w0, LBB9_2 ; DISABLE-NEXT: ; %bb.1: ; %if.end -; DISABLE-NEXT: mov w0, #42 +; DISABLE-NEXT: mov w0, #42 ; =0x2a ; DISABLE-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload ; DISABLE-NEXT: ret ; DISABLE-NEXT: LBB9_2: ; %if.abort @@ -816,10 +816,10 @@ ; ENABLE-NEXT: LBB11_2: ; %for.body ; ENABLE-NEXT: ; =>This Inner Loop Header: Depth=1 ; ENABLE-NEXT: ; InlineAsm Start -; ENABLE-NEXT: mov x10, #0 +; ENABLE-NEXT: mov x10, #0 ; =0x0 ; ENABLE-NEXT: ; InlineAsm End ; ENABLE-NEXT: add w10, w10, w9 -; ENABLE-NEXT: mov w9, #1 +; ENABLE-NEXT: mov w9, #1 ; =0x1 ; ENABLE-NEXT: str w10, [x8] ; ENABLE-NEXT: ; InlineAsm Start ; ENABLE-NEXT: nop @@ -849,10 +849,10 @@ ; DISABLE-NEXT: LBB11_2: ; %for.body ; DISABLE-NEXT: ; =>This Inner Loop Header: Depth=1 ; DISABLE-NEXT: ; InlineAsm Start -; DISABLE-NEXT: mov x10, #0 +; DISABLE-NEXT: mov x10, #0 ; =0x0 ; DISABLE-NEXT: ; InlineAsm End ; DISABLE-NEXT: add w10, w10, w9 -; DISABLE-NEXT: mov w9, #1 +; DISABLE-NEXT: mov w9, #1 ; =0x1 ; DISABLE-NEXT: str w10, [x8] ; DISABLE-NEXT: ; InlineAsm Start ; DISABLE-NEXT: nop @@ -969,8 +969,8 @@ define i32 @stack_realign(i32 %a, i32 %b, ptr %ptr1, ptr %ptr2) { ; ENABLE-LABEL: stack_realign: ; ENABLE: ; %bb.0: -; ENABLE-NEXT: lsl w8, w0, w1 -; ENABLE-NEXT: lsl w9, w1, w0 +; ENABLE-NEXT: lsl w9, w0, w1 +; ENABLE-NEXT: lsl w8, w1, w0 ; ENABLE-NEXT: cmp w0, w1 ; ENABLE-NEXT: b.ge LBB13_2 ; ENABLE-NEXT: ; %bb.1: ; %true @@ -985,8 +985,8 @@ ; ENABLE-NEXT: mov sp, x29 ; ENABLE-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload ; ENABLE-NEXT: LBB13_2: ; %false -; ENABLE-NEXT: str w8, [x2] -; ENABLE-NEXT: str w9, [x3] +; ENABLE-NEXT: str w9, [x2] +; ENABLE-NEXT: str w8, [x3] ; ENABLE-NEXT: ret ; ; DISABLE-LABEL: stack_realign: @@ -998,15 +998,15 @@ ; DISABLE-NEXT: .cfi_def_cfa w29, 16 ; DISABLE-NEXT: .cfi_offset w30, -8 ; DISABLE-NEXT: .cfi_offset w29, -16 -; DISABLE-NEXT: lsl w8, w0, w1 -; DISABLE-NEXT: lsl w9, w1, w0 +; DISABLE-NEXT: lsl w9, w0, w1 +; DISABLE-NEXT: lsl w8, w1, w0 ; DISABLE-NEXT: cmp w0, w1 ; DISABLE-NEXT: b.ge LBB13_2 ; DISABLE-NEXT: ; %bb.1: ; %true ; DISABLE-NEXT: str w0, [sp] ; DISABLE-NEXT: LBB13_2: ; %false -; DISABLE-NEXT: str w8, [x2] -; DISABLE-NEXT: str w9, [x3] +; DISABLE-NEXT: str w9, [x2] +; DISABLE-NEXT: str w8, [x3] ; DISABLE-NEXT: mov sp, x29 ; DISABLE-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload ; DISABLE-NEXT: ret @@ -1058,16 +1058,16 @@ ; ENABLE-NEXT: .cfi_offset w26, -80 ; ENABLE-NEXT: .cfi_offset w27, -88 ; ENABLE-NEXT: .cfi_offset w28, -96 -; ENABLE-NEXT: add w8, w1, w0 -; ENABLE-NEXT: lsl w9, w0, w1 -; ENABLE-NEXT: lsl w10, w1, w0 -; ENABLE-NEXT: lsr w12, w0, w1 -; ENABLE-NEXT: lsr w13, w1, w0 -; ENABLE-NEXT: sub w11, w10, w12 +; ENABLE-NEXT: lsl w8, w1, w0 +; ENABLE-NEXT: lsr w10, w0, w1 +; ENABLE-NEXT: lsl w16, w0, w1 +; ENABLE-NEXT: lsr w11, w1, w0 +; ENABLE-NEXT: add w14, w1, w0 +; ENABLE-NEXT: sub w9, w8, w10 ; ENABLE-NEXT: subs w17, w1, w0 -; ENABLE-NEXT: add w16, w9, w10 -; ENABLE-NEXT: add w14, w12, w13 -; ENABLE-NEXT: add w15, w13, w8 +; ENABLE-NEXT: add w15, w16, w8 +; ENABLE-NEXT: add w12, w10, w11 +; ENABLE-NEXT: add w13, w11, w14 ; ENABLE-NEXT: b.le LBB14_2 ; ENABLE-NEXT: ; %bb.1: ; %true ; ENABLE-NEXT: str w0, [sp] @@ -1075,15 +1075,15 @@ ; ENABLE-NEXT: nop ; ENABLE-NEXT: ; InlineAsm End ; ENABLE-NEXT: LBB14_2: ; %false -; ENABLE-NEXT: str w9, [x2] -; ENABLE-NEXT: str w10, [x3] -; ENABLE-NEXT: str w12, [x4] -; ENABLE-NEXT: str w13, [x5] -; ENABLE-NEXT: str w8, 
[x6] +; ENABLE-NEXT: str w16, [x2] +; ENABLE-NEXT: str w8, [x3] +; ENABLE-NEXT: str w10, [x4] +; ENABLE-NEXT: str w11, [x5] +; ENABLE-NEXT: str w14, [x6] ; ENABLE-NEXT: str w17, [x7] ; ENABLE-NEXT: stp w0, w1, [x2, #4] -; ENABLE-NEXT: stp w16, w11, [x2, #12] -; ENABLE-NEXT: stp w14, w15, [x2, #20] +; ENABLE-NEXT: stp w15, w9, [x2, #12] +; ENABLE-NEXT: stp w12, w13, [x2, #20] ; ENABLE-NEXT: sub sp, x29, #80 ; ENABLE-NEXT: ldp x29, x30, [sp, #80] ; 16-byte Folded Reload ; ENABLE-NEXT: ldp x20, x19, [sp, #64] ; 16-byte Folded Reload @@ -1117,16 +1117,16 @@ ; DISABLE-NEXT: .cfi_offset w26, -80 ; DISABLE-NEXT: .cfi_offset w27, -88 ; DISABLE-NEXT: .cfi_offset w28, -96 -; DISABLE-NEXT: add w8, w1, w0 -; DISABLE-NEXT: lsl w9, w0, w1 -; DISABLE-NEXT: lsl w10, w1, w0 -; DISABLE-NEXT: lsr w12, w0, w1 -; DISABLE-NEXT: lsr w13, w1, w0 -; DISABLE-NEXT: sub w11, w10, w12 +; DISABLE-NEXT: lsl w8, w1, w0 +; DISABLE-NEXT: lsr w10, w0, w1 +; DISABLE-NEXT: lsl w16, w0, w1 +; DISABLE-NEXT: lsr w11, w1, w0 +; DISABLE-NEXT: add w14, w1, w0 +; DISABLE-NEXT: sub w9, w8, w10 ; DISABLE-NEXT: subs w17, w1, w0 -; DISABLE-NEXT: add w16, w9, w10 -; DISABLE-NEXT: add w14, w12, w13 -; DISABLE-NEXT: add w15, w13, w8 +; DISABLE-NEXT: add w15, w16, w8 +; DISABLE-NEXT: add w12, w10, w11 +; DISABLE-NEXT: add w13, w11, w14 ; DISABLE-NEXT: b.le LBB14_2 ; DISABLE-NEXT: ; %bb.1: ; %true ; DISABLE-NEXT: str w0, [sp] @@ -1134,15 +1134,15 @@ ; DISABLE-NEXT: nop ; DISABLE-NEXT: ; InlineAsm End ; DISABLE-NEXT: LBB14_2: ; %false -; DISABLE-NEXT: str w9, [x2] -; DISABLE-NEXT: str w10, [x3] -; DISABLE-NEXT: str w12, [x4] -; DISABLE-NEXT: str w13, [x5] -; DISABLE-NEXT: str w8, [x6] +; DISABLE-NEXT: str w16, [x2] +; DISABLE-NEXT: str w8, [x3] +; DISABLE-NEXT: str w10, [x4] +; DISABLE-NEXT: str w11, [x5] +; DISABLE-NEXT: str w14, [x6] ; DISABLE-NEXT: str w17, [x7] ; DISABLE-NEXT: stp w0, w1, [x2, #4] -; DISABLE-NEXT: stp w16, w11, [x2, #12] -; DISABLE-NEXT: stp w14, w15, [x2, #20] +; DISABLE-NEXT: stp w15, w9, [x2, #12] +; DISABLE-NEXT: stp w12, w13, [x2, #20] ; DISABLE-NEXT: sub sp, x29, #80 ; DISABLE-NEXT: ldp x29, x30, [sp, #80] ; 16-byte Folded Reload ; DISABLE-NEXT: ldp x20, x19, [sp, #64] ; 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll --- a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll +++ b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll @@ -179,8 +179,8 @@ ; CHECK-NEXT: ushll.4s v0, v0, #0 ; CHECK-NEXT: ushll.2d v1, v0, #0 ; CHECK-NEXT: ushll2.2d v0, v0, #0 -; CHECK-NEXT: shl.2d v2, v1, #56 ; CHECK-NEXT: shl.2d v0, v0, #56 +; CHECK-NEXT: shl.2d v2, v1, #56 ; CHECK-NEXT: sshr.2d v1, v0, #56 ; CHECK-NEXT: sshr.2d v0, v2, #56 ; CHECK-NEXT: ret @@ -192,11 +192,11 @@ ; CHECK-LABEL: zext_v8i8_to_v8i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ushll.8h v0, v0, #0 +; CHECK-NEXT: ushll.4s v1, v0, #0 ; CHECK-NEXT: ushll2.4s v2, v0, #0 -; CHECK-NEXT: ushll.4s v0, v0, #0 +; CHECK-NEXT: ushll.2d v0, v1, #0 ; CHECK-NEXT: ushll2.2d v3, v2, #0 -; CHECK-NEXT: ushll2.2d v1, v0, #0 -; CHECK-NEXT: ushll.2d v0, v0, #0 +; CHECK-NEXT: ushll2.2d v1, v1, #0 ; CHECK-NEXT: ushll.2d v2, v2, #0 ; CHECK-NEXT: ret %r = zext <8 x i8> %v0 to <8 x i64> @@ -207,11 +207,11 @@ ; CHECK-LABEL: sext_v8i8_to_v8i64: ; CHECK: // %bb.0: ; CHECK-NEXT: sshll.8h v0, v0, #0 +; CHECK-NEXT: sshll.4s v1, v0, #0 ; CHECK-NEXT: sshll2.4s v2, v0, #0 -; CHECK-NEXT: sshll.4s v0, v0, #0 +; CHECK-NEXT: sshll.2d v0, v1, #0 ; CHECK-NEXT: sshll2.2d v3, v2, #0 -; CHECK-NEXT: sshll2.2d v1, v0, #0 -; 
CHECK-NEXT: sshll.2d v0, v0, #0 +; CHECK-NEXT: sshll2.2d v1, v1, #0 ; CHECK-NEXT: sshll.2d v2, v2, #0 ; CHECK-NEXT: ret %r = sext <8 x i8> %v0 to <8 x i64> @@ -225,14 +225,13 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldr w8, [sp, #64] ; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: ldr w9, [sp] -; CHECK-NEXT: ldr w10, [sp, #8] -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: ldr w8, [sp, #72] -; CHECK-NEXT: mov.b v0[1], w1 +; CHECK-NEXT: ldr w9, [sp, #72] ; CHECK-NEXT: movi.16b v2, #1 -; CHECK-NEXT: mov.b v1[1], w8 +; CHECK-NEXT: fmov s1, w8 ; CHECK-NEXT: ldr w8, [sp, #80] +; CHECK-NEXT: mov.b v0[1], w1 +; CHECK-NEXT: mov.b v1[1], w9 +; CHECK-NEXT: ldr w9, [sp] ; CHECK-NEXT: mov.b v0[2], w2 ; CHECK-NEXT: mov.b v1[2], w8 ; CHECK-NEXT: ldr w8, [sp, #88] @@ -252,33 +251,34 @@ ; CHECK-NEXT: mov.b v1[7], w8 ; CHECK-NEXT: ldr w8, [sp, #128] ; CHECK-NEXT: mov.b v0[8], w9 -; CHECK-NEXT: ldr w9, [sp, #16] +; CHECK-NEXT: ldr w9, [sp, #8] ; CHECK-NEXT: mov.b v1[8], w8 ; CHECK-NEXT: ldr w8, [sp, #136] -; CHECK-NEXT: mov.b v0[9], w10 -; CHECK-NEXT: ldr w10, [sp, #24] +; CHECK-NEXT: mov.b v0[9], w9 +; CHECK-NEXT: ldr w9, [sp, #16] ; CHECK-NEXT: mov.b v1[9], w8 ; CHECK-NEXT: ldr w8, [sp, #144] ; CHECK-NEXT: mov.b v0[10], w9 -; CHECK-NEXT: ldr w9, [sp, #32] +; CHECK-NEXT: ldr w9, [sp, #24] ; CHECK-NEXT: mov.b v1[10], w8 ; CHECK-NEXT: ldr w8, [sp, #152] -; CHECK-NEXT: mov.b v0[11], w10 -; CHECK-NEXT: ldr w10, [sp, #40] +; CHECK-NEXT: mov.b v0[11], w9 +; CHECK-NEXT: ldr w9, [sp, #32] ; CHECK-NEXT: mov.b v1[11], w8 ; CHECK-NEXT: ldr w8, [sp, #160] ; CHECK-NEXT: mov.b v0[12], w9 -; CHECK-NEXT: ldr w9, [sp, #48] +; CHECK-NEXT: ldr w9, [sp, #40] ; CHECK-NEXT: mov.b v1[12], w8 ; CHECK-NEXT: ldr w8, [sp, #168] -; CHECK-NEXT: mov.b v0[13], w10 -; CHECK-NEXT: ldr w10, [sp, #56] +; CHECK-NEXT: mov.b v0[13], w9 +; CHECK-NEXT: ldr w9, [sp, #48] ; CHECK-NEXT: mov.b v1[13], w8 ; CHECK-NEXT: ldr w8, [sp, #176] ; CHECK-NEXT: mov.b v0[14], w9 +; CHECK-NEXT: ldr w9, [sp, #56] ; CHECK-NEXT: mov.b v1[14], w8 ; CHECK-NEXT: ldr w8, [sp, #184] -; CHECK-NEXT: mov.b v0[15], w10 +; CHECK-NEXT: mov.b v0[15], w9 ; CHECK-NEXT: mov.b v1[15], w8 ; CHECK-NEXT: and.16b v0, v0, v2 ; CHECK-NEXT: and.16b v1, v1, v2 @@ -291,65 +291,65 @@ ; CHECK-LABEL: sext_v32i1: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr w8, [sp, #64] -; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: ldr w9, [sp] -; CHECK-NEXT: ldr w10, [sp, #8] -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: ldr w8, [sp, #72] -; CHECK-NEXT: mov.b v0[1], w1 -; CHECK-NEXT: mov.b v1[1], w8 +; CHECK-NEXT: fmov s1, w0 +; CHECK-NEXT: ldr w9, [sp, #72] +; CHECK-NEXT: fmov s0, w8 ; CHECK-NEXT: ldr w8, [sp, #80] -; CHECK-NEXT: mov.b v0[2], w2 -; CHECK-NEXT: mov.b v1[2], w8 +; CHECK-NEXT: mov.b v1[1], w1 +; CHECK-NEXT: mov.b v0[1], w9 +; CHECK-NEXT: ldr w9, [sp] +; CHECK-NEXT: mov.b v1[2], w2 +; CHECK-NEXT: mov.b v0[2], w8 ; CHECK-NEXT: ldr w8, [sp, #88] -; CHECK-NEXT: mov.b v0[3], w3 -; CHECK-NEXT: mov.b v1[3], w8 +; CHECK-NEXT: mov.b v1[3], w3 +; CHECK-NEXT: mov.b v0[3], w8 ; CHECK-NEXT: ldr w8, [sp, #96] -; CHECK-NEXT: mov.b v0[4], w4 -; CHECK-NEXT: mov.b v1[4], w8 +; CHECK-NEXT: mov.b v1[4], w4 +; CHECK-NEXT: mov.b v0[4], w8 ; CHECK-NEXT: ldr w8, [sp, #104] -; CHECK-NEXT: mov.b v0[5], w5 -; CHECK-NEXT: mov.b v1[5], w8 +; CHECK-NEXT: mov.b v1[5], w5 +; CHECK-NEXT: mov.b v0[5], w8 ; CHECK-NEXT: ldr w8, [sp, #112] -; CHECK-NEXT: mov.b v0[6], w6 -; CHECK-NEXT: mov.b v1[6], w8 +; CHECK-NEXT: mov.b v1[6], w6 +; CHECK-NEXT: mov.b v0[6], w8 ; CHECK-NEXT: ldr w8, [sp, #120] -; CHECK-NEXT: mov.b v0[7], w7 -; CHECK-NEXT: mov.b v1[7], w8 +; 
CHECK-NEXT: mov.b v1[7], w7 +; CHECK-NEXT: mov.b v0[7], w8 ; CHECK-NEXT: ldr w8, [sp, #128] -; CHECK-NEXT: mov.b v0[8], w9 -; CHECK-NEXT: ldr w9, [sp, #16] -; CHECK-NEXT: mov.b v1[8], w8 +; CHECK-NEXT: mov.b v1[8], w9 +; CHECK-NEXT: ldr w9, [sp, #8] +; CHECK-NEXT: mov.b v0[8], w8 ; CHECK-NEXT: ldr w8, [sp, #136] -; CHECK-NEXT: mov.b v0[9], w10 -; CHECK-NEXT: ldr w10, [sp, #24] -; CHECK-NEXT: mov.b v1[9], w8 +; CHECK-NEXT: mov.b v1[9], w9 +; CHECK-NEXT: ldr w9, [sp, #16] +; CHECK-NEXT: mov.b v0[9], w8 ; CHECK-NEXT: ldr w8, [sp, #144] -; CHECK-NEXT: mov.b v0[10], w9 -; CHECK-NEXT: ldr w9, [sp, #32] -; CHECK-NEXT: mov.b v1[10], w8 +; CHECK-NEXT: mov.b v1[10], w9 +; CHECK-NEXT: ldr w9, [sp, #24] +; CHECK-NEXT: mov.b v0[10], w8 ; CHECK-NEXT: ldr w8, [sp, #152] -; CHECK-NEXT: mov.b v0[11], w10 -; CHECK-NEXT: ldr w10, [sp, #40] -; CHECK-NEXT: mov.b v1[11], w8 +; CHECK-NEXT: mov.b v1[11], w9 +; CHECK-NEXT: ldr w9, [sp, #32] +; CHECK-NEXT: mov.b v0[11], w8 ; CHECK-NEXT: ldr w8, [sp, #160] -; CHECK-NEXT: mov.b v0[12], w9 -; CHECK-NEXT: ldr w9, [sp, #48] -; CHECK-NEXT: mov.b v1[12], w8 +; CHECK-NEXT: mov.b v1[12], w9 +; CHECK-NEXT: ldr w9, [sp, #40] +; CHECK-NEXT: mov.b v0[12], w8 ; CHECK-NEXT: ldr w8, [sp, #168] -; CHECK-NEXT: mov.b v0[13], w10 -; CHECK-NEXT: ldr w10, [sp, #56] -; CHECK-NEXT: mov.b v1[13], w8 +; CHECK-NEXT: mov.b v1[13], w9 +; CHECK-NEXT: ldr w9, [sp, #48] +; CHECK-NEXT: mov.b v0[13], w8 ; CHECK-NEXT: ldr w8, [sp, #176] -; CHECK-NEXT: mov.b v0[14], w9 -; CHECK-NEXT: mov.b v1[14], w8 +; CHECK-NEXT: mov.b v1[14], w9 +; CHECK-NEXT: ldr w9, [sp, #56] +; CHECK-NEXT: mov.b v0[14], w8 ; CHECK-NEXT: ldr w8, [sp, #184] -; CHECK-NEXT: mov.b v0[15], w10 -; CHECK-NEXT: mov.b v1[15], w8 -; CHECK-NEXT: shl.16b v0, v0, #7 +; CHECK-NEXT: mov.b v1[15], w9 +; CHECK-NEXT: mov.b v0[15], w8 ; CHECK-NEXT: shl.16b v1, v1, #7 -; CHECK-NEXT: cmlt.16b v0, v0, #0 -; CHECK-NEXT: cmlt.16b v1, v1, #0 +; CHECK-NEXT: shl.16b v2, v0, #7 +; CHECK-NEXT: cmlt.16b v0, v1, #0 +; CHECK-NEXT: cmlt.16b v1, v2, #0 ; CHECK-NEXT: ret %res = sext <32 x i1> %arg to <32 x i8> ret <32 x i8> %res @@ -362,130 +362,130 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ldr w8, [sp, #336] +; CHECK-NEXT: ldr w9, [sp, #208] ; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: ldr w9, [sp, #80] -; CHECK-NEXT: ldr w10, [sp, #208] +; CHECK-NEXT: ldr w10, [sp, #80] +; CHECK-NEXT: ldr w11, [sp, #216] +; CHECK-NEXT: movi.16b v4, #1 ; CHECK-NEXT: fmov s3, w8 +; CHECK-NEXT: fmov s2, w9 ; CHECK-NEXT: ldr w8, [sp, #344] -; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: ldr w9, [sp, #216] -; CHECK-NEXT: fmov s2, w10 -; CHECK-NEXT: ldr w10, [sp, #352] -; CHECK-NEXT: mov.b v3[1], w8 -; CHECK-NEXT: ldr w8, [sp, #88] +; CHECK-NEXT: fmov s1, w10 +; CHECK-NEXT: ldr w12, [sp, #88] ; CHECK-NEXT: mov.b v0[1], w1 -; CHECK-NEXT: ldr w11, [sp, #368] -; CHECK-NEXT: mov.b v2[1], w9 -; CHECK-NEXT: ldr w9, [sp, #96] -; CHECK-NEXT: mov.b v1[1], w8 -; CHECK-NEXT: ldr w8, [sp, #360] -; CHECK-NEXT: mov.b v3[2], w10 -; CHECK-NEXT: ldr w10, [sp, #224] +; CHECK-NEXT: ldr w9, [sp, #224] +; CHECK-NEXT: ldr w10, [sp, #96] +; CHECK-NEXT: mov.b v3[1], w8 +; CHECK-NEXT: mov.b v2[1], w11 +; CHECK-NEXT: ldr w8, [sp, #352] +; CHECK-NEXT: mov.b v1[1], w12 +; CHECK-NEXT: ldr w11, [sp, #144] ; CHECK-NEXT: mov.b v0[2], w2 -; CHECK-NEXT: ldr w12, [sp, #384] -; CHECK-NEXT: ldr w13, [sp, #400] -; CHECK-NEXT: mov.b v1[2], w9 -; CHECK-NEXT: ldr w9, [sp, #376] -; CHECK-NEXT: mov.b v2[2], w10 +; CHECK-NEXT: mov.b v3[2], w8 +; CHECK-NEXT: mov.b v2[2], w9 +; CHECK-NEXT: 
ldr w8, [sp, #360] +; CHECK-NEXT: mov.b v1[2], w10 +; CHECK-NEXT: ldr w9, [sp, #232] ; CHECK-NEXT: ldr w10, [sp, #104] -; CHECK-NEXT: mov.b v3[3], w8 -; CHECK-NEXT: ldr w8, [sp, #232] ; CHECK-NEXT: mov.b v0[3], w3 -; CHECK-NEXT: ldr w14, [sp, #416] +; CHECK-NEXT: mov.b v3[3], w8 +; CHECK-NEXT: mov.b v2[3], w9 +; CHECK-NEXT: ldr w8, [sp, #368] ; CHECK-NEXT: mov.b v1[3], w10 -; CHECK-NEXT: ldr w10, [sp, #392] -; CHECK-NEXT: mov.b v2[3], w8 -; CHECK-NEXT: ldr w8, [sp, #112] -; CHECK-NEXT: mov.b v3[4], w11 -; CHECK-NEXT: ldr w11, [sp, #240] +; CHECK-NEXT: ldr w9, [sp, #240] +; CHECK-NEXT: ldr w10, [sp, #112] ; CHECK-NEXT: mov.b v0[4], w4 -; CHECK-NEXT: ldr w15, [sp, #432] -; CHECK-NEXT: mov.b v1[4], w8 -; CHECK-NEXT: ldr w8, [sp, #408] -; CHECK-NEXT: mov.b v2[4], w11 -; CHECK-NEXT: ldr w11, [sp, #120] -; CHECK-NEXT: mov.b v3[5], w9 +; CHECK-NEXT: mov.b v3[4], w8 +; CHECK-NEXT: mov.b v2[4], w9 +; CHECK-NEXT: ldr w8, [sp, #376] +; CHECK-NEXT: mov.b v1[4], w10 ; CHECK-NEXT: ldr w9, [sp, #248] +; CHECK-NEXT: ldr w10, [sp, #120] ; CHECK-NEXT: mov.b v0[5], w5 -; CHECK-NEXT: ldr w16, [sp, #448] -; CHECK-NEXT: mov.b v1[5], w11 -; CHECK-NEXT: ldr w11, [sp, #424] +; CHECK-NEXT: mov.b v3[5], w8 ; CHECK-NEXT: mov.b v2[5], w9 -; CHECK-NEXT: ldr w9, [sp, #128] -; CHECK-NEXT: mov.b v3[6], w12 -; CHECK-NEXT: ldr w12, [sp, #256] +; CHECK-NEXT: ldr w8, [sp, #384] +; CHECK-NEXT: mov.b v1[5], w10 +; CHECK-NEXT: ldr w9, [sp, #256] +; CHECK-NEXT: ldr w10, [sp, #128] ; CHECK-NEXT: mov.b v0[6], w6 -; CHECK-NEXT: mov.b v1[6], w9 -; CHECK-NEXT: ldr w9, [sp, #440] -; CHECK-NEXT: mov.b v2[6], w12 -; CHECK-NEXT: ldr w12, [sp, #136] -; CHECK-NEXT: mov.b v3[7], w10 -; CHECK-NEXT: ldr w10, [sp, #264] +; CHECK-NEXT: mov.b v3[6], w8 +; CHECK-NEXT: mov.b v2[6], w9 +; CHECK-NEXT: ldr w8, [sp, #392] +; CHECK-NEXT: mov.b v1[6], w10 +; CHECK-NEXT: ldr w9, [sp, #264] +; CHECK-NEXT: ldr w10, [sp, #136] ; CHECK-NEXT: mov.b v0[7], w7 -; CHECK-NEXT: mov.b v1[7], w12 -; CHECK-NEXT: ldr w12, [sp, #16] -; CHECK-NEXT: mov.b v2[7], w10 -; CHECK-NEXT: ldr w10, [sp, #144] -; CHECK-NEXT: mov.b v3[8], w13 -; CHECK-NEXT: ldr w13, [sp, #272] -; CHECK-NEXT: mov.b v0[8], w12 -; CHECK-NEXT: ldr w12, [sp, #456] -; CHECK-NEXT: mov.b v1[8], w10 -; CHECK-NEXT: ldr w10, [sp, #24] -; CHECK-NEXT: mov.b v2[8], w13 -; CHECK-NEXT: ldr w13, [sp, #152] -; CHECK-NEXT: mov.b v3[9], w8 -; CHECK-NEXT: ldr w8, [sp, #280] -; CHECK-NEXT: mov.b v0[9], w10 +; CHECK-NEXT: mov.b v3[7], w8 +; CHECK-NEXT: mov.b v2[7], w9 +; CHECK-NEXT: ldr w8, [sp, #16] +; CHECK-NEXT: mov.b v1[7], w10 +; CHECK-NEXT: ldr w9, [sp, #400] +; CHECK-NEXT: ldr w10, [sp, #272] +; CHECK-NEXT: mov.b v0[8], w8 +; CHECK-NEXT: ldr w8, [sp, #24] +; CHECK-NEXT: mov.b v3[8], w9 +; CHECK-NEXT: mov.b v2[8], w10 +; CHECK-NEXT: ldr w9, [sp, #408] +; CHECK-NEXT: mov.b v1[8], w11 +; CHECK-NEXT: ldr w10, [sp, #280] +; CHECK-NEXT: ldr w11, [sp, #152] +; CHECK-NEXT: mov.b v0[9], w8 +; CHECK-NEXT: ldr w8, [sp, #32] +; CHECK-NEXT: mov.b v3[9], w9 +; CHECK-NEXT: mov.b v2[9], w10 +; CHECK-NEXT: ldr w9, [sp, #416] +; CHECK-NEXT: mov.b v1[9], w11 ; CHECK-NEXT: ldr w10, [sp, #288] -; CHECK-NEXT: mov.b v1[9], w13 -; CHECK-NEXT: ldr w13, [sp, #32] -; CHECK-NEXT: mov.b v2[9], w8 -; CHECK-NEXT: ldr w8, [sp, #160] -; CHECK-NEXT: mov.b v3[10], w14 -; CHECK-NEXT: ldr w14, [sp, #296] -; CHECK-NEXT: mov.b v0[10], w13 -; CHECK-NEXT: ldr w13, [sp, #312] -; CHECK-NEXT: mov.b v1[10], w8 +; CHECK-NEXT: ldr w11, [sp, #160] +; CHECK-NEXT: mov.b v0[10], w8 ; CHECK-NEXT: ldr w8, [sp, #40] +; CHECK-NEXT: mov.b v3[10], w9 ; CHECK-NEXT: mov.b 
v2[10], w10 -; CHECK-NEXT: ldr w10, [sp, #168] -; CHECK-NEXT: mov.b v3[11], w11 -; CHECK-NEXT: ldr w11, [sp, #304] +; CHECK-NEXT: ldr w9, [sp, #424] +; CHECK-NEXT: mov.b v1[10], w11 +; CHECK-NEXT: ldr w10, [sp, #296] +; CHECK-NEXT: ldr w11, [sp, #168] ; CHECK-NEXT: mov.b v0[11], w8 ; CHECK-NEXT: ldr w8, [sp, #48] -; CHECK-NEXT: mov.b v1[11], w10 -; CHECK-NEXT: ldr w10, [sp, #176] -; CHECK-NEXT: mov.b v2[11], w14 -; CHECK-NEXT: mov.b v3[12], w15 +; CHECK-NEXT: mov.b v3[11], w9 +; CHECK-NEXT: mov.b v2[11], w10 +; CHECK-NEXT: ldr w9, [sp, #432] +; CHECK-NEXT: mov.b v1[11], w11 +; CHECK-NEXT: ldr w10, [sp, #304] +; CHECK-NEXT: ldr w11, [sp, #176] ; CHECK-NEXT: mov.b v0[12], w8 ; CHECK-NEXT: ldr w8, [sp, #56] -; CHECK-NEXT: mov.b v1[12], w10 -; CHECK-NEXT: ldr w10, [sp, #184] -; CHECK-NEXT: mov.b v2[12], w11 -; CHECK-NEXT: ldr w11, [sp, #328] -; CHECK-NEXT: mov.b v3[13], w9 -; CHECK-NEXT: ldr w9, [sp, #320] +; CHECK-NEXT: mov.b v3[12], w9 +; CHECK-NEXT: mov.b v2[12], w10 +; CHECK-NEXT: ldr w9, [sp, #440] +; CHECK-NEXT: mov.b v1[12], w11 +; CHECK-NEXT: ldr w10, [sp, #312] +; CHECK-NEXT: ldr w11, [sp, #184] ; CHECK-NEXT: mov.b v0[13], w8 ; CHECK-NEXT: ldr w8, [sp, #64] -; CHECK-NEXT: mov.b v1[13], w10 -; CHECK-NEXT: ldr w10, [sp, #192] -; CHECK-NEXT: mov.b v2[13], w13 -; CHECK-NEXT: mov.b v3[14], w16 +; CHECK-NEXT: mov.b v3[13], w9 +; CHECK-NEXT: mov.b v2[13], w10 +; CHECK-NEXT: ldr w9, [sp, #448] +; CHECK-NEXT: mov.b v1[13], w11 +; CHECK-NEXT: ldr w10, [sp, #320] +; CHECK-NEXT: ldr w11, [sp, #192] ; CHECK-NEXT: mov.b v0[14], w8 ; CHECK-NEXT: ldr w8, [sp, #72] -; CHECK-NEXT: mov.b v1[14], w10 -; CHECK-NEXT: mov.b v2[14], w9 -; CHECK-NEXT: ldr w9, [sp, #200] -; CHECK-NEXT: movi.16b v4, #1 +; CHECK-NEXT: mov.b v3[14], w9 +; CHECK-NEXT: mov.b v2[14], w10 +; CHECK-NEXT: ldr w9, [sp, #456] +; CHECK-NEXT: mov.b v1[14], w11 +; CHECK-NEXT: ldr w10, [sp, #328] +; CHECK-NEXT: ldr w11, [sp, #200] ; CHECK-NEXT: mov.b v0[15], w8 -; CHECK-NEXT: mov.b v1[15], w9 -; CHECK-NEXT: mov.b v2[15], w11 -; CHECK-NEXT: mov.b v3[15], w12 +; CHECK-NEXT: mov.b v3[15], w9 +; CHECK-NEXT: mov.b v2[15], w10 +; CHECK-NEXT: mov.b v1[15], w11 ; CHECK-NEXT: and.16b v0, v0, v4 -; CHECK-NEXT: and.16b v1, v1, v4 ; CHECK-NEXT: and.16b v2, v2, v4 ; CHECK-NEXT: and.16b v3, v3, v4 +; CHECK-NEXT: and.16b v1, v1, v4 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %res = zext <64 x i1> %arg to <64 x i8> @@ -499,133 +499,133 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ldr w8, [sp, #336] -; CHECK-NEXT: fmov s3, w0 -; CHECK-NEXT: ldr w9, [sp, #80] -; CHECK-NEXT: ldr w10, [sp, #208] +; CHECK-NEXT: ldr w9, [sp, #208] +; CHECK-NEXT: fmov s2, w0 +; CHECK-NEXT: ldr w10, [sp, #80] +; CHECK-NEXT: ldr w11, [sp, #216] +; CHECK-NEXT: ldr w12, [sp, #88] ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: ldr w8, [sp, #344] ; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: ldr w9, [sp, #88] -; CHECK-NEXT: fmov s2, w10 +; CHECK-NEXT: ldr w8, [sp, #344] +; CHECK-NEXT: fmov s3, w10 +; CHECK-NEXT: mov.b v2[1], w1 +; CHECK-NEXT: ldr w9, [sp, #224] ; CHECK-NEXT: ldr w10, [sp, #96] ; CHECK-NEXT: mov.b v0[1], w8 -; CHECK-NEXT: ldr w8, [sp, #216] -; CHECK-NEXT: mov.b v1[1], w9 -; CHECK-NEXT: ldr w9, [sp, #352] -; CHECK-NEXT: mov.b v3[1], w1 -; CHECK-NEXT: ldr w11, [sp, #104] -; CHECK-NEXT: mov.b v2[1], w8 +; CHECK-NEXT: mov.b v1[1], w11 +; CHECK-NEXT: ldr w8, [sp, #352] +; CHECK-NEXT: mov.b v3[1], w12 +; CHECK-NEXT: ldr w11, [sp, #144] +; CHECK-NEXT: mov.b v2[2], w2 +; CHECK-NEXT: mov.b v0[2], w8 +; CHECK-NEXT: 
mov.b v1[2], w9 ; CHECK-NEXT: ldr w8, [sp, #360] -; CHECK-NEXT: mov.b v0[2], w9 -; CHECK-NEXT: ldr w9, [sp, #224] -; CHECK-NEXT: mov.b v1[2], w10 -; CHECK-NEXT: ldr w10, [sp, #368] -; CHECK-NEXT: mov.b v3[2], w2 -; CHECK-NEXT: ldr w12, [sp, #112] -; CHECK-NEXT: mov.b v2[2], w9 -; CHECK-NEXT: ldr w9, [sp, #376] +; CHECK-NEXT: mov.b v3[2], w10 +; CHECK-NEXT: ldr w9, [sp, #232] +; CHECK-NEXT: ldr w10, [sp, #104] +; CHECK-NEXT: mov.b v2[3], w3 ; CHECK-NEXT: mov.b v0[3], w8 -; CHECK-NEXT: ldr w8, [sp, #232] -; CHECK-NEXT: mov.b v1[3], w11 -; CHECK-NEXT: ldr w13, [sp, #120] -; CHECK-NEXT: mov.b v3[3], w3 -; CHECK-NEXT: ldr w11, [sp, #384] -; CHECK-NEXT: mov.b v2[3], w8 -; CHECK-NEXT: ldr w14, [sp, #128] -; CHECK-NEXT: mov.b v0[4], w10 -; CHECK-NEXT: ldr w10, [sp, #240] -; CHECK-NEXT: mov.b v1[4], w12 -; CHECK-NEXT: ldr w8, [sp, #392] -; CHECK-NEXT: mov.b v3[4], w4 -; CHECK-NEXT: ldr w15, [sp, #136] -; CHECK-NEXT: mov.b v2[4], w10 -; CHECK-NEXT: ldr w12, [sp, #400] -; CHECK-NEXT: mov.b v0[5], w9 +; CHECK-NEXT: mov.b v1[3], w9 +; CHECK-NEXT: ldr w8, [sp, #368] +; CHECK-NEXT: mov.b v3[3], w10 +; CHECK-NEXT: ldr w9, [sp, #240] +; CHECK-NEXT: ldr w10, [sp, #112] +; CHECK-NEXT: mov.b v2[4], w4 +; CHECK-NEXT: mov.b v0[4], w8 +; CHECK-NEXT: mov.b v1[4], w9 +; CHECK-NEXT: ldr w8, [sp, #376] +; CHECK-NEXT: mov.b v3[4], w10 ; CHECK-NEXT: ldr w9, [sp, #248] -; CHECK-NEXT: mov.b v1[5], w13 -; CHECK-NEXT: ldr w16, [sp, #144] -; CHECK-NEXT: mov.b v3[5], w5 -; CHECK-NEXT: ldr w10, [sp, #408] -; CHECK-NEXT: mov.b v2[5], w9 -; CHECK-NEXT: ldr w13, [sp, #416] -; CHECK-NEXT: mov.b v0[6], w11 -; CHECK-NEXT: ldr w11, [sp, #256] -; CHECK-NEXT: mov.b v1[6], w14 -; CHECK-NEXT: ldr w9, [sp, #424] -; CHECK-NEXT: mov.b v3[6], w6 -; CHECK-NEXT: ldr w14, [sp, #432] -; CHECK-NEXT: mov.b v2[6], w11 -; CHECK-NEXT: ldr w11, [sp, #440] +; CHECK-NEXT: ldr w10, [sp, #120] +; CHECK-NEXT: mov.b v2[5], w5 +; CHECK-NEXT: mov.b v0[5], w8 +; CHECK-NEXT: mov.b v1[5], w9 +; CHECK-NEXT: ldr w8, [sp, #384] +; CHECK-NEXT: mov.b v3[5], w10 +; CHECK-NEXT: ldr w9, [sp, #256] +; CHECK-NEXT: ldr w10, [sp, #128] +; CHECK-NEXT: mov.b v2[6], w6 +; CHECK-NEXT: mov.b v0[6], w8 +; CHECK-NEXT: mov.b v1[6], w9 +; CHECK-NEXT: ldr w8, [sp, #392] +; CHECK-NEXT: mov.b v3[6], w10 +; CHECK-NEXT: ldr w9, [sp, #264] +; CHECK-NEXT: ldr w10, [sp, #136] +; CHECK-NEXT: mov.b v2[7], w7 ; CHECK-NEXT: mov.b v0[7], w8 -; CHECK-NEXT: ldr w8, [sp, #264] -; CHECK-NEXT: mov.b v1[7], w15 -; CHECK-NEXT: ldr w15, [sp, #448] -; CHECK-NEXT: mov.b v3[7], w7 -; CHECK-NEXT: mov.b v2[7], w8 +; CHECK-NEXT: mov.b v1[7], w9 ; CHECK-NEXT: ldr w8, [sp, #16] -; CHECK-NEXT: mov.b v0[8], w12 -; CHECK-NEXT: ldr w12, [sp, #272] -; CHECK-NEXT: mov.b v1[8], w16 -; CHECK-NEXT: ldr w16, [sp, #456] -; CHECK-NEXT: mov.b v3[8], w8 -; CHECK-NEXT: ldr w8, [sp, #152] -; CHECK-NEXT: mov.b v2[8], w12 -; CHECK-NEXT: ldr w12, [sp, #24] -; CHECK-NEXT: mov.b v0[9], w10 +; CHECK-NEXT: mov.b v3[7], w10 +; CHECK-NEXT: ldr w9, [sp, #400] +; CHECK-NEXT: ldr w10, [sp, #272] +; CHECK-NEXT: mov.b v2[8], w8 +; CHECK-NEXT: ldr w8, [sp, #24] +; CHECK-NEXT: mov.b v0[8], w9 +; CHECK-NEXT: mov.b v1[8], w10 +; CHECK-NEXT: ldr w9, [sp, #408] +; CHECK-NEXT: mov.b v3[8], w11 ; CHECK-NEXT: ldr w10, [sp, #280] -; CHECK-NEXT: mov.b v1[9], w8 -; CHECK-NEXT: ldr w8, [sp, #288] -; CHECK-NEXT: mov.b v3[9], w12 -; CHECK-NEXT: ldr w12, [sp, #160] -; CHECK-NEXT: mov.b v2[9], w10 -; CHECK-NEXT: ldr w10, [sp, #32] -; CHECK-NEXT: mov.b v0[10], w13 -; CHECK-NEXT: ldr w13, [sp, #296] -; CHECK-NEXT: mov.b v1[10], w12 -; CHECK-NEXT: ldr w12, 
[sp, #168] -; CHECK-NEXT: mov.b v3[10], w10 -; CHECK-NEXT: ldr w10, [sp, #176] +; CHECK-NEXT: ldr w11, [sp, #152] +; CHECK-NEXT: mov.b v2[9], w8 +; CHECK-NEXT: ldr w8, [sp, #32] +; CHECK-NEXT: mov.b v0[9], w9 +; CHECK-NEXT: mov.b v1[9], w10 +; CHECK-NEXT: ldr w9, [sp, #416] +; CHECK-NEXT: mov.b v3[9], w11 +; CHECK-NEXT: ldr w10, [sp, #288] +; CHECK-NEXT: ldr w11, [sp, #160] ; CHECK-NEXT: mov.b v2[10], w8 ; CHECK-NEXT: ldr w8, [sp, #40] -; CHECK-NEXT: mov.b v0[11], w9 -; CHECK-NEXT: ldr w9, [sp, #304] -; CHECK-NEXT: mov.b v1[11], w12 -; CHECK-NEXT: ldr w12, [sp, #312] -; CHECK-NEXT: mov.b v3[11], w8 +; CHECK-NEXT: mov.b v0[10], w9 +; CHECK-NEXT: mov.b v1[10], w10 +; CHECK-NEXT: ldr w9, [sp, #424] +; CHECK-NEXT: mov.b v3[10], w11 +; CHECK-NEXT: ldr w10, [sp, #296] +; CHECK-NEXT: ldr w11, [sp, #168] +; CHECK-NEXT: mov.b v2[11], w8 ; CHECK-NEXT: ldr w8, [sp, #48] -; CHECK-NEXT: mov.b v2[11], w13 -; CHECK-NEXT: mov.b v0[12], w14 -; CHECK-NEXT: mov.b v1[12], w10 -; CHECK-NEXT: ldr w10, [sp, #184] -; CHECK-NEXT: mov.b v3[12], w8 +; CHECK-NEXT: mov.b v0[11], w9 +; CHECK-NEXT: mov.b v1[11], w10 +; CHECK-NEXT: ldr w9, [sp, #432] +; CHECK-NEXT: mov.b v3[11], w11 +; CHECK-NEXT: ldr w10, [sp, #304] +; CHECK-NEXT: ldr w11, [sp, #176] +; CHECK-NEXT: mov.b v2[12], w8 ; CHECK-NEXT: ldr w8, [sp, #56] -; CHECK-NEXT: mov.b v2[12], w9 -; CHECK-NEXT: ldr w9, [sp, #320] -; CHECK-NEXT: mov.b v0[13], w11 -; CHECK-NEXT: ldr w11, [sp, #328] -; CHECK-NEXT: mov.b v1[13], w10 -; CHECK-NEXT: ldr w10, [sp, #192] -; CHECK-NEXT: mov.b v3[13], w8 +; CHECK-NEXT: mov.b v0[12], w9 +; CHECK-NEXT: mov.b v1[12], w10 +; CHECK-NEXT: ldr w9, [sp, #440] +; CHECK-NEXT: mov.b v3[12], w11 +; CHECK-NEXT: ldr w10, [sp, #312] +; CHECK-NEXT: ldr w11, [sp, #184] +; CHECK-NEXT: mov.b v2[13], w8 ; CHECK-NEXT: ldr w8, [sp, #64] -; CHECK-NEXT: mov.b v2[13], w12 -; CHECK-NEXT: mov.b v0[14], w15 -; CHECK-NEXT: mov.b v1[14], w10 -; CHECK-NEXT: ldr w10, [sp, #200] -; CHECK-NEXT: mov.b v3[14], w8 +; CHECK-NEXT: mov.b v0[13], w9 +; CHECK-NEXT: mov.b v1[13], w10 +; CHECK-NEXT: ldr w9, [sp, #448] +; CHECK-NEXT: mov.b v3[13], w11 +; CHECK-NEXT: ldr w10, [sp, #320] +; CHECK-NEXT: ldr w11, [sp, #192] +; CHECK-NEXT: mov.b v2[14], w8 ; CHECK-NEXT: ldr w8, [sp, #72] -; CHECK-NEXT: mov.b v2[14], w9 -; CHECK-NEXT: mov.b v0[15], w16 +; CHECK-NEXT: mov.b v0[14], w9 +; CHECK-NEXT: mov.b v1[14], w10 +; CHECK-NEXT: ldr w9, [sp, #456] +; CHECK-NEXT: mov.b v3[14], w11 +; CHECK-NEXT: ldr w10, [sp, #328] +; CHECK-NEXT: ldr w11, [sp, #200] +; CHECK-NEXT: mov.b v2[15], w8 +; CHECK-NEXT: mov.b v0[15], w9 ; CHECK-NEXT: mov.b v1[15], w10 -; CHECK-NEXT: mov.b v3[15], w8 -; CHECK-NEXT: mov.b v2[15], w11 -; CHECK-NEXT: shl.16b v4, v0, #7 -; CHECK-NEXT: shl.16b v1, v1, #7 -; CHECK-NEXT: shl.16b v3, v3, #7 +; CHECK-NEXT: mov.b v3[15], w11 ; CHECK-NEXT: shl.16b v2, v2, #7 -; CHECK-NEXT: cmlt.16b v0, v3, #0 -; CHECK-NEXT: cmlt.16b v1, v1, #0 -; CHECK-NEXT: cmlt.16b v2, v2, #0 -; CHECK-NEXT: cmlt.16b v3, v4, #0 +; CHECK-NEXT: shl.16b v4, v1, #7 +; CHECK-NEXT: shl.16b v5, v0, #7 +; CHECK-NEXT: shl.16b v3, v3, #7 +; CHECK-NEXT: cmlt.16b v0, v2, #0 +; CHECK-NEXT: cmlt.16b v2, v4, #0 +; CHECK-NEXT: cmlt.16b v1, v3, #0 +; CHECK-NEXT: cmlt.16b v3, v5, #0 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %res = sext <64 x i1> %arg to <64 x i8> diff --git a/llvm/test/CodeGen/AArch64/arm64-tbl.ll b/llvm/test/CodeGen/AArch64/arm64-tbl.ll --- a/llvm/test/CodeGen/AArch64/arm64-tbl.ll +++ b/llvm/test/CodeGen/AArch64/arm64-tbl.ll @@ -107,9 +107,9 @@ ; CHECK-NEXT: adrp x8, 
.LCPI8_0 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 +; CHECK-NEXT: ldr d4, [x8, :lo12:.LCPI8_0] ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 -; CHECK-NEXT: ldr d4, [x8, :lo12:.LCPI8_0] ; CHECK-NEXT: tbl.8b v0, { v0, v1 }, v4 ; CHECK-NEXT: tbl.8b v1, { v2, v3 }, v4 ; CHECK-NEXT: mov.s v0[1], v1[1] @@ -142,11 +142,11 @@ define <16 x i8> @shuffled_tbl2_to_tbl4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { ; CHECK-LABEL: shuffled_tbl2_to_tbl4: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI9_0 ; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: adrp x8, .LCPI9_0 ; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI9_0] +; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 ; CHECK-NEXT: ret @@ -160,11 +160,11 @@ ; CHECK-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask: ; CHECK: // %bb.0: ; CHECK-NEXT: fmov s4, w0 -; CHECK-NEXT: mov w8, #32 +; CHECK-NEXT: mov w8, #32 // =0x20 ; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: mov.b v4[1], w0 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: mov.b v4[1], w0 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: mov.b v4[2], w0 ; CHECK-NEXT: mov.b v4[3], w0 @@ -173,19 +173,19 @@ ; CHECK-NEXT: mov.b v4[6], w0 ; CHECK-NEXT: mov.b v4[7], w0 ; CHECK-NEXT: mov.b v4[8], w8 -; CHECK-NEXT: mov w8, #36 +; CHECK-NEXT: mov w8, #36 // =0x24 ; CHECK-NEXT: mov.b v4[9], w8 -; CHECK-NEXT: mov w8, #40 +; CHECK-NEXT: mov w8, #40 // =0x28 ; CHECK-NEXT: mov.b v4[10], w8 -; CHECK-NEXT: mov w8, #44 +; CHECK-NEXT: mov w8, #44 // =0x2c ; CHECK-NEXT: mov.b v4[11], w8 -; CHECK-NEXT: mov w8, #48 +; CHECK-NEXT: mov w8, #48 // =0x30 ; CHECK-NEXT: mov.b v4[12], w8 -; CHECK-NEXT: mov w8, #52 +; CHECK-NEXT: mov w8, #52 // =0x34 ; CHECK-NEXT: mov.b v4[13], w8 -; CHECK-NEXT: mov w8, #56 +; CHECK-NEXT: mov w8, #56 // =0x38 ; CHECK-NEXT: mov.b v4[14], w8 -; CHECK-NEXT: mov w8, #60 +; CHECK-NEXT: mov w8, #60 // =0x3c ; CHECK-NEXT: mov.b v4[15], w8 ; CHECK-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 ; CHECK-NEXT: ret @@ -214,11 +214,11 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_first_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) { ; CHECK-LABEL: shuffled_tbl2_to_tbl4_nonconst_first_mask2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: fmov s4, w8 ; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: fmov s4, w8 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: mov.b v4[1], w8 ; CHECK-NEXT: mov.b v4[2], w8 @@ -226,22 +226,22 @@ ; CHECK-NEXT: mov.b v4[4], w8 ; CHECK-NEXT: mov.b v4[5], w8 ; CHECK-NEXT: mov.b v4[6], w8 -; CHECK-NEXT: mov w8, #32 +; 
CHECK-NEXT: mov w8, #32 // =0x20 ; CHECK-NEXT: mov.b v4[7], w0 ; CHECK-NEXT: mov.b v4[8], w8 -; CHECK-NEXT: mov w8, #36 +; CHECK-NEXT: mov w8, #36 // =0x24 ; CHECK-NEXT: mov.b v4[9], w8 -; CHECK-NEXT: mov w8, #40 +; CHECK-NEXT: mov w8, #40 // =0x28 ; CHECK-NEXT: mov.b v4[10], w8 -; CHECK-NEXT: mov w8, #44 +; CHECK-NEXT: mov w8, #44 // =0x2c ; CHECK-NEXT: mov.b v4[11], w8 -; CHECK-NEXT: mov w8, #48 +; CHECK-NEXT: mov w8, #48 // =0x30 ; CHECK-NEXT: mov.b v4[12], w8 -; CHECK-NEXT: mov w8, #52 +; CHECK-NEXT: mov w8, #52 // =0x34 ; CHECK-NEXT: mov.b v4[13], w8 -; CHECK-NEXT: mov w8, #56 +; CHECK-NEXT: mov w8, #56 // =0x38 ; CHECK-NEXT: mov.b v4[14], w8 -; CHECK-NEXT: mov w8, #31 +; CHECK-NEXT: mov w8, #31 // =0x1f ; CHECK-NEXT: mov.b v4[15], w8 ; CHECK-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 ; CHECK-NEXT: ret @@ -274,11 +274,11 @@ ; CHECK-NEXT: adrp x8, .LCPI12_0 ; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 +; CHECK-NEXT: ldr q5, [x8, :lo12:.LCPI12_0] ; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: ldr q5, [x8, :lo12:.LCPI12_0] -; CHECK-NEXT: mov.b v4[0], w0 ; CHECK-NEXT: tbl.16b v2, { v2, v3 }, v5 +; CHECK-NEXT: mov.b v4[0], w0 ; CHECK-NEXT: mov.b v4[1], w0 ; CHECK-NEXT: mov.b v4[2], w0 ; CHECK-NEXT: mov.b v4[3], w0 @@ -315,22 +315,22 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_nonconst_second_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d, i8 %v) { ; CHECK-LABEL: shuffled_tbl2_to_tbl4_nonconst_second_mask2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #255 ; CHECK-NEXT: dup.16b v4, w0 -; CHECK-NEXT: adrp x9, .LCPI13_0 +; CHECK-NEXT: mov w8, #255 // =0xff ; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q2_q3 def $q2_q3 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q2_q3 def $q2_q3 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: mov.b v4[8], w8 -; CHECK-NEXT: ldr q5, [x9, :lo12:.LCPI13_0] ; CHECK-NEXT: mov.b v4[9], w8 -; CHECK-NEXT: tbl.16b v2, { v2, v3 }, v5 ; CHECK-NEXT: mov.b v4[10], w8 ; CHECK-NEXT: mov.b v4[11], w8 ; CHECK-NEXT: mov.b v4[12], w8 ; CHECK-NEXT: mov.b v4[13], w8 +; CHECK-NEXT: adrp x8, .LCPI13_0 +; CHECK-NEXT: ldr q5, [x8, :lo12:.LCPI13_0] ; CHECK-NEXT: adrp x8, .LCPI13_1 +; CHECK-NEXT: tbl.16b v2, { v2, v3 }, v5 ; CHECK-NEXT: tbl.16b v3, { v0, v1 }, v4 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI13_1] ; CHECK-NEXT: tbl.16b v0, { v2, v3 }, v0 @@ -379,11 +379,11 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_shuffle(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { ; CHECK-LABEL: shuffled_tbl2_to_tbl4_mixed_shuffle: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI14_0 ; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: adrp x8, .LCPI14_0 ; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI14_0] +; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 ; CHECK-NEXT: ret @@ -414,11 +414,11 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { ; CHECK-LABEL: 
shuffled_tbl2_to_tbl4_mixed_tbl2_mask1: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI15_0 ; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: adrp x8, .LCPI15_0 ; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI15_0] +; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 ; CHECK-NEXT: ret @@ -449,11 +449,11 @@ define <16 x i8> @shuffled_tbl2_to_tbl4_mixed_tbl2_mask2(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, <16 x i8> %d) { ; CHECK-LABEL: shuffled_tbl2_to_tbl4_mixed_tbl2_mask2: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI16_0 ; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: adrp x8, .LCPI16_0 ; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI16_0] +; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: tbl.16b v0, { v0, v1, v2, v3 }, v4 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll --- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll @@ -49,12 +49,12 @@ ; FALLBACK-NOT: remark:{{.*}} sabdl2_8h define <8 x i16> @sabdl2_8h(ptr %A, ptr %B) nounwind { -; DAG-LABEL: sabdl2_8h: -; DAG: // %bb.0: -; DAG-NEXT: ldr d0, [x0, #8] -; DAG-NEXT: ldr d1, [x1, #8] -; DAG-NEXT: sabdl.8h v0, v0, v1 -; DAG-NEXT: ret +; DAG-LABEL: sabdl2_8h: +; DAG: // %bb.0: +; DAG-NEXT: ldr d0, [x0, #8] +; DAG-NEXT: ldr d1, [x1, #8] +; DAG-NEXT: sabdl.8h v0, v0, v1 +; DAG-NEXT: ret ; ; GISEL-LABEL: sabdl2_8h: ; GISEL: // %bb.0: @@ -62,7 +62,7 @@ ; GISEL-NEXT: ldr q1, [x1] ; GISEL-NEXT: ext.16b v0, v0, v0, #8 ; GISEL-NEXT: ext.16b v1, v1, v0, #8 -; GISEL-NEXT: sabdl.8h v0, v0, v1 +; GISEL-NEXT: sabdl.8h v0, v0, v1 ; GISEL-NEXT: ret %load1 = load <16 x i8>, ptr %A %load2 = load <16 x i8>, ptr %B @@ -75,12 +75,12 @@ ; FALLBACK-NOT: remark:{{.*}} sabdl2_4s define <4 x i32> @sabdl2_4s(ptr %A, ptr %B) nounwind { -; DAG-LABEL: sabdl2_4s: -; DAG: // %bb.0: -; DAG-NEXT: ldr d0, [x0, #8] -; DAG-NEXT: ldr d1, [x1, #8] -; DAG-NEXT: sabdl.4s v0, v0, v1 -; DAG-NEXT: ret +; DAG-LABEL: sabdl2_4s: +; DAG: // %bb.0: +; DAG-NEXT: ldr d0, [x0, #8] +; DAG-NEXT: ldr d1, [x1, #8] +; DAG-NEXT: sabdl.4s v0, v0, v1 +; DAG-NEXT: ret ; ; GISEL-LABEL: sabdl2_4s: ; GISEL: // %bb.0: @@ -101,12 +101,12 @@ ; FALLBACK-NOT: remark:{{.*}} sabdl2_2d define <2 x i64> @sabdl2_2d(ptr %A, ptr %B) nounwind { -; DAG-LABEL: sabdl2_2d: -; DAG: // %bb.0: -; DAG-NEXT: ldr d0, [x0, #8] -; DAG-NEXT: ldr d1, [x1, #8] -; DAG-NEXT: sabdl.2d v0, v0, v1 -; DAG-NEXT: ret +; DAG-LABEL: sabdl2_2d: +; DAG: // %bb.0: +; DAG-NEXT: ldr d0, [x0, #8] +; DAG-NEXT: ldr d1, [x1, #8] +; DAG-NEXT: sabdl.2d v0, v0, v1 +; DAG-NEXT: ret ; ; GISEL-LABEL: sabdl2_2d: ; GISEL: // %bb.0: @@ -172,12 +172,12 @@ ; FALLBACK-NOT: remark:{{.*}} uabdl2_8h define <8 x i16> @uabdl2_8h(ptr %A, ptr %B) nounwind { -; DAG-LABEL: uabdl2_8h: -; DAG: // %bb.0: -; DAG-NEXT: ldr d0, [x0, #8] -; DAG-NEXT: ldr d1, [x1, #8] -; DAG-NEXT: uabdl.8h v0, v0, v1 -; DAG-NEXT: 
ret +; DAG-LABEL: uabdl2_8h: +; DAG: // %bb.0: +; DAG-NEXT: ldr d0, [x0, #8] +; DAG-NEXT: ldr d1, [x1, #8] +; DAG-NEXT: uabdl.8h v0, v0, v1 +; DAG-NEXT: ret ; ; GISEL-LABEL: uabdl2_8h: ; GISEL: // %bb.0: @@ -185,7 +185,7 @@ ; GISEL-NEXT: ldr q1, [x1] ; GISEL-NEXT: ext.16b v0, v0, v0, #8 ; GISEL-NEXT: ext.16b v1, v1, v0, #8 -; GISEL-NEXT: uabdl.8h v0, v0, v1 +; GISEL-NEXT: uabdl.8h v0, v0, v1 ; GISEL-NEXT: ret %load1 = load <16 x i8>, ptr %A %load2 = load <16 x i8>, ptr %B @@ -199,12 +199,12 @@ ; FALLBACK-NOT: remark:{{.*}} uabdl2_4s define <4 x i32> @uabdl2_4s(ptr %A, ptr %B) nounwind { -; DAG-LABEL: uabdl2_4s: -; DAG: // %bb.0: -; DAG-NEXT: ldr d0, [x0, #8] -; DAG-NEXT: ldr d1, [x1, #8] -; DAG-NEXT: uabdl.4s v0, v0, v1 -; DAG-NEXT: ret +; DAG-LABEL: uabdl2_4s: +; DAG: // %bb.0: +; DAG-NEXT: ldr d0, [x0, #8] +; DAG-NEXT: ldr d1, [x1, #8] +; DAG-NEXT: uabdl.4s v0, v0, v1 +; DAG-NEXT: ret ; ; GISEL-LABEL: uabdl2_4s: ; GISEL: // %bb.0: @@ -212,7 +212,7 @@ ; GISEL-NEXT: ldr q1, [x1] ; GISEL-NEXT: ext.16b v0, v0, v0, #8 ; GISEL-NEXT: ext.16b v1, v1, v0, #8 -; GISEL-NEXT: uabdl.4s v0, v0, v1 +; GISEL-NEXT: uabdl.4s v0, v0, v1 ; GISEL-NEXT: ret %load1 = load <8 x i16>, ptr %A %load2 = load <8 x i16>, ptr %B @@ -225,12 +225,12 @@ ; FALLBACK-NOT: remark:{{.*}} uabdl2_2d define <2 x i64> @uabdl2_2d(ptr %A, ptr %B) nounwind { -; DAG-LABEL: uabdl2_2d: -; DAG: // %bb.0: -; DAG-NEXT: ldr d0, [x0, #8] -; DAG-NEXT: ldr d1, [x1, #8] -; DAG-NEXT: uabdl.2d v0, v0, v1 -; DAG-NEXT: ret +; DAG-LABEL: uabdl2_2d: +; DAG: // %bb.0: +; DAG-NEXT: ldr d0, [x0, #8] +; DAG-NEXT: ldr d1, [x1, #8] +; DAG-NEXT: uabdl.2d v0, v0, v1 +; DAG-NEXT: ret ; ; GISEL-LABEL: uabdl2_2d: ; GISEL: // %bb.0: @@ -238,7 +238,7 @@ ; GISEL-NEXT: ldr q1, [x1] ; GISEL-NEXT: ext.16b v0, v0, v0, #8 ; GISEL-NEXT: ext.16b v1, v1, v0, #8 -; GISEL-NEXT: uabdl.2d v0, v0, v1 +; GISEL-NEXT: uabdl.2d v0, v0, v1 ; GISEL-NEXT: ret %load1 = load <4 x i32>, ptr %A %load2 = load <4 x i32>, ptr %B @@ -361,10 +361,10 @@ ; ; GISEL-LABEL: uabdl4s_rdx_i32: ; GISEL: // %bb.0: -; GISEL-NEXT: movi.2d v2, #0000000000000000 ; GISEL-NEXT: usubl.4s v0, v0, v1 -; GISEL-NEXT: cmgt.4s v1, v2, v0 +; GISEL-NEXT: movi.2d v1, #0000000000000000 ; GISEL-NEXT: neg.4s v2, v0 +; GISEL-NEXT: cmgt.4s v1, v1, v0 ; GISEL-NEXT: bit.16b v0, v2, v1 ; GISEL-NEXT: addv.4s s0, v0 ; GISEL-NEXT: fmov w0, s0 @@ -432,10 +432,10 @@ ; ; GISEL-LABEL: uabdl2d_rdx_i64: ; GISEL: // %bb.0: -; GISEL-NEXT: movi.2d v2, #0000000000000000 ; GISEL-NEXT: usubl.2d v0, v0, v1 -; GISEL-NEXT: cmgt.2d v1, v2, v0 +; GISEL-NEXT: movi.2d v1, #0000000000000000 ; GISEL-NEXT: neg.2d v2, v0 +; GISEL-NEXT: cmgt.2d v1, v1, v0 ; GISEL-NEXT: bit.16b v0, v2, v1 ; GISEL-NEXT: addp.2d d0, v0 ; GISEL-NEXT: fmov x0, d0 @@ -954,21 +954,13 @@ ; FALLBACK-NOT: remark:{{.*}} sabal8h define <8 x i16> @sabal8h(ptr %A, ptr %B, ptr %C) nounwind { -; DAG-LABEL: sabal8h: -; DAG: // %bb.0: -; DAG-NEXT: ldr d1, [x1] -; DAG-NEXT: ldr d2, [x0] -; DAG-NEXT: ldr q0, [x2] -; DAG-NEXT: sabal.8h v0, v2, v1 -; DAG-NEXT: ret -; -; GISEL-LABEL: sabal8h: -; GISEL: // %bb.0: -; GISEL-NEXT: ldr d1, [x0] -; GISEL-NEXT: ldr d2, [x1] -; GISEL-NEXT: ldr q0, [x2] -; GISEL-NEXT: sabal.8h v0, v1, v2 -; GISEL-NEXT: ret +; CHECK-LABEL: sabal8h: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: ldr d2, [x1] +; CHECK-NEXT: ldr q0, [x2] +; CHECK-NEXT: sabal.8h v0, v1, v2 +; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp2 = load <8 x i8>, ptr %B %tmp3 = load <8 x i16>, ptr %C @@ -980,21 +972,13 @@ ; FALLBACK-NOT: remark:{{.*}} sabal4s define <4 x 
i32> @sabal4s(ptr %A, ptr %B, ptr %C) nounwind { -; DAG-LABEL: sabal4s: -; DAG: // %bb.0: -; DAG-NEXT: ldr d1, [x1] -; DAG-NEXT: ldr d2, [x0] -; DAG-NEXT: ldr q0, [x2] -; DAG-NEXT: sabal.4s v0, v2, v1 -; DAG-NEXT: ret -; -; GISEL-LABEL: sabal4s: -; GISEL: // %bb.0: -; GISEL-NEXT: ldr d1, [x0] -; GISEL-NEXT: ldr d2, [x1] -; GISEL-NEXT: ldr q0, [x2] -; GISEL-NEXT: sabal.4s v0, v1, v2 -; GISEL-NEXT: ret +; CHECK-LABEL: sabal4s: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: ldr d2, [x1] +; CHECK-NEXT: ldr q0, [x2] +; CHECK-NEXT: sabal.4s v0, v1, v2 +; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = load <4 x i16>, ptr %B %tmp3 = load <4 x i32>, ptr %C @@ -1006,21 +990,13 @@ ; FALLBACK-NOT: remark:{{.*}} sabal2d define <2 x i64> @sabal2d(ptr %A, ptr %B, ptr %C) nounwind { -; DAG-LABEL: sabal2d: -; DAG: // %bb.0: -; DAG-NEXT: ldr d1, [x1] -; DAG-NEXT: ldr d2, [x0] -; DAG-NEXT: ldr q0, [x2] -; DAG-NEXT: sabal.2d v0, v2, v1 -; DAG-NEXT: ret -; -; GISEL-LABEL: sabal2d: -; GISEL: // %bb.0: -; GISEL-NEXT: ldr d1, [x0] -; GISEL-NEXT: ldr d2, [x1] -; GISEL-NEXT: ldr q0, [x2] -; GISEL-NEXT: sabal.2d v0, v1, v2 -; GISEL-NEXT: ret +; CHECK-LABEL: sabal2d: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: ldr d2, [x1] +; CHECK-NEXT: ldr q0, [x2] +; CHECK-NEXT: sabal.2d v0, v1, v2 +; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = load <2 x i32>, ptr %B %tmp3 = load <2 x i64>, ptr %C @@ -1033,13 +1009,13 @@ ; FALLBACK-NOT: remark:{{.*}} sabal2_8h define <8 x i16> @sabal2_8h(ptr %A, ptr %B, ptr %C) nounwind { -; DAG-LABEL: sabal2_8h: -; DAG: // %bb.0: -; DAG-NEXT: ldr q0, [x2] -; DAG-NEXT: ldr d1, [x0, #8] -; DAG-NEXT: ldr d2, [x1, #8] -; DAG-NEXT: sabal.8h v0, v1, v2 -; DAG-NEXT: ret +; DAG-LABEL: sabal2_8h: +; DAG: // %bb.0: +; DAG-NEXT: ldr q0, [x2] +; DAG-NEXT: ldr d1, [x0, #8] +; DAG-NEXT: ldr d2, [x1, #8] +; DAG-NEXT: sabal.8h v0, v1, v2 +; DAG-NEXT: ret ; ; GISEL-LABEL: sabal2_8h: ; GISEL: // %bb.0: @@ -1063,13 +1039,13 @@ ; FALLBACK-NOT: remark:{{.*}} sabal2_4s define <4 x i32> @sabal2_4s(ptr %A, ptr %B, ptr %C) nounwind { -; DAG-LABEL: sabal2_4s: -; DAG: // %bb.0: -; DAG-NEXT: ldr q0, [x2] -; DAG-NEXT: ldr d1, [x0, #8] -; DAG-NEXT: ldr d2, [x1, #8] -; DAG-NEXT: sabal.4s v0, v1, v2 -; DAG-NEXT: ret +; DAG-LABEL: sabal2_4s: +; DAG: // %bb.0: +; DAG-NEXT: ldr q0, [x2] +; DAG-NEXT: ldr d1, [x0, #8] +; DAG-NEXT: ldr d2, [x1, #8] +; DAG-NEXT: sabal.4s v0, v1, v2 +; DAG-NEXT: ret ; ; GISEL-LABEL: sabal2_4s: ; GISEL: // %bb.0: @@ -1093,13 +1069,13 @@ ; FALLBACK-NOT: remark:{{.*}} sabal2_2d define <2 x i64> @sabal2_2d(ptr %A, ptr %B, ptr %C) nounwind { -; DAG-LABEL: sabal2_2d: -; DAG: // %bb.0: -; DAG-NEXT: ldr q0, [x2] -; DAG-NEXT: ldr d1, [x0, #8] -; DAG-NEXT: ldr d2, [x1, #8] -; DAG-NEXT: sabal.2d v0, v1, v2 -; DAG-NEXT: ret +; DAG-LABEL: sabal2_2d: +; DAG: // %bb.0: +; DAG-NEXT: ldr q0, [x2] +; DAG-NEXT: ldr d1, [x0, #8] +; DAG-NEXT: ldr d2, [x1, #8] +; DAG-NEXT: sabal.2d v0, v1, v2 +; DAG-NEXT: ret ; ; GISEL-LABEL: sabal2_2d: ; GISEL: // %bb.0: @@ -1123,21 +1099,13 @@ ; FALLBACK-NOT: remark:{{.*}} uabal8h define <8 x i16> @uabal8h(ptr %A, ptr %B, ptr %C) nounwind { -; DAG-LABEL: uabal8h: -; DAG: // %bb.0: -; DAG-NEXT: ldr d1, [x1] -; DAG-NEXT: ldr d2, [x0] -; DAG-NEXT: ldr q0, [x2] -; DAG-NEXT: uabal.8h v0, v2, v1 -; DAG-NEXT: ret -; -; GISEL-LABEL: uabal8h: -; GISEL: // %bb.0: -; GISEL-NEXT: ldr d1, [x0] -; GISEL-NEXT: ldr d2, [x1] -; GISEL-NEXT: ldr q0, [x2] -; GISEL-NEXT: uabal.8h v0, v1, v2 -; GISEL-NEXT: ret +; CHECK-LABEL: uabal8h: +; CHECK: // %bb.0: +; 
CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: ldr d2, [x1] +; CHECK-NEXT: ldr q0, [x2] +; CHECK-NEXT: uabal.8h v0, v1, v2 +; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp2 = load <8 x i8>, ptr %B %tmp3 = load <8 x i16>, ptr %C @@ -1149,21 +1117,13 @@ ; FALLBACK-NOT: remark:{{.*}} uabal8s define <4 x i32> @uabal4s(ptr %A, ptr %B, ptr %C) nounwind { -; DAG-LABEL: uabal4s: -; DAG: // %bb.0: -; DAG-NEXT: ldr d1, [x1] -; DAG-NEXT: ldr d2, [x0] -; DAG-NEXT: ldr q0, [x2] -; DAG-NEXT: uabal.4s v0, v2, v1 -; DAG-NEXT: ret -; -; GISEL-LABEL: uabal4s: -; GISEL: // %bb.0: -; GISEL-NEXT: ldr d1, [x0] -; GISEL-NEXT: ldr d2, [x1] -; GISEL-NEXT: ldr q0, [x2] -; GISEL-NEXT: uabal.4s v0, v1, v2 -; GISEL-NEXT: ret +; CHECK-LABEL: uabal4s: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: ldr d2, [x1] +; CHECK-NEXT: ldr q0, [x2] +; CHECK-NEXT: uabal.4s v0, v1, v2 +; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = load <4 x i16>, ptr %B %tmp3 = load <4 x i32>, ptr %C @@ -1175,21 +1135,13 @@ ; FALLBACK-NOT: remark:{{.*}} uabal2d define <2 x i64> @uabal2d(ptr %A, ptr %B, ptr %C) nounwind { -; DAG-LABEL: uabal2d: -; DAG: // %bb.0: -; DAG-NEXT: ldr d1, [x1] -; DAG-NEXT: ldr d2, [x0] -; DAG-NEXT: ldr q0, [x2] -; DAG-NEXT: uabal.2d v0, v2, v1 -; DAG-NEXT: ret -; -; GISEL-LABEL: uabal2d: -; GISEL: // %bb.0: -; GISEL-NEXT: ldr d1, [x0] -; GISEL-NEXT: ldr d2, [x1] -; GISEL-NEXT: ldr q0, [x2] -; GISEL-NEXT: uabal.2d v0, v1, v2 -; GISEL-NEXT: ret +; CHECK-LABEL: uabal2d: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: ldr d2, [x1] +; CHECK-NEXT: ldr q0, [x2] +; CHECK-NEXT: uabal.2d v0, v1, v2 +; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = load <2 x i32>, ptr %B %tmp3 = load <2 x i64>, ptr %C @@ -1201,13 +1153,13 @@ ; FALLBACK-NOT: remark:{{.*}} uabal2_8h define <8 x i16> @uabal2_8h(ptr %A, ptr %B, ptr %C) nounwind { -; DAG-LABEL: uabal2_8h: -; DAG: // %bb.0: -; DAG-NEXT: ldr q0, [x2] -; DAG-NEXT: ldr d1, [x0, #8] -; DAG-NEXT: ldr d2, [x1, #8] -; DAG-NEXT: uabal.8h v0, v1, v2 -; DAG-NEXT: ret +; DAG-LABEL: uabal2_8h: +; DAG: // %bb.0: +; DAG-NEXT: ldr q0, [x2] +; DAG-NEXT: ldr d1, [x0, #8] +; DAG-NEXT: ldr d2, [x1, #8] +; DAG-NEXT: uabal.8h v0, v1, v2 +; DAG-NEXT: ret ; ; GISEL-LABEL: uabal2_8h: ; GISEL: // %bb.0: @@ -1231,13 +1183,13 @@ ; FALLBACK-NOT: remark:{{.*}} uabal2_4s define <4 x i32> @uabal2_4s(ptr %A, ptr %B, ptr %C) nounwind { -; DAG-LABEL: uabal2_4s: -; DAG: // %bb.0: -; DAG-NEXT: ldr q0, [x2] -; DAG-NEXT: ldr d1, [x0, #8] -; DAG-NEXT: ldr d2, [x1, #8] -; DAG-NEXT: uabal.4s v0, v1, v2 -; DAG-NEXT: ret +; DAG-LABEL: uabal2_4s: +; DAG: // %bb.0: +; DAG-NEXT: ldr q0, [x2] +; DAG-NEXT: ldr d1, [x0, #8] +; DAG-NEXT: ldr d2, [x1, #8] +; DAG-NEXT: uabal.4s v0, v1, v2 +; DAG-NEXT: ret ; ; GISEL-LABEL: uabal2_4s: ; GISEL: // %bb.0: @@ -1261,13 +1213,13 @@ ; FALLBACK-NOT: remark:{{.*}} uabal2_2d define <2 x i64> @uabal2_2d(ptr %A, ptr %B, ptr %C) nounwind { -; DAG-LABEL: uabal2_2d: -; DAG: // %bb.0: -; DAG-NEXT: ldr q0, [x2] -; DAG-NEXT: ldr d1, [x0, #8] -; DAG-NEXT: ldr d2, [x1, #8] -; DAG-NEXT: uabal.2d v0, v1, v2 -; DAG-NEXT: ret +; DAG-LABEL: uabal2_2d: +; DAG: // %bb.0: +; DAG-NEXT: ldr q0, [x2] +; DAG-NEXT: ldr d1, [x0, #8] +; DAG-NEXT: ldr d2, [x1, #8] +; DAG-NEXT: uabal.2d v0, v1, v2 +; DAG-NEXT: ret ; ; GISEL-LABEL: uabal2_2d: ; GISEL: // %bb.0: @@ -1290,21 +1242,13 @@ } define <8 x i8> @saba_8b(ptr %A, ptr %B, ptr %C) nounwind { -; DAG-LABEL: saba_8b: -; DAG: // %bb.0: -; DAG-NEXT: ldr d1, [x1] -; DAG-NEXT: ldr d2, [x0] -; DAG-NEXT: ldr d0, [x2] -; DAG-NEXT: 
saba.8b v0, v2, v1 -; DAG-NEXT: ret -; -; GISEL-LABEL: saba_8b: -; GISEL: // %bb.0: -; GISEL-NEXT: ldr d1, [x0] -; GISEL-NEXT: ldr d2, [x1] -; GISEL-NEXT: ldr d0, [x2] -; GISEL-NEXT: saba.8b v0, v1, v2 -; GISEL-NEXT: ret +; CHECK-LABEL: saba_8b: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: ldr d2, [x1] +; CHECK-NEXT: ldr d0, [x2] +; CHECK-NEXT: saba.8b v0, v1, v2 +; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp2 = load <8 x i8>, ptr %B %tmp3 = call <8 x i8> @llvm.aarch64.neon.sabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) @@ -1314,21 +1258,13 @@ } define <16 x i8> @saba_16b(ptr %A, ptr %B, ptr %C) nounwind { -; DAG-LABEL: saba_16b: -; DAG: // %bb.0: -; DAG-NEXT: ldr q1, [x1] -; DAG-NEXT: ldr q2, [x0] -; DAG-NEXT: ldr q0, [x2] -; DAG-NEXT: saba.16b v0, v2, v1 -; DAG-NEXT: ret -; -; GISEL-LABEL: saba_16b: -; GISEL: // %bb.0: -; GISEL-NEXT: ldr q1, [x0] -; GISEL-NEXT: ldr q2, [x1] -; GISEL-NEXT: ldr q0, [x2] -; GISEL-NEXT: saba.16b v0, v1, v2 -; GISEL-NEXT: ret +; CHECK-LABEL: saba_16b: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr q2, [x1] +; CHECK-NEXT: ldr q0, [x2] +; CHECK-NEXT: saba.16b v0, v1, v2 +; CHECK-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp2 = load <16 x i8>, ptr %B %tmp3 = call <16 x i8> @llvm.aarch64.neon.sabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) @@ -1338,21 +1274,13 @@ } define <4 x i16> @saba_4h(ptr %A, ptr %B, ptr %C) nounwind { -; DAG-LABEL: saba_4h: -; DAG: // %bb.0: -; DAG-NEXT: ldr d1, [x1] -; DAG-NEXT: ldr d2, [x0] -; DAG-NEXT: ldr d0, [x2] -; DAG-NEXT: saba.4h v0, v2, v1 -; DAG-NEXT: ret -; -; GISEL-LABEL: saba_4h: -; GISEL: // %bb.0: -; GISEL-NEXT: ldr d1, [x0] -; GISEL-NEXT: ldr d2, [x1] -; GISEL-NEXT: ldr d0, [x2] -; GISEL-NEXT: saba.4h v0, v1, v2 -; GISEL-NEXT: ret +; CHECK-LABEL: saba_4h: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: ldr d2, [x1] +; CHECK-NEXT: ldr d0, [x2] +; CHECK-NEXT: saba.4h v0, v1, v2 +; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = load <4 x i16>, ptr %B %tmp3 = call <4 x i16> @llvm.aarch64.neon.sabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -1362,21 +1290,13 @@ } define <8 x i16> @saba_8h(ptr %A, ptr %B, ptr %C) nounwind { -; DAG-LABEL: saba_8h: -; DAG: // %bb.0: -; DAG-NEXT: ldr q1, [x1] -; DAG-NEXT: ldr q2, [x0] -; DAG-NEXT: ldr q0, [x2] -; DAG-NEXT: saba.8h v0, v2, v1 -; DAG-NEXT: ret -; -; GISEL-LABEL: saba_8h: -; GISEL: // %bb.0: -; GISEL-NEXT: ldr q1, [x0] -; GISEL-NEXT: ldr q2, [x1] -; GISEL-NEXT: ldr q0, [x2] -; GISEL-NEXT: saba.8h v0, v1, v2 -; GISEL-NEXT: ret +; CHECK-LABEL: saba_8h: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr q2, [x1] +; CHECK-NEXT: ldr q0, [x2] +; CHECK-NEXT: saba.8h v0, v1, v2 +; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp2 = load <8 x i16>, ptr %B %tmp3 = call <8 x i16> @llvm.aarch64.neon.sabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) @@ -1386,21 +1306,13 @@ } define <2 x i32> @saba_2s(ptr %A, ptr %B, ptr %C) nounwind { -; DAG-LABEL: saba_2s: -; DAG: // %bb.0: -; DAG-NEXT: ldr d1, [x1] -; DAG-NEXT: ldr d2, [x0] -; DAG-NEXT: ldr d0, [x2] -; DAG-NEXT: saba.2s v0, v2, v1 -; DAG-NEXT: ret -; -; GISEL-LABEL: saba_2s: -; GISEL: // %bb.0: -; GISEL-NEXT: ldr d1, [x0] -; GISEL-NEXT: ldr d2, [x1] -; GISEL-NEXT: ldr d0, [x2] -; GISEL-NEXT: saba.2s v0, v1, v2 -; GISEL-NEXT: ret +; CHECK-LABEL: saba_2s: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: ldr d2, [x1] +; CHECK-NEXT: ldr d0, [x2] +; CHECK-NEXT: saba.2s v0, v1, v2 +; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = load <2 x i32>, ptr %B 
%tmp3 = call <2 x i32> @llvm.aarch64.neon.sabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) @@ -1410,21 +1322,13 @@ } define <4 x i32> @saba_4s(ptr %A, ptr %B, ptr %C) nounwind { -; DAG-LABEL: saba_4s: -; DAG: // %bb.0: -; DAG-NEXT: ldr q1, [x1] -; DAG-NEXT: ldr q2, [x0] -; DAG-NEXT: ldr q0, [x2] -; DAG-NEXT: saba.4s v0, v2, v1 -; DAG-NEXT: ret -; -; GISEL-LABEL: saba_4s: -; GISEL: // %bb.0: -; GISEL-NEXT: ldr q1, [x0] -; GISEL-NEXT: ldr q2, [x1] -; GISEL-NEXT: ldr q0, [x2] -; GISEL-NEXT: saba.4s v0, v1, v2 -; GISEL-NEXT: ret +; CHECK-LABEL: saba_4s: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr q2, [x1] +; CHECK-NEXT: ldr q0, [x2] +; CHECK-NEXT: saba.4s v0, v1, v2 +; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp2 = load <4 x i32>, ptr %B %tmp3 = call <4 x i32> @llvm.aarch64.neon.sabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) @@ -1434,21 +1338,13 @@ } define <8 x i8> @uaba_8b(ptr %A, ptr %B, ptr %C) nounwind { -; DAG-LABEL: uaba_8b: -; DAG: // %bb.0: -; DAG-NEXT: ldr d1, [x1] -; DAG-NEXT: ldr d2, [x0] -; DAG-NEXT: ldr d0, [x2] -; DAG-NEXT: uaba.8b v0, v2, v1 -; DAG-NEXT: ret -; -; GISEL-LABEL: uaba_8b: -; GISEL: // %bb.0: -; GISEL-NEXT: ldr d1, [x0] -; GISEL-NEXT: ldr d2, [x1] -; GISEL-NEXT: ldr d0, [x2] -; GISEL-NEXT: uaba.8b v0, v1, v2 -; GISEL-NEXT: ret +; CHECK-LABEL: uaba_8b: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: ldr d2, [x1] +; CHECK-NEXT: ldr d0, [x2] +; CHECK-NEXT: uaba.8b v0, v1, v2 +; CHECK-NEXT: ret %tmp1 = load <8 x i8>, ptr %A %tmp2 = load <8 x i8>, ptr %B %tmp3 = call <8 x i8> @llvm.aarch64.neon.uabd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2) @@ -1458,21 +1354,13 @@ } define <16 x i8> @uaba_16b(ptr %A, ptr %B, ptr %C) nounwind { -; DAG-LABEL: uaba_16b: -; DAG: // %bb.0: -; DAG-NEXT: ldr q1, [x1] -; DAG-NEXT: ldr q2, [x0] -; DAG-NEXT: ldr q0, [x2] -; DAG-NEXT: uaba.16b v0, v2, v1 -; DAG-NEXT: ret -; -; GISEL-LABEL: uaba_16b: -; GISEL: // %bb.0: -; GISEL-NEXT: ldr q1, [x0] -; GISEL-NEXT: ldr q2, [x1] -; GISEL-NEXT: ldr q0, [x2] -; GISEL-NEXT: uaba.16b v0, v1, v2 -; GISEL-NEXT: ret +; CHECK-LABEL: uaba_16b: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr q2, [x1] +; CHECK-NEXT: ldr q0, [x2] +; CHECK-NEXT: uaba.16b v0, v1, v2 +; CHECK-NEXT: ret %tmp1 = load <16 x i8>, ptr %A %tmp2 = load <16 x i8>, ptr %B %tmp3 = call <16 x i8> @llvm.aarch64.neon.uabd.v16i8(<16 x i8> %tmp1, <16 x i8> %tmp2) @@ -1482,21 +1370,13 @@ } define <4 x i16> @uaba_4h(ptr %A, ptr %B, ptr %C) nounwind { -; DAG-LABEL: uaba_4h: -; DAG: // %bb.0: -; DAG-NEXT: ldr d1, [x1] -; DAG-NEXT: ldr d2, [x0] -; DAG-NEXT: ldr d0, [x2] -; DAG-NEXT: uaba.4h v0, v2, v1 -; DAG-NEXT: ret -; -; GISEL-LABEL: uaba_4h: -; GISEL: // %bb.0: -; GISEL-NEXT: ldr d1, [x0] -; GISEL-NEXT: ldr d2, [x1] -; GISEL-NEXT: ldr d0, [x2] -; GISEL-NEXT: uaba.4h v0, v1, v2 -; GISEL-NEXT: ret +; CHECK-LABEL: uaba_4h: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: ldr d2, [x1] +; CHECK-NEXT: ldr d0, [x2] +; CHECK-NEXT: uaba.4h v0, v1, v2 +; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = load <4 x i16>, ptr %B %tmp3 = call <4 x i16> @llvm.aarch64.neon.uabd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2) @@ -1506,21 +1386,13 @@ } define <8 x i16> @uaba_8h(ptr %A, ptr %B, ptr %C) nounwind { -; DAG-LABEL: uaba_8h: -; DAG: // %bb.0: -; DAG-NEXT: ldr q1, [x1] -; DAG-NEXT: ldr q2, [x0] -; DAG-NEXT: ldr q0, [x2] -; DAG-NEXT: uaba.8h v0, v2, v1 -; DAG-NEXT: ret -; -; GISEL-LABEL: uaba_8h: -; GISEL: // %bb.0: -; GISEL-NEXT: ldr q1, [x0] -; GISEL-NEXT: ldr q2, [x1] -; GISEL-NEXT: ldr q0, 
[x2] -; GISEL-NEXT: uaba.8h v0, v1, v2 -; GISEL-NEXT: ret +; CHECK-LABEL: uaba_8h: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr q2, [x1] +; CHECK-NEXT: ldr q0, [x2] +; CHECK-NEXT: uaba.8h v0, v1, v2 +; CHECK-NEXT: ret %tmp1 = load <8 x i16>, ptr %A %tmp2 = load <8 x i16>, ptr %B %tmp3 = call <8 x i16> @llvm.aarch64.neon.uabd.v8i16(<8 x i16> %tmp1, <8 x i16> %tmp2) @@ -1530,21 +1402,13 @@ } define <2 x i32> @uaba_2s(ptr %A, ptr %B, ptr %C) nounwind { -; DAG-LABEL: uaba_2s: -; DAG: // %bb.0: -; DAG-NEXT: ldr d1, [x1] -; DAG-NEXT: ldr d2, [x0] -; DAG-NEXT: ldr d0, [x2] -; DAG-NEXT: uaba.2s v0, v2, v1 -; DAG-NEXT: ret -; -; GISEL-LABEL: uaba_2s: -; GISEL: // %bb.0: -; GISEL-NEXT: ldr d1, [x0] -; GISEL-NEXT: ldr d2, [x1] -; GISEL-NEXT: ldr d0, [x2] -; GISEL-NEXT: uaba.2s v0, v1, v2 -; GISEL-NEXT: ret +; CHECK-LABEL: uaba_2s: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: ldr d2, [x1] +; CHECK-NEXT: ldr d0, [x2] +; CHECK-NEXT: uaba.2s v0, v1, v2 +; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = load <2 x i32>, ptr %B %tmp3 = call <2 x i32> @llvm.aarch64.neon.uabd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2) @@ -1554,21 +1418,13 @@ } define <4 x i32> @uaba_4s(ptr %A, ptr %B, ptr %C) nounwind { -; DAG-LABEL: uaba_4s: -; DAG: // %bb.0: -; DAG-NEXT: ldr q1, [x1] -; DAG-NEXT: ldr q2, [x0] -; DAG-NEXT: ldr q0, [x2] -; DAG-NEXT: uaba.4s v0, v2, v1 -; DAG-NEXT: ret -; -; GISEL-LABEL: uaba_4s: -; GISEL: // %bb.0: -; GISEL-NEXT: ldr q1, [x0] -; GISEL-NEXT: ldr q2, [x1] -; GISEL-NEXT: ldr q0, [x2] -; GISEL-NEXT: uaba.4s v0, v1, v2 -; GISEL-NEXT: ret +; CHECK-LABEL: uaba_4s: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr q2, [x1] +; CHECK-NEXT: ldr q0, [x2] +; CHECK-NEXT: uaba.4s v0, v1, v2 +; CHECK-NEXT: ret %tmp1 = load <4 x i32>, ptr %A %tmp2 = load <4 x i32>, ptr %B %tmp3 = call <4 x i32> @llvm.aarch64.neon.uabd.v4i32(<4 x i32> %tmp1, <4 x i32> %tmp2) @@ -1624,12 +1480,18 @@ ; FALLBACK-NOT: remark:{{.*}} uabdl_from_extract_dup define <2 x i64> @uabdl_from_extract_dup(<4 x i32> %lhs, i32 %rhs) { -; CHECK-LABEL: uabdl_from_extract_dup: -; CHECK: // %bb.0: -; CHECK-NEXT: dup.2s v1, w0 +; DAG-LABEL: uabdl_from_extract_dup: +; DAG: // %bb.0: +; DAG-NEXT: dup.2s v1, w0 +; DAG-NEXT: uabdl.2d v0, v0, v1 +; DAG-NEXT: ret +; +; GISEL-LABEL: uabdl_from_extract_dup: +; GISEL: // %bb.0: +; GISEL-NEXT: dup.2s v1, w0 ; GISEL-NEXT: ext.16b v0, v0, v0, #0 -; CHECK-NEXT: uabdl.2d v0, v0, v1 -; CHECK-NEXT: ret +; GISEL-NEXT: uabdl.2d v0, v0, v1 +; GISEL-NEXT: ret %rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0 %rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1 @@ -1642,11 +1504,11 @@ ; FALLBACK-NOT: remark:{{.*}} uabdl2_from_extract_dup define <2 x i64> @uabdl2_from_extract_dup(<4 x i32> %lhs, i32 %rhs) { -; DAG-LABEL: uabdl2_from_extract_dup: -; DAG: // %bb.0: -; DAG-NEXT: dup.4s v1, w0 -; DAG-NEXT: uabdl2.2d v0, v0, v1 -; DAG-NEXT: ret +; DAG-LABEL: uabdl2_from_extract_dup: +; DAG: // %bb.0: +; DAG-NEXT: dup.4s v1, w0 +; DAG-NEXT: uabdl2.2d v0, v0, v1 +; DAG-NEXT: ret ; ; GISEL-LABEL: uabdl2_from_extract_dup: ; GISEL: // %bb.0: @@ -1666,12 +1528,18 @@ ; FALLBACK-NOT: remark:{{.*}} sabdl_from_extract_dup define <2 x i64> @sabdl_from_extract_dup(<4 x i32> %lhs, i32 %rhs) { -; CHECK-LABEL: sabdl_from_extract_dup: -; CHECK: // %bb.0: -; CHECK-NEXT: dup.2s v1, w0 +; DAG-LABEL: sabdl_from_extract_dup: +; DAG: // %bb.0: +; DAG-NEXT: dup.2s v1, w0 +; DAG-NEXT: sabdl.2d v0, v0, v1 +; DAG-NEXT: ret +; +; GISEL-LABEL: sabdl_from_extract_dup: +; 
GISEL: // %bb.0: +; GISEL-NEXT: dup.2s v1, w0 ; GISEL-NEXT: ext.16b v0, v0, v0, #0 -; CHECK-NEXT: sabdl.2d v0, v0, v1 -; CHECK-NEXT: ret +; GISEL-NEXT: sabdl.2d v0, v0, v1 +; GISEL-NEXT: ret %rhsvec.tmp = insertelement <2 x i32> undef, i32 %rhs, i32 0 %rhsvec = insertelement <2 x i32> %rhsvec.tmp, i32 %rhs, i32 1 @@ -1684,11 +1552,11 @@ ; FALLBACK-NOT: remark:{{.*}} sabdl2_from_extract_dup define <2 x i64> @sabdl2_from_extract_dup(<4 x i32> %lhs, i32 %rhs) { -; DAG-LABEL: sabdl2_from_extract_dup: -; DAG: // %bb.0: -; DAG-NEXT: dup.4s v1, w0 -; DAG-NEXT: sabdl2.2d v0, v0, v1 -; DAG-NEXT: ret +; DAG-LABEL: sabdl2_from_extract_dup: +; DAG: // %bb.0: +; DAG-NEXT: dup.4s v1, w0 +; DAG-NEXT: sabdl2.2d v0, v0, v1 +; DAG-NEXT: ret ; ; GISEL-LABEL: sabdl2_from_extract_dup: ; GISEL: // %bb.0: @@ -1855,10 +1723,10 @@ ; ; GISEL-LABEL: uabd_i32: ; GISEL: // %bb.0: -; GISEL-NEXT: movi.2d v2, #0000000000000000 ; GISEL-NEXT: ssubl.2d v0, v0, v1 -; GISEL-NEXT: cmgt.2d v1, v2, v0 +; GISEL-NEXT: movi.2d v1, #0000000000000000 ; GISEL-NEXT: neg.2d v2, v0 +; GISEL-NEXT: cmgt.2d v1, v1, v0 ; GISEL-NEXT: bit.16b v0, v2, v1 ; GISEL-NEXT: ret %aext = sext <2 x i32> %a to <2 x i64> @@ -1875,28 +1743,28 @@ ; CHECK-LABEL: uabd_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: mov.d x8, v0[1] -; CHECK-NEXT: fmov x9, d0 -; CHECK-NEXT: mov.d x10, v1[1] +; CHECK-NEXT: mov.d x9, v1[1] +; CHECK-NEXT: fmov x10, d0 ; CHECK-NEXT: fmov x11, d1 -; CHECK-NEXT: asr x12, x9, #63 +; CHECK-NEXT: asr x12, x10, #63 ; CHECK-NEXT: asr x13, x11, #63 -; CHECK-NEXT: subs x9, x9, x11 -; CHECK-NEXT: sbc x11, x12, x13 -; CHECK-NEXT: asr x12, x8, #63 -; CHECK-NEXT: asr x13, x10, #63 -; CHECK-NEXT: subs x8, x8, x10 -; CHECK-NEXT: sbc x10, x12, x13 -; CHECK-NEXT: asr x12, x11, #63 -; CHECK-NEXT: asr x13, x10, #63 -; CHECK-NEXT: eor x9, x9, x12 -; CHECK-NEXT: eor x8, x8, x13 +; CHECK-NEXT: subs x10, x10, x11 +; CHECK-NEXT: asr x11, x8, #63 +; CHECK-NEXT: asr x14, x9, #63 +; CHECK-NEXT: sbc x12, x12, x13 +; CHECK-NEXT: subs x8, x8, x9 +; CHECK-NEXT: sbc x9, x11, x14 +; CHECK-NEXT: asr x13, x12, #63 +; CHECK-NEXT: asr x11, x9, #63 ; CHECK-NEXT: eor x10, x10, x13 -; CHECK-NEXT: subs x2, x8, x13 -; CHECK-NEXT: sbc x3, x10, x13 -; CHECK-NEXT: subs x8, x9, x12 -; CHECK-NEXT: eor x9, x11, x12 -; CHECK-NEXT: sbc x1, x9, x12 -; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: eor x8, x8, x11 +; CHECK-NEXT: eor x9, x9, x11 +; CHECK-NEXT: subs x2, x8, x11 +; CHECK-NEXT: eor x8, x12, x13 +; CHECK-NEXT: sbc x3, x9, x11 +; CHECK-NEXT: subs x9, x10, x13 +; CHECK-NEXT: fmov d0, x9 +; CHECK-NEXT: sbc x1, x8, x13 ; CHECK-NEXT: mov.d v0[1], x1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll --- a/llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll @@ -52,12 +52,12 @@ define void @fct1_64x2(ptr nocapture %array, i64 %offset) nounwind ssp { ; CHECK-LABEL: fct1_64x2: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: adrp x9, :got:globalArray64x2 -; CHECK-NEXT: lsl x8, x1, #4 -; CHECK-NEXT: ldr x9, [x9, :got_lo12:globalArray64x2] -; CHECK-NEXT: ldr q0, [x0, x8] -; CHECK-NEXT: ldr x9, [x9] -; CHECK-NEXT: str q0, [x9, x8] +; CHECK-NEXT: adrp x8, :got:globalArray64x2 +; CHECK-NEXT: lsl x9, x1, #4 +; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray64x2] +; CHECK-NEXT: ldr q0, [x0, x9] +; CHECK-NEXT: ldr x8, [x8] +; CHECK-NEXT: str q0, [x8, x9] ; CHECK-NEXT: ret entry: %arrayidx = getelementptr inbounds <2 x i64>, ptr %array, i64 %offset @@ -89,12 +89,12 
@@ define void @fct1_32x4(ptr nocapture %array, i64 %offset) nounwind ssp { ; CHECK-LABEL: fct1_32x4: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: adrp x9, :got:globalArray32x4 -; CHECK-NEXT: lsl x8, x1, #4 -; CHECK-NEXT: ldr x9, [x9, :got_lo12:globalArray32x4] -; CHECK-NEXT: ldr q0, [x0, x8] -; CHECK-NEXT: ldr x9, [x9] -; CHECK-NEXT: str q0, [x9, x8] +; CHECK-NEXT: adrp x8, :got:globalArray32x4 +; CHECK-NEXT: lsl x9, x1, #4 +; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray32x4] +; CHECK-NEXT: ldr q0, [x0, x9] +; CHECK-NEXT: ldr x8, [x8] +; CHECK-NEXT: str q0, [x8, x9] ; CHECK-NEXT: ret entry: %arrayidx = getelementptr inbounds <4 x i32>, ptr %array, i64 %offset @@ -126,12 +126,12 @@ define void @fct1_16x8(ptr nocapture %array, i64 %offset) nounwind ssp { ; CHECK-LABEL: fct1_16x8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: adrp x9, :got:globalArray16x8 -; CHECK-NEXT: lsl x8, x1, #4 -; CHECK-NEXT: ldr x9, [x9, :got_lo12:globalArray16x8] -; CHECK-NEXT: ldr q0, [x0, x8] -; CHECK-NEXT: ldr x9, [x9] -; CHECK-NEXT: str q0, [x9, x8] +; CHECK-NEXT: adrp x8, :got:globalArray16x8 +; CHECK-NEXT: lsl x9, x1, #4 +; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray16x8] +; CHECK-NEXT: ldr q0, [x0, x9] +; CHECK-NEXT: ldr x8, [x8] +; CHECK-NEXT: str q0, [x8, x9] ; CHECK-NEXT: ret entry: %arrayidx = getelementptr inbounds <8 x i16>, ptr %array, i64 %offset @@ -163,12 +163,12 @@ define void @fct1_8x16(ptr nocapture %array, i64 %offset) nounwind ssp { ; CHECK-LABEL: fct1_8x16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: adrp x9, :got:globalArray8x16 -; CHECK-NEXT: lsl x8, x1, #4 -; CHECK-NEXT: ldr x9, [x9, :got_lo12:globalArray8x16] -; CHECK-NEXT: ldr q0, [x0, x8] -; CHECK-NEXT: ldr x9, [x9] -; CHECK-NEXT: str q0, [x9, x8] +; CHECK-NEXT: adrp x8, :got:globalArray8x16 +; CHECK-NEXT: lsl x9, x1, #4 +; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray8x16] +; CHECK-NEXT: ldr q0, [x0, x9] +; CHECK-NEXT: ldr x8, [x8] +; CHECK-NEXT: str q0, [x8, x9] ; CHECK-NEXT: ret entry: %arrayidx = getelementptr inbounds <16 x i8>, ptr %array, i64 %offset @@ -200,12 +200,12 @@ define void @fct1_64x1(ptr nocapture %array, i64 %offset) nounwind ssp { ; CHECK-LABEL: fct1_64x1: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: adrp x9, :got:globalArray64x1 -; CHECK-NEXT: lsl x8, x1, #3 -; CHECK-NEXT: ldr x9, [x9, :got_lo12:globalArray64x1] -; CHECK-NEXT: ldr d0, [x0, x8] -; CHECK-NEXT: ldr x9, [x9] -; CHECK-NEXT: str d0, [x9, x8] +; CHECK-NEXT: adrp x8, :got:globalArray64x1 +; CHECK-NEXT: lsl x9, x1, #3 +; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray64x1] +; CHECK-NEXT: ldr d0, [x0, x9] +; CHECK-NEXT: ldr x8, [x8] +; CHECK-NEXT: str d0, [x8, x9] ; CHECK-NEXT: ret entry: %arrayidx = getelementptr inbounds <1 x i64>, ptr %array, i64 %offset @@ -237,12 +237,12 @@ define void @fct1_32x2(ptr nocapture %array, i64 %offset) nounwind ssp { ; CHECK-LABEL: fct1_32x2: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: adrp x9, :got:globalArray32x2 -; CHECK-NEXT: lsl x8, x1, #3 -; CHECK-NEXT: ldr x9, [x9, :got_lo12:globalArray32x2] -; CHECK-NEXT: ldr d0, [x0, x8] -; CHECK-NEXT: ldr x9, [x9] -; CHECK-NEXT: str d0, [x9, x8] +; CHECK-NEXT: adrp x8, :got:globalArray32x2 +; CHECK-NEXT: lsl x9, x1, #3 +; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray32x2] +; CHECK-NEXT: ldr d0, [x0, x9] +; CHECK-NEXT: ldr x8, [x8] +; CHECK-NEXT: str d0, [x8, x9] ; CHECK-NEXT: ret entry: %arrayidx = getelementptr inbounds <2 x i32>, ptr %array, i64 %offset @@ -274,12 +274,12 @@ define void @fct1_16x4(ptr nocapture %array, i64 %offset) nounwind ssp { ; CHECK-LABEL: fct1_16x4: ; 
CHECK: // %bb.0: // %entry -; CHECK-NEXT: adrp x9, :got:globalArray16x4 -; CHECK-NEXT: lsl x8, x1, #3 -; CHECK-NEXT: ldr x9, [x9, :got_lo12:globalArray16x4] -; CHECK-NEXT: ldr d0, [x0, x8] -; CHECK-NEXT: ldr x9, [x9] -; CHECK-NEXT: str d0, [x9, x8] +; CHECK-NEXT: adrp x8, :got:globalArray16x4 +; CHECK-NEXT: lsl x9, x1, #3 +; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray16x4] +; CHECK-NEXT: ldr d0, [x0, x9] +; CHECK-NEXT: ldr x8, [x8] +; CHECK-NEXT: str d0, [x8, x9] ; CHECK-NEXT: ret entry: %arrayidx = getelementptr inbounds <4 x i16>, ptr %array, i64 %offset @@ -311,12 +311,12 @@ define void @fct1_8x8(ptr nocapture %array, i64 %offset) nounwind ssp { ; CHECK-LABEL: fct1_8x8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: adrp x9, :got:globalArray8x8 -; CHECK-NEXT: lsl x8, x1, #3 -; CHECK-NEXT: ldr x9, [x9, :got_lo12:globalArray8x8] -; CHECK-NEXT: ldr d0, [x0, x8] -; CHECK-NEXT: ldr x9, [x9] -; CHECK-NEXT: str d0, [x9, x8] +; CHECK-NEXT: adrp x8, :got:globalArray8x8 +; CHECK-NEXT: lsl x9, x1, #3 +; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray8x8] +; CHECK-NEXT: ldr d0, [x0, x9] +; CHECK-NEXT: ldr x8, [x8] +; CHECK-NEXT: str d0, [x8, x9] ; CHECK-NEXT: ret entry: %arrayidx = getelementptr inbounds <8 x i8>, ptr %array, i64 %offset diff --git a/llvm/test/CodeGen/AArch64/arm64-vhadd.ll b/llvm/test/CodeGen/AArch64/arm64-vhadd.ll --- a/llvm/test/CodeGen/AArch64/arm64-vhadd.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vhadd.ll @@ -903,10 +903,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: shl.2s v0, v0, #24 ; CHECK-NEXT: shl.2s v1, v1, #24 -; CHECK-NEXT: movi d2, #0x00ffff0000ffff ; CHECK-NEXT: sshr.2s v0, v0, #24 ; CHECK-NEXT: ssra.2s v0, v1, #24 -; CHECK-NEXT: and.8b v0, v0, v2 +; CHECK-NEXT: movi d1, #0x00ffff0000ffff +; CHECK-NEXT: and.8b v0, v0, v1 ; CHECK-NEXT: ushr.2s v0, v0, #1 ; CHECK-NEXT: ret %zextsrc1 = sext <2 x i8> %src1 to <2 x i16> @@ -968,10 +968,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: shl.4h v0, v0, #8 ; CHECK-NEXT: shl.4h v1, v1, #8 -; CHECK-NEXT: movi.4h v2, #1 ; CHECK-NEXT: sshr.4h v0, v0, #8 ; CHECK-NEXT: ssra.4h v0, v1, #8 -; CHECK-NEXT: add.4h v0, v0, v2 +; CHECK-NEXT: movi.4h v1, #1 +; CHECK-NEXT: add.4h v0, v0, v1 ; CHECK-NEXT: ushr.4h v0, v0, #1 ; CHECK-NEXT: ret %zextsrc1 = sext <4 x i8> %src1 to <4 x i16> @@ -1283,13 +1283,13 @@ define <16 x i8> @andmask2v16i8(<16 x i16> %src1, <16 x i16> %src2) { ; CHECK-LABEL: andmask2v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi.16b v4, #3 -; CHECK-NEXT: movi.16b v5, #7 ; CHECK-NEXT: uzp1.16b v2, v2, v3 +; CHECK-NEXT: movi.16b v3, #3 ; CHECK-NEXT: uzp1.16b v0, v0, v1 -; CHECK-NEXT: and.16b v1, v2, v4 -; CHECK-NEXT: and.16b v0, v0, v5 -; CHECK-NEXT: uhadd.16b v0, v0, v1 +; CHECK-NEXT: movi.16b v1, #7 +; CHECK-NEXT: and.16b v2, v2, v3 +; CHECK-NEXT: and.16b v0, v0, v1 +; CHECK-NEXT: uhadd.16b v0, v0, v2 ; CHECK-NEXT: ret %zextsrc1 = and <16 x i16> %src1, %zextsrc2 = and <16 x i16> %src2, diff --git a/llvm/test/CodeGen/AArch64/arm64-vmul.ll b/llvm/test/CodeGen/AArch64/arm64-vmul.ll --- a/llvm/test/CodeGen/AArch64/arm64-vmul.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vmul.ll @@ -356,10 +356,10 @@ define <4 x i32> @smlal4s(ptr %A, ptr %B, ptr %C) nounwind { ; CHECK-LABEL: smlal4s: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: ldr d2, [x0] +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: ldr d2, [x1] ; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: smlal.4s v0, v2, v1 +; CHECK-NEXT: smlal.4s v0, v1, v2 ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = load <4 x i16>, ptr %B @@ -372,10 +372,10 @@ define <2 x i64> @smlal2d(ptr %A, ptr %B, ptr 
%C) nounwind { ; CHECK-LABEL: smlal2d: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: ldr d2, [x0] +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: ldr d2, [x1] ; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: smlal.2d v0, v2, v1 +; CHECK-NEXT: smlal.2d v0, v1, v2 ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = load <2 x i32>, ptr %B @@ -406,7 +406,7 @@ define void @smlal2d_chain_with_constant(ptr %dst, <2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) { ; CHECK-LABEL: smlal2d_chain_with_constant: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #257 +; CHECK-NEXT: mov w8, #257 // =0x101 ; CHECK-NEXT: dup.2d v3, x8 ; CHECK-NEXT: smlal.2d v3, v0, v2 ; CHECK-NEXT: mvn.8b v0, v2 @@ -425,10 +425,10 @@ define <4 x i32> @smlsl4s(ptr %A, ptr %B, ptr %C) nounwind { ; CHECK-LABEL: smlsl4s: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: ldr d2, [x0] +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: ldr d2, [x1] ; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: smlsl.4s v0, v2, v1 +; CHECK-NEXT: smlsl.4s v0, v1, v2 ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = load <4 x i16>, ptr %B @@ -441,10 +441,10 @@ define <2 x i64> @smlsl2d(ptr %A, ptr %B, ptr %C) nounwind { ; CHECK-LABEL: smlsl2d: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: ldr d2, [x0] +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: ldr d2, [x1] ; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: smlsl.2d v0, v2, v1 +; CHECK-NEXT: smlsl.2d v0, v1, v2 ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = load <2 x i32>, ptr %B @@ -475,7 +475,7 @@ define void @smlsl2d_chain_with_constant(ptr %dst, <2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) { ; CHECK-LABEL: smlsl2d_chain_with_constant: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #257 +; CHECK-NEXT: mov w8, #257 // =0x101 ; CHECK-NEXT: dup.2d v3, x8 ; CHECK-NEXT: smlsl.2d v3, v0, v2 ; CHECK-NEXT: mvn.8b v0, v2 @@ -499,10 +499,10 @@ define <4 x i32> @sqdmlal4s(ptr %A, ptr %B, ptr %C) nounwind { ; CHECK-LABEL: sqdmlal4s: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: ldr d2, [x0] +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: ldr d2, [x1] ; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: sqdmlal.4s v0, v2, v1 +; CHECK-NEXT: sqdmlal.4s v0, v1, v2 ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = load <4 x i16>, ptr %B @@ -515,10 +515,10 @@ define <2 x i64> @sqdmlal2d(ptr %A, ptr %B, ptr %C) nounwind { ; CHECK-LABEL: sqdmlal2d: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: ldr d2, [x0] +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: ldr d2, [x1] ; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: sqdmlal.2d v0, v2, v1 +; CHECK-NEXT: sqdmlal.2d v0, v1, v2 ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = load <2 x i32>, ptr %B @@ -567,10 +567,10 @@ define <4 x i32> @sqdmlsl4s(ptr %A, ptr %B, ptr %C) nounwind { ; CHECK-LABEL: sqdmlsl4s: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: ldr d2, [x0] +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: ldr d2, [x1] ; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: sqdmlsl.4s v0, v2, v1 +; CHECK-NEXT: sqdmlsl.4s v0, v1, v2 ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = load <4 x i16>, ptr %B @@ -583,10 +583,10 @@ define <2 x i64> @sqdmlsl2d(ptr %A, ptr %B, ptr %C) nounwind { ; CHECK-LABEL: sqdmlsl2d: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: ldr d2, [x0] +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: ldr d2, [x1] ; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: sqdmlsl.2d v0, v2, v1 +; CHECK-NEXT: sqdmlsl.2d v0, v1, v2 ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = load <2 x i32>, ptr %B @@ -635,10 
+635,10 @@ define <4 x i32> @umlal4s(ptr %A, ptr %B, ptr %C) nounwind { ; CHECK-LABEL: umlal4s: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: ldr d2, [x0] +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: ldr d2, [x1] ; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: umlal.4s v0, v2, v1 +; CHECK-NEXT: umlal.4s v0, v1, v2 ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = load <4 x i16>, ptr %B @@ -651,10 +651,10 @@ define <2 x i64> @umlal2d(ptr %A, ptr %B, ptr %C) nounwind { ; CHECK-LABEL: umlal2d: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: ldr d2, [x0] +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: ldr d2, [x1] ; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: umlal.2d v0, v2, v1 +; CHECK-NEXT: umlal.2d v0, v1, v2 ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = load <2 x i32>, ptr %B @@ -685,7 +685,7 @@ define void @umlal2d_chain_with_constant(ptr %dst, <2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) { ; CHECK-LABEL: umlal2d_chain_with_constant: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #257 +; CHECK-NEXT: mov w8, #257 // =0x101 ; CHECK-NEXT: dup.2d v3, x8 ; CHECK-NEXT: umlal.2d v3, v0, v2 ; CHECK-NEXT: mvn.8b v0, v2 @@ -704,10 +704,10 @@ define <4 x i32> @umlsl4s(ptr %A, ptr %B, ptr %C) nounwind { ; CHECK-LABEL: umlsl4s: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: ldr d2, [x0] +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: ldr d2, [x1] ; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: umlsl.4s v0, v2, v1 +; CHECK-NEXT: umlsl.4s v0, v1, v2 ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, ptr %A %tmp2 = load <4 x i16>, ptr %B @@ -720,10 +720,10 @@ define <2 x i64> @umlsl2d(ptr %A, ptr %B, ptr %C) nounwind { ; CHECK-LABEL: umlsl2d: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: ldr d2, [x0] +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: ldr d2, [x1] ; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: umlsl.2d v0, v2, v1 +; CHECK-NEXT: umlsl.2d v0, v1, v2 ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, ptr %A %tmp2 = load <2 x i32>, ptr %B @@ -754,7 +754,7 @@ define void @umlsl2d_chain_with_constant(ptr %dst, <2 x i32> %v1, <2 x i32> %v2, <2 x i32> %v3) { ; CHECK-LABEL: umlsl2d_chain_with_constant: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #257 +; CHECK-NEXT: mov w8, #257 // =0x101 ; CHECK-NEXT: dup.2d v3, x8 ; CHECK-NEXT: umlsl.2d v3, v0, v2 ; CHECK-NEXT: mvn.8b v0, v2 @@ -773,10 +773,10 @@ define <2 x float> @fmla_2s(ptr %A, ptr %B, ptr %C) nounwind { ; CHECK-LABEL: fmla_2s: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: ldr d2, [x0] +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: ldr d2, [x1] ; CHECK-NEXT: ldr d0, [x2] -; CHECK-NEXT: fmla.2s v0, v1, v2 +; CHECK-NEXT: fmla.2s v0, v2, v1 ; CHECK-NEXT: ret %tmp1 = load <2 x float>, ptr %A %tmp2 = load <2 x float>, ptr %B @@ -788,10 +788,10 @@ define <4 x float> @fmla_4s(ptr %A, ptr %B, ptr %C) nounwind { ; CHECK-LABEL: fmla_4s: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: ldr q2, [x0] +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr q2, [x1] ; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: fmla.4s v0, v1, v2 +; CHECK-NEXT: fmla.4s v0, v2, v1 ; CHECK-NEXT: ret %tmp1 = load <4 x float>, ptr %A %tmp2 = load <4 x float>, ptr %B @@ -803,10 +803,10 @@ define <2 x double> @fmla_2d(ptr %A, ptr %B, ptr %C) nounwind { ; CHECK-LABEL: fmla_2d: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: ldr q2, [x0] +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr q2, [x1] ; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: fmla.2d v0, v1, v2 +; CHECK-NEXT: fmla.2d v0, v2, v1 ; CHECK-NEXT: ret %tmp1 = load <2 x double>, ptr %A %tmp2 
= load <2 x double>, ptr %B @@ -822,10 +822,10 @@ define <2 x float> @fmls_2s(ptr %A, ptr %B, ptr %C) nounwind { ; CHECK-LABEL: fmls_2s: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: ldr d2, [x0] +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: ldr d2, [x1] ; CHECK-NEXT: ldr d0, [x2] -; CHECK-NEXT: fmls.2s v0, v2, v1 +; CHECK-NEXT: fmls.2s v0, v1, v2 ; CHECK-NEXT: ret %tmp1 = load <2 x float>, ptr %A %tmp2 = load <2 x float>, ptr %B @@ -838,10 +838,10 @@ define <4 x float> @fmls_4s(ptr %A, ptr %B, ptr %C) nounwind { ; CHECK-LABEL: fmls_4s: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: ldr q2, [x0] +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr q2, [x1] ; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: fmls.4s v0, v2, v1 +; CHECK-NEXT: fmls.4s v0, v1, v2 ; CHECK-NEXT: ret %tmp1 = load <4 x float>, ptr %A %tmp2 = load <4 x float>, ptr %B @@ -854,10 +854,10 @@ define <2 x double> @fmls_2d(ptr %A, ptr %B, ptr %C) nounwind { ; CHECK-LABEL: fmls_2d: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: ldr q2, [x0] +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr q2, [x1] ; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: fmls.2d v0, v2, v1 +; CHECK-NEXT: fmls.2d v0, v1, v2 ; CHECK-NEXT: ret %tmp1 = load <2 x double>, ptr %A %tmp2 = load <2 x double>, ptr %B @@ -870,10 +870,10 @@ define <2 x float> @fmls_commuted_neg_2s(ptr %A, ptr %B, ptr %C) nounwind { ; CHECK-LABEL: fmls_commuted_neg_2s: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: ldr d2, [x0] +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: ldr d2, [x1] ; CHECK-NEXT: ldr d0, [x2] -; CHECK-NEXT: fmls.2s v0, v2, v1 +; CHECK-NEXT: fmls.2s v0, v1, v2 ; CHECK-NEXT: ret %tmp1 = load <2 x float>, ptr %A %tmp2 = load <2 x float>, ptr %B @@ -886,10 +886,10 @@ define <4 x float> @fmls_commuted_neg_4s(ptr %A, ptr %B, ptr %C) nounwind { ; CHECK-LABEL: fmls_commuted_neg_4s: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: ldr q2, [x0] +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr q2, [x1] ; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: fmls.4s v0, v2, v1 +; CHECK-NEXT: fmls.4s v0, v1, v2 ; CHECK-NEXT: ret %tmp1 = load <4 x float>, ptr %A %tmp2 = load <4 x float>, ptr %B @@ -902,10 +902,10 @@ define <2 x double> @fmls_commuted_neg_2d(ptr %A, ptr %B, ptr %C) nounwind { ; CHECK-LABEL: fmls_commuted_neg_2d: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: ldr q2, [x0] +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr q2, [x1] ; CHECK-NEXT: ldr q0, [x2] -; CHECK-NEXT: fmls.2d v0, v2, v1 +; CHECK-NEXT: fmls.2d v0, v1, v2 ; CHECK-NEXT: ret %tmp1 = load <2 x double>, ptr %A %tmp2 = load <2 x double>, ptr %B @@ -1122,13 +1122,13 @@ define <2 x i64> @mul_2d(<2 x i64> %A, <2 x i64> %B) nounwind { ; CHECK-LABEL: mul_2d: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov x9, d1 -; CHECK-NEXT: fmov x10, d0 +; CHECK-NEXT: fmov x10, d1 +; CHECK-NEXT: fmov x11, d0 ; CHECK-NEXT: mov.d x8, v1[1] -; CHECK-NEXT: mov.d x11, v0[1] -; CHECK-NEXT: mul x9, x10, x9 -; CHECK-NEXT: mul x8, x11, x8 -; CHECK-NEXT: fmov d0, x9 +; CHECK-NEXT: mov.d x9, v0[1] +; CHECK-NEXT: mul x10, x11, x10 +; CHECK-NEXT: mul x8, x9, x8 +; CHECK-NEXT: fmov d0, x10 ; CHECK-NEXT: mov.d v0[1], x8 ; CHECK-NEXT: ret %tmp1 = mul <2 x i64> %A, %B @@ -1533,10 +1533,10 @@ ; CHECK-LABEL: sqadd_lane1_sqdmull4s: ; CHECK: // %bb.0: ; CHECK-NEXT: sqdmull.4s v0, v0, v1 -; CHECK-NEXT: fmov s1, w0 ; CHECK-NEXT: mov.s w8, v0[1] -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: sqadd s0, s1, s0 +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: fmov s1, w8 +; CHECK-NEXT: sqadd s0, s0, s1 ; CHECK-NEXT: fmov 
w0, s0 ; CHECK-NEXT: ret %prod.vec = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %B, <4 x i16> %C) @@ -1549,10 +1549,10 @@ ; CHECK-LABEL: sqsub_lane1_sqdmull4s: ; CHECK: // %bb.0: ; CHECK-NEXT: sqdmull.4s v0, v0, v1 -; CHECK-NEXT: fmov s1, w0 ; CHECK-NEXT: mov.s w8, v0[1] -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: sqsub s0, s1, s0 +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: fmov s1, w8 +; CHECK-NEXT: sqsub s0, s0, s1 ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %prod.vec = call <4 x i32> @llvm.aarch64.neon.sqdmull.v4i32(<4 x i16> %B, <4 x i16> %C) @@ -1564,11 +1564,11 @@ define i64 @sqdmlal_lane_1d(i64 %A, i32 %B, <2 x i32> %C) nounwind { ; CHECK-LABEL: sqdmlal_lane_1d: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov s1, w1 -; CHECK-NEXT: fmov d2, x0 +; CHECK-NEXT: fmov d1, x0 +; CHECK-NEXT: fmov s2, w1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: sqdmlal.s d2, s1, v0[1] -; CHECK-NEXT: fmov x0, d2 +; CHECK-NEXT: sqdmlal.s d1, s2, v0[1] +; CHECK-NEXT: fmov x0, d1 ; CHECK-NEXT: ret %rhs = extractelement <2 x i32> %C, i32 1 %prod = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %B, i32 %rhs) @@ -1581,11 +1581,11 @@ define i64 @sqdmlsl_lane_1d(i64 %A, i32 %B, <2 x i32> %C) nounwind { ; CHECK-LABEL: sqdmlsl_lane_1d: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov s1, w1 -; CHECK-NEXT: fmov d2, x0 +; CHECK-NEXT: fmov d1, x0 +; CHECK-NEXT: fmov s2, w1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: sqdmlsl.s d2, s1, v0[1] -; CHECK-NEXT: fmov x0, d2 +; CHECK-NEXT: sqdmlsl.s d1, s2, v0[1] +; CHECK-NEXT: fmov x0, d1 ; CHECK-NEXT: ret %rhs = extractelement <2 x i32> %C, i32 1 %prod = call i64 @llvm.aarch64.neon.sqdmulls.scalar(i32 %B, i32 %rhs) @@ -2767,10 +2767,10 @@ define i32 @sqdmlal_s(i16 %A, i16 %B, i32 %C) nounwind { ; CHECK-LABEL: sqdmlal_s: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov s0, w1 -; CHECK-NEXT: fmov s1, w0 +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: fmov s1, w1 ; CHECK-NEXT: fmov s2, w2 -; CHECK-NEXT: sqdmlal.h s2, h1, v0[0] +; CHECK-NEXT: sqdmlal.h s2, h0, v1[0] ; CHECK-NEXT: fmov w0, s2 ; CHECK-NEXT: ret %tmp1 = insertelement <4 x i16> undef, i16 %A, i64 0 @@ -2798,10 +2798,10 @@ define i32 @sqdmlsl_s(i16 %A, i16 %B, i32 %C) nounwind { ; CHECK-LABEL: sqdmlsl_s: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov s0, w1 -; CHECK-NEXT: fmov s1, w0 +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: fmov s1, w1 ; CHECK-NEXT: fmov s2, w2 -; CHECK-NEXT: sqdmlsl.h s2, h1, v0[0] +; CHECK-NEXT: sqdmlsl.h s2, h0, v1[0] ; CHECK-NEXT: fmov w0, s2 ; CHECK-NEXT: ret %tmp1 = insertelement <4 x i16> undef, i16 %A, i64 0 diff --git a/llvm/test/CodeGen/AArch64/arm64-vshift.ll b/llvm/test/CodeGen/AArch64/arm64-vshift.ll --- a/llvm/test/CodeGen/AArch64/arm64-vshift.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vshift.ll @@ -396,8 +396,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldr x8, [x0] ; CHECK-NEXT: mov w9, #1 // =0x1 -; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: srshl d0, d0, d1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret @@ -492,8 +492,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldr x8, [x0] ; CHECK-NEXT: mov w9, #1 // =0x1 -; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: urshl d0, d0, d1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret @@ -805,8 +805,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldr x8, [x0] ; CHECK-NEXT: mov w9, #1 // =0x1 -; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: sqrshl d0, d0, d1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret @@ -914,8 +914,8 @@ ; CHECK: // 
%bb.0: ; CHECK-NEXT: ldr x8, [x0] ; CHECK-NEXT: mov w9, #1 // =0x1 -; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: uqrshl d0, d0, d1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret @@ -2378,8 +2378,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldr w8, [x0] ; CHECK-NEXT: mov x9, #-1 // =0xffffffffffffffff -; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: sshl d0, d0, d1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/arm64-xaluo.ll b/llvm/test/CodeGen/AArch64/arm64-xaluo.ll --- a/llvm/test/CodeGen/AArch64/arm64-xaluo.ll +++ b/llvm/test/CodeGen/AArch64/arm64-xaluo.ll @@ -18,8 +18,8 @@ ; FAST: // %bb.0: // %entry ; FAST-NEXT: adds w8, w0, w1 ; FAST-NEXT: cset w9, vs -; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: str w8, [x2] +; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: ret ; ; GISEL-LABEL: saddo1.i32: @@ -49,8 +49,8 @@ ; FAST: // %bb.0: // %entry ; FAST-NEXT: adds w8, w0, #4 ; FAST-NEXT: cset w9, vs -; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: str w8, [x1] +; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: ret ; ; GISEL-LABEL: saddo2.i32: @@ -80,8 +80,8 @@ ; FAST: // %bb.0: // %entry ; FAST-NEXT: subs w8, w0, #4 ; FAST-NEXT: cset w9, vs -; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: str w8, [x1] +; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: ret ; ; GISEL-LABEL: saddo3.i32: @@ -102,7 +102,7 @@ define zeroext i1 @saddo4.i32(i32 %v1, ptr %res) { ; SDAG-LABEL: saddo4.i32: ; SDAG: // %bb.0: // %entry -; SDAG-NEXT: mov w8, #16777215 +; SDAG-NEXT: mov w8, #16777215 // =0xffffff ; SDAG-NEXT: adds w8, w0, w8 ; SDAG-NEXT: cset w0, vs ; SDAG-NEXT: str w8, [x1] @@ -110,16 +110,16 @@ ; ; FAST-LABEL: saddo4.i32: ; FAST: // %bb.0: // %entry -; FAST-NEXT: mov w8, #16777215 +; FAST-NEXT: mov w8, #16777215 // =0xffffff ; FAST-NEXT: adds w8, w0, w8 ; FAST-NEXT: cset w9, vs -; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: str w8, [x1] +; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: ret ; ; GISEL-LABEL: saddo4.i32: ; GISEL: // %bb.0: // %entry -; GISEL-NEXT: mov w8, #16777215 +; GISEL-NEXT: mov w8, #16777215 // =0xffffff ; GISEL-NEXT: adds w8, w0, w8 ; GISEL-NEXT: cset w0, vs ; GISEL-NEXT: str w8, [x1] @@ -176,8 +176,8 @@ ; FAST: // %bb.0: // %entry ; FAST-NEXT: adds x8, x0, x1 ; FAST-NEXT: cset w9, vs -; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: str x8, [x2] +; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: ret ; ; GISEL-LABEL: saddo1.i64: @@ -206,8 +206,8 @@ ; FAST: // %bb.0: // %entry ; FAST-NEXT: adds x8, x0, #4 ; FAST-NEXT: cset w9, vs -; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: str x8, [x1] +; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: ret ; ; GISEL-LABEL: saddo2.i64: @@ -236,8 +236,8 @@ ; FAST: // %bb.0: // %entry ; FAST-NEXT: subs x8, x0, #4 ; FAST-NEXT: cset w9, vs -; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: str x8, [x1] +; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: ret ; ; GISEL-LABEL: saddo3.i64: @@ -266,8 +266,8 @@ ; FAST: // %bb.0: // %entry ; FAST-NEXT: adds w8, w0, w1 ; FAST-NEXT: cset w9, hs -; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: str w8, [x2] +; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: ret ; ; GISEL-LABEL: uaddo.i32: @@ -296,8 +296,8 @@ ; FAST: // %bb.0: // %entry ; FAST-NEXT: adds x8, x0, x1 ; FAST-NEXT: cset w9, hs -; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: str x8, [x2] +; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: ret ; ; GISEL-LABEL: uaddo.i64: @@ -326,8 +326,8 @@ ; FAST: // %bb.0: // %entry ; FAST-NEXT: subs w8, w0, w1 ; FAST-NEXT: cset w9, vs -; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: str w8, [x2] +; FAST-NEXT: 
and w0, w9, #0x1 ; FAST-NEXT: ret ; ; GISEL-LABEL: ssubo1.i32: @@ -356,8 +356,8 @@ ; FAST: // %bb.0: // %entry ; FAST-NEXT: adds w8, w0, #4 ; FAST-NEXT: cset w9, vs -; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: str w8, [x1] +; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: ret ; ; GISEL-LABEL: ssubo2.i32: @@ -386,8 +386,8 @@ ; FAST: // %bb.0: // %entry ; FAST-NEXT: subs x8, x0, x1 ; FAST-NEXT: cset w9, vs -; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: str x8, [x2] +; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: ret ; ; GISEL-LABEL: ssubo.i64: @@ -416,8 +416,8 @@ ; FAST: // %bb.0: // %entry ; FAST-NEXT: subs w8, w0, w1 ; FAST-NEXT: cset w9, lo -; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: str w8, [x2] +; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: ret ; ; GISEL-LABEL: usubo.i32: @@ -446,8 +446,8 @@ ; FAST: // %bb.0: // %entry ; FAST-NEXT: subs x8, x0, x1 ; FAST-NEXT: cset w9, lo -; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: str x8, [x2] +; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: ret ; ; GISEL-LABEL: usubo.i64: @@ -469,16 +469,16 @@ ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: smull x8, w0, w1 ; SDAG-NEXT: cmp x8, w8, sxtw -; SDAG-NEXT: cset w0, ne ; SDAG-NEXT: str w8, [x2] +; SDAG-NEXT: cset w0, ne ; SDAG-NEXT: ret ; ; FAST-LABEL: smulo.i32: ; FAST: // %bb.0: // %entry ; FAST-NEXT: smull x8, w0, w1 ; FAST-NEXT: cmp x8, w8, sxtw -; FAST-NEXT: cset w9, ne ; FAST-NEXT: str w8, [x2] +; FAST-NEXT: cset w9, ne ; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: ret ; @@ -487,9 +487,9 @@ ; GISEL-NEXT: smull x8, w0, w1 ; GISEL-NEXT: mul w9, w0, w1 ; GISEL-NEXT: asr x8, x8, #32 +; GISEL-NEXT: str w9, [x2] ; GISEL-NEXT: cmp w8, w9, asr #31 ; GISEL-NEXT: cset w0, ne -; GISEL-NEXT: str w9, [x2] ; GISEL-NEXT: ret entry: %t = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %v1, i32 %v2) @@ -504,28 +504,28 @@ ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: mul x8, x0, x1 ; SDAG-NEXT: smulh x9, x0, x1 +; SDAG-NEXT: str x8, [x2] ; SDAG-NEXT: cmp x9, x8, asr #63 ; SDAG-NEXT: cset w0, ne -; SDAG-NEXT: str x8, [x2] ; SDAG-NEXT: ret ; ; FAST-LABEL: smulo.i64: ; FAST: // %bb.0: // %entry ; FAST-NEXT: mul x8, x0, x1 ; FAST-NEXT: smulh x9, x0, x1 +; FAST-NEXT: str x8, [x2] ; FAST-NEXT: cmp x9, x8, asr #63 ; FAST-NEXT: cset w9, ne -; FAST-NEXT: str x8, [x2] ; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: ret ; ; GISEL-LABEL: smulo.i64: ; GISEL: // %bb.0: // %entry -; GISEL-NEXT: mul x8, x0, x1 -; GISEL-NEXT: smulh x9, x0, x1 -; GISEL-NEXT: cmp x9, x8, asr #63 +; GISEL-NEXT: smulh x8, x0, x1 +; GISEL-NEXT: mul x9, x0, x1 +; GISEL-NEXT: cmp x8, x9, asr #63 +; GISEL-NEXT: str x9, [x2] ; GISEL-NEXT: cset w0, ne -; GISEL-NEXT: str x8, [x2] ; GISEL-NEXT: ret entry: %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2) @@ -547,8 +547,8 @@ ; FAST: // %bb.0: // %entry ; FAST-NEXT: adds x8, x0, x0 ; FAST-NEXT: cset w9, vs -; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: str x8, [x1] +; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: ret ; ; GISEL-LABEL: smulo2.i64: @@ -570,17 +570,17 @@ ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: umull x8, w0, w1 ; SDAG-NEXT: tst x8, #0xffffffff00000000 -; SDAG-NEXT: cset w0, ne ; SDAG-NEXT: str w8, [x2] +; SDAG-NEXT: cset w0, ne ; SDAG-NEXT: ret ; ; FAST-LABEL: umulo.i32: ; FAST: // %bb.0: // %entry ; FAST-NEXT: umull x8, w0, w1 ; FAST-NEXT: tst x8, #0xffffffff00000000 +; FAST-NEXT: str w8, [x2] ; FAST-NEXT: cset w9, ne ; FAST-NEXT: and w0, w9, #0x1 -; FAST-NEXT: str w8, [x2] ; FAST-NEXT: ret ; ; GISEL-LABEL: umulo.i32: @@ -588,10 +588,9 @@ ; GISEL-NEXT: umull x8, w0, w1 ; GISEL-NEXT: mul w9, w0, w1 ; GISEL-NEXT: lsr x8, x8, #32 -; 
GISEL-NEXT: cmp w8, #0 -; GISEL-NEXT: cset w8, ne -; GISEL-NEXT: mov w0, w8 ; GISEL-NEXT: str w9, [x2] +; GISEL-NEXT: cmp w8, #0 +; GISEL-NEXT: cset w0, ne ; GISEL-NEXT: ret entry: %t = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %v1, i32 %v2) @@ -607,8 +606,7 @@ ; SDAG-NEXT: umulh x8, x0, x1 ; SDAG-NEXT: mul x9, x0, x1 ; SDAG-NEXT: cmp xzr, x8 -; SDAG-NEXT: cset w8, ne -; SDAG-NEXT: mov w0, w8 +; SDAG-NEXT: cset w0, ne ; SDAG-NEXT: str x9, [x2] ; SDAG-NEXT: ret ; @@ -618,8 +616,7 @@ ; FAST-NEXT: mul x9, x0, x1 ; FAST-NEXT: cmp xzr, x8 ; FAST-NEXT: cset w8, ne -; FAST-NEXT: and w8, w8, #0x1 -; FAST-NEXT: mov w0, w8 +; FAST-NEXT: and w0, w8, #0x1 ; FAST-NEXT: str x9, [x2] ; FAST-NEXT: ret ; @@ -628,8 +625,7 @@ ; GISEL-NEXT: umulh x8, x0, x1 ; GISEL-NEXT: mul x9, x0, x1 ; GISEL-NEXT: cmp x8, #0 -; GISEL-NEXT: cset w8, ne -; GISEL-NEXT: mov w0, w8 +; GISEL-NEXT: cset w0, ne ; GISEL-NEXT: str x9, [x2] ; GISEL-NEXT: ret entry: @@ -652,8 +648,8 @@ ; FAST: // %bb.0: // %entry ; FAST-NEXT: adds x8, x0, x0 ; FAST-NEXT: cset w9, hs -; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: str x8, [x1] +; FAST-NEXT: and w0, w9, #0x1 ; FAST-NEXT: ret ; ; GISEL-LABEL: umulo2.i64: @@ -1160,13 +1156,29 @@ } define i64 @smulo.select.i64(i64 %v1, i64 %v2) { -; CHECK-LABEL: smulo.select.i64: -; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mul x8, x0, x1 -; CHECK-NEXT: smulh x9, x0, x1 -; CHECK-NEXT: cmp x9, x8, asr #63 -; CHECK-NEXT: csel x0, x0, x1, ne -; CHECK-NEXT: ret +; SDAG-LABEL: smulo.select.i64: +; SDAG: // %bb.0: // %entry +; SDAG-NEXT: mul x8, x0, x1 +; SDAG-NEXT: smulh x9, x0, x1 +; SDAG-NEXT: cmp x9, x8, asr #63 +; SDAG-NEXT: csel x0, x0, x1, ne +; SDAG-NEXT: ret +; +; FAST-LABEL: smulo.select.i64: +; FAST: // %bb.0: // %entry +; FAST-NEXT: mul x8, x0, x1 +; FAST-NEXT: smulh x9, x0, x1 +; FAST-NEXT: cmp x9, x8, asr #63 +; FAST-NEXT: csel x0, x0, x1, ne +; FAST-NEXT: ret +; +; GISEL-LABEL: smulo.select.i64: +; GISEL: // %bb.0: // %entry +; GISEL-NEXT: smulh x8, x0, x1 +; GISEL-NEXT: mul x9, x0, x1 +; GISEL-NEXT: cmp x8, x9, asr #63 +; GISEL-NEXT: csel x0, x0, x1, ne +; GISEL-NEXT: ret entry: %t = call {i64, i1} @llvm.smul.with.overflow.i64(i64 %v1, i64 %v2) %obit = extractvalue {i64, i1} %t, 1 @@ -1193,9 +1205,9 @@ ; ; GISEL-LABEL: smulo.not.i64: ; GISEL: // %bb.0: // %entry -; GISEL-NEXT: mul x8, x0, x1 -; GISEL-NEXT: smulh x9, x0, x1 -; GISEL-NEXT: cmp x9, x8, asr #63 +; GISEL-NEXT: smulh x8, x0, x1 +; GISEL-NEXT: mul x9, x0, x1 +; GISEL-NEXT: cmp x8, x9, asr #63 ; GISEL-NEXT: cset w8, ne ; GISEL-NEXT: eor w0, w8, #0x1 ; GISEL-NEXT: ret @@ -1326,29 +1338,29 @@ define i8 @uaddo.selectboth.i8(i8 %a, i8 %b) { ; SDAG-LABEL: uaddo.selectboth.i8: ; SDAG: // %bb.0: // %entry -; SDAG-NEXT: and w8, w0, #0xff -; SDAG-NEXT: mov w9, #10 -; SDAG-NEXT: add w8, w8, w1, uxtb -; SDAG-NEXT: tst w8, #0x100 -; SDAG-NEXT: csel w0, w8, w9, ne +; SDAG-NEXT: and w9, w0, #0xff +; SDAG-NEXT: mov w8, #10 // =0xa +; SDAG-NEXT: add w9, w9, w1, uxtb +; SDAG-NEXT: tst w9, #0x100 +; SDAG-NEXT: csel w0, w9, w8, ne ; SDAG-NEXT: ret ; ; FAST-LABEL: uaddo.selectboth.i8: ; FAST: // %bb.0: // %entry -; FAST-NEXT: and w8, w0, #0xff -; FAST-NEXT: mov w9, #10 -; FAST-NEXT: add w8, w8, w1, uxtb -; FAST-NEXT: tst w8, #0x100 -; FAST-NEXT: csel w0, w8, w9, ne +; FAST-NEXT: and w9, w0, #0xff +; FAST-NEXT: mov w8, #10 // =0xa +; FAST-NEXT: add w9, w9, w1, uxtb +; FAST-NEXT: tst w9, #0x100 +; FAST-NEXT: csel w0, w9, w8, ne ; FAST-NEXT: ret ; ; GISEL-LABEL: uaddo.selectboth.i8: ; GISEL: // %bb.0: // %entry -; GISEL-NEXT: and w8, w1, #0xff -; GISEL-NEXT: 
mov w9, #10 -; GISEL-NEXT: add w8, w8, w0, uxtb -; GISEL-NEXT: cmp w8, w8, uxtb -; GISEL-NEXT: csel w0, w8, w9, ne +; GISEL-NEXT: and w9, w1, #0xff +; GISEL-NEXT: mov w8, #10 // =0xa +; GISEL-NEXT: add w9, w9, w0, uxtb +; GISEL-NEXT: cmp w9, w9, uxtb +; GISEL-NEXT: csel w0, w9, w8, ne ; GISEL-NEXT: ret entry: %m = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 %a, i8 %b) @@ -1361,29 +1373,29 @@ define i8 @saddo.selectboth.i8(i8 %a, i8 %b) { ; SDAG-LABEL: saddo.selectboth.i8: ; SDAG: // %bb.0: // %entry -; SDAG-NEXT: sxtb w8, w0 -; SDAG-NEXT: mov w9, #10 -; SDAG-NEXT: add w8, w8, w1, sxtb -; SDAG-NEXT: cmp w8, w8, sxtb -; SDAG-NEXT: csel w0, w8, w9, ne +; SDAG-NEXT: sxtb w9, w0 +; SDAG-NEXT: mov w8, #10 // =0xa +; SDAG-NEXT: add w9, w9, w1, sxtb +; SDAG-NEXT: cmp w9, w9, sxtb +; SDAG-NEXT: csel w0, w9, w8, ne ; SDAG-NEXT: ret ; ; FAST-LABEL: saddo.selectboth.i8: ; FAST: // %bb.0: // %entry -; FAST-NEXT: sxtb w8, w0 -; FAST-NEXT: mov w9, #10 -; FAST-NEXT: add w8, w8, w1, sxtb -; FAST-NEXT: cmp w8, w8, sxtb -; FAST-NEXT: csel w0, w8, w9, ne +; FAST-NEXT: sxtb w9, w0 +; FAST-NEXT: mov w8, #10 // =0xa +; FAST-NEXT: add w9, w9, w1, sxtb +; FAST-NEXT: cmp w9, w9, sxtb +; FAST-NEXT: csel w0, w9, w8, ne ; FAST-NEXT: ret ; ; GISEL-LABEL: saddo.selectboth.i8: ; GISEL: // %bb.0: // %entry -; GISEL-NEXT: sxtb w8, w1 -; GISEL-NEXT: mov w9, #10 -; GISEL-NEXT: add w8, w8, w0, sxtb -; GISEL-NEXT: cmp w8, w8, sxtb -; GISEL-NEXT: csel w0, w8, w9, ne +; GISEL-NEXT: sxtb w9, w1 +; GISEL-NEXT: mov w8, #10 // =0xa +; GISEL-NEXT: add w9, w9, w0, sxtb +; GISEL-NEXT: cmp w9, w9, sxtb +; GISEL-NEXT: csel w0, w9, w8, ne ; GISEL-NEXT: ret entry: %m = call { i8, i1 } @llvm.sadd.with.overflow.i8(i8 %a, i8 %b) @@ -1396,29 +1408,29 @@ define i16 @uaddo.selectboth.i16(i16 %a, i16 %b) { ; SDAG-LABEL: uaddo.selectboth.i16: ; SDAG: // %bb.0: // %entry -; SDAG-NEXT: and w8, w0, #0xffff -; SDAG-NEXT: mov w9, #10 -; SDAG-NEXT: add w8, w8, w1, uxth -; SDAG-NEXT: tst w8, #0x10000 -; SDAG-NEXT: csel w0, w8, w9, ne +; SDAG-NEXT: and w9, w0, #0xffff +; SDAG-NEXT: mov w8, #10 // =0xa +; SDAG-NEXT: add w9, w9, w1, uxth +; SDAG-NEXT: tst w9, #0x10000 +; SDAG-NEXT: csel w0, w9, w8, ne ; SDAG-NEXT: ret ; ; FAST-LABEL: uaddo.selectboth.i16: ; FAST: // %bb.0: // %entry -; FAST-NEXT: and w8, w0, #0xffff -; FAST-NEXT: mov w9, #10 -; FAST-NEXT: add w8, w8, w1, uxth -; FAST-NEXT: tst w8, #0x10000 -; FAST-NEXT: csel w0, w8, w9, ne +; FAST-NEXT: and w9, w0, #0xffff +; FAST-NEXT: mov w8, #10 // =0xa +; FAST-NEXT: add w9, w9, w1, uxth +; FAST-NEXT: tst w9, #0x10000 +; FAST-NEXT: csel w0, w9, w8, ne ; FAST-NEXT: ret ; ; GISEL-LABEL: uaddo.selectboth.i16: ; GISEL: // %bb.0: // %entry -; GISEL-NEXT: and w8, w1, #0xffff -; GISEL-NEXT: mov w9, #10 -; GISEL-NEXT: add w8, w8, w0, uxth -; GISEL-NEXT: cmp w8, w8, uxth -; GISEL-NEXT: csel w0, w8, w9, ne +; GISEL-NEXT: and w9, w1, #0xffff +; GISEL-NEXT: mov w8, #10 // =0xa +; GISEL-NEXT: add w9, w9, w0, uxth +; GISEL-NEXT: cmp w9, w9, uxth +; GISEL-NEXT: csel w0, w9, w8, ne ; GISEL-NEXT: ret entry: %m = call { i16, i1 } @llvm.uadd.with.overflow.i16(i16 %a, i16 %b) @@ -1431,29 +1443,29 @@ define i16 @saddo.selectboth.i16(i16 %a, i16 %b) { ; SDAG-LABEL: saddo.selectboth.i16: ; SDAG: // %bb.0: // %entry -; SDAG-NEXT: sxth w8, w0 -; SDAG-NEXT: mov w9, #10 -; SDAG-NEXT: add w8, w8, w1, sxth -; SDAG-NEXT: cmp w8, w8, sxth -; SDAG-NEXT: csel w0, w8, w9, ne +; SDAG-NEXT: sxth w9, w0 +; SDAG-NEXT: mov w8, #10 // =0xa +; SDAG-NEXT: add w9, w9, w1, sxth +; SDAG-NEXT: cmp w9, w9, sxth +; SDAG-NEXT: csel w0, w9, w8, ne ; 
SDAG-NEXT: ret ; ; FAST-LABEL: saddo.selectboth.i16: ; FAST: // %bb.0: // %entry -; FAST-NEXT: sxth w8, w0 -; FAST-NEXT: mov w9, #10 -; FAST-NEXT: add w8, w8, w1, sxth -; FAST-NEXT: cmp w8, w8, sxth -; FAST-NEXT: csel w0, w8, w9, ne +; FAST-NEXT: sxth w9, w0 +; FAST-NEXT: mov w8, #10 // =0xa +; FAST-NEXT: add w9, w9, w1, sxth +; FAST-NEXT: cmp w9, w9, sxth +; FAST-NEXT: csel w0, w9, w8, ne ; FAST-NEXT: ret ; ; GISEL-LABEL: saddo.selectboth.i16: ; GISEL: // %bb.0: // %entry -; GISEL-NEXT: sxth w8, w1 -; GISEL-NEXT: mov w9, #10 -; GISEL-NEXT: add w8, w8, w0, sxth -; GISEL-NEXT: cmp w8, w8, sxth -; GISEL-NEXT: csel w0, w8, w9, ne +; GISEL-NEXT: sxth w9, w1 +; GISEL-NEXT: mov w8, #10 // =0xa +; GISEL-NEXT: add w9, w9, w0, sxth +; GISEL-NEXT: cmp w9, w9, sxth +; GISEL-NEXT: csel w0, w9, w8, ne ; GISEL-NEXT: ret entry: %m = call { i16, i1 } @llvm.sadd.with.overflow.i16(i16 %a, i16 %b) @@ -1466,25 +1478,25 @@ define i32 @uaddo.selectboth.i32(i32 %a, i32 %b) { ; SDAG-LABEL: uaddo.selectboth.i32: ; SDAG: // %bb.0: // %entry -; SDAG-NEXT: adds w8, w0, w1 -; SDAG-NEXT: mov w9, #10 -; SDAG-NEXT: csel w0, w8, w9, hs +; SDAG-NEXT: mov w8, #10 // =0xa +; SDAG-NEXT: adds w9, w0, w1 +; SDAG-NEXT: csel w0, w9, w8, hs ; SDAG-NEXT: ret ; ; FAST-LABEL: uaddo.selectboth.i32: ; FAST: // %bb.0: // %entry -; FAST-NEXT: adds w8, w0, w1 -; FAST-NEXT: mov w9, #10 -; FAST-NEXT: csel w0, w8, w9, hs +; FAST-NEXT: mov w8, #10 // =0xa +; FAST-NEXT: adds w9, w0, w1 +; FAST-NEXT: csel w0, w9, w8, hs ; FAST-NEXT: ret ; ; GISEL-LABEL: uaddo.selectboth.i32: ; GISEL: // %bb.0: // %entry -; GISEL-NEXT: adds w8, w0, w1 -; GISEL-NEXT: mov w10, #10 -; GISEL-NEXT: cset w9, hs -; GISEL-NEXT: tst w9, #0x1 -; GISEL-NEXT: csel w0, w8, w10, ne +; GISEL-NEXT: adds w9, w0, w1 +; GISEL-NEXT: mov w8, #10 // =0xa +; GISEL-NEXT: cset w10, hs +; GISEL-NEXT: tst w10, #0x1 +; GISEL-NEXT: csel w0, w9, w8, ne ; GISEL-NEXT: ret entry: %m = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) @@ -1497,25 +1509,25 @@ define i32 @saddo.selectboth.i32(i32 %a, i32 %b) { ; SDAG-LABEL: saddo.selectboth.i32: ; SDAG: // %bb.0: // %entry -; SDAG-NEXT: adds w8, w0, w1 -; SDAG-NEXT: mov w9, #10 -; SDAG-NEXT: csel w0, w8, w9, vs +; SDAG-NEXT: mov w8, #10 // =0xa +; SDAG-NEXT: adds w9, w0, w1 +; SDAG-NEXT: csel w0, w9, w8, vs ; SDAG-NEXT: ret ; ; FAST-LABEL: saddo.selectboth.i32: ; FAST: // %bb.0: // %entry -; FAST-NEXT: adds w8, w0, w1 -; FAST-NEXT: mov w9, #10 -; FAST-NEXT: csel w0, w8, w9, vs +; FAST-NEXT: mov w8, #10 // =0xa +; FAST-NEXT: adds w9, w0, w1 +; FAST-NEXT: csel w0, w9, w8, vs ; FAST-NEXT: ret ; ; GISEL-LABEL: saddo.selectboth.i32: ; GISEL: // %bb.0: // %entry -; GISEL-NEXT: adds w8, w0, w1 -; GISEL-NEXT: mov w10, #10 -; GISEL-NEXT: cset w9, vs -; GISEL-NEXT: tst w9, #0x1 -; GISEL-NEXT: csel w0, w8, w10, ne +; GISEL-NEXT: adds w9, w0, w1 +; GISEL-NEXT: mov w8, #10 // =0xa +; GISEL-NEXT: cset w10, vs +; GISEL-NEXT: tst w10, #0x1 +; GISEL-NEXT: csel w0, w9, w8, ne ; GISEL-NEXT: ret entry: %m = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) @@ -1528,25 +1540,25 @@ define i64 @uaddo.selectboth.i64(i64 %a, i64 %b) { ; SDAG-LABEL: uaddo.selectboth.i64: ; SDAG: // %bb.0: // %entry -; SDAG-NEXT: adds x8, x0, x1 -; SDAG-NEXT: mov w9, #10 -; SDAG-NEXT: csel x0, x8, x9, hs +; SDAG-NEXT: mov w8, #10 // =0xa +; SDAG-NEXT: adds x9, x0, x1 +; SDAG-NEXT: csel x0, x9, x8, hs ; SDAG-NEXT: ret ; ; FAST-LABEL: uaddo.selectboth.i64: ; FAST: // %bb.0: // %entry -; FAST-NEXT: adds x8, x0, x1 -; FAST-NEXT: mov x9, #10 -; FAST-NEXT: csel x0, x8, x9, 
hs +; FAST-NEXT: mov x8, #10 // =0xa +; FAST-NEXT: adds x9, x0, x1 +; FAST-NEXT: csel x0, x9, x8, hs ; FAST-NEXT: ret ; ; GISEL-LABEL: uaddo.selectboth.i64: ; GISEL: // %bb.0: // %entry -; GISEL-NEXT: adds x8, x0, x1 -; GISEL-NEXT: mov w10, #10 -; GISEL-NEXT: cset w9, hs -; GISEL-NEXT: tst w9, #0x1 -; GISEL-NEXT: csel x0, x8, x10, ne +; GISEL-NEXT: adds x9, x0, x1 +; GISEL-NEXT: mov w8, #10 // =0xa +; GISEL-NEXT: cset w10, hs +; GISEL-NEXT: tst w10, #0x1 +; GISEL-NEXT: csel x0, x9, x8, ne ; GISEL-NEXT: ret entry: %m = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) @@ -1559,25 +1571,25 @@ define i64 @saddo.selectboth.i64(i64 %a, i64 %b) { ; SDAG-LABEL: saddo.selectboth.i64: ; SDAG: // %bb.0: // %entry -; SDAG-NEXT: adds x8, x0, x1 -; SDAG-NEXT: mov w9, #10 -; SDAG-NEXT: csel x0, x8, x9, vs +; SDAG-NEXT: mov w8, #10 // =0xa +; SDAG-NEXT: adds x9, x0, x1 +; SDAG-NEXT: csel x0, x9, x8, vs ; SDAG-NEXT: ret ; ; FAST-LABEL: saddo.selectboth.i64: ; FAST: // %bb.0: // %entry -; FAST-NEXT: adds x8, x0, x1 -; FAST-NEXT: mov x9, #10 -; FAST-NEXT: csel x0, x8, x9, vs +; FAST-NEXT: mov x8, #10 // =0xa +; FAST-NEXT: adds x9, x0, x1 +; FAST-NEXT: csel x0, x9, x8, vs ; FAST-NEXT: ret ; ; GISEL-LABEL: saddo.selectboth.i64: ; GISEL: // %bb.0: // %entry -; GISEL-NEXT: adds x8, x0, x1 -; GISEL-NEXT: mov w10, #10 -; GISEL-NEXT: cset w9, vs -; GISEL-NEXT: tst w9, #0x1 -; GISEL-NEXT: csel x0, x8, x10, ne +; GISEL-NEXT: adds x9, x0, x1 +; GISEL-NEXT: mov w8, #10 // =0xa +; GISEL-NEXT: cset w10, vs +; GISEL-NEXT: tst w10, #0x1 +; GISEL-NEXT: csel x0, x9, x8, ne ; GISEL-NEXT: ret entry: %m = call { i64, i1 } @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) @@ -1590,29 +1602,29 @@ define i8 @usubo.selectboth.i8(i8 %a, i8 %b) { ; SDAG-LABEL: usubo.selectboth.i8: ; SDAG: // %bb.0: // %entry -; SDAG-NEXT: and w8, w0, #0xff -; SDAG-NEXT: mov w9, #10 -; SDAG-NEXT: sub w8, w8, w1, uxtb -; SDAG-NEXT: tst w8, #0xffffff00 -; SDAG-NEXT: csel w0, w8, w9, ne +; SDAG-NEXT: and w9, w0, #0xff +; SDAG-NEXT: mov w8, #10 // =0xa +; SDAG-NEXT: sub w9, w9, w1, uxtb +; SDAG-NEXT: tst w9, #0xffffff00 +; SDAG-NEXT: csel w0, w9, w8, ne ; SDAG-NEXT: ret ; ; FAST-LABEL: usubo.selectboth.i8: ; FAST: // %bb.0: // %entry -; FAST-NEXT: and w8, w0, #0xff -; FAST-NEXT: mov w9, #10 -; FAST-NEXT: sub w8, w8, w1, uxtb -; FAST-NEXT: tst w8, #0xffffff00 -; FAST-NEXT: csel w0, w8, w9, ne +; FAST-NEXT: and w9, w0, #0xff +; FAST-NEXT: mov w8, #10 // =0xa +; FAST-NEXT: sub w9, w9, w1, uxtb +; FAST-NEXT: tst w9, #0xffffff00 +; FAST-NEXT: csel w0, w9, w8, ne ; FAST-NEXT: ret ; ; GISEL-LABEL: usubo.selectboth.i8: ; GISEL: // %bb.0: // %entry -; GISEL-NEXT: and w8, w0, #0xff -; GISEL-NEXT: mov w9, #10 -; GISEL-NEXT: sub w8, w8, w1, uxtb -; GISEL-NEXT: cmp w8, w8, uxtb -; GISEL-NEXT: csel w0, w8, w9, ne +; GISEL-NEXT: and w9, w0, #0xff +; GISEL-NEXT: mov w8, #10 // =0xa +; GISEL-NEXT: sub w9, w9, w1, uxtb +; GISEL-NEXT: cmp w9, w9, uxtb +; GISEL-NEXT: csel w0, w9, w8, ne ; GISEL-NEXT: ret entry: %m = call { i8, i1 } @llvm.usub.with.overflow.i8(i8 %a, i8 %b) @@ -1625,11 +1637,11 @@ define i8 @ssubo.selectboth.i8(i8 %a, i8 %b) { ; CHECK-LABEL: ssubo.selectboth.i8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: mov w9, #10 -; CHECK-NEXT: sub w8, w8, w1, sxtb -; CHECK-NEXT: cmp w8, w8, sxtb -; CHECK-NEXT: csel w0, w8, w9, ne +; CHECK-NEXT: sxtb w9, w0 +; CHECK-NEXT: mov w8, #10 // =0xa +; CHECK-NEXT: sub w9, w9, w1, sxtb +; CHECK-NEXT: cmp w9, w9, sxtb +; CHECK-NEXT: csel w0, w9, w8, ne ; CHECK-NEXT: ret entry: %m = call { i8, i1 } 
@llvm.ssub.with.overflow.i8(i8 %a, i8 %b) @@ -1642,29 +1654,29 @@ define i16 @usubo.selectboth.i16(i16 %a, i16 %b) { ; SDAG-LABEL: usubo.selectboth.i16: ; SDAG: // %bb.0: // %entry -; SDAG-NEXT: and w8, w0, #0xffff -; SDAG-NEXT: mov w9, #10 -; SDAG-NEXT: sub w8, w8, w1, uxth -; SDAG-NEXT: tst w8, #0xffff0000 -; SDAG-NEXT: csel w0, w8, w9, ne +; SDAG-NEXT: and w9, w0, #0xffff +; SDAG-NEXT: mov w8, #10 // =0xa +; SDAG-NEXT: sub w9, w9, w1, uxth +; SDAG-NEXT: tst w9, #0xffff0000 +; SDAG-NEXT: csel w0, w9, w8, ne ; SDAG-NEXT: ret ; ; FAST-LABEL: usubo.selectboth.i16: ; FAST: // %bb.0: // %entry -; FAST-NEXT: and w8, w0, #0xffff -; FAST-NEXT: mov w9, #10 -; FAST-NEXT: sub w8, w8, w1, uxth -; FAST-NEXT: tst w8, #0xffff0000 -; FAST-NEXT: csel w0, w8, w9, ne +; FAST-NEXT: and w9, w0, #0xffff +; FAST-NEXT: mov w8, #10 // =0xa +; FAST-NEXT: sub w9, w9, w1, uxth +; FAST-NEXT: tst w9, #0xffff0000 +; FAST-NEXT: csel w0, w9, w8, ne ; FAST-NEXT: ret ; ; GISEL-LABEL: usubo.selectboth.i16: ; GISEL: // %bb.0: // %entry -; GISEL-NEXT: and w8, w0, #0xffff -; GISEL-NEXT: mov w9, #10 -; GISEL-NEXT: sub w8, w8, w1, uxth -; GISEL-NEXT: cmp w8, w8, uxth -; GISEL-NEXT: csel w0, w8, w9, ne +; GISEL-NEXT: and w9, w0, #0xffff +; GISEL-NEXT: mov w8, #10 // =0xa +; GISEL-NEXT: sub w9, w9, w1, uxth +; GISEL-NEXT: cmp w9, w9, uxth +; GISEL-NEXT: csel w0, w9, w8, ne ; GISEL-NEXT: ret entry: %m = call { i16, i1 } @llvm.usub.with.overflow.i16(i16 %a, i16 %b) @@ -1677,11 +1689,11 @@ define i16 @ssubo.selectboth.i16(i16 %a, i16 %b) { ; CHECK-LABEL: ssubo.selectboth.i16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: mov w9, #10 -; CHECK-NEXT: sub w8, w8, w1, sxth -; CHECK-NEXT: cmp w8, w8, sxth -; CHECK-NEXT: csel w0, w8, w9, ne +; CHECK-NEXT: sxth w9, w0 +; CHECK-NEXT: mov w8, #10 // =0xa +; CHECK-NEXT: sub w9, w9, w1, sxth +; CHECK-NEXT: cmp w9, w9, sxth +; CHECK-NEXT: csel w0, w9, w8, ne ; CHECK-NEXT: ret entry: %m = call { i16, i1 } @llvm.ssub.with.overflow.i16(i16 %a, i16 %b) @@ -1694,25 +1706,25 @@ define i32 @usubo.selectboth.i32(i32 %a, i32 %b) { ; SDAG-LABEL: usubo.selectboth.i32: ; SDAG: // %bb.0: // %entry -; SDAG-NEXT: subs w8, w0, w1 -; SDAG-NEXT: mov w9, #10 -; SDAG-NEXT: csel w0, w8, w9, lo +; SDAG-NEXT: mov w8, #10 // =0xa +; SDAG-NEXT: subs w9, w0, w1 +; SDAG-NEXT: csel w0, w9, w8, lo ; SDAG-NEXT: ret ; ; FAST-LABEL: usubo.selectboth.i32: ; FAST: // %bb.0: // %entry -; FAST-NEXT: subs w8, w0, w1 -; FAST-NEXT: mov w9, #10 -; FAST-NEXT: csel w0, w8, w9, lo +; FAST-NEXT: mov w8, #10 // =0xa +; FAST-NEXT: subs w9, w0, w1 +; FAST-NEXT: csel w0, w9, w8, lo ; FAST-NEXT: ret ; ; GISEL-LABEL: usubo.selectboth.i32: ; GISEL: // %bb.0: // %entry -; GISEL-NEXT: subs w8, w0, w1 -; GISEL-NEXT: mov w10, #10 -; GISEL-NEXT: cset w9, lo -; GISEL-NEXT: tst w9, #0x1 -; GISEL-NEXT: csel w0, w8, w10, ne +; GISEL-NEXT: subs w9, w0, w1 +; GISEL-NEXT: mov w8, #10 // =0xa +; GISEL-NEXT: cset w10, lo +; GISEL-NEXT: tst w10, #0x1 +; GISEL-NEXT: csel w0, w9, w8, ne ; GISEL-NEXT: ret entry: %m = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b) @@ -1725,25 +1737,25 @@ define i32 @ssubo.selectboth.i32(i32 %a, i32 %b) { ; SDAG-LABEL: ssubo.selectboth.i32: ; SDAG: // %bb.0: // %entry -; SDAG-NEXT: subs w8, w0, w1 -; SDAG-NEXT: mov w9, #10 -; SDAG-NEXT: csel w0, w8, w9, vs +; SDAG-NEXT: mov w8, #10 // =0xa +; SDAG-NEXT: subs w9, w0, w1 +; SDAG-NEXT: csel w0, w9, w8, vs ; SDAG-NEXT: ret ; ; FAST-LABEL: ssubo.selectboth.i32: ; FAST: // %bb.0: // %entry -; FAST-NEXT: subs w8, w0, w1 -; FAST-NEXT: mov w9, #10 -; 
FAST-NEXT: csel w0, w8, w9, vs +; FAST-NEXT: mov w8, #10 // =0xa +; FAST-NEXT: subs w9, w0, w1 +; FAST-NEXT: csel w0, w9, w8, vs ; FAST-NEXT: ret ; ; GISEL-LABEL: ssubo.selectboth.i32: ; GISEL: // %bb.0: // %entry -; GISEL-NEXT: subs w8, w0, w1 -; GISEL-NEXT: mov w10, #10 -; GISEL-NEXT: cset w9, vs -; GISEL-NEXT: tst w9, #0x1 -; GISEL-NEXT: csel w0, w8, w10, ne +; GISEL-NEXT: subs w9, w0, w1 +; GISEL-NEXT: mov w8, #10 // =0xa +; GISEL-NEXT: cset w10, vs +; GISEL-NEXT: tst w10, #0x1 +; GISEL-NEXT: csel w0, w9, w8, ne ; GISEL-NEXT: ret entry: %m = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) @@ -1756,25 +1768,25 @@ define i64 @usubo.selectboth.i64(i64 %a, i64 %b) { ; SDAG-LABEL: usubo.selectboth.i64: ; SDAG: // %bb.0: // %entry -; SDAG-NEXT: subs x8, x0, x1 -; SDAG-NEXT: mov w9, #10 -; SDAG-NEXT: csel x0, x8, x9, lo +; SDAG-NEXT: mov w8, #10 // =0xa +; SDAG-NEXT: subs x9, x0, x1 +; SDAG-NEXT: csel x0, x9, x8, lo ; SDAG-NEXT: ret ; ; FAST-LABEL: usubo.selectboth.i64: ; FAST: // %bb.0: // %entry -; FAST-NEXT: subs x8, x0, x1 -; FAST-NEXT: mov x9, #10 -; FAST-NEXT: csel x0, x8, x9, lo +; FAST-NEXT: mov x8, #10 // =0xa +; FAST-NEXT: subs x9, x0, x1 +; FAST-NEXT: csel x0, x9, x8, lo ; FAST-NEXT: ret ; ; GISEL-LABEL: usubo.selectboth.i64: ; GISEL: // %bb.0: // %entry -; GISEL-NEXT: subs x8, x0, x1 -; GISEL-NEXT: mov w10, #10 -; GISEL-NEXT: cset w9, lo -; GISEL-NEXT: tst w9, #0x1 -; GISEL-NEXT: csel x0, x8, x10, ne +; GISEL-NEXT: subs x9, x0, x1 +; GISEL-NEXT: mov w8, #10 // =0xa +; GISEL-NEXT: cset w10, lo +; GISEL-NEXT: tst w10, #0x1 +; GISEL-NEXT: csel x0, x9, x8, ne ; GISEL-NEXT: ret entry: %m = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b) @@ -1787,25 +1799,25 @@ define i64 @ssubo.selectboth.i64(i64 %a, i64 %b) { ; SDAG-LABEL: ssubo.selectboth.i64: ; SDAG: // %bb.0: // %entry -; SDAG-NEXT: subs x8, x0, x1 -; SDAG-NEXT: mov w9, #10 -; SDAG-NEXT: csel x0, x8, x9, vs +; SDAG-NEXT: mov w8, #10 // =0xa +; SDAG-NEXT: subs x9, x0, x1 +; SDAG-NEXT: csel x0, x9, x8, vs ; SDAG-NEXT: ret ; ; FAST-LABEL: ssubo.selectboth.i64: ; FAST: // %bb.0: // %entry -; FAST-NEXT: subs x8, x0, x1 -; FAST-NEXT: mov x9, #10 -; FAST-NEXT: csel x0, x8, x9, vs +; FAST-NEXT: mov x8, #10 // =0xa +; FAST-NEXT: subs x9, x0, x1 +; FAST-NEXT: csel x0, x9, x8, vs ; FAST-NEXT: ret ; ; GISEL-LABEL: ssubo.selectboth.i64: ; GISEL: // %bb.0: // %entry -; GISEL-NEXT: subs x8, x0, x1 -; GISEL-NEXT: mov w10, #10 -; GISEL-NEXT: cset w9, vs -; GISEL-NEXT: tst w9, #0x1 -; GISEL-NEXT: csel x0, x8, x10, ne +; GISEL-NEXT: subs x9, x0, x1 +; GISEL-NEXT: mov w8, #10 // =0xa +; GISEL-NEXT: cset w10, vs +; GISEL-NEXT: tst w10, #0x1 +; GISEL-NEXT: csel x0, x9, x8, ne ; GISEL-NEXT: ret entry: %m = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) @@ -1819,32 +1831,32 @@ define i8 @umulo.selectboth.i8(i8 %a, i8 %b) { ; SDAG-LABEL: umulo.selectboth.i8: ; SDAG: // %bb.0: // %entry -; SDAG-NEXT: and w8, w1, #0xff -; SDAG-NEXT: and w9, w0, #0xff -; SDAG-NEXT: mul w8, w9, w8 -; SDAG-NEXT: mov w9, #10 -; SDAG-NEXT: tst w8, #0xff00 -; SDAG-NEXT: csel w0, w8, w9, ne +; SDAG-NEXT: and w9, w1, #0xff +; SDAG-NEXT: and w10, w0, #0xff +; SDAG-NEXT: mov w8, #10 // =0xa +; SDAG-NEXT: mul w9, w10, w9 +; SDAG-NEXT: tst w9, #0xff00 +; SDAG-NEXT: csel w0, w9, w8, ne ; SDAG-NEXT: ret ; ; FAST-LABEL: umulo.selectboth.i8: ; FAST: // %bb.0: // %entry -; FAST-NEXT: and w8, w1, #0xff -; FAST-NEXT: and w9, w0, #0xff -; FAST-NEXT: mul w8, w9, w8 -; FAST-NEXT: mov w9, #10 -; FAST-NEXT: tst w8, #0xff00 -; FAST-NEXT: csel w0, w8, w9, ne 
+; FAST-NEXT: and w9, w1, #0xff +; FAST-NEXT: and w10, w0, #0xff +; FAST-NEXT: mov w8, #10 // =0xa +; FAST-NEXT: mul w9, w10, w9 +; FAST-NEXT: tst w9, #0xff00 +; FAST-NEXT: csel w0, w9, w8, ne ; FAST-NEXT: ret ; ; GISEL-LABEL: umulo.selectboth.i8: ; GISEL: // %bb.0: // %entry -; GISEL-NEXT: and w8, w0, #0xff -; GISEL-NEXT: and w9, w1, #0xff -; GISEL-NEXT: mul w8, w8, w9 -; GISEL-NEXT: mov w9, #10 -; GISEL-NEXT: cmp w8, w8, uxtb -; GISEL-NEXT: csel w0, w8, w9, ne +; GISEL-NEXT: and w9, w0, #0xff +; GISEL-NEXT: and w10, w1, #0xff +; GISEL-NEXT: mov w8, #10 // =0xa +; GISEL-NEXT: mul w9, w9, w10 +; GISEL-NEXT: cmp w9, w9, uxtb +; GISEL-NEXT: csel w0, w9, w8, ne ; GISEL-NEXT: ret entry: %m = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 %a, i8 %b) @@ -1857,32 +1869,32 @@ define i8 @smulo.selectboth.i8(i8 %a, i8 %b) { ; SDAG-LABEL: smulo.selectboth.i8: ; SDAG: // %bb.0: // %entry -; SDAG-NEXT: sxtb w8, w1 -; SDAG-NEXT: sxtb w9, w0 -; SDAG-NEXT: mul w8, w9, w8 -; SDAG-NEXT: mov w9, #10 -; SDAG-NEXT: cmp w8, w8, sxtb -; SDAG-NEXT: csel w0, w8, w9, ne +; SDAG-NEXT: sxtb w9, w1 +; SDAG-NEXT: sxtb w10, w0 +; SDAG-NEXT: mov w8, #10 // =0xa +; SDAG-NEXT: mul w9, w10, w9 +; SDAG-NEXT: cmp w9, w9, sxtb +; SDAG-NEXT: csel w0, w9, w8, ne ; SDAG-NEXT: ret ; ; FAST-LABEL: smulo.selectboth.i8: ; FAST: // %bb.0: // %entry -; FAST-NEXT: sxtb w8, w1 -; FAST-NEXT: sxtb w9, w0 -; FAST-NEXT: mul w8, w9, w8 -; FAST-NEXT: mov w9, #10 -; FAST-NEXT: cmp w8, w8, sxtb -; FAST-NEXT: csel w0, w8, w9, ne +; FAST-NEXT: sxtb w9, w1 +; FAST-NEXT: sxtb w10, w0 +; FAST-NEXT: mov w8, #10 // =0xa +; FAST-NEXT: mul w9, w10, w9 +; FAST-NEXT: cmp w9, w9, sxtb +; FAST-NEXT: csel w0, w9, w8, ne ; FAST-NEXT: ret ; ; GISEL-LABEL: smulo.selectboth.i8: ; GISEL: // %bb.0: // %entry -; GISEL-NEXT: sxtb w8, w0 -; GISEL-NEXT: sxtb w9, w1 -; GISEL-NEXT: mul w8, w8, w9 -; GISEL-NEXT: mov w9, #10 -; GISEL-NEXT: cmp w8, w8, sxtb -; GISEL-NEXT: csel w0, w8, w9, ne +; GISEL-NEXT: sxtb w9, w0 +; GISEL-NEXT: sxtb w10, w1 +; GISEL-NEXT: mov w8, #10 // =0xa +; GISEL-NEXT: mul w9, w9, w10 +; GISEL-NEXT: cmp w9, w9, sxtb +; GISEL-NEXT: csel w0, w9, w8, ne ; GISEL-NEXT: ret entry: %m = call { i8, i1 } @llvm.smul.with.overflow.i8(i8 %a, i8 %b) @@ -1895,32 +1907,32 @@ define i16 @umulo.selectboth.i16(i16 %a, i16 %b) { ; SDAG-LABEL: umulo.selectboth.i16: ; SDAG: // %bb.0: // %entry -; SDAG-NEXT: and w8, w1, #0xffff -; SDAG-NEXT: and w9, w0, #0xffff -; SDAG-NEXT: mul w8, w9, w8 -; SDAG-NEXT: mov w9, #10 -; SDAG-NEXT: tst w8, #0xffff0000 -; SDAG-NEXT: csel w0, w8, w9, ne +; SDAG-NEXT: and w9, w1, #0xffff +; SDAG-NEXT: and w10, w0, #0xffff +; SDAG-NEXT: mov w8, #10 // =0xa +; SDAG-NEXT: mul w9, w10, w9 +; SDAG-NEXT: tst w9, #0xffff0000 +; SDAG-NEXT: csel w0, w9, w8, ne ; SDAG-NEXT: ret ; ; FAST-LABEL: umulo.selectboth.i16: ; FAST: // %bb.0: // %entry -; FAST-NEXT: and w8, w1, #0xffff -; FAST-NEXT: and w9, w0, #0xffff -; FAST-NEXT: mul w8, w9, w8 -; FAST-NEXT: mov w9, #10 -; FAST-NEXT: tst w8, #0xffff0000 -; FAST-NEXT: csel w0, w8, w9, ne +; FAST-NEXT: and w9, w1, #0xffff +; FAST-NEXT: and w10, w0, #0xffff +; FAST-NEXT: mov w8, #10 // =0xa +; FAST-NEXT: mul w9, w10, w9 +; FAST-NEXT: tst w9, #0xffff0000 +; FAST-NEXT: csel w0, w9, w8, ne ; FAST-NEXT: ret ; ; GISEL-LABEL: umulo.selectboth.i16: ; GISEL: // %bb.0: // %entry -; GISEL-NEXT: and w8, w0, #0xffff -; GISEL-NEXT: and w9, w1, #0xffff -; GISEL-NEXT: mul w8, w8, w9 -; GISEL-NEXT: mov w9, #10 -; GISEL-NEXT: cmp w8, w8, uxth -; GISEL-NEXT: csel w0, w8, w9, ne +; GISEL-NEXT: and w9, w0, #0xffff +; GISEL-NEXT: 
and w10, w1, #0xffff +; GISEL-NEXT: mov w8, #10 // =0xa +; GISEL-NEXT: mul w9, w9, w10 +; GISEL-NEXT: cmp w9, w9, uxth +; GISEL-NEXT: csel w0, w9, w8, ne ; GISEL-NEXT: ret entry: %m = call { i16, i1 } @llvm.umul.with.overflow.i16(i16 %a, i16 %b) @@ -1933,32 +1945,32 @@ define i16 @smulo.selectboth.i16(i16 %a, i16 %b) { ; SDAG-LABEL: smulo.selectboth.i16: ; SDAG: // %bb.0: // %entry -; SDAG-NEXT: sxth w8, w1 -; SDAG-NEXT: sxth w9, w0 -; SDAG-NEXT: mul w8, w9, w8 -; SDAG-NEXT: mov w9, #10 -; SDAG-NEXT: cmp w8, w8, sxth -; SDAG-NEXT: csel w0, w8, w9, ne +; SDAG-NEXT: sxth w9, w1 +; SDAG-NEXT: sxth w10, w0 +; SDAG-NEXT: mov w8, #10 // =0xa +; SDAG-NEXT: mul w9, w10, w9 +; SDAG-NEXT: cmp w9, w9, sxth +; SDAG-NEXT: csel w0, w9, w8, ne ; SDAG-NEXT: ret ; ; FAST-LABEL: smulo.selectboth.i16: ; FAST: // %bb.0: // %entry -; FAST-NEXT: sxth w8, w1 -; FAST-NEXT: sxth w9, w0 -; FAST-NEXT: mul w8, w9, w8 -; FAST-NEXT: mov w9, #10 -; FAST-NEXT: cmp w8, w8, sxth -; FAST-NEXT: csel w0, w8, w9, ne +; FAST-NEXT: sxth w9, w1 +; FAST-NEXT: sxth w10, w0 +; FAST-NEXT: mov w8, #10 // =0xa +; FAST-NEXT: mul w9, w10, w9 +; FAST-NEXT: cmp w9, w9, sxth +; FAST-NEXT: csel w0, w9, w8, ne ; FAST-NEXT: ret ; ; GISEL-LABEL: smulo.selectboth.i16: ; GISEL: // %bb.0: // %entry -; GISEL-NEXT: sxth w8, w0 -; GISEL-NEXT: sxth w9, w1 -; GISEL-NEXT: mul w8, w8, w9 -; GISEL-NEXT: mov w9, #10 -; GISEL-NEXT: cmp w8, w8, sxth -; GISEL-NEXT: csel w0, w8, w9, ne +; GISEL-NEXT: sxth w9, w0 +; GISEL-NEXT: sxth w10, w1 +; GISEL-NEXT: mov w8, #10 // =0xa +; GISEL-NEXT: mul w9, w9, w10 +; GISEL-NEXT: cmp w9, w9, sxth +; GISEL-NEXT: csel w0, w9, w8, ne ; GISEL-NEXT: ret entry: %m = call { i16, i1 } @llvm.smul.with.overflow.i16(i16 %a, i16 %b) @@ -1972,7 +1984,7 @@ ; SDAG-LABEL: umulo.selectboth.i32: ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: umull x9, w0, w1 -; SDAG-NEXT: mov w8, #10 +; SDAG-NEXT: mov w8, #10 // =0xa ; SDAG-NEXT: tst x9, #0xffffffff00000000 ; SDAG-NEXT: csel w0, w9, w8, ne ; SDAG-NEXT: ret @@ -1980,7 +1992,7 @@ ; FAST-LABEL: umulo.selectboth.i32: ; FAST: // %bb.0: // %entry ; FAST-NEXT: umull x9, w0, w1 -; FAST-NEXT: mov w8, #10 +; FAST-NEXT: mov w8, #10 // =0xa ; FAST-NEXT: tst x9, #0xffffffff00000000 ; FAST-NEXT: csel w0, w9, w8, ne ; FAST-NEXT: ret @@ -1988,7 +2000,7 @@ ; GISEL-LABEL: umulo.selectboth.i32: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: umull x9, w0, w1 -; GISEL-NEXT: mov w8, #10 +; GISEL-NEXT: mov w8, #10 // =0xa ; GISEL-NEXT: mul w10, w0, w1 ; GISEL-NEXT: lsr x9, x9, #32 ; GISEL-NEXT: cmp w9, #0 @@ -2006,7 +2018,7 @@ ; SDAG-LABEL: smulo.selectboth.i32: ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: smull x9, w0, w1 -; SDAG-NEXT: mov w8, #10 +; SDAG-NEXT: mov w8, #10 // =0xa ; SDAG-NEXT: cmp x9, w9, sxtw ; SDAG-NEXT: csel w0, w9, w8, ne ; SDAG-NEXT: ret @@ -2014,7 +2026,7 @@ ; FAST-LABEL: smulo.selectboth.i32: ; FAST: // %bb.0: // %entry ; FAST-NEXT: smull x9, w0, w1 -; FAST-NEXT: mov w8, #10 +; FAST-NEXT: mov w8, #10 // =0xa ; FAST-NEXT: cmp x9, w9, sxtw ; FAST-NEXT: csel w0, w9, w8, ne ; FAST-NEXT: ret @@ -2022,7 +2034,7 @@ ; GISEL-LABEL: smulo.selectboth.i32: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: smull x9, w0, w1 -; GISEL-NEXT: mov w8, #10 +; GISEL-NEXT: mov w8, #10 // =0xa ; GISEL-NEXT: mul w10, w0, w1 ; GISEL-NEXT: asr x9, x9, #32 ; GISEL-NEXT: cmp w9, w10, asr #31 @@ -2040,7 +2052,7 @@ ; SDAG-LABEL: umulo.selectboth.i64: ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: umulh x9, x0, x1 -; SDAG-NEXT: mov w8, #10 +; SDAG-NEXT: mov w8, #10 // =0xa ; SDAG-NEXT: mul x10, x0, x1 ; SDAG-NEXT: cmp xzr, x9 ; SDAG-NEXT: csel 
x0, x10, x8, ne @@ -2049,7 +2061,7 @@ ; FAST-LABEL: umulo.selectboth.i64: ; FAST: // %bb.0: // %entry ; FAST-NEXT: umulh x9, x0, x1 -; FAST-NEXT: mov x8, #10 +; FAST-NEXT: mov x8, #10 // =0xa ; FAST-NEXT: mul x10, x0, x1 ; FAST-NEXT: cmp xzr, x9 ; FAST-NEXT: csel x0, x10, x8, ne @@ -2058,7 +2070,7 @@ ; GISEL-LABEL: umulo.selectboth.i64: ; GISEL: // %bb.0: // %entry ; GISEL-NEXT: umulh x9, x0, x1 -; GISEL-NEXT: mov w8, #10 +; GISEL-NEXT: mov w8, #10 // =0xa ; GISEL-NEXT: mul x10, x0, x1 ; GISEL-NEXT: cmp x9, #0 ; GISEL-NEXT: csel x0, x10, x8, ne @@ -2075,7 +2087,7 @@ ; SDAG-LABEL: smulo.selectboth.i64: ; SDAG: // %bb.0: // %entry ; SDAG-NEXT: mul x9, x0, x1 -; SDAG-NEXT: mov w8, #10 +; SDAG-NEXT: mov w8, #10 // =0xa ; SDAG-NEXT: smulh x10, x0, x1 ; SDAG-NEXT: cmp x10, x9, asr #63 ; SDAG-NEXT: csel x0, x9, x8, ne @@ -2084,7 +2096,7 @@ ; FAST-LABEL: smulo.selectboth.i64: ; FAST: // %bb.0: // %entry ; FAST-NEXT: mul x9, x0, x1 -; FAST-NEXT: mov x8, #10 +; FAST-NEXT: mov x8, #10 // =0xa ; FAST-NEXT: smulh x10, x0, x1 ; FAST-NEXT: cmp x10, x9, asr #63 ; FAST-NEXT: csel x0, x9, x8, ne @@ -2092,11 +2104,11 @@ ; ; GISEL-LABEL: smulo.selectboth.i64: ; GISEL: // %bb.0: // %entry -; GISEL-NEXT: mul x9, x0, x1 -; GISEL-NEXT: mov w8, #10 -; GISEL-NEXT: smulh x10, x0, x1 -; GISEL-NEXT: cmp x10, x9, asr #63 -; GISEL-NEXT: csel x0, x9, x8, ne +; GISEL-NEXT: smulh x9, x0, x1 +; GISEL-NEXT: mov w8, #10 // =0xa +; GISEL-NEXT: mul x10, x0, x1 +; GISEL-NEXT: cmp x9, x10, asr #63 +; GISEL-NEXT: csel x0, x10, x8, ne ; GISEL-NEXT: ret entry: %m = call { i64, i1 } @llvm.smul.with.overflow.i64(i64 %a, i64 %b) @@ -2120,9 +2132,9 @@ ; FAST-LABEL: saddo.br.i32: ; FAST: // %bb.0: // %entry ; FAST-NEXT: cmn w0, w1 -; FAST-NEXT: mov w9, #1 -; FAST-NEXT: cset w8, vs -; FAST-NEXT: bic w8, w9, w8 +; FAST-NEXT: mov w8, #1 // =0x1 +; FAST-NEXT: cset w9, vs +; FAST-NEXT: bic w8, w8, w9 ; FAST-NEXT: and w0, w8, #0x1 ; FAST-NEXT: ret ; @@ -2155,9 +2167,9 @@ ; FAST-LABEL: saddo.br.i64: ; FAST: // %bb.0: // %entry ; FAST-NEXT: cmn x0, x1 -; FAST-NEXT: mov w9, #1 -; FAST-NEXT: cset w8, vs -; FAST-NEXT: bic w8, w9, w8 +; FAST-NEXT: mov w8, #1 // =0x1 +; FAST-NEXT: cset w9, vs +; FAST-NEXT: bic w8, w8, w9 ; FAST-NEXT: and w0, w8, #0x1 ; FAST-NEXT: ret ; @@ -2190,9 +2202,9 @@ ; FAST-LABEL: uaddo.br.i32: ; FAST: // %bb.0: // %entry ; FAST-NEXT: cmn w0, w1 -; FAST-NEXT: mov w9, #1 -; FAST-NEXT: cset w8, hs -; FAST-NEXT: bic w8, w9, w8 +; FAST-NEXT: mov w8, #1 // =0x1 +; FAST-NEXT: cset w9, hs +; FAST-NEXT: bic w8, w8, w9 ; FAST-NEXT: and w0, w8, #0x1 ; FAST-NEXT: ret ; @@ -2225,9 +2237,9 @@ ; FAST-LABEL: uaddo.br.i64: ; FAST: // %bb.0: // %entry ; FAST-NEXT: cmn x0, x1 -; FAST-NEXT: mov w9, #1 -; FAST-NEXT: cset w8, hs -; FAST-NEXT: bic w8, w9, w8 +; FAST-NEXT: mov w8, #1 // =0x1 +; FAST-NEXT: cset w9, hs +; FAST-NEXT: bic w8, w8, w9 ; FAST-NEXT: and w0, w8, #0x1 ; FAST-NEXT: ret ; @@ -2260,9 +2272,9 @@ ; FAST-LABEL: ssubo.br.i32: ; FAST: // %bb.0: // %entry ; FAST-NEXT: cmp w0, w1 -; FAST-NEXT: mov w9, #1 -; FAST-NEXT: cset w8, vs -; FAST-NEXT: bic w8, w9, w8 +; FAST-NEXT: mov w8, #1 // =0x1 +; FAST-NEXT: cset w9, vs +; FAST-NEXT: bic w8, w8, w9 ; FAST-NEXT: and w0, w8, #0x1 ; FAST-NEXT: ret ; @@ -2295,9 +2307,9 @@ ; FAST-LABEL: ssubo.br.i64: ; FAST: // %bb.0: // %entry ; FAST-NEXT: cmp x0, x1 -; FAST-NEXT: mov w9, #1 -; FAST-NEXT: cset w8, vs -; FAST-NEXT: bic w8, w9, w8 +; FAST-NEXT: mov w8, #1 // =0x1 +; FAST-NEXT: cset w9, vs +; FAST-NEXT: bic w8, w8, w9 ; FAST-NEXT: and w0, w8, #0x1 ; FAST-NEXT: ret ; @@ -2330,9 +2342,9 @@ ; 
FAST-LABEL: usubo.br.i32: ; FAST: // %bb.0: // %entry ; FAST-NEXT: cmp w0, w1 -; FAST-NEXT: mov w9, #1 -; FAST-NEXT: cset w8, lo -; FAST-NEXT: bic w8, w9, w8 +; FAST-NEXT: mov w8, #1 // =0x1 +; FAST-NEXT: cset w9, lo +; FAST-NEXT: bic w8, w8, w9 ; FAST-NEXT: and w0, w8, #0x1 ; FAST-NEXT: ret ; @@ -2365,9 +2377,9 @@ ; FAST-LABEL: usubo.br.i64: ; FAST: // %bb.0: // %entry ; FAST-NEXT: cmp x0, x1 -; FAST-NEXT: mov w9, #1 -; FAST-NEXT: cset w8, lo -; FAST-NEXT: bic w8, w9, w8 +; FAST-NEXT: mov w8, #1 // =0x1 +; FAST-NEXT: cset w9, lo +; FAST-NEXT: bic w8, w8, w9 ; FAST-NEXT: and w0, w8, #0x1 ; FAST-NEXT: ret ; @@ -2401,7 +2413,7 @@ ; FAST-LABEL: smulo.br.i32: ; FAST: // %bb.0: // %entry ; FAST-NEXT: smull x9, w0, w1 -; FAST-NEXT: mov w8, #1 +; FAST-NEXT: mov w8, #1 // =0x1 ; FAST-NEXT: cmp x9, w9, sxtw ; FAST-NEXT: cset w9, ne ; FAST-NEXT: bic w8, w8, w9 @@ -2442,7 +2454,7 @@ ; FAST-LABEL: smulo.br.i64: ; FAST: // %bb.0: // %entry ; FAST-NEXT: mul x9, x0, x1 -; FAST-NEXT: mov w8, #1 +; FAST-NEXT: mov w8, #1 // =0x1 ; FAST-NEXT: smulh x10, x0, x1 ; FAST-NEXT: cmp x10, x9, asr #63 ; FAST-NEXT: cset w9, ne @@ -2452,9 +2464,9 @@ ; ; GISEL-LABEL: smulo.br.i64: ; GISEL: // %bb.0: // %entry -; GISEL-NEXT: mul x8, x0, x1 -; GISEL-NEXT: smulh x9, x0, x1 -; GISEL-NEXT: cmp x9, x8, asr #63 +; GISEL-NEXT: smulh x8, x0, x1 +; GISEL-NEXT: mul x9, x0, x1 +; GISEL-NEXT: cmp x8, x9, asr #63 ; GISEL-NEXT: cset w8, ne ; GISEL-NEXT: eor w0, w8, #0x1 ; GISEL-NEXT: ret @@ -2481,7 +2493,7 @@ ; FAST-LABEL: smulo2.br.i64: ; FAST: // %bb.0: // %entry ; FAST-NEXT: cmn x0, x0 -; FAST-NEXT: mov w8, #1 +; FAST-NEXT: mov w8, #1 // =0x1 ; FAST-NEXT: cset w9, vs ; FAST-NEXT: bic w8, w8, w9 ; FAST-NEXT: and w0, w8, #0x1 @@ -2517,7 +2529,7 @@ ; FAST-LABEL: umulo.br.i32: ; FAST: // %bb.0: // %entry ; FAST-NEXT: umull x9, w0, w1 -; FAST-NEXT: mov w8, #1 +; FAST-NEXT: mov w8, #1 // =0x1 ; FAST-NEXT: tst x9, #0xffffffff00000000 ; FAST-NEXT: cset w9, ne ; FAST-NEXT: bic w8, w8, w9 @@ -2556,7 +2568,7 @@ ; FAST-LABEL: umulo.br.i64: ; FAST: // %bb.0: // %entry ; FAST-NEXT: umulh x9, x0, x1 -; FAST-NEXT: mov w8, #1 +; FAST-NEXT: mov w8, #1 // =0x1 ; FAST-NEXT: cmp xzr, x9 ; FAST-NEXT: cset w9, ne ; FAST-NEXT: bic w8, w8, w9 @@ -2593,7 +2605,7 @@ ; FAST-LABEL: umulo2.br.i64: ; FAST: // %bb.0: // %entry ; FAST-NEXT: cmn x0, x0 -; FAST-NEXT: mov w8, #1 +; FAST-NEXT: mov w8, #1 // =0x1 ; FAST-NEXT: cset w9, hs ; FAST-NEXT: bic w8, w8, w9 ; FAST-NEXT: and w0, w8, #0x1 @@ -2621,17 +2633,17 @@ define i8 @pr60530() { ; SDAG-LABEL: pr60530: ; SDAG: // %bb.0: -; SDAG-NEXT: mov w0, #-1 +; SDAG-NEXT: mov w0, #-1 // =0xffffffff ; SDAG-NEXT: ret ; ; FAST-LABEL: pr60530: ; FAST: // %bb.0: -; FAST-NEXT: mov w0, #-1 +; FAST-NEXT: mov w0, #-1 // =0xffffffff ; FAST-NEXT: ret ; ; GISEL-LABEL: pr60530: ; GISEL: // %bb.0: -; GISEL-NEXT: mov w8, #1 +; GISEL-NEXT: mov w8, #1 // =0x1 ; GISEL-NEXT: sbfx w0, w8, #0, #1 ; GISEL-NEXT: ret %1 = call { i8, i1 } @llvm.uadd.with.overflow.i8(i8 0, i8 1) diff --git a/llvm/test/CodeGen/AArch64/arm64-zip.ll b/llvm/test/CodeGen/AArch64/arm64-zip.ll --- a/llvm/test/CodeGen/AArch64/arm64-zip.ll +++ b/llvm/test/CodeGen/AArch64/arm64-zip.ll @@ -250,8 +250,8 @@ define <16 x i8> @combine_v8i16_8first(<8 x i8> %0, <8 x i8> %1) { ; CHECK-LABEL: combine_v8i16_8first: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI17_0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1_q2 +; CHECK-NEXT: adrp x8, .LCPI17_0 ; CHECK-NEXT: fmov d2, d0 ; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI17_0] ; CHECK-NEXT: tbl.16b v0, { v1, v2 }, v3 @@ -265,8 +265,8 @@ 
define <16 x i8> @combine_v8i16_8firstundef(<8 x i8> %0, <8 x i8> %1) { ; CHECK-LABEL: combine_v8i16_8firstundef: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI18_0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1_q2 +; CHECK-NEXT: adrp x8, .LCPI18_0 ; CHECK-NEXT: fmov d2, d0 ; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI18_0] ; CHECK-NEXT: tbl.16b v0, { v1, v2 }, v3 diff --git a/llvm/test/CodeGen/AArch64/arm64_32-addrs.ll b/llvm/test/CodeGen/AArch64/arm64_32-addrs.ll --- a/llvm/test/CodeGen/AArch64/arm64_32-addrs.ll +++ b/llvm/test/CodeGen/AArch64/arm64_32-addrs.ll @@ -42,10 +42,10 @@ define i8 @test_valid_wrap_optimizable2(ptr %base, i32 %offset) { ; CHECK-LABEL: test_valid_wrap_optimizable2: ; CHECK: ; %bb.0: -; CHECK-NEXT: mov w8, #-100 ; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: sxtw x9, w1 -; CHECK-NEXT: ldrb w0, [x9, x8] +; CHECK-NEXT: sxtw x8, w1 +; CHECK-NEXT: mov w9, #-100 ; =0xffffff9c +; CHECK-NEXT: ldrb w0, [x8, x9] ; CHECK-NEXT: ret %newaddr = getelementptr inbounds i8, ptr inttoptr(i32 -100 to ptr), i32 %offset diff --git a/llvm/test/CodeGen/AArch64/arm64_32.ll b/llvm/test/CodeGen/AArch64/arm64_32.ll --- a/llvm/test/CodeGen/AArch64/arm64_32.ll +++ b/llvm/test/CodeGen/AArch64/arm64_32.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -mtriple=arm64_32-apple-ios7.0 %s -filetype=obj -o - -disable-post-ra -frame-pointer=non-leaf | \ ; RUN: llvm-objdump --private-headers - | \ ; RUN: FileCheck %s --check-prefix=CHECK-MACHO @@ -13,11 +14,24 @@ @var_got = external global i8 define ptr @test_global_addr() { -; CHECK-LABEL: test_global_addr: -; CHECK: adrp [[PAGE:x[0-9]+]], _var32@PAGE -; CHECK-OPT: add x0, [[PAGE]], _var32@PAGEOFF -; CHECK-FAST: add [[TMP:x[0-9]+]], [[PAGE]], _var32@PAGEOFF -; CHECK-FAST: and x0, [[TMP]], #0xffffffff +; CHECK-OPT-LABEL: test_global_addr: +; CHECK-OPT: ; %bb.0: +; CHECK-OPT-NEXT: Lloh0: +; CHECK-OPT-NEXT: adrp x0, _var32@PAGE +; CHECK-OPT-NEXT: Lloh1: +; CHECK-OPT-NEXT: add x0, x0, _var32@PAGEOFF +; CHECK-OPT-NEXT: ret +; CHECK-OPT-NEXT: .loh AdrpAdd Lloh0, Lloh1 +; +; CHECK-FAST-LABEL: test_global_addr: +; CHECK-FAST: ; %bb.0: +; CHECK-FAST-NEXT: Lloh0: +; CHECK-FAST-NEXT: adrp x8, _var32@PAGE +; CHECK-FAST-NEXT: Lloh1: +; CHECK-FAST-NEXT: add x8, x8, _var32@PAGEOFF +; CHECK-FAST-NEXT: and x0, x8, #0xffffffff +; CHECK-FAST-NEXT: ret +; CHECK-FAST-NEXT: .loh AdrpAdd Lloh0, Lloh1 ret ptr @var32 } @@ -25,19 +39,36 @@ ; gets truncated to 32-bits, it's free. No need to zero out higher bits of that ; register. 
define i64 @test_global_addr_extension() { -; CHECK-LABEL: test_global_addr_extension: -; CHECK: adrp [[PAGE:x[0-9]+]], _var32@PAGE -; CHECK: add x0, [[PAGE]], _var32@PAGEOFF -; CHECK-NOT: and -; CHECK: ret +; CHECK-OPT-LABEL: test_global_addr_extension: +; CHECK-OPT: ; %bb.0: +; CHECK-OPT-NEXT: Lloh2: +; CHECK-OPT-NEXT: adrp x0, _var32@PAGE +; CHECK-OPT-NEXT: Lloh3: +; CHECK-OPT-NEXT: add x0, x0, _var32@PAGEOFF +; CHECK-OPT-NEXT: ret +; CHECK-OPT-NEXT: .loh AdrpAdd Lloh2, Lloh3 +; +; CHECK-FAST-LABEL: test_global_addr_extension: +; CHECK-FAST: ; %bb.0: +; CHECK-FAST-NEXT: Lloh2: +; CHECK-FAST-NEXT: adrp x8, _var32@PAGE +; CHECK-FAST-NEXT: Lloh3: +; CHECK-FAST-NEXT: add x0, x8, _var32@PAGEOFF +; CHECK-FAST-NEXT: ret +; CHECK-FAST-NEXT: .loh AdrpAdd Lloh2, Lloh3 ret i64 ptrtoint(ptr @var32 to i64) } define i32 @test_global_value() { ; CHECK-LABEL: test_global_value: -; CHECK: adrp x[[PAGE:[0-9]+]], _var32@PAGE -; CHECK: ldr w0, [x[[PAGE]], _var32@PAGEOFF] +; CHECK: ; %bb.0: +; CHECK-NEXT: Lloh4: +; CHECK-NEXT: adrp x8, _var32@PAGE +; CHECK-NEXT: Lloh5: +; CHECK-NEXT: ldr w0, [x8, _var32@PAGEOFF] +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdr Lloh4, Lloh5 %val = load i32, ptr @var32, align 4 ret i32 %val } @@ -45,9 +76,15 @@ ; Because the addition may wrap, it is not safe to use "ldr w0, [xN, #32]" here. define i32 @test_unsafe_indexed_add() { ; CHECK-LABEL: test_unsafe_indexed_add: -; CHECK: add x[[VAR32:[0-9]+]], {{x[0-9]+}}, _var32@PAGEOFF -; CHECK: add w[[ADDR:[0-9]+]], w[[VAR32]], #32 -; CHECK: ldr w0, [x[[ADDR]]] +; CHECK: ; %bb.0: +; CHECK-NEXT: Lloh6: +; CHECK-NEXT: adrp x8, _var32@PAGE +; CHECK-NEXT: Lloh7: +; CHECK-NEXT: add x8, x8, _var32@PAGEOFF +; CHECK-NEXT: add w8, w8, #32 +; CHECK-NEXT: ldr w0, [x8] +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpAdd Lloh6, Lloh7 %addr_int = ptrtoint ptr @var32 to i32 %addr_plus_32 = add i32 %addr_int, 32 %addr = inttoptr i32 %addr_plus_32 to ptr @@ -59,9 +96,15 @@ ; 32-bytes below 2^32, and we can use the load this time. define i32 @test_safe_indexed_add() { ; CHECK-LABEL: test_safe_indexed_add: -; CHECK: add x[[VAR32:[0-9]+]], {{x[0-9]+}}, _var32@PAGEOFF -; CHECK: add w[[ADDR:[0-9]+]], w[[VAR32]], #32 -; CHECK: ldr w0, [x[[ADDR]]] +; CHECK: ; %bb.0: +; CHECK-NEXT: Lloh8: +; CHECK-NEXT: adrp x8, _var32@PAGE +; CHECK-NEXT: Lloh9: +; CHECK-NEXT: add x8, x8, _var32@PAGEOFF +; CHECK-NEXT: add w8, w8, #32 +; CHECK-NEXT: ldr w0, [x8] +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpAdd Lloh8, Lloh9 %addr_int = ptrtoint ptr @var32 to i64 %addr_plus_32 = add nuw i64 %addr_int, 32 %addr = inttoptr i64 %addr_plus_32 to ptr @@ -71,9 +114,11 @@ define i32 @test_safe_indexed_or(i32 %in) { ; CHECK-LABEL: test_safe_indexed_or: -; CHECK: and [[TMP:w[0-9]+]], {{w[0-9]+}}, #0xfffffff0 -; CHECK: orr w[[ADDR:[0-9]+]], [[TMP]], #0x4 -; CHECK: ldr w0, [x[[ADDR]]] +; CHECK: ; %bb.0: +; CHECK-NEXT: and w8, w0, #0xfffffff0 +; CHECK-NEXT: orr w8, w8, #0x4 +; CHECK-NEXT: ldr w0, [x8] +; CHECK-NEXT: ret %addr_int = and i32 %in, -16 %addr_plus_4 = or i32 %addr_int, 4 %addr = inttoptr i32 %addr_plus_4 to ptr @@ -87,10 +132,15 @@ ; "sext(base) + sext(offset) == base + offset". 
define i32 @test_unsafe_nsw_indexed_add() { ; CHECK-LABEL: test_unsafe_nsw_indexed_add: -; CHECK: add x[[VAR32:[0-9]+]], {{x[0-9]+}}, _var32@PAGEOFF -; CHECK: add w[[ADDR:[0-9]+]], w[[VAR32]], #32 -; CHECK-NOT: ubfx -; CHECK: ldr w0, [x[[ADDR]]] +; CHECK: ; %bb.0: +; CHECK-NEXT: Lloh10: +; CHECK-NEXT: adrp x8, _var32@PAGE +; CHECK-NEXT: Lloh11: +; CHECK-NEXT: add x8, x8, _var32@PAGEOFF +; CHECK-NEXT: add w8, w8, #32 +; CHECK-NEXT: ldr w0, [x8] +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpAdd Lloh10, Lloh11 %addr_int = ptrtoint ptr @var32 to i32 %addr_plus_32 = add nsw i32 %addr_int, 32 %addr = inttoptr i32 %addr_plus_32 to ptr @@ -101,9 +151,15 @@ ; Because the addition may wrap, it is not safe to use "ldr w0, [xN, #32]" here. define i32 @test_unsafe_unscaled_add() { ; CHECK-LABEL: test_unsafe_unscaled_add: -; CHECK: add x[[VAR32:[0-9]+]], {{x[0-9]+}}, _var32@PAGEOFF -; CHECK: add w[[ADDR:[0-9]+]], w[[VAR32]], #3 -; CHECK: ldr w0, [x[[ADDR]]] +; CHECK: ; %bb.0: +; CHECK-NEXT: Lloh12: +; CHECK-NEXT: adrp x8, _var32@PAGE +; CHECK-NEXT: Lloh13: +; CHECK-NEXT: add x8, x8, _var32@PAGEOFF +; CHECK-NEXT: add w8, w8, #3 +; CHECK-NEXT: ldr w0, [x8] +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpAdd Lloh12, Lloh13 %addr_int = ptrtoint ptr @var32 to i32 %addr_plus_3 = add i32 %addr_int, 3 %addr = inttoptr i32 %addr_plus_3 to ptr @@ -115,9 +171,15 @@ ; 32-bytes below 2^32, and we can use the load this time. define i32 @test_safe_unscaled_add() { ; CHECK-LABEL: test_safe_unscaled_add: -; CHECK: add x[[VAR32:[0-9]+]], {{x[0-9]+}}, _var32@PAGEOFF -; CHECK: add w[[ADDR:[0-9]+]], w[[VAR32]], #3 -; CHECK: ldr w0, [x[[ADDR]]] +; CHECK: ; %bb.0: +; CHECK-NEXT: Lloh14: +; CHECK-NEXT: adrp x8, _var32@PAGE +; CHECK-NEXT: Lloh15: +; CHECK-NEXT: add x8, x8, _var32@PAGEOFF +; CHECK-NEXT: add w8, w8, #3 +; CHECK-NEXT: ldr w0, [x8] +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpAdd Lloh14, Lloh15 %addr_int = ptrtoint ptr @var32 to i32 %addr_plus_3 = add nuw i32 %addr_int, 3 %addr = inttoptr i32 %addr_plus_3 to ptr @@ -130,10 +192,15 @@ ; "sext(base) + sext(offset) == base + offset". define i32 @test_unsafe_nsw_unscaled_add() { ; CHECK-LABEL: test_unsafe_nsw_unscaled_add: -; CHECK: add x[[VAR32:[0-9]+]], {{x[0-9]+}}, _var32@PAGEOFF -; CHECK: add w[[ADDR:[0-9]+]], w[[VAR32]], #3 -; CHECK-NOT: ubfx -; CHECK: ldr w0, [x[[ADDR]]] +; CHECK: ; %bb.0: +; CHECK-NEXT: Lloh16: +; CHECK-NEXT: adrp x8, _var32@PAGE +; CHECK-NEXT: Lloh17: +; CHECK-NEXT: add x8, x8, _var32@PAGEOFF +; CHECK-NEXT: add w8, w8, #3 +; CHECK-NEXT: ldr w0, [x8] +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpAdd Lloh16, Lloh17 %addr_int = ptrtoint ptr @var32 to i32 %addr_plus_3 = add nsw i32 %addr_int, 3 %addr = inttoptr i32 %addr_plus_3 to ptr @@ -145,9 +212,15 @@ ; here. 
define i32 @test_unsafe_negative_unscaled_add() { ; CHECK-LABEL: test_unsafe_negative_unscaled_add: -; CHECK: add x[[VAR32:[0-9]+]], {{x[0-9]+}}, _var32@PAGEOFF -; CHECK: sub w[[ADDR:[0-9]+]], w[[VAR32]], #3 -; CHECK: ldr w0, [x[[ADDR]]] +; CHECK: ; %bb.0: +; CHECK-NEXT: Lloh18: +; CHECK-NEXT: adrp x8, _var32@PAGE +; CHECK-NEXT: Lloh19: +; CHECK-NEXT: add x8, x8, _var32@PAGEOFF +; CHECK-NEXT: sub w8, w8, #3 +; CHECK-NEXT: ldr w0, [x8] +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpAdd Lloh18, Lloh19 %addr_int = ptrtoint ptr @var32 to i32 %addr_minus_3 = add i32 %addr_int, -3 %addr = inttoptr i32 %addr_minus_3 to ptr @@ -156,24 +229,39 @@ } define ptr @test_got_addr() { -; CHECK-LABEL: test_got_addr: -; CHECK: adrp x[[PAGE:[0-9]+]], _var_got@GOTPAGE -; CHECK-OPT: ldr w0, [x[[PAGE]], _var_got@GOTPAGEOFF] -; CHECK-FAST: ldr w[[TMP:[0-9]+]], [x[[PAGE]], _var_got@GOTPAGEOFF] -; CHECK-FAST: and x0, x[[TMP]], #0xffffffff +; CHECK-OPT-LABEL: test_got_addr: +; CHECK-OPT: ; %bb.0: +; CHECK-OPT-NEXT: Lloh20: +; CHECK-OPT-NEXT: adrp x0, _var_got@GOTPAGE +; CHECK-OPT-NEXT: Lloh21: +; CHECK-OPT-NEXT: ldr w0, [x0, _var_got@GOTPAGEOFF] +; CHECK-OPT-NEXT: ret +; CHECK-OPT-NEXT: .loh AdrpLdrGot Lloh20, Lloh21 +; +; CHECK-FAST-LABEL: test_got_addr: +; CHECK-FAST: ; %bb.0: +; CHECK-FAST-NEXT: Lloh20: +; CHECK-FAST-NEXT: adrp x8, _var_got@GOTPAGE +; CHECK-FAST-NEXT: Lloh21: +; CHECK-FAST-NEXT: ldr w8, [x8, _var_got@GOTPAGEOFF] +; CHECK-FAST-NEXT: and x0, x8, #0xffffffff +; CHECK-FAST-NEXT: ret +; CHECK-FAST-NEXT: .loh AdrpLdrGot Lloh20, Lloh21 ret ptr @var_got } define float @test_va_arg_f32(ptr %list) { ; CHECK-LABEL: test_va_arg_f32: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: add w9, w8, #8 +; CHECK-NEXT: str w9, [x0] +; CHECK-NEXT: ldr d0, [x8] +; CHECK-NEXT: fcvt s0, d0 +; CHECK-NEXT: ret -; CHECK: ldr w[[START:[0-9]+]], [x0] -; CHECK: add [[AFTER:w[0-9]+]], w[[START]], #8 -; CHECK: str [[AFTER]], [x0] ; Floating point arguments get promoted to double as per C99. -; CHECK: ldr [[DBL:d[0-9]+]], [x[[START]]] -; CHECK: fcvt s0, [[DBL]] %res = va_arg ptr %list, float ret float %res } @@ -181,13 +269,15 @@ ; Interesting point is that the slot is 4 bytes. define i8 @test_va_arg_i8(ptr %list) { ; CHECK-LABEL: test_va_arg_i8: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: add w9, w8, #4 +; CHECK-NEXT: str w9, [x0] +; CHECK-NEXT: ldr w0, [x8] +; CHECK-NEXT: ret -; CHECK: ldr w[[START:[0-9]+]], [x0] -; CHECK: add [[AFTER:w[0-9]+]], w[[START]], #4 -; CHECK: str [[AFTER]], [x0] ; i8 gets promoted to int (again, as per C99). -; CHECK: ldr w0, [x[[START]]] %res = va_arg ptr %list, i8 ret i8 %res @@ -197,16 +287,18 @@ ; bytes). define i64 @test_va_arg_i64(ptr %list) { ; CHECK-LABEL: test_va_arg_i64: +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: add x8, x8, #7 +; CHECK-NEXT: and x8, x8, #0x1fffffff8 +; CHECK-NEXT: add w9, w8, #8 +; CHECK-NEXT: str w9, [x0] +; CHECK-NEXT: ldr x0, [x8] +; CHECK-NEXT: ret ; Update the list for the next user (minimum slot size is 4, but the actual ; argument is 8 which had better be reflected!) -; CHECK: ldr w[[UNALIGNED_START:[0-9]+]], [x0] -; CHECK: add [[ALIGN_TMP:x[0-9]+]], x[[UNALIGNED_START]], #7 -; CHECK: and x[[START:[0-9]+]], [[ALIGN_TMP]], #0x1fffffff8 -; CHECK: add w[[AFTER:[0-9]+]], w[[START]], #8 -; CHECK: str w[[AFTER]], [x0] -; CHECK: ldr x0, [x[[START]]] %res = va_arg ptr %list, i64 ret i64 %res @@ -214,14 +306,45 @@ declare void @bar(...) 
define void @test_va_call(i8 %l, i8 %r, float %in, ptr %ptr) { -; CHECK-LABEL: test_va_call: -; CHECK: add [[SUM:w[0-9]+]], {{w[0-9]+}}, w1 +; CHECK-OPT-LABEL: test_va_call: +; CHECK-OPT: ; %bb.0: +; CHECK-OPT-NEXT: sub sp, sp, #64 +; CHECK-OPT-NEXT: stp x29, x30, [sp, #48] ; 16-byte Folded Spill +; CHECK-OPT-NEXT: add x29, sp, #48 +; CHECK-OPT-NEXT: .cfi_def_cfa w29, 16 +; CHECK-OPT-NEXT: .cfi_offset w30, -8 +; CHECK-OPT-NEXT: .cfi_offset w29, -16 +; CHECK-OPT-NEXT: add w8, w0, w1 +; CHECK-OPT-NEXT: str w2, [sp, #32] +; CHECK-OPT-NEXT: str xzr, [sp, #24] +; CHECK-OPT-NEXT: str s0, [sp, #16] +; CHECK-OPT-NEXT: str xzr, [sp, #8] +; CHECK-OPT-NEXT: str w8, [sp] +; CHECK-OPT-NEXT: bl _bar +; CHECK-OPT-NEXT: ldp x29, x30, [sp, #48] ; 16-byte Folded Reload +; CHECK-OPT-NEXT: add sp, sp, #64 +; CHECK-OPT-NEXT: ret +; +; CHECK-FAST-LABEL: test_va_call: +; CHECK-FAST: ; %bb.0: +; CHECK-FAST-NEXT: sub sp, sp, #64 +; CHECK-FAST-NEXT: stp x29, x30, [sp, #48] ; 16-byte Folded Spill +; CHECK-FAST-NEXT: add x29, sp, #48 +; CHECK-FAST-NEXT: .cfi_def_cfa w29, 16 +; CHECK-FAST-NEXT: .cfi_offset w30, -8 +; CHECK-FAST-NEXT: .cfi_offset w29, -16 +; CHECK-FAST-NEXT: sxtb w8, w0 +; CHECK-FAST-NEXT: add w8, w8, w1, sxtb +; CHECK-FAST-NEXT: str w2, [sp, #32] +; CHECK-FAST-NEXT: str xzr, [sp, #24] +; CHECK-FAST-NEXT: str s0, [sp, #16] +; CHECK-FAST-NEXT: str xzr, [sp, #8] +; CHECK-FAST-NEXT: str w8, [sp] +; CHECK-FAST-NEXT: bl _bar +; CHECK-FAST-NEXT: ldp x29, x30, [sp, #48] ; 16-byte Folded Reload +; CHECK-FAST-NEXT: add sp, sp, #64 +; CHECK-FAST-NEXT: ret -; CHECK-DAG: str w2, [sp, #32] -; CHECK-DAG: str xzr, [sp, #24] -; CHECK-DAG: str s0, [sp, #16] -; CHECK-DAG: str xzr, [sp, #8] -; CHECK-DAG: str [[SUM]], [sp] ; Add them to ensure real promotion occurs. %sum = add i8 %l, %r @@ -232,10 +355,28 @@ declare ptr @llvm.frameaddress(i32) define ptr @test_frameaddr() { -; CHECK-LABEL: test_frameaddr: -; CHECK-OPT: ldr x0, [x29] -; CHECK-FAST: ldr [[TMP:x[0-9]+]], [x29] -; CHECK-FAST: and x0, [[TMP]], #0xffffffff +; CHECK-OPT-LABEL: test_frameaddr: +; CHECK-OPT: ; %bb.0: +; CHECK-OPT-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-OPT-NEXT: mov x29, sp +; CHECK-OPT-NEXT: .cfi_def_cfa w29, 16 +; CHECK-OPT-NEXT: .cfi_offset w30, -8 +; CHECK-OPT-NEXT: .cfi_offset w29, -16 +; CHECK-OPT-NEXT: ldr x0, [x29] +; CHECK-OPT-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-OPT-NEXT: ret +; +; CHECK-FAST-LABEL: test_frameaddr: +; CHECK-FAST: ; %bb.0: +; CHECK-FAST-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-FAST-NEXT: mov x29, sp +; CHECK-FAST-NEXT: .cfi_def_cfa w29, 16 +; CHECK-FAST-NEXT: .cfi_offset w30, -8 +; CHECK-FAST-NEXT: .cfi_offset w29, -16 +; CHECK-FAST-NEXT: ldr x8, [x29] +; CHECK-FAST-NEXT: and x0, x8, #0xffffffff +; CHECK-FAST-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-FAST-NEXT: ret %val = call ptr @llvm.frameaddress(i32 1) ret ptr %val } @@ -243,28 +384,74 @@ declare ptr @llvm.returnaddress(i32) define ptr @test_toplevel_returnaddr() { -; CHECK-LABEL: test_toplevel_returnaddr: -; CHECK-OPT: mov x0, x30 -; CHECK-FAST: and x0, x30, #0xffffffff +; CHECK-OPT-LABEL: test_toplevel_returnaddr: +; CHECK-OPT: ; %bb.0: +; CHECK-OPT-NEXT: stp x29, x30, [sp, #-16]! 
; 16-byte Folded Spill +; CHECK-OPT-NEXT: .cfi_def_cfa_offset 16 +; CHECK-OPT-NEXT: .cfi_offset w30, -8 +; CHECK-OPT-NEXT: .cfi_offset w29, -16 +; CHECK-OPT-NEXT: hint #7 +; CHECK-OPT-NEXT: mov x0, x30 +; CHECK-OPT-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-OPT-NEXT: ret +; +; CHECK-FAST-LABEL: test_toplevel_returnaddr: +; CHECK-FAST: ; %bb.0: +; CHECK-FAST-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-FAST-NEXT: .cfi_def_cfa_offset 16 +; CHECK-FAST-NEXT: .cfi_offset w30, -8 +; CHECK-FAST-NEXT: .cfi_offset w29, -16 +; CHECK-FAST-NEXT: hint #7 +; CHECK-FAST-NEXT: and x0, x30, #0xffffffff +; CHECK-FAST-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-FAST-NEXT: ret %val = call ptr @llvm.returnaddress(i32 0) ret ptr %val } define ptr @test_deep_returnaddr() { -; CHECK-LABEL: test_deep_returnaddr: -; CHECK: ldr x[[FRAME_REC:[0-9]+]], [x29] -; CHECK-OPT: ldr x30, [x[[FRAME_REC]], #8] -; CHECK-OPT: hint #7 -; CHECK-OPT: mov x0, x30 -; CHECK-FAST: ldr [[TMP:x[0-9]+]], [x[[FRAME_REC]], #8] -; CHECK-FAST: and x0, [[TMP]], #0xffffffff +; CHECK-OPT-LABEL: test_deep_returnaddr: +; CHECK-OPT: ; %bb.0: +; CHECK-OPT-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-OPT-NEXT: mov x29, sp +; CHECK-OPT-NEXT: .cfi_def_cfa w29, 16 +; CHECK-OPT-NEXT: .cfi_offset w30, -8 +; CHECK-OPT-NEXT: .cfi_offset w29, -16 +; CHECK-OPT-NEXT: ldr x8, [x29] +; CHECK-OPT-NEXT: ldr x30, [x8, #8] +; CHECK-OPT-NEXT: hint #7 +; CHECK-OPT-NEXT: mov x0, x30 +; CHECK-OPT-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-OPT-NEXT: ret +; +; CHECK-FAST-LABEL: test_deep_returnaddr: +; CHECK-FAST: ; %bb.0: +; CHECK-FAST-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-FAST-NEXT: mov x29, sp +; CHECK-FAST-NEXT: .cfi_def_cfa w29, 16 +; CHECK-FAST-NEXT: .cfi_offset w30, -8 +; CHECK-FAST-NEXT: .cfi_offset w29, -16 +; CHECK-FAST-NEXT: ldr x8, [x29] +; CHECK-FAST-NEXT: ldr x30, [x8, #8] +; CHECK-FAST-NEXT: hint #7 +; CHECK-FAST-NEXT: and x0, x30, #0xffffffff +; CHECK-FAST-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-FAST-NEXT: ret %val = call ptr @llvm.returnaddress(i32 1) ret ptr %val } define void @test_indirect_call(ptr %func) { ; CHECK-LABEL: test_indirect_call: -; CHECK: blr x0 +; CHECK: ; %bb.0: +; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: .cfi_def_cfa w29, 16 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: blr x0 +; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-NEXT: ret call void() %func() ret void } @@ -272,9 +459,16 @@ ; Safe to use the unextended address here define void @test_indirect_safe_call(ptr %weird_funcs) { ; CHECK-LABEL: test_indirect_safe_call: -; CHECK: add w[[ADDR32:[0-9]+]], w0, #4 -; CHECK-OPT-NOT: ubfx -; CHECK: blr x[[ADDR32]] +; CHECK: ; %bb.0: +; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
; 16-byte Folded Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: .cfi_def_cfa w29, 16 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: add w8, w0, #4 +; CHECK-NEXT: blr x8 +; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-NEXT: ret %addr = getelementptr i32, ptr %weird_funcs, i32 1 call void() %addr() ret void @@ -283,14 +477,16 @@ declare void @simple() define void @test_simple_tail_call() { ; CHECK-LABEL: test_simple_tail_call: -; CHECK: b _simple +; CHECK: ; %bb.0: +; CHECK-NEXT: b _simple tail call void @simple() ret void } define void @test_indirect_tail_call(ptr %func) { ; CHECK-LABEL: test_indirect_tail_call: -; CHECK: br x0 +; CHECK: ; %bb.0: +; CHECK-NEXT: br x0 tail call void() %func() ret void } @@ -298,9 +494,9 @@ ; Safe to use the unextended address here define void @test_indirect_safe_tail_call(ptr %weird_funcs) { ; CHECK-LABEL: test_indirect_safe_tail_call: -; CHECK: add w[[ADDR32:[0-9]+]], w0, #4 -; CHECK-OPT-NOT: ubfx -; CHECK-OPT: br x[[ADDR32]] +; CHECK: ; %bb.0: +; CHECK-NEXT: add w0, w0, #4 +; CHECK-NEXT: br x0 %addr = getelementptr i32, ptr %weird_funcs, i32 1 tail call void() %addr() ret void @@ -312,14 +508,20 @@ define i32 @test_in_smallstruct_low([3 x i32] %in) { ; CHECK-LABEL: test_in_smallstruct_low: -; CHECK: mov x0, x1 +; CHECK: ; %bb.0: +; CHECK-NEXT: mov x0, x1 +; CHECK-NEXT: ; kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: ret %val = extractvalue [3 x i32] %in, 2 ret i32 %val } define i32 @test_in_smallstruct_high([3 x i32] %in) { ; CHECK-LABEL: test_in_smallstruct_high: -; CHECK: lsr x0, x0, #32 +; CHECK: ; %bb.0: +; CHECK-NEXT: lsr x0, x0, #32 +; CHECK-NEXT: ; kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: ret %val = extractvalue [3 x i32] %in, 1 ret i32 %val } @@ -329,15 +531,19 @@ ; be incompatible with the armv7k ABI. define i32 @test_in_smallstruct_stack([8 x i64], i32, [3 x i32] %in) { ; CHECK-LABEL: test_in_smallstruct_stack: -; CHECK: ldr w0, [sp, #4] +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr w0, [sp, #4] +; CHECK-NEXT: ret %val = extractvalue [3 x i32] %in, 0 ret i32 %val } define [2 x i32] @test_ret_smallstruct([3 x i32] %in) { ; CHECK-LABEL: test_ret_smallstruct: -; CHECK: mov x0, #1 -; CHECK: movk x0, #2, lsl #32 +; CHECK: ; %bb.0: +; CHECK-NEXT: mov x0, #1 ; =0x1 +; CHECK-NEXT: movk x0, #2, lsl #32 +; CHECK-NEXT: ret ret [2 x i32] [i32 1, i32 2] } @@ -345,11 +551,19 @@ declare void @smallstruct_callee([4 x i32]) define void @test_call_smallstruct() { ; CHECK-LABEL: test_call_smallstruct: -; CHECK: mov x0, #1 -; CHECK: movk x0, #2, lsl #32 -; CHECK: mov x1, #3 -; CHECK: movk x1, #4, lsl #32 -; CHECK: bl _smallstruct_callee +; CHECK: ; %bb.0: +; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
; 16-byte Folded Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: .cfi_def_cfa w29, 16 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: mov x0, #1 ; =0x1 +; CHECK-NEXT: movk x0, #2, lsl #32 +; CHECK-NEXT: mov x1, #3 ; =0x3 +; CHECK-NEXT: movk x1, #4, lsl #32 +; CHECK-NEXT: bl _smallstruct_callee +; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-NEXT: ret call void @smallstruct_callee([4 x i32] [i32 1, i32 2, i32 3, i32 4]) ret void @@ -358,9 +572,20 @@ declare void @smallstruct_callee_stack([8 x i64], i32, [2 x i32]) define void @test_call_smallstruct_stack() { ; CHECK-LABEL: test_call_smallstruct_stack: -; CHECK: mov [[VAL:x[0-9]+]], #1 -; CHECK: movk [[VAL]], #2, lsl #32 -; CHECK: stur [[VAL]], [sp, #4] +; CHECK: ; %bb.0: +; CHECK-NEXT: sub sp, sp, #32 +; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; CHECK-NEXT: add x29, sp, #16 +; CHECK-NEXT: .cfi_def_cfa w29, 16 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: mov x8, #1 ; =0x1 +; CHECK-NEXT: movk x8, #2, lsl #32 +; CHECK-NEXT: stur x8, [sp, #4] +; CHECK-NEXT: bl _smallstruct_callee_stack +; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: ret call void @smallstruct_callee_stack([8 x i64] undef, i32 undef, [2 x i32] [i32 1, i32 2]) ret void @@ -369,8 +594,17 @@ declare [3 x i32] @returns_smallstruct() define i32 @test_use_smallstruct_low() { ; CHECK-LABEL: test_use_smallstruct_low: -; CHECK: bl _returns_smallstruct -; CHECK: mov x0, x1 +; CHECK: ; %bb.0: +; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: .cfi_def_cfa w29, 16 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: bl _returns_smallstruct +; CHECK-NEXT: mov x0, x1 +; CHECK-NEXT: ; kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-NEXT: ret %struct = call [3 x i32] @returns_smallstruct() %val = extractvalue [3 x i32] %struct, 2 @@ -379,8 +613,17 @@ define i32 @test_use_smallstruct_high() { ; CHECK-LABEL: test_use_smallstruct_high: -; CHECK: bl _returns_smallstruct -; CHECK: lsr x0, x0, #32 +; CHECK: ; %bb.0: +; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: .cfi_def_cfa w29, 16 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: bl _returns_smallstruct +; CHECK-NEXT: lsr x0, x0, #32 +; CHECK-NEXT: ; kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-NEXT: ret %struct = call [3 x i32] @returns_smallstruct() %val = extractvalue [3 x i32] %struct, 1 @@ -391,10 +634,19 @@ ; be marked as unavailable and subsequent GPR arguments should also be on the ; stack. Obviously the struct itself should be passed entirely on the stack. 
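; Layout sketch for the test below, inferred from the CHECK offsets rather
; than stated by the test itself: the [7 x i64] fills x0-x6, the 16-byte
; [4 x i32] no longer fits in the single remaining GPR, so it is passed
; wholly on the stack at [sp] (marking x7 unavailable), which leaves %in at
; [sp, #16].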
define i32 @test_smallstruct_padding([7 x i64], [4 x i32] %struct, i32 %in) { -; CHECK-LABEL: test_smallstruct_padding: -; CHECK-DAG: ldr [[IN:w[0-9]+]], [sp, #16] -; CHECK-DAG: ldr [[LHS:w[0-9]+]], [sp] -; CHECK: add w0, [[LHS]], [[IN]] +; CHECK-OPT-LABEL: test_smallstruct_padding: +; CHECK-OPT: ; %bb.0: +; CHECK-OPT-NEXT: ldr w8, [sp, #16] +; CHECK-OPT-NEXT: ldr w9, [sp] +; CHECK-OPT-NEXT: add w0, w9, w8 +; CHECK-OPT-NEXT: ret +; +; CHECK-FAST-LABEL: test_smallstruct_padding: +; CHECK-FAST: ; %bb.0: +; CHECK-FAST-NEXT: ldr w8, [sp] +; CHECK-FAST-NEXT: ldr w9, [sp, #16] +; CHECK-FAST-NEXT: add w0, w8, w9 +; CHECK-FAST-NEXT: ret %lhs = extractvalue [4 x i32] %struct, 0 %sum = add i32 %lhs, %in ret i32 %sum @@ -403,17 +655,30 @@ declare void @take_small_smallstruct(i64, [1 x i32]) define void @test_small_smallstruct() { ; CHECK-LABEL: test_small_smallstruct: -; CHECK-DAG: mov w0, #1 -; CHECK-DAG: mov w1, #2 -; CHECK: bl _take_small_smallstruct +; CHECK: ; %bb.0: +; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: .cfi_def_cfa w29, 16 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: mov w0, #1 ; =0x1 +; CHECK-NEXT: mov w1, #2 ; =0x2 +; CHECK-NEXT: bl _take_small_smallstruct +; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-NEXT: ret call void @take_small_smallstruct(i64 1, [1 x i32] [i32 2]) ret void } define void @test_bare_frameaddr(ptr %addr) { ; CHECK-LABEL: test_bare_frameaddr: -; CHECK: add x[[LOCAL:[0-9]+]], sp, #{{[0-9]+}} -; CHECK: str w[[LOCAL]], +; CHECK: ; %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: add x8, sp, #15 +; CHECK-NEXT: str w8, [x0] +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret %ptr = alloca i8 store ptr %ptr, ptr %addr, align 4 @@ -422,15 +687,28 @@ define void @test_sret_use(ptr sret([8 x i64]) %out) { ; CHECK-LABEL: test_sret_use: -; CHECK: str xzr, [x8] +; CHECK: ; %bb.0: +; CHECK-NEXT: str xzr, [x8] +; CHECK-NEXT: ret store i64 0, ptr %out ret void } define i64 @test_sret_call() { ; CHECK-LABEL: test_sret_call: -; CHECK: mov x8, sp -; CHECK: bl _test_sret_use +; CHECK: ; %bb.0: +; CHECK-NEXT: sub sp, sp, #80 +; CHECK-NEXT: stp x29, x30, [sp, #64] ; 16-byte Folded Spill +; CHECK-NEXT: add x29, sp, #64 +; CHECK-NEXT: .cfi_def_cfa w29, 16 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: bl _test_sret_use +; CHECK-NEXT: ldr x0, [sp] +; CHECK-NEXT: ldp x29, x30, [sp, #64] ; 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #80 +; CHECK-NEXT: ret %arr = alloca [8 x i64] call void @test_sret_use(ptr sret([8 x i64]) %arr) @@ -440,16 +718,27 @@ define double @test_constpool() { ; CHECK-LABEL: test_constpool: -; CHECK: adrp x[[PAGE:[0-9]+]], [[POOL:lCPI[0-9]+_[0-9]+]]@PAGE -; CHECK: ldr d0, [x[[PAGE]], [[POOL]]@PAGEOFF] +; CHECK: ; %bb.0: +; CHECK-NEXT: Lloh22: +; CHECK-NEXT: adrp x8, lCPI37_0@PAGE +; CHECK-NEXT: Lloh23: +; CHECK-NEXT: ldr d0, [x8, lCPI37_0@PAGEOFF] +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdr Lloh22, Lloh23 ret double 1.0e-6 } define ptr @test_blockaddress() { ; CHECK-LABEL: test_blockaddress: -; CHECK: [[BLOCK:Ltmp[0-9]+]]: -; CHECK: adrp x[[PAGE:[0-9]+]], lCPI{{[0-9]+_[0-9]+}}@PAGE -; CHECK: ldr x0, [x[[PAGE]], lCPI{{[0-9]+_[0-9]+}}@PAGEOFF] +; CHECK: ; %bb.0: +; CHECK-NEXT: Ltmp7: ; Block address taken +; CHECK-NEXT: ; %bb.1: ; %dest +; CHECK-NEXT: Lloh24: +; CHECK-NEXT: adrp x0, lCPI38_0@PAGE +; CHECK-NEXT: Lloh25: +; CHECK-NEXT: 
ldr x0, [x0, lCPI38_0@PAGEOFF] +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdr Lloh24, Lloh25 br label %dest dest: ret ptr blockaddress(@test_blockaddress, %dest) @@ -457,7 +746,24 @@ define ptr @test_indirectbr(ptr %dest) { ; CHECK-LABEL: test_indirectbr: -; CHECK: br x0 +; CHECK: ; %bb.0: +; CHECK-NEXT: br x0 +; CHECK-NEXT: Ltmp8: ; Block address taken +; CHECK-NEXT: LBB39_1: ; %true +; CHECK-NEXT: Lloh26: +; CHECK-NEXT: adrp x0, lCPI39_0@PAGE +; CHECK-NEXT: Lloh27: +; CHECK-NEXT: ldr x0, [x0, lCPI39_0@PAGEOFF] +; CHECK-NEXT: ret +; CHECK-NEXT: Ltmp9: ; Block address taken +; CHECK-NEXT: LBB39_2: ; %false +; CHECK-NEXT: Lloh28: +; CHECK-NEXT: adrp x0, lCPI39_1@PAGE +; CHECK-NEXT: Lloh29: +; CHECK-NEXT: ldr x0, [x0, lCPI39_1@PAGEOFF] +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdr Lloh26, Lloh27 +; CHECK-NEXT: .loh AdrpLdr Lloh28, Lloh29 indirectbr ptr %dest, [label %true, label %false] true: @@ -471,7 +777,12 @@ ; claim the FI in the process -- it doesn't need extending. define float @test_frameindex_offset_load() { ; CHECK-LABEL: test_frameindex_offset_load: -; CHECK: ldr s0, [sp, #4] +; CHECK: ; %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: ldr s0, [sp, #4] +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret %arr = alloca float, i32 4, align 8 %addr = getelementptr inbounds float, ptr %arr, i32 1 @@ -481,10 +792,15 @@ define void @test_unaligned_frameindex_offset_store() { ; CHECK-LABEL: test_unaligned_frameindex_offset_store: -; CHECK: mov x[[TMP:[0-9]+]], sp -; CHECK: orr w[[ADDR:[0-9]+]], w[[TMP]], #0x2 -; CHECK: mov [[VAL:w[0-9]+]], #42 -; CHECK: str [[VAL]], [x[[ADDR]]] +; CHECK: ; %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: orr w8, w8, #0x2 +; CHECK-NEXT: mov w9, #42 ; =0x2a +; CHECK-NEXT: str w9, [x8] +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret %arr = alloca [4 x i32] %addr.int = ptrtoint ptr %arr to i32 @@ -497,9 +813,11 @@ define {i64, ptr} @test_pre_idx(ptr %addr) { ; CHECK-LABEL: test_pre_idx: +; CHECK: ; %bb.0: +; CHECK-NEXT: add w1, w0, #8 +; CHECK-NEXT: ldr x0, [x1] +; CHECK-NEXT: ret -; CHECK: add w[[ADDR:[0-9]+]], w0, #8 -; CHECK: ldr x0, [x[[ADDR]]] %addr.int = ptrtoint ptr %addr to i32 %addr.next.int = add nuw i32 %addr.int, 8 %addr.next = inttoptr i32 %addr.next.int to ptr @@ -515,8 +833,10 @@ ; %addr wraps round to 0. 
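; Worked example (illustrative only): if %addr = 0xfffffff8, the 32-bit
; increment below wraps to 0, while a 64-bit pre-indexed load would form
; 0xfffffff8 + 8 = 0x100000000, so the increment must stay the separate
; 32-bit "add w1, w0, #8" that the checks expect.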
define {i64, ptr} @test_invalid_pre_idx(ptr %addr) { ; CHECK-LABEL: test_invalid_pre_idx: -; CHECK: add w1, w0, #8 -; CHECK: ldr x0, [x1] +; CHECK: ; %bb.0: +; CHECK-NEXT: add w1, w0, #8 +; CHECK-NEXT: ldr x0, [x1] +; CHECK-NEXT: ret %addr.next = getelementptr i64, ptr %addr, i32 1 %val = load i64, ptr %addr.next @@ -528,24 +848,79 @@ declare void @callee(ptr) define void @test_stack_guard() ssp { -; CHECK-LABEL: test_stack_guard: -; CHECK: adrp x[[GUARD_GOTPAGE:[0-9]+]], ___stack_chk_guard@GOTPAGE -; CHECK: ldr w[[GUARD_ADDR:[0-9]+]], [x[[GUARD_GOTPAGE]], ___stack_chk_guard@GOTPAGEOFF] -; CHECK: ldr [[GUARD_VAL:w[0-9]+]], [x[[GUARD_ADDR]]] -; CHECK: stur [[GUARD_VAL]], [x29, #[[GUARD_OFFSET:-[0-9]+]]] - -; CHECK: add x0, sp, #{{[0-9]+}} -; CHECK: bl _callee - -; CHECK-OPT: adrp x[[GUARD_GOTPAGE:[0-9]+]], ___stack_chk_guard@GOTPAGE -; CHECK-OPT: ldr w[[GUARD_ADDR:[0-9]+]], [x[[GUARD_GOTPAGE]], ___stack_chk_guard@GOTPAGEOFF] -; CHECK-OPT: ldr [[GUARD_VAL:w[0-9]+]], [x[[GUARD_ADDR]]] -; CHECK-OPT: ldur [[NEW_VAL:w[0-9]+]], [x29, #[[GUARD_OFFSET]]] -; CHECK-OPT: cmp [[GUARD_VAL]], [[NEW_VAL]] -; CHECK-OPT: b.ne [[FAIL:LBB[0-9]+_[0-9]+]] - -; CHECK-OPT: [[FAIL]]: -; CHECK-OPT-NEXT: bl ___stack_chk_fail +; CHECK-OPT-LABEL: test_stack_guard: +; CHECK-OPT: ; %bb.0: +; CHECK-OPT-NEXT: sub sp, sp, #64 +; CHECK-OPT-NEXT: stp x29, x30, [sp, #48] ; 16-byte Folded Spill +; CHECK-OPT-NEXT: add x29, sp, #48 +; CHECK-OPT-NEXT: .cfi_def_cfa w29, 16 +; CHECK-OPT-NEXT: .cfi_offset w30, -8 +; CHECK-OPT-NEXT: .cfi_offset w29, -16 +; CHECK-OPT-NEXT: Lloh30: +; CHECK-OPT-NEXT: adrp x8, ___stack_chk_guard@GOTPAGE +; CHECK-OPT-NEXT: Lloh31: +; CHECK-OPT-NEXT: ldr w8, [x8, ___stack_chk_guard@GOTPAGEOFF] +; CHECK-OPT-NEXT: Lloh32: +; CHECK-OPT-NEXT: ldr w8, [x8] +; CHECK-OPT-NEXT: stur w8, [x29, #-4] +; CHECK-OPT-NEXT: add x0, sp, #12 +; CHECK-OPT-NEXT: bl _callee +; CHECK-OPT-NEXT: Lloh33: +; CHECK-OPT-NEXT: adrp x8, ___stack_chk_guard@GOTPAGE +; CHECK-OPT-NEXT: Lloh34: +; CHECK-OPT-NEXT: ldr w8, [x8, ___stack_chk_guard@GOTPAGEOFF] +; CHECK-OPT-NEXT: Lloh35: +; CHECK-OPT-NEXT: ldr w8, [x8] +; CHECK-OPT-NEXT: ldur w9, [x29, #-4] +; CHECK-OPT-NEXT: cmp w8, w9 +; CHECK-OPT-NEXT: b.ne LBB44_2 +; CHECK-OPT-NEXT: ; %bb.1: +; CHECK-OPT-NEXT: ldp x29, x30, [sp, #48] ; 16-byte Folded Reload +; CHECK-OPT-NEXT: add sp, sp, #64 +; CHECK-OPT-NEXT: ret +; CHECK-OPT-NEXT: LBB44_2: +; CHECK-OPT-NEXT: bl ___stack_chk_fail +; CHECK-OPT-NEXT: .loh AdrpLdrGotLdr Lloh33, Lloh34, Lloh35 +; CHECK-OPT-NEXT: .loh AdrpLdrGotLdr Lloh30, Lloh31, Lloh32 +; +; CHECK-FAST-LABEL: test_stack_guard: +; CHECK-FAST: ; %bb.0: +; CHECK-FAST-NEXT: sub sp, sp, #64 +; CHECK-FAST-NEXT: stp x29, x30, [sp, #48] ; 16-byte Folded Spill +; CHECK-FAST-NEXT: add x29, sp, #48 +; CHECK-FAST-NEXT: .cfi_def_cfa w29, 16 +; CHECK-FAST-NEXT: .cfi_offset w30, -8 +; CHECK-FAST-NEXT: .cfi_offset w29, -16 +; CHECK-FAST-NEXT: Lloh30: +; CHECK-FAST-NEXT: adrp x8, ___stack_chk_guard@GOTPAGE +; CHECK-FAST-NEXT: Lloh31: +; CHECK-FAST-NEXT: ldr w8, [x8, ___stack_chk_guard@GOTPAGEOFF] +; CHECK-FAST-NEXT: Lloh32: +; CHECK-FAST-NEXT: ldr w8, [x8] +; CHECK-FAST-NEXT: stur w8, [x29, #-4] +; CHECK-FAST-NEXT: add x0, sp, #12 +; CHECK-FAST-NEXT: bl _callee +; CHECK-FAST-NEXT: Lloh33: +; CHECK-FAST-NEXT: adrp x8, ___stack_chk_guard@GOTPAGE +; CHECK-FAST-NEXT: Lloh34: +; CHECK-FAST-NEXT: ldr w8, [x8, ___stack_chk_guard@GOTPAGEOFF] +; CHECK-FAST-NEXT: Lloh35: +; CHECK-FAST-NEXT: ldr w8, [x8] +; CHECK-FAST-NEXT: ldur w9, [x29, #-4] +; CHECK-FAST-NEXT: and x8, x8, #0xffffffff +; 
CHECK-FAST-NEXT: cmp x8, x9 +; CHECK-FAST-NEXT: b.ne LBB44_2 +; CHECK-FAST-NEXT: ; %bb.1: ; %SP_return +; CHECK-FAST-NEXT: ldp x29, x30, [sp, #48] ; 16-byte Folded Reload +; CHECK-FAST-NEXT: add sp, sp, #64 +; CHECK-FAST-NEXT: ret +; CHECK-FAST-NEXT: LBB44_2: ; %CallStackCheckFailBlk +; CHECK-FAST-NEXT: bl ___stack_chk_fail +; CHECK-FAST-NEXT: .loh AdrpLdrGotLdr Lloh33, Lloh34, Lloh35 +; CHECK-FAST-NEXT: .loh AdrpLdrGotLdr Lloh30, Lloh31, Lloh32 + + + %arr = alloca [8 x i32] call void @callee(ptr %arr) ret void @@ -556,9 +931,61 @@ @_ZTI8Whatever = external global i8 define void @test_landingpad_marshalling() personality ptr @__gxx_personality_v0 { ; CHECK-LABEL: test_landingpad_marshalling: -; CHECK-OPT: mov x2, x1 -; CHECK-OPT: mov x1, x0 -; CHECK: bl _eat_landingpad_args +; CHECK: Lfunc_begin0: +; CHECK-NEXT: .cfi_startproc +; CHECK-NEXT: .cfi_personality 155, ___gxx_personality_v0 +; CHECK-NEXT: .cfi_lsda 16, Lexception0 +; CHECK-NEXT: ; %bb.0: +; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: .cfi_def_cfa w29, 16 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: Ltmp3: +; CHECK-NEXT: bl _callee +; CHECK-NEXT: Ltmp4: +; CHECK-NEXT: ; %bb.1: ; %done +; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: LBB45_2: ; %lpad +; CHECK-NEXT: Ltmp5: +; CHECK-NEXT: mov x2, x1 +; CHECK-NEXT: mov x1, x0 +; CHECK-NEXT: ; kill: def $w2 killed $w2 killed $x2 +; CHECK-NEXT: bl _eat_landingpad_args +; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-NEXT: ret +; CHECK-NEXT: Lfunc_end0: +; CHECK-NEXT: .cfi_endproc +; CHECK-NEXT: .section __TEXT,__gcc_except_tab +; CHECK-NEXT: .p2align 2, 0x0 +; CHECK-NEXT: GCC_except_table45: +; CHECK-NEXT: Lexception0: +; CHECK-NEXT: .byte 255 ; @LPStart Encoding = omit +; CHECK-NEXT: .byte 155 ; @TType Encoding = indirect pcrel sdata4 +; CHECK-NEXT: .uleb128 Lttbase0-Lttbaseref0 +; CHECK-NEXT: Lttbaseref0: +; CHECK-NEXT: .byte 1 ; Call site Encoding = uleb128 +; CHECK-NEXT: .uleb128 Lcst_end0-Lcst_begin0 +; CHECK-NEXT: Lcst_begin0: +; CHECK-NEXT: .uleb128 Ltmp3-Lfunc_begin0 ; >> Call Site 1 << +; CHECK-NEXT: .uleb128 Ltmp4-Ltmp3 ; Call between Ltmp3 and Ltmp4 +; CHECK-NEXT: .uleb128 Ltmp5-Lfunc_begin0 ; jumps to Ltmp5 +; CHECK-NEXT: .byte 1 ; On action: 1 +; CHECK-NEXT: .uleb128 Ltmp4-Lfunc_begin0 ; >> Call Site 2 << +; CHECK-NEXT: .uleb128 Lfunc_end0-Ltmp4 ; Call between Ltmp4 and Lfunc_end0 +; CHECK-NEXT: .byte 0 ; has no landing pad +; CHECK-NEXT: .byte 0 ; On action: cleanup +; CHECK-NEXT: Lcst_end0: +; CHECK-NEXT: .byte 1 ; >> Action Record 1 << +; CHECK-NEXT: ; Catch TypeInfo 1 +; CHECK-NEXT: .byte 0 ; No further actions +; CHECK-NEXT: .p2align 2, 0x0 +; CHECK-NEXT: ; >> Catch TypeInfos << +; CHECK-NEXT: Ltmp10: ; TypeInfo 1 +; CHECK-NEXT: .long __ZTI8Whatever@GOT-Ltmp10 +; CHECK-NEXT: Lttbase0: +; CHECK-NEXT: .p2align 2, 0x0 invoke void @callee(ptr undef) to label %done unwind label %lpad lpad: ; preds = %entry @@ -575,10 +1002,18 @@ define void @test_dynamic_stackalloc() { ; CHECK-LABEL: test_dynamic_stackalloc: -; CHECK: sub [[REG:x[0-9]+]], sp, #32 -; CHECK: mov sp, [[REG]] -; CHECK-OPT-NOT: ubfx -; CHECK: bl _callee +; CHECK: ; %bb.0: +; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
; 16-byte Folded Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: .cfi_def_cfa w29, 16 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: sub x0, sp, #32 +; CHECK-NEXT: mov sp, x0 +; CHECK-NEXT: bl _callee +; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-NEXT: ret br label %next next: @@ -589,8 +1024,12 @@ define void @test_asm_memory(ptr %base.addr) { ; CHECK-LABEL: test_asm_memory: -; CHECK: add w[[ADDR:[0-9]+]], w0, #4 -; CHECK: str wzr, [x[[ADDR]] +; CHECK: ; %bb.0: +; CHECK-NEXT: add w8, w0, #4 +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: str wzr, [x8] +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ret %addr = getelementptr i32, ptr %base.addr, i32 1 call void asm sideeffect "str wzr, $0", "*m"(ptr elementtype(i32) %addr) ret void @@ -598,8 +1037,12 @@ define void @test_unsafe_asm_memory(i64 %val) { ; CHECK-LABEL: test_unsafe_asm_memory: -; CHECK: and x[[ADDR:[0-9]+]], x0, #0xffffffff -; CHECK: str wzr, [x[[ADDR]]] +; CHECK: ; %bb.0: +; CHECK-NEXT: and x8, x0, #0xffffffff +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: str wzr, [x8] +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: ret %addr_int = trunc i64 %val to i32 %addr = inttoptr i32 %addr_int to ptr call void asm sideeffect "str wzr, $0", "*m"(ptr elementtype(i32) %addr) @@ -608,14 +1051,18 @@ define [9 x ptr] @test_demoted_return(ptr %in) { ; CHECK-LABEL: test_demoted_return: -; CHECK: str w0, [x8, #32] +; CHECK: ; %bb.0: +; CHECK-NEXT: str w0, [x8, #32] +; CHECK-NEXT: ret %res = insertvalue [9 x ptr] undef, ptr %in, 8 ret [9 x ptr] %res } define ptr @test_inttoptr(i64 %in) { ; CHECK-LABEL: test_inttoptr: -; CHECK: and x0, x0, #0xffffffff +; CHECK: ; %bb.0: +; CHECK-NEXT: and x0, x0, #0xffffffff +; CHECK-NEXT: ret %res = inttoptr i64 %in to ptr ret ptr %res } @@ -623,16 +1070,18 @@ declare i32 @llvm.get.dynamic.area.offset.i32() define i32 @test_dynamic_area() { ; CHECK-LABEL: test_dynamic_area: -; CHECK: mov w0, wzr +; CHECK: ; %bb.0: +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: ret %res = call i32 @llvm.get.dynamic.area.offset.i32() ret i32 %res } define void @test_pointer_vec_store(ptr %addr) { ; CHECK-LABEL: test_pointer_vec_store: -; CHECK: str xzr, [x0] -; CHECK-NOT: str -; CHECK-NOT: stp +; CHECK: ; %bb.0: +; CHECK-NEXT: str xzr, [x0] +; CHECK-NEXT: ret store <2 x ptr> zeroinitializer, ptr %addr, align 16 ret void @@ -640,28 +1089,58 @@ define <2 x ptr> @test_pointer_vec_load(ptr %addr) { ; CHECK-LABEL: test_pointer_vec_load: -; CHECK: ldr d[[TMP:[0-9]+]], [x0] -; CHECK: ushll.2d v0, v[[TMP]], #0 +; CHECK: ; %bb.0: +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ushll.2d v0, v0, #0 +; CHECK-NEXT: ret %val = load <2 x ptr>, ptr %addr, align 16 ret <2 x ptr> %val } define void @test_inline_asm_mem_pointer(ptr %in) { ; CHECK-LABEL: test_inline_asm_mem_pointer: -; CHECK: str w0, +; CHECK: ; %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: add x8, sp, #12 +; CHECK-NEXT: str w0, [sp, #12] +; CHECK-NEXT: ; InlineAsm Start +; CHECK-NEXT: ldr x0, [x8] +; CHECK-NEXT: ; InlineAsm End +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret tail call void asm sideeffect "ldr x0, $0", "rm"(ptr %in) ret void } define void @test_struct_hi(i32 %hi) nounwind { -; CHECK-LABEL: test_struct_hi: -; CHECK: mov w[[IN:[0-9]+]], w0 -; CHECK: bl _get_int -; CHECK-FAST-NEXT: mov w[[DST:[0-9]+]], w0 -; CHECK-FAST-NEXT: orr x0, x[[DST]], x[[IN]], lsl #32 -; CHECK-OPT-NEXT: bfi x0, x[[IN]], #32, #32 -; CHECK-NEXT: bl _take_pair +; 
CHECK-OPT-LABEL: test_struct_hi: +; CHECK-OPT: ; %bb.0: +; CHECK-OPT-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill +; CHECK-OPT-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; CHECK-OPT-NEXT: add x29, sp, #16 +; CHECK-OPT-NEXT: mov w19, w0 +; CHECK-OPT-NEXT: bl _get_int +; CHECK-OPT-NEXT: bfi x0, x19, #32, #32 +; CHECK-OPT-NEXT: bl _take_pair +; CHECK-OPT-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; CHECK-OPT-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; CHECK-OPT-NEXT: ret +; +; CHECK-FAST-LABEL: test_struct_hi: +; CHECK-FAST: ; %bb.0: +; CHECK-FAST-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill +; CHECK-FAST-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; CHECK-FAST-NEXT: add x29, sp, #16 +; CHECK-FAST-NEXT: mov w19, w0 +; CHECK-FAST-NEXT: bl _get_int +; CHECK-FAST-NEXT: mov w8, w0 +; CHECK-FAST-NEXT: orr x0, x8, x19, lsl #32 +; CHECK-FAST-NEXT: bl _take_pair +; CHECK-FAST-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; CHECK-FAST-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload +; CHECK-FAST-NEXT: ret %val.64 = call i64 @get_int() %val.32 = trunc i64 %val.64 to i32 @@ -675,16 +1154,47 @@ declare i64 @get_int() define i1 @test_icmp_ptr(ptr %in) { -; CHECK-LABEL: test_icmp_ptr -; CHECK: ubfx x0, x0, #31, #1 +; CHECK-LABEL: test_icmp_ptr: +; CHECK: ; %bb.0: +; CHECK-NEXT: ubfx x0, x0, #31, #1 +; CHECK-NEXT: ; kill: def $w0 killed $w0 killed $x0 +; CHECK-NEXT: ret %res = icmp slt ptr %in, null ret i1 %res } define void @test_multiple_icmp_ptr(ptr %l, ptr %r) { -; CHECK-LABEL: test_multiple_icmp_ptr: -; CHECK: tbnz w0, #31, [[FALSEBB:LBB[0-9]+_[0-9]+]] -; CHECK: tbnz w1, #31, [[FALSEBB]] +; CHECK-OPT-LABEL: test_multiple_icmp_ptr: +; CHECK-OPT: ; %bb.0: +; CHECK-OPT-NEXT: tbnz w0, #31, LBB57_3 +; CHECK-OPT-NEXT: ; %bb.1: +; CHECK-OPT-NEXT: tbnz w1, #31, LBB57_3 +; CHECK-OPT-NEXT: ; %bb.2: ; %true +; CHECK-OPT-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-OPT-NEXT: mov x29, sp +; CHECK-OPT-NEXT: .cfi_def_cfa w29, 16 +; CHECK-OPT-NEXT: .cfi_offset w30, -8 +; CHECK-OPT-NEXT: .cfi_offset w29, -16 +; CHECK-OPT-NEXT: bl _bar +; CHECK-OPT-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-OPT-NEXT: LBB57_3: ; %false +; CHECK-OPT-NEXT: ret +; +; CHECK-FAST-LABEL: test_multiple_icmp_ptr: +; CHECK-FAST: ; %bb.0: +; CHECK-FAST-NEXT: tbnz w0, #31, LBB57_3 +; CHECK-FAST-NEXT: ; %bb.1: ; %.cond.split +; CHECK-FAST-NEXT: tbnz w1, #31, LBB57_3 +; CHECK-FAST-NEXT: ; %bb.2: ; %true +; CHECK-FAST-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-FAST-NEXT: mov x29, sp +; CHECK-FAST-NEXT: .cfi_def_cfa w29, 16 +; CHECK-FAST-NEXT: .cfi_offset w30, -8 +; CHECK-FAST-NEXT: .cfi_offset w29, -16 +; CHECK-FAST-NEXT: bl _bar +; CHECK-FAST-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-FAST-NEXT: LBB57_3: ; %false +; CHECK-FAST-NEXT: ret %tst1 = icmp sgt ptr %l, inttoptr (i32 -1 to ptr) %tst2 = icmp sgt ptr %r, inttoptr (i32 -1 to ptr) %tst = and i1 %tst1, %tst2 @@ -699,9 +1209,37 @@ } define void @test_multiple_icmp_ptr_select(ptr %l, ptr %r) { -; CHECK-LABEL: test_multiple_icmp_ptr_select: -; CHECK: tbnz w0, #31, [[FALSEBB:LBB[0-9]+_[0-9]+]] -; CHECK: tbnz w1, #31, [[FALSEBB]] +; CHECK-OPT-LABEL: test_multiple_icmp_ptr_select: +; CHECK-OPT: ; %bb.0: +; CHECK-OPT-NEXT: tbnz w0, #31, LBB58_3 +; CHECK-OPT-NEXT: ; %bb.1: +; CHECK-OPT-NEXT: tbnz w1, #31, LBB58_3 +; CHECK-OPT-NEXT: ; %bb.2: ; %true +; CHECK-OPT-NEXT: stp x29, x30, [sp, #-16]! 
; 16-byte Folded Spill +; CHECK-OPT-NEXT: mov x29, sp +; CHECK-OPT-NEXT: .cfi_def_cfa w29, 16 +; CHECK-OPT-NEXT: .cfi_offset w30, -8 +; CHECK-OPT-NEXT: .cfi_offset w29, -16 +; CHECK-OPT-NEXT: bl _bar +; CHECK-OPT-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-OPT-NEXT: LBB58_3: ; %false +; CHECK-OPT-NEXT: ret +; +; CHECK-FAST-LABEL: test_multiple_icmp_ptr_select: +; CHECK-FAST: ; %bb.0: +; CHECK-FAST-NEXT: tbnz w0, #31, LBB58_3 +; CHECK-FAST-NEXT: ; %bb.1: ; %.cond.split +; CHECK-FAST-NEXT: tbnz w1, #31, LBB58_3 +; CHECK-FAST-NEXT: ; %bb.2: ; %true +; CHECK-FAST-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-FAST-NEXT: mov x29, sp +; CHECK-FAST-NEXT: .cfi_def_cfa w29, 16 +; CHECK-FAST-NEXT: .cfi_offset w30, -8 +; CHECK-FAST-NEXT: .cfi_offset w29, -16 +; CHECK-FAST-NEXT: bl _bar +; CHECK-FAST-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-FAST-NEXT: LBB58_3: ; %false +; CHECK-FAST-NEXT: ret %tst1 = icmp sgt ptr %l, inttoptr (i32 -1 to ptr) %tst2 = icmp sgt ptr %r, inttoptr (i32 -1 to ptr) %tst = select i1 %tst1, i1 %tst2, i1 false @@ -716,25 +1254,30 @@ } define ptr @test_gep_nonpow2(ptr %a0, i32 %a1) { -; CHECK-LABEL: test_gep_nonpow2: -; CHECK-OPT: mov w[[SIZE:[0-9]+]], #18 -; CHECK-OPT-NEXT: smaddl x0, w1, w[[SIZE]], x0 -; CHECK-OPT-NEXT: ret - -; CHECK-FAST: mov w[[SIZE:[0-9]+]], #18 -; CHECK-FAST-NEXT: smaddl [[TMP:x[0-9]+]], w1, w[[SIZE]], x0 -; CHECK-FAST-NEXT: and x0, [[TMP]], #0xffffffff -; CHECK-FAST-NEXT: ret +; CHECK-OPT-LABEL: test_gep_nonpow2: +; CHECK-OPT: ; %bb.0: +; CHECK-OPT-NEXT: mov w8, #18 ; =0x12 +; CHECK-OPT-NEXT: smaddl x0, w1, w8, x0 +; CHECK-OPT-NEXT: ret +; +; CHECK-FAST-LABEL: test_gep_nonpow2: +; CHECK-FAST: ; %bb.0: +; CHECK-FAST-NEXT: mov w8, #18 ; =0x12 +; CHECK-FAST-NEXT: smaddl x8, w1, w8, x0 +; CHECK-FAST-NEXT: and x0, x8, #0xffffffff +; CHECK-FAST-NEXT: ret + %tmp0 = getelementptr inbounds { [18 x i8] }, ptr %a0, i32 %a1 ret ptr %tmp0 } define void @test_memset(i64 %in, i8 %value) { ; CHECK-LABEL: test_memset: -; CHECK-DAG: and x8, x0, #0xffffffff -; CHECK-DAG: lsr x2, x0, #32 -; CHECK-DAG: mov x0, x8 -; CHECK: b _memset +; CHECK: ; %bb.0: +; CHECK-NEXT: lsr x2, x0, #32 +; CHECK-NEXT: and x0, x0, #0xffffffff +; CHECK-NEXT: ; kill: def $w2 killed $w2 killed $x2 +; CHECK-NEXT: b _memset %ptr.i32 = trunc i64 %in to i32 %size.64 = lshr i64 %in, 32 @@ -746,9 +1289,11 @@ define void @test_bzero(i64 %in) { ; CHECK-LABEL: test_bzero: -; CHECK-DAG: lsr x1, x0, #32 -; CHECK-DAG: and x0, x0, #0xffffffff -; CHECK: b _bzero +; CHECK: ; %bb.0: +; CHECK-NEXT: lsr x1, x0, #32 +; CHECK-NEXT: and x0, x0, #0xffffffff +; CHECK-NEXT: ; kill: def $w1 killed $w1 killed $x1 +; CHECK-NEXT: b _bzero %ptr.i32 = trunc i64 %in to i32 %size.64 = lshr i64 %in, 32 diff --git a/llvm/test/CodeGen/AArch64/arm64ec-reservedregs.ll b/llvm/test/CodeGen/AArch64/arm64ec-reservedregs.ll --- a/llvm/test/CodeGen/AArch64/arm64ec-reservedregs.ll +++ b/llvm/test/CodeGen/AArch64/arm64ec-reservedregs.ll @@ -18,10 +18,10 @@ ; CHECK-NEXT: //APP ; CHECK-NEXT: //NO_APP ; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldr w0, [sp, #28] // 4-byte Folded Reload +; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldr x27, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp x30, x29, [sp], #80 // 16-byte Folded Reload ; 
CHECK-NEXT: ret entry: @@ -41,10 +41,10 @@ ; CHECK-NEXT: //APP ; CHECK-NEXT: //NO_APP ; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: mov w0, w30 +; CHECK-NEXT: ldr x27, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: ldp x22, x21, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: mov w0, w30 ; CHECK-NEXT: ldp x26, x25, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldr x27, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: ldp x30, x29, [sp], #80 // 16-byte Folded Reload ; CHECK-NEXT: ret entry: @@ -64,10 +64,10 @@ ; CHECK-NEXT: //APP ; CHECK-NEXT: //NO_APP ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload ; CHECK-NEXT: add sp, sp, #80 ; CHECK-NEXT: ret entry: @@ -79,15 +79,15 @@ ; CHECK-LABEL: one_float_reg: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: stp d15, d14, [sp, #-64]! // 16-byte Folded Spill -; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: fmov s15, s0 +; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: //APP ; CHECK-NEXT: //NO_APP ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: fmov s0, s15 ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: fmov s0, s15 ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #64 // 16-byte Folded Reload ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/atomic-ops-lse.ll b/llvm/test/CodeGen/AArch64/atomic-ops-lse.ll --- a/llvm/test/CodeGen/AArch64/atomic-ops-lse.ll +++ b/llvm/test/CodeGen/AArch64/atomic-ops-lse.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=+lse < %s | FileCheck %s ; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=+lse -mattr=+outline-atomics < %s | FileCheck %s ; RUN: llc -mtriple=aarch64-none-linux-gnu -disable-post-ra -verify-machineinstrs -mattr=+outline-atomics < %s | FileCheck %s --check-prefix=OUTLINE-ATOMICS @@ -17,6 +18,12 @@ define dso_local i8 @test_atomic_load_add_i8(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldaddalb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i8: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -25,19 +32,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i8: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldaddalb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldaddalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_add_i16(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldaddalh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i16: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -46,19 +61,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i16: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldaddalh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldaddalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_add_i32(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldaddal w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -67,19 +90,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i32: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldaddal w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_add_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldaddal x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -88,19 +119,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i64: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldaddal x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_add_i32_noret(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i32_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldaddal w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_noret: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -109,18 +148,26 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i32_noret: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldaddal w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw add ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_add_i64_noret(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i64_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldaddal x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_noret: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -129,18 +176,26 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i64_noret: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldaddal x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw add ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_or_i8(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldsetalb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i8: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -149,19 +204,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset1_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_or_i8: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldsetalb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw or ptr @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldsetalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_or_i16(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldsetalh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i16: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -170,19 +233,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset2_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_or_i16: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldsetalh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw or ptr @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldsetalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_or_i32(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsetal w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -191,19 +262,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_or_i32: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldsetal w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw or ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsetal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_or_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsetal x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill
@@ -212,19 +291,27 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset8_acq_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_or_i64:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldsetal x0, x0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw or ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldsetal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i64 %old
}

define dso_local void @test_atomic_load_or_i32_noret(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_or_i32_noret:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldsetal w0, w8, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_noret:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -233,18 +320,26 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset4_acq_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_or_i32_noret:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldsetal w0, w8, [x8]
+; CHECK-REG-NEXT: ret
atomicrmw or ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldsetal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret void
}

define dso_local void @test_atomic_load_or_i64_noret(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_or_i64_noret:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldsetal x0, x8, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_noret:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -253,18 +348,26 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset8_acq_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_or_i64_noret:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldsetal x0, x8, [x8]
+; CHECK-REG-NEXT: ret
atomicrmw or ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldsetal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret void
}

define dso_local i8 @test_atomic_load_xor_i8(i8 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_xor_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var8
+; CHECK-NEXT: add x8, x8, :lo12:var8
+; CHECK-NEXT: ldeoralb w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i8:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -273,19 +376,27 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor1_acq_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xor_i8:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var8
+; CHECK-REG-NEXT: add x8, x8, :lo12:var8
+; CHECK-REG-NEXT: ldeoralb w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw xor ptr @var8, i8 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
-; CHECK: ldeoralb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i8 %old
}

define dso_local i16 @test_atomic_load_xor_i16(i16 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_xor_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var16
+; CHECK-NEXT: add x8, x8, :lo12:var16
+; CHECK-NEXT: ldeoralh w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i16:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -294,19 +405,27 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor2_acq_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xor_i16:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var16
+; CHECK-REG-NEXT: add x8, x8, :lo12:var16
+; CHECK-REG-NEXT: ldeoralh w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw xor ptr @var16, i16 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
-; CHECK: ldeoralh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i16 %old
}

define dso_local i32 @test_atomic_load_xor_i32(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_xor_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldeoral w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -315,19 +434,27 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor4_acq_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xor_i32:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldeoral w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw xor ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldeoral w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i32 %old
}

define dso_local i64 @test_atomic_load_xor_i64(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_xor_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldeoral x0, x0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -336,19 +463,27 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor8_acq_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xor_i64:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldeoral x0, x0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw xor ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldeoral x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i64 %old
}

define dso_local void @test_atomic_load_xor_i32_noret(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_xor_i32_noret:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldeoral w0, w8, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_noret:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -357,18 +492,26 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor4_acq_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xor_i32_noret:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldeoral w0, w8, [x8]
+; CHECK-REG-NEXT: ret
atomicrmw xor ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldeoral w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret void
}

define dso_local void @test_atomic_load_xor_i64_noret(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_xor_i64_noret:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldeoral x0, x8, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_noret:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -377,658 +520,858 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor8_acq_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xor_i64_noret:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldeoral x0, x8, [x8]
+; CHECK-REG-NEXT: ret
atomicrmw xor ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldeoral x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret void
}

define dso_local i8 @test_atomic_load_min_i8(i8 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_min_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var8
+; CHECK-NEXT: add x8, x8, :lo12:var8
+; CHECK-NEXT: ldsminalb w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i8:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var8
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB18_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxrb w10, [x9]
; OUTLINE-ATOMICS-NEXT: sxtb w8, w10
; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxtb
; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, le
; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB18_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov w0, w8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_min_i8:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var8
+; CHECK-REG-NEXT: add x8, x8, :lo12:var8
+; CHECK-REG-NEXT: ldsminalb w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw min ptr @var8, i8 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
-; CHECK: ldsminalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i8 %old
}

define dso_local i16 @test_atomic_load_min_i16(i16 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_min_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var16
+; CHECK-NEXT: add x8, x8, :lo12:var16
+; CHECK-NEXT: ldsminalh w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i16:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var16
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB19_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxrh w10, [x9]
; OUTLINE-ATOMICS-NEXT: sxth w8, w10
; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxth
; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, le
; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB19_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov w0, w8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_min_i16:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var16
+; CHECK-REG-NEXT: add x8, x8, :lo12:var16
+; CHECK-REG-NEXT: ldsminalh w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw min ptr @var16, i16 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
-; CHECK: ldsminalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i16 %old
}

define dso_local i32 @test_atomic_load_min_i32(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_min_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldsminal w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var32
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB20_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9]
; OUTLINE-ATOMICS-NEXT: cmp w8, w0
; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, le
; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB20_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov w0, w8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_min_i32:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldsminal w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw min ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldsminal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i32 %old
}

define dso_local i64 @test_atomic_load_min_i64(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_min_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldsminal x0, x0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var64
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB21_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9]
; OUTLINE-ATOMICS-NEXT: cmp x8, x0
; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, le
; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB21_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov x0, x8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_min_i64:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldsminal x0, x0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw min ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldsminal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i64 %old
}

define dso_local void @test_atomic_load_min_i32_noret(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_min_i32_noret:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldsminal w0, w8, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_noret:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x8, var32
; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB22_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8]
; OUTLINE-ATOMICS-NEXT: cmp w9, w0
; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, le
; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8]
-; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB22_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_min_i32_noret:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldsminal w0, w8, [x8]
+; CHECK-REG-NEXT: ret
atomicrmw min ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldsminal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret void
}

define dso_local void @test_atomic_load_min_i64_noret(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_min_i64_noret:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldsminal x0, x8, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_noret:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x8, var64
; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB23_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8]
; OUTLINE-ATOMICS-NEXT: cmp x9, x0
; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, le
; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8]
-; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB23_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_min_i64_noret:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldsminal x0, x8, [x8]
+; CHECK-REG-NEXT: ret
atomicrmw min ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldsminal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret void
}

define dso_local i8 @test_atomic_load_umin_i8(i8 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_umin_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var8
+; CHECK-NEXT: add x8, x8, :lo12:var8
+; CHECK-NEXT: lduminalb w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i8:
; OUTLINE-ATOMICS: // %bb.0:
-; OUTLINE-ATOMICS-NEXT: and w8, w0, #0xff
-; OUTLINE-ATOMICS-NEXT: adrp x9, var8
-; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: adrp x8, var8
+; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var8
+; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xff
+; OUTLINE-ATOMICS-NEXT: .LBB24_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
-; OUTLINE-ATOMICS-NEXT: ldaxrb w0, [x9]
-; OUTLINE-ATOMICS-NEXT: cmp w0, w8
-; OUTLINE-ATOMICS-NEXT: csel w10, w0, w8, ls
-; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: ldaxrb w0, [x8]
+; OUTLINE-ATOMICS-NEXT: cmp w0, w9
+; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, ls
+; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x8]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB24_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_umin_i8:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var8
+; CHECK-REG-NEXT: add x8, x8, :lo12:var8
+; CHECK-REG-NEXT: lduminalb w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw umin ptr @var8, i8 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
-; CHECK: lduminalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i8 %old
}

define dso_local i16 @test_atomic_load_umin_i16(i16 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_umin_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var16
+; CHECK-NEXT: add x8, x8, :lo12:var16
+; CHECK-NEXT: lduminalh w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i16:
; OUTLINE-ATOMICS: // %bb.0:
-; OUTLINE-ATOMICS-NEXT: and w8, w0, #0xffff
-; OUTLINE-ATOMICS-NEXT: adrp x9, var16
-; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: adrp x8, var16
+; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var16
+; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xffff
+; OUTLINE-ATOMICS-NEXT: .LBB25_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
-; OUTLINE-ATOMICS-NEXT: ldaxrh w0, [x9]
-; OUTLINE-ATOMICS-NEXT: cmp w0, w8
-; OUTLINE-ATOMICS-NEXT: csel w10, w0, w8, ls
-; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: ldaxrh w0, [x8]
+; OUTLINE-ATOMICS-NEXT: cmp w0, w9
+; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, ls
+; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x8]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB25_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_umin_i16:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var16
+; CHECK-REG-NEXT: add x8, x8, :lo12:var16
+; CHECK-REG-NEXT: lduminalh w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw umin ptr @var16, i16 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
-; CHECK: lduminalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i16 %old
}

define dso_local i32 @test_atomic_load_umin_i32(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_umin_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: lduminal w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var32
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB26_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9]
; OUTLINE-ATOMICS-NEXT: cmp w8, w0
; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, ls
; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB26_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov w0, w8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_umin_i32:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: lduminal w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw umin ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: lduminal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i32 %old
}

define dso_local i64 @test_atomic_load_umin_i64(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_umin_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: lduminal x0, x0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var64
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB27_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9]
; OUTLINE-ATOMICS-NEXT: cmp x8, x0
; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, ls
; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB27_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov x0, x8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_umin_i64:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: lduminal x0, x0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw umin ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: lduminal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i64 %old
}

define dso_local void @test_atomic_load_umin_i32_noret(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_umin_i32_noret:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: lduminal w0, w8, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_noret:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x8, var32
; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB28_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8]
; OUTLINE-ATOMICS-NEXT: cmp w9, w0
; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, ls
; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8]
-; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB28_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_umin_i32_noret:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: lduminal w0, w8, [x8]
+; CHECK-REG-NEXT: ret
atomicrmw umin ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: lduminal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret void
}

define dso_local void @test_atomic_load_umin_i64_noret(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_umin_i64_noret:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: lduminal x0, x8, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_noret:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x8, var64
; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB29_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8]
; OUTLINE-ATOMICS-NEXT: cmp x9, x0
; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, ls
; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8]
-; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB29_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_umin_i64_noret:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: lduminal x0, x8, [x8]
+; CHECK-REG-NEXT: ret
atomicrmw umin ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: lduminal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret void
}

define dso_local i8 @test_atomic_load_max_i8(i8 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_max_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var8
+; CHECK-NEXT: add x8, x8, :lo12:var8
+; CHECK-NEXT: ldsmaxalb w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i8:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var8
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB30_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxrb w10, [x9]
; OUTLINE-ATOMICS-NEXT: sxtb w8, w10
; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxtb
; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, gt
; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB30_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov w0, w8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_max_i8:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var8
+; CHECK-REG-NEXT: add x8, x8, :lo12:var8
+; CHECK-REG-NEXT: ldsmaxalb w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw max ptr @var8, i8 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
-; CHECK: ldsmaxalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i8 %old
}

define dso_local i16 @test_atomic_load_max_i16(i16 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_max_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var16
+; CHECK-NEXT: add x8, x8, :lo12:var16
+; CHECK-NEXT: ldsmaxalh w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i16:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var16
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB31_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxrh w10, [x9]
; OUTLINE-ATOMICS-NEXT: sxth w8, w10
; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxth
; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, gt
; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB31_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov w0, w8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_max_i16:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var16
+; CHECK-REG-NEXT: add x8, x8, :lo12:var16
+; CHECK-REG-NEXT: ldsmaxalh w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw max ptr @var16, i16 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
-; CHECK: ldsmaxalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i16 %old
}

define dso_local i32 @test_atomic_load_max_i32(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_max_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldsmaxal w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var32
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB32_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9]
; OUTLINE-ATOMICS-NEXT: cmp w8, w0
; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, gt
; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB32_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov w0, w8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_max_i32:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldsmaxal w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw max ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldsmaxal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i32 %old
}

define dso_local i64 @test_atomic_load_max_i64(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_max_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldsmaxal x0, x0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var64
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB33_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9]
; OUTLINE-ATOMICS-NEXT: cmp x8, x0
; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, gt
; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB33_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov x0, x8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_max_i64:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldsmaxal x0, x0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw max ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldsmaxal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i64 %old
}

define dso_local void @test_atomic_load_max_i32_noret(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_max_i32_noret:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldsmaxal w0, w8, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_noret:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x8, var32
; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB34_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8]
; OUTLINE-ATOMICS-NEXT: cmp w9, w0
; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, gt
; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8]
-; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB34_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_max_i32_noret:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldsmaxal w0, w8, [x8]
+; CHECK-REG-NEXT: ret
atomicrmw max ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldsmaxal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret void
}

define dso_local void @test_atomic_load_max_i64_noret(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_max_i64_noret:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldsmaxal x0, x8, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_noret:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x8, var64
; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB35_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8]
; OUTLINE-ATOMICS-NEXT: cmp x9, x0
; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, gt
; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8]
-; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB35_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_max_i64_noret:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldsmaxal x0, x8, [x8]
+; CHECK-REG-NEXT: ret
atomicrmw max ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldsmaxal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret void
}

define dso_local i8 @test_atomic_load_umax_i8(i8 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_umax_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var8
+; CHECK-NEXT: add x8, x8, :lo12:var8
+; CHECK-NEXT: ldumaxalb w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i8:
; OUTLINE-ATOMICS: // %bb.0:
-; OUTLINE-ATOMICS-NEXT: and w8, w0, #0xff
-; OUTLINE-ATOMICS-NEXT: adrp x9, var8
-; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: adrp x8, var8
+; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var8
+; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xff
+; OUTLINE-ATOMICS-NEXT: .LBB36_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
-; OUTLINE-ATOMICS-NEXT: ldaxrb w0, [x9]
-; OUTLINE-ATOMICS-NEXT: cmp w0, w8
-; OUTLINE-ATOMICS-NEXT: csel w10, w0, w8, hi
-; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: ldaxrb w0, [x8]
+; OUTLINE-ATOMICS-NEXT: cmp w0, w9
+; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, hi
+; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x8]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB36_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_umax_i8:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var8
+; CHECK-REG-NEXT: add x8, x8, :lo12:var8
+; CHECK-REG-NEXT: ldumaxalb w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw umax ptr @var8, i8 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
-; CHECK: ldumaxalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i8 %old
}

define dso_local i16 @test_atomic_load_umax_i16(i16 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_umax_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var16
+; CHECK-NEXT: add x8, x8, :lo12:var16
+; CHECK-NEXT: ldumaxalh w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i16:
; OUTLINE-ATOMICS: // %bb.0:
-; OUTLINE-ATOMICS-NEXT: and w8, w0, #0xffff
-; OUTLINE-ATOMICS-NEXT: adrp x9, var16
-; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: adrp x8, var16
+; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var16
+; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xffff
+; OUTLINE-ATOMICS-NEXT: .LBB37_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
-; OUTLINE-ATOMICS-NEXT: ldaxrh w0, [x9]
-; OUTLINE-ATOMICS-NEXT: cmp w0, w8
-; OUTLINE-ATOMICS-NEXT: csel w10, w0, w8, hi
-; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: ldaxrh w0, [x8]
+; OUTLINE-ATOMICS-NEXT: cmp w0, w9
+; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, hi
+; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x8]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB37_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_umax_i16:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var16
+; CHECK-REG-NEXT: add x8, x8, :lo12:var16
+; CHECK-REG-NEXT: ldumaxalh w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw umax ptr @var16, i16 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
-; CHECK: ldumaxalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i16 %old
}

define dso_local i32 @test_atomic_load_umax_i32(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_umax_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldumaxal w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var32
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB38_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9]
; OUTLINE-ATOMICS-NEXT: cmp w8, w0
; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, hi
; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB38_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov w0, w8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_umax_i32:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldumaxal w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw umax ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldumaxal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i32 %old
}

define dso_local i64 @test_atomic_load_umax_i64(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_umax_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldumaxal x0, x0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var64
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB39_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9]
; OUTLINE-ATOMICS-NEXT: cmp x8, x0
; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, hi
; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB39_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov x0, x8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_umax_i64:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldumaxal x0, x0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw umax ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldumaxal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i64 %old
}

define dso_local void @test_atomic_load_umax_i32_noret(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_umax_i32_noret:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldumaxal w0, w8, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_noret:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x8, var32
; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB40_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8]
; OUTLINE-ATOMICS-NEXT: cmp w9, w0
; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, hi
; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8]
-; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB40_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_umax_i32_noret:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldumaxal w0, w8, [x8]
+; CHECK-REG-NEXT: ret
atomicrmw umax ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldumaxal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret void
}

define dso_local void @test_atomic_load_umax_i64_noret(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_umax_i64_noret:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldumaxal x0, x8, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_noret:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x8, var64
; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB41_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8]
; OUTLINE-ATOMICS-NEXT: cmp x9, x0
; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, hi
; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8]
-; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB41_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_umax_i64_noret:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldumaxal x0, x8, [x8]
+; CHECK-REG-NEXT: ret
atomicrmw umax ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldumaxal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret void
}

define dso_local i8 @test_atomic_load_xchg_i8(i8 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_xchg_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var8
+; CHECK-NEXT: add x8, x8, :lo12:var8
+; CHECK-NEXT: swpalb w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i8:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1037,19 +1380,27 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp1_acq_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xchg_i8:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var8
+; CHECK-REG-NEXT: add x8, x8, :lo12:var8
+; CHECK-REG-NEXT: swpalb w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw xchg ptr @var8, i8 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
-; CHECK: swpalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i8 %old
}

define dso_local i16 @test_atomic_load_xchg_i16(i16 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_xchg_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var16
+; CHECK-NEXT: add x8, x8, :lo12:var16
+; CHECK-NEXT: swpalh w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i16:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1058,19 +1409,27 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp2_acq_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xchg_i16:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var16
+; CHECK-REG-NEXT: add x8, x8, :lo12:var16
+; CHECK-REG-NEXT: swpalh w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw xchg ptr @var16, i16 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
-; CHECK: swpalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i16 %old
}

define dso_local i32 @test_atomic_load_xchg_i32(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_xchg_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: swpal w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1079,19 +1438,27 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp4_acq_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xchg_i32:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: swpal w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw xchg ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: swpal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i32 %old
}

define dso_local i64 @test_atomic_load_xchg_i64(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_xchg_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: swpal x0, x0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1100,19 +1467,27 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp8_acq_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xchg_i64:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: swpal x0, x0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw xchg ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: swpal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i64 %old
}

define dso_local void @test_atomic_load_xchg_i32_noret(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_xchg_i32_noret:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: swpal w0, w8, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_noret:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1121,19 +1496,27 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp4_acq_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xchg_i32_noret:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: swpal w0, w8, [x8]
+; CHECK-REG-NEXT: ret
atomicrmw xchg ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: swpal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret void
}

define dso_local void @test_atomic_load_xchg_i64_noret(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_xchg_i64_noret:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: swpal x0, x8, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_noret:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1142,19 +1525,27 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp8_acq_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xchg_i64_noret:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: swpal x0, x8, [x8]
+; CHECK-REG-NEXT: ret
atomicrmw xchg ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: swpal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret void
}

define dso_local i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
; CHECK-LABEL: test_atomic_cmpxchg_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var8
+; CHECK-NEXT: add x8, x8, :lo12:var8
+; CHECK-NEXT: casab w0, w1, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i8:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1163,20 +1554,31 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas1_acq
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_cmpxchg_i8:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var8
+; CHECK-REG-NEXT: add x8, x8, :lo12:var8
+; CHECK-REG-NEXT: casab w0, w1, [x8]
+; CHECK-REG-NEXT: ret
%pair = cmpxchg ptr @var8, i8 %wanted, i8 %new acquire acquire
%old = extractvalue { i8, i1 } %pair, 0
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK-NEXT: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
-; CHECK-NEXT: casab w0, w1, [x[[ADDR]]]
-; CHECK-NEXT: ret
ret i8 %old
}

define dso_local i1 @test_atomic_cmpxchg_i8_1(i8 %wanted, i8 %new) nounwind {
; CHECK-LABEL: test_atomic_cmpxchg_i8_1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: adrp x9, var8
+; CHECK-NEXT: add x9, x9, :lo12:var8
+; CHECK-NEXT: casab w8, w1, [x9]
+; CHECK-NEXT: cmp w8, w0, uxtb
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i8_1:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
@@ -1188,22 +1590,31 @@
; OUTLINE-ATOMICS-NEXT: cset w0, eq
; OUTLINE-ATOMICS-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_cmpxchg_i8_1:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: mov w8, w0
+; CHECK-REG-NEXT: adrp x9, var8
+; CHECK-REG-NEXT: add x9, x9, :lo12:var8
+; CHECK-REG-NEXT: casab w8, w1, [x9]
+; CHECK-REG-NEXT: cmp w8, w0, uxtb
+; CHECK-REG-NEXT: cset w0, eq
+; CHECK-REG-NEXT: ret
%pair = cmpxchg ptr @var8, i8 %wanted, i8 %new acquire acquire
%success = extractvalue { i8, i1 } %pair, 1
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
-; CHECK: casab w[[NEW:[0-9]+]], w1, [x[[ADDR]]]
-; CHECK-NEXT: cmp w[[NEW]], w0, uxtb
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
ret i1 %success
}

define dso_local i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
; CHECK-LABEL: test_atomic_cmpxchg_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var16
+; CHECK-NEXT: add x8, x8, :lo12:var16
+; CHECK-NEXT: casah w0, w1, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i16:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1212,20 +1623,31 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas2_acq
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_cmpxchg_i16:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var16
+; CHECK-REG-NEXT: add x8, x8, :lo12:var16
+; CHECK-REG-NEXT: casah w0, w1, [x8]
+; CHECK-REG-NEXT: ret
%pair = cmpxchg ptr @var16, i16 %wanted, i16 %new acquire acquire
%old = extractvalue { i16, i1 } %pair, 0
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK-NEXT: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
-; CHECK-NEXT: casah w0, w1, [x[[ADDR]]]
-; CHECK-NEXT: ret
ret i16 %old
}

define dso_local i1 @test_atomic_cmpxchg_i16_1(i16 %wanted, i16 %new) nounwind {
; CHECK-LABEL: test_atomic_cmpxchg_i16_1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: adrp x9, var16
+; CHECK-NEXT: add x9, x9, :lo12:var16
+; CHECK-NEXT: casah w8, w1, [x9]
+; CHECK-NEXT: cmp w8, w0, uxth
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i16_1:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
@@ -1237,23 +1659,32 @@
; OUTLINE-ATOMICS-NEXT: cset w0, eq
; OUTLINE-ATOMICS-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_cmpxchg_i16_1:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: mov w8, w0
+; CHECK-REG-NEXT: adrp x9, var16
+; CHECK-REG-NEXT: add x9, x9, :lo12:var16
+; CHECK-REG-NEXT: casah w8, w1, [x9]
+; CHECK-REG-NEXT: cmp w8, w0, uxth
+; CHECK-REG-NEXT: cset w0, eq
+; CHECK-REG-NEXT: ret
%pair = cmpxchg ptr @var16, i16 %wanted, i16 %new acquire acquire
%success = extractvalue { i16, i1 } %pair, 1
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK-NEXT: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
-; CHECK: casah w[[NEW:[0-9]+]], w1, [x[[ADDR]]]
-; CHECK-NEXT: cmp w[[NEW]], w0, uxth
-; CHECK-NEXT: cset w0, eq
-; CHECK-NEXT: ret
ret i1 %success
}

define dso_local i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
; CHECK-LABEL: test_atomic_cmpxchg_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: casa w0, w1, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i32:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1262,21 +1693,29 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas4_acq
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_cmpxchg_i32:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: casa w0, w1, [x8]
+; CHECK-REG-NEXT: ret
%pair = cmpxchg ptr @var32, i32 %wanted, i32 %new acquire acquire
%old = extractvalue { i32, i1 } %pair, 0
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: casa w0, w1, [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i32 %old
}

define dso_local i32 @test_atomic_cmpxchg_i32_monotonic_acquire(i32 %wanted, i32 %new) nounwind {
; CHECK-LABEL: test_atomic_cmpxchg_i32_monotonic_acquire:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: casa w0, w1, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i32_monotonic_acquire:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1285,21 +1724,29 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas4_acq
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_cmpxchg_i32_monotonic_acquire:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: casa w0, w1, [x8]
+; CHECK-REG-NEXT: ret
%pair = cmpxchg ptr @var32, i32 %wanted, i32 %new monotonic acquire
%old = extractvalue { i32, i1 } %pair, 0
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: casa w0, w1, [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i32 %old
}

define dso_local i64 @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
; CHECK-LABEL: test_atomic_cmpxchg_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: casa x0, x1, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i64:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1308,21 +1755,33 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas8_acq
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_cmpxchg_i64:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: casa x0, x1, [x8]
+; CHECK-REG-NEXT: ret
%pair = cmpxchg ptr @var64, i64 %wanted, i64 %new acquire acquire
%old = extractvalue { i64, i1 } %pair, 0
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: casa x0, x1, [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i64 %old
}

define dso_local i128 @test_atomic_cmpxchg_i128(i128 %wanted, i128 %new) nounwind {
; CHECK-LABEL: test_atomic_cmpxchg_i128:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
+; CHECK-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1
+; CHECK-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
+; CHECK-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1
+; CHECK-NEXT: adrp x8, var128
+; CHECK-NEXT: add x8, x8, :lo12:var128
+; CHECK-NEXT: caspa x0, x1, x2, x3, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i128:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1331,21 +1790,37 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas16_acq
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_cmpxchg_i128:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
+; CHECK-REG-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1
+; CHECK-REG-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
+; CHECK-REG-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1
+; CHECK-REG-NEXT: adrp x8, var128
+; CHECK-REG-NEXT: add x8, x8, :lo12:var128
+; CHECK-REG-NEXT: caspa x0, x1, x2, x3, [x8]
+; CHECK-REG-NEXT: ret
%pair = cmpxchg ptr @var128, i128 %wanted, i128 %new acquire acquire
%old = extractvalue { i128, i1 } %pair, 0
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var128
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var128
-; CHECK: caspa x0, x1, x2, x3, [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i128 %old
}

define dso_local i128 @test_atomic_cmpxchg_i128_monotonic_seqcst(i128 %wanted, i128 %new) nounwind {
; CHECK-LABEL: test_atomic_cmpxchg_i128_monotonic_seqcst:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
+; CHECK-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1
+; CHECK-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
+; CHECK-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1
+; CHECK-NEXT: adrp x8, var128
+; CHECK-NEXT: add x8, x8, :lo12:var128
+; CHECK-NEXT: caspal x0, x1, x2, x3, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i128_monotonic_seqcst:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1354,21 +1829,37 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas16_acq_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_cmpxchg_i128_monotonic_seqcst:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
+; CHECK-REG-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1
+; CHECK-REG-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
+; CHECK-REG-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1
+; CHECK-REG-NEXT: adrp x8, var128
+; CHECK-REG-NEXT: add x8, x8, :lo12:var128
+; CHECK-REG-NEXT: caspal x0, x1, x2, x3, [x8]
+; CHECK-REG-NEXT: ret
%pair = cmpxchg ptr @var128, i128 %wanted, i128 %new monotonic seq_cst
%old = extractvalue { i128, i1 } %pair, 0
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var128
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var128
-; CHECK: caspal x0, x1, x2, x3, [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i128 %old
}

define dso_local i128 @test_atomic_cmpxchg_i128_release_acquire(i128 %wanted, i128 %new) nounwind {
; CHECK-LABEL: test_atomic_cmpxchg_i128_release_acquire:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
+; CHECK-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1
+; CHECK-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
+; CHECK-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1
+; CHECK-NEXT: adrp x8, var128
+; CHECK-NEXT: add x8, x8, :lo12:var128
+; CHECK-NEXT: caspal x0, x1, x2, x3, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i128_release_acquire:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1377,21 +1868,34 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas16_acq_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_cmpxchg_i128_release_acquire:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
+; CHECK-REG-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1
+; CHECK-REG-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
+; CHECK-REG-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1
+; CHECK-REG-NEXT: adrp x8, var128
+; CHECK-REG-NEXT: add x8, x8, :lo12:var128
+; CHECK-REG-NEXT: caspal x0, x1, x2, x3, [x8]
+; CHECK-REG-NEXT: ret
%pair = cmpxchg ptr @var128, i128 %wanted, i128 %new release acquire
%old = extractvalue { i128, i1 } %pair, 0
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var128
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var128
-; CHECK: caspal x0, x1, x2, x3, [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i128 %old
}

define dso_local i8 @test_atomic_load_sub_i8(i8 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_sub_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: neg w8, w0
+; CHECK-NEXT: adrp x9, var8
+; CHECK-NEXT: add x9, x9, :lo12:var8
+; CHECK-NEXT: ldaddalb w8, w0, [x9]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1401,20 +1905,29 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_acq_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_sub_i8:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: neg w8, w0
+; CHECK-REG-NEXT: adrp x9, var8
+; CHECK-REG-NEXT: add x9, x9, :lo12:var8
+; CHECK-REG-NEXT: ldaddalb w8, w0, [x9]
+; CHECK-REG-NEXT: ret
%old = atomicrmw sub ptr @var8, i8 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
-; CHECK: ldaddalb w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i8 %old
}

define dso_local i16 @test_atomic_load_sub_i16(i16 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_sub_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: neg w8, w0
+; CHECK-NEXT: adrp x9, var16
+; CHECK-NEXT: add x9, x9, :lo12:var16
+; CHECK-NEXT: ldaddalh w8, w0, [x9]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1424,20 +1937,29 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_acq_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_sub_i16:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: neg w8, w0
+; CHECK-REG-NEXT: adrp x9, var16
+; CHECK-REG-NEXT: add x9, x9, :lo12:var16
+; CHECK-REG-NEXT: ldaddalh w8, w0, [x9]
+; CHECK-REG-NEXT: ret
%old = atomicrmw sub ptr @var16, i16 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
-; CHECK: ldaddalh w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i16 %old
}

define dso_local i32 @test_atomic_load_sub_i32(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_sub_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: neg w8, w0
+; CHECK-NEXT: adrp x9, var32
+; CHECK-NEXT: add x9, x9, :lo12:var32
+; CHECK-NEXT: ldaddal w8, w0, [x9]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1447,20 +1969,29 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_sub_i32:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: neg w8, w0
+; CHECK-REG-NEXT: adrp x9, var32
+; CHECK-REG-NEXT: add x9, x9, :lo12:var32
+; CHECK-REG-NEXT: ldaddal w8, w0, [x9]
+; CHECK-REG-NEXT: ret
%old = atomicrmw sub ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldaddal w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i32 %old
}

define dso_local i64 @test_atomic_load_sub_i64(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_sub_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: neg x8, x0
+; CHECK-NEXT: adrp x9, var64
+; CHECK-NEXT: add x9, x9, :lo12:var64
+; CHECK-NEXT: ldaddal x8, x0, [x9]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1470,20 +2001,29 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_sub_i64:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: neg x8, x0
+; CHECK-REG-NEXT: adrp x9, var64
+; CHECK-REG-NEXT: add x9, x9, :lo12:var64
+; CHECK-REG-NEXT: ldaddal x8, x0, [x9]
+; CHECK-REG-NEXT: ret
%old = atomicrmw sub ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldaddal x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i64 %old
}

define dso_local void @test_atomic_load_sub_i32_noret(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_sub_i32_noret:
+; CHECK: // %bb.0:
+; CHECK-NEXT: neg w8, w0
+; CHECK-NEXT: adrp x9, var32
+; CHECK-NEXT: add x9, x9, :lo12:var32
+; CHECK-NEXT: ldaddal w8, w8, [x9]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_noret:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1493,20 +2033,29 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_sub_i32_noret:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: neg w8, w0
+; CHECK-REG-NEXT: adrp x9, var32
+; CHECK-REG-NEXT: add x9, x9, :lo12:var32
+; CHECK-REG-NEXT: ldaddal w8, w8, [x9]
+; CHECK-REG-NEXT: ret
atomicrmw sub ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldaddal w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret void
}

define dso_local void @test_atomic_load_sub_i64_noret(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_sub_i64_noret:
+; CHECK: // %bb.0:
+; CHECK-NEXT: neg x8, x0
+; CHECK-NEXT: adrp x9, var64
+; CHECK-NEXT: add x9, x9, :lo12:var64
+; CHECK-NEXT: ldaddal x8, x8, [x9]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_noret:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -1516,112 +2065,156 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_sub_i64_noret:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: neg x8, x0
+; CHECK-REG-NEXT: adrp x9, var64
+; CHECK-REG-NEXT: add x9, x9, :lo12:var64
+; CHECK-REG-NEXT: ldaddal x8, x8, [x9]
+; CHECK-REG-NEXT: ret
atomicrmw sub ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]]
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldaddal x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret void
}

define dso_local i8 @test_atomic_load_sub_i8_neg_imm() nounwind {
; CHECK-LABEL: test_atomic_load_sub_i8_neg_imm:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #1 // =0x1
+; CHECK-NEXT: adrp x9, var8
+; CHECK-NEXT: add x9, x9, :lo12:var8
+; CHECK-NEXT: ldaddalb w8, w0, [x9]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8_neg_imm:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]!
// 8-byte Folded Spill ; OUTLINE-ATOMICS-NEXT: adrp x1, var8 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 -; OUTLINE-ATOMICS-NEXT: mov w0, #1 +; OUTLINE-ATOMICS-NEXT: mov w0, #1 // =0x1 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i8_neg_imm: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mov w8, #1 // =0x1 +; CHECK-REG-NEXT: adrp x9, var8 +; CHECK-REG-NEXT: add x9, x9, :lo12:var8 +; CHECK-REG-NEXT: ldaddalb w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var8, i8 -1 seq_cst -; CHECK-NOT: dmb -; CHECK: mov w[[IMM:[0-9]+]], #1 -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldaddalb w[[IMM]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_sub_i16_neg_imm() nounwind { ; CHECK-LABEL: test_atomic_load_sub_i16_neg_imm: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: ldaddalh w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16_neg_imm: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; OUTLINE-ATOMICS-NEXT: adrp x1, var16 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 -; OUTLINE-ATOMICS-NEXT: mov w0, #1 +; OUTLINE-ATOMICS-NEXT: mov w0, #1 // =0x1 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i16_neg_imm: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mov w8, #1 // =0x1 +; CHECK-REG-NEXT: adrp x9, var16 +; CHECK-REG-NEXT: add x9, x9, :lo12:var16 +; CHECK-REG-NEXT: ldaddalh w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var16, i16 -1 seq_cst -; CHECK-NOT: dmb -; CHECK: mov w[[IMM:[0-9]+]], #1 -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldaddalh w[[IMM]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_sub_i32_neg_imm() nounwind { ; CHECK-LABEL: test_atomic_load_sub_i32_neg_imm: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldaddal w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_neg_imm: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; OUTLINE-ATOMICS-NEXT: adrp x1, var32 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: mov w0, #1 +; OUTLINE-ATOMICS-NEXT: mov w0, #1 // =0x1 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i32_neg_imm: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mov w8, #1 // =0x1 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldaddal w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var32, i32 -1 seq_cst -; CHECK-NOT: dmb -; CHECK: mov w[[IMM:[0-9]+]], #1 -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddal w[[IMM]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_sub_i64_neg_imm() nounwind { ; CHECK-LABEL: test_atomic_load_sub_i64_neg_imm: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldaddal x8, x0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_neg_imm: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; OUTLINE-ATOMICS-NEXT: adrp x1, var64 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: mov w0, #1 +; OUTLINE-ATOMICS-NEXT: mov w0, #1 // =0x1 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i64_neg_imm: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mov w8, #1 // =0x1 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldaddal x8, x0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var64, i64 -1 seq_cst -; CHECK-NOT: dmb -; CHECK: mov w[[IMM:[0-9]+]], #1 -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddal x[[IMM]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local i8 @test_atomic_load_sub_i8_neg_arg(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i8_neg_arg: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldaddalb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8_neg_arg: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -1630,20 +2223,28 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i8_neg_arg: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldaddalb w0, w0, [x8] +; CHECK-REG-NEXT: ret %neg = sub i8 0, %offset %old = atomicrmw sub ptr @var8, i8 %neg seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldaddalb w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_sub_i16_neg_arg(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i16_neg_arg: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldaddalh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16_neg_arg: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -1652,20 +2253,28 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i16_neg_arg: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldaddalh w0, w0, [x8] +; CHECK-REG-NEXT: ret %neg = sub i16 0, %offset %old = atomicrmw sub ptr @var16, i16 %neg seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldaddalh w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_sub_i32_neg_arg(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i32_neg_arg: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldaddal w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_neg_arg: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -1674,20 +2283,28 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i32_neg_arg: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldaddal w0, w0, [x8] +; CHECK-REG-NEXT: ret %neg = sub i32 0, %offset %old = atomicrmw sub ptr @var32, i32 %neg seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_sub_i64_neg_arg(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i64_neg_arg: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldaddal x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_neg_arg: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -1696,20 +2313,29 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i64_neg_arg: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldaddal x0, x0, [x8] +; CHECK-REG-NEXT: ret %neg = sub i64 0, %offset %old = atomicrmw sub ptr @var64, i64 %neg seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local i8 @test_atomic_load_and_i8(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: ldclralb w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -1719,19 +2345,28 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr1_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i8: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var8 +; CHECK-REG-NEXT: add x9, x9, :lo12:var8 +; CHECK-REG-NEXT: ldclralb w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldclralb w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_and_i16(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: ldclralh w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -1741,19 +2376,28 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr2_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i16: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var16 +; CHECK-REG-NEXT: add x9, x9, :lo12:var16 +; CHECK-REG-NEXT: ldclralh w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldclralh w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_and_i32(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldclral w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -1763,19 +2407,28 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i32: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldclral w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldclral w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_and_i64(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldclral x8, x0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -1785,103 +2438,147 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i64: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldclral x8, x0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldclral x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local i8 @test_atomic_load_and_i8_inv_imm() nounwind { ; CHECK-LABEL: test_atomic_load_and_i8_inv_imm: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: ldclralb w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_inv_imm: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; OUTLINE-ATOMICS-NEXT: adrp x1, var8 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var8 -; OUTLINE-ATOMICS-NEXT: mov w0, #1 +; OUTLINE-ATOMICS-NEXT: mov w0, #1 // =0x1 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr1_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i8_inv_imm: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mov w8, #1 // =0x1 +; CHECK-REG-NEXT: adrp x9, var8 +; CHECK-REG-NEXT: add x9, x9, :lo12:var8 +; CHECK-REG-NEXT: ldclralb w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var8, i8 -2 seq_cst -; CHECK-NOT: dmb -; CHECK: mov w[[CONST:[0-9]+]], #1 -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldclralb w[[CONST]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_and_i16_inv_imm() nounwind { ; CHECK-LABEL: test_atomic_load_and_i16_inv_imm: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: ldclralh w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_inv_imm: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; OUTLINE-ATOMICS-NEXT: adrp x1, var16 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var16 -; OUTLINE-ATOMICS-NEXT: mov w0, #1 +; OUTLINE-ATOMICS-NEXT: mov w0, #1 // =0x1 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr2_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i16_inv_imm: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mov w8, #1 // =0x1 +; CHECK-REG-NEXT: adrp x9, var16 +; CHECK-REG-NEXT: add x9, x9, :lo12:var16 +; CHECK-REG-NEXT: ldclralh w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var16, i16 -2 seq_cst -; CHECK-NOT: dmb -; CHECK: mov w[[CONST:[0-9]+]], #1 -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldclralh w[[CONST]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_and_i32_inv_imm() nounwind { ; CHECK-LABEL: test_atomic_load_and_i32_inv_imm: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldclral w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_inv_imm: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill ; OUTLINE-ATOMICS-NEXT: adrp x1, var32 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: mov w0, #1 +; OUTLINE-ATOMICS-NEXT: mov w0, #1 // =0x1 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i32_inv_imm: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mov w8, #1 // =0x1 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldclral w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var32, i32 -2 seq_cst -; CHECK-NOT: dmb -; CHECK: mov w[[CONST:[0-9]+]], #1 -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldclral w[[CONST]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_and_i64_inv_imm() nounwind { ; CHECK-LABEL: test_atomic_load_and_i64_inv_imm: +; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldclral x8, x0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_inv_imm: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; OUTLINE-ATOMICS-NEXT: adrp x1, var64 ; OUTLINE-ATOMICS-NEXT: add x1, x1, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: mov w0, #1 +; OUTLINE-ATOMICS-NEXT: mov w0, #1 // =0x1 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i64_inv_imm: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mov w8, #1 // =0x1 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldclral x8, x0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var64, i64 -2 seq_cst -; CHECK-NOT: dmb -; CHECK: mov w[[CONST:[0-9]+]], #1 -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldclral x[[CONST]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local i8 @test_atomic_load_and_i8_inv_arg(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i8_inv_arg: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldclralb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_inv_arg: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -1890,18 +2587,26 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr1_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i8_inv_arg: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldclralb w0, w0, [x8] +; CHECK-REG-NEXT: ret %inv = xor i8 %offset, -1 %old = atomicrmw and ptr @var8, i8 %inv seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldclralb w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_and_i16_inv_arg(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i16_inv_arg: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldclralh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_inv_arg: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -1910,18 +2615,26 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr2_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i16_inv_arg: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldclralh w0, w0, [x8] +; CHECK-REG-NEXT: ret %inv = xor i16 %offset, -1 %old = atomicrmw and ptr @var16, i16 %inv seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldclralh w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_and_i32_inv_arg(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i32_inv_arg: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldclral w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_inv_arg: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -1930,18 +2643,26 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i32_inv_arg: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldclral w0, w0, [x8] +; CHECK-REG-NEXT: ret %inv = xor i32 %offset, -1 %old = atomicrmw and ptr @var32, i32 %inv seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldclral w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_and_i64_inv_arg(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i64_inv_arg: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldclral x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_inv_arg: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -1950,18 +2671,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i64_inv_arg: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldclral x0, x0, [x8] +; CHECK-REG-NEXT: ret %inv = xor i64 %offset, -1 %old = atomicrmw and ptr @var64, i64 %inv seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldclral x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_and_i32_noret(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i32_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldclral w8, w8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_noret: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -1971,19 +2701,28 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i32_noret: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldclral w8, w8, [x9] +; CHECK-REG-NEXT: ret atomicrmw and ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldclral w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_and_i64_noret(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i64_noret: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldclral x8, x8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_noret: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -1993,19 +2732,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i64_noret: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldclral x8, x8, [x9] +; CHECK-REG-NEXT: ret atomicrmw and ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldclral x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_add_i8_acq_rel(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i8_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldaddalb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i8_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2014,19 +2761,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i8_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldaddalb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var8, i8 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldaddalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_add_i16_acq_rel(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i16_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldaddalh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i16_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -2035,19 +2790,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i16_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldaddalh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var16, i16 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldaddalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_add_i32_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i32_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldaddal w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -2056,19 +2819,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i32_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldaddal w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_add_i64_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i64_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldaddal x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2077,19 +2848,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i64_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldaddal x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_add_i32_noret_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i32_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldaddal w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_noret_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -2098,18 +2877,26 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i32_noret_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldaddal w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw add ptr @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_add_i64_noret_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i64_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldaddal x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_noret_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -2118,18 +2905,26 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i64_noret_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldaddal x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw add ptr @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_add_i8_acquire(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i8_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldaddab w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i8_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2138,19 +2933,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i8_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldaddab w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var8, i8 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldaddab w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_add_i16_acquire(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i16_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldaddah w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i16_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -2159,19 +2962,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i16_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldaddah w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var16, i16 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldaddah w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_add_i32_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i32_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldadda w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -2180,19 +2991,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i32_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldadda w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldadda w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_add_i64_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i64_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldadda x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2201,19 +3020,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i64_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldadda x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldadda x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_add_i32_noret_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i32_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldadda w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_noret_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -2222,18 +3049,26 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i32_noret_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldadda w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw add ptr @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldadda w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_add_i64_noret_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i64_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldadda x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_noret_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -2242,18 +3077,26 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i64_noret_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldadda x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw add ptr @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldadda x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_add_i8_monotonic(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i8_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldaddb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i8_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2262,19 +3105,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i8_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldaddb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var8, i8 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldaddb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_add_i16_monotonic(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i16_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldaddh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i16_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -2283,19 +3134,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i16_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldaddh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var16, i16 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldaddh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_add_i32_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i32_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldadd w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -2304,19 +3163,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i32_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldadd w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldadd w[[OLD:[0-9]+]], w[[NEW:[0-9,a-z]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_add_i64_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i64_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldadd x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2325,19 +3192,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i64_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldadd x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldadd x[[OLD:[0-9]+]], x[[NEW:[0-9,a-z]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_add_i32_noret_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i32_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldadd w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_noret_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -2346,18 +3221,26 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i32_noret_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldadd w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw add ptr @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldadd w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_add_i64_noret_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i64_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldadd x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_noret_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -2366,18 +3249,26 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i64_noret_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldadd x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw add ptr @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldadd x{{[0-9]}}, x{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_add_i8_release(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i8_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldaddlb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i8_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2386,19 +3277,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i8_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldaddlb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var8, i8 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldaddlb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_add_i16_release(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i16_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldaddlh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i16_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -2407,19 +3306,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i16_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldaddlh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var16, i16 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldaddlh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_add_i32_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i32_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldaddl w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -2428,19 +3335,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i32_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldaddl w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddl w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_add_i64_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i64_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldaddl x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2449,19 +3364,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i64_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldaddl x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddl x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_add_i32_noret_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i32_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldaddl w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_noret_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -2470,18 +3393,26 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i32_noret_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldaddl w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw add ptr @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddl w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_add_i64_noret_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i64_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldaddl x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_noret_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -2490,18 +3421,26 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i64_noret_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldaddl x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw add ptr @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddl x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_add_i8_seq_cst(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i8_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldaddalb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i8_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2510,19 +3449,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i8_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldaddalb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldaddalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_add_i16_seq_cst(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i16_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldaddalh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i16_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -2531,19 +3478,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i16_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldaddalh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldaddalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_add_i32_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i32_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldaddal w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -2552,19 +3507,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i32_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldaddal w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_add_i64_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i64_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldaddal x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2573,19 +3536,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i64_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldaddal x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw add ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_add_i32_noret_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i32_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldaddal w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i32_noret_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -2594,18 +3565,26 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i32_noret_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldaddal w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw add ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_add_i64_noret_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_add_i64_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldaddal x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_add_i64_noret_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -2614,18 +3593,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_add_i64_noret_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldaddal x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw add ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_and_i8_acq_rel(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i8_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: ldclralb w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2635,19 +3623,28 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr1_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i8_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var8 +; CHECK-REG-NEXT: add x9, x9, :lo12:var8 +; CHECK-REG-NEXT: ldclralb w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var8, i8 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldclralb w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_and_i16_acq_rel(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i16_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: ldclralh w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -2657,19 +3654,28 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr2_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i16_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var16 +; CHECK-REG-NEXT: add x9, x9, :lo12:var16 +; CHECK-REG-NEXT: ldclralh w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var16, i16 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldclralh w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_and_i32_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i32_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldclral w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2679,19 +3685,28 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i32_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldclral w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldclral w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_and_i64_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i64_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldclral x8, x0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -2701,19 +3716,28 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i64_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldclral x8, x0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldclral x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_and_i32_noret_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i32_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldclral w8, w8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_noret_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2723,19 +3747,28 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i32_noret_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldclral w8, w8, [x9] +; CHECK-REG-NEXT: ret atomicrmw and ptr @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldclral w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_and_i64_noret_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i64_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldclral x8, x8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_noret_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -2745,19 +3778,28 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i64_noret_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldclral x8, x8, [x9] +; CHECK-REG-NEXT: ret atomicrmw and ptr @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldclral x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_and_i8_acquire(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i8_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: ldclrab w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2767,19 +3809,28 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr1_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i8_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var8 +; CHECK-REG-NEXT: add x9, x9, :lo12:var8 +; CHECK-REG-NEXT: ldclrab w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var8, i8 %offset acquire -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldclrab w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_and_i16_acquire(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i16_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: ldclrah w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -2789,19 +3840,28 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr2_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i16_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var16 +; CHECK-REG-NEXT: add x9, x9, :lo12:var16 +; CHECK-REG-NEXT: ldclrah w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var16, i16 %offset acquire -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldclrah w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_and_i32_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i32_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldclra w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -2811,19 +3871,28 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i32_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldclra w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldclra w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_and_i64_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i64_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldclra x8, x0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2833,19 +3902,28 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i64_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldclra x8, x0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldclra x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_and_i32_noret_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i32_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldclra w8, w8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_noret_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -2855,19 +3933,28 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i32_noret_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldclra w8, w8, [x9] +; CHECK-REG-NEXT: ret atomicrmw and ptr @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldclra w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_and_i64_noret_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i64_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldclra x8, x8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_noret_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2877,19 +3964,28 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i64_noret_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldclra x8, x8, [x9] +; CHECK-REG-NEXT: ret atomicrmw and ptr @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldclra x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_and_i8_monotonic(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i8_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: ldclrb w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -2899,19 +3995,28 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr1_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i8_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var8 +; CHECK-REG-NEXT: add x9, x9, :lo12:var8 +; CHECK-REG-NEXT: ldclrb w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var8, i8 %offset monotonic -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldclrb w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_and_i16_monotonic(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i16_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: ldclrh w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2921,19 +4026,28 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr2_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i16_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var16 +; CHECK-REG-NEXT: add x9, x9, :lo12:var16 +; CHECK-REG-NEXT: ldclrh w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var16, i16 %offset monotonic -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldclrh w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_and_i32_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i32_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldclr w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -2943,19 +4057,28 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i32_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldclr w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldclr w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_and_i64_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i64_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldclr x8, x0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -2965,19 +4088,28 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i64_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldclr x8, x0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldclr x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_and_i32_noret_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i32_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldclr w8, w8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_noret_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -2987,19 +4119,28 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i32_noret_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldclr w8, w8, [x9] +; CHECK-REG-NEXT: ret atomicrmw and ptr @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldclr w{{[0-9]+}}, w[[NEW:[1-9][0-9]*]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_and_i64_noret_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i64_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldclr x8, x8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_noret_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -3009,19 +4150,28 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i64_noret_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldclr x8, x8, [x9] +; CHECK-REG-NEXT: ret atomicrmw and ptr @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldclr x{{[0-9]+}}, x[[NEW:[1-9][0-9]*]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_and_i8_release(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i8_release: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: ldclrlb w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -3031,19 +4181,28 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr1_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i8_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var8 +; CHECK-REG-NEXT: add x9, x9, :lo12:var8 +; CHECK-REG-NEXT: ldclrlb w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var8, i8 %offset release -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldclrlb w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_and_i16_release(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i16_release: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: ldclrlh w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -3053,19 +4212,28 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr2_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i16_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var16 +; CHECK-REG-NEXT: add x9, x9, :lo12:var16 +; CHECK-REG-NEXT: ldclrlh w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var16, i16 %offset release -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldclrlh w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_and_i32_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i32_release: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldclrl w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -3075,19 +4243,28 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i32_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldclrl w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldclrl w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_and_i64_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i64_release: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldclrl x8, x0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -3097,19 +4274,28 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i64_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldclrl x8, x0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldclrl x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_and_i32_noret_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i32_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldclrl w8, w8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_noret_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -3119,19 +4305,28 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i32_noret_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldclrl w8, w8, [x9] +; CHECK-REG-NEXT: ret atomicrmw and ptr @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldclrl w{{[0-9]*}}, w[[NEW:[1-9][0-9]*]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_and_i64_noret_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i64_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldclrl x8, x8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_noret_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -3141,19 +4336,28 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i64_noret_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldclrl x8, x8, [x9] +; CHECK-REG-NEXT: ret atomicrmw and ptr @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldclrl x{{[0-9]*}}, x[[NEW:[1-9][0-9]*]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_and_i8_seq_cst(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i8_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: ldclralb w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i8_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -3163,19 +4367,28 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr1_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i8_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var8 +; CHECK-REG-NEXT: add x9, x9, :lo12:var8 +; CHECK-REG-NEXT: ldclralb w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldclralb w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_and_i16_seq_cst(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i16_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: ldclralh w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i16_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -3185,19 +4398,28 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr2_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i16_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var16 +; CHECK-REG-NEXT: add x9, x9, :lo12:var16 +; CHECK-REG-NEXT: ldclralh w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldclralh w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_and_i32_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i32_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldclral w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -3207,19 +4429,28 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i32_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldclral w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldclral w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_and_i64_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i64_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldclral x8, x0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -3229,19 +4460,28 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i64_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldclral x8, x0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw and ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldclral x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_and_i32_noret_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i32_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldclral w8, w8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i32_noret_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -3251,19 +4491,28 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i32_noret_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldclral w8, w8, [x9] +; CHECK-REG-NEXT: ret atomicrmw and ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: mvn w[[NOT:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldclral w[[NOT]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_and_i64_noret_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_and_i64_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: mvn x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldclral x8, x8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_and_i64_noret_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -3273,19 +4522,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldclr8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_and_i64_noret_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: mvn x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldclral x8, x8, [x9] +; CHECK-REG-NEXT: ret atomicrmw and ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: mvn x[[NOT:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldclral x[[NOT]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_cmpxchg_i8_acquire(i8 %wanted, i8 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i8_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: casab w0, w1, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i8_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -3294,21 +4551,29 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas1_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_cmpxchg_i8_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: casab w0, w1, [x8] +; CHECK-REG-NEXT: ret %pair = cmpxchg ptr @var8, i8 %wanted, i8 %new acquire acquire %old = extractvalue { i8, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: casab w[[NEW:[0-9]+]], w[[OLD:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_cmpxchg_i16_acquire(i16 %wanted, i16 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i16_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: casah w0, w1, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i16_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -3317,21 +4582,29 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas2_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_cmpxchg_i16_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: casah w0, w1, [x8] +; CHECK-REG-NEXT: ret %pair = cmpxchg ptr @var16, i16 %wanted, i16 %new acquire acquire %old = extractvalue { i16, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: casah w0, w1, [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_cmpxchg_i32_acquire(i32 %wanted, i32 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i32_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: casa w0, w1, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i32_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -3340,21 +4613,29 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas4_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_cmpxchg_i32_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: casa w0, w1, [x8] +; CHECK-REG-NEXT: ret %pair = cmpxchg ptr @var32, i32 %wanted, i32 %new acquire acquire %old = extractvalue { i32, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: casa w0, w1, [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_cmpxchg_i64_acquire(i64 %wanted, i64 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i64_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: casa x0, x1, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i64_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -3363,21 +4644,33 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas8_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_cmpxchg_i64_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: casa x0, x1, [x8] +; CHECK-REG-NEXT: ret %pair = cmpxchg ptr @var64, i64 %wanted, i64 %new acquire acquire %old = extractvalue { i64, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: casa x0, x1, [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local i128 @test_atomic_cmpxchg_i128_acquire(i128 %wanted, i128 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i128_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 +; CHECK-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1 +; CHECK-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 +; CHECK-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1 +; CHECK-NEXT: adrp x8, var128 +; CHECK-NEXT: add x8, x8, :lo12:var128 +; CHECK-NEXT: caspa x0, x1, x2, x3, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i128_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -3386,21 +4679,33 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas16_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_cmpxchg_i128_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 +; CHECK-REG-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1 +; CHECK-REG-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 +; CHECK-REG-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1 +; CHECK-REG-NEXT: adrp x8, var128 +; CHECK-REG-NEXT: add x8, x8, :lo12:var128 +; CHECK-REG-NEXT: caspa x0, x1, x2, x3, [x8] +; CHECK-REG-NEXT: ret %pair = cmpxchg ptr @var128, i128 %wanted, i128 %new acquire acquire %old = extractvalue { i128, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var128 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var128 -; CHECK: caspa x0, x1, x2, x3, [x[[ADDR]]] -; CHECK-NOT: dmb ret i128 %old } define dso_local i8 @test_atomic_cmpxchg_i8_monotonic(i8 %wanted, i8 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i8_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: casb w0, w1, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i8_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -3409,21 +4714,29 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas1_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_cmpxchg_i8_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: casb w0, w1, [x8] +; CHECK-REG-NEXT: ret %pair = cmpxchg ptr @var8, i8 %wanted, i8 %new monotonic monotonic %old = extractvalue { i8, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: casb w[[NEW:[0-9]+]], w[[OLD:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_cmpxchg_i16_monotonic(i16 %wanted, i16 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i16_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: cash w0, w1, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i16_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -3432,21 +4745,29 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas2_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_cmpxchg_i16_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: cash w0, w1, [x8] +; CHECK-REG-NEXT: ret %pair = cmpxchg ptr @var16, i16 %wanted, i16 %new monotonic monotonic %old = extractvalue { i16, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: cash w0, w1, [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_cmpxchg_i32_monotonic(i32 %wanted, i32 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i32_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: cas w0, w1, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i32_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -3455,21 +4776,29 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas4_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_cmpxchg_i32_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: cas w0, w1, [x8] +; CHECK-REG-NEXT: ret %pair = cmpxchg ptr @var32, i32 %wanted, i32 %new monotonic monotonic %old = extractvalue { i32, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: cas w0, w1, [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_cmpxchg_i64_monotonic(i64 %wanted, i64 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i64_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: cas x0, x1, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i64_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -3478,21 +4807,33 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas8_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_cmpxchg_i64_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: cas x0, x1, [x8] +; CHECK-REG-NEXT: ret %pair = cmpxchg ptr @var64, i64 %wanted, i64 %new monotonic monotonic %old = extractvalue { i64, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: cas x0, x1, [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local i128 @test_atomic_cmpxchg_i128_monotonic(i128 %wanted, i128 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i128_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 +; CHECK-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1 +; CHECK-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 +; CHECK-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1 +; CHECK-NEXT: adrp x8, var128 +; CHECK-NEXT: add x8, x8, :lo12:var128 +; CHECK-NEXT: casp x0, x1, x2, x3, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i128_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -3501,21 +4842,33 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas16_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_cmpxchg_i128_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3 +; CHECK-REG-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1 +; CHECK-REG-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3 +; CHECK-REG-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1 +; CHECK-REG-NEXT: adrp x8, var128 +; CHECK-REG-NEXT: add x8, x8, :lo12:var128 +; CHECK-REG-NEXT: casp x0, x1, x2, x3, [x8] +; CHECK-REG-NEXT: ret %pair = cmpxchg ptr @var128, i128 %wanted, i128 %new monotonic monotonic %old = extractvalue { i128, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var128 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var128 -; CHECK: casp x0, x1, x2, x3, [x[[ADDR]]] -; CHECK-NOT: dmb ret i128 %old } define dso_local i8 @test_atomic_cmpxchg_i8_seq_cst(i8 %wanted, i8 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i8_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: casalb w0, w1, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i8_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -3524,21 +4877,29 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas1_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_cmpxchg_i8_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: casalb w0, w1, [x8] +; CHECK-REG-NEXT: ret %pair = cmpxchg ptr @var8, i8 %wanted, i8 %new seq_cst seq_cst %old = extractvalue { i8, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: casalb w[[NEW:[0-9]+]], w[[OLD:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_cmpxchg_i16_seq_cst(i16 %wanted, i16 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i16_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: casalh w0, w1, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i16_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -3547,21 +4908,29 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas2_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_cmpxchg_i16_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: casalh w0, w1, [x8] +; CHECK-REG-NEXT: ret %pair = cmpxchg ptr @var16, i16 %wanted, i16 %new seq_cst seq_cst %old = extractvalue { i16, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: casalh w0, w1, [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_cmpxchg_i32_seq_cst(i32 %wanted, i32 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i32_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: casal w0, w1, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i32_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -3570,21 +4939,29 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_cmpxchg_i32_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: casal w0, w1, [x8] +; CHECK-REG-NEXT: ret %pair = cmpxchg ptr @var32, i32 %wanted, i32 %new seq_cst seq_cst %old = extractvalue { i32, i1 } %pair, 0 -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: casal w0, w1, [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i32 @test_atomic_cmpxchg_i32_monotonic_seq_cst(i32 %wanted, i32 %new) nounwind { ; CHECK-LABEL: test_atomic_cmpxchg_i32_monotonic_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: casal w0, w1, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i32_monotonic_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
@@ -3593,21 +4970,29 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas4_acq_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_cmpxchg_i32_monotonic_seq_cst:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: casal w0, w1, [x8]
+; CHECK-REG-NEXT: ret
%pair = cmpxchg ptr @var32, i32 %wanted, i32 %new monotonic seq_cst
%old = extractvalue { i32, i1 } %pair, 0
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: casal w0, w1, [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i32 %old
}

define dso_local i32 @test_atomic_cmpxchg_i32_release_acquire(i32 %wanted, i32 %new) nounwind {
; CHECK-LABEL: test_atomic_cmpxchg_i32_release_acquire:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: casal w0, w1, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i32_release_acquire:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -3616,21 +5001,29 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas4_acq_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_cmpxchg_i32_release_acquire:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: casal w0, w1, [x8]
+; CHECK-REG-NEXT: ret
%pair = cmpxchg ptr @var32, i32 %wanted, i32 %new release acquire
%old = extractvalue { i32, i1 } %pair, 0
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: casal w0, w1, [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i32 %old
}

define dso_local i64 @test_atomic_cmpxchg_i64_seq_cst(i64 %wanted, i64 %new) nounwind {
; CHECK-LABEL: test_atomic_cmpxchg_i64_seq_cst:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: casal x0, x1, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i64_seq_cst:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -3639,21 +5032,33 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas8_acq_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_cmpxchg_i64_seq_cst:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: casal x0, x1, [x8]
+; CHECK-REG-NEXT: ret
%pair = cmpxchg ptr @var64, i64 %wanted, i64 %new seq_cst seq_cst
%old = extractvalue { i64, i1 } %pair, 0
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: casal x0, x1, [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i64 %old
}

define dso_local i128 @test_atomic_cmpxchg_i128_seq_cst(i128 %wanted, i128 %new) nounwind {
; CHECK-LABEL: test_atomic_cmpxchg_i128_seq_cst:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
+; CHECK-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1
+; CHECK-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
+; CHECK-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1
+; CHECK-NEXT: adrp x8, var128
+; CHECK-NEXT: add x8, x8, :lo12:var128
+; CHECK-NEXT: caspal x0, x1, x2, x3, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_cmpxchg_i128_seq_cst:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -3662,1621 +5067,2113 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_cas16_acq_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_cmpxchg_i128_seq_cst:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: // kill: def $x3 killed $x3 killed $x2_x3 def $x2_x3
+; CHECK-REG-NEXT: // kill: def $x1 killed $x1 killed $x0_x1 def $x0_x1
+; CHECK-REG-NEXT: // kill: def $x2 killed $x2 killed $x2_x3 def $x2_x3
+; CHECK-REG-NEXT: // kill: def $x0 killed $x0 killed $x0_x1 def $x0_x1
+; CHECK-REG-NEXT: adrp x8, var128
+; CHECK-REG-NEXT: add x8, x8, :lo12:var128
+; CHECK-REG-NEXT: caspal x0, x1, x2, x3, [x8]
+; CHECK-REG-NEXT: ret
%pair = cmpxchg ptr @var128, i128 %wanted, i128 %new seq_cst seq_cst
%old = extractvalue { i128, i1 } %pair, 0
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var128
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var128
-; CHECK: caspal x0, x1, x2, x3, [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i128 %old
}

define dso_local i8 @test_atomic_load_max_i8_acq_rel(i8 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_max_i8_acq_rel:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var8
+; CHECK-NEXT: add x8, x8, :lo12:var8
+; CHECK-NEXT: ldsmaxalb w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i8_acq_rel:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var8
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB163_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxrb w10, [x9]
; OUTLINE-ATOMICS-NEXT: sxtb w8, w10
; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxtb
; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, gt
; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB163_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov w0, w8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_max_i8_acq_rel:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var8
+; CHECK-REG-NEXT: add x8, x8, :lo12:var8
+; CHECK-REG-NEXT: ldsmaxalb w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw max ptr @var8, i8 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
-; CHECK: ldsmaxalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i8 %old
}

define dso_local i16 @test_atomic_load_max_i16_acq_rel(i16 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_max_i16_acq_rel:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var16
+; CHECK-NEXT: add x8, x8, :lo12:var16
+; CHECK-NEXT: ldsmaxalh w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i16_acq_rel:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var16
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB164_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxrh w10, [x9]
; OUTLINE-ATOMICS-NEXT: sxth w8, w10
; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxth
; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, gt
; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB164_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov w0, w8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_max_i16_acq_rel:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var16
+; CHECK-REG-NEXT: add x8, x8, :lo12:var16
+; CHECK-REG-NEXT: ldsmaxalh w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw max ptr @var16, i16 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
-; CHECK: ldsmaxalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i16 %old
}

define dso_local i32 @test_atomic_load_max_i32_acq_rel(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_max_i32_acq_rel:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldsmaxal w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_acq_rel:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var32
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB165_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9]
; OUTLINE-ATOMICS-NEXT: cmp w8, w0
; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, gt
; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB165_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov w0, w8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_max_i32_acq_rel:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldsmaxal w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw max ptr @var32, i32 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldsmaxal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i32 %old
}

define dso_local i64 @test_atomic_load_max_i64_acq_rel(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_max_i64_acq_rel:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldsmaxal x0, x0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_acq_rel:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var64
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB166_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9]
; OUTLINE-ATOMICS-NEXT: cmp x8, x0
; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, gt
; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB166_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov x0, x8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_max_i64_acq_rel:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldsmaxal x0, x0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw max ptr @var64, i64 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldsmaxal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i64 %old
}

define dso_local void @test_atomic_load_max_i32_noret_acq_rel(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_max_i32_noret_acq_rel:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldsmaxal w0, w8, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_noret_acq_rel:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x8, var32
; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB167_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8]
; OUTLINE-ATOMICS-NEXT: cmp w9, w0
; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, gt
; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8]
-; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB167_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_max_i32_noret_acq_rel:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldsmaxal w0, w8, [x8]
+; CHECK-REG-NEXT: ret
atomicrmw max ptr @var32, i32 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldsmaxal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret void
}

define dso_local void @test_atomic_load_max_i64_noret_acq_rel(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_max_i64_noret_acq_rel:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldsmaxal x0, x8, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_noret_acq_rel:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x8, var64
; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB168_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8]
; OUTLINE-ATOMICS-NEXT: cmp x9, x0
; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, gt
; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8]
-; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB168_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_max_i64_noret_acq_rel:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldsmaxal x0, x8, [x8]
+; CHECK-REG-NEXT: ret
atomicrmw max ptr @var64, i64 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldsmaxal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret void
}

define dso_local i8 @test_atomic_load_max_i8_acquire(i8 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_max_i8_acquire:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var8
+; CHECK-NEXT: add x8, x8, :lo12:var8
+; CHECK-NEXT: ldsmaxab w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i8_acquire:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var8
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB169_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxrb w10, [x9]
; OUTLINE-ATOMICS-NEXT: sxtb w8, w10
; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxtb
; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, gt
; OUTLINE-ATOMICS-NEXT: stxrb w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB169_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov w0, w8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_max_i8_acquire:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var8
+; CHECK-REG-NEXT: add x8, x8, :lo12:var8
+; CHECK-REG-NEXT: ldsmaxab w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw max ptr @var8, i8 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
-; CHECK: ldsmaxab w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i8 %old
}

define dso_local i16 @test_atomic_load_max_i16_acquire(i16 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_max_i16_acquire:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var16
+; CHECK-NEXT: add x8, x8, :lo12:var16
+; CHECK-NEXT: ldsmaxah w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i16_acquire:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var16
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB170_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxrh w10, [x9]
; OUTLINE-ATOMICS-NEXT: sxth w8, w10
; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxth
; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, gt
; OUTLINE-ATOMICS-NEXT: stxrh w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB170_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov w0, w8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_max_i16_acquire:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var16
+; CHECK-REG-NEXT: add x8, x8, :lo12:var16
+; CHECK-REG-NEXT: ldsmaxah w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw max ptr @var16, i16 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
-; CHECK: ldsmaxah w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i16 %old
}

define dso_local i32 @test_atomic_load_max_i32_acquire(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_max_i32_acquire:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldsmaxa w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_acquire:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var32
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB171_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9]
; OUTLINE-ATOMICS-NEXT: cmp w8, w0
; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, gt
; OUTLINE-ATOMICS-NEXT: stxr w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB171_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov w0, w8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_max_i32_acquire:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldsmaxa w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw max ptr @var32, i32 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldsmaxa w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i32 %old
}

define dso_local i64 @test_atomic_load_max_i64_acquire(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_max_i64_acquire:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldsmaxa x0, x0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_acquire:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var64
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB172_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9]
; OUTLINE-ATOMICS-NEXT: cmp x8, x0
; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, gt
; OUTLINE-ATOMICS-NEXT: stxr w11, x10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB172_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov x0, x8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_max_i64_acquire:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldsmaxa x0, x0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw max ptr @var64, i64 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldsmaxa x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i64 %old
}

define dso_local void @test_atomic_load_max_i32_noret_acquire(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_max_i32_noret_acquire:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldsmaxa w0, w8, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_noret_acquire:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x8, var32
; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB173_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8]
; OUTLINE-ATOMICS-NEXT: cmp w9, w0
; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, gt
; OUTLINE-ATOMICS-NEXT: stxr w10, w9, [x8]
-; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB173_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_max_i32_noret_acquire:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldsmaxa w0, w8, [x8]
+; CHECK-REG-NEXT: ret
atomicrmw max ptr @var32, i32 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldsmaxa w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret void
}

define dso_local void @test_atomic_load_max_i64_noret_acquire(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_max_i64_noret_acquire:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldsmaxa x0, x8, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_noret_acquire:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x8, var64
; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB174_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8]
; OUTLINE-ATOMICS-NEXT: cmp x9, x0
; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, gt
; OUTLINE-ATOMICS-NEXT: stxr w10, x9, [x8]
-; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB174_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_max_i64_noret_acquire:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldsmaxa x0, x8, [x8]
+; CHECK-REG-NEXT: ret
atomicrmw max ptr @var64, i64 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldsmaxa x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret void
}

define dso_local i8 @test_atomic_load_max_i8_monotonic(i8 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_max_i8_monotonic:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var8
+; CHECK-NEXT: add x8, x8, :lo12:var8
+; CHECK-NEXT: ldsmaxb w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i8_monotonic:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var8
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB175_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldxrb w10, [x9]
; OUTLINE-ATOMICS-NEXT: sxtb w8, w10
; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxtb
; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, gt
; OUTLINE-ATOMICS-NEXT: stxrb w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB175_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov w0, w8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_max_i8_monotonic:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var8
+; CHECK-REG-NEXT: add x8, x8, :lo12:var8
+; CHECK-REG-NEXT: ldsmaxb w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw max ptr @var8, i8 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
-; CHECK: ldsmaxb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i8 %old
}

define dso_local i16 @test_atomic_load_max_i16_monotonic(i16 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_max_i16_monotonic:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var16
+; CHECK-NEXT: add x8, x8, :lo12:var16
+; CHECK-NEXT: ldsmaxh w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i16_monotonic:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var16
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB176_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldxrh w10, [x9]
; OUTLINE-ATOMICS-NEXT: sxth w8, w10
; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxth
; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, gt
; OUTLINE-ATOMICS-NEXT: stxrh w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB176_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov w0, w8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_max_i16_monotonic:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var16
+; CHECK-REG-NEXT: add x8, x8, :lo12:var16
+; CHECK-REG-NEXT: ldsmaxh w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw max ptr @var16, i16 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
-; CHECK: ldsmaxh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i16 %old
}

define dso_local i32 @test_atomic_load_max_i32_monotonic(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_max_i32_monotonic:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldsmax w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_monotonic:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var32
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB177_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldxr w8, [x9]
; OUTLINE-ATOMICS-NEXT: cmp w8, w0
; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, gt
; OUTLINE-ATOMICS-NEXT: stxr w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB177_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov w0, w8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_max_i32_monotonic:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldsmax w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw max ptr @var32, i32 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldsmax w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i32 %old
}

define dso_local i64 @test_atomic_load_max_i64_monotonic(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_max_i64_monotonic:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldsmax x0, x0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_monotonic:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var64
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB178_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldxr x8, [x9]
; OUTLINE-ATOMICS-NEXT: cmp x8, x0
; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, gt
; OUTLINE-ATOMICS-NEXT: stxr w11, x10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB178_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov x0, x8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_max_i64_monotonic:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldsmax x0, x0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw max ptr @var64, i64 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldsmax x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i64 %old
}

define dso_local void @test_atomic_load_max_i32_noret_monotonic(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_max_i32_noret_monotonic:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldsmax w0, w8, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_noret_monotonic:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x8, var32
; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB179_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldxr w9, [x8]
; OUTLINE-ATOMICS-NEXT: cmp w9, w0
; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, gt
; OUTLINE-ATOMICS-NEXT: stxr w10, w9, [x8]
-; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB179_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_max_i32_noret_monotonic:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldsmax w0, w8, [x8]
+; CHECK-REG-NEXT: ret
atomicrmw max ptr @var32, i32 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldsmax w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
ret void
}

define dso_local void @test_atomic_load_max_i64_noret_monotonic(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_max_i64_noret_monotonic:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldsmax x0, x8, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_noret_monotonic:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x8, var64
; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB180_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldxr x9, [x8]
; OUTLINE-ATOMICS-NEXT: cmp x9, x0
; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, gt
; OUTLINE-ATOMICS-NEXT: stxr w10, x9, [x8]
-; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB180_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_max_i64_noret_monotonic:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldsmax x0, x8, [x8]
+; CHECK-REG-NEXT: ret
atomicrmw max ptr @var64, i64 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldsmax x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
ret void
}

define dso_local i8 @test_atomic_load_max_i8_release(i8 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_max_i8_release:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var8
+; CHECK-NEXT: add x8, x8, :lo12:var8
+; CHECK-NEXT: ldsmaxlb w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i8_release:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var8
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB181_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldxrb w10, [x9]
; OUTLINE-ATOMICS-NEXT: sxtb w8, w10
; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxtb
; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, gt
; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB181_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov w0, w8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_max_i8_release:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var8
+; CHECK-REG-NEXT: add x8, x8, :lo12:var8
+; CHECK-REG-NEXT: ldsmaxlb w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw max ptr @var8, i8 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
-; CHECK: ldsmaxlb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i8 %old
}

define dso_local i16 @test_atomic_load_max_i16_release(i16 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_max_i16_release:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var16
+; CHECK-NEXT: add x8, x8, :lo12:var16
+; CHECK-NEXT: ldsmaxlh w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i16_release:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var16
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB182_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldxrh w10, [x9]
; OUTLINE-ATOMICS-NEXT: sxth w8, w10
; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxth
; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, gt
; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB182_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov w0, w8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_max_i16_release:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var16
+; CHECK-REG-NEXT: add x8, x8, :lo12:var16
+; CHECK-REG-NEXT: ldsmaxlh w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw max ptr @var16, i16 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
-; CHECK: ldsmaxlh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i16 %old
}

define dso_local i32 @test_atomic_load_max_i32_release(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_max_i32_release:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldsmaxl w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_release:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var32
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB183_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldxr w8, [x9]
; OUTLINE-ATOMICS-NEXT: cmp w8, w0
; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, gt
; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB183_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov w0, w8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_max_i32_release:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldsmaxl w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw max ptr @var32, i32 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldsmaxl w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i32 %old
}

define dso_local i64 @test_atomic_load_max_i64_release(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_max_i64_release:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldsmaxl x0, x0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_release:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var64
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB184_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldxr x8, [x9]
; OUTLINE-ATOMICS-NEXT: cmp x8, x0
; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, gt
; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB184_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov x0, x8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_max_i64_release:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldsmaxl x0, x0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw max ptr @var64, i64 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldsmaxl x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i64 %old
}

define dso_local void @test_atomic_load_max_i32_noret_release(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_max_i32_noret_release:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldsmaxl w0, w8, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_noret_release:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x8, var32
; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB185_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldxr w9, [x8]
; OUTLINE-ATOMICS-NEXT: cmp w9, w0
; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, gt
; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8]
-; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB185_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_max_i32_noret_release:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldsmaxl w0, w8, [x8]
+; CHECK-REG-NEXT: ret
atomicrmw max ptr @var32, i32 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldsmaxl w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
ret void
}

define dso_local void @test_atomic_load_max_i64_noret_release(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_max_i64_noret_release:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldsmaxl x0, x8, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_noret_release:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x8, var64
; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB186_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldxr x9, [x8]
; OUTLINE-ATOMICS-NEXT: cmp x9, x0
; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, gt
; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8]
-; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB186_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_max_i64_noret_release:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldsmaxl x0, x8, [x8]
+; CHECK-REG-NEXT: ret
atomicrmw max ptr @var64, i64 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldsmaxl x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
ret void
}

define dso_local i8 @test_atomic_load_max_i8_seq_cst(i8 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_max_i8_seq_cst:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var8
+; CHECK-NEXT: add x8, x8, :lo12:var8
+; CHECK-NEXT: ldsmaxalb w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i8_seq_cst:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var8
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB187_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxrb w10, [x9]
; OUTLINE-ATOMICS-NEXT: sxtb w8, w10
; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxtb
; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, gt
; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB187_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov w0, w8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_max_i8_seq_cst:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var8
+; CHECK-REG-NEXT: add x8, x8, :lo12:var8
+; CHECK-REG-NEXT: ldsmaxalb w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw max ptr @var8, i8 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
-; CHECK: ldsmaxalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i8 %old
}

define dso_local i16 @test_atomic_load_max_i16_seq_cst(i16 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_max_i16_seq_cst:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var16
+; CHECK-NEXT: add x8, x8, :lo12:var16
+; CHECK-NEXT: ldsmaxalh w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i16_seq_cst:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var16
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB188_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxrh w10, [x9]
; OUTLINE-ATOMICS-NEXT: sxth w8, w10
; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxth
; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, gt
; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB188_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov w0, w8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_max_i16_seq_cst:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var16
+; CHECK-REG-NEXT: add x8, x8, :lo12:var16
+; CHECK-REG-NEXT: ldsmaxalh w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw max ptr @var16, i16 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
-; CHECK: ldsmaxalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i16 %old
}

define dso_local i32 @test_atomic_load_max_i32_seq_cst(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_max_i32_seq_cst:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldsmaxal w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_seq_cst:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var32
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB189_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9]
; OUTLINE-ATOMICS-NEXT: cmp w8, w0
; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, gt
; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB189_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov w0, w8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_max_i32_seq_cst:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldsmaxal w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw max ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldsmaxal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i32 %old
}

define dso_local i64 @test_atomic_load_max_i64_seq_cst(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_max_i64_seq_cst:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldsmaxal x0, x0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_seq_cst:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var64
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB190_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9]
; OUTLINE-ATOMICS-NEXT: cmp x8, x0
; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, gt
; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB190_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov x0, x8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_max_i64_seq_cst:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldsmaxal x0, x0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw max ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldsmaxal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i64 %old
}

define dso_local void @test_atomic_load_max_i32_noret_seq_cst(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_max_i32_noret_seq_cst:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldsmaxal w0, w8, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i32_noret_seq_cst:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x8, var32
; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB191_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8]
; OUTLINE-ATOMICS-NEXT: cmp w9, w0
; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, gt
; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8]
-; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB191_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_max_i32_noret_seq_cst:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldsmaxal w0, w8, [x8]
+; CHECK-REG-NEXT: ret
atomicrmw max ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldsmaxal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret void
}

define dso_local void @test_atomic_load_max_i64_noret_seq_cst(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_max_i64_noret_seq_cst:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldsmaxal x0, x8, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_max_i64_noret_seq_cst:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x8, var64
; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB192_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8]
; OUTLINE-ATOMICS-NEXT: cmp x9, x0
; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, gt
; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8]
-; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB192_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_max_i64_noret_seq_cst:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldsmaxal x0, x8, [x8]
+; CHECK-REG-NEXT: ret
atomicrmw max ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldsmaxal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret void
}

define dso_local i8 @test_atomic_load_min_i8_acq_rel(i8 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_min_i8_acq_rel:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var8
+; CHECK-NEXT: add x8, x8, :lo12:var8
+; CHECK-NEXT: ldsminalb w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i8_acq_rel:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var8
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB193_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxrb w10, [x9]
; OUTLINE-ATOMICS-NEXT: sxtb w8, w10
; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxtb
; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, le
; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB193_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov w0, w8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_min_i8_acq_rel:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var8
+; CHECK-REG-NEXT: add x8, x8, :lo12:var8
+; CHECK-REG-NEXT: ldsminalb w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw min ptr @var8, i8 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
-; CHECK: ldsminalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i8 %old
}

define dso_local i16 @test_atomic_load_min_i16_acq_rel(i16 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_min_i16_acq_rel:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var16
+; CHECK-NEXT: add x8, x8, :lo12:var16
+; CHECK-NEXT: ldsminalh w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i16_acq_rel:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var16
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB194_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxrh w10, [x9]
; OUTLINE-ATOMICS-NEXT: sxth w8, w10
; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxth
; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, le
; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB194_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov w0, w8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_min_i16_acq_rel:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var16
+; CHECK-REG-NEXT: add x8, x8, :lo12:var16
+; CHECK-REG-NEXT: ldsminalh w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw min ptr @var16, i16 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
-; CHECK: ldsminalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i16 %old
}

define dso_local i32 @test_atomic_load_min_i32_acq_rel(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_min_i32_acq_rel:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldsminal w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_acq_rel:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var32
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB195_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9]
; OUTLINE-ATOMICS-NEXT: cmp w8, w0
; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, le
; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB195_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov w0, w8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_min_i32_acq_rel:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldsminal w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw min ptr @var32, i32 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldsminal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i32 %old
}

define dso_local i64 @test_atomic_load_min_i64_acq_rel(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_min_i64_acq_rel:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldsminal x0, x0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_acq_rel:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var64
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB196_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9]
; OUTLINE-ATOMICS-NEXT: cmp x8, x0
; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, le
; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB196_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov x0, x8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_min_i64_acq_rel:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldsminal x0, x0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw min ptr @var64, i64 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldsminal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i64 %old
}

define dso_local void @test_atomic_load_min_i32_noret_acq_rel(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_min_i32_noret_acq_rel:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldsminal w0, w8, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_noret_acq_rel:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x8, var32
; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB197_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8]
; OUTLINE-ATOMICS-NEXT: cmp w9, w0
; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, le
; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8]
-; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB197_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_min_i32_noret_acq_rel:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldsminal w0, w8, [x8]
+; CHECK-REG-NEXT: ret
atomicrmw min ptr @var32, i32 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldsminal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret void
}

define dso_local void @test_atomic_load_min_i64_noret_acq_rel(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_min_i64_noret_acq_rel:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldsminal x0, x8, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_noret_acq_rel:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x8, var64
; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB198_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8]
; OUTLINE-ATOMICS-NEXT: cmp x9, x0
; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, le
; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8]
-; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB198_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_min_i64_noret_acq_rel:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldsminal x0, x8, [x8]
+; CHECK-REG-NEXT: ret
atomicrmw min ptr @var64, i64 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldsminal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_min_i8_acquire(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i8_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldsminab w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i8_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var8 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB199_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxrb w10, [x9] ; OUTLINE-ATOMICS-NEXT: sxtb w8, w10 ; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxtb ; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, le ; OUTLINE-ATOMICS-NEXT: stxrb w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB199_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_min_i8_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldsminab w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw min ptr @var8, i8 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldsminab w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_min_i16_acquire(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i16_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldsminah w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i16_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var16 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB200_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxrh w10, [x9] ; OUTLINE-ATOMICS-NEXT: sxth w8, w10 ; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxth ; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, le ; OUTLINE-ATOMICS-NEXT: stxrh w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB200_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_min_i16_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldsminah w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw min ptr @var16, i16 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldsminah w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_min_i32_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i32_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsmina w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var32 ; OUTLINE-ATOMICS-NEXT: add x9, x9, 
:lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB201_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp w8, w0 ; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, le ; OUTLINE-ATOMICS-NEXT: stxr w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB201_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_min_i32_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldsmina w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw min ptr @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsmina w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_min_i64_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i64_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsmina x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var64 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB202_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp x8, x0 ; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, le ; OUTLINE-ATOMICS-NEXT: stxr w11, x10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB202_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov x0, x8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_min_i64_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldsmina x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw min ptr @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsmina x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_min_i32_noret_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i32_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsmina w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_noret_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var32 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB203_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w9, w0 ; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, le ; OUTLINE-ATOMICS-NEXT: stxr w10, w9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB203_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; 
OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_min_i32_noret_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldsmina w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw min ptr @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsmina w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_min_i64_noret_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i64_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsmina x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_noret_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var64 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB204_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp x9, x0 ; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, le ; OUTLINE-ATOMICS-NEXT: stxr w10, x9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB204_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_min_i64_noret_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldsmina x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw min ptr @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsmina x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_min_i8_monotonic(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_min_i8_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldsminb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i8_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var8 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB205_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxrb w10, [x9] ; OUTLINE-ATOMICS-NEXT: sxtb w8, w10 ; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxtb ; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, le ; OUTLINE-ATOMICS-NEXT: stxrb w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB205_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_min_i8_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldsminb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw min ptr @var8, i8 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldsminb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 
; CHECK-LABEL: test_atomic_load_min_i16_monotonic:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var16
+; CHECK-NEXT: add x8, x8, :lo12:var16
+; CHECK-NEXT: ldsminh w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i16_monotonic:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var16
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB206_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldxrh w10, [x9]
; OUTLINE-ATOMICS-NEXT: sxth w8, w10
; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxth
; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, le
; OUTLINE-ATOMICS-NEXT: stxrh w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB206_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov w0, w8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_min_i16_monotonic:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var16
+; CHECK-REG-NEXT: add x8, x8, :lo12:var16
+; CHECK-REG-NEXT: ldsminh w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw min ptr @var16, i16 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
-; CHECK: ldsminh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i16 %old
}

define dso_local i32 @test_atomic_load_min_i32_monotonic(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_min_i32_monotonic:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldsmin w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_monotonic:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var32
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB207_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldxr w8, [x9]
; OUTLINE-ATOMICS-NEXT: cmp w8, w0
; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, le
; OUTLINE-ATOMICS-NEXT: stxr w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB207_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov w0, w8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_min_i32_monotonic:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldsmin w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw min ptr @var32, i32 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldsmin w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i32 %old
}

define dso_local i64 @test_atomic_load_min_i64_monotonic(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_min_i64_monotonic:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldsmin x0, x0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_monotonic:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var64
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB208_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldxr x8, [x9]
; OUTLINE-ATOMICS-NEXT: cmp x8, x0
; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, le
; OUTLINE-ATOMICS-NEXT: stxr w11, x10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB208_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov x0, x8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_min_i64_monotonic:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldsmin x0, x0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw min ptr @var64, i64 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldsmin x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i64 %old
}

define dso_local void @test_atomic_load_min_i32_noret_monotonic(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_min_i32_noret_monotonic:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldsmin w0, w8, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_noret_monotonic:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x8, var32
; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB209_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldxr w9, [x8]
; OUTLINE-ATOMICS-NEXT: cmp w9, w0
; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, le
; OUTLINE-ATOMICS-NEXT: stxr w10, w9, [x8]
-; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB209_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_min_i32_noret_monotonic:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldsmin w0, w8, [x8]
+; CHECK-REG-NEXT: ret
atomicrmw min ptr @var32, i32 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldsmin w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
ret void
}

define dso_local void @test_atomic_load_min_i64_noret_monotonic(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_min_i64_noret_monotonic:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldsmin x0, x8, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_noret_monotonic:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x8, var64
; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB210_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldxr x9, [x8]
; OUTLINE-ATOMICS-NEXT: cmp x9, x0
; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, le
; OUTLINE-ATOMICS-NEXT: stxr w10, x9, [x8]
-; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB210_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_min_i64_noret_monotonic:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldsmin x0, x8, [x8]
+; CHECK-REG-NEXT: ret
atomicrmw min ptr @var64, i64 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldsmin x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
ret void
}

define dso_local i8 @test_atomic_load_min_i8_release(i8 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_min_i8_release:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var8
+; CHECK-NEXT: add x8, x8, :lo12:var8
+; CHECK-NEXT: ldsminlb w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i8_release:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var8
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB211_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldxrb w10, [x9]
; OUTLINE-ATOMICS-NEXT: sxtb w8, w10
; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxtb
; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, le
; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB211_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov w0, w8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_min_i8_release:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var8
+; CHECK-REG-NEXT: add x8, x8, :lo12:var8
+; CHECK-REG-NEXT: ldsminlb w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw min ptr @var8, i8 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
-; CHECK: ldsminlb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i8 %old
}

define dso_local i16 @test_atomic_load_min_i16_release(i16 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_min_i16_release:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var16
+; CHECK-NEXT: add x8, x8, :lo12:var16
+; CHECK-NEXT: ldsminlh w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i16_release:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var16
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB212_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldxrh w10, [x9]
; OUTLINE-ATOMICS-NEXT: sxth w8, w10
; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxth
; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, le
; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB212_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov w0, w8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_min_i16_release:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var16
+; CHECK-REG-NEXT: add x8, x8, :lo12:var16
+; CHECK-REG-NEXT: ldsminlh w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw min ptr @var16, i16 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
-; CHECK: ldsminlh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i16 %old
}

define dso_local i32 @test_atomic_load_min_i32_release(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_min_i32_release:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldsminl w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_release:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var32
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB213_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldxr w8, [x9]
; OUTLINE-ATOMICS-NEXT: cmp w8, w0
; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, le
; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB213_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov w0, w8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_min_i32_release:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldsminl w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw min ptr @var32, i32 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldsminl w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i32 %old
}

define dso_local i64 @test_atomic_load_min_i64_release(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_min_i64_release:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldsminl x0, x0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_release:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var64
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB214_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldxr x8, [x9]
; OUTLINE-ATOMICS-NEXT: cmp x8, x0
; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, le
; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB214_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov x0, x8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_min_i64_release:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldsminl x0, x0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw min ptr @var64, i64 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldsminl x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i64 %old
}

define dso_local void @test_atomic_load_min_i32_noret_release(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_min_i32_noret_release:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldsminl w0, w8, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_noret_release:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x8, var32
; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB215_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldxr w9, [x8]
; OUTLINE-ATOMICS-NEXT: cmp w9, w0
; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, le
; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8]
-; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB215_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_min_i32_noret_release:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldsminl w0, w8, [x8]
+; CHECK-REG-NEXT: ret
atomicrmw min ptr @var32, i32 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldsminl w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
ret void
}

define dso_local void @test_atomic_load_min_i64_noret_release(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_min_i64_noret_release:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldsminl x0, x8, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_noret_release:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x8, var64
; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB216_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldxr x9, [x8]
; OUTLINE-ATOMICS-NEXT: cmp x9, x0
; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, le
; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8]
-; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB216_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_min_i64_noret_release:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldsminl x0, x8, [x8]
+; CHECK-REG-NEXT: ret
atomicrmw min ptr @var64, i64 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldsminl x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
ret void
}

define dso_local i8 @test_atomic_load_min_i8_seq_cst(i8 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_min_i8_seq_cst:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var8
+; CHECK-NEXT: add x8, x8, :lo12:var8
+; CHECK-NEXT: ldsminalb w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i8_seq_cst:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var8
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB217_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxrb w10, [x9]
; OUTLINE-ATOMICS-NEXT: sxtb w8, w10
; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxtb
; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, le
; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB217_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov w0, w8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_min_i8_seq_cst:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var8
+; CHECK-REG-NEXT: add x8, x8, :lo12:var8
+; CHECK-REG-NEXT: ldsminalb w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw min ptr @var8, i8 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
-; CHECK: ldsminalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i8 %old
}

define dso_local i16 @test_atomic_load_min_i16_seq_cst(i16 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_min_i16_seq_cst:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var16
+; CHECK-NEXT: add x8, x8, :lo12:var16
+; CHECK-NEXT: ldsminalh w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i16_seq_cst:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var16
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB218_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxrh w10, [x9]
; OUTLINE-ATOMICS-NEXT: sxth w8, w10
; OUTLINE-ATOMICS-NEXT: cmp w8, w0, sxth
; OUTLINE-ATOMICS-NEXT: csel w10, w10, w0, le
; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB218_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov w0, w8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_min_i16_seq_cst:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var16
+; CHECK-REG-NEXT: add x8, x8, :lo12:var16
+; CHECK-REG-NEXT: ldsminalh w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw min ptr @var16, i16 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
-; CHECK: ldsminalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i16 %old
}

define dso_local i32 @test_atomic_load_min_i32_seq_cst(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_min_i32_seq_cst:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldsminal w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_seq_cst:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var32
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB219_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9]
; OUTLINE-ATOMICS-NEXT: cmp w8, w0
; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, le
; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB219_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov w0, w8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_min_i32_seq_cst:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldsminal w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw min ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldsminal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i32 %old
}

define dso_local i64 @test_atomic_load_min_i64_seq_cst(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_min_i64_seq_cst:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldsminal x0, x0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_seq_cst:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x9, var64
; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB220_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9]
; OUTLINE-ATOMICS-NEXT: cmp x8, x0
; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, le
; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB220_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: mov x0, x8
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_min_i64_seq_cst:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldsminal x0, x0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw min ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldsminal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i64 %old
}

define dso_local void @test_atomic_load_min_i32_noret_seq_cst(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_min_i32_noret_seq_cst:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldsminal w0, w8, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i32_noret_seq_cst:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x8, var32
; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB221_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8]
; OUTLINE-ATOMICS-NEXT: cmp w9, w0
; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, le
; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8]
-; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB221_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_min_i32_noret_seq_cst:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldsminal w0, w8, [x8]
+; CHECK-REG-NEXT: ret
atomicrmw min ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldsminal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret void
}

define dso_local void @test_atomic_load_min_i64_noret_seq_cst(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_min_i64_noret_seq_cst:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldsminal x0, x8, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_min_i64_noret_seq_cst:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: adrp x8, var64
; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB222_1: // %atomicrmw.start
; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8]
; OUTLINE-ATOMICS-NEXT: cmp x9, x0
; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, le
; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8]
-; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB222_1
; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_min_i64_noret_seq_cst:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldsminal x0, x8, [x8]
+; CHECK-REG-NEXT: ret
atomicrmw min ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldsminal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret void
}

define dso_local i8 @test_atomic_load_or_i8_acq_rel(i8 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_or_i8_acq_rel:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var8
+; CHECK-NEXT: add x8, x8, :lo12:var8
+; CHECK-NEXT: ldsetalb w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i8_acq_rel:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5285,19 +7182,27 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset1_acq_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_or_i8_acq_rel:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var8
+; CHECK-REG-NEXT: add x8, x8, :lo12:var8
+; CHECK-REG-NEXT: ldsetalb w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw or ptr @var8, i8 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
-; CHECK: ldsetalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i8 %old
}

define dso_local i16 @test_atomic_load_or_i16_acq_rel(i16 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_or_i16_acq_rel:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var16
+; CHECK-NEXT: add x8, x8, :lo12:var16
+; CHECK-NEXT: ldsetalh w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i16_acq_rel:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5306,19 +7211,27 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset2_acq_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_or_i16_acq_rel:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var16
+; CHECK-REG-NEXT: add x8, x8, :lo12:var16
+; CHECK-REG-NEXT: ldsetalh w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw or ptr @var16, i16 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
-; CHECK: ldsetalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i16 %old
}

define dso_local i32 @test_atomic_load_or_i32_acq_rel(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_or_i32_acq_rel:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldsetal w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_acq_rel:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5327,19 +7240,27 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset4_acq_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_or_i32_acq_rel:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldsetal w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw or ptr @var32, i32 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldsetal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i32 %old
}

define dso_local i64 @test_atomic_load_or_i64_acq_rel(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_or_i64_acq_rel:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldsetal x0, x0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_acq_rel:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5348,19 +7269,27 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset8_acq_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_or_i64_acq_rel:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldsetal x0, x0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw or ptr @var64, i64 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldsetal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i64 %old
}

define dso_local void @test_atomic_load_or_i32_noret_acq_rel(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_or_i32_noret_acq_rel:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldsetal w0, w8, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_noret_acq_rel:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5369,18 +7298,26 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset4_acq_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_or_i32_noret_acq_rel:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldsetal w0, w8, [x8]
+; CHECK-REG-NEXT: ret
atomicrmw or ptr @var32, i32 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldsetal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret void
}

define dso_local void @test_atomic_load_or_i64_noret_acq_rel(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_or_i64_noret_acq_rel:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldsetal x0, x8, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_noret_acq_rel:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5389,18 +7326,26 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset8_acq_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_or_i64_noret_acq_rel:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldsetal x0, x8, [x8]
+; CHECK-REG-NEXT: ret
atomicrmw or ptr @var64, i64 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldsetal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret void
}

define dso_local i8 @test_atomic_load_or_i8_acquire(i8 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_or_i8_acquire:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var8
+; CHECK-NEXT: add x8, x8, :lo12:var8
+; CHECK-NEXT: ldsetab w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i8_acquire:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5409,19 +7354,27 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset1_acq
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_or_i8_acquire:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var8
+; CHECK-REG-NEXT: add x8, x8, :lo12:var8
+; CHECK-REG-NEXT: ldsetab w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw or ptr @var8, i8 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
-; CHECK: ldsetab w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i8 %old
}

define dso_local i16 @test_atomic_load_or_i16_acquire(i16 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_or_i16_acquire:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var16
+; CHECK-NEXT: add x8, x8, :lo12:var16
+; CHECK-NEXT: ldsetah w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i16_acquire:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5430,19 +7383,27 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset2_acq
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_or_i16_acquire:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var16
+; CHECK-REG-NEXT: add x8, x8, :lo12:var16
+; CHECK-REG-NEXT: ldsetah w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw or ptr @var16, i16 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
-; CHECK: ldsetah w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i16 %old
}

define dso_local i32 @test_atomic_load_or_i32_acquire(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_or_i32_acquire:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldseta w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_acquire:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5451,19 +7412,27 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset4_acq
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_or_i32_acquire:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldseta w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw or ptr @var32, i32 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldseta w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i32 %old
}

define dso_local i64 @test_atomic_load_or_i64_acquire(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_or_i64_acquire:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldseta x0, x0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_acquire:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5472,19 +7441,27 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset8_acq
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_or_i64_acquire:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldseta x0, x0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw or ptr @var64, i64 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldseta x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i64 %old
}

define dso_local void @test_atomic_load_or_i32_noret_acquire(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_or_i32_noret_acquire:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldseta w0, w8, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_noret_acquire:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5493,18 +7470,26 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset4_acq
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_or_i32_noret_acquire:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldseta w0, w8, [x8]
+; CHECK-REG-NEXT: ret
atomicrmw or ptr @var32, i32 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldseta w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret void
}

define dso_local void @test_atomic_load_or_i64_noret_acquire(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_or_i64_noret_acquire:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldseta x0, x8, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_noret_acquire:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5513,18 +7498,26 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset8_acq
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_or_i64_noret_acquire:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldseta x0, x8, [x8]
+; CHECK-REG-NEXT: ret
atomicrmw or ptr @var64, i64 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldseta x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret void
}

define dso_local i8 @test_atomic_load_or_i8_monotonic(i8 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_or_i8_monotonic:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var8
+; CHECK-NEXT: add x8, x8, :lo12:var8
+; CHECK-NEXT: ldsetb w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i8_monotonic:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5533,19 +7526,27 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset1_relax
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_or_i8_monotonic:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var8
+; CHECK-REG-NEXT: add x8, x8, :lo12:var8
+; CHECK-REG-NEXT: ldsetb w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw or ptr @var8, i8 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
-; CHECK: ldsetb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i8 %old
}

define dso_local i16 @test_atomic_load_or_i16_monotonic(i16 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_or_i16_monotonic:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var16
+; CHECK-NEXT: add x8, x8, :lo12:var16
+; CHECK-NEXT: ldseth w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i16_monotonic:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5554,19 +7555,27 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset2_relax
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_or_i16_monotonic:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var16
+; CHECK-REG-NEXT: add x8, x8, :lo12:var16
+; CHECK-REG-NEXT: ldseth w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw or ptr @var16, i16 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
-; CHECK: ldseth w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i16 %old
}

define dso_local i32 @test_atomic_load_or_i32_monotonic(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_or_i32_monotonic:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldset w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_monotonic:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5575,19 +7584,27 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset4_relax
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_or_i32_monotonic:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldset w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw or ptr @var32, i32 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldset w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i32 %old
}

define dso_local i64 @test_atomic_load_or_i64_monotonic(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_or_i64_monotonic:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldset x0, x0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_monotonic:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5596,19 +7613,27 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset8_relax
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_or_i64_monotonic:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldset x0, x0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw or ptr @var64, i64 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldset x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i64 %old
}

define dso_local void @test_atomic_load_or_i32_noret_monotonic(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_or_i32_noret_monotonic:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldset w0, w8, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_noret_monotonic:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5617,18 +7642,26 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset4_relax
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_or_i32_noret_monotonic:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldset w0, w8, [x8]
+; CHECK-REG-NEXT: ret
atomicrmw or ptr @var32, i32 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldset w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
ret void
}

define dso_local void @test_atomic_load_or_i64_noret_monotonic(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_or_i64_noret_monotonic:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldset x0, x8, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_noret_monotonic:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5637,18 +7670,26 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset8_relax
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_or_i64_noret_monotonic:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldset x0, x8, [x8]
+; CHECK-REG-NEXT: ret
atomicrmw or ptr @var64, i64 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldset x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
ret void
}

define dso_local i8 @test_atomic_load_or_i8_release(i8 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_or_i8_release:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var8
+; CHECK-NEXT: add x8, x8, :lo12:var8
+; CHECK-NEXT: ldsetlb w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i8_release:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5657,19 +7698,27 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset1_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_or_i8_release:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var8
+; CHECK-REG-NEXT: add x8, x8, :lo12:var8
+; CHECK-REG-NEXT: ldsetlb w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw or ptr @var8, i8 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
-; CHECK: ldsetlb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i8 %old
}

define dso_local i16 @test_atomic_load_or_i16_release(i16 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_or_i16_release:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var16
+; CHECK-NEXT: add x8, x8, :lo12:var16
+; CHECK-NEXT: ldsetlh w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i16_release:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5678,19 +7727,27 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset2_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_or_i16_release:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var16
+; CHECK-REG-NEXT: add x8, x8, :lo12:var16
+; CHECK-REG-NEXT: ldsetlh w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw or ptr @var16, i16 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
-; CHECK: ldsetlh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i16 %old
}

define dso_local i32 @test_atomic_load_or_i32_release(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_or_i32_release:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldsetl w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_release:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5699,19 +7756,27 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset4_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_or_i32_release:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldsetl w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw or ptr @var32, i32 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldsetl w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i32 %old
}

define dso_local i64 @test_atomic_load_or_i64_release(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_or_i64_release:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldsetl x0, x0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_release:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5720,19 +7785,27 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset8_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_or_i64_release:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldsetl x0, x0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw or ptr @var64, i64 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldsetl x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i64 %old
}

define dso_local void @test_atomic_load_or_i32_noret_release(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_or_i32_noret_release:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldsetl w0, w8, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_noret_release:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5741,18 +7814,26 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset4_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_or_i32_noret_release:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldsetl w0, w8, [x8]
+; CHECK-REG-NEXT: ret
atomicrmw or ptr @var32, i32 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldsetl w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
ret void
}

define dso_local void @test_atomic_load_or_i64_noret_release(i64 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_or_i64_noret_release:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldsetl x0, x8, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_noret_release:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5761,18 +7842,26 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset8_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_or_i64_noret_release:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldsetl x0, x8, [x8]
+; CHECK-REG-NEXT: ret
atomicrmw or ptr @var64, i64 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldsetl x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
ret void
}

define dso_local i8 @test_atomic_load_or_i8_seq_cst(i8 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_or_i8_seq_cst:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var8
+; CHECK-NEXT: add x8, x8, :lo12:var8
+; CHECK-NEXT: ldsetalb w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i8_seq_cst:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5781,19 +7870,27 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset1_acq_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_or_i8_seq_cst:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var8
+; CHECK-REG-NEXT: add x8, x8, :lo12:var8
+; CHECK-REG-NEXT: ldsetalb w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw or ptr @var8, i8 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
-; CHECK: ldsetalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i8 %old
}

define dso_local i16 @test_atomic_load_or_i16_seq_cst(i16 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_or_i16_seq_cst:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var16
+; CHECK-NEXT: add x8, x8, :lo12:var16
+; CHECK-NEXT: ldsetalh w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i16_seq_cst:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -5802,19 +7899,27 @@
; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset2_acq_rel
; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_or_i16_seq_cst:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var16
+; CHECK-REG-NEXT: add x8, x8, :lo12:var16
+; CHECK-REG-NEXT: ldsetalh w0, w0, [x8]
+; CHECK-REG-NEXT: ret
%old = atomicrmw or ptr @var16, i16 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
-; CHECK: ldsetalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
ret i16 %old
}

define dso_local i32 @test_atomic_load_or_i32_seq_cst(i32 %offset) nounwind {
; CHECK-LABEL: test_atomic_load_or_i32_seq_cst:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldsetal w0, w0, [x8]
+; CHECK-NEXT: ret
+;
; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_seq_cst:
; OUTLINE-ATOMICS: // %bb.0:
; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]!
// 8-byte Folded Spill @@ -5823,19 +7928,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_or_i32_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldsetal w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw or ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsetal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_or_i64_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i64_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsetal x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -5844,19 +7957,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_or_i64_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldsetal x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw or ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsetal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_or_i32_noret_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i32_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldsetal w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i32_noret_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -5865,18 +7986,26 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_or_i32_noret_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldsetal w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw or ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldsetal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_or_i64_noret_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_or_i64_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldsetal x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_or_i64_noret_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
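The `or` tests above all exercise one LSE instruction family; a condensed sketch of the suffix scheme the checks encode (inferred from the generated assembly, not stated explicitly in the test):

  ; atomicrmw or  ->  ldset{order}{size} Ws, Wt, [Xn]
  ;   order: ''  = monotonic, 'a' = acquire, 'l' = release, 'al' = acq_rel/seq_cst
  ;   size:  'b' = i8, 'h' = i16, ''  = i32 (w regs) / i64 (x regs)
  %old = atomicrmw or ptr @var8, i8 %offset seq_cst
  ;   =>  ldsetalb w0, w0, [x8]
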
// 8-byte Folded Spill @@ -5885,18 +8014,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldset8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_or_i64_noret_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldsetal x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw or ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldsetal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_sub_i8_acq_rel(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i8_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: ldaddalb w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -5906,20 +8044,29 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i8_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var8 +; CHECK-REG-NEXT: add x9, x9, :lo12:var8 +; CHECK-REG-NEXT: ldaddalb w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var8, i8 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldaddalb w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_sub_i16_acq_rel(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i16_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: ldaddalh w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -5929,20 +8076,29 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i16_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var16 +; CHECK-REG-NEXT: add x9, x9, :lo12:var16 +; CHECK-REG-NEXT: ldaddalh w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var16, i16 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldaddalh w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_sub_i32_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i32_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldaddal w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -5952,20 +8108,29 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i32_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldaddal w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddal w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_sub_i64_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i64_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldaddal x8, x0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -5975,20 +8140,29 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i64_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldaddal x8, x0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddal x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_sub_i32_noret_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i32_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldaddal w8, w8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_noret_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -5998,20 +8172,29 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i32_noret_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldaddal w8, w8, [x9] +; CHECK-REG-NEXT: ret atomicrmw sub ptr @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddal w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_sub_i64_noret_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i64_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldaddal x8, x8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_noret_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -6021,20 +8204,29 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i64_noret_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldaddal x8, x8, [x9] +; CHECK-REG-NEXT: ret atomicrmw sub ptr @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddal x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_sub_i8_acquire(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i8_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: ldaddab w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
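Because LSE has no atomic-subtract instruction, every `sub` test in this stretch checks the same two-instruction idiom; a minimal sketch, assuming the +lse target these checks are generated for:

  %old = atomicrmw sub ptr @var32, i32 %offset acq_rel
  ;   =>  neg     w8, w0             ; subtract becomes add of the negated operand
  ;       adrp    x9, var32
  ;       add     x9, x9, :lo12:var32
  ;       ldaddal w8, w0, [x9]       ; same a/l/al ordering suffixes as ldset
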
// 8-byte Folded Spill @@ -6044,20 +8236,29 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i8_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var8 +; CHECK-REG-NEXT: add x9, x9, :lo12:var8 +; CHECK-REG-NEXT: ldaddab w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var8, i8 %offset acquire -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldaddab w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_sub_i16_acquire(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i16_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: ldaddah w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -6067,20 +8268,29 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i16_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var16 +; CHECK-REG-NEXT: add x9, x9, :lo12:var16 +; CHECK-REG-NEXT: ldaddah w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var16, i16 %offset acquire -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldaddah w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_sub_i32_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i32_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldadda w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -6090,20 +8300,29 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i32_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldadda w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldadda w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_sub_i64_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i64_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldadda x8, x0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -6113,20 +8332,29 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i64_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldadda x8, x0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldadda x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_sub_i32_noret_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i32_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldadda w8, w8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_noret_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -6136,20 +8364,29 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i32_noret_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldadda w8, w8, [x9] +; CHECK-REG-NEXT: ret atomicrmw sub ptr @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldadda w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_sub_i64_noret_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i64_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldadda x8, x8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_noret_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -6159,20 +8396,29 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i64_noret_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldadda x8, x8, [x9] +; CHECK-REG-NEXT: ret atomicrmw sub ptr @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldadda x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_sub_i8_monotonic(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i8_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: ldaddb w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -6182,20 +8428,29 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i8_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var8 +; CHECK-REG-NEXT: add x9, x9, :lo12:var8 +; CHECK-REG-NEXT: ldaddb w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var8, i8 %offset monotonic -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldaddb w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_sub_i16_monotonic(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i16_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: ldaddh w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -6205,20 +8460,29 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i16_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var16 +; CHECK-REG-NEXT: add x9, x9, :lo12:var16 +; CHECK-REG-NEXT: ldaddh w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var16, i16 %offset monotonic -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldaddh w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_sub_i32_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i32_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldadd w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -6228,20 +8492,29 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i32_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldadd w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldadd w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_sub_i64_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i64_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldadd x8, x0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -6251,20 +8524,29 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i64_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldadd x8, x0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldadd x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_sub_i32_noret_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i32_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldadd w8, w8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_noret_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -6274,20 +8556,29 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i32_noret_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldadd w8, w8, [x9] +; CHECK-REG-NEXT: ret atomicrmw sub ptr @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldadd w{{[0-9]+}}, w[[NEW:[1-9][0-9]*]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_sub_i64_noret_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i64_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldadd x8, x8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_noret_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -6297,20 +8588,29 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_relax ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i64_noret_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldadd x8, x8, [x9] +; CHECK-REG-NEXT: ret atomicrmw sub ptr @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldadd x{{[0-9]+}}, x[[NEW:[1-9][0-9]*]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_sub_i8_release(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i8_release: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: ldaddlb w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -6320,20 +8620,29 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i8_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var8 +; CHECK-REG-NEXT: add x9, x9, :lo12:var8 +; CHECK-REG-NEXT: ldaddlb w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var8, i8 %offset release -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldaddlb w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_sub_i16_release(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i16_release: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: ldaddlh w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -6343,20 +8652,29 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i16_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var16 +; CHECK-REG-NEXT: add x9, x9, :lo12:var16 +; CHECK-REG-NEXT: ldaddlh w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var16, i16 %offset release -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldaddlh w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_sub_i32_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i32_release: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldaddl w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -6366,20 +8684,29 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i32_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldaddl w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddl w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_sub_i64_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i64_release: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldaddl x8, x0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -6389,20 +8716,29 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i64_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldaddl x8, x0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddl x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_sub_i32_noret_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i32_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldaddl w8, w8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_noret_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -6412,20 +8748,29 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i32_noret_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldaddl w8, w8, [x9] +; CHECK-REG-NEXT: ret atomicrmw sub ptr @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddl w{{[0-9]*}}, w[[NEW:[1-9][0-9]*]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_sub_i64_noret_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i64_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldaddl x8, x8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_noret_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -6435,20 +8780,29 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i64_noret_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldaddl x8, x8, [x9] +; CHECK-REG-NEXT: ret atomicrmw sub ptr @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddl x{{[0-9]*}}, x[[NEW:[1-9][0-9]*]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_sub_i8_seq_cst(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i8_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var8 +; CHECK-NEXT: add x9, x9, :lo12:var8 +; CHECK-NEXT: ldaddalb w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i8_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -6458,20 +8812,29 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd1_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i8_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var8 +; CHECK-REG-NEXT: add x9, x9, :lo12:var8 +; CHECK-REG-NEXT: ldaddalb w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: ldaddalb w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_sub_i16_seq_cst(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i16_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var16 +; CHECK-NEXT: add x9, x9, :lo12:var16 +; CHECK-NEXT: ldaddalh w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i16_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -6481,20 +8844,29 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd2_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i16_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var16 +; CHECK-REG-NEXT: add x9, x9, :lo12:var16 +; CHECK-REG-NEXT: ldaddalh w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldaddalh w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_sub_i32_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i32_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldaddal w8, w0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -6504,20 +8876,29 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i32_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldaddal w8, w0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddal w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_sub_i64_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i64_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldaddal x8, x0, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -6527,20 +8908,29 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i64_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldaddal x8, x0, [x9] +; CHECK-REG-NEXT: ret %old = atomicrmw sub ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddal x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_sub_i32_noret_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i32_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: neg w8, w0 +; CHECK-NEXT: adrp x9, var32 +; CHECK-NEXT: add x9, x9, :lo12:var32 +; CHECK-NEXT: ldaddal w8, w8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i32_noret_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -6550,20 +8940,29 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i32_noret_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg w8, w0 +; CHECK-REG-NEXT: adrp x9, var32 +; CHECK-REG-NEXT: add x9, x9, :lo12:var32 +; CHECK-REG-NEXT: ldaddal w8, w8, [x9] +; CHECK-REG-NEXT: ret atomicrmw sub ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: neg w[[NEG:[0-9]+]], w[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldaddal w[[NEG]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_sub_i64_noret_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_sub_i64_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: neg x8, x0 +; CHECK-NEXT: adrp x9, var64 +; CHECK-NEXT: add x9, x9, :lo12:var64 +; CHECK-NEXT: ldaddal x8, x8, [x9] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_sub_i64_noret_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -6573,20 +8972,28 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldadd8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_sub_i64_noret_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: neg x8, x0 +; CHECK-REG-NEXT: adrp x9, var64 +; CHECK-REG-NEXT: add x9, x9, :lo12:var64 +; CHECK-REG-NEXT: ldaddal x8, x8, [x9] +; CHECK-REG-NEXT: ret atomicrmw sub ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: neg x[[NEG:[0-9]+]], x[[OLD:[0-9]+]] -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldaddal x[[NEG]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_xchg_i8_acq_rel(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i8_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: swpalb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i8_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -6595,19 +9002,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp1_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i8_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: swpalb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xchg ptr @var8, i8 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: swpalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_xchg_i16_acq_rel(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i16_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: swpalh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i16_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -6616,19 +9031,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp2_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i16_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: swpalh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xchg ptr @var16, i16 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: swpalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_xchg_i32_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i32_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: swpal w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -6637,19 +9060,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i32_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: swpal w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xchg ptr @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: swpal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_xchg_i64_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i64_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: swpal x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -6658,19 +9089,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i64_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: swpal x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xchg ptr @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: swpal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_xchg_i32_noret_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i32_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: swpal w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_noret_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
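The `xchg` tests in this stretch follow the same shape with SWP; a condensed sketch of the mapping the checks pin down:

  ; atomicrmw xchg  ->  swp{order}{size} Ws, Wt, [Xn]   ; Wt <- old, [Xn] <- Ws
  %old = atomicrmw xchg ptr @var8, i8 %offset acq_rel
  ;   =>  swpalb w0, w0, [x8]
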
// 8-byte Folded Spill @@ -6679,19 +9118,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp4_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i32_noret_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: swpal w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw xchg ptr @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: swpal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_xchg_i64_noret_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i64_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: swpal x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_noret_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -6700,19 +9147,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp8_acq_rel ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i64_noret_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: swpal x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw xchg ptr @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: swpal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_xchg_i8_acquire(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i8_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: swpab w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i8_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -6721,19 +9176,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp1_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i8_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: swpab w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xchg ptr @var8, i8 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: swpab w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_xchg_i16_acquire(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i16_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: swpah w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i16_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill @@ -6742,19 +9205,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp2_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i16_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: swpah w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xchg ptr @var16, i16 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: swpah w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_xchg_i32_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i32_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: swpa w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -6763,19 +9234,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp4_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i32_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: swpa w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xchg ptr @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: swpa w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_xchg_i64_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i64_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: swpa x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill @@ -6784,19 +9263,27 @@ ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp8_acq ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_xchg_i64_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: swpa x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw xchg ptr @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: swpa x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_xchg_i32_noret_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_xchg_i32_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: swpa w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_noret_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! 
@@ -6805,19 +9292,27 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp4_acq
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xchg_i32_noret_acquire:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: swpa w0, w8, [x8]
+; CHECK-REG-NEXT: ret
 atomicrmw xchg ptr @var32, i32 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: swpa w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret void
 }

 define dso_local void @test_atomic_load_xchg_i64_noret_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i64_noret_acquire:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: swpa x0, x8, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_noret_acquire:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6826,19 +9321,27 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp8_acq
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xchg_i64_noret_acquire:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: swpa x0, x8, [x8]
+; CHECK-REG-NEXT: ret
 atomicrmw xchg ptr @var64, i64 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: swpa x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret void
 }

 define dso_local i8 @test_atomic_load_xchg_i8_monotonic(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i8_monotonic:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var8
+; CHECK-NEXT: add x8, x8, :lo12:var8
+; CHECK-NEXT: swpb w0, w0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i8_monotonic:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6847,19 +9350,27 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp1_relax
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xchg_i8_monotonic:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var8
+; CHECK-REG-NEXT: add x8, x8, :lo12:var8
+; CHECK-REG-NEXT: swpb w0, w0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw xchg ptr @var8, i8 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
-; CHECK: swpb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i8 %old
 }

 define dso_local i16 @test_atomic_load_xchg_i16_monotonic(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i16_monotonic:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var16
+; CHECK-NEXT: add x8, x8, :lo12:var16
+; CHECK-NEXT: swph w0, w0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i16_monotonic:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6868,19 +9379,27 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp2_relax
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xchg_i16_monotonic:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var16
+; CHECK-REG-NEXT: add x8, x8, :lo12:var16
+; CHECK-REG-NEXT: swph w0, w0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw xchg ptr @var16, i16 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
-; CHECK: swph w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i16 %old
 }

 define dso_local i32 @test_atomic_load_xchg_i32_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i32_monotonic:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: swp w0, w0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_monotonic:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6889,19 +9408,27 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp4_relax
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xchg_i32_monotonic:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: swp w0, w0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw xchg ptr @var32, i32 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: swp w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i32 %old
 }

 define dso_local i64 @test_atomic_load_xchg_i64_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i64_monotonic:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: swp x0, x0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_monotonic:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6910,19 +9437,27 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp8_relax
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xchg_i64_monotonic:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: swp x0, x0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw xchg ptr @var64, i64 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: swp x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i64 %old
 }

 define dso_local void @test_atomic_load_xchg_i32_noret_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i32_noret_monotonic:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: swp w0, wzr, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_noret_monotonic:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6931,19 +9466,27 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp4_relax
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xchg_i32_noret_monotonic:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: swp w0, wzr, [x8]
+; CHECK-REG-NEXT: ret
 atomicrmw xchg ptr @var32, i32 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: swp w[[OLD:[0-9]+]], w[[NEW:[0-9,a-z]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret void
 }

 define dso_local void @test_atomic_load_xchg_i64_noret_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i64_noret_monotonic:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: swp x0, xzr, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_noret_monotonic:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6952,19 +9495,27 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp8_relax
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xchg_i64_noret_monotonic:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: swp x0, xzr, [x8]
+; CHECK-REG-NEXT: ret
 atomicrmw xchg ptr @var64, i64 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: swp x[[OLD:[0-9]+]], x[[NEW:[0-9,a-z]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret void
 }

 define dso_local i8 @test_atomic_load_xchg_i8_release(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i8_release:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var8
+; CHECK-NEXT: add x8, x8, :lo12:var8
+; CHECK-NEXT: swplb w0, w0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i8_release:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6973,19 +9524,27 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp1_rel
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xchg_i8_release:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var8
+; CHECK-REG-NEXT: add x8, x8, :lo12:var8
+; CHECK-REG-NEXT: swplb w0, w0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw xchg ptr @var8, i8 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
-; CHECK: swplb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i8 %old
 }

 define dso_local i16 @test_atomic_load_xchg_i16_release(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i16_release:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var16
+; CHECK-NEXT: add x8, x8, :lo12:var16
+; CHECK-NEXT: swplh w0, w0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i16_release:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -6994,19 +9553,27 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp2_rel
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xchg_i16_release:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var16
+; CHECK-REG-NEXT: add x8, x8, :lo12:var16
+; CHECK-REG-NEXT: swplh w0, w0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw xchg ptr @var16, i16 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
-; CHECK: swplh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i16 %old
 }

 define dso_local i32 @test_atomic_load_xchg_i32_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i32_release:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: swpl w0, w0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_release:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -7015,19 +9582,27 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp4_rel
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xchg_i32_release:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: swpl w0, w0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw xchg ptr @var32, i32 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: swpl w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i32 %old
 }

 define dso_local i64 @test_atomic_load_xchg_i64_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i64_release:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: swpl x0, x0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_release:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -7036,19 +9611,27 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp8_rel
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xchg_i64_release:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: swpl x0, x0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw xchg ptr @var64, i64 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: swpl x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i64 %old
 }

 define dso_local void @test_atomic_load_xchg_i32_noret_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i32_noret_release:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: swpl w0, wzr, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_noret_release:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -7057,19 +9640,27 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp4_rel
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xchg_i32_noret_release:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: swpl w0, wzr, [x8]
+; CHECK-REG-NEXT: ret
 atomicrmw xchg ptr @var32, i32 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: swpl w[[OLD:[0-9]+]], w[[NEW:[0-9,a-z]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret void
 }

 define dso_local void @test_atomic_load_xchg_i64_noret_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i64_noret_release:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: swpl x0, xzr, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_noret_release:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -7078,19 +9669,27 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp8_rel
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xchg_i64_noret_release:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: swpl x0, xzr, [x8]
+; CHECK-REG-NEXT: ret
 atomicrmw xchg ptr @var64, i64 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: swpl x[[OLD:[0-9]+]], x[[NEW:[0-9,a-z]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret void
 }

 define dso_local i8 @test_atomic_load_xchg_i8_seq_cst(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i8_seq_cst:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var8
+; CHECK-NEXT: add x8, x8, :lo12:var8
+; CHECK-NEXT: swpalb w0, w0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i8_seq_cst:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -7099,19 +9698,27 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp1_acq_rel
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xchg_i8_seq_cst:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var8
+; CHECK-REG-NEXT: add x8, x8, :lo12:var8
+; CHECK-REG-NEXT: swpalb w0, w0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw xchg ptr @var8, i8 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
-; CHECK: swpalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i8 %old
 }

 define dso_local i16 @test_atomic_load_xchg_i16_seq_cst(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i16_seq_cst:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var16
+; CHECK-NEXT: add x8, x8, :lo12:var16
+; CHECK-NEXT: swpalh w0, w0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i16_seq_cst:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -7120,19 +9727,27 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp2_acq_rel
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xchg_i16_seq_cst:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var16
+; CHECK-REG-NEXT: add x8, x8, :lo12:var16
+; CHECK-REG-NEXT: swpalh w0, w0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw xchg ptr @var16, i16 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
-; CHECK: swpalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i16 %old
 }

 define dso_local i32 @test_atomic_load_xchg_i32_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i32_seq_cst:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: swpal w0, w0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_seq_cst:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -7141,19 +9756,27 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp4_acq_rel
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xchg_i32_seq_cst:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: swpal w0, w0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw xchg ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: swpal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i32 %old
 }

 define dso_local i64 @test_atomic_load_xchg_i64_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i64_seq_cst:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: swpal x0, x0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_seq_cst:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -7162,19 +9785,27 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp8_acq_rel
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xchg_i64_seq_cst:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: swpal x0, x0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw xchg ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: swpal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i64 %old
 }

 define dso_local void @test_atomic_load_xchg_i32_noret_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i32_noret_seq_cst:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: swpal w0, w8, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i32_noret_seq_cst:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -7183,19 +9814,27 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp4_acq_rel
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xchg_i32_noret_seq_cst:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: swpal w0, w8, [x8]
+; CHECK-REG-NEXT: ret
 atomicrmw xchg ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: swpal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret void
 }

 define dso_local void @test_atomic_load_xchg_i64_noret_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xchg_i64_noret_seq_cst:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: swpal x0, x8, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xchg_i64_noret_seq_cst:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -7204,1619 +9843,2107 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_swp8_acq_rel
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xchg_i64_noret_seq_cst:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: swpal x0, x8, [x8]
+; CHECK-REG-NEXT: ret
 atomicrmw xchg ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: swpal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret void
 }

 define dso_local i8 @test_atomic_load_umax_i8_acq_rel(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i8_acq_rel:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var8
+; CHECK-NEXT: add x8, x8, :lo12:var8
+; CHECK-NEXT: ldumaxalb w0, w0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i8_acq_rel:
 ; OUTLINE-ATOMICS: // %bb.0:
-; OUTLINE-ATOMICS-NEXT: and w8, w0, #0xff
-; OUTLINE-ATOMICS-NEXT: adrp x9, var8
-; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: adrp x8, var8
+; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var8
+; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xff
+; OUTLINE-ATOMICS-NEXT: .LBB313_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
-; OUTLINE-ATOMICS-NEXT: ldaxrb w0, [x9]
-; OUTLINE-ATOMICS-NEXT: cmp w0, w8
-; OUTLINE-ATOMICS-NEXT: csel w10, w0, w8, hi
-; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: ldaxrb w0, [x8]
+; OUTLINE-ATOMICS-NEXT: cmp w0, w9
+; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, hi
+; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x8]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB313_1
 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_umax_i8_acq_rel:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var8
+; CHECK-REG-NEXT: add x8, x8, :lo12:var8
+; CHECK-REG-NEXT: ldumaxalb w0, w0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw umax ptr @var8, i8 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
-; CHECK: ldumaxalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i8 %old
 }

 define dso_local i16 @test_atomic_load_umax_i16_acq_rel(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i16_acq_rel:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var16
+; CHECK-NEXT: add x8, x8, :lo12:var16
+; CHECK-NEXT: ldumaxalh w0, w0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i16_acq_rel:
 ; OUTLINE-ATOMICS: // %bb.0:
-; OUTLINE-ATOMICS-NEXT: and w8, w0, #0xffff
-; OUTLINE-ATOMICS-NEXT: adrp x9, var16
-; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: adrp x8, var16
+; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var16
+; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xffff
+; OUTLINE-ATOMICS-NEXT: .LBB314_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
-; OUTLINE-ATOMICS-NEXT: ldaxrh w0, [x9]
-; OUTLINE-ATOMICS-NEXT: cmp w0, w8
-; OUTLINE-ATOMICS-NEXT: csel w10, w0, w8, hi
-; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: ldaxrh w0, [x8]
+; OUTLINE-ATOMICS-NEXT: cmp w0, w9
+; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, hi
+; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x8]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB314_1
 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_umax_i16_acq_rel:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var16
+; CHECK-REG-NEXT: add x8, x8, :lo12:var16
+; CHECK-REG-NEXT: ldumaxalh w0, w0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw umax ptr @var16, i16 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
-; CHECK: ldumaxalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i16 %old
 }

 define dso_local i32 @test_atomic_load_umax_i32_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i32_acq_rel:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldumaxal w0, w0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_acq_rel:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: adrp x9, var32
 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB315_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9]
 ; OUTLINE-ATOMICS-NEXT: cmp w8, w0
 ; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, hi
 ; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB315_1
 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT: mov w0, w8
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_umax_i32_acq_rel:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldumaxal w0, w0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw umax ptr @var32, i32 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldumaxal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i32 %old
 }

 define dso_local i64 @test_atomic_load_umax_i64_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i64_acq_rel:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldumaxal x0, x0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_acq_rel:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: adrp x9, var64
 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB316_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9]
 ; OUTLINE-ATOMICS-NEXT: cmp x8, x0
 ; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, hi
 ; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB316_1
 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT: mov x0, x8
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_umax_i64_acq_rel:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldumaxal x0, x0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw umax ptr @var64, i64 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldumaxal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i64 %old
 }

 define dso_local void @test_atomic_load_umax_i32_noret_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i32_noret_acq_rel:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldumaxal w0, w8, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_noret_acq_rel:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: adrp x8, var32
 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB317_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8]
 ; OUTLINE-ATOMICS-NEXT: cmp w9, w0
 ; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, hi
 ; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8]
-; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB317_1
 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_umax_i32_noret_acq_rel:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldumaxal w0, w8, [x8]
+; CHECK-REG-NEXT: ret
 atomicrmw umax ptr @var32, i32 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldumaxal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret void
 }

 define dso_local void @test_atomic_load_umax_i64_noret_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i64_noret_acq_rel:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldumaxal x0, x8, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_noret_acq_rel:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: adrp x8, var64
 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB318_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8]
 ; OUTLINE-ATOMICS-NEXT: cmp x9, x0
 ; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, hi
 ; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8]
-; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB318_1
 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_umax_i64_noret_acq_rel:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldumaxal x0, x8, [x8]
+; CHECK-REG-NEXT: ret
 atomicrmw umax ptr @var64, i64 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldumaxal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret void
 }

 define dso_local i8 @test_atomic_load_umax_i8_acquire(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i8_acquire:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var8
+; CHECK-NEXT: add x8, x8, :lo12:var8
+; CHECK-NEXT: ldumaxab w0, w0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i8_acquire:
 ; OUTLINE-ATOMICS: // %bb.0:
-; OUTLINE-ATOMICS-NEXT: and w8, w0, #0xff
-; OUTLINE-ATOMICS-NEXT: adrp x9, var8
-; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: adrp x8, var8
+; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var8
+; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xff
+; OUTLINE-ATOMICS-NEXT: .LBB319_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
-; OUTLINE-ATOMICS-NEXT: ldaxrb w0, [x9]
-; OUTLINE-ATOMICS-NEXT: cmp w0, w8
-; OUTLINE-ATOMICS-NEXT: csel w10, w0, w8, hi
-; OUTLINE-ATOMICS-NEXT: stxrb w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: ldaxrb w0, [x8]
+; OUTLINE-ATOMICS-NEXT: cmp w0, w9
+; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, hi
+; OUTLINE-ATOMICS-NEXT: stxrb w11, w10, [x8]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB319_1
 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_umax_i8_acquire:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var8
+; CHECK-REG-NEXT: add x8, x8, :lo12:var8
+; CHECK-REG-NEXT: ldumaxab w0, w0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw umax ptr @var8, i8 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
-; CHECK: ldumaxab w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i8 %old
 }

 define dso_local i16 @test_atomic_load_umax_i16_acquire(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i16_acquire:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var16
+; CHECK-NEXT: add x8, x8, :lo12:var16
+; CHECK-NEXT: ldumaxah w0, w0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i16_acquire:
 ; OUTLINE-ATOMICS: // %bb.0:
-; OUTLINE-ATOMICS-NEXT: and w8, w0, #0xffff
-; OUTLINE-ATOMICS-NEXT: adrp x9, var16
-; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: adrp x8, var16
+; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var16
+; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xffff
+; OUTLINE-ATOMICS-NEXT: .LBB320_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
-; OUTLINE-ATOMICS-NEXT: ldaxrh w0, [x9]
-; OUTLINE-ATOMICS-NEXT: cmp w0, w8
-; OUTLINE-ATOMICS-NEXT: csel w10, w0, w8, hi
-; OUTLINE-ATOMICS-NEXT: stxrh w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: ldaxrh w0, [x8]
+; OUTLINE-ATOMICS-NEXT: cmp w0, w9
+; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, hi
+; OUTLINE-ATOMICS-NEXT: stxrh w11, w10, [x8]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB320_1
 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_umax_i16_acquire:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var16
+; CHECK-REG-NEXT: add x8, x8, :lo12:var16
+; CHECK-REG-NEXT: ldumaxah w0, w0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw umax ptr @var16, i16 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
-; CHECK: ldumaxah w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i16 %old
 }

 define dso_local i32 @test_atomic_load_umax_i32_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i32_acquire:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldumaxa w0, w0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_acquire:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: adrp x9, var32
 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB321_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9]
 ; OUTLINE-ATOMICS-NEXT: cmp w8, w0
 ; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, hi
 ; OUTLINE-ATOMICS-NEXT: stxr w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB321_1
 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT: mov w0, w8
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_umax_i32_acquire:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldumaxa w0, w0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw umax ptr @var32, i32 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldumaxa w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i32 %old
 }

 define dso_local i64 @test_atomic_load_umax_i64_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i64_acquire:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldumaxa x0, x0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_acquire:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: adrp x9, var64
 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB322_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9]
 ; OUTLINE-ATOMICS-NEXT: cmp x8, x0
 ; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, hi
 ; OUTLINE-ATOMICS-NEXT: stxr w11, x10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB322_1
 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT: mov x0, x8
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_umax_i64_acquire:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldumaxa x0, x0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw umax ptr @var64, i64 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldumaxa x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i64 %old
 }

 define dso_local void @test_atomic_load_umax_i32_noret_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i32_noret_acquire:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldumaxa w0, w8, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_noret_acquire:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: adrp x8, var32
 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB323_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8]
 ; OUTLINE-ATOMICS-NEXT: cmp w9, w0
 ; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, hi
 ; OUTLINE-ATOMICS-NEXT: stxr w10, w9, [x8]
-; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB323_1
 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_umax_i32_noret_acquire:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldumaxa w0, w8, [x8]
+; CHECK-REG-NEXT: ret
 atomicrmw umax ptr @var32, i32 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldumaxa w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret void
 }

 define dso_local void @test_atomic_load_umax_i64_noret_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i64_noret_acquire:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldumaxa x0, x8, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_noret_acquire:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: adrp x8, var64
 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB324_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8]
 ; OUTLINE-ATOMICS-NEXT: cmp x9, x0
 ; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, hi
 ; OUTLINE-ATOMICS-NEXT: stxr w10, x9, [x8]
-; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB324_1
 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_umax_i64_noret_acquire:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldumaxa x0, x8, [x8]
+; CHECK-REG-NEXT: ret
 atomicrmw umax ptr @var64, i64 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldumaxa x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret void
 }

 define dso_local i8 @test_atomic_load_umax_i8_monotonic(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i8_monotonic:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var8
+; CHECK-NEXT: add x8, x8, :lo12:var8
+; CHECK-NEXT: ldumaxb w0, w0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i8_monotonic:
 ; OUTLINE-ATOMICS: // %bb.0:
-; OUTLINE-ATOMICS-NEXT: and w8, w0, #0xff
-; OUTLINE-ATOMICS-NEXT: adrp x9, var8
-; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: adrp x8, var8
+; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var8
+; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xff
+; OUTLINE-ATOMICS-NEXT: .LBB325_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
-; OUTLINE-ATOMICS-NEXT: ldxrb w0, [x9]
-; OUTLINE-ATOMICS-NEXT: cmp w0, w8
-; OUTLINE-ATOMICS-NEXT: csel w10, w0, w8, hi
-; OUTLINE-ATOMICS-NEXT: stxrb w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: ldxrb w0, [x8]
+; OUTLINE-ATOMICS-NEXT: cmp w0, w9
+; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, hi
+; OUTLINE-ATOMICS-NEXT: stxrb w11, w10, [x8]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB325_1
 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_umax_i8_monotonic:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var8
+; CHECK-REG-NEXT: add x8, x8, :lo12:var8
+; CHECK-REG-NEXT: ldumaxb w0, w0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw umax ptr @var8, i8 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
-; CHECK: ldumaxb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i8 %old
 }

 define dso_local i16 @test_atomic_load_umax_i16_monotonic(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i16_monotonic:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var16
+; CHECK-NEXT: add x8, x8, :lo12:var16
+; CHECK-NEXT: ldumaxh w0, w0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i16_monotonic:
 ; OUTLINE-ATOMICS: // %bb.0:
-; OUTLINE-ATOMICS-NEXT: and w8, w0, #0xffff
-; OUTLINE-ATOMICS-NEXT: adrp x9, var16
-; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: adrp x8, var16
+; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var16
+; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xffff
+; OUTLINE-ATOMICS-NEXT: .LBB326_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
-; OUTLINE-ATOMICS-NEXT: ldxrh w0, [x9]
-; OUTLINE-ATOMICS-NEXT: cmp w0, w8
-; OUTLINE-ATOMICS-NEXT: csel w10, w0, w8, hi
-; OUTLINE-ATOMICS-NEXT: stxrh w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: ldxrh w0, [x8]
+; OUTLINE-ATOMICS-NEXT: cmp w0, w9
+; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, hi
+; OUTLINE-ATOMICS-NEXT: stxrh w11, w10, [x8]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB326_1
 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_umax_i16_monotonic:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var16
+; CHECK-REG-NEXT: add x8, x8, :lo12:var16
+; CHECK-REG-NEXT: ldumaxh w0, w0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw umax ptr @var16, i16 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
-; CHECK: ldumaxh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i16 %old
 }

 define dso_local i32 @test_atomic_load_umax_i32_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i32_monotonic:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldumax w0, w0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_monotonic:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: adrp x9, var32
 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB327_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT: ldxr w8, [x9]
 ; OUTLINE-ATOMICS-NEXT: cmp w8, w0
 ; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, hi
 ; OUTLINE-ATOMICS-NEXT: stxr w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB327_1
 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT: mov w0, w8
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_umax_i32_monotonic:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldumax w0, w0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw umax ptr @var32, i32 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldumax w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i32 %old
 }

 define dso_local i64 @test_atomic_load_umax_i64_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i64_monotonic:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldumax x0, x0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_monotonic:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: adrp x9, var64
 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB328_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT: ldxr x8, [x9]
 ; OUTLINE-ATOMICS-NEXT: cmp x8, x0
 ; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, hi
 ; OUTLINE-ATOMICS-NEXT: stxr w11, x10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB328_1
 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT: mov x0, x8
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_umax_i64_monotonic:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldumax x0, x0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw umax ptr @var64, i64 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldumax x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i64 %old
 }

 define dso_local void @test_atomic_load_umax_i32_noret_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i32_noret_monotonic:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldumax w0, w8, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_noret_monotonic:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: adrp x8, var32
 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB329_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT: ldxr w9, [x8]
 ; OUTLINE-ATOMICS-NEXT: cmp w9, w0
 ; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, hi
 ; OUTLINE-ATOMICS-NEXT: stxr w10, w9, [x8]
-; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB329_1
 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_umax_i32_noret_monotonic:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldumax w0, w8, [x8]
+; CHECK-REG-NEXT: ret
 atomicrmw umax ptr @var32, i32 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldumax w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret void
 }

 define dso_local void @test_atomic_load_umax_i64_noret_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i64_noret_monotonic:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldumax x0, x8, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_noret_monotonic:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: adrp x8, var64
 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB330_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT: ldxr x9, [x8]
 ; OUTLINE-ATOMICS-NEXT: cmp x9, x0
 ; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, hi
 ; OUTLINE-ATOMICS-NEXT: stxr w10, x9, [x8]
-; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB330_1
 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_umax_i64_noret_monotonic:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldumax x0, x8, [x8]
+; CHECK-REG-NEXT: ret
 atomicrmw umax ptr @var64, i64 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldumax x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret void
 }

 define dso_local i8 @test_atomic_load_umax_i8_release(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i8_release:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var8
+; CHECK-NEXT: add x8, x8, :lo12:var8
+; CHECK-NEXT: ldumaxlb w0, w0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i8_release:
 ; OUTLINE-ATOMICS: // %bb.0:
-; OUTLINE-ATOMICS-NEXT: and w8, w0, #0xff
-; OUTLINE-ATOMICS-NEXT: adrp x9, var8
-; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: adrp x8, var8
+; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var8
+; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xff
+; OUTLINE-ATOMICS-NEXT: .LBB331_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
-; OUTLINE-ATOMICS-NEXT: ldxrb w0, [x9]
-; OUTLINE-ATOMICS-NEXT: cmp w0, w8
-; OUTLINE-ATOMICS-NEXT: csel w10, w0, w8, hi
-; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: ldxrb w0, [x8]
+; OUTLINE-ATOMICS-NEXT: cmp w0, w9
+; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, hi
+; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x8]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB331_1
 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_umax_i8_release:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var8
+; CHECK-REG-NEXT: add x8, x8, :lo12:var8
+; CHECK-REG-NEXT: ldumaxlb w0, w0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw umax ptr @var8, i8 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
-; CHECK: ldumaxlb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i8 %old
 }

 define dso_local i16 @test_atomic_load_umax_i16_release(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i16_release:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var16
+; CHECK-NEXT: add x8, x8, :lo12:var16
+; CHECK-NEXT: ldumaxlh w0, w0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i16_release:
 ; OUTLINE-ATOMICS: // %bb.0:
-; OUTLINE-ATOMICS-NEXT: and w8, w0, #0xffff
-; OUTLINE-ATOMICS-NEXT: adrp x9, var16
-; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: adrp x8, var16
+; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var16
+; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xffff
+; OUTLINE-ATOMICS-NEXT: .LBB332_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
-; OUTLINE-ATOMICS-NEXT: ldxrh w0, [x9]
-; OUTLINE-ATOMICS-NEXT: cmp w0, w8
-; OUTLINE-ATOMICS-NEXT: csel w10, w0, w8, hi
-; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: ldxrh w0, [x8]
+; OUTLINE-ATOMICS-NEXT: cmp w0, w9
+; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, hi
+; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x8]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB332_1
 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_umax_i16_release:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var16
+; CHECK-REG-NEXT: add x8, x8, :lo12:var16
+; CHECK-REG-NEXT: ldumaxlh w0, w0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw umax ptr @var16, i16 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
-; CHECK: ldumaxlh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i16 %old
 }

 define dso_local i32 @test_atomic_load_umax_i32_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i32_release:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldumaxl w0, w0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_release:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: adrp x9, var32
 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB333_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT: ldxr w8, [x9]
 ; OUTLINE-ATOMICS-NEXT: cmp w8, w0
 ; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, hi
 ; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB333_1
 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT: mov w0, w8
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_umax_i32_release:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldumaxl w0, w0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw umax ptr @var32, i32 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldumaxl w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i32 %old
 }

 define dso_local i64 @test_atomic_load_umax_i64_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i64_release:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldumaxl x0, x0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_release:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: adrp x9, var64
 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB334_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT: ldxr x8, [x9]
 ; OUTLINE-ATOMICS-NEXT: cmp x8, x0
 ; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, hi
 ; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9]
-; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB334_1
 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT: mov x0, x8
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_umax_i64_release:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldumaxl x0, x0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw umax ptr @var64, i64 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldumaxl x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i64 %old
 }

 define dso_local void @test_atomic_load_umax_i32_noret_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i32_noret_release:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldumaxl w0, w8, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_noret_release:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: adrp x8, var32
 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB335_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT: ldxr w9, [x8]
 ; OUTLINE-ATOMICS-NEXT: cmp w9, w0
 ; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, hi
 ; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8]
-; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB335_1
 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_umax_i32_noret_release:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldumaxl w0, w8, [x8]
+; CHECK-REG-NEXT: ret
CHECK-REG-NEXT: ret atomicrmw umax ptr @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldumaxl w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_umax_i64_noret_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i64_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldumaxl x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_noret_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var64 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB336_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxr x9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp x9, x0 ; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, hi ; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB336_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umax_i64_noret_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldumaxl x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw umax ptr @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldumaxl x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_umax_i8_seq_cst(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i8_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: ldumaxalb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i8_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: -; OUTLINE-ATOMICS-NEXT: and w8, w0, #0xff -; OUTLINE-ATOMICS-NEXT: adrp x9, var8 -; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: adrp x8, var8 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xff +; OUTLINE-ATOMICS-NEXT: .LBB337_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 -; OUTLINE-ATOMICS-NEXT: ldaxrb w0, [x9] -; OUTLINE-ATOMICS-NEXT: cmp w0, w8 -; OUTLINE-ATOMICS-NEXT: csel w10, w0, w8, hi -; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: ldaxrb w0, [x8] +; OUTLINE-ATOMICS-NEXT: cmp w0, w9 +; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, hi +; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB337_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umax_i8_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: ldumaxalb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umax ptr @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: 
ldumaxalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_umax_i16_seq_cst(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i16_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: ldumaxalh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i16_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: -; OUTLINE-ATOMICS-NEXT: and w8, w0, #0xffff -; OUTLINE-ATOMICS-NEXT: adrp x9, var16 -; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: adrp x8, var16 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xffff +; OUTLINE-ATOMICS-NEXT: .LBB338_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 -; OUTLINE-ATOMICS-NEXT: ldaxrh w0, [x9] -; OUTLINE-ATOMICS-NEXT: cmp w0, w8 -; OUTLINE-ATOMICS-NEXT: csel w10, w0, w8, hi -; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: ldaxrh w0, [x8] +; OUTLINE-ATOMICS-NEXT: cmp w0, w9 +; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, hi +; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB338_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umax_i16_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: ldumaxalh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umax ptr @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: ldumaxalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_umax_i32_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i32_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldumaxal w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var32 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB339_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp w8, w0 ; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, hi ; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB339_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umax_i32_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldumaxal w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umax ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldumaxal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 
@test_atomic_load_umax_i64_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i64_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldumaxal x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var64 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB340_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp x8, x0 ; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, hi ; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB340_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov x0, x8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umax_i64_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldumaxal x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umax ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldumaxal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_umax_i32_noret_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i32_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldumaxal w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i32_noret_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var32 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB341_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w9, w0 ; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, hi ; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB341_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umax_i32_noret_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldumaxal w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw umax ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldumaxal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_umax_i64_noret_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umax_i64_noret_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldumaxal x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umax_i64_noret_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var64 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB342_1: // 
%atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp x9, x0 ; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, hi ; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB342_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umax_i64_noret_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldumaxal x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw umax ptr @var64, i64 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldumaxal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_umin_i8_acq_rel(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i8_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: lduminalb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i8_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: -; OUTLINE-ATOMICS-NEXT: and w8, w0, #0xff -; OUTLINE-ATOMICS-NEXT: adrp x9, var8 -; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: adrp x8, var8 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xff +; OUTLINE-ATOMICS-NEXT: .LBB343_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 -; OUTLINE-ATOMICS-NEXT: ldaxrb w0, [x9] -; OUTLINE-ATOMICS-NEXT: cmp w0, w8 -; OUTLINE-ATOMICS-NEXT: csel w10, w0, w8, ls -; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: ldaxrb w0, [x8] +; OUTLINE-ATOMICS-NEXT: cmp w0, w9 +; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, ls +; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB343_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i8_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: lduminalb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umin ptr @var8, i8 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: lduminalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_umin_i16_acq_rel(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i16_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: lduminalh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i16_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: -; OUTLINE-ATOMICS-NEXT: and w8, w0, #0xffff -; OUTLINE-ATOMICS-NEXT: adrp x9, var16 -; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: adrp x8, var16 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xffff +; OUTLINE-ATOMICS-NEXT: .LBB344_1: // %atomicrmw.start ; 
OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 -; OUTLINE-ATOMICS-NEXT: ldaxrh w0, [x9] -; OUTLINE-ATOMICS-NEXT: cmp w0, w8 -; OUTLINE-ATOMICS-NEXT: csel w10, w0, w8, ls -; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: ldaxrh w0, [x8] +; OUTLINE-ATOMICS-NEXT: cmp w0, w9 +; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, ls +; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB344_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i16_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: lduminalh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umin ptr @var16, i16 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: lduminalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_umin_i32_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i32_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: lduminal w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var32 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB345_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp w8, w0 ; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, ls ; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB345_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i32_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: lduminal w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umin ptr @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: lduminal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_umin_i64_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i64_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: lduminal x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var64 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB346_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp x8, x0 ; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, ls ; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB346_1 ; 
OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov x0, x8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i64_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: lduminal x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umin ptr @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: lduminal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_umin_i32_noret_acq_rel(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i32_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: lduminal w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_noret_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var32 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB347_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w9, w0 ; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, ls ; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB347_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i32_noret_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: lduminal w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw umin ptr @var32, i32 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: lduminal w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_umin_i64_noret_acq_rel(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i64_noret_acq_rel: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: lduminal x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_noret_acq_rel: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var64 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB348_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp x9, x0 ; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, ls ; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB348_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i64_noret_acq_rel: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: lduminal x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw umin ptr @var64, i64 %offset acq_rel -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: lduminal x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb 
ret void } define dso_local i8 @test_atomic_load_umin_i8_acquire(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i8_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: lduminab w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i8_acquire: ; OUTLINE-ATOMICS: // %bb.0: -; OUTLINE-ATOMICS-NEXT: and w8, w0, #0xff -; OUTLINE-ATOMICS-NEXT: adrp x9, var8 -; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: adrp x8, var8 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xff +; OUTLINE-ATOMICS-NEXT: .LBB349_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 -; OUTLINE-ATOMICS-NEXT: ldaxrb w0, [x9] -; OUTLINE-ATOMICS-NEXT: cmp w0, w8 -; OUTLINE-ATOMICS-NEXT: csel w10, w0, w8, ls -; OUTLINE-ATOMICS-NEXT: stxrb w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: ldaxrb w0, [x8] +; OUTLINE-ATOMICS-NEXT: cmp w0, w9 +; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, ls +; OUTLINE-ATOMICS-NEXT: stxrb w11, w10, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB349_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i8_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: lduminab w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umin ptr @var8, i8 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: lduminab w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_umin_i16_acquire(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i16_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: lduminah w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i16_acquire: ; OUTLINE-ATOMICS: // %bb.0: -; OUTLINE-ATOMICS-NEXT: and w8, w0, #0xffff -; OUTLINE-ATOMICS-NEXT: adrp x9, var16 -; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: adrp x8, var16 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xffff +; OUTLINE-ATOMICS-NEXT: .LBB350_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 -; OUTLINE-ATOMICS-NEXT: ldaxrh w0, [x9] -; OUTLINE-ATOMICS-NEXT: cmp w0, w8 -; OUTLINE-ATOMICS-NEXT: csel w10, w0, w8, ls -; OUTLINE-ATOMICS-NEXT: stxrh w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: ldaxrh w0, [x8] +; OUTLINE-ATOMICS-NEXT: cmp w0, w9 +; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, ls +; OUTLINE-ATOMICS-NEXT: stxrh w11, w10, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB350_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i16_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: lduminah w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umin ptr @var16, i16 %offset 
acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: lduminah w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_umin_i32_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i32_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldumina w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var32 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB351_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp w8, w0 ; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, ls ; OUTLINE-ATOMICS-NEXT: stxr w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB351_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i32_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldumina w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umin ptr @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldumina w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_umin_i64_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i64_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldumina x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var64 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB352_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp x8, x0 ; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, ls ; OUTLINE-ATOMICS-NEXT: stxr w11, x10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB352_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov x0, x8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i64_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldumina x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umin ptr @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldumina x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_umin_i32_noret_acquire(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i32_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldumina w0, w8, [x8] +; CHECK-NEXT: ret +; ; 
OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_noret_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var32 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB353_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w9, w0 ; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, ls ; OUTLINE-ATOMICS-NEXT: stxr w10, w9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB353_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i32_noret_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldumina w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw umin ptr @var32, i32 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldumina w0, w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_umin_i64_noret_acquire(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i64_noret_acquire: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldumina x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_noret_acquire: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var64 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB354_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp x9, x0 ; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, ls ; OUTLINE-ATOMICS-NEXT: stxr w10, x9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB354_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i64_noret_acquire: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldumina x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw umin ptr @var64, i64 %offset acquire -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldumina x0, x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_umin_i8_monotonic(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i8_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: lduminb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i8_monotonic: ; OUTLINE-ATOMICS: // %bb.0: -; OUTLINE-ATOMICS-NEXT: and w8, w0, #0xff -; OUTLINE-ATOMICS-NEXT: adrp x9, var8 -; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: adrp x8, var8 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xff +; OUTLINE-ATOMICS-NEXT: .LBB355_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 -; OUTLINE-ATOMICS-NEXT: ldxrb w0, [x9] -; OUTLINE-ATOMICS-NEXT: cmp w0, w8 -; 
OUTLINE-ATOMICS-NEXT: csel w10, w0, w8, ls -; OUTLINE-ATOMICS-NEXT: stxrb w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: ldxrb w0, [x8] +; OUTLINE-ATOMICS-NEXT: cmp w0, w9 +; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, ls +; OUTLINE-ATOMICS-NEXT: stxrb w11, w10, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB355_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i8_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: lduminb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umin ptr @var8, i8 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: lduminb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_umin_i16_monotonic(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i16_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: lduminh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i16_monotonic: ; OUTLINE-ATOMICS: // %bb.0: -; OUTLINE-ATOMICS-NEXT: and w8, w0, #0xffff -; OUTLINE-ATOMICS-NEXT: adrp x9, var16 -; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: adrp x8, var16 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xffff +; OUTLINE-ATOMICS-NEXT: .LBB356_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 -; OUTLINE-ATOMICS-NEXT: ldxrh w0, [x9] -; OUTLINE-ATOMICS-NEXT: cmp w0, w8 -; OUTLINE-ATOMICS-NEXT: csel w10, w0, w8, ls -; OUTLINE-ATOMICS-NEXT: stxrh w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: ldxrh w0, [x8] +; OUTLINE-ATOMICS-NEXT: cmp w0, w9 +; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, ls +; OUTLINE-ATOMICS-NEXT: stxrh w11, w10, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB356_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i16_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: lduminh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umin ptr @var16, i16 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: lduminh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_umin_i32_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i32_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldumin w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var32 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB357_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; 
OUTLINE-ATOMICS-NEXT: ldxr w8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp w8, w0 ; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, ls ; OUTLINE-ATOMICS-NEXT: stxr w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB357_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i32_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: ldumin w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umin ptr @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldumin w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_umin_i64_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i64_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldumin x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var64 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB358_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxr x8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp x8, x0 ; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, ls ; OUTLINE-ATOMICS-NEXT: stxr w11, x10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB358_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov x0, x8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i64_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldumin x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umin ptr @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldumin x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_umin_i32_noret_monotonic(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i32_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: ldumin w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_noret_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var32 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB359_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxr w9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w9, w0 ; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, ls ; OUTLINE-ATOMICS-NEXT: stxr w10, w9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB359_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i32_noret_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; 
CHECK-REG-NEXT: ldumin w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw umin ptr @var32, i32 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: ldumin w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_umin_i64_noret_monotonic(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i64_noret_monotonic: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: ldumin x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_noret_monotonic: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var64 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB360_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxr x9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp x9, x0 ; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, ls ; OUTLINE-ATOMICS-NEXT: stxr w10, x9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB360_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i64_noret_monotonic: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: ldumin x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw umin ptr @var64, i64 %offset monotonic -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: ldumin x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_umin_i8_release(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i8_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: lduminlb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i8_release: ; OUTLINE-ATOMICS: // %bb.0: -; OUTLINE-ATOMICS-NEXT: and w8, w0, #0xff -; OUTLINE-ATOMICS-NEXT: adrp x9, var8 -; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: adrp x8, var8 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xff +; OUTLINE-ATOMICS-NEXT: .LBB361_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 -; OUTLINE-ATOMICS-NEXT: ldxrb w0, [x9] -; OUTLINE-ATOMICS-NEXT: cmp w0, w8 -; OUTLINE-ATOMICS-NEXT: csel w10, w0, w8, ls -; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: ldxrb w0, [x8] +; OUTLINE-ATOMICS-NEXT: cmp w0, w9 +; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, ls +; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB361_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i8_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: lduminlb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umin ptr @var8, i8 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], 
[[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: lduminlb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_umin_i16_release(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i16_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: lduminlh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i16_release: ; OUTLINE-ATOMICS: // %bb.0: -; OUTLINE-ATOMICS-NEXT: and w8, w0, #0xffff -; OUTLINE-ATOMICS-NEXT: adrp x9, var16 -; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: adrp x8, var16 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xffff +; OUTLINE-ATOMICS-NEXT: .LBB362_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 -; OUTLINE-ATOMICS-NEXT: ldxrh w0, [x9] -; OUTLINE-ATOMICS-NEXT: cmp w0, w8 -; OUTLINE-ATOMICS-NEXT: csel w10, w0, w8, ls -; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: ldxrh w0, [x8] +; OUTLINE-ATOMICS-NEXT: cmp w0, w9 +; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, ls +; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB362_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i16_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: lduminlh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umin ptr @var16, i16 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: lduminlh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_umin_i32_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i32_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: lduminl w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var32 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB363_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxr w8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp w8, w0 ; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, ls ; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB363_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i32_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: lduminl w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umin ptr @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: lduminl w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 
@test_atomic_load_umin_i64_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i64_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: lduminl x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var64 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB364_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxr x8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp x8, x0 ; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, ls ; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB364_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov x0, x8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i64_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: lduminl x0, x0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umin ptr @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: lduminl x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i64 %old } define dso_local void @test_atomic_load_umin_i32_noret_release(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i32_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: lduminl w0, w8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_noret_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var32 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB365_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxr w9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp w9, w0 ; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, ls ; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB365_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i32_noret_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: lduminl w0, w8, [x8] +; CHECK-REG-NEXT: ret atomicrmw umin ptr @var32, i32 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: lduminl w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local void @test_atomic_load_umin_i64_noret_release(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i64_noret_release: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: lduminl x0, x8, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_noret_release: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x8, var64 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB366_1: // 
%atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldxr x9, [x8] ; OUTLINE-ATOMICS-NEXT: cmp x9, x0 ; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, ls ; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8] -; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB366_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i64_noret_release: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var64 +; CHECK-REG-NEXT: add x8, x8, :lo12:var64 +; CHECK-REG-NEXT: lduminl x0, x8, [x8] +; CHECK-REG-NEXT: ret atomicrmw umin ptr @var64, i64 %offset release -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var64 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64 -; CHECK: lduminl x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]] -; CHECK-NOT: dmb ret void } define dso_local i8 @test_atomic_load_umin_i8_seq_cst(i8 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i8_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var8 +; CHECK-NEXT: add x8, x8, :lo12:var8 +; CHECK-NEXT: lduminalb w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i8_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: -; OUTLINE-ATOMICS-NEXT: and w8, w0, #0xff -; OUTLINE-ATOMICS-NEXT: adrp x9, var8 -; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var8 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: adrp x8, var8 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var8 +; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xff +; OUTLINE-ATOMICS-NEXT: .LBB367_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 -; OUTLINE-ATOMICS-NEXT: ldaxrb w0, [x9] -; OUTLINE-ATOMICS-NEXT: cmp w0, w8 -; OUTLINE-ATOMICS-NEXT: csel w10, w0, w8, ls -; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: ldaxrb w0, [x8] +; OUTLINE-ATOMICS-NEXT: cmp w0, w9 +; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, ls +; OUTLINE-ATOMICS-NEXT: stlxrb w11, w10, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB367_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i8_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var8 +; CHECK-REG-NEXT: add x8, x8, :lo12:var8 +; CHECK-REG-NEXT: lduminalb w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umin ptr @var8, i8 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var8 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8 -; CHECK: lduminalb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i8 %old } define dso_local i16 @test_atomic_load_umin_i16_seq_cst(i16 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i16_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var16 +; CHECK-NEXT: add x8, x8, :lo12:var16 +; CHECK-NEXT: lduminalh w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i16_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: -; OUTLINE-ATOMICS-NEXT: and w8, w0, #0xffff -; OUTLINE-ATOMICS-NEXT: adrp x9, var16 -; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var16 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: adrp x8, var16 +; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var16 +; OUTLINE-ATOMICS-NEXT: and w9, w0, #0xffff +; OUTLINE-ATOMICS-NEXT: .LBB368_1: // %atomicrmw.start ; 
OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 -; OUTLINE-ATOMICS-NEXT: ldaxrh w0, [x9] -; OUTLINE-ATOMICS-NEXT: cmp w0, w8 -; OUTLINE-ATOMICS-NEXT: csel w10, w0, w8, ls -; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: ldaxrh w0, [x8] +; OUTLINE-ATOMICS-NEXT: cmp w0, w9 +; OUTLINE-ATOMICS-NEXT: csel w10, w0, w9, ls +; OUTLINE-ATOMICS-NEXT: stlxrh w11, w10, [x8] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB368_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: // kill: def $w0 killed $w0 killed $x0 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i16_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var16 +; CHECK-REG-NEXT: add x8, x8, :lo12:var16 +; CHECK-REG-NEXT: lduminalh w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umin ptr @var16, i16 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var16 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16 -; CHECK: lduminalh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i16 %old } define dso_local i32 @test_atomic_load_umin_i32_seq_cst(i32 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i32_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var32 +; CHECK-NEXT: add x8, x8, :lo12:var32 +; CHECK-NEXT: lduminal w0, w0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var32 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var32 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB369_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr w8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp w8, w0 ; OUTLINE-ATOMICS-NEXT: csel w10, w8, w0, ls ; OUTLINE-ATOMICS-NEXT: stlxr w11, w10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB369_1 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end ; OUTLINE-ATOMICS-NEXT: mov w0, w8 ; OUTLINE-ATOMICS-NEXT: ret +; +; CHECK-REG-LABEL: test_atomic_load_umin_i32_seq_cst: +; CHECK-REG: // %bb.0: +; CHECK-REG-NEXT: adrp x8, var32 +; CHECK-REG-NEXT: add x8, x8, :lo12:var32 +; CHECK-REG-NEXT: lduminal w0, w0, [x8] +; CHECK-REG-NEXT: ret %old = atomicrmw umin ptr @var32, i32 %offset seq_cst -; CHECK-NOT: dmb -; CHECK: adrp [[TMPADDR:x[0-9]+]], var32 -; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32 -; CHECK: lduminal w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]] -; CHECK-NOT: dmb ret i32 %old } define dso_local i64 @test_atomic_load_umin_i64_seq_cst(i64 %offset) nounwind { ; CHECK-LABEL: test_atomic_load_umin_i64_seq_cst: +; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, var64 +; CHECK-NEXT: add x8, x8, :lo12:var64 +; CHECK-NEXT: lduminal x0, x0, [x8] +; CHECK-NEXT: ret +; ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_seq_cst: ; OUTLINE-ATOMICS: // %bb.0: ; OUTLINE-ATOMICS-NEXT: adrp x9, var64 ; OUTLINE-ATOMICS-NEXT: add x9, x9, :lo12:var64 -; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start +; OUTLINE-ATOMICS-NEXT: .LBB370_1: // %atomicrmw.start ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1 ; OUTLINE-ATOMICS-NEXT: ldaxr x8, [x9] ; OUTLINE-ATOMICS-NEXT: cmp x8, x0 ; OUTLINE-ATOMICS-NEXT: csel x10, x8, x0, ls ; OUTLINE-ATOMICS-NEXT: stlxr w11, x10, [x9] -; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB[[LOOPSTART]] +; OUTLINE-ATOMICS-NEXT: cbnz w11, .LBB370_1 ; 
OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT: mov x0, x8
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_umin_i64_seq_cst:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: lduminal x0, x0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw umin ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: lduminal x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i64 %old
 }
 define dso_local void @test_atomic_load_umin_i32_noret_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i32_noret_seq_cst:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: lduminal w0, w8, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i32_noret_seq_cst:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: adrp x8, var32
 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var32
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB371_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT: ldaxr w9, [x8]
 ; OUTLINE-ATOMICS-NEXT: cmp w9, w0
 ; OUTLINE-ATOMICS-NEXT: csel w9, w9, w0, ls
 ; OUTLINE-ATOMICS-NEXT: stlxr w10, w9, [x8]
-; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB371_1
 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_umin_i32_noret_seq_cst:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: lduminal w0, w8, [x8]
+; CHECK-REG-NEXT: ret
 atomicrmw umin ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: lduminal w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret void
 }
 define dso_local void @test_atomic_load_umin_i64_noret_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i64_noret_seq_cst:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: lduminal x0, x8, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_umin_i64_noret_seq_cst:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: adrp x8, var64
 ; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var64
-; OUTLINE-ATOMICS-NEXT: .LBB[[LOOPSTART:.*]]: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: .LBB372_1: // %atomicrmw.start
 ; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
 ; OUTLINE-ATOMICS-NEXT: ldaxr x9, [x8]
 ; OUTLINE-ATOMICS-NEXT: cmp x9, x0
 ; OUTLINE-ATOMICS-NEXT: csel x9, x9, x0, ls
 ; OUTLINE-ATOMICS-NEXT: stlxr w10, x9, [x8]
-; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB[[LOOPSTART]]
+; OUTLINE-ATOMICS-NEXT: cbnz w10, .LBB372_1
 ; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_umin_i64_noret_seq_cst:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: lduminal x0, x8, [x8]
+; CHECK-REG-NEXT: ret
 atomicrmw umin ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: lduminal x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret void
 }
 define dso_local i8 @test_atomic_load_xor_i8_acq_rel(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i8_acq_rel:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var8
+; CHECK-NEXT: add x8, x8, :lo12:var8
+; CHECK-NEXT: ldeoralb w0, w0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i8_acq_rel:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -8825,19 +11952,27 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor1_acq_rel
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xor_i8_acq_rel:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var8
+; CHECK-REG-NEXT: add x8, x8, :lo12:var8
+; CHECK-REG-NEXT: ldeoralb w0, w0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw xor ptr @var8, i8 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
-; CHECK: ldeoralb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i8 %old
 }
 define dso_local i16 @test_atomic_load_xor_i16_acq_rel(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i16_acq_rel:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var16
+; CHECK-NEXT: add x8, x8, :lo12:var16
+; CHECK-NEXT: ldeoralh w0, w0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i16_acq_rel:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -8846,19 +11981,27 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor2_acq_rel
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xor_i16_acq_rel:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var16
+; CHECK-REG-NEXT: add x8, x8, :lo12:var16
+; CHECK-REG-NEXT: ldeoralh w0, w0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw xor ptr @var16, i16 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
-; CHECK: ldeoralh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i16 %old
 }
 define dso_local i32 @test_atomic_load_xor_i32_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i32_acq_rel:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldeoral w0, w0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_acq_rel:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -8867,19 +12010,27 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor4_acq_rel
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xor_i32_acq_rel:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldeoral w0, w0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw xor ptr @var32, i32 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldeoral w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i32 %old
 }
 define dso_local i64 @test_atomic_load_xor_i64_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i64_acq_rel:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldeoral x0, x0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_acq_rel:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -8888,19 +12039,27 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor8_acq_rel
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xor_i64_acq_rel:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldeoral x0, x0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw xor ptr @var64, i64 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldeoral x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i64 %old
 }
 define dso_local void @test_atomic_load_xor_i32_noret_acq_rel(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i32_noret_acq_rel:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldeoral w0, w8, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_noret_acq_rel:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -8909,18 +12068,26 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor4_acq_rel
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xor_i32_noret_acq_rel:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldeoral w0, w8, [x8]
+; CHECK-REG-NEXT: ret
 atomicrmw xor ptr @var32, i32 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldeoral w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret void
 }
 define dso_local void @test_atomic_load_xor_i64_noret_acq_rel(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i64_noret_acq_rel:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldeoral x0, x8, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_noret_acq_rel:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -8929,18 +12096,26 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor8_acq_rel
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xor_i64_noret_acq_rel:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldeoral x0, x8, [x8]
+; CHECK-REG-NEXT: ret
 atomicrmw xor ptr @var64, i64 %offset acq_rel
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldeoral x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret void
 }
 define dso_local i8 @test_atomic_load_xor_i8_acquire(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i8_acquire:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var8
+; CHECK-NEXT: add x8, x8, :lo12:var8
+; CHECK-NEXT: ldeorab w0, w0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i8_acquire:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -8949,19 +12124,27 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor1_acq
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xor_i8_acquire:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var8
+; CHECK-REG-NEXT: add x8, x8, :lo12:var8
+; CHECK-REG-NEXT: ldeorab w0, w0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw xor ptr @var8, i8 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
-; CHECK: ldeorab w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i8 %old
 }
 define dso_local i16 @test_atomic_load_xor_i16_acquire(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i16_acquire:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var16
+; CHECK-NEXT: add x8, x8, :lo12:var16
+; CHECK-NEXT: ldeorah w0, w0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i16_acquire:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -8970,19 +12153,27 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor2_acq
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xor_i16_acquire:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var16
+; CHECK-REG-NEXT: add x8, x8, :lo12:var16
+; CHECK-REG-NEXT: ldeorah w0, w0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw xor ptr @var16, i16 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
-; CHECK: ldeorah w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i16 %old
 }
 define dso_local i32 @test_atomic_load_xor_i32_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i32_acquire:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldeora w0, w0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_acquire:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -8991,19 +12182,27 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor4_acq
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xor_i32_acquire:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldeora w0, w0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw xor ptr @var32, i32 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldeora w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i32 %old
 }
 define dso_local i64 @test_atomic_load_xor_i64_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i64_acquire:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldeora x0, x0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_acquire:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -9012,19 +12211,27 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor8_acq
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xor_i64_acquire:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldeora x0, x0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw xor ptr @var64, i64 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldeora x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i64 %old
 }
 define dso_local void @test_atomic_load_xor_i32_noret_acquire(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i32_noret_acquire:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldeora w0, w8, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_noret_acquire:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -9033,18 +12240,26 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor4_acq
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xor_i32_noret_acquire:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldeora w0, w8, [x8]
+; CHECK-REG-NEXT: ret
 atomicrmw xor ptr @var32, i32 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldeora w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret void
 }
 define dso_local void @test_atomic_load_xor_i64_noret_acquire(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i64_noret_acquire:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldeora x0, x8, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_noret_acquire:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -9053,18 +12268,26 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor8_acq
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xor_i64_noret_acquire:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldeora x0, x8, [x8]
+; CHECK-REG-NEXT: ret
 atomicrmw xor ptr @var64, i64 %offset acquire
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldeora x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret void
 }
 define dso_local i8 @test_atomic_load_xor_i8_monotonic(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i8_monotonic:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var8
+; CHECK-NEXT: add x8, x8, :lo12:var8
+; CHECK-NEXT: ldeorb w0, w0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i8_monotonic:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -9073,19 +12296,27 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor1_relax
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xor_i8_monotonic:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var8
+; CHECK-REG-NEXT: add x8, x8, :lo12:var8
+; CHECK-REG-NEXT: ldeorb w0, w0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw xor ptr @var8, i8 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
-; CHECK: ldeorb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i8 %old
 }
 define dso_local i16 @test_atomic_load_xor_i16_monotonic(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i16_monotonic:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var16
+; CHECK-NEXT: add x8, x8, :lo12:var16
+; CHECK-NEXT: ldeorh w0, w0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i16_monotonic:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -9094,19 +12325,27 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor2_relax
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xor_i16_monotonic:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var16
+; CHECK-REG-NEXT: add x8, x8, :lo12:var16
+; CHECK-REG-NEXT: ldeorh w0, w0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw xor ptr @var16, i16 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
-; CHECK: ldeorh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i16 %old
 }
 define dso_local i32 @test_atomic_load_xor_i32_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i32_monotonic:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldeor w0, w0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_monotonic:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -9115,19 +12354,27 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor4_relax
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xor_i32_monotonic:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldeor w0, w0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw xor ptr @var32, i32 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldeor w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i32 %old
 }
 define dso_local i64 @test_atomic_load_xor_i64_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i64_monotonic:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldeor x0, x0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_monotonic:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -9136,19 +12383,27 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor8_relax
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xor_i64_monotonic:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldeor x0, x0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw xor ptr @var64, i64 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldeor x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i64 %old
 }
 define dso_local void @test_atomic_load_xor_i32_noret_monotonic(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i32_noret_monotonic:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldeor w0, w8, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_noret_monotonic:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -9157,18 +12412,26 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor4_relax
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xor_i32_noret_monotonic:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldeor w0, w8, [x8]
+; CHECK-REG-NEXT: ret
 atomicrmw xor ptr @var32, i32 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldeor w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret void
 }
 define dso_local void @test_atomic_load_xor_i64_noret_monotonic(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i64_noret_monotonic:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldeor x0, x8, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_noret_monotonic:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -9177,18 +12440,26 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor8_relax
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xor_i64_noret_monotonic:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldeor x0, x8, [x8]
+; CHECK-REG-NEXT: ret
 atomicrmw xor ptr @var64, i64 %offset monotonic
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldeor x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret void
 }
 define dso_local i8 @test_atomic_load_xor_i8_release(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i8_release:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var8
+; CHECK-NEXT: add x8, x8, :lo12:var8
+; CHECK-NEXT: ldeorlb w0, w0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i8_release:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -9197,19 +12468,27 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor1_rel
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xor_i8_release:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var8
+; CHECK-REG-NEXT: add x8, x8, :lo12:var8
+; CHECK-REG-NEXT: ldeorlb w0, w0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw xor ptr @var8, i8 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
-; CHECK: ldeorlb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i8 %old
 }
 define dso_local i16 @test_atomic_load_xor_i16_release(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i16_release:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var16
+; CHECK-NEXT: add x8, x8, :lo12:var16
+; CHECK-NEXT: ldeorlh w0, w0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i16_release:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -9218,19 +12497,27 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor2_rel
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xor_i16_release:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var16
+; CHECK-REG-NEXT: add x8, x8, :lo12:var16
+; CHECK-REG-NEXT: ldeorlh w0, w0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw xor ptr @var16, i16 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
-; CHECK: ldeorlh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i16 %old
 }
 define dso_local i32 @test_atomic_load_xor_i32_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i32_release:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldeorl w0, w0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_release:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -9239,19 +12526,27 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor4_rel
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xor_i32_release:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldeorl w0, w0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw xor ptr @var32, i32 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldeorl w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i32 %old
 }
 define dso_local i64 @test_atomic_load_xor_i64_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i64_release:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldeorl x0, x0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_release:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -9260,19 +12555,27 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor8_rel
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xor_i64_release:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldeorl x0, x0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw xor ptr @var64, i64 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldeorl x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i64 %old
 }
 define dso_local void @test_atomic_load_xor_i32_noret_release(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i32_noret_release:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldeorl w0, w8, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_noret_release:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -9281,18 +12584,26 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor4_rel
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xor_i32_noret_release:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldeorl w0, w8, [x8]
+; CHECK-REG-NEXT: ret
 atomicrmw xor ptr @var32, i32 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldeorl w{{[0-9]+}}, w{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret void
 }
 define dso_local void @test_atomic_load_xor_i64_noret_release(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i64_noret_release:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldeorl x0, x8, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_noret_release:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -9301,18 +12612,26 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor8_rel
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xor_i64_noret_release:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldeorl x0, x8, [x8]
+; CHECK-REG-NEXT: ret
 atomicrmw xor ptr @var64, i64 %offset release
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldeorl x{{[0-9]+}}, x{{[1-9][0-9]*}}, [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret void
 }
 define dso_local i8 @test_atomic_load_xor_i8_seq_cst(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i8_seq_cst:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var8
+; CHECK-NEXT: add x8, x8, :lo12:var8
+; CHECK-NEXT: ldeoralb w0, w0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i8_seq_cst:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -9321,19 +12640,27 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor1_acq_rel
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xor_i8_seq_cst:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var8
+; CHECK-REG-NEXT: add x8, x8, :lo12:var8
+; CHECK-REG-NEXT: ldeoralb w0, w0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw xor ptr @var8, i8 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var8
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var8
-; CHECK: ldeoralb w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i8 %old
 }
 define dso_local i16 @test_atomic_load_xor_i16_seq_cst(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i16_seq_cst:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var16
+; CHECK-NEXT: add x8, x8, :lo12:var16
+; CHECK-NEXT: ldeoralh w0, w0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i16_seq_cst:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -9342,19 +12669,27 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor2_acq_rel
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xor_i16_seq_cst:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var16
+; CHECK-REG-NEXT: add x8, x8, :lo12:var16
+; CHECK-REG-NEXT: ldeoralh w0, w0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw xor ptr @var16, i16 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var16
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var16
-; CHECK: ldeoralh w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i16 %old
 }
 define dso_local i32 @test_atomic_load_xor_i32_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i32_seq_cst:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldeoral w0, w0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_seq_cst:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -9363,19 +12698,27 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor4_acq_rel
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xor_i32_seq_cst:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldeoral w0, w0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw xor ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldeoral w[[OLD:[0-9]+]], w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i32 %old
 }
 define dso_local i64 @test_atomic_load_xor_i64_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i64_seq_cst:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldeoral x0, x0, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_seq_cst:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -9384,19 +12727,27 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor8_acq_rel
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xor_i64_seq_cst:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldeoral x0, x0, [x8]
+; CHECK-REG-NEXT: ret
 %old = atomicrmw xor ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldeoral x[[OLD:[0-9]+]], x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret i64 %old
 }
 define dso_local void @test_atomic_load_xor_i32_noret_seq_cst(i32 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i32_noret_seq_cst:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var32
+; CHECK-NEXT: add x8, x8, :lo12:var32
+; CHECK-NEXT: ldeoral w0, w8, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i32_noret_seq_cst:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -9405,18 +12756,26 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor4_acq_rel
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xor_i32_noret_seq_cst:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var32
+; CHECK-REG-NEXT: add x8, x8, :lo12:var32
+; CHECK-REG-NEXT: ldeoral w0, w8, [x8]
+; CHECK-REG-NEXT: ret
 atomicrmw xor ptr @var32, i32 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
-; CHECK: ldeoral w0, w[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret void
 }
 define dso_local void @test_atomic_load_xor_i64_noret_seq_cst(i64 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_xor_i64_noret_seq_cst:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, var64
+; CHECK-NEXT: add x8, x8, :lo12:var64
+; CHECK-NEXT: ldeoral x0, x8, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_xor_i64_noret_seq_cst:
 ; OUTLINE-ATOMICS: // %bb.0:
 ; OUTLINE-ATOMICS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
@@ -9425,23 +12784,49 @@
 ; OUTLINE-ATOMICS-NEXT: bl __aarch64_ldeor8_acq_rel
 ; OUTLINE-ATOMICS-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_xor_i64_noret_seq_cst:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: adrp x8, var64
+; CHECK-REG-NEXT: add x8, x8, :lo12:var64
+; CHECK-REG-NEXT: ldeoral x0, x8, [x8]
+; CHECK-REG-NEXT: ret
 atomicrmw xor ptr @var64, i64 %offset seq_cst
-; CHECK-NOT: dmb
-; CHECK: adrp [[TMPADDR:x[0-9]+]], var64
-; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var64
-; CHECK: ldeoral x0, x[[NEW:[0-9]+]], [x[[ADDR]]]
-; CHECK-NOT: dmb
 ret void
 }
 define dso_local i128 @test_atomic_load_i128() nounwind {
 ; CHECK-LABEL: test_atomic_load_i128:
-; CHECK: casp
-
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x0, xzr
+; CHECK-NEXT: mov x1, xzr
+; CHECK-NEXT: adrp x8, var128
+; CHECK-NEXT: add x8, x8, :lo12:var128
+; CHECK-NEXT: casp x0, x1, x0, x1, [x8]
+; CHECK-NEXT: ret
+;
 ; OUTLINE-ATOMICS-LABEL: test_atomic_load_i128:
-; OUTLINE-ATOMICS: ldxp
-; OUTLINE-ATOMICS: stxp
+; OUTLINE-ATOMICS: // %bb.0:
+; OUTLINE-ATOMICS-NEXT: adrp x8, var128
+; OUTLINE-ATOMICS-NEXT: add x8, x8, :lo12:var128
+; OUTLINE-ATOMICS-NEXT: .LBB403_1: // %atomicrmw.start
+; OUTLINE-ATOMICS-NEXT: // =>This Inner Loop Header: Depth=1
+; OUTLINE-ATOMICS-NEXT: ldxp x0, x1, [x8]
+; OUTLINE-ATOMICS-NEXT: stxp w9, x0, x1, [x8]
+; OUTLINE-ATOMICS-NEXT: cbnz w9, .LBB403_1
+; OUTLINE-ATOMICS-NEXT: // %bb.2: // %atomicrmw.end
+; OUTLINE-ATOMICS-NEXT: ret
+;
+; CHECK-REG-LABEL: test_atomic_load_i128:
+; CHECK-REG: // %bb.0:
+; CHECK-REG-NEXT: mov x0, xzr
+; CHECK-REG-NEXT: mov x1, xzr
+; CHECK-REG-NEXT: adrp x8, var128
+; CHECK-REG-NEXT: add x8, x8, :lo12:var128
+; CHECK-REG-NEXT: casp x0, x1, x0, x1, [x8]
+; CHECK-REG-NEXT: ret
+
 %pair = load atomic i128, ptr @var128 monotonic, align 16
 ret i128 %pair
 }
diff --git a/llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll b/llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll
--- a/llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll
+++ b/llvm/test/CodeGen/AArch64/atomic-ops-msvc.ll
@@ -605,15 +605,15 @@
 define dso_local i8 @test_atomic_load_umin_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: adrp x9, var8
-; CHECK-NEXT: add x9, x9, :lo12:var8
+; CHECK-NEXT: adrp x8, var8
+; CHECK-NEXT: add x8, x8, :lo12:var8
+; CHECK-NEXT: and w9, w0, #0xff
 ; CHECK-NEXT: .LBB32_1: // %atomicrmw.start
 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldxrb w0, [x9]
-; CHECK-NEXT: cmp w0, w8
-; CHECK-NEXT: csel w10, w0, w8, ls
-; CHECK-NEXT: stxrb w11, w10, [x9]
+; CHECK-NEXT: ldxrb w0, [x8]
+; CHECK-NEXT: cmp w0, w9
+; CHECK-NEXT: csel w10, w0, w9, ls
+; CHECK-NEXT: stxrb w11, w10, [x8]
 ; CHECK-NEXT: cbnz w11, .LBB32_1
 ; CHECK-NEXT: // %bb.2: // %atomicrmw.end
 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
@@ -625,15 +625,15 @@
 define dso_local i16 @test_atomic_load_umin_i16(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: adrp x9, var16
-; CHECK-NEXT: add x9, x9, :lo12:var16
+; CHECK-NEXT: adrp x8, var16
+; CHECK-NEXT: add x8, x8, :lo12:var16
+; CHECK-NEXT: and w9, w0, #0xffff
 ; CHECK-NEXT: .LBB33_1: // %atomicrmw.start
 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldaxrh w0, [x9]
-; CHECK-NEXT: cmp w0, w8
-; CHECK-NEXT: csel w10, w0, w8, ls
-; CHECK-NEXT: stxrh w11, w10, [x9]
+; CHECK-NEXT: ldaxrh w0, [x8]
+; CHECK-NEXT: cmp w0, w9
+; CHECK-NEXT: csel w10, w0, w9, ls
+; CHECK-NEXT: stxrh w11, w10, [x8]
 ; CHECK-NEXT: cbnz w11, .LBB33_1
 ; CHECK-NEXT: // %bb.2: // %atomicrmw.end
 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
@@ -684,15 +684,15 @@
 define dso_local i8 @test_atomic_load_umax_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: adrp x9, var8
-; CHECK-NEXT: add x9, x9, :lo12:var8
+; CHECK-NEXT: adrp x8, var8
+; CHECK-NEXT: add x8, x8, :lo12:var8
+; CHECK-NEXT: and w9, w0, #0xff
 ; CHECK-NEXT: .LBB36_1: // %atomicrmw.start
 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldaxrb w0, [x9]
-; CHECK-NEXT: cmp w0, w8
-; CHECK-NEXT: csel w10, w0, w8, hi
-; CHECK-NEXT: stlxrb w11, w10, [x9]
+; CHECK-NEXT: ldaxrb w0, [x8]
+; CHECK-NEXT: cmp w0, w9
+; CHECK-NEXT: csel w10, w0, w9, hi
+; CHECK-NEXT: stlxrb w11, w10, [x8]
 ; CHECK-NEXT: cbnz w11, .LBB36_1
 ; CHECK-NEXT: // %bb.2: // %atomicrmw.end
 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
@@ -704,15 +704,15 @@
 define dso_local i16 @test_atomic_load_umax_i16(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: adrp x9, var16
-; CHECK-NEXT: add x9, x9, :lo12:var16
+; CHECK-NEXT: adrp x8, var16
+; CHECK-NEXT: add x8, x8, :lo12:var16
+; CHECK-NEXT: and w9, w0, #0xffff
 ; CHECK-NEXT: .LBB37_1: // %atomicrmw.start
 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldxrh w0, [x9]
-; CHECK-NEXT: cmp w0, w8
-; CHECK-NEXT: csel w10, w0, w8, hi
-; CHECK-NEXT: stxrh w11, w10, [x9]
+; CHECK-NEXT: ldxrh w0, [x8]
+; CHECK-NEXT: cmp w0, w9
+; CHECK-NEXT: csel w10, w0, w9, hi
+; CHECK-NEXT: stxrh w11, w10, [x8]
 ; CHECK-NEXT: cbnz w11, .LBB37_1
 ; CHECK-NEXT: // %bb.2: // %atomicrmw.end
 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
diff --git a/llvm/test/CodeGen/AArch64/atomic-ops.ll b/llvm/test/CodeGen/AArch64/atomic-ops.ll
--- a/llvm/test/CodeGen/AArch64/atomic-ops.ll
+++ b/llvm/test/CodeGen/AArch64/atomic-ops.ll
@@ -822,15 +822,15 @@
 define dso_local i8 @test_atomic_load_umin_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: adrp x9, var8
-; CHECK-NEXT: add x9, x9, :lo12:var8
+; CHECK-NEXT: adrp x8, var8
+; CHECK-NEXT: add x8, x8, :lo12:var8
+; CHECK-NEXT: and w9, w0, #0xff
 ; CHECK-NEXT: .LBB32_1: // %atomicrmw.start
 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldxrb w0, [x9]
-; CHECK-NEXT: cmp w0, w8
-; CHECK-NEXT: csel w10, w0, w8, ls
-; CHECK-NEXT: stxrb w11, w10, [x9]
+; CHECK-NEXT: ldxrb w0, [x8]
+; CHECK-NEXT: cmp w0, w9
+; CHECK-NEXT: csel w10, w0, w9, ls
+; CHECK-NEXT: stxrb w11, w10, [x8]
 ; CHECK-NEXT: cbnz w11, .LBB32_1
 ; CHECK-NEXT: // %bb.2: // %atomicrmw.end
 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
@@ -842,15 +842,15 @@
 define dso_local i16 @test_atomic_load_umin_i16(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umin_i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: adrp x9, var16
-; CHECK-NEXT: add x9, x9, :lo12:var16
+; CHECK-NEXT: adrp x8, var16
+; CHECK-NEXT: add x8, x8, :lo12:var16
+; CHECK-NEXT: and w9, w0, #0xffff
 ; CHECK-NEXT: .LBB33_1: // %atomicrmw.start
 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldaxrh w0, [x9]
-; CHECK-NEXT: cmp w0, w8
-; CHECK-NEXT: csel w10, w0, w8, ls
-; CHECK-NEXT: stxrh w11, w10, [x9]
+; CHECK-NEXT: ldaxrh w0, [x8]
+; CHECK-NEXT: cmp w0, w9
+; CHECK-NEXT: csel w10, w0, w9, ls
+; CHECK-NEXT: stxrh w11, w10, [x8]
 ; CHECK-NEXT: cbnz w11, .LBB33_1
 ; CHECK-NEXT: // %bb.2: // %atomicrmw.end
 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
@@ -900,15 +900,15 @@
 define dso_local i8 @test_atomic_load_umax_i8(i8 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: adrp x9, var8
-; CHECK-NEXT: add x9, x9, :lo12:var8
+; CHECK-NEXT: adrp x8, var8
+; CHECK-NEXT: add x8, x8, :lo12:var8
+; CHECK-NEXT: and w9, w0, #0xff
 ; CHECK-NEXT: .LBB36_1: // %atomicrmw.start
 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldaxrb w0, [x9]
-; CHECK-NEXT: cmp w0, w8
-; CHECK-NEXT: csel w10, w0, w8, hi
-; CHECK-NEXT: stlxrb w11, w10, [x9]
+; CHECK-NEXT: ldaxrb w0, [x8]
+; CHECK-NEXT: cmp w0, w9
+; CHECK-NEXT: csel w10, w0, w9, hi
+; CHECK-NEXT: stlxrb w11, w10, [x8]
 ; CHECK-NEXT: cbnz w11, .LBB36_1
 ; CHECK-NEXT: // %bb.2: // %atomicrmw.end
 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
@@ -920,15 +920,15 @@
 define dso_local i16 @test_atomic_load_umax_i16(i16 %offset) nounwind {
 ; CHECK-LABEL: test_atomic_load_umax_i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: adrp x9, var16
-; CHECK-NEXT: add x9, x9, :lo12:var16
+; CHECK-NEXT: adrp x8, var16
+; CHECK-NEXT: add x8, x8, :lo12:var16
+; CHECK-NEXT: and w9, w0, #0xffff
 ; CHECK-NEXT: .LBB37_1: // %atomicrmw.start
 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldxrh w0, [x9]
-; CHECK-NEXT: cmp w0, w8
-; CHECK-NEXT: csel w10, w0, w8, hi
-; CHECK-NEXT: stxrh w11, w10, [x9]
+; CHECK-NEXT: ldxrh w0, [x8]
+; CHECK-NEXT: cmp w0, w9
+; CHECK-NEXT: csel w10, w0, w9, hi
+; CHECK-NEXT: stxrh w11, w10, [x8]
 ; CHECK-NEXT: cbnz w11, .LBB37_1
 ; CHECK-NEXT: // %bb.2: // %atomicrmw.end
 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
diff --git a/llvm/test/CodeGen/AArch64/bcmp-inline-small.ll b/llvm/test/CodeGen/AArch64/bcmp-inline-small.ll
--- a/llvm/test/CodeGen/AArch64/bcmp-inline-small.ll
+++ b/llvm/test/CodeGen/AArch64/bcmp-inline-small.ll
@@ -22,7 +22,7 @@
 ; CHECKS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECKS-NEXT: .cfi_def_cfa_offset 16
 ; CHECKS-NEXT: .cfi_offset w30, -16
-; CHECKS-NEXT: mov w2, #15
+; CHECKS-NEXT: mov w2, #15 // =0xf
 ; CHECKS-NEXT: bl bcmp
 ; CHECKS-NEXT: cmp w0, #0
 ; CHECKS-NEXT: cset w0, eq
@@ -52,7 +52,7 @@
 ; CHECKS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECKS-NEXT: .cfi_def_cfa_offset 16
 ; CHECKS-NEXT: .cfi_offset w30, -16
-; CHECKS-NEXT: mov w2, #15
+; CHECKS-NEXT: mov w2, #15 // =0xf
 ; CHECKS-NEXT: bl bcmp
 ; CHECKS-NEXT: cmp w0, #0
 ; CHECKS-NEXT: cset w0, eq
@@ -67,16 +67,16 @@
 define i1 @test_bs(ptr %s1, ptr %s2) optsize {
 ; CHECKN-LABEL: test_bs:
 ; CHECKN: // %bb.0: // %entry
-; CHECKN-NEXT: ldp x8, x9, [x0]
-; CHECKN-NEXT: ldp x10, x11, [x1]
+; CHECKN-NEXT: ldp x8, x11, [x1]
 ; CHECKN-NEXT: ldr x12, [x0, #16]
-; CHECKN-NEXT: cmp x8, x10
-; CHECKN-NEXT: ldr x8, [x1, #16]
-; CHECKN-NEXT: ccmp x9, x11, #0, eq
-; CHECKN-NEXT: ldur x9, [x0, #23]
-; CHECKN-NEXT: ldur x10, [x1, #23]
-; CHECKN-NEXT: ccmp x12, x8, #0, eq
-; CHECKN-NEXT: ccmp x9, x10, #0, eq
+; CHECKN-NEXT: ldp x9, x10, [x0]
+; CHECKN-NEXT: ldr x13, [x1, #16]
+; CHECKN-NEXT: cmp x9, x8
+; CHECKN-NEXT: ldur x8, [x0, #23]
+; CHECKN-NEXT: ldur x9, [x1, #23]
+; CHECKN-NEXT: ccmp x10, x11, #0, eq
+; CHECKN-NEXT: ccmp x12, x13, #0, eq
+; CHECKN-NEXT: ccmp x8, x9, #0, eq
 ; CHECKN-NEXT: cset w0, eq
 ; CHECKN-NEXT: ret
 ;
@@ -85,7 +85,7 @@
 ; CHECKS-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECKS-NEXT: .cfi_def_cfa_offset 16
 ; CHECKS-NEXT: .cfi_offset w30, -16
-; CHECKS-NEXT: mov w2, #31
+; CHECKS-NEXT: mov w2, #31 // =0x1f
 ; CHECKS-NEXT: bl memcmp
 ; CHECKS-NEXT: cmp w0, #0
 ; CHECKS-NEXT: cset w0, eq
diff --git a/llvm/test/CodeGen/AArch64/bcmp.ll b/llvm/test/CodeGen/AArch64/bcmp.ll
--- a/llvm/test/CodeGen/AArch64/bcmp.ll
+++ b/llvm/test/CodeGen/AArch64/bcmp.ll
@@ -6,7 +6,7 @@
 define i1 @bcmp0(ptr %a, ptr %b) {
 ; CHECK-LABEL: bcmp0:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w0, #1
+; CHECK-NEXT: mov w0, #1 // =0x1
 ; CHECK-NEXT: ret
 %cr = call i32 @bcmp(ptr %a, ptr %b, i64 0)
 %r = icmp eq i32 %cr, 0
@@ -249,10 +249,10 @@
 define i1 @bcmp16(ptr %a, ptr %b) {
 ; CHECK-LABEL: bcmp16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp x8, x9, [x0]
-; CHECK-NEXT: ldp x10, x11, [x1]
-; CHECK-NEXT: cmp x8, x10
-; CHECK-NEXT: ccmp x9, x11, #0, eq
+; CHECK-NEXT: ldp x8, x11, [x1]
+; CHECK-NEXT: ldp x9, x10, [x0]
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: ccmp x10, x11, #0, eq
 ; CHECK-NEXT: cset w0, eq
 ; CHECK-NEXT: ret
 %cr = call i32 @bcmp(ptr %a, ptr %b, i64 16)
@@ -263,13 +263,13 @@
 define i1 @bcmp20(ptr %a, ptr %b) {
 ; CHECK-LABEL: bcmp20:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp x8, x9, [x0]
-; CHECK-NEXT: ldp x10, x11, [x1]
+; CHECK-NEXT: ldp x8, x11, [x1]
 ; CHECK-NEXT: ldr w12, [x0, #16]
-; CHECK-NEXT: cmp x8, x10
-; CHECK-NEXT: ldr w8, [x1, #16]
-; CHECK-NEXT: ccmp x9, x11, #0, eq
-; CHECK-NEXT: ccmp x12, x8, #0, eq
+; CHECK-NEXT: ldp x9, x10, [x0]
+; CHECK-NEXT: ldr w13, [x1, #16]
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: ccmp x10, x11, #0, eq
+; CHECK-NEXT: ccmp x12, x13, #0, eq
 ; CHECK-NEXT: cset w0, eq
 ; CHECK-NEXT: ret
 %cr = call i32 @bcmp(ptr %a, ptr %b, i64 20)
@@ -280,13 +280,13 @@
 define i1 @bcmp24(ptr %a, ptr %b) {
 ; CHECK-LABEL: bcmp24:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp x8, x9, [x0]
-; CHECK-NEXT: ldp x10, x11, [x1]
+; CHECK-NEXT: ldp x8, x11, [x1]
 ; CHECK-NEXT: ldr x12, [x0, #16]
-; CHECK-NEXT: cmp x8, x10
-; CHECK-NEXT: ldr x8, [x1, #16]
-; CHECK-NEXT: ccmp x9, x11, #0, eq
-; CHECK-NEXT: ccmp x12, x8, #0, eq
+; CHECK-NEXT: ldp x9, x10, [x0]
+; CHECK-NEXT: ldr x13, [x1, #16]
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: ccmp x10, x11, #0, eq
+; CHECK-NEXT: ccmp x12, x13, #0, eq
 ; CHECK-NEXT: cset w0, eq
 ; CHECK-NEXT: ret
 %cr = call i32 @bcmp(ptr %a, ptr %b, i64 24)
@@ -297,16 +297,16 @@
 define i1 @bcmp28(ptr %a, ptr %b) {
 ; CHECK-LABEL: bcmp28:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp x8, x9, [x0]
-; CHECK-NEXT: ldp x10, x11, [x1]
+; CHECK-NEXT: ldp x8, x11, [x1]
 ; CHECK-NEXT: ldr x12, [x0, #16]
-; CHECK-NEXT: cmp x8, x10
-; CHECK-NEXT: ldr x8, [x1, #16]
-; CHECK-NEXT: ccmp x9, x11, #0, eq
-; CHECK-NEXT: ldr w9, [x0, #24]
-; CHECK-NEXT: ldr w10, [x1, #24]
-; CHECK-NEXT: ccmp x12, x8, #0, eq
-; CHECK-NEXT: ccmp x9, x10, #0, eq
+; CHECK-NEXT: ldp x9, x10, [x0]
+; CHECK-NEXT: ldr x13, [x1, #16]
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: ldr w8, [x0, #24]
+; CHECK-NEXT: ldr w9, [x1, #24]
+; CHECK-NEXT: ccmp x10, x11, #0, eq
+; CHECK-NEXT: ccmp x12, x13, #0, eq
+; CHECK-NEXT: ccmp x8, x9, #0, eq
 ; CHECK-NEXT: cset w0, eq
 ; CHECK-NEXT: ret
 %cr = call i32 @bcmp(ptr %a, ptr %b, i64 28)
@@ -317,17 +317,17 @@
 define i1 @bcmp33(ptr %a, ptr %b) {
 ; CHECK-LABEL: bcmp33:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp x8, x9, [x0]
-; CHECK-NEXT: ldp x10, x11, [x1]
-; CHECK-NEXT: cmp x8, x10
-; CHECK-NEXT: ccmp x9, x11, #0, eq
-; CHECK-NEXT: ldrb w11, [x1, #32]
+; CHECK-NEXT: ldp x8, x11, [x1]
+; CHECK-NEXT: ldp x9, x10, [x0]
+; CHECK-NEXT: ldp x12, x13, [x1, #16]
+; CHECK-NEXT: cmp x9, x8
 ; CHECK-NEXT: ldp x8, x9, [x0, #16]
-; CHECK-NEXT: ldp x12, x10, [x1, #16]
+; CHECK-NEXT: ccmp x10, x11, #0, eq
+; CHECK-NEXT: ldrb w10, [x0, #32]
+; CHECK-NEXT: ldrb w11, [x1, #32]
 ; CHECK-NEXT: ccmp x8, x12, #0, eq
-; CHECK-NEXT: ldrb w8, [x0, #32]
-; CHECK-NEXT: ccmp x9, x10, #0, eq
-; CHECK-NEXT: ccmp x8, x11, #0, eq
+; CHECK-NEXT: ccmp x9, x13, #0, eq
+; CHECK-NEXT: ccmp x10, x11, #0, eq
 ; CHECK-NEXT: cset w0, eq
 ; CHECK-NEXT: ret
 %cr = call i32 @bcmp(ptr %a, ptr %b, i64 33)
@@ -338,17 +338,17 @@
 define i1 @bcmp38(ptr %a, ptr %b) {
 ; CHECK-LABEL: bcmp38:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp x8, x9, [x0]
-; CHECK-NEXT: ldp x10, x11, [x1]
-; CHECK-NEXT: cmp x8, x10
-; CHECK-NEXT: ccmp x9, x11, #0, eq
-; CHECK-NEXT: ldur x11, [x1, #30]
+; CHECK-NEXT: ldp x8, x11, [x1]
+; CHECK-NEXT: ldp x9, x10, [x0]
+; CHECK-NEXT: ldp x12, x13, [x1, #16]
+; CHECK-NEXT: cmp x9, x8
 ; CHECK-NEXT: ldp x8, x9, [x0, #16]
-; CHECK-NEXT: ldp x12, x10, [x1, #16]
+; CHECK-NEXT: ccmp x10, x11, #0, eq
+; CHECK-NEXT: ldur x10, [x0, #30]
+; CHECK-NEXT: ldur x11, [x1, #30]
 ; CHECK-NEXT: ccmp x8, x12, #0, eq
-; CHECK-NEXT: ldur x8, [x0, #30]
-; CHECK-NEXT: ccmp x9, x10, #0, eq
-; CHECK-NEXT: ccmp x8, x11, #0, eq
+; CHECK-NEXT: ccmp x9, x13, #0, eq
+; CHECK-NEXT: ccmp x10, x11, #0, eq
 ; CHECK-NEXT: cset w0, eq
 ; CHECK-NEXT: ret
 %cr = call i32 @bcmp(ptr %a, ptr %b, i64 38)
@@ -359,20 +359,20 @@
 define i1 @bcmp45(ptr %a, ptr %b) {
 ; CHECK-LABEL: bcmp45:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp x8, x9, [x0]
-; CHECK-NEXT: ldp x10, x11, [x1]
-; CHECK-NEXT: cmp x8, x10
-; CHECK-NEXT: ccmp x9, x11, #0, eq
-; CHECK-NEXT: ldr x11, [x1, #32]
+; CHECK-NEXT: ldp x8, x11, [x1]
+; CHECK-NEXT: ldp x9, x10, [x0]
+; CHECK-NEXT: ldp x12, x13, [x1, #16]
+; CHECK-NEXT: cmp x9, x8
 ; CHECK-NEXT: ldp x8, x9, [x0, #16]
-; CHECK-NEXT: ldp x12, x10, [x1, #16]
+; CHECK-NEXT: ccmp x10, x11, #0, eq
+; CHECK-NEXT: ldr x10, [x0, #32]
+; CHECK-NEXT: ldr x11, [x1, #32]
+; CHECK-NEXT: ccmp x8, x12, #0, eq
+; CHECK-NEXT: ldur x8, [x0, #37]
+; CHECK-NEXT: ldur x12, [x1, #37]
+; CHECK-NEXT: ccmp x9, x13, #0, eq
+; CHECK-NEXT: ccmp x10, x11, #0, eq
 ; CHECK-NEXT: ccmp x8, x12, #0, eq
-; CHECK-NEXT: ldr x8, [x0, #32]
-; CHECK-NEXT: ccmp x9, x10, #0, eq
-; CHECK-NEXT: ldur x9, [x0, #37]
-; CHECK-NEXT: ldur x10, [x1, #37]
-; CHECK-NEXT: ccmp x8, x11, #0, eq
-; CHECK-NEXT: ccmp x9, x10, #0, eq
 ; CHECK-NEXT: cset w0, eq
 ; CHECK-NEXT: ret
 %cr = call i32 @bcmp(ptr %a, ptr %b, i64 45)
@@ -389,22 +389,22 @@
 define i1 @bcmp64(ptr %a, ptr %b) {
 ; CHECK-LABEL: bcmp64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp x8, x9, [x0]
-; CHECK-NEXT: ldp x10, x11, [x1]
-; CHECK-NEXT: cmp x8, x10
-; CHECK-NEXT: ccmp x9, x11, #0, eq
+; CHECK-NEXT: ldp x8, x11, [x1]
+; CHECK-NEXT: ldp x9, x10, [x0]
+; CHECK-NEXT: ldp x12, x13, [x1, #16]
+; CHECK-NEXT: cmp x9, x8
 ; CHECK-NEXT: ldp x8, x9, [x0, #16]
-; CHECK-NEXT: ldp x12, x10, [x1, #16]
-; CHECK-NEXT: ccmp x8, x12, #0, eq
-; CHECK-NEXT: ldp x8, x11, [x1, #32]
-; CHECK-NEXT: ccmp x9, x10, #0, eq
-; CHECK-NEXT: ldp x9, x10, [x0, #32]
-; CHECK-NEXT: ccmp x9, x8, #0, eq
-; CHECK-NEXT: ccmp x10, x11, #0, eq
-; CHECK-NEXT: ldp x9, x10, [x0, #48]
-; CHECK-NEXT: ldp x8, x11, [x1, #48]
-; CHECK-NEXT: ccmp x9, x8, #0, eq
 ; CHECK-NEXT: ccmp x10, x11, #0, eq
+; CHECK-NEXT: ccmp x8, x12, #0, eq
+; CHECK-NEXT: ldp x8, x11, [x0, #32]
+; CHECK-NEXT: ldp x10, x12, [x1, #32]
+; CHECK-NEXT: ccmp x9, x13, #0, eq
+; CHECK-NEXT: ldp x9, x13, [x1, #48]
+; CHECK-NEXT: ccmp x8, x10, #0, eq
+; CHECK-NEXT: ldp x8, x10, [x0, #48]
+; CHECK-NEXT: ccmp x11, x12, #0, eq
+; CHECK-NEXT: ccmp x8, x9, #0, eq
+; CHECK-NEXT: ccmp x10, x13, #0, eq
 ; CHECK-NEXT: cset w0, eq
 ; CHECK-NEXT: ret
 %cr = call i32 @bcmp(ptr %a, ptr %b, i64 64)
@@ -418,7 +418,7 @@
 ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: mov w2, #89
+; CHECK-NEXT: mov w2, #89 // =0x59
 ; CHECK-NEXT: bl bcmp
 ; CHECK-NEXT: cmp w0, #0
 ; CHECK-NEXT: cset w0, eq
@@ -449,14 +449,14 @@
 define i1 @bcmp_i8(i8 %a0, i8 %b0, i8 %a1, i8 %b1, i8 %a2, i8 %b2) {
 ; CHECK-LABEL: bcmp_i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: and w9, w1, #0xff
-; CHECK-NEXT: and w8, w2, #0xff
+; CHECK-NEXT: and w8, w1, #0xff
+; CHECK-NEXT: and w9, w2, #0xff
 ; CHECK-NEXT: and w10, w3, #0xff
-; CHECK-NEXT: cmp w9, w0, uxtb
-; CHECK-NEXT: ccmp w10, w8, #0, eq
+; CHECK-NEXT: cmp w8, w0, uxtb
 ; CHECK-NEXT: and w8, w4, #0xff
-; CHECK-NEXT: and w9, w5, #0xff
-; CHECK-NEXT: ccmp w9, w8, #0, eq
+; CHECK-NEXT: and w11, w5, #0xff
+; CHECK-NEXT: ccmp w10, w9, #0, eq
+; CHECK-NEXT: ccmp w11, w8, #0, eq
 ; CHECK-NEXT: cset w0, eq
 ; CHECK-NEXT: ret
 %xor0 = xor i8 %b0, %a0
@@ -471,14 +471,14 @@
 define i1 @bcmp_i16(i16 %a0, i16 %b0, i16 %a1, i16 %b1, i16 %a2, i16 %b2) {
 ; CHECK-LABEL: bcmp_i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: and w9, w1, #0xffff
-; CHECK-NEXT: and w8, w2, #0xffff
+; CHECK-NEXT: and w8, w1, #0xffff
+; CHECK-NEXT: and w9, w2, #0xffff
 ; CHECK-NEXT: and w10, w3, #0xffff
-; CHECK-NEXT: cmp w9, w0, uxth
-; CHECK-NEXT: ccmp w10, w8, #0, eq
+; CHECK-NEXT: cmp w8, w0, uxth
 ; CHECK-NEXT: and w8, w4, #0xffff
-; CHECK-NEXT: and w9, w5, #0xffff
-; CHECK-NEXT: ccmp w9, w8, #0, eq
+; CHECK-NEXT: and w11, w5, #0xffff
+; CHECK-NEXT: ccmp w10, w9, #0, eq
+; CHECK-NEXT: ccmp w11, w8, #0, eq
 ; CHECK-NEXT: cset w0, eq
 ; CHECK-NEXT: ret
 %xor0 = xor i16 %b0, %a0
@@ -494,14 +494,14 @@
 ; CHECK-LABEL: bcmp_i128:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: cmp x2, x0
+; CHECK-NEXT: ldp x8, x10, [sp]
 ; CHECK-NEXT: ccmp x3, x1, #0, eq
-; CHECK-NEXT: ldp x9, x8, [sp]
+; CHECK-NEXT: ldp x9, x11, [sp, #16]
 ; CHECK-NEXT: ccmp x6, x4, #0, eq
-; CHECK-NEXT: ldp x10, x11, [sp, #16]
 ; CHECK-NEXT: ccmp x7, x5, #0, eq
 ; CHECK-NEXT: cset w12, ne
-; CHECK-NEXT: cmp x10, x9
-; CHECK-NEXT: ccmp x11, x8, #0, eq
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: ccmp x11, x10, #0, eq
 ; CHECK-NEXT: csinc w0, w12, wzr, eq
 ; CHECK-NEXT: ret
 %xor0 = xor i128 %b0, %a0
@@ -516,14 +516,14 @@
 define i1 @bcmp_i42(i42 %a0, i42 %b0, i42 %a1, i42 %b1, i42 %a2, i42 %b2) {
 ; CHECK-LABEL: bcmp_i42:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: and x9, x0, #0x3ffffffffff
-; CHECK-NEXT: and x10, x1, #0x3ffffffffff
-; CHECK-NEXT: and x8, x2, #0x3ffffffffff
+; CHECK-NEXT: and x8, x0, #0x3ffffffffff
+; CHECK-NEXT: and x9, x1, #0x3ffffffffff
+; CHECK-NEXT: and x10, x2, #0x3ffffffffff
 ; CHECK-NEXT: and x11, x3, #0x3ffffffffff
-; CHECK-NEXT: cmp x10, x9
-; CHECK-NEXT: and x9, x5, #0x3ffffffffff
-; CHECK-NEXT: ccmp x11, x8, #0, eq
+; CHECK-NEXT: cmp x9, x8
 ; CHECK-NEXT: and x8, x4, #0x3ffffffffff
+; CHECK-NEXT: and x9, x5, #0x3ffffffffff
+; CHECK-NEXT: ccmp x11, x10, #0, eq
 ; CHECK-NEXT: ccmp x9, x8, #0, eq
 ; CHECK-NEXT: cset w0, ne
 ; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/bf16-shuffle.ll b/llvm/test/CodeGen/AArch64/bf16-shuffle.ll
--- a/llvm/test/CodeGen/AArch64/bf16-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/bf16-shuffle.ll
@@ -226,8 +226,8 @@
 ; CHECK-NEXT: adrp x8, .LCPI16_0
 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT: mov v3.16b, v2.16b
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI16_0]
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT: adrp x8, .LCPI16_1
 ; CHECK-NEXT: tbl v2.16b, { v0.16b, v1.16b }, v4.16b
 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI16_1]
@@ -244,8 +244,8 @@
 ; CHECK-NEXT: adrp x8, .LCPI17_0
 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT: mov v3.16b, v2.16b
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI17_0]
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT: adrp x8, .LCPI17_1
 ; CHECK-NEXT: tbl v2.16b, { v0.16b, v1.16b }, v4.16b
 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI17_1]
@@ -262,8 +262,8 @@
 ; CHECK-NEXT: adrp x8, .LCPI18_0
 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT: mov v3.16b, v2.16b
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI18_0]
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT: adrp x8, .LCPI18_1
 ; CHECK-NEXT: tbl v2.16b, { v0.16b, v1.16b }, v4.16b
 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI18_1]
diff --git a/llvm/test/CodeGen/AArch64/bfis-in-loop.ll b/llvm/test/CodeGen/AArch64/bfis-in-loop.ll
--- a/llvm/test/CodeGen/AArch64/bfis-in-loop.ll
+++ b/llvm/test/CodeGen/AArch64/bfis-in-loop.ll
@@ -11,30 +11,6 @@
 @global = external global %struct.bar, align 8
 define i64 @bfis_in_loop_zero() {
-; CHECK-LABEL: bfis_in_loop_zero:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: adrp x9, :got:global
-; CHECK-NEXT: mov x0, xzr
-; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: ldr x9, [x9, :got_lo12:global]
-; CHECK-NEXT: ldr x9, [x9]
-; CHECK-NEXT: .LBB0_1: // %midblock
-; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldrh w10, [x9, #72]
-; CHECK-NEXT: cmp w10, #0
-; CHECK-NEXT: ubfx x11, x10, #8, #24
-; CHECK-NEXT: cset w12, ne
-; CHECK-NEXT: csel w8, w8, w11, eq
-; CHECK-NEXT: ldr x11, [x9, #8]
-; CHECK-NEXT: and x9, x10, #0xff
-; CHECK-NEXT: and x10, x0, #0xffffffff00000000
-; CHECK-NEXT: orr x9, x9, x8, lsl #8
-; CHECK-NEXT: orr x10, x10, x12, lsl #16
-; CHECK-NEXT: orr x0, x10, x9
-; CHECK-NEXT: ldr x9, [x11, #16]
-; CHECK-NEXT: cbnz x11, .LBB0_1
-; CHECK-NEXT: // %bb.2: // %exit
-; CHECK-NEXT: ret
 entry:
 %var = load ptr, ptr @global, align 8
 br label %preheader
@@ -79,30 +55,6 @@
 }
 define i64 @bfis_in_loop_undef() {
-; CHECK-LABEL: bfis_in_loop_undef:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: adrp x9, :got:global
-; CHECK-NEXT: mov w8, wzr
-; CHECK-NEXT: // implicit-def: $x0
-; CHECK-NEXT: ldr x9, [x9, :got_lo12:global]
-; CHECK-NEXT: ldr x9, [x9]
-; CHECK-NEXT: .LBB1_1: // %midblock
-; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldrh w10, [x9, #72]
-; CHECK-NEXT: cmp w10, #0
-; CHECK-NEXT: ubfx x11, x10, #8, #24
-; CHECK-NEXT: cset w12, ne
-; CHECK-NEXT: csel w8, w8, w11, eq
-; CHECK-NEXT: ldr x11, [x9, #8]
-; CHECK-NEXT: and x9, x10, #0xff
-; CHECK-NEXT: and x10, x0, #0xffffffff00000000
-; CHECK-NEXT: orr x9, x9, x8, lsl #8
-; CHECK-NEXT: orr x10, x10, x12, lsl #16
-; CHECK-NEXT: orr x0, x10, x9
-; CHECK-NEXT: ldr x9, [x11, #16]
-; CHECK-NEXT: cbnz x11, .LBB1_1
-; CHECK-NEXT: // %bb.2: // %exit
-; CHECK-NEXT: ret
 entry:
 %var = load ptr, ptr @global, align 8
 br label %preheader
@@ -145,3 +97,5 @@
 exit:
 ret i64 %var30
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/bitfield-insert.ll b/llvm/test/CodeGen/AArch64/bitfield-insert.ll
--- a/llvm/test/CodeGen/AArch64/bitfield-insert.ll
+++ b/llvm/test/CodeGen/AArch64/bitfield-insert.ll
@@ -95,11 +95,11 @@
 define void @test_32bit_masked(ptr %existing, ptr %new) {
 ; CHECK-LABEL: test_32bit_masked:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: mov w10, #135 // =0x87
-; CHECK-NEXT: ldr w9, [x1]
-; CHECK-NEXT: and w8, w8, w10
-; CHECK-NEXT: bfi w8, w9, #3, #4
+; CHECK-NEXT: ldr w9, [x0]
+; CHECK-NEXT: mov w8, #135 // =0x87
+; CHECK-NEXT: ldr w10, [x1]
+; CHECK-NEXT: and w8, w9, w8
+; CHECK-NEXT: bfi w8, w10, #3, #4
 ; CHECK-NEXT: str w8, [x0]
 ; CHECK-NEXT: ret
 %oldval = load volatile i32, ptr %existing
@@ -141,11 +141,11 @@
 define void @test_32bit_complexmask(ptr %existing, ptr %new) {
 ; CHECK-LABEL: test_32bit_complexmask:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: mov w10, #647 // =0x287
-; CHECK-NEXT: ldr w9, [x1]
-; CHECK-NEXT: and w8, w8, w10
-; CHECK-NEXT: bfi w8, w9, #3, #4
+; CHECK-NEXT: ldr w9, [x0]
+; CHECK-NEXT: mov w8, #647 // =0x287
+; CHECK-NEXT: ldr w10, [x1]
+; CHECK-NEXT: and w8, w9, w8
+; CHECK-NEXT: bfi w8, w10, #3, #4
 ; CHECK-NEXT: str w8, [x0]
 ; CHECK-NEXT: ret
 %oldval = load volatile i32, ptr %existing
@@ -166,11 +166,11 @@
 ; CHECK-LABEL: test_32bit_badmask:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: mov w10, #135 // =0x87
 ; CHECK-NEXT: ldr w9, [x1]
-; CHECK-NEXT: mov w11, #632 // =0x278
-; CHECK-NEXT: and w8, w8, w10
-; CHECK-NEXT: and w9, w11, w9, lsl #3
+; CHECK-NEXT: mov w10, #632 // =0x278
+; CHECK-NEXT: mov w11, #135 // =0x87
+; CHECK-NEXT: and w9, w10, w9, lsl #3
+; CHECK-NEXT: and w8, w8, w11
 ; CHECK-NEXT: orr w8, w8, w9
 ; CHECK-NEXT: str w8, [x0]
 ; CHECK-NEXT: ret
@@ -191,13 +191,13 @@
 define void @test_64bit_badmask(ptr %existing, ptr %new) {
 ; CHECK-LABEL: test_64bit_badmask:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x9, [x0]
-; CHECK-NEXT: mov w8, #135 // =0x87
-; CHECK-NEXT: ldr x10, [x1]
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: ldr x9, [x1]
+; CHECK-NEXT: mov w10, #135 // =0x87
 ; CHECK-NEXT: mov w11, #664 // =0x298
-; CHECK-NEXT: and x8, x9, x8
-; CHECK-NEXT: lsl w10, w10, #3
-; CHECK-NEXT: and x9, x10, x11
+; CHECK-NEXT: lsl w9, w9, #3
+; CHECK-NEXT: and x8, x8, x10
+; CHECK-NEXT: and x9, x9, x11
 ; CHECK-NEXT: orr x8, x8, x9
 ; CHECK-NEXT: str x8, [x0]
 ; CHECK-NEXT: ret
@@ -544,8 +544,8 @@
 define i32 @test9(i64 %b, i32 %e) {
 ; CHECK-LABEL: test9:
 ; CHECK: // %bb.0:
CHECK: // %bb.0: -; CHECK-NEXT: lsr w8, w1, #23 ; CHECK-NEXT: lsr x0, x0, #12 +; CHECK-NEXT: lsr w8, w1, #23 ; CHECK-NEXT: bfi w0, w8, #23, #9 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/bool-ext-inc.ll b/llvm/test/CodeGen/AArch64/bool-ext-inc.ll --- a/llvm/test/CodeGen/AArch64/bool-ext-inc.ll +++ b/llvm/test/CodeGen/AArch64/bool-ext-inc.ll @@ -31,8 +31,8 @@ ; GISEL: // %bb.0: ; GISEL-NEXT: adrp x8, .LCPI1_0 ; GISEL-NEXT: cmeq v0.4s, v0.4s, v1.4s -; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI1_0] -; GISEL-NEXT: and v0.16b, v0.16b, v3.16b +; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI1_0] +; GISEL-NEXT: and v0.16b, v0.16b, v1.16b ; GISEL-NEXT: sub v0.4s, v2.4s, v0.4s ; GISEL-NEXT: ret %c = icmp eq <4 x i32> %c1, %c2 @@ -107,7 +107,7 @@ ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: mov w0, #-1 +; CHECK-NEXT: mov w0, #-1 // =0xffffffff ; CHECK-NEXT: bl callee_signext_i1 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -117,7 +117,7 @@ ; GISEL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; GISEL-NEXT: .cfi_def_cfa_offset 16 ; GISEL-NEXT: .cfi_offset w30, -16 -; GISEL-NEXT: mov w8, #1 +; GISEL-NEXT: mov w8, #1 // =0x1 ; GISEL-NEXT: sbfx w0, w8, #0, #1 ; GISEL-NEXT: bl callee_signext_i1 ; GISEL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/branch-relax-alignment.ll b/llvm/test/CodeGen/AArch64/branch-relax-alignment.ll --- a/llvm/test/CodeGen/AArch64/branch-relax-alignment.ll +++ b/llvm/test/CodeGen/AArch64/branch-relax-alignment.ll @@ -7,9 +7,9 @@ define i32 @invert_bcc_block_align_higher_func(i32 %x, i32 %y) align 4 #0 { ; CHECK-LABEL: invert_bcc_block_align_higher_func: ; CHECK: ; %bb.0: ; %common.ret +; CHECK-NEXT: mov w8, #9 ; =0x9 ; CHECK-NEXT: cmp w0, w1 -; CHECK-NEXT: mov w8, #9 -; CHECK-NEXT: mov w9, #42 +; CHECK-NEXT: mov w9, #42 ; =0x2a ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: csel w8, w9, w8, eq ; CHECK-NEXT: str w8, [x8] diff --git a/llvm/test/CodeGen/AArch64/branch-relax-bcc.ll b/llvm/test/CodeGen/AArch64/branch-relax-bcc.ll --- a/llvm/test/CodeGen/AArch64/branch-relax-bcc.ll +++ b/llvm/test/CodeGen/AArch64/branch-relax-bcc.ll @@ -4,9 +4,9 @@ define i32 @invert_bcc(float %x, float %y) #0 { ; CHECK-LABEL: invert_bcc: ; CHECK: ; %bb.0: -; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: fcmp s0, s1 -; CHECK-NEXT: mov w8, #42 +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: mov w8, #42 ; =0x2a ; CHECK-NEXT: b.pl LBB0_3 ; CHECK-NEXT: b LBB0_2 ; CHECK-NEXT: LBB0_3: @@ -15,8 +15,8 @@ ; CHECK-NEXT: str w8, [x8] ; CHECK-NEXT: ret ; CHECK-NEXT: LBB0_2: ; %bb2 -; CHECK-NEXT: mov w0, #1 -; CHECK-NEXT: mov w8, #9 +; CHECK-NEXT: mov w0, #1 ; =0x1 +; CHECK-NEXT: mov w8, #9 ; =0x9 ; CHECK-NEXT: ; InlineAsm Start ; CHECK-NEXT: nop ; CHECK-NEXT: nop @@ -59,7 +59,7 @@ ; CHECK-NEXT: bl _foo ; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload ; CHECK-NEXT: LBB1_3: ; %if.end -; CHECK-NEXT: mov w0, #7 +; CHECK-NEXT: mov w0, #7 ; =0x7 ; CHECK-NEXT: ret entry: %cmp = icmp eq i32 %a, 5 diff --git a/llvm/test/CodeGen/AArch64/build-one-lane.ll b/llvm/test/CodeGen/AArch64/build-one-lane.ll --- a/llvm/test/CodeGen/AArch64/build-one-lane.ll +++ b/llvm/test/CodeGen/AArch64/build-one-lane.ll @@ -318,14 +318,13 @@ define <32 x i8> @test_lanex_32xi8(<32 x i8> %a, i32 %x) { ; CHECK-LABEL: test_lanex_32xi8: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #32 -; CHECK-NEXT: .cfi_def_cfa_offset 32 ; 
CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: and x9, x0, #0x1f -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: mov w10, #30 -; CHECK-NEXT: stp q0, q1, [sp] -; CHECK-NEXT: strb w10, [x8, x9] +; CHECK-NEXT: stp q0, q1, [sp, #-32]! +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: and x8, x0, #0x1f +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: mov w10, #30 // =0x1e +; CHECK-NEXT: strb w10, [x9, x8] ; CHECK-NEXT: ldp q0, q1, [sp], #32 ; CHECK-NEXT: ret %b = insertelement <32 x i8> %a, i8 30, i32 %x diff --git a/llvm/test/CodeGen/AArch64/build-vector-to-extract-subvec-crash.ll b/llvm/test/CodeGen/AArch64/build-vector-to-extract-subvec-crash.ll --- a/llvm/test/CodeGen/AArch64/build-vector-to-extract-subvec-crash.ll +++ b/llvm/test/CodeGen/AArch64/build-vector-to-extract-subvec-crash.ll @@ -10,8 +10,8 @@ ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: umov w9, v0.h[0] -; CHECK-NEXT: mov x10, sp ; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov x10, sp ; CHECK-NEXT: bfi x10, x0, #1, #3 ; CHECK-NEXT: mov x8, x0 ; CHECK-NEXT: mov w0, wzr diff --git a/llvm/test/CodeGen/AArch64/build-vector-two-dup.ll b/llvm/test/CodeGen/AArch64/build-vector-two-dup.ll --- a/llvm/test/CodeGen/AArch64/build-vector-two-dup.ll +++ b/llvm/test/CodeGen/AArch64/build-vector-two-dup.ll @@ -78,8 +78,8 @@ define <16 x i8> @test5(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) { ; CHECK-LABEL: test5: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: adrp x8, .LCPI4_0 ; CHECK-NEXT: ldr b0, [x0] +; CHECK-NEXT: adrp x8, .LCPI4_0 ; CHECK-NEXT: ld1r { v1.16b }, [x1] ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI4_0] ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b @@ -212,8 +212,8 @@ ; CHECK-NEXT: ld1r { v0.2s }, [x0] ; CHECK-NEXT: ldr w8, [x1] ; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: mov v1.s[0], w8 ; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: mov v1.s[0], w8 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/cgp-usubo.ll b/llvm/test/CodeGen/AArch64/cgp-usubo.ll --- a/llvm/test/CodeGen/AArch64/cgp-usubo.ll +++ b/llvm/test/CodeGen/AArch64/cgp-usubo.ll @@ -37,12 +37,12 @@ ; CHECK-LABEL: usubo_ugt_constant_op0_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: mov w9, #42 +; CHECK-NEXT: mov w9, #42 // =0x2a ; CHECK-NEXT: cmp w8, #42 ; CHECK-NEXT: sub w9, w9, w0 ; CHECK-NEXT: cset w8, hi -; CHECK-NEXT: mov w0, w8 ; CHECK-NEXT: strb w9, [x1] +; CHECK-NEXT: mov w0, w8 ; CHECK-NEXT: ret %s = sub i8 42, %x %ov = icmp ugt i8 %x, 42 @@ -56,12 +56,12 @@ ; CHECK-LABEL: usubo_ult_constant_op0_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: mov w9, #43 +; CHECK-NEXT: mov w9, #43 // =0x2b ; CHECK-NEXT: cmp w8, #43 ; CHECK-NEXT: sub w9, w9, w0 ; CHECK-NEXT: cset w8, hi -; CHECK-NEXT: mov w0, w8 ; CHECK-NEXT: strh w9, [x1] +; CHECK-NEXT: mov w0, w8 ; CHECK-NEXT: ret %s = sub i16 43, %x %ov = icmp ult i16 43, %x @@ -77,9 +77,9 @@ ; CHECK-NEXT: and w8, w0, #0xffff ; CHECK-NEXT: sub w9, w0, #44 ; CHECK-NEXT: cmp w8, #44 +; CHECK-NEXT: strh w9, [x1] ; CHECK-NEXT: cset w8, lo ; CHECK-NEXT: mov w0, w8 -; CHECK-NEXT: strh w9, [x1] ; CHECK-NEXT: ret %s = add i16 %x, -44 %ov = icmp ult i16 %x, 44 @@ -93,9 +93,9 @@ ; CHECK-NEXT: and w8, w0, #0xff ; CHECK-NEXT: sub w9, w0, #45 ; CHECK-NEXT: cmp w8, #45 +; CHECK-NEXT: strb w9, [x1] ; CHECK-NEXT: cset w8, lo ; CHECK-NEXT: mov w0, w8 -; CHECK-NEXT: strb w9, [x1] ; CHECK-NEXT: ret %ov = icmp ugt i8 45, %x %s = add i8 %x, -45 @@ -111,8 +111,8 @@ ; 
CHECK-NEXT: cmp w0, #0 ; CHECK-NEXT: sub w9, w0, #1 ; CHECK-NEXT: cset w8, eq -; CHECK-NEXT: mov w0, w8 ; CHECK-NEXT: str w9, [x1] +; CHECK-NEXT: mov w0, w8 ; CHECK-NEXT: ret %s = add i32 %x, -1 %ov = icmp eq i32 %x, 0 @@ -162,10 +162,10 @@ ; CHECK-NEXT: // %bb.1: // %t ; CHECK-NEXT: cmp x0, x1 ; CHECK-NEXT: mov x23, x0 -; CHECK-NEXT: cset w21, lo ; CHECK-NEXT: mov x20, x2 -; CHECK-NEXT: mov w0, w21 +; CHECK-NEXT: cset w21, lo ; CHECK-NEXT: mov x22, x1 +; CHECK-NEXT: mov w0, w21 ; CHECK-NEXT: bl call ; CHECK-NEXT: subs x8, x23, x22 ; CHECK-NEXT: b.hs .LBB8_3 diff --git a/llvm/test/CodeGen/AArch64/cmp-chains.ll b/llvm/test/CodeGen/AArch64/cmp-chains.ll --- a/llvm/test/CodeGen/AArch64/cmp-chains.ll +++ b/llvm/test/CodeGen/AArch64/cmp-chains.ll @@ -78,8 +78,8 @@ ; GISEL-NEXT: cmp w4, w5 ; GISEL-NEXT: cset w10, ne ; GISEL-NEXT: cmp w6, w7 -; GISEL-NEXT: cset w11, eq ; GISEL-NEXT: and w8, w8, w9 +; GISEL-NEXT: cset w11, eq ; GISEL-NEXT: and w9, w10, w11 ; GISEL-NEXT: and w0, w8, w9 ; GISEL-NEXT: ret @@ -168,8 +168,8 @@ ; GISEL-NEXT: cmp w4, w5 ; GISEL-NEXT: cset w10, ne ; GISEL-NEXT: cmp w6, w7 -; GISEL-NEXT: cset w11, eq ; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: cset w11, eq ; GISEL-NEXT: orr w9, w10, w11 ; GISEL-NEXT: orr w0, w8, w9 ; GISEL-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll --- a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll +++ b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll @@ -229,9 +229,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff ; CHECK-NEXT: adrp x8, .LCPI18_0 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI18_0] ; CHECK-NEXT: cmgt v0.4s, v0.4s, v1.4s -; CHECK-NEXT: bic v1.16b, v2.16b, v0.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI18_0] +; CHECK-NEXT: bic v1.16b, v1.16b, v0.16b ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s ; CHECK-NEXT: ret %c = icmp sgt <4 x i32> %a, @@ -244,18 +244,18 @@ ; CHECK-LABEL: sign_4xi65: ; CHECK: // %bb.0: ; CHECK-NEXT: sbfx x8, x1, #0, #1 -; CHECK-NEXT: sbfx x10, x5, #0, #1 -; CHECK-NEXT: orr x9, x8, #0x1 -; CHECK-NEXT: lsr x1, x8, #63 -; CHECK-NEXT: sbfx x8, x7, #0, #1 -; CHECK-NEXT: orr x4, x10, #0x1 -; CHECK-NEXT: lsr x5, x10, #63 -; CHECK-NEXT: orr x6, x8, #0x1 -; CHECK-NEXT: fmov d0, x9 ; CHECK-NEXT: sbfx x9, x3, #0, #1 -; CHECK-NEXT: orr x2, x9, #0x1 +; CHECK-NEXT: sbfx x10, x7, #0, #1 +; CHECK-NEXT: lsr x1, x8, #63 +; CHECK-NEXT: orr x8, x8, #0x1 ; CHECK-NEXT: lsr x3, x9, #63 -; CHECK-NEXT: lsr x7, x8, #63 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: sbfx x8, x5, #0, #1 +; CHECK-NEXT: lsr x7, x10, #63 +; CHECK-NEXT: orr x2, x9, #0x1 +; CHECK-NEXT: orr x6, x10, #0x1 +; CHECK-NEXT: lsr x5, x8, #63 +; CHECK-NEXT: orr x4, x8, #0x1 ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll b/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll --- a/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll +++ b/llvm/test/CodeGen/AArch64/cmpxchg-idioms.ll @@ -15,7 +15,7 @@ ; CHECK-NEXT: stlxr w8, w2, [x0] ; CHECK-NEXT: cbnz w8, LBB0_1 ; CHECK-NEXT: ; %bb.3: -; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: mov w0, #1 ; =0x1 ; CHECK-NEXT: ret ; CHECK-NEXT: LBB0_4: ; %cmpxchg.nostore ; CHECK-NEXT: mov w0, wzr @@ -64,7 +64,7 @@ ; CHECK-NEXT: stlxrb w9, w2, [x0] ; CHECK-NEXT: cbnz w9, LBB1_1 ; CHECK-NEXT: ; %bb.3: -; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: mov w8, #1 ; =0x1 ; CHECK-NEXT: eor w0, w8, #0x1 ; CHECK-NEXT: ret ; CHECK-NEXT: LBB1_4: ; %cmpxchg.nostore @@ -87,8 +87,8 @@ ; OUTLINE-ATOMICS-NEXT: mov w1, w2 ; 
OUTLINE-ATOMICS-NEXT: mov x2, x8 ; OUTLINE-ATOMICS-NEXT: bl ___aarch64_cas1_acq_rel -; OUTLINE-ATOMICS-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: cmp w0, w19, uxtb +; OUTLINE-ATOMICS-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload ; OUTLINE-ATOMICS-NEXT: cset w8, eq ; OUTLINE-ATOMICS-NEXT: eor w0, w8, #0x1 ; OUTLINE-ATOMICS-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload @@ -188,13 +188,13 @@ ; CHECK-NEXT: stlxr w8, w20, [x19] ; CHECK-NEXT: cbnz w8, LBB3_1 ; CHECK-NEXT: ; %bb.3: -; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: mov w8, #1 ; =0x1 ; CHECK-NEXT: b LBB3_5 ; CHECK-NEXT: LBB3_4: ; %cmpxchg.nostore ; CHECK-NEXT: mov w8, wzr ; CHECK-NEXT: clrex ; CHECK-NEXT: LBB3_5: ; %for.cond.preheader -; CHECK-NEXT: mov w22, #2 +; CHECK-NEXT: mov w22, #2 ; =0x2 ; CHECK-NEXT: LBB3_6: ; %for.cond ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: cbz w22, LBB3_9 @@ -236,7 +236,7 @@ ; OUTLINE-ATOMICS-NEXT: mov w21, w0 ; OUTLINE-ATOMICS-NEXT: bl ___aarch64_cas4_acq_rel ; OUTLINE-ATOMICS-NEXT: cmp w0, w21 -; OUTLINE-ATOMICS-NEXT: mov w22, #2 +; OUTLINE-ATOMICS-NEXT: mov w22, #2 ; =0x2 ; OUTLINE-ATOMICS-NEXT: cset w8, eq ; OUTLINE-ATOMICS-NEXT: LBB3_1: ; %for.cond ; OUTLINE-ATOMICS-NEXT: ; =>This Inner Loop Header: Depth=1 diff --git a/llvm/test/CodeGen/AArch64/combine-andintoload.ll b/llvm/test/CodeGen/AArch64/combine-andintoload.ll --- a/llvm/test/CodeGen/AArch64/combine-andintoload.ll +++ b/llvm/test/CodeGen/AArch64/combine-andintoload.ll @@ -412,13 +412,13 @@ define zeroext i1 @bigger(ptr nocapture readonly %c, ptr nocapture readonly %e, i64 %d, i64 %p1) { ; CHECK-LABEL: bigger: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldrb w8, [x0, x2] +; CHECK-NEXT: ldrb w8, [x1, x2] +; CHECK-NEXT: ldrb w9, [x0, x2] ; CHECK-NEXT: and w10, w3, #0x7 -; CHECK-NEXT: ldrb w9, [x1, x2] -; CHECK-NEXT: mov w11, #8 +; CHECK-NEXT: mov w11, #8 // =0x8 ; CHECK-NEXT: sub w10, w11, w10 -; CHECK-NEXT: eor w8, w9, w8 -; CHECK-NEXT: mov w9, #5 +; CHECK-NEXT: eor w8, w8, w9 +; CHECK-NEXT: mov w9, #5 // =0x5 ; CHECK-NEXT: lsr w8, w8, w10 ; CHECK-NEXT: tst w8, w9 ; CHECK-NEXT: cset w0, eq @@ -426,13 +426,13 @@ ; ; CHECKBE-LABEL: bigger: ; CHECKBE: // %bb.0: // %entry -; CHECKBE-NEXT: ldrb w8, [x0, x2] +; CHECKBE-NEXT: ldrb w8, [x1, x2] +; CHECKBE-NEXT: ldrb w9, [x0, x2] ; CHECKBE-NEXT: and w10, w3, #0x7 -; CHECKBE-NEXT: ldrb w9, [x1, x2] -; CHECKBE-NEXT: mov w11, #8 +; CHECKBE-NEXT: mov w11, #8 // =0x8 ; CHECKBE-NEXT: sub w10, w11, w10 -; CHECKBE-NEXT: eor w8, w9, w8 -; CHECKBE-NEXT: mov w9, #5 +; CHECKBE-NEXT: eor w8, w8, w9 +; CHECKBE-NEXT: mov w9, #5 // =0x5 ; CHECKBE-NEXT: lsr w8, w8, w10 ; CHECKBE-NEXT: tst w8, w9 ; CHECKBE-NEXT: cset w0, eq diff --git a/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll b/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll --- a/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll +++ b/llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll @@ -13,10 +13,10 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, :got:a ; CHECK-NEXT: ldr x8, [x8, :got_lo12:a] -; CHECK-NEXT: ldr w9, [x8] +; CHECK-NEXT: ldr w8, [x8] +; CHECK-NEXT: cmp w8, #10 ; CHECK-NEXT: adrp x8, :got:b ; CHECK-NEXT: ldr x8, [x8, :got_lo12:b] -; CHECK-NEXT: cmp w9, #10 ; CHECK-NEXT: b.le .LBB0_3 ; CHECK-NEXT: // %bb.1: // %land.lhs.true ; CHECK-NEXT: adrp x9, :got:c @@ -26,7 +26,7 @@ ; CHECK-NEXT: cmp w10, w9 ; CHECK-NEXT: b.ne .LBB0_4 ; CHECK-NEXT: // %bb.2: -; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: mov w0, #1 // =0x1 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB0_3: // 
%lor.lhs.false ; CHECK-NEXT: b.lt .LBB0_6 @@ -38,7 +38,7 @@ ; CHECK-NEXT: cmp w8, w9 ; CHECK-NEXT: b.ne .LBB0_6 ; CHECK-NEXT: // %bb.5: -; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: mov w0, #1 // =0x1 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB0_6: // %if.end ; CHECK-NEXT: mov w0, wzr @@ -91,7 +91,7 @@ ; CHECK-NEXT: cmp w8, w9 ; CHECK-NEXT: b.ne .LBB1_6 ; CHECK-NEXT: // %bb.2: -; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: mov w0, #1 // =0x1 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB1_3: // %lor.lhs.false ; CHECK-NEXT: b.ge .LBB1_6 @@ -105,7 +105,7 @@ ; CHECK-NEXT: cmp w8, w9 ; CHECK-NEXT: b.ne .LBB1_6 ; CHECK-NEXT: // %bb.5: -; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: mov w0, #1 // =0x1 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB1_6: // %if.end ; CHECK-NEXT: mov w0, wzr @@ -145,10 +145,10 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, :got:a ; CHECK-NEXT: ldr x8, [x8, :got_lo12:a] -; CHECK-NEXT: ldr w9, [x8] +; CHECK-NEXT: ldr w8, [x8] +; CHECK-NEXT: cmp w8, #5 ; CHECK-NEXT: adrp x8, :got:b ; CHECK-NEXT: ldr x8, [x8, :got_lo12:b] -; CHECK-NEXT: cmp w9, #5 ; CHECK-NEXT: b.ge .LBB2_3 ; CHECK-NEXT: // %bb.1: // %land.lhs.true ; CHECK-NEXT: adrp x9, :got:c @@ -158,7 +158,7 @@ ; CHECK-NEXT: cmp w10, w9 ; CHECK-NEXT: b.ne .LBB2_4 ; CHECK-NEXT: // %bb.2: -; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: mov w0, #1 // =0x1 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB2_3: // %lor.lhs.false ; CHECK-NEXT: b.gt .LBB2_6 @@ -170,7 +170,7 @@ ; CHECK-NEXT: cmp w8, w9 ; CHECK-NEXT: b.ne .LBB2_6 ; CHECK-NEXT: // %bb.5: -; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: mov w0, #1 // =0x1 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB2_6: // %if.end ; CHECK-NEXT: mov w0, wzr @@ -223,7 +223,7 @@ ; CHECK-NEXT: cmp w8, w9 ; CHECK-NEXT: b.ne .LBB3_6 ; CHECK-NEXT: // %bb.2: -; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: mov w0, #1 // =0x1 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB3_3: // %lor.lhs.false ; CHECK-NEXT: b.le .LBB3_6 @@ -237,7 +237,7 @@ ; CHECK-NEXT: cmp w8, w9 ; CHECK-NEXT: b.ne .LBB3_6 ; CHECK-NEXT: // %bb.5: -; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: mov w0, #1 // =0x1 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB3_6: // %if.end ; CHECK-NEXT: mov w0, wzr @@ -290,7 +290,7 @@ ; CHECK-NEXT: cmp w8, w9 ; CHECK-NEXT: b.ne .LBB4_6 ; CHECK-NEXT: // %bb.2: -; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: mov w0, #1 // =0x1 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB4_3: // %lor.lhs.false ; CHECK-NEXT: b.ge .LBB4_6 @@ -304,7 +304,7 @@ ; CHECK-NEXT: cmp w8, w9 ; CHECK-NEXT: b.ne .LBB4_6 ; CHECK-NEXT: // %bb.5: -; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: mov w0, #1 // =0x1 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB4_6: // %if.end ; CHECK-NEXT: mov w0, wzr @@ -357,7 +357,7 @@ ; CHECK-NEXT: cmp w8, w9 ; CHECK-NEXT: b.ne .LBB5_6 ; CHECK-NEXT: // %bb.2: -; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: mov w0, #1 // =0x1 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB5_3: // %lor.lhs.false ; CHECK-NEXT: b.le .LBB5_6 @@ -371,7 +371,7 @@ ; CHECK-NEXT: cmp w8, w9 ; CHECK-NEXT: b.ne .LBB5_6 ; CHECK-NEXT: // %bb.5: -; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: mov w0, #1 // =0x1 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB5_6: // %if.end ; CHECK-NEXT: mov w0, wzr @@ -425,7 +425,7 @@ ; CHECK-NEXT: .cfi_offset w22, -32 ; CHECK-NEXT: .cfi_offset w30, -48 ; CHECK-NEXT: ldr x20, [x0] -; CHECK-NEXT: mov w19, #24 +; CHECK-NEXT: mov w19, #24 // =0x18 ; CHECK-NEXT: adrp x22, glob ; CHECK-NEXT: add x21, x20, #2 ; CHECK-NEXT: .LBB6_1: // %land.rhs @@ -511,7 +511,7 @@ ; CHECK-NEXT: cmp w8, w9 ; CHECK-NEXT: b.ne .LBB7_7 ; CHECK-NEXT: // %bb.6: -; CHECK-NEXT: mov w0, #123 +; CHECK-NEXT: mov w0, #123 // =0x7b ; CHECK-NEXT: b .LBB7_8 ; CHECK-NEXT: .LBB7_7: // %if.end ; 
CHECK-NEXT: mov w0, wzr @@ -597,7 +597,7 @@ ; CHECK-NEXT: cmp w8, w9 ; CHECK-NEXT: b.ne .LBB8_6 ; CHECK-NEXT: // %bb.5: -; CHECK-NEXT: mov w0, #123 +; CHECK-NEXT: mov w0, #123 // =0x7b ; CHECK-NEXT: ldp x30, x19, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: .cfi_def_cfa_offset 0 ; CHECK-NEXT: .cfi_restore w19 @@ -680,8 +680,8 @@ ; CHECK-NEXT: bl yoo ; CHECK-NEXT: cmp w19, #0 ; CHECK-NEXT: mov w1, #2 // =0x2 -; CHECK-NEXT: cinc w0, w19, gt ; CHECK-NEXT: fmov d8, d0 +; CHECK-NEXT: cinc w0, w19, gt ; CHECK-NEXT: bl xoo ; CHECK-NEXT: fmov d0, #-1.00000000 ; CHECK-NEXT: fcmp d8, #0.0 @@ -740,11 +740,11 @@ ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: mov w8, #42 +; CHECK-NEXT: mov w8, #42 // =0x2a ; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: mov w9, #128 // =0x80 ; CHECK-NEXT: csinc w8, w8, wzr, gt ; CHECK-NEXT: cmp w0, #2, lsl #12 // =8192 -; CHECK-NEXT: mov w9, #128 ; CHECK-NEXT: csel w0, w9, w8, ge ; CHECK-NEXT: bl zoo ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload @@ -797,7 +797,7 @@ ; CHECK-NEXT: cmp w8, w9 ; CHECK-NEXT: b.ne .LBB11_4 ; CHECK-NEXT: // %bb.3: -; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: mov w0, #1 // =0x1 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB11_4: // %land.lhs.true3 ; CHECK-NEXT: adrp x8, :got:b @@ -809,7 +809,7 @@ ; CHECK-NEXT: cmp w8, w9 ; CHECK-NEXT: b.ne .LBB11_6 ; CHECK-NEXT: // %bb.5: -; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: mov w0, #1 // =0x1 ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB11_6: // %if.end ; CHECK-NEXT: mov w0, wzr diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-fixed-contract.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-fixed-contract.ll --- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-fixed-contract.ll +++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-fixed-contract.ll @@ -16,10 +16,10 @@ ; CHECK-NEXT: fneg v1.2d, v1.2d ; CHECK-NEXT: fmla v3.2d, v2.2d, v5.2d ; CHECK-NEXT: fmla v1.2d, v2.2d, v0.2d -; CHECK-NEXT: fadd v3.2d, v3.2d, v4.2d ; CHECK-NEXT: fadd v1.2d, v2.2d, v1.2d -; CHECK-NEXT: zip1 v0.2d, v1.2d, v3.2d -; CHECK-NEXT: zip2 v1.2d, v1.2d, v3.2d +; CHECK-NEXT: fadd v2.2d, v3.2d, v4.2d +; CHECK-NEXT: zip1 v0.2d, v1.2d, v2.2d +; CHECK-NEXT: zip2 v1.2d, v1.2d, v2.2d ; CHECK-NEXT: ret entry: %strided.vec = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> @@ -49,15 +49,15 @@ ; CHECK-NEXT: movi v18.2d, #0000000000000000 ; CHECK-NEXT: movi v19.2d, #0000000000000000 ; CHECK-NEXT: fcmla v16.2d, v0.2d, v2.2d, #0 -; CHECK-NEXT: fcmla v17.2d, v1.2d, v3.2d, #0 -; CHECK-NEXT: fcmla v18.2d, v4.2d, v6.2d, #0 -; CHECK-NEXT: fcmla v19.2d, v5.2d, v7.2d, #0 +; CHECK-NEXT: fcmla v18.2d, v1.2d, v3.2d, #0 +; CHECK-NEXT: fcmla v17.2d, v5.2d, v7.2d, #0 +; CHECK-NEXT: fcmla v19.2d, v4.2d, v6.2d, #0 ; CHECK-NEXT: fcmla v16.2d, v0.2d, v2.2d, #90 -; CHECK-NEXT: fcmla v17.2d, v1.2d, v3.2d, #90 -; CHECK-NEXT: fcmla v18.2d, v4.2d, v6.2d, #90 -; CHECK-NEXT: fcmla v19.2d, v5.2d, v7.2d, #90 -; CHECK-NEXT: fadd v0.2d, v16.2d, v18.2d -; CHECK-NEXT: fadd v1.2d, v17.2d, v19.2d +; CHECK-NEXT: fcmla v18.2d, v1.2d, v3.2d, #90 +; CHECK-NEXT: fcmla v17.2d, v5.2d, v7.2d, #90 +; CHECK-NEXT: fcmla v19.2d, v4.2d, v6.2d, #90 +; CHECK-NEXT: fadd v1.2d, v18.2d, v17.2d +; CHECK-NEXT: fadd v0.2d, v16.2d, v19.2d ; CHECK-NEXT: ret entry: %strided.vec = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> @@ -95,15 +95,15 @@ ; CHECK-NEXT: movi v18.2d, #0000000000000000 ; CHECK-NEXT: movi v19.2d, #0000000000000000 ; 
CHECK-NEXT: fcmla v16.2d, v0.2d, v2.2d, #0 -; CHECK-NEXT: fcmla v17.2d, v1.2d, v3.2d, #0 -; CHECK-NEXT: fcmla v18.2d, v4.2d, v6.2d, #0 -; CHECK-NEXT: fcmla v19.2d, v5.2d, v7.2d, #0 +; CHECK-NEXT: fcmla v18.2d, v1.2d, v3.2d, #0 +; CHECK-NEXT: fcmla v17.2d, v5.2d, v7.2d, #0 +; CHECK-NEXT: fcmla v19.2d, v4.2d, v6.2d, #0 ; CHECK-NEXT: fcmla v16.2d, v0.2d, v2.2d, #90 -; CHECK-NEXT: fcmla v17.2d, v1.2d, v3.2d, #90 -; CHECK-NEXT: fcmla v18.2d, v4.2d, v6.2d, #90 -; CHECK-NEXT: fcmla v19.2d, v5.2d, v7.2d, #90 -; CHECK-NEXT: fsub v0.2d, v16.2d, v18.2d -; CHECK-NEXT: fsub v1.2d, v17.2d, v19.2d +; CHECK-NEXT: fcmla v18.2d, v1.2d, v3.2d, #90 +; CHECK-NEXT: fcmla v17.2d, v5.2d, v7.2d, #90 +; CHECK-NEXT: fcmla v19.2d, v4.2d, v6.2d, #90 +; CHECK-NEXT: fsub v1.2d, v18.2d, v17.2d +; CHECK-NEXT: fsub v0.2d, v16.2d, v19.2d ; CHECK-NEXT: ret entry: %strided.vec = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> @@ -141,15 +141,15 @@ ; CHECK-NEXT: movi v18.2d, #0000000000000000 ; CHECK-NEXT: movi v19.2d, #0000000000000000 ; CHECK-NEXT: fcmla v16.2d, v0.2d, v2.2d, #0 -; CHECK-NEXT: fcmla v17.2d, v1.2d, v3.2d, #0 -; CHECK-NEXT: fcmla v18.2d, v6.2d, v4.2d, #0 -; CHECK-NEXT: fcmla v19.2d, v7.2d, v5.2d, #0 +; CHECK-NEXT: fcmla v18.2d, v1.2d, v3.2d, #0 +; CHECK-NEXT: fcmla v17.2d, v7.2d, v5.2d, #0 +; CHECK-NEXT: fcmla v19.2d, v6.2d, v4.2d, #0 ; CHECK-NEXT: fcmla v16.2d, v0.2d, v2.2d, #90 -; CHECK-NEXT: fcmla v17.2d, v1.2d, v3.2d, #90 -; CHECK-NEXT: fcmla v18.2d, v6.2d, v4.2d, #270 -; CHECK-NEXT: fcmla v19.2d, v7.2d, v5.2d, #270 -; CHECK-NEXT: fadd v0.2d, v16.2d, v18.2d -; CHECK-NEXT: fadd v1.2d, v17.2d, v19.2d +; CHECK-NEXT: fcmla v18.2d, v1.2d, v3.2d, #90 +; CHECK-NEXT: fcmla v17.2d, v7.2d, v5.2d, #270 +; CHECK-NEXT: fcmla v19.2d, v6.2d, v4.2d, #270 +; CHECK-NEXT: fadd v1.2d, v18.2d, v17.2d +; CHECK-NEXT: fadd v0.2d, v16.2d, v19.2d ; CHECK-NEXT: ret entry: %strided.vec = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> @@ -185,34 +185,33 @@ ; CHECK-NEXT: movi v16.2d, #0xffffffffffffffff ; CHECK-NEXT: zip2 v17.2d, v4.2d, v5.2d ; CHECK-NEXT: movi v18.2d, #0000000000000000 -; CHECK-NEXT: zip1 v19.2d, v0.2d, v1.2d +; CHECK-NEXT: zip1 v4.2d, v4.2d, v5.2d +; CHECK-NEXT: zip2 v19.2d, v0.2d, v1.2d +; CHECK-NEXT: zip2 v20.2d, v2.2d, v3.2d +; CHECK-NEXT: zip1 v0.2d, v0.2d, v1.2d +; CHECK-NEXT: zip1 v2.2d, v2.2d, v3.2d ; CHECK-NEXT: fneg v16.2d, v16.2d -; CHECK-NEXT: zip2 v0.2d, v0.2d, v1.2d -; CHECK-NEXT: zip1 v1.2d, v4.2d, v5.2d -; CHECK-NEXT: zip1 v5.2d, v2.2d, v3.2d -; CHECK-NEXT: mov v4.16b, v16.16b -; CHECK-NEXT: bsl v4.16b, v18.16b, v17.16b -; CHECK-NEXT: zip2 v2.2d, v2.2d, v3.2d -; CHECK-NEXT: mov v3.16b, v16.16b -; CHECK-NEXT: bsl v3.16b, v18.16b, v1.16b -; CHECK-NEXT: fadd v1.2d, v1.2d, v4.2d -; CHECK-NEXT: zip2 v4.2d, v6.2d, v7.2d +; CHECK-NEXT: fmul v1.2d, v19.2d, v20.2d +; CHECK-NEXT: fmul v3.2d, v0.2d, v20.2d +; CHECK-NEXT: mov v5.16b, v16.16b +; CHECK-NEXT: bsl v16.16b, v18.16b, v4.16b +; CHECK-NEXT: fneg v1.2d, v1.2d +; CHECK-NEXT: fmla v3.2d, v2.2d, v19.2d +; CHECK-NEXT: bsl v5.16b, v18.16b, v17.16b +; CHECK-NEXT: fsub v16.2d, v16.2d, v17.2d +; CHECK-NEXT: fmla v1.2d, v2.2d, v0.2d +; CHECK-NEXT: fadd v4.2d, v4.2d, v5.2d +; CHECK-NEXT: zip2 v5.2d, v6.2d, v7.2d ; CHECK-NEXT: zip1 v6.2d, v6.2d, v7.2d -; CHECK-NEXT: fmul v7.2d, v0.2d, v2.2d -; CHECK-NEXT: fsub v3.2d, v3.2d, v17.2d -; CHECK-NEXT: fmul v16.2d, v1.2d, v4.2d -; CHECK-NEXT: fmul v2.2d, v19.2d, v2.2d -; CHECK-NEXT: fneg v7.2d, v7.2d -; CHECK-NEXT: fmul v4.2d, v3.2d, v4.2d -; CHECK-NEXT: fneg v16.2d, v16.2d -; CHECK-NEXT: fmla 
v2.2d, v5.2d, v0.2d -; CHECK-NEXT: fmla v7.2d, v5.2d, v19.2d -; CHECK-NEXT: fmla v4.2d, v1.2d, v6.2d -; CHECK-NEXT: fmla v16.2d, v6.2d, v3.2d -; CHECK-NEXT: fadd v1.2d, v2.2d, v4.2d -; CHECK-NEXT: fadd v2.2d, v7.2d, v16.2d -; CHECK-NEXT: zip1 v0.2d, v2.2d, v1.2d -; CHECK-NEXT: zip2 v1.2d, v2.2d, v1.2d +; CHECK-NEXT: fmul v17.2d, v4.2d, v5.2d +; CHECK-NEXT: fmul v5.2d, v16.2d, v5.2d +; CHECK-NEXT: fneg v7.2d, v17.2d +; CHECK-NEXT: fmla v5.2d, v4.2d, v6.2d +; CHECK-NEXT: fmla v7.2d, v6.2d, v16.2d +; CHECK-NEXT: fadd v2.2d, v3.2d, v5.2d +; CHECK-NEXT: fadd v1.2d, v1.2d, v7.2d +; CHECK-NEXT: zip1 v0.2d, v1.2d, v2.2d +; CHECK-NEXT: zip2 v1.2d, v1.2d, v2.2d ; CHECK-NEXT: ret entry: %strided.vec = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-fixed-fast.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-fixed-fast.ll --- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-fixed-fast.ll +++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-fixed-fast.ll @@ -39,16 +39,16 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v16.2d, #0000000000000000 ; CHECK-NEXT: movi v17.2d, #0000000000000000 -; CHECK-NEXT: fcmla v16.2d, v4.2d, v6.2d, #0 -; CHECK-NEXT: fcmla v17.2d, v5.2d, v7.2d, #0 -; CHECK-NEXT: fcmla v16.2d, v2.2d, v0.2d, #0 -; CHECK-NEXT: fcmla v17.2d, v3.2d, v1.2d, #0 -; CHECK-NEXT: fcmla v16.2d, v4.2d, v6.2d, #90 -; CHECK-NEXT: fcmla v17.2d, v5.2d, v7.2d, #90 -; CHECK-NEXT: fcmla v16.2d, v2.2d, v0.2d, #90 -; CHECK-NEXT: fcmla v17.2d, v3.2d, v1.2d, #90 -; CHECK-NEXT: mov v0.16b, v16.16b -; CHECK-NEXT: mov v1.16b, v17.16b +; CHECK-NEXT: fcmla v17.2d, v4.2d, v6.2d, #0 +; CHECK-NEXT: fcmla v16.2d, v5.2d, v7.2d, #0 +; CHECK-NEXT: fcmla v17.2d, v2.2d, v0.2d, #0 +; CHECK-NEXT: fcmla v16.2d, v3.2d, v1.2d, #0 +; CHECK-NEXT: fcmla v17.2d, v4.2d, v6.2d, #90 +; CHECK-NEXT: fcmla v16.2d, v5.2d, v7.2d, #90 +; CHECK-NEXT: fcmla v17.2d, v2.2d, v0.2d, #90 +; CHECK-NEXT: fcmla v16.2d, v3.2d, v1.2d, #90 +; CHECK-NEXT: mov v0.16b, v17.16b +; CHECK-NEXT: mov v1.16b, v16.16b ; CHECK-NEXT: ret entry: %strided.vec = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> @@ -83,16 +83,16 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v16.2d, #0000000000000000 ; CHECK-NEXT: movi v17.2d, #0000000000000000 -; CHECK-NEXT: fcmla v16.2d, v4.2d, v6.2d, #270 -; CHECK-NEXT: fcmla v17.2d, v5.2d, v7.2d, #270 -; CHECK-NEXT: fcmla v16.2d, v2.2d, v0.2d, #0 -; CHECK-NEXT: fcmla v17.2d, v3.2d, v1.2d, #0 -; CHECK-NEXT: fcmla v16.2d, v4.2d, v6.2d, #180 -; CHECK-NEXT: fcmla v17.2d, v5.2d, v7.2d, #180 -; CHECK-NEXT: fcmla v16.2d, v2.2d, v0.2d, #90 -; CHECK-NEXT: fcmla v17.2d, v3.2d, v1.2d, #90 -; CHECK-NEXT: mov v0.16b, v16.16b -; CHECK-NEXT: mov v1.16b, v17.16b +; CHECK-NEXT: fcmla v17.2d, v4.2d, v6.2d, #270 +; CHECK-NEXT: fcmla v16.2d, v5.2d, v7.2d, #270 +; CHECK-NEXT: fcmla v17.2d, v2.2d, v0.2d, #0 +; CHECK-NEXT: fcmla v16.2d, v3.2d, v1.2d, #0 +; CHECK-NEXT: fcmla v17.2d, v4.2d, v6.2d, #180 +; CHECK-NEXT: fcmla v16.2d, v5.2d, v7.2d, #180 +; CHECK-NEXT: fcmla v17.2d, v2.2d, v0.2d, #90 +; CHECK-NEXT: fcmla v16.2d, v3.2d, v1.2d, #90 +; CHECK-NEXT: mov v0.16b, v17.16b +; CHECK-NEXT: mov v1.16b, v16.16b ; CHECK-NEXT: ret entry: %strided.vec = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> @@ -127,16 +127,16 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v16.2d, #0000000000000000 ; CHECK-NEXT: movi v17.2d, #0000000000000000 -; CHECK-NEXT: fcmla v16.2d, v2.2d, v0.2d, #0 -; CHECK-NEXT: fcmla v17.2d, 
v3.2d, v1.2d, #0 -; CHECK-NEXT: fcmla v16.2d, v2.2d, v0.2d, #90 -; CHECK-NEXT: fcmla v17.2d, v3.2d, v1.2d, #90 -; CHECK-NEXT: fcmla v16.2d, v6.2d, v4.2d, #0 -; CHECK-NEXT: fcmla v17.2d, v7.2d, v5.2d, #0 -; CHECK-NEXT: fcmla v16.2d, v6.2d, v4.2d, #270 -; CHECK-NEXT: fcmla v17.2d, v7.2d, v5.2d, #270 -; CHECK-NEXT: mov v0.16b, v16.16b -; CHECK-NEXT: mov v1.16b, v17.16b +; CHECK-NEXT: fcmla v17.2d, v2.2d, v0.2d, #0 +; CHECK-NEXT: fcmla v16.2d, v3.2d, v1.2d, #0 +; CHECK-NEXT: fcmla v17.2d, v2.2d, v0.2d, #90 +; CHECK-NEXT: fcmla v16.2d, v3.2d, v1.2d, #90 +; CHECK-NEXT: fcmla v17.2d, v6.2d, v4.2d, #0 +; CHECK-NEXT: fcmla v16.2d, v7.2d, v5.2d, #0 +; CHECK-NEXT: fcmla v17.2d, v6.2d, v4.2d, #270 +; CHECK-NEXT: fcmla v16.2d, v7.2d, v5.2d, #270 +; CHECK-NEXT: mov v0.16b, v17.16b +; CHECK-NEXT: mov v1.16b, v16.16b ; CHECK-NEXT: ret entry: %strided.vec = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> @@ -172,22 +172,22 @@ ; CHECK-NEXT: zip2 v16.2d, v2.2d, v3.2d ; CHECK-NEXT: zip2 v17.2d, v0.2d, v1.2d ; CHECK-NEXT: zip1 v2.2d, v2.2d, v3.2d -; CHECK-NEXT: zip2 v3.2d, v4.2d, v5.2d -; CHECK-NEXT: zip1 v18.2d, v6.2d, v7.2d -; CHECK-NEXT: fmul v19.2d, v16.2d, v17.2d +; CHECK-NEXT: zip2 v18.2d, v4.2d, v5.2d +; CHECK-NEXT: zip1 v19.2d, v6.2d, v7.2d ; CHECK-NEXT: zip1 v0.2d, v0.2d, v1.2d ; CHECK-NEXT: zip1 v1.2d, v4.2d, v5.2d -; CHECK-NEXT: fmul v4.2d, v2.2d, v17.2d ; CHECK-NEXT: zip2 v5.2d, v6.2d, v7.2d -; CHECK-NEXT: fmla v19.2d, v3.2d, v18.2d +; CHECK-NEXT: fmul v3.2d, v16.2d, v17.2d +; CHECK-NEXT: fmul v4.2d, v2.2d, v17.2d +; CHECK-NEXT: fmla v3.2d, v18.2d, v19.2d ; CHECK-NEXT: fmla v4.2d, v0.2d, v16.2d -; CHECK-NEXT: fmla v19.2d, v1.2d, v5.2d -; CHECK-NEXT: fmla v4.2d, v1.2d, v18.2d -; CHECK-NEXT: fneg v1.2d, v19.2d -; CHECK-NEXT: fmls v4.2d, v3.2d, v5.2d -; CHECK-NEXT: fmla v1.2d, v0.2d, v2.2d -; CHECK-NEXT: zip1 v0.2d, v1.2d, v4.2d -; CHECK-NEXT: zip2 v1.2d, v1.2d, v4.2d +; CHECK-NEXT: fmla v3.2d, v1.2d, v5.2d +; CHECK-NEXT: fmla v4.2d, v1.2d, v19.2d +; CHECK-NEXT: fneg v3.2d, v3.2d +; CHECK-NEXT: fmls v4.2d, v18.2d, v5.2d +; CHECK-NEXT: fmla v3.2d, v0.2d, v2.2d +; CHECK-NEXT: zip1 v0.2d, v3.2d, v4.2d +; CHECK-NEXT: zip2 v1.2d, v3.2d, v4.2d ; CHECK-NEXT: ret entry: %strided.vec = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-scalable-contract.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-scalable-contract.ll --- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-scalable-contract.ll +++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-scalable-contract.ll @@ -7,21 +7,21 @@ define @mull_add( %a, %b, %c) { ; CHECK-LABEL: mull_add: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: uzp2 z6.d, z2.d, z3.d -; CHECK-NEXT: uzp2 z7.d, z0.d, z1.d -; CHECK-NEXT: uzp1 z0.d, z0.d, z1.d +; CHECK-NEXT: uzp1 z6.d, z0.d, z1.d +; CHECK-NEXT: uzp2 z7.d, z2.d, z3.d +; CHECK-NEXT: uzp2 z0.d, z0.d, z1.d ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: uzp1 z1.d, z2.d, z3.d -; CHECK-NEXT: fmul z2.d, z0.d, z6.d -; CHECK-NEXT: fmla z2.d, p0/m, z7.d, z1.d -; CHECK-NEXT: fmul z3.d, z7.d, z6.d -; CHECK-NEXT: fnmsb z0.d, p0/m, z1.d, z3.d -; CHECK-NEXT: uzp2 z1.d, z4.d, z5.d +; CHECK-NEXT: fmul z2.d, z6.d, z7.d +; CHECK-NEXT: fmul z3.d, z0.d, z7.d +; CHECK-NEXT: fmad z0.d, p0/m, z1.d, z2.d +; CHECK-NEXT: fnmsb z1.d, p0/m, z6.d, z3.d +; CHECK-NEXT: uzp2 z2.d, z4.d, z5.d ; CHECK-NEXT: uzp1 z3.d, z4.d, z5.d -; CHECK-NEXT: fadd z3.d, z3.d, z0.d -; CHECK-NEXT: fadd z1.d, z2.d, z1.d -; CHECK-NEXT: zip1 z0.d, z3.d, z1.d 
-; CHECK-NEXT: zip2 z1.d, z3.d, z1.d
+; CHECK-NEXT: fadd z2.d, z0.d, z2.d
+; CHECK-NEXT: fadd z1.d, z3.d, z1.d
+; CHECK-NEXT: zip1 z0.d, z1.d, z2.d
+; CHECK-NEXT: zip2 z1.d, z1.d, z2.d
; CHECK-NEXT: ret
entry:
%strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
@@ -49,8 +49,8 @@
define <vscale x 4 x double> @mul_add_mull(<vscale x 4 x double> %a, <vscale x 4 x double> %b, <vscale x 4 x double> %c, <vscale x 4 x double> %d) {
; CHECK-LABEL: mul_add_mull:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov z24.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov z24.d, #0 // =0x0
; CHECK-NEXT: mov z25.d, z24.d
; CHECK-NEXT: mov z26.d, z24.d
; CHECK-NEXT: mov z27.d, z24.d
@@ -100,8 +100,8 @@
define <vscale x 4 x double> @mul_sub_mull(<vscale x 4 x double> %a, <vscale x 4 x double> %b, <vscale x 4 x double> %c, <vscale x 4 x double> %d) {
; CHECK-LABEL: mul_sub_mull:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov z24.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov z24.d, #0 // =0x0
; CHECK-NEXT: mov z25.d, z24.d
; CHECK-NEXT: mov z26.d, z24.d
; CHECK-NEXT: mov z27.d, z24.d
@@ -151,8 +151,8 @@
define <vscale x 4 x double> @mul_conj_mull(<vscale x 4 x double> %a, <vscale x 4 x double> %b, <vscale x 4 x double> %c, <vscale x 4 x double> %d) {
; CHECK-LABEL: mul_conj_mull:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov z24.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov z24.d, #0 // =0x0
; CHECK-NEXT: mov z25.d, z24.d
; CHECK-NEXT: mov z26.d, z24.d
; CHECK-NEXT: mov z27.d, z24.d
@@ -203,35 +203,37 @@
; CHECK-LABEL: mul_add_rot_mull:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: uzp2 z24.d, z4.d, z5.d
-; CHECK-NEXT: mov z26.d, #0 // =0x0
-; CHECK-NEXT: mov z25.d, z24.d
-; CHECK-NEXT: and z26.d, z26.d, #0x7fffffffffffffff
-; CHECK-NEXT: and z25.d, z25.d, #0x8000000000000000
-; CHECK-NEXT: uzp2 z27.d, z0.d, z1.d
+; CHECK-NEXT: mov z25.d, #0 // =0x0
+; CHECK-NEXT: uzp1 z4.d, z4.d, z5.d
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: and z25.d, z25.d, #0x7fffffffffffffff
+; CHECK-NEXT: mov z26.d, z24.d
+; CHECK-NEXT: and z26.d, z26.d, #0x8000000000000000
+; CHECK-NEXT: orr z5.d, z25.d, z26.d
+; CHECK-NEXT: fadd z5.d, z4.d, z5.d
+; CHECK-NEXT: and z4.d, z4.d, #0x8000000000000000
+; CHECK-NEXT: orr z4.d, z25.d, z4.d
+; CHECK-NEXT: uzp2 z25.d, z0.d, z1.d
; CHECK-NEXT: uzp1 z0.d, z0.d, z1.d
-; CHECK-NEXT: uzp1 z1.d, z4.d, z5.d
-; CHECK-NEXT: orr z5.d, z26.d, z25.d
-; CHECK-NEXT: uzp1 z4.d, z2.d, z3.d
-; CHECK-NEXT: fadd z5.d, z1.d, z5.d
-; CHECK-NEXT: and z1.d, z1.d, #0x8000000000000000
-; CHECK-NEXT: orr z1.d, z26.d, z1.d
-; CHECK-NEXT: uzp2 z2.d, z2.d, z3.d
-; CHECK-NEXT: fsub z1.d, z1.d, z24.d
+; CHECK-NEXT: uzp2 z1.d, z2.d, z3.d
+; CHECK-NEXT: uzp1 z2.d, z2.d, z3.d
+; CHECK-NEXT: fsub z4.d, z4.d, z24.d
; CHECK-NEXT: uzp2 z24.d, z6.d, z7.d
-; CHECK-NEXT: fmul z3.d, z0.d, z2.d
-; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: uzp1 z6.d, z6.d, z7.d
-; CHECK-NEXT: fmul z7.d, z1.d, z24.d
-; CHECK-NEXT: fmla z3.d, p0/m, z27.d, z4.d
-; CHECK-NEXT: fmla z7.d, p0/m, z6.d, z5.d
-; CHECK-NEXT: fmul z2.d, z27.d, z2.d
-; CHECK-NEXT: fmul z5.d, z5.d, z24.d
-; CHECK-NEXT: fnmsb z0.d, p0/m, z4.d, z2.d
-; CHECK-NEXT: fnmsb z1.d, p0/m, z6.d, z5.d
-; CHECK-NEXT: fadd z1.d, z0.d, z1.d
-; CHECK-NEXT: fadd z2.d, z3.d, z7.d
-; CHECK-NEXT: zip1 z0.d, z1.d, z2.d
-; CHECK-NEXT: zip2 z1.d, z1.d, z2.d
+; CHECK-NEXT: fmul z3.d, z0.d, z1.d
+; CHECK-NEXT: fmul z1.d, z25.d, z1.d
+; CHECK-NEXT: fmul z7.d, z4.d, z24.d
+; CHECK-NEXT: fmul z24.d, z5.d, z24.d
+; CHECK-NEXT: fmla z3.d, p0/m, z25.d, z2.d
+; CHECK-NEXT: fnmsb z0.d, p0/m, z2.d, z1.d
+; CHECK-NEXT: movprfx z1, z7
+; CHECK-NEXT: fmla z1.d, p0/m, z6.d, z5.d
+; CHECK-NEXT: movprfx z2, z24
+; CHECK-NEXT: fnmls z2.d, p0/m, z4.d, z6.d
+; CHECK-NEXT: fadd z2.d, z0.d, z2.d
+; CHECK-NEXT: fadd z1.d, z3.d, z1.d
+; CHECK-NEXT: zip1 z0.d, z2.d, z1.d
+; CHECK-NEXT: zip2 z1.d, z2.d, z1.d
; CHECK-NEXT: ret
entry:
%strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-scalable-fast.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-scalable-fast.ll
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-scalable-fast.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-add-mull-scalable-fast.ll
@@ -41,19 +41,19 @@
define <vscale x 4 x double> @mul_add_mull(<vscale x 4 x double> %a, <vscale x 4 x double> %b, <vscale x 4 x double> %c, <vscale x 4 x double> %d) {
; CHECK-LABEL: mul_add_mull:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov z24.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov z24.d, #0 // =0x0
; CHECK-NEXT: mov z25.d, z24.d
-; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #0
; CHECK-NEXT: fcmla z25.d, p0/m, z6.d, z4.d, #0
-; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #0
+; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #0
; CHECK-NEXT: fcmla z25.d, p0/m, z0.d, z2.d, #0
-; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #90
+; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #0
; CHECK-NEXT: fcmla z25.d, p0/m, z6.d, z4.d, #90
-; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #90
+; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #90
; CHECK-NEXT: fcmla z25.d, p0/m, z0.d, z2.d, #90
-; CHECK-NEXT: mov z1.d, z24.d
+; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #90
; CHECK-NEXT: mov z0.d, z25.d
+; CHECK-NEXT: mov z1.d, z24.d
; CHECK-NEXT: ret
entry:
%strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
@@ -90,19 +90,19 @@
define <vscale x 4 x double> @mul_sub_mull(<vscale x 4 x double> %a, <vscale x 4 x double> %b, <vscale x 4 x double> %c, <vscale x 4 x double> %d) {
; CHECK-LABEL: mul_sub_mull:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov z24.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov z24.d, #0 // =0x0
; CHECK-NEXT: mov z25.d, z24.d
-; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #270
; CHECK-NEXT: fcmla z25.d, p0/m, z6.d, z4.d, #270
-; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #0
+; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #270
; CHECK-NEXT: fcmla z25.d, p0/m, z0.d, z2.d, #0
-; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #180
+; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #0
; CHECK-NEXT: fcmla z25.d, p0/m, z6.d, z4.d, #180
-; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #90
+; CHECK-NEXT: fcmla z24.d, p0/m, z7.d, z5.d, #180
; CHECK-NEXT: fcmla z25.d, p0/m, z0.d, z2.d, #90
-; CHECK-NEXT: mov z1.d, z24.d
+; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #90
; CHECK-NEXT: mov z0.d, z25.d
+; CHECK-NEXT: mov z1.d, z24.d
; CHECK-NEXT: ret
entry:
%strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
@@ -139,19 +139,19 @@
define <vscale x 4 x double> @mul_conj_mull(<vscale x 4 x double> %a, <vscale x 4 x double> %b, <vscale x 4 x double> %c, <vscale x 4 x double> %d) {
; CHECK-LABEL: mul_conj_mull:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov z24.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov z24.d, #0 // =0x0
; CHECK-NEXT: mov z25.d, z24.d
-; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #0
; CHECK-NEXT: fcmla z25.d, p0/m, z0.d, z2.d, #0
-; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #90
+; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #0
; CHECK-NEXT: fcmla z25.d, p0/m, z0.d, z2.d, #90
-; CHECK-NEXT: fcmla z24.d, p0/m, z5.d, z7.d, #0
+; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #90
; CHECK-NEXT: fcmla z25.d, p0/m, z4.d, z6.d, #0
-; CHECK-NEXT: fcmla z24.d, p0/m, z5.d, z7.d, #270
+; CHECK-NEXT: fcmla z24.d, p0/m, z5.d, z7.d, #0
; CHECK-NEXT: fcmla z25.d, p0/m, z4.d, z6.d, #270
-; CHECK-NEXT: mov z1.d, z24.d
+; CHECK-NEXT: fcmla z24.d, p0/m, z5.d, z7.d, #270
; CHECK-NEXT: mov z0.d, z25.d
+; CHECK-NEXT: mov z1.d, z24.d
; CHECK-NEXT: ret
entry:
%strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
@@ -188,25 +188,26 @@
define <vscale x 4 x double> @mul_add_rot_mull(<vscale x 4 x double> %a, <vscale x 4 x double> %b, <vscale x 4 x double> %c, <vscale x 4 x double> %d) {
; CHECK-LABEL: mul_add_rot_mull:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: uzp1 z25.d, z0.d, z1.d
-; CHECK-NEXT: uzp2 z0.d, z0.d, z1.d
-; CHECK-NEXT: uzp1 z1.d, z2.d, z3.d
-; CHECK-NEXT: uzp2 z24.d, z2.d, z3.d
-; CHECK-NEXT: fmul z2.d, z1.d, z0.d
+; CHECK-NEXT: uzp1 z24.d, z2.d, z3.d
+; CHECK-NEXT: uzp2 z25.d, z0.d, z1.d
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: fmla z2.d, p0/m, z24.d, z25.d
-; CHECK-NEXT: fmul z0.d, z24.d, z0.d
-; CHECK-NEXT: uzp2 z3.d, z4.d, z5.d
-; CHECK-NEXT: uzp1 z24.d, z6.d, z7.d
+; CHECK-NEXT: uzp2 z2.d, z2.d, z3.d
+; CHECK-NEXT: uzp1 z0.d, z0.d, z1.d
+; CHECK-NEXT: uzp1 z26.d, z6.d, z7.d
+; CHECK-NEXT: fmul z1.d, z24.d, z25.d
+; CHECK-NEXT: fmul z3.d, z2.d, z25.d
+; CHECK-NEXT: uzp2 z25.d, z4.d, z5.d
; CHECK-NEXT: uzp1 z4.d, z4.d, z5.d
-; CHECK-NEXT: fmla z0.d, p0/m, z24.d, z3.d
; CHECK-NEXT: uzp2 z5.d, z6.d, z7.d
-; CHECK-NEXT: fmla z2.d, p0/m, z24.d, z4.d
-; CHECK-NEXT: fmla z0.d, p0/m, z5.d, z4.d
-; CHECK-NEXT: fmls z2.d, p0/m, z5.d, z3.d
-; CHECK-NEXT: fnmsb z1.d, p0/m, z25.d, z0.d
-; CHECK-NEXT: zip1 z0.d, z1.d, z2.d
-; CHECK-NEXT: zip2 z1.d, z1.d, z2.d
+; CHECK-NEXT: fmla z1.d, p0/m, z2.d, z0.d
+; CHECK-NEXT: fmla z3.d, p0/m, z26.d, z25.d
+; CHECK-NEXT: movprfx z2, z3
+; CHECK-NEXT: fmla z2.d, p0/m, z5.d, z4.d
+; CHECK-NEXT: fnmls z2.d, p0/m, z24.d, z0.d
+; CHECK-NEXT: fmla z1.d, p0/m, z26.d, z4.d
+; CHECK-NEXT: fmls z1.d, p0/m, z5.d, z25.d
+; CHECK-NEXT: zip1 z0.d, z2.d, z1.d
+; CHECK-NEXT: zip2 z1.d, z2.d, z1.d
; CHECK-NEXT: ret
entry:
%strided.vec = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add-scalable.ll
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add-scalable.ll
@@ -85,9 +85,9 @@
; CHECK-LABEL: complex_add_v32f16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: fcadd z6.h, p0/m, z6.h, z2.h, #90
; CHECK-NEXT: fcadd z4.h, p0/m, z4.h, z0.h, #90
; CHECK-NEXT: fcadd z5.h, p0/m, z5.h, z1.h, #90
+; CHECK-NEXT: fcadd z6.h, p0/m, z6.h, z2.h, #90
; CHECK-NEXT: fcadd z7.h, p0/m, z7.h, z3.h, #90
; CHECK-NEXT: mov z0.d, z4.d
; CHECK-NEXT: mov z1.d, z5.d
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add.ll
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add.ll
@@ -66,8 +66,8 @@
define <16 x half> @complex_add_v16f16(<16 x half> %a, <16 x half> %b) {
; CHECK-LABEL: complex_add_v16f16:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcadd v0.8h, v2.8h, v0.8h, #90
; CHECK-NEXT: fcadd v1.8h, v3.8h, v1.8h, #90
+; CHECK-NEXT: fcadd v0.8h, v2.8h, v0.8h, #90
; CHECK-NEXT: ret
entry:
%a.real = shufflevector <16 x half> %a, <16 x half> zeroinitializer, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
@@ -142,8 +142,8 @@
define <16 x half> @complex_add_v16f16_with_intrinsic(<16 x half> %a, <16 x half> %b) {
; CHECK-LABEL: complex_add_v16f16_with_intrinsic:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcadd v0.8h, v2.8h, v0.8h, #90
; CHECK-NEXT: fcadd v1.8h, v3.8h, v1.8h, #90
+; CHECK-NEXT: fcadd v0.8h, v2.8h, v0.8h, #90
; CHECK-NEXT: ret
entry:
%a.deinterleaved = tail call { <8 x half>, <8 x half> } @llvm.experimental.vector.deinterleave2.v16f16(<16 x half> %a)
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-mul-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-mul-scalable.ll
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-mul-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-mul-scalable.ll
@@ -18,8 +18,8 @@
; CHECK-NEXT: uzp1 z1.d, z1.d, z3.d
; CHECK-NEXT: movprfx z3, z2
; CHECK-NEXT: fmul z3.h, p0/m, z3.h, z0.h
-; CHECK-NEXT: fmla z3.h, p0/m, z1.h, z4.h
; CHECK-NEXT: fmul z2.h, p0/m, z2.h, z4.h
+; CHECK-NEXT: fmla z3.h, p0/m, z1.h, z4.h
; CHECK-NEXT: fnmsb z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT: zip2 z1.d, z0.d, z3.d
; CHECK-NEXT: zip1 z0.d, z0.d, z3.d
@@ -46,8 +46,8 @@
define <vscale x 8 x half> @complex_mul_v8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b) {
; CHECK-LABEL: complex_mul_v8f16:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov z2.h, #0 // =0x0
; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: mov z2.h, #0 // =0x0
; CHECK-NEXT: fcmla z2.h, p0/m, z1.h, z0.h, #0
; CHECK-NEXT: fcmla z2.h, p0/m, z1.h, z0.h, #90
; CHECK-NEXT: mov z0.d, z2.d
@@ -72,15 +72,15 @@
define <vscale x 16 x half> @complex_mul_v16f16(<vscale x 16 x half> %a, <vscale x 16 x half> %b) {
; CHECK-LABEL: complex_mul_v16f16:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov z4.h, #0 // =0x0
; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: mov z4.h, #0 // =0x0
; CHECK-NEXT: mov z5.d, z4.d
-; CHECK-NEXT: fcmla z4.h, p0/m, z3.h, z1.h, #0
; CHECK-NEXT: fcmla z5.h, p0/m, z2.h, z0.h, #0
-; CHECK-NEXT: fcmla z4.h, p0/m, z3.h, z1.h, #90
+; CHECK-NEXT: fcmla z4.h, p0/m, z3.h, z1.h, #0
; CHECK-NEXT: fcmla z5.h, p0/m, z2.h, z0.h, #90
-; CHECK-NEXT: mov z1.d, z4.d
+; CHECK-NEXT: fcmla z4.h, p0/m, z3.h, z1.h, #90
; CHECK-NEXT: mov z0.d, z5.d
+; CHECK-NEXT: mov z1.d, z4.d
; CHECK-NEXT: ret
entry:
%a.deinterleaved = tail call { <vscale x 8 x half>, <vscale x 8 x half> } @llvm.experimental.vector.deinterleave2.nxv16f16(<vscale x 16 x half> %a)
@@ -103,8 +103,8 @@
define <vscale x 32 x half> @complex_mul_v32f16(<vscale x 32 x half> %a, <vscale x 32 x half> %b) {
; CHECK-LABEL: complex_mul_v32f16:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov z24.h, #0 // =0x0
; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: mov z24.h, #0 // =0x0
; CHECK-NEXT: mov z25.d, z24.d
; CHECK-NEXT: mov z26.d, z24.d
; CHECK-NEXT: mov z27.d, z24.d
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-mul.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-mul.ll
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-mul.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-mul.ll
@@ -87,12 +87,12 @@
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v4.2d, #0000000000000000
; CHECK-NEXT: movi v5.2d, #0000000000000000
-; CHECK-NEXT: fcmla v4.8h, v0.8h, v2.8h, #0
-; CHECK-NEXT: fcmla v5.8h, v1.8h, v3.8h, #0
-; CHECK-NEXT: fcmla v4.8h, v0.8h, v2.8h, #90
-; CHECK-NEXT: fcmla v5.8h, v1.8h, v3.8h, #90
-; CHECK-NEXT: mov v0.16b, v4.16b
-; CHECK-NEXT: mov v1.16b, v5.16b
+; CHECK-NEXT: fcmla v5.8h, v0.8h, v2.8h, #0
+; CHECK-NEXT: fcmla v4.8h, v1.8h, v3.8h, #0
+; CHECK-NEXT: fcmla v5.8h, v0.8h, v2.8h, #90
+; CHECK-NEXT: fcmla v4.8h, v1.8h, v3.8h, #90
+; CHECK-NEXT: mov v0.16b, v5.16b
+; CHECK-NEXT: mov v1.16b, v4.16b
; CHECK-NEXT: ret
entry:
%a.real = shufflevector <16 x half> %a, <16 x half> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
@@ -118,17 +118,17 @@
; CHECK-NEXT: movi v18.2d, #0000000000000000
; CHECK-NEXT: movi v19.2d, #0000000000000000
; CHECK-NEXT: fcmla v16.8h, v0.8h, v4.8h, #0
-; CHECK-NEXT: fcmla v17.8h, v1.8h, v5.8h, #0
-; CHECK-NEXT: fcmla v18.8h, v2.8h, v6.8h, #0
-; CHECK-NEXT: fcmla v19.8h, v3.8h, v7.8h, #0
+; CHECK-NEXT: fcmla v18.8h, v1.8h, v5.8h, #0
+; CHECK-NEXT: fcmla v17.8h, v3.8h, v7.8h, #0
+; CHECK-NEXT: fcmla v19.8h, v2.8h, v6.8h, #0
; CHECK-NEXT: fcmla v16.8h, v0.8h, v4.8h, #90
-; CHECK-NEXT: fcmla v17.8h, v1.8h, v5.8h, #90
-; CHECK-NEXT: fcmla v18.8h, v2.8h, v6.8h, #90
-; CHECK-NEXT: fcmla v19.8h, v3.8h, v7.8h, #90
+; CHECK-NEXT: fcmla v18.8h, v1.8h, v5.8h, #90
+; CHECK-NEXT: fcmla v17.8h, v3.8h, v7.8h, #90
+; CHECK-NEXT: fcmla v19.8h, v2.8h, v6.8h, #90
; CHECK-NEXT: mov v0.16b, v16.16b
-; CHECK-NEXT: mov v1.16b, v17.16b
-; CHECK-NEXT: mov v2.16b, v18.16b
-; CHECK-NEXT: mov v3.16b, v19.16b
+; CHECK-NEXT: mov v1.16b, v18.16b
+; CHECK-NEXT: mov v3.16b, v17.16b
+; CHECK-NEXT: mov v2.16b, v19.16b
; CHECK-NEXT: ret
entry:
%a.real = shufflevector <32 x half> %a, <32 x half> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f32-add-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f32-add-scalable.ll
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f32-add-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f32-add-scalable.ll
@@ -51,9 +51,9 @@
; CHECK-LABEL: complex_add_v16f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: fcadd z6.s, p0/m, z6.s, z2.s, #90
; CHECK-NEXT: fcadd z4.s, p0/m, z4.s, z0.s, #90
; CHECK-NEXT: fcadd z5.s, p0/m, z5.s, z1.s, #90
+; CHECK-NEXT: fcadd z6.s, p0/m, z6.s, z2.s, #90
; CHECK-NEXT: fcadd z7.s, p0/m, z7.s, z3.s, #90
; CHECK-NEXT: mov z0.d, z4.d
; CHECK-NEXT: mov z1.d, z5.d
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f32-add.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f32-add.ll
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f32-add.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f32-add.ll
@@ -42,8 +42,8 @@
define <8 x float> @complex_add_v8f32(<8 x float> %a, <8 x float> %b) {
; CHECK-LABEL: complex_add_v8f32:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcadd v0.4s, v2.4s, v0.4s, #90
; CHECK-NEXT: fcadd v1.4s, v3.4s, v1.4s, #90
+; CHECK-NEXT: fcadd v0.4s, v2.4s, v0.4s, #90
; CHECK-NEXT: ret
entry:
%a.real = shufflevector <8 x float> %a, <8 x float> zeroinitializer, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f32-mul-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f32-mul-scalable.ll
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f32-mul-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f32-mul-scalable.ll
@@ -7,8 +7,8 @@
define <vscale x 4 x float> @complex_mul_v4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b) {
; CHECK-LABEL: complex_mul_v4f32:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov z2.s, #0 // =0x0
; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: mov z2.s, #0 // =0x0
; CHECK-NEXT: fcmla z2.s, p0/m, z1.s, z0.s, #0
; CHECK-NEXT: fcmla z2.s, p0/m, z1.s, z0.s, #90
; CHECK-NEXT: mov z0.d, z2.d
@@ -34,15 +34,15 @@
define <vscale x 8 x float> @complex_mul_v8f32(<vscale x 8 x float> %a, <vscale x 8 x float> %b) {
; CHECK-LABEL: complex_mul_v8f32:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov z4.s, #0 // =0x0
; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: mov z4.s, #0 // =0x0
; CHECK-NEXT: mov z5.d, z4.d
-; CHECK-NEXT: fcmla z4.s, p0/m, z3.s, z1.s, #0
; CHECK-NEXT: fcmla z5.s, p0/m, z2.s, z0.s, #0
-; CHECK-NEXT: fcmla z4.s, p0/m, z3.s, z1.s, #90
+; CHECK-NEXT: fcmla z4.s, p0/m, z3.s, z1.s, #0
; CHECK-NEXT: fcmla z5.s, p0/m, z2.s, z0.s, #90
-; CHECK-NEXT: mov z1.d, z4.d
+; CHECK-NEXT: fcmla z4.s, p0/m, z3.s, z1.s, #90
; CHECK-NEXT: mov z0.d, z5.d
+; CHECK-NEXT: mov z1.d, z4.d
; CHECK-NEXT: ret
entry:
%a.deinterleaved = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.experimental.vector.deinterleave2.nxv8f32(<vscale x 8 x float> %a)
@@ -65,8 +65,8 @@
define <vscale x 16 x float> @complex_mul_v16f32(<vscale x 16 x float> %a, <vscale x 16 x float> %b) {
; CHECK-LABEL: complex_mul_v16f32:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov z24.s, #0 // =0x0
; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: mov z24.s, #0 // =0x0
; CHECK-NEXT: mov z25.d, z24.d
; CHECK-NEXT: mov z26.d, z24.d
; CHECK-NEXT: mov z27.d, z24.d
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f32-mul.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f32-mul.ll
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f32-mul.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f32-mul.ll
@@ -57,12 +57,12 @@
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v4.2d, #0000000000000000
; CHECK-NEXT: movi v5.2d, #0000000000000000
-; CHECK-NEXT: fcmla v4.4s, v0.4s, v2.4s, #0
-; CHECK-NEXT: fcmla v5.4s, v1.4s, v3.4s, #0
-; CHECK-NEXT: fcmla v4.4s, v0.4s, v2.4s, #90
-; CHECK-NEXT: fcmla v5.4s, v1.4s, v3.4s, #90
-; CHECK-NEXT: mov v0.16b, v4.16b
-; CHECK-NEXT: mov v1.16b, v5.16b
+; CHECK-NEXT: fcmla v5.4s, v0.4s, v2.4s, #0
+; CHECK-NEXT: fcmla v4.4s, v1.4s, v3.4s, #0
+; CHECK-NEXT: fcmla v5.4s, v0.4s, v2.4s, #90
+; CHECK-NEXT: fcmla v4.4s, v1.4s, v3.4s, #90
+; CHECK-NEXT: mov v0.16b, v5.16b
+; CHECK-NEXT: mov v1.16b, v4.16b
; CHECK-NEXT: ret
entry:
%a.real = shufflevector <8 x float> %a, <8 x float> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
@@ -88,17 +88,17 @@
; CHECK-NEXT: movi v18.2d, #0000000000000000
; CHECK-NEXT: movi v19.2d, #0000000000000000
; CHECK-NEXT: fcmla v16.4s, v0.4s, v4.4s, #0
-; CHECK-NEXT: fcmla v17.4s, v1.4s, v5.4s, #0
-; CHECK-NEXT: fcmla v18.4s, v2.4s, v6.4s, #0
-; CHECK-NEXT: fcmla v19.4s, v3.4s, v7.4s, #0
+; CHECK-NEXT: fcmla v18.4s, v1.4s, v5.4s, #0
+; CHECK-NEXT: fcmla v17.4s, v3.4s, v7.4s, #0
+; CHECK-NEXT: fcmla v19.4s, v2.4s, v6.4s, #0
; CHECK-NEXT: fcmla v16.4s, v0.4s, v4.4s, #90
-; CHECK-NEXT: fcmla v17.4s, v1.4s, v5.4s, #90
-; CHECK-NEXT: fcmla v18.4s, v2.4s, v6.4s, #90
-; CHECK-NEXT: fcmla v19.4s, v3.4s, v7.4s, #90
+; CHECK-NEXT: fcmla v18.4s, v1.4s, v5.4s, #90
+; CHECK-NEXT: fcmla v17.4s, v3.4s, v7.4s, #90
+; CHECK-NEXT: fcmla v19.4s, v2.4s, v6.4s, #90
; CHECK-NEXT: mov v0.16b, v16.16b
-; CHECK-NEXT: mov v1.16b, v17.16b
-; CHECK-NEXT: mov v2.16b, v18.16b
-; CHECK-NEXT: mov v3.16b, v19.16b
+; CHECK-NEXT: mov v1.16b, v18.16b
+; CHECK-NEXT: mov v3.16b, v17.16b
+; CHECK-NEXT: mov v2.16b, v19.16b
; CHECK-NEXT: ret
entry:
%a.real = shufflevector <16 x float> %a, <16 x float> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f64-add-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f64-add-scalable.ll
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f64-add-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f64-add-scalable.ll
@@ -52,9 +52,9 @@
; CHECK-LABEL: complex_add_v8f64:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: fcadd z6.d, p0/m, z6.d, z2.d, #90
; CHECK-NEXT: fcadd z4.d, p0/m, z4.d, z0.d, #90
; CHECK-NEXT: fcadd z5.d, p0/m, z5.d, z1.d, #90
+; CHECK-NEXT: fcadd z6.d, p0/m, z6.d, z2.d, #90
; CHECK-NEXT: fcadd z7.d, p0/m, z7.d, z3.d, #90
; CHECK-NEXT: mov z0.d, z4.d
; CHECK-NEXT: mov z1.d, z5.d
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f64-add.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f64-add.ll
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f64-add.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f64-add.ll
@@ -25,8 +25,8 @@
define <4 x double> @complex_add_v4f64(<4 x double> %a, <4 x double> %b) {
; CHECK-LABEL: complex_add_v4f64:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: fcadd v0.2d, v2.2d, v0.2d, #90
; CHECK-NEXT: fcadd v1.2d, v3.2d, v1.2d, #90
+; CHECK-NEXT: fcadd v0.2d, v2.2d, v0.2d, #90
; CHECK-NEXT: ret
entry:
%a.real = shufflevector <4 x double> %a, <4 x double> zeroinitializer, <2 x i32> <i32 0, i32 2>
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f64-mul-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f64-mul-scalable.ll
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f64-mul-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f64-mul-scalable.ll
@@ -7,8 +7,8 @@
define <vscale x 2 x double> @complex_mul_v2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b) {
; CHECK-LABEL: complex_mul_v2f64:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov z2.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov z2.d, #0 // =0x0
; CHECK-NEXT: fcmla z2.d, p0/m, z1.d, z0.d, #0
; CHECK-NEXT: fcmla z2.d, p0/m, z1.d, z0.d, #90
; CHECK-NEXT: mov z0.d, z2.d
@@ -34,15 +34,15 @@
define <vscale x 4 x double> @complex_mul_v4f64(<vscale x 4 x double> %a, <vscale x 4 x double> %b) {
; CHECK-LABEL: complex_mul_v4f64:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov z4.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov z4.d, #0 // =0x0
; CHECK-NEXT: mov z5.d, z4.d
-; CHECK-NEXT: fcmla z4.d, p0/m, z3.d, z1.d, #0
; CHECK-NEXT: fcmla z5.d, p0/m, z2.d, z0.d, #0
-; CHECK-NEXT: fcmla z4.d, p0/m, z3.d, z1.d, #90
+; CHECK-NEXT: fcmla z4.d, p0/m, z3.d, z1.d, #0
; CHECK-NEXT: fcmla z5.d, p0/m, z2.d, z0.d, #90
-; CHECK-NEXT: mov z1.d, z4.d
+; CHECK-NEXT: fcmla z4.d, p0/m, z3.d, z1.d, #90
; CHECK-NEXT: mov z0.d, z5.d
+; CHECK-NEXT: mov z1.d, z4.d
; CHECK-NEXT: ret
entry:
%a.deinterleaved = tail call { <vscale x 2 x double>, <vscale x 2 x double> } @llvm.experimental.vector.deinterleave2.nxv4f64(<vscale x 4 x double> %a)
@@ -65,8 +65,8 @@
define <vscale x 8 x double> @complex_mul_v8f64(<vscale x 8 x double> %a, <vscale x 8 x double> %b) {
; CHECK-LABEL: complex_mul_v8f64:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov z24.d, #0 // =0x0
; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov z24.d, #0 // =0x0
; CHECK-NEXT: mov z25.d, z24.d
; CHECK-NEXT: mov z26.d, z24.d
; CHECK-NEXT: mov z27.d, z24.d
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f64-mul.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f64-mul.ll
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-f64-mul.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-f64-mul.ll
@@ -33,12 +33,12 @@
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: movi v4.2d, #0000000000000000
; CHECK-NEXT: movi v5.2d, #0000000000000000
-; CHECK-NEXT: fcmla v4.2d, v0.2d, v2.2d, #0
-; CHECK-NEXT: fcmla v5.2d, v1.2d, v3.2d, #0
-; CHECK-NEXT: fcmla v4.2d, v0.2d, v2.2d, #90
-; CHECK-NEXT: fcmla v5.2d, v1.2d, v3.2d, #90
-; CHECK-NEXT: mov v0.16b, v4.16b
-; CHECK-NEXT: mov v1.16b, v5.16b
+; CHECK-NEXT: fcmla v5.2d, v0.2d, v2.2d, #0
+; CHECK-NEXT: fcmla v4.2d, v1.2d, v3.2d, #0
+; CHECK-NEXT: fcmla v5.2d, v0.2d, v2.2d, #90
+; CHECK-NEXT: fcmla v4.2d, v1.2d, v3.2d, #90
+; CHECK-NEXT: mov v0.16b, v5.16b
+; CHECK-NEXT: mov v1.16b, v4.16b
; CHECK-NEXT: ret
entry:
%a.real = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> <i32 0, i32 2>
@@ -64,17 +64,17 @@
; CHECK-NEXT: movi v18.2d, #0000000000000000
; CHECK-NEXT: movi v19.2d, #0000000000000000
; CHECK-NEXT: fcmla v16.2d, v0.2d, v4.2d, #0
-; CHECK-NEXT: fcmla v17.2d, v1.2d, v5.2d, #0
-; CHECK-NEXT: fcmla v18.2d, v2.2d, v6.2d, #0
-; CHECK-NEXT: fcmla v19.2d, v3.2d, v7.2d, #0
+; CHECK-NEXT: fcmla v18.2d, v1.2d, v5.2d, #0
+; CHECK-NEXT: fcmla v17.2d, v3.2d, v7.2d, #0
+; CHECK-NEXT: fcmla v19.2d, v2.2d, v6.2d, #0
; CHECK-NEXT: fcmla v16.2d, v0.2d, v4.2d, #90
-; CHECK-NEXT: fcmla v17.2d, v1.2d, v5.2d, #90
-; CHECK-NEXT: fcmla v18.2d, v2.2d, v6.2d, #90
-; CHECK-NEXT: fcmla v19.2d, v3.2d, v7.2d, #90
+; CHECK-NEXT: fcmla v18.2d, v1.2d, v5.2d, #90
+; CHECK-NEXT: fcmla v17.2d, v3.2d, v7.2d, #90
+; CHECK-NEXT: fcmla v19.2d, v2.2d, v6.2d, #90
; CHECK-NEXT: mov v0.16b, v16.16b
-; CHECK-NEXT: mov v1.16b, v17.16b
-; CHECK-NEXT: mov v2.16b, v18.16b
-; CHECK-NEXT: mov v3.16b, v19.16b
+; CHECK-NEXT: mov v1.16b, v18.16b
+; CHECK-NEXT: mov v3.16b, v17.16b
+; CHECK-NEXT: mov v2.16b, v19.16b
; CHECK-NEXT: ret
entry:
%a.real = shufflevector <8 x double> %a, <8 x double> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-i16-add-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-i16-add-scalable.ll
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-i16-add-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-i16-add-scalable.ll
@@ -58,8 +58,8 @@
define <vscale x 16 x i16> @complex_add_v16i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b) {
; CHECK-LABEL: complex_add_v16i16:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: cadd z2.h, z2.h, z0.h, #90
; CHECK-NEXT: cadd z3.h, z3.h, z1.h, #90
+; CHECK-NEXT: cadd z2.h, z2.h, z0.h, #90
; CHECK-NEXT: mov z0.d, z2.d
; CHECK-NEXT: mov z1.d, z3.d
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-i16-mul-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-i16-mul-scalable.ll
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-i16-mul-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-i16-mul-scalable.ll
@@ -11,15 +11,15 @@
; CHECK-NEXT: uunpklo z0.d, z0.s
; CHECK-NEXT: uunpkhi z3.d, z1.s
; CHECK-NEXT: uunpklo z1.d, z1.s
+; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: uzp1 z4.d, z0.d, z2.d
; CHECK-NEXT: uzp2 z0.d, z0.d, z2.d
-; CHECK-NEXT: uzp2 z2.d, z1.d, z3.d
-; CHECK-NEXT: uzp1 z1.d, z1.d, z3.d
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mul z3.d, z1.d, z0.d
-; CHECK-NEXT: mul z1.d, z1.d, z4.d
-; CHECK-NEXT: mla z3.d, p0/m, z2.d, z4.d
-; CHECK-NEXT: msb z0.d, p0/m, z2.d, z1.d
+; CHECK-NEXT: uzp1 z2.d, z1.d, z3.d
+; CHECK-NEXT: uzp2 z1.d, z1.d, z3.d
+; CHECK-NEXT: mul z3.d, z2.d, z0.d
+; CHECK-NEXT: mul z2.d, z2.d, z4.d
+; CHECK-NEXT: mla z3.d, p0/m, z1.d, z4.d
+; CHECK-NEXT: msb z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT: zip2 z1.d, z0.d, z3.d
; CHECK-NEXT: zip1 z0.d, z0.d, z3.d
; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
@@ -104,18 +104,18 @@
; CHECK-NEXT: mov z25.d, z24.d
; CHECK-NEXT: mov z26.d, z24.d
; CHECK-NEXT: mov z27.d, z24.d
+; CHECK-NEXT: cmla z24.h, z7.h, z3.h, #0
; CHECK-NEXT: cmla z25.h, z4.h, z0.h, #0
; CHECK-NEXT: cmla z26.h, z5.h, z1.h, #0
; CHECK-NEXT: cmla z27.h, z6.h, z2.h, #0
-; CHECK-NEXT: cmla z24.h, z7.h, z3.h, #0
+; CHECK-NEXT: cmla z24.h, z7.h, z3.h, #90
; CHECK-NEXT: cmla z25.h, z4.h, z0.h, #90
; CHECK-NEXT: cmla z26.h, z5.h, z1.h, #90
; CHECK-NEXT: cmla z27.h, z6.h, z2.h, #90
-; CHECK-NEXT: cmla z24.h, z7.h, z3.h, #90
+; CHECK-NEXT: mov z3.d, z24.d
; CHECK-NEXT: mov z0.d, z25.d
; CHECK-NEXT: mov z1.d, z26.d
; CHECK-NEXT: mov z2.d, z27.d
-; CHECK-NEXT: mov z3.d, z24.d
; CHECK-NEXT: ret
entry:
%a.deinterleaved = tail call { <vscale x 16 x i16>, <vscale x 16 x i16> } @llvm.experimental.vector.deinterleave2.nxv32i16(<vscale x 32 x i16> %a)
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-i32-add-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-i32-add-scalable.ll
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-i32-add-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-i32-add-scalable.ll
@@ -27,8 +27,8 @@
define <vscale x 8 x i32> @complex_add_v8i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b) {
; CHECK-LABEL: complex_add_v8i32:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: cadd z2.s, z2.s, z0.s, #90
; CHECK-NEXT: cadd z3.s, z3.s, z1.s, #90
+; CHECK-NEXT: cadd z2.s, z2.s, z0.s, #90
; CHECK-NEXT: mov z0.d, z2.d
; CHECK-NEXT: mov z1.d, z3.d
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-i32-mul-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-i32-mul-scalable.ll
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-i32-mul-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-i32-mul-scalable.ll
@@ -67,18 +67,18 @@
; CHECK-NEXT: mov z25.d, z24.d
; CHECK-NEXT: mov z26.d, z24.d
; CHECK-NEXT: mov z27.d, z24.d
+; CHECK-NEXT: cmla z24.s, z7.s, z3.s, #0
; CHECK-NEXT: cmla z25.s, z4.s, z0.s, #0
; CHECK-NEXT: cmla z26.s, z5.s, z1.s, #0
; CHECK-NEXT: cmla z27.s, z6.s, z2.s, #0
-; CHECK-NEXT: cmla z24.s, z7.s, z3.s, #0
+; CHECK-NEXT: cmla z24.s, z7.s, z3.s, #90
; CHECK-NEXT: cmla z25.s, z4.s, z0.s, #90
; CHECK-NEXT: cmla z26.s, z5.s, z1.s, #90
; CHECK-NEXT: cmla z27.s, z6.s, z2.s, #90
-; CHECK-NEXT: cmla z24.s, z7.s, z3.s, #90
+; CHECK-NEXT: mov z3.d, z24.d
; CHECK-NEXT: mov z0.d, z25.d
; CHECK-NEXT: mov z1.d, z26.d
; CHECK-NEXT: mov z2.d, z27.d
-; CHECK-NEXT: mov z3.d, z24.d
; CHECK-NEXT: ret
entry:
%a.deinterleaved = tail call { <vscale x 8 x i32>, <vscale x 8 x i32> } @llvm.experimental.vector.deinterleave2.nxv16i32(<vscale x 16 x i32> %a)
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-i64-add-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-i64-add-scalable.ll
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-i64-add-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-i64-add-scalable.ll
@@ -27,8 +27,8 @@
define <vscale x 4 x i64> @complex_add_v4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b) {
; CHECK-LABEL: complex_add_v4i64:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: cadd z2.d, z2.d, z0.d, #90
; CHECK-NEXT: cadd z3.d, z3.d, z1.d, #90
+; CHECK-NEXT: cadd z2.d, z2.d, z0.d, #90
; CHECK-NEXT: mov z0.d, z2.d
; CHECK-NEXT: mov z1.d, z3.d
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-i64-mul-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-i64-mul-scalable.ll
--- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-i64-mul-scalable.ll
+++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-i64-mul-scalable.ll
@@ -67,18 +67,18 @@
; CHECK-NEXT: mov z25.d, z24.d
; CHECK-NEXT: mov z26.d, z24.d
; CHECK-NEXT: mov z27.d, z24.d
+; CHECK-NEXT: cmla z24.d, z7.d, z3.d, #0
; CHECK-NEXT: cmla z25.d, z4.d, z0.d, #0
; CHECK-NEXT: cmla z26.d, z5.d, z1.d, #0
; CHECK-NEXT: cmla z27.d, z6.d, z2.d, #0
-; CHECK-NEXT: cmla z24.d, z7.d, z3.d, #0
+; CHECK-NEXT: cmla z24.d, z7.d, z3.d, #90
; CHECK-NEXT: cmla z25.d, z4.d, z0.d, #90
; CHECK-NEXT: cmla z26.d, z5.d, z1.d, #90
; CHECK-NEXT: cmla z27.d, z6.d, z2.d, #90
-; CHECK-NEXT: cmla z24.d, z7.d, z3.d, #90
+; CHECK-NEXT: mov z3.d, z24.d
; CHECK-NEXT: mov z0.d, z25.d
; CHECK-NEXT: mov z1.d, z26.d
; CHECK-NEXT: mov z2.d, z27.d
-; CHECK-NEXT: mov z3.d, z24.d
; CHECK-NEXT: ret
entry:
%a.deinterleaved = tail call { <vscale x 4 x i64>, <vscale x 4 x i64> } @llvm.experimental.vector.deinterleave2.nxv8i64(<vscale x 8 x i64> %a)
@@ -105,18 +105,18 @@
; CHECK-NEXT: mov z25.d, z24.d
; CHECK-NEXT: mov z26.d, z24.d
; CHECK-NEXT: mov z27.d, z24.d
+; CHECK-NEXT: cmla z24.d, z7.d, z3.d, #270
; CHECK-NEXT: cmla z25.d, z4.d, z0.d, #270
; CHECK-NEXT: cmla z26.d, z5.d, z1.d, #270
; CHECK-NEXT: cmla z27.d, z6.d, z2.d, #270
-; CHECK-NEXT: cmla z24.d, z7.d, z3.d, #270
+; CHECK-NEXT: cmla z24.d, z7.d, z3.d, #180
; CHECK-NEXT: cmla z25.d, z4.d, z0.d, #180
; CHECK-NEXT: cmla z26.d, z5.d, z1.d, #180
; CHECK-NEXT: cmla z27.d, z6.d, z2.d, #180
-; CHECK-NEXT: cmla z24.d, z7.d, z3.d, #180
+; CHECK-NEXT: mov z3.d, z24.d
; CHECK-NEXT: mov z0.d, z25.d
; CHECK-NEXT: mov z1.d, z26.d
; CHECK-NEXT: mov z2.d, z27.d
-; CHECK-NEXT: mov z3.d, z24.d
; CHECK-NEXT: ret
entry:
%a.deinterleaved = tail call { <vscale x 4 x i64>, <vscale x 4 x i64> } @llvm.experimental.vector.deinterleave2.nxv8i64(<vscale x 8 x i64> %a)
diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-i8-add-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-i8-add-scalable.ll --- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-i8-add-scalable.ll +++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-i8-add-scalable.ll @@ -58,8 +58,8 @@ define @complex_add_v32i8( %a, %b) { ; CHECK-LABEL: complex_add_v32i8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: cadd z2.b, z2.b, z0.b, #90 ; CHECK-NEXT: cadd z3.b, z3.b, z1.b, #90 +; CHECK-NEXT: cadd z2.b, z2.b, z0.b, #90 ; CHECK-NEXT: mov z0.d, z2.d ; CHECK-NEXT: mov z1.d, z3.d ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-mixed-cases.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-mixed-cases.ll --- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-mixed-cases.ll +++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-mixed-cases.ll @@ -82,8 +82,8 @@ ; CHECK-NEXT: ext v3.16b, v2.16b, v2.16b, #8 ; CHECK-NEXT: ext v4.16b, v1.16b, v1.16b, #8 ; CHECK-NEXT: zip1 v5.2s, v2.2s, v3.2s -; CHECK-NEXT: zip2 v2.2s, v2.2s, v3.2s ; CHECK-NEXT: zip1 v6.2s, v1.2s, v4.2s +; CHECK-NEXT: zip2 v2.2s, v2.2s, v3.2s ; CHECK-NEXT: zip2 v1.2s, v1.2s, v4.2s ; CHECK-NEXT: ext v3.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: fmul v7.2s, v6.2s, v5.2s @@ -220,11 +220,11 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v3.2d, #0000000000000000 ; CHECK-NEXT: movi v4.2d, #0000000000000000 -; CHECK-NEXT: fcmla v3.4s, v2.4s, v0.4s, #0 -; CHECK-NEXT: fcmla v4.4s, v1.4s, v0.4s, #0 -; CHECK-NEXT: fcmla v3.4s, v2.4s, v0.4s, #90 -; CHECK-NEXT: fcmla v4.4s, v1.4s, v0.4s, #90 -; CHECK-NEXT: fcadd v0.4s, v3.4s, v4.4s, #90 +; CHECK-NEXT: fcmla v4.4s, v2.4s, v0.4s, #0 +; CHECK-NEXT: fcmla v3.4s, v1.4s, v0.4s, #0 +; CHECK-NEXT: fcmla v4.4s, v2.4s, v0.4s, #90 +; CHECK-NEXT: fcmla v3.4s, v1.4s, v0.4s, #90 +; CHECK-NEXT: fcadd v0.4s, v4.4s, v3.4s, #90 ; CHECK-NEXT: ret entry: %ar = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> @@ -258,25 +258,25 @@ define <4 x float> @mul_triangle_addmul(<4 x float> %a, <4 x float> %b, <4 x float> %c) { ; CHECK-LABEL: mul_triangle_addmul: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8 -; CHECK-NEXT: ext v4.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: zip1 v5.2s, v1.2s, v3.2s -; CHECK-NEXT: zip2 v1.2s, v1.2s, v3.2s -; CHECK-NEXT: zip1 v6.2s, v0.2s, v4.2s -; CHECK-NEXT: zip2 v0.2s, v0.2s, v4.2s -; CHECK-NEXT: ext v3.16b, v2.16b, v2.16b, #8 -; CHECK-NEXT: fmul v7.2s, v5.2s, v6.2s -; CHECK-NEXT: fmul v6.2s, v1.2s, v6.2s -; CHECK-NEXT: zip1 v4.2s, v2.2s, v3.2s -; CHECK-NEXT: zip2 v2.2s, v2.2s, v3.2s -; CHECK-NEXT: fmov d3, d7 -; CHECK-NEXT: fmov d16, d6 +; CHECK-NEXT: ext v3.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: ext v4.16b, v1.16b, v1.16b, #8 +; CHECK-NEXT: zip1 v5.2s, v0.2s, v3.2s +; CHECK-NEXT: zip1 v6.2s, v1.2s, v4.2s +; CHECK-NEXT: zip2 v1.2s, v1.2s, v4.2s +; CHECK-NEXT: ext v4.16b, v2.16b, v2.16b, #8 +; CHECK-NEXT: zip2 v0.2s, v0.2s, v3.2s +; CHECK-NEXT: fmul v7.2s, v6.2s, v5.2s +; CHECK-NEXT: fmul v5.2s, v1.2s, v5.2s +; CHECK-NEXT: zip1 v3.2s, v2.2s, v4.2s +; CHECK-NEXT: zip2 v2.2s, v2.2s, v4.2s +; CHECK-NEXT: fmov d4, d7 +; CHECK-NEXT: fmov d16, d5 ; CHECK-NEXT: fmls v7.2s, v0.2s, v2.2s -; CHECK-NEXT: fmla v6.2s, v0.2s, v4.2s -; CHECK-NEXT: fmls v3.2s, v0.2s, v1.2s -; CHECK-NEXT: fmla v16.2s, v0.2s, v5.2s +; CHECK-NEXT: fmla v5.2s, v0.2s, v3.2s +; CHECK-NEXT: fmls v4.2s, v0.2s, v1.2s +; CHECK-NEXT: fmla v16.2s, v0.2s, v6.2s ; CHECK-NEXT: fsub v0.2s, v7.2s, v16.2s -; CHECK-NEXT: fadd v1.2s, v6.2s, v3.2s +; CHECK-NEXT: 
fadd v1.2s, v5.2s, v4.2s ; CHECK-NEXT: zip1 v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret entry: @@ -314,8 +314,8 @@ ; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8 ; CHECK-NEXT: zip2 v4.2s, v0.2s, v2.2s -; CHECK-NEXT: zip1 v0.2s, v0.2s, v2.2s ; CHECK-NEXT: zip1 v5.2s, v1.2s, v3.2s +; CHECK-NEXT: zip1 v0.2s, v0.2s, v2.2s ; CHECK-NEXT: zip2 v1.2s, v1.2s, v3.2s ; CHECK-NEXT: fmul v2.2s, v4.2s, v5.2s ; CHECK-NEXT: fmul v3.2s, v1.2s, v4.2s @@ -442,23 +442,23 @@ define <4 x float> @mul_divequal(<4 x float> %a, <4 x float> %b, <4 x float> %c) { ; CHECK-LABEL: mul_divequal: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8 -; CHECK-NEXT: ext v4.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: ext v16.16b, v2.16b, v2.16b, #8 -; CHECK-NEXT: zip2 v5.2s, v1.2s, v3.2s -; CHECK-NEXT: zip1 v1.2s, v1.2s, v3.2s -; CHECK-NEXT: zip2 v6.2s, v0.2s, v4.2s -; CHECK-NEXT: zip1 v0.2s, v0.2s, v4.2s -; CHECK-NEXT: zip1 v4.2s, v2.2s, v16.2s -; CHECK-NEXT: zip2 v2.2s, v2.2s, v16.2s -; CHECK-NEXT: fmul v7.2s, v6.2s, v5.2s -; CHECK-NEXT: fneg v3.2s, v7.2s -; CHECK-NEXT: fmla v3.2s, v0.2s, v1.2s -; CHECK-NEXT: fmul v0.2s, v5.2s, v0.2s -; CHECK-NEXT: fmla v0.2s, v6.2s, v1.2s -; CHECK-NEXT: fdiv v3.2s, v3.2s, v4.2s +; CHECK-NEXT: ext v3.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: ext v4.16b, v1.16b, v1.16b, #8 +; CHECK-NEXT: zip2 v5.2s, v0.2s, v3.2s +; CHECK-NEXT: zip2 v6.2s, v1.2s, v4.2s +; CHECK-NEXT: zip1 v0.2s, v0.2s, v3.2s +; CHECK-NEXT: zip1 v1.2s, v1.2s, v4.2s +; CHECK-NEXT: ext v3.16b, v2.16b, v2.16b, #8 +; CHECK-NEXT: fmul v7.2s, v5.2s, v6.2s +; CHECK-NEXT: fneg v4.2s, v7.2s +; CHECK-NEXT: zip1 v7.2s, v2.2s, v3.2s +; CHECK-NEXT: zip2 v2.2s, v2.2s, v3.2s +; CHECK-NEXT: fmla v4.2s, v0.2s, v1.2s +; CHECK-NEXT: fmul v0.2s, v6.2s, v0.2s +; CHECK-NEXT: fmla v0.2s, v5.2s, v1.2s +; CHECK-NEXT: fdiv v4.2s, v4.2s, v7.2s ; CHECK-NEXT: fdiv v0.2s, v0.2s, v2.2s -; CHECK-NEXT: zip1 v0.4s, v3.4s, v0.4s +; CHECK-NEXT: zip1 v0.4s, v4.4s, v0.4s ; CHECK-NEXT: ret entry: %strided.vec = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-multiuses.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-multiuses.ll --- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-multiuses.ll +++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-multiuses.ll @@ -49,17 +49,17 @@ ; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8 ; CHECK-NEXT: zip2 v4.2s, v0.2s, v2.2s -; CHECK-NEXT: zip1 v0.2s, v0.2s, v2.2s ; CHECK-NEXT: zip1 v5.2s, v1.2s, v3.2s +; CHECK-NEXT: zip1 v0.2s, v0.2s, v2.2s ; CHECK-NEXT: zip2 v1.2s, v1.2s, v3.2s ; CHECK-NEXT: fmul v2.2s, v4.2s, v5.2s ; CHECK-NEXT: fmul v3.2s, v1.2s, v4.2s ; CHECK-NEXT: fmla v2.2s, v0.2s, v1.2s ; CHECK-NEXT: fneg v1.2s, v3.2s ; CHECK-NEXT: fmul v3.2s, v2.2s, v4.2s +; CHECK-NEXT: str d2, [x0] ; CHECK-NEXT: fmla v1.2s, v0.2s, v5.2s ; CHECK-NEXT: fmul v5.2s, v2.2s, v0.2s -; CHECK-NEXT: str d2, [x0] ; CHECK-NEXT: fneg v3.2s, v3.2s ; CHECK-NEXT: fmla v5.2s, v4.2s, v1.2s ; CHECK-NEXT: fmla v3.2s, v0.2s, v1.2s @@ -96,28 +96,27 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ext v5.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: ext v6.16b, v1.16b, v1.16b, #8 -; CHECK-NEXT: movi v4.2d, #0000000000000000 +; CHECK-NEXT: ext v4.16b, v2.16b, v2.16b, #8 ; CHECK-NEXT: zip2 v7.2s, v0.2s, v5.2s -; CHECK-NEXT: zip1 v0.2s, v0.2s, v5.2s ; CHECK-NEXT: zip1 v16.2s, v1.2s, v6.2s ; CHECK-NEXT: zip2 v1.2s, v1.2s, v6.2s -; CHECK-NEXT: ext v6.16b, v2.16b, v2.16b, #8 -; CHECK-NEXT: fcmla 
v4.4s, v3.4s, v2.4s, #0 +; CHECK-NEXT: zip1 v0.2s, v0.2s, v5.2s ; CHECK-NEXT: fmul v5.2s, v16.2s, v7.2s -; CHECK-NEXT: fmul v7.2s, v1.2s, v7.2s -; CHECK-NEXT: fcmla v4.4s, v3.4s, v2.4s, #90 +; CHECK-NEXT: fmul v6.2s, v1.2s, v7.2s ; CHECK-NEXT: fmla v5.2s, v0.2s, v1.2s -; CHECK-NEXT: fneg v1.2s, v7.2s -; CHECK-NEXT: zip1 v7.2s, v2.2s, v6.2s -; CHECK-NEXT: zip2 v6.2s, v2.2s, v6.2s +; CHECK-NEXT: fneg v1.2s, v6.2s +; CHECK-NEXT: zip1 v6.2s, v2.2s, v4.2s +; CHECK-NEXT: zip2 v4.2s, v2.2s, v4.2s ; CHECK-NEXT: fmla v1.2s, v0.2s, v16.2s -; CHECK-NEXT: fmul v17.2s, v7.2s, v5.2s -; CHECK-NEXT: fmul v0.2s, v6.2s, v5.2s +; CHECK-NEXT: fmul v17.2s, v6.2s, v5.2s +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: fmul v5.2s, v4.2s, v5.2s +; CHECK-NEXT: fmla v17.2s, v1.2s, v4.2s +; CHECK-NEXT: fcmla v0.4s, v3.4s, v2.4s, #0 ; CHECK-NEXT: str d1, [x0] -; CHECK-NEXT: fmla v17.2s, v1.2s, v6.2s -; CHECK-NEXT: fneg v16.2s, v0.2s -; CHECK-NEXT: mov v0.16b, v4.16b -; CHECK-NEXT: fmla v16.2s, v1.2s, v7.2s +; CHECK-NEXT: fneg v16.2s, v5.2s +; CHECK-NEXT: fcmla v0.4s, v3.4s, v2.4s, #90 +; CHECK-NEXT: fmla v16.2s, v1.2s, v6.2s ; CHECK-NEXT: st2 { v16.2s, v17.2s }, [x1] ; CHECK-NEXT: ret entry: @@ -160,28 +159,28 @@ define <4 x float> @multiple_muls_shuffle_external_with_loads(ptr %ptr_a, ptr %ptr_b, ptr %ptr_c, ptr %ptr_d, ptr %p1, ptr %p2) { ; CHECK-LABEL: multiple_muls_shuffle_external_with_loads: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ld2 { v1.2s, v2.2s }, [x0] +; CHECK-NEXT: ld2 { v0.2s, v1.2s }, [x0] +; CHECK-NEXT: ld2 { v2.2s, v3.2s }, [x1] +; CHECK-NEXT: fmul v4.2s, v3.2s, v1.2s +; CHECK-NEXT: fmul v6.2s, v2.2s, v1.2s +; CHECK-NEXT: fneg v4.2s, v4.2s +; CHECK-NEXT: fmla v6.2s, v0.2s, v3.2s +; CHECK-NEXT: fmla v4.2s, v0.2s, v2.2s +; CHECK-NEXT: str d4, [x4] +; CHECK-NEXT: ldr q5, [x2] +; CHECK-NEXT: ext v7.16b, v5.16b, v5.16b, #8 +; CHECK-NEXT: zip1 v0.2s, v5.2s, v7.2s +; CHECK-NEXT: zip2 v1.2s, v5.2s, v7.2s +; CHECK-NEXT: fmul v3.2s, v0.2s, v6.2s +; CHECK-NEXT: fmul v6.2s, v1.2s, v6.2s +; CHECK-NEXT: fmla v3.2s, v4.2s, v1.2s +; CHECK-NEXT: fneg v2.2s, v6.2s +; CHECK-NEXT: fmla v2.2s, v4.2s, v0.2s ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: ld2 { v3.2s, v4.2s }, [x1] -; CHECK-NEXT: fmul v5.2s, v4.2s, v2.2s -; CHECK-NEXT: fmul v7.2s, v3.2s, v2.2s -; CHECK-NEXT: fneg v5.2s, v5.2s -; CHECK-NEXT: fmla v7.2s, v1.2s, v4.2s -; CHECK-NEXT: fmla v5.2s, v1.2s, v3.2s -; CHECK-NEXT: str d5, [x4] -; CHECK-NEXT: ldr q6, [x2] -; CHECK-NEXT: ext v16.16b, v6.16b, v6.16b, #8 -; CHECK-NEXT: zip1 v1.2s, v6.2s, v16.2s -; CHECK-NEXT: zip2 v2.2s, v6.2s, v16.2s -; CHECK-NEXT: fmul v4.2s, v1.2s, v7.2s -; CHECK-NEXT: fmul v7.2s, v2.2s, v7.2s -; CHECK-NEXT: fmla v4.2s, v5.2s, v2.2s -; CHECK-NEXT: fneg v3.2s, v7.2s -; CHECK-NEXT: fmla v3.2s, v5.2s, v1.2s -; CHECK-NEXT: st2 { v3.2s, v4.2s }, [x5] +; CHECK-NEXT: st2 { v2.2s, v3.2s }, [x5] ; CHECK-NEXT: ldr q1, [x3] -; CHECK-NEXT: fcmla v0.4s, v1.4s, v6.4s, #0 -; CHECK-NEXT: fcmla v0.4s, v1.4s, v6.4s, #90 +; CHECK-NEXT: fcmla v0.4s, v1.4s, v5.4s, #0 +; CHECK-NEXT: fcmla v0.4s, v1.4s, v5.4s, #90 ; CHECK-NEXT: ret entry: %a = load <4 x float>, ptr %ptr_a @@ -228,30 +227,30 @@ define <4 x float> @multiple_muls_mul_external(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, ptr %p1, ptr %p2) { ; CHECK-LABEL: multiple_muls_mul_external: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ext v5.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: ext v6.16b, v1.16b, v1.16b, #8 -; CHECK-NEXT: ext v4.16b, v3.16b, v3.16b, #8 -; CHECK-NEXT: ext v7.16b, v2.16b, v2.16b, #8 -; 
CHECK-NEXT: zip2 v16.2s, v0.2s, v5.2s -; CHECK-NEXT: zip2 v17.2s, v1.2s, v6.2s -; CHECK-NEXT: zip1 v0.2s, v0.2s, v5.2s -; CHECK-NEXT: zip1 v1.2s, v1.2s, v6.2s -; CHECK-NEXT: zip1 v18.2s, v2.2s, v7.2s -; CHECK-NEXT: zip2 v2.2s, v2.2s, v7.2s -; CHECK-NEXT: zip2 v7.2s, v3.2s, v4.2s -; CHECK-NEXT: zip1 v3.2s, v3.2s, v4.2s -; CHECK-NEXT: fmul v19.2s, v16.2s, v17.2s -; CHECK-NEXT: fmul v5.2s, v18.2s, v7.2s -; CHECK-NEXT: fmul v6.2s, v2.2s, v7.2s -; CHECK-NEXT: fneg v4.2s, v19.2s -; CHECK-NEXT: fmul v7.2s, v0.2s, v17.2s +; CHECK-NEXT: ext v4.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: ext v5.16b, v1.16b, v1.16b, #8 +; CHECK-NEXT: ext v16.16b, v2.16b, v2.16b, #8 +; CHECK-NEXT: ext v17.16b, v3.16b, v3.16b, #8 +; CHECK-NEXT: zip2 v6.2s, v0.2s, v4.2s +; CHECK-NEXT: zip2 v7.2s, v1.2s, v5.2s +; CHECK-NEXT: zip1 v19.2s, v2.2s, v16.2s +; CHECK-NEXT: zip2 v2.2s, v2.2s, v16.2s +; CHECK-NEXT: zip2 v16.2s, v3.2s, v17.2s +; CHECK-NEXT: zip1 v0.2s, v0.2s, v4.2s +; CHECK-NEXT: zip1 v1.2s, v1.2s, v5.2s +; CHECK-NEXT: zip1 v3.2s, v3.2s, v17.2s +; CHECK-NEXT: fmul v18.2s, v6.2s, v7.2s +; CHECK-NEXT: fmul v5.2s, v19.2s, v16.2s +; CHECK-NEXT: fmul v16.2s, v2.2s, v16.2s +; CHECK-NEXT: fmul v7.2s, v0.2s, v7.2s +; CHECK-NEXT: fneg v4.2s, v18.2s ; CHECK-NEXT: fmla v5.2s, v3.2s, v2.2s -; CHECK-NEXT: fneg v2.2s, v6.2s +; CHECK-NEXT: fneg v2.2s, v16.2s +; CHECK-NEXT: fmla v7.2s, v1.2s, v6.2s ; CHECK-NEXT: fmla v4.2s, v1.2s, v0.2s -; CHECK-NEXT: fmla v7.2s, v1.2s, v16.2s -; CHECK-NEXT: fmla v2.2s, v3.2s, v18.2s -; CHECK-NEXT: fmul v17.2s, v4.2s, v5.2s +; CHECK-NEXT: fmla v2.2s, v3.2s, v19.2s ; CHECK-NEXT: fmul v0.2s, v7.2s, v5.2s +; CHECK-NEXT: fmul v17.2s, v4.2s, v5.2s ; CHECK-NEXT: str d4, [x0] ; CHECK-NEXT: fmla v17.2s, v2.2s, v7.2s ; CHECK-NEXT: fneg v16.2s, v0.2s @@ -299,34 +298,35 @@ define void @mul_add_common_mul_add_mul(<4 x double> %a, <4 x double> %b, <4 x double> %c, <4 x double> %d, <4 x double> %e, <4 x double> %f, <4 x double> %g, <4 x double> %h, ptr %p1, ptr %p2) { ; CHECK-LABEL: mul_add_common_mul_add_mul: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldp q17, q16, [sp, #64] -; CHECK-NEXT: movi v20.2d, #0000000000000000 -; CHECK-NEXT: movi v21.2d, #0000000000000000 -; CHECK-NEXT: movi v24.2d, #0000000000000000 -; CHECK-NEXT: movi v25.2d, #0000000000000000 -; CHECK-NEXT: ldp q19, q18, [sp, #96] -; CHECK-NEXT: fcmla v24.2d, v2.2d, v0.2d, #0 -; CHECK-NEXT: fcmla v25.2d, v3.2d, v1.2d, #0 -; CHECK-NEXT: fcmla v20.2d, v19.2d, v17.2d, #0 -; CHECK-NEXT: fcmla v24.2d, v2.2d, v0.2d, #90 -; CHECK-NEXT: fcmla v21.2d, v18.2d, v16.2d, #0 -; CHECK-NEXT: ldp q23, q22, [sp, #32] -; CHECK-NEXT: fcmla v20.2d, v19.2d, v17.2d, #90 -; CHECK-NEXT: fcmla v25.2d, v3.2d, v1.2d, #90 -; CHECK-NEXT: fcmla v21.2d, v18.2d, v16.2d, #90 -; CHECK-NEXT: fcmla v20.2d, v6.2d, v4.2d, #0 -; CHECK-NEXT: ldp q1, q0, [sp] -; CHECK-NEXT: fcmla v21.2d, v7.2d, v5.2d, #0 -; CHECK-NEXT: fcmla v20.2d, v6.2d, v4.2d, #90 -; CHECK-NEXT: fcmla v21.2d, v7.2d, v5.2d, #90 -; CHECK-NEXT: fsub v2.2d, v24.2d, v20.2d -; CHECK-NEXT: fcmla v20.2d, v1.2d, v23.2d, #0 -; CHECK-NEXT: fsub v3.2d, v25.2d, v21.2d -; CHECK-NEXT: fcmla v21.2d, v0.2d, v22.2d, #0 -; CHECK-NEXT: fcmla v20.2d, v1.2d, v23.2d, #90 -; CHECK-NEXT: stp q2, q3, [x0] -; CHECK-NEXT: fcmla v21.2d, v0.2d, v22.2d, #90 -; CHECK-NEXT: stp q20, q21, [x1] +; CHECK-NEXT: movi v16.2d, #0000000000000000 +; CHECK-NEXT: movi v17.2d, #0000000000000000 +; CHECK-NEXT: movi v22.2d, #0000000000000000 +; CHECK-NEXT: ldp q21, q18, [sp, #96] +; CHECK-NEXT: ldp q20, q19, [sp, #64] +; CHECK-NEXT: fcmla v22.2d, v3.2d, v1.2d, #0 +; 
CHECK-NEXT: fcmla v16.2d, v18.2d, v19.2d, #0 +; CHECK-NEXT: fcmla v17.2d, v21.2d, v20.2d, #0 +; CHECK-NEXT: fcmla v22.2d, v3.2d, v1.2d, #90 +; CHECK-NEXT: ldr q1, [sp, #48] +; CHECK-NEXT: ldr q3, [sp] +; CHECK-NEXT: fcmla v16.2d, v18.2d, v19.2d, #90 +; CHECK-NEXT: movi v18.2d, #0000000000000000 +; CHECK-NEXT: fcmla v17.2d, v21.2d, v20.2d, #90 +; CHECK-NEXT: fcmla v16.2d, v7.2d, v5.2d, #0 +; CHECK-NEXT: fcmla v18.2d, v2.2d, v0.2d, #0 +; CHECK-NEXT: fcmla v17.2d, v6.2d, v4.2d, #0 +; CHECK-NEXT: fcmla v16.2d, v7.2d, v5.2d, #90 +; CHECK-NEXT: fcmla v18.2d, v2.2d, v0.2d, #90 +; CHECK-NEXT: fcmla v17.2d, v6.2d, v4.2d, #90 +; CHECK-NEXT: ldp q0, q2, [sp, #16] +; CHECK-NEXT: fsub v4.2d, v22.2d, v16.2d +; CHECK-NEXT: fcmla v16.2d, v0.2d, v1.2d, #0 +; CHECK-NEXT: fsub v5.2d, v18.2d, v17.2d +; CHECK-NEXT: fcmla v17.2d, v3.2d, v2.2d, #0 +; CHECK-NEXT: fcmla v16.2d, v0.2d, v1.2d, #90 +; CHECK-NEXT: stp q5, q4, [x0] +; CHECK-NEXT: fcmla v17.2d, v3.2d, v2.2d, #90 +; CHECK-NEXT: stp q17, q16, [x1] ; CHECK-NEXT: ret entry: %strided.vec = shufflevector <4 x double> %a, <4 x double> poison, <2 x i32> diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll --- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll +++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-predicated-scalable.ll @@ -15,42 +15,42 @@ ; CHECK-LABEL: complex_mul_v2f64: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: mov w9, #100 // =0x64 -; CHECK-NEXT: cntd x10 -; CHECK-NEXT: mov x8, xzr -; CHECK-NEXT: mov x11, x10 ; CHECK-NEXT: mov z1.d, #0 // =0x0 -; CHECK-NEXT: rdvl x12, #2 +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: whilelo p1.d, xzr, x9 +; CHECK-NEXT: cntd x10 +; CHECK-NEXT: mov x8, xzr +; CHECK-NEXT: rdvl x11, #2 +; CHECK-NEXT: mov x12, x10 ; CHECK-NEXT: zip2 z0.d, z1.d, z1.d ; CHECK-NEXT: zip1 z1.d, z1.d, z1.d -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: .LBB0_1: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: zip2 p2.d, p1.d, p1.d ; CHECK-NEXT: add x13, x0, x8 ; CHECK-NEXT: add x14, x1, x8 -; CHECK-NEXT: zip1 p2.d, p1.d, p1.d -; CHECK-NEXT: zip2 p3.d, p1.d, p1.d +; CHECK-NEXT: zip1 p3.d, p1.d, p1.d ; CHECK-NEXT: mov z6.d, z1.d ; CHECK-NEXT: mov z7.d, z0.d -; CHECK-NEXT: ld1d { z2.d }, p3/z, [x13, #1, mul vl] -; CHECK-NEXT: ld1d { z3.d }, p2/z, [x13] -; CHECK-NEXT: ld1d { z4.d }, p3/z, [x14, #1, mul vl] -; CHECK-NEXT: ld1d { z5.d }, p2/z, [x14] -; CHECK-NEXT: whilelo p1.d, x11, x9 -; CHECK-NEXT: add x8, x8, x12 -; CHECK-NEXT: add x11, x11, x10 +; CHECK-NEXT: whilelo p1.d, x12, x9 +; CHECK-NEXT: add x8, x8, x11 +; CHECK-NEXT: add x12, x12, x10 +; CHECK-NEXT: ld1d { z2.d }, p2/z, [x13, #1, mul vl] +; CHECK-NEXT: ld1d { z3.d }, p3/z, [x13] +; CHECK-NEXT: ld1d { z4.d }, p2/z, [x14, #1, mul vl] +; CHECK-NEXT: ld1d { z5.d }, p3/z, [x14] ; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0 ; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #0 ; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90 ; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #90 -; CHECK-NEXT: mov z0.d, p3/m, z7.d -; CHECK-NEXT: mov z1.d, p2/m, z6.d +; CHECK-NEXT: mov z0.d, p2/m, z7.d +; CHECK-NEXT: mov z1.d, p3/m, z6.d ; CHECK-NEXT: b.mi .LBB0_1 ; CHECK-NEXT: // %bb.2: // %exit.block -; CHECK-NEXT: uzp2 z2.d, z1.d, z0.d -; CHECK-NEXT: uzp1 z0.d, z1.d, z0.d -; CHECK-NEXT: faddv d0, p0, z0.d -; CHECK-NEXT: faddv d1, p0, z2.d +; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d +; CHECK-NEXT: uzp2 z1.d, z1.d, z0.d +; 
CHECK-NEXT: faddv d0, p0, z2.d +; CHECK-NEXT: faddv d1, p0, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z1 ; CHECK-NEXT: ret @@ -113,17 +113,17 @@ define %"class.std::complex" @complex_mul_predicated_v2f64(ptr %a, ptr %b, ptr %cond) { ; CHECK-LABEL: complex_mul_predicated_v2f64: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov z1.d, #0 // =0x0 +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: cntd x10 -; CHECK-NEXT: mov w12, #100 // =0x64 ; CHECK-NEXT: neg x11, x10 +; CHECK-NEXT: mov w12, #100 // =0x64 ; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: mov x9, xzr ; CHECK-NEXT: and x11, x11, x12 -; CHECK-NEXT: mov z1.d, #0 // =0x0 ; CHECK-NEXT: rdvl x12, #2 ; CHECK-NEXT: zip2 z0.d, z1.d, z1.d ; CHECK-NEXT: zip1 z1.d, z1.d, z1.d -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: .LBB1_1: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ld1w { z2.d }, p0/z, [x2, x9, lsl #2] @@ -133,14 +133,14 @@ ; CHECK-NEXT: mov z7.d, z0.d ; CHECK-NEXT: add x9, x9, x10 ; CHECK-NEXT: add x8, x8, x12 -; CHECK-NEXT: cmpne p2.d, p0/z, z2.d, #0 -; CHECK-NEXT: zip1 p1.d, p2.d, p2.d -; CHECK-NEXT: zip2 p2.d, p2.d, p2.d +; CHECK-NEXT: cmpne p1.d, p0/z, z2.d, #0 +; CHECK-NEXT: cmp x11, x9 +; CHECK-NEXT: zip2 p2.d, p1.d, p1.d +; CHECK-NEXT: zip1 p1.d, p1.d, p1.d ; CHECK-NEXT: ld1d { z2.d }, p2/z, [x13, #1, mul vl] ; CHECK-NEXT: ld1d { z3.d }, p1/z, [x13] ; CHECK-NEXT: ld1d { z4.d }, p2/z, [x14, #1, mul vl] ; CHECK-NEXT: ld1d { z5.d }, p1/z, [x14] -; CHECK-NEXT: cmp x11, x9 ; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0 ; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #0 ; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90 @@ -149,10 +149,10 @@ ; CHECK-NEXT: mov z1.d, p1/m, z6.d ; CHECK-NEXT: b.ne .LBB1_1 ; CHECK-NEXT: // %bb.2: // %exit.block -; CHECK-NEXT: uzp2 z2.d, z1.d, z0.d -; CHECK-NEXT: uzp1 z0.d, z1.d, z0.d -; CHECK-NEXT: faddv d0, p0, z0.d -; CHECK-NEXT: faddv d1, p0, z2.d +; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d +; CHECK-NEXT: uzp2 z1.d, z1.d, z0.d +; CHECK-NEXT: faddv d0, p0, z2.d +; CHECK-NEXT: faddv d1, p0, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z1 ; CHECK-NEXT: ret @@ -218,15 +218,15 @@ ; CHECK-LABEL: complex_mul_predicated_x2_v2f64: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: mov w10, #100 // =0x64 +; CHECK-NEXT: mov z1.d, #0 // =0x0 +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: whilelo p1.d, xzr, x10 ; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: mov x9, xzr -; CHECK-NEXT: mov z1.d, #0 // =0x0 ; CHECK-NEXT: cntd x11 ; CHECK-NEXT: rdvl x12, #2 -; CHECK-NEXT: whilelo p1.d, xzr, x10 ; CHECK-NEXT: zip2 z0.d, z1.d, z1.d ; CHECK-NEXT: zip1 z1.d, z1.d, z1.d -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: .LBB2_1: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ld1w { z2.d }, p1/z, [x2, x9, lsl #2] @@ -237,25 +237,25 @@ ; CHECK-NEXT: add x9, x9, x11 ; CHECK-NEXT: add x8, x8, x12 ; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0 -; CHECK-NEXT: zip1 p2.d, p1.d, p1.d -; CHECK-NEXT: zip2 p3.d, p1.d, p1.d -; CHECK-NEXT: ld1d { z2.d }, p3/z, [x13, #1, mul vl] -; CHECK-NEXT: ld1d { z3.d }, p2/z, [x13] -; CHECK-NEXT: ld1d { z4.d }, p3/z, [x14, #1, mul vl] -; CHECK-NEXT: ld1d { z5.d }, p2/z, [x14] +; CHECK-NEXT: zip2 p2.d, p1.d, p1.d +; CHECK-NEXT: zip1 p3.d, p1.d, p1.d ; CHECK-NEXT: whilelo p1.d, x9, x10 +; CHECK-NEXT: ld1d { z2.d }, p2/z, [x13, #1, mul vl] +; CHECK-NEXT: ld1d { z3.d }, p3/z, [x13] +; CHECK-NEXT: ld1d { z4.d }, p2/z, [x14, #1, mul vl] +; CHECK-NEXT: ld1d { z5.d }, p3/z, [x14] ; 
CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #0 ; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #0 ; CHECK-NEXT: fcmla z6.d, p0/m, z5.d, z3.d, #90 ; CHECK-NEXT: fcmla z7.d, p0/m, z4.d, z2.d, #90 -; CHECK-NEXT: mov z0.d, p3/m, z7.d -; CHECK-NEXT: mov z1.d, p2/m, z6.d +; CHECK-NEXT: mov z0.d, p2/m, z7.d +; CHECK-NEXT: mov z1.d, p3/m, z6.d ; CHECK-NEXT: b.mi .LBB2_1 ; CHECK-NEXT: // %bb.2: // %exit.block -; CHECK-NEXT: uzp2 z2.d, z1.d, z0.d -; CHECK-NEXT: uzp1 z0.d, z1.d, z0.d -; CHECK-NEXT: faddv d0, p0, z0.d -; CHECK-NEXT: faddv d1, p0, z2.d +; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d +; CHECK-NEXT: uzp2 z1.d, z1.d, z0.d +; CHECK-NEXT: faddv d0, p0, z2.d +; CHECK-NEXT: faddv d1, p0, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z1 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll --- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll +++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions-scalable.ll @@ -14,26 +14,26 @@ define %"class.std::complex" @complex_mul_v2f64(ptr %a, ptr %b) { ; CHECK-LABEL: complex_mul_v2f64: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov z1.d, #0 // =0x0 +; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: cntd x9 -; CHECK-NEXT: mov w11, #100 // =0x64 +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: neg x10, x9 +; CHECK-NEXT: mov w11, #100 // =0x64 ; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: and x10, x10, x11 -; CHECK-NEXT: mov z1.d, #0 // =0x0 ; CHECK-NEXT: rdvl x11, #2 ; CHECK-NEXT: zip2 z0.d, z1.d, z1.d ; CHECK-NEXT: zip1 z1.d, z1.d, z1.d -; CHECK-NEXT: ptrue p1.b -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: .LBB0_1: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add x12, x0, x8 ; CHECK-NEXT: add x13, x1, x8 ; CHECK-NEXT: ld1b { z2.b }, p1/z, [x0, x8] -; CHECK-NEXT: subs x10, x10, x9 ; CHECK-NEXT: ld1d { z3.d }, p0/z, [x12, #1, mul vl] ; CHECK-NEXT: ld1b { z4.b }, p1/z, [x1, x8] ; CHECK-NEXT: ld1d { z5.d }, p0/z, [x13, #1, mul vl] +; CHECK-NEXT: subs x10, x10, x9 ; CHECK-NEXT: add x8, x8, x11 ; CHECK-NEXT: fcmla z1.d, p0/m, z4.d, z2.d, #0 ; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z3.d, #0 @@ -41,10 +41,10 @@ ; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z3.d, #90 ; CHECK-NEXT: b.ne .LBB0_1 ; CHECK-NEXT: // %bb.2: // %exit.block -; CHECK-NEXT: uzp2 z2.d, z1.d, z0.d -; CHECK-NEXT: uzp1 z0.d, z1.d, z0.d -; CHECK-NEXT: faddv d0, p0, z0.d -; CHECK-NEXT: faddv d1, p0, z2.d +; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d +; CHECK-NEXT: uzp2 z1.d, z1.d, z0.d +; CHECK-NEXT: faddv d0, p0, z2.d +; CHECK-NEXT: faddv d1, p0, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z1 ; CHECK-NEXT: ret @@ -101,31 +101,31 @@ define %"class.std::complex" @complex_mul_nonzero_init_v2f64(ptr %a, ptr %b) { ; CHECK-LABEL: complex_mul_nonzero_init_v2f64: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: cntd x9 +; CHECK-NEXT: ptrue p0.d, vl1 ; CHECK-NEXT: fmov d0, #1.00000000 -; CHECK-NEXT: fmov d1, #2.00000000 -; CHECK-NEXT: neg x10, x9 +; CHECK-NEXT: mov z1.d, #0 // =0x0 +; CHECK-NEXT: fmov d2, #2.00000000 +; CHECK-NEXT: cntd x9 ; CHECK-NEXT: mov w11, #100 // =0x64 +; CHECK-NEXT: ptrue p1.b +; CHECK-NEXT: neg x10, x9 ; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: and x10, x10, x11 -; CHECK-NEXT: mov z2.d, #0 // =0x0 -; CHECK-NEXT: ptrue p0.d, vl1 ; CHECK-NEXT: rdvl x11, #2 -; CHECK-NEXT: sel z3.d, p0, z0.d, z2.d -; CHECK-NEXT: sel z1.d, p0, z1.d, z2.d +; 
CHECK-NEXT: sel z3.d, p0, z0.d, z1.d +; CHECK-NEXT: mov z1.d, p0/m, z2.d +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: zip2 z0.d, z1.d, z3.d ; CHECK-NEXT: zip1 z1.d, z1.d, z3.d -; CHECK-NEXT: ptrue p1.b -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: .LBB1_1: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add x12, x0, x8 ; CHECK-NEXT: add x13, x1, x8 ; CHECK-NEXT: ld1b { z2.b }, p1/z, [x0, x8] -; CHECK-NEXT: subs x10, x10, x9 ; CHECK-NEXT: ld1d { z3.d }, p0/z, [x12, #1, mul vl] ; CHECK-NEXT: ld1b { z4.b }, p1/z, [x1, x8] ; CHECK-NEXT: ld1d { z5.d }, p0/z, [x13, #1, mul vl] +; CHECK-NEXT: subs x10, x10, x9 ; CHECK-NEXT: add x8, x8, x11 ; CHECK-NEXT: fcmla z1.d, p0/m, z4.d, z2.d, #0 ; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z3.d, #0 @@ -133,10 +133,10 @@ ; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z3.d, #90 ; CHECK-NEXT: b.ne .LBB1_1 ; CHECK-NEXT: // %bb.2: // %exit.block -; CHECK-NEXT: uzp2 z2.d, z1.d, z0.d -; CHECK-NEXT: uzp1 z0.d, z1.d, z0.d -; CHECK-NEXT: faddv d0, p0, z0.d -; CHECK-NEXT: faddv d1, p0, z2.d +; CHECK-NEXT: uzp1 z2.d, z1.d, z0.d +; CHECK-NEXT: uzp2 z1.d, z1.d, z0.d +; CHECK-NEXT: faddv d0, p0, z2.d +; CHECK-NEXT: faddv d1, p0, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z1 ; CHECK-NEXT: ret @@ -189,21 +189,21 @@ define %"class.std::complex" @complex_mul_v2f64_unrolled(ptr %a, ptr %b) { ; CHECK-LABEL: complex_mul_v2f64_unrolled: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov z1.d, #0 // =0x0 +; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: cntw x9 -; CHECK-NEXT: mov w11, #1000 // =0x3e8 +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: neg x10, x9 +; CHECK-NEXT: mov w11, #1000 // =0x3e8 ; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: and x10, x10, x11 -; CHECK-NEXT: mov z1.d, #0 // =0x0 +; CHECK-NEXT: rdvl x11, #4 ; CHECK-NEXT: zip2 z0.d, z1.d, z1.d ; CHECK-NEXT: zip1 z1.d, z1.d, z1.d -; CHECK-NEXT: rdvl x11, #4 -; CHECK-NEXT: mov z2.d, z1.d -; CHECK-NEXT: mov z3.d, z0.d ; CHECK-NEXT: addvl x12, x1, #2 ; CHECK-NEXT: addvl x13, x0, #2 -; CHECK-NEXT: ptrue p1.b -; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z2.d, z1.d +; CHECK-NEXT: mov z3.d, z0.d ; CHECK-NEXT: .LBB2_1: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add x14, x0, x8 @@ -211,7 +211,6 @@ ; CHECK-NEXT: add x16, x1, x8 ; CHECK-NEXT: add x17, x12, x8 ; CHECK-NEXT: ld1b { z4.b }, p1/z, [x0, x8] -; CHECK-NEXT: subs x10, x10, x9 ; CHECK-NEXT: ld1d { z5.d }, p0/z, [x14, #1, mul vl] ; CHECK-NEXT: ld1b { z6.b }, p1/z, [x13, x8] ; CHECK-NEXT: ld1d { z7.d }, p0/z, [x15, #1, mul vl] @@ -219,6 +218,7 @@ ; CHECK-NEXT: ld1d { z17.d }, p0/z, [x16, #1, mul vl] ; CHECK-NEXT: ld1b { z18.b }, p1/z, [x12, x8] ; CHECK-NEXT: ld1d { z19.d }, p0/z, [x17, #1, mul vl] +; CHECK-NEXT: subs x10, x10, x9 ; CHECK-NEXT: add x8, x8, x11 ; CHECK-NEXT: fcmla z1.d, p0/m, z16.d, z4.d, #0 ; CHECK-NEXT: fcmla z0.d, p0/m, z17.d, z5.d, #0 @@ -230,14 +230,14 @@ ; CHECK-NEXT: fcmla z3.d, p0/m, z19.d, z7.d, #90 ; CHECK-NEXT: b.ne .LBB2_1 ; CHECK-NEXT: // %bb.2: // %exit.block -; CHECK-NEXT: uzp2 z4.d, z2.d, z3.d -; CHECK-NEXT: uzp1 z2.d, z2.d, z3.d -; CHECK-NEXT: uzp2 z3.d, z1.d, z0.d -; CHECK-NEXT: uzp1 z0.d, z1.d, z0.d -; CHECK-NEXT: fadd z0.d, z2.d, z0.d -; CHECK-NEXT: fadd z1.d, z4.d, z3.d -; CHECK-NEXT: faddv d0, p0, z0.d -; CHECK-NEXT: faddv d1, p0, z1.d +; CHECK-NEXT: uzp1 z4.d, z2.d, z3.d +; CHECK-NEXT: uzp1 z5.d, z1.d, z0.d +; CHECK-NEXT: uzp2 z2.d, z2.d, z3.d +; CHECK-NEXT: uzp2 z0.d, z1.d, z0.d +; CHECK-NEXT: fadd z1.d, z4.d, z5.d +; CHECK-NEXT: fadd z2.d, z2.d, z0.d 
+; CHECK-NEXT: faddv d0, p0, z1.d +; CHECK-NEXT: faddv d1, p0, z2.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z1 ; CHECK-NEXT: ret @@ -322,16 +322,16 @@ define dso_local %"class.std::complex" @reduction_mix(ptr %a, ptr %b, ptr noalias nocapture noundef readnone %c, [2 x double] %d.coerce, ptr nocapture noundef readonly %s, ptr nocapture noundef writeonly %outs) local_unnamed_addr #0 { ; CHECK-LABEL: reduction_mix: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: mov z2.d, #0 // =0x0 +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: cntd x9 -; CHECK-NEXT: mov w11, #100 // =0x64 ; CHECK-NEXT: neg x10, x9 +; CHECK-NEXT: mov w11, #100 // =0x64 ; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: and x10, x10, x11 -; CHECK-NEXT: mov z0.d, #0 // =0x0 ; CHECK-NEXT: rdvl x11, #2 -; CHECK-NEXT: zip2 z1.d, z0.d, z0.d -; CHECK-NEXT: zip1 z2.d, z0.d, z0.d -; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: zip2 z0.d, z2.d, z2.d +; CHECK-NEXT: zip1 z1.d, z2.d, z2.d ; CHECK-NEXT: .LBB3_1: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ld1w { z3.d }, p0/z, [x3, x8, lsl #2] @@ -340,17 +340,17 @@ ; CHECK-NEXT: add x8, x8, x9 ; CHECK-NEXT: add x0, x0, x11 ; CHECK-NEXT: cmp x10, x8 -; CHECK-NEXT: add z0.d, z3.d, z0.d -; CHECK-NEXT: fadd z2.d, z4.d, z2.d -; CHECK-NEXT: fadd z1.d, z5.d, z1.d +; CHECK-NEXT: fadd z0.d, z5.d, z0.d +; CHECK-NEXT: fadd z1.d, z4.d, z1.d +; CHECK-NEXT: add z2.d, z3.d, z2.d ; CHECK-NEXT: b.ne .LBB3_1 ; CHECK-NEXT: // %bb.2: // %middle.block -; CHECK-NEXT: uzp1 z3.d, z2.d, z1.d -; CHECK-NEXT: uzp2 z1.d, z2.d, z1.d -; CHECK-NEXT: uaddv d2, p0, z0.d -; CHECK-NEXT: faddv d0, p0, z1.d +; CHECK-NEXT: uaddv d2, p0, z2.d +; CHECK-NEXT: uzp2 z3.d, z1.d, z0.d +; CHECK-NEXT: uzp1 z1.d, z1.d, z0.d ; CHECK-NEXT: fmov x8, d2 -; CHECK-NEXT: faddv d1, p0, z3.d +; CHECK-NEXT: faddv d0, p0, z3.d +; CHECK-NEXT: faddv d1, p0, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 killed $z1 ; CHECK-NEXT: str w8, [x4] diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll --- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll +++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-reductions.ll @@ -15,26 +15,26 @@ ; CHECK-LABEL: complex_mul_v2f64: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: .LBB0_1: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add x9, x0, x8 ; CHECK-NEXT: add x10, x1, x8 ; CHECK-NEXT: add x8, x8, #32 -; CHECK-NEXT: cmp x8, #1600 ; CHECK-NEXT: ldp q3, q2, [x9] -; CHECK-NEXT: ldp q4, q5, [x10] -; CHECK-NEXT: fcmla v0.2d, v3.2d, v4.2d, #0 -; CHECK-NEXT: fcmla v1.2d, v2.2d, v5.2d, #0 -; CHECK-NEXT: fcmla v0.2d, v3.2d, v4.2d, #90 -; CHECK-NEXT: fcmla v1.2d, v2.2d, v5.2d, #90 +; CHECK-NEXT: cmp x8, #1600 +; CHECK-NEXT: ldp q5, q4, [x10] +; CHECK-NEXT: fcmla v0.2d, v3.2d, v5.2d, #0 +; CHECK-NEXT: fcmla v1.2d, v2.2d, v4.2d, #0 +; CHECK-NEXT: fcmla v0.2d, v3.2d, v5.2d, #90 +; CHECK-NEXT: fcmla v1.2d, v2.2d, v4.2d, #90 ; CHECK-NEXT: b.ne .LBB0_1 ; CHECK-NEXT: // %bb.2: // %middle.block ; CHECK-NEXT: zip2 v2.2d, v0.2d, v1.2d ; CHECK-NEXT: zip1 v0.2d, v0.2d, v1.2d -; CHECK-NEXT: faddp d1, v2.2d ; CHECK-NEXT: faddp d0, v0.2d +; CHECK-NEXT: faddp d1, v2.2d ; CHECK-NEXT: ret entry: br label %vector.body @@ -80,28 +80,28 @@ define 
%"struct.std::complex" @complex_mul_nonzero_init_v2f64(ptr %a, ptr %b) { ; CHECK-LABEL: complex_mul_nonzero_init_v2f64: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: adrp x9, .LCPI1_0 -; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI1_0] +; CHECK-NEXT: adrp x8, .LCPI1_0 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_0] +; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: .LBB1_1: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add x9, x0, x8 ; CHECK-NEXT: add x10, x1, x8 ; CHECK-NEXT: add x8, x8, #32 -; CHECK-NEXT: cmp x8, #1600 ; CHECK-NEXT: ldp q3, q2, [x9] -; CHECK-NEXT: ldp q4, q5, [x10] -; CHECK-NEXT: fcmla v1.2d, v3.2d, v4.2d, #0 -; CHECK-NEXT: fcmla v0.2d, v2.2d, v5.2d, #0 -; CHECK-NEXT: fcmla v1.2d, v3.2d, v4.2d, #90 -; CHECK-NEXT: fcmla v0.2d, v2.2d, v5.2d, #90 +; CHECK-NEXT: cmp x8, #1600 +; CHECK-NEXT: ldp q5, q4, [x10] +; CHECK-NEXT: fcmla v1.2d, v3.2d, v5.2d, #0 +; CHECK-NEXT: fcmla v0.2d, v2.2d, v4.2d, #0 +; CHECK-NEXT: fcmla v1.2d, v3.2d, v5.2d, #90 +; CHECK-NEXT: fcmla v0.2d, v2.2d, v4.2d, #90 ; CHECK-NEXT: b.ne .LBB1_1 ; CHECK-NEXT: // %bb.2: // %middle.block ; CHECK-NEXT: zip2 v2.2d, v1.2d, v0.2d ; CHECK-NEXT: zip1 v0.2d, v1.2d, v0.2d -; CHECK-NEXT: faddp d1, v2.2d ; CHECK-NEXT: faddp d0, v0.2d +; CHECK-NEXT: faddp d1, v2.2d ; CHECK-NEXT: ret entry: br label %vector.body @@ -143,40 +143,40 @@ define %"struct.std::complex" @complex_mul_v2f64_unrolled(ptr %a, ptr %b) { ; CHECK-LABEL: complex_mul_v2f64_unrolled: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: adrp x9, .LCPI2_0 -; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: movi v2.2d, #0000000000000000 +; CHECK-NEXT: adrp x8, .LCPI2_0 ; CHECK-NEXT: movi v3.2d, #0000000000000000 -; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI2_0] +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0] +; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: .LBB2_1: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add x9, x0, x8 ; CHECK-NEXT: add x10, x1, x8 ; CHECK-NEXT: add x8, x8, #64 -; CHECK-NEXT: cmp x8, #1600 ; CHECK-NEXT: ldp q5, q4, [x9] -; CHECK-NEXT: ldp q7, q6, [x9, #32] -; CHECK-NEXT: ldp q17, q16, [x10] -; CHECK-NEXT: fcmla v1.2d, v5.2d, v17.2d, #0 +; CHECK-NEXT: cmp x8, #1600 +; CHECK-NEXT: ldp q7, q6, [x10] +; CHECK-NEXT: ldp q17, q16, [x9, #32] ; CHECK-NEXT: ldp q19, q18, [x10, #32] -; CHECK-NEXT: fcmla v0.2d, v4.2d, v16.2d, #0 -; CHECK-NEXT: fcmla v1.2d, v5.2d, v17.2d, #90 -; CHECK-NEXT: fcmla v2.2d, v7.2d, v19.2d, #0 -; CHECK-NEXT: fcmla v0.2d, v4.2d, v16.2d, #90 -; CHECK-NEXT: fcmla v3.2d, v6.2d, v18.2d, #0 -; CHECK-NEXT: fcmla v2.2d, v7.2d, v19.2d, #90 -; CHECK-NEXT: fcmla v3.2d, v6.2d, v18.2d, #90 +; CHECK-NEXT: fcmla v1.2d, v5.2d, v7.2d, #0 +; CHECK-NEXT: fcmla v0.2d, v4.2d, v6.2d, #0 +; CHECK-NEXT: fcmla v2.2d, v17.2d, v19.2d, #0 +; CHECK-NEXT: fcmla v3.2d, v16.2d, v18.2d, #0 +; CHECK-NEXT: fcmla v1.2d, v5.2d, v7.2d, #90 +; CHECK-NEXT: fcmla v0.2d, v4.2d, v6.2d, #90 +; CHECK-NEXT: fcmla v2.2d, v17.2d, v19.2d, #90 +; CHECK-NEXT: fcmla v3.2d, v16.2d, v18.2d, #90 ; CHECK-NEXT: b.ne .LBB2_1 ; CHECK-NEXT: // %bb.2: // %middle.block ; CHECK-NEXT: zip2 v4.2d, v2.2d, v3.2d ; CHECK-NEXT: zip1 v2.2d, v2.2d, v3.2d -; CHECK-NEXT: zip1 v3.2d, v1.2d, v0.2d -; CHECK-NEXT: zip2 v0.2d, v1.2d, v0.2d -; CHECK-NEXT: fadd v1.2d, v2.2d, v3.2d -; CHECK-NEXT: fadd v2.2d, v4.2d, v0.2d -; CHECK-NEXT: faddp d0, v1.2d -; CHECK-NEXT: faddp d1, v2.2d +; CHECK-NEXT: zip2 v3.2d, v1.2d, v0.2d +; CHECK-NEXT: zip1 v0.2d, v1.2d, v0.2d +; CHECK-NEXT: 
fadd v1.2d, v4.2d, v3.2d +; CHECK-NEXT: fadd v0.2d, v2.2d, v0.2d +; CHECK-NEXT: faddp d1, v1.2d +; CHECK-NEXT: faddp d0, v0.2d ; CHECK-NEXT: ret entry: %scevgep = getelementptr i8, ptr %a, i64 32 diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat-scalable.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat-scalable.ll --- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat-scalable.ll +++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat-scalable.ll @@ -8,23 +8,23 @@ define @complex_mul_const( %a, %b) { ; CHECK-LABEL: complex_mul_const: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov z4.d, #0 // =0x0 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z5.d, z4.d +; CHECK-NEXT: mov z4.d, #0 // =0x0 +; CHECK-NEXT: fmov z7.d, #3.00000000 +; CHECK-NEXT: fmov z24.d, #11.00000000 ; CHECK-NEXT: mov z6.d, z4.d -; CHECK-NEXT: fcmla z5.d, p0/m, z0.d, z2.d, #0 +; CHECK-NEXT: mov z5.d, z4.d ; CHECK-NEXT: fcmla z6.d, p0/m, z1.d, z3.d, #0 -; CHECK-NEXT: fcmla z5.d, p0/m, z0.d, z2.d, #90 +; CHECK-NEXT: fcmla z5.d, p0/m, z0.d, z2.d, #0 ; CHECK-NEXT: fcmla z6.d, p0/m, z1.d, z3.d, #90 -; CHECK-NEXT: fmov z1.d, #3.00000000 -; CHECK-NEXT: fmov z2.d, #11.00000000 -; CHECK-NEXT: zip2 z3.d, z2.d, z1.d +; CHECK-NEXT: zip2 z1.d, z24.d, z7.d +; CHECK-NEXT: fcmla z5.d, p0/m, z0.d, z2.d, #90 +; CHECK-NEXT: zip1 z2.d, z24.d, z7.d ; CHECK-NEXT: mov z0.d, z4.d -; CHECK-NEXT: zip1 z1.d, z2.d, z1.d -; CHECK-NEXT: fcmla z4.d, p0/m, z6.d, z3.d, #0 -; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z1.d, #0 -; CHECK-NEXT: fcmla z4.d, p0/m, z6.d, z3.d, #90 -; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z1.d, #90 +; CHECK-NEXT: fcmla z4.d, p0/m, z6.d, z1.d, #0 +; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z2.d, #0 +; CHECK-NEXT: fcmla z4.d, p0/m, z6.d, z1.d, #90 +; CHECK-NEXT: fcmla z0.d, p0/m, z5.d, z2.d, #90 ; CHECK-NEXT: mov z1.d, z4.d ; CHECK-NEXT: ret entry: @@ -55,24 +55,24 @@ define @complex_mul_non_const( %a, %b, [2 x double] %c) { ; CHECK-LABEL: complex_mul_non_const: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov z6.d, #0 // =0x0 ; CHECK-NEXT: // kill: def $d5 killed $d5 def $z5 ; CHECK-NEXT: // kill: def $d4 killed $d4 def $z4 -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z7.d, z6.d -; CHECK-NEXT: mov z24.d, z6.d ; CHECK-NEXT: mov z5.d, d5 ; CHECK-NEXT: mov z4.d, d4 -; CHECK-NEXT: fcmla z7.d, p0/m, z0.d, z2.d, #0 +; CHECK-NEXT: mov z24.d, z6.d +; CHECK-NEXT: mov z7.d, z6.d +; CHECK-NEXT: zip2 z25.d, z4.d, z5.d +; CHECK-NEXT: zip1 z4.d, z4.d, z5.d ; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #0 -; CHECK-NEXT: fcmla z7.d, p0/m, z0.d, z2.d, #90 -; CHECK-NEXT: zip2 z2.d, z4.d, z5.d +; CHECK-NEXT: fcmla z7.d, p0/m, z0.d, z2.d, #0 ; CHECK-NEXT: fcmla z24.d, p0/m, z1.d, z3.d, #90 +; CHECK-NEXT: fcmla z7.d, p0/m, z0.d, z2.d, #90 ; CHECK-NEXT: mov z0.d, z6.d -; CHECK-NEXT: zip1 z4.d, z4.d, z5.d -; CHECK-NEXT: fcmla z6.d, p0/m, z24.d, z2.d, #0 +; CHECK-NEXT: fcmla z6.d, p0/m, z24.d, z25.d, #0 ; CHECK-NEXT: fcmla z0.d, p0/m, z7.d, z4.d, #0 -; CHECK-NEXT: fcmla z6.d, p0/m, z24.d, z2.d, #90 +; CHECK-NEXT: fcmla z6.d, p0/m, z24.d, z25.d, #90 ; CHECK-NEXT: fcmla z0.d, p0/m, z7.d, z4.d, #90 ; CHECK-NEXT: mov z1.d, z6.d ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat.ll --- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat.ll +++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-splat.ll @@ -10,19 +10,19 @@ ; CHECK-LABEL: complex_mul_const: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v6.2d, 
#0000000000000000 -; CHECK-NEXT: adrp x8, .LCPI0_0 ; CHECK-NEXT: movi v5.2d, #0000000000000000 +; CHECK-NEXT: adrp x8, .LCPI0_0 ; CHECK-NEXT: movi v4.2d, #0000000000000000 ; CHECK-NEXT: fcmla v6.2d, v3.2d, v1.2d, #0 ; CHECK-NEXT: fcmla v5.2d, v2.2d, v0.2d, #0 ; CHECK-NEXT: fcmla v6.2d, v3.2d, v1.2d, #90 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0] ; CHECK-NEXT: fcmla v5.2d, v2.2d, v0.2d, #90 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI0_0] ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: fcmla v4.2d, v2.2d, v6.2d, #0 -; CHECK-NEXT: fcmla v0.2d, v2.2d, v5.2d, #0 -; CHECK-NEXT: fcmla v4.2d, v2.2d, v6.2d, #90 -; CHECK-NEXT: fcmla v0.2d, v2.2d, v5.2d, #90 +; CHECK-NEXT: fcmla v4.2d, v1.2d, v6.2d, #0 +; CHECK-NEXT: fcmla v0.2d, v1.2d, v5.2d, #0 +; CHECK-NEXT: fcmla v4.2d, v1.2d, v6.2d, #90 +; CHECK-NEXT: fcmla v0.2d, v1.2d, v5.2d, #90 ; CHECK-NEXT: mov v1.16b, v4.16b ; CHECK-NEXT: ret entry: @@ -52,22 +52,22 @@ define <4 x double> @complex_mul_non_const(<4 x double> %a, <4 x double> %b, [2 x double] %c) { ; CHECK-LABEL: complex_mul_non_const: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: movi v7.2d, #0000000000000000 ; CHECK-NEXT: movi v6.2d, #0000000000000000 -; CHECK-NEXT: // kill: def $d4 killed $d4 def $q4 ; CHECK-NEXT: // kill: def $d5 killed $d5 def $q5 -; CHECK-NEXT: movi v7.2d, #0000000000000000 +; CHECK-NEXT: // kill: def $d4 killed $d4 def $q4 ; CHECK-NEXT: mov v4.d[1], v5.d[0] -; CHECK-NEXT: fcmla v6.2d, v2.2d, v0.2d, #0 +; CHECK-NEXT: movi v5.2d, #0000000000000000 ; CHECK-NEXT: fcmla v7.2d, v3.2d, v1.2d, #0 -; CHECK-NEXT: fcmla v6.2d, v2.2d, v0.2d, #90 -; CHECK-NEXT: movi v2.2d, #0000000000000000 +; CHECK-NEXT: fcmla v6.2d, v2.2d, v0.2d, #0 ; CHECK-NEXT: fcmla v7.2d, v3.2d, v1.2d, #90 +; CHECK-NEXT: fcmla v6.2d, v2.2d, v0.2d, #90 ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: fcmla v2.2d, v4.2d, v7.2d, #0 +; CHECK-NEXT: fcmla v5.2d, v4.2d, v7.2d, #0 ; CHECK-NEXT: fcmla v0.2d, v4.2d, v6.2d, #0 -; CHECK-NEXT: fcmla v2.2d, v4.2d, v7.2d, #90 +; CHECK-NEXT: fcmla v5.2d, v4.2d, v7.2d, #90 ; CHECK-NEXT: fcmla v0.2d, v4.2d, v6.2d, #90 -; CHECK-NEXT: mov v1.16b, v2.16b +; CHECK-NEXT: mov v1.16b, v5.16b ; CHECK-NEXT: ret entry: %c.coerce.fca.1.extract = extractvalue [2 x double] %c, 1 diff --git a/llvm/test/CodeGen/AArch64/complex-deinterleaving-uniform-cases.ll b/llvm/test/CodeGen/AArch64/complex-deinterleaving-uniform-cases.ll --- a/llvm/test/CodeGen/AArch64/complex-deinterleaving-uniform-cases.ll +++ b/llvm/test/CodeGen/AArch64/complex-deinterleaving-uniform-cases.ll @@ -35,14 +35,14 @@ ; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8 ; CHECK-NEXT: zip1 v4.2s, v0.2s, v2.2s ; CHECK-NEXT: zip2 v0.2s, v0.2s, v2.2s -; CHECK-NEXT: zip2 v5.2s, v1.2s, v3.2s -; CHECK-NEXT: zip1 v1.2s, v1.2s, v3.2s -; CHECK-NEXT: fmul v2.2s, v5.2s, v4.2s +; CHECK-NEXT: zip1 v2.2s, v1.2s, v3.2s +; CHECK-NEXT: zip2 v1.2s, v1.2s, v3.2s ; CHECK-NEXT: fmul v3.2s, v1.2s, v4.2s -; CHECK-NEXT: fmul v4.2s, v0.2s, v5.2s -; CHECK-NEXT: fmla v2.2s, v0.2s, v1.2s -; CHECK-NEXT: fsub v0.2s, v3.2s, v4.2s -; CHECK-NEXT: zip1 v0.4s, v0.4s, v2.4s +; CHECK-NEXT: fmul v4.2s, v2.2s, v4.2s +; CHECK-NEXT: fmul v1.2s, v0.2s, v1.2s +; CHECK-NEXT: fmla v3.2s, v0.2s, v2.2s +; CHECK-NEXT: fsub v0.2s, v4.2s, v1.2s +; CHECK-NEXT: zip1 v0.4s, v0.4s, v3.4s ; CHECK-NEXT: ret entry: %strided.vec = shufflevector <4 x float> %a, <4 x float> poison, <2 x i32> @@ -155,8 +155,8 @@ ; CHECK-NEXT: zip2 v0.2s, v0.2s, v2.2s ; CHECK-NEXT: zip1 v2.2s, v1.2s, v3.2s ; CHECK-NEXT: zip2 v1.2s, v1.2s, v3.2s -; CHECK-NEXT: fsub v1.2s, v4.2s, v1.2s ; 
CHECK-NEXT: fadd v0.2s, v0.2s, v2.2s +; CHECK-NEXT: fsub v1.2s, v4.2s, v1.2s ; CHECK-NEXT: zip1 v0.4s, v1.4s, v0.4s ; CHECK-NEXT: ret entry: @@ -201,93 +201,93 @@ define <12 x float> @abp90c12(<12 x float> %a, <12 x float> %b, <12 x float> %c) { ; CHECK-LABEL: abp90c12: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldr s21, [sp, #32] -; CHECK-NEXT: add x9, sp, #48 -; CHECK-NEXT: // kill: def $s2 killed $s2 def $q2 -; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 -; CHECK-NEXT: ldr s23, [sp, #40] -; CHECK-NEXT: add x11, sp, #56 -; CHECK-NEXT: mov v0.s[1], v2.s[0] -; CHECK-NEXT: ldr s2, [sp] -; CHECK-NEXT: add x10, sp, #16 -; CHECK-NEXT: ld1 { v21.s }[1], [x9] -; CHECK-NEXT: add x9, sp, #64 -; CHECK-NEXT: ld1 { v23.s }[1], [x11] ; CHECK-NEXT: // kill: def $s1 killed $s1 def $q1 ; CHECK-NEXT: // kill: def $s3 killed $s3 def $q3 -; CHECK-NEXT: ldr s22, [sp, #96] -; CHECK-NEXT: add x11, sp, #24 -; CHECK-NEXT: ld1 { v2.s }[1], [x10] -; CHECK-NEXT: add x10, sp, #72 +; CHECK-NEXT: ldr s16, [sp, #40] +; CHECK-NEXT: add x10, sp, #56 +; CHECK-NEXT: add x9, sp, #48 ; CHECK-NEXT: mov v1.s[1], v3.s[0] -; CHECK-NEXT: ld1 { v21.s }[2], [x9] -; CHECK-NEXT: ldr s24, [sp, #8] -; CHECK-NEXT: add x9, sp, #112 -; CHECK-NEXT: ld1 { v23.s }[2], [x10] -; CHECK-NEXT: add x10, sp, #80 +; CHECK-NEXT: ldr s3, [sp, #32] +; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: // kill: def $s5 killed $s5 def $q5 -; CHECK-NEXT: ldr s18, [sp, #128] +; CHECK-NEXT: // kill: def $s2 killed $s2 def $q2 +; CHECK-NEXT: ldr s18, [sp, #8] +; CHECK-NEXT: ld1 { v16.s }[1], [x10] +; CHECK-NEXT: mov v0.s[1], v2.s[0] +; CHECK-NEXT: add x10, sp, #72 +; CHECK-NEXT: ld1 { v3.s }[1], [x9] +; CHECK-NEXT: add x9, sp, #64 +; CHECK-NEXT: ldr s17, [sp, #96] ; CHECK-NEXT: // kill: def $s7 killed $s7 def $q7 ; CHECK-NEXT: // kill: def $s4 killed $s4 def $q4 ; CHECK-NEXT: // kill: def $s6 killed $s6 def $q6 +; CHECK-NEXT: ldr s2, [sp, #136] +; CHECK-NEXT: ldr s20, [sp, #192] ; CHECK-NEXT: mov v1.s[2], v5.s[0] -; CHECK-NEXT: ldr s20, [sp, #104] -; CHECK-NEXT: ld1 { v24.s }[1], [x11] -; CHECK-NEXT: add x11, sp, #88 -; CHECK-NEXT: ld1 { v22.s }[1], [x9] -; CHECK-NEXT: add x9, sp, #144 -; CHECK-NEXT: ld1 { v21.s }[3], [x10] -; CHECK-NEXT: add x10, sp, #120 -; CHECK-NEXT: mov v0.s[2], v4.s[0] -; CHECK-NEXT: ld1 { v23.s }[3], [x11] +; CHECK-NEXT: ld1 { v16.s }[2], [x10] +; CHECK-NEXT: ldr s5, [sp, #104] +; CHECK-NEXT: ld1 { v3.s }[2], [x9] +; CHECK-NEXT: add x9, sp, #24 +; CHECK-NEXT: add x10, sp, #112 ; CHECK-NEXT: ld1 { v18.s }[1], [x9] -; CHECK-NEXT: add x11, sp, #152 -; CHECK-NEXT: ld1 { v20.s }[1], [x10] -; CHECK-NEXT: add x10, sp, #160 +; CHECK-NEXT: add x9, sp, #88 +; CHECK-NEXT: mov v0.s[2], v4.s[0] +; CHECK-NEXT: ld1 { v17.s }[1], [x10] +; CHECK-NEXT: add x10, sp, #80 +; CHECK-NEXT: ld1 { v16.s }[3], [x9] ; CHECK-NEXT: mov v1.s[3], v7.s[0] -; CHECK-NEXT: ldr s17, [sp, #136] -; CHECK-NEXT: ldr s19, [sp, #192] -; CHECK-NEXT: add x9, sp, #208 +; CHECK-NEXT: add x9, sp, #120 +; CHECK-NEXT: ldr s4, [sp, #128] +; CHECK-NEXT: ld1 { v3.s }[3], [x10] +; CHECK-NEXT: ld1 { v5.s }[1], [x9] +; CHECK-NEXT: add x9, sp, #144 +; CHECK-NEXT: ldr s7, [sp] +; CHECK-NEXT: ld1 { v4.s }[1], [x9] ; CHECK-NEXT: mov v0.s[3], v6.s[0] -; CHECK-NEXT: ld1 { v18.s }[2], [x10] -; CHECK-NEXT: ld1 { v17.s }[1], [x11] -; CHECK-NEXT: add x10, sp, #176 -; CHECK-NEXT: fmul v3.4s, v23.4s, v1.4s -; CHECK-NEXT: ld1 { v19.s }[1], [x9] -; CHECK-NEXT: fmul v4.4s, v20.4s, v24.4s +; CHECK-NEXT: add x10, sp, #16 +; CHECK-NEXT: add x9, sp, #160 +; CHECK-NEXT: fmul v6.4s, v16.4s, 
v1.4s +; CHECK-NEXT: fmul v19.4s, v5.4s, v18.4s +; CHECK-NEXT: fmul v18.4s, v17.4s, v18.4s +; CHECK-NEXT: fmul v1.4s, v3.4s, v1.4s +; CHECK-NEXT: ld1 { v7.s }[1], [x10] +; CHECK-NEXT: ld1 { v4.s }[2], [x9] +; CHECK-NEXT: add x9, sp, #152 +; CHECK-NEXT: add x10, sp, #208 +; CHECK-NEXT: ld1 { v2.s }[1], [x9] +; CHECK-NEXT: add x9, sp, #176 +; CHECK-NEXT: ld1 { v20.s }[1], [x10] +; CHECK-NEXT: fneg v6.4s, v6.4s +; CHECK-NEXT: fneg v19.4s, v19.4s +; CHECK-NEXT: fmla v18.4s, v7.4s, v5.4s +; CHECK-NEXT: fmla v1.4s, v0.4s, v16.4s +; CHECK-NEXT: ld1 { v4.s }[3], [x9] ; CHECK-NEXT: add x9, sp, #168 -; CHECK-NEXT: fmul v1.4s, v21.4s, v1.4s -; CHECK-NEXT: ld1 { v18.s }[3], [x10] -; CHECK-NEXT: fmul v5.4s, v22.4s, v24.4s -; CHECK-NEXT: ldr s16, [sp, #200] -; CHECK-NEXT: ld1 { v17.s }[2], [x9] -; CHECK-NEXT: add x11, sp, #216 -; CHECK-NEXT: fneg v3.4s, v3.4s -; CHECK-NEXT: add x9, sp, #184 -; CHECK-NEXT: fneg v4.4s, v4.4s -; CHECK-NEXT: fmla v1.4s, v0.4s, v23.4s -; CHECK-NEXT: fmla v5.4s, v2.4s, v20.4s -; CHECK-NEXT: ld1 { v16.s }[1], [x11] -; CHECK-NEXT: ld1 { v17.s }[3], [x9] -; CHECK-NEXT: fmla v3.4s, v0.4s, v21.4s -; CHECK-NEXT: fmla v4.4s, v2.4s, v22.4s -; CHECK-NEXT: fsub v0.4s, v18.4s, v1.4s -; CHECK-NEXT: fsub v1.4s, v19.4s, v5.4s -; CHECK-NEXT: fadd v2.4s, v17.4s, v3.4s -; CHECK-NEXT: fadd v3.4s, v16.4s, v4.4s +; CHECK-NEXT: ld1 { v2.s }[2], [x9] +; CHECK-NEXT: ldr s5, [sp, #200] +; CHECK-NEXT: add x9, sp, #216 +; CHECK-NEXT: add x10, sp, #184 +; CHECK-NEXT: fmla v6.4s, v0.4s, v3.4s +; CHECK-NEXT: fmla v19.4s, v7.4s, v17.4s +; CHECK-NEXT: ld1 { v5.s }[1], [x9] +; CHECK-NEXT: fsub v0.4s, v4.4s, v1.4s +; CHECK-NEXT: fsub v1.4s, v20.4s, v18.4s +; CHECK-NEXT: ld1 { v2.s }[3], [x10] +; CHECK-NEXT: fadd v3.4s, v5.4s, v19.4s +; CHECK-NEXT: fadd v2.4s, v2.4s, v6.4s ; CHECK-NEXT: ext v4.16b, v0.16b, v1.16b, #12 ; CHECK-NEXT: ext v5.16b, v2.16b, v3.16b, #12 ; CHECK-NEXT: trn2 v1.4s, v1.4s, v3.4s ; CHECK-NEXT: ext v4.16b, v0.16b, v4.16b, #12 -; CHECK-NEXT: zip2 v3.4s, v0.4s, v2.4s ; CHECK-NEXT: ext v5.16b, v2.16b, v5.16b, #8 -; CHECK-NEXT: zip1 v0.4s, v0.4s, v2.4s ; CHECK-NEXT: rev64 v4.4s, v4.4s +; CHECK-NEXT: trn2 v3.4s, v4.4s, v5.4s +; CHECK-NEXT: zip2 v4.4s, v0.4s, v2.4s +; CHECK-NEXT: zip1 v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ext v1.16b, v3.16b, v1.16b, #8 +; CHECK-NEXT: mov v4.d[1], v3.d[0] ; CHECK-NEXT: str q0, [x8] -; CHECK-NEXT: trn2 v4.4s, v4.4s, v5.4s -; CHECK-NEXT: ext v1.16b, v4.16b, v1.16b, #8 -; CHECK-NEXT: mov v3.d[1], v4.d[0] -; CHECK-NEXT: stp q3, q1, [x8, #16] +; CHECK-NEXT: stp q4, q1, [x8, #16] ; CHECK-NEXT: ret entry: %ar = shufflevector <12 x float> %a, <12 x float> poison, <6 x i32> diff --git a/llvm/test/CodeGen/AArch64/concat_vector-truncate-combine.ll b/llvm/test/CodeGen/AArch64/concat_vector-truncate-combine.ll --- a/llvm/test/CodeGen/AArch64/concat_vector-truncate-combine.ll +++ b/llvm/test/CodeGen/AArch64/concat_vector-truncate-combine.ll @@ -96,8 +96,8 @@ define void @test_concat_fptrunc_v4f64_to_v4f32(* %ptr) #1 { ; CHECK-LABEL: test_concat_fptrunc_v4f64_to_v4f32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov z0.s, #1.00000000 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmov z0.s, #1.00000000 ; CHECK-NEXT: st1w { z0.s }, p0, [x0] ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/cond-br-tuning.ll b/llvm/test/CodeGen/AArch64/cond-br-tuning.ll --- a/llvm/test/CodeGen/AArch64/cond-br-tuning.ll +++ b/llvm/test/CodeGen/AArch64/cond-br-tuning.ll @@ -27,9 +27,9 @@ define void @test_add_cbz_multiple_use(i32 %a, i32 %b, ptr %ptr) { ; CHECK-LABEL: 
test_add_cbz_multiple_use: ; CHECK: // %bb.0: // %common.ret -; CHECK-NEXT: mov w8, #10 -; CHECK-NEXT: adds w9, w0, w1 -; CHECK-NEXT: csel w8, w8, w9, ne +; CHECK-NEXT: adds w8, w0, w1 +; CHECK-NEXT: mov w9, #10 // =0xa +; CHECK-NEXT: csel w8, w9, w8, ne ; CHECK-NEXT: str w8, [x2] ; CHECK-NEXT: ret %c = add nsw i32 %a, %b diff --git a/llvm/test/CodeGen/AArch64/consthoist-gep.ll b/llvm/test/CodeGen/AArch64/consthoist-gep.ll --- a/llvm/test/CodeGen/AArch64/consthoist-gep.ll +++ b/llvm/test/CodeGen/AArch64/consthoist-gep.ll @@ -32,44 +32,44 @@ ; CHECK-NEXT: ldrb w9, [x8] ; CHECK-NEXT: tbnz w9, #0, .LBB0_2 ; CHECK-NEXT: // %bb.1: // %bb3 -; CHECK-NEXT: mov w9, #44032 +; CHECK-NEXT: mov w9, #44032 // =0xac00 +; CHECK-NEXT: mov w11, #172 // =0xac ; CHECK-NEXT: movk w9, #12296, lsl #16 -; CHECK-NEXT: orr w11, w9, #0x4 ; CHECK-NEXT: ldr w10, [x9] ; CHECK-NEXT: stur w10, [x8, #158] -; CHECK-NEXT: ldr w10, [x11] -; CHECK-NEXT: orr w11, w9, #0x8 +; CHECK-NEXT: orr w10, w9, #0x4 +; CHECK-NEXT: ldr w10, [x10] ; CHECK-NEXT: and w10, w10, #0xffff ; CHECK-NEXT: stur w10, [x8, #162] -; CHECK-NEXT: ldr w10, [x11] -; CHECK-NEXT: orr w11, w9, #0xc +; CHECK-NEXT: orr w10, w9, #0x8 +; CHECK-NEXT: ldr w10, [x10] ; CHECK-NEXT: and w10, w10, #0x1f1f1f1f ; CHECK-NEXT: stur w10, [x8, #166] -; CHECK-NEXT: ldr w10, [x11] -; CHECK-NEXT: mov w11, #172 -; CHECK-NEXT: orr w11, w9, w11 +; CHECK-NEXT: orr w10, w9, #0xc +; CHECK-NEXT: ldr w10, [x10] ; CHECK-NEXT: and w10, w10, #0x1f1f1f1f ; CHECK-NEXT: stur w10, [x8, #170] -; CHECK-NEXT: mov w10, #176 -; CHECK-NEXT: ldr w8, [x11] -; CHECK-NEXT: adrp x11, global+528 -; CHECK-NEXT: add x11, x11, :lo12:global+528 -; CHECK-NEXT: orr w10, w9, w10 +; CHECK-NEXT: orr w8, w9, w11 +; CHECK-NEXT: adrp x10, global+528 +; CHECK-NEXT: add x10, x10, :lo12:global+528 +; CHECK-NEXT: ldr w8, [x8] +; CHECK-NEXT: mov w11, #176 // =0xb0 ; CHECK-NEXT: and w8, w8, #0xffffff -; CHECK-NEXT: str w8, [x11] -; CHECK-NEXT: ldr w8, [x10] -; CHECK-NEXT: mov w10, #180 -; CHECK-NEXT: orr w10, w9, w10 +; CHECK-NEXT: str w8, [x10] +; CHECK-NEXT: orr w8, w9, w11 +; CHECK-NEXT: mov w11, #180 // =0xb4 +; CHECK-NEXT: ldr w8, [x8] ; CHECK-NEXT: and w8, w8, #0xffffff -; CHECK-NEXT: str w8, [x11, #4] -; CHECK-NEXT: ldr w8, [x10] -; CHECK-NEXT: mov w10, #184 -; CHECK-NEXT: orr w9, w9, w10 +; CHECK-NEXT: str w8, [x10, #4] +; CHECK-NEXT: orr w8, w9, w11 +; CHECK-NEXT: mov w11, #184 // =0xb8 +; CHECK-NEXT: ldr w8, [x8] ; CHECK-NEXT: and w8, w8, #0xffffff -; CHECK-NEXT: str w8, [x11, #8] -; CHECK-NEXT: ldr w8, [x9] +; CHECK-NEXT: str w8, [x10, #8] +; CHECK-NEXT: orr w8, w9, w11 +; CHECK-NEXT: ldr w8, [x8] ; CHECK-NEXT: and w8, w8, #0xffffff -; CHECK-NEXT: str w8, [x11, #12] +; CHECK-NEXT: str w8, [x10, #12] ; CHECK-NEXT: .LBB0_2: // %bb19 ; CHECK-NEXT: ret bb: diff --git a/llvm/test/CodeGen/AArch64/copyprop.ll b/llvm/test/CodeGen/AArch64/copyprop.ll --- a/llvm/test/CodeGen/AArch64/copyprop.ll +++ b/llvm/test/CodeGen/AArch64/copyprop.ll @@ -7,19 +7,19 @@ ; CHECK-NEXT: cmp w0, #10 ; CHECK-NEXT: b.ne .LBB0_2 ; CHECK-NEXT: // %bb.1: // %bb.0 -; CHECK-NEXT: mov w9, #15 -; CHECK-NEXT: mov w8, #1 -; CHECK-NEXT: str w9, [x2] -; CHECK-NEXT: mov w9, #12 +; CHECK-NEXT: mov w8, #15 // =0xf +; CHECK-NEXT: str w8, [x2] +; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: str w8, [x1] -; CHECK-NEXT: str w9, [x4] +; CHECK-NEXT: mov w8, #12 // =0xc +; CHECK-NEXT: str w8, [x4] ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB0_2: // %bb.1 -; CHECK-NEXT: mov w9, #25 +; CHECK-NEXT: mov w9, #25 // =0x19 ; CHECK-NEXT: str w9, [x3] -; CHECK-NEXT: mov w9, #12 ; 
CHECK-NEXT: str wzr, [x1] -; CHECK-NEXT: str w9, [x4] +; CHECK-NEXT: mov w8, #12 // =0xc +; CHECK-NEXT: str w8, [x4] ; CHECK-NEXT: ret %1 = icmp eq i32 %v, 10 br i1 %1, label %bb.0, label %bb.1 diff --git a/llvm/test/CodeGen/AArch64/ctpop-nonean.ll b/llvm/test/CodeGen/AArch64/ctpop-nonean.ll --- a/llvm/test/CodeGen/AArch64/ctpop-nonean.ll +++ b/llvm/test/CodeGen/AArch64/ctpop-nonean.ll @@ -7,28 +7,28 @@ define i128 @ctpop_i128(i128 %i) { ; CHECK-LABEL: ctpop_i128: ; CHECK: // %bb.0: -; CHECK-NEXT: lsr x8, x1, #1 ; CHECK-NEXT: lsr x9, x0, #1 -; CHECK-NEXT: and x8, x8, #0x5555555555555555 +; CHECK-NEXT: lsr x10, x1, #1 +; CHECK-NEXT: mov x8, #72340172838076673 // =0x101010101010101 ; CHECK-NEXT: and x9, x9, #0x5555555555555555 -; CHECK-NEXT: sub x8, x1, x8 +; CHECK-NEXT: and x10, x10, #0x5555555555555555 ; CHECK-NEXT: sub x9, x0, x9 -; CHECK-NEXT: and x10, x8, #0x3333333333333333 -; CHECK-NEXT: lsr x8, x8, #2 -; CHECK-NEXT: and x11, x9, #0x3333333333333333 -; CHECK-NEXT: lsr x9, x9, #2 -; CHECK-NEXT: and x8, x8, #0x3333333333333333 -; CHECK-NEXT: and x9, x9, #0x3333333333333333 -; CHECK-NEXT: add x8, x10, x8 -; CHECK-NEXT: add x9, x11, x9 -; CHECK-NEXT: mov x10, #72340172838076673 +; CHECK-NEXT: sub x10, x1, x10 ; CHECK-NEXT: mov x1, xzr -; CHECK-NEXT: add x8, x8, x8, lsr #4 +; CHECK-NEXT: lsr x11, x9, #2 +; CHECK-NEXT: lsr x12, x10, #2 +; CHECK-NEXT: and x9, x9, #0x3333333333333333 +; CHECK-NEXT: and x10, x10, #0x3333333333333333 +; CHECK-NEXT: and x11, x11, #0x3333333333333333 +; CHECK-NEXT: add x9, x9, x11 +; CHECK-NEXT: and x11, x12, #0x3333333333333333 ; CHECK-NEXT: add x9, x9, x9, lsr #4 -; CHECK-NEXT: and x8, x8, #0xf0f0f0f0f0f0f0f +; CHECK-NEXT: add x10, x10, x11 +; CHECK-NEXT: add x10, x10, x10, lsr #4 ; CHECK-NEXT: and x9, x9, #0xf0f0f0f0f0f0f0f -; CHECK-NEXT: mul x8, x8, x10 -; CHECK-NEXT: mul x9, x9, x10 +; CHECK-NEXT: mul x9, x9, x8 +; CHECK-NEXT: and x10, x10, #0xf0f0f0f0f0f0f0f +; CHECK-NEXT: mul x8, x10, x8 ; CHECK-NEXT: lsr x9, x9, #56 ; CHECK-NEXT: add x0, x9, x8, lsr #56 ; CHECK-NEXT: ret @@ -37,8 +37,8 @@ ; CHECK-CSSC: // %bb.0: ; CHECK-CSSC-NEXT: cnt x8, x1 ; CHECK-CSSC-NEXT: cnt x9, x0 -; CHECK-CSSC-NEXT: add x0, x9, x8 ; CHECK-CSSC-NEXT: mov x1, xzr +; CHECK-CSSC-NEXT: add x0, x9, x8 ; CHECK-CSSC-NEXT: ret %c = call i128 @llvm.ctpop.i128(i128 %i) ret i128 %c diff --git a/llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll b/llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll --- a/llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll +++ b/llvm/test/CodeGen/AArch64/dag-combine-concat-vectors.ll @@ -8,24 +8,24 @@ define fastcc i8 @allocno_reload_assign() { ; CHECK-LABEL: allocno_reload_assign: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z3.b, #0 // =0x0 +; CHECK-NEXT: mov z0.b, #0 // =0x0 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: uunpklo z4.h, z3.b -; CHECK-NEXT: uunpkhi z7.h, z3.b -; CHECK-NEXT: uunpklo z2.s, z4.h -; CHECK-NEXT: uunpkhi z4.s, z4.h -; CHECK-NEXT: uunpklo z6.s, z7.h -; CHECK-NEXT: uunpkhi z16.s, z7.h +; CHECK-NEXT: mov z16.d, #0 // =0x0 ; CHECK-NEXT: ptrue p1.b -; CHECK-NEXT: mov z0.d, #0 // =0x0 -; CHECK-NEXT: uunpklo z1.d, z2.s -; CHECK-NEXT: uunpkhi z2.d, z2.s -; CHECK-NEXT: uunpklo z3.d, z4.s -; CHECK-NEXT: uunpkhi z4.d, z4.s -; CHECK-NEXT: uunpklo z5.d, z6.s -; CHECK-NEXT: uunpkhi z6.d, z6.s -; CHECK-NEXT: uunpklo z7.d, z16.s -; CHECK-NEXT: uunpkhi z16.d, z16.s +; CHECK-NEXT: uunpklo z1.h, z0.b +; CHECK-NEXT: uunpkhi z0.h, z0.b +; CHECK-NEXT: uunpklo z2.s, z1.h +; CHECK-NEXT: uunpkhi z3.s, z1.h +; CHECK-NEXT: uunpklo z5.s, z0.h +; CHECK-NEXT: 
uunpkhi z7.s, z0.h +; CHECK-NEXT: uunpklo z0.d, z2.s +; CHECK-NEXT: uunpkhi z1.d, z2.s +; CHECK-NEXT: uunpklo z2.d, z3.s +; CHECK-NEXT: uunpkhi z3.d, z3.s +; CHECK-NEXT: uunpklo z4.d, z5.s +; CHECK-NEXT: uunpkhi z5.d, z5.s +; CHECK-NEXT: uunpklo z6.d, z7.s +; CHECK-NEXT: uunpkhi z7.d, z7.s ; CHECK-NEXT: .LBB0_1: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: fmov d17, xzr ; CHECK-NEXT: cmpeq p2.d, p0/z, z17.d, #0 @@ -43,22 +43,22 @@ ; CHECK-NEXT: punpkhi p3.h, p3.b ; CHECK-NEXT: punpklo p5.h, p4.b ; CHECK-NEXT: punpkhi p4.h, p4.b -; CHECK-NEXT: st1b { z1.d }, p5, [z0.d] -; CHECK-NEXT: punpklo p5.h, p2.b -; CHECK-NEXT: st1b { z2.d }, p4, [z0.d] +; CHECK-NEXT: st1b { z0.d }, p5, [z16.d] +; CHECK-NEXT: st1b { z1.d }, p4, [z16.d] ; CHECK-NEXT: punpklo p4.h, p3.b -; CHECK-NEXT: punpkhi p2.h, p2.b ; CHECK-NEXT: punpkhi p3.h, p3.b -; CHECK-NEXT: st1b { z3.d }, p4, [z0.d] -; CHECK-NEXT: punpklo p4.h, p5.b -; CHECK-NEXT: st1b { z4.d }, p3, [z0.d] -; CHECK-NEXT: punpkhi p3.h, p5.b -; CHECK-NEXT: st1b { z5.d }, p4, [z0.d] +; CHECK-NEXT: st1b { z2.d }, p4, [z16.d] ; CHECK-NEXT: punpklo p4.h, p2.b ; CHECK-NEXT: punpkhi p2.h, p2.b -; CHECK-NEXT: st1b { z6.d }, p3, [z0.d] -; CHECK-NEXT: st1b { z7.d }, p4, [z0.d] -; CHECK-NEXT: st1b { z16.d }, p2, [z0.d] +; CHECK-NEXT: st1b { z3.d }, p3, [z16.d] +; CHECK-NEXT: punpklo p3.h, p4.b +; CHECK-NEXT: st1b { z4.d }, p3, [z16.d] +; CHECK-NEXT: punpkhi p3.h, p4.b +; CHECK-NEXT: st1b { z5.d }, p3, [z16.d] +; CHECK-NEXT: punpklo p3.h, p2.b +; CHECK-NEXT: punpkhi p2.h, p2.b +; CHECK-NEXT: st1b { z6.d }, p3, [z16.d] +; CHECK-NEXT: st1b { z7.d }, p2, [z16.d] ; CHECK-NEXT: b .LBB0_1 br label %1 diff --git a/llvm/test/CodeGen/AArch64/dag-combine-select.ll b/llvm/test/CodeGen/AArch64/dag-combine-select.ll --- a/llvm/test/CodeGen/AArch64/dag-combine-select.ll +++ b/llvm/test/CodeGen/AArch64/dag-combine-select.ll @@ -35,14 +35,14 @@ ; SDISEL-LABEL: test1: ; SDISEL: // %bb.0: ; SDISEL-NEXT: cmp w0, #7 -; SDISEL-NEXT: adrp x8, out -; SDISEL-NEXT: csel w9, w1, w2, eq -; SDISEL-NEXT: cmp w9, #13 -; SDISEL-NEXT: csel w9, w1, w2, lo +; SDISEL-NEXT: adrp x9, out +; SDISEL-NEXT: csel w8, w1, w2, eq +; SDISEL-NEXT: cmp w8, #13 +; SDISEL-NEXT: csel w8, w1, w2, lo ; SDISEL-NEXT: cmp w0, #42 -; SDISEL-NEXT: csel w10, w1, w9, eq -; SDISEL-NEXT: str w9, [x8, :lo12:out] -; SDISEL-NEXT: str w10, [x8, :lo12:out] +; SDISEL-NEXT: csel w10, w1, w8, eq +; SDISEL-NEXT: str w8, [x9, :lo12:out] +; SDISEL-NEXT: str w10, [x9, :lo12:out] ; SDISEL-NEXT: ret ; ; GISEL-LABEL: test1: diff --git a/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll b/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll --- a/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll +++ b/llvm/test/CodeGen/AArch64/dag-combine-setcc.ll @@ -35,8 +35,8 @@ ; CHECK-LABEL: combine_setcc_eq_vecreduce_or_v32i1: ; CHECK: // %bb.0: ; CHECK-NEXT: cmeq v1.16b, v1.16b, #0 -; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: cmeq v0.16b, v0.16b, #0 +; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: umaxv b0, v0.16b ; CHECK-NEXT: fmov w9, s0 @@ -52,16 +52,16 @@ ; CHECK-LABEL: combine_setcc_eq_vecreduce_or_v64i1: ; CHECK: // %bb.0: ; CHECK-NEXT: cmeq v2.16b, v2.16b, #0 -; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: cmeq v0.16b, v0.16b, #0 +; CHECK-NEXT: mov w9, #1 // =0x1 ; CHECK-NEXT: cmeq v3.16b, v3.16b, #0 ; CHECK-NEXT: cmeq v1.16b, v1.16b, #0 -; CHECK-NEXT: cmeq v0.16b, v0.16b, #0 -; CHECK-NEXT: orr v1.16b, v1.16b, v3.16b ; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b +; CHECK-NEXT: orr v1.16b, v1.16b, v3.16b ; CHECK-NEXT: 
orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: umaxv b0, v0.16b -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: bic w0, w8, w9 +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: bic w0, w9, w8 ; CHECK-NEXT: ret %cmp1 = icmp eq <64 x i8> %a, zeroinitializer %cast = bitcast <64 x i1> %cmp1 to i64 @@ -223,8 +223,8 @@ ; CHECK-LABEL: combine_setcc_ne_vecreduce_and_v32i1: ; CHECK: // %bb.0: ; CHECK-NEXT: cmtst v0.16b, v0.16b, v0.16b -; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: cmeq v1.16b, v1.16b, #0 +; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: bic v0.16b, v0.16b, v1.16b ; CHECK-NEXT: uminv b0, v0.16b ; CHECK-NEXT: fmov w9, s0 @@ -240,16 +240,16 @@ ; CHECK-LABEL: combine_setcc_ne_vecreduce_and_v64i1: ; CHECK: // %bb.0: ; CHECK-NEXT: cmtst v1.16b, v1.16b, v1.16b -; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: cmtst v0.16b, v0.16b, v0.16b -; CHECK-NEXT: cmeq v3.16b, v3.16b, #0 +; CHECK-NEXT: mov w9, #1 // =0x1 ; CHECK-NEXT: cmeq v2.16b, v2.16b, #0 +; CHECK-NEXT: cmeq v3.16b, v3.16b, #0 ; CHECK-NEXT: bic v1.16b, v1.16b, v3.16b ; CHECK-NEXT: bic v0.16b, v0.16b, v2.16b ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: uminv b0, v0.16b -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: bic w0, w8, w9 +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: bic w0, w9, w8 ; CHECK-NEXT: ret %cmp1 = icmp ne <64 x i8> %a, zeroinitializer %cast = bitcast <64 x i1> %cmp1 to i64 @@ -260,10 +260,10 @@ define i1 @combine_setcc_eq0_conjunction_xor_or(ptr %a, ptr %b) { ; CHECK-LABEL: combine_setcc_eq0_conjunction_xor_or: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp x8, x9, [x0] -; CHECK-NEXT: ldp x10, x11, [x1] -; CHECK-NEXT: cmp x8, x10 -; CHECK-NEXT: ccmp x9, x11, #0, eq +; CHECK-NEXT: ldp x8, x11, [x1] +; CHECK-NEXT: ldp x9, x10, [x0] +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: ccmp x10, x11, #0, eq ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %bcmp = tail call i32 @bcmp(ptr dereferenceable(16) %a, ptr dereferenceable(16) %b, i64 16) @@ -274,10 +274,10 @@ define i1 @combine_setcc_ne0_conjunction_xor_or(ptr %a, ptr %b) { ; CHECK-LABEL: combine_setcc_ne0_conjunction_xor_or: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp x8, x9, [x0] -; CHECK-NEXT: ldp x10, x11, [x1] -; CHECK-NEXT: cmp x8, x10 -; CHECK-NEXT: ccmp x9, x11, #0, eq +; CHECK-NEXT: ldp x8, x11, [x1] +; CHECK-NEXT: ldp x9, x10, [x0] +; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: ccmp x10, x11, #0, eq ; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret %bcmp = tail call i32 @bcmp(ptr dereferenceable(16) %a, ptr dereferenceable(16) %b, i64 16) @@ -289,9 +289,9 @@ define i32 @combine_setcc_multiuse(i32 %0, i32 %1, i32 %2, i32 %3) { ; CHECK-LABEL: combine_setcc_multiuse: ; CHECK: // %bb.0: -; CHECK-NEXT: eor w8, w1, w0 -; CHECK-NEXT: eor w9, w3, w2 -; CHECK-NEXT: orr w8, w9, w8 +; CHECK-NEXT: eor w8, w3, w2 +; CHECK-NEXT: eor w9, w1, w0 +; CHECK-NEXT: orr w8, w8, w9 ; CHECK-NEXT: cbz w8, .LBB18_2 ; CHECK-NEXT: // %bb.1: ; CHECK-NEXT: mov w0, w8 diff --git a/llvm/test/CodeGen/AArch64/dag-numsignbits.ll b/llvm/test/CodeGen/AArch64/dag-numsignbits.ll --- a/llvm/test/CodeGen/AArch64/dag-numsignbits.ll +++ b/llvm/test/CodeGen/AArch64/dag-numsignbits.ll @@ -6,13 +6,13 @@ define void @signbits_vXi1(<4 x i16> %a1) { ; CHECK-LABEL: signbits_vXi1: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI0_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: adrp x8, .LCPI0_0 ; CHECK-NEXT: movi v2.4h, #1 -; CHECK-NEXT: dup v0.4h, v0.h[0] ; CHECK-NEXT: mov w1, wzr -; CHECK-NEXT: mov w2, wzr +; CHECK-NEXT: dup v0.4h, v0.h[0] ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_0] +; CHECK-NEXT: mov w2, wzr ; CHECK-NEXT: add v0.4h, v0.4h, 
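The combine_setcc_*_conjunction_xor_or tests above pin down how a 16-byte bcmp is expanded inline: two 64-bit loads per side, a cmp on the first pair, a ccmp chaining in the second, and a cset materializing the i1 result. A C equivalent of what gets compiled, with invented names:

    #include <stdint.h>
    #include <string.h>

    int blocks_equal_16(const void *a, const void *b) {
      uint64_t a0, a1, b0, b1;
      memcpy(&a0, a, 8);                    /* ldp x9, x10, [x0] */
      memcpy(&a1, (const char *)a + 8, 8);
      memcpy(&b0, b, 8);                    /* ldp x8, x11, [x1] */
      memcpy(&b1, (const char *)b + 8, 8);
      return (a0 == b0) & (a1 == b1);       /* cmp + ccmp + cset */
    }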
v1.4h ; CHECK-NEXT: cmgt v0.4h, v2.4h, v0.4h ; CHECK-NEXT: umov w0, v0.h[0] diff --git a/llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-signed.ll b/llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-signed.ll --- a/llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-signed.ll +++ b/llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-signed.ll @@ -78,16 +78,17 @@ ; ALL-NEXT: smov w13, v0.b[4] ; ALL-NEXT: smov w14, v0.b[5] ; ALL-NEXT: smov w15, v0.b[6] -; ALL-NEXT: sdiv w8, w9, w8 -; ALL-NEXT: smov w9, v1.b[0] ; ALL-NEXT: smov w16, v0.b[7] ; ALL-NEXT: smov w17, v0.b[8] +; ALL-NEXT: smov w18, v0.b[9] +; ALL-NEXT: sdiv w8, w9, w8 +; ALL-NEXT: smov w9, v1.b[0] ; ALL-NEXT: sdiv w9, w10, w9 ; ALL-NEXT: smov w10, v1.b[2] ; ALL-NEXT: sdiv w10, w11, w10 ; ALL-NEXT: smov w11, v1.b[3] ; ALL-NEXT: fmov s2, w9 -; ALL-NEXT: smov w9, v1.b[9] +; ALL-NEXT: smov w9, v1.b[10] ; ALL-NEXT: mov v2.b[1], w8 ; ALL-NEXT: sdiv w11, w12, w11 ; ALL-NEXT: smov w12, v1.b[4] @@ -109,10 +110,9 @@ ; ALL-NEXT: smov w16, v1.b[8] ; ALL-NEXT: mov v2.b[6], w14 ; ALL-NEXT: sdiv w16, w17, w16 -; ALL-NEXT: smov w17, v0.b[9] +; ALL-NEXT: smov w17, v1.b[9] ; ALL-NEXT: mov v2.b[7], w15 -; ALL-NEXT: sdiv w8, w17, w9 -; ALL-NEXT: smov w9, v1.b[10] +; ALL-NEXT: sdiv w8, w18, w17 ; ALL-NEXT: mov v2.b[8], w16 ; ALL-NEXT: sdiv w9, w10, w9 ; ALL-NEXT: smov w10, v1.b[11] @@ -153,6 +153,7 @@ ; ALL-NEXT: smov w11, v0.h[2] ; ALL-NEXT: smov w12, v0.h[3] ; ALL-NEXT: smov w13, v0.h[4] +; ALL-NEXT: smov w14, v0.h[5] ; ALL-NEXT: sdiv w8, w9, w8 ; ALL-NEXT: smov w9, v1.h[0] ; ALL-NEXT: sdiv w9, w10, w9 @@ -160,18 +161,17 @@ ; ALL-NEXT: sdiv w10, w11, w10 ; ALL-NEXT: smov w11, v1.h[3] ; ALL-NEXT: fmov s2, w9 -; ALL-NEXT: smov w9, v1.h[5] +; ALL-NEXT: smov w9, v1.h[6] ; ALL-NEXT: mov v2.h[1], w8 ; ALL-NEXT: sdiv w11, w12, w11 ; ALL-NEXT: smov w12, v1.h[4] ; ALL-NEXT: mov v2.h[2], w10 ; ALL-NEXT: smov w10, v0.h[6] ; ALL-NEXT: sdiv w12, w13, w12 -; ALL-NEXT: smov w13, v0.h[5] +; ALL-NEXT: smov w13, v1.h[5] ; ALL-NEXT: mov v2.h[3], w11 ; ALL-NEXT: smov w11, v0.h[7] -; ALL-NEXT: sdiv w8, w13, w9 -; ALL-NEXT: smov w9, v1.h[6] +; ALL-NEXT: sdiv w8, w14, w13 ; ALL-NEXT: mov v2.h[4], w12 ; ALL-NEXT: sdiv w9, w10, w9 ; ALL-NEXT: smov w10, v1.h[7] @@ -226,15 +226,15 @@ ; ALL-NEXT: mov x10, v1.d[1] ; ALL-NEXT: mov x11, v0.d[1] ; ALL-NEXT: sdiv x9, x9, x8 -; ALL-NEXT: mul x8, x9, x8 ; ALL-NEXT: sdiv x11, x11, x10 -; ALL-NEXT: fmov d2, x9 +; ALL-NEXT: mul x8, x9, x8 ; ALL-NEXT: fmov d1, x8 ; ALL-NEXT: mul x10, x11, x10 -; ALL-NEXT: mov v2.d[1], x11 ; ALL-NEXT: mov v1.d[1], x10 -; ALL-NEXT: str q2, [x0] ; ALL-NEXT: sub v0.2d, v0.2d, v1.2d +; ALL-NEXT: fmov d1, x9 +; ALL-NEXT: mov v1.d[1], x11 +; ALL-NEXT: str q1, [x0] ; ALL-NEXT: ret %div = sdiv <2 x i64> %x, %y store <2 x i64> %div, ptr %divdst, align 16 diff --git a/llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-unsigned.ll b/llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-unsigned.ll --- a/llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-unsigned.ll +++ b/llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-unsigned.ll @@ -78,16 +78,17 @@ ; ALL-NEXT: umov w13, v0.b[4] ; ALL-NEXT: umov w14, v0.b[5] ; ALL-NEXT: umov w15, v0.b[6] -; ALL-NEXT: udiv w8, w9, w8 -; ALL-NEXT: umov w9, v1.b[0] ; ALL-NEXT: umov w16, v0.b[7] ; ALL-NEXT: umov w17, v0.b[8] +; ALL-NEXT: umov w18, v0.b[9] +; ALL-NEXT: udiv w8, w9, w8 +; ALL-NEXT: umov w9, v1.b[0] ; ALL-NEXT: udiv w9, w10, w9 ; ALL-NEXT: umov w10, v1.b[2] ; ALL-NEXT: udiv w10, w11, w10 ; ALL-NEXT: umov w11, v1.b[3] ; ALL-NEXT: fmov s2, w9 -; ALL-NEXT: 
umov w9, v1.b[9] +; ALL-NEXT: umov w9, v1.b[10] ; ALL-NEXT: mov v2.b[1], w8 ; ALL-NEXT: udiv w11, w12, w11 ; ALL-NEXT: umov w12, v1.b[4] @@ -109,10 +110,9 @@ ; ALL-NEXT: umov w16, v1.b[8] ; ALL-NEXT: mov v2.b[6], w14 ; ALL-NEXT: udiv w16, w17, w16 -; ALL-NEXT: umov w17, v0.b[9] +; ALL-NEXT: umov w17, v1.b[9] ; ALL-NEXT: mov v2.b[7], w15 -; ALL-NEXT: udiv w8, w17, w9 -; ALL-NEXT: umov w9, v1.b[10] +; ALL-NEXT: udiv w8, w18, w17 ; ALL-NEXT: mov v2.b[8], w16 ; ALL-NEXT: udiv w9, w10, w9 ; ALL-NEXT: umov w10, v1.b[11] @@ -153,6 +153,7 @@ ; ALL-NEXT: umov w11, v0.h[2] ; ALL-NEXT: umov w12, v0.h[3] ; ALL-NEXT: umov w13, v0.h[4] +; ALL-NEXT: umov w14, v0.h[5] ; ALL-NEXT: udiv w8, w9, w8 ; ALL-NEXT: umov w9, v1.h[0] ; ALL-NEXT: udiv w9, w10, w9 @@ -160,18 +161,17 @@ ; ALL-NEXT: udiv w10, w11, w10 ; ALL-NEXT: umov w11, v1.h[3] ; ALL-NEXT: fmov s2, w9 -; ALL-NEXT: umov w9, v1.h[5] +; ALL-NEXT: umov w9, v1.h[6] ; ALL-NEXT: mov v2.h[1], w8 ; ALL-NEXT: udiv w11, w12, w11 ; ALL-NEXT: umov w12, v1.h[4] ; ALL-NEXT: mov v2.h[2], w10 ; ALL-NEXT: umov w10, v0.h[6] ; ALL-NEXT: udiv w12, w13, w12 -; ALL-NEXT: umov w13, v0.h[5] +; ALL-NEXT: umov w13, v1.h[5] ; ALL-NEXT: mov v2.h[3], w11 ; ALL-NEXT: umov w11, v0.h[7] -; ALL-NEXT: udiv w8, w13, w9 -; ALL-NEXT: umov w9, v1.h[6] +; ALL-NEXT: udiv w8, w14, w13 ; ALL-NEXT: mov v2.h[4], w12 ; ALL-NEXT: udiv w9, w10, w9 ; ALL-NEXT: umov w10, v1.h[7] @@ -226,15 +226,15 @@ ; ALL-NEXT: mov x10, v1.d[1] ; ALL-NEXT: mov x11, v0.d[1] ; ALL-NEXT: udiv x9, x9, x8 -; ALL-NEXT: mul x8, x9, x8 ; ALL-NEXT: udiv x11, x11, x10 -; ALL-NEXT: fmov d2, x9 +; ALL-NEXT: mul x8, x9, x8 ; ALL-NEXT: fmov d1, x8 ; ALL-NEXT: mul x10, x11, x10 -; ALL-NEXT: mov v2.d[1], x11 ; ALL-NEXT: mov v1.d[1], x10 -; ALL-NEXT: str q2, [x0] ; ALL-NEXT: sub v0.2d, v0.2d, v1.2d +; ALL-NEXT: fmov d1, x9 +; ALL-NEXT: mov v1.d[1], x11 +; ALL-NEXT: str q1, [x0] ; ALL-NEXT: ret %div = udiv <2 x i64> %x, %y store <2 x i64> %div, ptr %divdst, align 16 diff --git a/llvm/test/CodeGen/AArch64/double_reduct.ll b/llvm/test/CodeGen/AArch64/double_reduct.ll --- a/llvm/test/CodeGen/AArch64/double_reduct.ll +++ b/llvm/test/CodeGen/AArch64/double_reduct.ll @@ -87,9 +87,9 @@ ; CHECK-LABEL: fminimumnum_f32: ; CHECK: // %bb.0: ; CHECK-NEXT: fmin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: fminv s2, v2.4s +; CHECK-NEXT: fminv s1, v2.4s ; CHECK-NEXT: fminv s0, v0.4s -; CHECK-NEXT: fminnm s0, s0, s2 +; CHECK-NEXT: fminnm s0, s0, s1 ; CHECK-NEXT: ret %r1 = call float @llvm.vector.reduce.fminimum.v8f32(<8 x float> %a) %r2 = call float @llvm.vector.reduce.fminimum.v4f32(<4 x float> %b) @@ -101,9 +101,9 @@ ; CHECK-LABEL: fmaxnumimum_f32: ; CHECK: // %bb.0: ; CHECK-NEXT: fmaxnm v0.4s, v0.4s, v1.4s -; CHECK-NEXT: fmaxnmv s2, v2.4s +; CHECK-NEXT: fmaxnmv s1, v2.4s ; CHECK-NEXT: fmaxnmv s0, v0.4s -; CHECK-NEXT: fmax s0, s0, s2 +; CHECK-NEXT: fmax s0, s0, s1 ; CHECK-NEXT: ret %r1 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> %a) %r2 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %b) diff --git a/llvm/test/CodeGen/AArch64/expand-select.ll b/llvm/test/CodeGen/AArch64/expand-select.ll --- a/llvm/test/CodeGen/AArch64/expand-select.ll +++ b/llvm/test/CodeGen/AArch64/expand-select.ll @@ -6,18 +6,18 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0x1 ; CHECK-NEXT: fmov s0, wzr +; CHECK-NEXT: ldr x11, [sp] ; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: ldp x9, x8, [sp] +; CHECK-NEXT: ldp x9, x10, [sp, #8] ; CHECK-NEXT: cmeq v0.4s, v1.4s, v0.4s -; CHECK-NEXT: fmov w10, s0 -; CHECK-NEXT: tst w10, #0x1 -; CHECK-NEXT: ldr x10, [sp, #16] -; 
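The div-rem-pair-recomposition tests above rely on AArch64 having no integer remainder instruction (and NEON no integer divide at all): the remainder is rebuilt from the quotient with a multiply and subtract, which is exactly the sdiv/udiv + mul + sub chains in the checks. A sketch with invented names:

    #include <stdint.h>

    uint64_t divrem_u64(uint64_t x, uint64_t y, uint64_t *quot) {
      uint64_t q = x / y;  /* udiv */
      *quot = q;           /* the vector tests store the quotient the same way */
      return x - q * y;    /* mul + sub; purely scalar code would fuse to msub */
    }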
CHECK-NEXT: csel x8, x5, x8, ne -; CHECK-NEXT: csel x9, x4, x9, ne -; CHECK-NEXT: csel x11, x3, x7, ne -; CHECK-NEXT: csel x12, x2, x6, ne +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: tst w8, #0x1 +; CHECK-NEXT: csel x8, x5, x9, ne +; CHECK-NEXT: csel x9, x4, x11, ne ; CHECK-NEXT: stp x9, x8, [x10, #16] -; CHECK-NEXT: stp x12, x11, [x10] +; CHECK-NEXT: csel x8, x3, x7, ne +; CHECK-NEXT: csel x9, x2, x6, ne +; CHECK-NEXT: stp x9, x8, [x10] ; CHECK-NEXT: ret %cond = and i32 %In1, 1 %cbool = icmp eq i32 %cond, 0 @@ -33,24 +33,24 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0x1 ; CHECK-NEXT: fmov s0, wzr -; CHECK-NEXT: ldp x10, x9, [sp] -; CHECK-NEXT: fmov s1, w8 ; CHECK-NEXT: ldr x11, [sp, #16] +; CHECK-NEXT: fmov s1, w8 +; CHECK-NEXT: ldp x9, x10, [sp] ; CHECK-NEXT: cmeq v0.4s, v1.4s, v0.4s ; CHECK-NEXT: dup v1.4s, v0.s[0] ; CHECK-NEXT: mov x8, v1.d[1] ; CHECK-NEXT: lsr x8, x8, #32 ; CHECK-NEXT: tst w8, #0x1 ; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: csel x9, x5, x9, ne -; CHECK-NEXT: csel x10, x4, x10, ne +; CHECK-NEXT: csel x10, x5, x10, ne +; CHECK-NEXT: csel x9, x4, x9, ne +; CHECK-NEXT: stur x9, [x11, #12] ; CHECK-NEXT: tst w8, #0x1 +; CHECK-NEXT: str w10, [x11, #20] ; CHECK-NEXT: csel x8, x2, x6, ne -; CHECK-NEXT: csel x12, x3, x7, ne -; CHECK-NEXT: stur x10, [x11, #12] -; CHECK-NEXT: str w9, [x11, #20] +; CHECK-NEXT: csel x9, x3, x7, ne ; CHECK-NEXT: str x8, [x11] -; CHECK-NEXT: str w12, [x11, #8] +; CHECK-NEXT: str w9, [x11, #8] ; CHECK-NEXT: ret %cond = and i32 %In1, 1 %cbool = icmp eq i32 %cond, 0 diff --git a/llvm/test/CodeGen/AArch64/expand-vector-rot.ll b/llvm/test/CodeGen/AArch64/expand-vector-rot.ll --- a/llvm/test/CodeGen/AArch64/expand-vector-rot.ll +++ b/llvm/test/CodeGen/AArch64/expand-vector-rot.ll @@ -7,14 +7,14 @@ ; CHECK-LABEL: rotlv2_16: ; CHECK: // %bb.0: ; CHECK-NEXT: movi v2.2s, #15 -; CHECK-NEXT: movi d3, #0x00ffff0000ffff -; CHECK-NEXT: neg v4.2s, v1.2s -; CHECK-NEXT: and v4.8b, v4.8b, v2.8b -; CHECK-NEXT: and v3.8b, v0.8b, v3.8b -; CHECK-NEXT: neg v4.2s, v4.2s +; CHECK-NEXT: neg v3.2s, v1.2s +; CHECK-NEXT: movi d4, #0x00ffff0000ffff +; CHECK-NEXT: and v3.8b, v3.8b, v2.8b +; CHECK-NEXT: and v4.8b, v0.8b, v4.8b ; CHECK-NEXT: and v1.8b, v1.8b, v2.8b +; CHECK-NEXT: neg v3.2s, v3.2s ; CHECK-NEXT: ushl v0.2s, v0.2s, v1.2s -; CHECK-NEXT: ushl v2.2s, v3.2s, v4.2s +; CHECK-NEXT: ushl v2.2s, v4.2s, v3.2s ; CHECK-NEXT: orr v0.8b, v0.8b, v2.8b ; CHECK-NEXT: ret %1 = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %vec2_16, <2 x i16> %vec2_16, <2 x i16> %shift) diff --git a/llvm/test/CodeGen/AArch64/extbinopload.ll b/llvm/test/CodeGen/AArch64/extbinopload.ll --- a/llvm/test/CodeGen/AArch64/extbinopload.ll +++ b/llvm/test/CodeGen/AArch64/extbinopload.ll @@ -110,21 +110,21 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldr x8, [x0] ; CHECK-NEXT: ldr w9, [x0, #8] -; CHECK-NEXT: ubfx x10, x8, #48, #12 -; CHECK-NEXT: lsr x11, x8, #60 -; CHECK-NEXT: orr w11, w11, w9, lsl #4 -; CHECK-NEXT: and w12, w8, #0xfff -; CHECK-NEXT: and w11, w11, #0xfff -; CHECK-NEXT: fmov s0, w10 +; CHECK-NEXT: lsr x10, x8, #60 +; CHECK-NEXT: ubfx x11, x8, #48, #12 +; CHECK-NEXT: ubfx w12, w9, #8, #12 +; CHECK-NEXT: orr w10, w10, w9, lsl #4 +; CHECK-NEXT: fmov s0, w11 +; CHECK-NEXT: and w11, w8, #0xfff +; CHECK-NEXT: fmov s1, w11 +; CHECK-NEXT: lsr x9, x9, #20 +; CHECK-NEXT: and w10, w10, #0xfff +; CHECK-NEXT: mov v0.h[1], w10 ; CHECK-NEXT: ubfx w10, w8, #12, #12 -; CHECK-NEXT: fmov s1, w12 -; CHECK-NEXT: mov v0.h[1], w11 -; CHECK-NEXT: ubfx w11, w9, #8, #12 ; CHECK-NEXT: mov v1.h[1], w10 ; CHECK-NEXT: ubfx x10, x8, #24, 
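rotlv2_16 above shows the generic fshl expansion for a lane width with no native rotate: both shift amounts are masked to the lane width (the movi #15 / and / neg sequence), the element is masked to 16 bits before the logical right shift (the #0x00ffff0000ffff constant), and the two halves are OR'd. A scalar sketch of the same identity, assuming the shift amount may exceed 15:

    #include <stdint.h>

    static uint16_t rotl16(uint16_t x, unsigned s) {
      unsigned l = s & 15;         /* and with the movi #15 splat   */
      unsigned r = (16 - l) & 15;  /* the neg+and, i.e. (-s) mod 16 */
      return (uint16_t)(((unsigned)x << l) | (x >> r));
    }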
#12 -; CHECK-NEXT: lsr x9, x9, #20 ; CHECK-NEXT: ubfx x8, x8, #36, #12 -; CHECK-NEXT: mov v0.h[2], w11 +; CHECK-NEXT: mov v0.h[2], w12 ; CHECK-NEXT: mov v1.h[2], w10 ; CHECK-NEXT: mov v0.h[3], w9 ; CHECK-NEXT: mov v1.h[3], w8 @@ -264,12 +264,12 @@ ; CHECK-LABEL: std_v2i8_v2i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ldrb w8, [x0, #2] -; CHECK-NEXT: ldrb w9, [x0] +; CHECK-NEXT: ldrb w9, [x0, #3] ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: ldrb w8, [x0, #3] -; CHECK-NEXT: fmov s1, w9 +; CHECK-NEXT: ldrb w8, [x0] +; CHECK-NEXT: fmov s1, w8 +; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: ldrb w9, [x0, #1] -; CHECK-NEXT: mov v0.s[1], w8 ; CHECK-NEXT: mov v1.s[1], w9 ; CHECK-NEXT: shl v0.2s, v0.2s, #3 ; CHECK-NEXT: add v0.2s, v1.2s, v0.2s @@ -370,12 +370,12 @@ ; CHECK-NEXT: ld1 { v1.s }[1], [x1] ; CHECK-NEXT: ushll v2.8h, v2.8b, #0 ; CHECK-NEXT: ushll v3.8h, v3.8b, #0 -; CHECK-NEXT: ushll v2.4s, v2.4h, #3 ; CHECK-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-NEXT: uaddw v2.4s, v2.4s, v3.4h -; CHECK-NEXT: ushll2 v3.4s, v1.8h, #3 +; CHECK-NEXT: ushll v2.4s, v2.4h, #3 +; CHECK-NEXT: ushll2 v4.4s, v1.8h, #3 ; CHECK-NEXT: ushll v1.4s, v1.4h, #3 -; CHECK-NEXT: uaddw2 v3.4s, v3.4s, v0.8h +; CHECK-NEXT: uaddw v2.4s, v2.4s, v3.4h +; CHECK-NEXT: uaddw2 v3.4s, v4.4s, v0.8h ; CHECK-NEXT: uaddw v0.4s, v1.4s, v0.4h ; CHECK-NEXT: stp q3, q2, [x8, #16] ; CHECK-NEXT: str q0, [x8] @@ -450,10 +450,10 @@ ; CHECK-NEXT: usubl v0.8h, v0.8b, v1.8b ; CHECK-NEXT: ld1 { v3.s }[1], [x3] ; CHECK-NEXT: usubl v2.8h, v2.8b, v3.8b +; CHECK-NEXT: shll v3.4s, v2.4h, #16 ; CHECK-NEXT: shll2 v1.4s, v2.8h, #16 -; CHECK-NEXT: shll v2.4s, v2.4h, #16 ; CHECK-NEXT: saddw2 v1.4s, v1.4s, v0.8h -; CHECK-NEXT: saddw v0.4s, v2.4s, v0.4h +; CHECK-NEXT: saddw v0.4s, v3.4s, v0.4h ; CHECK-NEXT: ret %j1 = load <4 x i8>, ptr %p %p1 = getelementptr i8, ptr %p, i32 4 @@ -493,7 +493,7 @@ ; CHECK-NEXT: ld1 { v1.s }[1], [x1] ; CHECK-NEXT: ld1 { v2.s }[1], [x3], #4 ; CHECK-NEXT: ldp s4, s5, [x4] -; CHECK-NEXT: usubl v0.8h, v0.8b, v1.8b +; CHECK-NEXT: usubl v1.8h, v0.8b, v1.8b ; CHECK-NEXT: ld1 { v3.s }[1], [x3] ; CHECK-NEXT: ld1 { v4.s }[1], [x5], #4 ; CHECK-NEXT: ldp s6, s7, [x6] @@ -502,15 +502,15 @@ ; CHECK-NEXT: ld1 { v6.s }[1], [x7], #4 ; CHECK-NEXT: usubl v4.8h, v4.8b, v5.8b ; CHECK-NEXT: ld1 { v7.s }[1], [x7] -; CHECK-NEXT: shll v3.4s, v4.4h, #16 -; CHECK-NEXT: shll2 v1.4s, v4.8h, #16 -; CHECK-NEXT: usubl v4.8h, v6.8b, v7.8b -; CHECK-NEXT: saddw2 v1.4s, v1.4s, v0.8h -; CHECK-NEXT: saddw v0.4s, v3.4s, v0.4h -; CHECK-NEXT: shll2 v3.4s, v4.8h, #16 -; CHECK-NEXT: shll v4.4s, v4.4h, #16 +; CHECK-NEXT: usubl v5.8h, v6.8b, v7.8b +; CHECK-NEXT: shll v0.4s, v4.4h, #16 +; CHECK-NEXT: shll2 v4.4s, v4.8h, #16 +; CHECK-NEXT: saddw v0.4s, v0.4s, v1.4h +; CHECK-NEXT: saddw2 v1.4s, v4.4s, v1.8h +; CHECK-NEXT: shll v6.4s, v5.4h, #16 +; CHECK-NEXT: shll2 v3.4s, v5.8h, #16 ; CHECK-NEXT: saddw2 v3.4s, v3.4s, v2.8h -; CHECK-NEXT: saddw v2.4s, v4.4s, v2.4h +; CHECK-NEXT: saddw v2.4s, v6.4s, v2.4h ; CHECK-NEXT: ret %j1 = load <4 x i8>, ptr %p %p1 = getelementptr i8, ptr %p, i32 4 @@ -564,26 +564,26 @@ define <16 x i32> @double2_bv_4xv4i8_i32(ptr %p, ptr %q, ptr %r, ptr %s, ptr %t, ptr %u, ptr %v, ptr %w) { ; CHECK-LABEL: double2_bv_4xv4i8_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x4] -; CHECK-NEXT: ldr d2, [x0] -; CHECK-NEXT: ldr d3, [x1] -; CHECK-NEXT: ldr d6, [x5] -; CHECK-NEXT: ldr d1, [x2] -; CHECK-NEXT: ldr d4, [x3] -; CHECK-NEXT: ldr d5, [x7] -; CHECK-NEXT: ldr d7, [x6] -; CHECK-NEXT: usubl v0.8h, v2.8b, v0.8b -; CHECK-NEXT: usubl v2.8h, v3.8b, v6.8b -; CHECK-NEXT: usubl 
v4.8h, v4.8b, v5.8b -; CHECK-NEXT: usubl v3.8h, v1.8b, v7.8b -; CHECK-NEXT: shll2 v1.4s, v0.8h, #16 +; CHECK-NEXT: ldr d0, [x2] +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: ldr d2, [x1] +; CHECK-NEXT: ldr d3, [x3] +; CHECK-NEXT: ldr d4, [x4] +; CHECK-NEXT: ldr d5, [x5] +; CHECK-NEXT: ldr d6, [x6] +; CHECK-NEXT: ldr d7, [x7] +; CHECK-NEXT: usubl v1.8h, v1.8b, v4.8b +; CHECK-NEXT: usubl v2.8h, v2.8b, v5.8b +; CHECK-NEXT: usubl v3.8h, v3.8b, v7.8b +; CHECK-NEXT: usubl v4.8h, v0.8b, v6.8b +; CHECK-NEXT: shll2 v0.4s, v1.8h, #16 ; CHECK-NEXT: shll2 v5.4s, v2.8h, #16 -; CHECK-NEXT: saddw v0.4s, v1.4s, v0.4h +; CHECK-NEXT: shll2 v6.4s, v4.8h, #16 +; CHECK-NEXT: shll2 v7.4s, v3.8h, #16 +; CHECK-NEXT: saddw v0.4s, v0.4s, v1.4h ; CHECK-NEXT: saddw v1.4s, v5.4s, v2.4h -; CHECK-NEXT: shll2 v2.4s, v3.8h, #16 -; CHECK-NEXT: shll2 v5.4s, v4.8h, #16 -; CHECK-NEXT: saddw v2.4s, v2.4s, v3.4h -; CHECK-NEXT: saddw v3.4s, v5.4s, v4.4h +; CHECK-NEXT: saddw v2.4s, v6.4s, v4.4h +; CHECK-NEXT: saddw v3.4s, v7.4s, v3.4h ; CHECK-NEXT: ret %j1 = load <4 x i8>, ptr %p %p1 = getelementptr i8, ptr %p, i32 4 @@ -646,47 +646,47 @@ ; CHECK-LABEL: extrause_load: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr s0, [x0] -; CHECK-NEXT: add x10, x1, #4 -; CHECK-NEXT: add x11, x1, #8 -; CHECK-NEXT: add x12, x1, #12 +; CHECK-NEXT: add x8, x3, #8 +; CHECK-NEXT: add x11, x1, #12 ; CHECK-NEXT: str s0, [x4] -; CHECK-NEXT: ldp s1, s5, [x2] ; CHECK-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-NEXT: ldp s1, s5, [x2] ; CHECK-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-NEXT: umov w8, v1.h[0] -; CHECK-NEXT: umov w9, v1.h[1] -; CHECK-NEXT: mov v2.b[8], w8 -; CHECK-NEXT: umov w8, v1.h[2] -; CHECK-NEXT: mov v2.b[9], w9 -; CHECK-NEXT: umov w9, v1.h[3] +; CHECK-NEXT: umov w9, v1.h[0] +; CHECK-NEXT: umov w10, v1.h[1] +; CHECK-NEXT: mov v2.b[8], w9 +; CHECK-NEXT: umov w9, v1.h[2] +; CHECK-NEXT: mov v2.b[9], w10 +; CHECK-NEXT: umov w10, v1.h[3] ; CHECK-NEXT: ldr s1, [x1] -; CHECK-NEXT: mov v2.b[10], w8 -; CHECK-NEXT: add x8, x3, #8 ; CHECK-NEXT: ushll v1.8h, v1.8b, #0 +; CHECK-NEXT: mov v2.b[10], w9 +; CHECK-NEXT: add x9, x1, #4 ; CHECK-NEXT: uzp1 v0.8b, v0.8b, v1.8b -; CHECK-NEXT: mov v2.b[11], w9 -; CHECK-NEXT: add x9, x3, #12 +; CHECK-NEXT: mov v2.b[11], w10 +; CHECK-NEXT: add x10, x3, #12 ; CHECK-NEXT: ld1 { v2.s }[3], [x3], #4 -; CHECK-NEXT: ldp s3, s4, [x0, #4] +; CHECK-NEXT: ldr s4, [x0, #12] +; CHECK-NEXT: ldp s3, s16, [x0, #4] ; CHECK-NEXT: ldp s6, s7, [x2, #8] -; CHECK-NEXT: ldr s16, [x0, #12] -; CHECK-NEXT: ld1 { v5.s }[1], [x3] ; CHECK-NEXT: ld1 { v4.s }[1], [x11] +; CHECK-NEXT: ld1 { v5.s }[1], [x3] +; CHECK-NEXT: ld1 { v3.s }[1], [x9] ; CHECK-NEXT: ld1 { v6.s }[1], [x8] -; CHECK-NEXT: ld1 { v3.s }[1], [x10] -; CHECK-NEXT: ld1 { v16.s }[1], [x12] -; CHECK-NEXT: ld1 { v7.s }[1], [x9] -; CHECK-NEXT: ushll v1.8h, v6.8b, #0 -; CHECK-NEXT: uaddl v0.8h, v0.8b, v4.8b -; CHECK-NEXT: uaddl v3.8h, v3.8b, v16.8b +; CHECK-NEXT: ld1 { v7.s }[1], [x10] +; CHECK-NEXT: add x8, x1, #8 +; CHECK-NEXT: ld1 { v16.s }[1], [x8] +; CHECK-NEXT: uaddl v1.8h, v3.8b, v4.8b +; CHECK-NEXT: ushll v3.8h, v6.8b, #0 ; CHECK-NEXT: uaddl v4.8h, v5.8b, v7.8b -; CHECK-NEXT: uaddw2 v2.8h, v1.8h, v2.16b -; CHECK-NEXT: ushll v5.4s, v3.4h, #3 -; CHECK-NEXT: ushll2 v1.4s, v3.8h, #3 +; CHECK-NEXT: uaddl v5.8h, v0.8b, v16.8b +; CHECK-NEXT: uaddw2 v2.8h, v3.8h, v2.16b +; CHECK-NEXT: ushll v0.4s, v1.4h, #3 +; CHECK-NEXT: ushll2 v1.4s, v1.8h, #3 ; CHECK-NEXT: ushll v6.4s, v4.4h, #3 ; CHECK-NEXT: ushll2 v3.4s, v4.8h, #3 -; CHECK-NEXT: uaddw2 v1.4s, v1.4s, v0.8h -; CHECK-NEXT: uaddw v0.4s, v5.4s, v0.4h +; 
CHECK-NEXT: uaddw v0.4s, v0.4s, v5.4h +; CHECK-NEXT: uaddw2 v1.4s, v1.4s, v5.8h ; CHECK-NEXT: uaddw2 v3.4s, v3.4s, v2.8h ; CHECK-NEXT: uaddw v2.4s, v6.4s, v2.4h ; CHECK-NEXT: ret @@ -757,38 +757,38 @@ define <16 x i32> @extrause_shuffle(ptr %p, ptr %q, ptr %r, ptr %s, ptr %z) { ; CHECK-LABEL: extrause_shuffle: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp s0, s1, [x2] -; CHECK-NEXT: ldp s2, s3, [x0] -; CHECK-NEXT: ldp s6, s7, [x0, #8] -; CHECK-NEXT: ldr s18, [x1, #12] +; CHECK-NEXT: ldp s2, s3, [x0, #8] ; CHECK-NEXT: add x8, x3, #8 +; CHECK-NEXT: ldr s16, [x1, #12] +; CHECK-NEXT: ldp s0, s1, [x2] +; CHECK-NEXT: ldp s6, s7, [x0] ; CHECK-NEXT: add x9, x1, #8 +; CHECK-NEXT: mov v4.16b, v3.16b +; CHECK-NEXT: ldp s17, s18, [x2, #8] ; CHECK-NEXT: ldr s5, [x3, #12] -; CHECK-NEXT: ldp s16, s17, [x2, #8] +; CHECK-NEXT: mov v3.s[1], v16.s[0] ; CHECK-NEXT: ld1 { v0.s }[1], [x3], #4 -; CHECK-NEXT: ld1 { v2.s }[1], [x1], #4 -; CHECK-NEXT: mov v4.16b, v7.16b -; CHECK-NEXT: ld1 { v6.s }[1], [x9] -; CHECK-NEXT: mov v4.s[1], v18.s[0] -; CHECK-NEXT: ld1 { v16.s }[1], [x8] -; CHECK-NEXT: mov v7.s[1], v18.s[0] +; CHECK-NEXT: mov v4.s[1], v16.s[0] +; CHECK-NEXT: ld1 { v6.s }[1], [x1], #4 +; CHECK-NEXT: ld1 { v2.s }[1], [x9] +; CHECK-NEXT: ld1 { v17.s }[1], [x8] ; CHECK-NEXT: ld1 { v1.s }[1], [x3] -; CHECK-NEXT: ld1 { v3.s }[1], [x1] -; CHECK-NEXT: uaddl v2.8h, v2.8b, v6.8b -; CHECK-NEXT: mov v4.s[2], v17.s[0] -; CHECK-NEXT: mov v17.s[1], v5.s[0] -; CHECK-NEXT: uaddl v3.8h, v3.8b, v7.8b -; CHECK-NEXT: uaddl v6.8h, v0.8b, v16.8b +; CHECK-NEXT: ld1 { v7.s }[1], [x1] +; CHECK-NEXT: mov v4.s[2], v18.s[0] +; CHECK-NEXT: mov v18.s[1], v5.s[0] +; CHECK-NEXT: uaddl v2.8h, v6.8b, v2.8b +; CHECK-NEXT: uaddl v6.8h, v0.8b, v17.8b +; CHECK-NEXT: uaddl v3.8h, v7.8b, v3.8b +; CHECK-NEXT: uaddl v1.8h, v1.8b, v18.8b ; CHECK-NEXT: mov v4.s[3], v5.s[0] -; CHECK-NEXT: uaddl v7.8h, v1.8b, v17.8b -; CHECK-NEXT: ushll2 v0.4s, v3.8h, #3 -; CHECK-NEXT: ushll v3.4s, v3.4h, #3 -; CHECK-NEXT: uaddw2 v1.4s, v0.4s, v2.8h +; CHECK-NEXT: ushll v0.4s, v3.4h, #3 +; CHECK-NEXT: ushll v7.4s, v1.4h, #3 +; CHECK-NEXT: ushll2 v16.4s, v1.8h, #3 +; CHECK-NEXT: ushll2 v1.4s, v3.8h, #3 +; CHECK-NEXT: uaddw v0.4s, v0.4s, v2.4h ; CHECK-NEXT: str q4, [x4] -; CHECK-NEXT: uaddw v0.4s, v3.4s, v2.4h -; CHECK-NEXT: ushll2 v2.4s, v7.8h, #3 -; CHECK-NEXT: ushll v7.4s, v7.4h, #3 -; CHECK-NEXT: uaddw2 v3.4s, v2.4s, v6.8h +; CHECK-NEXT: uaddw2 v1.4s, v1.4s, v2.8h +; CHECK-NEXT: uaddw2 v3.4s, v16.4s, v6.8h ; CHECK-NEXT: uaddw v2.4s, v7.4s, v6.4h ; CHECK-NEXT: ret %lp1 = load <4 x i8>, ptr %p @@ -859,35 +859,35 @@ ; CHECK-LABEL: extrause_ext: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp s0, s1, [x2] -; CHECK-NEXT: ldp s2, s3, [x0] ; CHECK-NEXT: add x8, x3, #8 +; CHECK-NEXT: ldp s2, s3, [x0] ; CHECK-NEXT: add x9, x1, #8 -; CHECK-NEXT: ldp s5, s6, [x2, #8] -; CHECK-NEXT: add x10, x1, #12 -; CHECK-NEXT: add x11, x3, #12 -; CHECK-NEXT: ldp s7, s4, [x0, #8] +; CHECK-NEXT: add x10, x3, #12 +; CHECK-NEXT: ldp s4, s5, [x2, #8] +; CHECK-NEXT: ldp s6, s7, [x0, #8] +; CHECK-NEXT: add x11, x1, #12 ; CHECK-NEXT: ld1 { v0.s }[1], [x3], #4 ; CHECK-NEXT: ld1 { v2.s }[1], [x1], #4 -; CHECK-NEXT: ld1 { v6.s }[1], [x11] -; CHECK-NEXT: ld1 { v4.s }[1], [x10] +; CHECK-NEXT: ld1 { v5.s }[1], [x10] +; CHECK-NEXT: ld1 { v7.s }[1], [x11] +; CHECK-NEXT: ld1 { v6.s }[1], [x9] +; CHECK-NEXT: ld1 { v4.s }[1], [x8] ; CHECK-NEXT: ld1 { v1.s }[1], [x3] ; CHECK-NEXT: ld1 { v3.s }[1], [x1] -; CHECK-NEXT: ld1 { v7.s }[1], [x9] -; CHECK-NEXT: ld1 { v5.s }[1], [x8] -; CHECK-NEXT: uaddl v1.8h, v1.8b, v6.8b -; CHECK-NEXT: 
uaddl v3.8h, v3.8b, v4.8b -; CHECK-NEXT: uaddl v2.8h, v2.8b, v7.8b -; CHECK-NEXT: uaddl v5.8h, v0.8b, v5.8b -; CHECK-NEXT: ushll v7.4s, v1.4h, #3 -; CHECK-NEXT: ushll v0.4s, v3.4h, #3 -; CHECK-NEXT: ushll2 v3.4s, v3.8h, #3 +; CHECK-NEXT: uaddl v2.8h, v2.8b, v6.8b +; CHECK-NEXT: uaddl v4.8h, v0.8b, v4.8b +; CHECK-NEXT: uaddl v1.8h, v1.8b, v5.8b +; CHECK-NEXT: ushll v5.8h, v5.8b, #0 +; CHECK-NEXT: uaddl v3.8h, v3.8b, v7.8b +; CHECK-NEXT: ushll v6.4s, v1.4h, #3 ; CHECK-NEXT: ushll2 v16.4s, v1.8h, #3 -; CHECK-NEXT: uaddw2 v1.4s, v3.4s, v2.8h -; CHECK-NEXT: uaddw2 v3.4s, v16.4s, v5.8h +; CHECK-NEXT: ushll v0.4s, v3.4h, #3 +; CHECK-NEXT: ushll2 v1.4s, v3.8h, #3 +; CHECK-NEXT: uaddw2 v3.4s, v16.4s, v4.8h ; CHECK-NEXT: uaddw v0.4s, v0.4s, v2.4h -; CHECK-NEXT: uaddw v2.4s, v7.4s, v5.4h -; CHECK-NEXT: ushll v5.8h, v6.8b, #0 -; CHECK-NEXT: ushll v4.8h, v4.8b, #0 +; CHECK-NEXT: uaddw2 v1.4s, v1.4s, v2.8h +; CHECK-NEXT: uaddw v2.4s, v6.4s, v4.4h +; CHECK-NEXT: ushll v4.8h, v7.8b, #0 ; CHECK-NEXT: stp q4, q5, [x4] ; CHECK-NEXT: ret %lp1 = load <4 x i8>, ptr %p @@ -958,34 +958,34 @@ ; CHECK-LABEL: extrause_add: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp s0, s1, [x0] -; CHECK-NEXT: ldp s2, s3, [x2] ; CHECK-NEXT: add x8, x3, #8 +; CHECK-NEXT: ldp s2, s3, [x2] ; CHECK-NEXT: add x9, x1, #8 +; CHECK-NEXT: add x10, x3, #12 ; CHECK-NEXT: ldp s4, s5, [x0, #8] -; CHECK-NEXT: add x10, x1, #12 -; CHECK-NEXT: add x11, x3, #12 ; CHECK-NEXT: ldp s6, s7, [x2, #8] +; CHECK-NEXT: add x11, x1, #12 ; CHECK-NEXT: ld1 { v2.s }[1], [x3], #4 ; CHECK-NEXT: ld1 { v0.s }[1], [x1], #4 +; CHECK-NEXT: ld1 { v5.s }[1], [x11] +; CHECK-NEXT: ld1 { v7.s }[1], [x10] ; CHECK-NEXT: ld1 { v4.s }[1], [x9] -; CHECK-NEXT: ld1 { v7.s }[1], [x11] +; CHECK-NEXT: ld1 { v6.s }[1], [x8] ; CHECK-NEXT: ld1 { v3.s }[1], [x3] ; CHECK-NEXT: ld1 { v1.s }[1], [x1] -; CHECK-NEXT: ld1 { v5.s }[1], [x10] -; CHECK-NEXT: ld1 { v6.s }[1], [x8] -; CHECK-NEXT: uaddl v0.8h, v0.8b, v4.8b -; CHECK-NEXT: uaddl v4.8h, v3.8b, v7.8b -; CHECK-NEXT: uaddl v5.8h, v1.8b, v5.8b ; CHECK-NEXT: uaddl v2.8h, v2.8b, v6.8b -; CHECK-NEXT: ushll v6.4s, v4.4h, #3 -; CHECK-NEXT: ushll v7.4s, v5.4h, #3 -; CHECK-NEXT: stp q5, q4, [x4] -; CHECK-NEXT: ushll2 v1.4s, v5.8h, #3 -; CHECK-NEXT: ushll2 v3.4s, v4.8h, #3 -; CHECK-NEXT: uaddw2 v1.4s, v1.4s, v0.8h +; CHECK-NEXT: uaddl v7.8h, v3.8b, v7.8b +; CHECK-NEXT: uaddl v5.8h, v1.8b, v5.8b +; CHECK-NEXT: uaddl v1.8h, v0.8b, v4.8b +; CHECK-NEXT: ushll v4.4s, v7.4h, #3 +; CHECK-NEXT: ushll2 v3.4s, v7.8h, #3 +; CHECK-NEXT: ushll v0.4s, v5.4h, #3 +; CHECK-NEXT: ushll2 v6.4s, v5.8h, #3 +; CHECK-NEXT: stp q5, q7, [x4] ; CHECK-NEXT: uaddw2 v3.4s, v3.4s, v2.8h -; CHECK-NEXT: uaddw v0.4s, v7.4s, v0.4h -; CHECK-NEXT: uaddw v2.4s, v6.4s, v2.4h +; CHECK-NEXT: uaddw v2.4s, v4.4s, v2.4h +; CHECK-NEXT: uaddw v0.4s, v0.4s, v1.4h +; CHECK-NEXT: uaddw2 v1.4s, v6.4s, v1.8h ; CHECK-NEXT: ret %lp1 = load <4 x i8>, ptr %p %p2 = getelementptr i8, ptr %p, i32 4 @@ -1055,38 +1055,38 @@ ; CHECK-LABEL: extrause_ext2: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp s0, s1, [x2] -; CHECK-NEXT: ldp s2, s3, [x0] ; CHECK-NEXT: add x8, x3, #8 +; CHECK-NEXT: ldp s2, s3, [x0] ; CHECK-NEXT: add x9, x1, #8 +; CHECK-NEXT: add x10, x3, #12 ; CHECK-NEXT: ldp s4, s5, [x2, #8] -; CHECK-NEXT: add x10, x1, #12 -; CHECK-NEXT: add x11, x3, #12 ; CHECK-NEXT: ldp s6, s7, [x0, #8] +; CHECK-NEXT: add x11, x1, #12 ; CHECK-NEXT: ld1 { v0.s }[1], [x3], #4 ; CHECK-NEXT: ld1 { v2.s }[1], [x1], #4 -; CHECK-NEXT: ld1 { v5.s }[1], [x11] +; CHECK-NEXT: ld1 { v5.s }[1], [x10] +; CHECK-NEXT: ld1 { v7.s }[1], [x11] ; 
CHECK-NEXT: ld1 { v6.s }[1], [x9] -; CHECK-NEXT: ld1 { v7.s }[1], [x10] -; CHECK-NEXT: ld1 { v3.s }[1], [x1] -; CHECK-NEXT: ld1 { v1.s }[1], [x3] ; CHECK-NEXT: ld1 { v4.s }[1], [x8] +; CHECK-NEXT: ld1 { v1.s }[1], [x3] +; CHECK-NEXT: ld1 { v3.s }[1], [x1] ; CHECK-NEXT: uaddl v2.8h, v2.8b, v6.8b -; CHECK-NEXT: uaddl v6.8h, v3.8b, v7.8b -; CHECK-NEXT: uaddl v5.8h, v1.8b, v5.8b ; CHECK-NEXT: uaddl v4.8h, v0.8b, v4.8b -; CHECK-NEXT: ushll2 v0.4s, v6.8h, #3 -; CHECK-NEXT: ushll v3.4s, v6.4h, #3 -; CHECK-NEXT: ushll v7.4s, v5.4h, #0 -; CHECK-NEXT: ushll2 v16.4s, v5.8h, #0 -; CHECK-NEXT: uaddw2 v1.4s, v0.4s, v2.8h -; CHECK-NEXT: uaddw v0.4s, v3.4s, v2.4h -; CHECK-NEXT: stp q7, q16, [x4, #32] -; CHECK-NEXT: ushll2 v2.4s, v5.8h, #3 -; CHECK-NEXT: ushll v5.4s, v5.4h, #3 -; CHECK-NEXT: uaddw2 v3.4s, v2.4s, v4.8h +; CHECK-NEXT: uaddl v7.8h, v3.8b, v7.8b +; CHECK-NEXT: uaddl v3.8h, v1.8b, v5.8b +; CHECK-NEXT: ushll v0.4s, v7.4h, #3 +; CHECK-NEXT: ushll2 v1.4s, v7.8h, #3 +; CHECK-NEXT: ushll v5.4s, v3.4h, #3 +; CHECK-NEXT: ushll2 v6.4s, v3.8h, #3 +; CHECK-NEXT: ushll2 v16.4s, v3.8h, #0 +; CHECK-NEXT: ushll v17.4s, v3.4h, #0 +; CHECK-NEXT: uaddw2 v1.4s, v1.4s, v2.8h +; CHECK-NEXT: uaddw v0.4s, v0.4s, v2.4h ; CHECK-NEXT: uaddw v2.4s, v5.4s, v4.4h -; CHECK-NEXT: ushll2 v4.4s, v6.8h, #0 -; CHECK-NEXT: ushll v5.4s, v6.4h, #0 +; CHECK-NEXT: uaddw2 v3.4s, v6.4s, v4.8h +; CHECK-NEXT: ushll2 v4.4s, v7.8h, #0 +; CHECK-NEXT: ushll v5.4s, v7.4h, #0 +; CHECK-NEXT: stp q17, q16, [x4, #32] ; CHECK-NEXT: stp q5, q4, [x4] ; CHECK-NEXT: ret %lp1 = load <4 x i8>, ptr %p @@ -1157,35 +1157,35 @@ ; CHECK-LABEL: extrause_shl: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp s0, s1, [x0] -; CHECK-NEXT: ldp s2, s3, [x2] ; CHECK-NEXT: add x8, x3, #8 +; CHECK-NEXT: ldp s2, s3, [x2] ; CHECK-NEXT: add x9, x1, #8 +; CHECK-NEXT: add x10, x3, #12 ; CHECK-NEXT: ldp s4, s5, [x0, #8] -; CHECK-NEXT: add x10, x1, #12 -; CHECK-NEXT: add x11, x3, #12 ; CHECK-NEXT: ldp s6, s7, [x2, #8] +; CHECK-NEXT: add x11, x1, #12 ; CHECK-NEXT: ld1 { v2.s }[1], [x3], #4 ; CHECK-NEXT: ld1 { v0.s }[1], [x1], #4 -; CHECK-NEXT: ld1 { v5.s }[1], [x10] -; CHECK-NEXT: ld1 { v7.s }[1], [x11] -; CHECK-NEXT: ld1 { v3.s }[1], [x3] -; CHECK-NEXT: ld1 { v1.s }[1], [x1] +; CHECK-NEXT: ld1 { v5.s }[1], [x11] +; CHECK-NEXT: ld1 { v7.s }[1], [x10] ; CHECK-NEXT: ld1 { v4.s }[1], [x9] ; CHECK-NEXT: ld1 { v6.s }[1], [x8] +; CHECK-NEXT: ld1 { v3.s }[1], [x3] +; CHECK-NEXT: ld1 { v1.s }[1], [x1] +; CHECK-NEXT: uaddl v4.8h, v0.8b, v4.8b +; CHECK-NEXT: uaddl v2.8h, v2.8b, v6.8b ; CHECK-NEXT: uaddl v3.8h, v3.8b, v7.8b ; CHECK-NEXT: uaddl v1.8h, v1.8b, v5.8b -; CHECK-NEXT: uaddl v0.8h, v0.8b, v4.8b -; CHECK-NEXT: uaddl v2.8h, v2.8b, v6.8b -; CHECK-NEXT: ushll v4.4s, v1.4h, #3 -; CHECK-NEXT: ushll v5.4s, v3.4h, #3 -; CHECK-NEXT: ushll2 v6.4s, v1.8h, #3 -; CHECK-NEXT: ushll2 v7.4s, v3.8h, #3 -; CHECK-NEXT: uaddw2 v1.4s, v6.4s, v0.8h -; CHECK-NEXT: stp q4, q6, [x4] -; CHECK-NEXT: uaddw2 v3.4s, v7.4s, v2.8h -; CHECK-NEXT: stp q5, q7, [x4, #32] -; CHECK-NEXT: uaddw v0.4s, v4.4s, v0.4h -; CHECK-NEXT: uaddw v2.4s, v5.4s, v2.4h +; CHECK-NEXT: ushll v6.4s, v3.4h, #3 +; CHECK-NEXT: ushll2 v16.4s, v3.8h, #3 +; CHECK-NEXT: ushll v5.4s, v1.4h, #3 +; CHECK-NEXT: ushll2 v7.4s, v1.8h, #3 +; CHECK-NEXT: uaddw2 v3.4s, v16.4s, v2.8h +; CHECK-NEXT: uaddw v2.4s, v6.4s, v2.4h +; CHECK-NEXT: stp q6, q16, [x4, #32] +; CHECK-NEXT: uaddw v0.4s, v5.4s, v4.4h +; CHECK-NEXT: uaddw2 v1.4s, v7.4s, v4.8h +; CHECK-NEXT: stp q5, q7, [x4] ; CHECK-NEXT: ret %lp1 = load <4 x i8>, ptr %p %p2 = getelementptr i8, ptr %p, i32 4 @@ 
-1283,16 +1283,16 @@ define <8 x i32> @commuted_loads2(ptr %p1, ptr %p2) { ; CHECK-LABEL: commuted_loads2: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp d0, d1, [x0] -; CHECK-NEXT: ldp d2, d3, [x1] -; CHECK-NEXT: add v0.8b, v0.8b, v2.8b -; CHECK-NEXT: add v1.8b, v1.8b, v3.8b +; CHECK-NEXT: ldp d0, d3, [x1] +; CHECK-NEXT: ldp d1, d2, [x0] +; CHECK-NEXT: add v0.8b, v1.8b, v0.8b +; CHECK-NEXT: add v1.8b, v2.8b, v3.8b ; CHECK-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-NEXT: ushll v2.8h, v1.8b, #0 -; CHECK-NEXT: ushll2 v1.4s, v0.8h, #3 -; CHECK-NEXT: ushll v0.4s, v0.4h, #3 -; CHECK-NEXT: uaddw2 v1.4s, v1.4s, v2.8h -; CHECK-NEXT: uaddw v0.4s, v0.4s, v2.4h +; CHECK-NEXT: ushll v3.4s, v0.4h, #3 +; CHECK-NEXT: ushll2 v0.4s, v0.8h, #3 +; CHECK-NEXT: uaddw2 v1.4s, v0.4s, v2.8h +; CHECK-NEXT: uaddw v0.4s, v3.4s, v2.4h ; CHECK-NEXT: ret %l11 = load <8 x i8>, ptr %p1 %q1 = getelementptr i8, ptr %p1, i32 8 @@ -1312,16 +1312,17 @@ define <8 x i32> @commuted_sub(ptr %p1, ptr %p2) { ; CHECK-LABEL: commuted_sub: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp d0, d1, [x0] -; CHECK-NEXT: ldp d3, d2, [x1] -; CHECK-NEXT: add v0.8b, v0.8b, v3.8b +; CHECK-NEXT: ldp d2, d1, [x1] +; CHECK-NEXT: ldr d0, [x0, #8] +; CHECK-NEXT: add v0.8b, v0.8b, v1.8b +; CHECK-NEXT: ldr d1, [x0] ; CHECK-NEXT: add v1.8b, v1.8b, v2.8b ; CHECK-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-NEXT: ushll2 v2.4s, v1.8h, #3 -; CHECK-NEXT: ushll v3.4s, v1.4h, #3 -; CHECK-NEXT: usubw2 v1.4s, v2.4s, v0.8h -; CHECK-NEXT: usubw v0.4s, v3.4s, v0.4h +; CHECK-NEXT: ushll v2.8h, v1.8b, #0 +; CHECK-NEXT: ushll v3.4s, v0.4h, #3 +; CHECK-NEXT: ushll2 v0.4s, v0.8h, #3 +; CHECK-NEXT: usubw2 v1.4s, v0.4s, v2.8h +; CHECK-NEXT: usubw v0.4s, v3.4s, v2.4h ; CHECK-NEXT: ret %l11 = load <8 x i8>, ptr %p1 %q1 = getelementptr i8, ptr %p1, i32 8 @@ -1362,15 +1363,15 @@ ; CHECK-LABEL: atomic: ; CHECK: // %bb.0: ; CHECK-NEXT: ldar w8, [x0] -; CHECK-NEXT: ldr s0, [x0, #4] -; CHECK-NEXT: movi v2.2d, #0x0000ff000000ff -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: zip1 v1.8b, v1.8b, v0.8b -; CHECK-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-NEXT: ushll v0.4s, v0.4h, #3 -; CHECK-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-NEXT: and v1.16b, v1.16b, v2.16b -; CHECK-NEXT: add v0.4s, v1.4s, v0.4s +; CHECK-NEXT: movi v0.2d, #0x0000ff000000ff +; CHECK-NEXT: ldr s1, [x0, #4] +; CHECK-NEXT: fmov s2, w8 +; CHECK-NEXT: ushll v1.8h, v1.8b, #0 +; CHECK-NEXT: zip1 v2.8b, v2.8b, v0.8b +; CHECK-NEXT: ushll v1.4s, v1.4h, #3 +; CHECK-NEXT: ushll v2.4s, v2.4h, #0 +; CHECK-NEXT: and v0.16b, v2.16b, v0.16b +; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %l1b = load atomic float, ptr %p acquire, align 4 %l1 = bitcast float %l1b to <4 x i8> @@ -1391,8 +1392,8 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: ldr s0, [x0] ; CHECK-NEXT: ldr s1, [x0, #4] -; CHECK-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-NEXT: ushll v1.8h, v1.8b, #0 +; CHECK-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-NEXT: ushll v1.4s, v1.4h, #3 ; CHECK-NEXT: uaddw v0.4s, v1.4s, v0.4h ; CHECK-NEXT: add sp, sp, #16 diff --git a/llvm/test/CodeGen/AArch64/extend_inreg_of_concat_subvectors.ll b/llvm/test/CodeGen/AArch64/extend_inreg_of_concat_subvectors.ll --- a/llvm/test/CodeGen/AArch64/extend_inreg_of_concat_subvectors.ll +++ b/llvm/test/CodeGen/AArch64/extend_inreg_of_concat_subvectors.ll @@ -18,14 +18,14 @@ ; ; CHECK-BE-LABEL: zext_of_concat: ; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: ld1 { v0.2s }, [x0] ; CHECK-BE-NEXT: ld1 { v1.2s }, [x1] -; CHECK-BE-NEXT: ld1 { v2.2s }, [x0] -; CHECK-BE-NEXT: movi v0.2d, #0000000000000000 -; 
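The extbinopload tests that end above all follow one scheme: small <4 x i8>/<8 x i8> loads are widened and combined with shifts and adds, and the checks verify that the backend stays in widening NEON ops (uaddl/ushll/uaddw/usubw) rather than fully extending each operand first. Roughly this shape, with invented names and the shift amount taken from the #3 in the checks:

    #include <stdint.h>

    void ext_shl_add(const uint8_t *p, const uint8_t *q, uint32_t out[4]) {
      for (int i = 0; i < 4; i++)
        out[i] = (uint32_t)p[i] + ((uint32_t)q[i] << 3);  /* ushll #3 + uaddw */
    }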
CHECK-BE-NEXT: add v1.2s, v2.2s, v1.2s -; CHECK-BE-NEXT: ld1 { v2.4s }, [x2] -; CHECK-BE-NEXT: zip1 v1.4s, v1.4s, v1.4s -; CHECK-BE-NEXT: trn2 v0.4s, v1.4s, v0.4s -; CHECK-BE-NEXT: add v0.4s, v0.4s, v2.4s +; CHECK-BE-NEXT: add v0.2s, v0.2s, v1.2s +; CHECK-BE-NEXT: movi v1.2d, #0000000000000000 +; CHECK-BE-NEXT: zip1 v0.4s, v0.4s, v0.4s +; CHECK-BE-NEXT: trn2 v0.4s, v0.4s, v1.4s +; CHECK-BE-NEXT: ld1 { v1.4s }, [x2] +; CHECK-BE-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-BE-NEXT: st1 { v0.4s }, [x2] ; CHECK-BE-NEXT: ret %i0.a = load <2 x i32>, ptr %a @@ -42,29 +42,29 @@ define void @zext_of_concat_extrause(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) nounwind { ; CHECK-LABEL: zext_of_concat_extrause: ; CHECK: ; %bb.0: +; CHECK-NEXT: ldr d0, [x1] ; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: ldr d2, [x1] -; CHECK-NEXT: movi.2d v0, #0000000000000000 -; CHECK-NEXT: add.2s v1, v1, v2 -; CHECK-NEXT: mov.d v1[1], v1[0] -; CHECK-NEXT: str q1, [x4] -; CHECK-NEXT: zip1.4s v0, v1, v0 -; CHECK-NEXT: ldr q1, [x2] -; CHECK-NEXT: add.4s v0, v0, v1 +; CHECK-NEXT: add.2s v0, v1, v0 +; CHECK-NEXT: movi.2d v1, #0000000000000000 +; CHECK-NEXT: mov.d v0[1], v0[0] +; CHECK-NEXT: zip1.4s v1, v0, v1 +; CHECK-NEXT: str q0, [x4] +; CHECK-NEXT: ldr q0, [x2] +; CHECK-NEXT: add.4s v0, v1, v0 ; CHECK-NEXT: str q0, [x2] ; CHECK-NEXT: ret ; ; CHECK-BE-LABEL: zext_of_concat_extrause: ; CHECK-BE: // %bb.0: +; CHECK-BE-NEXT: ld1 { v0.2s }, [x1] ; CHECK-BE-NEXT: ld1 { v1.2s }, [x0] -; CHECK-BE-NEXT: ld1 { v2.2s }, [x1] -; CHECK-BE-NEXT: movi v0.2d, #0000000000000000 -; CHECK-BE-NEXT: add v1.2s, v1.2s, v2.2s -; CHECK-BE-NEXT: mov v1.d[1], v1.d[0] -; CHECK-BE-NEXT: zip1 v2.4s, v1.4s, v1.4s -; CHECK-BE-NEXT: st1 { v1.4s }, [x4] +; CHECK-BE-NEXT: movi v2.2d, #0000000000000000 +; CHECK-BE-NEXT: add v0.2s, v1.2s, v0.2s +; CHECK-BE-NEXT: mov v0.d[1], v0.d[0] +; CHECK-BE-NEXT: zip1 v1.4s, v0.4s, v0.4s +; CHECK-BE-NEXT: st1 { v0.4s }, [x4] +; CHECK-BE-NEXT: trn2 v0.4s, v1.4s, v2.4s ; CHECK-BE-NEXT: ld1 { v1.4s }, [x2] -; CHECK-BE-NEXT: trn2 v0.4s, v2.4s, v0.4s ; CHECK-BE-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-BE-NEXT: st1 { v0.4s }, [x2] ; CHECK-BE-NEXT: ret @@ -116,9 +116,9 @@ define void @aext_of_concat_extrause(ptr %a, ptr %b, ptr %c, ptr %d, ptr %e) nounwind { ; CHECK-LABEL: aext_of_concat_extrause: ; CHECK: ; %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: add.2s v0, v0, v1 +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: add.2s v0, v1, v0 ; CHECK-NEXT: mov.16b v1, v0 ; CHECK-NEXT: mov.d v1[1], v0[0] ; CHECK-NEXT: zip1.4s v0, v0, v0 @@ -130,9 +130,9 @@ ; ; CHECK-BE-LABEL: aext_of_concat_extrause: ; CHECK-BE: // %bb.0: -; CHECK-BE-NEXT: ld1 { v0.2s }, [x0] -; CHECK-BE-NEXT: ld1 { v1.2s }, [x1] -; CHECK-BE-NEXT: add v0.2s, v0.2s, v1.2s +; CHECK-BE-NEXT: ld1 { v0.2s }, [x1] +; CHECK-BE-NEXT: ld1 { v1.2s }, [x0] +; CHECK-BE-NEXT: add v0.2s, v1.2s, v0.2s ; CHECK-BE-NEXT: mov v1.16b, v0.16b ; CHECK-BE-NEXT: mov v1.d[1], v0.d[0] ; CHECK-BE-NEXT: zip1 v0.4s, v0.4s, v0.4s diff --git a/llvm/test/CodeGen/AArch64/extract-bits.ll b/llvm/test/CodeGen/AArch64/extract-bits.ll --- a/llvm/test/CodeGen/AArch64/extract-bits.ll +++ b/llvm/test/CodeGen/AArch64/extract-bits.ll @@ -21,7 +21,7 @@ define i32 @bextr32_a0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { ; CHECK-LABEL: bextr32_a0: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: lsr w9, w0, w1 ; CHECK-NEXT: lsl w8, w8, w2 ; CHECK-NEXT: sub w8, w8, #1 @@ -37,7 +37,7 @@ define i32 @bextr32_a0_arithmetic(i32 
%val, i32 %numskipbits, i32 %numlowbits) nounwind { ; CHECK-LABEL: bextr32_a0_arithmetic: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: asr w9, w0, w1 ; CHECK-NEXT: lsl w8, w8, w2 ; CHECK-NEXT: sub w8, w8, #1 @@ -53,7 +53,7 @@ define i32 @bextr32_a1_indexzext(i32 %val, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind { ; CHECK-LABEL: bextr32_a1_indexzext: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: lsr w9, w0, w1 ; CHECK-NEXT: lsl w8, w8, w2 ; CHECK-NEXT: sub w8, w8, #1 @@ -71,12 +71,12 @@ define i32 @bextr32_a2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind { ; CHECK-LABEL: bextr32_a2_load: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr w9, [x0] -; CHECK-NEXT: mov w8, #1 -; CHECK-NEXT: lsl w8, w8, w2 -; CHECK-NEXT: sub w8, w8, #1 -; CHECK-NEXT: lsr w9, w9, w1 -; CHECK-NEXT: and w0, w8, w9 +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: mov w9, #1 // =0x1 +; CHECK-NEXT: lsl w9, w9, w2 +; CHECK-NEXT: lsr w8, w8, w1 +; CHECK-NEXT: sub w9, w9, #1 +; CHECK-NEXT: and w0, w9, w8 ; CHECK-NEXT: ret %val = load i32, ptr %w %shifted = lshr i32 %val, %numskipbits @@ -89,12 +89,12 @@ define i32 @bextr32_a3_load_indexzext(ptr %w, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind { ; CHECK-LABEL: bextr32_a3_load_indexzext: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr w9, [x0] -; CHECK-NEXT: mov w8, #1 -; CHECK-NEXT: lsl w8, w8, w2 -; CHECK-NEXT: sub w8, w8, #1 -; CHECK-NEXT: lsr w9, w9, w1 -; CHECK-NEXT: and w0, w8, w9 +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: mov w9, #1 // =0x1 +; CHECK-NEXT: lsl w9, w9, w2 +; CHECK-NEXT: lsr w8, w8, w1 +; CHECK-NEXT: sub w9, w9, #1 +; CHECK-NEXT: and w0, w9, w8 ; CHECK-NEXT: ret %val = load i32, ptr %w %skip = zext i8 %numskipbits to i32 @@ -109,7 +109,7 @@ define i32 @bextr32_a4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { ; CHECK-LABEL: bextr32_a4_commutative: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: lsr w9, w0, w1 ; CHECK-NEXT: lsl w8, w8, w2 ; CHECK-NEXT: sub w8, w8, #1 @@ -127,7 +127,7 @@ define i64 @bextr64_a0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; CHECK-LABEL: bextr64_a0: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: lsr x9, x0, x1 ; CHECK-NEXT: lsl x8, x8, x2 ; CHECK-NEXT: sub x8, x8, #1 @@ -143,7 +143,7 @@ define i64 @bextr64_a0_arithmetic(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; CHECK-LABEL: bextr64_a0_arithmetic: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: asr x9, x0, x1 ; CHECK-NEXT: lsl x8, x8, x2 ; CHECK-NEXT: sub x8, x8, #1 @@ -159,7 +159,7 @@ define i64 @bextr64_a1_indexzext(i64 %val, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind { ; CHECK-LABEL: bextr64_a1_indexzext: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: lsr x9, x0, x1 @@ -179,12 +179,12 @@ define i64 @bextr64_a2_load(ptr %w, i64 %numskipbits, i64 %numlowbits) nounwind { ; CHECK-LABEL: bextr64_a2_load: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr x9, [x0] -; CHECK-NEXT: mov w8, #1 -; CHECK-NEXT: lsl x8, x8, x2 -; CHECK-NEXT: sub x8, x8, #1 -; CHECK-NEXT: lsr x9, x9, x1 -; CHECK-NEXT: and x0, x8, x9 +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: mov w9, #1 // =0x1 +; CHECK-NEXT: lsl x9, x9, x2 +; CHECK-NEXT: lsr x8, x8, x1 +; CHECK-NEXT: sub x9, 
x9, #1 +; CHECK-NEXT: and x0, x9, x8 ; CHECK-NEXT: ret %val = load i64, ptr %w %shifted = lshr i64 %val, %numskipbits @@ -197,14 +197,14 @@ define i64 @bextr64_a3_load_indexzext(ptr %w, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind { ; CHECK-LABEL: bextr64_a3_load_indexzext: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr x9, [x0] -; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: mov w9, #1 // =0x1 ; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2 -; CHECK-NEXT: lsl x8, x8, x2 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: sub x8, x8, #1 -; CHECK-NEXT: lsr x9, x9, x1 -; CHECK-NEXT: and x0, x8, x9 +; CHECK-NEXT: lsl x9, x9, x2 +; CHECK-NEXT: lsr x8, x8, x1 +; CHECK-NEXT: sub x9, x9, #1 +; CHECK-NEXT: and x0, x9, x8 ; CHECK-NEXT: ret %val = load i64, ptr %w %skip = zext i8 %numskipbits to i64 @@ -219,7 +219,7 @@ define i64 @bextr64_a4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; CHECK-LABEL: bextr64_a4_commutative: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: lsr x9, x0, x1 ; CHECK-NEXT: lsl x8, x8, x2 ; CHECK-NEXT: sub x8, x8, #1 @@ -238,7 +238,7 @@ define i32 @bextr64_32_a0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; CHECK-LABEL: bextr64_32_a0: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: lsr x9, x0, x1 ; CHECK-NEXT: lsl x8, x8, x2 ; CHECK-NEXT: sub w8, w8, #1 @@ -256,7 +256,7 @@ define i32 @bextr64_32_a1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind { ; CHECK-LABEL: bextr64_32_a1: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: lsr x9, x0, x1 ; CHECK-NEXT: lsl w8, w8, w2 ; CHECK-NEXT: sub w8, w8, #1 @@ -275,7 +275,7 @@ define i32 @bextr64_32_a2(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind { ; CHECK-LABEL: bextr64_32_a2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: lsr x9, x0, x1 ; CHECK-NEXT: lsl w8, w8, w2 ; CHECK-NEXT: sub w8, w8, #1 @@ -297,7 +297,7 @@ define i32 @bextr32_b0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { ; CHECK-LABEL: bextr32_b0: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-1 +; CHECK-NEXT: mov w8, #-1 // =0xffffffff ; CHECK-NEXT: lsr w9, w0, w1 ; CHECK-NEXT: lsl w8, w8, w2 ; CHECK-NEXT: bic w0, w9, w8 @@ -312,7 +312,7 @@ define i32 @bextr32_b1_indexzext(i32 %val, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind { ; CHECK-LABEL: bextr32_b1_indexzext: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-1 +; CHECK-NEXT: mov w8, #-1 // =0xffffffff ; CHECK-NEXT: lsr w9, w0, w1 ; CHECK-NEXT: lsl w8, w8, w2 ; CHECK-NEXT: bic w0, w9, w8 @@ -329,11 +329,11 @@ define i32 @bextr32_b2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind { ; CHECK-LABEL: bextr32_b2_load: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr w9, [x0] -; CHECK-NEXT: mov w8, #-1 -; CHECK-NEXT: lsl w8, w8, w2 -; CHECK-NEXT: lsr w9, w9, w1 -; CHECK-NEXT: bic w0, w9, w8 +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: mov w9, #-1 // =0xffffffff +; CHECK-NEXT: lsl w9, w9, w2 +; CHECK-NEXT: lsr w8, w8, w1 +; CHECK-NEXT: bic w0, w8, w9 ; CHECK-NEXT: ret %val = load i32, ptr %w %shifted = lshr i32 %val, %numskipbits @@ -346,11 +346,11 @@ define i32 @bextr32_b3_load_indexzext(ptr %w, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind { ; CHECK-LABEL: bextr32_b3_load_indexzext: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr w9, [x0] -; CHECK-NEXT: mov w8, #-1 -; CHECK-NEXT: lsl w8, w8, w2 -; CHECK-NEXT: lsr w9, w9, w1 -; CHECK-NEXT: bic w0, 
w9, w8 +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: mov w9, #-1 // =0xffffffff +; CHECK-NEXT: lsl w9, w9, w2 +; CHECK-NEXT: lsr w8, w8, w1 +; CHECK-NEXT: bic w0, w8, w9 ; CHECK-NEXT: ret %val = load i32, ptr %w %skip = zext i8 %numskipbits to i32 @@ -365,7 +365,7 @@ define i32 @bextr32_b4_commutative(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind { ; CHECK-LABEL: bextr32_b4_commutative: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-1 +; CHECK-NEXT: mov w8, #-1 // =0xffffffff ; CHECK-NEXT: lsr w9, w0, w1 ; CHECK-NEXT: lsl w8, w8, w2 ; CHECK-NEXT: bic w0, w9, w8 @@ -382,7 +382,7 @@ define i64 @bextr64_b0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; CHECK-LABEL: bextr64_b0: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-1 +; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff ; CHECK-NEXT: lsr x9, x0, x1 ; CHECK-NEXT: lsl x8, x8, x2 ; CHECK-NEXT: bic x0, x9, x8 @@ -397,7 +397,7 @@ define i64 @bextr64_b1_indexzext(i64 %val, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind { ; CHECK-LABEL: bextr64_b1_indexzext: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-1 +; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff ; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: lsr x9, x0, x1 @@ -416,11 +416,11 @@ define i64 @bextr64_b2_load(ptr %w, i64 %numskipbits, i64 %numlowbits) nounwind { ; CHECK-LABEL: bextr64_b2_load: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr x9, [x0] -; CHECK-NEXT: mov x8, #-1 -; CHECK-NEXT: lsl x8, x8, x2 -; CHECK-NEXT: lsr x9, x9, x1 -; CHECK-NEXT: bic x0, x9, x8 +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: mov x9, #-1 // =0xffffffffffffffff +; CHECK-NEXT: lsl x9, x9, x2 +; CHECK-NEXT: lsr x8, x8, x1 +; CHECK-NEXT: bic x0, x8, x9 ; CHECK-NEXT: ret %val = load i64, ptr %w %shifted = lshr i64 %val, %numskipbits @@ -433,13 +433,13 @@ define i64 @bextr64_b3_load_indexzext(ptr %w, i8 zeroext %numskipbits, i8 zeroext %numlowbits) nounwind { ; CHECK-LABEL: bextr64_b3_load_indexzext: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr x9, [x0] -; CHECK-NEXT: mov x8, #-1 +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: mov x9, #-1 // =0xffffffffffffffff ; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: lsl x8, x8, x2 -; CHECK-NEXT: lsr x9, x9, x1 -; CHECK-NEXT: bic x0, x9, x8 +; CHECK-NEXT: lsl x9, x9, x2 +; CHECK-NEXT: lsr x8, x8, x1 +; CHECK-NEXT: bic x0, x8, x9 ; CHECK-NEXT: ret %val = load i64, ptr %w %skip = zext i8 %numskipbits to i64 @@ -454,7 +454,7 @@ define i64 @bextr64_b4_commutative(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind { ; CHECK-LABEL: bextr64_b4_commutative: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-1 +; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff ; CHECK-NEXT: lsr x9, x0, x1 ; CHECK-NEXT: lsl x8, x8, x2 ; CHECK-NEXT: bic x0, x9, x8 @@ -472,7 +472,7 @@ define i32 @bextr64_32_b0(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind { ; CHECK-LABEL: bextr64_32_b0: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-1 +; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff ; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2 ; CHECK-NEXT: lsr x9, x0, x1 ; CHECK-NEXT: lsl x8, x8, x2 @@ -491,7 +491,7 @@ define i32 @bextr64_32_b1(i64 %val, i64 %numskipbits, i8 %numlowbits) nounwind { ; CHECK-LABEL: bextr64_32_b1: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-1 +; CHECK-NEXT: mov w8, #-1 // =0xffffffff ; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2 ; CHECK-NEXT: lsr x9, x0, x1 ; CHECK-NEXT: lsl w8, w8, w2 @@ -511,7 +511,7 @@ define i32 @bextr64_32_b2(i64 %val, i64 
%numskipbits, i8 %numlowbits) nounwind { ; CHECK-LABEL: bextr64_32_b2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-1 +; CHECK-NEXT: mov w8, #-1 // =0xffffffff ; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2 ; CHECK-NEXT: lsr x9, x0, x1 ; CHECK-NEXT: lsl w8, w8, w2 @@ -535,7 +535,7 @@ ; CHECK-LABEL: bextr32_c0: ; CHECK: // %bb.0: ; CHECK-NEXT: neg w8, w2 -; CHECK-NEXT: mov w9, #-1 +; CHECK-NEXT: mov w9, #-1 // =0xffffffff ; CHECK-NEXT: lsr w10, w0, w1 ; CHECK-NEXT: lsr w8, w9, w8 ; CHECK-NEXT: and w0, w8, w10 @@ -550,11 +550,11 @@ define i32 @bextr32_c1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) nounwind { ; CHECK-LABEL: bextr32_c1_indexzext: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32 -; CHECK-NEXT: mov w9, #-1 -; CHECK-NEXT: sub w8, w8, w2 +; CHECK-NEXT: mov w8, #32 // =0x20 +; CHECK-NEXT: mov w9, #-1 // =0xffffffff ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: lsr w10, w0, w1 +; CHECK-NEXT: sub w8, w8, w2 ; CHECK-NEXT: lsr w8, w9, w8 ; CHECK-NEXT: and w0, w8, w10 ; CHECK-NEXT: ret @@ -570,12 +570,12 @@ define i32 @bextr32_c2_load(ptr %w, i32 %numskipbits, i32 %numlowbits) nounwind { ; CHECK-LABEL: bextr32_c2_load: ; CHECK: // %bb.0: -; CHECK-NEXT: neg w8, w2 -; CHECK-NEXT: ldr w9, [x0] -; CHECK-NEXT: mov w10, #-1 -; CHECK-NEXT: lsr w9, w9, w1 -; CHECK-NEXT: lsr w8, w10, w8 -; CHECK-NEXT: and w0, w8, w9 +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: neg w9, w2 +; CHECK-NEXT: mov w10, #-1 // =0xffffffff +; CHECK-NEXT: lsr w9, w10, w9 +; CHECK-NEXT: lsr w8, w8, w1 +; CHECK-NEXT: and w0, w9, w8 ; CHECK-NEXT: ret %val = load i32, ptr %w %shifted = lshr i32 %val, %numskipbits @@ -588,14 +588,14 @@ define i32 @bextr32_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) nounwind { ; CHECK-LABEL: bextr32_c3_load_indexzext: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32 -; CHECK-NEXT: ldr w9, [x0] -; CHECK-NEXT: sub w8, w8, w2 -; CHECK-NEXT: mov w10, #-1 +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: mov w9, #32 // =0x20 +; CHECK-NEXT: mov w10, #-1 // =0xffffffff +; CHECK-NEXT: sub w9, w9, w2 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: lsr w9, w9, w1 -; CHECK-NEXT: lsr w8, w10, w8 -; CHECK-NEXT: and w0, w8, w9 +; CHECK-NEXT: lsr w8, w8, w1 +; CHECK-NEXT: lsr w9, w10, w9 +; CHECK-NEXT: and w0, w9, w8 ; CHECK-NEXT: ret %val = load i32, ptr %w %skip = zext i8 %numskipbits to i32 @@ -611,7 +611,7 @@ ; CHECK-LABEL: bextr32_c4_commutative: ; CHECK: // %bb.0: ; CHECK-NEXT: neg w8, w2 -; CHECK-NEXT: mov w9, #-1 +; CHECK-NEXT: mov w9, #-1 // =0xffffffff ; CHECK-NEXT: lsr w10, w0, w1 ; CHECK-NEXT: lsr w8, w9, w8 ; CHECK-NEXT: and w0, w10, w8 @@ -629,7 +629,7 @@ ; CHECK-LABEL: bextr64_c0: ; CHECK: // %bb.0: ; CHECK-NEXT: neg x8, x2 -; CHECK-NEXT: mov x9, #-1 +; CHECK-NEXT: mov x9, #-1 // =0xffffffffffffffff ; CHECK-NEXT: lsr x10, x0, x1 ; CHECK-NEXT: lsr x8, x9, x8 ; CHECK-NEXT: and x0, x8, x10 @@ -644,11 +644,11 @@ define i64 @bextr64_c1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) nounwind { ; CHECK-LABEL: bextr64_c1_indexzext: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #64 -; CHECK-NEXT: mov x9, #-1 -; CHECK-NEXT: sub w8, w8, w2 +; CHECK-NEXT: mov w8, #64 // =0x40 +; CHECK-NEXT: mov x9, #-1 // =0xffffffffffffffff ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: lsr x10, x0, x1 +; CHECK-NEXT: sub w8, w8, w2 ; CHECK-NEXT: lsr x8, x9, x8 ; CHECK-NEXT: and x0, x8, x10 ; CHECK-NEXT: ret @@ -664,12 +664,12 @@ define i64 @bextr64_c2_load(ptr %w, i64 %numskipbits, i64 %numlowbits) nounwind { ; CHECK-LABEL: bextr64_c2_load: ; CHECK: // %bb.0: 
-; CHECK-NEXT: neg x8, x2
-; CHECK-NEXT: ldr x9, [x0]
-; CHECK-NEXT: mov x10, #-1
-; CHECK-NEXT: lsr x9, x9, x1
-; CHECK-NEXT: lsr x8, x10, x8
-; CHECK-NEXT: and x0, x8, x9
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: neg x9, x2
+; CHECK-NEXT: mov x10, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: lsr x9, x10, x9
+; CHECK-NEXT: lsr x8, x8, x1
+; CHECK-NEXT: and x0, x9, x8
 ; CHECK-NEXT: ret
 %val = load i64, ptr %w
 %shifted = lshr i64 %val, %numskipbits
@@ -682,14 +682,14 @@
 define i64 @bextr64_c3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) nounwind {
 ; CHECK-LABEL: bextr64_c3_load_indexzext:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #64
-; CHECK-NEXT: ldr x9, [x0]
-; CHECK-NEXT: sub w8, w8, w2
-; CHECK-NEXT: mov x10, #-1
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: mov w9, #64 // =0x40
+; CHECK-NEXT: mov x10, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: sub w9, w9, w2
 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: lsr x9, x9, x1
-; CHECK-NEXT: lsr x8, x10, x8
-; CHECK-NEXT: and x0, x8, x9
+; CHECK-NEXT: lsr x8, x8, x1
+; CHECK-NEXT: lsr x9, x10, x9
+; CHECK-NEXT: and x0, x9, x8
 ; CHECK-NEXT: ret
 %val = load i64, ptr %w
 %skip = zext i8 %numskipbits to i64
@@ -705,7 +705,7 @@
 ; CHECK-LABEL: bextr64_c4_commutative:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: neg x8, x2
-; CHECK-NEXT: mov x9, #-1
+; CHECK-NEXT: mov x9, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: lsr x10, x0, x1
 ; CHECK-NEXT: lsr x8, x9, x8
 ; CHECK-NEXT: and x0, x10, x8
@@ -724,7 +724,7 @@
 ; CHECK-LABEL: bextr64_32_c0:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: neg x8, x2
-; CHECK-NEXT: mov x9, #-1
+; CHECK-NEXT: mov x9, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: lsr x10, x0, x1
 ; CHECK-NEXT: lsr x8, x9, x8
 ; CHECK-NEXT: and w0, w8, w10
@@ -742,7 +742,7 @@
 ; CHECK-LABEL: bextr64_32_c1:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: neg w8, w2
-; CHECK-NEXT: mov w9, #-1
+; CHECK-NEXT: mov w9, #-1 // =0xffffffff
 ; CHECK-NEXT: lsr x10, x0, x1
 ; CHECK-NEXT: lsr w8, w9, w8
 ; CHECK-NEXT: and w0, w8, w10
@@ -761,7 +761,7 @@
 ; CHECK-LABEL: bextr64_32_c2:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: neg w8, w2
-; CHECK-NEXT: mov w9, #-1
+; CHECK-NEXT: mov w9, #-1 // =0xffffffff
 ; CHECK-NEXT: lsr x10, x0, x1
 ; CHECK-NEXT: lsr w8, w9, w8
 ; CHECK-NEXT: and w0, w8, w10
@@ -782,10 +782,10 @@
 define i32 @bextr32_d0(i32 %val, i32 %numskipbits, i32 %numlowbits) nounwind {
 ; CHECK-LABEL: bextr32_d0:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: neg w8, w2
-; CHECK-NEXT: lsr w9, w0, w1
-; CHECK-NEXT: lsl w9, w9, w8
-; CHECK-NEXT: lsr w0, w9, w8
+; CHECK-NEXT: lsr w8, w0, w1
+; CHECK-NEXT: neg w9, w2
+; CHECK-NEXT: lsl w8, w8, w9
+; CHECK-NEXT: lsr w0, w8, w9
 ; CHECK-NEXT: ret
 %shifted = lshr i32 %val, %numskipbits
 %numhighbits = sub i32 32, %numlowbits
@@ -797,12 +797,12 @@
 define i32 @bextr32_d1_indexzext(i32 %val, i8 %numskipbits, i8 %numlowbits) nounwind {
 ; CHECK-LABEL: bextr32_d1_indexzext:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #32
 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: lsr w9, w0, w1
-; CHECK-NEXT: sub w8, w8, w2
-; CHECK-NEXT: lsl w9, w9, w8
-; CHECK-NEXT: lsr w0, w9, w8
+; CHECK-NEXT: lsr w8, w0, w1
+; CHECK-NEXT: mov w9, #32 // =0x20
+; CHECK-NEXT: sub w9, w9, w2
+; CHECK-NEXT: lsl w8, w8, w9
+; CHECK-NEXT: lsr w0, w8, w9
 ; CHECK-NEXT: ret
 %skip = zext i8 %numskipbits to i32
 %shifted = lshr i32 %val, %skip
@@ -833,13 +833,13 @@
 define i32 @bextr32_d3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) nounwind {
 ; CHECK-LABEL: bextr32_d3_load_indexzext:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #32
-; CHECK-NEXT: ldr w9, [x0]
-; CHECK-NEXT: sub w8, w8, w2
+; CHECK-NEXT: ldr w8, [x0]
 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: lsr w9, w9, w1
-; CHECK-NEXT: lsl w9, w9, w8
-; CHECK-NEXT: lsr w0, w9, w8
+; CHECK-NEXT: mov w9, #32 // =0x20
+; CHECK-NEXT: sub w9, w9, w2
+; CHECK-NEXT: lsr w8, w8, w1
+; CHECK-NEXT: lsl w8, w8, w9
+; CHECK-NEXT: lsr w0, w8, w9
 ; CHECK-NEXT: ret
 %val = load i32, ptr %w
 %skip = zext i8 %numskipbits to i32
@@ -856,10 +856,10 @@
 define i64 @bextr64_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; CHECK-LABEL: bextr64_d0:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: neg x8, x2
-; CHECK-NEXT: lsr x9, x0, x1
-; CHECK-NEXT: lsl x9, x9, x8
-; CHECK-NEXT: lsr x0, x9, x8
+; CHECK-NEXT: lsr x8, x0, x1
+; CHECK-NEXT: neg x9, x2
+; CHECK-NEXT: lsl x8, x8, x9
+; CHECK-NEXT: lsr x0, x8, x9
 ; CHECK-NEXT: ret
 %shifted = lshr i64 %val, %numskipbits
 %numhighbits = sub i64 64, %numlowbits
@@ -871,12 +871,12 @@
 define i64 @bextr64_d1_indexzext(i64 %val, i8 %numskipbits, i8 %numlowbits) nounwind {
 ; CHECK-LABEL: bextr64_d1_indexzext:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #64
 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: lsr x9, x0, x1
-; CHECK-NEXT: sub w8, w8, w2
-; CHECK-NEXT: lsl x9, x9, x8
-; CHECK-NEXT: lsr x0, x9, x8
+; CHECK-NEXT: lsr x8, x0, x1
+; CHECK-NEXT: mov w9, #64 // =0x40
+; CHECK-NEXT: sub w9, w9, w2
+; CHECK-NEXT: lsl x8, x8, x9
+; CHECK-NEXT: lsr x0, x8, x9
 ; CHECK-NEXT: ret
 %skip = zext i8 %numskipbits to i64
 %shifted = lshr i64 %val, %skip
@@ -907,13 +907,13 @@
 define i64 @bextr64_d3_load_indexzext(ptr %w, i8 %numskipbits, i8 %numlowbits) nounwind {
 ; CHECK-LABEL: bextr64_d3_load_indexzext:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #64
-; CHECK-NEXT: ldr x9, [x0]
-; CHECK-NEXT: sub w8, w8, w2
+; CHECK-NEXT: ldr x8, [x0]
 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: lsr x9, x9, x1
-; CHECK-NEXT: lsl x9, x9, x8
-; CHECK-NEXT: lsr x0, x9, x8
+; CHECK-NEXT: mov w9, #64 // =0x40
+; CHECK-NEXT: sub w9, w9, w2
+; CHECK-NEXT: lsr x8, x8, x1
+; CHECK-NEXT: lsl x8, x8, x9
+; CHECK-NEXT: lsr x0, x8, x9
 ; CHECK-NEXT: ret
 %val = load i64, ptr %w
 %skip = zext i8 %numskipbits to i64
@@ -931,10 +931,10 @@
 define i32 @bextr64_32_d0(i64 %val, i64 %numskipbits, i64 %numlowbits) nounwind {
 ; CHECK-LABEL: bextr64_32_d0:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: neg x8, x2
-; CHECK-NEXT: lsr x9, x0, x1
-; CHECK-NEXT: lsl x9, x9, x8
-; CHECK-NEXT: lsr x0, x9, x8
+; CHECK-NEXT: lsr x8, x0, x1
+; CHECK-NEXT: neg x9, x2
+; CHECK-NEXT: lsl x8, x8, x9
+; CHECK-NEXT: lsr x0, x8, x9
 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT: ret
 %shifted = lshr i64 %val, %numskipbits
@@ -949,10 +949,10 @@
 define i32 @bextr64_32_d1(i64 %val, i64 %numskipbits, i32 %numlowbits) nounwind {
 ; CHECK-LABEL: bextr64_32_d1:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: neg w8, w2
-; CHECK-NEXT: lsr x9, x0, x1
-; CHECK-NEXT: lsl w9, w9, w8
-; CHECK-NEXT: lsr w0, w9, w8
+; CHECK-NEXT: lsr x8, x0, x1
+; CHECK-NEXT: neg w9, w2
+; CHECK-NEXT: lsl w8, w8, w9
+; CHECK-NEXT: lsr w0, w8, w9
 ; CHECK-NEXT: ret
 %shifted = lshr i64 %val, %numskipbits
 %truncshifted = trunc i64 %shifted to i32
diff --git a/llvm/test/CodeGen/AArch64/extract-lowbits.ll b/llvm/test/CodeGen/AArch64/extract-lowbits.ll
--- a/llvm/test/CodeGen/AArch64/extract-lowbits.ll
+++ b/llvm/test/CodeGen/AArch64/extract-lowbits.ll
@@ -162,8 +162,8 @@
 ; CHECK-LABEL: bzhi64_a3_load_indexzext:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov w8, #1 // =0x1
-; CHECK-NEXT: ldr x9, [x0]
 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
+; CHECK-NEXT: ldr x9, [x0]
 ; CHECK-NEXT: lsl x8, x8, x1
 ; CHECK-NEXT: sub x8, x8, #1
 ; CHECK-NEXT: and x0, x8, x9
@@ -224,10 +224,10 @@
 define i32 @bzhi32_b2_load(ptr %w, i32 %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi32_b2_load:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: mov w9, #-1 // =0xffffffff
-; CHECK-NEXT: lsl w9, w9, w1
-; CHECK-NEXT: bic w0, w8, w9
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: ldr w9, [x0]
+; CHECK-NEXT: lsl w8, w8, w1
+; CHECK-NEXT: bic w0, w9, w8
 ; CHECK-NEXT: ret
 %val = load i32, ptr %w
 %notmask = shl i32 -1, %numlowbits
@@ -239,10 +239,10 @@
 define i32 @bzhi32_b3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi32_b3_load_indexzext:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr w8, [x0]
-; CHECK-NEXT: mov w9, #-1 // =0xffffffff
-; CHECK-NEXT: lsl w9, w9, w1
-; CHECK-NEXT: bic w0, w8, w9
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: ldr w9, [x0]
+; CHECK-NEXT: lsl w8, w8, w1
+; CHECK-NEXT: bic w0, w9, w8
 ; CHECK-NEXT: ret
 %val = load i32, ptr %w
 %conv = zext i8 %numlowbits to i32
@@ -298,10 +298,10 @@
 define i64 @bzhi64_b2_load(ptr %w, i64 %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi64_b2_load:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: mov x9, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: lsl x9, x9, x1
-; CHECK-NEXT: bic x0, x8, x9
+; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: ldr x9, [x0]
+; CHECK-NEXT: lsl x8, x8, x1
+; CHECK-NEXT: bic x0, x9, x8
 ; CHECK-NEXT: ret
 %val = load i64, ptr %w
 %notmask = shl i64 -1, %numlowbits
@@ -313,11 +313,11 @@
 define i64 @bzhi64_b3_load_indexzext(ptr %w, i8 zeroext %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi64_b3_load_indexzext:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr x8, [x0]
-; CHECK-NEXT: mov x9, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: lsl x9, x9, x1
-; CHECK-NEXT: bic x0, x8, x9
+; CHECK-NEXT: ldr x9, [x0]
+; CHECK-NEXT: lsl x8, x8, x1
+; CHECK-NEXT: bic x0, x9, x8
 ; CHECK-NEXT: ret
 %val = load i64, ptr %w
 %conv = zext i8 %numlowbits to i64
@@ -347,9 +347,9 @@
 define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi32_c0:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: neg w8, w1
-; CHECK-NEXT: mov w9, #-1 // =0xffffffff
-; CHECK-NEXT: lsr w8, w9, w8
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: neg w9, w1
+; CHECK-NEXT: lsr w8, w8, w9
 ; CHECK-NEXT: and w0, w8, w0
 ; CHECK-NEXT: ret
 %numhighbits = sub i32 32, %numlowbits
@@ -377,11 +377,11 @@
 define i32 @bzhi32_c2_load(ptr %w, i32 %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi32_c2_load:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: neg w8, w1
-; CHECK-NEXT: ldr w9, [x0]
-; CHECK-NEXT: mov w10, #-1 // =0xffffffff
-; CHECK-NEXT: lsr w8, w10, w8
-; CHECK-NEXT: and w0, w8, w9
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: neg w9, w1
+; CHECK-NEXT: ldr w10, [x0]
+; CHECK-NEXT: lsr w8, w8, w9
+; CHECK-NEXT: and w0, w8, w10
 ; CHECK-NEXT: ret
 %val = load i32, ptr %w
 %numhighbits = sub i32 32, %numlowbits
@@ -394,11 +394,11 @@
 ; CHECK-LABEL: bzhi32_c3_load_indexzext:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov w8, #32 // =0x20
-; CHECK-NEXT: ldr w9, [x0]
+; CHECK-NEXT: mov w9, #-1 // =0xffffffff
+; CHECK-NEXT: ldr w10, [x0]
 ; CHECK-NEXT: sub w8, w8, w1
-; CHECK-NEXT: mov w10, #-1 // =0xffffffff
-; CHECK-NEXT: lsr w8, w10, w8
-; CHECK-NEXT: and w0, w8, w9
+; CHECK-NEXT: lsr w8, w9, w8
+; CHECK-NEXT: and w0, w8, w10
 ; CHECK-NEXT: ret
 %val = load i32, ptr %w
 %numhighbits = sub i8 32, %numlowbits
@@ -411,9 +411,9 @@
 define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi32_c4_commutative:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: neg w8, w1
-; CHECK-NEXT: mov w9, #-1 // =0xffffffff
-; CHECK-NEXT: lsr w8, w9, w8
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: neg w9, w1
+; CHECK-NEXT: lsr w8, w8, w9
 ; CHECK-NEXT: and w0, w0, w8
 ; CHECK-NEXT: ret
 %numhighbits = sub i32 32, %numlowbits
@@ -427,9 +427,9 @@
 define i64 @bzhi64_c0(i64 %val, i64 %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi64_c0:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: neg x8, x1
-; CHECK-NEXT: mov x9, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: lsr x8, x9, x8
+; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: neg x9, x1
+; CHECK-NEXT: lsr x8, x8, x9
 ; CHECK-NEXT: and x0, x8, x0
 ; CHECK-NEXT: ret
 %numhighbits = sub i64 64, %numlowbits
@@ -457,11 +457,11 @@
 define i64 @bzhi64_c2_load(ptr %w, i64 %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi64_c2_load:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: neg x8, x1
-; CHECK-NEXT: ldr x9, [x0]
-; CHECK-NEXT: mov x10, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: lsr x8, x10, x8
-; CHECK-NEXT: and x0, x8, x9
+; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: neg x9, x1
+; CHECK-NEXT: ldr x10, [x0]
+; CHECK-NEXT: lsr x8, x8, x9
+; CHECK-NEXT: and x0, x8, x10
 ; CHECK-NEXT: ret
 %val = load i64, ptr %w
 %numhighbits = sub i64 64, %numlowbits
@@ -474,11 +474,11 @@
 ; CHECK-LABEL: bzhi64_c3_load_indexzext:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov w8, #64 // =0x40
-; CHECK-NEXT: ldr x9, [x0]
+; CHECK-NEXT: mov x9, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: ldr x10, [x0]
 ; CHECK-NEXT: sub w8, w8, w1
-; CHECK-NEXT: mov x10, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: lsr x8, x10, x8
-; CHECK-NEXT: and x0, x8, x9
+; CHECK-NEXT: lsr x8, x9, x8
+; CHECK-NEXT: and x0, x8, x10
 ; CHECK-NEXT: ret
 %val = load i64, ptr %w
 %numhighbits = sub i8 64, %numlowbits
@@ -491,9 +491,9 @@
 define i64 @bzhi64_c4_commutative(i64 %val, i64 %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi64_c4_commutative:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: neg x8, x1
-; CHECK-NEXT: mov x9, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: lsr x8, x9, x8
+; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: neg x9, x1
+; CHECK-NEXT: lsr x8, x8, x9
 ; CHECK-NEXT: and x0, x0, x8
 ; CHECK-NEXT: ret
 %numhighbits = sub i64 64, %numlowbits
@@ -537,10 +537,10 @@
 define i32 @bzhi32_d2_load(ptr %w, i32 %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi32_d2_load:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: neg w8, w1
-; CHECK-NEXT: ldr w9, [x0]
-; CHECK-NEXT: lsl w9, w9, w8
-; CHECK-NEXT: lsr w0, w9, w8
+; CHECK-NEXT: ldr w8, [x0]
+; CHECK-NEXT: neg w9, w1
+; CHECK-NEXT: lsl w8, w8, w9
+; CHECK-NEXT: lsr w0, w8, w9
 ; CHECK-NEXT: ret
 %val = load i32, ptr %w
 %numhighbits = sub i32 32, %numlowbits
@@ -599,10 +599,10 @@
 define i64 @bzhi64_d2_load(ptr %w, i64 %numlowbits) nounwind {
 ; CHECK-LABEL: bzhi64_d2_load:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: neg x8, x1
-; CHECK-NEXT: ldr x9, [x0]
-; CHECK-NEXT: lsl x9, x9, x8
-; CHECK-NEXT: lsr x0, x9, x8
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: neg x9, x1
+; CHECK-NEXT: lsl x8, x8, x9
+; CHECK-NEXT: lsr x0, x8, x9
 ; CHECK-NEXT: ret
 %val = load i64, ptr %w
 %numhighbits = sub i64 64, %numlowbits
diff --git a/llvm/test/CodeGen/AArch64/f16-instructions.ll b/llvm/test/CodeGen/AArch64/f16-instructions.ll
--- a/llvm/test/CodeGen/AArch64/f16-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/f16-instructions.ll
@@ -1,499 +1,137 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple aarch64-unknown-unknown -aarch64-neon-syntax=apple -asm-verbose=false -disable-post-ra -frame-pointer=non-leaf | FileCheck %s --check-prefix=CHECK-CVT --check-prefix=CHECK-COMMON
 ; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fullfp16 -aarch64-neon-syntax=apple -asm-verbose=false -disable-post-ra -frame-pointer=non-leaf | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-FP16
-
 ; RUN: llc < %s -mtriple aarch64-unknown-unknown -aarch64-neon-syntax=apple \
 ; RUN: -asm-verbose=false -disable-post-ra -frame-pointer=non-leaf -global-isel \
 ; RUN: -global-isel-abort=2 -pass-remarks-missed=gisel-* 2>&1 | FileCheck %s \
 ; RUN: --check-prefixes=FALLBACK,GISEL-CVT,GISEL
-
 ; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fullfp16 \
 ; RUN: -aarch64-neon-syntax=apple -asm-verbose=false -disable-post-ra \
 ; RUN: -frame-pointer=non-leaf -global-isel -global-isel-abort=2 \
 ; RUN: -pass-remarks-missed=gisel-* 2>&1 | FileCheck %s \
 ; RUN: --check-prefixes=FALLBACK-FP16,GISEL-FP16,GISEL
-
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
-
-; CHECK-CVT-LABEL: test_fadd:
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: fadd s0, s0, s1
-; CHECK-CVT-NEXT: fcvt h0, s0
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_fadd:
-; CHECK-FP16-NEXT: fadd h0, h0, h1
-; CHECK-FP16-NEXT: ret
-
 define half @test_fadd(half %a, half %b) #0 {
 %r = fadd half %a, %b
 ret half %r
 }
-
-; CHECK-CVT-LABEL: test_fsub:
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: fsub s0, s0, s1
-; CHECK-CVT-NEXT: fcvt h0, s0
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_fsub:
-; CHECK-FP16-NEXT: fsub h0, h0, h1
-; CHECK-FP16-NEXT: ret
-
 define half @test_fsub(half %a, half %b) #0 {
 %r = fsub half %a, %b
 ret half %r
 }
-
-; CHECK-CVT-LABEL: test_fmul:
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: fmul s0, s0, s1
-; CHECK-CVT-NEXT: fcvt h0, s0
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_fmul:
-; CHECK-FP16-NEXT: fmul h0, h0, h1
-; CHECK-FP16-NEXT: ret
-
 define half @test_fmul(half %a, half %b) #0 {
 %r = fmul half %a, %b
 ret half %r
 }
-
-; CHECK-CVT-LABEL: test_fmadd:
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: fmul s0, s0, s1
-; CHECK-CVT-NEXT: fcvt h0, s0
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: fcvt s1, h2
-; CHECK-CVT-NEXT: fadd s0, s0, s1
-; CHECK-CVT-NEXT: fcvt h0, s0
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_fmadd:
-; CHECK-FP16-NEXT: fmadd h0, h0, h1, h2
-; CHECK-FP16-NEXT: ret
-
 define half @test_fmadd(half %a, half %b, half %c) #0 {
 %mul = fmul fast half %a, %b
 %r = fadd fast half %mul, %c
 ret half %r
 }
-; CHECK-CVT-LABEL: test_fdiv:
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: fdiv s0, s0, s1
-; CHECK-CVT-NEXT: fcvt h0, s0
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_fdiv:
-; CHECK-FP16-NEXT: fdiv h0, h0, h1
-; CHECK-FP16-NEXT: ret
-
 define half @test_fdiv(half %a, half %b) #0 {
 %r = fdiv half %a, %b
 ret half %r
 }
-
-; CHECK-COMMON-LABEL: test_frem:
-; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]!
-; CHECK-COMMON-NEXT: mov x29, sp
-; CHECK-COMMON-NEXT: fcvt s0, h0
-; CHECK-COMMON-NEXT: fcvt s1, h1
-; CHECK-COMMON-NEXT: bl {{_?}}fmodf
-; CHECK-COMMON-NEXT: fcvt h0, s0
-; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16
-; CHECK-COMMON-NEXT: ret
 define half @test_frem(half %a, half %b) #0 {
 %r = frem half %a, %b
 ret half %r
 }
-
-; CHECK-COMMON-LABEL: test_store:
-; CHECK-COMMON-NEXT: str h0, [x0]
-; CHECK-COMMON-NEXT: ret
 define void @test_store(half %a, ptr %b) #0 {
 store half %a, ptr %b
 ret void
 }
-
-; CHECK-COMMON-LABEL: test_load:
-; CHECK-COMMON-NEXT: ldr h0, [x0]
-; CHECK-COMMON-NEXT: ret
 define half @test_load(ptr %a) #0 {
 %r = load half, ptr %a
 ret half %r
 }
-
 declare half @test_callee(half %a, half %b) #0
-
-; CHECK-COMMON-LABEL: test_call:
-; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]!
-; CHECK-COMMON-NEXT: mov x29, sp
-; CHECK-COMMON-NEXT: bl {{_?}}test_callee
-; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16
-; CHECK-COMMON-NEXT: ret
 define half @test_call(half %a, half %b) #0 {
 %r = call half @test_callee(half %a, half %b)
 ret half %r
 }
-
-; CHECK-COMMON-LABEL: test_call_flipped:
-; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]!
-; CHECK-COMMON-NEXT: mov x29, sp
-; CHECK-COMMON-NEXT: fmov s2, s0
-; CHECK-COMMON-NEXT: fmov s0, s1
-; CHECK-COMMON-NEXT: fmov s1, s2
-; CHECK-COMMON-NEXT: bl {{_?}}test_callee
-; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16
-; CHECK-COMMON-NEXT: ret
 define half @test_call_flipped(half %a, half %b) #0 {
 %r = call half @test_callee(half %b, half %a)
 ret half %r
 }
-
-; CHECK-COMMON-LABEL: test_tailcall_flipped:
-; CHECK-COMMON-NEXT: fmov s2, s0
-; CHECK-COMMON-NEXT: fmov s0, s1
-; CHECK-COMMON-NEXT: fmov s1, s2
-; CHECK-COMMON-NEXT: b {{_?}}test_callee
 define half @test_tailcall_flipped(half %a, half %b) #0 {
 %r = tail call half @test_callee(half %b, half %a)
 ret half %r
 }
-
-; CHECK-CVT-LABEL: test_select:
-; CHECK-CVT-NEXT: cmp w0, #0
-; CHECK-CVT-NEXT: fcsel s0, s0, s1, ne
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_select:
-; CHECK-FP16-NEXT: cmp w0, #0
-; CHECK-FP16-NEXT: fcsel h0, h0, h1, ne
-; CHECK-FP16-NEXT: ret
-
 define half @test_select(half %a, half %b, i1 zeroext %c) #0 {
 %r = select i1 %c, half %a, half %b
 ret half %r
 }
-
-; CHECK-CVT-LABEL: test_select_cc:
-; CHECK-CVT-DAG: fcvt s3, h3
-; CHECK-CVT-DAG: fcvt s2, h2
-; CHECK-CVT-DAG: fcmp s2, s3
-; CHECK-CVT-NEXT: fcsel s0, s0, s1, ne
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_select_cc:
-; CHECK-FP16-NEXT: fcmp h2, h3
-; CHECK-FP16-NEXT: fcsel h0, h0, h1, ne
-; CHECK-FP16-NEXT: ret
-
 define half @test_select_cc(half %a, half %b, half %c, half %d) #0 {
 %cc = fcmp une half %c, %d
 %r = select i1 %cc, half %a, half %b
 ret half %r
 }
-
-; CHECK-CVT-LABEL: test_select_cc_f32_f16:
-; CHECK-CVT-DAG: fcvt s2, h2
-; CHECK-CVT-DAG: fcvt s3, h3
-; CHECK-CVT-NEXT: fcmp s2, s3
-; CHECK-CVT-NEXT: fcsel s0, s0, s1, ne
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_select_cc_f32_f16:
-; CHECK-FP16-NEXT: fcmp h2, h3
-; CHECK-FP16-NEXT: fcsel s0, s0, s1, ne
-; CHECK-FP16-NEXT: ret
-
 define float @test_select_cc_f32_f16(float %a, float %b, half %c, half %d) #0 {
 %cc = fcmp une half %c, %d
 %r = select i1 %cc, float %a, float %b
 ret float %r
 }
-
-; CHECK-CVT-LABEL: test_select_cc_f16_f32:
-; CHECK-CVT-DAG: fcmp s2, s3
-; CHECK-CVT-NEXT: fcsel s0, s0, s1, ne
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_select_cc_f16_f32:
-; CHECK-FP16-NEXT: fcmp s2, s3
-; CHECK-FP16-NEXT: fcsel h0, h0, h1, ne
-; CHECK-FP16-NEXT: ret
-
 define half @test_select_cc_f16_f32(half %a, half %b, float %c, float %d) #0 {
 %cc = fcmp une float %c, %d
 %r = select i1 %cc, half %a, half %b
 ret half %r
 }
-
-; CHECK-CVT-LABEL: test_fcmp_une:
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: fcmp s0, s1
-; CHECK-CVT-NEXT: cset w0, ne
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_fcmp_une:
-; CHECK-FP16-NEXT: fcmp h0, h1
-; CHECK-FP16-NEXT: cset w0, ne
-; CHECK-FP16-NEXT: ret
-
 define i1 @test_fcmp_une(half %a, half %b) #0 {
 %r = fcmp une half %a, %b
 ret i1 %r
 }
-
-; CHECK-CVT-LABEL: test_fcmp_ueq:
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: fcmp s0, s1
-; CHECK-CVT-NEXT: cset [[TRUE:w[0-9]+]], eq
-; CHECK-CVT-NEXT: csinc w0, [[TRUE]], wzr, vc
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_fcmp_ueq:
-; CHECK-FP16-NEXT: fcmp h0, h1
-; CHECK-FP16-NEXT: cset [[TRUE:w[0-9]+]], eq
-; CHECK-FP16-NEXT: csinc w0, [[TRUE]], wzr, vc
-; CHECK-FP16-NEXT: ret
-
 define i1 @test_fcmp_ueq(half %a, half %b) #0 {
 %r = fcmp ueq half %a, %b
 ret i1 %r
 }
-
-; CHECK-CVT-LABEL: test_fcmp_ugt:
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: fcmp s0, s1
-; CHECK-CVT-NEXT: cset w0, hi
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_fcmp_ugt:
-; CHECK-FP16-NEXT: fcmp h0, h1
-; CHECK-FP16-NEXT: cset w0, hi
-; CHECK-FP16-NEXT: ret
-
 define i1 @test_fcmp_ugt(half %a, half %b) #0 {
 %r = fcmp ugt half %a, %b
 ret i1 %r
 }
-
-; CHECK-CVT-LABEL: test_fcmp_uge:
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: fcmp s0, s1
-; CHECK-CVT-NEXT: cset w0, pl
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_fcmp_uge:
-; CHECK-FP16-NEXT: fcmp h0, h1
-; CHECK-FP16-NEXT: cset w0, pl
-; CHECK-FP16-NEXT: ret
-
 define i1 @test_fcmp_uge(half %a, half %b) #0 {
 %r = fcmp uge half %a, %b
 ret i1 %r
 }
-
-; CHECK-CVT-LABEL: test_fcmp_ult:
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: fcmp s0, s1
-; CHECK-CVT-NEXT: cset w0, lt
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_fcmp_ult:
-; CHECK-FP16-NEXT: fcmp h0, h1
-; CHECK-FP16-NEXT: cset w0, lt
-; CHECK-FP16-NEXT: ret
-
 define i1 @test_fcmp_ult(half %a, half %b) #0 {
 %r = fcmp ult half %a, %b
 ret i1 %r
 }
-
-; CHECK-CVT-LABEL: test_fcmp_ule:
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: fcmp s0, s1
-; CHECK-CVT-NEXT: cset w0, le
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_fcmp_ule:
-; CHECK-FP16-NEXT: fcmp h0, h1
-; CHECK-FP16-NEXT: cset w0, le
-; CHECK-FP16-NEXT: ret
-
 define i1 @test_fcmp_ule(half %a, half %b) #0 {
 %r = fcmp ule half %a, %b
 ret i1 %r
 }
-
-; CHECK-CVT-LABEL: test_fcmp_uno:
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: fcmp s0, s1
-; CHECK-CVT-NEXT: cset w0, vs
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_fcmp_uno:
-; CHECK-FP16-NEXT: fcmp h0, h1
-; CHECK-FP16-NEXT: cset w0, vs
-; CHECK-FP16-NEXT: ret
-
 define i1 @test_fcmp_uno(half %a, half %b) #0 {
 %r = fcmp uno half %a, %b
 ret i1 %r
 }
-
-; CHECK-CVT-LABEL: test_fcmp_one:
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: fcmp s0, s1
-; CHECK-CVT-NEXT: cset [[TRUE:w[0-9]+]], mi
-; CHECK-CVT-NEXT: csinc w0, [[TRUE]], wzr, le
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_fcmp_one:
-; CHECK-FP16-NEXT: fcmp h0, h1
-; CHECK-FP16-NEXT: cset [[TRUE:w[0-9]+]], mi
-; CHECK-FP16-NEXT: csinc w0, [[TRUE]], wzr, le
-; CHECK-FP16-NEXT: ret
-
 define i1 @test_fcmp_one(half %a, half %b) #0 {
 %r = fcmp one half %a, %b
 ret i1 %r
 }
-
-; CHECK-CVT-LABEL: test_fcmp_oeq:
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: fcmp s0, s1
-; CHECK-CVT-NEXT: cset w0, eq
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_fcmp_oeq:
-; CHECK-FP16-NEXT: fcmp h0, h1
-; CHECK-FP16-NEXT: cset w0, eq
-; CHECK-FP16-NEXT: ret
-
 define i1 @test_fcmp_oeq(half %a, half %b) #0 {
 %r = fcmp oeq half %a, %b
 ret i1 %r
 }
-
-; CHECK-CVT-LABEL: test_fcmp_ogt:
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: fcmp s0, s1
-; CHECK-CVT-NEXT: cset w0, gt
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_fcmp_ogt:
-; CHECK-FP16-NEXT: fcmp h0, h1
-; CHECK-FP16-NEXT: cset w0, gt
-; CHECK-FP16-NEXT: ret
-
 define i1 @test_fcmp_ogt(half %a, half %b) #0 {
 %r = fcmp ogt half %a, %b
 ret i1 %r
 }
-
-; CHECK-CVT-LABEL: test_fcmp_oge:
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: fcmp s0, s1
-; CHECK-CVT-NEXT: cset w0, ge
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_fcmp_oge:
-; CHECK-FP16-NEXT: fcmp h0, h1
-; CHECK-FP16-NEXT: cset w0, ge
-; CHECK-FP16-NEXT: ret
-
 define i1 @test_fcmp_oge(half %a, half %b) #0 {
 %r = fcmp oge half %a, %b
 ret i1 %r
 }
-
-; CHECK-CVT-LABEL: test_fcmp_olt:
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: fcmp s0, s1
-; CHECK-CVT-NEXT: cset w0, mi
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_fcmp_olt:
-; CHECK-FP16-NEXT: fcmp h0, h1
-; CHECK-FP16-NEXT: cset w0, mi
-; CHECK-FP16-NEXT: ret
-
 define i1 @test_fcmp_olt(half %a, half %b) #0 {
 %r = fcmp olt half %a, %b
 ret i1 %r
 }
-
-; CHECK-CVT-LABEL: test_fcmp_ole:
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: fcmp s0, s1
-; CHECK-CVT-NEXT: cset w0, ls
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_fcmp_ole:
-; CHECK-FP16-NEXT: fcmp h0, h1
-; CHECK-FP16-NEXT: cset w0, ls
-; CHECK-FP16-NEXT: ret
-
 define i1 @test_fcmp_ole(half %a, half %b) #0 {
 %r = fcmp ole half %a, %b
 ret i1 %r
 }
-
-; CHECK-CVT-LABEL: test_fcmp_ord:
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: fcmp s0, s1
-; CHECK-CVT-NEXT: cset w0, vc
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_fcmp_ord:
-; CHECK-FP16-NEXT: fcmp h0, h1
-; CHECK-FP16-NEXT: cset w0, vc
-; CHECK-FP16-NEXT: ret
-
 define i1 @test_fcmp_ord(half %a, half %b) #0 {
 %r = fcmp ord half %a, %b
 ret i1 %r
 }
-
-; CHECK-COMMON-LABEL: test_fccmp:
-; CHECK-CVT: fcvt s1, h0
-; CHECK-CVT-NEXT: fmov s2, #5.00000000
-; CHECK-CVT-NEXT: fcmp s1, s2
-; CHECK-CVT-NEXT: fmov s2, #8.00000000
-; CHECK-CVT-NEXT: adrp x8
-; CHECK-CVT-NEXT: fccmp s1, s2, #4, mi
-; CHECK-CVT-NEXT: ldr h1, [x8,
-; CHECK-CVT-NEXT: fcsel s0, s0, s1, gt
-; CHECK-CVT-NEXT: str h0, [x0]
-; CHECK-CVT-NEXT: ret
-; CHECK-FP16: fmov h1, #5.00000000
-; CHECK-FP16-NEXT: fcmp h0, h1
-; CHECK-FP16-NEXT: fmov h2, #8.00000000
-; CHECK-FP16-NEXT: fccmp h0, h2, #4, mi
-; CHECK-FP16-NEXT: fcsel h0, h0, h1, gt
-; CHECK-FP16-NEXT: str h0, [x0]
-; CHECK-FP16-NEXT: ret
-
 define void @test_fccmp(half %in, ptr %out) {
 %cmp1 = fcmp ogt half %in, 0xH4800
 %cmp2 = fcmp olt half %in, 0xH4500
@@ -502,21 +140,6 @@
 store half %result, ptr %out
 ret void
 }
-
-; CHECK-CVT-LABEL: test_br_cc:
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: fcmp s0, s1
-; CHECK-CVT-NEXT: csel x8, x0, x1, pl
-; CHECK-CVT-NEXT: str wzr, [x8]
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_br_cc:
-; CHECK-FP16-NEXT: fcmp h0, h1
-; CHECK-FP16-NEXT: csel x8, x0, x1, pl
-; CHECK-FP16-NEXT: str wzr, [x8]
-; CHECK-FP16-NEXT: ret
-
 define void @test_br_cc(half %a, half %b, ptr %p1, ptr %p2) #0 {
 %c = fcmp uge half %a, %b
 br i1 %c, label %then, label %else
@@ -527,17 +150,6 @@
 store i32 0, ptr %p2
 ret void
 }
-
-; CHECK-COMMON-LABEL: test_phi:
-; CHECK-COMMON: mov x[[PTR:[0-9]+]], x0
-; CHECK-COMMON: ldr h[[AB:[0-9]+]], [x0]
-; CHECK-COMMON: [[LOOP:LBB[0-9_]+]]:
-; CHECK-COMMON: fmov s[[R:[0-9]+]], s[[AB]]
-; CHECK-COMMON: ldr h[[AB]], [x[[PTR]]]
-; CHECK-COMMON: mov x0, x[[PTR]]
-; CHECK-COMMON: bl {{_?}}test_dummy
-; CHECK-COMMON: fmov s0, s[[R]]
-; CHECK-COMMON: ret
 define half @test_phi(ptr %p1) #0 {
 entry:
 %a = load half, ptr %p1
@@ -550,211 +162,73 @@
 return:
 ret half %r
 }
-
 declare i1 @test_dummy(ptr %p1) #0
-
-; CHECK-CVT-LABEL: test_fptosi_i32:
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: fcvtzs w0, s0
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_fptosi_i32:
-; CHECK-FP16-NEXT: fcvtzs w0, h0
-; CHECK-FP16-NEXT: ret
-
 define i32 @test_fptosi_i32(half %a) #0 {
 %r = fptosi half %a to i32
 ret i32 %r
 }
-
-; CHECK-CVT-LABEL: test_fptosi_i64:
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: fcvtzs x0, s0
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_fptosi_i64:
-; CHECK-FP16-NEXT: fcvtzs x0, h0
-; CHECK-FP16-NEXT: ret
-
 define i64 @test_fptosi_i64(half %a) #0 {
 %r = fptosi half %a to i64
 ret i64 %r
 }
-
-; CHECK-CVT-LABEL: test_fptoui_i32:
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: fcvtzu w0, s0
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_fptoui_i32:
-; CHECK-FP16-NEXT: fcvtzu w0, h0
-; CHECK-FP16-NEXT: ret
-
 define i32 @test_fptoui_i32(half %a) #0 {
 %r = fptoui half %a to i32
 ret i32 %r
 }
-
-; CHECK-CVT-LABEL: test_fptoui_i64:
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: fcvtzu x0, s0
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_fptoui_i64:
-; CHECK-FP16-NEXT: fcvtzu x0, h0
-; CHECK-FP16-NEXT: ret
-
 define i64 @test_fptoui_i64(half %a) #0 {
 %r = fptoui half %a to i64
 ret i64 %r
 }
-
-; CHECK-CVT-LABEL: test_uitofp_i32:
-; CHECK-CVT-NEXT: ucvtf s0, w0
-; CHECK-CVT-NEXT: fcvt h0, s0
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_uitofp_i32:
-; CHECK-FP16-NEXT: ucvtf h0, w0
-; CHECK-FP16-NEXT: ret
-
 define half @test_uitofp_i32(i32 %a) #0 {
 %r = uitofp i32 %a to half
 ret half %r
 }
-
-; CHECK-CVT-LABEL: test_uitofp_i64:
-; CHECK-CVT-NEXT: ucvtf s0, x0
-; CHECK-CVT-NEXT: fcvt h0, s0
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_uitofp_i64:
-; CHECK-FP16-NEXT: ucvtf h0, x0
-; CHECK-FP16-NEXT: ret
-
 define half @test_uitofp_i64(i64 %a) #0 {
 %r = uitofp i64 %a to half
 ret half %r
 }
-
-; CHECK-CVT-LABEL: test_sitofp_i32:
-; CHECK-CVT-NEXT: scvtf s0, w0
-; CHECK-CVT-NEXT: fcvt h0, s0
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_sitofp_i32:
-; CHECK-FP16-NEXT: scvtf h0, w0
-; CHECK-FP16-NEXT: ret
-
 define half @test_sitofp_i32(i32 %a) #0 {
 %r = sitofp i32 %a to half
 ret half %r
 }
-
-; CHECK-CVT-LABEL: test_sitofp_i64:
-; CHECK-CVT-NEXT: scvtf s0, x0
-; CHECK-CVT-NEXT: fcvt h0, s0
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_sitofp_i64:
-; CHECK-FP16-NEXT: scvtf h0, x0
-; CHECK-FP16-NEXT: ret
 define half @test_sitofp_i64(i64 %a) #0 {
 %r = sitofp i64 %a to half
 ret half %r
 }
-
-; CHECK-CVT-LABEL: test_uitofp_i32_fadd:
-; CHECK-CVT-NEXT: ucvtf s1, w0
-; CHECK-CVT-NEXT: fcvt h1, s1
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: fadd s0, s0, s1
-; CHECK-CVT-NEXT: fcvt h0, s0
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_uitofp_i32_fadd:
-; CHECK-FP16-NEXT: ucvtf h1, w0
-; CHECK-FP16-NEXT: fadd h0, h0, h1
-; CHECK-FP16-NEXT: ret
-
 define half @test_uitofp_i32_fadd(i32 %a, half %b) #0 {
 %c = uitofp i32 %a to half
 %r = fadd half %b, %c
 ret half %r
 }
-
-; CHECK-CVT-LABEL: test_sitofp_i32_fadd:
-; CHECK-CVT-NEXT: scvtf s1, w0
-; CHECK-CVT-NEXT: fcvt h1, s1
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: fadd s0, s0, s1
-; CHECK-CVT-NEXT: fcvt h0, s0
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_sitofp_i32_fadd:
-; CHECK-FP16-NEXT: scvtf h1, w0
-; CHECK-FP16-NEXT: fadd h0, h0, h1
-; CHECK-FP16-NEXT: ret
-
 define half @test_sitofp_i32_fadd(i32 %a, half %b) #0 {
 %c = sitofp i32 %a to half
 %r = fadd half %b, %c
 ret half %r
 }
-
-; CHECK-COMMON-LABEL: test_fptrunc_float:
-; CHECK-COMMON-NEXT: fcvt h0, s0
-; CHECK-COMMON-NEXT: ret
-
 define half @test_fptrunc_float(float %a) #0 {
 %r = fptrunc float %a to half
 ret half %r
 }
-
-; CHECK-COMMON-LABEL: test_fptrunc_double:
-; CHECK-COMMON-NEXT: fcvt h0, d0
-; CHECK-COMMON-NEXT: ret
 define half @test_fptrunc_double(double %a) #0 {
 %r = fptrunc double %a to half
 ret half %r
 }
-
-; CHECK-COMMON-LABEL: test_fpext_float:
-; CHECK-COMMON-NEXT: fcvt s0, h0
-; CHECK-COMMON-NEXT: ret
 define float @test_fpext_float(half %a) #0 {
 %r = fpext half %a to float
 ret float %r
 }
-
-; CHECK-COMMON-LABEL: test_fpext_double:
-; CHECK-COMMON-NEXT: fcvt d0, h0
-; CHECK-COMMON-NEXT: ret
 define double @test_fpext_double(half %a) #0 {
 %r = fpext half %a to double
 ret double %r
 }
-
-
-; CHECK-COMMON-LABEL: test_bitcast_halftoi16:
-; CHECK-COMMON-NEXT: fmov w0, s0
-; CHECK-COMMON-NEXT: ret
 define i16 @test_bitcast_halftoi16(half %a) #0 {
 %r = bitcast half %a to i16
 ret i16 %r
 }
-
-; CHECK-COMMON-LABEL: test_bitcast_i16tohalf:
-; CHECK-COMMON-NEXT: fmov s0, w0
-; CHECK-COMMON-NEXT: ret
 define half @test_bitcast_i16tohalf(i16 %a) #0 {
 %r = bitcast i16 %a to half
 ret half %r
 }
-
-
 declare half @llvm.sqrt.f16(half %a) #0
 declare half @llvm.powi.f16.i32(half %a, i32 %b) #0
 declare half @llvm.sin.f16(half %a) #0
@@ -778,60 +252,26 @@
 declare half @llvm.round.f16(half %a) #0
 declare half @llvm.roundeven.f16(half %a) #0
 declare half @llvm.fmuladd.f16(half %a, half %b, half %c) #0
-
 ; FALLBACK-NOT: remark:{{.*}}test_sqrt
 ; FALLBACK-FP16-NOT: remark:{{.*}}test_sqrt
-
-; CHECK-CVT-LABEL: test_sqrt:
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: fsqrt s0, s0
-; CHECK-CVT-NEXT: fcvt h0, s0
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_sqrt:
-; CHECK-FP16-NEXT: fsqrt h0, h0
-; CHECK-FP16-NEXT: ret
-
 ; GISEL-CVT-LABEL: test_sqrt:
 ; GISEL-CVT-NEXT: fcvt s0, h0
 ; GISEL-CVT-NEXT: fsqrt s0, s0
 ; GISEL-CVT-NEXT: fcvt h0, s0
 ; GISEL-CVT-NEXT: ret
-
 ; GISEL-FP16-LABEL: test_sqrt:
 ; GISEL-FP16-NEXT: fsqrt h0, h0
 ; GISEL-FP16-NEXT: ret
-
 define half @test_sqrt(half %a) #0 {
 %r = call half @llvm.sqrt.f16(half %a)
 ret half %r
 }
-
-; CHECK-COMMON-LABEL: test_powi:
-; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]!
-; CHECK-COMMON-NEXT: mov x29, sp
-; CHECK-COMMON-NEXT: fcvt s0, h0
-; CHECK-COMMON-NEXT: bl {{_?}}__powisf2
-; CHECK-COMMON-NEXT: fcvt h0, s0
-; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16
-; CHECK-COMMON-NEXT: ret
 define half @test_powi(half %a, i32 %b) #0 {
 %r = call half @llvm.powi.f16.i32(half %a, i32 %b)
 ret half %r
 }
-
 ; FALLBACK-NOT: remark:{{.*}}test_sin
 ; FALLBACK-FP16-NOT: remark:{{.*}}test_sin
-
-; CHECK-COMMON-LABEL: test_sin:
-; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]!
-; CHECK-COMMON-NEXT: mov x29, sp
-; CHECK-COMMON-NEXT: fcvt s0, h0
-; CHECK-COMMON-NEXT: bl {{_?}}sinf
-; CHECK-COMMON-NEXT: fcvt h0, s0
-; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16
-; CHECK-COMMON-NEXT: ret
-
 ; GISEL-LABEL: test_sin:
 ; GISEL-NEXT: stp x29, x30, [sp, #-16]!
 ; GISEL-NEXT: mov x29, sp
@@ -844,19 +284,8 @@
 %r = call half @llvm.sin.f16(half %a)
 ret half %r
 }
-
 ; FALLBACK-NOT: remark:{{.*}}test_cos
 ; FALLBACK-FP16-NOT: remark:{{.*}}test_cos
-
-; CHECK-COMMON-LABEL: test_cos:
-; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]!
-; CHECK-COMMON-NEXT: mov x29, sp
-; CHECK-COMMON-NEXT: fcvt s0, h0
-; CHECK-COMMON-NEXT: bl {{_?}}cosf
-; CHECK-COMMON-NEXT: fcvt h0, s0
-; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16
-; CHECK-COMMON-NEXT: ret
-
 ; GISEL-LABEL: test_cos:
 ; GISEL-NEXT: stp x29, x30, [sp, #-16]!
 ; GISEL-NEXT: mov x29, sp
@@ -869,33 +298,12 @@
 %r = call half @llvm.cos.f16(half %a)
 ret half %r
 }
-
-; CHECK-COMMON-LABEL: test_pow:
-; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]!
-; CHECK-COMMON-NEXT: mov x29, sp
-; CHECK-COMMON-NEXT: fcvt s0, h0
-; CHECK-COMMON-NEXT: fcvt s1, h1
-; CHECK-COMMON-NEXT: bl {{_?}}powf
-; CHECK-COMMON-NEXT: fcvt h0, s0
-; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16
-; CHECK-COMMON-NEXT: ret
 define half @test_pow(half %a, half %b) #0 {
 %r = call half @llvm.pow.f16(half %a, half %b)
 ret half %r
 }
-
 ; FALLBACK-NOT: remark:{{.*}}test_exp
 ; FALLBACK-FP16-NOT: remark:{{.*}}test_exp
-
-; CHECK-COMMON-LABEL: test_exp:
-; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]!
-; CHECK-COMMON-NEXT: mov x29, sp
-; CHECK-COMMON-NEXT: fcvt s0, h0
-; CHECK-COMMON-NEXT: bl {{_?}}expf
-; CHECK-COMMON-NEXT: fcvt h0, s0
-; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16
-; CHECK-COMMON-NEXT: ret
-
 ; GISEL-LABEL: test_exp:
 ; GISEL-NEXT: stp x29, x30, [sp, #-16]!
 ; GISEL-NEXT: mov x29, sp
@@ -908,16 +316,6 @@
 %r = call half @llvm.exp.f16(half %a)
 ret half %r
 }
-
-; CHECK-COMMON-LABEL: test_exp2:
-; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]!
-; CHECK-COMMON-NEXT: mov x29, sp
-; CHECK-COMMON-NEXT: fcvt s0, h0
-; CHECK-COMMON-NEXT: bl {{_?}}exp2f
-; CHECK-COMMON-NEXT: fcvt h0, s0
-; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16
-; CHECK-COMMON-NEXT: ret
-
 ; GISEL-LABEL: test_exp2:
 ; GISEL-NEXT: stp x29, x30, [sp, #-16]!
 ; GISEL-NEXT: mov x29, sp
@@ -930,19 +328,8 @@
 %r = call half @llvm.exp2.f16(half %a)
 ret half %r
 }
-
 ; FALLBACK-NOT: remark:{{.*}}test_log
 ; FALLBACK-FP16-NOT: remark:{{.*}}test_log
-
-; CHECK-COMMON-LABEL: test_log:
-; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]!
-; CHECK-COMMON-NEXT: mov x29, sp
-; CHECK-COMMON-NEXT: fcvt s0, h0
-; CHECK-COMMON-NEXT: bl {{_?}}logf
-; CHECK-COMMON-NEXT: fcvt h0, s0
-; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16
-; CHECK-COMMON-NEXT: ret
-
 ; GISEL-LABEL: test_log:
 ; GISEL: stp x29, x30, [sp, #-16]!
 ; GISEL-NEXT: mov x29, sp
@@ -951,24 +338,12 @@
 ; GISEL-NEXT: fcvt h0, s0
 ; GISEL-NEXT: ldp x29, x30, [sp], #16
 ; GISEL-NEXT: ret
-
 define half @test_log(half %a) #0 {
 %r = call half @llvm.log.f16(half %a)
 ret half %r
 }
-
 ; FALLBACK-NOT: remark:{{.*}}test_log10
 ; FALLBACK-FP16-NOT: remark:{{.*}}test_log10
-
-; CHECK-COMMON-LABEL: test_log10:
-; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]!
-; CHECK-COMMON-NEXT: mov x29, sp
-; CHECK-COMMON-NEXT: fcvt s0, h0
-; CHECK-COMMON-NEXT: bl {{_?}}log10f
-; CHECK-COMMON-NEXT: fcvt h0, s0
-; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16
-; CHECK-COMMON-NEXT: ret
-
 ; GISEL-LABEL: test_log10:
 ; GISEL-NEXT: stp x29, x30, [sp, #-16]!
 ; GISEL-NEXT: mov x29, sp
@@ -977,24 +352,12 @@
 ; GISEL-NEXT: fcvt h0, s0
 ; GISEL-NEXT: ldp x29, x30, [sp], #16
 ; GISEL-NEXT: ret
-
 define half @test_log10(half %a) #0 {
 %r = call half @llvm.log10.f16(half %a)
 ret half %r
 }
-
 ; FALLBACK-NOT: remark:{{.*}}test_log2
 ; FALLBACK-FP16-NOT: remark:{{.*}}test_log2
-
-; CHECK-COMMON-LABEL: test_log2:
-; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]!
-; CHECK-COMMON-NEXT: mov x29, sp
-; CHECK-COMMON-NEXT: fcvt s0, h0
-; CHECK-COMMON-NEXT: bl {{_?}}log2f
-; CHECK-COMMON-NEXT: fcvt h0, s0
-; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16
-; CHECK-COMMON-NEXT: ret
-
 ; GISEL-LABEL: test_log2:
 ; GISEL-NEXT: stp x29, x30, [sp, #-16]!
 ; GISEL-NEXT: mov x29, sp
@@ -1003,215 +366,77 @@
 ; GISEL-NEXT: fcvt h0, s0
 ; GISEL-NEXT: ldp x29, x30, [sp], #16
 ; GISEL-NEXT: ret
-
 define half @test_log2(half %a) #0 {
 %r = call half @llvm.log2.f16(half %a)
 ret half %r
 }
-
-; CHECK-CVT-LABEL: test_fma:
-; CHECK-CVT-NEXT: fcvt s2, h2
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: fmadd s0, s0, s1, s2
-; CHECK-CVT-NEXT: fcvt h0, s0
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_fma:
-; CHECK-FP16-NEXT: fmadd h0, h0, h1, h2
-; CHECK-FP16-NEXT: ret
-
 define half @test_fma(half %a, half %b, half %c) #0 {
 %r = call half @llvm.fma.f16(half %a, half %b, half %c)
 ret half %r
 }
-
-; CHECK-CVT-LABEL: test_fabs:
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: fabs s0, s0
-; CHECK-CVT-NEXT: fcvt h0, s0
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_fabs:
-; CHECK-FP16-NEXT: fabs h0, h0
-; CHECK-FP16-NEXT: ret
-
 ; FALLBACK-NOT: remark:{{.*}}test_fabs
 ; FALLBACK-FP16-NOT: remark:{{.*}}test_fabs
-
 ; GISEL-CVT-LABEL: test_fabs:
 ; GISEL-CVT-NEXT: fcvt s0, h0
 ; GISEL-CVT-NEXT: fabs s0, s0
 ; GISEL-CVT-NEXT: fcvt h0, s0
 ; GISEL-CVT-NEXT: ret
-
 ; GISEL-FP16-LABEL: test_fabs:
 ; GISEL-FP16-NEXT: fabs h0, h0
 ; GISEL-FP16-NEXT: ret
-
 define half @test_fabs(half %a) #0 {
 %r = call half @llvm.fabs.f16(half %a)
 ret half %r
 }
-
-; CHECK-CVT-LABEL: test_minnum:
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: fminnm s0, s0, s1
-; CHECK-CVT-NEXT: fcvt h0, s0
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_minnum:
-; CHECK-FP16-NEXT: fminnm h0, h0, h1
-; CHECK-FP16-NEXT: ret
-
 define half @test_minnum(half %a, half %b) #0 {
 %r = call half @llvm.minnum.f16(half %a, half %b)
 ret half %r
 }
-
-; CHECK-CVT-LABEL: test_maxnum:
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: fmaxnm s0, s0, s1
-; CHECK-CVT-NEXT: fcvt h0, s0
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_maxnum:
-; CHECK-FP16-NEXT: fmaxnm h0, h0, h1
-; CHECK-FP16-NEXT: ret
-
 define half @test_maxnum(half %a, half %b) #0 {
 %r = call half @llvm.maxnum.f16(half %a, half %b)
 ret half %r
 }
-
-; CHECK-CVT-LABEL: test_copysign:
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: mvni.4s v2, #128, lsl #24
-; CHECK-CVT-NEXT: bif.16b v0, v1, v2
-; CHECK-CVT-NEXT: fcvt h0, s0
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_copysign:
-; CHECK-FP16-NEXT: mvni.8h v2, #128, lsl #8
-; CHECK-FP16-NEXT: bif.16b v0, v1, v2
-; CHECK-FP16-NEXT: ret
-
 define half @test_copysign(half %a, half %b) #0 {
 %r = call half @llvm.copysign.f16(half %a, half %b)
 ret half %r
 }
-
-; CHECK-CVT-LABEL: test_copysign_f32:
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: mvni.4s v2, #128, lsl #24
-; CHECK-CVT-NEXT: bif.16b v0, v1, v2
-; CHECK-CVT-NEXT: fcvt h0, s0
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_copysign_f32:
-; CHECK-FP16-NEXT: fcvt h1, s1
-; CHECK-FP16-NEXT: mvni.8h v2, #128, lsl #8
-; CHECK-FP16-NEXT: bif.16b v0, v1, v2
-; CHECK-FP16-NEXT: ret
-
 define half @test_copysign_f32(half %a, float %b) #0 {
 %tb = fptrunc float %b to half
 %r = call half @llvm.copysign.f16(half %a, half %tb)
 ret half %r
 }
-
-; CHECK-CVT-LABEL: test_copysign_f64:
-; CHECK-CVT-NEXT: fcvt s1, d1
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: mvni.4s v2, #128, lsl #24
-; CHECK-CVT-NEXT: bif.16b v0, v1, v2
-; CHECK-CVT-NEXT: fcvt h0, s0
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_copysign_f64:
-; CHECK-FP16-NEXT: fcvt h1, d1
-; CHECK-FP16-NEXT: mvni.8h v2, #128, lsl #8
-; CHECK-FP16-NEXT: bif.16b v0, v1, v2
-; CHECK-FP16-NEXT: ret
-
 define half @test_copysign_f64(half %a, double %b) #0 {
 %tb = fptrunc double %b to half
 %r = call half @llvm.copysign.f16(half %a, half %tb)
 ret half %r
 }
-
-; Check that the FP promotion will use a truncating FP_ROUND, so we can fold
 ; away the (fpext (fp_round )) here.
-
-; CHECK-CVT-LABEL: test_copysign_extended:
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: mvni.4s v2, #128, lsl #24
-; CHECK-CVT-NEXT: bif.16b v0, v1, v2
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_copysign_extended:
-; CHECK-FP16-NEXT: mvni.8h v2, #128, lsl #8
-; CHECK-FP16-NEXT: bif.16b v0, v1, v2
-; CHECK-FP16-NEXT: fcvt s0, h0
-; CHECK-FP16-NEXT: ret
-
 define float @test_copysign_extended(half %a, half %b) #0 {
 %r = call half @llvm.copysign.f16(half %a, half %b)
 %xr = fpext half %r to float
 ret float %xr
 }
-
-; CHECK-CVT-LABEL: test_floor:
-; CHECK-CVT-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0
-; CHECK-CVT-NEXT: frintm [[INT32:s[0-9]+]], [[FLOAT32]]
-; CHECK-CVT-NEXT: fcvt h0, [[INT32]]
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_floor:
-; CHECK-FP16-NEXT: frintm h0, h0
-; CHECK-FP16-NEXT: ret
-
 ; FALLBACK-NOT: remark:{{.*}}test_floor
 ; FALLBACK-FP16-NOT: remark:{{.*}}test_floor
-
 ; GISEL-CVT-LABEL: test_floor:
 ; GISEL-CVT-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0
 ; GISEL-CVT-NEXT: frintm [[INT32:s[0-9]+]], [[FLOAT32]]
 ; GISEL-CVT-NEXT: fcvt h0, [[INT32]]
 ; GISEL-CVT-NEXT: ret
-
 ; GISEL-FP16-LABEL: test_floor:
 ; GISEL-FP16-NEXT: frintm h0, h0
 ; GISEL-FP16-NEXT: ret
-
 define half @test_floor(half %a) #0 {
 %r = call half @llvm.floor.f16(half %a)
 ret half %r
 }
-
-; CHECK-CVT-LABEL: test_ceil:
-; CHECK-CVT-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0
-; CHECK-CVT-NEXT: frintp [[INT32:s[0-9]+]], [[FLOAT32]]
-; CHECK-CVT-NEXT: fcvt h0, [[INT32]]
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_ceil:
-; CHECK-FP16-NEXT: frintp h0, h0
-; CHECK-FP16-NEXT: ret
-
 ; FALLBACK-NOT: remark:{{.*}}test_ceil
 ; FALLBACK-FP16-NOT: remark:{{.*}}test_ceil
-
 ; GISEL-CVT-LABEL: test_ceil:
 ; GISEL-CVT-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0
 ; GISEL-CVT-NEXT: frintp [[INT32:s[0-9]+]], [[FLOAT32]]
 ; GISEL-CVT-NEXT: fcvt h0, [[INT32]]
 ; GISEL-CVT-NEXT: ret
-
 ; GISEL-FP16-LABEL: test_ceil:
 ; GISEL-FP16-NEXT: frintp h0, h0
 ; GISEL-FP16-NEXT: ret
@@ -1219,122 +444,53 @@
 %r = call half @llvm.ceil.f16(half %a)
 ret half %r
 }
-
-; CHECK-CVT-LABEL: test_trunc:
-; CHECK-CVT-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0
-; CHECK-CVT-NEXT: frintz [[INT32:s[0-9]+]], [[FLOAT32]]
-; CHECK-CVT-NEXT: fcvt h0, [[INT32]]
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_trunc:
-; CHECK-FP16-NEXT: frintz h0, h0
-; CHECK-FP16-NEXT: ret
-
 define half @test_trunc(half %a) #0 {
 %r = call half @llvm.trunc.f16(half %a)
 ret half %r
 }
-
-; CHECK-CVT-LABEL: test_rint:
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: frintx s0, s0
-; CHECK-CVT-NEXT: fcvt h0, s0
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_rint:
-; CHECK-FP16-NEXT: frintx h0, h0
-; CHECK-FP16-NEXT: ret
-
 define half @test_rint(half %a) #0 {
 %r = call half @llvm.rint.f16(half %a)
 ret half %r
 }
-
-; CHECK-CVT-LABEL: test_nearbyint:
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: frinti s0, s0
-; CHECK-CVT-NEXT: fcvt h0, s0
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_nearbyint:
-; CHECK-FP16-NEXT: frinti h0, h0
-; CHECK-FP16-NEXT: ret
-
 define half @test_nearbyint(half %a) #0 {
 %r = call half @llvm.nearbyint.f16(half %a)
 ret half %r
 }
-
-; CHECK-CVT-LABEL: test_round:
-; CHECK-CVT-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0
-; CHECK-CVT-NEXT: frinta [[INT32:s[0-9]+]], [[FLOAT32]]
-; CHECK-CVT-NEXT: fcvt h0, [[INT32]]
-; CHECK-CVT-NEXT: ret
-
 ; GISEL-CVT-LABEL: test_round:
 ; GISEL-CVT-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0
 ; GISEL-CVT-NEXT: frinta [[INT32:s[0-9]+]], [[FLOAT32]]
 ; GISEL-CVT-NEXT: fcvt h0, [[INT32]]
 ; GISEL-CVT-NEXT: ret
-
-
-; CHECK-FP16-LABEL: test_round:
-; CHECK-FP16-NEXT: frinta h0, h0
-; CHECK-FP16-NEXT: ret
-
 ; GISEL-FP16-LABEL: test_round:
 ; GISEL-FP16-NEXT: frinta h0, h0
 ; GISEL-FP16-NEXT: ret
-
 define half @test_round(half %a) #0 {
 %r = call half @llvm.round.f16(half %a)
 ret half %r
 }
-
-; CHECK-CVT-LABEL: test_roundeven:
-; CHECK-CVT-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0
-; CHECK-CVT-NEXT: frintn [[INT32:s[0-9]+]], [[FLOAT32]]
-; CHECK-CVT-NEXT: fcvt h0, [[INT32]]
-; CHECK-CVT-NEXT: ret
-
 ; GISEL-CVT-LABEL: test_roundeven:
 ; GISEL-CVT-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0
 ; GISEL-CVT-NEXT: frintn [[INT32:s[0-9]+]], [[FLOAT32]]
 ; GISEL-CVT-NEXT: fcvt h0, [[INT32]]
 ; GISEL-CVT-NEXT: ret
-
-
-; CHECK-FP16-LABEL: test_roundeven:
-; CHECK-FP16-NEXT: frintn h0, h0
-; CHECK-FP16-NEXT: ret
-
 ; GISEL-FP16-LABEL: test_roundeven:
 ; GISEL-FP16-NEXT: frintn h0, h0
 ; GISEL-FP16-NEXT: ret
-
 define half @test_roundeven(half %a) #0 {
 %r = call half @llvm.roundeven.f16(half %a)
 ret half %r
 }
-
-; CHECK-CVT-LABEL: test_fmuladd:
-; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: fmul s0, s0, s1
-; CHECK-CVT-NEXT: fcvt h0, s0
-; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: fcvt s1, h2
-; CHECK-CVT-NEXT: fadd s0, s0, s1
-; CHECK-CVT-NEXT: fcvt h0, s0
-; CHECK-CVT-NEXT: ret
-
-; CHECK-FP16-LABEL: test_fmuladd:
-; CHECK-FP16-NEXT: fmadd h0, h0, h1, h2
-; CHECK-FP16-NEXT: ret
-
 define half @test_fmuladd(half %a, half %b, half %c) #0 {
 %r = call half @llvm.fmuladd.f16(half %a, half %b, half %c)
 ret half %r
 }
-
 attributes #0 = { nounwind }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK-COMMON: {{.*}}
+; CHECK-CVT: {{.*}}
+; CHECK-FP16: {{.*}}
+; FALLBACK: {{.*}}
+; FALLBACK-FP16: {{.*}}
+; GISEL: {{.*}}
+; GISEL-CVT: {{.*}}
+; GISEL-FP16: {{.*}}
diff --git a/llvm/test/CodeGen/AArch64/fadd-combines.ll b/llvm/test/CodeGen/AArch64/fadd-combines.ll
--- a/llvm/test/CodeGen/AArch64/fadd-combines.ll
+++ b/llvm/test/CodeGen/AArch64/fadd-combines.ll
@@ -28,9 +28,9 @@
 define double @test3(double %a, double %b, double %c) {
 ; CHECK-LABEL: test3:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: fadd d2, d2, d2
 ; CHECK-NEXT: fmul d0, d0, d1
-; CHECK-NEXT: fsub d0, d0, d2
+; CHECK-NEXT: fadd d1, d2, d2
+; CHECK-NEXT: fsub d0, d0, d1
 ; CHECK-NEXT: ret
 %mul = fmul double %a, %b
 %mul1 = fmul double %c, 2.000000e+00
@@ -41,9 +41,9 @@
 define double @test4(double %a, double %b, double %c) {
 ; CHECK-LABEL: test4:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: fadd d2, d2, d2
 ; CHECK-NEXT: fmul d0, d0, d1
-; CHECK-NEXT: fsub d0, d0, d2
+; CHECK-NEXT: fadd d1, d2, d2
+; CHECK-NEXT: fsub d0, d0, d1
 ; CHECK-NEXT: ret
 %mul = fmul double %a, %b
 %mul1 = fmul double %c, -2.000000e+00
@@ -132,8 +132,8 @@
 define float @fadd_const_multiuse_fmf(float %x) {
 ; CHECK-LABEL: fadd_const_multiuse_fmf:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #1109917696
-; CHECK-NEXT: mov w9, #1114374144
+; CHECK-NEXT: mov w8, #1109917696 // =0x42280000
+; CHECK-NEXT: mov w9, #1114374144 // =0x426c0000
 ; CHECK-NEXT: fmov s1, w8
 ; CHECK-NEXT: fmov s2, w9
 ; CHECK-NEXT: fadd s1, s0, s1
@@ -150,8 +150,8 @@
 define float @fadd_const_multiuse_attr(float %x) {
 ; CHECK-LABEL: fadd_const_multiuse_attr:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #1109917696
-; CHECK-NEXT: mov w9, #1114374144
+; CHECK-NEXT: mov w8, #1109917696 // =0x42280000
+; CHECK-NEXT: mov w9, #1114374144 // =0x426c0000
 ; CHECK-NEXT: fmov s1, w8
 ; CHECK-NEXT: fmov s2, w9
 ; CHECK-NEXT: fadd s1, s0, s1
diff --git a/llvm/test/CodeGen/AArch64/faddp-half.ll b/llvm/test/CodeGen/AArch64/faddp-half.ll
--- a/llvm/test/CodeGen/AArch64/faddp-half.ll
+++ b/llvm/test/CodeGen/AArch64/faddp-half.ll
@@ -152,51 +152,51 @@
 ; CHECKNOFP16-NEXT: rev32 v2.8h, v0.8h
 ; CHECKNOFP16-NEXT: mov h1, v0.h[1]
 ; CHECKNOFP16-NEXT: fcvt s4, h0
-; CHECKNOFP16-NEXT: mov h6, v0.h[2]
+; CHECKNOFP16-NEXT: mov h5, v0.h[2]
 ; CHECKNOFP16-NEXT: mov h16, v0.h[3]
 ; CHECKNOFP16-NEXT: mov h3, v2.h[1]
+; CHECKNOFP16-NEXT: fcvt s6, h2
 ; CHECKNOFP16-NEXT: fcvt s1, h1
-; CHECKNOFP16-NEXT: fcvt s5, h2
 ; CHECKNOFP16-NEXT: mov h7, v2.h[2]
-; CHECKNOFP16-NEXT: mov h17, v2.h[3]
+; CHECKNOFP16-NEXT: fcvt s5, h5
+; CHECKNOFP16-NEXT: fcvt s16, h16
 ; CHECKNOFP16-NEXT: fcvt s3, h3
-; CHECKNOFP16-NEXT: fadd s4, s5, s4
-; CHECKNOFP16-NEXT: fcvt s5, h6
-; CHECKNOFP16-NEXT: fcvt s6, h7
-; CHECKNOFP16-NEXT: fcvt s7, h16
-; CHECKNOFP16-NEXT: fcvt s16, h17
+; CHECKNOFP16-NEXT: fadd s4, s6, s4
+; CHECKNOFP16-NEXT: mov h6, v2.h[3]
+; CHECKNOFP16-NEXT: fcvt s7, h7
 ; CHECKNOFP16-NEXT: fadd s3, s3, s1
+; CHECKNOFP16-NEXT: fcvt s6, h6
 ; CHECKNOFP16-NEXT: fcvt h1, s4
-; CHECKNOFP16-NEXT: fadd s4, s6, s5
+; CHECKNOFP16-NEXT: fadd s4, s7, s5
 ; CHECKNOFP16-NEXT: mov h5, v0.h[4]
-; CHECKNOFP16-NEXT: mov h6, v2.h[4]
-; CHECKNOFP16-NEXT: fadd s7, s16, s7
+; CHECKNOFP16-NEXT: mov h7, v2.h[4]
 ; CHECKNOFP16-NEXT: fcvt h3, s3
+; CHECKNOFP16-NEXT: fadd s6, s6, s16
 ; CHECKNOFP16-NEXT: mov h16, v2.h[5]
-; CHECKNOFP16-NEXT: fcvt s5, h5
-; CHECKNOFP16-NEXT: fcvt s6, h6
-; CHECKNOFP16-NEXT: fcvt h7, s7
+; CHECKNOFP16-NEXT: fcvt h4, s4
 ; CHECKNOFP16-NEXT: mov v1.h[1], v3.h[0]
-; CHECKNOFP16-NEXT: fcvt h3, s4
-; CHECKNOFP16-NEXT: mov h4, v0.h[5]
-; CHECKNOFP16-NEXT: fadd s5, s6, s5
-; CHECKNOFP16-NEXT: mov h6, v0.h[6]
-; CHECKNOFP16-NEXT: mov v1.h[2], v3.h[0]
+; CHECKNOFP16-NEXT: fcvt s3, h5
+; CHECKNOFP16-NEXT: fcvt s5, h7
+; CHECKNOFP16-NEXT: mov h7, v0.h[5]
+; CHECKNOFP16-NEXT: fcvt h6, s6
+; CHECKNOFP16-NEXT: fcvt s16, h16
+; CHECKNOFP16-NEXT: mov v1.h[2], v4.h[0]
+; CHECKNOFP16-NEXT: mov h4, v0.h[6]
+; CHECKNOFP16-NEXT: fadd s3, s5, s3
+; CHECKNOFP16-NEXT: mov h5, v2.h[6]
+; CHECKNOFP16-NEXT: fcvt s7, h7
 ; CHECKNOFP16-NEXT: mov h0, v0.h[7]
-; CHECKNOFP16-NEXT: fcvt s3, h4
-; CHECKNOFP16-NEXT: fcvt s4, h16
-; CHECKNOFP16-NEXT: mov h16, v2.h[6]
 ; CHECKNOFP16-NEXT: mov h2, v2.h[7]
-; CHECKNOFP16-NEXT: mov v1.h[3], v7.h[0]
+; CHECKNOFP16-NEXT: mov v1.h[3], v6.h[0]
+; CHECKNOFP16-NEXT: fcvt s4, h4
+; CHECKNOFP16-NEXT: fcvt h3, s3
+; CHECKNOFP16-NEXT: fcvt s5, h5
+; CHECKNOFP16-NEXT: fadd s6, s16, s7
 ; CHECKNOFP16-NEXT: fcvt s0, h0
-; CHECKNOFP16-NEXT: fadd s3, s4, s3
-; CHECKNOFP16-NEXT: fcvt h4, s5
-; CHECKNOFP16-NEXT: fcvt s5, h6
-; CHECKNOFP16-NEXT: fcvt s6, h16
 ; CHECKNOFP16-NEXT: fcvt s2, h2
-; CHECKNOFP16-NEXT: mov v1.h[4], v4.h[0]
-; CHECKNOFP16-NEXT: fcvt h3, s3
-; CHECKNOFP16-NEXT: fadd s4, s6, s5
+; CHECKNOFP16-NEXT: mov v1.h[4], v3.h[0]
+; CHECKNOFP16-NEXT: fadd s4, s5, s4
+; CHECKNOFP16-NEXT: fcvt h3, s6
 ; CHECKNOFP16-NEXT: fadd s0, s2, s0
 ; CHECKNOFP16-NEXT: mov v1.h[5], v3.h[0]
 ; CHECKNOFP16-NEXT: fcvt h3, s4
@@ -221,112 +221,112 @@
 ;
 ; CHECKNOFP16-LABEL: addp_v16f16:
 ; CHECKNOFP16: // %bb.0: // %entry
-; CHECKNOFP16-NEXT: rev32 v4.8h, v0.8h
+; CHECKNOFP16-NEXT: rev32 v5.8h, v0.8h
+; CHECKNOFP16-NEXT: rev32 v4.8h, v1.8h
 ; CHECKNOFP16-NEXT: mov h2, v0.h[1]
-; CHECKNOFP16-NEXT: fcvt s6, h0
-; CHECKNOFP16-NEXT: rev32 v5.8h, v1.8h
+; CHECKNOFP16-NEXT: mov h6, v1.h[1]
+; CHECKNOFP16-NEXT: fcvt s16, h0
 ; CHECKNOFP16-NEXT: mov h17, v0.h[2]
-; CHECKNOFP16-NEXT: mov h18, v0.h[3]
-; CHECKNOFP16-NEXT: mov h3, v4.h[1]
+; CHECKNOFP16-NEXT: fcvt s20, h1
+; CHECKNOFP16-NEXT: mov h21, v1.h[2]
+; CHECKNOFP16-NEXT: mov h3, v5.h[1]
+; CHECKNOFP16-NEXT: mov h7, v4.h[1]
 ; CHECKNOFP16-NEXT: fcvt s2, h2
-; CHECKNOFP16-NEXT: fcvt s7, h4
-; CHECKNOFP16-NEXT: mov h20, v4.h[2]
-; CHECKNOFP16-NEXT: mov h16, v5.h[1]
-; CHECKNOFP16-NEXT: fcvt s19, h5
-; CHECKNOFP16-NEXT: mov h21, v4.h[3]
-; CHECKNOFP16-NEXT: mov h22, v0.h[4]
+; CHECKNOFP16-NEXT: fcvt s18, h5
+; CHECKNOFP16-NEXT: mov h19, v5.h[2]
+; CHECKNOFP16-NEXT: fcvt s6, h6
+; CHECKNOFP16-NEXT: fcvt s22, h4
+; CHECKNOFP16-NEXT: mov h23, v4.h[2]
+; CHECKNOFP16-NEXT: fcvt s17, h17
+; CHECKNOFP16-NEXT: mov h24, v5.h[3]
+; CHECKNOFP16-NEXT: fcvt s21, h21
+; CHECKNOFP16-NEXT: mov h25, v4.h[6]
 ; CHECKNOFP16-NEXT: fcvt s3, h3
-; CHECKNOFP16-NEXT: fadd s6, s7, s6
-; CHECKNOFP16-NEXT: mov h7, v1.h[1]
-; CHECKNOFP16-NEXT: fcvt s16, h16
-; CHECKNOFP16-NEXT: fadd s3, s3, s2
-; CHECKNOFP16-NEXT: fcvt h2, s6
-; CHECKNOFP16-NEXT: fcvt s6, h1
 ; CHECKNOFP16-NEXT: fcvt s7, h7
-; CHECKNOFP16-NEXT: fcvt h3, s3
-; CHECKNOFP16-NEXT: mov v2.h[1], v3.h[0]
-; CHECKNOFP16-NEXT: fadd s3, s19, s6
-; CHECKNOFP16-NEXT: fadd s6, s16, s7
-; CHECKNOFP16-NEXT: fcvt s7, h17
-; CHECKNOFP16-NEXT: fcvt s16, h20
-; CHECKNOFP16-NEXT: fcvt s17, h18
-; CHECKNOFP16-NEXT: fcvt s18, h21
-; CHECKNOFP16-NEXT: mov h19, v1.h[2]
-; CHECKNOFP16-NEXT: mov h20, v5.h[2]
-; CHECKNOFP16-NEXT: fcvt h3, s3
+; CHECKNOFP16-NEXT: fadd s16, s18, s16
+; CHECKNOFP16-NEXT: fcvt s18, h19
+; CHECKNOFP16-NEXT: mov h19, v0.h[3]
+; CHECKNOFP16-NEXT: fadd s20, s22, s20
+; CHECKNOFP16-NEXT: fcvt s22, h23
+; CHECKNOFP16-NEXT: mov h23, v4.h[3]
+; CHECKNOFP16-NEXT: fadd s3, s3, s2
+; CHECKNOFP16-NEXT: fadd s6, s7, s6
+; CHECKNOFP16-NEXT: mov h7, v1.h[3]
+; CHECKNOFP16-NEXT: fcvt h2, s16
+; CHECKNOFP16-NEXT: fadd s16, s18, s17
+; CHECKNOFP16-NEXT: fcvt s18, h19
+; CHECKNOFP16-NEXT: fcvt s19, h24
+; CHECKNOFP16-NEXT: mov h24, v5.h[6]
+; CHECKNOFP16-NEXT: fcvt h17, s3
+; CHECKNOFP16-NEXT: fcvt h3, s20
+; CHECKNOFP16-NEXT: fadd s20, s22, s21
 ; CHECKNOFP16-NEXT: fcvt h6, s6
-; CHECKNOFP16-NEXT: fadd s7, s16, s7
-; CHECKNOFP16-NEXT: mov h16, v1.h[3]
-; CHECKNOFP16-NEXT: fadd s17, s18, s17
-; CHECKNOFP16-NEXT: mov h18, v4.h[4]
-; CHECKNOFP16-NEXT: fcvt s19, h19
-; CHECKNOFP16-NEXT: fcvt s20, h20
+; CHECKNOFP16-NEXT: fcvt s7, h7
+; CHECKNOFP16-NEXT: fcvt s22, h23
+; CHECKNOFP16-NEXT: mov h21, v0.h[4]
+; CHECKNOFP16-NEXT: mov h23, v5.h[4]
+; CHECKNOFP16-NEXT: fcvt h16, s16
+; CHECKNOFP16-NEXT: fadd s18, s19, s18
+; CHECKNOFP16-NEXT: mov h19, v4.h[4]
+; CHECKNOFP16-NEXT: mov v2.h[1], v17.h[0]
+; CHECKNOFP16-NEXT: mov h17, v1.h[4]
 ; CHECKNOFP16-NEXT: mov v3.h[1], v6.h[0]
-; CHECKNOFP16-NEXT: mov h6, v5.h[3]
-; CHECKNOFP16-NEXT: fcvt h7, s7
+; CHECKNOFP16-NEXT: fcvt h6, s20
+; CHECKNOFP16-NEXT: fadd s7, s22, s7
+; CHECKNOFP16-NEXT: fcvt s20, h21
+; CHECKNOFP16-NEXT: mov h21, v0.h[5]
+; CHECKNOFP16-NEXT: mov h22, v5.h[5]
+; CHECKNOFP16-NEXT: fcvt h18, s18
+; CHECKNOFP16-NEXT: fcvt s19, h19
+; CHECKNOFP16-NEXT: mov h5, v5.h[7]
+; CHECKNOFP16-NEXT: mov v2.h[2], v16.h[0]
+; CHECKNOFP16-NEXT: fcvt s16, h23
+; CHECKNOFP16-NEXT: fcvt s17, h17
+; CHECKNOFP16-NEXT: mov v3.h[2], v6.h[0]
+; CHECKNOFP16-NEXT: fcvt h6, s7
+; CHECKNOFP16-NEXT: mov h7, v1.h[5]
+; CHECKNOFP16-NEXT: mov h23, v4.h[5]
+; CHECKNOFP16-NEXT: mov h4, v4.h[7]
+; CHECKNOFP16-NEXT: fcvt s5, h5
+; CHECKNOFP16-NEXT: fadd s16, s16, s20
+; CHECKNOFP16-NEXT: mov h20, v0.h[6]
+; CHECKNOFP16-NEXT: fadd s17, s19, s17
+; CHECKNOFP16-NEXT: mov h19, v1.h[6]
+; CHECKNOFP16-NEXT: mov v2.h[3], v18.h[0]
+; CHECKNOFP16-NEXT: fcvt s18, h21
+; CHECKNOFP16-NEXT: fcvt s21, h22
 ; CHECKNOFP16-NEXT: mov v3.h[3], v6.h[0]
-; CHECKNOFP16-NEXT: fcvt h6, s7
-; CHECKNOFP16-NEXT: fcvt s7, h16
-; CHECKNOFP16-NEXT: fcvt s16, h17
-; CHECKNOFP16-NEXT: fcvt s17, h18
-; CHECKNOFP16-NEXT: fcvt s18, h19
-; CHECKNOFP16-NEXT: mov h19, v0.h[6]
-; CHECKNOFP16-NEXT: mov h0, v0.h[7]
-; CHECKNOFP16-NEXT: mov v3.h[4], v6.h[0]
-; CHECKNOFP16-NEXT: mov h6, v4.h[6]
-; CHECKNOFP16-NEXT: fadd s7, s16, s7
-; CHECKNOFP16-NEXT: fadd s16, s18, s17
-; CHECKNOFP16-NEXT: mov h17, v1.h[6]
-; CHECKNOFP16-NEXT: mov h18, v5.h[6]
+; CHECKNOFP16-NEXT: fcvt s6, h7
+; CHECKNOFP16-NEXT: fcvt s7, h23
+; CHECKNOFP16-NEXT: fcvt s22, h24
+; CHECKNOFP16-NEXT: fcvt s23, h25
+; CHECKNOFP16-NEXT: fcvt h16, s16
+; CHECKNOFP16-NEXT: fcvt s20, h20
+; CHECKNOFP16-NEXT: fcvt h17, s17
 ; CHECKNOFP16-NEXT: fcvt s19, h19
-; CHECKNOFP16-NEXT: fcvt s6, h6
-; CHECKNOFP16-NEXT: mov h4, v4.h[7]
+; CHECKNOFP16-NEXT: mov h0, v0.h[7]
 ; CHECKNOFP16-NEXT: mov h1, v1.h[7]
-; CHECKNOFP16-NEXT: mov h5, v5.h[7]
-; CHECKNOFP16-NEXT: fcvt s17, h17
-; CHECKNOFP16-NEXT: fcvt h7, s7
-; CHECKNOFP16-NEXT: fcvt s18, h18
-; CHECKNOFP16-NEXT: fcvt s0, h0
-; CHECKNOFP16-NEXT: fadd s6, s6, s19
+; CHECKNOFP16-NEXT: fadd s18, s21, s18
 ; CHECKNOFP16-NEXT: fcvt s4, h4
+; CHECKNOFP16-NEXT: fadd s6, s7, s6
+; CHECKNOFP16-NEXT: mov v2.h[4], v16.h[0]
+; CHECKNOFP16-NEXT: fadd s7, s22, s20
+; CHECKNOFP16-NEXT: mov v3.h[4], v17.h[0]
+; CHECKNOFP16-NEXT: fadd s16, s23, s19
+; CHECKNOFP16-NEXT: fcvt s0, h0
 ; CHECKNOFP16-NEXT: fcvt s1, h1
-; CHECKNOFP16-NEXT: fcvt s5, h5
-; CHECKNOFP16-NEXT: fcvt h16, s16
-; CHECKNOFP16-NEXT: fadd s17, s18, s17
-; CHECKNOFP16-NEXT: mov v2.h[5], v7.h[0]
+; CHECKNOFP16-NEXT: fcvt h17, s18
 ; CHECKNOFP16-NEXT: fcvt h6, s6
-; CHECKNOFP16-NEXT: fadd s0, s4, s0
-; CHECKNOFP16-NEXT: fadd s1, s5, s1
-; CHECKNOFP16-NEXT: mov v3.h[5], v16.h[0]
-; CHECKNOFP16-NEXT: fcvt h4, s17
-; CHECKNOFP16-NEXT: mov v2.h[6], v6.h[0]
+; CHECKNOFP16-NEXT: fadd s0, s5, s0
+; CHECKNOFP16-NEXT: fcvt h5, s7
+; CHECKNOFP16-NEXT: fadd s1, s4, s1
+; CHECKNOFP16-NEXT: mov v2.h[5], v17.h[0]
+; CHECKNOFP16-NEXT: mov v3.h[5], v6.h[0]
+; CHECKNOFP16-NEXT: fcvt h6, s16
 ; CHECKNOFP16-NEXT: fcvt h0, s0
 ; CHECKNOFP16-NEXT: fcvt h1, s1
-; CHECKNOFP16-NEXT: mov v3.h[6], v4.h[0]
+; CHECKNOFP16-NEXT: mov v2.h[6], v5.h[0]
+; CHECKNOFP16-NEXT: mov v3.h[6], v6.h[0]
 ; CHECKNOFP16-NEXT: mov v2.h[7], v0.h[0]
 ; CHECKNOFP16-NEXT: mov v3.h[7], v1.h[0]
 ; CHECKNOFP16-NEXT: mov v0.16b, v2.16b
diff --git a/llvm/test/CodeGen/AArch64/faddp.ll b/llvm/test/CodeGen/AArch64/faddp.ll
--- a/llvm/test/CodeGen/AArch64/faddp.ll
+++ b/llvm/test/CodeGen/AArch64/faddp.ll
@@ -216,10 +216,10 @@
 define <8 x float> @addp_v8f32(<8 x float> %a) {
 ; CHECK-LABEL: addp_v8f32:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: rev64 v2.4s, v0.4s
-; CHECK-NEXT: rev64 v3.4s, v1.4s
-; CHECK-NEXT: fadd v0.4s, v2.4s, v0.4s
-; CHECK-NEXT: fadd v1.4s, v3.4s, v1.4s
+; CHECK-NEXT: rev64 v2.4s, v1.4s
+; CHECK-NEXT: rev64 v3.4s, v0.4s
+; CHECK-NEXT: fadd v0.4s, v3.4s, v0.4s
+; CHECK-NEXT: fadd v1.4s, v2.4s, v1.4s
 ; CHECK-NEXT: ret
 entry:
 %s = shufflevector <8 x float> %a, <8 x float> poison, <8 x i32>
diff --git a/llvm/test/CodeGen/AArch64/fast-isel-addressing-modes.ll b/llvm/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
--- a/llvm/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
+++ b/llvm/test/CodeGen/AArch64/fast-isel-addressing-modes.ll
@@ -97,13 +97,13 @@
 define void @store_breg_i1_2(ptr %a) {
 ; SDAG-LABEL: store_breg_i1_2:
 ; SDAG: ; %bb.0:
-; SDAG-NEXT: mov w8, #1
+; SDAG-NEXT: mov w8, #1 ; =0x1
 ; SDAG-NEXT: strb w8, [x0]
 ; SDAG-NEXT: ret
 ;
 ; FAST-LABEL: store_breg_i1_2:
 ; FAST: ; %bb.0:
-; FAST-NEXT: mov w8, #1
+; FAST-NEXT: mov w8, #1 ; =0x1
 ; FAST-NEXT: and w8, w8, #0x1
 ; FAST-NEXT: strb w8, [x0]
 ; FAST-NEXT: ret
@@ -169,13 +169,13 @@
 define i32 @load_immoff_1() {
 ; SDAG-LABEL: load_immoff_1:
 ; SDAG: ; %bb.0:
-; SDAG-NEXT: mov w8, #128
+; SDAG-NEXT: mov w8, #128 ; =0x80
 ; SDAG-NEXT: ldr w0, [x8]
 ; SDAG-NEXT: ret
 ;
 ; FAST-LABEL: load_immoff_1:
 ; FAST: ; %bb.0:
-; FAST-NEXT: mov x8, #128
+; FAST-NEXT: mov x8, #128 ; =0x80
 ; FAST-NEXT: ldr w0, [x8]
 ; FAST-NEXT: ret
 %1 = inttoptr i64 128 to ptr
@@ -250,7 +250,7 @@
 define i32 @load_breg_immoff_6(i64 %a) {
 ; SDAG-LABEL: load_breg_immoff_6:
 ; SDAG: ; %bb.0:
-; SDAG-NEXT: mov w8, #16384
+; SDAG-NEXT: mov w8, #16384 ; =0x4000
 ; SDAG-NEXT: ldr w0, [x0, x8]
 ; SDAG-NEXT: ret
 ;
@@ -331,7 +331,7 @@
 define void @store_breg_immoff_6(i64 %a) {
 ; SDAG-LABEL: store_breg_immoff_6:
 ; SDAG: ; %bb.0:
-; SDAG-NEXT: mov w8, #16384
+; SDAG-NEXT: mov w8, #16384 ; =0x4000
 ; SDAG-NEXT: str wzr, [x0, x8]
 ; SDAG-NEXT: ret
 ;
@@ -410,7 +410,7 @@
 define i64 @load_breg_offreg_immoff_2(i64 %a, i64 %b) {
 ; SDAG-LABEL: load_breg_offreg_immoff_2:
 ; SDAG: ; %bb.0:
-; SDAG-NEXT: mov w8, #61440
+; SDAG-NEXT: mov w8, #61440 ; =0xf000
 ; SDAG-NEXT: add x9, x0, x1
 ; SDAG-NEXT: ldr x0, [x9, x8]
 ; SDAG-NEXT: ret
@@ -772,10 +772,10 @@
 ;
 ; FAST-LABEL: kill_reg:
 ; FAST: ; %bb.0:
-; FAST-NEXT: ldr x8, [x0, #88]
-; FAST-NEXT: sub x9, x0, #8
-; FAST-NEXT: add x9, x9, #96
-; FAST-NEXT: add x0, x9, x8
+; FAST-NEXT: sub x8, x0, #8
+; FAST-NEXT: ldr x9, [x0, #88]
+; FAST-NEXT: add x8, x8, #96
+; FAST-NEXT: add x0, x8, x9
 ; FAST-NEXT: ret
 %1 = sub i64 %a, 8
 %2 = add i64 %1, 96
@@ -786,25 +786,15 @@
 }
 define void @store_fi(i64 %i) {
-; SDAG-LABEL: store_fi:
-; SDAG: ; %bb.0:
-; SDAG-NEXT: sub sp, sp, #32
-; SDAG-NEXT: .cfi_def_cfa_offset 32
-; SDAG-NEXT: mov x8, sp
-; SDAG-NEXT: mov w9, #47
-; SDAG-NEXT: str w9, [x8, x0, lsl #2]
-; SDAG-NEXT: add sp, sp, #32
-; SDAG-NEXT: ret
-;
-; FAST-LABEL: store_fi:
-; FAST: ; %bb.0:
-; FAST-NEXT: sub sp, sp, #32
-; FAST-NEXT: .cfi_def_cfa_offset 32
-; FAST-NEXT: mov w8, #47
-; FAST-NEXT: mov x9, sp
-; FAST-NEXT: str w8, [x9, x0, lsl #2]
-; FAST-NEXT: add sp, sp, #32
-; FAST-NEXT: ret
+; CHECK-LABEL: store_fi:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: sub sp, sp, #32
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: mov w9, #47 ; =0x2f
+; CHECK-NEXT: str w9, [x8, x0, lsl #2]
+; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: ret
 %1 = alloca [8 x i32]
 %2 = ptrtoint ptr %1 to i64
 %3 = mul i64 %i, 4
diff --git a/llvm/test/CodeGen/AArch64/fast-isel-gep.ll b/llvm/test/CodeGen/AArch64/fast-isel-gep.ll
--- a/llvm/test/CodeGen/AArch64/fast-isel-gep.ll
+++ b/llvm/test/CodeGen/AArch64/fast-isel-gep.ll
@@ -15,7 +15,7 @@
 define ptr @test_array1(ptr %a, i64 %i) {
 ; CHECK-LABEL: test_array1:
 ; CHECK: ; %bb.0:
-; CHECK-NEXT: mov x8, #4
+; CHECK-NEXT: mov x8, #4 ; =0x4
 ; CHECK-NEXT: madd x0, x1, x8, x0
 ; CHECK-NEXT: ret
 %1 = getelementptr inbounds i32, ptr %a, i64 %i
@@ -43,7 +43,7 @@
 define ptr @test_array4(ptr %a) {
 ; CHECK-LABEL: test_array4:
 ; CHECK: ; %bb.0:
-; CHECK-NEXT: mov x8, #4104
+; CHECK-NEXT: mov x8, #4104 ; =0x1008
 ; CHECK-NEXT: add x0, x0, x8
 ; CHECK-NEXT: ret
 %1 = getelementptr inbounds i32, ptr %a, i64 1026
@@ -54,9 +54,9 @@
 ; CHECK-LABEL: test_array5:
 ; CHECK: ; %bb.0:
 ; CHECK-NEXT: ; kill: def $w1 killed $w1 def $x1
-; CHECK-NEXT: mov x8, #4
-; CHECK-NEXT: sxtw x9, w1
-; CHECK-NEXT: madd x0, x9, x8, x0
+; CHECK-NEXT: sxtw x8, w1
+; CHECK-NEXT: mov x9, #4 ; =0x4
+; CHECK-NEXT: madd x0, x8, x9, x0
 ; CHECK-NEXT: ret
 %1 = getelementptr inbounds i32, ptr %a, i32 %i
 ret ptr %1
diff --git a/llvm/test/CodeGen/AArch64/fast-isel-memcpy.ll b/llvm/test/CodeGen/AArch64/fast-isel-memcpy.ll
--- a/llvm/test/CodeGen/AArch64/fast-isel-memcpy.ll
+++ b/llvm/test/CodeGen/AArch64/fast-isel-memcpy.ll
@@ -5,9 +5,9 @@
 define void @test(i64 %a, ptr %b) {
 ; CHECK-LABEL: test:
 ; CHECK: ; %bb.0:
-; CHECK-NEXT: and x8, x0, #0x7fffffffffffffff
-; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: str x9, [x8]
+; CHECK-NEXT: ldr x8, [x1]
+; CHECK-NEXT: and x9, x0, #0x7fffffffffffffff
+; CHECK-NEXT: str x8, [x9]
 ; CHECK-NEXT: ret
 %1 = and i64 %a, 9223372036854775807
 %2 = inttoptr i64 %1 to ptr
diff --git a/llvm/test/CodeGen/AArch64/fast-isel-shift.ll b/llvm/test/CodeGen/AArch64/fast-isel-shift.ll
--- a/llvm/test/CodeGen/AArch64/fast-isel-shift.ll
+++ b/llvm/test/CodeGen/AArch64/fast-isel-shift.ll
@@ -391,9 +391,9 @@
 define zeroext i8 @lsrv_i8(i8 %a, i8 %b) {
 ; CHECK-LABEL: lsrv_i8:
 ; CHECK: ; %bb.0:
-; CHECK-NEXT: and w8, w1, #0xff
-; CHECK-NEXT: and w9, w0, #0xff
-; CHECK-NEXT: lsr w8, w9, w8
+; CHECK-NEXT: and w8, w0, #0xff
+; CHECK-NEXT: and w9, w1, #0xff
+; CHECK-NEXT: lsr w8, w8, w9
 ; CHECK-NEXT: and w8, w8, #0xff
 ; CHECK-NEXT: uxtb w0, w8
 ; CHECK-NEXT: ret
@@ -458,9 +458,9 @@
 define zeroext i16 @lsrv_i16(i16 %a, i16 %b) {
 ; CHECK-LABEL: lsrv_i16:
 ; CHECK: ; %bb.0:
-; CHECK-NEXT: and w8, w1, #0xffff
-; CHECK-NEXT: and w9, w0, #0xffff
-; CHECK-NEXT: lsr w8, w9, w8
+; CHECK-NEXT: and w8, w0, #0xffff
+; CHECK-NEXT: and w9, w1, #0xffff
+; CHECK-NEXT: lsr w8, w8, w9
 ; CHECK-NEXT: and w8, w8, #0xffff
 ; CHECK-NEXT: uxth w0, w8
 ; CHECK-NEXT: ret
@@ -517,9 +517,9 @@
 define zeroext i8 @asrv_i8(i8 %a, i8 %b) {
 ; CHECK-LABEL: asrv_i8:
 ; CHECK: ; %bb.0:
-; CHECK-NEXT: and w8, w1, #0xff
-; CHECK-NEXT: sxtb w9, w0
-; CHECK-NEXT: asr w8, w9, w8
+; CHECK-NEXT: sxtb w8, w0
+; CHECK-NEXT: and w9, w1, #0xff
+; CHECK-NEXT: asr w8, w8, w9
 ; CHECK-NEXT: and w8, w8, #0xff
 ; CHECK-NEXT: uxtb w0, w8
 ; CHECK-NEXT: ret
@@ -582,9 +582,9 @@
 define zeroext i16 @asrv_i16(i16 %a, i16 %b) {
 ; CHECK-LABEL: asrv_i16:
 ; CHECK: ; %bb.0:
-; CHECK-NEXT: and w8, w1, #0xffff
-; CHECK-NEXT: sxth w9, w0
-; CHECK-NEXT: asr w8, w9, w8
+; CHECK-NEXT: sxth w8, w0
+; CHECK-NEXT: and w9, w1, #0xffff
+; CHECK-NEXT: asr w8, w8, w9
 ; CHECK-NEXT: and w8, w8, #0xffff
 ; CHECK-NEXT: uxth w0, w8
 ; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/fcopysign.ll b/llvm/test/CodeGen/AArch64/fcopysign.ll
--- a/llvm/test/CodeGen/AArch64/fcopysign.ll
+++ b/llvm/test/CodeGen/AArch64/fcopysign.ll
@@ -95,8 +95,8 @@
 define float @copysign32(float %a, float %b) {
 ; CHECK-LABEL: copysign32:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT: mvni v2.4s, #128, lsl #24
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
 ; CHECK-NEXT: // kill: def $s1 killed $s1 def $q1
 ; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
@@ -156,8 +156,8 @@
 ; CHECK-NONEON-NEXT: fcvt s0, h0
 ; CHECK-NONEON-NEXT: str h1, [sp, #12]
 ; CHECK-NONEON-NEXT: ldrb w8, [sp, #13]
-; CHECK-NONEON-NEXT: fabs s0, s0
 ; CHECK-NONEON-NEXT: tst w8, #0x80
+; CHECK-NONEON-NEXT: fabs s0, s0
 ; CHECK-NONEON-NEXT: fneg s1, s0
 ; CHECK-NONEON-NEXT: fcsel s0, s1, s0, ne
 ; CHECK-NONEON-NEXT: fcvt h0, s0
diff --git a/llvm/test/CodeGen/AArch64/fcvt_combine.ll b/llvm/test/CodeGen/AArch64/fcvt_combine.ll
--- a/llvm/test/CodeGen/AArch64/fcvt_combine.ll
+++ b/llvm/test/CodeGen/AArch64/fcvt_combine.ll
@@ -110,7 +110,7 @@
 define <2 x i32> @test10(<2 x float> %f) {
 ; CHECK-LABEL: test10:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #2143289344
+; CHECK-NEXT: mov w8, #2143289344 // =0x7fc00000
 ; CHECK-NEXT: dup v0.2s, w8
 ; CHECK-NEXT: fcvtzu v0.2s, v0.2s
 ; CHECK-NEXT: ret
@@ -180,48 +180,48 @@
 define <8 x i16> @test_v8f16(<8 x half> %in) {
 ; CHECK-NO16-LABEL: test_v8f16:
 ; CHECK-NO16: // %bb.0:
-; CHECK-NO16-NEXT: mov h2, v0.h[4]
-; CHECK-NO16-NEXT: mov h3, v0.h[5]
-; CHECK-NO16-NEXT: mov h4, v0.h[1]
+; CHECK-NO16-NEXT: mov h2, v0.h[1]
+; CHECK-NO16-NEXT: mov h3, v0.h[4]
+; CHECK-NO16-NEXT: mov h4, v0.h[5]
 ; CHECK-NO16-NEXT: mov h5, v0.h[2]
-; CHECK-NO16-NEXT: mov h6, v0.h[6]
-; CHECK-NO16-NEXT:
fcvt s7, h0 +; CHECK-NO16-NEXT: fcvt s6, h0 +; CHECK-NO16-NEXT: mov h7, v0.h[6] ; CHECK-NO16-NEXT: fmov s1, #4.00000000 ; CHECK-NO16-NEXT: mov h16, v0.h[3] +; CHECK-NO16-NEXT: mov h0, v0.h[7] ; CHECK-NO16-NEXT: fcvt s2, h2 ; CHECK-NO16-NEXT: fcvt s3, h3 ; CHECK-NO16-NEXT: fcvt s4, h4 -; CHECK-NO16-NEXT: mov h0, v0.h[7] +; CHECK-NO16-NEXT: fmul s6, s6, s1 ; CHECK-NO16-NEXT: fcvt s5, h5 -; CHECK-NO16-NEXT: fcvt s6, h6 -; CHECK-NO16-NEXT: fmul s7, s7, s1 +; CHECK-NO16-NEXT: fcvt s7, h7 ; CHECK-NO16-NEXT: fcvt s16, h16 +; CHECK-NO16-NEXT: fcvt s0, h0 ; CHECK-NO16-NEXT: fmul s2, s2, s1 ; CHECK-NO16-NEXT: fmul s3, s3, s1 ; CHECK-NO16-NEXT: fmul s4, s4, s1 -; CHECK-NO16-NEXT: fcvt s0, h0 ; CHECK-NO16-NEXT: fmul s5, s5, s1 -; CHECK-NO16-NEXT: fmul s6, s6, s1 -; CHECK-NO16-NEXT: fcvt h7, s7 +; CHECK-NO16-NEXT: fcvt h6, s6 +; CHECK-NO16-NEXT: fmul s7, s7, s1 ; CHECK-NO16-NEXT: fmul s16, s16, s1 +; CHECK-NO16-NEXT: fmul s0, s0, s1 ; CHECK-NO16-NEXT: fcvt h2, s2 ; CHECK-NO16-NEXT: fcvt h3, s3 ; CHECK-NO16-NEXT: fcvt h4, s4 -; CHECK-NO16-NEXT: fmul s0, s0, s1 -; CHECK-NO16-NEXT: fcvt h1, s5 -; CHECK-NO16-NEXT: fcvt h5, s6 -; CHECK-NO16-NEXT: mov v2.h[1], v3.h[0] -; CHECK-NO16-NEXT: fcvt h3, s16 -; CHECK-NO16-NEXT: mov v7.h[1], v4.h[0] +; CHECK-NO16-NEXT: fcvt h5, s5 +; CHECK-NO16-NEXT: fcvt h1, s7 ; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: mov v2.h[2], v5.h[0] -; CHECK-NO16-NEXT: mov v7.h[2], v1.h[0] -; CHECK-NO16-NEXT: mov v2.h[3], v0.h[0] -; CHECK-NO16-NEXT: mov v7.h[3], v3.h[0] -; CHECK-NO16-NEXT: fcvtl v0.4s, v2.4h -; CHECK-NO16-NEXT: fcvtl v1.4s, v7.4h -; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NO16-NEXT: mov v6.h[1], v2.h[0] +; CHECK-NO16-NEXT: fcvt h2, s16 +; CHECK-NO16-NEXT: mov v3.h[1], v4.h[0] +; CHECK-NO16-NEXT: mov v6.h[2], v5.h[0] +; CHECK-NO16-NEXT: mov v3.h[2], v1.h[0] +; CHECK-NO16-NEXT: mov v6.h[3], v2.h[0] +; CHECK-NO16-NEXT: mov v3.h[3], v0.h[0] +; CHECK-NO16-NEXT: fcvtl v1.4s, v6.4h +; CHECK-NO16-NEXT: fcvtl v0.4s, v3.4h ; CHECK-NO16-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s ; CHECK-NO16-NEXT: uzp1 v0.8h, v1.8h, v0.8h ; CHECK-NO16-NEXT: ret ; @@ -331,9 +331,9 @@ ; CHECK-NEXT: fmul v0.2d, v0.2d, v1.2d ; CHECK-NEXT: mov d1, v0.d[1] ; CHECK-NEXT: fcvtzs w8, d0 +; CHECK-NEXT: fcvtzs w9, d1 ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fcvtzs w8, d1 -; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %mul.i = fmul <2 x double> %d, @@ -376,9 +376,9 @@ ; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s ; CHECK-NEXT: mov s1, v0.s[1] ; CHECK-NEXT: fcvtzs x8, s0 +; CHECK-NEXT: fcvtzs x9, s1 ; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fcvtzs x8, s1 -; CHECK-NEXT: mov v0.d[1], x8 +; CHECK-NEXT: mov v0.d[1], x9 ; CHECK-NEXT: ret %mul.i = fmul <2 x float> %f, %vcvt.i = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> %mul.i) @@ -426,7 +426,7 @@ define <2 x i32> @test10_sat(<2 x float> %f) { ; CHECK-LABEL: test10_sat: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #2143289344 +; CHECK-NEXT: mov w8, #2143289344 // =0x7fc00000 ; CHECK-NEXT: dup v0.2s, w8 ; CHECK-NEXT: fcvtzu v0.2s, v0.2s ; CHECK-NEXT: ret @@ -498,101 +498,101 @@ ; CHECK-NO16: // %bb.0: ; CHECK-NO16-NEXT: mov h2, v0.h[4] ; CHECK-NO16-NEXT: mov h3, v0.h[5] +; CHECK-NO16-NEXT: mov w9, #32767 // =0x7fff ; CHECK-NO16-NEXT: mov h4, v0.h[6] ; CHECK-NO16-NEXT: fmov s1, #4.00000000 +; CHECK-NO16-NEXT: mov w11, #-32768 // =0xffff8000 ; CHECK-NO16-NEXT: mov h5, v0.h[7] ; CHECK-NO16-NEXT: mov h6, v0.h[1] ; CHECK-NO16-NEXT: mov h7, v0.h[2] ; 
CHECK-NO16-NEXT: fcvt s16, h0 +; CHECK-NO16-NEXT: mov h0, v0.h[3] ; CHECK-NO16-NEXT: fcvt s2, h2 ; CHECK-NO16-NEXT: fcvt s3, h3 ; CHECK-NO16-NEXT: fcvt s4, h4 -; CHECK-NO16-NEXT: mov h0, v0.h[3] ; CHECK-NO16-NEXT: fcvt s5, h5 ; CHECK-NO16-NEXT: fcvt s6, h6 -; CHECK-NO16-NEXT: mov w9, #32767 -; CHECK-NO16-NEXT: mov w10, #-32768 +; CHECK-NO16-NEXT: fcvt s0, h0 ; CHECK-NO16-NEXT: fmul s2, s2, s1 ; CHECK-NO16-NEXT: fmul s3, s3, s1 ; CHECK-NO16-NEXT: fmul s4, s4, s1 -; CHECK-NO16-NEXT: fcvt s0, h0 ; CHECK-NO16-NEXT: fmul s5, s5, s1 ; CHECK-NO16-NEXT: fmul s6, s6, s1 +; CHECK-NO16-NEXT: fmul s0, s0, s1 ; CHECK-NO16-NEXT: fcvt h2, s2 ; CHECK-NO16-NEXT: fcvt h3, s3 -; CHECK-NO16-NEXT: fmul s0, s0, s1 +; CHECK-NO16-NEXT: fcvt h4, s4 ; CHECK-NO16-NEXT: fcvt h5, s5 ; CHECK-NO16-NEXT: fcvt h6, s6 +; CHECK-NO16-NEXT: fcvt h0, s0 ; CHECK-NO16-NEXT: mov v2.h[1], v3.h[0] -; CHECK-NO16-NEXT: fcvt h3, s4 -; CHECK-NO16-NEXT: fcvt s4, h7 +; CHECK-NO16-NEXT: fcvt s3, h7 ; CHECK-NO16-NEXT: fmul s7, s16, s1 -; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: mov v2.h[2], v3.h[0] -; CHECK-NO16-NEXT: fmul s3, s4, s1 +; CHECK-NO16-NEXT: mov v2.h[2], v4.h[0] +; CHECK-NO16-NEXT: fmul s3, s3, s1 ; CHECK-NO16-NEXT: fcvt h4, s7 ; CHECK-NO16-NEXT: mov v2.h[3], v5.h[0] ; CHECK-NO16-NEXT: fcvt h1, s3 ; CHECK-NO16-NEXT: mov v4.h[1], v6.h[0] ; CHECK-NO16-NEXT: fcvtl v2.4s, v2.4h ; CHECK-NO16-NEXT: mov v4.h[2], v1.h[0] -; CHECK-NO16-NEXT: mov s1, v2.s[1] -; CHECK-NO16-NEXT: fcvtzs w11, s2 +; CHECK-NO16-NEXT: mov s3, v2.s[1] ; CHECK-NO16-NEXT: mov v4.h[3], v0.h[0] ; CHECK-NO16-NEXT: mov s0, v2.s[2] +; CHECK-NO16-NEXT: fcvtzs w10, s2 ; CHECK-NO16-NEXT: mov s2, v2.s[3] -; CHECK-NO16-NEXT: fcvtzs w8, s1 +; CHECK-NO16-NEXT: fcvtzs w8, s3 ; CHECK-NO16-NEXT: fcvtl v1.4s, v4.4h ; CHECK-NO16-NEXT: fcvtzs w12, s0 -; CHECK-NO16-NEXT: cmp w8, w9 ; CHECK-NO16-NEXT: fcvtzs w13, s2 -; CHECK-NO16-NEXT: csel w8, w8, w9, lt -; CHECK-NO16-NEXT: cmn w8, #8, lsl #12 // =32768 -; CHECK-NO16-NEXT: csel w8, w8, w10, gt -; CHECK-NO16-NEXT: cmp w11, w9 -; CHECK-NO16-NEXT: csel w11, w11, w9, lt +; CHECK-NO16-NEXT: cmp w8, w9 ; CHECK-NO16-NEXT: mov s0, v1.s[1] -; CHECK-NO16-NEXT: cmn w11, #8, lsl #12 // =32768 ; CHECK-NO16-NEXT: fcvtzs w15, s1 -; CHECK-NO16-NEXT: csel w11, w11, w10, gt +; CHECK-NO16-NEXT: csel w8, w8, w9, lt +; CHECK-NO16-NEXT: cmn w8, #8, lsl #12 // =32768 +; CHECK-NO16-NEXT: csel w8, w8, w11, gt +; CHECK-NO16-NEXT: cmp w10, w9 +; CHECK-NO16-NEXT: csel w10, w10, w9, lt +; CHECK-NO16-NEXT: fcvtzs w14, s0 +; CHECK-NO16-NEXT: mov s0, v1.s[2] +; CHECK-NO16-NEXT: cmn w10, #8, lsl #12 // =32768 +; CHECK-NO16-NEXT: csel w10, w10, w11, gt ; CHECK-NO16-NEXT: cmp w12, w9 ; CHECK-NO16-NEXT: csel w12, w12, w9, lt ; CHECK-NO16-NEXT: cmn w12, #8, lsl #12 // =32768 -; CHECK-NO16-NEXT: fcvtzs w14, s0 -; CHECK-NO16-NEXT: csel w12, w12, w10, gt +; CHECK-NO16-NEXT: fcvtzs w16, s0 +; CHECK-NO16-NEXT: mov s0, v1.s[3] +; CHECK-NO16-NEXT: csel w12, w12, w11, gt ; CHECK-NO16-NEXT: cmp w13, w9 +; CHECK-NO16-NEXT: fmov s1, w10 ; CHECK-NO16-NEXT: csel w13, w13, w9, lt -; CHECK-NO16-NEXT: mov s0, v1.s[2] ; CHECK-NO16-NEXT: cmn w13, #8, lsl #12 // =32768 -; CHECK-NO16-NEXT: fmov s2, w11 -; CHECK-NO16-NEXT: csel w13, w13, w10, gt +; CHECK-NO16-NEXT: csel w13, w13, w11, gt ; CHECK-NO16-NEXT: cmp w14, w9 +; CHECK-NO16-NEXT: mov v1.s[1], w8 ; CHECK-NO16-NEXT: csel w14, w14, w9, lt +; CHECK-NO16-NEXT: fcvtzs w8, s0 ; CHECK-NO16-NEXT: cmn w14, #8, lsl #12 // =32768 -; CHECK-NO16-NEXT: csel w14, w14, w10, gt +; CHECK-NO16-NEXT: csel w14, w14, w11, gt ; CHECK-NO16-NEXT: 
cmp w15, w9 ; CHECK-NO16-NEXT: csel w15, w15, w9, lt +; CHECK-NO16-NEXT: mov v1.s[2], w12 ; CHECK-NO16-NEXT: cmn w15, #8, lsl #12 // =32768 -; CHECK-NO16-NEXT: csel w11, w15, w10, gt -; CHECK-NO16-NEXT: fcvtzs w15, s0 -; CHECK-NO16-NEXT: mov s0, v1.s[3] -; CHECK-NO16-NEXT: mov v2.s[1], w8 -; CHECK-NO16-NEXT: fmov s1, w11 -; CHECK-NO16-NEXT: cmp w15, w9 -; CHECK-NO16-NEXT: csel w8, w15, w9, lt -; CHECK-NO16-NEXT: fcvtzs w11, s0 +; CHECK-NO16-NEXT: csel w10, w15, w11, gt +; CHECK-NO16-NEXT: cmp w16, w9 +; CHECK-NO16-NEXT: fmov s2, w10 +; CHECK-NO16-NEXT: csel w10, w16, w9, lt +; CHECK-NO16-NEXT: cmn w10, #8, lsl #12 // =32768 +; CHECK-NO16-NEXT: mov v1.s[3], w13 +; CHECK-NO16-NEXT: csel w10, w10, w11, gt +; CHECK-NO16-NEXT: cmp w8, w9 +; CHECK-NO16-NEXT: mov v2.s[1], w14 +; CHECK-NO16-NEXT: csel w8, w8, w9, lt ; CHECK-NO16-NEXT: cmn w8, #8, lsl #12 // =32768 -; CHECK-NO16-NEXT: mov v1.s[1], w14 -; CHECK-NO16-NEXT: csel w8, w8, w10, gt -; CHECK-NO16-NEXT: mov v2.s[2], w12 -; CHECK-NO16-NEXT: cmp w11, w9 -; CHECK-NO16-NEXT: csel w9, w11, w9, lt -; CHECK-NO16-NEXT: mov v1.s[2], w8 -; CHECK-NO16-NEXT: cmn w9, #8, lsl #12 // =32768 -; CHECK-NO16-NEXT: csel w8, w9, w10, gt -; CHECK-NO16-NEXT: mov v2.s[3], w13 -; CHECK-NO16-NEXT: mov v1.s[3], w8 -; CHECK-NO16-NEXT: uzp1 v0.8h, v1.8h, v2.8h +; CHECK-NO16-NEXT: csel w8, w8, w11, gt +; CHECK-NO16-NEXT: mov v2.s[2], w10 +; CHECK-NO16-NEXT: mov v2.s[3], w8 +; CHECK-NO16-NEXT: uzp1 v0.8h, v2.8h, v1.8h ; CHECK-NO16-NEXT: ret ; ; CHECK-FP16-LABEL: test_v8f16_sat: diff --git a/llvm/test/CodeGen/AArch64/fdiv-combine.ll b/llvm/test/CodeGen/AArch64/fdiv-combine.ll --- a/llvm/test/CodeGen/AArch64/fdiv-combine.ll +++ b/llvm/test/CodeGen/AArch64/fdiv-combine.ll @@ -100,8 +100,8 @@ define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 { ; CHECK-LABEL: splat_three_fdiv_4xfloat: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov v4.4s, #1.00000000 ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 +; CHECK-NEXT: fmov v4.4s, #1.00000000 ; CHECK-NEXT: dup v0.4s, v0.s[0] ; CHECK-NEXT: fdiv v4.4s, v4.4s, v0.4s ; CHECK-NEXT: fmul v0.4s, v1.4s, v4.4s @@ -120,8 +120,8 @@ define <4 x float> @splat_fdiv_v4f32(float %D, <4 x float> %a) #1 { ; CHECK-LABEL: splat_fdiv_v4f32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov v2.4s, #1.00000000 ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 +; CHECK-NEXT: fmov v2.4s, #1.00000000 ; CHECK-NEXT: dup v0.4s, v0.s[0] ; CHECK-NEXT: fdiv v0.4s, v2.4s, v0.4s ; CHECK-NEXT: fmul v0.4s, v1.4s, v0.4s @@ -171,8 +171,8 @@ define @splat_fdiv_nxv2f64(double %D, %a) #1 { ; CHECK-LABEL: splat_fdiv_nxv2f64: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: mov z0.d, d0 ; CHECK-NEXT: fdivr z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll b/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll --- a/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll +++ b/llvm/test/CodeGen/AArch64/fixed-vector-deinterleave.ll @@ -9,8 +9,8 @@ ; CHECK-NEXT: mov v1.16b, v2.16b ; CHECK-NEXT: mov v1.h[0], v0.h[1] ; CHECK-NEXT: mov v0.h[1], v2.h[0] -; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q1 ; CHECK-NEXT: ret %retval = call {<2 x half>, <2 x half>} @llvm.experimental.vector.deinterleave2.v4f16(<4 x half> %vec) ret {<2 x half>, <2 x half>} 
%retval diff --git a/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll b/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll --- a/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll +++ b/llvm/test/CodeGen/AArch64/fixed-vector-interleave.ll @@ -13,9 +13,9 @@ define <8 x half> @interleave2_v8f16(<4 x half> %vec0, <4 x half> %vec1) { ; CHECK-LABEL: interleave2_v8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI1_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: adrp x8, .LCPI1_0 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_0] ; CHECK-NEXT: tbl v0.16b, { v0.16b }, v1.16b diff --git a/llvm/test/CodeGen/AArch64/flags-multiuse.ll b/llvm/test/CodeGen/AArch64/flags-multiuse.ll --- a/llvm/test/CodeGen/AArch64/flags-multiuse.ll +++ b/llvm/test/CodeGen/AArch64/flags-multiuse.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -mtriple=aarch64-none-linux-gnu -aarch64-enable-atomic-cfg-tidy=0 -verify-machineinstrs -o - %s | FileCheck %s ; LLVM should be able to cope with multiple uses of the same flag-setting @@ -10,26 +11,45 @@ define i32 @test_multiflag(i32 %n, i32 %m, i32 %o) { ; CHECK-LABEL: test_multiflag: +; CHECK: // %bb.0: +; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w30, -32 +; CHECK-NEXT: adrp x8, :got:var +; CHECK-NEXT: cmp w0, w1 +; CHECK-NEXT: mov w19, w1 +; CHECK-NEXT: ldr x8, [x8, :got_lo12:var] +; CHECK-NEXT: cset w9, ne +; CHECK-NEXT: mov w20, w0 +; CHECK-NEXT: str w9, [x8] +; CHECK-NEXT: bl bar +; CHECK-NEXT: cmp w20, w19 +; CHECK-NEXT: b.eq .LBB0_2 +; CHECK-NEXT: // %bb.1: // %iftrue +; CHECK-NEXT: mov w0, #42 // =0x2a +; CHECK-NEXT: b .LBB0_3 +; CHECK-NEXT: .LBB0_2: // %iffalse +; CHECK-NEXT: mov w0, wzr +; CHECK-NEXT: .LBB0_3: // %iftrue +; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ret %test = icmp ne i32 %n, %m -; CHECK: cmp [[LHS:w[0-9]+]], [[RHS:w[0-9]+]] %val = zext i1 %test to i32 -; CHECK: cset {{[xw][0-9]+}}, ne -; CHECK: mov [[RHSCOPY:w[0-9]+]], [[RHS]] -; CHECK: mov [[LHSCOPY:w[0-9]+]], [[LHS]] store i32 %val, ptr @var call void @bar() -; CHECK: bl bar ; Currently, the comparison is emitted again. An MSR/MRS pair would also be ; acceptable, but assuming the call preserves NZCV is not. 
br i1 %test, label %iftrue, label %iffalse -; CHECK: cmp [[LHSCOPY]], [[RHSCOPY]] -; CHECK: b.eq iftrue: ret i32 42 diff --git a/llvm/test/CodeGen/AArch64/fmaximum-legalization.ll b/llvm/test/CodeGen/AArch64/fmaximum-legalization.ll --- a/llvm/test/CodeGen/AArch64/fmaximum-legalization.ll +++ b/llvm/test/CodeGen/AArch64/fmaximum-legalization.ll @@ -13,22 +13,22 @@ ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: mov h2, v1.h[1] ; CHECK-NEXT: mov h3, v0.h[1] -; CHECK-NEXT: fcvt s4, h1 -; CHECK-NEXT: fcvt s5, h0 -; CHECK-NEXT: mov h6, v1.h[2] -; CHECK-NEXT: mov h7, v0.h[2] +; CHECK-NEXT: mov h4, v1.h[2] +; CHECK-NEXT: mov h5, v0.h[2] +; CHECK-NEXT: fcvt s6, h1 +; CHECK-NEXT: fcvt s7, h0 ; CHECK-NEXT: mov h1, v1.h[3] ; CHECK-NEXT: fcvt s2, h2 ; CHECK-NEXT: fcvt s3, h3 -; CHECK-NEXT: fmax s4, s5, s4 -; CHECK-NEXT: fcvt s5, h7 ; CHECK-NEXT: fcvt s1, h1 ; CHECK-NEXT: fmax s2, s3, s2 -; CHECK-NEXT: fcvt s3, h6 +; CHECK-NEXT: fcvt s3, h4 +; CHECK-NEXT: fcvt s4, h5 +; CHECK-NEXT: fmax s5, s7, s6 ; CHECK-NEXT: mov h6, v0.h[3] -; CHECK-NEXT: fcvt h0, s4 +; CHECK-NEXT: fmax s3, s4, s3 ; CHECK-NEXT: fcvt h2, s2 -; CHECK-NEXT: fmax s3, s5, s3 +; CHECK-NEXT: fcvt h0, s5 ; CHECK-NEXT: fcvt s4, h6 ; CHECK-NEXT: mov v0.h[1], v2.h[0] ; CHECK-NEXT: fcvt h2, s3 diff --git a/llvm/test/CodeGen/AArch64/fmlal-loreg.ll b/llvm/test/CodeGen/AArch64/fmlal-loreg.ll --- a/llvm/test/CodeGen/AArch64/fmlal-loreg.ll +++ b/llvm/test/CodeGen/AArch64/fmlal-loreg.ll @@ -1,9 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -mtriple=aarch64 -mattr=+fp16fml -verify-machineinstrs %s -o - 2>&1 | FileCheck %s - ; This tests that the fmlal/fmlal2 instructions only accept lo registers for ; the index operand, using inline asm to force the available registers. 
- define <4 x float> @test(ptr %lhs_panel, ptr %rhs_panel, <4 x float> %a) { ; CHECK-LABEL: test: ; CHECK: // %bb.0: // %entry @@ -36,18 +34,17 @@ %z = fadd <4 x float> %vfmlal_low3.i, %vfmlal_high3.i ret <4 x float> %z } - define void @loop(ptr %out_tile, ptr %lhs_panel, ptr %rhs_panel, i32 noundef %K, i32 noundef %flags) { ; CHECK-LABEL: loop: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: mov w8, w3 ; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: mov w8, w3 ; CHECK-NEXT: .LBB1_1: // %for.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr q2, [x1], #2 -; CHECK-NEXT: ldr q3, [x2], #2 ; CHECK-NEXT: subs x8, x8, #1 +; CHECK-NEXT: ldr q3, [x2], #2 ; CHECK-NEXT: fmlal v0.4s, v3.4h, v2.h[0] ; CHECK-NEXT: fmlal2 v1.4s, v3.4h, v2.h[0] ; CHECK-NEXT: b.ne .LBB1_1 @@ -57,13 +54,11 @@ entry: %wide.trip.count = zext i32 %K to i64 br label %for.body - for.cond.cleanup: ; preds = %for.body store <4 x float> %vfmlal_low3.i, ptr %out_tile, align 4 %add.ptr1399 = getelementptr inbounds float, ptr %out_tile, i64 4 store <4 x float> %vfmlal_high3.i, ptr %add.ptr1399, align 4 ret void - for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %acc0.01714 = phi <4 x float> [ zeroinitializer, %entry ], [ %vfmlal_low3.i, %for.body ] @@ -79,13 +74,12 @@ %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count br i1 %exitcond.not, label %for.cond.cleanup, label %for.body } - define void @sink(ptr %out_tile, ptr %lhs_panel, ptr %rhs_panel, i32 noundef %K, i32 noundef %flags, <8 x half> noundef %lhs) { ; CHECK-LABEL: sink: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v1.2d, #0000000000000000 -; CHECK-NEXT: mov w8, w3 ; CHECK-NEXT: movi v2.2d, #0000000000000000 +; CHECK-NEXT: mov w8, w3 ; CHECK-NEXT: .LBB2_1: // %for.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr q3, [x2], #2 @@ -100,13 +94,11 @@ %vecinit89 = shufflevector <8 x half> %lhs, <8 x half> undef, <8 x i32> zeroinitializer %wide.trip.count = zext i32 %K to i64 br label %for.body - for.cond.cleanup: ; preds = %for.body store <4 x float> %vfmlal_low3.i, ptr %out_tile, align 4 %add.ptr1395 = getelementptr inbounds float, ptr %out_tile, i64 4 store <4 x float> %vfmlal_high3.i, ptr %add.ptr1395, align 4 ret void - for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %acc0.01702 = phi <4 x float> [ zeroinitializer, %entry ], [ %vfmlal_low3.i, %for.body ] @@ -119,8 +111,5 @@ %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count br i1 %exitcond.not, label %for.cond.cleanup, label %for.body } - - declare <4 x float> @llvm.aarch64.neon.fmlal.v4f32.v8f16(<4 x float>, <8 x half>, <8 x half>) #2 declare <4 x float> @llvm.aarch64.neon.fmlal2.v4f32.v8f16(<4 x float>, <8 x half>, <8 x half>) #2 - diff --git a/llvm/test/CodeGen/AArch64/fold-csel-cttz-and.ll b/llvm/test/CodeGen/AArch64/fold-csel-cttz-and.ll --- a/llvm/test/CodeGen/AArch64/fold-csel-cttz-and.ll +++ b/llvm/test/CodeGen/AArch64/fold-csel-cttz-and.ll @@ -112,9 +112,9 @@ ; CHECK-LABEL: cttzlhsnot0: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: rbit w9, w0 -; CHECK-NEXT: mov w8, #10 -; CHECK-NEXT: clz w9, w9 +; CHECK-NEXT: mov w8, #10 // =0xa ; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: clz w9, w9 ; CHECK-NEXT: csel w0, w8, w9, eq ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll b/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll --- 
a/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll +++ b/llvm/test/CodeGen/AArch64/fp-conversion-to-tbl.ll @@ -29,10 +29,10 @@ ; CHECK-LABEL: fptoui_v8f32_to_v8i8_in_loop: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: Lloh0: -; CHECK-NEXT: adrp x9, lCPI0_0@PAGE -; CHECK-NEXT: mov x8, xzr +; CHECK-NEXT: adrp x8, lCPI0_0@PAGE ; CHECK-NEXT: Lloh1: -; CHECK-NEXT: ldr q0, [x9, lCPI0_0@PAGEOFF] +; CHECK-NEXT: ldr q0, [x8, lCPI0_0@PAGEOFF] +; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: LBB0_1: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add x9, x0, x8, lsl #5 @@ -71,10 +71,10 @@ ; CHECK-LABEL: fptoui_v8f32_to_v8i8_no_loop: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: fcvtzs.4s v0, v0 ; CHECK-NEXT: fcvtzs.4s v1, v1 -; CHECK-NEXT: xtn.4h v0, v0 +; CHECK-NEXT: fcvtzs.4s v0, v0 ; CHECK-NEXT: xtn.4h v1, v1 +; CHECK-NEXT: xtn.4h v0, v0 ; CHECK-NEXT: uzp1.8b v0, v0, v1 ; CHECK-NEXT: str d0, [x1] ; CHECK-NEXT: ret @@ -109,22 +109,22 @@ ; CHECK-LABEL: fptoui_2x_v8f32_to_v8i8_in_loop: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: Lloh2: -; CHECK-NEXT: adrp x9, lCPI2_0@PAGE -; CHECK-NEXT: mov x8, xzr +; CHECK-NEXT: adrp x8, lCPI2_0@PAGE ; CHECK-NEXT: Lloh3: -; CHECK-NEXT: ldr q0, [x9, lCPI2_0@PAGEOFF] +; CHECK-NEXT: ldr q0, [x8, lCPI2_0@PAGEOFF] +; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: LBB2_1: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: lsl x9, x8, #5 ; CHECK-NEXT: add x10, x0, x9 ; CHECK-NEXT: add x9, x1, x9 ; CHECK-NEXT: ldp q2, q1, [x10] -; CHECK-NEXT: ldp q4, q3, [x9] -; CHECK-NEXT: fcvtzu.4s v17, v1 -; CHECK-NEXT: fcvtzu.4s v16, v2 -; CHECK-NEXT: fcvtzu.4s v19, v3 -; CHECK-NEXT: fcvtzu.4s v18, v4 -; CHECK-NEXT: tbl.16b v1, { v16, v17, v18, v19 }, v0 +; CHECK-NEXT: fcvtzu.4s v5, v1 +; CHECK-NEXT: ldp q1, q3, [x9] +; CHECK-NEXT: fcvtzu.4s v4, v2 +; CHECK-NEXT: fcvtzu.4s v7, v3 +; CHECK-NEXT: fcvtzu.4s v6, v1 +; CHECK-NEXT: tbl.16b v1, { v4, v5, v6, v7 }, v0 ; CHECK-NEXT: str q1, [x2, x8, lsl #4] ; CHECK-NEXT: add x8, x8, #1 ; CHECK-NEXT: cmp x8, #1000 @@ -176,22 +176,22 @@ ; CHECK-LABEL: fptoui_2x_v8f32_to_v8i8_in_loop_no_concat_shuffle: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: Lloh4: -; CHECK-NEXT: adrp x9, lCPI3_0@PAGE -; CHECK-NEXT: mov x8, xzr +; CHECK-NEXT: adrp x8, lCPI3_0@PAGE ; CHECK-NEXT: Lloh5: -; CHECK-NEXT: ldr q0, [x9, lCPI3_0@PAGEOFF] +; CHECK-NEXT: ldr q0, [x8, lCPI3_0@PAGEOFF] +; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: LBB3_1: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: lsl x9, x8, #5 ; CHECK-NEXT: add x10, x0, x9 ; CHECK-NEXT: add x9, x1, x9 ; CHECK-NEXT: ldp q2, q1, [x10] -; CHECK-NEXT: ldp q4, q3, [x9] -; CHECK-NEXT: fcvtzu.4s v17, v1 -; CHECK-NEXT: fcvtzu.4s v16, v2 -; CHECK-NEXT: fcvtzu.4s v19, v3 -; CHECK-NEXT: fcvtzu.4s v18, v4 -; CHECK-NEXT: tbl.16b v1, { v16, v17, v18, v19 }, v0 +; CHECK-NEXT: fcvtzu.4s v5, v1 +; CHECK-NEXT: ldp q1, q3, [x9] +; CHECK-NEXT: fcvtzu.4s v4, v2 +; CHECK-NEXT: fcvtzu.4s v7, v3 +; CHECK-NEXT: fcvtzu.4s v6, v1 +; CHECK-NEXT: tbl.16b v1, { v4, v5, v6, v7 }, v0 ; CHECK-NEXT: str q1, [x2, x8, lsl #4] ; CHECK-NEXT: add x8, x8, #1 ; CHECK-NEXT: cmp x8, #1000 @@ -243,22 +243,22 @@ ; CHECK-LABEL: fptoui_v16f32_to_v16i8_in_loop: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: Lloh6: -; CHECK-NEXT: adrp x9, lCPI4_0@PAGE -; CHECK-NEXT: mov x8, xzr +; CHECK-NEXT: adrp x8, lCPI4_0@PAGE ; CHECK-NEXT: Lloh7: -; CHECK-NEXT: ldr q0, [x9, lCPI4_0@PAGEOFF] +; CHECK-NEXT: ldr q0, [x8, lCPI4_0@PAGEOFF] +; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: LBB4_1: ; %loop ; CHECK-NEXT: ; =>This Inner 
Loop Header: Depth=1 ; CHECK-NEXT: add x9, x0, x8, lsl #6 ; CHECK-NEXT: add x8, x8, #1 ; CHECK-NEXT: cmp x8, #1000 ; CHECK-NEXT: ldp q2, q1, [x9, #32] -; CHECK-NEXT: ldp q4, q3, [x9] -; CHECK-NEXT: fcvtzu.4s v19, v1 -; CHECK-NEXT: fcvtzu.4s v18, v2 -; CHECK-NEXT: fcvtzu.4s v17, v3 -; CHECK-NEXT: fcvtzu.4s v16, v4 -; CHECK-NEXT: tbl.16b v1, { v16, v17, v18, v19 }, v0 +; CHECK-NEXT: fcvtzu.4s v7, v1 +; CHECK-NEXT: ldp q1, q3, [x9] +; CHECK-NEXT: fcvtzu.4s v6, v2 +; CHECK-NEXT: fcvtzu.4s v5, v3 +; CHECK-NEXT: fcvtzu.4s v4, v1 +; CHECK-NEXT: tbl.16b v1, { v4, v5, v6, v7 }, v0 ; CHECK-NEXT: str q1, [x1], #32 ; CHECK-NEXT: b.eq LBB4_1 ; CHECK-NEXT: ; %bb.2: ; %exit @@ -304,32 +304,32 @@ ; CHECK-LABEL: fptoui_2x_v16f32_to_v16i8_in_loop: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: Lloh8: -; CHECK-NEXT: adrp x9, lCPI5_0@PAGE -; CHECK-NEXT: mov x8, xzr +; CHECK-NEXT: adrp x8, lCPI5_0@PAGE ; CHECK-NEXT: Lloh9: -; CHECK-NEXT: ldr q0, [x9, lCPI5_0@PAGEOFF] +; CHECK-NEXT: ldr q0, [x8, lCPI5_0@PAGEOFF] +; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: LBB5_1: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: lsl x9, x8, #6 -; CHECK-NEXT: add x10, x0, x9 -; CHECK-NEXT: add x9, x1, x9 -; CHECK-NEXT: ldp q1, q2, [x10, #32] +; CHECK-NEXT: add x10, x1, x9 +; CHECK-NEXT: add x9, x0, x9 +; CHECK-NEXT: ldp q2, q1, [x10, #32] ; CHECK-NEXT: ldp q3, q4, [x9, #32] -; CHECK-NEXT: fcvtzu.4s v24, v2 -; CHECK-NEXT: fcvtzu.4s v23, v1 -; CHECK-NEXT: ldp q5, q6, [x9] -; CHECK-NEXT: fcvtzu.4s v20, v4 +; CHECK-NEXT: ldp q5, q6, [x10] +; CHECK-NEXT: fcvtzu.4s v19, v1 +; CHECK-NEXT: fcvtzu.4s v18, v2 +; CHECK-NEXT: ldp q2, q1, [x9] +; CHECK-NEXT: fcvtzu.4s v23, v4 +; CHECK-NEXT: fcvtzu.4s v17, v6 ; CHECK-NEXT: add x9, x2, x8, lsl #5 -; CHECK-NEXT: fcvtzu.4s v19, v3 +; CHECK-NEXT: fcvtzu.4s v22, v3 +; CHECK-NEXT: fcvtzu.4s v16, v5 ; CHECK-NEXT: add x8, x8, #1 +; CHECK-NEXT: fcvtzu.4s v21, v1 ; CHECK-NEXT: cmp x8, #1000 -; CHECK-NEXT: ldp q7, q16, [x10] -; CHECK-NEXT: fcvtzu.4s v18, v6 -; CHECK-NEXT: fcvtzu.4s v17, v5 -; CHECK-NEXT: fcvtzu.4s v22, v16 -; CHECK-NEXT: fcvtzu.4s v21, v7 -; CHECK-NEXT: tbl.16b v1, { v17, v18, v19, v20 }, v0 -; CHECK-NEXT: tbl.16b v2, { v21, v22, v23, v24 }, v0 +; CHECK-NEXT: fcvtzu.4s v20, v2 +; CHECK-NEXT: tbl.16b v1, { v16, v17, v18, v19 }, v0 +; CHECK-NEXT: tbl.16b v2, { v20, v21, v22, v23 }, v0 ; CHECK-NEXT: stp q2, q1, [x9] ; CHECK-NEXT: b.eq LBB5_1 ; CHECK-NEXT: ; %bb.2: ; %exit @@ -365,8 +365,8 @@ ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add x9, x0, x8, lsl #5 ; CHECK-NEXT: ldp q0, q1, [x9] -; CHECK-NEXT: fcvtzu.4s v0, v0 ; CHECK-NEXT: fcvtzu.4s v1, v1 +; CHECK-NEXT: fcvtzu.4s v0, v0 ; CHECK-NEXT: uzp1.8h v0, v0, v1 ; CHECK-NEXT: str q0, [x1, x8, lsl #4] ; CHECK-NEXT: add x8, x8, #1 @@ -400,17 +400,17 @@ ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: lsl x9, x8, #5 ; CHECK-NEXT: add x8, x8, #1 +; CHECK-NEXT: cmp x8, #1000 ; CHECK-NEXT: add x10, x0, x9 ; CHECK-NEXT: add x11, x1, x9 ; CHECK-NEXT: add x9, x2, x9 -; CHECK-NEXT: cmp x8, #1000 ; CHECK-NEXT: ldp q0, q1, [x10] -; CHECK-NEXT: fcvtzu.4s v0, v0 ; CHECK-NEXT: ldp q2, q3, [x11] ; CHECK-NEXT: fcvtzu.4s v1, v1 +; CHECK-NEXT: fcvtzu.4s v0, v0 +; CHECK-NEXT: fcvtzu.4s v3, v3 ; CHECK-NEXT: fcvtzu.4s v2, v2 ; CHECK-NEXT: uzp1.8h v0, v0, v1 -; CHECK-NEXT: fcvtzu.4s v3, v3 ; CHECK-NEXT: uzp1.8h v1, v2, v3 ; CHECK-NEXT: stp q0, q1, [x9] ; CHECK-NEXT: b.eq LBB7_1 @@ -477,14 +477,14 @@ ; CHECK-LABEL: uitofp_v8i8_to_v8f32: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: Lloh10: -; CHECK-NEXT: adrp x9, 
lCPI8_0@PAGE +; CHECK-NEXT: adrp x8, lCPI8_0@PAGE ; CHECK-NEXT: Lloh11: -; CHECK-NEXT: adrp x10, lCPI8_1@PAGE -; CHECK-NEXT: mov x8, xzr +; CHECK-NEXT: adrp x9, lCPI8_1@PAGE ; CHECK-NEXT: Lloh12: -; CHECK-NEXT: ldr q0, [x9, lCPI8_0@PAGEOFF] +; CHECK-NEXT: ldr q0, [x8, lCPI8_0@PAGEOFF] ; CHECK-NEXT: Lloh13: -; CHECK-NEXT: ldr q1, [x10, lCPI8_1@PAGEOFF] +; CHECK-NEXT: ldr q1, [x9, lCPI8_1@PAGEOFF] +; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: LBB8_1: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr d2, [x0, x8, lsl #3] @@ -592,22 +592,22 @@ ; CHECK-LABEL: uitofp_v16i8_to_v16f32: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: Lloh14: -; CHECK-NEXT: adrp x9, lCPI9_0@PAGE +; CHECK-NEXT: adrp x8, lCPI9_0@PAGE ; CHECK-NEXT: Lloh15: -; CHECK-NEXT: adrp x10, lCPI9_1@PAGE +; CHECK-NEXT: adrp x9, lCPI9_1@PAGE ; CHECK-NEXT: Lloh16: -; CHECK-NEXT: adrp x11, lCPI9_2@PAGE +; CHECK-NEXT: adrp x10, lCPI9_2@PAGE ; CHECK-NEXT: Lloh17: -; CHECK-NEXT: adrp x12, lCPI9_3@PAGE -; CHECK-NEXT: mov x8, xzr +; CHECK-NEXT: ldr q0, [x8, lCPI9_0@PAGEOFF] ; CHECK-NEXT: Lloh18: -; CHECK-NEXT: ldr q0, [x9, lCPI9_0@PAGEOFF] +; CHECK-NEXT: adrp x8, lCPI9_3@PAGE ; CHECK-NEXT: Lloh19: -; CHECK-NEXT: ldr q1, [x10, lCPI9_1@PAGEOFF] +; CHECK-NEXT: ldr q1, [x9, lCPI9_1@PAGEOFF] ; CHECK-NEXT: Lloh20: -; CHECK-NEXT: ldr q2, [x11, lCPI9_2@PAGEOFF] +; CHECK-NEXT: ldr q2, [x10, lCPI9_2@PAGEOFF] ; CHECK-NEXT: Lloh21: -; CHECK-NEXT: ldr q3, [x12, lCPI9_3@PAGEOFF] +; CHECK-NEXT: ldr q3, [x8, lCPI9_3@PAGEOFF] +; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: LBB9_1: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr q4, [x0, x8, lsl #4] @@ -627,10 +627,11 @@ ; CHECK-NEXT: b.eq LBB9_1 ; CHECK-NEXT: ; %bb.2: ; %exit ; CHECK-NEXT: ret -; CHECK-NEXT: .loh AdrpLdr Lloh17, Lloh21 +; CHECK-NEXT: .loh AdrpLdr Lloh18, Lloh21 ; CHECK-NEXT: .loh AdrpLdr Lloh16, Lloh20 ; CHECK-NEXT: .loh AdrpLdr Lloh15, Lloh19 -; CHECK-NEXT: .loh AdrpLdr Lloh14, Lloh18 +; CHECK-NEXT: .loh AdrpAdrp Lloh14, Lloh18 +; CHECK-NEXT: .loh AdrpLdr Lloh14, Lloh17 entry: br label %loop diff --git a/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll b/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll --- a/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll +++ b/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll @@ -233,22 +233,22 @@ ; CHECK-NEXT: mov s2, v1.s[1] ; CHECK-NEXT: mov s3, v0.s[1] ; CHECK-NEXT: fcmp s0, s1 -; CHECK-NEXT: mov s4, v1.s[2] -; CHECK-NEXT: mov s5, v0.s[2] +; CHECK-NEXT: csetm w8, eq +; CHECK-NEXT: fcmp s3, s2 +; CHECK-NEXT: mov s2, v1.s[2] +; CHECK-NEXT: mov s3, v0.s[2] +; CHECK-NEXT: fmov s4, w8 ; CHECK-NEXT: mov s1, v1.s[3] ; CHECK-NEXT: mov s0, v0.s[3] ; CHECK-NEXT: csetm w8, eq +; CHECK-NEXT: mov v4.s[1], w8 ; CHECK-NEXT: fcmp s3, s2 -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: csetm w8, eq -; CHECK-NEXT: fcmp s5, s4 -; CHECK-NEXT: mov v2.s[1], w8 ; CHECK-NEXT: csetm w8, eq ; CHECK-NEXT: fcmp s0, s1 -; CHECK-NEXT: mov v2.s[2], w8 +; CHECK-NEXT: mov v4.s[2], w8 ; CHECK-NEXT: csetm w8, eq -; CHECK-NEXT: mov v2.s[3], w8 -; CHECK-NEXT: xtn v0.4h, v2.4s +; CHECK-NEXT: mov v4.s[3], w8 +; CHECK-NEXT: xtn v0.4h, v4.4s ; CHECK-NEXT: ret entry: %val = call <4 x i1> @llvm.experimental.constrained.fcmp.v4f64(<4 x float> %x, <4 x float> %y, metadata !"oeq", metadata !"fpexcept.strict") @@ -261,22 +261,22 @@ ; CHECK-NEXT: mov s2, v1.s[1] ; CHECK-NEXT: mov s3, v0.s[1] ; CHECK-NEXT: fcmpe s0, s1 -; CHECK-NEXT: mov s4, v1.s[2] -; CHECK-NEXT: mov s5, v0.s[2] +; CHECK-NEXT: csetm w8, eq +; CHECK-NEXT: fcmpe s3, s2 +; CHECK-NEXT: mov s2, v1.s[2] 
+; CHECK-NEXT: mov s3, v0.s[2] +; CHECK-NEXT: fmov s4, w8 ; CHECK-NEXT: mov s1, v1.s[3] ; CHECK-NEXT: mov s0, v0.s[3] ; CHECK-NEXT: csetm w8, eq +; CHECK-NEXT: mov v4.s[1], w8 ; CHECK-NEXT: fcmpe s3, s2 -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: csetm w8, eq -; CHECK-NEXT: fcmpe s5, s4 -; CHECK-NEXT: mov v2.s[1], w8 ; CHECK-NEXT: csetm w8, eq ; CHECK-NEXT: fcmpe s0, s1 -; CHECK-NEXT: mov v2.s[2], w8 +; CHECK-NEXT: mov v4.s[2], w8 ; CHECK-NEXT: csetm w8, eq -; CHECK-NEXT: mov v2.s[3], w8 -; CHECK-NEXT: xtn v0.4h, v2.4s +; CHECK-NEXT: mov v4.s[3], w8 +; CHECK-NEXT: xtn v0.4h, v4.4s ; CHECK-NEXT: ret entry: %val = call <4 x i1> @llvm.experimental.constrained.fcmps.v4f64(<4 x float> %x, <4 x float> %y, metadata !"oeq", metadata !"fpexcept.strict") diff --git a/llvm/test/CodeGen/AArch64/fp16-v16-instructions.ll b/llvm/test/CodeGen/AArch64/fp16-v16-instructions.ll --- a/llvm/test/CodeGen/AArch64/fp16-v16-instructions.ll +++ b/llvm/test/CodeGen/AArch64/fp16-v16-instructions.ll @@ -5,8 +5,8 @@ define <16 x half> @sitofp_i32(<16 x i32> %a) #0 { ; CHECK-LABEL: sitofp_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: scvtf v2.4s, v2.4s ; CHECK-NEXT: scvtf v0.4s, v0.4s +; CHECK-NEXT: scvtf v2.4s, v2.4s ; CHECK-NEXT: scvtf v4.4s, v1.4s ; CHECK-NEXT: fcvtn v0.4h, v0.4s ; CHECK-NEXT: fcvtn v1.4h, v2.4s @@ -30,16 +30,16 @@ ; CHECK-NEXT: scvtf v6.2d, v6.2d ; CHECK-NEXT: scvtf v5.2d, v5.2d ; CHECK-NEXT: scvtf v3.2d, v3.2d -; CHECK-NEXT: scvtf v7.2d, v7.2d ; CHECK-NEXT: fcvtn v0.2s, v0.2d ; CHECK-NEXT: fcvtn v4.2s, v4.2d ; CHECK-NEXT: fcvtn v2.2s, v2.2d ; CHECK-NEXT: fcvtn v6.2s, v6.2d ; CHECK-NEXT: fcvtn2 v0.4s, v1.2d +; CHECK-NEXT: scvtf v1.2d, v7.2d ; CHECK-NEXT: fcvtn2 v4.4s, v5.2d ; CHECK-NEXT: fcvtn2 v2.4s, v3.2d -; CHECK-NEXT: fcvtn2 v6.4s, v7.2d ; CHECK-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NEXT: fcvtn2 v6.4s, v1.2d ; CHECK-NEXT: fcvtn v1.4h, v4.4s ; CHECK-NEXT: fcvtn2 v0.8h, v2.4s ; CHECK-NEXT: fcvtn2 v1.8h, v6.4s @@ -56,8 +56,8 @@ define <16 x half> @uitofp_i32(<16 x i32> %a) #0 { ; CHECK-LABEL: uitofp_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ucvtf v2.4s, v2.4s ; CHECK-NEXT: ucvtf v0.4s, v0.4s +; CHECK-NEXT: ucvtf v2.4s, v2.4s ; CHECK-NEXT: ucvtf v4.4s, v1.4s ; CHECK-NEXT: fcvtn v0.4h, v0.4s ; CHECK-NEXT: fcvtn v1.4h, v2.4s @@ -81,16 +81,16 @@ ; CHECK-NEXT: ucvtf v6.2d, v6.2d ; CHECK-NEXT: ucvtf v5.2d, v5.2d ; CHECK-NEXT: ucvtf v3.2d, v3.2d -; CHECK-NEXT: ucvtf v7.2d, v7.2d ; CHECK-NEXT: fcvtn v0.2s, v0.2d ; CHECK-NEXT: fcvtn v4.2s, v4.2d ; CHECK-NEXT: fcvtn v2.2s, v2.2d ; CHECK-NEXT: fcvtn v6.2s, v6.2d ; CHECK-NEXT: fcvtn2 v0.4s, v1.2d +; CHECK-NEXT: ucvtf v1.2d, v7.2d ; CHECK-NEXT: fcvtn2 v4.4s, v5.2d ; CHECK-NEXT: fcvtn2 v2.4s, v3.2d -; CHECK-NEXT: fcvtn2 v6.4s, v7.2d ; CHECK-NEXT: fcvtn v0.4h, v0.4s +; CHECK-NEXT: fcvtn2 v6.4s, v1.2d ; CHECK-NEXT: fcvtn v1.4h, v4.4s ; CHECK-NEXT: fcvtn2 v0.8h, v2.4s ; CHECK-NEXT: fcvtn2 v1.8h, v6.4s diff --git a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll --- a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll +++ b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll @@ -12,46 +12,46 @@ ; CHECK-CVT-NEXT: mov h6, v1.h[2] ; CHECK-CVT-NEXT: mov h7, v0.h[2] ; CHECK-CVT-NEXT: mov h16, v1.h[3] -; CHECK-CVT-NEXT: mov h17, v0.h[3] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: fadd s4, s5, s4 -; CHECK-CVT-NEXT: fcvt s5, h6 -; CHECK-CVT-NEXT: fcvt s6, h7 -; CHECK-CVT-NEXT: fcvt s7, h16 -; CHECK-CVT-NEXT: fcvt s16, h17 +; CHECK-CVT-NEXT: mov h5, v0.h[3] +; CHECK-CVT-NEXT: fcvt s6, h6 +; CHECK-CVT-NEXT: 
fcvt s7, h7 +; CHECK-CVT-NEXT: fcvt s16, h16 ; CHECK-CVT-NEXT: fadd s3, s3, s2 +; CHECK-CVT-NEXT: fcvt s5, h5 ; CHECK-CVT-NEXT: fcvt h2, s4 -; CHECK-CVT-NEXT: fadd s4, s6, s5 -; CHECK-CVT-NEXT: mov h5, v1.h[4] -; CHECK-CVT-NEXT: mov h6, v0.h[4] -; CHECK-CVT-NEXT: fadd s7, s16, s7 +; CHECK-CVT-NEXT: fadd s4, s7, s6 +; CHECK-CVT-NEXT: mov h6, v1.h[4] +; CHECK-CVT-NEXT: mov h7, v0.h[4] ; CHECK-CVT-NEXT: fcvt h3, s3 +; CHECK-CVT-NEXT: fadd s5, s5, s16 ; CHECK-CVT-NEXT: mov h16, v0.h[5] -; CHECK-CVT-NEXT: fcvt s5, h5 -; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt h7, s7 +; CHECK-CVT-NEXT: fcvt h4, s4 ; CHECK-CVT-NEXT: mov v2.h[1], v3.h[0] -; CHECK-CVT-NEXT: fcvt h3, s4 -; CHECK-CVT-NEXT: mov h4, v1.h[5] -; CHECK-CVT-NEXT: fadd s5, s6, s5 -; CHECK-CVT-NEXT: mov h6, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], v3.h[0] +; CHECK-CVT-NEXT: fcvt s3, h6 +; CHECK-CVT-NEXT: fcvt s6, h7 +; CHECK-CVT-NEXT: mov h7, v1.h[5] +; CHECK-CVT-NEXT: fcvt h5, s5 +; CHECK-CVT-NEXT: fcvt s16, h16 +; CHECK-CVT-NEXT: mov v2.h[2], v4.h[0] +; CHECK-CVT-NEXT: mov h4, v1.h[6] +; CHECK-CVT-NEXT: fadd s3, s6, s3 +; CHECK-CVT-NEXT: mov h6, v0.h[6] +; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: mov h1, v1.h[7] -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h16 -; CHECK-CVT-NEXT: mov h16, v0.h[6] ; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], v7.h[0] -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fadd s3, s4, s3 -; CHECK-CVT-NEXT: fcvt h4, s5 +; CHECK-CVT-NEXT: mov v2.h[3], v5.h[0] +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: fcvt h3, s3 ; CHECK-CVT-NEXT: fcvt s5, h6 -; CHECK-CVT-NEXT: fcvt s6, h16 +; CHECK-CVT-NEXT: fadd s6, s16, s7 +; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], v4.h[0] -; CHECK-CVT-NEXT: fcvt h3, s3 -; CHECK-CVT-NEXT: fadd s4, s6, s5 +; CHECK-CVT-NEXT: mov v2.h[4], v3.h[0] +; CHECK-CVT-NEXT: fadd s4, s5, s4 +; CHECK-CVT-NEXT: fcvt h3, s6 ; CHECK-CVT-NEXT: fadd s0, s0, s1 ; CHECK-CVT-NEXT: mov v2.h[5], v3.h[0] ; CHECK-CVT-NEXT: fcvt h3, s4 @@ -81,46 +81,46 @@ ; CHECK-CVT-NEXT: mov h6, v1.h[2] ; CHECK-CVT-NEXT: mov h7, v0.h[2] ; CHECK-CVT-NEXT: mov h16, v1.h[3] -; CHECK-CVT-NEXT: mov h17, v0.h[3] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: fsub s4, s5, s4 -; CHECK-CVT-NEXT: fcvt s5, h6 -; CHECK-CVT-NEXT: fcvt s6, h7 -; CHECK-CVT-NEXT: fcvt s7, h16 -; CHECK-CVT-NEXT: fcvt s16, h17 +; CHECK-CVT-NEXT: mov h5, v0.h[3] +; CHECK-CVT-NEXT: fcvt s6, h6 +; CHECK-CVT-NEXT: fcvt s7, h7 +; CHECK-CVT-NEXT: fcvt s16, h16 ; CHECK-CVT-NEXT: fsub s3, s3, s2 +; CHECK-CVT-NEXT: fcvt s5, h5 ; CHECK-CVT-NEXT: fcvt h2, s4 -; CHECK-CVT-NEXT: fsub s4, s6, s5 -; CHECK-CVT-NEXT: mov h5, v1.h[4] -; CHECK-CVT-NEXT: mov h6, v0.h[4] -; CHECK-CVT-NEXT: fsub s7, s16, s7 +; CHECK-CVT-NEXT: fsub s4, s7, s6 +; CHECK-CVT-NEXT: mov h6, v1.h[4] +; CHECK-CVT-NEXT: mov h7, v0.h[4] ; CHECK-CVT-NEXT: fcvt h3, s3 +; CHECK-CVT-NEXT: fsub s5, s5, s16 ; CHECK-CVT-NEXT: mov h16, v0.h[5] -; CHECK-CVT-NEXT: fcvt s5, h5 -; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt h7, s7 +; CHECK-CVT-NEXT: fcvt h4, s4 ; CHECK-CVT-NEXT: mov v2.h[1], v3.h[0] -; CHECK-CVT-NEXT: fcvt h3, s4 -; CHECK-CVT-NEXT: mov h4, v1.h[5] -; CHECK-CVT-NEXT: fsub s5, s6, s5 -; CHECK-CVT-NEXT: mov h6, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], v3.h[0] +; CHECK-CVT-NEXT: fcvt s3, h6 +; CHECK-CVT-NEXT: fcvt s6, h7 +; CHECK-CVT-NEXT: mov h7, v1.h[5] +; CHECK-CVT-NEXT: fcvt h5, s5 +; CHECK-CVT-NEXT: fcvt s16, h16 +; CHECK-CVT-NEXT: mov v2.h[2], v4.h[0] +; CHECK-CVT-NEXT: 
mov h4, v1.h[6] +; CHECK-CVT-NEXT: fsub s3, s6, s3 +; CHECK-CVT-NEXT: mov h6, v0.h[6] +; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: mov h1, v1.h[7] -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h16 -; CHECK-CVT-NEXT: mov h16, v0.h[6] ; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], v7.h[0] -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fsub s3, s4, s3 -; CHECK-CVT-NEXT: fcvt h4, s5 +; CHECK-CVT-NEXT: mov v2.h[3], v5.h[0] +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: fcvt h3, s3 ; CHECK-CVT-NEXT: fcvt s5, h6 -; CHECK-CVT-NEXT: fcvt s6, h16 +; CHECK-CVT-NEXT: fsub s6, s16, s7 +; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], v4.h[0] -; CHECK-CVT-NEXT: fcvt h3, s3 -; CHECK-CVT-NEXT: fsub s4, s6, s5 +; CHECK-CVT-NEXT: mov v2.h[4], v3.h[0] +; CHECK-CVT-NEXT: fsub s4, s5, s4 +; CHECK-CVT-NEXT: fcvt h3, s6 ; CHECK-CVT-NEXT: fsub s0, s0, s1 ; CHECK-CVT-NEXT: mov v2.h[5], v3.h[0] ; CHECK-CVT-NEXT: fcvt h3, s4 @@ -149,47 +149,47 @@ ; CHECK-CVT-NEXT: fcvt s5, h0 ; CHECK-CVT-NEXT: mov h6, v1.h[2] ; CHECK-CVT-NEXT: mov h7, v0.h[2] -; CHECK-CVT-NEXT: mov h16, v0.h[3] +; CHECK-CVT-NEXT: mov h16, v1.h[3] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: fmul s4, s5, s4 -; CHECK-CVT-NEXT: mov h5, v1.h[3] +; CHECK-CVT-NEXT: mov h5, v0.h[3] ; CHECK-CVT-NEXT: fcvt s6, h6 ; CHECK-CVT-NEXT: fcvt s7, h7 +; CHECK-CVT-NEXT: fcvt s16, h16 ; CHECK-CVT-NEXT: fmul s3, s3, s2 +; CHECK-CVT-NEXT: fcvt s5, h5 ; CHECK-CVT-NEXT: fcvt h2, s4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: fcvt s5, h16 -; CHECK-CVT-NEXT: fmul s6, s7, s6 -; CHECK-CVT-NEXT: mov h7, v1.h[4] -; CHECK-CVT-NEXT: mov h16, v0.h[4] +; CHECK-CVT-NEXT: fmul s4, s7, s6 +; CHECK-CVT-NEXT: mov h6, v1.h[4] +; CHECK-CVT-NEXT: mov h7, v0.h[4] ; CHECK-CVT-NEXT: fcvt h3, s3 -; CHECK-CVT-NEXT: fmul s4, s5, s4 -; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: fcvt h6, s6 -; CHECK-CVT-NEXT: fcvt s7, h7 +; CHECK-CVT-NEXT: fmul s5, s5, s16 +; CHECK-CVT-NEXT: mov h16, v0.h[5] +; CHECK-CVT-NEXT: fcvt h4, s4 ; CHECK-CVT-NEXT: mov v2.h[1], v3.h[0] -; CHECK-CVT-NEXT: mov h3, v1.h[5] +; CHECK-CVT-NEXT: fcvt s3, h6 +; CHECK-CVT-NEXT: fcvt s6, h7 +; CHECK-CVT-NEXT: mov h7, v1.h[5] +; CHECK-CVT-NEXT: fcvt h5, s5 ; CHECK-CVT-NEXT: fcvt s16, h16 -; CHECK-CVT-NEXT: fcvt h4, s4 -; CHECK-CVT-NEXT: fcvt s5, h5 -; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov v2.h[2], v6.h[0] -; CHECK-CVT-NEXT: fmul s6, s16, s7 -; CHECK-CVT-NEXT: mov h7, v1.h[6] -; CHECK-CVT-NEXT: mov h16, v0.h[6] +; CHECK-CVT-NEXT: mov v2.h[2], v4.h[0] +; CHECK-CVT-NEXT: mov h4, v1.h[6] +; CHECK-CVT-NEXT: fmul s3, s6, s3 +; CHECK-CVT-NEXT: mov h6, v0.h[6] +; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: mov h1, v1.h[7] -; CHECK-CVT-NEXT: fmul s3, s5, s3 ; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], v4.h[0] -; CHECK-CVT-NEXT: fcvt h4, s6 -; CHECK-CVT-NEXT: fcvt s5, h7 -; CHECK-CVT-NEXT: fcvt s6, h16 -; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: mov v2.h[3], v5.h[0] +; CHECK-CVT-NEXT: fcvt s4, h4 ; CHECK-CVT-NEXT: fcvt h3, s3 +; CHECK-CVT-NEXT: fcvt s5, h6 +; CHECK-CVT-NEXT: fmul s6, s16, s7 +; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], v4.h[0] -; CHECK-CVT-NEXT: fmul s4, s6, s5 +; CHECK-CVT-NEXT: mov v2.h[4], v3.h[0] +; CHECK-CVT-NEXT: fmul s4, s5, s4 +; CHECK-CVT-NEXT: fcvt h3, s6 ; CHECK-CVT-NEXT: fmul s0, s0, s1 ; CHECK-CVT-NEXT: mov v2.h[5], v3.h[0] ; CHECK-CVT-NEXT: fcvt h3, s4 @@ -220,17 +220,17 @@ ; CHECK-CVT-NEXT: mov 
h7, v0.h[4] ; CHECK-CVT-NEXT: mov h16, v0.h[5] ; CHECK-CVT-NEXT: mov h17, v0.h[6] +; CHECK-CVT-NEXT: mov h0, v0.h[7] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: fcvt s5, h5 -; CHECK-CVT-NEXT: mov h0, v0.h[7] ; CHECK-CVT-NEXT: fcvt s6, h6 ; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: fcvt s16, h16 ; CHECK-CVT-NEXT: fcvt s17, h17 +; CHECK-CVT-NEXT: fcvt s0, h0 ; CHECK-CVT-NEXT: fdiv s2, s3, s2 ; CHECK-CVT-NEXT: fcvt s3, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 ; CHECK-CVT-NEXT: fdiv s3, s4, s3 ; CHECK-CVT-NEXT: mov h4, v1.h[2] ; CHECK-CVT-NEXT: fcvt h18, s2 @@ -312,22 +312,22 @@ define <8 x half> @d_to_h(<8 x double> %a) { ; CHECK-LABEL: d_to_h: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d4, v0.d[1] +; CHECK-NEXT: mov d5, v0.d[1] ; CHECK-NEXT: fcvt h0, d0 -; CHECK-NEXT: mov d5, v1.d[1] +; CHECK-NEXT: fcvt h4, d1 +; CHECK-NEXT: mov d1, v1.d[1] +; CHECK-NEXT: fcvt h5, d5 ; CHECK-NEXT: fcvt h1, d1 -; CHECK-NEXT: fcvt h4, d4 -; CHECK-NEXT: mov v0.h[1], v4.h[0] -; CHECK-NEXT: fcvt h4, d5 -; CHECK-NEXT: mov v0.h[2], v1.h[0] +; CHECK-NEXT: mov v0.h[1], v5.h[0] +; CHECK-NEXT: mov v0.h[2], v4.h[0] +; CHECK-NEXT: mov v0.h[3], v1.h[0] ; CHECK-NEXT: fcvt h1, d2 ; CHECK-NEXT: mov d2, v2.d[1] -; CHECK-NEXT: mov v0.h[3], v4.h[0] -; CHECK-NEXT: fcvt h2, d2 ; CHECK-NEXT: mov v0.h[4], v1.h[0] -; CHECK-NEXT: fcvt h1, d3 -; CHECK-NEXT: mov v0.h[5], v2.h[0] +; CHECK-NEXT: fcvt h1, d2 ; CHECK-NEXT: mov d2, v3.d[1] +; CHECK-NEXT: mov v0.h[5], v1.h[0] +; CHECK-NEXT: fcvt h1, d3 ; CHECK-NEXT: mov v0.h[6], v1.h[0] ; CHECK-NEXT: fcvt h1, d2 ; CHECK-NEXT: mov v0.h[7], v1.h[0] @@ -440,16 +440,16 @@ ; CHECK-CVT-NEXT: sshll v0.8h, v0.8b, #0 ; CHECK-CVT-NEXT: sshll v2.4s, v1.4h, #0 ; CHECK-CVT-NEXT: sshll v3.4s, v0.4h, #0 +; CHECK-CVT-NEXT: sshll2 v4.4s, v1.8h, #0 +; CHECK-CVT-NEXT: sshll2 v5.4s, v0.8h, #0 ; CHECK-CVT-NEXT: scvtf v2.4s, v2.4s ; CHECK-CVT-NEXT: scvtf v3.4s, v3.4s -; CHECK-CVT-NEXT: sshll2 v1.4s, v1.8h, #0 -; CHECK-CVT-NEXT: sshll2 v4.4s, v0.8h, #0 -; CHECK-CVT-NEXT: scvtf v5.4s, v1.4s ; CHECK-CVT-NEXT: fcvtn v1.4h, v2.4s -; CHECK-CVT-NEXT: fcvtn v0.4h, v3.4s ; CHECK-CVT-NEXT: scvtf v2.4s, v4.4s -; CHECK-CVT-NEXT: fcvtn2 v1.8h, v5.4s -; CHECK-CVT-NEXT: fcvtn2 v0.8h, v2.4s +; CHECK-CVT-NEXT: fcvtn v0.4h, v3.4s +; CHECK-CVT-NEXT: scvtf v3.4s, v5.4s +; CHECK-CVT-NEXT: fcvtn2 v1.8h, v2.4s +; CHECK-CVT-NEXT: fcvtn2 v0.8h, v3.4s ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: sitofp_v16i8: @@ -559,16 +559,16 @@ ; CHECK-CVT-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-CVT-NEXT: ushll v2.4s, v1.4h, #0 ; CHECK-CVT-NEXT: ushll v3.4s, v0.4h, #0 +; CHECK-CVT-NEXT: ushll2 v4.4s, v1.8h, #0 +; CHECK-CVT-NEXT: ushll2 v5.4s, v0.8h, #0 ; CHECK-CVT-NEXT: ucvtf v2.4s, v2.4s ; CHECK-CVT-NEXT: ucvtf v3.4s, v3.4s -; CHECK-CVT-NEXT: ushll2 v1.4s, v1.8h, #0 -; CHECK-CVT-NEXT: ushll2 v4.4s, v0.8h, #0 -; CHECK-CVT-NEXT: ucvtf v5.4s, v1.4s ; CHECK-CVT-NEXT: fcvtn v1.4h, v2.4s -; CHECK-CVT-NEXT: fcvtn v0.4h, v3.4s ; CHECK-CVT-NEXT: ucvtf v2.4s, v4.4s -; CHECK-CVT-NEXT: fcvtn2 v1.8h, v5.4s -; CHECK-CVT-NEXT: fcvtn2 v0.8h, v2.4s +; CHECK-CVT-NEXT: fcvtn v0.4h, v3.4s +; CHECK-CVT-NEXT: ucvtf v3.4s, v5.4s +; CHECK-CVT-NEXT: fcvtn2 v1.8h, v2.4s +; CHECK-CVT-NEXT: fcvtn2 v0.8h, v3.4s ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: uitofp_v16i8: @@ -728,39 +728,39 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] -; CHECK-CVT-NEXT: mov h16, v1.h[5] +; CHECK-CVT-NEXT: mov h6, v1.h[2] ; CHECK-CVT-NEXT: fcvt s2, h2 ; 
CHECK-CVT-NEXT: fcvt s3, h3
-; CHECK-CVT-NEXT: fcvt s6, h6
-; CHECK-CVT-NEXT: fcvt s7, h7
; CHECK-CVT-NEXT: fcmp s3, s2
-; CHECK-CVT-NEXT: mov h2, v1.h[2]
-; CHECK-CVT-NEXT: mov h3, v0.h[2]
+; CHECK-CVT-NEXT: mov h2, v0.h[2]
+; CHECK-CVT-NEXT: mov h3, v1.h[3]
; CHECK-CVT-NEXT: csetm w8, ne
; CHECK-CVT-NEXT: fcmp s5, s4
+; CHECK-CVT-NEXT: fcvt s5, h6
; CHECK-CVT-NEXT: fcvt s2, h2
-; CHECK-CVT-NEXT: mov h4, v1.h[3]
+; CHECK-CVT-NEXT: mov h4, v0.h[3]
; CHECK-CVT-NEXT: fcvt s3, h3
-; CHECK-CVT-NEXT: mov h5, v0.h[3]
+; CHECK-CVT-NEXT: mov h6, v0.h[4]
; CHECK-CVT-NEXT: csetm w9, ne
-; CHECK-CVT-NEXT: fcmp s3, s2
+; CHECK-CVT-NEXT: fcmp s2, s5
; CHECK-CVT-NEXT: fmov s2, w9
-; CHECK-CVT-NEXT: fcvt s3, h4
-; CHECK-CVT-NEXT: fcvt s4, h5
-; CHECK-CVT-NEXT: mov h5, v0.h[5]
+; CHECK-CVT-NEXT: fcvt s4, h4
+; CHECK-CVT-NEXT: mov h5, v1.h[4]
+; CHECK-CVT-NEXT: fcvt s6, h6
; CHECK-CVT-NEXT: mov v2.h[1], w8
; CHECK-CVT-NEXT: csetm w8, ne
; CHECK-CVT-NEXT: fcmp s4, s3
-; CHECK-CVT-NEXT: fcvt s3, h16
-; CHECK-CVT-NEXT: fcvt s4, h5
-; CHECK-CVT-NEXT: mov h5, v1.h[6]
+; CHECK-CVT-NEXT: mov h3, v1.h[5]
+; CHECK-CVT-NEXT: mov h4, v0.h[5]
+; CHECK-CVT-NEXT: fcvt s5, h5
; CHECK-CVT-NEXT: mov v2.h[2], w8
-; CHECK-CVT-NEXT: mov h1, v1.h[7]
; CHECK-CVT-NEXT: csetm w8, ne
-; CHECK-CVT-NEXT: fcmp s7, s6
+; CHECK-CVT-NEXT: fcvt s3, h3
+; CHECK-CVT-NEXT: fcvt s4, h4
+; CHECK-CVT-NEXT: fcmp s6, s5
+; CHECK-CVT-NEXT: mov h5, v1.h[6]
; CHECK-CVT-NEXT: mov h6, v0.h[6]
+; CHECK-CVT-NEXT: mov h1, v1.h[7]
; CHECK-CVT-NEXT: mov h0, v0.h[7]
; CHECK-CVT-NEXT: mov v2.h[3], w8
; CHECK-CVT-NEXT: csetm w8, ne
@@ -797,66 +797,66 @@
; CHECK-CVT-NEXT: mov h2, v1.h[1]
; CHECK-CVT-NEXT: mov h3, v0.h[1]
; CHECK-CVT-NEXT: fcvt s4, h1
-; CHECK-CVT-NEXT: fcvt s5, h0
-; CHECK-CVT-NEXT: mov h6, v0.h[4]
+; CHECK-CVT-NEXT: fcvt s6, h0
+; CHECK-CVT-NEXT: mov h5, v1.h[2]
; CHECK-CVT-NEXT: fcvt s2, h2
; CHECK-CVT-NEXT: fcvt s3, h3
; CHECK-CVT-NEXT: fcmp s3, s2
-; CHECK-CVT-NEXT: mov h2, v1.h[2]
-; CHECK-CVT-NEXT: mov h3, v0.h[2]
+; CHECK-CVT-NEXT: mov h2, v0.h[2]
+; CHECK-CVT-NEXT: fcvt s3, h5
+; CHECK-CVT-NEXT: mov h5, v0.h[3]
; CHECK-CVT-NEXT: csetm w8, eq
; CHECK-CVT-NEXT: csinv w8, w8, wzr, vc
-; CHECK-CVT-NEXT: fcmp s5, s4
-; CHECK-CVT-NEXT: mov h4, v1.h[3]
+; CHECK-CVT-NEXT: fcmp s6, s4
; CHECK-CVT-NEXT: fcvt s2, h2
-; CHECK-CVT-NEXT: fcvt s3, h3
-; CHECK-CVT-NEXT: mov h5, v0.h[3]
+; CHECK-CVT-NEXT: mov h4, v1.h[3]
+; CHECK-CVT-NEXT: mov h6, v1.h[4]
; CHECK-CVT-NEXT: csetm w9, eq
; CHECK-CVT-NEXT: csinv w9, w9, wzr, vc
-; CHECK-CVT-NEXT: fcvt s4, h4
-; CHECK-CVT-NEXT: fcmp s3, s2
-; CHECK-CVT-NEXT: fcvt s2, h5
-; CHECK-CVT-NEXT: fmov s3, w9
-; CHECK-CVT-NEXT: mov h5, v1.h[4]
-; CHECK-CVT-NEXT: csetm w9, eq
-; CHECK-CVT-NEXT: mov v3.h[1], w8
-; CHECK-CVT-NEXT: csinv w8, w9, wzr, vc
-; CHECK-CVT-NEXT: fcmp s2, s4
-; CHECK-CVT-NEXT: fcvt s2, h5
-; CHECK-CVT-NEXT: fcvt s4, h6
-; CHECK-CVT-NEXT: mov h5, v1.h[5]
-; CHECK-CVT-NEXT: mov h6, v0.h[5]
-; CHECK-CVT-NEXT: csetm w9, eq
-; CHECK-CVT-NEXT: mov v3.h[2], w8
-; CHECK-CVT-NEXT: csinv w8, w9, wzr, vc
-; CHECK-CVT-NEXT: fcmp s4, s2
-; CHECK-CVT-NEXT: fcvt s2, h5
-; CHECK-CVT-NEXT: fcvt s4, h6
-; CHECK-CVT-NEXT: mov h5, v1.h[6]
+; CHECK-CVT-NEXT: fcmp s2, s3
+; CHECK-CVT-NEXT: mov h2, v0.h[4]
+; CHECK-CVT-NEXT: fcvt s3, h4
+; CHECK-CVT-NEXT: fcvt s4, h5
+; CHECK-CVT-NEXT: fmov s5, w9
+; CHECK-CVT-NEXT: fcvt s6, h6
+; CHECK-CVT-NEXT: mov v5.h[1], w8
+; CHECK-CVT-NEXT: csetm w8, eq
+; CHECK-CVT-NEXT: fcvt s2, h2
+; CHECK-CVT-NEXT: csinv w8, w8, wzr, vc
+; CHECK-CVT-NEXT: fcmp s4, s3
+; CHECK-CVT-NEXT: mov h3, v1.h[5]
+; CHECK-CVT-NEXT: mov h4, v0.h[5]
+; CHECK-CVT-NEXT: mov v5.h[2], w8
+; CHECK-CVT-NEXT: csetm w8, eq
+; CHECK-CVT-NEXT: csinv w8, w8, wzr, vc
+; CHECK-CVT-NEXT: fcmp s2, s6
+; CHECK-CVT-NEXT: fcvt s2, h3
+; CHECK-CVT-NEXT: fcvt s3, h4
+; CHECK-CVT-NEXT: mov h4, v1.h[6]
; CHECK-CVT-NEXT: mov h6, v0.h[6]
-; CHECK-CVT-NEXT: csetm w9, eq
; CHECK-CVT-NEXT: mov h1, v1.h[7]
-; CHECK-CVT-NEXT: mov v3.h[3], w8
-; CHECK-CVT-NEXT: csinv w8, w9, wzr, vc
-; CHECK-CVT-NEXT: fcmp s4, s2
-; CHECK-CVT-NEXT: fcvt s2, h5
-; CHECK-CVT-NEXT: fcvt s4, h6
; CHECK-CVT-NEXT: mov h0, v0.h[7]
+; CHECK-CVT-NEXT: mov v5.h[3], w8
+; CHECK-CVT-NEXT: csetm w8, eq
+; CHECK-CVT-NEXT: csinv w8, w8, wzr, vc
+; CHECK-CVT-NEXT: fcmp s3, s2
+; CHECK-CVT-NEXT: fcvt s2, h4
+; CHECK-CVT-NEXT: fcvt s3, h6
; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: csetm w9, eq
-; CHECK-CVT-NEXT: mov v3.h[4], w8
-; CHECK-CVT-NEXT: csinv w8, w9, wzr, vc
-; CHECK-CVT-NEXT: fcmp s4, s2
; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: mov v3.h[5], w8
+; CHECK-CVT-NEXT: mov v5.h[4], w8
+; CHECK-CVT-NEXT: csetm w8, eq
+; CHECK-CVT-NEXT: csinv w8, w8, wzr, vc
+; CHECK-CVT-NEXT: fcmp s3, s2
+; CHECK-CVT-NEXT: mov v5.h[5], w8
; CHECK-CVT-NEXT: csetm w8, eq
; CHECK-CVT-NEXT: csinv w8, w8, wzr, vc
; CHECK-CVT-NEXT: fcmp s0, s1
-; CHECK-CVT-NEXT: mov v3.h[6], w8
+; CHECK-CVT-NEXT: mov v5.h[6], w8
; CHECK-CVT-NEXT: csetm w8, eq
; CHECK-CVT-NEXT: csinv w8, w8, wzr, vc
-; CHECK-CVT-NEXT: mov v3.h[7], w8
-; CHECK-CVT-NEXT: xtn v0.8b, v3.8h
+; CHECK-CVT-NEXT: mov v5.h[7], w8
+; CHECK-CVT-NEXT: xtn v0.8b, v5.8h
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: test_fcmp_ueq:
@@ -878,39 +878,39 @@
; CHECK-CVT-NEXT: mov h3, v0.h[1]
; CHECK-CVT-NEXT: fcvt s4, h1
; CHECK-CVT-NEXT: fcvt s5, h0
-; CHECK-CVT-NEXT: mov h6, v1.h[4]
-; CHECK-CVT-NEXT: mov h7, v0.h[4]
-; CHECK-CVT-NEXT: mov h16, v1.h[5]
+; CHECK-CVT-NEXT: mov h6, v1.h[2]
; CHECK-CVT-NEXT: fcvt s2, h2
; CHECK-CVT-NEXT: fcvt s3, h3
-; CHECK-CVT-NEXT: fcvt s6, h6
-; CHECK-CVT-NEXT: fcvt s7, h7
; CHECK-CVT-NEXT: fcmp s3, s2
-; CHECK-CVT-NEXT: mov h2, v1.h[2]
-; CHECK-CVT-NEXT: mov h3, v0.h[2]
+; CHECK-CVT-NEXT: mov h2, v0.h[2]
+; CHECK-CVT-NEXT: mov h3, v1.h[3]
; CHECK-CVT-NEXT: csetm w8, hi
; CHECK-CVT-NEXT: fcmp s5, s4
+; CHECK-CVT-NEXT: fcvt s5, h6
; CHECK-CVT-NEXT: fcvt s2, h2
-; CHECK-CVT-NEXT: mov h4, v1.h[3]
+; CHECK-CVT-NEXT: mov h4, v0.h[3]
; CHECK-CVT-NEXT: fcvt s3, h3
-; CHECK-CVT-NEXT: mov h5, v0.h[3]
+; CHECK-CVT-NEXT: mov h6, v0.h[4]
; CHECK-CVT-NEXT: csetm w9, hi
-; CHECK-CVT-NEXT: fcmp s3, s2
+; CHECK-CVT-NEXT: fcmp s2, s5
; CHECK-CVT-NEXT: fmov s2, w9
-; CHECK-CVT-NEXT: fcvt s3, h4
-; CHECK-CVT-NEXT: fcvt s4, h5
-; CHECK-CVT-NEXT: mov h5, v0.h[5]
+; CHECK-CVT-NEXT: fcvt s4, h4
+; CHECK-CVT-NEXT: mov h5, v1.h[4]
+; CHECK-CVT-NEXT: fcvt s6, h6
; CHECK-CVT-NEXT: mov v2.h[1], w8
; CHECK-CVT-NEXT: csetm w8, hi
; CHECK-CVT-NEXT: fcmp s4, s3
-; CHECK-CVT-NEXT: fcvt s3, h16
-; CHECK-CVT-NEXT: fcvt s4, h5
-; CHECK-CVT-NEXT: mov h5, v1.h[6]
+; CHECK-CVT-NEXT: mov h3, v1.h[5]
+; CHECK-CVT-NEXT: mov h4, v0.h[5]
+; CHECK-CVT-NEXT: fcvt s5, h5
; CHECK-CVT-NEXT: mov v2.h[2], w8
-; CHECK-CVT-NEXT: mov h1, v1.h[7]
; CHECK-CVT-NEXT: csetm w8, hi
-; CHECK-CVT-NEXT: fcmp s7, s6
+; CHECK-CVT-NEXT: fcvt s3, h3
+; CHECK-CVT-NEXT: fcvt s4, h4
+; CHECK-CVT-NEXT: fcmp s6, s5
+; CHECK-CVT-NEXT: mov h5, v1.h[6]
; CHECK-CVT-NEXT: mov h6, v0.h[6]
+; CHECK-CVT-NEXT: mov h1, v1.h[7]
; CHECK-CVT-NEXT: mov h0, v0.h[7]
; CHECK-CVT-NEXT: mov v2.h[3], w8
; CHECK-CVT-NEXT: csetm w8, hi
@@ -948,39 +948,39 @@
; CHECK-CVT-NEXT: mov h3, v0.h[1]
; CHECK-CVT-NEXT: fcvt s4, h1
; CHECK-CVT-NEXT: fcvt s5, h0
-; CHECK-CVT-NEXT: mov h6, v1.h[4]
-; CHECK-CVT-NEXT: mov h7, v0.h[4]
-; CHECK-CVT-NEXT: mov h16, v1.h[5]
+; CHECK-CVT-NEXT: mov h6, v1.h[2]
; CHECK-CVT-NEXT: fcvt s2, h2
; CHECK-CVT-NEXT: fcvt s3, h3
-; CHECK-CVT-NEXT: fcvt s6, h6
-; CHECK-CVT-NEXT: fcvt s7, h7
; CHECK-CVT-NEXT: fcmp s3, s2
-; CHECK-CVT-NEXT: mov h2, v1.h[2]
-; CHECK-CVT-NEXT: mov h3, v0.h[2]
+; CHECK-CVT-NEXT: mov h2, v0.h[2]
+; CHECK-CVT-NEXT: mov h3, v1.h[3]
; CHECK-CVT-NEXT: csetm w8, pl
; CHECK-CVT-NEXT: fcmp s5, s4
+; CHECK-CVT-NEXT: fcvt s5, h6
; CHECK-CVT-NEXT: fcvt s2, h2
-; CHECK-CVT-NEXT: mov h4, v1.h[3]
+; CHECK-CVT-NEXT: mov h4, v0.h[3]
; CHECK-CVT-NEXT: fcvt s3, h3
-; CHECK-CVT-NEXT: mov h5, v0.h[3]
+; CHECK-CVT-NEXT: mov h6, v0.h[4]
; CHECK-CVT-NEXT: csetm w9, pl
-; CHECK-CVT-NEXT: fcmp s3, s2
+; CHECK-CVT-NEXT: fcmp s2, s5
; CHECK-CVT-NEXT: fmov s2, w9
-; CHECK-CVT-NEXT: fcvt s3, h4
-; CHECK-CVT-NEXT: fcvt s4, h5
-; CHECK-CVT-NEXT: mov h5, v0.h[5]
+; CHECK-CVT-NEXT: fcvt s4, h4
+; CHECK-CVT-NEXT: mov h5, v1.h[4]
+; CHECK-CVT-NEXT: fcvt s6, h6
; CHECK-CVT-NEXT: mov v2.h[1], w8
; CHECK-CVT-NEXT: csetm w8, pl
; CHECK-CVT-NEXT: fcmp s4, s3
-; CHECK-CVT-NEXT: fcvt s3, h16
-; CHECK-CVT-NEXT: fcvt s4, h5
-; CHECK-CVT-NEXT: mov h5, v1.h[6]
+; CHECK-CVT-NEXT: mov h3, v1.h[5]
+; CHECK-CVT-NEXT: mov h4, v0.h[5]
+; CHECK-CVT-NEXT: fcvt s5, h5
; CHECK-CVT-NEXT: mov v2.h[2], w8
-; CHECK-CVT-NEXT: mov h1, v1.h[7]
; CHECK-CVT-NEXT: csetm w8, pl
-; CHECK-CVT-NEXT: fcmp s7, s6
+; CHECK-CVT-NEXT: fcvt s3, h3
+; CHECK-CVT-NEXT: fcvt s4, h4
+; CHECK-CVT-NEXT: fcmp s6, s5
+; CHECK-CVT-NEXT: mov h5, v1.h[6]
; CHECK-CVT-NEXT: mov h6, v0.h[6]
+; CHECK-CVT-NEXT: mov h1, v1.h[7]
; CHECK-CVT-NEXT: mov h0, v0.h[7]
; CHECK-CVT-NEXT: mov v2.h[3], w8
; CHECK-CVT-NEXT: csetm w8, pl
@@ -1018,39 +1018,39 @@
; CHECK-CVT-NEXT: mov h3, v0.h[1]
; CHECK-CVT-NEXT: fcvt s4, h1
; CHECK-CVT-NEXT: fcvt s5, h0
-; CHECK-CVT-NEXT: mov h6, v1.h[4]
-; CHECK-CVT-NEXT: mov h7, v0.h[4]
-; CHECK-CVT-NEXT: mov h16, v1.h[5]
+; CHECK-CVT-NEXT: mov h6, v1.h[2]
; CHECK-CVT-NEXT: fcvt s2, h2
; CHECK-CVT-NEXT: fcvt s3, h3
-; CHECK-CVT-NEXT: fcvt s6, h6
-; CHECK-CVT-NEXT: fcvt s7, h7
; CHECK-CVT-NEXT: fcmp s3, s2
-; CHECK-CVT-NEXT: mov h2, v1.h[2]
-; CHECK-CVT-NEXT: mov h3, v0.h[2]
+; CHECK-CVT-NEXT: mov h2, v0.h[2]
+; CHECK-CVT-NEXT: mov h3, v1.h[3]
; CHECK-CVT-NEXT: csetm w8, lt
; CHECK-CVT-NEXT: fcmp s5, s4
+; CHECK-CVT-NEXT: fcvt s5, h6
; CHECK-CVT-NEXT: fcvt s2, h2
-; CHECK-CVT-NEXT: mov h4, v1.h[3]
+; CHECK-CVT-NEXT: mov h4, v0.h[3]
; CHECK-CVT-NEXT: fcvt s3, h3
-; CHECK-CVT-NEXT: mov h5, v0.h[3]
+; CHECK-CVT-NEXT: mov h6, v0.h[4]
; CHECK-CVT-NEXT: csetm w9, lt
-; CHECK-CVT-NEXT: fcmp s3, s2
+; CHECK-CVT-NEXT: fcmp s2, s5
; CHECK-CVT-NEXT: fmov s2, w9
-; CHECK-CVT-NEXT: fcvt s3, h4
-; CHECK-CVT-NEXT: fcvt s4, h5
-; CHECK-CVT-NEXT: mov h5, v0.h[5]
+; CHECK-CVT-NEXT: fcvt s4, h4
+; CHECK-CVT-NEXT: mov h5, v1.h[4]
+; CHECK-CVT-NEXT: fcvt s6, h6
; CHECK-CVT-NEXT: mov v2.h[1], w8
; CHECK-CVT-NEXT: csetm w8, lt
; CHECK-CVT-NEXT: fcmp s4, s3
-; CHECK-CVT-NEXT: fcvt s3, h16
-; CHECK-CVT-NEXT: fcvt s4, h5
-; CHECK-CVT-NEXT: mov h5, v1.h[6]
+; CHECK-CVT-NEXT: mov h3, v1.h[5]
+; CHECK-CVT-NEXT: mov h4, v0.h[5]
+; CHECK-CVT-NEXT: fcvt s5, h5
; CHECK-CVT-NEXT: mov v2.h[2], w8
-; CHECK-CVT-NEXT: mov h1, v1.h[7]
; CHECK-CVT-NEXT: csetm w8, lt
-; CHECK-CVT-NEXT: fcmp s7, s6
+; CHECK-CVT-NEXT: fcvt s3, h3
+; CHECK-CVT-NEXT: fcvt s4, h4
+; CHECK-CVT-NEXT: fcmp s6, s5
+; CHECK-CVT-NEXT: mov h5, v1.h[6]
; CHECK-CVT-NEXT: mov h6, v0.h[6]
+; CHECK-CVT-NEXT: mov h1, v1.h[7]
; CHECK-CVT-NEXT: mov h0, v0.h[7]
; CHECK-CVT-NEXT: mov v2.h[3], w8
; CHECK-CVT-NEXT: csetm w8, lt
@@ -1088,39 +1088,39 @@
; CHECK-CVT-NEXT: mov h3, v0.h[1]
; CHECK-CVT-NEXT: fcvt s4, h1
; CHECK-CVT-NEXT: fcvt s5, h0
-; CHECK-CVT-NEXT: mov h6, v1.h[4]
-; CHECK-CVT-NEXT: mov h7, v0.h[4]
-; CHECK-CVT-NEXT: mov h16, v1.h[5]
+; CHECK-CVT-NEXT: mov h6, v1.h[2]
; CHECK-CVT-NEXT: fcvt s2, h2
; CHECK-CVT-NEXT: fcvt s3, h3
-; CHECK-CVT-NEXT: fcvt s6, h6
-; CHECK-CVT-NEXT: fcvt s7, h7
; CHECK-CVT-NEXT: fcmp s3, s2
-; CHECK-CVT-NEXT: mov h2, v1.h[2]
-; CHECK-CVT-NEXT: mov h3, v0.h[2]
+; CHECK-CVT-NEXT: mov h2, v0.h[2]
+; CHECK-CVT-NEXT: mov h3, v1.h[3]
; CHECK-CVT-NEXT: csetm w8, le
; CHECK-CVT-NEXT: fcmp s5, s4
+; CHECK-CVT-NEXT: fcvt s5, h6
; CHECK-CVT-NEXT: fcvt s2, h2
-; CHECK-CVT-NEXT: mov h4, v1.h[3]
+; CHECK-CVT-NEXT: mov h4, v0.h[3]
; CHECK-CVT-NEXT: fcvt s3, h3
-; CHECK-CVT-NEXT: mov h5, v0.h[3]
+; CHECK-CVT-NEXT: mov h6, v0.h[4]
; CHECK-CVT-NEXT: csetm w9, le
-; CHECK-CVT-NEXT: fcmp s3, s2
+; CHECK-CVT-NEXT: fcmp s2, s5
; CHECK-CVT-NEXT: fmov s2, w9
-; CHECK-CVT-NEXT: fcvt s3, h4
-; CHECK-CVT-NEXT: fcvt s4, h5
-; CHECK-CVT-NEXT: mov h5, v0.h[5]
+; CHECK-CVT-NEXT: fcvt s4, h4
+; CHECK-CVT-NEXT: mov h5, v1.h[4]
+; CHECK-CVT-NEXT: fcvt s6, h6
; CHECK-CVT-NEXT: mov v2.h[1], w8
; CHECK-CVT-NEXT: csetm w8, le
; CHECK-CVT-NEXT: fcmp s4, s3
-; CHECK-CVT-NEXT: fcvt s3, h16
-; CHECK-CVT-NEXT: fcvt s4, h5
-; CHECK-CVT-NEXT: mov h5, v1.h[6]
+; CHECK-CVT-NEXT: mov h3, v1.h[5]
+; CHECK-CVT-NEXT: mov h4, v0.h[5]
+; CHECK-CVT-NEXT: fcvt s5, h5
; CHECK-CVT-NEXT: mov v2.h[2], w8
-; CHECK-CVT-NEXT: mov h1, v1.h[7]
; CHECK-CVT-NEXT: csetm w8, le
-; CHECK-CVT-NEXT: fcmp s7, s6
+; CHECK-CVT-NEXT: fcvt s3, h3
+; CHECK-CVT-NEXT: fcvt s4, h4
+; CHECK-CVT-NEXT: fcmp s6, s5
+; CHECK-CVT-NEXT: mov h5, v1.h[6]
; CHECK-CVT-NEXT: mov h6, v0.h[6]
+; CHECK-CVT-NEXT: mov h1, v1.h[7]
; CHECK-CVT-NEXT: mov h0, v0.h[7]
; CHECK-CVT-NEXT: mov v2.h[3], w8
; CHECK-CVT-NEXT: csetm w8, le
@@ -1158,39 +1158,39 @@
; CHECK-CVT-NEXT: mov h3, v0.h[1]
; CHECK-CVT-NEXT: fcvt s4, h1
; CHECK-CVT-NEXT: fcvt s5, h0
-; CHECK-CVT-NEXT: mov h6, v1.h[4]
-; CHECK-CVT-NEXT: mov h7, v0.h[4]
-; CHECK-CVT-NEXT: mov h16, v1.h[5]
+; CHECK-CVT-NEXT: mov h6, v1.h[2]
; CHECK-CVT-NEXT: fcvt s2, h2
; CHECK-CVT-NEXT: fcvt s3, h3
-; CHECK-CVT-NEXT: fcvt s6, h6
-; CHECK-CVT-NEXT: fcvt s7, h7
; CHECK-CVT-NEXT: fcmp s3, s2
-; CHECK-CVT-NEXT: mov h2, v1.h[2]
-; CHECK-CVT-NEXT: mov h3, v0.h[2]
+; CHECK-CVT-NEXT: mov h2, v0.h[2]
+; CHECK-CVT-NEXT: mov h3, v1.h[3]
; CHECK-CVT-NEXT: csetm w8, vs
; CHECK-CVT-NEXT: fcmp s5, s4
+; CHECK-CVT-NEXT: fcvt s5, h6
; CHECK-CVT-NEXT: fcvt s2, h2
-; CHECK-CVT-NEXT: mov h4, v1.h[3]
+; CHECK-CVT-NEXT: mov h4, v0.h[3]
; CHECK-CVT-NEXT: fcvt s3, h3
-; CHECK-CVT-NEXT: mov h5, v0.h[3]
+; CHECK-CVT-NEXT: mov h6, v0.h[4]
; CHECK-CVT-NEXT: csetm w9, vs
-; CHECK-CVT-NEXT: fcmp s3, s2
+; CHECK-CVT-NEXT: fcmp s2, s5
; CHECK-CVT-NEXT: fmov s2, w9
-; CHECK-CVT-NEXT: fcvt s3, h4
-; CHECK-CVT-NEXT: fcvt s4, h5
-; CHECK-CVT-NEXT: mov h5, v0.h[5]
+; CHECK-CVT-NEXT: fcvt s4, h4
+; CHECK-CVT-NEXT: mov h5, v1.h[4]
+; CHECK-CVT-NEXT: fcvt s6, h6
; CHECK-CVT-NEXT: mov v2.h[1], w8
; CHECK-CVT-NEXT: csetm w8, vs
; CHECK-CVT-NEXT: fcmp s4, s3
-; CHECK-CVT-NEXT: fcvt s3, h16
-; CHECK-CVT-NEXT: fcvt s4, h5
-; CHECK-CVT-NEXT: mov h5, v1.h[6]
+; CHECK-CVT-NEXT: mov h3, v1.h[5]
+; CHECK-CVT-NEXT: mov h4, v0.h[5]
+; CHECK-CVT-NEXT: fcvt s5, h5
; CHECK-CVT-NEXT: mov v2.h[2], w8
-; CHECK-CVT-NEXT: mov h1, v1.h[7]
; CHECK-CVT-NEXT: csetm w8, vs
-; CHECK-CVT-NEXT: fcmp s7, s6
+; CHECK-CVT-NEXT: fcvt s3, h3
+; CHECK-CVT-NEXT: fcvt s4, h4
+; CHECK-CVT-NEXT: fcmp s6, s5
+; CHECK-CVT-NEXT: mov h5, v1.h[6]
; CHECK-CVT-NEXT: mov h6, v0.h[6]
+; CHECK-CVT-NEXT: mov h1, v1.h[7]
; CHECK-CVT-NEXT: mov h0, v0.h[7]
; CHECK-CVT-NEXT: mov v2.h[3], w8
; CHECK-CVT-NEXT: csetm w8, vs
@@ -1229,66 +1229,66 @@
; CHECK-CVT-NEXT: mov h2, v1.h[1]
; CHECK-CVT-NEXT: mov h3, v0.h[1]
; CHECK-CVT-NEXT: fcvt s4, h1
-; CHECK-CVT-NEXT: fcvt s5, h0
-; CHECK-CVT-NEXT: mov h6, v0.h[4]
+; CHECK-CVT-NEXT: fcvt s6, h0
+; CHECK-CVT-NEXT: mov h5, v1.h[2]
; CHECK-CVT-NEXT: fcvt s2, h2
; CHECK-CVT-NEXT: fcvt s3, h3
; CHECK-CVT-NEXT: fcmp s3, s2
-; CHECK-CVT-NEXT: mov h2, v1.h[2]
-; CHECK-CVT-NEXT: mov h3, v0.h[2]
+; CHECK-CVT-NEXT: mov h2, v0.h[2]
+; CHECK-CVT-NEXT: fcvt s3, h5
+; CHECK-CVT-NEXT: mov h5, v0.h[3]
; CHECK-CVT-NEXT: csetm w8, mi
; CHECK-CVT-NEXT: csinv w8, w8, wzr, le
-; CHECK-CVT-NEXT: fcmp s5, s4
-; CHECK-CVT-NEXT: mov h4, v1.h[3]
+; CHECK-CVT-NEXT: fcmp s6, s4
; CHECK-CVT-NEXT: fcvt s2, h2
-; CHECK-CVT-NEXT: fcvt s3, h3
-; CHECK-CVT-NEXT: mov h5, v0.h[3]
+; CHECK-CVT-NEXT: mov h4, v1.h[3]
+; CHECK-CVT-NEXT: mov h6, v1.h[4]
; CHECK-CVT-NEXT: csetm w9, mi
; CHECK-CVT-NEXT: csinv w9, w9, wzr, le
-; CHECK-CVT-NEXT: fcvt s4, h4
-; CHECK-CVT-NEXT: fcmp s3, s2
-; CHECK-CVT-NEXT: fcvt s2, h5
-; CHECK-CVT-NEXT: fmov s3, w9
-; CHECK-CVT-NEXT: mov h5, v1.h[4]
-; CHECK-CVT-NEXT: csetm w9, mi
-; CHECK-CVT-NEXT: mov v3.h[1], w8
-; CHECK-CVT-NEXT: csinv w8, w9, wzr, le
-; CHECK-CVT-NEXT: fcmp s2, s4
-; CHECK-CVT-NEXT: fcvt s2, h5
-; CHECK-CVT-NEXT: fcvt s4, h6
-; CHECK-CVT-NEXT: mov h5, v1.h[5]
-; CHECK-CVT-NEXT: mov h6, v0.h[5]
-; CHECK-CVT-NEXT: csetm w9, mi
-; CHECK-CVT-NEXT: mov v3.h[2], w8
-; CHECK-CVT-NEXT: csinv w8, w9, wzr, le
-; CHECK-CVT-NEXT: fcmp s4, s2
-; CHECK-CVT-NEXT: fcvt s2, h5
-; CHECK-CVT-NEXT: fcvt s4, h6
-; CHECK-CVT-NEXT: mov h5, v1.h[6]
+; CHECK-CVT-NEXT: fcmp s2, s3
+; CHECK-CVT-NEXT: mov h2, v0.h[4]
+; CHECK-CVT-NEXT: fcvt s3, h4
+; CHECK-CVT-NEXT: fcvt s4, h5
+; CHECK-CVT-NEXT: fmov s5, w9
+; CHECK-CVT-NEXT: fcvt s6, h6
+; CHECK-CVT-NEXT: mov v5.h[1], w8
+; CHECK-CVT-NEXT: csetm w8, mi
+; CHECK-CVT-NEXT: fcvt s2, h2
+; CHECK-CVT-NEXT: csinv w8, w8, wzr, le
+; CHECK-CVT-NEXT: fcmp s4, s3
+; CHECK-CVT-NEXT: mov h3, v1.h[5]
+; CHECK-CVT-NEXT: mov h4, v0.h[5]
+; CHECK-CVT-NEXT: mov v5.h[2], w8
+; CHECK-CVT-NEXT: csetm w8, mi
+; CHECK-CVT-NEXT: csinv w8, w8, wzr, le
+; CHECK-CVT-NEXT: fcmp s2, s6
+; CHECK-CVT-NEXT: fcvt s2, h3
+; CHECK-CVT-NEXT: fcvt s3, h4
+; CHECK-CVT-NEXT: mov h4, v1.h[6]
; CHECK-CVT-NEXT: mov h6, v0.h[6]
-; CHECK-CVT-NEXT: csetm w9, mi
; CHECK-CVT-NEXT: mov h1, v1.h[7]
-; CHECK-CVT-NEXT: mov v3.h[3], w8
-; CHECK-CVT-NEXT: csinv w8, w9, wzr, le
-; CHECK-CVT-NEXT: fcmp s4, s2
-; CHECK-CVT-NEXT: fcvt s2, h5
-; CHECK-CVT-NEXT: fcvt s4, h6
; CHECK-CVT-NEXT: mov h0, v0.h[7]
+; CHECK-CVT-NEXT: mov v5.h[3], w8
+; CHECK-CVT-NEXT: csetm w8, mi
+; CHECK-CVT-NEXT: csinv w8, w8, wzr, le
+; CHECK-CVT-NEXT: fcmp s3, s2
+; CHECK-CVT-NEXT: fcvt s2, h4
+; CHECK-CVT-NEXT: fcvt s3, h6
; CHECK-CVT-NEXT: fcvt s1, h1
-; CHECK-CVT-NEXT: csetm w9, mi
-; CHECK-CVT-NEXT: mov v3.h[4], w8
-; CHECK-CVT-NEXT: csinv w8, w9, wzr, le
-; CHECK-CVT-NEXT: fcmp s4, s2
; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: mov v3.h[5], w8
+; CHECK-CVT-NEXT: mov v5.h[4], w8
+; CHECK-CVT-NEXT: csetm w8, mi
+; CHECK-CVT-NEXT: csinv w8, w8, wzr, le
+; CHECK-CVT-NEXT: fcmp s3, s2
+; CHECK-CVT-NEXT: mov v5.h[5], w8
; CHECK-CVT-NEXT: csetm w8, mi
; CHECK-CVT-NEXT: csinv w8, w8, wzr, le
; CHECK-CVT-NEXT: fcmp s0, s1
-; CHECK-CVT-NEXT: mov v3.h[6], w8
+; CHECK-CVT-NEXT: mov v5.h[6], w8
; CHECK-CVT-NEXT: csetm w8, mi
; CHECK-CVT-NEXT: csinv w8, w8, wzr, le
-; CHECK-CVT-NEXT: mov v3.h[7], w8
-; CHECK-CVT-NEXT: xtn v0.8b, v3.8h
+; CHECK-CVT-NEXT: mov v5.h[7], w8
+; CHECK-CVT-NEXT: xtn v0.8b, v5.8h
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: test_fcmp_one:
@@ -1309,39 +1309,39 @@
; CHECK-CVT-NEXT: mov h3, v0.h[1]
; CHECK-CVT-NEXT: fcvt s4, h1
; CHECK-CVT-NEXT: fcvt s5, h0
-; CHECK-CVT-NEXT: mov h6, v1.h[4]
-; CHECK-CVT-NEXT: mov h7, v0.h[4]
-; CHECK-CVT-NEXT: mov h16, v1.h[5]
+; CHECK-CVT-NEXT: mov h6, v1.h[2]
; CHECK-CVT-NEXT: fcvt s2, h2
; CHECK-CVT-NEXT: fcvt s3, h3
-; CHECK-CVT-NEXT: fcvt s6, h6
-; CHECK-CVT-NEXT: fcvt s7, h7
; CHECK-CVT-NEXT: fcmp s3, s2
-; CHECK-CVT-NEXT: mov h2, v1.h[2]
-; CHECK-CVT-NEXT: mov h3, v0.h[2]
+; CHECK-CVT-NEXT: mov h2, v0.h[2]
+; CHECK-CVT-NEXT: mov h3, v1.h[3]
; CHECK-CVT-NEXT: csetm w8, eq
; CHECK-CVT-NEXT: fcmp s5, s4
+; CHECK-CVT-NEXT: fcvt s5, h6
; CHECK-CVT-NEXT: fcvt s2, h2
-; CHECK-CVT-NEXT: mov h4, v1.h[3]
+; CHECK-CVT-NEXT: mov h4, v0.h[3]
; CHECK-CVT-NEXT: fcvt s3, h3
-; CHECK-CVT-NEXT: mov h5, v0.h[3]
+; CHECK-CVT-NEXT: mov h6, v0.h[4]
; CHECK-CVT-NEXT: csetm w9, eq
-; CHECK-CVT-NEXT: fcmp s3, s2
+; CHECK-CVT-NEXT: fcmp s2, s5
; CHECK-CVT-NEXT: fmov s2, w9
-; CHECK-CVT-NEXT: fcvt s3, h4
-; CHECK-CVT-NEXT: fcvt s4, h5
-; CHECK-CVT-NEXT: mov h5, v0.h[5]
+; CHECK-CVT-NEXT: fcvt s4, h4
+; CHECK-CVT-NEXT: mov h5, v1.h[4]
+; CHECK-CVT-NEXT: fcvt s6, h6
; CHECK-CVT-NEXT: mov v2.h[1], w8
; CHECK-CVT-NEXT: csetm w8, eq
; CHECK-CVT-NEXT: fcmp s4, s3
-; CHECK-CVT-NEXT: fcvt s3, h16
-; CHECK-CVT-NEXT: fcvt s4, h5
-; CHECK-CVT-NEXT: mov h5, v1.h[6]
+; CHECK-CVT-NEXT: mov h3, v1.h[5]
+; CHECK-CVT-NEXT: mov h4, v0.h[5]
+; CHECK-CVT-NEXT: fcvt s5, h5
; CHECK-CVT-NEXT: mov v2.h[2], w8
-; CHECK-CVT-NEXT: mov h1, v1.h[7]
; CHECK-CVT-NEXT: csetm w8, eq
-; CHECK-CVT-NEXT: fcmp s7, s6
+; CHECK-CVT-NEXT: fcvt s3, h3
+; CHECK-CVT-NEXT: fcvt s4, h4
+; CHECK-CVT-NEXT: fcmp s6, s5
+; CHECK-CVT-NEXT: mov h5, v1.h[6]
; CHECK-CVT-NEXT: mov h6, v0.h[6]
+; CHECK-CVT-NEXT: mov h1, v1.h[7]
; CHECK-CVT-NEXT: mov h0, v0.h[7]
; CHECK-CVT-NEXT: mov v2.h[3], w8
; CHECK-CVT-NEXT: csetm w8, eq
@@ -1378,39 +1378,39 @@
; CHECK-CVT-NEXT: mov h3, v0.h[1]
; CHECK-CVT-NEXT: fcvt s4, h1
; CHECK-CVT-NEXT: fcvt s5, h0
-; CHECK-CVT-NEXT: mov h6, v1.h[4]
-; CHECK-CVT-NEXT: mov h7, v0.h[4]
-; CHECK-CVT-NEXT: mov h16, v1.h[5]
+; CHECK-CVT-NEXT: mov h6, v1.h[2]
; CHECK-CVT-NEXT: fcvt s2, h2
; CHECK-CVT-NEXT: fcvt s3, h3
-; CHECK-CVT-NEXT: fcvt s6, h6
-; CHECK-CVT-NEXT: fcvt s7, h7
; CHECK-CVT-NEXT: fcmp s3, s2
-; CHECK-CVT-NEXT: mov h2, v1.h[2]
-; CHECK-CVT-NEXT: mov h3, v0.h[2]
+; CHECK-CVT-NEXT: mov h2, v0.h[2]
+; CHECK-CVT-NEXT: mov h3, v1.h[3]
; CHECK-CVT-NEXT: csetm w8, gt
; CHECK-CVT-NEXT: fcmp s5, s4
+; CHECK-CVT-NEXT: fcvt s5, h6
; CHECK-CVT-NEXT: fcvt s2, h2
-; CHECK-CVT-NEXT: mov h4, v1.h[3]
+; CHECK-CVT-NEXT: mov h4, v0.h[3]
; CHECK-CVT-NEXT: fcvt s3, h3
-; CHECK-CVT-NEXT: mov h5, v0.h[3]
+; CHECK-CVT-NEXT: mov h6, v0.h[4]
; CHECK-CVT-NEXT: csetm w9, gt
-; CHECK-CVT-NEXT: fcmp s3, s2
+; CHECK-CVT-NEXT: fcmp s2, s5
; CHECK-CVT-NEXT: fmov s2, w9
-; CHECK-CVT-NEXT: fcvt s3, h4
-; CHECK-CVT-NEXT: fcvt s4, h5
-; CHECK-CVT-NEXT: mov h5, v0.h[5]
+; CHECK-CVT-NEXT: fcvt s4, h4
+; CHECK-CVT-NEXT: mov h5, v1.h[4]
+; CHECK-CVT-NEXT: fcvt s6, h6
; CHECK-CVT-NEXT: mov v2.h[1], w8
; CHECK-CVT-NEXT: csetm w8, gt
; CHECK-CVT-NEXT: fcmp s4, s3
-; CHECK-CVT-NEXT: fcvt s3, h16
-; CHECK-CVT-NEXT: fcvt s4, h5
-; CHECK-CVT-NEXT: mov h5, v1.h[6]
+; CHECK-CVT-NEXT: mov h3, v1.h[5]
+; CHECK-CVT-NEXT: mov h4, v0.h[5]
+; CHECK-CVT-NEXT: fcvt s5, h5
; CHECK-CVT-NEXT: mov v2.h[2], w8
-; CHECK-CVT-NEXT: mov h1, v1.h[7]
; CHECK-CVT-NEXT: csetm w8, gt
-; CHECK-CVT-NEXT: fcmp s7, s6
+; CHECK-CVT-NEXT: fcvt s3, h3
+; CHECK-CVT-NEXT: fcvt s4, h4
+; CHECK-CVT-NEXT: fcmp s6, s5
+; CHECK-CVT-NEXT: mov h5, v1.h[6]
; CHECK-CVT-NEXT: mov h6, v0.h[6]
+; CHECK-CVT-NEXT: mov h1, v1.h[7]
; CHECK-CVT-NEXT: mov h0, v0.h[7]
; CHECK-CVT-NEXT: mov v2.h[3], w8
; CHECK-CVT-NEXT: csetm w8, gt
@@ -1447,39 +1447,39 @@
; CHECK-CVT-NEXT: mov h3, v0.h[1]
; CHECK-CVT-NEXT: fcvt s4, h1
; CHECK-CVT-NEXT: fcvt s5, h0
-; CHECK-CVT-NEXT: mov h6, v1.h[4]
-; CHECK-CVT-NEXT: mov h7, v0.h[4]
-; CHECK-CVT-NEXT: mov h16, v1.h[5]
+; CHECK-CVT-NEXT: mov h6, v1.h[2]
; CHECK-CVT-NEXT: fcvt s2, h2
; CHECK-CVT-NEXT: fcvt s3, h3
-; CHECK-CVT-NEXT: fcvt s6, h6
-; CHECK-CVT-NEXT: fcvt s7, h7
; CHECK-CVT-NEXT: fcmp s3, s2
-; CHECK-CVT-NEXT: mov h2, v1.h[2]
-; CHECK-CVT-NEXT: mov h3, v0.h[2]
+; CHECK-CVT-NEXT: mov h2, v0.h[2]
+; CHECK-CVT-NEXT: mov h3, v1.h[3]
; CHECK-CVT-NEXT: csetm w8, ge
; CHECK-CVT-NEXT: fcmp s5, s4
+; CHECK-CVT-NEXT: fcvt s5, h6
; CHECK-CVT-NEXT: fcvt s2, h2
-; CHECK-CVT-NEXT: mov h4, v1.h[3]
+; CHECK-CVT-NEXT: mov h4, v0.h[3]
; CHECK-CVT-NEXT: fcvt s3, h3
-; CHECK-CVT-NEXT: mov h5, v0.h[3]
+; CHECK-CVT-NEXT: mov h6, v0.h[4]
; CHECK-CVT-NEXT: csetm w9, ge
-; CHECK-CVT-NEXT: fcmp s3, s2
+; CHECK-CVT-NEXT: fcmp s2, s5
; CHECK-CVT-NEXT: fmov s2, w9
-; CHECK-CVT-NEXT: fcvt s3, h4
-; CHECK-CVT-NEXT: fcvt s4, h5
-; CHECK-CVT-NEXT: mov h5, v0.h[5]
+; CHECK-CVT-NEXT: fcvt s4, h4
+; CHECK-CVT-NEXT: mov h5, v1.h[4]
+; CHECK-CVT-NEXT: fcvt s6, h6
; CHECK-CVT-NEXT: mov v2.h[1], w8
; CHECK-CVT-NEXT: csetm w8, ge
; CHECK-CVT-NEXT: fcmp s4, s3
-; CHECK-CVT-NEXT: fcvt s3, h16
-; CHECK-CVT-NEXT: fcvt s4, h5
-; CHECK-CVT-NEXT: mov h5, v1.h[6]
+; CHECK-CVT-NEXT: mov h3, v1.h[5]
+; CHECK-CVT-NEXT: mov h4, v0.h[5]
+; CHECK-CVT-NEXT: fcvt s5, h5
; CHECK-CVT-NEXT: mov v2.h[2], w8
-; CHECK-CVT-NEXT: mov h1, v1.h[7]
; CHECK-CVT-NEXT: csetm w8, ge
-; CHECK-CVT-NEXT: fcmp s7, s6
+; CHECK-CVT-NEXT: fcvt s3, h3
+; CHECK-CVT-NEXT: fcvt s4, h4
+; CHECK-CVT-NEXT: fcmp s6, s5
+; CHECK-CVT-NEXT: mov h5, v1.h[6]
; CHECK-CVT-NEXT: mov h6, v0.h[6]
+; CHECK-CVT-NEXT: mov h1, v1.h[7]
; CHECK-CVT-NEXT: mov h0, v0.h[7]
; CHECK-CVT-NEXT: mov v2.h[3], w8
; CHECK-CVT-NEXT: csetm w8, ge
@@ -1516,39 +1516,39 @@
; CHECK-CVT-NEXT: mov h3, v0.h[1]
; CHECK-CVT-NEXT: fcvt s4, h1
; CHECK-CVT-NEXT: fcvt s5, h0
-; CHECK-CVT-NEXT: mov h6, v1.h[4]
-; CHECK-CVT-NEXT: mov h7, v0.h[4]
-; CHECK-CVT-NEXT: mov h16, v1.h[5]
+; CHECK-CVT-NEXT: mov h6, v1.h[2]
; CHECK-CVT-NEXT: fcvt s2, h2
; CHECK-CVT-NEXT: fcvt s3, h3
-; CHECK-CVT-NEXT: fcvt s6, h6
-; CHECK-CVT-NEXT: fcvt s7, h7
; CHECK-CVT-NEXT: fcmp s3, s2
-; CHECK-CVT-NEXT: mov h2, v1.h[2]
-; CHECK-CVT-NEXT: mov h3, v0.h[2]
+; CHECK-CVT-NEXT: mov h2, v0.h[2]
+; CHECK-CVT-NEXT: mov h3, v1.h[3]
; CHECK-CVT-NEXT: csetm w8, mi
; CHECK-CVT-NEXT: fcmp s5, s4
+; CHECK-CVT-NEXT: fcvt s5, h6
; CHECK-CVT-NEXT: fcvt s2, h2
-; CHECK-CVT-NEXT: mov h4, v1.h[3]
+; CHECK-CVT-NEXT: mov h4, v0.h[3]
; CHECK-CVT-NEXT: fcvt s3, h3
-; CHECK-CVT-NEXT: mov h5, v0.h[3]
+; CHECK-CVT-NEXT: mov h6, v0.h[4]
; CHECK-CVT-NEXT: csetm w9, mi
-; CHECK-CVT-NEXT: fcmp s3, s2
+; CHECK-CVT-NEXT: fcmp s2, s5
; CHECK-CVT-NEXT: fmov s2, w9
-; CHECK-CVT-NEXT: fcvt s3, h4
-; CHECK-CVT-NEXT: fcvt s4, h5
-; CHECK-CVT-NEXT: mov h5, v0.h[5]
+; CHECK-CVT-NEXT: fcvt s4, h4
+; CHECK-CVT-NEXT: mov h5, v1.h[4]
+; CHECK-CVT-NEXT: fcvt s6, h6
; CHECK-CVT-NEXT: mov v2.h[1], w8
; CHECK-CVT-NEXT: csetm w8, mi
; CHECK-CVT-NEXT: fcmp s4, s3
-; CHECK-CVT-NEXT: fcvt s3, h16
-; CHECK-CVT-NEXT: fcvt s4, h5
-; CHECK-CVT-NEXT: mov h5, v1.h[6]
+; CHECK-CVT-NEXT: mov h3, v1.h[5]
+; CHECK-CVT-NEXT: mov h4, v0.h[5]
+; CHECK-CVT-NEXT: fcvt s5, h5
; CHECK-CVT-NEXT: mov v2.h[2], w8
-; CHECK-CVT-NEXT: mov h1, v1.h[7]
; CHECK-CVT-NEXT: csetm w8, mi
-; CHECK-CVT-NEXT: fcmp s7, s6
+; CHECK-CVT-NEXT: fcvt s3, h3
+; CHECK-CVT-NEXT: fcvt s4, h4
+; CHECK-CVT-NEXT: fcmp s6, s5
+; CHECK-CVT-NEXT: mov h5, v1.h[6]
; CHECK-CVT-NEXT: mov h6, v0.h[6]
+; CHECK-CVT-NEXT: mov h1, v1.h[7]
; CHECK-CVT-NEXT: mov h0, v0.h[7]
; CHECK-CVT-NEXT: mov v2.h[3], w8
; CHECK-CVT-NEXT: csetm w8, mi
@@ -1585,39 +1585,39 @@
; CHECK-CVT-NEXT: mov h3, v0.h[1]
; CHECK-CVT-NEXT: fcvt s4, h1
; CHECK-CVT-NEXT: fcvt s5, h0
-; CHECK-CVT-NEXT: mov h6, v1.h[4]
-; CHECK-CVT-NEXT: mov h7, v0.h[4]
-; CHECK-CVT-NEXT: mov h16, v1.h[5]
+; CHECK-CVT-NEXT: mov h6, v1.h[2]
; CHECK-CVT-NEXT: fcvt s2, h2
; CHECK-CVT-NEXT: fcvt s3, h3
-; CHECK-CVT-NEXT: fcvt s6, h6
-; CHECK-CVT-NEXT: fcvt s7, h7
; CHECK-CVT-NEXT: fcmp s3, s2
-; CHECK-CVT-NEXT: mov h2, v1.h[2]
-; CHECK-CVT-NEXT: mov h3, v0.h[2]
+; CHECK-CVT-NEXT: mov h2, v0.h[2]
+; CHECK-CVT-NEXT: mov h3, v1.h[3]
; CHECK-CVT-NEXT: csetm w8, ls
; CHECK-CVT-NEXT: fcmp s5, s4
+; CHECK-CVT-NEXT: fcvt s5, h6
; CHECK-CVT-NEXT: fcvt s2, h2
-; CHECK-CVT-NEXT: mov h4, v1.h[3]
+; CHECK-CVT-NEXT: mov h4, v0.h[3]
; CHECK-CVT-NEXT: fcvt s3, h3
-; CHECK-CVT-NEXT: mov h5, v0.h[3]
+; CHECK-CVT-NEXT: mov h6, v0.h[4]
; CHECK-CVT-NEXT: csetm w9, ls
-; CHECK-CVT-NEXT: fcmp s3, s2
+; CHECK-CVT-NEXT: fcmp s2, s5
; CHECK-CVT-NEXT: fmov s2, w9
-; CHECK-CVT-NEXT: fcvt s3, h4
-; CHECK-CVT-NEXT: fcvt s4, h5
-; CHECK-CVT-NEXT: mov h5, v0.h[5]
+; CHECK-CVT-NEXT: fcvt s4, h4
+; CHECK-CVT-NEXT: mov h5, v1.h[4]
+; CHECK-CVT-NEXT: fcvt s6, h6
; CHECK-CVT-NEXT: mov v2.h[1], w8
; CHECK-CVT-NEXT: csetm w8, ls
; CHECK-CVT-NEXT: fcmp s4, s3
-; CHECK-CVT-NEXT: fcvt s3, h16
-; CHECK-CVT-NEXT: fcvt s4, h5
-; CHECK-CVT-NEXT: mov h5, v1.h[6]
+; CHECK-CVT-NEXT: mov h3, v1.h[5]
+; CHECK-CVT-NEXT: mov h4, v0.h[5]
+; CHECK-CVT-NEXT: fcvt s5, h5
; CHECK-CVT-NEXT: mov v2.h[2], w8
-; CHECK-CVT-NEXT: mov h1, v1.h[7]
; CHECK-CVT-NEXT: csetm w8, ls
-; CHECK-CVT-NEXT: fcmp s7, s6
+; CHECK-CVT-NEXT: fcvt s3, h3
+; CHECK-CVT-NEXT: fcvt s4, h4
+; CHECK-CVT-NEXT: fcmp s6, s5
+; CHECK-CVT-NEXT: mov h5, v1.h[6]
; CHECK-CVT-NEXT: mov h6, v0.h[6]
+; CHECK-CVT-NEXT: mov h1, v1.h[7]
; CHECK-CVT-NEXT: mov h0, v0.h[7]
; CHECK-CVT-NEXT: mov v2.h[3], w8
; CHECK-CVT-NEXT: csetm w8, ls
@@ -1654,39 +1654,39 @@
; CHECK-CVT-NEXT: mov h3, v0.h[1]
; CHECK-CVT-NEXT: fcvt s4, h1
; CHECK-CVT-NEXT: fcvt s5, h0
-; CHECK-CVT-NEXT: mov h6, v1.h[4]
-; CHECK-CVT-NEXT: mov h7, v0.h[4]
-; CHECK-CVT-NEXT: mov h16, v1.h[5]
+; CHECK-CVT-NEXT: mov h6, v1.h[2]
; CHECK-CVT-NEXT: fcvt s2, h2
; CHECK-CVT-NEXT: fcvt s3, h3
-; CHECK-CVT-NEXT: fcvt s6, h6
-; CHECK-CVT-NEXT: fcvt s7, h7
; CHECK-CVT-NEXT: fcmp s3, s2
-; CHECK-CVT-NEXT: mov h2, v1.h[2]
-; CHECK-CVT-NEXT: mov h3, v0.h[2]
+; CHECK-CVT-NEXT: mov h2, v0.h[2]
+; CHECK-CVT-NEXT: mov h3, v1.h[3]
; CHECK-CVT-NEXT: csetm w8, vc
; CHECK-CVT-NEXT: fcmp s5, s4
+; CHECK-CVT-NEXT: fcvt s5, h6
; CHECK-CVT-NEXT: fcvt s2, h2
-; CHECK-CVT-NEXT: mov h4, v1.h[3]
+; CHECK-CVT-NEXT: mov h4, v0.h[3]
; CHECK-CVT-NEXT: fcvt s3, h3
-; CHECK-CVT-NEXT: mov h5, v0.h[3]
+; CHECK-CVT-NEXT: mov h6, v0.h[4]
; CHECK-CVT-NEXT: csetm w9, vc
-; CHECK-CVT-NEXT: fcmp s3, s2
+; CHECK-CVT-NEXT: fcmp s2, s5
; CHECK-CVT-NEXT: fmov s2, w9
-; CHECK-CVT-NEXT: fcvt s3, h4
-; CHECK-CVT-NEXT: fcvt s4, h5
-; CHECK-CVT-NEXT: mov h5, v0.h[5]
+; CHECK-CVT-NEXT: fcvt s4, h4
+; CHECK-CVT-NEXT: mov h5, v1.h[4]
+; CHECK-CVT-NEXT: fcvt s6, h6
; CHECK-CVT-NEXT: mov v2.h[1], w8
; CHECK-CVT-NEXT: csetm w8, vc
; CHECK-CVT-NEXT: fcmp s4, s3
-; CHECK-CVT-NEXT: fcvt s3, h16
-; CHECK-CVT-NEXT: fcvt s4, h5
-; CHECK-CVT-NEXT: mov h5, v1.h[6]
+; CHECK-CVT-NEXT: mov h3, v1.h[5]
+; CHECK-CVT-NEXT: mov h4, v0.h[5]
+; CHECK-CVT-NEXT: fcvt s5, h5
; CHECK-CVT-NEXT: mov v2.h[2], w8
-; CHECK-CVT-NEXT: mov h1, v1.h[7]
; CHECK-CVT-NEXT: csetm w8, vc
-; CHECK-CVT-NEXT: fcmp s7, s6
+; CHECK-CVT-NEXT: fcvt s3, h3
+; CHECK-CVT-NEXT: fcvt s4, h4
+; CHECK-CVT-NEXT: fcmp s6, s5
+; CHECK-CVT-NEXT: mov h5, v1.h[6]
; CHECK-CVT-NEXT: mov h6, v0.h[6]
+; CHECK-CVT-NEXT: mov h1, v1.h[7]
; CHECK-CVT-NEXT: mov h0, v0.h[7]
; CHECK-CVT-NEXT: mov v2.h[3], w8
; CHECK-CVT-NEXT: csetm w8, vc
diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
--- a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll
@@ -9,9 +9,9 @@
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov d1, v0.d[1]
; CHECK-NEXT: fcvtzs w8, d0
+; CHECK-NEXT: fcvtzs w9, d1
; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: fcvtzs w8, d1
-; CHECK-NEXT: mov v0.s[1], w8
+; CHECK-NEXT: mov v0.s[1], w9
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
entry:
@@ -29,9 +29,9 @@
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov d1, v0.d[1]
; CHECK-NEXT: fcvtzu w8, d0
+; CHECK-NEXT: fcvtzu w9, d1
; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: fcvtzu w8, d1
-; CHECK-NEXT: mov v0.s[1], w8
+; CHECK-NEXT: mov v0.s[1], w9
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
entry:
@@ -47,9 +47,9 @@
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov d1, v0.d[1]
; CHECK-NEXT: fcvtzu w8, d0
+; CHECK-NEXT: fcvtzu w9, d1
; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: fcvtzu w8, d1
-; CHECK-NEXT: mov v0.s[1], w8
+; CHECK-NEXT: mov v0.s[1], w9
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
entry:
@@ -194,10 +194,10 @@
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
; CHECK-NEXT: movi d1, #0x00ffff0000ffff
-; CHECK-NEXT: movi v2.2d, #0000000000000000
; CHECK-NEXT: xtn v0.2s, v0.2d
; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s
-; CHECK-NEXT: smax v0.2s, v0.2s, v2.2s
+; CHECK-NEXT: movi v1.2d, #0000000000000000
+; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
entry:
%conv = fptosi <2 x double> %x to <2 x i32>
@@ -372,9 +372,9 @@
; CHECK-NEXT: csel x8, x0, xzr, eq
; CHECK-NEXT: cmp x20, #0
; CHECK-NEXT: csel x9, x19, xzr, eq
-; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: fmov d1, x9
+; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: mov v0.d[1], v1.d[0]
; CHECK-NEXT: add sp, sp, #48
; CHECK-NEXT: ret
@@ -412,13 +412,13 @@
; CHECK-NEXT: csel x10, x19, xzr, lt
; CHECK-NEXT: csinc x11, x20, xzr, lt
; CHECK-NEXT: cmp xzr, x10
+; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ngcs xzr, x11
; CHECK-NEXT: csel x10, x10, xzr, lt
; CHECK-NEXT: cmp xzr, x8
; CHECK-NEXT: ngcs xzr, x9
-; CHECK-NEXT: csel x8, x8, xzr, lt
-; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: fmov d0, x10
+; CHECK-NEXT: csel x8, x8, xzr, lt
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: mov v0.d[1], v1.d[0]
; CHECK-NEXT: add sp, sp, #48
@@ -439,9 +439,9 @@
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov s1, v0.s[1]
; CHECK-NEXT: fcvtzs x8, s0
+; CHECK-NEXT: fcvtzs x9, s1
; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: fcvtzs x8, s1
-; CHECK-NEXT: mov v0.d[1], x8
+; CHECK-NEXT: mov v0.d[1], x9
; CHECK-NEXT: ret
entry:
%conv = fptosi <2 x float> %x to <2 x i128>
@@ -477,9 +477,9 @@
; CHECK-NEXT: csel x8, x0, xzr, eq
; CHECK-NEXT: cmp x20, #0
; CHECK-NEXT: csel x9, x19, xzr, eq
-; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: fmov d1, x9
+; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: mov v0.d[1], v1.d[0]
; CHECK-NEXT: add sp, sp, #48
; CHECK-NEXT: ret
@@ -518,13 +518,13 @@
; CHECK-NEXT: csel x10, x19, xzr, lt
; CHECK-NEXT: csinc x11, x20, xzr, lt
; CHECK-NEXT: cmp xzr, x10
+; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ngcs xzr, x11
; CHECK-NEXT: csel x10, x10, xzr, lt
; CHECK-NEXT: cmp xzr, x9
; CHECK-NEXT: ngcs xzr, x8
-; CHECK-NEXT: csel x8, x9, xzr, lt
-; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: fmov d0, x10
+; CHECK-NEXT: csel x8, x9, xzr, lt
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: mov v0.d[1], v1.d[0]
; CHECK-NEXT: add sp, sp, #48
@@ -547,9 +547,9 @@
; CHECK-CVT-NEXT: fcvt s0, h0
; CHECK-CVT-NEXT: fcvt s1, h1
; CHECK-CVT-NEXT: fcvtzs x8, s0
+; CHECK-CVT-NEXT: fcvtzs x9, s1
; CHECK-CVT-NEXT: fmov d0, x8
-; CHECK-CVT-NEXT: fcvtzs x8, s1
-; CHECK-CVT-NEXT: mov v0.d[1], x8
+; CHECK-CVT-NEXT: mov v0.d[1], x9
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: stest_f16i64:
@@ -557,9 +557,9 @@
; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-FP16-NEXT: mov h1, v0.h[1]
; CHECK-FP16-NEXT: fcvtzs x8, h0
+; CHECK-FP16-NEXT: fcvtzs x9, h1
; CHECK-FP16-NEXT: fmov d0, x8
-; CHECK-FP16-NEXT: fcvtzs x8, h1
-; CHECK-FP16-NEXT: mov v0.d[1], x8
+; CHECK-FP16-NEXT: mov v0.d[1], x9
; CHECK-FP16-NEXT: ret
entry:
%conv = fptosi <2 x half> %x to <2 x i128>
@@ -595,9 +595,9 @@
; CHECK-NEXT: csel x8, x0, xzr, eq
; CHECK-NEXT: cmp x20, #0
; CHECK-NEXT: csel x9, x19, xzr, eq
-; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: fmov d1, x9
+; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: mov v0.d[1], v1.d[0]
; CHECK-NEXT: add sp, sp, #48
; CHECK-NEXT: ret
@@ -636,13 +636,13 @@
; CHECK-NEXT: csel x10, x19, xzr, lt
; CHECK-NEXT: csinc x11, x20, xzr, lt
; CHECK-NEXT: cmp xzr, x10
+; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ngcs xzr, x11
; CHECK-NEXT: csel x10, x10, xzr, lt
; CHECK-NEXT: cmp xzr, x9
; CHECK-NEXT: ngcs xzr, x8
-; CHECK-NEXT: csel x8, x9, xzr, lt
-; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: fmov d0, x10
+; CHECK-NEXT: csel x8, x9, xzr, lt
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: mov v0.d[1], v1.d[0]
; CHECK-NEXT: add sp, sp, #48
@@ -666,9 +666,9 @@
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov d1, v0.d[1]
; CHECK-NEXT: fcvtzs w8, d0
+; CHECK-NEXT: fcvtzs w9, d1
; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: fcvtzs w8, d1
-; CHECK-NEXT: mov v0.s[1], w8
+; CHECK-NEXT: mov v0.s[1], w9
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
entry:
@@ -684,9 +684,9 @@
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov d1, v0.d[1]
; CHECK-NEXT: fcvtzu w8, d0
+; CHECK-NEXT: fcvtzu w9, d1
; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: fcvtzu w8, d1
-; CHECK-NEXT: mov v0.s[1], w8
+; CHECK-NEXT: mov v0.s[1], w9
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
entry:
@@ -701,9 +701,9 @@
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mov d1, v0.d[1]
; CHECK-NEXT: fcvtzu w8, d0
+; CHECK-NEXT: fcvtzu w9, d1
; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: fcvtzu w8, d1
-; CHECK-NEXT: mov v0.s[1], w8
+; CHECK-NEXT: mov v0.s[1], w9
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
entry:
@@ -833,10 +833,10 @@
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvtzs v0.2d, v0.2d
; CHECK-NEXT: movi d1, #0x00ffff0000ffff
-; CHECK-NEXT: movi v2.2d, #0000000000000000
; CHECK-NEXT: xtn v0.2s, v0.2d
; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s
-; CHECK-NEXT: smax v0.2s, v0.2s, v2.2s
+; CHECK-NEXT: movi v1.2d, #0000000000000000
+; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
entry:
%conv = fptosi <2 x double> %x to <2 x i32>
@@ -997,9 +997,9 @@
; CHECK-NEXT: csel x8, x0, xzr, eq
; CHECK-NEXT: cmp x20, #0
; CHECK-NEXT: csel x9, x19, xzr, eq
-; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: fmov d1, x9
+; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: mov v0.d[1], v1.d[0]
; CHECK-NEXT: add sp, sp, #48
; CHECK-NEXT: ret
@@ -1036,10 +1036,10 @@
; CHECK-NEXT: csinc x10, x20, xzr, lt
; CHECK-NEXT: csel x11, x19, xzr, lt
; CHECK-NEXT: cmp x10, #0
+; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: csel x10, xzr, x11, lt
; CHECK-NEXT: cmp x9, #0
; CHECK-NEXT: csel x8, xzr, x8, lt
-; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: fmov d0, x10
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: mov v0.d[1], v1.d[0]
@@ -1059,9 +1059,9 @@
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov s1, v0.s[1]
; CHECK-NEXT: fcvtzs x8, s0
+; CHECK-NEXT: fcvtzs x9, s1
; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: fcvtzs x8, s1
-; CHECK-NEXT: mov v0.d[1], x8
+; CHECK-NEXT: mov v0.d[1], x9
; CHECK-NEXT: ret
entry:
%conv = fptosi <2 x float> %x to <2 x i128>
@@ -1095,9 +1095,9 @@
; CHECK-NEXT: csel x8, x0, xzr, eq
; CHECK-NEXT: cmp x20, #0
; CHECK-NEXT: csel x9, x19, xzr, eq
-; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: fmov d1, x9
+; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: mov v0.d[1], v1.d[0]
; CHECK-NEXT: add sp, sp, #48
; CHECK-NEXT: ret
@@ -1135,10 +1135,10 @@
; CHECK-NEXT: csinc x10, x20, xzr, lt
; CHECK-NEXT: csel x11, x19, xzr, lt
; CHECK-NEXT: cmp x10, #0
+; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: csel x10, xzr, x11, lt
; CHECK-NEXT: cmp x9, #0
; CHECK-NEXT: csel x8, xzr, x8, lt
-; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: fmov d0, x10
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: mov v0.d[1], v1.d[0]
@@ -1160,9 +1160,9 @@
; CHECK-CVT-NEXT: fcvt s0, h0
; CHECK-CVT-NEXT: fcvt s1, h1
; CHECK-CVT-NEXT: fcvtzs x8, s0
+; CHECK-CVT-NEXT: fcvtzs x9, s1
; CHECK-CVT-NEXT: fmov d0, x8
-; CHECK-CVT-NEXT: fcvtzs x8, s1
-; CHECK-CVT-NEXT: mov v0.d[1], x8
+; CHECK-CVT-NEXT: mov v0.d[1], x9
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: stest_f16i64_mm:
@@ -1170,9 +1170,9 @@
; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-FP16-NEXT: mov h1, v0.h[1]
; CHECK-FP16-NEXT: fcvtzs x8, h0
+; CHECK-FP16-NEXT: fcvtzs x9, h1
; CHECK-FP16-NEXT: fmov d0, x8
-; CHECK-FP16-NEXT: fcvtzs x8, h1
-; CHECK-FP16-NEXT: mov v0.d[1], x8
+; CHECK-FP16-NEXT: mov v0.d[1], x9
; CHECK-FP16-NEXT: ret
entry:
%conv = fptosi <2 x half> %x to <2 x i128>
@@ -1206,9 +1206,9 @@
; CHECK-NEXT: csel x8, x0, xzr, eq
; CHECK-NEXT: cmp x20, #0
; CHECK-NEXT: csel x9, x19, xzr, eq
-; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: fmov d1, x9
+; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: mov v0.d[1], v1.d[0]
; CHECK-NEXT: add sp, sp, #48
; CHECK-NEXT: ret
@@ -1246,10 +1246,10 @@
; CHECK-NEXT: csinc x10, x20, xzr, lt
; CHECK-NEXT: csel x11, x19, xzr, lt
; CHECK-NEXT: cmp x10, #0
+; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: csel x10, xzr, x11, lt
; CHECK-NEXT: cmp x9, #0
; CHECK-NEXT: csel x8, xzr, x8, lt
-; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: fmov d0, x10
; CHECK-NEXT: fmov d1, x8
; CHECK-NEXT: mov v0.d[1], v1.d[0]
diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
--- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll
@@ -165,9 +165,9 @@
; CHECK: // %bb.0:
; CHECK-NEXT: mov d1, v0.d[1]
; CHECK-NEXT: fcvtzs w8, d0
+; CHECK-NEXT: fcvtzs w9, d1
; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: fcvtzs w8, d1
-; CHECK-NEXT: mov v0.s[1], w8
+; CHECK-NEXT: mov v0.s[1], w9
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%x = call <2 x i32> @llvm.fptosi.sat.v2f64.v2i32(<2 x double> %f)
@@ -178,10 +178,10 @@
; CHECK-LABEL: test_signed_v3f64_v3i32:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzs w8, d0
+; CHECK-NEXT: fcvtzs w9, d1
; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: fcvtzs w8, d1
-; CHECK-NEXT: mov v0.s[1], w8
; CHECK-NEXT: fcvtzs w8, d2
+; CHECK-NEXT: mov v0.s[1], w9
; CHECK-NEXT: mov v0.s[2], w8
; CHECK-NEXT: fcvtzs w8, d0
; CHECK-NEXT: mov v0.s[3], w8
@@ -195,11 +195,11 @@
; CHECK: // %bb.0:
; CHECK-NEXT: mov d2, v0.d[1]
; CHECK-NEXT: fcvtzs w8, d0
+; CHECK-NEXT: fcvtzs w9, d2
; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: fcvtzs w8, d2
-; CHECK-NEXT: mov v0.s[1], w8
; CHECK-NEXT: fcvtzs w8, d1
; CHECK-NEXT: mov d1, v1.d[1]
+; CHECK-NEXT: mov v0.s[1], w9
; CHECK-NEXT: mov v0.s[2], w8
; CHECK-NEXT: fcvtzs w8, d1
; CHECK-NEXT: mov v0.s[3], w8
@@ -261,9 +261,9 @@
; CHECK-NEXT: bl __fixtfsi
; CHECK-NEXT: cmp w19, #0
; CHECK-NEXT: mov w8, #-2147483648 // =0x80000000
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: csel w19, w8, w0, lt
; CHECK-NEXT: adrp x8, .LCPI14_1
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI14_1]
; CHECK-NEXT: bl __gttf2
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
@@ -295,11 +295,11 @@
; CHECK-NEXT: .cfi_offset w21, -24
; CHECK-NEXT: .cfi_offset w22, -32
; CHECK-NEXT: .cfi_offset w30, -48
-; CHECK-NEXT: adrp x8, .LCPI15_0
-; CHECK-NEXT: stp q1, q0, [sp, #32] // 32-byte Folded Spill
; CHECK-NEXT: mov v2.16b, v1.16b
-; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: stp q1, q0, [sp, #32] // 32-byte Folded Spill
+; CHECK-NEXT: adrp x8, .LCPI15_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_0]
+; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: bl __getf2
; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
@@ -308,9 +308,9 @@
; CHECK-NEXT: adrp x8, .LCPI15_1
; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: cmp w19, #0
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_1]
; CHECK-NEXT: mov w20, #-2147483648 // =0x80000000
; CHECK-NEXT: csel w19, w20, w0, lt
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_1]
; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill
; CHECK-NEXT: bl __gttf2
; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
@@ -320,16 +320,16 @@
; CHECK-NEXT: mov v1.16b, v0.16b
; CHECK-NEXT: bl __unordtf2
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: cmp w0, #0
; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: cmp w0, #0
; CHECK-NEXT: csel w22, wzr, w19, ne
; CHECK-NEXT: bl __getf2
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: mov w19, w0
; CHECK-NEXT: bl __fixtfsi
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: cmp w19, #0
; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: cmp w19, #0
; CHECK-NEXT: csel w19, w20, w0, lt
; CHECK-NEXT: bl __gttf2
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
@@ -364,12 +364,12 @@
; CHECK-NEXT: .cfi_offset w21, -24
; CHECK-NEXT: .cfi_offset w22, -32
; CHECK-NEXT: .cfi_offset w30, -48
-; CHECK-NEXT: adrp x8, .LCPI16_0
; CHECK-NEXT: stp q0, q2, [sp, #48] // 32-byte Folded Spill
; CHECK-NEXT: mov v2.16b, v1.16b
+; CHECK-NEXT: adrp x8, .LCPI16_0
; CHECK-NEXT: str q1, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0]
+; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: bl __getf2
; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
@@ -378,9 +378,9 @@
; CHECK-NEXT: adrp x8, .LCPI16_1
; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: cmp w19, #0
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_1]
; CHECK-NEXT: mov w20, #-2147483648 // =0x80000000
; CHECK-NEXT: csel w19, w20, w0, lt
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_1]
; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill
; CHECK-NEXT: bl __gttf2
; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
@@ -390,16 +390,16 @@
; CHECK-NEXT: mov v1.16b, v0.16b
; CHECK-NEXT: bl __unordtf2
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: cmp w0, #0
; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: cmp w0, #0
; CHECK-NEXT: csel w22, wzr, w19, ne
; CHECK-NEXT: bl __getf2
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: mov w19, w0
; CHECK-NEXT: bl __fixtfsi
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: cmp w19, #0
; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: cmp w19, #0
; CHECK-NEXT: csel w19, w20, w0, lt
; CHECK-NEXT: bl __gttf2
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
@@ -419,8 +419,8 @@
; CHECK-NEXT: mov w19, w0
; CHECK-NEXT: bl __fixtfsi
; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT: cmp w19, #0
; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: cmp w19, #0
; CHECK-NEXT: csel w19, w20, w0, lt
; CHECK-NEXT: bl __gttf2
; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
@@ -430,8 +430,8 @@
; CHECK-NEXT: bl __unordtf2
; CHECK-NEXT: cmp w0, #0
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: csel w8, wzr, w19, ne
; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
+; CHECK-NEXT: csel w8, wzr, w19, ne
; CHECK-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload
; CHECK-NEXT: ldp x22, x21, [sp, #96] // 16-byte Folded Reload
; CHECK-NEXT: mov v0.s[2], w8
@@ -454,11 +454,11 @@
; CHECK-NEXT: .cfi_offset w21, -24
; CHECK-NEXT: .cfi_offset w22, -32
; CHECK-NEXT: .cfi_offset w30, -48
-; CHECK-NEXT: adrp x8, .LCPI17_0
; CHECK-NEXT: stp q2, q3, [sp, #64] // 32-byte Folded Spill
; CHECK-NEXT: mov v2.16b, v1.16b
-; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: adrp x8, .LCPI17_0
; CHECK-NEXT: str q0, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_0]
; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: str q1, [sp, #32] // 16-byte Folded Spill
@@ -469,9 +469,9 @@
; CHECK-NEXT: adrp x8, .LCPI17_1
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: cmp w19, #0
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_1]
; CHECK-NEXT: mov w20, #-2147483648 // =0x80000000
; CHECK-NEXT: csel w19, w20, w0, lt
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_1]
; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: bl __gttf2
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
@@ -488,8 +488,8 @@
; CHECK-NEXT: mov w19, w0
; CHECK-NEXT: bl __fixtfsi
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: cmp w19, #0
; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: cmp w19, #0
; CHECK-NEXT: csel w19, w20, w0, lt
; CHECK-NEXT: bl __gttf2
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
@@ -509,8 +509,8 @@
; CHECK-NEXT: mov w19, w0
; CHECK-NEXT: bl __fixtfsi
; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT: cmp w19, #0
; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: cmp w19, #0
; CHECK-NEXT: csel w19, w20, w0, lt
; CHECK-NEXT: bl __gttf2
; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
@@ -529,8 +529,8 @@
; CHECK-NEXT: mov w19, w0
; CHECK-NEXT: bl __fixtfsi
; CHECK-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: cmp w19, #0
; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: cmp w19, #0
; CHECK-NEXT: csel w19, w20, w0, lt
; CHECK-NEXT: bl __gttf2
; CHECK-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
@@ -540,8 +540,8 @@
; CHECK-NEXT: bl __unordtf2
; CHECK-NEXT: cmp w0, #0
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: csel w8, wzr, w19, ne
; CHECK-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload
+; CHECK-NEXT: csel w8, wzr, w19, ne
; CHECK-NEXT: ldp x20, x19, [sp, #128] // 16-byte Folded Reload
; CHECK-NEXT: ldp x22, x21, [sp, #112] // 16-byte Folded Reload
; CHECK-NEXT: mov v0.s[3], w8
@@ -638,10 +638,10 @@
; CHECK-NEXT: fcvtzs v0.4s, v0.4s
; CHECK-NEXT: mov w1, v1.s[1]
; CHECK-NEXT: mov w2, v1.s[2]
-; CHECK-NEXT: mov w3, v1.s[3]
; CHECK-NEXT: mov w5, v0.s[1]
-; CHECK-NEXT: fmov w0, s1
+; CHECK-NEXT: mov w3, v1.s[3]
; CHECK-NEXT: fmov w4, s0
+; CHECK-NEXT: fmov w0, s1
; CHECK-NEXT: ret
%x = call <6 x i32> @llvm.fptosi.sat.v6f16.v6i32(<6 x half> %f)
ret <6 x i32> %x
@@ -697,9 +697,9 @@
; CHECK: // %bb.0:
; CHECK-NEXT: movi v1.2d, #0000000000000000
; CHECK-NEXT: fcvtzs v0.2s, v0.2s
-; CHECK-NEXT: movi v2.2d, #0xffffffffffffffff
; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s
-; CHECK-NEXT: smax v0.2s, v0.2s, v2.2s
+; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff
+; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s
; CHECK-NEXT: ret
%x = call <2 x i1> @llvm.fptosi.sat.v2f32.v2i1(<2 x float> %f)
ret <2 x i1> %x
@@ -796,9 +796,9 @@
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov s1, v0.s[1]
; CHECK-NEXT: fcvtzs x8, s0
+; CHECK-NEXT: fcvtzs x9, s1
; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: fcvtzs x8, s1
-; CHECK-NEXT: mov v0.d[1], x8
+; CHECK-NEXT: mov v0.d[1], x9
; CHECK-NEXT: ret
%x = call <2 x i64> @llvm.fptosi.sat.v2f32.v2i64(<2 x float> %f)
ret <2 x i64> %x
@@ -830,9 +830,9 @@
; CHECK-NEXT: movi v9.2s, #241, lsl #24
; CHECK-NEXT: mov w8, #1895825407 // =0x70ffffff
; CHECK-NEXT: mov x21, #-34359738368 // =0xfffffff800000000
+; CHECK-NEXT: fmov s10, w8
; CHECK-NEXT: mov x22, #34359738367 // =0x7ffffffff
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: fmov s10, w8
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: csel x8, xzr, x0, lt
@@ -847,8 +847,8 @@
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: mov x2, x19
; CHECK-NEXT: mov x3, x20
-; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload
; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload
; CHECK-NEXT: fcmp s0, s9
; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload
; CHECK-NEXT: csel x8, x21, x1, lt
@@ -896,9 +896,9 @@
; CHECK-NEXT: movi v9.2s, #255, lsl #24
; CHECK-NEXT: mov w8, #2130706431 // =0x7effffff
; CHECK-NEXT: mov x21, #-9223372036854775808 // =0x8000000000000000
+; CHECK-NEXT: fmov s10, w8
; CHECK-NEXT: mov x22, #9223372036854775807 // =0x7fffffffffffffff
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: fmov s10, w8
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: csel x8, xzr, x0, lt
@@ -913,8 +913,8 @@
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: mov x2, x19
; CHECK-NEXT: mov x3, x20
-; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload
; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload
; CHECK-NEXT: fcmp s0, s9
; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload
; CHECK-NEXT: csel x8, x21, x1, lt
@@ -1028,30 +1028,30 @@
; CHECK-LABEL: test_signed_v4f32_v4i50:
; CHECK: // %bb.0:
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: mov x9, #562949953421311 // =0x1ffffffffffff
-; CHECK-NEXT: mov x10, #-562949953421312 // =0xfffe000000000000
+; CHECK-NEXT: mov x8, #562949953421311 // =0x1ffffffffffff
+; CHECK-NEXT: mov x11, #-562949953421312 // =0xfffe000000000000
; CHECK-NEXT: fcvtzs x12, s0
; CHECK-NEXT: mov s2, v1.s[1]
-; CHECK-NEXT: fcvtzs x8, s1
+; CHECK-NEXT: fcvtzs x9, s1
; CHECK-NEXT: mov s1, v0.s[1]
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: fcvtzs x11, s2
-; CHECK-NEXT: csel x8, x8, x9, lt
-; CHECK-NEXT: cmp x8, x10
-; CHECK-NEXT: csel x2, x8, x10, gt
-; CHECK-NEXT: cmp x11, x9
-; CHECK-NEXT: csel x8, x11, x9, lt
-; CHECK-NEXT: fcvtzs x11, s1
-; CHECK-NEXT: cmp x8, x10
-; CHECK-NEXT: csel x3, x8, x10, gt
-; CHECK-NEXT: cmp x12, x9
-; CHECK-NEXT: csel x8, x12, x9, lt
-; CHECK-NEXT: cmp x8, x10
-; CHECK-NEXT: csel x0, x8, x10, gt
-; CHECK-NEXT: cmp x11, x9
-; CHECK-NEXT: csel x8, x11, x9, lt
-; CHECK-NEXT: cmp x8, x10
-; CHECK-NEXT: csel x1, x8, x10, gt
+; CHECK-NEXT: fcvtzs x10, s2
+; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: csel x9, x9, x8, lt
+; CHECK-NEXT: cmp x9, x11
+; CHECK-NEXT: csel x2, x9, x11, gt
+; CHECK-NEXT: cmp x10, x8
+; CHECK-NEXT: csel x9, x10, x8, lt
+; CHECK-NEXT: fcvtzs x10, s1
+; CHECK-NEXT: cmp x9, x11
+; CHECK-NEXT: csel x3, x9, x11, gt
+; CHECK-NEXT: cmp x12, x8
+; CHECK-NEXT: csel x9, x12, x8, lt
+; CHECK-NEXT: cmp x9, x11
+; CHECK-NEXT: csel x0, x9, x11, gt
+; CHECK-NEXT: cmp x10, x8
+; CHECK-NEXT: csel x8, x10, x8, lt
+; CHECK-NEXT: cmp x8, x11
+; CHECK-NEXT: csel x1, x8, x11, gt
; CHECK-NEXT: ret
%x = call <4 x i50> @llvm.fptosi.sat.v4f32.v4i50(<4 x float> %f)
ret <4 x i50> %x
@@ -1065,12 +1065,12 @@
; CHECK-NEXT: fcvtzs x9, s0
; CHECK-NEXT: mov s2, v1.s[1]
; CHECK-NEXT: fcvtzs x8, s1
+; CHECK-NEXT: fcvtzs x11, s3
; CHECK-NEXT: fmov d0, x9
-; CHECK-NEXT: fcvtzs x9, s3
+; CHECK-NEXT: fcvtzs x10, s2
; CHECK-NEXT: fmov d1, x8
-; CHECK-NEXT: fcvtzs x8, s2
-; CHECK-NEXT: mov v0.d[1], x9
-; CHECK-NEXT: mov v1.d[1], x8
+; CHECK-NEXT: mov v0.d[1], x11
+; CHECK-NEXT: mov v1.d[1], x10
; CHECK-NEXT: ret
%x = call <4 x i64> @llvm.fptosi.sat.v4f32.v4i64(<4 x float> %f)
ret <4 x i64> %x
@@ -1107,11 +1107,11 @@
; CHECK-NEXT: movi v9.2s, #241, lsl #24
; CHECK-NEXT: mov w8, #1895825407 // =0x70ffffff
; CHECK-NEXT: mov x25, #-34359738368 // =0xfffffff800000000
+; CHECK-NEXT: fmov s10, w8
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: mov x26, #34359738367 // =0x7ffffffff
-; CHECK-NEXT: fmov s10, w8
-; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, x25, x1, lt
; CHECK-NEXT: fcmp s8, s10
@@ -1156,22 +1156,22 @@
; CHECK-NEXT: mov x6, x23
; CHECK-NEXT: fcmp s0, s9
; CHECK-NEXT: mov x7, x24
+; CHECK-NEXT: ldr x30, [sp, #56] // 8-byte Folded Reload
; CHECK-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x22, x21, [sp, #96] // 16-byte Folded Reload
; CHECK-NEXT: csel x8, x25, x1, lt
; CHECK-NEXT: csel x9, xzr, x0, lt
; CHECK-NEXT: fcmp s0, s10
-; CHECK-NEXT: ldr x30, [sp, #56] // 8-byte Folded Reload
-; CHECK-NEXT: ldp x22, x21, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x24, x23, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldr d10, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #40] // 16-byte Folded Reload
; CHECK-NEXT: csinv x9, x9, xzr, le
; CHECK-NEXT: csel x8, x26, x8, gt
; CHECK-NEXT: fcmp s0, s0
-; CHECK-NEXT: ldr d10, [sp, #32] // 8-byte Folded Reload
-; CHECK-NEXT: ldp x24, x23, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x26, x25, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: csel x9, xzr, x9, vs
; CHECK-NEXT: csel x1, xzr, x8, vs
-; CHECK-NEXT: ldp x26, x25, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: fmov d0, x9
-; CHECK-NEXT: ldp d9, d8, [sp, #40] // 16-byte Folded Reload
; CHECK-NEXT: mov v0.d[1], x1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: add sp, sp, #128
@@ -1211,11 +1211,11 @@
; CHECK-NEXT: movi v9.2s, #255, lsl #24
; CHECK-NEXT: mov w8, #2130706431 // =0x7effffff
; CHECK-NEXT: mov x25, #-9223372036854775808 // =0x8000000000000000
+; CHECK-NEXT: fmov s10, w8
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: mov x26, #9223372036854775807 // =0x7fffffffffffffff
-; CHECK-NEXT: fmov s10, w8
-; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, x25, x1, lt
; CHECK-NEXT: fcmp s8, s10
@@ -1260,22 +1260,22 @@
; CHECK-NEXT: mov x6, x23
; CHECK-NEXT: fcmp s0, s9
; CHECK-NEXT: mov x7, x24
+; CHECK-NEXT: ldr x30, [sp, #56] // 8-byte Folded Reload
; CHECK-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x22, x21, [sp, #96] // 16-byte Folded Reload
; CHECK-NEXT: csel x8, x25, x1, lt
; CHECK-NEXT: csel x9, xzr, x0, lt
; CHECK-NEXT: fcmp s0, s10
-; CHECK-NEXT: ldr x30, [sp, #56] // 8-byte Folded Reload
-; CHECK-NEXT: ldp x22, x21, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x24, x23, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldr d10, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #40] // 16-byte Folded Reload
; CHECK-NEXT: csinv x9, x9, xzr, le
; CHECK-NEXT: csel x8, x26, x8, gt
; CHECK-NEXT: fcmp s0, s0
-; CHECK-NEXT: ldr d10, [sp, #32] // 8-byte Folded Reload
-; CHECK-NEXT: ldp x24, x23, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x26, x25, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: csel x9, xzr, x9, vs
; CHECK-NEXT: csel x1, xzr, x8, vs
-; CHECK-NEXT: ldp x26, x25, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: fmov d0, x9
-; CHECK-NEXT: ldp d9, d8, [sp, #40] // 16-byte Folded Reload
; CHECK-NEXT: mov v0.d[1], x1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: add sp, sp, #128
@@ -1320,8 +1320,8 @@
; CHECK-LABEL: test_signed_v2f64_v2i8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov d1, v0.d[1]
-; CHECK-NEXT: mov w8, #127 // =0x7f
; CHECK-NEXT: fcvtzs w10, d0
+; CHECK-NEXT: mov w8, #127 // =0x7f
; CHECK-NEXT: mov w11, #-128 // =0xffffff80
; CHECK-NEXT: fcvtzs w9, d1
; CHECK-NEXT: cmp w9, #127
@@ -1344,8 +1344,8 @@
; CHECK-LABEL: test_signed_v2f64_v2i13:
; CHECK: // %bb.0:
; CHECK-NEXT: mov d1, v0.d[1]
-; CHECK-NEXT: mov w8, #4095 // =0xfff
; CHECK-NEXT: fcvtzs w10, d0
+; CHECK-NEXT: mov w8, #4095 // =0xfff
; CHECK-NEXT: mov w11, #-4096 // =0xfffff000
; CHECK-NEXT: fcvtzs w9, d1
; CHECK-NEXT: cmp w9, #4095
@@ -1417,9 +1417,9 @@
; CHECK: // %bb.0:
; CHECK-NEXT: mov d1, v0.d[1]
; CHECK-NEXT: fcvtzs w8, d0
+; CHECK-NEXT: fcvtzs w9, d1
; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: fcvtzs w8, d1
-; CHECK-NEXT: mov v0.s[1], w8
+; CHECK-NEXT: mov v0.s[1], w9
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%x = call <2 x i32> @llvm.fptosi.sat.v2f64.v2i32(<2 x double> %f)
@@ -1483,12 +1483,12 @@
; CHECK-NEXT: mov x8, #-4170333254945079296 // =0xc620000000000000
; CHECK-NEXT: mov x21, #-34359738368 // =0xfffffff800000000
; CHECK-NEXT: mov x22, #34359738367 // =0x7ffffffff
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: fmov d9, x8
; CHECK-NEXT: mov x8, #5053038781909696511 // =0x461fffffffffffff
-; CHECK-NEXT: fcmp d8, d9
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: fmov d10, x8
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: fcmp d8, d9
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, x21, x1, lt
; CHECK-NEXT: fcmp d8, d10
@@ -1501,8 +1501,8 @@
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: mov x2, x19
; CHECK-NEXT: mov x3, x20
-; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload
; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload
; CHECK-NEXT: fcmp d0, d9
; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload
; CHECK-NEXT: csel x8, x21, x1, lt
@@ -1549,12 +1549,12 @@
; CHECK-NEXT: mov x8, #-4044232465378705408 // =0xc7e0000000000000
; CHECK-NEXT: mov x21, #-9223372036854775808 // =0x8000000000000000
; CHECK-NEXT: mov x22, #9223372036854775807 // =0x7fffffffffffffff
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: fmov d9, x8
; CHECK-NEXT: mov x8, #5179139571476070399 // =0x47dfffffffffffff
-; CHECK-NEXT: fcmp d8, d9
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: fmov d10, x8
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
+; CHECK-NEXT: fcmp d8, d9
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, x21, x1, lt
; CHECK-NEXT: fcmp d8, d10
@@ -1567,8 +1567,8 @@
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: mov x2, x19
; CHECK-NEXT: mov x3, x20
-; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload
; CHECK-NEXT: ldp x20, x19, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload
; CHECK-NEXT: fcmp d0, d9
; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload
; CHECK-NEXT: csel x8, x21, x1, lt
@@ -1620,9 +1620,9 @@
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: movi v1.2d, #0000000000000000
; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h
-; CHECK-FP16-NEXT: movi v2.2d, #0xffffffffffffffff
; CHECK-FP16-NEXT: smin v0.4h, v0.4h, v1.4h
-; CHECK-FP16-NEXT: smax v0.4h, v0.4h, v2.4h
+; CHECK-FP16-NEXT: movi v1.2d, #0xffffffffffffffff
+; CHECK-FP16-NEXT: smax v0.4h, v0.4h, v1.4h
; CHECK-FP16-NEXT: ret
%x = call <4 x i1> @llvm.fptosi.sat.v4f16.v4i1(<4 x half> %f)
ret <4 x i1> %x
@@ -1667,9 +1667,9 @@
; CHECK-FP16-LABEL: test_signed_v4f16_v4i13:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h
+; CHECK-FP16-NEXT: mvni v1.4h, #240, lsl #8
+; CHECK-FP16-NEXT: smin v0.4h, v0.4h, v1.4h
; CHECK-FP16-NEXT: movi v1.4h, #240, lsl #8
-; CHECK-FP16-NEXT: mvni v2.4h, #240, lsl #8
-; CHECK-FP16-NEXT: smin v0.4h, v0.4h, v2.4h
; CHECK-FP16-NEXT: smax v0.4h, v0.4h, v1.4h
; CHECK-FP16-NEXT: ret
%x = call <4 x i13> @llvm.fptosi.sat.v4f16.v4i13(<4 x half> %f)
@@ -1722,27 +1722,27 @@
; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-CVT-NEXT: mov h1, v0.h[1]
; CHECK-CVT-NEXT: fcvt s2, h0
+; CHECK-CVT-NEXT: mov x8, #562949953421311 // =0x1ffffffffffff
; CHECK-CVT-NEXT: mov h3, v0.h[2]
; CHECK-CVT-NEXT: mov h0, v0.h[3]
-; CHECK-CVT-NEXT: mov x8, #562949953421311 // =0x1ffffffffffff
; CHECK-CVT-NEXT: mov x11, #-562949953421312 // =0xfffe000000000000
; CHECK-CVT-NEXT: fcvt s1, h1
; CHECK-CVT-NEXT: fcvtzs x9, s2
+; CHECK-CVT-NEXT: fcvt s2, h3
; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: cmp x9, x8
; CHECK-CVT-NEXT: fcvtzs x10, s1
-; CHECK-CVT-NEXT: fcvt s1, h3
+; CHECK-CVT-NEXT: cmp x9, x8
; CHECK-CVT-NEXT: csel x9, x9, x8, lt
+; CHECK-CVT-NEXT: fcvtzs x12, s2
; CHECK-CVT-NEXT: cmp x9, x11
; CHECK-CVT-NEXT: csel x0, x9, x11, gt
; CHECK-CVT-NEXT: cmp x10, x8
-; CHECK-CVT-NEXT: fcvtzs x9, s1
-; CHECK-CVT-NEXT: csel x10, x10, x8, lt
-; CHECK-CVT-NEXT: cmp x10, x11
-; CHECK-CVT-NEXT: csel x1, x10, x11, gt
+; CHECK-CVT-NEXT: csel x9, x10, x8, lt
; CHECK-CVT-NEXT: fcvtzs x10, s0
-; CHECK-CVT-NEXT: cmp x9, x8
-; CHECK-CVT-NEXT: csel x9, x9, x8, lt
+; CHECK-CVT-NEXT: cmp x9, x11
+; CHECK-CVT-NEXT: csel x1, x9, x11, gt
+; CHECK-CVT-NEXT: cmp x12, x8
+; CHECK-CVT-NEXT: csel x9, x12, x8, lt
; CHECK-CVT-NEXT: cmp x9, x11
; CHECK-CVT-NEXT: csel x2, x9, x11, gt
; CHECK-CVT-NEXT: cmp x10, x8
@@ -1757,22 +1757,22 @@
; CHECK-FP16-NEXT: mov h1, v0.h[1]
; CHECK-FP16-NEXT: fcvtzs x9, h0
; CHECK-FP16-NEXT: mov x8, #562949953421311 // =0x1ffffffffffff
+; CHECK-FP16-NEXT: mov h2, v0.h[2]
; CHECK-FP16-NEXT: mov x11, #-562949953421312 // =0xfffe000000000000
-; CHECK-FP16-NEXT: cmp x9, x8
+; CHECK-FP16-NEXT: mov h0, v0.h[3]
; CHECK-FP16-NEXT: fcvtzs x10, h1
-; CHECK-FP16-NEXT: mov h1, v0.h[2]
+; CHECK-FP16-NEXT: cmp x9, x8
; CHECK-FP16-NEXT: csel x9, x9, x8, lt
-; CHECK-FP16-NEXT: mov h0, v0.h[3]
+; CHECK-FP16-NEXT: fcvtzs x12, h2
; CHECK-FP16-NEXT: cmp x9, x11
; CHECK-FP16-NEXT: csel x0, x9, x11, gt
; CHECK-FP16-NEXT: cmp x10, x8
-; CHECK-FP16-NEXT: fcvtzs x9, h1
-; CHECK-FP16-NEXT: csel x10, x10, x8, lt
-; CHECK-FP16-NEXT: cmp x10, x11
-; CHECK-FP16-NEXT: csel x1, x10, x11, gt
+; CHECK-FP16-NEXT: csel x9, x10, x8, lt
; CHECK-FP16-NEXT: fcvtzs x10, h0
-; CHECK-FP16-NEXT: cmp x9, x8
-; CHECK-FP16-NEXT: csel x9, x9, x8, lt
+; CHECK-FP16-NEXT: cmp x9, x11
+; CHECK-FP16-NEXT: csel x1, x9, x11, gt
+; CHECK-FP16-NEXT: cmp x12, x8
+; CHECK-FP16-NEXT: csel x9, x12, x8, lt
; CHECK-FP16-NEXT: cmp x9, x11
; CHECK-FP16-NEXT: csel x2, x9, x11, gt
; CHECK-FP16-NEXT: cmp x10, x8
@@ -1790,19 +1790,19 @@
; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-CVT-NEXT: mov h1, v0.h[2]
; CHECK-CVT-NEXT: mov h2, v0.h[1]
-; CHECK-CVT-NEXT: fcvt s3, h0
-; CHECK-CVT-NEXT: mov h0, v0.h[3]
+; CHECK-CVT-NEXT: mov h3, v0.h[3]
+; CHECK-CVT-NEXT: fcvt s0, h0
; CHECK-CVT-NEXT: fcvt s1, h1
; CHECK-CVT-NEXT: fcvt s2, h2
-; CHECK-CVT-NEXT: fcvtzs x8, s3
-; CHECK-CVT-NEXT: fcvt s3, h0
+; CHECK-CVT-NEXT: fcvt s3, h3
+; CHECK-CVT-NEXT: fcvtzs x8, s0
; CHECK-CVT-NEXT: fcvtzs x9, s1
+; CHECK-CVT-NEXT: fcvtzs x10, s2
+; CHECK-CVT-NEXT: fcvtzs x11, s3
; CHECK-CVT-NEXT: fmov d0, x8
-; CHECK-CVT-NEXT: fcvtzs x8, s2
; CHECK-CVT-NEXT: fmov d1, x9
-; CHECK-CVT-NEXT: fcvtzs x9, s3
-; CHECK-CVT-NEXT: mov v0.d[1], x8
-; CHECK-CVT-NEXT: mov v1.d[1], x9
+; CHECK-CVT-NEXT: mov v0.d[1], x10
+; CHECK-CVT-NEXT: mov v1.d[1], x11
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: test_signed_v4f16_v4i64:
@@ -1813,12 +1813,12 @@
; CHECK-FP16-NEXT: mov h3, v0.h[3]
; CHECK-FP16-NEXT: fcvtzs x8, h0
; CHECK-FP16-NEXT: fcvtzs x9, h1
+; CHECK-FP16-NEXT: fcvtzs x10, h2
+; CHECK-FP16-NEXT: fcvtzs x11, h3
; CHECK-FP16-NEXT: fmov d0, x8
-; CHECK-FP16-NEXT: fcvtzs x8, h2
; CHECK-FP16-NEXT: fmov d1, x9
-; CHECK-FP16-NEXT: fcvtzs x9, h3
-; CHECK-FP16-NEXT: mov v0.d[1], x8
-; CHECK-FP16-NEXT: mov v1.d[1], x9
+; CHECK-FP16-NEXT: mov v0.d[1], x10
+; CHECK-FP16-NEXT: mov v1.d[1], x11
; CHECK-FP16-NEXT: ret
%x = call <4 x i64> @llvm.fptosi.sat.v4f16.v4i64(<4 x half> %f)
ret <4 x i64> %x
@@ -1857,11 +1857,11 @@
; CHECK-NEXT: movi v9.2s, #241, lsl #24
; CHECK-NEXT: mov w8, #1895825407 // =0x70ffffff
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: fmov s10, w8
; CHECK-NEXT: mov x25, #-34359738368 // =0xfffffff800000000
; CHECK-NEXT: mov x26, #34359738367 // =0x7ffffffff
-; CHECK-NEXT: fmov s10, w8
-; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: mov h0, v0.h[2]
+; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, x25, x1, lt
; CHECK-NEXT: fcmp s8, s10
@@ -1873,8 +1873,8 @@
; CHECK-NEXT: csel x20, xzr, x9, vs
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: fcmp s8, s9
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: mov h0, v0.h[3]
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, x25, x1, lt
@@ -1906,24 +1906,24 @@
; CHECK-NEXT: mov x4, x21
; CHECK-NEXT: mov x5, x22
; CHECK-NEXT: mov x6, x23
+; CHECK-NEXT: mov x7, x24
+; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
; CHECK-NEXT: csel x8, x25, x1, lt
; CHECK-NEXT: csel x9, xzr, x0, lt
; CHECK-NEXT: fcmp s8, s10
-; CHECK-NEXT: mov x7, x24
-; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: csinv x9, x9, xzr, le
; CHECK-NEXT: csel x8, x26, x8, gt
; CHECK-NEXT: fcmp s8, s8
-; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload
-; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload
; CHECK-NEXT: csel x9, xzr, x9, vs
; CHECK-NEXT: csel x1, xzr, x8, vs
-; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: fmov d0, x9
-; CHECK-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: mov v0.d[1], x1
-; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: add sp, sp, #112
; CHECK-NEXT: ret
@@ -1964,11 +1964,11 @@
; CHECK-NEXT: movi v9.2s, #255, lsl #24
; CHECK-NEXT: mov w8, #2130706431 // =0x7effffff
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: fmov s10, w8
; CHECK-NEXT: mov x25, #-9223372036854775808 // =0x8000000000000000
; CHECK-NEXT: mov x26, #9223372036854775807 // =0x7fffffffffffffff
-; CHECK-NEXT: fmov s10, w8
-; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: mov h0, v0.h[2]
+; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, x25, x1, lt
; CHECK-NEXT: fcmp s8, s10
@@ -1980,8 +1980,8 @@
; CHECK-NEXT: csel x20, xzr, x9, vs
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixsfti
-; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: fcmp s8, s9
+; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: mov h0, v0.h[3]
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, x25, x1, lt
@@ -2013,24 +2013,24 @@
; CHECK-NEXT: mov x4, x21
; CHECK-NEXT: mov x5, x22
; CHECK-NEXT: mov x6, x23
+; CHECK-NEXT: mov x7, x24
+; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
; CHECK-NEXT: csel x8, x25, x1, lt
; CHECK-NEXT: csel x9, xzr, x0, lt
; CHECK-NEXT: fcmp s8, s10
-; CHECK-NEXT: mov x7, x24
-; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload
; CHECK-NEXT: csinv x9, x9, xzr, le
; CHECK-NEXT: csel x8, x26, x8, gt
; CHECK-NEXT: fcmp s8, s8
-; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload
-; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload
; CHECK-NEXT: csel x9, xzr, x9, vs
; CHECK-NEXT: csel x1, xzr, x8, vs
-; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: fmov d0, x9
-; CHECK-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: mov v0.d[1], x1
-; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: add sp, sp, #112
; CHECK-NEXT: ret
@@ -2058,44 +2058,44 @@
; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h
; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
; CHECK-CVT-NEXT: mov s2, v1.s[1]
-; CHECK-CVT-NEXT: mov s3, v1.s[2]
-; CHECK-CVT-NEXT: mov s4, v1.s[3]
-; CHECK-CVT-NEXT: mov s5, v0.s[1]
; CHECK-CVT-NEXT: fcvtzs w9, s1
-; CHECK-CVT-NEXT: fcvtzs w10, s0
-; CHECK-CVT-NEXT: mov s1, v0.s[2]
-; CHECK-CVT-NEXT: mov s0, v0.s[3]
+; CHECK-CVT-NEXT: fcvtzs w13, s0
; CHECK-CVT-NEXT: fcvtzs w8, s2
-; CHECK-CVT-NEXT: fcvtzs w11, s3
-; CHECK-CVT-NEXT: fcvtzs w12, s4
-; CHECK-CVT-NEXT: fcvtzs w13, s5
+; CHECK-CVT-NEXT: mov s2, v1.s[2]
+; CHECK-CVT-NEXT: mov s1, v1.s[3]
; CHECK-CVT-NEXT: ands w8, w8, w8, asr #31
+; CHECK-CVT-NEXT: fcvtzs w10, s2
+; CHECK-CVT-NEXT: mov s2, v0.s[1]
+; CHECK-CVT-NEXT: fcvtzs w11, s1
+; CHECK-CVT-NEXT: mov s1, v0.s[2]
+; CHECK-CVT-NEXT: mov s0, v0.s[3]
; CHECK-CVT-NEXT: csinv w8, w8, wzr, ge
; CHECK-CVT-NEXT: ands w9, w9, w9, asr #31
; CHECK-CVT-NEXT: csinv w9, w9, wzr, ge
+; CHECK-CVT-NEXT: ands w10, w10, w10, asr #31
+; CHECK-CVT-NEXT: fcvtzs w12, s2
+; CHECK-CVT-NEXT: fcvtzs w14, s1
+; CHECK-CVT-NEXT: fmov s1, w9
+; CHECK-CVT-NEXT: fcvtzs w9, s0
+; CHECK-CVT-NEXT: csinv w10, w10, wzr, ge
; CHECK-CVT-NEXT: ands w11, w11, w11, asr #31
; CHECK-CVT-NEXT: csinv w11, w11, wzr, ge
; CHECK-CVT-NEXT: ands w12, w12, w12, asr #31
+; CHECK-CVT-NEXT: mov v1.s[1], w8
; CHECK-CVT-NEXT: csinv w12, w12, wzr, ge
; CHECK-CVT-NEXT: ands w13, w13, w13, asr #31
; CHECK-CVT-NEXT: csinv w13, w13, wzr, ge
-; CHECK-CVT-NEXT: ands w10, w10, w10, asr #31
-; CHECK-CVT-NEXT: csinv w10, w10, wzr, ge
-; CHECK-CVT-NEXT: fmov s2, w9
-; CHECK-CVT-NEXT: fcvtzs w9, s1
-; CHECK-CVT-NEXT: fmov s3, w10
-; CHECK-CVT-NEXT: mov v2.s[1], w8
-; CHECK-CVT-NEXT: ands w8, w9, w9, asr #31
+; CHECK-CVT-NEXT: ands w8, w14, w14, asr #31
+; CHECK-CVT-NEXT: mov v1.s[2], w10
+; CHECK-CVT-NEXT: fmov s2, w13
; CHECK-CVT-NEXT: csinv w8, w8, wzr, ge
-; CHECK-CVT-NEXT: fcvtzs w9, s0
-; CHECK-CVT-NEXT: mov v3.s[1], w13
-; CHECK-CVT-NEXT: mov v2.s[2], w11
-; CHECK-CVT-NEXT: mov v3.s[2], w8
+; CHECK-CVT-NEXT: mov v2.s[1], w12
+; CHECK-CVT-NEXT: mov v1.s[3], w11
+; CHECK-CVT-NEXT: mov v2.s[2], w8
; CHECK-CVT-NEXT: ands w8, w9, w9, asr #31
; CHECK-CVT-NEXT: csinv w8, w8, wzr, ge
-; CHECK-CVT-NEXT: mov v2.s[3], w12
-; CHECK-CVT-NEXT: mov v3.s[3], w8
-; CHECK-CVT-NEXT: uzp1 v0.8h, v3.8h, v2.8h
+; CHECK-CVT-NEXT: mov v2.s[3], w8
+; CHECK-CVT-NEXT: uzp1 v0.8h, v2.8h, v1.8h
; CHECK-CVT-NEXT: xtn v0.8b, v0.8h
; CHECK-CVT-NEXT: ret
;
@@ -2103,9 +2103,9 @@
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: movi v1.2d, #0000000000000000
; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h
-; CHECK-FP16-NEXT: movi v2.2d, #0xffffffffffffffff
; CHECK-FP16-NEXT: smin v0.8h, v0.8h, v1.8h
-; CHECK-FP16-NEXT: smax v0.8h, v0.8h, v2.8h
+; CHECK-FP16-NEXT: movi v1.2d, #0xffffffffffffffff
+; CHECK-FP16-NEXT: smax v0.8h, v0.8h, v1.8h
; CHECK-FP16-NEXT: xtn v0.8b, v0.8h
; CHECK-FP16-NEXT: ret
%x = call <8 x i1> @llvm.fptosi.sat.v8f16.v8i1(<8 x half> %f)
@@ -2118,62 +2118,62 @@
; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h
; CHECK-CVT-NEXT: mov w8, #127 // =0x7f
; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: mov w10, #-128 // =0xffffff80
+; CHECK-CVT-NEXT: mov w11, #-128 // =0xffffff80
; CHECK-CVT-NEXT: mov s2, v1.s[1]
-; CHECK-CVT-NEXT: fcvtzs w11, s1
+; CHECK-CVT-NEXT: fcvtzs w10, s1
; CHECK-CVT-NEXT: fcvtzs w15, s0
; CHECK-CVT-NEXT: fcvtzs w9, s2
; CHECK-CVT-NEXT: mov s2, v1.s[2]
; CHECK-CVT-NEXT: mov s1, v1.s[3]
; CHECK-CVT-NEXT: cmp w9, #127
-; CHECK-CVT-NEXT: csel w9, w9, w8, lt
; CHECK-CVT-NEXT: fcvtzs w12, s2
-; CHECK-CVT-NEXT: cmn w9, #128
; CHECK-CVT-NEXT: mov s2, v0.s[1]
-; CHECK-CVT-NEXT: csel w9, w9, w10, gt
-; CHECK-CVT-NEXT: cmp w11, #127
-; CHECK-CVT-NEXT: csel w11, w11, w8, lt
+; CHECK-CVT-NEXT: csel w9, w9, w8, lt
; CHECK-CVT-NEXT: fcvtzs w13, s1
-; CHECK-CVT-NEXT: cmn w11, #128
; CHECK-CVT-NEXT: mov s1, v0.s[2]
-; CHECK-CVT-NEXT: csel w11, w11, w10, gt
+; CHECK-CVT-NEXT: cmn w9, #128
+; CHECK-CVT-NEXT: mov s0, v0.s[3]
+; CHECK-CVT-NEXT: csel w9, w9, w11, gt
+; CHECK-CVT-NEXT: cmp w10, #127
+; CHECK-CVT-NEXT: csel w10, w10, w8, lt
+; CHECK-CVT-NEXT: fcvtzs w14, s2
+; CHECK-CVT-NEXT: cmn w10, #128
+; CHECK-CVT-NEXT: fcvtzs w16, s1
+; CHECK-CVT-NEXT: csel w10, w10, w11, gt
; CHECK-CVT-NEXT: cmp w12, #127
; CHECK-CVT-NEXT: csel w12, w12, w8, lt
-; CHECK-CVT-NEXT: fcvtzs w14, s2
+; CHECK-CVT-NEXT: fmov s1, w10
; CHECK-CVT-NEXT:
cmn w12, #128 -; CHECK-CVT-NEXT: mov s0, v0.s[3] -; CHECK-CVT-NEXT: csel w12, w12, w10, gt +; CHECK-CVT-NEXT: csel w12, w12, w11, gt ; CHECK-CVT-NEXT: cmp w13, #127 ; CHECK-CVT-NEXT: csel w13, w13, w8, lt -; CHECK-CVT-NEXT: fmov s2, w11 +; CHECK-CVT-NEXT: mov v1.s[1], w9 +; CHECK-CVT-NEXT: fcvtzs w9, s0 ; CHECK-CVT-NEXT: cmn w13, #128 -; CHECK-CVT-NEXT: csel w13, w13, w10, gt +; CHECK-CVT-NEXT: csel w13, w13, w11, gt ; CHECK-CVT-NEXT: cmp w14, #127 ; CHECK-CVT-NEXT: csel w14, w14, w8, lt ; CHECK-CVT-NEXT: cmn w14, #128 -; CHECK-CVT-NEXT: csel w14, w14, w10, gt +; CHECK-CVT-NEXT: mov v1.s[2], w12 +; CHECK-CVT-NEXT: csel w14, w14, w11, gt ; CHECK-CVT-NEXT: cmp w15, #127 ; CHECK-CVT-NEXT: csel w15, w15, w8, lt ; CHECK-CVT-NEXT: cmn w15, #128 -; CHECK-CVT-NEXT: csel w11, w15, w10, gt -; CHECK-CVT-NEXT: fcvtzs w15, s1 -; CHECK-CVT-NEXT: mov v2.s[1], w9 -; CHECK-CVT-NEXT: fmov s1, w11 -; CHECK-CVT-NEXT: cmp w15, #127 -; CHECK-CVT-NEXT: csel w9, w15, w8, lt -; CHECK-CVT-NEXT: fcvtzs w11, s0 -; CHECK-CVT-NEXT: cmn w9, #128 -; CHECK-CVT-NEXT: mov v1.s[1], w14 -; CHECK-CVT-NEXT: csel w9, w9, w10, gt -; CHECK-CVT-NEXT: cmp w11, #127 -; CHECK-CVT-NEXT: csel w8, w11, w8, lt -; CHECK-CVT-NEXT: mov v2.s[2], w12 +; CHECK-CVT-NEXT: csel w10, w15, w11, gt +; CHECK-CVT-NEXT: cmp w16, #127 +; CHECK-CVT-NEXT: mov v1.s[3], w13 +; CHECK-CVT-NEXT: fmov s2, w10 +; CHECK-CVT-NEXT: csel w10, w16, w8, lt +; CHECK-CVT-NEXT: cmn w10, #128 +; CHECK-CVT-NEXT: csel w10, w10, w11, gt +; CHECK-CVT-NEXT: cmp w9, #127 +; CHECK-CVT-NEXT: mov v2.s[1], w14 +; CHECK-CVT-NEXT: csel w8, w9, w8, lt ; CHECK-CVT-NEXT: cmn w8, #128 -; CHECK-CVT-NEXT: mov v1.s[2], w9 -; CHECK-CVT-NEXT: csel w8, w8, w10, gt -; CHECK-CVT-NEXT: mov v2.s[3], w13 -; CHECK-CVT-NEXT: mov v1.s[3], w8 -; CHECK-CVT-NEXT: uzp1 v0.8h, v1.8h, v2.8h +; CHECK-CVT-NEXT: csel w8, w8, w11, gt +; CHECK-CVT-NEXT: mov v2.s[2], w10 +; CHECK-CVT-NEXT: mov v2.s[3], w8 +; CHECK-CVT-NEXT: uzp1 v0.8h, v2.8h, v1.8h ; CHECK-CVT-NEXT: xtn v0.8b, v0.8h ; CHECK-CVT-NEXT: ret ; @@ -2192,70 +2192,70 @@ ; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h ; CHECK-CVT-NEXT: mov w8, #4095 // =0xfff ; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: mov w10, #-4096 // =0xfffff000 +; CHECK-CVT-NEXT: mov w11, #-4096 // =0xfffff000 ; CHECK-CVT-NEXT: mov s2, v1.s[1] -; CHECK-CVT-NEXT: fcvtzs w11, s1 +; CHECK-CVT-NEXT: fcvtzs w10, s1 ; CHECK-CVT-NEXT: fcvtzs w15, s0 ; CHECK-CVT-NEXT: fcvtzs w9, s2 ; CHECK-CVT-NEXT: mov s2, v1.s[2] ; CHECK-CVT-NEXT: mov s1, v1.s[3] ; CHECK-CVT-NEXT: cmp w9, #4095 -; CHECK-CVT-NEXT: csel w9, w9, w8, lt ; CHECK-CVT-NEXT: fcvtzs w12, s2 -; CHECK-CVT-NEXT: cmn w9, #1, lsl #12 // =4096 ; CHECK-CVT-NEXT: mov s2, v0.s[1] -; CHECK-CVT-NEXT: csel w9, w9, w10, gt -; CHECK-CVT-NEXT: cmp w11, #4095 -; CHECK-CVT-NEXT: csel w11, w11, w8, lt +; CHECK-CVT-NEXT: csel w9, w9, w8, lt ; CHECK-CVT-NEXT: fcvtzs w13, s1 -; CHECK-CVT-NEXT: cmn w11, #1, lsl #12 // =4096 ; CHECK-CVT-NEXT: mov s1, v0.s[2] -; CHECK-CVT-NEXT: csel w11, w11, w10, gt +; CHECK-CVT-NEXT: cmn w9, #1, lsl #12 // =4096 +; CHECK-CVT-NEXT: mov s0, v0.s[3] +; CHECK-CVT-NEXT: csel w9, w9, w11, gt +; CHECK-CVT-NEXT: cmp w10, #4095 +; CHECK-CVT-NEXT: csel w10, w10, w8, lt +; CHECK-CVT-NEXT: fcvtzs w14, s2 +; CHECK-CVT-NEXT: cmn w10, #1, lsl #12 // =4096 +; CHECK-CVT-NEXT: fcvtzs w16, s1 +; CHECK-CVT-NEXT: csel w10, w10, w11, gt ; CHECK-CVT-NEXT: cmp w12, #4095 ; CHECK-CVT-NEXT: csel w12, w12, w8, lt -; CHECK-CVT-NEXT: fcvtzs w14, s2 +; CHECK-CVT-NEXT: fmov s1, w10 ; CHECK-CVT-NEXT: cmn w12, #1, lsl #12 // =4096 -; CHECK-CVT-NEXT: 
mov s0, v0.s[3] -; CHECK-CVT-NEXT: csel w12, w12, w10, gt +; CHECK-CVT-NEXT: csel w12, w12, w11, gt ; CHECK-CVT-NEXT: cmp w13, #4095 ; CHECK-CVT-NEXT: csel w13, w13, w8, lt -; CHECK-CVT-NEXT: fmov s2, w11 +; CHECK-CVT-NEXT: mov v1.s[1], w9 +; CHECK-CVT-NEXT: fcvtzs w9, s0 ; CHECK-CVT-NEXT: cmn w13, #1, lsl #12 // =4096 -; CHECK-CVT-NEXT: csel w13, w13, w10, gt +; CHECK-CVT-NEXT: csel w13, w13, w11, gt ; CHECK-CVT-NEXT: cmp w14, #4095 ; CHECK-CVT-NEXT: csel w14, w14, w8, lt ; CHECK-CVT-NEXT: cmn w14, #1, lsl #12 // =4096 -; CHECK-CVT-NEXT: csel w14, w14, w10, gt +; CHECK-CVT-NEXT: mov v1.s[2], w12 +; CHECK-CVT-NEXT: csel w14, w14, w11, gt ; CHECK-CVT-NEXT: cmp w15, #4095 ; CHECK-CVT-NEXT: csel w15, w15, w8, lt ; CHECK-CVT-NEXT: cmn w15, #1, lsl #12 // =4096 -; CHECK-CVT-NEXT: csel w11, w15, w10, gt -; CHECK-CVT-NEXT: fcvtzs w15, s1 -; CHECK-CVT-NEXT: mov v2.s[1], w9 -; CHECK-CVT-NEXT: fmov s1, w11 -; CHECK-CVT-NEXT: cmp w15, #4095 -; CHECK-CVT-NEXT: csel w9, w15, w8, lt -; CHECK-CVT-NEXT: fcvtzs w11, s0 -; CHECK-CVT-NEXT: cmn w9, #1, lsl #12 // =4096 -; CHECK-CVT-NEXT: mov v1.s[1], w14 -; CHECK-CVT-NEXT: csel w9, w9, w10, gt -; CHECK-CVT-NEXT: cmp w11, #4095 -; CHECK-CVT-NEXT: csel w8, w11, w8, lt -; CHECK-CVT-NEXT: mov v2.s[2], w12 +; CHECK-CVT-NEXT: csel w10, w15, w11, gt +; CHECK-CVT-NEXT: cmp w16, #4095 +; CHECK-CVT-NEXT: mov v1.s[3], w13 +; CHECK-CVT-NEXT: fmov s2, w10 +; CHECK-CVT-NEXT: csel w10, w16, w8, lt +; CHECK-CVT-NEXT: cmn w10, #1, lsl #12 // =4096 +; CHECK-CVT-NEXT: csel w10, w10, w11, gt +; CHECK-CVT-NEXT: cmp w9, #4095 +; CHECK-CVT-NEXT: mov v2.s[1], w14 +; CHECK-CVT-NEXT: csel w8, w9, w8, lt ; CHECK-CVT-NEXT: cmn w8, #1, lsl #12 // =4096 -; CHECK-CVT-NEXT: mov v1.s[2], w9 -; CHECK-CVT-NEXT: csel w8, w8, w10, gt -; CHECK-CVT-NEXT: mov v2.s[3], w13 -; CHECK-CVT-NEXT: mov v1.s[3], w8 -; CHECK-CVT-NEXT: uzp1 v0.8h, v1.8h, v2.8h +; CHECK-CVT-NEXT: csel w8, w8, w11, gt +; CHECK-CVT-NEXT: mov v2.s[2], w10 +; CHECK-CVT-NEXT: mov v2.s[3], w8 +; CHECK-CVT-NEXT: uzp1 v0.8h, v2.8h, v1.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_signed_v8f16_v8i13: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-FP16-NEXT: mvni v1.8h, #240, lsl #8 +; CHECK-FP16-NEXT: smin v0.8h, v0.8h, v1.8h ; CHECK-FP16-NEXT: movi v1.8h, #240, lsl #8 -; CHECK-FP16-NEXT: mvni v2.8h, #240, lsl #8 -; CHECK-FP16-NEXT: smin v0.8h, v0.8h, v2.8h ; CHECK-FP16-NEXT: smax v0.8h, v0.8h, v1.8h ; CHECK-FP16-NEXT: ret %x = call <8 x i13> @llvm.fptosi.sat.v8f16.v8i13(<8 x half> %f) @@ -2268,62 +2268,62 @@ ; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h ; CHECK-CVT-NEXT: mov w8, #32767 // =0x7fff ; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: mov w10, #-32768 // =0xffff8000 +; CHECK-CVT-NEXT: mov w11, #-32768 // =0xffff8000 ; CHECK-CVT-NEXT: mov s2, v1.s[1] -; CHECK-CVT-NEXT: fcvtzs w11, s1 +; CHECK-CVT-NEXT: fcvtzs w10, s1 ; CHECK-CVT-NEXT: fcvtzs w15, s0 ; CHECK-CVT-NEXT: fcvtzs w9, s2 ; CHECK-CVT-NEXT: mov s2, v1.s[2] ; CHECK-CVT-NEXT: mov s1, v1.s[3] ; CHECK-CVT-NEXT: cmp w9, w8 -; CHECK-CVT-NEXT: csel w9, w9, w8, lt ; CHECK-CVT-NEXT: fcvtzs w12, s2 -; CHECK-CVT-NEXT: cmn w9, #8, lsl #12 // =32768 ; CHECK-CVT-NEXT: mov s2, v0.s[1] -; CHECK-CVT-NEXT: csel w9, w9, w10, gt -; CHECK-CVT-NEXT: cmp w11, w8 -; CHECK-CVT-NEXT: csel w11, w11, w8, lt +; CHECK-CVT-NEXT: csel w9, w9, w8, lt ; CHECK-CVT-NEXT: fcvtzs w13, s1 -; CHECK-CVT-NEXT: cmn w11, #8, lsl #12 // =32768 ; CHECK-CVT-NEXT: mov s1, v0.s[2] -; CHECK-CVT-NEXT: csel w11, w11, w10, gt +; CHECK-CVT-NEXT: cmn w9, #8, lsl #12 // =32768 +; 
CHECK-CVT-NEXT: mov s0, v0.s[3] +; CHECK-CVT-NEXT: csel w9, w9, w11, gt +; CHECK-CVT-NEXT: cmp w10, w8 +; CHECK-CVT-NEXT: csel w10, w10, w8, lt +; CHECK-CVT-NEXT: fcvtzs w14, s2 +; CHECK-CVT-NEXT: cmn w10, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: fcvtzs w16, s1 +; CHECK-CVT-NEXT: csel w10, w10, w11, gt ; CHECK-CVT-NEXT: cmp w12, w8 ; CHECK-CVT-NEXT: csel w12, w12, w8, lt -; CHECK-CVT-NEXT: fcvtzs w14, s2 +; CHECK-CVT-NEXT: fmov s1, w10 ; CHECK-CVT-NEXT: cmn w12, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: mov s0, v0.s[3] -; CHECK-CVT-NEXT: csel w12, w12, w10, gt +; CHECK-CVT-NEXT: csel w12, w12, w11, gt ; CHECK-CVT-NEXT: cmp w13, w8 ; CHECK-CVT-NEXT: csel w13, w13, w8, lt -; CHECK-CVT-NEXT: fmov s2, w11 +; CHECK-CVT-NEXT: mov v1.s[1], w9 +; CHECK-CVT-NEXT: fcvtzs w9, s0 ; CHECK-CVT-NEXT: cmn w13, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: csel w13, w13, w10, gt +; CHECK-CVT-NEXT: csel w13, w13, w11, gt ; CHECK-CVT-NEXT: cmp w14, w8 ; CHECK-CVT-NEXT: csel w14, w14, w8, lt ; CHECK-CVT-NEXT: cmn w14, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: csel w14, w14, w10, gt +; CHECK-CVT-NEXT: mov v1.s[2], w12 +; CHECK-CVT-NEXT: csel w14, w14, w11, gt ; CHECK-CVT-NEXT: cmp w15, w8 ; CHECK-CVT-NEXT: csel w15, w15, w8, lt ; CHECK-CVT-NEXT: cmn w15, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: csel w11, w15, w10, gt -; CHECK-CVT-NEXT: fcvtzs w15, s1 -; CHECK-CVT-NEXT: mov v2.s[1], w9 -; CHECK-CVT-NEXT: fmov s1, w11 -; CHECK-CVT-NEXT: cmp w15, w8 -; CHECK-CVT-NEXT: csel w9, w15, w8, lt -; CHECK-CVT-NEXT: fcvtzs w11, s0 -; CHECK-CVT-NEXT: cmn w9, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: mov v1.s[1], w14 -; CHECK-CVT-NEXT: csel w9, w9, w10, gt -; CHECK-CVT-NEXT: cmp w11, w8 -; CHECK-CVT-NEXT: csel w8, w11, w8, lt -; CHECK-CVT-NEXT: mov v2.s[2], w12 +; CHECK-CVT-NEXT: csel w10, w15, w11, gt +; CHECK-CVT-NEXT: cmp w16, w8 +; CHECK-CVT-NEXT: mov v1.s[3], w13 +; CHECK-CVT-NEXT: fmov s2, w10 +; CHECK-CVT-NEXT: csel w10, w16, w8, lt +; CHECK-CVT-NEXT: cmn w10, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: csel w10, w10, w11, gt +; CHECK-CVT-NEXT: cmp w9, w8 +; CHECK-CVT-NEXT: mov v2.s[1], w14 +; CHECK-CVT-NEXT: csel w8, w9, w8, lt ; CHECK-CVT-NEXT: cmn w8, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: mov v1.s[2], w9 -; CHECK-CVT-NEXT: csel w8, w8, w10, gt -; CHECK-CVT-NEXT: mov v2.s[3], w13 -; CHECK-CVT-NEXT: mov v1.s[3], w8 -; CHECK-CVT-NEXT: uzp1 v0.8h, v1.8h, v2.8h +; CHECK-CVT-NEXT: csel w8, w8, w11, gt +; CHECK-CVT-NEXT: mov v2.s[2], w10 +; CHECK-CVT-NEXT: mov v2.s[3], w8 +; CHECK-CVT-NEXT: uzp1 v0.8h, v2.8h, v1.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_signed_v8f16_v8i16: @@ -2340,21 +2340,21 @@ ; CHECK-NEXT: fcvtl v2.4s, v0.4h ; CHECK-NEXT: fcvtl2 v0.4s, v0.8h ; CHECK-NEXT: movi v1.4s, #3, msl #16 +; CHECK-NEXT: mvni v3.4s, #3, msl #16 ; CHECK-NEXT: fcvtzs v2.4s, v2.4s ; CHECK-NEXT: fcvtzs v0.4s, v0.4s ; CHECK-NEXT: smin v2.4s, v2.4s, v1.4s ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: mvni v1.4s, #3, msl #16 -; CHECK-NEXT: smax v2.4s, v2.4s, v1.4s -; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: mov w1, v2.s[1] -; CHECK-NEXT: mov w2, v2.s[2] +; CHECK-NEXT: smax v1.4s, v2.4s, v3.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v3.4s +; CHECK-NEXT: mov w1, v1.s[1] +; CHECK-NEXT: mov w2, v1.s[2] +; CHECK-NEXT: mov w3, v1.s[3] ; CHECK-NEXT: mov w5, v0.s[1] -; CHECK-NEXT: mov w3, v2.s[3] ; CHECK-NEXT: mov w6, v0.s[2] ; CHECK-NEXT: mov w7, v0.s[3] ; CHECK-NEXT: fmov w4, s0 -; CHECK-NEXT: fmov w0, s2 +; CHECK-NEXT: fmov w0, s1 ; CHECK-NEXT: ret %x = call <8 x i19> @llvm.fptosi.sat.v8f16.v8i19(<8 x half> %f) ret <8 x i19> 
%x @@ -2377,61 +2377,61 @@ ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-CVT-NEXT: mov x8, #562949953421311 // =0x1ffffffffffff -; CHECK-CVT-NEXT: mov x12, #-562949953421312 // =0xfffe000000000000 -; CHECK-CVT-NEXT: fcvt s5, h0 +; CHECK-CVT-NEXT: mov x11, #-562949953421312 // =0xfffe000000000000 ; CHECK-CVT-NEXT: mov h2, v1.h[1] ; CHECK-CVT-NEXT: fcvt s3, h1 ; CHECK-CVT-NEXT: mov h4, v1.h[2] ; CHECK-CVT-NEXT: mov h1, v1.h[3] -; CHECK-CVT-NEXT: fcvtzs x10, s5 ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvtzs x9, s3 ; CHECK-CVT-NEXT: fcvt s3, h4 ; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvtzs x10, s2 ; CHECK-CVT-NEXT: cmp x9, x8 -; CHECK-CVT-NEXT: fcvtzs x11, s2 +; CHECK-CVT-NEXT: fcvtzs x12, s3 ; CHECK-CVT-NEXT: csel x9, x9, x8, lt -; CHECK-CVT-NEXT: cmp x9, x12 -; CHECK-CVT-NEXT: fcvtzs x13, s3 -; CHECK-CVT-NEXT: csel x4, x9, x12, gt ; CHECK-CVT-NEXT: mov h2, v0.h[1] -; CHECK-CVT-NEXT: cmp x11, x8 -; CHECK-CVT-NEXT: fcvtzs x9, s1 -; CHECK-CVT-NEXT: csel x11, x11, x8, lt +; CHECK-CVT-NEXT: fcvt s3, h0 +; CHECK-CVT-NEXT: cmp x9, x11 +; CHECK-CVT-NEXT: csel x4, x9, x11, gt +; CHECK-CVT-NEXT: cmp x10, x8 +; CHECK-CVT-NEXT: csel x9, x10, x8, lt +; CHECK-CVT-NEXT: fcvtzs x10, s1 ; CHECK-CVT-NEXT: mov h1, v0.h[2] -; CHECK-CVT-NEXT: cmp x11, x12 -; CHECK-CVT-NEXT: mov h0, v0.h[3] -; CHECK-CVT-NEXT: csel x5, x11, x12, gt -; CHECK-CVT-NEXT: cmp x13, x8 -; CHECK-CVT-NEXT: csel x11, x13, x8, lt +; CHECK-CVT-NEXT: cmp x9, x11 ; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: cmp x11, x12 +; CHECK-CVT-NEXT: mov h0, v0.h[3] +; CHECK-CVT-NEXT: csel x5, x9, x11, gt +; CHECK-CVT-NEXT: cmp x12, x8 +; CHECK-CVT-NEXT: csel x9, x12, x8, lt +; CHECK-CVT-NEXT: fcvtzs x12, s3 +; CHECK-CVT-NEXT: cmp x9, x11 ; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: csel x6, x11, x12, gt -; CHECK-CVT-NEXT: cmp x9, x8 -; CHECK-CVT-NEXT: csel x9, x9, x8, lt -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: cmp x9, x12 -; CHECK-CVT-NEXT: fcvtzs x11, s2 -; CHECK-CVT-NEXT: csel x7, x9, x12, gt +; CHECK-CVT-NEXT: csel x6, x9, x11, gt ; CHECK-CVT-NEXT: cmp x10, x8 +; CHECK-CVT-NEXT: fcvt s0, h0 ; CHECK-CVT-NEXT: csel x9, x10, x8, lt -; CHECK-CVT-NEXT: fcvtzs x10, s1 -; CHECK-CVT-NEXT: cmp x9, x12 -; CHECK-CVT-NEXT: csel x0, x9, x12, gt -; CHECK-CVT-NEXT: cmp x11, x8 -; CHECK-CVT-NEXT: csel x9, x11, x8, lt -; CHECK-CVT-NEXT: fcvtzs x11, s0 -; CHECK-CVT-NEXT: cmp x9, x12 -; CHECK-CVT-NEXT: csel x1, x9, x12, gt +; CHECK-CVT-NEXT: fcvtzs x10, s2 +; CHECK-CVT-NEXT: cmp x9, x11 +; CHECK-CVT-NEXT: csel x7, x9, x11, gt +; CHECK-CVT-NEXT: cmp x12, x8 +; CHECK-CVT-NEXT: csel x9, x12, x8, lt +; CHECK-CVT-NEXT: fcvtzs x12, s1 +; CHECK-CVT-NEXT: cmp x9, x11 +; CHECK-CVT-NEXT: csel x0, x9, x11, gt ; CHECK-CVT-NEXT: cmp x10, x8 ; CHECK-CVT-NEXT: csel x9, x10, x8, lt -; CHECK-CVT-NEXT: cmp x9, x12 -; CHECK-CVT-NEXT: csel x2, x9, x12, gt -; CHECK-CVT-NEXT: cmp x11, x8 -; CHECK-CVT-NEXT: csel x8, x11, x8, lt -; CHECK-CVT-NEXT: cmp x8, x12 -; CHECK-CVT-NEXT: csel x3, x8, x12, gt +; CHECK-CVT-NEXT: fcvtzs x10, s0 +; CHECK-CVT-NEXT: cmp x9, x11 +; CHECK-CVT-NEXT: csel x1, x9, x11, gt +; CHECK-CVT-NEXT: cmp x12, x8 +; CHECK-CVT-NEXT: csel x9, x12, x8, lt +; CHECK-CVT-NEXT: cmp x9, x11 +; CHECK-CVT-NEXT: csel x2, x9, x11, gt +; CHECK-CVT-NEXT: cmp x10, x8 +; CHECK-CVT-NEXT: csel x8, x10, x8, lt +; CHECK-CVT-NEXT: cmp x8, x11 +; CHECK-CVT-NEXT: csel x3, x8, x11, gt ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_signed_v8f16_v8i50: @@ -2443,24 +2443,24 @@ ; CHECK-FP16-NEXT: fcvtzs x9, h1 ; CHECK-FP16-NEXT: mov 
h3, v1.h[2] ; CHECK-FP16-NEXT: mov h1, v1.h[3] -; CHECK-FP16-NEXT: cmp x9, x8 ; CHECK-FP16-NEXT: fcvtzs x10, h2 +; CHECK-FP16-NEXT: cmp x9, x8 +; CHECK-FP16-NEXT: fcvtzs x12, h3 ; CHECK-FP16-NEXT: csel x9, x9, x8, lt +; CHECK-FP16-NEXT: mov h2, v0.h[2] ; CHECK-FP16-NEXT: cmp x9, x11 -; CHECK-FP16-NEXT: fcvtzs x12, h3 ; CHECK-FP16-NEXT: csel x4, x9, x11, gt -; CHECK-FP16-NEXT: mov h2, v0.h[2] ; CHECK-FP16-NEXT: cmp x10, x8 ; CHECK-FP16-NEXT: csel x9, x10, x8, lt ; CHECK-FP16-NEXT: fcvtzs x10, h1 -; CHECK-FP16-NEXT: cmp x9, x11 ; CHECK-FP16-NEXT: mov h1, v0.h[1] +; CHECK-FP16-NEXT: cmp x9, x11 ; CHECK-FP16-NEXT: csel x5, x9, x11, gt ; CHECK-FP16-NEXT: cmp x12, x8 ; CHECK-FP16-NEXT: csel x9, x12, x8, lt ; CHECK-FP16-NEXT: fcvtzs x12, h0 -; CHECK-FP16-NEXT: cmp x9, x11 ; CHECK-FP16-NEXT: mov h0, v0.h[3] +; CHECK-FP16-NEXT: cmp x9, x11 ; CHECK-FP16-NEXT: csel x6, x9, x11, gt ; CHECK-FP16-NEXT: cmp x10, x8 ; CHECK-FP16-NEXT: csel x9, x10, x8, lt @@ -2494,63 +2494,63 @@ ; CHECK-CVT-LABEL: test_signed_v8f16_v8i64: ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-CVT-NEXT: mov h2, v0.h[2] -; CHECK-CVT-NEXT: fcvt s3, h0 -; CHECK-CVT-NEXT: mov h7, v0.h[1] -; CHECK-CVT-NEXT: mov h0, v0.h[3] -; CHECK-CVT-NEXT: mov h4, v1.h[1] -; CHECK-CVT-NEXT: mov h6, v1.h[2] -; CHECK-CVT-NEXT: fcvt s5, h1 -; CHECK-CVT-NEXT: mov h1, v1.h[3] +; CHECK-CVT-NEXT: mov h4, v0.h[2] +; CHECK-CVT-NEXT: mov h3, v0.h[1] +; CHECK-CVT-NEXT: mov h7, v0.h[3] +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: mov h2, v1.h[2] +; CHECK-CVT-NEXT: mov h5, v1.h[1] +; CHECK-CVT-NEXT: mov h6, v1.h[3] +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: fcvt s7, h7 +; CHECK-CVT-NEXT: fcvtzs x9, s0 ; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: fcvtzs x8, s3 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h6 -; CHECK-CVT-NEXT: fcvtzs x9, s5 -; CHECK-CVT-NEXT: fcvt s5, h7 -; CHECK-CVT-NEXT: fcvt s6, h0 -; CHECK-CVT-NEXT: fcvt s7, h1 -; CHECK-CVT-NEXT: fcvtzs x10, s2 -; CHECK-CVT-NEXT: fmov d0, x8 -; CHECK-CVT-NEXT: fmov d2, x9 -; CHECK-CVT-NEXT: fcvtzs x9, s4 +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: fcvt s6, h6 +; CHECK-CVT-NEXT: fcvtzs x8, s1 +; CHECK-CVT-NEXT: fcvtzs x12, s4 ; CHECK-CVT-NEXT: fcvtzs x11, s3 -; CHECK-CVT-NEXT: fcvtzs x8, s5 -; CHECK-CVT-NEXT: fmov d1, x10 -; CHECK-CVT-NEXT: fcvtzs x10, s6 -; CHECK-CVT-NEXT: fmov d3, x9 -; CHECK-CVT-NEXT: fcvtzs x9, s7 -; CHECK-CVT-NEXT: mov v2.d[1], x11 -; CHECK-CVT-NEXT: mov v0.d[1], x8 -; CHECK-CVT-NEXT: mov v1.d[1], x10 -; CHECK-CVT-NEXT: mov v3.d[1], x9 +; CHECK-CVT-NEXT: fcvtzs x15, s7 +; CHECK-CVT-NEXT: fmov d0, x9 +; CHECK-CVT-NEXT: fcvtzs x10, s2 +; CHECK-CVT-NEXT: fcvtzs x13, s5 +; CHECK-CVT-NEXT: fcvtzs x14, s6 +; CHECK-CVT-NEXT: fmov d2, x8 +; CHECK-CVT-NEXT: fmov d1, x12 +; CHECK-CVT-NEXT: mov v0.d[1], x11 +; CHECK-CVT-NEXT: fmov d3, x10 +; CHECK-CVT-NEXT: mov v2.d[1], x13 +; CHECK-CVT-NEXT: mov v1.d[1], x15 +; CHECK-CVT-NEXT: mov v3.d[1], x14 ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_signed_v8f16_v8i64: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-FP16-NEXT: mov h2, v0.h[2] -; CHECK-FP16-NEXT: mov h5, v0.h[1] -; CHECK-FP16-NEXT: mov h6, v0.h[3] -; CHECK-FP16-NEXT: fcvtzs x8, h0 -; CHECK-FP16-NEXT: mov h4, v1.h[2] -; CHECK-FP16-NEXT: fcvtzs x9, h1 -; CHECK-FP16-NEXT: mov h3, v1.h[1] -; CHECK-FP16-NEXT: mov h7, v1.h[3] -; CHECK-FP16-NEXT: fcvtzs x10, h2 -; CHECK-FP16-NEXT: fmov d0, x8 -; CHECK-FP16-NEXT: fmov d2, x9 -; 
CHECK-FP16-NEXT: fcvtzs x8, h5 -; CHECK-FP16-NEXT: fcvtzs x9, h4 +; CHECK-FP16-NEXT: mov h4, v0.h[2] +; CHECK-FP16-NEXT: mov h3, v0.h[1] +; CHECK-FP16-NEXT: mov h7, v0.h[3] +; CHECK-FP16-NEXT: fcvtzs x9, h0 +; CHECK-FP16-NEXT: mov h2, v1.h[2] +; CHECK-FP16-NEXT: mov h5, v1.h[1] +; CHECK-FP16-NEXT: mov h6, v1.h[3] +; CHECK-FP16-NEXT: fcvtzs x8, h1 +; CHECK-FP16-NEXT: fcvtzs x12, h4 ; CHECK-FP16-NEXT: fcvtzs x11, h3 -; CHECK-FP16-NEXT: fmov d1, x10 -; CHECK-FP16-NEXT: fcvtzs x10, h6 -; CHECK-FP16-NEXT: fmov d3, x9 -; CHECK-FP16-NEXT: fcvtzs x9, h7 -; CHECK-FP16-NEXT: mov v2.d[1], x11 -; CHECK-FP16-NEXT: mov v0.d[1], x8 -; CHECK-FP16-NEXT: mov v1.d[1], x10 -; CHECK-FP16-NEXT: mov v3.d[1], x9 +; CHECK-FP16-NEXT: fcvtzs x15, h7 +; CHECK-FP16-NEXT: fmov d0, x9 +; CHECK-FP16-NEXT: fcvtzs x10, h2 +; CHECK-FP16-NEXT: fcvtzs x13, h5 +; CHECK-FP16-NEXT: fcvtzs x14, h6 +; CHECK-FP16-NEXT: fmov d2, x8 +; CHECK-FP16-NEXT: fmov d1, x12 +; CHECK-FP16-NEXT: mov v0.d[1], x11 +; CHECK-FP16-NEXT: fmov d3, x10 +; CHECK-FP16-NEXT: mov v2.d[1], x13 +; CHECK-FP16-NEXT: mov v1.d[1], x15 +; CHECK-FP16-NEXT: mov v3.d[1], x14 ; CHECK-FP16-NEXT: ret %x = call <8 x i64> @llvm.fptosi.sat.v8f16.v8i64(<8 x half> %f) ret <8 x i64> %x @@ -2585,8 +2585,8 @@ ; CHECK-NEXT: .cfi_offset b9, -112 ; CHECK-NEXT: .cfi_offset b10, -128 ; CHECK-NEXT: str q0, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: mov x19, x8 ; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: mov x19, x8 ; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: mov h0, v0.h[1] ; CHECK-NEXT: fcvt s8, h0 @@ -2595,76 +2595,76 @@ ; CHECK-NEXT: movi v10.2s, #241, lsl #24 ; CHECK-NEXT: mov w8, #1895825407 // =0x70ffffff ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: mov x25, #-34359738368 // =0xfffffff800000000 -; CHECK-NEXT: mov x22, #34359738367 // =0x7ffffffff ; CHECK-NEXT: fmov s9, w8 -; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: mov x21, #-34359738368 // =0xfffffff800000000 +; CHECK-NEXT: mov x23, #34359738367 // =0x7ffffffff ; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: csel x8, x25, x1, lt +; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: csel x8, x21, x1, lt ; CHECK-NEXT: csel x9, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: csinv x9, x9, xzr, le -; CHECK-NEXT: csel x8, x22, x8, gt +; CHECK-NEXT: csel x8, x23, x8, gt ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: csel x8, xzr, x8, vs -; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: str x8, [sp, #72] // 8-byte Folded Spill ; CHECK-NEXT: csel x8, xzr, x9, vs +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: str x8, [sp, #24] // 8-byte Folded Spill ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: fcmp s8, s10 ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, x25, x1, lt +; CHECK-NEXT: csel x9, x21, x1, lt ; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: csel x9, x22, x9, gt +; CHECK-NEXT: csel x9, x23, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: csel x10, xzr, x8, vs ; CHECK-NEXT: csel x8, xzr, x9, vs -; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: stp x8, x10, [sp, #8] // 16-byte Folded Spill +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: mov h0, v0.h[2] -; CHECK-NEXT: csel x8, x25, x1, lt +; CHECK-NEXT: csel x8, x21, x1, lt ; CHECK-NEXT: csel x9, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: 
csinv x9, x9, xzr, le -; CHECK-NEXT: csel x8, x22, x8, gt +; CHECK-NEXT: csel x8, x23, x8, gt ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: csel x26, xzr, x8, vs ; CHECK-NEXT: csel x8, xzr, x9, vs -; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: str x8, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: mov h0, v0.h[1] -; CHECK-NEXT: csel x8, x25, x1, lt +; CHECK-NEXT: csel x8, x21, x1, lt ; CHECK-NEXT: csel x9, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: csinv x9, x9, xzr, le -; CHECK-NEXT: csel x8, x22, x8, gt +; CHECK-NEXT: csel x8, x23, x8, gt ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: csel x28, xzr, x8, vs ; CHECK-NEXT: csel x8, xzr, x9, vs -; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: str x8, [sp] // 8-byte Folded Spill +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: csel x8, x25, x1, lt +; CHECK-NEXT: csel x8, x21, x1, lt ; CHECK-NEXT: csel x9, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: csinv x9, x9, xzr, le -; CHECK-NEXT: csel x8, x22, x8, gt +; CHECK-NEXT: csel x8, x23, x8, gt ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: csel x27, xzr, x8, vs @@ -2674,79 +2674,79 @@ ; CHECK-NEXT: fcmp s8, s10 ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, x25, x1, lt +; CHECK-NEXT: csel x9, x21, x1, lt ; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: csel x9, x22, x9, gt +; CHECK-NEXT: csel x9, x23, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csel x29, xzr, x8, vs -; CHECK-NEXT: csel x21, xzr, x9, vs +; CHECK-NEXT: csel x22, xzr, x8, vs +; CHECK-NEXT: csel x29, xzr, x9, vs ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: mov h0, v0.h[2] -; CHECK-NEXT: csel x8, x25, x1, lt +; CHECK-NEXT: csel x8, x21, x1, lt ; CHECK-NEXT: csel x9, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: csinv x9, x9, xzr, le -; CHECK-NEXT: csel x8, x22, x8, gt +; CHECK-NEXT: csel x8, x23, x8, gt ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csel x23, xzr, x8, vs -; CHECK-NEXT: csel x24, xzr, x9, vs +; CHECK-NEXT: csel x24, xzr, x8, vs +; CHECK-NEXT: csel x25, xzr, x9, vs ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti +; CHECK-NEXT: ldr x9, [sp] // 8-byte Folded Reload +; CHECK-NEXT: extr x8, x29, x22, #28 ; CHECK-NEXT: fcmp s8, s10 -; CHECK-NEXT: extr x9, x21, x29, #28 -; CHECK-NEXT: bfi x23, x20, #36, #28 -; CHECK-NEXT: extr x11, x27, x20, #28 -; CHECK-NEXT: str x24, [x19] -; CHECK-NEXT: csel x8, x25, x1, lt -; CHECK-NEXT: csel x10, xzr, x0, lt -; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: stur x9, [x19, #41] -; CHECK-NEXT: stp x23, x11, [x19, #8] +; CHECK-NEXT: bfi x24, x20, #36, #28 ; CHECK-NEXT: lsr x11, x27, #28 -; CHECK-NEXT: csinv x9, x10, xzr, le -; CHECK-NEXT: lsr x10, x21, #28 -; CHECK-NEXT: csel x8, x22, x8, gt +; CHECK-NEXT: stur x9, [x19, #75] +; CHECK-NEXT: extr x9, x27, x20, #28 +; CHECK-NEXT: stur x8, [x19, #41] +; CHECK-NEXT: csel x8, x21, x1, lt +; 
CHECK-NEXT: str x9, [x19, #16] +; CHECK-NEXT: csel x9, xzr, x0, lt +; CHECK-NEXT: fcmp s8, s9 +; CHECK-NEXT: ldr x10, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: stp x25, x24, [x19] +; CHECK-NEXT: stur x10, [x19, #50] +; CHECK-NEXT: lsr x10, x29, #28 +; CHECK-NEXT: csinv x9, x9, xzr, le +; CHECK-NEXT: csel x8, x23, x8, gt ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: strb w10, [x19, #49] +; CHECK-NEXT: ldp x14, x12, [sp, #8] // 16-byte Folded Reload +; CHECK-NEXT: strb w11, [x19, #24] ; CHECK-NEXT: csel x8, xzr, x8, vs -; CHECK-NEXT: ldr x10, [sp] // 8-byte Folded Reload +; CHECK-NEXT: ldr x13, [sp, #24] // 8-byte Folded Reload ; CHECK-NEXT: csel x9, xzr, x9, vs -; CHECK-NEXT: bfi x8, x29, #36, #28 -; CHECK-NEXT: strb w11, [x19, #24] -; CHECK-NEXT: stur x10, [x19, #75] -; CHECK-NEXT: ldp x12, x11, [sp, #8] // 16-byte Folded Reload +; CHECK-NEXT: bfi x8, x22, #36, #28 +; CHECK-NEXT: extr x10, x14, x12, #28 +; CHECK-NEXT: bfi x28, x12, #36, #28 +; CHECK-NEXT: ldr x12, [sp, #72] // 8-byte Folded Reload +; CHECK-NEXT: bfi x26, x13, #36, #28 ; CHECK-NEXT: stur x9, [x19, #25] +; CHECK-NEXT: lsr x9, x14, #28 +; CHECK-NEXT: extr x11, x12, x13, #28 ; CHECK-NEXT: stur x8, [x19, #33] -; CHECK-NEXT: ldr x8, [sp, #32] // 8-byte Folded Reload -; CHECK-NEXT: extr x10, x12, x11, #28 -; CHECK-NEXT: bfi x28, x11, #36, #28 -; CHECK-NEXT: stur x8, [x19, #50] -; CHECK-NEXT: ldr x9, [sp, #24] // 8-byte Folded Reload -; CHECK-NEXT: ldr x11, [sp, #72] // 8-byte Folded Reload +; CHECK-NEXT: lsr x8, x12, #28 ; CHECK-NEXT: stur x10, [x19, #91] ; CHECK-NEXT: stur x28, [x19, #83] -; CHECK-NEXT: extr x8, x11, x9, #28 -; CHECK-NEXT: bfi x26, x9, #36, #28 -; CHECK-NEXT: lsr x9, x12, #28 -; CHECK-NEXT: stur x8, [x19, #66] -; CHECK-NEXT: lsr x8, x11, #28 +; CHECK-NEXT: stur x11, [x19, #66] ; CHECK-NEXT: stur x26, [x19, #58] ; CHECK-NEXT: strb w9, [x19, #99] ; CHECK-NEXT: strb w8, [x19, #74] ; CHECK-NEXT: ldp x20, x19, [sp, #176] // 16-byte Folded Reload +; CHECK-NEXT: ldr d10, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp x22, x21, [sp, #160] // 16-byte Folded Reload ; CHECK-NEXT: ldp x24, x23, [sp, #144] // 16-byte Folded Reload ; CHECK-NEXT: ldp x26, x25, [sp, #128] // 16-byte Folded Reload ; CHECK-NEXT: ldp x28, x27, [sp, #112] // 16-byte Folded Reload ; CHECK-NEXT: ldp x29, x30, [sp, #96] // 16-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: ldr d10, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #192 ; CHECK-NEXT: ret %x = call <8 x i100> @llvm.fptosi.sat.v8f16.v8i100(<8 x half> %f) @@ -2782,69 +2782,69 @@ ; CHECK-NEXT: .cfi_offset b9, -112 ; CHECK-NEXT: .cfi_offset b10, -128 ; CHECK-NEXT: str q0, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: mov x19, x8 ; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: mov x19, x8 ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: movi v10.2s, #255, lsl #24 +; CHECK-NEXT: movi v9.2s, #255, lsl #24 ; CHECK-NEXT: mov w8, #2130706431 // =0x7effffff ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: mov x21, #-9223372036854775808 // =0x8000000000000000 +; CHECK-NEXT: fmov s10, w8 +; CHECK-NEXT: mov x23, #-9223372036854775808 // =0x8000000000000000 ; CHECK-NEXT: mov x22, #9223372036854775807 // =0x7fffffffffffffff -; CHECK-NEXT: fmov s9, w8 -; CHECK-NEXT: fcmp s8, s10 ; CHECK-NEXT: mov h0, v0.h[1] -; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, x21, x1, lt ; CHECK-NEXT: fcmp s8, s9 
+; CHECK-NEXT: csel x8, xzr, x0, lt +; CHECK-NEXT: csel x9, x23, x1, lt +; CHECK-NEXT: fcmp s8, s10 ; CHECK-NEXT: csel x9, x22, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: csel x8, xzr, x8, vs -; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: str x8, [sp, #72] // 8-byte Folded Spill ; CHECK-NEXT: csel x8, xzr, x9, vs +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: str x8, [sp, #24] // 8-byte Folded Spill ; CHECK-NEXT: bl __fixsfti +; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: fcmp s8, s10 ; CHECK-NEXT: mov h0, v0.h[2] ; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, x21, x1, lt -; CHECK-NEXT: fcmp s8, s9 +; CHECK-NEXT: csel x9, x23, x1, lt +; CHECK-NEXT: fcmp s8, s10 ; CHECK-NEXT: csel x9, x22, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: csel x10, xzr, x8, vs ; CHECK-NEXT: csel x8, xzr, x9, vs -; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: stp x8, x10, [sp, #8] // 16-byte Folded Spill +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: fcmp s8, s10 -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, x21, x1, lt ; CHECK-NEXT: fcmp s8, s9 +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: mov h0, v0.h[3] +; CHECK-NEXT: csel x8, xzr, x0, lt +; CHECK-NEXT: csel x9, x23, x1, lt +; CHECK-NEXT: fcmp s8, s10 ; CHECK-NEXT: csel x9, x22, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: csel x8, xzr, x8, vs -; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: str x8, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: csel x8, xzr, x9, vs +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: str x8, [sp] // 8-byte Folded Spill ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, x21, x1, lt -; CHECK-NEXT: fcmp s8, s9 +; CHECK-NEXT: csel x9, x23, x1, lt +; CHECK-NEXT: fcmp s8, s10 ; CHECK-NEXT: csel x9, x22, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 @@ -2853,26 +2853,26 @@ ; CHECK-NEXT: csel x29, xzr, x9, vs ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti +; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: fcmp s8, s10 ; CHECK-NEXT: mov h0, v0.h[1] ; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, x21, x1, lt -; CHECK-NEXT: fcmp s8, s9 +; CHECK-NEXT: csel x9, x23, x1, lt +; CHECK-NEXT: fcmp s8, s10 ; CHECK-NEXT: csel x9, x22, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: csel x20, xzr, x8, vs -; CHECK-NEXT: csel x23, xzr, x9, vs +; CHECK-NEXT: csel x21, xzr, x9, vs ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti +; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: fcmp s8, s10 ; CHECK-NEXT: mov h0, v0.h[2] ; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, x21, x1, lt -; CHECK-NEXT: fcmp s8, s9 +; CHECK-NEXT: csel x9, x23, x1, lt +; CHECK-NEXT: fcmp s8, s10 ; CHECK-NEXT: csel x9, x22, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 @@ -2881,12 +2881,12 @@ ; CHECK-NEXT: csel x25, xzr, x9, vs ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti +; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload -; 
CHECK-NEXT: fcmp s8, s10 ; CHECK-NEXT: mov h0, v0.h[3] ; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, x21, x1, lt -; CHECK-NEXT: fcmp s8, s9 +; CHECK-NEXT: csel x9, x23, x1, lt +; CHECK-NEXT: fcmp s8, s10 ; CHECK-NEXT: csel x9, x22, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 @@ -2895,24 +2895,24 @@ ; CHECK-NEXT: csel x27, xzr, x9, vs ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: stp x26, x27, [x19, #32] ; CHECK-NEXT: stp x24, x25, [x19, #16] -; CHECK-NEXT: stp x20, x23, [x19] +; CHECK-NEXT: stp x20, x21, [x19] ; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, x21, x1, lt -; CHECK-NEXT: fcmp s8, s9 +; CHECK-NEXT: csel x9, x23, x1, lt +; CHECK-NEXT: fcmp s8, s10 ; CHECK-NEXT: stp x28, x29, [x19, #112] -; CHECK-NEXT: ldr x10, [sp] // 8-byte Folded Reload ; CHECK-NEXT: csel x9, x22, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 -; CHECK-NEXT: str x10, [x19, #104] -; CHECK-NEXT: ldr x10, [sp, #32] // 8-byte Folded Reload ; CHECK-NEXT: csel x9, xzr, x9, vs ; CHECK-NEXT: csel x8, xzr, x8, vs -; CHECK-NEXT: str x10, [x19, #96] ; CHECK-NEXT: stp x8, x9, [x19, #48] +; CHECK-NEXT: ldr x8, [sp] // 8-byte Folded Reload +; CHECK-NEXT: str x8, [x19, #104] +; CHECK-NEXT: ldr x8, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: str x8, [x19, #96] ; CHECK-NEXT: ldr x8, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: str x8, [x19, #88] ; CHECK-NEXT: ldr x8, [sp, #16] // 8-byte Folded Reload @@ -2922,13 +2922,13 @@ ; CHECK-NEXT: ldr x8, [sp, #72] // 8-byte Folded Reload ; CHECK-NEXT: str x8, [x19, #64] ; CHECK-NEXT: ldp x20, x19, [sp, #176] // 16-byte Folded Reload +; CHECK-NEXT: ldr d10, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp x22, x21, [sp, #160] // 16-byte Folded Reload ; CHECK-NEXT: ldp x24, x23, [sp, #144] // 16-byte Folded Reload ; CHECK-NEXT: ldp x26, x25, [sp, #128] // 16-byte Folded Reload ; CHECK-NEXT: ldp x28, x27, [sp, #112] // 16-byte Folded Reload ; CHECK-NEXT: ldp x29, x30, [sp, #96] // 16-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: ldr d10, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #192 ; CHECK-NEXT: ret %x = call <8 x i128> @llvm.fptosi.sat.v8f16.v8i128(<8 x half> %f) @@ -3010,11 +3010,11 @@ ; CHECK-NEXT: fcvtzs v0.4s, v0.4s ; CHECK-NEXT: fcvtzs v2.4s, v2.4s ; CHECK-NEXT: fcvtzs v4.4s, v1.4s -; CHECK-NEXT: fcvtzs v3.4s, v3.4s ; CHECK-NEXT: sqxtn v0.4h, v0.4s ; CHECK-NEXT: sqxtn v1.4h, v2.4s +; CHECK-NEXT: fcvtzs v2.4s, v3.4s ; CHECK-NEXT: sqxtn2 v0.8h, v4.4s -; CHECK-NEXT: sqxtn2 v1.8h, v3.4s +; CHECK-NEXT: sqxtn2 v1.8h, v2.4s ; CHECK-NEXT: ret %x = call <16 x i16> @llvm.fptosi.sat.v16f32.v16i16(<16 x float> %f) ret <16 x i16> %x @@ -3028,119 +3028,119 @@ ; CHECK-CVT-NEXT: fcvtl2 v2.4s, v1.8h ; CHECK-CVT-NEXT: mov w8, #127 // =0x7f ; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h -; CHECK-CVT-NEXT: mov w9, #-128 // =0xffffff80 ; CHECK-CVT-NEXT: mov s3, v2.s[1] -; CHECK-CVT-NEXT: fcvtzs w11, s2 -; CHECK-CVT-NEXT: fcvtzs w10, s3 +; CHECK-CVT-NEXT: fcvtzs w10, s2 +; CHECK-CVT-NEXT: fcvtzs w9, s3 ; CHECK-CVT-NEXT: mov s3, v2.s[2] ; CHECK-CVT-NEXT: mov s2, v2.s[3] -; CHECK-CVT-NEXT: cmp w10, #127 -; CHECK-CVT-NEXT: csel w10, w10, w8, lt +; CHECK-CVT-NEXT: cmp w9, #127 ; CHECK-CVT-NEXT: fcvtzs w12, s3 -; CHECK-CVT-NEXT: cmn w10, #128 ; CHECK-CVT-NEXT: mov s3, v1.s[1] -; CHECK-CVT-NEXT: csel w10, w10, w9, gt -; CHECK-CVT-NEXT: cmp w11, #127 -; CHECK-CVT-NEXT: csel w11, w11, w8, lt +; 
CHECK-CVT-NEXT: csel w11, w9, w8, lt +; CHECK-CVT-NEXT: mov w9, #-128 // =0xffffff80 ; CHECK-CVT-NEXT: fcvtzs w14, s2 ; CHECK-CVT-NEXT: cmn w11, #128 -; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h +; CHECK-CVT-NEXT: mov s2, v1.s[2] ; CHECK-CVT-NEXT: csel w11, w11, w9, gt -; CHECK-CVT-NEXT: cmp w12, #127 -; CHECK-CVT-NEXT: csel w12, w12, w8, lt +; CHECK-CVT-NEXT: cmp w10, #127 +; CHECK-CVT-NEXT: csel w10, w10, w8, lt ; CHECK-CVT-NEXT: fcvtzs w15, s3 -; CHECK-CVT-NEXT: cmn w12, #128 -; CHECK-CVT-NEXT: mov s3, v1.s[2] -; CHECK-CVT-NEXT: csel w13, w12, w9, gt +; CHECK-CVT-NEXT: fcvtl2 v3.4s, v0.8h +; CHECK-CVT-NEXT: cmn w10, #128 +; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-NEXT: csel w13, w10, w9, gt +; CHECK-CVT-NEXT: cmp w12, #127 +; CHECK-CVT-NEXT: fcvtzs w16, s2 +; CHECK-CVT-NEXT: csel w10, w12, w8, lt +; CHECK-CVT-NEXT: cmn w10, #128 +; CHECK-CVT-NEXT: mov s2, v3.s[1] +; CHECK-CVT-NEXT: fcvtzs w0, s3 +; CHECK-CVT-NEXT: csel w10, w10, w9, gt ; CHECK-CVT-NEXT: cmp w14, #127 +; CHECK-CVT-NEXT: fcvtzs w4, s0 ; CHECK-CVT-NEXT: csel w12, w14, w8, lt ; CHECK-CVT-NEXT: fcvtzs w14, s1 -; CHECK-CVT-NEXT: cmn w12, #128 ; CHECK-CVT-NEXT: mov s1, v1.s[3] +; CHECK-CVT-NEXT: cmn w12, #128 ; CHECK-CVT-NEXT: csel w12, w12, w9, gt ; CHECK-CVT-NEXT: cmp w15, #127 +; CHECK-CVT-NEXT: fcvtzs w18, s2 ; CHECK-CVT-NEXT: csel w15, w15, w8, lt -; CHECK-CVT-NEXT: fcvtzs w16, s3 +; CHECK-CVT-NEXT: mov s2, v3.s[3] ; CHECK-CVT-NEXT: cmn w15, #128 -; CHECK-CVT-NEXT: mov s3, v2.s[1] +; CHECK-CVT-NEXT: fcvtzs w17, s1 +; CHECK-CVT-NEXT: mov s1, v3.s[2] ; CHECK-CVT-NEXT: csel w15, w15, w9, gt ; CHECK-CVT-NEXT: cmp w14, #127 ; CHECK-CVT-NEXT: csel w14, w14, w8, lt -; CHECK-CVT-NEXT: fcvtzs w17, s1 ; CHECK-CVT-NEXT: cmn w14, #128 -; CHECK-CVT-NEXT: mov s1, v2.s[2] +; CHECK-CVT-NEXT: fcvtzs w2, s2 +; CHECK-CVT-NEXT: fmov s2, w13 ; CHECK-CVT-NEXT: csel w14, w14, w9, gt ; CHECK-CVT-NEXT: cmp w16, #127 +; CHECK-CVT-NEXT: fcvtzs w1, s1 ; CHECK-CVT-NEXT: csel w16, w16, w8, lt -; CHECK-CVT-NEXT: fcvtzs w18, s3 +; CHECK-CVT-NEXT: mov s1, v0.s[1] ; CHECK-CVT-NEXT: cmn w16, #128 -; CHECK-CVT-NEXT: fcvtzs w0, s2 +; CHECK-CVT-NEXT: mov v2.s[1], w11 ; CHECK-CVT-NEXT: csel w16, w16, w9, gt ; CHECK-CVT-NEXT: cmp w17, #127 ; CHECK-CVT-NEXT: csel w17, w17, w8, lt -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h ; CHECK-CVT-NEXT: cmn w17, #128 -; CHECK-CVT-NEXT: mov s2, v2.s[3] +; CHECK-CVT-NEXT: fcvtzs w3, s1 +; CHECK-CVT-NEXT: mov s1, v0.s[2] ; CHECK-CVT-NEXT: csel w17, w17, w9, gt ; CHECK-CVT-NEXT: cmp w18, #127 +; CHECK-CVT-NEXT: mov s0, v0.s[3] ; CHECK-CVT-NEXT: csel w18, w18, w8, lt -; CHECK-CVT-NEXT: fcvtzs w1, s1 +; CHECK-CVT-NEXT: mov v2.s[2], w10 ; CHECK-CVT-NEXT: cmn w18, #128 -; CHECK-CVT-NEXT: mov s1, v0.s[1] ; CHECK-CVT-NEXT: csel w18, w18, w9, gt ; CHECK-CVT-NEXT: cmp w0, #127 ; CHECK-CVT-NEXT: csel w0, w0, w8, lt -; CHECK-CVT-NEXT: fcvtzs w2, s2 ; CHECK-CVT-NEXT: cmn w0, #128 -; CHECK-CVT-NEXT: fcvtzs w4, s0 +; CHECK-CVT-NEXT: mov v2.s[3], w12 ; CHECK-CVT-NEXT: csel w0, w0, w9, gt ; CHECK-CVT-NEXT: cmp w1, #127 ; CHECK-CVT-NEXT: csel w1, w1, w8, lt -; CHECK-CVT-NEXT: fcvtzs w3, s1 +; CHECK-CVT-NEXT: fmov s3, w0 ; CHECK-CVT-NEXT: cmn w1, #128 -; CHECK-CVT-NEXT: mov s1, v0.s[2] ; CHECK-CVT-NEXT: csel w1, w1, w9, gt ; CHECK-CVT-NEXT: cmp w2, #127 ; CHECK-CVT-NEXT: csel w2, w2, w8, lt -; CHECK-CVT-NEXT: fmov s2, w11 +; CHECK-CVT-NEXT: mov v3.s[1], w18 ; CHECK-CVT-NEXT: cmn w2, #128 -; CHECK-CVT-NEXT: fmov s3, w14 ; CHECK-CVT-NEXT: csel w2, w2, w9, gt ; CHECK-CVT-NEXT: cmp w3, #127 ; CHECK-CVT-NEXT: csel w3, w3, w8, lt -; CHECK-CVT-NEXT: fcvtzs 
w14, s1 ; CHECK-CVT-NEXT: cmn w3, #128 -; CHECK-CVT-NEXT: mov s0, v0.s[3] -; CHECK-CVT-NEXT: csel w3, w3, w9, gt +; CHECK-CVT-NEXT: mov v3.s[2], w1 +; CHECK-CVT-NEXT: csel w13, w3, w9, gt ; CHECK-CVT-NEXT: cmp w4, #127 -; CHECK-CVT-NEXT: csel w11, w4, w8, lt -; CHECK-CVT-NEXT: fmov s4, w0 -; CHECK-CVT-NEXT: cmn w11, #128 -; CHECK-CVT-NEXT: csel w11, w11, w9, gt -; CHECK-CVT-NEXT: cmp w14, #127 -; CHECK-CVT-NEXT: mov v2.s[1], w10 -; CHECK-CVT-NEXT: csel w10, w14, w8, lt -; CHECK-CVT-NEXT: mov v3.s[1], w15 -; CHECK-CVT-NEXT: cmn w10, #128 -; CHECK-CVT-NEXT: fmov s1, w11 -; CHECK-CVT-NEXT: csel w10, w10, w9, gt +; CHECK-CVT-NEXT: csel w3, w4, w8, lt +; CHECK-CVT-NEXT: fcvtzs w4, s1 +; CHECK-CVT-NEXT: fmov s1, w14 +; CHECK-CVT-NEXT: cmn w3, #128 +; CHECK-CVT-NEXT: csel w11, w3, w9, gt +; CHECK-CVT-NEXT: mov v3.s[3], w2 +; CHECK-CVT-NEXT: fmov s4, w11 +; CHECK-CVT-NEXT: mov v1.s[1], w15 ; CHECK-CVT-NEXT: fcvtzs w11, s0 -; CHECK-CVT-NEXT: mov v4.s[1], w18 -; CHECK-CVT-NEXT: mov v1.s[1], w3 +; CHECK-CVT-NEXT: cmp w4, #127 +; CHECK-CVT-NEXT: mov v4.s[1], w13 +; CHECK-CVT-NEXT: csel w13, w4, w8, lt +; CHECK-CVT-NEXT: cmn w13, #128 +; CHECK-CVT-NEXT: mov v1.s[2], w16 +; CHECK-CVT-NEXT: csel w10, w13, w9, gt ; CHECK-CVT-NEXT: cmp w11, #127 ; CHECK-CVT-NEXT: csel w8, w11, w8, lt -; CHECK-CVT-NEXT: mov v2.s[2], w13 +; CHECK-CVT-NEXT: mov v4.s[2], w10 ; CHECK-CVT-NEXT: cmn w8, #128 -; CHECK-CVT-NEXT: mov v3.s[2], w16 ; CHECK-CVT-NEXT: csel w8, w8, w9, gt -; CHECK-CVT-NEXT: mov v4.s[2], w1 -; CHECK-CVT-NEXT: mov v1.s[2], w10 -; CHECK-CVT-NEXT: mov v2.s[3], w12 -; CHECK-CVT-NEXT: mov v3.s[3], w17 -; CHECK-CVT-NEXT: mov v4.s[3], w2 -; CHECK-CVT-NEXT: mov v1.s[3], w8 -; CHECK-CVT-NEXT: uzp1 v0.8h, v3.8h, v2.8h -; CHECK-CVT-NEXT: uzp1 v1.8h, v1.8h, v4.8h +; CHECK-CVT-NEXT: mov v1.s[3], w17 +; CHECK-CVT-NEXT: mov v4.s[3], w8 +; CHECK-CVT-NEXT: uzp1 v0.8h, v1.8h, v2.8h +; CHECK-CVT-NEXT: uzp1 v1.8h, v4.8h, v3.8h ; CHECK-CVT-NEXT: uzp1 v0.16b, v1.16b, v0.16b ; CHECK-CVT-NEXT: ret ; @@ -3161,119 +3161,119 @@ ; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h ; CHECK-CVT-NEXT: mov w8, #32767 // =0x7fff ; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: mov w9, #-32768 // =0xffff8000 ; CHECK-CVT-NEXT: mov s3, v2.s[1] -; CHECK-CVT-NEXT: fcvtzs w11, s2 -; CHECK-CVT-NEXT: fcvtzs w10, s3 +; CHECK-CVT-NEXT: fcvtzs w10, s2 +; CHECK-CVT-NEXT: fcvtzs w9, s3 ; CHECK-CVT-NEXT: mov s3, v2.s[2] ; CHECK-CVT-NEXT: mov s2, v2.s[3] -; CHECK-CVT-NEXT: cmp w10, w8 -; CHECK-CVT-NEXT: csel w10, w10, w8, lt +; CHECK-CVT-NEXT: cmp w9, w8 ; CHECK-CVT-NEXT: fcvtzs w12, s3 -; CHECK-CVT-NEXT: cmn w10, #8, lsl #12 // =32768 ; CHECK-CVT-NEXT: mov s3, v0.s[1] -; CHECK-CVT-NEXT: csel w10, w10, w9, gt -; CHECK-CVT-NEXT: cmp w11, w8 -; CHECK-CVT-NEXT: csel w11, w11, w8, lt +; CHECK-CVT-NEXT: csel w11, w9, w8, lt +; CHECK-CVT-NEXT: mov w9, #-32768 // =0xffff8000 ; CHECK-CVT-NEXT: fcvtzs w14, s2 ; CHECK-CVT-NEXT: cmn w11, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: fcvtl2 v2.4s, v1.8h +; CHECK-CVT-NEXT: mov s2, v0.s[2] ; CHECK-CVT-NEXT: csel w11, w11, w9, gt -; CHECK-CVT-NEXT: cmp w12, w8 -; CHECK-CVT-NEXT: csel w12, w12, w8, lt +; CHECK-CVT-NEXT: cmp w10, w8 +; CHECK-CVT-NEXT: csel w10, w10, w8, lt ; CHECK-CVT-NEXT: fcvtzs w15, s3 -; CHECK-CVT-NEXT: cmn w12, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: mov s3, v0.s[2] -; CHECK-CVT-NEXT: csel w13, w12, w9, gt +; CHECK-CVT-NEXT: fcvtl2 v3.4s, v1.8h +; CHECK-CVT-NEXT: cmn w10, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-NEXT: csel w13, w10, w9, gt +; CHECK-CVT-NEXT: cmp w12, w8 +; 
CHECK-CVT-NEXT: fcvtzs w16, s2 +; CHECK-CVT-NEXT: csel w10, w12, w8, lt +; CHECK-CVT-NEXT: cmn w10, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: mov s2, v3.s[1] +; CHECK-CVT-NEXT: fcvtzs w0, s3 +; CHECK-CVT-NEXT: csel w10, w10, w9, gt ; CHECK-CVT-NEXT: cmp w14, w8 +; CHECK-CVT-NEXT: fcvtzs w4, s1 ; CHECK-CVT-NEXT: csel w12, w14, w8, lt ; CHECK-CVT-NEXT: fcvtzs w14, s0 -; CHECK-CVT-NEXT: cmn w12, #8, lsl #12 // =32768 ; CHECK-CVT-NEXT: mov s0, v0.s[3] +; CHECK-CVT-NEXT: cmn w12, #8, lsl #12 // =32768 ; CHECK-CVT-NEXT: csel w12, w12, w9, gt ; CHECK-CVT-NEXT: cmp w15, w8 +; CHECK-CVT-NEXT: fcvtzs w18, s2 ; CHECK-CVT-NEXT: csel w15, w15, w8, lt -; CHECK-CVT-NEXT: fcvtzs w16, s3 +; CHECK-CVT-NEXT: mov s2, v3.s[3] ; CHECK-CVT-NEXT: cmn w15, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: mov s3, v2.s[1] +; CHECK-CVT-NEXT: fcvtzs w17, s0 +; CHECK-CVT-NEXT: mov s0, v3.s[2] ; CHECK-CVT-NEXT: csel w15, w15, w9, gt ; CHECK-CVT-NEXT: cmp w14, w8 ; CHECK-CVT-NEXT: csel w14, w14, w8, lt -; CHECK-CVT-NEXT: fcvtzs w17, s0 ; CHECK-CVT-NEXT: cmn w14, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: fcvtl v0.4s, v1.4h +; CHECK-CVT-NEXT: fcvtzs w2, s2 +; CHECK-CVT-NEXT: fmov s2, w13 ; CHECK-CVT-NEXT: csel w14, w14, w9, gt ; CHECK-CVT-NEXT: cmp w16, w8 +; CHECK-CVT-NEXT: fcvtzs w1, s0 ; CHECK-CVT-NEXT: csel w16, w16, w8, lt -; CHECK-CVT-NEXT: fcvtzs w18, s3 +; CHECK-CVT-NEXT: mov s0, v1.s[1] ; CHECK-CVT-NEXT: cmn w16, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: mov s1, v2.s[2] +; CHECK-CVT-NEXT: mov v2.s[1], w11 ; CHECK-CVT-NEXT: csel w16, w16, w9, gt ; CHECK-CVT-NEXT: cmp w17, w8 ; CHECK-CVT-NEXT: csel w17, w17, w8, lt -; CHECK-CVT-NEXT: fcvtzs w0, s2 ; CHECK-CVT-NEXT: cmn w17, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: mov s2, v2.s[3] +; CHECK-CVT-NEXT: fcvtzs w3, s0 +; CHECK-CVT-NEXT: mov s0, v1.s[2] ; CHECK-CVT-NEXT: csel w17, w17, w9, gt ; CHECK-CVT-NEXT: cmp w18, w8 +; CHECK-CVT-NEXT: mov v2.s[2], w10 ; CHECK-CVT-NEXT: csel w18, w18, w8, lt -; CHECK-CVT-NEXT: fcvtzs w1, s1 ; CHECK-CVT-NEXT: cmn w18, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: mov s1, v0.s[1] ; CHECK-CVT-NEXT: csel w18, w18, w9, gt ; CHECK-CVT-NEXT: cmp w0, w8 ; CHECK-CVT-NEXT: csel w0, w0, w8, lt -; CHECK-CVT-NEXT: fcvtzs w2, s2 +; CHECK-CVT-NEXT: mov v2.s[3], w12 ; CHECK-CVT-NEXT: cmn w0, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: fcvtzs w4, s0 ; CHECK-CVT-NEXT: csel w0, w0, w9, gt ; CHECK-CVT-NEXT: cmp w1, w8 ; CHECK-CVT-NEXT: csel w1, w1, w8, lt -; CHECK-CVT-NEXT: fcvtzs w3, s1 +; CHECK-CVT-NEXT: fmov s3, w0 ; CHECK-CVT-NEXT: cmn w1, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: mov s1, v0.s[2] ; CHECK-CVT-NEXT: csel w1, w1, w9, gt ; CHECK-CVT-NEXT: cmp w2, w8 ; CHECK-CVT-NEXT: csel w2, w2, w8, lt -; CHECK-CVT-NEXT: fmov s2, w11 +; CHECK-CVT-NEXT: mov v3.s[1], w18 ; CHECK-CVT-NEXT: cmn w2, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: fmov s3, w14 ; CHECK-CVT-NEXT: csel w2, w2, w9, gt ; CHECK-CVT-NEXT: cmp w3, w8 ; CHECK-CVT-NEXT: csel w3, w3, w8, lt -; CHECK-CVT-NEXT: fcvtzs w14, s1 ; CHECK-CVT-NEXT: cmn w3, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: mov s0, v0.s[3] -; CHECK-CVT-NEXT: csel w3, w3, w9, gt +; CHECK-CVT-NEXT: mov v3.s[2], w1 +; CHECK-CVT-NEXT: csel w13, w3, w9, gt +; CHECK-CVT-NEXT: cmp w4, w8 +; CHECK-CVT-NEXT: csel w3, w4, w8, lt +; CHECK-CVT-NEXT: fcvtzs w4, s0 +; CHECK-CVT-NEXT: mov s0, v1.s[3] +; CHECK-CVT-NEXT: cmn w3, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: fmov s1, w14 +; CHECK-CVT-NEXT: csel w11, w3, w9, gt +; CHECK-CVT-NEXT: mov v3.s[3], w2 +; CHECK-CVT-NEXT: fmov s4, w11 +; CHECK-CVT-NEXT: mov v1.s[1], w15 ; CHECK-CVT-NEXT: cmp w4, w8 -; 
CHECK-CVT-NEXT: csel w11, w4, w8, lt
-; CHECK-CVT-NEXT: fmov s4, w0
-; CHECK-CVT-NEXT: cmn w11, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT: csel w11, w11, w9, gt
-; CHECK-CVT-NEXT: cmp w14, w8
-; CHECK-CVT-NEXT: mov v2.s[1], w10
-; CHECK-CVT-NEXT: csel w10, w14, w8, lt
-; CHECK-CVT-NEXT: mov v3.s[1], w15
-; CHECK-CVT-NEXT: cmn w10, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT: fmov s1, w11
-; CHECK-CVT-NEXT: csel w10, w10, w9, gt
; CHECK-CVT-NEXT: fcvtzs w11, s0
-; CHECK-CVT-NEXT: mov v4.s[1], w18
-; CHECK-CVT-NEXT: mov v1.s[1], w3
+; CHECK-CVT-NEXT: mov v4.s[1], w13
+; CHECK-CVT-NEXT: csel w13, w4, w8, lt
+; CHECK-CVT-NEXT: cmn w13, #8, lsl #12 // =32768
+; CHECK-CVT-NEXT: csel w10, w13, w9, gt
+; CHECK-CVT-NEXT: mov v1.s[2], w16
; CHECK-CVT-NEXT: cmp w11, w8
; CHECK-CVT-NEXT: csel w8, w11, w8, lt
-; CHECK-CVT-NEXT: mov v2.s[2], w13
+; CHECK-CVT-NEXT: mov v4.s[2], w10
; CHECK-CVT-NEXT: cmn w8, #8, lsl #12 // =32768
-; CHECK-CVT-NEXT: mov v3.s[2], w16
; CHECK-CVT-NEXT: csel w8, w8, w9, gt
-; CHECK-CVT-NEXT: mov v4.s[2], w1
-; CHECK-CVT-NEXT: mov v1.s[2], w10
-; CHECK-CVT-NEXT: mov v2.s[3], w12
-; CHECK-CVT-NEXT: mov v3.s[3], w17
-; CHECK-CVT-NEXT: mov v4.s[3], w2
-; CHECK-CVT-NEXT: mov v1.s[3], w8
-; CHECK-CVT-NEXT: uzp1 v0.8h, v3.8h, v2.8h
-; CHECK-CVT-NEXT: uzp1 v1.8h, v1.8h, v4.8h
+; CHECK-CVT-NEXT: mov v1.s[3], w17
+; CHECK-CVT-NEXT: mov v4.s[3], w8
+; CHECK-CVT-NEXT: uzp1 v0.8h, v1.8h, v2.8h
+; CHECK-CVT-NEXT: uzp1 v1.8h, v4.8h, v3.8h
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: test_signed_v16f16_v16i16:
@@ -3289,62 +3289,62 @@
; CHECK-LABEL: test_signed_v8f64_v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov d4, v3.d[1]
-; CHECK-NEXT: mov w8, #127 // =0x7f
-; CHECK-NEXT: fcvtzs w10, d3
+; CHECK-NEXT: fcvtzs w9, d3
+; CHECK-NEXT: mov w10, #127 // =0x7f
; CHECK-NEXT: mov w11, #-128 // =0xffffff80
; CHECK-NEXT: mov d3, v1.d[1]
; CHECK-NEXT: fcvtzs w13, d2
; CHECK-NEXT: fcvtzs w15, d1
-; CHECK-NEXT: mov d1, v0.d[1]
-; CHECK-NEXT: fcvtzs w9, d4
+; CHECK-NEXT: fcvtzs w17, d0
+; CHECK-NEXT: fcvtzs w8, d4
; CHECK-NEXT: mov d4, v2.d[1]
+; CHECK-NEXT: mov d2, v0.d[1]
; CHECK-NEXT: fcvtzs w14, d3
-; CHECK-NEXT: cmp w9, #127
-; CHECK-NEXT: csel w9, w9, w8, lt
+; CHECK-NEXT: cmp w8, #127
; CHECK-NEXT: fcvtzs w12, d4
+; CHECK-NEXT: fcvtzs w16, d2
+; CHECK-NEXT: csel w8, w8, w10, lt
+; CHECK-NEXT: cmn w8, #128
+; CHECK-NEXT: csel w8, w8, w11, gt
+; CHECK-NEXT: cmp w9, #127
+; CHECK-NEXT: csel w9, w9, w10, lt
; CHECK-NEXT: cmn w9, #128
; CHECK-NEXT: csel w9, w9, w11, gt
-; CHECK-NEXT: cmp w10, #127
-; CHECK-NEXT: csel w10, w10, w8, lt
-; CHECK-NEXT: cmn w10, #128
-; CHECK-NEXT: csel w10, w10, w11, gt
; CHECK-NEXT: cmp w12, #127
-; CHECK-NEXT: csel w12, w12, w8, lt
+; CHECK-NEXT: csel w12, w12, w10, lt
+; CHECK-NEXT: fmov s3, w9
; CHECK-NEXT: cmn w12, #128
; CHECK-NEXT: csel w12, w12, w11, gt
; CHECK-NEXT: cmp w13, #127
-; CHECK-NEXT: csel w13, w13, w8, lt
-; CHECK-NEXT: fmov s5, w10
+; CHECK-NEXT: csel w13, w13, w10, lt
+; CHECK-NEXT: mov v3.s[1], w8
; CHECK-NEXT: cmn w13, #128
; CHECK-NEXT: csel w13, w13, w11, gt
; CHECK-NEXT: cmp w14, #127
-; CHECK-NEXT: csel w14, w14, w8, lt
+; CHECK-NEXT: csel w14, w14, w10, lt
+; CHECK-NEXT: fmov s2, w13
; CHECK-NEXT: cmn w14, #128
-; CHECK-NEXT: csel w10, w14, w11, gt
+; CHECK-NEXT: csel w14, w14, w11, gt
; CHECK-NEXT: cmp w15, #127
-; CHECK-NEXT: fcvtzs w14, d1
-; CHECK-NEXT: csel w15, w15, w8, lt
+; CHECK-NEXT: csel w15, w15, w10, lt
+; CHECK-NEXT: mov v2.s[1], w12
; CHECK-NEXT: cmn w15, #128
-; CHECK-NEXT: mov v5.s[1], w9
-; CHECK-NEXT: csel w9, w15, w11, gt
-; CHECK-NEXT: cmp w14, #127
-; CHECK-NEXT: fcvtzs w15, d0
-; CHECK-NEXT: fmov s4, w13
-; CHECK-NEXT: csel w13, w14, w8, lt
-; CHECK-NEXT: cmn w13, #128
-; CHECK-NEXT: csel w13, w13, w11, gt
-; CHECK-NEXT: cmp w15, #127
-; CHECK-NEXT: mov v4.s[1], w12
-; CHECK-NEXT: csel w8, w15, w8, lt
-; CHECK-NEXT: fmov s3, w9
-; CHECK-NEXT: cmn w8, #128
-; CHECK-NEXT: csel w8, w8, w11, gt
-; CHECK-NEXT: mov v3.s[1], w10
-; CHECK-NEXT: fmov s2, w8
+; CHECK-NEXT: csel w15, w15, w11, gt
+; CHECK-NEXT: cmp w16, #127
+; CHECK-NEXT: csel w9, w16, w10, lt
+; CHECK-NEXT: fmov s1, w15
+; CHECK-NEXT: cmn w9, #128
+; CHECK-NEXT: csel w8, w9, w11, gt
+; CHECK-NEXT: cmp w17, #127
+; CHECK-NEXT: csel w9, w17, w10, lt
+; CHECK-NEXT: mov v1.s[1], w14
+; CHECK-NEXT: cmn w9, #128
+; CHECK-NEXT: csel w9, w9, w11, gt
+; CHECK-NEXT: fmov s0, w9
+; CHECK-NEXT: mov v0.s[1], w8
; CHECK-NEXT: adrp x8, .LCPI82_0
-; CHECK-NEXT: mov v2.s[1], w13
-; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI82_0]
-; CHECK-NEXT: tbl v0.8b, { v2.16b, v3.16b, v4.16b, v5.16b }, v0.8b
+; CHECK-NEXT: ldr d4, [x8, :lo12:.LCPI82_0]
+; CHECK-NEXT: tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.8b
; CHECK-NEXT: ret
%x = call <8 x i8> @llvm.fptosi.sat.v8f64.v8i8(<8 x double> %f)
ret <8 x i8> %x
@@ -3354,130 +3354,130 @@
; CHECK-LABEL: test_signed_v16f64_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: mov d16, v0.d[1]
+; CHECK-NEXT: fcvtzs w10, d0
; CHECK-NEXT: mov w8, #127 // =0x7f
-; CHECK-NEXT: fcvtzs w11, d0
-; CHECK-NEXT: mov w9, #-128 // =0xffffff80
-; CHECK-NEXT: mov d0, v2.d[1]
+; CHECK-NEXT: mov d0, v1.d[1]
; CHECK-NEXT: fcvtzs w13, d1
-; CHECK-NEXT: fcvtzs w10, d16
-; CHECK-NEXT: mov d16, v1.d[1]
-; CHECK-NEXT: fcvtzs w14, d0
+; CHECK-NEXT: mov d1, v2.d[1]
+; CHECK-NEXT: fcvtzs w9, d16
+; CHECK-NEXT: fcvtzs w12, d0
+; CHECK-NEXT: cmp w9, #127
+; CHECK-NEXT: csel w11, w9, w8, lt
+; CHECK-NEXT: mov w9, #-128 // =0xffffff80
+; CHECK-NEXT: cmn w11, #128
+; CHECK-NEXT: csel w11, w11, w9, gt
; CHECK-NEXT: cmp w10, #127
; CHECK-NEXT: csel w10, w10, w8, lt
-; CHECK-NEXT: fcvtzs w12, d16
; CHECK-NEXT: cmn w10, #128
; CHECK-NEXT: csel w10, w10, w9, gt
-; CHECK-NEXT: cmp w11, #127
-; CHECK-NEXT: csel w11, w11, w8, lt
-; CHECK-NEXT: cmn w11, #128
-; CHECK-NEXT: csel w11, w11, w9, gt
; CHECK-NEXT: cmp w12, #127
-; CHECK-NEXT: csel w12, w12, w8, lt
+; CHECK-NEXT: fmov s0, w10
+; CHECK-NEXT: csel w10, w12, w8, lt
+; CHECK-NEXT: cmn w10, #128
+; CHECK-NEXT: csel w10, w10, w9, gt
+; CHECK-NEXT: cmp w13, #127
+; CHECK-NEXT: csel w12, w13, w8, lt
+; CHECK-NEXT: mov v0.s[1], w11
+; CHECK-NEXT: fcvtzs w11, d1
; CHECK-NEXT: cmn w12, #128
; CHECK-NEXT: csel w12, w12, w9, gt
-; CHECK-NEXT: cmp w13, #127
-; CHECK-NEXT: fmov s0, w11
-; CHECK-NEXT: csel w11, w13, w8, lt
-; CHECK-NEXT: cmn w11, #128
-; CHECK-NEXT: fcvtzs w13, d2
-; CHECK-NEXT: csel w11, w11, w9, gt
-; CHECK-NEXT: cmp w14, #127
-; CHECK-NEXT: mov v0.s[1], w10
-; CHECK-NEXT: csel w14, w14, w8, lt
-; CHECK-NEXT: cmn w14, #128
-; CHECK-NEXT: mov d2, v3.d[1]
-; CHECK-NEXT: fmov s1, w11
-; CHECK-NEXT: csel w11, w14, w9, gt
-; CHECK-NEXT: cmp w13, #127
-; CHECK-NEXT: fcvtzs w10, d3
-; CHECK-NEXT: mov w14, v0.s[1]
-; CHECK-NEXT: csel w13, w13, w8, lt
-; CHECK-NEXT: cmn w13, #128
-; CHECK-NEXT: mov d3, v4.d[1]
-; CHECK-NEXT: csel w13, w13, w9, gt
-; CHECK-NEXT: mov v1.s[1], w12
+; CHECK-NEXT: fmov s1, w12
; CHECK-NEXT: fcvtzs w12, d2
-; CHECK-NEXT: mov v0.b[1], w14
-; CHECK-NEXT: fmov s2, w13
+; CHECK-NEXT: mov d2, v3.d[1]
+; CHECK-NEXT: cmp w11, #127
+; CHECK-NEXT: mov w13, v0.s[1]
+; CHECK-NEXT: mov v1.s[1], w10
+; CHECK-NEXT: csel w10, w11, w8, lt
+; CHECK-NEXT: cmn w10, #128
+; CHECK-NEXT: fcvtzs w11, d2
+; CHECK-NEXT: csel w10, w10, w9, gt
; CHECK-NEXT: cmp w12, #127
-; CHECK-NEXT: fcvtzs w13, d3
+; CHECK-NEXT: mov v0.b[1], w13
; CHECK-NEXT: csel w12, w12, w8, lt
-; CHECK-NEXT: fcvtzs w14, d4
; CHECK-NEXT: cmn w12, #128
-; CHECK-NEXT: mov d3, v5.d[1]
-; CHECK-NEXT: mov v2.s[1], w11
-; CHECK-NEXT: mov w11, v1.s[1]
-; CHECK-NEXT: mov v0.b[2], v1.b[0]
+; CHECK-NEXT: mov w13, v1.s[1]
; CHECK-NEXT: csel w12, w12, w9, gt
-; CHECK-NEXT: cmp w10, #127
-; CHECK-NEXT: mov d4, v6.d[1]
-; CHECK-NEXT: csel w10, w10, w8, lt
+; CHECK-NEXT: cmp w11, #127
+; CHECK-NEXT: fmov s2, w12
+; CHECK-NEXT: fcvtzs w12, d3
+; CHECK-NEXT: mov d3, v4.d[1]
+; CHECK-NEXT: mov v0.b[2], v1.b[0]
+; CHECK-NEXT: mov v2.s[1], w10
+; CHECK-NEXT: csel w10, w11, w8, lt
; CHECK-NEXT: cmn w10, #128
-; CHECK-NEXT: csel w10, w10, w9, gt
-; CHECK-NEXT: cmp w13, #127
-; CHECK-NEXT: mov v0.b[3], w11
-; CHECK-NEXT: csel w13, w13, w8, lt
-; CHECK-NEXT: cmn w13, #128
; CHECK-NEXT: fcvtzs w11, d3
-; CHECK-NEXT: csel w13, w13, w9, gt
-; CHECK-NEXT: cmp w14, #127
-; CHECK-NEXT: fmov s3, w10
-; CHECK-NEXT: csel w10, w14, w8, lt
-; CHECK-NEXT: mov w14, v2.s[1]
-; CHECK-NEXT: cmn w10, #128
-; CHECK-NEXT: mov v0.b[4], v2.b[0]
; CHECK-NEXT: csel w10, w10, w9, gt
-; CHECK-NEXT: mov v3.s[1], w12
+; CHECK-NEXT: cmp w12, #127
+; CHECK-NEXT: mov v0.b[3], w13
+; CHECK-NEXT: csel w12, w12, w8, lt
+; CHECK-NEXT: cmn w12, #128
+; CHECK-NEXT: mov w13, v2.s[1]
+; CHECK-NEXT: csel w12, w12, w9, gt
; CHECK-NEXT: cmp w11, #127
-; CHECK-NEXT: csel w11, w11, w8, lt
-; CHECK-NEXT: fcvtzs w12, d5
-; CHECK-NEXT: cmn w11, #128
-; CHECK-NEXT: mov v0.b[5], w14
-; CHECK-NEXT: fcvtzs w14, d4
-; CHECK-NEXT: fmov s4, w10
-; CHECK-NEXT: csel w10, w11, w9, gt
-; CHECK-NEXT: mov w11, v3.s[1]
+; CHECK-NEXT: fmov s3, w12
+; CHECK-NEXT: fcvtzs w12, d4
+; CHECK-NEXT: mov v0.b[4], v2.b[0]
+; CHECK-NEXT: mov d4, v5.d[1]
+; CHECK-NEXT: mov v3.s[1], w10
+; CHECK-NEXT: csel w10, w11, w8, lt
+; CHECK-NEXT: cmn w10, #128
+; CHECK-NEXT: mov v0.b[5], w13
+; CHECK-NEXT: csel w10, w10, w9, gt
; CHECK-NEXT: cmp w12, #127
+; CHECK-NEXT: fcvtzs w11, d4
; CHECK-NEXT: csel w12, w12, w8, lt
-; CHECK-NEXT: mov v0.b[6], v3.b[0]
; CHECK-NEXT: cmn w12, #128
-; CHECK-NEXT: mov v4.s[1], w13
+; CHECK-NEXT: mov w13, v3.s[1]
; CHECK-NEXT: csel w12, w12, w9, gt
-; CHECK-NEXT: cmp w14, #127
-; CHECK-NEXT: csel w13, w14, w8, lt
-; CHECK-NEXT: mov v0.b[7], w11
-; CHECK-NEXT: fcvtzs w11, d6
-; CHECK-NEXT: cmn w13, #128
-; CHECK-NEXT: fmov s5, w12
-; CHECK-NEXT: csel w12, w13, w9, gt
-; CHECK-NEXT: mov w13, v4.s[1]
+; CHECK-NEXT: mov v0.b[6], v3.b[0]
+; CHECK-NEXT: fmov s4, w12
+; CHECK-NEXT: fcvtzs w12, d5
; CHECK-NEXT: cmp w11, #127
-; CHECK-NEXT: mov d6, v7.d[1]
-; CHECK-NEXT: mov v0.b[8], v4.b[0]
-; CHECK-NEXT: csel w11, w11, w8, lt
+; CHECK-NEXT: mov d5, v6.d[1]
+; CHECK-NEXT: mov v4.s[1], w10
+; CHECK-NEXT: csel w10, w11, w8, lt
+; CHECK-NEXT: mov v0.b[7], w13
+; CHECK-NEXT: cmn w10, #128
+; CHECK-NEXT: csel w10, w10, w9, gt
+; CHECK-NEXT: cmp w12, #127
+; CHECK-NEXT: fcvtzs w13, d5
+; CHECK-NEXT: csel w11, w12, w8, lt
; CHECK-NEXT: cmn w11, #128
-; CHECK-NEXT: mov v5.s[1], w10
-; CHECK-NEXT: csel w10, w11, w9, gt
+; CHECK-NEXT: mov w12, v4.s[1]
+; CHECK-NEXT: mov v0.b[8], v4.b[0]
+; CHECK-NEXT: csel w11, w11, w9, gt
+; CHECK-NEXT: fmov s5, w11
; CHECK-NEXT: fcvtzs w11, d6
-; CHECK-NEXT: mov v0.b[9], w13
-; CHECK-NEXT: fcvtzs w13, d7
-; CHECK-NEXT: fmov s6, w10
-; CHECK-NEXT: mov w10, v5.s[1]
+; CHECK-NEXT: cmp w13, #127
+; CHECK-NEXT: mov d6, v7.d[1]
+; CHECK-NEXT: mov v0.b[9], w12
+; CHECK-NEXT: mov v5.s[1], w10
+; CHECK-NEXT: csel w10, w13, w8, lt
+; CHECK-NEXT: cmn w10, #128
+; CHECK-NEXT: csel w10, w10, w9, gt
; CHECK-NEXT: cmp w11, #127
+; CHECK-NEXT: fcvtzs w13, d6
; CHECK-NEXT: csel w11, w11, w8, lt
-; CHECK-NEXT: mov v0.b[10], v5.b[0]
; CHECK-NEXT: cmn w11, #128
-; CHECK-NEXT: mov v6.s[1], w12
-; CHECK-NEXT: mov v0.b[11], w10
-; CHECK-NEXT: csel w10, w11, w9, gt
+; CHECK-NEXT: mov v0.b[10], v5.b[0]
+; CHECK-NEXT: mov w12, v5.s[1]
+; CHECK-NEXT: csel w11, w11, w9, gt
+; CHECK-NEXT: fmov s6, w11
+; CHECK-NEXT: fcvtzs w11, d7
; CHECK-NEXT: cmp w13, #127
-; CHECK-NEXT: csel w8, w13, w8, lt
+; CHECK-NEXT: mov v0.b[11], w12
+; CHECK-NEXT: mov v6.s[1], w10
+; CHECK-NEXT: csel w10, w13, w8, lt
+; CHECK-NEXT: cmn w10, #128
+; CHECK-NEXT: csel w10, w10, w9, gt
+; CHECK-NEXT: cmp w11, #127
+; CHECK-NEXT: csel w8, w11, w8, lt
; CHECK-NEXT: cmn w8, #128
-; CHECK-NEXT: csel w8, w8, w9, gt
-; CHECK-NEXT: mov w9, v6.s[1]
; CHECK-NEXT: mov v0.b[12], v6.b[0]
+; CHECK-NEXT: mov w11, v6.s[1]
+; CHECK-NEXT: csel w8, w8, w9, gt
; CHECK-NEXT: fmov s7, w8
-; CHECK-NEXT: mov v0.b[13], w9
+; CHECK-NEXT: mov v0.b[13], w11
; CHECK-NEXT: mov v7.s[1], w10
; CHECK-NEXT: mov v0.b[14], v7.b[0]
; CHECK-NEXT: mov w8, v7.s[1]
@@ -3491,62 +3491,62 @@
; CHECK-LABEL: test_signed_v8f64_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: mov d4, v3.d[1]
-; CHECK-NEXT: mov w8, #32767 // =0x7fff
+; CHECK-NEXT: mov w9, #32767 // =0x7fff
; CHECK-NEXT: fcvtzs w10, d3
; CHECK-NEXT: mov w11, #-32768 // =0xffff8000
; CHECK-NEXT: mov d3, v1.d[1]
; CHECK-NEXT: fcvtzs w13, d2
; CHECK-NEXT: fcvtzs w15, d1
-; CHECK-NEXT: mov d1, v0.d[1]
-; CHECK-NEXT: fcvtzs w9, d4
+; CHECK-NEXT: fcvtzs w17, d0
+; CHECK-NEXT: fcvtzs w8, d4
; CHECK-NEXT: mov d4, v2.d[1]
+; CHECK-NEXT: mov d2, v0.d[1]
; CHECK-NEXT: fcvtzs w14, d3
-; CHECK-NEXT: cmp w9, w8
-; CHECK-NEXT: csel w9, w9, w8, lt
+; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: fcvtzs w12, d4
-; CHECK-NEXT: cmn w9, #8, lsl #12 // =32768
-; CHECK-NEXT: csel w9, w9, w11, gt
-; CHECK-NEXT: cmp w10, w8
-; CHECK-NEXT: csel w10, w10, w8, lt
+; CHECK-NEXT: fcvtzs w16, d2
+; CHECK-NEXT: csel w8, w8, w9, lt
+; CHECK-NEXT: cmn w8, #8, lsl #12 // =32768
+; CHECK-NEXT: csel w8, w8, w11, gt
+; CHECK-NEXT: cmp w10, w9
+; CHECK-NEXT: csel w10, w10, w9, lt
; CHECK-NEXT: cmn w10, #8, lsl #12 // =32768
; CHECK-NEXT: csel w10, w10, w11, gt
-; CHECK-NEXT: cmp w12, w8
-; CHECK-NEXT: csel w12, w12, w8, lt
+; CHECK-NEXT: cmp w12, w9
+; CHECK-NEXT: csel w12, w12, w9, lt
+; CHECK-NEXT: fmov s3, w10
; CHECK-NEXT: cmn w12, #8, lsl #12 // =32768
; CHECK-NEXT: csel w12, w12, w11, gt
-; CHECK-NEXT: cmp w13, w8
-; CHECK-NEXT: csel w13, w13, w8, lt
-; CHECK-NEXT: fmov s5, w10
+; CHECK-NEXT: cmp w13, w9
+; CHECK-NEXT: csel w13, w13, w9, lt
+; CHECK-NEXT: mov v3.s[1], w8
; CHECK-NEXT: cmn w13, #8, lsl #12 // =32768
; CHECK-NEXT: csel w13, w13, w11, gt
-; CHECK-NEXT: cmp w14, w8
-; CHECK-NEXT: csel w14, w14, w8, lt
+; CHECK-NEXT: cmp w14, w9
+; CHECK-NEXT: csel w14, w14, w9, lt
+; CHECK-NEXT: fmov s2, w13
; CHECK-NEXT: cmn w14, #8, lsl #12 // =32768
-; CHECK-NEXT: csel w10, w14, w11, gt
-; CHECK-NEXT: cmp w15, w8
-; CHECK-NEXT: fcvtzs w14, d1
-; CHECK-NEXT: csel w15, w15, w8, lt
+; CHECK-NEXT: csel w14, w14, w11, gt
+; CHECK-NEXT: cmp w15, w9
+; CHECK-NEXT: csel w15, w15, w9, lt
+; CHECK-NEXT: mov v2.s[1], w12
; CHECK-NEXT: cmn w15, #8, lsl #12 // =32768
-; CHECK-NEXT: mov v5.s[1], w9
-; CHECK-NEXT: csel w9, w15, w11, gt
-; CHECK-NEXT: cmp w14, w8
-; CHECK-NEXT: fcvtzs w15, d0
-; CHECK-NEXT: fmov s4, w13
-; CHECK-NEXT: csel w13, w14, w8, lt
-; CHECK-NEXT: cmn w13, #8, lsl #12 // =32768
-; CHECK-NEXT: csel w13, w13, w11, gt
-; CHECK-NEXT: cmp w15, w8
-; CHECK-NEXT: mov v4.s[1], w12
-; CHECK-NEXT: csel w8, w15, w8, lt
-; CHECK-NEXT: fmov s3, w9
-; CHECK-NEXT: cmn w8, #8, lsl #12 // =32768
-; CHECK-NEXT: csel w8, w8, w11, gt
-; CHECK-NEXT: mov v3.s[1], w10
-; CHECK-NEXT: fmov s2, w8
+; CHECK-NEXT: csel w15, w15, w11, gt
+; CHECK-NEXT: cmp w16, w9
+; CHECK-NEXT: csel w10, w16, w9, lt
+; CHECK-NEXT: fmov s1, w15
+; CHECK-NEXT: cmn w10, #8, lsl #12 // =32768
+; CHECK-NEXT: csel w8, w10, w11, gt
+; CHECK-NEXT: cmp w17, w9
+; CHECK-NEXT: csel w9, w17, w9, lt
+; CHECK-NEXT: mov v1.s[1], w14
+; CHECK-NEXT: cmn w9, #8, lsl #12 // =32768
+; CHECK-NEXT: csel w9, w9, w11, gt
+; CHECK-NEXT: fmov s0, w9
+; CHECK-NEXT: mov v0.s[1], w8
; CHECK-NEXT: adrp x8, .LCPI84_0
-; CHECK-NEXT: mov v2.s[1], w13
-; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI84_0]
-; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b, v4.16b, v5.16b }, v0.16b
+; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI84_0]
+; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
; CHECK-NEXT: ret
%x = call <8 x i16> @llvm.fptosi.sat.v8f64.v8i16(<8 x double> %f)
ret <8 x i16> %x
@@ -3558,44 +3558,48 @@
; CHECK-NEXT: mov d16, v3.d[1]
; CHECK-NEXT: mov w9, #32767 // =0x7fff
; CHECK-NEXT: fcvtzs w11, d3
-; CHECK-NEXT: mov w8, #-32768 // =0xffff8000
; CHECK-NEXT: mov d3, v1.d[1]
; CHECK-NEXT: fcvtzs w14, d2
; CHECK-NEXT: fcvtzs w15, d1
; CHECK-NEXT: mov d1, v7.d[1]
-; CHECK-NEXT: fcvtzs w10, d16
-; CHECK-NEXT: mov d16, v2.d[1]
-; CHECK-NEXT: mov d2, v0.d[1]
; CHECK-NEXT: fcvtzs w18, d0
-; CHECK-NEXT: mov d0, v6.d[1]
; CHECK-NEXT: fcvtzs w0, d7
-; CHECK-NEXT: cmp w10, w9
; CHECK-NEXT: fcvtzs w2, d6
-; CHECK-NEXT: csel w10, w10, w9, lt
-; CHECK-NEXT: fcvtzs w12, d16
-; CHECK-NEXT: cmn w10, #8, lsl #12 // =32768
+; CHECK-NEXT: fcvtzs w4, d5
+; CHECK-NEXT: fcvtzs w6, d4
+; CHECK-NEXT: fcvtzs w8, d16
+; CHECK-NEXT: mov d16, v2.d[1]
+; CHECK-NEXT: mov d2, v0.d[1]
+; CHECK-NEXT: mov d0, v6.d[1]
+; CHECK-NEXT: cmp w8, w9
+; CHECK-NEXT: fcvtzs w13, d16
; CHECK-NEXT: fcvtzs w17, d2
+; CHECK-NEXT: csel w10, w8, w9, lt
+; CHECK-NEXT: mov w8, #-32768 // =0xffff8000
+; CHECK-NEXT: fcvtzs w1, d0
+; CHECK-NEXT: cmn w10, #8, lsl #12 // =32768
+; CHECK-NEXT: mov d0, v5.d[1]
; CHECK-NEXT: csel w10, w10, w8, gt
; CHECK-NEXT: cmp w11, w9
; CHECK-NEXT: csel w11, w11, w9, lt
-; CHECK-NEXT: fcvtzs w1, d0
; CHECK-NEXT: cmn w11, #8, lsl #12 // =32768
+; CHECK-NEXT: csel w12, w11, w8, gt
+; CHECK-NEXT: cmp w13, w9
+; CHECK-NEXT: fcvtzs w3, d0
+; CHECK-NEXT: csel w11, w13, w9, lt
+; CHECK-NEXT: fcvtzs w13, d3
; CHECK-NEXT: mov d0, v4.d[1]
-; CHECK-NEXT: csel w13, w11, w8, gt
-; CHECK-NEXT: cmp w12, w9
-; CHECK-NEXT: csel w11, w12, w9, lt
-; CHECK-NEXT: fcvtzs w12, d3
; CHECK-NEXT: cmn w11, #8, lsl #12 // =32768
; CHECK-NEXT: csel w11, w11, w8, gt
; CHECK-NEXT: cmp w14, w9
; CHECK-NEXT: csel w14, w14, w9, lt
-; CHECK-NEXT: fmov s19, w13
; CHECK-NEXT: cmn w14, #8, lsl #12 // =32768
+; CHECK-NEXT: fcvtzs w5, d0
; CHECK-NEXT: csel w14, w14, w8, gt
-; CHECK-NEXT: cmp w12, w9
-; CHECK-NEXT: csel w12, w12, w9, lt
-; CHECK-NEXT: cmn w12, #8, lsl #12 // =32768
-; CHECK-NEXT: csel w12, w12, w8, gt
+; CHECK-NEXT: cmp w13, w9
+; CHECK-NEXT: csel w13, w13, w9, lt
+; CHECK-NEXT: cmn w13, #8, lsl #12 // =32768
+; CHECK-NEXT: csel w13, w13, w8, gt
; CHECK-NEXT: cmp w15, w9
; CHECK-NEXT: csel w15, w15, w9, lt
; CHECK-NEXT: cmn w15, #8, lsl #12 // =32768
@@ -3603,68 +3607,64 @@
; CHECK-NEXT: cmp w17, w9
; CHECK-NEXT: csel w15, w17, w9, lt
; CHECK-NEXT: fcvtzs w17, d1
+; CHECK-NEXT: fmov s3, w12
; CHECK-NEXT: cmn w15, #8, lsl #12 // =32768
-; CHECK-NEXT: mov d1, v5.d[1]
; CHECK-NEXT: csel w15, w15, w8, gt
; CHECK-NEXT: cmp w18, w9
; CHECK-NEXT: csel w18, w18, w9, lt
+; CHECK-NEXT: mov v3.s[1], w10
; CHECK-NEXT: cmn w18, #8, lsl #12 // =32768
+; CHECK-NEXT: fmov s2, w14
; CHECK-NEXT: csel w18, w18, w8, gt
; CHECK-NEXT: cmp w17, w9
; CHECK-NEXT: csel w17, w17, w9, lt
; CHECK-NEXT: cmn w17, #8, lsl #12 // =32768
+; CHECK-NEXT: mov v2.s[1], w11
; CHECK-NEXT: csel w17, w17, w8, gt
; CHECK-NEXT: cmp w0, w9
+; CHECK-NEXT: fmov s1, w16
; CHECK-NEXT: csel w0, w0, w9, lt
; CHECK-NEXT: cmn w0, #8, lsl #12 // =32768
-; CHECK-NEXT: csel w13, w0, w8, gt
+; CHECK-NEXT: csel w0, w0, w8, gt
; CHECK-NEXT: cmp w1, w9
+; CHECK-NEXT: mov v1.s[1], w13
; CHECK-NEXT: csel w1, w1, w9, lt
-; CHECK-NEXT: fcvtzs w0, d1
+; CHECK-NEXT: fmov s7, w0
+; CHECK-NEXT: fmov s0, w18
; CHECK-NEXT: cmn w1, #8, lsl #12 // =32768
-; CHECK-NEXT: mov v19.s[1], w10
-; CHECK-NEXT: csel w10, w1, w8, gt
+; CHECK-NEXT: csel w1, w1, w8, gt
; CHECK-NEXT: cmp w2, w9
-; CHECK-NEXT: fcvtzs w1, d5
; CHECK-NEXT: csel w2, w2, w9, lt
-; CHECK-NEXT: fmov s18, w14
+; CHECK-NEXT: mov v7.s[1], w17
+; CHECK-NEXT: mov v0.s[1], w15
; CHECK-NEXT: cmn w2, #8, lsl #12 // =32768
-; CHECK-NEXT: fmov s23, w13
; CHECK-NEXT: csel w2, w2, w8, gt
-; CHECK-NEXT: cmp w0, w9
-; CHECK-NEXT: csel w14, w0, w9, lt
-; CHECK-NEXT: cmn w14, #8, lsl #12 // =32768
-; CHECK-NEXT: csel w13, w14, w8, gt
-; CHECK-NEXT: cmp w1, w9
-; CHECK-NEXT: fcvtzs w14, d0
-; CHECK-NEXT: csel w0, w1, w9, lt
-; CHECK-NEXT: cmn w0, #8, lsl #12 // =32768
-; CHECK-NEXT: mov v18.s[1], w11
-; CHECK-NEXT: csel w11, w0, w8, gt
-; CHECK-NEXT: mov v23.s[1], w17
-; CHECK-NEXT: cmp w14, w9
-; CHECK-NEXT: fcvtzs w17, d4
-; CHECK-NEXT: csel w14, w14, w9, lt
-; CHECK-NEXT: fmov s22, w2
-; CHECK-NEXT: cmn w14, #8, lsl #12 // =32768
-; CHECK-NEXT: csel w14, w14, w8, gt
-; CHECK-NEXT: fmov s17, w16
-; CHECK-NEXT: cmp w17, w9
-; CHECK-NEXT: mov v22.s[1], w10
-; CHECK-NEXT: csel w9, w17, w9, lt
-; CHECK-NEXT: fmov s21, w11
+; CHECK-NEXT: cmp w3, w9
+; CHECK-NEXT: csel w3, w3, w9, lt
+; CHECK-NEXT: fmov s6, w2
+; CHECK-NEXT: cmn w3, #8, lsl #12 // =32768
+; CHECK-NEXT: csel w3, w3, w8, gt
+; CHECK-NEXT: cmp w4, w9
+; CHECK-NEXT: csel w4, w4, w9, lt
+; CHECK-NEXT: mov v6.s[1], w1
+; CHECK-NEXT: cmn w4, #8, lsl #12 // =32768
+; CHECK-NEXT: csel w12, w4, w8, gt
+; CHECK-NEXT: cmp w5, w9
+; CHECK-NEXT: csel w10, w5, w9, lt
+; CHECK-NEXT: fmov s5, w12
+; CHECK-NEXT: cmn w10, #8, lsl #12 // =32768
+; CHECK-NEXT: csel w10, w10, w8, gt
+; CHECK-NEXT: cmp w6, w9
+; CHECK-NEXT: csel w9, w6, w9, lt
+; CHECK-NEXT: mov v5.s[1], w3
; CHECK-NEXT: cmn w9, #8, lsl #12 // =32768
; CHECK-NEXT: csel w8, w9, w8, gt
-; CHECK-NEXT: adrp x9, .LCPI85_0
-; CHECK-NEXT: mov v17.s[1], w12
-; CHECK-NEXT: mov v21.s[1], w13
-; CHECK-NEXT: fmov s16, w18
-; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI85_0]
-; CHECK-NEXT: fmov s20, w8
-; CHECK-NEXT: mov v16.s[1], w15
-; CHECK-NEXT: mov v20.s[1], w14
-; CHECK-NEXT: tbl v0.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b
-; CHECK-NEXT: tbl v1.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v1.16b
+; CHECK-NEXT: fmov s4, w8
+; CHECK-NEXT: adrp x8, .LCPI85_0
+; CHECK-NEXT: ldr q16, [x8, :lo12:.LCPI85_0]
+; CHECK-NEXT: mov v4.s[1], w10
+; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v16.16b
+; CHECK-NEXT: tbl v1.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v16.16b
; CHECK-NEXT: ret
%x = call <16 x i16> @llvm.fptosi.sat.v16f64.v16i16(<16 x double> %f)
ret <16 x i16> %x
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll
@@ -32,7 +32,7 @@
; CHECK-LABEL: test_unsigned_i8_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzu w9, s0
-; CHECK-NEXT: mov w8, #255
+; CHECK-NEXT: mov w8, #255 // =0xff
; CHECK-NEXT: cmp w9, #255
; CHECK-NEXT: csel w0, w9, w8, lo
; CHECK-NEXT: ret
@@ -44,7 +44,7 @@
; CHECK-LABEL: test_unsigned_i13_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzu w8, s0
-; CHECK-NEXT: mov w9, #8191
+; CHECK-NEXT: mov w9, #8191 // =0x1fff
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: csel w0, w8, w9, lo
; CHECK-NEXT: ret
@@ -56,7 +56,7 @@
; CHECK-LABEL: test_unsigned_i16_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzu w8, s0
-; CHECK-NEXT: mov w9, #65535
+; CHECK-NEXT: mov w9, #65535 // =0xffff
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: csel w0, w8, w9, lo
; CHECK-NEXT: ret
@@ -68,7 +68,7 @@
; CHECK-LABEL: test_unsigned_i19_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzu w8, s0
-; CHECK-NEXT: mov w9, #524287
+; CHECK-NEXT: mov w9, #524287 // =0x7ffff
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: csel w0, w8, w9, lo
; CHECK-NEXT: ret
@@ -89,7 +89,7 @@
; CHECK-LABEL: test_unsigned_i50_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzu x8, s0
-; CHECK-NEXT: mov x9, #1125899906842623
+; CHECK-NEXT: mov x9, #1125899906842623 // =0x3ffffffffffff
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: csel x0, x8, x9, lo
; CHECK-NEXT: ret
@@ -113,11 +113,11 @@
; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: fmov s8, s0
; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: mov w8, #1904214015
+; CHECK-NEXT: mov w8, #1904214015 // =0x717fffff
; CHECK-NEXT: fcmp s8, #0.0
-; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT: mov x10, #68719476735
+; CHECK-NEXT: mov x10, #68719476735 // =0xfffffffff
; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, xzr, x1, lt
; CHECK-NEXT: fcmp s8, s0
@@ -136,7 +136,7 @@
; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: fmov s8, s0
; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: mov w8, #2139095039
+; CHECK-NEXT: mov w8, #2139095039 // =0x7f7fffff
; CHECK-NEXT: fcmp s8, #0.0
; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: fmov s0, w8
@@ -181,7 +181,7 @@
; CHECK-LABEL: test_unsigned_i8_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzu w9, d0
-; CHECK-NEXT: mov w8, #255
+; CHECK-NEXT: mov w8, #255 // =0xff
; CHECK-NEXT: cmp w9, #255
; CHECK-NEXT: csel w0, w9, w8, lo
; CHECK-NEXT: ret
@@ -193,7 +193,7 @@
; CHECK-LABEL: test_unsigned_i13_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzu w8, d0
-; CHECK-NEXT: mov w9, #8191
+; CHECK-NEXT: mov w9, #8191 // =0x1fff
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: csel w0, w8, w9, lo
; CHECK-NEXT: ret
@@ -205,7 +205,7 @@
; CHECK-LABEL: test_unsigned_i16_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzu w8, d0
-; CHECK-NEXT: mov w9, #65535
+; CHECK-NEXT: mov w9, #65535 // =0xffff
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: csel w0, w8, w9, lo
; CHECK-NEXT: ret
@@ -217,7 +217,7 @@
; CHECK-LABEL: test_unsigned_i19_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzu w8, d0
-; CHECK-NEXT: mov w9, #524287
+; CHECK-NEXT: mov w9, #524287 // =0x7ffff
; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: csel w0, w8, w9, lo
; CHECK-NEXT: ret
@@ -238,7 +238,7 @@
; CHECK-LABEL: test_unsigned_i50_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzu x8, d0
-; CHECK-NEXT: mov x9, #1125899906842623
+; CHECK-NEXT: mov x9, #1125899906842623 // =0x3ffffffffffff
; CHECK-NEXT: cmp x8, x9
; CHECK-NEXT: csel x0, x8, x9, lo
; CHECK-NEXT: ret
@@ -262,11 +262,11 @@
; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: fmov d8, d0
; CHECK-NEXT: bl __fixunsdfti
-; CHECK-NEXT: mov x8, #5057542381537067007
+; CHECK-NEXT: mov x8, #5057542381537067007 // =0x462fffffffffffff
; CHECK-NEXT: fcmp d8, #0.0
-; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT: mov x10, #68719476735
+; CHECK-NEXT: mov x10, #68719476735 // =0xfffffffff
; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, xzr, x1, lt
; CHECK-NEXT: fcmp d8, d0
@@ -285,7 +285,7 @@
; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: fmov d8, d0
; CHECK-NEXT: bl __fixunsdfti
-; CHECK-NEXT: mov x8, #5183643171103440895
+; CHECK-NEXT: mov x8, #5183643171103440895 // =0x47efffffffffffff
; CHECK-NEXT: fcmp d8, #0.0
; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: fmov d0, x8
@@ -338,7 +338,7 @@
; CHECK-CVT-LABEL: test_unsigned_i8_f16:
; CHECK-CVT: // %bb.0:
; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: mov w8, #255
+; CHECK-CVT-NEXT: mov w8, #255 // =0xff
; CHECK-CVT-NEXT: fcvtzu w9, s0
; CHECK-CVT-NEXT: cmp w9, #255
; CHECK-CVT-NEXT: csel w0, w9, w8, lo
@@ -347,7 +347,7 @@
; CHECK-FP16-LABEL: test_unsigned_i8_f16:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: fcvtzu w9, h0
-; CHECK-FP16-NEXT: mov w8, #255
+; CHECK-FP16-NEXT: mov w8, #255 // =0xff
; CHECK-FP16-NEXT: cmp w9, #255
; CHECK-FP16-NEXT: csel w0, w9, w8, lo
; CHECK-FP16-NEXT: ret
@@ -359,7 +359,7 @@
; CHECK-CVT-LABEL: test_unsigned_i13_f16:
; CHECK-CVT: // %bb.0:
; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: mov w9, #8191
+; CHECK-CVT-NEXT: mov w9, #8191 // =0x1fff
; CHECK-CVT-NEXT: fcvtzu w8, s0
; CHECK-CVT-NEXT: cmp w8, w9
; CHECK-CVT-NEXT: csel w0, w8, w9, lo
@@ -368,7 +368,7 @@
; CHECK-FP16-LABEL: test_unsigned_i13_f16:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: fcvtzu w8, h0
-; CHECK-FP16-NEXT: mov w9, #8191
+; CHECK-FP16-NEXT: mov w9, #8191 // =0x1fff
; CHECK-FP16-NEXT: cmp w8, w9
; CHECK-FP16-NEXT: csel w0, w8, w9, lo
; CHECK-FP16-NEXT: ret
@@ -380,7 +380,7 @@
; CHECK-CVT-LABEL: test_unsigned_i16_f16:
; CHECK-CVT: // %bb.0:
; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: mov w9, #65535
+; CHECK-CVT-NEXT: mov w9, #65535 // =0xffff
; CHECK-CVT-NEXT: fcvtzu w8, s0
; CHECK-CVT-NEXT: cmp w8, w9
; CHECK-CVT-NEXT: csel w0, w8, w9, lo
@@ -389,7 +389,7 @@
; CHECK-FP16-LABEL: test_unsigned_i16_f16:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: fcvtzu w8, h0
-; CHECK-FP16-NEXT: mov w9, #65535
+; CHECK-FP16-NEXT: mov w9, #65535 // =0xffff
; CHECK-FP16-NEXT: cmp w8, w9
; CHECK-FP16-NEXT: csel w0, w8, w9, lo
; CHECK-FP16-NEXT: ret
@@ -401,7 +401,7 @@
; CHECK-CVT-LABEL: test_unsigned_i19_f16:
; CHECK-CVT: // %bb.0:
; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: mov w9, #524287
+; CHECK-CVT-NEXT: mov w9, #524287 // =0x7ffff
; CHECK-CVT-NEXT: fcvtzu w8, s0
; CHECK-CVT-NEXT: cmp w8, w9
; CHECK-CVT-NEXT: csel w0, w8, w9, lo
@@ -410,7 +410,7 @@
; CHECK-FP16-LABEL: test_unsigned_i19_f16:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: fcvtzu w8, h0
-; CHECK-FP16-NEXT: mov w9, #524287
+; CHECK-FP16-NEXT: mov w9, #524287 // =0x7ffff
; CHECK-FP16-NEXT: cmp w8, w9
; CHECK-FP16-NEXT: csel w0, w8, w9, lo
; CHECK-FP16-NEXT: ret
@@ -437,7 +437,7 @@
; CHECK-CVT-LABEL: test_unsigned_i50_f16:
; CHECK-CVT: // %bb.0:
; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: mov x9, #1125899906842623
+; CHECK-CVT-NEXT: mov x9, #1125899906842623 // =0x3ffffffffffff
; CHECK-CVT-NEXT: fcvtzu x8, s0
; CHECK-CVT-NEXT: cmp x8, x9
; CHECK-CVT-NEXT: csel x0, x8, x9, lo
@@ -446,7 +446,7 @@
; CHECK-FP16-LABEL: test_unsigned_i50_f16:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: fcvtzu x8, h0
-; CHECK-FP16-NEXT: mov x9, #1125899906842623
+; CHECK-FP16-NEXT: mov x9, #1125899906842623 // =0x3ffffffffffff
; CHECK-FP16-NEXT: cmp x8, x9
; CHECK-FP16-NEXT: csel x0, x8, x9, lo
; CHECK-FP16-NEXT: ret
@@ -477,11 +477,11 @@
; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: mov w8, #1904214015
+; CHECK-NEXT: mov w8, #1904214015 // =0x717fffff
; CHECK-NEXT: fcmp s8, #0.0
-; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT: mov x10, #68719476735
+; CHECK-NEXT: mov x10, #68719476735 // =0xfffffffff
; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, xzr, x1, lt
; CHECK-NEXT: fcmp s8, s0
@@ -501,7 +501,7 @@
; CHECK-NEXT: str x30, [sp, #8] // 8-byte Folded Spill
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixunssfti
-; CHECK-NEXT: mov w8, #2139095039
+; CHECK-NEXT: mov w8, #2139095039 // =0x7f7fffff
; CHECK-NEXT: fcmp s8, #0.0
; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: fmov s0, w8
diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
--- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
+++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll
@@ -165,9 +165,9 @@
; CHECK: // %bb.0:
; CHECK-NEXT: mov d1, v0.d[1]
; CHECK-NEXT: fcvtzu w8, d0
+; CHECK-NEXT: fcvtzu w9, d1
; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: fcvtzu w8, d1
-; CHECK-NEXT: mov v0.s[1], w8
+; CHECK-NEXT: mov v0.s[1], w9
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%x = call <2 x i32> @llvm.fptoui.sat.v2f64.v2i32(<2 x double> %f)
@@ -178,10 +178,10 @@
; CHECK-LABEL: test_unsigned_v3f64_v3i32:
; CHECK: // %bb.0:
; CHECK-NEXT: fcvtzu w8, d0
+; CHECK-NEXT: fcvtzu w9, d1
; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: fcvtzu w8, d1
-; CHECK-NEXT: mov v0.s[1], w8
; CHECK-NEXT: fcvtzu w8, d2
+; CHECK-NEXT: mov v0.s[1], w9
; CHECK-NEXT: mov v0.s[2], w8
; CHECK-NEXT: fcvtzu w8, d0
; CHECK-NEXT: mov v0.s[3], w8
@@ -195,11 +195,11 @@
; CHECK: // %bb.0:
; CHECK-NEXT: mov d2, v0.d[1]
; CHECK-NEXT: fcvtzu w8, d0
+; CHECK-NEXT: fcvtzu w9, d2
; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: fcvtzu w8, d2
-; CHECK-NEXT: mov v0.s[1], w8
; CHECK-NEXT: fcvtzu w8, d1
; CHECK-NEXT: mov d1, v1.d[1]
+; CHECK-NEXT: mov v0.s[1], w9
; CHECK-NEXT: mov v0.s[2], w8
; CHECK-NEXT: fcvtzu w8, d1
; CHECK-NEXT: mov v0.s[3], w8
@@ -262,8 +262,8 @@
; CHECK-NEXT: adrp x8, .LCPI14_1
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: cmp w19, #0
-; CHECK-NEXT: csel w19, wzr, w0, lt
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI14_1]
+; CHECK-NEXT: csel w19, wzr, w0, lt
; CHECK-NEXT: bl __gttf2
; CHECK-NEXT: cmp w0, #0
; CHECK-NEXT: csinv w8, w19, wzr, le
@@ -285,11 +285,11 @@
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w30, -32
-; CHECK-NEXT: adrp x8, .LCPI15_0
-; CHECK-NEXT: stp q1, q0, [sp, #32] // 32-byte Folded Spill
; CHECK-NEXT: mov v2.16b, v1.16b
-; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: stp q1, q0, [sp, #32] // 32-byte Folded Spill
+; CHECK-NEXT: adrp x8, .LCPI15_0
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_0]
+; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: bl __getf2
; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
@@ -298,21 +298,21 @@
; CHECK-NEXT: adrp x8, .LCPI15_1
; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: cmp w19, #0
-; CHECK-NEXT: csel w19, wzr, w0, lt
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_1]
+; CHECK-NEXT: csel w19, wzr, w0, lt
; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill
; CHECK-NEXT: bl __gttf2
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: cmp w0, #0
; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: cmp w0, #0
; CHECK-NEXT: csinv w20, w19, wzr, le
; CHECK-NEXT: bl __getf2
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: mov w19, w0
; CHECK-NEXT: bl __fixunstfsi
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: cmp w19, #0
; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: cmp w19, #0
; CHECK-NEXT: csel w19, wzr, w0, lt
; CHECK-NEXT: bl __gttf2
; CHECK-NEXT: cmp w0, #0
@@ -338,12 +338,12 @@
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w30, -32
-; CHECK-NEXT: adrp x8, .LCPI16_0
; CHECK-NEXT: stp q0, q2, [sp, #48] // 32-byte Folded Spill
; CHECK-NEXT: mov v2.16b, v1.16b
+; CHECK-NEXT: adrp x8, .LCPI16_0
; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0]
+; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: str q1, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: bl __getf2
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
@@ -352,8 +352,8 @@
; CHECK-NEXT: adrp x8, .LCPI16_1
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: cmp w19, #0
-; CHECK-NEXT: csel w19, wzr, w0, lt
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_1]
+; CHECK-NEXT: csel w19, wzr, w0, lt
; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: bl __gttf2
; CHECK-NEXT: ldp q1, q0, [sp, #32] // 32-byte Folded Reload
@@ -364,8 +364,8 @@
; CHECK-NEXT: mov w19, w0
; CHECK-NEXT: bl __fixunstfsi
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: cmp w19, #0
; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: cmp w19, #0
; CHECK-NEXT: csel w19, wzr, w0, lt
; CHECK-NEXT: bl __gttf2
; CHECK-NEXT: cmp w0, #0
@@ -380,14 +380,14 @@
; CHECK-NEXT: mov w19, w0
; CHECK-NEXT: bl __fixunstfsi
; CHECK-NEXT: ldr q0, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT: cmp w19, #0
; CHECK-NEXT: ldr q1, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: cmp w19, #0
; CHECK-NEXT: csel w19, wzr, w0, lt
; CHECK-NEXT: bl __gttf2
; CHECK-NEXT: cmp w0, #0
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: csinv w8, w19, wzr, le
; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
+; CHECK-NEXT: csinv w8, w19, wzr, le
; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
; CHECK-NEXT: mov v0.s[2], w8
; CHECK-NEXT: add sp, sp, #112
@@ -406,13 +406,13 @@
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w30, -32
-; CHECK-NEXT: adrp x8, .LCPI17_0
; CHECK-NEXT: stp q0, q2, [sp, #16] // 32-byte Folded Spill
; CHECK-NEXT: mov v2.16b, v1.16b
+; CHECK-NEXT: adrp x8, .LCPI17_0
; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill
-; CHECK-NEXT: mov v0.16b, v2.16b
-; CHECK-NEXT: str q3, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_0]
+; CHECK-NEXT: str q3, [sp, #80] // 16-byte Folded Spill
+; CHECK-NEXT: mov v0.16b, v2.16b
; CHECK-NEXT: str q1, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: bl __getf2
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
@@ -421,21 +421,21 @@
; CHECK-NEXT: adrp x8, .LCPI17_1
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: cmp w19, #0
-; CHECK-NEXT: csel w19, wzr, w0, lt
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_1]
+; CHECK-NEXT: csel w19, wzr, w0, lt
; CHECK-NEXT: str q1, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: bl __gttf2
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: cmp w0, #0
; CHECK-NEXT: ldr q1, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: cmp w0, #0
; CHECK-NEXT: csinv w20, w19, wzr, le
; CHECK-NEXT: bl __getf2
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: mov w19, w0
; CHECK-NEXT: bl __fixunstfsi
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: cmp w19, #0
; CHECK-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: cmp w19, #0
; CHECK-NEXT: csel w19, wzr, w0, lt
; CHECK-NEXT: bl __gttf2
; CHECK-NEXT: cmp w0, #0
@@ -464,14 +464,14 @@
; CHECK-NEXT: mov w19, w0
; CHECK-NEXT: bl __fixunstfsi
; CHECK-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: cmp w19, #0
; CHECK-NEXT: ldr q1, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: cmp w19, #0
; CHECK-NEXT: csel w19, wzr, w0, lt
; CHECK-NEXT: bl __gttf2
; CHECK-NEXT: cmp w0, #0
; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: csinv w8, w19, wzr, le
; CHECK-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload
+; CHECK-NEXT: csinv w8, w19, wzr, le
; CHECK-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload
; CHECK-NEXT: mov v0.s[3], w8
; CHECK-NEXT: add sp, sp, #128
@@ -567,10 +567,10 @@
; CHECK-NEXT: fcvtzu v0.4s, v0.4s
; CHECK-NEXT: mov w1, v1.s[1]
; CHECK-NEXT: mov w2, v1.s[2]
-; CHECK-NEXT: mov w3, v1.s[3]
; CHECK-NEXT: mov w5, v0.s[1]
-; CHECK-NEXT: fmov w0, s1
+; CHECK-NEXT: mov w3, v1.s[3]
; CHECK-NEXT: fmov w4, s0
+; CHECK-NEXT: fmov w0, s1
; CHECK-NEXT: ret
%x = call <6 x i32> @llvm.fptoui.sat.v6f16.v6i32(<6 x half> %f)
ret <6 x i32> %x
@@ -710,9 +710,9 @@
; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT: mov s1, v0.s[1]
; CHECK-NEXT: fcvtzu x8, s0
+; CHECK-NEXT: fcvtzu x9, s1
; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: fcvtzu x8, s1
-; CHECK-NEXT: mov v0.d[1], x8
+; CHECK-NEXT: mov v0.d[1], x9
; CHECK-NEXT: ret
%x = call <2 x i64> @llvm.fptoui.sat.v2f32.v2i64(<2 x float> %f)
ret <2 x i64> %x
@@ -740,9 +740,9 @@
; CHECK-NEXT: mov w8, #1904214015 // =0x717fffff
; CHECK-NEXT: fcmp s8, #0.0
; CHECK-NEXT: mov x21, #68719476735 // =0xfffffffff
+; CHECK-NEXT: fmov s9, w8
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
-; CHECK-NEXT: fmov s9, w8
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, xzr, x1, lt
; CHECK-NEXT: fcmp s8, s9
@@ -760,8 +760,8 @@
; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: csinv x8, x8, xzr, le
; CHECK-NEXT: csel x1, x21, x9, gt
-; CHECK-NEXT: ldp x30, x21, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ldp x30, x21, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: mov v0.d[1], x1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: add sp, sp, #64
@@ -791,8 +791,8 @@
; CHECK-NEXT: mov w8, #2139095039 // =0x7f7fffff
; CHECK-NEXT: fcmp s8, #0.0
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: fmov s9, w8
+; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
; CHECK-NEXT: csel x8, xzr, x1, lt
; CHECK-NEXT: csel x9, xzr, x0, lt
; CHECK-NEXT: fcmp s8, s9
@@ -802,8 +802,8 @@
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: mov x2, x19
; CHECK-NEXT: mov x3, x20
-; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
; CHECK-NEXT: fcmp s0, #0.0
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, xzr, x1, lt
@@ -904,14 +904,14 @@
; CHECK-LABEL: test_unsigned_v4f32_v4i50:
; CHECK: // %bb.0:
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: mov x8, #1125899906842623 // =0x3ffffffffffff
; CHECK-NEXT: mov s3, v0.s[1]
+; CHECK-NEXT: mov x8, #1125899906842623 // =0x3ffffffffffff
; CHECK-NEXT: fcvtzu x11, s0
; CHECK-NEXT: mov s2, v1.s[1]
; CHECK-NEXT: fcvtzu x9, s1
; CHECK-NEXT: fcvtzu x12, s3
-; CHECK-NEXT: cmp x9, x8
; CHECK-NEXT: fcvtzu x10, s2
+; CHECK-NEXT: cmp x9, x8
; CHECK-NEXT: csel x2, x9, x8, lo
; CHECK-NEXT: cmp x10, x8
; CHECK-NEXT: csel x3, x10, x8, lo
@@ -932,12 +932,12 @@
; CHECK-NEXT: fcvtzu x9, s0
; CHECK-NEXT: mov s2, v1.s[1]
; CHECK-NEXT: fcvtzu x8, s1
+; CHECK-NEXT: fcvtzu x11, s3
; CHECK-NEXT: fmov d0, x9
-; CHECK-NEXT: fcvtzu x9, s3
+; CHECK-NEXT: fcvtzu x10, s2
; CHECK-NEXT: fmov d1, x8
-; CHECK-NEXT: fcvtzu x8, s2
-; CHECK-NEXT: mov v0.d[1], x9
-; CHECK-NEXT: mov v1.d[1], x8
+; CHECK-NEXT: mov v0.d[1], x11
+; CHECK-NEXT: mov v1.d[1], x10
; CHECK-NEXT: ret
%x = call <4 x i64> @llvm.fptoui.sat.v4f32.v4i64(<4 x float> %f)
ret <4 x i64> %x
@@ -968,10 +968,10 @@
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: mov w8, #1904214015 // =0x717fffff
-; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: fcmp s8, #0.0
-; CHECK-NEXT: mov x25, #68719476735 // =0xfffffffff
+; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: fmov s9, w8
+; CHECK-NEXT: mov x25, #68719476735 // =0xfffffffff
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, xzr, x1, lt
@@ -1009,17 +1009,17 @@
; CHECK-NEXT: fcmp s0, #0.0
; CHECK-NEXT: mov x7, x23
; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, xzr, x1, lt
; CHECK-NEXT: fcmp s0, s9
-; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: csinv x8, x8, xzr, le
; CHECK-NEXT: csel x1, x25, x9, gt
-; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: ldp x30, x25, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: mov v0.d[1], x1
-; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: add sp, sp, #112
; CHECK-NEXT: ret
@@ -1051,8 +1051,8 @@
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: mov w8, #2139095039 // =0x7f7fffff
-; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: fcmp s8, #0.0
+; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: fmov s9, w8
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
; CHECK-NEXT: csel x8, xzr, x1, lt
@@ -1090,17 +1090,17 @@
; CHECK-NEXT: mov x6, x23
; CHECK-NEXT: fcmp s0, #0.0
; CHECK-NEXT: mov x7, x24
+; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
; CHECK-NEXT: ldp x20, x19, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, xzr, x1, lt
; CHECK-NEXT: fcmp s0, s9
-; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
-; CHECK-NEXT: ldp x22, x21, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: csinv x8, x8, xzr, le
; CHECK-NEXT: csinv x1, x9, xzr, le
-; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: mov v0.d[1], x1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: add sp, sp, #112
@@ -1222,9 +1222,9 @@
; CHECK: // %bb.0:
; CHECK-NEXT: mov d1, v0.d[1]
; CHECK-NEXT: fcvtzu w8, d0
+; CHECK-NEXT: fcvtzu w9, d1
; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: fcvtzu w8, d1
-; CHECK-NEXT: mov v0.s[1], w8
+; CHECK-NEXT: mov v0.s[1], w9
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: ret
%x = call <2 x i32> @llvm.fptoui.sat.v2f64.v2i32(<2 x double> %f)
@@ -1279,9 +1279,9 @@
; CHECK-NEXT: mov x8, #5057542381537067007 // =0x462fffffffffffff
; CHECK-NEXT: fcmp d8, #0.0
; CHECK-NEXT: mov x21, #68719476735 // =0xfffffffff
+; CHECK-NEXT: fmov d9, x8
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
-; CHECK-NEXT: fmov d9, x8
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, xzr, x1, lt
; CHECK-NEXT: fcmp d8, d9
@@ -1299,8 +1299,8 @@
; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload
; CHECK-NEXT: csinv x8, x8, xzr, le
; CHECK-NEXT: csel x1, x21, x9, gt
-; CHECK-NEXT: ldp x30, x21, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: ldp x30, x21, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: mov v0.d[1], x1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: add sp, sp, #64
@@ -1329,8 +1329,8 @@
; CHECK-NEXT: mov x8, #5183643171103440895 // =0x47efffffffffffff
; CHECK-NEXT: fcmp d8, #0.0
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: fmov d9, x8
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT: csel x8, xzr, x1, lt
; CHECK-NEXT: csel x9, xzr, x0, lt
; CHECK-NEXT: fcmp d8, d9
@@ -1340,8 +1340,8 @@
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: mov x2, x19
; CHECK-NEXT: mov x3, x20
-; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
; CHECK-NEXT: fcmp d0, #0.0
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, xzr, x1, lt
@@ -1476,17 +1476,17 @@
; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-CVT-NEXT: mov h1, v0.h[1]
; CHECK-CVT-NEXT: mov h2, v0.h[2]
+; CHECK-CVT-NEXT: mov x8, #1125899906842623 // =0x3ffffffffffff
; CHECK-CVT-NEXT: mov h3, v0.h[3]
; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: mov x8, #1125899906842623 // =0x3ffffffffffff
; CHECK-CVT-NEXT: fcvt s1, h1
; CHECK-CVT-NEXT: fcvt s2, h2
; CHECK-CVT-NEXT: fcvt s3, h3
; CHECK-CVT-NEXT: fcvtzu x9, s0
; CHECK-CVT-NEXT: fcvtzu x10, s1
; CHECK-CVT-NEXT: fcvtzu x11, s2
-; CHECK-CVT-NEXT: cmp x9, x8
; CHECK-CVT-NEXT: fcvtzu x12, s3
+; CHECK-CVT-NEXT: cmp x9, x8
; CHECK-CVT-NEXT: csel x0, x9, x8, lo
; CHECK-CVT-NEXT: cmp x10, x8
; CHECK-CVT-NEXT: csel x1, x10, x8, lo
@@ -1501,13 +1501,13 @@
; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-FP16-NEXT: mov h1, v0.h[1]
; CHECK-FP16-NEXT: mov h2, v0.h[2]
+; CHECK-FP16-NEXT: mov x8, #1125899906842623 // =0x3ffffffffffff
; CHECK-FP16-NEXT: mov h3, v0.h[3]
; CHECK-FP16-NEXT: fcvtzu x9, h0
-; CHECK-FP16-NEXT: mov x8, #1125899906842623 // =0x3ffffffffffff
; CHECK-FP16-NEXT: fcvtzu x10, h1
; CHECK-FP16-NEXT: fcvtzu x11, h2
-; CHECK-FP16-NEXT: cmp x9, x8
; CHECK-FP16-NEXT: fcvtzu x12, h3
+; CHECK-FP16-NEXT: cmp x9, x8
; CHECK-FP16-NEXT: csel x0, x9, x8, lo
; CHECK-FP16-NEXT: cmp x10, x8
; CHECK-FP16-NEXT: csel x1, x10, x8, lo
@@ -1526,19 +1526,19 @@
; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-CVT-NEXT: mov h1, v0.h[2]
; CHECK-CVT-NEXT: mov h2, v0.h[1]
-; CHECK-CVT-NEXT: fcvt s3, h0
-; CHECK-CVT-NEXT: mov h0, v0.h[3]
+; CHECK-CVT-NEXT: mov h3, v0.h[3]
+; CHECK-CVT-NEXT: fcvt s0, h0
; CHECK-CVT-NEXT: fcvt s1, h1
; CHECK-CVT-NEXT: fcvt s2, h2
-; CHECK-CVT-NEXT: fcvtzu x8, s3
-; CHECK-CVT-NEXT: fcvt s3, h0
+; CHECK-CVT-NEXT: fcvt s3, h3
+; CHECK-CVT-NEXT: fcvtzu x8, s0
; CHECK-CVT-NEXT: fcvtzu x9, s1
+; CHECK-CVT-NEXT: fcvtzu x10, s2
+; CHECK-CVT-NEXT: fcvtzu x11, s3
; CHECK-CVT-NEXT: fmov d0, x8
-; CHECK-CVT-NEXT: fcvtzu x8, s2
; CHECK-CVT-NEXT: fmov d1, x9
-; CHECK-CVT-NEXT: fcvtzu x9, s3
-; CHECK-CVT-NEXT: mov v0.d[1], x8
-; CHECK-CVT-NEXT: mov v1.d[1], x9
+; CHECK-CVT-NEXT: mov v0.d[1], x10
+; CHECK-CVT-NEXT: mov v1.d[1], x11
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: test_unsigned_v4f16_v4i64:
@@ -1549,12 +1549,12 @@
; CHECK-FP16-NEXT: mov h3, v0.h[3]
; CHECK-FP16-NEXT: fcvtzu x8, h0
; CHECK-FP16-NEXT: fcvtzu x9, h1
+; CHECK-FP16-NEXT: fcvtzu x10, h2
+; CHECK-FP16-NEXT: fcvtzu x11, h3
; CHECK-FP16-NEXT: fmov d0, x8
-; CHECK-FP16-NEXT: fcvtzu x8, h2
; CHECK-FP16-NEXT: fmov d1, x9
-; CHECK-FP16-NEXT: fcvtzu x9, h3
-; CHECK-FP16-NEXT: mov v0.d[1], x8
-; CHECK-FP16-NEXT: mov v1.d[1], x9
+; CHECK-FP16-NEXT: mov v0.d[1], x10
+; CHECK-FP16-NEXT: mov v1.d[1], x11
; CHECK-FP16-NEXT: ret
%x = call <4 x i64> @llvm.fptoui.sat.v4f16.v4i64(<4 x half> %f)
ret <4 x i64> %x
@@ -1589,15 +1589,15 @@
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: mov w8, #1904214015 // =0x717fffff
; CHECK-NEXT: fcmp s8, #0.0
+; CHECK-NEXT: fmov s9, w8
; CHECK-NEXT: mov x25, #68719476735 // =0xfffffffff
; CHECK-NEXT: mov h0, v0.h[1]
-; CHECK-NEXT: fmov s9, w8
-; CHECK-NEXT: csel x8, xzr, x0, lt
-; CHECK-NEXT: csel x9, xzr, x1, lt
+; CHECK-NEXT: csel x9, xzr, x0, lt
+; CHECK-NEXT: csel x8, xzr, x1, lt
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: fcvt s8, h0
-; CHECK-NEXT: csel x19, x25, x9, gt
-; CHECK-NEXT: csinv x20, x8, xzr, le
+; CHECK-NEXT: csel x19, x25, x8, gt
+; CHECK-NEXT: csinv x20, x9, xzr, le
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
@@ -1627,19 +1627,19 @@
; CHECK-NEXT: mov x4, x20
; CHECK-NEXT: mov x5, x19
; CHECK-NEXT: mov x6, x24
+; CHECK-NEXT: mov x7, x23
+; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, xzr, x1, lt
; CHECK-NEXT: fcmp s8, s9
-; CHECK-NEXT: mov x7, x23
-; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: csinv x8, x8, xzr, le
; CHECK-NEXT: csel x1, x25, x9, gt
-; CHECK-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload
; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: mov v0.d[1], x1
; CHECK-NEXT: ldp x30, x25, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: mov v0.d[1], x1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: add sp, sp, #96
; CHECK-NEXT: ret
@@ -1675,14 +1675,14 @@
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
; CHECK-NEXT: mov w8, #2139095039 // =0x7f7fffff
; CHECK-NEXT: fcmp s8, #0.0
-; CHECK-NEXT: mov h0, v0.h[2]
; CHECK-NEXT: fmov s9, w8
-; CHECK-NEXT: csel x8, xzr, x1, lt
-; CHECK-NEXT: csel x9, xzr, x0, lt
+; CHECK-NEXT: mov h0, v0.h[2]
+; CHECK-NEXT: csel x9, xzr, x1, lt
+; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: fcvt s8, h0
-; CHECK-NEXT: csinv x19, x9, xzr, le
-; CHECK-NEXT: csinv x20, x8, xzr, le
+; CHECK-NEXT: csinv x19, x8, xzr, le
+; CHECK-NEXT: csinv x20, x9, xzr, le
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload
@@ -1712,19 +1712,19 @@
; CHECK-NEXT: mov x4, x21
; CHECK-NEXT: mov x5, x22
; CHECK-NEXT: mov x6, x23
+; CHECK-NEXT: mov x7, x24
+; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-NEXT: csel x8, xzr, x0, lt
; CHECK-NEXT: csel x9, xzr, x1, lt
; CHECK-NEXT: fcmp s8, s9
-; CHECK-NEXT: mov x7, x24
-; CHECK-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: csinv x8, x8, xzr, le
-; CHECK-NEXT: csinv x1, x9, xzr, le
; CHECK-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
; CHECK-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: mov v0.d[1], x1
+; CHECK-NEXT: csinv x8, x8, xzr, le
+; CHECK-NEXT: csinv x1, x9, xzr, le
+; CHECK-NEXT: fmov d0, x8
; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: mov v0.d[1], x1
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: add sp, sp, #96
; CHECK-NEXT: ret
@@ -1754,42 +1754,42 @@
; CHECK-CVT-NEXT: mov s2, v1.s[1]
; CHECK-CVT-NEXT: mov s3, v1.s[2]
; CHECK-CVT-NEXT: mov s4, v1.s[3]
-; CHECK-CVT-NEXT: mov s5, v0.s[1]
; CHECK-CVT-NEXT: fcvtzu w9, s1
-; CHECK-CVT-NEXT: fcvtzu w10, s0
+; CHECK-CVT-NEXT: fcvtzu w13, s0
; CHECK-CVT-NEXT: mov s1, v0.s[2]
-; CHECK-CVT-NEXT: mov s0, v0.s[3]
; CHECK-CVT-NEXT: fcvtzu w8, s2
-; CHECK-CVT-NEXT: fcvtzu w11, s3
-; CHECK-CVT-NEXT: fcvtzu w12, s4
-; CHECK-CVT-NEXT: fcvtzu w13, s5
+; CHECK-CVT-NEXT: mov s2, v0.s[1]
+; CHECK-CVT-NEXT: fcvtzu w10, s3
+; CHECK-CVT-NEXT: fcvtzu w11, s4
+; CHECK-CVT-NEXT: fcvtzu w14, s1
+; CHECK-CVT-NEXT: mov s0, v0.s[3]
; CHECK-CVT-NEXT: cmp w8, #1
+; CHECK-CVT-NEXT: fcvtzu w12, s2
; CHECK-CVT-NEXT: csinc w8, w8, wzr, lo
; CHECK-CVT-NEXT: cmp w9, #1
; CHECK-CVT-NEXT: csinc w9, w9, wzr, lo
+; CHECK-CVT-NEXT: cmp w10, #1
+; CHECK-CVT-NEXT: csinc w10, w10, wzr, lo
; CHECK-CVT-NEXT: cmp w11, #1
+; CHECK-CVT-NEXT: fmov s1, w9
; CHECK-CVT-NEXT: csinc w11, w11, wzr, lo
; CHECK-CVT-NEXT: cmp w12, #1
; CHECK-CVT-NEXT: csinc w12, w12, wzr, lo
; CHECK-CVT-NEXT: cmp w13, #1
; CHECK-CVT-NEXT: csinc w13, w13, wzr, lo
-; CHECK-CVT-NEXT: cmp w10, #1
-; CHECK-CVT-NEXT: csinc w10, w10, wzr, lo
-; CHECK-CVT-NEXT: fmov s2, w9
-; CHECK-CVT-NEXT: fcvtzu w9, s1
-; CHECK-CVT-NEXT: fmov s3, w10
-; CHECK-CVT-NEXT: mov v2.s[1], w8
-; CHECK-CVT-NEXT: cmp w9, #1
-; CHECK-CVT-NEXT: csinc w8, w9, wzr, lo
-; CHECK-CVT-NEXT: fcvtzu w9, s0
-; CHECK-CVT-NEXT: mov v3.s[1], w13
-; CHECK-CVT-NEXT: mov v2.s[2], w11
-; CHECK-CVT-NEXT: cmp w9, #1
-; CHECK-CVT-NEXT: mov v3.s[2], w8
-; CHECK-CVT-NEXT: csinc w8, w9, wzr, lo
-; CHECK-CVT-NEXT: mov v2.s[3], w12
-; CHECK-CVT-NEXT: mov v3.s[3], w8
-; CHECK-CVT-NEXT: uzp1 v0.8h, v3.8h, v2.8h
+; CHECK-CVT-NEXT: mov v1.s[1], w8
+; CHECK-CVT-NEXT: cmp w14, #1
+; CHECK-CVT-NEXT: fmov s2, w13
+; CHECK-CVT-NEXT: fcvtzu w8, s0
+; CHECK-CVT-NEXT: csinc w9, w14, wzr, lo
+; CHECK-CVT-NEXT: mov v2.s[1], w12
+; CHECK-CVT-NEXT: mov v1.s[2], w10
+; CHECK-CVT-NEXT: cmp w8, #1
+; CHECK-CVT-NEXT: csinc w8, w8, wzr, lo
+; CHECK-CVT-NEXT: mov v2.s[2], w9
+; CHECK-CVT-NEXT: mov v1.s[3], w11
+; CHECK-CVT-NEXT: mov v2.s[3], w8
+; CHECK-CVT-NEXT: uzp1 v0.8h, v2.8h, v1.8h
; CHECK-CVT-NEXT: xtn v0.8b, v0.8h
; CHECK-CVT-NEXT: ret
;
@@ -1813,42 +1813,42 @@
; CHECK-CVT-NEXT: mov s2, v1.s[1]
; CHECK-CVT-NEXT: mov s3, v1.s[2]
; CHECK-CVT-NEXT: mov s4, v1.s[3]
-; CHECK-CVT-NEXT: mov s5, v0.s[1]
; CHECK-CVT-NEXT: fcvtzu w10, s1
-; CHECK-CVT-NEXT: fcvtzu w11, s0
+; CHECK-CVT-NEXT: fcvtzu w14, s0
; CHECK-CVT-NEXT: mov s1, v0.s[2]
-; CHECK-CVT-NEXT: mov s0, v0.s[3]
; CHECK-CVT-NEXT: fcvtzu w9, s2
-; CHECK-CVT-NEXT: fcvtzu w12, s3
-; CHECK-CVT-NEXT: fcvtzu w13, s4
-; CHECK-CVT-NEXT: fcvtzu w14, s5
+; CHECK-CVT-NEXT: mov s2, v0.s[1]
+; CHECK-CVT-NEXT: fcvtzu w11, s3
+; CHECK-CVT-NEXT: fcvtzu w12, s4
+; CHECK-CVT-NEXT: fcvtzu w15, s1
+; CHECK-CVT-NEXT: mov s0, v0.s[3]
; CHECK-CVT-NEXT: cmp w9, #255
+; CHECK-CVT-NEXT: fcvtzu w13, s2
; CHECK-CVT-NEXT: csel w9, w9, w8, lo
; CHECK-CVT-NEXT: cmp w10, #255
; CHECK-CVT-NEXT: csel w10, w10, w8, lo
+; CHECK-CVT-NEXT: cmp w11, #255
+; CHECK-CVT-NEXT: csel w11, w11, w8, lo
; CHECK-CVT-NEXT: cmp w12, #255
+; CHECK-CVT-NEXT: fmov s1, w10
; CHECK-CVT-NEXT: csel w12, w12, w8, lo
; CHECK-CVT-NEXT: cmp w13, #255
; CHECK-CVT-NEXT: csel w13, w13, w8, lo
; CHECK-CVT-NEXT: cmp w14, #255
; CHECK-CVT-NEXT: csel w14, w14, w8, lo
-; CHECK-CVT-NEXT: cmp w11, #255
-; CHECK-CVT-NEXT: csel w11, w11, w8, lo
-; CHECK-CVT-NEXT: fmov s2, w10
-; CHECK-CVT-NEXT: fcvtzu w10, s1
-; CHECK-CVT-NEXT: fmov s3, w11
-; CHECK-CVT-NEXT: mov v2.s[1], w9
-; CHECK-CVT-NEXT: cmp w10, #255
-; CHECK-CVT-NEXT: csel w9, w10, w8, lo
-; CHECK-CVT-NEXT: fcvtzu w10, s0
-; CHECK-CVT-NEXT: mov v3.s[1], w14
-; CHECK-CVT-NEXT: mov v2.s[2], w12
-; CHECK-CVT-NEXT: cmp w10, #255
-; CHECK-CVT-NEXT: csel w8, w10, w8, lo
-; CHECK-CVT-NEXT: mov v3.s[2], w9
-; CHECK-CVT-NEXT: mov v2.s[3], w13
-; CHECK-CVT-NEXT: mov v3.s[3], w8
-; CHECK-CVT-NEXT: uzp1 v0.8h, v3.8h, v2.8h
+; CHECK-CVT-NEXT: mov v1.s[1], w9
+; CHECK-CVT-NEXT: cmp w15, #255
+; CHECK-CVT-NEXT: fmov s2, w14
+; CHECK-CVT-NEXT: fcvtzu w9, s0
+; CHECK-CVT-NEXT: csel w10, w15, w8, lo
+; CHECK-CVT-NEXT: mov v2.s[1], w13
+; CHECK-CVT-NEXT: mov v1.s[2], w11
+; CHECK-CVT-NEXT: cmp w9, #255
+; CHECK-CVT-NEXT: csel w8, w9, w8, lo
+; CHECK-CVT-NEXT: mov v2.s[2], w10
+; CHECK-CVT-NEXT: mov v1.s[3], w12
+; CHECK-CVT-NEXT: mov v2.s[3], w8
+; CHECK-CVT-NEXT: uzp1 v0.8h, v2.8h, v1.8h
; CHECK-CVT-NEXT: xtn v0.8b, v0.8h
; CHECK-CVT-NEXT: ret
;
@@ -1870,42 +1870,42 @@
; CHECK-CVT-NEXT: mov s2, v1.s[1]
; CHECK-CVT-NEXT: mov s3, v1.s[2]
; CHECK-CVT-NEXT: mov s4, v1.s[3]
-; CHECK-CVT-NEXT: mov s5, v0.s[1]
; CHECK-CVT-NEXT: fcvtzu w10, s1
-; CHECK-CVT-NEXT: fcvtzu w11, s0
+; CHECK-CVT-NEXT: fcvtzu w14, s0
; CHECK-CVT-NEXT: mov s1, v0.s[2]
-; CHECK-CVT-NEXT: mov s0, v0.s[3]
; CHECK-CVT-NEXT: fcvtzu w9, s2
-; CHECK-CVT-NEXT: fcvtzu w12, s3
-; CHECK-CVT-NEXT: fcvtzu w13, s4
-; CHECK-CVT-NEXT: fcvtzu w14, s5
+; CHECK-CVT-NEXT: mov s2, v0.s[1]
+; CHECK-CVT-NEXT: fcvtzu w11, s3
+; CHECK-CVT-NEXT: fcvtzu w12, s4
+; CHECK-CVT-NEXT: fcvtzu w15, s1
+; CHECK-CVT-NEXT: mov s0, v0.s[3]
; CHECK-CVT-NEXT: cmp w9, w8
+; CHECK-CVT-NEXT: fcvtzu w13, s2
; CHECK-CVT-NEXT: csel w9, w9, w8, lo
; CHECK-CVT-NEXT: cmp w10, w8
; CHECK-CVT-NEXT: csel w10, w10, w8, lo
+; CHECK-CVT-NEXT: cmp w11, w8
+; CHECK-CVT-NEXT: csel w11, w11, w8, lo
; CHECK-CVT-NEXT: cmp w12, w8
+; CHECK-CVT-NEXT: fmov s1, w10
; CHECK-CVT-NEXT: csel w12, w12, w8, lo
; CHECK-CVT-NEXT: cmp w13, w8
; CHECK-CVT-NEXT: csel w13, w13, w8, lo
; CHECK-CVT-NEXT: cmp w14, w8
; CHECK-CVT-NEXT: csel w14, w14, w8, lo
-; CHECK-CVT-NEXT: cmp w11, w8
-; CHECK-CVT-NEXT: csel w11, w11, w8, lo
-; CHECK-CVT-NEXT: fmov s2, w10
-; CHECK-CVT-NEXT: fcvtzu w10, s1
-; CHECK-CVT-NEXT: fmov s3, w11
-; CHECK-CVT-NEXT: mov v2.s[1], w9
-; CHECK-CVT-NEXT: cmp w10, w8
-; CHECK-CVT-NEXT: csel w9, w10, w8, lo
-; CHECK-CVT-NEXT: fcvtzu w10, s0
-; CHECK-CVT-NEXT: mov v3.s[1], w14
-; CHECK-CVT-NEXT: mov v2.s[2], w12
-; CHECK-CVT-NEXT: cmp w10, w8
-; CHECK-CVT-NEXT: csel w8, w10, w8, lo
-; CHECK-CVT-NEXT: mov v3.s[2], w9
-; CHECK-CVT-NEXT: mov v2.s[3], w13
-; CHECK-CVT-NEXT: mov v3.s[3], w8
-; CHECK-CVT-NEXT: uzp1 v0.8h, v3.8h, v2.8h
+; CHECK-CVT-NEXT: mov v1.s[1], w9
+; CHECK-CVT-NEXT: cmp w15, w8
+; CHECK-CVT-NEXT: fmov s2, w14
+; CHECK-CVT-NEXT: fcvtzu w9, s0
+; CHECK-CVT-NEXT: csel w10, w15, w8, lo
+; CHECK-CVT-NEXT: mov v2.s[1], w13
+; CHECK-CVT-NEXT: mov v1.s[2], w11
+; CHECK-CVT-NEXT: cmp w9, w8
+; CHECK-CVT-NEXT: csel w8, w9, w8, lo
+; CHECK-CVT-NEXT: mov v2.s[2], w10
+; CHECK-CVT-NEXT: mov v1.s[3], w12
+; CHECK-CVT-NEXT: mov v2.s[3], w8
+; CHECK-CVT-NEXT: uzp1 v0.8h, v2.8h, v1.8h
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i13:
@@ -1927,42 +1927,42 @@
; CHECK-CVT-NEXT: mov s2, v1.s[1]
; CHECK-CVT-NEXT: mov s3, v1.s[2]
; CHECK-CVT-NEXT: mov s4, v1.s[3]
-; CHECK-CVT-NEXT: mov s5, v0.s[1]
; CHECK-CVT-NEXT: fcvtzu w10, s1
-; CHECK-CVT-NEXT: fcvtzu w11, s0
+; CHECK-CVT-NEXT: fcvtzu w14, s0
; CHECK-CVT-NEXT: mov s1, v0.s[2]
-; CHECK-CVT-NEXT: mov s0, v0.s[3]
; CHECK-CVT-NEXT: fcvtzu w9, s2
-; CHECK-CVT-NEXT: fcvtzu w12, s3
-; CHECK-CVT-NEXT: fcvtzu w13, s4
-; CHECK-CVT-NEXT: fcvtzu w14, s5
+; CHECK-CVT-NEXT: mov s2, v0.s[1]
+; CHECK-CVT-NEXT: fcvtzu w11, s3
+; CHECK-CVT-NEXT: fcvtzu w12, s4
+; CHECK-CVT-NEXT: fcvtzu w15, s1
+; CHECK-CVT-NEXT: mov s0, v0.s[3]
; CHECK-CVT-NEXT: cmp w9, w8
+; CHECK-CVT-NEXT: fcvtzu w13, s2
; CHECK-CVT-NEXT: csel w9, w9, w8, lo
; CHECK-CVT-NEXT: cmp w10, w8
; CHECK-CVT-NEXT: csel w10, w10, w8, lo
+; CHECK-CVT-NEXT: cmp w11, w8
+; CHECK-CVT-NEXT: csel w11, w11, w8, lo
; CHECK-CVT-NEXT: cmp w12, w8
+; CHECK-CVT-NEXT: fmov s1, w10
; CHECK-CVT-NEXT: csel w12, w12, w8, lo
; CHECK-CVT-NEXT: cmp w13, w8
; CHECK-CVT-NEXT: csel w13, w13, w8, lo
; CHECK-CVT-NEXT: cmp w14, w8
; CHECK-CVT-NEXT: csel w14, w14, w8, lo
-; CHECK-CVT-NEXT: cmp w11, w8
-; CHECK-CVT-NEXT: csel w11, w11, w8, lo
-; CHECK-CVT-NEXT: fmov s2, w10
-; CHECK-CVT-NEXT: fcvtzu w10, s1
-; CHECK-CVT-NEXT: fmov s3, w11
-; CHECK-CVT-NEXT: mov v2.s[1], w9
-; CHECK-CVT-NEXT: cmp w10, w8
-; CHECK-CVT-NEXT: csel w9, w10, w8, lo
-; CHECK-CVT-NEXT: fcvtzu w10, s0
-; CHECK-CVT-NEXT: mov v3.s[1], w14
-; CHECK-CVT-NEXT: mov v2.s[2], w12
-; CHECK-CVT-NEXT: cmp w10, w8
-; CHECK-CVT-NEXT: csel w8, w10, w8, lo
-; CHECK-CVT-NEXT: mov v3.s[2], w9
-; CHECK-CVT-NEXT: mov v2.s[3], w13
-; CHECK-CVT-NEXT: mov v3.s[3], w8
-; CHECK-CVT-NEXT: uzp1 v0.8h, v3.8h, v2.8h
+; CHECK-CVT-NEXT: mov v1.s[1], w9
+; CHECK-CVT-NEXT: cmp w15, w8
+; CHECK-CVT-NEXT: fmov s2, w14
+; CHECK-CVT-NEXT: fcvtzu w9, s0
+; CHECK-CVT-NEXT: csel w10, w15, w8, lo
+; CHECK-CVT-NEXT: mov v2.s[1], w13
+; CHECK-CVT-NEXT: mov v1.s[2], w11
+; CHECK-CVT-NEXT: cmp w9, w8
+; CHECK-CVT-NEXT: csel w8, w9, w8, lo
+; CHECK-CVT-NEXT: mov v2.s[2], w10
+; CHECK-CVT-NEXT: mov v1.s[3], w12
+; CHECK-CVT-NEXT: mov v2.s[3], w8
+; CHECK-CVT-NEXT: uzp1 v0.8h, v2.8h, v1.8h
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i16:
@@ -1985,8 +1985,8 @@
; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: mov w1, v2.s[1]
; CHECK-NEXT: mov w2, v2.s[2]
-; CHECK-NEXT: mov w5, v0.s[1]
; CHECK-NEXT: mov w3, v2.s[3]
+; CHECK-NEXT: mov w5, v0.s[1]
; CHECK-NEXT: mov w6, v0.s[2]
; CHECK-NEXT: mov w7, v0.s[3]
; CHECK-NEXT: fmov w4, s0
@@ -2012,81 +2012,81 @@
; CHECK-CVT-LABEL: test_unsigned_v8f16_v8i50:
; CHECK-CVT: // %bb.0:
; CHECK-CVT-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-CVT-NEXT: mov h5, v0.h[1]
; CHECK-CVT-NEXT: mov x8, #1125899906842623 // =0x3ffffffffffff
-; CHECK-CVT-NEXT: mov h2, v0.h[1]
-; CHECK-CVT-NEXT: mov h3, v0.h[2]
-; CHECK-CVT-NEXT: mov h5, v0.h[3]
+; CHECK-CVT-NEXT: mov h6, v0.h[2]
+; CHECK-CVT-NEXT: mov h7, v0.h[3]
; CHECK-CVT-NEXT: fcvt s0, h0
-; CHECK-CVT-NEXT: mov h4, v1.h[1]
-; CHECK-CVT-NEXT: mov h6, v1.h[2]
-; CHECK-CVT-NEXT: mov h7, v1.h[3]
+; CHECK-CVT-NEXT: mov h2, v1.h[1]
+; CHECK-CVT-NEXT: mov h3, v1.h[2]
+; CHECK-CVT-NEXT: mov h4, v1.h[3]
; CHECK-CVT-NEXT: fcvt s1, h1
+; CHECK-CVT-NEXT: fcvtzu x13, s0
; CHECK-CVT-NEXT: fcvt s2, h2
; CHECK-CVT-NEXT: fcvt s3, h3
-; CHECK-CVT-NEXT: fcvtzu x9, s0
-; CHECK-CVT-NEXT: fcvt s5, h5
; CHECK-CVT-NEXT: fcvt s4, h4
-; CHECK-CVT-NEXT: fcvt s6, h6
-; CHECK-CVT-NEXT: fcvt s0, h7
-; CHECK-CVT-NEXT: fcvtzu x10, s1
-; CHECK-CVT-NEXT: fcvtzu x11, s2
-; CHECK-CVT-NEXT: fcvtzu x12, s3
-; CHECK-CVT-NEXT: fcvtzu x14, s5
-; CHECK-CVT-NEXT: fcvtzu x13, s4
-; CHECK-CVT-NEXT: fcvtzu x15, s6
-; CHECK-CVT-NEXT: cmp x10, x8
-; CHECK-CVT-NEXT: fcvtzu x16, s0
-; CHECK-CVT-NEXT: csel x4, x10, x8, lo
-; CHECK-CVT-NEXT: cmp x13, x8
-; CHECK-CVT-NEXT: csel x5, x13, x8, lo
-; CHECK-CVT-NEXT: cmp x15, x8
-; CHECK-CVT-NEXT: csel x6, x15, x8, lo
-; CHECK-CVT-NEXT: cmp x16, x8
-; CHECK-CVT-NEXT: csel x7, x16, x8, lo
+; CHECK-CVT-NEXT: fcvtzu x9, s1
+; CHECK-CVT-NEXT: fcvt s1, h5
+; CHECK-CVT-NEXT: fcvtzu x10, s2
+; CHECK-CVT-NEXT: fcvtzu x11, s3
+; CHECK-CVT-NEXT: fcvt s2, h6
+; CHECK-CVT-NEXT: fcvtzu x12, s4
+; CHECK-CVT-NEXT: fcvt s3, h7
; CHECK-CVT-NEXT: cmp x9, x8
-; CHECK-CVT-NEXT: csel x0, x9, x8, lo
+; CHECK-CVT-NEXT: fcvtzu x14, s1
+; CHECK-CVT-NEXT: csel x4, x9, x8, lo
+; CHECK-CVT-NEXT: cmp x10, x8
+; CHECK-CVT-NEXT: fcvtzu x9, s2
+; CHECK-CVT-NEXT: csel x5, x10, x8, lo
; CHECK-CVT-NEXT: cmp x11, x8
-; CHECK-CVT-NEXT: csel x1, x11, x8, lo
+; CHECK-CVT-NEXT: fcvtzu x10, s3
+; CHECK-CVT-NEXT: csel x6, x11, x8, lo
; CHECK-CVT-NEXT: cmp x12, x8
-; CHECK-CVT-NEXT: csel x2, x12, x8, lo
+; CHECK-CVT-NEXT: csel x7, x12, x8, lo
+; CHECK-CVT-NEXT: cmp x13, x8
+; CHECK-CVT-NEXT: csel x0, x13, x8, lo
; CHECK-CVT-NEXT: cmp x14, x8
-; CHECK-CVT-NEXT: csel x3, x14, x8, lo
+; CHECK-CVT-NEXT: csel x1, x14, x8, lo
+; CHECK-CVT-NEXT: cmp x9, x8
+; CHECK-CVT-NEXT: csel x2, x9, x8, lo
+; CHECK-CVT-NEXT: cmp x10, x8
+; CHECK-CVT-NEXT: csel x3, x10, x8, lo
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i50:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: ext v1.16b, v0.16b, v0.16b, #8
; CHECK-FP16-NEXT: mov x8, #1125899906842623 // =0x3ffffffffffff
-; CHECK-FP16-NEXT: mov h2, v0.h[1]
-; CHECK-FP16-NEXT: mov h3, v0.h[2]
-; CHECK-FP16-NEXT: mov h5, v0.h[3]
-; CHECK-FP16-NEXT: fcvtzu x9, h0
-; CHECK-FP16-NEXT: mov h4, v1.h[1]
-; CHECK-FP16-NEXT: mov h6, v1.h[2]
-; CHECK-FP16-NEXT: mov h0, v1.h[3]
-; CHECK-FP16-NEXT: fcvtzu x10, h1
-; CHECK-FP16-NEXT: fcvtzu x11, h2
-; CHECK-FP16-NEXT: fcvtzu x12, h3
-; CHECK-FP16-NEXT: fcvtzu x14, h5
-; CHECK-FP16-NEXT: fcvtzu x13, h4
-; CHECK-FP16-NEXT: fcvtzu x15, h6
-; CHECK-FP16-NEXT: cmp x10, x8
-; CHECK-FP16-NEXT: fcvtzu x16, h0
-; CHECK-FP16-NEXT: csel x4, x10, x8, lo
-; CHECK-FP16-NEXT: cmp x13, x8
-; CHECK-FP16-NEXT: csel x5, x13, x8, lo
-; CHECK-FP16-NEXT: cmp x15, x8
-; CHECK-FP16-NEXT: csel x6, x15, x8, lo
-; CHECK-FP16-NEXT: cmp x16, x8
-; CHECK-FP16-NEXT: csel x7, x16, x8, lo
+; CHECK-FP16-NEXT: fcvtzu x13, h0
+; CHECK-FP16-NEXT: mov h2, v1.h[1]
+; CHECK-FP16-NEXT: mov h3, v1.h[2]
+; CHECK-FP16-NEXT: mov h4, v1.h[3]
+; CHECK-FP16-NEXT: fcvtzu x9, h1
+; CHECK-FP16-NEXT: mov h1, v0.h[1]
+; CHECK-FP16-NEXT: fcvtzu x10, h2
+; CHECK-FP16-NEXT: fcvtzu x11, h3
+; CHECK-FP16-NEXT: mov h2, v0.h[2]
+; CHECK-FP16-NEXT: fcvtzu x12, h4
+; CHECK-FP16-NEXT: mov h3, v0.h[3]
; CHECK-FP16-NEXT: cmp x9, x8
-; CHECK-FP16-NEXT: csel x0, x9, x8, lo
+; CHECK-FP16-NEXT: fcvtzu x14, h1
+; CHECK-FP16-NEXT: csel x4, x9, x8, lo
+; CHECK-FP16-NEXT: cmp x10, x8
+; CHECK-FP16-NEXT: fcvtzu x9, h2
+; CHECK-FP16-NEXT: csel x5, x10, x8, lo
; CHECK-FP16-NEXT: cmp x11, x8
-; CHECK-FP16-NEXT: csel x1, x11, x8, lo
+; CHECK-FP16-NEXT: fcvtzu x10, h3
+; CHECK-FP16-NEXT: csel x6, x11, x8, lo
; CHECK-FP16-NEXT: cmp x12, x8
-; CHECK-FP16-NEXT: csel x2, x12, x8, lo
+; CHECK-FP16-NEXT: csel x7, x12, x8, lo
+; CHECK-FP16-NEXT: cmp x13, x8
+; CHECK-FP16-NEXT: csel x0, x13, x8, lo
; CHECK-FP16-NEXT: cmp x14, x8
-; CHECK-FP16-NEXT: csel x3, x14, x8, lo
+; CHECK-FP16-NEXT: csel x1, x14, x8, lo
+; CHECK-FP16-NEXT: cmp x9, x8
+; CHECK-FP16-NEXT: csel x2, x9, x8, lo
+; CHECK-FP16-NEXT: cmp x10, x8
+; CHECK-FP16-NEXT: csel x3, x10, x8, lo
; CHECK-FP16-NEXT: ret
%x = call <8 x i50> @llvm.fptoui.sat.v8f16.v8i50(<8 x half> %f)
ret <8 x i50> %x
@@ -2096,63 +2096,63 @@
; CHECK-CVT-LABEL: test_unsigned_v8f16_v8i64:
; CHECK-CVT: // %bb.0:
; CHECK-CVT-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-CVT-NEXT: mov h2, v0.h[2]
-; CHECK-CVT-NEXT: fcvt s3, h0
-; CHECK-CVT-NEXT: mov h7, v0.h[1]
-; CHECK-CVT-NEXT: mov h0, v0.h[3]
-; CHECK-CVT-NEXT: mov h4, v1.h[1]
-; CHECK-CVT-NEXT: mov h6, v1.h[2]
-; CHECK-CVT-NEXT: fcvt s5, h1
-; CHECK-CVT-NEXT: mov h1, v1.h[3]
+; CHECK-CVT-NEXT: mov h4, v0.h[2]
+; CHECK-CVT-NEXT: mov h3, v0.h[1]
+; CHECK-CVT-NEXT: mov h7, v0.h[3]
+; CHECK-CVT-NEXT: fcvt s0, h0
+; CHECK-CVT-NEXT: mov h2, v1.h[2]
+; CHECK-CVT-NEXT: mov h5, v1.h[1]
+; CHECK-CVT-NEXT: mov h6, v1.h[3]
+; CHECK-CVT-NEXT: fcvt s1, h1
+; CHECK-CVT-NEXT: fcvt s4, h4
+; CHECK-CVT-NEXT: fcvt s3, h3
+; CHECK-CVT-NEXT: fcvt s7, h7
+; CHECK-CVT-NEXT: fcvtzu x9, s0
; CHECK-CVT-NEXT: fcvt s2, h2
-; CHECK-CVT-NEXT: fcvtzu x8, s3
-; CHECK-CVT-NEXT: fcvt s3, h4
-; CHECK-CVT-NEXT: fcvt s4, h6
-; CHECK-CVT-NEXT: fcvtzu x9, s5
-; CHECK-CVT-NEXT: fcvt s5, h7
-; CHECK-CVT-NEXT: fcvt s6, h0
-; CHECK-CVT-NEXT: fcvt s7, h1
-; CHECK-CVT-NEXT: fcvtzu x10, s2
-; CHECK-CVT-NEXT: fmov d0, x8
-; CHECK-CVT-NEXT: fmov d2, x9
-; CHECK-CVT-NEXT: fcvtzu x9, s4
+; CHECK-CVT-NEXT: fcvt s5, h5
+; CHECK-CVT-NEXT: fcvt s6, h6
+; CHECK-CVT-NEXT: fcvtzu x8, s1
+; CHECK-CVT-NEXT: fcvtzu x12, s4
; CHECK-CVT-NEXT: fcvtzu x11, s3
-; CHECK-CVT-NEXT: fcvtzu x8, s5
-; CHECK-CVT-NEXT: fmov d1, x10
-; CHECK-CVT-NEXT: fcvtzu x10, s6
-; CHECK-CVT-NEXT: fmov d3, x9
-; CHECK-CVT-NEXT: fcvtzu x9, s7
-; CHECK-CVT-NEXT: mov v2.d[1], x11
-; CHECK-CVT-NEXT: mov v0.d[1], x8
-; CHECK-CVT-NEXT: mov v1.d[1], x10
-; CHECK-CVT-NEXT: mov v3.d[1], x9
+; CHECK-CVT-NEXT: fcvtzu x15, s7
+; CHECK-CVT-NEXT: fmov d0, x9
+; CHECK-CVT-NEXT: fcvtzu x10, s2
+; CHECK-CVT-NEXT: fcvtzu x13, s5
+; CHECK-CVT-NEXT: fcvtzu x14, s6
+; CHECK-CVT-NEXT: fmov d2, x8
+; CHECK-CVT-NEXT: fmov d1, x12
+; CHECK-CVT-NEXT: mov v0.d[1], x11
+; CHECK-CVT-NEXT: fmov d3, x10
+; CHECK-CVT-NEXT: mov v2.d[1], x13
+; CHECK-CVT-NEXT: mov v1.d[1], x15
+; CHECK-CVT-NEXT: mov v3.d[1], x14
; CHECK-CVT-NEXT: ret
;
; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i64:
; CHECK-FP16: // %bb.0:
; CHECK-FP16-NEXT: ext v1.16b, v0.16b, v0.16b, #8
-; CHECK-FP16-NEXT: mov h2, v0.h[2]
-; CHECK-FP16-NEXT: mov h5, v0.h[1]
-; CHECK-FP16-NEXT: mov h6, v0.h[3]
-; CHECK-FP16-NEXT: fcvtzu x8, h0
-; CHECK-FP16-NEXT: mov h4, v1.h[2]
-; CHECK-FP16-NEXT: fcvtzu x9, h1
-; CHECK-FP16-NEXT: mov h3, v1.h[1]
-; CHECK-FP16-NEXT: mov h7, v1.h[3]
-; CHECK-FP16-NEXT: fcvtzu x10, h2
-; CHECK-FP16-NEXT: fmov d0, x8
-; CHECK-FP16-NEXT: fmov d2, x9
-; CHECK-FP16-NEXT: fcvtzu x8, h5
-; CHECK-FP16-NEXT: fcvtzu x9, h4
+; CHECK-FP16-NEXT: mov h4, v0.h[2]
+; CHECK-FP16-NEXT: mov h3, v0.h[1]
+; CHECK-FP16-NEXT: mov h7, v0.h[3]
+; CHECK-FP16-NEXT: fcvtzu x9, h0
+; CHECK-FP16-NEXT: mov h2, v1.h[2]
+; CHECK-FP16-NEXT: mov h5, v1.h[1]
+; CHECK-FP16-NEXT: mov h6, v1.h[3]
+; CHECK-FP16-NEXT: fcvtzu x8, h1
+; CHECK-FP16-NEXT: fcvtzu x12, h4
; CHECK-FP16-NEXT: fcvtzu x11, h3
-; CHECK-FP16-NEXT: fmov d1, x10
-; CHECK-FP16-NEXT: fcvtzu x10, h6
-; CHECK-FP16-NEXT: fmov d3, x9
-; CHECK-FP16-NEXT: fcvtzu x9, h7
-; CHECK-FP16-NEXT: mov v2.d[1], x11
-; CHECK-FP16-NEXT: mov v0.d[1], x8
-; CHECK-FP16-NEXT: mov v1.d[1], x10
-; CHECK-FP16-NEXT: mov v3.d[1], x9
+; CHECK-FP16-NEXT: fcvtzu x15, h7
+; CHECK-FP16-NEXT: fmov d0, x9
+; CHECK-FP16-NEXT: fcvtzu x10, h2
+; CHECK-FP16-NEXT: fcvtzu x13, h5
+; CHECK-FP16-NEXT: fcvtzu x14, h6
+; CHECK-FP16-NEXT: fmov d2, x8
+; CHECK-FP16-NEXT: fmov d1, x12
+; CHECK-FP16-NEXT: mov v0.d[1], x11
+; CHECK-FP16-NEXT: fmov d3, x10
+; CHECK-FP16-NEXT: mov v2.d[1], x13
+; CHECK-FP16-NEXT: mov v1.d[1], x15
+; CHECK-FP16-NEXT: mov v3.d[1], x14
; CHECK-FP16-NEXT: ret
%x = call <8 x i64> @llvm.fptoui.sat.v8f16.v8i64(<8 x half> %f)
ret <8 x i64> %x
@@ -2185,8 +2185,8 @@
; CHECK-NEXT: .cfi_offset b8, -104
; CHECK-NEXT: .cfi_offset b9, -112
; CHECK-NEXT: str q0, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: mov x19, x8
; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: mov x19, x8
; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: mov h0, v0.h[1]
; CHECK-NEXT: fcvt s8, h0
@@ -2195,17 +2195,17 @@
; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: mov w8, #1904214015 // =0x717fffff
; CHECK-NEXT: fcmp s8, #0.0
-; CHECK-NEXT: mov x23, #68719476735 // =0xfffffffff
-; CHECK-NEXT: mov h0, v0.h[3]
; CHECK-NEXT: fmov s9, w8
-; CHECK-NEXT: csel x8, xzr, x0, lt
-; CHECK-NEXT: csel x9, xzr, x1, lt
+; CHECK-NEXT: mov x22, #68719476735 // =0xfffffffff
+; CHECK-NEXT: mov h0, v0.h[3]
+; CHECK-NEXT: csel x9, xzr, x0, lt
+; CHECK-NEXT: csel x8, xzr, x1, lt
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: fcvt s8, h0
-; CHECK-NEXT: csel x9, x23, x9, gt
-; CHECK-NEXT: csinv x8, x8, xzr, le
+; CHECK-NEXT: csel x10, x22, x8, gt
+; CHECK-NEXT: csinv x8, x9, xzr, le
+; CHECK-NEXT: stp x8, x10, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: fmov s0, s8
-; CHECK-NEXT: stp x8, x9, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: fcmp s8, #0.0
; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
@@ -2213,10 +2213,10 @@
; CHECK-NEXT: csel x9, xzr, x1, lt
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: fcvt s8, h0
-; CHECK-NEXT: csel x9, x23, x9, gt
+; CHECK-NEXT: csel x9, x22, x9, gt
; CHECK-NEXT: csinv x24, x8, xzr, le
-; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: str x9, [sp, #8] // 8-byte Folded Spill
+; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: fcmp s8, #0.0
@@ -2226,7 +2226,7 @@
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: fcvt s8, h0
; CHECK-NEXT: csinv x8, x8, xzr, le
-; CHECK-NEXT: csel x25, x23, x9, gt
+; CHECK-NEXT: csel x25, x22, x9, gt
; CHECK-NEXT: str x8, [sp, #32] // 8-byte Folded Spill
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixunssfti
@@ -2238,7 +2238,7 @@
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: fcvt s8, h0
; CHECK-NEXT: csinv x8, x8, xzr, le
-; CHECK-NEXT: csel x27, x23, x9, gt
+; CHECK-NEXT: csel x26, x22, x9, gt
; CHECK-NEXT: str x8, [sp] // 8-byte Folded Spill
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixunssfti
@@ -2249,8 +2249,8 @@
; CHECK-NEXT: csel x9, xzr, x1, lt
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: fcvt s8, h0
-; CHECK-NEXT: csel x29, x23, x9, gt
-; CHECK-NEXT: csinv x26, x8, xzr, le
+; CHECK-NEXT: csel x29, x22, x9, gt
+; CHECK-NEXT: csinv x27, x8, xzr, le
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: fcmp s8, #0.0
@@ -2259,8 +2259,8 @@
; CHECK-NEXT: csel x9, xzr, x1, lt
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: fcvt s8, h0
-; CHECK-NEXT: csel x28, x23, x9, gt
-; CHECK-NEXT: csinv x20, x8, xzr, le
+; CHECK-NEXT: csel x20, x22, x9, gt
+; CHECK-NEXT: csinv x21, x8, xzr, le
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixunssfti
; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
@@ -2270,45 +2270,46 @@
; CHECK-NEXT: csel x9, xzr, x1, lt
; CHECK-NEXT: fcmp s8, s9
; CHECK-NEXT: fcvt s8, h0
-; CHECK-NEXT: csel x21, x23, x9, gt
-; CHECK-NEXT: csinv x22, x8, xzr, le
+; CHECK-NEXT: csel x28, x22, x9, gt
+; CHECK-NEXT: csinv x23, x8, xzr, le
; CHECK-NEXT: fmov s0, s8
; CHECK-NEXT: bl __fixunssfti
+; CHECK-NEXT: ldr x9, [sp] // 8-byte Folded Reload
+; CHECK-NEXT: extr x8, x20, x21, #28
; CHECK-NEXT: fcmp s8, #0.0
-; CHECK-NEXT: extr x8, x28, x20, #28
-; CHECK-NEXT: bfi x21, x26, #36, #28
-; CHECK-NEXT: extr x9, x29, x26, #28
+; CHECK-NEXT: bfi x28, x27, #36, #28
; CHECK-NEXT: lsr x11, x29, #28
+; CHECK-NEXT: bfi x26, x24, #36, #28
+; CHECK-NEXT: stur x9, [x19, #75]
+; CHECK-NEXT: extr x9, x29, x27, #28
-; CHECK-NEXT: str x22, [x19]
; CHECK-NEXT: stur x8, [x19, #41]
; CHECK-NEXT: csel x8, xzr, x0, lt
-; CHECK-NEXT: csel x10, xzr, x1, lt
+; CHECK-NEXT: str x9, [x19, #16]
+; CHECK-NEXT: csel x9, xzr, x1, lt
; CHECK-NEXT: fcmp s8, s9
-; CHECK-NEXT: stp x21, x9, [x19, #8]
-; CHECK-NEXT: lsr x9, x28, #28
+; CHECK-NEXT: ldr x10, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT: stp x23, x28, [x19]
; CHECK-NEXT: strb w11, [x19, #24]
-; CHECK-NEXT: bfi x27, x24, #36, #28
-; CHECK-NEXT: csel x10, x23, x10, gt
+; CHECK-NEXT: stur x10, [x19, #50]
+; CHECK-NEXT: lsr x10, x20, #28
+; CHECK-NEXT: csel x9, x22, x9, gt
+; CHECK-NEXT: bfi x9, x21, #36, #28
; CHECK-NEXT: csinv x8, x8, xzr, le
-; CHECK-NEXT: bfi x10, x20, #36, #28
-; CHECK-NEXT: strb w9, [x19, #49]
+; CHECK-NEXT: strb w10, [x19, #49]
+; CHECK-NEXT: ldr x11, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: stur x8, [x19, #25]
-; CHECK-NEXT: stur x10, [x19, #33]
-; CHECK-NEXT: ldp x9, x12, [sp] // 16-byte Folded Reload
-; CHECK-NEXT: stur x9, [x19, #75]
-; CHECK-NEXT: extr x8, x12, x24, #28
-;
CHECK-NEXT: ldr x9, [sp, #32] // 8-byte Folded Reload -; CHECK-NEXT: stur x9, [x19, #50] -; CHECK-NEXT: ldp x11, x10, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: stur x8, [x19, #91] -; CHECK-NEXT: lsr x8, x12, #28 -; CHECK-NEXT: stur x27, [x19, #83] -; CHECK-NEXT: extr x9, x10, x11, #28 -; CHECK-NEXT: bfi x25, x11, #36, #28 -; CHECK-NEXT: strb w8, [x19, #99] -; CHECK-NEXT: stur x9, [x19, #66] -; CHECK-NEXT: lsr x9, x10, #28 +; CHECK-NEXT: stur x9, [x19, #33] +; CHECK-NEXT: extr x10, x11, x24, #28 +; CHECK-NEXT: stur x10, [x19, #91] +; CHECK-NEXT: ldp x10, x9, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: stur x26, [x19, #83] +; CHECK-NEXT: extr x8, x9, x10, #28 +; CHECK-NEXT: bfi x25, x10, #36, #28 +; CHECK-NEXT: lsr x9, x9, #28 +; CHECK-NEXT: stur x8, [x19, #66] +; CHECK-NEXT: lsr x8, x11, #28 ; CHECK-NEXT: stur x25, [x19, #58] +; CHECK-NEXT: strb w8, [x19, #99] ; CHECK-NEXT: strb w9, [x19, #74] ; CHECK-NEXT: ldp x20, x19, [sp, #160] // 16-byte Folded Reload ; CHECK-NEXT: ldp x22, x21, [sp, #144] // 16-byte Folded Reload @@ -2350,8 +2351,8 @@ ; CHECK-NEXT: .cfi_offset b8, -104 ; CHECK-NEXT: .cfi_offset b9, -112 ; CHECK-NEXT: str q0, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: mov x19, x8 ; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: mov x19, x8 ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: fmov s0, s8 @@ -2359,16 +2360,16 @@ ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: mov w8, #2139095039 // =0x7f7fffff ; CHECK-NEXT: fcmp s8, #0.0 -; CHECK-NEXT: mov h0, v0.h[1] ; CHECK-NEXT: fmov s9, w8 -; CHECK-NEXT: csel x8, xzr, x1, lt -; CHECK-NEXT: csel x9, xzr, x0, lt +; CHECK-NEXT: mov h0, v0.h[1] +; CHECK-NEXT: csel x9, xzr, x1, lt +; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csinv x9, x9, xzr, le -; CHECK-NEXT: csinv x8, x8, xzr, le +; CHECK-NEXT: csinv x10, x8, xzr, le +; CHECK-NEXT: csinv x8, x9, xzr, le +; CHECK-NEXT: stp x8, x10, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: fmov s0, s8 -; CHECK-NEXT: stp x8, x9, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, #0.0 @@ -2379,8 +2380,8 @@ ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: csinv x9, x9, xzr, le ; CHECK-NEXT: csinv x8, x8, xzr, le -; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: stp x8, x9, [sp] // 16-byte Folded Spill +; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, #0.0 @@ -2558,87 +2559,87 @@ ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: fcvtl2 v2.4s, v1.8h ; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h -; CHECK-CVT-NEXT: fcvtl2 v5.4s, v0.8h ; CHECK-CVT-NEXT: mov w8, #255 // =0xff -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h ; CHECK-CVT-NEXT: mov s3, v2.s[1] ; CHECK-CVT-NEXT: mov s4, v2.s[2] -; CHECK-CVT-NEXT: fcvtzu w9, s2 -; CHECK-CVT-NEXT: mov s2, v2.s[3] -; CHECK-CVT-NEXT: fcvtzu w12, s1 -; CHECK-CVT-NEXT: fcvtzu w16, s5 -; CHECK-CVT-NEXT: fcvtzu w2, s0 -; CHECK-CVT-NEXT: fcvtzu w10, s3 +; CHECK-CVT-NEXT: mov s5, v2.s[3] +; CHECK-CVT-NEXT: fcvtzu w10, s2 +; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h +; CHECK-CVT-NEXT: fcvtzu w13, s1 +; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-NEXT: fcvtzu w9, s3 ; CHECK-CVT-NEXT: mov s3, v1.s[1] ; CHECK-CVT-NEXT: fcvtzu w11, s4 ; CHECK-CVT-NEXT: mov s4, v1.s[2] +; CHECK-CVT-NEXT: fcvtzu w12, s5 ; CHECK-CVT-NEXT: mov s1, v1.s[3] -; CHECK-CVT-NEXT: fcvtzu w13, s2 -; CHECK-CVT-NEXT: cmp 
w10, #255 -; CHECK-CVT-NEXT: mov s2, v5.s[1] +; CHECK-CVT-NEXT: fcvtzu w18, s2 +; CHECK-CVT-NEXT: fcvtzu w3, s0 ; CHECK-CVT-NEXT: fcvtzu w14, s3 -; CHECK-CVT-NEXT: csel w10, w10, w8, lo ; CHECK-CVT-NEXT: cmp w9, #255 -; CHECK-CVT-NEXT: fcvtzu w15, s4 +; CHECK-CVT-NEXT: mov s3, v2.s[1] ; CHECK-CVT-NEXT: csel w9, w9, w8, lo +; CHECK-CVT-NEXT: cmp w10, #255 +; CHECK-CVT-NEXT: fcvtzu w15, s4 +; CHECK-CVT-NEXT: csel w10, w10, w8, lo ; CHECK-CVT-NEXT: cmp w11, #255 +; CHECK-CVT-NEXT: mov s4, v2.s[2] ; CHECK-CVT-NEXT: csel w11, w11, w8, lo -; CHECK-CVT-NEXT: cmp w13, #255 -; CHECK-CVT-NEXT: mov s3, v5.s[2] -; CHECK-CVT-NEXT: fcvtzu w17, s1 -; CHECK-CVT-NEXT: csel w13, w13, w8, lo -; CHECK-CVT-NEXT: cmp w14, #255 -; CHECK-CVT-NEXT: mov s4, v5.s[3] -; CHECK-CVT-NEXT: fcvtzu w18, s2 -; CHECK-CVT-NEXT: csel w14, w14, w8, lo ; CHECK-CVT-NEXT: cmp w12, #255 -; CHECK-CVT-NEXT: mov s1, v0.s[1] +; CHECK-CVT-NEXT: fcvtzu w16, s1 +; CHECK-CVT-NEXT: mov s1, v2.s[3] ; CHECK-CVT-NEXT: csel w12, w12, w8, lo +; CHECK-CVT-NEXT: cmp w14, #255 +; CHECK-CVT-NEXT: fcvtzu w17, s3 +; CHECK-CVT-NEXT: mov s3, v0.s[1] +; CHECK-CVT-NEXT: csel w14, w14, w8, lo +; CHECK-CVT-NEXT: cmp w13, #255 +; CHECK-CVT-NEXT: fcvtzu w0, s4 +; CHECK-CVT-NEXT: fmov s2, w10 +; CHECK-CVT-NEXT: csel w13, w13, w8, lo ; CHECK-CVT-NEXT: cmp w15, #255 -; CHECK-CVT-NEXT: fcvtzu w0, s3 ; CHECK-CVT-NEXT: csel w15, w15, w8, lo -; CHECK-CVT-NEXT: cmp w17, #255 -; CHECK-CVT-NEXT: csel w17, w17, w8, lo -; CHECK-CVT-NEXT: cmp w18, #255 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: csel w9, w18, w8, lo -; CHECK-CVT-NEXT: fcvtzu w18, s4 ; CHECK-CVT-NEXT: cmp w16, #255 ; CHECK-CVT-NEXT: fcvtzu w1, s1 ; CHECK-CVT-NEXT: csel w16, w16, w8, lo -; CHECK-CVT-NEXT: cmp w0, #255 +; CHECK-CVT-NEXT: cmp w17, #255 +; CHECK-CVT-NEXT: fcvtzu w2, s3 +; CHECK-CVT-NEXT: csel w17, w17, w8, lo +; CHECK-CVT-NEXT: cmp w18, #255 ; CHECK-CVT-NEXT: mov s1, v0.s[2] +; CHECK-CVT-NEXT: csel w18, w18, w8, lo +; CHECK-CVT-NEXT: cmp w0, #255 +; CHECK-CVT-NEXT: mov v2.s[1], w9 ; CHECK-CVT-NEXT: csel w0, w0, w8, lo -; CHECK-CVT-NEXT: cmp w18, #255 -; CHECK-CVT-NEXT: mov v2.s[1], w10 -; CHECK-CVT-NEXT: csel w10, w18, w8, lo ; CHECK-CVT-NEXT: cmp w1, #255 -; CHECK-CVT-NEXT: fmov s3, w12 -; CHECK-CVT-NEXT: csel w18, w1, w8, lo +; CHECK-CVT-NEXT: fmov s3, w18 +; CHECK-CVT-NEXT: csel w10, w1, w8, lo ; CHECK-CVT-NEXT: cmp w2, #255 -; CHECK-CVT-NEXT: csel w1, w2, w8, lo -; CHECK-CVT-NEXT: fmov s4, w16 -; CHECK-CVT-NEXT: mov v2.s[2], w11 -; CHECK-CVT-NEXT: fcvtzu w11, s1 ; CHECK-CVT-NEXT: mov s0, v0.s[3] -; CHECK-CVT-NEXT: fmov s1, w1 -; CHECK-CVT-NEXT: mov v3.s[1], w14 -; CHECK-CVT-NEXT: cmp w11, #255 +; CHECK-CVT-NEXT: csel w9, w2, w8, lo +; CHECK-CVT-NEXT: cmp w3, #255 +; CHECK-CVT-NEXT: fcvtzu w2, s1 +; CHECK-CVT-NEXT: csel w1, w3, w8, lo +; CHECK-CVT-NEXT: fmov s1, w13 +; CHECK-CVT-NEXT: mov v3.s[1], w17 +; CHECK-CVT-NEXT: fmov s4, w1 +; CHECK-CVT-NEXT: mov v2.s[2], w11 +; CHECK-CVT-NEXT: mov v1.s[1], w14 +; CHECK-CVT-NEXT: cmp w2, #255 ; CHECK-CVT-NEXT: mov v4.s[1], w9 -; CHECK-CVT-NEXT: csel w9, w11, w8, lo -; CHECK-CVT-NEXT: mov v1.s[1], w18 -; CHECK-CVT-NEXT: fcvtzu w11, s0 -; CHECK-CVT-NEXT: mov v3.s[2], w15 -; CHECK-CVT-NEXT: mov v4.s[2], w0 -; CHECK-CVT-NEXT: mov v1.s[2], w9 -; CHECK-CVT-NEXT: cmp w11, #255 -; CHECK-CVT-NEXT: csel w8, w11, w8, lo -; CHECK-CVT-NEXT: mov v2.s[3], w13 -; CHECK-CVT-NEXT: mov v3.s[3], w17 -; CHECK-CVT-NEXT: mov v4.s[3], w10 -; CHECK-CVT-NEXT: mov v1.s[3], w8 -; CHECK-CVT-NEXT: uzp1 v0.8h, v3.8h, v2.8h -; CHECK-CVT-NEXT: uzp1 v1.8h, v1.8h, v4.8h +; 
CHECK-CVT-NEXT: fcvtzu w9, s0 +; CHECK-CVT-NEXT: csel w11, w2, w8, lo +; CHECK-CVT-NEXT: mov v3.s[2], w0 +; CHECK-CVT-NEXT: mov v2.s[3], w12 +; CHECK-CVT-NEXT: mov v1.s[2], w15 +; CHECK-CVT-NEXT: mov v4.s[2], w11 +; CHECK-CVT-NEXT: cmp w9, #255 +; CHECK-CVT-NEXT: csel w8, w9, w8, lo +; CHECK-CVT-NEXT: mov v3.s[3], w10 +; CHECK-CVT-NEXT: mov v1.s[3], w16 +; CHECK-CVT-NEXT: mov v4.s[3], w8 +; CHECK-CVT-NEXT: uzp1 v0.8h, v1.8h, v2.8h +; CHECK-CVT-NEXT: uzp1 v1.8h, v4.8h, v3.8h ; CHECK-CVT-NEXT: uzp1 v0.16b, v1.16b, v0.16b ; CHECK-CVT-NEXT: ret ; @@ -2660,87 +2661,87 @@ ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h ; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcvtl2 v5.4s, v1.8h ; CHECK-CVT-NEXT: mov w8, #65535 // =0xffff -; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h ; CHECK-CVT-NEXT: mov s3, v2.s[1] ; CHECK-CVT-NEXT: mov s4, v2.s[2] -; CHECK-CVT-NEXT: fcvtzu w9, s2 -; CHECK-CVT-NEXT: mov s2, v2.s[3] -; CHECK-CVT-NEXT: fcvtzu w12, s0 -; CHECK-CVT-NEXT: fcvtzu w16, s5 -; CHECK-CVT-NEXT: fcvtzu w2, s1 -; CHECK-CVT-NEXT: fcvtzu w10, s3 +; CHECK-CVT-NEXT: mov s5, v2.s[3] +; CHECK-CVT-NEXT: fcvtzu w10, s2 +; CHECK-CVT-NEXT: fcvtl2 v2.4s, v1.8h +; CHECK-CVT-NEXT: fcvtzu w13, s0 +; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h +; CHECK-CVT-NEXT: fcvtzu w9, s3 ; CHECK-CVT-NEXT: mov s3, v0.s[1] ; CHECK-CVT-NEXT: fcvtzu w11, s4 ; CHECK-CVT-NEXT: mov s4, v0.s[2] +; CHECK-CVT-NEXT: fcvtzu w12, s5 ; CHECK-CVT-NEXT: mov s0, v0.s[3] -; CHECK-CVT-NEXT: fcvtzu w13, s2 -; CHECK-CVT-NEXT: cmp w10, w8 -; CHECK-CVT-NEXT: mov s2, v5.s[1] +; CHECK-CVT-NEXT: fcvtzu w18, s2 +; CHECK-CVT-NEXT: fcvtzu w3, s1 ; CHECK-CVT-NEXT: fcvtzu w14, s3 -; CHECK-CVT-NEXT: csel w10, w10, w8, lo ; CHECK-CVT-NEXT: cmp w9, w8 -; CHECK-CVT-NEXT: fcvtzu w15, s4 +; CHECK-CVT-NEXT: mov s3, v2.s[1] ; CHECK-CVT-NEXT: csel w9, w9, w8, lo +; CHECK-CVT-NEXT: cmp w10, w8 +; CHECK-CVT-NEXT: fcvtzu w15, s4 +; CHECK-CVT-NEXT: csel w10, w10, w8, lo ; CHECK-CVT-NEXT: cmp w11, w8 +; CHECK-CVT-NEXT: mov s4, v2.s[2] ; CHECK-CVT-NEXT: csel w11, w11, w8, lo -; CHECK-CVT-NEXT: cmp w13, w8 -; CHECK-CVT-NEXT: mov s3, v5.s[2] -; CHECK-CVT-NEXT: fcvtzu w17, s0 -; CHECK-CVT-NEXT: csel w13, w13, w8, lo -; CHECK-CVT-NEXT: cmp w14, w8 -; CHECK-CVT-NEXT: mov s4, v5.s[3] -; CHECK-CVT-NEXT: fcvtzu w18, s2 -; CHECK-CVT-NEXT: csel w14, w14, w8, lo ; CHECK-CVT-NEXT: cmp w12, w8 -; CHECK-CVT-NEXT: mov s0, v1.s[1] +; CHECK-CVT-NEXT: fcvtzu w16, s0 +; CHECK-CVT-NEXT: mov s0, v2.s[3] ; CHECK-CVT-NEXT: csel w12, w12, w8, lo +; CHECK-CVT-NEXT: cmp w14, w8 +; CHECK-CVT-NEXT: fcvtzu w17, s3 +; CHECK-CVT-NEXT: mov s3, v1.s[1] +; CHECK-CVT-NEXT: csel w14, w14, w8, lo +; CHECK-CVT-NEXT: cmp w13, w8 +; CHECK-CVT-NEXT: fcvtzu w0, s4 +; CHECK-CVT-NEXT: fmov s2, w10 +; CHECK-CVT-NEXT: csel w13, w13, w8, lo ; CHECK-CVT-NEXT: cmp w15, w8 -; CHECK-CVT-NEXT: fcvtzu w0, s3 ; CHECK-CVT-NEXT: csel w15, w15, w8, lo -; CHECK-CVT-NEXT: cmp w17, w8 -; CHECK-CVT-NEXT: csel w17, w17, w8, lo -; CHECK-CVT-NEXT: cmp w18, w8 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: csel w9, w18, w8, lo -; CHECK-CVT-NEXT: fcvtzu w18, s4 ; CHECK-CVT-NEXT: cmp w16, w8 ; CHECK-CVT-NEXT: fcvtzu w1, s0 ; CHECK-CVT-NEXT: csel w16, w16, w8, lo -; CHECK-CVT-NEXT: cmp w0, w8 +; CHECK-CVT-NEXT: cmp w17, w8 +; CHECK-CVT-NEXT: fcvtzu w2, s3 +; CHECK-CVT-NEXT: csel w17, w17, w8, lo +; CHECK-CVT-NEXT: cmp w18, w8 ; CHECK-CVT-NEXT: mov s0, v1.s[2] +; CHECK-CVT-NEXT: csel w18, w18, w8, lo +; CHECK-CVT-NEXT: cmp w0, w8 +; CHECK-CVT-NEXT: mov v2.s[1], w9 ; CHECK-CVT-NEXT: csel w0, w0, w8, lo -; CHECK-CVT-NEXT: cmp w18, w8 
-; CHECK-CVT-NEXT: mov v2.s[1], w10
-; CHECK-CVT-NEXT: csel w10, w18, w8, lo
 ; CHECK-CVT-NEXT: cmp w1, #255
-; CHECK-CVT-NEXT: fmov s3, w12
-; CHECK-CVT-NEXT: csel w18, w1, w8, lo
+; CHECK-CVT-NEXT: fmov s3, w18
+; CHECK-CVT-NEXT: csel w10, w1, w8, lo
 ; CHECK-CVT-NEXT: cmp w2, #255
-; CHECK-CVT-NEXT: csel w1, w2, w8, lo
-; CHECK-CVT-NEXT: fmov s4, w16
-; CHECK-CVT-NEXT: mov v2.s[2], w11
-; CHECK-CVT-NEXT: fcvtzu w11, s1
 ; CHECK-CVT-NEXT: mov s0, v0.s[3]
-; CHECK-CVT-NEXT: fmov s1, w1
-; CHECK-CVT-NEXT: mov v3.s[1], w14
-; CHECK-CVT-NEXT: cmp w11, #255
+; CHECK-CVT-NEXT: csel w9, w2, w8, lo
+; CHECK-CVT-NEXT: cmp w3, #255
+; CHECK-CVT-NEXT: fcvtzu w2, s1
+; CHECK-CVT-NEXT: csel w1, w3, w8, lo
+; CHECK-CVT-NEXT: fmov s1, w13
+; CHECK-CVT-NEXT: mov v3.s[1], w17
+; CHECK-CVT-NEXT: fmov s4, w1
+; CHECK-CVT-NEXT: mov v2.s[2], w11
+; CHECK-CVT-NEXT: mov v1.s[1], w14
+; CHECK-CVT-NEXT: cmp w2, #255
 ; CHECK-CVT-NEXT: mov v4.s[1], w9
-; CHECK-CVT-NEXT: csel w9, w11, w8, lo
-; CHECK-CVT-NEXT: mov v1.s[1], w18
-; CHECK-CVT-NEXT: fcvtzu w11, s0
-; CHECK-CVT-NEXT: mov v3.s[2], w15
-; CHECK-CVT-NEXT: mov v4.s[2], w0
-; CHECK-CVT-NEXT: mov v1.s[2], w9
-; CHECK-CVT-NEXT: cmp w11, #255
-; CHECK-CVT-NEXT: csel w8, w11, w8, lo
-; CHECK-CVT-NEXT: mov v2.s[3], w13
-; CHECK-CVT-NEXT: mov v3.s[3], w17
-; CHECK-CVT-NEXT: mov v4.s[3], w10
-; CHECK-CVT-NEXT: mov v1.s[3], w8
-; CHECK-CVT-NEXT: uzp1 v0.8h, v3.8h, v2.8h
-; CHECK-CVT-NEXT: uzp1 v1.8h, v1.8h, v4.8h
+; CHECK-CVT-NEXT: fcvtzu w9, s0
+; CHECK-CVT-NEXT: csel w11, w2, w8, lo
+; CHECK-CVT-NEXT: mov v3.s[2], w0
+; CHECK-CVT-NEXT: mov v2.s[3], w12
+; CHECK-CVT-NEXT: mov v1.s[2], w15
+; CHECK-CVT-NEXT: mov v4.s[2], w11
+; CHECK-CVT-NEXT: cmp w9, #255
+; CHECK-CVT-NEXT: csel w8, w9, w8, lo
+; CHECK-CVT-NEXT: mov v3.s[3], w10
+; CHECK-CVT-NEXT: mov v1.s[3], w16
+; CHECK-CVT-NEXT: mov v4.s[3], w8
+; CHECK-CVT-NEXT: uzp1 v0.8h, v1.8h, v2.8h
+; CHECK-CVT-NEXT: uzp1 v1.8h, v4.8h, v3.8h
 ; CHECK-CVT-NEXT: uzp1 v0.16b, v1.16b, v0.16b
 ; CHECK-CVT-NEXT: ret
 ;
@@ -2660,87 +2661,87 @@
 ; CHECK-CVT: // %bb.0:
 ; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h
 ; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h
-; CHECK-CVT-NEXT: fcvtl2 v5.4s, v1.8h
 ; CHECK-CVT-NEXT: mov w8, #65535 // =0xffff
-; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h
 ; CHECK-CVT-NEXT: mov s3, v2.s[1]
 ; CHECK-CVT-NEXT: mov s4, v2.s[2]
-; CHECK-CVT-NEXT: fcvtzu w9, s2
-; CHECK-CVT-NEXT: mov s2, v2.s[3]
-; CHECK-CVT-NEXT: fcvtzu w12, s0
-; CHECK-CVT-NEXT: fcvtzu w16, s5
-; CHECK-CVT-NEXT: fcvtzu w2, s1
-; CHECK-CVT-NEXT: fcvtzu w10, s3
+; CHECK-CVT-NEXT: mov s5, v2.s[3]
+; CHECK-CVT-NEXT: fcvtzu w10, s2
+; CHECK-CVT-NEXT: fcvtl2 v2.4s, v1.8h
+; CHECK-CVT-NEXT: fcvtzu w13, s0
+; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-CVT-NEXT: fcvtzu w9, s3
 ; CHECK-CVT-NEXT: mov s3, v0.s[1]
 ; CHECK-CVT-NEXT: fcvtzu w11, s4
 ; CHECK-CVT-NEXT: mov s4, v0.s[2]
+; CHECK-CVT-NEXT: fcvtzu w12, s5
 ; CHECK-CVT-NEXT: mov s0, v0.s[3]
-; CHECK-CVT-NEXT: fcvtzu w13, s2
-; CHECK-CVT-NEXT: cmp w10, w8
-; CHECK-CVT-NEXT: mov s2, v5.s[1]
+; CHECK-CVT-NEXT: fcvtzu w18, s2
+; CHECK-CVT-NEXT: fcvtzu w3, s1
 ; CHECK-CVT-NEXT: fcvtzu w14, s3
-; CHECK-CVT-NEXT: csel w10, w10, w8, lo
 ; CHECK-CVT-NEXT: cmp w9, w8
-; CHECK-CVT-NEXT: fcvtzu w15, s4
+; CHECK-CVT-NEXT: mov s3, v2.s[1]
 ; CHECK-CVT-NEXT: csel w9, w9, w8, lo
+; CHECK-CVT-NEXT: cmp w10, w8
+; CHECK-CVT-NEXT: fcvtzu w15, s4
+; CHECK-CVT-NEXT: csel w10, w10, w8, lo
 ; CHECK-CVT-NEXT: cmp w11, w8
+; CHECK-CVT-NEXT: mov s4, v2.s[2]
 ; CHECK-CVT-NEXT: csel w11, w11, w8, lo
-; CHECK-CVT-NEXT: cmp w13, w8
-; CHECK-CVT-NEXT: mov s3, v5.s[2]
-; CHECK-CVT-NEXT: fcvtzu w17, s0
-; CHECK-CVT-NEXT: csel w13, w13, w8, lo
-; CHECK-CVT-NEXT: cmp w14, w8
-; CHECK-CVT-NEXT: mov s4, v5.s[3]
-; CHECK-CVT-NEXT: fcvtzu w18, s2
-; CHECK-CVT-NEXT: csel w14, w14, w8, lo
 ; CHECK-CVT-NEXT: cmp w12, w8
-; CHECK-CVT-NEXT: mov s0, v1.s[1]
+; CHECK-CVT-NEXT: fcvtzu w16, s0
+; CHECK-CVT-NEXT: mov s0, v2.s[3]
 ; CHECK-CVT-NEXT: csel w12, w12, w8, lo
+; CHECK-CVT-NEXT: cmp w14, w8
+; CHECK-CVT-NEXT: fcvtzu w17, s3
+; CHECK-CVT-NEXT: mov s3, v1.s[1]
+; CHECK-CVT-NEXT: csel w14, w14, w8, lo
+; CHECK-CVT-NEXT: cmp w13, w8
+; CHECK-CVT-NEXT: fcvtzu w0, s4
+; CHECK-CVT-NEXT: fmov s2, w10
+; CHECK-CVT-NEXT: csel w13, w13, w8, lo
 ; CHECK-CVT-NEXT: cmp w15, w8
-; CHECK-CVT-NEXT: fcvtzu w0, s3
 ; CHECK-CVT-NEXT: csel w15, w15, w8, lo
-; CHECK-CVT-NEXT: cmp w17, w8
-; CHECK-CVT-NEXT: csel w17, w17, w8, lo
-; CHECK-CVT-NEXT: cmp w18, w8
-; CHECK-CVT-NEXT: fmov s2, w9
-; CHECK-CVT-NEXT: csel w9, w18, w8, lo
-; CHECK-CVT-NEXT: fcvtzu w18, s4
 ; CHECK-CVT-NEXT: cmp w16, w8
 ; CHECK-CVT-NEXT: fcvtzu w1, s0
 ; CHECK-CVT-NEXT: csel w16, w16, w8, lo
-; CHECK-CVT-NEXT: cmp w0, w8
+; CHECK-CVT-NEXT: cmp w17, w8
+; CHECK-CVT-NEXT: fcvtzu w2, s3
+; CHECK-CVT-NEXT: csel w17, w17, w8, lo
+; CHECK-CVT-NEXT: cmp w18, w8
 ; CHECK-CVT-NEXT: mov s0, v1.s[2]
+; CHECK-CVT-NEXT: csel w18, w18, w8, lo
+; CHECK-CVT-NEXT: cmp w0, w8
+; CHECK-CVT-NEXT: mov v2.s[1], w9
 ; CHECK-CVT-NEXT: csel w0, w0, w8, lo
-; CHECK-CVT-NEXT: cmp w18, w8
-; CHECK-CVT-NEXT: mov v2.s[1], w10
-; CHECK-CVT-NEXT: csel w10, w18, w8, lo
 ; CHECK-CVT-NEXT: cmp w1, w8
-; CHECK-CVT-NEXT: fmov s3, w12
-; CHECK-CVT-NEXT: csel w18, w1, w8, lo
+; CHECK-CVT-NEXT: fmov s3, w18
+; CHECK-CVT-NEXT: csel w10, w1, w8, lo
 ; CHECK-CVT-NEXT: cmp w2, w8
-; CHECK-CVT-NEXT: csel w1, w2, w8, lo
-; CHECK-CVT-NEXT: fmov s4, w16
-; CHECK-CVT-NEXT: mov v2.s[2], w11
-; CHECK-CVT-NEXT: fcvtzu w11, s0
+; CHECK-CVT-NEXT: csel w9, w2, w8, lo
+; CHECK-CVT-NEXT: cmp w3, w8
+; CHECK-CVT-NEXT: fcvtzu w2, s0
+; CHECK-CVT-NEXT: csel w1, w3, w8, lo
 ; CHECK-CVT-NEXT: mov s0, v1.s[3]
-; CHECK-CVT-NEXT: fmov s5, w1
-; CHECK-CVT-NEXT: mov v3.s[1], w14
-; CHECK-CVT-NEXT: cmp w11, w8
+; CHECK-CVT-NEXT: fmov s1, w13
+; CHECK-CVT-NEXT: fmov s4, w1
+; CHECK-CVT-NEXT: mov v3.s[1], w17
+; CHECK-CVT-NEXT: mov v2.s[2], w11
+; CHECK-CVT-NEXT: mov v1.s[1], w14
+; CHECK-CVT-NEXT: cmp w2, w8
 ; CHECK-CVT-NEXT: mov v4.s[1], w9
-; CHECK-CVT-NEXT: csel w9, w11, w8, lo
-; CHECK-CVT-NEXT: mov v5.s[1], w18
-; CHECK-CVT-NEXT: fcvtzu w11, s0
-; CHECK-CVT-NEXT: mov v3.s[2], w15
-; CHECK-CVT-NEXT: mov v4.s[2], w0
-; CHECK-CVT-NEXT: mov v5.s[2], w9
-; CHECK-CVT-NEXT: cmp w11, w8
-; CHECK-CVT-NEXT: csel w8, w11, w8, lo
-; CHECK-CVT-NEXT: mov v2.s[3], w13
-; CHECK-CVT-NEXT: mov v3.s[3], w17
-; CHECK-CVT-NEXT: mov v4.s[3], w10
-; CHECK-CVT-NEXT: mov v5.s[3], w8
-; CHECK-CVT-NEXT: uzp1 v0.8h, v3.8h, v2.8h
-; CHECK-CVT-NEXT: uzp1 v1.8h, v5.8h, v4.8h
+; CHECK-CVT-NEXT: fcvtzu w9, s0
+; CHECK-CVT-NEXT: csel w11, w2, w8, lo
+; CHECK-CVT-NEXT: mov v3.s[2], w0
+; CHECK-CVT-NEXT: mov v2.s[3], w12
+; CHECK-CVT-NEXT: mov v1.s[2], w15
+; CHECK-CVT-NEXT: mov v4.s[2], w11
+; CHECK-CVT-NEXT: cmp w9, w8
+; CHECK-CVT-NEXT: csel w8, w9, w8, lo
+; CHECK-CVT-NEXT: mov v3.s[3], w10
+; CHECK-CVT-NEXT: mov v1.s[3], w16
+; CHECK-CVT-NEXT: mov v4.s[3], w8
+; CHECK-CVT-NEXT: uzp1 v0.8h, v1.8h, v2.8h
+; CHECK-CVT-NEXT: uzp1 v1.8h, v4.8h, v3.8h
 ; CHECK-CVT-NEXT: ret
 ;
 ; CHECK-FP16-LABEL: test_unsigned_v16f16_v16i16:
@@ -2756,45 +2757,45 @@
 ; CHECK-LABEL: test_unsigned_v8f64_v8i8:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov d4, v3.d[1]
-; CHECK-NEXT: fcvtzu w10, d3
-; CHECK-NEXT: mov d3, v2.d[1]
-; CHECK-NEXT: mov w8, #255 // =0xff
+; CHECK-NEXT: mov d5, v2.d[1]
+; CHECK-NEXT: mov w11, #255 // =0xff
+; CHECK-NEXT: fcvtzu w9, d3
+; CHECK-NEXT: mov d3, v1.d[1]
 ; CHECK-NEXT: fcvtzu w12, d2
-; CHECK-NEXT: fcvtzu w13, d1
-; CHECK-NEXT: fcvtzu w9, d4
-; CHECK-NEXT: mov d4, v1.d[1]
-; CHECK-NEXT: fcvtzu w11, d3
-; CHECK-NEXT: mov d1, v0.d[1]
+; CHECK-NEXT: fcvtzu w14, d1
+; CHECK-NEXT: fcvtzu w8, d4
+; CHECK-NEXT: mov d4, v0.d[1]
+; CHECK-NEXT: fcvtzu w10, d5
+; CHECK-NEXT: fcvtzu w13, d3
+; CHECK-NEXT: cmp w8, #255
+; CHECK-NEXT: fcvtzu w15, d4
+; CHECK-NEXT: csel w8, w8, w11, lo
 ; CHECK-NEXT: cmp w9, #255
-; CHECK-NEXT: csel w9, w9, w8, lo
+; CHECK-NEXT: csel w9, w9, w11, lo
 ; CHECK-NEXT: cmp w10, #255
-; CHECK-NEXT: csel w10, w10, w8, lo
-; CHECK-NEXT: cmp w11, #255
-; CHECK-NEXT: csel w11, w11, w8, lo
+; CHECK-NEXT: fmov s4, w9
+; CHECK-NEXT: csel w9, w10, w11, lo
 ; CHECK-NEXT: cmp w12, #255
-; CHECK-NEXT: csel w12, w12, w8, lo
-; CHECK-NEXT: fmov s19, w10
-; CHECK-NEXT: fcvtzu w10, d4
-; CHECK-NEXT: cmp w10, #255
-; CHECK-NEXT: mov v19.s[1], w9
-; CHECK-NEXT: csel w10, w10, w8, lo
-; CHECK-NEXT: cmp w13, #255
-; CHECK-NEXT: fmov s18, w12
-; CHECK-NEXT: fcvtzu w9, d1
-; CHECK-NEXT: csel w12, w13, w8, lo
-; CHECK-NEXT: fcvtzu w13, d0
-; CHECK-NEXT: mov v18.s[1], w11
-; CHECK-NEXT: cmp w9, #255
-; CHECK-NEXT: fmov s17, w12
-; CHECK-NEXT: csel w9, w9, w8, lo
+; CHECK-NEXT: fcvtzu w10, d0
+; CHECK-NEXT: mov v4.s[1], w8
+; CHECK-NEXT: csel w8, w12, w11, lo
 ; CHECK-NEXT: cmp w13, #255
-; CHECK-NEXT: csel w8, w13, w8, lo
-; CHECK-NEXT: mov v17.s[1], w10
-; CHECK-NEXT: fmov s16, w8
+; CHECK-NEXT: fmov s3, w8
+; CHECK-NEXT: csel w8, w13, w11, lo
+; CHECK-NEXT: cmp w14, #255
+; CHECK-NEXT: mov v3.s[1], w9
+; CHECK-NEXT: csel w9, w14, w11, lo
+; CHECK-NEXT: cmp w15, #255
+; CHECK-NEXT: fmov s2, w9
+; CHECK-NEXT: csel w9, w15, w11, lo
+; CHECK-NEXT: cmp w10, #255
+; CHECK-NEXT: mov v2.s[1], w8
+; CHECK-NEXT: csel w8, w10, w11, lo
+; CHECK-NEXT: fmov s1, w8
 ; CHECK-NEXT: adrp x8, .LCPI82_0
-; CHECK-NEXT: mov v16.s[1], w9
 ; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI82_0]
-; CHECK-NEXT: tbl v0.8b, { v16.16b, v17.16b, v18.16b, v19.16b }, v0.8b
+; CHECK-NEXT: mov v1.s[1], w9
+; CHECK-NEXT: tbl v0.8b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.8b
 ; CHECK-NEXT: ret
 %x = call <8 x i8> @llvm.fptoui.sat.v8f64.v8i8(<8 x double> %f)
 ret <8 x i8> %x
@@ -2805,99 +2806,99 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov d16, v0.d[1]
 ; CHECK-NEXT: fcvtzu w10, d0
-; CHECK-NEXT: mov d0, v1.d[1]
 ; CHECK-NEXT: mov w8, #255 // =0xff
-; CHECK-NEXT: fcvtzu w12, d1
-; CHECK-NEXT: mov d1, v2.d[1]
 ; CHECK-NEXT: fcvtzu w9, d16
-; CHECK-NEXT: fcvtzu w11, d0
+; CHECK-NEXT: mov d16, v1.d[1]
 ; CHECK-NEXT: cmp w9, #255
 ; CHECK-NEXT: csel w9, w9, w8, lo
 ; CHECK-NEXT: cmp w10, #255
 ; CHECK-NEXT: csel w10, w10, w8, lo
-; CHECK-NEXT: cmp w11, #255
 ; CHECK-NEXT: fmov s0, w10
-; CHECK-NEXT: csel w10, w11, w8, lo
-; CHECK-NEXT: cmp w12, #255
-; CHECK-NEXT: csel w11, w12, w8, lo
-; CHECK-NEXT: fcvtzu w12, d2
+; CHECK-NEXT: fcvtzu w10, d16
+; CHECK-NEXT: mov d16, v2.d[1]
 ; CHECK-NEXT: mov v0.s[1], w9
 ; CHECK-NEXT: fcvtzu w9, d1
-; CHECK-NEXT: mov d2, v3.d[1]
-; CHECK-NEXT: fmov s1, w11
+; CHECK-NEXT: cmp w10, #255
+; CHECK-NEXT: csel w10, w10, w8, lo
 ; CHECK-NEXT: cmp w9, #255
 ; CHECK-NEXT: mov w11, v0.s[1]
 ; CHECK-NEXT: csel w9, w9, w8, lo
-; CHECK-NEXT: cmp w12, #255
+; CHECK-NEXT: fmov s1, w9
+; CHECK-NEXT: fcvtzu w9, d16
+; CHECK-NEXT: mov d16, v3.d[1]
+; CHECK-NEXT: mov v0.b[1], w11
 ; CHECK-NEXT: mov v1.s[1], w10
-; CHECK-NEXT: csel w12, w12, w8, lo
 ; CHECK-NEXT: fcvtzu w10, d2
-; CHECK-NEXT: mov v0.b[1], w11
-; CHECK-NEXT: fcvtzu w11, d3
-; CHECK-NEXT: fmov s2, w12
-; CHECK-NEXT: mov w12, v1.s[1]
+; CHECK-NEXT: cmp w9, #255
+; CHECK-NEXT: csel w9, w9, w8, lo
 ; CHECK-NEXT: cmp w10, #255
-; CHECK-NEXT: mov d3, v4.d[1]
-; CHECK-NEXT: csel w10, w10, w8, lo
+; CHECK-NEXT: mov w11, v1.s[1]
 ; CHECK-NEXT: mov v0.b[2], v1.b[0]
-; CHECK-NEXT: cmp w11, #255
+; CHECK-NEXT: csel w10, w10, w8, lo
+; CHECK-NEXT: fmov s2, w10
+; CHECK-NEXT: fcvtzu w10, d16
+; CHECK-NEXT: mov d16, v4.d[1]
+; CHECK-NEXT: mov v0.b[3], w11
 ; CHECK-NEXT: mov v2.s[1], w9
-; CHECK-NEXT: csel w11, w11, w8, lo
 ; CHECK-NEXT: fcvtzu w9, d3
-; CHECK-NEXT: mov d3, v5.d[1]
-; CHECK-NEXT: mov v0.b[3], w12
-; CHECK-NEXT: fcvtzu w12, d4
-; CHECK-NEXT: fmov s4, w11
-; CHECK-NEXT: mov w11, v2.s[1]
+; CHECK-NEXT: cmp w10, #255
+; CHECK-NEXT: csel w10, w10, w8, lo
 ; CHECK-NEXT: cmp w9, #255
-; CHECK-NEXT: csel w9, w9, w8, lo
-; CHECK-NEXT: cmp w12, #255
+; CHECK-NEXT: mov w11, v2.s[1]
 ; CHECK-NEXT: mov v0.b[4], v2.b[0]
-; CHECK-NEXT: csel w12, w12, w8, lo
-; CHECK-NEXT: mov v4.s[1], w10
-; CHECK-NEXT: fcvtzu w10, d3
-; CHECK-NEXT: fmov s3, w12
+; CHECK-NEXT: csel w9, w9, w8, lo
+; CHECK-NEXT: fmov s3, w9
+; CHECK-NEXT: fcvtzu w9, d16
 ; CHECK-NEXT: mov v0.b[5], w11
-; CHECK-NEXT: fcvtzu w11, d5
-; CHECK-NEXT: mov w12, v4.s[1]
+; CHECK-NEXT: mov v3.s[1], w10
+; CHECK-NEXT: fcvtzu w10, d4
+; CHECK-NEXT: mov d4, v5.d[1]
+; CHECK-NEXT: cmp w9, #255
+; CHECK-NEXT: csel w9, w9, w8, lo
 ; CHECK-NEXT: cmp w10, #255
-; CHECK-NEXT: csel w10, w10, w8, lo
-; CHECK-NEXT: mov d5, v6.d[1]
-; CHECK-NEXT: cmp w11, #255
-; CHECK-NEXT: mov v0.b[6], v4.b[0]
-; CHECK-NEXT: csel w11, w11, w8, lo
-; CHECK-NEXT: mov v3.s[1], w9
-; CHECK-NEXT: fcvtzu w9, d6
-; CHECK-NEXT: mov d6, v7.d[1]
-; CHECK-NEXT: mov v0.b[7], w12
-; CHECK-NEXT: fcvtzu w12, d5
-; CHECK-NEXT: fmov s5, w11
 ; CHECK-NEXT: mov w11, v3.s[1]
-; CHECK-NEXT: cmp w12, #255
-; CHECK-NEXT: mov v0.b[8], v3.b[0]
-; CHECK-NEXT: csel w12, w12, w8, lo
+; CHECK-NEXT: mov v0.b[6], v3.b[0]
+; CHECK-NEXT: csel w10, w10, w8, lo
+; CHECK-NEXT: fmov s16, w10
+; CHECK-NEXT: fcvtzu w10, d4
+; CHECK-NEXT: mov d4, v6.d[1]
+; CHECK-NEXT: mov v0.b[7], w11
+; CHECK-NEXT: mov v16.s[1], w9
+; CHECK-NEXT: fcvtzu w9, d5
+; CHECK-NEXT: cmp w10, #255
+; CHECK-NEXT: csel w10, w10, w8, lo
 ; CHECK-NEXT: cmp w9, #255
-; CHECK-NEXT: mov v5.s[1], w10
+; CHECK-NEXT: mov w11, v16.s[1]
+; CHECK-NEXT: mov v0.b[8], v16.b[0]
 ; CHECK-NEXT: csel w9, w9, w8, lo
-; CHECK-NEXT: fcvtzu w10, d6
+; CHECK-NEXT: fmov s5, w9
+; CHECK-NEXT: fcvtzu w9, d4
+; CHECK-NEXT: mov d4, v7.d[1]
 ; CHECK-NEXT: mov v0.b[9], w11
-; CHECK-NEXT: fcvtzu w11, d7
-; CHECK-NEXT: fmov s16, w9
-; CHECK-NEXT: mov w9, v5.s[1]
+; CHECK-NEXT: mov v5.s[1], w10
+; CHECK-NEXT: fcvtzu w10, d6
+; CHECK-NEXT: cmp w9, #255
+; CHECK-NEXT: csel w9, w9, w8, lo
 ; CHECK-NEXT: cmp w10, #255
 ; CHECK-NEXT: mov v0.b[10], v5.b[0]
-; CHECK-NEXT: mov v16.s[1], w12
-; CHECK-NEXT: mov v0.b[11], w9
-; CHECK-NEXT: csel w9, w10, w8, lo
-; CHECK-NEXT: cmp w11, #255
-; CHECK-NEXT: mov w10, v16.s[1]
-; CHECK-NEXT: csel w8, w11, w8, lo
-; CHECK-NEXT: mov v0.b[12], v16.b[0]
-; CHECK-NEXT: fmov s6, w8
-; CHECK-NEXT: mov v0.b[13], w10
+; CHECK-NEXT: mov w11, v5.s[1]
+; CHECK-NEXT: csel w10, w10, w8, lo
+; CHECK-NEXT: fmov s6, w10
+; CHECK-NEXT: fcvtzu w10, d7
+; CHECK-NEXT: mov v0.b[11], w11
 ; CHECK-NEXT: mov v6.s[1], w9
-; CHECK-NEXT: mov v0.b[14], v6.b[0]
-; CHECK-NEXT: mov w8, v6.s[1]
+; CHECK-NEXT: fcvtzu w9, d4
+; CHECK-NEXT: cmp w9, #255
+; CHECK-NEXT: mov v0.b[12], v6.b[0]
+; CHECK-NEXT: mov w11, v6.s[1]
+; CHECK-NEXT: csel w9, w9, w8, lo
+; CHECK-NEXT: cmp w10, #255
+; CHECK-NEXT: csel w8, w10, w8, lo
+; CHECK-NEXT: fmov s4, w8
+; CHECK-NEXT: mov v0.b[13], w11
+; CHECK-NEXT: mov v4.s[1], w9
+; CHECK-NEXT: mov v0.b[14], v4.b[0]
+; CHECK-NEXT: mov w8, v4.s[1]
 ; CHECK-NEXT: mov v0.b[15], w8
 ; CHECK-NEXT: ret
 %x = call <16 x i8> @llvm.fptoui.sat.v16f64.v16i8(<16 x double> %f)
@@ -2908,45 +2909,45 @@
 ; CHECK-LABEL: test_unsigned_v8f64_v8i16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov d4, v3.d[1]
-; CHECK-NEXT: fcvtzu w10, d3
-; CHECK-NEXT: mov d3, v2.d[1]
-; CHECK-NEXT: mov w8, #65535 // =0xffff
+; CHECK-NEXT: mov d5, v2.d[1]
+; CHECK-NEXT: mov w10, #65535 // =0xffff
+; CHECK-NEXT: fcvtzu w9, d3
+; CHECK-NEXT: mov d3, v1.d[1]
 ; CHECK-NEXT: fcvtzu w12, d2
-; CHECK-NEXT: fcvtzu w13, d1
-; CHECK-NEXT: fcvtzu w9, d4
-; CHECK-NEXT: mov d4, v1.d[1]
-; CHECK-NEXT: fcvtzu w11, d3
-; CHECK-NEXT: mov d1, v0.d[1]
-; CHECK-NEXT: cmp w9, w8
-; CHECK-NEXT: csel w9, w9, w8, lo
-; CHECK-NEXT: cmp w10, w8
-; CHECK-NEXT: csel w10, w10, w8, lo
-; CHECK-NEXT: cmp w11, w8
-; CHECK-NEXT: csel w11, w11, w8, lo
-; CHECK-NEXT: cmp w12, w8
-; CHECK-NEXT: csel w12, w12, w8, lo
-; CHECK-NEXT: fmov s19, w10
-; CHECK-NEXT: fcvtzu w10, d4
-; CHECK-NEXT: cmp w10, w8
-; CHECK-NEXT: mov v19.s[1], w9
-; CHECK-NEXT: csel w10, w10, w8, lo
-; CHECK-NEXT: cmp w13, w8
-; CHECK-NEXT: fmov s18, w12
-; CHECK-NEXT: fcvtzu w9, d1
-; CHECK-NEXT: csel w12, w13, w8, lo
-; CHECK-NEXT: fcvtzu w13, d0
-; CHECK-NEXT: mov v18.s[1], w11
-; CHECK-NEXT: cmp w9, w8
-; CHECK-NEXT: fmov s17, w12
-; CHECK-NEXT: csel w9, w9, w8, lo
-; CHECK-NEXT: cmp w13, w8
-; CHECK-NEXT: csel w8, w13, w8, lo
-; CHECK-NEXT: mov v17.s[1], w10
-; CHECK-NEXT: fmov s16, w8
+; CHECK-NEXT: fcvtzu w14, d1
+; CHECK-NEXT: fcvtzu w8, d4
+; CHECK-NEXT: mov d4, v0.d[1]
+; CHECK-NEXT: fcvtzu w11, d5
+; CHECK-NEXT: fcvtzu w13, d3
+; CHECK-NEXT: cmp w8, w10
+; CHECK-NEXT: fcvtzu w15, d4
+; CHECK-NEXT: csel w8, w8, w10, lo
+; CHECK-NEXT: cmp w9, w10
+; CHECK-NEXT: csel w9, w9, w10, lo
+; CHECK-NEXT: cmp w11, w10
+; CHECK-NEXT: fmov s4, w9
+; CHECK-NEXT: csel w9, w11, w10, lo
+; CHECK-NEXT: cmp w12, w10
+; CHECK-NEXT: fcvtzu w11, d0
+; CHECK-NEXT: mov v4.s[1], w8
+; CHECK-NEXT: csel w8, w12, w10, lo
+; CHECK-NEXT: cmp w13, w10
+; CHECK-NEXT: fmov s3, w8
+; CHECK-NEXT: csel w8, w13, w10, lo
+; CHECK-NEXT: cmp w14, w10
+; CHECK-NEXT: mov v3.s[1], w9
+; CHECK-NEXT: csel w9, w14, w10, lo
+; CHECK-NEXT: cmp w15, w10
+; CHECK-NEXT: fmov s2, w9
+; CHECK-NEXT: csel w9, w15, w10, lo
+; CHECK-NEXT: cmp w11, w10
+; CHECK-NEXT: mov v2.s[1], w8
+; CHECK-NEXT: csel w8, w11, w10, lo
+; CHECK-NEXT: fmov s1, w8
 ; CHECK-NEXT: adrp x8, .LCPI84_0
-; CHECK-NEXT: mov v16.s[1], w9
 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI84_0]
-; CHECK-NEXT: tbl v0.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v0.16b
+; CHECK-NEXT: mov v1.s[1], w9
+; CHECK-NEXT: tbl v0.16b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.16b
 ; CHECK-NEXT: ret
 %x = call <8 x i16> @llvm.fptoui.sat.v8f64.v8i16(<8 x double> %f)
 ret <8 x i16> %x
@@ -2957,78 +2958,78 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov d16, v3.d[1]
 ; CHECK-NEXT: mov d17, v2.d[1]
+; CHECK-NEXT: mov w8, #65535 // =0xffff
 ; CHECK-NEXT: fcvtzu w9, d3
 ; CHECK-NEXT: mov d3, v1.d[1]
-; CHECK-NEXT: mov w8, #65535 // =0xffff
-; CHECK-NEXT: fcvtzu w10, d2
-; CHECK-NEXT: fcvtzu w12, d1
+; CHECK-NEXT: fcvtzu w11, d1
 ; CHECK-NEXT: mov d1, v0.d[1]
-; CHECK-NEXT: fcvtzu w11, d16
-; CHECK-NEXT: fcvtzu w13, d17
-; CHECK-NEXT: fcvtzu w14, d3
-; CHECK-NEXT: mov d2, v7.d[1]
-; CHECK-NEXT: fcvtzu w17, d6
-; CHECK-NEXT: cmp w11, w8
-; CHECK-NEXT: fcvtzu w15, d1
-; CHECK-NEXT: csel w11, w11, w8, lo
+; CHECK-NEXT: fcvtzu w10, d2
+; CHECK-NEXT: fcvtzu w13, d0
+; CHECK-NEXT: mov d0, v7.d[1]
+; CHECK-NEXT: mov d2, v6.d[1]
+; CHECK-NEXT: fcvtzu w15, d7
+; CHECK-NEXT: fcvtzu w12, d16
+; CHECK-NEXT: fcvtzu w14, d17
+; CHECK-NEXT: fcvtzu w16, d6
+; CHECK-NEXT: fcvtzu w17, d3
+; CHECK-NEXT: mov d6, v5.d[1]
+; CHECK-NEXT: mov d3, v4.d[1]
+; CHECK-NEXT: fcvtzu w18, d1
+; CHECK-NEXT: cmp w12, w8
+; CHECK-NEXT: csel w12, w12, w8, lo
 ; CHECK-NEXT: cmp w9, w8
 ; CHECK-NEXT: csel w9, w9, w8, lo
-; CHECK-NEXT: cmp w13, w8
-; CHECK-NEXT: csel w13, w13, w8, lo
-; CHECK-NEXT: cmp w10, w8
-; CHECK-NEXT: csel w10, w10, w8, lo
 ; CHECK-NEXT: cmp w14, w8
 ; CHECK-NEXT: fmov s19, w9
 ; CHECK-NEXT: csel w9, w14, w8, lo
+; CHECK-NEXT: cmp w10, w8
 ; CHECK-NEXT: fcvtzu w14, d0
-; CHECK-NEXT: cmp w12, w8
-; CHECK-NEXT: fcvtzu w16, d2
-; CHECK-NEXT: mov d0, v6.d[1]
-; CHECK-NEXT: csel w12, w12, w8, lo
-; CHECK-NEXT: cmp w15, w8
-; CHECK-NEXT: mov v19.s[1], w11
-; CHECK-NEXT: fcvtzu w11, d7
-; CHECK-NEXT: fmov s18, w10
-; CHECK-NEXT: csel w10, w15, w8, lo
-; CHECK-NEXT: cmp w14, w8
-; CHECK-NEXT: csel w14, w14, w8, lo
-; CHECK-NEXT: cmp w16, w8
-; CHECK-NEXT: csel w15, w16, w8, lo
+; CHECK-NEXT: csel w10, w10, w8, lo
+; CHECK-NEXT: cmp w17, w8
+; CHECK-NEXT: mov v19.s[1], w12
+; CHECK-NEXT: csel w12, w17, w8, lo
 ; CHECK-NEXT: cmp w11, w8
-; CHECK-NEXT: fcvtzu w16, d0
-; CHECK-NEXT: mov d0, v5.d[1]
 ; CHECK-NEXT: csel w11, w11, w8, lo
-; CHECK-NEXT: mov v18.s[1], w13
-; CHECK-NEXT: cmp w16, w8
-; CHECK-NEXT: fmov s17, w12
+; CHECK-NEXT: cmp w18, w8
+; CHECK-NEXT: fmov s18, w10
+; CHECK-NEXT: csel w10, w18, w8, lo
+; CHECK-NEXT: cmp w13, w8
+; CHECK-NEXT: fcvtzu w17, d2
+; CHECK-NEXT: csel w13, w13, w8, lo
+; CHECK-NEXT: cmp w14, w8
+; CHECK-NEXT: fcvtzu w18, d6
+; CHECK-NEXT: mov v18.s[1], w9
+; CHECK-NEXT: csel w9, w14, w8, lo
+; CHECK-NEXT: cmp w15, w8
+; CHECK-NEXT: fmov s17, w11
+; CHECK-NEXT: csel w11, w15, w8, lo
+; CHECK-NEXT: fcvtzu w14, d5
 ; CHECK-NEXT: fmov s23, w11
-; CHECK-NEXT: csel w11, w16, w8, lo
 ; CHECK-NEXT: cmp w17, w8
-; CHECK-NEXT: fcvtzu w16, d0
-; CHECK-NEXT: mov d0, v4.d[1]
-; CHECK-NEXT: csel w13, w17, w8, lo
-; CHECK-NEXT: fcvtzu w17, d5
-; CHECK-NEXT: fcvtzu w12, d4
-; CHECK-NEXT: mov v23.s[1], w15
+; CHECK-NEXT: fcvtzu w15, d3
+; CHECK-NEXT: csel w11, w17, w8, lo
 ; CHECK-NEXT: cmp w16, w8
-; CHECK-NEXT: fmov s22, w13
-; CHECK-NEXT: csel w13, w16, w8, lo
-; CHECK-NEXT: cmp w17, w8
-; CHECK-NEXT: fcvtzu w16, d0
-; CHECK-NEXT: csel w15, w17, w8, lo
+; CHECK-NEXT: fcvtzu w17, d4
+; CHECK-NEXT: mov v17.s[1], w12
+; CHECK-NEXT: mov v23.s[1], w9
+; CHECK-NEXT: csel w9, w16, w8, lo
+; CHECK-NEXT: cmp w18, w8
+; CHECK-NEXT: fmov s22, w9
+; CHECK-NEXT: csel w9, w18, w8, lo
+; CHECK-NEXT: cmp w14, w8
+; CHECK-NEXT: fmov s16, w13
 ; CHECK-NEXT: mov v22.s[1], w11
-; CHECK-NEXT: cmp w16, w8
-; CHECK-NEXT: fmov s21, w15
-; CHECK-NEXT: csel w11, w16, w8, lo
-; CHECK-NEXT: cmp w12, w8
-; CHECK-NEXT: csel w8, w12, w8, lo
-; CHECK-NEXT: mov v17.s[1], w9
-; CHECK-NEXT: adrp x9, .LCPI85_0
-; CHECK-NEXT: mov v21.s[1], w13
-; CHECK-NEXT: fmov s16, w14
-; CHECK-NEXT: fmov s20, w8
-; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI85_0]
+; CHECK-NEXT: csel w11, w14, w8, lo
+; CHECK-NEXT: cmp w15, w8
+; CHECK-NEXT: fmov s21, w11
+; CHECK-NEXT: csel w11, w15, w8, lo
+; CHECK-NEXT: cmp w17, w8
+; CHECK-NEXT: csel w8, w17, w8, lo
 ; CHECK-NEXT: mov v16.s[1], w10
+; CHECK-NEXT: mov v21.s[1], w9
+; CHECK-NEXT: fmov s20, w8
+; CHECK-NEXT: adrp x8, .LCPI85_0
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI85_0]
 ; CHECK-NEXT: mov v20.s[1], w11
 ; CHECK-NEXT: tbl v0.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b
 ; CHECK-NEXT: tbl v1.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v1.16b
diff --git a/llvm/test/CodeGen/AArch64/fptrunc.ll b/llvm/test/CodeGen/AArch64/fptrunc.ll
--- a/llvm/test/CodeGen/AArch64/fptrunc.ll
+++ b/llvm/test/CodeGen/AArch64/fptrunc.ll
@@ -57,12 +57,12 @@
 ; CHECK-GI: // %bb.0: // %entry
 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
 ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-GI-NEXT: fcvt s2, d2
 ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-GI-NEXT: fcvt s1, d2
 ; CHECK-GI-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-GI-NEXT: mov s2, v0.s[1]
-; CHECK-GI-NEXT: mov v0.s[1], v2.s[0]
-; CHECK-GI-NEXT: mov v0.s[2], v1.s[0]
+; CHECK-GI-NEXT: mov s1, v0.s[1]
+; CHECK-GI-NEXT: mov v0.s[1], v1.s[0]
+; CHECK-GI-NEXT: mov v0.s[2], v2.s[0]
 ; CHECK-GI-NEXT: mov v0.s[3], v0.s[0]
 ; CHECK-GI-NEXT: ret
 entry:
diff --git a/llvm/test/CodeGen/AArch64/funnel-shift-rot.ll b/llvm/test/CodeGen/AArch64/funnel-shift-rot.ll
--- a/llvm/test/CodeGen/AArch64/funnel-shift-rot.ll
+++ b/llvm/test/CodeGen/AArch64/funnel-shift-rot.ll
@@ -40,12 +40,12 @@
 ; CHECK-LABEL: rotl_i16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: neg w8, w1
-; CHECK-NEXT: and w9, w1, #0xf
+; CHECK-NEXT: and w9, w0, #0xffff
+; CHECK-NEXT: and w10, w1, #0xf
 ; CHECK-NEXT: and w8, w8, #0xf
-; CHECK-NEXT: and w10, w0, #0xffff
-; CHECK-NEXT: lsl w9, w0, w9
-; CHECK-NEXT: lsr w8, w10, w8
-; CHECK-NEXT: orr w0, w9, w8
+; CHECK-NEXT: lsl w10, w0, w10
+; CHECK-NEXT: lsr w8, w9, w8
+; CHECK-NEXT: orr w0, w10, w8
 ; CHECK-NEXT: ret
 %f = call i16 @llvm.fshl.i16(i16 %x, i16 %x, i16 %z)
 ret i16 %f
@@ -132,10 +132,10 @@
 ; CHECK-LABEL: rotr_i16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: neg w8, w1
-; CHECK-NEXT: and w9, w1, #0xf
+; CHECK-NEXT: and w9, w0, #0xffff
+; CHECK-NEXT: and w10, w1, #0xf
 ; CHECK-NEXT: and w8, w8, #0xf
-; CHECK-NEXT: and w10, w0, #0xffff
-; CHECK-NEXT: lsr w9, w10, w9
+; CHECK-NEXT: lsr w9, w9, w10
 ; CHECK-NEXT: lsl w8, w0, w8
 ; CHECK-NEXT: orr w0, w9, w8
 ; CHECK-NEXT: ret
@@ -169,8 +169,8 @@
 ; CHECK-NEXT: movi v2.4s, #31
 ; CHECK-NEXT: neg v3.4s, v1.4s
 ; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
-; CHECK-NEXT: neg v1.4s, v1.4s
 ; CHECK-NEXT: and v2.16b, v3.16b, v2.16b
+; CHECK-NEXT: neg v1.4s, v1.4s
 ; CHECK-NEXT: ushl v2.4s, v0.4s, v2.4s
 ; CHECK-NEXT: ushl v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
diff --git a/llvm/test/CodeGen/AArch64/funnel-shift.ll b/llvm/test/CodeGen/AArch64/funnel-shift.ll
--- a/llvm/test/CodeGen/AArch64/funnel-shift.ll
+++ b/llvm/test/CodeGen/AArch64/funnel-shift.ll
@@ -19,11 +19,11 @@ define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: fshl_i32:
 ; CHECK: // %bb.0:
+; CHECK-NEXT: lsr w8, w1, #1
 ; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2
-; CHECK-NEXT: mvn w8, w2
-; CHECK-NEXT: lsr w9, w1, #1
+; CHECK-NEXT: mvn w9, w2
 ; CHECK-NEXT: lsl w10, w0, w2
-; CHECK-NEXT: lsr w8, w9, w8
+; CHECK-NEXT: lsr w8, w8, w9
 ; CHECK-NEXT: orr w0, w10, w8
 ; CHECK-NEXT: ret
 %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
@@ -33,10 +33,10 @@ define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) {
 ; CHECK-LABEL: fshl_i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w2
-; CHECK-NEXT: lsr x9, x1, #1
+; CHECK-NEXT: lsr x8, x1, #1
+; CHECK-NEXT: mvn w9, w2
 ; CHECK-NEXT: lsl x10, x0, x2
-; CHECK-NEXT: lsr x8, x9, x8
+; CHECK-NEXT: lsr x8, x8, x9
 ; CHECK-NEXT: orr x0, x10, x8
 ; CHECK-NEXT: ret
 %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 %z)
@@ -47,18 +47,18 @@
 ; CHECK-LABEL: fshl_i128:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: tst x4, #0x40
-; CHECK-NEXT: mvn w8, w4
+; CHECK-NEXT: mvn w11, w4
+; CHECK-NEXT: csel x8, x3, x0, ne
 ; CHECK-NEXT: csel x9, x2, x3, ne
-; CHECK-NEXT: csel x10, x3, x0, ne
-; CHECK-NEXT: lsr x9, x9, #1
-; CHECK-NEXT: lsl x11, x10, x4
 ; CHECK-NEXT: csel x12, x0, x1, ne
-; CHECK-NEXT: lsr x10, x10, #1
-; CHECK-NEXT: lsr x9, x9, x8
+; CHECK-NEXT: lsr x9, x9, #1
+; CHECK-NEXT: lsr x10, x8, #1
+; CHECK-NEXT: lsl x8, x8, x4
 ; CHECK-NEXT: lsl x12, x12, x4
-; CHECK-NEXT: lsr x8, x10, x8
-; CHECK-NEXT: orr x0, x11, x9
-; CHECK-NEXT: orr x1, x12, x8
+; CHECK-NEXT: lsr x9, x9, x11
+; CHECK-NEXT: lsr x10, x10, x11
+; CHECK-NEXT: orr x0, x8, x9
+; CHECK-NEXT: orr x1, x12, x10
 ; CHECK-NEXT: ret
 %f = call i128 @llvm.fshl.i128(i128 %x, i128 %y, i128 %z)
 ret i128 %f
@@ -69,18 +69,18 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) {
 ; CHECK-LABEL: fshl_i37:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov x9, #46053
+; CHECK-NEXT: mov x9, #46053 // =0xb3e5
 ; CHECK-NEXT: and x8, x2, #0x1fffffffff
 ; CHECK-NEXT: movk x9, #12398, lsl #16
-; CHECK-NEXT: ubfiz x10, x1, #26, #37
 ; CHECK-NEXT: movk x9, #15941, lsl #32
 ; CHECK-NEXT: movk x9, #1771, lsl #48
 ; CHECK-NEXT: umulh x8, x8, x9
-; CHECK-NEXT: mov w9, #37
+; CHECK-NEXT: mov w9, #37 // =0x25
 ; CHECK-NEXT: msub w8, w8, w9, w2
-; CHECK-NEXT: mvn w9, w8
+; CHECK-NEXT: ubfiz x9, x1, #26, #37
+; CHECK-NEXT: mvn w10, w8
 ; CHECK-NEXT: lsl x8, x0, x8
-; CHECK-NEXT: lsr x9, x10, x9
+; CHECK-NEXT: lsr x9, x9, x10
 ; CHECK-NEXT: orr x0, x8, x9
 ; CHECK-NEXT: ret
 %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z)
@@ -93,7 +93,7 @@ define i7 @fshl_i7_const_fold() {
 ; CHECK-LABEL: fshl_i7_const_fold:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w0, #67
+; CHECK-NEXT: mov w0, #67 // =0x43
 ; CHECK-NEXT: ret
 %f = call i7 @llvm.fshl.i7(i7 112, i7 127, i7 2)
 ret i7 %f
@@ -102,7 +102,7 @@ define i8 @fshl_i8_const_fold_overshift_1() {
 ; CHECK-LABEL: fshl_i8_const_fold_overshift_1:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w0, #128
+; CHECK-NEXT: mov w0, #128 // =0x80
 ; CHECK-NEXT: ret
 %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 15)
 ret i8 %f
@@ -111,7 +111,7 @@ define i8 @fshl_i8_const_fold_overshift_2() {
 ; CHECK-LABEL: fshl_i8_const_fold_overshift_2:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w0, #120
+; CHECK-NEXT: mov w0, #120 // =0x78
 ; CHECK-NEXT: ret
 %f = call i8 @llvm.fshl.i8(i8 15, i8 15, i8 11)
 ret i8 %f
@@ -164,7 +164,7 @@ define i8 @fshl_i8_const_fold() {
 ; CHECK-LABEL: fshl_i8_const_fold:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w0, #128
+; CHECK-NEXT: mov w0, #128 // =0x80
 ; CHECK-NEXT: ret
 %f = call i8 @llvm.fshl.i8(i8 255, i8 0, i8 7)
 ret i8 %f
@@ -177,11 +177,11 @@ define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) {
 ; CHECK-LABEL: fshr_i32:
 ; CHECK: // %bb.0:
+; CHECK-NEXT: lsl w8, w0, #1
 ; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2
-; CHECK-NEXT: mvn w8, w2
-; CHECK-NEXT: lsl w9, w0, #1
+; CHECK-NEXT: mvn w9, w2
 ; CHECK-NEXT: lsr w10, w1, w2
-; CHECK-NEXT: lsl w8, w9, w8
+; CHECK-NEXT: lsl w8, w8, w9
 ; CHECK-NEXT: orr w0, w8, w10
 ; CHECK-NEXT: ret
 %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
@@ -191,10 +191,10 @@ define i64 @fshr_i64(i64 %x, i64 %y, i64 %z) {
 ; CHECK-LABEL: fshr_i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w2
-; CHECK-NEXT: lsl x9, x0, #1
+; CHECK-NEXT: lsl x8, x0, #1
+; CHECK-NEXT: mvn w9, w2
 ; CHECK-NEXT: lsr x10, x1, x2
-; CHECK-NEXT: lsl x8, x9, x8
+; CHECK-NEXT: lsl x8, x8, x9
 ; CHECK-NEXT: orr x0, x8, x10
 ; CHECK-NEXT: ret
 %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 %z)
@@ -206,20 +206,20 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) {
 ; CHECK-LABEL: fshr_i37:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov x9, #46053
+; CHECK-NEXT: mov x9, #46053 // =0xb3e5
 ; CHECK-NEXT: and x8, x2, #0x1fffffffff
+; CHECK-NEXT: lsl x10, x0, #1
 ; CHECK-NEXT: movk x9, #12398, lsl #16
-; CHECK-NEXT: lsl x10, x1, #27
 ; CHECK-NEXT: movk x9, #15941, lsl #32
-; CHECK-NEXT: lsl x11, x0, #1
 ; CHECK-NEXT: movk x9, #1771, lsl #48
 ; CHECK-NEXT: umulh x8, x8, x9
-; CHECK-NEXT: mov w9, #37
+; CHECK-NEXT: mov w9, #37 // =0x25
 ; CHECK-NEXT: msub w8, w8, w9, w2
+; CHECK-NEXT: lsl x9, x1, #27
 ; CHECK-NEXT: add w8, w8, #27
-; CHECK-NEXT: mvn w9, w8
-; CHECK-NEXT: lsr x8, x10, x8
-; CHECK-NEXT: lsl x9, x11, x9
+; CHECK-NEXT: mvn w11, w8
+; CHECK-NEXT: lsr x8, x9, x8
+; CHECK-NEXT: lsl x9, x10, x11
 ; CHECK-NEXT: orr x0, x9, x8
 ; CHECK-NEXT: ret
 %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)
@@ -232,7 +232,7 @@ define i7 @fshr_i7_const_fold() {
 ; CHECK-LABEL: fshr_i7_const_fold:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w0, #31
+; CHECK-NEXT: mov w0, #31 // =0x1f
 ; CHECK-NEXT: ret
 %f = call i7 @llvm.fshr.i7(i7 112, i7 127, i7 2)
 ret i7 %f
@@ -241,7 +241,7 @@ define i8 @fshr_i8_const_fold_overshift_1() {
 ; CHECK-LABEL: fshr_i8_const_fold_overshift_1:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w0, #254
+; CHECK-NEXT: mov w0, #254 // =0xfe
 ; CHECK-NEXT: ret
 %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 15)
 ret i8 %f
@@ -250,7 +250,7 @@ define i8 @fshr_i8_const_fold_overshift_2() {
 ; CHECK-LABEL: fshr_i8_const_fold_overshift_2:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w0, #225
+; CHECK-NEXT: mov w0, #225 // =0xe1
 ; CHECK-NEXT: ret
 %f = call i8 @llvm.fshr.i8(i8 15, i8 15, i8 11)
 ret i8 %f
@@ -259,7 +259,7 @@ define i8 @fshr_i8_const_fold_overshift_3() {
 ; CHECK-LABEL: fshr_i8_const_fold_overshift_3:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w0, #255
+; CHECK-NEXT: mov w0, #255 // =0xff
 ; CHECK-NEXT: ret
 %f = call i8 @llvm.fshr.i8(i8 0, i8 255, i8 8)
 ret i8 %f
@@ -303,7 +303,7 @@ define i8 @fshr_i8_const_fold() {
 ; CHECK-LABEL: fshr_i8_const_fold:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w0, #254
+; CHECK-NEXT: mov w0, #254 // =0xfe
 ; CHECK-NEXT: ret
 %f = call i8 @llvm.fshr.i8(i8 255, i8 0, i8 7)
 ret i8 %f
@@ -347,13 +347,13 @@
 ; CHECK-LABEL: or_shl_fshl:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov w8, w2
-; CHECK-NEXT: mvn w9, w2
-; CHECK-NEXT: lsr w10, w1, #1
-; CHECK-NEXT: lsr w9, w10, w9
-; CHECK-NEXT: lsl w8, w0, w8
+; CHECK-NEXT: lsr w9, w1, #1
 ; CHECK-NEXT: lsl w10, w1, w2
-; CHECK-NEXT: orr w8, w8, w9
-; CHECK-NEXT: orr w0, w8, w10
+; CHECK-NEXT: mvn w11, w2
+; CHECK-NEXT: lsl w8, w0, w8
+; CHECK-NEXT: lsr w9, w9, w11
+; CHECK-NEXT: orr w8, w8, w10
+; CHECK-NEXT: orr w0, w8, w9
 ; CHECK-NEXT: ret
 %shy = shl i32 %y, %s
 %fun = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %s)
@@ -379,13 +379,13 @@
 ; CHECK-LABEL: or_shl_fshl_commute:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov w8, w2
-; CHECK-NEXT: mvn w9, w2
-; CHECK-NEXT: lsr w10, w1, #1
-; CHECK-NEXT: lsr w9, w10, w9
-; CHECK-NEXT: lsl w8, w0, w8
+; CHECK-NEXT: lsr w9, w1, #1
 ; CHECK-NEXT: lsl w10, w1, w2
-; CHECK-NEXT: orr w8, w8, w9
-; CHECK-NEXT: orr w0, w10, w8
+; CHECK-NEXT: mvn w11, w2
+; CHECK-NEXT: lsl w8, w0, w8
+; CHECK-NEXT: lsr w9, w9, w11
+; CHECK-NEXT: orr w8, w10, w8
+; CHECK-NEXT: orr w0, w8, w9
 ; CHECK-NEXT: ret
 %shy = shl i32 %y, %s
 %fun = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %s)
@@ -411,13 +411,13 @@
 ; CHECK-LABEL: or_lshr_fshr:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov w8, w2
-; CHECK-NEXT: mvn w9, w2
-; CHECK-NEXT: lsl w10, w1, #1
-; CHECK-NEXT: lsr w8, w0, w8
-; CHECK-NEXT: lsl w9, w10, w9
+; CHECK-NEXT: lsl w9, w1, #1
 ; CHECK-NEXT: lsr w10, w1, w2
-; CHECK-NEXT: orr w8, w9, w8
-; CHECK-NEXT: orr w0, w8, w10
+; CHECK-NEXT: lsr w8, w0, w8
+; CHECK-NEXT: mvn w11, w2
+; CHECK-NEXT: lsl w9, w9, w11
+; CHECK-NEXT: orr w8, w8, w10
+; CHECK-NEXT: orr w0, w9, w8
 ; CHECK-NEXT: ret
 %shy = lshr i32 %y, %s
 %fun = call i32 @llvm.fshr.i32(i32 %y, i32 %x, i32 %s)
@@ -442,13 +442,13 @@
 ; CHECK-LABEL: or_lshr_fshr_commute:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov w8, w2
-; CHECK-NEXT: mvn w9, w2
-; CHECK-NEXT: lsl w10, w1, #1
-; CHECK-NEXT: lsr w8, w0, w8
-; CHECK-NEXT: lsl w9, w10, w9
+; CHECK-NEXT: lsl w9, w1, #1
 ; CHECK-NEXT: lsr w10, w1, w2
-; CHECK-NEXT: orr w8, w9, w8
-; CHECK-NEXT: orr w0, w10, w8
+; CHECK-NEXT: lsr w8, w0, w8
+; CHECK-NEXT: mvn w11, w2
+; CHECK-NEXT: lsl w9, w9, w11
+; CHECK-NEXT: orr w8, w10, w8
+; CHECK-NEXT: orr w0, w8, w9
 ; CHECK-NEXT: ret
 %shy = lshr i32 %y, %s
 %fun = call i32 @llvm.fshr.i32(i32 %y, i32 %x, i32 %s)
@@ -472,11 +472,11 @@ define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) {
 ; CHECK-LABEL: or_shl_fshl_simplify:
 ; CHECK: // %bb.0:
+; CHECK-NEXT: lsr w8, w0, #1
 ; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2
-; CHECK-NEXT: mvn w8, w2
-; CHECK-NEXT: lsr w9, w0, #1
+; CHECK-NEXT: mvn w9, w2
 ; CHECK-NEXT: lsl w10, w1, w2
-; CHECK-NEXT: lsr w8, w9, w8
+; CHECK-NEXT: lsr w8, w8, w9
 ; CHECK-NEXT: orr w0, w10, w8
 ; CHECK-NEXT: ret
 %shy = shl i32 %y, %s
@@ -488,11 +488,11 @@ define i32 @or_lshr_fshr_simplify(i32 %x, i32 %y, i32 %s) {
 ; CHECK-LABEL: or_lshr_fshr_simplify:
 ; CHECK: // %bb.0:
+; CHECK-NEXT: lsl w8, w0, #1
 ; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2
-; CHECK-NEXT: mvn w8, w2
-; CHECK-NEXT: lsl w9, w0, #1
+; CHECK-NEXT: mvn w9, w2
 ; CHECK-NEXT: lsr w10, w1, w2
-; CHECK-NEXT: lsl w8, w9, w8
+; CHECK-NEXT: lsl w8, w8, w9
 ; CHECK-NEXT: orr w0, w8, w10
 ; CHECK-NEXT: ret
 %shy = lshr i32 %y, %s
diff --git a/llvm/test/CodeGen/AArch64/global-merge-3.ll b/llvm/test/CodeGen/AArch64/global-merge-3.ll
--- a/llvm/test/CodeGen/AArch64/global-merge-3.ll
+++ b/llvm/test/CodeGen/AArch64/global-merge-3.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; RUN: llc %s -mtriple=aarch64-none-linux-gnu -aarch64-enable-global-merge -global-merge-on-external -disable-post-ra -o - | FileCheck %s
 ; RUN: llc %s -mtriple=aarch64-linux-gnuabi -aarch64-enable-global-merge -global-merge-on-external -disable-post-ra -o - | FileCheck %s
 ; RUN: llc %s -mtriple=aarch64-apple-ios -aarch64-enable-global-merge -global-merge-on-external -disable-post-ra -o - | FileCheck %s --check-prefix=CHECK-APPLE-IOS
@@ -7,19 +8,28 @@
 @z = internal global i32 1, align 4
 define dso_local void @f1(i32 %a1, i32 %a2, i32 %a3) {
-;CHECK-APPLE-IOS: adrp x8, _z@PAGE
-;CHECK-APPLE-IOS: adrp x9, __MergedGlobals_x@PAGE+12
-;CHECK-APPLE-IOS-NOT: adrp
-;CHECK-APPLE-IOS: add x9, x9, __MergedGlobals_x@PAGEOFF+12
-;CHECK-APPLE-IOS: str w1, [x9, #400]
-;CHECK-APPLE-IOS: str w0, [x9]
-;CHECK-APPLE-IOS: str w2, [x8, _z@PAGEOFF]
-;CHECK: adrp x8, z
-;CHECK: adrp x9, .L_MergedGlobals+12
-;CHECK: add x9, x9, :lo12:.L_MergedGlobals+12
-;CHECK: str w1, [x9, #400]
-;CHECK: str w0, [x9]
-;CHECK: str w2, [x8, :lo12:z]
+; CHECK-LABEL: f1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: adrp x8, .L_MergedGlobals+12
+; CHECK-NEXT: add x8, x8, :lo12:.L_MergedGlobals+12
+; CHECK-NEXT: str w0, [x8]
+; CHECK-NEXT: str w1, [x8, #400]
+; CHECK-NEXT: adrp x8, z
+; CHECK-NEXT: str w2, [x8, :lo12:z]
+; CHECK-NEXT: ret
+;
+; CHECK-APPLE-IOS-LABEL: f1:
+; CHECK-APPLE-IOS: ; %bb.0:
+; CHECK-APPLE-IOS-NEXT: Lloh0:
+; CHECK-APPLE-IOS-NEXT: adrp x8, __MergedGlobals_x@PAGE+12
+; CHECK-APPLE-IOS-NEXT: Lloh1:
+; CHECK-APPLE-IOS-NEXT: add x8, x8, __MergedGlobals_x@PAGEOFF+12
+; CHECK-APPLE-IOS-NEXT: str w0, [x8]
+; CHECK-APPLE-IOS-NEXT: str w1, [x8, #400]
+; CHECK-APPLE-IOS-NEXT: adrp x8, _z@PAGE
+; CHECK-APPLE-IOS-NEXT: str w2, [x8, _z@PAGEOFF]
+; CHECK-APPLE-IOS-NEXT: ret
+; CHECK-APPLE-IOS-NEXT: .loh AdrpAdd Lloh0, Lloh1
 %x3 = getelementptr inbounds [100 x i32], ptr @x, i32 0, i64 3
 %y3 = getelementptr inbounds [100 x i32], ptr @y, i32 0, i64 3
 store i32 %a1, ptr %x3, align 4
diff --git a/llvm/test/CodeGen/AArch64/gpr_cttz.ll b/llvm/test/CodeGen/AArch64/gpr_cttz.ll
--- a/llvm/test/CodeGen/AArch64/gpr_cttz.ll
+++ b/llvm/test/CodeGen/AArch64/gpr_cttz.ll
@@ -103,24 +103,24 @@ define i128 @cttz128(i128 %x) nounwind readnone {
 ; CHECK-LABEL: cttz128:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: rbit x9, x1
-; CHECK-NEXT: rbit x8, x0
-; CHECK-NEXT: clz x9, x9
-; CHECK-NEXT: clz x8, x8
-; CHECK-NEXT: add x9, x9, #64
+; CHECK-NEXT: rbit x8, x1
+; CHECK-NEXT: rbit x9, x0
 ; CHECK-NEXT: cmp x0, #0
-; CHECK-NEXT: csel x0, x8, x9, ne
 ; CHECK-NEXT: mov x1, xzr
+; CHECK-NEXT: clz x8, x8
+; CHECK-NEXT: clz x9, x9
+; CHECK-NEXT: add x8, x8, #64
+; CHECK-NEXT: csel x0, x9, x8, ne
 ; CHECK-NEXT: ret
 ;
 ; CHECK-CSSC-LABEL: cttz128:
 ; CHECK-CSSC: // %bb.0:
-; CHECK-CSSC-NEXT: ctz x9, x1
-; CHECK-CSSC-NEXT: ctz x8, x0
-; CHECK-CSSC-NEXT: add x9, x9, #64
+; CHECK-CSSC-NEXT: ctz x8, x1
+; CHECK-CSSC-NEXT: ctz x9, x0
 ; CHECK-CSSC-NEXT: cmp x0, #0
-; CHECK-CSSC-NEXT: csel x0, x8, x9, ne
+; CHECK-CSSC-NEXT: add x8, x8, #64
 ; CHECK-CSSC-NEXT: mov x1, xzr
+; CHECK-CSSC-NEXT: csel x0, x9, x8, ne
 ; CHECK-CSSC-NEXT: ret
 %ctz = tail call i128 @llvm.cttz.i128(i128 %x)
 ret i128 %ctz
diff --git a/llvm/test/CodeGen/AArch64/half.ll b/llvm/test/CodeGen/AArch64/half.ll
--- a/llvm/test/CodeGen/AArch64/half.ll
+++ b/llvm/test/CodeGen/AArch64/half.ll
@@ -99,16 +99,16 @@ define i16 @test_fccmp(i1 %a, i16 %in) {
 ; CHECK-LABEL: test_fccmp:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #24576
 ; CHECK-NEXT: fmov s0, w1
+; CHECK-NEXT: mov w8, #24576 // =0x6000
 ; CHECK-NEXT: movk w8, #15974, lsl #16
-; CHECK-NEXT: fcvt s0, h0
 ; CHECK-NEXT: fmov s1, w8
-; CHECK-NEXT: mov w8, #16384
+; CHECK-NEXT: mov w8, #16384 // =0x4000
+; CHECK-NEXT: fcvt s0, h0
 ; CHECK-NEXT: movk w8, #15428, lsl #16
-; CHECK-NEXT: fcmp s0, s1
 ; CHECK-NEXT: fmov s2, w8
-; CHECK-NEXT: mov w8, #4
+; CHECK-NEXT: mov w8, #4 // =0x4
+; CHECK-NEXT: fcmp s0, s1
 ; CHECK-NEXT: fccmp s0, s2, #8, pl
 ; CHECK-NEXT: csinc w8, w8, wzr, mi
 ; CHECK-NEXT: fcmp s0, s1
diff --git a/llvm/test/CodeGen/AArch64/highextractbitcast.ll b/llvm/test/CodeGen/AArch64/highextractbitcast.ll
--- a/llvm/test/CodeGen/AArch64/highextractbitcast.ll
+++ b/llvm/test/CodeGen/AArch64/highextractbitcast.ll
@@ -90,10 +90,10 @@
 ;
 ; CHECK-BE-LABEL: test_smull_high_s16_bitcasta2:
 ; CHECK-BE: // %bb.0: // %entry
-; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
 ; CHECK-BE-NEXT: rev64 v1.8h, v1.8h
 ; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
 ; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
+; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
 ; CHECK-BE-NEXT: rev64 v0.4h, v0.4h
 ; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
 ; CHECK-BE-NEXT: smull v0.4s, v0.4h, v1.4h
@@ -147,10 +147,10 @@
 ;
 ; CHECK-BE-LABEL: test_smull_high_s16_bitcasta1_wrongindex:
 ; CHECK-BE: // %bb.0: // %entry
-; CHECK-BE-NEXT: rev64 v0.8h, v0.8h
 ; CHECK-BE-NEXT: rev64 v1.8h, v1.8h
-; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
+; CHECK-BE-NEXT: rev64 v0.8h, v0.8h
 ; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
+; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8
 ; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #4
 ; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8
 ; CHECK-BE-NEXT: smull v0.4s, v0.4h, v1.4h
diff --git a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
--- a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
+++ b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll
@@ -202,8 +202,8 @@ define <4 x i1> @vec_4xi32_nonsplat_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
 ; CHECK-LABEL: vec_4xi32_nonsplat_eq:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI13_0
 ; CHECK-NEXT: neg v1.4s, v1.4s
+; CHECK-NEXT: adrp x8, .LCPI13_0
 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI13_0]
 ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s
 ; CHECK-NEXT: and v0.16b, v1.16b, v0.16b
@@ -285,7 +285,7 @@ define i1 @scalar_i32_x_is_const_eq(i32 %y) nounwind {
 ; CHECK-LABEL: scalar_i32_x_is_const_eq:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #43605
+; CHECK-NEXT: mov w8, #43605 // =0xaa55
 ; CHECK-NEXT: movk w8, #43605, lsl #16
 ; CHECK-NEXT: lsr w8, w8, w0
 ; CHECK-NEXT: tst w8, #0x1
@@ -299,7 +299,7 @@ define i1 @scalar_i32_x_is_const2_eq(i32 %y) nounwind {
 ; CHECK-LABEL: scalar_i32_x_is_const2_eq:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #1
+; CHECK-NEXT: mov w8, #1 // =0x1
 ; CHECK-NEXT: lsr w8, w8, w0
 ; CHECK-NEXT: cmp w8, #0
 ; CHECK-NEXT: cset w0, eq
@@ -324,7 +324,7 @@ define i1 @scalar_i8_signbit_eq_with_nonzero(i8 %x, i8 %y) nounwind {
 ; CHECK-LABEL: scalar_i8_signbit_eq_with_nonzero:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #128
+; CHECK-NEXT: mov w8, #128 // =0x80
 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT: lsr w8, w8, w1
 ; CHECK-NEXT: and w8, w8, w0
diff --git a/llvm/test/CodeGen/AArch64/i128-math.ll b/llvm/test/CodeGen/AArch64/i128-math.ll
--- a/llvm/test/CodeGen/AArch64/i128-math.ll
+++ b/llvm/test/CodeGen/AArch64/i128-math.ll
@@ -262,19 +262,19 @@ define { i128, i8 } @u128_checked_mul(i128 %x, i128 %y) {
 ; CHECK-LABEL: u128_checked_mul:
 ; CHECK: // %bb.0:
+; CHECK-NEXT: mul x9, x3, x0
 ; CHECK-NEXT: cmp x1, #0
-; CHECK-NEXT: umulh x8, x1, x2
 ; CHECK-NEXT: ccmp x3, #0, #4, ne
-; CHECK-NEXT: mul x9, x3, x0
+; CHECK-NEXT: umulh x8, x1, x2
+; CHECK-NEXT: umulh x10, x3, x0
 ; CHECK-NEXT: madd x9, x1, x2, x9
 ; CHECK-NEXT: ccmp xzr, x8, #0, eq
-; CHECK-NEXT: umulh x8, x3, x0
-; CHECK-NEXT: ccmp xzr, x8, #0, eq
-; CHECK-NEXT: umulh x8, x0, x2
+; CHECK-NEXT: umulh x11, x0, x2
+; CHECK-NEXT: ccmp xzr, x10, #0, eq
 ; CHECK-NEXT: mul x0, x0, x2
-; CHECK-NEXT: cset w10, ne
-; CHECK-NEXT: adds x1, x8, x9
-; CHECK-NEXT: csinc w8, w10, wzr, lo
+; CHECK-NEXT: cset w8, ne
+; CHECK-NEXT: adds x1, x11, x9
+; CHECK-NEXT: csinc w8, w8, wzr, lo
 ; CHECK-NEXT: eor w2, w8, #0x1
 ; CHECK-NEXT: ret
 %1 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %x, i128 %y)
@@ -290,19 +290,19 @@ define { i128, i8 } @u128_overflowing_mul(i128 %x, i128 %y) {
 ; CHECK-LABEL: u128_overflowing_mul:
 ; CHECK: // %bb.0:
+; CHECK-NEXT: mul x9, x3, x0
 ; CHECK-NEXT: cmp x1, #0
-; CHECK-NEXT: umulh x8, x1, x2
 ; CHECK-NEXT: ccmp x3, #0, #4, ne
-; CHECK-NEXT: umulh x9, x3, x0
+; CHECK-NEXT: umulh x8, x1, x2
+; CHECK-NEXT: umulh x10, x3, x0
+; CHECK-NEXT: madd x9, x1, x2, x9
 ; CHECK-NEXT: ccmp xzr, x8, #0, eq
-; CHECK-NEXT: mul x8, x3, x0
-; CHECK-NEXT: madd x8, x1, x2, x8
-; CHECK-NEXT: ccmp xzr, x9, #0, eq
-; CHECK-NEXT: umulh x9, x0, x2
+; CHECK-NEXT: umulh x11, x0, x2
+; CHECK-NEXT: ccmp xzr, x10, #0, eq
 ; CHECK-NEXT: mul x0, x0, x2
-; CHECK-NEXT: cset w10, ne
-; CHECK-NEXT: adds x1, x9, x8
-; CHECK-NEXT: csinc w2, w10, wzr, lo
+; CHECK-NEXT: cset w8, ne
+; CHECK-NEXT: adds x1, x11, x9
+; CHECK-NEXT: csinc w2, w8, wzr, lo
 ; CHECK-NEXT: ret
 %1 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %x, i128 %y)
 %2 = extractvalue { i128, i1 } %1, 0
@@ -316,22 +316,22 @@ define i128 @u128_saturating_mul(i128 %x, i128 %y) {
 ; CHECK-LABEL: u128_saturating_mul:
 ; CHECK: // %bb.0:
+; CHECK-NEXT: mul x9, x3, x0
 ; CHECK-NEXT: cmp x1, #0
-; CHECK-NEXT: umulh x8, x1, x2
 ; CHECK-NEXT: ccmp x3, #0, #4, ne
-; CHECK-NEXT: umulh x9, x3, x0
+; CHECK-NEXT: umulh x8, x1, x2
+; CHECK-NEXT: umulh x10, x3, x0
+; CHECK-NEXT: madd x9, x1, x2, x9
 ; CHECK-NEXT: ccmp xzr, x8, #0, eq
-; CHECK-NEXT: mul x8, x3, x0
-; CHECK-NEXT: madd x8, x1, x2, x8
-; CHECK-NEXT: ccmp xzr, x9, #0, eq
-; CHECK-NEXT: umulh x9, x0, x2
+; CHECK-NEXT: umulh x11, x0, x2
+; CHECK-NEXT: ccmp xzr, x10, #0, eq
+; CHECK-NEXT: mul x8, x0, x2
 ; CHECK-NEXT: cset w10, ne
-; CHECK-NEXT: adds x8, x9, x8
-; CHECK-NEXT: csinc w9, w10, wzr, lo
-; CHECK-NEXT: mul x10, x0, x2
-; CHECK-NEXT: cmp w9, #0
-; CHECK-NEXT: csinv x0, x10, xzr, eq
-; CHECK-NEXT: csinv x1, x8, xzr, eq
+; CHECK-NEXT: adds x9, x11, x9
+; CHECK-NEXT: csinc w10, w10, wzr, lo
+; CHECK-NEXT: cmp w10, #0
+; CHECK-NEXT: csinv x0, x8, xzr, eq
+; CHECK-NEXT: csinv x1, x9, xzr, eq
 ; CHECK-NEXT: ret
 %1 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %x, i128 %y)
 %2 = extractvalue { i128, i1 } %1, 0
@@ -411,13 +411,13 @@
 ; CHECK-NEXT: mov x20, x1
 ; CHECK-NEXT: str xzr, [sp, #8]
 ; CHECK-NEXT: bl __muloti4
-; CHECK-NEXT: ldr x8, [sp, #8]
-; CHECK-NEXT: eor x9, x19, x20
+; CHECK-NEXT: eor x8, x19, x20
+; CHECK-NEXT: ldr x9, [sp, #8]
+; CHECK-NEXT: asr x8, x8, #63
 ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: asr x9, x9, #63
-; CHECK-NEXT: eor x10, x9, #0x7fffffffffffffff
-; CHECK-NEXT: cmp x8, #0
-; CHECK-NEXT: csinv x0, x0, x9, eq
+; CHECK-NEXT: cmp x9, #0
+; CHECK-NEXT: eor x10, x8, #0x7fffffffffffffff
+; CHECK-NEXT: csinv x0, x0, x8, eq
 ; CHECK-NEXT: csel x1, x10, x1, ne
 ; CHECK-NEXT: ldr x30, [sp], #32 // 8-byte Folded Reload
 ; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll b/llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll
--- a/llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll
+++ b/llvm/test/CodeGen/AArch64/i128_volatile_load_store.ll
@@ -39,11 +39,11 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: adrp x8, x
 ; CHECK-NEXT: add x8, x8, :lo12:x
-; CHECK-NEXT: add x8, x8, #512
 ; CHECK-NEXT: adrp x10, y
 ; CHECK-NEXT: add x10, x10, :lo12:y
-; CHECK-NEXT: add x10, x10, #512
+; CHECK-NEXT: add x8, x8, #512
 ; CHECK-NEXT: ldp x8, x9, [x8]
+; CHECK-NEXT: add x10, x10, #512
 ; CHECK-NEXT: stp x8, x9, [x10]
 ; CHECK-NEXT: ret
 %tmp = load volatile i128, ptr getelementptr (i8, ptr @x, i64 512)
@@ -71,11 +71,11 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: adrp x8, x
 ; CHECK-NEXT: add x8, x8, :lo12:x
-; CHECK-NEXT: sub x8, x8, #520
 ; CHECK-NEXT: adrp x10, y
 ; CHECK-NEXT: add x10, x10, :lo12:y
-; CHECK-NEXT: sub x10, x10, #520
+; CHECK-NEXT: sub x8, x8, #520
 ; CHECK-NEXT: ldp x8, x9, [x8]
+; CHECK-NEXT: sub x10, x10, #520
 ; CHECK-NEXT: stp x8, x9, [x10]
 ; CHECK-NEXT: ret
 %tmp = load volatile i128, ptr getelementptr (i8, ptr @x, i64 -520)
@@ -88,11 +88,11 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: adrp x8, x
 ; CHECK-NEXT: add x8, x8, :lo12:x
-; CHECK-NEXT: sub x8, x8, #520
 ; CHECK-NEXT: adrp x10, y
 ; CHECK-NEXT: add x10, x10, :lo12:y
-; CHECK-NEXT: sub x10, x10, #520
+; CHECK-NEXT: sub x8, x8, #520
 ; CHECK-NEXT: ldp x8, x9, [x8]
+; CHECK-NEXT: sub x10, x10, #520
 ; CHECK-NEXT: stp x8, x9, [x10]
 ; CHECK-NEXT: ret
 %tmp = load volatile i128, ptr getelementptr (i8, ptr @x, i64 -520)
@@ -105,11 +105,11 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: adrp x8, x
 ; CHECK-NEXT: add x8, x8, :lo12:x
-; CHECK-NEXT: add x8, x8, #503
 ; CHECK-NEXT: adrp x10, y
 ; CHECK-NEXT: add x10, x10, :lo12:y
-; CHECK-NEXT: add x10, x10, #503
+; CHECK-NEXT: add x8, x8, #503
 ; CHECK-NEXT: ldp x8, x9, [x8]
+; CHECK-NEXT: add x10, x10, #503
 ; CHECK-NEXT: stp x8, x9, [x10]
 ; CHECK-NEXT: ret
 %tmp = load volatile i128, ptr getelementptr (i8, ptr @x, i64 503)
diff --git a/llvm/test/CodeGen/AArch64/illegal-floating-point-vector-compares.ll b/llvm/test/CodeGen/AArch64/illegal-floating-point-vector-compares.ll
--- a/llvm/test/CodeGen/AArch64/illegal-floating-point-vector-compares.ll
+++ b/llvm/test/CodeGen/AArch64/illegal-floating-point-vector-compares.ll
@@ -8,8 +8,8 @@
 ; CHECK-LABEL: unordered_floating_point_compare_on_v8f32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: fcmgt v1.4s, v1.4s, #0.0
-; CHECK-NEXT: mov w8, #1 // =0x1
 ; CHECK-NEXT: fcmgt v0.4s, v0.4s, #0.0
+; CHECK-NEXT: mov w8, #1 // =0x1
 ; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h
 ; CHECK-NEXT: mvn v0.16b, v0.16b
 ; CHECK-NEXT: xtn v0.8b, v0.8h
@@ -27,8 +27,8 @@
 ; CHECK-LABEL: unordered_floating_point_compare_on_v16f32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: fcmgt v3.4s, v3.4s, #0.0
-; CHECK-NEXT: mov w8, #1 // =0x1
 ; CHECK-NEXT: fcmgt v2.4s, v2.4s, #0.0
+; CHECK-NEXT: mov w9, #1 // =0x1
 ; CHECK-NEXT: fcmgt v1.4s, v1.4s, #0.0
 ; CHECK-NEXT: fcmgt v0.4s, v0.4s, #0.0
 ; CHECK-NEXT: uzp1 v2.8h, v2.8h, v3.8h
@@ -36,8 +36,8 @@
 ; CHECK-NEXT: uzp1 v0.16b, v0.16b, v2.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
 ; CHECK-NEXT: umaxv b0, v0.16b
-; CHECK-NEXT: fmov w9, s0
-; CHECK-NEXT: bic w0, w8, w9
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: bic w0, w9, w8
 ; CHECK-NEXT: ret
 %a_cmp = fcmp ule <16 x float> %a_vec, zeroinitializer
 %cmp_result = bitcast <16 x i1> %a_cmp to i16
@@ -49,8 +49,8 @@
 ; CHECK-LABEL: unordered_floating_point_compare_on_v32f32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: fcmgt v3.4s, v3.4s, #0.0
-; CHECK-NEXT: mov w9, #1 // =0x1
 ; CHECK-NEXT: fcmgt v2.4s, v2.4s, #0.0
+; CHECK-NEXT: mov w9, #1 // =0x1
 ; CHECK-NEXT: fcmgt v1.4s, v1.4s, #0.0
 ; CHECK-NEXT: fcmgt v0.4s, v0.4s, #0.0
 ; CHECK-NEXT: fcmgt v7.4s, v7.4s, #0.0
diff --git a/llvm/test/CodeGen/AArch64/insert-extend.ll b/llvm/test/CodeGen/AArch64/insert-extend.ll
--- a/llvm/test/CodeGen/AArch64/insert-extend.ll
+++ b/llvm/test/CodeGen/AArch64/insert-extend.ll
@@ -50,112 +50,118 @@
 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1
 ; CHECK-NEXT: sxtw x8, w1
 ; CHECK-NEXT: sxtw x9, w3
-; CHECK-NEXT: add x10, x0, x8
-; CHECK-NEXT: add x11, x2, x9
 ; CHECK-NEXT: ldr d0, [x0]
-; CHECK-NEXT: add x12, x10, x8
 ; CHECK-NEXT: ldr d1, [x2]
+; CHECK-NEXT: add x10, x0, x8
+; CHECK-NEXT: add x11, x2, x9
 ; CHECK-NEXT: ldr d2, [x10]
-; CHECK-NEXT: add x10, x11, x9
 ; CHECK-NEXT: ldr d3, [x11]
-; CHECK-NEXT: ldr d4, [x12]
-; CHECK-NEXT: ldr d5, [x10]
-; CHECK-NEXT: ldr d6, [x12, x8]
-; CHECK-NEXT: ldr d7, [x10, x9]
 ; CHECK-NEXT: usubl v0.8h, v0.8b, v1.8b
+; CHECK-NEXT: add x10, x10, x8
+; CHECK-NEXT: add x11, x11, x9
+; CHECK-NEXT: usubl v1.8h, v2.8b, v3.8b
+; CHECK-NEXT: ldr d2, [x10]
+; CHECK-NEXT: ldr d3, [x11]
+; CHECK-NEXT: ldr d4, [x10, x8]
+; CHECK-NEXT: ldr d5, [x11, x9]
 ; CHECK-NEXT: usubl v2.8h, v2.8b, v3.8b
-; CHECK-NEXT: usubl v1.8h, v4.8b, v5.8b
+; CHECK-NEXT: usubl v3.8h, v4.8b, v5.8b
 ; CHECK-NEXT: shll2 v4.4s, v0.8h, #16
-; CHECK-NEXT: shll2 v5.4s, v2.8h, #16
-; CHECK-NEXT: usubl v3.8h, v6.8b, v7.8b
+; CHECK-NEXT: shll2 v5.4s, v1.8h, #16
 ; CHECK-NEXT: saddw v0.4s, v4.4s, v0.4h
-; CHECK-NEXT: saddw v2.4s, v5.4s, v2.4h
-; CHECK-NEXT: shll2 v7.4s, v1.8h, #16
-; CHECK-NEXT: shll2 v6.4s, v3.8h, #16
-; CHECK-NEXT: saddw v1.4s, v7.4s, v1.4h
+; CHECK-NEXT: shll2 v6.4s, v2.8h, #16
+; CHECK-NEXT: saddw v1.4s, v5.4s, v1.4h
+; CHECK-NEXT: shll2 v4.4s, v3.8h, #16
+; CHECK-NEXT: saddw v2.4s, v6.4s, v2.4h
+; CHECK-NEXT: saddw v3.4s, v4.4s, v3.4h
 ; CHECK-NEXT: rev64 v4.4s, v0.4s
-; CHECK-NEXT: rev64 v5.4s, v2.4s
-; CHECK-NEXT: saddw v3.4s, v6.4s, v3.4h
-; CHECK-NEXT: rev64 v7.4s, v1.4s
+; CHECK-NEXT: rev64 v5.4s, v1.4s
+; CHECK-NEXT: rev64 v6.4s, v2.4s
+; CHECK-NEXT: rev64 v7.4s, v3.4s
 ; CHECK-NEXT: sub v4.4s, v0.4s, v4.4s
-; CHECK-NEXT: rev64 v6.4s, v3.4s
-; CHECK-NEXT: sub v5.4s, v2.4s, v5.4s
-; CHECK-NEXT: sub v7.4s, v1.4s, v7.4s
+; CHECK-NEXT: addp v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: sub v5.4s, v1.4s, v5.4s
+; CHECK-NEXT: sub v6.4s, v2.4s, v6.4s
+; CHECK-NEXT: addp v2.4s, v3.4s, v2.4s
+; CHECK-NEXT: sub v7.4s, v3.4s, v7.4s
 ; CHECK-NEXT: zip1 v16.4s, v5.4s, v4.4s
-; CHECK-NEXT: addp v1.4s, v3.4s, v1.4s
-; CHECK-NEXT: sub v6.4s, v3.4s, v6.4s
-; CHECK-NEXT: addp v0.4s, v2.4s, v0.4s
-; CHECK-NEXT: zip2 v17.4s, v7.4s, v6.4s
-; CHECK-NEXT: mov v7.s[1], v6.s[0]
-; CHECK-NEXT: ext v2.16b, v5.16b, v16.16b, #8
+; CHECK-NEXT: ext v1.16b, v2.16b, v2.16b, #8
+; CHECK-NEXT: zip2 v3.4s, v6.4s, v7.4s
+; CHECK-NEXT: mov v6.s[1], v7.s[0]
+; CHECK-NEXT: ext v7.16b, v5.16b, v16.16b, #8
 ; CHECK-NEXT: mov v5.s[3], v4.s[2]
-; CHECK-NEXT: ext v3.16b, v1.16b, v1.16b, #8
-; CHECK-NEXT: uzp2 v4.4s, v1.4s, v0.4s
-; CHECK-NEXT: mov v7.d[1], v2.d[1]
-; CHECK-NEXT: mov v17.d[1], v5.d[1]
-; CHECK-NEXT: uzp1 v1.4s, v1.4s, v0.4s
-; CHECK-NEXT: uzp1 v2.4s, v3.4s, v0.4s
-; CHECK-NEXT: uzp2 v0.4s, v3.4s, v0.4s
-; CHECK-NEXT: add v3.4s, v17.4s, v7.4s
-; CHECK-NEXT: add v1.4s, v4.4s, v1.4s
-; CHECK-NEXT: sub v4.4s, v7.4s, v17.4s
-; CHECK-NEXT: sub v0.4s, v2.4s, v0.4s
-; CHECK-NEXT: rev64 v2.4s, v3.4s
-; CHECK-NEXT: rev64 v5.4s, v4.4s
-; CHECK-NEXT: rev64 v7.4s, v0.4s
+; CHECK-NEXT: uzp1 v4.4s, v1.4s, v0.4s
+; CHECK-NEXT: uzp2 v1.4s, v1.4s, v0.4s
+; CHECK-NEXT: mov v6.d[1], v7.d[1]
+; CHECK-NEXT: mov v3.d[1], v5.d[1]
+; CHECK-NEXT: uzp2 v5.4s, v2.4s, v0.4s
+; CHECK-NEXT: uzp1 v0.4s, v2.4s, v0.4s
+; CHECK-NEXT: sub v1.4s, v4.4s, v1.4s
+; CHECK-NEXT: add v2.4s, v3.4s, v6.4s
+; CHECK-NEXT: sub v3.4s, v6.4s, v3.4s
+; CHECK-NEXT: add v0.4s, v5.4s, v0.4s
 ; CHECK-NEXT: rev64 v6.4s, v1.4s
-; CHECK-NEXT: addp v16.4s, v0.4s, v4.4s
-; CHECK-NEXT: addp v17.4s, v1.4s, v3.4s
-; CHECK-NEXT: sub v4.4s, v4.4s, v5.4s
-; CHECK-NEXT: sub v2.4s, v3.4s, v2.4s
-; CHECK-NEXT: sub v0.4s, v0.4s, v7.4s
+; CHECK-NEXT: rev64 v4.4s, v2.4s
+; CHECK-NEXT: rev64 v5.4s, v3.4s
+; CHECK-NEXT: addp v16.4s, v1.4s, v3.4s
+; CHECK-NEXT: rev64 v7.4s, v0.4s
+; CHECK-NEXT: addp v17.4s, v0.4s, v2.4s
 ; CHECK-NEXT: sub v1.4s, v1.4s, v6.4s
-; CHECK-NEXT: ext v3.16b, v16.16b, v4.16b, #4
-; CHECK-NEXT: ext v5.16b, v0.16b, v16.16b, #8
-; CHECK-NEXT: ext v6.16b, v17.16b, v2.16b, #4
-; CHECK-NEXT: zip1 v7.4s, v17.4s, v17.4s
-; CHECK-NEXT: zip2 v3.4s, v3.4s, v16.4s
-; CHECK-NEXT: ext v18.16b, v5.16b, v0.16b, #4
-; CHECK-NEXT: zip2 v6.4s, v6.4s, v17.4s
-; CHECK-NEXT: trn2 v7.4s, v7.4s, v1.4s
-; CHECK-NEXT: ext v1.16b, v1.16b, v17.16b, #4
-; CHECK-NEXT: ext v3.16b, v4.16b, v3.16b, #12
-; CHECK-NEXT: mov v0.s[2], v16.s[1]
-; CHECK-NEXT: ext v6.16b, v2.16b, v6.16b, #12
-; CHECK-NEXT: mov v4.s[2], v16.s[3]
-; CHECK-NEXT: uzp2 v5.4s, v5.4s, v18.4s
-; CHECK-NEXT: mov v2.s[2], v17.s[3]
-; CHECK-NEXT: ext v1.16b, v1.16b, v1.16b, #4
-; CHECK-NEXT: sub v18.4s, v4.4s, v3.4s
-; CHECK-NEXT: sub v19.4s, v0.4s, v5.4s
-; CHECK-NEXT: sub v20.4s, v2.4s, v6.4s
-; CHECK-NEXT: mov v4.s[1], v16.s[2]
-; CHECK-NEXT: sub v21.4s, v7.4s, v1.4s
-; CHECK-NEXT: mov v2.s[1], v17.s[2]
-; CHECK-NEXT: mov v0.s[1], v16.s[0]
-; CHECK-NEXT: mov v1.s[0], v17.s[1]
-; CHECK-NEXT: add v3.4s, v4.4s, v3.4s
-; CHECK-NEXT: add v2.4s, v2.4s, v6.4s
+; CHECK-NEXT: sub v3.4s, v3.4s, v5.4s
+; CHECK-NEXT: sub v2.4s, v2.4s, v4.4s
+; CHECK-NEXT: sub v0.4s, v0.4s, v7.4s
+; CHECK-NEXT: zip1 v18.4s, v17.4s, v17.4s
+; CHECK-NEXT: ext v6.16b, v1.16b, v16.16b, #8
+; CHECK-NEXT: ext v4.16b, v17.16b, v2.16b, #4
+; CHECK-NEXT: ext v5.16b, v16.16b, v3.16b, #4
+; CHECK-NEXT: mov v20.16b, v3.16b
+; CHECK-NEXT: ext v7.16b, v0.16b, v17.16b, #4
+; CHECK-NEXT: mov v21.16b, v2.16b
+; CHECK-NEXT: trn2 v0.4s, v18.4s, v0.4s
+; CHECK-NEXT: ext v19.16b, v6.16b, v1.16b, #4
+; CHECK-NEXT: mov v1.s[2], v16.s[1]
+; CHECK-NEXT: mov v20.s[2], v16.s[3]
+; CHECK-NEXT: zip2 v4.4s, v4.4s, v17.4s
+; CHECK-NEXT: zip2 v5.4s, v5.4s, v16.4s
+; CHECK-NEXT: mov v21.s[2],
v17.s[3] +; CHECK-NEXT: ext v7.16b, v7.16b, v7.16b, #4 +; CHECK-NEXT: mov v18.16b, v1.16b +; CHECK-NEXT: ext v2.16b, v2.16b, v4.16b, #12 +; CHECK-NEXT: ext v3.16b, v3.16b, v5.16b, #12 +; CHECK-NEXT: uzp2 v4.4s, v6.4s, v19.4s +; CHECK-NEXT: mov v5.16b, v7.16b +; CHECK-NEXT: mov v6.16b, v20.16b +; CHECK-NEXT: mov v19.16b, v21.16b +; CHECK-NEXT: mov v18.s[1], v16.s[0] +; CHECK-NEXT: sub v7.4s, v0.4s, v7.4s +; CHECK-NEXT: mov v6.s[1], v16.s[2] +; CHECK-NEXT: mov v5.s[0], v17.s[1] +; CHECK-NEXT: mov v19.s[1], v17.s[2] +; CHECK-NEXT: sub v1.4s, v1.4s, v4.4s +; CHECK-NEXT: sub v16.4s, v20.4s, v3.4s +; CHECK-NEXT: sub v17.4s, v21.4s, v2.4s +; CHECK-NEXT: add v4.4s, v18.4s, v4.4s +; CHECK-NEXT: add v3.4s, v6.4s, v3.4s ; CHECK-NEXT: add v0.4s, v0.4s, v5.4s -; CHECK-NEXT: add v1.4s, v7.4s, v1.4s -; CHECK-NEXT: mov v3.d[1], v18.d[1] -; CHECK-NEXT: mov v2.d[1], v20.d[1] -; CHECK-NEXT: mov v1.d[1], v21.d[1] -; CHECK-NEXT: mov v0.d[1], v19.d[1] -; CHECK-NEXT: cmlt v4.8h, v3.8h, #0 -; CHECK-NEXT: cmlt v5.8h, v2.8h, #0 -; CHECK-NEXT: cmlt v6.8h, v1.8h, #0 -; CHECK-NEXT: cmlt v7.8h, v0.8h, #0 -; CHECK-NEXT: add v1.4s, v6.4s, v1.4s -; CHECK-NEXT: add v0.4s, v7.4s, v0.4s -; CHECK-NEXT: add v3.4s, v4.4s, v3.4s -; CHECK-NEXT: add v2.4s, v5.4s, v2.4s -; CHECK-NEXT: eor v3.16b, v3.16b, v4.16b -; CHECK-NEXT: eor v2.16b, v2.16b, v5.16b -; CHECK-NEXT: eor v0.16b, v0.16b, v7.16b -; CHECK-NEXT: eor v1.16b, v1.16b, v6.16b -; CHECK-NEXT: add v0.4s, v1.4s, v0.4s -; CHECK-NEXT: add v1.4s, v2.4s, v3.4s +; CHECK-NEXT: add v2.4s, v19.4s, v2.4s +; CHECK-NEXT: mov v4.d[1], v1.d[1] +; CHECK-NEXT: mov v3.d[1], v16.d[1] +; CHECK-NEXT: mov v0.d[1], v7.d[1] +; CHECK-NEXT: mov v2.d[1], v17.d[1] +; CHECK-NEXT: cmlt v6.8h, v4.8h, #0 +; CHECK-NEXT: cmlt v1.8h, v3.8h, #0 +; CHECK-NEXT: cmlt v5.8h, v0.8h, #0 +; CHECK-NEXT: cmlt v7.8h, v2.8h, #0 +; CHECK-NEXT: add v4.4s, v6.4s, v4.4s +; CHECK-NEXT: add v3.4s, v1.4s, v3.4s +; CHECK-NEXT: add v0.4s, v5.4s, v0.4s +; CHECK-NEXT: add v2.4s, v7.4s, v2.4s +; CHECK-NEXT: eor v1.16b, v3.16b, v1.16b +; CHECK-NEXT: eor v2.16b, v2.16b, v7.16b +; CHECK-NEXT: eor v3.16b, v4.16b, v6.16b +; CHECK-NEXT: eor v0.16b, v0.16b, v5.16b +; CHECK-NEXT: add v1.4s, v2.4s, v1.4s +; CHECK-NEXT: add v0.4s, v0.4s, v3.4s ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w8, s0 diff --git a/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll b/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll --- a/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll +++ b/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll @@ -9,9 +9,9 @@ ; CHECK-LABEL: vec_scalable_subvec_scalable_idx_zero_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0] -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: ld1b { z1.s }, p0/z, [x1] +; CHECK-NEXT: ld1b { z1.s }, p1/z, [x1] ; CHECK-NEXT: uunpkhi z0.s, z0.h ; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h ; CHECK-NEXT: ret @@ -25,9 +25,9 @@ ; CHECK-LABEL: vec_scalable_subvec_scalable_idx_nonzero_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0] -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: ld1b { z1.s }, p0/z, [x1] +; CHECK-NEXT: ld1b { z1.s }, p1/z, [x1] ; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h ; CHECK-NEXT: ret @@ -41,9 +41,9 @@ ; CHECK-LABEL: vec_scalable_subvec_scalable_idx_zero_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0] -; 
CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: ld1h { z1.d }, p0/z, [x1] +; CHECK-NEXT: ld1h { z1.d }, p1/z, [x1] ; CHECK-NEXT: uunpkhi z0.d, z0.s ; CHECK-NEXT: uzp1 z0.s, z1.s, z0.s ; CHECK-NEXT: ret @@ -57,9 +57,9 @@ ; CHECK-LABEL: vec_scalable_subvec_scalable_idx_nonzero_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0] -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: ld1h { z1.d }, p0/z, [x1] +; CHECK-NEXT: ld1h { z1.d }, p1/z, [x1] ; CHECK-NEXT: uunpklo z0.d, z0.s ; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s ; CHECK-NEXT: ret @@ -75,11 +75,11 @@ ; CHECK-LABEL: vec_scalable_subvec_fixed_idx_zero_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: ptrue p1.h, vl8 ; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: ptrue p0.h, vl8 ; CHECK-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-NEXT: mov z0.h, p0/m, z1.h +; CHECK-NEXT: mov z0.h, p1/m, z1.h ; CHECK-NEXT: ret %vec = load <vscale x 8 x i8>, <vscale x 8 x i8>* %a %subvec = load <8 x i8>, <8 x i8>* %b @@ -92,16 +92,16 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: cnth x8 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: cnth x8 +; CHECK-NEXT: mov w9, #8 // =0x8 ; CHECK-NEXT: sub x8, x8, #8 -; CHECK-NEXT: mov w9, #8 ; CHECK-NEXT: cmp x8, #8 -; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0] -; CHECK-NEXT: ldr d1, [x1] ; CHECK-NEXT: csel x8, x8, x9, lo -; CHECK-NEXT: lsl x8, x8, #1 ; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: lsl x8, x8, #1 +; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0] +; CHECK-NEXT: ldr d1, [x1] ; CHECK-NEXT: ushll v1.8h, v1.8b, #0 ; CHECK-NEXT: st1h { z0.h }, p0, [sp] ; CHECK-NEXT: str q1, [x9, x8] @@ -119,11 +119,11 @@ ; CHECK-LABEL: vec_scalable_subvec_fixed_idx_zero_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ptrue p1.s, vl4 ; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-NEXT: mov z0.s, p0/m, z1.s +; CHECK-NEXT: mov z0.s, p1/m, z1.s ; CHECK-NEXT: ret %vec = load <vscale x 4 x i16>, <vscale x 4 x i16>* %a %subvec = load <4 x i16>, <4 x i16>* %b @@ -136,16 +136,16 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: cntw x8 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: cntw x8 +; CHECK-NEXT: mov w9, #4 // =0x4 ; CHECK-NEXT: sub x8, x8, #4 -; CHECK-NEXT: mov w9, #4 ; CHECK-NEXT: cmp x8, #4 -; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0] -; CHECK-NEXT: ldr d1, [x1] ; CHECK-NEXT: csel x8, x8, x9, lo -; CHECK-NEXT: lsl x8, x8, #2 ; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: lsl x8, x8, #2 +; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0] +; CHECK-NEXT: ldr d1, [x1] ; CHECK-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: str q1, [x9, x8] @@ -163,11 +163,11 @@ ; CHECK-LABEL: vec_scalable_subvec_fixed_idx_zero_i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ptrue p1.d, vl2 ; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: ushll v1.2d, v1.2s, #0 -; CHECK-NEXT: mov z0.d, p0/m, z1.d +; CHECK-NEXT: mov z0.d, p1/m, z1.d ; CHECK-NEXT: ret %vec = load <vscale x 2 x i32>, <vscale x 2 x i32>* %a %subvec = load <2 x i32>, <2 x i32>* %b @@ -180,16 +180,16 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]!
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: cntd x8 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: cntd x8 +; CHECK-NEXT: mov w9, #2 // =0x2 ; CHECK-NEXT: sub x8, x8, #2 -; CHECK-NEXT: mov w9, #2 ; CHECK-NEXT: cmp x8, #2 -; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0] -; CHECK-NEXT: ldr d1, [x1] ; CHECK-NEXT: csel x8, x8, x9, lo -; CHECK-NEXT: lsl x8, x8, #3 ; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: lsl x8, x8, #3 +; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0] +; CHECK-NEXT: ldr d1, [x1] ; CHECK-NEXT: ushll v1.2d, v1.2s, #0 ; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: str q1, [x9, x8] diff --git a/llvm/test/CodeGen/AArch64/insert-subvector.ll b/llvm/test/CodeGen/AArch64/insert-subvector.ll --- a/llvm/test/CodeGen/AArch64/insert-subvector.ll +++ b/llvm/test/CodeGen/AArch64/insert-subvector.ll @@ -47,8 +47,8 @@ define <16 x i8> @insert_v16i8_4_15(float %tmp, <16 x i8> %b, <16 x i8> %a) { ; CHECK-LABEL: insert_v16i8_4_15: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI4_0 ; CHECK-NEXT: // kill: def $q2 killed $q2 def $q2_q3 +; CHECK-NEXT: adrp x8, .LCPI4_0 ; CHECK-NEXT: mov v3.16b, v1.16b ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI4_0] ; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b @@ -146,8 +146,8 @@ define <8 x i16> @insert_v8i16_2_15(float %tmp, <8 x i16> %b, <8 x i16> %a) { ; CHECK-LABEL: insert_v8i16_2_15: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI13_0 ; CHECK-NEXT: // kill: def $q2 killed $q2 def $q2_q3 +; CHECK-NEXT: adrp x8, .LCPI13_0 ; CHECK-NEXT: mov v3.16b, v1.16b ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI13_0] ; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b @@ -272,8 +272,8 @@ define <16 x i8> @load_v16i8_4_15(float %tmp, <16 x i8> %b, ptr %a) { ; CHECK-LABEL: load_v16i8_4_15: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI24_0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $q0_q1 +; CHECK-NEXT: adrp x8, .LCPI24_0 ; CHECK-NEXT: ldr s0, [x0] ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI24_0] ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b @@ -338,8 +338,8 @@ ; CHECK-LABEL: load_v8i8_4_2: ; CHECK: // %bb.0: ; CHECK-NEXT: fmov d0, d1 -; CHECK-NEXT: ldr s1, [x0] -; CHECK-NEXT: mov v0.s[1], v1.s[0] +; CHECK-NEXT: ldr s2, [x0] +; CHECK-NEXT: mov v0.s[1], v2.s[0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %l = load <4 x i8>, ptr %a @@ -365,8 +365,8 @@ ; CHECK-LABEL: load_v16i8_8_2: ; CHECK: // %bb.0: ; CHECK-NEXT: mov v0.16b, v1.16b -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: ldr d2, [x0] +; CHECK-NEXT: mov v0.d[1], v2.d[0] ; CHECK-NEXT: ret %l = load <8 x i8>, ptr %a %s1 = shufflevector <8 x i8> %l, <8 x i8> poison, <16 x i32> @@ -379,13 +379,13 @@ define <8 x i16> @load_v8i16_2_1(float %tmp, <8 x i16> %b, ptr %a) { ; CHECK-LABEL: load_v8i16_2_1: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w9, [x0] -; CHECK-NEXT: add x8, x0, #2 +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: add x9, x0, #2 +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: ld1 { v0.h }[2], [x9] +; CHECK-NEXT: xtn v2.4h, v0.4s ; CHECK-NEXT: mov v0.16b, v1.16b -; CHECK-NEXT: fmov s2, w9 -; CHECK-NEXT: ld1 { v2.h }[2], [x8] -; CHECK-NEXT: xtn v1.4h, v2.4s -; CHECK-NEXT: mov v0.s[0], v1.s[0] +; CHECK-NEXT: mov v0.s[0], v2.s[0] ; CHECK-NEXT: ret %l = load <2 x i16>, ptr %a %s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <8 x i32> @@ -396,15 +396,15 @@ define <8 x i16> @load_v8i16_2_15(float %tmp, <8 x i16> %b, ptr %a) { ; CHECK-LABEL: load_v8i16_2_15: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w9, [x0] -; CHECK-NEXT: add x8, x0, #2 +; CHECK-NEXT: ldrh w8, [x0] +; 
CHECK-NEXT: add x9, x0, #2 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $q0_q1 -; CHECK-NEXT: fmov s2, w9 -; CHECK-NEXT: ld1 { v2.h }[2], [x8] +; CHECK-NEXT: fmov s2, w8 ; CHECK-NEXT: adrp x8, .LCPI33_0 -; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI33_0] +; CHECK-NEXT: ld1 { v2.h }[2], [x9] ; CHECK-NEXT: xtn v0.4h, v2.4s -; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v3.16b +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI33_0] +; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-NEXT: ret %l = load <2 x i16>, ptr %a %s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <8 x i32> @@ -415,13 +415,13 @@ define <8 x i16> @load_v8i16_2_2(float %tmp, <8 x i16> %b, ptr %a) { ; CHECK-LABEL: load_v8i16_2_2: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w9, [x0] -; CHECK-NEXT: add x8, x0, #2 +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: add x9, x0, #2 +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: ld1 { v0.h }[2], [x9] +; CHECK-NEXT: xtn v2.4h, v0.4s ; CHECK-NEXT: mov v0.16b, v1.16b -; CHECK-NEXT: fmov s2, w9 -; CHECK-NEXT: ld1 { v2.h }[2], [x8] -; CHECK-NEXT: xtn v1.4h, v2.4s -; CHECK-NEXT: mov v0.s[1], v1.s[0] +; CHECK-NEXT: mov v0.s[1], v2.s[0] ; CHECK-NEXT: ret %l = load <2 x i16>, ptr %a %s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <8 x i32> @@ -432,13 +432,13 @@ define <8 x i16> @load_v8i16_2_3(float %tmp, <8 x i16> %b, ptr %a) { ; CHECK-LABEL: load_v8i16_2_3: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w9, [x0] -; CHECK-NEXT: add x8, x0, #2 +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: add x9, x0, #2 +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: ld1 { v0.h }[2], [x9] +; CHECK-NEXT: xtn v2.4h, v0.4s ; CHECK-NEXT: mov v0.16b, v1.16b -; CHECK-NEXT: fmov s2, w9 -; CHECK-NEXT: ld1 { v2.h }[2], [x8] -; CHECK-NEXT: xtn v1.4h, v2.4s -; CHECK-NEXT: mov v0.s[2], v1.s[0] +; CHECK-NEXT: mov v0.s[2], v2.s[0] ; CHECK-NEXT: ret %l = load <2 x i16>, ptr %a %s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <8 x i32> @@ -449,13 +449,13 @@ define <8 x i16> @load_v8i16_2_4(float %tmp, <8 x i16> %b, ptr %a) { ; CHECK-LABEL: load_v8i16_2_4: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w9, [x0] -; CHECK-NEXT: add x8, x0, #2 +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: add x9, x0, #2 +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: ld1 { v0.h }[2], [x9] +; CHECK-NEXT: xtn v2.4h, v0.4s ; CHECK-NEXT: mov v0.16b, v1.16b -; CHECK-NEXT: fmov s2, w9 -; CHECK-NEXT: ld1 { v2.h }[2], [x8] -; CHECK-NEXT: xtn v1.4h, v2.4s -; CHECK-NEXT: mov v0.s[3], v1.s[0] +; CHECK-NEXT: mov v0.s[3], v2.s[0] ; CHECK-NEXT: ret %l = load <2 x i16>, ptr %a %s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <8 x i32> @@ -514,8 +514,8 @@ ; CHECK-LABEL: load_v8i16_4_2: ; CHECK: // %bb.0: ; CHECK-NEXT: mov v0.16b, v1.16b -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: ldr d2, [x0] +; CHECK-NEXT: mov v0.d[1], v2.d[0] ; CHECK-NEXT: ret %l = load <4 x i16>, ptr %a %s1 = shufflevector <4 x i16> %l, <4 x i16> poison, <8 x i32> @@ -542,8 +542,8 @@ ; CHECK-LABEL: load_v4i32_2_2: ; CHECK: // %bb.0: ; CHECK-NEXT: mov v0.16b, v1.16b -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: ldr d2, [x0] +; CHECK-NEXT: mov v0.d[1], v2.d[0] ; CHECK-NEXT: ret %l = load <2 x i32>, ptr %a %s1 = shufflevector <2 x i32> %l, <2 x i32> poison, <4 x i32> @@ -621,13 +621,13 @@ define <16 x i8> @load2multi2_v4i8(float %tmp, ptr %a, ptr %b) { ; CHECK-LABEL: load2multi2_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr s0, [x1] -; CHECK-NEXT: ldr s1, [x0] +; CHECK-NEXT: ldr s0, [x0] +; CHECK-NEXT: ldr s1, [x1] ; CHECK-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-NEXT: 
ushll v1.8h, v1.8b, #0 -; CHECK-NEXT: mov v0.d[1], v0.d[0] ; CHECK-NEXT: mov v1.d[1], v1.d[0] -; CHECK-NEXT: uzp1 v0.16b, v1.16b, v0.16b +; CHECK-NEXT: mov v0.d[1], v0.d[0] +; CHECK-NEXT: uzp1 v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %la = load <4 x i8>, ptr %a %lb = load <4 x i8>, ptr %b @@ -640,8 +640,8 @@ define void @loads_before_stores(ptr %i44) { ; CHECK-LABEL: loads_before_stores: ; CHECK: // %bb.0: // %bb -; CHECK-NEXT: add x8, x0, #20 ; CHECK-NEXT: ldr s0, [x0, #28] +; CHECK-NEXT: add x8, x0, #20 ; CHECK-NEXT: ldrh w9, [x0, #26] ; CHECK-NEXT: ldrh w10, [x0, #24] ; CHECK-NEXT: ld1 { v0.s }[1], [x8] diff --git a/llvm/test/CodeGen/AArch64/insertshuffleload.ll b/llvm/test/CodeGen/AArch64/insertshuffleload.ll --- a/llvm/test/CodeGen/AArch64/insertshuffleload.ll +++ b/llvm/test/CodeGen/AArch64/insertshuffleload.ll @@ -121,11 +121,11 @@ define <8 x i32> @inserti32_first_multiuse(ptr %p) { ; CHECK-LABEL: inserti32_first_multiuse: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q2, q3, [x0] +; CHECK-NEXT: ldp q3, q2, [x0] ; CHECK-NEXT: ldur q1, [x0, #20] ; CHECK-NEXT: ldur q0, [x0, #4] -; CHECK-NEXT: add v1.4s, v1.4s, v3.4s -; CHECK-NEXT: add v0.4s, v0.4s, v2.4s +; CHECK-NEXT: add v0.4s, v0.4s, v3.4s +; CHECK-NEXT: add v1.4s, v1.4s, v2.4s ; CHECK-NEXT: ret %q = getelementptr inbounds i8, ptr %p, i32 4 %l1 = load <8 x i32>, ptr %q @@ -140,10 +140,10 @@ ; CHECK-LABEL: inserti32_last_multiuse: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: ldur q2, [x0, #4] -; CHECK-NEXT: ldur q3, [x0, #20] -; CHECK-NEXT: add v0.4s, v0.4s, v2.4s -; CHECK-NEXT: add v1.4s, v1.4s, v3.4s +; CHECK-NEXT: ldur q2, [x0, #20] +; CHECK-NEXT: ldur q3, [x0, #4] +; CHECK-NEXT: add v0.4s, v0.4s, v3.4s +; CHECK-NEXT: add v1.4s, v1.4s, v2.4s ; CHECK-NEXT: ret %q = getelementptr inbounds i8, ptr %p, i32 32 %l1 = load <8 x i32>, ptr %p @@ -291,8 +291,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldur d0, [x0, #1] ; CHECK-NEXT: adrp x8, .LCPI19_0 -; CHECK-NEXT: mov v0.d[1], v0.d[0] ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI19_0] +; CHECK-NEXT: mov v0.d[1], v0.d[0] ; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b ; CHECK-NEXT: ld1 { v0.b }[0], [x0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 diff --git a/llvm/test/CodeGen/AArch64/isinf.ll b/llvm/test/CodeGen/AArch64/isinf.ll --- a/llvm/test/CodeGen/AArch64/isinf.ll +++ b/llvm/test/CodeGen/AArch64/isinf.ll @@ -10,7 +10,7 @@ define i32 @replace_isinf_call_f16(half %x) { ; CHECK-LABEL: replace_isinf_call_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #31744 +; CHECK-NEXT: mov w8, #31744 // =0x7c00 ; CHECK-NEXT: fabs h0, h0 ; CHECK-NEXT: fmov h1, w8 ; CHECK-NEXT: fcmp h0, h1 @@ -26,8 +26,8 @@ define i32 @replace_isinf_call_f32(float %x) { ; CHECK-LABEL: replace_isinf_call_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #2139095040 ; CHECK-NEXT: fabs s0, s0 +; CHECK-NEXT: mov w8, #2139095040 // =0x7f800000 ; CHECK-NEXT: fmov s1, w8 ; CHECK-NEXT: fcmp s0, s1 ; CHECK-NEXT: cset w0, eq @@ -42,8 +42,8 @@ define i32 @replace_isinf_call_f64(double %x) { ; CHECK-LABEL: replace_isinf_call_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #9218868437227405312 ; CHECK-NEXT: fabs d0, d0 +; CHECK-NEXT: mov x8, #9218868437227405312 // =0x7ff0000000000000 ; CHECK-NEXT: fmov d1, x8 ; CHECK-NEXT: fcmp d0, d1 ; CHECK-NEXT: cset w0, eq @@ -70,8 +70,8 @@ ; CHECK-NEXT: ldr q0, [sp] ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0] ; CHECK-NEXT: bl __eqtf2 -; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: cmp w0, #0 +; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: cset 
w0, eq ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/known-never-nan.ll b/llvm/test/CodeGen/AArch64/known-never-nan.ll --- a/llvm/test/CodeGen/AArch64/known-never-nan.ll +++ b/llvm/test/CodeGen/AArch64/known-never-nan.ll @@ -28,13 +28,13 @@ define float @not_fmaxnm_maybe_nan(i32 %i1, i32 %i2) #0 { ; CHECK-LABEL: not_fmaxnm_maybe_nan: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-8388608 ; CHECK-NEXT: ucvtf s0, w0 ; CHECK-NEXT: ucvtf s1, w1 -; CHECK-NEXT: fmov s3, #17.00000000 -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: fadd s1, s1, s3 -; CHECK-NEXT: fmul s0, s0, s2 +; CHECK-NEXT: mov w8, #-8388608 // =0xff800000 +; CHECK-NEXT: fmov s2, #17.00000000 +; CHECK-NEXT: fmov s3, w8 +; CHECK-NEXT: fmul s0, s0, s3 +; CHECK-NEXT: fadd s1, s1, s2 ; CHECK-NEXT: fcmp s0, s1 ; CHECK-NEXT: fcsel s0, s0, s1, pl ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll b/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll --- a/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll +++ b/llvm/test/CodeGen/AArch64/llvm-ir-to-intrinsic.ll @@ -12,17 +12,17 @@ ; CHECK-NEXT: sunpkhi z3.h, z0.b ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: sunpklo z1.h, z1.b +; CHECK-NEXT: sunpklo z0.h, z0.b ; CHECK-NEXT: sunpkhi z4.s, z2.h ; CHECK-NEXT: sunpkhi z5.s, z3.h ; CHECK-NEXT: sunpklo z2.s, z2.h ; CHECK-NEXT: sunpklo z3.s, z3.h -; CHECK-NEXT: sunpklo z0.h, z0.b ; CHECK-NEXT: sdivr z4.s, p0/m, z4.s, z5.s +; CHECK-NEXT: sunpkhi z5.s, z0.h +; CHECK-NEXT: sunpklo z0.s, z0.h ; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z3.s ; CHECK-NEXT: sunpkhi z3.s, z1.h -; CHECK-NEXT: sunpkhi z5.s, z0.h ; CHECK-NEXT: sunpklo z1.s, z1.h -; CHECK-NEXT: sunpklo z0.s, z0.h ; CHECK-NEXT: sdivr z3.s, p0/m, z3.s, z5.s ; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: uzp1 z1.h, z2.h, z4.h @@ -113,23 +113,23 @@ ; CHECK-NEXT: sunpkhi z2.h, z1.b ; CHECK-NEXT: sunpkhi z3.h, z0.b ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: sunpkhi z5.s, z2.h -; CHECK-NEXT: sunpkhi z6.s, z3.h +; CHECK-NEXT: sunpkhi z4.s, z2.h +; CHECK-NEXT: sunpkhi z5.s, z3.h ; CHECK-NEXT: sunpklo z2.s, z2.h ; CHECK-NEXT: sunpklo z3.s, z3.h -; CHECK-NEXT: sunpklo z4.h, z1.b +; CHECK-NEXT: sdivr z4.s, p0/m, z4.s, z5.s +; CHECK-NEXT: sunpklo z5.h, z0.b +; CHECK-NEXT: sunpkhi z7.s, z5.h +; CHECK-NEXT: sunpklo z5.s, z5.h ; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z3.s -; CHECK-NEXT: sunpklo z3.h, z0.b -; CHECK-NEXT: sdivr z5.s, p0/m, z5.s, z6.s -; CHECK-NEXT: sunpkhi z6.s, z4.h -; CHECK-NEXT: sunpkhi z7.s, z3.h -; CHECK-NEXT: sunpklo z4.s, z4.h +; CHECK-NEXT: sunpklo z3.h, z1.b +; CHECK-NEXT: sunpkhi z6.s, z3.h ; CHECK-NEXT: sunpklo z3.s, z3.h ; CHECK-NEXT: sdivr z6.s, p0/m, z6.s, z7.s -; CHECK-NEXT: sdiv z3.s, p0/m, z3.s, z4.s -; CHECK-NEXT: uzp1 z2.h, z2.h, z5.h -; CHECK-NEXT: uzp1 z3.h, z3.h, z6.h +; CHECK-NEXT: uzp1 z2.h, z2.h, z4.h +; CHECK-NEXT: sdivr z3.s, p0/m, z3.s, z5.s ; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: uzp1 z3.h, z3.h, z6.h ; CHECK-NEXT: uzp1 z2.b, z3.b, z2.b ; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b ; CHECK-NEXT: ret @@ -143,13 +143,12 @@ ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: sunpkhi z2.s, z1.h ; CHECK-NEXT: sunpkhi z3.s, z0.h -; CHECK-NEXT: sunpklo z4.s, z1.h +; CHECK-NEXT: sunpklo z4.s, z0.h ; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z3.s -; CHECK-NEXT: sunpklo z5.s, z0.h -; CHECK-NEXT: movprfx z3, z5 -; CHECK-NEXT: sdiv z3.s, p0/m, z3.s, z4.s -; CHECK-NEXT: uzp1 z2.h, z3.h, z2.h +; CHECK-NEXT: sunpklo z3.s, z1.h +; CHECK-NEXT: sdivr z3.s, p0/m, z3.s, z4.s ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: uzp1 z2.h, z3.h, z2.h ; CHECK-NEXT: mls 
z0.h, p0/m, z2.h, z1.h ; CHECK-NEXT: ret %div = srem <vscale x 8 x i16> %a, %b @@ -191,17 +190,17 @@ ; CHECK-NEXT: uunpkhi z3.h, z0.b ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: uunpklo z1.h, z1.b +; CHECK-NEXT: uunpklo z0.h, z0.b ; CHECK-NEXT: uunpkhi z4.s, z2.h ; CHECK-NEXT: uunpkhi z5.s, z3.h ; CHECK-NEXT: uunpklo z2.s, z2.h ; CHECK-NEXT: uunpklo z3.s, z3.h -; CHECK-NEXT: uunpklo z0.h, z0.b ; CHECK-NEXT: udivr z4.s, p0/m, z4.s, z5.s +; CHECK-NEXT: uunpkhi z5.s, z0.h +; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z3.s ; CHECK-NEXT: uunpkhi z3.s, z1.h -; CHECK-NEXT: uunpkhi z5.s, z0.h ; CHECK-NEXT: uunpklo z1.s, z1.h -; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: udivr z3.s, p0/m, z3.s, z5.s ; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: uzp1 z1.h, z2.h, z4.h @@ -293,23 +292,23 @@ ; CHECK-NEXT: uunpkhi z2.h, z1.b ; CHECK-NEXT: uunpkhi z3.h, z0.b ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: uunpkhi z5.s, z2.h -; CHECK-NEXT: uunpkhi z6.s, z3.h +; CHECK-NEXT: uunpkhi z4.s, z2.h +; CHECK-NEXT: uunpkhi z5.s, z3.h ; CHECK-NEXT: uunpklo z2.s, z2.h ; CHECK-NEXT: uunpklo z3.s, z3.h -; CHECK-NEXT: uunpklo z4.h, z1.b +; CHECK-NEXT: udivr z4.s, p0/m, z4.s, z5.s +; CHECK-NEXT: uunpklo z5.h, z0.b +; CHECK-NEXT: uunpkhi z7.s, z5.h +; CHECK-NEXT: uunpklo z5.s, z5.h ; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z3.s -; CHECK-NEXT: uunpklo z3.h, z0.b -; CHECK-NEXT: udivr z5.s, p0/m, z5.s, z6.s -; CHECK-NEXT: uunpkhi z6.s, z4.h -; CHECK-NEXT: uunpkhi z7.s, z3.h -; CHECK-NEXT: uunpklo z4.s, z4.h +; CHECK-NEXT: uunpklo z3.h, z1.b +; CHECK-NEXT: uunpkhi z6.s, z3.h ; CHECK-NEXT: uunpklo z3.s, z3.h ; CHECK-NEXT: udivr z6.s, p0/m, z6.s, z7.s -; CHECK-NEXT: udiv z3.s, p0/m, z3.s, z4.s -; CHECK-NEXT: uzp1 z2.h, z2.h, z5.h -; CHECK-NEXT: uzp1 z3.h, z3.h, z6.h +; CHECK-NEXT: uzp1 z2.h, z2.h, z4.h +; CHECK-NEXT: udivr z3.s, p0/m, z3.s, z5.s ; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: uzp1 z3.h, z3.h, z6.h ; CHECK-NEXT: uzp1 z2.b, z3.b, z2.b ; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b ; CHECK-NEXT: ret @@ -323,13 +322,13 @@ ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: uunpkhi z2.s, z1.h ; CHECK-NEXT: uunpkhi z3.s, z0.h -; CHECK-NEXT: uunpklo z4.s, z1.h +; CHECK-NEXT: uunpklo z4.s, z0.h ; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z3.s -; CHECK-NEXT: uunpklo z5.s, z0.h -; CHECK-NEXT: movprfx z3, z5 -; CHECK-NEXT: udiv z3.s, p0/m, z3.s, z4.s -; CHECK-NEXT: uzp1 z2.h, z3.h, z2.h +; CHECK-NEXT: uunpklo z3.s, z1.h +; CHECK-NEXT: udivr z3.s, p0/m, z3.s, z4.s ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: uzp1 z2.h, z3.h, z2.h ; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h ; CHECK-NEXT: ret %div = urem <vscale x 8 x i16> %a, %b @@ -424,9 +422,9 @@ ; CHECK-LABEL: smin_split_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: smin z2.h, p0/m, z2.h, z6.h ; CHECK-NEXT: smin z0.h, p0/m, z0.h, z4.h ; CHECK-NEXT: smin z1.h, p0/m, z1.h, z5.h +; CHECK-NEXT: smin z2.h, p0/m, z2.h, z6.h ; CHECK-NEXT: smin z3.h, p0/m, z3.h, z7.h ; CHECK-NEXT: ret %cmp = icmp slt <vscale x 32 x i16> %a, %b @@ -1067,9 +1065,9 @@ ; CHECK-LABEL: cmp_split_64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p3.b -; CHECK-NEXT: cmpgt p2.b, p3/z, z2.b, z6.b ; CHECK-NEXT: cmpgt p0.b, p3/z, z0.b, z4.b ; CHECK-NEXT: cmpgt p1.b, p3/z, z1.b, z5.b +; CHECK-NEXT: cmpgt p2.b, p3/z, z2.b, z6.b ; CHECK-NEXT: cmpgt p3.b, p3/z, z3.b, z7.b ; CHECK-NEXT: ret %cmp = icmp sgt <vscale x 64 x i8> %a, %b @@ -1083,12 +1081,12 @@ define <vscale x 2 x i64> @fshl_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c){ ; CHECK-LABEL: fshl_i64: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov z3.d, #63 // =0x3f ; CHECK-NEXT: mov z4.d, z2.d +; CHECK-NEXT: lsr z1.d, z1.d, #1 ; CHECK-NEXT: bic z2.d, z3.d, z2.d ;
CHECK-NEXT: and z4.d, z4.d, #0x3f -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: lsr z1.d, z1.d, #1 ; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z4.d ; CHECK-NEXT: lsr z1.d, p0/m, z1.d, z2.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d @@ -1100,18 +1098,19 @@ define <vscale x 4 x i64> @fshl_illegal_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i64> %c){ ; CHECK-LABEL: fshl_illegal_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z6.d, #63 // =0x3f ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: bic z7.d, z6.d, z4.d +; CHECK-NEXT: mov z6.d, #63 // =0x3f +; CHECK-NEXT: mov z7.d, z4.d ; CHECK-NEXT: lsr z2.d, z2.d, #1 +; CHECK-NEXT: lsr z3.d, z3.d, #1 +; CHECK-NEXT: bic z4.d, z6.d, z4.d +; CHECK-NEXT: and z7.d, z7.d, #0x3f ; CHECK-NEXT: bic z6.d, z6.d, z5.d -; CHECK-NEXT: and z4.d, z4.d, #0x3f ; CHECK-NEXT: and z5.d, z5.d, #0x3f -; CHECK-NEXT: lsr z3.d, z3.d, #1 -; CHECK-NEXT: lsr z2.d, p0/m, z2.d, z7.d -; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z4.d -; CHECK-NEXT: lsl z1.d, p0/m, z1.d, z5.d +; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z7.d +; CHECK-NEXT: lsr z2.d, p0/m, z2.d, z4.d +; CHECK-NEXT: lsr z3.d, p0/m, z3.d, z6.d +; CHECK-NEXT: lsl z1.d, p0/m, z1.d, z5.d ; CHECK-NEXT: orr z0.d, z0.d, z2.d ; CHECK-NEXT: orr z1.d, z1.d, z3.d ; CHECK-NEXT: ret @@ -1122,14 +1121,14 @@ define <vscale x 2 x i64> @fshl_rot_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b){ ; CHECK-LABEL: fshl_rot_i64: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: subr z1.d, z1.d, #0 // =0x0 -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: and z1.d, z1.d, #0x3f ; CHECK-NEXT: and z2.d, z2.d, #0x3f -; CHECK-NEXT: lsrr z1.d, p0/m, z1.d, z0.d -; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z2.d -; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: and z1.d, z1.d, #0x3f +; CHECK-NEXT: lslr z2.d, p0/m, z2.d, z0.d +; CHECK-NEXT: lsr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: orr z0.d, z2.d, z0.d ; CHECK-NEXT: ret %fshl = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) ret <vscale x 2 x i64> %fshl @@ -1139,22 +1138,22 @@ define <vscale x 4 x i64> @fshl_rot_illegal_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b){ ; CHECK-LABEL: fshl_rot_illegal_i64: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov z4.d, z2.d ; CHECK-NEXT: subr z2.d, z2.d, #0 // =0x0 ; CHECK-NEXT: mov z5.d, z3.d ; CHECK-NEXT: subr z3.d, z3.d, #0 // =0x0 -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: and z2.d, z2.d, #0x3f ; CHECK-NEXT: and z4.d, z4.d, #0x3f +; CHECK-NEXT: and z2.d, z2.d, #0x3f ; CHECK-NEXT: and z3.d, z3.d, #0x3f -; CHECK-NEXT: lsrr z2.d, p0/m, z2.d, z0.d -; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z4.d ; CHECK-NEXT: and z5.d, z5.d, #0x3f -; CHECK-NEXT: movprfx z4, z1 -; CHECK-NEXT: lsl z4.d, p0/m, z4.d, z5.d +; CHECK-NEXT: lslr z4.d, p0/m, z4.d, z0.d +; CHECK-NEXT: lsr z0.d, p0/m, z0.d, z2.d +; CHECK-NEXT: movprfx z2, z1 +; CHECK-NEXT: lsl z2.d, p0/m, z2.d, z5.d ; CHECK-NEXT: lsr z1.d, p0/m, z1.d, z3.d -; CHECK-NEXT: orr z0.d, z0.d, z2.d -; CHECK-NEXT: orr z1.d, z4.d, z1.d +; CHECK-NEXT: orr z0.d, z4.d, z0.d +; CHECK-NEXT: orr z1.d, z2.d, z1.d ; CHECK-NEXT: ret %fshl = call <vscale x 4 x i64> @llvm.fshl.nxv4i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %a, <vscale x 4 x i64> %b) ret <vscale x 4 x i64> %fshl @@ -1176,12 +1175,12 @@ define <vscale x 2 x i64> @fshr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c){ ; CHECK-LABEL: fshr_i64: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov z3.d, #63 // =0x3f ; CHECK-NEXT: mov z4.d, z2.d +; CHECK-NEXT: lsl z0.d, z0.d, #1 ; CHECK-NEXT: bic z2.d, z3.d, z2.d ; CHECK-NEXT: and z4.d, z4.d, #0x3f -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: lsl z0.d, z0.d, #1 ; CHECK-NEXT: lsr z1.d, p0/m, z1.d, z4.d ; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z2.d ; CHECK-NEXT: orr z0.d, z0.d, z1.d @@ -1193,14 +1192,14 @@ define <vscale x 2 x i64> @fshr_rot_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b){ ; CHECK-LABEL: fshr_rot_i64: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: subr z1.d, z1.d, #0 // =0x0 -;
CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: and z1.d, z1.d, #0x3f ; CHECK-NEXT: and z2.d, z2.d, #0x3f -; CHECK-NEXT: lslr z1.d, p0/m, z1.d, z0.d -; CHECK-NEXT: lsr z0.d, p0/m, z0.d, z2.d -; CHECK-NEXT: orr z0.d, z0.d, z1.d +; CHECK-NEXT: and z1.d, z1.d, #0x3f +; CHECK-NEXT: lsrr z2.d, p0/m, z2.d, z0.d +; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: orr z0.d, z2.d, z0.d ; CHECK-NEXT: ret %fshr = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) ret <vscale x 2 x i64> %fshr diff --git a/llvm/test/CodeGen/AArch64/load-insert-zero.ll b/llvm/test/CodeGen/AArch64/load-insert-zero.ll --- a/llvm/test/CodeGen/AArch64/load-insert-zero.ll +++ b/llvm/test/CodeGen/AArch64/load-insert-zero.ll @@ -461,27 +461,27 @@ ; CHECK-LABEL: predictor_4x4_neon: ; CHECK: // %bb.0: ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: ldur w9, [x2, #2] +; CHECK-NEXT: ldur w8, [x2, #2] ; CHECK-NEXT: ldr s1, [x2] -; CHECK-NEXT: lsl x8, x1, #1 ; CHECK-NEXT: ldur s2, [x2, #1] -; CHECK-NEXT: mov v0.s[0], w9 -; CHECK-NEXT: lsr w9, w9, #24 ; CHECK-NEXT: ushll v3.8h, v2.8b, #1 -; CHECK-NEXT: dup v4.8b, w9 -; CHECK-NEXT: add x9, x8, x1 +; CHECK-NEXT: mov v0.s[0], w8 +; CHECK-NEXT: lsr w8, w8, #24 ; CHECK-NEXT: uaddl v0.8h, v0.8b, v1.8b ; CHECK-NEXT: urhadd v1.8b, v1.8b, v2.8b ; CHECK-NEXT: str s1, [x0] ; CHECK-NEXT: add v0.8h, v0.8h, v3.8h -; CHECK-NEXT: zip1 v2.2s, v1.2s, v4.2s +; CHECK-NEXT: dup v3.8b, w8 +; CHECK-NEXT: lsl x8, x1, #1 ; CHECK-NEXT: rshrn v0.8b, v0.8h, #2 +; CHECK-NEXT: zip1 v2.2s, v1.2s, v3.2s ; CHECK-NEXT: str s0, [x0, x1] -; CHECK-NEXT: zip1 v3.2s, v0.2s, v4.2s -; CHECK-NEXT: ext v1.8b, v2.8b, v0.8b, #1 +; CHECK-NEXT: zip1 v3.2s, v0.2s, v3.2s +; CHECK-NEXT: ext v2.8b, v2.8b, v0.8b, #1 +; CHECK-NEXT: str s2, [x0, x8] +; CHECK-NEXT: add x8, x8, x1 +; CHECK-NEXT: ext v1.8b, v3.8b, v0.8b, #1 ; CHECK-NEXT: str s1, [x0, x8] -; CHECK-NEXT: ext v2.8b, v3.8b, v0.8b, #1 -; CHECK-NEXT: str s2, [x0, x9] ; CHECK-NEXT: ret %5 = load i32, ptr %2, align 4 %6 = insertelement <2 x i32> , i32 %5, i64 0 @@ -537,24 +537,24 @@ ; CHECK-LABEL: predictor_4x4_neon_new: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr s0, [x2] -; CHECK-NEXT: lsl x8, x1, #1 ; CHECK-NEXT: ldur s1, [x2, #1] -; CHECK-NEXT: add x9, x8, x1 +; CHECK-NEXT: lsl x8, x1, #1 ; CHECK-NEXT: ldur s2, [x2, #2] ; CHECK-NEXT: ldur s3, [x2, #3] ; CHECK-NEXT: uaddl v4.8h, v1.8b, v0.8b ; CHECK-NEXT: urhadd v0.8b, v0.8b, v1.8b +; CHECK-NEXT: add x9, x8, x1 ; CHECK-NEXT: uaddl v5.8h, v2.8b, v1.8b ; CHECK-NEXT: uaddl v3.8h, v3.8b, v2.8b +; CHECK-NEXT: urhadd v1.8b, v1.8b, v2.8b ; CHECK-NEXT: str s0, [x0] ; CHECK-NEXT: add v4.8h, v4.8h, v5.8h ; CHECK-NEXT: add v3.8h, v3.8h, v5.8h -; CHECK-NEXT: rshrn v0.8b, v4.8h, #2 -; CHECK-NEXT: str s0, [x0, x1] -; CHECK-NEXT: urhadd v0.8b, v1.8b, v2.8b -; CHECK-NEXT: rshrn v1.8b, v3.8h, #2 -; CHECK-NEXT: str s0, [x0, x8] -; CHECK-NEXT: str s1, [x0, x9] +; CHECK-NEXT: rshrn v4.8b, v4.8h, #2 +; CHECK-NEXT: rshrn v0.8b, v3.8h, #2 +; CHECK-NEXT: str s4, [x0, x1] +; CHECK-NEXT: str s1, [x0, x8] +; CHECK-NEXT: str s0, [x0, x9] ; CHECK-NEXT: ret %5 = load i32, ptr %2, align 4 %6 = insertelement <2 x i32> , i32 %5, i64 0 @@ -608,9 +608,9 @@ define <vscale x 8 x i8> @loadnxv8i8(ptr %p) { ; CHECK-LABEL: loadnxv8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x0] ; CHECK-NEXT: ptrue p0.h, vl1 ; CHECK-NEXT: mov z0.h, #0 // =0x0 +; CHECK-NEXT: ldrb w8, [x0] ; CHECK-NEXT: mov z0.h, p0/m, w8 ; CHECK-NEXT: ret %l = load i8, ptr %p @@ -631,9 +631,9 @@ define <vscale x 4 x i16> @loadnxv4i16(ptr %p) { ; CHECK-LABEL: loadnxv4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] ; CHECK-NEXT: ptrue p0.s, vl1 ; CHECK-NEXT: mov
z0.s, #0 // =0x0 +; CHECK-NEXT: ldrh w8, [x0] ; CHECK-NEXT: mov z0.s, p0/m, w8 ; CHECK-NEXT: ret %l = load i16, ptr %p @@ -654,9 +654,9 @@ define <vscale x 2 x i32> @loadnxv2i32(ptr %p) { ; CHECK-LABEL: loadnxv2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr w8, [x0] ; CHECK-NEXT: ptrue p0.d, vl1 ; CHECK-NEXT: mov z0.d, #0 // =0x0 +; CHECK-NEXT: ldr w8, [x0] ; CHECK-NEXT: mov z0.d, p0/m, x8 ; CHECK-NEXT: ret %l = load i32, ptr %p @@ -688,13 +688,13 @@ define <vscale x 4 x half> @loadnxv4f16(ptr %p) { ; CHECK-LABEL: loadnxv4f16: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: ldr h1, [x0] ; CHECK-NEXT: index z0.s, #0, #1 -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z2.s, w8 -; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z2.s +; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z1.s ; CHECK-NEXT: mov z0.h, #0 // =0x0 +; CHECK-NEXT: ldr h1, [x0] ; CHECK-NEXT: mov z0.h, p0/m, h1 ; CHECK-NEXT: ret %l = load half, ptr %p @@ -715,13 +715,13 @@ define <vscale x 4 x bfloat> @loadnxv4bf16(ptr %p) { ; CHECK-LABEL: loadnxv4bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: ldr h1, [x0] ; CHECK-NEXT: index z0.s, #0, #1 -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z2.s, w8 -; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z2.s +; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z1.s ; CHECK-NEXT: mov z0.h, #0 // =0x0 +; CHECK-NEXT: ldr h1, [x0] ; CHECK-NEXT: mov z0.h, p0/m, h1 ; CHECK-NEXT: ret %l = load bfloat, ptr %p @@ -742,13 +742,13 @@ define <vscale x 2 x float> @loadnxv2f32(ptr %p) { ; CHECK-LABEL: loadnxv2f32: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov x8, xzr -; CHECK-NEXT: ldr s1, [x0] ; CHECK-NEXT: index z0.d, #0, #1 -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z2.d, x8 -; CHECK-NEXT: cmpeq p0.d, p0/z, z0.d, z2.d +; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: cmpeq p0.d, p0/z, z0.d, z1.d ; CHECK-NEXT: mov z0.s, #0 // =0x0 +; CHECK-NEXT: ldr s1, [x0] ; CHECK-NEXT: mov z0.s, p0/m, s1 ; CHECK-NEXT: ret %l = load float, ptr %p @@ -782,9 +782,9 @@ define <vscale x 8 x i8> @loadnxv8i8_offset(ptr %p) { ; CHECK-LABEL: loadnxv8i8_offset: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x0, #1] ; CHECK-NEXT: ptrue p0.h, vl1 ; CHECK-NEXT: mov z0.h, #0 // =0x0 +; CHECK-NEXT: ldrb w8, [x0, #1] ; CHECK-NEXT: mov z0.h, p0/m, w8 ; CHECK-NEXT: ret %g = getelementptr inbounds i8, ptr %p, i64 1 @@ -807,9 +807,9 @@ define <vscale x 4 x i16> @loadnxv4i16_offset(ptr %p) { ; CHECK-LABEL: loadnxv4i16_offset: ; CHECK: // %bb.0: -; CHECK-NEXT: ldurh w8, [x0, #1] ; CHECK-NEXT: ptrue p0.s, vl1 ; CHECK-NEXT: mov z0.s, #0 // =0x0 +; CHECK-NEXT: ldurh w8, [x0, #1] ; CHECK-NEXT: mov z0.s, p0/m, w8 ; CHECK-NEXT: ret %g = getelementptr inbounds i8, ptr %p, i64 1 @@ -832,9 +832,9 @@ define <vscale x 2 x i32> @loadnxv2i32_offset(ptr %p) { ; CHECK-LABEL: loadnxv2i32_offset: ; CHECK: // %bb.0: -; CHECK-NEXT: ldur w8, [x0, #1] ; CHECK-NEXT: ptrue p0.d, vl1 ; CHECK-NEXT: mov z0.d, #0 // =0x0 +; CHECK-NEXT: ldur w8, [x0, #1] ; CHECK-NEXT: mov z0.d, p0/m, x8 ; CHECK-NEXT: ret %g = getelementptr inbounds i8, ptr %p, i64 1 @@ -869,13 +869,13 @@ define <vscale x 4 x half> @loadnxv4f16_offset(ptr %p) { ; CHECK-LABEL: loadnxv4f16_offset: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: ldur h1, [x0, #1] ; CHECK-NEXT: index z0.s, #0, #1 -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z2.s, w8 -; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z2.s +; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z1.s ; CHECK-NEXT: mov z0.h, #0 // =0x0 +; CHECK-NEXT: ldur h1, [x0, #1] ; CHECK-NEXT: mov z0.h, p0/m, h1 ; CHECK-NEXT: ret %g = getelementptr inbounds i8, ptr %p, i64 1
@@ -898,13 +898,13 @@ define <vscale x 4 x bfloat> @loadnxv4bf16_offset(ptr %p) { ; CHECK-LABEL: loadnxv4bf16_offset: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: ldur h1, [x0, #1] ; CHECK-NEXT: index z0.s, #0, #1 -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z2.s, w8 -; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z2.s +; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z1.s ; CHECK-NEXT: mov z0.h, #0 // =0x0 +; CHECK-NEXT: ldur h1, [x0, #1] ; CHECK-NEXT: mov z0.h, p0/m, h1 ; CHECK-NEXT: ret %g = getelementptr inbounds i8, ptr %p, i64 1 @@ -927,13 +927,13 @@ define <vscale x 2 x float> @loadnxv2f32_offset(ptr %p) { ; CHECK-LABEL: loadnxv2f32_offset: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov x8, xzr -; CHECK-NEXT: ldur s1, [x0, #1] ; CHECK-NEXT: index z0.d, #0, #1 -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z2.d, x8 -; CHECK-NEXT: cmpeq p0.d, p0/z, z0.d, z2.d +; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: cmpeq p0.d, p0/z, z0.d, z1.d ; CHECK-NEXT: mov z0.s, #0 // =0x0 +; CHECK-NEXT: ldur s1, [x0, #1] ; CHECK-NEXT: mov z0.s, p0/m, s1 ; CHECK-NEXT: ret %g = getelementptr inbounds i8, ptr %p, i64 1 diff --git a/llvm/test/CodeGen/AArch64/logic-reassociate.ll b/llvm/test/CodeGen/AArch64/logic-reassociate.ll --- a/llvm/test/CodeGen/AArch64/logic-reassociate.ll +++ b/llvm/test/CodeGen/AArch64/logic-reassociate.ll @@ -14,8 +14,8 @@ define i128 @and_commute1(i128 %x, i128 %y) { ; CHECK-LABEL: and_commute1: ; CHECK: // %bb.0: -; CHECK-NEXT: and x0, x2, x0 ; CHECK-NEXT: and x1, x3, x1 +; CHECK-NEXT: and x0, x2, x0 ; CHECK-NEXT: ret %b = and i128 %y, %x %b2 = and i128 %x, %b diff --git a/llvm/test/CodeGen/AArch64/logic-shift.ll b/llvm/test/CodeGen/AArch64/logic-shift.ll --- a/llvm/test/CodeGen/AArch64/logic-shift.ll +++ b/llvm/test/CodeGen/AArch64/logic-shift.ll @@ -200,10 +200,10 @@ define i64 @or_lshr_mix_shift_amount(i64 %x0, i64 %x1, i64 %y, i64 %z, i64 %w) { ; CHECK-LABEL: or_lshr_mix_shift_amount: ; CHECK: // %bb.0: -; CHECK-NEXT: lsr x9, x0, x2 -; CHECK-NEXT: lsr x8, x1, x4 -; CHECK-NEXT: orr x9, x9, x3 -; CHECK-NEXT: orr x0, x9, x8 +; CHECK-NEXT: lsr x8, x0, x2 +; CHECK-NEXT: lsr x9, x1, x4 +; CHECK-NEXT: orr x8, x8, x3 +; CHECK-NEXT: orr x0, x8, x9 ; CHECK-NEXT: ret %sh1 = lshr i64 %x0, %y %sh2 = lshr i64 %x1, %w @@ -428,10 +428,10 @@ define i64 @xor_lshr_mix_shift_amount(i64 %x0, i64 %x1, i64 %y, i64 %z, i64 %w) { ; CHECK-LABEL: xor_lshr_mix_shift_amount: ; CHECK: // %bb.0: -; CHECK-NEXT: lsr x9, x0, x2 -; CHECK-NEXT: lsr x8, x1, x4 -; CHECK-NEXT: eor x9, x9, x3 -; CHECK-NEXT: eor x0, x9, x8 +; CHECK-NEXT: lsr x8, x0, x2 +; CHECK-NEXT: lsr x9, x1, x4 +; CHECK-NEXT: eor x8, x8, x3 +; CHECK-NEXT: eor x0, x8, x9 ; CHECK-NEXT: ret %sh1 = lshr i64 %x0, %y %sh2 = lshr i64 %x1, %w @@ -656,10 +656,10 @@ define i64 @and_lshr_mix_shift_amount(i64 %x0, i64 %x1, i64 %y, i64 %z, i64 %w) { ; CHECK-LABEL: and_lshr_mix_shift_amount: ; CHECK: // %bb.0: -; CHECK-NEXT: lsr x9, x0, x2 -; CHECK-NEXT: lsr x8, x1, x4 -; CHECK-NEXT: and x9, x9, x3 -; CHECK-NEXT: and x0, x9, x8 +; CHECK-NEXT: lsr x8, x0, x2 +; CHECK-NEXT: lsr x9, x1, x4 +; CHECK-NEXT: and x8, x8, x3 +; CHECK-NEXT: and x0, x8, x9 ; CHECK-NEXT: ret %sh1 = lshr i64 %x0, %y %sh2 = lshr i64 %x1, %w @@ -788,9 +788,10 @@ define i16 @or_fshr_commute2(i16 %x, i16 %y) { ; CHECK-LABEL: or_fshr_commute2: ; CHECK: // %bb.0: -; CHECK-NEXT: orr w8, w0, w1 -; CHECK-NEXT: lsl w0, w0, #9 -; CHECK-NEXT: bfxil w0, w8, #7, #9 +; CHECK-NEXT: lsl w8, w0, #9 +; CHECK-NEXT: orr w9, w0, w1 +; CHECK-NEXT: bfxil w8, w9, #7, #9 +; CHECK-NEXT: mov w0, w8 ;
CHECK-NEXT: ret %or1 = or i16 %x, %y %sh1 = shl i16 %x, 9 @@ -802,9 +803,10 @@ define i8 @or_fshr_commute3(i8 %x, i8 %y) { ; CHECK-LABEL: or_fshr_commute3: ; CHECK: // %bb.0: -; CHECK-NEXT: orr w8, w1, w0 -; CHECK-NEXT: lsl w0, w0, #2 -; CHECK-NEXT: bfxil w0, w8, #6, #2 +; CHECK-NEXT: lsl w8, w0, #2 +; CHECK-NEXT: orr w9, w1, w0 +; CHECK-NEXT: bfxil w8, w9, #6, #2 +; CHECK-NEXT: mov w0, w8 ; CHECK-NEXT: ret %or1 = or i8 %y, %x %sh1 = shl i8 %x, 2 diff --git a/llvm/test/CodeGen/AArch64/logical_shifted_reg.ll b/llvm/test/CodeGen/AArch64/logical_shifted_reg.ll --- a/llvm/test/CodeGen/AArch64/logical_shifted_reg.ll +++ b/llvm/test/CodeGen/AArch64/logical_shifted_reg.ll @@ -18,39 +18,39 @@ ; CHECK-NEXT: ldr w9, [x9] ; CHECK-NEXT: and w11, w10, w9 ; CHECK-NEXT: bic w12, w10, w9 -; CHECK-NEXT: orr w13, w10, w9 ; CHECK-NEXT: str w11, [x8] -; CHECK-NEXT: orn w11, w10, w9 +; CHECK-NEXT: orr w11, w10, w9 ; CHECK-NEXT: str w12, [x8] -; CHECK-NEXT: eor w12, w10, w9 -; CHECK-NEXT: str w13, [x8] -; CHECK-NEXT: eon w13, w9, w10 +; CHECK-NEXT: orn w12, w10, w9 +; CHECK-NEXT: str w11, [x8] +; CHECK-NEXT: eor w11, w10, w9 +; CHECK-NEXT: str w12, [x8] +; CHECK-NEXT: eon w12, w9, w10 ; CHECK-NEXT: str w11, [x8] ; CHECK-NEXT: and w11, w10, w9, lsl #31 ; CHECK-NEXT: str w12, [x8] ; CHECK-NEXT: bic w12, w10, w9, lsl #31 -; CHECK-NEXT: str w13, [x8] -; CHECK-NEXT: orr w13, w10, w9, lsl #31 ; CHECK-NEXT: str w11, [x8] -; CHECK-NEXT: orn w11, w10, w9, lsl #31 +; CHECK-NEXT: orr w11, w10, w9, lsl #31 +; CHECK-NEXT: str w12, [x8] +; CHECK-NEXT: orn w12, w10, w9, lsl #31 +; CHECK-NEXT: str w11, [x8] +; CHECK-NEXT: eor w11, w10, w9, lsl #31 ; CHECK-NEXT: str w12, [x8] -; CHECK-NEXT: eor w12, w10, w9, lsl #31 -; CHECK-NEXT: str w13, [x8] -; CHECK-NEXT: eon w13, w10, w9, lsl #31 +; CHECK-NEXT: eon w12, w10, w9, lsl #31 ; CHECK-NEXT: str w11, [x8] ; CHECK-NEXT: bic w11, w10, w9, asr #10 ; CHECK-NEXT: str w12, [x8] ; CHECK-NEXT: eor w12, w10, w9, asr #10 -; CHECK-NEXT: str w13, [x8] -; CHECK-NEXT: orn w13, w10, w9, lsr #1 ; CHECK-NEXT: str w11, [x8] -; CHECK-NEXT: eor w11, w10, w9, lsr #1 +; CHECK-NEXT: orn w11, w10, w9, lsr #1 ; CHECK-NEXT: str w12, [x8] -; CHECK-NEXT: eon w12, w10, w9, ror #20 -; CHECK-NEXT: and w9, w10, w9, ror #20 -; CHECK-NEXT: str w13, [x8] +; CHECK-NEXT: eor w12, w10, w9, lsr #1 ; CHECK-NEXT: str w11, [x8] +; CHECK-NEXT: eon w11, w10, w9, ror #20 +; CHECK-NEXT: and w9, w10, w9, ror #20 ; CHECK-NEXT: str w12, [x8] +; CHECK-NEXT: str w11, [x8] ; CHECK-NEXT: str w9, [x8] ; CHECK-NEXT: ret %val1 = load i32, ptr @var1_32 @@ -134,39 +134,39 @@ ; CHECK-NEXT: ldr x9, [x9] ; CHECK-NEXT: and x11, x10, x9 ; CHECK-NEXT: bic x12, x10, x9 -; CHECK-NEXT: orr x13, x10, x9 ; CHECK-NEXT: str x11, [x8] -; CHECK-NEXT: orn x11, x10, x9 +; CHECK-NEXT: orr x11, x10, x9 ; CHECK-NEXT: str x12, [x8] -; CHECK-NEXT: eor x12, x10, x9 -; CHECK-NEXT: str x13, [x8] -; CHECK-NEXT: eon x13, x9, x10 +; CHECK-NEXT: orn x12, x10, x9 +; CHECK-NEXT: str x11, [x8] +; CHECK-NEXT: eor x11, x10, x9 +; CHECK-NEXT: str x12, [x8] +; CHECK-NEXT: eon x12, x9, x10 ; CHECK-NEXT: str x11, [x8] ; CHECK-NEXT: and x11, x10, x9, lsl #63 ; CHECK-NEXT: str x12, [x8] ; CHECK-NEXT: bic x12, x10, x9, lsl #63 -; CHECK-NEXT: str x13, [x8] -; CHECK-NEXT: orr x13, x10, x9, lsl #63 ; CHECK-NEXT: str x11, [x8] -; CHECK-NEXT: orn x11, x10, x9, lsl #63 +; CHECK-NEXT: orr x11, x10, x9, lsl #63 +; CHECK-NEXT: str x12, [x8] +; CHECK-NEXT: orn x12, x10, x9, lsl #63 +; CHECK-NEXT: str x11, [x8] +; CHECK-NEXT: eor x11, x10, x9, lsl #63 ; CHECK-NEXT: str x12, [x8] -; 
CHECK-NEXT: eor x12, x10, x9, lsl #63 -; CHECK-NEXT: str x13, [x8] -; CHECK-NEXT: eon x13, x10, x9, lsl #63 +; CHECK-NEXT: eon x12, x10, x9, lsl #63 ; CHECK-NEXT: str x11, [x8] ; CHECK-NEXT: bic x11, x10, x9, asr #10 ; CHECK-NEXT: str x12, [x8] ; CHECK-NEXT: eor x12, x10, x9, asr #10 -; CHECK-NEXT: str x13, [x8] -; CHECK-NEXT: orn x13, x10, x9, lsr #1 ; CHECK-NEXT: str x11, [x8] -; CHECK-NEXT: eor x11, x10, x9, lsr #1 +; CHECK-NEXT: orn x11, x10, x9, lsr #1 ; CHECK-NEXT: str x12, [x8] -; CHECK-NEXT: eon x12, x10, x9, ror #20 -; CHECK-NEXT: and x9, x10, x9, ror #20 -; CHECK-NEXT: str x13, [x8] +; CHECK-NEXT: eor x12, x10, x9, lsr #1 ; CHECK-NEXT: str x11, [x8] +; CHECK-NEXT: eon x11, x10, x9, ror #20 +; CHECK-NEXT: and x9, x10, x9, ror #20 ; CHECK-NEXT: str x12, [x8] +; CHECK-NEXT: str x11, [x8] ; CHECK-NEXT: str x9, [x8] ; CHECK-NEXT: ret %val1 = load i64, ptr @var1_64 @@ -252,16 +252,17 @@ ; CHECK-NEXT: ldr x9, [x8] ; CHECK-NEXT: ldr x10, [x10] ; CHECK-NEXT: tst x9, x10 -; CHECK-NEXT: b.gt .LBB2_2 +; CHECK-NEXT: b.gt .LBB2_4 ; CHECK-NEXT: // %bb.1: // %test2 ; CHECK-NEXT: tst x9, x10, lsl #63 +; CHECK-NEXT: b.lt .LBB2_4 +; CHECK-NEXT: // %bb.2: // %test3 ; CHECK-NEXT: and x10, x9, x10, asr #12 -; CHECK-NEXT: ccmp x10, #1, #0, ge -; CHECK-NEXT: b.lt .LBB2_3 -; CHECK-NEXT: .LBB2_2: // %common.ret -; CHECK-NEXT: ret -; CHECK-NEXT: .LBB2_3: // %other_exit +; CHECK-NEXT: cmp x10, #1 +; CHECK-NEXT: b.ge .LBB2_4 +; CHECK-NEXT: // %bb.3: // %other_exit ; CHECK-NEXT: str x9, [x8] +; CHECK-NEXT: .LBB2_4: // %common.ret ; CHECK-NEXT: ret %val1 = load i64, ptr @var1_64 %val2 = load i64, ptr @var2_64 diff --git a/llvm/test/CodeGen/AArch64/lowerMUL-newload.ll b/llvm/test/CodeGen/AArch64/lowerMUL-newload.ll --- a/llvm/test/CodeGen/AArch64/lowerMUL-newload.ll +++ b/llvm/test/CodeGen/AArch64/lowerMUL-newload.ll @@ -86,9 +86,9 @@ ; CHECK-LABEL: addmuli16_and: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: smull v1.4s, v1.4h, v2.4h -; CHECK-NEXT: movi v3.2d, #0x00ffff0000ffff ; CHECK-NEXT: smlal v1.4s, v0.4h, v2.4h -; CHECK-NEXT: and v0.16b, v1.16b, v3.16b +; CHECK-NEXT: movi v0.2d, #0x00ffff0000ffff +; CHECK-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret entry: %v0 = sext <4 x i16> %vec0 to <4 x i32> @@ -214,9 +214,9 @@ ; CHECK-LABEL: addmuli32_and: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: smull v1.2d, v1.2s, v2.2s -; CHECK-NEXT: movi v3.2d, #0x000000ffffffff ; CHECK-NEXT: smlal v1.2d, v0.2s, v2.2s -; CHECK-NEXT: and v0.16b, v1.16b, v3.16b +; CHECK-NEXT: movi v0.2d, #0x000000ffffffff +; CHECK-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret entry: %v0 = sext <2 x i32> %vec0 to <2 x i64> diff --git a/llvm/test/CodeGen/AArch64/machine-combiner-copy.ll b/llvm/test/CodeGen/AArch64/machine-combiner-copy.ll --- a/llvm/test/CodeGen/AArch64/machine-combiner-copy.ll +++ b/llvm/test/CodeGen/AArch64/machine-combiner-copy.ll @@ -7,8 +7,8 @@ ; CHECK-NEXT: // kill: def $h0 killed $h0 def $q0 ; CHECK-NEXT: cbz w2, .LBB0_8 ; CHECK-NEXT: // %bb.1: // %for.body.preheader -; CHECK-NEXT: mov w8, w2 ; CHECK-NEXT: cmp w2, #15 +; CHECK-NEXT: mov w8, w2 ; CHECK-NEXT: b.hi .LBB0_3 ; CHECK-NEXT: // %bb.2: ; CHECK-NEXT: mov x9, xzr @@ -20,13 +20,13 @@ ; CHECK-NEXT: mov x12, x9 ; CHECK-NEXT: .LBB0_4: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldp q1, q2, [x11, #-16] +; CHECK-NEXT: ldp q1, q3, [x11, #-16] ; CHECK-NEXT: subs x12, x12, #16 +; CHECK-NEXT: ldp q2, q4, [x10, #-16] ; CHECK-NEXT: add x11, x11, #32 -; CHECK-NEXT: ldp q3, q4, [x10, #-16] -; CHECK-NEXT: fmla v3.8h, v1.8h, v0.h[0] -; 
CHECK-NEXT: fmla v4.8h, v2.8h, v0.h[0] -; CHECK-NEXT: stp q3, q4, [x10, #-16] +; CHECK-NEXT: fmla v2.8h, v1.8h, v0.h[0] +; CHECK-NEXT: fmla v4.8h, v3.8h, v0.h[0] +; CHECK-NEXT: stp q2, q4, [x10, #-16] ; CHECK-NEXT: add x10, x10, #32 ; CHECK-NEXT: b.ne .LBB0_4 ; CHECK-NEXT: // %bb.5: // %middle.block diff --git a/llvm/test/CodeGen/AArch64/machine-combiner-subadd.ll b/llvm/test/CodeGen/AArch64/machine-combiner-subadd.ll --- a/llvm/test/CodeGen/AArch64/machine-combiner-subadd.ll +++ b/llvm/test/CodeGen/AArch64/machine-combiner-subadd.ll @@ -9,12 +9,12 @@ define i32 @test1(i32 %a, i32 %b, i32 %c) { ; CHECK-LABEL: test1: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: add w9, w0, #100 -; CHECK-NEXT: orr w8, w2, #0x80 -; CHECK-NEXT: sub w8, w8, w9 -; CHECK-NEXT: eor w9, w1, w9, lsl #8 -; CHECK-NEXT: sub w8, w8, w9 -; CHECK-NEXT: eor w0, w8, w9, asr #13 +; CHECK-NEXT: add w8, w0, #100 +; CHECK-NEXT: orr w9, w2, #0x80 +; CHECK-NEXT: eor w10, w1, w8, lsl #8 +; CHECK-NEXT: sub w8, w9, w8 +; CHECK-NEXT: sub w8, w8, w10 +; CHECK-NEXT: eor w0, w8, w10, asr #13 ; CHECK-NEXT: ret entry: %c1 = or i32 %c, 128 @@ -32,12 +32,12 @@ define i64 @test2(i64 %a, i64 %b, i64 %c) { ; CHECK-LABEL: test2: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: add x9, x0, #100 -; CHECK-NEXT: orr x8, x2, #0x80 -; CHECK-NEXT: sub x8, x8, x9 -; CHECK-NEXT: eor x9, x1, x9, lsl #8 -; CHECK-NEXT: sub x8, x8, x9 -; CHECK-NEXT: eor x0, x8, x9, asr #13 +; CHECK-NEXT: add x8, x0, #100 +; CHECK-NEXT: orr x9, x2, #0x80 +; CHECK-NEXT: eor x10, x1, x8, lsl #8 +; CHECK-NEXT: sub x8, x9, x8 +; CHECK-NEXT: sub x8, x8, x10 +; CHECK-NEXT: eor x0, x8, x10, asr #13 ; CHECK-NEXT: ret entry: %c1 = or i64 %c, 128 @@ -55,12 +55,12 @@ define i32 @test3(i32 %a, i32 %b, i32 %c) { ; CHECK-LABEL: test3: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: add w9, w0, #100 -; CHECK-NEXT: orr w8, w2, #0x80 -; CHECK-NEXT: add w8, w8, w9 -; CHECK-NEXT: eor w9, w1, w9, lsl #8 -; CHECK-NEXT: sub w8, w9, w8 -; CHECK-NEXT: eor w0, w8, w9, asr #13 +; CHECK-NEXT: add w8, w0, #100 +; CHECK-NEXT: orr w9, w2, #0x80 +; CHECK-NEXT: eor w10, w1, w8, lsl #8 +; CHECK-NEXT: add w8, w9, w8 +; CHECK-NEXT: sub w8, w10, w8 +; CHECK-NEXT: eor w0, w8, w10, asr #13 ; CHECK-NEXT: ret entry: %c1 = or i32 %c, 128 diff --git a/llvm/test/CodeGen/AArch64/machine-combiner-transient.ll b/llvm/test/CodeGen/AArch64/machine-combiner-transient.ll --- a/llvm/test/CodeGen/AArch64/machine-combiner-transient.ll +++ b/llvm/test/CodeGen/AArch64/machine-combiner-transient.ll @@ -11,10 +11,10 @@ define i32 @test1(i32 %a, i32 %b, i32 %c) { ; CHECK-LABEL: test1: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub w8, w2, w0 -; CHECK-NEXT: eor w9, w1, w0, lsl #8 -; CHECK-NEXT: sub w8, w8, w9 -; CHECK-NEXT: eor w0, w8, w9, asr #13 +; CHECK-NEXT: eor w8, w1, w0, lsl #8 +; CHECK-NEXT: sub w9, w2, w0 +; CHECK-NEXT: sub w9, w9, w8 +; CHECK-NEXT: eor w0, w9, w8, asr #13 ; CHECK-NEXT: ret entry: %shl = shl i32 %a, 8 @@ -30,10 +30,10 @@ define i64 @test2(i64 %a, i64 %b, i64 %c) { ; CHECK-LABEL: test2: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub x8, x2, x0 -; CHECK-NEXT: eor x9, x1, x0, lsl #8 -; CHECK-NEXT: sub x8, x8, x9 -; CHECK-NEXT: eor x0, x8, x9, asr #13 +; CHECK-NEXT: eor x8, x1, x0, lsl #8 +; CHECK-NEXT: sub x9, x2, x0 +; CHECK-NEXT: sub x9, x9, x8 +; CHECK-NEXT: eor x0, x9, x8, asr #13 ; CHECK-NEXT: ret entry: %shl = shl i64 %a, 8 diff --git a/llvm/test/CodeGen/AArch64/machine-licm-sink-instr.ll b/llvm/test/CodeGen/AArch64/machine-licm-sink-instr.ll --- a/llvm/test/CodeGen/AArch64/machine-licm-sink-instr.ll +++ 
b/llvm/test/CodeGen/AArch64/machine-licm-sink-instr.ll @@ -131,23 +131,23 @@ ; CHECK-NEXT: b.lt .LBB2_3 ; CHECK-NEXT: // %bb.1: // %for.body.preheader ; CHECK-NEXT: adrp x8, A -; CHECK-NEXT: mov w9, #42 -; CHECK-NEXT: mov w20, w19 -; CHECK-NEXT: ldr w21, [x8, :lo12:A] -; CHECK-NEXT: str w9, [x0] +; CHECK-NEXT: mov w21, w19 +; CHECK-NEXT: ldr w20, [x8, :lo12:A] +; CHECK-NEXT: mov w8, #42 // =0x2a +; CHECK-NEXT: str w8, [x0] ; CHECK-NEXT: .LBB2_2: // %for.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: mov w0, w21 +; CHECK-NEXT: mov w0, w20 ; CHECK-NEXT: bl _Z3usei -; CHECK-NEXT: sdiv w20, w20, w0 +; CHECK-NEXT: sdiv w21, w21, w0 ; CHECK-NEXT: subs w19, w19, #1 ; CHECK-NEXT: b.ne .LBB2_2 ; CHECK-NEXT: b .LBB2_4 ; CHECK-NEXT: .LBB2_3: -; CHECK-NEXT: mov w20, w19 +; CHECK-NEXT: mov w21, w19 ; CHECK-NEXT: .LBB2_4: // %for.cond.cleanup -; CHECK-NEXT: mov w0, w20 ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: mov w0, w21 ; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/machine-licm-sub-loop.ll b/llvm/test/CodeGen/AArch64/machine-licm-sub-loop.ll --- a/llvm/test/CodeGen/AArch64/machine-licm-sub-loop.ll +++ b/llvm/test/CodeGen/AArch64/machine-licm-sub-loop.ll @@ -9,17 +9,17 @@ ; CHECK-NEXT: b.lt .LBB0_10 ; CHECK-NEXT: // %bb.1: // %for.cond1.preheader.us.preheader ; CHECK-NEXT: mov w10, w0 +; CHECK-NEXT: ubfiz x11, x0, #2, #32 ; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: mov x9, xzr -; CHECK-NEXT: and x11, x10, #0xfffffff0 -; CHECK-NEXT: add x12, x1, #32 -; CHECK-NEXT: ubfiz x13, x0, #2, #32 +; CHECK-NEXT: and x12, x10, #0xfffffff0 +; CHECK-NEXT: add x13, x1, #32 ; CHECK-NEXT: add x14, x2, #16 ; CHECK-NEXT: b .LBB0_3 ; CHECK-NEXT: .LBB0_2: // %for.cond1.for.cond.cleanup3_crit_edge.us ; CHECK-NEXT: // in Loop: Header=BB0_3 Depth=1 ; CHECK-NEXT: add x9, x9, #1 -; CHECK-NEXT: add x12, x12, x13 +; CHECK-NEXT: add x13, x13, x11 ; CHECK-NEXT: add x8, x8, x10 ; CHECK-NEXT: cmp x9, x10 ; CHECK-NEXT: b.eq .LBB0_10 @@ -36,43 +36,43 @@ ; CHECK-NEXT: .LBB0_5: // %vector.ph ; CHECK-NEXT: // in Loop: Header=BB0_3 Depth=1 ; CHECK-NEXT: mov x16, x14 -; CHECK-NEXT: mov x17, x12 -; CHECK-NEXT: mov x18, x11 +; CHECK-NEXT: mov x17, x13 +; CHECK-NEXT: mov x18, x12 ; CHECK-NEXT: .LBB0_6: // %vector.body ; CHECK-NEXT: // Parent Loop BB0_3 Depth=1 ; CHECK-NEXT: // => This Inner Loop Header: Depth=2 -; CHECK-NEXT: ldp q0, q1, [x16, #-16] -; CHECK-NEXT: dup v3.8h, w15 +; CHECK-NEXT: dup v0.8h, w15 +; CHECK-NEXT: ldp q1, q4, [x16, #-16] +; CHECK-NEXT: ldp q3, q2, [x17, #-32] ; CHECK-NEXT: subs x18, x18, #16 -; CHECK-NEXT: add x16, x16, #32 -; CHECK-NEXT: ldp q4, q2, [x17, #-32] -; CHECK-NEXT: smlal v4.4s, v3.4h, v0.4h ; CHECK-NEXT: ldp q6, q5, [x17] -; CHECK-NEXT: smlal2 v2.4s, v3.8h, v0.8h -; CHECK-NEXT: smlal v6.4s, v3.4h, v1.4h -; CHECK-NEXT: stp q4, q2, [x17, #-32] -; CHECK-NEXT: smlal2 v5.4s, v3.8h, v1.8h +; CHECK-NEXT: add x16, x16, #32 +; CHECK-NEXT: smlal2 v2.4s, v0.8h, v1.8h +; CHECK-NEXT: smlal v3.4s, v0.4h, v1.4h +; CHECK-NEXT: smlal2 v5.4s, v0.8h, v4.8h +; CHECK-NEXT: smlal v6.4s, v0.4h, v4.4h +; CHECK-NEXT: stp q3, q2, [x17, #-32] ; CHECK-NEXT: stp q6, q5, [x17], #64 ; CHECK-NEXT: b.ne .LBB0_6 ; CHECK-NEXT: // %bb.7: // %middle.block ; CHECK-NEXT: // in Loop: Header=BB0_3 Depth=1 -; CHECK-NEXT: mov x18, x11 -; CHECK-NEXT: cmp x11, x10 +; CHECK-NEXT: cmp x12, x10 +; CHECK-NEXT: mov x18, x12 ; CHECK-NEXT: b.eq .LBB0_2 ; CHECK-NEXT: .LBB0_8: // %for.body4.us.preheader ; CHECK-NEXT: // in 
-; CHECK-NEXT: add x17, x18, x8
-; CHECK-NEXT: sub x16, x10, x18
-; CHECK-NEXT: add x18, x2, x18, lsl #1
-; CHECK-NEXT: add x17, x1, x17, lsl #2
+; CHECK-NEXT: add x16, x18, x8
+; CHECK-NEXT: add x17, x2, x18, lsl #1
+; CHECK-NEXT: sub x18, x10, x18
+; CHECK-NEXT: add x16, x1, x16, lsl #2
 ; CHECK-NEXT: .LBB0_9: // %for.body4.us
 ; CHECK-NEXT: // Parent Loop BB0_3 Depth=1
 ; CHECK-NEXT: // => This Inner Loop Header: Depth=2
-; CHECK-NEXT: ldrsh w3, [x18], #2
-; CHECK-NEXT: ldr w4, [x17]
-; CHECK-NEXT: subs x16, x16, #1
+; CHECK-NEXT: ldrsh w3, [x17], #2
+; CHECK-NEXT: ldr w4, [x16]
+; CHECK-NEXT: subs x18, x18, #1
 ; CHECK-NEXT: madd w3, w3, w15, w4
-; CHECK-NEXT: str w3, [x17], #4
+; CHECK-NEXT: str w3, [x16], #4
 ; CHECK-NEXT: b.ne .LBB0_9
 ; CHECK-NEXT: b .LBB0_2
 ; CHECK-NEXT: .LBB0_10: // %for.cond.cleanup
diff --git a/llvm/test/CodeGen/AArch64/machine_cse_impdef_killflags.ll b/llvm/test/CodeGen/AArch64/machine_cse_impdef_killflags.ll
--- a/llvm/test/CodeGen/AArch64/machine_cse_impdef_killflags.ll
+++ b/llvm/test/CodeGen/AArch64/machine_cse_impdef_killflags.ll
@@ -7,11 +7,11 @@
 define i64 @csed_impdef_killflag(i64 %a) {
 ; CHECK-LABEL: csed_impdef_killflag:
 ; CHECK: ; %bb.0:
-; CHECK-NEXT: mov w8, #1
+; CHECK-NEXT: mov w8, #1 ; =0x1
 ; CHECK-NEXT: cmp x0, #0
+; CHECK-NEXT: mov x9, #2 ; =0x2
 ; CHECK-NEXT: csel w8, wzr, w8, ne
-; CHECK-NEXT: mov x9, #2
-; CHECK-NEXT: mov x10, #3
+; CHECK-NEXT: mov x10, #3 ; =0x3
 ; CHECK-NEXT: ubfx x8, x8, #0, #32
 ; CHECK-NEXT: csel x9, x9, x10, ne
 ; CHECK-NEXT: add x0, x9, x8
diff --git a/llvm/test/CodeGen/AArch64/madd-combiner.ll b/llvm/test/CodeGen/AArch64/madd-combiner.ll
--- a/llvm/test/CodeGen/AArch64/madd-combiner.ll
+++ b/llvm/test/CodeGen/AArch64/madd-combiner.ll
@@ -6,7 +6,7 @@
 define i32 @mul_add_imm(i32 %a, i32 %b) {
 ; CHECK-LABEL: mul_add_imm:
 ; CHECK: ; %bb.0:
-; CHECK-NEXT: mov w8, #4
+; CHECK-NEXT: mov w8, #4 ; =0x4
 ; CHECK-NEXT: madd w0, w0, w1, w8
 ; CHECK-NEXT: ret
 %1 = mul i32 %a, %b
@@ -17,7 +17,7 @@
 define i32 @mul_sub_imm1(i32 %a, i32 %b) {
 ; CHECK-LABEL: mul_sub_imm1:
 ; CHECK: ; %bb.0:
-; CHECK-NEXT: mov w8, #4
+; CHECK-NEXT: mov w8, #4 ; =0x4
 ; CHECK-NEXT: msub w0, w0, w1, w8
 ; CHECK-NEXT: ret
 %1 = mul i32 %a, %b
@@ -29,7 +29,7 @@
 define void @mul_add_imm2() {
 ; CHECK-ISEL-LABEL: mul_add_imm2:
 ; CHECK-ISEL: ; %bb.0: ; %entry
-; CHECK-ISEL-NEXT: mov w8, #1
+; CHECK-ISEL-NEXT: mov w8, #1 ; =0x1
 ; CHECK-ISEL-NEXT: LBB2_1: ; %for.body8
 ; CHECK-ISEL-NEXT: ; =>This Inner Loop Header: Depth=1
 ; CHECK-ISEL-NEXT: cbnz w8, LBB2_1
@@ -38,10 +38,10 @@
 ;
 ; CHECK-FAST-LABEL: mul_add_imm2:
 ; CHECK-FAST: ; %bb.0: ; %entry
-; CHECK-FAST-NEXT: mov x8, #-3
-; CHECK-FAST-NEXT: mov x9, #-3
+; CHECK-FAST-NEXT: mov x8, #-3 ; =0xfffffffffffffffd
+; CHECK-FAST-NEXT: mov x9, #-3 ; =0xfffffffffffffffd
 ; CHECK-FAST-NEXT: madd x8, x8, x8, x9
-; CHECK-FAST-NEXT: mov x9, #45968
+; CHECK-FAST-NEXT: mov x9, #45968 ; =0xb390
 ; CHECK-FAST-NEXT: movk x9, #48484, lsl #16
 ; CHECK-FAST-NEXT: movk x9, #323, lsl #32
 ; CHECK-FAST-NEXT: LBB2_1: ; %for.body8
@@ -120,9 +120,9 @@
 ;
 ; CHECK-FAST-LABEL: add1_mul_val4:
 ; CHECK-FAST: ; %bb.0:
-; CHECK-FAST-NEXT: add x8, x1, #1
-; CHECK-FAST-NEXT: add x9, x0, x2
-; CHECK-FAST-NEXT: mul x0, x9, x8
+; CHECK-FAST-NEXT: add x8, x0, x2
+; CHECK-FAST-NEXT: add x9, x1, #1
+; CHECK-FAST-NEXT: mul x0, x8, x9
 ; CHECK-FAST-NEXT: ret
 %1 = add i64 %a, %c
 %2 = add i64 %b, 1
@@ -138,7 +138,7 @@
 ;
 ; CHECK-FAST-LABEL: sub1_mul_val1:
 ; CHECK-FAST: ; %bb.0:
-; CHECK-FAST-NEXT: mov w8, #1
+; CHECK-FAST-NEXT: mov w8, #1 ; =0x1
 ; CHECK-FAST-NEXT: sub w8, w8, w0
 ; CHECK-FAST-NEXT: mul w0, w8, w1
 ; CHECK-FAST-NEXT: ret
@@ -155,7 +155,7 @@
 ;
 ; CHECK-FAST-LABEL: sub1_mul_val2:
 ; CHECK-FAST: ; %bb.0:
-; CHECK-FAST-NEXT: mov w8, #1
+; CHECK-FAST-NEXT: mov w8, #1 ; =0x1
 ; CHECK-FAST-NEXT: sub w8, w8, w1
 ; CHECK-FAST-NEXT: mul w0, w0, w8
 ; CHECK-FAST-NEXT: ret
@@ -172,7 +172,7 @@
 ;
 ; CHECK-FAST-LABEL: sub1_mul_val3:
 ; CHECK-FAST: ; %bb.0:
-; CHECK-FAST-NEXT: mov x8, #1
+; CHECK-FAST-NEXT: mov x8, #1 ; =0x1
 ; CHECK-FAST-NEXT: sub x8, x8, x1
 ; CHECK-FAST-NEXT: mul x0, x0, x8
 ; CHECK-FAST-NEXT: ret
@@ -190,7 +190,7 @@
 ;
 ; CHECK-FAST-LABEL: sub1_mul_val4:
 ; CHECK-FAST: ; %bb.0:
-; CHECK-FAST-NEXT: mov x8, #1
+; CHECK-FAST-NEXT: mov x8, #1 ; =0x1
 ; CHECK-FAST-NEXT: sub x9, x0, #1
 ; CHECK-FAST-NEXT: sub x8, x8, x1
 ; CHECK-FAST-NEXT: mul x0, x9, x8
diff --git a/llvm/test/CodeGen/AArch64/memcpy-scoped-aa.ll b/llvm/test/CodeGen/AArch64/memcpy-scoped-aa.ll
--- a/llvm/test/CodeGen/AArch64/memcpy-scoped-aa.ll
+++ b/llvm/test/CodeGen/AArch64/memcpy-scoped-aa.ll
@@ -15,9 +15,9 @@
 ; CHECK-LABEL: test_memcpy:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldp w9, w10, [x1]
+; CHECK-NEXT: ldr q0, [x0, #16]
 ; CHECK-NEXT: mov x8, x0
 ; CHECK-NEXT: add w0, w9, w10
-; CHECK-NEXT: ldr q0, [x8, #16]
 ; CHECK-NEXT: str q0, [x8]
 ; CHECK-NEXT: ret
 %p0 = bitcast i32* %p to i8*
@@ -38,9 +38,9 @@
 ; CHECK-LABEL: test_memcpy_inline:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldp w9, w10, [x1]
+; CHECK-NEXT: ldr q0, [x0, #16]
 ; CHECK-NEXT: mov x8, x0
 ; CHECK-NEXT: add w0, w9, w10
-; CHECK-NEXT: ldr q0, [x8, #16]
 ; CHECK-NEXT: str q0, [x8]
 ; CHECK-NEXT: ret
 %p0 = bitcast i32* %p to i8*
@@ -61,9 +61,9 @@
 ; CHECK-LABEL: test_memmove:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldp w9, w10, [x1]
+; CHECK-NEXT: ldr q0, [x0, #16]
 ; CHECK-NEXT: mov x8, x0
 ; CHECK-NEXT: add w0, w9, w10
-; CHECK-NEXT: ldr q0, [x8, #16]
 ; CHECK-NEXT: str q0, [x8]
 ; CHECK-NEXT: ret
 %p0 = bitcast i32* %p to i8*
@@ -86,9 +86,9 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldp w10, w11, [x1]
 ; CHECK-NEXT: mov x8, x0
-; CHECK-NEXT: mov x9, #-6148914691236517206
-; CHECK-NEXT: add w0, w10, w11
+; CHECK-NEXT: mov x9, #-6148914691236517206 // =0xaaaaaaaaaaaaaaaa
 ; CHECK-NEXT: stp x9, x9, [x8]
+; CHECK-NEXT: add w0, w10, w11
 ; CHECK-NEXT: ret
 %p0 = bitcast i32* %p to i8*
 tail call void @llvm.memset.p0i8.i64(i8* noundef nonnull align 4 dereferenceable(16) %p0, i8 170, i64 16, i1 false), !alias.scope !2, !noalias !4
@@ -106,9 +106,9 @@
 ; CHECK-LABEL: test_mempcpy:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldp w9, w10, [x1]
+; CHECK-NEXT: ldr q0, [x0, #16]
 ; CHECK-NEXT: mov x8, x0
 ; CHECK-NEXT: add w0, w9, w10
-; CHECK-NEXT: ldr q0, [x8, #16]
 ; CHECK-NEXT: str q0, [x8]
 ; CHECK-NEXT: ret
 %p0 = bitcast i32* %p to i8*
diff --git a/llvm/test/CodeGen/AArch64/merge-trunc-store.ll b/llvm/test/CodeGen/AArch64/merge-trunc-store.ll
--- a/llvm/test/CodeGen/AArch64/merge-trunc-store.ll
+++ b/llvm/test/CodeGen/AArch64/merge-trunc-store.ll
@@ -524,11 +524,11 @@
 define void @be_i64_to_i16(i64 %x, ptr %p0) {
 ; LE-LABEL: be_i64_to_i16:
 ; LE: // %bb.0:
-; LE-NEXT: lsr x8, x0, #32
-; LE-NEXT: ror w9, w0, #16
+; LE-NEXT: ror w8, w0, #16
+; LE-NEXT: lsr x9, x0, #32
 ; LE-NEXT: lsr x10, x0, #48
-; LE-NEXT: strh w8, [x1, #2]
-; LE-NEXT: str w9, [x1, #4]
+; LE-NEXT: str w8, [x1, #4]
+; LE-NEXT: strh w9, [x1, #2]
 ; LE-NEXT: strh w10, [x1]
 ; LE-NEXT: ret
 ;
@@ -749,16 +749,16 @@
 ; CHECK-NEXT: lsr x8, x0, #56
 ; CHECK-NEXT: lsr x9, x0, #48
 ; CHECK-NEXT: lsr x10, x0, #40
-; CHECK-NEXT: lsr x11, x0, #32
 ; CHECK-NEXT: strb w0, [x1, #7]
 ; CHECK-NEXT: strb w8, [x1]
-; CHECK-NEXT: lsr x8, x0, #16
+; CHECK-NEXT: lsr x8, x0, #32
 ; CHECK-NEXT: strb w9, [x1, #1]
-; CHECK-NEXT: lsr x9, x0, #8
+; CHECK-NEXT: lsr x9, x0, #16
 ; CHECK-NEXT: strb w10, [x1, #2]
-; CHECK-NEXT: strb w11, [x1, #3]
-; CHECK-NEXT: strb w8, [x1, #5]
-; CHECK-NEXT: strb w9, [x1, #6]
+; CHECK-NEXT: lsr x10, x0, #8
+; CHECK-NEXT: strb w8, [x1, #3]
+; CHECK-NEXT: strb w9, [x1, #5]
+; CHECK-NEXT: strb w10, [x1, #6]
 ; CHECK-NEXT: ret
 %sh1 = lshr i64 %x, 8
 %sh2 = lshr i64 %x, 16
diff --git a/llvm/test/CodeGen/AArch64/midpoint-int.ll b/llvm/test/CodeGen/AArch64/midpoint-int.ll
--- a/llvm/test/CodeGen/AArch64/midpoint-int.ll
+++ b/llvm/test/CodeGen/AArch64/midpoint-int.ll
@@ -14,13 +14,13 @@
 ; CHECK-LABEL: scalar_i32_signed_reg_reg:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: cmp w0, w1
-; CHECK-NEXT: mov w10, #-1
-; CHECK-NEXT: csel w8, w1, w0, gt
-; CHECK-NEXT: csel w9, w0, w1, gt
-; CHECK-NEXT: sub w8, w9, w8
-; CHECK-NEXT: cneg w9, w10, le
-; CHECK-NEXT: lsr w8, w8, #1
-; CHECK-NEXT: madd w0, w8, w9, w0
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: csel w9, w1, w0, gt
+; CHECK-NEXT: csel w10, w0, w1, gt
+; CHECK-NEXT: cneg w8, w8, le
+; CHECK-NEXT: sub w9, w10, w9
+; CHECK-NEXT: lsr w9, w9, #1
+; CHECK-NEXT: madd w0, w9, w8, w0
 ; CHECK-NEXT: ret
 %t3 = icmp sgt i32 %a1, %a2 ; signed
 %t4 = select i1 %t3, i32 -1, i32 1
@@ -37,13 +37,13 @@
 ; CHECK-LABEL: scalar_i32_unsigned_reg_reg:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: cmp w0, w1
-; CHECK-NEXT: mov w10, #-1
-; CHECK-NEXT: csel w8, w1, w0, hi
-; CHECK-NEXT: csel w9, w0, w1, hi
-; CHECK-NEXT: sub w8, w9, w8
-; CHECK-NEXT: cneg w9, w10, ls
-; CHECK-NEXT: lsr w8, w8, #1
-; CHECK-NEXT: madd w0, w8, w9, w0
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: csel w9, w1, w0, hi
+; CHECK-NEXT: csel w10, w0, w1, hi
+; CHECK-NEXT: cneg w8, w8, ls
+; CHECK-NEXT: sub w9, w10, w9
+; CHECK-NEXT: lsr w9, w9, #1
+; CHECK-NEXT: madd w0, w9, w8, w0
 ; CHECK-NEXT: ret
 %t3 = icmp ugt i32 %a1, %a2
 %t4 = select i1 %t3, i32 -1, i32 1
@@ -62,12 +62,12 @@
 ; CHECK-LABEL: scalar_i32_signed_mem_reg:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldr w9, [x0]
-; CHECK-NEXT: mov w8, #-1
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT: cmp w9, w1
 ; CHECK-NEXT: csel w10, w1, w9, gt
 ; CHECK-NEXT: csel w11, w9, w1, gt
-; CHECK-NEXT: sub w10, w11, w10
 ; CHECK-NEXT: cneg w8, w8, le
+; CHECK-NEXT: sub w10, w11, w10
 ; CHECK-NEXT: lsr w10, w10, #1
 ; CHECK-NEXT: madd w0, w10, w8, w9
 ; CHECK-NEXT: ret
@@ -87,12 +87,12 @@
 ; CHECK-LABEL: scalar_i32_signed_reg_mem:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldr w9, [x1]
-; CHECK-NEXT: mov w8, #-1
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT: cmp w0, w9
 ; CHECK-NEXT: csel w10, w9, w0, gt
 ; CHECK-NEXT: csel w9, w0, w9, gt
-; CHECK-NEXT: sub w9, w9, w10
 ; CHECK-NEXT: cneg w8, w8, le
+; CHECK-NEXT: sub w9, w9, w10
 ; CHECK-NEXT: lsr w9, w9, #1
 ; CHECK-NEXT: madd w0, w9, w8, w0
 ; CHECK-NEXT: ret
@@ -112,13 +112,13 @@
 ; CHECK-LABEL: scalar_i32_signed_mem_mem:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldr w9, [x0]
-; CHECK-NEXT: mov w8, #-1
 ; CHECK-NEXT: ldr w10, [x1]
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT: cmp w9, w10
 ; CHECK-NEXT: csel w11, w10, w9, gt
 ; CHECK-NEXT: csel w10, w9, w10, gt
-; CHECK-NEXT: sub w10, w10, w11
 ; CHECK-NEXT: cneg w8, w8, le
+; CHECK-NEXT: sub w10, w10, w11
 ; CHECK-NEXT: lsr w10, w10, #1
 ; CHECK-NEXT: madd w0, w10, w8, w9
 ; CHECK-NEXT: ret
@@ -145,13 +145,13 @@
 ; CHECK-LABEL: scalar_i64_signed_reg_reg:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: cmp x0, x1
-; CHECK-NEXT: mov x10, #-1
-; CHECK-NEXT: csel x8, x1, x0, gt
-; CHECK-NEXT: csel x9, x0, x1, gt
-; CHECK-NEXT: sub x8, x9, x8
-; CHECK-NEXT: cneg x9, x10, le
-; CHECK-NEXT: lsr x8, x8, #1
-; CHECK-NEXT: madd x0, x8, x9, x0
+; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: csel x9, x1, x0, gt
+; CHECK-NEXT: csel x10, x0, x1, gt
+; CHECK-NEXT: cneg x8, x8, le
+; CHECK-NEXT: sub x9, x10, x9
+; CHECK-NEXT: lsr x9, x9, #1
+; CHECK-NEXT: madd x0, x9, x8, x0
 ; CHECK-NEXT: ret
 %t3 = icmp sgt i64 %a1, %a2 ; signed
 %t4 = select i1 %t3, i64 -1, i64 1
@@ -168,13 +168,13 @@
 ; CHECK-LABEL: scalar_i64_unsigned_reg_reg:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: cmp x0, x1
-; CHECK-NEXT: mov x10, #-1
-; CHECK-NEXT: csel x8, x1, x0, hi
-; CHECK-NEXT: csel x9, x0, x1, hi
-; CHECK-NEXT: sub x8, x9, x8
-; CHECK-NEXT: cneg x9, x10, ls
-; CHECK-NEXT: lsr x8, x8, #1
-; CHECK-NEXT: madd x0, x8, x9, x0
+; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: csel x9, x1, x0, hi
+; CHECK-NEXT: csel x10, x0, x1, hi
+; CHECK-NEXT: cneg x8, x8, ls
+; CHECK-NEXT: sub x9, x10, x9
+; CHECK-NEXT: lsr x9, x9, #1
+; CHECK-NEXT: madd x0, x9, x8, x0
 ; CHECK-NEXT: ret
 %t3 = icmp ugt i64 %a1, %a2
 %t4 = select i1 %t3, i64 -1, i64 1
@@ -193,12 +193,12 @@
 ; CHECK-LABEL: scalar_i64_signed_mem_reg:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldr x9, [x0]
-; CHECK-NEXT: mov x8, #-1
+; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: cmp x9, x1
 ; CHECK-NEXT: csel x10, x1, x9, gt
 ; CHECK-NEXT: csel x11, x9, x1, gt
-; CHECK-NEXT: sub x10, x11, x10
 ; CHECK-NEXT: cneg x8, x8, le
+; CHECK-NEXT: sub x10, x11, x10
 ; CHECK-NEXT: lsr x10, x10, #1
 ; CHECK-NEXT: madd x0, x10, x8, x9
 ; CHECK-NEXT: ret
@@ -218,12 +218,12 @@
 ; CHECK-LABEL: scalar_i64_signed_reg_mem:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldr x9, [x1]
-; CHECK-NEXT: mov x8, #-1
+; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: cmp x0, x9
 ; CHECK-NEXT: csel x10, x9, x0, gt
 ; CHECK-NEXT: csel x9, x0, x9, gt
-; CHECK-NEXT: sub x9, x9, x10
 ; CHECK-NEXT: cneg x8, x8, le
+; CHECK-NEXT: sub x9, x9, x10
 ; CHECK-NEXT: lsr x9, x9, #1
 ; CHECK-NEXT: madd x0, x9, x8, x0
 ; CHECK-NEXT: ret
@@ -243,13 +243,13 @@
 ; CHECK-LABEL: scalar_i64_signed_mem_mem:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldr x9, [x0]
-; CHECK-NEXT: mov x8, #-1
 ; CHECK-NEXT: ldr x10, [x1]
+; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: cmp x9, x10
 ; CHECK-NEXT: csel x11, x10, x9, gt
 ; CHECK-NEXT: csel x10, x9, x10, gt
-; CHECK-NEXT: sub x10, x10, x11
 ; CHECK-NEXT: cneg x8, x8, le
+; CHECK-NEXT: sub x10, x10, x11
 ; CHECK-NEXT: lsr x10, x10, #1
 ; CHECK-NEXT: madd x0, x10, x8, x9
 ; CHECK-NEXT: ret
@@ -275,15 +275,15 @@
 define i16 @scalar_i16_signed_reg_reg(i16 %a1, i16 %a2) nounwind {
 ; CHECK-LABEL: scalar_i16_signed_reg_reg:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: mov w10, #-1
-; CHECK-NEXT: cmp w8, w1, sxth
-; CHECK-NEXT: csel w8, w1, w0, gt
-; CHECK-NEXT: csel w9, w0, w1, gt
-; CHECK-NEXT: sub w8, w9, w8
-; CHECK-NEXT: cneg w9, w10, le
-; CHECK-NEXT: ubfx w8, w8, #1, #15
-; CHECK-NEXT: madd w0, w8, w9, w0
+; CHECK-NEXT: sxth w9, w0
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: cmp w9, w1, sxth
+; CHECK-NEXT: csel w9, w1, w0, gt
+; CHECK-NEXT: csel w10, w0, w1, gt
+; CHECK-NEXT: cneg w8, w8, le
+; CHECK-NEXT: sub w9, w10, w9
+; CHECK-NEXT: ubfx w9, w9, #1, #15
+; CHECK-NEXT: madd w0, w9, w8, w0
 ; CHECK-NEXT: ret
 %t3 = icmp sgt i16 %a1, %a2 ; signed
 %t4 = select i1 %t3, i16 -1, i16 1
@@ -299,15 +299,15 @@
 define i16 @scalar_i16_unsigned_reg_reg(i16 %a1, i16 %a2) nounwind {
 ; CHECK-LABEL: scalar_i16_unsigned_reg_reg:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xffff
-; CHECK-NEXT: mov w10, #-1
-; CHECK-NEXT: cmp w8, w1, uxth
-; CHECK-NEXT: csel w8, w1, w0, hi
-; CHECK-NEXT: csel w9, w0, w1, hi
-; CHECK-NEXT: sub w8, w9, w8
-; CHECK-NEXT: cneg w9, w10, ls
-; CHECK-NEXT: ubfx w8, w8, #1, #15
-; CHECK-NEXT: madd w0, w8, w9, w0
+; CHECK-NEXT: and w9, w0, #0xffff
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: cmp w9, w1, uxth
+; CHECK-NEXT: csel w9, w1, w0, hi
+; CHECK-NEXT: csel w10, w0, w1, hi
+; CHECK-NEXT: cneg w8, w8, ls
+; CHECK-NEXT: sub w9, w10, w9
+; CHECK-NEXT: ubfx w9, w9, #1, #15
+; CHECK-NEXT: madd w0, w9, w8, w0
 ; CHECK-NEXT: ret
 %t3 = icmp ugt i16 %a1, %a2
 %t4 = select i1 %t3, i16 -1, i16 1
@@ -326,12 +326,12 @@
 ; CHECK-LABEL: scalar_i16_signed_mem_reg:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldrsh w9, [x0]
-; CHECK-NEXT: mov w8, #-1
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT: cmp w9, w1, sxth
 ; CHECK-NEXT: csel w10, w1, w9, gt
 ; CHECK-NEXT: csel w11, w9, w1, gt
-; CHECK-NEXT: sub w10, w11, w10
 ; CHECK-NEXT: cneg w8, w8, le
+; CHECK-NEXT: sub w10, w11, w10
 ; CHECK-NEXT: ubfx w10, w10, #1, #15
 ; CHECK-NEXT: madd w0, w10, w8, w9
 ; CHECK-NEXT: ret
@@ -350,16 +350,16 @@
 define i16 @scalar_i16_signed_reg_mem(i16 %a1, ptr %a2_addr) nounwind {
 ; CHECK-LABEL: scalar_i16_signed_reg_mem:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldrsh w9, [x1]
-; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: mov w10, #-1
-; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: csel w8, w9, w0, gt
-; CHECK-NEXT: csel w9, w0, w9, gt
-; CHECK-NEXT: sub w8, w9, w8
-; CHECK-NEXT: cneg w9, w10, le
-; CHECK-NEXT: ubfx w8, w8, #1, #15
-; CHECK-NEXT: madd w0, w8, w9, w0
+; CHECK-NEXT: sxth w9, w0
+; CHECK-NEXT: ldrsh w10, [x1]
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: cmp w9, w10
+; CHECK-NEXT: csel w9, w10, w0, gt
+; CHECK-NEXT: csel w10, w0, w10, gt
+; CHECK-NEXT: cneg w8, w8, le
+; CHECK-NEXT: sub w9, w10, w9
+; CHECK-NEXT: ubfx w9, w9, #1, #15
+; CHECK-NEXT: madd w0, w9, w8, w0
 ; CHECK-NEXT: ret
 %a2 = load i16, ptr %a2_addr
 %t3 = icmp sgt i16 %a1, %a2 ; signed
@@ -377,13 +377,13 @@
 ; CHECK-LABEL: scalar_i16_signed_mem_mem:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldrsh w9, [x0]
-; CHECK-NEXT: mov w8, #-1
 ; CHECK-NEXT: ldrsh w10, [x1]
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT: cmp w9, w10
 ; CHECK-NEXT: csel w11, w10, w9, gt
 ; CHECK-NEXT: csel w10, w9, w10, gt
-; CHECK-NEXT: sub w10, w10, w11
 ; CHECK-NEXT: cneg w8, w8, le
+; CHECK-NEXT: sub w10, w10, w11
 ; CHECK-NEXT: ubfx w10, w10, #1, #15
 ; CHECK-NEXT: madd w0, w10, w8, w9
 ; CHECK-NEXT: ret
@@ -409,15 +409,15 @@
 define i8 @scalar_i8_signed_reg_reg(i8 %a1, i8 %a2) nounwind {
 ; CHECK-LABEL: scalar_i8_signed_reg_reg:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: mov w10, #-1
-; CHECK-NEXT: cmp w8, w1, sxtb
-; CHECK-NEXT: csel w8, w1, w0, gt
-; CHECK-NEXT: csel w9, w0, w1, gt
-; CHECK-NEXT: sub w8, w9, w8
-; CHECK-NEXT: cneg w9, w10, le
-; CHECK-NEXT: ubfx w8, w8, #1, #7
-; CHECK-NEXT: madd w0, w8, w9, w0
+; CHECK-NEXT: sxtb w9, w0
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: cmp w9, w1, sxtb
+; CHECK-NEXT: csel w9, w1, w0, gt
+; CHECK-NEXT: csel w10, w0, w1, gt
+; CHECK-NEXT: cneg w8, w8, le
+; CHECK-NEXT: sub w9, w10, w9
+; CHECK-NEXT: ubfx w9, w9, #1, #7
+; CHECK-NEXT: madd w0, w9, w8, w0
 ; CHECK-NEXT: ret
 %t3 = icmp sgt i8 %a1, %a2 ; signed
 %t4 = select i1 %t3, i8 -1, i8 1
@@ -433,15 +433,15 @@
 define i8 @scalar_i8_unsigned_reg_reg(i8 %a1, i8 %a2) nounwind {
 ; CHECK-LABEL: scalar_i8_unsigned_reg_reg:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: and w8, w0, #0xff
-; CHECK-NEXT: mov w10, #-1
-; CHECK-NEXT: cmp w8, w1, uxtb
-; CHECK-NEXT: csel w8, w1, w0, hi
-; CHECK-NEXT: csel w9, w0, w1, hi
-; CHECK-NEXT: sub w8, w9, w8
-; CHECK-NEXT: cneg w9, w10, ls
-; CHECK-NEXT: ubfx w8, w8, #1, #7
-; CHECK-NEXT: madd w0, w8, w9, w0
+; CHECK-NEXT: and w9, w0, #0xff
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: cmp w9, w1, uxtb
+; CHECK-NEXT: csel w9, w1, w0, hi
+; CHECK-NEXT: csel w10, w0, w1, hi
+; CHECK-NEXT: cneg w8, w8, ls
+; CHECK-NEXT: sub w9, w10, w9
+; CHECK-NEXT: ubfx w9, w9, #1, #7
+; CHECK-NEXT: madd w0, w9, w8, w0
 ; CHECK-NEXT: ret
 %t3 = icmp ugt i8 %a1, %a2
 %t4 = select i1 %t3, i8 -1, i8 1
@@ -460,12 +460,12 @@
 ; CHECK-LABEL: scalar_i8_signed_mem_reg:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldrsb w9, [x0]
-; CHECK-NEXT: mov w8, #-1
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT: cmp w9, w1, sxtb
 ; CHECK-NEXT: csel w10, w1, w9, gt
 ; CHECK-NEXT: csel w11, w9, w1, gt
-; CHECK-NEXT: sub w10, w11, w10
 ; CHECK-NEXT: cneg w8, w8, le
+; CHECK-NEXT: sub w10, w11, w10
 ; CHECK-NEXT: ubfx w10, w10, #1, #7
 ; CHECK-NEXT: madd w0, w10, w8, w9
 ; CHECK-NEXT: ret
@@ -484,16 +484,16 @@
 define i8 @scalar_i8_signed_reg_mem(i8 %a1, ptr %a2_addr) nounwind {
 ; CHECK-LABEL: scalar_i8_signed_reg_mem:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldrsb w9, [x1]
-; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: mov w10, #-1
-; CHECK-NEXT: cmp w8, w9
-; CHECK-NEXT: csel w8, w9, w0, gt
-; CHECK-NEXT: csel w9, w0, w9, gt
-; CHECK-NEXT: sub w8, w9, w8
-; CHECK-NEXT: cneg w9, w10, le
-; CHECK-NEXT: ubfx w8, w8, #1, #7
-; CHECK-NEXT: madd w0, w8, w9, w0
+; CHECK-NEXT: sxtb w9, w0
+; CHECK-NEXT: ldrsb w10, [x1]
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
+; CHECK-NEXT: cmp w9, w10
+; CHECK-NEXT: csel w9, w10, w0, gt
+; CHECK-NEXT: csel w10, w0, w10, gt
+; CHECK-NEXT: cneg w8, w8, le
+; CHECK-NEXT: sub w9, w10, w9
+; CHECK-NEXT: ubfx w9, w9, #1, #7
+; CHECK-NEXT: madd w0, w9, w8, w0
 ; CHECK-NEXT: ret
 %a2 = load i8, ptr %a2_addr
 %t3 = icmp sgt i8 %a1, %a2 ; signed
@@ -511,13 +511,13 @@
 ; CHECK-LABEL: scalar_i8_signed_mem_mem:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldrsb w9, [x0]
-; CHECK-NEXT: mov w8, #-1
 ; CHECK-NEXT: ldrsb w10, [x1]
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT: cmp w9, w10
 ; CHECK-NEXT: csel w11, w10, w9, gt
 ; CHECK-NEXT: csel w10, w9, w10, gt
-; CHECK-NEXT: sub w10, w10, w11
 ; CHECK-NEXT: cneg w8, w8, le
+; CHECK-NEXT: sub w10, w10, w11
 ; CHECK-NEXT: ubfx w10, w10, #1, #7
 ; CHECK-NEXT: madd w0, w10, w8, w9
 ; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/minmax-of-minmax.ll b/llvm/test/CodeGen/AArch64/minmax-of-minmax.ll
--- a/llvm/test/CodeGen/AArch64/minmax-of-minmax.ll
+++ b/llvm/test/CodeGen/AArch64/minmax-of-minmax.ll
@@ -10,9 +10,9 @@
 define <4 x i32> @smin_ab_bc(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smin_ab_bc:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: smin v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s
+; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: ret
 %cmp_ab = icmp slt <4 x i32> %a, %b
 %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -26,9 +26,9 @@
 define <4 x i32> @smin_ab_cb(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smin_ab_cb:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: smin v2.4s, v2.4s, v1.4s
 ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: smin v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s
+; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: ret
 %cmp_ab = icmp slt <4 x i32> %a, %b
 %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -74,9 +74,9 @@
 define <4 x i32> @smin_ab_bc_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smin_ab_bc_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: smin v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s
+; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: ret
 %cmp_ab = icmp slt <4 x i32> %a, %b
 %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -90,9 +90,9 @@
 define <4 x i32> @smin_ab_cb_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smin_ab_cb_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: smin v2.4s, v2.4s, v1.4s
 ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: smin v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s
+; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: ret
 %cmp_ab = icmp slt <4 x i32> %a, %b
 %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -138,9 +138,9 @@
 define <4 x i32> @smin_ab_bc_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smin_ab_bc_eq_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: smin v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s
+; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: ret
 %cmp_ab = icmp slt <4 x i32> %a, %b
 %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -154,9 +154,9 @@
 define <4 x i32> @smin_ab_cb_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smin_ab_cb_eq_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: smin v2.4s, v2.4s, v1.4s
 ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: smin v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s
+; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: ret
 %cmp_ab = icmp slt <4 x i32> %a, %b
 %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -202,9 +202,9 @@
 define <4 x i32> @smin_ab_bc_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smin_ab_bc_eq_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: smin v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s
+; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: ret
 %cmp_ab = icmp slt <4 x i32> %a, %b
 %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -218,9 +218,9 @@
 define <4 x i32> @smin_ab_cb_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smin_ab_cb_eq_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: smin v2.4s, v2.4s, v1.4s
 ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: smin v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s
+; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: ret
 %cmp_ab = icmp slt <4 x i32> %a, %b
 %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -266,9 +266,9 @@
 define <4 x i32> @smax_ab_bc(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smax_ab_bc:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: smax v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s
+; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: ret
 %cmp_ab = icmp sgt <4 x i32> %a, %b
 %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -282,9 +282,9 @@
 define <4 x i32> @smax_ab_cb(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smax_ab_cb:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: smax v2.4s, v2.4s, v1.4s
 ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: smax v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s
+; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: ret
 %cmp_ab = icmp sgt <4 x i32> %a, %b
 %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -330,9 +330,9 @@
 define <4 x i32> @smax_ab_bc_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smax_ab_bc_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: smax v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s
+; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: ret
 %cmp_ab = icmp sgt <4 x i32> %a, %b
 %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -346,9 +346,9 @@
 define <4 x i32> @smax_ab_cb_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smax_ab_cb_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: smax v2.4s, v2.4s, v1.4s
 ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: smax v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s
+; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: ret
 %cmp_ab = icmp sgt <4 x i32> %a, %b
 %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -394,9 +394,9 @@
 define <4 x i32> @smax_ab_bc_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smax_ab_bc_eq_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: smax v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s
+; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: ret
 %cmp_ab = icmp sgt <4 x i32> %a, %b
 %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -410,9 +410,9 @@
 define <4 x i32> @smax_ab_cb_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smax_ab_cb_eq_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: smax v2.4s, v2.4s, v1.4s
 ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: smax v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s
+; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: ret
 %cmp_ab = icmp sgt <4 x i32> %a, %b
 %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -458,9 +458,9 @@
 define <4 x i32> @smax_ab_bc_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smax_ab_bc_eq_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: smax v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s
+; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: ret
 %cmp_ab = icmp sgt <4 x i32> %a, %b
 %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -474,9 +474,9 @@
 define <4 x i32> @smax_ab_cb_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: smax_ab_cb_eq_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: smax v2.4s, v2.4s, v1.4s
 ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: smax v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s
+; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: ret
 %cmp_ab = icmp sgt <4 x i32> %a, %b
 %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -522,9 +522,9 @@
 define <4 x i32> @umin_ab_bc(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umin_ab_bc:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: umin v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s
+; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: ret
 %cmp_ab = icmp ult <4 x i32> %a, %b
 %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -538,9 +538,9 @@
 define <4 x i32> @umin_ab_cb(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umin_ab_cb:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: umin v2.4s, v2.4s, v1.4s
 ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: umin v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s
+; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: ret
 %cmp_ab = icmp ult <4 x i32> %a, %b
 %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -586,9 +586,9 @@
 define <4 x i32> @umin_ab_bc_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umin_ab_bc_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: umin v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s
+; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: ret
 %cmp_ab = icmp ult <4 x i32> %a, %b
 %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -602,9 +602,9 @@
 define <4 x i32> @umin_ab_cb_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umin_ab_cb_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: umin v2.4s, v2.4s, v1.4s
 ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: umin v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s
+; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: ret
 %cmp_ab = icmp ult <4 x i32> %a, %b
 %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -650,9 +650,9 @@
 define <4 x i32> @umin_ab_bc_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umin_ab_bc_eq_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: umin v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s
+; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: ret
 %cmp_ab = icmp ult <4 x i32> %a, %b
 %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -666,9 +666,9 @@
 define <4 x i32> @umin_ab_cb_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umin_ab_cb_eq_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: umin v2.4s, v2.4s, v1.4s
 ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: umin v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s
+; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: ret
 %cmp_ab = icmp ult <4 x i32> %a, %b
 %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -714,9 +714,9 @@
 define <4 x i32> @umin_ab_bc_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umin_ab_bc_eq_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: umin v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s
+; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: ret
 %cmp_ab = icmp ult <4 x i32> %a, %b
 %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -730,9 +730,9 @@
 define <4 x i32> @umin_ab_cb_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umin_ab_cb_eq_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: umin v2.4s, v2.4s, v1.4s
 ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: umin v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s
+; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: ret
 %cmp_ab = icmp ult <4 x i32> %a, %b
 %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -778,9 +778,9 @@
 define <4 x i32> @umax_ab_bc(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umax_ab_bc:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: umax v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s
+; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: ret
 %cmp_ab = icmp ugt <4 x i32> %a, %b
 %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -794,9 +794,9 @@
 define <4 x i32> @umax_ab_cb(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umax_ab_cb:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: umax v2.4s, v2.4s, v1.4s
 ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: umax v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s
+; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: ret
 %cmp_ab = icmp ugt <4 x i32> %a, %b
 %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -842,9 +842,9 @@
 define <4 x i32> @umax_ab_bc_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umax_ab_bc_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: umax v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s
+; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: ret
 %cmp_ab = icmp ugt <4 x i32> %a, %b
 %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -858,9 +858,9 @@
 define <4 x i32> @umax_ab_cb_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umax_ab_cb_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: umax v2.4s, v2.4s, v1.4s
 ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: umax v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s
+; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: ret
 %cmp_ab = icmp ugt <4 x i32> %a, %b
 %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -906,9 +906,9 @@
 define <4 x i32> @umax_ab_bc_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umax_ab_bc_eq_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: umax v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s
+; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: ret
 %cmp_ab = icmp ugt <4 x i32> %a, %b
 %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -922,9 +922,9 @@
 define <4 x i32> @umax_ab_cb_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umax_ab_cb_eq_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: umax v2.4s, v2.4s, v1.4s
 ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: umax v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s
+; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: ret
 %cmp_ab = icmp ugt <4 x i32> %a, %b
 %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -970,9 +970,9 @@
 define <4 x i32> @umax_ab_bc_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umax_ab_bc_eq_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: umax v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s
+; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: ret
 %cmp_ab = icmp ugt <4 x i32> %a, %b
 %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -986,9 +986,9 @@
 define <4 x i32> @umax_ab_cb_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: umax_ab_cb_eq_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: umax v2.4s, v2.4s, v1.4s
 ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: umax v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s
+; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: ret
 %cmp_ab = icmp ugt <4 x i32> %a, %b
 %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b
@@ -1034,8 +1034,8 @@
 define <4 x i32> @notted_smin_ab_bc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smin_ab_bc:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s
@@ -1056,8 +1056,8 @@
 define <4 x i32> @notted_smin_ab_cb(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smin_ab_cb:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s
@@ -1078,8 +1078,8 @@
 define <4 x i32> @notted_smin_bc_ab(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smin_bc_ab:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
@@ -1100,8 +1100,8 @@
 define <4 x i32> @notted_smin_bc_ba(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smin_bc_ba:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s
@@ -1122,8 +1122,8 @@
 define <4 x i32> @notted_smin_ab_bc_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smin_ab_bc_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s
@@ -1144,8 +1144,8 @@
 define <4 x i32> @notted_smin_ab_cb_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smin_ab_cb_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s
@@ -1166,8 +1166,8 @@
 define <4 x i32> @notted_smin_bc_ab_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smin_bc_ab_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
@@ -1188,8 +1188,8 @@
 define <4 x i32> @notted_smin_bc_ba_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smin_bc_ba_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s
@@ -1210,8 +1210,8 @@
 define <4 x i32> @notted_smin_ab_bc_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smin_ab_bc_eq_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s
@@ -1232,8 +1232,8 @@
 define <4 x i32> @notted_smin_ab_cb_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smin_ab_cb_eq_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s
@@ -1254,8 +1254,8 @@
 define <4 x i32> @notted_smin_bc_ab_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smin_bc_ab_eq_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
@@ -1276,8 +1276,8 @@
 define <4 x i32> @notted_smin_bc_ba_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smin_bc_ba_eq_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s
@@ -1298,8 +1298,8 @@
 define <4 x i32> @notted_smin_ab_bc_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smin_ab_bc_eq_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s
@@ -1320,8 +1320,8 @@
 define <4 x i32> @notted_smin_ab_cb_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smin_ab_cb_eq_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s
@@ -1342,8 +1342,8 @@
 define <4 x i32> @notted_smin_bc_ab_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smin_bc_ab_eq_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
@@ -1364,8 +1364,8 @@
 define <4 x i32> @notted_smin_bc_ba_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smin_bc_ba_eq_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s
@@ -1386,8 +1386,8 @@
 define <4 x i32> @notted_smax_ab_bc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smax_ab_bc:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s
@@ -1408,8 +1408,8 @@
 define <4 x i32> @notted_smax_ab_cb(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smax_ab_cb:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s
@@ -1430,8 +1430,8 @@
 define <4 x i32> @notted_smax_bc_ab(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smax_bc_ab:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
@@ -1452,8 +1452,8 @@
 define <4 x i32> @notted_smax_bc_ba(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smax_bc_ba:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s
@@ -1474,8 +1474,8 @@
 define <4 x i32> @notted_smax_ab_bc_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smax_ab_bc_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s
@@ -1496,8 +1496,8 @@
 define <4 x i32> @notted_smax_ab_cb_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smax_ab_cb_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s
@@ -1518,8 +1518,8 @@
 define <4 x i32> @notted_smax_bc_ab_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smax_bc_ab_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
@@ -1540,8 +1540,8 @@
 define <4 x i32> @notted_smax_bc_ba_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smax_bc_ba_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s
@@ -1562,8 +1562,8 @@
 define <4 x i32> @notted_smax_ab_bc_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smax_ab_bc_eq_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s
@@ -1584,8 +1584,8 @@
 define <4 x i32> @notted_smax_ab_cb_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smax_ab_cb_eq_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s
@@ -1606,8 +1606,8 @@
 define <4 x i32> @notted_smax_bc_ab_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smax_bc_ab_eq_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
@@ -1628,8 +1628,8 @@
 define <4 x i32> @notted_smax_bc_ba_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smax_bc_ba_eq_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s
@@ -1650,8 +1650,8 @@
 define <4 x i32> @notted_smax_ab_bc_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smax_ab_bc_eq_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s
@@ -1672,8 +1672,8 @@
 define <4 x i32> @notted_smax_ab_cb_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smax_ab_cb_eq_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s
@@ -1694,8 +1694,8 @@
 define <4 x i32> @notted_smax_bc_ab_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smax_bc_ab_eq_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
@@ -1716,8 +1716,8 @@
 define <4 x i32> @notted_smax_bc_ba_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_smax_bc_ba_eq_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s
@@ -1738,8 +1738,8 @@
 define <4 x i32> @notted_umin_ab_bc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umin_ab_bc:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s
@@ -1760,8 +1760,8 @@
 define <4 x i32> @notted_umin_ab_cb(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umin_ab_cb:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s
@@ -1782,8 +1782,8 @@
 define <4 x i32> @notted_umin_bc_ab(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umin_bc_ab:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
@@ -1804,8 +1804,8 @@
 define <4 x i32> @notted_umin_bc_ba(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umin_bc_ba:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s
@@ -1826,8 +1826,8 @@
 define <4 x i32> @notted_umin_ab_bc_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umin_ab_bc_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s
@@ -1848,8 +1848,8 @@
 define <4 x i32> @notted_umin_ab_cb_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umin_ab_cb_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s
@@ -1870,8 +1870,8 @@
 define <4 x i32> @notted_umin_bc_ab_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umin_bc_ab_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
@@ -1892,8 +1892,8 @@
 define <4 x i32> @notted_umin_bc_ba_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umin_bc_ba_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s
@@ -1914,8 +1914,8 @@
 define <4 x i32> @notted_umin_ab_bc_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umin_ab_bc_eq_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s
@@ -1936,8 +1936,8 @@
 define <4 x i32> @notted_umin_ab_cb_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umin_ab_cb_eq_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s
@@ -1958,8 +1958,8 @@
 define <4 x i32> @notted_umin_bc_ab_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umin_bc_ab_eq_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
@@ -1980,8 +1980,8 @@
 define <4 x i32> @notted_umin_bc_ba_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umin_bc_ba_eq_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s
@@ -2002,8 +2002,8 @@
 define <4 x i32> @notted_umin_ab_bc_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umin_ab_bc_eq_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s
@@ -2024,8 +2024,8 @@
 define <4 x i32> @notted_umin_ab_cb_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umin_ab_cb_eq_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s
@@ -2046,8 +2046,8 @@
 define <4 x i32> @notted_umin_bc_ab_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umin_bc_ab_eq_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s
@@ -2068,8 +2068,8 @@
 define <4 x i32> @notted_umin_bc_ba_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umin_bc_ba_eq_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s
@@ -2090,8 +2090,8 @@
 define <4 x i32> @notted_umax_ab_bc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umax_ab_bc:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s
@@ -2112,8 +2112,8 @@
 define <4 x i32> @notted_umax_ab_cb(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umax_ab_cb:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s
@@ -2134,8 +2134,8 @@
 define <4 x i32> @notted_umax_bc_ab(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umax_bc_ab:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
@@ -2156,8 +2156,8 @@
 define <4 x i32> @notted_umax_bc_ba(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umax_bc_ba:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s
@@ -2178,8 +2178,8 @@
 define <4 x i32> @notted_umax_ab_bc_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umax_ab_bc_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s
@@ -2200,8 +2200,8 @@
 define <4 x i32> @notted_umax_ab_cb_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umax_ab_cb_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s
@@ -2222,8 +2222,8 @@
 define <4 x i32> @notted_umax_bc_ab_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umax_bc_ab_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
@@ -2244,8 +2244,8 @@
 define <4 x i32> @notted_umax_bc_ba_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umax_bc_ba_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s
@@ -2266,8 +2266,8 @@
 define <4 x i32> @notted_umax_ab_bc_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umax_ab_bc_eq_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s
@@ -2288,8 +2288,8 @@
 define <4 x i32> @notted_umax_ab_cb_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umax_ab_cb_eq_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s
@@ -2310,8 +2310,8 @@
 define <4 x i32> @notted_umax_bc_ab_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umax_bc_ab_eq_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
@@ -2332,8 +2332,8 @@
 define <4 x i32> @notted_umax_bc_ba_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umax_bc_ba_eq_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s
@@ -2354,8 +2354,8 @@
 define <4 x i32> @notted_umax_ab_bc_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umax_ab_bc_eq_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s
@@ -2376,8 +2376,8 @@
 define <4 x i32> @notted_umax_ab_cb_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umax_ab_cb_eq_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s
@@ -2398,8 +2398,8 @@
 define <4 x i32> @notted_umax_bc_ab_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umax_bc_ab_eq_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s
@@ -2420,8 +2420,8 @@
 define <4 x i32> @notted_umax_bc_ba_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: notted_umax_bc_ba_eq_swap_pred:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v1.16b, v1.16b
 ; CHECK-NEXT: mvn v2.16b, v2.16b
 ; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s
 ; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s
diff --git a/llvm/test/CodeGen/AArch64/minmax.ll b/llvm/test/CodeGen/AArch64/minmax.ll
--- a/llvm/test/CodeGen/AArch64/minmax.ll
+++ b/llvm/test/CodeGen/AArch64/minmax.ll
@@ -97,8 +97,8 @@
 define <8 x i32> @t10(<8 x i32> %a, <8 x i32> %b) {
 ; CHECK-LABEL: t10:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: smax v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT: smax v1.4s, v1.4s, v3.4s
+; CHECK-NEXT: smax v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT: ret
%a, %b %t2 = select <8 x i1> %t1, <8 x i32> %a, <8 x i32> %b @@ -158,10 +158,10 @@ define <4 x i64> @t15(<4 x i64> %a, <4 x i64> %b) { ; CHECK-LABEL: t15: ; CHECK: // %bb.0: -; CHECK-NEXT: cmhi v4.2d, v2.2d, v0.2d -; CHECK-NEXT: cmhi v5.2d, v3.2d, v1.2d -; CHECK-NEXT: bif v0.16b, v2.16b, v4.16b -; CHECK-NEXT: bif v1.16b, v3.16b, v5.16b +; CHECK-NEXT: cmhi v4.2d, v3.2d, v1.2d +; CHECK-NEXT: cmhi v5.2d, v2.2d, v0.2d +; CHECK-NEXT: bif v1.16b, v3.16b, v4.16b +; CHECK-NEXT: bif v0.16b, v2.16b, v5.16b ; CHECK-NEXT: ret %t1 = icmp ule <4 x i64> %a, %b %t2 = select <4 x i1> %t1, <4 x i64> %a, <4 x i64> %b diff --git a/llvm/test/CodeGen/AArch64/misched-detail-resource-booking-01.mir b/llvm/test/CodeGen/AArch64/misched-detail-resource-booking-01.mir --- a/llvm/test/CodeGen/AArch64/misched-detail-resource-booking-01.mir +++ b/llvm/test/CodeGen/AArch64/misched-detail-resource-booking-01.mir @@ -13,7 +13,7 @@ source_filename = "../llvm-project/llvm/test/CodeGen/AArch64/aarch64-smull.failing.ll" target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" target triple = "aarch64-none-linux-gnu" - + define <8 x i32> @umull_and_v8i32(<8 x i16> %src1, <8 x i32> %src2) #0 { entry: %in1 = zext <8 x i16> %src1 to <8 x i32> @@ -21,50 +21,89 @@ %out = mul nsw <8 x i32> %in1, %in2 ret <8 x i32> %out } - + attributes #0 = { "target-features"="+neon" } ... --- name: umull_and_v8i32 alignment: 4 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false tracksRegLiveness: true +hasWinCFI: false +callsEHReturn: false +callsUnwindInit: false +hasEHCatchret: false +hasEHScopes: false +hasEHFunclets: false +isOutlined: false +debugInstrRef: false +failsVerification: false +tracksDebugUserValues: false registers: - - { id: 0, class: fpr128 } - - { id: 1, class: fpr128 } - - { id: 2, class: fpr128 } - - { id: 3, class: fpr128 } - - { id: 4, class: fpr64 } - - { id: 5, class: fpr64 } - - { id: 6, class: fpr128 } - - { id: 7, class: fpr128 } - - { id: 8, class: fpr128 } - - { id: 9, class: fpr64 } - - { id: 10, class: fpr128 } - - { id: 11, class: fpr64 } - - { id: 12, class: fpr128 } + - { id: 0, class: fpr128, preferred-register: '' } + - { id: 1, class: fpr128, preferred-register: '' } + - { id: 2, class: fpr128, preferred-register: '' } + - { id: 3, class: fpr128, preferred-register: '' } + - { id: 4, class: fpr64, preferred-register: '' } + - { id: 5, class: fpr64, preferred-register: '' } + - { id: 6, class: fpr128, preferred-register: '' } + - { id: 7, class: fpr128, preferred-register: '' } + - { id: 8, class: fpr128, preferred-register: '' } + - { id: 9, class: fpr64, preferred-register: '' } + - { id: 10, class: fpr128, preferred-register: '' } + - { id: 11, class: fpr64, preferred-register: '' } + - { id: 12, class: fpr128, preferred-register: '' } liveins: - { reg: '$q0', virtual-reg: '%0' } - { reg: '$q1', virtual-reg: '%1' } - { reg: '$q2', virtual-reg: '%2' } frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + functionContext: '' maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +entry_values: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] machineFunctionInfo: {} body: 
| bb.0.entry: liveins: $q0, $q1, $q2 - + + %6:fpr128 = MOVIv2d_ns 17 %2:fpr128 = COPY $q2 %1:fpr128 = COPY $q1 - %0:fpr128 = COPY $q0 - %3:fpr128 = EXTv16i8 %0, %0, 8 - %6:fpr128 = MOVIv2d_ns 17 %7:fpr128 = ANDv16i8 %2, %6 %8:fpr128 = ANDv16i8 %1, %6 + %0:fpr128 = COPY $q0 + %3:fpr128 = EXTv16i8 %0, %0, 8 %9:fpr64 = XTNv4i16 %8 - %10:fpr128 = UMULLv4i16_v4i32 %0.dsub, %9 %11:fpr64 = XTNv4i16 %7 + %10:fpr128 = UMULLv4i16_v4i32 %0.dsub, %9 %12:fpr128 = UMULLv4i16_v4i32 %3.dsub, %11 $q0 = COPY %10 $q1 = COPY %12 @@ -75,164 +114,164 @@ # CHECK-LABEL: Before MISched: # CHECK-NEXT: # Machine code for function umull_and_v8i32: IsSSA, NoPHIs, TracksLiveness # CHECK-NEXT: Function Live Ins: $q0 in %0, $q1 in %1, $q2 in %2 -# CHECK-EMPTY: +# CHECK-EMPTY: # CHECK-NEXT: bb.0.entry: # CHECK-NEXT: liveins: $q0, $q1, $q2 +# CHECK-NEXT: %6:fpr128 = MOVIv2d_ns 17 # CHECK-NEXT: %2:fpr128 = COPY $q2 # CHECK-NEXT: %1:fpr128 = COPY $q1 -# CHECK-NEXT: %0:fpr128 = COPY $q0 -# CHECK-NEXT: %3:fpr128 = EXTv16i8 %0:fpr128, %0:fpr128, 8 -# CHECK-NEXT: %6:fpr128 = MOVIv2d_ns 17 # CHECK-NEXT: %7:fpr128 = ANDv16i8 %2:fpr128, %6:fpr128 # CHECK-NEXT: %8:fpr128 = ANDv16i8 %1:fpr128, %6:fpr128 +# CHECK-NEXT: %0:fpr128 = COPY $q0 +# CHECK-NEXT: %3:fpr128 = EXTv16i8 %0:fpr128, %0:fpr128, 8 # CHECK-NEXT: %9:fpr64 = XTNv4i16 %8:fpr128 -# CHECK-NEXT: %10:fpr128 = UMULLv4i16_v4i32 %0.dsub:fpr128, %9:fpr64 # CHECK-NEXT: %11:fpr64 = XTNv4i16 %7:fpr128 +# CHECK-NEXT: %10:fpr128 = UMULLv4i16_v4i32 %0.dsub:fpr128, %9:fpr64 # CHECK-NEXT: %12:fpr128 = UMULLv4i16_v4i32 %3.dsub:fpr128, %11:fpr64 # CHECK-NEXT: $q0 = COPY %10:fpr128 # CHECK-NEXT: $q1 = COPY %12:fpr128 # CHECK-NEXT: RET_ReallyLR implicit $q0, implicit $q1 -# CHECK-EMPTY: +# CHECK-EMPTY: # CHECK-NEXT: # End machine code for function umull_and_v8i32. 
-# CHECK-EMPTY: +# CHECK-EMPTY: # CHECK-NEXT: ********** MI Scheduling ********** # CHECK-NEXT: umull_and_v8i32:%bb.0 entry -# CHECK-NEXT: From: %2:fpr128 = COPY $q2 +# CHECK-NEXT: From: %6:fpr128 = MOVIv2d_ns 17 # CHECK-NEXT: To: RET_ReallyLR implicit $q0, implicit $q1 # CHECK-NEXT: RegionInstrs: 13 # CHECK-NEXT: ScheduleDAGMILive::schedule starting # CHECK-NEXT: GenericScheduler RegionPolicy: ShouldTrackPressure=0 OnlyTopDown=0 OnlyBottomUp=1 # CHECK-NEXT: Disabled scoreboard hazard recognizer # CHECK-NEXT: Disabled scoreboard hazard recognizer -# CHECK-NEXT: SU(0) [TopReadyCycle = 0, BottomReadyCycle = 0]: %2:fpr128 = COPY $q2 +# CHECK-NEXT: SU(0) [TopReadyCycle = 0, BottomReadyCycle = 0]: %6:fpr128 = MOVIv2d_ns 17 +# CHECK-NEXT: # preds left : 0 +# CHECK-NEXT: # succs left : 2 +# CHECK-NEXT: # rdefs left : 0 +# CHECK-NEXT: Latency : 4 +# CHECK-NEXT: Depth : 0 +# CHECK-NEXT: Height : 16 +# CHECK-NEXT: Successors: +# CHECK-NEXT: SU(4): Data Latency=4 Reg=%6 +# CHECK-NEXT: SU(3): Data Latency=4 Reg=%6 +# CHECK-NEXT: Single Issue : false; +# CHECK-NEXT: SU(1) [TopReadyCycle = 0, BottomReadyCycle = 0]: %2:fpr128 = COPY $q2 # CHECK-NEXT: # preds left : 0 # CHECK-NEXT: # succs left : 1 # CHECK-NEXT: # rdefs left : 0 -# CHECK-NEXT: Latency : 3 +# CHECK-NEXT: Latency : 1 # CHECK-NEXT: Depth : 0 # CHECK-NEXT: Height : 13 # CHECK-NEXT: Successors: -# CHECK-NEXT: SU(5): Data Latency=3 Reg=%2 +# CHECK-NEXT: SU(3): Data Latency=1 Reg=%2 # CHECK-NEXT: Single Issue : false; -# CHECK-NEXT: SU(1) [TopReadyCycle = 0, BottomReadyCycle = 0]: %1:fpr128 = COPY $q1 +# CHECK-NEXT: SU(2) [TopReadyCycle = 0, BottomReadyCycle = 0]: %1:fpr128 = COPY $q1 # CHECK-NEXT: # preds left : 0 # CHECK-NEXT: # succs left : 2 # CHECK-NEXT: # rdefs left : 0 -# CHECK-NEXT: Latency : 3 +# CHECK-NEXT: Latency : 1 # CHECK-NEXT: Depth : 0 # CHECK-NEXT: Height : 13 # CHECK-NEXT: Successors: -# CHECK-NEXT: SU(6): Data Latency=3 Reg=%1 +# CHECK-NEXT: SU(4): Data Latency=1 Reg=%1 # CHECK-NEXT: SU(12): Anti Latency=0 # CHECK-NEXT: Single Issue : false; -# CHECK-NEXT: SU(2) [TopReadyCycle = 0, BottomReadyCycle = 0]: %0:fpr128 = COPY $q0 -# CHECK-NEXT: # preds left : 0 -# CHECK-NEXT: # succs left : 3 +# CHECK-NEXT: SU(3) [TopReadyCycle = 0, BottomReadyCycle = 0]: %7:fpr128 = ANDv16i8 %2:fpr128, %6:fpr128 +# CHECK-NEXT: # preds left : 2 +# CHECK-NEXT: # succs left : 1 # CHECK-NEXT: # rdefs left : 0 # CHECK-NEXT: Latency : 3 -# CHECK-NEXT: Depth : 0 -# CHECK-NEXT: Height : 14 +# CHECK-NEXT: Depth : 4 +# CHECK-NEXT: Height : 12 +# CHECK-NEXT: Predecessors: +# CHECK-NEXT: SU(1): Data Latency=1 Reg=%2 +# CHECK-NEXT: SU(0): Data Latency=4 Reg=%6 # CHECK-NEXT: Successors: -# CHECK-NEXT: SU(8): Data Latency=3 Reg=%0 -# CHECK-NEXT: SU(3): Data Latency=3 Reg=%0 -# CHECK-NEXT: SU(11): Anti Latency=0 +# CHECK-NEXT: SU(8): Data Latency=3 Reg=%7 # CHECK-NEXT: Single Issue : false; -# CHECK-NEXT: SU(3) [TopReadyCycle = 0, BottomReadyCycle = 0]: %3:fpr128 = EXTv16i8 %0:fpr128, %0:fpr128, 8 -# CHECK-NEXT: # preds left : 1 +# CHECK-NEXT: SU(4) [TopReadyCycle = 0, BottomReadyCycle = 0]: %8:fpr128 = ANDv16i8 %1:fpr128, %6:fpr128 +# CHECK-NEXT: # preds left : 2 # CHECK-NEXT: # succs left : 1 # CHECK-NEXT: # rdefs left : 0 -# CHECK-NEXT: Latency : 4 -# CHECK-NEXT: Depth : 3 -# CHECK-NEXT: Height : 11 +# CHECK-NEXT: Latency : 3 +# CHECK-NEXT: Depth : 4 +# CHECK-NEXT: Height : 12 # CHECK-NEXT: Predecessors: -# CHECK-NEXT: SU(2): Data Latency=3 Reg=%0 +# CHECK-NEXT: SU(2): Data Latency=1 Reg=%1 +# CHECK-NEXT: SU(0): Data Latency=4 Reg=%6 # CHECK-NEXT: Successors: 
-# CHECK-NEXT: SU(10): Data Latency=4 Reg=%3 +# CHECK-NEXT: SU(7): Data Latency=3 Reg=%8 # CHECK-NEXT: Single Issue : false; -# CHECK-NEXT: SU(4) [TopReadyCycle = 0, BottomReadyCycle = 0]: %6:fpr128 = MOVIv2d_ns 17 +# CHECK-NEXT: SU(5) [TopReadyCycle = 0, BottomReadyCycle = 0]: %0:fpr128 = COPY $q0 # CHECK-NEXT: # preds left : 0 -# CHECK-NEXT: # succs left : 2 -# CHECK-NEXT: # rdefs left : 0 -# CHECK-NEXT: Latency : 4 -# CHECK-NEXT: Depth : 0 -# CHECK-NEXT: Height : 14 -# CHECK-NEXT: Successors: -# CHECK-NEXT: SU(6): Data Latency=4 Reg=%6 -# CHECK-NEXT: SU(5): Data Latency=4 Reg=%6 -# CHECK-NEXT: Single Issue : false; -# CHECK-NEXT: SU(5) [TopReadyCycle = 0, BottomReadyCycle = 0]: %7:fpr128 = ANDv16i8 %2:fpr128, %6:fpr128 -# CHECK-NEXT: # preds left : 2 -# CHECK-NEXT: # succs left : 1 +# CHECK-NEXT: # succs left : 3 # CHECK-NEXT: # rdefs left : 0 # CHECK-NEXT: Latency : 1 -# CHECK-NEXT: Depth : 4 +# CHECK-NEXT: Depth : 0 # CHECK-NEXT: Height : 10 -# CHECK-NEXT: Predecessors: -# CHECK-NEXT: SU(4): Data Latency=4 Reg=%6 -# CHECK-NEXT: SU(0): Data Latency=3 Reg=%2 # CHECK-NEXT: Successors: -# CHECK-NEXT: SU(9): Data Latency=1 Reg=%7 +# CHECK-NEXT: SU(9): Data Latency=1 Reg=%0 +# CHECK-NEXT: SU(6): Data Latency=1 Reg=%0 +# CHECK-NEXT: SU(11): Anti Latency=0 # CHECK-NEXT: Single Issue : false; -# CHECK-NEXT: SU(6) [TopReadyCycle = 0, BottomReadyCycle = 0]: %8:fpr128 = ANDv16i8 %1:fpr128, %6:fpr128 -# CHECK-NEXT: # preds left : 2 +# CHECK-NEXT: SU(6) [TopReadyCycle = 0, BottomReadyCycle = 0]: %3:fpr128 = EXTv16i8 %0:fpr128, %0:fpr128, 8 +# CHECK-NEXT: # preds left : 1 # CHECK-NEXT: # succs left : 1 # CHECK-NEXT: # rdefs left : 0 -# CHECK-NEXT: Latency : 1 -# CHECK-NEXT: Depth : 4 -# CHECK-NEXT: Height : 10 +# CHECK-NEXT: Latency : 4 +# CHECK-NEXT: Depth : 1 +# CHECK-NEXT: Height : 9 # CHECK-NEXT: Predecessors: -# CHECK-NEXT: SU(4): Data Latency=4 Reg=%6 -# CHECK-NEXT: SU(1): Data Latency=3 Reg=%1 +# CHECK-NEXT: SU(5): Data Latency=1 Reg=%0 # CHECK-NEXT: Successors: -# CHECK-NEXT: SU(7): Data Latency=1 Reg=%8 +# CHECK-NEXT: SU(10): Data Latency=4 Reg=%3 # CHECK-NEXT: Single Issue : false; # CHECK-NEXT: SU(7) [TopReadyCycle = 0, BottomReadyCycle = 0]: %9:fpr64 = XTNv4i16 %8:fpr128 # CHECK-NEXT: # preds left : 1 # CHECK-NEXT: # succs left : 1 # CHECK-NEXT: # rdefs left : 0 -# CHECK-NEXT: Latency : 2 -# CHECK-NEXT: Depth : 5 +# CHECK-NEXT: Latency : 4 +# CHECK-NEXT: Depth : 7 # CHECK-NEXT: Height : 9 # CHECK-NEXT: Predecessors: -# CHECK-NEXT: SU(6): Data Latency=1 Reg=%8 +# CHECK-NEXT: SU(4): Data Latency=3 Reg=%8 # CHECK-NEXT: Successors: -# CHECK-NEXT: SU(8): Data Latency=2 Reg=%9 +# CHECK-NEXT: SU(9): Data Latency=4 Reg=%9 # CHECK-NEXT: Single Issue : false; -# CHECK-NEXT: SU(8) [TopReadyCycle = 0, BottomReadyCycle = 0]: %10:fpr128 = UMULLv4i16_v4i32 %0.dsub:fpr128, %9:fpr64 -# CHECK-NEXT: # preds left : 2 +# CHECK-NEXT: SU(8) [TopReadyCycle = 0, BottomReadyCycle = 0]: %11:fpr64 = XTNv4i16 %7:fpr128 +# CHECK-NEXT: # preds left : 1 # CHECK-NEXT: # succs left : 1 # CHECK-NEXT: # rdefs left : 0 # CHECK-NEXT: Latency : 4 # CHECK-NEXT: Depth : 7 -# CHECK-NEXT: Height : 7 +# CHECK-NEXT: Height : 9 # CHECK-NEXT: Predecessors: -# CHECK-NEXT: SU(7): Data Latency=2 Reg=%9 -# CHECK-NEXT: SU(2): Data Latency=3 Reg=%0 +# CHECK-NEXT: SU(3): Data Latency=3 Reg=%7 # CHECK-NEXT: Successors: -# CHECK-NEXT: SU(11): Data Latency=4 Reg=%10 +# CHECK-NEXT: SU(10): Data Latency=4 Reg=%11 # CHECK-NEXT: Single Issue : false; -# CHECK-NEXT: SU(9) [TopReadyCycle = 0, BottomReadyCycle = 0]: %11:fpr64 = XTNv4i16 %7:fpr128 -# 
CHECK-NEXT: # preds left : 1 +# CHECK-NEXT: SU(9) [TopReadyCycle = 0, BottomReadyCycle = 0]: %10:fpr128 = UMULLv4i16_v4i32 %0.dsub:fpr128, %9:fpr64 +# CHECK-NEXT: # preds left : 2 # CHECK-NEXT: # succs left : 1 # CHECK-NEXT: # rdefs left : 0 -# CHECK-NEXT: Latency : 2 -# CHECK-NEXT: Depth : 5 -# CHECK-NEXT: Height : 9 +# CHECK-NEXT: Latency : 4 +# CHECK-NEXT: Depth : 11 +# CHECK-NEXT: Height : 5 # CHECK-NEXT: Predecessors: -# CHECK-NEXT: SU(5): Data Latency=1 Reg=%7 +# CHECK-NEXT: SU(7): Data Latency=4 Reg=%9 +# CHECK-NEXT: SU(5): Data Latency=1 Reg=%0 # CHECK-NEXT: Successors: -# CHECK-NEXT: SU(10): Data Latency=2 Reg=%11 +# CHECK-NEXT: SU(11): Data Latency=4 Reg=%10 # CHECK-NEXT: Single Issue : false; # CHECK-NEXT: SU(10) [TopReadyCycle = 0, BottomReadyCycle = 0]: %12:fpr128 = UMULLv4i16_v4i32 %3.dsub:fpr128, %11:fpr64 # CHECK-NEXT: # preds left : 2 # CHECK-NEXT: # succs left : 1 # CHECK-NEXT: # rdefs left : 0 # CHECK-NEXT: Latency : 4 -# CHECK-NEXT: Depth : 7 -# CHECK-NEXT: Height : 7 +# CHECK-NEXT: Depth : 11 +# CHECK-NEXT: Height : 5 # CHECK-NEXT: Predecessors: -# CHECK-NEXT: SU(9): Data Latency=2 Reg=%11 -# CHECK-NEXT: SU(3): Data Latency=4 Reg=%3 +# CHECK-NEXT: SU(8): Data Latency=4 Reg=%11 +# CHECK-NEXT: SU(6): Data Latency=4 Reg=%3 # CHECK-NEXT: Successors: # CHECK-NEXT: SU(12): Data Latency=4 Reg=%12 # CHECK-NEXT: Single Issue : false; @@ -240,1380 +279,925 @@ # CHECK-NEXT: # preds left : 2 # CHECK-NEXT: # succs left : 1 # CHECK-NEXT: # rdefs left : 0 -# CHECK-NEXT: Latency : 3 -# CHECK-NEXT: Depth : 11 -# CHECK-NEXT: Height : 3 +# CHECK-NEXT: Latency : 1 +# CHECK-NEXT: Depth : 15 +# CHECK-NEXT: Height : 1 # CHECK-NEXT: Predecessors: -# CHECK-NEXT: SU(8): Data Latency=4 Reg=%10 -# CHECK-NEXT: SU(2): Anti Latency=0 +# CHECK-NEXT: SU(9): Data Latency=4 Reg=%10 +# CHECK-NEXT: SU(5): Anti Latency=0 # CHECK-NEXT: Successors: -# CHECK-NEXT: ExitSU: Ord Latency=3 Artificial +# CHECK-NEXT: ExitSU: Ord Latency=1 Artificial # CHECK-NEXT: Single Issue : false; # CHECK-NEXT: SU(12) [TopReadyCycle = 0, BottomReadyCycle = 0]: $q1 = COPY %12:fpr128 # CHECK-NEXT: # preds left : 2 # CHECK-NEXT: # succs left : 1 # CHECK-NEXT: # rdefs left : 0 -# CHECK-NEXT: Latency : 3 -# CHECK-NEXT: Depth : 11 -# CHECK-NEXT: Height : 3 +# CHECK-NEXT: Latency : 1 +# CHECK-NEXT: Depth : 15 +# CHECK-NEXT: Height : 1 # CHECK-NEXT: Predecessors: # CHECK-NEXT: SU(10): Data Latency=4 Reg=%12 -# CHECK-NEXT: SU(1): Anti Latency=0 +# CHECK-NEXT: SU(2): Anti Latency=0 # CHECK-NEXT: Successors: -# CHECK-NEXT: ExitSU: Ord Latency=3 Artificial +# CHECK-NEXT: ExitSU: Ord Latency=1 Artificial # CHECK-NEXT: Single Issue : false; # CHECK-NEXT: ExitSU [TopReadyCycle = 0, BottomReadyCycle = 0]: RET_ReallyLR implicit $q0, implicit $q1 # CHECK-NEXT: # preds left : 2 # CHECK-NEXT: # succs left : 0 # CHECK-NEXT: # rdefs left : 0 # CHECK-NEXT: Latency : 0 -# CHECK-NEXT: Depth : 14 +# CHECK-NEXT: Depth : 16 # CHECK-NEXT: Height : 0 # CHECK-NEXT: Predecessors: -# CHECK-NEXT: SU(12): Ord Latency=3 Artificial -# CHECK-NEXT: SU(11): Ord Latency=3 Artificial -# CHECK-NEXT: Resource booking (@0c): -# CHECK-NEXT: CortexA55UnitALU(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitALU(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: 
CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@0c): -# CHECK-NEXT: Instance 0 available @0c -# CHECK-NEXT: Instance 1 available @0c -# CHECK-NEXT: selecting CortexA55UnitALU[0] available @0c -# CHECK-NEXT: Resource booking (@0c): -# CHECK-NEXT: CortexA55UnitALU(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitALU(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@0c): -# CHECK-NEXT: Instance 0 available @0c -# CHECK-NEXT: Instance 1 available @0c -# CHECK-NEXT: selecting CortexA55UnitALU[0] available @0c -# CHECK-NEXT: Resource booking (@0c): -# CHECK-NEXT: CortexA55UnitALU(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitALU(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@0c): -# CHECK-NEXT: Instance 0 available @0c -# CHECK-NEXT: Instance 1 available @0c -# CHECK-NEXT: selecting CortexA55UnitALU[0] available @0c -# CHECK-NEXT: Resource booking (@0c): -# CHECK-NEXT: CortexA55UnitALU(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitALU(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@0c): -# CHECK-NEXT: Instance 0 available @0c -# CHECK-NEXT: Instance 1 available @0c -# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @0c -# CHECK-NEXT: Critical Path(GS-RR ): 14 +# CHECK-NEXT: SU(12): Ord Latency=1 Artificial +# CHECK-NEXT: SU(11): Ord Latency=1 Artificial +# CHECK-NEXT: Critical Path(GS-RR ): 16 # CHECK-NEXT: ** ScheduleDAGMILive::schedule picking next node -# CHECK-NEXT: Cycle: 3 BotQ.A -# CHECK-NEXT: Resource booking (@3c): -# CHECK-NEXT: CortexA55UnitALU(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitALU(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 
4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@3c): -# CHECK-NEXT: Instance 0 available @3c -# CHECK-NEXT: Instance 1 available @3c -# CHECK-NEXT: selecting CortexA55UnitALU[0] available @3c -# CHECK-NEXT: Resource booking (@3c): -# CHECK-NEXT: CortexA55UnitALU(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitALU(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@3c): -# CHECK-NEXT: Instance 0 available @3c -# CHECK-NEXT: Instance 1 available @3c -# CHECK-NEXT: selecting CortexA55UnitALU[0] available @3c -# CHECK-NEXT: Queue BotQ.P: -# CHECK-NEXT: Queue BotQ.A: 12 11 -# CHECK-NEXT: Cand SU(12) ORDER -# CHECK-NEXT: Pick Bot ORDER +# CHECK-NEXT: Cycle: 1 BotQ.A +# CHECK-NEXT: Queue BotQ.P: +# CHECK-NEXT: Queue BotQ.A: 12 11 +# CHECK-NEXT: Cand SU(12) ORDER +# CHECK-NEXT: Pick Bot ORDER # CHECK-NEXT: Scheduling SU(12) $q1 = COPY %12:fpr128 -# CHECK-NEXT: Ready @3c -# CHECK-NEXT: CortexA55UnitALU +1x1u -# CHECK-NEXT: Resource booking (@3c): -# CHECK-NEXT: CortexA55UnitALU(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitALU(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@3c): -# CHECK-NEXT: Instance 0 available @3c -# CHECK-NEXT: Instance 1 available @3c -# CHECK-NEXT: selecting CortexA55UnitALU[0] available @3c -# CHECK-NEXT: Resource booking (@3c): -# CHECK-NEXT: CortexA55UnitALU(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitALU(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@3c): -# CHECK-NEXT: Instance 0 available @3c -# CHECK-NEXT: Instance 1 available @3c -# CHECK-NEXT: selecting CortexA55UnitALU[0] available @3c -# CHECK-NEXT: BotQ.A TopLatency SU(12) 11c -# CHECK-NEXT: BotQ.A BotLatency SU(12) 3c -# CHECK-NEXT: BotQ.A @3c +# CHECK-NEXT: Ready @1c +# CHECK-NEXT: CortexA510UnitALU +1x2u +# CHECK-NEXT: Resource booking (@1c): +# CHECK-NEXT: CortexA510UnitALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(2) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(1) = 
4294967295 +# CHECK-NEXT: CortexA510UnitB(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitDiv(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLdSt(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitPAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMC(0) = 4294967295 +# CHECK-NEXT: getNextResourceCycle (@1c): +# CHECK-NEXT: Instance 0 available @1c +# CHECK-NEXT: Instance 1 available @1c +# CHECK-NEXT: Instance 2 available @1c +# CHECK-NEXT: selecting CortexA510UnitALU[0] available @1c +# CHECK-NEXT: BotQ.A TopLatency SU(12) 15c +# CHECK-NEXT: BotQ.A BotLatency SU(12) 1c +# CHECK-NEXT: BotQ.A @1c # CHECK-NEXT: Retired: 1 -# CHECK-NEXT: Executed: 3c +# CHECK-NEXT: Executed: 1c # CHECK-NEXT: Critical: 0c, 1 MOps -# CHECK-NEXT: ExpectedLatency: 3c +# CHECK-NEXT: ExpectedLatency: 1c # CHECK-NEXT: - Latency limited. -# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(2) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitB(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitDiv(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLdSt(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitPAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMC(0) = 4294967295 # CHECK-NEXT: ** ScheduleDAGMILive::schedule picking next node -# CHECK-NEXT: Resource booking (@3c): -# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# 
CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@3c): -# CHECK-NEXT: Instance 0 available @4c -# CHECK-NEXT: Instance 1 available @3c -# CHECK-NEXT: selecting CortexA55UnitALU[1] available @3c -# CHECK-NEXT: Queue BotQ.P: 10 -# CHECK-NEXT: Queue BotQ.A: 11 +# CHECK-NEXT: Queue BotQ.P: 10 +# CHECK-NEXT: Queue BotQ.A: 11 # CHECK-NEXT: Scheduling SU(11) $q0 = COPY %10:fpr128 -# CHECK-NEXT: Ready @3c -# CHECK-NEXT: CortexA55UnitALU +1x1u -# CHECK-NEXT: Resource booking (@3c): -# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@3c): -# CHECK-NEXT: Instance 0 available @4c -# CHECK-NEXT: Instance 1 available @3c -# CHECK-NEXT: selecting CortexA55UnitALU[1] available @3c -# CHECK-NEXT: Resource booking (@3c): -# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@3c): -# CHECK-NEXT: Instance 0 available @4c -# CHECK-NEXT: Instance 1 available @3c -# CHECK-NEXT: selecting CortexA55UnitALU[1] available @3c -# CHECK-NEXT: *** Max MOps 2 at cycle 3 -# CHECK-NEXT: Cycle: 4 BotQ.A -# CHECK-NEXT: BotQ.A @4c +# CHECK-NEXT: Ready @1c +# CHECK-NEXT: CortexA510UnitALU +1x2u +# CHECK-NEXT: Resource booking (@1c): +# CHECK-NEXT: CortexA510UnitALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(2) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitB(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitDiv(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLdSt(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitPAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMC(0) = 4294967295 +# CHECK-NEXT: getNextResourceCycle (@1c): +# CHECK-NEXT: Instance 0 available @1c +# CHECK-NEXT: Instance 1 available @1c +# CHECK-NEXT: Instance 2 available @1c +# CHECK-NEXT: selecting CortexA510UnitALU[0] available @1c +# CHECK-NEXT: BotQ.A @1c # 
CHECK-NEXT: Retired: 2 -# CHECK-NEXT: Executed: 4c -# CHECK-NEXT: Critical: 1c, 2 MOps -# CHECK-NEXT: ExpectedLatency: 3c +# CHECK-NEXT: Executed: 1c +# CHECK-NEXT: Critical: 0c, 2 MOps +# CHECK-NEXT: ExpectedLatency: 1c # CHECK-NEXT: - Latency limited. -# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(2) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitB(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitDiv(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLdSt(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitPAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMC(0) = 4294967295 # CHECK-NEXT: ** ScheduleDAGMILive::schedule picking next node -# CHECK-NEXT: Cycle: 7 BotQ.A -# CHECK-NEXT: Resource booking (@7c): -# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@7c): -# CHECK-NEXT: Instance 0 available @7c -# CHECK-NEXT: Instance 1 available @7c -# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @7c -# CHECK-NEXT: Resource booking (@7c): -# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@7c): -# CHECK-NEXT: Instance 0 available @7c -# CHECK-NEXT: Instance 1 available @7c -# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @7c -# CHECK-NEXT: Queue BotQ.P: 
-# CHECK-NEXT: Queue BotQ.A: 10 8 -# CHECK-NEXT: Cand SU(10) ORDER -# CHECK-NEXT: Pick Bot ORDER +# CHECK-NEXT: Cycle: 2 BotQ.A +# CHECK-NEXT: Cycle: 5 BotQ.A +# CHECK-NEXT: Queue BotQ.P: +# CHECK-NEXT: Queue BotQ.A: 10 9 +# CHECK-NEXT: Cand SU(10) ORDER +# CHECK-NEXT: Pick Bot ORDER # CHECK-NEXT: Scheduling SU(10) %12:fpr128 = UMULLv4i16_v4i32 %3.dsub:fpr128, %11:fpr64 -# CHECK-NEXT: Ready @7c -# CHECK-NEXT: CortexA55UnitFPALU +2x1u -# CHECK-NEXT: Resource booking (@7c): -# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@7c): -# CHECK-NEXT: Instance 0 available @7c -# CHECK-NEXT: Instance 1 available @7c -# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @7c -# CHECK-NEXT: Resource booking (@7c): -# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@7c): -# CHECK-NEXT: Instance 0 available @7c -# CHECK-NEXT: Instance 1 available @7c -# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @7c -# CHECK-NEXT: BotQ.A BotLatency SU(10) 7c -# CHECK-NEXT: Bump cycle to begin group -# CHECK-NEXT: Cycle: 8 BotQ.A -# CHECK-NEXT: BotQ.A @8c +# CHECK-NEXT: Ready @5c +# CHECK-NEXT: CortexA510UnitVALU +1x3u +# CHECK-NEXT: Resource booking (@5c): +# CHECK-NEXT: CortexA510UnitALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(2) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitB(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitDiv(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLdSt(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitPAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMC(0) = 4294967295 +# CHECK-NEXT: getNextResourceCycle (@5c): +# CHECK-NEXT: Instance 0 available @5c +# CHECK-NEXT: Instance 1 available @5c +# CHECK-NEXT: selecting CortexA510UnitVALU[0] available @5c +# CHECK-NEXT: BotQ.A BotLatency SU(10) 5c +# CHECK-NEXT: BotQ.A @5c # 
CHECK-NEXT: Retired: 3 -# CHECK-NEXT: Executed: 8c +# CHECK-NEXT: Executed: 5c # CHECK-NEXT: Critical: 1c, 3 MOps -# CHECK-NEXT: ExpectedLatency: 7c +# CHECK-NEXT: ExpectedLatency: 5c # CHECK-NEXT: - Latency limited. -# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 7 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(2) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitB(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitDiv(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLdSt(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitPAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMC(0) = 4294967295 # CHECK-NEXT: ** ScheduleDAGMILive::schedule picking next node -# CHECK-NEXT: Resource booking (@8c): -# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 7 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@8c): -# CHECK-NEXT: Instance 0 available @9c -# CHECK-NEXT: Instance 1 available @8c -# CHECK-NEXT: selecting CortexA55UnitFPALU[1] available @8c -# CHECK-NEXT: Queue BotQ.P: 9 3 -# CHECK-NEXT: Queue BotQ.A: 8 -# CHECK-NEXT: Scheduling SU(8) %10:fpr128 = UMULLv4i16_v4i32 %0.dsub:fpr128, %9:fpr64 -# CHECK-NEXT: Ready @8c -# CHECK-NEXT: CortexA55UnitFPALU +2x1u -# CHECK-NEXT: Resource booking (@8c): -# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 7 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@8c): -# CHECK-NEXT: Instance 0 available @9c -# 
CHECK-NEXT: Instance 1 available @8c -# CHECK-NEXT: selecting CortexA55UnitFPALU[1] available @8c -# CHECK-NEXT: Resource booking (@8c): -# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 7 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@8c): -# CHECK-NEXT: Instance 0 available @9c -# CHECK-NEXT: Instance 1 available @8c -# CHECK-NEXT: selecting CortexA55UnitFPALU[1] available @8c -# CHECK-NEXT: BotQ.A TopLatency SU(8) 7c -# CHECK-NEXT: Bump cycle to begin group -# CHECK-NEXT: Cycle: 9 BotQ.A -# CHECK-NEXT: BotQ.A @9c +# CHECK-NEXT: Queue BotQ.P: 8 6 +# CHECK-NEXT: Queue BotQ.A: 9 +# CHECK-NEXT: Scheduling SU(9) %10:fpr128 = UMULLv4i16_v4i32 %0.dsub:fpr128, %9:fpr64 +# CHECK-NEXT: Ready @5c +# CHECK-NEXT: CortexA510UnitVALU +1x3u +# CHECK-NEXT: Resource booking (@5c): +# CHECK-NEXT: CortexA510UnitALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(2) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitB(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitDiv(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLdSt(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitPAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMC(0) = 4294967295 +# CHECK-NEXT: getNextResourceCycle (@5c): +# CHECK-NEXT: Instance 0 available @5c +# CHECK-NEXT: Instance 1 available @5c +# CHECK-NEXT: selecting CortexA510UnitVALU[0] available @5c +# CHECK-NEXT: BotQ.A @5c # CHECK-NEXT: Retired: 4 -# CHECK-NEXT: Executed: 9c -# CHECK-NEXT: Critical: 2c, 4 MOps -# CHECK-NEXT: ExpectedLatency: 7c +# CHECK-NEXT: Executed: 5c +# CHECK-NEXT: Critical: 1c, 4 MOps +# CHECK-NEXT: ExpectedLatency: 5c # CHECK-NEXT: - Latency limited. 
-# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 7 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 8 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(2) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitB(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitDiv(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLdSt(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitPAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMC(0) = 4294967295 # CHECK-NEXT: ** ScheduleDAGMILive::schedule picking next node -# CHECK-NEXT: Resource booking (@9c): -# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 7 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 8 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@9c): -# CHECK-NEXT: Instance 0 available @9c -# CHECK-NEXT: Instance 1 available @9c -# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @9c -# CHECK-NEXT: Resource booking (@9c): -# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 7 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 8 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@9c): -# CHECK-NEXT: Instance 0 available @9c -# CHECK-NEXT: Instance 1 available @9c -# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @9c -# CHECK-NEXT: Queue BotQ.P: 7 3 -# CHECK-NEXT: Queue BotQ.A: 9 -# CHECK-NEXT: Scheduling SU(9) %11:fpr64 = XTNv4i16 %7:fpr128 +# CHECK-NEXT: Cycle: 6 BotQ.A +# CHECK-NEXT: Cycle: 9 BotQ.A +# CHECK-NEXT: Queue BotQ.P: +# CHECK-NEXT: Queue BotQ.A: 8 7 6 +# CHECK-NEXT: Cand SU(8) ORDER +# CHECK-NEXT: Pick Bot ORDER +# CHECK-NEXT: Scheduling SU(8) %11:fpr64 = XTNv4i16 
%7:fpr128 # CHECK-NEXT: Ready @9c -# CHECK-NEXT: CortexA55UnitFPALU +1x1u -# CHECK-NEXT: Resource booking (@9c): -# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 7 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 8 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@9c): +# CHECK-NEXT: CortexA510UnitVALU +1x3u +# CHECK-NEXT: Resource booking (@9c): +# CHECK-NEXT: CortexA510UnitALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(2) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitB(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitDiv(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLdSt(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitPAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMC(0) = 4294967295 +# CHECK-NEXT: getNextResourceCycle (@9c): # CHECK-NEXT: Instance 0 available @9c # CHECK-NEXT: Instance 1 available @9c -# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @9c -# CHECK-NEXT: Resource booking (@9c): -# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 7 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 8 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@9c): +# CHECK-NEXT: selecting CortexA510UnitVALU[0] available @9c +# CHECK-NEXT: BotQ.A BotLatency SU(8) 9c +# CHECK-NEXT: BotQ.A @9c +# CHECK-NEXT: Retired: 5 +# CHECK-NEXT: Executed: 9c +# CHECK-NEXT: Critical: 1c, 5 MOps +# CHECK-NEXT: ExpectedLatency: 9c +# CHECK-NEXT: - Latency limited. 
+# CHECK-NEXT: CortexA510UnitALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(2) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitB(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitDiv(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLdSt(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitPAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMC(0) = 4294967295 +# CHECK-NEXT: ** ScheduleDAGMILive::schedule picking next node +# CHECK-NEXT: Queue BotQ.P: 3 +# CHECK-NEXT: Queue BotQ.A: 6 7 +# CHECK-NEXT: Cand SU(6) ORDER +# CHECK-NEXT: Cand SU(7) ORDER +# CHECK-NEXT: Pick Bot ORDER +# CHECK-NEXT: Scheduling SU(7) %9:fpr64 = XTNv4i16 %8:fpr128 +# CHECK-NEXT: Ready @9c +# CHECK-NEXT: CortexA510UnitVALU +1x3u +# CHECK-NEXT: Resource booking (@9c): +# CHECK-NEXT: CortexA510UnitALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(2) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitB(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitDiv(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLdSt(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitPAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMC(0) = 4294967295 +# CHECK-NEXT: getNextResourceCycle (@9c): # CHECK-NEXT: Instance 0 available @9c # CHECK-NEXT: Instance 1 available @9c -# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @9c -# CHECK-NEXT: BotQ.A BotLatency SU(9) 9c +# CHECK-NEXT: selecting CortexA510UnitVALU[0] available @9c # CHECK-NEXT: BotQ.A @9c -# CHECK-NEXT: Retired: 5 +# CHECK-NEXT: Retired: 6 # CHECK-NEXT: Executed: 9c -# CHECK-NEXT: Critical: 2c, 5 MOps +# CHECK-NEXT: Critical: 2c, 6 MOps # CHECK-NEXT: ExpectedLatency: 9c # CHECK-NEXT: - Latency limited. 
-# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 9 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 8 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(2) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitB(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitDiv(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLdSt(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitPAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMC(0) = 4294967295 # CHECK-NEXT: ** ScheduleDAGMILive::schedule picking next node +# CHECK-NEXT: Queue BotQ.P: 3 4 +# CHECK-NEXT: Queue BotQ.A: 6 +# CHECK-NEXT: Scheduling SU(6) %3:fpr128 = EXTv16i8 %0:fpr128, %0:fpr128, 8 +# CHECK-NEXT: Ready @9c +# CHECK-NEXT: CortexA510UnitVALU +1x3u +# CHECK-NEXT: *** Critical resource CortexA510UnitVALU: 2c +# CHECK-NEXT: Resource booking (@9c): +# CHECK-NEXT: CortexA510UnitALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(2) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitB(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitDiv(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLdSt(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitPAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMC(0) = 4294967295 +# CHECK-NEXT: getNextResourceCycle (@9c): +# CHECK-NEXT: Instance 0 available @9c +# CHECK-NEXT: Instance 1 available @9c +# CHECK-NEXT: selecting CortexA510UnitVALU[0] available @9c +# CHECK-NEXT: *** Max MOps 3 at cycle 9 # CHECK-NEXT: Cycle: 10 BotQ.A -# CHECK-NEXT: Resource booking (@10c): -# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 9 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 8 -# 
CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@10c): -# CHECK-NEXT: Instance 0 available @10c -# CHECK-NEXT: Instance 1 available @10c -# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @10c -# CHECK-NEXT: Resource booking (@10c): -# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 9 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 8 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@10c): -# CHECK-NEXT: Instance 0 available @11c -# CHECK-NEXT: Instance 1 available @10c -# CHECK-NEXT: selecting CortexA55UnitFPALU[1] available @10c -# CHECK-NEXT: Queue BotQ.P: 3 -# CHECK-NEXT: Queue BotQ.A: 7 5 -# CHECK-NEXT: Cand SU(7) ORDER -# CHECK-NEXT: Pick Bot ORDER -# CHECK-NEXT: Scheduling SU(7) %9:fpr64 = XTNv4i16 %8:fpr128 -# CHECK-NEXT: Ready @10c -# CHECK-NEXT: CortexA55UnitFPALU +1x1u -# CHECK-NEXT: Resource booking (@10c): -# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 9 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 8 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@10c): -# CHECK-NEXT: Instance 0 available @10c -# CHECK-NEXT: Instance 1 available @10c -# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @10c -# CHECK-NEXT: Resource booking (@10c): -# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 9 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 8 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@10c): -# CHECK-NEXT: Instance 0 available @10c -# CHECK-NEXT: Instance 1 available @10c -# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @10c # CHECK-NEXT: BotQ.A @10c -# CHECK-NEXT: Retired: 6 +# CHECK-NEXT: Retired: 7 # CHECK-NEXT: Executed: 10c -# CHECK-NEXT: Critical: 3c, 6 MOps +# CHECK-NEXT: Critical: 2c, 5 CortexA510UnitVALU # CHECK-NEXT: ExpectedLatency: 9c # CHECK-NEXT: - Latency limited. 
-# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 10 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 8 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(2) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitB(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitDiv(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLdSt(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitPAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMC(0) = 4294967295 # CHECK-NEXT: ** ScheduleDAGMILive::schedule picking next node -# CHECK-NEXT: Resource booking (@10c): -# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 10 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 8 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@10c): -# CHECK-NEXT: Instance 0 available @12c -# CHECK-NEXT: Instance 1 available @10c -# CHECK-NEXT: selecting CortexA55UnitFPALU[1] available @10c -# CHECK-NEXT: Queue BotQ.P: 3 6 -# CHECK-NEXT: Queue BotQ.A: 5 -# CHECK-NEXT: Scheduling SU(5) %7:fpr128 = ANDv16i8 %2:fpr128, %6:fpr128 +# CHECK-NEXT: Queue BotQ.P: 3 4 +# CHECK-NEXT: Queue BotQ.A: 5 +# CHECK-NEXT: Scheduling SU(5) %0:fpr128 = COPY $q0 # CHECK-NEXT: Ready @10c -# CHECK-NEXT: CortexA55UnitFPALU +2x1u -# CHECK-NEXT: *** Critical resource CortexA55UnitFPALU: 4c -# CHECK-NEXT: Resource booking (@10c): -# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 10 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 8 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@10c): -# CHECK-NEXT: Instance 0 available @12c -# CHECK-NEXT: Instance 1 available @10c -# CHECK-NEXT: selecting 
CortexA55UnitFPALU[1] available @10c -# CHECK-NEXT: Resource booking (@10c): -# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 10 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 8 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@10c): -# CHECK-NEXT: Instance 0 available @12c +# CHECK-NEXT: CortexA510UnitALU +1x2u +# CHECK-NEXT: Resource booking (@10c): +# CHECK-NEXT: CortexA510UnitALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(2) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitB(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitDiv(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLdSt(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitPAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMC(0) = 4294967295 +# CHECK-NEXT: getNextResourceCycle (@10c): +# CHECK-NEXT: Instance 0 available @10c # CHECK-NEXT: Instance 1 available @10c -# CHECK-NEXT: selecting CortexA55UnitFPALU[1] available @10c +# CHECK-NEXT: Instance 2 available @10c +# CHECK-NEXT: selecting CortexA510UnitALU[0] available @10c # CHECK-NEXT: BotQ.A BotLatency SU(5) 10c -# CHECK-NEXT: Bump cycle to begin group -# CHECK-NEXT: Cycle: 11 BotQ.A -# CHECK-NEXT: BotQ.A @11c -# CHECK-NEXT: Retired: 7 -# CHECK-NEXT: Executed: 11c -# CHECK-NEXT: Critical: 4c, 8 CortexA55UnitFPALU +# CHECK-NEXT: BotQ.A @10c +# CHECK-NEXT: Retired: 8 +# CHECK-NEXT: Executed: 10c +# CHECK-NEXT: Critical: 2c, 5 CortexA510UnitVALU # CHECK-NEXT: ExpectedLatency: 10c # CHECK-NEXT: - Latency limited. 
-# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 10 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 10 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(2) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitB(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitDiv(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLdSt(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitPAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMC(0) = 4294967295 # CHECK-NEXT: ** ScheduleDAGMILive::schedule picking next node -# CHECK-NEXT: Resource booking (@11c): -# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 10 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 10 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@11c): -# CHECK-NEXT: Instance 0 available @12c -# CHECK-NEXT: Instance 1 available @12c -# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @12c -# CHECK-NEXT: SU(3) CortexA55UnitFPALU[0]=12c -# CHECK-NEXT: Resource booking (@11c): -# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 10 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 10 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@11c): -# CHECK-NEXT: Instance 0 available @12c -# CHECK-NEXT: Instance 1 available @12c -# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @12c -# CHECK-NEXT: SU(6) CortexA55UnitFPALU[0]=12c +# CHECK-NEXT: Cycle: 11 BotQ.A # CHECK-NEXT: Cycle: 12 BotQ.A -# CHECK-NEXT: Resource booking (@12c): -# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: 
CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 10 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 10 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@12c): -# CHECK-NEXT: Instance 0 available @12c -# CHECK-NEXT: Instance 1 available @12c -# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @12c -# CHECK-NEXT: Resource booking (@12c): -# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 10 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 10 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@12c): -# CHECK-NEXT: Instance 0 available @12c -# CHECK-NEXT: Instance 1 available @12c -# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @12c -# CHECK-NEXT: Queue BotQ.P: 0 -# CHECK-NEXT: Queue BotQ.A: 3 6 -# CHECK-NEXT: Cand SU(3) ORDER -# CHECK-NEXT: Cand SU(6) ORDER -# CHECK-NEXT: Pick Bot ORDER -# CHECK-NEXT: Scheduling SU(6) %8:fpr128 = ANDv16i8 %1:fpr128, %6:fpr128 +# CHECK-NEXT: Queue BotQ.P: +# CHECK-NEXT: Queue BotQ.A: 3 4 +# CHECK-NEXT: Cand SU(3) ORDER +# CHECK-NEXT: Cand SU(4) ORDER +# CHECK-NEXT: Pick Bot ORDER +# CHECK-NEXT: Scheduling SU(4) %8:fpr128 = ANDv16i8 %1:fpr128, %6:fpr128 # CHECK-NEXT: Ready @12c -# CHECK-NEXT: CortexA55UnitFPALU +2x1u -# CHECK-NEXT: Resource booking (@12c): -# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 10 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 10 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@12c): +# CHECK-NEXT: CortexA510UnitVALU +1x3u +# CHECK-NEXT: Resource booking (@12c): +# CHECK-NEXT: CortexA510UnitALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(2) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitB(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitDiv(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLdSt(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitPAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(0) = 4294967295 +# 
CHECK-NEXT: CortexA510UnitVMAC(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMC(0) = 4294967295 +# CHECK-NEXT: getNextResourceCycle (@12c): # CHECK-NEXT: Instance 0 available @12c # CHECK-NEXT: Instance 1 available @12c -# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @12c -# CHECK-NEXT: Resource booking (@12c): -# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 10 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 10 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@12c): +# CHECK-NEXT: selecting CortexA510UnitVALU[0] available @12c +# CHECK-NEXT: BotQ.A BotLatency SU(4) 12c +# CHECK-NEXT: BotQ.A @12c +# CHECK-NEXT: Retired: 9 +# CHECK-NEXT: Executed: 12c +# CHECK-NEXT: Critical: 3c, 6 CortexA510UnitVALU +# CHECK-NEXT: ExpectedLatency: 12c +# CHECK-NEXT: - Latency limited. +# CHECK-NEXT: CortexA510UnitALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(2) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitB(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitDiv(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLdSt(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitPAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMC(0) = 4294967295 +# CHECK-NEXT: ** ScheduleDAGMILive::schedule picking next node +# CHECK-NEXT: Queue BotQ.P: 2 +# CHECK-NEXT: Queue BotQ.A: 3 +# CHECK-NEXT: Scheduling SU(3) %7:fpr128 = ANDv16i8 %2:fpr128, %6:fpr128 +# CHECK-NEXT: Ready @12c +# CHECK-NEXT: CortexA510UnitVALU +1x3u +# CHECK-NEXT: Resource booking (@12c): +# CHECK-NEXT: CortexA510UnitALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(2) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitB(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitDiv(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLdSt(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitPAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(1) = 
4294967295 +# CHECK-NEXT: CortexA510UnitVMC(0) = 4294967295 +# CHECK-NEXT: getNextResourceCycle (@12c): # CHECK-NEXT: Instance 0 available @12c # CHECK-NEXT: Instance 1 available @12c -# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @12c -# CHECK-NEXT: BotQ.A TopLatency SU(6) 4c -# CHECK-NEXT: Bump cycle to begin group -# CHECK-NEXT: Cycle: 13 BotQ.A -# CHECK-NEXT: BotQ.A @13c -# CHECK-NEXT: Retired: 8 -# CHECK-NEXT: Executed: 13c -# CHECK-NEXT: Critical: 5c, 10 CortexA55UnitFPALU -# CHECK-NEXT: ExpectedLatency: 10c +# CHECK-NEXT: selecting CortexA510UnitVALU[0] available @12c +# CHECK-NEXT: BotQ.A @12c +# CHECK-NEXT: Retired: 10 +# CHECK-NEXT: Executed: 12c +# CHECK-NEXT: Critical: 3c, 7 CortexA510UnitVALU +# CHECK-NEXT: ExpectedLatency: 12c # CHECK-NEXT: - Latency limited. -# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 12 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 10 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(2) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitB(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitDiv(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLdSt(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitPAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMC(0) = 4294967295 # CHECK-NEXT: ** ScheduleDAGMILive::schedule picking next node -# CHECK-NEXT: Resource booking (@13c): -# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 12 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 10 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@13c): -# CHECK-NEXT: Instance 0 available @13c -# CHECK-NEXT: Instance 1 available @13c -# CHECK-NEXT: selecting CortexA55UnitALU[0] available @13c -# CHECK-NEXT: Resource booking (@13c): -# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 12 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 10 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 
4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@13c): -# CHECK-NEXT: Instance 0 available @14c -# CHECK-NEXT: Instance 1 available @13c -# CHECK-NEXT: selecting CortexA55UnitFPALU[1] available @13c -# CHECK-NEXT: Resource booking (@13c): -# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 12 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 10 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@13c): -# CHECK-NEXT: Instance 0 available @13c -# CHECK-NEXT: Instance 1 available @13c -# CHECK-NEXT: selecting CortexA55UnitALU[0] available @13c -# CHECK-NEXT: Queue BotQ.P: 1 4 -# CHECK-NEXT: Queue BotQ.A: 3 0 -# CHECK-NEXT: Cand SU(3) ORDER -# CHECK-NEXT: Pick Bot PHYS-REG -# CHECK-NEXT: Scheduling SU(3) %3:fpr128 = EXTv16i8 %0:fpr128, %0:fpr128, 8 +# CHECK-NEXT: Cycle: 13 BotQ.A +# CHECK-NEXT: Queue BotQ.P: 0 +# CHECK-NEXT: Queue BotQ.A: 2 1 +# CHECK-NEXT: Cand SU(2) ORDER +# CHECK-NEXT: Pick Bot ORDER +# CHECK-NEXT: Scheduling SU(2) %1:fpr128 = COPY $q1 # CHECK-NEXT: Ready @13c -# CHECK-NEXT: CortexA55UnitFPALU +2x1u -# CHECK-NEXT: Resource booking (@13c): -# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 12 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 10 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@13c): -# CHECK-NEXT: Instance 0 available @14c -# CHECK-NEXT: Instance 1 available @13c -# CHECK-NEXT: selecting CortexA55UnitFPALU[1] available @13c -# CHECK-NEXT: Resource booking (@13c): -# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 12 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 10 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@13c): -# CHECK-NEXT: Instance 0 available @14c +# CHECK-NEXT: CortexA510UnitALU +1x2u +# CHECK-NEXT: Resource booking (@13c): +# CHECK-NEXT: CortexA510UnitALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(2) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(1) = 4294967295 +# CHECK-NEXT: 
CortexA510UnitB(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitDiv(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLdSt(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitPAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMC(0) = 4294967295 +# CHECK-NEXT: getNextResourceCycle (@13c): +# CHECK-NEXT: Instance 0 available @13c # CHECK-NEXT: Instance 1 available @13c -# CHECK-NEXT: selecting CortexA55UnitFPALU[1] available @13c -# CHECK-NEXT: BotQ.A BotLatency SU(3) 11c -# CHECK-NEXT: Bump cycle to begin group -# CHECK-NEXT: Cycle: 14 BotQ.A -# CHECK-NEXT: BotQ.A @14c -# CHECK-NEXT: Retired: 9 -# CHECK-NEXT: Executed: 14c -# CHECK-NEXT: Critical: 6c, 12 CortexA55UnitFPALU -# CHECK-NEXT: ExpectedLatency: 11c -# CHECK-NEXT: - Latency limited. -# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 12 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 13 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: ** ScheduleDAGMILive::schedule picking next node -# CHECK-NEXT: Resource booking (@14c): -# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 12 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 13 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@14c): -# CHECK-NEXT: Instance 0 available @14c -# CHECK-NEXT: Instance 1 available @14c -# CHECK-NEXT: selecting CortexA55UnitALU[0] available @14c -# CHECK-NEXT: Queue BotQ.P: 1 4 2 -# CHECK-NEXT: Queue BotQ.A: 0 -# CHECK-NEXT: Scheduling SU(0) %2:fpr128 = COPY $q2 -# CHECK-NEXT: Ready @14c -# CHECK-NEXT: CortexA55UnitALU +1x1u -# CHECK-NEXT: Resource booking (@14c): -# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 12 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 13 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@14c): -# CHECK-NEXT: Instance 0 available @14c -# CHECK-NEXT: Instance 1 available @14c -# CHECK-NEXT: selecting 
CortexA55UnitALU[0] available @14c -# CHECK-NEXT: Resource booking (@14c): -# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 12 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 13 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@14c): -# CHECK-NEXT: Instance 0 available @14c -# CHECK-NEXT: Instance 1 available @14c -# CHECK-NEXT: selecting CortexA55UnitALU[0] available @14c -# CHECK-NEXT: BotQ.A BotLatency SU(0) 13c -# CHECK-NEXT: BotQ.A @14c -# CHECK-NEXT: Retired: 10 -# CHECK-NEXT: Executed: 14c -# CHECK-NEXT: Critical: 6c, 12 CortexA55UnitFPALU +# CHECK-NEXT: Instance 2 available @13c +# CHECK-NEXT: selecting CortexA510UnitALU[0] available @13c +# CHECK-NEXT: BotQ.A BotLatency SU(2) 13c +# CHECK-NEXT: BotQ.A @13c +# CHECK-NEXT: Retired: 11 +# CHECK-NEXT: Executed: 13c +# CHECK-NEXT: Critical: 3c, 7 CortexA510UnitVALU # CHECK-NEXT: ExpectedLatency: 13c # CHECK-NEXT: - Latency limited. -# CHECK-NEXT: CortexA55UnitALU(0) = 14 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 12 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 13 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(2) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitB(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitDiv(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLdSt(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitPAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMC(0) = 4294967295 # CHECK-NEXT: ** ScheduleDAGMILive::schedule picking next node -# CHECK-NEXT: Cycle: 15 BotQ.A -# CHECK-NEXT: Resource booking (@15c): -# CHECK-NEXT: CortexA55UnitALU(0) = 14 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 12 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 13 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 
4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@15c): -# CHECK-NEXT: Instance 0 available @15c -# CHECK-NEXT: Instance 1 available @15c -# CHECK-NEXT: selecting CortexA55UnitALU[0] available @15c -# CHECK-NEXT: Queue BotQ.P: 2 4 -# CHECK-NEXT: Queue BotQ.A: 1 -# CHECK-NEXT: Scheduling SU(1) %1:fpr128 = COPY $q1 -# CHECK-NEXT: Ready @15c -# CHECK-NEXT: CortexA55UnitALU +1x1u -# CHECK-NEXT: Resource booking (@15c): -# CHECK-NEXT: CortexA55UnitALU(0) = 14 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 12 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 13 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@15c): -# CHECK-NEXT: Instance 0 available @15c -# CHECK-NEXT: Instance 1 available @15c -# CHECK-NEXT: selecting CortexA55UnitALU[0] available @15c -# CHECK-NEXT: Resource booking (@15c): -# CHECK-NEXT: CortexA55UnitALU(0) = 14 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 12 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 13 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@15c): -# CHECK-NEXT: Instance 0 available @15c -# CHECK-NEXT: Instance 1 available @15c -# CHECK-NEXT: selecting CortexA55UnitALU[0] available @15c -# CHECK-NEXT: BotQ.A @15c -# CHECK-NEXT: Retired: 11 -# CHECK-NEXT: Executed: 15c -# CHECK-NEXT: Critical: 6c, 12 CortexA55UnitFPALU +# CHECK-NEXT: Queue BotQ.P: 0 +# CHECK-NEXT: Queue BotQ.A: 1 +# CHECK-NEXT: Scheduling SU(1) %2:fpr128 = COPY $q2 +# CHECK-NEXT: Ready @13c +# CHECK-NEXT: CortexA510UnitALU +1x2u +# CHECK-NEXT: Resource booking (@13c): +# CHECK-NEXT: CortexA510UnitALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(2) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitB(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitDiv(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLdSt(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitPAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMC(0) = 4294967295 +# CHECK-NEXT: getNextResourceCycle (@13c): +# CHECK-NEXT: Instance 0 available @13c +# CHECK-NEXT: Instance 1 available @13c +# CHECK-NEXT: Instance 2 available @13c +# CHECK-NEXT: 
selecting CortexA510UnitALU[0] available @13c +# CHECK-NEXT: BotQ.A @13c +# CHECK-NEXT: Retired: 12 +# CHECK-NEXT: Executed: 13c +# CHECK-NEXT: Critical: 3c, 7 CortexA510UnitVALU # CHECK-NEXT: ExpectedLatency: 13c # CHECK-NEXT: - Latency limited. -# CHECK-NEXT: CortexA55UnitALU(0) = 15 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 12 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 13 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(2) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitB(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitDiv(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLdSt(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitPAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMC(0) = 4294967295 # CHECK-NEXT: ** ScheduleDAGMILive::schedule picking next node +# CHECK-NEXT: Cycle: 14 BotQ.A # CHECK-NEXT: Cycle: 16 BotQ.A -# CHECK-NEXT: Resource booking (@16c): -# CHECK-NEXT: CortexA55UnitALU(0) = 15 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 12 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 13 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@16c): -# CHECK-NEXT: Instance 0 available @16c -# CHECK-NEXT: Instance 1 available @16c -# CHECK-NEXT: selecting CortexA55UnitALU[0] available @16c -# CHECK-NEXT: Resource booking (@16c): -# CHECK-NEXT: CortexA55UnitALU(0) = 15 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 12 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 13 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@16c): -# CHECK-NEXT: Instance 0 available @16c -# CHECK-NEXT: Instance 1 available @16c -# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @16c -# CHECK-NEXT: Queue BotQ.P: -# 
CHECK-NEXT: Queue BotQ.A: 2 4 -# CHECK-NEXT: Cand SU(2) ORDER -# CHECK-NEXT: Cand SU(4) PHYS-REG -# CHECK-NEXT: Pick Bot PHYS-REG -# CHECK-NEXT: Scheduling SU(4) %6:fpr128 = MOVIv2d_ns 17 +# CHECK-NEXT: Queue BotQ.P: +# CHECK-NEXT: Queue BotQ.A: 0 +# CHECK-NEXT: Scheduling SU(0) %6:fpr128 = MOVIv2d_ns 17 # CHECK-NEXT: Ready @16c -# CHECK-NEXT: CortexA55UnitFPALU +2x1u -# CHECK-NEXT: Resource booking (@16c): -# CHECK-NEXT: CortexA55UnitALU(0) = 15 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 12 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 13 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@16c): +# CHECK-NEXT: CortexA510UnitVALU +1x3u +# CHECK-NEXT: Resource booking (@16c): +# CHECK-NEXT: CortexA510UnitALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(2) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitB(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitDiv(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLdSt(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitPAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMC(0) = 4294967295 +# CHECK-NEXT: getNextResourceCycle (@16c): # CHECK-NEXT: Instance 0 available @16c # CHECK-NEXT: Instance 1 available @16c -# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @16c -# CHECK-NEXT: Resource booking (@16c): -# CHECK-NEXT: CortexA55UnitALU(0) = 15 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 12 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 13 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@16c): -# CHECK-NEXT: Instance 0 available @16c -# CHECK-NEXT: Instance 1 available @16c -# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @16c -# CHECK-NEXT: BotQ.A BotLatency SU(4) 14c -# CHECK-NEXT: Bump cycle to begin group -# CHECK-NEXT: Cycle: 17 BotQ.A -# CHECK-NEXT: BotQ.A @17c -# CHECK-NEXT: Retired: 12 -# CHECK-NEXT: Executed: 17c -# CHECK-NEXT: Critical: 7c, 14 CortexA55UnitFPALU -# CHECK-NEXT: ExpectedLatency: 14c -# CHECK-NEXT: - Latency limited. 
-# CHECK-NEXT: CortexA55UnitALU(0) = 15 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 16 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 13 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: ** ScheduleDAGMILive::schedule picking next node -# CHECK-NEXT: Resource booking (@17c): -# CHECK-NEXT: CortexA55UnitALU(0) = 15 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 16 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 13 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@17c): -# CHECK-NEXT: Instance 0 available @17c -# CHECK-NEXT: Instance 1 available @17c -# CHECK-NEXT: selecting CortexA55UnitALU[0] available @17c -# CHECK-NEXT: Queue BotQ.P: -# CHECK-NEXT: Queue BotQ.A: 2 -# CHECK-NEXT: Scheduling SU(2) %0:fpr128 = COPY $q0 -# CHECK-NEXT: Ready @17c -# CHECK-NEXT: CortexA55UnitALU +1x1u -# CHECK-NEXT: Resource booking (@17c): -# CHECK-NEXT: CortexA55UnitALU(0) = 15 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 16 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 13 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@17c): -# CHECK-NEXT: Instance 0 available @17c -# CHECK-NEXT: Instance 1 available @17c -# CHECK-NEXT: selecting CortexA55UnitALU[0] available @17c -# CHECK-NEXT: Resource booking (@17c): -# CHECK-NEXT: CortexA55UnitALU(0) = 15 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 16 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 13 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@17c): -# CHECK-NEXT: Instance 0 available @17c -# CHECK-NEXT: Instance 1 available @17c -# CHECK-NEXT: selecting CortexA55UnitALU[0] available @17c -# CHECK-NEXT: BotQ.A @17c +# CHECK-NEXT: selecting CortexA510UnitVALU[0] available @16c +# CHECK-NEXT: BotQ.A BotLatency SU(0) 16c +# CHECK-NEXT: BotQ.A @16c # CHECK-NEXT: Retired: 13 -# CHECK-NEXT: Executed: 17c -# CHECK-NEXT: Critical: 7c, 14 CortexA55UnitFPALU -# CHECK-NEXT: ExpectedLatency: 14c +# CHECK-NEXT: Executed: 16c +# CHECK-NEXT: Critical: 4c, 8 CortexA510UnitVALU +# CHECK-NEXT: ExpectedLatency: 16c # CHECK-NEXT: - Latency 
limited. -# CHECK-NEXT: CortexA55UnitALU(0) = 17 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 16 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 13 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU(2) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitALU12(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitB(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitDiv(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitLd1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitLdSt(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitPAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU0(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVALU1(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(0) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMAC(1) = 4294967295 +# CHECK-NEXT: CortexA510UnitVMC(0) = 4294967295 # CHECK-NEXT: ** ScheduleDAGMILive::schedule picking next node # CHECK-NEXT: *** Final schedule for %bb.0 *** # CHECK-NEXT: * Schedule table (BottomUp): # CHECK-NEXT: i: issue # CHECK-NEXT: x: resource booked -# CHECK-NEXT: Cycle | 17 | 16 | 15 | 14 | 13 | 12 | 11 | 10 | 9 | 8 | 7 | 6 | 5 | 4 | 3 | -# CHECK-NEXT: SU(2) | i | | | | | | | | | | | | | | | -# CHECK-NEXT: CortexA55UnitALU | x | | | | | | | | | | | | | | | -# CHECK-NEXT: SU(4) | | i | | | | | | | | | | | | | | -# CHECK-NEXT: CortexA55UnitFPALU | | x | x | | | | | | | | | | | | | -# CHECK-NEXT: SU(1) | | | i | | | | | | | | | | | | | -# CHECK-NEXT: CortexA55UnitALU | | | x | | | | | | | | | | | | | -# CHECK-NEXT: SU(0) | | | | i | | | | | | | | | | | | -# CHECK-NEXT: CortexA55UnitALU | | | | x | | | | | | | | | | | | -# CHECK-NEXT: SU(3) | | | | | i | | | | | | | | | | | -# CHECK-NEXT: CortexA55UnitFPALU | | | | | x | x | | | | | | | | | | -# CHECK-NEXT: SU(6) | | | | | | i | | | | | | | | | | -# CHECK-NEXT: CortexA55UnitFPALU | | | | | | x | x | | | | | | | | | -# CHECK-NEXT: SU(5) | | | | | | | | i | | | | | | | | -# CHECK-NEXT: CortexA55UnitFPALU | | | | | | | | x | x | | | | | | | -# CHECK-NEXT: SU(7) | | | | | | | | i | | | | | | | | -# CHECK-NEXT: CortexA55UnitFPALU | | | | | | | | x | | | | | | | | -# CHECK-NEXT: SU(9) | | | | | | | | | i | | | | | | | -# CHECK-NEXT: CortexA55UnitFPALU | | | | | | | | | x | | | | | | | -# CHECK-NEXT: SU(8) | | | | | | | | | | i | | | | | | -# CHECK-NEXT: CortexA55UnitFPALU | | | | | | | | | | x | x | | | | | -# CHECK-NEXT: SU(10) | | | | | | | | | | | i | | | | | -# CHECK-NEXT: CortexA55UnitFPALU | | | | | | | | | | | x | x | | | | -# CHECK-NEXT: SU(11) | | | | | | | | | | | | | | | i | -# CHECK-NEXT: CortexA55UnitALU | | | | | | | | | | | | | | | x | -# CHECK-NEXT: SU(12) | | | | | | | | | | | | | | | i | -# CHECK-NEXT: CortexA55UnitALU | | | | | | | | | | | | | | | x | -# CHECK-NEXT: SU(2) [TopReadyCycle = 0, 
BottomReadyCycle = 17]: %0:fpr128 = COPY $q0 -# CHECK-NEXT: SU(4) [TopReadyCycle = 0, BottomReadyCycle = 16]: %6:fpr128 = MOVIv2d_ns 17 -# CHECK-NEXT: SU(1) [TopReadyCycle = 0, BottomReadyCycle = 15]: %1:fpr128 = COPY $q1 -# CHECK-NEXT: SU(0) [TopReadyCycle = 0, BottomReadyCycle = 14]: %2:fpr128 = COPY $q2 -# CHECK-NEXT: SU(3) [TopReadyCycle = 0, BottomReadyCycle = 13]: %3:fpr128 = EXTv16i8 %0:fpr128, %0:fpr128, 8 -# CHECK-NEXT: SU(6) [TopReadyCycle = 0, BottomReadyCycle = 12]: %8:fpr128 = ANDv16i8 %1:fpr128, %6:fpr128 -# CHECK-NEXT: SU(5) [TopReadyCycle = 0, BottomReadyCycle = 10]: %7:fpr128 = ANDv16i8 %2:fpr128, %6:fpr128 -# CHECK-NEXT: SU(7) [TopReadyCycle = 0, BottomReadyCycle = 10]: %9:fpr64 = XTNv4i16 %8:fpr128 -# CHECK-NEXT: SU(9) [TopReadyCycle = 0, BottomReadyCycle = 9]: %11:fpr64 = XTNv4i16 %7:fpr128 -# CHECK-NEXT: SU(8) [TopReadyCycle = 0, BottomReadyCycle = 8]: %10:fpr128 = UMULLv4i16_v4i32 %0.dsub:fpr128, %9:fpr64 -# CHECK-NEXT: SU(10) [TopReadyCycle = 0, BottomReadyCycle = 7]: %12:fpr128 = UMULLv4i16_v4i32 %3.dsub:fpr128, %11:fpr64 -# CHECK-NEXT: SU(11) [TopReadyCycle = 0, BottomReadyCycle = 3]: $q0 = COPY %10:fpr128 -# CHECK-NEXT: SU(12) [TopReadyCycle = 0, BottomReadyCycle = 3]: $q1 = COPY %12:fpr128 -# CHECK-EMPTY: +# CHECK-NEXT: Cycle | 16 | 15 | 14 | 13 | 12 | 11 | 10 | 9 | 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | +# CHECK-NEXT: SU(0) | i | | | | | | | | | | | | | | | | +# CHECK-NEXT: CortexA510UnitVALU | x | | | | | | | | | | | | | | | | +# CHECK-NEXT: SU(1) | | | | i | | | | | | | | | | | | | +# CHECK-NEXT: CortexA510UnitALU | | | | x | | | | | | | | | | | | | +# CHECK-NEXT: SU(2) | | | | i | | | | | | | | | | | | | +# CHECK-NEXT: CortexA510UnitALU | | | | x | | | | | | | | | | | | | +# CHECK-NEXT: SU(3) | | | | | i | | | | | | | | | | | | +# CHECK-NEXT: CortexA510UnitVALU | | | | | x | | | | | | | | | | | | +# CHECK-NEXT: SU(4) | | | | | i | | | | | | | | | | | | +# CHECK-NEXT: CortexA510UnitVALU | | | | | x | | | | | | | | | | | | +# CHECK-NEXT: SU(5) | | | | | | | i | | | | | | | | | | +# CHECK-NEXT: CortexA510UnitALU | | | | | | | x | | | | | | | | | | +# CHECK-NEXT: SU(6) | | | | | | | | i | | | | | | | | | +# CHECK-NEXT: CortexA510UnitVALU | | | | | | | | x | | | | | | | | | +# CHECK-NEXT: SU(7) | | | | | | | | i | | | | | | | | | +# CHECK-NEXT: CortexA510UnitVALU | | | | | | | | x | | | | | | | | | +# CHECK-NEXT: SU(8) | | | | | | | | i | | | | | | | | | +# CHECK-NEXT: CortexA510UnitVALU | | | | | | | | x | | | | | | | | | +# CHECK-NEXT: SU(9) | | | | | | | | | | | | i | | | | | +# CHECK-NEXT: CortexA510UnitVALU | | | | | | | | | | | | x | | | | | +# CHECK-NEXT: SU(10) | | | | | | | | | | | | i | | | | | +# CHECK-NEXT: CortexA510UnitVALU | | | | | | | | | | | | x | | | | | +# CHECK-NEXT: SU(11) | | | | | | | | | | | | | | | | i | +# CHECK-NEXT: CortexA510UnitALU | | | | | | | | | | | | | | | | x | +# CHECK-NEXT: SU(12) | | | | | | | | | | | | | | | | i | +# CHECK-NEXT: CortexA510UnitALU | | | | | | | | | | | | | | | | x | +# CHECK-NEXT: SU(0) [TopReadyCycle = 0, BottomReadyCycle = 16]: %6:fpr128 = MOVIv2d_ns 17 +# CHECK-NEXT: SU(1) [TopReadyCycle = 0, BottomReadyCycle = 13]: %2:fpr128 = COPY $q2 +# CHECK-NEXT: SU(2) [TopReadyCycle = 0, BottomReadyCycle = 13]: %1:fpr128 = COPY $q1 +# CHECK-NEXT: SU(3) [TopReadyCycle = 0, BottomReadyCycle = 12]: %7:fpr128 = ANDv16i8 %2:fpr128, %6:fpr128 +# CHECK-NEXT: SU(4) [TopReadyCycle = 0, BottomReadyCycle = 12]: %8:fpr128 = ANDv16i8 %1:fpr128, %6:fpr128 +# CHECK-NEXT: SU(5) [TopReadyCycle = 0, BottomReadyCycle = 10]: %0:fpr128 = COPY 
$q0 +# CHECK-NEXT: SU(6) [TopReadyCycle = 0, BottomReadyCycle = 9]: %3:fpr128 = EXTv16i8 %0:fpr128, %0:fpr128, 8 +# CHECK-NEXT: SU(7) [TopReadyCycle = 0, BottomReadyCycle = 9]: %9:fpr64 = XTNv4i16 %8:fpr128 +# CHECK-NEXT: SU(8) [TopReadyCycle = 0, BottomReadyCycle = 9]: %11:fpr64 = XTNv4i16 %7:fpr128 +# CHECK-NEXT: SU(9) [TopReadyCycle = 0, BottomReadyCycle = 5]: %10:fpr128 = UMULLv4i16_v4i32 %0.dsub:fpr128, %9:fpr64 +# CHECK-NEXT: SU(10) [TopReadyCycle = 0, BottomReadyCycle = 5]: %12:fpr128 = UMULLv4i16_v4i32 %3.dsub:fpr128, %11:fpr64 +# CHECK-NEXT: SU(11) [TopReadyCycle = 0, BottomReadyCycle = 1]: $q0 = COPY %10:fpr128 +# CHECK-NEXT: SU(12) [TopReadyCycle = 0, BottomReadyCycle = 1]: $q1 = COPY %12:fpr128 +# CHECK-EMPTY: # CHECK-NEXT: ********** INTERVALS ********** -# CHECK-NEXT: B0 [0B,48r:0)[192r,224r:1) 0@0B-phi 1@192r -# CHECK-NEXT: B1 [0B,88r:0)[208r,224r:1) 0@0B-phi 1@208r -# CHECK-NEXT: B2 [0B,96r:0) 0@0B-phi -# CHECK-NEXT: %0 [48r,168r:0) 0@48r weight:0.000000e+00 -# CHECK-NEXT: %1 [88r,120r:0) 0@88r weight:0.000000e+00 -# CHECK-NEXT: %2 [96r,128r:0) 0@96r weight:0.000000e+00 -# CHECK-NEXT: %3 [104r,176r:0) 0@104r weight:0.000000e+00 -# CHECK-NEXT: %6 [80r,128r:0) 0@80r weight:0.000000e+00 -# CHECK-NEXT: %7 [128r,160r:0) 0@128r weight:0.000000e+00 -# CHECK-NEXT: %8 [120r,136r:0) 0@120r weight:0.000000e+00 -# CHECK-NEXT: %9 [136r,168r:0) 0@136r weight:0.000000e+00 -# CHECK-NEXT: %10 [168r,192r:0) 0@168r weight:0.000000e+00 -# CHECK-NEXT: %11 [160r,176r:0) 0@160r weight:0.000000e+00 +# CHECK-NEXT: B0 [0B,96r:0)[192r,224r:1) 0@0B-phi 1@192r +# CHECK-NEXT: B1 [0B,48r:0)[208r,224r:1) 0@0B-phi 1@208r +# CHECK-NEXT: B2 [0B,32r:0) 0@0B-phi +# CHECK-NEXT: %0 [96r,160r:0) 0@96r weight:0.000000e+00 +# CHECK-NEXT: %1 [48r,80r:0) 0@48r weight:0.000000e+00 +# CHECK-NEXT: %2 [32r,64r:0) 0@32r weight:0.000000e+00 +# CHECK-NEXT: %3 [112r,176r:0) 0@112r weight:0.000000e+00 +# CHECK-NEXT: %6 [16r,80r:0) 0@16r weight:0.000000e+00 +# CHECK-NEXT: %7 [64r,144r:0) 0@64r weight:0.000000e+00 +# CHECK-NEXT: %8 [80r,128r:0) 0@80r weight:0.000000e+00 +# CHECK-NEXT: %9 [128r,160r:0) 0@128r weight:0.000000e+00 +# CHECK-NEXT: %10 [160r,192r:0) 0@160r weight:0.000000e+00 +# CHECK-NEXT: %11 [144r,176r:0) 0@144r weight:0.000000e+00 # CHECK-NEXT: %12 [176r,208r:0) 0@176r weight:0.000000e+00 # CHECK-NEXT: RegMasks: # CHECK-NEXT: ********** MACHINEINSTRS ********** # CHECK-NEXT: # Machine code for function umull_and_v8i32: IsSSA, NoPHIs, TracksLiveness # CHECK-NEXT: Function Live Ins: $q0 in %0, $q1 in %1, $q2 in %2 -# CHECK-EMPTY: +# CHECK-EMPTY: # CHECK-NEXT: 0B bb.0.entry: # CHECK-NEXT: liveins: $q0, $q1, $q2 -# CHECK-NEXT: 48B %0:fpr128 = COPY $q0 -# CHECK-NEXT: 80B %6:fpr128 = MOVIv2d_ns 17 -# CHECK-NEXT: 88B %1:fpr128 = COPY $q1 -# CHECK-NEXT: 96B %2:fpr128 = COPY $q2 -# CHECK-NEXT: 104B %3:fpr128 = EXTv16i8 %0:fpr128, %0:fpr128, 8 -# CHECK-NEXT: 120B %8:fpr128 = ANDv16i8 %1:fpr128, %6:fpr128 -# CHECK-NEXT: 128B %7:fpr128 = ANDv16i8 %2:fpr128, %6:fpr128 -# CHECK-NEXT: 136B %9:fpr64 = XTNv4i16 %8:fpr128 -# CHECK-NEXT: 160B %11:fpr64 = XTNv4i16 %7:fpr128 -# CHECK-NEXT: 168B %10:fpr128 = UMULLv4i16_v4i32 %0.dsub:fpr128, %9:fpr64 +# CHECK-NEXT: 16B %6:fpr128 = MOVIv2d_ns 17 +# CHECK-NEXT: 32B %2:fpr128 = COPY $q2 +# CHECK-NEXT: 48B %1:fpr128 = COPY $q1 +# CHECK-NEXT: 64B %7:fpr128 = ANDv16i8 %2:fpr128, %6:fpr128 +# CHECK-NEXT: 80B %8:fpr128 = ANDv16i8 %1:fpr128, %6:fpr128 +# CHECK-NEXT: 96B %0:fpr128 = COPY $q0 +# CHECK-NEXT: 112B %3:fpr128 = EXTv16i8 %0:fpr128, %0:fpr128, 8 +# CHECK-NEXT: 128B %9:fpr64 = XTNv4i16 
%8:fpr128 +# CHECK-NEXT: 144B %11:fpr64 = XTNv4i16 %7:fpr128 +# CHECK-NEXT: 160B %10:fpr128 = UMULLv4i16_v4i32 %0.dsub:fpr128, %9:fpr64 # CHECK-NEXT: 176B %12:fpr128 = UMULLv4i16_v4i32 %3.dsub:fpr128, %11:fpr64 # CHECK-NEXT: 192B $q0 = COPY %10:fpr128 # CHECK-NEXT: 208B $q1 = COPY %12:fpr128 -# CHECK-NEXT: 224B RET_ReallyLR implicit $q0, implicit $q1 -# CHECK-EMPTY: -# CHECK-NEXT: # End machine code for function umull_and_v8i32. +# CHECK-NEXT: 224B RET_ReallyLR implicit $q0, implicit $q1 \ No newline at end of file diff --git a/llvm/test/CodeGen/AArch64/misched-fusion-lit.ll b/llvm/test/CodeGen/AArch64/misched-fusion-lit.ll --- a/llvm/test/CodeGen/AArch64/misched-fusion-lit.ll +++ b/llvm/test/CodeGen/AArch64/misched-fusion-lit.ll @@ -33,6 +33,7 @@ ; CHECK-LABEL: litp_tune_generic: ; CHECK: adrp [[R:x[0-9]+]], litp_tune_generic +; CHECKDONT: add {{w[0-9]+}}, {{w[0-9]+}}, {{w[0-9]+}} ; CHECK-NEXT: add {{x[0-9]+}}, [[R]], :lo12:litp_tune_generic } diff --git a/llvm/test/CodeGen/AArch64/mul_pow2.ll b/llvm/test/CodeGen/AArch64/mul_pow2.ll --- a/llvm/test/CodeGen/AArch64/mul_pow2.ll +++ b/llvm/test/CodeGen/AArch64/mul_pow2.ll @@ -107,13 +107,13 @@ define i64 @test6_umull(i32 %x) { ; CHECK-LABEL: test6_umull: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #6 +; CHECK-NEXT: mov w8, #6 // =0x6 ; CHECK-NEXT: umull x0, w0, w8 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test6_umull: ; GISEL: // %bb.0: -; GISEL-NEXT: mov w8, #6 +; GISEL-NEXT: mov w8, #6 // =0x6 ; GISEL-NEXT: umull x0, w0, w8 ; GISEL-NEXT: ret @@ -125,13 +125,13 @@ define i64 @test6_smull(i32 %x) { ; CHECK-LABEL: test6_smull: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #6 +; CHECK-NEXT: mov w8, #6 // =0x6 ; CHECK-NEXT: smull x0, w0, w8 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test6_smull: ; GISEL: // %bb.0: -; GISEL-NEXT: mov w8, #6 +; GISEL-NEXT: mov w8, #6 // =0x6 ; GISEL-NEXT: smull x0, w0, w8 ; GISEL-NEXT: ret @@ -143,13 +143,13 @@ define i32 @test6_madd(i32 %x, i32 %y) { ; CHECK-LABEL: test6_madd: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #6 +; CHECK-NEXT: mov w8, #6 // =0x6 ; CHECK-NEXT: madd w0, w0, w8, w1 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test6_madd: ; GISEL: // %bb.0: -; GISEL-NEXT: mov w8, #6 +; GISEL-NEXT: mov w8, #6 // =0x6 ; GISEL-NEXT: madd w0, w0, w8, w1 ; GISEL-NEXT: ret @@ -161,13 +161,13 @@ define i32 @test6_msub(i32 %x, i32 %y) { ; CHECK-LABEL: test6_msub: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #6 +; CHECK-NEXT: mov w8, #6 // =0x6 ; CHECK-NEXT: msub w0, w0, w8, w1 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test6_msub: ; GISEL: // %bb.0: -; GISEL-NEXT: mov w8, #6 +; GISEL-NEXT: mov w8, #6 // =0x6 ; GISEL-NEXT: msub w0, w0, w8, w1 ; GISEL-NEXT: ret @@ -179,13 +179,13 @@ define i64 @test6_umaddl(i32 %x, i64 %y) { ; CHECK-LABEL: test6_umaddl: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #6 +; CHECK-NEXT: mov w8, #6 // =0x6 ; CHECK-NEXT: umaddl x0, w0, w8, x1 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test6_umaddl: ; GISEL: // %bb.0: -; GISEL-NEXT: mov w8, #6 +; GISEL-NEXT: mov w8, #6 // =0x6 ; GISEL-NEXT: umaddl x0, w0, w8, x1 ; GISEL-NEXT: ret @@ -198,13 +198,13 @@ define i64 @test6_smaddl(i32 %x, i64 %y) { ; CHECK-LABEL: test6_smaddl: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #6 +; CHECK-NEXT: mov w8, #6 // =0x6 ; CHECK-NEXT: smaddl x0, w0, w8, x1 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test6_smaddl: ; GISEL: // %bb.0: -; GISEL-NEXT: mov w8, #6 +; GISEL-NEXT: mov w8, #6 // =0x6 ; GISEL-NEXT: smaddl x0, w0, w8, x1 ; GISEL-NEXT: ret @@ -217,13 +217,13 @@ define i64 @test6_umsubl(i32 %x, i64 %y) { ; CHECK-LABEL: test6_umsubl: ; CHECK: // %bb.0: -; CHECK-NEXT: mov 
w8, #6 +; CHECK-NEXT: mov w8, #6 // =0x6 ; CHECK-NEXT: umsubl x0, w0, w8, x1 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test6_umsubl: ; GISEL: // %bb.0: -; GISEL-NEXT: mov w8, #6 +; GISEL-NEXT: mov w8, #6 // =0x6 ; GISEL-NEXT: umsubl x0, w0, w8, x1 ; GISEL-NEXT: ret @@ -236,13 +236,13 @@ define i64 @test6_smsubl(i32 %x, i64 %y) { ; CHECK-LABEL: test6_smsubl: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #6 +; CHECK-NEXT: mov w8, #6 // =0x6 ; CHECK-NEXT: smsubl x0, w0, w8, x1 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test6_smsubl: ; GISEL: // %bb.0: -; GISEL-NEXT: mov w8, #6 +; GISEL-NEXT: mov w8, #6 // =0x6 ; GISEL-NEXT: smsubl x0, w0, w8, x1 ; GISEL-NEXT: ret @@ -255,13 +255,13 @@ define i64 @test6_umnegl(i32 %x) { ; CHECK-LABEL: test6_umnegl: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #6 +; CHECK-NEXT: mov w8, #6 // =0x6 ; CHECK-NEXT: umnegl x0, w0, w8 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test6_umnegl: ; GISEL: // %bb.0: -; GISEL-NEXT: mov w8, #6 +; GISEL-NEXT: mov w8, #6 // =0x6 ; GISEL-NEXT: umnegl x0, w0, w8 ; GISEL-NEXT: ret @@ -274,13 +274,13 @@ define i64 @test6_smnegl(i32 %x) { ; CHECK-LABEL: test6_smnegl: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #6 +; CHECK-NEXT: mov w8, #6 // =0x6 ; CHECK-NEXT: smnegl x0, w0, w8 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test6_smnegl: ; GISEL: // %bb.0: -; GISEL-NEXT: mov w8, #6 +; GISEL-NEXT: mov w8, #6 // =0x6 ; GISEL-NEXT: smnegl x0, w0, w8 ; GISEL-NEXT: ret @@ -294,15 +294,15 @@ define i32 @mull6_sub(i32 %x) { ; CHECK-LABEL: mull6_sub: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #6 -; CHECK-NEXT: mov w9, #-1 +; CHECK-NEXT: mov w8, #6 // =0x6 +; CHECK-NEXT: mov w9, #-1 // =0xffffffff ; CHECK-NEXT: madd w0, w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: mull6_sub: ; GISEL: // %bb.0: -; GISEL-NEXT: mov w8, #6 -; GISEL-NEXT: mov w9, #-1 +; GISEL-NEXT: mov w8, #6 // =0x6 +; GISEL-NEXT: mov w9, #-1 // =0xffffffff ; GISEL-NEXT: madd w0, w0, w8, w9 ; GISEL-NEXT: ret %mul = mul nsw i32 %x, 6 @@ -313,15 +313,15 @@ define i64 @mull6_sub_orr(i64 %x) { ; CHECK-LABEL: mull6_sub_orr: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #6 -; CHECK-NEXT: mov x9, #16773120 +; CHECK-NEXT: mov w8, #6 // =0x6 +; CHECK-NEXT: mov x9, #16773120 // =0xfff000 ; CHECK-NEXT: madd x0, x0, x8, x9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: mull6_sub_orr: ; GISEL: // %bb.0: -; GISEL-NEXT: mov w8, #6 -; GISEL-NEXT: mov x9, #16773120 +; GISEL-NEXT: mov w8, #6 // =0x6 +; GISEL-NEXT: mov x9, #16773120 // =0xfff000 ; GISEL-NEXT: madd x0, x0, x8, x9 ; GISEL-NEXT: ret %mul = mul nsw i64 %x, 6 @@ -396,13 +396,13 @@ define i32 @test11(i32 %x) { ; CHECK-LABEL: test11: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #11 +; CHECK-NEXT: mov w8, #11 // =0xb ; CHECK-NEXT: mul w0, w0, w8 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test11: ; GISEL: // %bb.0: -; GISEL-NEXT: mov w8, #11 +; GISEL-NEXT: mov w8, #11 // =0xb ; GISEL-NEXT: mul w0, w0, w8 ; GISEL-NEXT: ret @@ -430,13 +430,13 @@ define i32 @test13(i32 %x) { ; CHECK-LABEL: test13: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #13 +; CHECK-NEXT: mov w8, #13 // =0xd ; CHECK-NEXT: mul w0, w0, w8 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test13: ; GISEL: // %bb.0: -; GISEL-NEXT: mov w8, #13 +; GISEL-NEXT: mov w8, #13 // =0xd ; GISEL-NEXT: mul w0, w0, w8 ; GISEL-NEXT: ret @@ -453,7 +453,7 @@ ; ; GISEL-LABEL: test14: ; GISEL: // %bb.0: -; GISEL-NEXT: mov w8, #14 +; GISEL-NEXT: mov w8, #14 // =0xe ; GISEL-NEXT: mul w0, w0, w8 ; GISEL-NEXT: ret @@ -502,7 +502,7 @@ ; ; GISEL-LABEL: test25_fast_shift: ; GISEL: // %bb.0: -; GISEL-NEXT: mov w8, #25 +; GISEL-NEXT: mov w8, #25 // =0x19 ; GISEL-NEXT: mul w0, w0, w8 ; 
GISEL-NEXT: ret @@ -519,7 +519,7 @@ ; ; GISEL-LABEL: test45_fast_shift: ; GISEL: // %bb.0: -; GISEL-NEXT: mov w8, #45 +; GISEL-NEXT: mov w8, #45 // =0x2d ; GISEL-NEXT: mul w0, w0, w8 ; GISEL-NEXT: ret @@ -531,13 +531,13 @@ define i32 @test45(i32 %x) { ; CHECK-LABEL: test45: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #45 +; CHECK-NEXT: mov w8, #45 // =0x2d ; CHECK-NEXT: mul w0, w0, w8 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test45: ; GISEL: // %bb.0: -; GISEL-NEXT: mov w8, #45 +; GISEL-NEXT: mov w8, #45 // =0x2d ; GISEL-NEXT: mul w0, w0, w8 ; GISEL-NEXT: ret @@ -549,13 +549,13 @@ define i32 @test85_fast_shift(i32 %x) "target-features"="+lsl-fast" { ; CHECK-LABEL: test85_fast_shift: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #85 +; CHECK-NEXT: mov w8, #85 // =0x55 ; CHECK-NEXT: mul w0, w0, w8 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test85_fast_shift: ; GISEL: // %bb.0: -; GISEL-NEXT: mov w8, #85 +; GISEL-NEXT: mov w8, #85 // =0x55 ; GISEL-NEXT: mul w0, w0, w8 ; GISEL-NEXT: ret @@ -567,13 +567,13 @@ define i32 @test297_fast_shift(i32 %x) "target-features"="+lsl-fast" { ; CHECK-LABEL: test297_fast_shift: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #297 +; CHECK-NEXT: mov w8, #297 // =0x129 ; CHECK-NEXT: mul w0, w0, w8 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test297_fast_shift: ; GISEL: // %bb.0: -; GISEL-NEXT: mov w8, #297 +; GISEL-NEXT: mov w8, #297 // =0x129 ; GISEL-NEXT: mul w0, w0, w8 ; GISEL-NEXT: ret @@ -593,7 +593,7 @@ ; ; GISEL-LABEL: ntest2: ; GISEL: // %bb.0: -; GISEL-NEXT: mov w8, #-2 +; GISEL-NEXT: mov w8, #-2 // =0xfffffffe ; GISEL-NEXT: mul w0, w0, w8 ; GISEL-NEXT: ret @@ -624,7 +624,7 @@ ; ; GISEL-LABEL: ntest4: ; GISEL: // %bb.0: -; GISEL-NEXT: mov w8, #-4 +; GISEL-NEXT: mov w8, #-4 // =0xfffffffc ; GISEL-NEXT: mul w0, w0, w8 ; GISEL-NEXT: ret @@ -657,7 +657,7 @@ ; ; GISEL-LABEL: ntest6: ; GISEL: // %bb.0: -; GISEL-NEXT: mov w8, #-6 +; GISEL-NEXT: mov w8, #-6 // =0xfffffffa ; GISEL-NEXT: mul w0, w0, w8 ; GISEL-NEXT: ret @@ -688,7 +688,7 @@ ; ; GISEL-LABEL: ntest8: ; GISEL: // %bb.0: -; GISEL-NEXT: mov w8, #-8 +; GISEL-NEXT: mov w8, #-8 // =0xfffffff8 ; GISEL-NEXT: mul w0, w0, w8 ; GISEL-NEXT: ret @@ -716,13 +716,13 @@ define i32 @ntest10(i32 %x) { ; CHECK-LABEL: ntest10: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-10 +; CHECK-NEXT: mov w8, #-10 // =0xfffffff6 ; CHECK-NEXT: mul w0, w0, w8 ; CHECK-NEXT: ret ; ; GISEL-LABEL: ntest10: ; GISEL: // %bb.0: -; GISEL-NEXT: mov w8, #-10 +; GISEL-NEXT: mov w8, #-10 // =0xfffffff6 ; GISEL-NEXT: mul w0, w0, w8 ; GISEL-NEXT: ret @@ -733,13 +733,13 @@ define i32 @ntest11(i32 %x) { ; CHECK-LABEL: ntest11: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-11 +; CHECK-NEXT: mov w8, #-11 // =0xfffffff5 ; CHECK-NEXT: mul w0, w0, w8 ; CHECK-NEXT: ret ; ; GISEL-LABEL: ntest11: ; GISEL: // %bb.0: -; GISEL-NEXT: mov w8, #-11 +; GISEL-NEXT: mov w8, #-11 // =0xfffffff5 ; GISEL-NEXT: mul w0, w0, w8 ; GISEL-NEXT: ret @@ -756,7 +756,7 @@ ; ; GISEL-LABEL: ntest12: ; GISEL: // %bb.0: -; GISEL-NEXT: mov w8, #-12 +; GISEL-NEXT: mov w8, #-12 // =0xfffffff4 ; GISEL-NEXT: mul w0, w0, w8 ; GISEL-NEXT: ret @@ -767,13 +767,13 @@ define i32 @ntest13(i32 %x) { ; CHECK-LABEL: ntest13: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-13 +; CHECK-NEXT: mov w8, #-13 // =0xfffffff3 ; CHECK-NEXT: mul w0, w0, w8 ; CHECK-NEXT: ret ; ; GISEL-LABEL: ntest13: ; GISEL: // %bb.0: -; GISEL-NEXT: mov w8, #-13 +; GISEL-NEXT: mov w8, #-13 // =0xfffffff3 ; GISEL-NEXT: mul w0, w0, w8 ; GISEL-NEXT: ret %mul = mul nsw i32 %x, -13 @@ -789,7 +789,7 @@ ; ; GISEL-LABEL: ntest14: ; GISEL: // %bb.0: -; GISEL-NEXT: mov w8, 
#-14 +; GISEL-NEXT: mov w8, #-14 // =0xfffffff2 ; GISEL-NEXT: mul w0, w0, w8 ; GISEL-NEXT: ret @@ -820,7 +820,7 @@ ; ; GISEL-LABEL: ntest16: ; GISEL: // %bb.0: -; GISEL-NEXT: mov w8, #-16 +; GISEL-NEXT: mov w8, #-16 // =0xfffffff0 ; GISEL-NEXT: mul w0, w0, w8 ; GISEL-NEXT: ret @@ -837,7 +837,7 @@ ; ; GISEL-LABEL: muladd_demand: ; GISEL: // %bb.0: -; GISEL-NEXT: mov w8, #131008 +; GISEL-NEXT: mov w8, #131008 // =0x1ffc0 ; GISEL-NEXT: madd w8, w0, w8, w1 ; GISEL-NEXT: and w0, w8, #0x1ffc0 ; GISEL-NEXT: ret @@ -850,8 +850,8 @@ define <4 x i32> @muladd_demand_commute(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: muladd_demand_commute: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.4s, #1, msl #16 ; CHECK-NEXT: shl v0.4s, v0.4s, #6 +; CHECK-NEXT: movi v2.4s, #1, msl #16 ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s ; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/mulcmle.ll b/llvm/test/CodeGen/AArch64/mulcmle.ll --- a/llvm/test/CodeGen/AArch64/mulcmle.ll +++ b/llvm/test/CodeGen/AArch64/mulcmle.ll @@ -83,8 +83,8 @@ ; CHECK-LABEL: v8i8: ; CHECK: // %bb.0: ; CHECK-NEXT: movi v1.8b, #17 -; CHECK-NEXT: movi v2.8b, #15 ; CHECK-NEXT: ushr v0.8b, v0.8b, #3 +; CHECK-NEXT: movi v2.8b, #15 ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b ; CHECK-NEXT: mul v0.8b, v0.8b, v2.8b ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/named-vector-shuffles-neon.ll b/llvm/test/CodeGen/AArch64/named-vector-shuffles-neon.ll --- a/llvm/test/CodeGen/AArch64/named-vector-shuffles-neon.ll +++ b/llvm/test/CodeGen/AArch64/named-vector-shuffles-neon.ll @@ -39,9 +39,8 @@ define <8 x i32> @splice_v8i32_idx(<8 x i32> %a, <8 x i32> %b) #0 { ; CHECK-LABEL: splice_v8i32_idx: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v3.16b, v2.16b, v3.16b, #4 ; CHECK-NEXT: ext v0.16b, v1.16b, v2.16b, #4 -; CHECK-NEXT: mov v1.16b, v3.16b +; CHECK-NEXT: ext v1.16b, v2.16b, v3.16b, #4 ; CHECK-NEXT: ret %res = call <8 x i32> @llvm.experimental.vector.splice.v8i32(<8 x i32> %a, <8 x i32> %b, i32 5) ret <8 x i32> %res @@ -51,12 +50,11 @@ define <16 x float> @splice_v16f32_idx(<16 x float> %a, <16 x float> %b) #0 { ; CHECK-LABEL: splice_v16f32_idx: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v5.16b, v4.16b, v5.16b, #12 -; CHECK-NEXT: ext v6.16b, v2.16b, v3.16b, #12 +; CHECK-NEXT: ext v6.16b, v3.16b, v4.16b, #12 ; CHECK-NEXT: ext v0.16b, v1.16b, v2.16b, #12 -; CHECK-NEXT: ext v2.16b, v3.16b, v4.16b, #12 -; CHECK-NEXT: mov v3.16b, v5.16b -; CHECK-NEXT: mov v1.16b, v6.16b +; CHECK-NEXT: ext v1.16b, v2.16b, v3.16b, #12 +; CHECK-NEXT: ext v3.16b, v4.16b, v5.16b, #12 +; CHECK-NEXT: mov v2.16b, v6.16b ; CHECK-NEXT: ret %res = call <16 x float> @llvm.experimental.vector.splice.v16f32(<16 x float> %a, <16 x float> %b, i32 7) ret <16 x float> %res @@ -98,9 +96,8 @@ define <8 x i32> @splice_v8i32(<8 x i32> %a, <8 x i32> %b) #0 { ; CHECK-LABEL: splice_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v3.16b, v2.16b, v3.16b, #4 ; CHECK-NEXT: ext v0.16b, v1.16b, v2.16b, #4 -; CHECK-NEXT: mov v1.16b, v3.16b +; CHECK-NEXT: ext v1.16b, v2.16b, v3.16b, #4 ; CHECK-NEXT: ret %res = call <8 x i32> @llvm.experimental.vector.splice.v8i32(<8 x i32> %a, <8 x i32> %b, i32 -3) ret <8 x i32> %res @@ -110,12 +107,11 @@ define <16 x float> @splice_v16f32(<16 x float> %a, <16 x float> %b) #0 { ; CHECK-LABEL: splice_v16f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v5.16b, v4.16b, v5.16b, #12 -; CHECK-NEXT: ext v6.16b, v2.16b, v3.16b, #12 +; CHECK-NEXT: ext v6.16b, v3.16b, v4.16b, #12 ; CHECK-NEXT: ext v0.16b, v1.16b, v2.16b, #12 -; CHECK-NEXT: ext v2.16b, v3.16b, v4.16b, 
#12 -; CHECK-NEXT: mov v3.16b, v5.16b -; CHECK-NEXT: mov v1.16b, v6.16b +; CHECK-NEXT: ext v1.16b, v2.16b, v3.16b, #12 +; CHECK-NEXT: ext v3.16b, v4.16b, v5.16b, #12 +; CHECK-NEXT: mov v2.16b, v6.16b ; CHECK-NEXT: ret %res = call <16 x float> @llvm.experimental.vector.splice.v16f32(<16 x float> %a, <16 x float> %b, i32 -9) ret <16 x float> %res diff --git a/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll b/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll --- a/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll +++ b/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll @@ -258,8 +258,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov z0.d, p1/z, #1 // =0x1 ; CHECK-NEXT: mov z1.d, p0/z, #1 // =0x1 -; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8 ; CHECK-NEXT: ptrue p2.d +; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8 ; CHECK-NEXT: and z1.d, z1.d, #0x1 ; CHECK-NEXT: cmpne p0.d, p2/z, z1.d, #0 ; CHECK-NEXT: ret @@ -273,8 +273,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov z0.s, p1/z, #1 // =0x1 ; CHECK-NEXT: mov z1.s, p0/z, #1 // =0x1 -; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8 ; CHECK-NEXT: ptrue p2.s +; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8 ; CHECK-NEXT: and z1.s, z1.s, #0x1 ; CHECK-NEXT: cmpne p0.s, p2/z, z1.s, #0 ; CHECK-NEXT: ret @@ -288,8 +288,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov z0.h, p1/z, #1 // =0x1 ; CHECK-NEXT: mov z1.h, p0/z, #1 // =0x1 -; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8 ; CHECK-NEXT: ptrue p2.h +; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8 ; CHECK-NEXT: and z1.h, z1.h, #0x1 ; CHECK-NEXT: cmpne p0.h, p2/z, z1.h, #0 ; CHECK-NEXT: ret @@ -303,8 +303,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov z0.b, p1/z, #1 // =0x1 ; CHECK-NEXT: mov z1.b, p0/z, #1 // =0x1 -; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8 ; CHECK-NEXT: ptrue p2.b +; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8 ; CHECK-NEXT: and z1.b, z1.b, #0x1 ; CHECK-NEXT: cmpne p0.b, p2/z, z1.b, #0 ; CHECK-NEXT: ret @@ -328,8 +328,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-4 -; CHECK-NEXT: mov x8, sp ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov x8, sp ; CHECK-NEXT: orr x8, x8, #0x8 ; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl] ; CHECK-NEXT: st1w { z0.s }, p0, [sp] @@ -350,20 +350,20 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-8 -; CHECK-NEXT: mov x8, #-1 -; CHECK-NEXT: mov w9, #16 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: st1w { z3.s }, p0, [sp, #3, mul vl] -; CHECK-NEXT: st1w { z2.s }, p0, [sp, #2, mul vl] +; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov w9, #16 // =0x10 ; CHECK-NEXT: addvl x8, x8, #1 -; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl] ; CHECK-NEXT: cmp x8, #16 -; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: csel x8, x8, x9, lo ; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: add x10, x9, x8, lsl #2 +; CHECK-NEXT: st1w { z3.s }, p0, [sp, #3, mul vl] +; CHECK-NEXT: st1w { z2.s }, p0, [sp, #2, mul vl] +; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl] +; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: st1w { z7.s }, p0, [sp, #7, mul vl] ; CHECK-NEXT: st1w { z4.s }, p0, [sp, #4, mul vl] -; CHECK-NEXT: add x10, x9, x8, lsl #2 ; CHECK-NEXT: st1w { z5.s }, p0, [sp, #5, mul vl] ; CHECK-NEXT: st1w { z6.s }, p0, [sp, #6, mul vl] ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9, x8, lsl #2] @@ -452,15 +452,15 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: rdvl x9, #1 -; CHECK-NEXT: cmp x9, #17 -; CHECK-NEXT: mov w10, #17 -; CHECK-NEXT: csel x9, x9, x10, lo ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: addvl x8, x8, #1 +; CHECK-NEXT: rdvl x8, #1 +; CHECK-NEXT: mov w9, #17 // =0x11 +; CHECK-NEXT: mov x10, sp +; CHECK-NEXT: cmp x8, #17 +; CHECK-NEXT: addvl x10, x10, #1 +; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: sub x8, x10, x8 ; CHECK-NEXT: st1b { z0.b }, p0, [sp] -; CHECK-NEXT: sub x8, x8, x9 ; CHECK-NEXT: st1b { z1.b }, p0, [sp, #1, mul vl] ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x8] ; CHECK-NEXT: addvl sp, sp, #2 @@ -497,15 +497,15 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: rdvl x9, #1 -; CHECK-NEXT: cmp x9, #18 -; CHECK-NEXT: mov w10, #18 -; CHECK-NEXT: csel x9, x9, x10, lo ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: addvl x8, x8, #1 +; CHECK-NEXT: rdvl x8, #1 +; CHECK-NEXT: mov w9, #18 // =0x12 +; CHECK-NEXT: mov x10, sp +; CHECK-NEXT: cmp x8, #18 +; CHECK-NEXT: addvl x10, x10, #1 +; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: sub x8, x10, x8 ; CHECK-NEXT: st1h { z0.h }, p0, [sp] -; CHECK-NEXT: sub x8, x8, x9 ; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl] ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8] ; CHECK-NEXT: addvl sp, sp, #2 @@ -608,15 +608,15 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: rdvl x9, #1 -; CHECK-NEXT: cmp x9, #18 -; CHECK-NEXT: mov w10, #18 -; CHECK-NEXT: csel x9, x9, x10, lo ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: addvl x8, x8, #1 +; CHECK-NEXT: rdvl x8, #1 +; CHECK-NEXT: mov w9, #18 // =0x12 +; CHECK-NEXT: mov x10, sp +; CHECK-NEXT: cmp x8, #18 +; CHECK-NEXT: addvl x10, x10, #1 +; CHECK-NEXT: csel x8, x8, x9, lo +; CHECK-NEXT: sub x8, x10, x8 ; CHECK-NEXT: st1h { z0.h }, p0, [sp] -; CHECK-NEXT: sub x8, x8, x9 ; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl] ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8] ; CHECK-NEXT: addvl sp, sp, #2 @@ -698,10 +698,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p2.d, vl1 ; CHECK-NEXT: mov z0.d, p1/z, #1 // =0x1 -; CHECK-NEXT: rev p2.d, p2.d ; CHECK-NEXT: mov z1.d, p0/z, #1 // =0x1 -; CHECK-NEXT: splice z1.d, p2, z1.d, z0.d ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: rev p2.d, p2.d +; CHECK-NEXT: splice z1.d, p2, z1.d, z0.d ; CHECK-NEXT: and z1.d, z1.d, #0x1 ; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0 ; CHECK-NEXT: ret @@ -715,10 +715,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p2.s, vl1 ; CHECK-NEXT: mov z0.s, p1/z, #1 // =0x1 -; CHECK-NEXT: rev p2.s, p2.s ; CHECK-NEXT: mov z1.s, p0/z, #1 // =0x1 -; CHECK-NEXT: splice z1.s, p2, z1.s, z0.s ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: rev p2.s, p2.s +; CHECK-NEXT: splice z1.s, p2, z1.s, z0.s ; CHECK-NEXT: and z1.s, z1.s, #0x1 ; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0 ; CHECK-NEXT: ret @@ -732,10 +732,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p2.h, vl1 ; CHECK-NEXT: mov z0.h, p1/z, #1 // =0x1 -; CHECK-NEXT: rev p2.h, p2.h ; CHECK-NEXT: mov z1.h, p0/z, #1 // =0x1 -; CHECK-NEXT: splice z1.h, p2, z1.h, z0.h ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: rev p2.h, p2.h +; CHECK-NEXT: splice z1.h, p2, z1.h, z0.h ; CHECK-NEXT: and z1.h, z1.h, #0x1 ; CHECK-NEXT: cmpne p0.h, p0/z, z1.h, #0 ; CHECK-NEXT: ret @@ -749,10 +749,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p2.b, vl1 ; CHECK-NEXT: mov z0.b, p1/z, #1 // =0x1 -; CHECK-NEXT: rev p2.b, p2.b ; CHECK-NEXT: mov 
z1.b, p0/z, #1 // =0x1 -; CHECK-NEXT: splice z1.b, p2, z1.b, z0.b ; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: rev p2.b, p2.b +; CHECK-NEXT: splice z1.b, p2, z1.b, z0.b ; CHECK-NEXT: and z1.b, z1.b, #0x1 ; CHECK-NEXT: cmpne p0.b, p0/z, z1.b, #0 ; CHECK-NEXT: ret @@ -778,14 +778,14 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-4 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: mov x9, #-8 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: mov x9, #-8 // =0xfffffffffffffff8 +; CHECK-NEXT: addvl x8, x8, #2 +; CHECK-NEXT: sub x10, x8, #32 ; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl] ; CHECK-NEXT: st1w { z0.s }, p0, [sp] -; CHECK-NEXT: addvl x8, x8, #2 ; CHECK-NEXT: st1w { z3.s }, p0, [sp, #3, mul vl] -; CHECK-NEXT: sub x10, x8, #32 ; CHECK-NEXT: st1w { z2.s }, p0, [sp, #2, mul vl] ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, x9, lsl #2] ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x10, #1, mul vl] @@ -802,15 +802,15 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-8 -; CHECK-NEXT: mov x10, sp +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: rdvl x8, #4 +; CHECK-NEXT: mov w9, #68 // =0x44 +; CHECK-NEXT: mov x10, sp ; CHECK-NEXT: cmp x8, #68 -; CHECK-NEXT: mov w9, #68 ; CHECK-NEXT: csel x8, x8, x9, lo -; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: addvl x9, x10, #4 -; CHECK-NEXT: st1w { z3.s }, p0, [sp, #3, mul vl] ; CHECK-NEXT: sub x8, x9, x8 +; CHECK-NEXT: st1w { z3.s }, p0, [sp, #3, mul vl] ; CHECK-NEXT: st1w { z2.s }, p0, [sp, #2, mul vl] ; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl] ; CHECK-NEXT: st1w { z0.s }, p0, [sp] diff --git a/llvm/test/CodeGen/AArch64/neg-imm.ll b/llvm/test/CodeGen/AArch64/neg-imm.ll --- a/llvm/test/CodeGen/AArch64/neg-imm.ll +++ b/llvm/test/CodeGen/AArch64/neg-imm.ll @@ -20,9 +20,8 @@ ; CHECK-NEXT: b .LBB0_2 ; CHECK-NEXT: .LBB0_1: // %for.inc ; CHECK-NEXT: // in Loop: Header=BB0_2 Depth=1 -; CHECK-NEXT: add w8, w20, #1 ; CHECK-NEXT: cmp w20, w19 -; CHECK-NEXT: mov w20, w8 +; CHECK-NEXT: add w20, w20, #1 ; CHECK-NEXT: b.gt .LBB0_4 ; CHECK-NEXT: .LBB0_2: // %for.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 diff --git a/llvm/test/CodeGen/AArch64/neon-abd.ll b/llvm/test/CodeGen/AArch64/neon-abd.ll --- a/llvm/test/CodeGen/AArch64/neon-abd.ll +++ b/llvm/test/CodeGen/AArch64/neon-abd.ll @@ -145,22 +145,22 @@ ; CHECK-LABEL: sabd_2d: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x8, v0.d[1] -; CHECK-NEXT: fmov x10, d0 ; CHECK-NEXT: mov x9, v1.d[1] -; CHECK-NEXT: fmov x11, d1 -; CHECK-NEXT: asr x12, x10, #63 -; CHECK-NEXT: asr x14, x8, #63 -; CHECK-NEXT: asr x15, x9, #63 +; CHECK-NEXT: fmov x10, d0 +; CHECK-NEXT: fmov x12, d1 +; CHECK-NEXT: asr x14, x10, #63 +; CHECK-NEXT: asr x11, x8, #63 +; CHECK-NEXT: asr x13, x9, #63 +; CHECK-NEXT: asr x15, x12, #63 ; CHECK-NEXT: subs x8, x8, x9 -; CHECK-NEXT: asr x13, x11, #63 -; CHECK-NEXT: sbc x9, x14, x15 -; CHECK-NEXT: subs x10, x10, x11 +; CHECK-NEXT: sbc x9, x11, x13 +; CHECK-NEXT: subs x10, x10, x12 +; CHECK-NEXT: sbc x11, x14, x15 ; CHECK-NEXT: asr x9, x9, #63 -; CHECK-NEXT: sbc x11, x12, x13 -; CHECK-NEXT: eor x8, x8, x9 ; CHECK-NEXT: asr x11, x11, #63 -; CHECK-NEXT: sub x8, x8, x9 +; CHECK-NEXT: eor x8, x8, x9 ; CHECK-NEXT: eor x10, x10, x11 +; CHECK-NEXT: sub x8, x8, x9 ; CHECK-NEXT: sub x10, x10, x11 ; CHECK-NEXT: fmov d1, x8 ; CHECK-NEXT: fmov d0, x10 @@ -325,8 +325,8 @@ ; CHECK-LABEL: uabd_2d: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x8, v0.d[1] -; CHECK-NEXT: fmov x10, d0 ; CHECK-NEXT: mov x9, v1.d[1] +; CHECK-NEXT: fmov 
x10, d0 ; CHECK-NEXT: fmov x11, d1 ; CHECK-NEXT: subs x8, x8, x9 ; CHECK-NEXT: ngc x9, xzr diff --git a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll --- a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll +++ b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll @@ -896,9 +896,9 @@ define <8 x i8> @vselect_equivalent_shuffle_v8i8(<8 x i8> %a, <8 x i8> %b) { ; CHECK-LABEL: vselect_equivalent_shuffle_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI89_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 +; CHECK-NEXT: adrp x8, .LCPI89_0 ; CHECK-NEXT: mov v0.d[1], v1.d[0] ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI89_0] ; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b @@ -929,8 +929,8 @@ define <8 x i8> @vselect_equivalent_shuffle_v8i8_zeroswap(<8 x i8> %a) { ; CHECK-LABEL: vselect_equivalent_shuffle_v8i8_zeroswap: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI91_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: adrp x8, .LCPI91_0 ; CHECK-NEXT: mov v0.d[1], v0.d[0] ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI91_0] ; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b @@ -961,8 +961,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI92_0 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI92_0] +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-NEXT: ret %c = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> @@ -1495,10 +1495,11 @@ define <8 x i32> @bic_shifted_knownbits2(<8 x i16> %v) { ; CHECK-LABEL: bic_shifted_knownbits2: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ushll v2.4s, v0.4h, #0 ; CHECK-NEXT: ushll2 v1.4s, v0.8h, #0 -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: bic v2.4s, #255, lsl #8 ; CHECK-NEXT: bic v1.4s, #255, lsl #8 -; CHECK-NEXT: bic v0.4s, #255, lsl #8 +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret entry: %vshr_n = zext <8 x i16> %v to <8 x i32> @@ -1522,8 +1523,8 @@ define <8 x i32> @bic_shifted_knownbits4(<8 x i32> %v) { ; CHECK-LABEL: bic_shifted_knownbits4: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: shl v0.4s, v0.4s, #8 ; CHECK-NEXT: shl v1.4s, v1.4s, #8 +; CHECK-NEXT: shl v0.4s, v0.4s, #8 ; CHECK-NEXT: bic v0.4s, #255, lsl #8 ; CHECK-NEXT: bic v1.4s, #255, lsl #8 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll --- a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll +++ b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll @@ -2134,7 +2134,7 @@ define <2 x i64> @cmhsz2xi64(<2 x i64> %A) { ; CHECK-LABEL: cmhsz2xi64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #2 +; CHECK-NEXT: mov w8, #2 // =0x2 ; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: cmhs v0.2d, v0.2d, v1.2d ; CHECK-NEXT: ret @@ -2262,7 +2262,7 @@ define <2 x i64> @cmhiz2xi64(<2 x i64> %A) { ; CHECK-LABEL: cmhiz2xi64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d ; CHECK-NEXT: ret @@ -2522,7 +2522,7 @@ define <2 x i64> @cmloz2xi64(<2 x i64> %A) { ; CHECK-LABEL: cmloz2xi64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #2 +; CHECK-NEXT: mov w8, #2 // =0x2 ; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d ; CHECK-NEXT: ret @@ -5422,8 +5422,8 @@ ; ; GISEL-LABEL: 
fcmule4xfloat_fast_zext: ; GISEL: // %bb.0: -; GISEL-NEXT: adrp x8, .LCPI322_0 ; GISEL-NEXT: fcmgt v0.4s, v0.4s, v1.4s +; GISEL-NEXT: adrp x8, .LCPI322_0 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI322_0] ; GISEL-NEXT: bic v0.16b, v1.16b, v0.16b ; GISEL-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/neon-dotpattern.ll b/llvm/test/CodeGen/AArch64/neon-dotpattern.ll --- a/llvm/test/CodeGen/AArch64/neon-dotpattern.ll +++ b/llvm/test/CodeGen/AArch64/neon-dotpattern.ll @@ -5,8 +5,8 @@ ; CHECK-LABEL: test_sdot_v4i8: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ldr w8, [x2] -; CHECK-NEXT: dup v0.2s, wzr ; CHECK-NEXT: ldr w9, [x1] +; CHECK-NEXT: dup v0.2s, wzr ; CHECK-NEXT: fmov s1, w8 ; CHECK-NEXT: fmov s2, w9 ; CHECK-NEXT: sdot v0.2s, v1.8b, v2.8b @@ -51,8 +51,8 @@ ; CHECK-LABEL: test_udot_v4i8: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ldr w8, [x2] -; CHECK-NEXT: dup v0.2s, wzr ; CHECK-NEXT: ldr w9, [x1] +; CHECK-NEXT: dup v0.2s, wzr ; CHECK-NEXT: fmov s1, w8 ; CHECK-NEXT: fmov s2, w9 ; CHECK-NEXT: udot v0.2s, v1.8b, v2.8b diff --git a/llvm/test/CodeGen/AArch64/neon-dotreduce.ll b/llvm/test/CodeGen/AArch64/neon-dotreduce.ll --- a/llvm/test/CodeGen/AArch64/neon-dotreduce.ll +++ b/llvm/test/CodeGen/AArch64/neon-dotreduce.ll @@ -15,11 +15,11 @@ define i32 @test_udot_v4i8(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %sum) { ; CHECK-LABEL: test_udot_v4i8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldr s0, [x1] -; CHECK-NEXT: ldr s1, [x0] +; CHECK-NEXT: ldr s0, [x0] +; CHECK-NEXT: ldr s1, [x1] ; CHECK-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h +; CHECK-NEXT: umull v0.4s, v1.4h, v0.4h ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: add w0, w8, w2 @@ -53,11 +53,11 @@ define i32 @test_sdot_v4i8(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %sum) { ; CHECK-LABEL: test_sdot_v4i8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldr s0, [x1] -; CHECK-NEXT: ldr s1, [x0] +; CHECK-NEXT: ldr s0, [x0] +; CHECK-NEXT: ldr s1, [x1] ; CHECK-NEXT: sshll v0.8h, v0.8b, #0 ; CHECK-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h +; CHECK-NEXT: smull v0.4s, v1.4h, v0.4h ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: add w0, w8, w2 @@ -78,17 +78,17 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ushll v3.4s, v3.4h, #0 ; CHECK-NEXT: ushll v2.4s, v2.4h, #0 -; CHECK-NEXT: shl v3.4s, v3.4s, #24 -; CHECK-NEXT: shl v2.4s, v2.4s, #24 ; CHECK-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-NEXT: sshr v3.4s, v3.4s, #24 ; CHECK-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-NEXT: sshr v2.4s, v2.4s, #24 -; CHECK-NEXT: shl v0.4s, v0.4s, #24 +; CHECK-NEXT: shl v2.4s, v2.4s, #24 +; CHECK-NEXT: shl v3.4s, v3.4s, #24 ; CHECK-NEXT: shl v1.4s, v1.4s, #24 -; CHECK-NEXT: mul v2.4s, v2.4s, v3.4s -; CHECK-NEXT: sshr v0.4s, v0.4s, #24 +; CHECK-NEXT: shl v0.4s, v0.4s, #24 +; CHECK-NEXT: sshr v2.4s, v2.4s, #24 +; CHECK-NEXT: sshr v3.4s, v3.4s, #24 ; CHECK-NEXT: sshr v1.4s, v1.4s, #24 +; CHECK-NEXT: sshr v0.4s, v0.4s, #24 +; CHECK-NEXT: mul v2.4s, v2.4s, v3.4s ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: addv s0, v2.4s ; CHECK-NEXT: fmov w0, s0 @@ -130,15 +130,15 @@ define i32 @test_udot_v5i8(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %sum) { ; CHECK-LABEL: test_udot_v5i8: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: ldr d2, [x0] -; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: movi v3.2d, #0000000000000000 +; CHECK-NEXT: ushll v0.8h, 
v0.8b, #0 ; CHECK-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-NEXT: ushll v2.8h, v2.8b, #0 -; CHECK-NEXT: umull2 v3.4s, v1.8h, v2.8h -; CHECK-NEXT: mov v0.s[0], v3.s[0] -; CHECK-NEXT: umlal v0.4s, v1.4h, v2.4h -; CHECK-NEXT: addv s0, v0.4s +; CHECK-NEXT: umull2 v2.4s, v1.8h, v0.8h +; CHECK-NEXT: mov v3.s[0], v2.s[0] +; CHECK-NEXT: umlal v3.4s, v1.4h, v0.4h +; CHECK-NEXT: addv s0, v3.4s ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: add w0, w8, w2 ; CHECK-NEXT: ret @@ -156,12 +156,12 @@ define i32 @test_udot_v5i8_nomla(ptr nocapture readonly %a1) { ; CHECK-LABEL: test_udot_v5i8_nomla: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-NEXT: ushll2 v2.4s, v1.8h, #0 -; CHECK-NEXT: mov v0.s[0], v2.s[0] -; CHECK-NEXT: uaddw v0.4s, v0.4s, v1.4h +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-NEXT: ushll2 v2.4s, v0.8h, #0 +; CHECK-NEXT: mov v1.s[0], v2.s[0] +; CHECK-NEXT: uaddw v0.4s, v1.4s, v0.4h ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -174,15 +174,15 @@ define i32 @test_sdot_v5i8(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %sum) { ; CHECK-LABEL: test_sdot_v5i8: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: ldr d2, [x0] -; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: movi v3.2d, #0000000000000000 +; CHECK-NEXT: sshll v0.8h, v0.8b, #0 ; CHECK-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-NEXT: sshll v2.8h, v2.8b, #0 -; CHECK-NEXT: smull2 v3.4s, v1.8h, v2.8h -; CHECK-NEXT: mov v0.s[0], v3.s[0] -; CHECK-NEXT: smlal v0.4s, v1.4h, v2.4h -; CHECK-NEXT: addv s0, v0.4s +; CHECK-NEXT: smull2 v2.4s, v1.8h, v0.8h +; CHECK-NEXT: mov v3.s[0], v2.s[0] +; CHECK-NEXT: smlal v3.4s, v1.4h, v0.4h +; CHECK-NEXT: addv s0, v3.4s ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: add w0, w8, w2 ; CHECK-NEXT: ret @@ -200,19 +200,19 @@ define i32 @test_sdot_v5i8_double(<5 x i8> %a, <5 x i8> %b, <5 x i8> %c, <5 x i8> %d) { ; CHECK-LABEL: test_sdot_v5i8_double: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sshll v2.8h, v2.8b, #0 -; CHECK-NEXT: sshll v0.8h, v0.8b, #0 ; CHECK-NEXT: sshll v1.8h, v1.8b, #0 +; CHECK-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-NEXT: sshll v2.8h, v2.8b, #0 ; CHECK-NEXT: sshll v3.8h, v3.8b, #0 -; CHECK-NEXT: movi v4.2d, #0000000000000000 -; CHECK-NEXT: smull2 v5.4s, v0.8h, v1.8h +; CHECK-NEXT: movi v5.2d, #0000000000000000 ; CHECK-NEXT: movi v6.2d, #0000000000000000 +; CHECK-NEXT: smull2 v4.4s, v0.8h, v1.8h ; CHECK-NEXT: smull2 v7.4s, v2.8h, v3.8h -; CHECK-NEXT: mov v6.s[0], v5.s[0] -; CHECK-NEXT: mov v4.s[0], v7.s[0] +; CHECK-NEXT: mov v6.s[0], v4.s[0] +; CHECK-NEXT: mov v5.s[0], v7.s[0] ; CHECK-NEXT: smlal v6.4s, v0.4h, v1.4h -; CHECK-NEXT: smlal v4.4s, v2.4h, v3.4h -; CHECK-NEXT: add v0.4s, v6.4s, v4.4s +; CHECK-NEXT: smlal v5.4s, v2.4h, v3.4h +; CHECK-NEXT: add v0.4s, v6.4s, v5.4s ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -232,16 +232,16 @@ define i32 @test_sdot_v5i8_double_nomla(<5 x i8> %a, <5 x i8> %b, <5 x i8> %c, <5 x i8> %d) { ; CHECK-LABEL: test_sdot_v5i8_double_nomla: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v1.2d, #0000000000000000 -; CHECK-NEXT: movi v3.2d, #0000000000000000 ; CHECK-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-NEXT: sshll v2.8h, v2.8b, #0 +; CHECK-NEXT: sshll v1.8h, v2.8b, #0 +; CHECK-NEXT: movi v2.2d, #0000000000000000 +; CHECK-NEXT: movi v3.2d, #0000000000000000 ; CHECK-NEXT: sshll2 v4.4s, 
v0.8h, #0 -; CHECK-NEXT: sshll2 v5.4s, v2.8h, #0 +; CHECK-NEXT: sshll2 v5.4s, v1.8h, #0 ; CHECK-NEXT: mov v3.s[0], v4.s[0] -; CHECK-NEXT: mov v1.s[0], v5.s[0] +; CHECK-NEXT: mov v2.s[0], v5.s[0] ; CHECK-NEXT: saddw v0.4s, v3.4s, v0.4h -; CHECK-NEXT: saddw v1.4s, v1.4s, v2.4h +; CHECK-NEXT: saddw v1.4s, v2.4s, v1.4h ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w0, s0 @@ -278,11 +278,11 @@ define i32 @test_udot_v8i8_nomla(ptr nocapture readonly %a1) { ; CHECK-LABEL: test_udot_v8i8_nomla: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v0.8b, #1 +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: movi v1.8b, #1 ; CHECK-NEXT: ldr d2, [x0] -; CHECK-NEXT: movi v1.2d, #0000000000000000 -; CHECK-NEXT: udot v1.2s, v2.8b, v0.8b -; CHECK-NEXT: addp v0.2s, v1.2s, v1.2s +; CHECK-NEXT: udot v0.2s, v2.8b, v1.8b +; CHECK-NEXT: addp v0.2s, v0.2s, v0.2s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret entry: @@ -315,11 +315,11 @@ define i32 @test_sdot_v8i8_nomla(ptr nocapture readonly %a1) { ; CHECK-LABEL: test_sdot_v8i8_nomla: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v0.8b, #1 +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: movi v1.8b, #1 ; CHECK-NEXT: ldr d2, [x0] -; CHECK-NEXT: movi v1.2d, #0000000000000000 -; CHECK-NEXT: sdot v1.2s, v2.8b, v0.8b -; CHECK-NEXT: addp v0.2s, v1.2s, v1.2s +; CHECK-NEXT: sdot v0.2s, v2.8b, v1.8b +; CHECK-NEXT: addp v0.2s, v0.2s, v0.2s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret entry: @@ -334,9 +334,9 @@ ; CHECK-LABEL: test_udot_v16i8: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: ldr q2, [x0] -; CHECK-NEXT: udot v0.4s, v1.16b, v2.16b +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr q2, [x1] +; CHECK-NEXT: udot v0.4s, v2.16b, v1.16b ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: add w0, w8, w2 @@ -356,8 +356,8 @@ ; CHECK-LABEL: test_udot_v16i8_nomla: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v0.16b, #1 -; CHECK-NEXT: ldr q2, [x0] ; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: ldr q2, [x0] ; CHECK-NEXT: udot v1.4s, v2.16b, v0.16b ; CHECK-NEXT: addv s0, v1.4s ; CHECK-NEXT: fmov w0, s0 @@ -373,9 +373,9 @@ ; CHECK-LABEL: test_sdot_v16i8: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: ldr q2, [x0] -; CHECK-NEXT: sdot v0.4s, v1.16b, v2.16b +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr q2, [x1] +; CHECK-NEXT: sdot v0.4s, v2.16b, v1.16b ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: add w0, w8, w2 @@ -395,8 +395,8 @@ ; CHECK-LABEL: test_sdot_v16i8_nomla: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v0.16b, #1 -; CHECK-NEXT: ldr q2, [x0] ; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: ldr q2, [x0] ; CHECK-NEXT: sdot v1.4s, v2.16b, v0.16b ; CHECK-NEXT: addv s0, v1.4s ; CHECK-NEXT: fmov w0, s0 @@ -434,11 +434,11 @@ define i32 @test_udot_v8i8_double_nomla(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) { ; CHECK-LABEL: test_udot_v8i8_double_nomla: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v1.8b, #1 -; CHECK-NEXT: movi v3.2d, #0000000000000000 -; CHECK-NEXT: udot v3.2s, v2.8b, v1.8b -; CHECK-NEXT: udot v3.2s, v0.8b, v1.8b -; CHECK-NEXT: addp v0.2s, v3.2s, v3.2s +; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: movi v3.8b, #1 +; CHECK-NEXT: udot v1.2s, v2.8b, v3.8b +; CHECK-NEXT: udot v1.2s, v0.8b, v3.8b +; CHECK-NEXT: addp v0.2s, v1.2s, v1.2s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret entry: 
@@ -516,11 +516,11 @@ define i32 @test_sdot_v8i8_double_nomla(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) { ; CHECK-LABEL: test_sdot_v8i8_double_nomla: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v1.8b, #1 -; CHECK-NEXT: movi v3.2d, #0000000000000000 -; CHECK-NEXT: sdot v3.2s, v2.8b, v1.8b -; CHECK-NEXT: sdot v3.2s, v0.8b, v1.8b -; CHECK-NEXT: addp v0.2s, v3.2s, v3.2s +; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: movi v3.8b, #1 +; CHECK-NEXT: sdot v1.2s, v2.8b, v3.8b +; CHECK-NEXT: sdot v1.2s, v0.8b, v3.8b +; CHECK-NEXT: addp v0.2s, v1.2s, v1.2s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret entry: @@ -577,17 +577,17 @@ ; CHECK-LABEL: test_udot_v24i8: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: ldr q1, [x0] -; CHECK-NEXT: movi v3.2d, #0000000000000000 -; CHECK-NEXT: ldr d2, [x0, #16] -; CHECK-NEXT: ldr d4, [x1, #16] -; CHECK-NEXT: ldr q5, [x1] -; CHECK-NEXT: udot v0.2s, v4.8b, v2.8b -; CHECK-NEXT: udot v3.4s, v5.16b, v1.16b -; CHECK-NEXT: addp v0.2s, v0.2s, v0.2s -; CHECK-NEXT: addv s1, v3.4s -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: fmov w9, s1 +; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: ldr q2, [x0] +; CHECK-NEXT: ldr q3, [x1] +; CHECK-NEXT: ldr d4, [x0, #16] +; CHECK-NEXT: ldr d5, [x1, #16] +; CHECK-NEXT: udot v1.2s, v5.8b, v4.8b +; CHECK-NEXT: udot v0.4s, v3.16b, v2.16b +; CHECK-NEXT: addp v1.2s, v1.2s, v1.2s +; CHECK-NEXT: addv s0, v0.4s +; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: fmov w9, s0 ; CHECK-NEXT: add w8, w9, w8 ; CHECK-NEXT: add w0, w8, w2 ; CHECK-NEXT: ret @@ -605,18 +605,18 @@ define i32 @test_udot_v24i8_nomla(ptr nocapture readonly %a1) { ; CHECK-LABEL: test_udot_v24i8_nomla: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v0.8b, #1 -; CHECK-NEXT: ldr d4, [x0, #16] -; CHECK-NEXT: movi v1.2d, #0000000000000000 -; CHECK-NEXT: ldr q5, [x0] +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: movi v1.8b, #1 +; CHECK-NEXT: ldr q4, [x0] ; CHECK-NEXT: movi v2.2d, #0000000000000000 ; CHECK-NEXT: movi v3.16b, #1 -; CHECK-NEXT: udot v2.2s, v4.8b, v0.8b -; CHECK-NEXT: udot v1.4s, v5.16b, v3.16b -; CHECK-NEXT: addp v0.2s, v2.2s, v2.2s -; CHECK-NEXT: addv s1, v1.4s -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: fmov w9, s1 +; CHECK-NEXT: ldr d5, [x0, #16] +; CHECK-NEXT: udot v2.2s, v5.8b, v1.8b +; CHECK-NEXT: udot v0.4s, v4.16b, v3.16b +; CHECK-NEXT: addp v1.2s, v2.2s, v2.2s +; CHECK-NEXT: addv s0, v0.4s +; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: fmov w9, s0 ; CHECK-NEXT: add w0, w9, w8 ; CHECK-NEXT: ret entry: @@ -629,17 +629,17 @@ ; CHECK-LABEL: test_sdot_v24i8: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: ldr q1, [x0] -; CHECK-NEXT: movi v3.2d, #0000000000000000 -; CHECK-NEXT: ldr d2, [x0, #16] -; CHECK-NEXT: ldr d4, [x1, #16] -; CHECK-NEXT: ldr q5, [x1] -; CHECK-NEXT: sdot v0.2s, v4.8b, v2.8b -; CHECK-NEXT: sdot v3.4s, v5.16b, v1.16b -; CHECK-NEXT: addp v0.2s, v0.2s, v0.2s -; CHECK-NEXT: addv s1, v3.4s -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: fmov w9, s1 +; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: ldr q2, [x0] +; CHECK-NEXT: ldr q3, [x1] +; CHECK-NEXT: ldr d4, [x0, #16] +; CHECK-NEXT: ldr d5, [x1, #16] +; CHECK-NEXT: sdot v1.2s, v5.8b, v4.8b +; CHECK-NEXT: sdot v0.4s, v3.16b, v2.16b +; CHECK-NEXT: addp v1.2s, v1.2s, v1.2s +; CHECK-NEXT: addv s0, v0.4s +; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: fmov w9, s0 ; CHECK-NEXT: add w8, w9, w8 ; CHECK-NEXT: add w0, w8, w2 ; CHECK-NEXT: ret @@ -660,200 +660,200 @@ ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: ldr b0, [sp, #144] -; CHECK-NEXT: add x8, sp, #152 -; CHECK-NEXT: fmov s1, w0 -; CHECK-NEXT: add x9, sp, #168 +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: ldr b1, [sp, #144] +; CHECK-NEXT: add x10, sp, #152 +; CHECK-NEXT: add x9, sp, #160 +; CHECK-NEXT: add x8, sp, #168 ; CHECK-NEXT: ldr b2, [sp, #272] +; CHECK-NEXT: ld1 { v1.b }[1], [x10] ; CHECK-NEXT: add x11, sp, #280 -; CHECK-NEXT: ld1 { v0.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #160 -; CHECK-NEXT: mov v1.b[1], w1 -; CHECK-NEXT: add x10, sp, #184 +; CHECK-NEXT: ldr b3, [sp, #80] +; CHECK-NEXT: mov v0.b[1], w1 +; CHECK-NEXT: ldr b4, [sp, #528] +; CHECK-NEXT: ldr b6, [sp, #656] +; CHECK-NEXT: add x10, sp, #88 ; CHECK-NEXT: ld1 { v2.b }[1], [x11] ; CHECK-NEXT: add x11, sp, #536 -; CHECK-NEXT: ldr b4, [sp, #528] -; CHECK-NEXT: add x12, sp, #88 -; CHECK-NEXT: ld1 { v0.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #176 -; CHECK-NEXT: mov v1.b[2], w2 +; CHECK-NEXT: ld1 { v1.b }[2], [x9] ; CHECK-NEXT: ldr b5, [sp, #336] +; CHECK-NEXT: ldr b7, [sp, #464] +; CHECK-NEXT: add x12, sp, #664 +; CHECK-NEXT: ld1 { v3.b }[1], [x10] +; CHECK-NEXT: add x10, sp, #344 +; CHECK-NEXT: mov v0.b[2], w2 ; CHECK-NEXT: ld1 { v4.b }[1], [x11] -; CHECK-NEXT: add x11, sp, #344 -; CHECK-NEXT: ldr b3, [sp, #80] -; CHECK-NEXT: add x13, sp, #96 -; CHECK-NEXT: ld1 { v0.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #192 -; CHECK-NEXT: mov v1.b[3], w3 -; CHECK-NEXT: ld1 { v5.b }[1], [x11] -; CHECK-NEXT: ld1 { v3.b }[1], [x12] -; CHECK-NEXT: add x12, sp, #200 -; CHECK-NEXT: add x11, sp, #544 -; CHECK-NEXT: ldr b7, [sp, #656] -; CHECK-NEXT: ld1 { v0.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #288 -; CHECK-NEXT: mov v1.b[4], w4 -; CHECK-NEXT: ldr b16, [sp, #464] -; CHECK-NEXT: ld1 { v3.b }[2], [x13] -; CHECK-NEXT: ld1 { v2.b }[2], [x8] +; CHECK-NEXT: add x11, sp, #176 +; CHECK-NEXT: ld1 { v1.b }[3], [x8] +; CHECK-NEXT: add x9, sp, #472 +; CHECK-NEXT: ld1 { v6.b }[1], [x12] +; CHECK-NEXT: ld1 { v5.b }[1], [x10] +; CHECK-NEXT: add x12, sp, #288 +; CHECK-NEXT: ld1 { v7.b }[1], [x9] +; CHECK-NEXT: ld1 { v2.b }[2], [x12] +; CHECK-NEXT: add x8, sp, #96 +; CHECK-NEXT: add x13, sp, #544 +; CHECK-NEXT: mov v0.b[3], w3 +; CHECK-NEXT: ld1 { v1.b }[4], [x11] +; CHECK-NEXT: add x11, sp, #672 +; CHECK-NEXT: add x10, sp, #184 +; CHECK-NEXT: ld1 { v3.b }[2], [x8] ; CHECK-NEXT: add x8, sp, #352 -; CHECK-NEXT: ld1 { v0.b }[5], [x10] -; CHECK-NEXT: add x10, sp, #296 -; CHECK-NEXT: mov v1.b[5], w5 -; CHECK-NEXT: ld1 { v4.b }[2], [x11] +; CHECK-NEXT: ld1 { v4.b }[2], [x13] +; CHECK-NEXT: ld1 { v6.b }[2], [x11] +; CHECK-NEXT: add x11, sp, #480 ; CHECK-NEXT: ld1 { v5.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #360 -; CHECK-NEXT: ld1 { v2.b }[3], [x10] -; CHECK-NEXT: add x10, sp, #208 -; CHECK-NEXT: ld1 { v0.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #104 -; CHECK-NEXT: mov v1.b[6], w6 +; CHECK-NEXT: ld1 { v7.b }[2], [x11] +; CHECK-NEXT: add x11, sp, #296 +; CHECK-NEXT: mov v0.b[4], w4 +; CHECK-NEXT: ld1 { v1.b }[5], [x10] +; CHECK-NEXT: ld1 { v2.b }[3], [x11] ; CHECK-NEXT: add x11, sp, #552 -; CHECK-NEXT: ld1 { v5.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #16 -; CHECK-NEXT: ld1 { v3.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #304 -; CHECK-NEXT: ld1 { v0.b }[7], [x12] -; CHECK-NEXT: mov v1.b[7], w7 +; CHECK-NEXT: add x9, sp, #192 +; CHECK-NEXT: add x15, sp, #104 ; CHECK-NEXT: ld1 { v4.b }[3], [x11] -; CHECK-NEXT: add x11, sp, #112 -; CHECK-NEXT: ld1 { v2.b }[4], [x9] +; CHECK-NEXT: add x11, sp, #360 +; CHECK-NEXT: add 
x12, sp, #200 +; CHECK-NEXT: ld1 { v5.b }[3], [x11] +; CHECK-NEXT: ld1 { v1.b }[6], [x9] ; CHECK-NEXT: add x9, sp, #560 -; CHECK-NEXT: ld1 { v0.b }[8], [x10] -; CHECK-NEXT: add x10, sp, #216 -; CHECK-NEXT: ld1 { v1.b }[8], [x8] -; CHECK-NEXT: add x8, sp, #24 -; CHECK-NEXT: ld1 { v3.b }[4], [x11] -; CHECK-NEXT: add x11, sp, #368 +; CHECK-NEXT: mov v0.b[5], w5 +; CHECK-NEXT: ld1 { v3.b }[3], [x15] +; CHECK-NEXT: add x15, sp, #368 ; CHECK-NEXT: ld1 { v4.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #312 -; CHECK-NEXT: ld1 { v0.b }[9], [x10] -; CHECK-NEXT: add x10, sp, #224 +; CHECK-NEXT: add x13, sp, #208 +; CHECK-NEXT: add x8, sp, #216 +; CHECK-NEXT: ld1 { v5.b }[4], [x15] +; CHECK-NEXT: ld1 { v1.b }[7], [x12] +; CHECK-NEXT: add x12, sp, #568 +; CHECK-NEXT: add x14, sp, #224 +; CHECK-NEXT: add x16, sp, #304 +; CHECK-NEXT: add x10, sp, #232 +; CHECK-NEXT: mov v0.b[6], w6 +; CHECK-NEXT: ld1 { v4.b }[5], [x12] +; CHECK-NEXT: add x12, sp, #376 +; CHECK-NEXT: ld1 { v5.b }[5], [x12] +; CHECK-NEXT: add x12, sp, #16 +; CHECK-NEXT: ld1 { v1.b }[8], [x13] +; CHECK-NEXT: add x13, sp, #576 +; CHECK-NEXT: ld1 { v2.b }[4], [x16] +; CHECK-NEXT: add x11, sp, #240 +; CHECK-NEXT: ld1 { v4.b }[6], [x13] +; CHECK-NEXT: add x13, sp, #384 +; CHECK-NEXT: add x9, sp, #248 +; CHECK-NEXT: mov v0.b[7], w7 ; CHECK-NEXT: ld1 { v1.b }[9], [x8] -; CHECK-NEXT: add x8, sp, #32 -; CHECK-NEXT: ld1 { v5.b }[4], [x11] +; CHECK-NEXT: ld1 { v5.b }[6], [x13] +; CHECK-NEXT: add x13, sp, #112 +; CHECK-NEXT: add x8, sp, #584 +; CHECK-NEXT: add x15, sp, #256 +; CHECK-NEXT: ld1 { v3.b }[4], [x13] +; CHECK-NEXT: add x13, sp, #32 +; CHECK-NEXT: ld1 { v4.b }[7], [x8] +; CHECK-NEXT: ld1 { v1.b }[10], [x14] +; CHECK-NEXT: add x14, sp, #312 +; CHECK-NEXT: add x8, sp, #40 +; CHECK-NEXT: ld1 { v0.b }[8], [x12] +; CHECK-NEXT: add x12, sp, #24 +; CHECK-NEXT: ld1 { v2.b }[5], [x14] +; CHECK-NEXT: add x14, sp, #592 +; CHECK-NEXT: add x16, sp, #264 +; CHECK-NEXT: movi v16.2d, #0000000000000000 +; CHECK-NEXT: ld1 { v1.b }[11], [x10] +; CHECK-NEXT: ld1 { v4.b }[8], [x14] +; CHECK-NEXT: add x14, sp, #400 +; CHECK-NEXT: ld1 { v0.b }[9], [x12] +; CHECK-NEXT: add x12, sp, #392 +; CHECK-NEXT: add x10, sp, #72 +; CHECK-NEXT: ld1 { v5.b }[7], [x12] +; CHECK-NEXT: add x12, sp, #48 +; CHECK-NEXT: movi v17.2d, #0000000000000000 +; CHECK-NEXT: ld1 { v1.b }[12], [x11] ; CHECK-NEXT: add x11, sp, #120 -; CHECK-NEXT: ld1 { v2.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #568 -; CHECK-NEXT: ld1 { v0.b }[10], [x10] -; CHECK-NEXT: add x10, sp, #232 +; CHECK-NEXT: movi v18.2d, #0000000000000000 +; CHECK-NEXT: ld1 { v0.b }[10], [x13] ; CHECK-NEXT: ld1 { v3.b }[5], [x11] -; CHECK-NEXT: add x11, sp, #376 -; CHECK-NEXT: ld1 { v1.b }[10], [x8] -; CHECK-NEXT: add x8, sp, #40 -; CHECK-NEXT: ld1 { v4.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #320 -; CHECK-NEXT: ld1 { v0.b }[11], [x10] -; CHECK-NEXT: add x10, sp, #240 -; CHECK-NEXT: ld1 { v5.b }[5], [x11] -; CHECK-NEXT: add x11, sp, #128 -; CHECK-NEXT: ld1 { v1.b }[11], [x8] -; CHECK-NEXT: add x8, sp, #48 -; CHECK-NEXT: ld1 { v2.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #576 -; CHECK-NEXT: ld1 { v0.b }[12], [x10] -; CHECK-NEXT: add x10, sp, #248 -; CHECK-NEXT: ld1 { v3.b }[6], [x11] -; CHECK-NEXT: add x11, sp, #384 -; CHECK-NEXT: ld1 { v1.b }[12], [x8] -; CHECK-NEXT: add x8, sp, #56 -; CHECK-NEXT: ld1 { v4.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #328 -; CHECK-NEXT: ld1 { v5.b }[6], [x11] -; CHECK-NEXT: add x11, sp, #584 -; CHECK-NEXT: ld1 { v0.b }[13], [x10] -; CHECK-NEXT: add x10, sp, #256 -; CHECK-NEXT: ld1 { v1.b }[13], [x8] -; CHECK-NEXT: 
add x8, sp, #64 -; CHECK-NEXT: ld1 { v2.b }[7], [x9] -; CHECK-NEXT: add x9, sp, #392 -; CHECK-NEXT: ld1 { v4.b }[7], [x11] -; CHECK-NEXT: add x11, sp, #592 -; CHECK-NEXT: ld1 { v0.b }[14], [x10] -; CHECK-NEXT: add x10, sp, #264 -; CHECK-NEXT: ld1 { v5.b }[7], [x9] -; CHECK-NEXT: add x9, sp, #136 -; CHECK-NEXT: ld1 { v1.b }[14], [x8] -; CHECK-NEXT: add x8, sp, #72 -; CHECK-NEXT: ld1 { v4.b }[8], [x11] -; CHECK-NEXT: add x11, sp, #400 -; CHECK-NEXT: ld1 { v0.b }[15], [x10] -; CHECK-NEXT: add x10, sp, #600 -; CHECK-NEXT: ld1 { v3.b }[7], [x9] -; CHECK-NEXT: add x9, sp, #664 -; CHECK-NEXT: ld1 { v5.b }[8], [x11] +; CHECK-NEXT: add x11, sp, #408 +; CHECK-NEXT: ld1 { v5.b }[8], [x14] +; CHECK-NEXT: add x13, sp, #56 +; CHECK-NEXT: add x14, sp, #64 +; CHECK-NEXT: ld1 { v1.b }[13], [x9] +; CHECK-NEXT: add x9, sp, #616 +; CHECK-NEXT: movi v19.2d, #0000000000000000 +; CHECK-NEXT: ld1 { v0.b }[11], [x8] +; CHECK-NEXT: add x8, sp, #600 +; CHECK-NEXT: ld1 { v4.b }[9], [x8] +; CHECK-NEXT: ld1 { v5.b }[9], [x11] ; CHECK-NEXT: add x11, sp, #608 -; CHECK-NEXT: ld1 { v1.b }[15], [x8] -; CHECK-NEXT: add x8, sp, #408 -; CHECK-NEXT: ld1 { v4.b }[9], [x10] -; CHECK-NEXT: add x10, sp, #472 -; CHECK-NEXT: ld1 { v7.b }[1], [x9] -; CHECK-NEXT: add x9, sp, #672 -; CHECK-NEXT: ld1 { v5.b }[9], [x8] -; CHECK-NEXT: add x8, sp, #416 -; CHECK-NEXT: ld1 { v16.b }[1], [x10] -; CHECK-NEXT: add x10, sp, #480 +; CHECK-NEXT: ld1 { v1.b }[14], [x15] +; CHECK-NEXT: add x15, sp, #488 +; CHECK-NEXT: add x8, sp, #320 +; CHECK-NEXT: ld1 { v0.b }[12], [x12] +; CHECK-NEXT: ld1 { v7.b }[3], [x15] +; CHECK-NEXT: ld1 { v2.b }[6], [x8] ; CHECK-NEXT: ld1 { v4.b }[10], [x11] -; CHECK-NEXT: add x11, sp, #616 -; CHECK-NEXT: ld1 { v7.b }[2], [x9] +; CHECK-NEXT: add x8, sp, #624 +; CHECK-NEXT: add x12, sp, #328 +; CHECK-NEXT: add x11, sp, #128 +; CHECK-NEXT: ld1 { v1.b }[15], [x16] +; CHECK-NEXT: ld1 { v0.b }[13], [x13] +; CHECK-NEXT: add x13, sp, #416 +; CHECK-NEXT: ld1 { v3.b }[6], [x11] +; CHECK-NEXT: ld1 { v5.b }[10], [x13] +; CHECK-NEXT: ld1 { v4.b }[11], [x9] ; CHECK-NEXT: add x9, sp, #680 -; CHECK-NEXT: ld1 { v5.b }[10], [x8] -; CHECK-NEXT: add x8, sp, #424 -; CHECK-NEXT: ld1 { v16.b }[2], [x10] -; CHECK-NEXT: add x10, sp, #488 -; CHECK-NEXT: ld1 { v4.b }[11], [x11] -; CHECK-NEXT: add x11, sp, #624 -; CHECK-NEXT: ld1 { v7.b }[3], [x9] +; CHECK-NEXT: ld1 { v6.b }[3], [x9] ; CHECK-NEXT: add x9, sp, #688 -; CHECK-NEXT: ld1 { v5.b }[11], [x8] +; CHECK-NEXT: add x13, sp, #632 +; CHECK-NEXT: ld1 { v0.b }[14], [x14] +; CHECK-NEXT: add x14, sp, #424 +; CHECK-NEXT: ld1 { v2.b }[7], [x12] +; CHECK-NEXT: ld1 { v5.b }[11], [x14] +; CHECK-NEXT: ld1 { v4.b }[12], [x8] ; CHECK-NEXT: add x8, sp, #432 -; CHECK-NEXT: ld1 { v16.b }[3], [x10] +; CHECK-NEXT: ld1 { v6.b }[4], [x9] +; CHECK-NEXT: add x11, sp, #696 +; CHECK-NEXT: add x12, sp, #504 +; CHECK-NEXT: ld1 { v0.b }[15], [x10] ; CHECK-NEXT: add x10, sp, #496 -; CHECK-NEXT: ld1 { v4.b }[12], [x11] -; CHECK-NEXT: add x11, sp, #632 -; CHECK-NEXT: ld1 { v7.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #696 +; CHECK-NEXT: add x9, sp, #640 ; CHECK-NEXT: ld1 { v5.b }[12], [x8] -; CHECK-NEXT: add x8, sp, #440 -; CHECK-NEXT: ld1 { v16.b }[4], [x10] -; CHECK-NEXT: add x10, sp, #504 -; CHECK-NEXT: ld1 { v4.b }[13], [x11] -; CHECK-NEXT: add x11, sp, #640 -; CHECK-NEXT: ld1 { v7.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #704 -; CHECK-NEXT: ld1 { v5.b }[13], [x8] -; CHECK-NEXT: add x8, sp, #448 -; CHECK-NEXT: ld1 { v16.b }[5], [x10] -; CHECK-NEXT: add x10, sp, #512 -; CHECK-NEXT: ld1 { v4.b }[14], [x11] -; CHECK-NEXT: add 
x11, sp, #648 -; CHECK-NEXT: ld1 { v7.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #712 -; CHECK-NEXT: ld1 { v5.b }[14], [x8] -; CHECK-NEXT: add x8, sp, #456 -; CHECK-NEXT: ld1 { v16.b }[6], [x10] -; CHECK-NEXT: add x10, sp, #520 -; CHECK-NEXT: movi v6.2d, #0000000000000000 -; CHECK-NEXT: ld1 { v4.b }[15], [x11] -; CHECK-NEXT: movi v17.2d, #0000000000000000 -; CHECK-NEXT: ld1 { v7.b }[7], [x9] -; CHECK-NEXT: ld1 { v5.b }[15], [x8] -; CHECK-NEXT: movi v18.2d, #0000000000000000 -; CHECK-NEXT: ld1 { v16.b }[7], [x10] -; CHECK-NEXT: movi v19.2d, #0000000000000000 -; CHECK-NEXT: sdot v17.4s, v1.16b, v0.16b -; CHECK-NEXT: sdot v6.4s, v5.16b, v4.16b -; CHECK-NEXT: sdot v18.2s, v3.8b, v2.8b -; CHECK-NEXT: sdot v19.2s, v16.8b, v7.8b +; CHECK-NEXT: ld1 { v7.b }[4], [x10] +; CHECK-NEXT: ld1 { v4.b }[13], [x13] +; CHECK-NEXT: add x10, sp, #440 +; CHECK-NEXT: ld1 { v6.b }[5], [x11] +; CHECK-NEXT: add x11, sp, #512 +; CHECK-NEXT: add x8, sp, #136 +; CHECK-NEXT: sdot v17.4s, v0.16b, v1.16b +; CHECK-NEXT: ld1 { v5.b }[13], [x10] +; CHECK-NEXT: ld1 { v7.b }[5], [x12] +; CHECK-NEXT: ld1 { v4.b }[14], [x9] +; CHECK-NEXT: add x9, sp, #448 +; CHECK-NEXT: add x10, sp, #704 +; CHECK-NEXT: ld1 { v3.b }[7], [x8] +; CHECK-NEXT: ld1 { v6.b }[6], [x10] +; CHECK-NEXT: add x8, sp, #648 +; CHECK-NEXT: add x10, sp, #520 +; CHECK-NEXT: ld1 { v5.b }[14], [x9] +; CHECK-NEXT: ld1 { v7.b }[6], [x11] +; CHECK-NEXT: ld1 { v4.b }[15], [x8] +; CHECK-NEXT: add x8, sp, #456 +; CHECK-NEXT: add x9, sp, #712 +; CHECK-NEXT: sdot v19.2s, v3.8b, v2.8b +; CHECK-NEXT: ld1 { v6.b }[7], [x9] ; CHECK-NEXT: addv s0, v17.4s -; CHECK-NEXT: addv s2, v6.4s -; CHECK-NEXT: addp v1.2s, v18.2s, v18.2s -; CHECK-NEXT: addp v3.2s, v19.2s, v19.2s +; CHECK-NEXT: ld1 { v5.b }[15], [x8] +; CHECK-NEXT: ld1 { v7.b }[7], [x10] +; CHECK-NEXT: addp v1.2s, v19.2s, v19.2s +; CHECK-NEXT: sdot v16.4s, v5.16b, v4.16b +; CHECK-NEXT: sdot v18.2s, v7.8b, v6.8b ; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: fmov w10, s2 ; CHECK-NEXT: fmov w9, s1 -; CHECK-NEXT: fmov w11, s3 +; CHECK-NEXT: addv s2, v16.4s +; CHECK-NEXT: addp v3.2s, v18.2s, v18.2s ; CHECK-NEXT: add w8, w8, w9 +; CHECK-NEXT: fmov w10, s2 +; CHECK-NEXT: fmov w11, s3 ; CHECK-NEXT: add w9, w10, w11 ; CHECK-NEXT: add w0, w8, w9 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -880,107 +880,107 @@ ; CHECK-NEXT: fmov s0, w0 ; CHECK-NEXT: ldr b1, [sp, #336] ; CHECK-NEXT: add x8, sp, #344 -; CHECK-NEXT: add x9, sp, #392 -; CHECK-NEXT: add x10, sp, #24 +; CHECK-NEXT: add x9, sp, #400 ; CHECK-NEXT: ldr b2, [sp, #80] -; CHECK-NEXT: mov v0.b[1], w1 -; CHECK-NEXT: add x13, sp, #88 +; CHECK-NEXT: ldr b3, [sp, #464] ; CHECK-NEXT: ld1 { v1.b }[1], [x8] ; CHECK-NEXT: add x8, sp, #352 -; CHECK-NEXT: ldr b3, [sp, #464] -; CHECK-NEXT: add x14, sp, #472 -; CHECK-NEXT: ld1 { v2.b }[1], [x13] -; CHECK-NEXT: add x11, sp, #32 -; CHECK-NEXT: mov v0.b[2], w2 -; CHECK-NEXT: add x13, sp, #96 +; CHECK-NEXT: add x10, sp, #408 +; CHECK-NEXT: mov v0.b[1], w1 +; CHECK-NEXT: add x11, sp, #472 +; CHECK-NEXT: add x12, sp, #480 +; CHECK-NEXT: ld1 { v3.b }[1], [x11] +; CHECK-NEXT: add x11, sp, #416 +; CHECK-NEXT: add x13, sp, #488 ; CHECK-NEXT: ld1 { v1.b }[2], [x8] ; CHECK-NEXT: add x8, sp, #360 -; CHECK-NEXT: ld1 { v3.b }[1], [x14] -; CHECK-NEXT: add x14, sp, #480 -; CHECK-NEXT: ld1 { v2.b }[2], [x13] -; CHECK-NEXT: add x12, sp, #40 -; CHECK-NEXT: mov v0.b[3], w3 -; CHECK-NEXT: add x13, sp, #104 +; CHECK-NEXT: add x14, sp, #496 +; CHECK-NEXT: movi v4.16b, #1 +; CHECK-NEXT: movi v5.2d, #0000000000000000 +; CHECK-NEXT: movi v6.2d, 
#0000000000000000 +; CHECK-NEXT: mov v0.b[2], w2 +; CHECK-NEXT: ld1 { v3.b }[2], [x12] +; CHECK-NEXT: add x12, sp, #424 ; CHECK-NEXT: ld1 { v1.b }[3], [x8] ; CHECK-NEXT: add x8, sp, #368 -; CHECK-NEXT: ld1 { v3.b }[2], [x14] -; CHECK-NEXT: add x14, sp, #488 -; CHECK-NEXT: ld1 { v2.b }[3], [x13] -; CHECK-NEXT: add x13, sp, #112 -; CHECK-NEXT: mov v0.b[4], w4 -; CHECK-NEXT: movi v6.8b, #1 +; CHECK-NEXT: movi v7.2d, #0000000000000000 +; CHECK-NEXT: movi v16.8b, #1 +; CHECK-NEXT: movi v17.2d, #0000000000000000 +; CHECK-NEXT: ld1 { v3.b }[3], [x13] +; CHECK-NEXT: add x13, sp, #432 +; CHECK-NEXT: mov v0.b[3], w3 ; CHECK-NEXT: ld1 { v1.b }[4], [x8] ; CHECK-NEXT: add x8, sp, #376 -; CHECK-NEXT: ld1 { v3.b }[3], [x14] -; CHECK-NEXT: add x14, sp, #496 -; CHECK-NEXT: ld1 { v2.b }[4], [x13] -; CHECK-NEXT: add x13, sp, #120 -; CHECK-NEXT: mov v0.b[5], w5 +; CHECK-NEXT: ld1 { v3.b }[4], [x14] ; CHECK-NEXT: ld1 { v1.b }[5], [x8] ; CHECK-NEXT: add x8, sp, #384 -; CHECK-NEXT: ld1 { v3.b }[4], [x14] -; CHECK-NEXT: add x14, sp, #504 -; CHECK-NEXT: ld1 { v2.b }[5], [x13] -; CHECK-NEXT: add x13, sp, #512 -; CHECK-NEXT: mov v0.b[6], w6 +; CHECK-NEXT: mov v0.b[4], w4 ; CHECK-NEXT: ld1 { v1.b }[6], [x8] +; CHECK-NEXT: add x8, sp, #392 +; CHECK-NEXT: mov v0.b[5], w5 +; CHECK-NEXT: ld1 { v1.b }[7], [x8] ; CHECK-NEXT: add x8, sp, #16 -; CHECK-NEXT: ld1 { v3.b }[5], [x14] -; CHECK-NEXT: movi v4.16b, #1 +; CHECK-NEXT: mov v0.b[6], w6 +; CHECK-NEXT: ld1 { v1.b }[8], [x9] +; CHECK-NEXT: add x9, sp, #88 +; CHECK-NEXT: ld1 { v2.b }[1], [x9] +; CHECK-NEXT: add x9, sp, #40 +; CHECK-NEXT: ld1 { v1.b }[9], [x10] +; CHECK-NEXT: add x10, sp, #96 ; CHECK-NEXT: mov v0.b[7], w7 -; CHECK-NEXT: ld1 { v1.b }[7], [x9] -; CHECK-NEXT: add x9, sp, #400 -; CHECK-NEXT: ld1 { v3.b }[6], [x13] -; CHECK-NEXT: movi v5.2d, #0000000000000000 +; CHECK-NEXT: ld1 { v2.b }[2], [x10] +; CHECK-NEXT: add x10, sp, #56 +; CHECK-NEXT: ld1 { v1.b }[10], [x11] +; CHECK-NEXT: add x11, sp, #104 +; CHECK-NEXT: ld1 { v2.b }[3], [x11] +; CHECK-NEXT: add x11, sp, #72 ; CHECK-NEXT: ld1 { v0.b }[8], [x8] +; CHECK-NEXT: add x8, sp, #24 +; CHECK-NEXT: ld1 { v1.b }[11], [x12] +; CHECK-NEXT: add x12, sp, #112 +; CHECK-NEXT: ld1 { v2.b }[4], [x12] +; CHECK-NEXT: add x12, sp, #440 +; CHECK-NEXT: ld1 { v0.b }[9], [x8] +; CHECK-NEXT: add x8, sp, #32 +; CHECK-NEXT: ld1 { v1.b }[12], [x13] +; CHECK-NEXT: add x13, sp, #504 +; CHECK-NEXT: ld1 { v3.b }[5], [x13] +; CHECK-NEXT: add x13, sp, #512 +; CHECK-NEXT: ld1 { v0.b }[10], [x8] ; CHECK-NEXT: add x8, sp, #48 -; CHECK-NEXT: ld1 { v1.b }[8], [x9] -; CHECK-NEXT: add x9, sp, #408 -; CHECK-NEXT: movi v7.2d, #0000000000000000 -; CHECK-NEXT: movi v16.2d, #0000000000000000 -; CHECK-NEXT: ld1 { v0.b }[9], [x10] -; CHECK-NEXT: add x10, sp, #56 -; CHECK-NEXT: ld1 { v1.b }[9], [x9] -; CHECK-NEXT: add x9, sp, #416 -; CHECK-NEXT: movi v17.2d, #0000000000000000 -; CHECK-NEXT: ld1 { v0.b }[10], [x11] -; CHECK-NEXT: add x11, sp, #64 -; CHECK-NEXT: ld1 { v1.b }[10], [x9] -; CHECK-NEXT: add x9, sp, #424 -; CHECK-NEXT: ld1 { v0.b }[11], [x12] -; CHECK-NEXT: add x12, sp, #72 -; CHECK-NEXT: ld1 { v1.b }[11], [x9] -; CHECK-NEXT: add x9, sp, #432 +; CHECK-NEXT: ld1 { v1.b }[13], [x12] +; CHECK-NEXT: add x12, sp, #448 +; CHECK-NEXT: ld1 { v3.b }[6], [x13] +; CHECK-NEXT: ld1 { v0.b }[11], [x9] +; CHECK-NEXT: add x9, sp, #64 +; CHECK-NEXT: ld1 { v1.b }[14], [x12] ; CHECK-NEXT: ld1 { v0.b }[12], [x8] -; CHECK-NEXT: add x8, sp, #440 -; CHECK-NEXT: ld1 { v1.b }[12], [x9] -; CHECK-NEXT: add x9, sp, #128 +; CHECK-NEXT: add x8, sp, #120 +; CHECK-NEXT: ld1 { 
v2.b }[5], [x8] +; CHECK-NEXT: add x8, sp, #128 ; CHECK-NEXT: ld1 { v0.b }[13], [x10] ; CHECK-NEXT: add x10, sp, #136 -; CHECK-NEXT: ld1 { v1.b }[13], [x8] -; CHECK-NEXT: add x8, sp, #448 -; CHECK-NEXT: ld1 { v2.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #520 -; CHECK-NEXT: ld1 { v0.b }[14], [x11] -; CHECK-NEXT: ld1 { v1.b }[14], [x8] +; CHECK-NEXT: ld1 { v2.b }[6], [x8] ; CHECK-NEXT: add x8, sp, #456 +; CHECK-NEXT: ld1 { v1.b }[15], [x8] +; CHECK-NEXT: ld1 { v0.b }[14], [x9] +; CHECK-NEXT: add x9, sp, #520 ; CHECK-NEXT: ld1 { v2.b }[7], [x10] ; CHECK-NEXT: ld1 { v3.b }[7], [x9] -; CHECK-NEXT: ld1 { v0.b }[15], [x12] -; CHECK-NEXT: ld1 { v1.b }[15], [x8] -; CHECK-NEXT: sdot v7.2s, v2.8b, v6.8b -; CHECK-NEXT: sdot v5.2s, v3.8b, v6.8b -; CHECK-NEXT: sdot v16.4s, v0.16b, v4.16b -; CHECK-NEXT: sdot v17.4s, v1.16b, v4.16b -; CHECK-NEXT: addp v0.2s, v7.2s, v7.2s -; CHECK-NEXT: addp v1.2s, v5.2s, v5.2s -; CHECK-NEXT: addv s2, v16.4s -; CHECK-NEXT: addv s3, v17.4s -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: fmov w9, s2 -; CHECK-NEXT: fmov w11, s1 +; CHECK-NEXT: sdot v5.4s, v1.16b, v4.16b +; CHECK-NEXT: ld1 { v0.b }[15], [x11] +; CHECK-NEXT: sdot v17.2s, v2.8b, v16.8b +; CHECK-NEXT: sdot v7.2s, v3.8b, v16.8b +; CHECK-NEXT: sdot v6.4s, v0.16b, v4.16b +; CHECK-NEXT: addv s3, v5.4s +; CHECK-NEXT: addp v1.2s, v17.2s, v17.2s +; CHECK-NEXT: addp v2.2s, v7.2s, v7.2s +; CHECK-NEXT: addv s0, v6.4s ; CHECK-NEXT: fmov w10, s3 -; CHECK-NEXT: add w8, w9, w8 +; CHECK-NEXT: fmov w9, s1 +; CHECK-NEXT: fmov w11, s2 +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: add w8, w8, w9 ; CHECK-NEXT: add w9, w10, w11 ; CHECK-NEXT: add w0, w8, w9 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -998,26 +998,26 @@ define i32 @test_udot_v25i8(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %sum) { ; CHECK-LABEL: test_udot_v25i8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldp q1, q4, [x1] -; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: ldp q3, q2, [x0] -; CHECK-NEXT: ushll2 v7.8h, v4.16b, #0 -; CHECK-NEXT: ushll v4.8h, v4.8b, #0 -; CHECK-NEXT: ushll2 v6.8h, v3.16b, #0 -; CHECK-NEXT: ushll v3.8h, v3.8b, #0 -; CHECK-NEXT: ushll v5.8h, v2.8b, #0 -; CHECK-NEXT: ushll2 v2.8h, v2.16b, #0 -; CHECK-NEXT: umull v2.4s, v7.4h, v2.4h -; CHECK-NEXT: ushll v7.8h, v1.8b, #0 -; CHECK-NEXT: ushll2 v1.8h, v1.16b, #0 -; CHECK-NEXT: umull2 v16.4s, v7.8h, v3.8h -; CHECK-NEXT: mov v0.s[0], v2.s[0] -; CHECK-NEXT: umull v2.4s, v7.4h, v3.4h -; CHECK-NEXT: umlal2 v16.4s, v4.8h, v5.8h -; CHECK-NEXT: umlal v0.4s, v1.4h, v6.4h -; CHECK-NEXT: umlal v2.4s, v4.4h, v5.4h -; CHECK-NEXT: umlal2 v16.4s, v1.8h, v6.8h -; CHECK-NEXT: add v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ldp q4, q0, [x0] +; CHECK-NEXT: ldp q5, q1, [x1] +; CHECK-NEXT: ushll2 v2.8h, v0.16b, #0 +; CHECK-NEXT: ushll v6.8h, v4.8b, #0 +; CHECK-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-NEXT: ushll2 v3.8h, v1.16b, #0 +; CHECK-NEXT: ushll v7.8h, v5.8b, #0 +; CHECK-NEXT: ushll v1.8h, v1.8b, #0 +; CHECK-NEXT: umull v2.4s, v3.4h, v2.4h +; CHECK-NEXT: movi v3.2d, #0000000000000000 +; CHECK-NEXT: umull2 v16.4s, v7.8h, v6.8h +; CHECK-NEXT: umull v6.4s, v7.4h, v6.4h +; CHECK-NEXT: mov v3.s[0], v2.s[0] +; CHECK-NEXT: ushll2 v2.8h, v4.16b, #0 +; CHECK-NEXT: ushll2 v4.8h, v5.16b, #0 +; CHECK-NEXT: umlal v6.4s, v1.4h, v0.4h +; CHECK-NEXT: umlal2 v16.4s, v1.8h, v0.8h +; CHECK-NEXT: umlal v3.4s, v4.4h, v2.4h +; CHECK-NEXT: umlal2 v16.4s, v4.8h, v2.8h +; CHECK-NEXT: add v0.4s, v6.4s, v3.4s ; CHECK-NEXT: add v0.4s, v0.4s, v16.4s ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w8, s0 @@ -1037,19 +1037,19 
@@ define i32 @test_udot_v25i8_nomla(ptr nocapture readonly %a1) { ; CHECK-LABEL: test_udot_v25i8_nomla: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: ldp q2, q1, [x0] ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: ushll2 v3.8h, v1.16b, #0 -; CHECK-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-NEXT: ushll2 v4.8h, v2.16b, #0 -; CHECK-NEXT: ushll v2.8h, v2.8b, #0 -; CHECK-NEXT: ushll v4.4s, v4.4h, #0 -; CHECK-NEXT: uaddl2 v5.4s, v1.8h, v2.8h -; CHECK-NEXT: mov v0.s[0], v4.s[0] -; CHECK-NEXT: uaddl v1.4s, v1.4h, v2.4h -; CHECK-NEXT: uaddw2 v2.4s, v5.4s, v3.8h -; CHECK-NEXT: uaddw v0.4s, v0.4s, v3.4h -; CHECK-NEXT: add v1.4s, v1.4s, v2.4s +; CHECK-NEXT: ushll v3.8h, v1.8b, #0 +; CHECK-NEXT: ushll v4.8h, v2.8b, #0 +; CHECK-NEXT: ushll2 v1.8h, v1.16b, #0 +; CHECK-NEXT: ushll2 v2.8h, v2.16b, #0 +; CHECK-NEXT: uaddl2 v5.4s, v4.8h, v3.8h +; CHECK-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-NEXT: uaddl v3.4s, v4.4h, v3.4h +; CHECK-NEXT: mov v0.s[0], v1.s[0] +; CHECK-NEXT: uaddw2 v1.4s, v5.4s, v2.8h +; CHECK-NEXT: uaddw v0.4s, v0.4s, v2.4h +; CHECK-NEXT: add v1.4s, v3.4s, v1.4s ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w0, s0 @@ -1063,26 +1063,26 @@ define i32 @test_sdot_v25i8(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %sum) { ; CHECK-LABEL: test_sdot_v25i8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldp q1, q4, [x1] -; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: ldp q3, q2, [x0] -; CHECK-NEXT: sshll2 v7.8h, v4.16b, #0 -; CHECK-NEXT: sshll v4.8h, v4.8b, #0 -; CHECK-NEXT: sshll2 v6.8h, v3.16b, #0 -; CHECK-NEXT: sshll v3.8h, v3.8b, #0 -; CHECK-NEXT: sshll v5.8h, v2.8b, #0 -; CHECK-NEXT: sshll2 v2.8h, v2.16b, #0 -; CHECK-NEXT: smull v2.4s, v7.4h, v2.4h -; CHECK-NEXT: sshll v7.8h, v1.8b, #0 -; CHECK-NEXT: sshll2 v1.8h, v1.16b, #0 -; CHECK-NEXT: smull2 v16.4s, v7.8h, v3.8h -; CHECK-NEXT: mov v0.s[0], v2.s[0] -; CHECK-NEXT: smull v2.4s, v7.4h, v3.4h -; CHECK-NEXT: smlal2 v16.4s, v4.8h, v5.8h -; CHECK-NEXT: smlal v0.4s, v1.4h, v6.4h -; CHECK-NEXT: smlal v2.4s, v4.4h, v5.4h -; CHECK-NEXT: smlal2 v16.4s, v1.8h, v6.8h -; CHECK-NEXT: add v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ldp q4, q0, [x0] +; CHECK-NEXT: ldp q5, q1, [x1] +; CHECK-NEXT: sshll2 v2.8h, v0.16b, #0 +; CHECK-NEXT: sshll v6.8h, v4.8b, #0 +; CHECK-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-NEXT: sshll2 v3.8h, v1.16b, #0 +; CHECK-NEXT: sshll v7.8h, v5.8b, #0 +; CHECK-NEXT: sshll v1.8h, v1.8b, #0 +; CHECK-NEXT: smull v2.4s, v3.4h, v2.4h +; CHECK-NEXT: movi v3.2d, #0000000000000000 +; CHECK-NEXT: smull2 v16.4s, v7.8h, v6.8h +; CHECK-NEXT: smull v6.4s, v7.4h, v6.4h +; CHECK-NEXT: mov v3.s[0], v2.s[0] +; CHECK-NEXT: sshll2 v2.8h, v4.16b, #0 +; CHECK-NEXT: sshll2 v4.8h, v5.16b, #0 +; CHECK-NEXT: smlal v6.4s, v1.4h, v0.4h +; CHECK-NEXT: smlal2 v16.4s, v1.8h, v0.8h +; CHECK-NEXT: smlal v3.4s, v4.4h, v2.4h +; CHECK-NEXT: smlal2 v16.4s, v4.8h, v2.8h +; CHECK-NEXT: add v0.4s, v6.4s, v3.4s ; CHECK-NEXT: add v0.4s, v0.4s, v16.4s ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w8, s0 @@ -1105,221 +1105,221 @@ ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: ldr b2, [sp, #80] +; CHECK-NEXT: fmov s3, w0 +; CHECK-NEXT: ldr b0, [sp, #80] ; CHECK-NEXT: add x8, sp, #88 -; CHECK-NEXT: ldr b0, [sp, #16] -; CHECK-NEXT: add x9, sp, #24 -; CHECK-NEXT: add x10, sp, #40 -; CHECK-NEXT: add x11, sp, #128 -; CHECK-NEXT: ld1 { v2.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #96 -; CHECK-NEXT: ld1 { v0.b }[1], [x9] -; CHECK-NEXT: add x9, sp, #32 -; CHECK-NEXT: ldr b17, [sp, #152] -; CHECK-NEXT: fmov s4, w0 -; CHECK-NEXT: ldr b6, [sp, #280] -; CHECK-NEXT: add x12, sp, #224 -; CHECK-NEXT: ld1 { v2.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #104 -; CHECK-NEXT: ld1 { v0.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #112 -; CHECK-NEXT: ldr b1, [sp, #216] -; CHECK-NEXT: mov v4.b[1], w1 -; CHECK-NEXT: ldr b3, [sp, #480] -; CHECK-NEXT: ld1 { v2.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #120 -; CHECK-NEXT: ld1 { v0.b }[3], [x10] -; CHECK-NEXT: add x10, sp, #48 -; CHECK-NEXT: ld1 { v1.b }[1], [x12] -; CHECK-NEXT: mov v4.b[2], w2 -; CHECK-NEXT: ldr b18, [sp, #352] -; CHECK-NEXT: ld1 { v2.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #56 -; CHECK-NEXT: ld1 { v0.b }[4], [x10] +; CHECK-NEXT: ldr b1, [sp, #16] +; CHECK-NEXT: add x10, sp, #24 +; CHECK-NEXT: ldr b2, [sp, #280] +; CHECK-NEXT: ld1 { v0.b }[1], [x8] +; CHECK-NEXT: ldr b5, [sp, #152] +; CHECK-NEXT: add x9, sp, #96 +; CHECK-NEXT: mov v3.b[1], w1 +; CHECK-NEXT: ld1 { v1.b }[1], [x10] ; CHECK-NEXT: add x10, sp, #288 -; CHECK-NEXT: ldr b20, [sp, #680] -; CHECK-NEXT: mov v4.b[3], w3 -; CHECK-NEXT: ldr b5, [sp, #144] -; CHECK-NEXT: ld1 { v2.b }[5], [x8] -; CHECK-NEXT: add x8, sp, #160 -; CHECK-NEXT: ld1 { v0.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #64 -; CHECK-NEXT: ld1 { v6.b }[1], [x10] -; CHECK-NEXT: add x10, sp, #296 -; CHECK-NEXT: ld1 { v17.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #136 -; CHECK-NEXT: ld1 { v2.b }[6], [x11] -; CHECK-NEXT: add x11, sp, #320 -; CHECK-NEXT: ld1 { v0.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #72 -; CHECK-NEXT: ld1 { v6.b }[2], [x10] -; CHECK-NEXT: add x10, sp, #304 -; CHECK-NEXT: mov v4.b[4], w4 -; CHECK-NEXT: ld1 { v2.b }[7], [x8] -; CHECK-NEXT: add x8, sp, #168 -; CHECK-NEXT: ld1 { v0.b }[7], [x9] -; CHECK-NEXT: add x9, sp, #232 -; CHECK-NEXT: ld1 { v6.b }[3], [x10] -; CHECK-NEXT: add x10, sp, #312 -; CHECK-NEXT: ld1 { v17.b }[2], [x8] +; CHECK-NEXT: ld1 { v2.b }[1], [x10] +; CHECK-NEXT: add x10, sp, #160 +; CHECK-NEXT: ldr b4, [sp, #216] +; CHECK-NEXT: ld1 { v0.b }[2], [x9] +; CHECK-NEXT: ld1 { v5.b }[1], [x10] +; CHECK-NEXT: add x10, sp, #32 +; CHECK-NEXT: add x11, sp, #224 +; CHECK-NEXT: ld1 { v1.b }[2], [x10] +; CHECK-NEXT: add x8, sp, #104 +; CHECK-NEXT: mov v3.b[2], w2 +; CHECK-NEXT: ld1 { v4.b }[1], [x11] +; CHECK-NEXT: add x11, sp, #296 +; CHECK-NEXT: ld1 { v0.b }[3], [x8] +; CHECK-NEXT: add x8, sp, #40 +; CHECK-NEXT: add x10, sp, #232 +; CHECK-NEXT: ld1 { v1.b }[3], [x8] +; CHECK-NEXT: ld1 { v2.b }[2], [x11] +; CHECK-NEXT: add x11, sp, #168 +; CHECK-NEXT: ld1 { v4.b }[2], [x10] +; CHECK-NEXT: ld1 { v5.b }[2], [x11] +; CHECK-NEXT: add x13, sp, #48 +; CHECK-NEXT: mov v3.b[3], w3 +; CHECK-NEXT: add x8, sp, #240 +; CHECK-NEXT: add x15, sp, #56 +; CHECK-NEXT: ld1 { v1.b }[4], [x13] +; CHECK-NEXT: add x12, sp, #112 +; CHECK-NEXT: add x11, sp, #304 +; CHECK-NEXT: ld1 { v4.b }[3], [x8] ; CHECK-NEXT: add x8, sp, #176 -; CHECK-NEXT: ld1 { v1.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #488 -; CHECK-NEXT: mov v4.b[5], w5 -; CHECK-NEXT: ld1 { v6.b }[4], [x10] -; CHECK-NEXT: add x10, sp, #240 -; CHECK-NEXT: 
ld1 { v3.b }[1], [x9] -; CHECK-NEXT: add x9, sp, #496 -; CHECK-NEXT: ld1 { v17.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #184 -; CHECK-NEXT: ld1 { v1.b }[3], [x10] -; CHECK-NEXT: add x10, sp, #248 -; CHECK-NEXT: mov v4.b[6], w6 -; CHECK-NEXT: ld1 { v6.b }[5], [x11] -; CHECK-NEXT: ld1 { v3.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #504 -; CHECK-NEXT: ld1 { v17.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #192 -; CHECK-NEXT: ld1 { v1.b }[4], [x10] -; CHECK-NEXT: add x10, sp, #256 -; CHECK-NEXT: add x11, sp, #328 -; CHECK-NEXT: ld1 { v3.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #512 -; CHECK-NEXT: ld1 { v17.b }[5], [x8] -; CHECK-NEXT: add x8, sp, #200 -; CHECK-NEXT: ld1 { v1.b }[5], [x10] -; CHECK-NEXT: add x10, sp, #264 -; CHECK-NEXT: mov v4.b[7], w7 -; CHECK-NEXT: ld1 { v6.b }[6], [x11] -; CHECK-NEXT: ld1 { v3.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #520 -; CHECK-NEXT: ld1 { v17.b }[6], [x8] -; CHECK-NEXT: add x8, sp, #208 -; CHECK-NEXT: ld1 { v1.b }[6], [x10] -; CHECK-NEXT: add x11, sp, #336 -; CHECK-NEXT: add x10, sp, #272 -; CHECK-NEXT: ld1 { v3.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #536 -; CHECK-NEXT: ld1 { v17.b }[7], [x8] -; CHECK-NEXT: add x8, sp, #528 -; CHECK-NEXT: sshll v19.8h, v4.8b, #0 -; CHECK-NEXT: ldr b4, [sp, #416] -; CHECK-NEXT: ld1 { v6.b }[7], [x11] -; CHECK-NEXT: add x11, sp, #688 -; CHECK-NEXT: ld1 { v3.b }[6], [x8] -; CHECK-NEXT: add x8, sp, #424 -; CHECK-NEXT: ld1 { v1.b }[7], [x10] -; CHECK-NEXT: add x10, sp, #360 -; CHECK-NEXT: sshll v7.8h, v2.8b, #0 -; CHECK-NEXT: ldr b2, [sp, #344] -; CHECK-NEXT: ld1 { v4.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #432 -; CHECK-NEXT: sshll v17.8h, v17.8b, #0 -; CHECK-NEXT: ld1 { v18.b }[1], [x10] -; CHECK-NEXT: sshll v16.8h, v6.8b, #0 -; CHECK-NEXT: ld1 { v3.b }[7], [x9] -; CHECK-NEXT: sshll v6.8h, v2.8b, #0 -; CHECK-NEXT: add x9, sp, #560 -; CHECK-NEXT: smull v2.4s, v19.4h, v17.4h -; CHECK-NEXT: ld1 { v4.b }[2], [x8] -; CHECK-NEXT: smull2 v17.4s, v19.8h, v17.8h -; CHECK-NEXT: ldr b19, [sp, #552] +; CHECK-NEXT: ld1 { v0.b }[4], [x12] +; CHECK-NEXT: ld1 { v5.b }[3], [x8] +; CHECK-NEXT: add x12, sp, #184 +; CHECK-NEXT: ld1 { v2.b }[3], [x11] +; CHECK-NEXT: mov v3.b[4], w4 +; CHECK-NEXT: ld1 { v1.b }[5], [x15] +; CHECK-NEXT: add x11, sp, #64 +; CHECK-NEXT: add x9, sp, #120 +; CHECK-NEXT: add x8, sp, #312 +; CHECK-NEXT: ldr b6, [sp, #352] +; CHECK-NEXT: ld1 { v5.b }[4], [x12] +; CHECK-NEXT: ld1 { v0.b }[5], [x9] +; CHECK-NEXT: add x9, sp, #192 +; CHECK-NEXT: ld1 { v1.b }[6], [x11] +; CHECK-NEXT: ld1 { v2.b }[4], [x8] +; CHECK-NEXT: add x8, sp, #72 +; CHECK-NEXT: mov v3.b[5], w5 +; CHECK-NEXT: add x11, sp, #360 +; CHECK-NEXT: ldr b16, [sp, #552] +; CHECK-NEXT: ld1 { v5.b }[5], [x9] +; CHECK-NEXT: ld1 { v6.b }[1], [x11] +; CHECK-NEXT: add x15, sp, #200 +; CHECK-NEXT: ld1 { v1.b }[7], [x8] +; CHECK-NEXT: add x8, sp, #560 +; CHECK-NEXT: add x14, sp, #128 +; CHECK-NEXT: ld1 { v16.b }[1], [x8] ; CHECK-NEXT: add x8, sp, #368 -; CHECK-NEXT: add x10, sp, #440 -; CHECK-NEXT: ld1 { v20.b }[1], [x11] -; CHECK-NEXT: add x11, sp, #696 -; CHECK-NEXT: ld1 { v19.b }[1], [x9] -; CHECK-NEXT: add x9, sp, #376 -; CHECK-NEXT: ld1 { v18.b }[2], [x8] +; CHECK-NEXT: ld1 { v0.b }[6], [x14] +; CHECK-NEXT: mov v3.b[6], w6 +; CHECK-NEXT: ld1 { v5.b }[6], [x15] +; CHECK-NEXT: ld1 { v6.b }[2], [x8] ; CHECK-NEXT: add x8, sp, #568 -; CHECK-NEXT: ld1 { v4.b }[3], [x10] -; CHECK-NEXT: add x10, sp, #448 -; CHECK-NEXT: ld1 { v20.b }[2], [x11] -; CHECK-NEXT: add x11, sp, #704 -; CHECK-NEXT: ld1 { v19.b }[2], [x8] +; CHECK-NEXT: add x14, sp, #208 +; CHECK-NEXT: ldr b18, [sp, #480] +; 
CHECK-NEXT: ld1 { v16.b }[2], [x8] +; CHECK-NEXT: ldr b7, [sp, #144] +; CHECK-NEXT: add x11, sp, #488 +; CHECK-NEXT: ld1 { v5.b }[7], [x14] +; CHECK-NEXT: add x8, sp, #376 +; CHECK-NEXT: ld1 { v18.b }[1], [x11] +; CHECK-NEXT: mov v3.b[7], w7 +; CHECK-NEXT: ld1 { v6.b }[3], [x8] ; CHECK-NEXT: add x8, sp, #576 -; CHECK-NEXT: ld1 { v18.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #384 -; CHECK-NEXT: smlal v2.4s, v7.4h, v16.4h -; CHECK-NEXT: ld1 { v4.b }[4], [x10] -; CHECK-NEXT: smlal2 v17.4s, v7.8h, v16.8h -; CHECK-NEXT: ldr b7, [sp, #616] -; CHECK-NEXT: ld1 { v19.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #584 -; CHECK-NEXT: ld1 { v18.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #392 -; CHECK-NEXT: add x10, sp, #456 -; CHECK-NEXT: ld1 { v20.b }[3], [x11] -; CHECK-NEXT: add x11, sp, #400 -; CHECK-NEXT: ld1 { v19.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #592 -; CHECK-NEXT: ld1 { v18.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #624 -; CHECK-NEXT: ld1 { v4.b }[5], [x10] -; CHECK-NEXT: add x10, sp, #712 +; CHECK-NEXT: sshll v7.8h, v7.8b, #0 +; CHECK-NEXT: ld1 { v16.b }[3], [x8] +; CHECK-NEXT: add x11, sp, #496 ; CHECK-NEXT: sshll v5.8h, v5.8b, #0 -; CHECK-NEXT: ld1 { v7.b }[1], [x9] -; CHECK-NEXT: add x9, sp, #632 -; CHECK-NEXT: ld1 { v19.b }[5], [x8] +; CHECK-NEXT: add x8, sp, #384 +; CHECK-NEXT: ld1 { v18.b }[2], [x11] +; CHECK-NEXT: ld1 { v6.b }[4], [x8] +; CHECK-NEXT: add x8, sp, #584 +; CHECK-NEXT: add x11, sp, #504 +; CHECK-NEXT: sshll v17.8h, v3.8b, #0 +; CHECK-NEXT: ldr b3, [sp, #344] +; CHECK-NEXT: ld1 { v16.b }[4], [x8] +; CHECK-NEXT: add x8, sp, #424 +; CHECK-NEXT: add x16, sp, #320 +; CHECK-NEXT: ld1 { v18.b }[3], [x11] +; CHECK-NEXT: sshll v19.8h, v3.8b, #0 +; CHECK-NEXT: add x11, sp, #392 +; CHECK-NEXT: ld1 { v2.b }[5], [x16] +; CHECK-NEXT: smull2 v3.4s, v17.8h, v5.8h +; CHECK-NEXT: smull v5.4s, v17.4h, v5.4h +; CHECK-NEXT: movi v17.2d, #0000000000000000 +; CHECK-NEXT: ld1 { v6.b }[5], [x11] +; CHECK-NEXT: add x12, sp, #248 +; CHECK-NEXT: add x11, sp, #512 +; CHECK-NEXT: smull v7.4s, v7.4h, v19.4h +; CHECK-NEXT: ldr b19, [sp, #416] +; CHECK-NEXT: ld1 { v4.b }[4], [x12] +; CHECK-NEXT: add x12, sp, #328 +; CHECK-NEXT: ld1 { v18.b }[4], [x11] +; CHECK-NEXT: add x11, sp, #400 +; CHECK-NEXT: ld1 { v19.b }[1], [x8] +; CHECK-NEXT: add x8, sp, #592 +; CHECK-NEXT: ldr b20, [sp, #616] +; CHECK-NEXT: ld1 { v16.b }[5], [x8] +; CHECK-NEXT: add x8, sp, #432 +; CHECK-NEXT: ld1 { v2.b }[6], [x12] +; CHECK-NEXT: mov v17.s[0], v7.s[0] +; CHECK-NEXT: ldr b7, [sp, #680] +; CHECK-NEXT: ld1 { v6.b }[6], [x11] +; CHECK-NEXT: ld1 { v19.b }[2], [x8] ; CHECK-NEXT: add x8, sp, #600 -; CHECK-NEXT: ld1 { v20.b }[4], [x10] -; CHECK-NEXT: add x10, sp, #720 -; CHECK-NEXT: ld1 { v18.b }[6], [x11] -; CHECK-NEXT: add x11, sp, #408 -; CHECK-NEXT: ld1 { v7.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #640 -; CHECK-NEXT: ld1 { v19.b }[6], [x8] -; CHECK-NEXT: add x8, sp, #608 -; CHECK-NEXT: ld1 { v20.b }[5], [x10] -; CHECK-NEXT: add x10, sp, #728 -; CHECK-NEXT: ld1 { v18.b }[7], [x11] -; CHECK-NEXT: add x11, sp, #464 -; CHECK-NEXT: ld1 { v7.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #664 -; CHECK-NEXT: ld1 { v19.b }[7], [x8] -; CHECK-NEXT: add x8, sp, #648 -; CHECK-NEXT: ld1 { v20.b }[6], [x10] -; CHECK-NEXT: add x10, sp, #736 -; CHECK-NEXT: sshll v16.8h, v18.8b, #0 -; CHECK-NEXT: ld1 { v4.b }[6], [x11] +; CHECK-NEXT: add x11, sp, #688 +; CHECK-NEXT: ld1 { v16.b }[6], [x8] +; CHECK-NEXT: add x12, sp, #624 +; CHECK-NEXT: ld1 { v7.b }[1], [x11] +; CHECK-NEXT: ld1 { v20.b }[1], [x12] +; CHECK-NEXT: add x8, sp, #408 +; CHECK-NEXT: add x11, sp, #608 
+; CHECK-NEXT: add x12, sp, #440 +; CHECK-NEXT: ld1 { v6.b }[7], [x8] +; CHECK-NEXT: add x8, sp, #696 +; CHECK-NEXT: ld1 { v16.b }[7], [x11] +; CHECK-NEXT: ld1 { v19.b }[3], [x12] +; CHECK-NEXT: add x11, sp, #632 +; CHECK-NEXT: ld1 { v7.b }[2], [x8] +; CHECK-NEXT: ld1 { v20.b }[2], [x11] +; CHECK-NEXT: add x8, sp, #448 +; CHECK-NEXT: add x11, sp, #640 +; CHECK-NEXT: sshll v6.8h, v6.8b, #0 +; CHECK-NEXT: add x13, sp, #256 +; CHECK-NEXT: ld1 { v19.b }[4], [x8] +; CHECK-NEXT: add x8, sp, #704 +; CHECK-NEXT: sshll v16.8h, v16.8b, #0 +; CHECK-NEXT: ld1 { v7.b }[3], [x8] +; CHECK-NEXT: ld1 { v20.b }[3], [x11] +; CHECK-NEXT: add x8, sp, #712 +; CHECK-NEXT: add x11, sp, #648 +; CHECK-NEXT: add x12, sp, #520 +; CHECK-NEXT: ld1 { v4.b }[5], [x13] +; CHECK-NEXT: ldr b21, [sp, #544] +; CHECK-NEXT: smull2 v22.4s, v6.8h, v16.8h +; CHECK-NEXT: smull v6.4s, v6.4h, v16.4h ; CHECK-NEXT: ld1 { v7.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #656 -; CHECK-NEXT: sshll v18.8h, v19.8b, #0 -; CHECK-NEXT: ld1 { v20.b }[7], [x10] -; CHECK-NEXT: smull v19.4s, v16.4h, v18.4h -; CHECK-NEXT: ld1 { v7.b }[5], [x8] +; CHECK-NEXT: ld1 { v20.b }[4], [x11] +; CHECK-NEXT: add x11, sp, #456 +; CHECK-NEXT: ldr b16, [sp, #744] +; CHECK-NEXT: ld1 { v18.b }[5], [x12] +; CHECK-NEXT: ld1 { v19.b }[5], [x11] +; CHECK-NEXT: add x11, sp, #720 +; CHECK-NEXT: add x12, sp, #656 +; CHECK-NEXT: add x9, sp, #264 +; CHECK-NEXT: ld1 { v7.b }[5], [x11] +; CHECK-NEXT: ld1 { v20.b }[5], [x12] +; CHECK-NEXT: sshll v21.8h, v21.8b, #0 +; CHECK-NEXT: sshll v16.8h, v16.8b, #0 +; CHECK-NEXT: add x8, sp, #528 +; CHECK-NEXT: ld1 { v4.b }[6], [x9] +; CHECK-NEXT: ld1 { v18.b }[6], [x8] +; CHECK-NEXT: add x11, sp, #464 +; CHECK-NEXT: add x12, sp, #728 +; CHECK-NEXT: add x13, sp, #664 +; CHECK-NEXT: add x10, sp, #136 +; CHECK-NEXT: ld1 { v19.b }[6], [x11] +; CHECK-NEXT: ld1 { v7.b }[6], [x12] +; CHECK-NEXT: ld1 { v20.b }[6], [x13] +; CHECK-NEXT: ld1 { v0.b }[7], [x10] +; CHECK-NEXT: add x10, sp, #336 +; CHECK-NEXT: add x9, sp, #272 +; CHECK-NEXT: smull v16.4s, v21.4h, v16.4h +; CHECK-NEXT: movi v21.2d, #0000000000000000 +; CHECK-NEXT: add x8, sp, #536 +; CHECK-NEXT: ld1 { v2.b }[7], [x10] +; CHECK-NEXT: ld1 { v4.b }[7], [x9] +; CHECK-NEXT: ld1 { v18.b }[7], [x8] ; CHECK-NEXT: add x8, sp, #472 -; CHECK-NEXT: smull2 v16.4s, v16.8h, v18.8h -; CHECK-NEXT: ldr b18, [sp, #544] -; CHECK-NEXT: smull v5.4s, v5.4h, v6.4h -; CHECK-NEXT: ldr b6, [sp, #744] -; CHECK-NEXT: sshll v3.8h, v3.8b, #0 -; CHECK-NEXT: ld1 { v4.b }[7], [x8] -; CHECK-NEXT: sshll v20.8h, v20.8b, #0 -; CHECK-NEXT: ld1 { v7.b }[6], [x9] -; CHECK-NEXT: sshll v18.8h, v18.8b, #0 -; CHECK-NEXT: add x9, sp, #672 -; CHECK-NEXT: sshll v6.8h, v6.8b, #0 -; CHECK-NEXT: smlal v19.4s, v3.4h, v20.4h -; CHECK-NEXT: smlal2 v16.4s, v3.8h, v20.8h +; CHECK-NEXT: add x9, sp, #736 +; CHECK-NEXT: add x10, sp, #672 +; CHECK-NEXT: ld1 { v19.b }[7], [x8] ; CHECK-NEXT: ld1 { v7.b }[7], [x9] -; CHECK-NEXT: movi v3.2d, #0000000000000000 -; CHECK-NEXT: smull v6.4s, v18.4h, v6.4h -; CHECK-NEXT: movi v18.2d, #0000000000000000 +; CHECK-NEXT: ld1 { v20.b }[7], [x10] ; CHECK-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-NEXT: mov v3.s[0], v5.s[0] +; CHECK-NEXT: mov v21.s[0], v16.s[0] ; CHECK-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-NEXT: mov v18.s[0], v6.s[0] +; CHECK-NEXT: sshll v2.8h, v2.8b, #0 ; CHECK-NEXT: sshll v4.8h, v4.8b, #0 -; CHECK-NEXT: sshll v5.8h, v7.8b, #0 -; CHECK-NEXT: smlal v3.4s, v0.4h, v1.4h -; CHECK-NEXT: smlal v18.4s, v4.4h, v5.4h -; CHECK-NEXT: smlal2 v17.4s, v0.8h, v1.8h -; CHECK-NEXT: smlal2 v16.4s, v4.8h, v5.8h -; 
CHECK-NEXT: add v0.4s, v2.4s, v3.4s -; CHECK-NEXT: add v1.4s, v19.4s, v18.4s -; CHECK-NEXT: add v0.4s, v0.4s, v17.4s -; CHECK-NEXT: add v1.4s, v1.4s, v16.4s +; CHECK-NEXT: sshll v16.8h, v18.8b, #0 +; CHECK-NEXT: sshll v18.8h, v19.8b, #0 +; CHECK-NEXT: sshll v7.8h, v7.8b, #0 +; CHECK-NEXT: sshll v19.8h, v20.8b, #0 +; CHECK-NEXT: smlal v5.4s, v0.4h, v2.4h +; CHECK-NEXT: smlal2 v3.4s, v0.8h, v2.8h +; CHECK-NEXT: smlal v17.4s, v1.4h, v4.4h +; CHECK-NEXT: smlal v6.4s, v16.4h, v7.4h +; CHECK-NEXT: smlal2 v22.4s, v16.8h, v7.8h +; CHECK-NEXT: smlal v21.4s, v18.4h, v19.4h +; CHECK-NEXT: smlal2 v3.4s, v1.8h, v4.8h +; CHECK-NEXT: add v0.4s, v5.4s, v17.4s +; CHECK-NEXT: add v1.4s, v6.4s, v21.4s +; CHECK-NEXT: smlal2 v22.4s, v18.8h, v19.8h +; CHECK-NEXT: add v0.4s, v0.4s, v3.4s +; CHECK-NEXT: add v1.4s, v1.4s, v22.4s ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w0, s0 @@ -1344,116 +1344,116 @@ ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: ldr b0, [sp, #80] -; CHECK-NEXT: add x8, sp, #88 +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: ldr b1, [sp, #80] +; CHECK-NEXT: add x10, sp, #88 ; CHECK-NEXT: ldr b2, [sp, #16] -; CHECK-NEXT: add x9, sp, #24 -; CHECK-NEXT: fmov s1, w0 +; CHECK-NEXT: add x9, sp, #96 ; CHECK-NEXT: ldr b3, [sp, #480] -; CHECK-NEXT: ld1 { v0.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #96 -; CHECK-NEXT: ld1 { v2.b }[1], [x9] -; CHECK-NEXT: add x9, sp, #32 -; CHECK-NEXT: mov v1.b[1], w1 -; CHECK-NEXT: add x10, sp, #488 -; CHECK-NEXT: add x11, sp, #496 +; CHECK-NEXT: ld1 { v1.b }[1], [x10] +; CHECK-NEXT: add x10, sp, #24 ; CHECK-NEXT: ldr b4, [sp, #352] -; CHECK-NEXT: ld1 { v0.b }[2], [x8] +; CHECK-NEXT: mov v0.b[1], w1 +; CHECK-NEXT: ld1 { v2.b }[1], [x10] +; CHECK-NEXT: add x11, sp, #488 +; CHECK-NEXT: add x10, sp, #360 +; CHECK-NEXT: ldr b5, [sp, #416] ; CHECK-NEXT: add x8, sp, #104 +; CHECK-NEXT: ld1 { v1.b }[2], [x9] +; CHECK-NEXT: add x9, sp, #32 +; CHECK-NEXT: ld1 { v3.b }[1], [x11] ; CHECK-NEXT: ld1 { v2.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #40 -; CHECK-NEXT: ld1 { v3.b }[1], [x10] -; CHECK-NEXT: add x10, sp, #48 -; CHECK-NEXT: mov v1.b[2], w2 -; CHECK-NEXT: ldr b6, [sp, #416] -; CHECK-NEXT: ld1 { v0.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #112 -; CHECK-NEXT: ld1 { v2.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #128 -; CHECK-NEXT: ld1 { v3.b }[2], [x11] -; CHECK-NEXT: add x11, sp, #56 -; CHECK-NEXT: mov v1.b[3], w3 -; CHECK-NEXT: add x12, sp, #504 -; CHECK-NEXT: ld1 { v0.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #120 -; CHECK-NEXT: ld1 { v2.b }[4], [x10] +; CHECK-NEXT: add x11, sp, #424 +; CHECK-NEXT: ld1 { v4.b }[1], [x10] +; CHECK-NEXT: mov v0.b[2], w2 +; CHECK-NEXT: ld1 { v5.b }[1], [x11] +; CHECK-NEXT: add x9, sp, #368 +; CHECK-NEXT: ld1 { v1.b }[3], [x8] +; CHECK-NEXT: add x8, sp, #40 +; CHECK-NEXT: add x12, sp, #496 +; CHECK-NEXT: ld1 { v2.b }[3], [x8] +; CHECK-NEXT: ld1 { v4.b }[2], [x9] +; CHECK-NEXT: add x8, sp, #432 +; CHECK-NEXT: ld1 { v3.b }[2], [x12] +; CHECK-NEXT: add x13, sp, #48 +; CHECK-NEXT: ld1 { v5.b }[2], [x8] +; CHECK-NEXT: mov v0.b[3], w3 +; CHECK-NEXT: add x10, sp, #112 +; CHECK-NEXT: add x8, sp, #504 +; CHECK-NEXT: ld1 { v2.b }[4], [x13] +; CHECK-NEXT: add x13, sp, #376 +; CHECK-NEXT: ld1 { v1.b }[4], [x10] +; CHECK-NEXT: ld1 { v4.b }[3], [x13] +; CHECK-NEXT: add x13, sp, #440 +; CHECK-NEXT: ld1 { v3.b }[3], [x8] +; CHECK-NEXT: ld1 { v5.b }[3], [x13] +; CHECK-NEXT: add x11, sp, #120 +; CHECK-NEXT: add x8, sp, #56 +; CHECK-NEXT: mov 
v0.b[4], w4 +; CHECK-NEXT: add x13, sp, #512 +; CHECK-NEXT: ld1 { v1.b }[5], [x11] +; CHECK-NEXT: ld1 { v2.b }[5], [x8] +; CHECK-NEXT: add x8, sp, #384 +; CHECK-NEXT: add x11, sp, #448 +; CHECK-NEXT: ld1 { v3.b }[4], [x13] +; CHECK-NEXT: ld1 { v4.b }[4], [x8] +; CHECK-NEXT: ld1 { v5.b }[4], [x11] +; CHECK-NEXT: add x12, sp, #128 ; CHECK-NEXT: add x10, sp, #64 -; CHECK-NEXT: ldr b5, [sp, #144] -; CHECK-NEXT: mov v1.b[4], w4 -; CHECK-NEXT: ld1 { v3.b }[3], [x12] -; CHECK-NEXT: ld1 { v0.b }[5], [x8] -; CHECK-NEXT: add x8, sp, #136 -; CHECK-NEXT: ld1 { v2.b }[5], [x11] -; CHECK-NEXT: add x11, sp, #360 -; CHECK-NEXT: add x12, sp, #72 -; CHECK-NEXT: mov v1.b[5], w5 -; CHECK-NEXT: ld1 { v0.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #424 -; CHECK-NEXT: ld1 { v4.b }[1], [x11] -; CHECK-NEXT: add x11, sp, #512 +; CHECK-NEXT: add x8, sp, #520 +; CHECK-NEXT: mov v0.b[5], w5 +; CHECK-NEXT: ld1 { v1.b }[6], [x12] ; CHECK-NEXT: ld1 { v2.b }[6], [x10] -; CHECK-NEXT: add x10, sp, #368 -; CHECK-NEXT: ld1 { v6.b }[1], [x9] -; CHECK-NEXT: add x9, sp, #376 -; CHECK-NEXT: ld1 { v0.b }[7], [x8] -; CHECK-NEXT: add x8, sp, #432 -; CHECK-NEXT: ld1 { v4.b }[2], [x10] -; CHECK-NEXT: add x10, sp, #520 -; CHECK-NEXT: mov v1.b[6], w6 -; CHECK-NEXT: ld1 { v2.b }[7], [x12] -; CHECK-NEXT: ld1 { v6.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #440 -; CHECK-NEXT: sshll v5.8h, v5.8b, #0 -; CHECK-NEXT: ld1 { v3.b }[4], [x11] -; CHECK-NEXT: ld1 { v4.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #384 -; CHECK-NEXT: movi v7.2d, #0000000000000000 -; CHECK-NEXT: ld1 { v6.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #448 -; CHECK-NEXT: mov v1.b[7], w7 -; CHECK-NEXT: ld1 { v3.b }[5], [x10] -; CHECK-NEXT: sshll v5.4s, v5.4h, #0 -; CHECK-NEXT: ld1 { v4.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #392 -; CHECK-NEXT: add x10, sp, #528 -; CHECK-NEXT: ld1 { v6.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #456 -; CHECK-NEXT: mov v7.s[0], v5.s[0] -; CHECK-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-NEXT: ld1 { v4.b }[5], [x9] -; CHECK-NEXT: sshll v1.8h, v1.8b, #0 +; CHECK-NEXT: add x10, sp, #392 +; CHECK-NEXT: add x11, sp, #456 +; CHECK-NEXT: ldr b6, [sp, #144] +; CHECK-NEXT: ldr b7, [sp, #544] +; CHECK-NEXT: ld1 { v3.b }[5], [x8] +; CHECK-NEXT: ld1 { v4.b }[5], [x10] +; CHECK-NEXT: ld1 { v5.b }[5], [x11] +; CHECK-NEXT: add x9, sp, #136 +; CHECK-NEXT: sshll v6.8h, v6.8b, #0 +; CHECK-NEXT: mov v0.b[6], w6 +; CHECK-NEXT: ld1 { v1.b }[7], [x9] +; CHECK-NEXT: add x8, sp, #528 ; CHECK-NEXT: add x9, sp, #400 -; CHECK-NEXT: sshll v2.8h, v2.8b, #0 -; CHECK-NEXT: ld1 { v6.b }[5], [x8] -; CHECK-NEXT: add x8, sp, #464 -; CHECK-NEXT: ld1 { v3.b }[6], [x10] -; CHECK-NEXT: saddw v5.4s, v7.4s, v2.4h +; CHECK-NEXT: add x10, sp, #464 +; CHECK-NEXT: sshll v7.8h, v7.8b, #0 +; CHECK-NEXT: ld1 { v3.b }[6], [x8] ; CHECK-NEXT: ld1 { v4.b }[6], [x9] -; CHECK-NEXT: saddl v7.4s, v1.4h, v0.4h -; CHECK-NEXT: add x10, sp, #536 +; CHECK-NEXT: ld1 { v5.b }[6], [x10] +; CHECK-NEXT: movi v16.2d, #0000000000000000 +; CHECK-NEXT: movi v17.2d, #0000000000000000 +; CHECK-NEXT: add x14, sp, #72 +; CHECK-NEXT: mov v0.b[7], w7 +; CHECK-NEXT: sshll v6.4s, v6.4h, #0 +; CHECK-NEXT: add x8, sp, #536 ; CHECK-NEXT: add x9, sp, #408 -; CHECK-NEXT: ld1 { v6.b }[6], [x8] -; CHECK-NEXT: add x8, sp, #472 -; CHECK-NEXT: add v5.4s, v7.4s, v5.4s -; CHECK-NEXT: ldr b7, [sp, #544] -; CHECK-NEXT: saddl2 v0.4s, v1.8h, v0.8h -; CHECK-NEXT: ld1 { v3.b }[7], [x10] -; CHECK-NEXT: movi v1.2d, #0000000000000000 -; CHECK-NEXT: ld1 { v4.b }[7], [x9] -; CHECK-NEXT: sshll v7.8h, v7.8b, #0 -; CHECK-NEXT: ld1 { v6.b }[7], [x8] -; CHECK-NEXT: 
saddw2 v0.4s, v0.4s, v2.8h +; CHECK-NEXT: add x10, sp, #472 ; CHECK-NEXT: sshll v7.4s, v7.4h, #0 +; CHECK-NEXT: ld1 { v2.b }[7], [x14] +; CHECK-NEXT: ld1 { v3.b }[7], [x8] +; CHECK-NEXT: ld1 { v4.b }[7], [x9] +; CHECK-NEXT: ld1 { v5.b }[7], [x10] +; CHECK-NEXT: mov v16.s[0], v6.s[0] +; CHECK-NEXT: sshll v1.8h, v1.8b, #0 +; CHECK-NEXT: mov v17.s[0], v7.s[0] +; CHECK-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-NEXT: sshll v2.8h, v2.8b, #0 ; CHECK-NEXT: sshll v3.8h, v3.8b, #0 -; CHECK-NEXT: mov v1.s[0], v7.s[0] ; CHECK-NEXT: sshll v4.8h, v4.8b, #0 -; CHECK-NEXT: sshll v6.8h, v6.8b, #0 -; CHECK-NEXT: saddl v7.4s, v4.4h, v3.4h +; CHECK-NEXT: sshll v5.8h, v5.8b, #0 +; CHECK-NEXT: saddl v7.4s, v0.4h, v1.4h +; CHECK-NEXT: saddl2 v0.4s, v0.8h, v1.8h +; CHECK-NEXT: saddw v6.4s, v16.4s, v2.4h +; CHECK-NEXT: saddl v1.4s, v4.4h, v3.4h ; CHECK-NEXT: saddl2 v3.4s, v4.8h, v3.8h -; CHECK-NEXT: saddw v1.4s, v1.4s, v6.4h -; CHECK-NEXT: add v0.4s, v5.4s, v0.4s -; CHECK-NEXT: saddw2 v2.4s, v3.4s, v6.8h -; CHECK-NEXT: add v1.4s, v7.4s, v1.4s +; CHECK-NEXT: saddw v4.4s, v17.4s, v5.4h +; CHECK-NEXT: saddw2 v0.4s, v0.4s, v2.8h +; CHECK-NEXT: add v6.4s, v7.4s, v6.4s +; CHECK-NEXT: saddw2 v2.4s, v3.4s, v5.8h +; CHECK-NEXT: add v1.4s, v1.4s, v4.4s +; CHECK-NEXT: add v0.4s, v6.4s, v0.4s ; CHECK-NEXT: add v1.4s, v1.4s, v2.4s ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: addv s0, v0.4s @@ -1472,12 +1472,13 @@ define i32 @test_udot_v32i8(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %sum) { ; CHECK-LABEL: test_udot_v32i8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldp q3, q2, [x0] ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: ldr q1, [x1, #16] -; CHECK-NEXT: udot v0.4s, v1.16b, v2.16b -; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: udot v0.4s, v1.16b, v3.16b +; CHECK-NEXT: ldr q1, [x0, #16] +; CHECK-NEXT: ldr q2, [x1, #16] +; CHECK-NEXT: udot v0.4s, v2.16b, v1.16b +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr q2, [x1] +; CHECK-NEXT: udot v0.4s, v2.16b, v1.16b ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: add w0, w8, w2 @@ -1497,8 +1498,8 @@ ; CHECK-LABEL: test_udot_v32i8_nomla: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v0.16b, #1 -; CHECK-NEXT: ldr q2, [x0, #16] ; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: ldr q2, [x0, #16] ; CHECK-NEXT: udot v1.4s, v2.16b, v0.16b ; CHECK-NEXT: ldr q2, [x0] ; CHECK-NEXT: udot v1.4s, v2.16b, v0.16b @@ -1514,12 +1515,13 @@ define i32 @test_sdot_v32i8(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %sum) { ; CHECK-LABEL: test_sdot_v32i8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldp q3, q2, [x0] ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: ldr q1, [x1, #16] -; CHECK-NEXT: sdot v0.4s, v1.16b, v2.16b -; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: sdot v0.4s, v1.16b, v3.16b +; CHECK-NEXT: ldr q1, [x0, #16] +; CHECK-NEXT: ldr q2, [x1, #16] +; CHECK-NEXT: sdot v0.4s, v2.16b, v1.16b +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr q2, [x1] +; CHECK-NEXT: sdot v0.4s, v2.16b, v1.16b ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: add w0, w8, w2 @@ -1540,11 +1542,11 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: movi v16.2d, #0000000000000000 ; CHECK-NEXT: movi v17.2d, #0000000000000000 -; CHECK-NEXT: sdot v16.4s, v1.16b, v3.16b -; CHECK-NEXT: sdot v17.4s, v5.16b, v7.16b -; CHECK-NEXT: sdot v16.4s, v0.16b, v2.16b -; CHECK-NEXT: sdot v17.4s, v4.16b, v6.16b -; CHECK-NEXT: add v0.4s, v16.4s, v17.4s +; CHECK-NEXT: sdot v17.4s, v1.16b, v3.16b +; CHECK-NEXT: sdot v16.4s, v5.16b, v7.16b +; CHECK-NEXT: 
sdot v17.4s, v0.16b, v2.16b +; CHECK-NEXT: sdot v16.4s, v4.16b, v6.16b +; CHECK-NEXT: add v0.4s, v17.4s, v16.4s ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -1567,11 +1569,11 @@ ; CHECK-NEXT: movi v2.16b, #1 ; CHECK-NEXT: movi v3.2d, #0000000000000000 ; CHECK-NEXT: movi v6.2d, #0000000000000000 -; CHECK-NEXT: sdot v3.4s, v1.16b, v2.16b -; CHECK-NEXT: sdot v6.4s, v5.16b, v2.16b -; CHECK-NEXT: sdot v3.4s, v0.16b, v2.16b -; CHECK-NEXT: sdot v6.4s, v4.16b, v2.16b -; CHECK-NEXT: add v0.4s, v3.4s, v6.4s +; CHECK-NEXT: sdot v6.4s, v1.16b, v2.16b +; CHECK-NEXT: sdot v3.4s, v5.16b, v2.16b +; CHECK-NEXT: sdot v6.4s, v0.16b, v2.16b +; CHECK-NEXT: sdot v3.4s, v4.16b, v2.16b +; CHECK-NEXT: add v0.4s, v6.4s, v3.4s ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -1587,34 +1589,34 @@ define i32 @test_udot_v33i8(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %sum) { ; CHECK-LABEL: test_udot_v33i8: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldp q2, q3, [x0] +; CHECK-NEXT: movi v18.2d, #0000000000000000 +; CHECK-NEXT: ldp q4, q5, [x1] +; CHECK-NEXT: ldr b0, [x0, #32] ; CHECK-NEXT: ldr b1, [x1, #32] -; CHECK-NEXT: ldr b2, [x0, #32] -; CHECK-NEXT: ldp q3, q4, [x0] +; CHECK-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-NEXT: ushll v6.8h, v2.8b, #0 +; CHECK-NEXT: ushll2 v2.8h, v2.16b, #0 ; CHECK-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-NEXT: ushll v2.8h, v2.8b, #0 -; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: umull v1.4s, v1.4h, v2.4h -; CHECK-NEXT: ushll v16.8h, v3.8b, #0 -; CHECK-NEXT: ldp q5, q6, [x1] -; CHECK-NEXT: ushll2 v3.8h, v3.16b, #0 -; CHECK-NEXT: mov v0.s[0], v1.s[0] -; CHECK-NEXT: ushll2 v7.8h, v4.16b, #0 -; CHECK-NEXT: ushll2 v2.8h, v5.16b, #0 +; CHECK-NEXT: ushll v7.8h, v4.8b, #0 +; CHECK-NEXT: ushll2 v4.8h, v4.16b, #0 +; CHECK-NEXT: ushll2 v16.8h, v3.16b, #0 +; CHECK-NEXT: ushll v3.8h, v3.8b, #0 +; CHECK-NEXT: ushll2 v19.8h, v5.16b, #0 ; CHECK-NEXT: ushll v5.8h, v5.8b, #0 -; CHECK-NEXT: umull2 v18.4s, v2.8h, v3.8h -; CHECK-NEXT: umull2 v1.4s, v5.8h, v16.8h -; CHECK-NEXT: ushll v4.8h, v4.8b, #0 -; CHECK-NEXT: ushll2 v17.8h, v6.16b, #0 -; CHECK-NEXT: ushll v6.8h, v6.8b, #0 -; CHECK-NEXT: umull v2.4s, v2.4h, v3.4h -; CHECK-NEXT: umlal2 v18.4s, v17.8h, v7.8h -; CHECK-NEXT: umlal2 v1.4s, v6.8h, v4.8h -; CHECK-NEXT: umlal v0.4s, v5.4h, v16.4h -; CHECK-NEXT: umlal v2.4s, v17.4h, v7.4h -; CHECK-NEXT: add v1.4s, v1.4s, v18.4s -; CHECK-NEXT: umlal v0.4s, v6.4h, v4.4h -; CHECK-NEXT: add v1.4s, v2.4s, v1.4s -; CHECK-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-NEXT: umull v0.4s, v1.4h, v0.4h +; CHECK-NEXT: umull2 v1.4s, v7.8h, v6.8h +; CHECK-NEXT: umull2 v17.4s, v4.8h, v2.8h +; CHECK-NEXT: umull v2.4s, v4.4h, v2.4h +; CHECK-NEXT: umlal2 v17.4s, v19.8h, v16.8h +; CHECK-NEXT: umlal2 v1.4s, v5.8h, v3.8h +; CHECK-NEXT: mov v18.s[0], v0.s[0] +; CHECK-NEXT: umlal v2.4s, v19.4h, v16.4h +; CHECK-NEXT: add v0.4s, v1.4s, v17.4s +; CHECK-NEXT: umlal v18.4s, v7.4h, v6.4h +; CHECK-NEXT: umlal v18.4s, v5.4h, v3.4h +; CHECK-NEXT: add v0.4s, v2.4s, v0.4s +; CHECK-NEXT: add v0.4s, v18.4s, v0.4s ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: add w0, w8, w2 @@ -1637,19 +1639,19 @@ ; CHECK-NEXT: ldp q3, q2, [x0] ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-NEXT: ushll2 v5.8h, v3.16b, #0 -; CHECK-NEXT: mov v0.s[0], v1.s[0] ; CHECK-NEXT: ushll v4.8h, v2.8b, #0 +; CHECK-NEXT: ushll v5.8h, v3.8b, #0 ; CHECK-NEXT: ushll2 v2.8h, v2.16b, #0 -; CHECK-NEXT: ushll 
v1.8h, v3.8b, #0 -; CHECK-NEXT: uaddl2 v3.4s, v5.8h, v2.8h -; CHECK-NEXT: uaddl2 v6.4s, v1.8h, v4.8h -; CHECK-NEXT: uaddw v0.4s, v0.4s, v1.4h -; CHECK-NEXT: uaddl v1.4s, v5.4h, v2.4h -; CHECK-NEXT: add v2.4s, v6.4s, v3.4s +; CHECK-NEXT: ushll2 v3.8h, v3.16b, #0 +; CHECK-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-NEXT: uaddl2 v6.4s, v3.8h, v2.8h +; CHECK-NEXT: uaddl v2.4s, v3.4h, v2.4h +; CHECK-NEXT: mov v0.s[0], v1.s[0] +; CHECK-NEXT: uaddl2 v1.4s, v5.8h, v4.8h +; CHECK-NEXT: add v1.4s, v1.4s, v6.4s +; CHECK-NEXT: uaddw v0.4s, v0.4s, v5.4h ; CHECK-NEXT: uaddw v0.4s, v0.4s, v4.4h -; CHECK-NEXT: add v1.4s, v1.4s, v2.4s +; CHECK-NEXT: add v1.4s, v2.4s, v1.4s ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w0, s0 @@ -1663,34 +1665,34 @@ define i32 @test_sdot_v33i8(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %sum) { ; CHECK-LABEL: test_sdot_v33i8: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldp q2, q3, [x0] +; CHECK-NEXT: movi v18.2d, #0000000000000000 +; CHECK-NEXT: ldp q4, q5, [x1] +; CHECK-NEXT: ldr b0, [x0, #32] ; CHECK-NEXT: ldr b1, [x1, #32] -; CHECK-NEXT: ldr b2, [x0, #32] -; CHECK-NEXT: ldp q3, q4, [x0] +; CHECK-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-NEXT: sshll v6.8h, v2.8b, #0 +; CHECK-NEXT: sshll2 v2.8h, v2.16b, #0 ; CHECK-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-NEXT: sshll v2.8h, v2.8b, #0 -; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: smull v1.4s, v1.4h, v2.4h -; CHECK-NEXT: sshll v16.8h, v3.8b, #0 -; CHECK-NEXT: ldp q5, q6, [x1] -; CHECK-NEXT: sshll2 v3.8h, v3.16b, #0 -; CHECK-NEXT: mov v0.s[0], v1.s[0] -; CHECK-NEXT: sshll2 v7.8h, v4.16b, #0 -; CHECK-NEXT: sshll2 v2.8h, v5.16b, #0 +; CHECK-NEXT: sshll v7.8h, v4.8b, #0 +; CHECK-NEXT: sshll2 v4.8h, v4.16b, #0 +; CHECK-NEXT: sshll2 v16.8h, v3.16b, #0 +; CHECK-NEXT: sshll v3.8h, v3.8b, #0 +; CHECK-NEXT: sshll2 v19.8h, v5.16b, #0 ; CHECK-NEXT: sshll v5.8h, v5.8b, #0 -; CHECK-NEXT: smull2 v18.4s, v2.8h, v3.8h -; CHECK-NEXT: smull2 v1.4s, v5.8h, v16.8h -; CHECK-NEXT: sshll v4.8h, v4.8b, #0 -; CHECK-NEXT: sshll2 v17.8h, v6.16b, #0 -; CHECK-NEXT: sshll v6.8h, v6.8b, #0 -; CHECK-NEXT: smull v2.4s, v2.4h, v3.4h -; CHECK-NEXT: smlal2 v18.4s, v17.8h, v7.8h -; CHECK-NEXT: smlal2 v1.4s, v6.8h, v4.8h -; CHECK-NEXT: smlal v0.4s, v5.4h, v16.4h -; CHECK-NEXT: smlal v2.4s, v17.4h, v7.4h -; CHECK-NEXT: add v1.4s, v1.4s, v18.4s -; CHECK-NEXT: smlal v0.4s, v6.4h, v4.4h -; CHECK-NEXT: add v1.4s, v2.4s, v1.4s -; CHECK-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-NEXT: smull v0.4s, v1.4h, v0.4h +; CHECK-NEXT: smull2 v1.4s, v7.8h, v6.8h +; CHECK-NEXT: smull2 v17.4s, v4.8h, v2.8h +; CHECK-NEXT: smull v2.4s, v4.4h, v2.4h +; CHECK-NEXT: smlal2 v17.4s, v19.8h, v16.8h +; CHECK-NEXT: smlal2 v1.4s, v5.8h, v3.8h +; CHECK-NEXT: mov v18.s[0], v0.s[0] +; CHECK-NEXT: smlal v2.4s, v19.4h, v16.4h +; CHECK-NEXT: add v0.4s, v1.4s, v17.4s +; CHECK-NEXT: smlal v18.4s, v7.4h, v6.4h +; CHECK-NEXT: smlal v18.4s, v5.4h, v3.4h +; CHECK-NEXT: add v0.4s, v2.4s, v0.4s +; CHECK-NEXT: add v0.4s, v18.4s, v0.4s ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: add w0, w8, w2 @@ -1714,287 +1716,287 @@ ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ldr b0, [sp, #80] ; CHECK-NEXT: add x8, sp, #88 -; CHECK-NEXT: ldr b1, [sp, #144] -; CHECK-NEXT: add x9, sp, #96 +; CHECK-NEXT: ldr b2, [sp, #144] +; CHECK-NEXT: fmov s4, w0 +; CHECK-NEXT: add x10, sp, #152 ; CHECK-NEXT: ldr b3, [sp, #16] -; CHECK-NEXT: add x10, sp, #104 ; CHECK-NEXT: ld1 { v0.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #152 -; CHECK-NEXT: ldr b4, [sp, #344] 
-; CHECK-NEXT: fmov s2, w0 -; CHECK-NEXT: ldr b6, [sp, #216] -; CHECK-NEXT: add x11, sp, #136 -; CHECK-NEXT: ld1 { v1.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #160 +; CHECK-NEXT: ld1 { v2.b }[1], [x10] +; CHECK-NEXT: add x10, sp, #24 +; CHECK-NEXT: ldr b1, [sp, #344] +; CHECK-NEXT: add x9, sp, #96 +; CHECK-NEXT: ld1 { v3.b }[1], [x10] +; CHECK-NEXT: add x10, sp, #352 +; CHECK-NEXT: mov v4.b[1], w1 +; CHECK-NEXT: add x8, sp, #104 ; CHECK-NEXT: ld1 { v0.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #24 -; CHECK-NEXT: mov v2.b[1], w1 -; CHECK-NEXT: ldr b17, [sp, #280] -; CHECK-NEXT: ldr b7, [sp, #408] -; CHECK-NEXT: ld1 { v1.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #168 -; CHECK-NEXT: ld1 { v3.b }[1], [x9] +; CHECK-NEXT: add x9, sp, #160 +; CHECK-NEXT: ld1 { v1.b }[1], [x10] +; CHECK-NEXT: ld1 { v2.b }[2], [x9] ; CHECK-NEXT: add x9, sp, #32 -; CHECK-NEXT: ld1 { v0.b }[3], [x10] -; CHECK-NEXT: add x10, sp, #112 -; CHECK-NEXT: mov v2.b[2], w2 -; CHECK-NEXT: ldr b5, [sp, #208] -; CHECK-NEXT: ld1 { v1.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #176 +; CHECK-NEXT: add x12, sp, #360 ; CHECK-NEXT: ld1 { v3.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #40 -; CHECK-NEXT: ld1 { v0.b }[4], [x10] +; CHECK-NEXT: add x11, sp, #112 ; CHECK-NEXT: add x10, sp, #120 -; CHECK-NEXT: mov v2.b[3], w3 -; CHECK-NEXT: ld1 { v1.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #184 -; CHECK-NEXT: ld1 { v3.b }[3], [x9] +; CHECK-NEXT: ld1 { v1.b }[2], [x12] +; CHECK-NEXT: add x12, sp, #168 +; CHECK-NEXT: ld1 { v0.b }[3], [x8] +; CHECK-NEXT: mov v4.b[2], w2 +; CHECK-NEXT: ld1 { v2.b }[3], [x12] +; CHECK-NEXT: add x12, sp, #40 +; CHECK-NEXT: ld1 { v3.b }[3], [x12] +; CHECK-NEXT: add x13, sp, #176 +; CHECK-NEXT: ldr b16, [sp, #216] +; CHECK-NEXT: ld1 { v0.b }[4], [x11] +; CHECK-NEXT: add x11, sp, #48 +; CHECK-NEXT: add x12, sp, #368 +; CHECK-NEXT: ld1 { v2.b }[4], [x13] +; CHECK-NEXT: add x13, sp, #224 ; CHECK-NEXT: add x9, sp, #128 +; CHECK-NEXT: mov v4.b[3], w3 +; CHECK-NEXT: ld1 { v3.b }[4], [x11] +; CHECK-NEXT: ld1 { v16.b }[1], [x13] ; CHECK-NEXT: ld1 { v0.b }[5], [x10] -; CHECK-NEXT: add x10, sp, #48 -; CHECK-NEXT: mov v2.b[4], w4 -; CHECK-NEXT: ld1 { v1.b }[5], [x8] -; CHECK-NEXT: add x8, sp, #192 -; CHECK-NEXT: ld1 { v3.b }[4], [x10] -; CHECK-NEXT: add x10, sp, #360 +; CHECK-NEXT: add x10, sp, #56 +; CHECK-NEXT: ld1 { v1.b }[3], [x12] +; CHECK-NEXT: add x12, sp, #184 +; CHECK-NEXT: ldr b5, [sp, #280] +; CHECK-NEXT: add x11, sp, #376 +; CHECK-NEXT: ld1 { v3.b }[5], [x10] +; CHECK-NEXT: ld1 { v2.b }[5], [x12] +; CHECK-NEXT: add x10, sp, #232 +; CHECK-NEXT: mov v4.b[4], w4 ; CHECK-NEXT: ld1 { v0.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #56 -; CHECK-NEXT: mov v2.b[5], w5 -; CHECK-NEXT: ld1 { v1.b }[6], [x8] -; CHECK-NEXT: add x8, sp, #200 -; CHECK-NEXT: ld1 { v3.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #64 -; CHECK-NEXT: ld1 { v0.b }[7], [x11] -; CHECK-NEXT: add x11, sp, #232 -; CHECK-NEXT: mov v2.b[6], w6 -; CHECK-NEXT: ld1 { v1.b }[7], [x8] -; CHECK-NEXT: add x8, sp, #352 -; CHECK-NEXT: ld1 { v3.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #72 -; CHECK-NEXT: sshll v5.8h, v5.8b, #0 -; CHECK-NEXT: ld1 { v4.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #224 -; CHECK-NEXT: mov v2.b[7], w7 -; CHECK-NEXT: ld1 { v3.b }[7], [x9] -; CHECK-NEXT: add x9, sp, #416 -; CHECK-NEXT: ld1 { v6.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #288 -; CHECK-NEXT: ld1 { v4.b }[2], [x10] -; CHECK-NEXT: add x10, sp, #368 -; CHECK-NEXT: ld1 { v7.b }[1], [x9] -; CHECK-NEXT: add x9, sp, #424 -; CHECK-NEXT: ld1 { v17.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #296 -; CHECK-NEXT: ld1 { v6.b 
}[2], [x11] -; CHECK-NEXT: add x11, sp, #240 -; CHECK-NEXT: ld1 { v4.b }[3], [x10] -; CHECK-NEXT: add x10, sp, #376 -; CHECK-NEXT: ld1 { v7.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #432 +; CHECK-NEXT: add x9, sp, #288 +; CHECK-NEXT: add x15, sp, #64 +; CHECK-NEXT: ld1 { v16.b }[2], [x10] +; CHECK-NEXT: ldr b17, [sp, #408] +; CHECK-NEXT: ld1 { v5.b }[1], [x9] +; CHECK-NEXT: add x14, sp, #192 +; CHECK-NEXT: ld1 { v1.b }[4], [x11] +; CHECK-NEXT: ld1 { v3.b }[6], [x15] +; CHECK-NEXT: add x15, sp, #416 +; CHECK-NEXT: ld1 { v2.b }[6], [x14] +; CHECK-NEXT: add x14, sp, #240 +; CHECK-NEXT: ld1 { v17.b }[1], [x15] +; CHECK-NEXT: add x9, sp, #296 +; CHECK-NEXT: add x8, sp, #136 +; CHECK-NEXT: mov v4.b[5], w5 +; CHECK-NEXT: add x13, sp, #384 +; CHECK-NEXT: ld1 { v16.b }[3], [x14] +; CHECK-NEXT: ld1 { v5.b }[2], [x9] +; CHECK-NEXT: ld1 { v1.b }[5], [x13] +; CHECK-NEXT: ld1 { v0.b }[7], [x8] +; CHECK-NEXT: add x8, sp, #424 +; CHECK-NEXT: add x9, sp, #248 ; CHECK-NEXT: ld1 { v17.b }[2], [x8] ; CHECK-NEXT: add x8, sp, #304 -; CHECK-NEXT: ld1 { v6.b }[3], [x11] -; CHECK-NEXT: add x11, sp, #248 -; CHECK-NEXT: ld1 { v4.b }[4], [x10] -; CHECK-NEXT: add x10, sp, #384 -; CHECK-NEXT: ld1 { v7.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #440 -; CHECK-NEXT: ld1 { v17.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #312 -; CHECK-NEXT: ld1 { v6.b }[4], [x11] -; CHECK-NEXT: add x11, sp, #256 -; CHECK-NEXT: ld1 { v4.b }[5], [x10] ; CHECK-NEXT: add x10, sp, #392 -; CHECK-NEXT: ld1 { v7.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #448 +; CHECK-NEXT: ld1 { v16.b }[4], [x9] +; CHECK-NEXT: ld1 { v5.b }[3], [x8] +; CHECK-NEXT: mov v4.b[6], w6 +; CHECK-NEXT: ld1 { v1.b }[6], [x10] +; CHECK-NEXT: add x10, sp, #432 +; CHECK-NEXT: add x9, sp, #256 +; CHECK-NEXT: ld1 { v17.b }[3], [x10] +; CHECK-NEXT: add x10, sp, #312 +; CHECK-NEXT: ldr b22, [sp, #608] +; CHECK-NEXT: add x8, sp, #400 +; CHECK-NEXT: ld1 { v16.b }[5], [x9] +; CHECK-NEXT: ld1 { v5.b }[4], [x10] +; CHECK-NEXT: add x9, sp, #616 +; CHECK-NEXT: ld1 { v1.b }[7], [x8] +; CHECK-NEXT: add x8, sp, #440 +; CHECK-NEXT: ld1 { v22.b }[1], [x9] +; CHECK-NEXT: mov v4.b[7], w7 ; CHECK-NEXT: ld1 { v17.b }[4], [x8] ; CHECK-NEXT: add x8, sp, #320 -; CHECK-NEXT: ld1 { v6.b }[5], [x11] -; CHECK-NEXT: add x11, sp, #264 -; CHECK-NEXT: sshll v19.8h, v2.8b, #0 -; CHECK-NEXT: ld1 { v4.b }[6], [x10] -; CHECK-NEXT: ld1 { v7.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #456 -; CHECK-NEXT: ld1 { v17.b }[5], [x8] -; CHECK-NEXT: add x8, sp, #328 -; CHECK-NEXT: ld1 { v6.b }[6], [x11] -; CHECK-NEXT: add x11, sp, #272 -; CHECK-NEXT: sshll v2.8h, v1.8b, #0 -; CHECK-NEXT: ldr b1, [sp, #608] -; CHECK-NEXT: ld1 { v7.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #464 +; CHECK-NEXT: add x10, sp, #448 +; CHECK-NEXT: ldr b6, [sp, #208] +; CHECK-NEXT: ld1 { v5.b }[5], [x8] +; CHECK-NEXT: add x8, sp, #624 +; CHECK-NEXT: ldr b7, [sp, #472] +; CHECK-NEXT: ld1 { v22.b }[2], [x8] +; CHECK-NEXT: ld1 { v17.b }[5], [x10] +; CHECK-NEXT: add x10, sp, #328 +; CHECK-NEXT: sshll v20.8h, v4.8b, #0 +; CHECK-NEXT: ldr b4, [sp, #480] +; CHECK-NEXT: add x8, sp, #456 +; CHECK-NEXT: ld1 { v5.b }[6], [x10] +; CHECK-NEXT: add x10, sp, #632 +; CHECK-NEXT: sshll v6.8h, v6.8b, #0 +; CHECK-NEXT: ld1 { v22.b }[3], [x10] +; CHECK-NEXT: add x10, sp, #488 ; CHECK-NEXT: ld1 { v17.b }[6], [x8] ; CHECK-NEXT: add x8, sp, #336 -; CHECK-NEXT: ld1 { v6.b }[7], [x11] -; CHECK-NEXT: add x10, sp, #400 -; CHECK-NEXT: sshll v16.8h, v3.8b, #0 -; CHECK-NEXT: add x11, sp, #648 -; CHECK-NEXT: ld1 { v7.b }[7], [x9] -; CHECK-NEXT: add x9, sp, #624 -; CHECK-NEXT: ld1 { v17.b }[7], 
[x8] -; CHECK-NEXT: add x8, sp, #616 -; CHECK-NEXT: sshll v21.8h, v6.8b, #0 -; CHECK-NEXT: ldr b6, [sp, #472] -; CHECK-NEXT: ld1 { v4.b }[7], [x10] -; CHECK-NEXT: add x10, sp, #552 -; CHECK-NEXT: ld1 { v1.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #488 -; CHECK-NEXT: sshll v18.8h, v17.8b, #0 -; CHECK-NEXT: ldr b17, [sp, #480] -; CHECK-NEXT: sshll v6.8h, v6.8b, #0 -; CHECK-NEXT: sshll v3.8h, v4.8b, #0 -; CHECK-NEXT: ld1 { v17.b }[1], [x8] +; CHECK-NEXT: ld1 { v4.b }[1], [x10] +; CHECK-NEXT: sshll v7.8h, v7.8b, #0 +; CHECK-NEXT: ld1 { v5.b }[7], [x8] +; CHECK-NEXT: add x8, sp, #640 +; CHECK-NEXT: add x9, sp, #264 +; CHECK-NEXT: ld1 { v22.b }[4], [x8] ; CHECK-NEXT: add x8, sp, #496 -; CHECK-NEXT: ld1 { v1.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #632 -; CHECK-NEXT: sshll v4.8h, v7.8b, #0 -; CHECK-NEXT: smull v20.4s, v5.4h, v6.4h -; CHECK-NEXT: movi v7.2d, #0000000000000000 -; CHECK-NEXT: ld1 { v17.b }[2], [x8] -; CHECK-NEXT: smull v5.4s, v16.4h, v18.4h -; CHECK-NEXT: ld1 { v1.b }[3], [x9] -; CHECK-NEXT: smull2 v16.4s, v16.8h, v18.8h -; CHECK-NEXT: ldr b18, [sp, #544] +; CHECK-NEXT: ld1 { v16.b }[6], [x9] +; CHECK-NEXT: ld1 { v4.b }[2], [x8] +; CHECK-NEXT: add x8, sp, #648 +; CHECK-NEXT: smull v18.4s, v6.4h, v7.4h +; CHECK-NEXT: ldr b7, [sp, #544] +; CHECK-NEXT: add x9, sp, #272 +; CHECK-NEXT: movi v6.2d, #0000000000000000 +; CHECK-NEXT: ld1 { v22.b }[5], [x8] ; CHECK-NEXT: add x8, sp, #504 -; CHECK-NEXT: add x9, sp, #640 -; CHECK-NEXT: mov v7.s[0], v20.s[0] -; CHECK-NEXT: ldr b20, [sp, #672] -; CHECK-NEXT: ld1 { v18.b }[1], [x10] -; CHECK-NEXT: add x10, sp, #680 -; CHECK-NEXT: ld1 { v17.b }[3], [x8] +; CHECK-NEXT: ld1 { v16.b }[7], [x9] +; CHECK-NEXT: ld1 { v4.b }[3], [x8] +; CHECK-NEXT: add x8, sp, #552 +; CHECK-NEXT: add x9, sp, #656 +; CHECK-NEXT: ld1 { v7.b }[1], [x8] +; CHECK-NEXT: add x8, sp, #512 +; CHECK-NEXT: ldr b21, [sp, #672] +; CHECK-NEXT: ld1 { v22.b }[6], [x9] +; CHECK-NEXT: mov v6.s[0], v18.s[0] +; CHECK-NEXT: add x9, sp, #664 +; CHECK-NEXT: ld1 { v4.b }[4], [x8] ; CHECK-NEXT: add x8, sp, #560 -; CHECK-NEXT: ld1 { v1.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #512 -; CHECK-NEXT: ld1 { v20.b }[1], [x10] -; CHECK-NEXT: add x10, sp, #520 -; CHECK-NEXT: ld1 { v18.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #688 -; CHECK-NEXT: ld1 { v17.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #568 -; CHECK-NEXT: smull2 v6.4s, v19.8h, v21.8h -; CHECK-NEXT: ld1 { v1.b }[5], [x11] -; CHECK-NEXT: ld1 { v20.b }[2], [x8] -; CHECK-NEXT: add x8, sp, #696 -; CHECK-NEXT: ld1 { v18.b }[3], [x9] +; CHECK-NEXT: sshll v23.8h, v16.8b, #0 +; CHECK-NEXT: ld1 { v7.b }[2], [x8] +; CHECK-NEXT: add x8, sp, #520 +; CHECK-NEXT: ldr b24, [sp, #872] +; CHECK-NEXT: ld1 { v22.b }[7], [x9] +; CHECK-NEXT: add x9, sp, #528 +; CHECK-NEXT: movi v19.2d, #0000000000000000 +; CHECK-NEXT: ld1 { v4.b }[5], [x8] +; CHECK-NEXT: add x8, sp, #568 +; CHECK-NEXT: smull2 v18.4s, v20.8h, v23.8h +; CHECK-NEXT: ld1 { v7.b }[3], [x8] +; CHECK-NEXT: add x8, sp, #680 +; CHECK-NEXT: smlal v6.4s, v20.4h, v23.4h +; CHECK-NEXT: ld1 { v21.b }[1], [x8] +; CHECK-NEXT: sshll v20.8h, v22.8b, #0 +; CHECK-NEXT: ldr b22, [sp, #736] +; CHECK-NEXT: ld1 { v4.b }[6], [x9] ; CHECK-NEXT: add x9, sp, #576 -; CHECK-NEXT: ld1 { v17.b }[5], [x10] -; CHECK-NEXT: add x10, sp, #528 -; CHECK-NEXT: smlal v7.4s, v19.4h, v21.4h -; CHECK-NEXT: ldr b19, [sp, #872] -; CHECK-NEXT: ld1 { v20.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #704 -; CHECK-NEXT: ld1 { v18.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #584 -; CHECK-NEXT: ld1 { v17.b }[6], [x10] -; CHECK-NEXT: add x10, sp, #536 -; CHECK-NEXT: 
ldr b21, [sp, #936] -; CHECK-NEXT: add x11, sp, #656 -; CHECK-NEXT: ld1 { v20.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #712 -; CHECK-NEXT: ld1 { v18.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #592 -; CHECK-NEXT: ld1 { v17.b }[7], [x10] -; CHECK-NEXT: add x10, sp, #880 -; CHECK-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-NEXT: ld1 { v1.b }[6], [x11] -; CHECK-NEXT: ld1 { v20.b }[5], [x8] -; CHECK-NEXT: add x8, sp, #600 -; CHECK-NEXT: ld1 { v18.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #720 -; CHECK-NEXT: ld1 { v19.b }[1], [x10] -; CHECK-NEXT: add x10, sp, #944 -; CHECK-NEXT: smlal2 v6.4s, v0.8h, v3.8h -; CHECK-NEXT: add x11, sp, #664 -; CHECK-NEXT: ld1 { v20.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #888 -; CHECK-NEXT: ld1 { v18.b }[7], [x8] -; CHECK-NEXT: add x8, sp, #728 -; CHECK-NEXT: ld1 { v21.b }[1], [x10] -; CHECK-NEXT: add x10, sp, #752 -; CHECK-NEXT: ld1 { v19.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #952 -; CHECK-NEXT: ld1 { v20.b }[7], [x8] -; CHECK-NEXT: add x8, sp, #896 -; CHECK-NEXT: smlal v7.4s, v0.4h, v3.4h -; CHECK-NEXT: ldr b0, [sp, #744] +; CHECK-NEXT: ldr b23, [sp, #1000] +; CHECK-NEXT: ld1 { v7.b }[4], [x9] +; CHECK-NEXT: add x9, sp, #688 +; CHECK-NEXT: sshll v22.8h, v22.8b, #0 ; CHECK-NEXT: ld1 { v21.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #904 -; CHECK-NEXT: ld1 { v19.b }[3], [x8] +; CHECK-NEXT: add x9, sp, #696 +; CHECK-NEXT: sshll v23.8h, v23.8b, #0 +; CHECK-NEXT: add x8, sp, #536 +; CHECK-NEXT: ldr b25, [sp, #936] +; CHECK-NEXT: add x10, sp, #464 +; CHECK-NEXT: ld1 { v4.b }[7], [x8] +; CHECK-NEXT: add x8, sp, #584 +; CHECK-NEXT: ld1 { v17.b }[7], [x10] +; CHECK-NEXT: ld1 { v21.b }[3], [x9] +; CHECK-NEXT: ld1 { v7.b }[5], [x8] +; CHECK-NEXT: add x8, sp, #880 +; CHECK-NEXT: add x9, sp, #704 +; CHECK-NEXT: smull v22.4s, v22.4h, v23.4h +; CHECK-NEXT: ldr b23, [sp, #744] +; CHECK-NEXT: ld1 { v24.b }[1], [x8] +; CHECK-NEXT: add x8, sp, #944 +; CHECK-NEXT: add x10, sp, #888 +; CHECK-NEXT: ld1 { v21.b }[4], [x9] +; CHECK-NEXT: add x9, sp, #752 +; CHECK-NEXT: ld1 { v25.b }[1], [x8] +; CHECK-NEXT: ld1 { v23.b }[1], [x9] +; CHECK-NEXT: add x8, sp, #712 +; CHECK-NEXT: add x9, sp, #760 +; CHECK-NEXT: ld1 { v24.b }[2], [x10] +; CHECK-NEXT: add x10, sp, #952 +; CHECK-NEXT: mov v19.s[0], v22.s[0] +; CHECK-NEXT: ldr b22, [sp, #808] +; CHECK-NEXT: ld1 { v25.b }[2], [x10] +; CHECK-NEXT: ld1 { v21.b }[5], [x8] +; CHECK-NEXT: ld1 { v23.b }[2], [x9] +; CHECK-NEXT: add x8, sp, #816 +; CHECK-NEXT: add x9, sp, #896 +; CHECK-NEXT: ld1 { v22.b }[1], [x8] ; CHECK-NEXT: add x8, sp, #960 -; CHECK-NEXT: ld1 { v0.b }[1], [x10] -; CHECK-NEXT: add x10, sp, #760 -; CHECK-NEXT: ld1 { v1.b }[7], [x11] -; CHECK-NEXT: add x11, sp, #816 -; CHECK-NEXT: ld1 { v21.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #968 -; CHECK-NEXT: ldr b3, [sp, #808] -; CHECK-NEXT: ld1 { v19.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #912 -; CHECK-NEXT: ld1 { v0.b }[2], [x10] -; CHECK-NEXT: add x10, sp, #768 -; CHECK-NEXT: ld1 { v3.b }[1], [x11] -; CHECK-NEXT: add x11, sp, #824 -; CHECK-NEXT: ld1 { v21.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #976 -; CHECK-NEXT: ld1 { v19.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #920 -; CHECK-NEXT: ld1 { v0.b }[3], [x10] +; CHECK-NEXT: ld1 { v24.b }[3], [x9] +; CHECK-NEXT: add x9, sp, #768 +; CHECK-NEXT: ld1 { v25.b }[3], [x8] +; CHECK-NEXT: add x10, sp, #904 +; CHECK-NEXT: ld1 { v23.b }[3], [x9] +; CHECK-NEXT: add x9, sp, #824 +; CHECK-NEXT: add x8, sp, #720 +; CHECK-NEXT: ld1 { v22.b }[2], [x9] +; CHECK-NEXT: add x9, sp, #968 +; CHECK-NEXT: ld1 { v24.b }[4], [x10] ; CHECK-NEXT: add x10, sp, #776 -; CHECK-NEXT: ld1 { 
v3.b }[2], [x11] -; CHECK-NEXT: add x11, sp, #832 -; CHECK-NEXT: ld1 { v21.b }[5], [x8] -; CHECK-NEXT: add x8, sp, #984 -; CHECK-NEXT: ld1 { v19.b }[6], [x9] -; CHECK-NEXT: add x9, sp, #928 -; CHECK-NEXT: ld1 { v0.b }[4], [x10] -; CHECK-NEXT: add x10, sp, #848 -; CHECK-NEXT: ld1 { v3.b }[3], [x11] -; CHECK-NEXT: add x11, sp, #840 +; CHECK-NEXT: ld1 { v25.b }[4], [x9] ; CHECK-NEXT: ld1 { v21.b }[6], [x8] -; CHECK-NEXT: add x8, sp, #992 -; CHECK-NEXT: ld1 { v19.b }[7], [x9] +; CHECK-NEXT: ld1 { v23.b }[4], [x10] +; CHECK-NEXT: add x8, sp, #832 +; CHECK-NEXT: add x9, sp, #912 +; CHECK-NEXT: ld1 { v22.b }[3], [x8] +; CHECK-NEXT: add x8, sp, #976 +; CHECK-NEXT: ld1 { v24.b }[5], [x9] ; CHECK-NEXT: add x9, sp, #784 -; CHECK-NEXT: smlal2 v16.4s, v2.8h, v4.8h -; CHECK-NEXT: ld1 { v3.b }[4], [x11] +; CHECK-NEXT: ld1 { v25.b }[5], [x8] +; CHECK-NEXT: add x10, sp, #920 +; CHECK-NEXT: ld1 { v23.b }[5], [x9] +; CHECK-NEXT: add x9, sp, #840 +; CHECK-NEXT: add x8, sp, #728 +; CHECK-NEXT: ld1 { v22.b }[4], [x9] +; CHECK-NEXT: add x9, sp, #984 +; CHECK-NEXT: ld1 { v24.b }[6], [x10] +; CHECK-NEXT: add x10, sp, #792 +; CHECK-NEXT: ld1 { v25.b }[6], [x9] ; CHECK-NEXT: ld1 { v21.b }[7], [x8] -; CHECK-NEXT: add x8, sp, #792 -; CHECK-NEXT: ld1 { v0.b }[5], [x9] +; CHECK-NEXT: ld1 { v23.b }[6], [x10] +; CHECK-NEXT: add x8, sp, #848 +; CHECK-NEXT: add x9, sp, #928 +; CHECK-NEXT: ld1 { v22.b }[5], [x8] +; CHECK-NEXT: add x12, sp, #72 +; CHECK-NEXT: add x8, sp, #992 +; CHECK-NEXT: ld1 { v24.b }[7], [x9] +; CHECK-NEXT: add x9, sp, #800 +; CHECK-NEXT: ld1 { v3.b }[7], [x12] +; CHECK-NEXT: ld1 { v25.b }[7], [x8] +; CHECK-NEXT: add x8, sp, #592 +; CHECK-NEXT: ld1 { v23.b }[7], [x9] ; CHECK-NEXT: add x9, sp, #856 -; CHECK-NEXT: smlal v5.4s, v2.4h, v4.4h -; CHECK-NEXT: ldr b2, [sp, #736] -; CHECK-NEXT: sshll v4.8h, v20.8b, #0 -; CHECK-NEXT: ldr b20, [sp, #1000] -; CHECK-NEXT: ld1 { v3.b }[5], [x10] -; CHECK-NEXT: sshll v2.8h, v2.8b, #0 -; CHECK-NEXT: ld1 { v0.b }[6], [x8] -; CHECK-NEXT: sshll v20.8h, v20.8b, #0 -; CHECK-NEXT: add x8, sp, #800 +; CHECK-NEXT: ld1 { v7.b }[6], [x8] +; CHECK-NEXT: add x11, sp, #200 +; CHECK-NEXT: ld1 { v22.b }[6], [x9] +; CHECK-NEXT: sshll v3.8h, v3.8b, #0 +; CHECK-NEXT: sshll v5.8h, v5.8b, #0 +; CHECK-NEXT: sshll v4.8h, v4.8b, #0 ; CHECK-NEXT: sshll v21.8h, v21.8b, #0 -; CHECK-NEXT: smull v2.4s, v2.4h, v20.4h -; CHECK-NEXT: ld1 { v3.b }[6], [x9] -; CHECK-NEXT: smull v20.4s, v4.4h, v21.4h -; CHECK-NEXT: ld1 { v0.b }[7], [x8] -; CHECK-NEXT: smull2 v4.4s, v4.8h, v21.8h +; CHECK-NEXT: sshll v24.8h, v24.8b, #0 +; CHECK-NEXT: sshll v25.8h, v25.8b, #0 +; CHECK-NEXT: add x8, sp, #600 +; CHECK-NEXT: sshll v23.8h, v23.8b, #0 ; CHECK-NEXT: add x9, sp, #864 -; CHECK-NEXT: movi v21.2d, #0000000000000000 -; CHECK-NEXT: sshll v1.8h, v1.8b, #0 +; CHECK-NEXT: ld1 { v2.b }[7], [x11] +; CHECK-NEXT: ld1 { v7.b }[7], [x8] +; CHECK-NEXT: ld1 { v22.b }[7], [x9] +; CHECK-NEXT: smull v16.4s, v3.4h, v5.4h +; CHECK-NEXT: smull2 v3.4s, v3.8h, v5.8h +; CHECK-NEXT: smull v5.4s, v21.4h, v25.4h +; CHECK-NEXT: smull2 v21.4s, v21.8h, v25.8h +; CHECK-NEXT: smull2 v25.4s, v20.8h, v24.8h +; CHECK-NEXT: smlal v19.4s, v4.4h, v23.4h +; CHECK-NEXT: sshll v2.8h, v2.8b, #0 ; CHECK-NEXT: sshll v17.8h, v17.8b, #0 -; CHECK-NEXT: ld1 { v3.b }[7], [x9] -; CHECK-NEXT: sshll v19.8h, v19.8b, #0 -; CHECK-NEXT: mov v21.s[0], v2.s[0] ; CHECK-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-NEXT: smull2 v2.4s, v1.8h, v19.8h -; CHECK-NEXT: sshll v18.8h, v18.8b, #0 -; CHECK-NEXT: smlal v21.4s, v17.4h, v0.4h -; CHECK-NEXT: sshll v3.8h, v3.8b, #0 -; 
CHECK-NEXT: smlal2 v2.4s, v17.8h, v0.8h -; CHECK-NEXT: smlal2 v4.4s, v18.8h, v3.8h -; CHECK-NEXT: smlal v20.4s, v18.4h, v3.4h -; CHECK-NEXT: smlal v21.4s, v1.4h, v19.4h -; CHECK-NEXT: add v0.4s, v6.4s, v16.4s -; CHECK-NEXT: add v1.4s, v7.4s, v5.4s -; CHECK-NEXT: add v2.4s, v2.4s, v4.4s -; CHECK-NEXT: add v3.4s, v21.4s, v20.4s +; CHECK-NEXT: sshll v1.8h, v1.8b, #0 +; CHECK-NEXT: sshll v7.8h, v7.8b, #0 +; CHECK-NEXT: sshll v22.8h, v22.8b, #0 +; CHECK-NEXT: smlal2 v3.4s, v2.8h, v17.8h +; CHECK-NEXT: smlal v16.4s, v2.4h, v17.4h +; CHECK-NEXT: smlal2 v25.4s, v4.8h, v23.8h +; CHECK-NEXT: smlal2 v18.4s, v0.8h, v1.8h +; CHECK-NEXT: smlal v6.4s, v0.4h, v1.4h +; CHECK-NEXT: smlal v19.4s, v20.4h, v24.4h +; CHECK-NEXT: smlal2 v21.4s, v7.8h, v22.8h +; CHECK-NEXT: smlal v5.4s, v7.4h, v22.4h +; CHECK-NEXT: add v0.4s, v18.4s, v3.4s +; CHECK-NEXT: add v1.4s, v6.4s, v16.4s +; CHECK-NEXT: add v2.4s, v25.4s, v21.4s +; CHECK-NEXT: add v3.4s, v19.4s, v5.4s ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s ; CHECK-NEXT: add v1.4s, v3.4s, v2.4s ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s @@ -2021,153 +2023,153 @@ ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: ldr b0, [sp, #80] +; CHECK-NEXT: ldr b1, [sp, #80] ; CHECK-NEXT: add x8, sp, #88 ; CHECK-NEXT: ldr b2, [sp, #144] ; CHECK-NEXT: add x9, sp, #152 -; CHECK-NEXT: fmov s3, w0 -; CHECK-NEXT: ldr b4, [sp, #16] -; CHECK-NEXT: ld1 { v0.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #96 -; CHECK-NEXT: add x10, sp, #104 +; CHECK-NEXT: ldr b3, [sp, #16] +; CHECK-NEXT: add x12, sp, #32 +; CHECK-NEXT: ld1 { v1.b }[1], [x8] ; CHECK-NEXT: ld1 { v2.b }[1], [x9] -; CHECK-NEXT: mov v3.b[1], w1 -; CHECK-NEXT: add x9, sp, #160 -; CHECK-NEXT: add x11, sp, #128 -; CHECK-NEXT: ldr b1, [sp, #208] -; CHECK-NEXT: ld1 { v0.b }[2], [x8] +; CHECK-NEXT: add x9, sp, #96 ; CHECK-NEXT: add x8, sp, #24 -; CHECK-NEXT: ld1 { v2.b }[2], [x9] -; CHECK-NEXT: add x9, sp, #168 -; CHECK-NEXT: mov v3.b[2], w2 -; CHECK-NEXT: ld1 { v4.b }[1], [x8] -; CHECK-NEXT: add x8, sp, #112 -; CHECK-NEXT: ld1 { v0.b }[3], [x10] -; CHECK-NEXT: add x10, sp, #32 -; CHECK-NEXT: ld1 { v2.b }[3], [x9] -; CHECK-NEXT: add x9, sp, #176 -; CHECK-NEXT: mov v3.b[3], w3 -; CHECK-NEXT: ld1 { v4.b }[2], [x10] +; CHECK-NEXT: add x11, sp, #112 +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: ld1 { v3.b }[1], [x8] +; CHECK-NEXT: add x8, sp, #160 +; CHECK-NEXT: ldr b4, [sp, #480] +; CHECK-NEXT: ld1 { v1.b }[2], [x9] +; CHECK-NEXT: add x9, sp, #104 +; CHECK-NEXT: ld1 { v2.b }[2], [x8] +; CHECK-NEXT: add x8, sp, #168 ; CHECK-NEXT: add x10, sp, #120 -; CHECK-NEXT: ld1 { v0.b }[4], [x8] -; CHECK-NEXT: add x8, sp, #40 -; CHECK-NEXT: ld1 { v2.b }[4], [x9] -; CHECK-NEXT: add x9, sp, #184 -; CHECK-NEXT: mov v3.b[4], w4 -; CHECK-NEXT: ld1 { v4.b }[3], [x8] -; CHECK-NEXT: add x8, sp, #48 -; CHECK-NEXT: ld1 { v0.b }[5], [x10] -; CHECK-NEXT: add x10, sp, #136 -; CHECK-NEXT: ld1 { v2.b }[5], [x9] -; CHECK-NEXT: add x9, sp, #192 -; CHECK-NEXT: mov v3.b[5], w5 +; CHECK-NEXT: add x13, sp, #48 +; CHECK-NEXT: ld1 { v3.b }[2], [x12] +; CHECK-NEXT: add x12, sp, #40 +; CHECK-NEXT: ldr b5, [sp, #608] +; CHECK-NEXT: ld1 { v1.b }[3], [x9] +; CHECK-NEXT: ld1 { v2.b }[3], [x8] +; CHECK-NEXT: mov v0.b[1], w1 +; CHECK-NEXT: add x9, sp, #128 +; CHECK-NEXT: add x14, sp, #184 +; CHECK-NEXT: ldr b16, [sp, #544] +; CHECK-NEXT: ld1 { v3.b }[3], [x12] +; CHECK-NEXT: add x12, sp, #176 +; CHECK-NEXT: ldr b17, [sp, #672] +; CHECK-NEXT: ld1 { v1.b }[4], [x11] +; CHECK-NEXT: add x11, sp, #488 +; 
CHECK-NEXT: ld1 { v2.b }[4], [x12]
+; CHECK-NEXT: ld1 { v4.b }[1], [x11]
+; CHECK-NEXT: mov v0.b[2], w2
+; CHECK-NEXT: add x11, sp, #192
+; CHECK-NEXT: ld1 { v3.b }[4], [x13]
+; CHECK-NEXT: add x13, sp, #616
+; CHECK-NEXT: add x12, sp, #56
+; CHECK-NEXT: ld1 { v1.b }[5], [x10]
+; CHECK-NEXT: ld1 { v5.b }[1], [x13]
+; CHECK-NEXT: add x13, sp, #496
+; CHECK-NEXT: ld1 { v4.b }[2], [x13]
+; CHECK-NEXT: ld1 { v2.b }[5], [x14]
+; CHECK-NEXT: add x14, sp, #680
+; CHECK-NEXT: ld1 { v17.b }[1], [x14]
+; CHECK-NEXT: add x13, sp, #504
+; CHECK-NEXT: ld1 { v3.b }[5], [x12]
+; CHECK-NEXT: ld1 { v1.b }[6], [x9]
+; CHECK-NEXT: add x9, sp, #552
+; CHECK-NEXT: add x12, sp, #688
+; CHECK-NEXT: ld1 { v16.b }[1], [x9]
+; CHECK-NEXT: add x9, sp, #624
+; CHECK-NEXT: ld1 { v4.b }[3], [x13]
+; CHECK-NEXT: ld1 { v2.b }[6], [x11]
+; CHECK-NEXT: add x11, sp, #560
+; CHECK-NEXT: add x8, sp, #136
+; CHECK-NEXT: ld1 { v17.b }[2], [x12]
+; CHECK-NEXT: ld1 { v5.b }[2], [x9]
+; CHECK-NEXT: ld1 { v1.b }[7], [x8]
+; CHECK-NEXT: ld1 { v16.b }[2], [x11]
+; CHECK-NEXT: add x8, sp, #512
+; CHECK-NEXT: mov v0.b[3], w3
 ; CHECK-NEXT: ld1 { v4.b }[4], [x8]
-; CHECK-NEXT: add x8, sp, #56
-; CHECK-NEXT: ld1 { v0.b }[6], [x11]
+; CHECK-NEXT: add x8, sp, #568
+; CHECK-NEXT: add x9, sp, #696
 ; CHECK-NEXT: add x11, sp, #632
-; CHECK-NEXT: ld1 { v2.b }[6], [x9]
+; CHECK-NEXT: ld1 { v17.b }[3], [x9]
+; CHECK-NEXT: add x9, sp, #520
+; CHECK-NEXT: ld1 { v16.b }[3], [x8]
+; CHECK-NEXT: ld1 { v5.b }[3], [x11]
+; CHECK-NEXT: add x8, sp, #640
+; CHECK-NEXT: ld1 { v4.b }[5], [x9]
+; CHECK-NEXT: add x9, sp, #576
+; CHECK-NEXT: add x11, sp, #704
+; CHECK-NEXT: ldr b18, [sp, #736]
+; CHECK-NEXT: mov v0.b[4], w4
+; CHECK-NEXT: ld1 { v17.b }[4], [x11]
+; CHECK-NEXT: ld1 { v16.b }[4], [x9]
+; CHECK-NEXT: ld1 { v5.b }[4], [x8]
+; CHECK-NEXT: add x9, sp, #528
+; CHECK-NEXT: sshll v18.8h, v18.8b, #0
+; CHECK-NEXT: add x8, sp, #648
+; CHECK-NEXT: add x11, sp, #584
+; CHECK-NEXT: add x12, sp, #712
+; CHECK-NEXT: ld1 { v4.b }[6], [x9]
+; CHECK-NEXT: movi v7.2d, #0000000000000000
+; CHECK-NEXT: ld1 { v16.b }[5], [x11]
+; CHECK-NEXT: ld1 { v17.b }[5], [x12]
+; CHECK-NEXT: ld1 { v5.b }[5], [x8]
+; CHECK-NEXT: mov v0.b[5], w5
+; CHECK-NEXT: add x9, sp, #536
+; CHECK-NEXT: sshll v18.4s, v18.4h, #0
+; CHECK-NEXT: add x8, sp, #656
+; CHECK-NEXT: add x11, sp, #592
+; CHECK-NEXT: add x12, sp, #720
+; CHECK-NEXT: ld1 { v4.b }[7], [x9]
+; CHECK-NEXT: ld1 { v16.b }[6], [x11]
+; CHECK-NEXT: ld1 { v17.b }[6], [x12]
+; CHECK-NEXT: ld1 { v5.b }[6], [x8]
+; CHECK-NEXT: ldr b6, [sp, #208]
+; CHECK-NEXT: add x10, sp, #64
+; CHECK-NEXT: mov v7.s[0], v18.s[0]
+; CHECK-NEXT: mov v0.b[6], w6
+; CHECK-NEXT: ld1 { v3.b }[6], [x10]
+; CHECK-NEXT: add x8, sp, #664
+; CHECK-NEXT: add x9, sp, #600
+; CHECK-NEXT: add x10, sp, #728
+; CHECK-NEXT: sshll v4.8h, v4.8b, #0
+; CHECK-NEXT: sshll v6.8h, v6.8b, #0
+; CHECK-NEXT: ld1 { v16.b }[7], [x9]
+; CHECK-NEXT: ld1 { v17.b }[7], [x10]
+; CHECK-NEXT: ld1 { v5.b }[7], [x8]
+; CHECK-NEXT: movi v18.2d, #0000000000000000
+; CHECK-NEXT: mov v0.b[7], w7
 ; CHECK-NEXT: add x9, sp, #200
-; CHECK-NEXT: mov v3.b[6], w6
-; CHECK-NEXT: ld1 { v4.b }[5], [x8]
-; CHECK-NEXT: add x8, sp, #64
-; CHECK-NEXT: sshll v1.8h, v1.8b, #0
-; CHECK-NEXT: ld1 { v0.b }[7], [x10]
-; CHECK-NEXT: ld1 { v2.b }[7], [x9]
-; CHECK-NEXT: add x9, sp, #552
-; CHECK-NEXT: mov v3.b[7], w7
-; CHECK-NEXT: add x10, sp, #680
-; CHECK-NEXT: ld1 { v4.b }[6], [x8]
-; CHECK-NEXT: add x8, sp, #72
-; CHECK-NEXT: movi v6.2d, #0000000000000000
-; CHECK-NEXT: sshll v5.4s, v1.4h, #0
-; CHECK-NEXT: ldr b1, [sp, #608]
+; CHECK-NEXT: add x10, sp, #72
+; CHECK-NEXT: saddw v7.4s, v7.4s, v4.4h
+; CHECK-NEXT: sshll v6.4s, v6.4h, #0
+; CHECK-NEXT: sshll v16.8h, v16.8b, #0
+; CHECK-NEXT: sshll v17.8h, v17.8b, #0
+; CHECK-NEXT: sshll v5.8h, v5.8b, #0
+; CHECK-NEXT: ld1 { v2.b }[7], [x9]
+; CHECK-NEXT: ld1 { v3.b }[7], [x10]
+; CHECK-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-NEXT: mov v18.s[0], v6.s[0]
 ; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: ld1 { v4.b }[7], [x8]
-; CHECK-NEXT: add x8, sp, #616
+; CHECK-NEXT: saddl2 v6.4s, v17.8h, v16.8h
+; CHECK-NEXT: saddl2 v4.4s, v5.8h, v4.8h
+; CHECK-NEXT: saddl v16.4s, v17.4h, v16.4h
+; CHECK-NEXT: saddw v5.4s, v7.4s, v5.4h
 ; CHECK-NEXT: sshll v2.8h, v2.8b, #0
-; CHECK-NEXT: sshll v7.8h, v3.8b, #0
-; CHECK-NEXT: ld1 { v1.b }[1], [x8]
-; CHECK-NEXT: add x8, sp, #624
-; CHECK-NEXT: sshll v3.8h, v4.8b, #0
-; CHECK-NEXT: mov v6.s[0], v5.s[0]
-; CHECK-NEXT: saddl2 v5.4s, v3.8h, v2.8h
-; CHECK-NEXT: saddl2 v16.4s, v7.8h, v0.8h
-; CHECK-NEXT: ld1 { v1.b }[2], [x8]
-; CHECK-NEXT: add x8, sp, #488
-; CHECK-NEXT: saddw v4.4s, v6.4s, v7.4h
-; CHECK-NEXT: ldr b6, [sp, #480]
-; CHECK-NEXT: add v5.4s, v16.4s, v5.4s
-; CHECK-NEXT: ldr b7, [sp, #544]
-; CHECK-NEXT: ldr b16, [sp, #672]
-; CHECK-NEXT: ld1 { v6.b }[1], [x8]
-; CHECK-NEXT: add x8, sp, #496
-; CHECK-NEXT: ld1 { v7.b }[1], [x9]
-; CHECK-NEXT: add x9, sp, #560
-; CHECK-NEXT: ld1 { v16.b }[1], [x10]
-; CHECK-NEXT: add x10, sp, #688
-; CHECK-NEXT: ld1 { v1.b }[3], [x11]
-; CHECK-NEXT: add x11, sp, #640
-; CHECK-NEXT: ld1 { v6.b }[2], [x8]
-; CHECK-NEXT: add x8, sp, #504
-; CHECK-NEXT: ld1 { v7.b }[2], [x9]
-; CHECK-NEXT: add x9, sp, #568
-; CHECK-NEXT: ld1 { v16.b }[2], [x10]
-; CHECK-NEXT: add x10, sp, #696
-; CHECK-NEXT: ld1 { v1.b }[4], [x11]
-; CHECK-NEXT: add x11, sp, #648
-; CHECK-NEXT: ld1 { v6.b }[3], [x8]
-; CHECK-NEXT: add x8, sp, #512
-; CHECK-NEXT: ld1 { v7.b }[3], [x9]
-; CHECK-NEXT: add x9, sp, #576
-; CHECK-NEXT: ld1 { v16.b }[3], [x10]
-; CHECK-NEXT: add x10, sp, #704
-; CHECK-NEXT: ld1 { v1.b }[5], [x11]
-; CHECK-NEXT: add x11, sp, #656
-; CHECK-NEXT: ld1 { v6.b }[4], [x8]
-; CHECK-NEXT: add x8, sp, #520
-; CHECK-NEXT: ld1 { v7.b }[4], [x9]
-; CHECK-NEXT: add x9, sp, #584
-; CHECK-NEXT: ld1 { v16.b }[4], [x10]
-; CHECK-NEXT: add x10, sp, #712
-; CHECK-NEXT: ld1 { v1.b }[6], [x11]
-; CHECK-NEXT: add x11, sp, #664
-; CHECK-NEXT: ld1 { v6.b }[5], [x8]
-; CHECK-NEXT: add x8, sp, #528
-; CHECK-NEXT: ld1 { v7.b }[5], [x9]
-; CHECK-NEXT: add x9, sp, #592
-; CHECK-NEXT: ld1 { v16.b }[5], [x10]
-; CHECK-NEXT: add x10, sp, #720
+; CHECK-NEXT: sshll v3.8h, v3.8b, #0
+; CHECK-NEXT: saddl2 v17.4s, v0.8h, v1.8h
+; CHECK-NEXT: saddw v0.4s, v18.4s, v0.4h
+; CHECK-NEXT: saddl2 v7.4s, v3.8h, v2.8h
+; CHECK-NEXT: add v4.4s, v4.4s, v6.4s
 ; CHECK-NEXT: saddl v2.4s, v3.4h, v2.4h
-; CHECK-NEXT: ldr b3, [sp, #736]
-; CHECK-NEXT: ld1 { v6.b }[6], [x8]
-; CHECK-NEXT: add x8, sp, #600
-; CHECK-NEXT: saddw v0.4s, v4.4s, v0.4h
-; CHECK-NEXT: ld1 { v7.b }[6], [x9]
-; CHECK-NEXT: ld1 { v16.b }[6], [x10]
-; CHECK-NEXT: add x9, sp, #728
-; CHECK-NEXT: add x10, sp, #536
-; CHECK-NEXT: ld1 { v1.b }[7], [x11]
-; CHECK-NEXT: movi v4.2d, #0000000000000000
+; CHECK-NEXT: add v5.4s, v5.4s, v16.4s
+; CHECK-NEXT: saddw v0.4s, v0.4s, v1.4h
+; CHECK-NEXT: add v6.4s, v17.4s, v7.4s
+; CHECK-NEXT: add v1.4s, v5.4s, v4.4s
 ; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
-; CHECK-NEXT: ld1 { v7.b }[7], [x8]
-; CHECK-NEXT: sshll v2.8h, v3.8b, #0
-; CHECK-NEXT: ld1 { v16.b }[7], [x9]
-; CHECK-NEXT: ld1 { v6.b }[7], [x10]
-; CHECK-NEXT: sshll v2.4s, v2.4h, #0
-; CHECK-NEXT: sshll v1.8h, v1.8b, #0
-; CHECK-NEXT: mov v4.s[0], v2.s[0]
-; CHECK-NEXT: sshll v3.8h, v7.8b, #0
-; CHECK-NEXT: sshll v7.8h, v16.8b, #0
-; CHECK-NEXT: sshll v2.8h, v6.8b, #0
-; CHECK-NEXT: saddl2 v6.4s, v7.8h, v3.8h
-; CHECK-NEXT: saddl2 v16.4s, v1.8h, v2.8h
-; CHECK-NEXT: saddw v2.4s, v4.4s, v2.4h
-; CHECK-NEXT: saddl v3.4s, v7.4h, v3.4h
-; CHECK-NEXT: add v4.4s, v16.4s, v6.4s
-; CHECK-NEXT: saddw v1.4s, v2.4s, v1.4h
-; CHECK-NEXT: add v2.4s, v3.4s, v4.4s
-; CHECK-NEXT: add v0.4s, v0.4s, v5.4s
-; CHECK-NEXT: add v1.4s, v1.4s, v2.4s
+; CHECK-NEXT: add v1.4s, v6.4s, v1.4s
 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: addv s0, v0.4s
 ; CHECK-NEXT: fmov w0, s0
@@ -2185,14 +2187,15 @@
 ; CHECK-LABEL: test_udot_v48i8:
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NEXT: ldr q1, [x1, #32]
-; CHECK-NEXT: ldr q2, [x0, #32]
-; CHECK-NEXT: udot v0.4s, v1.16b, v2.16b
-; CHECK-NEXT: ldr q1, [x1]
-; CHECK-NEXT: ldp q3, q2, [x0]
-; CHECK-NEXT: udot v0.4s, v1.16b, v3.16b
-; CHECK-NEXT: ldr q1, [x1, #16]
-; CHECK-NEXT: udot v0.4s, v1.16b, v2.16b
+; CHECK-NEXT: ldr q1, [x0, #32]
+; CHECK-NEXT: ldr q2, [x1, #32]
+; CHECK-NEXT: udot v0.4s, v2.16b, v1.16b
+; CHECK-NEXT: ldr q1, [x0]
+; CHECK-NEXT: ldr q2, [x1]
+; CHECK-NEXT: udot v0.4s, v2.16b, v1.16b
+; CHECK-NEXT: ldr q1, [x0, #16]
+; CHECK-NEXT: ldr q2, [x1, #16]
+; CHECK-NEXT: udot v0.4s, v2.16b, v1.16b
 ; CHECK-NEXT: addv s0, v0.4s
 ; CHECK-NEXT: fmov w8, s0
 ; CHECK-NEXT: add w0, w8, w2
@@ -2212,8 +2215,8 @@
 ; CHECK-LABEL: test_udot_v48i8_nomla:
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: movi v0.16b, #1
-; CHECK-NEXT: ldr q2, [x0, #32]
 ; CHECK-NEXT: movi v1.2d, #0000000000000000
+; CHECK-NEXT: ldr q2, [x0, #32]
 ; CHECK-NEXT: udot v1.4s, v2.16b, v0.16b
 ; CHECK-NEXT: ldr q2, [x0]
 ; CHECK-NEXT: udot v1.4s, v2.16b, v0.16b
@@ -2232,14 +2235,15 @@
 ; CHECK-LABEL: test_sdot_v48i8:
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NEXT: ldr q1, [x1, #32]
-; CHECK-NEXT: ldr q2, [x0, #32]
-; CHECK-NEXT: sdot v0.4s, v1.16b, v2.16b
-; CHECK-NEXT: ldr q1, [x1]
-; CHECK-NEXT: ldp q3, q2, [x0]
-; CHECK-NEXT: sdot v0.4s, v1.16b, v3.16b
-; CHECK-NEXT: ldr q1, [x1, #16]
-; CHECK-NEXT: sdot v0.4s, v1.16b, v2.16b
+; CHECK-NEXT: ldr q1, [x0, #32]
+; CHECK-NEXT: ldr q2, [x1, #32]
+; CHECK-NEXT: sdot v0.4s, v2.16b, v1.16b
+; CHECK-NEXT: ldr q1, [x0]
+; CHECK-NEXT: ldr q2, [x1]
+; CHECK-NEXT: sdot v0.4s, v2.16b, v1.16b
+; CHECK-NEXT: ldr q1, [x0, #16]
+; CHECK-NEXT: ldr q2, [x1, #16]
+; CHECK-NEXT: sdot v0.4s, v2.16b, v1.16b
 ; CHECK-NEXT: addv s0, v0.4s
 ; CHECK-NEXT: fmov w8, s0
 ; CHECK-NEXT: add w0, w8, w2
@@ -2261,380 +2265,380 @@
 ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: .cfi_offset w29, -16
-; CHECK-NEXT: ldr b2, [sp, #592]
+; CHECK-NEXT: ldr b3, [sp, #592]
 ; CHECK-NEXT: add x8, sp, #600
-; CHECK-NEXT: ldr b3, [sp, #208]
-; CHECK-NEXT: add x10, sp, #344
+; CHECK-NEXT: ldr b4, [sp, #208]
 ; CHECK-NEXT: ldr b0, [sp, #336]
-; CHECK-NEXT: add x9, sp, #608
-; CHECK-NEXT: ld1 { v2.b }[1], [x8]
+; CHECK-NEXT: add x9, sp, #344
+; CHECK-NEXT: ldr b2, [sp, #464]
+; CHECK-NEXT: ld1 { v3.b }[1], [x8]
 ; CHECK-NEXT: add x8, sp, #216
+; CHECK-NEXT: add x10, sp, #624
+; CHECK-NEXT: ld1 { v4.b }[1], [x8]
+; CHECK-NEXT: add x8, sp, #608
+; CHECK-NEXT: ld1 { v0.b }[1], [x9]
+; CHECK-NEXT: add x9, sp, #232
 ; CHECK-NEXT: fmov s1, w0
-; CHECK-NEXT: add x11, sp, #664
-; CHECK-NEXT: ld1 { v0.b }[1], [x10]
-; CHECK-NEXT: add x10, sp, #352
-; CHECK-NEXT: ld1 { v3.b }[1], [x8]
+; CHECK-NEXT: ldr b7, [sp, #1360]
+; CHECK-NEXT: ld1 { v3.b }[2], [x8]
 ; CHECK-NEXT: add x8, sp, #224
-; CHECK-NEXT: ld1 { v2.b }[2], [x9]
-; CHECK-NEXT: add x9, sp, #616
+; CHECK-NEXT: add x12, sp, #376
+; CHECK-NEXT: ld1 { v4.b }[2], [x8]
+; CHECK-NEXT: add x8, sp, #616
+; CHECK-NEXT: add x11, sp, #656
 ; CHECK-NEXT: mov v1.b[1], w1
-; CHECK-NEXT: ldr b18, [sp, #1360]
-; CHECK-NEXT: ld1 { v0.b }[2], [x10]
-; CHECK-NEXT: add x10, sp, #240
-; CHECK-NEXT: ld1 { v3.b }[2], [x8]
-; CHECK-NEXT: add x8, sp, #232
-; CHECK-NEXT: ld1 { v2.b }[3], [x9]
-; CHECK-NEXT: add x9, sp, #624
-; CHECK-NEXT: mov v1.b[2], w2
-; CHECK-NEXT: ldr b19, [sp, #976]
-; CHECK-NEXT: ldr b4, [sp, #464]
+; CHECK-NEXT: ldr b17, [sp, #976]
+; CHECK-NEXT: add x14, sp, #288
 ; CHECK-NEXT: ld1 { v3.b }[3], [x8]
-; CHECK-NEXT: add x8, sp, #360
-; CHECK-NEXT: ld1 { v2.b }[4], [x9]
-; CHECK-NEXT: add x9, sp, #632
-; CHECK-NEXT: mov v1.b[3], w3
-; CHECK-NEXT: ldr b5, [sp, #80]
-; CHECK-NEXT: ld1 { v0.b }[3], [x8]
-; CHECK-NEXT: add x8, sp, #368
+; CHECK-NEXT: add x8, sp, #632
+; CHECK-NEXT: add x15, sp, #408
+; CHECK-NEXT: ld1 { v4.b }[3], [x9]
+; CHECK-NEXT: add x9, sp, #472
+; CHECK-NEXT: add x13, sp, #696
+; CHECK-NEXT: ld1 { v2.b }[1], [x9]
+; CHECK-NEXT: add x9, sp, #240
+; CHECK-NEXT: add x16, sp, #448
 ; CHECK-NEXT: ld1 { v3.b }[4], [x10]
+; CHECK-NEXT: add x10, sp, #352
+; CHECK-NEXT: mov v1.b[2], w2
+; CHECK-NEXT: ld1 { v4.b }[4], [x9]
+; CHECK-NEXT: ld1 { v0.b }[2], [x10]
+; CHECK-NEXT: add x10, sp, #1368
+; CHECK-NEXT: ld1 { v7.b }[1], [x10]
 ; CHECK-NEXT: add x10, sp, #248
-; CHECK-NEXT: ld1 { v2.b }[5], [x9]
-; CHECK-NEXT: add x9, sp, #376
-; CHECK-NEXT: mov v1.b[4], w4
-; CHECK-NEXT: ldr b16, [sp, #1104]
-; CHECK-NEXT: ld1 { v0.b }[4], [x8]
-; CHECK-NEXT: add x8, sp, #640
-; CHECK-NEXT: ld1 { v3.b }[5], [x10]
+; CHECK-NEXT: add x9, sp, #640
+; CHECK-NEXT: ld1 { v3.b }[5], [x8]
+; CHECK-NEXT: add x8, sp, #648
+; CHECK-NEXT: movi v6.2d, #0000000000000000
+; CHECK-NEXT: ld1 { v4.b }[5], [x10]
+; CHECK-NEXT: add x10, sp, #360
+; CHECK-NEXT: mov v1.b[3], w3
+; CHECK-NEXT: ld1 { v0.b }[3], [x10]
 ; CHECK-NEXT: add x10, sp, #256
-; CHECK-NEXT: movi v7.2d, #0000000000000000
-; CHECK-NEXT: ldr b17, [sp, #720]
-; CHECK-NEXT: ld1 { v2.b }[6], [x8]
-; CHECK-NEXT: add x8, sp, #384
-; CHECK-NEXT: ld1 { v0.b }[5], [x9]
-; CHECK-NEXT: add x9, sp, #648
+; CHECK-NEXT: movi v5.2d, #0000000000000000
+; CHECK-NEXT: ld1 { v3.b }[6], [x9]
+; CHECK-NEXT: add x9, sp, #368
+; CHECK-NEXT: ldr b16, [sp, #720]
+; CHECK-NEXT: ld1 { v4.b }[6], [x10]
+; CHECK-NEXT: add x10, sp, #984
+; CHECK-NEXT: ld1 { v0.b }[4], [x9]
+; CHECK-NEXT: ld1 { v17.b }[1], [x10]
+; CHECK-NEXT: add x10, sp, #664
+; CHECK-NEXT: ld1 { v3.b }[7], [x8]
+; CHECK-NEXT: add x8, sp, #264
+; CHECK-NEXT: mov v1.b[4], w4
+; CHECK-NEXT: ld1 { v4.b }[7], [x8]
+; CHECK-NEXT: add x9, sp, #672
+; CHECK-NEXT: add x8, sp, #680
+; CHECK-NEXT: ld1 { v0.b }[5], [x12]
+; CHECK-NEXT: add x12, sp, #480
+; CHECK-NEXT: ld1 { v2.b }[2], [x12]
+; CHECK-NEXT: add x12, sp, #272
+; CHECK-NEXT: ld1 { v3.b }[8], [x11]
+; CHECK-NEXT: ld1 { v4.b }[8], [x12]
+; CHECK-NEXT: add x12, sp, #384
 ; CHECK-NEXT: mov v1.b[5], w5
-; CHECK-NEXT: ld1 { v3.b }[6], [x10]
+; CHECK-NEXT: ld1 { v0.b }[6], [x12]
+; CHECK-NEXT: add x12, sp, #280
+; CHECK-NEXT: add x11, sp, #688
+; CHECK-NEXT: ld1 { v3.b }[9], [x10]
+; CHECK-NEXT: add x10, sp, #1376
+; CHECK-NEXT: ld1 { v7.b }[2], [x10]
 ; CHECK-NEXT: add x10, sp, #392
-; CHECK-NEXT: ld1 { v2.b }[7], [x9]
-; CHECK-NEXT: add x9, sp, #656
-; CHECK-NEXT: ld1 { v0.b }[6], [x8]
-; CHECK-NEXT: add x8, sp, #264
-; CHECK-NEXT: mov v1.b[6], w6
-; CHECK-NEXT: movi v6.2d, #0000000000000000
-; CHECK-NEXT: ld1 { v3.b }[7], [x8]
-; CHECK-NEXT: add x8, sp, #272
+; CHECK-NEXT: ld1 { v4.b }[9], [x12]
 ; CHECK-NEXT: ld1 { v0.b }[7], [x10]
-; CHECK-NEXT: add x10, sp, #16
-; CHECK-NEXT: mov v1.b[7], w7
-; CHECK-NEXT: ld1 { v2.b }[8], [x9]
+; CHECK-NEXT: mov v1.b[6], w6
+; CHECK-NEXT: add x12, sp, #704
+; CHECK-NEXT: ld1 { v3.b }[10], [x9]
 ; CHECK-NEXT: add x9, sp, #400
-; CHECK-NEXT: ld1 { v3.b }[8], [x8]
-; CHECK-NEXT: add x8, sp, #280
-; CHECK-NEXT: ld1 { v1.b }[8], [x10]
-; CHECK-NEXT: add x10, sp, #24
+; CHECK-NEXT: add x10, sp, #712
+; CHECK-NEXT: ld1 { v4.b }[10], [x14]
+; CHECK-NEXT: add x14, sp, #992
 ; CHECK-NEXT: ld1 { v0.b }[8], [x9]
-; CHECK-NEXT: add x9, sp, #408
-; CHECK-NEXT: ld1 { v3.b }[9], [x8]
-; CHECK-NEXT: add x8, sp, #288
-; CHECK-NEXT: ld1 { v2.b }[9], [x11]
-; CHECK-NEXT: add x11, sp, #672
-; CHECK-NEXT: ld1 { v1.b }[9], [x10]
-; CHECK-NEXT: add x10, sp, #32
-; CHECK-NEXT: ld1 { v0.b }[9], [x9]
-; CHECK-NEXT: add x9, sp, #416
-; CHECK-NEXT: ld1 { v3.b }[10], [x8]
-; CHECK-NEXT: add x8, sp, #296
-; CHECK-NEXT: ld1 { v2.b }[10], [x11]
-; CHECK-NEXT: add x11, sp, #680
-; CHECK-NEXT: ld1 { v1.b }[10], [x10]
-; CHECK-NEXT: add x10, sp, #40
-; CHECK-NEXT: ld1 { v0.b }[10], [x9]
-; CHECK-NEXT: add x9, sp, #424
+; CHECK-NEXT: ld1 { v17.b }[2], [x14]
+; CHECK-NEXT: add x14, sp, #296
 ; CHECK-NEXT: ld1 { v3.b }[11], [x8]
-; CHECK-NEXT: add x8, sp, #304
-; CHECK-NEXT: ld1 { v2.b }[11], [x11]
-; CHECK-NEXT: add x11, sp, #688
-; CHECK-NEXT: ld1 { v1.b }[11], [x10]
-; CHECK-NEXT: add x10, sp, #48
-; CHECK-NEXT: ld1 { v0.b }[11], [x9]
-; CHECK-NEXT: add x9, sp, #432
-; CHECK-NEXT: ld1 { v3.b }[12], [x8]
+; CHECK-NEXT: add x9, sp, #304
 ; CHECK-NEXT: add x8, sp, #312
-; CHECK-NEXT: ld1 { v2.b }[12], [x11]
-; CHECK-NEXT: add x11, sp, #696
-; CHECK-NEXT: ld1 { v1.b }[12], [x10]
-; CHECK-NEXT: add x10, sp, #56
-; CHECK-NEXT: ld1 { v0.b }[12], [x9]
-; CHECK-NEXT: add x9, sp, #440
-; CHECK-NEXT: ld1 { v3.b }[13], [x8]
-; CHECK-NEXT: add x8, sp, #320
-; CHECK-NEXT: ld1 { v2.b }[13], [x11]
-; CHECK-NEXT: add x11, sp, #704
-; CHECK-NEXT: ld1 { v1.b }[13], [x10]
-; CHECK-NEXT: add x10, sp, #64
-; CHECK-NEXT: ld1 { v0.b }[13], [x9]
-; CHECK-NEXT: add x9, sp, #448
-; CHECK-NEXT: ld1 { v3.b }[14], [x8]
-; CHECK-NEXT: add x8, sp, #328
-; CHECK-NEXT: ld1 { v2.b }[14], [x11]
-; CHECK-NEXT: add x11, sp, #712
-; CHECK-NEXT: ld1 { v1.b }[14], [x10]
-; CHECK-NEXT: add x10, sp, #472
-; CHECK-NEXT: ld1 { v0.b }[14], [x9]
-; CHECK-NEXT: add x9, sp, #456
-; CHECK-NEXT: ld1 { v3.b }[15], [x8]
-; CHECK-NEXT: add x8, sp, #72
-; CHECK-NEXT: ld1 { v4.b }[1], [x10]
-; CHECK-NEXT: add x10, sp, #88
-; CHECK-NEXT: ld1 { v2.b }[15], [x11]
-; CHECK-NEXT: add x11, sp, #480
-; CHECK-NEXT: ld1 { v1.b }[15], [x8]
-; CHECK-NEXT: add x8, sp, #1368
-; CHECK-NEXT: ld1 { v0.b }[15], [x9]
-; CHECK-NEXT: add x9, sp, #984
-; CHECK-NEXT: ld1 { v5.b }[1], [x10]
-; CHECK-NEXT: add x10, sp, #96
-; CHECK-NEXT: ld1 { v18.b }[1], [x8]
-; CHECK-NEXT: add x8, sp, #1376
-; CHECK-NEXT: ld1 { v19.b }[1], [x9]
-; CHECK-NEXT: add x9, sp, #992
-; CHECK-NEXT: ld1 { v4.b }[2], [x11]
-; CHECK-NEXT: add x11, sp, #488
-; CHECK-NEXT: ld1 { v5.b }[2], [x10]
-; CHECK-NEXT: add x10, sp, #104
-; CHECK-NEXT: ld1 { v18.b }[2], [x8]
-; CHECK-NEXT: add x8, sp, #1384
-; CHECK-NEXT: ld1 { v19.b }[2], [x9]
 ; CHECK-NEXT: add x9, sp, #1000
-; CHECK-NEXT: ld1 { v4.b }[3], [x11]
-; CHECK-NEXT: add x11, sp, #496
-; CHECK-NEXT: ld1 { v5.b }[3], [x10]
-; CHECK-NEXT: add x10, sp, #112
-; CHECK-NEXT: ld1 { v18.b }[3], [x8]
-; CHECK-NEXT: add x8, sp, #1392
-; CHECK-NEXT: ld1 { v19.b }[3], [x9]
-; CHECK-NEXT: add x9, sp, #1008
-; CHECK-NEXT: ld1 { v4.b }[4], [x11]
-; CHECK-NEXT: add x11, sp, #504
-; CHECK-NEXT: ld1 { v5.b }[4], [x10]
-; CHECK-NEXT: add x10, sp, #120
-; CHECK-NEXT: ld1 { v18.b }[4], [x8]
-; CHECK-NEXT: add x8, sp, #1400
-; CHECK-NEXT: ld1 { v19.b }[4], [x9]
-; CHECK-NEXT: add x9, sp, #1016
-; CHECK-NEXT: ld1 { v4.b }[5], [x11]
-; CHECK-NEXT: add x11, sp, #512
-; CHECK-NEXT: ld1 { v5.b }[5], [x10]
-; CHECK-NEXT: add x10, sp, #128
-; CHECK-NEXT: ld1 { v18.b }[5], [x8]
-; CHECK-NEXT: add x8, sp, #1408
-; CHECK-NEXT: ld1 { v19.b }[5], [x9]
-; CHECK-NEXT: add x9, sp, #1024
-; CHECK-NEXT: ld1 { v4.b }[6], [x11]
-; CHECK-NEXT: add x11, sp, #520
-; CHECK-NEXT: ld1 { v5.b }[6], [x10]
-; CHECK-NEXT: add x10, sp, #136
-; CHECK-NEXT: ld1 { v18.b }[6], [x8]
-; CHECK-NEXT: add x8, sp, #1416
-; CHECK-NEXT: ld1 { v19.b }[6], [x9]
-; CHECK-NEXT: add x9, sp, #1032
-; CHECK-NEXT: ld1 { v4.b }[7], [x11]
-; CHECK-NEXT: add x11, sp, #528
-; CHECK-NEXT: ld1 { v5.b }[7], [x10]
-; CHECK-NEXT: add x10, sp, #144
-; CHECK-NEXT: ld1 { v18.b }[7], [x8]
-; CHECK-NEXT: add x8, sp, #1424
-; CHECK-NEXT: ld1 { v19.b }[7], [x9]
-; CHECK-NEXT: add x9, sp, #1040
-; CHECK-NEXT: ld1 { v4.b }[8], [x11]
-; CHECK-NEXT: add x11, sp, #536
-; CHECK-NEXT: ld1 { v5.b }[8], [x10]
-; CHECK-NEXT: add x10, sp, #152
-; CHECK-NEXT: ld1 { v18.b }[8], [x8]
-; CHECK-NEXT: add x8, sp, #1432
-; CHECK-NEXT: ld1 { v19.b }[8], [x9]
+; CHECK-NEXT: ld1 { v1.b }[8], [x8]
+; CHECK-NEXT: ld1 { v17.b }[3], [x9]
+; CHECK-NEXT: ld1 { v3.b }[14], [x12]
+; CHECK-NEXT: add x12, sp, #488
+; CHECK-NEXT: ld1 { v4.b }[14], [x14]
+; CHECK-NEXT: add x14, sp, #1392
+; CHECK-NEXT: ld1 { v2.b }[3], [x12]
+; CHECK-NEXT: ld1 { v7.b }[4], [x14]
+; CHECK-NEXT: add x8, sp, #1008
+; CHECK-NEXT: ld1 { v0.b }[12], [x11]
+; CHECK-NEXT: ld1 { v17.b }[4], [x8]
+; CHECK-NEXT: add x11, sp, #1400
+; CHECK-NEXT: add x8, sp, #496
+; CHECK-NEXT: ld1 { v2.b }[4], [x8]
+; CHECK-NEXT: add x8, sp, #1016
+; CHECK-NEXT: add x9, sp, #24
+; CHECK-NEXT: ld1 { v7.b }[5], [x11]
+; CHECK-NEXT: ld1 { v3.b }[15], [x10]
+; CHECK-NEXT: ld1 { v0.b }[13], [x13]
+; CHECK-NEXT: ld1 { v17.b }[5], [x8]
+; CHECK-NEXT: add x10, sp, #1408
+; CHECK-NEXT: ld1 { v1.b }[9], [x9]
+; CHECK-NEXT: add x8, sp, #504
+; CHECK-NEXT: add x9, sp, #32
+; CHECK-NEXT: ld1 { v4.b }[15], [x15]
+; CHECK-NEXT: ld1 { v7.b }[6], [x10]
+; CHECK-NEXT: ld1 { v2.b }[5], [x8]
+; CHECK-NEXT: add x8, sp, #1024
+; CHECK-NEXT: ld1 { v17.b }[6], [x8]
+; CHECK-NEXT: ld1 { v0.b }[14], [x16]
+; CHECK-NEXT: ld1 { v1.b }[10], [x9]
+; CHECK-NEXT: add x9, sp, #1416
+; CHECK-NEXT: add x10, sp, #512
+; CHECK-NEXT: add x8, sp, #456
+; CHECK-NEXT: ld1 { v7.b }[7], [x9]
+; CHECK-NEXT: ld1 { v2.b }[6], [x10]
+; CHECK-NEXT: add x10, sp, #1032
+; CHECK-NEXT: add x9, sp, #40
+; CHECK-NEXT: ld1 { v17.b }[7], [x10]
+; CHECK-NEXT: ld1 { v0.b }[15], [x8]
+; CHECK-NEXT: ld1 { v1.b }[11], [x9]
+; CHECK-NEXT: add x9, sp, #1424
+; CHECK-NEXT: add x8, sp, #520
+; CHECK-NEXT: ld1 { v7.b }[8], [x9]
+; CHECK-NEXT: ld1 { v2.b }[7], [x8]
+; CHECK-NEXT: add x8, sp, #1040
+; CHECK-NEXT: add x9, sp, #48
+; CHECK-NEXT: ld1 { v17.b }[8], [x8]
+; CHECK-NEXT: add x10, sp, #528
+; CHECK-NEXT: ld1 { v1.b }[12], [x9]
+; CHECK-NEXT: add x9, sp, #1432
+; CHECK-NEXT: sdot v6.4s, v4.16b, v3.16b
+; CHECK-NEXT: ld1 { v7.b }[9], [x9]
+; CHECK-NEXT: ld1 { v2.b }[8], [x10]
 ; CHECK-NEXT: add x9, sp, #1048
-; CHECK-NEXT: ld1 { v4.b }[9], [x11]
-; CHECK-NEXT: add x11, sp, #544
-; CHECK-NEXT: ld1 { v5.b }[9], [x10]
-; CHECK-NEXT: add x10, sp, #160
-; CHECK-NEXT: ld1 { v18.b }[9], [x8]
-; CHECK-NEXT: add x8, sp, #1440
-; CHECK-NEXT: ld1 { v19.b }[9], [x9]
-; CHECK-NEXT: add x9, sp, #1056
-; CHECK-NEXT: ld1 { v4.b }[10], [x11]
-; CHECK-NEXT: add x11, sp, #552
-; CHECK-NEXT: ld1 { v5.b }[10], [x10]
-; CHECK-NEXT: add x10, sp, #168
-; CHECK-NEXT: ld1 { v18.b }[10], [x8]
-; CHECK-NEXT: add x8, sp, #1448
-; CHECK-NEXT: ld1 { v19.b }[10], [x9]
-; CHECK-NEXT: add x9, sp, #1064
-; CHECK-NEXT: ld1 { v4.b }[11], [x11]
-; CHECK-NEXT: add x11, sp, #560
-; CHECK-NEXT: ld1 { v5.b }[11], [x10]
-; CHECK-NEXT: add x10, sp, #176
-; CHECK-NEXT: ld1 { v18.b }[11], [x8]
-; CHECK-NEXT: add x8, sp, #1456
-; CHECK-NEXT: ld1 { v19.b }[11], [x9]
-; CHECK-NEXT: add x9, sp, #1072
-; CHECK-NEXT: ld1 { v4.b }[12], [x11]
-; CHECK-NEXT: add x11, sp, #568
-; CHECK-NEXT: ld1 { v5.b }[12], [x10]
-; CHECK-NEXT: add x10, sp, #184
-; CHECK-NEXT: ld1 { v18.b }[12], [x8]
-; CHECK-NEXT: add x8, sp, #1464
-; CHECK-NEXT: ld1 { v19.b }[12], [x9]
-; CHECK-NEXT: add x9, sp, #1080
-; CHECK-NEXT: ld1 { v4.b }[13], [x11]
-; CHECK-NEXT: add x11, sp, #1128
-; CHECK-NEXT: ld1 { v5.b }[13], [x10]
-; CHECK-NEXT: add x10, sp, #1112
-; CHECK-NEXT: ld1 { v18.b }[13], [x8]
-; CHECK-NEXT: add x8, sp, #1472
-; CHECK-NEXT: ld1 { v19.b }[13], [x9]
-; CHECK-NEXT: add x9, sp, #1088
-; CHECK-NEXT: ld1 { v16.b }[1], [x10]
-; CHECK-NEXT: add x10, sp, #728
-; CHECK-NEXT: sdot v7.4s, v3.16b, v2.16b
-; CHECK-NEXT: ldr b2, [sp, #1232]
-; CHECK-NEXT: ld1 { v18.b }[14], [x8]
-; CHECK-NEXT: add x8, sp, #1480
-; CHECK-NEXT: ld1 { v19.b }[14], [x9]
-; CHECK-NEXT: add x9, sp, #1096
-; CHECK-NEXT: ld1 { v17.b }[1], [x10]
-; CHECK-NEXT: add x10, sp, #856
-; CHECK-NEXT: ldr b3, [sp, #848]
-; CHECK-NEXT: ld1 { v18.b }[15], [x8]
-; CHECK-NEXT: add x8, sp, #576
-; CHECK-NEXT: ld1 { v19.b }[15], [x9]
-; CHECK-NEXT: add x9, sp, #192
+; CHECK-NEXT: ldr b3, [sp, #80]
+; CHECK-NEXT: ld1 { v17.b }[9], [x9]
+; CHECK-NEXT: add x8, sp, #56
+; CHECK-NEXT: add x10, sp, #88
+; CHECK-NEXT: add x9, sp, #536
+; CHECK-NEXT: add x11, sp, #1440
 ; CHECK-NEXT: ld1 { v3.b }[1], [x10]
-; CHECK-NEXT: add x10, sp, #864
-; CHECK-NEXT: ld1 { v4.b }[14], [x8]
-; CHECK-NEXT: add x8, sp, #1120
-; CHECK-NEXT: ld1 { v5.b }[14], [x9]
-; CHECK-NEXT: add x9, sp, #1240
-; CHECK-NEXT: sdot v6.4s, v19.16b, v18.16b
-; CHECK-NEXT: ld1 { v16.b }[2], [x8]
+; CHECK-NEXT: ld1 { v1.b }[13], [x8]
+; CHECK-NEXT: ld1 { v2.b }[9], [x9]
+; CHECK-NEXT: add x8, sp, #1056
+; CHECK-NEXT: ld1 { v7.b }[10], [x11]
+; CHECK-NEXT: add x9, sp, #96
+; CHECK-NEXT: ld1 { v17.b }[10], [x8]
+; CHECK-NEXT: add x8, sp, #544
+; CHECK-NEXT: add x10, sp, #1448
+; CHECK-NEXT: ld1 { v3.b }[2], [x9]
+; CHECK-NEXT: ld1 { v2.b }[10], [x8]
+; CHECK-NEXT: add x8, sp, #1064
+; CHECK-NEXT: ld1 { v7.b }[11], [x10]
+; CHECK-NEXT: add x10, sp, #104
+; CHECK-NEXT: add x11, sp, #1456
+; CHECK-NEXT: ld1 { v17.b }[11], [x8]
+; CHECK-NEXT: add x8, sp, #552
+; CHECK-NEXT: add x9, sp, #64
+; CHECK-NEXT: ld1 { v3.b }[3], [x10]
+; CHECK-NEXT: ld1 { v2.b }[11], [x8]
+; CHECK-NEXT: add x8, sp, #1072
+; CHECK-NEXT: ld1 { v7.b }[12], [x11]
+; CHECK-NEXT: ld1 { v1.b }[14], [x9]
+; CHECK-NEXT: add x9, sp, #112
+; CHECK-NEXT: ld1 { v17.b }[12], [x8]
+; CHECK-NEXT: add x8, sp, #560
+; CHECK-NEXT: add x10, sp, #1464
+; CHECK-NEXT: ld1 { v3.b }[4], [x9]
+; CHECK-NEXT: ld1 { v2.b }[12], [x8]
+; CHECK-NEXT: add x8, sp, #1080
+; CHECK-NEXT: ld1 { v7.b }[13], [x10]
+; CHECK-NEXT: add x10, sp, #120
+; CHECK-NEXT: add x11, sp, #1472
+; CHECK-NEXT: ld1 { v17.b }[13], [x8]
+; CHECK-NEXT: add x8, sp, #568
+; CHECK-NEXT: add x9, sp, #72
+; CHECK-NEXT: ld1 { v3.b }[5], [x10]
+; CHECK-NEXT: ld1 { v2.b }[13], [x8]
+; CHECK-NEXT: add x8, sp, #1088
+; CHECK-NEXT: ld1 { v7.b }[14], [x11]
+; CHECK-NEXT: ld1 { v1.b }[15], [x9]
+; CHECK-NEXT: add x9, sp, #128
+; CHECK-NEXT: ld1 { v17.b }[14], [x8]
+; CHECK-NEXT: ldr b4, [sp, #1104]
+; CHECK-NEXT: add x10, sp, #1480
+; CHECK-NEXT: ld1 { v3.b }[6], [x9]
+; CHECK-NEXT: add x8, sp, #1096
+; CHECK-NEXT: add x9, sp, #1112
+; CHECK-NEXT: ld1 { v7.b }[15], [x10]
+; CHECK-NEXT: ld1 { v4.b }[1], [x9]
+; CHECK-NEXT: add x9, sp, #576
+; CHECK-NEXT: ld1 { v17.b }[15], [x8]
+; CHECK-NEXT: add x8, sp, #728
+; CHECK-NEXT: add x10, sp, #136
+; CHECK-NEXT: ld1 { v16.b }[1], [x8]
+; CHECK-NEXT: add x8, sp, #1120
+; CHECK-NEXT: ld1 { v2.b }[14], [x9]
+; CHECK-NEXT: ld1 { v4.b }[2], [x8]
 ; CHECK-NEXT: add x8, sp, #736
-; CHECK-NEXT: ld1 { v2.b }[1], [x9]
-; CHECK-NEXT: add x9, sp, #1248
-; CHECK-NEXT: ld1 { v3.b }[2], [x10]
-; CHECK-NEXT: add x10, sp, #872
-; CHECK-NEXT: ld1 { v17.b }[2], [x8]
-; CHECK-NEXT: add x8, sp, #744
+; CHECK-NEXT: ld1 { v3.b }[7], [x10]
+; CHECK-NEXT: sdot v5.4s, v17.16b, v7.16b
+; CHECK-NEXT: ldr b7, [sp, #1232]
+; CHECK-NEXT: ldr b17, [sp, #848]
+; CHECK-NEXT: ld1 { v16.b }[2], [x8]
+; CHECK-NEXT: add x9, sp, #1240
+; CHECK-NEXT: add x10, sp, #856
+; CHECK-NEXT: ld1 { v7.b }[1], [x9]
+; CHECK-NEXT: ld1 { v17.b }[1], [x10]
+; CHECK-NEXT: add x8, sp, #1128
+; CHECK-NEXT: add x11, sp, #744
+; CHECK-NEXT: ld1 { v4.b }[3], [x8]
+; CHECK-NEXT: add x10, sp, #1248
 ; CHECK-NEXT: ld1 { v16.b }[3], [x11]
-; CHECK-NEXT: add x11, sp, #1136
-; CHECK-NEXT: ld1 { v2.b }[2], [x9]
+; CHECK-NEXT: add x11, sp, #864
+; CHECK-NEXT: add x9, sp, #144
+; CHECK-NEXT: ld1 { v7.b }[2], [x10]
+; CHECK-NEXT: ld1 { v17.b }[2], [x11]
+; CHECK-NEXT: add x8, sp, #1136
+; CHECK-NEXT: add x12, sp, #752
+; CHECK-NEXT: ld1 { v3.b }[8], [x9]
+; CHECK-NEXT: ld1 { v4.b }[4], [x8]
+; CHECK-NEXT: ld1 { v16.b }[4], [x12]
 ; CHECK-NEXT: add x9, sp, #1256
-; CHECK-NEXT: ld1 { v3.b }[3], [x10]
-; CHECK-NEXT: add x10, sp, #880
-; CHECK-NEXT: ld1 { v17.b }[3], [x8]
-; CHECK-NEXT: add x8, sp, #752
-; CHECK-NEXT: ld1 { v16.b }[4], [x11]
-; CHECK-NEXT: add x11, sp, #1144
-; CHECK-NEXT: ld1 { v2.b }[3], [x9]
-; CHECK-NEXT: add x9, sp, #1264
-; CHECK-NEXT: ld1 { v3.b }[4], [x10]
-; CHECK-NEXT: add x10, sp, #888
-; CHECK-NEXT: ld1 { v17.b }[4], [x8]
-; CHECK-NEXT: add x8, sp, #760
+; CHECK-NEXT: add x10, sp, #872
+; CHECK-NEXT: ld1 { v7.b }[3], [x9]
+; CHECK-NEXT: ld1 { v17.b }[3], [x10]
+; CHECK-NEXT: add x8, sp, #1144
+; CHECK-NEXT: add x11, sp, #760
+; CHECK-NEXT: ld1 { v4.b }[5], [x8]
+; CHECK-NEXT: add x10, sp, #1264
 ; CHECK-NEXT: ld1 { v16.b }[5], [x11]
-; CHECK-NEXT: add x11, sp, #1152
-; CHECK-NEXT: ld1 { v2.b }[4], [x9]
+; CHECK-NEXT: add x11, sp, #880
+; CHECK-NEXT: add x9, sp, #152
+; CHECK-NEXT: ld1 { v7.b }[4], [x10]
+; CHECK-NEXT: ld1 { v17.b }[4], [x11]
+; CHECK-NEXT: add x8, sp, #1152
+; CHECK-NEXT: add x12, sp, #768
+; CHECK-NEXT: ld1 { v3.b }[9], [x9]
+; CHECK-NEXT: ld1 { v4.b }[6], [x8]
+; CHECK-NEXT: ld1 { v16.b }[6], [x12]
 ; CHECK-NEXT: add x9, sp, #1272
-; CHECK-NEXT: ld1 { v3.b }[5], [x10]
-; CHECK-NEXT: add x10, sp, #896
-; CHECK-NEXT: ld1 { v17.b }[5], [x8]
-; CHECK-NEXT: add x8, sp, #768
-; CHECK-NEXT: ld1 { v16.b }[6], [x11]
-; CHECK-NEXT: add x11, sp, #1160
-; CHECK-NEXT: ld1 { v2.b }[5], [x9]
-; CHECK-NEXT: add x9, sp, #1280
-; CHECK-NEXT: ld1 { v3.b }[6], [x10]
-; CHECK-NEXT: add x10, sp, #904
-; CHECK-NEXT: ld1 { v17.b }[6], [x8]
-; CHECK-NEXT: add x8, sp, #776
+; CHECK-NEXT: add x10, sp, #888
+; CHECK-NEXT: ld1 { v7.b }[5], [x9]
+; CHECK-NEXT: ld1 { v17.b }[5], [x10]
+; CHECK-NEXT: add x8, sp, #1160
+; CHECK-NEXT: add x11, sp, #776
+; CHECK-NEXT: ld1 { v4.b }[7], [x8]
+; CHECK-NEXT: add x10, sp, #1280
 ; CHECK-NEXT: ld1 { v16.b }[7], [x11]
-; CHECK-NEXT: add x11, sp, #1168
-; CHECK-NEXT: ld1 { v2.b }[6], [x9]
+; CHECK-NEXT: add x11, sp, #896
+; CHECK-NEXT: add x9, sp, #160
+; CHECK-NEXT: ld1 { v7.b }[6], [x10]
+; CHECK-NEXT: ld1 { v17.b }[6], [x11]
+; CHECK-NEXT: add x8, sp, #1168
+; CHECK-NEXT: add x12, sp, #784
+; CHECK-NEXT: ld1 { v3.b }[10], [x9]
+; CHECK-NEXT: ld1 { v4.b }[8], [x8]
+; CHECK-NEXT: ld1 { v16.b }[8], [x12]
 ; CHECK-NEXT: add x9, sp, #1288
-; CHECK-NEXT: ld1 { v3.b }[7], [x10]
-; CHECK-NEXT: add x10, sp, #912
-; CHECK-NEXT: ld1 { v17.b }[7], [x8]
-; CHECK-NEXT: add x8, sp, #784
-; CHECK-NEXT: ld1 { v16.b }[8], [x11]
-; CHECK-NEXT: add x11, sp, #1176
-; CHECK-NEXT: ld1 { v2.b }[7], [x9]
-; CHECK-NEXT: add x9, sp, #1296
-; CHECK-NEXT: ld1 { v3.b }[8], [x10]
-; CHECK-NEXT: add x10, sp, #920
-; CHECK-NEXT: ld1 { v17.b }[8], [x8]
-; CHECK-NEXT: add x8, sp, #792
+; CHECK-NEXT: add x10, sp, #904
+; CHECK-NEXT: ld1 { v7.b }[7], [x9]
+; CHECK-NEXT: ld1 { v17.b }[7], [x10]
+; CHECK-NEXT: add x8, sp, #1176
+; CHECK-NEXT: add x11, sp, #792
+; CHECK-NEXT: ld1 { v4.b }[9], [x8]
+; CHECK-NEXT: add x10, sp, #1296
 ; CHECK-NEXT: ld1 { v16.b }[9], [x11]
-; CHECK-NEXT: add x11, sp, #1184
-; CHECK-NEXT: ld1 { v2.b }[8], [x9]
+; CHECK-NEXT: add x11, sp, #912
+; CHECK-NEXT: add x9, sp, #168
+; CHECK-NEXT: ld1 { v7.b }[8], [x10]
+; CHECK-NEXT: ld1 { v17.b }[8], [x11]
+; CHECK-NEXT: add x8, sp, #1184
+; CHECK-NEXT: add x12, sp, #800
+; CHECK-NEXT: ld1 { v3.b }[11], [x9]
+; CHECK-NEXT: ld1 { v4.b }[10], [x8]
+; CHECK-NEXT: ld1 { v16.b }[10], [x12]
 ; CHECK-NEXT: add x9, sp, #1304
-; CHECK-NEXT: ld1 { v3.b }[9], [x10]
-; CHECK-NEXT: add x10, sp, #928
-; CHECK-NEXT: ld1 { v17.b }[9], [x8]
-; CHECK-NEXT: add x8, sp, #800
-; CHECK-NEXT: ld1 { v16.b }[10], [x11]
-; CHECK-NEXT: add x11, sp, #1192
-; CHECK-NEXT: ld1 { v2.b }[9], [x9]
-; CHECK-NEXT: add x9, sp, #1312
-; CHECK-NEXT: ld1 { v3.b }[10], [x10]
-; CHECK-NEXT: add x10, sp, #936
-; CHECK-NEXT: ld1 { v17.b }[10], [x8]
-; CHECK-NEXT: add x8, sp, #808
+; CHECK-NEXT: add x10, sp, #920
+; CHECK-NEXT: ld1 { v7.b }[9], [x9]
+; CHECK-NEXT: ld1 { v17.b }[9], [x10]
+; CHECK-NEXT: add x8, sp, #1192
+; CHECK-NEXT: add x11, sp, #808
+; CHECK-NEXT: ld1 { v4.b }[11], [x8]
+; CHECK-NEXT: add x10, sp, #1312
 ; CHECK-NEXT: ld1 { v16.b }[11], [x11]
-; CHECK-NEXT: add x11, sp, #1200
-; CHECK-NEXT: ld1 { v2.b }[10], [x9]
+; CHECK-NEXT: add x11, sp, #928
+; CHECK-NEXT: add x9, sp, #176
+; CHECK-NEXT: ld1 { v7.b }[10], [x10]
+; CHECK-NEXT: ld1 { v17.b }[10], [x11]
+; CHECK-NEXT: add x8, sp, #1200
+; CHECK-NEXT: add x12, sp, #816
+; CHECK-NEXT: ld1 { v3.b }[12], [x9]
+; CHECK-NEXT: ld1 { v4.b }[12], [x8]
+; CHECK-NEXT: ld1 { v16.b }[12], [x12]
 ; CHECK-NEXT: add x9, sp, #1320
-; CHECK-NEXT: ld1 { v3.b }[11], [x10]
-; CHECK-NEXT: add x10, sp, #944
-; CHECK-NEXT: ld1 { v17.b }[11], [x8]
-; CHECK-NEXT: add x8, sp, #816
-; CHECK-NEXT: ld1 { v16.b }[12], [x11]
-; CHECK-NEXT: add x11, sp, #1208
-; CHECK-NEXT: ld1 { v2.b }[11], [x9]
-; CHECK-NEXT: add x9, sp, #1328
-; CHECK-NEXT: ld1 { v3.b }[12], [x10]
-; CHECK-NEXT: add x10, sp, #952
-; CHECK-NEXT: ld1 { v17.b }[12], [x8]
-; CHECK-NEXT: add x8, sp, #824
+; CHECK-NEXT: add x10, sp, #936
+; CHECK-NEXT: ld1 { v7.b }[11], [x9]
+; CHECK-NEXT: ld1 { v17.b }[11], [x10]
+; CHECK-NEXT: add x8, sp, #1208
+; CHECK-NEXT: add x11, sp, #824
+; CHECK-NEXT: ld1 { v4.b }[13], [x8]
+; CHECK-NEXT: add x10, sp, #1328
 ; CHECK-NEXT: ld1 { v16.b }[13], [x11]
-; CHECK-NEXT: add x11, sp, #1216
-; CHECK-NEXT: ld1 { v2.b }[12], [x9]
+; CHECK-NEXT: add x11, sp, #944
+; CHECK-NEXT: add x9, sp, #184
+; CHECK-NEXT: ld1 { v7.b }[12], [x10]
+; CHECK-NEXT: ld1 { v17.b }[12], [x11]
+; CHECK-NEXT: add x8, sp, #1216
+; CHECK-NEXT: add x12, sp, #832
+; CHECK-NEXT: ld1 { v3.b }[13], [x9]
+; CHECK-NEXT: ld1 { v4.b }[14], [x8]
+; CHECK-NEXT: ld1 { v16.b }[14], [x12]
 ; CHECK-NEXT: add x9, sp, #1336
-; CHECK-NEXT: ld1 { v3.b }[13], [x10]
-; CHECK-NEXT: add x10, sp, #960
-; CHECK-NEXT: ld1 { v17.b }[13], [x8]
-; CHECK-NEXT: add x8, sp, #832
-; CHECK-NEXT: ld1 { v16.b }[14], [x11]
-; CHECK-NEXT: add x11, sp, #1224
-; CHECK-NEXT: ld1 { v2.b }[13], [x9]
-; CHECK-NEXT: add x9, sp, #1344
-; CHECK-NEXT: ld1 { v3.b }[14], [x10]
-; CHECK-NEXT: add x10, sp, #968
-; CHECK-NEXT: ld1 { v17.b }[14], [x8]
-; CHECK-NEXT: add x8, sp, #840
+; CHECK-NEXT: add x10, sp, #952
+; CHECK-NEXT: ld1 { v7.b }[13], [x9]
+; CHECK-NEXT: ld1 { v17.b }[13], [x10]
+; CHECK-NEXT: add x8, sp, #1224
+; CHECK-NEXT: add x11, sp, #840
+; CHECK-NEXT: ld1 { v4.b }[15], [x8]
+; CHECK-NEXT: add x8, sp, #192
 ; CHECK-NEXT: ld1 { v16.b }[15], [x11]
-; CHECK-NEXT: add x11, sp, #584
-; CHECK-NEXT: ld1 { v2.b }[14], [x9]
-; CHECK-NEXT: add x9, sp, #1352
-; CHECK-NEXT: sdot v7.4s, v1.16b, v0.16b
-; CHECK-NEXT: ld1 { v3.b }[15], [x10]
-; CHECK-NEXT: ld1 { v17.b }[15], [x8]
+; CHECK-NEXT: add x10, sp, #1344
+; CHECK-NEXT: add x11, sp, #960
+; CHECK-NEXT: ld1 { v3.b }[14], [x8]
+; CHECK-NEXT: ld1 { v7.b }[14], [x10]
+; CHECK-NEXT: ld1 { v17.b }[14], [x11]
+; CHECK-NEXT: add x9, sp, #584
+; CHECK-NEXT: sdot v6.4s, v1.16b, v0.16b
 ; CHECK-NEXT: add x8, sp, #200
-; CHECK-NEXT: ld1 { v4.b }[15], [x11]
+; CHECK-NEXT: sdot v5.4s, v16.16b, v4.16b
 ; CHECK-NEXT: ld1 { v2.b }[15], [x9]
-; CHECK-NEXT: ld1 { v5.b }[15], [x8]
-; CHECK-NEXT: sdot v6.4s, v17.16b, v16.16b
-; CHECK-NEXT: sdot v7.4s, v5.16b, v4.16b
+; CHECK-NEXT: add x9, sp, #1352
+; CHECK-NEXT: add x10, sp, #968
+; CHECK-NEXT: ld1 { v3.b }[15], [x8]
+; CHECK-NEXT: ld1 { v7.b }[15], [x9]
+; CHECK-NEXT: ld1 { v17.b }[15], [x10]
 ; CHECK-NEXT: sdot v6.4s, v3.16b, v2.16b
-; CHECK-NEXT: add v0.4s, v7.4s, v6.4s
+; CHECK-NEXT: sdot v5.4s, v17.16b, v7.16b
+; CHECK-NEXT: add v0.4s, v6.4s, v5.4s
 ; CHECK-NEXT: addv s0, v0.4s
 ; CHECK-NEXT: fmov w0, s0
 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -2658,195 +2662,195 @@
 ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: .cfi_offset w29, -16
-; CHECK-NEXT: ldr b0, [sp, #208]
+; CHECK-NEXT: ldr b1, [sp, #208]
 ; CHECK-NEXT: add x8, sp, #216
-; CHECK-NEXT: fmov s1, w0
-; CHECK-NEXT: add x9, sp, #232
-; CHECK-NEXT: ldr b2, [sp, #80]
-; CHECK-NEXT: add x11, sp, #88
-; CHECK-NEXT: ld1 { v0.b }[1], [x8]
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: ldr b5, [sp, #976]
+; CHECK-NEXT: add x9, sp, #984
+; CHECK-NEXT: add x12, sp, #328
+; CHECK-NEXT: ld1 { v1.b }[1], [x8]
 ; CHECK-NEXT: add x8, sp, #224
-; CHECK-NEXT: mov v1.b[1], w1
-; CHECK-NEXT: add x10, sp, #248
-; CHECK-NEXT: ld1 { v2.b }[1], [x11]
-; CHECK-NEXT: add x11, sp, #728
-; CHECK-NEXT: ldr b4, [sp, #720]
-; CHECK-NEXT: add x12, sp, #984
-; CHECK-NEXT: ld1 { v0.b }[2], [x8]
-; CHECK-NEXT: add x8, sp, #240
-; CHECK-NEXT: mov v1.b[2], w2
-; CHECK-NEXT: ldr b3, [sp, #976]
-; CHECK-NEXT: ldr b5, [sp, #848]
-; CHECK-NEXT: add x13, sp, #96
-; CHECK-NEXT: ld1 { v4.b }[1], [x11]
-; CHECK-NEXT: add x11, sp, #856
-; CHECK-NEXT: ld1 { v0.b }[3], [x9]
-; CHECK-NEXT: add x9, sp, #256
-; CHECK-NEXT: mov v1.b[3], w3
-; CHECK-NEXT: ld1 { v3.b }[1], [x12]
-; CHECK-NEXT: add x12, sp, #264
-; CHECK-NEXT: ld1 { v5.b }[1], [x11]
+; CHECK-NEXT: movi v2.16b, #1
+; CHECK-NEXT: mov v0.b[1], w1
+; CHECK-NEXT: ld1 { v5.b }[1], [x9]
+; CHECK-NEXT: movi v4.2d, #0000000000000000
 ; CHECK-NEXT: add x11, sp, #992
-; CHECK-NEXT: ld1 { v2.b }[2], [x13]
-; CHECK-NEXT: ld1 { v0.b }[4], [x8]
-; CHECK-NEXT: add x13, sp, #736
-; CHECK-NEXT: mov v1.b[4], w4
+; CHECK-NEXT: ldr b6, [sp, #720]
+; CHECK-NEXT: ldr b7, [sp, #80]
+; CHECK-NEXT: ld1 { v1.b }[2], [x8]
+; CHECK-NEXT: add x8, sp, #232
+; CHECK-NEXT: add x13, sp, #88
+; CHECK-NEXT: ld1 { v5.b }[2], [x11]
+; CHECK-NEXT: ld1 { v7.b }[1], [x13]
+; CHECK-NEXT: add x13, sp, #856
+; CHECK-NEXT: mov v0.b[2], w2
+; CHECK-NEXT: add x14, sp, #744
+; CHECK-NEXT: add x15, sp, #872
+; CHECK-NEXT: ld1 { v1.b }[3], [x8]
+; CHECK-NEXT: add x8, sp, #240
+; CHECK-NEXT: add x16, sp, #888
+; CHECK-NEXT: add x10, sp, #16
+; CHECK-NEXT: add x9, sp, #24
+; CHECK-NEXT: add x11, sp, #40
+; CHECK-NEXT: movi v3.2d, #0000000000000000
+; CHECK-NEXT: ld1 { v1.b }[4], [x8]
+; CHECK-NEXT: add x8, sp, #248
+; CHECK-NEXT: mov v0.b[3], w3
+; CHECK-NEXT: ld1 { v1.b }[5], [x8]
+; CHECK-NEXT: add x8, sp, #256
+; CHECK-NEXT: mov v0.b[4], w4
+; CHECK-NEXT: ld1 { v1.b }[6], [x8]
+; CHECK-NEXT: add x8, sp, #264
+; CHECK-NEXT: mov v0.b[5], w5
+; CHECK-NEXT: ld1 { v1.b }[7], [x8]
 ; CHECK-NEXT: add x8, sp, #272
-; CHECK-NEXT: ld1 { v3.b }[2], [x11]
+; CHECK-NEXT: ld1 { v1.b }[8], [x8]
+; CHECK-NEXT: add x8, sp, #280
+; CHECK-NEXT: mov v0.b[6], w6
+; CHECK-NEXT: ld1 { v1.b }[9], [x8]
+; CHECK-NEXT: add x8, sp, #288
+; CHECK-NEXT: mov v0.b[7], w7
+; CHECK-NEXT: ld1 { v1.b }[10], [x8]
+; CHECK-NEXT: add x8, sp, #296
+; CHECK-NEXT: ld1 { v0.b }[8], [x10]
+; CHECK-NEXT: add x10, sp, #128
+; CHECK-NEXT: ld1 { v1.b }[11], [x8]
+; CHECK-NEXT: add x8, sp, #304
+; CHECK-NEXT: ld1 { v0.b }[9], [x9]
+; CHECK-NEXT: add x9, sp, #136
+; CHECK-NEXT: ld1 { v1.b }[12], [x8]
+; CHECK-NEXT: add x8, sp, #312
+; CHECK-NEXT: ld1 { v1.b }[13], [x8]
+; CHECK-NEXT: add x8, sp, #320
+; CHECK-NEXT: ld1 { v1.b }[14], [x8]
+; CHECK-NEXT: add x8, sp, #32
+; CHECK-NEXT: ld1 { v0.b }[10], [x8]
+; CHECK-NEXT: add x8, sp, #144
+; CHECK-NEXT: ld1 { v1.b }[15], [x12]
+; CHECK-NEXT: add x12, sp, #728
+; CHECK-NEXT: ld1 { v6.b }[1], [x12]
+; CHECK-NEXT: add x12, sp, #1000
+; CHECK-NEXT: ld1 { v0.b }[11], [x11]
+; CHECK-NEXT: ld1 { v5.b }[3], [x12]
+; CHECK-NEXT: add x12, sp, #736
+; CHECK-NEXT: add x11, sp, #920
+; CHECK-NEXT: sdot v4.4s, v1.16b, v2.16b
+; CHECK-NEXT: ldr b1, [sp, #848]
+; CHECK-NEXT: ld1 { v6.b }[2], [x12]
+; CHECK-NEXT: add x12, sp, #1008
+; CHECK-NEXT: ld1 { v1.b }[1], [x13]
+; CHECK-NEXT: ld1 { v5.b }[4], [x12]
+; CHECK-NEXT: add x12, sp, #96
+; CHECK-NEXT: ld1 { v7.b }[2], [x12]
+; CHECK-NEXT: add x12, sp, #1016
+; CHECK-NEXT: add x13, sp, #48
+; CHECK-NEXT: ld1 { v6.b }[3], [x14]
 ; CHECK-NEXT: add x14, sp, #864
-; CHECK-NEXT: ld1 { v4.b }[2], [x13]
-; CHECK-NEXT: add x13, sp, #1000
-; CHECK-NEXT: ld1 { v0.b }[5], [x10]
-; CHECK-NEXT: add x11, sp, #104
-; CHECK-NEXT: mov v1.b[5], w5
-; CHECK-NEXT: add x10, sp, #280
-; CHECK-NEXT: ld1 { v3.b }[3], [x13]
-; CHECK-NEXT: add x13, sp, #16
-; CHECK-NEXT: ld1 { v5.b }[2], [x14]
-; CHECK-NEXT: add x14, sp, #296
-; CHECK-NEXT: ld1 { v0.b }[6], [x9]
-; CHECK-NEXT: add x9, sp, #288
-; CHECK-NEXT: mov v1.b[6], w6
-; CHECK-NEXT: ld1 { v2.b }[3], [x11]
-; CHECK-NEXT: add x11, sp, #872
-; CHECK-NEXT: movi v6.16b, #1
-; CHECK-NEXT: ld1 { v0.b }[7], [x12]
-; CHECK-NEXT: add x12, sp, #744
-; CHECK-NEXT: mov v1.b[7], w7
-; CHECK-NEXT: ld1 { v5.b }[3], [x11]
-; CHECK-NEXT: add x11, sp, #1008
-; CHECK-NEXT: ld1 { v4.b }[3], [x12]
+; CHECK-NEXT: ld1 { v0.b }[12], [x13]
+; CHECK-NEXT: ld1 { v1.b }[2], [x14]
+; CHECK-NEXT: add x14, sp, #752
+; CHECK-NEXT: ld1 { v5.b }[5], [x12]
+; CHECK-NEXT: add x12, sp, #104
+; CHECK-NEXT: ld1 { v6.b }[4], [x14]
+; CHECK-NEXT: add x14, sp, #1024
+; CHECK-NEXT: ld1 { v7.b }[3], [x12]
+; CHECK-NEXT: ld1 { v1.b }[3], [x15]
+; CHECK-NEXT: add x15, sp, #760
+; CHECK-NEXT: ld1 { v5.b }[6], [x14]
 ; CHECK-NEXT: add x12, sp, #112
-; CHECK-NEXT: ld1 { v0.b }[8], [x8]
-; CHECK-NEXT: add x8, sp, #304
-; CHECK-NEXT: ld1 { v1.b }[8], [x13]
-; CHECK-NEXT: add x13, sp, #24
-; CHECK-NEXT: ld1 { v3.b }[4], [x11]
-; CHECK-NEXT: add x11, sp, #880
-; CHECK-NEXT: ld1 { v2.b }[4], [x12]
-; CHECK-NEXT: add x12, sp, #752
-; CHECK-NEXT: ld1 { v0.b }[9], [x10]
-; CHECK-NEXT: add x10, sp, #312
-; CHECK-NEXT: ld1 { v1.b }[9], [x13]
-; CHECK-NEXT: add x13, sp, #32
-; CHECK-NEXT: ld1 { v5.b }[4], [x11]
-; CHECK-NEXT: add x11, sp, #1016
-; CHECK-NEXT: ld1 { v4.b }[4], [x12]
+; CHECK-NEXT: add x14, sp, #880
+; CHECK-NEXT: ld1 { v6.b }[5], [x15]
+; CHECK-NEXT: add x15, sp, #1032
+; CHECK-NEXT: ld1 { v7.b }[4], [x12]
+; CHECK-NEXT: ld1 { v1.b }[4], [x14]
+; CHECK-NEXT: add x14, sp, #768
+; CHECK-NEXT: ld1 { v5.b }[7], [x15]
 ; CHECK-NEXT: add x12, sp, #120
-; CHECK-NEXT: ld1 { v0.b }[10], [x9]
-; CHECK-NEXT: add x9, sp, #320
-; CHECK-NEXT: ld1 { v1.b }[10], [x13]
-; CHECK-NEXT: add x13, sp, #40
-; CHECK-NEXT: ld1 { v3.b }[5], [x11]
-; CHECK-NEXT: add x11, sp, #888
-; CHECK-NEXT: ld1 { v2.b }[5], [x12]
-; CHECK-NEXT: add x12, sp, #760
-; CHECK-NEXT: ld1 { v0.b }[11], [x14]
-; CHECK-NEXT: add x14, sp, #328
-; CHECK-NEXT: ld1 { v1.b }[11], [x13]
-; CHECK-NEXT: add x13, sp, #48
-; CHECK-NEXT: ld1 { v4.b }[5], [x12]
-; CHECK-NEXT: add x12, sp, #56
-; CHECK-NEXT: ld1 { v5.b }[5], [x11]
-; CHECK-NEXT: add x11, sp, #128
-; CHECK-NEXT: ld1 { v0.b }[12], [x8]
-; CHECK-NEXT: add x8, sp, #1024
-; CHECK-NEXT: ld1 { v1.b }[12], [x13]
-; CHECK-NEXT: ld1 { v2.b }[6], [x11]
-; CHECK-NEXT: add x11, sp, #64
-; CHECK-NEXT: ld1 { v3.b }[6], [x8]
-; CHECK-NEXT: add x8, sp, #1032
-; CHECK-NEXT: ld1 { v0.b }[13], [x10]
-; CHECK-NEXT: add x10, sp, #768
-; CHECK-NEXT: ld1 { v1.b }[13], [x12]
-; CHECK-NEXT: movi v7.2d, #0000000000000000
-; CHECK-NEXT: ld1 { v3.b }[7], [x8]
-; CHECK-NEXT: add x8, sp, #1040
-; CHECK-NEXT: ld1 { v4.b }[6], [x10]
-; CHECK-NEXT: add x10, sp, #896
-; CHECK-NEXT: ld1 { v0.b }[14], [x9]
-; CHECK-NEXT: add x9, sp, #776
-; CHECK-NEXT: ld1 { v1.b }[14], [x11]
-; CHECK-NEXT: add x11, sp, #136
-; CHECK-NEXT: ld1 { v3.b }[8], [x8]
-; CHECK-NEXT: add x8, sp, #1048
-; CHECK-NEXT: ld1 { v4.b }[7], [x9]
-; CHECK-NEXT: add x9, sp, #784
-; CHECK-NEXT: ld1 { v5.b }[6], [x10]
-; CHECK-NEXT: add x10, sp, #904
-; CHECK-NEXT: ld1 { v2.b }[7], [x11]
-; CHECK-NEXT: add x11, sp, #144
-; CHECK-NEXT: ld1 { v3.b }[9], [x8]
-; CHECK-NEXT: add x8, sp, #1056
-; CHECK-NEXT: ld1 { v4.b }[8], [x9]
+; CHECK-NEXT: add x15, sp, #1040
+; CHECK-NEXT: ld1 { v6.b }[6], [x14]
+; CHECK-NEXT: ld1 { v7.b }[5], [x12]
+; CHECK-NEXT: add x12, sp, #776
+; CHECK-NEXT: ld1 { v1.b }[5], [x16]
+; CHECK-NEXT: ld1 { v5.b }[8], [x15]
+; CHECK-NEXT: add x15, sp, #896
+; CHECK-NEXT: add x14, sp, #1048
+; CHECK-NEXT: ld1 { v6.b }[7], [x12]
+; CHECK-NEXT: ld1 { v7.b }[6], [x10]
+; CHECK-NEXT: add x10, sp, #784
+; CHECK-NEXT: ld1 { v1.b }[6], [x15]
+; CHECK-NEXT: ld1 { v5.b }[9], [x14]
+; CHECK-NEXT: add x14, sp, #904
+; CHECK-NEXT: add x12, sp, #1056
+; CHECK-NEXT: ld1 { v6.b }[8], [x10]
+; CHECK-NEXT: ld1 { v7.b }[7], [x9]
 ; CHECK-NEXT: add x9, sp, #792
-; CHECK-NEXT: ld1 { v5.b }[7], [x10]
-; CHECK-NEXT: add x10, sp, #912
-; CHECK-NEXT: ld1 { v2.b }[8], [x11]
-; CHECK-NEXT: add x11, sp, #152
-; CHECK-NEXT: ld1 { v3.b }[10], [x8]
-; CHECK-NEXT: add x8, sp, #1064
-; CHECK-NEXT: ld1 { v4.b }[9], [x9]
+; CHECK-NEXT: ld1 { v1.b }[7], [x14]
+; CHECK-NEXT: ld1 { v5.b }[10], [x12]
+; CHECK-NEXT: add x12, sp, #912
+; CHECK-NEXT: add x10, sp, #1064
+; CHECK-NEXT: ld1 { v6.b }[9], [x9]
+; CHECK-NEXT: ld1 { v7.b }[8], [x8]
 ; CHECK-NEXT: add x9, sp, #800
-; CHECK-NEXT: ld1 { v5.b }[8], [x10]
-; CHECK-NEXT: add x10, sp, #920
-; CHECK-NEXT: ld1 { v2.b }[9], [x11]
-; CHECK-NEXT: add x11, sp, #160
-; CHECK-NEXT: ld1 { v3.b }[11], [x8]
-; CHECK-NEXT: add x8, sp, #1072
-; CHECK-NEXT: ld1 { v4.b }[10], [x9]
-; CHECK-NEXT: add x9, sp, #808
-; CHECK-NEXT: ld1 { v5.b }[9], [x10]
-; CHECK-NEXT: add x10, sp, #928
-; CHECK-NEXT: ld1 { v2.b }[10], [x11]
-; CHECK-NEXT: add x11, sp, #168
-; CHECK-NEXT: ld1 { v3.b }[12], [x8]
-; CHECK-NEXT: add x8, sp, #1080
-; CHECK-NEXT: ld1 { v4.b }[11], [x9]
-; CHECK-NEXT: add x9, sp, #816
-; CHECK-NEXT: ld1 { v5.b }[10], [x10]
-; CHECK-NEXT: add x10, sp, #936
-; CHECK-NEXT: ld1 { v2.b }[11], [x11]
-; CHECK-NEXT: add x11, sp, #176
-; CHECK-NEXT: ld1 { v3.b }[13], [x8]
-; CHECK-NEXT: add x8, sp, #1088
-; CHECK-NEXT: ld1 { v4.b }[12], [x9]
-; CHECK-NEXT: add x9, sp, #824
+; CHECK-NEXT: ld1 { v1.b }[8], [x12]
 ; CHECK-NEXT: ld1 { v5.b }[11], [x10]
-; CHECK-NEXT: add x10, sp, #944
-; CHECK-NEXT: ld1 { v2.b }[12], [x11]
-; CHECK-NEXT: add x11, sp, #184
-; CHECK-NEXT: ld1 { v3.b }[14], [x8]
-; CHECK-NEXT: add x8, sp, #1096
-; CHECK-NEXT: ld1 { v4.b }[13], [x9]
-; CHECK-NEXT: add x9, sp, #832
+; CHECK-NEXT: add x8, sp, #152
+; CHECK-NEXT: add x10, sp, #1072
+; CHECK-NEXT: ld1 { v6.b }[10], [x9]
+; CHECK-NEXT: ld1 { v7.b }[9], [x8]
+; CHECK-NEXT: add x9, sp, #808
+; CHECK-NEXT: ld1 { v1.b }[9], [x11]
 ; CHECK-NEXT: ld1 { v5.b }[12], [x10]
-; CHECK-NEXT: add x10, sp, #952
-; CHECK-NEXT: movi v16.2d, #0000000000000000
-; CHECK-NEXT: ld1 { v0.b }[15], [x14]
-; CHECK-NEXT: ld1 { v2.b }[13], [x11]
-; CHECK-NEXT: add x11, sp, #72
-; CHECK-NEXT: ld1 { v3.b }[15], [x8]
-; CHECK-NEXT: add x8, sp, #192
-; CHECK-NEXT: ld1 { v4.b }[14], [x9]
-; CHECK-NEXT: add x9, sp, #840
+; CHECK-NEXT: add x10, sp, #160
+; CHECK-NEXT: add x8, sp, #56
+; CHECK-NEXT: ld1 { v6.b }[11], [x9]
+; CHECK-NEXT: add x9, sp, #928
+; CHECK-NEXT: ld1 { v7.b }[10], [x10]
+; CHECK-NEXT: add x10, sp, #1080
+; CHECK-NEXT: ld1 { v1.b }[10], [x9]
+; CHECK-NEXT: ld1 { v0.b }[13], [x8]
 ; CHECK-NEXT: ld1 { v5.b }[13], [x10]
-; CHECK-NEXT: add x10, sp, #960
-; CHECK-NEXT: ld1 { v1.b }[15], [x11]
-; CHECK-NEXT: sdot v16.4s, v0.16b, v6.16b
-; CHECK-NEXT: ld1 { v2.b }[14], [x8]
-; CHECK-NEXT: sdot v7.4s, v3.16b, v6.16b
-; CHECK-NEXT: ld1 { v4.b }[15], [x9]
-; CHECK-NEXT: ld1 { v5.b }[14], [x10]
+; CHECK-NEXT: add x8, sp, #816
+; CHECK-NEXT: add x9, sp, #168
+; CHECK-NEXT: ld1 { v6.b }[12], [x8]
+; CHECK-NEXT: add x8, sp, #936
+; CHECK-NEXT: ld1 { v7.b }[11], [x9]
+; CHECK-NEXT: add x9, sp, #1088
+; CHECK-NEXT: ld1 { v1.b }[11], [x8]
+; CHECK-NEXT: add x10, sp, #176
+; CHECK-NEXT: ld1 { v5.b }[14], [x9]
+; CHECK-NEXT: add x9, sp, #824
+; CHECK-NEXT: add x8, sp, #64
+; CHECK-NEXT: ld1 { v6.b }[13], [x9]
+; CHECK-NEXT: add x9, sp, #944
+; CHECK-NEXT: ld1 { v7.b }[12], [x10]
+; CHECK-NEXT: add x10, sp, #1096
+; CHECK-NEXT: ld1 { v1.b }[12], [x9]
+; CHECK-NEXT: ld1 { v0.b }[14], [x8]
+; CHECK-NEXT: ld1 { v5.b }[15], [x10]
+; CHECK-NEXT: add x8, sp, #832
+; CHECK-NEXT: add x9, sp, #184
+; CHECK-NEXT: ld1 { v6.b }[14], [x8]
+; CHECK-NEXT: add x8, sp, #952
+; CHECK-NEXT: ld1 { v7.b }[13], [x9]
+; CHECK-NEXT: ld1 { v1.b }[13], [x8]
+; CHECK-NEXT: add x10, sp, #72
+; CHECK-NEXT: add x8, sp, #840
+; CHECK-NEXT: sdot v3.4s, v5.16b, v2.16b
+; CHECK-NEXT: ld1 { v0.b }[15], [x10]
+; CHECK-NEXT: add x9, sp, #192
+; CHECK-NEXT: ld1 { v6.b }[15], [x8]
+; CHECK-NEXT: add x8, sp, #960
+; CHECK-NEXT: ld1 { v7.b }[14], [x9]
+; CHECK-NEXT: ld1 { v1.b }[14], [x8]
 ; CHECK-NEXT: add x8, sp, #200
 ; CHECK-NEXT: add x9, sp, #968
-; CHECK-NEXT: sdot v16.4s, v1.16b, v6.16b
-; CHECK-NEXT: ld1 { v2.b }[15], [x8]
-; CHECK-NEXT: sdot v7.4s, v4.16b, v6.16b
-; CHECK-NEXT: ld1 { v5.b }[15], [x9]
-; CHECK-NEXT: sdot v16.4s, v2.16b, v6.16b
-; CHECK-NEXT: sdot v7.4s, v5.16b, v6.16b
-; CHECK-NEXT: add v0.4s, v16.4s, v7.4s
+; CHECK-NEXT: sdot v4.4s, v0.16b, v2.16b
+; CHECK-NEXT: sdot v3.4s, v6.16b, v2.16b
+; CHECK-NEXT: ld1 { v7.b }[15], [x8]
+; CHECK-NEXT: ld1 { v1.b }[15], [x9]
+; CHECK-NEXT: sdot v4.4s, v7.16b, v2.16b
+; CHECK-NEXT: sdot v3.4s, v1.16b, v2.16b
+; CHECK-NEXT: add v0.4s, v4.4s, v3.4s
 ; CHECK-NEXT: addv s0, v0.4s
 ; CHECK-NEXT: fmov w0, s0
 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -2863,17 +2867,17 @@ define i32 @test_udot_v64i8(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %sum) {
 ; CHECK-LABEL: test_udot_v64i8:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldp q1, q4, [x1, #32]
-; CHECK-NEXT: movi v5.2d, #0000000000000000
 ; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: movi v1.2d, #0000000000000000
 ; CHECK-NEXT: ldp q2, q3, [x0, #32]
-; CHECK-NEXT: udot v5.4s, v1.16b, v2.16b
-; CHECK-NEXT: ldp q6, q7, [x0]
-; CHECK-NEXT: udot v0.4s, v4.16b, v3.16b
-; CHECK-NEXT: ldp q1, q16, [x1]
-; CHECK-NEXT: udot v5.4s, v1.16b, v6.16b
-; CHECK-NEXT: udot v0.4s, v16.16b, v7.16b
-; CHECK-NEXT: add v0.4s, v5.4s, v0.4s
+; CHECK-NEXT: ldp q4, q5, [x1, #32]
+; CHECK-NEXT: udot v1.4s, v5.16b, v3.16b
+; CHECK-NEXT: udot v0.4s, v4.16b, v2.16b
+; CHECK-NEXT: ldp q2, q3, [x0]
+; CHECK-NEXT: ldp q4, q5, [x1]
+; CHECK-NEXT: udot v1.4s, v5.16b, v3.16b
+; CHECK-NEXT: udot v0.4s, v4.16b, v2.16b
+; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: addv s0, v0.4s
 ; CHECK-NEXT: fmov w8, s0
 ; CHECK-NEXT: add w0, w8, w2
@@ -2892,16 +2896,16 @@ define i32 @test_udot_v64i8_nomla(ptr nocapture readonly %a1) {
 ; CHECK-LABEL: test_udot_v64i8_nomla:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldp q4, q3, [x0, #32]
 ; CHECK-NEXT: movi v0.16b, #1
 ; CHECK-NEXT: movi v1.2d, #0000000000000000
 ; CHECK-NEXT: movi v2.2d, #0000000000000000
+; CHECK-NEXT: ldp q3, q4, [x0, #32]
+; CHECK-NEXT: udot v2.4s, v4.16b, v0.16b
 ; CHECK-NEXT: udot v1.4s, v3.16b, v0.16b
-; CHECK-NEXT: ldp q3, q5, [x0]
+; CHECK-NEXT: ldp q3, q4, [x0]
 ; CHECK-NEXT: udot v2.4s, v4.16b, v0.16b
-; CHECK-NEXT: udot v2.4s, v3.16b, v0.16b
-; CHECK-NEXT: udot v1.4s, v5.16b, v0.16b
-; CHECK-NEXT: add v0.4s, v2.4s, v1.4s
+; CHECK-NEXT: udot v1.4s, v3.16b, v0.16b
+; CHECK-NEXT: add v0.4s, v1.4s, v2.4s
 ; CHECK-NEXT: addv s0, v0.4s
 ; CHECK-NEXT: fmov w0, s0
 ; CHECK-NEXT: ret
@@ -2914,17 +2918,17 @@ define i32 @test_sdot_v64i8(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %sum) {
 ; CHECK-LABEL: test_sdot_v64i8:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldp q1, q4, [x1, #32]
-; CHECK-NEXT: movi v5.2d, #0000000000000000
 ; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: movi v1.2d, #0000000000000000
 ; CHECK-NEXT: ldp q2, q3, [x0, #32]
-; CHECK-NEXT: sdot v5.4s, v1.16b, v2.16b
-; CHECK-NEXT: ldp q6, q7, [x0]
-; CHECK-NEXT: sdot v0.4s, v4.16b, v3.16b
-; CHECK-NEXT: ldp q1, q16, [x1]
-; CHECK-NEXT: sdot v5.4s, v1.16b, v6.16b
-; CHECK-NEXT: sdot v0.4s, v16.16b, v7.16b
-; CHECK-NEXT: add v0.4s, v5.4s, v0.4s
+; CHECK-NEXT: ldp q4, q5, [x1, #32]
+; CHECK-NEXT: sdot v1.4s, v5.16b, v3.16b
+; CHECK-NEXT: sdot v0.4s, v4.16b, v2.16b
+; CHECK-NEXT: ldp q2, q3, [x0]
+; CHECK-NEXT: ldp q4, q5, [x1]
+; CHECK-NEXT: sdot v1.4s, v5.16b, v3.16b
+; CHECK-NEXT: sdot v0.4s, v4.16b, v2.16b
+; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: addv s0, v0.4s
 ; CHECK-NEXT: fmov w8, s0
 ; CHECK-NEXT: add w0, w8, w2
@@ -2943,24 +2947,24 @@ define i32 @test_sdot_v64i8_double(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c, <64 x i8> %d) {
 ; CHECK-LABEL: test_sdot_v64i8_double:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldp q18, q19, [sp, #96]
-; CHECK-NEXT: movi v22.2d, #0000000000000000
-; CHECK-NEXT: movi v23.2d, #0000000000000000
-; CHECK-NEXT: movi v24.2d, #0000000000000000
-; CHECK-NEXT: movi v25.2d, #0000000000000000
-; CHECK-NEXT: sdot v22.4s, v3.16b, v7.16b
-; CHECK-NEXT: ldp q20, q21, [sp, #32]
-; CHECK-NEXT: sdot v23.4s, v2.16b, v6.16b
-; CHECK-NEXT: sdot v22.4s, v1.16b, v5.16b
-; CHECK-NEXT: sdot v25.4s, v20.16b, v18.16b
-; CHECK-NEXT: sdot v23.4s, v0.16b, v4.16b
-; CHECK-NEXT: ldp q16, q17, [sp, #64]
-; CHECK-NEXT: sdot v24.4s, v21.16b, v19.16b
-; CHECK-NEXT: add v0.4s, v23.4s, v22.4s
-; CHECK-NEXT: ldp q26, q3, [sp]
-; CHECK-NEXT: sdot v25.4s, v26.16b, v16.16b
-; CHECK-NEXT: sdot v24.4s, v3.16b, v17.16b
-; CHECK-NEXT: add v1.4s, v25.4s, v24.4s
+; CHECK-NEXT: movi v16.2d, #0000000000000000
+; CHECK-NEXT: movi v17.2d, #0000000000000000
+; CHECK-NEXT: movi v18.2d, #0000000000000000
+; CHECK-NEXT: movi v19.2d, #0000000000000000
+; CHECK-NEXT: ldp q20, q21, [sp, #96]
+; CHECK-NEXT: ldp q22, q23, [sp, #32]
+; CHECK-NEXT: sdot v16.4s, v3.16b, v7.16b
+; CHECK-NEXT: sdot v18.4s, v2.16b, v6.16b
+; CHECK-NEXT: sdot v19.4s, v23.16b, v21.16b
+; CHECK-NEXT: sdot v17.4s, v22.16b, v20.16b
+; CHECK-NEXT: ldp q2, q3, [sp, #64]
+; CHECK-NEXT: ldp q6, q7, [sp]
+; CHECK-NEXT: sdot v16.4s, v1.16b, v5.16b
+; CHECK-NEXT: sdot v18.4s, v0.16b, v4.16b
+; CHECK-NEXT: sdot v19.4s, v7.16b, v3.16b
+; CHECK-NEXT: sdot v17.4s, v6.16b, v2.16b
+; CHECK-NEXT: add v0.4s, v18.4s, v16.4s
+; CHECK-NEXT: add v1.4s, v17.4s, v19.4s
 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: addv s0, v0.4s
 ; CHECK-NEXT: fmov w0, s0
@@ -2981,23 +2985,23 @@ define i32 @test_sdot_v64i8_double_nomla(<64 x i8> %a, <64 x i8> %b, <64 x i8> %c, <64 x i8> %d) {
 ; CHECK-LABEL: test_sdot_v64i8_double_nomla:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldp q4, q5, [sp, #32]
-; CHECK-NEXT: movi v6.16b, #1
+; CHECK-NEXT: movi v4.16b, #1
+; CHECK-NEXT: movi v5.2d, #0000000000000000
+; CHECK-NEXT: movi v6.2d, #0000000000000000
 ; CHECK-NEXT: movi v7.2d, #0000000000000000
+; CHECK-NEXT: ldp q17, q18, [sp, #32]
 ; CHECK-NEXT: movi v16.2d, #0000000000000000
-; CHECK-NEXT: movi v17.2d, #0000000000000000
-; CHECK-NEXT: movi v18.2d, #0000000000000000
-; CHECK-NEXT: sdot v7.4s, v3.16b, v6.16b
-; CHECK-NEXT: sdot v16.4s, v2.16b, v6.16b
-; CHECK-NEXT: ldp q3, q2, [sp]
-; CHECK-NEXT: sdot v17.4s, v5.16b, v6.16b
-; CHECK-NEXT: sdot v18.4s, v4.16b, v6.16b
-; CHECK-NEXT: sdot v7.4s, v1.16b, v6.16b
-; CHECK-NEXT: sdot v16.4s, v0.16b, v6.16b
-; CHECK-NEXT: sdot v17.4s, v2.16b, v6.16b
-; CHECK-NEXT: sdot v18.4s, v3.16b, v6.16b
-; CHECK-NEXT: add v0.4s, v16.4s, v7.4s
-; CHECK-NEXT: add v1.4s, v18.4s, v17.4s
+; CHECK-NEXT: sdot v5.4s, v3.16b, v4.16b
+; CHECK-NEXT: sdot v6.4s, v17.16b, v4.16b
+; CHECK-NEXT: sdot v7.4s, v2.16b, v4.16b
+; CHECK-NEXT: ldp q2, q3, [sp]
+; CHECK-NEXT: sdot v16.4s, v18.16b, v4.16b
+; CHECK-NEXT: sdot v5.4s, v1.16b, v4.16b
+; CHECK-NEXT: sdot v6.4s, v2.16b, v4.16b
+; CHECK-NEXT: sdot v7.4s, v0.16b, v4.16b
+; CHECK-NEXT: sdot v16.4s, v3.16b, v4.16b
+; CHECK-NEXT: add v0.4s, v7.4s, v5.4s
+; CHECK-NEXT: add v1.4s, v6.4s, v16.4s
 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: addv s0, v0.4s
 ; CHECK-NEXT: fmov w0, s0
diff --git a/llvm/test/CodeGen/AArch64/neon-extadd.ll b/llvm/test/CodeGen/AArch64/neon-extadd.ll
--- a/llvm/test/CodeGen/AArch64/neon-extadd.ll
+++ b/llvm/test/CodeGen/AArch64/neon-extadd.ll
@@ -56,12 +56,13 @@ define <32 x i16> @extadds_v32i8_i16(<32 x i8> %s0, <32 x i8> %s1) {
 ; CHECK-LABEL: extadds_v32i8_i16:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: saddl2 v4.8h, v0.16b, v2.16b
-; CHECK-NEXT: saddl2 v5.8h, v1.16b, v3.16b
-; CHECK-NEXT: saddl v0.8h, v0.8b, v2.8b
+; CHECK-NEXT: saddl2 v4.8h, v1.16b, v3.16b
+; CHECK-NEXT: saddl v5.8h, v0.8b, v2.8b
+; CHECK-NEXT: saddl2 v6.8h, v0.16b, v2.16b
 ; CHECK-NEXT: saddl v2.8h, v1.8b, v3.8b
-; CHECK-NEXT: mov v1.16b, v4.16b
-; CHECK-NEXT: mov v3.16b, v5.16b
+; CHECK-NEXT: mov v0.16b, v5.16b
+; CHECK-NEXT: mov v1.16b, v6.16b
+; CHECK-NEXT: mov v3.16b, v4.16b
 ; CHECK-NEXT: ret
 entry:
 %s0s = sext <32 x i8> %s0 to <32 x i16>
@@ -73,12 +74,13 @@ define <32 x i16> @extaddu_v32i8_i16(<32 x i8> %s0, <32 x i8> %s1) {
 ; CHECK-LABEL: extaddu_v32i8_i16:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: uaddl2 v4.8h, v0.16b, v2.16b
-; CHECK-NEXT: uaddl2 v5.8h, v1.16b, v3.16b
-; CHECK-NEXT: uaddl v0.8h, v0.8b, v2.8b
+; CHECK-NEXT: uaddl2 v4.8h, v1.16b, v3.16b
+; CHECK-NEXT: uaddl v5.8h, v0.8b, v2.8b
+; CHECK-NEXT: uaddl2 v6.8h, v0.16b, v2.16b
 ; CHECK-NEXT: uaddl v2.8h, v1.8b, v3.8b
-; CHECK-NEXT: mov v1.16b, v4.16b
-; CHECK-NEXT: mov v3.16b, v5.16b
+; CHECK-NEXT: mov v0.16b, v5.16b
+; CHECK-NEXT: mov v1.16b, v6.16b
+; CHECK-NEXT: mov v3.16b, v4.16b
 ; CHECK-NEXT: ret
 entry:
 %s0s = zext <32 x i8> %s0 to <32 x i16>
@@ -118,12 +120,12 @@ define <16 x i32> @extadds_v16i8_i32(<16 x i8> %s0, <16 x i8> %s1) {
 ; CHECK-LABEL: extadds_v16i8_i32:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: saddl2 v2.8h, v0.16b, v1.16b
-; CHECK-NEXT: saddl v0.8h, v0.8b, v1.8b
-; CHECK-NEXT: sshll2 v3.4s, v2.8h, #0
-; CHECK-NEXT: sshll2 v1.4s, v0.8h, #0
-; CHECK-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-NEXT: sshll v2.4s, v2.4h, #0
+; CHECK-NEXT: saddl v2.8h, v0.8b, v1.8b
+; CHECK-NEXT: saddl2 v4.8h, v0.16b, v1.16b
+; CHECK-NEXT: sshll v0.4s, v2.4h, #0
+; CHECK-NEXT: sshll2 v3.4s, v4.8h, #0
+; CHECK-NEXT: sshll2 v1.4s, v2.8h, #0
+; CHECK-NEXT: sshll v2.4s, v4.4h, #0
 ; CHECK-NEXT: ret
 entry:
 %s0s = sext <16 x i8> %s0 to <16 x i32>
@@ -135,12 +137,12 @@ define <16 x i32> @extaddu_v16i8_i32(<16 x i8> %s0, <16 x i8> %s1) {
 ; CHECK-LABEL: extaddu_v16i8_i32:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: uaddl2 v2.8h, v0.16b, v1.16b
-; CHECK-NEXT: uaddl v0.8h, v0.8b, v1.8b
-; CHECK-NEXT: ushll2 v3.4s, v2.8h, #0
-; CHECK-NEXT: ushll2 v1.4s, v0.8h, #0
-; CHECK-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-NEXT: ushll v2.4s, v2.4h, #0
+; CHECK-NEXT: uaddl v2.8h, v0.8b, v1.8b
+; CHECK-NEXT: uaddl2 v4.8h, v0.16b, v1.16b
+; CHECK-NEXT: ushll v0.4s, v2.4h, #0
+; CHECK-NEXT: ushll2 v3.4s, v4.8h, #0
+; CHECK-NEXT: ushll2 v1.4s, v2.8h, #0
+; CHECK-NEXT: ushll v2.4s, v4.4h, #0
 ; CHECK-NEXT: ret
 entry:
 %s0s = zext <16 x i8> %s0 to <16 x i32>
@@ -153,11 +155,11 @@
 ; CHECK-LABEL: extadds_v8i8_i64:
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: saddl v0.8h, v0.8b, v1.8b
+; CHECK-NEXT: sshll v1.4s, v0.4h, #0
 ; CHECK-NEXT: sshll2 v2.4s, v0.8h, #0
-; CHECK-NEXT: sshll v0.4s, v0.4h, #0
+; CHECK-NEXT: sshll v0.2d, v1.2s, #0
 ; CHECK-NEXT: sshll2 v3.2d, v2.4s, #0
-; CHECK-NEXT: sshll2 v1.2d, v0.4s, #0
-; CHECK-NEXT: sshll v0.2d, v0.2s, #0
+; CHECK-NEXT: sshll2 v1.2d, v1.4s, #0
 ; CHECK-NEXT: sshll v2.2d, v2.2s, #0
 ; CHECK-NEXT: ret
 entry:
@@ -171,11 +173,11 @@
 ; CHECK-LABEL: extaddu_v8i8_i64:
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: uaddl v0.8h, v0.8b, v1.8b
+; CHECK-NEXT: ushll v1.4s, v0.4h, #0
 ; CHECK-NEXT: ushll2 v2.4s, v0.8h, #0
-; CHECK-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-NEXT: ushll v0.2d, v1.2s, #0
 ; CHECK-NEXT: ushll2 v3.2d, v2.4s, #0
-; CHECK-NEXT: ushll2 v1.2d, v0.4s, #0
-; CHECK-NEXT: ushll v0.2d, v0.2s, #0
+; CHECK-NEXT: ushll2 v1.2d, v1.4s, #0
 ; CHECK-NEXT: ushll v2.2d, v2.2s, #0
 ; CHECK-NEXT: ret
 entry:
@@ -240,12 +242,13 @@ define <16 x i32> @extadds_v16i16_i32(<16 x i16> %s0, <16 x i16> %s1) {
 ; CHECK-LABEL: extadds_v16i16_i32:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: saddl2 v4.4s, v0.8h, v2.8h
-; CHECK-NEXT: saddl2 v5.4s, v1.8h, v3.8h
-; CHECK-NEXT: saddl v0.4s, v0.4h, v2.4h
+; CHECK-NEXT: saddl2 v4.4s, v1.8h, v3.8h
+; CHECK-NEXT: saddl v5.4s, v0.4h, v2.4h
+; CHECK-NEXT: saddl2 v6.4s, v0.8h, v2.8h
 ; CHECK-NEXT: saddl v2.4s, v1.4h, v3.4h
-; CHECK-NEXT: mov v1.16b, v4.16b
-; CHECK-NEXT: mov v3.16b, v5.16b
+; CHECK-NEXT: mov v0.16b, v5.16b
+; CHECK-NEXT: mov v1.16b, v6.16b
+; CHECK-NEXT: mov v3.16b, v4.16b
 ; CHECK-NEXT: ret
 entry:
 %s0s = sext <16 x i16> %s0 to <16 x i32>
@@ -257,12 +260,13 @@ define <16 x i32> @extaddu_v16i16_i32(<16 x i16> %s0, <16 x i16> %s1) {
 ; CHECK-LABEL: extaddu_v16i16_i32:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: uaddl2 v4.4s, v0.8h, v2.8h
-; CHECK-NEXT: uaddl2 v5.4s, v1.8h, v3.8h
-; CHECK-NEXT: uaddl v0.4s, v0.4h, v2.4h
+; CHECK-NEXT: uaddl2 v4.4s, v1.8h, v3.8h
+; CHECK-NEXT: uaddl v5.4s, v0.4h, v2.4h
+; CHECK-NEXT: uaddl2 v6.4s, v0.8h, v2.8h
 ; CHECK-NEXT: uaddl v2.4s, v1.4h, v3.4h
-; CHECK-NEXT: mov v1.16b, v4.16b
-; CHECK-NEXT: mov v3.16b, v5.16b
+; CHECK-NEXT: mov v0.16b, v5.16b
+; CHECK-NEXT: mov v1.16b, v6.16b
+; CHECK-NEXT: mov v3.16b, v4.16b
 ; CHECK-NEXT: ret
 entry:
 %s0s = zext <16 x i16> %s0 to <16 x i32>
@@ -302,12 +306,12 @@ define <8 x i64> @extadds_v8i16_i64(<8 x i16> %s0, <8 x i16> %s1) {
 ; CHECK-LABEL: extadds_v8i16_i64:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: saddl2 v2.4s, v0.8h, v1.8h
-; CHECK-NEXT: saddl v0.4s, v0.4h, v1.4h
-; CHECK-NEXT: sshll2 v3.2d, v2.4s, #0
-; CHECK-NEXT: sshll2 v1.2d, v0.4s, #0
-; CHECK-NEXT: sshll v0.2d, v0.2s, #0
-; CHECK-NEXT: sshll v2.2d, v2.2s, #0
+; CHECK-NEXT: saddl v2.4s, v0.4h, v1.4h
+; CHECK-NEXT: saddl2 v4.4s, v0.8h, v1.8h
+; CHECK-NEXT: sshll v0.2d, v2.2s, #0
+; CHECK-NEXT: sshll2 v3.2d, v4.4s, #0
+; CHECK-NEXT: sshll2 v1.2d, v2.4s, #0
+; CHECK-NEXT: sshll v2.2d, v4.2s, #0
 ; CHECK-NEXT: ret
 entry:
 %s0s = sext <8 x i16> %s0 to <8 x i64>
@@ -319,12 +323,12 @@ define <8 x i64> @extaddu_v8i16_i64(<8 x i16> %s0, <8 x i16> %s1) {
 ; CHECK-LABEL: extaddu_v8i16_i64:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: uaddl2 v2.4s, v0.8h, v1.8h
-; CHECK-NEXT: uaddl v0.4s, v0.4h, v1.4h
-; CHECK-NEXT: ushll2 v3.2d, v2.4s, #0
-; CHECK-NEXT: ushll2 v1.2d, v0.4s, #0
-; CHECK-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-NEXT: ushll v2.2d, v2.2s, #0
+; CHECK-NEXT: uaddl v2.4s, v0.4h, v1.4h
+; CHECK-NEXT: uaddl2 v4.4s, v0.8h, v1.8h
+; CHECK-NEXT: ushll v0.2d, v2.2s, #0
+; CHECK-NEXT: ushll2 v3.2d, v4.4s, #0
+; CHECK-NEXT: ushll2 v1.2d, v2.4s, #0
+; CHECK-NEXT: ushll v2.2d, v4.2s, #0
 ; CHECK-NEXT: ret
 entry:
 %s0s = zext <8 x i16> %s0 to <8 x i64>
@@ -388,12 +392,13 @@ define <8 x i64> @extadds_v8i32_i64(<8 x i32> %s0, <8 x i32> %s1) {
 ; CHECK-LABEL: extadds_v8i32_i64:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: saddl2 v4.2d, v0.4s, v2.4s
-; CHECK-NEXT: saddl2 v5.2d, v1.4s, v3.4s
-; CHECK-NEXT: saddl v0.2d, v0.2s, v2.2s
+; CHECK-NEXT: saddl2 v4.2d, v1.4s, v3.4s
+; CHECK-NEXT: saddl v5.2d, v0.2s, v2.2s
+; CHECK-NEXT: saddl2 v6.2d, v0.4s, v2.4s
 ; CHECK-NEXT: saddl v2.2d, v1.2s, v3.2s
-; CHECK-NEXT: mov v1.16b, v4.16b
-; CHECK-NEXT: mov v3.16b, v5.16b
+; CHECK-NEXT: mov v0.16b, v5.16b
+; CHECK-NEXT: mov v1.16b, v6.16b
+; CHECK-NEXT: mov v3.16b, v4.16b
 ; CHECK-NEXT: ret
 entry:
 %s0s = sext <8 x i32> %s0 to <8 x i64>
@@ -405,12 +410,13 @@ define <8 x i64> @extaddu_v8i32_i64(<8 x i32> %s0, <8 x i32> %s1) {
 ; CHECK-LABEL: extaddu_v8i32_i64:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: uaddl2 v4.2d, v0.4s, v2.4s
-; CHECK-NEXT: uaddl2 v5.2d, v1.4s, v3.4s
-; CHECK-NEXT: uaddl v0.2d, v0.2s, v2.2s
+; CHECK-NEXT: uaddl2 v4.2d, v1.4s, v3.4s
+; CHECK-NEXT: uaddl v5.2d, v0.2s, v2.2s
+; CHECK-NEXT: uaddl2 v6.2d, v0.4s, v2.4s
 ; CHECK-NEXT: uaddl v2.2d, v1.2s, v3.2s
-; CHECK-NEXT: mov v1.16b, v4.16b
-; CHECK-NEXT: mov v3.16b, v5.16b
+; CHECK-NEXT: mov v0.16b, v5.16b
+; CHECK-NEXT: mov v1.16b, v6.16b
+; CHECK-NEXT: mov v3.16b, v4.16b
 ; CHECK-NEXT: ret
 entry:
 %s0s = zext <8 x i32> %s0 to <8 x i64>
@@ -422,14 +428,14 @@ define <16 x i32> @add_zs(<16 x i8> %s0, <16 x i8> %s1) {
 ; CHECK-LABEL: add_zs:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ushll2 v2.8h, v0.16b, #0
-; CHECK-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEXT: saddw2 v2.8h, v2.8h, v1.16b
-; CHECK-NEXT: saddw v0.8h, v0.8h, v1.8b
-; CHECK-NEXT: sshll2 v3.4s, v2.8h, #0
-; CHECK-NEXT: sshll2 v1.4s, v0.8h, #0
-; CHECK-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-NEXT: sshll v2.4s, v2.4h, #0
+; CHECK-NEXT: ushll v2.8h, v0.8b, #0
+; CHECK-NEXT: ushll2 v0.8h, v0.16b, #0
+; CHECK-NEXT: saddw v2.8h, v2.8h, v1.8b
+; CHECK-NEXT: saddw2 v4.8h, v0.8h, v1.16b
+; CHECK-NEXT: sshll v0.4s, v2.4h, #0
+; CHECK-NEXT: sshll2 v3.4s, v4.8h, #0
+; CHECK-NEXT: sshll2 v1.4s, v2.8h, #0
+; CHECK-NEXT: sshll v2.4s, v4.4h, #0
 ; CHECK-NEXT: ret
 entry:
 %s0s = zext <16 x i8> %s0 to <16 x i32>
@@ -441,79 +447,79 @@ define <20 x i32> @v20(<20 x i8> %s0, <20 x i8> %s1) {
 ; CHECK-LABEL: v20:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldr b0, [sp, #96]
-; CHECK-NEXT: add x9, sp, #104
+; CHECK-NEXT: fmov s0, w0
 ; CHECK-NEXT: ldr b2, [sp, #160]
 ; CHECK-NEXT: add x10, sp, #168
 ; CHECK-NEXT: ldr b3, [sp]
-; CHECK-NEXT: fmov s1, w0
-; CHECK-NEXT: ld1 { v0.b }[1], [x9]
-; CHECK-NEXT: add x9, sp, #112
+; CHECK-NEXT: add x11, sp, #8
+; CHECK-NEXT: ldr b1, [sp, #96]
 ; CHECK-NEXT: ld1 { v2.b }[1], [x10]
-; CHECK-NEXT: add x10, sp, #8
-; CHECK-NEXT: add x11, sp, #128
-; CHECK-NEXT: add x12, sp, #184
-; CHECK-NEXT: mov v1.b[1], w1
-; CHECK-NEXT: add x13, sp, #192
-; CHECK-NEXT: ld1 { v0.b }[2], [x9]
-; CHECK-NEXT: add x9, sp, #120
-; CHECK-NEXT: ld1 { v3.b }[1], [x10]
-; CHECK-NEXT: add x10, sp, #16
-; CHECK-NEXT: ldr b4, [sp, #224]
-; CHECK-NEXT: mov v1.b[2], w2
+; CHECK-NEXT: add x9, sp, #104
+; CHECK-NEXT: add x10, sp, #176
+; CHECK-NEXT: mov v0.b[1], w1
+; CHECK-NEXT: ld1 { v3.b }[1], [x11]
+; CHECK-NEXT: ld1 { v1.b }[1], [x9]
+; CHECK-NEXT: add x12, sp, #16
+; CHECK-NEXT: add x9, sp, #112
+; CHECK-NEXT: add x13, sp, #184
+; CHECK-NEXT: ld1 { v2.b }[2], [x10]
+; CHECK-NEXT: add x11, sp, #120
+; CHECK-NEXT: add x14, sp, #32
+; CHECK-NEXT: ld1 { v3.b }[2], [x12]
+; CHECK-NEXT: ld1 { v1.b }[2], [x9]
 ; CHECK-NEXT: ldr b5, [sp, #64]
-; CHECK-NEXT: ld1 { v0.b }[3], [x9]
-; CHECK-NEXT: add x9, sp, #176
-; CHECK-NEXT: ld1 { v3.b }[2], [x10]
-; CHECK-NEXT: add x10, sp, #24
-; CHECK-NEXT: ld1 { v2.b }[2], [x9]
-; CHECK-NEXT: add x9, sp, #136
-; CHECK-NEXT: ld1 { v0.b }[4], [x11]
-; CHECK-NEXT: add x11, sp, #144
-; CHECK-NEXT: ld1 { v3.b }[3], [x10]
-; CHECK-NEXT: add x10, sp, #32
-; CHECK-NEXT: mov v1.b[3], w3
-; CHECK-NEXT: ld1 { v2.b }[3], [x12]
-; CHECK-NEXT: add x12, sp, #200
-; CHECK-NEXT: ld1 { v0.b }[5], [x9]
-; CHECK-NEXT: add x9, sp, #152
-; CHECK-NEXT: ld1 { v3.b }[4], [x10]
-; CHECK-NEXT: add x10, sp, #72
-; CHECK-NEXT: mov v1.b[4], w4
-; CHECK-NEXT: ld1 { v2.b }[4], [x13]
-; CHECK-NEXT: add x13, sp, #232
-; CHECK-NEXT: ld1 { v0.b }[6], [x11]
-; CHECK-NEXT: add x11, sp, #40
-; CHECK-NEXT: ld1 { v5.b }[1], [x10]
-; CHECK-NEXT: add x10, sp, #80
-; CHECK-NEXT: ld1 { v4.b }[1], [x13]
-; CHECK-NEXT: ld1 { v2.b }[5], [x12]
-; CHECK-NEXT: add x12, sp, #240
-; CHECK-NEXT: ld1 { v0.b }[7], [x9]
-; CHECK-NEXT: add x9, sp, #208
-; CHECK-NEXT: ld1 { v3.b }[5], [x11]
-; CHECK-NEXT: add x11, sp, #216
-; CHECK-NEXT: mov v1.b[5], w5
-; CHECK-NEXT: ld1 { v4.b }[2], [x12]
-; CHECK-NEXT: ld1 { v2.b }[6], [x9]
-; CHECK-NEXT: add x9, sp, #48
-; CHECK-NEXT: ld1 { v5.b }[2], [x10]
+; CHECK-NEXT: mov v0.b[2], w2
+; CHECK-NEXT: ldr b4, [sp, #224]
+; CHECK-NEXT: add x10, sp, #128
+; CHECK-NEXT: ld1 { v2.b }[3], [x13]
+; CHECK-NEXT: add x13, sp, #24
+; CHECK-NEXT: add x12, sp, #136
+; CHECK-NEXT: ld1 { v3.b }[3], [x13]
+; CHECK-NEXT: ld1 { v1.b }[3], [x11]
+; CHECK-NEXT: add x11, sp, #192
+; CHECK-NEXT: add x13, sp, #200
+; CHECK-NEXT: add x15, sp, #80
+; CHECK-NEXT: add x9, sp, #144
+; CHECK-NEXT: mov v0.b[3], w3
+; CHECK-NEXT: ld1 { v2.b }[4], [x11]
+; CHECK-NEXT: add x11, sp, #232
+; CHECK-NEXT: ld1 { v3.b }[4], [x14]
+; CHECK-NEXT: add x14, sp, #72
+; CHECK-NEXT: ld1 { v4.b }[1], [x11]
+; CHECK-NEXT: ld1 { v5.b }[1], [x14]
+; CHECK-NEXT: add x14, sp, #40
+; CHECK-NEXT: ld1 { v1.b }[4], [x10]
+; CHECK-NEXT: ld1 { v2.b }[5], [x13]
+; CHECK-NEXT: add x11, sp, #208
+; CHECK-NEXT: add x13, sp, #48
+; CHECK-NEXT: mov v0.b[4], w4
+; CHECK-NEXT: ld1 { v3.b }[5], [x14]
+; CHECK-NEXT: add x14, sp, #240
+; CHECK-NEXT: ld1 { v4.b }[2], [x14]
+; CHECK-NEXT: ld1 { v5.b }[2], [x15]
+; CHECK-NEXT: ld1 { v1.b }[5], [x12]
+; CHECK-NEXT: ld1 { v2.b }[6], [x11]
+; CHECK-NEXT: add x10, sp, #216
+; CHECK-NEXT: add x11, sp, #56
+; CHECK-NEXT: ld1 { v3.b }[6], [x13]
 ; CHECK-NEXT: add x12, sp, #248
-; CHECK-NEXT: add x10, sp, #56
-; CHECK-NEXT: ld1 { v3.b }[6], [x9]
-; CHECK-NEXT: add x9, sp, #88
-; CHECK-NEXT: mov v1.b[6], w6
-; CHECK-NEXT: ld1 { v2.b }[7], [x11]
+; CHECK-NEXT: add x13, sp, #88
+; CHECK-NEXT: mov v0.b[5], w5
 ; CHECK-NEXT: ld1 { v4.b }[3], [x12]
-; CHECK-NEXT: ld1 { v5.b }[3], [x9]
-; CHECK-NEXT: ld1 { v3.b }[7], [x10]
-; CHECK-NEXT: mov v1.b[7], w7
+; CHECK-NEXT: ld1 { v5.b }[3], [x13]
+; CHECK-NEXT: ld1 { v1.b }[6], [x9]
+; CHECK-NEXT: ld1 { v2.b }[7], [x10]
+; CHECK-NEXT: add x9, sp, #152
+; CHECK-NEXT: ld1 { v3.b }[7], [x11]
 ; CHECK-NEXT: uaddl v4.8h, v5.8b, v4.8b
+; CHECK-NEXT: mov v0.b[6], w6
+; CHECK-NEXT: ld1 { v1.b }[7], [x9]
 ; CHECK-NEXT: uaddl v2.8h, v3.8b, v2.8b
-; CHECK-NEXT: uaddl v0.8h, v1.8b, v0.8b
-; CHECK-NEXT: ushll v1.4s, v4.4h, #0
+; CHECK-NEXT: mov v0.b[7], w7
 ; CHECK-NEXT: ushll2 v3.4s, v2.8h, #0
 ; CHECK-NEXT: ushll v2.4s, v2.4h, #0
+; CHECK-NEXT: uaddl v0.8h, v0.8b, v1.8b
+; CHECK-NEXT: ushll v1.4s, v4.4h, #0
 ; CHECK-NEXT: stp q3, q1, [x8, #48]
 ; CHECK-NEXT: ushll2 v1.4s, v0.8h, #0
 ; CHECK-NEXT: ushll v0.4s, v0.4h, #0
@@ -530,83 +536,95 @@ define <16 x i32> @i12(<16 x i12> %s0, <16 x i12> %s1) {
 ; CHECK-LABEL: i12:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldr w12, [sp, #32]
-; CHECK-NEXT: fmov s5, w0
-; CHECK-NEXT: ldr w15, [sp]
-; CHECK-NEXT: fmov s4, w4
-; CHECK-NEXT: ldr w14, [sp, #40]
-; CHECK-NEXT: fmov s0, w12
-; CHECK-NEXT: ldr w16, [sp, #48]
-; CHECK-NEXT: fmov s1, w15
-; CHECK-NEXT: ldr w15, [sp, #8]
-; CHECK-NEXT: ldr w18, [sp, #16]
-; CHECK-NEXT: mov v0.h[1], w14
-; CHECK-NEXT: ldr w17, [sp, #56]
-; CHECK-NEXT: mov v1.h[1], w15
-; CHECK-NEXT: ldr w0, [sp, #24]
-; CHECK-NEXT: mov v5.h[1], w1
-; CHECK-NEXT: ldr w13, [sp, #64]
-; CHECK-NEXT: ldr w1, [sp, #128]
-; CHECK-NEXT: mov v0.h[2], w16
-; CHECK-NEXT: ldr w16, [sp, #96]
-; CHECK-NEXT: mov v1.h[2], w18
-; CHECK-NEXT: ldr w10, [sp, #72]
-; CHECK-NEXT: mov v5.h[2], w2
-; CHECK-NEXT: ldr w2, [sp, #160]
-; CHECK-NEXT: mov v4.h[1], w5
-; CHECK-NEXT: ldr w5, [sp, #168]
-; CHECK-NEXT: mov v0.h[3], w17
-; CHECK-NEXT: ldr w14, [sp, #104]
-; CHECK-NEXT: mov v1.h[3], w0
-; CHECK-NEXT: ldr w18, [sp, #136]
-; CHECK-NEXT: fmov s6, w1
-; CHECK-NEXT: ldr w0, [sp, #176]
-; CHECK-NEXT: fmov s7, w16
-; CHECK-NEXT: fmov s16, w13
-; CHECK-NEXT: ushll v2.4s, v0.4h, #0
-; CHECK-NEXT: ldr w9, [sp, #80]
-; CHECK-NEXT: movi v0.4s, #15, msl #8
+; CHECK-NEXT: str x23, [sp, #-48]!
// 8-byte Folded Spill +; CHECK-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w21, -24 +; CHECK-NEXT: .cfi_offset w22, -32 +; CHECK-NEXT: .cfi_offset w23, -48 ; CHECK-NEXT: ldr w12, [sp, #112] +; CHECK-NEXT: ldr w14, [sp, #144] +; CHECK-NEXT: fmov s2, w4 +; CHECK-NEXT: ldr w16, [sp, #176] +; CHECK-NEXT: ldr w19, [sp, #208] +; CHECK-NEXT: fmov s3, w0 +; CHECK-NEXT: ldr w20, [sp, #80] +; CHECK-NEXT: ldr w21, [sp, #48] +; CHECK-NEXT: fmov s5, w12 +; CHECK-NEXT: fmov s4, w19 +; CHECK-NEXT: fmov s6, w16 +; CHECK-NEXT: fmov s7, w14 +; CHECK-NEXT: fmov s0, w20 +; CHECK-NEXT: fmov s1, w21 +; CHECK-NEXT: ldr w10, [sp, #120] +; CHECK-NEXT: ldr w11, [sp, #152] +; CHECK-NEXT: ldr w13, [sp, #184] +; CHECK-NEXT: ldr w15, [sp, #216] +; CHECK-NEXT: ldr w22, [sp, #88] +; CHECK-NEXT: ldr w23, [sp, #56] +; CHECK-NEXT: mov v2.h[1], w5 +; CHECK-NEXT: mov v3.h[1], w1 +; CHECK-NEXT: mov v5.h[1], w10 +; CHECK-NEXT: mov v4.h[1], w15 +; CHECK-NEXT: mov v0.h[1], w22 +; CHECK-NEXT: mov v1.h[1], w23 +; CHECK-NEXT: mov v6.h[1], w13 +; CHECK-NEXT: mov v7.h[1], w11 +; CHECK-NEXT: ldr w8, [sp, #128] +; CHECK-NEXT: ldr w9, [sp, #160] +; CHECK-NEXT: ldr w17, [sp, #64] +; CHECK-NEXT: ldr w18, [sp, #96] +; CHECK-NEXT: ldr w10, [sp, #192] +; CHECK-NEXT: ldr w11, [sp, #224] +; CHECK-NEXT: mov v2.h[2], w6 +; CHECK-NEXT: mov v3.h[2], w2 +; CHECK-NEXT: mov v0.h[2], w18 +; CHECK-NEXT: mov v1.h[2], w17 +; CHECK-NEXT: mov v5.h[2], w8 +; CHECK-NEXT: mov v4.h[2], w11 +; CHECK-NEXT: mov v6.h[2], w10 +; CHECK-NEXT: mov v7.h[2], w9 +; CHECK-NEXT: ldr w12, [sp, #72] +; CHECK-NEXT: ldr w13, [sp, #104] +; CHECK-NEXT: ldr w8, [sp, #136] +; CHECK-NEXT: ldr w9, [sp, #168] +; CHECK-NEXT: ldr w10, [sp, #200] +; CHECK-NEXT: ldr w11, [sp, #232] +; CHECK-NEXT: mov v0.h[3], w13 +; CHECK-NEXT: mov v1.h[3], w12 +; CHECK-NEXT: mov v2.h[3], w7 +; CHECK-NEXT: mov v3.h[3], w3 +; CHECK-NEXT: mov v5.h[3], w8 +; CHECK-NEXT: mov v4.h[3], w11 +; CHECK-NEXT: mov v6.h[3], w10 +; CHECK-NEXT: mov v7.h[3], w9 +; CHECK-NEXT: movi v16.4s, #15, msl #8 +; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-NEXT: ldr w17, [sp, #144] -; CHECK-NEXT: mov v6.h[1], w18 -; CHECK-NEXT: ldr w4, [sp, #184] -; CHECK-NEXT: mov v7.h[1], w14 -; CHECK-NEXT: ldr w8, [sp, #88] -; CHECK-NEXT: and v3.16b, v2.16b, v0.16b -; CHECK-NEXT: ldr w11, [sp, #120] -; CHECK-NEXT: and v2.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr w15, [sp, #152] -; CHECK-NEXT: fmov s1, w2 -; CHECK-NEXT: mov v16.h[1], w10 -; CHECK-NEXT: mov v4.h[2], w6 -; CHECK-NEXT: mov v1.h[1], w5 -; CHECK-NEXT: mov v6.h[2], w17 -; CHECK-NEXT: mov v7.h[2], w12 -; CHECK-NEXT: mov v16.h[2], w9 -; CHECK-NEXT: mov v1.h[2], w0 -; CHECK-NEXT: mov v4.h[3], w7 -; CHECK-NEXT: mov v5.h[3], w3 -; CHECK-NEXT: mov v6.h[3], w15 -; CHECK-NEXT: mov v1.h[3], w4 -; CHECK-NEXT: mov v7.h[3], w11 -; CHECK-NEXT: mov v16.h[3], w8 -; CHECK-NEXT: ushll v4.4s, v4.4h, #0 -; CHECK-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-NEXT: ushll v2.4s, v2.4h, #0 +; CHECK-NEXT: ushll v3.4s, v3.4h, #0 ; CHECK-NEXT: ushll v5.4s, v5.4h, #0 +; CHECK-NEXT: ushll v4.4s, v4.4h, #0 ; CHECK-NEXT: ushll v6.4s, v6.4h, #0 -; CHECK-NEXT: and v17.16b, v1.16b, v0.16b -; CHECK-NEXT: ushll v1.4s, v7.4h, #0 -; CHECK-NEXT: ushll v7.4s, v16.4h, #0 -; CHECK-NEXT: and v4.16b, v4.16b, v0.16b -; CHECK-NEXT: 
and v5.16b, v5.16b, v0.16b -; CHECK-NEXT: and v6.16b, v6.16b, v0.16b -; CHECK-NEXT: and v1.16b, v1.16b, v0.16b -; CHECK-NEXT: and v0.16b, v7.16b, v0.16b -; CHECK-NEXT: add v0.4s, v5.4s, v0.4s -; CHECK-NEXT: add v1.4s, v4.4s, v1.4s -; CHECK-NEXT: add v2.4s, v2.4s, v6.4s -; CHECK-NEXT: add v3.4s, v3.4s, v17.4s +; CHECK-NEXT: ushll v7.4s, v7.4h, #0 +; CHECK-NEXT: and v17.16b, v0.16b, v16.16b +; CHECK-NEXT: and v18.16b, v1.16b, v16.16b +; CHECK-NEXT: and v1.16b, v2.16b, v16.16b +; CHECK-NEXT: and v0.16b, v3.16b, v16.16b +; CHECK-NEXT: and v2.16b, v5.16b, v16.16b +; CHECK-NEXT: and v3.16b, v4.16b, v16.16b +; CHECK-NEXT: and v4.16b, v6.16b, v16.16b +; CHECK-NEXT: and v5.16b, v7.16b, v16.16b +; CHECK-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: add v0.4s, v0.4s, v2.4s +; CHECK-NEXT: add v3.4s, v17.4s, v3.4s +; CHECK-NEXT: add v1.4s, v1.4s, v5.4s +; CHECK-NEXT: add v2.4s, v18.4s, v4.4s +; CHECK-NEXT: ldr x23, [sp], #48 // 8-byte Folded Reload ; CHECK-NEXT: ret entry: %s0s = zext <16 x i12> %s0 to <16 x i32> @@ -618,12 +636,12 @@ define <16 x i32> @sub_zz(<16 x i8> %s0, <16 x i8> %s1) { ; CHECK-LABEL: sub_zz: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: usubl2 v2.8h, v0.16b, v1.16b -; CHECK-NEXT: usubl v0.8h, v0.8b, v1.8b -; CHECK-NEXT: sshll2 v3.4s, v2.8h, #0 -; CHECK-NEXT: sshll2 v1.4s, v0.8h, #0 -; CHECK-NEXT: sshll v0.4s, v0.4h, #0 -; CHECK-NEXT: sshll v2.4s, v2.4h, #0 +; CHECK-NEXT: usubl v2.8h, v0.8b, v1.8b +; CHECK-NEXT: usubl2 v4.8h, v0.16b, v1.16b +; CHECK-NEXT: sshll v0.4s, v2.4h, #0 +; CHECK-NEXT: sshll2 v3.4s, v4.8h, #0 +; CHECK-NEXT: sshll2 v1.4s, v2.8h, #0 +; CHECK-NEXT: sshll v2.4s, v4.4h, #0 ; CHECK-NEXT: ret entry: %s0s = zext <16 x i8> %s0 to <16 x i32> @@ -635,12 +653,12 @@ define <16 x i32> @sub_ss(<16 x i8> %s0, <16 x i8> %s1) { ; CHECK-LABEL: sub_ss: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ssubl2 v2.8h, v0.16b, v1.16b -; CHECK-NEXT: ssubl v0.8h, v0.8b, v1.8b -; CHECK-NEXT: sshll2 v3.4s, v2.8h, #0 -; CHECK-NEXT: sshll2 v1.4s, v0.8h, #0 -; CHECK-NEXT: sshll v0.4s, v0.4h, #0 -; CHECK-NEXT: sshll v2.4s, v2.4h, #0 +; CHECK-NEXT: ssubl v2.8h, v0.8b, v1.8b +; CHECK-NEXT: ssubl2 v4.8h, v0.16b, v1.16b +; CHECK-NEXT: sshll v0.4s, v2.4h, #0 +; CHECK-NEXT: sshll2 v3.4s, v4.8h, #0 +; CHECK-NEXT: sshll2 v1.4s, v2.8h, #0 +; CHECK-NEXT: sshll v2.4s, v4.4h, #0 ; CHECK-NEXT: ret entry: %s0s = sext <16 x i8> %s0 to <16 x i32> @@ -652,14 +670,14 @@ define <16 x i32> @sub_zs(<16 x i8> %s0, <16 x i8> %s1) { ; CHECK-LABEL: sub_zs: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushll2 v2.8h, v0.16b, #0 -; CHECK-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-NEXT: ssubw2 v2.8h, v2.8h, v1.16b -; CHECK-NEXT: ssubw v0.8h, v0.8h, v1.8b -; CHECK-NEXT: sshll2 v3.4s, v2.8h, #0 -; CHECK-NEXT: sshll2 v1.4s, v0.8h, #0 -; CHECK-NEXT: sshll v0.4s, v0.4h, #0 -; CHECK-NEXT: sshll v2.4s, v2.4h, #0 +; CHECK-NEXT: ushll v2.8h, v0.8b, #0 +; CHECK-NEXT: ushll2 v0.8h, v0.16b, #0 +; CHECK-NEXT: ssubw v2.8h, v2.8h, v1.8b +; CHECK-NEXT: ssubw2 v4.8h, v0.8h, v1.16b +; CHECK-NEXT: sshll v0.4s, v2.4h, #0 +; CHECK-NEXT: sshll2 v3.4s, v4.8h, #0 +; CHECK-NEXT: sshll2 v1.4s, v2.8h, #0 +; CHECK-NEXT: sshll v2.4s, v4.4h, #0 ; CHECK-NEXT: ret entry: %s0s = zext <16 x i8> %s0 to <16 x i32> diff --git a/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll b/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll --- a/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll +++ b/llvm/test/CodeGen/AArch64/neon-extracttruncate.ll @@ -206,8 +206,8 @@ ; CHECK-LABEL: extract_4_mixed: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: xtn v2.4h, 
v2.4s -; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3 ; CHECK-NEXT: xtn2 v0.8h, v1.4s ; CHECK-NEXT: mov v2.d[1], v3.d[0] ; CHECK-NEXT: uzp1 v0.16b, v0.16b, v2.16b @@ -267,11 +267,11 @@ define <16 x i8> @extract_4_v4i32_badindex(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) { ; CHECK-LABEL: extract_4_v4i32_badindex: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: adrp x8, .LCPI5_0 ; CHECK-NEXT: // kill: def $q3 killed $q3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 +; CHECK-NEXT: adrp x8, .LCPI5_0 ; CHECK-NEXT: // kill: def $q2 killed $q2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 -; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI5_0] +; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/neon-mov.ll b/llvm/test/CodeGen/AArch64/neon-mov.ll --- a/llvm/test/CodeGen/AArch64/neon-mov.ll +++ b/llvm/test/CodeGen/AArch64/neon-mov.ll @@ -333,8 +333,8 @@ define <2 x i32> @movi1d() { ; CHECK-LABEL: movi1d: ; CHECK: // %bb.0: -; CHECK-NEXT: adrp x8, .LCPI40_0 ; CHECK-NEXT: movi d1, #0x00ffffffff0000 +; CHECK-NEXT: adrp x8, .LCPI40_0 ; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI40_0] ; CHECK-NEXT: b test_movi1d %1 = tail call <2 x i32> @test_movi1d(<2 x i32> , <2 x i32> ) diff --git a/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll b/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll --- a/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll +++ b/llvm/test/CodeGen/AArch64/neon-reverseshuffle.ll @@ -48,8 +48,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, .LCPI4_0 ; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI4_0] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-NEXT: ret entry: @@ -83,8 +83,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, .LCPI7_0 ; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI7_0] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/neon-rshrn.ll b/llvm/test/CodeGen/AArch64/neon-rshrn.ll --- a/llvm/test/CodeGen/AArch64/neon-rshrn.ll +++ b/llvm/test/CodeGen/AArch64/neon-rshrn.ll @@ -112,8 +112,8 @@ ; CHECK-NEXT: movi v2.8h, #1, lsl #8 ; CHECK-NEXT: add v0.8h, v0.8h, v2.8h ; CHECK-NEXT: add v1.8h, v1.8h, v2.8h -; CHECK-NEXT: ushr v0.8h, v0.8h, #9 ; CHECK-NEXT: ushr v1.8h, v1.8h, #9 +; CHECK-NEXT: ushr v0.8h, v0.8h, #9 ; CHECK-NEXT: uzp1 v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: @@ -338,8 +338,8 @@ ; CHECK-NEXT: movi v2.4s, #1, lsl #16 ; CHECK-NEXT: add v0.4s, v0.4s, v2.4s ; CHECK-NEXT: add v1.4s, v1.4s, v2.4s -; CHECK-NEXT: ushr v0.4s, v0.4s, #17 ; CHECK-NEXT: ushr v1.4s, v1.4s, #17 +; CHECK-NEXT: ushr v0.4s, v0.4s, #17 ; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ret entry: @@ -773,8 +773,8 @@ ; CHECK-NEXT: dup v2.2d, x8 ; CHECK-NEXT: add v0.2d, v0.2d, v2.2d ; CHECK-NEXT: add v1.2d, v1.2d, v2.2d -; CHECK-NEXT: ushr v0.2d, 
v0.2d, #33 ; CHECK-NEXT: ushr v1.2d, v1.2d, #33 +; CHECK-NEXT: ushr v0.2d, v0.2d, #33 ; CHECK-NEXT: uzp1 v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/neon-shift-neg.ll b/llvm/test/CodeGen/AArch64/neon-shift-neg.ll --- a/llvm/test/CodeGen/AArch64/neon-shift-neg.ll +++ b/llvm/test/CodeGen/AArch64/neon-shift-neg.ll @@ -375,8 +375,8 @@ define @shrn64x2( %a, i64 %b) { ; CHECK-LABEL: shrn64x2: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: neg x8, x0 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: neg x8, x0 ; CHECK-NEXT: mov z1.d, x8 ; CHECK-NEXT: asr z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret @@ -391,8 +391,8 @@ define @shrn32x4( %a, i32 %b) { ; CHECK-LABEL: shrn32x4: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: neg w8, w0 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: neg w8, w0 ; CHECK-NEXT: mov z1.s, w8 ; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret @@ -407,8 +407,8 @@ define @shrn16x8( %a, i16 %b) { ; CHECK-LABEL: shrn16x8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: neg w8, w0 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: neg w8, w0 ; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret @@ -423,8 +423,8 @@ define @shrn8x16( %a, i8 %b) { ; CHECK-LABEL: shrn8x16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: neg w8, w0 ; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: neg w8, w0 ; CHECK-NEXT: mov z1.b, w8 ; CHECK-NEXT: asr z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: ret @@ -439,8 +439,8 @@ define @lshrn64x2( %a, i64 %b) { ; CHECK-LABEL: lshrn64x2: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: neg x8, x0 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: neg x8, x0 ; CHECK-NEXT: mov z1.d, x8 ; CHECK-NEXT: lsr z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret @@ -455,8 +455,8 @@ define @lshrn32x4( %a, i32 %b) { ; CHECK-LABEL: lshrn32x4: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: neg w8, w0 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: neg w8, w0 ; CHECK-NEXT: mov z1.s, w8 ; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret @@ -471,8 +471,8 @@ define @lshrn16x8( %a, i16 %b) { ; CHECK-LABEL: lshrn16x8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: neg w8, w0 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: neg w8, w0 ; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: lsr z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret @@ -487,8 +487,8 @@ define @lshrn8x16( %a, i8 %b) { ; CHECK-LABEL: lshrn8x16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: neg w8, w0 ; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: neg w8, w0 ; CHECK-NEXT: mov z1.b, w8 ; CHECK-NEXT: lsr z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: ret @@ -503,8 +503,8 @@ define @shln64x2( %a, i64 %b) { ; CHECK-LABEL: shln64x2: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: neg x8, x0 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: neg x8, x0 ; CHECK-NEXT: mov z1.d, x8 ; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret @@ -519,8 +519,8 @@ define @shln32x4( %a, i32 %b) { ; CHECK-LABEL: shln32x4: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: neg w8, w0 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: neg w8, w0 ; CHECK-NEXT: mov z1.s, w8 ; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret @@ -535,8 +535,8 @@ define @shln16x8( %a, i16 %b) { ; CHECK-LABEL: shln16x8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: neg w8, w0 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: neg w8, w0 ; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret @@ -551,8 +551,8 @@ define @shln8x16( %a, i8 %b) { ; CHECK-LABEL: shln8x16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: neg w8, w0 ; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: neg w8, w0 ; CHECK-NEXT: mov z1.b, w8 ; CHECK-NEXT: lsl z0.b, 
p0/m, z0.b, z1.b ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/neon-truncstore.ll b/llvm/test/CodeGen/AArch64/neon-truncstore.ll --- a/llvm/test/CodeGen/AArch64/neon-truncstore.ll +++ b/llvm/test/CodeGen/AArch64/neon-truncstore.ll @@ -141,10 +141,10 @@ ; CHECK-LABEL: v32i32_v32i8: ; CHECK: // %bb.0: ; CHECK-NEXT: uzp1 v6.8h, v6.8h, v7.8h +; CHECK-NEXT: uzp1 v4.8h, v4.8h, v5.8h ; CHECK-NEXT: uzp1 v2.8h, v2.8h, v3.8h -; CHECK-NEXT: uzp1 v3.8h, v4.8h, v5.8h ; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h -; CHECK-NEXT: uzp1 v1.16b, v3.16b, v6.16b +; CHECK-NEXT: uzp1 v1.16b, v4.16b, v6.16b ; CHECK-NEXT: uzp1 v0.16b, v0.16b, v2.16b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/neon-wide-splat.ll b/llvm/test/CodeGen/AArch64/neon-wide-splat.ll --- a/llvm/test/CodeGen/AArch64/neon-wide-splat.ll +++ b/llvm/test/CodeGen/AArch64/neon-wide-splat.ll @@ -131,8 +131,8 @@ define <8 x i8> @shuffle_not4(<8 x i8> %v) { ; CHECK-LABEL: shuffle_not4: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: adrp x8, .LCPI11_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: adrp x8, .LCPI11_0 ; CHECK-NEXT: mov v0.d[1], v0.d[0] ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI11_0] ; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b diff --git a/llvm/test/CodeGen/AArch64/neon-widen-shuffle.ll b/llvm/test/CodeGen/AArch64/neon-widen-shuffle.ll --- a/llvm/test/CodeGen/AArch64/neon-widen-shuffle.ll +++ b/llvm/test/CodeGen/AArch64/neon-widen-shuffle.ll @@ -138,8 +138,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, .LCPI12_0 ; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI12_0] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-NEXT: ret entry: @@ -153,8 +153,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, .LCPI13_0 ; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI13_0] +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-NEXT: ret entry: @@ -168,8 +168,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, .LCPI14_0 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI14_0] +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/nontemporal-load.ll b/llvm/test/CodeGen/AArch64/nontemporal-load.ll --- a/llvm/test/CodeGen/AArch64/nontemporal-load.ll +++ b/llvm/test/CodeGen/AArch64/nontemporal-load.ll @@ -1,8 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc --mattr=+sve < %s -mtriple aarch64-apple-darwin | FileCheck %s ; RUN: llc --mattr=+sve < %s -mtriple aarch64_be-unknown-unknown | FileCheck --check-prefix CHECK-BE %s - define <4 x double> @test_ldnp_v4f64(ptr %A) { +; ; CHECK-LABEL: test_ldnp_v4f64: ; CHECK: ; %bb.0: ; CHECK-NEXT: ldnp q0, q1, [x0] @@ -15,8 +15,8 @@ %lv = load <4 x double>, ptr %A, align 8, !nontemporal !0 ret <4 x double> %lv } - define <4 x i64> @test_ldnp_v4i64(ptr %A) { +; ; CHECK-LABEL: test_ldnp_v4i64: ; CHECK: ; %bb.0: ; CHECK-NEXT: ldnp q0, q1, [x0] @@ -29,8 
+29,8 @@ %lv = load <4 x i64>, ptr %A, align 8, !nontemporal !0 ret <4 x i64> %lv } - define <8 x i32> @test_ldnp_v8i32(ptr %A) { +; ; CHECK-LABEL: test_ldnp_v8i32: ; CHECK: ; %bb.0: ; CHECK-NEXT: ldnp q0, q1, [x0] @@ -43,8 +43,8 @@ %lv = load <8 x i32>, ptr %A, align 8, !nontemporal !0 ret <8 x i32> %lv } - define <8 x float> @test_ldnp_v8f32(ptr %A) { +; ; CHECK-LABEL: test_ldnp_v8f32: ; CHECK: ; %bb.0: ; CHECK-NEXT: ldnp q0, q1, [x0] @@ -57,8 +57,8 @@ %lv = load <8 x float>, ptr %A, align 8, !nontemporal !0 ret <8 x float> %lv } - define <16 x i16> @test_ldnp_v16i16(ptr %A) { +; ; CHECK-LABEL: test_ldnp_v16i16: ; CHECK: ; %bb.0: ; CHECK-NEXT: ldnp q0, q1, [x0] @@ -71,8 +71,8 @@ %lv = load <16 x i16>, ptr %A, align 8, !nontemporal !0 ret <16 x i16> %lv } - define <16 x half> @test_ldnp_v16f16(ptr %A) { +; ; CHECK-LABEL: test_ldnp_v16f16: ; CHECK: ; %bb.0: ; CHECK-NEXT: ldnp q0, q1, [x0] @@ -85,8 +85,8 @@ %lv = load <16 x half>, ptr %A, align 8, !nontemporal !0 ret <16 x half> %lv } - define <32 x i8> @test_ldnp_v32i8(ptr %A) { +; ; CHECK-LABEL: test_ldnp_v32i8: ; CHECK: ; %bb.0: ; CHECK-NEXT: ldnp q0, q1, [x0] @@ -99,8 +99,8 @@ %lv = load <32 x i8>, ptr %A, align 8, !nontemporal !0 ret <32 x i8> %lv } - define <4 x i32> @test_ldnp_v4i32(ptr %A) { +; ; CHECK-LABEL: test_ldnp_v4i32: ; CHECK: ; %bb.0: ; CHECK-NEXT: ldr q0, [x0] @@ -113,8 +113,8 @@ %lv = load<4 x i32>, ptr %A, align 8, !nontemporal !0 ret <4 x i32> %lv } - define <4 x float> @test_ldnp_v4f32(ptr %A) { +; ; CHECK-LABEL: test_ldnp_v4f32: ; CHECK: ; %bb.0: ; CHECK-NEXT: ldr q0, [x0] @@ -127,8 +127,8 @@ %lv = load<4 x float>, ptr %A, align 8, !nontemporal !0 ret <4 x float> %lv } - define <8 x i16> @test_ldnp_v8i16(ptr %A) { +; ; CHECK-LABEL: test_ldnp_v8i16: ; CHECK: ; %bb.0: ; CHECK-NEXT: ldr q0, [x0] @@ -141,8 +141,8 @@ %lv = load <8 x i16>, ptr %A, align 8, !nontemporal !0 ret <8 x i16> %lv } - define <16 x i8> @test_ldnp_v16i8(ptr %A) { +; ; CHECK-LABEL: test_ldnp_v16i8: ; CHECK: ; %bb.0: ; CHECK-NEXT: ldr q0, [x0] @@ -156,6 +156,7 @@ ret <16 x i8> %lv } define <2 x double> @test_ldnp_v2f64(ptr %A) { +; ; CHECK-LABEL: test_ldnp_v2f64: ; CHECK: ; %bb.0: ; CHECK-NEXT: ldr q0, [x0] @@ -168,8 +169,8 @@ %lv = load <2 x double>, ptr %A, align 8, !nontemporal !0 ret <2 x double> %lv } - define <2 x i32> @test_ldnp_v2i32(ptr %A) { +; ; CHECK-LABEL: test_ldnp_v2i32: ; CHECK: ; %bb.0: ; CHECK-NEXT: ldr d0, [x0] @@ -182,8 +183,8 @@ %lv = load <2 x i32>, ptr %A, align 8, !nontemporal !0 ret <2 x i32> %lv } - define <2 x float> @test_ldnp_v2f32(ptr %A) { +; ; CHECK-LABEL: test_ldnp_v2f32: ; CHECK: ; %bb.0: ; CHECK-NEXT: ldr d0, [x0] @@ -196,8 +197,8 @@ %lv = load <2 x float>, ptr %A, align 8, !nontemporal !0 ret <2 x float> %lv } - define <4 x i16> @test_ldnp_v4i16(ptr %A) { +; ; CHECK-LABEL: test_ldnp_v4i16: ; CHECK: ; %bb.0: ; CHECK-NEXT: ldr d0, [x0] @@ -210,8 +211,8 @@ %lv = load <4 x i16>, ptr %A, align 8, !nontemporal !0 ret <4 x i16> %lv } - define <8 x i8> @test_ldnp_v8i8(ptr %A) { +; ; CHECK-LABEL: test_ldnp_v8i8: ; CHECK: ; %bb.0: ; CHECK-NEXT: ldr d0, [x0] @@ -224,8 +225,8 @@ %lv = load <8 x i8>, ptr %A, align 8, !nontemporal !0 ret <8 x i8> %lv } - define <1 x double> @test_ldnp_v1f64(ptr %A) { +; ; CHECK-LABEL: test_ldnp_v1f64: ; CHECK: ; %bb.0: ; CHECK-NEXT: ldr d0, [x0] @@ -238,8 +239,8 @@ %lv = load <1 x double>, ptr %A, align 8, !nontemporal !0 ret <1 x double> %lv } - define <1 x i64> @test_ldnp_v1i64(ptr %A) { +; ; CHECK-LABEL: test_ldnp_v1i64: ; CHECK: ; %bb.0: ; CHECK-NEXT: ldr d0, [x0] @@ -252,8 +253,8 @@ %lv = 
load <1 x i64>, ptr %A, align 8, !nontemporal !0 ret <1 x i64> %lv } - define <32 x i16> @test_ldnp_v32i16(ptr %A) { +; ; CHECK-LABEL: test_ldnp_v32i16: ; CHECK: ; %bb.0: ; CHECK-NEXT: ldnp q0, q1, [x0] @@ -268,8 +269,8 @@ %lv = load <32 x i16>, ptr %A, align 8, !nontemporal !0 ret <32 x i16> %lv } - define <32 x half> @test_ldnp_v32f16(ptr %A) { +; ; CHECK-LABEL: test_ldnp_v32f16: ; CHECK: ; %bb.0: ; CHECK-NEXT: ldnp q0, q1, [x0] @@ -284,8 +285,8 @@ %lv = load <32 x half>, ptr %A, align 8, !nontemporal !0 ret <32 x half> %lv } - define <16 x i32> @test_ldnp_v16i32(ptr %A) { +; ; CHECK-LABEL: test_ldnp_v16i32: ; CHECK: ; %bb.0: ; CHECK-NEXT: ldnp q0, q1, [x0] @@ -300,8 +301,8 @@ %lv = load <16 x i32>, ptr %A, align 8, !nontemporal !0 ret <16 x i32> %lv } - define <16 x float> @test_ldnp_v16f32(ptr %A) { +; ; CHECK-LABEL: test_ldnp_v16f32: ; CHECK: ; %bb.0: ; CHECK-NEXT: ldnp q0, q1, [x0] @@ -316,136 +317,136 @@ %lv = load <16 x float>, ptr %A, align 8, !nontemporal !0 ret <16 x float> %lv } - define <17 x float> @test_ldnp_v17f32(ptr %A) { +; ; CHECK-LABEL: test_ldnp_v17f32: ; CHECK: ; %bb.0: ; CHECK-NEXT: ldnp q0, q1, [x0, #32] -; CHECK-NEXT: ldnp q2, q3, [x0] -; CHECK-NEXT: ldr s4, [x0, #64] +; CHECK-NEXT: ldr s2, [x0, #64] +; CHECK-NEXT: ldnp q3, q4, [x0] ; CHECK-NEXT: stp q0, q1, [x8, #32] -; CHECK-NEXT: stp q2, q3, [x8] -; CHECK-NEXT: str s4, [x8, #64] +; CHECK-NEXT: stp q3, q4, [x8] +; CHECK-NEXT: str s2, [x8, #64] ; CHECK-NEXT: ret ; ; CHECK-BE-LABEL: test_ldnp_v17f32: ; CHECK-BE: // %bb.0: -; CHECK-BE-NEXT: add x9, x0, #32 -; CHECK-BE-NEXT: ld1 { v1.4s }, [x0] -; CHECK-BE-NEXT: add x10, x0, #16 -; CHECK-BE-NEXT: ldr s2, [x0, #64] -; CHECK-BE-NEXT: ld1 { v0.4s }, [x9] ; CHECK-BE-NEXT: add x9, x0, #48 -; CHECK-BE-NEXT: ld1 { v4.4s }, [x10] -; CHECK-BE-NEXT: add x10, x8, #32 +; CHECK-BE-NEXT: ld1 { v0.4s }, [x0] +; CHECK-BE-NEXT: add x10, x0, #32 +; CHECK-BE-NEXT: ld1 { v2.4s }, [x9] +; CHECK-BE-NEXT: add x9, x0, #16 +; CHECK-BE-NEXT: ldr s1, [x0, #64] ; CHECK-BE-NEXT: ld1 { v3.4s }, [x9] +; CHECK-BE-NEXT: ld1 { v4.4s }, [x10] ; CHECK-BE-NEXT: add x9, x8, #48 -; CHECK-BE-NEXT: str s2, [x8, #64] -; CHECK-BE-NEXT: st1 { v1.4s }, [x8] +; CHECK-BE-NEXT: str s1, [x8, #64] +; CHECK-BE-NEXT: add x10, x8, #32 +; CHECK-BE-NEXT: st1 { v0.4s }, [x8] ; CHECK-BE-NEXT: add x8, x8, #16 -; CHECK-BE-NEXT: st1 { v3.4s }, [x9] -; CHECK-BE-NEXT: st1 { v0.4s }, [x10] -; CHECK-BE-NEXT: st1 { v4.4s }, [x8] +; CHECK-BE-NEXT: st1 { v2.4s }, [x9] +; CHECK-BE-NEXT: st1 { v4.4s }, [x10] +; CHECK-BE-NEXT: st1 { v3.4s }, [x8] ; CHECK-BE-NEXT: ret %lv = load <17 x float>, ptr %A, align 8, !nontemporal !0 ret <17 x float> %lv } - define <33 x double> @test_ldnp_v33f64(ptr %A) { +; ; CHECK-LABEL: test_ldnp_v33f64: ; CHECK: ; %bb.0: ; CHECK-NEXT: ldnp q0, q1, [x0] +; CHECK-NEXT: ldr d20, [x0, #256] ; CHECK-NEXT: ldnp q2, q3, [x0, #32] ; CHECK-NEXT: ldnp q4, q5, [x0, #64] ; CHECK-NEXT: ldnp q6, q7, [x0, #96] ; CHECK-NEXT: ldnp q16, q17, [x0, #128] ; CHECK-NEXT: ldnp q18, q19, [x0, #224] -; CHECK-NEXT: ldnp q20, q21, [x0, #192] -; CHECK-NEXT: ldnp q22, q23, [x0, #160] -; CHECK-NEXT: ldr d24, [x0, #256] +; CHECK-NEXT: ldnp q21, q22, [x0, #160] +; CHECK-NEXT: ldnp q23, q24, [x0, #192] ; CHECK-NEXT: stp q0, q1, [x8] ; CHECK-NEXT: stp q2, q3, [x8, #32] ; CHECK-NEXT: stp q4, q5, [x8, #64] ; CHECK-NEXT: stp q6, q7, [x8, #96] ; CHECK-NEXT: stp q16, q17, [x8, #128] -; CHECK-NEXT: stp q22, q23, [x8, #160] -; CHECK-NEXT: stp q20, q21, [x8, #192] +; CHECK-NEXT: stp q21, q22, [x8, #160] +; CHECK-NEXT: stp q23, q24, [x8, #192] ; 
CHECK-NEXT: stp q18, q19, [x8, #224] -; CHECK-NEXT: str d24, [x8, #256] +; CHECK-NEXT: str d20, [x8, #256] ; CHECK-NEXT: ret ; ; CHECK-BE-LABEL: test_ldnp_v33f64: ; CHECK-BE: // %bb.0: ; CHECK-BE-NEXT: add x9, x0, #16 ; CHECK-BE-NEXT: add x10, x0, #32 -; CHECK-BE-NEXT: ld1 { v21.2d }, [x0] -; CHECK-BE-NEXT: add x11, x8, #208 +; CHECK-BE-NEXT: add x11, x0, #48 ; CHECK-BE-NEXT: ld1 { v0.2d }, [x9] -; CHECK-BE-NEXT: add x9, x0, #48 +; CHECK-BE-NEXT: add x9, x0, #64 ; CHECK-BE-NEXT: ld1 { v1.2d }, [x10] -; CHECK-BE-NEXT: add x10, x0, #64 -; CHECK-BE-NEXT: ld1 { v2.2d }, [x9] -; CHECK-BE-NEXT: add x9, x0, #80 -; CHECK-BE-NEXT: ld1 { v3.2d }, [x10] -; CHECK-BE-NEXT: add x10, x0, #96 -; CHECK-BE-NEXT: ld1 { v4.2d }, [x9] +; CHECK-BE-NEXT: add x10, x0, #80 +; CHECK-BE-NEXT: ld1 { v3.2d }, [x9] ; CHECK-BE-NEXT: add x9, x0, #112 -; CHECK-BE-NEXT: ld1 { v5.2d }, [x10] +; CHECK-BE-NEXT: ld1 { v4.2d }, [x10] ; CHECK-BE-NEXT: add x10, x0, #128 ; CHECK-BE-NEXT: ld1 { v6.2d }, [x9] -; CHECK-BE-NEXT: add x9, x0, #144 +; CHECK-BE-NEXT: add x9, x0, #160 ; CHECK-BE-NEXT: ld1 { v7.2d }, [x10] -; CHECK-BE-NEXT: add x10, x0, #160 -; CHECK-BE-NEXT: ld1 { v16.2d }, [x9] -; CHECK-BE-NEXT: add x9, x0, #176 -; CHECK-BE-NEXT: ld1 { v17.2d }, [x10] -; CHECK-BE-NEXT: add x10, x0, #192 -; CHECK-BE-NEXT: ld1 { v18.2d }, [x9] -; CHECK-BE-NEXT: add x9, x0, #224 -; CHECK-BE-NEXT: ld1 { v19.2d }, [x10] -; CHECK-BE-NEXT: add x10, x0, #208 -; CHECK-BE-NEXT: ld1 { v20.2d }, [x9] +; CHECK-BE-NEXT: add x10, x0, #176 +; CHECK-BE-NEXT: ld1 { v17.2d }, [x9] ; CHECK-BE-NEXT: add x9, x0, #240 -; CHECK-BE-NEXT: ldr d22, [x0, #256] +; CHECK-BE-NEXT: ld1 { v2.2d }, [x11] +; CHECK-BE-NEXT: add x11, x0, #96 +; CHECK-BE-NEXT: ld1 { v18.2d }, [x10] +; CHECK-BE-NEXT: ld1 { v20.2d }, [x0] +; CHECK-BE-NEXT: ld1 { v22.2d }, [x9] +; CHECK-BE-NEXT: add x10, x0, #224 +; CHECK-BE-NEXT: ld1 { v5.2d }, [x11] +; CHECK-BE-NEXT: add x11, x0, #144 +; CHECK-BE-NEXT: ldr d21, [x0, #256] +; CHECK-BE-NEXT: add x9, x0, #208 +; CHECK-BE-NEXT: ld1 { v24.2d }, [x10] +; CHECK-BE-NEXT: ld1 { v16.2d }, [x11] +; CHECK-BE-NEXT: add x11, x0, #192 ; CHECK-BE-NEXT: ld1 { v23.2d }, [x9] ; CHECK-BE-NEXT: add x9, x8, #240 -; CHECK-BE-NEXT: ld1 { v24.2d }, [x10] -; CHECK-BE-NEXT: add x10, x8, #224 -; CHECK-BE-NEXT: str d22, [x8, #256] -; CHECK-BE-NEXT: st1 { v21.2d }, [x8] +; CHECK-BE-NEXT: ld1 { v19.2d }, [x11] +; CHECK-BE-NEXT: str d21, [x8, #256] +; CHECK-BE-NEXT: st1 { v20.2d }, [x8] +; CHECK-BE-NEXT: st1 { v22.2d }, [x9] +; CHECK-BE-NEXT: add x9, x8, #224 +; CHECK-BE-NEXT: st1 { v24.2d }, [x9] +; CHECK-BE-NEXT: add x9, x8, #208 ; CHECK-BE-NEXT: st1 { v23.2d }, [x9] ; CHECK-BE-NEXT: add x9, x8, #192 -; CHECK-BE-NEXT: st1 { v20.2d }, [x10] -; CHECK-BE-NEXT: add x10, x8, #176 -; CHECK-BE-NEXT: st1 { v24.2d }, [x11] -; CHECK-BE-NEXT: add x11, x8, #160 ; CHECK-BE-NEXT: st1 { v19.2d }, [x9] +; CHECK-BE-NEXT: add x9, x8, #176 +; CHECK-BE-NEXT: st1 { v18.2d }, [x9] +; CHECK-BE-NEXT: add x9, x8, #160 +; CHECK-BE-NEXT: st1 { v17.2d }, [x9] ; CHECK-BE-NEXT: add x9, x8, #144 -; CHECK-BE-NEXT: st1 { v18.2d }, [x10] -; CHECK-BE-NEXT: add x10, x8, #128 -; CHECK-BE-NEXT: st1 { v17.2d }, [x11] -; CHECK-BE-NEXT: add x11, x8, #112 ; CHECK-BE-NEXT: st1 { v16.2d }, [x9] +; CHECK-BE-NEXT: add x9, x8, #128 +; CHECK-BE-NEXT: st1 { v7.2d }, [x9] +; CHECK-BE-NEXT: add x9, x8, #112 +; CHECK-BE-NEXT: st1 { v6.2d }, [x9] ; CHECK-BE-NEXT: add x9, x8, #96 -; CHECK-BE-NEXT: st1 { v7.2d }, [x10] -; CHECK-BE-NEXT: add x10, x8, #80 -; CHECK-BE-NEXT: st1 { v6.2d }, [x11] -; CHECK-BE-NEXT: add x11, x8, #64 
; CHECK-BE-NEXT: st1 { v5.2d }, [x9] +; CHECK-BE-NEXT: add x9, x8, #80 +; CHECK-BE-NEXT: st1 { v4.2d }, [x9] +; CHECK-BE-NEXT: add x9, x8, #64 +; CHECK-BE-NEXT: st1 { v3.2d }, [x9] ; CHECK-BE-NEXT: add x9, x8, #48 -; CHECK-BE-NEXT: st1 { v4.2d }, [x10] -; CHECK-BE-NEXT: add x10, x8, #32 -; CHECK-BE-NEXT: add x8, x8, #16 -; CHECK-BE-NEXT: st1 { v3.2d }, [x11] ; CHECK-BE-NEXT: st1 { v2.2d }, [x9] -; CHECK-BE-NEXT: st1 { v1.2d }, [x10] +; CHECK-BE-NEXT: add x9, x8, #32 +; CHECK-BE-NEXT: add x8, x8, #16 +; CHECK-BE-NEXT: st1 { v1.2d }, [x9] ; CHECK-BE-NEXT: st1 { v0.2d }, [x8] ; CHECK-BE-NEXT: ret %lv = load <33 x double>, ptr %A, align 8, !nontemporal !0 ret <33 x double> %lv } - define <33 x i8> @test_ldnp_v33i8(ptr %A) { +; ; CHECK-LABEL: test_ldnp_v33i8: ; CHECK: ; %bb.0: ; CHECK-NEXT: ldnp q0, q1, [x0] @@ -459,30 +460,30 @@ ; CHECK-BE: // %bb.0: ; CHECK-BE-NEXT: add x9, x0, #16 ; CHECK-BE-NEXT: ld1 { v0.16b }, [x0] -; CHECK-BE-NEXT: add x10, x8, #16 +; CHECK-BE-NEXT: ldrb w10, [x0, #32] ; CHECK-BE-NEXT: ld1 { v1.16b }, [x9] -; CHECK-BE-NEXT: ldrb w9, [x0, #32] -; CHECK-BE-NEXT: strb w9, [x8, #32] +; CHECK-BE-NEXT: strb w10, [x8, #32] ; CHECK-BE-NEXT: st1 { v0.16b }, [x8] -; CHECK-BE-NEXT: st1 { v1.16b }, [x10] +; CHECK-BE-NEXT: add x8, x8, #16 +; CHECK-BE-NEXT: st1 { v1.16b }, [x8] ; CHECK-BE-NEXT: ret %lv = load<33 x i8>, ptr %A, align 8, !nontemporal !0 ret <33 x i8> %lv } - define <4 x i65> @test_ldnp_v4i65(ptr %A) { +; ; CHECK-LABEL: test_ldnp_v4i65: ; CHECK: ; %bb.0: ; CHECK-NEXT: ldp x8, x9, [x0, #8] ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr x10, [x0, #24] -; CHECK-NEXT: and x1, x8, #0x1 ; CHECK-NEXT: ldrb w11, [x0, #32] +; CHECK-NEXT: and x1, x8, #0x1 ; CHECK-NEXT: extr x2, x9, x8, #1 ; CHECK-NEXT: extr x4, x10, x9, #2 +; CHECK-NEXT: mov.d v0[1], x1 ; CHECK-NEXT: extr x6, x11, x10, #3 ; CHECK-NEXT: ubfx x3, x9, #1, #1 -; CHECK-NEXT: mov.d v0[1], x1 ; CHECK-NEXT: ubfx x5, x10, #2, #1 ; CHECK-NEXT: ubfx x7, x11, #3, #1 ; CHECK-NEXT: fmov x0, d0 @@ -490,78 +491,79 @@ ; ; CHECK-BE-LABEL: test_ldnp_v4i65: ; CHECK-BE: // %bb.0: -; CHECK-BE-NEXT: ldp x10, x9, [x0, #16] -; CHECK-BE-NEXT: ldp x12, x11, [x0] +; CHECK-BE-NEXT: ldp x10, x9, [x0] ; CHECK-BE-NEXT: ldrb w8, [x0, #32] +; CHECK-BE-NEXT: ldp x12, x11, [x0, #16] ; CHECK-BE-NEXT: lsr x13, x10, #56 -; CHECK-BE-NEXT: lsr x14, x12, #56 -; CHECK-BE-NEXT: extr x15, x11, x10, #56 -; CHECK-BE-NEXT: orr x7, x8, x9, lsl #8 +; CHECK-BE-NEXT: orr x7, x8, x11, lsl #8 ; CHECK-BE-NEXT: extr x8, x10, x9, #56 -; CHECK-BE-NEXT: extr x9, x12, x11, #56 -; CHECK-BE-NEXT: lsr x12, x12, #59 -; CHECK-BE-NEXT: ubfx x10, x10, #57, #1 -; CHECK-BE-NEXT: extr x5, x13, x8, #1 -; CHECK-BE-NEXT: extr x1, x14, x9, #3 -; CHECK-BE-NEXT: ubfx x9, x11, #58, #1 -; CHECK-BE-NEXT: fmov d0, x12 -; CHECK-BE-NEXT: and x12, x8, #0x1 -; CHECK-BE-NEXT: lsr x11, x11, #56 -; CHECK-BE-NEXT: fmov d2, x10 +; CHECK-BE-NEXT: extr x11, x12, x11, #56 +; CHECK-BE-NEXT: lsr x14, x12, #56 +; CHECK-BE-NEXT: extr x15, x9, x12, #56 +; CHECK-BE-NEXT: lsr x10, x10, #59 +; CHECK-BE-NEXT: extr x1, x13, x8, #3 +; CHECK-BE-NEXT: lsr x8, x9, #56 +; CHECK-BE-NEXT: ubfx x12, x12, #57, #1 +; CHECK-BE-NEXT: ubfx x9, x9, #58, #1 +; CHECK-BE-NEXT: extr x5, x14, x11, #1 +; CHECK-BE-NEXT: and x11, x11, #0x1 +; CHECK-BE-NEXT: fmov d0, x10 +; CHECK-BE-NEXT: fmov d2, x12 +; CHECK-BE-NEXT: fmov d3, x11 ; CHECK-BE-NEXT: fmov d1, x9 -; CHECK-BE-NEXT: extr x3, x11, x15, #2 -; CHECK-BE-NEXT: fmov d3, x12 +; CHECK-BE-NEXT: extr x3, x8, x15, #2 ; CHECK-BE-NEXT: mov v0.d[1], x1 ; CHECK-BE-NEXT: mov v2.d[1], x5 -; 
CHECK-BE-NEXT: mov v1.d[1], x3 ; CHECK-BE-NEXT: mov v3.d[1], x7 +; CHECK-BE-NEXT: mov v1.d[1], x3 ; CHECK-BE-NEXT: fmov x0, d0 ; CHECK-BE-NEXT: fmov x4, d2 -; CHECK-BE-NEXT: fmov x2, d1 ; CHECK-BE-NEXT: fmov x6, d3 +; CHECK-BE-NEXT: fmov x2, d1 ; CHECK-BE-NEXT: ret %lv = load <4 x i65>, ptr %A, align 8, !nontemporal !0 ret <4 x i65> %lv } - define <4 x i63> @test_ldnp_v4i63(ptr %A) { +; ; CHECK-LABEL: test_ldnp_v4i63: ; CHECK: ; %bb.0: ; CHECK-NEXT: ldp x8, x9, [x0] -; CHECK-NEXT: ldp x10, x12, [x0, #16] -; CHECK-NEXT: extr x11, x9, x8, #63 +; CHECK-NEXT: ldp x10, x11, [x0, #16] +; CHECK-NEXT: extr x12, x9, x8, #63 ; CHECK-NEXT: and x0, x8, #0x7fffffffffffffff ; CHECK-NEXT: extr x9, x10, x9, #62 -; CHECK-NEXT: extr x3, x12, x10, #61 -; CHECK-NEXT: and x1, x11, #0x7fffffffffffffff +; CHECK-NEXT: extr x3, x11, x10, #61 +; CHECK-NEXT: and x1, x12, #0x7fffffffffffffff ; CHECK-NEXT: and x2, x9, #0x7fffffffffffffff ; CHECK-NEXT: ret ; ; CHECK-BE-LABEL: test_ldnp_v4i63: ; CHECK-BE: // %bb.0: -; CHECK-BE-NEXT: ldp x8, x9, [x0, #16] -; CHECK-BE-NEXT: ldp x11, x10, [x0] -; CHECK-BE-NEXT: and x3, x9, #0x7fffffffffffffff -; CHECK-BE-NEXT: extr x12, x10, x8, #62 -; CHECK-BE-NEXT: extr x8, x8, x9, #63 -; CHECK-BE-NEXT: extr x0, x11, x10, #61 +; CHECK-BE-NEXT: ldp x9, x8, [x0, #8] +; CHECK-BE-NEXT: ldr x11, [x0, #24] +; CHECK-BE-NEXT: ldr x10, [x0] +; CHECK-BE-NEXT: and x3, x11, #0x7fffffffffffffff +; CHECK-BE-NEXT: extr x12, x9, x8, #62 +; CHECK-BE-NEXT: extr x8, x8, x11, #63 +; CHECK-BE-NEXT: extr x0, x10, x9, #61 ; CHECK-BE-NEXT: and x1, x12, #0x7fffffffffffffff ; CHECK-BE-NEXT: and x2, x8, #0x7fffffffffffffff ; CHECK-BE-NEXT: ret %lv = load <4 x i63>, ptr %A, align 8, !nontemporal !0 ret <4 x i63> %lv } - define <5 x double> @test_ldnp_v5f64(ptr %A) { +; ; CHECK-LABEL: test_ldnp_v5f64: ; CHECK: ; %bb.0: ; CHECK-NEXT: ldnp q0, q2, [x0] ; CHECK-NEXT: ldr d4, [x0, #32] ; CHECK-NEXT: ext.16b v1, v0, v0, #8 -; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q1 ; CHECK-NEXT: ext.16b v3, v2, v2, #8 +; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ; kill: def $d2 killed $d2 killed $q2 +; CHECK-NEXT: ; kill: def $d1 killed $d1 killed $q1 ; CHECK-NEXT: ; kill: def $d3 killed $d3 killed $q3 ; CHECK-NEXT: ret ; @@ -574,16 +576,16 @@ ; CHECK-BE-NEXT: // kill: def $d4 killed $d4 killed $q4 ; CHECK-BE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-BE-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-BE-NEXT: // kill: def $d1 killed $d1 killed $q1 ; CHECK-BE-NEXT: ext v3.16b, v2.16b, v2.16b, #8 +; CHECK-BE-NEXT: // kill: def $d1 killed $d1 killed $q1 ; CHECK-BE-NEXT: // kill: def $d2 killed $d2 killed $q2 ; CHECK-BE-NEXT: // kill: def $d3 killed $d3 killed $q3 ; CHECK-BE-NEXT: ret %lv = load<5 x double>, ptr %A, align 8, !nontemporal !0 ret <5 x double> %lv } - define <16 x i64> @test_ldnp_v16i64(ptr %A) { +; ; CHECK-LABEL: test_ldnp_v16i64: ; CHECK: ; %bb.0: ; CHECK-NEXT: ldnp q0, q1, [x0] @@ -602,8 +604,8 @@ %lv = load <16 x i64>, ptr %A, align 8, !nontemporal !0 ret <16 x i64> %lv } - define <16 x double> @test_ldnp_v16f64(ptr %A) { +; ; CHECK-LABEL: test_ldnp_v16f64: ; CHECK: ; %bb.0: ; CHECK-NEXT: ldnp q0, q1, [x0] @@ -622,8 +624,8 @@ %lv = load <16 x double>, ptr %A, align 8, !nontemporal !0 ret <16 x double> %lv } - define @test_ldnp_v20f32_vscale(* %A) { +; ; CHECK-LABEL: test_ldnp_v20f32_vscale: ; CHECK: ; %bb.0: ; CHECK-NEXT: ptrue p0.s @@ -646,5 +648,4 @@ %lv = load, * %A, align 8, !nontemporal !0 ret %lv } - !0 = !{i32 1} diff --git 
a/llvm/test/CodeGen/AArch64/nontemporal.ll b/llvm/test/CodeGen/AArch64/nontemporal.ll --- a/llvm/test/CodeGen/AArch64/nontemporal.ll +++ b/llvm/test/CodeGen/AArch64/nontemporal.ll @@ -439,43 +439,43 @@ define void @test_stnp_v17f32(<17 x float> %v, ptr %ptr) { ; CHECK-LABEL: test_stnp_v17f32: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: ldr s16, [sp, #16] -; CHECK-NEXT: add x8, sp, #20 -; CHECK-NEXT: ldr s17, [sp] -; CHECK-NEXT: add x9, sp, #4 ; CHECK-NEXT: ; kill: def $s4 killed $s4 def $q4 ; CHECK-NEXT: ; kill: def $s0 killed $s0 def $q0 +; CHECK-NEXT: ldr s16, [sp, #16] ; CHECK-NEXT: ; kill: def $s5 killed $s5 def $q5 ; CHECK-NEXT: ; kill: def $s1 killed $s1 def $q1 +; CHECK-NEXT: add x8, sp, #20 ; CHECK-NEXT: ; kill: def $s6 killed $s6 def $q6 ; CHECK-NEXT: ; kill: def $s2 killed $s2 def $q2 ; CHECK-NEXT: ; kill: def $s7 killed $s7 def $q7 ; CHECK-NEXT: ; kill: def $s3 killed $s3 def $q3 -; CHECK-NEXT: ld1.s { v16 }[1], [x8] -; CHECK-NEXT: add x8, sp, #24 -; CHECK-NEXT: ld1.s { v17 }[1], [x9] -; CHECK-NEXT: add x9, sp, #8 ; CHECK-NEXT: mov.s v4[1], v5[0] ; CHECK-NEXT: mov.s v0[1], v1[0] -; CHECK-NEXT: ld1.s { v16 }[2], [x8] -; CHECK-NEXT: add x8, sp, #28 -; CHECK-NEXT: ld1.s { v17 }[2], [x9] -; CHECK-NEXT: add x9, sp, #12 +; CHECK-NEXT: ldr s5, [sp] +; CHECK-NEXT: ld1.s { v16 }[1], [x8] +; CHECK-NEXT: add x8, sp, #4 +; CHECK-NEXT: ld1.s { v5 }[1], [x8] +; CHECK-NEXT: add x8, sp, #24 ; CHECK-NEXT: mov.s v4[2], v6[0] +; CHECK-NEXT: ld1.s { v16 }[2], [x8] ; CHECK-NEXT: mov.s v0[2], v2[0] +; CHECK-NEXT: add x8, sp, #8 +; CHECK-NEXT: ld1.s { v5 }[2], [x8] +; CHECK-NEXT: add x8, sp, #28 ; CHECK-NEXT: ld1.s { v16 }[3], [x8] -; CHECK-NEXT: ld1.s { v17 }[3], [x9] +; CHECK-NEXT: add x8, sp, #12 ; CHECK-NEXT: mov.s v4[3], v7[0] ; CHECK-NEXT: mov.s v0[3], v3[0] +; CHECK-NEXT: ld1.s { v5 }[3], [x8] ; CHECK-NEXT: mov d1, v16[1] -; CHECK-NEXT: mov d2, v17[1] +; CHECK-NEXT: mov d2, v5[1] ; CHECK-NEXT: mov d3, v4[1] -; CHECK-NEXT: mov d5, v0[1] +; CHECK-NEXT: mov d6, v0[1] ; CHECK-NEXT: stnp d16, d1, [x0, #48] ; CHECK-NEXT: ldr s1, [sp, #32] -; CHECK-NEXT: stnp d17, d2, [x0, #32] +; CHECK-NEXT: stnp d5, d2, [x0, #32] ; CHECK-NEXT: stnp d4, d3, [x0, #16] -; CHECK-NEXT: stnp d0, d5, [x0] +; CHECK-NEXT: stnp d0, d6, [x0] ; CHECK-NEXT: str s1, [x0, #64] ; CHECK-NEXT: ret @@ -486,8 +486,8 @@ define void @test_stnp_v16i32_invalid_offset(<16 x i32> %v, ptr %ptr) { ; CHECK-LABEL: test_stnp_v16i32_invalid_offset: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: mov w8, #32032 -; CHECK-NEXT: mov w9, #32000 +; CHECK-NEXT: mov w8, #32032 ; =0x7d20 +; CHECK-NEXT: mov w9, #32000 ; =0x7d00 ; CHECK-NEXT: add x8, x0, x8 ; CHECK-NEXT: add x9, x0, x9 ; CHECK-NEXT: stnp q2, q3, [x8] diff --git a/llvm/test/CodeGen/AArch64/nzcv-save.ll b/llvm/test/CodeGen/AArch64/nzcv-save.ll --- a/llvm/test/CodeGen/AArch64/nzcv-save.ll +++ b/llvm/test/CodeGen/AArch64/nzcv-save.ll @@ -6,20 +6,20 @@ define void @f(ptr nocapture %a, ptr nocapture %b, ptr nocapture %cc, ptr nocapture %dd) nounwind uwtable noinline ssp { ; CHECK-LABEL: f: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldp x9, x8, [x2] -; CHECK-NEXT: ldp x11, x10, [x3] +; CHECK-NEXT: ldp x8, x10, [x2] +; CHECK-NEXT: ldp x9, x11, [x3] ; CHECK-NEXT: ldp x13, x12, [x2, #16] -; CHECK-NEXT: ldp x14, x15, [x3, #16] -; CHECK-NEXT: adds x9, x9, x11 -; CHECK-NEXT: adcs x8, x8, x10 -; CHECK-NEXT: adcs x10, x13, x14 -; CHECK-NEXT: adc x11, x12, x15 +; CHECK-NEXT: adds x8, x8, x9 +; CHECK-NEXT: ldp x14, x9, [x3, #16] +; CHECK-NEXT: adcs x10, x10, x11 +; CHECK-NEXT: stp x8, x10, [x0] +; CHECK-NEXT: adcs 
x11, x13, x14 +; CHECK-NEXT: adc x13, x12, x9 ; CHECK-NEXT: orr x12, x12, #0x100 -; CHECK-NEXT: adc x12, x12, x15 -; CHECK-NEXT: stp x9, x8, [x0] -; CHECK-NEXT: stp x10, x11, [x0, #16] -; CHECK-NEXT: stp x10, x12, [x1, #16] -; CHECK-NEXT: stp x9, x8, [x1] +; CHECK-NEXT: adc x9, x12, x9 +; CHECK-NEXT: stp x11, x13, [x0, #16] +; CHECK-NEXT: stp x11, x9, [x1, #16] +; CHECK-NEXT: stp x8, x10, [x1] ; CHECK-NEXT: ret entry: %c = load i256, ptr %cc diff --git a/llvm/test/CodeGen/AArch64/overeager_mla_fusing.ll b/llvm/test/CodeGen/AArch64/overeager_mla_fusing.ll --- a/llvm/test/CodeGen/AArch64/overeager_mla_fusing.ll +++ b/llvm/test/CodeGen/AArch64/overeager_mla_fusing.ll @@ -5,18 +5,18 @@ ; CHECK-LABEL: jsimd_idct_ifast_neon_intrinsic: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ldr q0, [x1, #32] -; CHECK-NEXT: mov w8, w3 ; CHECK-NEXT: ldr q1, [x1, #96] +; CHECK-NEXT: mov w9, w3 ; CHECK-NEXT: ldr q2, [x0, #32] ; CHECK-NEXT: ldr q3, [x0, #96] -; CHECK-NEXT: ldr x9, [x2, #48] +; CHECK-NEXT: ldr x8, [x2, #48] ; CHECK-NEXT: mul v0.8h, v2.8h, v0.8h ; CHECK-NEXT: mul v1.8h, v3.8h, v1.8h ; CHECK-NEXT: add v2.8h, v0.8h, v1.8h -; CHECK-NEXT: str q2, [x9, x8] -; CHECK-NEXT: ldr x9, [x2, #56] ; CHECK-NEXT: sub v0.8h, v0.8h, v1.8h -; CHECK-NEXT: str q0, [x9, x8] +; CHECK-NEXT: str q2, [x8, x9] +; CHECK-NEXT: ldr x8, [x2, #56] +; CHECK-NEXT: str q0, [x8, x9] ; CHECK-NEXT: ret entry: %add.ptr5 = getelementptr inbounds i16, ptr %coef_block, i64 16 diff --git a/llvm/test/CodeGen/AArch64/peephole-and-tst.ll b/llvm/test/CodeGen/AArch64/peephole-and-tst.ll --- a/llvm/test/CodeGen/AArch64/peephole-and-tst.ll +++ b/llvm/test/CodeGen/AArch64/peephole-and-tst.ll @@ -8,11 +8,10 @@ define i32 @test_func_i32_two_uses(i32 %in, i32 %bit, i32 %mask) { ; CHECK-LABEL: test_func_i32_two_uses: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: adrp x9, :got:ptr_wrapper -; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: ldr x9, [x9, :got_lo12:ptr_wrapper] -; CHECK-NEXT: ldr x9, [x9] +; CHECK-NEXT: adrp x8, :got:ptr_wrapper +; CHECK-NEXT: ldr x8, [x8, :got_lo12:ptr_wrapper] +; CHECK-NEXT: ldr x9, [x8] +; CHECK-NEXT: mov w8, wzr ; CHECK-NEXT: b .LBB0_3 ; CHECK-NEXT: .LBB0_1: // in Loop: Header=BB0_3 Depth=1 ; CHECK-NEXT: str xzr, [x9, #8] @@ -21,9 +20,9 @@ ; CHECK-NEXT: cbz w1, .LBB0_6 ; CHECK-NEXT: .LBB0_3: // %do.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ands w10, w1, w8 -; CHECK-NEXT: and w11, w2, w8 -; CHECK-NEXT: cinc w0, w0, ne +; CHECK-NEXT: ands w10, w1, w0 +; CHECK-NEXT: and w11, w2, w0 +; CHECK-NEXT: cinc w8, w8, ne ; CHECK-NEXT: cmp w10, w11 ; CHECK-NEXT: b.eq .LBB0_1 ; CHECK-NEXT: // %bb.4: // %do.body @@ -34,6 +33,7 @@ ; CHECK-NEXT: cbz w10, .LBB0_2 ; CHECK-NEXT: b .LBB0_1 ; CHECK-NEXT: .LBB0_6: // %do.end +; CHECK-NEXT: mov w0, w8 ; CHECK-NEXT: ret entry: %0 = load ptr, ptr @ptr_wrapper, align 8 @@ -72,25 +72,25 @@ define i32 @test_func_i64_one_use(i64 %in, i64 %bit, i64 %mask) { ; CHECK-LABEL: test_func_i64_one_use: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: adrp x9, :got:ptr_wrapper -; CHECK-NEXT: mov x8, x0 -; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: ldr x9, [x9, :got_lo12:ptr_wrapper] -; CHECK-NEXT: ldr x9, [x9] +; CHECK-NEXT: adrp x8, :got:ptr_wrapper +; CHECK-NEXT: ldr x8, [x8, :got_lo12:ptr_wrapper] +; CHECK-NEXT: ldr x9, [x8] +; CHECK-NEXT: mov w8, wzr ; CHECK-NEXT: b .LBB1_2 ; CHECK-NEXT: .LBB1_1: // in Loop: Header=BB1_2 Depth=1 ; CHECK-NEXT: lsl x1, x1, #1 ; CHECK-NEXT: cbz x1, .LBB1_4 ; CHECK-NEXT: .LBB1_2: // %do.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 
-; CHECK-NEXT: ands x10, x1, x8 +; CHECK-NEXT: ands x10, x1, x0 ; CHECK-NEXT: orr x10, x2, x10 -; CHECK-NEXT: cinc w0, w0, ne +; CHECK-NEXT: cinc w8, w8, ne ; CHECK-NEXT: cbz x10, .LBB1_1 ; CHECK-NEXT: // %bb.3: // in Loop: Header=BB1_2 Depth=1 ; CHECK-NEXT: str xzr, [x9, #8] ; CHECK-NEXT: b .LBB1_1 ; CHECK-NEXT: .LBB1_4: // %do.end +; CHECK-NEXT: mov w0, w8 ; CHECK-NEXT: ret entry: %0 = load ptr, ptr @ptr_wrapper, align 8 diff --git a/llvm/test/CodeGen/AArch64/pmull-ldr-merge.ll b/llvm/test/CodeGen/AArch64/pmull-ldr-merge.ll --- a/llvm/test/CodeGen/AArch64/pmull-ldr-merge.ll +++ b/llvm/test/CodeGen/AArch64/pmull-ldr-merge.ll @@ -6,10 +6,10 @@ define void @test1(ptr %0, i64 %1, i64 %2) { ; CHECK-LABEL: test1: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, x2, lsl #4 -; CHECK-NEXT: add x9, x0, x1, lsl #4 -; CHECK-NEXT: ldr d0, [x8, #8] -; CHECK-NEXT: ldr d1, [x9, #8] +; CHECK-NEXT: add x8, x0, x1, lsl #4 +; CHECK-NEXT: add x9, x0, x2, lsl #4 +; CHECK-NEXT: ldr d0, [x9, #8] +; CHECK-NEXT: ldr d1, [x8, #8] ; CHECK-NEXT: pmull v0.1q, v1.1d, v0.1d ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret @@ -49,9 +49,9 @@ ; CHECK-LABEL: test3: ; CHECK: // %bb.0: ; CHECK-NEXT: add x8, x0, x1, lsl #4 -; CHECK-NEXT: fmov d0, x3 -; CHECK-NEXT: ldr d1, [x8, #8] -; CHECK-NEXT: pmull v0.1q, v1.1d, v0.1d +; CHECK-NEXT: fmov d1, x3 +; CHECK-NEXT: ldr d0, [x8, #8] +; CHECK-NEXT: pmull v0.1q, v0.1d, v1.1d ; CHECK-NEXT: str q0, [x8] ; CHECK-NEXT: ret %5 = getelementptr inbounds <2 x i64>, ptr %0, i64 %1 diff --git a/llvm/test/CodeGen/AArch64/pr-cf624b2.ll b/llvm/test/CodeGen/AArch64/pr-cf624b2.ll --- a/llvm/test/CodeGen/AArch64/pr-cf624b2.ll +++ b/llvm/test/CodeGen/AArch64/pr-cf624b2.ll @@ -13,42 +13,42 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: mov x8, sp ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: orr x9, x8, #0xf -; CHECK-NEXT: orr x11, x8, #0xc ; CHECK-NEXT: orr x10, x8, #0xe -; CHECK-NEXT: orr x12, x8, #0x8 ; CHECK-NEXT: st1 { v0.b }[0], [x8] ; CHECK-NEXT: st1 { v0.b }[15], [x9] +; CHECK-NEXT: orr x9, x8, #0xc +; CHECK-NEXT: st1 { v0.b }[12], [x9] +; CHECK-NEXT: orr x9, x8, #0x8 +; CHECK-NEXT: st1 { v0.b }[8], [x9] ; CHECK-NEXT: orr x9, x8, #0x7 -; CHECK-NEXT: st1 { v0.b }[12], [x11] -; CHECK-NEXT: orr x11, x8, #0x4 -; CHECK-NEXT: st1 { v0.b }[14], [x10] -; CHECK-NEXT: orr x10, x8, #0x6 ; CHECK-NEXT: st1 { v0.b }[7], [x9] +; CHECK-NEXT: orr x9, x8, #0x6 +; CHECK-NEXT: st1 { v0.b }[6], [x9] +; CHECK-NEXT: orr x9, x8, #0x4 +; CHECK-NEXT: st1 { v0.b }[4], [x9] ; CHECK-NEXT: orr x9, x8, #0x3 -; CHECK-NEXT: st1 { v0.b }[8], [x12] -; CHECK-NEXT: mov w12, #11 -; CHECK-NEXT: st1 { v0.b }[4], [x11] -; CHECK-NEXT: mov w11, #13 ; CHECK-NEXT: st1 { v0.b }[3], [x9] ; CHECK-NEXT: orr x9, x8, #0x2 -; CHECK-NEXT: st1 { v0.b }[6], [x10] -; CHECK-NEXT: orr x10, x8, #0x1 -; CHECK-NEXT: orr x11, x8, x11 +; CHECK-NEXT: st1 { v0.b }[14], [x10] +; CHECK-NEXT: mov w10, #13 // =0xd ; CHECK-NEXT: st1 { v0.b }[2], [x9] -; CHECK-NEXT: orr x9, x8, x12 -; CHECK-NEXT: st1 { v0.b }[1], [x10] -; CHECK-NEXT: mov w10, #9 -; CHECK-NEXT: st1 { v0.b }[13], [x11] -; CHECK-NEXT: mov w11, #5 +; CHECK-NEXT: orr x9, x8, #0x1 +; CHECK-NEXT: st1 { v0.b }[1], [x9] +; CHECK-NEXT: orr x9, x8, x10 +; CHECK-NEXT: mov w10, #11 // =0xb +; CHECK-NEXT: st1 { v0.b }[13], [x9] +; CHECK-NEXT: orr x9, x8, x10 +; CHECK-NEXT: mov w10, #10 // =0xa ; CHECK-NEXT: st1 { v0.b }[11], [x9] -; CHECK-NEXT: mov w9, #10 -; CHECK-NEXT: orr x9, x8, x9 -; CHECK-NEXT: orr x10, x8, x10 -; CHECK-NEXT: orr x8, x8, 
x11 -; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: orr x9, x8, x10 +; CHECK-NEXT: mov w10, #9 // =0x9 ; CHECK-NEXT: st1 { v0.b }[10], [x9] -; CHECK-NEXT: st1 { v0.b }[9], [x10] +; CHECK-NEXT: orr x9, x8, x10 +; CHECK-NEXT: st1 { v0.b }[9], [x9] +; CHECK-NEXT: mov w9, #5 // =0x5 +; CHECK-NEXT: orr x8, x8, x9 ; CHECK-NEXT: st1 { v0.b }[5], [x8] ; CHECK-NEXT: ldr q0, [sp] ; CHECK-NEXT: stp q0, q1, [x0] diff --git a/llvm/test/CodeGen/AArch64/pr58350.ll b/llvm/test/CodeGen/AArch64/pr58350.ll --- a/llvm/test/CodeGen/AArch64/pr58350.ll +++ b/llvm/test/CodeGen/AArch64/pr58350.ll @@ -12,9 +12,9 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: adrp x8, .LCPI0_0 ; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: bfi x9, x0, #2, #1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_0] +; CHECK-NEXT: bfi x9, x0, #2, #1 ; CHECK-NEXT: str d1, [sp] ; CHECK-NEXT: ldr s1, [x9] ; CHECK-NEXT: mov v1.s[1], v0.s[0] diff --git a/llvm/test/CodeGen/AArch64/pr58516.ll b/llvm/test/CodeGen/AArch64/pr58516.ll --- a/llvm/test/CodeGen/AArch64/pr58516.ll +++ b/llvm/test/CodeGen/AArch64/pr58516.ll @@ -24,13 +24,13 @@ ; CHECK-NEXT: sub x9, sp, #32 ; CHECK-NEXT: and sp, x9, #0xffffffffffffffe0 ; CHECK-NEXT: mov x19, sp -; CHECK-NEXT: mov x1, #-2 -; CHECK-NEXT: add x8, x19, #0 +; CHECK-NEXT: mov x1, #-2 // =0xfffffffffffffffe ; CHECK-NEXT: mov x20, x0 +; CHECK-NEXT: add x8, x19, #0 +; CHECK-NEXT: stur x1, [x29, #24] ; CHECK-NEXT: lsr x21, x8, #3 ; CHECK-NEXT: adrp x8, osfx ; CHECK-NEXT: add x8, x8, :lo12:osfx -; CHECK-NEXT: stur x1, [x29, #24] ; CHECK-NEXT: str x8, [x0] ; CHECK-NEXT: str wzr, [x21] ; CHECK-NEXT: ldr x0, [x0] diff --git a/llvm/test/CodeGen/AArch64/pr61549.ll b/llvm/test/CodeGen/AArch64/pr61549.ll --- a/llvm/test/CodeGen/AArch64/pr61549.ll +++ b/llvm/test/CodeGen/AArch64/pr61549.ll @@ -5,10 +5,10 @@ define i35 @f(i35 %0) { ; CHECK-LABEL: f: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1 // =0x1 -; CHECK-NEXT: sbfx x9, x0, #0, #35 -; CHECK-NEXT: sdiv x10, x8, x9 -; CHECK-NEXT: msub x8, x10, x9, x8 +; CHECK-NEXT: sbfx x8, x0, #0, #35 +; CHECK-NEXT: mov w9, #1 // =0x1 +; CHECK-NEXT: sdiv x10, x9, x8 +; CHECK-NEXT: msub x8, x10, x8, x9 ; CHECK-NEXT: clz x8, x8 ; CHECK-NEXT: sub x8, x8, #29 ; CHECK-NEXT: ubfx x0, x8, #5, #30 @@ -16,10 +16,10 @@ ; ; GISEL-LABEL: f: ; GISEL: // %bb.0: -; GISEL-NEXT: mov w8, #1 // =0x1 -; GISEL-NEXT: sbfx x9, x0, #0, #35 -; GISEL-NEXT: sdiv x10, x8, x9 -; GISEL-NEXT: msub x8, x10, x9, x8 +; GISEL-NEXT: sbfx x8, x0, #0, #35 +; GISEL-NEXT: mov w9, #1 // =0x1 +; GISEL-NEXT: sdiv x10, x9, x8 +; GISEL-NEXT: msub x8, x10, x8, x9 ; GISEL-NEXT: and x8, x8, #0x7ffffffff ; GISEL-NEXT: clz x8, x8 ; GISEL-NEXT: sub x8, x8, #29 diff --git a/llvm/test/CodeGen/AArch64/predicated-add-sub.ll b/llvm/test/CodeGen/AArch64/predicated-add-sub.ll --- a/llvm/test/CodeGen/AArch64/predicated-add-sub.ll +++ b/llvm/test/CodeGen/AArch64/predicated-add-sub.ll @@ -83,8 +83,8 @@ define @zext.add.8xi32( %a, %v) #0 { ; CHECK-LABEL: zext.add.8xi32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z2.s, #1 // =0x1 ; CHECK-NEXT: punpkhi p1.h, p0.b +; CHECK-NEXT: mov z2.s, #1 // =0x1 ; CHECK-NEXT: punpklo p0.h, p0.b ; CHECK-NEXT: add z1.s, p1/m, z1.s, z2.s ; CHECK-NEXT: add z0.s, p0/m, z0.s, z2.s @@ -98,16 +98,16 @@ ; CHECK-LABEL: zext.add.16xi32: ; CHECK: // %bb.0: ; CHECK-NEXT: punpkhi p1.h, p0.b -; CHECK-NEXT: punpklo p0.h, p0.b ; CHECK-NEXT: mov z4.s, #1 // =0x1 +; CHECK-NEXT: punpklo p0.h, p0.b ; CHECK-NEXT: punpkhi p2.h, p1.b ; CHECK-NEXT: punpklo p1.h, p1.b -; CHECK-NEXT: punpkhi 
p3.h, p0.b -; CHECK-NEXT: punpklo p0.h, p0.b -; CHECK-NEXT: add z1.s, p3/m, z1.s, z4.s -; CHECK-NEXT: add z0.s, p0/m, z0.s, z4.s -; CHECK-NEXT: add z2.s, p1/m, z2.s, z4.s +; CHECK-NEXT: punpklo p3.h, p0.b +; CHECK-NEXT: punpkhi p0.h, p0.b ; CHECK-NEXT: add z3.s, p2/m, z3.s, z4.s +; CHECK-NEXT: add z2.s, p1/m, z2.s, z4.s +; CHECK-NEXT: add z0.s, p3/m, z0.s, z4.s +; CHECK-NEXT: add z1.s, p0/m, z1.s, z4.s ; CHECK-NEXT: ret %extend = zext %v to %result = add %a, %extend @@ -194,8 +194,8 @@ define @zext.sub.8xi32( %a, %v) #0 { ; CHECK-LABEL: zext.sub.8xi32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff ; CHECK-NEXT: punpklo p1.h, p0.b +; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff ; CHECK-NEXT: punpkhi p0.h, p0.b ; CHECK-NEXT: add z0.s, p1/m, z0.s, z2.s ; CHECK-NEXT: add z1.s, p0/m, z1.s, z2.s @@ -213,11 +213,11 @@ ; CHECK-NEXT: punpkhi p0.h, p0.b ; CHECK-NEXT: punpklo p2.h, p1.b ; CHECK-NEXT: punpkhi p1.h, p1.b +; CHECK-NEXT: punpklo p3.h, p0.b +; CHECK-NEXT: punpkhi p0.h, p0.b ; CHECK-NEXT: add z0.s, p2/m, z0.s, z4.s ; CHECK-NEXT: add z1.s, p1/m, z1.s, z4.s -; CHECK-NEXT: punpklo p1.h, p0.b -; CHECK-NEXT: punpkhi p0.h, p0.b -; CHECK-NEXT: add z2.s, p1/m, z2.s, z4.s +; CHECK-NEXT: add z2.s, p3/m, z2.s, z4.s ; CHECK-NEXT: add z3.s, p0/m, z3.s, z4.s ; CHECK-NEXT: ret %extend = zext %v to @@ -305,8 +305,8 @@ define @sext.add.8xi32( %a, %v) #0 { ; CHECK-LABEL: sext.add.8xi32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff ; CHECK-NEXT: punpkhi p1.h, p0.b +; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff ; CHECK-NEXT: punpklo p0.h, p0.b ; CHECK-NEXT: add z1.s, p1/m, z1.s, z2.s ; CHECK-NEXT: add z0.s, p0/m, z0.s, z2.s @@ -320,16 +320,16 @@ ; CHECK-LABEL: sext.add.16xi32: ; CHECK: // %bb.0: ; CHECK-NEXT: punpkhi p1.h, p0.b -; CHECK-NEXT: punpklo p0.h, p0.b ; CHECK-NEXT: mov z4.s, #-1 // =0xffffffffffffffff +; CHECK-NEXT: punpklo p0.h, p0.b ; CHECK-NEXT: punpkhi p2.h, p1.b ; CHECK-NEXT: punpklo p1.h, p1.b -; CHECK-NEXT: punpkhi p3.h, p0.b -; CHECK-NEXT: punpklo p0.h, p0.b -; CHECK-NEXT: add z1.s, p3/m, z1.s, z4.s -; CHECK-NEXT: add z0.s, p0/m, z0.s, z4.s -; CHECK-NEXT: add z2.s, p1/m, z2.s, z4.s +; CHECK-NEXT: punpklo p3.h, p0.b +; CHECK-NEXT: punpkhi p0.h, p0.b ; CHECK-NEXT: add z3.s, p2/m, z3.s, z4.s +; CHECK-NEXT: add z2.s, p1/m, z2.s, z4.s +; CHECK-NEXT: add z0.s, p3/m, z0.s, z4.s +; CHECK-NEXT: add z1.s, p0/m, z1.s, z4.s ; CHECK-NEXT: ret %extend = sext %v to %result = add %a, %extend @@ -416,8 +416,8 @@ define @sext.sub.8xi32( %a, %v) #0 { ; CHECK-LABEL: sext.sub.8xi32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff ; CHECK-NEXT: punpkhi p1.h, p0.b +; CHECK-NEXT: mov z2.s, #-1 // =0xffffffffffffffff ; CHECK-NEXT: punpklo p0.h, p0.b ; CHECK-NEXT: sub z1.s, p1/m, z1.s, z2.s ; CHECK-NEXT: sub z0.s, p0/m, z0.s, z2.s @@ -431,16 +431,16 @@ ; CHECK-LABEL: sext.sub.16xi32: ; CHECK: // %bb.0: ; CHECK-NEXT: punpkhi p1.h, p0.b -; CHECK-NEXT: punpklo p0.h, p0.b ; CHECK-NEXT: mov z4.s, #-1 // =0xffffffffffffffff +; CHECK-NEXT: punpklo p0.h, p0.b ; CHECK-NEXT: punpkhi p2.h, p1.b ; CHECK-NEXT: punpklo p1.h, p1.b -; CHECK-NEXT: punpkhi p3.h, p0.b -; CHECK-NEXT: punpklo p0.h, p0.b -; CHECK-NEXT: sub z1.s, p3/m, z1.s, z4.s -; CHECK-NEXT: sub z0.s, p0/m, z0.s, z4.s -; CHECK-NEXT: sub z2.s, p1/m, z2.s, z4.s +; CHECK-NEXT: punpklo p3.h, p0.b +; CHECK-NEXT: punpkhi p0.h, p0.b ; CHECK-NEXT: sub z3.s, p2/m, z3.s, z4.s +; CHECK-NEXT: sub z2.s, p1/m, z2.s, z4.s +; CHECK-NEXT: sub z0.s, p3/m, z0.s, z4.s +; CHECK-NEXT: sub z1.s, p0/m, z1.s, 
z4.s ; CHECK-NEXT: ret %extend = sext %v to %result = sub %a, %extend diff --git a/llvm/test/CodeGen/AArch64/pull-negations-after-concat-of-truncates.ll b/llvm/test/CodeGen/AArch64/pull-negations-after-concat-of-truncates.ll --- a/llvm/test/CodeGen/AArch64/pull-negations-after-concat-of-truncates.ll +++ b/llvm/test/CodeGen/AArch64/pull-negations-after-concat-of-truncates.ll @@ -48,11 +48,10 @@ ; CHECK-NEXT: xtn v1.4h, v1.4s ; CHECK-NEXT: mvn v0.8b, v0.8b ; CHECK-NEXT: mvn v1.8b, v1.8b -; CHECK-NEXT: mov v2.16b, v0.16b -; CHECK-NEXT: add v0.4h, v0.4h, v1.4h -; CHECK-NEXT: mov v2.d[1], v1.d[0] -; CHECK-NEXT: mov v0.d[1], v0.d[0] -; CHECK-NEXT: add v0.8h, v2.8h, v0.8h +; CHECK-NEXT: add v2.4h, v0.4h, v1.4h +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: mov v2.d[1], v2.d[0] +; CHECK-NEXT: add v0.8h, v0.8h, v2.8h ; CHECK-NEXT: ret %notx = xor <4 x i32> %x, %trnx = trunc <4 x i32> %notx to <4 x i16> diff --git a/llvm/test/CodeGen/AArch64/ragreedy-csr.ll b/llvm/test/CodeGen/AArch64/ragreedy-csr.ll --- a/llvm/test/CodeGen/AArch64/ragreedy-csr.ll +++ b/llvm/test/CodeGen/AArch64/ragreedy-csr.ll @@ -33,14 +33,14 @@ ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: Lloh0: ; CHECK-NEXT: adrp x14, __DefaultRuneLocale@GOTPAGE -; CHECK-NEXT: mov x9, xzr ; CHECK-NEXT: ldrb w12, [x0, #4] ; CHECK-NEXT: ldrb w13, [x1, #4] -; CHECK-NEXT: ldr x10, [x0, #16] -; CHECK-NEXT: ldr x11, [x1, #16] +; CHECK-NEXT: ldr x9, [x0, #16] +; CHECK-NEXT: ldr x10, [x1, #16] +; CHECK-NEXT: mov x11, xzr ; CHECK-NEXT: Lloh1: ; CHECK-NEXT: ldr x14, [x14, __DefaultRuneLocale@GOTPAGEOFF] -; CHECK-NEXT: ldrsb x8, [x10, x9] +; CHECK-NEXT: ldrsb x8, [x9, x11] ; CHECK-NEXT: tbz x8, #63, LBB0_3 ; CHECK-NEXT: LBB0_2: ; %cond.false.i.i ; CHECK-NEXT: stp x9, x0, [sp, #32] ; 16-byte Folded Spill @@ -69,7 +69,7 @@ ; CHECK-NEXT: and w8, w8, #0x8000 ; CHECK-NEXT: cbnz w8, LBB0_6 ; CHECK-NEXT: LBB0_4: ; %lor.rhs -; CHECK-NEXT: ldrsb x8, [x11, x9] +; CHECK-NEXT: ldrsb x8, [x10, x11] ; CHECK-NEXT: tbnz x8, #63, LBB0_8 ; CHECK-NEXT: ; %bb.5: ; %cond.true.i.i217 ; CHECK-NEXT: add x8, x14, x8, lsl #2 @@ -77,13 +77,13 @@ ; CHECK-NEXT: and w8, w8, #0x8000 ; CHECK-NEXT: cbz w8, LBB0_9 ; CHECK-NEXT: LBB0_6: ; %while.body -; CHECK-NEXT: ldrb w8, [x10, x9] -; CHECK-NEXT: ldrb w15, [x11, x9] +; CHECK-NEXT: ldrb w8, [x9, x11] +; CHECK-NEXT: ldrb w15, [x10, x11] ; CHECK-NEXT: cmp w8, w15 ; CHECK-NEXT: b.ne LBB0_42 ; CHECK-NEXT: ; %bb.7: ; %if.end17 -; CHECK-NEXT: add x9, x9, #1 -; CHECK-NEXT: ldrsb x8, [x10, x9] +; CHECK-NEXT: add x11, x11, #1 +; CHECK-NEXT: ldrsb x8, [x9, x11] ; CHECK-NEXT: tbz x8, #63, LBB0_3 ; CHECK-NEXT: b LBB0_2 ; CHECK-NEXT: LBB0_8: ; %cond.false.i.i219 @@ -111,16 +111,16 @@ ; CHECK-NEXT: cbnz w8, LBB0_24 ; CHECK-NEXT: ; %bb.10: ; %if.then23 ; CHECK-NEXT: ldr x12, [x0, #16] -; CHECK-NEXT: ldrb w8, [x10, x9] +; CHECK-NEXT: ldrb w8, [x9, x11] ; CHECK-NEXT: ldrb w13, [x12] ; CHECK-NEXT: cmp w13, #83 ; CHECK-NEXT: b.eq LBB0_19 ; CHECK-NEXT: LBB0_11: ; %while.cond59.preheader ; CHECK-NEXT: cbz w8, LBB0_23 ; CHECK-NEXT: LBB0_12: ; %land.rhs.preheader -; CHECK-NEXT: add x10, x10, x9 -; CHECK-NEXT: add x9, x11, x9 -; CHECK-NEXT: add x10, x10, #1 +; CHECK-NEXT: add x12, x9, x11 +; CHECK-NEXT: add x9, x10, x11 +; CHECK-NEXT: add x10, x12, #1 ; CHECK-NEXT: LBB0_13: ; %land.rhs ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldrb w11, [x9], #1 @@ -135,8 +135,8 @@ ; CHECK-NEXT: b.eq LBB0_18 ; CHECK-NEXT: ; %bb.16: ; %lor.lhs.false74 ; CHECK-NEXT: ; in Loop: Header=BB0_13 Depth=1 -; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: cmp w8, w11 
+; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: b.ne LBB0_43 ; CHECK-NEXT: ; %bb.17: ; %lor.lhs.false74 ; CHECK-NEXT: ; in Loop: Header=BB0_13 Depth=1 @@ -154,12 +154,12 @@ ; CHECK-NEXT: cmp w8, #112 ; CHECK-NEXT: b.ne LBB0_12 ; CHECK-NEXT: ; %bb.21: ; %land.lhs.true35 -; CHECK-NEXT: ldrb w13, [x11, x9] +; CHECK-NEXT: ldrb w13, [x10, x11] ; CHECK-NEXT: cmp w13, #112 ; CHECK-NEXT: b.ne LBB0_12 ; CHECK-NEXT: ; %bb.22: ; %land.lhs.true43 -; CHECK-NEXT: sub x12, x10, x12 -; CHECK-NEXT: add x12, x12, x9 +; CHECK-NEXT: sub x12, x9, x12 +; CHECK-NEXT: add x12, x12, x11 ; CHECK-NEXT: cmp x12, #1 ; CHECK-NEXT: b.ne LBB0_44 ; CHECK-NEXT: LBB0_23: @@ -172,7 +172,7 @@ ; CHECK-NEXT: cmp w13, #2 ; CHECK-NEXT: b.ne LBB0_33 ; CHECK-NEXT: ; %bb.26: ; %while.cond95.preheader -; CHECK-NEXT: ldrb w12, [x10, x9] +; CHECK-NEXT: ldrb w12, [x9, x11] ; CHECK-NEXT: cbz w12, LBB0_23 ; CHECK-NEXT: ; %bb.27: ; %land.rhs99.preheader ; CHECK-NEXT: mov x8, xzr @@ -180,15 +180,15 @@ ; CHECK-NEXT: b LBB0_29 ; CHECK-NEXT: LBB0_28: ; %if.then117 ; CHECK-NEXT: ; in Loop: Header=BB0_29 Depth=1 -; CHECK-NEXT: add x12, x10, x8 +; CHECK-NEXT: add x12, x9, x8 ; CHECK-NEXT: add x8, x8, #1 -; CHECK-NEXT: add x12, x12, x9 +; CHECK-NEXT: add x12, x12, x11 ; CHECK-NEXT: ldrb w12, [x12, #1] ; CHECK-NEXT: cbz w12, LBB0_43 ; CHECK-NEXT: LBB0_29: ; %land.rhs99 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add x13, x11, x8 -; CHECK-NEXT: ldrb w13, [x13, x9] +; CHECK-NEXT: add x13, x10, x8 +; CHECK-NEXT: ldrb w13, [x13, x11] ; CHECK-NEXT: cbz w13, LBB0_23 ; CHECK-NEXT: ; %bb.30: ; %while.body104 ; CHECK-NEXT: ; in Loop: Header=BB0_29 Depth=1 @@ -204,14 +204,14 @@ ; CHECK-NEXT: b.eq LBB0_28 ; CHECK-NEXT: b LBB0_42 ; CHECK-NEXT: LBB0_33: ; %if.else123 -; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: cmp w13, #1 +; CHECK-NEXT: mov w0, wzr ; CHECK-NEXT: b.ne LBB0_43 ; CHECK-NEXT: ; %bb.34: ; %if.else123 ; CHECK-NEXT: cmp w12, #2 ; CHECK-NEXT: b.ne LBB0_43 ; CHECK-NEXT: ; %bb.35: ; %while.cond130.preheader -; CHECK-NEXT: ldrb w8, [x10, x9] +; CHECK-NEXT: ldrb w8, [x9, x11] ; CHECK-NEXT: cbz w8, LBB0_23 ; CHECK-NEXT: ; %bb.36: ; %land.rhs134.preheader ; CHECK-NEXT: mov x12, xzr @@ -219,15 +219,15 @@ ; CHECK-NEXT: b LBB0_38 ; CHECK-NEXT: LBB0_37: ; %if.then152 ; CHECK-NEXT: ; in Loop: Header=BB0_38 Depth=1 -; CHECK-NEXT: add x8, x10, x12 +; CHECK-NEXT: add x8, x9, x12 ; CHECK-NEXT: add x12, x12, #1 -; CHECK-NEXT: add x8, x8, x9 +; CHECK-NEXT: add x8, x8, x11 ; CHECK-NEXT: ldrb w8, [x8, #1] ; CHECK-NEXT: cbz w8, LBB0_43 ; CHECK-NEXT: LBB0_38: ; %land.rhs134 ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add x13, x11, x12 -; CHECK-NEXT: ldrb w13, [x13, x9] +; CHECK-NEXT: add x13, x10, x12 +; CHECK-NEXT: ldrb w13, [x13, x11] ; CHECK-NEXT: cbz w13, LBB0_23 ; CHECK-NEXT: ; %bb.39: ; %while.body139 ; CHECK-NEXT: ; in Loop: Header=BB0_38 Depth=1 @@ -251,7 +251,7 @@ ; CHECK-NEXT: cmp x12, #2 ; CHECK-NEXT: b.ne LBB0_11 ; CHECK-NEXT: ; %bb.45: ; %land.lhs.true52 -; CHECK-NEXT: add x12, x10, x9 +; CHECK-NEXT: add x12, x9, x11 ; CHECK-NEXT: mov w0, #1 ; =0x1 ; CHECK-NEXT: ldurb w12, [x12, #-1] ; CHECK-NEXT: cmp w12, #73 diff --git a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll --- a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll +++ b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll @@ -8,34 +8,40 @@ define dso_local void @run_test() local_unnamed_addr uwtable { ; CHECK-LABEL: run_test: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #96 
-; CHECK-NEXT: .cfi_def_cfa_offset 96 -; CHECK-NEXT: stp d15, d14, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_offset b8, -8 -; CHECK-NEXT: .cfi_offset b9, -16 -; CHECK-NEXT: .cfi_offset b10, -24 -; CHECK-NEXT: .cfi_offset b11, -32 -; CHECK-NEXT: .cfi_offset b12, -40 -; CHECK-NEXT: .cfi_offset b13, -48 -; CHECK-NEXT: .cfi_offset b14, -56 -; CHECK-NEXT: .cfi_offset b15, -64 -; CHECK-NEXT: movi v14.2d, #0000000000000000 +; CHECK-NEXT: sub sp, sp, #192 +; CHECK-NEXT: .cfi_def_cfa_offset 192 +; CHECK-NEXT: stp d15, d14, [sp, #96] // 16-byte Folded Spill +; CHECK-NEXT: stp d13, d12, [sp, #112] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #128] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #144] // 16-byte Folded Spill +; CHECK-NEXT: stp x22, x21, [sp, #160] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #176] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_offset w19, -8 +; CHECK-NEXT: .cfi_offset w20, -16 +; CHECK-NEXT: .cfi_offset w21, -24 +; CHECK-NEXT: .cfi_offset w22, -32 +; CHECK-NEXT: .cfi_offset b8, -40 +; CHECK-NEXT: .cfi_offset b9, -48 +; CHECK-NEXT: .cfi_offset b10, -56 +; CHECK-NEXT: .cfi_offset b11, -64 +; CHECK-NEXT: .cfi_offset b12, -72 +; CHECK-NEXT: .cfi_offset b13, -80 +; CHECK-NEXT: .cfi_offset b14, -88 +; CHECK-NEXT: .cfi_offset b15, -96 +; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: mov x9, xzr ; CHECK-NEXT: adrp x10, B+48 ; CHECK-NEXT: add x10, x10, :lo12:B+48 ; CHECK-NEXT: adrp x11, A ; CHECK-NEXT: add x11, x11, :lo12:A -; CHECK-NEXT: // implicit-def: $q2 +; CHECK-NEXT: // implicit-def: $q6 +; CHECK-NEXT: // implicit-def: $q7 +; CHECK-NEXT: // implicit-def: $q10 ; CHECK-NEXT: // implicit-def: $q3 -; CHECK-NEXT: // implicit-def: $q15 ; CHECK-NEXT: // implicit-def: $q4 ; CHECK-NEXT: // implicit-def: $q5 -; CHECK-NEXT: // implicit-def: $q6 -; CHECK-NEXT: // implicit-def: $q7 +; CHECK-NEXT: // implicit-def: $q2 ; CHECK-NEXT: // implicit-def: $q16 ; CHECK-NEXT: // implicit-def: $q17 ; CHECK-NEXT: // implicit-def: $q18 @@ -46,131 +52,189 @@ ; CHECK-NEXT: // implicit-def: $q23 ; CHECK-NEXT: // implicit-def: $q24 ; CHECK-NEXT: // implicit-def: $q25 -; CHECK-NEXT: // implicit-def: $q26 ; CHECK-NEXT: // implicit-def: $q27 +; CHECK-NEXT: // implicit-def: $q26 ; CHECK-NEXT: // implicit-def: $q28 -; CHECK-NEXT: // implicit-def: $q29 ; CHECK-NEXT: // implicit-def: $q30 +; CHECK-NEXT: // implicit-def: $q15 +; CHECK-NEXT: // implicit-def: $q29 ; CHECK-NEXT: // implicit-def: $q31 -; CHECK-NEXT: // implicit-def: $q8 -; CHECK-NEXT: // implicit-def: $q9 -; CHECK-NEXT: // implicit-def: $q10 ; CHECK-NEXT: // implicit-def: $q11 +; CHECK-NEXT: // implicit-def: $q9 +; CHECK-NEXT: // kill: killed $q6 ; CHECK-NEXT: // implicit-def: $q12 ; CHECK-NEXT: // implicit-def: $q13 +; CHECK-NEXT: // implicit-def: $q6 +; CHECK-NEXT: // kill: killed $q6 ; CHECK-NEXT: .LBB0_1: // %for.cond1.preheader ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: mov x12, xzr -; CHECK-NEXT: stp q15, q14, [sp] // 32-byte Folded Spill ; CHECK-NEXT: ldr q14, [x8] -; CHECK-NEXT: add x15, x11, x8 -; CHECK-NEXT: ldr q15, [x10], #64 -; CHECK-NEXT: ldr q0, [x12] -; CHECK-NEXT: add x9, x9, #1 -; CHECK-NEXT: ldr x12, [x12] -; CHECK-NEXT: fmov x13, d14 +; CHECK-NEXT: mov x12, xzr +; CHECK-NEXT: add x7, x11, x8 +; CHECK-NEXT: ldr x13, [x12] +; CHECK-NEXT: ldr 
x5, [x8] +; CHECK-NEXT: ldr x7, [x7, #128] ; CHECK-NEXT: mov x14, v14.d[1] -; CHECK-NEXT: fmov x0, d15 -; CHECK-NEXT: fmov x16, d0 -; CHECK-NEXT: ldr x15, [x15, #128] -; CHECK-NEXT: mul x17, x13, x12 -; CHECK-NEXT: mov x18, v0.d[1] -; CHECK-NEXT: mul x4, x0, x12 -; CHECK-NEXT: mul x1, x16, x12 -; CHECK-NEXT: mul x3, x14, x12 -; CHECK-NEXT: fmov d0, x17 -; CHECK-NEXT: mul x5, x13, x15 -; CHECK-NEXT: mov x17, v15.d[1] -; CHECK-NEXT: fmov d15, x4 -; CHECK-NEXT: fmov d14, x1 -; CHECK-NEXT: mul x1, x18, x12 -; CHECK-NEXT: mov v0.d[1], x3 -; CHECK-NEXT: mul x3, x16, x15 -; CHECK-NEXT: ldr x2, [x8], #8 -; CHECK-NEXT: mul x12, x17, x12 -; CHECK-NEXT: fmov d1, x5 -; CHECK-NEXT: mov v14.d[1], x1 -; CHECK-NEXT: mul x1, x14, x15 -; CHECK-NEXT: add v12.2d, v12.2d, v0.2d -; CHECK-NEXT: mul x13, x13, x2 -; CHECK-NEXT: fmov d0, x3 -; CHECK-NEXT: mul x3, x0, x15 -; CHECK-NEXT: mov v15.d[1], x12 -; CHECK-NEXT: mul x12, x18, x2 -; CHECK-NEXT: mov v1.d[1], x1 -; CHECK-NEXT: mul x18, x18, x15 -; CHECK-NEXT: mul x16, x16, x2 +; CHECK-NEXT: stp q22, q26, [sp] // 32-byte Folded Spill +; CHECK-NEXT: mov v22.16b, v9.16b +; CHECK-NEXT: stp q31, q15, [sp, #32] // 32-byte Folded Spill +; CHECK-NEXT: ldr q15, [x12] +; CHECK-NEXT: fmov x12, d14 +; CHECK-NEXT: ldr q14, [x10], #64 +; CHECK-NEXT: mov v9.16b, v30.16b +; CHECK-NEXT: fmov x17, d15 +; CHECK-NEXT: mov x16, v15.d[1] +; CHECK-NEXT: mov v30.16b, v27.16b +; CHECK-NEXT: mul x15, x12, x13 +; CHECK-NEXT: mov x0, v14.d[1] +; CHECK-NEXT: fmov x4, d14 +; CHECK-NEXT: mov v27.16b, v23.16b +; CHECK-NEXT: mov v23.16b, v19.16b +; CHECK-NEXT: mov v19.16b, v2.16b +; CHECK-NEXT: mul x1, x14, x13 +; CHECK-NEXT: mov v8.16b, v28.16b +; CHECK-NEXT: mov v28.16b, v24.16b +; CHECK-NEXT: mov v24.16b, v20.16b +; CHECK-NEXT: mov v20.16b, v16.16b +; CHECK-NEXT: mov v16.16b, v3.16b +; CHECK-NEXT: mul x18, x17, x13 +; CHECK-NEXT: mov v31.16b, v18.16b +; CHECK-NEXT: mov v26.16b, v5.16b +; CHECK-NEXT: fmov d15, x15 +; CHECK-NEXT: mov v5.16b, v1.16b +; CHECK-NEXT: mov v18.16b, v10.16b +; CHECK-NEXT: mul x2, x16, x13 +; CHECK-NEXT: mov v10.16b, v29.16b +; CHECK-NEXT: mov v29.16b, v25.16b +; CHECK-NEXT: mov v25.16b, v21.16b +; CHECK-NEXT: mov v21.16b, v17.16b +; CHECK-NEXT: mov v17.16b, v4.16b +; CHECK-NEXT: mov v15.d[1], x1 +; CHECK-NEXT: mul x19, x12, x5 +; CHECK-NEXT: add x8, x8, #8 +; CHECK-NEXT: fmov d14, x18 ; CHECK-NEXT: cmp x8, #64 -; CHECK-NEXT: mul x15, x17, x15 -; CHECK-NEXT: add v13.2d, v13.2d, v14.2d -; CHECK-NEXT: mul x14, x14, x2 -; CHECK-NEXT: add v11.2d, v11.2d, v14.2d -; CHECK-NEXT: fmov d14, x3 -; CHECK-NEXT: add v10.2d, v10.2d, v15.2d -; CHECK-NEXT: fmov d15, x13 -; CHECK-NEXT: mov v0.d[1], x18 -; CHECK-NEXT: mul x13, x0, x2 -; CHECK-NEXT: add v29.2d, v29.2d, v1.2d -; CHECK-NEXT: fmov d1, x16 -; CHECK-NEXT: mov v14.d[1], x15 -; CHECK-NEXT: mov v15.d[1], x14 -; CHECK-NEXT: mov v1.d[1], x12 -; CHECK-NEXT: mul x12, x17, x2 -; CHECK-NEXT: add v28.2d, v28.2d, v0.2d +; CHECK-NEXT: add x9, x9, #1 +; CHECK-NEXT: mul x12, x12, x7 +; CHECK-NEXT: mov v14.d[1], x2 +; CHECK-NEXT: add v12.2d, v12.2d, v15.2d +; CHECK-NEXT: mul x3, x0, x13 +; CHECK-NEXT: fmov d1, x19 +; CHECK-NEXT: mul x13, x4, x13 +; CHECK-NEXT: fmov d2, x12 +; CHECK-NEXT: mul x6, x14, x5 +; CHECK-NEXT: add v6.2d, v13.2d, v14.2d +; CHECK-NEXT: mov v13.16b, v12.16b +; CHECK-NEXT: ldr q12, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: mul x14, x14, x7 ; CHECK-NEXT: fmov d0, x13 -; CHECK-NEXT: add v27.2d, v27.2d, v14.2d -; CHECK-NEXT: ldr q14, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: add v8.2d, v8.2d, v15.2d -; 
CHECK-NEXT: mov v0.d[1], x12 -; CHECK-NEXT: add v25.2d, v25.2d, v15.2d -; CHECK-NEXT: add v22.2d, v22.2d, v15.2d -; CHECK-NEXT: add v18.2d, v18.2d, v15.2d -; CHECK-NEXT: add v6.2d, v6.2d, v15.2d -; CHECK-NEXT: add v14.2d, v14.2d, v15.2d -; CHECK-NEXT: ldr q15, [sp] // 16-byte Folded Reload -; CHECK-NEXT: add v9.2d, v9.2d, v1.2d -; CHECK-NEXT: add v31.2d, v31.2d, v1.2d -; CHECK-NEXT: add v26.2d, v26.2d, v1.2d +; CHECK-NEXT: add v12.2d, v12.2d, v14.2d +; CHECK-NEXT: mul x21, x17, x7 +; CHECK-NEXT: mov v1.d[1], x6 +; CHECK-NEXT: mul x18, x4, x7 +; CHECK-NEXT: mov v0.d[1], x3 +; CHECK-NEXT: mov v2.d[1], x14 +; CHECK-NEXT: str q12, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: mov v12.16b, v13.16b +; CHECK-NEXT: mul x13, x17, x5 +; CHECK-NEXT: mov v13.16b, v6.16b +; CHECK-NEXT: fmov d3, x21 +; CHECK-NEXT: ldp q15, q6, [sp, #48] // 32-byte Folded Reload +; CHECK-NEXT: mul x20, x16, x7 +; CHECK-NEXT: add v11.2d, v11.2d, v1.2d +; CHECK-NEXT: fmov d4, x18 +; CHECK-NEXT: mul x22, x0, x7 +; CHECK-NEXT: add v6.2d, v6.2d, v0.2d +; CHECK-NEXT: add v15.2d, v15.2d, v2.2d +; CHECK-NEXT: fmov d14, x13 +; CHECK-NEXT: mov v2.16b, v19.16b +; CHECK-NEXT: mov v19.16b, v23.16b +; CHECK-NEXT: mul x14, x4, x5 +; CHECK-NEXT: mov v23.16b, v27.16b +; CHECK-NEXT: mov v27.16b, v30.16b +; CHECK-NEXT: mov v3.d[1], x20 +; CHECK-NEXT: mov v30.16b, v9.16b +; CHECK-NEXT: mov v9.16b, v22.16b +; CHECK-NEXT: mul x12, x16, x5 +; CHECK-NEXT: str q6, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: mov v6.16b, v18.16b +; CHECK-NEXT: mov v4.d[1], x22 +; CHECK-NEXT: add v27.2d, v27.2d, v1.2d ; CHECK-NEXT: add v23.2d, v23.2d, v1.2d -; CHECK-NEXT: add v21.2d, v21.2d, v1.2d +; CHECK-NEXT: mul x13, x0, x5 ; CHECK-NEXT: add v19.2d, v19.2d, v1.2d -; CHECK-NEXT: add v17.2d, v17.2d, v1.2d -; CHECK-NEXT: add v7.2d, v7.2d, v1.2d -; CHECK-NEXT: add v5.2d, v5.2d, v1.2d -; CHECK-NEXT: add v15.2d, v15.2d, v1.2d -; CHECK-NEXT: add v3.2d, v3.2d, v1.2d -; CHECK-NEXT: add v30.2d, v30.2d, v0.2d -; CHECK-NEXT: add v24.2d, v24.2d, v0.2d -; CHECK-NEXT: add v20.2d, v20.2d, v0.2d -; CHECK-NEXT: add v16.2d, v16.2d, v0.2d +; CHECK-NEXT: add v2.2d, v2.2d, v1.2d +; CHECK-NEXT: fmov d0, x14 +; CHECK-NEXT: add v30.2d, v30.2d, v3.2d +; CHECK-NEXT: mov v3.16b, v16.16b +; CHECK-NEXT: mov v16.16b, v20.16b +; CHECK-NEXT: mov v20.16b, v24.16b +; CHECK-NEXT: mov v24.16b, v28.16b +; CHECK-NEXT: mov v14.d[1], x12 +; CHECK-NEXT: mov v28.16b, v8.16b +; CHECK-NEXT: add v1.2d, v5.2d, v1.2d +; CHECK-NEXT: add v28.2d, v8.2d, v4.2d +; CHECK-NEXT: mov v4.16b, v17.16b +; CHECK-NEXT: mov v17.16b, v21.16b +; CHECK-NEXT: mov v0.d[1], x13 +; CHECK-NEXT: mov v21.16b, v25.16b +; CHECK-NEXT: mov v25.16b, v29.16b +; CHECK-NEXT: mov v29.16b, v10.16b +; CHECK-NEXT: mov v5.16b, v26.16b +; CHECK-NEXT: mov v18.16b, v31.16b +; CHECK-NEXT: ldp q22, q26, [sp] // 32-byte Folded Reload +; CHECK-NEXT: ldr q31, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: add v9.2d, v9.2d, v14.2d +; CHECK-NEXT: add v24.2d, v24.2d, v14.2d +; CHECK-NEXT: add v20.2d, v20.2d, v14.2d +; CHECK-NEXT: add v31.2d, v31.2d, v14.2d +; CHECK-NEXT: add v18.2d, v18.2d, v14.2d +; CHECK-NEXT: add v16.2d, v16.2d, v14.2d +; CHECK-NEXT: add v26.2d, v26.2d, v14.2d +; CHECK-NEXT: add v22.2d, v22.2d, v14.2d +; CHECK-NEXT: add v5.2d, v5.2d, v14.2d +; CHECK-NEXT: add v3.2d, v3.2d, v14.2d +; CHECK-NEXT: add v10.2d, v6.2d, v14.2d +; CHECK-NEXT: add v29.2d, v29.2d, v0.2d +; CHECK-NEXT: add v25.2d, v25.2d, v0.2d +; CHECK-NEXT: add v21.2d, v21.2d, v0.2d +; CHECK-NEXT: add v17.2d, v17.2d, v0.2d ; CHECK-NEXT: add v4.2d, v4.2d, v0.2d -; 
CHECK-NEXT: add v2.2d, v2.2d, v0.2d +; CHECK-NEXT: add v7.2d, v7.2d, v0.2d ; CHECK-NEXT: b.ne .LBB0_1 ; CHECK-NEXT: // %bb.2: // %for.cond.cleanup ; CHECK-NEXT: adrp x8, C ; CHECK-NEXT: add x8, x8, :lo12:C +; CHECK-NEXT: ldr q0, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: stp q13, q12, [x8] -; CHECK-NEXT: stp q11, q10, [x8, #32] -; CHECK-NEXT: stp q9, q8, [x8, #64] -; CHECK-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: stp q31, q30, [x8, #96] -; CHECK-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: stp q29, q28, [x8, #144] -; CHECK-NEXT: stp q27, q26, [x8, #176] -; CHECK-NEXT: str q25, [x8, #208] -; CHECK-NEXT: stp q24, q23, [x8, #240] -; CHECK-NEXT: stp q22, q21, [x8, #272] -; CHECK-NEXT: stp q20, q19, [x8, #304] -; CHECK-NEXT: stp q18, q17, [x8, #336] -; CHECK-NEXT: stp q16, q7, [x8, #368] -; CHECK-NEXT: stp q6, q5, [x8, #400] -; CHECK-NEXT: stp q4, q15, [x8, #432] -; CHECK-NEXT: stp q14, q3, [x8, #464] -; CHECK-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: str q2, [x8, #496] -; CHECK-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: add sp, sp, #96 +; CHECK-NEXT: ldr q6, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: stp q9, q11, [x8, #64] +; CHECK-NEXT: ldp x20, x19, [sp, #176] // 16-byte Folded Reload +; CHECK-NEXT: stp q15, q30, [x8, #144] +; CHECK-NEXT: ldp x22, x21, [sp, #160] // 16-byte Folded Reload +; CHECK-NEXT: stp q4, q3, [x8, #432] +; CHECK-NEXT: ldp d9, d8, [sp, #144] // 16-byte Folded Reload +; CHECK-NEXT: stp q0, q6, [x8, #32] +; CHECK-NEXT: ldp d13, d12, [sp, #112] // 16-byte Folded Reload +; CHECK-NEXT: stp q31, q29, [x8, #96] +; CHECK-NEXT: ldp d15, d14, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: stp q28, q26, [x8, #176] +; CHECK-NEXT: str q27, [x8, #208] +; CHECK-NEXT: stp q25, q24, [x8, #240] +; CHECK-NEXT: stp q23, q22, [x8, #272] +; CHECK-NEXT: stp q21, q20, [x8, #304] +; CHECK-NEXT: stp q19, q18, [x8, #336] +; CHECK-NEXT: stp q17, q16, [x8, #368] +; CHECK-NEXT: stp q2, q5, [x8, #400] +; CHECK-NEXT: stp q1, q10, [x8, #464] +; CHECK-NEXT: ldp d11, d10, [sp, #128] // 16-byte Folded Reload +; CHECK-NEXT: str q7, [x8, #496] +; CHECK-NEXT: add sp, sp, #192 ; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w19 +; CHECK-NEXT: .cfi_restore w20 +; CHECK-NEXT: .cfi_restore w21 +; CHECK-NEXT: .cfi_restore w22 ; CHECK-NEXT: .cfi_restore b8 ; CHECK-NEXT: .cfi_restore b9 ; CHECK-NEXT: .cfi_restore b10 diff --git a/llvm/test/CodeGen/AArch64/rand.ll b/llvm/test/CodeGen/AArch64/rand.ll --- a/llvm/test/CodeGen/AArch64/rand.ll +++ b/llvm/test/CodeGen/AArch64/rand.ll @@ -7,9 +7,9 @@ ; CHECK-NEXT: mrs x10, RNDR ; CHECK-NEXT: mov x9, x0 ; CHECK-NEXT: cset w8, eq +; CHECK-NEXT: str x10, [x9] ; CHECK-NEXT: and w8, w8, #0x1 ; CHECK-NEXT: mov w0, w8 -; CHECK-NEXT: str x10, [x9] ; CHECK-NEXT: ret %1 = tail call { i64, i1 } @llvm.aarch64.rndr() %2 = extractvalue { i64, i1 } %1, 0 @@ -26,9 +26,9 @@ ; CHECK-NEXT: mrs x10, RNDRRS ; CHECK-NEXT: mov x9, x0 ; CHECK-NEXT: cset w8, eq +; CHECK-NEXT: str x10, [x9] ; CHECK-NEXT: and w8, w8, #0x1 ; CHECK-NEXT: mov w0, w8 -; CHECK-NEXT: str x10, [x9] ; CHECK-NEXT: ret %1 = tail call { i64, i1 } @llvm.aarch64.rndrrs() %2 = extractvalue { i64, i1 } %1, 0 diff --git a/llvm/test/CodeGen/AArch64/rax1.ll b/llvm/test/CodeGen/AArch64/rax1.ll --- a/llvm/test/CodeGen/AArch64/rax1.ll +++ b/llvm/test/CodeGen/AArch64/rax1.ll @@ -10,9 +10,9 @@ ; ; NOSHA3-LABEL: rax1: ; NOSHA3: // %bb.0: -; NOSHA3-NEXT: ushr v2.2d, v1.2d, #63 -; NOSHA3-NEXT: add v1.2d, 
v1.2d, v1.2d -; NOSHA3-NEXT: orr v1.16b, v1.16b, v2.16b +; NOSHA3-NEXT: add v2.2d, v1.2d, v1.2d +; NOSHA3-NEXT: ushr v1.2d, v1.2d, #63 +; NOSHA3-NEXT: orr v1.16b, v2.16b, v1.16b ; NOSHA3-NEXT: eor v0.16b, v0.16b, v1.16b ; NOSHA3-NEXT: ret %a = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %y, <2 x i64> %y, <2 x i64> ) diff --git a/llvm/test/CodeGen/AArch64/rcpc3-sve.ll b/llvm/test/CodeGen/AArch64/rcpc3-sve.ll --- a/llvm/test/CodeGen/AArch64/rcpc3-sve.ll +++ b/llvm/test/CodeGen/AArch64/rcpc3-sve.ll @@ -8,8 +8,8 @@ define hidden @test_load_sve_lane0(ptr nocapture noundef readonly %a, noundef %b) local_unnamed_addr { ; CHECK-LABEL: test_load_sve_lane0: ; CHECK: // %bb.0: -; CHECK-NEXT: ldapr x8, [x0] ; CHECK-NEXT: ptrue p0.d, vl1 +; CHECK-NEXT: ldapr x8, [x0] ; CHECK-NEXT: mov z0.d, p0/m, x8 ; CHECK-NEXT: ret %1 = load atomic i64, ptr %a acquire, align 8 @@ -20,13 +20,13 @@ define hidden @test_load_sve_lane1(ptr nocapture noundef readonly %a, noundef %b) local_unnamed_addr { ; CHECK-LABEL: test_load_sve_lane1: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1 // =0x1 -; CHECK-NEXT: ldapr x9, [x0] -; CHECK-NEXT: index z2.d, #0, #1 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z1.d, x8 -; CHECK-NEXT: cmpeq p0.d, p0/z, z2.d, z1.d -; CHECK-NEXT: mov z0.d, p0/m, x9 +; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: index z1.d, #0, #1 +; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: ldapr x8, [x0] +; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d +; CHECK-NEXT: mov z0.d, p0/m, x8 ; CHECK-NEXT: ret %1 = load atomic i64, ptr %a acquire, align 8 %vldap1_lane = insertelement %b, i64 %1, i64 1 diff --git a/llvm/test/CodeGen/AArch64/reduce-and.ll b/llvm/test/CodeGen/AArch64/reduce-and.ll --- a/llvm/test/CodeGen/AArch64/reduce-and.ll +++ b/llvm/test/CodeGen/AArch64/reduce-and.ll @@ -95,14 +95,14 @@ ; GISEL-NEXT: fmov w11, s3 ; GISEL-NEXT: fmov w12, s4 ; GISEL-NEXT: fmov w13, s5 +; GISEL-NEXT: fmov w14, s6 ; GISEL-NEXT: and w8, w8, w9 -; GISEL-NEXT: fmov w9, s6 +; GISEL-NEXT: fmov w9, s7 ; GISEL-NEXT: and w10, w10, w11 -; GISEL-NEXT: fmov w11, s7 -; GISEL-NEXT: and w12, w12, w13 +; GISEL-NEXT: and w11, w12, w13 ; GISEL-NEXT: and w8, w8, w10 -; GISEL-NEXT: and w9, w9, w11 -; GISEL-NEXT: and w9, w12, w9 +; GISEL-NEXT: and w9, w14, w9 +; GISEL-NEXT: and w9, w11, w9 ; GISEL-NEXT: and w8, w8, w9 ; GISEL-NEXT: and w0, w8, #0x1 ; GISEL-NEXT: ret @@ -130,39 +130,39 @@ ; GISEL-NEXT: mov b6, v0.b[6] ; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: fmov w8, s0 -; GISEL-NEXT: fmov w9, s1 -; GISEL-NEXT: fmov w10, s2 -; GISEL-NEXT: fmov w11, s3 ; GISEL-NEXT: mov b16, v0.b[8] ; GISEL-NEXT: mov b17, v0.b[9] ; GISEL-NEXT: mov b18, v0.b[10] ; GISEL-NEXT: mov b19, v0.b[11] -; GISEL-NEXT: and w8, w8, w9 -; GISEL-NEXT: and w9, w10, w11 -; GISEL-NEXT: fmov w10, s4 -; GISEL-NEXT: fmov w11, s5 +; GISEL-NEXT: fmov w9, s1 +; GISEL-NEXT: fmov w10, s2 +; GISEL-NEXT: fmov w11, s3 ; GISEL-NEXT: fmov w12, s6 -; GISEL-NEXT: fmov w13, s7 ; GISEL-NEXT: mov b20, v0.b[12] ; GISEL-NEXT: mov b21, v0.b[13] +; GISEL-NEXT: fmov w13, s7 ; GISEL-NEXT: mov b22, v0.b[14] ; GISEL-NEXT: mov b23, v0.b[15] -; GISEL-NEXT: and w10, w10, w11 -; GISEL-NEXT: and w11, w12, w13 -; GISEL-NEXT: fmov w12, s16 -; GISEL-NEXT: fmov w13, s17 +; GISEL-NEXT: and w8, w8, w9 +; GISEL-NEXT: and w9, w10, w11 +; GISEL-NEXT: fmov w10, s4 +; GISEL-NEXT: and w8, w8, w9 +; GISEL-NEXT: fmov w11, s5 ; GISEL-NEXT: fmov w14, s18 ; GISEL-NEXT: fmov w15, s19 ; GISEL-NEXT: fmov w16, s22 ; GISEL-NEXT: fmov w17, s23 +; GISEL-NEXT: and w10, w10, w11 +; GISEL-NEXT: and w11, w12, w13 +; GISEL-NEXT: fmov w12, s16 +; 
GISEL-NEXT: and w9, w10, w11 +; GISEL-NEXT: fmov w13, s17 ; GISEL-NEXT: and w8, w8, w9 ; GISEL-NEXT: and w12, w12, w13 -; GISEL-NEXT: and w9, w10, w11 ; GISEL-NEXT: and w13, w14, w15 ; GISEL-NEXT: fmov w14, s20 ; GISEL-NEXT: fmov w15, s21 ; GISEL-NEXT: and w10, w12, w13 -; GISEL-NEXT: and w8, w8, w9 ; GISEL-NEXT: and w14, w14, w15 ; GISEL-NEXT: and w15, w16, w17 ; GISEL-NEXT: and w11, w14, w15 @@ -192,39 +192,39 @@ ; GISEL-NEXT: mov b6, v0.b[6] ; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: fmov w8, s0 -; GISEL-NEXT: fmov w9, s1 -; GISEL-NEXT: fmov w10, s2 -; GISEL-NEXT: fmov w11, s3 ; GISEL-NEXT: mov b16, v0.b[8] ; GISEL-NEXT: mov b17, v0.b[9] ; GISEL-NEXT: mov b18, v0.b[10] ; GISEL-NEXT: mov b19, v0.b[11] -; GISEL-NEXT: and w8, w8, w9 -; GISEL-NEXT: and w9, w10, w11 -; GISEL-NEXT: fmov w10, s4 -; GISEL-NEXT: fmov w11, s5 +; GISEL-NEXT: fmov w9, s1 +; GISEL-NEXT: fmov w10, s2 +; GISEL-NEXT: fmov w11, s3 ; GISEL-NEXT: fmov w12, s6 -; GISEL-NEXT: fmov w13, s7 ; GISEL-NEXT: mov b20, v0.b[12] ; GISEL-NEXT: mov b21, v0.b[13] +; GISEL-NEXT: fmov w13, s7 ; GISEL-NEXT: mov b22, v0.b[14] ; GISEL-NEXT: mov b23, v0.b[15] -; GISEL-NEXT: and w10, w10, w11 -; GISEL-NEXT: and w11, w12, w13 -; GISEL-NEXT: fmov w12, s16 -; GISEL-NEXT: fmov w13, s17 +; GISEL-NEXT: and w8, w8, w9 +; GISEL-NEXT: and w9, w10, w11 +; GISEL-NEXT: fmov w10, s4 +; GISEL-NEXT: and w8, w8, w9 +; GISEL-NEXT: fmov w11, s5 ; GISEL-NEXT: fmov w14, s18 ; GISEL-NEXT: fmov w15, s19 ; GISEL-NEXT: fmov w16, s22 ; GISEL-NEXT: fmov w17, s23 +; GISEL-NEXT: and w10, w10, w11 +; GISEL-NEXT: and w11, w12, w13 +; GISEL-NEXT: fmov w12, s16 +; GISEL-NEXT: and w9, w10, w11 +; GISEL-NEXT: fmov w13, s17 ; GISEL-NEXT: and w8, w8, w9 ; GISEL-NEXT: and w12, w12, w13 -; GISEL-NEXT: and w9, w10, w11 ; GISEL-NEXT: and w13, w14, w15 ; GISEL-NEXT: fmov w14, s20 ; GISEL-NEXT: fmov w15, s21 ; GISEL-NEXT: and w10, w12, w13 -; GISEL-NEXT: and w8, w8, w9 ; GISEL-NEXT: and w14, w14, w15 ; GISEL-NEXT: and w15, w16, w17 ; GISEL-NEXT: and w11, w14, w15 @@ -328,14 +328,14 @@ ; GISEL-NEXT: fmov w11, s3 ; GISEL-NEXT: fmov w12, s4 ; GISEL-NEXT: fmov w13, s5 +; GISEL-NEXT: fmov w14, s6 ; GISEL-NEXT: and w8, w8, w9 -; GISEL-NEXT: fmov w9, s6 +; GISEL-NEXT: fmov w9, s7 ; GISEL-NEXT: and w10, w10, w11 -; GISEL-NEXT: fmov w11, s7 -; GISEL-NEXT: and w12, w12, w13 +; GISEL-NEXT: and w11, w12, w13 ; GISEL-NEXT: and w8, w8, w10 -; GISEL-NEXT: and w9, w9, w11 -; GISEL-NEXT: and w9, w12, w9 +; GISEL-NEXT: and w9, w14, w9 +; GISEL-NEXT: and w9, w11, w9 ; GISEL-NEXT: and w0, w8, w9 ; GISEL-NEXT: ret %and_result = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> %a) @@ -371,14 +371,14 @@ ; GISEL-NEXT: fmov w11, s3 ; GISEL-NEXT: fmov w12, s4 ; GISEL-NEXT: fmov w13, s5 +; GISEL-NEXT: fmov w14, s6 ; GISEL-NEXT: and w8, w8, w9 -; GISEL-NEXT: fmov w9, s6 +; GISEL-NEXT: fmov w9, s7 ; GISEL-NEXT: and w10, w10, w11 -; GISEL-NEXT: fmov w11, s7 -; GISEL-NEXT: and w12, w12, w13 +; GISEL-NEXT: and w11, w12, w13 ; GISEL-NEXT: and w8, w8, w10 -; GISEL-NEXT: and w9, w9, w11 -; GISEL-NEXT: and w9, w12, w9 +; GISEL-NEXT: and w9, w14, w9 +; GISEL-NEXT: and w9, w11, w9 ; GISEL-NEXT: and w0, w8, w9 ; GISEL-NEXT: ret %and_result = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %a) @@ -416,14 +416,14 @@ ; GISEL-NEXT: fmov w11, s3 ; GISEL-NEXT: fmov w12, s4 ; GISEL-NEXT: fmov w13, s5 +; GISEL-NEXT: fmov w14, s6 ; GISEL-NEXT: and w8, w8, w9 -; GISEL-NEXT: fmov w9, s6 +; GISEL-NEXT: fmov w9, s7 ; GISEL-NEXT: and w10, w10, w11 -; GISEL-NEXT: fmov w11, s7 -; GISEL-NEXT: and w12, w12, w13 +; GISEL-NEXT: and w11, w12, 
w13 ; GISEL-NEXT: and w8, w8, w10 -; GISEL-NEXT: and w9, w9, w11 -; GISEL-NEXT: and w9, w12, w9 +; GISEL-NEXT: and w9, w14, w9 +; GISEL-NEXT: and w9, w11, w9 ; GISEL-NEXT: and w0, w8, w9 ; GISEL-NEXT: ret %and_result = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> %a) diff --git a/llvm/test/CodeGen/AArch64/reduce-or.ll b/llvm/test/CodeGen/AArch64/reduce-or.ll --- a/llvm/test/CodeGen/AArch64/reduce-or.ll +++ b/llvm/test/CodeGen/AArch64/reduce-or.ll @@ -95,14 +95,14 @@ ; GISEL-NEXT: fmov w11, s3 ; GISEL-NEXT: fmov w12, s4 ; GISEL-NEXT: fmov w13, s5 +; GISEL-NEXT: fmov w14, s6 ; GISEL-NEXT: orr w8, w8, w9 -; GISEL-NEXT: fmov w9, s6 +; GISEL-NEXT: fmov w9, s7 ; GISEL-NEXT: orr w10, w10, w11 -; GISEL-NEXT: fmov w11, s7 -; GISEL-NEXT: orr w12, w12, w13 +; GISEL-NEXT: orr w11, w12, w13 ; GISEL-NEXT: orr w8, w8, w10 -; GISEL-NEXT: orr w9, w9, w11 -; GISEL-NEXT: orr w9, w12, w9 +; GISEL-NEXT: orr w9, w14, w9 +; GISEL-NEXT: orr w9, w11, w9 ; GISEL-NEXT: orr w8, w8, w9 ; GISEL-NEXT: and w0, w8, #0x1 ; GISEL-NEXT: ret @@ -130,39 +130,39 @@ ; GISEL-NEXT: mov b6, v0.b[6] ; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: fmov w8, s0 -; GISEL-NEXT: fmov w9, s1 -; GISEL-NEXT: fmov w10, s2 -; GISEL-NEXT: fmov w11, s3 ; GISEL-NEXT: mov b16, v0.b[8] ; GISEL-NEXT: mov b17, v0.b[9] ; GISEL-NEXT: mov b18, v0.b[10] ; GISEL-NEXT: mov b19, v0.b[11] -; GISEL-NEXT: orr w8, w8, w9 -; GISEL-NEXT: orr w9, w10, w11 -; GISEL-NEXT: fmov w10, s4 -; GISEL-NEXT: fmov w11, s5 +; GISEL-NEXT: fmov w9, s1 +; GISEL-NEXT: fmov w10, s2 +; GISEL-NEXT: fmov w11, s3 ; GISEL-NEXT: fmov w12, s6 -; GISEL-NEXT: fmov w13, s7 ; GISEL-NEXT: mov b20, v0.b[12] ; GISEL-NEXT: mov b21, v0.b[13] +; GISEL-NEXT: fmov w13, s7 ; GISEL-NEXT: mov b22, v0.b[14] ; GISEL-NEXT: mov b23, v0.b[15] -; GISEL-NEXT: orr w10, w10, w11 -; GISEL-NEXT: orr w11, w12, w13 -; GISEL-NEXT: fmov w12, s16 -; GISEL-NEXT: fmov w13, s17 +; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: orr w9, w10, w11 +; GISEL-NEXT: fmov w10, s4 +; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: fmov w11, s5 ; GISEL-NEXT: fmov w14, s18 ; GISEL-NEXT: fmov w15, s19 ; GISEL-NEXT: fmov w16, s22 ; GISEL-NEXT: fmov w17, s23 +; GISEL-NEXT: orr w10, w10, w11 +; GISEL-NEXT: orr w11, w12, w13 +; GISEL-NEXT: fmov w12, s16 +; GISEL-NEXT: orr w9, w10, w11 +; GISEL-NEXT: fmov w13, s17 ; GISEL-NEXT: orr w8, w8, w9 ; GISEL-NEXT: orr w12, w12, w13 -; GISEL-NEXT: orr w9, w10, w11 ; GISEL-NEXT: orr w13, w14, w15 ; GISEL-NEXT: fmov w14, s20 ; GISEL-NEXT: fmov w15, s21 ; GISEL-NEXT: orr w10, w12, w13 -; GISEL-NEXT: orr w8, w8, w9 ; GISEL-NEXT: orr w14, w14, w15 ; GISEL-NEXT: orr w15, w16, w17 ; GISEL-NEXT: orr w11, w14, w15 @@ -192,39 +192,39 @@ ; GISEL-NEXT: mov b6, v0.b[6] ; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: fmov w8, s0 -; GISEL-NEXT: fmov w9, s1 -; GISEL-NEXT: fmov w10, s2 -; GISEL-NEXT: fmov w11, s3 ; GISEL-NEXT: mov b16, v0.b[8] ; GISEL-NEXT: mov b17, v0.b[9] ; GISEL-NEXT: mov b18, v0.b[10] ; GISEL-NEXT: mov b19, v0.b[11] -; GISEL-NEXT: orr w8, w8, w9 -; GISEL-NEXT: orr w9, w10, w11 -; GISEL-NEXT: fmov w10, s4 -; GISEL-NEXT: fmov w11, s5 +; GISEL-NEXT: fmov w9, s1 +; GISEL-NEXT: fmov w10, s2 +; GISEL-NEXT: fmov w11, s3 ; GISEL-NEXT: fmov w12, s6 -; GISEL-NEXT: fmov w13, s7 ; GISEL-NEXT: mov b20, v0.b[12] ; GISEL-NEXT: mov b21, v0.b[13] +; GISEL-NEXT: fmov w13, s7 ; GISEL-NEXT: mov b22, v0.b[14] ; GISEL-NEXT: mov b23, v0.b[15] -; GISEL-NEXT: orr w10, w10, w11 -; GISEL-NEXT: orr w11, w12, w13 -; GISEL-NEXT: fmov w12, s16 -; GISEL-NEXT: fmov w13, s17 +; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: orr w9, w10, w11 +; 
GISEL-NEXT: fmov w10, s4 +; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: fmov w11, s5 ; GISEL-NEXT: fmov w14, s18 ; GISEL-NEXT: fmov w15, s19 ; GISEL-NEXT: fmov w16, s22 ; GISEL-NEXT: fmov w17, s23 +; GISEL-NEXT: orr w10, w10, w11 +; GISEL-NEXT: orr w11, w12, w13 +; GISEL-NEXT: fmov w12, s16 +; GISEL-NEXT: orr w9, w10, w11 +; GISEL-NEXT: fmov w13, s17 ; GISEL-NEXT: orr w8, w8, w9 ; GISEL-NEXT: orr w12, w12, w13 -; GISEL-NEXT: orr w9, w10, w11 ; GISEL-NEXT: orr w13, w14, w15 ; GISEL-NEXT: fmov w14, s20 ; GISEL-NEXT: fmov w15, s21 ; GISEL-NEXT: orr w10, w12, w13 -; GISEL-NEXT: orr w8, w8, w9 ; GISEL-NEXT: orr w14, w14, w15 ; GISEL-NEXT: orr w15, w16, w17 ; GISEL-NEXT: orr w11, w14, w15 @@ -330,14 +330,14 @@ ; GISEL-NEXT: fmov w11, s3 ; GISEL-NEXT: fmov w12, s4 ; GISEL-NEXT: fmov w13, s5 +; GISEL-NEXT: fmov w14, s6 ; GISEL-NEXT: orr w8, w8, w9 -; GISEL-NEXT: fmov w9, s6 +; GISEL-NEXT: fmov w9, s7 ; GISEL-NEXT: orr w10, w10, w11 -; GISEL-NEXT: fmov w11, s7 -; GISEL-NEXT: orr w12, w12, w13 +; GISEL-NEXT: orr w11, w12, w13 ; GISEL-NEXT: orr w8, w8, w10 -; GISEL-NEXT: orr w9, w9, w11 -; GISEL-NEXT: orr w9, w12, w9 +; GISEL-NEXT: orr w9, w14, w9 +; GISEL-NEXT: orr w9, w11, w9 ; GISEL-NEXT: orr w0, w8, w9 ; GISEL-NEXT: ret %or_result = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %a) @@ -373,14 +373,14 @@ ; GISEL-NEXT: fmov w11, s3 ; GISEL-NEXT: fmov w12, s4 ; GISEL-NEXT: fmov w13, s5 +; GISEL-NEXT: fmov w14, s6 ; GISEL-NEXT: orr w8, w8, w9 -; GISEL-NEXT: fmov w9, s6 +; GISEL-NEXT: fmov w9, s7 ; GISEL-NEXT: orr w10, w10, w11 -; GISEL-NEXT: fmov w11, s7 -; GISEL-NEXT: orr w12, w12, w13 +; GISEL-NEXT: orr w11, w12, w13 ; GISEL-NEXT: orr w8, w8, w10 -; GISEL-NEXT: orr w9, w9, w11 -; GISEL-NEXT: orr w9, w12, w9 +; GISEL-NEXT: orr w9, w14, w9 +; GISEL-NEXT: orr w9, w11, w9 ; GISEL-NEXT: orr w0, w8, w9 ; GISEL-NEXT: ret %or_result = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %a) @@ -418,14 +418,14 @@ ; GISEL-NEXT: fmov w11, s3 ; GISEL-NEXT: fmov w12, s4 ; GISEL-NEXT: fmov w13, s5 +; GISEL-NEXT: fmov w14, s6 ; GISEL-NEXT: orr w8, w8, w9 -; GISEL-NEXT: fmov w9, s6 +; GISEL-NEXT: fmov w9, s7 ; GISEL-NEXT: orr w10, w10, w11 -; GISEL-NEXT: fmov w11, s7 -; GISEL-NEXT: orr w12, w12, w13 +; GISEL-NEXT: orr w11, w12, w13 ; GISEL-NEXT: orr w8, w8, w10 -; GISEL-NEXT: orr w9, w9, w11 -; GISEL-NEXT: orr w9, w12, w9 +; GISEL-NEXT: orr w9, w14, w9 +; GISEL-NEXT: orr w9, w11, w9 ; GISEL-NEXT: orr w0, w8, w9 ; GISEL-NEXT: ret %or_result = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> %a) diff --git a/llvm/test/CodeGen/AArch64/reduce-shuffle.ll b/llvm/test/CodeGen/AArch64/reduce-shuffle.ll --- a/llvm/test/CodeGen/AArch64/reduce-shuffle.ll +++ b/llvm/test/CodeGen/AArch64/reduce-shuffle.ll @@ -4,122 +4,129 @@ define i32 @v1(ptr nocapture noundef readonly %p1, i32 noundef %i1, ptr nocapture noundef readonly %p2, i32 noundef %i2) { ; CHECK-LABEL: v1: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $w3 killed $w3 def $x3 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: sxtw x8, w1 -; CHECK-NEXT: // kill: def $w3 killed $w3 def $x3 ; CHECK-NEXT: sxtw x9, w3 -; CHECK-NEXT: add x10, x0, x8 -; CHECK-NEXT: add x11, x2, x9 -; CHECK-NEXT: add x12, x10, x8 ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x2] +; CHECK-NEXT: add x10, x0, x8 +; CHECK-NEXT: add x11, x2, x9 ; CHECK-NEXT: ldr d2, [x10] -; CHECK-NEXT: add x10, x11, x9 -; CHECK-NEXT: ldr d6, [x12, x8] -; CHECK-NEXT: ldr d7, [x10, x9] +; CHECK-NEXT: add x10, x10, x8 ; CHECK-NEXT: ldr d3, [x11] -; CHECK-NEXT: ldr d4, [x12] -; CHECK-NEXT: ldr d5, [x10] 
-; CHECK-NEXT: usubl v0.8h, v0.8b, v1.8b +; CHECK-NEXT: add x11, x11, x9 +; CHECK-NEXT: ldr d4, [x10] +; CHECK-NEXT: ldr d6, [x10, x8] +; CHECK-NEXT: ldr d5, [x11] +; CHECK-NEXT: ldr d7, [x11, x9] ; CHECK-NEXT: usubl v2.8h, v2.8b, v3.8b +; CHECK-NEXT: usubl v0.8h, v0.8b, v1.8b ; CHECK-NEXT: usubl v1.8h, v4.8b, v5.8b ; CHECK-NEXT: usubl v3.8h, v6.8b, v7.8b -; CHECK-NEXT: shll2 v4.4s, v0.8h, #16 -; CHECK-NEXT: shll2 v5.4s, v2.8h, #16 +; CHECK-NEXT: shll2 v4.4s, v2.8h, #16 +; CHECK-NEXT: shll2 v5.4s, v0.8h, #16 ; CHECK-NEXT: shll2 v6.4s, v3.8h, #16 ; CHECK-NEXT: shll2 v7.4s, v1.8h, #16 -; CHECK-NEXT: saddw v0.4s, v4.4s, v0.4h -; CHECK-NEXT: saddw v2.4s, v5.4s, v2.4h +; CHECK-NEXT: saddw v2.4s, v4.4s, v2.4h +; CHECK-NEXT: saddw v0.4s, v5.4s, v0.4h ; CHECK-NEXT: saddw v3.4s, v6.4s, v3.4h ; CHECK-NEXT: saddw v1.4s, v7.4s, v1.4h -; CHECK-NEXT: zip1 v5.4s, v2.4s, v0.4s -; CHECK-NEXT: zip2 v4.4s, v2.4s, v0.4s -; CHECK-NEXT: uzp2 v7.4s, v3.4s, v1.4s +; CHECK-NEXT: mov v7.16b, v2.16b +; CHECK-NEXT: zip1 v4.4s, v2.4s, v0.4s +; CHECK-NEXT: zip2 v6.4s, v2.4s, v0.4s +; CHECK-NEXT: uzp2 v5.4s, v3.4s, v1.4s ; CHECK-NEXT: mov v17.16b, v1.16b -; CHECK-NEXT: zip2 v18.4s, v3.4s, v1.4s -; CHECK-NEXT: ext v19.16b, v2.16b, v5.16b, #8 -; CHECK-NEXT: uzp2 v7.4s, v7.4s, v3.4s -; CHECK-NEXT: mov v2.s[3], v0.s[2] -; CHECK-NEXT: zip2 v6.4s, v1.4s, v3.4s -; CHECK-NEXT: ext v16.16b, v3.16b, v3.16b, #12 +; CHECK-NEXT: zip2 v16.4s, v1.4s, v3.4s +; CHECK-NEXT: mov v7.s[3], v0.s[2] +; CHECK-NEXT: ext v18.16b, v3.16b, v3.16b, #12 +; CHECK-NEXT: ext v2.16b, v2.16b, v4.16b, #8 ; CHECK-NEXT: mov v17.s[1], v3.s[0] +; CHECK-NEXT: uzp2 v0.4s, v5.4s, v3.4s +; CHECK-NEXT: zip2 v5.4s, v3.4s, v1.4s ; CHECK-NEXT: mov v3.s[0], v1.s[1] -; CHECK-NEXT: mov v7.d[1], v4.d[1] -; CHECK-NEXT: mov v18.d[1], v2.d[1] -; CHECK-NEXT: mov v17.d[1], v19.d[1] -; CHECK-NEXT: mov v3.d[1], v5.d[1] -; CHECK-NEXT: ext v16.16b, v1.16b, v16.16b, #12 -; CHECK-NEXT: add v1.4s, v7.4s, v18.4s -; CHECK-NEXT: mov v6.d[1], v2.d[1] -; CHECK-NEXT: add v0.4s, v3.4s, v17.4s -; CHECK-NEXT: mov v16.d[1], v4.d[1] -; CHECK-NEXT: sub v2.4s, v17.4s, v3.4s +; CHECK-NEXT: ext v1.16b, v1.16b, v18.16b, #12 +; CHECK-NEXT: mov v16.d[1], v7.d[1] +; CHECK-NEXT: mov v17.d[1], v2.d[1] +; CHECK-NEXT: mov v0.d[1], v6.d[1] +; CHECK-NEXT: mov v5.d[1], v7.d[1] +; CHECK-NEXT: mov v3.d[1], v4.d[1] +; CHECK-NEXT: mov v1.d[1], v6.d[1] +; CHECK-NEXT: add v0.4s, v0.4s, v5.4s +; CHECK-NEXT: add v2.4s, v3.4s, v17.4s +; CHECK-NEXT: sub v3.4s, v17.4s, v3.4s +; CHECK-NEXT: sub v1.4s, v16.4s, v1.4s +; CHECK-NEXT: rev64 v4.4s, v0.4s +; CHECK-NEXT: rev64 v5.4s, v2.4s +; CHECK-NEXT: add v6.4s, v1.4s, v3.4s +; CHECK-NEXT: sub v1.4s, v3.4s, v1.4s +; CHECK-NEXT: mov v4.d[1], v0.d[1] +; CHECK-NEXT: mov v5.d[1], v2.d[1] ; CHECK-NEXT: rev64 v3.4s, v1.4s -; CHECK-NEXT: rev64 v5.4s, v0.4s -; CHECK-NEXT: sub v4.4s, v6.4s, v16.4s -; CHECK-NEXT: mov v3.d[1], v1.d[1] -; CHECK-NEXT: mov v5.d[1], v0.d[1] -; CHECK-NEXT: add v6.4s, v4.4s, v2.4s ; CHECK-NEXT: sub v2.4s, v2.4s, v4.4s -; CHECK-NEXT: sub v0.4s, v0.4s, v3.4s -; CHECK-NEXT: rev64 v4.4s, v2.4s -; CHECK-NEXT: rev64 v3.4s, v6.4s -; CHECK-NEXT: add v1.4s, v1.4s, v5.4s -; CHECK-NEXT: addp v7.4s, v0.4s, v2.4s -; CHECK-NEXT: addp v5.4s, v1.4s, v6.4s -; CHECK-NEXT: sub v2.4s, v2.4s, v4.4s -; CHECK-NEXT: sub v3.4s, v6.4s, v3.4s -; CHECK-NEXT: rev64 v6.4s, v0.4s -; CHECK-NEXT: ext v4.16b, v7.16b, v2.16b, #4 -; CHECK-NEXT: rev64 v16.4s, v1.4s -; CHECK-NEXT: ext v17.16b, v5.16b, v3.16b, #4 -; CHECK-NEXT: sub v0.4s, v0.4s, v6.4s -; CHECK-NEXT: zip2 v4.4s, v4.4s, v7.4s -; 
CHECK-NEXT: ext v6.16b, v0.16b, v7.16b, #8 -; CHECK-NEXT: sub v1.4s, v1.4s, v16.4s -; CHECK-NEXT: zip2 v16.4s, v17.4s, v5.4s -; CHECK-NEXT: zip1 v18.4s, v5.4s, v5.4s -; CHECK-NEXT: ext v19.16b, v1.16b, v5.16b, #4 -; CHECK-NEXT: ext v4.16b, v2.16b, v4.16b, #12 -; CHECK-NEXT: mov v2.s[2], v7.s[3] -; CHECK-NEXT: ext v17.16b, v6.16b, v0.16b, #4 -; CHECK-NEXT: ext v16.16b, v3.16b, v16.16b, #12 -; CHECK-NEXT: mov v3.s[2], v5.s[3] -; CHECK-NEXT: trn2 v1.4s, v18.4s, v1.4s -; CHECK-NEXT: ext v18.16b, v19.16b, v19.16b, #4 -; CHECK-NEXT: mov v0.s[2], v7.s[1] -; CHECK-NEXT: uzp2 v6.4s, v6.4s, v17.4s -; CHECK-NEXT: sub v17.4s, v2.4s, v4.4s -; CHECK-NEXT: sub v21.4s, v3.4s, v16.4s -; CHECK-NEXT: mov v3.s[1], v5.s[2] -; CHECK-NEXT: mov v2.s[1], v7.s[2] -; CHECK-NEXT: sub v19.4s, v1.4s, v18.4s -; CHECK-NEXT: mov v18.s[0], v5.s[1] -; CHECK-NEXT: sub v20.4s, v0.4s, v6.4s -; CHECK-NEXT: mov v0.s[1], v7.s[0] -; CHECK-NEXT: add v3.4s, v3.4s, v16.4s -; CHECK-NEXT: add v2.4s, v2.4s, v4.4s -; CHECK-NEXT: add v1.4s, v1.4s, v18.4s -; CHECK-NEXT: mov v2.d[1], v17.d[1] -; CHECK-NEXT: mov v3.d[1], v21.d[1] +; CHECK-NEXT: add v0.4s, v0.4s, v5.4s +; CHECK-NEXT: rev64 v4.4s, v6.4s +; CHECK-NEXT: rev64 v5.4s, v2.4s +; CHECK-NEXT: rev64 v7.4s, v0.4s +; CHECK-NEXT: addp v16.4s, v0.4s, v6.4s +; CHECK-NEXT: addp v17.4s, v2.4s, v1.4s +; CHECK-NEXT: sub v4.4s, v6.4s, v4.4s +; CHECK-NEXT: sub v1.4s, v1.4s, v3.4s +; CHECK-NEXT: sub v2.4s, v2.4s, v5.4s +; CHECK-NEXT: sub v0.4s, v0.4s, v7.4s +; CHECK-NEXT: zip1 v21.4s, v16.4s, v16.4s +; CHECK-NEXT: ext v5.16b, v17.16b, v1.16b, #4 +; CHECK-NEXT: ext v6.16b, v16.16b, v4.16b, #4 +; CHECK-NEXT: mov v18.16b, v1.16b +; CHECK-NEXT: mov v19.16b, v4.16b +; CHECK-NEXT: ext v3.16b, v2.16b, v17.16b, #8 +; CHECK-NEXT: ext v7.16b, v0.16b, v16.16b, #4 +; CHECK-NEXT: mov v18.s[2], v17.s[3] +; CHECK-NEXT: zip2 v5.4s, v5.4s, v17.4s +; CHECK-NEXT: zip2 v6.4s, v6.4s, v16.4s +; CHECK-NEXT: mov v19.s[2], v16.s[3] +; CHECK-NEXT: trn2 v0.4s, v21.4s, v0.4s +; CHECK-NEXT: ext v20.16b, v3.16b, v2.16b, #4 +; CHECK-NEXT: ext v7.16b, v7.16b, v7.16b, #4 +; CHECK-NEXT: mov v2.s[2], v17.s[1] +; CHECK-NEXT: ext v1.16b, v1.16b, v5.16b, #12 +; CHECK-NEXT: ext v4.16b, v4.16b, v6.16b, #12 +; CHECK-NEXT: mov v5.16b, v18.16b +; CHECK-NEXT: uzp2 v3.4s, v3.4s, v20.4s +; CHECK-NEXT: mov v6.16b, v7.16b +; CHECK-NEXT: mov v20.16b, v19.16b +; CHECK-NEXT: mov v21.16b, v2.16b +; CHECK-NEXT: mov v5.s[1], v17.s[2] +; CHECK-NEXT: sub v7.4s, v0.4s, v7.4s +; CHECK-NEXT: mov v6.s[0], v16.s[1] +; CHECK-NEXT: mov v20.s[1], v16.s[2] +; CHECK-NEXT: sub v16.4s, v19.4s, v4.4s +; CHECK-NEXT: mov v21.s[1], v17.s[0] +; CHECK-NEXT: sub v2.4s, v2.4s, v3.4s +; CHECK-NEXT: sub v17.4s, v18.4s, v1.4s +; CHECK-NEXT: add v1.4s, v5.4s, v1.4s ; CHECK-NEXT: add v0.4s, v0.4s, v6.4s -; CHECK-NEXT: mov v1.d[1], v19.d[1] -; CHECK-NEXT: mov v0.d[1], v20.d[1] -; CHECK-NEXT: cmlt v6.8h, v2.8h, #0 -; CHECK-NEXT: cmlt v7.8h, v3.8h, #0 -; CHECK-NEXT: cmlt v4.8h, v1.8h, #0 -; CHECK-NEXT: add v2.4s, v6.4s, v2.4s -; CHECK-NEXT: add v3.4s, v7.4s, v3.4s -; CHECK-NEXT: cmlt v5.8h, v0.8h, #0 -; CHECK-NEXT: add v1.4s, v4.4s, v1.4s -; CHECK-NEXT: eor v3.16b, v3.16b, v7.16b -; CHECK-NEXT: eor v2.16b, v2.16b, v6.16b -; CHECK-NEXT: add v2.4s, v3.4s, v2.4s -; CHECK-NEXT: add v0.4s, v5.4s, v0.4s -; CHECK-NEXT: eor v1.16b, v1.16b, v4.16b -; CHECK-NEXT: add v1.4s, v1.4s, v2.4s -; CHECK-NEXT: eor v0.16b, v0.16b, v5.16b -; CHECK-NEXT: add v0.4s, v1.4s, v0.4s +; CHECK-NEXT: add v4.4s, v20.4s, v4.4s +; CHECK-NEXT: add v3.4s, v21.4s, v3.4s +; CHECK-NEXT: mov v1.d[1], v17.d[1] +; 
CHECK-NEXT: mov v0.d[1], v7.d[1] +; CHECK-NEXT: mov v4.d[1], v16.d[1] +; CHECK-NEXT: mov v3.d[1], v2.d[1] +; CHECK-NEXT: cmlt v7.8h, v1.8h, #0 +; CHECK-NEXT: cmlt v2.8h, v0.8h, #0 +; CHECK-NEXT: cmlt v6.8h, v4.8h, #0 +; CHECK-NEXT: cmlt v5.8h, v3.8h, #0 +; CHECK-NEXT: add v1.4s, v7.4s, v1.4s +; CHECK-NEXT: add v0.4s, v2.4s, v0.4s +; CHECK-NEXT: add v4.4s, v6.4s, v4.4s +; CHECK-NEXT: add v3.4s, v5.4s, v3.4s +; CHECK-NEXT: eor v1.16b, v1.16b, v7.16b +; CHECK-NEXT: eor v0.16b, v0.16b, v2.16b +; CHECK-NEXT: eor v2.16b, v3.16b, v5.16b +; CHECK-NEXT: eor v3.16b, v4.16b, v6.16b +; CHECK-NEXT: add v1.4s, v3.4s, v1.4s +; CHECK-NEXT: add v0.4s, v0.4s, v2.4s +; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: lsr w9, w8, #16 @@ -224,110 +231,112 @@ ; CHECK-NEXT: sxtw x8, w1 ; CHECK-NEXT: // kill: def $w3 killed $w3 def $x3 ; CHECK-NEXT: sxtw x9, w3 +; CHECK-NEXT: ldr d4, [x0] +; CHECK-NEXT: ldr d5, [x2] ; CHECK-NEXT: add x10, x0, x8 ; CHECK-NEXT: add x11, x2, x9 ; CHECK-NEXT: add x12, x10, x8 -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ldr d1, [x2] -; CHECK-NEXT: ldr d2, [x10] -; CHECK-NEXT: add x10, x11, x9 -; CHECK-NEXT: ldr d6, [x12, x8] -; CHECK-NEXT: ldr d7, [x10, x9] -; CHECK-NEXT: ldr d3, [x11] -; CHECK-NEXT: ldr d4, [x12] -; CHECK-NEXT: ldr d5, [x10] -; CHECK-NEXT: usubl v0.8h, v0.8b, v1.8b -; CHECK-NEXT: usubl v2.8h, v2.8b, v3.8b -; CHECK-NEXT: usubl v1.8h, v4.8b, v5.8b +; CHECK-NEXT: ldr d6, [x10] +; CHECK-NEXT: ldr d7, [x11] +; CHECK-NEXT: ldr d0, [x12, x8] +; CHECK-NEXT: add x8, x11, x9 +; CHECK-NEXT: ldr d1, [x12] +; CHECK-NEXT: ldr d2, [x8, x9] +; CHECK-NEXT: ldr d3, [x8] +; CHECK-NEXT: usubl v1.8h, v1.8b, v3.8b +; CHECK-NEXT: usubl v0.8h, v0.8b, v2.8b ; CHECK-NEXT: usubl v3.8h, v6.8b, v7.8b +; CHECK-NEXT: usubl v2.8h, v4.8b, v5.8b ; CHECK-NEXT: shll2 v4.4s, v0.8h, #16 -; CHECK-NEXT: shll2 v5.4s, v2.8h, #16 -; CHECK-NEXT: shll2 v6.4s, v3.8h, #16 -; CHECK-NEXT: shll2 v7.4s, v1.8h, #16 +; CHECK-NEXT: shll2 v5.4s, v1.8h, #16 +; CHECK-NEXT: shll2 v7.4s, v3.8h, #16 +; CHECK-NEXT: shll2 v6.4s, v2.8h, #16 ; CHECK-NEXT: saddw v0.4s, v4.4s, v0.4h -; CHECK-NEXT: saddw v2.4s, v5.4s, v2.4h -; CHECK-NEXT: saddw v3.4s, v6.4s, v3.4h -; CHECK-NEXT: saddw v1.4s, v7.4s, v1.4h -; CHECK-NEXT: zip1 v5.4s, v2.4s, v0.4s -; CHECK-NEXT: ext v17.16b, v3.16b, v3.16b, #12 -; CHECK-NEXT: uzp2 v7.4s, v3.4s, v1.4s -; CHECK-NEXT: mov v16.16b, v3.16b -; CHECK-NEXT: zip2 v4.4s, v2.4s, v0.4s -; CHECK-NEXT: zip2 v6.4s, v1.4s, v3.4s -; CHECK-NEXT: zip2 v18.4s, v3.4s, v1.4s -; CHECK-NEXT: ext v19.16b, v2.16b, v5.16b, #8 -; CHECK-NEXT: mov v16.s[0], v1.s[1] -; CHECK-NEXT: ext v17.16b, v1.16b, v17.16b, #12 -; CHECK-NEXT: uzp2 v7.4s, v7.4s, v3.4s -; CHECK-NEXT: mov v2.s[3], v0.s[2] -; CHECK-NEXT: mov v1.s[1], v3.s[0] -; CHECK-NEXT: mov v16.d[1], v5.d[1] -; CHECK-NEXT: mov v7.d[1], v4.d[1] +; CHECK-NEXT: saddw v1.4s, v5.4s, v1.4h +; CHECK-NEXT: saddw v3.4s, v7.4s, v3.4h +; CHECK-NEXT: saddw v2.4s, v6.4s, v2.4h +; CHECK-NEXT: uzp2 v4.4s, v0.4s, v1.4s +; CHECK-NEXT: mov v7.16b, v3.16b +; CHECK-NEXT: mov v17.16b, v1.16b +; CHECK-NEXT: zip1 v5.4s, v3.4s, v2.4s +; CHECK-NEXT: zip2 v6.4s, v3.4s, v2.4s +; CHECK-NEXT: zip2 v16.4s, v0.4s, v1.4s +; CHECK-NEXT: ext v18.16b, v0.16b, v0.16b, #12 +; CHECK-NEXT: mov v7.s[3], v2.s[2] +; CHECK-NEXT: mov v17.s[1], v0.s[0] +; CHECK-NEXT: uzp2 v2.4s, v4.4s, v0.4s +; CHECK-NEXT: mov v4.16b, v0.16b +; CHECK-NEXT: zip2 v0.4s, v1.4s, v0.4s +; CHECK-NEXT: ext v3.16b, v3.16b, v5.16b, #8 +; CHECK-NEXT: mov v4.s[0], v1.s[1] +; CHECK-NEXT: mov 
v16.d[1], v7.d[1] +; CHECK-NEXT: ext v1.16b, v1.16b, v18.16b, #12 +; CHECK-NEXT: mov v2.d[1], v6.d[1] +; CHECK-NEXT: mov v0.d[1], v7.d[1] +; CHECK-NEXT: mov v17.d[1], v3.d[1] +; CHECK-NEXT: mov v4.d[1], v5.d[1] +; CHECK-NEXT: mov v1.d[1], v6.d[1] +; CHECK-NEXT: add v2.4s, v2.4s, v16.4s +; CHECK-NEXT: add v3.4s, v4.4s, v17.4s +; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s +; CHECK-NEXT: sub v1.4s, v17.4s, v4.4s +; CHECK-NEXT: rev64 v5.4s, v2.4s +; CHECK-NEXT: rev64 v6.4s, v3.4s +; CHECK-NEXT: sub v4.4s, v1.4s, v0.4s +; CHECK-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-NEXT: mov v5.d[1], v2.d[1] +; CHECK-NEXT: mov v6.d[1], v3.d[1] +; CHECK-NEXT: sub v3.4s, v3.4s, v5.4s +; CHECK-NEXT: add v1.4s, v2.4s, v6.4s +; CHECK-NEXT: zip1 v2.4s, v3.4s, v4.4s +; CHECK-NEXT: zip2 v7.4s, v3.4s, v4.4s +; CHECK-NEXT: zip1 v5.4s, v1.4s, v0.4s +; CHECK-NEXT: uzp2 v6.4s, v1.4s, v0.4s +; CHECK-NEXT: mov v18.16b, v1.16b +; CHECK-NEXT: ext v16.16b, v3.16b, v2.16b, #8 +; CHECK-NEXT: zip2 v17.4s, v1.4s, v0.4s +; CHECK-NEXT: mov v3.s[3], v4.s[2] +; CHECK-NEXT: mov v18.s[1], v0.s[1] +; CHECK-NEXT: trn2 v4.4s, v1.4s, v5.4s +; CHECK-NEXT: uzp2 v1.4s, v6.4s, v1.4s +; CHECK-NEXT: mov v17.d[1], v3.d[1] ; CHECK-NEXT: mov v18.d[1], v2.d[1] -; CHECK-NEXT: mov v1.d[1], v19.d[1] -; CHECK-NEXT: mov v6.d[1], v2.d[1] -; CHECK-NEXT: mov v17.d[1], v4.d[1] -; CHECK-NEXT: add v0.4s, v7.4s, v18.4s -; CHECK-NEXT: add v2.4s, v16.4s, v1.4s -; CHECK-NEXT: rev64 v3.4s, v0.4s -; CHECK-NEXT: rev64 v4.4s, v2.4s -; CHECK-NEXT: sub v5.4s, v6.4s, v17.4s -; CHECK-NEXT: sub v1.4s, v1.4s, v16.4s -; CHECK-NEXT: mov v3.d[1], v0.d[1] -; CHECK-NEXT: mov v4.d[1], v2.d[1] -; CHECK-NEXT: add v6.4s, v5.4s, v1.4s -; CHECK-NEXT: sub v1.4s, v1.4s, v5.4s -; CHECK-NEXT: sub v2.4s, v2.4s, v3.4s -; CHECK-NEXT: add v0.4s, v0.4s, v4.4s -; CHECK-NEXT: zip1 v3.4s, v2.4s, v1.4s -; CHECK-NEXT: uzp2 v5.4s, v0.4s, v6.4s -; CHECK-NEXT: zip2 v4.4s, v2.4s, v1.4s -; CHECK-NEXT: zip1 v7.4s, v0.4s, v6.4s -; CHECK-NEXT: ext v16.16b, v2.16b, v3.16b, #8 -; CHECK-NEXT: zip2 v17.4s, v0.4s, v6.4s -; CHECK-NEXT: uzp2 v5.4s, v5.4s, v0.4s -; CHECK-NEXT: mov v2.s[3], v1.s[2] -; CHECK-NEXT: mov v18.16b, v0.16b -; CHECK-NEXT: trn2 v0.4s, v0.4s, v7.4s -; CHECK-NEXT: mov v18.s[1], v6.s[1] -; CHECK-NEXT: mov v5.d[1], v4.d[1] -; CHECK-NEXT: mov v17.d[1], v2.d[1] -; CHECK-NEXT: mov v0.d[1], v16.d[1] -; CHECK-NEXT: mov v18.d[1], v3.d[1] -; CHECK-NEXT: add v1.4s, v17.4s, v5.4s -; CHECK-NEXT: sub v2.4s, v5.4s, v17.4s -; CHECK-NEXT: ext v4.16b, v1.16b, v1.16b, #4 -; CHECK-NEXT: add v3.4s, v18.4s, v0.4s -; CHECK-NEXT: sub v0.4s, v0.4s, v18.4s -; CHECK-NEXT: ext v5.16b, v3.16b, v3.16b, #4 -; CHECK-NEXT: ext v16.16b, v4.16b, v2.16b, #8 -; CHECK-NEXT: zip1 v6.4s, v1.4s, v2.4s -; CHECK-NEXT: zip2 v7.4s, v1.4s, v2.4s -; CHECK-NEXT: ext v17.16b, v5.16b, v0.16b, #8 -; CHECK-NEXT: zip2 v1.4s, v2.4s, v1.4s -; CHECK-NEXT: zip2 v2.4s, v0.4s, v3.4s -; CHECK-NEXT: ext v4.16b, v16.16b, v4.16b, #4 -; CHECK-NEXT: zip1 v16.4s, v3.4s, v0.4s -; CHECK-NEXT: zip2 v0.4s, v3.4s, v0.4s -; CHECK-NEXT: ext v5.16b, v17.16b, v5.16b, #4 -; CHECK-NEXT: add v1.4s, v2.4s, v1.4s -; CHECK-NEXT: sub v3.4s, v6.4s, v16.4s -; CHECK-NEXT: sub v0.4s, v7.4s, v0.4s -; CHECK-NEXT: cmlt v6.8h, v1.8h, #0 -; CHECK-NEXT: cmlt v7.8h, v0.8h, #0 -; CHECK-NEXT: add v2.4s, v5.4s, v4.4s -; CHECK-NEXT: cmlt v4.8h, v3.8h, #0 -; CHECK-NEXT: add v1.4s, v6.4s, v1.4s -; CHECK-NEXT: add v0.4s, v7.4s, v0.4s -; CHECK-NEXT: cmlt v5.8h, v2.8h, #0 -; CHECK-NEXT: add v3.4s, v4.4s, v3.4s -; CHECK-NEXT: eor v0.16b, v0.16b, v7.16b -; CHECK-NEXT: eor v1.16b, v1.16b, v6.16b -; 
CHECK-NEXT: add v0.4s, v1.4s, v0.4s -; CHECK-NEXT: add v2.4s, v5.4s, v2.4s -; CHECK-NEXT: eor v1.16b, v3.16b, v4.16b -; CHECK-NEXT: add v0.4s, v1.4s, v0.4s -; CHECK-NEXT: eor v1.16b, v2.16b, v5.16b +; CHECK-NEXT: mov v4.d[1], v16.d[1] +; CHECK-NEXT: mov v1.d[1], v7.d[1] +; CHECK-NEXT: add v0.4s, v17.4s, v1.4s +; CHECK-NEXT: sub v1.4s, v1.4s, v17.4s +; CHECK-NEXT: add v2.4s, v18.4s, v4.4s +; CHECK-NEXT: sub v3.4s, v4.4s, v18.4s +; CHECK-NEXT: zip2 v4.4s, v0.4s, v1.4s +; CHECK-NEXT: ext v5.16b, v0.16b, v0.16b, #4 +; CHECK-NEXT: ext v6.16b, v2.16b, v2.16b, #4 +; CHECK-NEXT: zip2 v7.4s, v1.4s, v0.4s +; CHECK-NEXT: zip2 v16.4s, v3.4s, v2.4s +; CHECK-NEXT: zip2 v17.4s, v2.4s, v3.4s +; CHECK-NEXT: zip1 v0.4s, v0.4s, v1.4s +; CHECK-NEXT: zip1 v2.4s, v2.4s, v3.4s +; CHECK-NEXT: ext v1.16b, v5.16b, v1.16b, #8 +; CHECK-NEXT: ext v18.16b, v6.16b, v3.16b, #8 +; CHECK-NEXT: add v3.4s, v16.4s, v7.4s +; CHECK-NEXT: sub v4.4s, v4.4s, v17.4s +; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ext v1.16b, v1.16b, v5.16b, #4 +; CHECK-NEXT: ext v5.16b, v18.16b, v6.16b, #4 +; CHECK-NEXT: cmlt v2.8h, v4.8h, #0 +; CHECK-NEXT: cmlt v6.8h, v3.8h, #0 +; CHECK-NEXT: add v3.4s, v6.4s, v3.4s +; CHECK-NEXT: add v4.4s, v2.4s, v4.4s +; CHECK-NEXT: add v1.4s, v5.4s, v1.4s +; CHECK-NEXT: cmlt v5.8h, v0.8h, #0 +; CHECK-NEXT: add v0.4s, v5.4s, v0.4s +; CHECK-NEXT: eor v2.16b, v4.16b, v2.16b +; CHECK-NEXT: eor v3.16b, v3.16b, v6.16b +; CHECK-NEXT: cmlt v4.8h, v1.8h, #0 +; CHECK-NEXT: add v2.4s, v3.4s, v2.4s +; CHECK-NEXT: add v1.4s, v4.4s, v1.4s +; CHECK-NEXT: eor v0.16b, v0.16b, v5.16b +; CHECK-NEXT: add v0.4s, v0.4s, v2.4s +; CHECK-NEXT: eor v1.16b, v1.16b, v4.16b ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w8, s0 @@ -434,111 +443,110 @@ define i32 @v3(ptr nocapture noundef readonly %p1, i32 noundef %i1, ptr nocapture noundef readonly %p2, i32 noundef %i2) { ; CHECK-LABEL: v3: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: // kill: def $w3 killed $w3 def $x3 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: sxtw x8, w1 -; CHECK-NEXT: // kill: def $w3 killed $w3 def $x3 ; CHECK-NEXT: sxtw x9, w3 -; CHECK-NEXT: add x10, x0, x8 -; CHECK-NEXT: add x11, x2, x9 -; CHECK-NEXT: add x12, x10, x8 ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ldr d1, [x2] +; CHECK-NEXT: add x10, x0, x8 +; CHECK-NEXT: add x11, x2, x9 ; CHECK-NEXT: ldr d2, [x10] -; CHECK-NEXT: add x10, x11, x9 -; CHECK-NEXT: ldr d4, [x12, x8] -; CHECK-NEXT: ldr d5, [x10, x9] ; CHECK-NEXT: ldr d3, [x11] -; CHECK-NEXT: ldr d6, [x12] -; CHECK-NEXT: ldr d7, [x10] ; CHECK-NEXT: usubl v0.8h, v0.8b, v1.8b -; CHECK-NEXT: usubl v1.8h, v4.8b, v5.8b +; CHECK-NEXT: add x10, x10, x8 +; CHECK-NEXT: add x11, x11, x9 +; CHECK-NEXT: usubl v1.8h, v2.8b, v3.8b +; CHECK-NEXT: ldr d2, [x10, x8] +; CHECK-NEXT: ldr d3, [x11, x9] +; CHECK-NEXT: ldr d4, [x10] +; CHECK-NEXT: ldr d5, [x11] ; CHECK-NEXT: usubl v2.8h, v2.8b, v3.8b -; CHECK-NEXT: usubl v3.8h, v6.8b, v7.8b +; CHECK-NEXT: usubl v3.8h, v4.8b, v5.8b ; CHECK-NEXT: shll2 v4.4s, v0.8h, #16 -; CHECK-NEXT: shll2 v5.4s, v2.8h, #16 -; CHECK-NEXT: shll2 v6.4s, v3.8h, #16 -; CHECK-NEXT: shll2 v7.4s, v1.8h, #16 +; CHECK-NEXT: shll2 v5.4s, v1.8h, #16 ; CHECK-NEXT: saddw v0.4s, v4.4s, v0.4h -; CHECK-NEXT: saddw v2.4s, v5.4s, v2.4h -; CHECK-NEXT: saddw v3.4s, v6.4s, v3.4h -; CHECK-NEXT: saddw v1.4s, v7.4s, v1.4h +; CHECK-NEXT: shll2 v4.4s, v2.8h, #16 +; CHECK-NEXT: saddw v1.4s, v5.4s, v1.4h +; CHECK-NEXT: shll2 v5.4s, v3.8h, #16 +; CHECK-NEXT: saddw v2.4s, v4.4s, v2.4h +; CHECK-NEXT: saddw v3.4s, 
v5.4s, v3.4h ; CHECK-NEXT: rev64 v4.4s, v0.4s -; CHECK-NEXT: rev64 v5.4s, v2.4s -; CHECK-NEXT: rev64 v7.4s, v1.4s -; CHECK-NEXT: rev64 v16.4s, v3.4s -; CHECK-NEXT: addp v6.4s, v2.4s, v0.4s -; CHECK-NEXT: addp v17.4s, v1.4s, v3.4s -; CHECK-NEXT: sub v0.4s, v0.4s, v4.4s -; CHECK-NEXT: sub v2.4s, v2.4s, v5.4s -; CHECK-NEXT: sub v3.4s, v3.4s, v16.4s -; CHECK-NEXT: sub v1.4s, v1.4s, v7.4s -; CHECK-NEXT: ext v4.16b, v2.16b, v0.16b, #4 -; CHECK-NEXT: zip2 v5.4s, v1.4s, v3.4s -; CHECK-NEXT: mov v0.s[3], v2.s[2] -; CHECK-NEXT: uzp2 v7.4s, v17.4s, v6.4s -; CHECK-NEXT: zip1 v1.4s, v1.4s, v3.4s -; CHECK-NEXT: ext v3.16b, v6.16b, v6.16b, #8 -; CHECK-NEXT: mov v5.d[1], v0.d[1] -; CHECK-NEXT: ext v0.16b, v4.16b, v2.16b, #4 -; CHECK-NEXT: uzp1 v2.4s, v17.4s, v6.4s -; CHECK-NEXT: rev64 v4.4s, v7.4s -; CHECK-NEXT: mov v1.d[1], v0.d[1] -; CHECK-NEXT: rev64 v0.4s, v2.4s -; CHECK-NEXT: uzp1 v2.4s, v17.4s, v3.4s -; CHECK-NEXT: uzp2 v3.4s, v17.4s, v3.4s -; CHECK-NEXT: add v6.4s, v5.4s, v1.4s -; CHECK-NEXT: add v0.4s, v4.4s, v0.4s -; CHECK-NEXT: sub v1.4s, v1.4s, v5.4s -; CHECK-NEXT: sub v2.4s, v2.4s, v3.4s -; CHECK-NEXT: zip1 v3.4s, v0.4s, v6.4s -; CHECK-NEXT: zip1 v4.4s, v2.4s, v1.4s -; CHECK-NEXT: mov v7.16b, v0.16b -; CHECK-NEXT: uzp2 v5.4s, v0.4s, v6.4s -; CHECK-NEXT: trn2 v3.4s, v0.4s, v3.4s -; CHECK-NEXT: ext v16.16b, v2.16b, v4.16b, #8 -; CHECK-NEXT: mov v7.s[1], v6.s[1] -; CHECK-NEXT: uzp2 v5.4s, v5.4s, v0.4s -; CHECK-NEXT: zip2 v0.4s, v0.4s, v6.4s -; CHECK-NEXT: mov v3.d[1], v16.d[1] -; CHECK-NEXT: zip2 v6.4s, v2.4s, v1.4s -; CHECK-NEXT: mov v7.d[1], v4.d[1] -; CHECK-NEXT: mov v2.s[3], v1.s[2] -; CHECK-NEXT: mov v5.d[1], v6.d[1] -; CHECK-NEXT: add v1.4s, v3.4s, v7.4s -; CHECK-NEXT: mov v0.d[1], v2.d[1] -; CHECK-NEXT: ext v2.16b, v1.16b, v1.16b, #4 -; CHECK-NEXT: sub v3.4s, v7.4s, v3.4s -; CHECK-NEXT: add v4.4s, v5.4s, v0.4s -; CHECK-NEXT: ext v6.16b, v2.16b, v3.16b, #8 -; CHECK-NEXT: ext v7.16b, v4.16b, v4.16b, #4 +; CHECK-NEXT: rev64 v5.4s, v1.4s +; CHECK-NEXT: rev64 v6.4s, v2.4s +; CHECK-NEXT: rev64 v7.4s, v3.4s +; CHECK-NEXT: sub v4.4s, v0.4s, v4.4s +; CHECK-NEXT: addp v0.4s, v1.4s, v0.4s +; CHECK-NEXT: sub v5.4s, v1.4s, v5.4s +; CHECK-NEXT: sub v6.4s, v2.4s, v6.4s +; CHECK-NEXT: addp v2.4s, v2.4s, v3.4s +; CHECK-NEXT: sub v1.4s, v3.4s, v7.4s +; CHECK-NEXT: ext v3.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: ext v7.16b, v5.16b, v4.16b, #4 +; CHECK-NEXT: mov v4.s[3], v5.s[2] +; CHECK-NEXT: zip2 v16.4s, v6.4s, v1.4s +; CHECK-NEXT: zip1 v1.4s, v6.4s, v1.4s +; CHECK-NEXT: uzp2 v6.4s, v2.4s, v0.4s +; CHECK-NEXT: ext v5.16b, v7.16b, v5.16b, #4 +; CHECK-NEXT: uzp1 v0.4s, v2.4s, v0.4s +; CHECK-NEXT: uzp1 v7.4s, v2.4s, v3.4s +; CHECK-NEXT: uzp2 v2.4s, v2.4s, v3.4s +; CHECK-NEXT: mov v16.d[1], v4.d[1] +; CHECK-NEXT: rev64 v3.4s, v6.4s +; CHECK-NEXT: mov v1.d[1], v5.d[1] +; CHECK-NEXT: rev64 v0.4s, v0.4s +; CHECK-NEXT: sub v2.4s, v7.4s, v2.4s +; CHECK-NEXT: sub v4.4s, v1.4s, v16.4s +; CHECK-NEXT: add v0.4s, v3.4s, v0.4s +; CHECK-NEXT: add v1.4s, v16.4s, v1.4s +; CHECK-NEXT: zip1 v3.4s, v2.4s, v4.4s +; CHECK-NEXT: zip1 v5.4s, v0.4s, v1.4s +; CHECK-NEXT: uzp2 v6.4s, v0.4s, v1.4s +; CHECK-NEXT: zip2 v7.4s, v0.4s, v1.4s +; CHECK-NEXT: zip2 v17.4s, v2.4s, v4.4s +; CHECK-NEXT: ext v16.16b, v2.16b, v3.16b, #8 +; CHECK-NEXT: trn2 v5.4s, v0.4s, v5.4s +; CHECK-NEXT: uzp2 v6.4s, v6.4s, v0.4s +; CHECK-NEXT: mov v2.s[3], v4.s[2] +; CHECK-NEXT: mov v0.s[1], v1.s[1] +; CHECK-NEXT: mov v5.d[1], v16.d[1] +; CHECK-NEXT: mov v6.d[1], v17.d[1] +; CHECK-NEXT: mov v7.d[1], v2.d[1] +; CHECK-NEXT: mov v0.d[1], v3.d[1] +; CHECK-NEXT: add 
v1.4s, v6.4s, v7.4s +; CHECK-NEXT: sub v2.4s, v7.4s, v6.4s +; CHECK-NEXT: add v3.4s, v5.4s, v0.4s ; CHECK-NEXT: sub v0.4s, v0.4s, v5.4s -; CHECK-NEXT: zip2 v5.4s, v3.4s, v1.4s -; CHECK-NEXT: ext v2.16b, v6.16b, v2.16b, #4 -; CHECK-NEXT: ext v6.16b, v7.16b, v0.16b, #8 -; CHECK-NEXT: zip1 v16.4s, v4.4s, v0.4s -; CHECK-NEXT: zip2 v17.4s, v4.4s, v0.4s -; CHECK-NEXT: zip2 v0.4s, v0.4s, v4.4s -; CHECK-NEXT: ext v4.16b, v6.16b, v7.16b, #4 -; CHECK-NEXT: zip1 v6.4s, v1.4s, v3.4s -; CHECK-NEXT: zip2 v1.4s, v1.4s, v3.4s +; CHECK-NEXT: zip2 v4.4s, v1.4s, v2.4s +; CHECK-NEXT: ext v5.16b, v1.16b, v1.16b, #4 +; CHECK-NEXT: ext v6.16b, v3.16b, v3.16b, #4 +; CHECK-NEXT: zip2 v7.4s, v2.4s, v1.4s +; CHECK-NEXT: zip2 v16.4s, v0.4s, v3.4s +; CHECK-NEXT: zip2 v17.4s, v3.4s, v0.4s +; CHECK-NEXT: zip1 v1.4s, v1.4s, v2.4s +; CHECK-NEXT: ext v2.16b, v5.16b, v2.16b, #8 +; CHECK-NEXT: ext v18.16b, v6.16b, v0.16b, #8 +; CHECK-NEXT: zip1 v0.4s, v3.4s, v0.4s +; CHECK-NEXT: add v3.4s, v16.4s, v7.4s +; CHECK-NEXT: sub v4.4s, v4.4s, v17.4s +; CHECK-NEXT: ext v2.16b, v2.16b, v5.16b, #4 +; CHECK-NEXT: ext v5.16b, v18.16b, v6.16b, #4 +; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmlt v1.8h, v4.8h, #0 +; CHECK-NEXT: cmlt v6.8h, v3.8h, #0 +; CHECK-NEXT: add v3.4s, v6.4s, v3.4s +; CHECK-NEXT: add v4.4s, v1.4s, v4.4s +; CHECK-NEXT: add v2.4s, v5.4s, v2.4s +; CHECK-NEXT: cmlt v5.8h, v0.8h, #0 ; CHECK-NEXT: add v0.4s, v5.4s, v0.4s -; CHECK-NEXT: add v2.4s, v2.4s, v4.4s -; CHECK-NEXT: sub v3.4s, v16.4s, v6.4s -; CHECK-NEXT: sub v1.4s, v17.4s, v1.4s -; CHECK-NEXT: cmlt v6.8h, v0.8h, #0 -; CHECK-NEXT: cmlt v7.8h, v1.8h, #0 -; CHECK-NEXT: cmlt v4.8h, v3.8h, #0 -; CHECK-NEXT: add v0.4s, v6.4s, v0.4s -; CHECK-NEXT: add v1.4s, v7.4s, v1.4s -; CHECK-NEXT: cmlt v5.8h, v2.8h, #0 -; CHECK-NEXT: add v3.4s, v4.4s, v3.4s -; CHECK-NEXT: eor v1.16b, v1.16b, v7.16b -; CHECK-NEXT: eor v0.16b, v0.16b, v6.16b +; CHECK-NEXT: eor v1.16b, v4.16b, v1.16b +; CHECK-NEXT: eor v3.16b, v3.16b, v6.16b +; CHECK-NEXT: cmlt v4.8h, v2.8h, #0 +; CHECK-NEXT: add v1.4s, v3.4s, v1.4s +; CHECK-NEXT: add v2.4s, v4.4s, v2.4s +; CHECK-NEXT: eor v0.16b, v0.16b, v5.16b ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s -; CHECK-NEXT: add v2.4s, v5.4s, v2.4s -; CHECK-NEXT: eor v1.16b, v3.16b, v4.16b -; CHECK-NEXT: add v0.4s, v1.4s, v0.4s -; CHECK-NEXT: eor v1.16b, v2.16b, v5.16b +; CHECK-NEXT: eor v1.16b, v2.16b, v4.16b ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w8, s0 diff --git a/llvm/test/CodeGen/AArch64/reduce-xor.ll b/llvm/test/CodeGen/AArch64/reduce-xor.ll --- a/llvm/test/CodeGen/AArch64/reduce-xor.ll +++ b/llvm/test/CodeGen/AArch64/reduce-xor.ll @@ -88,14 +88,14 @@ ; GISEL-NEXT: fmov w11, s3 ; GISEL-NEXT: fmov w12, s4 ; GISEL-NEXT: fmov w13, s5 +; GISEL-NEXT: fmov w14, s6 ; GISEL-NEXT: eor w8, w8, w9 -; GISEL-NEXT: fmov w9, s6 +; GISEL-NEXT: fmov w9, s7 ; GISEL-NEXT: eor w10, w10, w11 -; GISEL-NEXT: fmov w11, s7 -; GISEL-NEXT: eor w12, w12, w13 +; GISEL-NEXT: eor w11, w12, w13 ; GISEL-NEXT: eor w8, w8, w10 -; GISEL-NEXT: eor w9, w9, w11 -; GISEL-NEXT: eor w9, w12, w9 +; GISEL-NEXT: eor w9, w14, w9 +; GISEL-NEXT: eor w9, w11, w9 ; GISEL-NEXT: eor w8, w8, w9 ; GISEL-NEXT: and w0, w8, #0x1 ; GISEL-NEXT: ret @@ -121,39 +121,39 @@ ; GISEL-NEXT: mov b6, v0.b[6] ; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: fmov w8, s0 -; GISEL-NEXT: fmov w9, s1 -; GISEL-NEXT: fmov w10, s2 -; GISEL-NEXT: fmov w11, s3 ; GISEL-NEXT: mov b16, v0.b[8] ; GISEL-NEXT: mov b17, v0.b[9] ; GISEL-NEXT: mov b18, v0.b[10] ; GISEL-NEXT: mov b19, v0.b[11] -; GISEL-NEXT: 
-; GISEL-NEXT: eor w8, w8, w9
-; GISEL-NEXT: eor w9, w10, w11
-; GISEL-NEXT: fmov w10, s4
-; GISEL-NEXT: fmov w11, s5
+; GISEL-NEXT: fmov w9, s1
+; GISEL-NEXT: fmov w10, s2
+; GISEL-NEXT: fmov w11, s3
 ; GISEL-NEXT: fmov w12, s6
-; GISEL-NEXT: fmov w13, s7
 ; GISEL-NEXT: mov b20, v0.b[12]
 ; GISEL-NEXT: mov b21, v0.b[13]
+; GISEL-NEXT: fmov w13, s7
 ; GISEL-NEXT: mov b22, v0.b[14]
 ; GISEL-NEXT: mov b23, v0.b[15]
-; GISEL-NEXT: eor w10, w10, w11
-; GISEL-NEXT: eor w11, w12, w13
-; GISEL-NEXT: fmov w12, s16
-; GISEL-NEXT: fmov w13, s17
+; GISEL-NEXT: eor w8, w8, w9
+; GISEL-NEXT: eor w9, w10, w11
+; GISEL-NEXT: fmov w10, s4
+; GISEL-NEXT: eor w8, w8, w9
+; GISEL-NEXT: fmov w11, s5
 ; GISEL-NEXT: fmov w14, s18
 ; GISEL-NEXT: fmov w15, s19
 ; GISEL-NEXT: fmov w16, s22
 ; GISEL-NEXT: fmov w17, s23
+; GISEL-NEXT: eor w10, w10, w11
+; GISEL-NEXT: eor w11, w12, w13
+; GISEL-NEXT: fmov w12, s16
+; GISEL-NEXT: eor w9, w10, w11
+; GISEL-NEXT: fmov w13, s17
 ; GISEL-NEXT: eor w8, w8, w9
 ; GISEL-NEXT: eor w12, w12, w13
-; GISEL-NEXT: eor w9, w10, w11
 ; GISEL-NEXT: eor w13, w14, w15
 ; GISEL-NEXT: fmov w14, s20
 ; GISEL-NEXT: fmov w15, s21
 ; GISEL-NEXT: eor w10, w12, w13
-; GISEL-NEXT: eor w8, w8, w9
 ; GISEL-NEXT: eor w14, w14, w15
 ; GISEL-NEXT: eor w15, w16, w17
 ; GISEL-NEXT: eor w11, w14, w15
@@ -181,39 +181,39 @@
 ; GISEL-NEXT: mov b6, v0.b[6]
 ; GISEL-NEXT: mov b7, v0.b[7]
 ; GISEL-NEXT: fmov w8, s0
-; GISEL-NEXT: fmov w9, s1
-; GISEL-NEXT: fmov w10, s2
-; GISEL-NEXT: fmov w11, s3
 ; GISEL-NEXT: mov b16, v0.b[8]
 ; GISEL-NEXT: mov b17, v0.b[9]
 ; GISEL-NEXT: mov b18, v0.b[10]
 ; GISEL-NEXT: mov b19, v0.b[11]
-; GISEL-NEXT: eor w8, w8, w9
-; GISEL-NEXT: eor w9, w10, w11
-; GISEL-NEXT: fmov w10, s4
-; GISEL-NEXT: fmov w11, s5
+; GISEL-NEXT: fmov w9, s1
+; GISEL-NEXT: fmov w10, s2
+; GISEL-NEXT: fmov w11, s3
 ; GISEL-NEXT: fmov w12, s6
-; GISEL-NEXT: fmov w13, s7
 ; GISEL-NEXT: mov b20, v0.b[12]
 ; GISEL-NEXT: mov b21, v0.b[13]
+; GISEL-NEXT: fmov w13, s7
 ; GISEL-NEXT: mov b22, v0.b[14]
 ; GISEL-NEXT: mov b23, v0.b[15]
-; GISEL-NEXT: eor w10, w10, w11
-; GISEL-NEXT: eor w11, w12, w13
-; GISEL-NEXT: fmov w12, s16
-; GISEL-NEXT: fmov w13, s17
+; GISEL-NEXT: eor w8, w8, w9
+; GISEL-NEXT: eor w9, w10, w11
+; GISEL-NEXT: fmov w10, s4
+; GISEL-NEXT: eor w8, w8, w9
+; GISEL-NEXT: fmov w11, s5
 ; GISEL-NEXT: fmov w14, s18
 ; GISEL-NEXT: fmov w15, s19
 ; GISEL-NEXT: fmov w16, s22
 ; GISEL-NEXT: fmov w17, s23
+; GISEL-NEXT: eor w10, w10, w11
+; GISEL-NEXT: eor w11, w12, w13
+; GISEL-NEXT: fmov w12, s16
+; GISEL-NEXT: eor w9, w10, w11
+; GISEL-NEXT: fmov w13, s17
 ; GISEL-NEXT: eor w8, w8, w9
 ; GISEL-NEXT: eor w12, w12, w13
-; GISEL-NEXT: eor w9, w10, w11
 ; GISEL-NEXT: eor w13, w14, w15
 ; GISEL-NEXT: fmov w14, s20
 ; GISEL-NEXT: fmov w15, s21
 ; GISEL-NEXT: eor w10, w12, w13
-; GISEL-NEXT: eor w8, w8, w9
 ; GISEL-NEXT: eor w14, w14, w15
 ; GISEL-NEXT: eor w15, w16, w17
 ; GISEL-NEXT: eor w11, w14, w15
@@ -319,14 +319,14 @@
 ; GISEL-NEXT: fmov w11, s3
 ; GISEL-NEXT: fmov w12, s4
 ; GISEL-NEXT: fmov w13, s5
+; GISEL-NEXT: fmov w14, s6
 ; GISEL-NEXT: eor w8, w8, w9
-; GISEL-NEXT: fmov w9, s6
+; GISEL-NEXT: fmov w9, s7
 ; GISEL-NEXT: eor w10, w10, w11
-; GISEL-NEXT: fmov w11, s7
-; GISEL-NEXT: eor w12, w12, w13
+; GISEL-NEXT: eor w11, w12, w13
 ; GISEL-NEXT: eor w8, w8, w10
-; GISEL-NEXT: eor w9, w9, w11
-; GISEL-NEXT: eor w9, w12, w9
+; GISEL-NEXT: eor w9, w14, w9
+; GISEL-NEXT: eor w9, w11, w9
 ; GISEL-NEXT: eor w0, w8, w9
 ; GISEL-NEXT: ret
 %xor_result = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> %a)
@@ -362,14 +362,14 @@
 ; GISEL-NEXT: fmov w11, s3
 ; GISEL-NEXT: fmov w12, s4
 ; GISEL-NEXT: fmov w13, s5
+; GISEL-NEXT: fmov w14, s6
 ; GISEL-NEXT: eor w8, w8, w9
-; GISEL-NEXT: fmov w9, s6
+; GISEL-NEXT: fmov w9, s7
 ; GISEL-NEXT: eor w10, w10, w11
-; GISEL-NEXT: fmov w11, s7
-; GISEL-NEXT: eor w12, w12, w13
+; GISEL-NEXT: eor w11, w12, w13
 ; GISEL-NEXT: eor w8, w8, w10
-; GISEL-NEXT: eor w9, w9, w11
-; GISEL-NEXT: eor w9, w12, w9
+; GISEL-NEXT: eor w9, w14, w9
+; GISEL-NEXT: eor w9, w11, w9
 ; GISEL-NEXT: eor w0, w8, w9
 ; GISEL-NEXT: ret
 %xor_result = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> %a)
@@ -407,14 +407,14 @@
 ; GISEL-NEXT: fmov w11, s3
 ; GISEL-NEXT: fmov w12, s4
 ; GISEL-NEXT: fmov w13, s5
+; GISEL-NEXT: fmov w14, s6
 ; GISEL-NEXT: eor w8, w8, w9
-; GISEL-NEXT: fmov w9, s6
+; GISEL-NEXT: fmov w9, s7
 ; GISEL-NEXT: eor w10, w10, w11
-; GISEL-NEXT: fmov w11, s7
-; GISEL-NEXT: eor w12, w12, w13
+; GISEL-NEXT: eor w11, w12, w13
 ; GISEL-NEXT: eor w8, w8, w10
-; GISEL-NEXT: eor w9, w9, w11
-; GISEL-NEXT: eor w9, w12, w9
+; GISEL-NEXT: eor w9, w14, w9
+; GISEL-NEXT: eor w9, w11, w9
 ; GISEL-NEXT: eor w0, w8, w9
 ; GISEL-NEXT: ret
 %xor_result = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> %a)
diff --git a/llvm/test/CodeGen/AArch64/regress-tblgen-chains.ll b/llvm/test/CodeGen/AArch64/regress-tblgen-chains.ll
--- a/llvm/test/CodeGen/AArch64/regress-tblgen-chains.ll
+++ b/llvm/test/CodeGen/AArch64/regress-tblgen-chains.ll
@@ -25,9 +25,9 @@
 ; CHECK-NEXT: bl _bar
 ; CHECK-NEXT: ldurb w8, [x29, #-1]
 ; CHECK-NEXT: add x8, x8, #1
-; CHECK-NEXT: and x0, x8, #0xff
 ; CHECK-NEXT: sturb w8, [x29, #-1]
 ; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT: and x0, x8, #0xff
 ; CHECK-NEXT: add sp, sp, #32
 ; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/rotate-extract.ll b/llvm/test/CodeGen/AArch64/rotate-extract.ll
--- a/llvm/test/CodeGen/AArch64/rotate-extract.ll
+++ b/llvm/test/CodeGen/AArch64/rotate-extract.ll
@@ -50,7 +50,7 @@
 define i64 @ror_extract_udiv(i64 %i) nounwind {
 ; CHECK-LABEL: ror_extract_udiv:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #-6148914691236517206
+; CHECK-NEXT: mov x8, #-6148914691236517206 // =0xaaaaaaaaaaaaaaaa
 ; CHECK-NEXT: movk x8, #43691
 ; CHECK-NEXT: umulh x8, x0, x8
 ; CHECK-NEXT: lsr x8, x8, #1
@@ -127,15 +127,15 @@
 define i32 @no_extract_udiv(i32 %i) nounwind {
 ; CHECK-LABEL: no_extract_udiv:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #33437
-; CHECK-NEXT: mov w9, #43691
-; CHECK-NEXT: movk w8, #21399, lsl #16
-; CHECK-NEXT: movk w9, #43690, lsl #16
+; CHECK-NEXT: mov w8, #43691 // =0xaaab
+; CHECK-NEXT: mov w9, #33437 // =0x829d
+; CHECK-NEXT: movk w8, #43690, lsl #16
+; CHECK-NEXT: movk w9, #21399, lsl #16
 ; CHECK-NEXT: umull x8, w0, w8
 ; CHECK-NEXT: umull x9, w0, w9
-; CHECK-NEXT: lsr x8, x8, #32
-; CHECK-NEXT: lsr x9, x9, #33
-; CHECK-NEXT: extr w0, w9, w8, #4
+; CHECK-NEXT: lsr x8, x8, #33
+; CHECK-NEXT: lsr x9, x9, #32
+; CHECK-NEXT: extr w0, w8, w9, #4
 ; CHECK-NEXT: ret
 %lhs_div = udiv i32 %i, 3
 %rhs_div = udiv i32 %i, 49
diff --git a/llvm/test/CodeGen/AArch64/sadd_sat.ll b/llvm/test/CodeGen/AArch64/sadd_sat.ll
--- a/llvm/test/CodeGen/AArch64/sadd_sat.ll
+++ b/llvm/test/CodeGen/AArch64/sadd_sat.ll
@@ -36,11 +36,11 @@
 ; CHECK-LABEL: func16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: sxth w8, w0
-; CHECK-NEXT: mov w9, #32767
+; CHECK-NEXT: mov w9, #32767 // =0x7fff
 ; CHECK-NEXT: add w8, w8, w1, sxth
 ; CHECK-NEXT: cmp w8, w9
 ; CHECK-NEXT: csel w8, w8, w9, lt
-; CHECK-NEXT: mov w9, #-32768
+; CHECK-NEXT: mov w9, #-32768 // =0xffff8000
 ; CHECK-NEXT: cmn w8, #8, lsl #12 // =32768
 ; CHECK-NEXT: csel w0, w8, w9, gt
 ; CHECK-NEXT: ret
@@ -51,12 +51,12 @@
 define i8 @func8(i8 %x, i8 %y) nounwind {
 ; CHECK-LABEL: func8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: mov w9, #127
-; CHECK-NEXT: add w8, w8, w1, sxtb
-; CHECK-NEXT: cmp w8, #127
-; CHECK-NEXT: csel w8, w8, w9, lt
-; CHECK-NEXT: mov w9, #-128
+; CHECK-NEXT: sxtb w9, w0
+; CHECK-NEXT: mov w8, #127 // =0x7f
+; CHECK-NEXT: add w9, w9, w1, sxtb
+; CHECK-NEXT: cmp w9, #127
+; CHECK-NEXT: csel w8, w9, w8, lt
+; CHECK-NEXT: mov w9, #-128 // =0xffffff80
 ; CHECK-NEXT: cmn w8, #128
 ; CHECK-NEXT: csel w0, w8, w9, gt
 ; CHECK-NEXT: ret
@@ -67,13 +67,13 @@
 define i4 @func3(i4 %x, i4 %y) nounwind {
 ; CHECK-LABEL: func3:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: lsl w8, w1, #28
-; CHECK-NEXT: sbfx w9, w0, #0, #4
-; CHECK-NEXT: add w8, w9, w8, asr #28
-; CHECK-NEXT: mov w9, #7
-; CHECK-NEXT: cmp w8, #7
-; CHECK-NEXT: csel w8, w8, w9, lt
-; CHECK-NEXT: mov w9, #-8
+; CHECK-NEXT: lsl w9, w1, #28
+; CHECK-NEXT: sbfx w10, w0, #0, #4
+; CHECK-NEXT: mov w8, #7 // =0x7
+; CHECK-NEXT: add w9, w10, w9, asr #28
+; CHECK-NEXT: cmp w9, #7
+; CHECK-NEXT: csel w8, w9, w8, lt
+; CHECK-NEXT: mov w9, #-8 // =0xfffffff8
 ; CHECK-NEXT: cmn w8, #8
 ; CHECK-NEXT: csel w0, w8, w9, gt
 ; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_plus.ll b/llvm/test/CodeGen/AArch64/sadd_sat_plus.ll
--- a/llvm/test/CodeGen/AArch64/sadd_sat_plus.ll
+++ b/llvm/test/CodeGen/AArch64/sadd_sat_plus.ll
@@ -37,13 +37,13 @@
 define i16 @func16(i16 %x, i16 %y, i16 %z) nounwind {
 ; CHECK-LABEL: func16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mul w9, w1, w2
-; CHECK-NEXT: sxth w10, w0
-; CHECK-NEXT: mov w8, #32767
-; CHECK-NEXT: add w9, w10, w9, sxth
-; CHECK-NEXT: cmp w9, w8
-; CHECK-NEXT: csel w8, w9, w8, lt
-; CHECK-NEXT: mov w9, #-32768
+; CHECK-NEXT: mul w8, w1, w2
+; CHECK-NEXT: sxth w9, w0
+; CHECK-NEXT: add w8, w9, w8, sxth
+; CHECK-NEXT: mov w9, #32767 // =0x7fff
+; CHECK-NEXT: cmp w8, w9
+; CHECK-NEXT: csel w8, w8, w9, lt
+; CHECK-NEXT: mov w9, #-32768 // =0xffff8000
 ; CHECK-NEXT: cmn w8, #8, lsl #12 // =32768
 ; CHECK-NEXT: csel w0, w8, w9, gt
 ; CHECK-NEXT: ret
@@ -55,13 +55,13 @@
 define i8 @func8(i8 %x, i8 %y, i8 %z) nounwind {
 ; CHECK-LABEL: func8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mul w9, w1, w2
-; CHECK-NEXT: sxtb w10, w0
-; CHECK-NEXT: mov w8, #127
-; CHECK-NEXT: add w9, w10, w9, sxtb
-; CHECK-NEXT: cmp w9, #127
-; CHECK-NEXT: csel w8, w9, w8, lt
-; CHECK-NEXT: mov w9, #-128
+; CHECK-NEXT: mul w8, w1, w2
+; CHECK-NEXT: sxtb w9, w0
+; CHECK-NEXT: add w8, w9, w8, sxtb
+; CHECK-NEXT: mov w9, #127 // =0x7f
+; CHECK-NEXT: cmp w8, #127
+; CHECK-NEXT: csel w8, w8, w9, lt
+; CHECK-NEXT: mov w9, #-128 // =0xffffff80
 ; CHECK-NEXT: cmn w8, #128
 ; CHECK-NEXT: csel w0, w8, w9, gt
 ; CHECK-NEXT: ret
@@ -73,14 +73,14 @@
 define i4 @func4(i4 %x, i4 %y, i4 %z) nounwind {
 ; CHECK-LABEL: func4:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mul w9, w1, w2
-; CHECK-NEXT: sbfx w10, w0, #0, #4
-; CHECK-NEXT: mov w8, #7
-; CHECK-NEXT: lsl w9, w9, #28
-; CHECK-NEXT: add w9, w10, w9, asr #28
-; CHECK-NEXT: cmp w9, #7
-; CHECK-NEXT: csel w8, w9, w8, lt
-; CHECK-NEXT: mov w9, #-8
+; CHECK-NEXT: mul w8, w1, w2
+; CHECK-NEXT: sbfx w9, w0, #0, #4
+; CHECK-NEXT: lsl w8, w8, #28
+; CHECK-NEXT: add w8, w9, w8, asr #28
+; CHECK-NEXT: mov w9, #7 // =0x7
+; CHECK-NEXT: cmp w8, #7
+; CHECK-NEXT: csel w8, w8, w9, lt
+; CHECK-NEXT: mov w9, #-8 // =0xfffffff8
 ; CHECK-NEXT: cmn w8, #8
 ; CHECK-NEXT: csel w0, w8, w9, gt
 ; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
--- a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
+++ b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll
@@ -44,8 +44,8 @@
 define <32 x i8> @v32i8(<32 x i8> %x, <32 x i8> %y) nounwind {
 ; CHECK-LABEL: v32i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: sqadd v0.16b, v0.16b, v2.16b
 ; CHECK-NEXT: sqadd v1.16b, v1.16b, v3.16b
+; CHECK-NEXT: sqadd v0.16b, v0.16b, v2.16b
 ; CHECK-NEXT: ret
 %z = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %x, <32 x i8> %y)
 ret <32 x i8> %z
@@ -75,8 +75,8 @@
 define <16 x i16> @v16i16(<16 x i16> %x, <16 x i16> %y) nounwind {
 ; CHECK-LABEL: v16i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: sqadd v0.8h, v0.8h, v2.8h
 ; CHECK-NEXT: sqadd v1.8h, v1.8h, v3.8h
+; CHECK-NEXT: sqadd v0.8h, v0.8h, v2.8h
 ; CHECK-NEXT: ret
 %z = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %x, <16 x i16> %y)
 ret <16 x i16> %z
@@ -97,9 +97,9 @@
 define void @v8i8(ptr %px, ptr %py, ptr %pz) nounwind {
 ; CHECK-LABEL: v8i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x1]
-; CHECK-NEXT: ldr d1, [x0]
-; CHECK-NEXT: sqadd v0.8b, v1.8b, v0.8b
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: sqadd v0.8b, v0.8b, v1.8b
 ; CHECK-NEXT: str d0, [x2]
 ; CHECK-NEXT: ret
 %x = load <8 x i8>, ptr %px
@@ -116,8 +116,8 @@
 ; CHECK-NEXT: ldr s1, [x1]
 ; CHECK-NEXT: sshll v0.8h, v0.8b, #0
 ; CHECK-NEXT: sshll v1.8h, v1.8b, #0
-; CHECK-NEXT: shl v0.4h, v0.4h, #8
 ; CHECK-NEXT: shl v1.4h, v1.4h, #8
+; CHECK-NEXT: shl v0.4h, v0.4h, #8
 ; CHECK-NEXT: sqadd v0.4h, v0.4h, v1.4h
 ; CHECK-NEXT: sshr v0.4h, v0.4h, #8
 ; CHECK-NEXT: xtn v0.8b, v0.8h
@@ -133,15 +133,15 @@
 define void @v2i8(ptr %px, ptr %py, ptr %pz) nounwind {
 ; CHECK-LABEL: v2i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ld1 { v0.b }[0], [x1]
-; CHECK-NEXT: add x8, x1, #1
-; CHECK-NEXT: ld1 { v1.b }[0], [x0]
-; CHECK-NEXT: add x9, x0, #1
+; CHECK-NEXT: ld1 { v0.b }[0], [x0]
+; CHECK-NEXT: ld1 { v1.b }[0], [x1]
+; CHECK-NEXT: add x8, x0, #1
+; CHECK-NEXT: add x9, x1, #1
 ; CHECK-NEXT: ld1 { v0.b }[4], [x8]
 ; CHECK-NEXT: ld1 { v1.b }[4], [x9]
-; CHECK-NEXT: shl v0.2s, v0.2s, #24
 ; CHECK-NEXT: shl v1.2s, v1.2s, #24
-; CHECK-NEXT: sqadd v0.2s, v1.2s, v0.2s
+; CHECK-NEXT: shl v0.2s, v0.2s, #24
+; CHECK-NEXT: sqadd v0.2s, v0.2s, v1.2s
 ; CHECK-NEXT: ushr v0.2s, v0.2s, #24
 ; CHECK-NEXT: mov w8, v0.s[1]
 ; CHECK-NEXT: fmov w9, s0
@@ -158,9 +158,9 @@
 define void @v4i16(ptr %px, ptr %py, ptr %pz) nounwind {
 ; CHECK-LABEL: v4i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x1]
-; CHECK-NEXT: ldr d1, [x0]
-; CHECK-NEXT: sqadd v0.4h, v1.4h, v0.4h
+; CHECK-NEXT: ldr d0, [x0]
+; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: sqadd v0.4h, v0.4h, v1.4h
 ; CHECK-NEXT: str d0, [x2]
 ; CHECK-NEXT: ret
 %x = load <4 x i16>, ptr %px
@@ -173,15 +173,15 @@
 define void @v2i16(ptr %px, ptr %py, ptr %pz) nounwind {
 ; CHECK-LABEL: v2i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ld1 { v0.h }[0], [x1]
-; CHECK-NEXT: add x8, x1, #2
-; CHECK-NEXT: ld1 { v1.h }[0], [x0]
-; CHECK-NEXT: add x9, x0, #2
+; CHECK-NEXT: ld1 { v0.h }[0], [x0]
+; CHECK-NEXT: ld1 { v1.h }[0], [x1]
+; CHECK-NEXT: add x8, x0, #2
+; CHECK-NEXT: add x9, x1, #2
 ; CHECK-NEXT: ld1 { v0.h }[2], [x8]
 ; CHECK-NEXT: ld1 { v1.h }[2], [x9]
-; CHECK-NEXT: shl v0.2s, v0.2s, #16
 ; CHECK-NEXT: shl v1.2s, v1.2s, #16
-; CHECK-NEXT: sqadd v0.2s, v1.2s, v0.2s
+; CHECK-NEXT: shl v0.2s, v0.2s, #16
+; CHECK-NEXT: sqadd v0.2s, v0.2s, v1.2s
 ; CHECK-NEXT: ushr v0.2s, v0.2s, #16
 ; CHECK-NEXT: mov w8, v0.s[1]
 ; CHECK-NEXT: fmov w9, s0
@@ -224,9 +224,9 @@
 define void @v1i8(ptr %px, ptr %py, ptr %pz) nounwind {
 ; CHECK-LABEL: v1i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr b0, [x1]
-; CHECK-NEXT: ldr b1, [x0]
-; CHECK-NEXT: sqadd v0.8b, v1.8b, v0.8b
+; CHECK-NEXT: ldr b0, [x0]
+; CHECK-NEXT: ldr b1, [x1]
+; CHECK-NEXT: sqadd v0.8b, v0.8b, v1.8b
 ; CHECK-NEXT: st1 { v0.b }[0], [x2]
 ; CHECK-NEXT: ret
 %x = load <1 x i8>, ptr %px
@@ -239,9 +239,9 @@
 define void @v1i16(ptr %px, ptr %py, ptr %pz) nounwind {
 ; CHECK-LABEL: v1i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr h0, [x1]
-; CHECK-NEXT: ldr h1, [x0]
-; CHECK-NEXT: sqadd v0.4h, v1.4h, v0.4h
+; CHECK-NEXT: ldr h0, [x0]
+; CHECK-NEXT: ldr h1, [x1]
+; CHECK-NEXT: sqadd v0.4h, v0.4h, v1.4h
 ; CHECK-NEXT: str h0, [x2]
 ; CHECK-NEXT: ret
 %x = load <1 x i16>, ptr %px
@@ -297,8 +297,8 @@
 define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind {
 ; CHECK-LABEL: v8i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: sqadd v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT: sqadd v1.4s, v1.4s, v3.4s
+; CHECK-NEXT: sqadd v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT: ret
 %z = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> %x, <8 x i32> %y)
 ret <8 x i32> %z
@@ -328,8 +328,8 @@
 define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind {
 ; CHECK-LABEL: v4i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: sqadd v0.2d, v0.2d, v2.2d
 ; CHECK-NEXT: sqadd v1.2d, v1.2d, v3.2d
+; CHECK-NEXT: sqadd v0.2d, v0.2d, v2.2d
 ; CHECK-NEXT: ret
 %z = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> %x, <4 x i64> %y)
 ret <4 x i64> %z
@@ -353,16 +353,16 @@
 ; CHECK-NEXT: adds x8, x2, x6
 ; CHECK-NEXT: adcs x9, x3, x7
 ; CHECK-NEXT: asr x10, x9, #63
+; CHECK-NEXT: eor x11, x10, #0x8000000000000000
 ; CHECK-NEXT: csel x2, x10, x8, vs
-; CHECK-NEXT: eor x8, x10, #0x8000000000000000
-; CHECK-NEXT: csel x3, x8, x9, vs
+; CHECK-NEXT: csel x3, x11, x9, vs
 ; CHECK-NEXT: adds x8, x0, x4
 ; CHECK-NEXT: adcs x9, x1, x5
 ; CHECK-NEXT: asr x10, x9, #63
 ; CHECK-NEXT: csel x8, x10, x8, vs
-; CHECK-NEXT: eor x10, x10, #0x8000000000000000
-; CHECK-NEXT: csel x1, x10, x9, vs
+; CHECK-NEXT: eor x11, x10, #0x8000000000000000
 ; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: csel x1, x11, x9, vs
 ; CHECK-NEXT: mov v0.d[1], x1
 ; CHECK-NEXT: fmov x0, d0
 ; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sat-add.ll b/llvm/test/CodeGen/AArch64/sat-add.ll
--- a/llvm/test/CodeGen/AArch64/sat-add.ll
+++ b/llvm/test/CodeGen/AArch64/sat-add.ll
@@ -10,7 +10,7 @@
 ; CHECK-LABEL: unsigned_sat_constant_i8_using_min:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: and w9, w0, #0xff
-; CHECK-NEXT: mov w8, #-43
+; CHECK-NEXT: mov w8, #-43 // =0xffffffd5
 ; CHECK-NEXT: cmp w9, #213
 ; CHECK-NEXT: csel w8, w0, w8, lo
 ; CHECK-NEXT: add w0, w8, #42
@@ -52,9 +52,9 @@
 define i16 @unsigned_sat_constant_i16_using_min(i16 %x) {
 ; CHECK-LABEL: unsigned_sat_constant_i16_using_min:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #65493
+; CHECK-NEXT: mov w8, #65493 // =0xffd5
 ; CHECK-NEXT: cmp w8, w0, uxth
-; CHECK-NEXT: mov w8, #-43
+; CHECK-NEXT: mov w8, #-43 // =0xffffffd5
 ; CHECK-NEXT: csel w8, w0, w8, hi
 ; CHECK-NEXT: add w0, w8, #42
 ; CHECK-NEXT: ret
@@ -81,7 +81,7 @@
 define i16 @unsigned_sat_constant_i16_using_cmp_notval(i16 %x) {
 ; CHECK-LABEL: unsigned_sat_constant_i16_using_cmp_notval:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #65493
+; CHECK-NEXT: mov w8, #65493 // =0xffd5
 ; CHECK-NEXT: add w9, w0, #42
 ; CHECK-NEXT: cmp w8, w0, uxth
 ; CHECK-NEXT: csinv w0, w9, wzr, hs
@@ -95,7 +95,7 @@
 define i32 @unsigned_sat_constant_i32_using_min(i32 %x) {
 ; CHECK-LABEL: unsigned_sat_constant_i32_using_min:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #-43
+; CHECK-NEXT: mov w8, #-43 // =0xffffffd5
 ; CHECK-NEXT: cmn w0, #43
 ; CHECK-NEXT: csel w8, w0, w8, lo
 ; CHECK-NEXT: add w0, w8, #42
@@ -133,7 +133,7 @@
 define i64 @unsigned_sat_constant_i64_using_min(i64 %x) {
 ; CHECK-LABEL: unsigned_sat_constant_i64_using_min:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #-43
+; CHECK-NEXT: mov x8, #-43 // =0xffffffffffffffd5
 ; CHECK-NEXT: cmn x0, #43
 ; CHECK-NEXT: csel x8, x0, x8, lo
 ; CHECK-NEXT: add x0, x8, #42
@@ -217,9 +217,9 @@
 define i16 @unsigned_sat_variable_i16_using_min(i16 %x, i16 %y) {
 ; CHECK-LABEL: unsigned_sat_variable_i16_using_min:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w1
-; CHECK-NEXT: and w9, w0, #0xffff
-; CHECK-NEXT: cmp w9, w8, uxth
+; CHECK-NEXT: and w8, w0, #0xffff
+; CHECK-NEXT: mvn w9, w1
+; CHECK-NEXT: cmp w8, w9, uxth
 ; CHECK-NEXT: csinv w8, w0, w1, lo
 ; CHECK-NEXT: add w0, w8, w1
 ; CHECK-NEXT: ret
@@ -346,9 +346,9 @@
 ; CHECK-LABEL: unsigned_sat_constant_v16i8_using_min:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: movi v1.16b, #213
-; CHECK-NEXT: movi v2.16b, #42
 ; CHECK-NEXT: umin v0.16b, v0.16b, v1.16b
-; CHECK-NEXT: add v0.16b, v0.16b, v2.16b
+; CHECK-NEXT: movi v1.16b, #42
+; CHECK-NEXT: add v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT: ret
 %c = icmp ult <16 x i8> %x,
 %s = select <16 x i1> %c, <16 x i8> %x, <16 x i8>
@@ -383,9 +383,9 @@
 define <8 x i16> @unsigned_sat_constant_v8i16_using_min(<8 x i16> %x) {
 ; CHECK-LABEL: unsigned_sat_constant_v8i16_using_min:
 ; CHECK: // %bb.0:
+; CHECK-NEXT: mvni v1.8h, #42
+; CHECK-NEXT: umin v0.8h, v0.8h, v1.8h
 ; CHECK-NEXT: movi v1.8h, #42
-; CHECK-NEXT: mvni v2.8h, #42
-; CHECK-NEXT: umin v0.8h, v0.8h, v2.8h
 ; CHECK-NEXT: add v0.8h, v0.8h, v1.8h
 ; CHECK-NEXT: ret
 %c = icmp ult <8 x i16> %x,
@@ -459,9 +459,9 @@
 define <2 x i64> @unsigned_sat_constant_v2i64_using_min(<2 x i64> %x) {
 ; CHECK-LABEL: unsigned_sat_constant_v2i64_using_min:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #-43
+; CHECK-NEXT: mov x8, #-43 // =0xffffffffffffffd5
 ; CHECK-NEXT: dup v1.2d, x8
-; CHECK-NEXT: mov w8, #42
+; CHECK-NEXT: mov w8, #42 // =0x2a
 ; CHECK-NEXT: cmhi v2.2d, v1.2d, v0.2d
 ; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
 ; CHECK-NEXT: dup v1.2d, x8
@@ -476,7 +476,7 @@
 define <2 x i64> @unsigned_sat_constant_v2i64_using_cmp_sum(<2 x i64> %x) {
 ; CHECK-LABEL: unsigned_sat_constant_v2i64_using_cmp_sum:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #42
+; CHECK-NEXT: mov w8, #42 // =0x2a
 ; CHECK-NEXT: dup v1.2d, x8
 ; CHECK-NEXT: uqadd v0.2d, v0.2d, v1.2d
 ; CHECK-NEXT: ret
@@ -489,7 +489,7 @@
 define <2 x i64> @unsigned_sat_constant_v2i64_using_cmp_notval(<2 x i64> %x) {
 ; CHECK-LABEL: unsigned_sat_constant_v2i64_using_cmp_notval:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #42
+; CHECK-NEXT: mov w8, #42 // =0x2a
 ; CHECK-NEXT: dup v1.2d, x8
 ; CHECK-NEXT: uqadd v0.2d, v0.2d, v1.2d
 ; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/select-constant-xor.ll b/llvm/test/CodeGen/AArch64/select-constant-xor.ll
--- a/llvm/test/CodeGen/AArch64/select-constant-xor.ll
+++ b/llvm/test/CodeGen/AArch64/select-constant-xor.ll
@@ -52,7 +52,7 @@
 define i8 @xori32i8(i32 %a) {
 ; CHECK-LABEL: xori32i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #84
+; CHECK-NEXT: mov w8, #84 // =0x54
 ; CHECK-NEXT: eor w0, w8, w0, asr #31
 ; CHECK-NEXT: ret
 %shr4 = ashr i32 %a, 31
@@ -64,7 +64,7 @@
 define i32 @selecti32i32(i32 %a) {
 ; CHECK-LABEL: selecti32i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #84
+; CHECK-NEXT: mov w8, #84 // =0x54
 ; CHECK-NEXT: eor w0, w8, w0, asr #31
 ; CHECK-NEXT: ret
 %c = icmp sgt i32 %a, -1
@@ -75,7 +75,7 @@
 define i8 @selecti32i8(i32 %a) {
 ; CHECK-LABEL: selecti32i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #84
+; CHECK-NEXT: mov w8, #84 // =0x54
 ; CHECK-NEXT: eor w0, w8, w0, asr #31
 ; CHECK-NEXT: ret
 %c = icmp sgt i32 %a, -1
@@ -87,7 +87,7 @@
 ; CHECK-LABEL: selecti8i32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: sxtb w8, w0
-; CHECK-NEXT: mov w9, #84
+; CHECK-NEXT: mov w9, #84 // =0x54
 ; CHECK-NEXT: eor w0, w9, w8, asr #7
 ; CHECK-NEXT: ret
 %c = icmp sgt i8 %a, -1
@@ -200,8 +200,8 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: asr w8, w0, #31
 ; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: eor w8, w8, #0x7f
 ; CHECK-NEXT: csel w9, w2, w1, lt
+; CHECK-NEXT: eor w8, w8, #0x7f
 ; CHECK-NEXT: add w0, w8, w9
 ; CHECK-NEXT: ret
 %c = icmp sle i32 %a, -1
diff --git a/llvm/test/CodeGen/AArch64/select_const.ll b/llvm/test/CodeGen/AArch64/select_const.ll
--- a/llvm/test/CodeGen/AArch64/select_const.ll
+++ b/llvm/test/CodeGen/AArch64/select_const.ll
@@ -9,7 +9,7 @@
 define i32 @select_0_or_1(i1 %cond) {
 ; CHECK-LABEL: select_0_or_1:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #1
+; CHECK-NEXT: mov w8, #1 // =0x1
 ; CHECK-NEXT: bic w0, w8, w0
 ; CHECK-NEXT: ret
 %sel = select i1 %cond, i32 0, i32 1
@@ -28,7 +28,7 @@
 define i32 @select_0_or_1_signext(i1 signext %cond) {
 ; CHECK-LABEL: select_0_or_1_signext:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #1
+; CHECK-NEXT: mov w8, #1 // =0x1
 ; CHECK-NEXT: bic w0, w8, w0
 ; CHECK-NEXT: ret
 %sel = select i1 %cond, i32 0, i32 1
@@ -126,7 +126,7 @@
 define i32 @select_Cplus1_C(i1 %cond) {
 ; CHECK-LABEL: select_Cplus1_C:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #41
+; CHECK-NEXT: mov w8, #41 // =0x29
 ; CHECK-NEXT: tst w0, #0x1
 ; CHECK-NEXT: cinc w0, w8, ne
 ; CHECK-NEXT: ret
@@ -137,7 +137,7 @@
 define i32 @select_Cplus1_C_zeroext(i1 zeroext %cond) {
 ; CHECK-LABEL: select_Cplus1_C_zeroext:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #41
+; CHECK-NEXT: mov w8, #41 // =0x29
 ; CHECK-NEXT: cmp w0, #0
 ; CHECK-NEXT: cinc w0, w8, ne
 ; CHECK-NEXT: ret
@@ -148,7 +148,7 @@
 define i32 @select_Cplus1_C_signext(i1 signext %cond) {
 ; CHECK-LABEL: select_Cplus1_C_signext:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #41
+; CHECK-NEXT: mov w8, #41 // =0x29
 ; CHECK-NEXT: tst w0, #0x1
 ; CHECK-NEXT: cinc w0, w8, ne
 ; CHECK-NEXT: ret
@@ -161,7 +161,7 @@
 define i32 @select_C_Cplus1(i1 %cond) {
 ; CHECK-LABEL: select_C_Cplus1:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #41
+; CHECK-NEXT: mov w8, #41 // =0x29
 ; CHECK-NEXT: tst w0, #0x1
 ; CHECK-NEXT: cinc w0, w8, eq
 ; CHECK-NEXT: ret
@@ -172,7 +172,7 @@
 define i32 @select_C_Cplus1_zeroext(i1 zeroext %cond) {
 ; CHECK-LABEL: select_C_Cplus1_zeroext:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #41
+; CHECK-NEXT: mov w8, #41 // =0x29
 ; CHECK-NEXT: cmp w0, #0
 ; CHECK-NEXT: cinc w0, w8, eq
 ; CHECK-NEXT: ret
@@ -183,7 +183,7 @@
 define i32 @select_C_Cplus1_signext(i1 signext %cond) {
 ; CHECK-LABEL: select_C_Cplus1_signext:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #41
+; CHECK-NEXT: mov w8, #41 // =0x29
 ; CHECK-NEXT: tst w0, #0x1
 ; CHECK-NEXT: cinc w0, w8, eq
 ; CHECK-NEXT: ret
@@ -197,9 +197,9 @@
 define i32 @select_C1_C2(i1 %cond) {
 ; CHECK-LABEL: select_C1_C2:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #42
+; CHECK-NEXT: mov w8, #42 // =0x2a
 ; CHECK-NEXT: tst w0, #0x1
-; CHECK-NEXT: mov w9, #421
+; CHECK-NEXT: mov w9, #421 // =0x1a5
 ; CHECK-NEXT: csel w0, w9, w8, ne
 ; CHECK-NEXT: ret
 %sel = select i1 %cond, i32 421, i32 42
@@ -209,9 +209,9 @@
 define i32 @select_C1_C2_zeroext(i1 zeroext %cond) {
 ; CHECK-LABEL: select_C1_C2_zeroext:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #42
+; CHECK-NEXT: mov w8, #42 // =0x2a
 ; CHECK-NEXT: cmp w0, #0
-; CHECK-NEXT: mov w9, #421
+; CHECK-NEXT: mov w9, #421 // =0x1a5
 ; CHECK-NEXT: csel w0, w9, w8, ne
 ; CHECK-NEXT: ret
 %sel = select i1 %cond, i32 421, i32 42
@@ -221,9 +221,9 @@
 define i32 @select_C1_C2_signext(i1 signext %cond) {
 ; CHECK-LABEL: select_C1_C2_signext:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #42
+; CHECK-NEXT: mov w8, #42 // =0x2a
 ; CHECK-NEXT: tst w0, #0x1
-; CHECK-NEXT: mov w9, #421
+; CHECK-NEXT: mov w9, #421 // =0x1a5
 ; CHECK-NEXT: csel w0, w9, w8, ne
 ; CHECK-NEXT: ret
 %sel = select i1 %cond, i32 421, i32 42
@@ -235,7 +235,7 @@
 define i8 @sel_constants_add_constant(i1 %cond) {
 ; CHECK-LABEL: sel_constants_add_constant:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #28
+; CHECK-NEXT: mov w8, #28 // =0x1c
 ; CHECK-NEXT: tst w0, #0x1
 ; CHECK-NEXT: csinc w0, w8, wzr, eq
 ; CHECK-NEXT: ret
@@ -247,9 +247,9 @@
 define i8 @sel_constants_sub_constant(i1 %cond) {
 ; CHECK-LABEL: sel_constants_sub_constant:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #18
+; CHECK-NEXT: mov w8, #18 // =0x12
 ; CHECK-NEXT: tst w0, #0x1
-; CHECK-NEXT: mov w9, #-9
+; CHECK-NEXT: mov w9, #-9 // =0xfffffff7
 ; CHECK-NEXT: csel w0, w9, w8, ne
 ; CHECK-NEXT: ret
 %sel = select i1 %cond, i8 -4, i8 23
@@ -260,9 +260,9 @@
 define i8 @sel_constants_sub_constant_sel_constants(i1 %cond) {
 ; CHECK-LABEL: sel_constants_sub_constant_sel_constants:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #2
+; CHECK-NEXT: mov w8, #2 // =0x2
 ; CHECK-NEXT: tst w0, #0x1
-; CHECK-NEXT: mov w9, #9
+; CHECK-NEXT: mov w9, #9 // =0x9
 ; CHECK-NEXT: csel w0, w9, w8, ne
 ; CHECK-NEXT: ret
 %sel = select i1 %cond, i8 -4, i8 3
@@ -273,9 +273,9 @@
 define i8 @sel_constants_mul_constant(i1 %cond) {
 ; CHECK-LABEL: sel_constants_mul_constant:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #115
+; CHECK-NEXT: mov w8, #115 // =0x73
 ; CHECK-NEXT: tst w0, #0x1
-; CHECK-NEXT: mov w9, #-20
+; CHECK-NEXT: mov w9, #-20 // =0xffffffec
 ; CHECK-NEXT: csel w0, w9, w8, ne
 ; CHECK-NEXT: ret
 %sel = select i1 %cond, i8 -4, i8 23
@@ -286,7 +286,7 @@
 define i8 @sel_constants_sdiv_constant(i1 %cond) {
 ; CHECK-LABEL: sel_constants_sdiv_constant:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #4
+; CHECK-NEXT: mov w8, #4 // =0x4
 ; CHECK-NEXT: tst w0, #0x1
 ; CHECK-NEXT: csel w0, wzr, w8, ne
 ; CHECK-NEXT: ret
@@ -298,7 +298,7 @@
 define i8 @sdiv_constant_sel_constants(i1 %cond) {
 ; CHECK-LABEL: sdiv_constant_sel_constants:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #5
+; CHECK-NEXT: mov w8, #5 // =0x5
 ; CHECK-NEXT: tst w0, #0x1
 ; CHECK-NEXT: csel w0, wzr, w8, ne
 ; CHECK-NEXT: ret
@@ -310,9 +310,9 @@
 define i8 @sel_constants_udiv_constant(i1 %cond) {
 ; CHECK-LABEL: sel_constants_udiv_constant:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #4
+; CHECK-NEXT: mov w8, #4 // =0x4
 ; CHECK-NEXT: tst w0, #0x1
-; CHECK-NEXT: mov w9, #50
+; CHECK-NEXT: mov w9, #50 // =0x32
 ; CHECK-NEXT: csel w0, w9, w8, ne
 ; CHECK-NEXT: ret
 %sel = select i1 %cond, i8 -4, i8 23
@@ -323,7 +323,7 @@
 define i8 @udiv_constant_sel_constants(i1 %cond) {
 ; CHECK-LABEL: udiv_constant_sel_constants:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #5
+; CHECK-NEXT: mov w8, #5 // =0x5
 ; CHECK-NEXT: tst w0, #0x1
 ; CHECK-NEXT: csel w0, wzr, w8, ne
 ; CHECK-NEXT: ret
@@ -335,7 +335,7 @@
 define i8 @sel_constants_srem_constant(i1 %cond) {
 ; CHECK-LABEL: sel_constants_srem_constant:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #-4
+; CHECK-NEXT: mov w8, #-4 // =0xfffffffc
 ; CHECK-NEXT: tst w0, #0x1
 ; CHECK-NEXT: cinv w0, w8, eq
 ; CHECK-NEXT: ret
@@ -347,9 +347,9 @@
 define i8 @srem_constant_sel_constants(i1 %cond) {
 ; CHECK-LABEL: srem_constant_sel_constants:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #5
+; CHECK-NEXT: mov w8, #5 // =0x5
 ; CHECK-NEXT: tst w0, #0x1
-; CHECK-NEXT: mov w9, #120
+; CHECK-NEXT: mov w9, #120 // =0x78
 ; CHECK-NEXT: csel w0, w9, w8, ne
 ; CHECK-NEXT: ret
 %sel = select i1 %cond, i8 121, i8 23
@@ -360,7 +360,7 @@
 define i8 @sel_constants_urem_constant(i1 %cond) {
 ; CHECK-LABEL: sel_constants_urem_constant:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #2
+; CHECK-NEXT: mov w8, #2 // =0x2
 ; CHECK-NEXT: tst w0, #0x1
 ; CHECK-NEXT: cinc w0, w8, eq
 ; CHECK-NEXT: ret
@@ -372,9 +372,9 @@
 define i8 @urem_constant_sel_constants(i1 %cond) {
 ; CHECK-LABEL: urem_constant_sel_constants:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #5
+; CHECK-NEXT: mov w8, #5 // =0x5
 ; CHECK-NEXT: tst w0, #0x1
-; CHECK-NEXT: mov w9, #120
+; CHECK-NEXT: mov w9, #120 // =0x78
 ; CHECK-NEXT: csel w0, w9, w8, ne
 ; CHECK-NEXT: ret
 %sel = select i1 %cond, i8 -4, i8 23
@@ -385,7 +385,7 @@
 define i8 @sel_constants_and_constant(i1 %cond) {
 ; CHECK-LABEL: sel_constants_and_constant:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #4
+; CHECK-NEXT: mov w8, #4 // =0x4
 ; CHECK-NEXT: tst w0, #0x1
 ; CHECK-NEXT: cinc w0, w8, eq
 ; CHECK-NEXT: ret
@@ -397,9 +397,9 @@
 define i8 @sel_constants_or_constant(i1 %cond) {
 ; CHECK-LABEL: sel_constants_or_constant:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #23
+; CHECK-NEXT: mov w8, #23 // =0x17
 ; CHECK-NEXT: tst w0, #0x1
-; CHECK-NEXT: mov w9, #-3
+; CHECK-NEXT: mov w9, #-3 // =0xfffffffd
 ; CHECK-NEXT: csel w0, w9, w8, ne
 ; CHECK-NEXT: ret
 %sel = select i1 %cond, i8 -4, i8 23
@@ -410,9 +410,9 @@
 define i8 @sel_constants_xor_constant(i1 %cond) {
 ; CHECK-LABEL: sel_constants_xor_constant:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #18
+; CHECK-NEXT: mov w8, #18 // =0x12
 ; CHECK-NEXT: tst w0, #0x1
-; CHECK-NEXT: mov w9, #-7
+; CHECK-NEXT: mov w9, #-7 // =0xfffffff9
 ; CHECK-NEXT: csel w0, w9, w8, ne
 ; CHECK-NEXT: ret
 %sel = select i1 %cond, i8 -4, i8 23
@@ -423,9 +423,9 @@
 define i8 @sel_constants_shl_constant(i1 %cond) {
 ; CHECK-LABEL: sel_constants_shl_constant:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #-32
+; CHECK-NEXT: mov w8, #-32 // =0xffffffe0
 ; CHECK-NEXT: tst w0, #0x1
-; CHECK-NEXT: mov w9, #-128
+; CHECK-NEXT: mov w9, #-128 // =0xffffff80
 ; CHECK-NEXT: csel w0, w9, w8, ne
 ; CHECK-NEXT: ret
 %sel = select i1 %cond, i8 -4, i8 23
@@ -436,9 +436,9 @@
 define i8 @shl_constant_sel_constants(i1 %cond) {
 ; CHECK-LABEL: shl_constant_sel_constants:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #8
+; CHECK-NEXT: mov w8, #8 // =0x8
 ; CHECK-NEXT: tst w0, #0x1
-; CHECK-NEXT: mov w9, #4
+; CHECK-NEXT: mov w9, #4 // =0x4
 ; CHECK-NEXT: csel w0, w9, w8, ne
 ; CHECK-NEXT: ret
 %sel = select i1 %cond, i8 2, i8 3
@@ -449,7 +449,7 @@
 define i8 @sel_constants_lshr_constant(i1 %cond) {
 ; CHECK-LABEL: sel_constants_lshr_constant:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #7
+; CHECK-NEXT: mov w8, #7 // =0x7
 ; CHECK-NEXT: tst w0, #0x1
 ; CHECK-NEXT: csel w0, w8, wzr, ne
 ; CHECK-NEXT: ret
@@ -461,9 +461,9 @@
 define i8 @lshr_constant_sel_constants(i1 %cond) {
 ; CHECK-LABEL: lshr_constant_sel_constants:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #8
+; CHECK-NEXT: mov w8, #8 // =0x8
 ; CHECK-NEXT: tst w0, #0x1
-; CHECK-NEXT: mov w9, #16
+; CHECK-NEXT: mov w9, #16 // =0x10
 ; CHECK-NEXT: csel w0, w9, w8, ne
 ; CHECK-NEXT: ret
 %sel = select i1 %cond, i8 2, i8 3
@@ -485,9 +485,9 @@
 define i8 @ashr_constant_sel_constants(i1 %cond) {
 ; CHECK-LABEL: ashr_constant_sel_constants:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #-16
+; CHECK-NEXT: mov w8, #-16 // =0xfffffff0
 ; CHECK-NEXT: tst w0, #0x1
-; CHECK-NEXT: mov w9, #-32
+; CHECK-NEXT: mov w9, #-32 // =0xffffffe0
 ; CHECK-NEXT: csel w0, w9, w8, ne
 ; CHECK-NEXT: ret
 %sel = select i1 %cond, i8 2, i8 3
@@ -498,13 +498,13 @@
 define double @sel_constants_fadd_constant(i1 %cond) {
 ; CHECK-LABEL: sel_constants_fadd_constant:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #7378697629483820646
-; CHECK-NEXT: adrp x9, .LCPI42_0
-; CHECK-NEXT: movk x8, #16444, lsl #48
+; CHECK-NEXT: mov x9, #7378697629483820646 // =0x6666666666666666
+; CHECK-NEXT: adrp x8, .LCPI42_0
 ; CHECK-NEXT: tst w0, #0x1
-; CHECK-NEXT: ldr d1, [x9, :lo12:.LCPI42_0]
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: fcsel d0, d1, d0, ne
+; CHECK-NEXT: movk x9, #16444, lsl #48
+; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI42_0]
+; CHECK-NEXT: fmov d1, x9
+; CHECK-NEXT: fcsel d0, d0, d1, ne
 ; CHECK-NEXT: ret
 %sel = select i1 %cond, double -4.0, double 23.3
 %bo = fadd double %sel, 5.1
@@ -514,12 +514,12 @@
 define double @sel_constants_fsub_constant(i1 %cond) {
 ; CHECK-LABEL: sel_constants_fsub_constant:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov x9, #3689348814741910323
 ; CHECK-NEXT: adrp x8, .LCPI43_0
-; CHECK-NEXT: movk x9, #49186, lsl #48
 ; CHECK-NEXT: tst w0, #0x1
 ; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI43_0]
-; CHECK-NEXT: fmov d1, x9
+; CHECK-NEXT: mov x8, #3689348814741910323 // =0x3333333333333333
+; CHECK-NEXT: movk x8, #49186, lsl #48
+; CHECK-NEXT: fmov d1, x8
 ; CHECK-NEXT: fcsel d0, d1, d0, ne
 ; CHECK-NEXT: ret
 %sel = select i1 %cond, double -4.0, double 23.3
@@ -530,12 +530,12 @@
 define double @fsub_constant_sel_constants(i1 %cond) {
 ; CHECK-LABEL: fsub_constant_sel_constants:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov x9, #3689348814741910323
 ; CHECK-NEXT: adrp x8, .LCPI44_0
-; CHECK-NEXT: movk x9, #16418, lsl #48
 ; CHECK-NEXT: tst w0, #0x1
 ; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI44_0]
-; CHECK-NEXT: fmov d1, x9
+; CHECK-NEXT: mov x8, #3689348814741910323 // =0x3333333333333333
+; CHECK-NEXT: movk x8, #16418, lsl #48
+; CHECK-NEXT: fmov d1, x8
 ; CHECK-NEXT: fcsel d0, d1, d0, ne
 ; CHECK-NEXT: ret
 %sel = select i1 %cond, double -4.0, double 23.3
@@ -546,12 +546,12 @@
 define double @sel_constants_fmul_constant(i1 %cond) {
 ; CHECK-LABEL: sel_constants_fmul_constant:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov x9, #7378697629483820646
 ; CHECK-NEXT: adrp x8, .LCPI45_0
-; CHECK-NEXT: movk x9, #49204, lsl #48
 ; CHECK-NEXT: tst w0, #0x1
 ; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI45_0]
-; CHECK-NEXT: fmov d1, x9
+; CHECK-NEXT: mov x8, #7378697629483820646 // =0x6666666666666666
+; CHECK-NEXT: movk x8, #49204, lsl #48
+; CHECK-NEXT: fmov d1, x8
 ; CHECK-NEXT: fcsel d0, d1, d0, ne
 ; CHECK-NEXT: ret
 %sel = select i1 %cond, double -4.0, double 23.3
@@ -577,12 +577,12 @@
 define double @fdiv_constant_sel_constants(i1 %cond) {
 ; CHECK-LABEL: fdiv_constant_sel_constants:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov x9, #7378697629483820646
 ; CHECK-NEXT: adrp x8, .LCPI47_0
-; CHECK-NEXT: movk x9, #49140, lsl #48
 ; CHECK-NEXT: tst w0, #0x1
 ; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI47_0]
-; CHECK-NEXT: fmov d1, x9
+; CHECK-NEXT: mov x8, #7378697629483820646 // =0x6666666666666666
+; CHECK-NEXT: movk x8, #49140, lsl #48
+; CHECK-NEXT: fmov d1, x8
 ; CHECK-NEXT: fcsel d0, d1, d0, ne
 ; CHECK-NEXT: ret
 %sel = select i1 %cond, double -4.0, double 23.3
@@ -594,10 +594,10 @@
 ; CHECK-LABEL: sel_constants_frem_constant:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: adrp x8, .LCPI48_0
-; CHECK-NEXT: fmov d1, #-4.00000000
+; CHECK-NEXT: fmov d0, #-4.00000000
 ; CHECK-NEXT: tst w0, #0x1
-; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI48_0]
-; CHECK-NEXT: fcsel d0, d1, d0, ne
+; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI48_0]
+; CHECK-NEXT: fcsel d0, d0, d1, ne
 ; CHECK-NEXT: ret
 %sel = select i1 %cond, double -4.0, double 23.3
 %bo = frem double %sel, 5.1
@@ -607,13 +607,13 @@
 define double @frem_constant_sel_constants(i1 %cond) {
 ; CHECK-LABEL: frem_constant_sel_constants:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #7378697629483820646
-; CHECK-NEXT: adrp x9, .LCPI49_0
-; CHECK-NEXT: movk x8, #16404, lsl #48
+; CHECK-NEXT: mov x9, #7378697629483820646 // =0x6666666666666666
+; CHECK-NEXT: adrp x8, .LCPI49_0
 ; CHECK-NEXT: tst w0, #0x1
-; CHECK-NEXT: ldr d1, [x9, :lo12:.LCPI49_0]
-; CHECK-NEXT: fmov d0, x8
-; CHECK-NEXT: fcsel d0, d1, d0, ne
+; CHECK-NEXT: movk x9, #16404, lsl #48
+; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI49_0]
+; CHECK-NEXT: fmov d1, x9
+; CHECK-NEXT: fcsel d0, d0, d1, ne
 ; CHECK-NEXT: ret
 %sel = select i1 %cond, double -4.0, double 23.3
 %bo = frem double 5.1, %sel
diff --git a/llvm/test/CodeGen/AArch64/select_fmf.ll b/llvm/test/CodeGen/AArch64/select_fmf.ll
--- a/llvm/test/CodeGen/AArch64/select_fmf.ll
+++ b/llvm/test/CodeGen/AArch64/select_fmf.ll
@@ -7,11 +7,11 @@
 define float @select_select_fold_select_and(float %w, float %x, float %y, float %z) {
 ; CHECK-LABEL: select_select_fold_select_and:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: fminnm s5, s1, s2
+; CHECK-NEXT: fminnm s4, s1, s2
 ; CHECK-NEXT: fcmp s1, s2
 ; CHECK-NEXT: fmaxnm s1, s0, s3
+; CHECK-NEXT: fccmp s4, s0, #4, lt
 ; CHECK-NEXT: fmov s4, #0.50000000
-; CHECK-NEXT: fccmp s5, s0, #4, lt
 ; CHECK-NEXT: fcsel s2, s1, s0, gt
 ; CHECK-NEXT: fadd s1, s0, s4
 ; CHECK-NEXT: fadd s4, s1, s2
@@ -22,11 +22,11 @@
 ; CHECK-NEXT: fadd s0, s2, s0
 ; CHECK-NEXT: ret
 ; CHECK-NEXT: .LBB0_2: // %if.end.i159.i.i
-; CHECK-NEXT: mov w8, #52429
-; CHECK-NEXT: mov w9, #13107
+; CHECK-NEXT: mov w8, #52429 // =0xcccd
+; CHECK-NEXT: mov w9, #13107 // =0x3333
+; CHECK-NEXT: fcmp s1, #0.0
 ; CHECK-NEXT: movk w8, #48844, lsl #16
 ; CHECK-NEXT: movk w9, #48819, lsl #16
-; CHECK-NEXT: fcmp s1, #0.0
 ; CHECK-NEXT: fmov s2, w8
 ; CHECK-NEXT: fmov s4, w9
 ; CHECK-NEXT: fadd s0, s0, s2
@@ -65,11 +65,11 @@
 define float @select_select_fold_select_or(float %w, float %x, float %y, float %z) {
 ; CHECK-LABEL: select_select_fold_select_or:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: fminnm s5, s1, s2
+; CHECK-NEXT: fminnm s4, s1, s2
 ; CHECK-NEXT: fcmp s1, s2
 ; CHECK-NEXT: fmaxnm s1, s0, s3
+; CHECK-NEXT: fccmp s4, s0, #0, ge
 ; CHECK-NEXT: fmov s4, #0.50000000
-; CHECK-NEXT: fccmp s5, s0, #0, ge
 ; CHECK-NEXT: fcsel s2, s0, s1, gt
 ; CHECK-NEXT: fadd s1, s0, s4
 ; CHECK-NEXT: fadd s4, s1, s2
@@ -80,11 +80,11 @@
 ; CHECK-NEXT: fadd s0, s2, s0
 ; CHECK-NEXT: ret
 ; CHECK-NEXT: .LBB1_2: // %if.end.i159.i.i
-; CHECK-NEXT: mov w8, #52429
-; CHECK-NEXT: mov w9, #13107
+; CHECK-NEXT: mov w8, #52429 // =0xcccd
+; CHECK-NEXT: mov w9, #13107 // =0x3333
+; CHECK-NEXT: fcmp s1, #0.0
 ; CHECK-NEXT: movk w8, #48844, lsl #16
 ; CHECK-NEXT: movk w9, #48819, lsl #16
-; CHECK-NEXT: fcmp s1, #0.0
 ; CHECK-NEXT: fmov s2, w8
 ; CHECK-NEXT: fmov s4, w9
 ; CHECK-NEXT: fadd s0, s0, s2
diff --git a/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll b/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll
--- a/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll
+++ b/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll
@@ -6,7 +6,7 @@
 define i32 @neg_sel_constants(i32 %a) {
 ; CHECK-LABEL: neg_sel_constants:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #5
+; CHECK-NEXT: mov w8, #5 // =0x5
 ; CHECK-NEXT: and w0, w8, w0, asr #31
 ; CHECK-NEXT: ret
 %tmp.1 = icmp slt i32 %a, 0
@@ -58,7 +58,7 @@
 define i32 @pos_sel_constants(i32 %a) {
 ; CHECK-LABEL: pos_sel_constants:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #5
+; CHECK-NEXT: mov w8, #5 // =0x5
 ; CHECK-NEXT: bic w0, w8, w0, asr #31
 ; CHECK-NEXT: ret
 %tmp.1 = icmp sgt i32 %a, -1
@@ -71,7 +71,7 @@
 define i32 @pos_sel_special_constant(i32 %a) {
 ; CHECK-LABEL: pos_sel_special_constant:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #512
+; CHECK-NEXT: mov w8, #512 // =0x200
 ; CHECK-NEXT: bic w0, w8, w0, lsr #22
 ; CHECK-NEXT: ret
 %tmp.1 = icmp sgt i32 %a, -1
@@ -121,7 +121,7 @@
 define i8 @sel_shift_bool_i8(i1 %t) {
 ; CHECK-LABEL: sel_shift_bool_i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #-128
+; CHECK-NEXT: mov w8, #-128 // =0xffffff80
 ; CHECK-NEXT: tst w0, #0x1
 ; CHECK-NEXT: csel w0, w8, wzr, ne
 ; CHECK-NEXT: ret
@@ -132,7 +132,7 @@
 define i16 @sel_shift_bool_i16(i1 %t) {
 ; CHECK-LABEL: sel_shift_bool_i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #128
+; CHECK-NEXT: mov w8, #128 // =0x80
 ; CHECK-NEXT: tst w0, #0x1
 ; CHECK-NEXT: csel w0, w8, wzr, ne
 ; CHECK-NEXT: ret
@@ -143,7 +143,7 @@
 define i32 @sel_shift_bool_i32(i1 %t) {
 ; CHECK-LABEL: sel_shift_bool_i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #64
+; CHECK-NEXT: mov w8, #64 // =0x40
 ; CHECK-NEXT: tst w0, #0x1
 ; CHECK-NEXT: csel w0, w8, wzr, ne
 ; CHECK-NEXT: ret
@@ -154,7 +154,7 @@
 define i64 @sel_shift_bool_i64(i1 %t) {
 ; CHECK-LABEL: sel_shift_bool_i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #65536
+; CHECK-NEXT: mov w8, #65536 // =0x10000
 ; CHECK-NEXT: tst w0, #0x1
 ; CHECK-NEXT: csel x0, x8, xzr, ne
 ; CHECK-NEXT: ret
@@ -165,8 +165,8 @@
 define <16 x i8> @sel_shift_bool_v16i8(<16 x i1> %t) {
 ; CHECK-LABEL: sel_shift_bool_v16i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: movi v1.16b, #128
 ; CHECK-NEXT: shl v0.16b, v0.16b, #7
+; CHECK-NEXT: movi v1.16b, #128
 ; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT: ret
@@ -204,9 +204,9 @@
 ; CHECK-LABEL: sel_shift_bool_v2i64:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-NEXT: mov w8, #65536
-; CHECK-NEXT: shl v0.2d, v0.2d, #63
+; CHECK-NEXT: mov w8, #65536 // =0x10000
 ; CHECK-NEXT: dup v1.2d, x8
+; CHECK-NEXT: shl v0.2d, v0.2d, #63
 ; CHECK-NEXT: cmlt v0.2d, v0.2d, #0
 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/setcc-type-mismatch.ll b/llvm/test/CodeGen/AArch64/setcc-type-mismatch.ll
--- a/llvm/test/CodeGen/AArch64/setcc-type-mismatch.ll
+++ b/llvm/test/CodeGen/AArch64/setcc-type-mismatch.ll
@@ -6,11 +6,11 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: movi v2.4s, #63, msl #16
 ; CHECK-NEXT: adrp x8, .LCPI0_0
-; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI0_0]
 ; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
 ; CHECK-NEXT: and v0.16b, v0.16b, v2.16b
 ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: and v0.16b, v0.16b, v3.16b
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0]
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
 ; CHECK-NEXT: addv s0, v0.4s
 ; CHECK-NEXT: fmov w8, s0
 ; CHECK-NEXT: strb w8, [x0]
diff --git a/llvm/test/CodeGen/AArch64/settag-merge-order.ll b/llvm/test/CodeGen/AArch64/settag-merge-order.ll
--- a/llvm/test/CodeGen/AArch64/settag-merge-order.ll
+++ b/llvm/test/CodeGen/AArch64/settag-merge-order.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; RUN: llc < %s -mtriple=aarch64 -mattr=+mte -aarch64-order-frame-objects=1 | FileCheck %s
 declare void @use(ptr %p)
@@ -7,13 +8,26 @@
 ; Two loops of size 256; the second loop updates SP.
 ; After frame reordering, two loops can be merged into one.
 define void @stg128_128_gap_128_128() {
-entry:
 ; CHECK-LABEL: stg128_128_gap_128_128:
-; CHECK: mov x8, #512
-; CHECK: st2g sp, [sp], #32
-; CHECK: subs x8, x8, #32
-; CHECK: b.ne
-; CHECK: ret
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT: sub sp, sp, #544
+; CHECK-NEXT: .cfi_def_cfa_offset 560
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: add x0, sp, #512
+; CHECK-NEXT: bl use
+; CHECK-NEXT: mov x8, #512 // =0x200
+; CHECK-NEXT: .LBB0_1: // %entry
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: st2g sp, [sp], #32
+; CHECK-NEXT: subs x8, x8, #32
+; CHECK-NEXT: b.ne .LBB0_1
+; CHECK-NEXT: // %bb.2: // %entry
+; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
 %a = alloca i8, i32 128, align 16
 %a2 = alloca i8, i32 128, align 16
 %b = alloca i8, i32 32, align 16
@@ -28,8 +42,51 @@
 }
 define void @stg2(i1 %flag) {
-entry:
 ; CHECK-LABEL: stg2:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x29, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-NEXT: stp x30, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: sub sp, sp, #608
+; CHECK-NEXT: .cfi_def_cfa_offset 640
+; CHECK-NEXT: .cfi_offset w19, -8
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: .cfi_offset w29, -32
+; CHECK-NEXT: mov w19, w0
+; CHECK-NEXT: add x0, sp, #576
+; CHECK-NEXT: bl use
+; CHECK-NEXT: tbz w19, #0, .LBB1_4
+; CHECK-NEXT: // %bb.1: // %if.then
+; CHECK-NEXT: add x9, sp, #256
+; CHECK-NEXT: mov x8, #320 // =0x140
+; CHECK-NEXT: .LBB1_2: // %if.then
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: st2g x9, [x9], #32
+; CHECK-NEXT: subs x8, x8, #32
+; CHECK-NEXT: b.ne .LBB1_2
+; CHECK-NEXT: // %bb.3: // %if.then
+; CHECK-NEXT: b .LBB1_7
+; CHECK-NEXT: .LBB1_4: // %if.else
+; CHECK-NEXT: mov x9, sp
+; CHECK-NEXT: mov x8, #256 // =0x100
+; CHECK-NEXT: .LBB1_5: // %if.else
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: st2g x9, [x9], #32
+; CHECK-NEXT: subs x8, x8, #32
+; CHECK-NEXT: b.ne .LBB1_5
+; CHECK-NEXT: // %bb.6: // %if.else
+; CHECK-NEXT: .LBB1_7: // %if.end
+; CHECK-NEXT: mov x8, #576 // =0x240
+; CHECK-NEXT: .LBB1_8: // %if.end
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: st2g sp, [sp], #32
+; CHECK-NEXT: subs x8, x8, #32
+; CHECK-NEXT: b.ne .LBB1_8
+; CHECK-NEXT: // %bb.9: // %if.end
+; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x29, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
 %a = alloca i8, i32 160, align 16
 %a2 = alloca i8, i32 160, align 16
 %b = alloca i8, i32 32, align 16
@@ -39,33 +96,20 @@
 br i1 %flag, label %if.then, label %if.else
 if.then:
-; CHECK: mov x8, #320
-; CHECK: subs x8, x8, #32
-; CHECK: st2g x9, [x9], #32
-; CHECK: b.ne
 call void @llvm.aarch64.settag(ptr %a, i64 160)
 call void @llvm.aarch64.settag(ptr %a2, i64 160)
 br label %if.end
 if.else:
-; CHECK: mov x8, #256
-; CHECK: subs x8, x8, #32
-; CHECK: st2g x9, [x9], #32
-; CHECK: b.ne
 call void @llvm.aarch64.settag(ptr %c, i64 128)
 call void @llvm.aarch64.settag(ptr %c2, i64 128)
 br label %if.end
 if.end:
-; CHECK: mov x8, #576
-; CHECK: st2g sp, [sp], #32
-; CHECK: subs x8, x8, #32
-; CHECK: b.ne
 call void @llvm.aarch64.settag(ptr %a, i64 160)
 call void @llvm.aarch64.settag(ptr %a2, i64 160)
 call void @llvm.aarch64.settag(ptr %c, i64 128)
 call void @llvm.aarch64.settag(ptr %c2, i64 128)
-; CHECK: ret
 ret void
 }
diff --git a/llvm/test/CodeGen/AArch64/settag-merge.ll b/llvm/test/CodeGen/AArch64/settag-merge.ll
--- a/llvm/test/CodeGen/AArch64/settag-merge.ll
+++ b/llvm/test/CodeGen/AArch64/settag-merge.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; RUN: llc < %s -mtriple=aarch64 -mattr=+mte -aarch64-order-frame-objects=0 | FileCheck %s
 declare void @use(ptr %p)
@@ -5,10 +6,13 @@
 declare void @llvm.aarch64.settag.zero(ptr %p, i64 %a)
 define void @stg16_16() {
-entry:
 ; CHECK-LABEL: stg16_16:
-; CHECK: st2g sp, [sp], #32
-; CHECK: ret
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub sp, sp, #32
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: st2g sp, [sp], #32
+; CHECK-NEXT: ret
+entry:
 %a = alloca i8, i32 16, align 16
 %b = alloca i8, i32 16, align 16
 call void @llvm.aarch64.settag(ptr %a, i64 16)
@@ -17,12 +21,15 @@
 }
 define i32 @stg16_16_16_16_ret() {
-entry:
 ; CHECK-LABEL: stg16_16_16_16_ret:
-; CHECK: mov w0, wzr
-; CHECK: st2g sp, [sp, #32]
-; CHECK: st2g sp, [sp], #64
-; CHECK: ret
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub sp, sp, #64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: st2g sp, [sp, #32]
+; CHECK-NEXT: st2g sp, [sp], #64
+; CHECK-NEXT: ret
+entry:
 %a = alloca i8, i32 16, align 16
 %b = alloca i8, i32 16, align 16
 %c = alloca i8, i32 16, align 16
@@ -35,11 +42,14 @@
 }
 define void @stg16_16_16_16() {
-entry:
 ; CHECK-LABEL: stg16_16_16_16:
-; CHECK: st2g sp, [sp, #32]
-; CHECK: st2g sp, [sp], #64
-; CHECK: ret
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub sp, sp, #64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: st2g sp, [sp, #32]
+; CHECK-NEXT: st2g sp, [sp], #64
+; CHECK-NEXT: ret
+entry:
 %a = alloca i8, i32 16, align 16
 %b = alloca i8, i32 16, align 16
 %c = alloca i8, i32 16, align 16
@@ -52,13 +62,22 @@
 }
 define void @stg128_128_128_128() {
-entry:
 ; CHECK-LABEL: stg128_128_128_128:
-; CHECK: mov x8, #512
-; CHECK: st2g sp, [sp], #32
-; CHECK: subs x8, x8, #32
-; CHECK: b.ne
-; CHECK: ret
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: sub sp, sp, #512
+; CHECK-NEXT: .cfi_def_cfa_offset 528
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: mov x8, #512 // =0x200
+; CHECK-NEXT: .LBB3_1: // %entry
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: st2g sp, [sp], #32
+; CHECK-NEXT: subs x8, x8, #32
+; CHECK-NEXT: b.ne .LBB3_1
+; CHECK-NEXT: // %bb.2: // %entry
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
 %a = alloca i8, i32 128, align 16
 %b = alloca i8, i32 128, align 16
 %c = alloca i8, i32 128, align 16
@@ -71,13 +90,22 @@
 }
 define void @stg16_512_16() {
-entry:
 ; CHECK-LABEL: stg16_512_16:
-; CHECK: mov x8, #544
-; CHECK: st2g sp, [sp], #32
-; CHECK: subs x8, x8, #32
-; CHECK: b.ne
-; CHECK: ret
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: sub sp, sp, #544
+; CHECK-NEXT: .cfi_def_cfa_offset 560
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: mov x8, #544 // =0x220
+; CHECK-NEXT: .LBB4_1: // %entry
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: st2g sp, [sp], #32
+; CHECK-NEXT: subs x8, x8, #32
+; CHECK-NEXT: b.ne .LBB4_1
+; CHECK-NEXT: // %bb.2: // %entry
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
 %a = alloca i8, i32 16, align 16
 %b = alloca i8, i32 512, align 16
 %c = alloca i8, i32 16, align 16
@@ -88,13 +116,22 @@
 }
 define void @stg512_512_512() {
-entry:
 ; CHECK-LABEL: stg512_512_512:
-; CHECK: mov x8, #1536
-; CHECK: st2g sp, [sp], #32
-; CHECK: subs x8, x8, #32
-; CHECK: b.ne
-; CHECK: ret
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: sub sp, sp, #1536
+; CHECK-NEXT: .cfi_def_cfa_offset 1552
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: mov x8, #1536 // =0x600
+; CHECK-NEXT: .LBB5_1: // %entry
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: st2g sp, [sp], #32
+; CHECK-NEXT: subs x8, x8, #32
+; CHECK-NEXT: b.ne .LBB5_1
+; CHECK-NEXT: // %bb.2: // %entry
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
 %a = alloca i8, i32 512, align 16
 %b = alloca i8, i32 512, align 16
 %c = alloca i8, i32 512, align 16
@@ -105,16 +142,20 @@
 }
 define void @early(i1 %flag) {
-entry:
 ; CHECK-LABEL: early:
-; CHECK: tbz w0, #0, [[LABEL:.LBB.*]]
-; CHECK: st2g sp, [sp, #
-; CHECK: st2g sp, [sp, #
-; CHECK: st2g sp, [sp, #
-; CHECK: [[LABEL]]:
-; CHECK: stg sp, [sp, #
-; CHECK: st2g sp, [sp], #
-; CHECK: ret
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub sp, sp, #144
+; CHECK-NEXT: .cfi_def_cfa_offset 144
+; CHECK-NEXT: tbz w0, #0, .LBB6_2
+; CHECK-NEXT: // %bb.1: // %if.then
+; CHECK-NEXT: st2g sp, [sp, #48]
+; CHECK-NEXT: st2g sp, [sp, #80]
+; CHECK-NEXT: st2g sp, [sp, #112]
+; CHECK-NEXT: .LBB6_2: // %if.end
+; CHECK-NEXT: stg sp, [sp, #32]
+; CHECK-NEXT: st2g sp, [sp], #144
+; CHECK-NEXT: ret
+entry:
 %a = alloca i8, i32 48, align 16
 %b = alloca i8, i32 48, align 16
 %c = alloca i8, i32 48, align 16
@@ -131,18 +172,28 @@
 }
 define void @early_128_128(i1 %flag) {
-entry:
 ; CHECK-LABEL: early_128_128:
-; CHECK: tbz w0, #0, [[LABEL:.LBB.*]]
-; CHECK: add x9, sp, #
-; CHECK: mov x8, #256
-; CHECK: subs x8, x8, #32
-; CHECK: st2g x9, [x9], #32
-; CHECK: b.ne
-; CHECK: [[LABEL]]:
-; CHECK: stg sp, [sp, #
-; CHECK: st2g sp, [sp], #
-; CHECK: ret
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub sp, sp, #320
+; CHECK-NEXT: str x29, [sp, #304] // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 320
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: tbz w0, #0, .LBB7_4
+; CHECK-NEXT: // %bb.1: // %if.then
+; CHECK-NEXT: add x9, sp, #48
+; CHECK-NEXT: mov x8, #256 // =0x100
+; CHECK-NEXT: .LBB7_2: // %if.then
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: st2g x9, [x9], #32
+; CHECK-NEXT: subs x8, x8, #32
+; CHECK-NEXT: b.ne .LBB7_2
+; CHECK-NEXT: // %bb.3: // %if.then
+; CHECK-NEXT: .LBB7_4: // %if.end
+; CHECK-NEXT: stg sp, [sp, #32]
+; CHECK-NEXT: st2g sp, [sp], #304
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
 %a = alloca i8, i32 128, align 16
 %b = alloca i8, i32 128, align 16
 %c = alloca i8, i32 48, align 16
@@ -159,18 +210,28 @@
 }
 define void @early_512_512(i1 %flag) {
-entry:
 ; CHECK-LABEL: early_512_512:
-; CHECK: tbz w0, #0, [[LABEL:.LBB.*]]
-; CHECK: add x9, sp, #
-; CHECK: mov x8, #1024
-; CHECK: subs x8, x8, #32
-; CHECK: st2g x9, [x9], #32
-; CHECK: b.ne
-; CHECK: [[LABEL]]:
-; CHECK: stg sp, [sp, #
-; CHECK: st2g sp, [sp], #
-; CHECK: ret
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: sub sp, sp, #1072
+; CHECK-NEXT: .cfi_def_cfa_offset 1088
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: tbz w0, #0, .LBB8_4
+; CHECK-NEXT: // %bb.1: // %if.then
+; CHECK-NEXT: add x9, sp, #48
+; CHECK-NEXT: mov x8, #1024 // =0x400
+; CHECK-NEXT: .LBB8_2: // %if.then
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: st2g x9, [x9], #32
+; CHECK-NEXT: subs x8, x8, #32
+; CHECK-NEXT: b.ne .LBB8_2
+; CHECK-NEXT: // %bb.3: // %if.then
+; CHECK-NEXT: .LBB8_4: // %if.end
+; CHECK-NEXT: stg sp, [sp, #32]
+; CHECK-NEXT: st2g sp, [sp], #1072
+; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
 %a = alloca i8, i32 512, align 16
 %b = alloca i8, i32 512, align 16
 %c = alloca i8, i32 48, align 16
@@ -188,18 +249,34 @@
 ; Two loops of size 256; the second loop updates SP.
 define void @stg128_128_gap_128_128() {
-entry:
 ; CHECK-LABEL: stg128_128_gap_128_128:
-; CHECK: mov x9, sp
-; CHECK: mov x8, #256
-; CHECK: subs x8, x8, #32
-; CHECK: st2g x9, [x9], #32
-; CHECK: b.ne
-; CHECK: mov x8, #256
-; CHECK: st2g sp, [sp], #32
-; CHECK: subs x8, x8, #32
-; CHECK: b.ne
-; CHECK: ret
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT: sub sp, sp, #544
+; CHECK-NEXT: .cfi_def_cfa_offset 560
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: add x0, sp, #256
+; CHECK-NEXT: bl use
+; CHECK-NEXT: mov x9, sp
+; CHECK-NEXT: mov x8, #256 // =0x100
+; CHECK-NEXT: .LBB9_1: // %entry
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: st2g x9, [x9], #32
+; CHECK-NEXT: subs x8, x8, #32
+; CHECK-NEXT: b.ne .LBB9_1
+; CHECK-NEXT: // %bb.2: // %entry
+; CHECK-NEXT: add sp, sp, #288
+; CHECK-NEXT: mov x8, #256 // =0x100
+; CHECK-NEXT: .LBB9_3: // %entry
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: st2g sp, [sp], #32
+; CHECK-NEXT: subs x8, x8, #32
+; CHECK-NEXT: b.ne .LBB9_3
+; CHECK-NEXT: // %bb.4: // %entry
+; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: ret
+entry:
 %a = alloca i8, i32 128, align 16
 %a2 = alloca i8, i32 128, align 16
 %b = alloca i8, i32 32, align 16
diff --git a/llvm/test/CodeGen/AArch64/settag.ll b/llvm/test/CodeGen/AArch64/settag.ll
--- a/llvm/test/CodeGen/AArch64/settag.ll
+++ b/llvm/test/CodeGen/AArch64/settag.ll
@@ -58,11 +58,11 @@
 define void @stg16(ptr %p) {
 ; CHECK-LABEL: stg16:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov x8, #256
+; CHECK-NEXT: mov x8, #256 // =0x100
 ; CHECK-NEXT: .LBB5_1: // %entry
 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: subs x8, x8, #32
 ; CHECK-NEXT: st2g x0, [x0], #32
+; CHECK-NEXT: subs x8, x8, #32
 ; CHECK-NEXT: b.ne .LBB5_1
 ; CHECK-NEXT: // %bb.2: // %entry
 ; CHECK-NEXT: ret
@@ -74,12 +74,12 @@
 define void @stg17(ptr %p) {
 ; CHECK-LABEL: stg17:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov x8, #256
 ; CHECK-NEXT: stg x0, [x0], #16
+; CHECK-NEXT: mov x8, #256 // =0x100
 ; CHECK-NEXT: .LBB6_1: // %entry
 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: subs x8, x8, #32
 ; CHECK-NEXT: st2g x0, [x0], #32
+; CHECK-NEXT: subs x8, x8, #32
 ; CHECK-NEXT: b.ne .LBB6_1
 ; CHECK-NEXT: // %bb.2: // %entry
 ; CHECK-NEXT: ret
@@ -102,12 +102,12 @@
 define void @stzg17(ptr %p) {
 ; CHECK-LABEL: stzg17:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov x8, #256
 ; CHECK-NEXT: stzg x0, [x0], #16
+; CHECK-NEXT: mov x8, #256 // =0x100
 ; CHECK-NEXT: .LBB8_1: // %entry
 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: subs x8, x8, #32
 ; CHECK-NEXT: stz2g x0, [x0], #32
+; CHECK-NEXT: subs x8, x8, #32
 ; CHECK-NEXT: b.ne .LBB8_1
 ; CHECK-NEXT: // %bb.2: // %entry
 ; CHECK-NEXT: ret
@@ -150,7 +150,7 @@
 ; CHECK-LABEL: stg_alloca17:
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: sub sp, sp, #288
-; CHECK-NEXT: mov x8, #256
+; CHECK-NEXT: mov x8, #256 // =0x100
 ; CHECK-NEXT: str x29, [sp, #272] // 8-byte Folded Spill
 ; CHECK-NEXT: .LBB11_1: // %entry
 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
@@ -175,12 +175,12 @@
 ; CHECK-NEXT: str x29, [sp, #272] // 8-byte Folded Spill
 ; CHECK-NEXT: .cfi_offset w29, -16
 ; CHECK-NEXT: mov x9, sp
-; CHECK-NEXT: mov x8, #256
+; CHECK-NEXT: mov x8, #256 // =0x100
 ; CHECK-NEXT: stg x9, [x9], #16
 ; CHECK-NEXT: .LBB12_1: // %entry
 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: subs x8, x8, #32
 ; CHECK-NEXT: st2g x9, [x9], #32
+; CHECK-NEXT: subs x8, x8, #32
 ; CHECK-NEXT: b.ne .LBB12_1
 ; CHECK-NEXT: // %bb.2: // %entry
 ; CHECK-NEXT: add sp, sp, #272
@@ -198,11 +198,31 @@
 ; Verify that SLH works together with MTE stack tagging,
 ; see issue https://github.com/llvm/llvm-project/issues/61830
 define void @test_slh() speculative_load_hardening {
-; CHECK-LABEL: test_slh
+; CHECK-LABEL: test_slh:
+; CHECK: // %bb.0:
+; CHECK-NEXT: cmp sp, #0
+; CHECK-NEXT: csetm x16, ne
+; CHECK-NEXT: sub sp, sp, #208
+; CHECK-NEXT: str x30, [sp, #192] // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 208
+; CHECK-NEXT: .cfi_offset w30, -16
+; CHECK-NEXT: mov x1, sp
+; CHECK-NEXT: mov x0, sp
+; CHECK-NEXT: and x1, x1, x16
+; CHECK-NEXT: mov sp, x1
+; CHECK-NEXT: bl b
+; CHECK-NEXT: cmp sp, #0
+; CHECK-NEXT: ldr x30, [sp, #192] // 8-byte Folded Reload
+; CHECK-NEXT: csetm x16, ne
+; CHECK-NEXT: and x30, x30, x16
+; CHECK-NEXT: add sp, sp, #208
+; CHECK-NEXT: mov x0, sp
+; CHECK-NEXT: and x0, x0, x16
+; CHECK-NEXT: mov sp, x0
+; CHECK-NEXT: csdb
+; CHECK-NEXT: ret
 ; Verify that the memtag loop uses a b.cc conditional branch
 ; rather than an cb[n]z branch.
-;CHECK-NOT: cb{{n?}}z
-;CHECK: b.
 %d = alloca [48 x i32], align 4
 call void @b(ptr %d)
 ret void
diff --git a/llvm/test/CodeGen/AArch64/shift-amount-mod.ll b/llvm/test/CodeGen/AArch64/shift-amount-mod.ll
--- a/llvm/test/CodeGen/AArch64/shift-amount-mod.ll
+++ b/llvm/test/CodeGen/AArch64/shift-amount-mod.ll
@@ -21,9 +21,9 @@
 define i32 @load32_shl_by_negated(ptr %valptr, i32 %shamt) nounwind {
 ; CHECK-LABEL: load32_shl_by_negated:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: neg w8, w1
-; CHECK-NEXT: ldr w9, [x0]
-; CHECK-NEXT: lsl w0, w9, w8
+; CHECK-NEXT: ldr w8, [x0]
+; CHECK-NEXT: neg w9, w1
+; CHECK-NEXT: lsl w0, w8, w9
 ; CHECK-NEXT: ret
 %val = load i32, ptr %valptr
 %negshamt = sub i32 32, %shamt
@@ -45,9 +45,9 @@
 define void @modify32_shl_by_negated(ptr %valptr, i32 %shamt) nounwind {
 ; CHECK-LABEL: modify32_shl_by_negated:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: neg w8, w1
-; CHECK-NEXT: ldr w9, [x0]
-; CHECK-NEXT: lsl w8, w9, w8
+; CHECK-NEXT: ldr w8, [x0]
+; CHECK-NEXT: neg w9, w1
+; CHECK-NEXT: lsl w8, w8, w9
 ; CHECK-NEXT: str w8, [x0]
 ; CHECK-NEXT: ret
 %val = load i32, ptr %valptr
@@ -59,11 +59,11 @@
 define void @modify32_shl_by_negated_multi_use(ptr %valptr, i32 %shamt, ptr %shamtptr) nounwind {
 ; CHECK-LABEL: modify32_shl_by_negated_multi_use:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: neg w8, w1
-; CHECK-NEXT: ldr w9, [x0]
-; CHECK-NEXT: mov w10, #32
-; CHECK-NEXT: lsl w8, w9, w8
-; CHECK-NEXT: sub w9, w10, w1
+; CHECK-NEXT: ldr w8, [x0]
+; CHECK-NEXT: neg w9, w1
+; CHECK-NEXT: lsl w8, w8, w9
+; CHECK-NEXT: mov w9, #32 // =0x20
+; CHECK-NEXT: sub w9, w9, w1
 ; CHECK-NEXT: str w8, [x0]
 ; CHECK-NEXT: str w9, [x2]
 ; CHECK-NEXT: ret
@@ -88,9 +88,9 @@
 define i64 @load64_shl_by_negated(ptr %valptr, i64 %shamt) nounwind {
 ; CHECK-LABEL: load64_shl_by_negated:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: neg x8, x1
-; CHECK-NEXT: ldr x9, [x0]
-; CHECK-NEXT: lsl x0, x9, x8
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: neg x9, x1
+; CHECK-NEXT: lsl x0, x8, x9
 ; CHECK-NEXT: ret
 %val = load i64, ptr %valptr
 %negshamt = sub i64 64, %shamt
@@ -112,9 +112,9 @@
 define void @modify64_shl_by_negated(ptr %valptr, i64 %shamt) nounwind {
 ; CHECK-LABEL: modify64_shl_by_negated:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: neg x8, x1
-; CHECK-NEXT: ldr x9, [x0]
-; CHECK-NEXT: lsl x8, x9, x8
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: neg x9, x1
+; CHECK-NEXT: lsl x8, x8, x9
 ; CHECK-NEXT: str x8, [x0]
 ; CHECK-NEXT: ret
 %val = load i64, ptr %valptr
@@ -126,11 +126,11 @@
 define void @modify64_shl_by_negated_multi_use(ptr %valptr, i64 %shamt, ptr %shamtptr) nounwind {
 ; CHECK-LABEL: modify64_shl_by_negated_multi_use:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: neg x8, x1
-; CHECK-NEXT: ldr x9, [x0]
-; CHECK-NEXT: mov w10, #64
-; CHECK-NEXT: lsl x8, x9, x8
-; CHECK-NEXT: sub x9, x10, x1
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: neg x9, x1
+; CHECK-NEXT: lsl x8, x8, x9
+; CHECK-NEXT: mov w9, #64 // =0x40
+; CHECK-NEXT: sub x9, x9, x1
 ; CHECK-NEXT: str x8, [x0]
 ; CHECK-NEXT: str x9, [x2]
 ; CHECK-NEXT: ret
@@ -158,9 +158,9 @@
 define i32 @load32_lshr_by_negated(ptr %valptr, i32 %shamt) nounwind {
 ; CHECK-LABEL: load32_lshr_by_negated:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: neg w8, w1
-; CHECK-NEXT: ldr w9, [x0]
-; CHECK-NEXT: lsr w0, w9, w8
+; CHECK-NEXT: ldr w8, [x0]
+; CHECK-NEXT: neg w9, w1
+; CHECK-NEXT: lsr w0, w8, w9
 ; CHECK-NEXT: ret
 %val = load i32, ptr %valptr
 %negshamt = sub i32 32, %shamt
@@ -182,9 +182,9 @@
 define void @modify32_lshr_by_negated(ptr %valptr, i32 %shamt) nounwind {
 ; CHECK-LABEL: modify32_lshr_by_negated:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: neg w8, w1
-; CHECK-NEXT: ldr w9, [x0]
-; CHECK-NEXT: lsr w8, w9, w8
+; CHECK-NEXT: ldr w8, [x0]
+; CHECK-NEXT: neg w9, w1
+; CHECK-NEXT: lsr w8, w8, w9
 ; CHECK-NEXT: str w8, [x0]
 ; CHECK-NEXT: ret
 %val = load i32, ptr %valptr
@@ -196,11 +196,11 @@
 define void @modify32_lshr_by_negated_multi_use(ptr %valptr, i32 %shamt, ptr %shamtptr) nounwind {
 ; CHECK-LABEL: modify32_lshr_by_negated_multi_use:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: neg w8, w1
-; CHECK-NEXT: ldr w9, [x0]
-; CHECK-NEXT: mov w10, #32
-; CHECK-NEXT: lsr w8, w9, w8
-; CHECK-NEXT: sub w9, w10, w1
+; CHECK-NEXT: ldr w8, [x0]
+; CHECK-NEXT: neg w9, w1
+; CHECK-NEXT: lsr w8, w8, w9
+; CHECK-NEXT: mov w9, #32 // =0x20
+; CHECK-NEXT: sub w9, w9, w1
 ; CHECK-NEXT: str w8, [x0]
 ; CHECK-NEXT: str w9, [x2]
 ; CHECK-NEXT: ret
@@ -225,9 +225,9 @@
 define i64 @load64_lshr_by_negated(ptr %valptr, i64 %shamt) nounwind {
 ; CHECK-LABEL: load64_lshr_by_negated:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: neg x8, x1
-; CHECK-NEXT: ldr x9, [x0]
-; CHECK-NEXT: lsr x0, x9, x8
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: neg x9, x1
+; CHECK-NEXT: lsr x0, x8, x9
 ; CHECK-NEXT: ret
 %val = load i64, ptr %valptr
 %negshamt = sub i64 64, %shamt
@@ -249,9 +249,9 @@
 define void @modify64_lshr_by_negated(ptr %valptr, i64 %shamt) nounwind {
 ; CHECK-LABEL: modify64_lshr_by_negated:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: neg x8, x1
-; CHECK-NEXT: ldr x9, [x0]
-; CHECK-NEXT: lsr x8, x9, x8
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: neg x9, x1
+; CHECK-NEXT: lsr x8, x8, x9
 ; CHECK-NEXT: str x8, [x0]
 ; CHECK-NEXT: ret
 %val = load i64, ptr %valptr
@@ -263,11 +263,11 @@
 define void @modify64_lshr_by_negated_multi_use(ptr %valptr, i64 %shamt, ptr %shamtptr) nounwind {
 ; CHECK-LABEL: modify64_lshr_by_negated_multi_use:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: neg x8, x1
-; CHECK-NEXT: ldr x9, [x0]
-; CHECK-NEXT: mov w10, #64
-; CHECK-NEXT: lsr x8, x9, x8
-; CHECK-NEXT: sub x9, x10, x1
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: neg x9, x1
+; CHECK-NEXT: lsr x8, x8, x9
+; CHECK-NEXT: mov w9, #64 // =0x40
+; CHECK-NEXT: sub x9, x9, x1
 ; CHECK-NEXT: str x8, [x0]
 ; CHECK-NEXT: str x9, [x2]
 ; CHECK-NEXT: ret
@@ -295,9 +295,9 @@
 define i32 @load32_ashr_by_negated(ptr %valptr, i32 %shamt) nounwind {
 ; CHECK-LABEL: load32_ashr_by_negated:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: neg w8, w1
-; CHECK-NEXT: ldr w9, [x0]
-; CHECK-NEXT: asr w0, w9, w8
+; CHECK-NEXT: ldr w8, [x0]
+; CHECK-NEXT: neg w9, w1
+; CHECK-NEXT: asr w0, w8, w9
 ; CHECK-NEXT: ret
 %val = load i32, ptr %valptr
 %negshamt = sub i32 32, %shamt
@@ -319,9 +319,9 @@
 define void @modify32_ashr_by_negated(ptr %valptr, i32 %shamt) nounwind {
 ; CHECK-LABEL: modify32_ashr_by_negated:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: neg w8, w1
-; CHECK-NEXT: ldr w9, [x0]
-; CHECK-NEXT: asr w8, w9, w8
+; CHECK-NEXT: ldr w8, [x0]
+; CHECK-NEXT: neg w9, w1
+; CHECK-NEXT: asr w8, w8, w9
 ; CHECK-NEXT: str w8, [x0]
 ; CHECK-NEXT: ret
 %val = load i32, ptr %valptr
@@ -333,11 +333,11 @@
 define void @modify32_ashr_by_negated_multi_use(ptr %valptr, i32 %shamt, ptr %shamtptr) nounwind {
 ; CHECK-LABEL: modify32_ashr_by_negated_multi_use:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: neg w8, w1
-; CHECK-NEXT: ldr w9, [x0]
-; CHECK-NEXT: mov w10, #32
-; CHECK-NEXT: asr w8, w9, w8
-; CHECK-NEXT: sub w9, w10, w1
+; CHECK-NEXT: ldr w8, [x0]
+; CHECK-NEXT: neg w9, w1
+; CHECK-NEXT: asr w8, w8, w9
+; CHECK-NEXT: mov w9, #32 // =0x20
+; CHECK-NEXT: sub w9, w9, w1
 ; CHECK-NEXT: str w8, [x0]
 ; CHECK-NEXT: str w9, [x2]
 ; CHECK-NEXT: ret
@@ -362,9 +362,9 @@
 define i64 @load64_ashr_by_negated(ptr %valptr, i64 %shamt) nounwind {
 ; CHECK-LABEL: load64_ashr_by_negated:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: neg x8, x1
-; CHECK-NEXT: ldr x9, [x0]
-; CHECK-NEXT: asr x0, x9, x8
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: neg x9, x1
+; CHECK-NEXT: asr x0, x8, x9
 ; CHECK-NEXT: ret
 %val = load i64, ptr %valptr
 %negshamt = sub i64 64, %shamt
@@ -386,9 +386,9 @@
 define void @modify64_ashr_by_negated(ptr %valptr, i64 %shamt) nounwind {
 ; CHECK-LABEL: modify64_ashr_by_negated:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: neg x8, x1
-; CHECK-NEXT: ldr x9, [x0]
-; CHECK-NEXT: asr x8, x9, x8
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: neg x9, x1
+; CHECK-NEXT: asr x8, x8, x9
 ; CHECK-NEXT: str x8, [x0]
 ; CHECK-NEXT: ret
 %val = load i64, ptr %valptr
@@ -400,11 +400,11 @@
 define void @modify64_ashr_by_negated_multi_use(ptr %valptr, i64 %shamt, ptr %shamtptr) nounwind {
 ; CHECK-LABEL: modify64_ashr_by_negated_multi_use:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: neg x8, x1
-; CHECK-NEXT: ldr x9, [x0]
-; CHECK-NEXT: mov w10, #64
-; CHECK-NEXT: asr x8, x9, x8
-; CHECK-NEXT: sub x9, x10, x1
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: neg x9, x1
+; CHECK-NEXT: asr x8, x8, x9
+; CHECK-NEXT: mov w9, #64 // =0x40
+; CHECK-NEXT: sub x9, x9, x1
 ; CHECK-NEXT: str x8, [x0]
 ; CHECK-NEXT: str x9, [x2]
 ; CHECK-NEXT: ret
@@ -436,9 +436,9 @@
 define i32 @load32_shl_by_complemented(ptr %valptr, i32 %shamt) nounwind {
 ; CHECK-LABEL: load32_shl_by_complemented:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w1
-; CHECK-NEXT: ldr w9, [x0]
-; CHECK-NEXT: lsl w0, w9, w8
+; CHECK-NEXT: ldr w8, [x0]
+; CHECK-NEXT: mvn w9, w1
+; CHECK-NEXT: lsl w0, w8, w9
 ; CHECK-NEXT: ret
 %val = load i32, ptr %valptr
 %negshamt = sub i32 31, %shamt
@@ -460,9 +460,9 @@
 define void @modify32_shl_by_complemented(ptr %valptr, i32 %shamt) nounwind {
 ; CHECK-LABEL: modify32_shl_by_complemented:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w1
-; CHECK-NEXT: ldr w9, [x0]
-; CHECK-NEXT: lsl w8, w9, w8
+; CHECK-NEXT: ldr w8, [x0]
+; CHECK-NEXT: mvn w9, w1
+; CHECK-NEXT: lsl w8, w8, w9
 ; CHECK-NEXT: str w8, [x0]
 ; CHECK-NEXT: ret
 %val = load i32, ptr %valptr
@@ -474,11 +474,11 @@
 define void @modify32_shl_by_complemented_multi_use(ptr %valptr, i32 %shamt, ptr %shamtptr) nounwind {
 ; CHECK-LABEL: modify32_shl_by_complemented_multi_use:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w1
-; CHECK-NEXT: ldr w9, [x0]
-; CHECK-NEXT: mov w10, #31
-; CHECK-NEXT: lsl w8, w9, w8
-; CHECK-NEXT: sub w9, w10, w1
+; CHECK-NEXT: ldr w8, [x0]
+; CHECK-NEXT: mvn w9, w1
+; CHECK-NEXT: lsl w8, w8, w9
+; CHECK-NEXT: mov w9, #31 // =0x1f
+; CHECK-NEXT: sub w9, w9, w1
 ; CHECK-NEXT: str w8, [x0]
 ; CHECK-NEXT: str w9, [x2]
 ; CHECK-NEXT: ret
@@ -503,9 +503,9 @@
 define i64 @load64_shl_by_complemented(ptr %valptr, i64 %shamt) nounwind {
 ; CHECK-LABEL: load64_shl_by_complemented:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn x8, x1
-; CHECK-NEXT: ldr x9, [x0]
-; CHECK-NEXT: lsl x0, x9, x8
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: mvn x9, x1
+; CHECK-NEXT: lsl x0, x8, x9
 ; CHECK-NEXT: ret
 %val = load i64, ptr %valptr
 %negshamt = sub i64 63, %shamt
@@ -527,9 +527,9 @@
 define void @modify64_shl_by_complemented(ptr %valptr, i64 %shamt) nounwind {
 ; CHECK-LABEL: modify64_shl_by_complemented:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn x8, x1
-; CHECK-NEXT: ldr x9, [x0]
-; CHECK-NEXT: lsl x8, x9, x8
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: mvn x9, x1
+; CHECK-NEXT: lsl x8, x8, x9
 ; CHECK-NEXT: str x8, [x0]
 ; CHECK-NEXT: ret
 %val = load i64, ptr %valptr
@@ -541,11 +541,11 @@
 define void @modify64_shl_by_complemented_multi_use(ptr %valptr, i64 %shamt, ptr %shamtptr) nounwind {
 ; CHECK-LABEL: modify64_shl_by_complemented_multi_use:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn x8, x1
-; CHECK-NEXT: ldr x9, [x0]
-; CHECK-NEXT: mov w10, #63
-; CHECK-NEXT: lsl x8, x9, x8
-; CHECK-NEXT: sub x9, x10, x1
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: mvn x9, x1
+; CHECK-NEXT: lsl x8, x8, x9
+; CHECK-NEXT: mov w9, #63 // =0x3f
+; CHECK-NEXT: sub x9, x9, x1
 ; CHECK-NEXT: str x8, [x0]
 ; CHECK-NEXT: str x9, [x2]
 ; CHECK-NEXT: ret
@@ -573,9 +573,9 @@
 define i32 @load32_lshr_by_complemented(ptr %valptr, i32 %shamt) nounwind {
 ; CHECK-LABEL: load32_lshr_by_complemented:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w1
-; CHECK-NEXT: ldr w9, [x0]
-; CHECK-NEXT: lsr w0, w9, w8
+; CHECK-NEXT: ldr w8, [x0]
+; CHECK-NEXT: mvn w9, w1
+; CHECK-NEXT: lsr w0, w8, w9
 ; CHECK-NEXT: ret
 %val = load i32, ptr %valptr
 %negshamt = sub i32 31, %shamt
@@ -597,9 +597,9 @@
 define void @modify32_lshr_by_complemented(ptr %valptr, i32 %shamt) nounwind {
 ; CHECK-LABEL: modify32_lshr_by_complemented:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w1
-; CHECK-NEXT: ldr w9, [x0]
-; CHECK-NEXT: lsr w8, w9, w8
+; CHECK-NEXT: ldr w8, [x0]
+; CHECK-NEXT: mvn w9, w1
+; CHECK-NEXT: lsr w8, w8, w9
 ; CHECK-NEXT: str w8, [x0]
 ; CHECK-NEXT: ret
 %val = load i32, ptr %valptr
@@ -611,11 +611,11 @@
 define void @modify32_lshr_by_complemented_multi_use(ptr %valptr, i32 %shamt, ptr %shamtptr) nounwind {
 ; CHECK-LABEL: modify32_lshr_by_complemented_multi_use:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w1
-; CHECK-NEXT: ldr w9, [x0]
-; CHECK-NEXT: mov w10, #31
-; CHECK-NEXT: lsr w8, w9, w8
-; CHECK-NEXT: sub w9, w10, w1
+; CHECK-NEXT: ldr w8, [x0]
+; CHECK-NEXT: mvn w9, w1
+; CHECK-NEXT: lsr w8, w8, w9
+; CHECK-NEXT: mov w9, #31 // =0x1f
+; CHECK-NEXT: sub w9, w9, w1
 ; CHECK-NEXT: str w8, [x0]
 ; CHECK-NEXT: str w9, [x2]
 ; CHECK-NEXT: ret
@@ -640,9 +640,9 @@
 define i64 @load64_lshr_by_complemented(ptr %valptr, i64 %shamt) nounwind {
 ; CHECK-LABEL: load64_lshr_by_complemented:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn x8, x1
-; CHECK-NEXT: ldr x9, [x0]
-; CHECK-NEXT: lsr x0, x9, x8
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: mvn x9, x1
+; CHECK-NEXT: lsr x0, x8, x9
 ; CHECK-NEXT: ret
 %val = load i64, ptr %valptr
 %negshamt = sub i64 63, %shamt
@@ -664,9 +664,9 @@
 define void @modify64_lshr_by_complemented(ptr %valptr, i64 %shamt) nounwind {
 ; CHECK-LABEL: modify64_lshr_by_complemented:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn x8, x1
-; CHECK-NEXT: ldr x9, [x0]
-; CHECK-NEXT: lsr x8, x9, x8
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: mvn x9, x1
+; CHECK-NEXT: lsr x8, x8, x9
 ; CHECK-NEXT: str x8, [x0]
 ; CHECK-NEXT: ret
 %val = load i64, ptr %valptr
@@ -678,11 +678,11 @@
 define void @modify64_lshr_by_complemented_multi_use(ptr %valptr, i64 %shamt, ptr %shamtptr) nounwind {
 ; CHECK-LABEL: modify64_lshr_by_complemented_multi_use:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn x8, x1
-; CHECK-NEXT: ldr x9, [x0]
-; CHECK-NEXT: mov w10, #63
-; CHECK-NEXT: lsr x8, x9, x8
-; CHECK-NEXT: sub x9, x10, x1
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: mvn x9, x1
+; CHECK-NEXT: lsr x8, x8, x9
+; CHECK-NEXT: mov w9, #63 // =0x3f
+; CHECK-NEXT: sub x9, x9, x1
 ; CHECK-NEXT: str x8, [x0]
 ; CHECK-NEXT: str x9, [x2]
 ; CHECK-NEXT: ret
@@ -710,9 +710,9 @@
 define i32 @load32_ashr_by_complemented(ptr %valptr, i32 %shamt) nounwind {
 ; CHECK-LABEL: load32_ashr_by_complemented:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w1
-; CHECK-NEXT: ldr w9, [x0]
-; CHECK-NEXT: asr w0, w9, w8
+; CHECK-NEXT: ldr w8, [x0]
+; CHECK-NEXT: mvn w9, w1
+; CHECK-NEXT: asr w0, w8, w9
 ; CHECK-NEXT: ret
 %val = load i32, ptr %valptr
 %negshamt = sub i32 31, %shamt
@@ -734,9 +734,9 @@
 define void @modify32_ashr_by_complemented(ptr %valptr, i32 %shamt) nounwind {
 ; CHECK-LABEL: modify32_ashr_by_complemented:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w1
-; CHECK-NEXT: ldr w9, [x0]
-; CHECK-NEXT: asr w8, w9, w8
+; CHECK-NEXT: ldr w8, [x0]
+; CHECK-NEXT: mvn w9, w1
+; CHECK-NEXT: asr w8, w8, w9
 ; CHECK-NEXT: str w8, [x0]
 ; CHECK-NEXT: ret
 %val = load i32, ptr %valptr
@@ -748,11 +748,11 @@
 define void @modify32_ashr_by_complemented_multi_use(ptr %valptr, i32 %shamt, ptr %shamtptr) nounwind {
 ; CHECK-LABEL: modify32_ashr_by_complemented_multi_use:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn w8, w1
-; CHECK-NEXT: ldr w9, [x0]
-; CHECK-NEXT: mov w10, #31
-; CHECK-NEXT: asr w8, w9, w8
-; CHECK-NEXT: sub w9, w10, w1
+; CHECK-NEXT: ldr w8, [x0]
+; CHECK-NEXT: mvn w9, w1
+; CHECK-NEXT: asr w8, w8, w9
+; CHECK-NEXT: mov w9, #31 // =0x1f
+; CHECK-NEXT: sub w9, w9, w1
 ; CHECK-NEXT: str w8, [x0]
 ; CHECK-NEXT: str w9, [x2]
 ; CHECK-NEXT: ret
@@ -777,9 +777,9 @@
 define i64 @load64_ashr_by_complemented(ptr %valptr, i64 %shamt) nounwind {
 ; CHECK-LABEL: load64_ashr_by_complemented:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn x8, x1
-; CHECK-NEXT: ldr x9, [x0]
-; CHECK-NEXT: asr x0, x9, x8
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: mvn x9, x1
+; CHECK-NEXT: asr x0, x8, x9
 ; CHECK-NEXT: ret
 %val = load i64, ptr %valptr
 %negshamt = sub i64 63, %shamt
@@ -801,9 +801,9 @@
 define void @modify64_ashr_by_complemented(ptr %valptr, i64 %shamt) nounwind {
 ; CHECK-LABEL: modify64_ashr_by_complemented:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn x8, x1
-; CHECK-NEXT: ldr x9, [x0]
-; CHECK-NEXT: asr x8, x9, x8
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: mvn x9, x1
+; CHECK-NEXT: asr x8, x8, x9
 ; CHECK-NEXT: str x8, [x0]
 ; CHECK-NEXT: ret
 %val = load i64, ptr %valptr
@@ -815,11 +815,11 @@
 define void @modify64_ashr_by_complemented_multi_use(ptr %valptr, i64 %shamt, ptr %shamtptr) nounwind {
 ; CHECK-LABEL: modify64_ashr_by_complemented_multi_use:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mvn x8, x1
-; CHECK-NEXT: ldr x9, [x0]
-; CHECK-NEXT: mov w10, #63
-; CHECK-NEXT: asr x8, x9, x8
-; CHECK-NEXT: sub x9, x10, x1
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: mvn x9, x1
+; CHECK-NEXT: asr x8, x8, x9
+; CHECK-NEXT: mov w9, #63 // =0x3f
+; CHECK-NEXT: sub x9, x9, x1
 ; CHECK-NEXT: str x8, [x0]
 ; CHECK-NEXT: str x9, [x2]
 ; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/shift-by-signext.ll b/llvm/test/CodeGen/AArch64/shift-by-signext.ll
--- a/llvm/test/CodeGen/AArch64/shift-by-signext.ll
+++ b/llvm/test/CodeGen/AArch64/shift-by-signext.ll
@@ -80,11 +80,11 @@
 define i32 @n6_fshl(i32 %x, i32 %y, i8 %shamt) nounwind {
 ; CHECK-LABEL: n6_fshl:
 ; CHECK: // %bb.0:
+; CHECK-NEXT: lsr w8, w1, #1
 ; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2
-; CHECK-NEXT: mvn w8, w2
-; CHECK-NEXT: lsr w9, w1, #1
+; CHECK-NEXT: mvn w9, w2
 ; CHECK-NEXT: lsl w10, w0, w2
-; CHECK-NEXT: lsr w8, w9, w8
+; CHECK-NEXT: lsr w8, w8, w9
 ; CHECK-NEXT: orr w0, w10, w8
 ; CHECK-NEXT: ret
 %shamt_wide = sext i8 %shamt to i32
@@ -94,11 +94,11 @@
 define i32 @n7_fshr(i32 %x, i32 %y, i8 %shamt) nounwind {
 ; CHECK-LABEL: n7_fshr:
 ; CHECK: // %bb.0:
+; CHECK-NEXT: lsl w8, w0, #1
 ; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2
-; CHECK-NEXT: mvn w8, w2
-; CHECK-NEXT: lsl w9, w0, #1
+; CHECK-NEXT: mvn w9, w2
 ; CHECK-NEXT: lsr w10, w1, w2
-; CHECK-NEXT: lsl w8, w9, w8
+; CHECK-NEXT: lsl w8, w8, w9
 ; CHECK-NEXT: orr w0, w8, w10
 ; CHECK-NEXT: ret
 %shamt_wide = sext i8 %shamt to i32
diff --git a/llvm/test/CodeGen/AArch64/shift_minsize.ll b/llvm/test/CodeGen/AArch64/shift_minsize.ll
--- a/llvm/test/CodeGen/AArch64/shift_minsize.ll
+++ b/llvm/test/CodeGen/AArch64/shift_minsize.ll
@@ -17,6 +17,11 @@
 ; CHECK-NEXT: lsl x0, x0, x1
 ; CHECK-NEXT: ret
 ;
+; CHECK-WIN-LABEL: f0:
+; CHECK-WIN: // %bb.0:
+; CHECK-WIN-NEXT: lsl x0, x0, x1
+; CHECK-WIN-NEXT: ret
+;
 ; CHECK-DARWIN-LABEL: f0:
 ; CHECK-DARWIN: ; %bb.0:
 ; CHECK-DARWIN-NEXT: lsl x0, x0, x1
@@ -32,6 +37,12 @@
 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT: ret
 ;
+; CHECK-WIN-LABEL: f1:
+; CHECK-WIN: // %bb.0:
+; CHECK-WIN-NEXT: lsl x0, x0, x1
+; CHECK-WIN-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-WIN-NEXT: ret
+;
 ; CHECK-DARWIN-LABEL: f1:
 ; CHECK-DARWIN: ; %bb.0:
 ; CHECK-DARWIN-NEXT: lsl x0, x0, x1
@@ -49,6 +60,12 @@
 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT: ret
 ;
+; CHECK-WIN-LABEL: f2:
+; CHECK-WIN: // %bb.0:
+; CHECK-WIN-NEXT: asr x0, x0, x1
+; CHECK-WIN-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-WIN-NEXT: ret
+;
 ; CHECK-DARWIN-LABEL: f2:
 ; CHECK-DARWIN: ; %bb.0:
 ; CHECK-DARWIN-NEXT: asr x0, x0, x1
@@ -66,6 +83,12 @@
 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
 ; CHECK-NEXT: ret
 ;
+; CHECK-WIN-LABEL: f3:
+; CHECK-WIN: // %bb.0:
+; CHECK-WIN-NEXT: lsr x0, x0, x1
+; CHECK-WIN-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-WIN-NEXT: ret
+;
 ; CHECK-DARWIN-LABEL: f3:
 ; CHECK-DARWIN: ; %bb.0:
 ; CHECK-DARWIN-NEXT: lsr x0, x0, x1
@@ -86,18 +109,32 @@
 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT: ret
 ;
+; CHECK-WIN-LABEL: shl128:
+; CHECK-WIN: // %bb.0: // %entry
+; CHECK-WIN-NEXT: lsr x8, x0, #1
+; CHECK-WIN-NEXT: mvn w9, w2
+; CHECK-WIN-NEXT: mov w10, w2
+; CHECK-WIN-NEXT: lsl x11, x0, x10
+; CHECK-WIN-NEXT: tst x10, #0x40
+; CHECK-WIN-NEXT: lsr x8, x8, x9
+; CHECK-WIN-NEXT: lsl x9, x1, x10
+; CHECK-WIN-NEXT: csel x0, xzr, x11, ne
+; CHECK-WIN-NEXT: orr x8, x9, x8
+; CHECK-WIN-NEXT: csel x1, x11, x8, ne
+; CHECK-WIN-NEXT: ret
+;
 ; CHECK-DARWIN-LABEL: shl128:
 ; CHECK-DARWIN: ; %bb.0: ; %entry
-; CHECK-DARWIN-NEXT: mvn w8, w2
-; CHECK-DARWIN-NEXT: mov w9, w2
-; CHECK-DARWIN-NEXT: lsr x10, x0, #1
-; CHECK-DARWIN-NEXT: tst x9, #0x40
-; CHECK-DARWIN-NEXT: lsr x8, x10, x8
-; CHECK-DARWIN-NEXT: lsl x10, x1, x9
-; CHECK-DARWIN-NEXT: orr x8, x10, x8
-; CHECK-DARWIN-NEXT: lsl x10, x0, x9
-; CHECK-DARWIN-NEXT: csel x1, x10, x8, ne
-; CHECK-DARWIN-NEXT: csel x0, xzr, x10, ne
+; CHECK-DARWIN-NEXT: lsr x8, x0, #1
+; CHECK-DARWIN-NEXT: mvn w9, w2
+; CHECK-DARWIN-NEXT: mov w10, w2
+; CHECK-DARWIN-NEXT: lsl x11, x0, x10
+; CHECK-DARWIN-NEXT: tst x10, #0x40
+; CHECK-DARWIN-NEXT: lsr x8, x8, x9
+; CHECK-DARWIN-NEXT: lsl x9, x1, x10
+; CHECK-DARWIN-NEXT: csel x0, xzr, x11, ne
+; CHECK-DARWIN-NEXT: orr x8, x9, x8
+; CHECK-DARWIN-NEXT: csel x1, x11, x8, ne
 ; CHECK-DARWIN-NEXT: ret
 entry:
@@ -126,19 +163,34 @@
 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT: ret
 ;
+; CHECK-WIN-LABEL: ashr128:
+; CHECK-WIN: // %bb.0: // %entry
+; CHECK-WIN-NEXT: lsl x8, x1, #1
+; CHECK-WIN-NEXT: mov w9, w2
+; CHECK-WIN-NEXT: mvn w10, w2
+; CHECK-WIN-NEXT: lsr x11, x0, x9
+; CHECK-WIN-NEXT: asr x12, x1, #63
+; CHECK-WIN-NEXT: tst x9, #0x40
+; CHECK-WIN-NEXT: lsl x8, x8, x10
+; CHECK-WIN-NEXT: asr x10, x1, x9
+; CHECK-WIN-NEXT: orr x8, x8, x11
+; CHECK-WIN-NEXT: csel x1, x12, x10, ne
+; CHECK-WIN-NEXT: csel x0, x10, x8, ne
+; CHECK-WIN-NEXT: ret
+;
 ; CHECK-DARWIN-LABEL: ashr128:
 ; CHECK-DARWIN: ; %bb.0: ; %entry
-; CHECK-DARWIN-NEXT: mov w8, w2
-; CHECK-DARWIN-NEXT: mvn w9, w2
-; CHECK-DARWIN-NEXT: lsl x10, x1, #1
-; CHECK-DARWIN-NEXT: tst x8, #0x40
-; CHECK-DARWIN-NEXT: lsr x11, x0, x8
-; CHECK-DARWIN-NEXT: lsl x9, x10, x9
-; CHECK-DARWIN-NEXT: asr x10, x1, x8
-; CHECK-DARWIN-NEXT: orr x9, x9, x11
-; CHECK-DARWIN-NEXT: asr x8, x1, #63
-; CHECK-DARWIN-NEXT: csel x0, x10, x9, ne
-; CHECK-DARWIN-NEXT: csel x1, x8, x10, ne
+; CHECK-DARWIN-NEXT: lsl x8, x1, #1
+; CHECK-DARWIN-NEXT: mov w9, w2
+; CHECK-DARWIN-NEXT: mvn w10, w2
+; CHECK-DARWIN-NEXT: lsr x11, x0, x9
+; CHECK-DARWIN-NEXT: asr x12, x1, #63
+; CHECK-DARWIN-NEXT: tst x9, #0x40
+; CHECK-DARWIN-NEXT: lsl x8, x8, x10
+; CHECK-DARWIN-NEXT: asr x10, x1, x9
+; CHECK-DARWIN-NEXT: orr x8, x8, x11
+; CHECK-DARWIN-NEXT: csel x1, x12, x10, ne
+; CHECK-DARWIN-NEXT: csel x0, x10, x8, ne
 ; CHECK-DARWIN-NEXT: ret
 entry:
 %x.sroa.2.0.insert.ext = zext i64 %x.coerce1 to i128
@@ -166,18 +218,32 @@
 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
 ; CHECK-NEXT: ret
 ;
+; CHECK-WIN-LABEL: lshr128:
+; CHECK-WIN: // %bb.0: // %entry
+; CHECK-WIN-NEXT: lsl x8, x1, #1
+; CHECK-WIN-NEXT: mov w9, w2
+; CHECK-WIN-NEXT: mvn w10, w2
+; CHECK-WIN-NEXT: lsr x11, x0, x9
+; CHECK-WIN-NEXT: tst x9, #0x40
+; CHECK-WIN-NEXT: lsl x8, x8, x10
+; CHECK-WIN-NEXT: lsr x10, x1, x9
+; CHECK-WIN-NEXT: orr x8, x8, x11
+; CHECK-WIN-NEXT: csel x1, xzr, x10, ne
+; CHECK-WIN-NEXT: csel x0, x10, x8, ne
+; CHECK-WIN-NEXT: ret
+;
 ; CHECK-DARWIN-LABEL: lshr128:
 ; CHECK-DARWIN: ; %bb.0: ; %entry
-; CHECK-DARWIN-NEXT: mov w8, w2
-; CHECK-DARWIN-NEXT: mvn w9, w2
-; CHECK-DARWIN-NEXT: lsl x10, x1, #1
-; CHECK-DARWIN-NEXT: tst x8, #0x40
-; CHECK-DARWIN-NEXT: lsr x11, x0, x8
-; CHECK-DARWIN-NEXT: lsl x9, x10, x9
-; CHECK-DARWIN-NEXT: orr x9, x9, x11
-; CHECK-DARWIN-NEXT: lsr x10, x1, x8
-; CHECK-DARWIN-NEXT: csel x0, x10, x9, ne
+; CHECK-DARWIN-NEXT: lsl x8, x1, #1
+; CHECK-DARWIN-NEXT: mov w9, w2
+; CHECK-DARWIN-NEXT: mvn w10, w2
+; CHECK-DARWIN-NEXT: lsr x11, x0, x9
+; CHECK-DARWIN-NEXT: tst x9, #0x40
+; CHECK-DARWIN-NEXT: lsl x8, x8, x10
+; CHECK-DARWIN-NEXT: lsr x10, x1, x9
+; CHECK-DARWIN-NEXT: orr x8, x8, x11
 ; CHECK-DARWIN-NEXT: csel x1, xzr, x10, ne
+; CHECK-DARWIN-NEXT: csel x0, x10, x8, ne
 ; CHECK-DARWIN-NEXT: ret
 entry:
 %x.sroa.2.0.insert.ext = zext i64 %x.coerce1 to i128
diff --git a/llvm/test/CodeGen/AArch64/shrink-wrap-byval-inalloca-preallocated.ll b/llvm/test/CodeGen/AArch64/shrink-wrap-byval-inalloca-preallocated.ll
--- a/llvm/test/CodeGen/AArch64/shrink-wrap-byval-inalloca-preallocated.ll
+++ b/llvm/test/CodeGen/AArch64/shrink-wrap-byval-inalloca-preallocated.ll
@@ -13,8 +13,8 @@
 ; CHECK-LABEL: test_regular_pointers:
 ; CHECK: ; %bb.0: ; %entry
 ; CHECK-NEXT: ldr d0, [x0]
-; CHECK-NEXT: mov x8, #1 ; =0x1
 ; CHECK-NEXT: ldr d1, [x1, #8]
+; CHECK-NEXT: mov x8, #1 ; =0x1
 ; CHECK-NEXT: movk x8, #2047, lsl #16
 ; CHECK-NEXT: fadd d0, d0, d1
 ; CHECK-NEXT: fmov d1, x8
@@ -67,8 +67,8 @@
 ; CHECK-NEXT: .cfi_offset w19, -24
 ; CHECK-NEXT: .cfi_offset w20, -32
 ; CHECK-NEXT: ldr d0, [sp, #40]
-; CHECK-NEXT: mov x8, #1 ; =0x1
 ; CHECK-NEXT: ldr d1, [x0]
+; CHECK-NEXT: mov x8, #1 ; =0x1
 ; CHECK-NEXT: movk x8, #2047, lsl #16
 ; CHECK-NEXT: fadd d0, d1, d0
 ; CHECK-NEXT: fmov d1, x8
@@ -115,8 +115,8 @@
 ; CHECK-NEXT: .cfi_offset w19, -24
 ; CHECK-NEXT: .cfi_offset w20, -32
 ; CHECK-NEXT: ldr d0, [sp, #40]
-; CHECK-NEXT: mov x8, #1 ; =0x1
 ; CHECK-NEXT: ldr d1, [x0]
+; CHECK-NEXT: mov x8, #1 ; =0x1
 ; CHECK-NEXT: movk x8, #2047, lsl #16
 ; CHECK-NEXT: fadd d0, d1, d0
 ; CHECK-NEXT: fmov d1, x8
@@ -163,8 +163,8 @@
 ; CHECK-NEXT: .cfi_offset w19, -24
 ; CHECK-NEXT: .cfi_offset w20, -32
 ; CHECK-NEXT: ldr d0, [sp, #40]
-; CHECK-NEXT: mov x8, #1 ; =0x1
 ; CHECK-NEXT: ldr d1, [x0]
+; CHECK-NEXT: mov x8, #1 ; =0x1
 ; CHECK-NEXT: movk x8, #2047, lsl #16
 ; CHECK-NEXT: fadd d0, d1, d0
 ; CHECK-NEXT: fmov d1, x8
diff --git a/llvm/test/CodeGen/AArch64/shrink-wrapping-vla.ll b/llvm/test/CodeGen/AArch64/shrink-wrapping-vla.ll
--- a/llvm/test/CodeGen/AArch64/shrink-wrapping-vla.ll
+++ b/llvm/test/CodeGen/AArch64/shrink-wrapping-vla.ll
@@ -1,5 +1,5 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; Test shrink wrapping placement is correct with respect to calls to llvm.{stacksave,stackrestore}
-
 ; void f(int n, int x[]) {
 ;   if (n < 0)
 ;     return;
@@ -14,27 +14,71 @@
 ; }
 ;
 ; RUN: llc -mtriple aarch64-linux %s -o - | FileCheck %s
-
 define dso_local void @f(i32 %n, ptr nocapture %x) uwtable {
+; CHECK-LABEL: f:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: tbnz w0, #31, .LBB0_7
+; CHECK-NEXT: // %bb.1: // %if.end
+; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: .cfi_def_cfa w29, 16
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: ubfiz x8, x0, #2, #32
+; CHECK-NEXT: mov x9, sp
+; CHECK-NEXT: mov x2, sp
+; CHECK-NEXT: add x8, x8, #15
+; CHECK-NEXT: and x8, x8, #0x7fffffff0
+; CHECK-NEXT: sub x8, x9, x8
+; CHECK-NEXT: mov sp, x8
+; CHECK-NEXT: cbz w0, .LBB0_6
+; CHECK-NEXT: // %bb.2:
+; CHECK-NEXT: mov w9, w0
+; CHECK-NEXT: mov x10, x8
+; CHECK-NEXT: mov x11, x9
+; CHECK-NEXT: .LBB0_3: // %for.body
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: subs x11, x11, #1
+; CHECK-NEXT: ldr w12, [x1, w11, sxtw #2]
+; CHECK-NEXT: str w12, [x10], #4
+; CHECK-NEXT: b.ne .LBB0_3
+; CHECK-NEXT: // %bb.4: // %for.cond6.preheader
+; CHECK-NEXT: cmp w0, #1
+; CHECK-NEXT: b.lt .LBB0_6
+; CHECK-NEXT: .LBB0_5: // %for.body9
+; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ldr w10, [x8], #4
+; CHECK-NEXT: subs x9, x9, #1
+; CHECK-NEXT: add w10, w10, #1
+; CHECK-NEXT: str w10, [x1], #4
+; CHECK-NEXT: b.ne .LBB0_5
+; CHECK-NEXT: .LBB0_6: // %for.cond.cleanup8
+; CHECK-NEXT: mov sp, x2
+; CHECK-NEXT: mov sp, x29
+; CHECK-NEXT: .cfi_def_cfa wsp, 16
+; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: .cfi_restore w30
+; CHECK-NEXT: .cfi_restore w29
+; CHECK-NEXT: .LBB0_7: // %return
+; CHECK-NEXT: ret
 entry:
 %cmp = icmp slt i32 %n, 0
 br i1 %cmp, label %return, label %if.end
-
 if.end: ; preds = %entry
 %0 = zext i32 %n to i64
 %1 = tail call ptr @llvm.stacksave()
 %vla = alloca i32, i64 %0, align 16
 %cmp132 = icmp eq i32 %n, 0
 br i1 %cmp132, label %for.cond.cleanup8, label %for.body.lr.ph
-
 for.body.lr.ph: ; preds = %if.end
 %sub = add i32 %n, -1
 br label %for.body
-
 for.cond6.preheader: ; preds = %for.body
 %cmp730 = icmp sgt i32 %n, 0
 br i1 %cmp730, label %for.body9, label %for.cond.cleanup8
-
 for.body: ; preds = %for.body, %for.body.lr.ph
 %indvars.iv34 = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next35, %for.body ]
 %2 = trunc i64 %indvars.iv34 to i32
@@ -47,11 +91,9 @@
 %indvars.iv.next35 = add nuw nsw i64 %indvars.iv34, 1
 %exitcond37 = icmp eq i64 %indvars.iv.next35, %0
 br i1 %exitcond37, label %for.cond6.preheader, label %for.body
-
 for.cond.cleanup8: ; preds = %for.body9, %if.end, %for.cond6.preheader
 tail call void @llvm.stackrestore(ptr %1)
 br label %return
-
 for.body9: ; preds = %for.cond6.preheader, %for.body9
 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body9 ], [ 0, %for.cond6.preheader ]
 %arrayidx11 = getelementptr inbounds i32, ptr %vla, i64 %indvars.iv
@@ -62,45 +104,14 @@
 %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
 %exitcond = icmp eq i64 %indvars.iv.next, %0
 br i1 %exitcond, label %for.cond.cleanup8, label %for.body9
-
 return: ; preds = %entry, %for.cond.cleanup8
 ret void
 }
-
 ; Function Attrs: nounwind
 declare ptr @llvm.stacksave()
-
 ; Function Attrs: nounwind
 declare void @llvm.stackrestore(ptr)
-
-; Check that llvm.stackrestore() happens before CSRs are popped off the stack
-
-; CHECK-LABEL: f
-
-; CHECK: stp x29, x30, [sp, #-16]!
-; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: mov x29, sp
-; CHECK-NEXT: .cfi_def_cfa w29, 16
-; CHECK-NEXT: .cfi_offset w30, -8
-; CHECK-NEXT: .cfi_offset w29, -16
-
- ; VLA allocation
-; CHECK: ubfiz x8, x0, #2, #32
-; CHECK: mov x9, sp
-; CHECK: add x8, x8, #15
-; CHECK: mov [[SAVE:x[0-9]+]], sp
-; CHECK: and [[X1:x[0-9]+]], [[X1]], #0x7fffffff0
 ; Saving the SP via llvm.stacksave()
-; CHECK: sub [[X1]], [[X2:x[0-9]+]], [[X1]]
 ; The next instruction comes from llvm.stackrestore()
-; CHECK: mov sp, [[SAVE]]
 ; Epilogue
-; CHECK-NEXT: mov sp, x29
-; CHECK-NEXT: .cfi_def_cfa wsp, 16
-; CHECK-NEXT: ldp x29, x30, [sp], #16
-; CHECK-NEXT: .cfi_def_cfa_offset 0
-; CHECK-NEXT: .cfi_restore w30
-; CHECK-NEXT: .cfi_restore w29
-; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll b/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll
--- a/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll
+++ b/llvm/test/CodeGen/AArch64/shuffle-tbl34.ll
@@ -21,11 +21,11 @@
 define <16 x i8> @shuffle4_v4i8_16(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8> %d) {
 ; CHECK-LABEL: shuffle4_v4i8_16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI0_0
 ; CHECK-NEXT: // kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT: adrp x8, .LCPI0_0
 ; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI0_0]
+; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
 ; CHECK-NEXT: ret
@@ -47,11 +47,11 @@
 define <8 x i8> @shuffle4_v4i8_8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c, <4 x i8> %d) {
 ; CHECK-LABEL: shuffle4_v4i8_8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI1_0
 ; CHECK-NEXT: // kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT: adrp x8, .LCPI1_0
 ; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-NEXT: ldr d4, [x8, :lo12:.LCPI1_0]
+; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-NEXT: tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.8b
 ; CHECK-NEXT: ret
@@ -101,17 +101,17 @@
 define <16 x i8> @shuffle4_v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) {
 ; CHECK-LABEL: shuffle4_v8i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI2_0
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
-; CHECK-NEXT: adrp x9, .LCPI2_1
 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
-; CHECK-NEXT: mov v0.d[1], v1.d[0]
 ; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3
+; CHECK-NEXT: adrp x8, .LCPI2_0
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
 ; CHECK-NEXT: mov v2.d[1], v3.d[0]
 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI2_0]
+; CHECK-NEXT: adrp x8, .LCPI2_1
+; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI2_1]
 ; CHECK-NEXT: adrp x8, .LCPI2_2
-; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI2_1]
 ; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b
 ; CHECK-NEXT: tbl v1.8b, { v2.16b }, v3.8b
 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI2_2]
@@ -178,10 +178,10 @@
 ; CHECK-LABEL: shuffle4_v16i8:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: adrp x8, .LCPI3_0
-; CHECK-NEXT: adrp x9, .LCPI3_1
 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0]
+; CHECK-NEXT: adrp x8, .LCPI3_1
+; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI3_1]
 ; CHECK-NEXT: adrp x8, .LCPI3_2
-; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI3_1]
 ; CHECK-NEXT: tbl v1.16b, { v0.16b }, v1.16b
 ; CHECK-NEXT: tbl v0.16b, { v2.16b }, v3.16b
 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_2]
@@ -214,10 +214,10 @@
 ; CHECK-LABEL: shuffle4_v8i16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: fmov d5, d2
-; CHECK-NEXT: adrp x8, .LCPI4_0
-; CHECK-NEXT: fmov d4, d0
 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT: // kill: def $d3 killed $d3 def $q3
+; CHECK-NEXT: adrp x8, .LCPI4_0
+; CHECK-NEXT: fmov d4, d0
 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI4_0]
 ; CHECK-NEXT: mov v4.d[1], v1.d[0]
 ; CHECK-NEXT: mov v5.d[1], v3.d[0]
@@ -232,12 +232,11 @@
 define <4 x i32> @shuffle4_v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, <4 x i32> %d) {
 ; CHECK-LABEL: shuffle4_v4i32:
 ; CHECK: // %bb.0:
+; CHECK-NEXT: zip1 v1.4s, v1.4s, v1.4s
 ; CHECK-NEXT: rev64 v3.4s, v3.4s
-; CHECK-NEXT: zip1 v4.4s, v1.4s, v1.4s
-; CHECK-NEXT: zip2 v1.4s, v3.4s, v2.4s
-; CHECK-NEXT: ext v0.16b, v4.16b, v0.16b, #4
-; CHECK-NEXT: mov v1.d[1], v0.d[1]
-; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: ext v1.16b, v1.16b, v0.16b, #4
+; CHECK-NEXT: zip2 v0.4s, v3.4s, v2.4s
+; CHECK-NEXT: mov v0.d[1], v1.d[1]
 ; CHECK-NEXT: ret
 %x = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32>
 %y = shufflevector <4 x i32> %c, <4 x i32> %d, <8 x i32>
@@ -276,9 +275,9 @@
 define <16 x i8> @shuffle4_v8i8_v16i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) {
 ; CHECK-LABEL: shuffle4_v8i8_v16i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI6_0
 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: adrp x8, .LCPI6_0
 ; CHECK-NEXT: mov v2.d[1], v2.d[0]
 ; CHECK-NEXT: mov v0.d[1], v0.d[0]
 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI6_0]
@@ -315,9 +314,9 @@
 define <8 x i8> @shuffle4_v8i8_v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) {
 ; CHECK-LABEL: shuffle4_v8i8_v8i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI7_0
 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
+; CHECK-NEXT: adrp x8, .LCPI7_0
 ; CHECK-NEXT: mov v2.d[1], v2.d[0]
 ; CHECK-NEXT: mov v0.d[1], v0.d[0]
 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI7_0]
@@ -355,12 +354,12 @@
 ; CHECK-LABEL: shuffle4_v4i8_zext:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: uzp1 v0.8b, v0.8b, v1.8b
+; CHECK-NEXT: uzp1 v1.8b, v2.8b, v3.8b
 ; CHECK-NEXT: adrp x8, .LCPI8_0
-; CHECK-NEXT: uzp1 v2.8b, v2.8b, v3.8b
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI8_0]
-; CHECK-NEXT: ushll v3.8h, v0.8b, #0
-; CHECK-NEXT: ushll v4.8h, v2.8b, #0
-; CHECK-NEXT: tbl v0.16b, { v3.16b, v4.16b }, v1.16b
+; CHECK-NEXT: ushll v2.8h, v0.8b, #0
+; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI8_0]
+; CHECK-NEXT: ushll v3.8h, v1.8b, #0
+; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b
 ; CHECK-NEXT: ret
 %x = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32>
 %y = shufflevector <4 x i8> %c, <4 x i8> %d, <8 x i32>
@@ -390,11 +389,11 @@
 define <16 x i8> @shuffle4_v4i16_trunc(<4 x i16> %ae, <4 x i16> %be, <4 x i16> %ce, <4 x i16> %de) {
 ; CHECK-LABEL: shuffle4_v4i16_trunc:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI9_0
 ; CHECK-NEXT: // kill: def $d3 killed $d3 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
+; CHECK-NEXT: adrp x8, .LCPI9_0
 ; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
-; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-NEXT: ldr q4, [x8, :lo12:.LCPI9_0]
+; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1_q2_q3 def $q0_q1_q2_q3
 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v4.16b
 ; CHECK-NEXT: ret
@@ -429,11 +428,11 @@
 define <16 x i8> @shuffle4_v4i32_trunc(<4 x i32> %ae, <4 x i32> %be, <4 x i32> %ce, <4 x i32> %de) {
 ; CHECK-LABEL: shuffle4_v4i32_trunc:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI10_0
 ; CHECK-NEXT: xtn v4.4h, v0.4s
+; CHECK-NEXT: adrp x8, .LCPI10_0
 ; CHECK-NEXT: xtn v5.4h, v1.4s
-; CHECK-NEXT: xtn v6.4h, v2.4s
 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI10_0]
+; CHECK-NEXT: xtn v6.4h, v2.4s
 ; CHECK-NEXT: xtn v7.4h, v3.4s
 ; CHECK-NEXT: tbl v0.16b, { v4.16b, v5.16b, v6.16b, v7.16b }, v0.16b
 ; CHECK-NEXT: ret
@@ -467,11 +466,11 @@
 define <12 x i8> @shuffle3_v4i8(<4 x i8> %a, <4 x i8> %b, <4 x i8> %c) {
 ; CHECK-LABEL: shuffle3_v4i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI11_0
 ; CHECK-NEXT: // kill: def $d2 killed $d2 killed $q0_q1_q2 def $q0_q1_q2
+; CHECK-NEXT: adrp x8, .LCPI11_0
+; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI11_0]
 ; CHECK-NEXT: // kill: def $d1 killed $d1 killed $q0_q1_q2 def $q0_q1_q2
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0_q1_q2 def $q0_q1_q2
-; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI11_0]
 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b, v2.16b }, v3.16b
 ; CHECK-NEXT: ret
 %x = shufflevector <4 x i8> %a, <4 x i8> %b, <8 x i32>
@@ -501,9 +500,9 @@
 ; CHECK-LABEL: shuffle3_v4i16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: fmov d3, d2
+; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT: adrp x8, .LCPI12_0
 ; CHECK-NEXT: fmov d2, d0
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI12_0]
 ; CHECK-NEXT: mov v2.d[1], v1.d[0]
 ; CHECK-NEXT: tbl v0.16b, { v2.16b, v3.16b }, v0.16b
@@ -559,11 +558,11 @@
 define <8 x i8> @insert4_v8i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c, <16 x i8> %d) {
 ; CHECK-LABEL: insert4_v8i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI14_0
-; CHECK-NEXT: adrp x9, .LCPI14_1
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT: mov v4.16b, v3.16b
 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
+; CHECK-NEXT: adrp x8, .LCPI14_0
+; CHECK-NEXT: adrp x9, .LCPI14_1
 ; CHECK-NEXT: mov v0.d[1], v2.d[0]
 ; CHECK-NEXT: mov v3.16b, v1.16b
 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI14_0]
@@ -629,16 +628,16 @@
 define <16 x i8> @insert4_v16i8(<8 x i8> %a, <16 x i8> %b, <8 x i8> %c, <16 x i8> %d) {
 ; CHECK-LABEL: insert4_v16i8:
 ; CHECK: // %bb.0:
+; CHECK-NEXT: mov v4.16b, v3.16b
 ; CHECK-NEXT: adrp x8, .LCPI15_0
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q31_q0
 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
-; CHECK-NEXT: mov v4.16b, v3.16b
 ; CHECK-NEXT: mov v3.16b, v1.16b
 ; CHECK-NEXT: ldr q5, [x8, :lo12:.LCPI15_0]
-; CHECK-NEXT: adrp x8, .LCPI15_1
 ; CHECK-NEXT: mov v0.d[1], v2.d[0]
-; CHECK-NEXT: tbl v31.16b, { v3.16b, v4.16b }, v5.16b
+; CHECK-NEXT: adrp x8, .LCPI15_1
 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_1]
+; CHECK-NEXT: tbl v31.16b, { v3.16b, v4.16b }, v5.16b
 ; CHECK-NEXT: tbl v0.16b, { v31.16b, v0.16b }, v1.16b
 ; CHECK-NEXT: ret
 %e1 = extractelement <8 x i8> %a, i32 4
@@ -698,8 +697,8 @@
 ; CHECK-LABEL: test:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: frintm v0.2d, v0.2d
-; CHECK-NEXT: adrp x8, .LCPI16_0
 ; CHECK-NEXT: frintm v4.2d, v4.2d
+; CHECK-NEXT: adrp x8, .LCPI16_0
 ; CHECK-NEXT: frintm v1.2d, v1.2d
 ; CHECK-NEXT: frintm v5.2d, v5.2d
 ; CHECK-NEXT: frintm v2.2d, v2.2d
@@ -713,20 +712,20 @@
 ; CHECK-NEXT: fcvtzs v2.2d, v2.2d
 ; CHECK-NEXT: fcvtzs v6.2d, v6.2d
 ; CHECK-NEXT: fcvtzs v3.2d, v3.2d
+; CHECK-NEXT: fcvtzs v7.2d, v7.2d
 ; CHECK-NEXT: xtn v16.2s, v0.2d
-; CHECK-NEXT: fcvtzs v0.2d, v7.2d
 ; CHECK-NEXT: xtn v20.2s, v4.2d
+; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI16_0]
 ; CHECK-NEXT: xtn v17.2s, v1.2d
 ; CHECK-NEXT: xtn v21.2s, v5.2d
 ; CHECK-NEXT: xtn v18.2s, v2.2d
-; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0]
 ; CHECK-NEXT: xtn v22.2s, v6.2d
 ; CHECK-NEXT: xtn v19.2s, v3.2d
-; CHECK-NEXT: xtn v23.2s, v0.2d
-; CHECK-NEXT: tbl v2.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b
-; CHECK-NEXT: tbl v1.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v1.16b
-; CHECK-NEXT: uzp1 v0.8h, v2.8h, v1.8h
-; CHECK-NEXT: uzp2 v1.8h, v2.8h, v1.8h
+; CHECK-NEXT: xtn v23.2s, v7.2d
+; CHECK-NEXT: tbl v1.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v0.16b
+; CHECK-NEXT: tbl v2.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v0.16b
+; CHECK-NEXT: uzp1 v0.8h, v1.8h, v2.8h
+; CHECK-NEXT: uzp2 v1.8h, v1.8h, v2.8h
 ; CHECK-NEXT: ret
 %l214 = call fast <2 x double> @llvm.floor.v2f64(<2 x double> %l213)
 %l215 = fptosi <2 x double> %l214 to <2 x i16>
diff --git a/llvm/test/CodeGen/AArch64/shuffles.ll b/llvm/test/CodeGen/AArch64/shuffles.ll
--- a/llvm/test/CodeGen/AArch64/shuffles.ll
+++ b/llvm/test/CodeGen/AArch64/shuffles.ll
@@ -4,18 +4,18 @@
 define <16 x i32> @test_shuf1(<16 x i32> %x, <16 x i32> %y) {
 ; CHECK-LABEL: test_shuf1:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ext v16.16b, v6.16b, v1.16b, #4
-; CHECK-NEXT: dup v5.4s, v4.s[0]
-; CHECK-NEXT: uzp1 v17.4s, v1.4s, v0.4s
-; CHECK-NEXT: uzp2 v18.4s, v2.4s, v4.4s
+; CHECK-NEXT: ext v3.16b, v6.16b, v1.16b, #4
+; CHECK-NEXT: uzp1 v5.4s, v1.4s, v0.4s
+; CHECK-NEXT: uzp2 v16.4s, v2.4s, v4.4s
+; CHECK-NEXT: dup v17.4s, v4.s[0]
+; CHECK-NEXT: trn2 v4.4s, v1.4s, v3.4s
+; CHECK-NEXT: mov v17.s[0], v6.s[3]
+; CHECK-NEXT: trn2 v1.4s, v5.4s, v1.4s
 ; CHECK-NEXT: rev64 v3.4s, v7.4s
-; CHECK-NEXT: trn2 v4.4s, v1.4s, v16.4s
-; CHECK-NEXT: mov v5.s[0], v6.s[3]
-; CHECK-NEXT: trn2 v1.4s, v17.4s, v1.4s
-; CHECK-NEXT: trn1 v2.4s, v18.4s, v2.4s
+; CHECK-NEXT: trn1 v2.4s, v16.4s, v2.4s
 ; CHECK-NEXT: mov v4.s[0], v7.s[1]
-; CHECK-NEXT: mov v3.d[0], v5.d[0]
 ; CHECK-NEXT: ext v1.16b, v0.16b, v1.16b, #12
+; CHECK-NEXT: mov v3.d[0], v17.d[0]
 ; CHECK-NEXT: mov v2.s[3], v7.s[0]
 ; CHECK-NEXT: mov v0.16b, v4.16b
 ; CHECK-NEXT: ret
@@ -26,10 +26,10 @@
 define <4 x i32> @test_shuf2(<16 x i32> %x, <16 x i32> %y) {
 ; CHECK-LABEL: test_shuf2:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: zip2 v2.4s, v7.4s, v6.4s
+; CHECK-NEXT: zip2 v0.4s, v7.4s, v6.4s
+; CHECK-NEXT: trn2 v2.4s, v7.4s, v0.4s
 ; CHECK-NEXT: ext v0.16b, v1.16b, v1.16b, #4
-; CHECK-NEXT: trn2 v1.4s, v7.4s, v2.4s
-; CHECK-NEXT: mov v0.d[0], v1.d[0]
+; CHECK-NEXT: mov v0.d[0], v2.d[0]
 ; CHECK-NEXT: ret
 %s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <4 x i32>
 ret <4 x i32> %s3
@@ -60,8 +60,8 @@
 define <4 x i32> @test_shuf5(<16 x i32> %x, <16 x i32> %y) {
 ; CHECK-LABEL: test_shuf5:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: rev64 v0.4s, v7.4s
 ; CHECK-NEXT: ext v1.16b, v6.16b, v4.16b, #12
+; CHECK-NEXT: rev64 v0.4s, v7.4s
 ; CHECK-NEXT: mov v0.d[0], v1.d[0]
 ; CHECK-NEXT: ret
 %s3 = shufflevector <16 x i32> %x, <16 x i32> %y, <4 x i32>
@@ -155,9 +155,9 @@
 define <8 x i8> @test_shuf8(<8 x i8> %a, <8 x i8> %b)
 ; CHECK-LABEL: test_shuf8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI12_0
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
+; CHECK-NEXT: adrp x8, .LCPI12_0
 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI12_0]
 ; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b
@@ -172,8 +172,8 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: adrp x8, .LCPI13_0
 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI13_0]
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECK-NEXT: ret
 {
@@ -198,8 +198,8 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: adrp x8, .LCPI15_0
 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI15_0]
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECK-NEXT: ret
 {
@@ -212,8 +212,8 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: adrp x8, .LCPI16_0
 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI16_0]
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECK-NEXT: ret
 {
@@ -226,8 +226,8 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: adrp x8, .LCPI17_0
 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI17_0]
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECK-NEXT: ret
 {
@@ -240,8 +240,8 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: adrp x8, .LCPI18_0
 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI18_0]
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECK-NEXT: ret
 {
@@ -254,8 +254,8 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: adrp x8, .LCPI19_0
 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI19_0]
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1
 ; CHECK-NEXT: tbl v0.16b, { v0.16b, v1.16b }, v2.16b
 ; CHECK-NEXT: ret
 {
diff --git a/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll b/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
--- a/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
+++ b/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll
@@ -129,9 +129,9 @@
 define i32 @sink_sub_from_const_to_sub(i32 %a, i32 %b) {
 ; CHECK-LABEL: sink_sub_from_const_to_sub:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: mov w9, #32 // =0x20
-; CHECK-NEXT: sub w0, w9, w8
+; CHECK-NEXT: mov w8, #32 // =0x20
+; CHECK-NEXT: add w9, w0, w1
+; CHECK-NEXT: sub w0, w8, w9
 ; CHECK-NEXT: ret
 %t0 = sub i32 32, %a
 %r = sub i32 %t0, %b
@@ -158,10 +158,10 @@
 define <4 x i32> @vec_sink_add_of_const_to_add0(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: vec_sink_add_of_const_to_add0:
 ; CHECK: // %bb.0:
+; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: adrp x8, .LCPI12_0
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI12_0]
 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI12_0]
-; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT: ret
 %t0 = add <4 x i32> %a, ; constant always on RHS
 %r = add <4 x i32> %t0, %b
@@ -170,10 +170,10 @@
 define <4 x i32> @vec_sink_add_of_const_to_add1(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: vec_sink_add_of_const_to_add1:
 ; CHECK: // %bb.0:
+; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: adrp x8, .LCPI13_0
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI13_0]
 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI13_0]
-; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT: ret
 %t0 = add <4 x i32> %a, ; constant always on RHS
 %r = add <4 x i32> %b, %t0
@@ -186,10 +186,10 @@
 define <4 x i32> @vec_sink_sub_of_const_to_add0(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: vec_sink_sub_of_const_to_add0:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI14_0
 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI14_0]
-; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: adrp x8, .LCPI14_0
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI14_0]
+; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: ret
 %t0 = sub <4 x i32> %a,
 %r = add <4 x i32> %t0, %b
@@ -198,10 +198,10 @@
 define <4 x i32> @vec_sink_sub_of_const_to_add1(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: vec_sink_sub_of_const_to_add1:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI15_0
 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI15_0]
-; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: adrp x8, .LCPI15_0
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_0]
+; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: ret
 %t0 = sub <4 x i32> %a,
 %r = add <4 x i32> %b, %t0
@@ -214,10 +214,10 @@
 define <4 x i32> @vec_sink_sub_from_const_to_add0(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: vec_sink_sub_from_const_to_add0:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI16_0
 ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI16_0]
-; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: adrp x8, .LCPI16_0
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0]
+; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: ret
 %t0 = sub <4 x i32> , %a
 %r = add <4 x i32> %t0, %b
@@ -226,10 +226,10 @@
 define <4 x i32> @vec_sink_sub_from_const_to_add1(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: vec_sink_sub_from_const_to_add1:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI17_0
 ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI17_0]
-; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: adrp x8, .LCPI17_0
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_0]
+; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: ret
 %t0 = sub <4 x i32> , %a
 %r = add <4 x i32> %b, %t0
@@ -242,10 +242,10 @@
 define <4 x i32> @vec_sink_add_of_const_to_sub(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: vec_sink_add_of_const_to_sub:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI18_0
 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI18_0]
-; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: adrp x8, .LCPI18_0
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI18_0]
+; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: ret
 %t0 = add <4 x i32> %a, ; constant always on RHS
 %r = sub <4 x i32> %t0, %b
@@ -254,10 +254,10 @@
 define <4 x i32> @vec_sink_add_of_const_to_sub2(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: vec_sink_add_of_const_to_sub2:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI19_0
 ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI19_0]
-; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: adrp x8, .LCPI19_0
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI19_0]
+; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: ret
 %t0 = add <4 x i32> %a, ; constant always on RHS
 %r = sub <4 x i32> %b, %t0
@@ -270,10 +270,10 @@
 define <4 x i32> @vec_sink_sub_of_const_to_sub(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: vec_sink_sub_of_const_to_sub:
 ; CHECK: // %bb.0:
+; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: adrp x8, .LCPI20_0
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI20_0]
 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI20_0]
-; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s
 ; CHECK-NEXT: ret
 %t0 = sub <4 x i32> %a,
 %r = sub <4 x i32> %t0, %b
@@ -282,10 +282,10 @@
 define <4 x i32> @vec_sink_sub_of_const_to_sub2(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: vec_sink_sub_of_const_to_sub2:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI21_0
 ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI21_0]
-; CHECK-NEXT: add v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: adrp x8, .LCPI21_0
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI21_0]
+; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: ret
 %t0 = sub <4 x i32> %a,
 %r = sub <4 x i32> %b, %t0
@@ -298,10 +298,10 @@
 define <4 x i32> @vec_sink_sub_from_const_to_sub(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: vec_sink_sub_from_const_to_sub:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI22_0
 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI22_0]
-; CHECK-NEXT: sub v0.4s, v2.4s, v0.4s
+; CHECK-NEXT: adrp x8, .LCPI22_0
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI22_0]
+; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT: ret
 %t0 = sub <4 x i32> , %a
 %r = sub <4 x i32> %t0, %b
@@ -310,10 +310,10 @@
 define <4 x i32> @vec_sink_sub_from_const_to_sub2(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: vec_sink_sub_from_const_to_sub2:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI23_0
 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI23_0]
-; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: adrp x8, .LCPI23_0
+; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI23_0]
+; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: ret
 %t0 = sub <4 x i32> , %a
 %r = sub <4 x i32> %b, %t0
diff --git a/llvm/test/CodeGen/AArch64/sinksplat.ll b/llvm/test/CodeGen/AArch64/sinksplat.ll
--- a/llvm/test/CodeGen/AArch64/sinksplat.ll
+++ b/llvm/test/CodeGen/AArch64/sinksplat.ll
@@ -5,8 +5,8 @@
 ; CHECK-LABEL: smull:
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: fmov d1, d0
-; CHECK-NEXT: mov w8, #1
 ; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: mov w8, #1 // =0x1
 ; CHECK-NEXT: .LBB0_1: // %l1
 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT: ldr d2, [x0]
@@ -37,8 +37,8 @@
 ; CHECK-LABEL: umull:
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: fmov d1, d0
-; CHECK-NEXT: mov w8, #1
 ; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: mov w8, #1 // =0x1
 ; CHECK-NEXT: .LBB1_1: // %l1
 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT: ldr d2, [x0]
@@ -69,8 +69,8 @@
 ; CHECK-LABEL: sqadd:
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: mov v1.16b, v0.16b
-; CHECK-NEXT: mov w8, #1
 ; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: mov w8, #1 // =0x1
 ; CHECK-NEXT: .LBB2_1: // %l1
 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT: ldr q2, [x0]
@@ -102,8 +102,8 @@
 ; CHECK-LABEL: sqsub:
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: mov v1.16b, v0.16b
-; CHECK-NEXT: mov w8, #1
 ; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: mov w8, #1 // =0x1
 ; CHECK-NEXT: .LBB3_1: // %l1
 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT: ldr q2, [x0]
@@ -135,8 +135,8 @@
 ; CHECK-LABEL: sqdmulh:
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: mov v1.16b, v0.16b
-; CHECK-NEXT: mov w8, #1
 ; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: mov w8, #1 // =0x1
 ; CHECK-NEXT: .LBB4_1: // %l1
 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT: ldr q2, [x0]
@@ -168,8 +168,8 @@
 ; CHECK-LABEL: sqdmull:
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: fmov d1, d0
-; CHECK-NEXT: mov w8, #1
 ; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: mov w8, #1 // =0x1
 ; CHECK-NEXT: .LBB5_1: // %l1
 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT: ldr d2, [x0]
@@ -201,8 +201,8 @@
 ; CHECK-LABEL: mlal:
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: mov v1.16b, v0.16b
-; CHECK-NEXT: mov w8, #1
 ; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: mov w8, #1 // =0x1
 ; CHECK-NEXT: dup v1.4s, v1.s[3]
 ; CHECK-NEXT: .LBB6_1: // %l1
 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
@@ -234,8 +234,8 @@
 ; CHECK-LABEL: fmul:
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: mov v1.16b, v0.16b
-; CHECK-NEXT: mov w8, #1
 ; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: mov w8, #1 // =0x1
 ; CHECK-NEXT: .LBB7_1: // %l1
 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT: ldr q2, [x0]
@@ -267,8 +267,8 @@
 ; CHECK-LABEL: fmuladd:
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: mov v1.16b, v0.16b
-; CHECK-NEXT: mov w8, #1
 ; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: mov w8, #1 // =0x1
 ; CHECK-NEXT: dup v1.4s, v1.s[3]
 ; CHECK-NEXT: .LBB8_1: // %l1
 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
@@ -300,16 +300,16 @@
 ; CHECK-LABEL: fma:
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: mov v1.16b, v0.16b
-; CHECK-NEXT: mov w8, #1
 ; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: mov w8, #1 // =0x1
 ; CHECK-NEXT: dup v1.4s, v1.s[3]
 ; CHECK-NEXT: .LBB9_1: // %l1
 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr q3, [x0]
-; CHECK-NEXT: subs w8, w8, #1
-; CHECK-NEXT: mov v2.16b, v0.16b
+; CHECK-NEXT: mov v3.16b, v0.16b
 ; CHECK-NEXT: mov v0.16b, v1.16b
-; CHECK-NEXT: fmla v0.4s, v2.4s, v3.4s
+; CHECK-NEXT: ldr q2, [x0]
+; CHECK-NEXT: subs w8, w8, #1
+; CHECK-NEXT: fmla v0.4s, v3.4s, v2.4s
 ; CHECK-NEXT: b.eq .LBB9_1
 ; CHECK-NEXT: // %bb.2: // %l2
 ; CHECK-NEXT: ret
@@ -334,8 +334,8 @@
 ; CHECK-LABEL: smull_nonsplat:
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: fmov d1, d0
-; CHECK-NEXT: mov w8, #1
 ; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: mov w8, #1 // =0x1
 ; CHECK-NEXT: trn2 v2.4h, v1.4h, v1.4h
 ; CHECK-NEXT: zip2 v1.4h, v2.4h, v1.4h
 ; CHECK-NEXT: .LBB10_1: // %l1
diff --git a/llvm/test/CodeGen/AArch64/sitofp-fixed-legal.ll b/llvm/test/CodeGen/AArch64/sitofp-fixed-legal.ll
--- a/llvm/test/CodeGen/AArch64/sitofp-fixed-legal.ll
+++ b/llvm/test/CodeGen/AArch64/sitofp-fixed-legal.ll
@@ -4,21 +4,21 @@
 define <16 x double> @test_sitofp_fixed(<16 x i32> %in) {
 ; CHECK-LABEL: test_sitofp_fixed:
 ; CHECK: ; %bb.0:
-; CHECK-NEXT: sshll2.2d v4, v2, #0
-; CHECK-NEXT: sshll2.2d v5, v0, #0
-; CHECK-NEXT: sshll2.2d v6, v1, #0
-; CHECK-NEXT: sshll2.2d v7, v3, #0
+; CHECK-NEXT: sshll2.2d v4, v0, #0
 ; CHECK-NEXT: sshll.2d v0, v0, #0
-; CHECK-NEXT: sshll.2d v16, v1, #0
-; CHECK-NEXT: sshll.2d v17, v2, #0
+; CHECK-NEXT: sshll2.2d v5, v1, #0
+; CHECK-NEXT: sshll.2d v6, v1, #0
+; CHECK-NEXT: sshll.2d v7, v2, #0
+; CHECK-NEXT: sshll2.2d v16, v2, #0
+; CHECK-NEXT: sshll2.2d v17, v3, #0
 ; CHECK-NEXT: sshll.2d v18, v3, #0
-; CHECK-NEXT: scvtf.2d v1, v5, #6
+; CHECK-NEXT: scvtf.2d v1, v4, #6
 ; CHECK-NEXT: scvtf.2d v0, v0, #6
-; CHECK-NEXT: scvtf.2d v3, v6, #6
-; CHECK-NEXT: scvtf.2d v2, v16, #6
-; CHECK-NEXT: scvtf.2d v5, v4, #6
-; CHECK-NEXT: scvtf.2d v4, v17, #6
-; CHECK-NEXT: scvtf.2d v7, v7, #6
+; CHECK-NEXT: scvtf.2d v3, v5, #6
+; CHECK-NEXT: scvtf.2d v2, v6, #6
+; CHECK-NEXT: scvtf.2d v4, v7, #6
+; CHECK-NEXT: scvtf.2d v5, v16, #6
+; CHECK-NEXT: scvtf.2d v7, v17, #6
 ; CHECK-NEXT: scvtf.2d v6, v18, #6
 ; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll b/llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll
--- a/llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll
+++ b/llvm/test/CodeGen/AArch64/sme-aarch64-svcount.ll
@@ -149,11 +149,11 @@
 ; CHECK-O3: // %bb.0:
 ; CHECK-O3-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
 ; CHECK-O3-NEXT: addvl sp, sp, #-1
-; CHECK-O3-NEXT: addpl x0, sp, #7
 ; CHECK-O3-NEXT: mov p1.b, p0.b
+; CHECK-O3-NEXT: addpl x0, sp, #7
+; CHECK-O3-NEXT: str p0, [sp, #7, mul vl]
 ; CHECK-O3-NEXT: mov p2.b, p0.b
 ; CHECK-O3-NEXT: mov p3.b, p0.b
-; CHECK-O3-NEXT: str p0, [sp, #7, mul vl]
 ; CHECK-O3-NEXT: bl take_svcount_5
 ; CHECK-O3-NEXT: addvl sp, sp, #1
 ; CHECK-O3-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
--- a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
+++ b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
@@ -49,7 +49,7 @@
 ; CHECK-GISEL-NEXT: bl streaming_callee
 ; CHECK-GISEL-NEXT: str d0, [sp, #88] // 8-byte Folded Spill
 ; CHECK-GISEL-NEXT: smstop sm
-; CHECK-GISEL-NEXT: mov x8, #4631107791820423168
+; CHECK-GISEL-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
 ; CHECK-GISEL-NEXT: fmov d0, x8
 ; CHECK-GISEL-NEXT: ldr d1, [sp, #88] // 8-byte Folded Reload
 ; CHECK-GISEL-NEXT: fadd d0, d1, d0
@@ -108,7 +108,7 @@
 ; CHECK-GISEL-NEXT: bl normal_callee
 ; CHECK-GISEL-NEXT: str d0, [sp, #88] // 8-byte Folded Spill
 ; CHECK-GISEL-NEXT: smstart sm
-; CHECK-GISEL-NEXT: mov x8, #4631107791820423168
+; CHECK-GISEL-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
 ; CHECK-GISEL-NEXT: fmov d0, x8
 ; CHECK-GISEL-NEXT: ldr d1, [sp, #88] // 8-byte Folded Reload
 ; CHECK-GISEL-NEXT: fadd d0, d1, d0
@@ -141,7 +141,7 @@
 ; CHECK-COMMON-NEXT: bl normal_callee
 ; CHECK-COMMON-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
 ; CHECK-COMMON-NEXT: smstart sm
-; CHECK-COMMON-NEXT: mov x8, #4631107791820423168
+; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
 ; CHECK-COMMON-NEXT: fmov d0, x8
 ; CHECK-COMMON-NEXT: ldr d1, [sp, #8] // 8-byte Folded Reload
 ; CHECK-COMMON-NEXT: fadd d0, d1, d0
@@ -246,7 +246,7 @@
 ; CHECK-COMMON-NEXT: .LBB6_2: // %entry
 ; CHECK-COMMON-NEXT: smstart za
 ; CHECK-COMMON-NEXT: bl za_shared_callee
-; CHECK-COMMON-NEXT: mov x8, #4631107791820423168
+; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
 ; CHECK-COMMON-NEXT: fmov d1, x8
 ; CHECK-COMMON-NEXT: fadd d0, d0, d1
 ; CHECK-COMMON-NEXT: smstop za
@@ -285,7 +285,7 @@
 ; CHECK-COMMON-NEXT: b .LBB7_2
; CHECK-COMMON-NEXT: .LBB7_2: // %entry ; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr -; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 +; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000 ; CHECK-COMMON-NEXT: fmov d1, x8 ; CHECK-COMMON-NEXT: fadd d0, d0, d1 ; CHECK-COMMON-NEXT: mov sp, x29 @@ -309,14 +309,14 @@ ; CHECK-COMMON-NEXT: mul x8, x8, x8 ; CHECK-COMMON-NEXT: sub x9, x9, x8 ; CHECK-COMMON-NEXT: mov sp, x9 -; CHECK-COMMON-NEXT: sub x10, x29, #16 ; CHECK-COMMON-NEXT: stur x9, [x29, #-16] +; CHECK-COMMON-NEXT: sub x9, x29, #16 ; CHECK-COMMON-NEXT: sturh w8, [x29, #-8] -; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x10 +; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x9 ; CHECK-COMMON-NEXT: bl __addtf3 ; CHECK-COMMON-NEXT: smstart za -; CHECK-COMMON-NEXT: sub x0, x29, #16 ; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-COMMON-NEXT: sub x0, x29, #16 ; CHECK-COMMON-NEXT: cbnz x8, .LBB8_2 ; CHECK-COMMON-NEXT: // %bb.1: ; CHECK-COMMON-NEXT: bl __arm_tpidr2_restore @@ -347,11 +347,11 @@ ; CHECK-COMMON-NEXT: str q0, [sp, #16] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: smstart sm ; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload ; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload ; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload -; CHECK-COMMON-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-COMMON-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload ; CHECK-COMMON-NEXT: add sp, sp, #112 ; CHECK-COMMON-NEXT: ret %res = fadd fp128 %a, %b diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll --- a/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll +++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-loads.ll @@ -18,10 +18,10 @@ define void @ld1b_with_addr_offset( %pg, ptr %ptr, i64 %index, i32 %sliceidx) { ; CHECK-LABEL: ld1b_with_addr_offset: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w12, wzr -; CHECK-NEXT: mov w13, w2 -; CHECK-NEXT: ld1b {za0h.b[w12, 0]}, p0/z, [x0, x1] -; CHECK-NEXT: ld1b {za0v.b[w13, 15]}, p0/z, [x0, x1] +; CHECK-NEXT: mov w13, wzr +; CHECK-NEXT: mov w12, w2 +; CHECK-NEXT: ld1b {za0h.b[w13, 0]}, p0/z, [x0, x1] +; CHECK-NEXT: ld1b {za0v.b[w12, 15]}, p0/z, [x0, x1] ; CHECK-NEXT: ret %base = getelementptr i8, ptr %ptr, i64 %index %tileslice = add i32 %sliceidx, 15 @@ -66,16 +66,16 @@ define void @ld1w( %pg, ptr %ptr, i32 %sliceidx) { ; CHECK-LABEL: ld1w: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w12, wzr -; CHECK-NEXT: mov w13, w1 -; CHECK-NEXT: ld1w {za0h.s[w12, 0]}, p0/z, [x0] -; CHECK-NEXT: ld1w {za1h.s[w12, 0]}, p0/z, [x0] -; CHECK-NEXT: ld1w {za2h.s[w12, 0]}, p0/z, [x0] -; CHECK-NEXT: ld1w {za3h.s[w13, 3]}, p0/z, [x0] -; CHECK-NEXT: ld1w {za0v.s[w12, 0]}, p0/z, [x0] -; CHECK-NEXT: ld1w {za1v.s[w12, 0]}, p0/z, [x0] -; CHECK-NEXT: ld1w {za2v.s[w13, 3]}, p0/z, [x0] -; CHECK-NEXT: ld1w {za3v.s[w12, 0]}, p0/z, [x0] +; CHECK-NEXT: mov w12, w1 +; CHECK-NEXT: mov w13, wzr +; CHECK-NEXT: ld1w {za0h.s[w13, 0]}, p0/z, [x0] +; CHECK-NEXT: ld1w {za1h.s[w13, 0]}, p0/z, [x0] +; CHECK-NEXT: ld1w {za2h.s[w13, 0]}, p0/z, [x0] +; CHECK-NEXT: ld1w {za3h.s[w12, 3]}, p0/z, [x0] +; CHECK-NEXT: ld1w {za0v.s[w13, 0]}, p0/z, [x0] +; CHECK-NEXT: ld1w {za1v.s[w13, 0]}, p0/z, [x0] +; CHECK-NEXT: ld1w {za2v.s[w12, 3]}, p0/z, [x0] +; CHECK-NEXT: ld1w {za3v.s[w13, 0]}, p0/z, [x0] ; CHECK-NEXT: ret 
%tileslice = add i32 %sliceidx, 3 call void @llvm.aarch64.sme.ld1w.horiz( %pg, ptr %ptr, i32 0, i32 0) @@ -107,8 +107,8 @@ define void @ld1d( %pg, ptr %ptr, i32 %sliceidx) { ; CHECK-LABEL: ld1d: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w13, wzr ; CHECK-NEXT: mov w12, w1 +; CHECK-NEXT: mov w13, wzr ; CHECK-NEXT: ld1d {za0h.d[w13, 0]}, p0/z, [x0] ; CHECK-NEXT: ld1d {za1h.d[w13, 0]}, p0/z, [x0] ; CHECK-NEXT: ld1d {za2h.d[w13, 0]}, p0/z, [x0] diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-extract.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-extract.ll --- a/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-extract.ll +++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-extract.ll @@ -4,21 +4,21 @@ define @extract_row_b( %zd, %pg, i32 %tileslice) { ; CHECK-LABEL: extract_row_b: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: mov z1.d, z0.d ; CHECK-NEXT: mov z2.d, z0.d -; CHECK-NEXT: mov z3.d, z0.d -; CHECK-NEXT: mov z4.d, z0.d +; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: mov z1.b, p0/m, za0h.b[w12, 0] ; CHECK-NEXT: mov z2.b, p0/m, za0h.b[w12, 2] -; CHECK-NEXT: mov z3.b, p0/m, za0h.b[w12, 4] ; CHECK-NEXT: mov z2.d, z0.d -; CHECK-NEXT: mov z4.b, p0/m, za0h.b[w12, 6] -; CHECK-NEXT: mov z3.d, z0.d +; CHECK-NEXT: mov z2.b, p0/m, za0h.b[w12, 4] +; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: mov z2.b, p0/m, za0h.b[w12, 6] +; CHECK-NEXT: mov z2.d, z0.d ; CHECK-NEXT: mov z2.b, p0/m, za0h.b[w12, 8] -; CHECK-NEXT: mov z4.d, z0.d -; CHECK-NEXT: mov z3.b, p0/m, za0h.b[w12, 10] -; CHECK-NEXT: mov z4.b, p0/m, za0h.b[w12, 12] +; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: mov z2.b, p0/m, za0h.b[w12, 10] +; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: mov z2.b, p0/m, za0h.b[w12, 12] ; CHECK-NEXT: mov z0.b, p0/m, za0h.b[w12, 14] ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret @@ -43,21 +43,21 @@ define @extract_col_b( %zd, %pg, i32 %tileslice) { ; CHECK-LABEL: extract_col_b: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: mov z1.d, z0.d ; CHECK-NEXT: mov z2.d, z0.d -; CHECK-NEXT: mov z3.d, z0.d -; CHECK-NEXT: mov z4.d, z0.d +; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: mov z1.b, p0/m, za0v.b[w12, 1] ; CHECK-NEXT: mov z2.b, p0/m, za0v.b[w12, 3] -; CHECK-NEXT: mov z3.b, p0/m, za0v.b[w12, 5] ; CHECK-NEXT: mov z2.d, z0.d -; CHECK-NEXT: mov z4.b, p0/m, za0v.b[w12, 7] -; CHECK-NEXT: mov z3.d, z0.d +; CHECK-NEXT: mov z2.b, p0/m, za0v.b[w12, 5] +; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: mov z2.b, p0/m, za0v.b[w12, 7] +; CHECK-NEXT: mov z2.d, z0.d ; CHECK-NEXT: mov z2.b, p0/m, za0v.b[w12, 9] -; CHECK-NEXT: mov z4.d, z0.d -; CHECK-NEXT: mov z3.b, p0/m, za0v.b[w12, 11] -; CHECK-NEXT: mov z4.b, p0/m, za0v.b[w12, 13] +; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: mov z2.b, p0/m, za0v.b[w12, 11] +; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: mov z2.b, p0/m, za0v.b[w12, 13] ; CHECK-NEXT: mov z0.b, p0/m, za0v.b[w12, 15] ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret @@ -83,13 +83,13 @@ define @extract_row_h( %zd, %pg, i32 %tileslice) { ; CHECK-LABEL: extract_row_h: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: mov z1.d, z0.d ; CHECK-NEXT: mov z2.d, z0.d -; CHECK-NEXT: mov z3.d, z0.d +; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: mov z1.h, p0/m, za0h.h[w12, 0] ; CHECK-NEXT: mov z2.h, p0/m, za0h.h[w12, 2] -; CHECK-NEXT: mov z3.h, p0/m, za0h.h[w12, 4] +; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: mov z2.h, p0/m, za0h.h[w12, 4] ; CHECK-NEXT: mov z0.h, p0/m, za0h.h[w12, 6] ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret @@ -106,13 +106,13 @@ define @extract_col_h( %zd, %pg, i32 %tileslice) { ; CHECK-LABEL: 
extract_col_h: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: mov z1.d, z0.d ; CHECK-NEXT: mov z2.d, z0.d -; CHECK-NEXT: mov z3.d, z0.d +; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: mov z1.h, p0/m, za1v.h[w12, 1] ; CHECK-NEXT: mov z2.h, p0/m, za1v.h[w12, 3] -; CHECK-NEXT: mov z3.h, p0/m, za1v.h[w12, 5] +; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: mov z2.h, p0/m, za1v.h[w12, 5] ; CHECK-NEXT: mov z0.h, p0/m, za1v.h[w12, 7] ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret @@ -130,21 +130,21 @@ define @extract_f16( %zd, %pg, i32 %tileslice) { ; CHECK-LABEL: extract_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: mov z1.d, z0.d ; CHECK-NEXT: mov z2.d, z0.d -; CHECK-NEXT: mov z3.d, z0.d -; CHECK-NEXT: mov z4.d, z0.d +; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: mov z1.h, p0/m, za0h.h[w12, 0] ; CHECK-NEXT: mov z2.h, p0/m, za0h.h[w12, 1] -; CHECK-NEXT: mov z3.h, p0/m, za0v.h[w12, 2] ; CHECK-NEXT: mov z2.d, z0.d -; CHECK-NEXT: mov z4.h, p0/m, za0v.h[w12, 3] -; CHECK-NEXT: mov z3.d, z0.d +; CHECK-NEXT: mov z2.h, p0/m, za0v.h[w12, 2] +; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: mov z2.h, p0/m, za0v.h[w12, 3] +; CHECK-NEXT: mov z2.d, z0.d ; CHECK-NEXT: mov z2.h, p0/m, za0h.h[w12, 4] -; CHECK-NEXT: mov z4.d, z0.d -; CHECK-NEXT: mov z3.h, p0/m, za0h.h[w12, 5] -; CHECK-NEXT: mov z4.h, p0/m, za0v.h[w12, 6] +; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: mov z2.h, p0/m, za0h.h[w12, 5] +; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: mov z2.h, p0/m, za0v.h[w12, 6] ; CHECK-NEXT: mov z0.h, p0/m, za0v.h[w12, 7] ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret @@ -169,21 +169,21 @@ define @extract_bf16( %zd, %pg, i32 %tileslice, *%ptr) { ; CHECK-LABEL: extract_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: mov z1.d, z0.d ; CHECK-NEXT: mov z2.d, z0.d -; CHECK-NEXT: mov z3.d, z0.d -; CHECK-NEXT: mov z4.d, z0.d +; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: mov z1.h, p0/m, za0h.h[w12, 0] ; CHECK-NEXT: mov z2.h, p0/m, za0h.h[w12, 1] -; CHECK-NEXT: mov z3.h, p0/m, za0v.h[w12, 2] ; CHECK-NEXT: mov z2.d, z0.d -; CHECK-NEXT: mov z4.h, p0/m, za0v.h[w12, 3] -; CHECK-NEXT: mov z3.d, z0.d +; CHECK-NEXT: mov z2.h, p0/m, za0v.h[w12, 2] +; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: mov z2.h, p0/m, za0v.h[w12, 3] +; CHECK-NEXT: mov z2.d, z0.d ; CHECK-NEXT: mov z2.h, p0/m, za0h.h[w12, 4] -; CHECK-NEXT: mov z4.d, z0.d -; CHECK-NEXT: mov z3.h, p0/m, za0h.h[w12, 5] -; CHECK-NEXT: mov z4.h, p0/m, za0v.h[w12, 6] +; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: mov z2.h, p0/m, za0h.h[w12, 5] +; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: mov z2.h, p0/m, za0v.h[w12, 6] ; CHECK-NEXT: mov z0.h, p0/m, za0v.h[w12, 7] ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret @@ -208,8 +208,8 @@ define @extract_row_s( %zd, %pg, i32 %tileslice) { ; CHECK-LABEL: extract_row_s: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: mov z1.s, p0/m, za0h.s[w12, 0] ; CHECK-NEXT: mov z0.s, p0/m, za0h.s[w12, 2] ; CHECK-NEXT: mov z0.d, z1.d @@ -223,8 +223,8 @@ define @extract_col_s( %zd, %pg, i32 %tileslice) { ; CHECK-LABEL: extract_col_s: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: mov z1.s, p0/m, za3v.s[w12, 1] ; CHECK-NEXT: mov z0.s, p0/m, za3v.s[w12, 3] ; CHECK-NEXT: mov z0.d, z1.d @@ -239,13 +239,13 @@ define @extract_f32( %zd, %pg, i32 %tileslice) { ; CHECK-LABEL: extract_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: mov z1.d, z0.d ; CHECK-NEXT: mov z2.d, z0.d -; CHECK-NEXT: mov z3.d, z0.d 
+; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: mov z1.s, p0/m, za0h.s[w12, 0] ; CHECK-NEXT: mov z2.s, p0/m, za0h.s[w12, 1] -; CHECK-NEXT: mov z3.s, p0/m, za0v.s[w12, 2] +; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: mov z2.s, p0/m, za0v.s[w12, 2] ; CHECK-NEXT: mov z0.s, p0/m, za0v.s[w12, 3] ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret @@ -283,8 +283,8 @@ define @extract_f64( %zd, %pg, i32 %tileslice) { ; CHECK-LABEL: extract_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: mov z1.d, p0/m, za0h.d[w12, 0] ; CHECK-NEXT: mov z0.d, p0/m, za0v.d[w12, 1] ; CHECK-NEXT: mov z0.d, z1.d @@ -438,16 +438,16 @@ define @test_sink_offset_operand( %pg, i32 %base, i32 %N) { ; CHECK-LABEL: test_sink_offset_operand: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: mov z0.s, #0 // =0x0 +; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: .LBB26_1: // %for.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: mov z1.d, z0.d ; CHECK-NEXT: mov z2.d, z0.d -; CHECK-NEXT: mov z1.s, p0/m, za0h.s[w12, 0] +; CHECK-NEXT: subs w1, w1, #3 ; CHECK-NEXT: mov z3.d, z0.d +; CHECK-NEXT: mov z1.s, p0/m, za0h.s[w12, 0] ; CHECK-NEXT: mov z2.s, p0/m, za0h.s[w12, 1] -; CHECK-NEXT: subs w1, w1, #3 ; CHECK-NEXT: mov z3.s, p0/m, za0h.s[w12, 2] ; CHECK-NEXT: b.ne .LBB26_1 ; CHECK-NEXT: // %bb.2: // %exit diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-insert.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-insert.ll --- a/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-insert.ll +++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-mova-insert.ll @@ -441,12 +441,12 @@ define void @test_sink_offset_operand( %pg, i32 %base, i32 %N) { ; CHECK-LABEL: test_sink_offset_operand: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: mov z0.s, #0 // =0x0 +; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: .LBB28_1: // %for.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: subs w1, w1, #3 ; CHECK-NEXT: mov za0h.s[w12, 0], p0/m, z0.s +; CHECK-NEXT: subs w1, w1, #3 ; CHECK-NEXT: mov za0h.s[w12, 1], p0/m, z0.s ; CHECK-NEXT: mov za0h.s[w12, 2], p0/m, z0.s ; CHECK-NEXT: b.ne .LBB28_1 diff --git a/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll b/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll --- a/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll +++ b/llvm/test/CodeGen/AArch64/sme-intrinsics-stores.ll @@ -18,10 +18,10 @@ define void @st1b_with_addr_offset( %pg, ptr %ptr, i64 %index, i32 %sliceidx) { ; CHECK-LABEL: st1b_with_addr_offset: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w12, wzr -; CHECK-NEXT: mov w13, w2 -; CHECK-NEXT: st1b {za0h.b[w12, 0]}, p0, [x0, x1] -; CHECK-NEXT: st1b {za0v.b[w13, 15]}, p0, [x0, x1] +; CHECK-NEXT: mov w13, wzr +; CHECK-NEXT: mov w12, w2 +; CHECK-NEXT: st1b {za0h.b[w13, 0]}, p0, [x0, x1] +; CHECK-NEXT: st1b {za0v.b[w12, 15]}, p0, [x0, x1] ; CHECK-NEXT: ret %base = getelementptr i8, ptr %ptr, i64 %index %tileslice = add i32 %sliceidx, 15 @@ -92,10 +92,10 @@ define void @st1w_with_addr_offset( %pg, ptr %ptr, i64 %index, i32 %sliceidx) { ; CHECK-LABEL: st1w_with_addr_offset: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w12, wzr -; CHECK-NEXT: mov w13, w2 -; CHECK-NEXT: st1w {za0h.s[w12, 0]}, p0, [x0, x1, lsl #2] -; CHECK-NEXT: st1w {za3v.s[w13, 3]}, p0, [x0, x1, lsl #2] +; CHECK-NEXT: mov w13, wzr +; CHECK-NEXT: mov w12, w2 +; CHECK-NEXT: st1w {za0h.s[w13, 0]}, p0, [x0, x1, lsl #2] +; CHECK-NEXT: st1w {za3v.s[w12, 3]}, p0, [x0, x1, lsl #2] ; CHECK-NEXT: ret %base = getelementptr i32, ptr %ptr, i64 %index 
%tileslice = add i32 %sliceidx, 3 diff --git a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll --- a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll +++ b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll @@ -16,14 +16,14 @@ ; CHECK-NEXT: mul x8, x8, x8 ; CHECK-NEXT: sub x9, x9, x8 ; CHECK-NEXT: mov sp, x9 -; CHECK-NEXT: sub x10, x29, #16 ; CHECK-NEXT: stur x9, [x29, #-16] +; CHECK-NEXT: sub x9, x29, #16 ; CHECK-NEXT: sturh w8, [x29, #-8] -; CHECK-NEXT: msr TPIDR2_EL0, x10 +; CHECK-NEXT: msr TPIDR2_EL0, x9 ; CHECK-NEXT: bl private_za_callee ; CHECK-NEXT: smstart za -; CHECK-NEXT: sub x0, x29, #16 ; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: sub x0, x29, #16 ; CHECK-NEXT: cbnz x8, .LBB0_2 ; CHECK-NEXT: // %bb.1: ; CHECK-NEXT: bl __arm_tpidr2_restore @@ -55,8 +55,8 @@ ; CHECK-NEXT: msr TPIDR2_EL0, x20 ; CHECK-NEXT: bl private_za_callee ; CHECK-NEXT: smstart za -; CHECK-NEXT: sub x0, x29, #16 ; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: sub x0, x29, #16 ; CHECK-NEXT: cbnz x8, .LBB1_2 ; CHECK-NEXT: // %bb.1: ; CHECK-NEXT: bl __arm_tpidr2_restore @@ -66,8 +66,8 @@ ; CHECK-NEXT: msr TPIDR2_EL0, x20 ; CHECK-NEXT: bl private_za_callee ; CHECK-NEXT: smstart za -; CHECK-NEXT: sub x0, x29, #16 ; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: sub x0, x29, #16 ; CHECK-NEXT: cbnz x8, .LBB1_4 ; CHECK-NEXT: // %bb.3: ; CHECK-NEXT: bl __arm_tpidr2_restore @@ -94,14 +94,14 @@ ; CHECK-NEXT: mul x8, x8, x8 ; CHECK-NEXT: sub x9, x9, x8 ; CHECK-NEXT: mov sp, x9 -; CHECK-NEXT: sub x10, x29, #16 ; CHECK-NEXT: stur x9, [x29, #-16] +; CHECK-NEXT: sub x9, x29, #16 ; CHECK-NEXT: sturh w8, [x29, #-8] -; CHECK-NEXT: msr TPIDR2_EL0, x10 +; CHECK-NEXT: msr TPIDR2_EL0, x9 ; CHECK-NEXT: bl cosf ; CHECK-NEXT: smstart za -; CHECK-NEXT: sub x0, x29, #16 ; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: sub x0, x29, #16 ; CHECK-NEXT: cbnz x8, .LBB2_2 ; CHECK-NEXT: // %bb.1: ; CHECK-NEXT: bl __arm_tpidr2_restore @@ -131,10 +131,10 @@ ; CHECK-NEXT: mul x8, x8, x8 ; CHECK-NEXT: sub x9, x9, x8 ; CHECK-NEXT: mov sp, x9 -; CHECK-NEXT: sub x10, x29, #80 ; CHECK-NEXT: stur x9, [x29, #-80] +; CHECK-NEXT: sub x9, x29, #80 ; CHECK-NEXT: sturh w8, [x29, #-72] -; CHECK-NEXT: msr TPIDR2_EL0, x10 +; CHECK-NEXT: msr TPIDR2_EL0, x9 ; CHECK-NEXT: bl __arm_sme_state ; CHECK-NEXT: and x19, x0, #0x1 ; CHECK-NEXT: tbz x19, #0, .LBB3_2 @@ -147,8 +147,8 @@ ; CHECK-NEXT: smstart sm ; CHECK-NEXT: .LBB3_4: ; CHECK-NEXT: smstart za -; CHECK-NEXT: sub x0, x29, #80 ; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: sub x0, x29, #80 ; CHECK-NEXT: cbnz x8, .LBB3_6 ; CHECK-NEXT: // %bb.5: ; CHECK-NEXT: bl __arm_tpidr2_restore @@ -156,10 +156,10 @@ ; CHECK-NEXT: msr TPIDR2_EL0, xzr ; CHECK-NEXT: sub sp, x29, #64 ; CHECK-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr x19, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; CHECK-NEXT: ret call void @private_za_callee() diff --git a/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll b/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll --- a/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll +++ b/llvm/test/CodeGen/AArch64/sme-shared-za-interface.ll @@ -15,14 +15,14 @@ ; CHECK-NEXT: mul x8, x8, x8 ; CHECK-NEXT: sub x9, x9, x8 ; CHECK-NEXT: mov sp, 
x9 -; CHECK-NEXT: sub x10, x29, #16 ; CHECK-NEXT: stur x9, [x29, #-16] +; CHECK-NEXT: sub x9, x29, #16 ; CHECK-NEXT: sturh w8, [x29, #-8] -; CHECK-NEXT: msr TPIDR2_EL0, x10 +; CHECK-NEXT: msr TPIDR2_EL0, x9 ; CHECK-NEXT: bl private_za_callee ; CHECK-NEXT: smstart za -; CHECK-NEXT: sub x0, x29, #16 ; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: sub x0, x29, #16 ; CHECK-NEXT: cbnz x8, .LBB0_2 ; CHECK-NEXT: // %bb.1: ; CHECK-NEXT: bl __arm_tpidr2_restore @@ -47,14 +47,14 @@ ; CHECK-NEXT: mul x8, x8, x8 ; CHECK-NEXT: sub x9, x9, x8 ; CHECK-NEXT: mov sp, x9 -; CHECK-NEXT: sub x10, x29, #16 ; CHECK-NEXT: stur x9, [x29, #-16] +; CHECK-NEXT: sub x9, x29, #16 ; CHECK-NEXT: sturh w8, [x29, #-8] -; CHECK-NEXT: msr TPIDR2_EL0, x10 +; CHECK-NEXT: msr TPIDR2_EL0, x9 ; CHECK-NEXT: bl __addtf3 ; CHECK-NEXT: smstart za -; CHECK-NEXT: sub x0, x29, #16 ; CHECK-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-NEXT: sub x0, x29, #16 ; CHECK-NEXT: cbnz x8, .LBB1_2 ; CHECK-NEXT: // %bb.1: ; CHECK-NEXT: bl __arm_tpidr2_restore diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-body.ll b/llvm/test/CodeGen/AArch64/sme-streaming-body.ll --- a/llvm/test/CodeGen/AArch64/sme-streaming-body.ll +++ b/llvm/test/CodeGen/AArch64/sme-streaming-body.ll @@ -18,9 +18,9 @@ ; CHECK-NEXT: bl streaming_compatible_callee ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload ; CHECK-NEXT: ret @@ -101,10 +101,10 @@ ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #80 ; CHECK-NEXT: ret @@ -129,9 +129,9 @@ ; CHECK-NEXT: smstart sm ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload ; CHECK-NEXT: ret @@ -162,11 +162,11 @@ ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret %res = call <2 x i64> @streaming_compatible_callee_vec_args_ret(<2 x i64> %a) "aarch64_pstate_sm_compatible" @@ -191,11 +191,11 @@ ; CHECK-NEXT: stp q1, q0, [sp] // 32-byte Folded Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload +; CHECK-NEXT: ldr 
x30, [sp, #96] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #112 ; CHECK-NEXT: ret %v1.arg = extractvalue {<2 x i64>, <2 x i64>} %arg, 1 @@ -251,10 +251,10 @@ ; CHECK-NEXT: smstart sm ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldr d0, [sp, #72] // 8-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload ; CHECK-NEXT: ret entry: @@ -286,10 +286,10 @@ ; CHECK-NEXT: str s0, [sp, #12] // 4-byte Folded Spill ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload ; CHECK-NEXT: add sp, sp, #80 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll b/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll --- a/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll +++ b/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll @@ -147,17 +147,17 @@ ; CHECK-NEXT: // %bb.3: ; CHECK-NEXT: smstart sm ; CHECK-NEXT: .LBB4_4: +; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: ldr z0, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z1, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #96 // 16-byte Folded Reload ; CHECK-NEXT: ret %res = call <2 x double> @normal_callee_vec_arg(<2 x double> %arg) @@ -220,33 +220,33 @@ ; CHECK-NEXT: fadd z0.d, z1.d, z0.d ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded 
Reload -; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #18 ; CHECK-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp], #32 // 8-byte Folded Reload @@ -312,33 +312,33 @@ ; CHECK-NEXT: and p0.b, p1/z, p1.b, p0.b ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 
16-byte Folded Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #18 ; CHECK-NEXT: ldp x30, x19, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldr x29, [sp], #32 // 8-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll b/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll --- a/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll +++ b/llvm/test/CodeGen/AArch64/sme-streaming-interface.ll @@ -30,9 +30,9 @@ ; CHECK-NEXT: bl streaming_callee ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload ; CHECK-NEXT: ret call void @streaming_callee() @@ -55,9 +55,9 @@ ; CHECK-NEXT: bl normal_callee ; CHECK-NEXT: smstart sm ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload ; CHECK-NEXT: ret call void @normal_callee() @@ -111,9 +111,9 @@ ; CHECK-NEXT: blr x0 ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload ; CHECK-NEXT: ret call void %p() "aarch64_pstate_sm_enabled" @@ -135,11 +135,11 @@ ; CHECK-NEXT: bl streaming_callee ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret call void 
@streaming_callee() @@ -188,33 +188,33 @@ ; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #18 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret @@ -268,33 +268,33 @@ ; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p5, 
[sp, #14, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #18 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret @@ -319,14 +319,14 @@ ; CHECK-NEXT: bl cos ; CHECK-NEXT: str d0, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: smstart sm -; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload -; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldr d0, [sp, #88] // 8-byte Folded Reload ; CHECK-NEXT: ldr d1, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: fadd d0, d1, d0 +; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret entry: @@ -351,9 +351,9 @@ ; CHECK-NEXT: bl streaming_callee ; CHECK-NEXT: smstop sm ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload ; CHECK-NEXT: ret tail call void @streaming_callee() diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll +++ 
b/llvm/test/CodeGen/AArch64/sme2-intrinsics-add.ll @@ -8,8 +8,8 @@ define void @multi_vector_add_write_single_za_vg1x2_i32(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_write_single_za_vg1x2_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: add za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s ; CHECK-NEXT: add za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s @@ -27,8 +27,8 @@ define void @multi_vector_add_write_single_za_vg1x2_i64(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_write_single_za_vg1x2_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: add za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d ; CHECK-NEXT: add za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d @@ -105,9 +105,9 @@ define void @multi_vector_add_write_za_vg1x2_i32(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_add_write_za_vg1x2_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: add za.s[w8, 0, vgx2], { z0.s, z1.s }, { z2.s, z3.s } @@ -128,9 +128,9 @@ define void @multi_vector_add_write_za_vg1x2_i64(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_add_write_za_vg1x2_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: add za.d[w8, 0, vgx2], { z0.d, z1.d }, { z2.d, z3.d } @@ -223,8 +223,8 @@ define void @multi_vector_add_za_vg1x2_i32(i32 %slice, %zn0, %zn1) { ; CHECK-LABEL: multi_vector_add_za_vg1x2_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: add za.s[w8, 0, vgx2], { z0.s, z1.s } ; CHECK-NEXT: add za.s[w8, 7, vgx2], { z0.s, z1.s } @@ -238,8 +238,8 @@ define void @multi_vector_add_za_vg1x2_i64(i32 %slice, %zn0, %zn1) { ; CHECK-LABEL: multi_vector_add_za_vg1x2_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: add za.d[w8, 0, vgx2], { z0.d, z1.d } ; CHECK-NEXT: add za.d[w8, 7, vgx2], { z0.d, z1.d } @@ -253,8 +253,8 @@ define void @multi_vector_add_za_vg1x2_f32(i32 %slice, %zn0, %zn1) { ; CHECK-LABEL: multi_vector_add_za_vg1x2_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fadd za.s[w8, 0, vgx2], { z0.s, z1.s } ; CHECK-NEXT: fadd za.s[w8, 7, vgx2], { z0.s, z1.s } @@ -270,8 +270,8 @@ define void @multi_vector_add_za_vg1x2_f64(i32 %slice, %zn0, %zn1) { ; 
CHECK-LABEL: multi_vector_add_za_vg1x2_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fadd za.d[w8, 0, vgx2], { z0.d, z1.d } ; CHECK-NEXT: fadd za.d[w8, 7, vgx2], { z0.d, z1.d } diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fmlas.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fmlas.ll --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fmlas.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fmlas.ll @@ -6,8 +6,8 @@ define void @multi_vector_add_single_vg1x2_s(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_single_vg1x2_s: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmla za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s ; CHECK-NEXT: fmla za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s @@ -25,8 +25,8 @@ define void @multi_vector_add_single_vg1x2_d(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_single_vg1x2_d: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmla za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d ; CHECK-NEXT: fmla za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d @@ -94,8 +94,8 @@ define void @multi_vector_sub_single_vg1x2_s(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_single_vg1x2_s: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmls za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s ; CHECK-NEXT: fmls za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s @@ -113,8 +113,8 @@ define void @multi_vector_sub_single_vg1x2_d(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_single_vg1x2_d: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmls za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d ; CHECK-NEXT: fmls za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d @@ -182,9 +182,9 @@ define void @multi_vector_add_vg1x2_s(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_add_vg1x2_s: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmla za.s[w8, 0, vgx2], { z0.s, z1.s }, { z2.s, z3.s } @@ -204,9 +204,9 @@ define void @multi_vector_add_vg1x2_d(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_add_vg1x2_d: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmla za.d[w8, 0, vgx2], { z0.d, z1.d }, { z2.d, z3.d } @@ -227,9 +227,9 @@ define void 
@multi_vector_add_vg1x2_s_regclass(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_add_vg1x2_s_regclass: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z4.d, z3.d ; CHECK-NEXT: mov z6.d, z1.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z7.d, z0.d ; CHECK-NEXT: fmla za.s[w8, 0, vgx2], { z6.s, z7.s }, { z4.s, z5.s } @@ -320,9 +320,9 @@ define void @multi_vector_sub_vg1x2_s(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_sub_vg1x2_s: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmls za.s[w8, 0, vgx2], { z0.s, z1.s }, { z2.s, z3.s } @@ -342,9 +342,9 @@ define void @multi_vector_sub_vg1x2_d(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_sub_vg1x2_d: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmls za.d[w8, 0, vgx2], { z0.d, z1.d }, { z2.d, z3.d } @@ -418,8 +418,8 @@ define void @multi_vector_add_lane_vg1x2_s(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_lane_vg1x2_s: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmla za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s[3] ; CHECK-NEXT: fmla za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s[3] @@ -437,8 +437,8 @@ define void @multi_vector_add_lane_vg1x2_d(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_lane_vg1x2_d: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmla za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d[1] ; CHECK-NEXT: fmla za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d[1] @@ -457,8 +457,8 @@ define void @multi_vector_add_lane_vg1x2_s_regclass(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_add_lane_vg1x2_s_regclass: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z5.d, z0.d ; CHECK-NEXT: fmla za.s[w8, 0, vgx2], { z4.s, z5.s }, z2.s[3] ; CHECK-NEXT: ret @@ -540,8 +540,8 @@ define void @multi_vector_sub_lane_vg1x2_s(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_lane_vg1x2_s: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fmls za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s[3] ; CHECK-NEXT: fmls za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s[3] @@ -559,8 +559,8 @@ define void @multi_vector_sub_lane_vg1x2_d(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_lane_vg1x2_d: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; 
CHECK-NEXT: fmls za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d[1] ; CHECK-NEXT: fmls za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d[1] diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fp-dots.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fp-dots.ll --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-fp-dots.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-fp-dots.ll @@ -9,9 +9,9 @@ define void @fdot_multi_za32_f16_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3) #0 { ; CHECK-LABEL: fdot_multi_za32_f16_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z5.d, z4.d ; CHECK-NEXT: mov z7.d, z2.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z4.d, z3.d ; CHECK-NEXT: mov z6.d, z1.d ; CHECK-NEXT: fdot za.s[w8, 0, vgx2], { z6.h, z7.h }, { z4.h, z5.h } @@ -26,16 +26,16 @@ define void @fdot_multi_za32_f16_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: fdot_multi_za32_f16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z30.d, z3.d -; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1] +; CHECK-NEXT: mov z25.d, z6.d ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z24.d, z5.d ; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: mov z28.d, z1.d +; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1] ; CHECK-NEXT: fdot za.s[w8, 0, vgx4], { z28.h - z31.h }, { z24.h - z27.h } ; CHECK-NEXT: fdot za.s[w8, 7, vgx4], { z28.h - z31.h }, { z24.h - z27.h } ; CHECK-NEXT: ret @@ -54,9 +54,9 @@ define void @bfdot_multi_za32_bf16_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3) #0 { ; CHECK-LABEL: bfdot_multi_za32_bf16_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z5.d, z4.d ; CHECK-NEXT: mov z7.d, z2.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z4.d, z3.d ; CHECK-NEXT: mov z6.d, z1.d ; CHECK-NEXT: bfdot za.s[w8, 0, vgx2], { z6.h, z7.h }, { z4.h, z5.h } @@ -71,16 +71,16 @@ define void @fdot_multi_za32_bf16_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: fdot_multi_za32_bf16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z30.d, z3.d -; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1] +; CHECK-NEXT: mov z25.d, z6.d ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z24.d, z5.d ; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: mov z28.d, z1.d +; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1] ; CHECK-NEXT: bfdot za.s[w8, 0, vgx4], { z28.h - z31.h }, { z24.h - z27.h } ; CHECK-NEXT: bfdot za.s[w8, 7, vgx4], { z28.h - z31.h }, { z24.h - z27.h } ; CHECK-NEXT: ret @@ -99,8 +99,8 @@ define void @fdot_single_za32_f16_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #0 { ; CHECK-LABEL: fdot_single_za32_f16_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: fdot za.s[w8, 0, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: fdot za.s[w8, 7, vgx2], { z1.h, z2.h }, z3.h @@ -134,8 +134,8 @@ define void @bfdot_single_za32_bf16_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #0 { ; CHECK-LABEL: bfdot_single_za32_bf16_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 +; CHECK-NEXT: mov w8, w0 ; 
CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: bfdot za.s[w8, 0, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: bfdot za.s[w8, 7, vgx2], { z1.h, z2.h }, z3.h @@ -169,8 +169,8 @@ define void @fdot_lane_za32_f16_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #0 { ; CHECK-LABEL: fdot_lane_za32_f16_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: fdot za.s[w8, 0, vgx2], { z4.h, z5.h }, z3.h[3] ; CHECK-NEXT: fdot za.s[w8, 7, vgx2], { z4.h, z5.h }, z3.h[3] @@ -206,8 +206,8 @@ define void @bfdot_lane_za32_bf16_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #0 { ; CHECK-LABEL: bfdot_lane_za32_bf16_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: bfdot za.s[w8, 0, vgx2], { z4.h, z5.h }, z3.h[3] ; CHECK-NEXT: bfdot za.s[w8, 7, vgx2], { z4.h, z5.h }, z3.h[3] diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-insert-mova.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-insert-mova.ll --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-insert-mova.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-insert-mova.ll @@ -10,8 +10,8 @@ define void @za_write_vg2_horiz_b(i32 %slice, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_horiz_b: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0h.b[w12, 0:1], { z0.b, z1.b } ; CHECK-NEXT: mov za0h.b[w12, 14:15], { z0.b, z1.b } @@ -25,8 +25,8 @@ define void @za_write_vg2_horiz_h(i32 %slice, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_horiz_h: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0h.h[w12, 0:1], { z0.h, z1.h } ; CHECK-NEXT: mov za1h.h[w12, 6:7], { z0.h, z1.h } @@ -40,8 +40,8 @@ define void @za_write_vg2_horiz_f16(i32 %slice, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_horiz_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0h.h[w12, 0:1], { z0.h, z1.h } ; CHECK-NEXT: mov za1h.h[w12, 6:7], { z0.h, z1.h } @@ -55,8 +55,8 @@ define void @za_write_vg2_horiz_bf16(i32 %slice, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_horiz_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0h.h[w12, 0:1], { z0.h, z1.h } ; CHECK-NEXT: mov za1h.h[w12, 6:7], { z0.h, z1.h } @@ -70,8 +70,8 @@ define void @za_write_vg2_horiz_s(i32 %slice, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_horiz_s: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0h.s[w12, 0:1], { z0.s, z1.s } ; CHECK-NEXT: mov za3h.s[w12, 2:3], { z0.s, z1.s } @@ -85,8 +85,8 @@ define void @za_write_vg2_horiz_f32(i32 %slice, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_horiz_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: // 
kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0h.s[w12, 0:1], { z0.s, z1.s } ; CHECK-NEXT: mov za3h.s[w12, 2:3], { z0.s, z1.s } @@ -100,8 +100,8 @@ define void @za_write_vg2_horiz_d(i32 %slice, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_horiz_d: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0h.d[w12, 0:1], { z0.d, z1.d } ; CHECK-NEXT: ret @@ -112,8 +112,8 @@ define void @za_write_vg2_horiz_f64(i32 %slice, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_horiz_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0h.d[w12, 0:1], { z0.d, z1.d } ; CHECK-NEXT: ret @@ -126,8 +126,8 @@ define void @za_write_vg2_vert_b(i32 %slice, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_vert_b: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0v.b[w12, 0:1], { z0.b, z1.b } ; CHECK-NEXT: mov za0v.b[w12, 14:15], { z0.b, z1.b } @@ -141,8 +141,8 @@ define void @za_write_vg2_vert_h(i32 %slice, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_vert_h: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0v.h[w12, 0:1], { z0.h, z1.h } ; CHECK-NEXT: mov za1v.h[w12, 6:7], { z0.h, z1.h } @@ -156,8 +156,8 @@ define void @za_write_vg2_vert_f16(i32 %slice, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_vert_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0v.h[w12, 0:1], { z0.h, z1.h } ; CHECK-NEXT: mov za1v.h[w12, 6:7], { z0.h, z1.h } @@ -171,8 +171,8 @@ define void @za_write_vg2_vert_bf16(i32 %slice, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_vert_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0v.h[w12, 0:1], { z0.h, z1.h } ; CHECK-NEXT: mov za1v.h[w12, 6:7], { z0.h, z1.h } @@ -186,8 +186,8 @@ define void @za_write_vg2_vert_s(i32 %slice, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_vert_s: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0v.s[w12, 0:1], { z0.s, z1.s } ; CHECK-NEXT: mov za3v.s[w12, 2:3], { z0.s, z1.s } @@ -201,8 +201,8 @@ define void @za_write_vg2_vert_f32(i32 %slice, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_vert_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0v.s[w12, 0:1], { z0.s, z1.s } ; 
CHECK-NEXT: mov za3v.s[w12, 2:3], { z0.s, z1.s } @@ -216,8 +216,8 @@ define void @za_write_vg2_vert_d(i32 %slice, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_vert_d: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0v.d[w12, 0:1], { z0.d, z1.d } ; CHECK-NEXT: ret @@ -228,8 +228,8 @@ define void @za_write_vg2_vert_f64(i32 %slice, %zn1, %zn2) { ; CHECK-LABEL: za_write_vg2_vert_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w12, w0 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za0v.d[w12, 0:1], { z0.d, z1.d } ; CHECK-NEXT: ret @@ -500,8 +500,8 @@ define void @za_write_vg1x2_d(i32 %slice, %za1, %za2) { ; CHECK-LABEL: za_write_vg1x2_d: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za.d[w8, 0, vgx2], { z0.d, z1.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx2], { z0.d, z1.d } @@ -515,8 +515,8 @@ define void @za_write_vg1x2_f64(i32 %slice, %za1, %za2) { ; CHECK-LABEL: za_write_vg1x2_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: mov za.d[w8, 0, vgx2], { z0.d, z1.d } ; CHECK-NEXT: mov za.d[w8, 7, vgx2], { z0.d, z1.d } diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-int-dots.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-int-dots.ll --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-int-dots.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-int-dots.ll @@ -9,9 +9,9 @@ define void @udot_multi_za32_u16_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3) #0 { ; CHECK-LABEL: udot_multi_za32_u16_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z5.d, z4.d ; CHECK-NEXT: mov z7.d, z2.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z4.d, z3.d ; CHECK-NEXT: mov z6.d, z1.d ; CHECK-NEXT: udot za.s[w8, 0, vgx2], { z6.h, z7.h }, { z4.h, z5.h } @@ -26,16 +26,16 @@ define void @udot_multi_za32_u16_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: udot_multi_za32_u16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z30.d, z3.d -; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1] +; CHECK-NEXT: mov z25.d, z6.d ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z24.d, z5.d ; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: mov z28.d, z1.d +; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1] ; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z28.h - z31.h }, { z24.h - z27.h } ; CHECK-NEXT: udot za.s[w8, 7, vgx4], { z28.h - z31.h }, { z24.h - z27.h } ; CHECK-NEXT: ret @@ -51,9 +51,9 @@ define void @udot_multi_za32_u8_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3) #0 { ; CHECK-LABEL: udot_multi_za32_u8_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z5.d, z4.d ; CHECK-NEXT: mov z7.d, z2.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z4.d, z3.d ; CHECK-NEXT: mov z6.d, z1.d ; CHECK-NEXT: udot za.s[w8, 0, vgx2], { z6.b, z7.b }, { z4.b, z5.b } @@ -68,16 +68,16 @@ 
define void @udot_multi_za32_u8_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: udot_multi_za32_u8_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z30.d, z3.d -; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1] +; CHECK-NEXT: mov z25.d, z6.d ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z24.d, z5.d ; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: mov z28.d, z1.d +; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1] ; CHECK-NEXT: udot za.s[w8, 0, vgx4], { z28.b - z31.b }, { z24.b - z27.b } ; CHECK-NEXT: udot za.s[w8, 7, vgx4], { z28.b - z31.b }, { z24.b - z27.b } ; CHECK-NEXT: ret @@ -93,9 +93,9 @@ define void @udot_multi_za64_u16_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3) #1 { ; CHECK-LABEL: udot_multi_za64_u16_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z5.d, z4.d ; CHECK-NEXT: mov z7.d, z2.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z4.d, z3.d ; CHECK-NEXT: mov z6.d, z1.d ; CHECK-NEXT: udot za.d[w8, 0, vgx2], { z6.h, z7.h }, { z4.h, z5.h } @@ -110,16 +110,16 @@ define void @udot_multi_za64_u16_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: udot_multi_za64_u16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z30.d, z3.d -; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1] +; CHECK-NEXT: mov z25.d, z6.d ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z24.d, z5.d ; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: mov z28.d, z1.d +; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1] ; CHECK-NEXT: udot za.d[w8, 0, vgx4], { z28.h - z31.h }, { z24.h - z27.h } ; CHECK-NEXT: udot za.d[w8, 7, vgx4], { z28.h - z31.h }, { z24.h - z27.h } ; CHECK-NEXT: ret @@ -135,9 +135,9 @@ define void @usdot_multi_za32_u8_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3) #0 { ; CHECK-LABEL: usdot_multi_za32_u8_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z5.d, z4.d ; CHECK-NEXT: mov z7.d, z2.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z4.d, z3.d ; CHECK-NEXT: mov z6.d, z1.d ; CHECK-NEXT: usdot za.s[w8, 0, vgx2], { z6.b, z7.b }, { z4.b, z5.b } @@ -152,16 +152,16 @@ define void @usdot_multi_za32_u8_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: usdot_multi_za32_u8_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z30.d, z3.d -; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1] +; CHECK-NEXT: mov z25.d, z6.d ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z24.d, z5.d ; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: mov z28.d, z1.d +; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1] ; CHECK-NEXT: usdot za.s[w8, 0, vgx4], { z28.b - z31.b }, { z24.b - z27.b } ; CHECK-NEXT: usdot za.s[w8, 7, vgx4], { z28.b - z31.b }, { z24.b - z27.b } ; CHECK-NEXT: ret @@ -180,9 +180,9 @@ define void @sdot_multi_za32_u16_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3) #0 { ; CHECK-LABEL: sdot_multi_za32_u16_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z5.d, z4.d ; CHECK-NEXT: mov z7.d, z2.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z4.d, z3.d ; CHECK-NEXT: mov z6.d, z1.d ; 
CHECK-NEXT: sdot za.s[w8, 0, vgx2], { z6.h, z7.h }, { z4.h, z5.h } @@ -197,16 +197,16 @@ define void @sdot_multi_za32_u16_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: sdot_multi_za32_u16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z30.d, z3.d -; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1] +; CHECK-NEXT: mov z25.d, z6.d ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z24.d, z5.d ; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: mov z28.d, z1.d +; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1] ; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z28.h - z31.h }, { z24.h - z27.h } ; CHECK-NEXT: sdot za.s[w8, 7, vgx4], { z28.h - z31.h }, { z24.h - z27.h } ; CHECK-NEXT: ret @@ -222,9 +222,9 @@ define void @sdot_multi_za32_u8_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3) #0 { ; CHECK-LABEL: sdot_multi_za32_u8_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z5.d, z4.d ; CHECK-NEXT: mov z7.d, z2.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z4.d, z3.d ; CHECK-NEXT: mov z6.d, z1.d ; CHECK-NEXT: sdot za.s[w8, 0, vgx2], { z6.b, z7.b }, { z4.b, z5.b } @@ -239,16 +239,16 @@ define void @sdot_multi_za32_u8_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: sdot_multi_za32_u8_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z30.d, z3.d -; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1] +; CHECK-NEXT: mov z25.d, z6.d ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z24.d, z5.d ; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: mov z28.d, z1.d +; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1] ; CHECK-NEXT: sdot za.s[w8, 0, vgx4], { z28.b - z31.b }, { z24.b - z27.b } ; CHECK-NEXT: sdot za.s[w8, 7, vgx4], { z28.b - z31.b }, { z24.b - z27.b } ; CHECK-NEXT: ret @@ -264,9 +264,9 @@ define void @sdot_multi_za64_u16_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3) #1 { ; CHECK-LABEL: sdot_multi_za64_u16_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z5.d, z4.d ; CHECK-NEXT: mov z7.d, z2.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z4.d, z3.d ; CHECK-NEXT: mov z6.d, z1.d ; CHECK-NEXT: sdot za.d[w8, 0, vgx2], { z6.h, z7.h }, { z4.h, z5.h } @@ -281,16 +281,16 @@ define void @sdot_multi_za64_u16_vg1x4(i32 %slice, %unused, %zn0, %zn1, %zn2, %zn3, ; CHECK-LABEL: sdot_multi_za64_u16_vg1x4: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z25.d, z6.d +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: mov z24.d, z5.d -; CHECK-NEXT: mov z30.d, z3.d -; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1] +; CHECK-NEXT: mov z25.d, z6.d ; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z24.d, z5.d ; CHECK-NEXT: mov z29.d, z2.d ; CHECK-NEXT: mov z28.d, z1.d +; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1] ; CHECK-NEXT: sdot za.d[w8, 0, vgx4], { z28.h - z31.h }, { z24.h - z27.h } ; CHECK-NEXT: sdot za.d[w8, 7, vgx4], { z28.h - z31.h }, { z24.h - z27.h } ; CHECK-NEXT: ret @@ -309,8 +309,8 @@ define void @udot_single_za32_u16_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #0 { ; CHECK-LABEL: udot_single_za32_u16_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def 
$z1_z2 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: udot za.s[w8, 0, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: udot za.s[w8, 7, vgx2], { z1.h, z2.h }, z3.h @@ -341,8 +341,8 @@ define void @udot_single_za32_u8_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #0 { ; CHECK-LABEL: udot_single_za32_u8_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: udot za.s[w8, 0, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: udot za.s[w8, 7, vgx2], { z1.b, z2.b }, z3.b @@ -373,8 +373,8 @@ define void @udot_single_za64_u16_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #1 { ; CHECK-LABEL: udot_single_za64_u16_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: udot za.d[w8, 0, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: udot za.d[w8, 7, vgx2], { z1.h, z2.h }, z3.h @@ -405,8 +405,8 @@ define void @usdot_single_za32_u8_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #0 { ; CHECK-LABEL: usdot_single_za32_u8_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: usdot za.s[w8, 0, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: usdot za.s[w8, 7, vgx2], { z1.b, z2.b }, z3.b @@ -440,8 +440,8 @@ define void @sdot_single_za32_u16_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #0 { ; CHECK-LABEL: sdot_single_za32_u16_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: sdot za.s[w8, 0, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: sdot za.s[w8, 7, vgx2], { z1.h, z2.h }, z3.h @@ -472,8 +472,8 @@ define void @sdot_single_za32_u8_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #0 { ; CHECK-LABEL: sdot_single_za32_u8_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: sdot za.s[w8, 0, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: sdot za.s[w8, 7, vgx2], { z1.b, z2.b }, z3.b @@ -504,8 +504,8 @@ define void @sdot_single_za64_u16_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #1 { ; CHECK-LABEL: sdot_single_za64_u16_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: sdot za.d[w8, 0, vgx2], { z1.h, z2.h }, z3.h ; CHECK-NEXT: sdot za.d[w8, 7, vgx2], { z1.h, z2.h }, z3.h @@ -536,8 +536,8 @@ define void @sudot_single_za32_u8_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #0 { ; CHECK-LABEL: sudot_single_za32_u8_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2 ; CHECK-NEXT: sudot za.s[w8, 0, vgx2], { z1.b, z2.b }, z3.b ; CHECK-NEXT: sudot za.s[w8, 7, vgx2], { z1.b, z2.b }, z3.b @@ -570,8 +570,8 @@ define void 
@udot_lane_za32_u16_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #0 { ; CHECK-LABEL: udot_lane_za32_u16_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: udot za.s[w8, 0, vgx2], { z4.h, z5.h }, z3.h[3] ; CHECK-NEXT: udot za.s[w8, 7, vgx2], { z4.h, z5.h }, z3.h[3] @@ -604,8 +604,8 @@ define void @udot_lane_za32_u8_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #0 { ; CHECK-LABEL: udot_lane_za32_u8_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: udot za.s[w8, 0, vgx2], { z4.b, z5.b }, z3.b[3] ; CHECK-NEXT: udot za.s[w8, 7, vgx2], { z4.b, z5.b }, z3.b[3] @@ -638,8 +638,8 @@ define void @udot_lane_za64_u16_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #1 { ; CHECK-LABEL: udot_lane_za64_u16_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: udot za.d[w8, 0, vgx2], { z4.h, z5.h }, z3.h[1] ; CHECK-NEXT: udot za.d[w8, 7, vgx2], { z4.h, z5.h }, z3.h[1] @@ -672,8 +672,8 @@ define void @usdot_lane_za32_u8_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #0 { ; CHECK-LABEL: usdot_lane_za32_u8_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: usdot za.s[w8, 0, vgx2], { z4.b, z5.b }, z3.b[3] ; CHECK-NEXT: usdot za.s[w8, 7, vgx2], { z4.b, z5.b }, z3.b[3] @@ -709,8 +709,8 @@ define void @sdot_lane_za32_u16_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #0 { ; CHECK-LABEL: sdot_lane_za32_u16_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: sdot za.s[w8, 0, vgx2], { z4.h, z5.h }, z3.h[3] ; CHECK-NEXT: sdot za.s[w8, 7, vgx2], { z4.h, z5.h }, z3.h[3] @@ -743,8 +743,8 @@ define void @sdot_lane_za32_u8_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #0 { ; CHECK-LABEL: sdot_lane_za32_u8_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: sdot za.s[w8, 0, vgx2], { z4.b, z5.b }, z3.b[3] ; CHECK-NEXT: sdot za.s[w8, 7, vgx2], { z4.b, z5.b }, z3.b[3] @@ -777,8 +777,8 @@ define void @sdot_lane_za64_u16_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #1 { ; CHECK-LABEL: sdot_lane_za64_u16_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: sdot za.d[w8, 0, vgx2], { z4.h, z5.h }, z3.h[1] ; CHECK-NEXT: sdot za.d[w8, 7, vgx2], { z4.h, z5.h }, z3.h[1] @@ -813,8 +813,8 @@ define void @sudot_lane_za32_u8_vg1x2(i32 %slice, %unused, %zn0, %zn1, %zn2) #0 { ; CHECK-LABEL: sudot_lane_za32_u8_vg1x2: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: sudot za.s[w8, 0, vgx2], { z4.b, z5.b }, z3.b[3] ; CHECK-NEXT: sudot za.s[w8, 7, vgx2], { z4.b, z5.b }, z3.b[3] diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-max.ll @@ -1,7 +1,6 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs < %s 
| FileCheck %s - ; SMAX (Single, x2) - define { , } @multi_vec_max_single_x2_s8( %unused, %zdn1, %zdn2, %zm) { ; CHECK-LABEL: multi_vec_max_single_x2_s8: ; CHECK: // %bb.0: @@ -14,7 +13,6 @@ %res = call { , } @llvm.aarch64.sve.smax.single.x2.nxv16i8( %zdn1, %zdn2, %zm) ret { , } %res } - define { , } @multi_vec_max_single_x2_s16( %unused, %zdn1, %zdn2, %zm) { ; CHECK-LABEL: multi_vec_max_single_x2_s16: ; CHECK: // %bb.0: @@ -27,7 +25,6 @@ %res = call { , } @llvm.aarch64.sve.smax.single.x2.nxv8i16( %zdn1, %zdn2, %zm) ret { , } %res } - define { , } @multi_vec_max_single_x2_s32( %unused, %zdn1, %zdn2, %zm) { ; CHECK-LABEL: multi_vec_max_single_x2_s32: ; CHECK: // %bb.0: @@ -40,7 +37,6 @@ %res = call { , } @llvm.aarch64.sve.smax.single.x2.nxv4i32( %zdn1, %zdn2, %zm) ret { , } %res } - define { , } @multi_vec_max_single_x2_s64( %unused, %zdn1, %zdn2, %zm) { ; CHECK-LABEL: multi_vec_max_single_x2_s64: ; CHECK: // %bb.0: @@ -53,9 +49,7 @@ %res = call { , } @llvm.aarch64.sve.smax.single.x2.nxv2i64( %zdn1, %zdn2, %zm) ret { , } %res } - ; UMAX (Single, x2) - define { , } @multi_vec_max_single_x2_u8( %unused, %zdn1, %zdn2, %zm) { ; CHECK-LABEL: multi_vec_max_single_x2_u8: ; CHECK: // %bb.0: @@ -68,7 +62,6 @@ %res = call { , } @llvm.aarch64.sve.umax.single.x2.nxv16i8( %zdn1, %zdn2, %zm) ret { , } %res } - define { , } @multi_vec_max_single_x2_u16( %unused, %zdn1, %zdn2, %zm) { ; CHECK-LABEL: multi_vec_max_single_x2_u16: ; CHECK: // %bb.0: @@ -81,7 +74,6 @@ %res = call { , } @llvm.aarch64.sve.umax.single.x2.nxv8i16( %zdn1, %zdn2, %zm) ret { , } %res } - define { , } @multi_vec_max_single_x2_u32( %unused, %zdn1, %zdn2, %zm) { ; CHECK-LABEL: multi_vec_max_single_x2_u32: ; CHECK: // %bb.0: @@ -94,7 +86,6 @@ %res = call { , } @llvm.aarch64.sve.umax.single.x2.nxv4i32( %zdn1, %zdn2, %zm) ret { , } %res } - define { , } @multi_vec_max_single_x2_u64( %unused, %zdn1, %zdn2, %zm) { ; CHECK-LABEL: multi_vec_max_single_x2_u64: ; CHECK: // %bb.0: @@ -107,9 +98,7 @@ %res = call { , } @llvm.aarch64.sve.umax.single.x2.nxv2i64( %zdn1, %zdn2, %zm) ret { , } %res } - ; FMAX (Single, x2) - define { , } @multi_vec_max_single_x2_f16( %unused, %zdn1, %zdn2, %zm) { ; CHECK-LABEL: multi_vec_max_single_x2_f16: ; CHECK: // %bb.0: @@ -122,7 +111,6 @@ %res = call { , } @llvm.aarch64.sve.fmax.single.x2.nxv8f16( %zdn1, %zdn2, %zm) ret { , } %res } - define { , } @multi_vec_max_single_x2_f32( %unused, %zdn1, %zdn2, %zm) { ; CHECK-LABEL: multi_vec_max_single_x2_f32: ; CHECK: // %bb.0: @@ -135,7 +123,6 @@ %res = call { , } @llvm.aarch64.sve.fmax.single.x2.nxv4f32( %zdn1, %zdn2, %zm) ret { , } %res } - define { , } @multi_vec_max_single_x2_f64( %unused, %zdn1, %zdn2, %zm) { ; CHECK-LABEL: multi_vec_max_single_x2_f64: ; CHECK: // %bb.0: @@ -148,224 +135,76 @@ %res = call { , } @llvm.aarch64.sve.fmax.single.x2.nxv2f64( %zdn1, %zdn2, %zm) ret { , } %res } - ; SMAX (Single, x4) - define { , , , } @multi_vec_max_single_x4_s8( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm) { -; CHECK-LABEL: multi_vec_max_single_x4_s8: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: smax { z24.b - z27.b }, { z24.b - z27.b }, z5.b -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv16i8( %zdn1, %zdn2, %zdn3, %zdn4, %zm) ret { , , , } %res } - define { , , , } @multi_vec_max_single_x4_s16( %unused, %zdn1, 
%zdn2, %zdn3, %zdn4, %zm) { -; CHECK-LABEL: multi_vec_max_single_x4_s16: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: smax { z24.h - z27.h }, { z24.h - z27.h }, z5.h -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv8i16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) ret { , , , } %res } - define { , , , } @multi_vec_max_single_x4_s32( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm) { -; CHECK-LABEL: multi_vec_max_single_x4_s32: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: smax { z24.s - z27.s }, { z24.s - z27.s }, z5.s -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv4i32( %zdn1, %zdn2, %zdn3, %zdn4, %zm) ret { , , , } %res } - define { , , , } @multi_vec_max_single_x4_s64( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm) { -; CHECK-LABEL: multi_vec_max_single_x4_s64: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: smax { z24.d - z27.d }, { z24.d - z27.d }, z5.d -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.smax.single.x4.nxv2i64( %zdn1, %zdn2, %zdn3, %zdn4, %zm) ret { , , , } %res } - ; UMAX (Single, x4) - define { , , , } @multi_vec_max_single_x4_u8( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm) { -; CHECK-LABEL: multi_vec_max_single_x4_u8: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: umax { z24.b - z27.b }, { z24.b - z27.b }, z5.b -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.umax.single.x4.nxv16i8( %zdn1, %zdn2, %zdn3, %zdn4, %zm) ret { , , , } %res } - define { , , , } @multi_vec_max_single_x4_u16( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm) { -; CHECK-LABEL: multi_vec_max_single_x4_u16: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: umax { z24.h - z27.h }, { z24.h - z27.h }, z5.h -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.umax.single.x4.nxv8i16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) ret { , , , } %res } - define { , , , } @multi_vec_max_single_x4_u32( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm) { -; CHECK-LABEL: multi_vec_max_single_x4_u32: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: umax { z24.s - z27.s }, { z24.s - z27.s }, z5.s -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.umax.single.x4.nxv4i32( %zdn1, %zdn2, 
%zdn3, %zdn4, %zm) ret { , , , } %res } - define { , , , } @multi_vec_max_single_x4_u64( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm) { -; CHECK-LABEL: multi_vec_max_single_x4_u64: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: umax { z24.d - z27.d }, { z24.d - z27.d }, z5.d -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.umax.single.x4.nxv2i64( %zdn1, %zdn2, %zdn3, %zdn4, %zm) ret { , , , } %res } - ; FMAX (SINGLE, x4) - define { , , , } @multi_vec_max_single_x4_f16( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm) { -; CHECK-LABEL: multi_vec_max_single_x4_f16: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: fmax { z24.h - z27.h }, { z24.h - z27.h }, z5.h -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fmax.single.x4.nxv8f16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) ret { , , , } %res } - define { , , , } @multi_vec_max_single_x4_f32( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm) { -; CHECK-LABEL: multi_vec_max_single_x4_f32: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: fmax { z24.s - z27.s }, { z24.s - z27.s }, z5.s -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fmax.single.x4.nxv4f32( %zdn1, %zdn2, %zdn3, %zdn4, %zm) ret { , , , } %res } - define { , , , } @multi_vec_max_single_x4_f64( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm) { -; CHECK-LABEL: multi_vec_max_single_x4_f64: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: fmax { z24.d - z27.d }, { z24.d - z27.d }, z5.d -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fmax.single.x4.nxv2f64( %zdn1, %zdn2, %zdn3, %zdn4, %zm) ret { , , , } %res } - ; SMAX (Multi, x2) - define { , } @multi_vec_max_multi_x2_s8( %unused, %zdn1, %zdn2, %zm1, %zm2) { ; CHECK-LABEL: multi_vec_max_multi_x2_s8: ; CHECK: // %bb.0: @@ -380,7 +219,6 @@ %res = call { , } @llvm.aarch64.sve.smax.x2.nxv16i8( %zdn1, %zdn2, %zm1, %zm2) ret { , } %res } - define { , } @multi_vec_max_multi_x2_s16( %unused, %zdn1, %zdn2, %zm1, %zm2) { ; CHECK-LABEL: multi_vec_max_multi_x2_s16: ; CHECK: // %bb.0: @@ -395,7 +233,6 @@ %res = call { , } @llvm.aarch64.sve.smax.x2.nxv8i16( %zdn1, %zdn2, %zm1, %zm2) ret { , } %res } - define { , } @multi_vec_max_multi_x2_s32( %unused, %zdn1, %zdn2, %zm1, %zm2) { ; CHECK-LABEL: multi_vec_max_multi_x2_s32: ; CHECK: // %bb.0: @@ -410,7 +247,6 @@ %res = call { , } @llvm.aarch64.sve.smax.x2.nxv4i32( %zdn1, %zdn2, %zm1, %zm2) ret { , } %res } - define { , } @multi_vec_max_multi_x2_s64( %unused, %zdn1, %zdn2, %zm1, %zm2) { ; CHECK-LABEL: multi_vec_max_multi_x2_s64: ; CHECK: // %bb.0: @@ -425,9 +261,7 @@ %res = call { , } @llvm.aarch64.sve.smax.x2.nxv2i64( %zdn1, %zdn2, %zm1, %zm2) ret { , } %res } - ; UMAX 
(Multi, x2) - define { , } @multi_vec_max_multi_x2_u8( %unused, %zdn1, %zdn2, %zm1, %zm2) { ; CHECK-LABEL: multi_vec_max_multi_x2_u8: ; CHECK: // %bb.0: @@ -442,7 +276,6 @@ %res = call { , } @llvm.aarch64.sve.umax.x2.nxv16i8( %zdn1, %zdn2, %zm1, %zm2) ret { , } %res } - define { , } @multi_vec_max_multi_x2_u16( %unused, %zdn1, %zdn2, %zm1, %zm2) { ; CHECK-LABEL: multi_vec_max_multi_x2_u16: ; CHECK: // %bb.0: @@ -457,7 +290,6 @@ %res = call { , } @llvm.aarch64.sve.umax.x2.nxv8i16( %zdn1, %zdn2, %zm1, %zm2) ret { , } %res } - define { , } @multi_vec_max_multi_x2_u32( %unused, %zdn1, %zdn2, %zm1, %zm2) { ; CHECK-LABEL: multi_vec_max_multi_x2_u32: ; CHECK: // %bb.0: @@ -472,7 +304,6 @@ %res = call { , } @llvm.aarch64.sve.umax.x2.nxv4i32( %zdn1, %zdn2, %zm1, %zm2) ret { , } %res } - define { , } @multi_vec_max_multi_x2_u64( %unused, %zdn1, %zdn2, %zm1, %zm2) { ; CHECK-LABEL: multi_vec_max_multi_x2_u64: ; CHECK: // %bb.0: @@ -487,9 +318,7 @@ %res = call { , } @llvm.aarch64.sve.umax.x2.nxv2i64( %zdn1, %zdn2, %zm1, %zm2) ret { , } %res } - ; FMAX (Multi, x2) - define { , } @multi_vec_max_multi_x2_f16( %unused, %zdn1, %zdn2, %zm1, %zm2) { ; CHECK-LABEL: multi_vec_max_multi_x2_f16: ; CHECK: // %bb.0: @@ -504,7 +333,6 @@ %res = call { , } @llvm.aarch64.sve.fmax.x2.nxv8f16( %zdn1, %zdn2, %zm1, %zm2) ret { , } %res } - define { , } @multi_vec_max_multi_x2_f32( %unused, %zdn1, %zdn2, %zm1, %zm2) { ; CHECK-LABEL: multi_vec_max_multi_x2_f32: ; CHECK: // %bb.0: @@ -519,7 +347,6 @@ %res = call { , } @llvm.aarch64.sve.fmax.x2.nxv4f32( %zdn1, %zdn2, %zm1, %zm2) ret { , } %res } - define { , } @multi_vec_max_multi_x2_f64( %unused, %zdn1, %zdn2, %zm1, %zm2) { ; CHECK-LABEL: multi_vec_max_multi_x2_f64: ; CHECK: // %bb.0: @@ -534,301 +361,98 @@ %res = call { , } @llvm.aarch64.sve.fmax.x2.nxv2f64( %zdn1, %zdn2, %zm1, %zm2) ret { , } %res } - ; SMAX (Multi, x4) - define { , , , } @multi_vec_max_multi_x4_s8( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { -; CHECK-LABEL: multi_vec_max_multi_x4_s8: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: ld1b { z31.b }, p0/z, [x0] -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: smax { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.smax.x4.nxv16i8( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) ret { , , , } %res } - define { , , , } @multi_vec_max_multi_x4_s16( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { -; CHECK-LABEL: multi_vec_max_multi_x4_s16: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0] -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: smax { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.smax.x4.nxv8i16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) ret { , , , } %res } - define { , , , } @multi_vec_max_multi_x4_s32( %unused, %zdn1, %zdn2, 
%zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { -; CHECK-LABEL: multi_vec_max_multi_x4_s32: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: smax { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.smax.x4.nxv4i32( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) ret { , , , } %res } - define { , , , } @multi_vec_max_multi_x4_s64( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { -; CHECK-LABEL: multi_vec_max_multi_x4_s64: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: smax { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.smax.x4.nxv2i64( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) ret { , , , } %res } - ; UMAX (Multi, x4) - define { , , , } @multi_vec_max_multi_x4_u8( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { -; CHECK-LABEL: multi_vec_max_multi_x4_u8: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: ld1b { z31.b }, p0/z, [x0] -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: umax { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.umax.x4.nxv16i8( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) ret { , , , } %res } - define { , , , } @multi_vec_max_multi_x4_u16( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { -; CHECK-LABEL: multi_vec_max_multi_x4_u16: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0] -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: umax { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.umax.x4.nxv8i16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) ret { , , , } %res } - define { , , , } @multi_vec_max_multi_x4_u32( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { -; CHECK-LABEL: multi_vec_max_multi_x4_u32: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: 
ld1w { z31.s }, p0/z, [x0] -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: umax { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.umax.x4.nxv4i32( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) ret { , , , } %res } - define { , , , } @multi_vec_max_multi_x4_u64( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { -; CHECK-LABEL: multi_vec_max_multi_x4_u64: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: umax { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.umax.x4.nxv2i64( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) ret { , , , } %res } - ; FMAX (Multi, x4) - define { , , , } @multi_vec_max_multi_x4_f16( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { -; CHECK-LABEL: multi_vec_max_multi_x4_f16: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0] -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: fmax { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fmax.x4.nxv8f16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) ret { , , , } %res } - define { , , , } @multi_vec_max_multi_x4_f32( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { -; CHECK-LABEL: multi_vec_max_multi_x4_f32: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: fmax { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fmax.x4.nxv4f32( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) ret { , , , } %res } - define { , , , } @multi_vec_max_multi_x4_f64( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { -; CHECK-LABEL: multi_vec_max_multi_x4_f64: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: fmax { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; 
CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fmax.x4.nxv2f64( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) ret { , , , } %res } - ; FMAXNM (Single, x2) - define { , } @multi_vec_maxnm_single_x2_f16( %dummy, %zdn1, %zdn2, %zm) { ; CHECK-LABEL: multi_vec_maxnm_single_x2_f16: ; CHECK: // %bb.0: @@ -841,7 +465,6 @@ %res = call { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv8f16( %zdn1, %zdn2, %zm) ret { , } %res } - define { , } @multi_vec_maxnm_single_x2_f32( %dummy, %zdn1, %zdn2, %zm) { ; CHECK-LABEL: multi_vec_maxnm_single_x2_f32: ; CHECK: // %bb.0: @@ -854,7 +477,6 @@ %res = call { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv4f32( %zdn1, %zdn2, %zm) ret { , } %res } - define { , } @multi_vec_maxnm_single_x2_f64( %dummy, %zdn1, %zdn2, %zm) { ; CHECK-LABEL: multi_vec_maxnm_single_x2_f64: ; CHECK: // %bb.0: @@ -867,68 +489,26 @@ %res = call { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv2f64( %zdn1, %zdn2, %zm) ret { , } %res } - ; FMAXNM (Single, x4) - define { , , , } @multi_vec_maxnm_single_x4_f16( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm) { -; CHECK-LABEL: multi_vec_maxnm_single_x4_f16: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: fmaxnm { z24.h - z27.h }, { z24.h - z27.h }, z5.h -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv8f16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) ret { , , , } %res } - define { , , , } @multi_vec_maxnm_single_x4_f32( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm) { -; CHECK-LABEL: multi_vec_maxnm_single_x4_f32: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: fmaxnm { z24.s - z27.s }, { z24.s - z27.s }, z5.s -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv4f32( %zdn1, %zdn2, %zdn3, %zdn4, %zm) ret { , , , } %res } - define { , , , } @multi_vec_maxnm_single_x4_f64( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm) { -; CHECK-LABEL: multi_vec_maxnm_single_x4_f64: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: fmaxnm { z24.d - z27.d }, { z24.d - z27.d }, z5.d -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv2f64( %zdn1, %zdn2, %zdn3, %zdn4, %zm) ret { , , , } %res } - ; FMAXNM (Multi, x2) - define { , } @multi_vec_maxnm_x2_f16( %dummy, %zdn1, %zdn2, %zm1, %zm2) { ; CHECK-LABEL: multi_vec_maxnm_x2_f16: ; CHECK: // %bb.0: @@ -943,7 +523,6 @@ %res = call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv8f16( %zdn1, %zdn2, %zm1, %zm2) ret { , } %res } - define { , } @multi_vec_maxnm_x2_f32( %dummy, %zdn1, %zdn2, %zm1, %zm2) { ; CHECK-LABEL: multi_vec_maxnm_x2_f32: ; CHECK: // %bb.0: @@ -958,7 +537,6 @@ %res = call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv4f32( %zdn1, %zdn2, %zm1, %zm2) ret { , } %res } - define { , } @multi_vec_maxnm_x2_f64( %dummy, %zdn1, %zdn2, %zm1, %zm2) { ; CHECK-LABEL: multi_vec_maxnm_x2_f64: ; CHECK: // %bb.0: @@ -973,129 +551,64 @@ %res = call { , } @llvm.aarch64.sve.fmaxnm.x2.nxv2f64( 
%zdn1, %zdn2, %zm1, %zm2) ret { , } %res } - ; FMAXNM (Multi, x4) - define { , , , } @multi_vec_maxnm_x4_f16( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { -; CHECK-LABEL: multi_vec_maxnm_x4_f16: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0] -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: fmaxnm { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; CHECK-NEXT: ret %res = call { , , , } - @llvm.aarch64.sve.fmaxnm.x4.nxv8f16( %zdn1, %zdn2, %zdn3, %zdn4, + @llvm.aarch64.sve.fmaxnm.x4.nxv8f16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) ret { , , , } %res } - define { , , , } @multi_vec_maxnm_x4_f32( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { -; CHECK-LABEL: multi_vec_maxnm_x4_f32: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: fmaxnm { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv4f32( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) ret { , , , } %res } - define { , , , } @multi_vec_maxnm_x4_f64( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { -; CHECK-LABEL: multi_vec_maxnm_x4_f64: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: fmaxnm { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv2f64( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) ret { , , , } %res } - declare { , } @llvm.aarch64.sve.smax.single.x2.nxv16i8(, , ) declare { , } @llvm.aarch64.sve.smax.single.x2.nxv8i16(, , ) declare { , } @llvm.aarch64.sve.smax.single.x2.nxv4i32(, , ) declare { , } @llvm.aarch64.sve.smax.single.x2.nxv2i64(, , ) - declare { , } @llvm.aarch64.sve.umax.single.x2.nxv16i8(, , ) declare { , } @llvm.aarch64.sve.umax.single.x2.nxv8i16(, , ) declare { , } @llvm.aarch64.sve.umax.single.x2.nxv4i32(, , ) declare { , } @llvm.aarch64.sve.umax.single.x2.nxv2i64(, , ) - declare { , } @llvm.aarch64.sve.fmax.single.x2.nxv8f16(, , ) declare { , } @llvm.aarch64.sve.fmax.single.x2.nxv4f32(, , ) declare { , } @llvm.aarch64.sve.fmax.single.x2.nxv2f64(, , ) - declare { , , , } @llvm.aarch64.sve.smax.single.x4.nxv16i8(, , , , ) declare { , , , } @llvm.aarch64.sve.smax.single.x4.nxv8i16(, , , , ) declare { , , , } @llvm.aarch64.sve.smax.single.x4.nxv4i32(, , , , ) declare { , , , } @llvm.aarch64.sve.smax.single.x4.nxv2i64(, , , , ) - declare { , , , } 
@llvm.aarch64.sve.umax.single.x4.nxv16i8(, , , , ) declare { , , , } @llvm.aarch64.sve.umax.single.x4.nxv8i16(, , , , ) declare { , , , } @llvm.aarch64.sve.umax.single.x4.nxv4i32(, , , , ) declare { , , , } @llvm.aarch64.sve.umax.single.x4.nxv2i64(, , , , ) - declare { , , , } @llvm.aarch64.sve.fmax.single.x4.nxv8f16(, , , , ) declare { , , , } @llvm.aarch64.sve.fmax.single.x4.nxv4f32(, , , , ) declare { , , , } @llvm.aarch64.sve.fmax.single.x4.nxv2f64(, , , , ) - declare { , } @llvm.aarch64.sve.smax.x2.nxv16i8(, , , ) declare { , } @llvm.aarch64.sve.smax.x2.nxv8i16(, , , ) declare { , } @llvm.aarch64.sve.smax.x2.nxv4i32(, , , ) declare { , } @llvm.aarch64.sve.smax.x2.nxv2i64(, , , ) - declare { , } @llvm.aarch64.sve.umax.x2.nxv16i8(, , , ) declare { , } @llvm.aarch64.sve.umax.x2.nxv8i16(, , , ) declare { , } @llvm.aarch64.sve.umax.x2.nxv4i32(, , , ) declare { , } @llvm.aarch64.sve.umax.x2.nxv2i64(, , , ) - declare { , } @llvm.aarch64.sve.fmax.x2.nxv8f16(, , , ) declare { , } @llvm.aarch64.sve.fmax.x2.nxv4f32(, , , ) declare { , } @llvm.aarch64.sve.fmax.x2.nxv2f64(, , , ) - declare { , , , } @llvm.aarch64.sve.smax.x4.nxv16i8(, , , , , , , ) declare { , , , } @@ -1104,7 +617,6 @@ @llvm.aarch64.sve.smax.x4.nxv4i32(, , , , , , , ) declare { , , , } @llvm.aarch64.sve.smax.x4.nxv2i64(, , , , , , , ) - declare { , , , } @llvm.aarch64.sve.umax.x4.nxv16i8(, , , , , , , ) declare { , , , } @@ -1113,29 +625,24 @@ @llvm.aarch64.sve.umax.x4.nxv4i32(, , , , , , , ) declare { , , , } @llvm.aarch64.sve.umax.x4.nxv2i64(, , , , , , , ) - declare { , , , } @llvm.aarch64.sve.fmax.x4.nxv8f16(, , , , , , , ) declare { , , , } @llvm.aarch64.sve.fmax.x4.nxv4f32(, , , , , , , ) declare { , , , } @llvm.aarch64.sve.fmax.x4.nxv2f64(, , , , , , , ) - declare { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv8f16(, , ) declare { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv4f32(, , ) declare { , } @llvm.aarch64.sve.fmaxnm.single.x2.nxv2f64(, , ) - declare { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv8f16(, , , , ) declare { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv4f32(, , , , ) declare { , , , } @llvm.aarch64.sve.fmaxnm.single.x4.nxv2f64(, , , , ) - declare { , } @llvm.aarch64.sve.fmaxnm.x2.nxv8f16(, , , ) declare { , } @llvm.aarch64.sve.fmaxnm.x2.nxv4f32(, , , ) declare { , } @llvm.aarch64.sve.fmaxnm.x2.nxv2f64(, , , ) - declare { , , , } @llvm.aarch64.sve.fmaxnm.x4.nxv8f16(, , , , , , , ) declare { , , , } diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-min.ll @@ -1,7 +1,6 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s - ; SMIN (Single, x2) - define { , } @multi_vec_min_single_x2_s8( %unused, %zdn1, %zdn2, %zm) { ; CHECK-LABEL: multi_vec_min_single_x2_s8: ; CHECK: // %bb.0: @@ -14,7 +13,6 @@ %res = call { , } @llvm.aarch64.sve.smin.single.x2.nxv16i8( %zdn1, %zdn2, %zm) ret { , } %res } - define { , } @multi_vec_min_single_x2_s16( %unused, %zdn1, %zdn2, %zm) { ; CHECK-LABEL: multi_vec_min_single_x2_s16: ; CHECK: // %bb.0: @@ -27,7 +25,6 @@ %res = call { , } @llvm.aarch64.sve.smin.single.x2.nxv8i16( %zdn1, %zdn2, %zm) ret { , } %res } - define { , } @multi_vec_min_single_x2_s32( %unused, %zdn1, %zdn2, %zm) { ; CHECK-LABEL: multi_vec_min_single_x2_s32: ; CHECK: // %bb.0: @@ -40,7 +37,6 @@ %res = call { , } 
@llvm.aarch64.sve.smin.single.x2.nxv4i32( %zdn1, %zdn2, %zm) ret { , } %res } - define { , } @multi_vec_min_single_x2_s64( %unused, %zdn1, %zdn2, %zm) { ; CHECK-LABEL: multi_vec_min_single_x2_s64: ; CHECK: // %bb.0: @@ -53,9 +49,7 @@ %res = call { , } @llvm.aarch64.sve.smin.single.x2.nxv2i64( %zdn1, %zdn2, %zm) ret { , } %res } - ; UMIN (Single, x2) - define { , } @multi_vec_min_single_x2_u8( %unused, %zdn1, %zdn2, %zm) { ; CHECK-LABEL: multi_vec_min_single_x2_u8: ; CHECK: // %bb.0: @@ -68,7 +62,6 @@ %res = call { , } @llvm.aarch64.sve.umin.single.x2.nxv16i8( %zdn1, %zdn2, %zm) ret { , } %res } - define { , } @multi_vec_min_single_x2_u16( %unused, %zdn1, %zdn2, %zm) { ; CHECK-LABEL: multi_vec_min_single_x2_u16: ; CHECK: // %bb.0: @@ -81,7 +74,6 @@ %res = call { , } @llvm.aarch64.sve.umin.single.x2.nxv8i16( %zdn1, %zdn2, %zm) ret { , } %res } - define { , } @multi_vec_min_single_x2_u32( %unused, %zdn1, %zdn2, %zm) { ; CHECK-LABEL: multi_vec_min_single_x2_u32: ; CHECK: // %bb.0: @@ -94,7 +86,6 @@ %res = call { , } @llvm.aarch64.sve.umin.single.x2.nxv4i32( %zdn1, %zdn2, %zm) ret { , } %res } - define { , } @multi_vec_min_single_x2_u64( %unused, %zdn1, %zdn2, %zm) { ; CHECK-LABEL: multi_vec_min_single_x2_u64: ; CHECK: // %bb.0: @@ -107,9 +98,7 @@ %res = call { , } @llvm.aarch64.sve.umin.single.x2.nxv2i64( %zdn1, %zdn2, %zm) ret { , } %res } - ; FMIN (Single, x2) - define { , } @multi_vec_min_single_x2_f16( %unused, %zdn1, %zdn2, %zm) { ; CHECK-LABEL: multi_vec_min_single_x2_f16: ; CHECK: // %bb.0: @@ -122,7 +111,6 @@ %res = call { , } @llvm.aarch64.sve.fmin.single.x2.nxv8f16( %zdn1, %zdn2, %zm) ret { , } %res } - define { , } @multi_vec_min_single_x2_f32( %unused, %zdn1, %zdn2, %zm) { ; CHECK-LABEL: multi_vec_min_single_x2_f32: ; CHECK: // %bb.0: @@ -135,7 +123,6 @@ %res = call { , } @llvm.aarch64.sve.fmin.single.x2.nxv4f32( %zdn1, %zdn2, %zm) ret { , } %res } - define { , } @multi_vec_min_single_x2_f64( %unused, %zdn1, %zdn2, %zm) { ; CHECK-LABEL: multi_vec_min_single_x2_f64: ; CHECK: // %bb.0: @@ -148,224 +135,76 @@ %res = call { , } @llvm.aarch64.sve.fmin.single.x2.nxv2f64( %zdn1, %zdn2, %zm) ret { , } %res } - ; SMIN (Single, x4) - define { , , , } @multi_vec_min_single_x4_s8( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm) { -; CHECK-LABEL: multi_vec_min_single_x4_s8: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: smin { z24.b - z27.b }, { z24.b - z27.b }, z5.b -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv16i8( %zdn1, %zdn2, %zdn3, %zdn4, %zm) ret { , , , } %res } - define { , , , } @multi_vec_min_single_x4_s16( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm) { -; CHECK-LABEL: multi_vec_min_single_x4_s16: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: smin { z24.h - z27.h }, { z24.h - z27.h }, z5.h -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv8i16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) ret { , , , } %res } - define { , , , } @multi_vec_min_single_x4_s32( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm) { -; CHECK-LABEL: multi_vec_min_single_x4_s32: -; CHECK: // %bb.0: -; CHECK-NEXT: mov 
z27.d, z4.d
-; CHECK-NEXT: mov z26.d, z3.d
-; CHECK-NEXT: mov z25.d, z2.d
-; CHECK-NEXT: mov z24.d, z1.d
-; CHECK-NEXT: smin { z24.s - z27.s }, { z24.s - z27.s }, z5.s
-; CHECK-NEXT: mov z0.d, z24.d
-; CHECK-NEXT: mov z1.d, z25.d
-; CHECK-NEXT: mov z2.d, z26.d
-; CHECK-NEXT: mov z3.d, z27.d
-; CHECK-NEXT: ret
 %res = call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv4i32( %zdn1, %zdn2, %zdn3, %zdn4, %zm)
 ret { , , , } %res
 }
-
 define { , , , } @multi_vec_min_single_x4_s64( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm) {
-; CHECK-LABEL: multi_vec_min_single_x4_s64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z26.d, z3.d
-; CHECK-NEXT: mov z25.d, z2.d
-; CHECK-NEXT: mov z24.d, z1.d
-; CHECK-NEXT: smin { z24.d - z27.d }, { z24.d - z27.d }, z5.d
-; CHECK-NEXT: mov z0.d, z24.d
-; CHECK-NEXT: mov z1.d, z25.d
-; CHECK-NEXT: mov z2.d, z26.d
-; CHECK-NEXT: mov z3.d, z27.d
-; CHECK-NEXT: ret
 %res = call { , , , } @llvm.aarch64.sve.smin.single.x4.nxv2i64( %zdn1, %zdn2, %zdn3, %zdn4, %zm)
 ret { , , , } %res
 }
-
 ; UMIN (Single, x4)
-
 define { , , , } @multi_vec_min_single_x4_u8( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm) {
-; CHECK-LABEL: multi_vec_min_single_x4_u8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z26.d, z3.d
-; CHECK-NEXT: mov z25.d, z2.d
-; CHECK-NEXT: mov z24.d, z1.d
-; CHECK-NEXT: umin { z24.b - z27.b }, { z24.b - z27.b }, z5.b
-; CHECK-NEXT: mov z0.d, z24.d
-; CHECK-NEXT: mov z1.d, z25.d
-; CHECK-NEXT: mov z2.d, z26.d
-; CHECK-NEXT: mov z3.d, z27.d
-; CHECK-NEXT: ret
 %res = call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv16i8( %zdn1, %zdn2, %zdn3, %zdn4, %zm)
 ret { , , , } %res
 }
-
 define { , , , } @multi_vec_min_single_x4_u16( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm) {
-; CHECK-LABEL: multi_vec_min_single_x4_u16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z26.d, z3.d
-; CHECK-NEXT: mov z25.d, z2.d
-; CHECK-NEXT: mov z24.d, z1.d
-; CHECK-NEXT: umin { z24.h - z27.h }, { z24.h - z27.h }, z5.h
-; CHECK-NEXT: mov z0.d, z24.d
-; CHECK-NEXT: mov z1.d, z25.d
-; CHECK-NEXT: mov z2.d, z26.d
-; CHECK-NEXT: mov z3.d, z27.d
-; CHECK-NEXT: ret
 %res = call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv8i16( %zdn1, %zdn2, %zdn3, %zdn4, %zm)
 ret { , , , } %res
 }
-
 define { , , , } @multi_vec_min_single_x4_u32( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm) {
-; CHECK-LABEL: multi_vec_min_single_x4_u32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z26.d, z3.d
-; CHECK-NEXT: mov z25.d, z2.d
-; CHECK-NEXT: mov z24.d, z1.d
-; CHECK-NEXT: umin { z24.s - z27.s }, { z24.s - z27.s }, z5.s
-; CHECK-NEXT: mov z0.d, z24.d
-; CHECK-NEXT: mov z1.d, z25.d
-; CHECK-NEXT: mov z2.d, z26.d
-; CHECK-NEXT: mov z3.d, z27.d
-; CHECK-NEXT: ret
 %res = call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv4i32( %zdn1, %zdn2, %zdn3, %zdn4, %zm)
 ret { , , , } %res
 }
-
 define { , , , } @multi_vec_min_single_x4_u64( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm) {
-; CHECK-LABEL: multi_vec_min_single_x4_u64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z26.d, z3.d
-; CHECK-NEXT: mov z25.d, z2.d
-; CHECK-NEXT: mov z24.d, z1.d
-; CHECK-NEXT: umin { z24.d - z27.d }, { z24.d - z27.d }, z5.d
-; CHECK-NEXT: mov z0.d, z24.d
-; CHECK-NEXT: mov z1.d, z25.d
-; CHECK-NEXT: mov z2.d, z26.d
-; CHECK-NEXT: mov z3.d, z27.d
-; CHECK-NEXT: ret
 %res = call { , , , } @llvm.aarch64.sve.umin.single.x4.nxv2i64( %zdn1, %zdn2, %zdn3, %zdn4, %zm)
 ret { , , , } %res
 }
-
 ; FMIN (SINGLE, x4)
-
 define { , , , } @multi_vec_min_single_x4_f16( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm) {
-; CHECK-LABEL: multi_vec_min_single_x4_f16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z26.d, z3.d
-; CHECK-NEXT: mov z25.d, z2.d
-; CHECK-NEXT: mov z24.d, z1.d
-; CHECK-NEXT: fmin { z24.h - z27.h }, { z24.h - z27.h }, z5.h
-; CHECK-NEXT: mov z0.d, z24.d
-; CHECK-NEXT: mov z1.d, z25.d
-; CHECK-NEXT: mov z2.d, z26.d
-; CHECK-NEXT: mov z3.d, z27.d
-; CHECK-NEXT: ret
 %res = call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv8f16( %zdn1, %zdn2, %zdn3, %zdn4, %zm)
 ret { , , , } %res
 }
-
 define { , , , } @multi_vec_min_single_x4_f32( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm) {
-; CHECK-LABEL: multi_vec_min_single_x4_f32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z26.d, z3.d
-; CHECK-NEXT: mov z25.d, z2.d
-; CHECK-NEXT: mov z24.d, z1.d
-; CHECK-NEXT: fmin { z24.s - z27.s }, { z24.s - z27.s }, z5.s
-; CHECK-NEXT: mov z0.d, z24.d
-; CHECK-NEXT: mov z1.d, z25.d
-; CHECK-NEXT: mov z2.d, z26.d
-; CHECK-NEXT: mov z3.d, z27.d
-; CHECK-NEXT: ret
 %res = call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv4f32( %zdn1, %zdn2, %zdn3, %zdn4, %zm)
 ret { , , , } %res
 }
-
 define { , , , } @multi_vec_min_single_x4_f64( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm) {
-; CHECK-LABEL: multi_vec_min_single_x4_f64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z26.d, z3.d
-; CHECK-NEXT: mov z25.d, z2.d
-; CHECK-NEXT: mov z24.d, z1.d
-; CHECK-NEXT: fmin { z24.d - z27.d }, { z24.d - z27.d }, z5.d
-; CHECK-NEXT: mov z0.d, z24.d
-; CHECK-NEXT: mov z1.d, z25.d
-; CHECK-NEXT: mov z2.d, z26.d
-; CHECK-NEXT: mov z3.d, z27.d
-; CHECK-NEXT: ret
 %res = call { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv2f64( %zdn1, %zdn2, %zdn3, %zdn4, %zm)
 ret { , , , } %res
 }
-
 ; SMIN (Multi, x2)
-
 define { , } @multi_vec_min_multi_x2_s8( %unused, %zdn1, %zdn2, %zm1, %zm2) {
 ; CHECK-LABEL: multi_vec_min_multi_x2_s8:
 ; CHECK: // %bb.0:
@@ -380,7 +219,6 @@
 %res = call { , } @llvm.aarch64.sve.smin.x2.nxv16i8( %zdn1, %zdn2, %zm1, %zm2)
 ret { , } %res
 }
-
 define { , } @multi_vec_min_multi_x2_s16( %unused, %zdn1, %zdn2, %zm1, %zm2) {
 ; CHECK-LABEL: multi_vec_min_multi_x2_s16:
 ; CHECK: // %bb.0:
@@ -395,7 +233,6 @@
 %res = call { , } @llvm.aarch64.sve.smin.x2.nxv8i16( %zdn1, %zdn2, %zm1, %zm2)
 ret { , } %res
 }
-
 define { , } @multi_vec_min_multi_x2_s32( %unused, %zdn1, %zdn2, %zm1, %zm2) {
 ; CHECK-LABEL: multi_vec_min_multi_x2_s32:
 ; CHECK: // %bb.0:
@@ -410,7 +247,6 @@
 %res = call { , } @llvm.aarch64.sve.smin.x2.nxv4i32( %zdn1, %zdn2, %zm1, %zm2)
 ret { , } %res
 }
-
 define { , } @multi_vec_min_multi_x2_s64( %unused, %zdn1, %zdn2, %zm1, %zm2) {
 ; CHECK-LABEL: multi_vec_min_multi_x2_s64:
 ; CHECK: // %bb.0:
@@ -425,9 +261,7 @@
 %res = call { , } @llvm.aarch64.sve.smin.x2.nxv2i64( %zdn1, %zdn2, %zm1, %zm2)
 ret { , } %res
 }
-
 ; UMIN (Multi, x2)
-
 define { , } @multi_vec_min_multi_x2_u8( %unused, %zdn1, %zdn2, %zm1, %zm2) {
 ; CHECK-LABEL: multi_vec_min_multi_x2_u8:
 ; CHECK: // %bb.0:
@@ -442,7 +276,6 @@
 %res = call { , } @llvm.aarch64.sve.umin.x2.nxv16i8( %zdn1, %zdn2, %zm1, %zm2)
 ret { , } %res
 }
-
 define { , } @multi_vec_min_multi_x2_u16( %unused, %zdn1, %zdn2, %zm1, %zm2) {
 ; CHECK-LABEL: multi_vec_min_multi_x2_u16:
 ; CHECK: // %bb.0:
@@ -457,7 +290,6 @@
 %res = call { , } @llvm.aarch64.sve.umin.x2.nxv8i16( %zdn1, %zdn2, %zm1, %zm2)
 ret { , } %res
 }
-
 define { , } @multi_vec_min_multi_x2_u32( %unused, %zdn1, %zdn2, %zm1, %zm2) {
 ; CHECK-LABEL: multi_vec_min_multi_x2_u32:
 ; CHECK: // %bb.0:
@@ -472,7 +304,6 @@
 %res = call { , } @llvm.aarch64.sve.umin.x2.nxv4i32( %zdn1, %zdn2, %zm1, %zm2)
 ret { , } %res
 }
-
 define { , } @multi_vec_min_multi_x2_u64( %unused, %zdn1, %zdn2, %zm1, %zm2) {
 ; CHECK-LABEL: multi_vec_min_multi_x2_u64:
 ; CHECK: // %bb.0:
@@ -487,9 +318,7 @@
 %res = call { , } @llvm.aarch64.sve.umin.x2.nxv2i64( %zdn1, %zdn2, %zm1, %zm2)
 ret { , } %res
 }
-
 ; FMIN (Multi, x2)
-
 define { , } @multi_vec_min_multi_x2_f16( %unused, %zdn1, %zdn2, %zm1, %zm2) {
 ; CHECK-LABEL: multi_vec_min_multi_x2_f16:
 ; CHECK: // %bb.0:
@@ -504,7 +333,6 @@
 %res = call { , } @llvm.aarch64.sve.fmin.x2.nxv8f16( %zdn1, %zdn2, %zm1, %zm2)
 ret { , } %res
 }
-
 define { , } @multi_vec_min_multi_x2_f32( %unused, %zdn1, %zdn2, %zm1, %zm2) {
 ; CHECK-LABEL: multi_vec_min_multi_x2_f32:
 ; CHECK: // %bb.0:
@@ -519,7 +347,6 @@
 %res = call { , } @llvm.aarch64.sve.fmin.x2.nxv4f32( %zdn1, %zdn2, %zm1, %zm2)
 ret { , } %res
 }
-
 define { , } @multi_vec_min_multi_x2_f64( %unused, %zdn1, %zdn2, %zm1, %zm2) {
 ; CHECK-LABEL: multi_vec_min_multi_x2_f64:
 ; CHECK: // %bb.0:
@@ -534,301 +361,98 @@
 %res = call { , } @llvm.aarch64.sve.fmin.x2.nxv2f64( %zdn1, %zdn2, %zm1, %zm2)
 ret { , } %res
 }
-
 ; SMIN (Multi, x4)
-
 define { , , , } @multi_vec_min_multi_x4_s8( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) {
-; CHECK-LABEL: multi_vec_min_multi_x4_s8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z30.d, z7.d
-; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: mov z29.d, z6.d
-; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z28.d, z5.d
-; CHECK-NEXT: mov z26.d, z3.d
-; CHECK-NEXT: ld1b { z31.b }, p0/z, [x0]
-; CHECK-NEXT: mov z25.d, z2.d
-; CHECK-NEXT: mov z24.d, z1.d
-; CHECK-NEXT: smin { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
-; CHECK-NEXT: mov z0.d, z24.d
-; CHECK-NEXT: mov z1.d, z25.d
-; CHECK-NEXT: mov z2.d, z26.d
-; CHECK-NEXT: mov z3.d, z27.d
-; CHECK-NEXT: ret
 %res = call { , , , } @llvm.aarch64.sve.smin.x4.nxv16i8( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4)
 ret { , , , } %res
 }
-
 define { , , , } @multi_vec_min_multi_x4_s16( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) {
-; CHECK-LABEL: multi_vec_min_multi_x4_s16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z30.d, z7.d
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: mov z29.d, z6.d
-; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z28.d, z5.d
-; CHECK-NEXT: mov z26.d, z3.d
-; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0]
-; CHECK-NEXT: mov z25.d, z2.d
-; CHECK-NEXT: mov z24.d, z1.d
-; CHECK-NEXT: smin { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
-; CHECK-NEXT: mov z0.d, z24.d
-; CHECK-NEXT: mov z1.d, z25.d
-; CHECK-NEXT: mov z2.d, z26.d
-; CHECK-NEXT: mov z3.d, z27.d
-; CHECK-NEXT: ret
 %res = call { , , , } @llvm.aarch64.sve.smin.x4.nxv8i16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4)
 ret { , , , } %res
 }
-
 define { , , , } @multi_vec_min_multi_x4_s32( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) {
-; CHECK-LABEL: multi_vec_min_multi_x4_s32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z30.d, z7.d
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: mov z29.d, z6.d
-; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z28.d, z5.d
-; CHECK-NEXT: mov z26.d, z3.d
-; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0]
-; CHECK-NEXT: mov z25.d, z2.d
-; CHECK-NEXT: mov z24.d, z1.d
-; CHECK-NEXT: smin { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
-; CHECK-NEXT: mov z0.d, z24.d
-; CHECK-NEXT: mov z1.d, z25.d
-; CHECK-NEXT: mov z2.d, z26.d
-; CHECK-NEXT: mov z3.d, z27.d
-; CHECK-NEXT: ret
 %res = call { , , , } @llvm.aarch64.sve.smin.x4.nxv4i32( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4)
 ret { , , , } %res
 }
-
 define { , , , } @multi_vec_min_multi_x4_s64( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) {
-; CHECK-LABEL: multi_vec_min_multi_x4_s64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z30.d, z7.d
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z29.d, z6.d
-; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z28.d, z5.d
-; CHECK-NEXT: mov z26.d, z3.d
-; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0]
-; CHECK-NEXT: mov z25.d, z2.d
-; CHECK-NEXT: mov z24.d, z1.d
-; CHECK-NEXT: smin { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
-; CHECK-NEXT: mov z0.d, z24.d
-; CHECK-NEXT: mov z1.d, z25.d
-; CHECK-NEXT: mov z2.d, z26.d
-; CHECK-NEXT: mov z3.d, z27.d
-; CHECK-NEXT: ret
 %res = call { , , , } @llvm.aarch64.sve.smin.x4.nxv2i64( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4)
 ret { , , , } %res
 }
-
 ; UMIN (Multi, x4)
-
 define { , , , } @multi_vec_min_multi_x4_u8( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) {
-; CHECK-LABEL: multi_vec_min_multi_x4_u8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z30.d, z7.d
-; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: mov z29.d, z6.d
-; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z28.d, z5.d
-; CHECK-NEXT: mov z26.d, z3.d
-; CHECK-NEXT: ld1b { z31.b }, p0/z, [x0]
-; CHECK-NEXT: mov z25.d, z2.d
-; CHECK-NEXT: mov z24.d, z1.d
-; CHECK-NEXT: umin { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
-; CHECK-NEXT: mov z0.d, z24.d
-; CHECK-NEXT: mov z1.d, z25.d
-; CHECK-NEXT: mov z2.d, z26.d
-; CHECK-NEXT: mov z3.d, z27.d
-; CHECK-NEXT: ret
 %res = call { , , , } @llvm.aarch64.sve.umin.x4.nxv16i8( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4)
 ret { , , , } %res
 }
-
 define { , , , } @multi_vec_min_multi_x4_u16( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) {
-; CHECK-LABEL: multi_vec_min_multi_x4_u16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z30.d, z7.d
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: mov z29.d, z6.d
-; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z28.d, z5.d
-; CHECK-NEXT: mov z26.d, z3.d
-; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0]
-; CHECK-NEXT: mov z25.d, z2.d
-; CHECK-NEXT: mov z24.d, z1.d
-; CHECK-NEXT: umin { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
-; CHECK-NEXT: mov z0.d, z24.d
-; CHECK-NEXT: mov z1.d, z25.d
-; CHECK-NEXT: mov z2.d, z26.d
-; CHECK-NEXT: mov z3.d, z27.d
-; CHECK-NEXT: ret
 %res = call { , , , } @llvm.aarch64.sve.umin.x4.nxv8i16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4)
 ret { , , , } %res
 }
-
 define { , , , } @multi_vec_min_multi_x4_u32( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) {
-; CHECK-LABEL: multi_vec_min_multi_x4_u32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z30.d, z7.d
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: mov z29.d, z6.d
-; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z28.d, z5.d
-; CHECK-NEXT: mov z26.d, z3.d
-; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0]
-; CHECK-NEXT: mov z25.d, z2.d
-; CHECK-NEXT: mov z24.d, z1.d
-; CHECK-NEXT: umin { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
-; CHECK-NEXT: mov z0.d, z24.d
-; CHECK-NEXT: mov z1.d, z25.d
-; CHECK-NEXT: mov z2.d, z26.d
-; CHECK-NEXT: mov z3.d, z27.d
-; CHECK-NEXT: ret
 %res = call { , , , } @llvm.aarch64.sve.umin.x4.nxv4i32( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4)
 ret { , , , } %res
 }
-
 define { , , , } @multi_vec_min_multi_x4_u64( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) {
-; CHECK-LABEL: multi_vec_min_multi_x4_u64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z30.d, z7.d
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z29.d, z6.d
-; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z28.d, z5.d
-; CHECK-NEXT: mov z26.d, z3.d
-; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0]
-; CHECK-NEXT: mov z25.d, z2.d
-; CHECK-NEXT: mov z24.d, z1.d
-; CHECK-NEXT: umin { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
-; CHECK-NEXT: mov z0.d, z24.d
-; CHECK-NEXT: mov z1.d, z25.d
-; CHECK-NEXT: mov z2.d, z26.d
-; CHECK-NEXT: mov z3.d, z27.d
-; CHECK-NEXT: ret
 %res = call { , , , } @llvm.aarch64.sve.umin.x4.nxv2i64( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4)
 ret { , , , } %res
 }
-
 ; FMIN (Multi, x4)
-
 define { , , , } @multi_vec_min_multi_x4_f16( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) {
-; CHECK-LABEL: multi_vec_min_multi_x4_f16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z30.d, z7.d
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: mov z29.d, z6.d
-; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z28.d, z5.d
-; CHECK-NEXT: mov z26.d, z3.d
-; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0]
-; CHECK-NEXT: mov z25.d, z2.d
-; CHECK-NEXT: mov z24.d, z1.d
-; CHECK-NEXT: fmin { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
-; CHECK-NEXT: mov z0.d, z24.d
-; CHECK-NEXT: mov z1.d, z25.d
-; CHECK-NEXT: mov z2.d, z26.d
-; CHECK-NEXT: mov z3.d, z27.d
-; CHECK-NEXT: ret
 %res = call { , , , } @llvm.aarch64.sve.fmin.x4.nxv8f16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4)
 ret { , , , } %res
 }
-
 define { , , , } @multi_vec_min_multi_x4_f32( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) {
-; CHECK-LABEL: multi_vec_min_multi_x4_f32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z30.d, z7.d
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: mov z29.d, z6.d
-; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z28.d, z5.d
-; CHECK-NEXT: mov z26.d, z3.d
-; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0]
-; CHECK-NEXT: mov z25.d, z2.d
-; CHECK-NEXT: mov z24.d, z1.d
-; CHECK-NEXT: fmin { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
-; CHECK-NEXT: mov z0.d, z24.d
-; CHECK-NEXT: mov z1.d, z25.d
-; CHECK-NEXT: mov z2.d, z26.d
-; CHECK-NEXT: mov z3.d, z27.d
-; CHECK-NEXT: ret
 %res = call { , , , } @llvm.aarch64.sve.fmin.x4.nxv4f32( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4)
 ret { , , , } %res
 }
-
 define { , , , } @multi_vec_min_multi_x4_f64( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) {
-; CHECK-LABEL: multi_vec_min_multi_x4_f64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z30.d, z7.d
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z29.d, z6.d
-; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z28.d, z5.d
-; CHECK-NEXT: mov z26.d, z3.d
-; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0]
-; CHECK-NEXT: mov z25.d, z2.d
-; CHECK-NEXT: mov z24.d, z1.d
-; CHECK-NEXT: fmin { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
-; CHECK-NEXT: mov z0.d, z24.d
-; CHECK-NEXT: mov z1.d, z25.d
-; CHECK-NEXT: mov z2.d, z26.d
-; CHECK-NEXT: mov z3.d, z27.d
-; CHECK-NEXT: ret
 %res = call { , , , } @llvm.aarch64.sve.fmin.x4.nxv2f64( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4)
 ret { , , , } %res
 }
-
 ; FMINNM (Single, x2)
-
 define { , } @multi_vec_minnm_single_x2_f16( %dummy, %zdn1, %zdn2, %zm) {
 ; CHECK-LABEL: multi_vec_minnm_single_x2_f16:
 ; CHECK: // %bb.0:
@@ -841,7 +465,6 @@
 %res = call { , } @llvm.aarch64.sve.fminnm.single.x2.nxv8f16( %zdn1, %zdn2, %zm)
 ret { , } %res
 }
-
 define { , } @multi_vec_minnm_single_x2_f32( %dummy, %zdn1, %zdn2, %zm) {
 ; CHECK-LABEL: multi_vec_minnm_single_x2_f32:
 ; CHECK: // %bb.0:
@@ -854,7 +477,6 @@
 %res = call { , } @llvm.aarch64.sve.fminnm.single.x2.nxv4f32( %zdn1, %zdn2, %zm)
 ret { , } %res
 }
-
 define { , } @multi_vec_minnm_single_x2_f64( %dummy, %zdn1, %zdn2, %zm) {
 ; CHECK-LABEL: multi_vec_minnm_single_x2_f64:
 ; CHECK: // %bb.0:
@@ -867,68 +489,26 @@
 %res = call { , } @llvm.aarch64.sve.fminnm.single.x2.nxv2f64( %zdn1, %zdn2, %zm)
 ret { , } %res
 }
-
 ; FMINNM (Single, x4)
-
 define { , , , } @multi_vec_minnm_single_x4_f16( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm) {
-; CHECK-LABEL: multi_vec_minnm_single_x4_f16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z26.d, z3.d
-; CHECK-NEXT: mov z25.d, z2.d
-; CHECK-NEXT: mov z24.d, z1.d
-; CHECK-NEXT: fminnm { z24.h - z27.h }, { z24.h - z27.h }, z5.h
-; CHECK-NEXT: mov z0.d, z24.d
-; CHECK-NEXT: mov z1.d, z25.d
-; CHECK-NEXT: mov z2.d, z26.d
-; CHECK-NEXT: mov z3.d, z27.d
-; CHECK-NEXT: ret
 %res = call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv8f16( %zdn1, %zdn2, %zdn3, %zdn4, %zm)
 ret { , , , } %res
 }
-
 define { , , , } @multi_vec_minnm_single_x4_f32( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm) {
-; CHECK-LABEL: multi_vec_minnm_single_x4_f32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z26.d, z3.d
-; CHECK-NEXT: mov z25.d, z2.d
-; CHECK-NEXT: mov z24.d, z1.d
-; CHECK-NEXT: fminnm { z24.s - z27.s }, { z24.s - z27.s }, z5.s
-; CHECK-NEXT: mov z0.d, z24.d
-; CHECK-NEXT: mov z1.d, z25.d
-; CHECK-NEXT: mov z2.d, z26.d
-; CHECK-NEXT: mov z3.d, z27.d
-; CHECK-NEXT: ret
 %res = call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv4f32( %zdn1, %zdn2, %zdn3, %zdn4, %zm)
 ret { , , , } %res
 }
-
 define { , , , } @multi_vec_minnm_single_x4_f64( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm) {
-; CHECK-LABEL: multi_vec_minnm_single_x4_f64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z26.d, z3.d
-; CHECK-NEXT: mov z25.d, z2.d
-; CHECK-NEXT: mov z24.d, z1.d
-; CHECK-NEXT: fminnm { z24.d - z27.d }, { z24.d - z27.d }, z5.d
-; CHECK-NEXT: mov z0.d, z24.d
-; CHECK-NEXT: mov z1.d, z25.d
-; CHECK-NEXT: mov z2.d, z26.d
-; CHECK-NEXT: mov z3.d, z27.d
-; CHECK-NEXT: ret
 %res = call { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv2f64( %zdn1, %zdn2, %zdn3, %zdn4, %zm)
 ret { , , , } %res
 }
-
 ; FMINNM (Multi, x2)
-
 define { , } @multi_vec_minnm_x2_f16( %dummy, %zdn1, %zdn2, %zm1, %zm2) {
 ; CHECK-LABEL: multi_vec_minnm_x2_f16:
 ; CHECK: // %bb.0:
@@ -943,7 +523,6 @@
 %res = call { , } @llvm.aarch64.sve.fminnm.x2.nxv8f16( %zdn1, %zdn2, %zm1, %zm2)
 ret { , } %res
 }
-
 define { , } @multi_vec_minnm_x2_f32( %dummy, %zdn1, %zdn2, %zm1, %zm2) {
 ; CHECK-LABEL: multi_vec_minnm_x2_f32:
 ; CHECK: // %bb.0:
@@ -958,7 +537,6 @@
 %res = call { , } @llvm.aarch64.sve.fminnm.x2.nxv4f32( %zdn1, %zdn2, %zm1, %zm2)
 ret { , } %res
 }
-
 define { , } @multi_vec_minnm_x2_f64( %dummy, %zdn1, %zdn2, %zm1, %zm2) {
 ; CHECK-LABEL: multi_vec_minnm_x2_f64:
 ; CHECK: // %bb.0:
@@ -973,129 +551,64 @@
 %res = call { , } @llvm.aarch64.sve.fminnm.x2.nxv2f64( %zdn1, %zdn2, %zm1, %zm2)
 ret { , } %res
 }
-
 ; FMINNM (Multi, x4)
-
 define { , , , } @multi_vec_minnm_x4_f16( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) {
-; CHECK-LABEL: multi_vec_minnm_x4_f16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z30.d, z7.d
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: mov z29.d, z6.d
-; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z28.d, z5.d
-; CHECK-NEXT: mov z26.d, z3.d
-; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0]
-; CHECK-NEXT: mov z25.d, z2.d
-; CHECK-NEXT: mov z24.d, z1.d
-; CHECK-NEXT: fminnm { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
-; CHECK-NEXT: mov z0.d, z24.d
-; CHECK-NEXT: mov z1.d, z25.d
-; CHECK-NEXT: mov z2.d, z26.d
-; CHECK-NEXT: mov z3.d, z27.d
-; CHECK-NEXT: ret
 %res = call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv8f16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4)
 ret { , , , } %res
 }
-
 define { , , , } @multi_vec_minnm_x4_f32( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) {
-; CHECK-LABEL: multi_vec_minnm_x4_f32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z30.d, z7.d
-; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: mov z29.d, z6.d
-; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z28.d, z5.d
-; CHECK-NEXT: mov z26.d, z3.d
-; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0]
-; CHECK-NEXT: mov z25.d, z2.d
-; CHECK-NEXT: mov z24.d, z1.d
-; CHECK-NEXT: fminnm { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
-; CHECK-NEXT: mov z0.d, z24.d
-; CHECK-NEXT: mov z1.d, z25.d
-; CHECK-NEXT: mov z2.d, z26.d
-; CHECK-NEXT: mov z3.d, z27.d
-; CHECK-NEXT: ret
 %res = call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv4f32( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4)
 ret { , , , } %res
 }
-
 define { , , , } @multi_vec_minnm_x4_f64( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) {
-; CHECK-LABEL: multi_vec_minnm_x4_f64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z30.d, z7.d
-; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z29.d, z6.d
-; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z28.d, z5.d
-; CHECK-NEXT: mov z26.d, z3.d
-; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0]
-; CHECK-NEXT: mov z25.d, z2.d
-; CHECK-NEXT: mov z24.d, z1.d
-; CHECK-NEXT: fminnm { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
-; CHECK-NEXT: mov z0.d, z24.d
-; CHECK-NEXT: mov z1.d, z25.d
-; CHECK-NEXT: mov z2.d, z26.d
-; CHECK-NEXT: mov z3.d, z27.d
-; CHECK-NEXT: ret
 %res = call { , , , } @llvm.aarch64.sve.fminnm.x4.nxv2f64( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4)
 ret { , , , } %res
 }
-
 declare { , } @llvm.aarch64.sve.smin.single.x2.nxv16i8(, , )
 declare { , } @llvm.aarch64.sve.smin.single.x2.nxv8i16(, , )
 declare { , } @llvm.aarch64.sve.smin.single.x2.nxv4i32(, , )
 declare { , } @llvm.aarch64.sve.smin.single.x2.nxv2i64(, , )
-
 declare { , } @llvm.aarch64.sve.umin.single.x2.nxv16i8(, , )
 declare { , } @llvm.aarch64.sve.umin.single.x2.nxv8i16(, , )
 declare { , } @llvm.aarch64.sve.umin.single.x2.nxv4i32(, , )
 declare { , } @llvm.aarch64.sve.umin.single.x2.nxv2i64(, , )
-
 declare { , } @llvm.aarch64.sve.fmin.single.x2.nxv8f16(, , )
 declare { , } @llvm.aarch64.sve.fmin.single.x2.nxv4f32(, , )
 declare { , } @llvm.aarch64.sve.fmin.single.x2.nxv2f64(, , )
-
 declare { , , , } @llvm.aarch64.sve.smin.single.x4.nxv16i8(, , , , )
 declare { , , , } @llvm.aarch64.sve.smin.single.x4.nxv8i16(, , , , )
 declare { , , , } @llvm.aarch64.sve.smin.single.x4.nxv4i32(, , , , )
 declare { , , , } @llvm.aarch64.sve.smin.single.x4.nxv2i64(, , , , )
-
 declare { , , , } @llvm.aarch64.sve.umin.single.x4.nxv16i8(, , , , )
 declare { , , , } @llvm.aarch64.sve.umin.single.x4.nxv8i16(, , , , )
 declare { , , , } @llvm.aarch64.sve.umin.single.x4.nxv4i32(, , , , )
 declare { , , , } @llvm.aarch64.sve.umin.single.x4.nxv2i64(, , , , )
-
 declare { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv8f16(, , , , )
 declare { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv4f32(, , , , )
 declare { , , , } @llvm.aarch64.sve.fmin.single.x4.nxv2f64(, , , , )
-
 declare { , } @llvm.aarch64.sve.smin.x2.nxv16i8(, , , )
 declare { , } @llvm.aarch64.sve.smin.x2.nxv8i16(, , , )
 declare { , } @llvm.aarch64.sve.smin.x2.nxv4i32(, , , )
 declare { , } @llvm.aarch64.sve.smin.x2.nxv2i64(, , , )
-
 declare { , } @llvm.aarch64.sve.umin.x2.nxv16i8(, , , )
 declare { , } @llvm.aarch64.sve.umin.x2.nxv8i16(, , , )
 declare { , } @llvm.aarch64.sve.umin.x2.nxv4i32(, , , )
 declare { , } @llvm.aarch64.sve.umin.x2.nxv2i64(, , , )
-
 declare { , } @llvm.aarch64.sve.fmin.x2.nxv8f16(, , , )
 declare { , } @llvm.aarch64.sve.fmin.x2.nxv4f32(, , , )
 declare { , } @llvm.aarch64.sve.fmin.x2.nxv2f64(, , , )
-
 declare { , , , }
 @llvm.aarch64.sve.smin.x4.nxv16i8(, , , , , , , )
 declare { , , , }
@@ -1104,7 +617,6 @@
 @llvm.aarch64.sve.smin.x4.nxv4i32(, , , , , , , )
 declare { , , , }
 @llvm.aarch64.sve.smin.x4.nxv2i64(, , , , , , , )
-
 declare { , , , }
 @llvm.aarch64.sve.umin.x4.nxv16i8(, , , , , , , )
 declare { , , , }
@@ -1113,29 +625,24 @@
 @llvm.aarch64.sve.umin.x4.nxv4i32(, , , , , , , )
 declare { , , , }
 @llvm.aarch64.sve.umin.x4.nxv2i64(, , , , , , , )
-
 declare { , , , }
 @llvm.aarch64.sve.fmin.x4.nxv8f16(, , , , , , , )
 declare { , , , }
 @llvm.aarch64.sve.fmin.x4.nxv4f32(, , , , , , , )
 declare { , , , }
 @llvm.aarch64.sve.fmin.x4.nxv2f64(, , , , , , , )
-
 declare { , } @llvm.aarch64.sve.fminnm.single.x2.nxv8f16(, , )
 declare { , } @llvm.aarch64.sve.fminnm.single.x2.nxv4f32(, , )
 declare { , } @llvm.aarch64.sve.fminnm.single.x2.nxv2f64(, , )
-
 declare { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv8f16(, , , , )
 declare { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv4f32(, , , , )
 declare { , , , } @llvm.aarch64.sve.fminnm.single.x4.nxv2f64(, , , , )
-
 declare { , } @llvm.aarch64.sve.fminnm.x2.nxv8f16(, , , )
 declare { , } @llvm.aarch64.sve.fminnm.x2.nxv4f32(, , , )
 declare { , } @llvm.aarch64.sve.fminnm.x2.nxv2f64(, , , )
-
 declare { , , , }
 @llvm.aarch64.sve.fminnm.x4.nxv8f16(, , , , , , , )
 declare { , , , }
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlall.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlall.ll
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlall.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlall.ll
@@ -38,8 +38,8 @@
 define void @multi_vector_mul_add_single_long_vg4x2_s8(i32 %slice, %dummy, %zn0, %zn1, %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x2_s8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT: smlall za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b
 ; CHECK-NEXT: smlall za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b
@@ -53,8 +53,8 @@
 define void @multi_vector_mul_add_single_long_vg4x2_s16(i32 %slice, %dummy, %zn0, %zn1, %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x2_s16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT: smlall za.d[w8, 0:3, vgx2], { z1.h, z2.h }, z3.h
 ; CHECK-NEXT: smlall za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z3.h
@@ -106,9 +106,9 @@
 define void @multi_vector_mul_add_multi_long_vg4x2_s8(i32 %slice, %dummy, %zn0, %zn1, %zm0, %zm1) {
 ; CHECK-LABEL: multi_vector_mul_add_multi_long_vg4x2_s8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: mov z5.d, z4.d
 ; CHECK-NEXT: mov z7.d, z2.d
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: mov z4.d, z3.d
 ; CHECK-NEXT: mov z6.d, z1.d
 ; CHECK-NEXT: smlall za.s[w8, 0:3, vgx2], { z6.b, z7.b }, { z4.b, z5.b }
@@ -123,9 +123,9 @@
 define void @multi_vector_mul_add_multi_long_vg4x2_s16(i32 %slice, %dummy, %zn0, %zn1, %zm0, %zm1) {
 ; CHECK-LABEL: multi_vector_mul_add_multi_long_vg4x2_s16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: mov z5.d, z4.d
 ; CHECK-NEXT: mov z7.d, z2.d
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: mov z4.d, z3.d
 ; CHECK-NEXT: mov z6.d, z1.d
 ; CHECK-NEXT: smlall za.d[w8, 0:3, vgx2], { z6.h, z7.h }, { z4.h, z5.h }
@@ -142,16 +142,16 @@
 define void @multi_vector_mul_add_multi_long_vg4x4_s8(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) {
 ; CHECK-LABEL: multi_vector_mul_add_multi_long_vg4x4_s8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov z26.d, z7.d
 ; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: mov z25.d, z6.d
+; CHECK-NEXT: mov z26.d, z7.d
 ; CHECK-NEXT: mov z31.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z30.d, z3.d
-; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1]
+; CHECK-NEXT: mov z25.d, z6.d
 ; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z30.d, z3.d
+; CHECK-NEXT: mov z24.d, z5.d
 ; CHECK-NEXT: mov z29.d, z2.d
 ; CHECK-NEXT: mov z28.d, z1.d
+; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1]
 ; CHECK-NEXT: smlall za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
 ; CHECK-NEXT: smlall za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
 ; CHECK-NEXT: ret
@@ -164,16 +164,16 @@
 define void @multi_vector_mul_add_multi_long_vg4x4_s16(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) {
 ; CHECK-LABEL: multi_vector_mul_add_multi_long_vg4x4_s16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov z26.d, z7.d
 ; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: mov z25.d, z6.d
+; CHECK-NEXT: mov z26.d, z7.d
 ; CHECK-NEXT: mov z31.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z30.d, z3.d
-; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1]
+; CHECK-NEXT: mov z25.d, z6.d
 ; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z30.d, z3.d
+; CHECK-NEXT: mov z24.d, z5.d
 ; CHECK-NEXT: mov z29.d, z2.d
 ; CHECK-NEXT: mov z28.d, z1.d
+; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1]
 ; CHECK-NEXT: smlall za.d[w8, 0:3, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
 ; CHECK-NEXT: smlall za.d[w8, 4:7, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
 ; CHECK-NEXT: ret
@@ -216,8 +216,8 @@
 define void @multi_vector_mul_add_lane_long_vg4x2_s8(i32 %slice, %dummy, %zn0, %zn1, %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_lane_long_vg4x2_s8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: mov z5.d, z2.d
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: mov z4.d, z1.d
 ; CHECK-NEXT: smlall za.s[w8, 0:3, vgx2], { z4.b, z5.b }, z3.b[0]
 ; CHECK-NEXT: smlall za.s[w8, 4:7, vgx2], { z4.b, z5.b }, z3.b[15]
@@ -231,8 +231,8 @@
 define void @multi_vector_mul_add_lane_long_vg4x2_s16(i32 %slice, %dummy, %zn0, %zn1, %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_lane_long_vg4x2_s16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: mov z5.d, z2.d
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: mov z4.d, z1.d
 ; CHECK-NEXT: smlall za.d[w8, 0:3, vgx2], { z4.h, z5.h }, z3.h[0]
 ; CHECK-NEXT: smlall za.d[w8, 4:7, vgx2], { z4.h, z5.h }, z3.h[7]
@@ -314,8 +314,8 @@
 define void @multi_vector_mul_add_single_long_vg4x2_u8(i32 %slice, %dummy, %zn0, %zn1, %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x2_u8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT: umlall za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b
 ; CHECK-NEXT: umlall za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b
@@ -329,8 +329,8 @@
 define void @multi_vector_mul_add_single_long_vg4x2_u16(i32 %slice, %dummy, %zn0, %zn1, %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_single_long_vg4x2_u16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT: umlall za.d[w8, 0:3, vgx2], { z1.h, z2.h }, z3.h
 ; CHECK-NEXT: umlall za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z3.h
@@ -382,9 +382,9 @@
 define void @multi_vector_mul_add_multi_long_vg4x2_u8(i32 %slice, %dummy, %zn0, %zn1, %zm0, %zm1) {
 ; CHECK-LABEL: multi_vector_mul_add_multi_long_vg4x2_u8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: mov z5.d, z4.d
 ; CHECK-NEXT: mov z7.d, z2.d
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: mov z4.d, z3.d
 ; CHECK-NEXT: mov z6.d, z1.d
 ; CHECK-NEXT: umlall za.s[w8, 0:3, vgx2], { z6.b, z7.b }, { z4.b, z5.b }
@@ -399,9 +399,9 @@
 define void @multi_vector_mul_add_multi_long_vg4x2_u16(i32 %slice, %dummy, %zn0, %zn1, %zm0, %zm1) {
 ; CHECK-LABEL: multi_vector_mul_add_multi_long_vg4x2_u16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: mov z5.d, z4.d
 ; CHECK-NEXT: mov z7.d, z2.d
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: mov z4.d, z3.d
 ; CHECK-NEXT: mov z6.d, z1.d
 ; CHECK-NEXT: umlall za.d[w8, 0:3, vgx2], { z6.h, z7.h }, { z4.h, z5.h }
@@ -418,16 +418,16 @@
 define void @multi_vector_mul_add_multi_long_vg4x4_u8(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) {
 ; CHECK-LABEL: multi_vector_mul_add_multi_long_vg4x4_u8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov z26.d, z7.d
 ; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: mov z25.d, z6.d
+; CHECK-NEXT: mov z26.d, z7.d
 ; CHECK-NEXT: mov z31.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z30.d, z3.d
-; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1]
+; CHECK-NEXT: mov z25.d, z6.d
 ; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z30.d, z3.d
+; CHECK-NEXT: mov z24.d, z5.d
 ; CHECK-NEXT: mov z29.d, z2.d
 ; CHECK-NEXT: mov z28.d, z1.d
+; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1]
 ; CHECK-NEXT: umlall za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
 ; CHECK-NEXT: umlall za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
 ; CHECK-NEXT: ret
@@ -440,16 +440,16 @@
 define void @multi_vector_mul_add_multi_long_vg4x4_u16(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) {
 ; CHECK-LABEL: multi_vector_mul_add_multi_long_vg4x4_u16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov z26.d, z7.d
 ; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: mov z25.d, z6.d
+; CHECK-NEXT: mov z26.d, z7.d
 ; CHECK-NEXT: mov z31.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z30.d, z3.d
-; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1]
+; CHECK-NEXT: mov z25.d, z6.d
 ; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z30.d, z3.d
+; CHECK-NEXT: mov z24.d, z5.d
 ; CHECK-NEXT: mov z29.d, z2.d
 ; CHECK-NEXT: mov z28.d, z1.d
+; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1]
 ; CHECK-NEXT: umlall za.d[w8, 0:3, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
 ; CHECK-NEXT: umlall za.d[w8, 4:7, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
 ; CHECK-NEXT: ret
@@ -492,8 +492,8 @@
 define void @multi_vector_mul_add_lane_long_vg4x2_u8(i32 %slice, %dummy, %zn0, %zn1, %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_lane_long_vg4x2_u8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: mov z5.d, z2.d
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: mov z4.d, z1.d
 ; CHECK-NEXT: umlall za.s[w8, 0:3, vgx2], { z4.b, z5.b }, z3.b[0]
 ; CHECK-NEXT: umlall za.s[w8, 4:7, vgx2], { z4.b, z5.b }, z3.b[15]
@@ -507,8 +507,8 @@
 define void @multi_vector_mul_add_lane_long_vg4x2_u16(i32 %slice, %dummy, %zn0, %zn1, %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_lane_long_vg4x2_u16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: mov z5.d, z2.d
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: mov z4.d, z1.d
 ; CHECK-NEXT: umlall za.d[w8, 0:3, vgx2], { z4.h, z5.h }, z3.h[0]
 ; CHECK-NEXT: umlall za.d[w8, 4:7, vgx2], { z4.h, z5.h }, z3.h[7]
@@ -590,8 +590,8 @@
 define void @multi_vector_mul_sub_single_long_vg4x2_s8(i32 %slice, %dummy, %zn0, %zn1, %zm) {
 ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x2_s8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT: smlsll za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b
 ; CHECK-NEXT: smlsll za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b
@@ -605,8 +605,8 @@
 define void @multi_vector_mul_sub_single_long_vg4x2_s16(i32 %slice, %dummy, %zn0, %zn1, %zm) {
 ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x2_s16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT: smlsll za.d[w8, 0:3, vgx2], { z1.h, z2.h }, z3.h
 ; CHECK-NEXT: smlsll za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z3.h
@@ -658,9 +658,9 @@
 define void @multi_vector_mul_sub_multi_long_vg4x2_s8(i32 %slice, %dummy, %zn0, %zn1, %zm0, %zm1) {
 ; CHECK-LABEL: multi_vector_mul_sub_multi_long_vg4x2_s8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: mov z5.d, z4.d
 ; CHECK-NEXT: mov z7.d, z2.d
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: mov z4.d, z3.d
 ; CHECK-NEXT: mov z6.d, z1.d
 ; CHECK-NEXT: smlsll za.s[w8, 0:3, vgx2], { z6.b, z7.b }, { z4.b, z5.b }
@@ -675,9 +675,9 @@
 define void @multi_vector_mul_sub_multi_long_vg4x2_s16(i32 %slice, %dummy, %zn0, %zn1, %zm0, %zm1) {
 ; CHECK-LABEL: multi_vector_mul_sub_multi_long_vg4x2_s16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: mov z5.d, z4.d
 ; CHECK-NEXT: mov z7.d, z2.d
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: mov z4.d, z3.d
 ; CHECK-NEXT: mov z6.d, z1.d
 ; CHECK-NEXT: smlsll za.d[w8, 0:3, vgx2], { z6.h, z7.h }, { z4.h, z5.h }
@@ -694,16 +694,16 @@
 define void @multi_vector_mul_sub_multi_long_vg4x4_s8(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) {
 ; CHECK-LABEL: multi_vector_mul_sub_multi_long_vg4x4_s8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov z26.d, z7.d
 ; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: mov z25.d, z6.d
+; CHECK-NEXT: mov z26.d, z7.d
 ; CHECK-NEXT: mov z31.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z30.d, z3.d
-; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1]
+; CHECK-NEXT: mov z25.d, z6.d
 ; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z30.d, z3.d
+; CHECK-NEXT: mov z24.d, z5.d
 ; CHECK-NEXT: mov z29.d, z2.d
 ; CHECK-NEXT: mov z28.d, z1.d
+; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1]
 ; CHECK-NEXT: smlsll za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
 ; CHECK-NEXT: smlsll za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
 ; CHECK-NEXT: ret
@@ -716,16 +716,16 @@
 define void @multi_vector_mul_sub_multi_long_vg4x4_s16(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) {
 ; CHECK-LABEL: multi_vector_mul_sub_multi_long_vg4x4_s16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov z26.d, z7.d
 ; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: mov z25.d, z6.d
+; CHECK-NEXT: mov z26.d, z7.d
 ; CHECK-NEXT: mov z31.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z30.d, z3.d
-; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1]
+; CHECK-NEXT: mov z25.d, z6.d
 ; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z30.d, z3.d
+; CHECK-NEXT: mov z24.d, z5.d
 ; CHECK-NEXT: mov z29.d, z2.d
 ; CHECK-NEXT: mov z28.d, z1.d
+; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1]
 ; CHECK-NEXT: smlsll za.d[w8, 0:3, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
 ; CHECK-NEXT: smlsll za.d[w8, 4:7, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
 ; CHECK-NEXT: ret
@@ -768,8 +768,8 @@
 define void @multi_vector_mul_sub_lane_long_vg4x2_s8(i32 %slice, %dummy, %zn0, %zn1, %zm) {
 ; CHECK-LABEL: multi_vector_mul_sub_lane_long_vg4x2_s8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: mov z5.d, z2.d
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: mov z4.d, z1.d
 ; CHECK-NEXT: smlsll za.s[w8, 0:3, vgx2], { z4.b, z5.b }, z3.b[0]
 ; CHECK-NEXT: smlsll za.s[w8, 4:7, vgx2], { z4.b, z5.b }, z3.b[15]
@@ -783,8 +783,8 @@
 define void @multi_vector_mul_sub_lane_long_vg4x2_s16(i32 %slice, %dummy, %zn0, %zn1, %zm) {
 ; CHECK-LABEL: multi_vector_mul_sub_lane_long_vg4x2_s16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: mov z5.d, z2.d
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: mov z4.d, z1.d
 ; CHECK-NEXT: smlsll za.d[w8, 0:3, vgx2], { z4.h, z5.h }, z3.h[0]
 ; CHECK-NEXT: smlsll za.d[w8, 4:7, vgx2], { z4.h, z5.h }, z3.h[7]
@@ -866,8 +866,8 @@
 define void @multi_vector_mul_sub_single_long_vg4x2_u8(i32 %slice, %dummy, %zn0, %zn1, %zm) {
 ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x2_u8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT: umlsll za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b
 ; CHECK-NEXT: umlsll za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b
@@ -881,8 +881,8 @@
 define void @multi_vector_mul_sub_single_long_vg4x2_u16(i32 %slice, %dummy, %zn0, %zn1, %zm) {
 ; CHECK-LABEL: multi_vector_mul_sub_single_long_vg4x2_u16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT: umlsll za.d[w8, 0:3, vgx2], { z1.h, z2.h }, z3.h
 ; CHECK-NEXT: umlsll za.d[w8, 4:7, vgx2], { z1.h, z2.h }, z3.h
@@ -934,9 +934,9 @@
 define void @multi_vector_mul_sub_multi_long_vg4x2_u8(i32 %slice, %dummy, %zn0, %zn1, %zm0, %zm1) {
 ; CHECK-LABEL: multi_vector_mul_sub_multi_long_vg4x2_u8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: mov z5.d, z4.d
 ; CHECK-NEXT: mov z7.d, z2.d
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: mov z4.d, z3.d
 ; CHECK-NEXT: mov z6.d, z1.d
 ; CHECK-NEXT: umlsll za.s[w8, 0:3, vgx2], { z6.b, z7.b }, { z4.b, z5.b }
@@ -951,9 +951,9 @@
 define void @multi_vector_mul_sub_multi_long_vg4x2_u16(i32 %slice, %dummy, %zn0, %zn1, %zm0, %zm1) {
 ; CHECK-LABEL: multi_vector_mul_sub_multi_long_vg4x2_u16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: mov z5.d, z4.d
 ; CHECK-NEXT: mov z7.d, z2.d
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: mov z4.d, z3.d
 ; CHECK-NEXT: mov z6.d, z1.d
 ; CHECK-NEXT: umlsll za.d[w8, 0:3, vgx2], { z6.h, z7.h }, { z4.h, z5.h }
@@ -970,16 +970,16 @@
 define void @multi_vector_mul_sub_multi_long_vg4x4_u8(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) {
 ; CHECK-LABEL: multi_vector_mul_sub_multi_long_vg4x4_u8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov z26.d, z7.d
 ; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: mov z25.d, z6.d
+; CHECK-NEXT: mov z26.d, z7.d
 ; CHECK-NEXT: mov z31.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z30.d, z3.d
-; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1]
+; CHECK-NEXT: mov z25.d, z6.d
 ; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z30.d, z3.d
+; CHECK-NEXT: mov z24.d, z5.d
 ; CHECK-NEXT: mov z29.d, z2.d
 ; CHECK-NEXT: mov z28.d, z1.d
+; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1]
 ; CHECK-NEXT: umlsll za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
 ; CHECK-NEXT: umlsll za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
 ; CHECK-NEXT: ret
@@ -992,16 +992,16 @@
 define void @multi_vector_mul_sub_multi_long_vg4x4_u16(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) {
 ; CHECK-LABEL: multi_vector_mul_sub_multi_long_vg4x4_u16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov z26.d, z7.d
 ; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: mov z25.d, z6.d
+; CHECK-NEXT: mov z26.d, z7.d
 ; CHECK-NEXT: mov z31.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z30.d, z3.d
-; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1]
+; CHECK-NEXT: mov z25.d, z6.d
 ; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z30.d, z3.d
+; CHECK-NEXT: mov z24.d, z5.d
 ; CHECK-NEXT: mov z29.d, z2.d
 ; CHECK-NEXT: mov z28.d, z1.d
+; CHECK-NEXT: ld1h { z27.h }, p0/z, [x1]
 ; CHECK-NEXT: umlsll za.d[w8, 0:3, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
 ; CHECK-NEXT: umlsll za.d[w8, 4:7, vgx4], { z28.h - z31.h }, { z24.h - z27.h }
 ; CHECK-NEXT: ret
@@ -1044,8 +1044,8 @@
 define void @multi_vector_mul_sub_lane_long_vg4x2_u8(i32 %slice, %dummy, %zn0, %zn1, %zm) {
 ; CHECK-LABEL: multi_vector_mul_sub_lane_long_vg4x2_u8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: mov z5.d, z2.d
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: mov z4.d, z1.d
 ; CHECK-NEXT: umlsll za.s[w8, 0:3, vgx2], { z4.b, z5.b }, z3.b[0]
 ; CHECK-NEXT: umlsll za.s[w8, 4:7, vgx2], { z4.b, z5.b }, z3.b[15]
@@ -1059,8 +1059,8 @@
 define void @multi_vector_mul_sub_lane_long_vg4x2_u16(i32 %slice, %dummy, %zn0, %zn1, %zm) {
 ; CHECK-LABEL: multi_vector_mul_sub_lane_long_vg4x2_u16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: mov z5.d, z2.d
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: mov z4.d, z1.d
 ; CHECK-NEXT: umlsll za.d[w8, 0:3, vgx2], { z4.h, z5.h }, z3.h[0]
 ; CHECK-NEXT: umlsll za.d[w8, 4:7, vgx2], { z4.h, z5.h }, z3.h[7]
@@ -1116,8 +1116,8 @@
 define void @multi_vector_mul_add_single_signed_long_vg4x2_s8(i32 %slice, %dummy, %zn0, %zn1, %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_single_signed_long_vg4x2_s8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT: sumlall za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b
 ; CHECK-NEXT: sumlall za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b
@@ -1167,8 +1167,8 @@
 define void @multi_vector_mul_add_lane_signed_long_vg4x2_s8(i32 %slice, %dummy, %zn0, %zn1, %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_lane_signed_long_vg4x2_s8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: mov z5.d, z2.d
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: mov z4.d, z1.d
 ; CHECK-NEXT: sumlall za.s[w8, 0:3, vgx2], { z4.b, z5.b }, z3.b[0]
 ; CHECK-NEXT: sumlall za.s[w8, 4:7, vgx2], { z4.b, z5.b }, z3.b[15]
@@ -1220,8 +1220,8 @@
 define void @multi_vector_mul_add_single_unsigned_long_vg4x2_s8(i32 %slice, %dummy, %zn0, %zn1, %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_single_unsigned_long_vg4x2_s8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z1_z2 def $z1_z2
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z1_z2 def $z1_z2
 ; CHECK-NEXT: usmlall za.s[w8, 0:3, vgx2], { z1.b, z2.b }, z3.b
 ; CHECK-NEXT: usmlall za.s[w8, 4:7, vgx2], { z1.b, z2.b }, z3.b
@@ -1256,9 +1256,9 @@
 define void @multi_vector_mul_add_multi_unsigned_long_vg4x2_u8(i32 %slice, %dummy, %zn0, %zn1, %zm0, %zm1) {
 ; CHECK-LABEL: multi_vector_mul_add_multi_unsigned_long_vg4x2_u8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: mov z5.d, z4.d
 ; CHECK-NEXT: mov z7.d, z2.d
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: mov z4.d, z3.d
 ; CHECK-NEXT: mov z6.d, z1.d
 ; CHECK-NEXT: usmlall za.s[w8, 0:3, vgx2], { z6.b, z7.b }, { z4.b, z5.b }
@@ -1275,16 +1275,16 @@
 define void @multi_vector_mul_add_multi_unsigned_long_vg4x4_u8(i32 %slice, %dummy, %zn0, %zn1, %zn2, %zn3, %zm0, %zm1, %zm2, %zm3) {
 ; CHECK-LABEL: multi_vector_mul_add_multi_unsigned_long_vg4x4_u8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov z26.d, z7.d
 ; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: mov z25.d, z6.d
+; CHECK-NEXT: mov z26.d, z7.d
 ; CHECK-NEXT: mov z31.d, z4.d
-; CHECK-NEXT: mov z24.d, z5.d
-; CHECK-NEXT: mov z30.d, z3.d
-; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1]
+; CHECK-NEXT: mov z25.d, z6.d
 ; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: mov z30.d, z3.d
+; CHECK-NEXT: mov z24.d, z5.d
 ; CHECK-NEXT: mov z29.d, z2.d
 ; CHECK-NEXT: mov z28.d, z1.d
+; CHECK-NEXT: ld1b { z27.b }, p0/z, [x1]
 ; CHECK-NEXT: usmlall za.s[w8, 0:3, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
 ; CHECK-NEXT: usmlall za.s[w8, 4:7, vgx4], { z28.b - z31.b }, { z24.b - z27.b }
 ; CHECK-NEXT: ret
@@ -1314,8 +1314,8 @@
 define void @multi_vector_mul_add_lane_unsigned_long_vg4x2_s8(i32 %slice, %dummy, %zn0, %zn1, %zm) {
 ; CHECK-LABEL: multi_vector_mul_add_lane_unsigned_long_vg4x2_s8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: mov z5.d, z2.d
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: mov z4.d, z1.d
 ; CHECK-NEXT: usmlall za.s[w8, 0:3, vgx2], { z4.b, z5.b }, z3.b[0]
 ; CHECK-NEXT: usmlall za.s[w8, 4:7, vgx2], { z4.b, z5.b }, z3.b[15]
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlals.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlals.ll
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlals.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-mlals.ll
@@ -120,8 +120,8 @@
 define void @multi_vector_add_single_vg2x2_bf16(i32 %slice, %zn0, %zn1, %zm) {
 ; CHECK-LABEL: multi_vector_add_single_vg2x2_bf16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT: bfmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT: bfmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h
@@ -135,8 +135,8 @@
 define void @multi_vector_add_single_vg2x2_f16(i32 %slice, %zn0, %zn1, %zm) {
 ; CHECK-LABEL: multi_vector_add_single_vg2x2_f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT: fmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT: fmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h
@@ -150,8 +150,8 @@
 define void @multi_vector_add_single_vg2x2_s16(i32 %slice, %zn0, %zn1, %zm) {
 ; CHECK-LABEL: multi_vector_add_single_vg2x2_s16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT: smlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT: smlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h
@@ -165,8 +165,8 @@
 define void @multi_vector_add_single_vg2x2_u16(i32 %slice, %zn0, %zn1, %zm) {
 ; CHECK-LABEL: multi_vector_add_single_vg2x2_u16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT: umlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT: umlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h
@@ -184,8 +184,8 @@
 define void @multi_vector_sub_single_vg2x2_bf16(i32 %slice, %zn0, %zn1, %zm) {
 ; CHECK-LABEL: multi_vector_sub_single_vg2x2_bf16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT: bfmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT: bfmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h
@@ -199,8 +199,8 @@
 define void @multi_vector_sub_single_vg2x2_f16(i32 %slice, %zn0, %zn1, %zm) {
 ; CHECK-LABEL: multi_vector_sub_single_vg2x2_f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT: fmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT: fmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h
@@ -214,8 +214,8 @@
 define void @multi_vector_sub_single_vg2x2_s16(i32 %slice, %zn0, %zn1, %zm) {
 ; CHECK-LABEL: multi_vector_sub_single_vg2x2_s16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT: smlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT: smlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h
@@ -229,8 +229,8 @@
 define void @multi_vector_sub_single_vg2x2_u16(i32 %slice, %zn0, %zn1, %zm) {
 ; CHECK-LABEL: multi_vector_sub_single_vg2x2_u16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT: umlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h
 ; CHECK-NEXT: umlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h
@@ -424,9 +424,9 @@
 define void @multi_vector_add_multi_vg2x2_bf16(i32 %slice, %zn0, %zn1, %zm0, %zm1) {
 ; CHECK-LABEL: multi_vector_add_multi_vg2x2_bf16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT: bfmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
@@ -443,9 +443,9 @@
 define void @multi_vector_add_multi_vg2x2_f16(i32 %slice, %zn0, %zn1, %zm0, %zm1) {
 ; CHECK-LABEL: multi_vector_add_multi_vg2x2_f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT: fmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
@@ -462,9 +462,9 @@
 define void @multi_vector_add_multi_vg2x2_s16(i32 %slice, %zn0, %zn1, %zm0, %zm1) {
 ; CHECK-LABEL: multi_vector_add_multi_vg2x2_s16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT: smlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
@@ -481,9 +481,9 @@
 define void @multi_vector_add_multi_vg2x2_u16(i32 %slice, %zn0, %zn1, %zm0, %zm1) {
 ; CHECK-LABEL: multi_vector_add_multi_vg2x2_u16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT: umlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
@@ -504,9 +504,9 @@
 define void @multi_vector_sub_multi_vg2x2_bf16(i32 %slice, %zn0, %zn1, %zm0, %zm1) {
 ; CHECK-LABEL: multi_vector_sub_multi_vg2x2_bf16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT: bfmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
@@ -523,9 +523,9 @@
 define void @multi_vector_sub_multi_vg2x2_f16(i32 %slice, %zn0, %zn1, %zm0, %zm1) {
 ; CHECK-LABEL: multi_vector_sub_multi_vg2x2_f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT: fmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
@@ -542,9 +542,9 @@
 define void @multi_vector_sub_multi_vg2x2_s16(i32 %slice, %zn0, %zn1, %zm0, %zm1) {
 ; CHECK-LABEL: multi_vector_sub_multi_vg2x2_s16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT: smlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
@@ -561,9 +561,9 @@
 define void @multi_vector_sub_multi_vg2x2_u16(i32 %slice, %zn0, %zn1, %zm0, %zm1) {
 ; CHECK-LABEL: multi_vector_sub_multi_vg2x2_u16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3
 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3
 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT: umlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, { z2.h, z3.h }
@@ -912,8 +912,8 @@
 define void @multi_vector_add_lane_vg2x2_f16(i32 %slice, %zn0, %zn1, %zm) {
 ; CHECK-LABEL: multi_vector_add_lane_vg2x2_f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT: fmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0]
 ; CHECK-NEXT: fmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7]
@@ -929,8 +929,8 @@
 define void @multi_vector_add_lane_vg2x2_bf16(i32 %slice, %zn0, %zn1, %zm) {
 ; CHECK-LABEL: multi_vector_add_lane_vg2x2_bf16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT: bfmlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0]
 ; CHECK-NEXT: bfmlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7]
@@ -946,8 +946,8 @@
 define void @multi_vector_add_lane_vg2x2_s16(i32 %slice, %zn0, %zn1, %zm) {
 ; CHECK-LABEL: multi_vector_add_lane_vg2x2_s16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT: smlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0]
 ; CHECK-NEXT: smlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7]
@@ -963,8 +963,8 @@
 define void @multi_vector_add_lane_vg2x2_u16(i32 %slice, %zn0, %zn1, %zm) {
 ; CHECK-LABEL: multi_vector_add_lane_vg2x2_u16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT: umlal za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0]
 ; CHECK-NEXT: umlal za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7]
@@ -984,8 +984,8 @@
 define void @multi_vector_sub_lane_vg2x2_f16(i32 %slice, %zn0, %zn1, %zm) {
 ; CHECK-LABEL: multi_vector_sub_lane_vg2x2_f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT: fmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0]
 ; CHECK-NEXT: fmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7]
@@ -1001,8 +1001,8 @@
 define void @multi_vector_sub_lane_vg2x2_bf16(i32 %slice, %zn0, %zn1, %zm) {
 ; CHECK-LABEL: multi_vector_sub_lane_vg2x2_bf16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT: bfmlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0]
 ; CHECK-NEXT: bfmlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7]
@@ -1018,8 +1018,8 @@
 define void @multi_vector_sub_lane_vg2x2_s16(i32 %slice, %zn0, %zn1, %zm) {
 ; CHECK-LABEL: multi_vector_sub_lane_vg2x2_s16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT: smlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0]
 ; CHECK-NEXT: smlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7]
@@ -1035,8 +1035,8 @@
 define void @multi_vector_sub_lane_vg2x2_u16(i32 %slice, %zn0, %zn1, %zm) {
 ; CHECK-LABEL: multi_vector_sub_lane_vg2x2_u16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1
+; CHECK-NEXT: mov w8, w0
 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1
 ; CHECK-NEXT: umlsl za.s[w8, 0:1, vgx2], { z0.h, z1.h }, z2.h[0]
 ; CHECK-NEXT: umlsl za.s[w8, 6:7, vgx2], { z0.h, z1.h }, z2.h[7]
diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-rshl.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-rshl.ll
--- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-rshl.ll
+++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-rshl.ll
@@ -1,7 +1,6 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s
-
 ; SRSHL (Single, x2)
-
 define { , } @multi_vec_rounding_shl_single_x2_s8( %dummy, %zdn1, %zdn2, %zm) {
 ; CHECK-LABEL: multi_vec_rounding_shl_single_x2_s8:
 ; CHECK: // %bb.0:
@@ -14,7 +13,6 @@
 %res = call { , } @llvm.aarch64.sve.srshl.single.x2.nxv16i8( %zdn1, %zdn2, %zm)
 ret { , } %res
 }
-
 define { , } @multi_vec_rounding_shl_single_x2_s16( %dummy, %zdn1, %zdn2, %zm) {
 ; CHECK-LABEL: multi_vec_rounding_shl_single_x2_s16:
 ; CHECK: // %bb.0:
@@ -27,7 +25,6 @@
 %res = call { , } @llvm.aarch64.sve.srshl.single.x2.nxv8i16( %zdn1, %zdn2, %zm)
 ret { , } %res
 }
-
 define { , } @multi_vec_rounding_shl_single_x2_s32( %dummy, %zdn1, %zdn2, %zm) {
 ; CHECK-LABEL: multi_vec_rounding_shl_single_x2_s32:
 ; CHECK: // %bb.0:
@@ -40,7 +37,6 @@
 %res = call { , } @llvm.aarch64.sve.srshl.single.x2.nxv4i32( %zdn1, %zdn2, %zm)
 ret { , } %res
 }
-
 define { , } @multi_vec_rounding_shl_single_x2_s64( %dummy, %zdn1, %zdn2, %zm) {
 ; CHECK-LABEL: multi_vec_rounding_shl_single_x2_s64:
 ; CHECK: // %bb.0:
@@ -53,87 +49,32 @@
 %res = call { , } @llvm.aarch64.sve.srshl.single.x2.nxv2i64( %zdn1, %zdn2, %zm)
 ret { , } %res
 }
-
 ; SRSHL (Single, x4)
-
 define { , , , } @multi_vec_rounding_shl_single_x4_s8( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm) {
-; CHECK-LABEL: multi_vec_rounding_shl_single_x4_s8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z26.d, z3.d
-; CHECK-NEXT: mov z25.d, z2.d
-; CHECK-NEXT: mov z24.d, z1.d
-; CHECK-NEXT: srshl { z24.b - z27.b }, { z24.b - z27.b }, z5.b
-; CHECK-NEXT: mov z0.d, z24.d
-; CHECK-NEXT: mov z1.d, z25.d
-; CHECK-NEXT: mov z2.d, z26.d
-; CHECK-NEXT: mov z3.d, z27.d
-; CHECK-NEXT: ret
 %res = call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv16i8( %zdn1, %zdn2, %zdn3, %zdn4, %zm)
 ret { , , , } %res
 }
-
 define { , , , } @multi_vec_rounding_shl_single_x4_s16( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm) {
-; CHECK-LABEL: multi_vec_rounding_shl_single_x4_s16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z26.d, z3.d
-; CHECK-NEXT: mov z25.d, z2.d
-; CHECK-NEXT: mov z24.d, z1.d
-; CHECK-NEXT: srshl { z24.h - z27.h }, { z24.h - z27.h }, z5.h
-; CHECK-NEXT: mov z0.d, z24.d
-; CHECK-NEXT: mov z1.d, z25.d
-; CHECK-NEXT: mov z2.d, z26.d
-; CHECK-NEXT: mov z3.d, z27.d
-; CHECK-NEXT: ret
 %res = call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv8i16( %zdn1, %zdn2, %zdn3, %zdn4, %zm)
 ret { , , , } %res
 }
-
 define { , , , } @multi_vec_rounding_shl_single_x4_s32( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm) {
-; CHECK-LABEL: multi_vec_rounding_shl_single_x4_s32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z26.d, z3.d
-; CHECK-NEXT: mov z25.d, z2.d
-; CHECK-NEXT: mov z24.d, z1.d
-; CHECK-NEXT: srshl { z24.s - z27.s }, { z24.s - z27.s }, z5.s
-; CHECK-NEXT: mov z0.d, z24.d
-; CHECK-NEXT: mov z1.d, z25.d
-; CHECK-NEXT: mov z2.d, z26.d
-; CHECK-NEXT: mov z3.d, z27.d
-; CHECK-NEXT: ret
 %res = call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv4i32( %zdn1, %zdn2, %zdn3, %zdn4, %zm)
 ret { , , , } %res
 }
-
 define { , , , } @multi_vec_rounding_shl_single_x4_s64( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm) {
-; CHECK-LABEL: multi_vec_rounding_shl_single_x4_s64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z26.d, z3.d
-; CHECK-NEXT: mov z25.d, z2.d
-; CHECK-NEXT: mov z24.d, z1.d
-; CHECK-NEXT: srshl { z24.d - z27.d }, { z24.d - z27.d }, z5.d
-; CHECK-NEXT: mov z0.d, z24.d
-; CHECK-NEXT: mov z1.d, z25.d
-; CHECK-NEXT: mov z2.d, z26.d
-; CHECK-NEXT: mov z3.d, z27.d
-; CHECK-NEXT: ret
 %res = call { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv2i64( %zdn1, %zdn2, %zdn3, %zdn4, %zm)
 ret { , , , } %res
 }
-
 ; URSHL (Single, x2)
-
 define { , } @multi_vec_rounding_shl_single_x2_u8( %dummy, %zdn1, %zdn2, %zm) {
 ; CHECK-LABEL: multi_vec_rounding_shl_single_x2_u8:
 ; CHECK: // %bb.0:
@@ -146,7 +87,6 @@
 %res = call { , } @llvm.aarch64.sve.urshl.single.x2.nxv16i8( %zdn1, %zdn2, %zm)
 ret { , } %res
 }
-
 define { , } @multi_vec_rounding_shl_single_x2_u16( %dummy, %zdn1, %zdn2, %zm) {
 ; CHECK-LABEL: multi_vec_rounding_shl_single_x2_u16:
 ; CHECK: // %bb.0:
@@ -159,7 +99,6 @@
 %res = call { , } @llvm.aarch64.sve.urshl.single.x2.nxv8i16( %zdn1, %zdn2, %zm)
 ret { , } %res
 }
-
 define { , } @multi_vec_rounding_shl_single_x2_u32( %dummy, %zdn1, %zdn2, %zm) {
 ; CHECK-LABEL: multi_vec_rounding_shl_single_x2_u32:
 ; CHECK: // %bb.0:
@@ -172,7 +111,6 @@
 %res = call { , } @llvm.aarch64.sve.urshl.single.x2.nxv4i32( %zdn1, %zdn2, %zm)
 ret { , } %res
 }
-
 define { , } @multi_vec_rounding_shl_single_x2_u64( %dummy, %zdn1, %zdn2, %zm) {
 ; CHECK-LABEL: multi_vec_rounding_shl_single_x2_u64:
 ; CHECK: // %bb.0:
@@ -185,87 +123,32 @@
 %res = call { , } @llvm.aarch64.sve.urshl.single.x2.nxv2i64( %zdn1, %zdn2, %zm)
 ret { , } %res
 }
-
 ; URSHL (Single, x4)
-
 define { , , , } @multi_vec_rounding_shl_single_x4_u8( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm) {
-; CHECK-LABEL: multi_vec_rounding_shl_single_x4_u8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z26.d, z3.d
-; CHECK-NEXT: mov z25.d, z2.d
-; CHECK-NEXT: mov z24.d, z1.d
-; CHECK-NEXT: urshl { z24.b - z27.b }, { z24.b - z27.b }, z5.b
-; CHECK-NEXT: mov z0.d, z24.d
-; CHECK-NEXT: mov z1.d, z25.d
-; CHECK-NEXT: mov z2.d, z26.d
-; CHECK-NEXT: mov z3.d, z27.d
-; CHECK-NEXT: ret
 %res = call { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv16i8( %zdn1, %zdn2, %zdn3, %zdn4, %zm)
 ret { , , , } %res
 }
-
 define { , , , } @multi_vec_rounding_shl_single_x4_u16( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm) {
-; CHECK-LABEL: multi_vec_rounding_shl_single_x4_u16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z26.d, z3.d
-; CHECK-NEXT: mov z25.d, z2.d
-; CHECK-NEXT: mov z24.d, z1.d
-; CHECK-NEXT: urshl { z24.h - z27.h }, { z24.h - z27.h }, z5.h
-; CHECK-NEXT: mov z0.d, z24.d
-; CHECK-NEXT: mov z1.d, z25.d
-; CHECK-NEXT: mov z2.d, z26.d
-; CHECK-NEXT: mov z3.d, z27.d
-; CHECK-NEXT: ret
 %res = call { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv8i16( %zdn1, %zdn2, %zdn3, %zdn4, %zm)
 ret { , , , } %res
 }
-
 define { , , , } @multi_vec_rounding_shl_single_x4_u32( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm) {
-; CHECK-LABEL: multi_vec_rounding_shl_single_x4_u32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z26.d, z3.d
-; CHECK-NEXT: mov z25.d, z2.d
-; CHECK-NEXT: mov z24.d, z1.d
-; CHECK-NEXT: urshl { z24.s - z27.s }, { z24.s - z27.s }, z5.s
-; CHECK-NEXT: mov z0.d, z24.d
-; CHECK-NEXT: mov z1.d, z25.d
-; CHECK-NEXT: mov z2.d, z26.d
-; CHECK-NEXT: mov z3.d, z27.d
-; CHECK-NEXT: ret
 %res = call { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv4i32( %zdn1, %zdn2, %zdn3, %zdn4, %zm)
 ret { , , , } %res
 }
-
 define { , , , } @multi_vec_rounding_shl_single_x4_u64( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm) {
-; CHECK-LABEL: multi_vec_rounding_shl_single_x4_u64:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z26.d, z3.d
-; CHECK-NEXT: mov z25.d, z2.d
-; CHECK-NEXT: mov z24.d, z1.d
-; CHECK-NEXT: urshl { z24.d - z27.d }, { z24.d - z27.d }, z5.d
-; CHECK-NEXT: mov z0.d, z24.d
-; CHECK-NEXT: mov z1.d, z25.d
-; CHECK-NEXT: mov z2.d, z26.d
-; CHECK-NEXT: mov z3.d, z27.d
-; CHECK-NEXT: ret
 %res = call { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv2i64( %zdn1, %zdn2, %zdn3, %zdn4, %zm)
 ret { , , , } %res
 }
-
 ; SRSHL (Multi, x2)
-
 define { , } @multi_vec_rounding_shl_x2_s8( %dummy, %zdn1, %zdn2, %zm1, %zm2) {
 ; CHECK-LABEL: multi_vec_rounding_shl_x2_s8:
 ; CHECK: // %bb.0:
@@ -280,7 +163,6 @@
 %res = call { , } @llvm.aarch64.sve.srshl.x2.nxv16i8( %zdn1, %zdn2, %zm1, %zm2)
 ret { , } %res
 }
-
 define { , } @multi_vec_rounding_shl_x2_s16( %dummy, %zdn1, %zdn2, %zm1, %zm2) {
 ; CHECK-LABEL: multi_vec_rounding_shl_x2_s16:
 ; CHECK: // %bb.0:
@@ -295,7 +177,6 @@
 %res = call { , } @llvm.aarch64.sve.srshl.x2.nxv8i16( %zdn1, %zdn2, %zm1, %zm2)
 ret { , } %res
 }
-
 define { , } @multi_vec_rounding_shl_x2_s32( %dummy, %zdn1, %zdn2, %zm1, %zm2) {
 ; CHECK-LABEL: multi_vec_rounding_shl_x2_s32:
 ; CHECK: // %bb.0:
@@ -310,7 +191,6 @@
 %res = call { , } @llvm.aarch64.sve.srshl.x2.nxv4i32( %zdn1, %zdn2, %zm1, %zm2)
 ret { , } %res
 }
-
 define { , } @multi_vec_rounding_shl_x2_s64( %dummy, %zdn1, %zdn2, %zm1, %zm2) {
 ; CHECK-LABEL: multi_vec_rounding_shl_x2_s64:
 ; CHECK: // %bb.0:
@@ -325,111 +205,36 @@
 %res = call { , } @llvm.aarch64.sve.srshl.x2.nxv2i64( %zdn1, %zdn2, %zm1, %zm2)
 ret { , } %res
 }
-
 ; SRSHL (Multi, x4)
-
 define { , , , } @multi_vec_rounding_shl_x4_s8( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) {
-; CHECK-LABEL: multi_vec_rounding_shl_x4_s8:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z30.d, z7.d
-; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: mov z29.d, z6.d
-; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z28.d, z5.d
-; CHECK-NEXT: mov z26.d, z3.d
-; CHECK-NEXT: ld1b { z31.b }, p0/z, [x0]
-; CHECK-NEXT: mov z25.d, z2.d
-; CHECK-NEXT: mov z24.d, z1.d
-; CHECK-NEXT: srshl { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
-; CHECK-NEXT: mov z0.d, z24.d
-; CHECK-NEXT: mov z1.d, z25.d
-; CHECK-NEXT: mov z2.d, z26.d
-; CHECK-NEXT: mov z3.d, z27.d
-; CHECK-NEXT: ret
 %res = call { , , , } @llvm.aarch64.sve.srshl.x4.nxv16i8( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4)
 ret { , , , } %res
 }
-
 define { , , , } @multi_vec_rounding_shl_x4_s16( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) {
-; CHECK-LABEL: multi_vec_rounding_shl_x4_s16:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z30.d, z7.d
-; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: mov z29.d, z6.d
-; CHECK-NEXT: mov z27.d, z4.d
-; CHECK-NEXT: mov z28.d, z5.d
-; CHECK-NEXT: mov z26.d, z3.d
-; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0]
-; CHECK-NEXT: mov z25.d, z2.d
-; CHECK-NEXT: mov z24.d, z1.d
-; CHECK-NEXT: srshl { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
-; CHECK-NEXT: mov z0.d, z24.d
-; CHECK-NEXT: mov z1.d, z25.d
-; CHECK-NEXT: mov z2.d, z26.d
-; CHECK-NEXT: mov z3.d, z27.d
-; CHECK-NEXT: ret
 %res = call { , , , } @llvm.aarch64.sve.srshl.x4.nxv8i16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4)
 ret { , , , } %res
 }
-
 define { , , , } @multi_vec_rounding_shl_x4_s32( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) {
-; CHECK-LABEL: multi_vec_rounding_shl_x4_s32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: mov z30.d, z7.d
-; 
CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: srshl { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.srshl.x4.nxv4i32( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) ret { , , , } %res } - define { , , , } @multi_vec_rounding_shl_x4_s64( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { -; CHECK-LABEL: multi_vec_rounding_shl_x4_s64: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: srshl { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.srshl.x4.nxv2i64( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) ret { , , , } %res } - ; URSHL (Multi, x2) - define { , } @multi_vec_rounding_uhl_x2_u8( %dummy, %zdn1, %zdn2, %zm1, %zm2) { ; CHECK-LABEL: multi_vec_rounding_uhl_x2_u8: ; CHECK: // %bb.0: @@ -444,7 +249,6 @@ %res = call { , } @llvm.aarch64.sve.urshl.x2.nxv16i8( %zdn1, %zdn2, %zm1, %zm2) ret { , } %res } - define { , } @multi_vec_rounding_uhl_x2_u16( %dummy, %zdn1, %zdn2, %zm1, %zm2) { ; CHECK-LABEL: multi_vec_rounding_uhl_x2_u16: ; CHECK: // %bb.0: @@ -459,7 +263,6 @@ %res = call { , } @llvm.aarch64.sve.urshl.x2.nxv8i16( %zdn1, %zdn2, %zm1, %zm2) ret { , } %res } - define { , } @multi_vec_rounding_uhl_x2_u32( %dummy, %zdn1, %zdn2, %zm1, %zm2) { ; CHECK-LABEL: multi_vec_rounding_uhl_x2_u32: ; CHECK: // %bb.0: @@ -474,7 +277,6 @@ %res = call { , } @llvm.aarch64.sve.urshl.x2.nxv4i32( %zdn1, %zdn2, %zm1, %zm2) ret { , } %res } - define { , } @multi_vec_rounding_uhl_x2_u64( %dummy, %zdn1, %zdn2, %zm1, %zm2) { ; CHECK-LABEL: multi_vec_rounding_uhl_x2_u64: ; CHECK: // %bb.0: @@ -489,134 +291,55 @@ %res = call { , } @llvm.aarch64.sve.urshl.x2.nxv2i64( %zdn1, %zdn2, %zm1, %zm2) ret { , } %res } - ; URSHL (Multi, x4) - define { , , , } @multi_vec_rounding_shl_x4_u8( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { -; CHECK-LABEL: multi_vec_rounding_shl_x4_u8: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: ld1b { z31.b }, p0/z, [x0] -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: urshl { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.urshl.x4.nxv16i8( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) ret { , , , } %res } - define { , , , } @multi_vec_rounding_shl_x4_u16( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { -; CHECK-LABEL: multi_vec_rounding_shl_x4_u16: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; 
CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0] -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: urshl { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.urshl.x4.nxv8i16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) ret { , , , } %res } - define { , , , } @multi_vec_rounding_shl_x4_u32( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { -; CHECK-LABEL: multi_vec_rounding_shl_x4_u32: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: urshl { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.urshl.x4.nxv4i32( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) ret { , , , } %res } - define { , , , } @multi_vec_rounding_shl_x4_u64( %dummy, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { -; CHECK-LABEL: multi_vec_rounding_shl_x4_u64: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: urshl { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.urshl.x4.nxv2i64( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) ret { , , , } %res } - declare { , } @llvm.aarch64.sve.srshl.single.x2.nxv16i8(, , ) declare { , } @llvm.aarch64.sve.srshl.single.x2.nxv8i16(, , ) declare { , } @llvm.aarch64.sve.srshl.single.x2.nxv4i32(, , ) declare { , } @llvm.aarch64.sve.srshl.single.x2.nxv2i64(, , ) - declare { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv16i8(, , , , ) declare { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv8i16(, , , , ) declare { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv4i32(, , , , ) declare { , , , } @llvm.aarch64.sve.srshl.single.x4.nxv2i64(, , , , ) - declare { , } @llvm.aarch64.sve.urshl.single.x2.nxv16i8(, , ) declare { , } @llvm.aarch64.sve.urshl.single.x2.nxv8i16(, , ) declare { , } @llvm.aarch64.sve.urshl.single.x2.nxv4i32(, , ) declare { , } @llvm.aarch64.sve.urshl.single.x2.nxv2i64(, , ) - declare { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv16i8(, , , , ) declare { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv8i16(, , , , ) declare { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv4i32(, , , , ) declare { , , , } @llvm.aarch64.sve.urshl.single.x4.nxv2i64(, , , , ) - declare { , } @llvm.aarch64.sve.srshl.x2.nxv16i8(, , , ) declare { , } @llvm.aarch64.sve.srshl.x2.nxv8i16(, , , ) declare { , } @llvm.aarch64.sve.srshl.x2.nxv4i32(, , , ) declare { , } @llvm.aarch64.sve.srshl.x2.nxv2i64(, , , ) - declare { , , , } 
@llvm.aarch64.sve.srshl.x4.nxv16i8(, , , , , , , ) declare { , , , } @@ -625,12 +348,10 @@ @llvm.aarch64.sve.srshl.x4.nxv4i32(, , , , , , , ) declare { , , , } @llvm.aarch64.sve.srshl.x4.nxv2i64(, , , , , , , ) - declare { , } @llvm.aarch64.sve.urshl.x2.nxv16i8(, , , ) declare { , } @llvm.aarch64.sve.urshl.x2.nxv8i16(, , , ) declare { , } @llvm.aarch64.sve.urshl.x2.nxv4i32(, , , ) declare { , } @llvm.aarch64.sve.urshl.x2.nxv2i64(, , , ) - declare { , , , } @llvm.aarch64.sve.urshl.x4.nxv16i8(, , , , , , , ) declare { , , , } diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-sqdmulh.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-sqdmulh.ll --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-sqdmulh.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-sqdmulh.ll @@ -1,7 +1,6 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -verify-machineinstrs < %s | FileCheck %s - ; SQDMULH (Single, x2) - define { , } @multi_vec_sat_double_mulh_single_x2_s8( %unused, %zdn1, %zdn2, %zm) { ; CHECK-LABEL: multi_vec_sat_double_mulh_single_x2_s8: ; CHECK: // %bb.0: @@ -14,7 +13,6 @@ %res = call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv16i8( %zdn1, %zdn2, %zm) ret { , } %res } - define { , } @multi_vec_sat_double_mulh_single_x2_s16( %unused, %zdn1, %zdn2, %zm) { ; CHECK-LABEL: multi_vec_sat_double_mulh_single_x2_s16: ; CHECK: // %bb.0: @@ -27,7 +25,6 @@ %res = call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv8i16( %zdn1, %zdn2, %zm) ret { , } %res } - define { , } @multi_vec_sat_double_mulh_single_x2_s32( %unused, %zdn1, %zdn2, %zm) { ; CHECK-LABEL: multi_vec_sat_double_mulh_single_x2_s32: ; CHECK: // %bb.0: @@ -40,7 +37,6 @@ %res = call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv4i32( %zdn1, %zdn2, %zm) ret { , } %res } - define { , } @multi_vec_sat_double_mulh_single_x2_s64( %unused, %zdn1, %zdn2, %zm) { ; CHECK-LABEL: multi_vec_sat_double_mulh_single_x2_s64: ; CHECK: // %bb.0: @@ -53,87 +49,32 @@ %res = call { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv2i64( %zdn1, %zdn2, %zm) ret { , } %res } - ; SQDMULH (Single, x4) - define { , , , } @multi_vec_sat_double_mulh_single_x4_s8( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm) { -; CHECK-LABEL: multi_vec_sat_double_mulh_single_x4_s8: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: sqdmulh { z24.b - z27.b }, { z24.b - z27.b }, z5.b -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv16i8( %zdn1, %zdn2, %zdn3, %zdn4, %zm) ret { , , , } %res } - define { , , , } @multi_vec_sat_double_mulh_single_x4_s16( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm) { -; CHECK-LABEL: multi_vec_sat_double_mulh_single_x4_s16: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: sqdmulh { z24.h - z27.h }, { z24.h - z27.h }, z5.h -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv8i16( %zdn1, %zdn2, %zdn3, %zdn4, %zm) ret { , , , } %res } - define { , , , } @multi_vec_sat_double_mulh_single_x4_s32( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm) { -; CHECK-LABEL: 
multi_vec_sat_double_mulh_single_x4_s32: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: sqdmulh { z24.s - z27.s }, { z24.s - z27.s }, z5.s -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv4i32( %zdn1, %zdn2, %zdn3, %zdn4, %zm) ret { , , , } %res } - define { , , , } @multi_vec_sat_double_mulh_single_x4_s64( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm) { -; CHECK-LABEL: multi_vec_sat_double_mulh_single_x4_s64: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: sqdmulh { z24.d - z27.d }, { z24.d - z27.d }, z5.d -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv2i64( %zdn1, %zdn2, %zdn3, %zdn4, %zm) ret { , , , } %res } - ; SQDMULH (x2, Multi) - define { , } @multi_vec_sat_double_mulh_multi_x2_s8( %unused, %zdn1, %zdn2, %zm1, %zm2) { ; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x2_s8: ; CHECK: // %bb.0: @@ -148,7 +89,6 @@ %res = call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv16i8( %zdn1, %zdn2, %zm1, %zm2) ret { , } %res } - define { , } @multi_vec_sat_double_mulh_multi_x2_s16( %unused, %zdn1, %zdn2, %zm1, %zm2) { ; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x2_s16: ; CHECK: // %bb.0: @@ -163,7 +103,6 @@ %res = call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv8i16( %zdn1, %zdn2, %zm1, %zm2) ret { , } %res } - define { , } @multi_vec_sat_double_mulh_multi_x2_s32( %unused, %zdn1, %zdn2, %zm1, %zm2) { ; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x2_s32: ; CHECK: // %bb.0: @@ -178,7 +117,6 @@ %res = call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv4i32( %zdn1, %zdn2, %zm1, %zm2) ret { , } %res } - define { , } @multi_vec_sat_double_mulh_multi_x2_s64( %unused, %zdn1, %zdn2, %zm1, %zm2) { ; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x2_s64: ; CHECK: // %bb.0: @@ -193,118 +131,43 @@ %res = call { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv2i64( %zdn1, %zdn2, %zm1, %zm2) ret { , } %res } - ; SQDMULH (x4, Multi) - define { , , , } @multi_vec_sat_double_mulh_multi_x4_s8( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { -; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s8: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: ld1b { z31.b }, p0/z, [x0] -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: sqdmulh { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv16i8( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) ret { , , , } %res } - define { , , , } @multi_vec_sat_double_mulh_multi_x4_s16( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { -; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s16: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z28.d, z5.d -; 
CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: ld1h { z31.h }, p0/z, [x0] -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: sqdmulh { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv8i16( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) ret { , , , } %res } - define { , , , } @multi_vec_sat_double_mulh_multi_x4_s32( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { -; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s32: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: ld1w { z31.s }, p0/z, [x0] -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: sqdmulh { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv4i32( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) ret { , , , } %res } - define { , , , } @multi_vec_sat_double_mulh_multi_x4_s64( %unused, %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) { -; CHECK-LABEL: multi_vec_sat_double_mulh_multi_x4_s64: -; CHECK: // %bb.0: -; CHECK-NEXT: mov z30.d, z7.d -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z29.d, z6.d -; CHECK-NEXT: mov z27.d, z4.d -; CHECK-NEXT: mov z28.d, z5.d -; CHECK-NEXT: mov z26.d, z3.d -; CHECK-NEXT: ld1d { z31.d }, p0/z, [x0] -; CHECK-NEXT: mov z25.d, z2.d -; CHECK-NEXT: mov z24.d, z1.d -; CHECK-NEXT: sqdmulh { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d } -; CHECK-NEXT: mov z0.d, z24.d -; CHECK-NEXT: mov z1.d, z25.d -; CHECK-NEXT: mov z2.d, z26.d -; CHECK-NEXT: mov z3.d, z27.d -; CHECK-NEXT: ret %res = call { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv2i64( %zdn1, %zdn2, %zdn3, %zdn4, %zm1, %zm2, %zm3, %zm4) ret { , , , } %res } - declare { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv16i8(, , ) declare { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv8i16(, , ) declare { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv4i32(, , ) declare { , } @llvm.aarch64.sve.sqdmulh.single.vgx2.nxv2i64(, , ) - declare { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv16i8(, , , , ) declare { , , , } @@ -313,12 +176,10 @@ @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv4i32(, , , , ) declare { , , , } @llvm.aarch64.sve.sqdmulh.single.vgx4.nxv2i64(, , , , ) - declare { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv16i8(, , , ) declare { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv8i16(, , , ) declare { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv4i32(, , , ) declare { , } @llvm.aarch64.sve.sqdmulh.vgx2.nxv2i64(, , , ) - declare { , , , } @llvm.aarch64.sve.sqdmulh.vgx4.nxv16i8(, , , , , , , ) diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-sub.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-sub.ll --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-sub.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-sub.ll @@ -8,8 +8,8 @@ define void @multi_vector_sub_write_single_za_vg1x2_i32(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_write_single_za_vg1x2_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def 
$z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: sub za.s[w8, 0, vgx2], { z0.s, z1.s }, z2.s ; CHECK-NEXT: sub za.s[w8, 7, vgx2], { z0.s, z1.s }, z2.s @@ -27,8 +27,8 @@ define void @multi_vector_sub_write_single_za_vg1x2_i64(i32 %slice, %zn0, %zn1, %zm) { ; CHECK-LABEL: multi_vector_sub_write_single_za_vg1x2_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: sub za.d[w8, 0, vgx2], { z0.d, z1.d }, z2.d ; CHECK-NEXT: sub za.d[w8, 7, vgx2], { z0.d, z1.d }, z2.d @@ -105,9 +105,9 @@ define void @multi_vector_sub_write_za_vg1x2_i32(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_sub_write_za_vg1x2_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: sub za.s[w8, 0, vgx2], { z0.s, z1.s }, { z2.s, z3.s } @@ -128,9 +128,9 @@ define void @multi_vector_sub_write_za_vg1x2_i64(i32 %slice, %zn0, %zn1, ; CHECK-LABEL: multi_vector_sub_write_za_vg1x2_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z2_z3 def $z2_z3 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z2_z3 def $z2_z3 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: sub za.d[w8, 0, vgx2], { z0.d, z1.d }, { z2.d, z3.d } @@ -225,8 +225,8 @@ define void @multi_vector_sub_za_vg1x2_i32(i32 %slice, %zn0, %zn1) { ; CHECK-LABEL: multi_vector_sub_za_vg1x2_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: sub za.s[w8, 0, vgx2], { z0.s, z1.s } ; CHECK-NEXT: sub za.s[w8, 7, vgx2], { z0.s, z1.s } @@ -240,8 +240,8 @@ define void @multi_vector_sub_za_vg1x2_i64(i32 %slice, %zn0, %zn1) { ; CHECK-LABEL: multi_vector_sub_za_vg1x2_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: sub za.d[w8, 0, vgx2], { z0.d, z1.d } ; CHECK-NEXT: sub za.d[w8, 7, vgx2], { z0.d, z1.d } @@ -255,8 +255,8 @@ define void @multi_vector_sub_za_vg1x2_f32(i32 %slice, %zn0, %zn1) { ; CHECK-LABEL: multi_vector_sub_za_vg1x2_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fsub za.s[w8, 0, vgx2], { z0.s, z1.s } ; CHECK-NEXT: fsub za.s[w8, 7, vgx2], { z0.s, z1.s } @@ -272,8 +272,8 @@ define void @multi_vector_sub_za_vg1x2_f64(i32 %slice, %zn0, %zn1) { ; CHECK-LABEL: multi_vector_sub_za_vg1x2_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fsub za.d[w8, 0, vgx2], { z0.d, z1.d } ; CHECK-NEXT: fsub za.d[w8, 7, vgx2], { z0.d, z1.d } diff --git 
a/llvm/test/CodeGen/AArch64/sme2-intrinsics-vdot.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-vdot.ll --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-vdot.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-vdot.ll @@ -7,8 +7,8 @@ define void @test_fvdot_lane_za32_vg1x2_nxv8f16(i32 %slice, %zn1, %zn2, %zm) { ; CHECK-LABEL: test_fvdot_lane_za32_vg1x2_nxv8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: fvdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z2.h[3] ; CHECK-NEXT: fvdot za.s[w8, 7, vgx2], { z0.h, z1.h }, z2.h[3] @@ -25,8 +25,8 @@ define void @test_fvdot_lane_za32_vg1x2_nxv8bf16(i32 %slice, %zn1, %zn2, %zm) { ; CHECK-LABEL: test_fvdot_lane_za32_vg1x2_nxv8bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: bfvdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z2.h[3] ; CHECK-NEXT: bfvdot za.s[w8, 7, vgx2], { z0.h, z1.h }, z2.h[3] @@ -43,8 +43,8 @@ define void @test_svdot_lane_za32_vg1x2_nxv8i16(i32 %slice, %zn1, %zn2, %zm) { ; CHECK-LABEL: test_svdot_lane_za32_vg1x2_nxv8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: svdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z2.h[3] ; CHECK-NEXT: svdot za.s[w8, 7, vgx2], { z0.h, z1.h }, z2.h[3] @@ -95,8 +95,8 @@ define void @test_uvdot_lane_za32_vg1x2_nxv8i16(i32 %slice, %zn1, %zn2, %zm) { ; CHECK-LABEL: test_uvdot_lane_za32_vg1x2_nxv8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1 def $z0_z1 +; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1 def $z0_z1 ; CHECK-NEXT: uvdot za.s[w8, 0, vgx2], { z0.h, z1.h }, z2.h[3] ; CHECK-NEXT: uvdot za.s[w8, 7, vgx2], { z0.h, z1.h }, z2.h[3] diff --git a/llvm/test/CodeGen/AArch64/speculation-hardening-loads.ll b/llvm/test/CodeGen/AArch64/speculation-hardening-loads.ll --- a/llvm/test/CodeGen/AArch64/speculation-hardening-loads.ll +++ b/llvm/test/CodeGen/AArch64/speculation-hardening-loads.ll @@ -1,41 +1,58 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu | FileCheck %s define i128 @ldp_single_csdb(ptr %p) speculative_load_hardening { +; CHECK-LABEL: ldp_single_csdb: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldp x8, x1, [x0] +; CHECK-NEXT: cmp sp, #0 +; CHECK-NEXT: csetm x16, ne +; CHECK-NEXT: and x8, x8, x16 +; CHECK-NEXT: and x1, x1, x16 +; CHECK-NEXT: csdb +; CHECK-NEXT: mov x2, sp +; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: and x2, x2, x16 +; CHECK-NEXT: mov sp, x2 +; CHECK-NEXT: ret entry: %0 = load i128, ptr %p, align 16 ret i128 %0 -; CHECK-LABEL: ldp_single_csdb -; CHECK: ldp x8, x1, [x0] -; CHECK-NEXT: cmp sp, #0 -; CHECK-NEXT: csetm x16, ne -; CHECK-NEXT: and x8, x8, x16 -; CHECK-NEXT: and x1, x1, x16 -; CHECK-NEXT: csdb -; CHECK-NEXT: mov [[TMPREG:x[0-9]+]], sp -; CHECK-NEXT: mov x0, x8 -; CHECK-NEXT: and [[TMPREG]], [[TMPREG]], x16 -; CHECK-NEXT: mov sp, [[TMPREG]] -; CHECK-NEXT: ret } define double @ld_double(ptr %p) speculative_load_hardening { +; CHECK-LABEL: ld_double: +; CHECK: // %bb.0: // 
%entry +; CHECK-NEXT: cmp sp, #0 +; CHECK-NEXT: csetm x16, ne +; CHECK-NEXT: and x0, x0, x16 +; CHECK-NEXT: csdb +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: and x0, x0, x16 +; CHECK-NEXT: mov sp, x0 +; CHECK-NEXT: ret entry: %0 = load double, ptr %p, align 8 ret double %0 ; Checking that the address loaded from is masked for a floating point load. -; CHECK-LABEL: ld_double -; CHECK: cmp sp, #0 -; CHECK-NEXT: csetm x16, ne -; CHECK-NEXT: and x0, x0, x16 -; CHECK-NEXT: csdb -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: mov [[TMPREG:x[0-9]+]], sp -; CHECK-NEXT: and [[TMPREG]], [[TMPREG]], x16 -; CHECK-NEXT: mov sp, [[TMPREG]] -; CHECK-NEXT: ret } define i32 @csdb_emitted_for_subreg_use(ptr %p, i32 %b) speculative_load_hardening { +; CHECK-LABEL: csdb_emitted_for_subreg_use: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmp sp, #0 +; CHECK-NEXT: ldr x8, [x0] +; CHECK-NEXT: csetm x16, ne +; CHECK-NEXT: and x8, x8, x16 +; CHECK-NEXT: csdb +; CHECK-NEXT: add w9, w1, w8 +; CHECK-NEXT: cmp x8, #0 +; CHECK-NEXT: csel w0, w1, w9, eq +; CHECK-NEXT: mov x1, sp +; CHECK-NEXT: and x1, x1, x16 +; CHECK-NEXT: mov sp, x1 +; CHECK-NEXT: ret entry: %X = load i64, ptr %p, align 8 %X_trunc = trunc i64 %X to i32 @@ -44,23 +61,24 @@ %ret = select i1 %iszero, i32 %b, i32 %add ret i32 %ret ; Checking that the loaded value is masked, since this is an integer load. -; CHECK-LABEL: csdb_emitted_for_subreg_use -; CHECK: ldr x8, [x0] -; CHECK-NEXT: cmp sp, #0 -; CHECK-NEXT: csetm x16, ne -; CHECK-NEXT: and x8, x8, x16 ; csdb instruction must occur before the add instruction with w8 as operand. -; CHECK-NEXT: csdb -; CHECK-NEXT: add w9, w1, w8 -; CHECK-NEXT: cmp x8, #0 -; CHECK-NEXT: csel w0, w1, w9, eq -; CHECK-NEXT: mov [[TMPREG:x[0-9]+]], sp -; CHECK-NEXT: and [[TMPREG]], [[TMPREG]], x16 -; CHECK-NEXT: mov sp, [[TMPREG]] -; CHECK-NEXT: ret } define i64 @csdb_emitted_for_superreg_use(ptr %p, i64 %b) speculative_load_hardening { +; CHECK-LABEL: csdb_emitted_for_superreg_use: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmp sp, #0 +; CHECK-NEXT: ldr w8, [x0] +; CHECK-NEXT: csetm x16, ne +; CHECK-NEXT: and w8, w8, w16 +; CHECK-NEXT: csdb +; CHECK-NEXT: add x9, x1, x8 +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: csel x0, x1, x9, eq +; CHECK-NEXT: mov x1, sp +; CHECK-NEXT: and x1, x1, x16 +; CHECK-NEXT: mov sp, x1 +; CHECK-NEXT: ret entry: %X = load i32, ptr %p, align 4 %X_ext = zext i32 %X to i64 @@ -69,88 +87,84 @@ %ret = select i1 %iszero, i64 %b, i64 %add ret i64 %ret ; Checking that the loaded value is masked, since this is an integer load. -; CHECK-LABEL: csdb_emitted_for_superreg_use -; CHECK: ldr w8, [x0] -; CHECK-NEXT: cmp sp, #0 -; CHECK-NEXT: csetm x16, ne -; CHECK-NEXT: and w8, w8, w16 ; csdb instruction must occur before the add instruction with x8 as operand.
-; CHECK-NEXT: csdb -; CHECK-NEXT: add x9, x1, x8 -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: csel x0, x1, x9, eq -; CHECK-NEXT: mov [[TMPREG:x[0-9]+]], sp -; CHECK-NEXT: and [[TMPREG]], [[TMPREG]], x16 -; CHECK-NEXT: mov sp, [[TMPREG]] -; CHECK-NEXT: ret } define i64 @no_masking_with_full_control_flow_barriers(i64 %a, i64 %b, ptr %p) speculative_load_hardening { -; CHECK-LABEL: no_masking_with_full_control_flow_barriers -; CHECK: dsb sy -; CHECK: isb +; CHECK-LABEL: no_masking_with_full_control_flow_barriers: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: dsb sy +; CHECK-NEXT: isb +; CHECK-NEXT: ldr x8, [x2] +; CHECK-NEXT: mov x17, x0 +; CHECK-NEXT: mov x16, x1 +; CHECK-NEXT: //APP +; CHECK-NEXT: hint #12 +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: add x0, x8, x17 +; CHECK-NEXT: ret entry: %0 = tail call i64 asm "hint #12", "={x17},{x16},0"(i64 %b, i64 %a) %X = load i64, ptr %p, align 8 %ret = add i64 %X, %0 -; CHECK-NOT: csdb -; CHECK-NOT: and -; CHECK: ret ret i64 %ret } define void @f_implicitdef_vector_load(ptr %dst, ptr %src) speculative_load_hardening +; CHECK-LABEL: f_implicitdef_vector_load: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmp sp, #0 +; CHECK-NEXT: csetm x16, ne +; CHECK-NEXT: and x1, x1, x16 +; CHECK-NEXT: csdb +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: mov v0.d[1], v0.d[0] +; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: and x0, x0, x16 +; CHECK-NEXT: mov sp, x0 +; CHECK-NEXT: ret { entry: %0 = load <2 x i32>, ptr %src, align 8 %shuffle = shufflevector <2 x i32> %0, <2 x i32> undef, <4 x i32> store <4 x i32> %shuffle, ptr %dst, align 4 ret void -; CHECK-LABEL: f_implicitdef_vector_load -; CHECK: cmp sp, #0 -; CHECK-NEXT: csetm x16, ne -; CHECK-NEXT: and x1, x1, x16 -; CHECK-NEXT: csdb -; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: mov v0.d[1], v0.d[0] -; CHECK-NEXT: str q0, [x0] -; CHECK-NEXT: mov [[TMPREG:x[0-9]+]], sp -; CHECK-NEXT: and [[TMPREG]], [[TMPREG]], x16 -; CHECK-NEXT: mov sp, [[TMPREG]] -; CHECK-NEXT: ret } define <2 x double> @f_usedefvectorload(ptr %a, ptr %b) speculative_load_hardening { +; CHECK-LABEL: f_usedefvectorload: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmp sp, #0 +; CHECK-NEXT: csetm x16, ne +; CHECK-NEXT: and x1, x1, x16 +; CHECK-NEXT: csdb +; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: and x0, x0, x16 +; CHECK-NEXT: mov sp, x0 +; CHECK-NEXT: ret entry: -; CHECK-LABEL: f_usedefvectorload -; CHECK: cmp sp, #0 -; CHECK-NEXT: csetm x16, ne -; CHECK-NEXT: and x1, x1, x16 -; CHECK-NEXT: csdb -; CHECK-NEXT: mov [[TMPREG:x[0-9]+]], sp -; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: and [[TMPREG]], [[TMPREG]], x16 -; CHECK-NEXT: mov sp, [[TMPREG]] -; CHECK-NEXT: ret %0 = load double, ptr %b, align 16 %vld1_lane = insertelement <2 x double> , double %0, i32 0 ret <2 x double> %vld1_lane } define i32 @deadload() speculative_load_hardening uwtable { +; CHECK-LABEL: deadload: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: cmp sp, #0 +; CHECK-NEXT: csetm x16, ne +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: ldr w8, [sp, #12] +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: and x0, x0, x16 +; CHECK-NEXT: mov sp, x0 +; CHECK-NEXT: ret entry: -; CHECK-LABEL: deadload -; CHECK: cmp sp, #0 -; CHECK-NEXT: csetm x16, ne -; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: ldr w8, [sp, #12] -; CHECK-NEXT: add sp, sp, #16 -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: mov [[TMPREG:x[0-9]+]], sp -; 
CHECK-NEXT: and [[TMPREG]], [[TMPREG]], x16 -; CHECK-NEXT: mov sp, [[TMPREG]] -; CHECK-NEXT: ret %a = alloca i32, align 4 %val = load volatile i32, ptr %a, align 4 ret i32 undef diff --git a/llvm/test/CodeGen/AArch64/split-vector-insert.ll b/llvm/test/CodeGen/AArch64/split-vector-insert.ll --- a/llvm/test/CodeGen/AArch64/split-vector-insert.ll +++ b/llvm/test/CodeGen/AArch64/split-vector-insert.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc < %s -debug-only=legalize-types 2>&1 | FileCheck %s --check-prefix=CHECK-LEGALIZATION ; RUN: llc < %s | FileCheck %s ; REQUIRES: asserts @@ -9,11 +10,50 @@ declare @llvm.vector.insert.nxv2f64.v8f64(, <8 x double>, i64) define @test_nxv2i64_v8i64( %a, <8 x i64> %b) #0 { -; CHECK-LEGALIZATION: Legally typed node: [[T1:t[0-9]+]]: nxv2i64 = insert_subvector {{t[0-9]+}}, {{t[0-9]+}}, Constant:i64<0> -; CHECK-LEGALIZATION: Legally typed node: [[T2:t[0-9]+]]: nxv2i64 = insert_subvector [[T1]], {{t[0-9]+}}, Constant:i64<2> -; CHECK-LEGALIZATION: Legally typed node: [[T3:t[0-9]+]]: nxv2i64 = insert_subvector [[T2]], {{t[0-9]+}}, Constant:i64<4> -; CHECK-LEGALIZATION: Legally typed node: [[T4:t[0-9]+]]: nxv2i64 = insert_subvector [[T3]], {{t[0-9]+}}, Constant:i64<6> - +; CHECK-LEGALIZATION-LABEL: test_nxv2i64_v8i64: +; CHECK-LEGALIZATION: // %bb.0: +; CHECK-LEGALIZATION-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-LEGALIZATION-NEXT: .cfi_def_cfa_offset 16 +; CHECK-LEGALIZATION-NEXT: .cfi_offset w29, -16 +; CHECK-LEGALIZATION-NEXT: addvl sp, sp, #-3 +; CHECK-LEGALIZATION-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG +; CHECK-LEGALIZATION-NEXT: ptrue p1.d, vl2 +; CHECK-LEGALIZATION-NEXT: cntd x8 +; CHECK-LEGALIZATION-NEXT: mov w9, #2 // =0x2 +; CHECK-LEGALIZATION-NEXT: ptrue p0.d +; CHECK-LEGALIZATION-NEXT: sub x8, x8, #2 +; CHECK-LEGALIZATION-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-LEGALIZATION-NEXT: mov x10, sp +; CHECK-LEGALIZATION-NEXT: cmp x8, #2 +; CHECK-LEGALIZATION-NEXT: csel x9, x8, x9, lo +; CHECK-LEGALIZATION-NEXT: cmp x8, #4 +; CHECK-LEGALIZATION-NEXT: lsl x9, x9, #3 +; CHECK-LEGALIZATION-NEXT: mov z0.d, p1/m, z1.d +; CHECK-LEGALIZATION-NEXT: st1d { z0.d }, p0, [sp] +; CHECK-LEGALIZATION-NEXT: str q2, [x10, x9] +; CHECK-LEGALIZATION-NEXT: mov w9, #4 // =0x4 +; CHECK-LEGALIZATION-NEXT: addvl x10, sp, #1 +; CHECK-LEGALIZATION-NEXT: ld1d { z0.d }, p0/z, [sp] +; CHECK-LEGALIZATION-NEXT: csel x9, x8, x9, lo +; CHECK-LEGALIZATION-NEXT: cmp x8, #6 +; CHECK-LEGALIZATION-NEXT: lsl x9, x9, #3 +; CHECK-LEGALIZATION-NEXT: st1d { z0.d }, p0, [sp, #1, mul vl] +; CHECK-LEGALIZATION-NEXT: str q3, [x10, x9] +; CHECK-LEGALIZATION-NEXT: mov w9, #6 // =0x6 +; CHECK-LEGALIZATION-NEXT: ld1d { z0.d }, p0/z, [sp, #1, mul vl] +; CHECK-LEGALIZATION-NEXT: csel x8, x8, x9, lo +; CHECK-LEGALIZATION-NEXT: addvl x9, sp, #2 +; CHECK-LEGALIZATION-NEXT: lsl x8, x8, #3 +; CHECK-LEGALIZATION-NEXT: st1d { z0.d }, p0, [sp, #2, mul vl] +; CHECK-LEGALIZATION-NEXT: str q4, [x9, x8] +; CHECK-LEGALIZATION-NEXT: ld1d { z0.d }, p0/z, [sp, #2, mul vl] +; CHECK-LEGALIZATION-NEXT: addvl sp, sp, #3 +; CHECK-LEGALIZATION-NEXT: .cfi_def_cfa wsp, 16 +; CHECK-LEGALIZATION-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-LEGALIZATION-NEXT: .cfi_def_cfa_offset 0 +; CHECK-LEGALIZATION-NEXT: .cfi_restore w29 +; CHECK-LEGALIZATION-NEXT: ret +; ; CHECK-LABEL: test_nxv2i64_v8i64: ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, 
[sp, #-16]! // 8-byte Folded Spill @@ -21,29 +61,29 @@ ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: addvl sp, sp, #-3 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG +; CHECK-NEXT: ptrue p1.d, vl2 ; CHECK-NEXT: cntd x8 -; CHECK-NEXT: mov w9, #2 +; CHECK-NEXT: mov w9, #2 // =0x2 +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: sub x8, x8, #2 -; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: cmp x8, #2 +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: mov x10, sp +; CHECK-NEXT: cmp x8, #2 ; CHECK-NEXT: csel x9, x8, x9, lo -; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 -; CHECK-NEXT: mov z0.d, p0/m, z1.d +; CHECK-NEXT: cmp x8, #4 ; CHECK-NEXT: lsl x9, x9, #3 -; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z0.d, p1/m, z1.d ; CHECK-NEXT: st1d { z0.d }, p0, [sp] -; CHECK-NEXT: cmp x8, #4 ; CHECK-NEXT: str q2, [x10, x9] -; CHECK-NEXT: mov w9, #4 +; CHECK-NEXT: mov w9, #4 // =0x4 +; CHECK-NEXT: addvl x10, sp, #1 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp] ; CHECK-NEXT: csel x9, x8, x9, lo -; CHECK-NEXT: lsl x9, x9, #3 -; CHECK-NEXT: addvl x10, sp, #1 ; CHECK-NEXT: cmp x8, #6 +; CHECK-NEXT: lsl x9, x9, #3 ; CHECK-NEXT: st1d { z0.d }, p0, [sp, #1, mul vl] ; CHECK-NEXT: str q3, [x10, x9] -; CHECK-NEXT: mov w9, #6 +; CHECK-NEXT: mov w9, #6 // =0x6 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp, #1, mul vl] ; CHECK-NEXT: csel x8, x8, x9, lo ; CHECK-NEXT: addvl x9, sp, #2 @@ -54,23 +94,63 @@ ; CHECK-NEXT: addvl sp, sp, #3 ; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w29 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret + %r = call @llvm.vector.insert.nxv2i64.v8i64( %a, <8 x i64> %b, i64 0) ret %r } define @test_nxv2f64_v8f64( %a, <8 x double> %b) #0 { -; CHECK-LEGALIZATION: Legally typed node: [[T1:t[0-9]+]]: nxv2f64 = insert_subvector {{t[0-9]+}}, {{t[0-9]+}}, Constant:i64<0> -; CHECK-LEGALIZATION: Legally typed node: [[T2:t[0-9]+]]: nxv2f64 = insert_subvector [[T1]], {{t[0-9]+}}, Constant:i64<2> -; CHECK-LEGALIZATION: Legally typed node: [[T3:t[0-9]+]]: nxv2f64 = insert_subvector [[T2]], {{t[0-9]+}}, Constant:i64<4> -; CHECK-LEGALIZATION: Legally typed node: [[T4:t[0-9]+]]: nxv2f64 = insert_subvector [[T3]], {{t[0-9]+}}, Constant:i64<6> - +; CHECK-LEGALIZATION-LABEL: test_nxv2f64_v8f64: +; CHECK-LEGALIZATION: // %bb.0: +; CHECK-LEGALIZATION-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-LEGALIZATION-NEXT: .cfi_def_cfa_offset 16 +; CHECK-LEGALIZATION-NEXT: .cfi_offset w29, -16 +; CHECK-LEGALIZATION-NEXT: addvl sp, sp, #-3 +; CHECK-LEGALIZATION-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG +; CHECK-LEGALIZATION-NEXT: ptrue p1.d, vl2 +; CHECK-LEGALIZATION-NEXT: cntd x8 +; CHECK-LEGALIZATION-NEXT: mov w9, #2 // =0x2 +; CHECK-LEGALIZATION-NEXT: ptrue p0.d +; CHECK-LEGALIZATION-NEXT: sub x8, x8, #2 +; CHECK-LEGALIZATION-NEXT: // kill: def $q1 killed $q1 def $z1 +; CHECK-LEGALIZATION-NEXT: mov x10, sp +; CHECK-LEGALIZATION-NEXT: cmp x8, #2 +; CHECK-LEGALIZATION-NEXT: csel x9, x8, x9, lo +; CHECK-LEGALIZATION-NEXT: cmp x8, #4 +; CHECK-LEGALIZATION-NEXT: lsl x9, x9, #3 +; CHECK-LEGALIZATION-NEXT: mov z0.d, p1/m, z1.d +; CHECK-LEGALIZATION-NEXT: st1d { z0.d }, p0, [sp] +; CHECK-LEGALIZATION-NEXT: str q2, [x10, x9] +; CHECK-LEGALIZATION-NEXT: mov w9, #4 // =0x4 +; CHECK-LEGALIZATION-NEXT: addvl x10, sp, #1 +; CHECK-LEGALIZATION-NEXT: ld1d { z0.d }, p0/z, [sp] +; CHECK-LEGALIZATION-NEXT: csel x9, x8, x9, lo +; CHECK-LEGALIZATION-NEXT: cmp x8, #6 +; CHECK-LEGALIZATION-NEXT: lsl x9, x9, #3 +; CHECK-LEGALIZATION-NEXT: st1d { z0.d }, p0, [sp, #1, mul vl] +; CHECK-LEGALIZATION-NEXT: str q3, [x10, x9] +; CHECK-LEGALIZATION-NEXT: mov w9, #6 // =0x6 +; CHECK-LEGALIZATION-NEXT: ld1d { z0.d }, p0/z, [sp, #1, mul vl] +; CHECK-LEGALIZATION-NEXT: csel x8, x8, x9, lo +; CHECK-LEGALIZATION-NEXT: addvl x9, sp, #2 +; CHECK-LEGALIZATION-NEXT: lsl x8, x8, #3 +; CHECK-LEGALIZATION-NEXT: st1d { z0.d }, p0, [sp, #2, mul vl] +; CHECK-LEGALIZATION-NEXT: str q4, [x9, x8] +; CHECK-LEGALIZATION-NEXT: ld1d { z0.d }, p0/z, [sp, #2, mul vl] +; CHECK-LEGALIZATION-NEXT: addvl sp, sp, #3 +; CHECK-LEGALIZATION-NEXT: .cfi_def_cfa wsp, 16 +; CHECK-LEGALIZATION-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK-LEGALIZATION-NEXT: .cfi_def_cfa_offset 0 +; CHECK-LEGALIZATION-NEXT: .cfi_restore w29 +; CHECK-LEGALIZATION-NEXT: ret +; ; CHECK-LABEL: test_nxv2f64_v8f64: ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill @@ -78,29 +158,29 @@ ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: addvl sp, sp, #-3 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG +; CHECK-NEXT: ptrue p1.d, vl2 ; CHECK-NEXT: cntd x8 -; CHECK-NEXT: mov w9, #2 +; CHECK-NEXT: mov w9, #2 // =0x2 +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: sub x8, x8, #2 -; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: cmp x8, #2 +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: mov x10, sp +; CHECK-NEXT: cmp x8, #2 ; CHECK-NEXT: csel x9, x8, x9, lo -; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 -; CHECK-NEXT: mov z0.d, p0/m, z1.d +; CHECK-NEXT: cmp x8, #4 ; CHECK-NEXT: lsl x9, x9, #3 -; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z0.d, p1/m, z1.d ; CHECK-NEXT: st1d { z0.d }, p0, [sp] -; CHECK-NEXT: cmp x8, #4 ; CHECK-NEXT: str q2, [x10, x9] -; CHECK-NEXT: mov w9, #4 +; CHECK-NEXT: mov w9, #4 // =0x4 +; CHECK-NEXT: addvl x10, sp, #1 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp] ; CHECK-NEXT: csel x9, x8, x9, lo -; CHECK-NEXT: lsl x9, x9, #3 -; CHECK-NEXT: addvl x10, sp, #1 ; CHECK-NEXT: cmp x8, #6 +; CHECK-NEXT: lsl x9, x9, #3 ; CHECK-NEXT: st1d { z0.d }, p0, [sp, #1, mul vl] ; CHECK-NEXT: str q3, [x10, x9] -; CHECK-NEXT: mov w9, #6 +; CHECK-NEXT: mov w9, #6 // =0x6 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp, #1, mul vl] ; CHECK-NEXT: csel x8, x8, x9, lo ; CHECK-NEXT: addvl x9, sp, #2 @@ -109,15 +189,16 @@ ; CHECK-NEXT: str q4, [x9, x8] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp, #2, mul vl] ; CHECK-NEXT: addvl sp, sp, #3 -; CHECK-NEXT: .cfi_def_cfa wsp, 16 +; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload -; CHECK-NEXT: .cfi_def_cfa_offset 0 -; CHECK-NEXT: .cfi_restore w29 +; CHECK-NEXT: .cfi_def_cfa_offset 0 +; CHECK-NEXT: .cfi_restore w29 ; CHECK-NEXT: ret + %r = call @llvm.vector.insert.nxv2f64.v8f64( %a, <8 x double> %b, i64 0) ret %r } diff --git a/llvm/test/CodeGen/AArch64/sqrt-fastmath.ll b/llvm/test/CodeGen/AArch64/sqrt-fastmath.ll --- a/llvm/test/CodeGen/AArch64/sqrt-fastmath.ll +++ b/llvm/test/CodeGen/AArch64/sqrt-fastmath.ll @@ -530,7 +530,7 @@ ; FAULT-LABEL: sqrt_simplify_before_recip_3_uses: ; FAULT: // %bb.0: ; FAULT-NEXT: fsqrt d0, d0 -; FAULT-NEXT: mov x8, #4631107791820423168 +; FAULT-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000 ; FAULT-NEXT: fmov d1, #1.00000000 ; FAULT-NEXT: fmov d2, x8 ; FAULT-NEXT: fdiv d1, d1, d0 @@ -542,17 +542,17 @@ ; CHECK-LABEL: sqrt_simplify_before_recip_3_uses: ; CHECK: // %bb.0: ; CHECK-NEXT: frsqrte d1, d0 -; CHECK-NEXT: mov x8, #4631107791820423168 +; CHECK-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000 +; CHECK-NEXT: fmul d2, d1, d1 +; CHECK-NEXT: frsqrts d2, d0, d2 +; CHECK-NEXT: fmul d1, d1, d2 +; CHECK-NEXT: fmul d2, d1, d1 +; CHECK-NEXT: frsqrts d2, d0, d2 +; CHECK-NEXT: fmul d1, d1, d2 +; CHECK-NEXT: fmul d2, d1, d1 +; CHECK-NEXT: frsqrts d2, d0, d2 +; CHECK-NEXT: fmul d1, d1, d2 ; CHECK-NEXT: fmov d2, x8 -; CHECK-NEXT: fmul d3, d1, d1 -; CHECK-NEXT: frsqrts d3, d0, d3 -; CHECK-NEXT: fmul d1, d1, d3 -; CHECK-NEXT: fmul d3, d1, d1 -; CHECK-NEXT: frsqrts d3, d0, d3 -; CHECK-NEXT: fmul d1, d1, d3 -; CHECK-NEXT: fmul d3, d1, d1 -; CHECK-NEXT: frsqrts d3, d0, d3 -; CHECK-NEXT: fmul d1, d1, d3 ; CHECK-NEXT: fmul d0, d0, d1 ; CHECK-NEXT: fmul d2, d1, d2 ; CHECK-NEXT: str d1, [x0] @@ -571,9 +571,9 @@ ; FAULT-LABEL: sqrt_simplify_before_recip_3_uses_order: ; FAULT: // %bb.0: ; FAULT-NEXT: fsqrt d0, d0 -; FAULT-NEXT: mov x8, #4631107791820423168 
+; FAULT-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000 ; FAULT-NEXT: fmov d1, x8 -; FAULT-NEXT: mov x8, #140737488355328 +; FAULT-NEXT: mov x8, #140737488355328 // =0x800000000000 ; FAULT-NEXT: movk x8, #16453, lsl #48 ; FAULT-NEXT: fmov d2, x8 ; FAULT-NEXT: fdiv d1, d1, d0 @@ -585,10 +585,7 @@ ; CHECK-LABEL: sqrt_simplify_before_recip_3_uses_order: ; CHECK: // %bb.0: ; CHECK-NEXT: frsqrte d1, d0 -; CHECK-NEXT: mov x9, #140737488355328 -; CHECK-NEXT: mov x8, #4631107791820423168 -; CHECK-NEXT: movk x9, #16453, lsl #48 -; CHECK-NEXT: fmov d3, x9 +; CHECK-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000 ; CHECK-NEXT: fmul d2, d1, d1 ; CHECK-NEXT: frsqrts d2, d0, d2 ; CHECK-NEXT: fmul d1, d1, d2 @@ -599,6 +596,9 @@ ; CHECK-NEXT: frsqrts d2, d0, d2 ; CHECK-NEXT: fmul d1, d1, d2 ; CHECK-NEXT: fmov d2, x8 +; CHECK-NEXT: mov x8, #140737488355328 // =0x800000000000 +; CHECK-NEXT: movk x8, #16453, lsl #48 +; CHECK-NEXT: fmov d3, x8 ; CHECK-NEXT: fmul d0, d0, d1 ; CHECK-NEXT: fmul d2, d1, d2 ; CHECK-NEXT: fmul d1, d1, d3 @@ -620,11 +620,11 @@ ; FAULT: // %bb.0: ; FAULT-NEXT: fsqrt d0, d0 ; FAULT-NEXT: fmov d1, #1.00000000 -; FAULT-NEXT: mov x9, #140737488355328 -; FAULT-NEXT: mov x8, #4631107791820423168 -; FAULT-NEXT: movk x9, #16453, lsl #48 +; FAULT-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000 ; FAULT-NEXT: fmov d2, x8 -; FAULT-NEXT: fmov d3, x9 +; FAULT-NEXT: mov x8, #140737488355328 // =0x800000000000 +; FAULT-NEXT: movk x8, #16453, lsl #48 +; FAULT-NEXT: fmov d3, x8 ; FAULT-NEXT: fdiv d1, d1, d0 ; FAULT-NEXT: fmul d2, d1, d2 ; FAULT-NEXT: fmul d3, d1, d3 @@ -637,10 +637,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: frsqrte d1, d0 ; CHECK-NEXT: fcmp d0, #0.0 -; CHECK-NEXT: mov x9, #140737488355328 -; CHECK-NEXT: mov x8, #4631107791820423168 -; CHECK-NEXT: movk x9, #16453, lsl #48 -; CHECK-NEXT: fmov d3, x9 +; CHECK-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000 ; CHECK-NEXT: fmul d2, d1, d1 ; CHECK-NEXT: frsqrts d2, d0, d2 ; CHECK-NEXT: fmul d1, d1, d2 @@ -651,12 +648,15 @@ ; CHECK-NEXT: frsqrts d2, d0, d2 ; CHECK-NEXT: fmul d1, d1, d2 ; CHECK-NEXT: fmul d2, d0, d1 -; CHECK-NEXT: fmul d3, d1, d3 ; CHECK-NEXT: str d1, [x0] ; CHECK-NEXT: fcsel d2, d0, d2, eq ; CHECK-NEXT: fdiv d0, d0, d2 ; CHECK-NEXT: fmov d2, x8 +; CHECK-NEXT: mov x8, #140737488355328 // =0x800000000000 +; CHECK-NEXT: movk x8, #16453, lsl #48 +; CHECK-NEXT: fmov d3, x8 ; CHECK-NEXT: fmul d2, d1, d2 +; CHECK-NEXT: fmul d3, d1, d3 ; CHECK-NEXT: str d2, [x1] ; CHECK-NEXT: str d3, [x2] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll --- a/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll @@ -4,12 +4,12 @@ define i1 @test_srem_odd(i29 %X) nounwind { ; CHECK-LABEL: test_srem_odd: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #33099 -; CHECK-NEXT: mov w9, #24493 +; CHECK-NEXT: mov w8, #33099 // =0x814b +; CHECK-NEXT: mov w9, #24493 // =0x5fad ; CHECK-NEXT: movk w8, #8026, lsl #16 ; CHECK-NEXT: movk w9, #41, lsl #16 ; CHECK-NEXT: madd w8, w0, w8, w9 -; CHECK-NEXT: mov w9, #48987 +; CHECK-NEXT: mov w9, #48987 // =0xbf5b ; CHECK-NEXT: movk w9, #82, lsl #16 ; CHECK-NEXT: and w8, w8, #0x1fffffff ; CHECK-NEXT: cmp w8, w9 @@ -23,12 +23,12 @@ define i1 @test_srem_even(i4 %X) nounwind { ; CHECK-LABEL: test_srem_even: ; CHECK: // %bb.0: -; CHECK-NEXT: sbfx w9, w0, #0, #4 -; CHECK-NEXT: mov w8, #6 -; CHECK-NEXT: add w9, w9, w9, lsl #1 -; CHECK-NEXT: ubfx w10, 
w9, #7, #1 -; CHECK-NEXT: add w9, w10, w9, lsr #4 -; CHECK-NEXT: msub w8, w9, w8, w0 +; CHECK-NEXT: sbfx w8, w0, #0, #4 +; CHECK-NEXT: add w8, w8, w8, lsl #1 +; CHECK-NEXT: ubfx w9, w8, #7, #1 +; CHECK-NEXT: add w8, w9, w8, lsr #4 +; CHECK-NEXT: mov w9, #6 // =0x6 +; CHECK-NEXT: msub w8, w8, w9, w0 ; CHECK-NEXT: and w8, w8, #0xf ; CHECK-NEXT: cmp w8, #1 ; CHECK-NEXT: cset w0, eq @@ -57,45 +57,45 @@ define <3 x i1> @test_srem_vec(<3 x i33> %X) nounwind { ; CHECK-LABEL: test_srem_vec: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #7282 -; CHECK-NEXT: sbfx x9, x0, #0, #33 -; CHECK-NEXT: movk x8, #29127, lsl #16 -; CHECK-NEXT: mov x11, #7281 -; CHECK-NEXT: movk x8, #50972, lsl #32 -; CHECK-NEXT: movk x11, #29127, lsl #16 -; CHECK-NEXT: movk x8, #7281, lsl #48 -; CHECK-NEXT: movk x11, #50972, lsl #32 -; CHECK-NEXT: sbfx x12, x1, #0, #33 -; CHECK-NEXT: sbfx x10, x2, #0, #33 -; CHECK-NEXT: smulh x13, x9, x8 -; CHECK-NEXT: movk x11, #7281, lsl #48 -; CHECK-NEXT: smulh x8, x12, x8 -; CHECK-NEXT: smulh x11, x10, x11 -; CHECK-NEXT: add x13, x13, x13, lsr #63 -; CHECK-NEXT: sub x11, x11, x10 -; CHECK-NEXT: add x8, x8, x8, lsr #63 -; CHECK-NEXT: add x13, x13, x13, lsl #3 -; CHECK-NEXT: asr x14, x11, #3 -; CHECK-NEXT: sub x9, x9, x13 -; CHECK-NEXT: add x11, x14, x11, lsr #63 -; CHECK-NEXT: add x8, x8, x8, lsl #3 -; CHECK-NEXT: sub x8, x12, x8 +; CHECK-NEXT: mov x9, #7282 // =0x1c72 +; CHECK-NEXT: sbfx x8, x0, #0, #33 +; CHECK-NEXT: sbfx x10, x1, #0, #33 +; CHECK-NEXT: movk x9, #29127, lsl #16 +; CHECK-NEXT: mov x13, #7281 // =0x1c71 +; CHECK-NEXT: sbfx x12, x2, #0, #33 +; CHECK-NEXT: movk x9, #50972, lsl #32 +; CHECK-NEXT: movk x13, #29127, lsl #16 +; CHECK-NEXT: movk x9, #7281, lsl #48 +; CHECK-NEXT: movk x13, #50972, lsl #32 +; CHECK-NEXT: smulh x11, x8, x9 +; CHECK-NEXT: movk x13, #7281, lsl #48 +; CHECK-NEXT: smulh x9, x10, x9 +; CHECK-NEXT: smulh x13, x12, x13 +; CHECK-NEXT: add x11, x11, x11, lsr #63 +; CHECK-NEXT: add x9, x9, x9, lsr #63 ; CHECK-NEXT: add x11, x11, x11, lsl #3 -; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: add x10, x10, x11 -; CHECK-NEXT: mov x9, #8589934591 -; CHECK-NEXT: adrp x11, .LCPI3_0 -; CHECK-NEXT: adrp x12, .LCPI3_1 -; CHECK-NEXT: mov v0.d[1], x8 -; CHECK-NEXT: fmov d1, x10 -; CHECK-NEXT: dup v2.2d, x9 -; CHECK-NEXT: ldr q3, [x11, :lo12:.LCPI3_0] -; CHECK-NEXT: ldr q4, [x12, :lo12:.LCPI3_1] -; CHECK-NEXT: and v1.16b, v1.16b, v2.16b -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b -; CHECK-NEXT: cmeq v0.2d, v0.2d, v3.2d -; CHECK-NEXT: cmeq v1.2d, v1.2d, v4.2d +; CHECK-NEXT: add x9, x9, x9, lsl #3 +; CHECK-NEXT: sub x8, x8, x11 +; CHECK-NEXT: sub x11, x13, x12 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: mov x8, #8589934591 // =0x1ffffffff +; CHECK-NEXT: sub x9, x10, x9 +; CHECK-NEXT: asr x10, x11, #3 +; CHECK-NEXT: dup v1.2d, x8 +; CHECK-NEXT: mov v0.d[1], x9 +; CHECK-NEXT: add x9, x10, x11, lsr #63 +; CHECK-NEXT: add x8, x9, x9, lsl #3 +; CHECK-NEXT: adrp x9, .LCPI3_0 +; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI3_0] +; CHECK-NEXT: add x8, x12, x8 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: fmov d3, x8 +; CHECK-NEXT: adrp x8, .LCPI3_1 +; CHECK-NEXT: cmeq v0.2d, v0.2d, v2.2d +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_1] +; CHECK-NEXT: and v1.16b, v3.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: cmeq v1.2d, v1.2d, v2.2d ; CHECK-NEXT: xtn v0.2s, v0.2d ; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: xtn v1.2s, v1.2d diff --git a/llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll b/llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll --- 
a/llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll +++ b/llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll @@ -7,10 +7,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI0_0 ; CHECK-NEXT: adrp x9, .LCPI0_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0] -; CHECK-NEXT: adrp x8, .LCPI0_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI0_1] +; CHECK-NEXT: adrp x8, .LCPI0_2 ; CHECK-NEXT: adrp x9, .LCPI0_3 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI0_2] @@ -18,10 +17,11 @@ ; CHECK-NEXT: adrp x8, .LCPI0_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI0_4] +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_4] +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -35,16 +35,16 @@ define <4 x i32> @test_srem_odd_allones_eq(<4 x i32> %X) nounwind { ; CHECK-LABEL: test_srem_odd_allones_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #52429 -; CHECK-NEXT: mov w9, #39321 +; CHECK-NEXT: mov w8, #52429 // =0xcccd +; CHECK-NEXT: mov w9, #39321 // =0x9999 ; CHECK-NEXT: movk w8, #52428, lsl #16 ; CHECK-NEXT: movk w9, #6553, lsl #16 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 ; CHECK-NEXT: adrp x8, .LCPI1_0 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s -; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI1_0] +; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: cmhs v0.4s, v0.4s, v2.4s ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -56,16 +56,16 @@ define <4 x i32> @test_srem_odd_allones_ne(<4 x i32> %X) nounwind { ; CHECK-LABEL: test_srem_odd_allones_ne: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #52429 -; CHECK-NEXT: mov w9, #39321 +; CHECK-NEXT: mov w8, #52429 // =0xcccd +; CHECK-NEXT: mov w9, #39321 // =0x9999 ; CHECK-NEXT: movk w8, #52428, lsl #16 ; CHECK-NEXT: movk w9, #6553, lsl #16 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 ; CHECK-NEXT: adrp x8, .LCPI2_0 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s -; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI2_0] +; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: cmhi v0.4s, v2.4s, v0.4s ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -79,21 +79,21 @@ define <4 x i32> @test_srem_even_allones_eq(<4 x i32> %X) nounwind { ; CHECK-LABEL: test_srem_even_allones_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #28087 -; CHECK-NEXT: mov w9, #9362 +; CHECK-NEXT: mov w8, #28087 // =0x6db7 +; CHECK-NEXT: mov w9, #9362 // =0x2492 ; CHECK-NEXT: movk w8, #46811, lsl #16 ; CHECK-NEXT: movk w9, #4681, lsl #16 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 ; CHECK-NEXT: adrp x8, .LCPI3_0 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: shl v0.4s, v2.4s, #31 ; CHECK-NEXT: ushr v1.4s, v2.4s, #1 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_0] +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0] +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -103,21 +103,21 @@ define <4 x i32> @test_srem_even_allones_ne(<4 x i32> %X) nounwind { ; CHECK-LABEL: test_srem_even_allones_ne: 
; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #28087 -; CHECK-NEXT: mov w9, #9362 +; CHECK-NEXT: mov w8, #28087 // =0x6db7 +; CHECK-NEXT: mov w9, #9362 // =0x2492 ; CHECK-NEXT: movk w8, #46811, lsl #16 ; CHECK-NEXT: movk w9, #4681, lsl #16 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 ; CHECK-NEXT: adrp x8, .LCPI4_0 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: shl v0.4s, v2.4s, #31 ; CHECK-NEXT: ushr v1.4s, v2.4s, #1 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI4_0] +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: cmhi v0.4s, v0.4s, v2.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_0] +; CHECK-NEXT: cmhi v0.4s, v0.4s, v1.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp ne <4 x i32> %srem, @@ -131,10 +131,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI5_0 ; CHECK-NEXT: adrp x9, .LCPI5_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI5_0] -; CHECK-NEXT: adrp x8, .LCPI5_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI5_1] +; CHECK-NEXT: adrp x8, .LCPI5_2 ; CHECK-NEXT: adrp x9, .LCPI5_3 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI5_2] @@ -142,10 +141,11 @@ ; CHECK-NEXT: adrp x8, .LCPI5_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI5_4] +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI5_4] +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -157,10 +157,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI6_0 ; CHECK-NEXT: adrp x9, .LCPI6_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI6_0] -; CHECK-NEXT: adrp x8, .LCPI6_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI6_1] +; CHECK-NEXT: adrp x8, .LCPI6_2 ; CHECK-NEXT: adrp x9, .LCPI6_3 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI6_2] @@ -168,10 +167,11 @@ ; CHECK-NEXT: adrp x8, .LCPI6_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI6_4] +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: cmhi v0.4s, v0.4s, v2.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI6_4] +; CHECK-NEXT: cmhi v0.4s, v0.4s, v1.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp ne <4 x i32> %srem, @@ -187,10 +187,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI7_0 ; CHECK-NEXT: adrp x9, .LCPI7_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI7_0] -; CHECK-NEXT: adrp x8, .LCPI7_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI7_1] +; CHECK-NEXT: adrp x8, .LCPI7_2 ; CHECK-NEXT: adrp x9, .LCPI7_3 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI7_2] @@ -198,10 +197,11 @@ ; CHECK-NEXT: adrp x8, .LCPI7_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI7_4] +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI7_4] +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: 
and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -215,10 +215,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI8_0 ; CHECK-NEXT: adrp x9, .LCPI8_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI8_0] -; CHECK-NEXT: adrp x8, .LCPI8_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI8_1] +; CHECK-NEXT: adrp x8, .LCPI8_2 ; CHECK-NEXT: adrp x9, .LCPI8_3 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI8_2] @@ -226,10 +225,11 @@ ; CHECK-NEXT: adrp x8, .LCPI8_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI8_4] +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI8_4] +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -243,10 +243,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI9_0 ; CHECK-NEXT: adrp x9, .LCPI9_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI9_0] -; CHECK-NEXT: adrp x8, .LCPI9_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI9_1] +; CHECK-NEXT: adrp x8, .LCPI9_2 ; CHECK-NEXT: adrp x9, .LCPI9_3 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI9_2] @@ -254,10 +253,11 @@ ; CHECK-NEXT: adrp x8, .LCPI9_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI9_4] +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI9_4] +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -271,16 +271,16 @@ define <4 x i32> @test_srem_odd_one(<4 x i32> %X) nounwind { ; CHECK-LABEL: test_srem_odd_one: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #52429 -; CHECK-NEXT: mov w9, #39321 +; CHECK-NEXT: mov w8, #52429 // =0xcccd +; CHECK-NEXT: mov w9, #39321 // =0x9999 ; CHECK-NEXT: movk w8, #52428, lsl #16 ; CHECK-NEXT: movk w9, #6553, lsl #16 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 ; CHECK-NEXT: adrp x8, .LCPI10_0 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s -; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI10_0] +; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: cmhs v0.4s, v0.4s, v2.4s ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -294,21 +294,21 @@ define <4 x i32> @test_srem_even_one(<4 x i32> %X) nounwind { ; CHECK-LABEL: test_srem_even_one: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #28087 -; CHECK-NEXT: mov w9, #9362 +; CHECK-NEXT: mov w8, #28087 // =0x6db7 +; CHECK-NEXT: mov w9, #9362 // =0x2492 ; CHECK-NEXT: movk w8, #46811, lsl #16 ; CHECK-NEXT: movk w9, #4681, lsl #16 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 ; CHECK-NEXT: adrp x8, .LCPI11_0 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: shl v0.4s, v2.4s, #31 ; CHECK-NEXT: ushr v1.4s, v2.4s, #1 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI11_0] +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI11_0] +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, 
v2.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -322,10 +322,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI12_0 ; CHECK-NEXT: adrp x9, .LCPI12_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI12_0] -; CHECK-NEXT: adrp x8, .LCPI12_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI12_1] +; CHECK-NEXT: adrp x8, .LCPI12_2 ; CHECK-NEXT: adrp x9, .LCPI12_3 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI12_2] @@ -333,10 +332,11 @@ ; CHECK-NEXT: adrp x8, .LCPI12_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI12_4] +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI12_4] +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -441,10 +441,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI16_0 ; CHECK-NEXT: adrp x9, .LCPI16_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] -; CHECK-NEXT: adrp x8, .LCPI16_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI16_1] +; CHECK-NEXT: adrp x8, .LCPI16_2 ; CHECK-NEXT: adrp x9, .LCPI16_3 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI16_2] @@ -452,10 +451,11 @@ ; CHECK-NEXT: adrp x8, .LCPI16_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI16_4] +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_4] +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -469,10 +469,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI17_0 ; CHECK-NEXT: adrp x9, .LCPI17_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_0] -; CHECK-NEXT: adrp x8, .LCPI17_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI17_1] +; CHECK-NEXT: adrp x8, .LCPI17_2 ; CHECK-NEXT: adrp x9, .LCPI17_3 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI17_2] @@ -480,10 +479,11 @@ ; CHECK-NEXT: adrp x8, .LCPI17_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI17_4] +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_4] +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -497,10 +497,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI18_0 ; CHECK-NEXT: adrp x9, .LCPI18_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI18_0] -; CHECK-NEXT: adrp x8, .LCPI18_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI18_1] +; CHECK-NEXT: adrp x8, .LCPI18_2 ; CHECK-NEXT: adrp x9, .LCPI18_3 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI18_2] @@ -508,10 +507,11 @@ ; CHECK-NEXT: adrp x8, .LCPI18_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI18_4] +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, 
v1.16b, v0.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI18_4] +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -525,16 +525,16 @@ define <4 x i32> @test_srem_odd_allones_and_one(<4 x i32> %X) nounwind { ; CHECK-LABEL: test_srem_odd_allones_and_one: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #52429 -; CHECK-NEXT: mov w9, #39321 +; CHECK-NEXT: mov w8, #52429 // =0xcccd +; CHECK-NEXT: mov w9, #39321 // =0x9999 ; CHECK-NEXT: movk w8, #52428, lsl #16 ; CHECK-NEXT: movk w9, #6553, lsl #16 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 ; CHECK-NEXT: adrp x8, .LCPI19_0 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s -; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI19_0] +; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: cmhs v0.4s, v0.4s, v2.4s ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -548,21 +548,21 @@ define <4 x i32> @test_srem_even_allones_and_one(<4 x i32> %X) nounwind { ; CHECK-LABEL: test_srem_even_allones_and_one: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #28087 -; CHECK-NEXT: mov w9, #9362 +; CHECK-NEXT: mov w8, #28087 // =0x6db7 +; CHECK-NEXT: mov w9, #9362 // =0x2492 ; CHECK-NEXT: movk w8, #46811, lsl #16 ; CHECK-NEXT: movk w9, #4681, lsl #16 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 ; CHECK-NEXT: adrp x8, .LCPI20_0 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: shl v0.4s, v2.4s, #31 ; CHECK-NEXT: ushr v1.4s, v2.4s, #1 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI20_0] +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI20_0] +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -576,10 +576,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI21_0 ; CHECK-NEXT: adrp x9, .LCPI21_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI21_0] -; CHECK-NEXT: adrp x8, .LCPI21_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI21_1] +; CHECK-NEXT: adrp x8, .LCPI21_2 ; CHECK-NEXT: adrp x9, .LCPI21_3 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI21_2] @@ -587,10 +586,11 @@ ; CHECK-NEXT: adrp x8, .LCPI21_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI21_4] +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI21_4] +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -606,10 +606,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI22_0 ; CHECK-NEXT: adrp x9, .LCPI22_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI22_0] -; CHECK-NEXT: adrp x8, .LCPI22_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI22_1] +; CHECK-NEXT: adrp x8, .LCPI22_2 ; CHECK-NEXT: adrp x9, .LCPI22_3 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI22_2] @@ -617,10 +616,11 @@ ; CHECK-NEXT: adrp x8, .LCPI22_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI22_4] +; 
CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI22_4] +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -634,10 +634,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI23_0 ; CHECK-NEXT: adrp x9, .LCPI23_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI23_0] -; CHECK-NEXT: adrp x8, .LCPI23_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI23_1] +; CHECK-NEXT: adrp x8, .LCPI23_2 ; CHECK-NEXT: adrp x9, .LCPI23_3 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI23_2] @@ -645,10 +644,11 @@ ; CHECK-NEXT: adrp x8, .LCPI23_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI23_4] +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI23_4] +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -662,10 +662,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI24_0 ; CHECK-NEXT: adrp x9, .LCPI24_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI24_0] -; CHECK-NEXT: adrp x8, .LCPI24_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI24_1] +; CHECK-NEXT: adrp x8, .LCPI24_2 ; CHECK-NEXT: adrp x9, .LCPI24_3 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI24_2] @@ -673,10 +672,11 @@ ; CHECK-NEXT: adrp x8, .LCPI24_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI24_4] +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI24_4] +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -691,10 +691,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI25_0 ; CHECK-NEXT: adrp x9, .LCPI25_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI25_0] -; CHECK-NEXT: adrp x8, .LCPI25_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI25_1] +; CHECK-NEXT: adrp x8, .LCPI25_2 ; CHECK-NEXT: adrp x9, .LCPI25_3 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI25_2] @@ -702,10 +701,11 @@ ; CHECK-NEXT: adrp x8, .LCPI25_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI25_4] +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI25_4] +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -718,10 +718,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI26_0 ; CHECK-NEXT: adrp x9, .LCPI26_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI26_0] -; CHECK-NEXT: adrp x8, .LCPI26_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI26_1] +; CHECK-NEXT: adrp x8, .LCPI26_2 ; CHECK-NEXT: adrp x9, .LCPI26_3 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; 
CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI26_2] @@ -729,10 +728,11 @@ ; CHECK-NEXT: adrp x8, .LCPI26_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI26_4] +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI26_4] +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, diff --git a/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll b/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll --- a/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll +++ b/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll @@ -5,17 +5,17 @@ define <4 x i32> @test_srem_odd_25(<4 x i32> %X) nounwind { ; CHECK-LABEL: test_srem_odd_25: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #23593 -; CHECK-NEXT: mov w9, #47185 +; CHECK-NEXT: mov w8, #23593 // =0x5c29 +; CHECK-NEXT: mov w9, #47185 // =0xb851 ; CHECK-NEXT: movk w8, #49807, lsl #16 ; CHECK-NEXT: movk w9, #1310, lsl #16 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 -; CHECK-NEXT: mov w8, #28834 +; CHECK-NEXT: mov w8, #28834 // =0x70a2 ; CHECK-NEXT: movk w8, #2621, lsl #16 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s -; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: dup v0.4s, w8 +; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: cmhs v0.4s, v0.4s, v2.4s ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -29,22 +29,22 @@ define <4 x i32> @test_srem_even_100(<4 x i32> %X) nounwind { ; CHECK-LABEL: test_srem_even_100: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #23593 -; CHECK-NEXT: mov w9, #47184 +; CHECK-NEXT: mov w8, #23593 // =0x5c29 +; CHECK-NEXT: mov w9, #47184 // =0xb850 ; CHECK-NEXT: movk w8, #49807, lsl #16 ; CHECK-NEXT: movk w9, #1310, lsl #16 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 -; CHECK-NEXT: mov w8, #23592 -; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s +; CHECK-NEXT: mov w8, #23592 // =0x5c28 ; CHECK-NEXT: movk w8, #655, lsl #16 +; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: shl v0.4s, v2.4s, #30 ; CHECK-NEXT: ushr v1.4s, v2.4s, #2 -; CHECK-NEXT: dup v2.4s, w8 +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: dup v1.4s, w8 +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -58,17 +58,17 @@ define <4 x i32> @test_srem_odd_neg25(<4 x i32> %X) nounwind { ; CHECK-LABEL: test_srem_odd_neg25: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #23593 -; CHECK-NEXT: mov w9, #47185 +; CHECK-NEXT: mov w8, #23593 // =0x5c29 +; CHECK-NEXT: mov w9, #47185 // =0xb851 ; CHECK-NEXT: movk w8, #49807, lsl #16 ; CHECK-NEXT: movk w9, #1310, lsl #16 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 -; CHECK-NEXT: mov w8, #28834 +; CHECK-NEXT: mov w8, #28834 // =0x70a2 ; CHECK-NEXT: movk w8, #2621, lsl #16 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s -; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: dup v0.4s, w8 +; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: cmhs v0.4s, v0.4s, v2.4s ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -82,22 +82,22 @@ define <4 x i32> @test_srem_even_neg100(<4 x i32> %X) nounwind { ; CHECK-LABEL: test_srem_even_neg100: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #23593 -; CHECK-NEXT: mov w9, 
#47184 +; CHECK-NEXT: mov w8, #23593 // =0x5c29 +; CHECK-NEXT: mov w9, #47184 // =0xb850 ; CHECK-NEXT: movk w8, #49807, lsl #16 ; CHECK-NEXT: movk w9, #1310, lsl #16 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 -; CHECK-NEXT: mov w8, #23592 -; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s +; CHECK-NEXT: mov w8, #23592 // =0x5c28 ; CHECK-NEXT: movk w8, #655, lsl #16 +; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: shl v0.4s, v2.4s, #30 ; CHECK-NEXT: ushr v1.4s, v2.4s, #2 -; CHECK-NEXT: dup v2.4s, w8 +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: dup v1.4s, w8 +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -112,17 +112,17 @@ define <4 x i32> @test_srem_odd_undef1(<4 x i32> %X) nounwind { ; CHECK-LABEL: test_srem_odd_undef1: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #34079 +; CHECK-NEXT: mov w8, #34079 // =0x851f ; CHECK-NEXT: movk w8, #20971, lsl #16 -; CHECK-NEXT: movi v3.4s, #25 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: smull2 v2.2d, v0.4s, v1.4s ; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s ; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s ; CHECK-NEXT: sshr v2.4s, v1.4s, #3 ; CHECK-NEXT: usra v2.4s, v1.4s, #31 +; CHECK-NEXT: movi v1.4s, #25 +; CHECK-NEXT: mls v0.4s, v2.4s, v1.4s ; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: mls v0.4s, v2.4s, v3.4s ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -135,17 +135,17 @@ define <4 x i32> @test_srem_even_undef1(<4 x i32> %X) nounwind { ; CHECK-LABEL: test_srem_even_undef1: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #34079 +; CHECK-NEXT: mov w8, #34079 // =0x851f ; CHECK-NEXT: movk w8, #20971, lsl #16 -; CHECK-NEXT: movi v3.4s, #100 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: smull2 v2.2d, v0.4s, v1.4s ; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s ; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s ; CHECK-NEXT: sshr v2.4s, v1.4s, #5 ; CHECK-NEXT: usra v2.4s, v1.4s, #31 +; CHECK-NEXT: movi v1.4s, #100 +; CHECK-NEXT: mls v0.4s, v2.4s, v1.4s ; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: mls v0.4s, v2.4s, v3.4s ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -184,9 +184,9 @@ define <4 x i32> @test_srem_pow2(<4 x i32> %X) nounwind { ; CHECK-LABEL: test_srem_pow2: ; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v3.4s, v0.4s, #0 +; CHECK-NEXT: cmlt v1.4s, v0.4s, #0 ; CHECK-NEXT: mov v2.16b, v0.16b -; CHECK-NEXT: usra v2.4s, v3.4s, #28 +; CHECK-NEXT: usra v2.4s, v1.4s, #28 ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: bic v2.4s, #15 ; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s @@ -203,11 +203,11 @@ define <4 x i32> @test_srem_int_min(<4 x i32> %X) nounwind { ; CHECK-LABEL: test_srem_int_min: ; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v2.4s, v0.4s, #0 -; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: movi v3.4s, #128, lsl #24 -; CHECK-NEXT: usra v1.4s, v2.4s, #1 -; CHECK-NEXT: and v1.16b, v1.16b, v3.16b +; CHECK-NEXT: cmlt v1.4s, v0.4s, #0 +; CHECK-NEXT: mov v2.16b, v0.16b +; CHECK-NEXT: usra v2.4s, v1.4s, #1 +; CHECK-NEXT: movi v1.4s, #128, lsl #24 +; CHECK-NEXT: and v1.16b, v2.16b, v1.16b ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 diff --git a/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll b/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll --- a/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll +++ 
b/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll @@ -7,9 +7,9 @@ ; CHECK-NEXT: adrp x8, .LCPI0_1 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_1] ; CHECK-NEXT: adrp x8, .LCPI0_0 -; CHECK-NEXT: smull v1.4s, v0.4h, v1.4h ; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_0] ; CHECK-NEXT: adrp x8, .LCPI0_2 +; CHECK-NEXT: smull v1.4s, v0.4h, v1.4h ; CHECK-NEXT: shrn v1.4h, v1.4s, #16 ; CHECK-NEXT: mla v1.4h, v0.4h, v2.4h ; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_2] @@ -69,9 +69,9 @@ ; CHECK-NEXT: adrp x8, .LCPI3_0 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI3_0] ; CHECK-NEXT: adrp x8, .LCPI3_1 -; CHECK-NEXT: smull v1.4s, v0.4h, v1.4h ; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI3_1] ; CHECK-NEXT: adrp x8, .LCPI3_2 +; CHECK-NEXT: smull v1.4s, v0.4h, v1.4h ; CHECK-NEXT: shrn v1.4h, v1.4s, #16 ; CHECK-NEXT: add v1.4h, v1.4h, v0.4h ; CHECK-NEXT: sshl v1.4h, v1.4h, v2.4h @@ -91,18 +91,18 @@ ; CHECK-NEXT: movi d2, #0x00ffff0000ffff ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_0] ; CHECK-NEXT: adrp x8, .LCPI4_1 -; CHECK-NEXT: and v2.8b, v0.8b, v2.8b ; CHECK-NEXT: smull v1.4s, v0.4h, v1.4h -; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI4_1] -; CHECK-NEXT: adrp x8, .LCPI4_2 +; CHECK-NEXT: and v2.8b, v0.8b, v2.8b ; CHECK-NEXT: shrn v1.4h, v1.4s, #16 ; CHECK-NEXT: add v1.4h, v1.4h, v2.4h -; CHECK-NEXT: sshl v1.4h, v1.4h, v3.4h -; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI4_2] +; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI4_1] +; CHECK-NEXT: adrp x8, .LCPI4_2 +; CHECK-NEXT: sshl v1.4h, v1.4h, v2.4h ; CHECK-NEXT: ushr v2.4h, v1.4h, #15 ; CHECK-NEXT: mov v2.h[0], wzr ; CHECK-NEXT: add v1.4h, v1.4h, v2.4h -; CHECK-NEXT: mls v0.4h, v1.4h, v3.4h +; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI4_2] +; CHECK-NEXT: mls v0.4h, v1.4h, v2.4h ; CHECK-NEXT: ret %1 = srem <4 x i16> %x, ret <4 x i16> %1 @@ -115,19 +115,19 @@ ; CHECK-NEXT: adrp x8, .LCPI5_1 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI5_1] ; CHECK-NEXT: adrp x8, .LCPI5_0 -; CHECK-NEXT: smull v1.4s, v0.4h, v1.4h ; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI5_0] ; CHECK-NEXT: adrp x8, .LCPI5_2 +; CHECK-NEXT: smull v1.4s, v0.4h, v1.4h ; CHECK-NEXT: shrn v1.4h, v1.4s, #16 ; CHECK-NEXT: mla v1.4h, v0.4h, v2.4h ; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI5_2] ; CHECK-NEXT: adrp x8, .LCPI5_3 ; CHECK-NEXT: sshl v1.4h, v1.4h, v2.4h -; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI5_3] ; CHECK-NEXT: ushr v2.4h, v1.4h, #15 ; CHECK-NEXT: mov v2.h[0], wzr ; CHECK-NEXT: add v1.4h, v1.4h, v2.4h -; CHECK-NEXT: mls v0.4h, v1.4h, v3.4h +; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI5_3] +; CHECK-NEXT: mls v0.4h, v1.4h, v2.4h ; CHECK-NEXT: ret %1 = srem <4 x i16> %x, ret <4 x i16> %1 @@ -138,38 +138,38 @@ ; CHECK-LABEL: dont_fold_srem_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x8, #8549 // =0x2165 -; CHECK-NEXT: fmov x9, d1 +; CHECK-NEXT: fmov x10, d1 +; CHECK-NEXT: mov x9, v1.d[1] ; CHECK-NEXT: movk x8, #22795, lsl #16 ; CHECK-NEXT: mov x12, #6055 // =0x17a7 +; CHECK-NEXT: mov x11, v0.d[1] ; CHECK-NEXT: movk x8, #17096, lsl #32 ; CHECK-NEXT: movk x12, #58853, lsl #16 +; CHECK-NEXT: mov x13, #21445 // =0x53c5 ; CHECK-NEXT: movk x8, #45590, lsl #48 -; CHECK-NEXT: mov x14, #21445 // =0x53c5 -; CHECK-NEXT: mov x10, v1.d[1] ; CHECK-NEXT: movk x12, #47142, lsl #32 -; CHECK-NEXT: smulh x8, x9, x8 -; CHECK-NEXT: movk x14, #1603, lsl #16 -; CHECK-NEXT: mov x11, v0.d[1] +; CHECK-NEXT: movk x13, #1603, lsl #16 +; CHECK-NEXT: smulh x8, x10, x8 ; CHECK-NEXT: movk x12, #24749, lsl #48 -; CHECK-NEXT: add x8, x8, x9 -; CHECK-NEXT: movk x14, #15432, lsl #32 -; CHECK-NEXT: asr x13, x8, #4 -; CHECK-NEXT: movk x14, #25653, lsl #48 -; CHECK-NEXT: add x8, x13, x8, lsr #63 -; CHECK-NEXT: mov w13, #23 
// =0x17 -; CHECK-NEXT: smulh x12, x10, x12 -; CHECK-NEXT: smulh x14, x11, x14 -; CHECK-NEXT: msub x8, x8, x13, x9 -; CHECK-NEXT: asr x13, x12, #11 -; CHECK-NEXT: add x12, x13, x12, lsr #63 -; CHECK-NEXT: asr x13, x14, #8 -; CHECK-NEXT: mov w9, #5423 // =0x152f -; CHECK-NEXT: add x13, x13, x14, lsr #63 -; CHECK-NEXT: mov w14, #654 // =0x28e -; CHECK-NEXT: msub x9, x12, x9, x10 -; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: msub x10, x13, x14, x11 +; CHECK-NEXT: movk x13, #15432, lsl #32 +; CHECK-NEXT: movk x13, #25653, lsl #48 ; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: smulh x12, x9, x12 +; CHECK-NEXT: smulh x13, x11, x13 +; CHECK-NEXT: add x8, x8, x10 +; CHECK-NEXT: asr x14, x8, #4 +; CHECK-NEXT: asr x15, x12, #11 +; CHECK-NEXT: add x8, x14, x8, lsr #63 +; CHECK-NEXT: mov w14, #23 // =0x17 +; CHECK-NEXT: add x12, x15, x12, lsr #63 +; CHECK-NEXT: msub x8, x8, x14, x10 +; CHECK-NEXT: asr x10, x13, #8 +; CHECK-NEXT: mov w14, #5423 // =0x152f +; CHECK-NEXT: add x10, x10, x13, lsr #63 +; CHECK-NEXT: msub x9, x12, x14, x9 +; CHECK-NEXT: mov w12, #654 // =0x28e +; CHECK-NEXT: msub x10, x10, x12, x11 +; CHECK-NEXT: fmov d1, x8 ; CHECK-NEXT: mov v1.d[1], x9 ; CHECK-NEXT: mov v0.d[1], x10 ; CHECK-NEXT: ret @@ -246,14 +246,14 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #26215 // =0x6667 ; CHECK-NEXT: movk w8, #26214, lsl #16 -; CHECK-NEXT: movi v3.4s, #10 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: smull2 v2.2d, v0.4s, v1.4s ; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s ; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s ; CHECK-NEXT: sshr v2.4s, v1.4s, #2 ; CHECK-NEXT: usra v2.4s, v1.4s, #31 -; CHECK-NEXT: mls v0.4s, v2.4s, v3.4s +; CHECK-NEXT: movi v1.4s, #10 +; CHECK-NEXT: mls v0.4s, v2.4s, v1.4s ; CHECK-NEXT: ret %1 = srem <4 x i32> %x, ret <4 x i32> %1 @@ -281,18 +281,18 @@ define <2 x i64> @fold_srem_v2i64(<2 x i64> %x) { ; CHECK-LABEL: fold_srem_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #7378697629483820646 // =0x6666666666666666 ; CHECK-NEXT: fmov x10, d0 -; CHECK-NEXT: movk x8, #26215 +; CHECK-NEXT: mov x8, #7378697629483820646 // =0x6666666666666666 ; CHECK-NEXT: mov x9, v0.d[1] +; CHECK-NEXT: movk x8, #26215 ; CHECK-NEXT: smulh x11, x10, x8 -; CHECK-NEXT: asr x12, x11, #2 ; CHECK-NEXT: smulh x8, x9, x8 +; CHECK-NEXT: asr x12, x11, #2 ; CHECK-NEXT: add x11, x12, x11, lsr #63 +; CHECK-NEXT: asr x13, x8, #2 ; CHECK-NEXT: mov w12, #10 // =0xa ; CHECK-NEXT: msub x10, x11, x12, x10 -; CHECK-NEXT: asr x11, x8, #2 -; CHECK-NEXT: add x8, x11, x8, lsr #63 +; CHECK-NEXT: add x8, x13, x8, lsr #63 ; CHECK-NEXT: msub x8, x8, x12, x9 ; CHECK-NEXT: fmov d0, x10 ; CHECK-NEXT: mov v0.d[1], x8 @@ -305,8 +305,8 @@ ; CHECK-LABEL: fold_srem_v1i64: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov x8, #7378697629483820646 // =0x6666666666666666 ; CHECK-NEXT: fmov x9, d0 +; CHECK-NEXT: mov x8, #7378697629483820646 // =0x6666666666666666 ; CHECK-NEXT: movk x8, #26215 ; CHECK-NEXT: smulh x8, x9, x8 ; CHECK-NEXT: asr x10, x8, #2 diff --git a/llvm/test/CodeGen/AArch64/sshl_sat.ll b/llvm/test/CodeGen/AArch64/sshl_sat.ll --- a/llvm/test/CodeGen/AArch64/sshl_sat.ll +++ b/llvm/test/CodeGen/AArch64/sshl_sat.ll @@ -74,7 +74,7 @@ define i16 @combine_shlsat_constfold(i16 %x, i16 %y) nounwind { ; CHECK-LABEL: combine_shlsat_constfold: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w0, #32 +; CHECK-NEXT: mov w0, #32 // =0x20 ; CHECK-NEXT: ret %tmp = call i16 @llvm.sshl.sat.i16(i16 8, i16 2) ret i16 %tmp @@ -84,7 +84,7 @@ define i16 @combine_shlsat_satmax(i16 %x, i16 %y) nounwind { ; CHECK-LABEL: 
combine_shlsat_satmax: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w0, #32767 +; CHECK-NEXT: mov w0, #32767 // =0x7fff ; CHECK-NEXT: ret %tmp = call i16 @llvm.sshl.sat.i16(i16 8, i16 15) ret i16 %tmp @@ -94,7 +94,7 @@ define i16 @combine_shlsat_satmin(i16 %x, i16 %y) nounwind { ; CHECK-LABEL: combine_shlsat_satmin: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w0, #32768 +; CHECK-NEXT: mov w0, #32768 // =0x8000 ; CHECK-NEXT: ret %tmp = call i16 @llvm.sshl.sat.i16(i16 -8, i16 15) ret i16 %tmp @@ -107,10 +107,10 @@ ; CHECK-LABEL: combine_shlsat_vector: ; CHECK: // %bb.0: ; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: mov w0, #32 -; CHECK-NEXT: mov w1, #32767 -; CHECK-NEXT: mov w2, #65504 -; CHECK-NEXT: mov w3, #32768 +; CHECK-NEXT: mov w0, #32 // =0x20 +; CHECK-NEXT: mov w1, #32767 // =0x7fff +; CHECK-NEXT: mov w2, #65504 // =0xffe0 +; CHECK-NEXT: mov w3, #32768 // =0x8000 ; CHECK-NEXT: bl sink4xi16 ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -143,11 +143,11 @@ ; CHECK-LABEL: combine_shlsat_to_shl_no_fold: ; CHECK: // %bb.0: ; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: mov w9, #-65536 -; CHECK-NEXT: mov w10, #-2147483648 +; CHECK-NEXT: mov w9, #-65536 // =0xffff0000 +; CHECK-NEXT: mov w10, #-2147483648 // =0x80000000 ; CHECK-NEXT: ands w8, w9, w8, lsl #14 -; CHECK-NEXT: lsl w9, w8, #3 ; CHECK-NEXT: cinv w10, w10, ge +; CHECK-NEXT: lsl w9, w8, #3 ; CHECK-NEXT: cmp w8, w9, asr #3 ; CHECK-NEXT: csel w8, w10, w9, ne ; CHECK-NEXT: asr w0, w8, #16 diff --git a/llvm/test/CodeGen/AArch64/ssub_sat.ll b/llvm/test/CodeGen/AArch64/ssub_sat.ll --- a/llvm/test/CodeGen/AArch64/ssub_sat.ll +++ b/llvm/test/CodeGen/AArch64/ssub_sat.ll @@ -36,11 +36,11 @@ ; CHECK-LABEL: func16: ; CHECK: // %bb.0: ; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: mov w9, #32767 +; CHECK-NEXT: mov w9, #32767 // =0x7fff ; CHECK-NEXT: sub w8, w8, w1, sxth ; CHECK-NEXT: cmp w8, w9 ; CHECK-NEXT: csel w8, w8, w9, lt -; CHECK-NEXT: mov w9, #-32768 +; CHECK-NEXT: mov w9, #-32768 // =0xffff8000 ; CHECK-NEXT: cmn w8, #8, lsl #12 // =32768 ; CHECK-NEXT: csel w0, w8, w9, gt ; CHECK-NEXT: ret @@ -51,12 +51,12 @@ define i8 @func8(i8 %x, i8 %y) nounwind { ; CHECK-LABEL: func8: ; CHECK: // %bb.0: -; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: mov w9, #127 -; CHECK-NEXT: sub w8, w8, w1, sxtb -; CHECK-NEXT: cmp w8, #127 -; CHECK-NEXT: csel w8, w8, w9, lt -; CHECK-NEXT: mov w9, #-128 +; CHECK-NEXT: sxtb w9, w0 +; CHECK-NEXT: mov w8, #127 // =0x7f +; CHECK-NEXT: sub w9, w9, w1, sxtb +; CHECK-NEXT: cmp w9, #127 +; CHECK-NEXT: csel w8, w9, w8, lt +; CHECK-NEXT: mov w9, #-128 // =0xffffff80 ; CHECK-NEXT: cmn w8, #128 ; CHECK-NEXT: csel w0, w8, w9, gt ; CHECK-NEXT: ret @@ -67,13 +67,13 @@ define i4 @func3(i4 %x, i4 %y) nounwind { ; CHECK-LABEL: func3: ; CHECK: // %bb.0: -; CHECK-NEXT: lsl w8, w1, #28 -; CHECK-NEXT: sbfx w9, w0, #0, #4 -; CHECK-NEXT: sub w8, w9, w8, asr #28 -; CHECK-NEXT: mov w9, #7 -; CHECK-NEXT: cmp w8, #7 -; CHECK-NEXT: csel w8, w8, w9, lt -; CHECK-NEXT: mov w9, #-8 +; CHECK-NEXT: lsl w9, w1, #28 +; CHECK-NEXT: sbfx w10, w0, #0, #4 +; CHECK-NEXT: mov w8, #7 // =0x7 +; CHECK-NEXT: sub w9, w10, w9, asr #28 +; CHECK-NEXT: cmp w9, #7 +; CHECK-NEXT: csel w8, w9, w8, lt +; CHECK-NEXT: mov w9, #-8 // =0xfffffff8 ; CHECK-NEXT: cmn w8, #8 ; CHECK-NEXT: csel w0, w8, w9, gt ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_plus.ll b/llvm/test/CodeGen/AArch64/ssub_sat_plus.ll --- a/llvm/test/CodeGen/AArch64/ssub_sat_plus.ll +++ b/llvm/test/CodeGen/AArch64/ssub_sat_plus.ll @@ -37,13 +37,13 @@ define 
i16 @func16(i16 %x, i16 %y, i16 %z) nounwind { ; CHECK-LABEL: func16: ; CHECK: // %bb.0: -; CHECK-NEXT: mul w9, w1, w2 -; CHECK-NEXT: sxth w10, w0 -; CHECK-NEXT: mov w8, #32767 -; CHECK-NEXT: sub w9, w10, w9, sxth -; CHECK-NEXT: cmp w9, w8 -; CHECK-NEXT: csel w8, w9, w8, lt -; CHECK-NEXT: mov w9, #-32768 +; CHECK-NEXT: mul w8, w1, w2 +; CHECK-NEXT: sxth w9, w0 +; CHECK-NEXT: sub w8, w9, w8, sxth +; CHECK-NEXT: mov w9, #32767 // =0x7fff +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: csel w8, w8, w9, lt +; CHECK-NEXT: mov w9, #-32768 // =0xffff8000 ; CHECK-NEXT: cmn w8, #8, lsl #12 // =32768 ; CHECK-NEXT: csel w0, w8, w9, gt ; CHECK-NEXT: ret @@ -55,13 +55,13 @@ define i8 @func8(i8 %x, i8 %y, i8 %z) nounwind { ; CHECK-LABEL: func8: ; CHECK: // %bb.0: -; CHECK-NEXT: mul w9, w1, w2 -; CHECK-NEXT: sxtb w10, w0 -; CHECK-NEXT: mov w8, #127 -; CHECK-NEXT: sub w9, w10, w9, sxtb -; CHECK-NEXT: cmp w9, #127 -; CHECK-NEXT: csel w8, w9, w8, lt -; CHECK-NEXT: mov w9, #-128 +; CHECK-NEXT: mul w8, w1, w2 +; CHECK-NEXT: sxtb w9, w0 +; CHECK-NEXT: sub w8, w9, w8, sxtb +; CHECK-NEXT: mov w9, #127 // =0x7f +; CHECK-NEXT: cmp w8, #127 +; CHECK-NEXT: csel w8, w8, w9, lt +; CHECK-NEXT: mov w9, #-128 // =0xffffff80 ; CHECK-NEXT: cmn w8, #128 ; CHECK-NEXT: csel w0, w8, w9, gt ; CHECK-NEXT: ret @@ -73,14 +73,14 @@ define i4 @func4(i4 %x, i4 %y, i4 %z) nounwind { ; CHECK-LABEL: func4: ; CHECK: // %bb.0: -; CHECK-NEXT: mul w9, w1, w2 -; CHECK-NEXT: sbfx w10, w0, #0, #4 -; CHECK-NEXT: mov w8, #7 -; CHECK-NEXT: lsl w9, w9, #28 -; CHECK-NEXT: sub w9, w10, w9, asr #28 -; CHECK-NEXT: cmp w9, #7 -; CHECK-NEXT: csel w8, w9, w8, lt -; CHECK-NEXT: mov w9, #-8 +; CHECK-NEXT: mul w8, w1, w2 +; CHECK-NEXT: sbfx w9, w0, #0, #4 +; CHECK-NEXT: lsl w8, w8, #28 +; CHECK-NEXT: sub w8, w9, w8, asr #28 +; CHECK-NEXT: mov w9, #7 // =0x7 +; CHECK-NEXT: cmp w8, #7 +; CHECK-NEXT: csel w8, w8, w9, lt +; CHECK-NEXT: mov w9, #-8 // =0xfffffff8 ; CHECK-NEXT: cmn w8, #8 ; CHECK-NEXT: csel w0, w8, w9, gt ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll --- a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll @@ -45,8 +45,8 @@ define <32 x i8> @v32i8(<32 x i8> %x, <32 x i8> %y) nounwind { ; CHECK-LABEL: v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sqsub v0.16b, v0.16b, v2.16b ; CHECK-NEXT: sqsub v1.16b, v1.16b, v3.16b +; CHECK-NEXT: sqsub v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %z = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %x, <32 x i8> %y) ret <32 x i8> %z @@ -76,8 +76,8 @@ define <16 x i16> @v16i16(<16 x i16> %x, <16 x i16> %y) nounwind { ; CHECK-LABEL: v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sqsub v0.8h, v0.8h, v2.8h ; CHECK-NEXT: sqsub v1.8h, v1.8h, v3.8h +; CHECK-NEXT: sqsub v0.8h, v0.8h, v2.8h ; CHECK-NEXT: ret %z = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %x, <16 x i16> %y) ret <16 x i16> %z @@ -98,9 +98,9 @@ define void @v8i8(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-LABEL: v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: sqsub v0.8b, v1.8b, v0.8b +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: sqsub v0.8b, v0.8b, v1.8b ; CHECK-NEXT: str d0, [x2] ; CHECK-NEXT: ret %x = load <8 x i8>, ptr %px @@ -117,8 +117,8 @@ ; CHECK-NEXT: ldr s1, [x1] ; CHECK-NEXT: sshll v0.8h, v0.8b, #0 ; CHECK-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-NEXT: shl v0.4h, v0.4h, #8 ; CHECK-NEXT: shl v1.4h, v1.4h, #8 +; CHECK-NEXT: shl v0.4h, v0.4h, #8 ; CHECK-NEXT: sqsub v0.4h, v0.4h, v1.4h ; 
CHECK-NEXT: sshr v0.4h, v0.4h, #8 ; CHECK-NEXT: xtn v0.8b, v0.8h @@ -134,15 +134,15 @@ define void @v2i8(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-LABEL: v2i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ld1 { v0.b }[0], [x1] -; CHECK-NEXT: add x8, x1, #1 -; CHECK-NEXT: ld1 { v1.b }[0], [x0] -; CHECK-NEXT: add x9, x0, #1 +; CHECK-NEXT: ld1 { v0.b }[0], [x0] +; CHECK-NEXT: ld1 { v1.b }[0], [x1] +; CHECK-NEXT: add x8, x0, #1 +; CHECK-NEXT: add x9, x1, #1 ; CHECK-NEXT: ld1 { v0.b }[4], [x8] ; CHECK-NEXT: ld1 { v1.b }[4], [x9] -; CHECK-NEXT: shl v0.2s, v0.2s, #24 ; CHECK-NEXT: shl v1.2s, v1.2s, #24 -; CHECK-NEXT: sqsub v0.2s, v1.2s, v0.2s +; CHECK-NEXT: shl v0.2s, v0.2s, #24 +; CHECK-NEXT: sqsub v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ushr v0.2s, v0.2s, #24 ; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 @@ -159,9 +159,9 @@ define void @v4i16(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-LABEL: v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: sqsub v0.4h, v1.4h, v0.4h +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: sqsub v0.4h, v0.4h, v1.4h ; CHECK-NEXT: str d0, [x2] ; CHECK-NEXT: ret %x = load <4 x i16>, ptr %px @@ -174,15 +174,15 @@ define void @v2i16(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-LABEL: v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ld1 { v0.h }[0], [x1] -; CHECK-NEXT: add x8, x1, #2 -; CHECK-NEXT: ld1 { v1.h }[0], [x0] -; CHECK-NEXT: add x9, x0, #2 +; CHECK-NEXT: ld1 { v0.h }[0], [x0] +; CHECK-NEXT: ld1 { v1.h }[0], [x1] +; CHECK-NEXT: add x8, x0, #2 +; CHECK-NEXT: add x9, x1, #2 ; CHECK-NEXT: ld1 { v0.h }[2], [x8] ; CHECK-NEXT: ld1 { v1.h }[2], [x9] -; CHECK-NEXT: shl v0.2s, v0.2s, #16 ; CHECK-NEXT: shl v1.2s, v1.2s, #16 -; CHECK-NEXT: sqsub v0.2s, v1.2s, v0.2s +; CHECK-NEXT: shl v0.2s, v0.2s, #16 +; CHECK-NEXT: sqsub v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ushr v0.2s, v0.2s, #16 ; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 @@ -225,9 +225,9 @@ define void @v1i8(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-LABEL: v1i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr b0, [x1] -; CHECK-NEXT: ldr b1, [x0] -; CHECK-NEXT: sqsub v0.8b, v1.8b, v0.8b +; CHECK-NEXT: ldr b0, [x0] +; CHECK-NEXT: ldr b1, [x1] +; CHECK-NEXT: sqsub v0.8b, v0.8b, v1.8b ; CHECK-NEXT: st1 { v0.b }[0], [x2] ; CHECK-NEXT: ret %x = load <1 x i8>, ptr %px @@ -240,9 +240,9 @@ define void @v1i16(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-LABEL: v1i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr h0, [x1] -; CHECK-NEXT: ldr h1, [x0] -; CHECK-NEXT: sqsub v0.4h, v1.4h, v0.4h +; CHECK-NEXT: ldr h0, [x0] +; CHECK-NEXT: ldr h1, [x1] +; CHECK-NEXT: sqsub v0.4h, v0.4h, v1.4h ; CHECK-NEXT: str h0, [x2] ; CHECK-NEXT: ret %x = load <1 x i16>, ptr %px @@ -300,8 +300,8 @@ define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind { ; CHECK-LABEL: v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: sqsub v0.4s, v0.4s, v2.4s ; CHECK-NEXT: sqsub v1.4s, v1.4s, v3.4s +; CHECK-NEXT: sqsub v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %z = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> %x, <8 x i32> %y) ret <8 x i32> %z @@ -331,8 +331,8 @@ define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind { ; CHECK-LABEL: v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: sqsub v0.2d, v0.2d, v2.2d ; CHECK-NEXT: sqsub v1.2d, v1.2d, v3.2d +; CHECK-NEXT: sqsub v0.2d, v0.2d, v2.2d ; CHECK-NEXT: ret %z = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> %x, <4 x i64> %y) ret <4 x i64> %z @@ -356,16 +356,16 @@ ; CHECK-NEXT: subs x8, x2, x6 ; CHECK-NEXT: sbcs x9, x3, x7 ; CHECK-NEXT: asr x10, x9, #63 +; CHECK-NEXT: eor x11, 
x10, #0x8000000000000000
 ; CHECK-NEXT: csel x2, x10, x8, vs
-; CHECK-NEXT: eor x8, x10, #0x8000000000000000
-; CHECK-NEXT: csel x3, x8, x9, vs
+; CHECK-NEXT: csel x3, x11, x9, vs
 ; CHECK-NEXT: subs x8, x0, x4
 ; CHECK-NEXT: sbcs x9, x1, x5
 ; CHECK-NEXT: asr x10, x9, #63
 ; CHECK-NEXT: csel x8, x10, x8, vs
-; CHECK-NEXT: eor x10, x10, #0x8000000000000000
-; CHECK-NEXT: csel x1, x10, x9, vs
+; CHECK-NEXT: eor x11, x10, #0x8000000000000000
 ; CHECK-NEXT: fmov d0, x8
+; CHECK-NEXT: csel x1, x11, x9, vs
 ; CHECK-NEXT: mov v0.d[1], x1
 ; CHECK-NEXT: fmov x0, d0
 ; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/stack-guard-sysreg.ll b/llvm/test/CodeGen/AArch64/stack-guard-sysreg.ll
--- a/llvm/test/CodeGen/AArch64/stack-guard-sysreg.ll
+++ b/llvm/test/CodeGen/AArch64/stack-guard-sysreg.ll
@@ -20,9 +20,9 @@
 ; RUN: llc %t/e2.ll -verify-machineinstrs -o - | \
 ; RUN: FileCheck --check-prefix=CHECK --check-prefix=CHECK-NPOT-NEG-OFFSET %s
 ; RUN: llc %t/f2.ll -verify-machineinstrs -o - | \
-; RUN: FileCheck --check-prefix=CHECK --check-prefix=CHECK-257-OFFSET %s
+; RUN: FileCheck --check-prefix=CHECK-ADD --check-prefix=CHECK-257-OFFSET %s
 ; RUN: llc %t/g2.ll -verify-machineinstrs -o - | \
-; RUN: FileCheck --check-prefix=CHECK --check-prefix=CHECK-MINUS-257-OFFSET %s
+; RUN: FileCheck --check-prefix=CHECK-ADD --check-prefix=CHECK-MINUS-257-OFFSET %s
 ; XFAIL
 ; RUN: not --crash llc %t/h2.ll -o - 2>&1 | \
@@ -39,59 +39,101 @@
 ; Verify that we `mrs` from `SP_EL0` twice, rather than load from
 ; __stack_chk_guard.
 define dso_local void @foo(i64 %t) local_unnamed_addr #0 {
-; CHECK-LABEL: foo:
-; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-LABEL: foo: // @foo
+; CHECK: .cfi_startproc
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: mov x29, sp
+; CHECK-NEXT: mov x29, sp
 ; CHECK-NEXT: .cfi_def_cfa w29, 16
 ; CHECK-NEXT: .cfi_offset w30, -8
 ; CHECK-NEXT: .cfi_offset w29, -16
 ; CHECK-NEXT: .cfi_remember_state
-; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: mrs x8, SP_EL0
-; CHECK-NEXT: lsl x9, x0, #2
-; CHECK-NO-OFFSET: ldr x8, [x8]
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: mrs x8, SP_EL0
+; CHECK-NEXT: lsl x9, x0, #2
+; CHECK-NO-OFFSET: ldr x8, [x8]
 ; CHECK-POSITIVE-OFFSET: ldr x8, [x8, #8]
 ; CHECK-NEGATIVE-OFFSET: ldur x8, [x8, #-8]
 ; CHECK-NPOT-OFFSET: ldur x8, [x8, #1]
 ; CHECK-NPOT-NEG-OFFSET: ldur x8, [x8, #-1]
-; CHECK-257-OFFSET: add x8, x8, #257
-; CHECK-MINUS-257-OFFSET: sub x8, x8, #257
-; CHECK-NEXT: add x9, x9, #15
-; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
-; CHECK-257-OFFSET-NEXT: ldr x8, [x8]
-; CHECK-MINUS-257-OFFSET-NEXT: ldr x8, [x8]
-; CHECK-NEXT: stur x8, [x29, #-8]
-; CHECK-NEXT: mov x8, sp
-; CHECK-NEXT: sub x0, x8, x9
-; CHECK-NEXT: mov sp, x0
-; CHECK-NEXT: bl baz
-; CHECK-NEXT: mrs x8, SP_EL0
+; CHECK-NEXT: add x9, x9, #15
+; CHECK-NEXT: stur x8, [x29, #-8]
+; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0
+; CHECK-NEXT: sub x0, x8, x9
+; CHECK-NEXT: mov sp, x0
+; CHECK-NEXT: bl baz
+; CHECK-NEXT: mrs x8, SP_EL0
 ; CHECK-NO-OFFSET: ldr x8, [x8]
 ; CHECK-POSITIVE-OFFSET: ldr x8, [x8, #8]
 ; CHECK-NEGATIVE-OFFSET: ldur x8, [x8, #-8]
 ; CHECK-NPOT-OFFSET: ldur x8, [x8, #1]
 ; CHECK-NPOT-NEG-OFFSET: ldur x8, [x8, #-1]
-; CHECK-257-OFFSET: add x8, x8, #257
-; CHECK-257-OFFSET-NEXT: ldr x8, [x8]
-; CHECK-MINUS-257-OFFSET: sub x8, x8, #257
-; CHECK-MINUS-257-OFFSET-NEXT: ldr x8, [x8]
-; CHECK-NEXT: ldur x9, [x29, #-8]
-; CHECK-NEXT: cmp x8, x9
-; CHECK-NEXT: b.ne .LBB0_2
-; CHECK-NEXT: // %bb.1: // %entry
-; CHECK-NEXT: mov sp, x29
-; CHECK-NEXT: .cfi_def_cfa wsp, 16
-; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
-; CHECK-NEXT: .cfi_def_cfa_offset 0
-; CHECK-NEXT: .cfi_restore w30
-; CHECK-NEXT: .cfi_restore w29
-; CHECK-NEXT: ret
-; CHECK-NEXT: .LBB0_2: // %entry
-; CHECK-NEXT: .cfi_restore_state
-; CHECK-NEXT: bl __stack_chk_fail
-; CHECK-NOT: __stack_chk_guard
+; CHECK-NEXT: ldur x9, [x29, #-8]
+; CHECK-NEXT: cmp x8, x9
+; CHECK-NEXT: b.ne .LBB0_2
+; CHECK-NEXT: // %bb.1: // %entry
+; CHECK-NEXT: mov sp, x29
+; CHECK-NEXT: .cfi_def_cfa wsp, 16
+; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: .cfi_restore w30
+; CHECK-NEXT: .cfi_restore w29
+; CHECK-NEXT: ret
+; CHECK-NEXT: .LBB0_2: // %entry
+; CHECK-NEXT: .cfi_restore_state
+; CHECK-NEXT: bl __stack_chk_fail
+; CHECK-NEXT: .Lfunc_end0:
+; CHECK-NEXT: .size foo, .Lfunc_end0-foo
+; CHECK-NEXT: .cfi_endproc
+; CHECK-NEXT: // -- End function
+; CHECK-NEXT: .section ".note.GNU-stack","",@progbits
+
+
+; CHECK-ADD: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-ADD-NEXT: .cfi_def_cfa_offset 16
+; CHECK-ADD-NEXT: mov x29, sp
+; CHECK-ADD-NEXT: .cfi_def_cfa w29, 16
+; CHECK-ADD-NEXT: .cfi_offset w30, -8
+; CHECK-ADD-NEXT: .cfi_offset w29, -16
+; CHECK-ADD-NEXT: .cfi_remember_state
+; CHECK-ADD-NEXT: sub sp, sp, #16
+; CHECK-ADD-NEXT: mrs x8, SP_EL0
+; CHECK-ADD-NEXT: lsl x9, x0, #2
+; CHECK-MINUS-257-OFFSET: sub x8, x8, #257
+; CHECK-257-OFFSET: add x8, x8, #257
+; CHECK-ADD-NEXT: ldr x8, [x8]
+; CHECK-ADD-NEXT: add x9, x9, #15
+; CHECK-ADD-NEXT: and x9, x9, #0xfffffffffffffff0
+; CHECK-ADD-NEXT: stur x8, [x29, #-8]
+; CHECK-ADD-NEXT: mov x8, sp
+; CHECK-ADD-NEXT: sub x0, x8, x9
+; CHECK-ADD-NEXT: mov sp, x0
+; CHECK-ADD-NEXT: bl baz
+; CHECK-ADD-NEXT: mrs x8, SP_EL0
+; CHECK-257-OFFSET: add x8, x8, #257
+; CHECK-MINUS-257-OFFSET: sub x8, x8, #257
+; CHECK-ADD-NEXT: ldr x8, [x8]
+; CHECK-ADD-NEXT: ldur x9, [x29, #-8]
+; CHECK-ADD-NEXT: cmp x8, x9
+; CHECK-ADD-NEXT: b.ne .LBB0_2
+; CHECK-ADD-NEXT: // %bb.1: // %entry
+; CHECK-ADD-NEXT: mov sp, x29
+; CHECK-ADD-NEXT: .cfi_def_cfa wsp, 16
+; CHECK-ADD-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-ADD-NEXT: .cfi_def_cfa_offset 0
+; CHECK-ADD-NEXT: .cfi_restore w30
+; CHECK-ADD-NEXT: .cfi_restore w29
+; CHECK-ADD-NEXT: ret
+; CHECK-ADD-NEXT: .LBB0_2: // %entry
+; CHECK-ADD-NEXT: .cfi_restore_state
+; CHECK-ADD-NEXT: bl __stack_chk_fail
+; CHECK-ADD-NEXT: .Lfunc_end0:
+; CHECK-ADD-NEXT: .size foo, .Lfunc_end0-foo
+; CHECK-ADD-NEXT: .cfi_endproc
+; CHECK-ADD-NEXT: // -- End function
+; CHECK-ADD-NEXT: .section ".note.GNU-stack","",@progbits
 entry:
   %vla = alloca i32, i64 %t, align 4
   call void @baz(ptr nonnull %vla)
diff --git a/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll b/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll
--- a/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll
+++ b/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll
@@ -123,8 +123,8 @@
 ; CHECK-NEXT: str x30, [sp, #-16]!
// 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: mov w0, #42 -; CHECK-NEXT: mov w1, #43 +; CHECK-NEXT: mov w0, #42 // =0x2a +; CHECK-NEXT: mov w1, #43 // =0x2b ; CHECK-NEXT: bl varargf ; CHECK-NEXT: .Ltmp6: ; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload @@ -177,7 +177,7 @@ ; CHECK-NEXT: bl consume ; CHECK-NEXT: b .LBB8_3 ; CHECK-NEXT: .LBB8_2: -; CHECK-NEXT: mov w19, #1 +; CHECK-NEXT: mov w19, #1 // =0x1 ; CHECK-NEXT: .LBB8_3: // %common.ret ; CHECK-NEXT: and w0, w19, #0x1 ; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload @@ -208,13 +208,13 @@ ; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: mov x18, xzr -; CHECK-NEXT: ldr q0, [sp, #48] ; CHECK-NEXT: ldr x8, [sp, #64] -; CHECK-NEXT: mov w0, #42 -; CHECK-NEXT: mov w1, #17 -; CHECK-NEXT: str q0, [sp] +; CHECK-NEXT: ldr q0, [sp, #48] +; CHECK-NEXT: mov x18, xzr +; CHECK-NEXT: mov w0, #42 // =0x2a +; CHECK-NEXT: mov w1, #17 // =0x11 ; CHECK-NEXT: str x8, [sp, #16] +; CHECK-NEXT: str q0, [sp] ; CHECK-NEXT: bl consume_attributes ; CHECK-NEXT: .Ltmp9: ; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/sve-abd.ll b/llvm/test/CodeGen/AArch64/sve-abd.ll --- a/llvm/test/CodeGen/AArch64/sve-abd.ll +++ b/llvm/test/CodeGen/AArch64/sve-abd.ll @@ -251,9 +251,9 @@ ; CHECK-NEXT: and z1.s, z1.s, #0xff ; CHECK-NEXT: uunpkhi z2.d, z0.s ; CHECK-NEXT: uunpklo z0.d, z0.s +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: uunpkhi z3.d, z1.s ; CHECK-NEXT: uunpklo z1.d, z1.s -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: sub z0.d, z0.d, z1.d ; CHECK-NEXT: sub z1.d, z2.d, z3.d ; CHECK-NEXT: abs z1.d, p0/m, z1.d diff --git a/llvm/test/CodeGen/AArch64/sve-alloca.ll b/llvm/test/CodeGen/AArch64/sve-alloca.ll --- a/llvm/test/CodeGen/AArch64/sve-alloca.ll +++ b/llvm/test/CodeGen/AArch64/sve-alloca.ll @@ -67,33 +67,33 @@ ; CHECK-NEXT: bl bar ; CHECK-NEXT: addvl sp, x29, #-18 ; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p11, [sp, #8, 
mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: mov sp, x29 ; CHECK-NEXT: ldp x28, x19, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/sve-bitcast.ll b/llvm/test/CodeGen/AArch64/sve-bitcast.ll --- a/llvm/test/CodeGen/AArch64/sve-bitcast.ll +++ b/llvm/test/CodeGen/AArch64/sve-bitcast.ll @@ -492,9 +492,9 @@ ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ptrue p1.h ; CHECK-NEXT: st1h { z0.s }, p0, [sp, #1, mul vl] -; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: ld1b { z0.h }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: ld1b { z0.h }, p1/z, [sp, #1, mul vl] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -508,9 +508,9 @@ ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ptrue p1.h ; CHECK-NEXT: st1w { z0.d }, p0, [sp, #1, mul vl] -; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: ld1b { z0.h }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: ld1b { z0.h }, p1/z, [sp, #1, mul vl] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -533,9 +533,9 @@ ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ptrue p1.h ; CHECK-NEXT: st1h { z0.s }, p0, [sp, #1, mul vl] -; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: ld1b { z0.h }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: ld1b { z0.h }, p1/z, [sp, #1, mul vl] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -549,9 +549,9 @@ ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ptrue p1.h ; CHECK-NEXT: st1w { z0.d }, p0, [sp, #1, mul vl] -; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: ld1b { z0.h }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: ld1b { z0.h }, p1/z, [sp, #1, mul vl] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -574,9 +574,9 @@ ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ptrue p1.h ; CHECK-NEXT: st1h { z0.s }, p0, [sp, #1, mul vl] -; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: ld1b { z0.h }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: ld1b { z0.h }, p1/z, [sp, #1, mul vl] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -594,9 +594,9 @@ ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: st1b { z0.h }, p0, [sp, #1, mul vl] -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: ld1h { z0.s }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: ld1h { z0.s }, p1/z, [sp, #1, mul vl] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -610,9 +610,9 @@ ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: st1w { z0.d }, p0, [sp, #1, mul vl] -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: ld1h { z0.s }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: ld1h { z0.s }, p1/z, [sp, #1, mul vl] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -643,9 +643,9 @@ ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: st1w { z0.d }, p0, [sp, #1, mul vl] -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: ld1h { z0.s }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: ld1h { z0.s }, p1/z, [sp, #1, mul vl] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -680,9 +680,9 @@ ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: st1b { z0.h }, p0, [sp, #1, mul vl] -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: ld1w { z0.d }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: ld1w { z0.d }, p1/z, [sp, #1, mul vl] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -696,9 +696,9 @@ ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: st1h { z0.s }, p0, [sp, #1, mul vl] -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: ld1w { z0.d }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: ld1w { z0.d }, p1/z, [sp, #1, mul vl] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -721,9 +721,9 @@ ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: st1h { z0.s }, p0, [sp, #1, mul vl] -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: ld1w { z0.d }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: ld1w { z0.d }, p1/z, [sp, #1, mul vl] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -754,9 +754,9 @@ ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: st1h { z0.s }, p0, [sp, #1, mul vl] -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: ld1w { z0.d }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: ld1w { z0.d }, p1/z, [sp, #1, mul vl] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -840,9 +840,9 @@ ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: st1b { z0.h }, p0, [sp, #1, mul vl] -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: ld1h { z0.s }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: ld1h { z0.s }, p1/z, [sp, #1, mul vl] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -864,9 +864,9 @@ ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: st1w { z0.d }, p0, [sp, #1, mul vl] -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: ld1h { z0.s }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: ld1h { z0.s }, p1/z, [sp, #1, mul vl] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -889,9 +889,9 @@ ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: st1w { z0.d }, p0, [sp, #1, mul vl] -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: ld1h { z0.s }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: ld1h { z0.s }, p1/z, [sp, #1, mul vl] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -926,9 +926,9 @@ ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: st1b { z0.h }, p0, [sp, #1, mul vl] -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: ld1w { z0.d }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: ld1w { z0.d }, p1/z, [sp, #1, mul vl] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -942,9 +942,9 @@ ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: st1h { z0.s }, p0, [sp, #1, mul vl] -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: ld1w { z0.d }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: ld1w { z0.d }, p1/z, [sp, #1, mul vl] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -975,9 +975,9 @@ ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: st1h { z0.s }, p0, [sp, #1, mul vl] -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: ld1w { z0.d }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: ld1w { z0.d }, p1/z, [sp, #1, mul vl] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -1000,9 +1000,9 @@ ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: st1h { z0.s }, p0, [sp, #1, mul vl] -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: ld1w { z0.d }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: ld1w { z0.d }, p1/z, [sp, #1, mul vl] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -1086,9 +1086,9 @@ ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: st1b { z0.h }, p0, [sp, #1, mul vl] -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: ld1h { z0.s }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: ld1h { z0.s }, p1/z, [sp, #1, mul vl] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -1110,9 +1110,9 @@ ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: st1w { z0.d }, p0, [sp, #1, mul vl] -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: ld1h { z0.s }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: ld1h { z0.s }, p1/z, [sp, #1, mul vl] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -1143,9 +1143,9 @@ ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: st1w { z0.d }, p0, [sp, #1, mul vl] -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: ld1h { z0.s }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: ld1h { z0.s }, p1/z, [sp, #1, mul vl] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -1172,9 +1172,9 @@ ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: st1h { z0.d }, p0, [sp, #3, mul vl] -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: ld1b { z0.s }, p0/z, [sp, #3, mul vl] +; CHECK-NEXT: ld1b { z0.s }, p1/z, [sp, #3, mul vl] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -1198,9 +1198,9 @@ ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: st1h { z0.d }, p0, [sp, #3, mul vl] -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: ld1b { z0.s }, p0/z, [sp, #3, mul vl] +; CHECK-NEXT: ld1b { z0.s }, p1/z, [sp, #3, mul vl] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -1216,9 +1216,9 @@ ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: st1h { z0.d }, p0, [sp, #3, mul vl] -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: ld1b { z0.s }, p0/z, [sp, #3, mul vl] +; CHECK-NEXT: ld1b { z0.s }, p1/z, [sp, #3, mul vl] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -1236,9 +1236,9 @@ ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: st1b { z0.s }, p0, [sp, #3, mul vl] -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: ld1h { z0.d }, p0/z, [sp, #3, mul vl] +; CHECK-NEXT: ld1h { z0.d }, p1/z, [sp, #3, mul vl] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -1304,9 +1304,9 @@ ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: st1h { z0.d }, p0, [sp] -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp] +; CHECK-NEXT: ld1w { z0.s }, p1/z, [sp] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -1322,9 +1322,9 @@ ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: st1h { z0.d }, p0, [sp] -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp] +; CHECK-NEXT: ld1w { z0.s }, p1/z, [sp] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -1342,9 +1342,9 @@ ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: st1b { z0.s }, p0, [sp, #3, mul vl] -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: ld1h { z0.d }, p0/z, [sp, #3, mul vl] +; CHECK-NEXT: ld1h { z0.d }, p1/z, [sp, #3, mul vl] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -1391,9 +1391,9 @@ ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: st1b { z0.s }, p0, [sp, #3, mul vl] -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: ld1h { z0.d }, p0/z, [sp, #3, mul vl] +; CHECK-NEXT: ld1h { z0.d }, p1/z, [sp, #3, mul vl] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret @@ -1506,10 +1506,10 @@ ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z0.h ; CHECK-NEXT: st1h { z0.s }, p0, [sp, #1, mul vl] -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: ld1w { z0.d }, p0/z, [sp, #1, mul vl] +; CHECK-NEXT: ld1w { z0.d }, p1/z, [sp, #1, mul vl] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-breakdown-scalable-vectortype.ll b/llvm/test/CodeGen/AArch64/sve-breakdown-scalable-vectortype.ll --- a/llvm/test/CodeGen/AArch64/sve-breakdown-scalable-vectortype.ll +++ b/llvm/test/CodeGen/AArch64/sve-breakdown-scalable-vectortype.ll @@ -16,9 +16,9 @@ ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: str z9, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov z9.d, z1.d ; CHECK-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: mov z8.d, z2.d -; CHECK-NEXT: mov z9.d, z1.d ; CHECK-NEXT: tbz w0, #0, .LBB0_2 ; CHECK-NEXT: // %bb.1: // %L1 ; CHECK-NEXT: bl bar @@ -44,9 +44,9 @@ ; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: str z9, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov z9.d, z1.d ; CHECK-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: mov z8.d, z2.d -; CHECK-NEXT: mov z9.d, z1.d ; CHECK-NEXT: tbz w0, #0, .LBB1_2 ; CHECK-NEXT: // %bb.1: // %L1 ; CHECK-NEXT: bl bar @@ -72,9 +72,9 @@ ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: str z9, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov z9.d, z1.d ; CHECK-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: mov z8.d, z2.d -; CHECK-NEXT: mov z9.d, z1.d ; CHECK-NEXT: tbz w0, #0, .LBB2_2 ; CHECK-NEXT: // %bb.1: // %L1 ; CHECK-NEXT: bl bar @@ -100,9 +100,9 @@ ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: str z9, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov z9.d, z1.d ; CHECK-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: mov z8.d, z2.d -; CHECK-NEXT: mov z9.d, z1.d ; CHECK-NEXT: tbz w0, #0, .LBB3_2 ; CHECK-NEXT: // %bb.1: // %L1 ; CHECK-NEXT: bl bar @@ -128,9 +128,9 @@ ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: str z9, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov z9.d, z1.d ; CHECK-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: mov z8.d, z2.d -; CHECK-NEXT: mov z9.d, z1.d ; CHECK-NEXT: tbz w0, #0, .LBB4_2 ; CHECK-NEXT: // %bb.1: // %L1 ; CHECK-NEXT: bl bar @@ -156,9 +156,9 @@ ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: str z9, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov z9.d, z1.d ; CHECK-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: mov z8.d, z2.d -; CHECK-NEXT: mov z9.d, z1.d ; CHECK-NEXT: tbz w0, #0, .LBB5_2 ; CHECK-NEXT: // %bb.1: // %L1 ; CHECK-NEXT: bl bar @@ -184,9 +184,9 @@ ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: str z9, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov z9.d, z1.d ; CHECK-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: mov z8.d, z2.d -; CHECK-NEXT: mov z9.d, z1.d ; CHECK-NEXT: tbz w0, #0, .LBB6_2 ; CHECK-NEXT: // %bb.1: // %L1 ; CHECK-NEXT: bl bar @@ -216,19 +216,19 @@ ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-3 ; CHECK-NEXT: str z10, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov z10.d, z1.d ; CHECK-NEXT: str z9, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: mov z9.d, z2.d ; CHECK-NEXT: str z8, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: mov z8.d, z3.d -; CHECK-NEXT: mov z9.d, z2.d -; CHECK-NEXT: mov z10.d, z1.d ; CHECK-NEXT: tbz w0, #0, .LBB7_2 ; CHECK-NEXT: // %bb.1: // %L1 ; CHECK-NEXT: bl bar ; CHECK-NEXT: .LBB7_2: // %common.ret ; CHECK-NEXT: mov z0.d, z10.d ; CHECK-NEXT: mov z1.d, z9.d -; CHECK-NEXT: mov z2.d, z8.d ; CHECK-NEXT: ldr z10, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov z2.d, z8.d ; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #3 @@ -248,19 +248,19 @@ ; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-3 ; CHECK-NEXT: str z10, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov z10.d, z1.d ; CHECK-NEXT: str z9, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: mov z9.d, z2.d ; CHECK-NEXT: str z8, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: mov z8.d, z3.d -; CHECK-NEXT: mov z9.d, z2.d -; CHECK-NEXT: mov z10.d, z1.d ; CHECK-NEXT: tbz w0, #0, .LBB8_2 ; CHECK-NEXT: // %bb.1: // %L1 ; CHECK-NEXT: bl bar ; CHECK-NEXT: .LBB8_2: // %common.ret ; CHECK-NEXT: mov z0.d, z10.d ; CHECK-NEXT: mov z1.d, z9.d -; CHECK-NEXT: mov z2.d, z8.d ; CHECK-NEXT: ldr z10, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov z2.d, z8.d ; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #3 @@ -280,19 +280,19 @@ ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-3 ; CHECK-NEXT: str z10, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov z10.d, z1.d ; CHECK-NEXT: str z9, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: mov z9.d, z2.d ; CHECK-NEXT: str z8, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: mov z8.d, z3.d -; CHECK-NEXT: mov z9.d, z2.d -; CHECK-NEXT: mov z10.d, z1.d ; CHECK-NEXT: tbz w0, #0, .LBB9_2 ; CHECK-NEXT: // %bb.1: // %L1 ; CHECK-NEXT: bl bar ; CHECK-NEXT: .LBB9_2: // %common.ret ; CHECK-NEXT: mov z0.d, z10.d ; CHECK-NEXT: mov z1.d, z9.d -; CHECK-NEXT: mov z2.d, z8.d ; CHECK-NEXT: ldr z10, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov z2.d, z8.d ; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #3 @@ -312,19 +312,19 @@ ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-3 ; CHECK-NEXT: str z10, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov z10.d, z1.d ; CHECK-NEXT: str z9, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: mov z9.d, z2.d ; CHECK-NEXT: str z8, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: mov z8.d, z3.d -; CHECK-NEXT: mov z9.d, z2.d -; CHECK-NEXT: mov z10.d, z1.d ; CHECK-NEXT: tbz w0, #0, .LBB10_2 ; CHECK-NEXT: // %bb.1: // %L1 ; CHECK-NEXT: bl bar ; CHECK-NEXT: .LBB10_2: // %common.ret ; CHECK-NEXT: mov z0.d, z10.d ; CHECK-NEXT: mov z1.d, z9.d -; CHECK-NEXT: mov z2.d, z8.d ; CHECK-NEXT: ldr z10, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov z2.d, z8.d ; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #3 @@ -344,19 +344,19 @@ ; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-3 ; CHECK-NEXT: str z10, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov z10.d, z1.d ; CHECK-NEXT: str z9, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: mov z9.d, z2.d ; CHECK-NEXT: str z8, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: mov z8.d, z3.d -; CHECK-NEXT: mov z9.d, z2.d -; CHECK-NEXT: mov z10.d, z1.d ; CHECK-NEXT: tbz w0, #0, .LBB11_2 ; CHECK-NEXT: // %bb.1: // %L1 ; CHECK-NEXT: bl bar ; CHECK-NEXT: .LBB11_2: // %common.ret ; CHECK-NEXT: mov z0.d, z10.d ; CHECK-NEXT: mov z1.d, z9.d -; CHECK-NEXT: mov z2.d, z8.d ; CHECK-NEXT: ldr z10, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov z2.d, z8.d ; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #3 @@ -376,19 +376,19 @@ ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-3 ; CHECK-NEXT: str z10, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov z10.d, z1.d ; CHECK-NEXT: str z9, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: mov z9.d, z2.d ; CHECK-NEXT: str z8, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: mov z8.d, z3.d -; CHECK-NEXT: mov z9.d, z2.d -; CHECK-NEXT: mov z10.d, z1.d ; CHECK-NEXT: tbz w0, #0, .LBB12_2 ; CHECK-NEXT: // %bb.1: // %L1 ; CHECK-NEXT: bl bar ; CHECK-NEXT: .LBB12_2: // %common.ret ; CHECK-NEXT: mov z0.d, z10.d ; CHECK-NEXT: mov z1.d, z9.d -; CHECK-NEXT: mov z2.d, z8.d ; CHECK-NEXT: ldr z10, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov z2.d, z8.d ; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #3 @@ -408,19 +408,19 @@ ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-3 ; CHECK-NEXT: str z10, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov z10.d, z1.d ; CHECK-NEXT: str z9, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: mov z9.d, z2.d ; CHECK-NEXT: str z8, [sp, #2, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: mov z8.d, z3.d -; CHECK-NEXT: mov z9.d, z2.d -; CHECK-NEXT: mov z10.d, z1.d ; CHECK-NEXT: tbz w0, #0, .LBB13_2 ; CHECK-NEXT: // %bb.1: // %L1 ; CHECK-NEXT: bl bar ; CHECK-NEXT: .LBB13_2: // %common.ret ; CHECK-NEXT: mov z0.d, z10.d ; CHECK-NEXT: mov z1.d, z9.d -; CHECK-NEXT: mov z2.d, z8.d ; CHECK-NEXT: ldr z10, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov z2.d, z8.d ; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #3 @@ -444,22 +444,22 @@ ; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: str z11, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov z11.d, z1.d ; CHECK-NEXT: str z10, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: mov z10.d, z2.d ; CHECK-NEXT: str z9, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: mov z9.d, z3.d ; CHECK-NEXT: str z8, [sp, #3, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: mov z8.d, z4.d -; CHECK-NEXT: mov z9.d, z3.d -; CHECK-NEXT: mov z10.d, z2.d -; CHECK-NEXT: mov z11.d, z1.d ; CHECK-NEXT: tbz w0, #0, .LBB14_2 ; CHECK-NEXT: // %bb.1: // %L1 ; CHECK-NEXT: bl bar ; CHECK-NEXT: .LBB14_2: // %common.ret ; CHECK-NEXT: mov z0.d, z11.d ; CHECK-NEXT: mov z1.d, z10.d +; CHECK-NEXT: ldr z11, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov z2.d, z9.d ; CHECK-NEXT: mov z3.d, z8.d -; CHECK-NEXT: ldr z11, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldr z10, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #3, mul vl] // 16-byte Folded Reload @@ -480,22 +480,22 @@ ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: str z11, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov z11.d, z1.d ; CHECK-NEXT: str z10, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: mov z10.d, z2.d ; CHECK-NEXT: str z9, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: mov z9.d, z3.d ; CHECK-NEXT: str z8, [sp, #3, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: mov z8.d, z4.d -; CHECK-NEXT: mov z9.d, z3.d -; CHECK-NEXT: mov z10.d, z2.d -; CHECK-NEXT: mov z11.d, z1.d ; CHECK-NEXT: tbz w0, #0, .LBB15_2 ; CHECK-NEXT: // %bb.1: // %L1 ; CHECK-NEXT: bl bar ; CHECK-NEXT: .LBB15_2: // %common.ret ; CHECK-NEXT: mov z0.d, z11.d ; CHECK-NEXT: mov z1.d, z10.d +; CHECK-NEXT: ldr z11, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov z2.d, z9.d ; CHECK-NEXT: mov z3.d, z8.d -; CHECK-NEXT: ldr z11, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldr z10, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #3, mul vl] // 16-byte Folded Reload @@ -516,22 +516,22 @@ ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: str z11, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov z11.d, z1.d ; CHECK-NEXT: str z10, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: mov z10.d, z2.d ; CHECK-NEXT: str z9, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: mov z9.d, z3.d ; CHECK-NEXT: str z8, [sp, #3, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: mov z8.d, z4.d -; CHECK-NEXT: mov z9.d, z3.d -; CHECK-NEXT: mov z10.d, z2.d -; CHECK-NEXT: mov z11.d, z1.d ; CHECK-NEXT: tbz w0, #0, .LBB16_2 ; CHECK-NEXT: // %bb.1: // %L1 ; CHECK-NEXT: bl bar ; CHECK-NEXT: .LBB16_2: // %common.ret ; CHECK-NEXT: mov z0.d, z11.d ; CHECK-NEXT: mov z1.d, z10.d +; CHECK-NEXT: ldr z11, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov z2.d, z9.d ; CHECK-NEXT: mov z3.d, z8.d -; CHECK-NEXT: ldr z11, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldr z10, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #3, mul vl] // 16-byte Folded Reload @@ -552,22 +552,22 @@ ; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: str z11, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov z11.d, z1.d ; CHECK-NEXT: str z10, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: mov z10.d, z2.d ; CHECK-NEXT: str z9, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: mov z9.d, z3.d ; CHECK-NEXT: str z8, [sp, #3, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: mov z8.d, z4.d -; CHECK-NEXT: mov z9.d, z3.d -; CHECK-NEXT: mov z10.d, z2.d -; CHECK-NEXT: mov z11.d, z1.d ; CHECK-NEXT: tbz w0, #0, .LBB17_2 ; CHECK-NEXT: // %bb.1: // %L1 ; CHECK-NEXT: bl bar ; CHECK-NEXT: .LBB17_2: // %common.ret ; CHECK-NEXT: mov z0.d, z11.d ; CHECK-NEXT: mov z1.d, z10.d +; CHECK-NEXT: ldr z11, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov z2.d, z9.d ; CHECK-NEXT: mov z3.d, z8.d -; CHECK-NEXT: ldr z11, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldr z10, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #3, mul vl] // 16-byte Folded Reload @@ -588,22 +588,22 @@ ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: str z11, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov z11.d, z1.d ; CHECK-NEXT: str z10, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: mov z10.d, z2.d ; CHECK-NEXT: str z9, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: mov z9.d, z3.d ; CHECK-NEXT: str z8, [sp, #3, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: mov z8.d, z4.d -; CHECK-NEXT: mov z9.d, z3.d -; CHECK-NEXT: mov z10.d, z2.d -; CHECK-NEXT: mov z11.d, z1.d ; CHECK-NEXT: tbz w0, #0, .LBB18_2 ; CHECK-NEXT: // %bb.1: // %L1 ; CHECK-NEXT: bl bar ; CHECK-NEXT: .LBB18_2: // %common.ret ; CHECK-NEXT: mov z0.d, z11.d ; CHECK-NEXT: mov z1.d, z10.d +; CHECK-NEXT: ldr z11, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov z2.d, z9.d ; CHECK-NEXT: mov z3.d, z8.d -; CHECK-NEXT: ldr z11, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldr z10, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #3, mul vl] // 16-byte Folded Reload @@ -624,22 +624,22 @@ ; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: str z11, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov z11.d, z1.d ; CHECK-NEXT: str z10, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: mov z10.d, z2.d ; CHECK-NEXT: str z9, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: mov z9.d, z3.d ; CHECK-NEXT: str z8, [sp, #3, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: mov z8.d, z4.d -; CHECK-NEXT: mov z9.d, z3.d -; CHECK-NEXT: mov z10.d, z2.d -; CHECK-NEXT: mov z11.d, z1.d ; CHECK-NEXT: tbz w0, #0, .LBB19_2 ; CHECK-NEXT: // %bb.1: // %L1 ; CHECK-NEXT: bl bar ; CHECK-NEXT: .LBB19_2: // %common.ret ; CHECK-NEXT: mov z0.d, z11.d ; CHECK-NEXT: mov z1.d, z10.d +; CHECK-NEXT: ldr z11, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov z2.d, z9.d ; CHECK-NEXT: mov z3.d, z8.d -; CHECK-NEXT: ldr z11, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldr z10, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #3, mul vl] // 16-byte Folded Reload @@ -660,22 +660,22 @@ ; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: str z11, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov z11.d, z1.d ; CHECK-NEXT: str z10, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: mov z10.d, z2.d ; CHECK-NEXT: str z9, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: mov z9.d, z3.d ; CHECK-NEXT: str z8, [sp, #3, mul vl] // 16-byte Folded Spill ; CHECK-NEXT: mov z8.d, z4.d -; CHECK-NEXT: mov z9.d, z3.d -; CHECK-NEXT: mov z10.d, z2.d -; CHECK-NEXT: mov z11.d, z1.d ; CHECK-NEXT: tbz w0, #0, .LBB20_2 ; CHECK-NEXT: // %bb.1: // %L1 ; CHECK-NEXT: bl bar ; CHECK-NEXT: .LBB20_2: // %common.ret ; CHECK-NEXT: mov z0.d, z11.d ; CHECK-NEXT: mov z1.d, z10.d +; CHECK-NEXT: ldr z11, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov z2.d, z9.d ; CHECK-NEXT: mov z3.d, z8.d -; CHECK-NEXT: ldr z11, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldr z10, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #3, mul vl] // 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll --- a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll +++ b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll @@ -14,9 +14,9 @@ ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: fmov s0, #1.00000000 ; CHECK-NEXT: ld4d { z1.d - z4.d }, p0/z, [x0] +; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: ld4d { z16.d - z19.d }, p0/z, [x1] ; CHECK-NEXT: ld1d { z5.d }, p0/z, [x2] -; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: st1d { z16.d }, p0, [sp] ; CHECK-NEXT: st1d { z17.d }, p0, [sp, #1, mul vl] @@ -59,23 +59,23 @@ ; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: fmov s0, #1.00000000 ; CHECK-NEXT: add x8, sp, #16 +; CHECK-NEXT: add x9, sp, #16 +; CHECK-NEXT: mov w2, #2 // =0x2 +; CHECK-NEXT: mov w3, #3 // =0x3 +; CHECK-NEXT: mov w4, #4 // =0x4 +; CHECK-NEXT: mov w5, #5 // =0x5 +; CHECK-NEXT: mov w6, #6 // =0x6 +; CHECK-NEXT: mov w7, #7 // =0x7 ; CHECK-NEXT: ld4d { z1.d - z4.d }, p0/z, [x0] -; CHECK-NEXT: ld4d { z16.d - z19.d }, p0/z, [x1] -; CHECK-NEXT: fmov s0, #1.00000000 ; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: mov w1, #1 -; CHECK-NEXT: mov w2, #2 -; CHECK-NEXT: mov w3, #3 -; CHECK-NEXT: mov w4, #4 -; CHECK-NEXT: mov w5, #5 -; CHECK-NEXT: mov w6, #6 -; CHECK-NEXT: mov w7, #7 -; CHECK-NEXT: add x9, sp, #16 +; CHECK-NEXT: ld4d { z16.d - z19.d }, p0/z, [x1] ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: st1d { z16.d }, p0, [x9] -; CHECK-NEXT: st1d { z17.d }, p0, [x9, #1, mul vl] -; CHECK-NEXT: st1d { z18.d }, p0, [x9, #2, mul vl] +; CHECK-NEXT: mov w1, #1 // =0x1 +; CHECK-NEXT: st1d { z16.d }, p0, [x8] +; CHECK-NEXT: st1d { z17.d }, p0, [x8, #1, mul vl] +; CHECK-NEXT: st1d { z18.d }, p0, [x8, #2, mul vl] ; CHECK-NEXT: st1d { z19.d }, p0, [x9, #3, mul vl] ; CHECK-NEXT: str x8, [sp] ; CHECK-NEXT: bl callee2 @@ -115,11 +115,11 @@ ; CHECK-NEXT: addvl sp, sp, #-3 ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: fmov s0, #1.00000000 +; CHECK-NEXT: fmov s1, #2.00000000 ; CHECK-NEXT: ld4d { z2.d - z5.d }, p0/z, [x0] +; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: ld3d { z16.d - z18.d }, p0/z, [x1] ; CHECK-NEXT: ld1d { z6.d }, p0/z, [x2] -; CHECK-NEXT: fmov s1, #2.00000000 -; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: st1d { z16.d }, p0, [sp] ; CHECK-NEXT: st1d { z17.d }, p0, [sp, #1, mul vl] @@ -182,8 +182,8 @@ define double @foo5(i32 %i0, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 
%i5, ptr %ptr1, ptr %ptr2, double %x0, <vscale x 8 x double> %x1, <vscale x 8 x double> %x2) nounwind {
; CHECK-LABEL: foo5:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldr x8, [sp]
; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ldr x8, [sp]
; CHECK-NEXT: ld1d { z5.d }, p0/z, [x8, #1, mul vl]
; CHECK-NEXT: ld1d { z6.d }, p0/z, [x8]
; CHECK-NEXT: ld1d { z7.d }, p0/z, [x8, #3, mul vl]
@@ -229,8 +229,8 @@ define void @aavpcs1(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5, i32 %s6, <vscale x 4 x i32> %s7, <vscale x 4 x i32> %s8, <vscale x 4 x i32> %s9, <vscale x 4 x i32> %s10, <vscale x 4 x i32> %s11, <vscale x 4 x i32> %s12, <vscale x 4 x i32> %s13, <vscale x 4 x i32> %s14, <vscale x 4 x i32> %s15, <vscale x 4 x i32> %s16, ptr %ptr) nounwind {
; CHECK-LABEL: aavpcs1:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldp x8, x9, [sp]
; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ldp x8, x9, [sp]
; CHECK-NEXT: ld1w { z3.s }, p0/z, [x8]
; CHECK-NEXT: ld1w { z24.s }, p0/z, [x7]
; CHECK-NEXT: st1w { z0.s }, p0, [x9]
@@ -261,8 +261,8 @@ define void @aavpcs2(float %s0, float %s1, float %s2, float %s3, float %s4, float %s5, float %s6, <vscale x 4 x float> %s7, <vscale x 4 x float> %s8, <vscale x 4 x float> %s9, <vscale x 4 x float> %s10, <vscale x 4 x float> %s11, <vscale x 4 x float> %s12, <vscale x 4 x float> %s13, <vscale x 4 x float> %s14, <vscale x 4 x float> %s15, <vscale x 4 x float> %s16, ptr %ptr) nounwind {
; CHECK-LABEL: aavpcs2:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldp x8, x9, [sp]
; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ldp x8, x9, [sp]
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x7]
; CHECK-NEXT: ld1w { z2.s }, p0/z, [x6]
@@ -299,8 +299,8 @@ define void @aavpcs3(float %s0, float %s1, float %s2, float %s3, float %s4, float %s5, float %s6, float %s7, <vscale x 4 x float> %s8, <vscale x 4 x float> %s9, <vscale x 4 x float> %s10, <vscale x 4 x float> %s11, <vscale x 4 x float> %s12, <vscale x 4 x float> %s13, <vscale x 4 x float> %s14, <vscale x 4 x float> %s15, <vscale x 4 x float> %s16, <vscale x 4 x float> %s17, <vscale x 4 x i1> %p0, ptr %ptr) nounwind {
; CHECK-LABEL: aavpcs3:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldr x8, [sp]
; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ldr x8, [sp]
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x7]
; CHECK-NEXT: ld1w { z2.s }, p0/z, [x6]
@@ -339,8 +339,8 @@ define void @aavpcs4(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5, i32 %s6, i32 %s7, <vscale x 4 x i32> %s8, <vscale x 4 x i32> %s9, <vscale x 4 x i32> %s10, <vscale x 4 x i32> %s11, <vscale x 4 x i32> %s12, <vscale x 4 x i32> %s13, <vscale x 4 x i32> %s14, <vscale x 4 x i32> %s15, <vscale x 4 x i32> %s16, <vscale x 4 x i32> %s17, ptr %ptr) nounwind {
; CHECK-LABEL: aavpcs4:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldr x8, [sp]
; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ldr x8, [sp]
; CHECK-NEXT: ldr x9, [sp, #16]
; CHECK-NEXT: ld1w { z24.s }, p0/z, [x8]
; CHECK-NEXT: st1w { z0.s }, p0, [x9]
@@ -371,8 +371,8 @@ define <vscale x 4 x float> @aavpcs5(float %s0, float %s1, float %s2, float %s3, float %s4, float %s5, float %s6, float %s7, <vscale x 4 x float> %s8, <vscale x 4 x float> %s9, <vscale x 4 x float> %s10, <vscale x 4 x float> %s11, <vscale x 4 x float> %s12, <vscale x 4 x float> %s13, <vscale x 4 x float> %s14, <vscale x 4 x float> %s15, <vscale x 4 x float> %s16, <vscale x 4 x float> %s17, ptr %ptr) nounwind {
; CHECK-LABEL: aavpcs5:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldr x8, [sp]
; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ldr x8, [sp]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x8]
; CHECK-NEXT: ld1w { z2.s }, p0/z, [x7]
; CHECK-NEXT: ld1w { z3.s }, p0/z, [x6]
@@ -409,8 +409,8 @@ define void @aapcs1(float %s0, float %s1, float %s2, float %s3, float %s4, float %s5, float %s6, float %s7, <vscale x 4 x float> %s8, <vscale x 4 x float> %s9, <vscale x 4 x float> %s10, <vscale x 4 x float> %s11, <vscale x 4 x float> %s12, <vscale x 4 x float> %s13, <vscale x 4 x float> %s14, <vscale x 4 x float> %s15, <vscale x 4 x float> %s16, <vscale x 4 x float> %s17, ptr %ptr) nounwind {
; CHECK-LABEL: aapcs1:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldr x8, [sp]
; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ldr x8, [sp]
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8]
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x7]
; CHECK-NEXT: ld1w { z2.s }, p0/z, [x6]
@@ -456,14 +456,14 @@
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: movi d0, #0000000000000000
; CHECK-NEXT: fmov s1, #1.00000000
+; CHECK-NEXT: addvl x0, sp, #1
; CHECK-NEXT: fmov s2, #2.00000000
; CHECK-NEXT: fmov s3, #3.00000000
+; CHECK-NEXT: mov x1, sp
; CHECK-NEXT: fmov s4, #4.00000000
; CHECK-NEXT: fmov s5, #5.00000000
; CHECK-NEXT: fmov s6, #6.00000000
; CHECK-NEXT: fmov s7, #7.00000000
-; CHECK-NEXT: mov x1, sp
-; CHECK-NEXT: addvl x0, sp, #1
; CHECK-NEXT: bl
non_sve_callee_high_range ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload @@ -482,8 +482,6 @@ ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: movi d0, #0000000000000000 -; CHECK-NEXT: ld1w { z16.s }, p0/z, [x0] -; CHECK-NEXT: ld1w { z17.s }, p0/z, [x1] ; CHECK-NEXT: fmov s1, #1.00000000 ; CHECK-NEXT: fmov s2, #2.00000000 ; CHECK-NEXT: fmov s3, #3.00000000 @@ -491,8 +489,10 @@ ; CHECK-NEXT: fmov s5, #5.00000000 ; CHECK-NEXT: fmov s6, #6.00000000 ; CHECK-NEXT: fmov s7, #7.00000000 -; CHECK-NEXT: mov x1, sp +; CHECK-NEXT: ld1w { z16.s }, p0/z, [x0] +; CHECK-NEXT: ld1w { z17.s }, p0/z, [x1] ; CHECK-NEXT: addvl x0, sp, #1 +; CHECK-NEXT: mov x1, sp ; CHECK-NEXT: st1w { z17.s }, p0, [sp] ; CHECK-NEXT: st1w { z16.s }, p0, [sp, #1, mul vl] ; CHECK-NEXT: bl non_sve_callee_high_range @@ -548,53 +548,53 @@ ; CHECK-NEXT: .cfi_escape 0x10, 0x4d, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x50, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d13 @ cfa - 16 - 48 * VG ; CHECK-NEXT: .cfi_escape 0x10, 0x4e, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x48, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d14 @ cfa - 16 - 56 * VG ; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: mov z25.d, z0.d ; CHECK-NEXT: str z0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: movi d0, #0000000000000000 ; CHECK-NEXT: mov z24.d, z1.d +; CHECK-NEXT: addvl x0, sp, #2 ; CHECK-NEXT: fmov s1, #1.00000000 ; CHECK-NEXT: fmov s2, #2.00000000 +; CHECK-NEXT: addvl x1, sp, #1 ; CHECK-NEXT: fmov s3, #3.00000000 ; CHECK-NEXT: fmov s4, #4.00000000 ; CHECK-NEXT: fmov s5, #5.00000000 ; CHECK-NEXT: fmov s6, #6.00000000 ; CHECK-NEXT: fmov s7, #7.00000000 -; CHECK-NEXT: addvl x0, sp, #2 -; CHECK-NEXT: addvl x1, sp, #1 -; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: st1w { z24.s }, p0, [sp, #1, mul vl] ; CHECK-NEXT: st1w { z25.s }, p0, [sp, #2, mul vl] ; CHECK-NEXT: bl non_sve_callee_high_range ; CHECK-NEXT: ldr z0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #3 ; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 
16-byte Folded Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #18 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret @@ -649,44 +649,44 @@ ; CHECK-NEXT: .cfi_escape 0x10, 0x4f, 0x0a, 0x11, 0x70, 0x22, 0x11, 0x40, 0x92, 0x2e, 0x00, 0x1e, 0x22 // $d15 @ cfa - 16 - 64 * VG ; CHECK-NEXT: movi d0, #0000000000000000 ; CHECK-NEXT: fmov s1, #1.00000000 +; CHECK-NEXT: addvl x0, sp, #1 ; CHECK-NEXT: fmov s2, #2.00000000 ; CHECK-NEXT: fmov s3, #3.00000000 +; CHECK-NEXT: mov x1, sp ; CHECK-NEXT: fmov s4, #4.00000000 ; CHECK-NEXT: fmov s5, #5.00000000 ; CHECK-NEXT: fmov s6, #6.00000000 ; CHECK-NEXT: fmov s7, #7.00000000 -; CHECK-NEXT: mov x1, sp -; CHECK-NEXT: addvl x0, sp, #1 ; CHECK-NEXT: bl non_sve_callee_high_range ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte 
Folded Reload
; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #18
; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-doublereduct.ll b/llvm/test/CodeGen/AArch64/sve-doublereduct.ll
--- a/llvm/test/CodeGen/AArch64/sve-doublereduct.ll
+++ b/llvm/test/CodeGen/AArch64/sve-doublereduct.ll
@@ -58,9 +58,9 @@
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fmin z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: fminv s2, p0, z2.s
+; CHECK-NEXT: fminv s1, p0, z2.s
; CHECK-NEXT: fminv s0, p0, z0.s
-; CHECK-NEXT: fminnm s0, s0, s2
+; CHECK-NEXT: fminnm s0, s0, s1
; CHECK-NEXT: ret
%r1 = call fast float @llvm.vector.reduce.fminimum.nxv8f32(<vscale x 8 x float> %a)
%r2 = call fast float @llvm.vector.reduce.fminimum.nxv4f32(<vscale x 4 x float> %b)
@@ -73,9 +73,9 @@
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: fmax z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: fmaxv s2, p0, z2.s
+; CHECK-NEXT: fmaxv s1, p0, z2.s
; CHECK-NEXT: fmaxv s0, p0, z0.s
-; CHECK-NEXT: fmaxnm s0, s0, s2
+; CHECK-NEXT: fmaxnm s0, s0, s1
; CHECK-NEXT: ret
%r1 = call fast float @llvm.vector.reduce.fmaximum.nxv8f32(<vscale x 8 x float> %a)
%r2 = call fast float @llvm.vector.reduce.fmaximum.nxv4f32(<vscale x 4 x float> %b)
@@ -87,8 +87,8 @@ define i32 @add_i32(<vscale x 8 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: add_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: add z0.s, z0.s, z1.s
; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: add z0.s, z0.s, z1.s
; CHECK-NEXT: add z0.s, z0.s, z2.s
; CHECK-NEXT: uaddv d0, p0, z0.s
; CHECK-NEXT: fmov x0, d0
@@ -107,9 +107,9 @@
; CHECK-NEXT: uunpklo z0.h, z0.b
; CHECK-NEXT: uunpkhi z3.h, z1.b
; CHECK-NEXT: uunpklo z1.h, z1.b
+; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: add z0.h, z0.h, z2.h
; CHECK-NEXT: add z1.h, z1.h, z3.h
-; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: add z0.h, z0.h, z1.h
; CHECK-NEXT: uaddv d0, p0, z0.h
; CHECK-NEXT: fmov x0, d0
@@ -132,11 +132,11 @@
; CHECK-NEXT: uunpkhi z0.h, z0.b
; CHECK-NEXT: uunpkhi z5.h, z2.b
; CHECK-NEXT: uunpklo z2.h, z2.b
+; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: add z0.h, z0.h, z1.h
; CHECK-NEXT: add z1.h, z4.h, z3.h
; CHECK-NEXT: add z0.h, z1.h, z0.h
; CHECK-NEXT: add z1.h, z2.h, z5.h
-; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: add z0.h, z0.h, z1.h
; CHECK-NEXT: uaddv d0, p0, z0.h
; CHECK-NEXT: fmov x0, d0
@@ -160,8 +160,8 @@ define i32 @and_i32(<vscale x 8 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: and_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: and z0.d, z0.d, z2.d
; CHECK-NEXT: andv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
@@ -175,8 +175,8 @@ define i32 @or_i32(<vscale x 8 x i32> %a, <vscale x 4 x i32> %b) {
; CHECK-LABEL: or_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: orr z0.d, z0.d, z2.d
; CHECK-NEXT: orv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
diff --git a/llvm/test/CodeGen/AArch64/sve-expand-div.ll b/llvm/test/CodeGen/AArch64/sve-expand-div.ll
--- a/llvm/test/CodeGen/AArch64/sve-expand-div.ll
+++ b/llvm/test/CodeGen/AArch64/sve-expand-div.ll
@@ -10,8 +10,8 @@ define <vscale x 16 x i8> @sdiv_i8(<vscale x 16 x i8> %a) #0 {
; CHECK-LABEL: sdiv_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z1.b, #86 // =0x56
; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: mov z1.b, #86 // =0x56
; CHECK-NEXT: smulh z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: lsr z1.b, z0.b, #7
; CHECK-NEXT: add z0.b, z0.b, z1.b
@@ -23,8 +23,8 @@ define <vscale x 8 x i16> @sdiv_i16(<vscale x 8 x i16> %a) #0 {
; CHECK-LABEL: sdiv_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #21846
; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: mov w8, #21846 // =0x5556
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: smulh z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: lsr z1.h, z0.h, #15
@@ -37,8 +37,8 @@ define <vscale x 4 x i32> @sdiv_i32(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: sdiv_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #21846
; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: mov w8, #21846 // =0x5556
; CHECK-NEXT: movk w8, #21845, lsl #16
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: smulh z0.s, p0/m, z0.s, z1.s
@@ -52,8 +52,8 @@ define <vscale x 2 x i64> @sdiv_i64(<vscale x 2 x i64> %a) #0 {
; CHECK-LABEL: sdiv_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #6148914691236517205
; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov x8, #6148914691236517205 // =0x5555555555555555
; CHECK-NEXT: movk x8, #21846
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: smulh z0.d, p0/m, z0.d, z1.d
@@ -71,8 +71,8 @@ define <vscale x 16 x i8> @udiv_i8(<vscale x 16 x i8> %a) #0 {
; CHECK-LABEL: udiv_i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov z1.b, #-85 // =0xffffffffffffffab
; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: mov z1.b, #-85 // =0xffffffffffffffab
; CHECK-NEXT: umulh z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: lsr z0.b, z0.b, #1
; CHECK-NEXT: ret
@@ -83,8 +83,8 @@ define <vscale x 8 x i16> @udiv_i16(<vscale x 8 x i16> %a) #0 {
; CHECK-LABEL: udiv_i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #-21845
; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: mov w8, #-21845 // =0xffffaaab
; CHECK-NEXT: mov z1.h, w8
; CHECK-NEXT: umulh z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: lsr z0.h, z0.h, #1
@@ -96,8 +96,8 @@ define <vscale x 4 x i32> @udiv_i32(<vscale x 4 x i32> %a) #0 {
; CHECK-LABEL: udiv_i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #43691
; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: mov w8, #43691 // =0xaaab
; CHECK-NEXT: movk w8, #43690, lsl #16
; CHECK-NEXT: mov z1.s, w8
; CHECK-NEXT: umulh z0.s, p0/m, z0.s, z1.s
@@ -110,8 +110,8 @@ define <vscale x 2 x i64> @udiv_i64(<vscale x 2 x i64> %a) #0 {
; CHECK-LABEL: udiv_i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #-6148914691236517206
; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov x8, #-6148914691236517206 // =0xaaaaaaaaaaaaaaaa
; CHECK-NEXT: movk x8, #43691
; CHECK-NEXT: mov z1.d, x8
; CHECK-NEXT: umulh z0.d, p0/m, z0.d, z1.d
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-element.ll b/llvm/test/CodeGen/AArch64/sve-extract-element.ll
--- a/llvm/test/CodeGen/AArch64/sve-extract-element.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-element.ll
@@ -630,8 +630,8 @@
; CHECK: // %bb.0:
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1
-; CHECK-NEXT: whilels p0.s, xzr, x8
-; CHECK-NEXT: lastb w8, p0, z0.s
+; CHECK-NEXT: whilels p1.s, xzr, x8
+; CHECK-NEXT: lastb w8, p1, z0.s
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
%b = extractelement <vscale x 4 x i1> %a, i32 %x
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll
--- a/llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-fixed-from-scalable-vector.ll
@@ -70,8 +70,8 @@
; CHECK-NEXT: addvl sp, sp, #-8
; CHECK-NEXT:
.cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc0, 0x00, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 64 * VG ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: mov x8, sp ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov x8, sp ; CHECK-NEXT: add x8, x8, #32 ; CHECK-NEXT: st1h { z3.h }, p0, [sp, #3, mul vl] ; CHECK-NEXT: st1h { z2.h }, p0, [sp, #2, mul vl] @@ -100,15 +100,15 @@ ; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG ; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: cnth x8 -; CHECK-NEXT: mov w9, #8 +; CHECK-NEXT: mov w9, #8 // =0x8 ; CHECK-NEXT: sub x8, x8, #2 -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: cmp x8, #8 -; CHECK-NEXT: st1d { z3.d }, p0, [sp, #3, mul vl] ; CHECK-NEXT: csel x8, x8, x9, lo ; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: lsl x8, x8, #3 +; CHECK-NEXT: st1d { z3.d }, p0, [sp, #3, mul vl] ; CHECK-NEXT: st1d { z2.d }, p0, [sp, #2, mul vl] ; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl] ; CHECK-NEXT: st1d { z0.d }, p0, [sp] @@ -162,8 +162,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov z1.b, p0/z, #1 // =0x1 ; CHECK-NEXT: umov w8, v1.b[1] -; CHECK-NEXT: umov w9, v1.b[2] ; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: umov w9, v1.b[2] ; CHECK-NEXT: mov v0.h[1], w8 ; CHECK-NEXT: umov w8, v1.b[3] ; CHECK-NEXT: mov v0.h[2], w9 @@ -183,11 +183,11 @@ ; CHECK-NEXT: addvl sp, sp, #-8 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc0, 0x00, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 64 * VG ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: mov x8, sp ; CHECK-NEXT: ptrue p2.b -; CHECK-NEXT: add x8, x8, #16 ; CHECK-NEXT: mov z0.b, p1/z, #1 // =0x1 ; CHECK-NEXT: mov z1.b, p0/z, #1 // =0x1 +; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: add x8, x8, #16 ; CHECK-NEXT: st1b { z0.b }, p2, [sp, #1, mul vl] ; CHECK-NEXT: st1b { z1.b }, p2, [sp] ; CHECK-NEXT: st1b { z0.b }, p2, [sp, #3, mul vl] @@ -240,8 +240,8 @@ ; CHECK-NEXT: addvl sp, sp, #-8 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0d, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0xc0, 0x00, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 64 * VG ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: mov x8, sp ; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: mov x8, sp ; CHECK-NEXT: add x8, x8, #16 ; CHECK-NEXT: st1b { z1.b }, p0, [sp, #1, mul vl] ; CHECK-NEXT: st1b { z0.b }, p0, [sp] @@ -298,8 +298,8 @@ ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl] ; CHECK-NEXT: st1d { z0.d }, p0, [sp] -; CHECK-NEXT: ldr q1, [sp, #16] ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ldr q1, [sp, #16] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll --- a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll +++ b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll @@ -17,15 +17,15 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: cntd x8 -; CHECK-NEXT: mov w9, #2 +; CHECK-NEXT: mov w9, #2 // =0x2 ; CHECK-NEXT: sub x8, x8, #2 -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: cmp x8, #2 -; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: csel x8, x8, x9, lo ; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: lsl x8, x8, #3 +; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: ldr q0, [x9, x8] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -50,15 +50,15 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: cntw x8 -; CHECK-NEXT: mov w9, #4 +; CHECK-NEXT: mov w9, #4 // =0x4 ; CHECK-NEXT: sub x8, x8, #4 -; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: cmp x8, #4 -; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: csel x8, x8, x9, lo ; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: lsl x8, x8, #2 +; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: ldr q0, [x9, x8] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -84,12 +84,12 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov x8, #4 -; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov x8, #4 // =0x4 +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: ptrue p1.d, vl4 ; CHECK-NEXT: st1d { z0.d }, p0, [sp] -; CHECK-NEXT: ptrue p0.d, vl4 -; CHECK-NEXT: ld1d { z0.d }, p0/z, [x9, x8, lsl #3] +; CHECK-NEXT: ld1d { z0.d }, p1/z, [x9, x8, lsl #3] ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: addvl sp, sp, #1 @@ -115,15 +115,15 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: cnth x8 -; CHECK-NEXT: mov w9, #8 +; CHECK-NEXT: mov w9, #8 // =0x8 ; CHECK-NEXT: sub x8, x8, #8 -; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: cmp x8, #8 -; CHECK-NEXT: st1h { z0.h }, p0, [sp] ; CHECK-NEXT: csel x8, x8, x9, lo ; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: lsl x8, x8, #1 +; CHECK-NEXT: st1h { z0.h }, p0, [sp] ; CHECK-NEXT: ldr q0, [x9, x8] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -149,12 +149,12 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov x8, #8 -; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov x8, #8 // =0x8 +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: ptrue p1.s, vl8 ; CHECK-NEXT: st1w { z0.s }, p0, [sp] -; CHECK-NEXT: ptrue p0.s, vl8 -; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9, x8, lsl #2] +; CHECK-NEXT: ld1w { z0.s }, p1/z, [x9, x8, lsl #2] ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: addvl sp, sp, #1 @@ -182,12 +182,12 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill
 ; CHECK-NEXT: addvl sp, sp, #-1
-; CHECK-NEXT: mov x8, #8
-; CHECK-NEXT: mov x9, sp
 ; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov x8, #8 // =0x8
+; CHECK-NEXT: mov x9, sp
+; CHECK-NEXT: ptrue p1.d, vl8
 ; CHECK-NEXT: st1d { z0.d }, p0, [sp]
-; CHECK-NEXT: ptrue p0.d, vl8
-; CHECK-NEXT: ld1d { z0.d }, p0/z, [x9, x8, lsl #3]
+; CHECK-NEXT: ld1d { z0.d }, p1/z, [x9, x8, lsl #3]
 ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -214,14 +214,14 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT: addvl sp, sp, #-1
-; CHECK-NEXT: mov x8, #-16
-; CHECK-NEXT: mov w9, #16
 ; CHECK-NEXT: ptrue p0.b
-; CHECK-NEXT: st1b { z0.b }, p0, [sp]
+; CHECK-NEXT: mov x8, #-16 // =0xfffffffffffffff0
+; CHECK-NEXT: mov w9, #16 // =0x10
 ; CHECK-NEXT: addvl x8, x8, #1
 ; CHECK-NEXT: cmp x8, #16
 ; CHECK-NEXT: csel x8, x8, x9, lo
 ; CHECK-NEXT: mov x9, sp
+; CHECK-NEXT: st1b { z0.b }, p0, [sp]
 ; CHECK-NEXT: ldr q0, [x9, x8]
 ; CHECK-NEXT: addvl sp, sp, #1
 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
@@ -247,12 +247,12 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT: addvl sp, sp, #-1
-; CHECK-NEXT: mov x8, #16
-; CHECK-NEXT: mov x9, sp
 ; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: mov x8, #16 // =0x10
+; CHECK-NEXT: mov x9, sp
+; CHECK-NEXT: ptrue p1.h, vl16
 ; CHECK-NEXT: st1h { z0.h }, p0, [sp]
-; CHECK-NEXT: ptrue p0.h, vl16
-; CHECK-NEXT: ld1h { z0.h }, p0/z, [x9, x8, lsl #1]
+; CHECK-NEXT: ld1h { z0.h }, p1/z, [x9, x8, lsl #1]
 ; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT: addvl sp, sp, #1
@@ -280,12 +280,12 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT: addvl sp, sp, #-1
-; CHECK-NEXT: mov x8, #16
-; CHECK-NEXT: mov x9, sp
 ; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: mov x8, #16 // =0x10
+; CHECK-NEXT: mov x9, sp
+; CHECK-NEXT: ptrue p1.s, vl16
 ; CHECK-NEXT: st1w { z0.s }, p0, [sp]
-; CHECK-NEXT: ptrue p0.s, vl16
-; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9, x8, lsl #2]
+; CHECK-NEXT: ld1w { z0.s }, p1/z, [x9, x8, lsl #2]
 ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -351,8 +351,8 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov z1.s, p0/z, #1 // =0x1
 ; CHECK-NEXT: mov w8, v1.s[1]
-; CHECK-NEXT: mov w9, v1.s[2]
 ; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: mov w9, v1.s[2]
 ; CHECK-NEXT: mov v0.h[1], w8
 ; CHECK-NEXT: mov w8, v1.s[3]
 ; CHECK-NEXT: mov v0.h[2], w9
@@ -368,8 +368,8 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov z1.h, p0/z, #1 // =0x1
 ; CHECK-NEXT: umov w8, v1.h[1]
-; CHECK-NEXT: umov w9, v1.h[2]
 ; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: umov w9, v1.h[2]
 ; CHECK-NEXT: mov v0.b[1], w8
 ; CHECK-NEXT: umov w8, v1.h[3]
 ; CHECK-NEXT: mov v0.b[2], w9
@@ -393,8 +393,8 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov z1.b, p0/z, #1 // =0x1
 ; CHECK-NEXT: umov w8, v1.b[1]
-; CHECK-NEXT: umov w9, v1.b[2]
 ; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: umov w9, v1.b[2]
 ; CHECK-NEXT: mov v0.b[1], w8
 ; CHECK-NEXT: umov w8, v1.b[3]
 ; CHECK-NEXT: mov v0.b[2], w9
diff --git a/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
--- a/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-extract-scalable-vector.ll
@@ -65,27 +65,29 @@
 ; CHECK-NEXT: addvl sp, sp, #-1
 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
 ; CHECK-NEXT: punpkhi p2.h, p1.b
+; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT: punpklo p1.h, p1.b
-; CHECK-NEXT: punpklo p2.h, p2.b
-; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT: punpkhi p0.h, p0.b
+; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: punpklo p2.h, p2.b
 ; CHECK-NEXT: punpkhi p3.h, p1.b
-; CHECK-NEXT: punpkhi p4.h, p2.b
-; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT: punpklo p1.h, p1.b
+; CHECK-NEXT: punpkhi p0.h, p0.b
+; CHECK-NEXT: punpkhi p4.h, p2.b
 ; CHECK-NEXT: punpklo p2.h, p2.b
 ; CHECK-NEXT: punpkhi p5.h, p3.b
-; CHECK-NEXT: uzp1 p4.s, p4.s, p0.s
-; CHECK-NEXT: punpkhi p0.h, p0.b
 ; CHECK-NEXT: punpklo p3.h, p3.b
-; CHECK-NEXT: uzp1 p2.s, p5.s, p2.s
-; CHECK-NEXT: punpkhi p5.h, p1.b
+; CHECK-NEXT: punpkhi p6.h, p1.b
 ; CHECK-NEXT: punpklo p1.h, p1.b
 ; CHECK-NEXT: punpkhi p0.h, p0.b
-; CHECK-NEXT: uzp1 p3.s, p5.s, p3.s
+; CHECK-NEXT: uzp1 p2.s, p5.s, p2.s
+; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: uzp1 p3.s, p6.s, p3.s
+; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: uzp1 p4.s, p4.s, p0.s
 ; CHECK-NEXT: uzp1 p0.s, p0.s, p1.s
 ; CHECK-NEXT: uzp1 p1.h, p2.h, p4.h
-; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT: uzp1 p0.h, p0.h, p3.h
 ; CHECK-NEXT: uzp1 p0.b, p0.b, p1.b
@@ -556,18 +558,18 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: uunpkhi z0.h, z0.b
 ; CHECK-NEXT: uunpklo z2.h, z1.b
+; CHECK-NEXT: uunpkhi z1.h, z1.b
 ; CHECK-NEXT: uunpkhi z0.s, z0.h
 ; CHECK-NEXT: uunpklo z4.s, z2.h
+; CHECK-NEXT: uunpkhi z2.s, z2.h
+; CHECK-NEXT: uunpklo z1.s, z1.h
 ; CHECK-NEXT: uunpkhi z0.d, z0.s
 ; CHECK-NEXT: uunpklo z5.d, z4.s
-; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT: uunpkhi z4.d, z4.s
+; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: uunpkhi z2.s, z2.h
 ; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT: uunpkhi z1.h, z1.b
 ; CHECK-NEXT: uunpklo z0.h, z0.b
-; CHECK-NEXT: uunpklo z1.s, z1.h
 ; CHECK-NEXT: uunpklo z3.s, z0.h
 ; CHECK-NEXT: uunpkhi z0.s, z0.h
 ; CHECK-NEXT: uunpklo z3.d, z3.s
@@ -580,8 +582,8 @@
 ; CHECK-NEXT: uunpkhi z3.d, z3.s
 ; CHECK-NEXT: uzp1 z3.s, z4.s, z3.s
 ; CHECK-NEXT: uunpklo z4.d, z2.s
-; CHECK-NEXT: uzp1 z0.h, z0.h, z3.h
 ; CHECK-NEXT: uunpkhi z2.d, z2.s
+; CHECK-NEXT: uzp1 z0.h, z0.h, z3.h
 ; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
 ; CHECK-NEXT: uunpklo z0.h, z0.b
 ; CHECK-NEXT: uunpkhi z3.s, z0.h
@@ -596,8 +598,8 @@
 ; CHECK-NEXT: uunpkhi z4.d, z4.s
 ; CHECK-NEXT: uzp1 z2.s, z2.s, z4.s
 ; CHECK-NEXT: uunpklo z4.d, z1.s
-; CHECK-NEXT: uzp1 z2.h, z2.h, z3.h
 ; CHECK-NEXT: uunpkhi z1.d, z1.s
+; CHECK-NEXT: uzp1 z2.h, z2.h, z3.h
 ; CHECK-NEXT: uzp1 z2.b, z0.b, z2.b
 ; CHECK-NEXT: uunpkhi z2.h, z2.b
 ; CHECK-NEXT: uunpklo z3.s, z2.h
diff --git a/llvm/test/CodeGen/AArch64/sve-fcmp.ll b/llvm/test/CodeGen/AArch64/sve-fcmp.ll
--- a/llvm/test/CodeGen/AArch64/sve-fcmp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fcmp.ll
@@ -374,8 +374,8 @@
 define @ueq_zero( %x) {
 ; CHECK-LABEL: ueq_zero:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov z1.s, #0 // =0x0
 ; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: mov z1.s, #0 // =0x0
 ; CHECK-NEXT: fcmuo p1.s, p0/z, z0.s, z1.s
 ; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0
 ; CHECK-NEXT: sel p0.b, p0, p0.b, p1.b
diff --git a/llvm/test/CodeGen/AArch64/sve-fcopysign.ll b/llvm/test/CodeGen/AArch64/sve-fcopysign.ll
--- a/llvm/test/CodeGen/AArch64/sve-fcopysign.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fcopysign.ll
@@ -62,13 +62,13 @@
 ; CHECK-EXTEND-ROUND: // %bb.0:
 ; CHECK-EXTEND-ROUND-NEXT: ptrue p0.d
 ; CHECK-EXTEND-ROUND-NEXT: uunpkhi z3.d, z0.s
+; CHECK-EXTEND-ROUND-NEXT: uunpklo z0.d, z0.s
+; CHECK-EXTEND-ROUND-NEXT: and z3.s, z3.s, #0x7fffffff
+; CHECK-EXTEND-ROUND-NEXT: and z0.s, z0.s, #0x7fffffff
 ; CHECK-EXTEND-ROUND-NEXT: fcvt z2.s, p0/m, z2.d
 ; CHECK-EXTEND-ROUND-NEXT: fcvt z1.s, p0/m, z1.d
-; CHECK-EXTEND-ROUND-NEXT: uunpklo z0.d, z0.s
 ; CHECK-EXTEND-ROUND-NEXT: and z2.s, z2.s, #0x80000000
-; CHECK-EXTEND-ROUND-NEXT: and z3.s, z3.s, #0x7fffffff
 ; CHECK-EXTEND-ROUND-NEXT: and z1.s, z1.s, #0x80000000
-; CHECK-EXTEND-ROUND-NEXT: and z0.s, z0.s, #0x7fffffff
 ; CHECK-EXTEND-ROUND-NEXT: orr z2.d, z3.d, z2.d
 ; CHECK-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d
 ; CHECK-EXTEND-ROUND-NEXT: uzp1 z0.s, z0.s, z2.s
@@ -116,16 +116,16 @@
 ; CHECK-LABEL: test_copysign_v4f64_v4f32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: uunpkhi z3.d, z2.s
-; CHECK-NEXT: uunpklo z2.d, z2.s
+; CHECK-NEXT: uunpklo z3.d, z2.s
+; CHECK-NEXT: uunpkhi z2.d, z2.s
+; CHECK-NEXT: and z0.d, z0.d, #0x7fffffffffffffff
+; CHECK-NEXT: and z1.d, z1.d, #0x7fffffffffffffff
 ; CHECK-NEXT: fcvt z3.d, p0/m, z3.s
 ; CHECK-NEXT: fcvt z2.d, p0/m, z2.s
-; CHECK-NEXT: and z0.d, z0.d, #0x7fffffffffffffff
-; CHECK-NEXT: and z2.d, z2.d, #0x8000000000000000
 ; CHECK-NEXT: and z3.d, z3.d, #0x8000000000000000
-; CHECK-NEXT: and z1.d, z1.d, #0x7fffffffffffffff
-; CHECK-NEXT: orr z0.d, z0.d, z2.d
-; CHECK-NEXT: orr z1.d, z1.d, z3.d
+; CHECK-NEXT: and z2.d, z2.d, #0x8000000000000000
+; CHECK-NEXT: orr z0.d, z0.d, z3.d
+; CHECK-NEXT: orr z1.d, z1.d, z2.d
 ; CHECK-NEXT: ret
 %tmp0 = fpext %b to
 %r = call @llvm.copysign.v4f64( %a, %tmp0)
@@ -192,13 +192,13 @@
 ; CHECK-EXTEND-ROUND: // %bb.0:
 ; CHECK-EXTEND-ROUND-NEXT: ptrue p0.d
 ; CHECK-EXTEND-ROUND-NEXT: uunpkhi z3.d, z0.s
+; CHECK-EXTEND-ROUND-NEXT: uunpklo z0.d, z0.s
+; CHECK-EXTEND-ROUND-NEXT: and z3.h, z3.h, #0x7fff
+; CHECK-EXTEND-ROUND-NEXT: and z0.h, z0.h, #0x7fff
 ; CHECK-EXTEND-ROUND-NEXT: fcvt z2.h, p0/m, z2.d
 ; CHECK-EXTEND-ROUND-NEXT: fcvt z1.h, p0/m, z1.d
-; CHECK-EXTEND-ROUND-NEXT: uunpklo z0.d, z0.s
 ; CHECK-EXTEND-ROUND-NEXT: and z2.h, z2.h, #0x8000
-; CHECK-EXTEND-ROUND-NEXT: and z3.h, z3.h, #0x7fff
 ; CHECK-EXTEND-ROUND-NEXT: and z1.h, z1.h, #0x8000
-; CHECK-EXTEND-ROUND-NEXT: and z0.h, z0.h, #0x7fff
 ; CHECK-EXTEND-ROUND-NEXT: orr z2.d, z3.d, z2.d
 ; CHECK-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d
 ; CHECK-EXTEND-ROUND-NEXT: uzp1 z0.s, z0.s, z2.s
@@ -239,13 +239,13 @@
 ; CHECK-EXTEND-ROUND: // %bb.0:
 ; CHECK-EXTEND-ROUND-NEXT: ptrue p0.s
 ; CHECK-EXTEND-ROUND-NEXT: uunpkhi z3.s, z0.h
+; CHECK-EXTEND-ROUND-NEXT: uunpklo z0.s, z0.h
+; CHECK-EXTEND-ROUND-NEXT: and z3.h, z3.h, #0x7fff
+; CHECK-EXTEND-ROUND-NEXT: and z0.h, z0.h, #0x7fff
 ; CHECK-EXTEND-ROUND-NEXT: fcvt z2.h, p0/m, z2.s
 ; CHECK-EXTEND-ROUND-NEXT: fcvt z1.h, p0/m, z1.s
-; CHECK-EXTEND-ROUND-NEXT: uunpklo z0.s, z0.h
 ; CHECK-EXTEND-ROUND-NEXT: and z2.h, z2.h, #0x8000
-; CHECK-EXTEND-ROUND-NEXT: and z3.h, z3.h, #0x7fff
 ; CHECK-EXTEND-ROUND-NEXT: and z1.h, z1.h, #0x8000
-; CHECK-EXTEND-ROUND-NEXT: and z0.h, z0.h, #0x7fff
 ; CHECK-EXTEND-ROUND-NEXT: orr z2.d, z3.d, z2.d
 ; CHECK-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d
 ; CHECK-EXTEND-ROUND-NEXT: uzp1 z0.h, z0.h, z2.h
@@ -261,9 +261,9 @@
 define @test_copysign_nxv4f32_nxv4f16( %a, %b) #0 {
 ; CHECK-NO-EXTEND-ROUND-LABEL: test_copysign_nxv4f32_nxv4f16:
 ; CHECK-NO-EXTEND-ROUND: // %bb.0:
+; CHECK-NO-EXTEND-ROUND-NEXT: ptrue p0.s
 ; CHECK-NO-EXTEND-ROUND-NEXT: and z1.s, z1.s, #0x80000000
 ; CHECK-NO-EXTEND-ROUND-NEXT: and z0.s, z0.s, #0x7fffffff
-; CHECK-NO-EXTEND-ROUND-NEXT: ptrue p0.s
 ; CHECK-NO-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d
 ; CHECK-NO-EXTEND-ROUND-NEXT: fcvt z0.h, p0/m, z0.s
 ; CHECK-NO-EXTEND-ROUND-NEXT: ret
@@ -285,9 +285,9 @@
 define @test_copysign_nxv2f64_nxv2f32( %a, %b) #0 {
 ; CHECK-NO-EXTEND-ROUND-LABEL: test_copysign_nxv2f64_nxv2f32:
 ; CHECK-NO-EXTEND-ROUND: // %bb.0:
+; CHECK-NO-EXTEND-ROUND-NEXT: ptrue p0.d
 ; CHECK-NO-EXTEND-ROUND-NEXT: and z1.d, z1.d, #0x8000000000000000
 ; CHECK-NO-EXTEND-ROUND-NEXT: and z0.d, z0.d, #0x7fffffffffffffff
-; CHECK-NO-EXTEND-ROUND-NEXT: ptrue p0.d
 ; CHECK-NO-EXTEND-ROUND-NEXT: orr z0.d, z0.d, z1.d
 ; CHECK-NO-EXTEND-ROUND-NEXT: fcvt z0.s, p0/m, z0.d
 ; CHECK-NO-EXTEND-ROUND-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-fix-length-and-combine-512.ll b/llvm/test/CodeGen/AArch64/sve-fix-length-and-combine-512.ll
--- a/llvm/test/CodeGen/AArch64/sve-fix-length-and-combine-512.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fix-length-and-combine-512.ll
@@ -4,9 +4,9 @@
 define void @vls_sve_and_64xi8(ptr %ap, ptr %out) nounwind {
 ; CHECK-LABEL: vls_sve_and_64xi8:
 ; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b, vl64
 ; CHECK-NEXT: adrp x8, .LCPI0_0
 ; CHECK-NEXT: add x8, x8, :lo12:.LCPI0_0
-; CHECK-NEXT: ptrue p0.b, vl64
 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
 ; CHECK-NEXT: ld1b { z1.b }, p0/z, [x8]
 ; CHECK-NEXT: and z0.d, z0.d, z1.d
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-addressing-modes.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-addressing-modes.ll
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-addressing-modes.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-addressing-modes.ll
@@ -7,8 +7,8 @@
 define void @masked_gather_base_plus_stride_v8f32(ptr %dst, ptr %src) #0 {
 ; CHECK-LABEL: masked_gather_base_plus_stride_v8f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: index z0.s, #0, #7
 ; CHECK-NEXT: ptrue p0.s, vl8
+; CHECK-NEXT: index z0.s, #0, #7
 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x1, z0.s, sxtw #2]
 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
 ; CHECK-NEXT: ret
@@ -21,8 +21,8 @@
 define void @masked_gather_base_plus_stride_v4f64(ptr %dst, ptr %src) #0 {
 ; CHECK-LABEL: masked_gather_base_plus_stride_v4f64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #-32
 ; CHECK-NEXT: ptrue p0.d, vl4
+; CHECK-NEXT: mov x8, #-32 // =0xffffffffffffffe0
 ; CHECK-NEXT: index z0.d, #-2, x8
 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1, z0.d, lsl #3]
 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
@@ -37,9 +37,9 @@
 ; CHECK-LABEL: masked_scatter_base_plus_stride_v8f32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.s, vl8
-; CHECK-NEXT: index z1.s, #0, #-7
-; CHECK-NEXT: ld1w { z0.s }, p0/z, [x1]
-; CHECK-NEXT: st1w { z0.s }, p0, [x0, z1.s, sxtw #2]
+; CHECK-NEXT: index z0.s, #0, #-7
+; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
+; CHECK-NEXT: st1w { z1.s }, p0, [x0, z0.s, sxtw #2]
 ; CHECK-NEXT: ret
 %data = load <8 x float>, ptr %src, align 4
 %ptrs = getelementptr float, ptr %dst, <8 x i64>
@@ -51,9 +51,9 @@
 ; CHECK-LABEL: masked_scatter_base_plus_stride_v4f64:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.d, vl4
-; CHECK-NEXT: index z1.d, #-2, #3
-; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1]
-; CHECK-NEXT: st1d { z0.d }, p0, [x0, z1.d, lsl #3]
+; CHECK-NEXT: index z0.d, #-2, #3
+; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
+; CHECK-NEXT: st1d { z1.d }, p0, [x0, z0.d, lsl #3]
 ; CHECK-NEXT: ret
 %data = load <4 x double>, ptr %src, align 8
 %ptrs = getelementptr double, ptr %dst, <4 x i64>
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-bit-counting.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-bit-counting.ll
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-bit-counting.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-bit-counting.ll
@@ -46,8 +46,8 @@
 define void @ctlz_v64i8(ptr %a) #0 {
 ; VBITS_GE_256-LABEL: ctlz_v64i8:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov w8, #32
 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32
+; VBITS_GE_256-NEXT: mov w8, #32 // =0x20
 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8]
 ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: clz z0.b, p0/m, z0.b
@@ -134,8 +134,8 @@
 define void @ctlz_v32i16(ptr %a) #0 {
 ; VBITS_GE_256-LABEL: ctlz_v32i16:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #16
 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
+; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
 ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: clz z0.h, p0/m, z0.h
@@ -222,8 +222,8 @@
 define void @ctlz_v16i32(ptr %a) #0 {
 ; VBITS_GE_256-LABEL: ctlz_v16i32:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #8
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
+; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: clz z0.s, p0/m, z0.s
@@ -276,8 +276,8 @@
 define <1 x i64> @ctlz_v1i64(<1 x i64> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: ctlz_v1i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.d, vl1
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: clz z0.d, p0/m, z0.d
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT: ret
@@ -288,8 +288,8 @@
 define <2 x i64> @ctlz_v2i64(<2 x i64> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: ctlz_v2i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: clz z0.d, p0/m, z0.d
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT: ret
@@ -314,8 +314,8 @@
 define void @ctlz_v8i64(ptr %a) #0 {
 ; VBITS_GE_256-LABEL: ctlz_v8i64:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #4
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: clz z0.d, p0/m, z0.d
@@ -406,8 +406,8 @@
 define void @ctpop_v64i8(ptr %a) #0 {
 ; VBITS_GE_256-LABEL: ctpop_v64i8:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov w8, #32
 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32
+; VBITS_GE_256-NEXT: mov w8, #32 // =0x20
 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8]
 ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: cnt z0.b, p0/m, z0.b
@@ -496,8 +496,8 @@
 define void @ctpop_v32i16(ptr %a) #0 {
 ; VBITS_GE_256-LABEL: ctpop_v32i16:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #16
 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
+; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
 ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: cnt z0.h, p0/m, z0.h
@@ -588,8 +588,8 @@
 define void @ctpop_v16i32(ptr %a) #0 {
 ; VBITS_GE_256-LABEL: ctpop_v16i32:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #8
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
+; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: cnt z0.s, p0/m, z0.s
@@ -682,8 +682,8 @@
 define void @ctpop_v8i64(ptr %a) #0 {
 ; VBITS_GE_256-LABEL: ctpop_v8i64:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #4
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: cnt z0.d, p0/m, z0.d
@@ -740,8 +740,8 @@
 define <8 x i8> @cttz_v8i8(<8 x i8> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: cttz_v8i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.b, vl8
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: rbit z0.b, p0/m, z0.b
 ; CHECK-NEXT: clz v0.8b, v0.8b
 ; CHECK-NEXT: ret
@@ -752,8 +752,8 @@
 define <16 x i8> @cttz_v16i8(<16 x i8> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: cttz_v16i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.b, vl16
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: rbit z0.b, p0/m, z0.b
 ; CHECK-NEXT: clz v0.16b, v0.16b
 ; CHECK-NEXT: ret
@@ -779,8 +779,8 @@
 define void @cttz_v64i8(ptr %a) #0 {
 ; VBITS_GE_256-LABEL: cttz_v64i8:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov w8, #32
 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32
+; VBITS_GE_256-NEXT: mov w8, #32 // =0x20
 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8]
 ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: rbit z0.b, p0/m, z0.b
@@ -838,8 +838,8 @@
 define <4 x i16> @cttz_v4i16(<4 x i16> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: cttz_v4i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.h, vl4
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: rbit z0.h, p0/m, z0.h
 ; CHECK-NEXT: clz v0.4h, v0.4h
 ; CHECK-NEXT: ret
@@ -850,8 +850,8 @@
 define <8 x i16> @cttz_v8i16(<8 x i16> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: cttz_v8i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: rbit z0.h, p0/m, z0.h
 ; CHECK-NEXT: clz v0.8h, v0.8h
 ; CHECK-NEXT: ret
@@ -877,8 +877,8 @@
 define void @cttz_v32i16(ptr %a) #0 {
 ; VBITS_GE_256-LABEL: cttz_v32i16:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #16
 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
+; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
 ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: rbit z0.h, p0/m, z0.h
@@ -937,8 +937,8 @@
 define <2 x i32> @cttz_v2i32(<2 x i32> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: cttz_v2i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.s, vl2
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: rbit z0.s, p0/m, z0.s
 ; CHECK-NEXT: clz v0.2s, v0.2s
 ; CHECK-NEXT: ret
@@ -950,8 +950,8 @@
 define <4 x i32> @cttz_v4i32(<4 x i32> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: cttz_v4i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: rbit z0.s, p0/m, z0.s
 ; CHECK-NEXT: clz v0.4s, v0.4s
 ; CHECK-NEXT: ret
@@ -977,8 +977,8 @@
 define void @cttz_v16i32(ptr %a) #0 {
 ; VBITS_GE_256-LABEL: cttz_v16i32:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #8
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
+; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: rbit z0.s, p0/m, z0.s
@@ -1036,8 +1036,8 @@
 define <1 x i64> @cttz_v1i64(<1 x i64> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: cttz_v1i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.d, vl1
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: rbit z0.d, p0/m, z0.d
 ; CHECK-NEXT: clz z0.d, p0/m, z0.d
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -1049,8 +1049,8 @@
 define <2 x i64> @cttz_v2i64(<2 x i64> %op) vscale_range(2,0) #0 {
 ; CHECK-LABEL: cttz_v2i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: rbit z0.d, p0/m, z0.d
 ; CHECK-NEXT: clz z0.d, p0/m, z0.d
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -1077,8 +1077,8 @@
 define void @cttz_v8i64(ptr %a) #0 {
 ; VBITS_GE_256-LABEL: cttz_v8i64:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #4
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: rbit z0.d, p0/m, z0.d
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-bitcast.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-bitcast.ll
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-bitcast.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-bitcast.ll
@@ -47,8 +47,8 @@
 define void @bitcast_v32i16(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: bitcast_v32i16:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #16
 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
+; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
 ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x1, x8, lsl #1]
@@ -135,8 +135,8 @@
 define void @bitcast_v16i32(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: bitcast_v16i32:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #8
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
+; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2]
@@ -223,8 +223,8 @@
 define void @bitcast_v8i64(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: bitcast_v8i64:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #4
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1, x8, lsl #3]
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-bitselect.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-bitselect.ll
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-bitselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-bitselect.ll
@@ -13,14 +13,14 @@
 ; CHECK-LABEL: fixed_bitselect_v8i32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.s, vl8
-; CHECK-NEXT: mov z3.s, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z1.s, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
-; CHECK-NEXT: ld1w { z2.s }, p0/z, [x2]
-; CHECK-NEXT: add z3.s, z0.s, z3.s
+; CHECK-NEXT: ld1w { z2.s }, p0/z, [x1]
+; CHECK-NEXT: ld1w { z3.s }, p0/z, [x2]
+; CHECK-NEXT: add z1.s, z0.s, z1.s
 ; CHECK-NEXT: subr z0.s, z0.s, #0 // =0x0
-; CHECK-NEXT: and z0.d, z0.d, z1.d
-; CHECK-NEXT: and z1.d, z3.d, z2.d
+; CHECK-NEXT: and z0.d, z0.d, z2.d
+; CHECK-NEXT: and z1.d, z1.d, z3.d
 ; CHECK-NEXT: orr z0.d, z1.d, z0.d
 ; CHECK-NEXT: st1w { z0.s }, p0, [x8]
 ; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-build-vector.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-build-vector.ll
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-build-vector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-build-vector.ll
@@ -42,8 +42,8 @@
 define void @build_vector_minus2_dec32_v4i64(ptr %a) #0 {
 ; VBITS_GE_256-LABEL: build_vector_minus2_dec32_v4i64:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #-32
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: mov x8, #-32 // =0xffffffffffffffe0
 ; VBITS_GE_256-NEXT: index z0.d, #-2, x8
 ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0]
 ; VBITS_GE_256-NEXT: ret
@@ -53,16 +53,11 @@
 ; Constant but not a sequence.
 define void @build_vector_no_stride_v4i64(ptr %a) #0 {
-; VBITS_GE_256-LABEL: .LCPI4_0:
-; VBITS_GE_256: .xword 0
-; VBITS_GE_256-NEXT: .xword 4
-; VBITS_GE_256-NEXT: .xword 1
-; VBITS_GE_256-NEXT: .xword 8
 ; VBITS_GE_256-LABEL: build_vector_no_stride_v4i64:
 ; VBITS_GE_256: // %bb.0:
+; VBITS_GE_256-NEXT: ptrue p0.d, vl4
 ; VBITS_GE_256-NEXT: adrp x8, .LCPI4_0
 ; VBITS_GE_256-NEXT: add x8, x8, :lo12:.LCPI4_0
-; VBITS_GE_256-NEXT: ptrue p0.d, vl4
 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x8]
 ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0]
 ; VBITS_GE_256-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-concat.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-concat.ll
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-concat.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-concat.ll
@@ -35,12 +35,12 @@
 define void @concat_v32i8(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
 ; CHECK-LABEL: concat_v32i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x1]
 ; CHECK-NEXT: ptrue p0.b, vl16
-; CHECK-NEXT: ldr q1, [x0]
-; CHECK-NEXT: splice z1.b, p0, z1.b, z0.b
-; CHECK-NEXT: ptrue p0.b, vl32
-; CHECK-NEXT: st1b { z1.b }, p0, [x2]
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr q1, [x1]
+; CHECK-NEXT: ptrue p1.b, vl32
+; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
+; CHECK-NEXT: st1b { z0.b }, p1, [x2]
 ; CHECK-NEXT: ret
 %op1 = load <16 x i8>, ptr %a
 %op2 = load <16 x i8>, ptr %b
@@ -56,7 +56,7 @@
 ; VBITS_GE_256-LABEL: concat_v64i8:
 ; VBITS_GE_256: // %bb.0:
 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32
-; VBITS_GE_256-NEXT: mov w8, #32
+; VBITS_GE_256-NEXT: mov w8, #32 // =0x20
 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x1]
 ; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x2, x8]
@@ -66,11 +66,11 @@
 ; VBITS_GE_512-LABEL: concat_v64i8:
 ; VBITS_GE_512: // %bb.0:
 ; VBITS_GE_512-NEXT: ptrue p0.b, vl32
+; VBITS_GE_512-NEXT: ptrue p1.b, vl64
 ; VBITS_GE_512-NEXT: ld1b { z0.b }, p0/z, [x0]
 ; VBITS_GE_512-NEXT: ld1b { z1.b }, p0/z, [x1]
 ; VBITS_GE_512-NEXT: splice z0.b, p0, z0.b, z1.b
-; VBITS_GE_512-NEXT: ptrue p0.b, vl64
-; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x2]
+; VBITS_GE_512-NEXT: st1b { z0.b }, p1, [x2]
 ; VBITS_GE_512-NEXT: ret
 %op1 = load <32 x i8>, ptr %a
 %op2 = load <32 x i8>, ptr %b
@@ -90,11 +90,11 @@
 ; CHECK-LABEL: concat_v128i8:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.b, vl64
+; CHECK-NEXT: ptrue p1.b, vl128
 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
 ; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
 ; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
-; CHECK-NEXT: ptrue p0.b, vl128
-; CHECK-NEXT: st1b { z0.b }, p0, [x2]
+; CHECK-NEXT: st1b { z0.b }, p1, [x2]
 ; CHECK-NEXT: ret
 %op1 = load <64 x i8>, ptr %a
 %op2 = load <64 x i8>, ptr %b
@@ -122,11 +122,11 @@
 ; CHECK-LABEL: concat_v256i8:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.b, vl128
+; CHECK-NEXT: ptrue p1.b, vl256
 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
 ; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
 ; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
-; CHECK-NEXT: ptrue p0.b, vl256
-; CHECK-NEXT: st1b { z0.b }, p0, [x2]
+; CHECK-NEXT: st1b { z0.b }, p1, [x2]
 ; CHECK-NEXT: ret
 %op1 = load <128 x i8>, ptr %a
 %op2 = load <128 x i8>, ptr %b
@@ -195,12 +195,12 @@
 define void @concat_v16i16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
 ; CHECK-LABEL: concat_v16i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x1]
 ; CHECK-NEXT: ptrue p0.h, vl8
-; CHECK-NEXT: ldr q1, [x0]
-; CHECK-NEXT: splice z1.h, p0, z1.h, z0.h
-; CHECK-NEXT: ptrue p0.h, vl16
-; CHECK-NEXT: st1h { z1.h }, p0, [x2]
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr q1, [x1]
+; CHECK-NEXT: ptrue p1.h, vl16
+; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: st1h { z0.h }, p1, [x2]
 ; CHECK-NEXT: ret
 %op1 = load <8 x i16>, ptr %a
 %op2 = load <8 x i16>, ptr %b
@@ -214,7 +214,7 @@
 ; VBITS_GE_256-LABEL: concat_v32i16:
 ; VBITS_GE_256: // %bb.0:
 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
-; VBITS_GE_256-NEXT: mov x8, #16
+; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x1]
 ; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x2, x8, lsl #1]
@@ -224,11 +224,11 @@
 ; VBITS_GE_512-LABEL: concat_v32i16:
 ; VBITS_GE_512: // %bb.0:
 ; VBITS_GE_512-NEXT: ptrue p0.h, vl16
+; VBITS_GE_512-NEXT: ptrue p1.h, vl32
 ; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
 ; VBITS_GE_512-NEXT: ld1h { z1.h }, p0/z, [x1]
 ; VBITS_GE_512-NEXT: splice z0.h, p0, z0.h, z1.h
-; VBITS_GE_512-NEXT: ptrue p0.h, vl32
-; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x2]
+; VBITS_GE_512-NEXT: st1h { z0.h }, p1, [x2]
 ; VBITS_GE_512-NEXT: ret
 %op1 = load <16 x i16>, ptr %a
 %op2 = load <16 x i16>, ptr %b
@@ -244,11 +244,11 @@
 ; CHECK-LABEL: concat_v64i16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.h, vl32
+; CHECK-NEXT: ptrue p1.h, vl64
 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
 ; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
-; CHECK-NEXT: ptrue p0.h, vl64
-; CHECK-NEXT: st1h { z0.h }, p0, [x2]
+; CHECK-NEXT: st1h { z0.h }, p1, [x2]
 ; CHECK-NEXT: ret
 %op1 = load <32 x i16>, ptr %a
 %op2 = load <32 x i16>, ptr %b
@@ -268,11 +268,11 @@
 ; CHECK-LABEL: concat_v128i16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.h, vl64
+; CHECK-NEXT: ptrue p1.h, vl128
 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
 ; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
-; CHECK-NEXT: ptrue p0.h, vl128
-; CHECK-NEXT: st1h { z0.h }, p0, [x2]
+; CHECK-NEXT: st1h { z0.h }, p1, [x2]
 ; CHECK-NEXT: ret
 %op1 = load <64 x i16>, ptr %a
 %op2 = load <64 x i16>, ptr %b
@@ -325,12 +325,12 @@
 define void @concat_v8i32(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
 ; CHECK-LABEL: concat_v8i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x1]
 ; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ldr q1, [x0]
-; CHECK-NEXT: splice z1.s, p0, z1.s, z0.s
-; CHECK-NEXT: ptrue p0.s, vl8
-; CHECK-NEXT: st1w { z1.s }, p0, [x2]
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr q1, [x1]
+; CHECK-NEXT: ptrue p1.s, vl8
+; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: st1w { z0.s }, p1, [x2]
 ; CHECK-NEXT: ret
 %op1 = load <4 x i32>, ptr %a
 %op2 = load <4 x i32>, ptr %b
@@ -343,7 +343,7 @@
 ; VBITS_GE_256-LABEL: concat_v16i32:
 ; VBITS_GE_256: // %bb.0:
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
-; VBITS_GE_256-NEXT: mov x8, #8
+; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x1]
 ; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x2, x8, lsl #2]
@@ -353,11 +353,11 @@
 ; VBITS_GE_512-LABEL: concat_v16i32:
 ; VBITS_GE_512: // %bb.0:
 ; VBITS_GE_512-NEXT: ptrue p0.s, vl8
+; VBITS_GE_512-NEXT: ptrue p1.s, vl16
 ; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
 ; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x1]
 ; VBITS_GE_512-NEXT: splice z0.s, p0, z0.s, z1.s
-; VBITS_GE_512-NEXT: ptrue p0.s, vl16
-; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x2]
+; VBITS_GE_512-NEXT: st1w { z0.s }, p1, [x2]
 ; VBITS_GE_512-NEXT: ret
 %op1 = load <8 x i32>, ptr %a
 %op2 = load <8 x i32>, ptr %b
@@ -371,11 +371,11 @@
 ; CHECK-LABEL: concat_v32i32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.s, vl16
+; CHECK-NEXT: ptrue p1.s, vl32
 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
 ; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
-; CHECK-NEXT: ptrue p0.s, vl32
-; CHECK-NEXT: st1w { z0.s }, p0, [x2]
+; CHECK-NEXT: st1w { z0.s }, p1, [x2]
 ; CHECK-NEXT: ret
 %op1 = load <16 x i32>, ptr %a
 %op2 = load <16 x i32>, ptr %b
@@ -391,11 +391,11 @@
 ; CHECK-LABEL: concat_v64i32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.s, vl32
+; CHECK-NEXT: ptrue p1.s, vl64
 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
 ; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
-; CHECK-NEXT: ptrue p0.s, vl64
-; CHECK-NEXT: st1w { z0.s }, p0, [x2]
+; CHECK-NEXT: st1w { z0.s }, p1, [x2]
 ; CHECK-NEXT: ret
 %op1 = load <32 x i32>, ptr %a
 %op2 = load <32 x i32>, ptr %b
@@ -430,12 +430,12 @@
 define void @concat_v4i64(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
 ; CHECK-LABEL: concat_v4i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x1]
 ; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: ldr q1, [x0]
-; CHECK-NEXT: splice z1.d, p0, z1.d, z0.d
-; CHECK-NEXT: ptrue p0.d, vl4
-; CHECK-NEXT: st1d { z1.d }, p0, [x2]
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr q1, [x1]
+; CHECK-NEXT: ptrue p1.d, vl4
+; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: st1d { z0.d }, p1, [x2]
 ; CHECK-NEXT: ret
 %op1 = load <2 x i64>, ptr %a
 %op2 = load <2 x i64>, ptr %b
@@ -448,7 +448,7 @@
 ; VBITS_GE_256-LABEL: concat_v8i64:
 ; VBITS_GE_256: // %bb.0:
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
-; VBITS_GE_256-NEXT: mov x8, #4
+; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1]
 ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3]
@@ -458,11 +458,11 @@
 ; VBITS_GE_512-LABEL: concat_v8i64:
 ; VBITS_GE_512: // %bb.0:
 ; VBITS_GE_512-NEXT: ptrue p0.d, vl4
+; VBITS_GE_512-NEXT: ptrue p1.d, vl8
 ; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
 ; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1]
 ; VBITS_GE_512-NEXT: splice z0.d, p0, z0.d, z1.d
-; VBITS_GE_512-NEXT: ptrue p0.d, vl8
-; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x2]
+; VBITS_GE_512-NEXT: st1d { z0.d }, p1, [x2]
 ; VBITS_GE_512-NEXT: ret
 %op1 = load <4 x i64>, ptr %a
 %op2 = load <4 x i64>, ptr %b
@@ -475,11 +475,11 @@
 ; CHECK-LABEL: concat_v16i64:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.d, vl8
+; CHECK-NEXT: ptrue p1.d, vl16
 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
 ; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
-; CHECK-NEXT: ptrue p0.d, vl16
-; CHECK-NEXT: st1d { z0.d }, p0, [x2]
+; CHECK-NEXT: st1d { z0.d }, p1, [x2]
 ; CHECK-NEXT: ret
 %op1 = load <8 x i64>, ptr %a
 %op2 = load <8 x i64>, ptr %b
@@ -493,11 +493,11 @@
 ; CHECK-LABEL: concat_v32i64:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.d, vl16
+; CHECK-NEXT: ptrue p1.d, vl32
 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
 ; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
-; CHECK-NEXT: ptrue p0.d, vl32
-; CHECK-NEXT: st1d { z0.d }, p0, [x2]
+; CHECK-NEXT: st1d { z0.d }, p1, [x2]
 ; CHECK-NEXT: ret
 %op1 = load <16 x i64>, ptr %a
 %op2 = load <16 x i64>, ptr %b
@@ -538,12 +538,12 @@
 define void @concat_v16f16(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
 ; CHECK-LABEL: concat_v16f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x1]
 ; CHECK-NEXT: ptrue p0.h, vl8
-; CHECK-NEXT: ldr q1, [x0]
-; CHECK-NEXT: splice z1.h, p0, z1.h, z0.h
-; CHECK-NEXT: ptrue p0.h, vl16
-; CHECK-NEXT: st1h { z1.h }, p0, [x2]
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr q1, [x1]
+; CHECK-NEXT: ptrue p1.h, vl16
+; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: st1h { z0.h }, p1, [x2]
 ; CHECK-NEXT: ret
 %op1 = load <8 x half>, ptr %a
 %op2 = load <8 x half>, ptr %b
@@ -557,7 +557,7 @@
 ; VBITS_GE_256-LABEL: concat_v32f16:
 ; VBITS_GE_256: // %bb.0:
 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
-; VBITS_GE_256-NEXT: mov x8, #16
+; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x1]
 ; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x2, x8, lsl #1]
@@ -567,11 +567,11 @@
 ; VBITS_GE_512-LABEL: concat_v32f16:
 ; VBITS_GE_512: // %bb.0:
 ; VBITS_GE_512-NEXT: ptrue p0.h, vl16
+; VBITS_GE_512-NEXT: ptrue p1.h, vl32
 ; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
 ; VBITS_GE_512-NEXT: ld1h { z1.h }, p0/z, [x1]
 ; VBITS_GE_512-NEXT: splice z0.h, p0, z0.h, z1.h
-; VBITS_GE_512-NEXT: ptrue p0.h, vl32
-; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x2]
+; VBITS_GE_512-NEXT: st1h { z0.h }, p1, [x2]
 ; VBITS_GE_512-NEXT: ret
 %op1 = load <16 x half>, ptr %a
 %op2 = load <16 x half>, ptr %b
@@ -587,11 +587,11 @@
 ; CHECK-LABEL: concat_v64f16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.h, vl32
+; CHECK-NEXT: ptrue p1.h, vl64
 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
 ; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
-; CHECK-NEXT: ptrue p0.h, vl64
-; CHECK-NEXT: st1h { z0.h }, p0, [x2]
+; CHECK-NEXT: st1h { z0.h }, p1, [x2]
 ; CHECK-NEXT: ret
 %op1 = load <32 x half>, ptr %a
 %op2 = load <32 x half>, ptr %b
@@ -611,11 +611,11 @@
 ; CHECK-LABEL: concat_v128f16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.h, vl64
+; CHECK-NEXT: ptrue p1.h, vl128
 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
 ; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
-; CHECK-NEXT: ptrue p0.h, vl128
-; CHECK-NEXT: st1h { z0.h }, p0, [x2]
+; CHECK-NEXT: st1h { z0.h }, p1, [x2]
 ; CHECK-NEXT: ret
 %op1 = load <64 x half>, ptr %a
 %op2 = load <64 x half>, ptr %b
@@ -668,12 +668,12 @@
 define void @concat_v8f32(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
 ; CHECK-LABEL: concat_v8f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x1]
 ; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ldr q1, [x0]
-; CHECK-NEXT: splice z1.s, p0, z1.s, z0.s
-; CHECK-NEXT: ptrue p0.s, vl8
-; CHECK-NEXT: st1w { z1.s }, p0, [x2]
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr q1, [x1]
+; CHECK-NEXT: ptrue p1.s, vl8
+; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: st1w { z0.s }, p1, [x2]
 ; CHECK-NEXT: ret
 %op1 = load <4 x float>, ptr %a
 %op2 = load <4 x float>, ptr %b
@@ -686,7 +686,7 @@
 ; VBITS_GE_256-LABEL: concat_v16f32:
 ; VBITS_GE_256: // %bb.0:
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
-; VBITS_GE_256-NEXT: mov x8, #8
+; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x1]
 ; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x2, x8, lsl #2]
@@ -696,11 +696,11 @@
 ; VBITS_GE_512-LABEL: concat_v16f32:
 ; VBITS_GE_512: // %bb.0:
 ; VBITS_GE_512-NEXT: ptrue p0.s, vl8
+; VBITS_GE_512-NEXT: ptrue p1.s, vl16
 ; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
 ; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x1]
 ; VBITS_GE_512-NEXT: splice z0.s, p0, z0.s, z1.s
-; VBITS_GE_512-NEXT: ptrue p0.s, vl16
-; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x2]
+; VBITS_GE_512-NEXT: st1w { z0.s }, p1, [x2]
 ; VBITS_GE_512-NEXT: ret
 %op1 = load <8 x float>, ptr %a
 %op2 = load <8 x float>, ptr %b
@@ -714,11 +714,11 @@
 ; CHECK-LABEL: concat_v32f32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.s, vl16
+; CHECK-NEXT: ptrue p1.s, vl32
 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
 ; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
-; CHECK-NEXT: ptrue p0.s, vl32
-; CHECK-NEXT: st1w { z0.s }, p0, [x2]
+; CHECK-NEXT: st1w { z0.s }, p1, [x2]
 ; CHECK-NEXT: ret
 %op1 = load <16 x float>, ptr %a
 %op2 = load <16 x float>, ptr %b
@@ -734,11 +734,11 @@
 ; CHECK-LABEL: concat_v64f32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.s, vl32
+; CHECK-NEXT: ptrue p1.s, vl64
 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
 ; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
-; CHECK-NEXT: ptrue p0.s, vl64
-; CHECK-NEXT: st1w { z0.s }, p0, [x2]
+; CHECK-NEXT: st1w { z0.s }, p1, [x2]
 ; CHECK-NEXT: ret
 %op1 = load <32 x float>, ptr %a
 %op2 = load <32 x float>, ptr %b
@@ -773,12 +773,12 @@
 define void @concat_v4f64(ptr %a, ptr %b, ptr %c) vscale_range(2,0) #0 {
 ; CHECK-LABEL: concat_v4f64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x1]
 ; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: ldr q1, [x0]
-; CHECK-NEXT: splice z1.d, p0, z1.d, z0.d
-; CHECK-NEXT: ptrue p0.d, vl4
-; CHECK-NEXT: st1d { z1.d }, p0, [x2]
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: ldr q1, [x1]
+; CHECK-NEXT: ptrue p1.d, vl4
+; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: st1d { z0.d }, p1, [x2]
 ; CHECK-NEXT: ret
 %op1 = load <2 x double>, ptr %a
 %op2 = load <2 x double>, ptr %b
@@ -791,7 +791,7 @@
 ; VBITS_GE_256-LABEL: concat_v8f64:
 ; VBITS_GE_256: // %bb.0:
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
-; VBITS_GE_256-NEXT: mov x8, #4
+; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1]
 ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x2, x8, lsl #3]
@@ -801,11 +801,11 @@
 ; VBITS_GE_512-LABEL: concat_v8f64:
 ; VBITS_GE_512: // %bb.0:
 ; VBITS_GE_512-NEXT: ptrue p0.d, vl4
+; VBITS_GE_512-NEXT: ptrue p1.d, vl8
 ; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
 ; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1]
 ; VBITS_GE_512-NEXT: splice z0.d, p0, z0.d, z1.d
-; VBITS_GE_512-NEXT: ptrue p0.d, vl8
-; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x2]
+; VBITS_GE_512-NEXT: st1d { z0.d }, p1, [x2]
 ; VBITS_GE_512-NEXT: ret
 %op1 = load <4 x double>, ptr %a
 %op2 = load <4 x double>, ptr %b
@@ -818,11 +818,11 @@
 ; CHECK-LABEL: concat_v16f64:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.d, vl8
+; CHECK-NEXT: ptrue p1.d, vl16
 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
 ; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
-; CHECK-NEXT: ptrue p0.d, vl16
-; CHECK-NEXT: st1d { z0.d }, p0, [x2]
+; CHECK-NEXT: st1d { z0.d }, p1, [x2]
 ; CHECK-NEXT: ret
 %op1 = load <8 x double>, ptr %a
 %op2 = load <8 x double>, ptr %b
@@ -836,11 +836,11 @@
 ; CHECK-LABEL: concat_v32f64:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.d, vl16
+; CHECK-NEXT: ptrue p1.d, vl32
 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
 ; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
-; CHECK-NEXT: ptrue p0.d, vl32
-; CHECK-NEXT: st1d { z0.d }, p0, [x2]
+; CHECK-NEXT: st1d { z0.d }, p1, [x2]
 ; CHECK-NEXT: ret
 %op1 = load <16 x double>, ptr %a
 %op2 = load <16 x double>, ptr %b
@@ -859,8 +859,8 @@
 define void @concat_v32i8_undef(ptr %a, ptr %b) vscale_range(2,0) #0 {
 ; CHECK-LABEL: concat_v32i8_undef:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: ptrue p0.b, vl32
+; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: st1b { z0.b }, p0, [x1]
 ; CHECK-NEXT: ret
 %op1 = load <16 x i8>, ptr %a
@@ -875,8 +875,8 @@
 define void @concat_v16i16_undef(ptr %a, ptr %b) vscale_range(2,0) #0 {
 ; CHECK-LABEL: concat_v16i16_undef:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: ptrue p0.h, vl16
+; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: st1h { z0.h }, p0, [x1]
 ; CHECK-NEXT: ret
 %op1 = load <8 x i16>, ptr %a
@@ -889,8 +889,8 @@
 define void @concat_v8i32_undef(ptr %a, ptr %b) vscale_range(2,0) #0 {
 ; CHECK-LABEL: concat_v8i32_undef:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: ptrue p0.s, vl8
+; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: st1w { z0.s }, p0, [x1]
 ; CHECK-NEXT: ret
 %op1 = load <4 x i32>, ptr %a
@@ -902,8 +902,8 @@
 define void @concat_v4i64_undef(ptr %a, ptr %b) vscale_range(2,0) #0 {
 ; CHECK-LABEL: concat_v4i64_undef:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: ptrue p0.d, vl4
+; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: st1d { z0.d }, p0, [x1]
 ; CHECK-NEXT: ret
 %op1 = load <2 x i64>, ptr %a
@@ -919,8 +919,8 @@
 define void @concat_v32i8_4op(ptr %a, ptr %b) vscale_range(2,0) #0 {
 ; CHECK-LABEL: concat_v32i8_4op:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0]
 ; CHECK-NEXT: ptrue p0.b, vl32
+; CHECK-NEXT: ldr d0, [x0]
 ; CHECK-NEXT: st1b { z0.b }, p0, [x1]
 ; CHECK-NEXT: ret
 %op1 = load <8 x i8>, ptr %a
@@ -937,8 +937,8 @@
 define void @concat_v16i16_4op(ptr %a, ptr %b) vscale_range(2,0) #0 {
 ; CHECK-LABEL: concat_v16i16_4op:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0]
 ; CHECK-NEXT: ptrue p0.h, vl16
+; CHECK-NEXT: ldr d0, [x0]
 ; CHECK-NEXT: st1h { z0.h }, p0, [x1]
 ; CHECK-NEXT: ret
 %op1 = load <4 x i16>, ptr %a
@@ -952,8 +952,8 @@
 define void @concat_v8i32_4op(ptr %a, ptr %b) vscale_range(2,0) #0 {
 ; CHECK-LABEL: concat_v8i32_4op:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0]
 ; CHECK-NEXT: ptrue p0.s, vl8
+; CHECK-NEXT: ldr d0, [x0]
 ; CHECK-NEXT: st1w { z0.s }, p0, [x1]
 ; CHECK-NEXT: ret
 %op1 = load <2 x i32>, ptr %a
@@ -966,8 +966,8 @@
 define void @concat_v4i64_4op(ptr %a, ptr %b) vscale_range(2,0) #0 {
 ; CHECK-LABEL: concat_v4i64_4op:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0]
 ; CHECK-NEXT: ptrue p0.d, vl4
+; CHECK-NEXT: ldr d0, [x0]
 ; CHECK-NEXT: st1d { z0.d }, p0, [x1]
 ; CHECK-NEXT: ret
 %op1 = load <1 x i64>, ptr %a
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-ext-loads.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-ext-loads.ll
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-ext-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-ext-loads.ll
@@ -73,8 +73,8 @@
 define <64 x i32> @load_zext_v64i16i32(ptr %ap) #0 {
 ; VBITS_GE_1024-LABEL: load_zext_v64i16i32:
 ; VBITS_GE_1024: // %bb.0:
-; VBITS_GE_1024-NEXT: mov x9, #32 // =0x20
 ; VBITS_GE_1024-NEXT: ptrue p0.s, vl32
+; VBITS_GE_1024-NEXT: mov x9, #32 // =0x20
 ; VBITS_GE_1024-NEXT: ld1h { z0.s }, p0/z, [x0, x9, lsl #1]
 ; VBITS_GE_1024-NEXT: ld1h { z1.s }, p0/z, [x0]
 ; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x8, x9, lsl #2]
@@ -142,8 +142,8 @@
 define <64 x i32> @load_sext_v64i16i32(ptr %ap) #0 {
 ; VBITS_GE_1024-LABEL: load_sext_v64i16i32:
 ; VBITS_GE_1024: // %bb.0:
-; VBITS_GE_1024-NEXT: mov x9, #32 // =0x20
 ; VBITS_GE_1024-NEXT: ptrue p0.s, vl32
+; VBITS_GE_1024-NEXT: mov x9, #32 // =0x20
 ; VBITS_GE_1024-NEXT: ld1sh { z0.s }, p0/z, [x0, x9, lsl #1]
 ; VBITS_GE_1024-NEXT: ld1sh { z1.s }, p0/z, [x0]
 ; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x8, x9, lsl #2]
@@ -164,8 +164,8 @@
 define <32 x i64> @load_zext_v32i8i64(ptr %ap) #0 {
 ; VBITS_GE_1024-LABEL: load_zext_v32i8i64:
 ; VBITS_GE_1024: // %bb.0:
-; VBITS_GE_1024-NEXT: mov w9, #16 // =0x10
 ; VBITS_GE_1024-NEXT: ptrue p0.d, vl16
+; VBITS_GE_1024-NEXT: mov w9, #16 // =0x10
 ; VBITS_GE_1024-NEXT: ld1b { z0.d }, p0/z, [x0, x9]
 ; VBITS_GE_1024-NEXT: ld1b { z1.d }, p0/z, [x0]
 ; VBITS_GE_1024-NEXT: mov x9, #16 // =0x10
@@ -187,8 +187,8 @@
 define <32 x i64> @load_sext_v32i8i64(ptr %ap) #0 {
 ; VBITS_GE_1024-LABEL: load_sext_v32i8i64:
 ; VBITS_GE_1024: // %bb.0:
-; VBITS_GE_1024-NEXT: mov w9, #16 // =0x10
 ; VBITS_GE_1024-NEXT: ptrue p0.d, vl16
+; VBITS_GE_1024-NEXT: mov w9, #16 // =0x10
 ; VBITS_GE_1024-NEXT: ld1sb { z0.d }, p0/z, [x0, x9]
 ; VBITS_GE_1024-NEXT: ld1sb { z1.d }, p0/z, [x0]
 ; VBITS_GE_1024-NEXT: mov x9, #16 // =0x10
@@ -210,8 +210,8 @@
 define <32 x i64> @load_zext_v32i16i64(ptr %ap) #0 {
 ; VBITS_GE_1024-LABEL: load_zext_v32i16i64:
 ; VBITS_GE_1024: // %bb.0:
-; VBITS_GE_1024-NEXT: mov x9, #16 // =0x10
 ; VBITS_GE_1024-NEXT: ptrue p0.d, vl16
+; VBITS_GE_1024-NEXT: mov x9, #16 // =0x10
 ; VBITS_GE_1024-NEXT: ld1h { z0.d }, p0/z, [x0, x9, lsl #1]
 ; VBITS_GE_1024-NEXT: ld1h { z1.d }, p0/z, [x0]
 ; VBITS_GE_1024-NEXT: st1d { z0.d }, p0, [x8, x9, lsl #3]
@@ -232,8 +232,8 @@
 define <32 x i64> @load_sext_v32i16i64(ptr %ap) #0 {
 ; VBITS_GE_1024-LABEL: load_sext_v32i16i64:
 ; VBITS_GE_1024: // %bb.0:
-; VBITS_GE_1024-NEXT: mov x9, #16 // =0x10
 ; VBITS_GE_1024-NEXT: ptrue p0.d, vl16
+; VBITS_GE_1024-NEXT: mov x9, #16 // =0x10
 ; VBITS_GE_1024-NEXT: ld1sh { z0.d }, p0/z, [x0, x9, lsl #1]
 ; VBITS_GE_1024-NEXT: ld1sh { z1.d }, p0/z, [x0]
 ; VBITS_GE_1024-NEXT: st1d { z0.d }, p0, [x8, x9, lsl #3]
@@ -254,8 +254,8 @@
 define <32 x i64> @load_zext_v32i32i64(ptr %ap) #0 {
 ; VBITS_GE_1024-LABEL: load_zext_v32i32i64:
 ; VBITS_GE_1024: // %bb.0:
-; VBITS_GE_1024-NEXT: mov x9, #16 // =0x10
 ; VBITS_GE_1024-NEXT: ptrue p0.d, vl16
+; VBITS_GE_1024-NEXT: mov x9, #16 // =0x10
 ; VBITS_GE_1024-NEXT: ld1w { z0.d }, p0/z, [x0, x9, lsl #2]
 ; VBITS_GE_1024-NEXT: ld1w { z1.d }, p0/z, [x0]
 ; VBITS_GE_1024-NEXT: st1d { z0.d }, p0, [x8, x9, lsl #3]
@@ -276,8 +276,8 @@
 define <32 x i64> @load_sext_v32i32i64(ptr %ap) #0 {
 ; VBITS_GE_1024-LABEL: load_sext_v32i32i64:
 ; VBITS_GE_1024: // %bb.0:
-; VBITS_GE_1024-NEXT: mov x9, #16 // =0x10
 ; VBITS_GE_1024-NEXT: ptrue p0.d, vl16
+; VBITS_GE_1024-NEXT: mov x9, #16 // =0x10
 ; VBITS_GE_1024-NEXT: ld1sw { z0.d }, p0/z, [x0, x9, lsl #2]
 ; VBITS_GE_1024-NEXT: ld1sw { z1.d }, p0/z, [x0]
 ; VBITS_GE_1024-NEXT: st1d { z0.d }, p0, [x8, x9, lsl #3]
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll
@@ -45,8 +45,8 @@
 define void @extract_subvector_v64i8(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: extract_subvector_v64i8:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov w8, #32
 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32
+; VBITS_GE_256-NEXT: mov w8, #32 // =0x20
 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8]
 ; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x1]
 ; VBITS_GE_256-NEXT: ret
@@ -137,8 +137,8 @@
 define void @extract_subvector_v32i16(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: extract_subvector_v32i16:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #16
 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
+; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
 ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x1]
 ; VBITS_GE_256-NEXT: ret
@@ -228,8 +228,8 @@
 define void @extract_subvector_v16i32(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: extract_subvector_v16i32:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #8
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
+; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
 ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1]
 ; VBITS_GE_256-NEXT: ret
@@ -308,8 +308,8 @@
 define void @extract_subvector_v8i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
 ; CHECK-LABEL: extract_subvector_v8i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #4
 ; CHECK-NEXT: ptrue p0.d, vl4
+; CHECK-NEXT: mov x8, #4 // =0x4
 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
 ; CHECK-NEXT: st1d { z0.d }, p0, [x1]
 ; CHECK-NEXT: ret
@@ -322,14 +322,14 @@
 define void @extract_subvector_v16i64(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: extract_subvector_v16i64:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #8
-; VBITS_GE_256-NEXT: mov x9, #12
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: mov x8, #12 // =0xc
+; VBITS_GE_256-NEXT: mov x9, #8 // =0x8
 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0, x9, lsl #3]
-; VBITS_GE_256-NEXT: mov x8, #4
-; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1, x8, lsl #3]
-; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1]
+; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
+; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1, x8, lsl #3]
+; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1]
 ; VBITS_GE_256-NEXT: ret
 %op = load <16 x i64>, ptr %a
 %ret = call <8 x i64> @llvm.vector.extract.v8i64.v16i64(<16 x i64> %op, i64 8)
@@ -340,8 +340,8 @@
 define void @extract_subvector_v32i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
 ; CHECK-LABEL: extract_subvector_v32i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #16
 ; CHECK-NEXT: ptrue p0.d, vl16
+; CHECK-NEXT: mov x8, #16 // =0x10
 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
 ; CHECK-NEXT: st1d { z0.d }, p0, [x1]
 ; CHECK-NEXT: ret
@@ -392,8 +392,8 @@
 define void @extract_subvector_v32f16(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: extract_subvector_v32f16:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #16
 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
+; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
 ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x1]
 ; VBITS_GE_256-NEXT: ret
@@ -483,8 +483,8 @@
 define void @extract_subvector_v16f32(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: extract_subvector_v16f32:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #8
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
+; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
 ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1]
 ; VBITS_GE_256-NEXT: ret
@@ -563,8 +563,8 @@
 define void @extract_subvector_v8f64(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: extract_subvector_v8f64:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #4
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
 ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1]
 ; VBITS_GE_256-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-vector-elt.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-vector-elt.ll
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-vector-elt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-vector-elt.ll
@@ -46,8 +46,8 @@
 define half @extractelement_v32f16(ptr %a) #0 {
 ; VBITS_GE_256-LABEL: extractelement_v32f16:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #16
 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
+; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
 ; VBITS_GE_256-NEXT: mov z0.h, z0.h[15]
 ; VBITS_GE_256-NEXT: // kill: def $h0 killed $h0 killed $z0
@@ -69,10 +69,10 @@
 ; CHECK-LABEL: extractelement_v64f16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.h, vl64
-; CHECK-NEXT: mov w8, #63
+; CHECK-NEXT: mov w8, #63 // =0x3f
+; CHECK-NEXT: whilels p1.h, xzr, x8
 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
-; CHECK-NEXT: whilels p0.h, xzr, x8
-; CHECK-NEXT: lastb h0, p0, z0.h
+; CHECK-NEXT: lastb h0, p1, z0.h
 ; CHECK-NEXT: ret
 %op1 = load <64 x half>, ptr %a
 %r = extractelement <64 x half> %op1, i64 63
@@ -83,10 +83,10 @@
 ; CHECK-LABEL: extractelement_v128f16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.h, vl128
-; CHECK-NEXT: mov w8, #127
+; CHECK-NEXT: mov w8, #127 // =0x7f
+; CHECK-NEXT: whilels p1.h, xzr, x8
 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
-; CHECK-NEXT: whilels p0.h, xzr, x8
-; CHECK-NEXT: lastb h0, p0, z0.h
+; CHECK-NEXT: lastb h0, p1, z0.h
 ; CHECK-NEXT: ret
 %op1 = load <128 x half>, ptr %a
 %r = extractelement <128 x half> %op1, i64 127
@@ -130,8 +130,8 @@
 define float @extractelement_v16f32(ptr %a) #0 {
 ; VBITS_GE_256-LABEL: extractelement_v16f32:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #8
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
+; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
 ; VBITS_GE_256-NEXT: mov z0.s, z0.s[7]
 ; VBITS_GE_256-NEXT: // kill: def $s0 killed $s0 killed $z0
@@ -153,10 +153,10 @@
 ; CHECK-LABEL: extractelement_v32f32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.s, vl32
-; CHECK-NEXT: mov w8, #31
+; CHECK-NEXT: mov w8, #31 // =0x1f
+; CHECK-NEXT: whilels p1.s, xzr, x8
 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT: whilels p0.s, xzr, x8
-; CHECK-NEXT: lastb s0, p0, z0.s
+; CHECK-NEXT: lastb s0, p1, z0.s
 ; CHECK-NEXT: ret
 %op1 = load <32 x float>, ptr %a
 %r = extractelement <32 x float> %op1, i64 31
@@ -167,10 +167,10 @@
 ; CHECK-LABEL: extractelement_v64f32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.s, vl64
-; CHECK-NEXT: mov w8, #63
+; CHECK-NEXT: mov w8, #63 // =0x3f
+; CHECK-NEXT: whilels p1.s, xzr, x8
 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT: whilels p0.s, xzr, x8
-; CHECK-NEXT: lastb s0, p0, z0.s
+; CHECK-NEXT: lastb s0, p1, z0.s
 ; CHECK-NEXT: ret
 %op1 = load <64 x float>, ptr %a
 %r = extractelement <64 x float> %op1, i64 63
@@ -212,8 +212,8 @@
 define double @extractelement_v8f64(ptr %a) #0 {
 ; VBITS_GE_256-LABEL: extractelement_v8f64:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #4
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
 ; VBITS_GE_256-NEXT: mov z0.d, z0.d[3]
 ; VBITS_GE_256-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -235,10 +235,10 @@
 ; CHECK-LABEL: extractelement_v16f64:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.d, vl16
-; CHECK-NEXT: mov w8, #15
+; CHECK-NEXT: mov w8, #15 // =0xf
+; CHECK-NEXT: whilels p1.d, xzr, x8
 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
-; CHECK-NEXT: whilels p0.d, xzr, x8
-; CHECK-NEXT: lastb d0, p0, z0.d
+; CHECK-NEXT: lastb d0, p1, z0.d
 ; CHECK-NEXT: ret
 %op1 = load <16 x double>, ptr %a
 %r = extractelement <16 x double> %op1, i64 15
@@ -249,10 +249,10 @@
 ; CHECK-LABEL: extractelement_v32f64:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.d, vl32
-; CHECK-NEXT: mov w8, #31
+; CHECK-NEXT: mov w8, #31 // =0x1f
+; CHECK-NEXT: whilels p1.d, xzr, x8
 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
-; CHECK-NEXT: whilels p0.d, xzr, x8
-; CHECK-NEXT: lastb d0, p0, z0.d
+; CHECK-NEXT: lastb d0, p1, z0.d
 ; CHECK-NEXT: ret
 %op1 = load <32 x double>, ptr %a
 %r = extractelement <32 x double> %op1, i64 31
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fcopysign.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fcopysign.ll
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fcopysign.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fcopysign.ll
@@ -15,10 +15,10 @@
 define void @test_copysign_v4f16_v4f16(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
 ; CHECK-LABEL: test_copysign_v4f16_v4f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0]
-; CHECK-NEXT: mvni v2.4h, #128, lsl #8
-; CHECK-NEXT: ldr d1, [x1]
-; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-NEXT: mvni v0.4h, #128, lsl #8
+; CHECK-NEXT: ldr d1, [x0]
+; CHECK-NEXT: ldr d2, [x1]
+; CHECK-NEXT: bsl v0.8b, v1.8b, v2.8b
 ; CHECK-NEXT: str d0, [x0]
 ; CHECK-NEXT: ret
 %a = load <4 x half>, ptr %ap
@@ -31,10 +31,10 @@
 define void @test_copysign_v8f16_v8f16(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
 ; CHECK-LABEL: test_copysign_v8f16_v8f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: ldr q1, [x1]
-; CHECK-NEXT: mvni v2.8h, #128, lsl #8
-; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
+; CHECK-NEXT: mvni v0.8h, #128, lsl #8
+; CHECK-NEXT: ldr q1, [x0]
+; CHECK-NEXT: ldr q2, [x1]
+; CHECK-NEXT: bsl v0.16b, v1.16b, v2.16b
 ; CHECK-NEXT: str q0, [x0]
 ; CHECK-NEXT: ret
 %a = load <8 x half>, ptr %ap
@@ -65,15 +65,15 @@
 define void @test_copysign_v32f16_v32f16(ptr %ap, ptr %bp) #0 {
 ; VBITS_GE_256-LABEL: test_copysign_v32f16_v32f16:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #16
 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
+; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
 ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1]
 ; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1]
+; VBITS_GE_256-NEXT: and z2.h, z2.h, #0x8000
 ; VBITS_GE_256-NEXT: and z0.h, z0.h, #0x7fff
 ; VBITS_GE_256-NEXT: and z1.h, z1.h, #0x7fff
-; VBITS_GE_256-NEXT: and z2.h, z2.h, #0x8000
 ; VBITS_GE_256-NEXT: and z3.h, z3.h, #0x8000
 ; VBITS_GE_256-NEXT: orr z0.d, z0.d, z2.d
 ; VBITS_GE_256-NEXT: orr z1.d, z1.d, z3.d
@@ -139,10 +139,10 @@
 define void @test_copysign_v2f32_v2f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
 ; CHECK-LABEL: test_copysign_v2f32_v2f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0]
-; CHECK-NEXT: mvni v2.2s, #128, lsl #24
-; CHECK-NEXT: ldr d1, [x1]
-; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-NEXT: mvni v0.2s, #128, lsl #24
+; CHECK-NEXT: ldr d1, [x0]
+; CHECK-NEXT: ldr d2, [x1]
+; CHECK-NEXT: bsl v0.8b, v1.8b, v2.8b
 ; CHECK-NEXT: str d0, [x0]
 ; CHECK-NEXT: ret
 %a = load <2 x float>, ptr %ap
@@ -155,10 +155,10 @@
 define void @test_copysign_v4f32_v4f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
 ; CHECK-LABEL: test_copysign_v4f32_v4f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: ldr q1, [x1]
-; CHECK-NEXT: mvni v2.4s, #128, lsl #24
-; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
+; CHECK-NEXT: mvni v0.4s, #128, lsl #24
+; CHECK-NEXT: ldr q1, [x0]
+; CHECK-NEXT: ldr q2, [x1]
+; CHECK-NEXT: bsl v0.16b, v1.16b, v2.16b
 ; CHECK-NEXT: str q0, [x0]
 ; CHECK-NEXT: ret
 %a = load <4 x float>, ptr %ap
@@ -189,15 +189,15 @@
 define void @test_copysign_v16f32_v16f32(ptr %ap, ptr %bp) #0 {
 ; VBITS_GE_256-LABEL: test_copysign_v16f32_v16f32:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #8
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
+; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2]
 ; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1]
+; VBITS_GE_256-NEXT: and z2.s, z2.s, #0x80000000
 ; VBITS_GE_256-NEXT: and z0.s, z0.s, #0x7fffffff
 ; VBITS_GE_256-NEXT: and z1.s, z1.s, #0x7fffffff
-; VBITS_GE_256-NEXT: and z2.s, z2.s, #0x80000000
 ; VBITS_GE_256-NEXT: and z3.s, z3.s, #0x80000000
 ; VBITS_GE_256-NEXT: orr z0.d, z0.d, z2.d
 ; VBITS_GE_256-NEXT: orr z1.d, z1.d, z3.d
@@ -298,15 +298,15 @@
 define void @test_copysign_v8f64_v8f64(ptr %ap, ptr %bp) #0 {
 ; VBITS_GE_256-LABEL: test_copysign_v8f64_v8f64:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #4
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
 ; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1]
+; VBITS_GE_256-NEXT: and z2.d, z2.d, #0x8000000000000000
 ; VBITS_GE_256-NEXT: and z0.d, z0.d, #0x7fffffffffffffff
 ; VBITS_GE_256-NEXT: and z1.d, z1.d, #0x7fffffffffffffff
-; VBITS_GE_256-NEXT: and z2.d, z2.d, #0x8000000000000000
 ; VBITS_GE_256-NEXT: and z3.d, z3.d, #0x8000000000000000
 ; VBITS_GE_256-NEXT: orr z0.d, z0.d, z2.d
 ; VBITS_GE_256-NEXT: orr z1.d, z1.d, z3.d
@@ -373,10 +373,10 @@
 ; CHECK-LABEL: test_copysign_v2f32_v2f64:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldr q0, [x1]
-; CHECK-NEXT: mvni v2.2s, #128, lsl #24
-; CHECK-NEXT: ldr d1, [x0]
+; CHECK-NEXT: mvni v1.2s, #128, lsl #24
+; CHECK-NEXT: ldr d2, [x0]
 ; CHECK-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-NEXT: bit v0.8b, v1.8b, v2.8b
+; CHECK-NEXT: bit v0.8b, v2.8b, v1.8b
 ; CHECK-NEXT: str d0, [x0]
 ; CHECK-NEXT: ret
 %a = load <2 x float>, ptr %ap
@@ -395,10 +395,10 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.d, vl4
 ; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
-; CHECK-NEXT: ptrue p0.d
 ; CHECK-NEXT: mvni v2.4s, #128, lsl #24
-; CHECK-NEXT: fcvt z1.s, p0/m, z1.d
+; CHECK-NEXT: ptrue p1.d
+; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
+; CHECK-NEXT: fcvt z1.s, p1/m, z1.d
 ; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
 ; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
 ; CHECK-NEXT: str q0, [x0]
@@ -454,8 +454,8 @@
 ; CHECK_EXTEND_ROUND-NEXT: ld1d { z0.d }, p0/z, [x0]
 ; CHECK_EXTEND_ROUND-NEXT: ldr q1, [x1]
 ; CHECK_EXTEND_ROUND-NEXT: uunpklo z1.d, z1.s
-; CHECK_EXTEND_ROUND-NEXT: fcvt z1.d, p0/m, z1.s
 ; CHECK_EXTEND_ROUND-NEXT: and z0.d, z0.d, #0x7fffffffffffffff
+; CHECK_EXTEND_ROUND-NEXT: fcvt z1.d, p0/m, z1.s
 ; CHECK_EXTEND_ROUND-NEXT: and z1.d, z1.d, #0x8000000000000000
 ; CHECK_EXTEND_ROUND-NEXT: orr z0.d, z0.d, z1.d
 ; CHECK_EXTEND_ROUND-NEXT: st1d { z0.d }, p0, [x0]
@@ -474,10 +474,10 @@
 ; CHECK-LABEL: test_copysign_v4f16_v4f32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldr q0, [x1]
-; CHECK-NEXT: mvni v2.4h, #128, lsl #8
-; CHECK-NEXT: ldr d1, [x0]
+; CHECK-NEXT: mvni v1.4h, #128, lsl #8
+; CHECK-NEXT: ldr d2, [x0]
 ; CHECK-NEXT: fcvtn v0.4h, v0.4s
-; CHECK-NEXT: bit v0.8b, v1.8b, v2.8b
+; CHECK-NEXT: bit v0.8b, v2.8b, v1.8b
 ; CHECK-NEXT: str d0, [x0]
 ; CHECK-NEXT: ret
 %a = load <4 x half>, ptr %ap
@@ -493,10 +493,10 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.d, vl4
 ; CHECK-NEXT: ldr d0, [x0]
-; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
-; CHECK-NEXT: ptrue p0.d
 ; CHECK-NEXT: mvni v2.4h, #128, lsl #8
-; CHECK-NEXT: fcvt z1.h, p0/m, z1.d
+; CHECK-NEXT: ptrue p1.d
+; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
+; CHECK-NEXT: fcvt z1.h, p1/m, z1.d
 ; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
 ; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
 ; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
@@ -520,10 +520,10 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.s, vl8
 ; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
-; CHECK-NEXT: ptrue p0.s
 ; CHECK-NEXT: mvni v2.8h, #128, lsl #8
-; CHECK-NEXT: fcvt z1.h, p0/m, z1.s
+; CHECK-NEXT: ptrue p1.s
+; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
+; CHECK-NEXT: fcvt z1.h, p1/m, z1.s
 ; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
 ; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
 ; CHECK-NEXT: str q0, [x0]
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-arith.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-arith.ll
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-arith.ll
@@ -48,8 +48,8 @@
 define void @fadd_v32f16(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: fadd_v32f16:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #16
 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
+; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
 ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1]
@@ -146,8 +146,8 @@
 define void @fadd_v16f32(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: fadd_v16f32:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #8
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
+; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2]
@@ -244,8 +244,8 @@
 define void @fadd_v8f64(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: fadd_v8f64:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #4
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
@@ -346,14 +346,14 @@
 define void @fdiv_v32f16(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: fdiv_v32f16:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #16
 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
+; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
 ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1]
-; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1]
 ; VBITS_GE_256-NEXT: fdiv z0.h, p0/m, z0.h, z2.h
-; VBITS_GE_256-NEXT: fdiv z1.h, p0/m, z1.h, z3.h
+; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1]
+; VBITS_GE_256-NEXT: fdiv z1.h, p0/m, z1.h, z2.h
 ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
 ; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
 ; VBITS_GE_256-NEXT: ret
@@ -444,14 +444,14 @@
 define void @fdiv_v16f32(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: fdiv_v16f32:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #8
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
+; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2]
-; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1]
 ; VBITS_GE_256-NEXT: fdiv z0.s, p0/m, z0.s, z2.s
-; VBITS_GE_256-NEXT: fdiv z1.s, p0/m, z1.s, z3.s
+; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1]
+; VBITS_GE_256-NEXT: fdiv z1.s, p0/m, z1.s, z2.s
 ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
 ; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
 ; VBITS_GE_256-NEXT: ret
@@ -542,14 +542,14 @@
 define void @fdiv_v8f64(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: fdiv_v8f64:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov
x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] -; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1] ; VBITS_GE_256-NEXT: fdiv z0.d, p0/m, z0.d, z2.d -; VBITS_GE_256-NEXT: fdiv z1.d, p0/m, z1.d, z3.d +; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1] +; VBITS_GE_256-NEXT: fdiv z1.d, p0/m, z1.d, z2.d ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0] ; VBITS_GE_256-NEXT: ret @@ -648,8 +648,8 @@ define void @fma_v32f16(ptr %a, ptr %b, ptr %c) #0 { ; VBITS_GE_256-LABEL: fma_v32f16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1] @@ -758,8 +758,8 @@ define void @fma_v16f32(ptr %a, ptr %b, ptr %c) #0 { ; VBITS_GE_256-LABEL: fma_v16f32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2] @@ -867,8 +867,8 @@ define void @fma_v8f64(ptr %a, ptr %b, ptr %c) #0 { ; VBITS_GE_256-LABEL: fma_v8f64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] @@ -977,8 +977,8 @@ define void @fmul_v32f16(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: fmul_v32f16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1] @@ -1075,8 +1075,8 @@ define void @fmul_v16f32(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: fmul_v16f32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2] @@ -1173,8 +1173,8 @@ define void @fmul_v8f64(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: fmul_v8f64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] @@ -1273,8 +1273,8 @@ define void @fneg_v32f16(ptr %a) #0 { ; VBITS_GE_256-LABEL: fneg_v32f16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: fneg z0.h, p0/m, z0.h @@ -1361,8 +1361,8 @@ define void @fneg_v16f32(ptr %a) #0 { ; VBITS_GE_256-LABEL: fneg_v16f32: ; 
VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: fneg z0.s, p0/m, z0.s @@ -1449,8 +1449,8 @@ define void @fneg_v8f64(ptr %a) #0 { ; VBITS_GE_256-LABEL: fneg_v8f64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: fneg z0.d, p0/m, z0.d @@ -1541,11 +1541,11 @@ define void @fsqrt_v32f16(ptr %a) #0 { ; VBITS_GE_256-LABEL: fsqrt_v32f16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] -; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: fsqrt z0.h, p0/m, z0.h +; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: fsqrt z1.h, p0/m, z1.h ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0] @@ -1629,11 +1629,11 @@ define void @fsqrt_v16f32(ptr %a) #0 { ; VBITS_GE_256-LABEL: fsqrt_v16f32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] -; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: fsqrt z0.s, p0/m, z0.s +; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: fsqrt z1.s, p0/m, z1.s ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0] @@ -1717,11 +1717,11 @@ define void @fsqrt_v8f64(ptr %a) #0 { ; VBITS_GE_256-LABEL: fsqrt_v8f64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] -; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: fsqrt z0.d, p0/m, z0.d +; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: fsqrt z1.d, p0/m, z1.d ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0] @@ -1811,8 +1811,8 @@ define void @fsub_v32f16(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: fsub_v32f16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1] @@ -1909,8 +1909,8 @@ define void @fsub_v16f32(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: fsub_v16f32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2] @@ -2007,8 +2007,8 @@ define void @fsub_v8f64(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: fsub_v8f64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, 
p0/z, [x0] ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] @@ -2107,8 +2107,8 @@ define void @fabs_v32f16(ptr %a) #0 { ; VBITS_GE_256-LABEL: fabs_v32f16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: fabs z0.h, p0/m, z0.h @@ -2195,8 +2195,8 @@ define void @fabs_v16f32(ptr %a) #0 { ; VBITS_GE_256-LABEL: fabs_v16f32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: fabs z0.s, p0/m, z0.s @@ -2283,8 +2283,8 @@ define void @fabs_v8f64(ptr %a) #0 { ; VBITS_GE_256-LABEL: fabs_v8f64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: fabs z0.d, p0/m, z0.d diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-compares.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-compares.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-compares.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-compares.ll @@ -52,8 +52,8 @@ define void @fcmp_oeq_v32f16(ptr %a, ptr %b, ptr %c) #0 { ; VBITS_GE_256-LABEL: fcmp_oeq_v32f16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1] @@ -162,8 +162,8 @@ define void @fcmp_oeq_v16f32(ptr %a, ptr %b, ptr %c) #0 { ; VBITS_GE_256-LABEL: fcmp_oeq_v16f32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2] @@ -272,8 +272,8 @@ define void @fcmp_oeq_v8f64(ptr %a, ptr %b, ptr %c) #0 { ; VBITS_GE_256-LABEL: fcmp_oeq_v8f64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-extend-trunc.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-extend-trunc.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-extend-trunc.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-extend-trunc.ll @@ -54,8 +54,8 @@ define void @fcvt_v16f16_v16f32(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: fcvt_v16f16_v16f32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1h { z0.s }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: fcvt z0.s, p0/m, z0.h @@ -157,8 +157,8 @@ define void @fcvt_v8f16_v8f64(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: fcvt_v8f16_v8f64: ; VBITS_GE_256: // %bb.0: -; 
VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1h { z0.d }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: fcvt z0.d, p0/m, z0.h @@ -257,8 +257,8 @@ define void @fcvt_v8f32_v8f64(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: fcvt_v8f32_v8f64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1w { z0.d }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: fcvt z0.d, p0/m, z0.s @@ -357,8 +357,8 @@ define void @fcvt_v16f32_v16f16(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: fcvt_v16f32_v16f16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: fcvt z0.h, p0/m, z0.s @@ -430,8 +430,8 @@ define void @fcvt_v2f64_v2f16(ptr %a, ptr %b) vscale_range(2,0) #0 { ; CHECK-LABEL: fcvt_v2f64_v2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: fcvt z0.h, p0/m, z0.d ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h @@ -460,13 +460,13 @@ define void @fcvt_v8f64_v8f16(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: fcvt_v8f64_v8f16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 +; VBITS_GE_256-NEXT: ptrue p1.d ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] -; VBITS_GE_256-NEXT: ptrue p0.d -; VBITS_GE_256-NEXT: fcvt z0.h, p0/m, z0.d -; VBITS_GE_256-NEXT: fcvt z1.h, p0/m, z1.d +; VBITS_GE_256-NEXT: fcvt z0.h, p1/m, z0.d +; VBITS_GE_256-NEXT: fcvt z1.h, p1/m, z1.d ; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s ; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h @@ -562,8 +562,8 @@ define void @fcvt_v8f64_v8f32(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: fcvt_v8f64_v8f32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: fcvt z0.s, p0/m, z0.d diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-fma.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-fma.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-fma.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-fma.ll @@ -55,8 +55,8 @@ define void @fma_v32f16(ptr %a, ptr %b, ptr %c) #0 { ; VBITS_GE_256-LABEL: fma_v32f16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1] @@ -171,8 +171,8 @@ define void @fma_v16f32(ptr %a, ptr %b, ptr %c) #0 { ; VBITS_GE_256-LABEL: fma_v16f32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, 
[x1, x8, lsl #2] @@ -286,8 +286,8 @@ define void @fma_v8f64(ptr %a, ptr %b, ptr %c) #0 { ; VBITS_GE_256-LABEL: fma_v8f64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-minmax.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-minmax.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-minmax.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-minmax.ll @@ -48,8 +48,8 @@ define void @fmaxnm_v32f16(ptr %a, ptr %b) #0 { ; VBITS_EQ_256-LABEL: fmaxnm_v32f16: ; VBITS_EQ_256: // %bb.0: -; VBITS_EQ_256-NEXT: mov x8, #16 ; VBITS_EQ_256-NEXT: ptrue p0.h, vl16 +; VBITS_EQ_256-NEXT: mov x8, #16 // =0x10 ; VBITS_EQ_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_EQ_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_EQ_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1] @@ -146,8 +146,8 @@ define void @fmaxnm_v16f32(ptr %a, ptr %b) #0 { ; VBITS_EQ_256-LABEL: fmaxnm_v16f32: ; VBITS_EQ_256: // %bb.0: -; VBITS_EQ_256-NEXT: mov x8, #8 ; VBITS_EQ_256-NEXT: ptrue p0.s, vl8 +; VBITS_EQ_256-NEXT: mov x8, #8 // =0x8 ; VBITS_EQ_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_EQ_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_EQ_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2] @@ -244,8 +244,8 @@ define void @fmaxnm_v8f64(ptr %a, ptr %b) #0 { ; VBITS_EQ_256-LABEL: fmaxnm_v8f64: ; VBITS_EQ_256: // %bb.0: -; VBITS_EQ_256-NEXT: mov x8, #4 ; VBITS_EQ_256-NEXT: ptrue p0.d, vl4 +; VBITS_EQ_256-NEXT: mov x8, #4 // =0x4 ; VBITS_EQ_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_EQ_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_EQ_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] @@ -346,8 +346,8 @@ define void @fminnm_v32f16(ptr %a, ptr %b) #0 { ; VBITS_EQ_256-LABEL: fminnm_v32f16: ; VBITS_EQ_256: // %bb.0: -; VBITS_EQ_256-NEXT: mov x8, #16 ; VBITS_EQ_256-NEXT: ptrue p0.h, vl16 +; VBITS_EQ_256-NEXT: mov x8, #16 // =0x10 ; VBITS_EQ_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_EQ_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_EQ_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1] @@ -444,8 +444,8 @@ define void @fminnm_v16f32(ptr %a, ptr %b) #0 { ; VBITS_EQ_256-LABEL: fminnm_v16f32: ; VBITS_EQ_256: // %bb.0: -; VBITS_EQ_256-NEXT: mov x8, #8 ; VBITS_EQ_256-NEXT: ptrue p0.s, vl8 +; VBITS_EQ_256-NEXT: mov x8, #8 // =0x8 ; VBITS_EQ_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_EQ_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_EQ_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2] @@ -542,8 +542,8 @@ define void @fminnm_v8f64(ptr %a, ptr %b) #0 { ; VBITS_EQ_256-LABEL: fminnm_v8f64: ; VBITS_EQ_256: // %bb.0: -; VBITS_EQ_256-NEXT: mov x8, #4 ; VBITS_EQ_256-NEXT: ptrue p0.d, vl4 +; VBITS_EQ_256-NEXT: mov x8, #4 // =0x4 ; VBITS_EQ_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_EQ_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_EQ_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] @@ -644,8 +644,8 @@ define void @fmax_v32f16(ptr %a, ptr %b) #0 { ; VBITS_EQ_256-LABEL: fmax_v32f16: ; VBITS_EQ_256: // %bb.0: -; VBITS_EQ_256-NEXT: mov x8, #16 ; VBITS_EQ_256-NEXT: ptrue p0.h, vl16 +; VBITS_EQ_256-NEXT: mov x8, #16 // =0x10 ; VBITS_EQ_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_EQ_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_EQ_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1] @@ -742,8 +742,8 @@ define 
void @fmax_v16f32(ptr %a, ptr %b) #0 { ; VBITS_EQ_256-LABEL: fmax_v16f32: ; VBITS_EQ_256: // %bb.0: -; VBITS_EQ_256-NEXT: mov x8, #8 ; VBITS_EQ_256-NEXT: ptrue p0.s, vl8 +; VBITS_EQ_256-NEXT: mov x8, #8 // =0x8 ; VBITS_EQ_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_EQ_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_EQ_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2] @@ -840,8 +840,8 @@ define void @fmax_v8f64(ptr %a, ptr %b) #0 { ; VBITS_EQ_256-LABEL: fmax_v8f64: ; VBITS_EQ_256: // %bb.0: -; VBITS_EQ_256-NEXT: mov x8, #4 ; VBITS_EQ_256-NEXT: ptrue p0.d, vl4 +; VBITS_EQ_256-NEXT: mov x8, #4 // =0x4 ; VBITS_EQ_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_EQ_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_EQ_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] @@ -942,8 +942,8 @@ define void @fmin_v32f16(ptr %a, ptr %b) #0 { ; VBITS_EQ_256-LABEL: fmin_v32f16: ; VBITS_EQ_256: // %bb.0: -; VBITS_EQ_256-NEXT: mov x8, #16 ; VBITS_EQ_256-NEXT: ptrue p0.h, vl16 +; VBITS_EQ_256-NEXT: mov x8, #16 // =0x10 ; VBITS_EQ_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_EQ_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_EQ_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1] @@ -1040,8 +1040,8 @@ define void @fmin_v16f32(ptr %a, ptr %b) #0 { ; VBITS_EQ_256-LABEL: fmin_v16f32: ; VBITS_EQ_256: // %bb.0: -; VBITS_EQ_256-NEXT: mov x8, #8 ; VBITS_EQ_256-NEXT: ptrue p0.s, vl8 +; VBITS_EQ_256-NEXT: mov x8, #8 // =0x8 ; VBITS_EQ_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_EQ_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_EQ_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2] @@ -1138,8 +1138,8 @@ define void @fmin_v8f64(ptr %a, ptr %b) #0 { ; VBITS_EQ_256-LABEL: fmin_v8f64: ; VBITS_EQ_256: // %bb.0: -; VBITS_EQ_256-NEXT: mov x8, #4 ; VBITS_EQ_256-NEXT: ptrue p0.d, vl4 +; VBITS_EQ_256-NEXT: mov x8, #4 // =0x4 ; VBITS_EQ_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_EQ_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_EQ_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-reduce.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-reduce.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-reduce.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-reduce.ll @@ -13,8 +13,8 @@ define half @fadda_v4f16(half %start, <4 x half> %a) vscale_range(1,0) #0 { ; CHECK-LABEL: fadda_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: fadda h0, p0, h0, z1.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 @@ -27,8 +27,8 @@ define half @fadda_v8f16(half %start, <8 x half> %a) vscale_range(1,0) #0 { ; CHECK-LABEL: fadda_v8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: fadda h0, p0, h0, z1.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 @@ -54,8 +54,8 @@ define half @fadda_v32f16(half %start, ptr %a) #0 { ; VBITS_GE_256-LABEL: fadda_v32f16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: // kill: def $h0 killed $h0 def $z0 ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x0] @@ -109,8 +109,8 @@ define float @fadda_v2f32(float %start, <2 x 
float> %a) vscale_range(1,0) #0 { ; CHECK-LABEL: fadda_v2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: fadda s0, p0, s0, z1.s ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 @@ -123,8 +123,8 @@ define float @fadda_v4f32(float %start, <4 x float> %a) vscale_range(1,0) #0 { ; CHECK-LABEL: fadda_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: fadda s0, p0, s0, z1.s ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 @@ -150,8 +150,8 @@ define float @fadda_v16f32(float %start, ptr %a) #0 { ; VBITS_GE_256-LABEL: fadda_v16f32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: // kill: def $s0 killed $s0 def $z0 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0] @@ -215,8 +215,8 @@ define double @fadda_v2f64(double %start, <2 x double> %a) vscale_range(1,0) #0 { ; CHECK-LABEL: fadda_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: fadda d0, p0, d0, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -242,8 +242,8 @@ define double @fadda_v8f64(double %start, ptr %a) #0 { ; VBITS_GE_256-LABEL: fadda_v8f64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: // kill: def $d0 killed $d0 def $z0 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0] @@ -301,8 +301,8 @@ define half @faddv_v4f16(half %start, <4 x half> %a) vscale_range(2,0) #0 { ; CHECK-LABEL: faddv_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: faddv h1, p0, z1.h ; CHECK-NEXT: fadd h0, h0, h1 ; CHECK-NEXT: ret @@ -314,8 +314,8 @@ define half @faddv_v8f16(half %start, <8 x half> %a) vscale_range(2,0) #0 { ; CHECK-LABEL: faddv_v8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: faddv h1, p0, z1.h ; CHECK-NEXT: fadd h0, h0, h1 ; CHECK-NEXT: ret @@ -339,8 +339,8 @@ define half @faddv_v32f16(half %start, ptr %a) #0 { ; VBITS_GE_256-LABEL: faddv_v32f16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: fadd z1.h, p0/m, z1.h, z2.h @@ -401,8 +401,8 @@ define float @faddv_v4f32(float %start, <4 x float> %a) vscale_range(2,0) #0 { ; CHECK-LABEL: faddv_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: faddv s1, p0, z1.s ; CHECK-NEXT: fadd s0, s0, s1 ; CHECK-NEXT: ret @@ -426,8 +426,8 @@ define float 
@faddv_v16f32(float %start, ptr %a) #0 { ; VBITS_GE_256-LABEL: faddv_v16f32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: fadd z1.s, p0/m, z1.s, z2.s @@ -510,8 +510,8 @@ define double @faddv_v8f64(double %start, ptr %a) #0 { ; VBITS_GE_256-LABEL: faddv_v8f64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: fadd z1.d, p0/m, z1.d, z2.d @@ -597,8 +597,8 @@ define half @fmaxv_v32f16(ptr %a) #0 { ; VBITS_GE_256-LABEL: fmaxv_v32f16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: fmaxnm z0.h, p0/m, z0.h, z1.h @@ -680,8 +680,8 @@ define float @fmaxv_v16f32(ptr %a) #0 { ; VBITS_GE_256-LABEL: fmaxv_v16f32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: fmaxnm z0.s, p0/m, z0.s, z1.s @@ -762,8 +762,8 @@ define double @fmaxv_v8f64(ptr %a) #0 { ; VBITS_GE_256-LABEL: fmaxv_v8f64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: fmaxnm z0.d, p0/m, z0.d, z1.d @@ -849,8 +849,8 @@ define half @fminv_v32f16(ptr %a) #0 { ; VBITS_GE_256-LABEL: fminv_v32f16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: fminnm z0.h, p0/m, z0.h, z1.h @@ -932,8 +932,8 @@ define float @fminv_v16f32(ptr %a) #0 { ; VBITS_GE_256-LABEL: fminv_v16f32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: fminnm z0.s, p0/m, z0.s, z1.s @@ -1014,8 +1014,8 @@ define double @fminv_v8f64(ptr %a) #0 { ; VBITS_GE_256-LABEL: fminv_v8f64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: fminnm z0.d, p0/m, z0.d, z1.d @@ -1099,8 +1099,8 @@ define half @fmaximumv_v32f16(ptr %a) #0 { ; VBITS_GE_256-LABEL: fmaximumv_v32f16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; 
VBITS_GE_256-NEXT: fmax z0.h, p0/m, z0.h, z1.h @@ -1182,8 +1182,8 @@ define float @fmaximumv_v16f32(ptr %a) #0 { ; VBITS_GE_256-LABEL: fmaximumv_v16f32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: fmax z0.s, p0/m, z0.s, z1.s @@ -1264,8 +1264,8 @@ define double @fmaximumv_v8f64(ptr %a) #0 { ; VBITS_GE_256-LABEL: fmaximumv_v8f64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: fmax z0.d, p0/m, z0.d, z1.d @@ -1349,8 +1349,8 @@ define half @fminimumv_v32f16(ptr %a) #0 { ; VBITS_GE_256-LABEL: fminimumv_v32f16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: fmin z0.h, p0/m, z0.h, z1.h @@ -1432,8 +1432,8 @@ define float @fminimumv_v16f32(ptr %a) #0 { ; VBITS_GE_256-LABEL: fminimumv_v16f32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: fmin z0.s, p0/m, z0.s, z1.s @@ -1514,8 +1514,8 @@ define double @fminimumv_v8f64(ptr %a) #0 { ; VBITS_GE_256-LABEL: fminimumv_v8f64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: fmin z0.d, p0/m, z0.d, z1.d diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-rounding.ll @@ -46,8 +46,8 @@ define void @frintp_v32f16(ptr %a) #0 { ; VBITS_GE_256-LABEL: frintp_v32f16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: frintp z0.h, p0/m, z0.h @@ -134,8 +134,8 @@ define void @frintp_v16f32(ptr %a) #0 { ; VBITS_GE_256-LABEL: frintp_v16f32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: frintp z0.s, p0/m, z0.s @@ -222,8 +222,8 @@ define void @frintp_v8f64(ptr %a) #0 { ; VBITS_GE_256-LABEL: frintp_v8f64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: frintp z0.d, p0/m, z0.d @@ -314,8 +314,8 @@ define void @frintm_v32f16(ptr %a) #0 { ; 
VBITS_GE_256-LABEL: frintm_v32f16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: frintm z0.h, p0/m, z0.h @@ -402,8 +402,8 @@ define void @frintm_v16f32(ptr %a) #0 { ; VBITS_GE_256-LABEL: frintm_v16f32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: frintm z0.s, p0/m, z0.s @@ -490,8 +490,8 @@ define void @frintm_v8f64(ptr %a) #0 { ; VBITS_GE_256-LABEL: frintm_v8f64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: frintm z0.d, p0/m, z0.d @@ -582,8 +582,8 @@ define void @frinti_v32f16(ptr %a) #0 { ; VBITS_GE_256-LABEL: frinti_v32f16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: frinti z0.h, p0/m, z0.h @@ -670,8 +670,8 @@ define void @frinti_v16f32(ptr %a) #0 { ; VBITS_GE_256-LABEL: frinti_v16f32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: frinti z0.s, p0/m, z0.s @@ -758,8 +758,8 @@ define void @frinti_v8f64(ptr %a) #0 { ; VBITS_GE_256-LABEL: frinti_v8f64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: frinti z0.d, p0/m, z0.d @@ -850,8 +850,8 @@ define void @frintx_v32f16(ptr %a) #0 { ; VBITS_GE_256-LABEL: frintx_v32f16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: frintx z0.h, p0/m, z0.h @@ -938,8 +938,8 @@ define void @frintx_v16f32(ptr %a) #0 { ; VBITS_GE_256-LABEL: frintx_v16f32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: frintx z0.s, p0/m, z0.s @@ -1026,8 +1026,8 @@ define void @frintx_v8f64(ptr %a) #0 { ; VBITS_GE_256-LABEL: frintx_v8f64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: frintx z0.d, p0/m, z0.d @@ -1118,8 +1118,8 @@ define void @frinta_v32f16(ptr %a) #0 { ; VBITS_GE_256-LABEL: frinta_v32f16: ; VBITS_GE_256: // %bb.0: -; 
VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: frinta z0.h, p0/m, z0.h @@ -1206,8 +1206,8 @@ define void @frinta_v16f32(ptr %a) #0 { ; VBITS_GE_256-LABEL: frinta_v16f32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: frinta z0.s, p0/m, z0.s @@ -1294,8 +1294,8 @@ define void @frinta_v8f64(ptr %a) #0 { ; VBITS_GE_256-LABEL: frinta_v8f64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: frinta z0.d, p0/m, z0.d @@ -1386,8 +1386,8 @@ define void @frintn_v32f16(ptr %a) #0 { ; VBITS_GE_256-LABEL: frintn_v32f16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: frintn z0.h, p0/m, z0.h @@ -1474,8 +1474,8 @@ define void @frintn_v16f32(ptr %a) #0 { ; VBITS_GE_256-LABEL: frintn_v16f32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: frintn z0.s, p0/m, z0.s @@ -1562,8 +1562,8 @@ define void @frintn_v8f64(ptr %a) #0 { ; VBITS_GE_256-LABEL: frintn_v8f64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: frintn z0.d, p0/m, z0.d @@ -1654,8 +1654,8 @@ define void @frintz_v32f16(ptr %a) #0 { ; VBITS_GE_256-LABEL: frintz_v32f16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: frintz z0.h, p0/m, z0.h @@ -1742,8 +1742,8 @@ define void @frintz_v16f32(ptr %a) #0 { ; VBITS_GE_256-LABEL: frintz_v16f32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: frintz z0.s, p0/m, z0.s @@ -1830,8 +1830,8 @@ define void @frintz_v8f64(ptr %a) #0 { ; VBITS_GE_256-LABEL: frintz_v8f64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: frintz z0.d, p0/m, z0.d diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll +++ 
b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-select.ll @@ -34,14 +34,14 @@ define void @select_v16f16(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 { ; CHECK-LABEL: select_v16f16: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p0.h, vl16 -; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] -; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] +; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p1.h -; CHECK-NEXT: mov z2.h, w8 -; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0 -; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h +; CHECK-NEXT: mov z0.h, w8 +; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0] +; CHECK-NEXT: ld1h { z2.h }, p0/z, [x1] +; CHECK-NEXT: cmpne p1.h, p1/z, z0.h, #0 +; CHECK-NEXT: sel z0.h, p1, z1.h, z2.h ; CHECK-NEXT: st1h { z0.h }, p0, [x0] ; CHECK-NEXT: ret %op1 = load volatile <16 x half>, ptr %a @@ -54,32 +54,32 @@ define void @select_v32f16(ptr %a, ptr %b, i1 %mask) #0 { ; VBITS_GE_256-LABEL: select_v32f16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 -; VBITS_GE_256-NEXT: and w9, w2, #0x1 +; VBITS_GE_256-NEXT: and w8, w2, #0x1 ; VBITS_GE_256-NEXT: ptrue p1.h -; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] -; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] -; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1] -; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1] -; VBITS_GE_256-NEXT: mov z4.h, w9 -; VBITS_GE_256-NEXT: cmpne p1.h, p1/z, z4.h, #0 +; VBITS_GE_256-NEXT: mov z0.h, w8 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 +; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0, x8, lsl #1] +; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x0] +; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1, x8, lsl #1] +; VBITS_GE_256-NEXT: cmpne p1.h, p1/z, z0.h, #0 +; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x1] ; VBITS_GE_256-NEXT: sel z1.h, p1, z1.h, z3.h -; VBITS_GE_256-NEXT: sel z0.h, p1, z0.h, z2.h -; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1] -; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0] +; VBITS_GE_256-NEXT: mov z0.h, p1/m, z2.h +; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0, x8, lsl #1] +; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: select_v32f16: ; VBITS_GE_512: // %bb.0: -; VBITS_GE_512-NEXT: and w8, w2, #0x1 ; VBITS_GE_512-NEXT: ptrue p0.h, vl32 -; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_GE_512-NEXT: ld1h { z1.h }, p0/z, [x1] +; VBITS_GE_512-NEXT: and w8, w2, #0x1 ; VBITS_GE_512-NEXT: ptrue p1.h -; VBITS_GE_512-NEXT: mov z2.h, w8 -; VBITS_GE_512-NEXT: cmpne p1.h, p1/z, z2.h, #0 -; VBITS_GE_512-NEXT: sel z0.h, p1, z0.h, z1.h +; VBITS_GE_512-NEXT: mov z0.h, w8 +; VBITS_GE_512-NEXT: ld1h { z1.h }, p0/z, [x0] +; VBITS_GE_512-NEXT: ld1h { z2.h }, p0/z, [x1] +; VBITS_GE_512-NEXT: cmpne p1.h, p1/z, z0.h, #0 +; VBITS_GE_512-NEXT: sel z0.h, p1, z1.h, z2.h ; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0] ; VBITS_GE_512-NEXT: ret %op1 = load volatile <32 x half>, ptr %a @@ -92,14 +92,14 @@ define void @select_v64f16(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 { ; CHECK-LABEL: select_v64f16: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p0.h, vl64 -; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] -; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] +; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p1.h -; CHECK-NEXT: mov z2.h, w8 -; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0 -; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h +; CHECK-NEXT: mov z0.h, w8 +; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0] +; CHECK-NEXT: ld1h { z2.h }, p0/z, [x1] +; CHECK-NEXT: cmpne p1.h, p1/z, z0.h, #0 +; CHECK-NEXT: sel z0.h, p1, 
z1.h, z2.h ; CHECK-NEXT: st1h { z0.h }, p0, [x0] ; CHECK-NEXT: ret %op1 = load volatile <64 x half>, ptr %a @@ -112,14 +112,14 @@ define void @select_v128f16(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 { ; CHECK-LABEL: select_v128f16: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p0.h, vl128 -; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] -; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] +; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p1.h -; CHECK-NEXT: mov z2.h, w8 -; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0 -; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h +; CHECK-NEXT: mov z0.h, w8 +; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0] +; CHECK-NEXT: ld1h { z2.h }, p0/z, [x1] +; CHECK-NEXT: cmpne p1.h, p1/z, z0.h, #0 +; CHECK-NEXT: sel z0.h, p1, z1.h, z2.h ; CHECK-NEXT: st1h { z0.h }, p0, [x0] ; CHECK-NEXT: ret %op1 = load volatile <128 x half>, ptr %a @@ -158,14 +158,14 @@ define void @select_v8f32(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 { ; CHECK-LABEL: select_v8f32: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p0.s, vl8 -; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] -; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1] +; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p1.s -; CHECK-NEXT: mov z2.s, w8 -; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0 -; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s +; CHECK-NEXT: mov z0.s, w8 +; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0] +; CHECK-NEXT: ld1w { z2.s }, p0/z, [x1] +; CHECK-NEXT: cmpne p1.s, p1/z, z0.s, #0 +; CHECK-NEXT: sel z0.s, p1, z1.s, z2.s ; CHECK-NEXT: st1w { z0.s }, p0, [x0] ; CHECK-NEXT: ret %op1 = load volatile <8 x float>, ptr %a @@ -178,32 +178,32 @@ define void @select_v16f32(ptr %a, ptr %b, i1 %mask) #0 { ; VBITS_GE_256-LABEL: select_v16f32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 -; VBITS_GE_256-NEXT: and w9, w2, #0x1 +; VBITS_GE_256-NEXT: and w8, w2, #0x1 ; VBITS_GE_256-NEXT: ptrue p1.s -; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] -; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] -; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2] -; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1] -; VBITS_GE_256-NEXT: mov z4.s, w9 -; VBITS_GE_256-NEXT: cmpne p1.s, p1/z, z4.s, #0 +; VBITS_GE_256-NEXT: mov z0.s, w8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 +; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0, x8, lsl #2] +; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0] +; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1, x8, lsl #2] +; VBITS_GE_256-NEXT: cmpne p1.s, p1/z, z0.s, #0 +; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x1] ; VBITS_GE_256-NEXT: sel z1.s, p1, z1.s, z3.s -; VBITS_GE_256-NEXT: sel z0.s, p1, z0.s, z2.s -; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] -; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0] +; VBITS_GE_256-NEXT: mov z0.s, p1/m, z2.s +; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0, x8, lsl #2] +; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: select_v16f32: ; VBITS_GE_512: // %bb.0: -; VBITS_GE_512-NEXT: and w8, w2, #0x1 ; VBITS_GE_512-NEXT: ptrue p0.s, vl16 -; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0] -; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x1] +; VBITS_GE_512-NEXT: and w8, w2, #0x1 ; VBITS_GE_512-NEXT: ptrue p1.s -; VBITS_GE_512-NEXT: mov z2.s, w8 -; VBITS_GE_512-NEXT: cmpne p1.s, p1/z, z2.s, #0 -; VBITS_GE_512-NEXT: sel z0.s, p1, z0.s, z1.s +; VBITS_GE_512-NEXT: mov z0.s, w8 +; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x0] +; VBITS_GE_512-NEXT: ld1w { z2.s }, p0/z, [x1] +; VBITS_GE_512-NEXT: cmpne p1.s, p1/z, 
z0.s, #0 +; VBITS_GE_512-NEXT: sel z0.s, p1, z1.s, z2.s ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_512-NEXT: ret %op1 = load volatile <16 x float>, ptr %a @@ -216,14 +216,14 @@ define void @select_v32f32(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 { ; CHECK-LABEL: select_v32f32: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p0.s, vl32 -; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] -; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1] +; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p1.s -; CHECK-NEXT: mov z2.s, w8 -; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0 -; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s +; CHECK-NEXT: mov z0.s, w8 +; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0] +; CHECK-NEXT: ld1w { z2.s }, p0/z, [x1] +; CHECK-NEXT: cmpne p1.s, p1/z, z0.s, #0 +; CHECK-NEXT: sel z0.s, p1, z1.s, z2.s ; CHECK-NEXT: st1w { z0.s }, p0, [x0] ; CHECK-NEXT: ret %op1 = load volatile <32 x float>, ptr %a @@ -236,14 +236,14 @@ define void @select_v64f32(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 { ; CHECK-LABEL: select_v64f32: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p0.s, vl64 -; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] -; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1] +; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p1.s -; CHECK-NEXT: mov z2.s, w8 -; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0 -; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s +; CHECK-NEXT: mov z0.s, w8 +; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0] +; CHECK-NEXT: ld1w { z2.s }, p0/z, [x1] +; CHECK-NEXT: cmpne p1.s, p1/z, z0.s, #0 +; CHECK-NEXT: sel z0.s, p1, z1.s, z2.s ; CHECK-NEXT: st1w { z0.s }, p0, [x0] ; CHECK-NEXT: ret %op1 = load volatile <64 x float>, ptr %a @@ -282,15 +282,15 @@ define void @select_v4f64(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 { ; CHECK-LABEL: select_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2 ; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2 ; CHECK-NEXT: and x8, x2, #0x1 -; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] -; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] ; CHECK-NEXT: ptrue p1.d -; CHECK-NEXT: mov z2.d, x8 -; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0 -; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d +; CHECK-NEXT: mov z0.d, x8 +; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0] +; CHECK-NEXT: ld1d { z2.d }, p0/z, [x1] +; CHECK-NEXT: cmpne p1.d, p1/z, z0.d, #0 +; CHECK-NEXT: sel z0.d, p1, z1.d, z2.d ; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: ret %op1 = load volatile <4 x double>, ptr %a @@ -303,34 +303,34 @@ define void @select_v8f64(ptr %a, ptr %b, i1 %mask) #0 { ; VBITS_GE_256-LABEL: select_v8f64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 -; VBITS_GE_256-NEXT: // kill: def $w2 killed $w2 def $x2 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 -; VBITS_GE_256-NEXT: and x9, x2, #0x1 +; VBITS_GE_256-NEXT: // kill: def $w2 killed $w2 def $x2 +; VBITS_GE_256-NEXT: and x8, x2, #0x1 ; VBITS_GE_256-NEXT: ptrue p1.d -; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] -; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] -; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] -; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1] -; VBITS_GE_256-NEXT: mov z4.d, x9 -; VBITS_GE_256-NEXT: cmpne p1.d, p1/z, z4.d, #0 +; VBITS_GE_256-NEXT: mov z0.d, x8 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 +; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0, x8, lsl #3] +; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0] +; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1, x8, lsl #3] +; VBITS_GE_256-NEXT: cmpne p1.d, p1/z, z0.d, #0 +; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, 
[x1]
 ; VBITS_GE_256-NEXT: sel z1.d, p1, z1.d, z3.d
-; VBITS_GE_256-NEXT: sel z0.d, p1, z0.d, z2.d
-; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
-; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
+; VBITS_GE_256-NEXT: mov z0.d, p1/m, z2.d
+; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0, x8, lsl #3]
+; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0]
 ; VBITS_GE_256-NEXT: ret
 ;
 ; VBITS_GE_512-LABEL: select_v8f64:
 ; VBITS_GE_512: // %bb.0:
-; VBITS_GE_512-NEXT: // kill: def $w2 killed $w2 def $x2
 ; VBITS_GE_512-NEXT: ptrue p0.d, vl8
+; VBITS_GE_512-NEXT: // kill: def $w2 killed $w2 def $x2
 ; VBITS_GE_512-NEXT: and x8, x2, #0x1
-; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
-; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1]
 ; VBITS_GE_512-NEXT: ptrue p1.d
-; VBITS_GE_512-NEXT: mov z2.d, x8
-; VBITS_GE_512-NEXT: cmpne p1.d, p1/z, z2.d, #0
-; VBITS_GE_512-NEXT: sel z0.d, p1, z0.d, z1.d
+; VBITS_GE_512-NEXT: mov z0.d, x8
+; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x0]
+; VBITS_GE_512-NEXT: ld1d { z2.d }, p0/z, [x1]
+; VBITS_GE_512-NEXT: cmpne p1.d, p1/z, z0.d, #0
+; VBITS_GE_512-NEXT: sel z0.d, p1, z1.d, z2.d
 ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
 ; VBITS_GE_512-NEXT: ret
 %op1 = load volatile <8 x double>, ptr %a
@@ -343,15 +343,15 @@
 define void @select_v16f64(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 {
 ; CHECK-LABEL: select_v16f64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT: ptrue p0.d, vl16
+; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT: and x8, x2, #0x1
-; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
-; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
 ; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: mov z2.d, x8
-; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
-; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
+; CHECK-NEXT: mov z0.d, x8
+; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0]
+; CHECK-NEXT: ld1d { z2.d }, p0/z, [x1]
+; CHECK-NEXT: cmpne p1.d, p1/z, z0.d, #0
+; CHECK-NEXT: sel z0.d, p1, z1.d, z2.d
 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
 ; CHECK-NEXT: ret
 %op1 = load volatile <16 x double>, ptr %a
@@ -364,15 +364,15 @@
 define void @select_v32f64(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 {
 ; CHECK-LABEL: select_v32f64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT: ptrue p0.d, vl32
+; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2
 ; CHECK-NEXT: and x8, x2, #0x1
-; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
-; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
 ; CHECK-NEXT: ptrue p1.d
-; CHECK-NEXT: mov z2.d, x8
-; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
-; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
+; CHECK-NEXT: mov z0.d, x8
+; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0]
+; CHECK-NEXT: ld1d { z2.d }, p0/z, [x1]
+; CHECK-NEXT: cmpne p1.d, p1/z, z0.d, #0
+; CHECK-NEXT: sel z0.d, p1, z1.d, z2.d
 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
 ; CHECK-NEXT: ret
 %op1 = load volatile <32 x double>, ptr %a
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-to-int.ll
@@ -50,8 +50,8 @@
 define void @fcvtzu_v32f16_v32i16(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: fcvtzu_v32f16_v32i16:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #16
 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
+; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
 ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: fcvtzu z0.h, p0/m, z0.h
@@ -131,8 +131,8 @@
 define void @fcvtzu_v8f16_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
 ; CHECK-LABEL: fcvtzu_v8f16_v8i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: ptrue p0.s, vl8
+; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: uunpklo z0.s, z0.h
 ; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h
 ; CHECK-NEXT: st1w { z0.s }, p0, [x1]
@@ -147,7 +147,7 @@
 ; VBITS_GE_256-LABEL: fcvtzu_v16f16_v16i32:
 ; VBITS_GE_256: // %bb.0:
 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
-; VBITS_GE_256-NEXT: mov x8, #8
+; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
 ; VBITS_GE_256-NEXT: uunpklo z1.s, z0.h
@@ -250,16 +250,16 @@
 ; VBITS_GE_256-LABEL: fcvtzu_v8f16_v8i64:
 ; VBITS_GE_256: // %bb.0:
 ; VBITS_GE_256-NEXT: ldr q0, [x0]
-; VBITS_GE_256-NEXT: mov x8, #4
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
 ; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
-; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
-; VBITS_GE_256-NEXT: fcvtzu z0.d, p0/m, z0.h
-; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1]
 ; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h
+; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
 ; VBITS_GE_256-NEXT: uunpklo z1.d, z1.s
+; VBITS_GE_256-NEXT: fcvtzu z0.d, p0/m, z0.h
 ; VBITS_GE_256-NEXT: fcvtzu z1.d, p0/m, z1.h
+; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1]
 ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1, x8, lsl #3]
 ; VBITS_GE_256-NEXT: ret
 ;
@@ -326,8 +326,8 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: fcvtzu v1.4s, v0.4s
 ; CHECK-NEXT: mov w8, v1.s[1]
-; CHECK-NEXT: mov w9, v1.s[2]
 ; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: mov w9, v1.s[2]
 ; CHECK-NEXT: mov v0.h[1], w8
 ; CHECK-NEXT: mov w8, v1.s[3]
 ; CHECK-NEXT: mov v0.h[2], w9
@@ -355,18 +355,18 @@
 define void @fcvtzu_v16f32_v16i16(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: fcvtzu_v16f32_v16i16:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #8
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
+; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
+; VBITS_GE_256-NEXT: ptrue p1.h, vl16
 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: fcvtzu z0.s, p0/m, z0.s
 ; VBITS_GE_256-NEXT: fcvtzu z1.s, p0/m, z1.s
+; VBITS_GE_256-NEXT: ptrue p0.h, vl8
 ; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
 ; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h
-; VBITS_GE_256-NEXT: ptrue p0.h, vl8
 ; VBITS_GE_256-NEXT: splice z1.h, p0, z1.h, z0.h
-; VBITS_GE_256-NEXT: ptrue p0.h, vl16
-; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x1]
+; VBITS_GE_256-NEXT: st1h { z1.h }, p1, [x1]
 ; VBITS_GE_256-NEXT: ret
 ;
 ; VBITS_GE_512-LABEL: fcvtzu_v16f32_v16i16:
@@ -451,8 +451,8 @@
 define void @fcvtzu_v16f32_v16i32(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: fcvtzu_v16f32_v16i32:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #8
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
+; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: fcvtzu z0.s, p0/m, z0.s
@@ -532,8 +532,8 @@
 define void @fcvtzu_v4f32_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
 ; CHECK-LABEL: fcvtzu_v4f32_v4i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: ptrue p0.d, vl4
+; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: uunpklo z0.d, z0.s
 ; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.s
 ; CHECK-NEXT: st1d { z0.d }, p0, [x1]
@@ -548,7 +548,7 @@
 ; VBITS_GE_256-LABEL: fcvtzu_v8f32_v8i64:
 ; VBITS_GE_256: // %bb.0:
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
-; VBITS_GE_256-NEXT: mov x8, #4
+; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
 ; VBITS_GE_256-NEXT: uunpklo z1.d, z0.s
@@ -610,8 +610,8 @@
 define <1 x i16> @fcvtzu_v1f64_v1i16(<1 x double> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: fcvtzu_v1f64_v1i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.d, vl4
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d
 ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
@@ -650,8 +650,8 @@
 define <8 x i16> @fcvtzu_v8f64_v8i16(ptr %a) #0 {
 ; VBITS_GE_256-LABEL: fcvtzu_v8f64_v8i16:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #4
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: fcvtzu z0.d, p0/m, z0.d
@@ -750,18 +750,18 @@
 define void @fcvtzu_v8f64_v8i32(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: fcvtzu_v8f64_v8i32:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #4
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
+; VBITS_GE_256-NEXT: ptrue p1.s, vl8
 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: fcvtzu z0.d, p0/m, z0.d
 ; VBITS_GE_256-NEXT: fcvtzu z1.d, p0/m, z1.d
+; VBITS_GE_256-NEXT: ptrue p0.s, vl4
 ; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
 ; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
-; VBITS_GE_256-NEXT: ptrue p0.s, vl4
 ; VBITS_GE_256-NEXT: splice z1.s, p0, z1.s, z0.s
-; VBITS_GE_256-NEXT: ptrue p0.s, vl8
-; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1]
+; VBITS_GE_256-NEXT: st1w { z1.s }, p1, [x1]
 ; VBITS_GE_256-NEXT: ret
 ;
 ; VBITS_GE_512-LABEL: fcvtzu_v8f64_v8i32:
@@ -847,8 +847,8 @@
 define void @fcvtzu_v8f64_v8i64(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: fcvtzu_v8f64_v8i64:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #4
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: fcvtzu z0.d, p0/m, z0.d
@@ -943,8 +943,8 @@
 define void @fcvtzs_v32f16_v32i16(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: fcvtzs_v32f16_v32i16:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #16
 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
+; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
 ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: fcvtzs z0.h, p0/m, z0.h
@@ -1024,8 +1024,8 @@
 define void @fcvtzs_v8f16_v8i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
 ; CHECK-LABEL: fcvtzs_v8f16_v8i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: ptrue p0.s, vl8
+; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: uunpklo z0.s, z0.h
 ; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h
 ; CHECK-NEXT: st1w { z0.s }, p0, [x1]
@@ -1040,7 +1040,7 @@
 ; VBITS_GE_256-LABEL: fcvtzs_v16f16_v16i32:
 ; VBITS_GE_256: // %bb.0:
 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
-; VBITS_GE_256-NEXT: mov x8, #8
+; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
 ; VBITS_GE_256-NEXT: uunpklo z1.s, z0.h
@@ -1143,16 +1143,16 @@
 ; VBITS_GE_256-LABEL: fcvtzs_v8f16_v8i64:
 ; VBITS_GE_256: // %bb.0:
 ; VBITS_GE_256-NEXT: ldr q0, [x0]
-; VBITS_GE_256-NEXT: mov x8, #4
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
 ; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
-; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
-; VBITS_GE_256-NEXT: fcvtzs z0.d, p0/m, z0.h
-; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1]
 ; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h
+; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
 ; VBITS_GE_256-NEXT: uunpklo z1.d, z1.s
+; VBITS_GE_256-NEXT: fcvtzs z0.d, p0/m, z0.h
 ; VBITS_GE_256-NEXT: fcvtzs z1.d, p0/m, z1.h
+; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1]
 ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1, x8, lsl #3]
 ; VBITS_GE_256-NEXT: ret
 ;
@@ -1219,8 +1219,8 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: fcvtzs v1.4s, v0.4s
 ; CHECK-NEXT: mov w8, v1.s[1]
-; CHECK-NEXT: mov w9, v1.s[2]
 ; CHECK-NEXT: mov v0.16b, v1.16b
+; CHECK-NEXT: mov w9, v1.s[2]
 ; CHECK-NEXT: mov v0.h[1], w8
 ; CHECK-NEXT: mov w8, v1.s[3]
 ; CHECK-NEXT: mov v0.h[2], w9
@@ -1248,18 +1248,18 @@
 define void @fcvtzs_v16f32_v16i16(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: fcvtzs_v16f32_v16i16:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #8
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
+; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
+; VBITS_GE_256-NEXT: ptrue p1.h, vl16
 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: fcvtzs z0.s, p0/m, z0.s
 ; VBITS_GE_256-NEXT: fcvtzs z1.s, p0/m, z1.s
+; VBITS_GE_256-NEXT: ptrue p0.h, vl8
 ; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
 ; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h
-; VBITS_GE_256-NEXT: ptrue p0.h, vl8
 ; VBITS_GE_256-NEXT: splice z1.h, p0, z1.h, z0.h
-; VBITS_GE_256-NEXT: ptrue p0.h, vl16
-; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x1]
+; VBITS_GE_256-NEXT: st1h { z1.h }, p1, [x1]
 ; VBITS_GE_256-NEXT: ret
 ;
 ; VBITS_GE_512-LABEL: fcvtzs_v16f32_v16i16:
@@ -1344,8 +1344,8 @@
 define void @fcvtzs_v16f32_v16i32(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: fcvtzs_v16f32_v16i32:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #8
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
+; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: fcvtzs z0.s, p0/m, z0.s
@@ -1425,8 +1425,8 @@
 define void @fcvtzs_v4f32_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
 ; CHECK-LABEL: fcvtzs_v4f32_v4i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: ptrue p0.d, vl4
+; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: uunpklo z0.d, z0.s
 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
 ; CHECK-NEXT: st1d { z0.d }, p0, [x1]
@@ -1441,7 +1441,7 @@
 ; VBITS_GE_256-LABEL: fcvtzs_v8f32_v8i64:
 ; VBITS_GE_256: // %bb.0:
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
-; VBITS_GE_256-NEXT: mov x8, #4
+; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
 ; VBITS_GE_256-NEXT: uunpklo z1.d, z0.s
@@ -1503,8 +1503,8 @@
 define <1 x i16> @fcvtzs_v1f64_v1i16(<1 x double> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: fcvtzs_v1f64_v1i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.d, vl4
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
 ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
@@ -1543,8 +1543,8 @@
 define <8 x i16> @fcvtzs_v8f64_v8i16(ptr %a) #0 {
 ; VBITS_GE_256-LABEL: fcvtzs_v8f64_v8i16:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #4
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: fcvtzs z0.d, p0/m, z0.d
@@ -1643,18 +1643,18 @@
 define void @fcvtzs_v8f64_v8i32(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: fcvtzs_v8f64_v8i32:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #4
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
+; VBITS_GE_256-NEXT: ptrue p1.s, vl8
 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: fcvtzs z0.d, p0/m, z0.d
 ; VBITS_GE_256-NEXT: fcvtzs z1.d, p0/m, z1.d
+; VBITS_GE_256-NEXT: ptrue p0.s, vl4
 ; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
 ; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
-; VBITS_GE_256-NEXT: ptrue p0.s, vl4
 ; VBITS_GE_256-NEXT: splice z1.s, p0, z1.s, z0.s
-; VBITS_GE_256-NEXT: ptrue p0.s, vl8
-; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1]
+; VBITS_GE_256-NEXT: st1w { z1.s }, p1, [x1]
 ; VBITS_GE_256-NEXT: ret
 ;
 ; VBITS_GE_512-LABEL: fcvtzs_v8f64_v8i32:
@@ -1740,8 +1740,8 @@
 define void @fcvtzs_v8f64_v8i64(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: fcvtzs_v8f64_v8i64:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #4
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: fcvtzs z0.d, p0/m, z0.d
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-vselect.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-vselect.ll
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-vselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-vselect.ll
@@ -51,8 +51,8 @@
 define void @select_v32f16(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: select_v32f16:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #16
 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
+; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
 ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1]
@@ -164,8 +164,8 @@
 define void @select_v16f32(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: select_v16f32:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #8
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
+; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2]
@@ -278,8 +278,8 @@
 define void @select_v8f64(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: select_v8f64:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #4
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp128.ll
@@ -13,8 +13,8 @@
 ; CHECK-NEXT: sub sp, sp, #48
 ; CHECK-NEXT: ptrue p0.d, vl4
 ; CHECK-NEXT: add x8, sp, #48
-; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT: mov x19, x1
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT: str z0, [x8, #1, mul vl] // 16-byte Folded Spill
 ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #16
 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
@@ -62,8 +62,8 @@
 ; CHECK-NEXT: addvl sp, sp, #-2
 ; CHECK-NEXT: sub sp, sp, #128
 ; CHECK-NEXT: ldr q1, [x0, #64]
-; CHECK-NEXT: mov x19, x1
 ; CHECK-NEXT: ldr q0, [x0, #80]
+; CHECK-NEXT: mov x19, x1
 ; CHECK-NEXT: stp q0, q1, [sp, #96] // 32-byte Folded Spill
 ; CHECK-NEXT: ldr q1, [x0, #96]
 ; CHECK-NEXT: ldr q0, [x0, #112]
@@ -88,10 +88,10 @@
 ; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill
 ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload
 ; CHECK-NEXT: bl __trunctfdf2
+; CHECK-NEXT: ptrue p0.d, vl2
 ; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: add x8, sp, #128
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: add x8, sp, #128
 ; CHECK-NEXT: mov v0.d[1], v1.d[0]
 ; CHECK-NEXT: ldr z1, [x8, #1, mul vl] // 16-byte Folded Reload
 ; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
@@ -112,14 +112,14 @@
 ; CHECK-NEXT: ldr q0, [sp, #112] // 16-byte Folded Reload
 ; CHECK-NEXT: bl __trunctfdf2
 ; CHECK-NEXT: ldr q1, [sp, #96] // 16-byte Folded Reload
-; CHECK-NEXT: add x9, sp, #128
+; CHECK-NEXT: ptrue p1.d, vl2
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: mov x8, #4
-; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: mov v0.d[1], v1.d[0]
-; CHECK-NEXT: ldr z1, [x9] // 16-byte Folded Reload
-; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: add x8, sp, #128
 ; CHECK-NEXT: ptrue p0.d, vl4
+; CHECK-NEXT: mov v0.d[1], v1.d[0]
+; CHECK-NEXT: ldr z1, [x8] // 16-byte Folded Reload
+; CHECK-NEXT: mov x8, #4 // =0x4
+; CHECK-NEXT: splice z0.d, p1, z0.d, z1.d
 ; CHECK-NEXT: st1d { z0.d }, p0, [x19, x8, lsl #3]
 ; CHECK-NEXT: add x8, sp, #128
 ; CHECK-NEXT: ldr z0, [x8, #1, mul vl] // 16-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests-crash.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests-crash.ll
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests-crash.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-frame-offests-crash.ll
@@ -11,58 +11,53 @@
 define dso_local void @func1(ptr %v1, ptr %v2, ptr %v3, ptr %v4, ptr %v5, ptr %v6, ptr %v7, ptr %v8,
 ; CHECK-LABEL: func1:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: stp x29, x25, [sp, #-64]! // 16-byte Folded Spill
-; CHECK-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: str x29, [sp, #-48]! // 8-byte Folded Spill
+; CHECK-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 48
 ; CHECK-NEXT: .cfi_offset w19, -8
 ; CHECK-NEXT: .cfi_offset w20, -16
 ; CHECK-NEXT: .cfi_offset w21, -24
 ; CHECK-NEXT: .cfi_offset w22, -32
-; CHECK-NEXT: .cfi_offset w23, -40
-; CHECK-NEXT: .cfi_offset w24, -48
-; CHECK-NEXT: .cfi_offset w25, -56
-; CHECK-NEXT: .cfi_offset w29, -64
-; CHECK-NEXT: add x8, sp, #64
-; CHECK-NEXT: add x9, sp, #128
-; CHECK-NEXT: add x10, sp, #160
-; CHECK-NEXT: add x11, sp, #192
+; CHECK-NEXT: .cfi_offset w29, -48
 ; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: add x20, sp, #192
+; CHECK-NEXT: add x8, sp, #48
+; CHECK-NEXT: add x9, sp, #112
+; CHECK-NEXT: add x10, sp, #144
+; CHECK-NEXT: add x11, sp, #176
+; CHECK-NEXT: add x20, sp, #176
 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8]
 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x9]
 ; CHECK-NEXT: ld1d { z2.d }, p0/z, [x10]
 ; CHECK-NEXT: ld1d { z3.d }, p0/z, [x11]
-; CHECK-NEXT: ldp x18, x19, [sp, #368]
-; CHECK-NEXT: add x21, sp, #160
-; CHECK-NEXT: add x22, sp, #128
-; CHECK-NEXT: ldp x24, x14, [sp, #296]
-; CHECK-NEXT: add x23, sp, #64
-; CHECK-NEXT: ldr x25, [sp, #288]
-; CHECK-NEXT: ldp x9, x8, [sp, #344]
-; CHECK-NEXT: ldp x11, x10, [sp, #328]
-; CHECK-NEXT: ldp x13, x12, [sp, #312]
-; CHECK-NEXT: ldr x15, [sp, #120]
-; CHECK-NEXT: ldur q4, [sp, #104]
-; CHECK-NEXT: ldp x16, x17, [sp, #224]
+; CHECK-NEXT: ldp x9, x8, [sp, #328]
+; CHECK-NEXT: ldp x11, x10, [sp, #312]
+; CHECK-NEXT: ldr x15, [sp, #104]
+; CHECK-NEXT: ldp x13, x12, [sp, #296]
+; CHECK-NEXT: ldur q4, [sp, #88]
+; CHECK-NEXT: ldp x18, x14, [sp, #280]
+; CHECK-NEXT: ldr x19, [sp, #272]
+; CHECK-NEXT: ldp x16, x17, [sp, #208]
+; CHECK-NEXT: ldp x21, x22, [sp, #352]
 ; CHECK-NEXT: st1d { z3.d }, p0, [x20]
-; CHECK-NEXT: st1d { z2.d }, p0, [x21]
-; CHECK-NEXT: st1d { z1.d }, p0, [x22]
-; CHECK-NEXT: st1d { z0.d }, p0, [x23]
-; CHECK-NEXT: stp x18, x19, [sp, #368]
-; CHECK-NEXT: stp x25, x24, [sp, #288]
-; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: stp x16, x17, [sp, #224]
-; CHECK-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: stur q4, [sp, #104]
-; CHECK-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: str x15, [sp, #120]
-; CHECK-NEXT: stp x14, x13, [sp, #304]
-; CHECK-NEXT: stp x12, x11, [sp, #320]
-; CHECK-NEXT: stp x10, x9, [sp, #336]
-; CHECK-NEXT: str x8, [sp, #352]
-; CHECK-NEXT: ldp x29, x25, [sp], #64 // 16-byte Folded Reload
+; CHECK-NEXT: add x20, sp, #144
+; CHECK-NEXT: st1d { z2.d }, p0, [x20]
+; CHECK-NEXT: add x20, sp, #112
+; CHECK-NEXT: st1d { z1.d }, p0, [x20]
+; CHECK-NEXT: add x20, sp, #48
+; CHECK-NEXT: st1d { z0.d }, p0, [x20]
+; CHECK-NEXT: stp x21, x22, [sp, #352]
+; CHECK-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: stp x19, x18, [sp, #272]
+; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: stp x16, x17, [sp, #208]
+; CHECK-NEXT: stur q4, [sp, #88]
+; CHECK-NEXT: str x15, [sp, #104]
+; CHECK-NEXT: stp x14, x13, [sp, #288]
+; CHECK-NEXT: stp x12, x11, [sp, #304]
+; CHECK-NEXT: stp x10, x9, [sp, #320]
+; CHECK-NEXT: str x8, [sp, #336]
+; CHECK-NEXT: ldr x29, [sp], #48 // 8-byte Folded Reload
 ; CHECK-NEXT: b func2
 ptr %v9, ptr %v10, ptr %v11, ptr %v12, ptr %v13, ptr %v14, ptr %v15, ptr %v16,
 ptr %v17, ptr %v18, ptr %v19, ptr %v20, ptr %v21, ptr %v22, ptr %v23, ptr %v24,
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-insert-vector-elt.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-insert-vector-elt.ll
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-insert-vector-elt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-insert-vector-elt.ll
@@ -36,16 +36,16 @@
 define <16 x half> @insertelement_v16f16(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: insertelement_v16f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w9, #15
 ; CHECK-NEXT: ptrue p0.h, vl16
-; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
-; CHECK-NEXT: fmov h2, #5.00000000
-; CHECK-NEXT: index z3.h, #0, #1
+; CHECK-NEXT: mov w9, #15 // =0xf
+; CHECK-NEXT: index z0.h, #0, #1
 ; CHECK-NEXT: ptrue p1.h
 ; CHECK-NEXT: mov z1.h, w9
-; CHECK-NEXT: cmpeq p1.h, p1/z, z3.h, z1.h
-; CHECK-NEXT: mov z0.h, p1/m, h2
-; CHECK-NEXT: st1h { z0.h }, p0, [x8]
+; CHECK-NEXT: ld1h { z2.h }, p0/z, [x0]
+; CHECK-NEXT: cmpeq p1.h, p1/z, z0.h, z1.h
+; CHECK-NEXT: fmov h0, #5.00000000
+; CHECK-NEXT: mov z2.h, p1/m, h0
+; CHECK-NEXT: st1h { z2.h }, p0, [x8]
 ; CHECK-NEXT: ret
 %op1 = load <16 x half>, ptr %a
 %r = insertelement <16 x half> %op1, half 5.0, i64 15
@@ -55,33 +55,33 @@
 define <32 x half> @insertelement_v32f16(ptr %a) #0 {
 ; VBITS_GE_256-LABEL: insertelement_v32f16:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x9, #16
-; VBITS_GE_256-NEXT: mov w10, #15
 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
-; VBITS_GE_256-NEXT: fmov h3, #5.00000000
-; VBITS_GE_256-NEXT: index z4.h, #0, #1
+; VBITS_GE_256-NEXT: mov w9, #15 // =0xf
+; VBITS_GE_256-NEXT: index z0.h, #0, #1
 ; VBITS_GE_256-NEXT: ptrue p1.h
-; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x9, lsl #1]
-; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
-; VBITS_GE_256-NEXT: mov z2.h, w10
-; VBITS_GE_256-NEXT: cmpeq p1.h, p1/z, z4.h, z2.h
-; VBITS_GE_256-NEXT: mov z0.h, p1/m, h3
-; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x8, x9, lsl #1]
-; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x8]
+; VBITS_GE_256-NEXT: mov z1.h, w9
+; VBITS_GE_256-NEXT: mov x9, #16 // =0x10
+; VBITS_GE_256-NEXT: fmov h2, #5.00000000
+; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x0, x9, lsl #1]
+; VBITS_GE_256-NEXT: cmpeq p1.h, p1/z, z0.h, z1.h
+; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0]
+; VBITS_GE_256-NEXT: mov z3.h, p1/m, h2
+; VBITS_GE_256-NEXT: st1h { z3.h }, p0, [x8, x9, lsl #1]
+; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x8]
 ; VBITS_GE_256-NEXT: ret
 ;
 ; VBITS_GE_512-LABEL: insertelement_v32f16:
 ; VBITS_GE_512: // %bb.0:
-; VBITS_GE_512-NEXT: mov w9, #31
 ; VBITS_GE_512-NEXT: ptrue p0.h, vl32
-; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
-; VBITS_GE_512-NEXT: fmov h2, #5.00000000
-; VBITS_GE_512-NEXT: index z3.h, #0, #1
+; VBITS_GE_512-NEXT: mov w9, #31 // =0x1f
+; VBITS_GE_512-NEXT: index z0.h, #0, #1
 ; VBITS_GE_512-NEXT: ptrue p1.h
 ; VBITS_GE_512-NEXT: mov z1.h, w9
-; VBITS_GE_512-NEXT: cmpeq p1.h, p1/z, z3.h, z1.h
-; VBITS_GE_512-NEXT: mov z0.h, p1/m, h2
-; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x8]
+; VBITS_GE_512-NEXT: ld1h { z2.h }, p0/z, [x0]
+; VBITS_GE_512-NEXT: cmpeq p1.h, p1/z, z0.h, z1.h
+; VBITS_GE_512-NEXT: fmov h0, #5.00000000
+; VBITS_GE_512-NEXT: mov z2.h, p1/m, h0
+; VBITS_GE_512-NEXT: st1h { z2.h }, p0, [x8]
 ; VBITS_GE_512-NEXT: ret
 %op1 = load <32 x half>, ptr %a
 %r = insertelement <32 x half> %op1, half 5.0, i64 31
@@ -91,16 +91,16 @@
 define <64 x half> @insertelement_v64f16(ptr %a) vscale_range(8,0) #0 {
 ; CHECK-LABEL: insertelement_v64f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w9, #63
 ; CHECK-NEXT: ptrue p0.h, vl64
-; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
-; CHECK-NEXT: fmov h2, #5.00000000
-; CHECK-NEXT: index z3.h, #0, #1
+; CHECK-NEXT: mov w9, #63 // =0x3f
+; CHECK-NEXT: index z0.h, #0, #1
 ; CHECK-NEXT: ptrue p1.h
 ; CHECK-NEXT: mov z1.h, w9
-; CHECK-NEXT: cmpeq p1.h, p1/z, z3.h, z1.h
-; CHECK-NEXT: mov z0.h, p1/m, h2
-; CHECK-NEXT: st1h { z0.h }, p0, [x8]
+; CHECK-NEXT: ld1h { z2.h }, p0/z, [x0]
+; CHECK-NEXT: cmpeq p1.h, p1/z, z0.h, z1.h
+; CHECK-NEXT: fmov h0, #5.00000000
+; CHECK-NEXT: mov z2.h, p1/m, h0
+; CHECK-NEXT: st1h { z2.h }, p0, [x8]
 ; CHECK-NEXT: ret
 %op1 = load <64 x half>, ptr %a
 %r = insertelement <64 x half> %op1, half 5.0, i64 63
@@ -110,16 +110,16 @@
 define <128 x half> @insertelement_v128f16(ptr %a) vscale_range(16,0) #0 {
 ; CHECK-LABEL: insertelement_v128f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w9, #127
 ; CHECK-NEXT: ptrue p0.h, vl128
-; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
-; CHECK-NEXT: fmov h2, #5.00000000
-; CHECK-NEXT: index z3.h, #0, #1
+; CHECK-NEXT: mov w9, #127 // =0x7f
+; CHECK-NEXT: index z0.h, #0, #1
 ; CHECK-NEXT: ptrue p1.h
 ; CHECK-NEXT: mov z1.h, w9
-; CHECK-NEXT: cmpeq p1.h, p1/z, z3.h, z1.h
-; CHECK-NEXT: mov z0.h, p1/m, h2
-; CHECK-NEXT: st1h { z0.h }, p0, [x8]
+; CHECK-NEXT: ld1h { z2.h }, p0/z, [x0]
+; CHECK-NEXT: cmpeq p1.h, p1/z, z0.h, z1.h
+; CHECK-NEXT: fmov h0, #5.00000000
+; CHECK-NEXT: mov z2.h, p1/m, h0
+; CHECK-NEXT: st1h { z2.h }, p0, [x8]
 ; CHECK-NEXT: ret
 %op1 = load <128 x half>, ptr %a
 %r = insertelement <128 x half> %op1, half 5.0, i64 127
@@ -153,16 +153,16 @@
 define <8 x float> @insertelement_v8f32(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: insertelement_v8f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w9, #7
 ; CHECK-NEXT: ptrue p0.s, vl8
-; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT: fmov s2, #5.00000000
-; CHECK-NEXT: index z3.s, #0, #1
+; CHECK-NEXT: mov w9, #7 // =0x7
+; CHECK-NEXT: index z0.s, #0, #1
 ; CHECK-NEXT: ptrue p1.s
 ; CHECK-NEXT: mov z1.s, w9
-; CHECK-NEXT: cmpeq p1.s, p1/z, z3.s, z1.s
-; CHECK-NEXT: mov z0.s, p1/m, s2
-; CHECK-NEXT: st1w { z0.s }, p0, [x8]
+; CHECK-NEXT: ld1w { z2.s }, p0/z, [x0]
+; CHECK-NEXT: cmpeq p1.s, p1/z, z0.s, z1.s
+; CHECK-NEXT: fmov s0, #5.00000000
+; CHECK-NEXT: mov z2.s, p1/m, s0
+; CHECK-NEXT: st1w { z2.s }, p0, [x8]
 ; CHECK-NEXT: ret
 %op1 = load <8 x float>, ptr %a
 %r = insertelement <8 x float> %op1, float 5.0, i64 7
@@ -172,33 +172,33 @@
 define <16 x float> @insertelement_v16f32(ptr %a) #0 {
 ; VBITS_GE_256-LABEL: insertelement_v16f32:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x9, #8
-; VBITS_GE_256-NEXT: mov w10, #7
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
-; VBITS_GE_256-NEXT: fmov s3, #5.00000000
-; VBITS_GE_256-NEXT: index z4.s, #0, #1
+; VBITS_GE_256-NEXT: mov w9, #7 // =0x7
+; VBITS_GE_256-NEXT: index z0.s, #0, #1
 ; VBITS_GE_256-NEXT: ptrue p1.s
-; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x9, lsl #2]
-; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
-; VBITS_GE_256-NEXT: mov z2.s, w10
-; VBITS_GE_256-NEXT: cmpeq p1.s, p1/z, z4.s, z2.s
-; VBITS_GE_256-NEXT: mov z0.s, p1/m, s3
-; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x8, x9, lsl #2]
-; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x8]
+; VBITS_GE_256-NEXT: mov z1.s, w9
+; VBITS_GE_256-NEXT: mov x9, #8 // =0x8
+; VBITS_GE_256-NEXT: fmov s2, #5.00000000
+; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x0, x9, lsl #2]
+; VBITS_GE_256-NEXT: cmpeq p1.s, p1/z, z0.s, z1.s
+; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0]
+; VBITS_GE_256-NEXT: mov z3.s, p1/m, s2
+; VBITS_GE_256-NEXT: st1w { z3.s }, p0, [x8, x9, lsl #2]
+; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x8]
 ; VBITS_GE_256-NEXT: ret
 ;
 ; VBITS_GE_512-LABEL: insertelement_v16f32:
 ; VBITS_GE_512: // %bb.0:
-; VBITS_GE_512-NEXT: mov w9, #15
 ; VBITS_GE_512-NEXT: ptrue p0.s, vl16
-; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
-; VBITS_GE_512-NEXT: fmov s2, #5.00000000
-; VBITS_GE_512-NEXT: index z3.s, #0, #1
+; VBITS_GE_512-NEXT: mov w9, #15 // =0xf
+; VBITS_GE_512-NEXT: index z0.s, #0, #1
 ; VBITS_GE_512-NEXT: ptrue p1.s
 ; VBITS_GE_512-NEXT: mov z1.s, w9
-; VBITS_GE_512-NEXT: cmpeq p1.s, p1/z, z3.s, z1.s
-; VBITS_GE_512-NEXT: mov z0.s, p1/m, s2
-; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x8]
+; VBITS_GE_512-NEXT: ld1w { z2.s }, p0/z, [x0]
+; VBITS_GE_512-NEXT: cmpeq p1.s, p1/z, z0.s, z1.s
+; VBITS_GE_512-NEXT: fmov s0, #5.00000000
+; VBITS_GE_512-NEXT: mov z2.s, p1/m, s0
+; VBITS_GE_512-NEXT: st1w { z2.s }, p0, [x8]
 ; VBITS_GE_512-NEXT: ret
 %op1 = load <16 x float>, ptr %a
 %r = insertelement <16 x float> %op1, float 5.0, i64 15
@@ -208,16 +208,16 @@
 define <32 x float> @insertelement_v32f32(ptr %a) vscale_range(8,0) #0 {
 ; CHECK-LABEL: insertelement_v32f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w9, #31
 ; CHECK-NEXT: ptrue p0.s, vl32
-; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT: fmov s2, #5.00000000
-; CHECK-NEXT: index z3.s, #0, #1
+; CHECK-NEXT: mov w9, #31 // =0x1f
+; CHECK-NEXT: index z0.s, #0, #1
 ; CHECK-NEXT: ptrue p1.s
 ; CHECK-NEXT: mov z1.s, w9
-; CHECK-NEXT: cmpeq p1.s, p1/z, z3.s, z1.s
-; CHECK-NEXT: mov z0.s, p1/m, s2
-; CHECK-NEXT: st1w { z0.s }, p0, [x8]
+; CHECK-NEXT: ld1w { z2.s }, p0/z, [x0]
+; CHECK-NEXT: cmpeq p1.s, p1/z, z0.s, z1.s
+; CHECK-NEXT: fmov s0, #5.00000000
+; CHECK-NEXT: mov z2.s, p1/m, s0
+; CHECK-NEXT: st1w { z2.s }, p0, [x8]
 ; CHECK-NEXT: ret
 %op1 = load <32 x float>, ptr %a
 %r = insertelement <32 x float> %op1, float 5.0, i64 31
@@ -227,16 +227,16 @@
 define <64 x float> @insertelement_v64f32(ptr %a) vscale_range(16,0) #0 {
 ; CHECK-LABEL: insertelement_v64f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w9, #63
 ; CHECK-NEXT: ptrue p0.s, vl64
-; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT: fmov s2, #5.00000000
-; CHECK-NEXT: index z3.s, #0, #1
+; CHECK-NEXT: mov w9, #63 // =0x3f
+; CHECK-NEXT: index z0.s, #0, #1
 ; CHECK-NEXT: ptrue p1.s
 ; CHECK-NEXT: mov z1.s, w9
-; CHECK-NEXT: cmpeq p1.s, p1/z, z3.s, z1.s
-; CHECK-NEXT: mov z0.s, p1/m, s2
-; CHECK-NEXT: st1w { z0.s }, p0, [x8]
+; CHECK-NEXT: ld1w { z2.s }, p0/z, [x0]
+; CHECK-NEXT: cmpeq p1.s, p1/z, z0.s, z1.s
+; CHECK-NEXT: fmov s0, #5.00000000
+; CHECK-NEXT: mov z2.s, p1/m, s0
+; CHECK-NEXT: st1w { z2.s }, p0, [x8]
 ; CHECK-NEXT: ret
 %op1 = load <64 x float>, ptr %a
 %r = insertelement <64 x float> %op1, float 5.0, i64 63
@@ -247,7 +247,7 @@
 define <1 x double> @insertelement_v1f64(<1 x double> %op1) vscale_range(2,0) #0 {
 ; CHECK-LABEL: insertelement_v1f64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #4617315517961601024
+; CHECK-NEXT: mov x8, #4617315517961601024 // =0x4014000000000000
 ; CHECK-NEXT: fmov d0, x8
 ; CHECK-NEXT: ret
 %r = insertelement <1 x double> %op1, double 5.0, i64 0
@@ -268,16 +268,16 @@
 define <4 x double> @insertelement_v4f64(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: insertelement_v4f64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w9, #3
 ; CHECK-NEXT: ptrue p0.d, vl4
-; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
-; CHECK-NEXT: fmov d2, #5.00000000
-; CHECK-NEXT: index z3.d, #0, #1
+; CHECK-NEXT: mov w9, #3 // =0x3
+; CHECK-NEXT: index z0.d, #0, #1
 ; CHECK-NEXT: ptrue p1.d
 ; CHECK-NEXT: mov z1.d, x9
-; CHECK-NEXT: cmpeq p1.d, p1/z, z3.d, z1.d
-; CHECK-NEXT: mov z0.d, p1/m, d2
-; CHECK-NEXT: st1d { z0.d }, p0, [x8]
+; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0]
+; CHECK-NEXT: cmpeq p1.d, p1/z, z0.d, z1.d
+; CHECK-NEXT: fmov d0, #5.00000000
+; CHECK-NEXT: mov z2.d, p1/m, d0
+; CHECK-NEXT: st1d { z2.d }, p0, [x8]
 ; CHECK-NEXT: ret
 %op1 = load <4 x double>, ptr %a
 %r = insertelement <4 x double> %op1, double 5.0, i64 3
@@ -287,33 +287,33 @@
 define <8 x double> @insertelement_v8f64(ptr %a) #0 {
 ; VBITS_GE_256-LABEL: insertelement_v8f64:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x9, #4
-; VBITS_GE_256-NEXT: mov w10, #3
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
-; VBITS_GE_256-NEXT: fmov d3, #5.00000000
-; VBITS_GE_256-NEXT: index z4.d, #0, #1
+; VBITS_GE_256-NEXT: mov w9, #3 // =0x3
+; VBITS_GE_256-NEXT: index z0.d, #0, #1
 ; VBITS_GE_256-NEXT: ptrue p1.d
-; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x9, lsl #3]
-; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
-; VBITS_GE_256-NEXT: mov z2.d, x10
-; VBITS_GE_256-NEXT: cmpeq p1.d, p1/z, z4.d, z2.d
-; VBITS_GE_256-NEXT: mov z0.d, p1/m, d3
-; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x8, x9, lsl #3]
-; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x8]
+; VBITS_GE_256-NEXT: mov z1.d, x9
+; VBITS_GE_256-NEXT: mov x9, #4 // =0x4
+; VBITS_GE_256-NEXT: fmov d2, #5.00000000
+; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x0, x9, lsl #3]
+; VBITS_GE_256-NEXT: cmpeq p1.d, p1/z, z0.d, z1.d
+; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0]
+; VBITS_GE_256-NEXT: mov z3.d, p1/m, d2
+; VBITS_GE_256-NEXT: st1d { z3.d }, p0, [x8, x9, lsl #3]
+; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x8]
 ; VBITS_GE_256-NEXT: ret
 ;
 ; VBITS_GE_512-LABEL: insertelement_v8f64:
 ; VBITS_GE_512: // %bb.0:
-; VBITS_GE_512-NEXT: mov w9, #7
 ; VBITS_GE_512-NEXT: ptrue p0.d, vl8
-; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
-; VBITS_GE_512-NEXT: fmov d2, #5.00000000
-; VBITS_GE_512-NEXT: index z3.d, #0, #1
+; VBITS_GE_512-NEXT: mov w9, #7 // =0x7
+; VBITS_GE_512-NEXT: index z0.d, #0, #1
 ; VBITS_GE_512-NEXT: ptrue p1.d
 ; VBITS_GE_512-NEXT: mov z1.d, x9
-; VBITS_GE_512-NEXT: cmpeq p1.d, p1/z, z3.d, z1.d
-; VBITS_GE_512-NEXT: mov z0.d, p1/m, d2
-; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x8]
+; VBITS_GE_512-NEXT: ld1d { z2.d }, p0/z, [x0]
+; VBITS_GE_512-NEXT: cmpeq p1.d, p1/z, z0.d, z1.d
+; VBITS_GE_512-NEXT: fmov d0, #5.00000000
+; VBITS_GE_512-NEXT: mov z2.d, p1/m, d0
+; VBITS_GE_512-NEXT: st1d { z2.d }, p0, [x8]
 ; VBITS_GE_512-NEXT: ret
 %op1 = load <8 x double>, ptr %a
 %r = insertelement <8 x double> %op1, double 5.0, i64 7
@@ -323,16 +323,16 @@
 define <16 x double> @insertelement_v16f64(ptr %a) vscale_range(8,0) #0 {
 ; CHECK-LABEL: insertelement_v16f64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w9, #15
 ; CHECK-NEXT: ptrue p0.d, vl16
-; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
-; CHECK-NEXT: fmov d2, #5.00000000
-; CHECK-NEXT: index z3.d, #0, #1
+; CHECK-NEXT: mov w9, #15 // =0xf
+; CHECK-NEXT: index z0.d, #0, #1
 ; CHECK-NEXT: ptrue p1.d
 ; CHECK-NEXT: mov z1.d, x9
-; CHECK-NEXT: cmpeq p1.d, p1/z, z3.d, z1.d
-; CHECK-NEXT: mov z0.d, p1/m, d2
-; CHECK-NEXT: st1d { z0.d }, p0, [x8]
+; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0]
+; CHECK-NEXT: cmpeq p1.d, p1/z, z0.d, z1.d
+; CHECK-NEXT: fmov d0, #5.00000000
+; CHECK-NEXT: mov z2.d, p1/m, d0
+; CHECK-NEXT: st1d { z2.d }, p0, [x8]
 ; CHECK-NEXT: ret
 %op1 = load <16 x double>, ptr %a
 %r = insertelement <16 x double> %op1, double 5.0, i64 15
@@ -342,16 +342,16 @@
 define <32 x double> @insertelement_v32f64(ptr %a) vscale_range(16,0) #0 {
 ; CHECK-LABEL: insertelement_v32f64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w9, #31
 ; CHECK-NEXT: ptrue p0.d, vl32
-; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
-; CHECK-NEXT: fmov d2, #5.00000000
-; CHECK-NEXT: index z3.d, #0, #1
+; CHECK-NEXT: mov w9, #31 // =0x1f
+; CHECK-NEXT: index z0.d, #0, #1
 ; CHECK-NEXT: ptrue p1.d
 ; CHECK-NEXT: mov z1.d, x9
-; CHECK-NEXT: cmpeq p1.d, p1/z, z3.d, z1.d
-; CHECK-NEXT: mov z0.d, p1/m, d2
-; CHECK-NEXT: st1d { z0.d }, p0, [x8]
+; CHECK-NEXT: ld1d { z2.d }, p0/z, [x0]
+; CHECK-NEXT: cmpeq p1.d, p1/z, z0.d, z1.d
+; CHECK-NEXT: fmov d0, #5.00000000
+; CHECK-NEXT: mov z2.d, p1/m, d0
+; CHECK-NEXT: st1d { z2.d }, p0, [x8]
 ; CHECK-NEXT: ret
 %op1 = load <32 x double>, ptr %a
 %r = insertelement <32 x double> %op1, double 5.0, i64 31
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-arith.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-arith.ll
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-arith.ll
@@ -48,8 +48,8 @@
 define void @add_v64i8(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: add_v64i8:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov w8, #32
 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32
+; VBITS_GE_256-NEXT: mov w8, #32 // =0x20
 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8]
 ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x8]
@@ -146,8 +146,8 @@
 define void @add_v32i16(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: add_v32i16:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #16
 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
+; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
 ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1]
@@ -244,8 +244,8 @@
 define void @add_v16i32(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: add_v16i32:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #8
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
+; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2]
@@ -342,8 +342,8 @@
 define void @add_v8i64(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: add_v8i64:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #4
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
@@ -388,8 +388,8 @@
 define void @add_v32i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
 ; CHECK-LABEL: add_v32i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #16
 ; CHECK-NEXT: ptrue p0.d, vl16
+; CHECK-NEXT: mov x8, #16 // =0x10
 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0]
 ; CHECK-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
@@ -449,8 +449,8 @@
 define void @mul_v64i8(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: mul_v64i8:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov w8, #32
 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32
+; VBITS_GE_256-NEXT: mov w8, #32 // =0x20
 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8]
 ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x8]
@@ -547,8 +547,8 @@
 define void @mul_v32i16(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: mul_v32i16:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #16
 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
+; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
 ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1]
@@ -645,8 +645,8 @@
 define void @mul_v16i32(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: mul_v16i32:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #8
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
+; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2]
@@ -707,8 +707,8 @@
 define <1 x i64> @mul_v1i64(<1 x i64> %op1, <1 x i64> %op2) #0 {
 ; CHECK-LABEL: mul_v1i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.d, vl1
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -720,8 +720,8 @@
 define <2 x i64> @mul_v2i64(<2 x i64> %op1, <2 x i64> %op2) #0 {
 ; CHECK-LABEL: mul_v2i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -749,8 +749,8 @@
 define void @mul_v8i64(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: mul_v8i64:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #4
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
@@ -851,8 +851,8 @@
 define void @sub_v64i8(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: sub_v64i8:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov w8, #32
 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32
+; VBITS_GE_256-NEXT: mov w8, #32 // =0x20
 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8]
 ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x8]
@@ -949,8 +949,8 @@
 define void @sub_v32i16(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: sub_v32i16:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #16
 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
+; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
 ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1]
@@ -1047,8 +1047,8 @@
 define void @sub_v16i32(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: sub_v16i32:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #8
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
+; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2]
@@ -1145,8 +1145,8 @@
 define void @sub_v8i64(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: sub_v8i64:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #4
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
@@ -1246,8 +1246,8 @@
 define void @abs_v64i8(ptr %a) #0 {
 ; VBITS_GE_256-LABEL: abs_v64i8:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov w8, #32
 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32
+; VBITS_GE_256-NEXT: mov w8, #32 // =0x20
 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8]
 ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: abs z0.b, p0/m, z0.b
@@ -1334,8 +1334,8 @@
 define void @abs_v32i16(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: abs_v32i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #16
 ; CHECK-NEXT: ptrue p0.h, vl16
+; CHECK-NEXT: mov x8, #16 // =0x10
 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0]
 ; CHECK-NEXT: abs z0.h, p0/m, z0.h
@@ -1352,21 +1352,21 @@
 define void @abs_v64i16(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: abs_v64i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #48
-; CHECK-NEXT: mov x9, #16
-; CHECK-NEXT: mov x10, #32
 ; CHECK-NEXT: ptrue p0.h, vl16
+; CHECK-NEXT: mov x8, #32 // =0x20
+; CHECK-NEXT: mov x9, #48 // =0x30
+; CHECK-NEXT: mov x10, #16 // =0x10
 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
 ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0, x9, lsl #1]
 ; CHECK-NEXT: ld1h { z2.h }, p0/z, [x0, x10, lsl #1]
 ; CHECK-NEXT: ld1h { z3.h }, p0/z, [x0]
-; CHECK-NEXT: abs z1.h, p0/m, z1.h
 ; CHECK-NEXT: abs z0.h, p0/m, z0.h
+; CHECK-NEXT: abs z1.h, p0/m, z1.h
 ; CHECK-NEXT: abs z2.h, p0/m, z2.h
 ; CHECK-NEXT: abs z3.h, p0/m, z3.h
-; CHECK-NEXT: st1h { z2.h }, p0, [x0, x10, lsl #1]
 ; CHECK-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
 ; CHECK-NEXT: st1h { z1.h }, p0, [x0, x9, lsl #1]
+; CHECK-NEXT: st1h { z2.h }, p0, [x0, x10, lsl #1]
 ; CHECK-NEXT: st1h { z3.h }, p0, [x0]
 ; CHECK-NEXT: ret
 %op1 = load <64 x i16>, ptr %a
@@ -1378,38 +1378,42 @@
 define void @abs_v128i16(ptr %a) vscale_range(2,0) #0 {
 ; CHECK-LABEL: abs_v128i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #96
-; CHECK-NEXT: mov x9, #48
-; CHECK-NEXT: mov x10, #16
-; CHECK-NEXT: mov x11, #80
-; CHECK-NEXT: mov x12, #32
-; CHECK-NEXT: mov x13, #112
-; CHECK-NEXT: mov x14, #64
 ; CHECK-NEXT: ptrue p0.h, vl16
-; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0, x9, lsl #1]
-; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0, x10, lsl #1]
-; CHECK-NEXT: ld1h { z2.h }, p0/z, [x0, x11, lsl #1]
-; CHECK-NEXT: ld1h { z3.h }, p0/z, [x0, x12, lsl #1]
-; CHECK-NEXT: ld1h { z4.h }, p0/z, [x0, x13, lsl #1]
-; CHECK-NEXT: ld1h { z5.h }, p0/z, [x0, x14, lsl #1]
-; CHECK-NEXT: ld1h { z6.h }, p0/z, [x0, x8, lsl #1]
+; CHECK-NEXT: mov x8, #96 // =0x60
+; CHECK-NEXT: mov x9, #112 // =0x70
+; CHECK-NEXT: mov x10, #64 // =0x40
+; CHECK-NEXT: mov x11, #80 // =0x50
+; CHECK-NEXT: mov x12, #32 // =0x20
+; CHECK-NEXT: mov x13, #48 // =0x30
+; CHECK-NEXT: mov x14, #16 // =0x10
+; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
+; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0, x9, lsl #1]
+; CHECK-NEXT: ld1h { z2.h }, p0/z, [x0, x10, lsl #1]
+; CHECK-NEXT: ld1h { z3.h }, p0/z, [x0, x11, lsl #1]
+; CHECK-NEXT: ld1h { z4.h }, p0/z, [x0, x12, lsl #1]
+; CHECK-NEXT: ld1h { z5.h }, p0/z, [x0, x13, lsl #1]
+; CHECK-NEXT: ld1h { z6.h }, p0/z, [x0, x14, lsl #1]
 ; CHECK-NEXT: ld1h { z7.h }, p0/z, [x0]
-; CHECK-NEXT: abs z1.h, p0/m, z1.h
 ; CHECK-NEXT: abs z0.h, p0/m, z0.h
-; CHECK-NEXT: abs z3.h, p0/m, z3.h
+; CHECK-NEXT: abs z1.h, p0/m, z1.h
 ; CHECK-NEXT: abs z2.h, p0/m, z2.h
-; CHECK-NEXT: abs z5.h, p0/m, z5.h
-; CHECK-NEXT: abs z4.h, p0/m, z4.h
-; CHECK-NEXT: abs z6.h, p0/m, z6.h
-; CHECK-NEXT: abs z7.h, p0/m, z7.h
-; CHECK-NEXT: st1h { z6.h }, p0, [x0, x8, lsl #1]
-; CHECK-NEXT: st1h { z4.h }, p0, [x0, x13, lsl #1]
-; CHECK-NEXT: st1h { z5.h }, p0, [x0, x14, lsl #1]
-; CHECK-NEXT: st1h { z2.h }, p0, [x0, x11, lsl #1]
-; CHECK-NEXT: st1h { z3.h }, p0, [x0, x12, lsl #1]
-; CHECK-NEXT: st1h { z0.h }, p0, [x0, x9, lsl #1]
-; CHECK-NEXT: st1h { z1.h }, p0, [x0, x10, lsl #1]
-; CHECK-NEXT: st1h { z7.h }, p0, [x0]
+; CHECK-NEXT: abs z3.h, p0/m, z3.h
+; CHECK-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
+; CHECK-NEXT: movprfx z0, z4
+; CHECK-NEXT: abs z0.h, p0/m, z4.h
+; CHECK-NEXT: st1h { z1.h }, p0, [x0, x9, lsl #1]
+; CHECK-NEXT: movprfx z1, z5
+; CHECK-NEXT: abs z1.h, p0/m, z5.h
+; CHECK-NEXT: st1h { z2.h }, p0, [x0, x10, lsl #1]
+; CHECK-NEXT: movprfx z2, z6
+; CHECK-NEXT: abs z2.h, p0/m, z6.h
+; CHECK-NEXT: st1h { z3.h }, p0, [x0, x11, lsl #1]
+; CHECK-NEXT: movprfx z3, z7
+; CHECK-NEXT: abs z3.h, p0/m, z7.h
+; CHECK-NEXT: st1h { z0.h }, p0, [x0, x12, lsl #1]
+; CHECK-NEXT: st1h { z1.h }, p0, [x0, x13, lsl #1]
+; CHECK-NEXT: st1h { z2.h }, p0, [x0, x14, lsl #1]
+; CHECK-NEXT: st1h { z3.h }, p0, [x0]
 ; CHECK-NEXT: ret
 %op1 = load <128 x i16>, ptr %a
 %res = call <128 x i16> @llvm.abs.v128i16(<128 x i16> %op1, i1 false)
@@ -1454,8 +1458,8 @@
 define void @abs_v16i32(ptr %a) #0 {
 ; VBITS_GE_256-LABEL: abs_v16i32:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #8
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
+; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: abs z0.s, p0/m, z0.s
@@ -1542,8 +1546,8 @@
 define void @abs_v8i64(ptr %a) #0 {
 ; VBITS_GE_256-LABEL: abs_v8i64:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #4
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: abs z0.d, p0/m, z0.d
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-compares.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-compares.ll
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-compares.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-compares.ll
@@ -52,8 +52,8 @@
 define void @icmp_eq_v64i8(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: icmp_eq_v64i8:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov w8, #32
 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32
+; VBITS_GE_256-NEXT: mov w8, #32 // =0x20
 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8]
 ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x8]
@@ -162,8 +162,8 @@
 define void @icmp_eq_v32i16(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: icmp_eq_v32i16:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #16
 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
+; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
 ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1]
@@ -272,8 +272,8 @@
 define void @icmp_eq_v16i32(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: icmp_eq_v16i32:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #8
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
+; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2]
@@ -382,8 +382,8 @@
 define void @icmp_eq_v8i64(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: icmp_eq_v8i64:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #4
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-div.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-div.ll
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-div.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-div.ll
@@ -15,8 +15,8 @@
 define <8 x i8> @sdiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) #0 {
 ; VBITS_GE_128-LABEL: sdiv_v8i8:
 ; VBITS_GE_128: // %bb.0:
-; VBITS_GE_128-NEXT: sshll v1.8h, v1.8b, #0
 ; VBITS_GE_128-NEXT: ptrue p0.s, vl4
+; VBITS_GE_128-NEXT: sshll v1.8h, v1.8b, #0
 ; VBITS_GE_128-NEXT: sshll v0.8h, v0.8b, #0
 ; VBITS_GE_128-NEXT: sshll2 v2.4s, v1.8h, #0
 ; VBITS_GE_128-NEXT: sshll2 v3.4s, v0.8h, #0
@@ -94,26 +94,26 @@
 define <16 x i8> @sdiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 {
 ; VBITS_GE_128-LABEL: sdiv_v16i8:
 ; VBITS_GE_128: // %bb.0:
-; VBITS_GE_128-NEXT: sshll2 v2.8h, v1.16b, #0
 ; VBITS_GE_128-NEXT: ptrue p0.s, vl4
+; VBITS_GE_128-NEXT: sshll2 v2.8h, v1.16b, #0
 ; VBITS_GE_128-NEXT: sshll2 v3.8h, v0.16b, #0
+; VBITS_GE_128-NEXT: sshll v1.8h, v1.8b, #0
+; VBITS_GE_128-NEXT: sshll v0.8h, v0.8b, #0
 ; VBITS_GE_128-NEXT: sshll2 v4.4s, v2.8h, #0
 ; VBITS_GE_128-NEXT: sshll2 v5.4s, v3.8h, #0
 ; VBITS_GE_128-NEXT: sshll v2.4s, v2.4h, #0
 ; VBITS_GE_128-NEXT: sshll v3.4s, v3.4h, #0
 ; VBITS_GE_128-NEXT: sdivr z4.s, p0/m, z4.s, z5.s
-; VBITS_GE_128-NEXT: sshll v1.8h, v1.8b, #0
-; VBITS_GE_128-NEXT: sshll v0.8h, v0.8b, #0
+; VBITS_GE_128-NEXT: sshll2 v5.4s, v0.8h, #0
+; VBITS_GE_128-NEXT: sshll v0.4s, v0.4h, #0
 ; VBITS_GE_128-NEXT: sdivr z2.s, p0/m, z2.s, z3.s
 ; VBITS_GE_128-NEXT: sshll2 v3.4s, v1.8h, #0
-; VBITS_GE_128-NEXT: sshll2 v5.4s, v0.8h, #0
 ; VBITS_GE_128-NEXT: sshll v1.4s, v1.4h, #0
-; VBITS_GE_128-NEXT: sshll v0.4s, v0.4h, #0
 ; VBITS_GE_128-NEXT: sdivr z3.s, p0/m, z3.s, z5.s
-; VBITS_GE_128-NEXT: uzp1 v2.8h, v2.8h, v4.8h
 ; VBITS_GE_128-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
+; VBITS_GE_128-NEXT: uzp1 v1.8h, v2.8h, v4.8h
 ; VBITS_GE_128-NEXT: uzp1 v0.8h, v0.8h, v3.8h
-; VBITS_GE_128-NEXT: uzp1 v0.16b, v0.16b, v2.16b
+; VBITS_GE_128-NEXT: uzp1 v0.16b, v0.16b, v1.16b
 ; VBITS_GE_128-NEXT: ret
 ;
 ; VBITS_GE_256-LABEL: sdiv_v16i8:
@@ -131,11 +131,11 @@
 ; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h
 ; VBITS_GE_256-NEXT: sdivr z2.s, p0/m, z2.s, z3.s
 ; VBITS_GE_256-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
-; VBITS_GE_256-NEXT: uzp1 z2.h, z2.h, z2.h
-; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
 ; VBITS_GE_256-NEXT: ptrue p0.h, vl8
-; VBITS_GE_256-NEXT: splice z2.h, p0, z2.h, z0.h
-; VBITS_GE_256-NEXT: uzp1 z0.b, z2.b, z2.b
+; VBITS_GE_256-NEXT: uzp1 z1.h, z2.h, z2.h
+; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
+; VBITS_GE_256-NEXT: splice z1.h, p0, z1.h, z0.h
+; VBITS_GE_256-NEXT: uzp1 z0.b, z1.b, z1.b
 ; VBITS_GE_256-NEXT: // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_256-NEXT: ret
 ;
@@ -203,6 +203,7 @@
 ; CHECK-LABEL: sdiv_v128i8:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.b, vl128
+; CHECK-NEXT: ptrue p1.h, vl128
 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
 ; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
 ; CHECK-NEXT: ptrue p0.s, vl64
@@ -216,12 +217,11 @@
 ; CHECK-NEXT: sunpklo z0.s, z0.h
 ; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z3.s
 ; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT: ptrue p0.h, vl64
-; CHECK-NEXT: splice z2.h, p0, z2.h, z0.h
-; CHECK-NEXT: ptrue p0.h, vl128
-; CHECK-NEXT: st1b { z2.h }, p0, [x0]
+; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h
+; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT: splice z1.h, p0, z1.h, z0.h
+; CHECK-NEXT: st1b { z1.h }, p1, [x0]
 ; CHECK-NEXT: ret
 %op1 = load <128 x i8>, ptr %a
 %op2 = load <128 x i8>, ptr %b
@@ -237,41 +237,40 @@
 ; CHECK-NEXT: ptrue p1.s, vl64
 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
 ; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
-; CHECK-NEXT: ptrue p2.h, vl64
 ; CHECK-NEXT: sunpklo z2.h, z1.b
 ; CHECK-NEXT: sunpklo z3.h, z0.b
 ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #128
 ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #128
+; CHECK-NEXT: sunpklo z1.h, z1.b
 ; CHECK-NEXT: sunpklo z4.s, z2.h
 ; CHECK-NEXT: sunpklo z5.s, z3.h
 ; CHECK-NEXT: ext z2.b, z2.b, z2.b, #128
 ; CHECK-NEXT: ext z3.b, z3.b, z3.b, #128
-; CHECK-NEXT: sunpklo z1.h, z1.b
 ; CHECK-NEXT: sunpklo z0.h, z0.b
-; CHECK-NEXT: sdivr z4.s, p1/m, z4.s, z5.s
 ; CHECK-NEXT: sunpklo z2.s, z2.h
 ; CHECK-NEXT: sunpklo z3.s, z3.h
-; CHECK-NEXT: sunpklo z5.s, z1.h
+; CHECK-NEXT: sdivr z4.s, p1/m, z4.s, z5.s
+; CHECK-NEXT: sunpklo z5.s, z0.h
+; CHECK-NEXT: ext z0.b, z0.b, z0.b, #128
+; CHECK-NEXT: sunpklo z0.s, z0.h
 ; CHECK-NEXT: sdivr z2.s, p1/m, z2.s, z3.s
-; CHECK-NEXT: uzp1 z3.h, z4.h, z4.h
-; CHECK-NEXT: sunpklo z4.s, z0.h
+; CHECK-NEXT: sunpklo z3.s, z1.h
 ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #128
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #128
 ; CHECK-NEXT: sunpklo z1.s, z1.h
-; CHECK-NEXT: sunpklo z0.s, z0.h
+; CHECK-NEXT: sdivr z3.s, p1/m, z3.s, z5.s
 ; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
 ; CHECK-NEXT: sdiv z0.s, p1/m, z0.s, z1.s
-; CHECK-NEXT: splice z3.h, p2, z3.h, z2.h
-; CHECK-NEXT: movprfx z2, z4
-; CHECK-NEXT: sdiv z2.s, p1/m, z2.s, z5.s
-; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h
+; CHECK-NEXT: ptrue p1.h, vl64
+; CHECK-NEXT: uzp1 z1.h, z4.h, z4.h
+; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h
+; CHECK-NEXT: splice z1.h, p1, z1.h, z2.h
 ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: uzp1 z2.b, z3.b, z3.b
-; CHECK-NEXT: splice z1.h, p2, z1.h, z0.h
+; CHECK-NEXT: splice z3.h, p1, z3.h, z0.h
 ; CHECK-NEXT: ptrue p1.b, vl128
 ; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b
-; CHECK-NEXT: splice z2.b, p1, z2.b, z0.b
-; CHECK-NEXT: st1b { z2.b }, p0, [x0]
+; CHECK-NEXT: uzp1 z1.b, z3.b, z3.b
+; CHECK-NEXT: splice z0.b, p1, z0.b, z1.b
+; CHECK-NEXT: st1b { z0.b }, p0, [x0]
 ; CHECK-NEXT: ret
 %op1 = load <256 x i8>, ptr %a
 %op2 = load <256 x i8>, ptr %b
@@ -285,8 +284,8 @@
 define <4 x i16> @sdiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) #0 {
 ; VBITS_GE_128-LABEL: sdiv_v4i16:
 ; VBITS_GE_128: // %bb.0:
-; VBITS_GE_128-NEXT: sshll v1.4s, v1.4h, #0
 ; VBITS_GE_128-NEXT: ptrue p0.s, vl4
+; VBITS_GE_128-NEXT: sshll v1.4s, v1.4h, #0
 ; VBITS_GE_128-NEXT: sshll v0.4s, v0.4h, #0
 ; VBITS_GE_128-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
 ; VBITS_GE_128-NEXT: xtn v0.4h, v0.4s
@@ -294,13 +293,13 @@
 ;
 ; VBITS_GE_256-LABEL: sdiv_v4i16:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: sshll v1.4s, v1.4h, #0
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl4
+; VBITS_GE_256-NEXT: sshll v1.4s, v1.4h, #0
 ; VBITS_GE_256-NEXT: sshll v0.4s, v0.4h, #0
 ; VBITS_GE_256-NEXT: sdivr z1.s, p0/m, z1.s, z0.s
 ; VBITS_GE_256-NEXT: mov w8, v1.s[1]
-; VBITS_GE_256-NEXT: mov w9, v1.s[2]
 ; VBITS_GE_256-NEXT: mov v0.16b, v1.16b
+; VBITS_GE_256-NEXT: mov w9, v1.s[2]
 ; VBITS_GE_256-NEXT: mov v0.h[1], w8
 ; VBITS_GE_256-NEXT: mov w8, v1.s[3]
 ; VBITS_GE_256-NEXT: mov v0.h[2], w9
@@ -310,13 +309,13 @@
 ;
 ; VBITS_GE_512-LABEL: sdiv_v4i16:
 ; VBITS_GE_512: // %bb.0:
-; VBITS_GE_512-NEXT: sshll v1.4s, v1.4h, #0
 ; VBITS_GE_512-NEXT: ptrue p0.s, vl4
+; VBITS_GE_512-NEXT: sshll v1.4s, v1.4h, #0
 ; VBITS_GE_512-NEXT: sshll v0.4s, v0.4h, #0
 ; VBITS_GE_512-NEXT: sdivr z1.s, p0/m, z1.s, z0.s
 ; VBITS_GE_512-NEXT: mov w8, v1.s[1]
-; VBITS_GE_512-NEXT: mov w9, v1.s[2]
 ; VBITS_GE_512-NEXT: mov v0.16b, v1.16b
+; VBITS_GE_512-NEXT: mov w9, v1.s[2]
 ; VBITS_GE_512-NEXT: mov v0.h[1], w8
 ; VBITS_GE_512-NEXT: mov w8, v1.s[3]
 ; VBITS_GE_512-NEXT: mov v0.h[2], w9
@@ -330,8 +329,8 @@
 define <8 x i16> @sdiv_v8i16(<8 x i16> %op1, <8 x i16> %op2) #0 {
 ; VBITS_GE_128-LABEL: sdiv_v8i16:
 ; VBITS_GE_128: // %bb.0:
-; VBITS_GE_128-NEXT: sshll2 v2.4s, v1.8h, #0
 ; VBITS_GE_128-NEXT: ptrue p0.s, vl4
+; VBITS_GE_128-NEXT: sshll2 v2.4s, v1.8h, #0
 ; VBITS_GE_128-NEXT: sshll2 v3.4s, v0.8h, #0
 ; VBITS_GE_128-NEXT: sshll v1.4s, v1.4h, #0
 ; VBITS_GE_128-NEXT: sshll v0.4s, v0.4h, #0
@@ -342,9 +341,9 @@
 ;
 ; VBITS_GE_256-LABEL: sdiv_v8i16:
 ; VBITS_GE_256: // %bb.0:
+; VBITS_GE_256-NEXT: ptrue p0.s, vl8
 ; VBITS_GE_256-NEXT: // kill: def $q1 killed $q1 def $z1
 ; VBITS_GE_256-NEXT: // kill: def $q0 killed $q0 def $z0
-; VBITS_GE_256-NEXT: ptrue p0.s, vl8
 ; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h
 ; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h
 ; VBITS_GE_256-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
@@ -354,9 +353,9 @@
 ;
 ; VBITS_GE_512-LABEL: sdiv_v8i16:
 ; VBITS_GE_512: // %bb.0:
+; VBITS_GE_512-NEXT: ptrue p0.s, vl8
 ; VBITS_GE_512-NEXT: // kill: def $q1 killed $q1 def $z1
 ; VBITS_GE_512-NEXT: // kill: def $q0 killed $q0 def $z0
-; VBITS_GE_512-NEXT: ptrue p0.s, vl8
 ; VBITS_GE_512-NEXT: sunpklo z1.s, z1.h
 ; VBITS_GE_512-NEXT: sunpklo z0.s, z0.h
 ; VBITS_GE_512-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
@@ -370,24 +369,24 @@
 define void @sdiv_v16i16(ptr %a, ptr %b) #0 {
 ; VBITS_GE_128-LABEL: sdiv_v16i16:
 ; VBITS_GE_128: // %bb.0:
-; VBITS_GE_128-NEXT: ldp q3, q0, [x1]
 ; VBITS_GE_128-NEXT: ptrue p0.s, vl4
+; VBITS_GE_128-NEXT: ldp q4, q1, [x1]
+; VBITS_GE_128-NEXT: ldr q0, [x0, #16]
+; VBITS_GE_128-NEXT: sshll2 v2.4s, v1.8h, #0
+; VBITS_GE_128-NEXT: sshll2 v3.4s, v0.8h, #0
+; VBITS_GE_128-NEXT: sshll2 v5.4s, v4.8h, #0
+; VBITS_GE_128-NEXT: sshll v4.4s, v4.4h, #0
+; VBITS_GE_128-NEXT: sshll v1.4s, v1.4h, #0
+; VBITS_GE_128-NEXT: sshll v0.4s, v0.4h, #0
+; VBITS_GE_128-NEXT: sdivr z2.s, p0/m, z2.s, z3.s
+; VBITS_GE_128-NEXT: ldr q3, [x0]
 ; VBITS_GE_128-NEXT: sshll2 v6.4s, v3.8h, #0
 ; VBITS_GE_128-NEXT: sshll v3.4s, v3.4h, #0
-; VBITS_GE_128-NEXT: ldp q1, q2, [x0]
-; VBITS_GE_128-NEXT: sshll2 v4.4s, v0.8h, #0
-; VBITS_GE_128-NEXT: sshll v0.4s, v0.4h, #0
-; VBITS_GE_128-NEXT: sshll2 v7.4s, v1.8h, #0
-; VBITS_GE_128-NEXT: sshll v1.4s, v1.4h, #0
-; VBITS_GE_128-NEXT: sshll2 v5.4s, v2.8h, #0
-; VBITS_GE_128-NEXT: sshll v2.4s, v2.4h, #0
-; VBITS_GE_128-NEXT: sdiv z1.s, p0/m, z1.s, z3.s
-; VBITS_GE_128-NEXT: sdivr z4.s, p0/m, z4.s, z5.s
-; VBITS_GE_128-NEXT: sdivr z0.s, p0/m, z0.s, z2.s
-; VBITS_GE_128-NEXT: movprfx z2, z7
-; VBITS_GE_128-NEXT: sdiv z2.s, p0/m, z2.s, z6.s
-; VBITS_GE_128-NEXT: uzp1 v1.8h, v1.8h, v2.8h
-; VBITS_GE_128-NEXT: uzp1 v0.8h, v0.8h, v4.8h
+; VBITS_GE_128-NEXT: sdivr z5.s, p0/m, z5.s, z6.s
+; VBITS_GE_128-NEXT: sdiv z3.s, p0/m, z3.s, z4.s
+; VBITS_GE_128-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
+; VBITS_GE_128-NEXT: uzp1 v1.8h, v3.8h, v5.8h
+; VBITS_GE_128-NEXT: uzp1 v0.8h, v0.8h, v2.8h
 ; VBITS_GE_128-NEXT: stp q1, q0, [x0]
 ; VBITS_GE_128-NEXT: ret
 ;
@@ -405,11 +404,11 @@
 ; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h
 ; VBITS_GE_256-NEXT: sdivr z2.s, p1/m, z2.s, z3.s
 ; VBITS_GE_256-NEXT: sdiv z0.s, p1/m, z0.s, z1.s
-; VBITS_GE_256-NEXT: uzp1 z2.h, z2.h, z2.h
-; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
 ; VBITS_GE_256-NEXT: ptrue p1.h, vl8
-; VBITS_GE_256-NEXT: splice z2.h, p1, z2.h, z0.h
-; VBITS_GE_256-NEXT: st1h { z2.h }, p0, [x0]
+; VBITS_GE_256-NEXT: uzp1 z1.h, z2.h, z2.h
+; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
+; VBITS_GE_256-NEXT: splice z1.h, p1, z1.h, z0.h
+; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
 ; VBITS_GE_256-NEXT: ret
 ;
 ; VBITS_GE_512-LABEL: sdiv_v16i16:
@@ -483,11 +482,11 @@
 ; CHECK-NEXT: sunpklo z0.s, z0.h
 ; CHECK-NEXT: sdivr z2.s, p1/m, z2.s, z3.s
 ; CHECK-NEXT: sdiv z0.s, p1/m, z0.s, z1.s
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT: ptrue p1.h, vl64
-; CHECK-NEXT: splice z2.h, p1, z2.h, z0.h
-; CHECK-NEXT: st1h { z2.h }, p0, [x0]
+; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h
+; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT: splice z1.h, p1, z1.h, z0.h
+; CHECK-NEXT: st1h { z1.h }, p0, [x0]
 ; CHECK-NEXT: ret
 %op1 = load <128 x i16>, ptr %a
 %op2 = load <128 x i16>, ptr %b
@@ -500,8 +499,8 @@
 define <2 x i32> @sdiv_v2i32(<2 x i32> %op1, <2 x i32> %op2) vscale_range(1,0) #0 {
 ; CHECK-LABEL: sdiv_v2i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.s, vl2
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -514,8 +513,8 @@
 define <4 x i32> @sdiv_v4i32(<4 x i32> %op1, <4 x i32> %op2) vscale_range(1,0) #0 {
 ; CHECK-LABEL: sdiv_v4i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -543,31 +542,30 @@
 define void @sdiv_v16i32(ptr %a, ptr %b) #0 {
 ; VBITS_GE_128-LABEL: sdiv_v16i32:
 ; VBITS_GE_128: // %bb.0:
-; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32]
 ; VBITS_GE_128-NEXT: ptrue p0.s, vl4
-; VBITS_GE_128-NEXT: ldp q4, q5, [x1, #32]
-; VBITS_GE_128-NEXT: sdiv z0.s, p0/m, z0.s, z4.s
-; VBITS_GE_128-NEXT: sdiv z1.s, p0/m, z1.s, z5.s
-; VBITS_GE_128-NEXT: ldp q2, q3, [x0]
-; VBITS_GE_128-NEXT: ldp q6, q4, [x1]
-; VBITS_GE_128-NEXT: stp q0, q1, [x0, #32]
-; VBITS_GE_128-NEXT: movprfx z0, z2
-; VBITS_GE_128-NEXT: sdiv z0.s, p0/m, z0.s, z6.s
-; VBITS_GE_128-NEXT: movprfx z1, z3
+; VBITS_GE_128-NEXT: ldp q0, q3, [x1]
+; VBITS_GE_128-NEXT: ldp q1, q2, [x0]
+; VBITS_GE_128-NEXT: ldp q5, q4, [x1, #32]
+; VBITS_GE_128-NEXT: sdivr z0.s, p0/m, z0.s, z1.s
+; VBITS_GE_128-NEXT: ldr q1, [x0, #48]
 ; VBITS_GE_128-NEXT: sdiv z1.s, p0/m, z1.s, z4.s
-; VBITS_GE_128-NEXT: stp q0, q1, [x0]
+; VBITS_GE_128-NEXT: ldr q4, [x0, #32]
+; VBITS_GE_128-NEXT: sdiv z4.s, p0/m, z4.s, z5.s
+; VBITS_GE_128-NEXT: sdiv z2.s, p0/m, z2.s, z3.s
+; VBITS_GE_128-NEXT: stp q4, q1, [x0, #32]
+; VBITS_GE_128-NEXT: stp q0, q2, [x0]
 ; VBITS_GE_128-NEXT: ret
 ;
 ; VBITS_GE_256-LABEL: sdiv_v16i32:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #8
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
+; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2]
-; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1]
 ; VBITS_GE_256-NEXT: sdiv z0.s, p0/m, z0.s, z2.s
-; VBITS_GE_256-NEXT: sdiv z1.s, p0/m, z1.s, z3.s
+; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1]
+; VBITS_GE_256-NEXT: sdiv z1.s, p0/m, z1.s, z2.s
 ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
 ; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
 ; VBITS_GE_256-NEXT: ret
@@ -623,8 +621,8 @@
 define <1 x i64> @sdiv_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(1,0) #0 {
 ; CHECK-LABEL: sdiv_v1i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.d, vl1
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT: sdiv z0.d, p0/m, z0.d, z1.d
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -637,8 +635,8 @@
 define <2 x i64> @sdiv_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(1,0) #0 {
 ; CHECK-LABEL: sdiv_v2i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT: sdiv z0.d, p0/m, z0.d, z1.d
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -666,31 +664,30 @@
 define void @sdiv_v8i64(ptr %a, ptr %b) #0 {
 ; VBITS_GE_128-LABEL: sdiv_v8i64:
 ; VBITS_GE_128: // %bb.0:
-; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32]
 ; VBITS_GE_128-NEXT: ptrue p0.d, vl2
-; VBITS_GE_128-NEXT: ldp q4, q5, [x1, #32]
-; VBITS_GE_128-NEXT: sdiv z0.d, p0/m, z0.d, z4.d
-; VBITS_GE_128-NEXT: sdiv z1.d, p0/m, z1.d, z5.d
-; VBITS_GE_128-NEXT: ldp q2, q3, [x0]
-; VBITS_GE_128-NEXT: ldp q6, q4, [x1]
-; VBITS_GE_128-NEXT: stp q0, q1, [x0, #32]
-; VBITS_GE_128-NEXT: movprfx z0, z2
-; VBITS_GE_128-NEXT: sdiv z0.d, p0/m, z0.d, z6.d
-; VBITS_GE_128-NEXT: movprfx z1, z3
+; VBITS_GE_128-NEXT: ldp q0, q3, [x1]
+; VBITS_GE_128-NEXT: ldp q1, q2, [x0]
+; VBITS_GE_128-NEXT: ldp q5, q4, [x1, #32]
+; VBITS_GE_128-NEXT: sdivr z0.d, p0/m, z0.d, z1.d
+; VBITS_GE_128-NEXT: ldr q1, [x0, #48]
 ; VBITS_GE_128-NEXT: sdiv z1.d, p0/m, z1.d, z4.d
-; VBITS_GE_128-NEXT: stp q0, q1, [x0]
+; VBITS_GE_128-NEXT: ldr q4, [x0, #32]
+; VBITS_GE_128-NEXT: sdiv z4.d, p0/m, z4.d, z5.d
+; VBITS_GE_128-NEXT: sdiv z2.d, p0/m, z2.d, z3.d
+; VBITS_GE_128-NEXT: stp q4, q1, [x0, #32]
+; VBITS_GE_128-NEXT: stp q0, q2, [x0]
 ; VBITS_GE_128-NEXT: ret
 ;
 ; VBITS_GE_256-LABEL: sdiv_v8i64:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #4
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
-; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1]
 ; VBITS_GE_256-NEXT: sdiv z0.d, p0/m, z0.d, z2.d
-; VBITS_GE_256-NEXT: sdiv z1.d, p0/m, z1.d, z3.d
+; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1]
+; VBITS_GE_256-NEXT: sdiv z1.d, p0/m, z1.d, z2.d
 ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
 ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
 ; VBITS_GE_256-NEXT: ret
@@ -751,8 +748,8 @@
 define <8 x i8> @udiv_v8i8(<8 x i8> %op1, <8 x i8> %op2) #0 {
 ; VBITS_GE_128-LABEL: udiv_v8i8:
 ; VBITS_GE_128: // %bb.0:
-; VBITS_GE_128-NEXT: ushll v1.8h, v1.8b, #0
 ; VBITS_GE_128-NEXT: ptrue p0.s, vl4
+; VBITS_GE_128-NEXT: ushll v1.8h, v1.8b, #0
 ; VBITS_GE_128-NEXT: ushll v0.8h, v0.8b, #0
 ; VBITS_GE_128-NEXT: ushll2 v2.4s, v1.8h, #0
 ; VBITS_GE_128-NEXT: ushll2 v3.4s, v0.8h, #0
@@ -830,26 +827,26 @@
 define <16 x i8> @udiv_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 {
 ; VBITS_GE_128-LABEL: udiv_v16i8:
 ; VBITS_GE_128: // %bb.0:
-; VBITS_GE_128-NEXT: ushll2 v2.8h, v1.16b, #0
 ; VBITS_GE_128-NEXT: ptrue p0.s, vl4
+; VBITS_GE_128-NEXT: ushll2 v2.8h, v1.16b, #0
 ; VBITS_GE_128-NEXT: ushll2 v3.8h, v0.16b, #0
+; VBITS_GE_128-NEXT: ushll v1.8h, v1.8b, #0
+; VBITS_GE_128-NEXT: ushll v0.8h, v0.8b, #0
 ; VBITS_GE_128-NEXT: ushll2 v4.4s, v2.8h, #0
 ; VBITS_GE_128-NEXT: ushll2 v5.4s, v3.8h, #0
 ; VBITS_GE_128-NEXT: ushll v2.4s, v2.4h, #0
 ; VBITS_GE_128-NEXT: ushll v3.4s, v3.4h, #0
 ; VBITS_GE_128-NEXT: udivr z4.s, p0/m, z4.s, z5.s
-; VBITS_GE_128-NEXT: ushll v1.8h, v1.8b, #0
-; VBITS_GE_128-NEXT: ushll v0.8h, v0.8b, #0
+; VBITS_GE_128-NEXT: ushll2 v5.4s, v0.8h, #0
+; VBITS_GE_128-NEXT: ushll v0.4s, v0.4h, #0
 ; VBITS_GE_128-NEXT: udivr z2.s, p0/m, z2.s, z3.s
 ; VBITS_GE_128-NEXT: ushll2 v3.4s, v1.8h, #0
-; VBITS_GE_128-NEXT: ushll2 v5.4s, v0.8h, #0
 ; VBITS_GE_128-NEXT: ushll v1.4s, v1.4h, #0
-; VBITS_GE_128-NEXT: ushll v0.4s, v0.4h, #0
 ; VBITS_GE_128-NEXT: udivr z3.s, p0/m, z3.s, z5.s
-; VBITS_GE_128-NEXT: uzp1 v2.8h, v2.8h, v4.8h
 ; VBITS_GE_128-NEXT: udiv z0.s, p0/m, z0.s, z1.s
+; VBITS_GE_128-NEXT: uzp1 v1.8h, v2.8h, v4.8h
 ; VBITS_GE_128-NEXT: uzp1 v0.8h, v0.8h, v3.8h
-; VBITS_GE_128-NEXT: uzp1 v0.16b, v0.16b, v2.16b
+; VBITS_GE_128-NEXT: uzp1 v0.16b, v0.16b, v1.16b
 ; VBITS_GE_128-NEXT: ret
 ;
 ; VBITS_GE_256-LABEL: udiv_v16i8:
@@ -867,11 +864,11 @@
 ; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
 ; VBITS_GE_256-NEXT: udivr z2.s, p0/m, z2.s, z3.s
 ; VBITS_GE_256-NEXT: udiv z0.s, p0/m, z0.s, z1.s
-; VBITS_GE_256-NEXT: uzp1 z2.h, z2.h, z2.h
-; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
 ; VBITS_GE_256-NEXT: ptrue p0.h, vl8
-; VBITS_GE_256-NEXT: splice z2.h, p0, z2.h, z0.h
-; VBITS_GE_256-NEXT: uzp1 z0.b, z2.b, z2.b
+; VBITS_GE_256-NEXT: uzp1 z1.h, z2.h, z2.h
+; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
+; VBITS_GE_256-NEXT: splice z1.h, p0, z1.h, z0.h
+; VBITS_GE_256-NEXT: uzp1 z0.b, z1.b, z1.b
 ; VBITS_GE_256-NEXT: // kill: def $q0 killed $q0 killed $z0
 ; VBITS_GE_256-NEXT: ret
 ;
@@ -940,11 +937,11 @@
 ; CHECK-NEXT: uunpklo z1.s, z1.h
 ; CHECK-NEXT: udivr z2.s, p1/m, z2.s, z3.s
 ; CHECK-NEXT: udivr z0.s, p1/m, z0.s, z1.s
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT: ptrue p1.h, vl64
-; CHECK-NEXT: splice z2.h, p1, z2.h, z0.h
-; CHECK-NEXT: st1b { z2.h }, p0, [x0]
+; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h
+; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT: splice z1.h, p1, z1.h, z0.h
+; CHECK-NEXT: st1b { z1.h }, p0, [x0]
 ; CHECK-NEXT: ret
 %op1 = load <128 x i8>, ptr %a
 %op2 = load <128 x i8>, ptr %b
@@ -960,41 +957,40 @@
 ; CHECK-NEXT: ptrue p1.s, vl64
 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
 ; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1]
-; CHECK-NEXT: ptrue p2.h, vl64
 ; CHECK-NEXT: uunpklo z2.h, z1.b
 ; CHECK-NEXT: uunpklo z3.h, z0.b
 ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #128
 ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #128
+; CHECK-NEXT: uunpklo z1.h, z1.b
 ; CHECK-NEXT: uunpklo z4.s, z2.h
 ; CHECK-NEXT: uunpklo z5.s, z3.h
 ; CHECK-NEXT: ext z2.b, z2.b, z2.b, #128
 ; CHECK-NEXT: ext z3.b, z3.b, z3.b, #128
-; CHECK-NEXT: uunpklo z1.h, z1.b
 ; CHECK-NEXT: uunpklo z0.h, z0.b
-; CHECK-NEXT: udivr z4.s, p1/m, z4.s, z5.s
 ; CHECK-NEXT: uunpklo z2.s, z2.h
 ; CHECK-NEXT: uunpklo z3.s, z3.h
-; CHECK-NEXT: uunpklo z5.s, z1.h
+; CHECK-NEXT: udivr z4.s, p1/m, z4.s, z5.s
+; CHECK-NEXT: uunpklo z5.s, z0.h
+; CHECK-NEXT: ext z0.b, z0.b, z0.b, #128
+; CHECK-NEXT: uunpklo z0.s, z0.h
 ; CHECK-NEXT: udivr z2.s, p1/m, z2.s, z3.s
-; CHECK-NEXT: uzp1 z3.h, z4.h, z4.h
-; CHECK-NEXT: uunpklo z4.s, z0.h
+; CHECK-NEXT: uunpklo z3.s, z1.h
 ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #128
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #128
 ; CHECK-NEXT: uunpklo z1.s, z1.h
-; CHECK-NEXT: uunpklo z0.s, z0.h
+; CHECK-NEXT: udivr z3.s, p1/m, z3.s, z5.s
 ; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
 ; CHECK-NEXT: udiv z0.s, p1/m, z0.s, z1.s
-; CHECK-NEXT: splice z3.h, p2, z3.h, z2.h
-; CHECK-NEXT: movprfx z2, z4
-; CHECK-NEXT: udiv z2.s, p1/m, z2.s, z5.s
-; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h
+; CHECK-NEXT: ptrue p1.h, vl64
+; CHECK-NEXT: uzp1 z1.h, z4.h, z4.h
+; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h
+; CHECK-NEXT:
splice z1.h, p1, z1.h, z2.h ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h -; CHECK-NEXT: uzp1 z2.b, z3.b, z3.b -; CHECK-NEXT: splice z1.h, p2, z1.h, z0.h +; CHECK-NEXT: splice z3.h, p1, z3.h, z0.h ; CHECK-NEXT: ptrue p1.b, vl128 ; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b -; CHECK-NEXT: splice z2.b, p1, z2.b, z0.b -; CHECK-NEXT: st1b { z2.b }, p0, [x0] +; CHECK-NEXT: uzp1 z1.b, z3.b, z3.b +; CHECK-NEXT: splice z0.b, p1, z0.b, z1.b +; CHECK-NEXT: st1b { z0.b }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <256 x i8>, ptr %a %op2 = load <256 x i8>, ptr %b @@ -1008,8 +1004,8 @@ define <4 x i16> @udiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) #0 { ; VBITS_GE_128-LABEL: udiv_v4i16: ; VBITS_GE_128: // %bb.0: -; VBITS_GE_128-NEXT: ushll v1.4s, v1.4h, #0 ; VBITS_GE_128-NEXT: ptrue p0.s, vl4 +; VBITS_GE_128-NEXT: ushll v1.4s, v1.4h, #0 ; VBITS_GE_128-NEXT: ushll v0.4s, v0.4h, #0 ; VBITS_GE_128-NEXT: udiv z0.s, p0/m, z0.s, z1.s ; VBITS_GE_128-NEXT: xtn v0.4h, v0.4s @@ -1017,13 +1013,13 @@ ; ; VBITS_GE_256-LABEL: udiv_v4i16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: ushll v1.4s, v1.4h, #0 ; VBITS_GE_256-NEXT: ptrue p0.s, vl4 +; VBITS_GE_256-NEXT: ushll v1.4s, v1.4h, #0 ; VBITS_GE_256-NEXT: ushll v0.4s, v0.4h, #0 ; VBITS_GE_256-NEXT: udivr z1.s, p0/m, z1.s, z0.s ; VBITS_GE_256-NEXT: mov w8, v1.s[1] -; VBITS_GE_256-NEXT: mov w9, v1.s[2] ; VBITS_GE_256-NEXT: mov v0.16b, v1.16b +; VBITS_GE_256-NEXT: mov w9, v1.s[2] ; VBITS_GE_256-NEXT: mov v0.h[1], w8 ; VBITS_GE_256-NEXT: mov w8, v1.s[3] ; VBITS_GE_256-NEXT: mov v0.h[2], w9 @@ -1033,13 +1029,13 @@ ; ; VBITS_GE_512-LABEL: udiv_v4i16: ; VBITS_GE_512: // %bb.0: -; VBITS_GE_512-NEXT: ushll v1.4s, v1.4h, #0 ; VBITS_GE_512-NEXT: ptrue p0.s, vl4 +; VBITS_GE_512-NEXT: ushll v1.4s, v1.4h, #0 ; VBITS_GE_512-NEXT: ushll v0.4s, v0.4h, #0 ; VBITS_GE_512-NEXT: udivr z1.s, p0/m, z1.s, z0.s ; VBITS_GE_512-NEXT: mov w8, v1.s[1] -; VBITS_GE_512-NEXT: mov w9, v1.s[2] ; VBITS_GE_512-NEXT: mov v0.16b, v1.16b +; VBITS_GE_512-NEXT: mov w9, v1.s[2] ; VBITS_GE_512-NEXT: mov v0.h[1], w8 ; VBITS_GE_512-NEXT: mov w8, v1.s[3] ; VBITS_GE_512-NEXT: mov v0.h[2], w9 @@ -1053,8 +1049,8 @@ define <8 x i16> @udiv_v8i16(<8 x i16> %op1, <8 x i16> %op2) #0 { ; VBITS_GE_128-LABEL: udiv_v8i16: ; VBITS_GE_128: // %bb.0: -; VBITS_GE_128-NEXT: ushll2 v2.4s, v1.8h, #0 ; VBITS_GE_128-NEXT: ptrue p0.s, vl4 +; VBITS_GE_128-NEXT: ushll2 v2.4s, v1.8h, #0 ; VBITS_GE_128-NEXT: ushll2 v3.4s, v0.8h, #0 ; VBITS_GE_128-NEXT: ushll v1.4s, v1.4h, #0 ; VBITS_GE_128-NEXT: ushll v0.4s, v0.4h, #0 @@ -1065,9 +1061,9 @@ ; ; VBITS_GE_256-LABEL: udiv_v8i16: ; VBITS_GE_256: // %bb.0: +; VBITS_GE_256-NEXT: ptrue p0.s, vl8 ; VBITS_GE_256-NEXT: // kill: def $q1 killed $q1 def $z1 ; VBITS_GE_256-NEXT: // kill: def $q0 killed $q0 def $z0 -; VBITS_GE_256-NEXT: ptrue p0.s, vl8 ; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h ; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h ; VBITS_GE_256-NEXT: udiv z0.s, p0/m, z0.s, z1.s @@ -1077,9 +1073,9 @@ ; ; VBITS_GE_512-LABEL: udiv_v8i16: ; VBITS_GE_512: // %bb.0: +; VBITS_GE_512-NEXT: ptrue p0.s, vl8 ; VBITS_GE_512-NEXT: // kill: def $q1 killed $q1 def $z1 ; VBITS_GE_512-NEXT: // kill: def $q0 killed $q0 def $z0 -; VBITS_GE_512-NEXT: ptrue p0.s, vl8 ; VBITS_GE_512-NEXT: uunpklo z1.s, z1.h ; VBITS_GE_512-NEXT: uunpklo z0.s, z0.h ; VBITS_GE_512-NEXT: udiv z0.s, p0/m, z0.s, z1.s @@ -1093,24 +1089,24 @@ define void @udiv_v16i16(ptr %a, ptr %b) #0 { ; VBITS_GE_128-LABEL: udiv_v16i16: ; VBITS_GE_128: // %bb.0: -; VBITS_GE_128-NEXT: ldp q3, q0, [x1] ; VBITS_GE_128-NEXT: ptrue p0.s, vl4 +; VBITS_GE_128-NEXT: ldp q4, q1, [x1] +; 
VBITS_GE_128-NEXT: ldr q0, [x0, #16] +; VBITS_GE_128-NEXT: ushll2 v2.4s, v1.8h, #0 +; VBITS_GE_128-NEXT: ushll2 v3.4s, v0.8h, #0 +; VBITS_GE_128-NEXT: ushll2 v5.4s, v4.8h, #0 +; VBITS_GE_128-NEXT: ushll v4.4s, v4.4h, #0 +; VBITS_GE_128-NEXT: ushll v1.4s, v1.4h, #0 +; VBITS_GE_128-NEXT: ushll v0.4s, v0.4h, #0 +; VBITS_GE_128-NEXT: udivr z2.s, p0/m, z2.s, z3.s +; VBITS_GE_128-NEXT: ldr q3, [x0] ; VBITS_GE_128-NEXT: ushll2 v6.4s, v3.8h, #0 ; VBITS_GE_128-NEXT: ushll v3.4s, v3.4h, #0 -; VBITS_GE_128-NEXT: ldp q1, q2, [x0] -; VBITS_GE_128-NEXT: ushll2 v4.4s, v0.8h, #0 -; VBITS_GE_128-NEXT: ushll v0.4s, v0.4h, #0 -; VBITS_GE_128-NEXT: ushll2 v7.4s, v1.8h, #0 -; VBITS_GE_128-NEXT: ushll v1.4s, v1.4h, #0 -; VBITS_GE_128-NEXT: ushll2 v5.4s, v2.8h, #0 -; VBITS_GE_128-NEXT: ushll v2.4s, v2.4h, #0 -; VBITS_GE_128-NEXT: udiv z1.s, p0/m, z1.s, z3.s -; VBITS_GE_128-NEXT: udivr z4.s, p0/m, z4.s, z5.s -; VBITS_GE_128-NEXT: udivr z0.s, p0/m, z0.s, z2.s -; VBITS_GE_128-NEXT: movprfx z2, z7 -; VBITS_GE_128-NEXT: udiv z2.s, p0/m, z2.s, z6.s -; VBITS_GE_128-NEXT: uzp1 v1.8h, v1.8h, v2.8h -; VBITS_GE_128-NEXT: uzp1 v0.8h, v0.8h, v4.8h +; VBITS_GE_128-NEXT: udivr z5.s, p0/m, z5.s, z6.s +; VBITS_GE_128-NEXT: udiv z3.s, p0/m, z3.s, z4.s +; VBITS_GE_128-NEXT: udiv z0.s, p0/m, z0.s, z1.s +; VBITS_GE_128-NEXT: uzp1 v1.8h, v3.8h, v5.8h +; VBITS_GE_128-NEXT: uzp1 v0.8h, v0.8h, v2.8h ; VBITS_GE_128-NEXT: stp q1, q0, [x0] ; VBITS_GE_128-NEXT: ret ; @@ -1128,11 +1124,11 @@ ; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h ; VBITS_GE_256-NEXT: udivr z2.s, p1/m, z2.s, z3.s ; VBITS_GE_256-NEXT: udiv z0.s, p1/m, z0.s, z1.s -; VBITS_GE_256-NEXT: uzp1 z2.h, z2.h, z2.h -; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h ; VBITS_GE_256-NEXT: ptrue p1.h, vl8 -; VBITS_GE_256-NEXT: splice z2.h, p1, z2.h, z0.h -; VBITS_GE_256-NEXT: st1h { z2.h }, p0, [x0] +; VBITS_GE_256-NEXT: uzp1 z1.h, z2.h, z2.h +; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h +; VBITS_GE_256-NEXT: splice z1.h, p1, z1.h, z0.h +; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: udiv_v16i16: @@ -1197,11 +1193,11 @@ ; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: udivr z2.s, p1/m, z2.s, z3.s ; CHECK-NEXT: udiv z0.s, p1/m, z0.s, z1.s -; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h -; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: ptrue p1.h, vl64 -; CHECK-NEXT: splice z2.h, p1, z2.h, z0.h -; CHECK-NEXT: st1h { z2.h }, p0, [x0] +; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h +; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h +; CHECK-NEXT: splice z1.h, p1, z1.h, z0.h +; CHECK-NEXT: st1h { z1.h }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <128 x i16>, ptr %a %op2 = load <128 x i16>, ptr %b @@ -1214,8 +1210,8 @@ define <2 x i32> @udiv_v2i32(<2 x i32> %op1, <2 x i32> %op2) vscale_range(1,0) #0 { ; CHECK-LABEL: udiv_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -1228,8 +1224,8 @@ define <4 x i32> @udiv_v4i32(<4 x i32> %op1, <4 x i32> %op2) vscale_range(1,0) #0 { ; CHECK-LABEL: udiv_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -1257,31 +1253,30 @@ define void @udiv_v16i32(ptr %a, 
ptr %b) #0 { ; VBITS_GE_128-LABEL: udiv_v16i32: ; VBITS_GE_128: // %bb.0: -; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32] ; VBITS_GE_128-NEXT: ptrue p0.s, vl4 -; VBITS_GE_128-NEXT: ldp q4, q5, [x1, #32] -; VBITS_GE_128-NEXT: udiv z0.s, p0/m, z0.s, z4.s -; VBITS_GE_128-NEXT: udiv z1.s, p0/m, z1.s, z5.s -; VBITS_GE_128-NEXT: ldp q2, q3, [x0] -; VBITS_GE_128-NEXT: ldp q6, q4, [x1] -; VBITS_GE_128-NEXT: stp q0, q1, [x0, #32] -; VBITS_GE_128-NEXT: movprfx z0, z2 -; VBITS_GE_128-NEXT: udiv z0.s, p0/m, z0.s, z6.s -; VBITS_GE_128-NEXT: movprfx z1, z3 +; VBITS_GE_128-NEXT: ldp q0, q3, [x1] +; VBITS_GE_128-NEXT: ldp q1, q2, [x0] +; VBITS_GE_128-NEXT: ldp q5, q4, [x1, #32] +; VBITS_GE_128-NEXT: udivr z0.s, p0/m, z0.s, z1.s +; VBITS_GE_128-NEXT: ldr q1, [x0, #48] ; VBITS_GE_128-NEXT: udiv z1.s, p0/m, z1.s, z4.s -; VBITS_GE_128-NEXT: stp q0, q1, [x0] +; VBITS_GE_128-NEXT: ldr q4, [x0, #32] +; VBITS_GE_128-NEXT: udiv z4.s, p0/m, z4.s, z5.s +; VBITS_GE_128-NEXT: udiv z2.s, p0/m, z2.s, z3.s +; VBITS_GE_128-NEXT: stp q4, q1, [x0, #32] +; VBITS_GE_128-NEXT: stp q0, q2, [x0] ; VBITS_GE_128-NEXT: ret ; ; VBITS_GE_256-LABEL: udiv_v16i32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2] -; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1] ; VBITS_GE_256-NEXT: udiv z0.s, p0/m, z0.s, z2.s -; VBITS_GE_256-NEXT: udiv z1.s, p0/m, z1.s, z3.s +; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1] +; VBITS_GE_256-NEXT: udiv z1.s, p0/m, z1.s, z2.s ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0] ; VBITS_GE_256-NEXT: ret @@ -1337,8 +1332,8 @@ define <1 x i64> @udiv_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(1,0) #0 { ; CHECK-LABEL: udiv_v1i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: udiv z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -1351,8 +1346,8 @@ define <2 x i64> @udiv_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(1,0) #0 { ; CHECK-LABEL: udiv_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: udiv z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -1380,31 +1375,30 @@ define void @udiv_v8i64(ptr %a, ptr %b) #0 { ; VBITS_GE_128-LABEL: udiv_v8i64: ; VBITS_GE_128: // %bb.0: -; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32] ; VBITS_GE_128-NEXT: ptrue p0.d, vl2 -; VBITS_GE_128-NEXT: ldp q4, q5, [x1, #32] -; VBITS_GE_128-NEXT: udiv z0.d, p0/m, z0.d, z4.d -; VBITS_GE_128-NEXT: udiv z1.d, p0/m, z1.d, z5.d -; VBITS_GE_128-NEXT: ldp q2, q3, [x0] -; VBITS_GE_128-NEXT: ldp q6, q4, [x1] -; VBITS_GE_128-NEXT: stp q0, q1, [x0, #32] -; VBITS_GE_128-NEXT: movprfx z0, z2 -; VBITS_GE_128-NEXT: udiv z0.d, p0/m, z0.d, z6.d -; VBITS_GE_128-NEXT: movprfx z1, z3 +; VBITS_GE_128-NEXT: ldp q0, q3, [x1] +; VBITS_GE_128-NEXT: ldp q1, q2, [x0] +; VBITS_GE_128-NEXT: ldp q5, q4, [x1, #32] +; VBITS_GE_128-NEXT: udivr z0.d, p0/m, z0.d, z1.d +; VBITS_GE_128-NEXT: ldr q1, [x0, #48] ; VBITS_GE_128-NEXT: udiv z1.d, p0/m, z1.d, z4.d -; VBITS_GE_128-NEXT: stp q0, 
q1, [x0] +; VBITS_GE_128-NEXT: ldr q4, [x0, #32] +; VBITS_GE_128-NEXT: udiv z4.d, p0/m, z4.d, z5.d +; VBITS_GE_128-NEXT: udiv z2.d, p0/m, z2.d, z3.d +; VBITS_GE_128-NEXT: stp q4, q1, [x0, #32] +; VBITS_GE_128-NEXT: stp q0, q2, [x0] ; VBITS_GE_128-NEXT: ret ; ; VBITS_GE_256-LABEL: udiv_v8i64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] -; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1] ; VBITS_GE_256-NEXT: udiv z0.d, p0/m, z0.d, z2.d -; VBITS_GE_256-NEXT: udiv z1.d, p0/m, z1.d, z3.d +; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1] +; VBITS_GE_256-NEXT: udiv z1.d, p0/m, z1.d, z2.d ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0] ; VBITS_GE_256-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-extends.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-extends.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-extends.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-extends.ll @@ -58,8 +58,8 @@ define void @sext_v16i8_v16i16(<16 x i8> %a, ptr %out) vscale_range(2,0) #0 { ; CHECK-LABEL: sext_v16i8_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl16 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: sunpklo z0.h, z0.b ; CHECK-NEXT: st1h { z0.h }, p0, [x0] ; CHECK-NEXT: ret @@ -73,7 +73,7 @@ ; VBITS_GE_256-LABEL: sext_v32i8_v32i16: ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 -; VBITS_GE_256-NEXT: mov x8, #16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 ; VBITS_GE_256-NEXT: add z0.b, z0.b, z0.b @@ -157,13 +157,13 @@ ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: // kill: def $q0 killed $q0 def $z0 ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; VBITS_GE_256-NEXT: mov x8, #8 -; VBITS_GE_256-NEXT: sunpklo z0.h, z0.b ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 -; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h -; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0] +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 +; VBITS_GE_256-NEXT: sunpklo z0.h, z0.b ; VBITS_GE_256-NEXT: sunpklo z1.h, z1.b +; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h ; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h +; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ret ; @@ -242,14 +242,14 @@ ; VBITS_GE_256-LABEL: sext_v8i8_v8i64: ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: sshll v0.8h, v0.8b, #0 -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h -; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s -; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0] ; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h +; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s ; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s +; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0] ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ret ; @@ -308,8 +308,8 @@ define void @sext_v8i16_v8i32(<8 x i16> %a, ptr %out) vscale_range(2,0) #0 { ; CHECK-LABEL: sext_v8i16_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl8 +; CHECK-NEXT: // kill: def $q0 killed 
$q0 def $z0 ; CHECK-NEXT: sunpklo z0.s, z0.h ; CHECK-NEXT: st1w { z0.s }, p0, [x0] ; CHECK-NEXT: ret @@ -322,7 +322,7 @@ ; VBITS_GE_256-LABEL: sext_v16i16_v16i32: ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 -; VBITS_GE_256-NEXT: mov x8, #8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 ; VBITS_GE_256-NEXT: add z0.h, z0.h, z0.h @@ -406,13 +406,13 @@ ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: // kill: def $q0 killed $q0 def $z0 ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; VBITS_GE_256-NEXT: mov x8, #4 -; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 -; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s -; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0] +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 +; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h ; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h +; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s ; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s +; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0] ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ret ; @@ -472,8 +472,8 @@ define void @sext_v4i32_v4i64(<4 x i32> %a, ptr %out) vscale_range(2,0) #0 { ; CHECK-LABEL: sext_v4i32_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: sunpklo z0.d, z0.s ; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: ret @@ -486,7 +486,7 @@ ; VBITS_GE_256-LABEL: sext_v8i32_v8i64: ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 -; VBITS_GE_256-NEXT: mov x8, #4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 ; VBITS_GE_256-NEXT: add z0.s, z0.s, z0.s @@ -554,8 +554,8 @@ define void @zext_v16i8_v16i16(<16 x i8> %a, ptr %out) vscale_range(2,0) #0 { ; CHECK-LABEL: zext_v16i8_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl16 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: uunpklo z0.h, z0.b ; CHECK-NEXT: st1h { z0.h }, p0, [x0] ; CHECK-NEXT: ret @@ -569,7 +569,7 @@ ; VBITS_GE_256-LABEL: zext_v32i8_v32i16: ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 -; VBITS_GE_256-NEXT: mov x8, #16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 ; VBITS_GE_256-NEXT: add z0.b, z0.b, z0.b @@ -653,13 +653,13 @@ ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: // kill: def $q0 killed $q0 def $z0 ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; VBITS_GE_256-NEXT: mov x8, #8 -; VBITS_GE_256-NEXT: uunpklo z0.h, z0.b ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 -; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h -; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0] +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 +; VBITS_GE_256-NEXT: uunpklo z0.h, z0.b ; VBITS_GE_256-NEXT: uunpklo z1.h, z1.b +; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h ; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h +; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ret ; @@ -738,14 +738,14 @@ ; VBITS_GE_256-LABEL: zext_v8i8_v8i64: ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: ushll v0.8h, v0.8b, #0 -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h -; VBITS_GE_256-NEXT: 
uunpklo z0.d, z0.s -; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0] ; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h +; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s ; VBITS_GE_256-NEXT: uunpklo z1.d, z1.s +; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0] ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ret ; @@ -804,8 +804,8 @@ define void @zext_v8i16_v8i32(<8 x i16> %a, ptr %out) vscale_range(2,0) #0 { ; CHECK-LABEL: zext_v8i16_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: st1w { z0.s }, p0, [x0] ; CHECK-NEXT: ret @@ -818,7 +818,7 @@ ; VBITS_GE_256-LABEL: zext_v16i16_v16i32: ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 -; VBITS_GE_256-NEXT: mov x8, #8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 ; VBITS_GE_256-NEXT: add z0.h, z0.h, z0.h @@ -902,13 +902,13 @@ ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: // kill: def $q0 killed $q0 def $z0 ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; VBITS_GE_256-NEXT: mov x8, #4 -; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 -; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0] +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 +; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h ; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h +; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s ; VBITS_GE_256-NEXT: uunpklo z1.d, z1.s +; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0] ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ret ; @@ -968,8 +968,8 @@ define void @zext_v4i32_v4i64(<4 x i32> %a, ptr %out) vscale_range(2,0) #0 { ; CHECK-LABEL: zext_v4i32_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: uunpklo z0.d, z0.s ; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: ret @@ -982,7 +982,7 @@ ; VBITS_GE_256-LABEL: zext_v8i32_v8i64: ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 -; VBITS_GE_256-NEXT: mov x8, #4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 ; VBITS_GE_256-NEXT: add z0.s, z0.s, z0.s diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-log.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-log.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-log.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-log.ll @@ -48,8 +48,8 @@ define void @and_v64i8(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: and_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 // =0x20 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x8] @@ -146,8 +146,8 @@ define void @and_v32i16(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: and_v32i16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1] @@ -244,8 +244,8 @@ define void @and_v16i32(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: and_v16i32: ; VBITS_GE_256: // %bb.0: -; 
VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2] @@ -342,8 +342,8 @@ define void @and_v8i64(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: and_v8i64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] @@ -444,8 +444,8 @@ define void @or_v64i8(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: or_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 // =0x20 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x8] @@ -542,8 +542,8 @@ define void @or_v32i16(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: or_v32i16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1] @@ -640,8 +640,8 @@ define void @or_v16i32(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: or_v16i32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2] @@ -738,8 +738,8 @@ define void @or_v8i64(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: or_v8i64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] @@ -840,8 +840,8 @@ define void @xor_v64i8(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: xor_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 // =0x20 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x8] @@ -938,8 +938,8 @@ define void @xor_v32i16(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: xor_v32i16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1] @@ -1036,8 +1036,8 @@ define void @xor_v16i32(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: xor_v16i32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2] @@ -1134,8 +1134,8 @@ define void @xor_v8i64(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: xor_v8i64: ; 
VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-minmax.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-minmax.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-minmax.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-minmax.ll @@ -48,8 +48,8 @@ define void @smax_v64i8(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: smax_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 // =0x20 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x8] @@ -146,8 +146,8 @@ define void @smax_v32i16(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: smax_v32i16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1] @@ -244,8 +244,8 @@ define void @smax_v16i32(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: smax_v16i32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2] @@ -307,8 +307,8 @@ define <1 x i64> @smax_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(2,0) #0 { ; CHECK-LABEL: smax_v1i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -321,8 +321,8 @@ define <2 x i64> @smax_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(2,0) #0 { ; CHECK-LABEL: smax_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -350,8 +350,8 @@ define void @smax_v8i64(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: smax_v8i64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] @@ -452,8 +452,8 @@ define void @smin_v64i8(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: smin_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 // =0x20 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x8] @@ -550,8 +550,8 @@ define void @smin_v32i16(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: smin_v32i16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, 
#16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1] @@ -648,8 +648,8 @@ define void @smin_v16i32(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: smin_v16i32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2] @@ -711,8 +711,8 @@ define <1 x i64> @smin_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(2,0) #0 { ; CHECK-LABEL: smin_v1i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -725,8 +725,8 @@ define <2 x i64> @smin_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(2,0) #0 { ; CHECK-LABEL: smin_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -754,8 +754,8 @@ define void @smin_v8i64(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: smin_v8i64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] @@ -856,8 +856,8 @@ define void @umax_v64i8(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: umax_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 // =0x20 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x8] @@ -954,8 +954,8 @@ define void @umax_v32i16(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: umax_v32i16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1] @@ -1052,8 +1052,8 @@ define void @umax_v16i32(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: umax_v16i32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2] @@ -1115,8 +1115,8 @@ define <1 x i64> @umax_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(2,0) #0 { ; CHECK-LABEL: umax_v1i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -1129,8 
+1129,8 @@ define <2 x i64> @umax_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(2,0) #0 { ; CHECK-LABEL: umax_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -1158,8 +1158,8 @@ define void @umax_v8i64(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: umax_v8i64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] @@ -1260,8 +1260,8 @@ define void @umin_v64i8(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: umin_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 // =0x20 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x8] @@ -1358,8 +1358,8 @@ define void @umin_v32i16(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: umin_v32i16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1] @@ -1456,8 +1456,8 @@ define void @umin_v16i32(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: umin_v16i32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2] @@ -1519,8 +1519,8 @@ define <1 x i64> @umin_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(2,0) #0 { ; CHECK-LABEL: umin_v1i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -1533,8 +1533,8 @@ define <2 x i64> @umin_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(2,0) #0 { ; CHECK-LABEL: umin_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -1562,8 +1562,8 @@ define void @umin_v8i64(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: umin_v8i64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll @@ -16,8 +16,8 @@ 
define <8 x i8> @smulh_v8i8(<8 x i8> %op1, <8 x i8> %op2) vscale_range(2,0) #0 { ; CHECK-LABEL: smulh_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl8 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: smulh z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -36,8 +36,8 @@ define <16 x i8> @smulh_v16i8(<16 x i8> %op1, <16 x i8> %op2) vscale_range(2,0) #0 { ; CHECK-LABEL: smulh_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: smulh z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -73,8 +73,8 @@ define void @smulh_v64i8(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: smulh_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 // =0x20 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 // =0x20 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x8] @@ -150,8 +150,8 @@ define <4 x i16> @smulh_v4i16(<4 x i16> %op1, <4 x i16> %op2) vscale_range(2,0) #0 { ; CHECK-LABEL: smulh_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: smulh z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -168,8 +168,8 @@ define <8 x i16> @smulh_v8i16(<8 x i16> %op1, <8 x i16> %op2) vscale_range(2,0) #0 { ; CHECK-LABEL: smulh_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: smulh z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -205,8 +205,8 @@ define void @smulh_v32i16(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: smulh_v32i16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1] @@ -280,8 +280,8 @@ define <2 x i32> @smulh_v2i32(<2 x i32> %op1, <2 x i32> %op2) vscale_range(2,0) #0 { ; CHECK-LABEL: smulh_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: smulh z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -298,8 +298,8 @@ define <4 x i32> @smulh_v4i32(<4 x i32> %op1, <4 x i32> %op2) vscale_range(2,0) #0 { ; CHECK-LABEL: smulh_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: smulh z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -335,8 +335,8 @@ define void @smulh_v16i32(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: smulh_v16i32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; 
VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2] @@ -410,8 +410,8 @@ define <1 x i64> @smulh_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(2,0) #0 { ; CHECK-LABEL: smulh_v1i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: smulh z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -430,8 +430,8 @@ define <2 x i64> @smulh_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(2,0) #0 { ; CHECK-LABEL: smulh_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: smulh z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -467,8 +467,8 @@ define void @smulh_v8i64(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: smulh_v8i64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] @@ -547,8 +547,8 @@ define <8 x i8> @umulh_v8i8(<8 x i8> %op1, <8 x i8> %op2) vscale_range(2,0) #0 { ; CHECK-LABEL: umulh_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl8 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: umulh z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -565,8 +565,8 @@ define <16 x i8> @umulh_v16i8(<16 x i8> %op1, <16 x i8> %op2) vscale_range(2,0) #0 { ; CHECK-LABEL: umulh_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: umulh z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -602,8 +602,8 @@ define void @umulh_v64i8(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: umulh_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 // =0x20 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 // =0x20 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x8] @@ -680,8 +680,8 @@ define <4 x i16> @umulh_v4i16(<4 x i16> %op1, <4 x i16> %op2) vscale_range(2,0) #0 { ; CHECK-LABEL: umulh_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: umulh z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -698,8 +698,8 @@ define <8 x i16> @umulh_v8i16(<8 x i16> %op1, <8 x i16> %op2) vscale_range(2,0) #0 { ; CHECK-LABEL: umulh_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: umulh 
z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -735,8 +735,8 @@ define void @umulh_v32i16(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: umulh_v32i16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1] @@ -810,8 +810,8 @@ define <2 x i32> @umulh_v2i32(<2 x i32> %op1, <2 x i32> %op2) vscale_range(2,0) #0 { ; CHECK-LABEL: umulh_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: umulh z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -828,8 +828,8 @@ define <4 x i32> @umulh_v4i32(<4 x i32> %op1, <4 x i32> %op2) vscale_range(2,0) #0 { ; CHECK-LABEL: umulh_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: umulh z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -867,8 +867,8 @@ define void @umulh_v16i32(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: umulh_v16i32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2] @@ -942,8 +942,8 @@ define <1 x i64> @umulh_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(2,0) #0 { ; CHECK-LABEL: umulh_v1i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: umulh z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -960,8 +960,8 @@ define <2 x i64> @umulh_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(2,0) #0 { ; CHECK-LABEL: umulh_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: umulh z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -997,8 +997,8 @@ define void @umulh_v8i64(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: umulh_v8i64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll @@ -48,8 +48,8 @@ define i8 @uaddv_v64i8(ptr %a) #0 { ; VBITS_GE_256-LABEL: uaddv_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 // =0x20 ; VBITS_GE_256-NEXT: ld1b { 
z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: add z0.b, z1.b, z0.b @@ -138,8 +138,8 @@ define i16 @uaddv_v32i16(ptr %a) #0 { ; VBITS_GE_256-LABEL: uaddv_v32i16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: add z0.h, z1.h, z0.h @@ -228,8 +228,8 @@ define i32 @uaddv_v16i32(ptr %a) #0 { ; VBITS_GE_256-LABEL: uaddv_v16i32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: add z0.s, z1.s, z0.s @@ -317,8 +317,8 @@ define i64 @uaddv_v8i64(ptr %a) #0 { ; VBITS_GE_256-LABEL: uaddv_v8i64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: add z0.d, z1.d, z0.d @@ -406,8 +406,8 @@ define i8 @smaxv_v64i8(ptr %a) #0 { ; VBITS_GE_256-LABEL: smaxv_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 // =0x20 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: smax z0.b, p0/m, z0.b, z1.b @@ -491,8 +491,8 @@ define i16 @smaxv_v32i16(ptr %a) #0 { ; VBITS_GE_256-LABEL: smaxv_v32i16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: smax z0.h, p0/m, z0.h, z1.h @@ -576,8 +576,8 @@ define i32 @smaxv_v16i32(ptr %a) #0 { ; VBITS_GE_256-LABEL: smaxv_v16i32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: smax z0.s, p0/m, z0.s, z1.s @@ -638,8 +638,8 @@ define i64 @smaxv_v2i64(<2 x i64> %a) vscale_range(2,0) #0 { ; CHECK-LABEL: smaxv_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: smaxv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret @@ -663,8 +663,8 @@ define i64 @smaxv_v8i64(ptr %a) #0 { ; VBITS_GE_256-LABEL: smaxv_v8i64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: smax z0.d, p0/m, z0.d, z1.d @@ -752,8 +752,8 @@ define i8 @sminv_v64i8(ptr %a) #0 { ; VBITS_GE_256-LABEL: sminv_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 // =0x20 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: smin z0.b, p0/m, z0.b, z1.b @@ -837,8 +837,8 @@ define i16 @sminv_v32i16(ptr 
%a) #0 { ; VBITS_GE_256-LABEL: sminv_v32i16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: smin z0.h, p0/m, z0.h, z1.h @@ -922,8 +922,8 @@ define i32 @sminv_v16i32(ptr %a) #0 { ; VBITS_GE_256-LABEL: sminv_v16i32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: smin z0.s, p0/m, z0.s, z1.s @@ -984,8 +984,8 @@ define i64 @sminv_v2i64(<2 x i64> %a) vscale_range(2,0) #0 { ; CHECK-LABEL: sminv_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: sminv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret @@ -1009,8 +1009,8 @@ define i64 @sminv_v8i64(ptr %a) #0 { ; VBITS_GE_256-LABEL: sminv_v8i64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: smin z0.d, p0/m, z0.d, z1.d @@ -1098,8 +1098,8 @@ define i8 @umaxv_v64i8(ptr %a) #0 { ; VBITS_GE_256-LABEL: umaxv_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 // =0x20 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: umax z0.b, p0/m, z0.b, z1.b @@ -1183,8 +1183,8 @@ define i16 @umaxv_v32i16(ptr %a) #0 { ; VBITS_GE_256-LABEL: umaxv_v32i16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: umax z0.h, p0/m, z0.h, z1.h @@ -1268,8 +1268,8 @@ define i32 @umaxv_v16i32(ptr %a) #0 { ; VBITS_GE_256-LABEL: umaxv_v16i32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: umax z0.s, p0/m, z0.s, z1.s @@ -1330,8 +1330,8 @@ define i64 @umaxv_v2i64(<2 x i64> %a) vscale_range(2,0) #0 { ; CHECK-LABEL: umaxv_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: umaxv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret @@ -1355,8 +1355,8 @@ define i64 @umaxv_v8i64(ptr %a) #0 { ; VBITS_GE_256-LABEL: umaxv_v8i64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: umax z0.d, p0/m, z0.d, z1.d @@ -1444,8 +1444,8 @@ define i8 @uminv_v64i8(ptr %a) #0 { ; VBITS_GE_256-LABEL: uminv_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov 
w8, #32 // =0x20 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: umin z0.b, p0/m, z0.b, z1.b @@ -1529,8 +1529,8 @@ define i16 @uminv_v32i16(ptr %a) #0 { ; VBITS_GE_256-LABEL: uminv_v32i16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: umin z0.h, p0/m, z0.h, z1.h @@ -1614,8 +1614,8 @@ define i32 @uminv_v16i32(ptr %a) #0 { ; VBITS_GE_256-LABEL: uminv_v16i32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: umin z0.s, p0/m, z0.s, z1.s @@ -1676,8 +1676,8 @@ define i64 @uminv_v2i64(<2 x i64> %a) vscale_range(2,0) #0 { ; CHECK-LABEL: uminv_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: uminv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret @@ -1701,8 +1701,8 @@ define i64 @uminv_v8i64(ptr %a) #0 { ; VBITS_GE_256-LABEL: uminv_v8i64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: umin z0.d, p0/m, z0.d, z1.d diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll @@ -15,8 +15,8 @@ define <8 x i8> @srem_v8i8(<8 x i8> %op1, <8 x i8> %op2) #0 { ; VBITS_GE_128-LABEL: srem_v8i8: ; VBITS_GE_128: // %bb.0: -; VBITS_GE_128-NEXT: sshll v2.8h, v1.8b, #0 ; VBITS_GE_128-NEXT: ptrue p0.s, vl4 +; VBITS_GE_128-NEXT: sshll v2.8h, v1.8b, #0 ; VBITS_GE_128-NEXT: sshll v3.8h, v0.8b, #0 ; VBITS_GE_128-NEXT: sshll2 v4.4s, v2.8h, #0 ; VBITS_GE_128-NEXT: sshll2 v5.4s, v3.8h, #0 @@ -97,26 +97,25 @@ define <16 x i8> @srem_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 { ; VBITS_GE_128-LABEL: srem_v16i8: ; VBITS_GE_128: // %bb.0: -; VBITS_GE_128-NEXT: sshll2 v2.8h, v1.16b, #0 ; VBITS_GE_128-NEXT: ptrue p0.s, vl4 +; VBITS_GE_128-NEXT: sshll2 v2.8h, v1.16b, #0 ; VBITS_GE_128-NEXT: sshll2 v3.8h, v0.16b, #0 ; VBITS_GE_128-NEXT: sshll2 v4.4s, v2.8h, #0 ; VBITS_GE_128-NEXT: sshll2 v5.4s, v3.8h, #0 ; VBITS_GE_128-NEXT: sshll v2.4s, v2.4h, #0 ; VBITS_GE_128-NEXT: sshll v3.4s, v3.4h, #0 ; VBITS_GE_128-NEXT: sdivr z4.s, p0/m, z4.s, z5.s -; VBITS_GE_128-NEXT: sshll v6.8h, v1.8b, #0 -; VBITS_GE_128-NEXT: sshll v7.8h, v0.8b, #0 +; VBITS_GE_128-NEXT: sshll v5.8h, v0.8b, #0 +; VBITS_GE_128-NEXT: sshll2 v7.4s, v5.8h, #0 +; VBITS_GE_128-NEXT: sshll v5.4s, v5.4h, #0 ; VBITS_GE_128-NEXT: sdivr z2.s, p0/m, z2.s, z3.s -; VBITS_GE_128-NEXT: sshll2 v3.4s, v6.8h, #0 -; VBITS_GE_128-NEXT: sshll2 v5.4s, v7.8h, #0 -; VBITS_GE_128-NEXT: sshll v6.4s, v6.4h, #0 -; VBITS_GE_128-NEXT: sdivr z3.s, p0/m, z3.s, z5.s -; VBITS_GE_128-NEXT: sshll v7.4s, v7.4h, #0 +; VBITS_GE_128-NEXT: sshll v3.8h, v1.8b, #0 +; VBITS_GE_128-NEXT: sshll2 v6.4s, v3.8h, #0 +; VBITS_GE_128-NEXT: sshll v3.4s, v3.4h, #0 +; VBITS_GE_128-NEXT: sdivr z6.s, p0/m, z6.s, 
z7.s ; VBITS_GE_128-NEXT: uzp1 v2.8h, v2.8h, v4.8h -; VBITS_GE_128-NEXT: movprfx z4, z7 -; VBITS_GE_128-NEXT: sdiv z4.s, p0/m, z4.s, z6.s -; VBITS_GE_128-NEXT: uzp1 v3.8h, v4.8h, v3.8h +; VBITS_GE_128-NEXT: sdivr z3.s, p0/m, z3.s, z5.s +; VBITS_GE_128-NEXT: uzp1 v3.8h, v3.8h, v6.8h ; VBITS_GE_128-NEXT: uzp1 v2.16b, v3.16b, v2.16b ; VBITS_GE_128-NEXT: mls v0.16b, v2.16b, v1.16b ; VBITS_GE_128-NEXT: ret @@ -127,20 +126,20 @@ ; VBITS_GE_256-NEXT: // kill: def $q0 killed $q0 def $z0 ; VBITS_GE_256-NEXT: sunpklo z2.h, z1.b ; VBITS_GE_256-NEXT: sunpklo z3.h, z0.b +; VBITS_GE_256-NEXT: ptrue p0.s, vl8 ; VBITS_GE_256-NEXT: sunpklo z4.s, z2.h ; VBITS_GE_256-NEXT: sunpklo z5.s, z3.h ; VBITS_GE_256-NEXT: ext z2.b, z2.b, z2.b, #16 ; VBITS_GE_256-NEXT: ext z3.b, z3.b, z3.b, #16 -; VBITS_GE_256-NEXT: ptrue p0.s, vl8 ; VBITS_GE_256-NEXT: sunpklo z2.s, z2.h ; VBITS_GE_256-NEXT: sunpklo z3.s, z3.h ; VBITS_GE_256-NEXT: sdivr z4.s, p0/m, z4.s, z5.s ; VBITS_GE_256-NEXT: sdivr z2.s, p0/m, z2.s, z3.s -; VBITS_GE_256-NEXT: uzp1 z4.h, z4.h, z4.h -; VBITS_GE_256-NEXT: uzp1 z2.h, z2.h, z2.h ; VBITS_GE_256-NEXT: ptrue p0.h, vl8 -; VBITS_GE_256-NEXT: splice z4.h, p0, z4.h, z2.h -; VBITS_GE_256-NEXT: uzp1 z2.b, z4.b, z4.b +; VBITS_GE_256-NEXT: uzp1 z3.h, z4.h, z4.h +; VBITS_GE_256-NEXT: uzp1 z2.h, z2.h, z2.h +; VBITS_GE_256-NEXT: splice z3.h, p0, z3.h, z2.h +; VBITS_GE_256-NEXT: uzp1 z2.b, z3.b, z3.b ; VBITS_GE_256-NEXT: mls v0.16b, v2.16b, v1.16b ; VBITS_GE_256-NEXT: // kill: def $q0 killed $q0 killed $z0 ; VBITS_GE_256-NEXT: ret @@ -229,11 +228,11 @@ ; CHECK-NEXT: sunpklo z3.s, z3.h ; CHECK-NEXT: sdivr z4.s, p1/m, z4.s, z5.s ; CHECK-NEXT: sdivr z2.s, p1/m, z2.s, z3.s -; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h -; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h ; CHECK-NEXT: ptrue p1.h, vl64 -; CHECK-NEXT: splice z4.h, p1, z4.h, z2.h -; CHECK-NEXT: uzp1 z2.b, z4.b, z4.b +; CHECK-NEXT: uzp1 z3.h, z4.h, z4.h +; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h +; CHECK-NEXT: splice z3.h, p1, z3.h, z2.h +; CHECK-NEXT: uzp1 z2.b, z3.b, z3.b ; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b ; CHECK-NEXT: st1b { z0.b }, p0, [x0] ; CHECK-NEXT: ret @@ -251,42 +250,42 @@ ; CHECK-NEXT: ptrue p1.s, vl64 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] ; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1] -; CHECK-NEXT: ptrue p2.h, vl64 ; CHECK-NEXT: sunpklo z2.h, z1.b ; CHECK-NEXT: sunpklo z3.h, z0.b -; CHECK-NEXT: sunpklo z5.s, z2.h -; CHECK-NEXT: sunpklo z6.s, z3.h +; CHECK-NEXT: sunpklo z4.s, z2.h +; CHECK-NEXT: sunpklo z5.s, z3.h ; CHECK-NEXT: ext z2.b, z2.b, z2.b, #128 ; CHECK-NEXT: ext z3.b, z3.b, z3.b, #128 -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: sdivr z5.s, p1/m, z5.s, z6.s -; CHECK-NEXT: mov z6.d, z0.d ; CHECK-NEXT: sunpklo z2.s, z2.h ; CHECK-NEXT: sunpklo z3.s, z3.h -; CHECK-NEXT: ext z4.b, z4.b, z1.b, #128 +; CHECK-NEXT: sdivr z4.s, p1/m, z4.s, z5.s +; CHECK-NEXT: mov z5.d, z0.d +; CHECK-NEXT: ext z5.b, z5.b, z0.b, #128 +; CHECK-NEXT: sunpklo z5.h, z5.b +; CHECK-NEXT: sunpklo z7.s, z5.h +; CHECK-NEXT: ext z5.b, z5.b, z5.b, #128 ; CHECK-NEXT: sdivr z2.s, p1/m, z2.s, z3.s -; CHECK-NEXT: ext z6.b, z6.b, z0.b, #128 -; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h -; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h -; CHECK-NEXT: sunpklo z3.h, z4.b -; CHECK-NEXT: sunpklo z4.h, z6.b -; CHECK-NEXT: splice z5.h, p2, z5.h, z2.h -; CHECK-NEXT: sunpklo z2.s, z3.h -; CHECK-NEXT: sunpklo z6.s, z4.h +; CHECK-NEXT: mov z3.d, z1.d +; CHECK-NEXT: sunpklo z5.s, z5.h +; CHECK-NEXT: ext z3.b, z3.b, z1.b, #128 +; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h +; CHECK-NEXT: sunpklo z3.h, z3.b +; CHECK-NEXT: sunpklo z6.s, z3.h ; CHECK-NEXT: 
ext z3.b, z3.b, z3.b, #128 -; CHECK-NEXT: ext z4.b, z4.b, z4.b, #128 ; CHECK-NEXT: sunpklo z3.s, z3.h -; CHECK-NEXT: sunpklo z4.s, z4.h -; CHECK-NEXT: sdivr z2.s, p1/m, z2.s, z6.s -; CHECK-NEXT: sdivr z3.s, p1/m, z3.s, z4.s +; CHECK-NEXT: sdivr z6.s, p1/m, z6.s, z7.s ; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h +; CHECK-NEXT: sdivr z3.s, p1/m, z3.s, z5.s +; CHECK-NEXT: ptrue p1.h, vl64 +; CHECK-NEXT: uzp1 z5.h, z6.h, z6.h +; CHECK-NEXT: splice z4.h, p1, z4.h, z2.h +; CHECK-NEXT: uzp1 z2.b, z4.b, z4.b ; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h -; CHECK-NEXT: uzp1 z4.b, z5.b, z5.b -; CHECK-NEXT: splice z2.h, p2, z2.h, z3.h +; CHECK-NEXT: splice z5.h, p1, z5.h, z3.h ; CHECK-NEXT: ptrue p1.b, vl128 -; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b -; CHECK-NEXT: splice z4.b, p1, z4.b, z2.b -; CHECK-NEXT: mls z0.b, p0/m, z4.b, z1.b +; CHECK-NEXT: uzp1 z3.b, z5.b, z5.b +; CHECK-NEXT: splice z2.b, p1, z2.b, z3.b +; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b ; CHECK-NEXT: st1b { z0.b }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <256 x i8>, ptr %a @@ -301,8 +300,8 @@ define <4 x i16> @srem_v4i16(<4 x i16> %op1, <4 x i16> %op2) #0 { ; VBITS_GE_128-LABEL: srem_v4i16: ; VBITS_GE_128: // %bb.0: -; VBITS_GE_128-NEXT: sshll v2.4s, v1.4h, #0 ; VBITS_GE_128-NEXT: ptrue p0.s, vl4 +; VBITS_GE_128-NEXT: sshll v2.4s, v1.4h, #0 ; VBITS_GE_128-NEXT: sshll v3.4s, v0.4h, #0 ; VBITS_GE_128-NEXT: sdivr z2.s, p0/m, z2.s, z3.s ; VBITS_GE_128-NEXT: xtn v2.4h, v2.4s @@ -311,13 +310,13 @@ ; ; VBITS_GE_256-LABEL: srem_v4i16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: sshll v2.4s, v1.4h, #0 ; VBITS_GE_256-NEXT: ptrue p0.s, vl4 +; VBITS_GE_256-NEXT: sshll v2.4s, v1.4h, #0 ; VBITS_GE_256-NEXT: sshll v3.4s, v0.4h, #0 ; VBITS_GE_256-NEXT: sdivr z2.s, p0/m, z2.s, z3.s ; VBITS_GE_256-NEXT: mov w8, v2.s[1] -; VBITS_GE_256-NEXT: mov w9, v2.s[2] ; VBITS_GE_256-NEXT: mov v3.16b, v2.16b +; VBITS_GE_256-NEXT: mov w9, v2.s[2] ; VBITS_GE_256-NEXT: mov v3.h[1], w8 ; VBITS_GE_256-NEXT: mov w8, v2.s[3] ; VBITS_GE_256-NEXT: mov v3.h[2], w9 @@ -327,13 +326,13 @@ ; ; VBITS_GE_512-LABEL: srem_v4i16: ; VBITS_GE_512: // %bb.0: -; VBITS_GE_512-NEXT: sshll v2.4s, v1.4h, #0 ; VBITS_GE_512-NEXT: ptrue p0.s, vl4 +; VBITS_GE_512-NEXT: sshll v2.4s, v1.4h, #0 ; VBITS_GE_512-NEXT: sshll v3.4s, v0.4h, #0 ; VBITS_GE_512-NEXT: sdivr z2.s, p0/m, z2.s, z3.s ; VBITS_GE_512-NEXT: mov w8, v2.s[1] -; VBITS_GE_512-NEXT: mov w9, v2.s[2] ; VBITS_GE_512-NEXT: mov v3.16b, v2.16b +; VBITS_GE_512-NEXT: mov w9, v2.s[2] ; VBITS_GE_512-NEXT: mov v3.h[1], w8 ; VBITS_GE_512-NEXT: mov w8, v2.s[3] ; VBITS_GE_512-NEXT: mov v3.h[2], w9 @@ -347,23 +346,22 @@ define <8 x i16> @srem_v8i16(<8 x i16> %op1, <8 x i16> %op2) #0 { ; VBITS_GE_128-LABEL: srem_v8i16: ; VBITS_GE_128: // %bb.0: -; VBITS_GE_128-NEXT: sshll2 v2.4s, v1.8h, #0 ; VBITS_GE_128-NEXT: ptrue p0.s, vl4 +; VBITS_GE_128-NEXT: sshll2 v2.4s, v1.8h, #0 ; VBITS_GE_128-NEXT: sshll2 v3.4s, v0.8h, #0 -; VBITS_GE_128-NEXT: sshll v4.4s, v1.4h, #0 +; VBITS_GE_128-NEXT: sshll v4.4s, v0.4h, #0 ; VBITS_GE_128-NEXT: sdivr z2.s, p0/m, z2.s, z3.s -; VBITS_GE_128-NEXT: sshll v5.4s, v0.4h, #0 -; VBITS_GE_128-NEXT: movprfx z3, z5 -; VBITS_GE_128-NEXT: sdiv z3.s, p0/m, z3.s, z4.s +; VBITS_GE_128-NEXT: sshll v3.4s, v1.4h, #0 +; VBITS_GE_128-NEXT: sdivr z3.s, p0/m, z3.s, z4.s ; VBITS_GE_128-NEXT: uzp1 v2.8h, v3.8h, v2.8h ; VBITS_GE_128-NEXT: mls v0.8h, v2.8h, v1.8h ; VBITS_GE_128-NEXT: ret ; ; VBITS_GE_256-LABEL: srem_v8i16: ; VBITS_GE_256: // %bb.0: +; VBITS_GE_256-NEXT: ptrue p0.s, vl8 ; VBITS_GE_256-NEXT: // kill: def $q1 killed $q1 def $z1 ; VBITS_GE_256-NEXT: // kill: 
def $q0 killed $q0 def $z0 -; VBITS_GE_256-NEXT: ptrue p0.s, vl8 ; VBITS_GE_256-NEXT: sunpklo z2.s, z1.h ; VBITS_GE_256-NEXT: sunpklo z3.s, z0.h ; VBITS_GE_256-NEXT: sdivr z2.s, p0/m, z2.s, z3.s @@ -374,9 +372,9 @@ ; ; VBITS_GE_512-LABEL: srem_v8i16: ; VBITS_GE_512: // %bb.0: +; VBITS_GE_512-NEXT: ptrue p0.s, vl8 ; VBITS_GE_512-NEXT: // kill: def $q1 killed $q1 def $z1 ; VBITS_GE_512-NEXT: // kill: def $q0 killed $q0 def $z0 -; VBITS_GE_512-NEXT: ptrue p0.s, vl8 ; VBITS_GE_512-NEXT: sunpklo z2.s, z1.h ; VBITS_GE_512-NEXT: sunpklo z3.s, z0.h ; VBITS_GE_512-NEXT: sdivr z2.s, p0/m, z2.s, z3.s @@ -391,26 +389,27 @@ define void @srem_v16i16(ptr %a, ptr %b) #0 { ; VBITS_GE_128-LABEL: srem_v16i16: ; VBITS_GE_128: // %bb.0: -; VBITS_GE_128-NEXT: ldp q0, q1, [x1] ; VBITS_GE_128-NEXT: ptrue p0.s, vl4 -; VBITS_GE_128-NEXT: sshll2 v5.4s, v0.8h, #0 -; VBITS_GE_128-NEXT: sshll v7.4s, v0.4h, #0 -; VBITS_GE_128-NEXT: ldp q2, q3, [x0] -; VBITS_GE_128-NEXT: sshll2 v4.4s, v1.8h, #0 -; VBITS_GE_128-NEXT: sshll2 v6.4s, v2.8h, #0 -; VBITS_GE_128-NEXT: sshll v16.4s, v2.4h, #0 -; VBITS_GE_128-NEXT: sshll2 v17.4s, v3.8h, #0 +; VBITS_GE_128-NEXT: ldp q4, q1, [x1] +; VBITS_GE_128-NEXT: ldr q0, [x0, #16] +; VBITS_GE_128-NEXT: sshll2 v2.4s, v1.8h, #0 +; VBITS_GE_128-NEXT: sshll2 v3.4s, v0.8h, #0 +; VBITS_GE_128-NEXT: sshll2 v5.4s, v4.8h, #0 +; VBITS_GE_128-NEXT: sshll v16.4s, v0.4h, #0 +; VBITS_GE_128-NEXT: sdivr z2.s, p0/m, z2.s, z3.s +; VBITS_GE_128-NEXT: ldr q3, [x0] +; VBITS_GE_128-NEXT: sshll2 v6.4s, v3.8h, #0 +; VBITS_GE_128-NEXT: sshll v7.4s, v3.4h, #0 ; VBITS_GE_128-NEXT: sdivr z5.s, p0/m, z5.s, z6.s -; VBITS_GE_128-NEXT: sshll v6.4s, v1.4h, #0 +; VBITS_GE_128-NEXT: sshll v6.4s, v4.4h, #0 +; VBITS_GE_128-NEXT: sdivr z6.s, p0/m, z6.s, z7.s +; VBITS_GE_128-NEXT: sshll v7.4s, v1.4h, #0 ; VBITS_GE_128-NEXT: sdivr z7.s, p0/m, z7.s, z16.s -; VBITS_GE_128-NEXT: sshll v16.4s, v3.4h, #0 -; VBITS_GE_128-NEXT: sdivr z4.s, p0/m, z4.s, z17.s -; VBITS_GE_128-NEXT: uzp1 v5.8h, v7.8h, v5.8h -; VBITS_GE_128-NEXT: sdivr z6.s, p0/m, z6.s, z16.s -; VBITS_GE_128-NEXT: uzp1 v4.8h, v6.8h, v4.8h -; VBITS_GE_128-NEXT: mls v2.8h, v5.8h, v0.8h -; VBITS_GE_128-NEXT: mls v3.8h, v4.8h, v1.8h -; VBITS_GE_128-NEXT: stp q2, q3, [x0] +; VBITS_GE_128-NEXT: uzp1 v5.8h, v6.8h, v5.8h +; VBITS_GE_128-NEXT: mls v3.8h, v5.8h, v4.8h +; VBITS_GE_128-NEXT: uzp1 v2.8h, v7.8h, v2.8h +; VBITS_GE_128-NEXT: mls v0.8h, v2.8h, v1.8h +; VBITS_GE_128-NEXT: stp q3, q0, [x0] ; VBITS_GE_128-NEXT: ret ; ; VBITS_GE_256-LABEL: srem_v16i16: @@ -419,19 +418,19 @@ ; VBITS_GE_256-NEXT: ptrue p1.s, vl8 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x1] -; VBITS_GE_256-NEXT: mov z3.d, z1.d +; VBITS_GE_256-NEXT: sunpklo z2.s, z1.h +; VBITS_GE_256-NEXT: sunpklo z3.s, z0.h ; VBITS_GE_256-NEXT: mov z4.d, z0.d -; VBITS_GE_256-NEXT: ext z3.b, z3.b, z1.b, #16 ; VBITS_GE_256-NEXT: ext z4.b, z4.b, z0.b, #16 -; VBITS_GE_256-NEXT: sunpklo z2.s, z1.h -; VBITS_GE_256-NEXT: sunpklo z5.s, z0.h -; VBITS_GE_256-NEXT: sunpklo z3.s, z3.h +; VBITS_GE_256-NEXT: sdivr z2.s, p1/m, z2.s, z3.s +; VBITS_GE_256-NEXT: mov z3.d, z1.d ; VBITS_GE_256-NEXT: sunpklo z4.s, z4.h -; VBITS_GE_256-NEXT: sdivr z2.s, p1/m, z2.s, z5.s +; VBITS_GE_256-NEXT: ext z3.b, z3.b, z1.b, #16 +; VBITS_GE_256-NEXT: sunpklo z3.s, z3.h ; VBITS_GE_256-NEXT: sdivr z3.s, p1/m, z3.s, z4.s +; VBITS_GE_256-NEXT: ptrue p1.h, vl8 ; VBITS_GE_256-NEXT: uzp1 z2.h, z2.h, z2.h ; VBITS_GE_256-NEXT: uzp1 z3.h, z3.h, z3.h -; VBITS_GE_256-NEXT: ptrue p1.h, vl8 ; VBITS_GE_256-NEXT: splice z2.h, p1, 
z2.h, z3.h ; VBITS_GE_256-NEXT: mls z0.h, p0/m, z2.h, z1.h ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0] @@ -506,19 +505,19 @@ ; CHECK-NEXT: ptrue p1.s, vl64 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] -; CHECK-NEXT: mov z3.d, z1.d +; CHECK-NEXT: sunpklo z2.s, z1.h +; CHECK-NEXT: sunpklo z3.s, z0.h ; CHECK-NEXT: mov z4.d, z0.d -; CHECK-NEXT: ext z3.b, z3.b, z1.b, #128 ; CHECK-NEXT: ext z4.b, z4.b, z0.b, #128 -; CHECK-NEXT: sunpklo z2.s, z1.h -; CHECK-NEXT: sunpklo z5.s, z0.h -; CHECK-NEXT: sunpklo z3.s, z3.h +; CHECK-NEXT: sdivr z2.s, p1/m, z2.s, z3.s +; CHECK-NEXT: mov z3.d, z1.d ; CHECK-NEXT: sunpklo z4.s, z4.h -; CHECK-NEXT: sdivr z2.s, p1/m, z2.s, z5.s +; CHECK-NEXT: ext z3.b, z3.b, z1.b, #128 +; CHECK-NEXT: sunpklo z3.s, z3.h ; CHECK-NEXT: sdivr z3.s, p1/m, z3.s, z4.s +; CHECK-NEXT: ptrue p1.h, vl64 ; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h ; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h -; CHECK-NEXT: ptrue p1.h, vl64 ; CHECK-NEXT: splice z2.h, p1, z2.h, z3.h ; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h ; CHECK-NEXT: st1h { z0.h }, p0, [x0] @@ -583,41 +582,41 @@ define void @srem_v16i32(ptr %a, ptr %b) #0 { ; VBITS_GE_128-LABEL: srem_v16i32: ; VBITS_GE_128: // %bb.0: -; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32] +; VBITS_GE_128-NEXT: ldp q0, q3, [x1] ; VBITS_GE_128-NEXT: ptrue p0.s, vl4 -; VBITS_GE_128-NEXT: ldp q2, q3, [x0] -; VBITS_GE_128-NEXT: ldp q4, q5, [x1, #32] -; VBITS_GE_128-NEXT: movprfx z16, z0 -; VBITS_GE_128-NEXT: sdiv z16.s, p0/m, z16.s, z4.s -; VBITS_GE_128-NEXT: mls v0.4s, v16.4s, v4.4s -; VBITS_GE_128-NEXT: movprfx z16, z1 -; VBITS_GE_128-NEXT: sdiv z16.s, p0/m, z16.s, z5.s -; VBITS_GE_128-NEXT: ldp q7, q6, [x1] -; VBITS_GE_128-NEXT: movprfx z4, z3 -; VBITS_GE_128-NEXT: sdiv z4.s, p0/m, z4.s, z6.s -; VBITS_GE_128-NEXT: mls v1.4s, v16.4s, v5.4s -; VBITS_GE_128-NEXT: movprfx z5, z2 -; VBITS_GE_128-NEXT: sdiv z5.s, p0/m, z5.s, z7.s -; VBITS_GE_128-NEXT: mls v2.4s, v5.4s, v7.4s -; VBITS_GE_128-NEXT: mls v3.4s, v4.4s, v6.4s -; VBITS_GE_128-NEXT: stp q0, q1, [x0, #32] -; VBITS_GE_128-NEXT: stp q2, q3, [x0] +; VBITS_GE_128-NEXT: ldp q1, q2, [x0] +; VBITS_GE_128-NEXT: movprfx z4, z1 +; VBITS_GE_128-NEXT: sdiv z4.s, p0/m, z4.s, z0.s +; VBITS_GE_128-NEXT: ldp q16, q5, [x0, #32] +; VBITS_GE_128-NEXT: movprfx z19, z2 +; VBITS_GE_128-NEXT: sdiv z19.s, p0/m, z19.s, z3.s +; VBITS_GE_128-NEXT: ldp q17, q6, [x1, #32] +; VBITS_GE_128-NEXT: movprfx z7, z5 +; VBITS_GE_128-NEXT: sdiv z7.s, p0/m, z7.s, z6.s +; VBITS_GE_128-NEXT: movprfx z18, z16 +; VBITS_GE_128-NEXT: sdiv z18.s, p0/m, z18.s, z17.s +; VBITS_GE_128-NEXT: mls v1.4s, v4.4s, v0.4s +; VBITS_GE_128-NEXT: mls v2.4s, v19.4s, v3.4s +; VBITS_GE_128-NEXT: mls v16.4s, v18.4s, v17.4s +; VBITS_GE_128-NEXT: mls v5.4s, v7.4s, v6.4s +; VBITS_GE_128-NEXT: stp q1, q2, [x0] +; VBITS_GE_128-NEXT: stp q16, q5, [x0, #32] ; VBITS_GE_128-NEXT: ret ; ; VBITS_GE_256-LABEL: srem_v16i32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2] -; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1] -; VBITS_GE_256-NEXT: movprfx z4, z0 -; VBITS_GE_256-NEXT: sdiv z4.s, p0/m, z4.s, z2.s +; VBITS_GE_256-NEXT: ld1w { z4.s }, p0/z, [x1] +; VBITS_GE_256-NEXT: movprfx z3, z0 +; VBITS_GE_256-NEXT: sdiv z3.s, p0/m, z3.s, z2.s ; VBITS_GE_256-NEXT: movprfx z5, z1 -; VBITS_GE_256-NEXT: sdiv z5.s, p0/m, z5.s, z3.s 
-; VBITS_GE_256-NEXT: mls z0.s, p0/m, z4.s, z2.s -; VBITS_GE_256-NEXT: mls z1.s, p0/m, z5.s, z3.s +; VBITS_GE_256-NEXT: sdiv z5.s, p0/m, z5.s, z4.s +; VBITS_GE_256-NEXT: mls z0.s, p0/m, z3.s, z2.s +; VBITS_GE_256-NEXT: mls z1.s, p0/m, z5.s, z4.s ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0] ; VBITS_GE_256-NEXT: ret @@ -680,8 +679,8 @@ define <1 x i64> @srem_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(1,0) #0 { ; CHECK-LABEL: srem_v1i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: movprfx z2, z0 ; CHECK-NEXT: sdiv z2.d, p0/m, z2.d, z1.d @@ -697,8 +696,8 @@ define <2 x i64> @srem_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(1,0) #0 { ; CHECK-LABEL: srem_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: movprfx z2, z0 ; CHECK-NEXT: sdiv z2.d, p0/m, z2.d, z1.d @@ -730,43 +729,42 @@ define void @srem_v8i64(ptr %a, ptr %b) #0 { ; VBITS_GE_128-LABEL: srem_v8i64: ; VBITS_GE_128: // %bb.0: -; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32] ; VBITS_GE_128-NEXT: ptrue p0.d, vl2 -; VBITS_GE_128-NEXT: ldp q2, q3, [x1, #32] -; VBITS_GE_128-NEXT: movprfx z16, z1 -; VBITS_GE_128-NEXT: sdiv z16.d, p0/m, z16.d, z3.d -; VBITS_GE_128-NEXT: mls z1.d, p0/m, z16.d, z3.d -; VBITS_GE_128-NEXT: movprfx z3, z0 -; VBITS_GE_128-NEXT: sdiv z3.d, p0/m, z3.d, z2.d -; VBITS_GE_128-NEXT: mls z0.d, p0/m, z3.d, z2.d -; VBITS_GE_128-NEXT: ldp q4, q5, [x0] -; VBITS_GE_128-NEXT: ldp q7, q6, [x1] -; VBITS_GE_128-NEXT: movprfx z16, z5 -; VBITS_GE_128-NEXT: sdiv z16.d, p0/m, z16.d, z6.d -; VBITS_GE_128-NEXT: movprfx z2, z4 -; VBITS_GE_128-NEXT: sdiv z2.d, p0/m, z2.d, z7.d -; VBITS_GE_128-NEXT: stp q0, q1, [x0, #32] -; VBITS_GE_128-NEXT: movprfx z0, z4 -; VBITS_GE_128-NEXT: mls z0.d, p0/m, z2.d, z7.d -; VBITS_GE_128-NEXT: movprfx z1, z5 -; VBITS_GE_128-NEXT: mls z1.d, p0/m, z16.d, z6.d +; VBITS_GE_128-NEXT: ldp q0, q3, [x1] +; VBITS_GE_128-NEXT: ldp q1, q2, [x0] +; VBITS_GE_128-NEXT: movprfx z4, z1 +; VBITS_GE_128-NEXT: sdiv z4.d, p0/m, z4.d, z0.d +; VBITS_GE_128-NEXT: ldp q16, q5, [x0, #32] +; VBITS_GE_128-NEXT: movprfx z19, z2 +; VBITS_GE_128-NEXT: sdiv z19.d, p0/m, z19.d, z3.d +; VBITS_GE_128-NEXT: ldp q17, q6, [x1, #32] +; VBITS_GE_128-NEXT: movprfx z7, z5 +; VBITS_GE_128-NEXT: sdiv z7.d, p0/m, z7.d, z6.d +; VBITS_GE_128-NEXT: movprfx z18, z16 +; VBITS_GE_128-NEXT: sdiv z18.d, p0/m, z18.d, z17.d +; VBITS_GE_128-NEXT: mls z16.d, p0/m, z18.d, z17.d +; VBITS_GE_128-NEXT: mls z5.d, p0/m, z7.d, z6.d +; VBITS_GE_128-NEXT: msb z0.d, p0/m, z4.d, z1.d +; VBITS_GE_128-NEXT: movprfx z1, z2 +; VBITS_GE_128-NEXT: mls z1.d, p0/m, z19.d, z3.d +; VBITS_GE_128-NEXT: stp q16, q5, [x0, #32] ; VBITS_GE_128-NEXT: stp q0, q1, [x0] ; VBITS_GE_128-NEXT: ret ; ; VBITS_GE_256-LABEL: srem_v8i64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] -; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1] -; VBITS_GE_256-NEXT: movprfx z4, z0 -; VBITS_GE_256-NEXT: sdiv z4.d, p0/m, z4.d, z2.d +; VBITS_GE_256-NEXT: 
ld1d { z4.d }, p0/z, [x1] +; VBITS_GE_256-NEXT: movprfx z3, z0 +; VBITS_GE_256-NEXT: sdiv z3.d, p0/m, z3.d, z2.d ; VBITS_GE_256-NEXT: movprfx z5, z1 -; VBITS_GE_256-NEXT: sdiv z5.d, p0/m, z5.d, z3.d -; VBITS_GE_256-NEXT: mls z0.d, p0/m, z4.d, z2.d -; VBITS_GE_256-NEXT: mls z1.d, p0/m, z5.d, z3.d +; VBITS_GE_256-NEXT: sdiv z5.d, p0/m, z5.d, z4.d +; VBITS_GE_256-NEXT: mls z0.d, p0/m, z3.d, z2.d +; VBITS_GE_256-NEXT: mls z1.d, p0/m, z5.d, z4.d ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0] ; VBITS_GE_256-NEXT: ret @@ -833,8 +831,8 @@ define <8 x i8> @urem_v8i8(<8 x i8> %op1, <8 x i8> %op2) #0 { ; VBITS_GE_128-LABEL: urem_v8i8: ; VBITS_GE_128: // %bb.0: -; VBITS_GE_128-NEXT: ushll v2.8h, v1.8b, #0 ; VBITS_GE_128-NEXT: ptrue p0.s, vl4 +; VBITS_GE_128-NEXT: ushll v2.8h, v1.8b, #0 ; VBITS_GE_128-NEXT: ushll v3.8h, v0.8b, #0 ; VBITS_GE_128-NEXT: ushll2 v4.4s, v2.8h, #0 ; VBITS_GE_128-NEXT: ushll2 v5.4s, v3.8h, #0 @@ -915,26 +913,25 @@ define <16 x i8> @urem_v16i8(<16 x i8> %op1, <16 x i8> %op2) #0 { ; VBITS_GE_128-LABEL: urem_v16i8: ; VBITS_GE_128: // %bb.0: -; VBITS_GE_128-NEXT: ushll2 v2.8h, v1.16b, #0 ; VBITS_GE_128-NEXT: ptrue p0.s, vl4 +; VBITS_GE_128-NEXT: ushll2 v2.8h, v1.16b, #0 ; VBITS_GE_128-NEXT: ushll2 v3.8h, v0.16b, #0 ; VBITS_GE_128-NEXT: ushll2 v4.4s, v2.8h, #0 ; VBITS_GE_128-NEXT: ushll2 v5.4s, v3.8h, #0 ; VBITS_GE_128-NEXT: ushll v2.4s, v2.4h, #0 ; VBITS_GE_128-NEXT: ushll v3.4s, v3.4h, #0 ; VBITS_GE_128-NEXT: udivr z4.s, p0/m, z4.s, z5.s -; VBITS_GE_128-NEXT: ushll v6.8h, v1.8b, #0 -; VBITS_GE_128-NEXT: ushll v7.8h, v0.8b, #0 +; VBITS_GE_128-NEXT: ushll v5.8h, v0.8b, #0 +; VBITS_GE_128-NEXT: ushll2 v7.4s, v5.8h, #0 +; VBITS_GE_128-NEXT: ushll v5.4s, v5.4h, #0 ; VBITS_GE_128-NEXT: udivr z2.s, p0/m, z2.s, z3.s -; VBITS_GE_128-NEXT: ushll2 v3.4s, v6.8h, #0 -; VBITS_GE_128-NEXT: ushll2 v5.4s, v7.8h, #0 -; VBITS_GE_128-NEXT: ushll v6.4s, v6.4h, #0 -; VBITS_GE_128-NEXT: udivr z3.s, p0/m, z3.s, z5.s -; VBITS_GE_128-NEXT: ushll v7.4s, v7.4h, #0 +; VBITS_GE_128-NEXT: ushll v3.8h, v1.8b, #0 +; VBITS_GE_128-NEXT: ushll2 v6.4s, v3.8h, #0 +; VBITS_GE_128-NEXT: ushll v3.4s, v3.4h, #0 +; VBITS_GE_128-NEXT: udivr z6.s, p0/m, z6.s, z7.s ; VBITS_GE_128-NEXT: uzp1 v2.8h, v2.8h, v4.8h -; VBITS_GE_128-NEXT: movprfx z4, z7 -; VBITS_GE_128-NEXT: udiv z4.s, p0/m, z4.s, z6.s -; VBITS_GE_128-NEXT: uzp1 v3.8h, v4.8h, v3.8h +; VBITS_GE_128-NEXT: udivr z3.s, p0/m, z3.s, z5.s +; VBITS_GE_128-NEXT: uzp1 v3.8h, v3.8h, v6.8h ; VBITS_GE_128-NEXT: uzp1 v2.16b, v3.16b, v2.16b ; VBITS_GE_128-NEXT: mls v0.16b, v2.16b, v1.16b ; VBITS_GE_128-NEXT: ret @@ -945,20 +942,20 @@ ; VBITS_GE_256-NEXT: // kill: def $q0 killed $q0 def $z0 ; VBITS_GE_256-NEXT: uunpklo z2.h, z1.b ; VBITS_GE_256-NEXT: uunpklo z3.h, z0.b +; VBITS_GE_256-NEXT: ptrue p0.s, vl8 ; VBITS_GE_256-NEXT: uunpklo z4.s, z2.h ; VBITS_GE_256-NEXT: uunpklo z5.s, z3.h ; VBITS_GE_256-NEXT: ext z2.b, z2.b, z2.b, #16 ; VBITS_GE_256-NEXT: ext z3.b, z3.b, z3.b, #16 -; VBITS_GE_256-NEXT: ptrue p0.s, vl8 ; VBITS_GE_256-NEXT: uunpklo z2.s, z2.h ; VBITS_GE_256-NEXT: uunpklo z3.s, z3.h ; VBITS_GE_256-NEXT: udivr z4.s, p0/m, z4.s, z5.s ; VBITS_GE_256-NEXT: udivr z2.s, p0/m, z2.s, z3.s -; VBITS_GE_256-NEXT: uzp1 z4.h, z4.h, z4.h -; VBITS_GE_256-NEXT: uzp1 z2.h, z2.h, z2.h ; VBITS_GE_256-NEXT: ptrue p0.h, vl8 -; VBITS_GE_256-NEXT: splice z4.h, p0, z4.h, z2.h -; VBITS_GE_256-NEXT: uzp1 z2.b, z4.b, z4.b +; VBITS_GE_256-NEXT: uzp1 z3.h, z4.h, z4.h +; VBITS_GE_256-NEXT: uzp1 z2.h, z2.h, z2.h +; VBITS_GE_256-NEXT: splice z3.h, 
p0, z3.h, z2.h +; VBITS_GE_256-NEXT: uzp1 z2.b, z3.b, z3.b ; VBITS_GE_256-NEXT: mls v0.16b, v2.16b, v1.16b ; VBITS_GE_256-NEXT: // kill: def $q0 killed $q0 killed $z0 ; VBITS_GE_256-NEXT: ret @@ -1047,11 +1044,11 @@ ; CHECK-NEXT: uunpklo z3.s, z3.h ; CHECK-NEXT: udivr z4.s, p1/m, z4.s, z5.s ; CHECK-NEXT: udivr z2.s, p1/m, z2.s, z3.s -; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h -; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h ; CHECK-NEXT: ptrue p1.h, vl64 -; CHECK-NEXT: splice z4.h, p1, z4.h, z2.h -; CHECK-NEXT: uzp1 z2.b, z4.b, z4.b +; CHECK-NEXT: uzp1 z3.h, z4.h, z4.h +; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h +; CHECK-NEXT: splice z3.h, p1, z3.h, z2.h +; CHECK-NEXT: uzp1 z2.b, z3.b, z3.b ; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b ; CHECK-NEXT: st1b { z0.b }, p0, [x0] ; CHECK-NEXT: ret @@ -1069,42 +1066,42 @@ ; CHECK-NEXT: ptrue p1.s, vl64 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] ; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1] -; CHECK-NEXT: ptrue p2.h, vl64 ; CHECK-NEXT: uunpklo z2.h, z1.b ; CHECK-NEXT: uunpklo z3.h, z0.b -; CHECK-NEXT: uunpklo z5.s, z2.h -; CHECK-NEXT: uunpklo z6.s, z3.h +; CHECK-NEXT: uunpklo z4.s, z2.h +; CHECK-NEXT: uunpklo z5.s, z3.h ; CHECK-NEXT: ext z2.b, z2.b, z2.b, #128 ; CHECK-NEXT: ext z3.b, z3.b, z3.b, #128 -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: udivr z5.s, p1/m, z5.s, z6.s -; CHECK-NEXT: mov z6.d, z0.d ; CHECK-NEXT: uunpklo z2.s, z2.h ; CHECK-NEXT: uunpklo z3.s, z3.h -; CHECK-NEXT: ext z4.b, z4.b, z1.b, #128 +; CHECK-NEXT: udivr z4.s, p1/m, z4.s, z5.s +; CHECK-NEXT: mov z5.d, z0.d +; CHECK-NEXT: ext z5.b, z5.b, z0.b, #128 +; CHECK-NEXT: uunpklo z5.h, z5.b +; CHECK-NEXT: uunpklo z7.s, z5.h +; CHECK-NEXT: ext z5.b, z5.b, z5.b, #128 ; CHECK-NEXT: udivr z2.s, p1/m, z2.s, z3.s -; CHECK-NEXT: ext z6.b, z6.b, z0.b, #128 -; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h -; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h -; CHECK-NEXT: uunpklo z3.h, z4.b -; CHECK-NEXT: uunpklo z4.h, z6.b -; CHECK-NEXT: splice z5.h, p2, z5.h, z2.h -; CHECK-NEXT: uunpklo z2.s, z3.h -; CHECK-NEXT: uunpklo z6.s, z4.h +; CHECK-NEXT: mov z3.d, z1.d +; CHECK-NEXT: uunpklo z5.s, z5.h +; CHECK-NEXT: ext z3.b, z3.b, z1.b, #128 +; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h +; CHECK-NEXT: uunpklo z3.h, z3.b +; CHECK-NEXT: uunpklo z6.s, z3.h ; CHECK-NEXT: ext z3.b, z3.b, z3.b, #128 -; CHECK-NEXT: ext z4.b, z4.b, z4.b, #128 ; CHECK-NEXT: uunpklo z3.s, z3.h -; CHECK-NEXT: uunpklo z4.s, z4.h -; CHECK-NEXT: udivr z2.s, p1/m, z2.s, z6.s -; CHECK-NEXT: udivr z3.s, p1/m, z3.s, z4.s +; CHECK-NEXT: udivr z6.s, p1/m, z6.s, z7.s ; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h +; CHECK-NEXT: udivr z3.s, p1/m, z3.s, z5.s +; CHECK-NEXT: ptrue p1.h, vl64 +; CHECK-NEXT: uzp1 z5.h, z6.h, z6.h +; CHECK-NEXT: splice z4.h, p1, z4.h, z2.h +; CHECK-NEXT: uzp1 z2.b, z4.b, z4.b ; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h -; CHECK-NEXT: uzp1 z4.b, z5.b, z5.b -; CHECK-NEXT: splice z2.h, p2, z2.h, z3.h +; CHECK-NEXT: splice z5.h, p1, z5.h, z3.h ; CHECK-NEXT: ptrue p1.b, vl128 -; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b -; CHECK-NEXT: splice z4.b, p1, z4.b, z2.b -; CHECK-NEXT: mls z0.b, p0/m, z4.b, z1.b +; CHECK-NEXT: uzp1 z3.b, z5.b, z5.b +; CHECK-NEXT: splice z2.b, p1, z2.b, z3.b +; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b ; CHECK-NEXT: st1b { z0.b }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <256 x i8>, ptr %a @@ -1119,8 +1116,8 @@ define <4 x i16> @urem_v4i16(<4 x i16> %op1, <4 x i16> %op2) #0 { ; VBITS_GE_128-LABEL: urem_v4i16: ; VBITS_GE_128: // %bb.0: -; VBITS_GE_128-NEXT: ushll v2.4s, v1.4h, #0 ; VBITS_GE_128-NEXT: ptrue p0.s, vl4 +; VBITS_GE_128-NEXT: ushll v2.4s, v1.4h, #0 ; VBITS_GE_128-NEXT: ushll v3.4s, 
v0.4h, #0 ; VBITS_GE_128-NEXT: udivr z2.s, p0/m, z2.s, z3.s ; VBITS_GE_128-NEXT: xtn v2.4h, v2.4s @@ -1129,13 +1126,13 @@ ; ; VBITS_GE_256-LABEL: urem_v4i16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: ushll v2.4s, v1.4h, #0 ; VBITS_GE_256-NEXT: ptrue p0.s, vl4 +; VBITS_GE_256-NEXT: ushll v2.4s, v1.4h, #0 ; VBITS_GE_256-NEXT: ushll v3.4s, v0.4h, #0 ; VBITS_GE_256-NEXT: udivr z2.s, p0/m, z2.s, z3.s ; VBITS_GE_256-NEXT: mov w8, v2.s[1] -; VBITS_GE_256-NEXT: mov w9, v2.s[2] ; VBITS_GE_256-NEXT: mov v3.16b, v2.16b +; VBITS_GE_256-NEXT: mov w9, v2.s[2] ; VBITS_GE_256-NEXT: mov v3.h[1], w8 ; VBITS_GE_256-NEXT: mov w8, v2.s[3] ; VBITS_GE_256-NEXT: mov v3.h[2], w9 @@ -1145,13 +1142,13 @@ ; ; VBITS_GE_512-LABEL: urem_v4i16: ; VBITS_GE_512: // %bb.0: -; VBITS_GE_512-NEXT: ushll v2.4s, v1.4h, #0 ; VBITS_GE_512-NEXT: ptrue p0.s, vl4 +; VBITS_GE_512-NEXT: ushll v2.4s, v1.4h, #0 ; VBITS_GE_512-NEXT: ushll v3.4s, v0.4h, #0 ; VBITS_GE_512-NEXT: udivr z2.s, p0/m, z2.s, z3.s ; VBITS_GE_512-NEXT: mov w8, v2.s[1] -; VBITS_GE_512-NEXT: mov w9, v2.s[2] ; VBITS_GE_512-NEXT: mov v3.16b, v2.16b +; VBITS_GE_512-NEXT: mov w9, v2.s[2] ; VBITS_GE_512-NEXT: mov v3.h[1], w8 ; VBITS_GE_512-NEXT: mov w8, v2.s[3] ; VBITS_GE_512-NEXT: mov v3.h[2], w9 @@ -1165,23 +1162,22 @@ define <8 x i16> @urem_v8i16(<8 x i16> %op1, <8 x i16> %op2) #0 { ; VBITS_GE_128-LABEL: urem_v8i16: ; VBITS_GE_128: // %bb.0: -; VBITS_GE_128-NEXT: ushll2 v2.4s, v1.8h, #0 ; VBITS_GE_128-NEXT: ptrue p0.s, vl4 +; VBITS_GE_128-NEXT: ushll2 v2.4s, v1.8h, #0 ; VBITS_GE_128-NEXT: ushll2 v3.4s, v0.8h, #0 -; VBITS_GE_128-NEXT: ushll v4.4s, v1.4h, #0 +; VBITS_GE_128-NEXT: ushll v4.4s, v0.4h, #0 ; VBITS_GE_128-NEXT: udivr z2.s, p0/m, z2.s, z3.s -; VBITS_GE_128-NEXT: ushll v5.4s, v0.4h, #0 -; VBITS_GE_128-NEXT: movprfx z3, z5 -; VBITS_GE_128-NEXT: udiv z3.s, p0/m, z3.s, z4.s +; VBITS_GE_128-NEXT: ushll v3.4s, v1.4h, #0 +; VBITS_GE_128-NEXT: udivr z3.s, p0/m, z3.s, z4.s ; VBITS_GE_128-NEXT: uzp1 v2.8h, v3.8h, v2.8h ; VBITS_GE_128-NEXT: mls v0.8h, v2.8h, v1.8h ; VBITS_GE_128-NEXT: ret ; ; VBITS_GE_256-LABEL: urem_v8i16: ; VBITS_GE_256: // %bb.0: +; VBITS_GE_256-NEXT: ptrue p0.s, vl8 ; VBITS_GE_256-NEXT: // kill: def $q1 killed $q1 def $z1 ; VBITS_GE_256-NEXT: // kill: def $q0 killed $q0 def $z0 -; VBITS_GE_256-NEXT: ptrue p0.s, vl8 ; VBITS_GE_256-NEXT: uunpklo z2.s, z1.h ; VBITS_GE_256-NEXT: uunpklo z3.s, z0.h ; VBITS_GE_256-NEXT: udivr z2.s, p0/m, z2.s, z3.s @@ -1192,9 +1188,9 @@ ; ; VBITS_GE_512-LABEL: urem_v8i16: ; VBITS_GE_512: // %bb.0: +; VBITS_GE_512-NEXT: ptrue p0.s, vl8 ; VBITS_GE_512-NEXT: // kill: def $q1 killed $q1 def $z1 ; VBITS_GE_512-NEXT: // kill: def $q0 killed $q0 def $z0 -; VBITS_GE_512-NEXT: ptrue p0.s, vl8 ; VBITS_GE_512-NEXT: uunpklo z2.s, z1.h ; VBITS_GE_512-NEXT: uunpklo z3.s, z0.h ; VBITS_GE_512-NEXT: udivr z2.s, p0/m, z2.s, z3.s @@ -1209,26 +1205,27 @@ define void @urem_v16i16(ptr %a, ptr %b) #0 { ; VBITS_GE_128-LABEL: urem_v16i16: ; VBITS_GE_128: // %bb.0: -; VBITS_GE_128-NEXT: ldp q0, q1, [x1] ; VBITS_GE_128-NEXT: ptrue p0.s, vl4 -; VBITS_GE_128-NEXT: ushll2 v5.4s, v0.8h, #0 -; VBITS_GE_128-NEXT: ushll v7.4s, v0.4h, #0 -; VBITS_GE_128-NEXT: ldp q2, q3, [x0] -; VBITS_GE_128-NEXT: ushll2 v4.4s, v1.8h, #0 -; VBITS_GE_128-NEXT: ushll2 v6.4s, v2.8h, #0 -; VBITS_GE_128-NEXT: ushll v16.4s, v2.4h, #0 -; VBITS_GE_128-NEXT: ushll2 v17.4s, v3.8h, #0 +; VBITS_GE_128-NEXT: ldp q4, q1, [x1] +; VBITS_GE_128-NEXT: ldr q0, [x0, #16] +; VBITS_GE_128-NEXT: ushll2 v2.4s, v1.8h, #0 +; VBITS_GE_128-NEXT: ushll2 v3.4s, v0.8h, #0 +; VBITS_GE_128-NEXT: 
ushll2 v5.4s, v4.8h, #0 +; VBITS_GE_128-NEXT: ushll v16.4s, v0.4h, #0 +; VBITS_GE_128-NEXT: udivr z2.s, p0/m, z2.s, z3.s +; VBITS_GE_128-NEXT: ldr q3, [x0] +; VBITS_GE_128-NEXT: ushll2 v6.4s, v3.8h, #0 +; VBITS_GE_128-NEXT: ushll v7.4s, v3.4h, #0 ; VBITS_GE_128-NEXT: udivr z5.s, p0/m, z5.s, z6.s -; VBITS_GE_128-NEXT: ushll v6.4s, v1.4h, #0 +; VBITS_GE_128-NEXT: ushll v6.4s, v4.4h, #0 +; VBITS_GE_128-NEXT: udivr z6.s, p0/m, z6.s, z7.s +; VBITS_GE_128-NEXT: ushll v7.4s, v1.4h, #0 ; VBITS_GE_128-NEXT: udivr z7.s, p0/m, z7.s, z16.s -; VBITS_GE_128-NEXT: ushll v16.4s, v3.4h, #0 -; VBITS_GE_128-NEXT: udivr z4.s, p0/m, z4.s, z17.s -; VBITS_GE_128-NEXT: uzp1 v5.8h, v7.8h, v5.8h -; VBITS_GE_128-NEXT: udivr z6.s, p0/m, z6.s, z16.s -; VBITS_GE_128-NEXT: uzp1 v4.8h, v6.8h, v4.8h -; VBITS_GE_128-NEXT: mls v2.8h, v5.8h, v0.8h -; VBITS_GE_128-NEXT: mls v3.8h, v4.8h, v1.8h -; VBITS_GE_128-NEXT: stp q2, q3, [x0] +; VBITS_GE_128-NEXT: uzp1 v5.8h, v6.8h, v5.8h +; VBITS_GE_128-NEXT: mls v3.8h, v5.8h, v4.8h +; VBITS_GE_128-NEXT: uzp1 v2.8h, v7.8h, v2.8h +; VBITS_GE_128-NEXT: mls v0.8h, v2.8h, v1.8h +; VBITS_GE_128-NEXT: stp q3, q0, [x0] ; VBITS_GE_128-NEXT: ret ; ; VBITS_GE_256-LABEL: urem_v16i16: @@ -1237,19 +1234,19 @@ ; VBITS_GE_256-NEXT: ptrue p1.s, vl8 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x1] -; VBITS_GE_256-NEXT: mov z3.d, z1.d +; VBITS_GE_256-NEXT: uunpklo z2.s, z1.h +; VBITS_GE_256-NEXT: uunpklo z3.s, z0.h ; VBITS_GE_256-NEXT: mov z4.d, z0.d -; VBITS_GE_256-NEXT: ext z3.b, z3.b, z1.b, #16 ; VBITS_GE_256-NEXT: ext z4.b, z4.b, z0.b, #16 -; VBITS_GE_256-NEXT: uunpklo z2.s, z1.h -; VBITS_GE_256-NEXT: uunpklo z5.s, z0.h -; VBITS_GE_256-NEXT: uunpklo z3.s, z3.h +; VBITS_GE_256-NEXT: udivr z2.s, p1/m, z2.s, z3.s +; VBITS_GE_256-NEXT: mov z3.d, z1.d ; VBITS_GE_256-NEXT: uunpklo z4.s, z4.h -; VBITS_GE_256-NEXT: udivr z2.s, p1/m, z2.s, z5.s +; VBITS_GE_256-NEXT: ext z3.b, z3.b, z1.b, #16 +; VBITS_GE_256-NEXT: uunpklo z3.s, z3.h ; VBITS_GE_256-NEXT: udivr z3.s, p1/m, z3.s, z4.s +; VBITS_GE_256-NEXT: ptrue p1.h, vl8 ; VBITS_GE_256-NEXT: uzp1 z2.h, z2.h, z2.h ; VBITS_GE_256-NEXT: uzp1 z3.h, z3.h, z3.h -; VBITS_GE_256-NEXT: ptrue p1.h, vl8 ; VBITS_GE_256-NEXT: splice z2.h, p1, z2.h, z3.h ; VBITS_GE_256-NEXT: mls z0.h, p0/m, z2.h, z1.h ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0] @@ -1324,19 +1321,19 @@ ; CHECK-NEXT: ptrue p1.s, vl64 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] -; CHECK-NEXT: mov z3.d, z1.d +; CHECK-NEXT: uunpklo z2.s, z1.h +; CHECK-NEXT: uunpklo z3.s, z0.h ; CHECK-NEXT: mov z4.d, z0.d -; CHECK-NEXT: ext z3.b, z3.b, z1.b, #128 ; CHECK-NEXT: ext z4.b, z4.b, z0.b, #128 -; CHECK-NEXT: uunpklo z2.s, z1.h -; CHECK-NEXT: uunpklo z5.s, z0.h -; CHECK-NEXT: uunpklo z3.s, z3.h +; CHECK-NEXT: udivr z2.s, p1/m, z2.s, z3.s +; CHECK-NEXT: mov z3.d, z1.d ; CHECK-NEXT: uunpklo z4.s, z4.h -; CHECK-NEXT: udivr z2.s, p1/m, z2.s, z5.s +; CHECK-NEXT: ext z3.b, z3.b, z1.b, #128 +; CHECK-NEXT: uunpklo z3.s, z3.h ; CHECK-NEXT: udivr z3.s, p1/m, z3.s, z4.s +; CHECK-NEXT: ptrue p1.h, vl64 ; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h ; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h -; CHECK-NEXT: ptrue p1.h, vl64 ; CHECK-NEXT: splice z2.h, p1, z2.h, z3.h ; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h ; CHECK-NEXT: st1h { z0.h }, p0, [x0] @@ -1401,41 +1398,41 @@ define void @urem_v16i32(ptr %a, ptr %b) #0 { ; VBITS_GE_128-LABEL: urem_v16i32: ; VBITS_GE_128: // %bb.0: -; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32] +; VBITS_GE_128-NEXT: ldp q0, q3, [x1] ; VBITS_GE_128-NEXT: 
ptrue p0.s, vl4 -; VBITS_GE_128-NEXT: ldp q2, q3, [x0] -; VBITS_GE_128-NEXT: ldp q4, q5, [x1, #32] -; VBITS_GE_128-NEXT: movprfx z16, z0 -; VBITS_GE_128-NEXT: udiv z16.s, p0/m, z16.s, z4.s -; VBITS_GE_128-NEXT: mls v0.4s, v16.4s, v4.4s -; VBITS_GE_128-NEXT: movprfx z16, z1 -; VBITS_GE_128-NEXT: udiv z16.s, p0/m, z16.s, z5.s -; VBITS_GE_128-NEXT: ldp q7, q6, [x1] -; VBITS_GE_128-NEXT: movprfx z4, z3 -; VBITS_GE_128-NEXT: udiv z4.s, p0/m, z4.s, z6.s -; VBITS_GE_128-NEXT: mls v1.4s, v16.4s, v5.4s -; VBITS_GE_128-NEXT: movprfx z5, z2 -; VBITS_GE_128-NEXT: udiv z5.s, p0/m, z5.s, z7.s -; VBITS_GE_128-NEXT: mls v2.4s, v5.4s, v7.4s -; VBITS_GE_128-NEXT: mls v3.4s, v4.4s, v6.4s -; VBITS_GE_128-NEXT: stp q0, q1, [x0, #32] -; VBITS_GE_128-NEXT: stp q2, q3, [x0] +; VBITS_GE_128-NEXT: ldp q1, q2, [x0] +; VBITS_GE_128-NEXT: movprfx z4, z1 +; VBITS_GE_128-NEXT: udiv z4.s, p0/m, z4.s, z0.s +; VBITS_GE_128-NEXT: ldp q16, q5, [x0, #32] +; VBITS_GE_128-NEXT: movprfx z19, z2 +; VBITS_GE_128-NEXT: udiv z19.s, p0/m, z19.s, z3.s +; VBITS_GE_128-NEXT: ldp q17, q6, [x1, #32] +; VBITS_GE_128-NEXT: movprfx z7, z5 +; VBITS_GE_128-NEXT: udiv z7.s, p0/m, z7.s, z6.s +; VBITS_GE_128-NEXT: movprfx z18, z16 +; VBITS_GE_128-NEXT: udiv z18.s, p0/m, z18.s, z17.s +; VBITS_GE_128-NEXT: mls v1.4s, v4.4s, v0.4s +; VBITS_GE_128-NEXT: mls v2.4s, v19.4s, v3.4s +; VBITS_GE_128-NEXT: mls v16.4s, v18.4s, v17.4s +; VBITS_GE_128-NEXT: mls v5.4s, v7.4s, v6.4s +; VBITS_GE_128-NEXT: stp q1, q2, [x0] +; VBITS_GE_128-NEXT: stp q16, q5, [x0, #32] ; VBITS_GE_128-NEXT: ret ; ; VBITS_GE_256-LABEL: urem_v16i32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2] -; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1] -; VBITS_GE_256-NEXT: movprfx z4, z0 -; VBITS_GE_256-NEXT: udiv z4.s, p0/m, z4.s, z2.s +; VBITS_GE_256-NEXT: ld1w { z4.s }, p0/z, [x1] +; VBITS_GE_256-NEXT: movprfx z3, z0 +; VBITS_GE_256-NEXT: udiv z3.s, p0/m, z3.s, z2.s ; VBITS_GE_256-NEXT: movprfx z5, z1 -; VBITS_GE_256-NEXT: udiv z5.s, p0/m, z5.s, z3.s -; VBITS_GE_256-NEXT: mls z0.s, p0/m, z4.s, z2.s -; VBITS_GE_256-NEXT: mls z1.s, p0/m, z5.s, z3.s +; VBITS_GE_256-NEXT: udiv z5.s, p0/m, z5.s, z4.s +; VBITS_GE_256-NEXT: mls z0.s, p0/m, z3.s, z2.s +; VBITS_GE_256-NEXT: mls z1.s, p0/m, z5.s, z4.s ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0] ; VBITS_GE_256-NEXT: ret @@ -1498,8 +1495,8 @@ define <1 x i64> @urem_v1i64(<1 x i64> %op1, <1 x i64> %op2) vscale_range(1,0) #0 { ; CHECK-LABEL: urem_v1i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: movprfx z2, z0 ; CHECK-NEXT: udiv z2.d, p0/m, z2.d, z1.d @@ -1515,8 +1512,8 @@ define <2 x i64> @urem_v2i64(<2 x i64> %op1, <2 x i64> %op2) vscale_range(1,0) #0 { ; CHECK-LABEL: urem_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: movprfx z2, z0 ; CHECK-NEXT: udiv z2.d, p0/m, z2.d, z1.d @@ -1548,43 +1545,42 @@ define void @urem_v8i64(ptr %a, ptr %b) #0 { ; VBITS_GE_128-LABEL: urem_v8i64: ; VBITS_GE_128: // 
%bb.0: -; VBITS_GE_128-NEXT: ldp q0, q1, [x0, #32] ; VBITS_GE_128-NEXT: ptrue p0.d, vl2 -; VBITS_GE_128-NEXT: ldp q2, q3, [x1, #32] -; VBITS_GE_128-NEXT: movprfx z16, z1 -; VBITS_GE_128-NEXT: udiv z16.d, p0/m, z16.d, z3.d -; VBITS_GE_128-NEXT: mls z1.d, p0/m, z16.d, z3.d -; VBITS_GE_128-NEXT: movprfx z3, z0 -; VBITS_GE_128-NEXT: udiv z3.d, p0/m, z3.d, z2.d -; VBITS_GE_128-NEXT: mls z0.d, p0/m, z3.d, z2.d -; VBITS_GE_128-NEXT: ldp q4, q5, [x0] -; VBITS_GE_128-NEXT: ldp q7, q6, [x1] -; VBITS_GE_128-NEXT: movprfx z16, z5 -; VBITS_GE_128-NEXT: udiv z16.d, p0/m, z16.d, z6.d -; VBITS_GE_128-NEXT: movprfx z2, z4 -; VBITS_GE_128-NEXT: udiv z2.d, p0/m, z2.d, z7.d -; VBITS_GE_128-NEXT: stp q0, q1, [x0, #32] -; VBITS_GE_128-NEXT: movprfx z0, z4 -; VBITS_GE_128-NEXT: mls z0.d, p0/m, z2.d, z7.d -; VBITS_GE_128-NEXT: movprfx z1, z5 -; VBITS_GE_128-NEXT: mls z1.d, p0/m, z16.d, z6.d +; VBITS_GE_128-NEXT: ldp q0, q3, [x1] +; VBITS_GE_128-NEXT: ldp q1, q2, [x0] +; VBITS_GE_128-NEXT: movprfx z4, z1 +; VBITS_GE_128-NEXT: udiv z4.d, p0/m, z4.d, z0.d +; VBITS_GE_128-NEXT: ldp q16, q5, [x0, #32] +; VBITS_GE_128-NEXT: movprfx z19, z2 +; VBITS_GE_128-NEXT: udiv z19.d, p0/m, z19.d, z3.d +; VBITS_GE_128-NEXT: ldp q17, q6, [x1, #32] +; VBITS_GE_128-NEXT: movprfx z7, z5 +; VBITS_GE_128-NEXT: udiv z7.d, p0/m, z7.d, z6.d +; VBITS_GE_128-NEXT: movprfx z18, z16 +; VBITS_GE_128-NEXT: udiv z18.d, p0/m, z18.d, z17.d +; VBITS_GE_128-NEXT: mls z16.d, p0/m, z18.d, z17.d +; VBITS_GE_128-NEXT: mls z5.d, p0/m, z7.d, z6.d +; VBITS_GE_128-NEXT: msb z0.d, p0/m, z4.d, z1.d +; VBITS_GE_128-NEXT: movprfx z1, z2 +; VBITS_GE_128-NEXT: mls z1.d, p0/m, z19.d, z3.d +; VBITS_GE_128-NEXT: stp q16, q5, [x0, #32] ; VBITS_GE_128-NEXT: stp q0, q1, [x0] ; VBITS_GE_128-NEXT: ret ; ; VBITS_GE_256-LABEL: urem_v8i64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] -; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1] -; VBITS_GE_256-NEXT: movprfx z4, z0 -; VBITS_GE_256-NEXT: udiv z4.d, p0/m, z4.d, z2.d +; VBITS_GE_256-NEXT: ld1d { z4.d }, p0/z, [x1] +; VBITS_GE_256-NEXT: movprfx z3, z0 +; VBITS_GE_256-NEXT: udiv z3.d, p0/m, z3.d, z2.d ; VBITS_GE_256-NEXT: movprfx z5, z1 -; VBITS_GE_256-NEXT: udiv z5.d, p0/m, z5.d, z3.d -; VBITS_GE_256-NEXT: mls z0.d, p0/m, z4.d, z2.d -; VBITS_GE_256-NEXT: mls z1.d, p0/m, z5.d, z3.d +; VBITS_GE_256-NEXT: udiv z5.d, p0/m, z5.d, z4.d +; VBITS_GE_256-NEXT: mls z0.d, p0/m, z3.d, z2.d +; VBITS_GE_256-NEXT: mls z1.d, p0/m, z5.d, z4.d ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0] ; VBITS_GE_256-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-select.ll @@ -34,14 +34,14 @@ define void @select_v32i8(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 { ; CHECK-LABEL: select_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p0.b, vl32 -; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] -; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1] +; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p1.b -; CHECK-NEXT: mov z2.b, w8 -; CHECK-NEXT: cmpne p1.b, p1/z, z2.b, #0 -; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b +; CHECK-NEXT: mov 
z0.b, w8 +; CHECK-NEXT: ld1b { z1.b }, p0/z, [x0] +; CHECK-NEXT: ld1b { z2.b }, p0/z, [x1] +; CHECK-NEXT: cmpne p1.b, p1/z, z0.b, #0 +; CHECK-NEXT: sel z0.b, p1, z1.b, z2.b ; CHECK-NEXT: st1b { z0.b }, p0, [x0] ; CHECK-NEXT: ret %op1 = load volatile <32 x i8>, ptr %a @@ -54,32 +54,32 @@ define void @select_v64i8(ptr %a, ptr %b, i1 %mask) #0 { ; VBITS_GE_256-LABEL: select_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 -; VBITS_GE_256-NEXT: and w9, w2, #0x1 +; VBITS_GE_256-NEXT: and w8, w2, #0x1 ; VBITS_GE_256-NEXT: ptrue p1.b -; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] -; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] -; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x8] -; VBITS_GE_256-NEXT: ld1b { z3.b }, p0/z, [x1] -; VBITS_GE_256-NEXT: mov z4.b, w9 -; VBITS_GE_256-NEXT: cmpne p1.b, p1/z, z4.b, #0 +; VBITS_GE_256-NEXT: mov z0.b, w8 +; VBITS_GE_256-NEXT: mov w8, #32 // =0x20 +; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0, x8] +; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x0] +; VBITS_GE_256-NEXT: ld1b { z3.b }, p0/z, [x1, x8] +; VBITS_GE_256-NEXT: cmpne p1.b, p1/z, z0.b, #0 +; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x1] ; VBITS_GE_256-NEXT: sel z1.b, p1, z1.b, z3.b -; VBITS_GE_256-NEXT: sel z0.b, p1, z0.b, z2.b -; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x0, x8] -; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0] +; VBITS_GE_256-NEXT: mov z0.b, p1/m, z2.b +; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0, x8] +; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: select_v64i8: ; VBITS_GE_512: // %bb.0: -; VBITS_GE_512-NEXT: and w8, w2, #0x1 ; VBITS_GE_512-NEXT: ptrue p0.b, vl64 -; VBITS_GE_512-NEXT: ld1b { z0.b }, p0/z, [x0] -; VBITS_GE_512-NEXT: ld1b { z1.b }, p0/z, [x1] +; VBITS_GE_512-NEXT: and w8, w2, #0x1 ; VBITS_GE_512-NEXT: ptrue p1.b -; VBITS_GE_512-NEXT: mov z2.b, w8 -; VBITS_GE_512-NEXT: cmpne p1.b, p1/z, z2.b, #0 -; VBITS_GE_512-NEXT: sel z0.b, p1, z0.b, z1.b +; VBITS_GE_512-NEXT: mov z0.b, w8 +; VBITS_GE_512-NEXT: ld1b { z1.b }, p0/z, [x0] +; VBITS_GE_512-NEXT: ld1b { z2.b }, p0/z, [x1] +; VBITS_GE_512-NEXT: cmpne p1.b, p1/z, z0.b, #0 +; VBITS_GE_512-NEXT: sel z0.b, p1, z1.b, z2.b ; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x0] ; VBITS_GE_512-NEXT: ret %op1 = load volatile <64 x i8>, ptr %a @@ -92,14 +92,14 @@ define void @select_v128i8(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 { ; CHECK-LABEL: select_v128i8: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p0.b, vl128 -; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] -; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1] +; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p1.b -; CHECK-NEXT: mov z2.b, w8 -; CHECK-NEXT: cmpne p1.b, p1/z, z2.b, #0 -; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b +; CHECK-NEXT: mov z0.b, w8 +; CHECK-NEXT: ld1b { z1.b }, p0/z, [x0] +; CHECK-NEXT: ld1b { z2.b }, p0/z, [x1] +; CHECK-NEXT: cmpne p1.b, p1/z, z0.b, #0 +; CHECK-NEXT: sel z0.b, p1, z1.b, z2.b ; CHECK-NEXT: st1b { z0.b }, p0, [x0] ; CHECK-NEXT: ret %op1 = load volatile <128 x i8>, ptr %a @@ -112,14 +112,14 @@ define void @select_v256i8(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 { ; CHECK-LABEL: select_v256i8: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p0.b, vl256 -; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] -; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1] +; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p1.b -; CHECK-NEXT: mov z2.b, w8 -; CHECK-NEXT: cmpne p1.b, p1/z, z2.b, #0 -; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b +; CHECK-NEXT: mov 
z0.b, w8 +; CHECK-NEXT: ld1b { z1.b }, p0/z, [x0] +; CHECK-NEXT: ld1b { z2.b }, p0/z, [x1] +; CHECK-NEXT: cmpne p1.b, p1/z, z0.b, #0 +; CHECK-NEXT: sel z0.b, p1, z1.b, z2.b ; CHECK-NEXT: st1b { z0.b }, p0, [x0] ; CHECK-NEXT: ret %op1 = load volatile <256 x i8>, ptr %a @@ -158,14 +158,14 @@ define void @select_v16i16(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 { ; CHECK-LABEL: select_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p0.h, vl16 -; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] -; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] +; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p1.h -; CHECK-NEXT: mov z2.h, w8 -; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0 -; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h +; CHECK-NEXT: mov z0.h, w8 +; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0] +; CHECK-NEXT: ld1h { z2.h }, p0/z, [x1] +; CHECK-NEXT: cmpne p1.h, p1/z, z0.h, #0 +; CHECK-NEXT: sel z0.h, p1, z1.h, z2.h ; CHECK-NEXT: st1h { z0.h }, p0, [x0] ; CHECK-NEXT: ret %op1 = load volatile <16 x i16>, ptr %a @@ -178,32 +178,32 @@ define void @select_v32i16(ptr %a, ptr %b, i1 %mask) #0 { ; VBITS_GE_256-LABEL: select_v32i16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 -; VBITS_GE_256-NEXT: and w9, w2, #0x1 +; VBITS_GE_256-NEXT: and w8, w2, #0x1 ; VBITS_GE_256-NEXT: ptrue p1.h -; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] -; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] -; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1] -; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1] -; VBITS_GE_256-NEXT: mov z4.h, w9 -; VBITS_GE_256-NEXT: cmpne p1.h, p1/z, z4.h, #0 +; VBITS_GE_256-NEXT: mov z0.h, w8 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 +; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0, x8, lsl #1] +; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x0] +; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1, x8, lsl #1] +; VBITS_GE_256-NEXT: cmpne p1.h, p1/z, z0.h, #0 +; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x1] ; VBITS_GE_256-NEXT: sel z1.h, p1, z1.h, z3.h -; VBITS_GE_256-NEXT: sel z0.h, p1, z0.h, z2.h -; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1] -; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0] +; VBITS_GE_256-NEXT: mov z0.h, p1/m, z2.h +; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0, x8, lsl #1] +; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: select_v32i16: ; VBITS_GE_512: // %bb.0: -; VBITS_GE_512-NEXT: and w8, w2, #0x1 ; VBITS_GE_512-NEXT: ptrue p0.h, vl32 -; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_GE_512-NEXT: ld1h { z1.h }, p0/z, [x1] +; VBITS_GE_512-NEXT: and w8, w2, #0x1 ; VBITS_GE_512-NEXT: ptrue p1.h -; VBITS_GE_512-NEXT: mov z2.h, w8 -; VBITS_GE_512-NEXT: cmpne p1.h, p1/z, z2.h, #0 -; VBITS_GE_512-NEXT: sel z0.h, p1, z0.h, z1.h +; VBITS_GE_512-NEXT: mov z0.h, w8 +; VBITS_GE_512-NEXT: ld1h { z1.h }, p0/z, [x0] +; VBITS_GE_512-NEXT: ld1h { z2.h }, p0/z, [x1] +; VBITS_GE_512-NEXT: cmpne p1.h, p1/z, z0.h, #0 +; VBITS_GE_512-NEXT: sel z0.h, p1, z1.h, z2.h ; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0] ; VBITS_GE_512-NEXT: ret %op1 = load volatile <32 x i16>, ptr %a @@ -216,14 +216,14 @@ define void @select_v64i16(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 { ; CHECK-LABEL: select_v64i16: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p0.h, vl64 -; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] -; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] +; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p1.h -; CHECK-NEXT: mov z2.h, w8 -; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0 -; 
CHECK-NEXT: sel z0.h, p1, z0.h, z1.h +; CHECK-NEXT: mov z0.h, w8 +; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0] +; CHECK-NEXT: ld1h { z2.h }, p0/z, [x1] +; CHECK-NEXT: cmpne p1.h, p1/z, z0.h, #0 +; CHECK-NEXT: sel z0.h, p1, z1.h, z2.h ; CHECK-NEXT: st1h { z0.h }, p0, [x0] ; CHECK-NEXT: ret %op1 = load volatile <64 x i16>, ptr %a @@ -236,14 +236,14 @@ define void @select_v128i16(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 { ; CHECK-LABEL: select_v128i16: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p0.h, vl128 -; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] -; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] +; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p1.h -; CHECK-NEXT: mov z2.h, w8 -; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0 -; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h +; CHECK-NEXT: mov z0.h, w8 +; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0] +; CHECK-NEXT: ld1h { z2.h }, p0/z, [x1] +; CHECK-NEXT: cmpne p1.h, p1/z, z0.h, #0 +; CHECK-NEXT: sel z0.h, p1, z1.h, z2.h ; CHECK-NEXT: st1h { z0.h }, p0, [x0] ; CHECK-NEXT: ret %op1 = load volatile <128 x i16>, ptr %a @@ -282,14 +282,14 @@ define void @select_v8i32(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 { ; CHECK-LABEL: select_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p0.s, vl8 -; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] -; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1] +; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p1.s -; CHECK-NEXT: mov z2.s, w8 -; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0 -; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s +; CHECK-NEXT: mov z0.s, w8 +; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0] +; CHECK-NEXT: ld1w { z2.s }, p0/z, [x1] +; CHECK-NEXT: cmpne p1.s, p1/z, z0.s, #0 +; CHECK-NEXT: sel z0.s, p1, z1.s, z2.s ; CHECK-NEXT: st1w { z0.s }, p0, [x0] ; CHECK-NEXT: ret %op1 = load volatile <8 x i32>, ptr %a @@ -302,32 +302,32 @@ define void @select_v16i32(ptr %a, ptr %b, i1 %mask) #0 { ; VBITS_GE_256-LABEL: select_v16i32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 -; VBITS_GE_256-NEXT: and w9, w2, #0x1 +; VBITS_GE_256-NEXT: and w8, w2, #0x1 ; VBITS_GE_256-NEXT: ptrue p1.s -; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] -; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] -; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2] -; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1] -; VBITS_GE_256-NEXT: mov z4.s, w9 -; VBITS_GE_256-NEXT: cmpne p1.s, p1/z, z4.s, #0 +; VBITS_GE_256-NEXT: mov z0.s, w8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 +; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0, x8, lsl #2] +; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0] +; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1, x8, lsl #2] +; VBITS_GE_256-NEXT: cmpne p1.s, p1/z, z0.s, #0 +; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x1] ; VBITS_GE_256-NEXT: sel z1.s, p1, z1.s, z3.s -; VBITS_GE_256-NEXT: sel z0.s, p1, z0.s, z2.s -; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] -; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0] +; VBITS_GE_256-NEXT: mov z0.s, p1/m, z2.s +; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0, x8, lsl #2] +; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: select_v16i32: ; VBITS_GE_512: // %bb.0: -; VBITS_GE_512-NEXT: and w8, w2, #0x1 ; VBITS_GE_512-NEXT: ptrue p0.s, vl16 -; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0] -; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x1] +; VBITS_GE_512-NEXT: and w8, w2, #0x1 ; VBITS_GE_512-NEXT: ptrue p1.s -; VBITS_GE_512-NEXT: mov z2.s, w8 -; VBITS_GE_512-NEXT: cmpne p1.s, p1/z, z2.s, #0 -; 
VBITS_GE_512-NEXT: sel z0.s, p1, z0.s, z1.s +; VBITS_GE_512-NEXT: mov z0.s, w8 +; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x0] +; VBITS_GE_512-NEXT: ld1w { z2.s }, p0/z, [x1] +; VBITS_GE_512-NEXT: cmpne p1.s, p1/z, z0.s, #0 +; VBITS_GE_512-NEXT: sel z0.s, p1, z1.s, z2.s ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_512-NEXT: ret %op1 = load volatile <16 x i32>, ptr %a @@ -340,14 +340,14 @@ define void @select_v32i32(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 { ; CHECK-LABEL: select_v32i32: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p0.s, vl32 -; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] -; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1] +; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p1.s -; CHECK-NEXT: mov z2.s, w8 -; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0 -; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s +; CHECK-NEXT: mov z0.s, w8 +; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0] +; CHECK-NEXT: ld1w { z2.s }, p0/z, [x1] +; CHECK-NEXT: cmpne p1.s, p1/z, z0.s, #0 +; CHECK-NEXT: sel z0.s, p1, z1.s, z2.s ; CHECK-NEXT: st1w { z0.s }, p0, [x0] ; CHECK-NEXT: ret %op1 = load volatile <32 x i32>, ptr %a @@ -360,14 +360,14 @@ define void @select_v64i32(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 { ; CHECK-LABEL: select_v64i32: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p0.s, vl64 -; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] -; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1] +; CHECK-NEXT: and w8, w2, #0x1 ; CHECK-NEXT: ptrue p1.s -; CHECK-NEXT: mov z2.s, w8 -; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0 -; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s +; CHECK-NEXT: mov z0.s, w8 +; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0] +; CHECK-NEXT: ld1w { z2.s }, p0/z, [x1] +; CHECK-NEXT: cmpne p1.s, p1/z, z0.s, #0 +; CHECK-NEXT: sel z0.s, p1, z1.s, z2.s ; CHECK-NEXT: st1w { z0.s }, p0, [x0] ; CHECK-NEXT: ret %op1 = load volatile <64 x i32>, ptr %a @@ -406,15 +406,15 @@ define void @select_v4i64(ptr %a, ptr %b, i1 %mask) vscale_range(2,0) #0 { ; CHECK-LABEL: select_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2 ; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2 ; CHECK-NEXT: and x8, x2, #0x1 -; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] -; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] ; CHECK-NEXT: ptrue p1.d -; CHECK-NEXT: mov z2.d, x8 -; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0 -; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d +; CHECK-NEXT: mov z0.d, x8 +; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0] +; CHECK-NEXT: ld1d { z2.d }, p0/z, [x1] +; CHECK-NEXT: cmpne p1.d, p1/z, z0.d, #0 +; CHECK-NEXT: sel z0.d, p1, z1.d, z2.d ; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: ret %op1 = load volatile <4 x i64>, ptr %a @@ -427,34 +427,34 @@ define void @select_v8i64(ptr %a, ptr %b, i1 %mask) #0 { ; VBITS_GE_256-LABEL: select_v8i64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 -; VBITS_GE_256-NEXT: // kill: def $w2 killed $w2 def $x2 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 -; VBITS_GE_256-NEXT: and x9, x2, #0x1 +; VBITS_GE_256-NEXT: // kill: def $w2 killed $w2 def $x2 +; VBITS_GE_256-NEXT: and x8, x2, #0x1 ; VBITS_GE_256-NEXT: ptrue p1.d -; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] -; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] -; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] -; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1] -; VBITS_GE_256-NEXT: mov z4.d, x9 -; VBITS_GE_256-NEXT: cmpne p1.d, p1/z, z4.d, #0 +; VBITS_GE_256-NEXT: mov z0.d, x8 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 +; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0, x8, 
lsl #3] +; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0] +; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1, x8, lsl #3] +; VBITS_GE_256-NEXT: cmpne p1.d, p1/z, z0.d, #0 +; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x1] ; VBITS_GE_256-NEXT: sel z1.d, p1, z1.d, z3.d -; VBITS_GE_256-NEXT: sel z0.d, p1, z0.d, z2.d -; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3] -; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0] +; VBITS_GE_256-NEXT: mov z0.d, p1/m, z2.d +; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0, x8, lsl #3] +; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: select_v8i64: ; VBITS_GE_512: // %bb.0: -; VBITS_GE_512-NEXT: // kill: def $w2 killed $w2 def $x2 ; VBITS_GE_512-NEXT: ptrue p0.d, vl8 +; VBITS_GE_512-NEXT: // kill: def $w2 killed $w2 def $x2 ; VBITS_GE_512-NEXT: and x8, x2, #0x1 -; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0] -; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1] ; VBITS_GE_512-NEXT: ptrue p1.d -; VBITS_GE_512-NEXT: mov z2.d, x8 -; VBITS_GE_512-NEXT: cmpne p1.d, p1/z, z2.d, #0 -; VBITS_GE_512-NEXT: sel z0.d, p1, z0.d, z1.d +; VBITS_GE_512-NEXT: mov z0.d, x8 +; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x0] +; VBITS_GE_512-NEXT: ld1d { z2.d }, p0/z, [x1] +; VBITS_GE_512-NEXT: cmpne p1.d, p1/z, z0.d, #0 +; VBITS_GE_512-NEXT: sel z0.d, p1, z1.d, z2.d ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0] ; VBITS_GE_512-NEXT: ret %op1 = load volatile <8 x i64>, ptr %a @@ -467,15 +467,15 @@ define void @select_v16i64(ptr %a, ptr %b, i1 %mask) vscale_range(8,0) #0 { ; CHECK-LABEL: select_v16i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2 ; CHECK-NEXT: ptrue p0.d, vl16 +; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2 ; CHECK-NEXT: and x8, x2, #0x1 -; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] -; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] ; CHECK-NEXT: ptrue p1.d -; CHECK-NEXT: mov z2.d, x8 -; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0 -; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d +; CHECK-NEXT: mov z0.d, x8 +; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0] +; CHECK-NEXT: ld1d { z2.d }, p0/z, [x1] +; CHECK-NEXT: cmpne p1.d, p1/z, z0.d, #0 +; CHECK-NEXT: sel z0.d, p1, z1.d, z2.d ; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: ret %op1 = load volatile <16 x i64>, ptr %a @@ -488,15 +488,15 @@ define void @select_v32i64(ptr %a, ptr %b, i1 %mask) vscale_range(16,0) #0 { ; CHECK-LABEL: select_v32i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2 ; CHECK-NEXT: ptrue p0.d, vl32 +; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2 ; CHECK-NEXT: and x8, x2, #0x1 -; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] -; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] ; CHECK-NEXT: ptrue p1.d -; CHECK-NEXT: mov z2.d, x8 -; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0 -; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d +; CHECK-NEXT: mov z0.d, x8 +; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0] +; CHECK-NEXT: ld1d { z2.d }, p0/z, [x1] +; CHECK-NEXT: cmpne p1.d, p1/z, z0.d, #0 +; CHECK-NEXT: sel z0.d, p1, z1.d, z2.d ; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: ret %op1 = load volatile <32 x i64>, ptr %a diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-shifts.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-shifts.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-shifts.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-shifts.ll @@ -50,8 +50,8 @@ define void @ashr_v64i8(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: ashr_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 // 
=0x20 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x8] @@ -150,8 +150,8 @@ define void @ashr_v32i16(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: ashr_v32i16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1] @@ -250,8 +250,8 @@ define void @ashr_v16i32(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: ashr_v16i32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2] @@ -350,8 +350,8 @@ define void @ashr_v8i64(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: ashr_v8i64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] @@ -454,8 +454,8 @@ define void @lshr_v64i8(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: lshr_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 // =0x20 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x8] @@ -554,8 +554,8 @@ define void @lshr_v32i16(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: lshr_v32i16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1] @@ -654,8 +654,8 @@ define void @lshr_v16i32(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: lshr_v16i32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2] @@ -754,8 +754,8 @@ define void @lshr_v8i64(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: lshr_v8i64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] @@ -856,8 +856,8 @@ define void @shl_v64i8(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: shl_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 // =0x20 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x8] @@ -954,8 +954,8 @@ define void @shl_v32i16(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: shl_v32i16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; 
VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1] @@ -1052,8 +1052,8 @@ define void @shl_v16i32(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: shl_v16i32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2] @@ -1150,8 +1150,8 @@ define void @shl_v8i64(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: shl_v8i64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll @@ -50,8 +50,8 @@ define void @ucvtf_v32i16_v32f16(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: ucvtf_v32i16_v32f16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ucvtf z0.h, p0/m, z0.h @@ -131,8 +131,8 @@ define void @ucvtf_v8i16_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 { ; CHECK-LABEL: ucvtf_v8i16_v8f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ptrue p0.s, vl8 +; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s ; CHECK-NEXT: st1w { z0.s }, p0, [x1] @@ -147,7 +147,7 @@ ; VBITS_GE_256-LABEL: ucvtf_v16i16_v16f32: ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 -; VBITS_GE_256-NEXT: mov x8, #8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 ; VBITS_GE_256-NEXT: uunpklo z1.s, z0.h @@ -252,16 +252,16 @@ ; VBITS_GE_256-LABEL: ucvtf_v8i16_v8f64: ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: ldr q0, [x0] -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h -; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_256-NEXT: ucvtf z0.d, p0/m, z0.d -; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1] ; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h +; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s ; VBITS_GE_256-NEXT: uunpklo z1.d, z1.s +; VBITS_GE_256-NEXT: ucvtf z0.d, p0/m, z0.d ; VBITS_GE_256-NEXT: ucvtf z1.d, p0/m, z1.d +; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1] ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1, x8, lsl #3] ; VBITS_GE_256-NEXT: ret ; @@ -352,18 +352,18 @@ define void @ucvtf_v16i32_v16f16(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: ucvtf_v16i32_v16f16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 +; VBITS_GE_256-NEXT: ptrue p1.h, vl16 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ucvtf 
z0.h, p0/m, z0.s ; VBITS_GE_256-NEXT: ucvtf z1.h, p0/m, z1.s +; VBITS_GE_256-NEXT: ptrue p0.h, vl8 ; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h ; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h -; VBITS_GE_256-NEXT: ptrue p0.h, vl8 ; VBITS_GE_256-NEXT: splice z1.h, p0, z1.h, z0.h -; VBITS_GE_256-NEXT: ptrue p0.h, vl16 -; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x1] +; VBITS_GE_256-NEXT: st1h { z1.h }, p1, [x1] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: ucvtf_v16i32_v16f16: @@ -454,8 +454,8 @@ define void @ucvtf_v16i32_v16f32(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: ucvtf_v16i32_v16f32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ucvtf z0.s, p0/m, z0.s @@ -535,8 +535,8 @@ define void @ucvtf_v4i32_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 { ; CHECK-LABEL: ucvtf_v4i32_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: uunpklo z0.d, z0.s ; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d ; CHECK-NEXT: st1d { z0.d }, p0, [x1] @@ -551,7 +551,7 @@ ; VBITS_GE_256-LABEL: ucvtf_v8i32_v8f64: ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 -; VBITS_GE_256-NEXT: mov x8, #4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 ; VBITS_GE_256-NEXT: uunpklo z1.d, z0.s @@ -624,8 +624,8 @@ define <2 x half> @ucvtf_v2i64_v2f16(<2 x i64> %op1) vscale_range(2,0) #0 { ; CHECK-LABEL: ucvtf_v2i64_v2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ucvtf z0.h, p0/m, z0.d ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h @@ -653,8 +653,8 @@ define <8 x half> @ucvtf_v8i64_v8f16(ptr %a) #0 { ; VBITS_GE_256-LABEL: ucvtf_v8i64_v8f16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ucvtf z0.h, p0/m, z0.d @@ -757,18 +757,18 @@ define void @ucvtf_v8i64_v8f32(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: ucvtf_v8i64_v8f32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 +; VBITS_GE_256-NEXT: ptrue p1.s, vl8 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ucvtf z0.s, p0/m, z0.d ; VBITS_GE_256-NEXT: ucvtf z1.s, p0/m, z1.d +; VBITS_GE_256-NEXT: ptrue p0.s, vl4 ; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s -; VBITS_GE_256-NEXT: ptrue p0.s, vl4 ; VBITS_GE_256-NEXT: splice z1.s, p0, z1.s, z0.s -; VBITS_GE_256-NEXT: ptrue p0.s, vl8 -; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1] +; VBITS_GE_256-NEXT: st1w { z1.s }, p1, [x1] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: ucvtf_v8i64_v8f32: @@ -861,8 +861,8 @@ define void @ucvtf_v8i64_v8f64(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: ucvtf_v8i64_v8f64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { 
z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ucvtf z0.d, p0/m, z0.d @@ -957,8 +957,8 @@ define void @scvtf_v32i16_v32f16(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: scvtf_v32i16_v32f16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: scvtf z0.h, p0/m, z0.h @@ -1038,8 +1038,8 @@ define void @scvtf_v8i16_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 { ; CHECK-LABEL: scvtf_v8i16_v8f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ptrue p0.s, vl8 +; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: sunpklo z0.s, z0.h ; CHECK-NEXT: scvtf z0.s, p0/m, z0.s ; CHECK-NEXT: st1w { z0.s }, p0, [x1] @@ -1054,7 +1054,7 @@ ; VBITS_GE_256-LABEL: scvtf_v16i16_v16f32: ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 -; VBITS_GE_256-NEXT: mov x8, #8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 ; VBITS_GE_256-NEXT: sunpklo z1.s, z0.h @@ -1165,16 +1165,16 @@ ; VBITS_GE_256-LABEL: scvtf_v8i16_v8f64: ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: ldr q0, [x0] -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h -; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s -; VBITS_GE_256-NEXT: scvtf z0.d, p0/m, z0.d -; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1] ; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h +; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s ; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s +; VBITS_GE_256-NEXT: scvtf z0.d, p0/m, z0.d ; VBITS_GE_256-NEXT: scvtf z1.d, p0/m, z1.d +; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1] ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x1, x8, lsl #3] ; VBITS_GE_256-NEXT: ret ; @@ -1271,18 +1271,18 @@ define void @scvtf_v16i32_v16f16(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: scvtf_v16i32_v16f16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 +; VBITS_GE_256-NEXT: ptrue p1.h, vl16 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: scvtf z0.h, p0/m, z0.s ; VBITS_GE_256-NEXT: scvtf z1.h, p0/m, z1.s +; VBITS_GE_256-NEXT: ptrue p0.h, vl8 ; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h ; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h -; VBITS_GE_256-NEXT: ptrue p0.h, vl8 ; VBITS_GE_256-NEXT: splice z1.h, p0, z1.h, z0.h -; VBITS_GE_256-NEXT: ptrue p0.h, vl16 -; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x1] +; VBITS_GE_256-NEXT: st1h { z1.h }, p1, [x1] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: scvtf_v16i32_v16f16: @@ -1373,8 +1373,8 @@ define void @scvtf_v16i32_v16f32(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: scvtf_v16i32_v16f32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: scvtf z0.s, p0/m, z0.s @@ -1454,8 +1454,8 @@ define void @scvtf_v4i32_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 { ; CHECK-LABEL: scvtf_v4i32_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: sunpklo z0.d, z0.s ; CHECK-NEXT: scvtf z0.d, p0/m, z0.d ; 
CHECK-NEXT: st1d { z0.d }, p0, [x1] @@ -1470,7 +1470,7 @@ ; VBITS_GE_256-LABEL: scvtf_v8i32_v8f64: ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 -; VBITS_GE_256-NEXT: mov x8, #4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 ; VBITS_GE_256-NEXT: sunpklo z1.d, z0.s @@ -1549,8 +1549,8 @@ define <2 x half> @scvtf_v2i64_v2f16(<2 x i64> %op1) vscale_range(2,0) #0 { ; CHECK-LABEL: scvtf_v2i64_v2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: scvtf z0.h, p0/m, z0.d ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h @@ -1578,8 +1578,8 @@ define <8 x half> @scvtf_v8i64_v8f16(ptr %a) #0 { ; VBITS_GE_256-LABEL: scvtf_v8i64_v8f16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: scvtf z0.h, p0/m, z0.d @@ -1682,18 +1682,18 @@ define void @scvtf_v8i64_v8f32(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: scvtf_v8i64_v8f32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 +; VBITS_GE_256-NEXT: ptrue p1.s, vl8 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: scvtf z0.s, p0/m, z0.d ; VBITS_GE_256-NEXT: scvtf z1.s, p0/m, z1.d +; VBITS_GE_256-NEXT: ptrue p0.s, vl4 ; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s -; VBITS_GE_256-NEXT: ptrue p0.s, vl4 ; VBITS_GE_256-NEXT: splice z1.s, p0, z1.s, z0.s -; VBITS_GE_256-NEXT: ptrue p0.s, vl8 -; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1] +; VBITS_GE_256-NEXT: st1w { z1.s }, p1, [x1] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: scvtf_v8i64_v8f32: @@ -1786,8 +1786,8 @@ define void @scvtf_v8i64_v8f64(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: scvtf_v8i64_v8f64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: scvtf z0.d, p0/m, z0.d diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-vselect.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-vselect.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-vselect.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-vselect.ll @@ -50,8 +50,8 @@ define void @select_v64i8(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: select_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 // =0x20 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x8] @@ -163,8 +163,8 @@ define void @select_v32i16(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: select_v32i16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1] @@ -276,8 +276,8 @@ define void 
@select_v16i32(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: select_v16i32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2] @@ -390,8 +390,8 @@ define void @select_v8i64(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: select_v8i64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-limit-duplane.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-limit-duplane.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-limit-duplane.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-limit-duplane.ll @@ -6,14 +6,14 @@ define <4 x i32> @test(ptr %arg1, ptr %arg2) { ; CHECK-LABEL: test: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov x8, #8 // =0x8 ; CHECK-NEXT: ptrue p0.s, vl8 +; CHECK-NEXT: mov x8, #8 // =0x8 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; CHECK-NEXT: ld1w { z2.s }, p0/z, [x0] ; CHECK-NEXT: add z1.s, z0.s, z0.s ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #16 -; CHECK-NEXT: dup v0.4s, v0.s[2] ; CHECK-NEXT: add z2.s, z2.s, z2.s +; CHECK-NEXT: dup v0.4s, v0.s[2] ; CHECK-NEXT: st1w { z1.s }, p0, [x0, x8, lsl #2] ; CHECK-NEXT: st1w { z2.s }, p0, [x0] ; CHECK-NEXT: ret @@ -29,14 +29,14 @@ define <2 x i32> @test2(ptr %arg1, ptr %arg2) { ; CHECK-LABEL: test2: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov x8, #8 // =0x8 ; CHECK-NEXT: ptrue p0.s, vl8 +; CHECK-NEXT: mov x8, #8 // =0x8 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; CHECK-NEXT: ld1w { z2.s }, p0/z, [x0] ; CHECK-NEXT: add z1.s, z0.s, z0.s ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #24 -; CHECK-NEXT: dup v0.2s, v0.s[0] ; CHECK-NEXT: add z2.s, z2.s, z2.s +; CHECK-NEXT: dup v0.2s, v0.s[0] ; CHECK-NEXT: st1w { z1.s }, p0, [x0, x8, lsl #2] ; CHECK-NEXT: st1w { z2.s }, p0, [x0] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-loads.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-loads.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-loads.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-loads.ll @@ -52,8 +52,8 @@ define <16 x float> @load_v16f32(ptr %a) #0 { ; VBITS_GE_256-LABEL: load_v16f32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x9, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x9, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x9, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x8, x9, lsl #2] @@ -87,24 +87,24 @@ define <32 x float> @load_v32f32(ptr %a) #0 { ; VBITS_GE_256-LABEL: load_v32f32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x9, #16 -; VBITS_GE_256-NEXT: mov x10, #24 -; VBITS_GE_256-NEXT: mov x11, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x9, #24 // =0x18 +; VBITS_GE_256-NEXT: mov x10, #16 // =0x10 +; VBITS_GE_256-NEXT: mov x11, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x9, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0, x10, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0, x11, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x0] -; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x8, x10, lsl #2] ; 
VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x8, x9, lsl #2] +; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x8, x10, lsl #2] ; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x8, x11, lsl #2] ; VBITS_GE_256-NEXT: st1w { z3.s }, p0, [x8] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: load_v32f32: ; VBITS_GE_512: // %bb.0: -; VBITS_GE_512-NEXT: mov x9, #16 ; VBITS_GE_512-NEXT: ptrue p0.s, vl16 +; VBITS_GE_512-NEXT: mov x9, #16 // =0x10 ; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0, x9, lsl #2] ; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x8, x9, lsl #2] @@ -131,52 +131,52 @@ define <64 x float> @load_v64f32(ptr %a) #0 { ; VBITS_GE_256-LABEL: load_v64f32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x9, #8 -; VBITS_GE_256-NEXT: mov x10, #48 -; VBITS_GE_256-NEXT: mov x11, #56 -; VBITS_GE_256-NEXT: mov x12, #32 -; VBITS_GE_256-NEXT: mov x13, #40 -; VBITS_GE_256-NEXT: mov x14, #16 -; VBITS_GE_256-NEXT: mov x15, #24 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 -; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x10, lsl #2] -; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0, x11, lsl #2] -; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0, x12, lsl #2] -; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x0, x13, lsl #2] -; VBITS_GE_256-NEXT: ld1w { z4.s }, p0/z, [x0, x14, lsl #2] +; VBITS_GE_256-NEXT: mov x9, #8 // =0x8 +; VBITS_GE_256-NEXT: mov x10, #16 // =0x10 +; VBITS_GE_256-NEXT: mov x11, #24 // =0x18 +; VBITS_GE_256-NEXT: mov x12, #56 // =0x38 +; VBITS_GE_256-NEXT: mov x13, #32 // =0x20 +; VBITS_GE_256-NEXT: mov x14, #48 // =0x30 +; VBITS_GE_256-NEXT: mov x15, #40 // =0x28 +; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x12, lsl #2] +; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0, x9, lsl #2] +; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0, x14, lsl #2] +; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x0, x10, lsl #2] +; VBITS_GE_256-NEXT: ld1w { z4.s }, p0/z, [x0, x11, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z5.s }, p0/z, [x0, x15, lsl #2] -; VBITS_GE_256-NEXT: ld1w { z6.s }, p0/z, [x0, x9, lsl #2] +; VBITS_GE_256-NEXT: ld1w { z6.s }, p0/z, [x0, x13, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z7.s }, p0/z, [x0] -; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x8, x11, lsl #2] -; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x8, x10, lsl #2] -; VBITS_GE_256-NEXT: st1w { z3.s }, p0, [x8, x13, lsl #2] -; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x8, x12, lsl #2] +; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x8, x12, lsl #2] +; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x8, x14, lsl #2] ; VBITS_GE_256-NEXT: st1w { z5.s }, p0, [x8, x15, lsl #2] -; VBITS_GE_256-NEXT: st1w { z4.s }, p0, [x8, x14, lsl #2] -; VBITS_GE_256-NEXT: st1w { z6.s }, p0, [x8, x9, lsl #2] +; VBITS_GE_256-NEXT: st1w { z6.s }, p0, [x8, x13, lsl #2] +; VBITS_GE_256-NEXT: st1w { z4.s }, p0, [x8, x11, lsl #2] +; VBITS_GE_256-NEXT: st1w { z3.s }, p0, [x8, x10, lsl #2] +; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x8, x9, lsl #2] ; VBITS_GE_256-NEXT: st1w { z7.s }, p0, [x8] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: load_v64f32: ; VBITS_GE_512: // %bb.0: -; VBITS_GE_512-NEXT: mov x9, #32 -; VBITS_GE_512-NEXT: mov x10, #48 -; VBITS_GE_512-NEXT: mov x11, #16 ; VBITS_GE_512-NEXT: ptrue p0.s, vl16 +; VBITS_GE_512-NEXT: mov x9, #48 // =0x30 +; VBITS_GE_512-NEXT: mov x10, #32 // =0x20 +; VBITS_GE_512-NEXT: mov x11, #16 // =0x10 ; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0, x9, lsl #2] ; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x0, x10, lsl #2] ; VBITS_GE_512-NEXT: ld1w { z2.s }, p0/z, [x0, x11, lsl #2] ; VBITS_GE_512-NEXT: ld1w { z3.s }, p0/z, [x0] -; VBITS_GE_512-NEXT: 
st1w { z1.s }, p0, [x8, x10, lsl #2] ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x8, x9, lsl #2] +; VBITS_GE_512-NEXT: st1w { z1.s }, p0, [x8, x10, lsl #2] ; VBITS_GE_512-NEXT: st1w { z2.s }, p0, [x8, x11, lsl #2] ; VBITS_GE_512-NEXT: st1w { z3.s }, p0, [x8] ; VBITS_GE_512-NEXT: ret ; ; VBITS_GE_1024-LABEL: load_v64f32: ; VBITS_GE_1024: // %bb.0: -; VBITS_GE_1024-NEXT: mov x9, #32 ; VBITS_GE_1024-NEXT: ptrue p0.s, vl32 +; VBITS_GE_1024-NEXT: mov x9, #32 // =0x20 ; VBITS_GE_1024-NEXT: ld1w { z0.s }, p0/z, [x0, x9, lsl #2] ; VBITS_GE_1024-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x8, x9, lsl #2] diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-log-reduce.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-log-reduce.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-log-reduce.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-log-reduce.ll @@ -13,8 +13,8 @@ define i8 @andv_v8i8(<8 x i8> %a) vscale_range(2,0) #0 { ; CHECK-LABEL: andv_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl8 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: andv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -26,8 +26,8 @@ define i8 @andv_v16i8(<16 x i8> %a) vscale_range(2,0) #0 { ; CHECK-LABEL: andv_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: andv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -51,8 +51,8 @@ define i8 @andv_v64i8(ptr %a) #0 { ; VBITS_GE_256-LABEL: andv_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 // =0x20 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 // =0x20 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: and z0.d, z1.d, z0.d @@ -102,8 +102,8 @@ define i16 @andv_v4i16(<4 x i16> %a) vscale_range(2,0) #0 { ; CHECK-LABEL: andv_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: andv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -115,8 +115,8 @@ define i16 @andv_v8i16(<8 x i16> %a) vscale_range(2,0) #0 { ; CHECK-LABEL: andv_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: andv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -140,8 +140,8 @@ define i16 @andv_v32i16(ptr %a) #0 { ; VBITS_GE_256-LABEL: andv_v32i16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: and z0.d, z1.d, z0.d @@ -191,8 +191,8 @@ define i32 @andv_v2i32(<2 x i32> %a) vscale_range(2,0) #0 { ; CHECK-LABEL: andv_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: andv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -204,8 +204,8 @@ define i32 @andv_v4i32(<4 x i32> %a) vscale_range(2,0) #0 { ; CHECK-LABEL: andv_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 
killed $q0 def $z0 ; CHECK-NEXT: andv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -229,8 +229,8 @@ define i32 @andv_v16i32(ptr %a) #0 { ; VBITS_GE_256-LABEL: andv_v16i32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: and z0.d, z1.d, z0.d @@ -291,8 +291,8 @@ define i64 @andv_v2i64(<2 x i64> %a) vscale_range(2,0) #0 { ; CHECK-LABEL: andv_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: andv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret @@ -316,8 +316,8 @@ define i64 @andv_v8i64(ptr %a) #0 { ; VBITS_GE_256-LABEL: andv_v8i64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: and z0.d, z1.d, z0.d @@ -371,8 +371,8 @@ define i8 @eorv_v8i8(<8 x i8> %a) vscale_range(2,0) #0 { ; CHECK-LABEL: eorv_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl8 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: eorv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -384,8 +384,8 @@ define i8 @eorv_v16i8(<16 x i8> %a) vscale_range(2,0) #0 { ; CHECK-LABEL: eorv_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: eorv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -409,8 +409,8 @@ define i8 @eorv_v64i8(ptr %a) #0 { ; VBITS_GE_256-LABEL: eorv_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 // =0x20 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 // =0x20 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: eor z0.d, z1.d, z0.d @@ -460,8 +460,8 @@ define i16 @eorv_v4i16(<4 x i16> %a) vscale_range(2,0) #0 { ; CHECK-LABEL: eorv_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: eorv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -473,8 +473,8 @@ define i16 @eorv_v8i16(<8 x i16> %a) vscale_range(2,0) #0 { ; CHECK-LABEL: eorv_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: eorv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -498,8 +498,8 @@ define i16 @eorv_v32i16(ptr %a) #0 { ; VBITS_GE_256-LABEL: eorv_v32i16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: eor z0.d, z1.d, z0.d @@ -549,8 +549,8 @@ define i32 @eorv_v2i32(<2 x i32> %a) vscale_range(2,0) #0 { ; CHECK-LABEL: eorv_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def 
$z0 ; CHECK-NEXT: eorv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -562,8 +562,8 @@ define i32 @eorv_v4i32(<4 x i32> %a) vscale_range(2,0) #0 { ; CHECK-LABEL: eorv_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: eorv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -587,8 +587,8 @@ define i32 @eorv_v16i32(ptr %a) #0 { ; VBITS_GE_256-LABEL: eorv_v16i32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: eor z0.d, z1.d, z0.d @@ -649,8 +649,8 @@ define i64 @eorv_v2i64(<2 x i64> %a) vscale_range(2,0) #0 { ; CHECK-LABEL: eorv_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: eorv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret @@ -674,8 +674,8 @@ define i64 @eorv_v8i64(ptr %a) #0 { ; VBITS_GE_256-LABEL: eorv_v8i64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: eor z0.d, z1.d, z0.d @@ -729,8 +729,8 @@ define i8 @orv_v8i8(<8 x i8> %a) vscale_range(2,0) #0 { ; CHECK-LABEL: orv_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl8 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: orv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -742,8 +742,8 @@ define i8 @orv_v16i8(<16 x i8> %a) vscale_range(2,0) #0 { ; CHECK-LABEL: orv_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: orv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -767,8 +767,8 @@ define i8 @orv_v64i8(ptr %a) #0 { ; VBITS_GE_256-LABEL: orv_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 // =0x20 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 // =0x20 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: orr z0.d, z1.d, z0.d @@ -818,8 +818,8 @@ define i16 @orv_v4i16(<4 x i16> %a) vscale_range(2,0) #0 { ; CHECK-LABEL: orv_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: orv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -831,8 +831,8 @@ define i16 @orv_v8i16(<8 x i16> %a) vscale_range(2,0) #0 { ; CHECK-LABEL: orv_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: orv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -856,8 +856,8 @@ define i16 @orv_v32i16(ptr %a) #0 { ; VBITS_GE_256-LABEL: orv_v32i16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, 
[x0] ; VBITS_GE_256-NEXT: orr z0.d, z1.d, z0.d @@ -907,8 +907,8 @@ define i32 @orv_v2i32(<2 x i32> %a) vscale_range(2,0) #0 { ; CHECK-LABEL: orv_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: orv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -920,8 +920,8 @@ define i32 @orv_v4i32(<4 x i32> %a) vscale_range(2,0) #0 { ; CHECK-LABEL: orv_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: orv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -945,8 +945,8 @@ define i32 @orv_v16i32(ptr %a) #0 { ; VBITS_GE_256-LABEL: orv_v16i32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: orr z0.d, z1.d, z0.d @@ -1007,8 +1007,8 @@ define i64 @orv_v2i64(<2 x i64> %a) vscale_range(2,0) #0 { ; CHECK-LABEL: orv_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: orv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret @@ -1032,8 +1032,8 @@ define i64 @orv_v8i64(ptr %a) #0 { ; VBITS_GE_256-LABEL: orv_v8i64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: orr z0.d, z1.d, z0.d diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-mask-opt.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-mask-opt.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-mask-opt.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-mask-opt.ll @@ -12,8 +12,8 @@ define void @masked_gather_v2i8(ptr %a, ptr %b) vscale_range(2,0) #0 { ; CHECK-LABEL: masked_gather_v2i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x1] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldr q0, [x1] ; CHECK-NEXT: ld1b { z0.d }, p0/z, [z0.d] ; CHECK-NEXT: ptrue p0.s, vl2 ; CHECK-NEXT: xtn v0.2s, v0.2d @@ -42,8 +42,8 @@ define void @masked_gather_v8i8(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: masked_gather_v8i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x1, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1] ; VBITS_GE_256-NEXT: ld1b { z0.d }, p0/z, [z0.d] @@ -114,8 +114,8 @@ define void @masked_gather_v2i16(ptr %a, ptr %b) vscale_range(2,0) #0 { ; CHECK-LABEL: masked_gather_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x1] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldr q0, [x1] ; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d] ; CHECK-NEXT: ptrue p0.s, vl2 ; CHECK-NEXT: xtn v0.2s, v0.2d @@ -146,8 +146,8 @@ define void @masked_gather_v8i16(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: masked_gather_v8i16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x1, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1] ; VBITS_GE_256-NEXT: ld1h { z0.d }, p0/z, [z0.d] @@ -214,8 +214,8 
@@ define void @masked_gather_v2i32(ptr %a, ptr %b) vscale_range(2,0) #0 { ; CHECK-LABEL: masked_gather_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x1] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldr q0, [x1] ; CHECK-NEXT: ld1w { z0.d }, p0/z, [z0.d] ; CHECK-NEXT: xtn v0.2s, v0.2d ; CHECK-NEXT: str d0, [x0] @@ -244,8 +244,9 @@ define void @masked_gather_v8i32(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: masked_gather_v8i32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 +; VBITS_GE_256-NEXT: ptrue p1.s, vl8 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x1, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1] ; VBITS_GE_256-NEXT: ld1w { z0.d }, p0/z, [z0.d] @@ -254,8 +255,7 @@ ; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s ; VBITS_GE_256-NEXT: splice z1.s, p0, z1.s, z0.s -; VBITS_GE_256-NEXT: ptrue p0.s, vl8 -; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0] +; VBITS_GE_256-NEXT: st1w { z1.s }, p1, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: masked_gather_v8i32: @@ -310,8 +310,8 @@ define void @masked_gather_v2i64(ptr %a, ptr %b) vscale_range(2,0) #0 { ; CHECK-LABEL: masked_gather_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x1] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldr q0, [x1] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [z0.d] ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret @@ -338,8 +338,8 @@ define void @masked_gather_v8i64(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: masked_gather_v8i64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x1, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1] ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [z0.d] diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll @@ -13,15 +13,15 @@ ; CHECK-LABEL: masked_gather_v2i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ldrb w8, [x0] +; CHECK-NEXT: ldrb w9, [x0, #1] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: ldrb w8, [x0, #1] -; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: cmeq v0.2s, v0.2s, #0 ; CHECK-NEXT: sshll v0.2d, v0.2s, #0 ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 -; CHECK-NEXT: ld1b { z0.d }, p0/z, [z1.d] +; CHECK-NEXT: ldr q0, [x1] +; CHECK-NEXT: ld1b { z0.d }, p0/z, [z0.d] ; CHECK-NEXT: ptrue p0.s, vl2 ; CHECK-NEXT: xtn v0.2s, v0.2d ; CHECK-NEXT: st1b { z0.s }, p0, [x0] @@ -39,9 +39,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldr s0, [x0] ; CHECK-NEXT: ptrue p0.d, vl4 -; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] ; CHECK-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-NEXT: cmeq v0.4h, v0.4h, #0 +; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] ; CHECK-NEXT: sunpklo z0.s, z0.h ; CHECK-NEXT: sunpklo z0.d, z0.s ; CHECK-NEXT: cmpne p1.d, p0/z, z0.d, #0 @@ -60,13 +60,12 @@ ; VBITS_GE_256-LABEL: masked_gather_v8i8: ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: ldr d0, [x0] -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: cmeq v0.8b, v0.8b, #0 -; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] -; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1] ; VBITS_GE_256-NEXT: zip2 v1.8b, v0.8b, v0.8b ; 
VBITS_GE_256-NEXT: zip1 v0.8b, v0.8b, v0.8b +; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] ; VBITS_GE_256-NEXT: shl v1.4h, v1.4h, #8 ; VBITS_GE_256-NEXT: shl v0.4h, v0.4h, #8 ; VBITS_GE_256-NEXT: sshr v1.4h, v1.4h, #8 @@ -76,9 +75,10 @@ ; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s ; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s ; VBITS_GE_256-NEXT: cmpne p1.d, p0/z, z1.d, #0 +; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1] ; VBITS_GE_256-NEXT: cmpne p0.d, p0/z, z0.d, #0 ; VBITS_GE_256-NEXT: ld1b { z0.d }, p1/z, [z2.d] -; VBITS_GE_256-NEXT: ld1b { z1.d }, p0/z, [z3.d] +; VBITS_GE_256-NEXT: ld1b { z1.d }, p0/z, [z1.d] ; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s ; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h @@ -91,9 +91,9 @@ ; VBITS_GE_512: // %bb.0: ; VBITS_GE_512-NEXT: ldr d0, [x0] ; VBITS_GE_512-NEXT: ptrue p0.d, vl8 -; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1] ; VBITS_GE_512-NEXT: cmeq v0.8b, v0.8b, #0 ; VBITS_GE_512-NEXT: sunpklo z0.h, z0.b +; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1] ; VBITS_GE_512-NEXT: sunpklo z0.s, z0.h ; VBITS_GE_512-NEXT: sunpklo z0.d, z0.s ; VBITS_GE_512-NEXT: cmpne p0.d, p0/z, z0.d, #0 @@ -116,9 +116,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ptrue p0.d, vl16 -; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] ; CHECK-NEXT: cmeq v0.16b, v0.16b, #0 ; CHECK-NEXT: sunpklo z0.h, z0.b +; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] ; CHECK-NEXT: sunpklo z0.s, z0.h ; CHECK-NEXT: sunpklo z0.d, z0.s ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 @@ -142,12 +142,12 @@ ; CHECK-NEXT: ptrue p0.b, vl32 ; CHECK-NEXT: ptrue p1.d, vl32 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] -; CHECK-NEXT: ld1d { z1.d }, p1/z, [x1] ; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0 +; CHECK-NEXT: ld1d { z0.d }, p1/z, [x1] ; CHECK-NEXT: punpklo p0.h, p0.b ; CHECK-NEXT: punpklo p0.h, p0.b ; CHECK-NEXT: punpklo p0.h, p0.b -; CHECK-NEXT: ld1b { z0.d }, p0/z, [z1.d] +; CHECK-NEXT: ld1b { z0.d }, p0/z, [z0.d] ; CHECK-NEXT: st1b { z0.d }, p1, [x0] ; CHECK-NEXT: ret %cval = load <32 x i8>, ptr %a @@ -166,15 +166,15 @@ ; CHECK-LABEL: masked_gather_v2i16: ; CHECK: // %bb.0: ; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: ldrh w9, [x0, #2] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: ldrh w8, [x0, #2] -; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: cmeq v0.2s, v0.2s, #0 ; CHECK-NEXT: sshll v0.2d, v0.2s, #0 ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 -; CHECK-NEXT: ld1h { z0.d }, p0/z, [z1.d] +; CHECK-NEXT: ldr q0, [x1] +; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d] ; CHECK-NEXT: ptrue p0.s, vl2 ; CHECK-NEXT: xtn v0.2s, v0.2d ; CHECK-NEXT: st1h { z0.s }, p0, [x0] @@ -192,9 +192,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ptrue p0.d, vl4 -; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] ; CHECK-NEXT: cmeq v0.4h, v0.4h, #0 ; CHECK-NEXT: sunpklo z0.s, z0.h +; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] ; CHECK-NEXT: sunpklo z0.d, z0.s ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 ; CHECK-NEXT: ld1h { z0.d }, p0/z, [z1.d] @@ -214,23 +214,23 @@ ; VBITS_GE_256-LABEL: masked_gather_v8i16: ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: ldr q0, [x0] -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: cmeq v0.8h, v0.8h, #0 -; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] -; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1] ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h +; 
VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] +; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1] +; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h ; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s +; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s ; VBITS_GE_256-NEXT: cmpne p1.d, p0/z, z0.d, #0 ; VBITS_GE_256-NEXT: ld1h { z0.d }, p1/z, [z3.d] -; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h -; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s ; VBITS_GE_256-NEXT: cmpne p0.d, p0/z, z1.d, #0 -; VBITS_GE_256-NEXT: ld1h { z1.d }, p0/z, [z2.d] ; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s -; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h +; VBITS_GE_256-NEXT: ld1h { z1.d }, p0/z, [z2.d] ; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s +; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h ; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h ; VBITS_GE_256-NEXT: mov v0.d[1], v1.d[0] ; VBITS_GE_256-NEXT: str q0, [x0] @@ -240,9 +240,9 @@ ; VBITS_GE_512: // %bb.0: ; VBITS_GE_512-NEXT: ldr q0, [x0] ; VBITS_GE_512-NEXT: ptrue p0.d, vl8 -; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1] ; VBITS_GE_512-NEXT: cmeq v0.8h, v0.8h, #0 ; VBITS_GE_512-NEXT: sunpklo z0.s, z0.h +; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1] ; VBITS_GE_512-NEXT: sunpklo z0.d, z0.s ; VBITS_GE_512-NEXT: cmpne p0.d, p0/z, z0.d, #0 ; VBITS_GE_512-NEXT: ld1h { z0.d }, p0/z, [z1.d] @@ -264,11 +264,11 @@ ; CHECK-NEXT: ptrue p0.h, vl16 ; CHECK-NEXT: ptrue p1.d, vl16 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] -; CHECK-NEXT: ld1d { z1.d }, p1/z, [x1] ; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0 +; CHECK-NEXT: ld1d { z0.d }, p1/z, [x1] ; CHECK-NEXT: punpklo p0.h, p0.b ; CHECK-NEXT: punpklo p0.h, p0.b -; CHECK-NEXT: ld1h { z0.d }, p0/z, [z1.d] +; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d] ; CHECK-NEXT: st1h { z0.d }, p1, [x0] ; CHECK-NEXT: ret %cval = load <16 x i16>, ptr %a @@ -285,11 +285,11 @@ ; CHECK-NEXT: ptrue p0.h, vl32 ; CHECK-NEXT: ptrue p1.d, vl32 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] -; CHECK-NEXT: ld1d { z1.d }, p1/z, [x1] ; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0 +; CHECK-NEXT: ld1d { z0.d }, p1/z, [x1] ; CHECK-NEXT: punpklo p0.h, p0.b ; CHECK-NEXT: punpklo p0.h, p0.b -; CHECK-NEXT: ld1h { z0.d }, p0/z, [z1.d] +; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d] ; CHECK-NEXT: st1h { z0.d }, p1, [x0] ; CHECK-NEXT: ret %cval = load <32 x i16>, ptr %a @@ -309,11 +309,11 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: cmeq v0.2s, v0.2s, #0 ; CHECK-NEXT: sshll v0.2d, v0.2s, #0 ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 -; CHECK-NEXT: ld1w { z0.d }, p0/z, [z1.d] +; CHECK-NEXT: ldr q0, [x1] +; CHECK-NEXT: ld1w { z0.d }, p0/z, [z0.d] ; CHECK-NEXT: xtn v0.2s, v0.2d ; CHECK-NEXT: str d0, [x0] ; CHECK-NEXT: ret @@ -330,9 +330,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ptrue p0.d, vl4 -; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 ; CHECK-NEXT: sunpklo z0.d, z0.s +; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 ; CHECK-NEXT: ld1w { z0.d }, p0/z, [z1.d] ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s @@ -350,25 +350,25 @@ ; VBITS_GE_256-LABEL: masked_gather_v8i32: ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 -; VBITS_GE_256-NEXT: mov x8, #4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 +; VBITS_GE_256-NEXT: ptrue p2.d, vl4 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0] -; VBITS_GE_256-NEXT: ptrue p1.d, vl4 -; VBITS_GE_256-NEXT: ld1d { z1.d }, p1/z, [x1, x8, lsl #3] -; VBITS_GE_256-NEXT: ld1d { z2.d }, p1/z, [x1] -; VBITS_GE_256-NEXT: cmpeq p2.s, p0/z, z0.s, #0 -; VBITS_GE_256-NEXT: 
mov z0.s, p2/z, #-1 // =0xffffffffffffffff -; VBITS_GE_256-NEXT: punpklo p2.h, p2.b +; VBITS_GE_256-NEXT: ld1d { z1.d }, p2/z, [x1, x8, lsl #3] +; VBITS_GE_256-NEXT: ld1d { z2.d }, p2/z, [x1] +; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z0.s, #0 +; VBITS_GE_256-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff +; VBITS_GE_256-NEXT: punpklo p1.h, p1.b ; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16 -; VBITS_GE_256-NEXT: and p2.b, p2/z, p2.b, p1.b ; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s -; VBITS_GE_256-NEXT: ld1w { z2.d }, p2/z, [z2.d] -; VBITS_GE_256-NEXT: cmpne p1.d, p1/z, z0.d, #0 -; VBITS_GE_256-NEXT: ld1w { z0.d }, p1/z, [z1.d] +; VBITS_GE_256-NEXT: and p1.b, p1/z, p1.b, p2.b +; VBITS_GE_256-NEXT: cmpne p2.d, p2/z, z0.d, #0 +; VBITS_GE_256-NEXT: ld1w { z0.d }, p1/z, [z2.d] ; VBITS_GE_256-NEXT: ptrue p1.s, vl4 -; VBITS_GE_256-NEXT: uzp1 z1.s, z2.s, z2.s +; VBITS_GE_256-NEXT: ld1w { z1.d }, p2/z, [z1.d] ; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s -; VBITS_GE_256-NEXT: splice z1.s, p1, z1.s, z0.s -; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0] +; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s +; VBITS_GE_256-NEXT: splice z0.s, p1, z0.s, z1.s +; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: masked_gather_v8i32: @@ -376,10 +376,10 @@ ; VBITS_GE_512-NEXT: ptrue p0.s, vl8 ; VBITS_GE_512-NEXT: ptrue p1.d, vl8 ; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0] -; VBITS_GE_512-NEXT: ld1d { z1.d }, p1/z, [x1] ; VBITS_GE_512-NEXT: cmpeq p0.s, p0/z, z0.s, #0 +; VBITS_GE_512-NEXT: ld1d { z0.d }, p1/z, [x1] ; VBITS_GE_512-NEXT: punpklo p0.h, p0.b -; VBITS_GE_512-NEXT: ld1w { z0.d }, p0/z, [z1.d] +; VBITS_GE_512-NEXT: ld1w { z0.d }, p0/z, [z0.d] ; VBITS_GE_512-NEXT: st1w { z0.d }, p1, [x0] ; VBITS_GE_512-NEXT: ret %cval = load <8 x i32>, ptr %a @@ -396,10 +396,10 @@ ; CHECK-NEXT: ptrue p0.s, vl16 ; CHECK-NEXT: ptrue p1.d, vl16 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] -; CHECK-NEXT: ld1d { z1.d }, p1/z, [x1] ; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0 +; CHECK-NEXT: ld1d { z0.d }, p1/z, [x1] ; CHECK-NEXT: punpklo p0.h, p0.b -; CHECK-NEXT: ld1w { z0.d }, p0/z, [z1.d] +; CHECK-NEXT: ld1w { z0.d }, p0/z, [z0.d] ; CHECK-NEXT: st1w { z0.d }, p1, [x0] ; CHECK-NEXT: ret %cval = load <16 x i32>, ptr %a @@ -416,10 +416,10 @@ ; CHECK-NEXT: ptrue p0.s, vl32 ; CHECK-NEXT: ptrue p1.d, vl32 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] -; CHECK-NEXT: ld1d { z1.d }, p1/z, [x1] ; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0 +; CHECK-NEXT: ld1d { z0.d }, p1/z, [x1] ; CHECK-NEXT: punpklo p0.h, p0.b -; CHECK-NEXT: ld1w { z0.d }, p0/z, [z1.d] +; CHECK-NEXT: ld1w { z0.d }, p0/z, [z0.d] ; CHECK-NEXT: st1w { z0.d }, p1, [x0] ; CHECK-NEXT: ret %cval = load <32 x i32>, ptr %a @@ -460,12 +460,12 @@ define void @masked_gather_v2i64(ptr %a, ptr %b) vscale_range(2,0) #0 { ; CHECK-LABEL: masked_gather_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: cmeq v0.2d, v0.2d, #0 ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 -; CHECK-NEXT: ld1d { z0.d }, p0/z, [z1.d] +; CHECK-NEXT: ldr q0, [x1] +; CHECK-NEXT: ld1d { z0.d }, p0/z, [z0.d] ; CHECK-NEXT: str q0, [x0] ; CHECK-NEXT: ret %cval = load <2 x i64>, ptr %a @@ -497,18 +497,18 @@ define void @masked_gather_v8i64(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: masked_gather_v8i64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; 
VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] -; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1] ; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z0.d, #0 +; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x1] ; VBITS_GE_256-NEXT: cmpeq p2.d, p0/z, z1.d, #0 -; VBITS_GE_256-NEXT: ld1d { z0.d }, p1/z, [z2.d] -; VBITS_GE_256-NEXT: ld1d { z1.d }, p2/z, [z3.d] -; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3] -; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0] +; VBITS_GE_256-NEXT: ld1d { z1.d }, p1/z, [z2.d] +; VBITS_GE_256-NEXT: ld1d { z0.d }, p2/z, [z0.d] +; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0, x8, lsl #3] +; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: masked_gather_v8i64: @@ -572,9 +572,8 @@ ; CHECK-LABEL: masked_gather_v2f16: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr s1, [x0] -; CHECK-NEXT: ptrue p0.d, vl4 ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: ldr q2, [x1] +; CHECK-NEXT: ptrue p0.d, vl4 ; CHECK-NEXT: fcmeq v1.4h, v1.4h, #0.0 ; CHECK-NEXT: sshll v1.4s, v1.4h, #0 ; CHECK-NEXT: mov v0.h[0], v1.h[0] @@ -583,7 +582,8 @@ ; CHECK-NEXT: sunpklo z0.s, z0.h ; CHECK-NEXT: sunpklo z0.d, z0.s ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 -; CHECK-NEXT: ld1h { z0.d }, p0/z, [z2.d] +; CHECK-NEXT: ldr q0, [x1] +; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d] ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: str s0, [x0] @@ -601,9 +601,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ptrue p0.d, vl4 -; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] ; CHECK-NEXT: fcmeq v0.4h, v0.4h, #0.0 ; CHECK-NEXT: sunpklo z0.s, z0.h +; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] ; CHECK-NEXT: sunpklo z0.d, z0.s ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 ; CHECK-NEXT: ld1h { z0.d }, p0/z, [z1.d] @@ -623,23 +623,23 @@ ; VBITS_GE_256-LABEL: masked_gather_v8f16: ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: ldr q0, [x0] -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: fcmeq v0.8h, v0.8h, #0.0 -; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] -; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1] ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h +; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] +; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1] +; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h ; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s +; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s ; VBITS_GE_256-NEXT: cmpne p1.d, p0/z, z0.d, #0 ; VBITS_GE_256-NEXT: ld1h { z0.d }, p1/z, [z3.d] -; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h -; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s ; VBITS_GE_256-NEXT: cmpne p0.d, p0/z, z1.d, #0 -; VBITS_GE_256-NEXT: ld1h { z1.d }, p0/z, [z2.d] ; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s -; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h +; VBITS_GE_256-NEXT: ld1h { z1.d }, p0/z, [z2.d] ; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s +; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h ; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h ; VBITS_GE_256-NEXT: mov v0.d[1], v1.d[0] ; VBITS_GE_256-NEXT: str q0, [x0] @@ -649,9 +649,9 @@ ; VBITS_GE_512: // %bb.0: ; VBITS_GE_512-NEXT: ldr q0, [x0] ; VBITS_GE_512-NEXT: ptrue p0.d, vl8 -; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1] ; VBITS_GE_512-NEXT: fcmeq v0.8h, v0.8h, #0.0 ; VBITS_GE_512-NEXT: sunpklo z0.s, z0.h +; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1] ; VBITS_GE_512-NEXT: sunpklo z0.d, z0.s ; VBITS_GE_512-NEXT: cmpne p0.d, p0/z, z0.d, #0 ; 
VBITS_GE_512-NEXT: ld1h { z0.d }, p0/z, [z1.d] @@ -673,11 +673,11 @@ ; CHECK-NEXT: ptrue p0.h, vl16 ; CHECK-NEXT: ptrue p1.d, vl16 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] -; CHECK-NEXT: ld1d { z1.d }, p1/z, [x1] ; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0 +; CHECK-NEXT: ld1d { z0.d }, p1/z, [x1] ; CHECK-NEXT: punpklo p0.h, p0.b ; CHECK-NEXT: punpklo p0.h, p0.b -; CHECK-NEXT: ld1h { z0.d }, p0/z, [z1.d] +; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d] ; CHECK-NEXT: st1h { z0.d }, p1, [x0] ; CHECK-NEXT: ret %cval = load <16 x half>, ptr %a @@ -694,11 +694,11 @@ ; CHECK-NEXT: ptrue p0.h, vl32 ; CHECK-NEXT: ptrue p1.d, vl32 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] -; CHECK-NEXT: ld1d { z1.d }, p1/z, [x1] ; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0 +; CHECK-NEXT: ld1d { z0.d }, p1/z, [x1] ; CHECK-NEXT: punpklo p0.h, p0.b ; CHECK-NEXT: punpklo p0.h, p0.b -; CHECK-NEXT: ld1h { z0.d }, p0/z, [z1.d] +; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d] ; CHECK-NEXT: st1h { z0.d }, p1, [x0] ; CHECK-NEXT: ret %cval = load <32 x half>, ptr %a @@ -718,11 +718,11 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: fcmeq v0.2s, v0.2s, #0.0 ; CHECK-NEXT: sshll v0.2d, v0.2s, #0 ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 -; CHECK-NEXT: ld1w { z0.d }, p0/z, [z1.d] +; CHECK-NEXT: ldr q0, [x1] +; CHECK-NEXT: ld1w { z0.d }, p0/z, [z0.d] ; CHECK-NEXT: xtn v0.2s, v0.2d ; CHECK-NEXT: str d0, [x0] ; CHECK-NEXT: ret @@ -739,9 +739,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ptrue p0.d, vl4 -; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] ; CHECK-NEXT: fcmeq v0.4s, v0.4s, #0.0 ; CHECK-NEXT: sunpklo z0.d, z0.s +; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 ; CHECK-NEXT: ld1w { z0.d }, p0/z, [z1.d] ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s @@ -759,25 +759,25 @@ ; VBITS_GE_256-LABEL: masked_gather_v8f32: ; VBITS_GE_256: // %bb.0: ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 -; VBITS_GE_256-NEXT: mov x8, #4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 +; VBITS_GE_256-NEXT: ptrue p2.d, vl4 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0] -; VBITS_GE_256-NEXT: ptrue p1.d, vl4 -; VBITS_GE_256-NEXT: ld1d { z1.d }, p1/z, [x1, x8, lsl #3] -; VBITS_GE_256-NEXT: ld1d { z2.d }, p1/z, [x1] -; VBITS_GE_256-NEXT: fcmeq p2.s, p0/z, z0.s, #0.0 -; VBITS_GE_256-NEXT: mov z0.s, p2/z, #-1 // =0xffffffffffffffff -; VBITS_GE_256-NEXT: punpklo p2.h, p2.b +; VBITS_GE_256-NEXT: ld1d { z1.d }, p2/z, [x1, x8, lsl #3] +; VBITS_GE_256-NEXT: ld1d { z2.d }, p2/z, [x1] +; VBITS_GE_256-NEXT: fcmeq p1.s, p0/z, z0.s, #0.0 +; VBITS_GE_256-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff +; VBITS_GE_256-NEXT: punpklo p1.h, p1.b ; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16 -; VBITS_GE_256-NEXT: and p2.b, p2/z, p2.b, p1.b ; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s -; VBITS_GE_256-NEXT: ld1w { z2.d }, p2/z, [z2.d] -; VBITS_GE_256-NEXT: cmpne p1.d, p1/z, z0.d, #0 -; VBITS_GE_256-NEXT: ld1w { z0.d }, p1/z, [z1.d] +; VBITS_GE_256-NEXT: and p1.b, p1/z, p1.b, p2.b +; VBITS_GE_256-NEXT: cmpne p2.d, p2/z, z0.d, #0 +; VBITS_GE_256-NEXT: ld1w { z0.d }, p1/z, [z2.d] ; VBITS_GE_256-NEXT: ptrue p1.s, vl4 -; VBITS_GE_256-NEXT: uzp1 z1.s, z2.s, z2.s +; VBITS_GE_256-NEXT: ld1w { z1.d }, p2/z, [z1.d] ; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s -; VBITS_GE_256-NEXT: splice z1.s, p1, z1.s, z0.s -; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0] +; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s +; VBITS_GE_256-NEXT: splice z0.s, p1, z0.s, z1.s +; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; 
 ; VBITS_GE_512-LABEL: masked_gather_v8f32:
@@ -785,10 +785,10 @@
 ; VBITS_GE_512-NEXT: ptrue p0.s, vl8
 ; VBITS_GE_512-NEXT: ptrue p1.d, vl8
 ; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
-; VBITS_GE_512-NEXT: ld1d { z1.d }, p1/z, [x1]
 ; VBITS_GE_512-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0
+; VBITS_GE_512-NEXT: ld1d { z0.d }, p1/z, [x1]
 ; VBITS_GE_512-NEXT: punpklo p0.h, p0.b
-; VBITS_GE_512-NEXT: ld1w { z0.d }, p0/z, [z1.d]
+; VBITS_GE_512-NEXT: ld1w { z0.d }, p0/z, [z0.d]
 ; VBITS_GE_512-NEXT: st1w { z0.d }, p1, [x0]
 ; VBITS_GE_512-NEXT: ret
 %cval = load <8 x float>, ptr %a
@@ -805,10 +805,10 @@
 ; CHECK-NEXT: ptrue p0.s, vl16
 ; CHECK-NEXT: ptrue p1.d, vl16
 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT: ld1d { z1.d }, p1/z, [x1]
 ; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0
+; CHECK-NEXT: ld1d { z0.d }, p1/z, [x1]
 ; CHECK-NEXT: punpklo p0.h, p0.b
-; CHECK-NEXT: ld1w { z0.d }, p0/z, [z1.d]
+; CHECK-NEXT: ld1w { z0.d }, p0/z, [z0.d]
 ; CHECK-NEXT: st1w { z0.d }, p1, [x0]
 ; CHECK-NEXT: ret
 %cval = load <16 x float>, ptr %a
@@ -825,10 +825,10 @@
 ; CHECK-NEXT: ptrue p0.s, vl32
 ; CHECK-NEXT: ptrue p1.d, vl32
 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT: ld1d { z1.d }, p1/z, [x1]
 ; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0
+; CHECK-NEXT: ld1d { z0.d }, p1/z, [x1]
 ; CHECK-NEXT: punpklo p0.h, p0.b
-; CHECK-NEXT: ld1w { z0.d }, p0/z, [z1.d]
+; CHECK-NEXT: ld1w { z0.d }, p0/z, [z0.d]
 ; CHECK-NEXT: st1w { z0.d }, p1, [x0]
 ; CHECK-NEXT: ret
 %cval = load <32 x float>, ptr %a
@@ -869,12 +869,12 @@
 define void @masked_gather_v2f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
 ; CHECK-LABEL: masked_gather_v2f64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: ldr q1, [x1]
+; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: fcmeq v0.2d, v0.2d, #0.0
 ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
-; CHECK-NEXT: ld1d { z0.d }, p0/z, [z1.d]
+; CHECK-NEXT: ldr q0, [x1]
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [z0.d]
 ; CHECK-NEXT: str q0, [x0]
 ; CHECK-NEXT: ret
 %cval = load <2 x double>, ptr %a
@@ -890,9 +890,9 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.d, vl4
 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
-; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
 ; CHECK-NEXT: fcmeq p1.d, p0/z, z0.d, #0.0
-; CHECK-NEXT: ld1d { z0.d }, p1/z, [z1.d]
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1]
+; CHECK-NEXT: ld1d { z0.d }, p1/z, [z0.d]
 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
 ; CHECK-NEXT: ret
 %cval = load <4 x double>, ptr %a
@@ -906,16 +906,16 @@
 define void @masked_gather_v8f64(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: masked_gather_v8f64:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #4
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
-; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
-; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1]
 ; VBITS_GE_256-NEXT: fcmeq p1.d, p0/z, z0.d, #0.0
 ; VBITS_GE_256-NEXT: fcmeq p2.d, p0/z, z1.d, #0.0
-; VBITS_GE_256-NEXT: ld1d { z0.d }, p1/z, [z2.d]
-; VBITS_GE_256-NEXT: ld1d { z1.d }, p2/z, [z3.d]
+; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x1, x8, lsl #3]
+; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1]
+; VBITS_GE_256-NEXT: ld1d { z0.d }, p1/z, [z0.d]
+; VBITS_GE_256-NEXT: ld1d { z1.d }, p2/z, [z1.d]
 ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
 ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
 ; VBITS_GE_256-NEXT: ret
@@ -924,9 +924,9 @@
 ; VBITS_GE_512: // %bb.0:
 ; VBITS_GE_512-NEXT: ptrue p0.d, vl8
 ; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
-; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1]
 ; VBITS_GE_512-NEXT: fcmeq p1.d, p0/z, z0.d, #0.0
-; VBITS_GE_512-NEXT: ld1d { z0.d }, p1/z, [z1.d]
+; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x1]
+; VBITS_GE_512-NEXT: ld1d { z0.d }, p1/z, [z0.d]
 ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
 ; VBITS_GE_512-NEXT: ret
 %cval = load <8 x double>, ptr %a
@@ -942,9 +942,9 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.d, vl16
 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
-; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
 ; CHECK-NEXT: fcmeq p1.d, p0/z, z0.d, #0.0
-; CHECK-NEXT: ld1d { z0.d }, p1/z, [z1.d]
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1]
+; CHECK-NEXT: ld1d { z0.d }, p1/z, [z0.d]
 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
 ; CHECK-NEXT: ret
 %cval = load <16 x double>, ptr %a
@@ -960,9 +960,9 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.d, vl32
 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
-; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
 ; CHECK-NEXT: fcmeq p1.d, p0/z, z0.d, #0.0
-; CHECK-NEXT: ld1d { z0.d }, p1/z, [z1.d]
+; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1]
+; CHECK-NEXT: ld1d { z0.d }, p1/z, [z0.d]
 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
 ; CHECK-NEXT: ret
 %cval = load <32 x double>, ptr %a
@@ -982,10 +982,10 @@
 ; CHECK-NEXT: ptrue p0.h, vl32
 ; CHECK-NEXT: ptrue p1.s, vl32
 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
-; CHECK-NEXT: ld1w { z1.s }, p1/z, [x1]
 ; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0
+; CHECK-NEXT: ld1w { z0.s }, p1/z, [x1]
 ; CHECK-NEXT: punpklo p0.h, p0.b
-; CHECK-NEXT: ld1h { z0.s }, p0/z, [x2, z1.s, sxtw #1]
+; CHECK-NEXT: ld1h { z0.s }, p0/z, [x2, z0.s, sxtw #1]
 ; CHECK-NEXT: st1h { z0.s }, p1, [x0]
 ; CHECK-NEXT: ret
 %cvals = load <32 x half>, ptr %a
@@ -1003,9 +1003,9 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.s, vl32
 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
 ; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, #0.0
-; CHECK-NEXT: ld1w { z0.s }, p1/z, [x2, z1.s, sxtw #2]
+; CHECK-NEXT: ld1w { z0.s }, p0/z, [x1]
+; CHECK-NEXT: ld1w { z0.s }, p1/z, [x2, z0.s, sxtw #2]
 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
 ; CHECK-NEXT: ret
 %cvals = load <32 x float>, ptr %a
@@ -1023,9 +1023,9 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.d, vl32
 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
-; CHECK-NEXT: ld1sw { z1.d }, p0/z, [x1]
 ; CHECK-NEXT: fcmeq p1.d, p0/z, z0.d, #0.0
-; CHECK-NEXT: ld1d { z0.d }, p1/z, [x2, z1.d, lsl #3]
+; CHECK-NEXT: ld1sw { z0.d }, p0/z, [x1]
+; CHECK-NEXT: ld1d { z0.d }, p1/z, [x2, z0.d, lsl #3]
 ; CHECK-NEXT: st1d { z0.d }, p0, [x0]
 ; CHECK-NEXT: ret
 %cvals = load <32 x double>, ptr %a
@@ -1044,10 +1044,10 @@
 ; CHECK-NEXT: ptrue p0.h, vl32
 ; CHECK-NEXT: ptrue p1.s, vl32
 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
-; CHECK-NEXT: ld1w { z1.s }, p1/z, [x1]
 ; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0
+; CHECK-NEXT: ld1w { z0.s }, p1/z, [x1]
 ; CHECK-NEXT: punpklo p0.h, p0.b
-; CHECK-NEXT: ld1h { z0.s }, p0/z, [x2, z1.s, uxtw #1]
+; CHECK-NEXT: ld1h { z0.s }, p0/z, [x2, z0.s, uxtw #1]
 ; CHECK-NEXT: st1h { z0.s }, p1, [x0]
 ; CHECK-NEXT: ret
 %cvals = load <32 x half>, ptr %a
@@ -1066,10 +1066,10 @@
 ; CHECK-NEXT: ptrue p0.h, vl32
 ; CHECK-NEXT: ptrue p1.s, vl32
 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
-; CHECK-NEXT: ld1w { z1.s }, p1/z, [x1]
 ; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0
+; CHECK-NEXT: ld1w { z0.s }, p1/z, [x1]
 ; CHECK-NEXT: punpklo p0.h, p0.b
-; CHECK-NEXT: ld1h { z0.s }, p0/z, [x2, z1.s, sxtw]
+; CHECK-NEXT: ld1h { z0.s }, p0/z, [x2, z0.s, sxtw]
 ; CHECK-NEXT: st1h { z0.s }, p1, [x0]
 ; CHECK-NEXT: ret
 %cvals = load <32 x half>, ptr %a
@@ -1089,10 +1089,10 @@
 ; CHECK-NEXT: ptrue p0.h, vl32
 ; CHECK-NEXT: ptrue p1.s, vl32
 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
-; CHECK-NEXT: ld1w { z1.s }, p1/z, [x1]
 ; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0
+; CHECK-NEXT: ld1w { z0.s }, p1/z, [x1]
 ; CHECK-NEXT: punpklo p0.h, p0.b
-; CHECK-NEXT: ld1h { z0.s }, p0/z, [x2, z1.s, uxtw]
+; CHECK-NEXT: ld1h { z0.s }, p0/z, [x2, z0.s, uxtw]
 ; CHECK-NEXT: st1h { z0.s }, p1, [x0]
 ; CHECK-NEXT: ret
 %cvals = load <32 x half>, ptr %a
@@ -1112,10 +1112,10 @@
 ; CHECK-NEXT: ptrue p0.s, vl32
 ; CHECK-NEXT: ptrue p1.d, vl32
 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT: ld1d { z1.d }, p1/z, [x1]
 ; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0
+; CHECK-NEXT: ld1d { z0.d }, p1/z, [x1]
 ; CHECK-NEXT: punpklo p0.h, p0.b
-; CHECK-NEXT: ld1w { z0.d }, p0/z, [x2, z1.d, lsl #2]
+; CHECK-NEXT: ld1w { z0.d }, p0/z, [x2, z0.d, lsl #2]
 ; CHECK-NEXT: st1w { z0.d }, p1, [x0]
 ; CHECK-NEXT: ret
 %cvals = load <32 x float>, ptr %a
@@ -1133,10 +1133,10 @@
 ; CHECK-NEXT: ptrue p0.s, vl32
 ; CHECK-NEXT: ptrue p1.d, vl32
 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT: ld1d { z1.d }, p1/z, [x1]
 ; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0
+; CHECK-NEXT: ld1d { z0.d }, p1/z, [x1]
 ; CHECK-NEXT: punpklo p0.h, p0.b
-; CHECK-NEXT: ld1w { z0.d }, p0/z, [x2, z1.d]
+; CHECK-NEXT: ld1w { z0.d }, p0/z, [x2, z0.d]
 ; CHECK-NEXT: st1w { z0.d }, p1, [x0]
 ; CHECK-NEXT: ret
 %cvals = load <32 x float>, ptr %a
@@ -1155,10 +1155,10 @@
 ; CHECK-NEXT: ptrue p0.s, vl32
 ; CHECK-NEXT: ptrue p1.d, vl32
 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT: ld1d { z1.d }, p1/z, [x1]
 ; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0
+; CHECK-NEXT: ld1d { z0.d }, p1/z, [x1]
 ; CHECK-NEXT: punpklo p0.h, p0.b
-; CHECK-NEXT: ld1w { z0.d }, p0/z, [x2, z1.d]
+; CHECK-NEXT: ld1w { z0.d }, p0/z, [x2, z0.d]
 ; CHECK-NEXT: st1w { z0.d }, p1, [x0]
 ; CHECK-NEXT: ret
 %cvals = load <32 x float>, ptr %a
@@ -1177,10 +1177,10 @@
 ; CHECK-NEXT: ptrue p0.s, vl32
 ; CHECK-NEXT: ptrue p1.d, vl32
 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT: ld1d { z1.d }, p1/z, [x1]
 ; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0
+; CHECK-NEXT: ld1d { z0.d }, p1/z, [x1]
 ; CHECK-NEXT: punpklo p0.h, p0.b
-; CHECK-NEXT: ld1w { z0.d }, p0/z, [z1.d, #4]
+; CHECK-NEXT: ld1w { z0.d }, p0/z, [z0.d, #4]
 ; CHECK-NEXT: st1w { z0.d }, p1, [x0]
 ; CHECK-NEXT: ret
 %cvals = load <32 x float>, ptr %a
@@ -1197,15 +1197,15 @@
 ; CHECK-LABEL: masked_gather_passthru:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.s, vl32
-; CHECK-NEXT: ptrue p1.d, vl32
+; CHECK-NEXT: ptrue p2.d, vl32
 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT: ld1d { z1.d }, p1/z, [x1]
 ; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, #0.0
-; CHECK-NEXT: ld1w { z0.s }, p0/z, [x2]
-; CHECK-NEXT: punpklo p2.h, p1.b
-; CHECK-NEXT: ld1w { z1.d }, p2/z, [z1.d]
-; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
-; CHECK-NEXT: mov z0.s, p1/m, z1.s
+; CHECK-NEXT: ld1d { z0.d }, p2/z, [x1]
+; CHECK-NEXT: ld1w { z1.s }, p0/z, [x2]
+; CHECK-NEXT: punpklo p3.h, p1.b
+; CHECK-NEXT: ld1w { z0.d }, p3/z, [z0.d]
+; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
 ; CHECK-NEXT: st1w { z0.s }, p0, [x0]
 ; CHECK-NEXT: ret
 %cvals = load <32 x float>, ptr %a
@@ -1223,10 +1223,10 @@
 ; CHECK-NEXT: ptrue p0.s, vl32
 ; CHECK-NEXT: ptrue p1.d, vl32
 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT: ld1d { z1.d }, p1/z, [x1]
 ; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0
+; CHECK-NEXT: ld1d { z0.d }, p1/z, [x1]
 ; CHECK-NEXT: punpklo p0.h, p0.b
-; CHECK-NEXT: ld1w { z0.d }, p0/z, [z1.d]
+; CHECK-NEXT: ld1w { z0.d }, p0/z, [z0.d]
 ; CHECK-NEXT: st1w { z0.d }, p1, [x0]
 ; CHECK-NEXT: ret
 %cvals = load <32 x float>, ptr %a
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll
@@ -13,9 +13,9 @@
 ; CHECK-LABEL: masked_load_v2f16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldr s1, [x0]
-; CHECK-NEXT: ptrue p0.h, vl4
 ; CHECK-NEXT: ldr s2, [x1]
 ; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: ptrue p0.h, vl4
 ; CHECK-NEXT: fcmeq v1.4h, v1.4h, v2.4h
 ; CHECK-NEXT: sshll v1.4s, v1.4h, #0
 ; CHECK-NEXT: mov v0.h[0], v1.h[0]
@@ -35,8 +35,8 @@
 define <2 x float> @masked_load_v2f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
 ; CHECK-LABEL: masked_load_v2f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0]
 ; CHECK-NEXT: ptrue p0.s, vl2
+; CHECK-NEXT: ldr d0, [x0]
 ; CHECK-NEXT: ldr d1, [x1]
 ; CHECK-NEXT: fcmeq v0.2s, v0.2s, v1.2s
 ; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
@@ -53,8 +53,8 @@
 define <4 x float> @masked_load_v4f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
 ; CHECK-LABEL: masked_load_v4f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: ldr q1, [x1]
 ; CHECK-NEXT: fcmeq v0.4s, v0.4s, v1.4s
 ; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0
@@ -88,8 +88,8 @@
 define <16 x float> @masked_load_v16f32(ptr %ap, ptr %bp) #0 {
 ; VBITS_GE_256-LABEL: masked_load_v16f32:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x9, #8
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
+; VBITS_GE_256-NEXT: mov x9, #8 // =0x8
 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x9, lsl #2]
 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x9, lsl #2]
@@ -155,8 +155,8 @@
 define <64 x i8> @masked_load_v64i8(ptr %ap, ptr %bp) #0 {
 ; VBITS_GE_256-LABEL: masked_load_v64i8:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov w9, #32
 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32
+; VBITS_GE_256-NEXT: mov w9, #32 // =0x20
 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x9]
 ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1, x9]
@@ -188,8 +188,8 @@
 define <32 x i16> @masked_load_v32i16(ptr %ap, ptr %bp) #0 {
 ; VBITS_GE_256-LABEL: masked_load_v32i16:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x9, #16
 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
+; VBITS_GE_256-NEXT: mov x9, #16 // =0x10
 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x9, lsl #1]
 ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x9, lsl #1]
@@ -221,8 +221,8 @@
 define <16 x i32> @masked_load_v16i32(ptr %ap, ptr %bp) #0 {
 ; VBITS_GE_256-LABEL: masked_load_v16i32:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x9, #8
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
+; VBITS_GE_256-NEXT: mov x9, #8 // =0x8
 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x9, lsl #2]
 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x9, lsl #2]
@@ -254,8 +254,8 @@
 define <8 x i64> @masked_load_v8i64(ptr %ap, ptr %bp) #0 {
 ; VBITS_GE_256-LABEL: masked_load_v8i64:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x9, #4
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: mov x9, #4 // =0x4
 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x9, lsl #3]
 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x9, lsl #3]
@@ -287,8 +287,8 @@
 define <8 x i64> @masked_load_passthru_v8i64(ptr %ap, ptr %bp) #0 {
 ; VBITS_GE_256-LABEL: masked_load_passthru_v8i64:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x9, #4
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: mov x9, #4 // =0x4
 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x9, lsl #3]
 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x9, lsl #3]
@@ -323,8 +323,8 @@
 define <8 x double> @masked_load_passthru_v8f64(ptr %ap, ptr %bp) #0 {
 ; VBITS_GE_256-LABEL: masked_load_passthru_v8f64:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x9, #4
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: mov x9, #4 // =0x4
 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x9, lsl #3]
 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x9, lsl #3]
@@ -360,7 +360,7 @@
 ; VBITS_GE_256-LABEL: masked_load_sext_v32i8i16:
 ; VBITS_GE_256: // %bb.0:
 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32
-; VBITS_GE_256-NEXT: mov x9, #16
+; VBITS_GE_256-NEXT: mov x9, #16 // =0x10
 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x1]
 ; VBITS_GE_256-NEXT: cmpeq p0.b, p0/z, z0.b, #0
 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0]
@@ -390,17 +390,17 @@
 define <16 x i32> @masked_load_sext_v16i8i32(ptr %ap, ptr %bp) #0 {
 ; VBITS_GE_256-LABEL: masked_load_sext_v16i8i32:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: ldr q0, [x1]
 ; VBITS_GE_256-NEXT: ptrue p0.b, vl16
-; VBITS_GE_256-NEXT: mov x9, #8
+; VBITS_GE_256-NEXT: ldr q0, [x1]
+; VBITS_GE_256-NEXT: mov x9, #8 // =0x8
 ; VBITS_GE_256-NEXT: cmeq v0.16b, v0.16b, #0
 ; VBITS_GE_256-NEXT: cmpne p0.b, p0/z, z0.b, #0
 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
 ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
 ; VBITS_GE_256-NEXT: sunpklo z0.h, z0.b
-; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h
 ; VBITS_GE_256-NEXT: sunpklo z1.h, z1.b
+; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h
 ; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h
 ; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x8, x9, lsl #2]
 ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x8]
@@ -424,9 +424,9 @@
 define <8 x i64> @masked_load_sext_v8i8i64(ptr %ap, ptr %bp) #0 {
 ; VBITS_GE_256-LABEL: masked_load_sext_v8i8i64:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: ldr d0, [x1]
 ; VBITS_GE_256-NEXT: ptrue p0.b, vl8
-; VBITS_GE_256-NEXT: mov x9, #4
+; VBITS_GE_256-NEXT: ldr d0, [x1]
+; VBITS_GE_256-NEXT: mov x9, #4 // =0x4
 ; VBITS_GE_256-NEXT: cmeq v0.8b, v0.8b, #0
 ; VBITS_GE_256-NEXT: cmpne p0.b, p0/z, z0.b, #0
 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0]
@@ -434,8 +434,8 @@
 ; VBITS_GE_256-NEXT: sshll v0.8h, v0.8b, #0
 ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
 ; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h
-; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s
 ; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h
+; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s
 ; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s
 ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x8, x9, lsl #3]
 ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x8]
@@ -460,7 +460,7 @@
 ; VBITS_GE_256-LABEL: masked_load_sext_v16i16i32:
 ; VBITS_GE_256: // %bb.0:
 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
-; VBITS_GE_256-NEXT: mov x9, #8
+; VBITS_GE_256-NEXT: mov x9, #8 // =0x8
 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x1]
 ; VBITS_GE_256-NEXT: cmpeq p0.h, p0/z, z0.h, #0
 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0]
@@ -490,17 +490,17 @@
 define <8 x i64> @masked_load_sext_v8i16i64(ptr %ap, ptr %bp) #0 {
 ; VBITS_GE_256-LABEL: masked_load_sext_v8i16i64:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: ldr q0, [x1]
 ; VBITS_GE_256-NEXT: ptrue p0.h, vl8
-; VBITS_GE_256-NEXT: mov x9, #4
+; VBITS_GE_256-NEXT: ldr q0, [x1]
+; VBITS_GE_256-NEXT: mov x9, #4 // =0x4
 ; VBITS_GE_256-NEXT: cmeq v0.8h, v0.8h, #0
 ; VBITS_GE_256-NEXT: cmpne p0.h, p0/z, z0.h, #0
 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
 ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
 ; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h
-; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s
 ; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h
+; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s
 ; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s
 ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x8, x9, lsl #3]
 ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x8]
@@ -525,7 +525,7 @@
 ; VBITS_GE_256-LABEL: masked_load_sext_v8i32i64:
 ; VBITS_GE_256: // %bb.0:
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
-; VBITS_GE_256-NEXT: mov x9, #4
+; VBITS_GE_256-NEXT: mov x9, #4 // =0x4
 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x1]
 ; VBITS_GE_256-NEXT: cmpeq p0.s, p0/z, z0.s, #0
 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0]
@@ -556,7 +556,7 @@
 ; VBITS_GE_256-LABEL: masked_load_zext_v32i8i16:
 ; VBITS_GE_256: // %bb.0:
 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32
-; VBITS_GE_256-NEXT: mov x9, #16
+; VBITS_GE_256-NEXT: mov x9, #16 // =0x10
 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x1]
 ; VBITS_GE_256-NEXT: cmpeq p0.b, p0/z, z0.b, #0
 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0]
@@ -586,17 +586,17 @@
 define <16 x i32> @masked_load_zext_v16i8i32(ptr %ap, ptr %bp) #0 {
 ; VBITS_GE_256-LABEL: masked_load_zext_v16i8i32:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: ldr q0, [x1]
 ; VBITS_GE_256-NEXT: ptrue p0.b, vl16
-; VBITS_GE_256-NEXT: mov x9, #8
+; VBITS_GE_256-NEXT: ldr q0, [x1]
+; VBITS_GE_256-NEXT: mov x9, #8 // =0x8
 ; VBITS_GE_256-NEXT: cmeq v0.16b, v0.16b, #0
 ; VBITS_GE_256-NEXT: cmpne p0.b, p0/z, z0.b, #0
 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
 ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
 ; VBITS_GE_256-NEXT: uunpklo z0.h, z0.b
-; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
 ; VBITS_GE_256-NEXT: uunpklo z1.h, z1.b
+; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
 ; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h
 ; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x8, x9, lsl #2]
 ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x8]
@@ -620,9 +620,9 @@
 define <8 x i64> @masked_load_zext_v8i8i64(ptr %ap, ptr %bp) #0 {
 ; VBITS_GE_256-LABEL: masked_load_zext_v8i8i64:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: ldr d0, [x1]
 ; VBITS_GE_256-NEXT: ptrue p0.b, vl8
-; VBITS_GE_256-NEXT: mov x9, #4
+; VBITS_GE_256-NEXT: ldr d0, [x1]
+; VBITS_GE_256-NEXT: mov x9, #4 // =0x4
 ; VBITS_GE_256-NEXT: cmeq v0.8b, v0.8b, #0
 ; VBITS_GE_256-NEXT: cmpne p0.b, p0/z, z0.b, #0
 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0]
@@ -630,8 +630,8 @@
 ; VBITS_GE_256-NEXT: ushll v0.8h, v0.8b, #0
 ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
 ; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
-; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
 ; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h
+; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
 ; VBITS_GE_256-NEXT: uunpklo z1.d, z1.s
 ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x8, x9, lsl #3]
 ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x8]
@@ -656,7 +656,7 @@
 ; VBITS_GE_256-LABEL: masked_load_zext_v16i16i32:
 ; VBITS_GE_256: // %bb.0:
 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
-; VBITS_GE_256-NEXT: mov x9, #8
+; VBITS_GE_256-NEXT: mov x9, #8 // =0x8
 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x1]
 ; VBITS_GE_256-NEXT: cmpeq p0.h, p0/z, z0.h, #0
 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0]
@@ -686,17 +686,17 @@
 define <8 x i64> @masked_load_zext_v8i16i64(ptr %ap, ptr %bp) #0 {
 ; VBITS_GE_256-LABEL: masked_load_zext_v8i16i64:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: ldr q0, [x1]
 ; VBITS_GE_256-NEXT: ptrue p0.h, vl8
-; VBITS_GE_256-NEXT: mov x9, #4
+; VBITS_GE_256-NEXT: ldr q0, [x1]
+; VBITS_GE_256-NEXT: mov x9, #4 // =0x4
 ; VBITS_GE_256-NEXT: cmeq v0.8h, v0.8h, #0
 ; VBITS_GE_256-NEXT: cmpne p0.h, p0/z, z0.h, #0
 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
 ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
 ; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
-; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
 ; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h
+; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
 ; VBITS_GE_256-NEXT: uunpklo z1.d, z1.s
 ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x8, x9, lsl #3]
 ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x8]
@@ -721,7 +721,7 @@
 ; VBITS_GE_256-LABEL: masked_load_zext_v8i32i64:
 ; VBITS_GE_256: // %bb.0:
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
-; VBITS_GE_256-NEXT: mov x9, #4
+; VBITS_GE_256-NEXT: mov x9, #4 // =0x4
 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x1]
 ; VBITS_GE_256-NEXT: cmpeq p0.s, p0/z, z0.s, #0
 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0]
@@ -751,20 +751,20 @@
 define <32 x i16> @masked_load_sext_v32i8i16_m16(ptr %ap, ptr %bp) #0 {
 ; VBITS_GE_256-LABEL: masked_load_sext_v32i8i16_m16:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x9, #16
 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
+; VBITS_GE_256-NEXT: mov x9, #16 // =0x10
 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x1, x9, lsl #1]
 ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x1]
 ; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z0.h, #0
+; VBITS_GE_256-NEXT: cmpeq p2.h, p0/z, z1.h, #0
 ; VBITS_GE_256-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
-; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, #0
-; VBITS_GE_256-NEXT: mov z1.h, p1/z, #-1 // =0xffffffffffffffff
+; VBITS_GE_256-NEXT: mov z1.h, p2/z, #-1 // =0xffffffffffffffff
+; VBITS_GE_256-NEXT: ptrue p1.b, vl16
+; VBITS_GE_256-NEXT: ptrue p2.b, vl32
 ; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b
 ; VBITS_GE_256-NEXT: uzp1 z1.b, z1.b, z1.b
-; VBITS_GE_256-NEXT: ptrue p1.b, vl16
 ; VBITS_GE_256-NEXT: splice z1.b, p1, z1.b, z0.b
-; VBITS_GE_256-NEXT: ptrue p1.b, vl32
-; VBITS_GE_256-NEXT: cmpne p1.b, p1/z, z1.b, #0
+; VBITS_GE_256-NEXT: cmpne p1.b, p2/z, z1.b, #0
 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p1/z, [x0]
 ; VBITS_GE_256-NEXT: sunpklo z1.h, z0.b
 ; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
@@ -791,26 +791,26 @@
 define <16 x i32> @masked_load_sext_v16i8i32_m32(ptr %ap, ptr %bp) #0 {
 ; VBITS_GE_256-LABEL: masked_load_sext_v16i8i32_m32:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x9, #8
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
+; VBITS_GE_256-NEXT: mov x9, #8 // =0x8
 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x1, x9, lsl #2]
 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x1]
 ; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z0.s, #0
 ; VBITS_GE_256-NEXT: cmpeq p2.s, p0/z, z1.s, #0
 ; VBITS_GE_256-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
 ; VBITS_GE_256-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff
+; VBITS_GE_256-NEXT: ptrue p1.b, vl16
 ; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
 ; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h
 ; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b
 ; VBITS_GE_256-NEXT: uzp1 z1.b, z1.b, z1.b
 ; VBITS_GE_256-NEXT: mov v1.d[1], v0.d[0]
-; VBITS_GE_256-NEXT: ptrue p1.b, vl16
 ; VBITS_GE_256-NEXT: cmpne p1.b, p1/z, z1.b, #0
 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p1/z, [x0]
 ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
 ; VBITS_GE_256-NEXT: sunpklo z0.h, z0.b
-; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h
 ; VBITS_GE_256-NEXT: sunpklo z1.h, z1.b
+; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h
 ; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h
 ; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x8, x9, lsl #2]
 ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x8]
@@ -834,17 +834,17 @@
 define <8 x i64> @masked_load_sext_v8i8i64_m64(ptr %ap, ptr %bp) #0 {
 ; VBITS_GE_256-LABEL: masked_load_sext_v8i8i64_m64:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x9, #4
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: mov x9, #4 // =0x4
 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x1, x9, lsl #3]
 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1]
 ; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z0.d, #0
+; VBITS_GE_256-NEXT: cmpeq p2.d, p0/z, z1.d, #0
 ; VBITS_GE_256-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
-; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z1.d, #0
-; VBITS_GE_256-NEXT: mov z1.d, p1/z, #-1 // =0xffffffffffffffff
+; VBITS_GE_256-NEXT: mov z1.d, p2/z, #-1 // =0xffffffffffffffff
+; VBITS_GE_256-NEXT: ptrue p1.s, vl4
 ; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
 ; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
-; VBITS_GE_256-NEXT: ptrue p1.s, vl4
 ; VBITS_GE_256-NEXT: splice z1.s, p1, z1.s, z0.s
 ; VBITS_GE_256-NEXT: ptrue p1.b, vl8
 ; VBITS_GE_256-NEXT: uzp1 z0.h, z1.h, z1.h
@@ -854,8 +854,8 @@
 ; VBITS_GE_256-NEXT: sshll v0.8h, v0.8b, #0
 ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
 ; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h
-; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s
 ; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h
+; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s
 ; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s
 ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x8, x9, lsl #3]
 ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x8]
@@ -879,20 +879,20 @@
 define <16 x i32> @masked_load_sext_v16i16i32_m32(ptr %ap, ptr %bp) #0 {
 ; VBITS_GE_256-LABEL: masked_load_sext_v16i16i32_m32:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x9, #8
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
+; VBITS_GE_256-NEXT: mov x9, #8 // =0x8
 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x1, x9, lsl #2]
 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x1]
 ; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z0.s, #0
 ; VBITS_GE_256-NEXT: cmpeq p2.s, p0/z, z1.s, #0
 ; VBITS_GE_256-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
 ; VBITS_GE_256-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff
+; VBITS_GE_256-NEXT: ptrue p1.h, vl16
 ; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
 ; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h
 ; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b
 ; VBITS_GE_256-NEXT: uzp1 z1.b, z1.b, z1.b
 ; VBITS_GE_256-NEXT: mov v1.d[1], v0.d[0]
-; VBITS_GE_256-NEXT: ptrue p1.h, vl16
 ; VBITS_GE_256-NEXT: sunpklo z0.h, z1.b
 ; VBITS_GE_256-NEXT: cmpne p1.h, p1/z, z0.h, #0
 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p1/z, [x0]
@@ -921,17 +921,17 @@
 define <8 x i64> @masked_load_sext_v8i16i64_m64(ptr %ap, ptr %bp) #0 {
 ; VBITS_GE_256-LABEL: masked_load_sext_v8i16i64_m64:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x9, #4
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: mov x9, #4 // =0x4
 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x1, x9, lsl #3]
 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1]
 ; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z0.d, #0
+; VBITS_GE_256-NEXT: cmpeq p2.d, p0/z, z1.d, #0
 ; VBITS_GE_256-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
-; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z1.d, #0
-; VBITS_GE_256-NEXT: mov z1.d, p1/z, #-1 // =0xffffffffffffffff
+; VBITS_GE_256-NEXT: mov z1.d, p2/z, #-1 // =0xffffffffffffffff
+; VBITS_GE_256-NEXT: ptrue p1.s, vl4
 ; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
 ; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
-; VBITS_GE_256-NEXT: ptrue p1.s, vl4
 ; VBITS_GE_256-NEXT: splice z1.s, p1, z1.s, z0.s
 ; VBITS_GE_256-NEXT: ptrue p1.h, vl8
 ; VBITS_GE_256-NEXT: uzp1 z0.h, z1.h, z1.h
@@ -939,8 +939,8 @@
 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p1/z, [x0]
 ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
 ; VBITS_GE_256-NEXT: sunpklo z0.s, z0.h
-; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s
 ; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h
+; VBITS_GE_256-NEXT: sunpklo z0.d, z0.s
 ; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s
 ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x8, x9, lsl #3]
 ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x8]
@@ -964,20 +964,20 @@
 define <8 x i64> @masked_load_sext_v8i32i64_m64(ptr %ap, ptr %bp) #0 {
 ; VBITS_GE_256-LABEL: masked_load_sext_v8i32i64_m64:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x9, #4
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: mov x9, #4 // =0x4
 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x1, x9, lsl #3]
 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1]
 ; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z0.d, #0
+; VBITS_GE_256-NEXT: cmpeq p2.d, p0/z, z1.d, #0
 ; VBITS_GE_256-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
-; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z1.d, #0
-; VBITS_GE_256-NEXT: mov z1.d, p1/z, #-1 // =0xffffffffffffffff
+; VBITS_GE_256-NEXT: mov z1.d, p2/z, #-1 // =0xffffffffffffffff
+; VBITS_GE_256-NEXT: ptrue p1.s, vl4
+; VBITS_GE_256-NEXT: ptrue p2.s, vl8
 ; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
 ; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
-; VBITS_GE_256-NEXT: ptrue p1.s, vl4
 ; VBITS_GE_256-NEXT: splice z1.s, p1, z1.s, z0.s
-; VBITS_GE_256-NEXT: ptrue p1.s, vl8
-; VBITS_GE_256-NEXT: cmpne p1.s, p1/z, z1.s, #0
+; VBITS_GE_256-NEXT: cmpne p1.s, p2/z, z1.s, #0
 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p1/z, [x0]
 ; VBITS_GE_256-NEXT: sunpklo z1.d, z0.s
 ; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
@@ -1004,20 +1004,20 @@
 define <32 x i16> @masked_load_zext_v32i8i16_m16(ptr %ap, ptr %bp) #0 {
 ; VBITS_GE_256-LABEL: masked_load_zext_v32i8i16_m16:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x9, #16
 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
+; VBITS_GE_256-NEXT: mov x9, #16 // =0x10
 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x1, x9, lsl #1]
 ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x1]
 ; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z0.h, #0
+; VBITS_GE_256-NEXT: cmpeq p2.h, p0/z, z1.h, #0
 ; VBITS_GE_256-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
-; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z1.h, #0
-; VBITS_GE_256-NEXT: mov z1.h, p1/z, #-1 // =0xffffffffffffffff
+; VBITS_GE_256-NEXT: mov z1.h, p2/z, #-1 // =0xffffffffffffffff
+; VBITS_GE_256-NEXT: ptrue p1.b, vl16
+; VBITS_GE_256-NEXT: ptrue p2.b, vl32
 ; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b
 ; VBITS_GE_256-NEXT: uzp1 z1.b, z1.b, z1.b
-; VBITS_GE_256-NEXT: ptrue p1.b, vl16
 ; VBITS_GE_256-NEXT: splice z1.b, p1, z1.b, z0.b
-; VBITS_GE_256-NEXT: ptrue p1.b, vl32
-; VBITS_GE_256-NEXT: cmpne p1.b, p1/z, z1.b, #0
+; VBITS_GE_256-NEXT: cmpne p1.b, p2/z, z1.b, #0
 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p1/z, [x0]
 ; VBITS_GE_256-NEXT: uunpklo z1.h, z0.b
 ; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
@@ -1044,26 +1044,26 @@
 define <16 x i32> @masked_load_zext_v16i8i32_m32(ptr %ap, ptr %bp) #0 {
 ; VBITS_GE_256-LABEL: masked_load_zext_v16i8i32_m32:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x9, #8
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
+; VBITS_GE_256-NEXT: mov x9, #8 // =0x8
 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x1, x9, lsl #2]
 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x1]
 ; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z0.s, #0
 ; VBITS_GE_256-NEXT: cmpeq p2.s, p0/z, z1.s, #0
 ; VBITS_GE_256-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
 ; VBITS_GE_256-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff
+; VBITS_GE_256-NEXT: ptrue p1.b, vl16
 ; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
 ; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h
 ; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b
 ; VBITS_GE_256-NEXT: uzp1 z1.b, z1.b, z1.b
 ; VBITS_GE_256-NEXT: mov v1.d[1], v0.d[0]
-; VBITS_GE_256-NEXT: ptrue p1.b, vl16
 ; VBITS_GE_256-NEXT: cmpne p1.b, p1/z, z1.b, #0
 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p1/z, [x0]
 ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
 ; VBITS_GE_256-NEXT: uunpklo z0.h, z0.b
-; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
 ; VBITS_GE_256-NEXT: uunpklo z1.h, z1.b
+; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
 ; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h
 ; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x8, x9, lsl #2]
 ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x8]
@@ -1087,17 +1087,17 @@
 define <8 x i64> @masked_load_zext_v8i8i64_m64(ptr %ap, ptr %bp) #0 {
 ; VBITS_GE_256-LABEL: masked_load_zext_v8i8i64_m64:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x9, #4
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: mov x9, #4 // =0x4
 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x1, x9, lsl #3]
 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1]
 ; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z0.d, #0
+; VBITS_GE_256-NEXT: cmpeq p2.d, p0/z, z1.d, #0
 ; VBITS_GE_256-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
-; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z1.d, #0
-; VBITS_GE_256-NEXT: mov z1.d, p1/z, #-1 // =0xffffffffffffffff
+; VBITS_GE_256-NEXT: mov z1.d, p2/z, #-1 // =0xffffffffffffffff
+; VBITS_GE_256-NEXT: ptrue p1.s, vl4
 ; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
 ; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
-; VBITS_GE_256-NEXT: ptrue p1.s, vl4
 ; VBITS_GE_256-NEXT: splice z1.s, p1, z1.s, z0.s
 ; VBITS_GE_256-NEXT: ptrue p1.b, vl8
 ; VBITS_GE_256-NEXT: uzp1 z0.h, z1.h, z1.h
@@ -1107,8 +1107,8 @@
 ; VBITS_GE_256-NEXT: ushll v0.8h, v0.8b, #0
 ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
 ; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
-; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
 ; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h
+; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
 ; VBITS_GE_256-NEXT: uunpklo z1.d, z1.s
 ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x8, x9, lsl #3]
 ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x8]
@@ -1132,20 +1132,20 @@
 define <16 x i32> @masked_load_zext_v16i16i32_m32(ptr %ap, ptr %bp) #0 {
 ; VBITS_GE_256-LABEL: masked_load_zext_v16i16i32_m32:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x9, #8
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
+; VBITS_GE_256-NEXT: mov x9, #8 // =0x8
 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x1, x9, lsl #2]
 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x1]
 ; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z0.s, #0
 ; VBITS_GE_256-NEXT: cmpeq p2.s, p0/z, z1.s, #0
 ; VBITS_GE_256-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
 ; VBITS_GE_256-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff
+; VBITS_GE_256-NEXT: ptrue p1.h, vl16
 ; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
 ; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h
 ; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b
 ; VBITS_GE_256-NEXT: uzp1 z1.b, z1.b, z1.b
 ; VBITS_GE_256-NEXT: mov v1.d[1], v0.d[0]
-; VBITS_GE_256-NEXT: ptrue p1.h, vl16
 ; VBITS_GE_256-NEXT: sunpklo z0.h, z1.b
 ; VBITS_GE_256-NEXT: cmpne p1.h, p1/z, z0.h, #0
 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p1/z, [x0]
@@ -1174,17 +1174,17 @@
 define <8 x i64> @masked_load_zext_v8i16i64_m64(ptr %ap, ptr %bp) #0 {
 ; VBITS_GE_256-LABEL: masked_load_zext_v8i16i64_m64:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x9, #4
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: mov x9, #4 // =0x4
 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x1, x9, lsl #3]
 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1]
 ; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z0.d, #0
+; VBITS_GE_256-NEXT: cmpeq p2.d, p0/z, z1.d, #0
 ; VBITS_GE_256-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
-; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z1.d, #0
-; VBITS_GE_256-NEXT: mov z1.d, p1/z, #-1 // =0xffffffffffffffff
+; VBITS_GE_256-NEXT: mov z1.d, p2/z, #-1 // =0xffffffffffffffff
+; VBITS_GE_256-NEXT: ptrue p1.s, vl4
 ; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
 ; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
-; VBITS_GE_256-NEXT: ptrue p1.s, vl4
 ; VBITS_GE_256-NEXT: splice z1.s, p1, z1.s, z0.s
 ; VBITS_GE_256-NEXT: ptrue p1.h, vl8
 ; VBITS_GE_256-NEXT: uzp1 z0.h, z1.h, z1.h
@@ -1192,8 +1192,8 @@
 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p1/z, [x0]
 ; VBITS_GE_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8
 ; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
-; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
 ; VBITS_GE_256-NEXT: uunpklo z1.s, z1.h
+; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
 ; VBITS_GE_256-NEXT: uunpklo z1.d, z1.s
 ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x8, x9, lsl #3]
 ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x8]
@@ -1217,20 +1217,20 @@
 define <8 x i64> @masked_load_zext_v8i32i64_m64(ptr %ap, ptr %bp) #0 {
 ; VBITS_GE_256-LABEL: masked_load_zext_v8i32i64_m64:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x9, #4
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: mov x9, #4 // =0x4
 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x1, x9, lsl #3]
 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1]
 ; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z0.d, #0
+; VBITS_GE_256-NEXT: cmpeq p2.d, p0/z, z1.d, #0
 ; VBITS_GE_256-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
-; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z1.d, #0
-; VBITS_GE_256-NEXT: mov z1.d, p1/z, #-1 // =0xffffffffffffffff
+; VBITS_GE_256-NEXT: mov z1.d, p2/z, #-1 // =0xffffffffffffffff
+; VBITS_GE_256-NEXT: ptrue p1.s, vl4
+; VBITS_GE_256-NEXT: ptrue p2.s, vl8
 ; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
 ; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
-; VBITS_GE_256-NEXT: ptrue p1.s, vl4
 ; VBITS_GE_256-NEXT: splice z1.s, p1, z1.s, z0.s
-; VBITS_GE_256-NEXT: ptrue p1.s, vl8
-; VBITS_GE_256-NEXT: cmpne p1.s, p1/z, z1.s, #0
+; VBITS_GE_256-NEXT: cmpne p1.s, p2/z, z1.s, #0
 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p1/z, [x0]
 ; VBITS_GE_256-NEXT: uunpklo z1.d, z0.s
 ; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
@@ -1450,7 +1450,7 @@
 ; VBITS_GE_256-LABEL: masked_load_sext_ugt_v8i32i64:
 ; VBITS_GE_256: // %bb.0:
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
-; VBITS_GE_256-NEXT: mov x9, #4
+; VBITS_GE_256-NEXT: mov x9, #4 // =0x4
 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x1]
 ; VBITS_GE_256-NEXT: cmpne p0.s, p0/z, z0.s, #0
 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0]
@@ -1481,7 +1481,7 @@
 ; VBITS_GE_256-LABEL: masked_load_zext_sgt_v8i32i64:
 ; VBITS_GE_256: // %bb.0:
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
-; VBITS_GE_256-NEXT: mov x9, #4
+; VBITS_GE_256-NEXT: mov x9, #4 // =0x4
 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x1]
 ; VBITS_GE_256-NEXT: cmpgt p0.s, p0/z, z0.s, #0
 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0]
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll
@@ -13,16 +13,16 @@
 ; CHECK-LABEL: masked_scatter_v2i8:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldrb w8, [x0]
+; CHECK-NEXT: ldrb w9, [x0, #1]
 ; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: ldr q2, [x1]
 ; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: ldrb w8, [x0, #1]
-; CHECK-NEXT: mov v0.s[1], w8
+; CHECK-NEXT: mov v0.s[1], w9
 ; CHECK-NEXT: cmeq v1.2s, v0.2s, #0
 ; CHECK-NEXT: ushll v0.2d, v0.2s, #0
 ; CHECK-NEXT: sshll v1.2d, v1.2s, #0
 ; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
-; CHECK-NEXT: st1b { z0.d }, p0, [z2.d]
+; CHECK-NEXT: ldr q1, [x1]
+; CHECK-NEXT: st1b { z0.d }, p0, [z1.d]
 ; CHECK-NEXT: ret
 %vals = load <2 x i8>, ptr %a
 %ptrs = load <2 x ptr>, ptr %b
@@ -36,15 +36,15 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldr s0, [x0]
 ; CHECK-NEXT: ptrue p0.d, vl4
-; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
 ; CHECK-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEXT: cmeq v2.4h, v0.4h, #0
+; CHECK-NEXT: cmeq v1.4h, v0.4h, #0
 ; CHECK-NEXT: uunpklo z0.s, z0.h
+; CHECK-NEXT: ld1d { z2.d }, p0/z, [x1]
+; CHECK-NEXT: sunpklo z1.s, z1.h
 ; CHECK-NEXT: uunpklo z0.d, z0.s
-; CHECK-NEXT: sunpklo z2.s, z2.h
-; CHECK-NEXT: sunpklo z2.d, z2.s
-; CHECK-NEXT: cmpne p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: st1b { z0.d }, p0, [z1.d]
+; CHECK-NEXT: sunpklo z1.d, z1.s
+; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
+; CHECK-NEXT: st1b { z0.d }, p0, [z2.d]
 ; CHECK-NEXT: ret
 %vals = load <4 x i8>, ptr %a
 %ptrs = load <4 x ptr>, ptr %b
@@ -57,18 +57,19 @@
 ; VBITS_GE_256-LABEL: masked_scatter_v8i8:
 ; VBITS_GE_256: // %bb.0:
 ; VBITS_GE_256-NEXT: ldr d0, [x0]
-; VBITS_GE_256-NEXT: mov x8, #4
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT: cmeq v1.8b, v0.8b, #0
-; VBITS_GE_256-NEXT: zip1 v5.8b, v0.8b, v0.8b
-; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1, x8, lsl #3]
-; VBITS_GE_256-NEXT: ld1d { z4.d }, p0/z, [x1]
+; VBITS_GE_256-NEXT: zip1 v3.8b, v0.8b, v0.8b
 ; VBITS_GE_256-NEXT: zip1 v2.8b, v1.8b, v0.8b
 ; VBITS_GE_256-NEXT: zip2 v1.8b, v1.8b, v0.8b
 ; VBITS_GE_256-NEXT: zip2 v0.8b, v0.8b, v0.8b
+; VBITS_GE_256-NEXT: uunpklo z3.s, z3.h
+; VBITS_GE_256-NEXT: ld1d { z4.d }, p0/z, [x1, x8, lsl #3]
 ; VBITS_GE_256-NEXT: shl v2.4h, v2.4h, #8
 ; VBITS_GE_256-NEXT: shl v1.4h, v1.4h, #8
 ; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
+; VBITS_GE_256-NEXT: uunpklo z3.d, z3.s
 ; VBITS_GE_256-NEXT: sshr v2.4h, v2.4h, #8
 ; VBITS_GE_256-NEXT: sshr v1.4h, v1.4h, #8
 ; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
@@ -77,27 +78,26 @@
 ; VBITS_GE_256-NEXT: sunpklo z2.d, z2.s
 ; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s
 ; VBITS_GE_256-NEXT: cmpne p1.d, p0/z, z2.d, #0
+; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1]
 ; VBITS_GE_256-NEXT: cmpne p0.d, p0/z, z1.d, #0
-; VBITS_GE_256-NEXT: uunpklo z1.s, z5.h
-; VBITS_GE_256-NEXT: uunpklo z1.d, z1.s
-; VBITS_GE_256-NEXT: st1b { z1.d }, p1, [z4.d]
-; VBITS_GE_256-NEXT: st1b { z0.d }, p0, [z3.d]
+; VBITS_GE_256-NEXT: st1b { z3.d }, p1, [z2.d]
+; VBITS_GE_256-NEXT: st1b { z0.d }, p0, [z4.d]
 ; VBITS_GE_256-NEXT: ret
 ;
 ; VBITS_GE_512-LABEL: masked_scatter_v8i8:
 ; VBITS_GE_512: // %bb.0:
 ; VBITS_GE_512-NEXT: ldr d0, [x0]
 ; VBITS_GE_512-NEXT: ptrue p0.d, vl8
-; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1]
-; VBITS_GE_512-NEXT: cmeq v2.8b, v0.8b, #0
+; VBITS_GE_512-NEXT: cmeq v1.8b, v0.8b, #0
 ; VBITS_GE_512-NEXT: uunpklo z0.h, z0.b
+; VBITS_GE_512-NEXT: sunpklo z1.h, z1.b
 ; VBITS_GE_512-NEXT: uunpklo z0.s, z0.h
-; VBITS_GE_512-NEXT: sunpklo z2.h, z2.b
+; VBITS_GE_512-NEXT: ld1d { z2.d }, p0/z, [x1]
+; VBITS_GE_512-NEXT: sunpklo z1.s, z1.h
 ; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s
-; VBITS_GE_512-NEXT: sunpklo z2.s, z2.h
-; VBITS_GE_512-NEXT: sunpklo z2.d, z2.s
-; VBITS_GE_512-NEXT: cmpne p0.d, p0/z, z2.d, #0
-; VBITS_GE_512-NEXT: st1b { z0.d }, p0, [z1.d]
+; VBITS_GE_512-NEXT: sunpklo z1.d, z1.s
+; VBITS_GE_512-NEXT: cmpne p0.d, p0/z, z1.d, #0
+; VBITS_GE_512-NEXT: st1b { z0.d }, p0, [z2.d]
 ; VBITS_GE_512-NEXT: ret
 %vals = load <8 x i8>, ptr %a
 %ptrs = load <8 x ptr>, ptr %b
@@ -111,16 +111,16 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: ptrue p0.d, vl16
-; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
-; CHECK-NEXT: cmeq v2.16b, v0.16b, #0
+; CHECK-NEXT: cmeq v1.16b, v0.16b, #0
 ; CHECK-NEXT: uunpklo z0.h, z0.b
+; CHECK-NEXT: sunpklo z1.h, z1.b
 ; CHECK-NEXT: uunpklo z0.s, z0.h
-; CHECK-NEXT: sunpklo z2.h, z2.b
+; CHECK-NEXT: ld1d { z2.d }, p0/z, [x1]
+; CHECK-NEXT: sunpklo z1.s, z1.h
 ; CHECK-NEXT: uunpklo z0.d, z0.s
-; CHECK-NEXT: sunpklo z2.s, z2.h
-; CHECK-NEXT: sunpklo z2.d, z2.s
-; CHECK-NEXT: cmpne p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: st1b { z0.d }, p0, [z1.d]
+; CHECK-NEXT: sunpklo z1.d, z1.s
+; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
+; CHECK-NEXT: st1b { z0.d }, p0, [z2.d]
 ; CHECK-NEXT: ret
 %vals = load <16 x i8>, ptr %a
 %ptrs = load <16 x ptr>, ptr %b
@@ -140,9 +140,9 @@
 ; CHECK-NEXT: uunpklo z0.h, z0.b
 ; CHECK-NEXT: punpklo p0.h, p0.b
 ; CHECK-NEXT: uunpklo z0.s, z0.h
-; CHECK-NEXT: punpklo p0.h, p0.b
 ; CHECK-NEXT: uunpklo z0.d, z0.s
 ; CHECK-NEXT: punpklo p0.h, p0.b
+; CHECK-NEXT: punpklo p0.h, p0.b
 ; CHECK-NEXT: st1b { z0.d }, p0, [z1.d]
 ; CHECK-NEXT: ret
 %vals = load <32 x i8>, ptr %a
@@ -160,16 +160,16 @@
 ; CHECK-LABEL: masked_scatter_v2i16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldrh w8, [x0]
+; CHECK-NEXT: ldrh w9, [x0, #2]
 ; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: ldr q2, [x1]
 ; CHECK-NEXT: fmov s0, w8
-; CHECK-NEXT: ldrh w8, [x0, #2]
-; CHECK-NEXT: mov v0.s[1], w8
+; CHECK-NEXT: mov v0.s[1], w9
 ; CHECK-NEXT: cmeq v1.2s, v0.2s, #0
 ; CHECK-NEXT: ushll v0.2d, v0.2s, #0
 ; CHECK-NEXT: sshll v1.2d, v1.2s, #0
 ; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
-; CHECK-NEXT: st1h { z0.d }, p0, [z2.d]
+; CHECK-NEXT: ldr q1, [x1]
+; CHECK-NEXT: st1h { z0.d }, p0, [z1.d]
 ; CHECK-NEXT: ret
 %vals = load <2 x i16>, ptr %a
 %ptrs = load <2 x ptr>, ptr %b
@@ -183,14 +183,14 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldr d0, [x0]
 ; CHECK-NEXT: ptrue p0.d, vl4
-; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
-; CHECK-NEXT: cmeq v2.4h, v0.4h, #0
+; CHECK-NEXT: cmeq v1.4h, v0.4h, #0
 ; CHECK-NEXT: uunpklo z0.s, z0.h
+; CHECK-NEXT: sunpklo z1.s, z1.h
 ; CHECK-NEXT: uunpklo z0.d, z0.s
-; CHECK-NEXT: sunpklo z2.s, z2.h
-; CHECK-NEXT: sunpklo z2.d, z2.s
-; CHECK-NEXT: cmpne p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: st1h { z0.d }, p0, [z1.d]
+; CHECK-NEXT: ld1d { z2.d }, p0/z, [x1]
+; CHECK-NEXT: sunpklo z1.d, z1.s
+; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
+; CHECK-NEXT: st1h { z0.d }, p0, [z2.d]
 ; CHECK-NEXT: ret
 %vals = load <4 x i16>, ptr %a
 %ptrs = load <4 x ptr>, ptr %b
@@ -203,39 +203,39 @@
 ; VBITS_GE_256-LABEL: masked_scatter_v8i16:
 ; VBITS_GE_256: // %bb.0:
 ; VBITS_GE_256-NEXT: ldr q0, [x0]
-; VBITS_GE_256-NEXT: mov x8, #4
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT: cmeq v1.8h, v0.8h, #0
-; VBITS_GE_256-NEXT: ld1d { z4.d }, p0/z, [x1, x8, lsl #3]
-; VBITS_GE_256-NEXT: ext v3.16b, v0.16b, v0.16b, #8
-; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
+; VBITS_GE_256-NEXT: uunpklo z3.s, z0.h
+; VBITS_GE_256-NEXT: ext v0.16b, v0.16b, v0.16b, #8
 ; VBITS_GE_256-NEXT: sunpklo z2.s, z1.h
-; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
 ; VBITS_GE_256-NEXT: ext v1.16b, v1.16b, v1.16b, #8
+; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
+; VBITS_GE_256-NEXT: uunpklo z3.d, z3.s
+; VBITS_GE_256-NEXT: ld1d { z4.d }, p0/z, [x1, x8, lsl #3]
+; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h
 ; VBITS_GE_256-NEXT: sunpklo z2.d, z2.s
+; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
+; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s
 ; VBITS_GE_256-NEXT: cmpne p1.d, p0/z, z2.d, #0
 ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1]
-; VBITS_GE_256-NEXT: uunpklo z3.s, z3.h
-; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h
-; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s
-; VBITS_GE_256-NEXT: st1h { z0.d }, p1, [z2.d]
+; VBITS_GE_256-NEXT: st1h { z3.d }, p1, [z2.d]
 ; VBITS_GE_256-NEXT: cmpne p0.d, p0/z, z1.d, #0
-; VBITS_GE_256-NEXT: uunpklo z1.d, z3.s
-; VBITS_GE_256-NEXT: st1h { z1.d }, p0, [z4.d]
+; VBITS_GE_256-NEXT: st1h { z0.d }, p0, [z4.d]
 ; VBITS_GE_256-NEXT: ret
 ;
 ; VBITS_GE_512-LABEL: masked_scatter_v8i16:
 ; VBITS_GE_512: // %bb.0:
 ; VBITS_GE_512-NEXT: ldr q0, [x0]
 ; VBITS_GE_512-NEXT: ptrue p0.d, vl8
-; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1]
-; VBITS_GE_512-NEXT: cmeq v2.8h, v0.8h, #0
+; VBITS_GE_512-NEXT: cmeq v1.8h, v0.8h, #0
 ; VBITS_GE_512-NEXT: uunpklo z0.s, z0.h
+; VBITS_GE_512-NEXT: sunpklo z1.s, z1.h
 ; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s
-; VBITS_GE_512-NEXT: sunpklo z2.s, z2.h
-; VBITS_GE_512-NEXT: sunpklo z2.d, z2.s
-; VBITS_GE_512-NEXT: cmpne p0.d, p0/z, z2.d, #0
-; VBITS_GE_512-NEXT: st1h { z0.d }, p0, [z1.d]
+; VBITS_GE_512-NEXT: ld1d { z2.d }, p0/z, [x1]
+; VBITS_GE_512-NEXT: sunpklo z1.d, z1.s
+; VBITS_GE_512-NEXT: cmpne p0.d, p0/z, z1.d, #0
+; VBITS_GE_512-NEXT: st1h { z0.d }, p0, [z2.d]
 ; VBITS_GE_512-NEXT: ret
 %vals = load <8 x i16>, ptr %a
 %ptrs = load <8 x ptr>, ptr %b
@@ -295,12 +295,12 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldr d0, [x0]
 ; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: ldr q2, [x1]
 ; CHECK-NEXT: cmeq v1.2s, v0.2s, #0
 ; CHECK-NEXT: ushll v0.2d, v0.2s, #0
 ; CHECK-NEXT: sshll v1.2d, v1.2s, #0
 ; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
-; CHECK-NEXT: st1w { z0.d }, p0, [z2.d]
+; CHECK-NEXT: ldr q1, [x1]
+; CHECK-NEXT: st1w { z0.d }, p0, [z1.d]
 ; CHECK-NEXT: ret
 %vals = load <2 x i32>, ptr %a
 %ptrs = load <2 x ptr>, ptr %b
@@ -314,12 +314,12 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: ptrue p0.d, vl4
-; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
-; CHECK-NEXT: cmeq v2.4s, v0.4s, #0
+; CHECK-NEXT: cmeq v1.4s, v0.4s, #0
 ; CHECK-NEXT: uunpklo z0.d, z0.s
-; CHECK-NEXT: sunpklo z2.d, z2.s
-; CHECK-NEXT: cmpne p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: st1w { z0.d }, p0, [z1.d]
+; CHECK-NEXT: sunpklo z1.d, z1.s
+; CHECK-NEXT: ld1d { z2.d }, p0/z, [x1]
+; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
+; CHECK-NEXT: st1w { z0.d }, p0, [z2.d]
 ; CHECK-NEXT: ret
 %vals = load <4 x i32>, ptr %a
 %ptrs = load <4 x ptr>, ptr %b
@@ -332,23 +332,23 @@
 ; VBITS_GE_256-LABEL: masked_scatter_v8i32:
 ; VBITS_GE_256: // %bb.0:
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
-; VBITS_GE_256-NEXT: mov x8, #4
-; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0]
+; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT: ptrue p1.d, vl4
-; VBITS_GE_256-NEXT: ld1d { z1.d }, p1/z, [x1, x8, lsl #3]
-; VBITS_GE_256-NEXT: ld1d { z3.d }, p1/z, [x1]
+; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0]
+; VBITS_GE_256-NEXT: ld1d { z3.d }, p1/z, [x1, x8, lsl #3]
+; VBITS_GE_256-NEXT: ld1d { z4.d }, p1/z, [x1]
 ; VBITS_GE_256-NEXT: cmpeq p0.s, p0/z, z0.s, #0
-; VBITS_GE_256-NEXT: uunpklo z4.d, z0.s
-; VBITS_GE_256-NEXT: mov z2.s, p0/z, #-1 // =0xffffffffffffffff
-; VBITS_GE_256-NEXT: punpklo p0.h, p0.b
-; VBITS_GE_256-NEXT: ext z2.b, z2.b, z2.b, #16
+; VBITS_GE_256-NEXT: uunpklo z2.d, z0.s
 ; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
-; VBITS_GE_256-NEXT: sunpklo z2.d, z2.s
-; VBITS_GE_256-NEXT: and p0.b, p0/z, p0.b, p1.b
-; VBITS_GE_256-NEXT: cmpne p1.d, p1/z, z2.d, #0
+; VBITS_GE_256-NEXT: punpklo p2.h, p0.b
+; VBITS_GE_256-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff
 ; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
-; VBITS_GE_256-NEXT: st1w { z4.d }, p0, [z3.d]
-; VBITS_GE_256-NEXT: st1w { z0.d }, p1, [z1.d]
+; VBITS_GE_256-NEXT: ext z1.b, z1.b, z1.b, #16
+; VBITS_GE_256-NEXT: and p0.b, p2/z, p2.b, p1.b
+; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s
+; VBITS_GE_256-NEXT: st1w { z2.d }, p0, [z4.d]
+; VBITS_GE_256-NEXT: cmpne p0.d, p1/z, z1.d, #0
+; VBITS_GE_256-NEXT: st1w { z0.d }, p0, [z3.d]
 ; VBITS_GE_256-NEXT: ret
 ;
 ; VBITS_GE_512-LABEL: masked_scatter_v8i32:
@@ -434,11 +434,11 @@
 define void @masked_scatter_v2i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
 ; CHECK-LABEL: masked_scatter_v2i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: ldr q0, [x0]
+; CHECK-NEXT: cmeq v1.2d, v0.2d, #0
+; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
 ; CHECK-NEXT: ldr q1, [x1]
-; CHECK-NEXT: cmeq v2.2d, v0.2d, #0
-; CHECK-NEXT: cmpne p0.d, p0/z, z2.d, #0
 ; CHECK-NEXT: st1d { z0.d }, p0, [z1.d]
 ; CHECK-NEXT: ret
 %vals = load <2 x i64>, ptr %a
@@ -467,16 +467,16 @@
 define void @masked_scatter_v8i64(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: masked_scatter_v8i64:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #4
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
 ; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1]
-; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z0.d, #0
-; VBITS_GE_256-NEXT: cmpeq p0.d, p0/z, z1.d, #0
-; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [z3.d]
-; VBITS_GE_256-NEXT: st1d { z0.d }, p1, [z2.d]
+; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z1.d, #0
+; VBITS_GE_256-NEXT: cmpeq p0.d, p0/z, z0.d, #0
+; VBITS_GE_256-NEXT: st1d { z1.d }, p1, [z3.d]
+; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [z2.d]
 ; VBITS_GE_256-NEXT: ret
 ;
 ; VBITS_GE_512-LABEL: masked_scatter_v8i64:
@@ -534,20 +534,20 @@
 ; CHECK-LABEL: masked_scatter_v2f16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldr s1, [x0]
-; CHECK-NEXT: ptrue p0.d, vl4
 ; CHECK-NEXT: movi v0.2d, #0000000000000000
-; CHECK-NEXT: ldr q3, [x1]
+; CHECK-NEXT: ptrue p0.d, vl4
 ; CHECK-NEXT: fcmeq v2.4h, v1.4h, #0.0
 ; CHECK-NEXT: uunpklo z1.s, z1.h
 ; CHECK-NEXT: sshll v2.4s, v2.4h, #0
+; CHECK-NEXT: uunpklo z1.d, z1.s
 ; CHECK-NEXT: mov v0.h[0], v2.h[0]
 ; CHECK-NEXT: mov w8, v2.s[1]
 ; CHECK-NEXT: mov v0.h[1], w8
 ; CHECK-NEXT: sunpklo z0.s, z0.h
 ; CHECK-NEXT: sunpklo z0.d, z0.s
 ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
-; CHECK-NEXT: uunpklo z0.d, z1.s
-; CHECK-NEXT: st1h { z0.d }, p0, [z3.d]
+; CHECK-NEXT: ldr q0, [x1]
+; CHECK-NEXT: st1h { z1.d }, p0, [z0.d]
 ; CHECK-NEXT: ret
 %vals = load <2 x half>, ptr %a
 %ptrs = load <2 x ptr>, ptr %b
@@ -561,14 +561,14 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldr d0, [x0]
 ; CHECK-NEXT: ptrue p0.d, vl4
-; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
-; CHECK-NEXT: fcmeq v2.4h, v0.4h, #0.0
+; CHECK-NEXT: fcmeq v1.4h, v0.4h, #0.0
 ; CHECK-NEXT: uunpklo z0.s, z0.h
+; CHECK-NEXT: sunpklo z1.s, z1.h
 ; CHECK-NEXT: uunpklo z0.d, z0.s
-; CHECK-NEXT: sunpklo z2.s, z2.h
-; CHECK-NEXT: sunpklo z2.d, z2.s
-; CHECK-NEXT: cmpne p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: st1h { z0.d }, p0, [z1.d]
+; CHECK-NEXT: ld1d { z2.d }, p0/z, [x1]
+; CHECK-NEXT: sunpklo z1.d, z1.s
+; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
+; CHECK-NEXT: st1h { z0.d }, p0, [z2.d]
 ; CHECK-NEXT: ret
 %vals = load <4 x half>, ptr %a
 %ptrs = load <4 x ptr>, ptr %b
@@ -581,39 +581,39 @@
 ; VBITS_GE_256-LABEL: masked_scatter_v8f16:
 ; VBITS_GE_256: // %bb.0:
 ; VBITS_GE_256-NEXT: ldr q0, [x0]
-; VBITS_GE_256-NEXT: mov x8, #4
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT: fcmeq v1.8h, v0.8h, #0.0
-; VBITS_GE_256-NEXT: ld1d { z4.d }, p0/z, [x1, x8, lsl #3]
-; VBITS_GE_256-NEXT: ext v3.16b, v0.16b, v0.16b, #8
-; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
-; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
+; VBITS_GE_256-NEXT: uunpklo z3.s, z0.h
+; VBITS_GE_256-NEXT: ext v0.16b, v0.16b, v0.16b, #8
 ; VBITS_GE_256-NEXT: sunpklo z2.s, z1.h
 ; VBITS_GE_256-NEXT: ext v1.16b, v1.16b, v1.16b, #8
+; VBITS_GE_256-NEXT: uunpklo z0.s, z0.h
+; VBITS_GE_256-NEXT: uunpklo z3.d, z3.s
+; VBITS_GE_256-NEXT: ld1d { z4.d }, p0/z, [x1, x8, lsl #3]
+; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h
 ; VBITS_GE_256-NEXT: sunpklo z2.d, z2.s
+; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
+; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s
 ; VBITS_GE_256-NEXT: cmpne p1.d, p0/z, z2.d, #0
 ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1]
-; VBITS_GE_256-NEXT: uunpklo z3.s, z3.h
-; VBITS_GE_256-NEXT: sunpklo z1.s, z1.h
-; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s
-; VBITS_GE_256-NEXT: st1h { z0.d }, p1, [z2.d]
+; VBITS_GE_256-NEXT: st1h { z3.d }, p1, [z2.d]
 ; VBITS_GE_256-NEXT: cmpne p0.d, p0/z, z1.d, #0
-; VBITS_GE_256-NEXT: uunpklo z1.d, z3.s
-; VBITS_GE_256-NEXT: st1h { z1.d }, p0, [z4.d]
+; VBITS_GE_256-NEXT: st1h { z0.d }, p0, [z4.d]
 ; VBITS_GE_256-NEXT: ret
 ;
 ; VBITS_GE_512-LABEL: masked_scatter_v8f16:
 ; VBITS_GE_512: // %bb.0:
 ; VBITS_GE_512-NEXT: ldr q0, [x0]
 ; VBITS_GE_512-NEXT: ptrue p0.d, vl8
-; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1]
-; VBITS_GE_512-NEXT: fcmeq v2.8h, v0.8h, #0.0
+; VBITS_GE_512-NEXT: fcmeq v1.8h, v0.8h, #0.0
 ; VBITS_GE_512-NEXT: uunpklo z0.s, z0.h
+; VBITS_GE_512-NEXT: sunpklo z1.s, z1.h
 ; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s
-; VBITS_GE_512-NEXT: sunpklo z2.s, z2.h
-; VBITS_GE_512-NEXT: sunpklo z2.d, z2.s
-; VBITS_GE_512-NEXT: cmpne p0.d, p0/z, z2.d, #0
-; VBITS_GE_512-NEXT: st1h { z0.d }, p0, [z1.d]
+; VBITS_GE_512-NEXT: ld1d { z2.d }, p0/z, [x1]
+; VBITS_GE_512-NEXT: sunpklo z1.d, z1.s
+; VBITS_GE_512-NEXT: cmpne p0.d, p0/z, z1.d, #0
+; VBITS_GE_512-NEXT: st1h { z0.d }, p0, [z2.d]
 ; VBITS_GE_512-NEXT: ret
 %vals = load <8 x half>, ptr %a
 %ptrs = load <8 x ptr>, ptr %b
@@ -673,12 +673,12 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldr d0, [x0]
 ; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: ldr q2, [x1]
 ; CHECK-NEXT: fcmeq v1.2s, v0.2s, #0.0
 ; CHECK-NEXT: ushll v0.2d, v0.2s, #0
 ; CHECK-NEXT: sshll v1.2d, v1.2s, #0
 ; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
-; CHECK-NEXT: st1w { z0.d }, p0, [z2.d]
+; CHECK-NEXT: ldr q1, [x1]
+; CHECK-NEXT: st1w { z0.d }, p0, [z1.d]
 ; CHECK-NEXT: ret
 %vals = load <2 x float>, ptr %a
 %ptrs = load <2 x ptr>, ptr %b
@@ -692,12 +692,12 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: ptrue p0.d, vl4
-; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
-; CHECK-NEXT: fcmeq v2.4s, v0.4s, #0.0
+; CHECK-NEXT: fcmeq v1.4s, v0.4s, #0.0
 ; CHECK-NEXT: uunpklo z0.d, z0.s
-; CHECK-NEXT: sunpklo z2.d, z2.s
-; CHECK-NEXT: cmpne p0.d, p0/z, z2.d, #0
-; CHECK-NEXT: st1w { z0.d }, p0, [z1.d]
+; CHECK-NEXT: sunpklo z1.d, z1.s
+; CHECK-NEXT: ld1d { z2.d }, p0/z, [x1]
+; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
+; CHECK-NEXT: st1w { z0.d }, p0, [z2.d]
 ; CHECK-NEXT: ret
 %vals = load <4 x float>, ptr %a
 %ptrs = load <4 x ptr>, ptr %b
@@ -710,23 +710,23 @@
 ; VBITS_GE_256-LABEL: masked_scatter_v8f32:
 ; VBITS_GE_256: // %bb.0:
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
-; VBITS_GE_256-NEXT: mov x8, #4
-; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0]
+; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT: ptrue p1.d, vl4
-; VBITS_GE_256-NEXT: ld1d { z1.d }, p1/z, [x1, x8, lsl #3]
-; VBITS_GE_256-NEXT: ld1d { z3.d }, p1/z, [x1]
+; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0]
+; VBITS_GE_256-NEXT: ld1d { z3.d }, p1/z, [x1, x8, lsl #3]
+; VBITS_GE_256-NEXT: ld1d { z4.d }, p1/z, [x1]
 ; VBITS_GE_256-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0
-; VBITS_GE_256-NEXT: uunpklo z4.d, z0.s
-; VBITS_GE_256-NEXT: mov z2.s, p0/z, #-1 // =0xffffffffffffffff
-; VBITS_GE_256-NEXT: punpklo p0.h, p0.b
-; VBITS_GE_256-NEXT: ext z2.b, z2.b, z2.b, #16
+; VBITS_GE_256-NEXT: uunpklo z2.d, z0.s
 ; VBITS_GE_256-NEXT: ext z0.b, z0.b, z0.b, #16
-; VBITS_GE_256-NEXT: sunpklo z2.d, z2.s
-; VBITS_GE_256-NEXT: and p0.b, p0/z, p0.b, p1.b
-; VBITS_GE_256-NEXT: cmpne p1.d, p1/z, z2.d, #0
 ; VBITS_GE_256-NEXT: uunpklo z0.d, z0.s
-; VBITS_GE_256-NEXT: st1w { z4.d }, p0, [z3.d]
-; VBITS_GE_256-NEXT: st1w { z0.d }, p1, [z1.d]
+; VBITS_GE_256-NEXT: punpklo p2.h, p0.b
+; VBITS_GE_256-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff
+; VBITS_GE_256-NEXT: ext z1.b, z1.b, z1.b, #16
+; VBITS_GE_256-NEXT: and p0.b, p2/z, p2.b, p1.b
+; VBITS_GE_256-NEXT: sunpklo z1.d, z1.s
+; VBITS_GE_256-NEXT: st1w { z2.d }, p0, [z4.d]
+; VBITS_GE_256-NEXT: cmpne p0.d, p1/z, z1.d, #0
+; VBITS_GE_256-NEXT: st1w { z0.d }, p0, [z3.d]
 ; VBITS_GE_256-NEXT: ret
 ;
 ; VBITS_GE_512-LABEL: masked_scatter_v8f32:
@@ -812,12 +812,12 @@
 define void @masked_scatter_v2f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
 ; CHECK-LABEL: masked_scatter_v2f64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: ldr q2, [x1]
+; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: fcmeq v1.2d, v0.2d, #0.0
 ; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
-; CHECK-NEXT: st1d { z0.d }, p0, [z2.d]
+; CHECK-NEXT: ldr q1, [x1]
+; CHECK-NEXT: st1d { z0.d }, p0, [z1.d]
 ; CHECK-NEXT: ret
 %vals = load <2 x double>, ptr %a
 %ptrs = load <2 x ptr>, ptr %b
@@ -832,8 +832,8 @@
 ; CHECK-NEXT: ptrue p0.d, vl4
 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
-; CHECK-NEXT: fcmeq p0.d, p0/z, z0.d, #0.0
-; CHECK-NEXT: st1d { z0.d }, p0, [z1.d]
+; CHECK-NEXT: fcmeq p1.d, p0/z, z0.d, #0.0
+; CHECK-NEXT: st1d { z0.d }, p1, [z1.d]
 ; CHECK-NEXT: ret
 %vals = load <4 x double>, ptr %a
 %ptrs = load <4 x ptr>, ptr %b
@@ -845,16 +845,16 @@
 define void @masked_scatter_v8f64(ptr %a, ptr %b) #0 {
 ; VBITS_GE_256-LABEL: masked_scatter_v8f64:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #4
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
 ; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1]
-; VBITS_GE_256-NEXT: fcmeq p1.d, p0/z, z0.d, #0.0
-; VBITS_GE_256-NEXT: fcmeq p0.d, p0/z, z1.d, #0.0
-; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [z3.d]
-; VBITS_GE_256-NEXT: st1d { z0.d }, p1, [z2.d]
+; VBITS_GE_256-NEXT: fcmeq p1.d, p0/z, z1.d, #0.0
+; VBITS_GE_256-NEXT: fcmeq p0.d, p0/z, z0.d, #0.0
+; VBITS_GE_256-NEXT: st1d { z1.d }, p1, [z3.d]
+; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [z2.d]
 ; VBITS_GE_256-NEXT: ret
 ;
 ; VBITS_GE_512-LABEL: masked_scatter_v8f64:
@@ -862,8 +862,8 @@
 ; VBITS_GE_512-NEXT: ptrue p0.d, vl8
 ; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
 ; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1]
-; VBITS_GE_512-NEXT: fcmeq p0.d, p0/z, z0.d, #0.0
-; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [z1.d]
+; VBITS_GE_512-NEXT: fcmeq p1.d, p0/z, z0.d, #0.0
+; VBITS_GE_512-NEXT: st1d { z0.d }, p1, [z1.d]
 ; VBITS_GE_512-NEXT: ret
 %vals = load <8 x double>, ptr %a
 %ptrs = load <8 x ptr>, ptr %b
@@ -878,8 +878,8 @@
 ; CHECK-NEXT: ptrue p0.d, vl16
 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
-; CHECK-NEXT: fcmeq p0.d, p0/z, z0.d, #0.0
-; CHECK-NEXT: st1d { z0.d }, p0, [z1.d]
+; CHECK-NEXT: fcmeq p1.d, p0/z, z0.d, #0.0
+; CHECK-NEXT: st1d { z0.d }, p1, [z1.d]
 ; CHECK-NEXT: ret
 %vals = load <16 x double>, ptr %a
 %ptrs = load <16 x ptr>, ptr %b
@@ -894,8 +894,8 @@
 ; CHECK-NEXT: ptrue p0.d, vl32
 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
-; CHECK-NEXT: fcmeq p0.d, p0/z, z0.d, #0.0
-; CHECK-NEXT: st1d { z0.d }, p0, [z1.d]
+; CHECK-NEXT: fcmeq p1.d, p0/z, z0.d, #0.0
+; CHECK-NEXT: st1d { z0.d }, p1, [z1.d]
 ; CHECK-NEXT: ret
 %vals = load <32 x double>, ptr %a
 %ptrs = load <32 x ptr>, ptr %b
@@ -934,8 +934,8 @@
 ; CHECK-NEXT: ptrue p0.s, vl32
 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
-; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0
-; CHECK-NEXT: st1w { z0.s }, p0, [x2, z1.s, sxtw #2]
+; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, #0.0
+; CHECK-NEXT: st1w { z0.s }, p1, [x2, z1.s, sxtw #2]
 ; CHECK-NEXT: ret
 %vals = load <32 x float>, ptr %a
 %idxs = load <32 x i32>, ptr %b
@@ -952,8 +952,8 @@
 ; CHECK-NEXT: ptrue p0.d, vl32
 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT: ld1sw { z1.d }, p0/z, [x1]
-; CHECK-NEXT: fcmeq p0.d, p0/z, z0.d, #0.0
-; CHECK-NEXT: st1d { z0.d }, p0, [x2, z1.d, lsl #3]
+; CHECK-NEXT: fcmeq p1.d, p0/z, z0.d, #0.0
+; CHECK-NEXT: st1d { z0.d }, p1, [x2, z1.d, lsl #3]
 ; CHECK-NEXT: ret
 %vals = load <32 x double>, ptr %a
 %idxs = load <32 x i32>, ptr %b
@@ -1127,9 +1127,9 @@
 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
 ; CHECK-NEXT: tbz w2, #0, .LBB47_2
 ; CHECK-NEXT: // %bb.1: // %bb.1
+; CHECK-NEXT: fcmeq p1.d, p0/z, z0.d, #0.0
 ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
-; CHECK-NEXT: fcmeq p0.d, p0/z, z0.d, #0.0
-; CHECK-NEXT: st1d { z0.d }, p0, [z1.d]
+; CHECK-NEXT: st1d { z0.d }, p1, [z1.d]
 ; CHECK-NEXT: .LBB47_2: // %bb.2
 ; CHECK-NEXT: ret
 %vals = load volatile <8 x double>, ptr %a
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll
--- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll
@@ -13,9 +13,9 @@
 ; CHECK-LABEL: masked_store_v2f16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldr s1, [x0]
-; CHECK-NEXT: ptrue p0.h, vl4
 ; CHECK-NEXT: ldr s2, [x1]
 ; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: ptrue p0.h, vl4
 ; CHECK-NEXT: fcmeq v2.4h, v1.4h, v2.4h
 ; CHECK-NEXT: sshll v2.4s, v2.4h, #0
 ; CHECK-NEXT: mov v0.h[0], v2.h[0]
@@ -34,8 +34,8 @@
 define void @masked_store_v2f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
 ; CHECK-LABEL: masked_store_v2f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0]
 ; CHECK-NEXT: ptrue p0.s, vl2
+; CHECK-NEXT: ldr d0, [x0]
 ; CHECK-NEXT: ldr d1, [x1]
 ; CHECK-NEXT: fcmeq v1.2s, v0.2s, v1.2s
 ; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
@@ -51,8 +51,8 @@
 define void @masked_store_v4f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
 ; CHECK-LABEL: masked_store_v4f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: ldr q1, [x1]
 ; CHECK-NEXT: fcmeq v1.4s, v0.4s, v1.4s
 ; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
@@ -84,8 +84,8 @@
 define void @masked_store_v16f32(ptr %ap, ptr %bp) #0 {
 ; VBITS_GE_256-LABEL: masked_store_v16f32:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #8
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
+; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
 ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2]
@@ -146,25 +146,25 @@
 define void @masked_store_trunc_v8i64i8(ptr %ap, ptr %bp, ptr %dest) #0 {
 ; VBITS_GE_256-LABEL: masked_store_trunc_v8i64i8:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #4
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
 ; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1]
 ; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z0.d, z2.d
+; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
 ; VBITS_GE_256-NEXT: cmpeq p0.d, p0/z, z1.d, z3.d
+; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
 ; VBITS_GE_256-NEXT: mov z2.d, p1/z, #-1 // =0xffffffffffffffff
+; VBITS_GE_256-NEXT: ptrue p1.s, vl8
 ; VBITS_GE_256-NEXT: mov z3.d, p0/z, #-1 // =0xffffffffffffffff
-; VBITS_GE_256-NEXT: ptrue p1.s, vl4
+; VBITS_GE_256-NEXT: ptrue p0.s, vl4
 ; VBITS_GE_256-NEXT: uzp1 z2.s, z2.s, z2.s
 ; VBITS_GE_256-NEXT: uzp1 z3.s, z3.s, z3.s
-; VBITS_GE_256-NEXT: ptrue p0.s, vl8
-; VBITS_GE_256-NEXT: splice z3.s, p1, z3.s, z2.s
-; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
-; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
-; VBITS_GE_256-NEXT: cmpne p0.s, p0/z, z3.s, #0
-; VBITS_GE_256-NEXT: splice z1.s, p1, z1.s, z0.s
+; VBITS_GE_256-NEXT: splice z1.s, p0, z1.s, z0.s
+; VBITS_GE_256-NEXT: splice z3.s, p0, z3.s, z2.s
+; VBITS_GE_256-NEXT: cmpne p0.s, p1/z, z3.s, #0
 ; VBITS_GE_256-NEXT: st1b { z1.s }, p0, [x2]
 ; VBITS_GE_256-NEXT: ret
 ;
@@ -187,28 +187,28 @@
 define void @masked_store_trunc_v8i64i16(ptr %ap, ptr %bp, ptr %dest) #0 {
 ; VBITS_GE_256-LABEL: masked_store_trunc_v8i64i16:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #4
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
+; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
 ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
 ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
 ; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1]
 ; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z0.d, z2.d
-; VBITS_GE_256-NEXT: cmpeq p0.d, p0/z, z1.d, z3.d
 ; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s
+; VBITS_GE_256-NEXT: cmpeq p0.d, p0/z, z1.d, z3.d
 ; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s
 ; VBITS_GE_256-NEXT: mov z2.d, p1/z, #-1 // =0xffffffffffffffff
 ; VBITS_GE_256-NEXT: mov z3.d, p0/z, #-1 // =0xffffffffffffffff
+; VBITS_GE_256-NEXT: ptrue p0.s, vl4
 ; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h
 ; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h
-; VBITS_GE_256-NEXT: ptrue p1.s, vl4
 ; VBITS_GE_256-NEXT: uzp1 z2.s, z2.s, z2.s
 ; VBITS_GE_256-NEXT: uzp1 z3.s, z3.s, z3.s
-; VBITS_GE_256-NEXT: ptrue p0.h, vl8
 ; VBITS_GE_256-NEXT: mov v1.d[1], v0.d[0]
-; VBITS_GE_256-NEXT: splice z3.s, p1, z3.s, z2.s
-; VBITS_GE_256-NEXT: uzp1 z0.h, z3.h, z3.h
-; VBITS_GE_256-NEXT: cmpne p0.h, p0/z, z0.h, #0
+; VBITS_GE_256-NEXT: splice z3.s, p0, z3.s, z2.s
+; VBITS_GE_256-NEXT: ptrue p0.h, vl8
+; VBITS_GE_256-NEXT: uzp1 z2.h, z3.h, z3.h
+;
VBITS_GE_256-NEXT: cmpne p0.h, p0/z, z2.h, #0 ; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x2] ; VBITS_GE_256-NEXT: ret ; @@ -231,25 +231,25 @@ define void @masked_store_trunc_v8i64i32(ptr %ap, ptr %bp, ptr %dest) #0 { ; VBITS_GE_256-LABEL: masked_store_trunc_v8i64i32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1] ; VBITS_GE_256-NEXT: cmpeq p1.d, p0/z, z0.d, z2.d +; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_256-NEXT: cmpeq p0.d, p0/z, z1.d, z3.d +; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s ; VBITS_GE_256-NEXT: mov z2.d, p1/z, #-1 // =0xffffffffffffffff +; VBITS_GE_256-NEXT: ptrue p1.s, vl8 ; VBITS_GE_256-NEXT: mov z3.d, p0/z, #-1 // =0xffffffffffffffff -; VBITS_GE_256-NEXT: ptrue p1.s, vl4 +; VBITS_GE_256-NEXT: ptrue p0.s, vl4 ; VBITS_GE_256-NEXT: uzp1 z2.s, z2.s, z2.s ; VBITS_GE_256-NEXT: uzp1 z3.s, z3.s, z3.s -; VBITS_GE_256-NEXT: ptrue p0.s, vl8 -; VBITS_GE_256-NEXT: splice z3.s, p1, z3.s, z2.s -; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s -; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s -; VBITS_GE_256-NEXT: cmpne p0.s, p0/z, z3.s, #0 -; VBITS_GE_256-NEXT: splice z1.s, p1, z1.s, z0.s +; VBITS_GE_256-NEXT: splice z1.s, p0, z1.s, z0.s +; VBITS_GE_256-NEXT: splice z3.s, p0, z3.s, z2.s +; VBITS_GE_256-NEXT: cmpne p0.s, p1/z, z3.s, #0 ; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x2] ; VBITS_GE_256-NEXT: ret ; @@ -272,27 +272,27 @@ define void @masked_store_trunc_v16i32i8(ptr %ap, ptr %bp, ptr %dest) #0 { ; VBITS_GE_256-LABEL: masked_store_trunc_v16i32i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1] ; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z0.s, z2.s +; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h ; VBITS_GE_256-NEXT: cmpeq p0.s, p0/z, z1.s, z3.s +; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h ; VBITS_GE_256-NEXT: mov z2.s, p1/z, #-1 // =0xffffffffffffffff ; VBITS_GE_256-NEXT: mov z3.s, p0/z, #-1 // =0xffffffffffffffff +; VBITS_GE_256-NEXT: ptrue p0.b, vl16 +; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b +; VBITS_GE_256-NEXT: uzp1 z1.b, z1.b, z1.b ; VBITS_GE_256-NEXT: uzp1 z2.h, z2.h, z2.h ; VBITS_GE_256-NEXT: uzp1 z3.h, z3.h, z3.h -; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h -; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h +; VBITS_GE_256-NEXT: mov v1.d[1], v0.d[0] ; VBITS_GE_256-NEXT: uzp1 z2.b, z2.b, z2.b ; VBITS_GE_256-NEXT: uzp1 z3.b, z3.b, z3.b ; VBITS_GE_256-NEXT: mov v3.d[1], v2.d[0] -; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b -; VBITS_GE_256-NEXT: uzp1 z1.b, z1.b, z1.b -; VBITS_GE_256-NEXT: ptrue p0.b, vl16 -; VBITS_GE_256-NEXT: mov v1.d[1], v0.d[0] ; VBITS_GE_256-NEXT: cmpne p0.b, p0/z, z3.b, #0 ; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x2] ; VBITS_GE_256-NEXT: ret @@ -316,28 +316,28 @@ define void @masked_store_trunc_v16i32i16(ptr %ap, ptr %bp, ptr %dest) #0 { ; VBITS_GE_256-LABEL: masked_store_trunc_v16i32i16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; 
VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1] ; VBITS_GE_256-NEXT: cmpeq p1.s, p0/z, z0.s, z2.s +; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h ; VBITS_GE_256-NEXT: cmpeq p0.s, p0/z, z1.s, z3.s +; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h ; VBITS_GE_256-NEXT: mov z2.s, p1/z, #-1 // =0xffffffffffffffff +; VBITS_GE_256-NEXT: ptrue p1.h, vl16 ; VBITS_GE_256-NEXT: mov z3.s, p0/z, #-1 // =0xffffffffffffffff +; VBITS_GE_256-NEXT: ptrue p0.h, vl8 ; VBITS_GE_256-NEXT: uzp1 z2.h, z2.h, z2.h ; VBITS_GE_256-NEXT: uzp1 z3.h, z3.h, z3.h ; VBITS_GE_256-NEXT: uzp1 z2.b, z2.b, z2.b +; VBITS_GE_256-NEXT: splice z1.h, p0, z1.h, z0.h ; VBITS_GE_256-NEXT: uzp1 z3.b, z3.b, z3.b ; VBITS_GE_256-NEXT: mov v3.d[1], v2.d[0] -; VBITS_GE_256-NEXT: ptrue p0.h, vl16 -; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h -; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h -; VBITS_GE_256-NEXT: ptrue p1.h, vl8 -; VBITS_GE_256-NEXT: splice z1.h, p1, z1.h, z0.h ; VBITS_GE_256-NEXT: sunpklo z2.h, z3.b -; VBITS_GE_256-NEXT: cmpne p0.h, p0/z, z2.h, #0 +; VBITS_GE_256-NEXT: cmpne p0.h, p1/z, z2.h, #0 ; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x2] ; VBITS_GE_256-NEXT: ret ; @@ -360,25 +360,25 @@ define void @masked_store_trunc_v32i16i8(ptr %ap, ptr %bp, ptr %dest) #0 { ; VBITS_GE_256-LABEL: masked_store_trunc_v32i16i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1] ; VBITS_GE_256-NEXT: cmpeq p1.h, p0/z, z0.h, z2.h +; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b ; VBITS_GE_256-NEXT: cmpeq p0.h, p0/z, z1.h, z3.h +; VBITS_GE_256-NEXT: uzp1 z1.b, z1.b, z1.b ; VBITS_GE_256-NEXT: mov z2.h, p1/z, #-1 // =0xffffffffffffffff +; VBITS_GE_256-NEXT: ptrue p1.b, vl32 ; VBITS_GE_256-NEXT: mov z3.h, p0/z, #-1 // =0xffffffffffffffff -; VBITS_GE_256-NEXT: ptrue p1.b, vl16 +; VBITS_GE_256-NEXT: ptrue p0.b, vl16 ; VBITS_GE_256-NEXT: uzp1 z2.b, z2.b, z2.b ; VBITS_GE_256-NEXT: uzp1 z3.b, z3.b, z3.b -; VBITS_GE_256-NEXT: ptrue p0.b, vl32 -; VBITS_GE_256-NEXT: splice z3.b, p1, z3.b, z2.b -; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b -; VBITS_GE_256-NEXT: uzp1 z1.b, z1.b, z1.b -; VBITS_GE_256-NEXT: cmpne p0.b, p0/z, z3.b, #0 -; VBITS_GE_256-NEXT: splice z1.b, p1, z1.b, z0.b +; VBITS_GE_256-NEXT: splice z1.b, p0, z1.b, z0.b +; VBITS_GE_256-NEXT: splice z3.b, p0, z3.b, z2.b +; VBITS_GE_256-NEXT: cmpne p0.b, p1/z, z3.b, #0 ; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x2] ; VBITS_GE_256-NEXT: ret ; diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-no-vscale-range.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-no-vscale-range.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-no-vscale-range.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-no-vscale-range.ll @@ -6,8 +6,8 @@ define <2 x i64> @mul_v2i64(<2 x i64> %op1, <2 x i64> %op2) #0 { ; CHECK-LABEL: mul_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -19,8 +19,8 @@ define <4 x i32> @sdiv_v4i32(<4 x i32> %op1, <4 x i32> %op2) #0 { ; CHECK-LABEL: sdiv_v4i32: ; CHECK: // 
%bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll @@ -165,8 +165,8 @@ define void @test_revhv32i16(ptr %a) #0 { ; VBITS_GE_256-LABEL: test_revhv32i16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ptrue p1.d ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] @@ -204,14 +204,14 @@ ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] ; CHECK-NEXT: mov z1.d, z0.d[2] -; CHECK-NEXT: fmov x11, d0 +; CHECK-NEXT: mov z2.d, z0.d[3] +; CHECK-NEXT: mov x9, v0.d[1] ; CHECK-NEXT: fmov x8, d1 -; CHECK-NEXT: mov z1.d, z0.d[3] -; CHECK-NEXT: fmov x9, d1 -; CHECK-NEXT: mov x10, v0.d[1] -; CHECK-NEXT: stp x9, x8, [sp, #16] +; CHECK-NEXT: fmov x10, d2 +; CHECK-NEXT: stp x10, x8, [sp, #16] +; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: stp x9, x8, [sp] ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: stp x10, x11, [sp] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8] ; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: mov sp, x29 @@ -272,22 +272,22 @@ ; CHECK-NEXT: ptrue p0.s, vl8 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] ; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w10, s0 ; CHECK-NEXT: mov w9, v0.s[2] -; CHECK-NEXT: mov w11, v0.s[3] +; CHECK-NEXT: mov w10, v0.s[3] +; CHECK-NEXT: fmov w11, s0 ; CHECK-NEXT: mov z1.s, z0.s[4] ; CHECK-NEXT: mov z2.s, z0.s[5] ; CHECK-NEXT: mov z3.s, z0.s[6] ; CHECK-NEXT: mov z0.s, z0.s[7] -; CHECK-NEXT: stp w8, w10, [sp, #24] -; CHECK-NEXT: fmov w10, s1 -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: stp w11, w9, [sp, #16] -; CHECK-NEXT: fmov w9, s3 -; CHECK-NEXT: fmov w11, s0 -; CHECK-NEXT: stp w8, w10, [sp, #8] +; CHECK-NEXT: stp w8, w11, [sp, #24] +; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: stp w10, w9, [sp, #16] +; CHECK-NEXT: fmov w9, s2 +; CHECK-NEXT: stp w9, w8, [sp, #8] +; CHECK-NEXT: fmov w8, s3 +; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: stp w9, w8, [sp] ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: stp w11, w9, [sp] ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8] ; CHECK-NEXT: st1w { z0.s }, p0, [x0] ; CHECK-NEXT: mov sp, x29 @@ -390,45 +390,45 @@ ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: mov z1.h, z0.h[8] ; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: fmov w9, s1 -; CHECK-NEXT: mov z4.h, z0.h[11] -; CHECK-NEXT: mov z5.h, z0.h[12] ; CHECK-NEXT: mov z2.h, z0.h[9] -; CHECK-NEXT: strh w8, [sp, #14] -; CHECK-NEXT: fmov w8, s4 ; CHECK-NEXT: mov z3.h, z0.h[10] -; CHECK-NEXT: strh w9, [sp, #30] -; CHECK-NEXT: fmov w9, s5 -; CHECK-NEXT: mov z16.h, z0.h[15] -; CHECK-NEXT: fmov w11, s2 -; CHECK-NEXT: fmov w12, s3 -; CHECK-NEXT: strh w8, [sp, #24] -; CHECK-NEXT: fmov w8, s16 -; CHECK-NEXT: mov z6.h, z0.h[13] -; CHECK-NEXT: mov z7.h, z0.h[14] -; CHECK-NEXT: umov w10, v0.h[1] -; CHECK-NEXT: strh w9, [sp, #22] -; CHECK-NEXT: umov w9, v0.h[2] -; CHECK-NEXT: strh w11, [sp, #28] -; CHECK-NEXT: fmov w11, s6 -; CHECK-NEXT: strh w12, [sp, #26] -; CHECK-NEXT: fmov w12, s7 -; CHECK-NEXT: strh w8, [sp, #16] -; CHECK-NEXT: umov w8, v0.h[5] -; CHECK-NEXT: 
strh w10, [sp, #12] -; CHECK-NEXT: strh w11, [sp, #20] -; CHECK-NEXT: umov w11, v0.h[3] -; CHECK-NEXT: strh w12, [sp, #18] -; CHECK-NEXT: umov w12, v0.h[4] -; CHECK-NEXT: umov w10, v0.h[6] -; CHECK-NEXT: strh w9, [sp, #10] -; CHECK-NEXT: umov w9, v0.h[7] -; CHECK-NEXT: strh w8, [sp, #4] +; CHECK-NEXT: mov z4.h, z0.h[11] +; CHECK-NEXT: strh w8, [sp, #14] +; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: mov z1.h, z0.h[12] +; CHECK-NEXT: fmov w9, s2 +; CHECK-NEXT: mov z2.h, z0.h[13] +; CHECK-NEXT: strh w8, [sp, #30] +; CHECK-NEXT: fmov w8, s3 +; CHECK-NEXT: mov z3.h, z0.h[14] +; CHECK-NEXT: strh w9, [sp, #28] +; CHECK-NEXT: fmov w9, s4 +; CHECK-NEXT: mov z4.h, z0.h[15] +; CHECK-NEXT: fmov w10, s2 +; CHECK-NEXT: strh w8, [sp, #26] +; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: fmov w11, s3 +; CHECK-NEXT: strh w9, [sp, #24] +; CHECK-NEXT: umov w9, v0.h[1] +; CHECK-NEXT: fmov w12, s4 +; CHECK-NEXT: strh w10, [sp, #20] +; CHECK-NEXT: umov w10, v0.h[3] +; CHECK-NEXT: strh w8, [sp, #22] +; CHECK-NEXT: umov w8, v0.h[2] +; CHECK-NEXT: strh w11, [sp, #18] +; CHECK-NEXT: umov w11, v0.h[4] +; CHECK-NEXT: strh w12, [sp, #16] +; CHECK-NEXT: umov w12, v0.h[5] +; CHECK-NEXT: strh w9, [sp, #12] +; CHECK-NEXT: umov w9, v0.h[6] +; CHECK-NEXT: strh w8, [sp, #10] +; CHECK-NEXT: umov w8, v0.h[7] +; CHECK-NEXT: strh w10, [sp, #8] +; CHECK-NEXT: strh w11, [sp, #6] +; CHECK-NEXT: strh w12, [sp, #4] +; CHECK-NEXT: strh w9, [sp, #2] +; CHECK-NEXT: strh w8, [sp] ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: strh w11, [sp, #8] -; CHECK-NEXT: strh w12, [sp, #6] -; CHECK-NEXT: strh w10, [sp, #2] -; CHECK-NEXT: strh w9, [sp] ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8] ; CHECK-NEXT: st1h { z0.h }, p0, [x0] ; CHECK-NEXT: mov sp, x29 @@ -453,40 +453,40 @@ ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: mov x8, sp ; CHECK-NEXT: ldr q0, [x1] +; CHECK-NEXT: ldr q1, [x0] ; CHECK-NEXT: orr x9, x8, #0x1e ; CHECK-NEXT: orr x10, x8, #0x1c -; CHECK-NEXT: ldr q1, [x0] -; CHECK-NEXT: orr x11, x8, #0x18 -; CHECK-NEXT: orr x12, x8, #0x10 -; CHECK-NEXT: str h0, [sp, #22] +; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: st1 { v0.h }[4], [x9] +; CHECK-NEXT: orr x9, x8, #0x18 +; CHECK-NEXT: st1 { v0.h }[7], [x9] ; CHECK-NEXT: orr x9, x8, #0xe -; CHECK-NEXT: st1 { v0.h }[5], [x10] -; CHECK-NEXT: orr x10, x8, #0xc -; CHECK-NEXT: st1 { v0.h }[7], [x11] -; CHECK-NEXT: orr x11, x8, #0x8 ; CHECK-NEXT: st1 { v1.h }[4], [x9] +; CHECK-NEXT: orr x9, x8, #0xc +; CHECK-NEXT: st1 { v1.h }[5], [x9] +; CHECK-NEXT: orr x9, x8, #0x8 +; CHECK-NEXT: st1 { v0.h }[5], [x10] +; CHECK-NEXT: orr x10, x8, #0x10 +; CHECK-NEXT: st1 { v1.h }[7], [x9] ; CHECK-NEXT: orr x9, x8, #0x4 -; CHECK-NEXT: st1 { v1.h }[5], [x10] +; CHECK-NEXT: st1 { v0.h }[3], [x10] ; CHECK-NEXT: mov w10, #26 // =0x1a -; CHECK-NEXT: orr x10, x8, x10 -; CHECK-NEXT: st1 { v0.h }[3], [x12] ; CHECK-NEXT: st1 { v1.h }[1], [x9] ; CHECK-NEXT: orr x9, x8, #0x2 -; CHECK-NEXT: st1 { v1.h }[7], [x11] -; CHECK-NEXT: mov w11, #20 // =0x14 -; CHECK-NEXT: mov w12, #18 // =0x12 -; CHECK-NEXT: st1 { v0.h }[6], [x10] -; CHECK-NEXT: mov w10, #10 // =0xa -; CHECK-NEXT: orr x11, x8, x11 ; CHECK-NEXT: st1 { v1.h }[2], [x9] -; CHECK-NEXT: orr x9, x8, x12 -; CHECK-NEXT: orr x10, x8, x10 -; CHECK-NEXT: st1 { v1.h }[3], [x8] -; CHECK-NEXT: st1 { v0.h }[1], [x11] -; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: orr x9, x8, x10 +; CHECK-NEXT: mov w10, #20 // =0x14 +; CHECK-NEXT: st1 { v0.h }[6], [x9] +; CHECK-NEXT: orr x9, x8, x10 +; CHECK-NEXT: mov w10, #18 // =0x12 +; CHECK-NEXT: st1 { v0.h }[1], [x9] +; CHECK-NEXT: orr x9, x8, x10 ; CHECK-NEXT: st1 { v0.h 
}[2], [x9] -; CHECK-NEXT: st1 { v1.h }[6], [x10] +; CHECK-NEXT: mov w9, #10 // =0xa +; CHECK-NEXT: orr x9, x8, x9 +; CHECK-NEXT: st1 { v1.h }[3], [x8] +; CHECK-NEXT: st1 { v1.h }[6], [x9] +; CHECK-NEXT: str h0, [sp, #22] ; CHECK-NEXT: str h1, [sp, #6] ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x8] ; CHECK-NEXT: st1h { z0.h }, p0, [x2] diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-zip-uzp-trn.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-zip-uzp-trn.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-zip-uzp-trn.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-zip-uzp-trn.ll @@ -32,19 +32,19 @@ define void @zip_v32i16(ptr %a, ptr %b) #0 { ; VBITS_EQ_256-LABEL: zip_v32i16: ; VBITS_EQ_256: // %bb.0: -; VBITS_EQ_256-NEXT: mov x8, #16 ; VBITS_EQ_256-NEXT: ptrue p0.h +; VBITS_EQ_256-NEXT: mov x8, #16 // =0x10 ; VBITS_EQ_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_EQ_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_EQ_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1] ; VBITS_EQ_256-NEXT: ld1h { z3.h }, p0/z, [x1] +; VBITS_EQ_256-NEXT: zip2 z5.h, z0.h, z2.h +; VBITS_EQ_256-NEXT: zip1 z0.h, z0.h, z2.h ; VBITS_EQ_256-NEXT: zip2 z4.h, z1.h, z3.h ; VBITS_EQ_256-NEXT: zip1 z1.h, z1.h, z3.h -; VBITS_EQ_256-NEXT: zip2 z3.h, z0.h, z2.h -; VBITS_EQ_256-NEXT: zip1 z0.h, z0.h, z2.h +; VBITS_EQ_256-NEXT: add z2.h, z4.h, z5.h ; VBITS_EQ_256-NEXT: add z0.h, z1.h, z0.h -; VBITS_EQ_256-NEXT: add z1.h, z4.h, z3.h -; VBITS_EQ_256-NEXT: st1h { z1.h }, p0, [x0, x8, lsl #1] +; VBITS_EQ_256-NEXT: st1h { z2.h }, p0, [x0, x8, lsl #1] ; VBITS_EQ_256-NEXT: st1h { z0.h }, p0, [x0] ; VBITS_EQ_256-NEXT: ret ; @@ -144,13 +144,13 @@ ; VBITS_EQ_512-NEXT: ld1d { z1.d }, p0/z, [x1] ; VBITS_EQ_512-NEXT: mov z2.d, z1.d[3] ; VBITS_EQ_512-NEXT: mov z3.d, z0.d[3] -; VBITS_EQ_512-NEXT: stp d3, d2, [sp, #16] -; VBITS_EQ_512-NEXT: mov z2.d, z1.d[2] -; VBITS_EQ_512-NEXT: mov z3.d, z0.d[2] +; VBITS_EQ_512-NEXT: mov z4.d, z1.d[2] +; VBITS_EQ_512-NEXT: mov z5.d, z0.d[2] ; VBITS_EQ_512-NEXT: zip1 z0.d, z0.d, z1.d -; VBITS_EQ_512-NEXT: stp d3, d2, [sp] -; VBITS_EQ_512-NEXT: ld1d { z2.d }, p0/z, [x8] -; VBITS_EQ_512-NEXT: fadd z0.d, p0/m, z0.d, z2.d +; VBITS_EQ_512-NEXT: stp d3, d2, [sp, #16] +; VBITS_EQ_512-NEXT: stp d5, d4, [sp] +; VBITS_EQ_512-NEXT: ld1d { z1.d }, p0/z, [x8] +; VBITS_EQ_512-NEXT: fadd z0.d, p0/m, z0.d, z1.d ; VBITS_EQ_512-NEXT: st1d { z0.d }, p0, [x0] ; VBITS_EQ_512-NEXT: mov sp, x29 ; VBITS_EQ_512-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload @@ -240,18 +240,18 @@ define void @trn_v32i16(ptr %a, ptr %b) #0 { ; VBITS_EQ_256-LABEL: trn_v32i16: ; VBITS_EQ_256: // %bb.0: -; VBITS_EQ_256-NEXT: mov x8, #16 ; VBITS_EQ_256-NEXT: ptrue p0.h +; VBITS_EQ_256-NEXT: mov x8, #16 // =0x10 ; VBITS_EQ_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_EQ_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_EQ_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1] ; VBITS_EQ_256-NEXT: ld1h { z3.h }, p0/z, [x1] ; VBITS_EQ_256-NEXT: trn1 z4.h, z0.h, z2.h -; VBITS_EQ_256-NEXT: trn1 z5.h, z1.h, z3.h ; VBITS_EQ_256-NEXT: trn2 z0.h, z0.h, z2.h +; VBITS_EQ_256-NEXT: trn1 z2.h, z1.h, z3.h ; VBITS_EQ_256-NEXT: trn2 z1.h, z1.h, z3.h ; VBITS_EQ_256-NEXT: add z0.h, z4.h, z0.h -; VBITS_EQ_256-NEXT: add z1.h, z5.h, z1.h +; VBITS_EQ_256-NEXT: add z1.h, z2.h, z1.h ; VBITS_EQ_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1] ; VBITS_EQ_256-NEXT: st1h { z1.h }, p0, [x0] ; VBITS_EQ_256-NEXT: ret @@ -513,18 +513,18 @@ define void @uzp_v32i16(ptr %a, ptr %b) #1 { ; CHECK-LABEL: uzp_v32i16: ; CHECK: // %bb.0: -; CHECK-NEXT: 
mov x8, #16 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov x8, #16 // =0x10 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0] ; CHECK-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1] ; CHECK-NEXT: ld1h { z3.h }, p0/z, [x1] -; CHECK-NEXT: uzp1 z5.h, z1.h, z0.h -; CHECK-NEXT: uzp2 z0.h, z1.h, z0.h ; CHECK-NEXT: uzp1 z4.h, z3.h, z2.h ; CHECK-NEXT: uzp2 z2.h, z3.h, z2.h -; CHECK-NEXT: add z0.h, z5.h, z0.h +; CHECK-NEXT: uzp1 z3.h, z1.h, z0.h +; CHECK-NEXT: uzp2 z0.h, z1.h, z0.h ; CHECK-NEXT: add z1.h, z4.h, z2.h +; CHECK-NEXT: add z0.h, z3.h, z0.h ; CHECK-NEXT: st1h { z1.h }, p0, [x0, x8, lsl #1] ; CHECK-NEXT: st1h { z0.h }, p0, [x0] ; CHECK-NEXT: ret @@ -661,13 +661,13 @@ ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] ; CHECK-NEXT: mov z2.d, z1.d[3] ; CHECK-NEXT: mov z3.d, z0.d[3] -; CHECK-NEXT: stp d3, d2, [sp, #16] -; CHECK-NEXT: mov z2.d, z1.d[2] -; CHECK-NEXT: mov z3.d, z0.d[2] +; CHECK-NEXT: mov z4.d, z1.d[2] +; CHECK-NEXT: mov z5.d, z0.d[2] ; CHECK-NEXT: zip1 z0.d, z0.d, z1.d -; CHECK-NEXT: stp d3, d2, [sp] -; CHECK-NEXT: ld1d { z2.d }, p0/z, [x8] -; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z2.d +; CHECK-NEXT: stp d3, d2, [sp, #16] +; CHECK-NEXT: stp d5, d4, [sp] +; CHECK-NEXT: ld1d { z1.d }, p0/z, [x8] +; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: mov sp, x29 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-ptest.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-ptest.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-ptest.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-ptest.ll @@ -4,8 +4,8 @@ define i1 @ptest_v16i1_256bit_min_sve(ptr %a, ptr %b) vscale_range(2, 0) { ; CHECK-LABEL: ptest_v16i1_256bit_min_sve: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #8 // =0x8 ; CHECK-NEXT: ptrue p0.s, vl8 +; CHECK-NEXT: mov x8, #8 // =0x8 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0] ; CHECK-NEXT: fcmne p1.s, p0/z, z0.s, #0.0 @@ -101,9 +101,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] -; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1] -; CHECK-NEXT: fcmne p0.s, p0/z, z0.s, #0.0 -; CHECK-NEXT: fcmne p0.s, p0/z, z1.s, #0.0 +; CHECK-NEXT: fcmne p1.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x1] +; CHECK-NEXT: fcmne p0.s, p1/z, z0.s, #0.0 ; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-reshuffle.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-reshuffle.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-reshuffle.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-reshuffle.ll @@ -10,8 +10,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov z1.s, p0/z, #1 // =0x1 ; CHECK-NEXT: mov w8, v1.s[1] -; CHECK-NEXT: mov w9, v1.s[2] ; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: mov w9, v1.s[2] ; CHECK-NEXT: mov v0.h[1], w8 ; CHECK-NEXT: mov w8, v1.s[3] ; CHECK-NEXT: mov v0.h[2], w9 diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-rev.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-rev.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-rev.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-rev.ll @@ -12,8 +12,8 @@ define <8 x i8> @bitreverse_v8i8(<8 x i8> %op) vscale_range(2,0) #0 { ; CHECK-LABEL: bitreverse_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl8 +; CHECK-NEXT: // kill: def $d0 
killed $d0 def $z0 ; CHECK-NEXT: rbit z0.b, p0/m, z0.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -24,8 +24,8 @@ define <16 x i8> @bitreverse_v16i8(<16 x i8> %op) vscale_range(2,0) #0 { ; CHECK-LABEL: bitreverse_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: rbit z0.b, p0/m, z0.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -50,8 +50,8 @@ define void @bitreverse_v64i8(ptr %a) #0 { ; VBITS_GE_256-LABEL: bitreverse_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 // =0x20 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: rbit z0.b, p0/m, z0.b @@ -104,8 +104,8 @@ define <4 x i16> @bitreverse_v4i16(<4 x i16> %op) vscale_range(2,0) #0 { ; CHECK-LABEL: bitreverse_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: rbit z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -116,8 +116,8 @@ define <8 x i16> @bitreverse_v8i16(<8 x i16> %op) vscale_range(2,0) #0 { ; CHECK-LABEL: bitreverse_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: rbit z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -142,8 +142,8 @@ define void @bitreverse_v32i16(ptr %a) #0 { ; VBITS_GE_256-LABEL: bitreverse_v32i16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: rbit z0.h, p0/m, z0.h @@ -196,8 +196,8 @@ define <2 x i32> @bitreverse_v2i32(<2 x i32> %op) vscale_range(2,0) #0 { ; CHECK-LABEL: bitreverse_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: rbit z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -208,8 +208,8 @@ define <4 x i32> @bitreverse_v4i32(<4 x i32> %op) vscale_range(2,0) #0 { ; CHECK-LABEL: bitreverse_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: rbit z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -234,8 +234,8 @@ define void @bitreverse_v16i32(ptr %a) #0 { ; VBITS_GE_256-LABEL: bitreverse_v16i32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: rbit z0.s, p0/m, z0.s @@ -288,8 +288,8 @@ define <1 x i64> @bitreverse_v1i64(<1 x i64> %op) vscale_range(2,0) #0 { ; CHECK-LABEL: bitreverse_v1i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: rbit z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def 
$d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -300,8 +300,8 @@ define <2 x i64> @bitreverse_v2i64(<2 x i64> %op) vscale_range(2,0) #0 { ; CHECK-LABEL: bitreverse_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: rbit z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -326,8 +326,8 @@ define void @bitreverse_v8i64(ptr %a) #0 { ; VBITS_GE_256-LABEL: bitreverse_v8i64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: rbit z0.d, p0/m, z0.d @@ -418,8 +418,8 @@ define void @bswap_v32i16(ptr %a) #0 { ; VBITS_GE_256-LABEL: bswap_v32i16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: revb z0.h, p0/m, z0.h @@ -506,8 +506,8 @@ define void @bswap_v16i32(ptr %a) #0 { ; VBITS_GE_256-LABEL: bswap_v16i32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: revb z0.s, p0/m, z0.s @@ -594,8 +594,8 @@ define void @bswap_v8i64(ptr %a) #0 { ; VBITS_GE_256-LABEL: bswap_v8i64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: revb z0.d, p0/m, z0.d diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-sdiv-pow2.ll @@ -8,8 +8,8 @@ define <8 x i8> @sdiv_v8i8(<8 x i8> %op1) vscale_range(2,0) #0 { ; CHECK-LABEL: sdiv_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl8 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: asrd z0.b, p0/m, z0.b, #1 ; CHECK-NEXT: subr z0.b, z0.b, #0 // =0x0 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -21,8 +21,8 @@ define <16 x i8> @sdiv_v16i8(<16 x i8> %op1) vscale_range(2,0) #0 { ; CHECK-LABEL: sdiv_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: asrd z0.b, p0/m, z0.b, #5 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -47,8 +47,8 @@ define void @sdiv_v64i8(ptr %a) #0 { ; VBITS_GE_256-LABEL: sdiv_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 // =0x20 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x0] ; VBITS_GE_256-NEXT: asrd z0.b, p0/m, z0.b, #5 @@ -102,8 +102,8 @@ define <4 x i16> @sdiv_v4i16(<4 x i16> %op1) vscale_range(2,0) #0 { ; CHECK-LABEL: sdiv_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; 
CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: asrd z0.h, p0/m, z0.h, #5 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -114,8 +114,8 @@ define <8 x i16> @sdiv_v8i16(<8 x i16> %op1) vscale_range(2,0) #0 { ; CHECK-LABEL: sdiv_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: asrd z0.h, p0/m, z0.h, #3 ; CHECK-NEXT: subr z0.h, z0.h, #0 // =0x0 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -141,8 +141,8 @@ define void @sdiv_v32i16(ptr %a) #0 { ; VBITS_GE_256-LABEL: sdiv_v32i16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: asrd z0.h, p0/m, z0.h, #5 @@ -196,8 +196,8 @@ define <2 x i32> @sdiv_v2i32(<2 x i32> %op1) vscale_range(2,0) #0 { ; CHECK-LABEL: sdiv_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #5 ; CHECK-NEXT: subr z0.s, z0.s, #0 // =0x0 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -209,8 +209,8 @@ define <4 x i32> @sdiv_v4i32(<4 x i32> %op1) vscale_range(2,0) #0 { ; CHECK-LABEL: sdiv_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #5 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -236,8 +236,8 @@ define void @sdiv_v16i32(ptr %a) #0 { ; VBITS_GE_256-LABEL: sdiv_v16i32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: asrd z0.s, p0/m, z0.s, #5 @@ -290,8 +290,8 @@ define <1 x i64> @sdiv_v1i64(<1 x i64> %op1) vscale_range(2,0) #0 { ; CHECK-LABEL: sdiv_v1i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: asrd z0.d, p0/m, z0.d, #7 ; CHECK-NEXT: subr z0.d, z0.d, #0 // =0x0 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -304,8 +304,8 @@ define <2 x i64> @sdiv_v2i64(<2 x i64> %op1) vscale_range(2,0) #0 { ; CHECK-LABEL: sdiv_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: asrd z0.d, p0/m, z0.d, #5 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -331,8 +331,8 @@ define void @sdiv_v8i64(ptr %a) #0 { ; VBITS_GE_256-LABEL: sdiv_v8i64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: asrd z0.d, p0/m, z0.d, #5 diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll @@ -31,77 
+31,77 @@ ; CHECK-NEXT: mov z0.b, #0 // =0x0 ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: umov w8, v0.b[8] -; CHECK-NEXT: umov w9, v0.b[1] -; CHECK-NEXT: umov w10, v0.b[9] -; CHECK-NEXT: umov w11, v0.b[2] -; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: fmov s2, w8 +; CHECK-NEXT: umov w9, v0.b[9] +; CHECK-NEXT: umov w10, v0.b[1] +; CHECK-NEXT: mov v2.16b, v0.16b +; CHECK-NEXT: umov w11, v0.b[15] +; CHECK-NEXT: fmov s1, w8 ; CHECK-NEXT: umov w8, v0.b[10] -; CHECK-NEXT: mov v1.b[1], w9 -; CHECK-NEXT: umov w9, v0.b[3] ; CHECK-NEXT: mov v2.b[1], w10 ; CHECK-NEXT: umov w10, v0.b[11] -; CHECK-NEXT: mov v1.b[2], w11 -; CHECK-NEXT: umov w11, v0.b[7] -; CHECK-NEXT: mov v2.b[2], w8 -; CHECK-NEXT: umov w8, v0.b[4] -; CHECK-NEXT: mov v1.b[3], w9 +; CHECK-NEXT: mov v1.b[1], w9 +; CHECK-NEXT: umov w9, v0.b[2] +; CHECK-NEXT: mov v1.b[2], w8 +; CHECK-NEXT: umov w8, v0.b[3] +; CHECK-NEXT: mov v2.b[2], w9 ; CHECK-NEXT: umov w9, v0.b[12] -; CHECK-NEXT: mov v2.b[3], w10 -; CHECK-NEXT: umov w10, v0.b[5] -; CHECK-NEXT: mov v1.b[4], w8 +; CHECK-NEXT: mov v1.b[3], w10 +; CHECK-NEXT: umov w10, v0.b[4] +; CHECK-NEXT: mov v2.b[3], w8 ; CHECK-NEXT: umov w8, v0.b[13] -; CHECK-NEXT: mov v2.b[4], w9 -; CHECK-NEXT: umov w9, v0.b[6] -; CHECK-NEXT: mov v1.b[5], w10 +; CHECK-NEXT: mov v1.b[4], w9 +; CHECK-NEXT: umov w9, v0.b[5] +; CHECK-NEXT: mov v2.b[4], w10 ; CHECK-NEXT: umov w10, v0.b[14] -; CHECK-NEXT: mov v2.b[5], w8 -; CHECK-NEXT: mov x8, #16 // =0x10 -; CHECK-NEXT: mov v1.b[6], w9 -; CHECK-NEXT: mov x9, #24 // =0x18 -; CHECK-NEXT: ld1w { z4.s }, p0/z, [x0, x8, lsl #2] -; CHECK-NEXT: mov v2.b[6], w10 -; CHECK-NEXT: umov w10, v0.b[15] +; CHECK-NEXT: mov v1.b[5], w8 +; CHECK-NEXT: umov w8, v0.b[6] +; CHECK-NEXT: mov v2.b[5], w9 +; CHECK-NEXT: umov w9, v0.b[7] ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #16 -; CHECK-NEXT: ld1w { z5.s }, p0/z, [x0, x9, lsl #2] +; CHECK-NEXT: mov v1.b[6], w10 +; CHECK-NEXT: mov v2.b[6], w8 ; CHECK-NEXT: ext v3.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: uunpklo z0.h, z0.b +; CHECK-NEXT: mov x8, #16 // =0x10 +; CHECK-NEXT: mov x10, #8 // =0x8 +; CHECK-NEXT: ld1w { z4.s }, p0/z, [x0, x8, lsl #2] ; CHECK-NEXT: mov v1.b[7], w11 -; CHECK-NEXT: uunpklo z0.s, z0.h -; CHECK-NEXT: mov v2.b[7], w10 -; CHECK-NEXT: lsl z0.s, z0.s, #31 -; CHECK-NEXT: asr z0.s, z0.s, #31 -; CHECK-NEXT: mov x11, #8 // =0x8 +; CHECK-NEXT: mov v2.b[7], w9 ; CHECK-NEXT: uunpklo z3.h, z3.b -; CHECK-NEXT: and z0.s, z0.s, #0x1 -; CHECK-NEXT: uunpklo z3.s, z3.h -; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, #0 -; CHECK-NEXT: lsl z3.s, z3.s, #31 +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: mov x9, #24 // =0x18 ; CHECK-NEXT: uunpklo z1.h, z1.b -; CHECK-NEXT: asr z0.s, z3.s, #31 ; CHECK-NEXT: uunpklo z2.h, z2.b -; CHECK-NEXT: and z0.s, z0.s, #0x1 +; CHECK-NEXT: uunpklo z3.s, z3.h +; CHECK-NEXT: lsl z0.s, z0.s, #31 ; CHECK-NEXT: uunpklo z1.s, z1.h ; CHECK-NEXT: uunpklo z2.s, z2.h -; CHECK-NEXT: ld1w { z3.s }, p0/z, [x0, x11, lsl #2] -; CHECK-NEXT: cmpne p2.s, p0/z, z0.s, #0 -; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] +; CHECK-NEXT: lsl z3.s, z3.s, #31 +; CHECK-NEXT: asr z0.s, z0.s, #31 +; CHECK-NEXT: asr z3.s, z3.s, #31 ; CHECK-NEXT: lsl z1.s, z1.s, #31 ; CHECK-NEXT: lsl z2.s, z2.s, #31 +; CHECK-NEXT: and z0.s, z0.s, #0x1 +; CHECK-NEXT: and z3.s, z3.s, #0x1 ; CHECK-NEXT: asr z1.s, z1.s, #31 ; CHECK-NEXT: asr z2.s, z2.s, #31 +; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, #0 +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0, x9, lsl #2] +; CHECK-NEXT: cmpne p2.s, p0/z, z3.s, #0 +; CHECK-NEXT: ld1w { z3.s }, p0/z, [x0, x10, lsl #2] ; CHECK-NEXT: and z1.s, z1.s, #0x1 ; 
CHECK-NEXT: and z2.s, z2.s, #0x1 ; CHECK-NEXT: mov z4.s, p1/m, #0 // =0x0 -; CHECK-NEXT: mov z5.s, p2/m, #0 // =0x0 -; CHECK-NEXT: cmpne p1.s, p0/z, z1.s, #0 -; CHECK-NEXT: cmpne p2.s, p0/z, z2.s, #0 -; CHECK-NEXT: mov z0.s, p1/m, #0 // =0x0 -; CHECK-NEXT: mov z3.s, p2/m, #0 // =0x0 +; CHECK-NEXT: mov z0.s, p2/m, #0 // =0x0 +; CHECK-NEXT: cmpne p3.s, p0/z, z1.s, #0 +; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0] +; CHECK-NEXT: cmpne p1.s, p0/z, z2.s, #0 ; CHECK-NEXT: st1w { z4.s }, p0, [x0, x8, lsl #2] -; CHECK-NEXT: st1w { z5.s }, p0, [x0, x9, lsl #2] -; CHECK-NEXT: st1w { z3.s }, p0, [x0, x11, lsl #2] -; CHECK-NEXT: st1w { z0.s }, p0, [x0] +; CHECK-NEXT: st1w { z0.s }, p0, [x0, x9, lsl #2] +; CHECK-NEXT: mov z3.s, p3/m, #0 // =0x0 +; CHECK-NEXT: mov z1.s, p1/m, #0 // =0x0 +; CHECK-NEXT: st1w { z3.s }, p0, [x0, x10, lsl #2] +; CHECK-NEXT: st1w { z1.s }, p0, [x0] ; CHECK-NEXT: .LBB1_2: // %exit ; CHECK-NEXT: ret %broadcast.splat = shufflevector <32 x i1> zeroinitializer, <32 x i1> zeroinitializer, <32 x i32> zeroinitializer diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-vector.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-vector.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-vector.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-splat-vector.ll @@ -47,11 +47,11 @@ define void @splat_v64i8(i8 %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: splat_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 -; VBITS_GE_256-NEXT: mov z0.b, w0 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 -; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x1] +; VBITS_GE_256-NEXT: mov z0.b, w0 +; VBITS_GE_256-NEXT: mov w8, #32 // =0x20 ; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x1, x8] +; VBITS_GE_256-NEXT: st1b { z0.b }, p0, [x1] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: splat_v64i8: @@ -130,11 +130,11 @@ define void @splat_v32i16(i16 %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: splat_v32i16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 -; VBITS_GE_256-NEXT: mov z0.h, w0 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 -; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x1] +; VBITS_GE_256-NEXT: mov z0.h, w0 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x1, x8, lsl #1] +; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x1] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: splat_v32i16: @@ -213,11 +213,11 @@ define void @splat_v16i32(i32 %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: splat_v16i32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 -; VBITS_GE_256-NEXT: mov z0.s, w0 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 -; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1] +; VBITS_GE_256-NEXT: mov z0.s, w0 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2] +; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: splat_v16i32: @@ -296,11 +296,11 @@ define void @splat_v8i64(i64 %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: splat_v8i64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 -; VBITS_GE_256-NEXT: mov z0.d, x0 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 -; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1] +; VBITS_GE_256-NEXT: mov z0.d, x0 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1, x8, lsl #3] +; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: splat_v8i64: @@ -372,8 +372,8 @@ define void @splat_v16f16(half %a, ptr %b) vscale_range(2,0) #0 { ; CHECK-LABEL: splat_v16f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $h0 
killed $h0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl16 +; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 ; CHECK-NEXT: mov z0.h, h0 ; CHECK-NEXT: st1h { z0.h }, p0, [x0] ; CHECK-NEXT: ret @@ -386,18 +386,18 @@ define void @splat_v32f16(half %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: splat_v32f16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 -; VBITS_GE_256-NEXT: // kill: def $h0 killed $h0 def $z0 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: // kill: def $h0 killed $h0 def $z0 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: mov z0.h, h0 -; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0] ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1] +; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: splat_v32f16: ; VBITS_GE_512: // %bb.0: -; VBITS_GE_512-NEXT: // kill: def $h0 killed $h0 def $z0 ; VBITS_GE_512-NEXT: ptrue p0.h, vl32 +; VBITS_GE_512-NEXT: // kill: def $h0 killed $h0 def $z0 ; VBITS_GE_512-NEXT: mov z0.h, h0 ; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0] ; VBITS_GE_512-NEXT: ret @@ -410,8 +410,8 @@ define void @splat_v64f16(half %a, ptr %b) vscale_range(8,0) #0 { ; CHECK-LABEL: splat_v64f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl64 +; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 ; CHECK-NEXT: mov z0.h, h0 ; CHECK-NEXT: st1h { z0.h }, p0, [x0] ; CHECK-NEXT: ret @@ -424,8 +424,8 @@ define void @splat_v128f16(half %a, ptr %b) vscale_range(16,0) #0 { ; CHECK-LABEL: splat_v128f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl128 +; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 ; CHECK-NEXT: mov z0.h, h0 ; CHECK-NEXT: st1h { z0.h }, p0, [x0] ; CHECK-NEXT: ret @@ -462,8 +462,8 @@ define void @splat_v8f32(float %a, ptr %b) vscale_range(2,0) #0 { ; CHECK-LABEL: splat_v8f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl8 +; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0 ; CHECK-NEXT: mov z0.s, s0 ; CHECK-NEXT: st1w { z0.s }, p0, [x0] ; CHECK-NEXT: ret @@ -476,18 +476,18 @@ define void @splat_v16f32(float %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: splat_v16f32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 -; VBITS_GE_256-NEXT: // kill: def $s0 killed $s0 def $z0 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: // kill: def $s0 killed $s0 def $z0 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: mov z0.s, s0 -; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] +; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: splat_v16f32: ; VBITS_GE_512: // %bb.0: -; VBITS_GE_512-NEXT: // kill: def $s0 killed $s0 def $z0 ; VBITS_GE_512-NEXT: ptrue p0.s, vl16 +; VBITS_GE_512-NEXT: // kill: def $s0 killed $s0 def $z0 ; VBITS_GE_512-NEXT: mov z0.s, s0 ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_512-NEXT: ret @@ -500,8 +500,8 @@ define void @splat_v32f32(float %a, ptr %b) vscale_range(8,0) #0 { ; CHECK-LABEL: splat_v32f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl32 +; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0 ; CHECK-NEXT: mov z0.s, s0 ; CHECK-NEXT: st1w { z0.s }, p0, [x0] ; CHECK-NEXT: ret @@ -514,8 +514,8 @@ define void @splat_v64f32(float %a, ptr %b) vscale_range(16,0) #0 { ; CHECK-LABEL: splat_v64f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $s0 killed $s0 def 
$z0 ; CHECK-NEXT: ptrue p0.s, vl64 +; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0 ; CHECK-NEXT: mov z0.s, s0 ; CHECK-NEXT: st1w { z0.s }, p0, [x0] ; CHECK-NEXT: ret @@ -550,8 +550,8 @@ define void @splat_v4f64(double %a, ptr %b) vscale_range(2,0) #0 { ; CHECK-LABEL: splat_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: mov z0.d, d0 ; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: ret @@ -564,18 +564,18 @@ define void @splat_v8f64(double %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: splat_v8f64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 -; VBITS_GE_256-NEXT: // kill: def $d0 killed $d0 def $z0 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: // kill: def $d0 killed $d0 def $z0 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: mov z0.d, d0 -; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0] ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3] +; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: splat_v8f64: ; VBITS_GE_512: // %bb.0: -; VBITS_GE_512-NEXT: // kill: def $d0 killed $d0 def $z0 ; VBITS_GE_512-NEXT: ptrue p0.d, vl8 +; VBITS_GE_512-NEXT: // kill: def $d0 killed $d0 def $z0 ; VBITS_GE_512-NEXT: mov z0.d, d0 ; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0] ; VBITS_GE_512-NEXT: ret @@ -588,8 +588,8 @@ define void @splat_v16f64(double %a, ptr %b) vscale_range(8,0) #0 { ; CHECK-LABEL: splat_v16f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl16 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: mov z0.d, d0 ; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: ret @@ -602,8 +602,8 @@ define void @splat_v32f64(double %a, ptr %b) vscale_range(16,0) #0 { ; CHECK-LABEL: splat_v32f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl32 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: mov z0.d, d0 ; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: ret @@ -620,8 +620,8 @@ define void @splat_imm_v64i8(ptr %a) vscale_range(4,0) #0 { ; CHECK-LABEL: splat_imm_v64i8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.b, #1 // =0x1 ; CHECK-NEXT: ptrue p0.b, vl64 +; CHECK-NEXT: mov z0.b, #1 // =0x1 ; CHECK-NEXT: st1b { z0.b }, p0, [x0] ; CHECK-NEXT: ret %insert = insertelement <64 x i8> undef, i8 1, i64 0 @@ -633,8 +633,8 @@ define void @splat_imm_v32i16(ptr %a) vscale_range(4,0) #0 { ; CHECK-LABEL: splat_imm_v32i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.h, #2 // =0x2 ; CHECK-NEXT: ptrue p0.h, vl32 +; CHECK-NEXT: mov z0.h, #2 // =0x2 ; CHECK-NEXT: st1h { z0.h }, p0, [x0] ; CHECK-NEXT: ret %insert = insertelement <32 x i16> undef, i16 2, i64 0 @@ -646,8 +646,8 @@ define void @splat_imm_v16i32(ptr %a) vscale_range(4,0) #0 { ; CHECK-LABEL: splat_imm_v16i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.s, #3 // =0x3 ; CHECK-NEXT: ptrue p0.s, vl16 +; CHECK-NEXT: mov z0.s, #3 // =0x3 ; CHECK-NEXT: st1w { z0.s }, p0, [x0] ; CHECK-NEXT: ret %insert = insertelement <16 x i32> undef, i32 3, i64 0 @@ -659,8 +659,8 @@ define void @splat_imm_v8i64(ptr %a) vscale_range(4,0) #0 { ; CHECK-LABEL: splat_imm_v8i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.d, #4 // =0x4 ; CHECK-NEXT: ptrue p0.d, vl8 +; CHECK-NEXT: mov z0.d, #4 // =0x4 ; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: ret %insert = insertelement <8 x i64> undef, i64 4, i64 0 @@ -676,8 +676,8 @@ define void @splat_imm_v32f16(ptr %a) 
vscale_range(4,0) #0 { ; CHECK-LABEL: splat_imm_v32f16: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov z0.h, #5.00000000 ; CHECK-NEXT: ptrue p0.h, vl32 +; CHECK-NEXT: fmov z0.h, #5.00000000 ; CHECK-NEXT: st1h { z0.h }, p0, [x0] ; CHECK-NEXT: ret %insert = insertelement <32 x half> undef, half 5.0, i64 0 @@ -689,8 +689,8 @@ define void @splat_imm_v16f32(ptr %a) vscale_range(4,0) #0 { ; CHECK-LABEL: splat_imm_v16f32: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov z0.s, #6.00000000 ; CHECK-NEXT: ptrue p0.s, vl16 +; CHECK-NEXT: fmov z0.s, #6.00000000 ; CHECK-NEXT: st1w { z0.s }, p0, [x0] ; CHECK-NEXT: ret %insert = insertelement <16 x float> undef, float 6.0, i64 0 @@ -702,8 +702,8 @@ define void @splat_imm_v8f64(ptr %a) vscale_range(4,0) #0 { ; CHECK-LABEL: splat_imm_v8f64: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov z0.d, #7.00000000 ; CHECK-NEXT: ptrue p0.d, vl8 +; CHECK-NEXT: fmov z0.d, #7.00000000 ; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: ret %insert = insertelement <8 x double> undef, double 7.0, i64 0 diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-stores.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-stores.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-stores.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-stores.ll @@ -52,11 +52,11 @@ define void @store_v16f32(ptr %a) #0 { ; VBITS_GE_256-LABEL: store_v16f32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 ; VBITS_GE_256-NEXT: mov z0.s, #0 // =0x0 -; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0] +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] +; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: store_v16f32: @@ -86,24 +86,24 @@ define void @store_v32f32(ptr %a) #0 { ; VBITS_GE_256-LABEL: store_v32f32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #24 -; VBITS_GE_256-NEXT: mov x9, #16 -; VBITS_GE_256-NEXT: mov x10, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 ; VBITS_GE_256-NEXT: mov z0.s, #0 // =0x0 +; VBITS_GE_256-NEXT: mov x8, #24 // =0x18 +; VBITS_GE_256-NEXT: mov x9, #16 // =0x10 ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x9, lsl #2] -; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x10, lsl #2] +; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: store_v32f32: ; VBITS_GE_512: // %bb.0: -; VBITS_GE_512-NEXT: mov x8, #16 ; VBITS_GE_512-NEXT: ptrue p0.s, vl16 ; VBITS_GE_512-NEXT: mov z0.s, #0 // =0x0 -; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0] +; VBITS_GE_512-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] +; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_512-NEXT: ret ; ; VBITS_GE_1024-LABEL: store_v32f32: @@ -126,45 +126,45 @@ define void @store_v64f32(ptr %a) #0 { ; VBITS_GE_256-LABEL: store_v64f32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #56 -; VBITS_GE_256-NEXT: mov x9, #48 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 ; VBITS_GE_256-NEXT: mov z0.s, #0 // =0x0 -; VBITS_GE_256-NEXT: mov x10, #40 -; VBITS_GE_256-NEXT: mov x11, #32 +; VBITS_GE_256-NEXT: mov x8, #56 // =0x38 ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] -; VBITS_GE_256-NEXT: mov x8, #24 -; VBITS_GE_256-NEXT: mov x12, #16 -; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x9, lsl #2] -; VBITS_GE_256-NEXT: mov x9, #8 -; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x10, lsl 
#2] -; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x11, lsl #2] +; VBITS_GE_256-NEXT: mov x8, #48 // =0x30 +; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] +; VBITS_GE_256-NEXT: mov x8, #40 // =0x28 +; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] +; VBITS_GE_256-NEXT: mov x8, #32 // =0x20 +; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] +; VBITS_GE_256-NEXT: mov x8, #24 // =0x18 +; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 +; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] -; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x12, lsl #2] ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0] -; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x9, lsl #2] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: store_v64f32: ; VBITS_GE_512: // %bb.0: -; VBITS_GE_512-NEXT: mov x8, #48 -; VBITS_GE_512-NEXT: mov x9, #32 -; VBITS_GE_512-NEXT: mov x10, #16 ; VBITS_GE_512-NEXT: ptrue p0.s, vl16 ; VBITS_GE_512-NEXT: mov z0.s, #0 // =0x0 +; VBITS_GE_512-NEXT: mov x8, #48 // =0x30 +; VBITS_GE_512-NEXT: mov x9, #32 // =0x20 ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] +; VBITS_GE_512-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0, x9, lsl #2] -; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0, x10, lsl #2] +; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_512-NEXT: ret ; ; VBITS_GE_1024-LABEL: store_v64f32: ; VBITS_GE_1024: // %bb.0: -; VBITS_GE_1024-NEXT: mov x8, #32 ; VBITS_GE_1024-NEXT: ptrue p0.s, vl32 ; VBITS_GE_1024-NEXT: mov z0.s, #0 // =0x0 -; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x0] +; VBITS_GE_1024-NEXT: mov x8, #32 // =0x20 ; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] +; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_1024-NEXT: ret ; ; VBITS_GE_2048-LABEL: store_v64f32: diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-subvector.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-subvector.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-subvector.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-subvector.ll @@ -46,8 +46,8 @@ define void @subvector_v32i16(ptr %in, ptr %out) #0 { ; VBITS_GE_256-LABEL: subvector_v32i16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x1, x8, lsl #1] @@ -101,8 +101,8 @@ define void @subvector_v16i32(ptr %in, ptr %out) #0 { ; VBITS_GE_256-LABEL: subvector_v16i32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2] @@ -157,8 +157,8 @@ define void @subvector_v8i64(ptr %in, ptr %out) vscale_range(2,0) #0 { ; CHECK-LABEL: subvector_v8i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #4 ; CHECK-NEXT: ptrue p0.d, vl4 +; CHECK-NEXT: mov x8, #4 // =0x4 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0] ; CHECK-NEXT: st1d { z0.d }, p0, [x1, x8, lsl #3] @@ -234,8 +234,8 @@ define void @subvector_v32f16(ptr %in, ptr %out) #0 { ; VBITS_GE_256-LABEL: subvector_v32f16: ; 
VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x1, x8, lsl #1] @@ -289,8 +289,8 @@ define void @subvector_v16f32(ptr %in, ptr %out) #0 { ; VBITS_GE_256-LABEL: subvector_v16f32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2] @@ -343,8 +343,8 @@ define void @subvector_v8f64(ptr %in, ptr %out) #0 { ; VBITS_GE_256-LABEL: subvector_v8f64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x1, x8, lsl #3] diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-trunc-stores.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-trunc-stores.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-trunc-stores.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-trunc-stores.ll @@ -8,8 +8,8 @@ define void @store_trunc_v2i64i8(ptr %ap, ptr %dest) vscale_range(2,0) #0 { ; CHECK-LABEL: store_trunc_v2i64i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: st1b { z0.d }, p0, [x1] ; CHECK-NEXT: ret %a = load <2 x i64>, ptr %ap @@ -34,16 +34,16 @@ define void @store_trunc_v8i64i8(ptr %ap, ptr %dest) #0 { ; VBITS_GE_256-LABEL: store_trunc_v8i64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 +; VBITS_GE_256-NEXT: ptrue p1.s, vl8 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ptrue p0.s, vl4 ; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s ; VBITS_GE_256-NEXT: splice z1.s, p0, z1.s, z0.s -; VBITS_GE_256-NEXT: ptrue p0.s, vl8 -; VBITS_GE_256-NEXT: st1b { z1.s }, p0, [x1] +; VBITS_GE_256-NEXT: st1b { z1.s }, p1, [x1] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: store_trunc_v8i64i8: @@ -88,8 +88,8 @@ ; Currently does not use the truncating store ; VBITS_GE_256-LABEL: store_trunc_v8i64i16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s @@ -115,16 +115,16 @@ define void @store_trunc_v8i64i32(ptr %ap, ptr %dest) #0 { ; VBITS_GE_256-LABEL: store_trunc_v8i64i32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 +; VBITS_GE_256-NEXT: ptrue p1.s, vl8 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ptrue p0.s, vl4 ; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_256-NEXT: uzp1 z1.s, z1.s, z1.s ; VBITS_GE_256-NEXT: splice z1.s, p0, z1.s, z0.s -; VBITS_GE_256-NEXT: ptrue p0.s, vl8 -; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x1] +; 
VBITS_GE_256-NEXT: st1w { z1.s }, p1, [x1] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: store_trunc_v8i64i32: @@ -143,8 +143,8 @@ ; Currently does not use the truncating store ; VBITS_GE_256-LABEL: store_trunc_v16i32i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h @@ -170,16 +170,16 @@ define void @store_trunc_v16i32i16(ptr %ap, ptr %dest) #0 { ; VBITS_GE_256-LABEL: store_trunc_v16i32i16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 +; VBITS_GE_256-NEXT: ptrue p1.h, vl16 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ptrue p0.h, vl8 ; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h ; VBITS_GE_256-NEXT: uzp1 z1.h, z1.h, z1.h ; VBITS_GE_256-NEXT: splice z1.h, p0, z1.h, z0.h -; VBITS_GE_256-NEXT: ptrue p0.h, vl16 -; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x1] +; VBITS_GE_256-NEXT: st1h { z1.h }, p1, [x1] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: store_trunc_v16i32i16: @@ -197,16 +197,16 @@ define void @store_trunc_v32i16i8(ptr %ap, ptr %dest) #0 { ; VBITS_GE_256-LABEL: store_trunc_v32i16i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 +; VBITS_GE_256-NEXT: ptrue p1.b, vl32 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ptrue p0.b, vl16 ; VBITS_GE_256-NEXT: uzp1 z0.b, z0.b, z0.b ; VBITS_GE_256-NEXT: uzp1 z1.b, z1.b, z1.b ; VBITS_GE_256-NEXT: splice z1.b, p0, z1.b, z0.b -; VBITS_GE_256-NEXT: ptrue p0.b, vl32 -; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x1] +; VBITS_GE_256-NEXT: st1b { z1.b }, p1, [x1] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: store_trunc_v32i16i8: diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-trunc.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-trunc.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-trunc.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-trunc.ll @@ -26,8 +26,8 @@ define void @trunc_v32i16_v32i8(ptr %in, ptr %out) #0 { ; VBITS_GE_256-LABEL: trunc_v32i16_v32i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0] ; VBITS_GE_256-NEXT: ptrue p0.b, vl16 @@ -112,8 +112,8 @@ define <16 x i8> @trunc_v16i32_v16i8(ptr %in) #0 { ; VBITS_GE_256-LABEL: trunc_v16i32_v16i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: uzp1 z0.h, z0.h, z0.h @@ -196,8 +196,8 @@ define void @trunc_v16i32_v16i16(ptr %in, ptr %out) #0 { ; VBITS_GE_256-LABEL: trunc_v16i32_v16i16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0] ; VBITS_GE_256-NEXT: ptrue p0.h, vl8 @@ -283,8 +283,8 @@ define <8 x i8> 
@trunc_v8i64_v8i8(ptr %in) #0 { ; VBITS_GE_256-LABEL: trunc_v8i64_v8i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ptrue p0.s, vl4 @@ -366,8 +366,8 @@ define <8 x i16> @trunc_v8i64_v8i16(ptr %in) #0 { ; VBITS_GE_256-LABEL: trunc_v8i64_v8i16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: uzp1 z0.s, z0.s, z0.s @@ -450,8 +450,8 @@ define void @trunc_v8i64_v8i32(ptr %in, ptr %out) #0 { ; VBITS_GE_256-LABEL: trunc_v8i64_v8i32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0] ; VBITS_GE_256-NEXT: ptrue p0.s, vl4 diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-vector-shuffle.ll @@ -50,19 +50,19 @@ define void @shuffle_ext_byone_v64i8(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: shuffle_ext_byone_v64i8: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov w8, #32 // =0x20 ; VBITS_GE_256-NEXT: ptrue p0.b, vl32 +; VBITS_GE_256-NEXT: mov w8, #32 // =0x20 ; VBITS_GE_256-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; VBITS_GE_256-NEXT: ld1b { z1.b }, p0/z, [x1, x8] ; VBITS_GE_256-NEXT: ld1b { z2.b }, p0/z, [x1] -; VBITS_GE_256-NEXT: mov z0.b, z0.b[31] ; VBITS_GE_256-NEXT: mov z3.b, z2.b[31] +; VBITS_GE_256-NEXT: mov z0.b, z0.b[31] +; VBITS_GE_256-NEXT: fmov w9, s3 +; VBITS_GE_256-NEXT: insr z1.b, w9 ; VBITS_GE_256-NEXT: fmov w9, s0 -; VBITS_GE_256-NEXT: fmov w10, s3 ; VBITS_GE_256-NEXT: insr z2.b, w9 -; VBITS_GE_256-NEXT: insr z1.b, w10 -; VBITS_GE_256-NEXT: st1b { z2.b }, p0, [x0] ; VBITS_GE_256-NEXT: st1b { z1.b }, p0, [x0, x8] +; VBITS_GE_256-NEXT: st1b { z2.b }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: shuffle_ext_byone_v64i8: @@ -94,12 +94,12 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.b, vl128 ; CHECK-NEXT: mov w8, #127 // =0x7f -; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] -; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1] ; CHECK-NEXT: whilels p1.b, xzr, x8 +; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] ; CHECK-NEXT: lastb w8, p1, z0.b -; CHECK-NEXT: insr z1.b, w8 -; CHECK-NEXT: st1b { z1.b }, p0, [x0] +; CHECK-NEXT: ld1b { z0.b }, p0/z, [x1] +; CHECK-NEXT: insr z0.b, w8 +; CHECK-NEXT: st1b { z0.b }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <128 x i8>, ptr %a %op2 = load <128 x i8>, ptr %b @@ -128,12 +128,12 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.b, vl256 ; CHECK-NEXT: mov w8, #255 // =0xff -; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] -; CHECK-NEXT: ld1b { z1.b }, p0/z, [x1] ; CHECK-NEXT: whilels p1.b, xzr, x8 +; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] ; CHECK-NEXT: lastb w8, p1, z0.b -; CHECK-NEXT: insr z1.b, w8 -; CHECK-NEXT: st1b { z1.b }, p0, [x0] +; CHECK-NEXT: ld1b { z0.b }, p0/z, [x1] +; CHECK-NEXT: insr z0.b, w8 +; CHECK-NEXT: st1b { z0.b }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <256 x i8>, ptr %a %op2 = load <256 x i8>, ptr %b @@ -215,19 +215,19 @@ define void 
@shuffle_ext_byone_v32i16(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: shuffle_ext_byone_v32i16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x1, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1] -; VBITS_GE_256-NEXT: mov z0.h, z0.h[15] ; VBITS_GE_256-NEXT: mov z3.h, z2.h[15] +; VBITS_GE_256-NEXT: mov z0.h, z0.h[15] +; VBITS_GE_256-NEXT: fmov w9, s3 +; VBITS_GE_256-NEXT: insr z1.h, w9 ; VBITS_GE_256-NEXT: fmov w9, s0 -; VBITS_GE_256-NEXT: fmov w10, s3 ; VBITS_GE_256-NEXT: insr z2.h, w9 -; VBITS_GE_256-NEXT: insr z1.h, w10 -; VBITS_GE_256-NEXT: st1h { z2.h }, p0, [x0] ; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0, x8, lsl #1] +; VBITS_GE_256-NEXT: st1h { z2.h }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: shuffle_ext_byone_v32i16: @@ -255,12 +255,12 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h, vl64 ; CHECK-NEXT: mov w8, #63 // =0x3f -; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] -; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] ; CHECK-NEXT: whilels p1.h, xzr, x8 +; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: lastb w8, p1, z0.h -; CHECK-NEXT: insr z1.h, w8 -; CHECK-NEXT: st1h { z1.h }, p0, [x0] +; CHECK-NEXT: ld1h { z0.h }, p0/z, [x1] +; CHECK-NEXT: insr z0.h, w8 +; CHECK-NEXT: st1h { z0.h }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <64 x i16>, ptr %a %op2 = load <64 x i16>, ptr %b @@ -281,12 +281,12 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h, vl128 ; CHECK-NEXT: mov w8, #127 // =0x7f -; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] -; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] ; CHECK-NEXT: whilels p1.h, xzr, x8 +; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: lastb w8, p1, z0.h -; CHECK-NEXT: insr z1.h, w8 -; CHECK-NEXT: st1h { z1.h }, p0, [x0] +; CHECK-NEXT: ld1h { z0.h }, p0/z, [x1] +; CHECK-NEXT: insr z0.h, w8 +; CHECK-NEXT: st1h { z0.h }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <128 x i16>, ptr %a %op2 = load <128 x i16>, ptr %b @@ -351,19 +351,19 @@ define void @shuffle_ext_byone_v16i32(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: shuffle_ext_byone_v16i32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x1, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1] -; VBITS_GE_256-NEXT: mov z0.s, z0.s[7] ; VBITS_GE_256-NEXT: mov z3.s, z2.s[7] +; VBITS_GE_256-NEXT: mov z0.s, z0.s[7] +; VBITS_GE_256-NEXT: fmov w9, s3 +; VBITS_GE_256-NEXT: insr z1.s, w9 ; VBITS_GE_256-NEXT: fmov w9, s0 -; VBITS_GE_256-NEXT: fmov w10, s3 ; VBITS_GE_256-NEXT: insr z2.s, w9 -; VBITS_GE_256-NEXT: insr z1.s, w10 -; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x0] ; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0, x8, lsl #2] +; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: shuffle_ext_byone_v16i32: @@ -389,12 +389,12 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s, vl32 ; CHECK-NEXT: mov w8, #31 // =0x1f -; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] -; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1] ; CHECK-NEXT: whilels p1.s, xzr, x8 +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] ; CHECK-NEXT: lastb w8, p1, z0.s -; CHECK-NEXT: insr z1.s, w8 -; CHECK-NEXT: st1w { z1.s }, p0, [x0] +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x1] +; CHECK-NEXT: insr z0.s, w8 +; CHECK-NEXT: st1w { z0.s }, p0, [x0] ; 
CHECK-NEXT: ret %op1 = load <32 x i32>, ptr %a %op2 = load <32 x i32>, ptr %b @@ -411,12 +411,12 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s, vl64 ; CHECK-NEXT: mov w8, #63 // =0x3f -; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] -; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1] ; CHECK-NEXT: whilels p1.s, xzr, x8 +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] ; CHECK-NEXT: lastb w8, p1, z0.s -; CHECK-NEXT: insr z1.s, w8 -; CHECK-NEXT: st1w { z1.s }, p0, [x0] +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x1] +; CHECK-NEXT: insr z0.s, w8 +; CHECK-NEXT: st1w { z0.s }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <64 x i32>, ptr %a %op2 = load <64 x i32>, ptr %b @@ -463,19 +463,19 @@ define void @shuffle_ext_byone_v8i64(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: shuffle_ext_byone_v8i64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1] -; VBITS_GE_256-NEXT: mov z0.d, z0.d[3] ; VBITS_GE_256-NEXT: mov z3.d, z2.d[3] +; VBITS_GE_256-NEXT: mov z0.d, z0.d[3] +; VBITS_GE_256-NEXT: fmov x9, d3 +; VBITS_GE_256-NEXT: insr z1.d, x9 ; VBITS_GE_256-NEXT: fmov x9, d0 -; VBITS_GE_256-NEXT: fmov x10, d3 ; VBITS_GE_256-NEXT: insr z2.d, x9 -; VBITS_GE_256-NEXT: insr z1.d, x10 -; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x0] ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0, x8, lsl #3] +; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: shuffle_ext_byone_v8i64: @@ -500,12 +500,12 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d, vl16 ; CHECK-NEXT: mov w8, #15 // =0xf -; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] -; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] ; CHECK-NEXT: whilels p1.d, xzr, x8 +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] ; CHECK-NEXT: lastb x8, p1, z0.d -; CHECK-NEXT: insr z1.d, x8 -; CHECK-NEXT: st1d { z1.d }, p0, [x0] +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1] +; CHECK-NEXT: insr z0.d, x8 +; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i64>, ptr %a %op2 = load <16 x i64>, ptr %b @@ -520,12 +520,12 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d, vl32 ; CHECK-NEXT: mov w8, #31 // =0x1f -; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] -; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] ; CHECK-NEXT: whilels p1.d, xzr, x8 +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] ; CHECK-NEXT: lastb x8, p1, z0.d -; CHECK-NEXT: insr z1.d, x8 -; CHECK-NEXT: st1d { z1.d }, p0, [x0] +; CHECK-NEXT: ld1d { z0.d }, p0/z, [x1] +; CHECK-NEXT: insr z0.d, x8 +; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: ret %op1 = load <32 x i64>, ptr %a %op2 = load <32 x i64>, ptr %b @@ -578,17 +578,17 @@ define void @shuffle_ext_byone_v32f16(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: shuffle_ext_byone_v32f16: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16 +; VBITS_GE_256-NEXT: mov x8, #16 // =0x10 ; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x1, x8, lsl #1] ; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1] -; VBITS_GE_256-NEXT: mov z0.h, z0.h[15] ; VBITS_GE_256-NEXT: mov z3.h, z2.h[15] -; VBITS_GE_256-NEXT: insr z2.h, h0 +; VBITS_GE_256-NEXT: mov z0.h, z0.h[15] ; VBITS_GE_256-NEXT: insr z1.h, h3 -; VBITS_GE_256-NEXT: st1h { z2.h }, p0, [x0] +; VBITS_GE_256-NEXT: insr z2.h, h0 ; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0, x8, lsl #1] +; VBITS_GE_256-NEXT: st1h { z2.h }, p0, 
[x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: shuffle_ext_byone_v32f16: @@ -615,9 +615,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h, vl64 ; CHECK-NEXT: mov w8, #63 // =0x3f +; CHECK-NEXT: whilels p1.h, xzr, x8 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] -; CHECK-NEXT: whilels p1.h, xzr, x8 ; CHECK-NEXT: lastb h0, p1, z0.h ; CHECK-NEXT: insr z1.h, h0 ; CHECK-NEXT: st1h { z1.h }, p0, [x0] @@ -641,9 +641,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h, vl128 ; CHECK-NEXT: mov w8, #127 // =0x7f +; CHECK-NEXT: whilels p1.h, xzr, x8 ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1] -; CHECK-NEXT: whilels p1.h, xzr, x8 ; CHECK-NEXT: lastb h0, p1, z0.h ; CHECK-NEXT: insr z1.h, h0 ; CHECK-NEXT: st1h { z1.h }, p0, [x0] @@ -710,17 +710,17 @@ define void @shuffle_ext_byone_v16f32(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: shuffle_ext_byone_v16f32: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8 +; VBITS_GE_256-NEXT: mov x8, #8 // =0x8 ; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x1, x8, lsl #2] ; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1] -; VBITS_GE_256-NEXT: mov z0.s, z0.s[7] ; VBITS_GE_256-NEXT: mov z3.s, z2.s[7] -; VBITS_GE_256-NEXT: insr z2.s, s0 +; VBITS_GE_256-NEXT: mov z0.s, z0.s[7] ; VBITS_GE_256-NEXT: insr z1.s, s3 -; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x0] +; VBITS_GE_256-NEXT: insr z2.s, s0 ; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0, x8, lsl #2] +; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: shuffle_ext_byone_v16f32: @@ -745,9 +745,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s, vl32 ; CHECK-NEXT: mov w8, #31 // =0x1f +; CHECK-NEXT: whilels p1.s, xzr, x8 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1] -; CHECK-NEXT: whilels p1.s, xzr, x8 ; CHECK-NEXT: lastb s0, p1, z0.s ; CHECK-NEXT: insr z1.s, s0 ; CHECK-NEXT: st1w { z1.s }, p0, [x0] @@ -767,9 +767,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s, vl64 ; CHECK-NEXT: mov w8, #63 // =0x3f +; CHECK-NEXT: whilels p1.s, xzr, x8 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1] -; CHECK-NEXT: whilels p1.s, xzr, x8 ; CHECK-NEXT: lastb s0, p1, z0.s ; CHECK-NEXT: insr z1.s, s0 ; CHECK-NEXT: st1w { z1.s }, p0, [x0] @@ -818,17 +818,17 @@ define void @shuffle_ext_byone_v8f64(ptr %a, ptr %b) #0 { ; VBITS_GE_256-LABEL: shuffle_ext_byone_v8f64: ; VBITS_GE_256: // %bb.0: -; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4 +; VBITS_GE_256-NEXT: mov x8, #4 // =0x4 ; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x1, x8, lsl #3] ; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1] -; VBITS_GE_256-NEXT: mov z0.d, z0.d[3] ; VBITS_GE_256-NEXT: mov z3.d, z2.d[3] -; VBITS_GE_256-NEXT: insr z2.d, d0 +; VBITS_GE_256-NEXT: mov z0.d, z0.d[3] ; VBITS_GE_256-NEXT: insr z1.d, d3 -; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x0] +; VBITS_GE_256-NEXT: insr z2.d, d0 ; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0, x8, lsl #3] +; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x0] ; VBITS_GE_256-NEXT: ret ; ; VBITS_GE_512-LABEL: shuffle_ext_byone_v8f64: @@ -852,9 +852,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d, vl16 ; CHECK-NEXT: mov w8, #15 // =0xf +; CHECK-NEXT: whilels p1.d, xzr, x8 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] -; CHECK-NEXT: whilels p1.d, xzr, x8 ; 
CHECK-NEXT: lastb d0, p1, z0.d ; CHECK-NEXT: insr z1.d, d0 ; CHECK-NEXT: st1d { z1.d }, p0, [x0] @@ -872,9 +872,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d, vl32 ; CHECK-NEXT: mov w8, #31 // =0x1f +; CHECK-NEXT: whilels p1.d, xzr, x8 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] -; CHECK-NEXT: whilels p1.d, xzr, x8 ; CHECK-NEXT: lastb d0, p1, z0.d ; CHECK-NEXT: insr z1.d, d0 ; CHECK-NEXT: st1d { z1.d }, p0, [x0] @@ -921,10 +921,10 @@ ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1] ; CHECK-NEXT: mov z2.d, z1.d[1] -; CHECK-NEXT: stp d1, d2, [sp, #16] -; CHECK-NEXT: mov z1.d, z0.d[3] +; CHECK-NEXT: mov z3.d, z0.d[3] ; CHECK-NEXT: mov z0.d, z0.d[2] -; CHECK-NEXT: stp d0, d1, [sp] +; CHECK-NEXT: stp d1, d2, [sp, #16] +; CHECK-NEXT: stp d0, d3, [sp] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8] ; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: mov sp, x29 diff --git a/llvm/test/CodeGen/AArch64/sve-fold-vscale.ll b/llvm/test/CodeGen/AArch64/sve-fold-vscale.ll --- a/llvm/test/CodeGen/AArch64/sve-fold-vscale.ll +++ b/llvm/test/CodeGen/AArch64/sve-fold-vscale.ll @@ -7,9 +7,9 @@ define void @ld1w_reg_loop(ptr %addr) { ; CHECK-LABEL: ld1w_reg_loop: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: cntw x9 -; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: .LBB0_1: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2] @@ -37,9 +37,9 @@ define void @st1w_reg_loop(ptr %addr, <vscale x 4 x i32> %val) { ; CHECK-LABEL: st1w_reg_loop: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: cntw x9 -; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: .LBB1_1: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2] diff --git a/llvm/test/CodeGen/AArch64/sve-forward-st-to-ld.ll b/llvm/test/CodeGen/AArch64/sve-forward-st-to-ld.ll --- a/llvm/test/CodeGen/AArch64/sve-forward-st-to-ld.ll +++ b/llvm/test/CodeGen/AArch64/sve-forward-st-to-ld.ll @@ -65,9 +65,9 @@ ; CHECK-LABEL: sti64ldi32: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: st1d { z0.d }, p0, [x0, #1, mul vl] -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0, #1, mul vl] +; CHECK-NEXT: ld1w { z0.s }, p1/z, [x0, #1, mul vl] ; CHECK-NEXT: ret entry: %0 = bitcast <vscale x 2 x i64>* %P to <vscale x 4 x i32>* diff --git a/llvm/test/CodeGen/AArch64/sve-fp-combine.ll b/llvm/test/CodeGen/AArch64/sve-fp-combine.ll --- a/llvm/test/CodeGen/AArch64/sve-fp-combine.ll +++ b/llvm/test/CodeGen/AArch64/sve-fp-combine.ll @@ -956,9 +956,9 @@ define <vscale x 8 x half> @fadd_sel_fmul_h(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: fadd_sel_fmul_h: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z3.h, #0 // =0x0 ; CHECK-NEXT: fmul z1.h, z1.h, z2.h -; CHECK-NEXT: sel z1.h, p0, z1.h, z3.h +; CHECK-NEXT: mov z2.h, #0 // =0x0 +; CHECK-NEXT: sel z1.h, p0, z1.h, z2.h ; CHECK-NEXT: fadd z0.h, z0.h, z1.h ; CHECK-NEXT: ret %fmul = fmul <vscale x 8 x half> %b, %c @@ -970,9 +970,9 @@ define <vscale x 4 x float> @fadd_sel_fmul_s(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: fadd_sel_fmul_s: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z3.s, #0 // =0x0 ; CHECK-NEXT: fmul z1.s, z1.s, z2.s -; CHECK-NEXT: sel z1.s, p0, z1.s, z3.s +; CHECK-NEXT: mov z2.s, #0 // =0x0 +; CHECK-NEXT: sel z1.s, p0, z1.s, z2.s ; CHECK-NEXT: fadd z0.s, z0.s, z1.s ; CHECK-NEXT: ret %fmul = fmul <vscale x 4 x float> %b, %c @@ -984,9 +984,9 @@ define <vscale x 2 x double> @fadd_sel_fmul_d(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: fadd_sel_fmul_d: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z3.d, #0 // =0x0 ;
CHECK-NEXT: fmul z1.d, z1.d, z2.d -; CHECK-NEXT: sel z1.d, p0, z1.d, z3.d +; CHECK-NEXT: mov z2.d, #0 // =0x0 +; CHECK-NEXT: sel z1.d, p0, z1.d, z2.d ; CHECK-NEXT: fadd z0.d, z0.d, z1.d ; CHECK-NEXT: ret %fmul = fmul <vscale x 2 x double> %b, %c diff --git a/llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll b/llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll --- a/llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll +++ b/llvm/test/CodeGen/AArch64/sve-fp-int-min-max.ll @@ -6,18 +6,18 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: mov w8, #3745 // =0xea1 -; CHECK-NEXT: movk w8, #16618, lsl #16 -; CHECK-NEXT: ld1w { z3.d }, p0/z, [x0] -; CHECK-NEXT: mov w9, #57344 // =0xe000 ; CHECK-NEXT: mov z6.d, #1023 // =0x3ff -; CHECK-NEXT: movk w9, #17535, lsl #16 +; CHECK-NEXT: movk w8, #16618, lsl #16 ; CHECK-NEXT: mov z4.s, w8 +; CHECK-NEXT: mov w8, #57344 // =0xe000 +; CHECK-NEXT: movk w8, #17535, lsl #16 +; CHECK-NEXT: mov z5.s, w8 +; CHECK-NEXT: ld1w { z3.d }, p0/z, [x0] ; CHECK-NEXT: fmul z4.s, p0/m, z4.s, z3.s -; CHECK-NEXT: mov z5.s, w9 ; CHECK-NEXT: fadd z4.s, p0/m, z4.s, z5.s ; CHECK-NEXT: mov z5.d, #0 // =0x0 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.s ; CHECK-NEXT: sxtw z5.d, p0/m, z5.d +; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.s ; CHECK-NEXT: smax z4.d, p0/m, z4.d, z5.d ; CHECK-NEXT: movprfx z5, z6 ; CHECK-NEXT: sxtw z5.d, p0/m, z6.d diff --git a/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll b/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll --- a/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll +++ b/llvm/test/CodeGen/AArch64/sve-fp-reduce-fadda.ll @@ -11,8 +11,8 @@ define half @fadda_nxv2f16(half %init, <vscale x 2 x half> %a) { ; CHECK-LABEL: fadda_nxv2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 ; CHECK-NEXT: fadda h0, p0, h0, z1.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret @@ -23,8 +23,8 @@ define half @fadda_nxv4f16(half %init, <vscale x 4 x half> %a) { ; CHECK-LABEL: fadda_nxv4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 ; CHECK-NEXT: fadda h0, p0, h0, z1.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret @@ -35,8 +35,8 @@ define half @fadda_nxv8f16(half %init, <vscale x 8 x half> %a) { ; CHECK-LABEL: fadda_nxv8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 ; CHECK-NEXT: fadda h0, p0, h0, z1.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret @@ -51,12 +51,12 @@ ; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: mov w8, #32768 // =0x8000 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov w8, #32768 // =0x8000 ; CHECK-NEXT: ptrue p1.d +; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: st1h { z0.h }, p0, [sp] ; CHECK-NEXT: fmov s0, s1 -; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: st1h { z2.d }, p1, [sp, #3, mul vl] ; CHECK-NEXT: ld1h { z2.h }, p0/z, [sp] ; CHECK-NEXT: fadda h0, p0, h0, z2.h @@ -75,23 +75,23 @@ ; CHECK-NEXT: addvl sp, sp, #-3 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 24 * VG ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: mov w8, #32768 // =0x8000 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: // kill: def $h2 killed $h2 def $z2 +; CHECK-NEXT: mov 
w8, #32768 // =0x8000 ; CHECK-NEXT: ptrue p1.d +; CHECK-NEXT: fadda h2, p0, h2, z0.h ; CHECK-NEXT: st1h { z1.h }, p0, [sp] -; CHECK-NEXT: // kill: def $h2 killed $h2 def $z2 -; CHECK-NEXT: mov z3.h, w8 +; CHECK-NEXT: mov z0.h, w8 ; CHECK-NEXT: addvl x8, sp, #1 -; CHECK-NEXT: st1h { z3.d }, p1, [sp, #1, mul vl] -; CHECK-NEXT: fadda h2, p0, h2, z0.h +; CHECK-NEXT: st1h { z0.d }, p1, [sp, #1, mul vl] ; CHECK-NEXT: ld1h { z1.h }, p0/z, [sp] ; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl] -; CHECK-NEXT: st1h { z3.d }, p1, [sp, #6, mul vl] +; CHECK-NEXT: st1h { z0.d }, p1, [sp, #6, mul vl] ; CHECK-NEXT: ld1h { z1.h }, p0/z, [sp, #1, mul vl] ; CHECK-NEXT: st1h { z1.h }, p0, [sp, #2, mul vl] -; CHECK-NEXT: st1h { z3.d }, p1, [x8, #7, mul vl] -; CHECK-NEXT: ld1h { z1.h }, p0/z, [sp, #2, mul vl] -; CHECK-NEXT: fadda h2, p0, h2, z1.h +; CHECK-NEXT: st1h { z0.d }, p1, [x8, #7, mul vl] +; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp, #2, mul vl] +; CHECK-NEXT: fadda h2, p0, h2, z0.h ; CHECK-NEXT: fmov s0, s2 ; CHECK-NEXT: addvl sp, sp, #3 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -103,14 +103,14 @@ define half @fadda_nxv12f16(<vscale x 12 x half> %v, half %s) { ; CHECK-LABEL: fadda_nxv12f16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32768 // =0x8000 -; CHECK-NEXT: // kill: def $h2 killed $h2 def $z2 -; CHECK-NEXT: uunpklo z1.s, z1.h ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: // kill: def $h2 killed $h2 def $z2 +; CHECK-NEXT: mov w8, #32768 // =0x8000 +; CHECK-NEXT: fadda h2, p0, h2, z0.h +; CHECK-NEXT: uunpklo z0.s, z1.h +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h ; CHECK-NEXT: fadda h2, p0, h2, z0.h -; CHECK-NEXT: mov z3.h, w8 -; CHECK-NEXT: uzp1 z1.h, z1.h, z3.h -; CHECK-NEXT: fadda h2, p0, h2, z1.h ; CHECK-NEXT: fmov s0, s2 ; CHECK-NEXT: ret %res = call half @llvm.vector.reduce.fadd.nxv12f16(half %s, <vscale x 12 x half> %v) @@ -120,8 +120,8 @@ define float @fadda_nxv2f32(float %init, <vscale x 2 x float> %a) { ; CHECK-LABEL: fadda_nxv2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0 ; CHECK-NEXT: fadda s0, p0, s0, z1.s ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret @@ -132,8 +132,8 @@ define float @fadda_nxv4f32(float %init, <vscale x 4 x float> %a) { ; CHECK-LABEL: fadda_nxv4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0 ; CHECK-NEXT: fadda s0, p0, s0, z1.s ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret @@ -144,8 +144,8 @@ define double @fadda_nxv2f64(double %init, <vscale x 2 x double> %a) { ; CHECK-LABEL: fadda_nxv2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: fadda d0, p0, d0, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-fpext-load.ll b/llvm/test/CodeGen/AArch64/sve-fpext-load.ll --- a/llvm/test/CodeGen/AArch64/sve-fpext-load.ll +++ b/llvm/test/CodeGen/AArch64/sve-fpext-load.ll @@ -45,7 +45,6 @@ ; CHECK-NEXT: uunpklo z2.d, z1.s ; CHECK-NEXT: uunpkhi z1.d, z1.s ; CHECK-NEXT: uunpklo z3.d, z0.s -; CHECK-NEXT: fcvt z1.d, p0/m, z1.h ; CHECK-NEXT: uunpkhi z4.d, z0.s ; CHECK-NEXT: movprfx z0, z2 ; CHECK-NEXT: fcvt z0.d, p0/m, z2.h @@ -53,6 +52,7 @@ ; CHECK-NEXT: fcvt z2.d, p0/m, z3.h ; CHECK-NEXT: movprfx z3, z4 ; CHECK-NEXT: fcvt z3.d, p0/m, z4.h +; CHECK-NEXT: fcvt z1.d, p0/m, z1.h ; CHECK-NEXT: ret %load = load <vscale x 8 x half>, <vscale x 8 x half>* %ptr, align 4 
%load.ext = fpext <vscale x 8 x half> %load to <vscale x 8 x double> diff --git a/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll b/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll --- a/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll +++ b/llvm/test/CodeGen/AArch64/sve-fptosi-sat.ll @@ -14,23 +14,22 @@ define <vscale x 2 x i32> @test_signed_v2f32_v2i32(<vscale x 2 x float> %f) { ; CHECK-LABEL: test_signed_v2f32_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-822083584 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov w8, #-822083584 // =0xcf000000 ; CHECK-NEXT: mov z2.d, #0xffffffff80000000 ; CHECK-NEXT: mov z1.s, w8 -; CHECK-NEXT: mov w8, #1325400063 +; CHECK-NEXT: mov w8, #1325400063 // =0x4effffff +; CHECK-NEXT: mov z3.s, w8 ; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z1.s ; CHECK-NEXT: movprfx z1, z0 ; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.s +; CHECK-NEXT: fcmgt p2.s, p0/z, z0.s, z3.s +; CHECK-NEXT: mov z3.d, #0x7fffffff ; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: mov z1.d, p1/m, z2.d -; CHECK-NEXT: mov z2.s, w8 -; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, z2.s -; CHECK-NEXT: mov z2.d, #0x7fffffff -; CHECK-NEXT: mov z1.d, p1/m, z2.d ; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s -; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0 -; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, p1/m, z2.d +; CHECK-NEXT: sel z0.d, p2, z3.d, z1.d +; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %x = call <vscale x 2 x i32> @llvm.fptosi.sat.nxv2f32.nxv2i32(<vscale x 2 x float> %f) ret <vscale x 2 x i32> %x @@ -39,23 +38,22 @@ define <vscale x 4 x i32> @test_signed_v4f32_v4i32(<vscale x 4 x float> %f) { ; CHECK-LABEL: test_signed_v4f32_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-822083584 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov w8, #-822083584 // =0xcf000000 ; CHECK-NEXT: mov z2.s, #0x80000000 ; CHECK-NEXT: mov z1.s, w8 -; CHECK-NEXT: mov w8, #1325400063 +; CHECK-NEXT: mov w8, #1325400063 // =0x4effffff +; CHECK-NEXT: mov z3.s, w8 ; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z1.s ; CHECK-NEXT: movprfx z1, z0 ; CHECK-NEXT: fcvtzs z1.s, p0/m, z0.s +; CHECK-NEXT: fcmgt p2.s, p0/z, z0.s, z3.s +; CHECK-NEXT: mov z3.s, #0x7fffffff ; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: mov z1.s, p1/m, z2.s -; CHECK-NEXT: mov z2.s, w8 -; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, z2.s -; CHECK-NEXT: mov z2.s, #0x7fffffff -; CHECK-NEXT: mov z1.s, p1/m, z2.s ; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s -; CHECK-NEXT: mov z1.s, p0/m, #0 // =0x0 -; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: mov z1.s, p1/m, z2.s +; CHECK-NEXT: sel z0.s, p2, z3.s, z1.s +; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %x = call <vscale x 4 x i32> @llvm.fptosi.sat.nxv4f32.nxv4i32(<vscale x 4 x float> %f) ret <vscale x 4 x i32> %x @@ -64,33 +62,39 @@ define <vscale x 8 x i32> @test_signed_v8f32_v8i32(<vscale x 8 x float> %f) { ; CHECK-LABEL: test_signed_v8f32_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-822083584 +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG +; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov w9, #1325400063 ; CHECK-NEXT: mov z3.s, #0x80000000 -; CHECK-NEXT: movprfx z4, z0 -; CHECK-NEXT: fcvtzs z4.s, p0/m, z0.s -; CHECK-NEXT: mov z6.s, #0x7fffffff ; CHECK-NEXT: mov z2.s, w8 +; CHECK-NEXT: mov w8, #-822083584 // =0xcf000000 +; CHECK-NEXT: mov z2.s, w8 +; CHECK-NEXT: mov w8, #1325400063 // =0x4effffff +; CHECK-NEXT: mov z6.s, #0x7fffffff +; CHECK-NEXT: mov z4.s, w8 +; CHECK-NEXT: movprfx z5, z1 +; CHECK-NEXT: fcvtzs z5.s, p0/m, z1.s ; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z2.s -; CHECK-NEXT: mov z5.s, w9 -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: fcmgt p2.s, p0/z, z0.s, z5.s -; CHECK-NEXT: mov z4.s, p1/m, z3.s -; CHECK-NEXT: fcmge p1.s, p0/z, z1.s, z2.s +; CHECK-NEXT: fcmge p2.s, p0/z, z1.s, z2.s +; CHECK-NEXT: movprfx z2, z0 +; CHECK-NEXT: fcvtzs z2.s, p0/m, z0.s +; CHECK-NEXT: fcmgt p3.s, p0/z, z0.s, z4.s +; CHECK-NEXT: fcmgt p4.s, p0/z, z1.s, z4.s ; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: movprfx z2, z1 -; CHECK-NEXT: fcvtzs z2.s, p0/m, z1.s -; CHECK-NEXT: sel z3.s, p1, z3.s, z2.s -; CHECK-NEXT: fcmgt p1.s, p0/z, z1.s, z5.s -; CHECK-NEXT: sel z2.s, p2, z6.s, z4.s -; CHECK-NEXT: mov z3.s, p1/m, z6.s +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: mov z2.s, p1/m, z3.s ; CHECK-NEXT: fcmuo p1.s, p0/z, z0.s, z0.s ; CHECK-NEXT: fcmuo p0.s, p0/z, z1.s, z1.s -; CHECK-NEXT: mov z2.s, p1/m, #0 // =0x0 -; CHECK-NEXT: mov z3.s, p0/m, #0 // =0x0 -; CHECK-NEXT: mov z0.d, z2.d -; CHECK-NEXT: mov z1.d, z3.d +; CHECK-NEXT: sel z3.s, p2, z3.s, z5.s +; CHECK-NEXT: sel z0.s, p3, z6.s, z2.s +; CHECK-NEXT: sel z1.s, p4, z6.s, z3.s +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z0.s, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z1.s, p0/m, #0 // =0x0 +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %x = call <vscale x 8 x i32> @llvm.fptosi.sat.nxv8f32.nxv8i32(<vscale x 8 x float> %f) ret <vscale x 8 x i32> %x @@ -99,23 +103,22 @@ define <vscale x 4 x i16> @test_signed_v4f32_v4i16(<vscale x 4 x float> %f) { ; CHECK-LABEL: test_signed_v4f32_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-956301312 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov w8, #-956301312 // =0xc7000000 ; CHECK-NEXT: mov z1.s, w8 -; CHECK-NEXT: mov w8, #65024 +; CHECK-NEXT: mov w8, #65024 // =0xfe00 ; CHECK-NEXT: movk w8, #18175, lsl #16 +; CHECK-NEXT: mov z2.s, w8 ; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z1.s ; CHECK-NEXT: movprfx z1, z0 ; CHECK-NEXT: fcvtzs z1.s, p0/m, z0.s -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: mov z1.s, p1/m, #-32768 // =0xffffffffffff8000 -; CHECK-NEXT: mov z2.s, w8 -; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, z2.s +; CHECK-NEXT: fcmgt p2.s, p0/z, z0.s, z2.s ; CHECK-NEXT: mov z2.s, #32767 // =0x7fff -; CHECK-NEXT: mov z1.s, p1/m, z2.s +; CHECK-NEXT: not p1.b, p0/z, p1.b ; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s -; CHECK-NEXT: mov z1.s, p0/m, #0 // =0x0 -; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: mov z1.s, p1/m, #-32768 // =0xffffffffffff8000 +; CHECK-NEXT: sel z0.s, p2, z2.s, z1.s +; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %x = call <vscale x 4 x i16> @llvm.fptosi.sat.nxv4f32.nxv4i16(<vscale x 4 x float> %f) ret <vscale x 4 x i16> %x @@ -124,32 +127,40 @@ define <vscale x 8 x i16> @test_signed_v8f32_v8i16(<vscale x 8 x float> %f) { ; CHECK-LABEL: test_signed_v8f32_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-956301312 -; CHECK-NEXT: mov w9, #65024 +; CHECK-NEXT: str x29, [sp, 
#-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG +; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: movk w9, #18175, lsl #16 -; CHECK-NEXT: movprfx z4, z1 -; CHECK-NEXT: fcvtzs z4.s, p0/m, z1.s -; CHECK-NEXT: mov z3.s, #32767 // =0x7fff +; CHECK-NEXT: mov w8, #-956301312 // =0xc7000000 +; CHECK-NEXT: movprfx z4, z0 +; CHECK-NEXT: fcvtzs z4.s, p0/m, z0.s ; CHECK-NEXT: mov z2.s, w8 +; CHECK-NEXT: mov w8, #65024 // =0xfe00 +; CHECK-NEXT: mov z5.s, #32767 // =0x7fff +; CHECK-NEXT: movk w8, #18175, lsl #16 +; CHECK-NEXT: mov z3.s, w8 ; CHECK-NEXT: fcmge p1.s, p0/z, z1.s, z2.s -; CHECK-NEXT: mov z5.s, w9 -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: fcmgt p2.s, p0/z, z1.s, z5.s -; CHECK-NEXT: mov z4.s, p1/m, #-32768 // =0xffffffffffff8000 -; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z2.s -; CHECK-NEXT: movprfx z2, z0 -; CHECK-NEXT: fcvtzs z2.s, p0/m, z0.s +; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, z2.s +; CHECK-NEXT: movprfx z2, z1 +; CHECK-NEXT: fcvtzs z2.s, p0/m, z1.s +; CHECK-NEXT: fcmgt p3.s, p0/z, z1.s, z3.s +; CHECK-NEXT: fcmgt p4.s, p0/z, z0.s, z3.s ; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: not p2.b, p0/z, p2.b ; CHECK-NEXT: mov z2.s, p1/m, #-32768 // =0xffffffffffff8000 -; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, z5.s -; CHECK-NEXT: mov z4.s, p2/m, z3.s -; CHECK-NEXT: mov z2.s, p1/m, z3.s ; CHECK-NEXT: fcmuo p1.s, p0/z, z1.s, z1.s ; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s -; CHECK-NEXT: mov z4.s, p1/m, #0 // =0x0 -; CHECK-NEXT: mov z2.s, p0/m, #0 // =0x0 -; CHECK-NEXT: uzp1 z0.h, z2.h, z4.h +; CHECK-NEXT: mov z4.s, p2/m, #-32768 // =0xffffffffffff8000 +; CHECK-NEXT: sel z0.s, p3, z5.s, z2.s +; CHECK-NEXT: sel z1.s, p4, z5.s, z4.s +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z0.s, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z1.s, p0/m, #0 // =0x0 +; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %x = call <vscale x 8 x i16> @llvm.fptosi.sat.nxv8f32.nxv8i16(<vscale x 8 x float> %f) ret <vscale x 8 x i16> %x @@ -158,23 +169,22 @@ define <vscale x 2 x i64> @test_signed_v2f32_v2i64(<vscale x 2 x float> %f) { ; CHECK-LABEL: test_signed_v2f32_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-553648128 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov w8, #-553648128 // =0xdf000000 ; CHECK-NEXT: mov z2.d, #0x8000000000000000 ; CHECK-NEXT: mov z1.s, w8 -; CHECK-NEXT: mov w8, #1593835519 +; CHECK-NEXT: mov w8, #1593835519 // =0x5effffff +; CHECK-NEXT: mov z3.s, w8 ; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, z1.s ; CHECK-NEXT: movprfx z1, z0 ; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.s +; CHECK-NEXT: fcmgt p2.s, p0/z, z0.s, z3.s +; CHECK-NEXT: mov z3.d, #0x7fffffffffffffff ; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: mov z1.d, p1/m, z2.d -; CHECK-NEXT: mov z2.s, w8 -; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, z2.s -; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff -; CHECK-NEXT: mov z1.d, p1/m, z2.d ; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s -; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0 -; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, p1/m, z2.d +; CHECK-NEXT: sel z0.d, p2, z3.d, z1.d +; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %x = call <vscale x 2 x i64> @llvm.fptosi.sat.nxv2f32.nxv2i64(<vscale x 2 x float> %f) ret <vscale x 2 x i64> %x @@ -183,33 +193,41 @@ define <vscale x 4 x i64> @test_signed_v4f32_v4i64(<vscale x 4 x float> %f) { ; CHECK-LABEL: test_signed_v4f32_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-553648128 +; CHECK-NEXT: 
str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG +; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: uunpklo z3.d, z0.s -; CHECK-NEXT: mov w9, #1593835519 -; CHECK-NEXT: mov z2.d, #0x8000000000000000 -; CHECK-NEXT: uunpkhi z5.d, z0.s -; CHECK-NEXT: mov z1.s, w8 -; CHECK-NEXT: movprfx z0, z3 -; CHECK-NEXT: fcvtzs z0.d, p0/m, z3.s -; CHECK-NEXT: fcmge p1.s, p0/z, z3.s, z1.s -; CHECK-NEXT: mov z4.s, w9 -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: fcmgt p2.s, p0/z, z3.s, z4.s -; CHECK-NEXT: mov z0.d, p1/m, z2.d -; CHECK-NEXT: fcmge p1.s, p0/z, z5.s, z1.s -; CHECK-NEXT: movprfx z1, z5 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z5.s -; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov w8, #-553648128 // =0xdf000000 +; CHECK-NEXT: uunpklo z1.d, z0.s +; CHECK-NEXT: uunpkhi z0.d, z0.s +; CHECK-NEXT: mov z2.s, w8 +; CHECK-NEXT: mov w8, #1593835519 // =0x5effffff +; CHECK-NEXT: mov z3.d, #0x8000000000000000 +; CHECK-NEXT: mov z4.s, w8 +; CHECK-NEXT: movprfx z5, z0 +; CHECK-NEXT: fcvtzs z5.d, p0/m, z0.s ; CHECK-NEXT: mov z6.d, #0x7fffffffffffffff -; CHECK-NEXT: mov z1.d, p1/m, z2.d -; CHECK-NEXT: fcmgt p1.s, p0/z, z5.s, z4.s -; CHECK-NEXT: mov z0.d, p2/m, z6.d -; CHECK-NEXT: mov z1.d, p1/m, z6.d -; CHECK-NEXT: fcmuo p1.s, p0/z, z3.s, z3.s -; CHECK-NEXT: fcmuo p0.s, p0/z, z5.s, z5.s +; CHECK-NEXT: fcmge p1.s, p0/z, z1.s, z2.s +; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, z2.s +; CHECK-NEXT: movprfx z2, z1 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z1.s +; CHECK-NEXT: fcmgt p3.s, p0/z, z1.s, z4.s +; CHECK-NEXT: fcmgt p4.s, p0/z, z0.s, z4.s +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: mov z2.d, p1/m, z3.d +; CHECK-NEXT: fcmuo p1.s, p0/z, z1.s, z1.s +; CHECK-NEXT: fcmuo p0.s, p0/z, z0.s, z0.s +; CHECK-NEXT: sel z3.d, p2, z3.d, z5.d +; CHECK-NEXT: sel z0.d, p3, z6.d, z2.d +; CHECK-NEXT: sel z1.d, p4, z6.d, z3.d +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0 +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %x = call <vscale x 4 x i64> @llvm.fptosi.sat.nxv4f32.nxv4i64(<vscale x 4 x float> %f) ret <vscale x 4 x i64> %x @@ -228,24 +246,23 @@ define <vscale x 2 x i32> @test_signed_v2f64_v2i32(<vscale x 2 x double> %f) { ; CHECK-LABEL: test_signed_v2f64_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-4476578029606273024 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov x8, #-4476578029606273024 // =0xc1e0000000000000 ; CHECK-NEXT: mov z2.d, #0xffffffff80000000 ; CHECK-NEXT: mov z1.d, x8 -; CHECK-NEXT: mov x8, #281474972516352 +; CHECK-NEXT: mov x8, #281474972516352 // =0xffffffc00000 ; CHECK-NEXT: movk x8, #16863, lsl #48 +; CHECK-NEXT: mov z3.d, x8 ; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, z1.d ; CHECK-NEXT: movprfx z1, z0 ; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.d +; CHECK-NEXT: fcmgt p2.d, p0/z, z0.d, z3.d +; CHECK-NEXT: mov z3.d, #0x7fffffff ; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: mov z1.d, p1/m, z2.d -; CHECK-NEXT: mov z2.d, x8 -; CHECK-NEXT: fcmgt p1.d, p0/z, z0.d, z2.d -; CHECK-NEXT: mov z2.d, #0x7fffffff -; CHECK-NEXT: mov z1.d, p1/m, z2.d ; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z0.d -; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0 -; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, p1/m, z2.d +; CHECK-NEXT: sel z0.d, p2, z3.d, z1.d +; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 ; CHECK-NEXT: 
ret %x = call <vscale x 2 x i32> @llvm.fptosi.sat.nxv2f64.nxv2i32(<vscale x 2 x double> %f) ret <vscale x 2 x i32> %x @@ -254,33 +271,41 @@ define <vscale x 4 x i32> @test_signed_v4f64_v4i32(<vscale x 4 x double> %f) { ; CHECK-LABEL: test_signed_v4f64_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-4476578029606273024 -; CHECK-NEXT: mov x9, #281474972516352 +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG +; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: movk x9, #16863, lsl #48 ; CHECK-NEXT: mov z3.d, #0xffffffff80000000 -; CHECK-NEXT: movprfx z4, z1 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z1.d ; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: mov x8, #-4476578029606273024 // =0xc1e0000000000000 +; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: mov x8, #281474972516352 // =0xffffffc00000 ; CHECK-NEXT: mov z6.d, #0x7fffffff +; CHECK-NEXT: movk x8, #16863, lsl #48 +; CHECK-NEXT: movprfx z5, z0 +; CHECK-NEXT: fcvtzs z5.d, p0/m, z0.d +; CHECK-NEXT: mov z4.d, x8 ; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, z2.d -; CHECK-NEXT: mov z5.d, x9 -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: fcmgt p2.d, p0/z, z1.d, z5.d -; CHECK-NEXT: mov z4.d, p1/m, z3.d -; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, z2.d -; CHECK-NEXT: movprfx z2, z0 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z0.d +; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, z2.d +; CHECK-NEXT: movprfx z2, z1 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z1.d +; CHECK-NEXT: fcmgt p3.d, p0/z, z1.d, z4.d +; CHECK-NEXT: fcmgt p4.d, p0/z, z0.d, z4.d ; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: not p2.b, p0/z, p2.b ; CHECK-NEXT: mov z2.d, p1/m, z3.d -; CHECK-NEXT: fcmgt p1.d, p0/z, z0.d, z5.d -; CHECK-NEXT: sel z3.d, p2, z6.d, z4.d -; CHECK-NEXT: mov z2.d, p1/m, z6.d ; CHECK-NEXT: fcmuo p1.d, p0/z, z1.d, z1.d ; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z0.d -; CHECK-NEXT: mov z3.d, p1/m, #0 // =0x0 -; CHECK-NEXT: mov z2.d, p0/m, #0 // =0x0 -; CHECK-NEXT: uzp1 z0.s, z2.s, z3.s +; CHECK-NEXT: sel z3.d, p2, z3.d, z5.d +; CHECK-NEXT: sel z0.d, p3, z6.d, z2.d +; CHECK-NEXT: sel z1.d, p4, z6.d, z3.d +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0 +; CHECK-NEXT: uzp1 z0.s, z1.s, z0.s +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %x = call <vscale x 4 x i32> @llvm.fptosi.sat.nxv4f64.nxv4i32(<vscale x 4 x double> %f) ret <vscale x 4 x i32> %x @@ -289,52 +314,66 @@ define <vscale x 8 x i32> @test_signed_v8f64_v8i32(<vscale x 8 x double> %f) { ; CHECK-LABEL: test_signed_v8f64_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-4476578029606273024 +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG +; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z5.d, #0xffffffff80000000 +; CHECK-NEXT: mov x8, #-4476578029606273024 // =0xc1e0000000000000 ; CHECK-NEXT: movprfx z6, z1 ; CHECK-NEXT: fcvtzs z6.d, p0/m, z1.d -; CHECK-NEXT: mov z24.d, #0x7fffffff ; CHECK-NEXT: mov z4.d, x8 -; CHECK-NEXT: mov x8, #281474972516352 +; CHECK-NEXT: mov x8, #281474972516352 // =0xffffffc00000 +; CHECK-NEXT: movprfx z7, z0 +; CHECK-NEXT: fcvtzs z7.d, p0/m, z0.d ; CHECK-NEXT: movk x8, #16863, lsl #48 +; CHECK-NEXT: mov z26.d, #0x7fffffff +; CHECK-NEXT: movprfx z24, z3 +; CHECK-NEXT: fcvtzs z24.d, p0/m, z3.d +; CHECK-NEXT: mov z5.d, x8 +; CHECK-NEXT: movprfx z25, z2 +; CHECK-NEXT: fcvtzs z25.d, p0/m, z2.d ; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, z4.d -; CHECK-NEXT: not p1.b, p0/z, p1.b ; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, z4.d -; CHECK-NEXT: mov z6.d, p1/m, z5.d -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: mov z7.d, x8 -; CHECK-NEXT: fcmgt p1.d, p0/z, z1.d, z7.d -; CHECK-NEXT: mov z6.d, p1/m, z24.d -; CHECK-NEXT: fcmuo p1.d, p0/z, z1.d, z1.d -; CHECK-NEXT: movprfx z1, z0 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.d -; CHECK-NEXT: mov z6.d, p1/m, #0 // =0x0 -; CHECK-NEXT: mov z1.d, p2/m, z5.d -; CHECK-NEXT: fcmgt p2.d, p0/z, z0.d, z7.d -; CHECK-NEXT: mov z1.d, p2/m, z24.d -; CHECK-NEXT: fcmge p2.d, p0/z, z3.d, z4.d -; CHECK-NEXT: fcmuo p1.d, p0/z, z0.d, z0.d -; CHECK-NEXT: movprfx z0, z3 -; CHECK-NEXT: fcvtzs z0.d, p0/m, z3.d -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: mov z1.d, p1/m, #0 // =0x0 -; CHECK-NEXT: mov z0.d, p2/m, z5.d -; CHECK-NEXT: fcmge p2.d, p0/z, z2.d, z4.d -; CHECK-NEXT: movprfx z4, z2 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z2.d +; CHECK-NEXT: fcmge p3.d, p0/z, z3.d, z4.d +; CHECK-NEXT: fcmge p4.d, p0/z, z2.d, z4.d +; CHECK-NEXT: mov z4.d, #0xffffffff80000000 +; CHECK-NEXT: fcmgt p5.d, p0/z, z1.d, z5.d +; CHECK-NEXT: fcmgt p6.d, p0/z, z0.d, z5.d +; CHECK-NEXT: fcmgt p7.d, p0/z, z3.d, z5.d +; CHECK-NEXT: not p1.b, p0/z, p1.b ; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: fcmgt p1.d, p0/z, z3.d, z7.d -; CHECK-NEXT: mov z4.d, p2/m, z5.d -; CHECK-NEXT: fcmgt p2.d, p0/z, z2.d, z7.d -; CHECK-NEXT: sel z5.d, p1, z24.d, z0.d -; CHECK-NEXT: mov z4.d, p2/m, z24.d -; CHECK-NEXT: fcmuo p1.d, p0/z, z3.d, z3.d +; CHECK-NEXT: not p3.b, p0/z, p3.b +; CHECK-NEXT: not p4.b, p0/z, p4.b +; CHECK-NEXT: mov z6.d, p1/m, z4.d +; CHECK-NEXT: fcmgt p1.d, p0/z, z2.d, z5.d +; CHECK-NEXT: sel z5.d, p2, z4.d, z7.d +; CHECK-NEXT: fcmuo p2.d, p0/z, z1.d, z1.d +; CHECK-NEXT: sel z7.d, p3, z4.d, z24.d +; CHECK-NEXT: fcmuo p3.d, p0/z, z0.d, z0.d +; CHECK-NEXT: sel z4.d, p4, z4.d, z25.d +; CHECK-NEXT: fcmuo p4.d, p0/z, z3.d, z3.d ; CHECK-NEXT: fcmuo p0.d, p0/z, z2.d, z2.d -; CHECK-NEXT: mov z5.d, p1/m, #0 // =0x0 -; CHECK-NEXT: mov z4.d, p0/m, #0 // =0x0 -; CHECK-NEXT: uzp1 z0.s, z1.s, z6.s -; CHECK-NEXT: uzp1 z1.s, z4.s, z5.s +; CHECK-NEXT: sel z0.d, p5, z26.d, z6.d +; CHECK-NEXT: sel z1.d, p6, z26.d, z5.d +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: sel z2.d, p7, z26.d, z7.d +; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded 
Reload +; CHECK-NEXT: sel z3.d, p1, z26.d, z4.d +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0 +; CHECK-NEXT: mov z1.d, p3/m, #0 // =0x0 +; CHECK-NEXT: mov z2.d, p4/m, #0 // =0x0 +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z3.d, p0/m, #0 // =0x0 +; CHECK-NEXT: uzp1 z0.s, z1.s, z0.s +; CHECK-NEXT: uzp1 z1.s, z3.s, z2.s +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %x = call <vscale x 8 x i32> @llvm.fptosi.sat.nxv8f64.nxv8i32(<vscale x 8 x double> %f) ret <vscale x 8 x i32> %x @@ -343,32 +382,40 @@ define <vscale x 4 x i16> @test_signed_v4f64_v4i16(<vscale x 4 x double> %f) { ; CHECK-LABEL: test_signed_v4f64_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-4548635623644200960 -; CHECK-NEXT: mov x9, #281200098803712 +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG +; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: movk x9, #16607, lsl #48 -; CHECK-NEXT: movprfx z4, z1 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z1.d -; CHECK-NEXT: mov z3.d, #32767 // =0x7fff +; CHECK-NEXT: mov x8, #-4548635623644200960 // =0xc0e0000000000000 +; CHECK-NEXT: movprfx z4, z0 +; CHECK-NEXT: fcvtzs z4.d, p0/m, z0.d ; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: mov x8, #281200098803712 // =0xffc000000000 +; CHECK-NEXT: mov z5.d, #32767 // =0x7fff +; CHECK-NEXT: movk x8, #16607, lsl #48 +; CHECK-NEXT: mov z3.d, x8 ; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, z2.d -; CHECK-NEXT: mov z5.d, x9 -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: fcmgt p2.d, p0/z, z1.d, z5.d -; CHECK-NEXT: mov z4.d, p1/m, #-32768 // =0xffffffffffff8000 -; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, z2.d -; CHECK-NEXT: movprfx z2, z0 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z0.d +; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, z2.d +; CHECK-NEXT: movprfx z2, z1 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z1.d +; CHECK-NEXT: fcmgt p3.d, p0/z, z1.d, z3.d +; CHECK-NEXT: fcmgt p4.d, p0/z, z0.d, z3.d ; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: not p2.b, p0/z, p2.b ; CHECK-NEXT: mov z2.d, p1/m, #-32768 // =0xffffffffffff8000 -; CHECK-NEXT: fcmgt p1.d, p0/z, z0.d, z5.d -; CHECK-NEXT: mov z4.d, p2/m, z3.d -; CHECK-NEXT: mov z2.d, p1/m, z3.d ; CHECK-NEXT: fcmuo p1.d, p0/z, z1.d, z1.d ; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z0.d -; CHECK-NEXT: mov z4.d, p1/m, #0 // =0x0 -; CHECK-NEXT: mov z2.d, p0/m, #0 // =0x0 -; CHECK-NEXT: uzp1 z0.s, z2.s, z4.s +; CHECK-NEXT: mov z4.d, p2/m, #-32768 // =0xffffffffffff8000 +; CHECK-NEXT: sel z0.d, p3, z5.d, z2.d +; CHECK-NEXT: sel z1.d, p4, z5.d, z4.d +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0 +; CHECK-NEXT: uzp1 z0.s, z1.s, z0.s +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %x = call <vscale x 4 x i16> @llvm.fptosi.sat.nxv4f64.nxv4i16(<vscale x 4 x double> %f) ret <vscale x 4 x i16> %x @@ -377,52 +424,66 @@ define <vscale x 8 x i16> @test_signed_v8f64_v8i16(<vscale x 8 x double> %f) { ; CHECK-LABEL: test_signed_v8f64_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-4548635623644200960 -; CHECK-NEXT: mov x9, #281200098803712 -; CHECK-NEXT: movk x9, #16607, lsl #48 +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG +; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov x8, #-4548635623644200960 // =0xc0e0000000000000 ; CHECK-NEXT: movprfx z5, z3 ; CHECK-NEXT: fcvtzs z5.d, p0/m, z3.d -; CHECK-NEXT: mov z7.d, #32767 // =0x7fff ; CHECK-NEXT: mov z4.d, x8 +; CHECK-NEXT: mov x8, #281200098803712 // =0xffc000000000 +; CHECK-NEXT: movprfx z6, z2 +; CHECK-NEXT: fcvtzs z6.d, p0/m, z2.d +; CHECK-NEXT: movk x8, #16607, lsl #48 +; CHECK-NEXT: movprfx z7, z1 +; CHECK-NEXT: fcvtzs z7.d, p0/m, z1.d +; CHECK-NEXT: movprfx z24, z0 +; CHECK-NEXT: fcvtzs z24.d, p0/m, z0.d +; CHECK-NEXT: mov z25.d, #32767 // =0x7fff ; CHECK-NEXT: fcmge p1.d, p0/z, z3.d, z4.d -; CHECK-NEXT: mov z6.d, x9 -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: fcmgt p2.d, p0/z, z3.d, z6.d -; CHECK-NEXT: mov z5.d, p1/m, #-32768 // =0xffffffffffff8000 -; CHECK-NEXT: fcmuo p1.d, p0/z, z3.d, z3.d -; CHECK-NEXT: mov z5.d, p2/m, z7.d ; CHECK-NEXT: fcmge p2.d, p0/z, z2.d, z4.d -; CHECK-NEXT: movprfx z3, z2 -; CHECK-NEXT: fcvtzs z3.d, p0/m, z2.d -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: mov z3.d, p2/m, #-32768 // =0xffffffffffff8000 -; CHECK-NEXT: fcmgt p2.d, p0/z, z2.d, z6.d -; CHECK-NEXT: mov z3.d, p2/m, z7.d -; CHECK-NEXT: fcmge p2.d, p0/z, z1.d, z4.d -; CHECK-NEXT: mov z5.d, p1/m, #0 // =0x0 -; CHECK-NEXT: fcmuo p1.d, p0/z, z2.d, z2.d -; CHECK-NEXT: movprfx z2, z1 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z1.d -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: mov z2.d, p2/m, #-32768 // =0xffffffffffff8000 -; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, z4.d -; CHECK-NEXT: movprfx z4, z0 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z0.d +; CHECK-NEXT: fcmge p3.d, p0/z, z1.d, z4.d +; CHECK-NEXT: fcmge p4.d, p0/z, z0.d, z4.d +; CHECK-NEXT: mov z4.d, x8 +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: fcmgt p5.d, p0/z, z3.d, z4.d +; CHECK-NEXT: fcmgt p6.d, p0/z, z2.d, z4.d ; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: mov z3.d, p1/m, #0 // =0x0 -; CHECK-NEXT: fcmgt p1.d, p0/z, z1.d, z6.d -; CHECK-NEXT: mov z4.d, p2/m, #-32768 // =0xffffffffffff8000 -; CHECK-NEXT: fcmgt p2.d, p0/z, z0.d, z6.d -; CHECK-NEXT: mov z2.d, p1/m, z7.d -; CHECK-NEXT: mov z4.d, p2/m, z7.d -; CHECK-NEXT: fcmuo p1.d, p0/z, z1.d, z1.d +; CHECK-NEXT: fcmgt p7.d, p0/z, z1.d, z4.d +; CHECK-NEXT: not p3.b, p0/z, p3.b +; CHECK-NEXT: not p4.b, p0/z, p4.b +; CHECK-NEXT: mov z5.d, p1/m, #-32768 // =0xffffffffffff8000 +; CHECK-NEXT: fcmgt p1.d, p0/z, z0.d, z4.d +; CHECK-NEXT: mov z6.d, p2/m, #-32768 // =0xffffffffffff8000 +; CHECK-NEXT: fcmuo p2.d, p0/z, z3.d, z3.d +; CHECK-NEXT: mov z7.d, p3/m, #-32768 // =0xffffffffffff8000 +; CHECK-NEXT: fcmuo p3.d, p0/z, z2.d, z2.d +; CHECK-NEXT: mov z24.d, p4/m, #-32768 // =0xffffffffffff8000 +; CHECK-NEXT: fcmuo p4.d, p0/z, z1.d, z1.d ; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z0.d -; CHECK-NEXT: mov z2.d, p1/m, #0 // =0x0 -; CHECK-NEXT: mov z4.d, p0/m, #0 // =0x0 -; CHECK-NEXT: uzp1 z0.s, z3.s, z5.s -; CHECK-NEXT: uzp1 z1.s, z4.s, z2.s +; CHECK-NEXT: sel z2.d, p5, z25.d, z5.d +; CHECK-NEXT: sel z0.d, p6, z25.d, z6.d +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: sel z1.d, p7, 
z25.d, z7.d +; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: sel z3.d, p1, z25.d, z24.d +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z2.d, p2/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, p3/m, #0 // =0x0 +; CHECK-NEXT: mov z1.d, p4/m, #0 // =0x0 +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z3.d, p0/m, #0 // =0x0 +; CHECK-NEXT: uzp1 z0.s, z0.s, z2.s +; CHECK-NEXT: uzp1 z1.s, z3.s, z1.s ; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv8f64.nxv8i16( %f) ret %x @@ -431,23 +492,22 @@ define @test_signed_v2f64_v2i64( %f) { ; CHECK-LABEL: test_signed_v2f64_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-4332462841530417152 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov x8, #-4332462841530417152 // =0xc3e0000000000000 ; CHECK-NEXT: mov z2.d, #0x8000000000000000 ; CHECK-NEXT: mov z1.d, x8 -; CHECK-NEXT: mov x8, #4890909195324358655 +; CHECK-NEXT: mov x8, #4890909195324358655 // =0x43dfffffffffffff +; CHECK-NEXT: mov z3.d, x8 ; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, z1.d ; CHECK-NEXT: movprfx z1, z0 ; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.d +; CHECK-NEXT: fcmgt p2.d, p0/z, z0.d, z3.d +; CHECK-NEXT: mov z3.d, #0x7fffffffffffffff ; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: mov z1.d, p1/m, z2.d -; CHECK-NEXT: mov z2.d, x8 -; CHECK-NEXT: fcmgt p1.d, p0/z, z0.d, z2.d -; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff -; CHECK-NEXT: mov z1.d, p1/m, z2.d ; CHECK-NEXT: fcmuo p0.d, p0/z, z0.d, z0.d -; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0 -; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, p1/m, z2.d +; CHECK-NEXT: sel z0.d, p2, z3.d, z1.d +; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv2f64.nxv2i64( %f) ret %x @@ -456,33 +516,39 @@ define @test_signed_v4f64_v4i64( %f) { ; CHECK-LABEL: test_signed_v4f64_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-4332462841530417152 +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG +; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov x9, #4890909195324358655 +; CHECK-NEXT: mov x8, #-4332462841530417152 // =0xc3e0000000000000 ; CHECK-NEXT: mov z3.d, #0x8000000000000000 -; CHECK-NEXT: movprfx z4, z0 -; CHECK-NEXT: fcvtzs z4.d, p0/m, z0.d -; CHECK-NEXT: mov z6.d, #0x7fffffffffffffff ; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: mov x8, #4890909195324358655 // =0x43dfffffffffffff +; CHECK-NEXT: mov z6.d, #0x7fffffffffffffff +; CHECK-NEXT: mov z4.d, x8 +; CHECK-NEXT: movprfx z5, z1 +; CHECK-NEXT: fcvtzs z5.d, p0/m, z1.d ; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, z2.d -; CHECK-NEXT: mov z5.d, x9 -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: fcmgt p2.d, p0/z, z0.d, z5.d -; CHECK-NEXT: mov z4.d, p1/m, z3.d -; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, z2.d +; CHECK-NEXT: fcmge p2.d, p0/z, z1.d, z2.d +; CHECK-NEXT: movprfx z2, z0 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z0.d +; CHECK-NEXT: fcmgt p3.d, p0/z, z0.d, z4.d +; CHECK-NEXT: fcmgt p4.d, p0/z, z1.d, z4.d ; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: movprfx z2, z1 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z1.d -; CHECK-NEXT: sel z3.d, p1, z3.d, z2.d -; CHECK-NEXT: fcmgt p1.d, p0/z, z1.d, z5.d -; CHECK-NEXT: sel z2.d, p2, z6.d, z4.d -; CHECK-NEXT: mov z3.d, p1/m, z6.d +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: mov z2.d, p1/m, z3.d ; CHECK-NEXT: fcmuo p1.d, p0/z, z0.d, z0.d ; CHECK-NEXT: fcmuo p0.d, p0/z, z1.d, z1.d -; CHECK-NEXT: mov z2.d, p1/m, #0 // =0x0 -; CHECK-NEXT: mov z3.d, p0/m, #0 // =0x0 -; CHECK-NEXT: mov z0.d, z2.d -; CHECK-NEXT: mov z1.d, z3.d +; CHECK-NEXT: sel z3.d, p2, z3.d, z5.d +; CHECK-NEXT: sel z0.d, p3, z6.d, z2.d +; CHECK-NEXT: sel z1.d, p4, z6.d, z3.d +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0 +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv4f64.nxv4i64( %f) ret %x @@ -502,23 +568,22 @@ define @test_signed_v2f16_v2i32( %f) { ; CHECK-LABEL: test_signed_v2f16_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #64511 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov w8, #64511 // =0xfbff ; CHECK-NEXT: mov z2.d, #0xffffffff80000000 ; CHECK-NEXT: mov z1.h, w8 -; CHECK-NEXT: mov w8, #31743 +; CHECK-NEXT: mov w8, #31743 // =0x7bff +; CHECK-NEXT: mov z3.h, w8 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h ; CHECK-NEXT: movprfx z1, z0 ; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.h +; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z3.h +; CHECK-NEXT: mov z3.d, #0x7fffffff ; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: mov z1.d, p1/m, z2.d -; CHECK-NEXT: mov z2.h, w8 -; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z2.h -; CHECK-NEXT: mov z2.d, #0x7fffffff -; CHECK-NEXT: mov z1.d, p1/m, z2.d ; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h -; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0 -; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, p1/m, z2.d +; CHECK-NEXT: sel z0.d, p2, z3.d, z1.d +; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv2f16.nxv2i32( %f) ret %x @@ -527,23 +592,22 @@ define @test_signed_v4f16_v4i32( %f) { ; CHECK-LABEL: test_signed_v4f16_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #64511 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov w8, #64511 // 
=0xfbff ; CHECK-NEXT: mov z2.s, #0x80000000 ; CHECK-NEXT: mov z1.h, w8 -; CHECK-NEXT: mov w8, #31743 +; CHECK-NEXT: mov w8, #31743 // =0x7bff +; CHECK-NEXT: mov z3.h, w8 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h ; CHECK-NEXT: movprfx z1, z0 ; CHECK-NEXT: fcvtzs z1.s, p0/m, z0.h +; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z3.h +; CHECK-NEXT: mov z3.s, #0x7fffffff ; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: mov z1.s, p1/m, z2.s -; CHECK-NEXT: mov z2.h, w8 -; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z2.h -; CHECK-NEXT: mov z2.s, #0x7fffffff -; CHECK-NEXT: mov z1.s, p1/m, z2.s ; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h -; CHECK-NEXT: mov z1.s, p0/m, #0 // =0x0 -; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: mov z1.s, p1/m, z2.s +; CHECK-NEXT: sel z0.s, p2, z3.s, z1.s +; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv4f16.nxv4i32( %f) ret %x @@ -552,33 +616,41 @@ define @test_signed_v8f16_v8i32( %f) { ; CHECK-LABEL: test_signed_v8f16_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #64511 +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG +; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: uunpklo z3.s, z0.h -; CHECK-NEXT: mov w9, #31743 -; CHECK-NEXT: mov z2.s, #0x80000000 -; CHECK-NEXT: uunpkhi z5.s, z0.h -; CHECK-NEXT: mov z1.h, w8 -; CHECK-NEXT: movprfx z0, z3 -; CHECK-NEXT: fcvtzs z0.s, p0/m, z3.h -; CHECK-NEXT: fcmge p1.h, p0/z, z3.h, z1.h -; CHECK-NEXT: mov z4.h, w9 -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: fcmgt p2.h, p0/z, z3.h, z4.h -; CHECK-NEXT: mov z0.s, p1/m, z2.s -; CHECK-NEXT: fcmge p1.h, p0/z, z5.h, z1.h -; CHECK-NEXT: movprfx z1, z5 -; CHECK-NEXT: fcvtzs z1.s, p0/m, z5.h -; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov w8, #64511 // =0xfbff +; CHECK-NEXT: uunpklo z1.s, z0.h +; CHECK-NEXT: uunpkhi z0.s, z0.h +; CHECK-NEXT: mov z2.h, w8 +; CHECK-NEXT: mov w8, #31743 // =0x7bff +; CHECK-NEXT: mov z3.s, #0x80000000 +; CHECK-NEXT: mov z4.h, w8 +; CHECK-NEXT: movprfx z5, z0 +; CHECK-NEXT: fcvtzs z5.s, p0/m, z0.h ; CHECK-NEXT: mov z6.s, #0x7fffffff -; CHECK-NEXT: mov z1.s, p1/m, z2.s -; CHECK-NEXT: fcmgt p1.h, p0/z, z5.h, z4.h -; CHECK-NEXT: mov z0.s, p2/m, z6.s -; CHECK-NEXT: mov z1.s, p1/m, z6.s -; CHECK-NEXT: fcmuo p1.h, p0/z, z3.h, z3.h -; CHECK-NEXT: fcmuo p0.h, p0/z, z5.h, z5.h +; CHECK-NEXT: fcmge p1.h, p0/z, z1.h, z2.h +; CHECK-NEXT: fcmge p2.h, p0/z, z0.h, z2.h +; CHECK-NEXT: movprfx z2, z1 +; CHECK-NEXT: fcvtzs z2.s, p0/m, z1.h +; CHECK-NEXT: fcmgt p3.h, p0/z, z1.h, z4.h +; CHECK-NEXT: fcmgt p4.h, p0/z, z0.h, z4.h +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: mov z2.s, p1/m, z3.s +; CHECK-NEXT: fcmuo p1.h, p0/z, z1.h, z1.h +; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h +; CHECK-NEXT: sel z3.s, p2, z3.s, z5.s +; CHECK-NEXT: sel z0.s, p3, z6.s, z2.s +; CHECK-NEXT: sel z1.s, p4, z6.s, z3.s +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: mov z0.s, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z1.s, p0/m, #0 // =0x0 +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv8f16.nxv8i32( %f) ret %x @@ -587,22 +659,21 @@ define @test_signed_v4f16_v4i16( %f) { ; CHECK-LABEL: test_signed_v4f16_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, 
#63488 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov w8, #63488 // =0xf800 ; CHECK-NEXT: mov z1.h, w8 -; CHECK-NEXT: mov w8, #30719 +; CHECK-NEXT: mov w8, #30719 // =0x77ff +; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h ; CHECK-NEXT: movprfx z1, z0 ; CHECK-NEXT: fcvtzs z1.s, p0/m, z0.h -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: mov z2.h, w8 -; CHECK-NEXT: mov z1.s, p1/m, #-32768 // =0xffffffffffff8000 -; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z2.h +; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z2.h ; CHECK-NEXT: mov z2.s, #32767 // =0x7fff -; CHECK-NEXT: mov z1.s, p1/m, z2.s +; CHECK-NEXT: not p1.b, p0/z, p1.b ; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h -; CHECK-NEXT: mov z1.s, p0/m, #0 // =0x0 -; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: mov z1.s, p1/m, #-32768 // =0xffffffffffff8000 +; CHECK-NEXT: sel z0.s, p2, z2.s, z1.s +; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv4f16.nxv4i16( %f) ret %x @@ -611,22 +682,21 @@ define @test_signed_v8f16_v8i16( %f) { ; CHECK-LABEL: test_signed_v8f16_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #63488 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov w8, #63488 // =0xf800 ; CHECK-NEXT: mov z1.h, w8 -; CHECK-NEXT: mov w8, #30719 +; CHECK-NEXT: mov w8, #30719 // =0x77ff +; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h ; CHECK-NEXT: movprfx z1, z0 ; CHECK-NEXT: fcvtzs z1.h, p0/m, z0.h -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: mov z2.h, w8 -; CHECK-NEXT: mov z1.h, p1/m, #-32768 // =0xffffffffffff8000 -; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z2.h +; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z2.h ; CHECK-NEXT: mov z2.h, #32767 // =0x7fff -; CHECK-NEXT: mov z1.h, p1/m, z2.h +; CHECK-NEXT: not p1.b, p0/z, p1.b ; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h -; CHECK-NEXT: mov z1.h, p0/m, #0 // =0x0 -; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: mov z1.h, p1/m, #-32768 // =0xffffffffffff8000 +; CHECK-NEXT: sel z0.h, p2, z2.h, z1.h +; CHECK-NEXT: mov z0.h, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv8f16.nxv8i16( %f) ret %x @@ -635,23 +705,22 @@ define @test_signed_v2f16_v2i64( %f) { ; CHECK-LABEL: test_signed_v2f16_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #64511 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov w8, #64511 // =0xfbff ; CHECK-NEXT: mov z2.d, #0x8000000000000000 ; CHECK-NEXT: mov z1.h, w8 -; CHECK-NEXT: mov w8, #31743 +; CHECK-NEXT: mov w8, #31743 // =0x7bff +; CHECK-NEXT: mov z3.h, w8 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h ; CHECK-NEXT: movprfx z1, z0 ; CHECK-NEXT: fcvtzs z1.d, p0/m, z0.h +; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z3.h +; CHECK-NEXT: mov z3.d, #0x7fffffffffffffff ; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: mov z1.d, p1/m, z2.d -; CHECK-NEXT: mov z2.h, w8 -; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z2.h -; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff -; CHECK-NEXT: mov z1.d, p1/m, z2.d ; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h -; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0 -; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: mov z1.d, p1/m, z2.d +; CHECK-NEXT: sel z0.d, p2, z3.d, z1.d +; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv2f16.nxv2i64( %f) ret %x @@ -660,33 +729,41 @@ define @test_signed_v4f16_v4i64( %f) { ; CHECK-LABEL: test_signed_v4f16_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #64511 +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG +; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: uunpklo z3.d, z0.s -; CHECK-NEXT: mov w9, #31743 -; CHECK-NEXT: mov z2.d, #0x8000000000000000 -; CHECK-NEXT: uunpkhi z5.d, z0.s -; CHECK-NEXT: mov z1.h, w8 -; CHECK-NEXT: movprfx z0, z3 -; CHECK-NEXT: fcvtzs z0.d, p0/m, z3.h -; CHECK-NEXT: fcmge p1.h, p0/z, z3.h, z1.h -; CHECK-NEXT: mov z4.h, w9 -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: fcmgt p2.h, p0/z, z3.h, z4.h -; CHECK-NEXT: mov z0.d, p1/m, z2.d -; CHECK-NEXT: fcmge p1.h, p0/z, z5.h, z1.h -; CHECK-NEXT: movprfx z1, z5 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z5.h -; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: mov w8, #64511 // =0xfbff +; CHECK-NEXT: uunpklo z1.d, z0.s +; CHECK-NEXT: uunpkhi z0.d, z0.s +; CHECK-NEXT: mov z2.h, w8 +; CHECK-NEXT: mov w8, #31743 // =0x7bff +; CHECK-NEXT: mov z3.d, #0x8000000000000000 +; CHECK-NEXT: mov z4.h, w8 +; CHECK-NEXT: movprfx z5, z0 +; CHECK-NEXT: fcvtzs z5.d, p0/m, z0.h ; CHECK-NEXT: mov z6.d, #0x7fffffffffffffff -; CHECK-NEXT: mov z1.d, p1/m, z2.d -; CHECK-NEXT: fcmgt p1.h, p0/z, z5.h, z4.h -; CHECK-NEXT: mov z0.d, p2/m, z6.d -; CHECK-NEXT: mov z1.d, p1/m, z6.d -; CHECK-NEXT: fcmuo p1.h, p0/z, z3.h, z3.h -; CHECK-NEXT: fcmuo p0.h, p0/z, z5.h, z5.h +; CHECK-NEXT: fcmge p1.h, p0/z, z1.h, z2.h +; CHECK-NEXT: fcmge p2.h, p0/z, z0.h, z2.h +; CHECK-NEXT: movprfx z2, z1 +; CHECK-NEXT: fcvtzs z2.d, p0/m, z1.h +; CHECK-NEXT: fcmgt p3.h, p0/z, z1.h, z4.h +; CHECK-NEXT: fcmgt p4.h, p0/z, z0.h, z4.h +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: mov z2.d, p1/m, z3.d +; CHECK-NEXT: fcmuo p1.h, p0/z, z1.h, z1.h +; CHECK-NEXT: fcmuo p0.h, p0/z, z0.h, z0.h +; CHECK-NEXT: sel z3.d, p2, z3.d, z5.d +; CHECK-NEXT: sel z0.d, p3, z6.d, z2.d +; CHECK-NEXT: sel z1.d, p4, z6.d, z3.d +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0 +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %x = call @llvm.fptosi.sat.nxv4f16.nxv4i64( %f) ret %x diff --git a/llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll b/llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll --- a/llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll +++ b/llvm/test/CodeGen/AArch64/sve-fptoui-sat.ll @@ -14,16 +14,17 @@ define @test_signed_v2f32_v2i32( %f) { ; CHECK-LABEL: test_signed_v2f32_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1333788671 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: mov w8, #1333788671 // =0x4f7fffff +; CHECK-NEXT: movprfx z2, z0 +; CHECK-NEXT: fcvtzu z2.d, p0/m, z0.s ; CHECK-NEXT: mov z1.s, w8 -; CHECK-NEXT: fcmgt p2.s, p0/z, z0.s, z1.s -; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.s -; CHECK-NEXT: not p0.b, p0/z, p1.b -; CHECK-NEXT: mov z1.d, #0xffffffff -; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 -; CHECK-NEXT: mov z0.d, p2/m, z1.d +; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: mov z0.d, #0xffffffff +; CHECK-NEXT: mov z2.d, p1/m, #0 // =0x0 +; CHECK-NEXT: sel z0.d, p0, z0.d, z2.d ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv2f32.nxv2i32( %f) ret %x @@ -32,15 +33,15 @@ define @test_signed_v4f32_v4i32( %f) { ; 
CHECK-LABEL: test_signed_v4f32_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1333788671 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: mov w8, #1333788671 // =0x4f7fffff ; CHECK-NEXT: movprfx z1, z0 ; CHECK-NEXT: fcvtzu z1.s, p0/m, z0.s -; CHECK-NEXT: not p1.b, p0/z, p1.b ; CHECK-NEXT: mov z2.s, w8 -; CHECK-NEXT: mov z1.s, p1/m, #0 // =0x0 +; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: not p1.b, p0/z, p1.b ; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z2.s +; CHECK-NEXT: mov z1.s, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z1.s, p0/m, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret @@ -51,22 +52,22 @@ define @test_signed_v8f32_v8i32( %f) { ; CHECK-LABEL: test_signed_v8f32_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1333788671 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0 -; CHECK-NEXT: fcmge p2.s, p0/z, z1.s, #0.0 +; CHECK-NEXT: mov w8, #1333788671 // =0x4f7fffff ; CHECK-NEXT: movprfx z2, z0 ; CHECK-NEXT: fcvtzu z2.s, p0/m, z0.s -; CHECK-NEXT: not p1.b, p0/z, p1.b ; CHECK-NEXT: mov z4.s, w8 ; CHECK-NEXT: movprfx z3, z1 ; CHECK-NEXT: fcvtzu z3.s, p0/m, z1.s +; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: fcmge p2.s, p0/z, z1.s, #0.0 +; CHECK-NEXT: fcmgt p3.s, p0/z, z0.s, z4.s +; CHECK-NEXT: not p1.b, p0/z, p1.b ; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: mov z2.s, p1/m, #0 // =0x0 -; CHECK-NEXT: fcmgt p1.s, p0/z, z0.s, z4.s ; CHECK-NEXT: fcmgt p0.s, p0/z, z1.s, z4.s +; CHECK-NEXT: mov z2.s, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z3.s, p2/m, #0 // =0x0 -; CHECK-NEXT: mov z2.s, p1/m, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov z2.s, p3/m, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z3.s, p0/m, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z0.d, z2.d ; CHECK-NEXT: mov z1.d, z3.d @@ -78,17 +79,18 @@ define @test_signed_v4f32_v4i16( %f) { ; CHECK-LABEL: test_signed_v4f32_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #65280 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov w8, #65280 // =0xff00 +; CHECK-NEXT: movprfx z2, z0 +; CHECK-NEXT: fcvtzu z2.s, p0/m, z0.s ; CHECK-NEXT: movk w8, #18303, lsl #16 -; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0 ; CHECK-NEXT: mov z1.s, w8 -; CHECK-NEXT: fcmgt p2.s, p0/z, z0.s, z1.s -; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s -; CHECK-NEXT: not p0.b, p0/z, p1.b -; CHECK-NEXT: mov z1.s, #65535 // =0xffff -; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0 -; CHECK-NEXT: mov z0.s, p2/m, z1.s +; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z1.s +; CHECK-NEXT: mov z0.s, #65535 // =0xffff +; CHECK-NEXT: mov z2.s, p1/m, #0 // =0x0 +; CHECK-NEXT: sel z0.s, p0, z0.s, z2.s ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv4f32.nxv4i16( %f) ret %x @@ -97,26 +99,26 @@ define @test_signed_v8f32_v8i16( %f) { ; CHECK-LABEL: test_signed_v8f32_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #65280 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: movk w8, #18303, lsl #16 -; CHECK-NEXT: fcmge p1.s, p0/z, z1.s, #0.0 +; CHECK-NEXT: mov w8, #65280 // =0xff00 ; CHECK-NEXT: movprfx z3, z1 ; CHECK-NEXT: fcvtzu z3.s, p0/m, z1.s -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: mov z3.s, p1/m, #0 // =0x0 -; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: movk w8, #18303, lsl #16 +; CHECK-NEXT: movprfx z4, z0 +; CHECK-NEXT: fcvtzu z4.s, p0/m, z0.s ; CHECK-NEXT: mov z2.s, w8 +; CHECK-NEXT: fcmge p1.s, p0/z, z1.s, #0.0 +; CHECK-NEXT: fcmge p2.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: fcmgt p3.s, p0/z, z1.s, z2.s ; CHECK-NEXT: not p1.b, p0/z, 
p1.b -; CHECK-NEXT: fcmgt p2.s, p0/z, z1.s, z2.s -; CHECK-NEXT: movprfx z1, z0 -; CHECK-NEXT: fcvtzu z1.s, p0/m, z0.s +; CHECK-NEXT: not p2.b, p0/z, p2.b ; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z2.s ; CHECK-NEXT: mov z0.s, #65535 // =0xffff -; CHECK-NEXT: mov z1.s, p1/m, #0 // =0x0 -; CHECK-NEXT: sel z2.s, p2, z0.s, z3.s -; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s -; CHECK-NEXT: uzp1 z0.h, z0.h, z2.h +; CHECK-NEXT: mov z3.s, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z4.s, p2/m, #0 // =0x0 +; CHECK-NEXT: sel z1.s, p3, z0.s, z3.s +; CHECK-NEXT: sel z0.s, p0, z0.s, z4.s +; CHECK-NEXT: uzp1 z0.h, z0.h, z1.h ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv8f32.nxv8i16( %f) ret %x @@ -125,15 +127,15 @@ define @test_signed_v2f32_v2i64( %f) { ; CHECK-LABEL: test_signed_v2f32_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1602224127 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: mov w8, #1602224127 // =0x5f7fffff ; CHECK-NEXT: movprfx z1, z0 ; CHECK-NEXT: fcvtzu z1.d, p0/m, z0.s -; CHECK-NEXT: not p1.b, p0/z, p1.b ; CHECK-NEXT: mov z2.s, w8 -; CHECK-NEXT: mov z1.d, p1/m, #0 // =0x0 +; CHECK-NEXT: fcmge p1.s, p0/z, z0.s, #0.0 +; CHECK-NEXT: not p1.b, p0/z, p1.b ; CHECK-NEXT: fcmgt p0.s, p0/z, z0.s, z2.s +; CHECK-NEXT: mov z1.d, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z1.d, p0/m, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret @@ -144,24 +146,24 @@ define @test_signed_v4f32_v4i64( %f) { ; CHECK-LABEL: test_signed_v4f32_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1602224127 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: uunpklo z1.d, z0.s +; CHECK-NEXT: uunpklo z2.d, z0.s ; CHECK-NEXT: uunpkhi z3.d, z0.s -; CHECK-NEXT: fcmge p2.s, p0/z, z1.s, #0.0 -; CHECK-NEXT: movprfx z0, z1 -; CHECK-NEXT: fcvtzu z0.d, p0/m, z1.s -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: mov z2.s, w8 -; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0 -; CHECK-NEXT: fcmge p2.s, p0/z, z3.s, #0.0 -; CHECK-NEXT: fcmgt p1.s, p0/z, z1.s, z2.s +; CHECK-NEXT: mov w8, #1602224127 // =0x5f7fffff +; CHECK-NEXT: movprfx z0, z2 +; CHECK-NEXT: fcvtzu z0.d, p0/m, z2.s ; CHECK-NEXT: movprfx z1, z3 ; CHECK-NEXT: fcvtzu z1.d, p0/m, z3.s +; CHECK-NEXT: mov z4.s, w8 +; CHECK-NEXT: fcmge p1.s, p0/z, z2.s, #0.0 +; CHECK-NEXT: fcmge p2.s, p0/z, z3.s, #0.0 +; CHECK-NEXT: fcmgt p3.s, p0/z, z2.s, z4.s +; CHECK-NEXT: not p1.b, p0/z, p1.b ; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: fcmgt p0.s, p0/z, z3.s, z2.s +; CHECK-NEXT: fcmgt p0.s, p0/z, z3.s, z4.s +; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z1.d, p2/m, #0 // =0x0 -; CHECK-NEXT: mov z0.d, p1/m, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov z0.d, p3/m, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z1.d, p0/m, #-1 // =0xffffffffffffffff ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv4f32.nxv4i64( %f) @@ -181,17 +183,18 @@ define @test_signed_v2f64_v2i32( %f) { ; CHECK-LABEL: test_signed_v2f64_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #281474974613504 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov x8, #281474974613504 // =0xffffffe00000 +; CHECK-NEXT: movprfx z2, z0 +; CHECK-NEXT: fcvtzu z2.d, p0/m, z0.d ; CHECK-NEXT: movk x8, #16879, lsl #48 -; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, #0.0 ; CHECK-NEXT: mov z1.d, x8 -; CHECK-NEXT: fcmgt p2.d, p0/z, z0.d, z1.d -; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d -; CHECK-NEXT: not p0.b, p0/z, p1.b -; CHECK-NEXT: mov z1.d, #0xffffffff -; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 -; CHECK-NEXT: mov z0.d, p2/m, z1.d +; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, #0.0 +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: 
fcmgt p0.d, p0/z, z0.d, z1.d +; CHECK-NEXT: mov z0.d, #0xffffffff +; CHECK-NEXT: mov z2.d, p1/m, #0 // =0x0 +; CHECK-NEXT: sel z0.d, p0, z0.d, z2.d ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv2f64.nxv2i32( %f) ret %x @@ -200,26 +203,26 @@ define @test_signed_v4f64_v4i32( %f) { ; CHECK-LABEL: test_signed_v4f64_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #281474974613504 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: movk x8, #16879, lsl #48 -; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, #0.0 +; CHECK-NEXT: mov x8, #281474974613504 // =0xffffffe00000 ; CHECK-NEXT: movprfx z3, z1 ; CHECK-NEXT: fcvtzu z3.d, p0/m, z1.d -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: mov z3.d, p1/m, #0 // =0x0 -; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, #0.0 +; CHECK-NEXT: movk x8, #16879, lsl #48 +; CHECK-NEXT: movprfx z4, z0 +; CHECK-NEXT: fcvtzu z4.d, p0/m, z0.d ; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, #0.0 +; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, #0.0 +; CHECK-NEXT: fcmgt p3.d, p0/z, z1.d, z2.d ; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: fcmgt p2.d, p0/z, z1.d, z2.d -; CHECK-NEXT: movprfx z1, z0 -; CHECK-NEXT: fcvtzu z1.d, p0/m, z0.d +; CHECK-NEXT: not p2.b, p0/z, p2.b ; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z2.d ; CHECK-NEXT: mov z0.d, #0xffffffff -; CHECK-NEXT: mov z1.d, p1/m, #0 // =0x0 -; CHECK-NEXT: sel z2.d, p2, z0.d, z3.d -; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d -; CHECK-NEXT: uzp1 z0.s, z0.s, z2.s +; CHECK-NEXT: mov z3.d, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z4.d, p2/m, #0 // =0x0 +; CHECK-NEXT: sel z1.d, p3, z0.d, z3.d +; CHECK-NEXT: sel z0.d, p0, z0.d, z4.d +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv4f64.nxv4i32( %f) ret %x @@ -228,40 +231,53 @@ define @test_signed_v8f64_v8i32( %f) { ; CHECK-LABEL: test_signed_v8f64_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #281474974613504 +; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG +; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: movk x8, #16879, lsl #48 -; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, #0.0 +; CHECK-NEXT: mov x8, #281474974613504 // =0xffffffe00000 ; CHECK-NEXT: movprfx z5, z1 ; CHECK-NEXT: fcvtzu z5.d, p0/m, z1.d -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: mov z5.d, p1/m, #0 // =0x0 -; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, #0.0 -; CHECK-NEXT: mov z4.d, x8 +; CHECK-NEXT: movk x8, #16879, lsl #48 ; CHECK-NEXT: movprfx z6, z0 ; CHECK-NEXT: fcvtzu z6.d, p0/m, z0.d -; CHECK-NEXT: fcmgt p2.d, p0/z, z1.d, z4.d -; CHECK-NEXT: mov z1.d, #0xffffffff +; CHECK-NEXT: movprfx z7, z3 +; CHECK-NEXT: fcvtzu z7.d, p0/m, z3.d +; CHECK-NEXT: mov z4.d, x8 +; CHECK-NEXT: movprfx z24, z2 +; CHECK-NEXT: fcvtzu z24.d, p0/m, z2.d +; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, #0.0 +; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, #0.0 +; CHECK-NEXT: fcmge p3.d, p0/z, z3.d, #0.0 +; CHECK-NEXT: fcmge p4.d, p0/z, z2.d, #0.0 +; CHECK-NEXT: fcmgt p5.d, p0/z, z1.d, z4.d +; CHECK-NEXT: fcmgt p6.d, p0/z, z0.d, z4.d +; CHECK-NEXT: mov z0.d, #0xffffffff ; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: mov z6.d, p1/m, #0 // =0x0 -; CHECK-NEXT: fcmgt p1.d, p0/z, z0.d, z4.d -; CHECK-NEXT: sel z0.d, p2, z1.d, z5.d -; CHECK-NEXT: fcmge p2.d, p0/z, z3.d, #0.0 -; CHECK-NEXT: sel z5.d, p1, z1.d, z6.d -; CHECK-NEXT: fcmgt p1.d, p0/z, z3.d, z4.d -; CHECK-NEXT: fcvtzu z3.d, p0/m, z3.d -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: mov z3.d, p2/m, #0 // =0x0 -; CHECK-NEXT: fcmge p2.d, p0/z, z2.d, #0.0 -; CHECK-NEXT: movprfx z6, z2 -; CHECK-NEXT: fcvtzu z6.d, p0/m, z2.d ; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: not p3.b, p0/z, p3.b +; CHECK-NEXT: not p4.b, p0/z, p4.b +; CHECK-NEXT: mov z5.d, p1/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p1.d, p0/z, z3.d, z4.d ; CHECK-NEXT: fcmgt p0.d, p0/z, z2.d, z4.d ; CHECK-NEXT: mov z6.d, p2/m, #0 // =0x0 -; CHECK-NEXT: sel z2.d, p1, z1.d, z3.d -; CHECK-NEXT: sel z1.d, p0, z1.d, z6.d -; CHECK-NEXT: uzp1 z0.s, z5.s, z0.s -; CHECK-NEXT: uzp1 z1.s, z1.s, z2.s +; CHECK-NEXT: mov z7.d, p3/m, #0 // =0x0 +; CHECK-NEXT: mov z24.d, p4/m, #0 // =0x0 +; CHECK-NEXT: sel z1.d, p5, z0.d, z5.d +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: sel z2.d, p6, z0.d, z6.d +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: sel z3.d, p1, z0.d, z7.d +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: sel z4.d, p0, z0.d, z24.d +; CHECK-NEXT: uzp1 z0.s, z2.s, z1.s +; CHECK-NEXT: uzp1 z1.s, z4.s, z3.s +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv8f64.nxv8i32( %f) ret %x @@ -270,26 +286,26 @@ define @test_signed_v4f64_v4i16( %f) { ; CHECK-LABEL: test_signed_v4f64_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #281337537757184 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: movk x8, #16623, lsl #48 -; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, #0.0 +; CHECK-NEXT: mov x8, #281337537757184 // =0xffe000000000 ; CHECK-NEXT: movprfx z3, z1 ; CHECK-NEXT: fcvtzu z3.d, p0/m, z1.d -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: mov z3.d, p1/m, #0 // =0x0 -; 
CHECK-NEXT: fcmge p1.d, p0/z, z0.d, #0.0 +; CHECK-NEXT: movk x8, #16623, lsl #48 +; CHECK-NEXT: movprfx z4, z0 +; CHECK-NEXT: fcvtzu z4.d, p0/m, z0.d ; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: fcmge p1.d, p0/z, z1.d, #0.0 +; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, #0.0 +; CHECK-NEXT: fcmgt p3.d, p0/z, z1.d, z2.d ; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: fcmgt p2.d, p0/z, z1.d, z2.d -; CHECK-NEXT: movprfx z1, z0 -; CHECK-NEXT: fcvtzu z1.d, p0/m, z0.d +; CHECK-NEXT: not p2.b, p0/z, p2.b ; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z2.d ; CHECK-NEXT: mov z0.d, #65535 // =0xffff -; CHECK-NEXT: mov z1.d, p1/m, #0 // =0x0 -; CHECK-NEXT: sel z2.d, p2, z0.d, z3.d -; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d -; CHECK-NEXT: uzp1 z0.s, z0.s, z2.s +; CHECK-NEXT: mov z3.d, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z4.d, p2/m, #0 // =0x0 +; CHECK-NEXT: sel z1.d, p3, z0.d, z3.d +; CHECK-NEXT: sel z0.d, p0, z0.d, z4.d +; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv4f64.nxv4i16( %f) ret %x @@ -298,41 +314,54 @@ define @test_signed_v8f64_v8i16( %f) { ; CHECK-LABEL: test_signed_v8f64_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #281337537757184 +; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG +; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: movk x8, #16623, lsl #48 -; CHECK-NEXT: fcmge p1.d, p0/z, z3.d, #0.0 +; CHECK-NEXT: mov x8, #281337537757184 // =0xffe000000000 ; CHECK-NEXT: movprfx z5, z3 ; CHECK-NEXT: fcvtzu z5.d, p0/m, z3.d -; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: mov z5.d, p1/m, #0 // =0x0 -; CHECK-NEXT: fcmge p1.d, p0/z, z2.d, #0.0 -; CHECK-NEXT: mov z4.d, x8 +; CHECK-NEXT: movk x8, #16623, lsl #48 ; CHECK-NEXT: movprfx z6, z2 ; CHECK-NEXT: fcvtzu z6.d, p0/m, z2.d -; CHECK-NEXT: fcmgt p2.d, p0/z, z3.d, z4.d -; CHECK-NEXT: mov z3.d, #65535 // =0xffff +; CHECK-NEXT: movprfx z7, z1 +; CHECK-NEXT: fcvtzu z7.d, p0/m, z1.d +; CHECK-NEXT: mov z4.d, x8 +; CHECK-NEXT: movprfx z24, z0 +; CHECK-NEXT: fcvtzu z24.d, p0/m, z0.d +; CHECK-NEXT: fcmge p1.d, p0/z, z3.d, #0.0 +; CHECK-NEXT: fcmge p2.d, p0/z, z2.d, #0.0 +; CHECK-NEXT: fcmge p3.d, p0/z, z1.d, #0.0 +; CHECK-NEXT: fcmge p4.d, p0/z, z0.d, #0.0 +; CHECK-NEXT: fcmgt p5.d, p0/z, z3.d, z4.d +; CHECK-NEXT: fcmgt p6.d, p0/z, z2.d, z4.d +; CHECK-NEXT: mov z2.d, #65535 // =0xffff ; CHECK-NEXT: not p1.b, p0/z, p1.b -; CHECK-NEXT: mov z6.d, p1/m, #0 // =0x0 -; CHECK-NEXT: fcmgt p1.d, p0/z, z2.d, z4.d -; CHECK-NEXT: sel z2.d, p2, z3.d, z5.d -; CHECK-NEXT: fcmge p2.d, p0/z, z1.d, #0.0 -; CHECK-NEXT: sel z5.d, p1, z3.d, z6.d -; CHECK-NEXT: fcmgt p1.d, p0/z, z1.d, z4.d -; CHECK-NEXT: fcvtzu z1.d, p0/m, z1.d -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: mov z1.d, p2/m, #0 // =0x0 -; CHECK-NEXT: fcmge p2.d, p0/z, z0.d, #0.0 -; CHECK-NEXT: movprfx z6, z0 -; CHECK-NEXT: fcvtzu z6.d, p0/m, z0.d ; CHECK-NEXT: not p2.b, p0/z, p2.b +; CHECK-NEXT: not p3.b, p0/z, p3.b +; CHECK-NEXT: not p4.b, p0/z, p4.b +; CHECK-NEXT: mov z5.d, p1/m, #0 // =0x0 +; CHECK-NEXT: fcmgt p1.d, p0/z, z1.d, z4.d ; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z4.d ; CHECK-NEXT: mov z6.d, p2/m, #0 // =0x0 -; CHECK-NEXT: sel z0.d, p1, z3.d, z1.d -; CHECK-NEXT: sel z1.d, p0, z3.d, z6.d -; 
CHECK-NEXT: uzp1 z2.s, z5.s, z2.s +; CHECK-NEXT: mov z7.d, p3/m, #0 // =0x0 +; CHECK-NEXT: mov z24.d, p4/m, #0 // =0x0 +; CHECK-NEXT: sel z0.d, p5, z2.d, z5.d +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: sel z1.d, p6, z2.d, z6.d +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: sel z3.d, p1, z2.d, z7.d +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: sel z2.d, p0, z2.d, z24.d ; CHECK-NEXT: uzp1 z0.s, z1.s, z0.s -; CHECK-NEXT: uzp1 z0.h, z0.h, z2.h +; CHECK-NEXT: uzp1 z1.s, z2.s, z3.s +; CHECK-NEXT: uzp1 z0.h, z1.h, z0.h +; CHECK-NEXT: addvl sp, sp, #1 +; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv8f64.nxv8i16( %f) ret %x @@ -341,15 +370,15 @@ define @test_signed_v2f64_v2i64( %f) { ; CHECK-LABEL: test_signed_v2f64_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #4895412794951729151 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, #0.0 +; CHECK-NEXT: mov x8, #4895412794951729151 // =0x43efffffffffffff ; CHECK-NEXT: movprfx z1, z0 ; CHECK-NEXT: fcvtzu z1.d, p0/m, z0.d -; CHECK-NEXT: not p1.b, p0/z, p1.b ; CHECK-NEXT: mov z2.d, x8 -; CHECK-NEXT: mov z1.d, p1/m, #0 // =0x0 +; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, #0.0 +; CHECK-NEXT: not p1.b, p0/z, p1.b ; CHECK-NEXT: fcmgt p0.d, p0/z, z0.d, z2.d +; CHECK-NEXT: mov z1.d, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z1.d, p0/m, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret @@ -360,22 +389,22 @@ define @test_signed_v4f64_v4i64( %f) { ; CHECK-LABEL: test_signed_v4f64_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #4895412794951729151 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, #0.0 -; CHECK-NEXT: fcmge p2.d, p0/z, z1.d, #0.0 +; CHECK-NEXT: mov x8, #4895412794951729151 // =0x43efffffffffffff ; CHECK-NEXT: movprfx z2, z0 ; CHECK-NEXT: fcvtzu z2.d, p0/m, z0.d -; CHECK-NEXT: not p1.b, p0/z, p1.b ; CHECK-NEXT: mov z4.d, x8 ; CHECK-NEXT: movprfx z3, z1 ; CHECK-NEXT: fcvtzu z3.d, p0/m, z1.d +; CHECK-NEXT: fcmge p1.d, p0/z, z0.d, #0.0 +; CHECK-NEXT: fcmge p2.d, p0/z, z1.d, #0.0 +; CHECK-NEXT: fcmgt p3.d, p0/z, z0.d, z4.d +; CHECK-NEXT: not p1.b, p0/z, p1.b ; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: mov z2.d, p1/m, #0 // =0x0 -; CHECK-NEXT: fcmgt p1.d, p0/z, z0.d, z4.d ; CHECK-NEXT: fcmgt p0.d, p0/z, z1.d, z4.d +; CHECK-NEXT: mov z2.d, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z3.d, p2/m, #0 // =0x0 -; CHECK-NEXT: mov z2.d, p1/m, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov z2.d, p3/m, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z3.d, p0/m, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z0.d, z2.d ; CHECK-NEXT: mov z1.d, z3.d @@ -398,16 +427,17 @@ define @test_signed_v2f16_v2i32( %f) { ; CHECK-LABEL: test_signed_v2f16_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #31743 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0 +; CHECK-NEXT: mov w8, #31743 // =0x7bff +; CHECK-NEXT: movprfx z2, z0 +; CHECK-NEXT: fcvtzu z2.d, p0/m, z0.h ; CHECK-NEXT: mov z1.h, w8 -; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z1.h -; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.h -; CHECK-NEXT: not p0.b, p0/z, p1.b -; CHECK-NEXT: mov z1.d, #0xffffffff -; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0 -; CHECK-NEXT: mov z0.d, p2/m, z1.d +; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0 +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z1.h +; CHECK-NEXT: mov z0.d, #0xffffffff +; CHECK-NEXT: mov z2.d, p1/m, #0 // =0x0 +; CHECK-NEXT: sel z0.d, p0, z0.d, z2.d ; 
CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv2f16.nxv2i32( %f) ret %x @@ -416,15 +446,15 @@ define @test_signed_v4f16_v4i32( %f) { ; CHECK-LABEL: test_signed_v4f16_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #31743 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0 +; CHECK-NEXT: mov w8, #31743 // =0x7bff ; CHECK-NEXT: movprfx z1, z0 ; CHECK-NEXT: fcvtzu z1.s, p0/m, z0.h -; CHECK-NEXT: not p1.b, p0/z, p1.b ; CHECK-NEXT: mov z2.h, w8 -; CHECK-NEXT: mov z1.s, p1/m, #0 // =0x0 +; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0 +; CHECK-NEXT: not p1.b, p0/z, p1.b ; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z2.h +; CHECK-NEXT: mov z1.s, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z1.s, p0/m, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret @@ -435,24 +465,24 @@ define @test_signed_v8f16_v8i32( %f) { ; CHECK-LABEL: test_signed_v8f16_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #31743 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: uunpklo z1.s, z0.h +; CHECK-NEXT: uunpklo z2.s, z0.h ; CHECK-NEXT: uunpkhi z3.s, z0.h -; CHECK-NEXT: fcmge p2.h, p0/z, z1.h, #0.0 -; CHECK-NEXT: movprfx z0, z1 -; CHECK-NEXT: fcvtzu z0.s, p0/m, z1.h -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: mov z2.h, w8 -; CHECK-NEXT: mov z0.s, p2/m, #0 // =0x0 -; CHECK-NEXT: fcmge p2.h, p0/z, z3.h, #0.0 -; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z2.h +; CHECK-NEXT: mov w8, #31743 // =0x7bff +; CHECK-NEXT: movprfx z0, z2 +; CHECK-NEXT: fcvtzu z0.s, p0/m, z2.h ; CHECK-NEXT: movprfx z1, z3 ; CHECK-NEXT: fcvtzu z1.s, p0/m, z3.h +; CHECK-NEXT: mov z4.h, w8 +; CHECK-NEXT: fcmge p1.h, p0/z, z2.h, #0.0 +; CHECK-NEXT: fcmge p2.h, p0/z, z3.h, #0.0 +; CHECK-NEXT: fcmgt p3.h, p0/z, z2.h, z4.h +; CHECK-NEXT: not p1.b, p0/z, p1.b ; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: fcmgt p0.h, p0/z, z3.h, z2.h +; CHECK-NEXT: fcmgt p0.h, p0/z, z3.h, z4.h +; CHECK-NEXT: mov z0.s, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z1.s, p2/m, #0 // =0x0 -; CHECK-NEXT: mov z0.s, p1/m, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov z0.s, p3/m, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z1.s, p0/m, #-1 // =0xffffffffffffffff ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv8f16.nxv8i32( %f) @@ -462,16 +492,17 @@ define @test_signed_v4f16_v4i16( %f) { ; CHECK-LABEL: test_signed_v4f16_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #31743 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0 +; CHECK-NEXT: mov w8, #31743 // =0x7bff +; CHECK-NEXT: movprfx z2, z0 +; CHECK-NEXT: fcvtzu z2.s, p0/m, z0.h ; CHECK-NEXT: mov z1.h, w8 -; CHECK-NEXT: fcmgt p2.h, p0/z, z0.h, z1.h -; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h -; CHECK-NEXT: not p0.b, p0/z, p1.b -; CHECK-NEXT: mov z1.s, #65535 // =0xffff -; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0 -; CHECK-NEXT: mov z0.s, p2/m, z1.s +; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0 +; CHECK-NEXT: not p1.b, p0/z, p1.b +; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z1.h +; CHECK-NEXT: mov z0.s, #65535 // =0xffff +; CHECK-NEXT: mov z2.s, p1/m, #0 // =0x0 +; CHECK-NEXT: sel z0.s, p0, z0.s, z2.s ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv4f16.nxv4i16( %f) ret %x @@ -480,15 +511,15 @@ define @test_signed_v8f16_v8i16( %f) { ; CHECK-LABEL: test_signed_v8f16_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #31743 ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0 +; CHECK-NEXT: mov w8, #31743 // =0x7bff ; CHECK-NEXT: movprfx z1, z0 ; CHECK-NEXT: fcvtzu z1.h, p0/m, z0.h -; CHECK-NEXT: not p1.b, p0/z, p1.b ; CHECK-NEXT: mov z2.h, w8 -; CHECK-NEXT: mov z1.h, p1/m, #0 // =0x0 +; CHECK-NEXT: fcmge p1.h, 
p0/z, z0.h, #0.0 +; CHECK-NEXT: not p1.b, p0/z, p1.b ; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z2.h +; CHECK-NEXT: mov z1.h, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z1.h, p0/m, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret @@ -499,15 +530,15 @@ define @test_signed_v2f16_v2i64( %f) { ; CHECK-LABEL: test_signed_v2f16_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #31743 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0 +; CHECK-NEXT: mov w8, #31743 // =0x7bff ; CHECK-NEXT: movprfx z1, z0 ; CHECK-NEXT: fcvtzu z1.d, p0/m, z0.h -; CHECK-NEXT: not p1.b, p0/z, p1.b ; CHECK-NEXT: mov z2.h, w8 -; CHECK-NEXT: mov z1.d, p1/m, #0 // =0x0 +; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, #0.0 +; CHECK-NEXT: not p1.b, p0/z, p1.b ; CHECK-NEXT: fcmgt p0.h, p0/z, z0.h, z2.h +; CHECK-NEXT: mov z1.d, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z1.d, p0/m, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret @@ -518,24 +549,24 @@ define @test_signed_v4f16_v4i64( %f) { ; CHECK-LABEL: test_signed_v4f16_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #31743 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: uunpklo z1.d, z0.s +; CHECK-NEXT: uunpklo z2.d, z0.s ; CHECK-NEXT: uunpkhi z3.d, z0.s -; CHECK-NEXT: fcmge p2.h, p0/z, z1.h, #0.0 -; CHECK-NEXT: movprfx z0, z1 -; CHECK-NEXT: fcvtzu z0.d, p0/m, z1.h -; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: mov z2.h, w8 -; CHECK-NEXT: mov z0.d, p2/m, #0 // =0x0 -; CHECK-NEXT: fcmge p2.h, p0/z, z3.h, #0.0 -; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z2.h +; CHECK-NEXT: mov w8, #31743 // =0x7bff +; CHECK-NEXT: movprfx z0, z2 +; CHECK-NEXT: fcvtzu z0.d, p0/m, z2.h ; CHECK-NEXT: movprfx z1, z3 ; CHECK-NEXT: fcvtzu z1.d, p0/m, z3.h +; CHECK-NEXT: mov z4.h, w8 +; CHECK-NEXT: fcmge p1.h, p0/z, z2.h, #0.0 +; CHECK-NEXT: fcmge p2.h, p0/z, z3.h, #0.0 +; CHECK-NEXT: fcmgt p3.h, p0/z, z2.h, z4.h +; CHECK-NEXT: not p1.b, p0/z, p1.b ; CHECK-NEXT: not p2.b, p0/z, p2.b -; CHECK-NEXT: fcmgt p0.h, p0/z, z3.h, z2.h +; CHECK-NEXT: fcmgt p0.h, p0/z, z3.h, z4.h +; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0 ; CHECK-NEXT: mov z1.d, p2/m, #0 // =0x0 -; CHECK-NEXT: mov z0.d, p1/m, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov z0.d, p3/m, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z1.d, p0/m, #-1 // =0xffffffffffffffff ; CHECK-NEXT: ret %x = call @llvm.fptoui.sat.nxv4f16.nxv4i64( %f) diff --git a/llvm/test/CodeGen/AArch64/sve-fptrunc-store.ll b/llvm/test/CodeGen/AArch64/sve-fptrunc-store.ll --- a/llvm/test/CodeGen/AArch64/sve-fptrunc-store.ll +++ b/llvm/test/CodeGen/AArch64/sve-fptrunc-store.ll @@ -69,14 +69,14 @@ ; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1, #1, mul vl] ; CHECK-NEXT: ld1d { z2.d }, p0/z, [x1, #2, mul vl] ; CHECK-NEXT: ld1d { z3.d }, p0/z, [x1, #3, mul vl] +; CHECK-NEXT: fcvt z2.h, p0/m, z2.d ; CHECK-NEXT: fcvt z1.h, p0/m, z1.d ; CHECK-NEXT: fcvt z0.h, p0/m, z0.d ; CHECK-NEXT: fcvt z3.h, p0/m, z3.d -; CHECK-NEXT: fcvt z2.h, p0/m, z2.d -; CHECK-NEXT: uzp1 z2.s, z2.s, z3.s +; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s +; CHECK-NEXT: uzp1 z2.s, z2.s, z3.s ; CHECK-NEXT: uzp1 z0.h, z0.h, z2.h -; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: st1h { z0.h }, p0, [x0] ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/sve-gather-scatter-addr-opts.ll b/llvm/test/CodeGen/AArch64/sve-gather-scatter-addr-opts.ll --- a/llvm/test/CodeGen/AArch64/sve-gather-scatter-addr-opts.ll +++ b/llvm/test/CodeGen/AArch64/sve-gather-scatter-addr-opts.ll @@ -6,10 +6,10 @@ define void @scatter_i8_index_offset_maximum(ptr %base, i64 %offset, %pg, %data) #0 { ; 
CHECK-LABEL: scatter_i8_index_offset_maximum: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #33554431 -; CHECK-NEXT: add x9, x0, x1 +; CHECK-NEXT: mov w8, #33554431 // =0x1ffffff ; CHECK-NEXT: index z1.s, #0, w8 -; CHECK-NEXT: st1b { z0.s }, p0, [x9, z1.s, sxtw] +; CHECK-NEXT: add x8, x0, x1 +; CHECK-NEXT: st1b { z0.s }, p0, [x8, z1.s, sxtw] ; CHECK-NEXT: ret %t0 = insertelement undef, i64 %offset, i32 0 %t1 = shufflevector %t0, undef, zeroinitializer @@ -27,10 +27,10 @@ define void @scatter_i16_index_offset_minimum(ptr %base, i64 %offset, %pg, %data) #0 { ; CHECK-LABEL: scatter_i16_index_offset_minimum: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-33554432 -; CHECK-NEXT: add x9, x0, x1, lsl #1 +; CHECK-NEXT: mov w8, #-33554432 // =0xfe000000 ; CHECK-NEXT: index z1.s, #0, w8 -; CHECK-NEXT: st1h { z0.s }, p0, [x9, z1.s, sxtw #1] +; CHECK-NEXT: add x8, x0, x1, lsl #1 +; CHECK-NEXT: st1h { z0.s }, p0, [x8, z1.s, sxtw #1] ; CHECK-NEXT: ret %t0 = insertelement undef, i64 %offset, i32 0 %t1 = shufflevector %t0, undef, zeroinitializer @@ -48,8 +48,8 @@ define @gather_i8_index_offset_8(ptr %base, i64 %offset, %pg) #0 { ; CHECK-LABEL: gather_i8_index_offset_8: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, x1 ; CHECK-NEXT: index z0.s, #0, #1 +; CHECK-NEXT: add x8, x0, x1 ; CHECK-NEXT: ld1b { z0.s }, p0/z, [x8, z0.s, sxtw] ; CHECK-NEXT: ret %splat.insert0 = insertelement undef, i64 %offset, i32 0 @@ -73,17 +73,17 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: index z1.d, #0, #1 ; CHECK-NEXT: ptrue p1.d -; CHECK-NEXT: mov z2.d, z1.d -; CHECK-NEXT: mov z3.d, x1 -; CHECK-NEXT: incd z2.d -; CHECK-NEXT: mad z1.d, p1/m, z3.d, z3.d -; CHECK-NEXT: mad z2.d, p1/m, z3.d, z3.d -; CHECK-NEXT: punpklo p1.h, p0.b +; CHECK-NEXT: mov z2.d, x1 +; CHECK-NEXT: movprfx z4, z2 +; CHECK-NEXT: mla z4.d, p1/m, z1.d, z2.d +; CHECK-NEXT: punpklo p2.h, p0.b ; CHECK-NEXT: uunpklo z3.d, z0.s ; CHECK-NEXT: punpkhi p0.h, p0.b ; CHECK-NEXT: uunpkhi z0.d, z0.s -; CHECK-NEXT: st1h { z3.d }, p1, [x0, z1.d, lsl #1] -; CHECK-NEXT: st1h { z0.d }, p0, [x0, z2.d, lsl #1] +; CHECK-NEXT: incd z1.d +; CHECK-NEXT: st1h { z3.d }, p2, [x0, z4.d, lsl #1] +; CHECK-NEXT: mad z1.d, p1/m, z2.d, z2.d +; CHECK-NEXT: st1h { z0.d }, p0, [x0, z1.d, lsl #1] ; CHECK-NEXT: ret %t0 = insertelement undef, i64 %offset, i32 0 %t1 = shufflevector %t0, undef, zeroinitializer @@ -101,18 +101,18 @@ define void @scatter_i8_index_offset_maximum_plus_one(ptr %base, i64 %offset, %pg, %data) #0 { ; CHECK-LABEL: scatter_i8_index_offset_maximum_plus_one: ; CHECK: // %bb.0: -; CHECK-NEXT: rdvl x8, #1 -; CHECK-NEXT: mov w9, #67108864 -; CHECK-NEXT: lsr x8, x8, #4 -; CHECK-NEXT: add x11, x0, x1 -; CHECK-NEXT: mov w10, #33554432 ; CHECK-NEXT: punpklo p1.h, p0.b -; CHECK-NEXT: madd x8, x8, x9, x11 +; CHECK-NEXT: mov w8, #33554432 // =0x2000000 ; CHECK-NEXT: uunpklo z2.d, z0.s +; CHECK-NEXT: index z1.d, #0, x8 +; CHECK-NEXT: rdvl x9, #1 +; CHECK-NEXT: add x8, x0, x1 +; CHECK-NEXT: lsr x9, x9, #4 +; CHECK-NEXT: mov w10, #67108864 // =0x4000000 ; CHECK-NEXT: punpkhi p0.h, p0.b ; CHECK-NEXT: uunpkhi z0.d, z0.s -; CHECK-NEXT: index z1.d, #0, x10 -; CHECK-NEXT: st1b { z2.d }, p1, [x11, z1.d] +; CHECK-NEXT: st1b { z2.d }, p1, [x8, z1.d] +; CHECK-NEXT: madd x8, x9, x10, x8 ; CHECK-NEXT: st1b { z0.d }, p0, [x8, z1.d] ; CHECK-NEXT: ret %t0 = insertelement undef, i64 %offset, i32 0 @@ -131,19 +131,19 @@ define void @scatter_i8_index_offset_minimum_minus_one(ptr %base, i64 %offset, %pg, %data) #0 { ; CHECK-LABEL: scatter_i8_index_offset_minimum_minus_one: ; CHECK: // %bb.0: -; CHECK-NEXT: rdvl x8, #1 -; 
CHECK-NEXT: mov x9, #-2 -; CHECK-NEXT: lsr x8, x8, #4 -; CHECK-NEXT: movk x9, #64511, lsl #16 -; CHECK-NEXT: add x11, x0, x1 -; CHECK-NEXT: mov x10, #-33554433 -; CHECK-NEXT: madd x8, x8, x9, x11 ; CHECK-NEXT: punpklo p1.h, p0.b +; CHECK-NEXT: mov x8, #-33554433 // =0xfffffffffdffffff ; CHECK-NEXT: uunpklo z2.d, z0.s +; CHECK-NEXT: index z1.d, #0, x8 +; CHECK-NEXT: rdvl x9, #1 +; CHECK-NEXT: mov x10, #-2 // =0xfffffffffffffffe +; CHECK-NEXT: lsr x9, x9, #4 +; CHECK-NEXT: add x8, x0, x1 +; CHECK-NEXT: movk x10, #64511, lsl #16 ; CHECK-NEXT: punpkhi p0.h, p0.b -; CHECK-NEXT: index z1.d, #0, x10 ; CHECK-NEXT: uunpkhi z0.d, z0.s -; CHECK-NEXT: st1b { z2.d }, p1, [x11, z1.d] +; CHECK-NEXT: st1b { z2.d }, p1, [x8, z1.d] +; CHECK-NEXT: madd x8, x9, x10, x8 ; CHECK-NEXT: st1b { z0.d }, p0, [x8, z1.d] ; CHECK-NEXT: ret %t0 = insertelement undef, i64 %offset, i32 0 @@ -162,18 +162,18 @@ define void @scatter_i8_index_stride_too_big(ptr %base, i64 %offset, %pg, %data) #0 { ; CHECK-LABEL: scatter_i8_index_stride_too_big: ; CHECK: // %bb.0: -; CHECK-NEXT: rdvl x8, #1 -; CHECK-NEXT: mov x9, #-9223372036854775808 -; CHECK-NEXT: lsr x8, x8, #4 -; CHECK-NEXT: add x11, x0, x1 -; CHECK-NEXT: mov x10, #4611686018427387904 ; CHECK-NEXT: punpklo p1.h, p0.b -; CHECK-NEXT: madd x8, x8, x9, x11 +; CHECK-NEXT: mov x8, #4611686018427387904 // =0x4000000000000000 ; CHECK-NEXT: uunpklo z2.d, z0.s +; CHECK-NEXT: index z1.d, #0, x8 +; CHECK-NEXT: rdvl x9, #1 +; CHECK-NEXT: add x8, x0, x1 +; CHECK-NEXT: lsr x9, x9, #4 +; CHECK-NEXT: mov x10, #-9223372036854775808 // =0x8000000000000000 ; CHECK-NEXT: punpkhi p0.h, p0.b ; CHECK-NEXT: uunpkhi z0.d, z0.s -; CHECK-NEXT: index z1.d, #0, x10 -; CHECK-NEXT: st1b { z2.d }, p1, [x11, z1.d] +; CHECK-NEXT: st1b { z2.d }, p1, [x8, z1.d] +; CHECK-NEXT: madd x8, x9, x10, x8 ; CHECK-NEXT: st1b { z0.d }, p0, [x8, z1.d] ; CHECK-NEXT: ret %t0 = insertelement undef, i64 %offset, i32 0 @@ -194,8 +194,8 @@ define @gather_8i8_index_offset_8(ptr %base, i64 %offset, %pg) #0 { ; CHECK-LABEL: gather_8i8_index_offset_8: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, x1, lsl #3 ; CHECK-NEXT: index z0.s, #0, #8 +; CHECK-NEXT: add x8, x0, x1, lsl #3 ; CHECK-NEXT: ld1b { z0.s }, p0/z, [x8, z0.s, sxtw] ; CHECK-NEXT: ret %t0 = insertelement undef, i64 %offset, i32 0 @@ -214,10 +214,10 @@ define @gather_f32_index_offset_8(ptr %base, i64 %offset, %pg) #0 { ; CHECK-LABEL: gather_f32_index_offset_8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32 -; CHECK-NEXT: add x9, x0, x1, lsl #5 +; CHECK-NEXT: mov w8, #32 // =0x20 ; CHECK-NEXT: index z0.s, #0, w8 -; CHECK-NEXT: ld1w { z0.s }, p0/z, [x9, z0.s, sxtw] +; CHECK-NEXT: add x8, x0, x1, lsl #5 +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x8, z0.s, sxtw] ; CHECK-NEXT: ret %t0 = insertelement undef, i64 %offset, i32 0 %t1 = shufflevector %t0, undef, zeroinitializer @@ -235,8 +235,8 @@ define void @scatter_i8_index_offset_8(ptr %base, i64 %offset, %pg, %data) #0 { ; CHECK-LABEL: scatter_i8_index_offset_8: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, x1, lsl #3 ; CHECK-NEXT: index z1.s, #0, #8 +; CHECK-NEXT: add x8, x0, x1, lsl #3 ; CHECK-NEXT: st1b { z0.s }, p0, [x8, z1.s, sxtw] ; CHECK-NEXT: ret %t0 = insertelement undef, i64 %offset, i32 0 @@ -255,10 +255,10 @@ define void @scatter_f16_index_offset_8(ptr %base, i64 %offset, %pg, %data) #0 { ; CHECK-LABEL: scatter_f16_index_offset_8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #16 -; CHECK-NEXT: add x9, x0, x1, lsl #4 +; CHECK-NEXT: mov w8, #16 // =0x10 ; CHECK-NEXT: index z1.s, #0, w8 -; CHECK-NEXT: st1h { z0.s }, p0, [x9, 
z1.s, sxtw] +; CHECK-NEXT: add x8, x0, x1, lsl #4 +; CHECK-NEXT: st1h { z0.s }, p0, [x8, z1.s, sxtw] ; CHECK-NEXT: ret %t0 = insertelement undef, i64 %offset, i32 0 %t1 = shufflevector %t0, undef, zeroinitializer @@ -274,11 +274,11 @@ define void @scatter_f16_index_add_add(ptr %base, i64 %offset, i64 %offset2, %pg, %data) #0 { ; CHECK-LABEL: scatter_f16_index_add_add: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #16 +; CHECK-NEXT: mov w8, #16 // =0x10 ; CHECK-NEXT: add x9, x0, x2, lsl #4 -; CHECK-NEXT: add x9, x9, x1, lsl #4 ; CHECK-NEXT: index z1.s, #0, w8 -; CHECK-NEXT: st1h { z0.s }, p0, [x9, z1.s, sxtw] +; CHECK-NEXT: add x8, x9, x1, lsl #4 +; CHECK-NEXT: st1h { z0.s }, p0, [x8, z1.s, sxtw] ; CHECK-NEXT: ret %splat.offset.ins = insertelement undef, i64 %offset, i32 0 %splat.offset = shufflevector %splat.offset.ins, undef, zeroinitializer @@ -297,11 +297,11 @@ define void @scatter_f16_index_add_add_mul(ptr %base, i64 %offset, i64 %offset2, %pg, %data) #0 { ; CHECK-LABEL: scatter_f16_index_add_add_mul: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #128 +; CHECK-NEXT: mov w8, #128 // =0x80 ; CHECK-NEXT: add x9, x0, x2, lsl #7 -; CHECK-NEXT: add x9, x9, x1, lsl #7 ; CHECK-NEXT: index z1.s, #0, w8 -; CHECK-NEXT: st1h { z0.s }, p0, [x9, z1.s, sxtw] +; CHECK-NEXT: add x8, x9, x1, lsl #7 +; CHECK-NEXT: st1h { z0.s }, p0, [x8, z1.s, sxtw] ; CHECK-NEXT: ret %splat.offset.ins = insertelement undef, i64 %offset, i32 0 %splat.offset = shufflevector %splat.offset.ins, undef, zeroinitializer @@ -322,7 +322,7 @@ define @masked_gather_nxv2i64_const_with_vec_offsets( %vector_offsets, %pg) #0 { ; CHECK-LABEL: masked_gather_nxv2i64_const_with_vec_offsets: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #8 +; CHECK-NEXT: mov w8, #8 // =0x8 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, z0.d, lsl #3] ; CHECK-NEXT: ret %ptrs = getelementptr i64, ptr inttoptr (i64 8 to ptr), %vector_offsets @@ -347,7 +347,7 @@ define @masked_gather_nxv2i64_null_with__vec_plus_imm_offsets( %vector_offsets, %pg) #0 { ; CHECK-LABEL: masked_gather_nxv2i64_null_with__vec_plus_imm_offsets: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #8 +; CHECK-NEXT: mov w8, #8 // =0x8 ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x8, z0.d, lsl #3] ; CHECK-NEXT: ret %scalar_offset.ins = insertelement undef, i64 1, i64 0 @@ -400,7 +400,7 @@ define void @masked_scatter_nxv2i64_const_with_vec_offsets( %vector_offsets, %pg, %data) #0 { ; CHECK-LABEL: masked_scatter_nxv2i64_const_with_vec_offsets: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #8 +; CHECK-NEXT: mov w8, #8 // =0x8 ; CHECK-NEXT: st1d { z1.d }, p0, [x8, z0.d, lsl #3] ; CHECK-NEXT: ret %ptrs = getelementptr i64, ptr inttoptr (i64 8 to ptr), %vector_offsets @@ -425,7 +425,7 @@ define void @masked_scatter_nxv2i64_null_with__vec_plus_imm_offsets( %vector_offsets, %pg, %data) #0 { ; CHECK-LABEL: masked_scatter_nxv2i64_null_with__vec_plus_imm_offsets: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #8 +; CHECK-NEXT: mov w8, #8 // =0x8 ; CHECK-NEXT: st1d { z1.d }, p0, [x8, z0.d, lsl #3] ; CHECK-NEXT: ret %scalar_offset.ins = insertelement undef, i64 1, i64 0 diff --git a/llvm/test/CodeGen/AArch64/sve-gather-scatter-dag-combine.ll b/llvm/test/CodeGen/AArch64/sve-gather-scatter-dag-combine.ll --- a/llvm/test/CodeGen/AArch64/sve-gather-scatter-dag-combine.ll +++ b/llvm/test/CodeGen/AArch64/sve-gather-scatter-dag-combine.ll @@ -31,9 +31,9 @@ ; CHECK-LABEL: no_dag_combine_sext: ; CHECK: // %bb.0: ; CHECK-NEXT: ld1b { z1.d }, p0/z, [z0.d, #16] -; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ptrue p2.d ; CHECK-NEXT: movprfx z0, z1 -; CHECK-NEXT: sxtb 
z0.d, p0/m, z1.d +; CHECK-NEXT: sxtb z0.d, p2/m, z1.d ; CHECK-NEXT: st1b { z1.d }, p1, [x0] ; CHECK-NEXT: ret %base, @@ -76,8 +76,8 @@ define @narrow_i64_gather_index_i8_zext(i8* %out, i8* %in, %d, i64 %ptr){ ; CHECK-LABEL: narrow_i64_gather_index_i8_zext: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x1, x2 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: add x8, x1, x2 ; CHECK-NEXT: ld1b { z0.s }, p0/z, [x1, x2] ; CHECK-NEXT: ld1b { z1.s }, p0/z, [x8, #1, mul vl] ; CHECK-NEXT: ld1b { z2.s }, p0/z, [x8, #2, mul vl] @@ -102,8 +102,8 @@ define @narrow_i64_gather_index_i8_sext(i8* %out, i8* %in, %d, i64 %ptr){ ; CHECK-LABEL: narrow_i64_gather_index_i8_sext: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x1, x2 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: add x8, x1, x2 ; CHECK-NEXT: ld1sb { z0.s }, p0/z, [x1, x2] ; CHECK-NEXT: ld1sb { z1.s }, p0/z, [x8, #1, mul vl] ; CHECK-NEXT: ld1sb { z2.s }, p0/z, [x8, #2, mul vl] @@ -128,8 +128,8 @@ define @narrow_i64_gather_index_i16_zext(i16* %out, i16* %in, %d, i64 %ptr){ ; CHECK-LABEL: narrow_i64_gather_index_i16_zext: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x1, x2, lsl #1 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: add x8, x1, x2, lsl #1 ; CHECK-NEXT: ld1h { z0.s }, p0/z, [x1, x2, lsl #1] ; CHECK-NEXT: ld1h { z1.s }, p0/z, [x8, #1, mul vl] ; CHECK-NEXT: ld1h { z0.s }, p0/z, [x1, z0.s, uxtw #1] @@ -148,8 +148,8 @@ define @narrow_i64_gather_index_i16_sext(i16* %out, i16* %in, %d, i64 %ptr){ ; CHECK-LABEL: narrow_i64_gather_index_i16_sext: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x1, x2, lsl #1 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: add x8, x1, x2, lsl #1 ; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x1, x2, lsl #1] ; CHECK-NEXT: ld1sh { z1.s }, p0/z, [x8, #1, mul vl] ; CHECK-NEXT: ld1h { z0.s }, p0/z, [x1, z0.s, sxtw #1] diff --git a/llvm/test/CodeGen/AArch64/sve-gep.ll b/llvm/test/CodeGen/AArch64/sve-gep.ll --- a/llvm/test/CodeGen/AArch64/sve-gep.ll +++ b/llvm/test/CodeGen/AArch64/sve-gep.ll @@ -225,8 +225,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: rdvl x8, #1 -; CHECK-NEXT: sxtw z1.d, p0/m, z1.d ; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: sxtw z1.d, p0/m, z1.d ; CHECK-NEXT: mla z0.d, p0/m, z1.d, z2.d ; CHECK-NEXT: ret %d = getelementptr , *> %base, %idx diff --git a/llvm/test/CodeGen/AArch64/sve-implicit-zero-filling.ll b/llvm/test/CodeGen/AArch64/sve-implicit-zero-filling.ll --- a/llvm/test/CodeGen/AArch64/sve-implicit-zero-filling.ll +++ b/llvm/test/CodeGen/AArch64/sve-implicit-zero-filling.ll @@ -175,15 +175,15 @@ define @zero_fill_non_zero_index( %pg, %a) #0 { ; CHECK-LABEL: zero_fill_non_zero_index: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1 -; CHECK-NEXT: uminv d0, p0, z0.d -; CHECK-NEXT: fmov x9, d0 +; CHECK-NEXT: ptrue p1.d +; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: index z1.d, #0, #1 -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z0.d, x8 -; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z0.d +; CHECK-NEXT: uminv d3, p0, z0.d +; CHECK-NEXT: mov z2.d, x8 ; CHECK-NEXT: mov z0.d, #0 // =0x0 -; CHECK-NEXT: mov z0.d, p0/m, x9 +; CHECK-NEXT: fmov x8, d3 +; CHECK-NEXT: cmpeq p0.d, p1/z, z1.d, z2.d +; CHECK-NEXT: mov z0.d, p0/m, x8 ; CHECK-NEXT: ret %t1 = call i64 @llvm.aarch64.sve.uminv.nxv2i64( %pg, %a) %t2 = insertelement zeroinitializer, i64 %t1, i64 1 @@ -195,8 +195,8 @@ define @zero_fill_type_mismatch( %pg, %a) #0 { ; CHECK-LABEL: zero_fill_type_mismatch: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.d, #0 // =0x0 ; CHECK-NEXT: uminv d0, p0, z0.d +; CHECK-NEXT: mov z1.d, #0 // =0x0 ; CHECK-NEXT: ret %t1 = call i64 @llvm.aarch64.sve.uminv.nxv2i64( %pg, %a) %t2 = 
insertelement zeroinitializer, i64 %t1, i64 0 @@ -210,11 +210,12 @@ define @zero_fill_no_zero_upper_lanes( %pg, %a) #0 { ; CHECK-LABEL: zero_fill_no_zero_upper_lanes: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.d, vl1 ; CHECK-NEXT: umin z0.d, p0/m, z0.d, z0.d -; CHECK-NEXT: ptrue p0.d, vl1 +; CHECK-NEXT: mov z1.d, #0 // =0x0 ; CHECK-NEXT: fmov x8, d0 -; CHECK-NEXT: mov z0.d, #0 // =0x0 -; CHECK-NEXT: mov z0.d, p0/m, x8 +; CHECK-NEXT: mov z1.d, p1/m, x8 +; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret %t1 = call @llvm.aarch64.sve.umin.nxv2i64( %pg, %a, %a) %t2 = extractelement %t1, i64 0 diff --git a/llvm/test/CodeGen/AArch64/sve-insert-element.ll b/llvm/test/CodeGen/AArch64/sve-insert-element.ll --- a/llvm/test/CodeGen/AArch64/sve-insert-element.ll +++ b/llvm/test/CodeGen/AArch64/sve-insert-element.ll @@ -4,8 +4,8 @@ define @test_lane0_16xi8( %a) { ; CHECK-LABEL: test_lane0_16xi8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #30 ; CHECK-NEXT: ptrue p0.b, vl1 +; CHECK-NEXT: mov w8, #30 // =0x1e ; CHECK-NEXT: mov z0.b, p0/m, w8 ; CHECK-NEXT: ret %b = insertelement %a, i8 30, i32 0 @@ -15,8 +15,8 @@ define @test_lane0_8xi16( %a) { ; CHECK-LABEL: test_lane0_8xi16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #30 ; CHECK-NEXT: ptrue p0.h, vl1 +; CHECK-NEXT: mov w8, #30 // =0x1e ; CHECK-NEXT: mov z0.h, p0/m, w8 ; CHECK-NEXT: ret %b = insertelement %a, i16 30, i32 0 @@ -26,8 +26,8 @@ define @test_lane0_4xi32( %a) { ; CHECK-LABEL: test_lane0_4xi32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #30 ; CHECK-NEXT: ptrue p0.s, vl1 +; CHECK-NEXT: mov w8, #30 // =0x1e ; CHECK-NEXT: mov z0.s, p0/m, w8 ; CHECK-NEXT: ret %b = insertelement %a, i32 30, i32 0 @@ -37,8 +37,8 @@ define @test_lane0_2xi64( %a) { ; CHECK-LABEL: test_lane0_2xi64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #30 ; CHECK-NEXT: ptrue p0.d, vl1 +; CHECK-NEXT: mov w8, #30 // =0x1e ; CHECK-NEXT: mov z0.d, p0/m, x8 ; CHECK-NEXT: ret %b = insertelement %a, i64 30, i32 0 @@ -48,8 +48,8 @@ define @test_lane0_2xf64( %a) { ; CHECK-LABEL: test_lane0_2xf64: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov d1, #1.00000000 ; CHECK-NEXT: ptrue p0.d, vl1 +; CHECK-NEXT: fmov d1, #1.00000000 ; CHECK-NEXT: mov z0.d, p0/m, z1.d ; CHECK-NEXT: ret %b = insertelement %a, double 1.0, i32 0 @@ -59,8 +59,8 @@ define @test_lane0_4xf32( %a) { ; CHECK-LABEL: test_lane0_4xf32: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov s1, #1.00000000 ; CHECK-NEXT: ptrue p0.s, vl1 +; CHECK-NEXT: fmov s1, #1.00000000 ; CHECK-NEXT: mov z0.s, p0/m, z1.s ; CHECK-NEXT: ret %b = insertelement %a, float 1.0, i32 0 @@ -70,8 +70,8 @@ define @test_lane0_8xf16( %a) { ; CHECK-LABEL: test_lane0_8xf16: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov h1, #1.00000000 ; CHECK-NEXT: ptrue p0.h, vl1 +; CHECK-NEXT: fmov h1, #1.00000000 ; CHECK-NEXT: mov z0.h, p0/m, z1.h ; CHECK-NEXT: ret %b = insertelement %a, half 1.0, i32 0 @@ -93,13 +93,13 @@ define @test_lane4_2xi64( %a) { ; CHECK-LABEL: test_lane4_2xi64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #4 -; CHECK-NEXT: mov w9, #30 -; CHECK-NEXT: index z2.d, #0, #1 ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov z1.d, x8 -; CHECK-NEXT: cmpeq p0.d, p0/z, z2.d, z1.d -; CHECK-NEXT: mov z0.d, p0/m, x9 +; CHECK-NEXT: mov w8, #4 // =0x4 +; CHECK-NEXT: index z1.d, #0, #1 +; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: mov w8, #30 // =0x1e +; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d +; CHECK-NEXT: mov z0.d, p0/m, x8 ; CHECK-NEXT: ret %b = insertelement %a, i64 30, i32 4 ret %b @@ -109,12 +109,12 @@ define @test_lane9_8xf16( %a) { ; CHECK-LABEL: test_lane9_8xf16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, 
#9 -; CHECK-NEXT: fmov h1, #1.00000000 -; CHECK-NEXT: index z3.h, #0, #1 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov w8, #9 // =0x9 +; CHECK-NEXT: index z1.h, #0, #1 ; CHECK-NEXT: mov z2.h, w8 -; CHECK-NEXT: cmpeq p0.h, p0/z, z3.h, z2.h +; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z2.h +; CHECK-NEXT: fmov h1, #1.00000000 ; CHECK-NEXT: mov z0.h, p0/m, h1 ; CHECK-NEXT: ret %b = insertelement %a, half 1.0, i32 9 @@ -124,11 +124,11 @@ define @test_lane9_8xbf16( %a, bfloat %x) { ; CHECK-LABEL: test_lane9_8xbf16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #9 -; CHECK-NEXT: index z3.h, #0, #1 ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z2.h, w8 -; CHECK-NEXT: cmpeq p0.h, p0/z, z3.h, z2.h +; CHECK-NEXT: mov w8, #9 // =0x9 +; CHECK-NEXT: index z2.h, #0, #1 +; CHECK-NEXT: mov z3.h, w8 +; CHECK-NEXT: cmpeq p0.h, p0/z, z2.h, z3.h ; CHECK-NEXT: mov z0.h, p0/m, h1 ; CHECK-NEXT: ret %b = insertelement %a, bfloat %x, i32 9 @@ -138,13 +138,13 @@ define @test_lane1_16xi8( %a) { ; CHECK-LABEL: test_lane1_16xi8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1 -; CHECK-NEXT: mov w9, #30 -; CHECK-NEXT: index z2.b, #0, #1 ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: mov z1.b, w8 -; CHECK-NEXT: cmpeq p0.b, p0/z, z2.b, z1.b -; CHECK-NEXT: mov z0.b, p0/m, w9 +; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: index z1.b, #0, #1 +; CHECK-NEXT: mov z2.b, w8 +; CHECK-NEXT: mov w8, #30 // =0x1e +; CHECK-NEXT: cmpeq p0.b, p0/z, z1.b, z2.b +; CHECK-NEXT: mov z0.b, p0/m, w8 ; CHECK-NEXT: ret %b = insertelement %a, i8 30, i32 1 ret %b @@ -153,13 +153,13 @@ define @test_lanex_16xi8( %a, i32 %x) { ; CHECK-LABEL: test_lanex_16xi8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: mov w9, #30 -; CHECK-NEXT: index z2.b, #0, #1 ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: mov z1.b, w8 -; CHECK-NEXT: cmpeq p0.b, p0/z, z2.b, z1.b -; CHECK-NEXT: mov z0.b, p0/m, w9 +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: index z1.b, #0, #1 +; CHECK-NEXT: mov z2.b, w8 +; CHECK-NEXT: mov w8, #30 // =0x1e +; CHECK-NEXT: cmpeq p0.b, p0/z, z1.b, z2.b +; CHECK-NEXT: mov z0.b, p0/m, w8 ; CHECK-NEXT: ret %b = insertelement %a, i8 30, i32 %x ret %b @@ -179,11 +179,11 @@ define @test_lane6_undef_8xi16(i16 %a) { ; CHECK-LABEL: test_lane6_undef_8xi16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #6 -; CHECK-NEXT: index z1.h, #0, #1 ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: mov z0.h, w8 -; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z0.h +; CHECK-NEXT: mov w8, #6 // =0x6 +; CHECK-NEXT: index z0.h, #0, #1 +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, z1.h ; CHECK-NEXT: mov z0.h, p0/m, w0 ; CHECK-NEXT: ret %b = insertelement undef, i16 %a, i32 6 @@ -202,8 +202,8 @@ define @test_insert0_of_extract0_16xi8( %a, %b) { ; CHECK-LABEL: test_insert0_of_extract0_16xi8: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov w8, s1 ; CHECK-NEXT: ptrue p0.b, vl1 +; CHECK-NEXT: fmov w8, s1 ; CHECK-NEXT: mov z0.b, p0/m, w8 ; CHECK-NEXT: ret %c = extractelement %b, i32 0 @@ -214,14 +214,14 @@ define @test_insert64_of_extract64_16xi8( %a, %b) { ; CHECK-LABEL: test_insert64_of_extract64_16xi8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #64 -; CHECK-NEXT: index z3.b, #0, #1 +; CHECK-NEXT: mov w8, #64 // =0x40 ; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: whilels p0.b, xzr, x8 ; CHECK-NEXT: mov z2.b, w8 -; CHECK-NEXT: lastb w8, p0, z1.b -; CHECK-NEXT: cmpeq p0.b, p1/z, z3.b, z2.b -; CHECK-NEXT: mov z0.b, p0/m, w8 +; CHECK-NEXT: lastb w9, p0, z1.b +; CHECK-NEXT: index z1.b, #0, #1 +; CHECK-NEXT: cmpeq p0.b, p1/z, z1.b, z2.b +; CHECK-NEXT: mov z0.b, p0/m, w9 ; CHECK-NEXT: ret %c = extractelement %b, i32 64 %d = 
insertelement %a, i8 %c, i32 64 @@ -231,13 +231,13 @@ define @test_insert3_of_extract1_16xi8( %a, %b) { ; CHECK-LABEL: test_insert3_of_extract1_16xi8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #3 -; CHECK-NEXT: umov w9, v1.b[1] -; CHECK-NEXT: index z2.b, #0, #1 ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: mov z1.b, w8 -; CHECK-NEXT: cmpeq p0.b, p0/z, z2.b, z1.b -; CHECK-NEXT: mov z0.b, p0/m, w9 +; CHECK-NEXT: mov w8, #3 // =0x3 +; CHECK-NEXT: index z2.b, #0, #1 +; CHECK-NEXT: mov z3.b, w8 +; CHECK-NEXT: umov w8, v1.b[1] +; CHECK-NEXT: cmpeq p0.b, p0/z, z2.b, z3.b +; CHECK-NEXT: mov z0.b, p0/m, w8 ; CHECK-NEXT: ret %c = extractelement %b, i32 1 %d = insertelement %a, i8 %c, i32 3 @@ -329,9 +329,9 @@ define @test_insert_with_index_nxv2f16(half %h, i64 %idx) { ; CHECK-LABEL: test_insert_with_index_nxv2f16: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: index z1.d, #0, #1 ; CHECK-NEXT: mov z2.d, x0 -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d ; CHECK-NEXT: mov z0.h, p0/m, h0 ; CHECK-NEXT: ret @@ -342,9 +342,9 @@ define @test_insert_with_index_nxv4f16(half %h, i64 %idx) { ; CHECK-LABEL: test_insert_with_index_nxv4f16: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: index z1.s, #0, #1 ; CHECK-NEXT: mov z2.s, w0 -; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s ; CHECK-NEXT: mov z0.h, p0/m, h0 ; CHECK-NEXT: ret @@ -355,9 +355,9 @@ define @test_insert_with_index_nxv8f16(half %h, i64 %idx) { ; CHECK-LABEL: test_insert_with_index_nxv8f16: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: index z1.h, #0, #1 ; CHECK-NEXT: mov z2.h, w0 -; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z2.h ; CHECK-NEXT: mov z0.h, p0/m, h0 ; CHECK-NEXT: ret @@ -368,9 +368,9 @@ define @test_insert_with_index_nxv2bf16(bfloat %h, i64 %idx) { ; CHECK-LABEL: test_insert_with_index_nxv2bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: index z1.d, #0, #1 ; CHECK-NEXT: mov z2.d, x0 -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d ; CHECK-NEXT: mov z0.h, p0/m, h0 ; CHECK-NEXT: ret @@ -381,9 +381,9 @@ define @test_insert_with_index_nxv4bf16(bfloat %h, i64 %idx) { ; CHECK-LABEL: test_insert_with_index_nxv4bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: index z1.s, #0, #1 ; CHECK-NEXT: mov z2.s, w0 -; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s ; CHECK-NEXT: mov z0.h, p0/m, h0 ; CHECK-NEXT: ret @@ -394,9 +394,9 @@ define @test_insert_with_index_nxv8bf16(bfloat %h, i64 %idx) { ; CHECK-LABEL: test_insert_with_index_nxv8bf16: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: index z1.h, #0, #1 ; CHECK-NEXT: mov z2.h, w0 -; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z2.h ; CHECK-NEXT: mov z0.h, p0/m, h0 ; CHECK-NEXT: ret @@ -407,9 +407,9 @@ define @test_insert_with_index_nxv2f32(float %f, i64 %idx) { ; CHECK-LABEL: test_insert_with_index_nxv2f32: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: index z1.d, #0, #1 ; CHECK-NEXT: mov z2.d, x0 -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d ; CHECK-NEXT: mov z0.s, p0/m, s0 ; CHECK-NEXT: ret @@ -420,9 +420,9 @@ define @test_insert_with_index_nxv4f32(float %f, i64 %idx) { ; CHECK-LABEL: test_insert_with_index_nxv4f32: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: index z1.s, #0, #1 ; CHECK-NEXT: mov z2.s, w0 -; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s ; CHECK-NEXT: mov z0.s, p0/m, s0 ; CHECK-NEXT: ret @@ -433,9 +433,9 @@ define 
@test_insert_with_index_nxv2f64(double %d, i64 %idx) { ; CHECK-LABEL: test_insert_with_index_nxv2f64: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: index z1.d, #0, #1 ; CHECK-NEXT: mov z2.d, x0 -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d ; CHECK-NEXT: mov z0.d, p0/m, d0 ; CHECK-NEXT: ret @@ -450,8 +450,8 @@ ; CHECK-NEXT: ptrue p1.d, vl1 ; CHECK-NEXT: mov z0.d, p0/z, #1 // =0x1 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: mov z0.d, p1/m, x0 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z0.d, p1/m, x0 ; CHECK-NEXT: and z0.d, z0.d, #0x1 ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 ; CHECK-NEXT: ret @@ -462,11 +462,11 @@ define @test_predicate_insert_4xi1_immediate ( %val, i1 %elt) { ; CHECK-LABEL: test_predicate_insert_4xi1_immediate: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #2 -; CHECK-NEXT: index z1.s, #0, #1 ; CHECK-NEXT: ptrue p1.s -; CHECK-NEXT: mov z0.s, w8 -; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z0.s +; CHECK-NEXT: mov w8, #2 // =0x2 +; CHECK-NEXT: index z0.s, #0, #1 +; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: cmpeq p2.s, p1/z, z0.s, z1.s ; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1 ; CHECK-NEXT: mov z0.s, p2/m, w0 ; CHECK-NEXT: and z0.s, z0.s, #0x1 @@ -479,14 +479,14 @@ define @test_predicate_insert_8xi1_immediate ( %val, i32 %idx) { ; CHECK-LABEL: test_predicate_insert_8xi1_immediate: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w0 -; CHECK-NEXT: mov w9, #1 -; CHECK-NEXT: index z1.h, #0, #1 ; CHECK-NEXT: ptrue p1.h -; CHECK-NEXT: mov z0.h, w8 -; CHECK-NEXT: cmpeq p2.h, p1/z, z1.h, z0.h +; CHECK-NEXT: mov w8, w0 +; CHECK-NEXT: index z0.h, #0, #1 +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: mov w8, #1 // =0x1 +; CHECK-NEXT: cmpeq p2.h, p1/z, z0.h, z1.h ; CHECK-NEXT: mov z0.h, p0/z, #1 // =0x1 -; CHECK-NEXT: mov z0.h, p2/m, w9 +; CHECK-NEXT: mov z0.h, p2/m, w8 ; CHECK-NEXT: and z0.h, z0.h, #0x1 ; CHECK-NEXT: cmpne p0.h, p1/z, z0.h, #0 ; CHECK-NEXT: ret @@ -497,12 +497,12 @@ define @test_predicate_insert_16xi1_immediate ( %val) { ; CHECK-LABEL: test_predicate_insert_16xi1_immediate: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w9, #4 -; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: index z1.b, #0, #1 ; CHECK-NEXT: ptrue p1.b -; CHECK-NEXT: mov z0.b, w9 -; CHECK-NEXT: cmpeq p2.b, p1/z, z1.b, z0.b +; CHECK-NEXT: mov w8, #4 // =0x4 +; CHECK-NEXT: index z0.b, #0, #1 +; CHECK-NEXT: mov z1.b, w8 +; CHECK-NEXT: mov w8, wzr +; CHECK-NEXT: cmpeq p2.b, p1/z, z0.b, z1.b ; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1 ; CHECK-NEXT: mov z0.b, p2/m, w8 ; CHECK-NEXT: and z0.b, z0.b, #0x1 @@ -516,12 +516,12 @@ define @test_predicate_insert_2xi1( %val, i1 %elt, i32 %idx) { ; CHECK-LABEL: test_predicate_insert_2xi1: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w1 -; CHECK-NEXT: index z1.d, #0, #1 ; CHECK-NEXT: ptrue p1.d +; CHECK-NEXT: mov w8, w1 +; CHECK-NEXT: index z0.d, #0, #1 +; CHECK-NEXT: mov z1.d, x8 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: mov z0.d, x8 -; CHECK-NEXT: cmpeq p2.d, p1/z, z1.d, z0.d +; CHECK-NEXT: cmpeq p2.d, p1/z, z0.d, z1.d ; CHECK-NEXT: mov z0.d, p0/z, #1 // =0x1 ; CHECK-NEXT: mov z0.d, p2/m, x0 ; CHECK-NEXT: and z0.d, z0.d, #0x1 @@ -534,11 +534,11 @@ define @test_predicate_insert_4xi1( %val, i1 %elt, i32 %idx) { ; CHECK-LABEL: test_predicate_insert_4xi1: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w1 -; CHECK-NEXT: index z1.s, #0, #1 ; CHECK-NEXT: ptrue p1.s -; CHECK-NEXT: mov z0.s, w8 -; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z0.s +; CHECK-NEXT: mov w8, w1 +; CHECK-NEXT: index z0.s, #0, #1 +; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: cmpeq p2.s, 
p1/z, z0.s, z1.s ; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1 ; CHECK-NEXT: mov z0.s, p2/m, w0 ; CHECK-NEXT: and z0.s, z0.s, #0x1 @@ -550,11 +550,11 @@ define @test_predicate_insert_8xi1( %val, i1 %elt, i32 %idx) { ; CHECK-LABEL: test_predicate_insert_8xi1: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w1 -; CHECK-NEXT: index z1.h, #0, #1 ; CHECK-NEXT: ptrue p1.h -; CHECK-NEXT: mov z0.h, w8 -; CHECK-NEXT: cmpeq p2.h, p1/z, z1.h, z0.h +; CHECK-NEXT: mov w8, w1 +; CHECK-NEXT: index z0.h, #0, #1 +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: cmpeq p2.h, p1/z, z0.h, z1.h ; CHECK-NEXT: mov z0.h, p0/z, #1 // =0x1 ; CHECK-NEXT: mov z0.h, p2/m, w0 ; CHECK-NEXT: and z0.h, z0.h, #0x1 @@ -567,11 +567,11 @@ define @test_predicate_insert_16xi1( %val, i1 %elt, i32 %idx) { ; CHECK-LABEL: test_predicate_insert_16xi1: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, w1 -; CHECK-NEXT: index z1.b, #0, #1 ; CHECK-NEXT: ptrue p1.b -; CHECK-NEXT: mov z0.b, w8 -; CHECK-NEXT: cmpeq p2.b, p1/z, z1.b, z0.b +; CHECK-NEXT: mov w8, w1 +; CHECK-NEXT: index z0.b, #0, #1 +; CHECK-NEXT: mov z1.b, w8 +; CHECK-NEXT: cmpeq p2.b, p1/z, z0.b, z1.b ; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1 ; CHECK-NEXT: mov z0.b, p2/m, w0 ; CHECK-NEXT: and z0.b, z0.b, #0x1 @@ -589,24 +589,24 @@ ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG -; CHECK-NEXT: mov x8, #-1 -; CHECK-NEXT: mov w9, w1 +; CHECK-NEXT: ptrue p2.b +; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z0.b, p1/z, #1 // =0x1 -; CHECK-NEXT: ptrue p1.b -; CHECK-NEXT: st1b { z0.b }, p1, [sp, #1, mul vl] -; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1 +; CHECK-NEXT: mov z1.b, p0/z, #1 // =0x1 ; CHECK-NEXT: addvl x8, x8, #2 -; CHECK-NEXT: st1b { z0.b }, p1, [sp] +; CHECK-NEXT: mov w9, w1 ; CHECK-NEXT: cmp x9, x8 ; CHECK-NEXT: csel x8, x9, x8, lo ; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: st1b { z0.b }, p2, [sp, #1, mul vl] +; CHECK-NEXT: st1b { z1.b }, p2, [sp] ; CHECK-NEXT: strb w0, [x9, x8] -; CHECK-NEXT: ld1b { z0.b }, p1/z, [sp] -; CHECK-NEXT: ld1b { z1.b }, p1/z, [sp, #1, mul vl] +; CHECK-NEXT: ld1b { z0.b }, p2/z, [sp] +; CHECK-NEXT: ld1b { z1.b }, p2/z, [sp, #1, mul vl] ; CHECK-NEXT: and z0.b, z0.b, #0x1 ; CHECK-NEXT: and z1.b, z1.b, #0x1 -; CHECK-NEXT: cmpne p0.b, p1/z, z0.b, #0 -; CHECK-NEXT: cmpne p1.b, p1/z, z1.b, #0 +; CHECK-NEXT: cmpne p0.b, p2/z, z0.b, #0 +; CHECK-NEXT: cmpne p1.b, p2/z, z1.b, #0 ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: .cfi_def_cfa wsp, 16 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll --- a/llvm/test/CodeGen/AArch64/sve-insert-vector.ll +++ b/llvm/test/CodeGen/AArch64/sve-insert-vector.ll @@ -17,15 +17,15 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: cntd x8 -; CHECK-NEXT: mov w9, #2 +; CHECK-NEXT: mov w9, #2 // =0x2 ; CHECK-NEXT: sub x8, x8, #2 -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: cmp x8, #2 -; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: csel x8, x8, x9, lo ; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: lsl x8, x8, #3 +; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: str q1, [x9, x8] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #1 @@ -51,15 +51,15 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: cntw x8 -; CHECK-NEXT: mov w9, #4 +; CHECK-NEXT: mov w9, #4 // =0x4 ; CHECK-NEXT: sub x8, x8, #4 -; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: cmp x8, #4 -; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: csel x8, x8, x9, lo ; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: lsl x8, x8, #2 +; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: str q1, [x9, x8] ; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #1 @@ -85,15 +85,15 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: cnth x8 -; CHECK-NEXT: mov w9, #8 +; CHECK-NEXT: mov w9, #8 // =0x8 ; CHECK-NEXT: sub x8, x8, #8 -; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: cmp x8, #8 -; CHECK-NEXT: st1h { z0.h }, p0, [sp] ; CHECK-NEXT: csel x8, x8, x9, lo ; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: lsl x8, x8, #1 +; CHECK-NEXT: st1h { z0.h }, p0, [sp] ; CHECK-NEXT: str q1, [x9, x8] ; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #1 @@ -119,15 +119,15 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov x8, #-16 -; CHECK-NEXT: mov w9, #16 ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: st1b { z0.b }, p0, [sp] +; CHECK-NEXT: mov x8, #-16 // =0xfffffffffffffff0 +; CHECK-NEXT: mov w9, #16 // =0x10 ; CHECK-NEXT: addvl x8, x8, #1 +; CHECK-NEXT: mov x10, sp ; CHECK-NEXT: cmp x8, #16 ; CHECK-NEXT: csel x8, x8, x9, lo -; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: str q1, [x9, x8] +; CHECK-NEXT: st1b { z0.b }, p0, [sp] +; CHECK-NEXT: str q1, [x10, x8] ; CHECK-NEXT: ld1b { z0.b }, p0/z, [sp] ; CHECK-NEXT: addvl sp, sp, #1 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -221,8 +221,8 @@ define void @insert_v2i64_nxv16i64_lo0(<2 x i64>* %psv, * %out) { ; CHECK-LABEL: insert_v2i64_nxv16i64_lo0: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: st1d { z0.d }, p0, [x1] ; CHECK-NEXT: ret %sv = load <2 x i64>, <2 x i64>* %psv @@ -239,8 +239,8 @@ ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG -; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: str q0, [sp, #16] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp] ; CHECK-NEXT: ld1d { z1.d }, p0/z, [sp, #1, mul vl] diff --git a/llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll b/llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll --- a/llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-int-arith-imm.ll @@ -55,8 +55,8 @@ define @smax_i16_out_of_range( %a) { ; CHECK-LABEL: smax_i16_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: dupm z1.b, #0x1 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: dupm z1.b, #0x1 ; CHECK-NEXT: smax z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret %elt = insertelement undef, i16 257, i32 0 @@ -93,8 +93,8 @@ define @smax_i32_out_of_range( %a) { ; CHECK-LABEL: smax_i32_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.s, #-129 // =0xffffffffffffff7f ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov z1.s, #-129 // =0xffffffffffffff7f ; CHECK-NEXT: smax z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret %elt = insertelement undef, i32 -129, i32 0 @@ -131,8 +131,8 @@ define @smax_i64_out_of_range( %a) { ; CHECK-LABEL: smax_i64_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov 
z1.d, #65535 // =0xffff ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z1.d, #65535 // =0xffff ; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret %elt = insertelement undef, i64 65535, i32 0 @@ -196,8 +196,8 @@ define @smin_i16_out_of_range( %a) { ; CHECK-LABEL: smin_i16_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: dupm z1.b, #0x1 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: dupm z1.b, #0x1 ; CHECK-NEXT: smin z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret %elt = insertelement undef, i16 257, i32 0 @@ -234,8 +234,8 @@ define @smin_i32_out_of_range( %a) { ; CHECK-LABEL: smin_i32_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.s, #-129 // =0xffffffffffffff7f ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov z1.s, #-129 // =0xffffffffffffff7f ; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret %elt = insertelement undef, i32 -129, i32 0 @@ -272,8 +272,8 @@ define @smin_i64_out_of_range( %a) { ; CHECK-LABEL: smin_i64_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.d, #65535 // =0xffff ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z1.d, #65535 // =0xffff ; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret %elt = insertelement undef, i64 65535, i32 0 @@ -325,8 +325,8 @@ define @umax_i16_out_of_range( %a) { ; CHECK-LABEL: umax_i16_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: dupm z1.b, #0x1 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: dupm z1.b, #0x1 ; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret %elt = insertelement undef, i16 257, i32 0 @@ -351,8 +351,8 @@ define @umax_i32_out_of_range( %a) { ; CHECK-LABEL: umax_i32_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #257 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov w8, #257 // =0x101 ; CHECK-NEXT: mov z1.s, w8 ; CHECK-NEXT: umax z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret @@ -378,8 +378,8 @@ define @umax_i64_out_of_range( %a) { ; CHECK-LABEL: umax_i64_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.d, #65535 // =0xffff ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z1.d, #65535 // =0xffff ; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret %elt = insertelement undef, i64 65535, i32 0 @@ -431,8 +431,8 @@ define @umin_i16_out_of_range( %a) { ; CHECK-LABEL: umin_i16_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: dupm z1.b, #0x1 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: dupm z1.b, #0x1 ; CHECK-NEXT: umin z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret %elt = insertelement undef, i16 257, i32 0 @@ -457,8 +457,8 @@ define @umin_i32_out_of_range( %a) { ; CHECK-LABEL: umin_i32_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #257 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov w8, #257 // =0x101 ; CHECK-NEXT: mov z1.s, w8 ; CHECK-NEXT: umin z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret @@ -484,8 +484,8 @@ define @umin_i64_out_of_range( %a) { ; CHECK-LABEL: umin_i64_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.d, #65535 // =0xffff ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z1.d, #65535 // =0xffff ; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret %elt = insertelement undef, i64 65535, i32 0 @@ -589,8 +589,8 @@ define @mul_i16_range( %a) { ; CHECK-LABEL: mul_i16_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.h, #255 // =0xff ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov z1.h, #255 // =0xff ; CHECK-NEXT: mul z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret %elt = insertelement undef, i16 255, i32 0 @@ -602,8 +602,8 @@ define @mul_i32_range( %a) { ; CHECK-LABEL: mul_i32_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.s, #255 // =0xff ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov z1.s, #255 // =0xff ; CHECK-NEXT: 
mul z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret %elt = insertelement undef, i32 255, i32 0 @@ -615,8 +615,8 @@ define @mul_i64_range( %a) { ; CHECK-LABEL: mul_i64_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.d, #255 // =0xff ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: mov z1.d, #255 // =0xff ; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret %elt = insertelement undef, i64 255, i32 0 @@ -766,8 +766,8 @@ define @sdiv_const( %a) #0 { ; CHECK-LABEL: sdiv_const: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov z1.s, #3 // =0x3 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov z1.s, #3 // =0x3 ; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret entry: @@ -778,8 +778,8 @@ define @udiv_const( %a) #0 { ; CHECK-LABEL: udiv_const: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov z1.s, #3 // =0x3 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov z1.s, #3 // =0x3 ; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/sve-int-arith.ll b/llvm/test/CodeGen/AArch64/sve-int-arith.ll --- a/llvm/test/CodeGen/AArch64/sve-int-arith.ll +++ b/llvm/test/CodeGen/AArch64/sve-int-arith.ll @@ -165,9 +165,9 @@ ; CHECK-LABEL: abs_nxv8i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: abs z2.d, p0/m, z2.d ; CHECK-NEXT: abs z0.d, p0/m, z0.d ; CHECK-NEXT: abs z1.d, p0/m, z1.d +; CHECK-NEXT: abs z2.d, p0/m, z2.d ; CHECK-NEXT: abs z3.d, p0/m, z3.d ; CHECK-NEXT: ret %res = call @llvm.abs.nxv8i64( %a, i1 false) @@ -748,8 +748,8 @@ define @multiple_fused_ops( %a, %b) ; CHECK-LABEL: multiple_fused_ops: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #200 // =0xc8 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov w8, #200 // =0xc8 ; CHECK-NEXT: mov z2.h, w8 ; CHECK-NEXT: mla z2.h, p0/m, z0.h, z1.h ; CHECK-NEXT: mul z0.h, p0/m, z0.h, z2.h @@ -770,19 +770,19 @@ ; CHECK-NEXT: b.lt .LBB70_3 ; CHECK-NEXT: // %bb.1: // %for.body.preheader ; CHECK-NEXT: mov w9, w3 +; CHECK-NEXT: ptrue p1.s +; CHECK-NEXT: mov z0.s, #1 // =0x1 +; CHECK-NEXT: whilelo p0.s, xzr, x9 ; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: cntw x10 -; CHECK-NEXT: mov z0.s, #1 // =0x1 -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: whilelo p1.s, xzr, x9 ; CHECK-NEXT: .LBB70_2: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ld1w { z1.s }, p1/z, [x1, x8, lsl #2] -; CHECK-NEXT: ld1w { z2.s }, p1/z, [x2, x8, lsl #2] -; CHECK-NEXT: mad z1.s, p0/m, z2.s, z0.s -; CHECK-NEXT: st1w { z1.s }, p1, [x0, x8, lsl #2] +; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1, x8, lsl #2] +; CHECK-NEXT: ld1w { z2.s }, p0/z, [x2, x8, lsl #2] +; CHECK-NEXT: mad z1.s, p1/m, z2.s, z0.s +; CHECK-NEXT: st1w { z1.s }, p0, [x0, x8, lsl #2] ; CHECK-NEXT: add x8, x8, x10 -; CHECK-NEXT: whilelo p1.s, x8, x9 +; CHECK-NEXT: whilelo p0.s, x8, x9 ; CHECK-NEXT: b.mi .LBB70_2 ; CHECK-NEXT: .LBB70_3: // %for.cond.cleanup ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-int-reduce.ll b/llvm/test/CodeGen/AArch64/sve-int-reduce.ll --- a/llvm/test/CodeGen/AArch64/sve-int-reduce.ll +++ b/llvm/test/CodeGen/AArch64/sve-int-reduce.ll @@ -380,12 +380,12 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: uunpkhi z2.h, z0.b ; CHECK-NEXT: mov z1.d, #127 // =0x7f +; CHECK-NEXT: uunpklo z0.h, z0.b +; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: uunpklo z3.s, z2.h ; CHECK-NEXT: uunpkhi z2.s, z2.h ; CHECK-NEXT: uunpklo z3.d, z3.s -; CHECK-NEXT: uunpklo z0.h, z0.b ; CHECK-NEXT: uzp1 z3.s, z3.s, z1.s -; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: uzp1 z2.h, z3.h, z2.h ; CHECK-NEXT: uzp1 z2.b, z0.b, z2.b ; CHECK-NEXT: uunpkhi z2.h, z2.b @@ -416,10 +416,10 @@ ; CHECK: // %bb.0: ; 
CHECK-NEXT: uunpkhi z2.h, z0.b ; CHECK-NEXT: mov z1.s, #0 // =0x0 -; CHECK-NEXT: uunpklo z2.s, z2.h ; CHECK-NEXT: uunpklo z0.h, z0.b -; CHECK-NEXT: uzp1 z1.h, z2.h, z1.h ; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: uunpklo z2.s, z2.h +; CHECK-NEXT: uzp1 z1.h, z2.h, z1.h ; CHECK-NEXT: uzp1 z0.b, z0.b, z1.b ; CHECK-NEXT: uaddv d0, p0, z0.b ; CHECK-NEXT: fmov x0, d0 @@ -436,12 +436,12 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: uunpkhi z2.h, z0.b ; CHECK-NEXT: mov z1.d, #0 // =0x0 +; CHECK-NEXT: uunpklo z0.h, z0.b +; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: uunpkhi z3.s, z2.h ; CHECK-NEXT: uunpklo z2.s, z2.h ; CHECK-NEXT: uunpklo z3.d, z3.s -; CHECK-NEXT: uunpklo z0.h, z0.b ; CHECK-NEXT: uzp1 z1.s, z3.s, z1.s -; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: uzp1 z1.h, z2.h, z1.h ; CHECK-NEXT: uzp1 z0.b, z0.b, z1.b ; CHECK-NEXT: umaxv b0, p0, z0.b diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems-i32.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems-i32.ll --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems-i32.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-counting-elems-i32.ll @@ -111,9 +111,9 @@ define i32 @inch_mul(i32 %a) { ; NO_SCALAR_INC-LABEL: inch_mul: ; NO_SCALAR_INC: // %bb.0: -; NO_SCALAR_INC-NEXT: mov w8, #5 -; NO_SCALAR_INC-NEXT: cnth x9, vl8 -; NO_SCALAR_INC-NEXT: madd w0, w9, w8, w0 +; NO_SCALAR_INC-NEXT: cnth x8, vl8 +; NO_SCALAR_INC-NEXT: mov w9, #5 // =0x5 +; NO_SCALAR_INC-NEXT: madd w0, w8, w9, w0 ; NO_SCALAR_INC-NEXT: ret ; ; CHECK-LABEL: inch_mul: @@ -155,9 +155,9 @@ define i32 @dech_mul(i32 %a) { ; NO_SCALAR_INC-LABEL: dech_mul: ; NO_SCALAR_INC: // %bb.0: -; NO_SCALAR_INC-NEXT: mov w8, #7 -; NO_SCALAR_INC-NEXT: cnth x9, vl16 -; NO_SCALAR_INC-NEXT: msub w0, w9, w8, w0 +; NO_SCALAR_INC-NEXT: cnth x8, vl16 +; NO_SCALAR_INC-NEXT: mov w9, #7 // =0x7 +; NO_SCALAR_INC-NEXT: msub w0, w8, w9, w0 ; NO_SCALAR_INC-NEXT: ret ; ; CHECK-LABEL: dech_mul: @@ -199,9 +199,9 @@ define i32 @incw_mul(i32 %a) { ; NO_SCALAR_INC-LABEL: incw_mul: ; NO_SCALAR_INC: // %bb.0: -; NO_SCALAR_INC-NEXT: mov w8, #12 -; NO_SCALAR_INC-NEXT: cntw x9, vl32 -; NO_SCALAR_INC-NEXT: madd w0, w9, w8, w0 +; NO_SCALAR_INC-NEXT: cntw x8, vl32 +; NO_SCALAR_INC-NEXT: mov w9, #12 // =0xc +; NO_SCALAR_INC-NEXT: madd w0, w8, w9, w0 ; NO_SCALAR_INC-NEXT: ret ; ; CHECK-LABEL: incw_mul: @@ -284,9 +284,9 @@ define i32 @incd_mul(i32 %base) { ; NO_SCALAR_INC-LABEL: incd_mul: ; NO_SCALAR_INC: // %bb.0: -; NO_SCALAR_INC-NEXT: mov w8, #15 -; NO_SCALAR_INC-NEXT: cntd x9, vl64 -; NO_SCALAR_INC-NEXT: madd w0, w9, w8, w0 +; NO_SCALAR_INC-NEXT: cntd x8, vl64 +; NO_SCALAR_INC-NEXT: mov w9, #15 // =0xf +; NO_SCALAR_INC-NEXT: madd w0, w8, w9, w0 ; NO_SCALAR_INC-NEXT: ret ; ; CHECK-LABEL: incd_mul: @@ -328,9 +328,9 @@ define i32 @decd_mul(i32 %a) { ; NO_SCALAR_INC-LABEL: decd_mul: ; NO_SCALAR_INC: // %bb.0: -; NO_SCALAR_INC-NEXT: mov w8, #9 -; NO_SCALAR_INC-NEXT: cntd x9, vl2 -; NO_SCALAR_INC-NEXT: msub w0, w9, w8, w0 +; NO_SCALAR_INC-NEXT: cntd x8, vl2 +; NO_SCALAR_INC-NEXT: mov w9, #9 // =0x9 +; NO_SCALAR_INC-NEXT: msub w0, w8, w9, w0 ; NO_SCALAR_INC-NEXT: ret ; ; CHECK-LABEL: decd_mul: diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-index.ll @@ -44,8 +44,8 @@ define @index_ii_range() { ; CHECK-LABEL: index_ii_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #16 -; CHECK-NEXT: mov x9, #-17 +; CHECK-NEXT: mov w8, #16 // =0x10 +; CHECK-NEXT: mov x9, #-17 
// =0xffffffffffffffef ; CHECK-NEXT: index z0.d, x9, x8 ; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.index.nxv2i64(i64 -17, i64 16) @@ -109,7 +109,7 @@ define @index_ir_range(i32 %a) { ; CHECK-LABEL: index_ir_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-17 +; CHECK-NEXT: mov w8, #-17 // =0xffffffef ; CHECK-NEXT: index z0.s, w8, w0 ; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.index.nxv4i32(i32 -17, i32 %a) @@ -174,7 +174,7 @@ define @index_ri_range(i16 %a) { ; CHECK-LABEL: index_ri_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #16 +; CHECK-NEXT: mov w8, #16 // =0x10 ; CHECK-NEXT: index z0.h, w0, w8 ; CHECK-NEXT: ret %out = call @llvm.aarch64.sve.index.nxv8i16(i16 %a, i16 16) @@ -239,10 +239,10 @@ define @index_rr_i32_not_combine(i32 %a, i32 %b) { ; CHECK-LABEL: index_rr_i32_not_combine: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: index z0.s, #0, #1 ; CHECK-NEXT: mov z1.s, w0 ; CHECK-NEXT: mov z2.s, w1 -; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: mla z1.s, p0/m, z0.s, z2.s ; CHECK-NEXT: add z0.s, z1.s, z0.s ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll @@ -63,7 +63,7 @@ define @add_i32_out_of_range( %a) { ; CHECK-LABEL: add_i32_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #257 +; CHECK-NEXT: mov w8, #257 // =0x101 ; CHECK-NEXT: mov z1.s, w8 ; CHECK-NEXT: add z0.s, z0.s, z1.s ; CHECK-NEXT: ret @@ -93,7 +93,7 @@ define @add_i64_out_of_range( %a) { ; CHECK-LABEL: add_i64_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #257 +; CHECK-NEXT: mov w8, #257 // =0x101 ; CHECK-NEXT: mov z1.d, x8 ; CHECK-NEXT: add z0.d, z0.d, z1.d ; CHECK-NEXT: ret @@ -168,7 +168,7 @@ define @sub_i32_out_of_range( %a) { ; CHECK-LABEL: sub_i32_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #257 +; CHECK-NEXT: mov w8, #257 // =0x101 ; CHECK-NEXT: mov z1.s, w8 ; CHECK-NEXT: sub z0.s, z0.s, z1.s ; CHECK-NEXT: ret @@ -198,7 +198,7 @@ define @sub_i64_out_of_range( %a) { ; CHECK-LABEL: sub_i64_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #257 +; CHECK-NEXT: mov w8, #257 // =0x101 ; CHECK-NEXT: mov z1.d, x8 ; CHECK-NEXT: sub z0.d, z0.d, z1.d ; CHECK-NEXT: ret @@ -323,7 +323,7 @@ define @subr_i32_out_of_range( %a) { ; CHECK-LABEL: subr_i32_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #257 +; CHECK-NEXT: mov w8, #257 // =0x101 ; CHECK-NEXT: mov z1.s, w8 ; CHECK-NEXT: sub z0.s, z1.s, z0.s ; CHECK-NEXT: ret @@ -353,7 +353,7 @@ define @subr_i64_out_of_range( %a) { ; CHECK-LABEL: subr_i64_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #257 +; CHECK-NEXT: mov w8, #257 // =0x101 ; CHECK-NEXT: mov z1.d, x8 ; CHECK-NEXT: sub z0.d, z1.d, z0.d ; CHECK-NEXT: ret @@ -449,8 +449,8 @@ define @smax_i16_out_of_range( %a) { ; CHECK-LABEL: smax_i16_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #129 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: mov w8, #129 // =0x81 ; CHECK-NEXT: mov z1.h, w8 ; CHECK-NEXT: smax z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: ret @@ -636,8 +636,8 @@ define @smin_i32_out_of_range( %a) { ; CHECK-LABEL: smin_i32_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #257 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov w8, #257 // =0x101 ; CHECK-NEXT: mov z1.s, w8 ; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret @@ -794,8 +794,8 @@ define @umax_i32_out_of_range( %a) { ; CHECK-LABEL: umax_i32_out_of_range: ; 
CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #257 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov w8, #257 // =0x101 ; CHECK-NEXT: mov z1.s, w8 ; CHECK-NEXT: umax z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret @@ -951,8 +951,8 @@ define @umin_i32_out_of_range( %a) { ; CHECK-LABEL: umin_i32_out_of_range: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #257 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov w8, #257 // =0x101 ; CHECK-NEXT: mov z1.s, w8 ; CHECK-NEXT: umin z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.ll --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-merging.ll @@ -415,9 +415,9 @@ ; CHECK-LABEL: bic_i64_zero_no_comm: ; CHECK: // %bb.0: ; CHECK-NEXT: mov z2.d, #0 // =0x0 -; CHECK-NEXT: sel z0.d, p0, z0.d, z2.d -; CHECK-NEXT: bic z1.d, p0/m, z1.d, z0.d +; CHECK-NEXT: mov z2.d, p0/m, z0.d ; CHECK-NEXT: mov z0.d, z1.d +; CHECK-NEXT: bic z0.d, p0/m, z0.d, z2.d ; CHECK-NEXT: ret %a_z = select %pg, %a, zeroinitializer %out = call @llvm.aarch64.sve.bic.nxv2i64( %pg, diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-perm-select.ll @@ -452,10 +452,10 @@ define @dupq_lane_i8( %a, i64 %idx) { ; CHECK-LABEL: dupq_lane_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, x0 ; CHECK-NEXT: index z1.d, #0, #1 -; CHECK-NEXT: and z1.d, z1.d, #0x1 +; CHECK-NEXT: add x8, x0, x0 ; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: and z1.d, z1.d, #0x1 ; CHECK-NEXT: add z1.d, z1.d, z2.d ; CHECK-NEXT: tbl z0.d, { z0.d }, z1.d ; CHECK-NEXT: ret @@ -467,10 +467,10 @@ define @dupq_lane_i16( %a, i64 %idx) { ; CHECK-LABEL: dupq_lane_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, x0 ; CHECK-NEXT: index z1.d, #0, #1 -; CHECK-NEXT: and z1.d, z1.d, #0x1 +; CHECK-NEXT: add x8, x0, x0 ; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: and z1.d, z1.d, #0x1 ; CHECK-NEXT: add z1.d, z1.d, z2.d ; CHECK-NEXT: tbl z0.d, { z0.d }, z1.d ; CHECK-NEXT: ret @@ -482,10 +482,10 @@ define @dupq_lane_i32( %a, i64 %idx) { ; CHECK-LABEL: dupq_lane_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, x0 ; CHECK-NEXT: index z1.d, #0, #1 -; CHECK-NEXT: and z1.d, z1.d, #0x1 +; CHECK-NEXT: add x8, x0, x0 ; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: and z1.d, z1.d, #0x1 ; CHECK-NEXT: add z1.d, z1.d, z2.d ; CHECK-NEXT: tbl z0.d, { z0.d }, z1.d ; CHECK-NEXT: ret @@ -497,10 +497,10 @@ define @dupq_lane_i64( %a, i64 %idx) { ; CHECK-LABEL: dupq_lane_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, x0 ; CHECK-NEXT: index z1.d, #0, #1 -; CHECK-NEXT: and z1.d, z1.d, #0x1 +; CHECK-NEXT: add x8, x0, x0 ; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: and z1.d, z1.d, #0x1 ; CHECK-NEXT: add z1.d, z1.d, z2.d ; CHECK-NEXT: tbl z0.d, { z0.d }, z1.d ; CHECK-NEXT: ret @@ -512,10 +512,10 @@ define @dupq_lane_f16( %a, i64 %idx) { ; CHECK-LABEL: dupq_lane_f16: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, x0 ; CHECK-NEXT: index z1.d, #0, #1 -; CHECK-NEXT: and z1.d, z1.d, #0x1 +; CHECK-NEXT: add x8, x0, x0 ; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: and z1.d, z1.d, #0x1 ; CHECK-NEXT: add z1.d, z1.d, z2.d ; CHECK-NEXT: tbl z0.d, { z0.d }, z1.d ; CHECK-NEXT: ret @@ -527,10 +527,10 @@ define @dupq_lane_bf16( %a, i64 %idx) #0 { ; CHECK-LABEL: dupq_lane_bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, x0 ; CHECK-NEXT: index 
z1.d, #0, #1 -; CHECK-NEXT: and z1.d, z1.d, #0x1 +; CHECK-NEXT: add x8, x0, x0 ; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: and z1.d, z1.d, #0x1 ; CHECK-NEXT: add z1.d, z1.d, z2.d ; CHECK-NEXT: tbl z0.d, { z0.d }, z1.d ; CHECK-NEXT: ret @@ -542,10 +542,10 @@ define @dupq_lane_f32( %a, i64 %idx) { ; CHECK-LABEL: dupq_lane_f32: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, x0 ; CHECK-NEXT: index z1.d, #0, #1 -; CHECK-NEXT: and z1.d, z1.d, #0x1 +; CHECK-NEXT: add x8, x0, x0 ; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: and z1.d, z1.d, #0x1 ; CHECK-NEXT: add z1.d, z1.d, z2.d ; CHECK-NEXT: tbl z0.d, { z0.d }, z1.d ; CHECK-NEXT: ret @@ -557,10 +557,10 @@ define @dupq_lane_f64( %a, i64 %idx) { ; CHECK-LABEL: dupq_lane_f64: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, x0 ; CHECK-NEXT: index z1.d, #0, #1 -; CHECK-NEXT: and z1.d, z1.d, #0x1 +; CHECK-NEXT: add x8, x0, x0 ; CHECK-NEXT: mov z2.d, x8 +; CHECK-NEXT: and z1.d, z1.d, #0x1 ; CHECK-NEXT: add z1.d, z1.d, z2.d ; CHECK-NEXT: tbl z0.d, { z0.d }, z1.d ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll @@ -535,12 +535,12 @@ ; CHECK-LABEL: st4b_i8_invalid_imm_out_of_lower_bound: ; CHECK: // %bb.0: ; CHECK-NEXT: rdvl x8, #1 -; CHECK-NEXT: mov x9, #-576 -; CHECK-NEXT: lsr x8, x8, #4 +; CHECK-NEXT: mov x9, #-576 // =0xfffffffffffffdc0 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: lsr x8, x8, #4 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: mul x8, x8, x9 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mul x8, x8, x9 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, x8] ; CHECK-NEXT: ret @@ -562,12 +562,12 @@ ; CHECK-LABEL: st4b_i8_invalid_imm_out_of_upper_bound: ; CHECK: // %bb.0: ; CHECK-NEXT: rdvl x8, #1 -; CHECK-NEXT: mov w9, #512 -; CHECK-NEXT: lsr x8, x8, #4 +; CHECK-NEXT: mov w9, #512 // =0x200 ; CHECK-NEXT: // kill: def $z3 killed $z3 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: lsr x8, x8, #4 ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: mul x8, x8, x9 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 +; CHECK-NEXT: mul x8, x8, x9 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, x8] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-ld-post-inc.ll b/llvm/test/CodeGen/AArch64/sve-ld-post-inc.ll --- a/llvm/test/CodeGen/AArch64/sve-ld-post-inc.ll +++ b/llvm/test/CodeGen/AArch64/sve-ld-post-inc.ll @@ -39,18 +39,18 @@ ; CHECK-LABEL: test_post_ld1_int_fixed: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov w9, #2 -; CHECK-NEXT: ld1d { z0.d }, p0/z, [x2] -; CHECK-NEXT: ldr x10, [x0, x1, lsl #3] -; CHECK-NEXT: ldr x11, [x0] -; CHECK-NEXT: index z3.d, #0, #1 -; CHECK-NEXT: mov z2.d, x9 +; CHECK-NEXT: mov w9, #2 // =0x2 +; CHECK-NEXT: index z0.d, #0, #1 ; CHECK-NEXT: ptrue p1.d, vl1 -; CHECK-NEXT: cmpeq p2.d, p0/z, z3.d, z2.d -; CHECK-NEXT: mov z1.d, z0.d -; CHECK-NEXT: mov z0.d, p2/m, x10 -; CHECK-NEXT: mov z1.d, p1/m, x11 -; CHECK-NEXT: add z0.d, z1.d, z0.d +; CHECK-NEXT: mov z1.d, x9 +; CHECK-NEXT: ld1d { z2.d 
}, p0/z, [x2] +; CHECK-NEXT: cmpeq p2.d, p0/z, z0.d, z1.d +; CHECK-NEXT: ldr x9, [x0] +; CHECK-NEXT: ldr x10, [x0, x1, lsl #3] +; CHECK-NEXT: mov z0.d, z2.d +; CHECK-NEXT: mov z2.d, p2/m, x10 +; CHECK-NEXT: mov z0.d, p1/m, x9 +; CHECK-NEXT: add z0.d, z0.d, z2.d ; CHECK-NEXT: st1d { z0.d }, p0, [x8] ; CHECK-NEXT: ret %A = load <4 x i64>, ptr %addr @@ -67,17 +67,17 @@ ; CHECK-LABEL: test_post_ld1_double_fixed: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: mov w9, #2 -; CHECK-NEXT: ld1d { z0.d }, p0/z, [x2] -; CHECK-NEXT: ldr d1, [x0, x1, lsl #3] -; CHECK-NEXT: ldr d2, [x0] -; CHECK-NEXT: index z4.d, #0, #1 -; CHECK-NEXT: mov z3.d, x9 +; CHECK-NEXT: mov w9, #2 // =0x2 +; CHECK-NEXT: index z0.d, #0, #1 +; CHECK-NEXT: mov z1.d, x9 ; CHECK-NEXT: ptrue p1.d, vl1 -; CHECK-NEXT: cmpeq p2.d, p0/z, z4.d, z3.d -; CHECK-NEXT: sel z2.d, p1, z2.d, z0.d -; CHECK-NEXT: mov z0.d, p2/m, d1 -; CHECK-NEXT: fadd z0.d, z2.d, z0.d +; CHECK-NEXT: ld1d { z2.d }, p0/z, [x2] +; CHECK-NEXT: cmpeq p2.d, p0/z, z0.d, z1.d +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x0, x1, lsl #3] +; CHECK-NEXT: sel z0.d, p1, z0.d, z2.d +; CHECK-NEXT: mov z2.d, p2/m, d1 +; CHECK-NEXT: fadd z0.d, z0.d, z2.d ; CHECK-NEXT: st1d { z0.d }, p0, [x8] ; CHECK-NEXT: ret %A = load <4 x double>, ptr %addr diff --git a/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-imm.ll b/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-imm.ll --- a/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-ld1-addressing-mode-reg-imm.ll @@ -39,8 +39,8 @@ define @ld1b_out_of_upper_bound(* %a) { ; CHECK-LABEL: ld1b_out_of_upper_bound: ; CHECK: // %bb.0: -; CHECK-NEXT: rdvl x8, #8 ; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: rdvl x8, #8 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , * %a, i64 8 @@ -51,8 +51,8 @@ define @ld1b_out_of_lower_bound(* %a) { ; CHECK-LABEL: ld1b_out_of_lower_bound: ; CHECK: // %bb.0: -; CHECK-NEXT: rdvl x8, #-9 ; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: rdvl x8, #-9 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , * %a, i64 -9 diff --git a/llvm/test/CodeGen/AArch64/sve-ld1r.ll b/llvm/test/CodeGen/AArch64/sve-ld1r.ll --- a/llvm/test/CodeGen/AArch64/sve-ld1r.ll +++ b/llvm/test/CodeGen/AArch64/sve-ld1r.ll @@ -19,8 +19,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: adrp x8, :got:g8 ; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: adrp x8, :got:g8 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:g8] ; CHECK-NEXT: ldrb w8, [x8] ; CHECK-NEXT: strb w8, [sp, #12] @@ -65,8 +65,8 @@ define @ld1rb_gep_out_of_range_up(ptr %valp) { ; CHECK-LABEL: ld1rb_gep_out_of_range_up: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, #64 ; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: add x8, x0, #64 ; CHECK-NEXT: ld1rb { z0.b }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr i8, ptr %valp, i32 64 @@ -79,8 +79,8 @@ define @ld1rb_gep_out_of_range_down(ptr %valp) { ; CHECK-LABEL: ld1rb_gep_out_of_range_down: ; CHECK: // %bb.0: -; CHECK-NEXT: sub x8, x0, #1 ; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: sub x8, x0, #1 ; CHECK-NEXT: ld1rb { z0.b }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr i8, ptr %valp, i32 -1 @@ -196,8 +196,8 @@ define @ld1rh_gep_out_of_range_up(ptr %valp) { ; CHECK-LABEL: ld1rh_gep_out_of_range_up: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, #128 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: add x8, x0, #128 ; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = 
getelementptr i16, ptr %valp, i32 64 @@ -210,8 +210,8 @@ define @ld1rh_gep_out_of_range_down(ptr %valp) { ; CHECK-LABEL: ld1rh_gep_out_of_range_down: ; CHECK: // %bb.0: -; CHECK-NEXT: sub x8, x0, #2 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: sub x8, x0, #2 ; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr i16, ptr %valp, i32 -1 @@ -301,8 +301,8 @@ define @ld1rw_gep_out_of_range_up(ptr %valp) { ; CHECK-LABEL: ld1rw_gep_out_of_range_up: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, #256 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: add x8, x0, #256 ; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr i32, ptr %valp, i32 64 @@ -315,8 +315,8 @@ define @ld1rw_gep_out_of_range_down(ptr %valp) { ; CHECK-LABEL: ld1rw_gep_out_of_range_down: ; CHECK: // %bb.0: -; CHECK-NEXT: sub x8, x0, #4 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: sub x8, x0, #4 ; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr i32, ptr %valp, i32 -1 @@ -380,8 +380,8 @@ define @ld1rd_gep_out_of_range_up(ptr %valp) { ; CHECK-LABEL: ld1rd_gep_out_of_range_up: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, #512 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: add x8, x0, #512 ; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr i64, ptr %valp, i32 64 @@ -394,8 +394,8 @@ define @ld1rd_gep_out_of_range_down(ptr %valp) { ; CHECK-LABEL: ld1rd_gep_out_of_range_down: ; CHECK: // %bb.0: -; CHECK-NEXT: sub x8, x0, #8 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sub x8, x0, #8 ; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr i64, ptr %valp, i32 -1 @@ -433,8 +433,8 @@ define @ld1rh_half_gep_out_of_range_up(ptr %valp) { ; CHECK-LABEL: ld1rh_half_gep_out_of_range_up: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, #128 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: add x8, x0, #128 ; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr half, ptr %valp, i32 64 @@ -447,8 +447,8 @@ define @ld1rh_half_gep_out_of_range_down(ptr %valp) { ; CHECK-LABEL: ld1rh_half_gep_out_of_range_down: ; CHECK: // %bb.0: -; CHECK-NEXT: sub x8, x0, #2 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: sub x8, x0, #2 ; CHECK-NEXT: ld1rh { z0.h }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr half, ptr %valp, i32 -1 @@ -486,8 +486,8 @@ define @ld1rh_half_unpacked4_gep_out_of_range_up(ptr %valp) { ; CHECK-LABEL: ld1rh_half_unpacked4_gep_out_of_range_up: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, #128 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: add x8, x0, #128 ; CHECK-NEXT: ld1rh { z0.s }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr half, ptr %valp, i32 64 @@ -500,8 +500,8 @@ define @ld1rh_half_unpacked4_gep_out_of_range_down(ptr %valp) { ; CHECK-LABEL: ld1rh_half_unpacked4_gep_out_of_range_down: ; CHECK: // %bb.0: -; CHECK-NEXT: sub x8, x0, #2 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: sub x8, x0, #2 ; CHECK-NEXT: ld1rh { z0.s }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr half, ptr %valp, i32 -1 @@ -539,8 +539,8 @@ define @ld1rh_half_unpacked2_gep_out_of_range_up(ptr %valp) { ; CHECK-LABEL: ld1rh_half_unpacked2_gep_out_of_range_up: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, #128 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: add x8, x0, #128 ; CHECK-NEXT: ld1rh { z0.d }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr half, ptr %valp, i32 64 @@ -553,8 +553,8 @@ define @ld1rh_half_unpacked2_gep_out_of_range_down(ptr %valp) { ; CHECK-LABEL: ld1rh_half_unpacked2_gep_out_of_range_down: ; CHECK: // %bb.0: -; CHECK-NEXT: sub 
x8, x0, #2 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sub x8, x0, #2 ; CHECK-NEXT: ld1rh { z0.d }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr half, ptr %valp, i32 -1 @@ -592,8 +592,8 @@ define @ld1rw_float_gep_out_of_range_up(ptr %valp) { ; CHECK-LABEL: ld1rw_float_gep_out_of_range_up: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, #256 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: add x8, x0, #256 ; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr float, ptr %valp, i32 64 @@ -606,8 +606,8 @@ define @ld1rw_float_gep_out_of_range_down(ptr %valp) { ; CHECK-LABEL: ld1rw_float_gep_out_of_range_down: ; CHECK: // %bb.0: -; CHECK-NEXT: sub x8, x0, #4 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: sub x8, x0, #4 ; CHECK-NEXT: ld1rw { z0.s }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr float, ptr %valp, i32 -1 @@ -645,8 +645,8 @@ define @ld1rw_float_unpacked2_gep_out_of_range_up(ptr %valp) { ; CHECK-LABEL: ld1rw_float_unpacked2_gep_out_of_range_up: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, #256 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: add x8, x0, #256 ; CHECK-NEXT: ld1rw { z0.d }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr float, ptr %valp, i32 64 @@ -659,8 +659,8 @@ define @ld1rw_float_unpacked2_gep_out_of_range_down(ptr %valp) { ; CHECK-LABEL: ld1rw_float_unpacked2_gep_out_of_range_down: ; CHECK: // %bb.0: -; CHECK-NEXT: sub x8, x0, #4 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sub x8, x0, #4 ; CHECK-NEXT: ld1rw { z0.d }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr float, ptr %valp, i32 -1 @@ -698,8 +698,8 @@ define @ld1rd_double_gep_out_of_range_up(ptr %valp) { ; CHECK-LABEL: ld1rd_double_gep_out_of_range_up: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, #512 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: add x8, x0, #512 ; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr double, ptr %valp, i32 64 @@ -712,8 +712,8 @@ define @ld1rd_double_gep_out_of_range_down(ptr %valp) { ; CHECK-LABEL: ld1rd_double_gep_out_of_range_down: ; CHECK: // %bb.0: -; CHECK-NEXT: sub x8, x0, #8 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: sub x8, x0, #8 ; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x8] ; CHECK-NEXT: ret %valp2 = getelementptr double, ptr %valp, i32 -1 @@ -1192,9 +1192,8 @@ ; CHECK-LABEL: avoid_preindex_load: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: add x8, x0, #1 ; CHECK-NEXT: ld1rsb { z0.d }, p0/z, [x0, #1] -; CHECK-NEXT: mov x0, x8 +; CHECK-NEXT: add x0, x0, #1 ; CHECK-NEXT: st1d { z0.d }, p0, [x1] ; CHECK-NEXT: ret %ptr = getelementptr inbounds i8, i8* %src, i64 1 @@ -1211,11 +1210,10 @@ define i8* @avoid_preindex_load_dup(i8* %src, %pg, * %out) { ; CHECK-LABEL: avoid_preindex_load_dup: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, #1 +; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: ld1rsb { z0.d }, p0/z, [x0, #1] -; CHECK-NEXT: mov x0, x8 -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: st1d { z0.d }, p0, [x1] +; CHECK-NEXT: add x0, x0, #1 +; CHECK-NEXT: st1d { z0.d }, p1, [x1] ; CHECK-NEXT: ret %ptr = getelementptr inbounds i8, i8* %src, i64 1 %tmp = load i8, i8* %ptr, align 4 @@ -1229,11 +1227,10 @@ define i8* @avoid_preindex_load_dup_passthru_zero(i8* %src, %pg, * %out) { ; CHECK-LABEL: avoid_preindex_load_dup_passthru_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: add x8, x0, #1 +; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: ld1rsb { z0.d }, p0/z, [x0, #1] -; CHECK-NEXT: mov x0, x8 -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: st1d { z0.d }, p0, [x1] +; CHECK-NEXT: add x0, x0, #1 +; CHECK-NEXT: st1d { z0.d }, p1, [x1] ; CHECK-NEXT: ret %ptr = getelementptr 
inbounds i8, i8* %src, i64 1 %tmp = load i8, i8* %ptr, align 4 @@ -1247,8 +1244,8 @@ define i8* @preindex_load_dup_passthru( %passthru, i8* %src, %pg, * %out) { ; CHECK-LABEL: preindex_load_dup_passthru: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrsb x8, [x0, #1]! ; CHECK-NEXT: ptrue p1.d +; CHECK-NEXT: ldrsb x8, [x0, #1]! ; CHECK-NEXT: mov z0.d, p0/m, x8 ; CHECK-NEXT: st1d { z0.d }, p1, [x1] ; CHECK-NEXT: ret @@ -1265,8 +1262,8 @@ define i8* @preidx8sext64_instead_of_ld1r(i8* %src, * %out, i64* %dst) { ; CHECK-LABEL: preidx8sext64_instead_of_ld1r: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrsb x8, [x0, #1]! ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ldrsb x8, [x0, #1]! ; CHECK-NEXT: mov z0.d, x8 ; CHECK-NEXT: st1d { z0.d }, p0, [x1] ; CHECK-NEXT: str x8, [x2] diff --git a/llvm/test/CodeGen/AArch64/sve-lsr-scaled-index-addressing-mode.ll b/llvm/test/CodeGen/AArch64/sve-lsr-scaled-index-addressing-mode.ll --- a/llvm/test/CodeGen/AArch64/sve-lsr-scaled-index-addressing-mode.ll +++ b/llvm/test/CodeGen/AArch64/sve-lsr-scaled-index-addressing-mode.ll @@ -38,10 +38,10 @@ ; ; ASM-LABEL: ld_st_nxv8i16: ; ASM: // %bb.0: // %entry -; ASM-NEXT: mov x8, xzr +; ASM-NEXT: ptrue p0.h ; ASM-NEXT: mov z0.h, #3 // =0x3 +; ASM-NEXT: mov x8, xzr ; ASM-NEXT: cnth x9 -; ASM-NEXT: ptrue p0.h ; ASM-NEXT: .LBB0_1: // %loop ; ASM-NEXT: // =>This Inner Loop Header: Depth=1 ; ASM-NEXT: ld1h { z1.h }, p0/z, [x0, x8, lsl #1] @@ -111,9 +111,9 @@ ; ; ASM-LABEL: masked_ld_st_nxv8i16: ; ASM: // %bb.0: // %entry -; ASM-NEXT: mov x8, xzr -; ASM-NEXT: mov z0.h, #3 // =0x3 ; ASM-NEXT: ptrue p0.h +; ASM-NEXT: mov z0.h, #3 // =0x3 +; ASM-NEXT: mov x8, xzr ; ASM-NEXT: cnth x9 ; ASM-NEXT: .LBB1_1: // %loop ; ASM-NEXT: // =>This Inner Loop Header: Depth=1 diff --git a/llvm/test/CodeGen/AArch64/sve-masked-gather-legalize.ll b/llvm/test/CodeGen/AArch64/sve-masked-gather-legalize.ll --- a/llvm/test/CodeGen/AArch64/sve-masked-gather-legalize.ll +++ b/llvm/test/CodeGen/AArch64/sve-masked-gather-legalize.ll @@ -113,9 +113,9 @@ ; CHECK-NEXT: ld1h { z2.d }, p1/z, [z2.d] ; CHECK-NEXT: punpkhi p1.h, p0.b ; CHECK-NEXT: punpklo p0.h, p0.b +; CHECK-NEXT: uzp1 z2.s, z2.s, z3.s ; CHECK-NEXT: ld1h { z1.d }, p1/z, [z1.d] ; CHECK-NEXT: ld1h { z0.d }, p0/z, [z0.d] -; CHECK-NEXT: uzp1 z2.s, z2.s, z3.s ; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s ; CHECK-NEXT: uzp1 z0.h, z0.h, z2.h ; CHECK-NEXT: ret @@ -126,8 +126,8 @@ define @masked_gather_nxv8bf16(ptr %base, %indices, %mask) #0 { ; CHECK-LABEL: masked_gather_nxv8bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: sunpkhi z1.s, z0.h ; CHECK-NEXT: punpkhi p1.h, p0.b +; CHECK-NEXT: sunpkhi z1.s, z0.h ; CHECK-NEXT: sunpklo z0.s, z0.h ; CHECK-NEXT: punpklo p0.h, p0.b ; CHECK-NEXT: ld1h { z1.s }, p1/z, [x0, z1.s, sxtw #1] @@ -175,16 +175,16 @@ define @masked_gather_nxv16i8(ptr %base, %indices, %mask) #0 { ; CHECK-LABEL: masked_gather_nxv16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sunpkhi z1.h, z0.b ; CHECK-NEXT: punpkhi p1.h, p0.b +; CHECK-NEXT: sunpkhi z1.h, z0.b +; CHECK-NEXT: sunpklo z0.h, z0.b +; CHECK-NEXT: punpklo p0.h, p0.b ; CHECK-NEXT: sunpkhi z2.s, z1.h -; CHECK-NEXT: punpkhi p2.h, p1.b ; CHECK-NEXT: sunpklo z1.s, z1.h +; CHECK-NEXT: punpkhi p2.h, p1.b ; CHECK-NEXT: punpklo p1.h, p1.b ; CHECK-NEXT: ld1b { z2.s }, p2/z, [x0, z2.s, sxtw] ; CHECK-NEXT: ld1b { z1.s }, p1/z, [x0, z1.s, sxtw] -; CHECK-NEXT: sunpklo z0.h, z0.b -; CHECK-NEXT: punpklo p0.h, p0.b ; CHECK-NEXT: punpkhi p1.h, p0.b ; CHECK-NEXT: punpklo p0.h, p0.b ; CHECK-NEXT: uzp1 z1.h, z1.h, z2.h diff --git a/llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll 
b/llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll --- a/llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll +++ b/llvm/test/CodeGen/AArch64/sve-masked-ldst-sext.ll @@ -68,8 +68,8 @@ define @masked_sload_passthru( *%a, %mask, %passthru) { ; CHECK-LABEL: masked_sload_passthru: ; CHECK: // %bb.0: -; CHECK-NEXT: ld1sw { z1.d }, p0/z, [x0] ; CHECK-NEXT: ptrue p1.d +; CHECK-NEXT: ld1sw { z1.d }, p0/z, [x0] ; CHECK-NEXT: sxtw z0.d, p1/m, z0.d ; CHECK-NEXT: mov z0.d, p0/m, z1.d ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll b/llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll --- a/llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll +++ b/llvm/test/CodeGen/AArch64/sve-masked-ldst-zext.ll @@ -98,9 +98,9 @@ define @masked_zload_2i16_2f64(* noalias %in, %mask) { ; CHECK-LABEL: masked_zload_2i16_2f64: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0] -; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d +; CHECK-NEXT: ucvtf z0.d, p1/m, z0.d ; CHECK-NEXT: ret %wide.load = call @llvm.masked.load.nxv2i16(* %in, i32 2, %mask, undef) %zext = zext %wide.load to diff --git a/llvm/test/CodeGen/AArch64/sve-masked-scatter-legalize.ll b/llvm/test/CodeGen/AArch64/sve-masked-scatter-legalize.ll --- a/llvm/test/CodeGen/AArch64/sve-masked-scatter-legalize.ll +++ b/llvm/test/CodeGen/AArch64/sve-masked-scatter-legalize.ll @@ -9,25 +9,25 @@ define void @masked_scatter_nxv16i8( %data, ptr %base, %offsets, %mask) #0 { ; CHECK-LABEL: masked_scatter_nxv16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sunpklo z2.h, z1.b ; CHECK-NEXT: punpklo p1.h, p0.b +; CHECK-NEXT: sunpklo z2.h, z1.b ; CHECK-NEXT: uunpklo z4.h, z0.b +; CHECK-NEXT: punpkhi p0.h, p0.b +; CHECK-NEXT: sunpkhi z1.h, z1.b +; CHECK-NEXT: uunpkhi z0.h, z0.b ; CHECK-NEXT: sunpklo z3.s, z2.h -; CHECK-NEXT: punpklo p2.h, p1.b ; CHECK-NEXT: uunpklo z5.s, z4.h -; CHECK-NEXT: st1b { z5.s }, p2, [x0, z3.s, sxtw] ; CHECK-NEXT: sunpkhi z2.s, z2.h +; CHECK-NEXT: punpklo p2.h, p1.b ; CHECK-NEXT: punpkhi p1.h, p1.b +; CHECK-NEXT: st1b { z5.s }, p2, [x0, z3.s, sxtw] ; CHECK-NEXT: uunpkhi z3.s, z4.h -; CHECK-NEXT: sunpkhi z1.h, z1.b -; CHECK-NEXT: punpkhi p0.h, p0.b -; CHECK-NEXT: uunpkhi z0.h, z0.b ; CHECK-NEXT: st1b { z3.s }, p1, [x0, z2.s, sxtw] -; CHECK-NEXT: sunpklo z2.s, z1.h ; CHECK-NEXT: punpklo p1.h, p0.b +; CHECK-NEXT: sunpklo z2.s, z1.h +; CHECK-NEXT: punpkhi p0.h, p0.b ; CHECK-NEXT: uunpklo z3.s, z0.h ; CHECK-NEXT: sunpkhi z1.s, z1.h -; CHECK-NEXT: punpkhi p0.h, p0.b ; CHECK-NEXT: uunpkhi z0.s, z0.h ; CHECK-NEXT: st1b { z3.s }, p1, [x0, z2.s, sxtw] ; CHECK-NEXT: st1b { z0.s }, p0, [x0, z1.s, sxtw] @@ -40,11 +40,11 @@ define void @masked_scatter_nxv8i16( %data, ptr %base, %offsets, %mask) #0 { ; CHECK-LABEL: masked_scatter_nxv8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sunpklo z2.s, z1.h ; CHECK-NEXT: punpklo p1.h, p0.b +; CHECK-NEXT: sunpklo z2.s, z1.h ; CHECK-NEXT: uunpklo z3.s, z0.h -; CHECK-NEXT: sunpkhi z1.s, z1.h ; CHECK-NEXT: punpkhi p0.h, p0.b +; CHECK-NEXT: sunpkhi z1.s, z1.h ; CHECK-NEXT: uunpkhi z0.s, z0.h ; CHECK-NEXT: st1h { z3.s }, p1, [x0, z2.s, sxtw #1] ; CHECK-NEXT: st1h { z0.s }, p0, [x0, z1.s, sxtw #1] @@ -57,11 +57,11 @@ define void @masked_scatter_nxv8bf16( %data, ptr %base, %offsets, %mask) #0 { ; CHECK-LABEL: masked_scatter_nxv8bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: sunpklo z2.s, z1.h ; CHECK-NEXT: punpklo p1.h, p0.b +; CHECK-NEXT: sunpklo z2.s, z1.h ; CHECK-NEXT: uunpklo z3.s, z0.h -; CHECK-NEXT: sunpkhi z1.s, z1.h ; CHECK-NEXT: punpkhi p0.h, p0.b +; CHECK-NEXT: sunpkhi z1.s, 
; CHECK-NEXT: uunpkhi z0.s, z0.h
; CHECK-NEXT: st1h { z3.s }, p1, [x0, z2.s, sxtw #1]
; CHECK-NEXT: st1h { z0.s }, p0, [x0, z1.s, sxtw #1]
diff --git a/llvm/test/CodeGen/AArch64/sve-masked-scatter.ll b/llvm/test/CodeGen/AArch64/sve-masked-scatter.ll
--- a/llvm/test/CodeGen/AArch64/sve-masked-scatter.ll
+++ b/llvm/test/CodeGen/AArch64/sve-masked-scatter.ll
@@ -76,8 +76,8 @@
define void @masked_scatter_splat_constant_pointer ( %pg) {
; CHECK-LABEL: masked_scatter_splat_constant_pointer:
; CHECK: // %bb.0: // %vector.body
-; CHECK-NEXT: mov z0.d, #0 // =0x0
; CHECK-NEXT: punpklo p1.h, p0.b
+; CHECK-NEXT: mov z0.d, #0 // =0x0
; CHECK-NEXT: punpkhi p0.h, p0.b
; CHECK-NEXT: st1w { z0.d }, p1, [z0.d]
; CHECK-NEXT: st1w { z0.d }, p0, [z0.d]
diff --git a/llvm/test/CodeGen/AArch64/sve-pr62151.ll b/llvm/test/CodeGen/AArch64/sve-pr62151.ll
--- a/llvm/test/CodeGen/AArch64/sve-pr62151.ll
+++ b/llvm/test/CodeGen/AArch64/sve-pr62151.ll
@@ -5,8 +5,8 @@
define i32 @build_interpolation(<2 x i32> %0, <2 x i32> %1, <2 x i32> %2) {
; CHECK-LABEL: build_interpolation:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mul v0.2s, v1.2s, v0.2s
; CHECK-NEXT: ptrue p0.s, vl2
+; CHECK-NEXT: mul v0.2s, v1.2s, v0.2s
; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2
; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z2.s
; CHECK-NEXT: mla v0.2s, v1.2s, v0.s[1]
diff --git a/llvm/test/CodeGen/AArch64/sve-pred-arith.ll b/llvm/test/CodeGen/AArch64/sve-pred-arith.ll
--- a/llvm/test/CodeGen/AArch64/sve-pred-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve-pred-arith.ll
@@ -54,23 +54,23 @@
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: ptrue p4.b
; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill
-; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
-; CHECK-NEXT: ptrue p8.b
-; CHECK-NEXT: ldr p4, [x1]
; CHECK-NEXT: ldr p5, [x0]
-; CHECK-NEXT: ldr p6, [x3]
+; CHECK-NEXT: ldr p6, [x1]
; CHECK-NEXT: ldr p7, [x2]
-; CHECK-NEXT: eor p0.b, p8/z, p0.b, p5.b
-; CHECK-NEXT: eor p1.b, p8/z, p1.b, p4.b
-; CHECK-NEXT: eor p2.b, p8/z, p2.b, p7.b
-; CHECK-NEXT: eor p3.b, p8/z, p3.b, p6.b
-; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload
-; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p8, [x3]
+; CHECK-NEXT: eor p0.b, p4/z, p0.b, p5.b
+; CHECK-NEXT: eor p1.b, p4/z, p1.b, p6.b
; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: eor p2.b, p4/z, p2.b, p7.b
+; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: eor p3.b, p4/z, p3.b, p8.b
+; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
@@ -138,23 +138,23 @@
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
+; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
+; CHECK-NEXT: ptrue p4.b
; CHECK-NEXT: str p8, [sp, #3, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p7, [sp, #4, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill
-; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
-; CHECK-NEXT: ptrue p8.b
-; CHECK-NEXT: ldr p4, [x1]
; CHECK-NEXT: ldr p5, [x0]
-; CHECK-NEXT: ldr p6, [x3]
+; CHECK-NEXT: ldr p6, [x1]
; CHECK-NEXT: ldr p7, [x2]
-; CHECK-NEXT: eor p0.b, p8/z, p0.b, p5.b
-; CHECK-NEXT: eor p1.b, p8/z, p1.b, p4.b
-; CHECK-NEXT: eor p2.b, p8/z, p2.b, p7.b
-; CHECK-NEXT: eor p3.b, p8/z, p3.b, p6.b
-; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload
-; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p8, [x3]
+; CHECK-NEXT: eor p0.b, p4/z, p0.b, p5.b
+; CHECK-NEXT: eor p1.b, p4/z, p1.b, p6.b
; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: eor p2.b, p4/z, p2.b, p7.b
+; CHECK-NEXT: ldr p7, [sp, #4, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: eor p3.b, p4/z, p3.b, p8.b
+; CHECK-NEXT: ldr p8, [sp, #3, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: addvl sp, sp, #1
diff --git a/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll b/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll
--- a/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll
+++ b/llvm/test/CodeGen/AArch64/sve-pred-selectop.ll
@@ -322,8 +322,8 @@
define @ornot_v4i32( %z, %x, %y) {
; CHECK-LABEL: ornot_v4i32:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov z3.s, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: mov z3.s, #-1 // =0xffffffffffffffff
; CHECK-NEXT: eor z2.d, z2.d, z3.d
; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
; CHECK-NEXT: orr z1.d, z1.d, z2.d
@@ -340,8 +340,8 @@
define @ornot_v8i16( %z, %x, %y) {
; CHECK-LABEL: ornot_v8i16:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov z3.h, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: mov z3.h, #-1 // =0xffffffffffffffff
; CHECK-NEXT: eor z2.d, z2.d, z3.d
; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
; CHECK-NEXT: orr z1.d, z1.d, z2.d
@@ -358,8 +358,8 @@
define @ornot_v16i8( %z, %x, %y) {
; CHECK-LABEL: ornot_v16i8:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: mov z3.b, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: mov z3.b, #-1 // =0xffffffffffffffff
; CHECK-NEXT: eor z2.d, z2.d, z3.d
; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT: orr z1.d, z1.d, z2.d
@@ -467,9 +467,9 @@
; CHECK-LABEL: icmp_slt_v4i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, #0
; CHECK-NEXT: smin z1.s, p0/m, z1.s, z2.s
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
-; CHECK-NEXT: mov z0.s, p0/m, z1.s
+; CHECK-NEXT: mov z0.s, p1/m, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp eq %z, zeroinitializer
@@ -483,9 +483,9 @@
; CHECK-LABEL: icmp_slt_v8i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, #0
; CHECK-NEXT: smin z1.h, p0/m, z1.h, z2.h
-; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: mov z0.h, p0/m, z1.h
+; CHECK-NEXT: mov z0.h, p1/m, z1.h
; CHECK-NEXT: ret
entry:
%c = icmp eq %z, zeroinitializer
@@ -499,9 +499,9 @@
; CHECK-LABEL: icmp_slt_v16i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #0
; CHECK-NEXT: smin z1.b, p0/m, z1.b, z2.b
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: mov z0.b, p0/m, z1.b
+; CHECK-NEXT: mov z0.b, p1/m, z1.b
; CHECK-NEXT: ret
entry:
%c = icmp eq %z, zeroinitializer
@@ -515,9 +515,9 @@
; CHECK-LABEL: icmp_sgt_v4i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, #0
; CHECK-NEXT: smax z1.s, p0/m, z1.s, z2.s
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
-; CHECK-NEXT: mov z0.s, p0/m, z1.s
+; CHECK-NEXT: mov z0.s, p1/m, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp eq %z, zeroinitializer
@@ -531,9 +531,9 @@
; CHECK-LABEL: icmp_sgt_v8i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, #0
; CHECK-NEXT: smax z1.h, p0/m, z1.h, z2.h
-; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: mov z0.h, p0/m, z1.h
+; CHECK-NEXT: mov z0.h, p1/m, z1.h
; CHECK-NEXT: ret
entry:
%c = icmp eq %z, zeroinitializer
@@ -547,9 +547,9 @@
; CHECK-LABEL: icmp_sgt_v16i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #0
; CHECK-NEXT: smax z1.b, p0/m, z1.b, z2.b
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: mov z0.b, p0/m, z1.b
+; CHECK-NEXT: mov z0.b, p1/m, z1.b
; CHECK-NEXT: ret
entry:
%c = icmp eq %z, zeroinitializer
@@ -563,9 +563,9 @@
; CHECK-LABEL: icmp_ult_v4i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, #0
; CHECK-NEXT: umin z1.s, p0/m, z1.s, z2.s
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
-; CHECK-NEXT: mov z0.s, p0/m, z1.s
+; CHECK-NEXT: mov z0.s, p1/m, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp eq %z, zeroinitializer
@@ -579,9 +579,9 @@
; CHECK-LABEL: icmp_ult_v8i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, #0
; CHECK-NEXT: umin z1.h, p0/m, z1.h, z2.h
-; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: mov z0.h, p0/m, z1.h
+; CHECK-NEXT: mov z0.h, p1/m, z1.h
; CHECK-NEXT: ret
entry:
%c = icmp eq %z, zeroinitializer
@@ -595,9 +595,9 @@
; CHECK-LABEL: icmp_ult_v16i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #0
; CHECK-NEXT: umin z1.b, p0/m, z1.b, z2.b
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: mov z0.b, p0/m, z1.b
+; CHECK-NEXT: mov z0.b, p1/m, z1.b
; CHECK-NEXT: ret
entry:
%c = icmp eq %z, zeroinitializer
@@ -611,9 +611,9 @@
; CHECK-LABEL: icmp_ugt_v4i32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, #0
; CHECK-NEXT: umax z1.s, p0/m, z1.s, z2.s
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
-; CHECK-NEXT: mov z0.s, p0/m, z1.s
+; CHECK-NEXT: mov z0.s, p1/m, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp eq %z, zeroinitializer
@@ -627,9 +627,9 @@
; CHECK-LABEL: icmp_ugt_v8i16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, #0
; CHECK-NEXT: umax z1.h, p0/m, z1.h, z2.h
-; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
-; CHECK-NEXT: mov z0.h, p0/m, z1.h
+; CHECK-NEXT: mov z0.h, p1/m, z1.h
; CHECK-NEXT: ret
entry:
%c = icmp eq %z, zeroinitializer
@@ -643,9 +643,9 @@
; CHECK-LABEL: icmp_ugt_v16i8:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #0
; CHECK-NEXT: umax z1.b, p0/m, z1.b, z2.b
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
-; CHECK-NEXT: mov z0.b, p0/m, z1.b
+; CHECK-NEXT: mov z0.b, p1/m, z1.b
; CHECK-NEXT: ret
entry:
%c = icmp eq %z, zeroinitializer
@@ -659,9 +659,9 @@
; CHECK-LABEL: fcmp_fast_olt_v4f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, #0.0
; CHECK-NEXT: fminnm z1.s, p0/m, z1.s, z2.s
-; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0
-; CHECK-NEXT: mov z0.s, p0/m, z1.s
+; CHECK-NEXT: mov z0.s, p1/m, z1.s
; CHECK-NEXT: ret
entry:
%c = fcmp oeq %z, zeroinitializer
@@ -675,9 +675,9 @@
; CHECK-LABEL: fcmp_fast_olt_v8f16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: fcmeq p1.h, p0/z, z0.h, #0.0
; CHECK-NEXT: fminnm z1.h, p0/m, z1.h, z2.h
-; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0
-; CHECK-NEXT: mov z0.h, p0/m, z1.h
+; CHECK-NEXT: mov z0.h, p1/m, z1.h
; CHECK-NEXT: ret
entry:
%c = fcmp oeq %z, zeroinitializer
@@ -691,9 +691,9 @@
; CHECK-LABEL: fcmp_fast_ogt_v4f32:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, #0.0
; CHECK-NEXT: fmaxnm z1.s, p0/m, z1.s, z2.s
-; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0
-; CHECK-NEXT: mov z0.s, p0/m, z1.s
+; CHECK-NEXT: mov z0.s, p1/m, z1.s
; CHECK-NEXT: ret
entry:
%c = fcmp oeq %z, zeroinitializer
@@ -707,9 +707,9 @@
; CHECK-LABEL: fcmp_fast_ogt_v8f16:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: fcmeq p1.h, p0/z, z0.h, #0.0
; CHECK-NEXT: fmaxnm z1.h, p0/m, z1.h, z2.h
-; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0
-; CHECK-NEXT: mov z0.h, p0/m, z1.h
+; CHECK-NEXT: mov z0.h, p1/m, z1.h
; CHECK-NEXT: ret
entry:
%c = fcmp oeq %z, zeroinitializer
@@ -904,8 +904,8 @@
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z2.s, w0
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
; CHECK-NEXT: add z1.s, z1.s, z2.s
+; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
; CHECK-NEXT: mov z0.s, p0/m, z1.s
; CHECK-NEXT: ret
entry:
@@ -922,8 +922,8 @@
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z2.h, w0
-; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
; CHECK-NEXT: add z1.h, z1.h, z2.h
+; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
; CHECK-NEXT: mov z0.h, p0/m, z1.h
; CHECK-NEXT: ret
entry:
@@ -940,8 +940,8 @@
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z2.b, w0
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT: add z1.b, z1.b, z2.b
+; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT: mov z0.b, p0/m, z1.b
; CHECK-NEXT: ret
entry:
@@ -958,8 +958,8 @@
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z2.s, w0
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
; CHECK-NEXT: sub z1.s, z1.s, z2.s
+; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
; CHECK-NEXT: mov z0.s, p0/m, z1.s
; CHECK-NEXT: ret
entry:
@@ -976,8 +976,8 @@
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z2.h, w0
-; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
; CHECK-NEXT: sub z1.h, z1.h, z2.h
+; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
; CHECK-NEXT: mov z0.h, p0/m, z1.h
; CHECK-NEXT: ret
entry:
@@ -994,8 +994,8 @@
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z2.b, w0
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT: sub z1.b, z1.b, z2.b
+; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT: mov z0.b, p0/m, z1.b
; CHECK-NEXT: ret
entry:
@@ -1012,8 +1012,8 @@
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z2.s, w0
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
; CHECK-NEXT: mul z1.s, z1.s, z2.s
+; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
; CHECK-NEXT: mov z0.s, p0/m, z1.s
; CHECK-NEXT: ret
entry:
@@ -1030,8 +1030,8 @@
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z2.h, w0
-; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
; CHECK-NEXT: mul z1.h, z1.h, z2.h
+; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
; CHECK-NEXT: mov z0.h, p0/m, z1.h
; CHECK-NEXT: ret
entry:
@@ -1048,8 +1048,8 @@
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z2.b, w0
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT: mul z1.b, z1.b, z2.b
+; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT: mov z0.b, p0/m, z1.b
; CHECK-NEXT: ret
entry:
@@ -1064,11 +1064,11 @@
define @faddqr_v4f32( %z, %x, float %y) {
; CHECK-LABEL: faddqr_v4f32:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $s2 killed $s2 def $z2
; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: // kill: def $s2 killed $s2 def $z2
; CHECK-NEXT: mov z2.s, s2
-; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT: fadd z1.s, z1.s, z2.s
+; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT: mov z0.s, p0/m, z1.s
; CHECK-NEXT: ret
entry:
@@ -1083,11 +1083,11 @@
define @faddqr_v8f16( %z, %x, half %y) {
; CHECK-LABEL: faddqr_v8f16:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $h2 killed $h2 def $z2
; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: // kill: def $h2 killed $h2 def $z2
; CHECK-NEXT: mov z2.h, h2
-; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0
; CHECK-NEXT: fadd z1.h, z1.h, z2.h
+; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0
; CHECK-NEXT: mov z0.h, p0/m, z1.h
; CHECK-NEXT: ret
entry:
@@ -1102,11 +1102,11 @@
define @fsubqr_v4f32( %z, %x, float %y) {
; CHECK-LABEL: fsubqr_v4f32:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $s2 killed $s2 def $z2
; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: // kill: def $s2 killed $s2 def $z2
; CHECK-NEXT: mov z2.s, s2
-; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT: fsub z1.s, z1.s, z2.s
+; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT: mov z0.s, p0/m, z1.s
; CHECK-NEXT: ret
entry:
@@ -1121,11 +1121,11 @@
define @fsubqr_v8f16( %z, %x, half %y) {
; CHECK-LABEL: fsubqr_v8f16:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $h2 killed $h2 def $z2
; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: // kill: def $h2 killed $h2 def $z2
; CHECK-NEXT: mov z2.h, h2
-; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0
; CHECK-NEXT: fsub z1.h, z1.h, z2.h
+; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0
; CHECK-NEXT: mov z0.h, p0/m, z1.h
; CHECK-NEXT: ret
entry:
@@ -1140,11 +1140,11 @@
define @fmulqr_v4f32( %z, %x, float %y) {
; CHECK-LABEL: fmulqr_v4f32:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $s2 killed $s2 def $z2
; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: // kill: def $s2 killed $s2 def $z2
; CHECK-NEXT: mov z2.s, s2
-; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT: fmul z1.s, z1.s, z2.s
+; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT: mov z0.s, p0/m, z1.s
; CHECK-NEXT: ret
entry:
@@ -1159,11 +1159,11 @@
define @fmulqr_v8f16( %z, %x, half %y) {
; CHECK-LABEL: fmulqr_v8f16:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: // kill: def $h2 killed $h2 def $z2
; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: // kill: def $h2 killed $h2 def $z2
; CHECK-NEXT: mov z2.h, h2
-; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0
; CHECK-NEXT: fmul z1.h, z1.h, z2.h
+; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0
; CHECK-NEXT: mov z0.h, p0/m, z1.h
; CHECK-NEXT: ret
entry:
@@ -1180,8 +1180,8 @@
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z2.s, w0
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
; CHECK-NEXT: sqadd z1.s, z1.s, z2.s
+; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
; CHECK-NEXT: mov z0.s, p0/m, z1.s
; CHECK-NEXT: ret
entry:
@@ -1198,8 +1198,8 @@
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z2.h, w0
-; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
; CHECK-NEXT: sqadd z1.h, z1.h, z2.h
+; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
; CHECK-NEXT: mov z0.h, p0/m, z1.h
; CHECK-NEXT: ret
entry:
@@ -1216,8 +1216,8 @@
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z2.b, w0
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT: sqadd z1.b, z1.b, z2.b
+; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT: mov z0.b, p0/m, z1.b
; CHECK-NEXT: ret
entry:
@@ -1234,8 +1234,8 @@
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z2.s, w0
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
; CHECK-NEXT: uqadd z1.s, z1.s, z2.s
+; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
; CHECK-NEXT: mov z0.s, p0/m, z1.s
; CHECK-NEXT: ret
entry:
@@ -1252,8 +1252,8 @@
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z2.h, w0
-; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
; CHECK-NEXT: uqadd z1.h, z1.h, z2.h
+; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
; CHECK-NEXT: mov z0.h, p0/m, z1.h
; CHECK-NEXT: ret
entry:
@@ -1270,8 +1270,8 @@
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z2.b, w0
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT: uqadd z1.b, z1.b, z2.b
+; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT: mov z0.b, p0/m, z1.b
; CHECK-NEXT: ret
entry:
@@ -1288,8 +1288,8 @@
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z2.s, w0
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
; CHECK-NEXT: sqsub z1.s, z1.s, z2.s
+; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
; CHECK-NEXT: mov z0.s, p0/m, z1.s
; CHECK-NEXT: ret
entry:
@@ -1306,8 +1306,8 @@
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z2.h, w0
-; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
; CHECK-NEXT: sqsub z1.h, z1.h, z2.h
+; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
; CHECK-NEXT: mov z0.h, p0/m, z1.h
; CHECK-NEXT: ret
entry:
@@ -1324,8 +1324,8 @@
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z2.b, w0
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT: sqsub z1.b, z1.b, z2.b
+; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT: mov z0.b, p0/m, z1.b
; CHECK-NEXT: ret
entry:
@@ -1342,8 +1342,8 @@
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: mov z2.s, w0
-; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
; CHECK-NEXT: uqsub z1.s, z1.s, z2.s
+; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, #0
; CHECK-NEXT: mov z0.s, p0/m, z1.s
; CHECK-NEXT: ret
entry:
@@ -1360,8 +1360,8 @@
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: mov z2.h, w0
-; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
; CHECK-NEXT: uqsub z1.h, z1.h, z2.h
+; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, #0
; CHECK-NEXT: mov z0.h, p0/m, z1.h
; CHECK-NEXT: ret
entry:
@@ -1378,8 +1378,8 @@
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: mov z2.b, w0
-; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT: uqsub z1.b, z1.b, z2.b
+; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, #0
; CHECK-NEXT: mov z0.b, p0/m, z1.b
; CHECK-NEXT: ret
entry:
diff --git a/llvm/test/CodeGen/AArch64/sve-pred-selectop2.ll b/llvm/test/CodeGen/AArch64/sve-pred-selectop2.ll
--- a/llvm/test/CodeGen/AArch64/sve-pred-selectop2.ll
+++ b/llvm/test/CodeGen/AArch64/sve-pred-selectop2.ll
@@ -173,9 +173,9 @@
; CHECK-LABEL: sdiv_nxv2i64_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0
; CHECK-NEXT: sdivr z1.d, p0/m, z1.d, z0.d
-; CHECK-NEXT: mov z0.d, p1/m, z1.d
+; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
+; CHECK-NEXT: mov z0.d, p0/m, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt %n, zeroinitializer
@@ -188,9 +188,9 @@
; CHECK-LABEL: sdiv_nxv4i32_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
; CHECK-NEXT: sdivr z1.s, p0/m, z1.s, z0.s
-; CHECK-NEXT: mov z0.s, p1/m, z1.s
+; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
+; CHECK-NEXT: mov z0.s, p0/m, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp sgt %n, zeroinitializer
@@ -202,14 +202,14 @@
define @sdiv_nxv8i16_x( %x, %y, %n) {
; CHECK-LABEL: sdiv_nxv8i16_x:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p1.s
+; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: sunpkhi z3.s, z1.h
; CHECK-NEXT: sunpkhi z4.s, z0.h
; CHECK-NEXT: sunpklo z1.s, z1.h
-; CHECK-NEXT: sunpklo z5.s, z0.h
+; CHECK-NEXT: sdivr z3.s, p0/m, z3.s, z4.s
+; CHECK-NEXT: sunpklo z4.s, z0.h
+; CHECK-NEXT: sdivr z1.s, p0/m, z1.s, z4.s
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: sdivr z3.s, p1/m, z3.s, z4.s
-; CHECK-NEXT: sdivr z1.s, p1/m, z1.s, z5.s
; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
; CHECK-NEXT: uzp1 z1.h, z1.h, z3.h
; CHECK-NEXT: mov z0.h, p0/m, z1.h
@@ -227,25 +227,24 @@
; CHECK-NEXT: sunpkhi z3.h, z1.b
; CHECK-NEXT: sunpkhi z4.h, z0.b
; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: sunpklo z1.h, z1.b
; CHECK-NEXT: sunpkhi z5.s, z3.h
; CHECK-NEXT: sunpkhi z6.s, z4.h
-; CHECK-NEXT: sunpklo z1.h, z1.b
-; CHECK-NEXT: sdivr z5.s, p0/m, z5.s, z6.s
-; CHECK-NEXT: sunpklo z6.h, z0.b
; CHECK-NEXT: sunpklo z3.s, z3.h
; CHECK-NEXT: sunpklo z4.s, z4.h
-; CHECK-NEXT: sunpkhi z7.s, z1.h
-; CHECK-NEXT: sunpkhi z24.s, z6.h
+; CHECK-NEXT: sdivr z5.s, p0/m, z5.s, z6.s
+; CHECK-NEXT: sunpkhi z6.s, z1.h
; CHECK-NEXT: sunpklo z1.s, z1.h
-; CHECK-NEXT: sunpklo z6.s, z6.h
; CHECK-NEXT: sdivr z3.s, p0/m, z3.s, z4.s
-; CHECK-NEXT: sdivr z1.s, p0/m, z1.s, z6.s
-; CHECK-NEXT: movprfx z4, z24
-; CHECK-NEXT: sdiv z4.s, p0/m, z4.s, z7.s
-; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: sunpklo z4.h, z0.b
+; CHECK-NEXT: sunpkhi z7.s, z4.h
+; CHECK-NEXT: sunpklo z4.s, z4.h
+; CHECK-NEXT: sdivr z6.s, p0/m, z6.s, z7.s
; CHECK-NEXT: uzp1 z3.h, z3.h, z5.h
-; CHECK-NEXT: uzp1 z1.h, z1.h, z4.h
+; CHECK-NEXT: sdivr z1.s, p0/m, z1.s, z4.s
+; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
+; CHECK-NEXT: uzp1 z1.h, z1.h, z6.h
; CHECK-NEXT: uzp1 z1.b, z1.b, z3.b
; CHECK-NEXT: mov z0.b, p0/m, z1.b
; CHECK-NEXT: ret
@@ -260,9 +259,9 @@
; CHECK-LABEL: udiv_nxv2i64_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0
; CHECK-NEXT: udivr z1.d, p0/m, z1.d, z0.d
-; CHECK-NEXT: mov z0.d, p1/m, z1.d
+; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
+; CHECK-NEXT: mov z0.d, p0/m, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt %n, zeroinitializer
@@ -275,9 +274,9 @@
; CHECK-LABEL: udiv_nxv4i32_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
; CHECK-NEXT: udivr z1.s, p0/m, z1.s, z0.s
-; CHECK-NEXT: mov z0.s, p1/m, z1.s
+; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
+; CHECK-NEXT: mov z0.s, p0/m, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp sgt %n, zeroinitializer
@@ -289,14 +288,14 @@
define @udiv_nxv8i16_x( %x, %y, %n) {
; CHECK-LABEL: udiv_nxv8i16_x:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p1.s
+; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: uunpkhi z3.s, z1.h
; CHECK-NEXT: uunpkhi z4.s, z0.h
; CHECK-NEXT: uunpklo z1.s, z1.h
-; CHECK-NEXT: uunpklo z5.s, z0.h
+; CHECK-NEXT: udivr z3.s, p0/m, z3.s, z4.s
+; CHECK-NEXT: uunpklo z4.s, z0.h
+; CHECK-NEXT: udivr z1.s, p0/m, z1.s, z4.s
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: udivr z3.s, p1/m, z3.s, z4.s
-; CHECK-NEXT: udivr z1.s, p1/m, z1.s, z5.s
; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
; CHECK-NEXT: uzp1 z1.h, z1.h, z3.h
; CHECK-NEXT: mov z0.h, p0/m, z1.h
@@ -314,25 +313,24 @@
; CHECK-NEXT: uunpkhi z3.h, z1.b
; CHECK-NEXT: uunpkhi z4.h, z0.b
; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: uunpklo z1.h, z1.b
; CHECK-NEXT: uunpkhi z5.s, z3.h
; CHECK-NEXT: uunpkhi z6.s, z4.h
-; CHECK-NEXT: uunpklo z1.h, z1.b
-; CHECK-NEXT: udivr z5.s, p0/m, z5.s, z6.s
-; CHECK-NEXT: uunpklo z6.h, z0.b
; CHECK-NEXT: uunpklo z3.s, z3.h
; CHECK-NEXT: uunpklo z4.s, z4.h
-; CHECK-NEXT: uunpkhi z7.s, z1.h
-; CHECK-NEXT: uunpkhi z24.s, z6.h
+; CHECK-NEXT: udivr z5.s, p0/m, z5.s, z6.s
+; CHECK-NEXT: uunpkhi z6.s, z1.h
; CHECK-NEXT: uunpklo z1.s, z1.h
-; CHECK-NEXT: uunpklo z6.s, z6.h
; CHECK-NEXT: udivr z3.s, p0/m, z3.s, z4.s
-; CHECK-NEXT: udivr z1.s, p0/m, z1.s, z6.s
-; CHECK-NEXT: movprfx z4, z24
-; CHECK-NEXT: udiv z4.s, p0/m, z4.s, z7.s
-; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: uunpklo z4.h, z0.b
+; CHECK-NEXT: uunpkhi z7.s, z4.h
+; CHECK-NEXT: uunpklo z4.s, z4.h
+; CHECK-NEXT: udivr z6.s, p0/m, z6.s, z7.s
; CHECK-NEXT: uzp1 z3.h, z3.h, z5.h
-; CHECK-NEXT: uzp1 z1.h, z1.h, z4.h
+; CHECK-NEXT: udivr z1.s, p0/m, z1.s, z4.s
+; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
+; CHECK-NEXT: uzp1 z1.h, z1.h, z6.h
; CHECK-NEXT: uzp1 z1.b, z1.b, z3.b
; CHECK-NEXT: mov z0.b, p0/m, z1.b
; CHECK-NEXT: ret
@@ -347,10 +345,10 @@
; CHECK-LABEL: srem_nxv2i64_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0
-; CHECK-NEXT: movprfx z2, z0
-; CHECK-NEXT: sdiv z2.d, p0/m, z2.d, z1.d
-; CHECK-NEXT: mls z0.d, p1/m, z2.d, z1.d
+; CHECK-NEXT: movprfx z3, z0
+; CHECK-NEXT: sdiv z3.d, p0/m, z3.d, z1.d
+; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
+; CHECK-NEXT: mls z0.d, p0/m, z3.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt %n, zeroinitializer
@@ -363,10 +361,10 @@
; CHECK-LABEL: srem_nxv4i32_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
-; CHECK-NEXT: movprfx z2, z0
-; CHECK-NEXT: sdiv z2.s, p0/m, z2.s, z1.s
-; CHECK-NEXT: mls z0.s, p1/m, z2.s, z1.s
+; CHECK-NEXT: movprfx z3, z0
+; CHECK-NEXT: sdiv z3.s, p0/m, z3.s, z1.s
+; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
+; CHECK-NEXT: mls z0.s, p0/m, z3.s, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp sgt %n, zeroinitializer
@@ -378,18 +376,17 @@
define @srem_nxv8i16_x( %x, %y, %n) {
; CHECK-LABEL: srem_nxv8i16_x:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p1.s
+; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: sunpkhi z3.s, z1.h
; CHECK-NEXT: sunpkhi z4.s, z0.h
+; CHECK-NEXT: sunpklo z5.s, z0.h
+; CHECK-NEXT: sdivr z3.s, p0/m, z3.s, z4.s
+; CHECK-NEXT: sunpklo z4.s, z1.h
+; CHECK-NEXT: sdivr z4.s, p0/m, z4.s, z5.s
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: sdivr z3.s, p1/m, z3.s, z4.s
-; CHECK-NEXT: sunpklo z5.s, z1.h
-; CHECK-NEXT: sunpklo z6.s, z0.h
-; CHECK-NEXT: movprfx z4, z6
-; CHECK-NEXT: sdiv z4.s, p1/m, z4.s, z5.s
; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: uzp1 z2.h, z4.h, z3.h
-; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h
+; CHECK-NEXT: uzp1 z3.h, z4.h, z3.h
+; CHECK-NEXT: mls z0.h, p0/m, z3.h, z1.h
; CHECK-NEXT: ret
entry:
%c = icmp sgt %n, zeroinitializer
@@ -406,25 +403,24 @@
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: sunpkhi z5.s, z3.h
; CHECK-NEXT: sunpkhi z6.s, z4.h
-; CHECK-NEXT: sunpklo z7.h, z1.b
-; CHECK-NEXT: sdivr z5.s, p0/m, z5.s, z6.s
-; CHECK-NEXT: sunpklo z6.h, z0.b
; CHECK-NEXT: sunpklo z3.s, z3.h
; CHECK-NEXT: sunpklo z4.s, z4.h
-; CHECK-NEXT: sunpkhi z24.s, z7.h
-; CHECK-NEXT: sunpkhi z25.s, z6.h
-; CHECK-NEXT: sunpklo z7.s, z7.h
+; CHECK-NEXT: sdivr z5.s, p0/m, z5.s, z6.s
+; CHECK-NEXT: sunpklo z6.h, z0.b
+; CHECK-NEXT: sunpkhi z24.s, z6.h
; CHECK-NEXT: sunpklo z6.s, z6.h
; CHECK-NEXT: sdivr z3.s, p0/m, z3.s, z4.s
-; CHECK-NEXT: movprfx z4, z25
-; CHECK-NEXT: sdiv z4.s, p0/m, z4.s, z24.s
-; CHECK-NEXT: sdiv z6.s, p0/m, z6.s, z7.s
-; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: sunpklo z4.h, z1.b
+; CHECK-NEXT: sunpkhi z7.s, z4.h
+; CHECK-NEXT: sunpklo z4.s, z4.h
+; CHECK-NEXT: sdivr z7.s, p0/m, z7.s, z24.s
; CHECK-NEXT: uzp1 z3.h, z3.h, z5.h
-; CHECK-NEXT: uzp1 z4.h, z6.h, z4.h
+; CHECK-NEXT: sdivr z4.s, p0/m, z4.s, z6.s
+; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
-; CHECK-NEXT: uzp1 z2.b, z4.b, z3.b
-; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b
+; CHECK-NEXT: uzp1 z4.h, z4.h, z7.h
+; CHECK-NEXT: uzp1 z3.b, z4.b, z3.b
+; CHECK-NEXT: mls z0.b, p0/m, z3.b, z1.b
; CHECK-NEXT: ret
entry:
%c = icmp sgt %n, zeroinitializer
@@ -437,10 +433,10 @@
; CHECK-LABEL: urem_nxv2i64_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0
-; CHECK-NEXT: movprfx z2, z0
-; CHECK-NEXT: udiv z2.d, p0/m, z2.d, z1.d
-; CHECK-NEXT: mls z0.d, p1/m, z2.d, z1.d
+; CHECK-NEXT: movprfx z3, z0
+; CHECK-NEXT: udiv z3.d, p0/m, z3.d, z1.d
+; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
+; CHECK-NEXT: mls z0.d, p0/m, z3.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt %n, zeroinitializer
@@ -453,10 +449,10 @@
; CHECK-LABEL: urem_nxv4i32_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
-; CHECK-NEXT: movprfx z2, z0
-; CHECK-NEXT: udiv z2.s, p0/m, z2.s, z1.s
-; CHECK-NEXT: mls z0.s, p1/m, z2.s, z1.s
+; CHECK-NEXT: movprfx z3, z0
+; CHECK-NEXT: udiv z3.s, p0/m, z3.s, z1.s
+; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
+; CHECK-NEXT: mls z0.s, p0/m, z3.s, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp sgt %n, zeroinitializer
@@ -468,18 +464,17 @@
define @urem_nxv8i16_x( %x, %y, %n) {
; CHECK-LABEL: urem_nxv8i16_x:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p1.s
+; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: uunpkhi z3.s, z1.h
; CHECK-NEXT: uunpkhi z4.s, z0.h
+; CHECK-NEXT: uunpklo z5.s, z0.h
+; CHECK-NEXT: udivr z3.s, p0/m, z3.s, z4.s
+; CHECK-NEXT: uunpklo z4.s, z1.h
+; CHECK-NEXT: udivr z4.s, p0/m, z4.s, z5.s
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: udivr z3.s, p1/m, z3.s, z4.s
-; CHECK-NEXT: uunpklo z5.s, z1.h
-; CHECK-NEXT: uunpklo z6.s, z0.h
-; CHECK-NEXT: movprfx z4, z6
-; CHECK-NEXT: udiv z4.s, p1/m, z4.s, z5.s
; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: uzp1 z2.h, z4.h, z3.h
-; CHECK-NEXT: mls z0.h, p0/m, z2.h, z1.h
+; CHECK-NEXT: uzp1 z3.h, z4.h, z3.h
+; CHECK-NEXT: mls z0.h, p0/m, z3.h, z1.h
; CHECK-NEXT: ret
entry:
%c = icmp sgt %n, zeroinitializer
@@ -496,25 +491,24 @@
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: uunpkhi z5.s, z3.h
; CHECK-NEXT: uunpkhi z6.s, z4.h
-; CHECK-NEXT: uunpklo z7.h, z1.b
-; CHECK-NEXT: udivr z5.s, p0/m, z5.s, z6.s
-; CHECK-NEXT: uunpklo z6.h, z0.b
; CHECK-NEXT: uunpklo z3.s, z3.h
; CHECK-NEXT: uunpklo z4.s, z4.h
-; CHECK-NEXT: uunpkhi z24.s, z7.h
-; CHECK-NEXT: uunpkhi z25.s, z6.h
-; CHECK-NEXT: uunpklo z7.s, z7.h
+; CHECK-NEXT: udivr z5.s, p0/m, z5.s, z6.s
+; CHECK-NEXT: uunpklo z6.h, z0.b
+; CHECK-NEXT: uunpkhi z24.s, z6.h
; CHECK-NEXT: uunpklo z6.s, z6.h
; CHECK-NEXT: udivr z3.s, p0/m, z3.s, z4.s
-; CHECK-NEXT: movprfx z4, z25
-; CHECK-NEXT: udiv z4.s, p0/m, z4.s, z24.s
-; CHECK-NEXT: udiv z6.s, p0/m, z6.s, z7.s
-; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: uunpklo z4.h, z1.b
+; CHECK-NEXT: uunpkhi z7.s, z4.h
+; CHECK-NEXT: uunpklo z4.s, z4.h
+; CHECK-NEXT: udivr z7.s, p0/m, z7.s, z24.s
; CHECK-NEXT: uzp1 z3.h, z3.h, z5.h
-; CHECK-NEXT: uzp1 z4.h, z6.h, z4.h
+; CHECK-NEXT: udivr z4.s, p0/m, z4.s, z6.s
+; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
-; CHECK-NEXT: uzp1 z2.b, z4.b, z3.b
-; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b
+; CHECK-NEXT: uzp1 z4.h, z4.h, z7.h
+; CHECK-NEXT: uzp1 z3.b, z4.b, z3.b
+; CHECK-NEXT: mls z0.b, p0/m, z3.b, z1.b
; CHECK-NEXT: ret
entry:
%c = icmp sgt %n, zeroinitializer
@@ -1130,10 +1124,10 @@
; CHECK-LABEL: fdiv_nxv4f32_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: fcmle p1.s, p0/z, z2.s, #0.0
; CHECK-NEXT: fdivr z1.s, p0/m, z1.s, z0.s
-; CHECK-NEXT: not p1.b, p0/z, p1.b
-; CHECK-NEXT: mov z0.s, p1/m, z1.s
+; CHECK-NEXT: fcmle p1.s, p0/z, z2.s, #0.0
+; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: mov z0.s, p0/m, z1.s
; CHECK-NEXT: ret
entry:
%c = fcmp ugt %n, zeroinitializer
@@ -1148,8 +1142,8 @@
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: fcmle p1.h, p0/z, z2.h, #0.0
; CHECK-NEXT: fdivr z1.h, p0/m, z1.h, z0.h
-; CHECK-NEXT: not p1.b, p0/z, p1.b
-; CHECK-NEXT: mov z0.h, p1/m, z1.h
+; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: mov z0.h, p0/m, z1.h
; CHECK-NEXT: ret
entry:
%c = fcmp ugt %n, zeroinitializer
@@ -1162,10 +1156,10 @@
; CHECK-LABEL: fdiv_nxv2f64_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: fcmle p1.d, p0/z, z2.d, #0.0
; CHECK-NEXT: fdivr z1.d, p0/m, z1.d, z0.d
-; CHECK-NEXT: not p1.b, p0/z, p1.b
-; CHECK-NEXT: mov z0.d, p1/m, z1.d
+; CHECK-NEXT: fcmle p1.d, p0/z, z2.d, #0.0
+; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: mov z0.d, p0/m, z1.d
; CHECK-NEXT: ret
entry:
%c = fcmp ugt %n, zeroinitializer
@@ -1631,9 +1625,9 @@
; CHECK-LABEL: sdiv_nxv2i64_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0
; CHECK-NEXT: sdiv z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
+; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
+; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt %n, zeroinitializer
@@ -1646,9 +1640,9 @@
; CHECK-LABEL: sdiv_nxv4i32_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
+; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
+; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp sgt %n, zeroinitializer
@@ -1660,14 +1654,14 @@
define @sdiv_nxv8i16_y( %x, %y, %n) {
; CHECK-LABEL: sdiv_nxv8i16_y:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p1.s
+; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: sunpkhi z3.s, z1.h
; CHECK-NEXT: sunpkhi z4.s, z0.h
-; CHECK-NEXT: sunpklo z5.s, z1.h
; CHECK-NEXT: sunpklo z0.s, z0.h
+; CHECK-NEXT: sdivr z3.s, p0/m, z3.s, z4.s
+; CHECK-NEXT: sunpklo z4.s, z1.h
+; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z4.s
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: sdivr z3.s, p1/m, z3.s, z4.s
-; CHECK-NEXT: sdiv z0.s, p1/m, z0.s, z5.s
; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
; CHECK-NEXT: uzp1 z0.h, z0.h, z3.h
; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
@@ -1685,25 +1679,24 @@
; CHECK-NEXT: sunpkhi z3.h, z1.b
; CHECK-NEXT: sunpkhi z4.h, z0.b
; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: sunpklo z0.h, z0.b
; CHECK-NEXT: sunpkhi z5.s, z3.h
; CHECK-NEXT: sunpkhi z6.s, z4.h
-; CHECK-NEXT: sunpklo z7.h, z1.b
; CHECK-NEXT: sunpklo z3.s, z3.h
; CHECK-NEXT: sunpklo z4.s, z4.h
+; CHECK-NEXT: sunpkhi z7.s, z0.h
+; CHECK-NEXT: sunpklo z0.s, z0.h
; CHECK-NEXT: sdivr z5.s, p0/m, z5.s, z6.s
-; CHECK-NEXT: sunpklo z0.h, z0.b
-; CHECK-NEXT: sunpkhi z6.s, z7.h
-; CHECK-NEXT: sunpkhi z24.s, z0.h
; CHECK-NEXT: sdivr z3.s, p0/m, z3.s, z4.s
-; CHECK-NEXT: movprfx z4, z24
-; CHECK-NEXT: sdiv z4.s, p0/m, z4.s, z6.s
-; CHECK-NEXT: sunpklo z6.s, z7.h
-; CHECK-NEXT: sunpklo z0.s, z0.h
-; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z6.s
-; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: sunpklo z4.h, z1.b
+; CHECK-NEXT: sunpkhi z6.s, z4.h
+; CHECK-NEXT: sunpklo z4.s, z4.h
+; CHECK-NEXT: sdivr z6.s, p0/m, z6.s, z7.s
; CHECK-NEXT: uzp1 z3.h, z3.h, z5.h
-; CHECK-NEXT: uzp1 z0.h, z0.h, z4.h
+; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z4.s
+; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
+; CHECK-NEXT: uzp1 z0.h, z0.h, z6.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z3.b
; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b
; CHECK-NEXT: ret
@@ -1718,9 +1711,9 @@
; CHECK-LABEL: udiv_nxv2i64_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0
; CHECK-NEXT: udiv z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
+; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
+; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = icmp sgt %n, zeroinitializer
@@ -1733,9 +1726,9 @@
; CHECK-LABEL: udiv_nxv4i32_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
+; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
+; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT: ret
entry:
%c = icmp sgt %n, zeroinitializer
@@ -1747,14 +1740,14 @@
define @udiv_nxv8i16_y( %x, %y, %n) {
; CHECK-LABEL: udiv_nxv8i16_y:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p1.s
+; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: uunpkhi z3.s, z1.h
; CHECK-NEXT: uunpkhi z4.s, z0.h
-; CHECK-NEXT: uunpklo z5.s, z1.h
; CHECK-NEXT: uunpklo z0.s, z0.h
+; CHECK-NEXT: udivr z3.s, p0/m, z3.s, z4.s
+; CHECK-NEXT: uunpklo z4.s, z1.h
+; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z4.s
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: udivr z3.s, p1/m, z3.s, z4.s
-; CHECK-NEXT: udiv z0.s, p1/m, z0.s, z5.s
; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
; CHECK-NEXT: uzp1 z0.h, z0.h, z3.h
; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
@@ -1772,25 +1765,24 @@
; CHECK-NEXT: uunpkhi z3.h, z1.b
; CHECK-NEXT: uunpkhi z4.h, z0.b
; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: uunpklo z0.h, z0.b
; CHECK-NEXT: uunpkhi z5.s, z3.h
; CHECK-NEXT: uunpkhi z6.s, z4.h
-; CHECK-NEXT: uunpklo z7.h, z1.b
; CHECK-NEXT: uunpklo z3.s, z3.h
; CHECK-NEXT: uunpklo z4.s, z4.h
+; CHECK-NEXT: uunpkhi z7.s, z0.h
+; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: udivr z5.s, p0/m, z5.s, z6.s
-; CHECK-NEXT: uunpklo z0.h, z0.b
-; CHECK-NEXT: uunpkhi z6.s, z7.h
-; CHECK-NEXT: uunpkhi z24.s, z0.h
; CHECK-NEXT: udivr z3.s, p0/m, z3.s, z4.s
-; CHECK-NEXT: movprfx z4, z24
-; CHECK-NEXT: udiv z4.s, p0/m, z4.s, z6.s
-; CHECK-NEXT: uunpklo z6.s, z7.h
-; CHECK-NEXT: uunpklo z0.s, z0.h
-; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z6.s
-; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: uunpklo z4.h, z1.b
+; CHECK-NEXT: uunpkhi z6.s, z4.h
+; CHECK-NEXT: uunpklo z4.s, z4.h
+; CHECK-NEXT: udivr z6.s, p0/m, z6.s, z7.s
; CHECK-NEXT: uzp1 z3.h, z3.h, z5.h
-; CHECK-NEXT: uzp1 z0.h, z0.h, z4.h
+; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z4.s
+; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
+; CHECK-NEXT: uzp1 z0.h, z0.h, z6.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z3.b
; CHECK-NEXT: sel z0.b, p0, z0.b, z1.b
; CHECK-NEXT: ret
@@ -1805,10 +1797,10 @@
; CHECK-LABEL: srem_nxv2i64_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0
-; CHECK-NEXT: movprfx z2, z0
-; CHECK-NEXT: sdiv z2.d, p0/m, z2.d, z1.d
-; CHECK-NEXT: msb z1.d, p1/m, z2.d, z0.d
+; CHECK-NEXT: movprfx z3, z0
+; CHECK-NEXT: sdiv z3.d, p0/m, z3.d, z1.d
+; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
+; CHECK-NEXT: msb z1.d, p0/m, z3.d, z0.d
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1822,10 +1814,10 @@
; CHECK-LABEL: srem_nxv4i32_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
-; CHECK-NEXT: movprfx z2, z0
-; CHECK-NEXT: sdiv z2.s, p0/m, z2.s, z1.s
-; CHECK-NEXT: msb z1.s, p1/m, z2.s, z0.s
+; CHECK-NEXT: movprfx z3, z0
+; CHECK-NEXT: sdiv z3.s, p0/m, z3.s, z1.s
+; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
+; CHECK-NEXT: msb z1.s, p0/m, z3.s, z0.s
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1838,18 +1830,17 @@
define @srem_nxv8i16_y( %x, %y, %n) {
; CHECK-LABEL: srem_nxv8i16_y:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p1.s
+; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: sunpkhi z3.s, z1.h
; CHECK-NEXT: sunpkhi z4.s, z0.h
+; CHECK-NEXT: sunpklo z5.s, z0.h
+; CHECK-NEXT: sdivr z3.s, p0/m, z3.s, z4.s
+; CHECK-NEXT: sunpklo z4.s, z1.h
+; CHECK-NEXT: sdivr z4.s, p0/m, z4.s, z5.s
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: sdivr z3.s, p1/m, z3.s, z4.s
-; CHECK-NEXT: sunpklo z5.s, z1.h
-; CHECK-NEXT: sunpklo z6.s, z0.h
-; CHECK-NEXT: movprfx z4, z6
-; CHECK-NEXT: sdiv z4.s, p1/m, z4.s, z5.s
; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: uzp1 z2.h, z4.h, z3.h
-; CHECK-NEXT: msb z1.h, p0/m, z2.h, z0.h
+; CHECK-NEXT: uzp1 z3.h, z4.h, z3.h
+; CHECK-NEXT: msb z1.h, p0/m, z3.h, z0.h
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1867,25 +1858,24 @@
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: sunpkhi z5.s, z3.h
; CHECK-NEXT: sunpkhi z6.s, z4.h
-; CHECK-NEXT: sunpklo z7.h, z1.b
-; CHECK-NEXT: sdivr z5.s, p0/m, z5.s, z6.s
-; CHECK-NEXT: sunpklo z6.h, z0.b
; CHECK-NEXT: sunpklo z3.s, z3.h
; CHECK-NEXT: sunpklo z4.s, z4.h
-; CHECK-NEXT: sunpkhi z24.s, z7.h
-; CHECK-NEXT: sunpkhi z25.s, z6.h
-; CHECK-NEXT: sunpklo z7.s, z7.h
+; CHECK-NEXT: sdivr z5.s, p0/m, z5.s, z6.s
+; CHECK-NEXT: sunpklo z6.h, z0.b
+; CHECK-NEXT: sunpkhi z24.s, z6.h
; CHECK-NEXT: sunpklo z6.s, z6.h
; CHECK-NEXT: sdivr z3.s, p0/m, z3.s, z4.s
-; CHECK-NEXT: movprfx z4, z25
-; CHECK-NEXT: sdiv z4.s, p0/m, z4.s, z24.s
-; CHECK-NEXT: sdiv z6.s, p0/m, z6.s, z7.s
-; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: sunpklo z4.h, z1.b
+; CHECK-NEXT: sunpkhi z7.s, z4.h
+; CHECK-NEXT: sunpklo z4.s, z4.h
+; CHECK-NEXT: sdivr z7.s, p0/m, z7.s, z24.s
; CHECK-NEXT: uzp1 z3.h, z3.h, z5.h
-; CHECK-NEXT: uzp1 z4.h, z6.h, z4.h
+; CHECK-NEXT: sdivr z4.s, p0/m, z4.s, z6.s
+; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
-; CHECK-NEXT: uzp1 z2.b, z4.b, z3.b
-; CHECK-NEXT: msb z1.b, p0/m, z2.b, z0.b
+; CHECK-NEXT: uzp1 z4.h, z4.h, z7.h
+; CHECK-NEXT: uzp1 z3.b, z4.b, z3.b
+; CHECK-NEXT: msb z1.b, p0/m, z3.b, z0.b
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1899,10 +1889,10 @@
; CHECK-LABEL: urem_nxv2i64_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: cmpgt p1.d, p0/z, z2.d, #0
-; CHECK-NEXT: movprfx z2, z0
-; CHECK-NEXT: udiv z2.d, p0/m, z2.d, z1.d
-; CHECK-NEXT: msb z1.d, p1/m, z2.d, z0.d
+; CHECK-NEXT: movprfx z3, z0
+; CHECK-NEXT: udiv z3.d, p0/m, z3.d, z1.d
+; CHECK-NEXT: cmpgt p0.d, p0/z, z2.d, #0
+; CHECK-NEXT: msb z1.d, p0/m, z3.d, z0.d
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1916,10 +1906,10 @@
; CHECK-LABEL: urem_nxv4i32_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, #0
-; CHECK-NEXT: movprfx z2, z0
-; CHECK-NEXT: udiv z2.s, p0/m, z2.s, z1.s
-; CHECK-NEXT: msb z1.s, p1/m, z2.s, z0.s
+; CHECK-NEXT: movprfx z3, z0
+; CHECK-NEXT: udiv z3.s, p0/m, z3.s, z1.s
+; CHECK-NEXT: cmpgt p0.s, p0/z, z2.s, #0
+; CHECK-NEXT: msb z1.s, p0/m, z3.s, z0.s
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1932,18 +1922,17 @@
define @urem_nxv8i16_y( %x, %y, %n) {
; CHECK-LABEL: urem_nxv8i16_y:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ptrue p1.s
+; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: uunpkhi z3.s, z1.h
; CHECK-NEXT: uunpkhi z4.s, z0.h
+; CHECK-NEXT: uunpklo z5.s, z0.h
+; CHECK-NEXT: udivr z3.s, p0/m, z3.s, z4.s
+; CHECK-NEXT: uunpklo z4.s, z1.h
+; CHECK-NEXT: udivr z4.s, p0/m, z4.s, z5.s
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: udivr z3.s, p1/m, z3.s, z4.s
-; CHECK-NEXT: uunpklo z5.s, z1.h
-; CHECK-NEXT: uunpklo z6.s, z0.h
-; CHECK-NEXT: movprfx z4, z6
-; CHECK-NEXT: udiv z4.s, p1/m, z4.s, z5.s
; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, #0
-; CHECK-NEXT: uzp1 z2.h, z4.h, z3.h
-; CHECK-NEXT: msb z1.h, p0/m, z2.h, z0.h
+; CHECK-NEXT: uzp1 z3.h, z4.h, z3.h
+; CHECK-NEXT: msb z1.h, p0/m, z3.h, z0.h
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -1961,25 +1950,24 @@
; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: uunpkhi z5.s, z3.h
; CHECK-NEXT: uunpkhi z6.s, z4.h
-; CHECK-NEXT: uunpklo z7.h, z1.b
-; CHECK-NEXT: udivr z5.s, p0/m, z5.s, z6.s
-; CHECK-NEXT: uunpklo z6.h, z0.b
; CHECK-NEXT: uunpklo z3.s, z3.h
; CHECK-NEXT: uunpklo z4.s, z4.h
-; CHECK-NEXT: uunpkhi z24.s, z7.h
-; CHECK-NEXT: uunpkhi z25.s, z6.h
-; CHECK-NEXT: uunpklo z7.s, z7.h
+; CHECK-NEXT: udivr z5.s, p0/m, z5.s, z6.s
+; CHECK-NEXT: uunpklo z6.h, z0.b
+; CHECK-NEXT: uunpkhi z24.s, z6.h
; CHECK-NEXT: uunpklo z6.s, z6.h
; CHECK-NEXT: udivr z3.s, p0/m, z3.s, z4.s
-; CHECK-NEXT: movprfx z4, z25
-; CHECK-NEXT: udiv z4.s, p0/m, z4.s, z24.s
-; CHECK-NEXT: udiv z6.s, p0/m, z6.s, z7.s
-; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: uunpklo z4.h, z1.b
+; CHECK-NEXT: uunpkhi z7.s, z4.h
+; CHECK-NEXT: uunpklo z4.s, z4.h
+; CHECK-NEXT: udivr z7.s, p0/m, z7.s, z24.s
; CHECK-NEXT: uzp1 z3.h, z3.h, z5.h
-; CHECK-NEXT: uzp1 z4.h, z6.h, z4.h
+; CHECK-NEXT: udivr z4.s, p0/m, z4.s, z6.s
+; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: cmpgt p0.b, p0/z, z2.b, #0
-; CHECK-NEXT: uzp1 z2.b, z4.b, z3.b
-; CHECK-NEXT: msb z1.b, p0/m, z2.b, z0.b
+; CHECK-NEXT: uzp1 z4.h, z4.h, z7.h
+; CHECK-NEXT: uzp1 z3.b, z4.b, z3.b
+; CHECK-NEXT: msb z1.b, p0/m, z3.b, z0.b
; CHECK-NEXT: mov z0.d, z1.d
; CHECK-NEXT: ret
entry:
@@ -2625,10 +2613,10 @@
; CHECK-LABEL: fdiv_nxv4f32_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: fcmle p1.s, p0/z, z2.s, #0.0
; CHECK-NEXT: fdiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: not p1.b, p0/z, p1.b
-; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
+; CHECK-NEXT: fcmle p1.s, p0/z, z2.s, #0.0
+; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT: ret
entry:
%c = fcmp ugt %n, zeroinitializer
@@ -2643,8 +2631,8 @@
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: fcmle p1.h, p0/z, z2.h, #0.0
; CHECK-NEXT: fdiv z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: not p1.b, p0/z, p1.b
-; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
+; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT: ret
entry:
%c = fcmp ugt %n, zeroinitializer
@@ -2657,10 +2645,10 @@
; CHECK-LABEL: fdiv_nxv2f64_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: fcmle p1.d, p0/z, z2.d, #0.0
; CHECK-NEXT: fdiv z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: not p1.b, p0/z, p1.b
-; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
+; CHECK-NEXT: fcmle p1.d, p0/z, z2.d, #0.0
+; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = fcmp ugt %n, zeroinitializer
@@ -2865,10 +2853,10 @@
; CHECK-LABEL: fmai_nxv4f32_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: fmla z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT: fcmle p1.s, p0/z, z3.s, #0.0
-; CHECK-NEXT: not p0.b, p0/z, p1.b
-; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: fmla z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: ret
entry:
%c = fcmp ugt %n, zeroinitializer
@@ -2881,10 +2869,10 @@
; CHECK-LABEL: fmai_nxv8f16_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: fmla z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT: fcmle p1.h, p0/z, z3.h, #0.0
-; CHECK-NEXT: not p0.b, p0/z, p1.b
-; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: fmla z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: ret
entry:
%c = fcmp ugt %n, zeroinitializer
@@ -2897,10 +2885,10 @@
; CHECK-LABEL: fmai_nxv2f64_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: fmla z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT: fcmle p1.d, p0/z, z3.d, #0.0
-; CHECK-NEXT: not p0.b, p0/z, p1.b
-; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: fmla z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = fcmp ugt %n, zeroinitializer
@@ -2913,10 +2901,10 @@
; CHECK-LABEL: fma_nxv4f32_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: fmla z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT: fcmle p1.s, p0/z, z3.s, #0.0
-; CHECK-NEXT: not p0.b, p0/z, p1.b
-; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: fmla z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: ret
entry:
%c = fcmp ugt %n, zeroinitializer
@@ -2930,10 +2918,10 @@
; CHECK-LABEL: fma_nxv8f16_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: fmla z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT: fcmle p1.h, p0/z, z3.h, #0.0
-; CHECK-NEXT: not p0.b, p0/z, p1.b
-; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: fmla z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: ret
entry:
%c = fcmp ugt %n, zeroinitializer
@@ -2947,10 +2935,10 @@
; CHECK-LABEL: fma_nxv2f64_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: fmla z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT: fcmle p1.d, p0/z, z3.d, #0.0
-; CHECK-NEXT: not p0.b, p0/z, p1.b
-; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: fmla z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = fcmp ugt %n, zeroinitializer
diff --git a/llvm/test/CodeGen/AArch64/sve-pred-selectop3.ll b/llvm/test/CodeGen/AArch64/sve-pred-selectop3.ll
--- a/llvm/test/CodeGen/AArch64/sve-pred-selectop3.ll
+++ b/llvm/test/CodeGen/AArch64/sve-pred-selectop3.ll
@@ -776,10 +776,10 @@
; CHECK-LABEL: fdiv_nxv4f32_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: fcmle p1.s, p0/z, z2.s, #0.0
; CHECK-NEXT: fdivr z1.s, p0/m, z1.s, z0.s
-; CHECK-NEXT: not p1.b, p0/z, p1.b
-; CHECK-NEXT: mov z0.s, p1/m, z1.s
+; CHECK-NEXT: fcmle p1.s, p0/z, z2.s, #0.0
+; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: mov z0.s, p0/m, z1.s
; CHECK-NEXT: ret
entry:
%c = fcmp ugt %n, zeroinitializer
@@ -794,8 +794,8 @@
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: fcmle p1.h, p0/z, z2.h, #0.0
; CHECK-NEXT: fdivr z1.h, p0/m, z1.h, z0.h
-; CHECK-NEXT: not p1.b, p0/z, p1.b
-; CHECK-NEXT: mov z0.h, p1/m, z1.h
+; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: mov z0.h, p0/m, z1.h
; CHECK-NEXT: ret
entry:
%c = fcmp ugt %n, zeroinitializer
@@ -808,10 +808,10 @@
; CHECK-LABEL: fdiv_nxv2f64_x:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: fcmle p1.d, p0/z, z2.d, #0.0
; CHECK-NEXT: fdivr z1.d, p0/m, z1.d, z0.d
-; CHECK-NEXT: not p1.b, p0/z, p1.b
-; CHECK-NEXT: mov z0.d, p1/m, z1.d
+; CHECK-NEXT: fcmle p1.d, p0/z, z2.d, #0.0
+; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: mov z0.d, p0/m, z1.d
; CHECK-NEXT: ret
entry:
%c = fcmp ugt %n, zeroinitializer
@@ -1684,10 +1684,10 @@
; CHECK-LABEL: fdiv_nxv4f32_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: fcmle p1.s, p0/z, z2.s, #0.0
; CHECK-NEXT: fdiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: not p1.b, p0/z, p1.b
-; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
+; CHECK-NEXT: fcmle p1.s, p0/z, z2.s, #0.0
+; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
; CHECK-NEXT: ret
entry:
%c = fcmp ugt %n, zeroinitializer
@@ -1702,8 +1702,8 @@
; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: fcmle p1.h, p0/z, z2.h, #0.0
; CHECK-NEXT: fdiv z0.h, p0/m, z0.h, z1.h
-; CHECK-NEXT: not p1.b, p0/z, p1.b
-; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
+; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
; CHECK-NEXT: ret
entry:
%c = fcmp ugt %n, zeroinitializer
@@ -1716,10 +1716,10 @@
; CHECK-LABEL: fdiv_nxv2f64_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: fcmle p1.d, p0/z, z2.d, #0.0
; CHECK-NEXT: fdiv z0.d, p0/m, z0.d, z1.d
-; CHECK-NEXT: not p1.b, p0/z, p1.b
-; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
+; CHECK-NEXT: fcmle p1.d, p0/z, z2.d, #0.0
+; CHECK-NEXT: not p0.b, p0/z, p1.b
+; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = fcmp ugt %n, zeroinitializer
@@ -1732,10 +1732,10 @@
; CHECK-LABEL: fmai_nxv4f32_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: fmla z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT: fcmle p1.s, p0/z, z3.s, #0.0
-; CHECK-NEXT: not p0.b, p0/z, p1.b
-; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: fmla z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: ret
entry:
%c = fcmp ugt %n, zeroinitializer
@@ -1748,10 +1748,10 @@
; CHECK-LABEL: fmai_nxv8f16_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: fmla z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT: fcmle p1.h, p0/z, z3.h, #0.0
-; CHECK-NEXT: not p0.b, p0/z, p1.b
-; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: fmla z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: ret
entry:
%c = fcmp ugt %n, zeroinitializer
@@ -1764,10 +1764,10 @@
; CHECK-LABEL: fmai_nxv2f64_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: fmla z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT: fcmle p1.d, p0/z, z3.d, #0.0
-; CHECK-NEXT: not p0.b, p0/z, p1.b
-; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: fmla z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = fcmp ugt %n, zeroinitializer
@@ -1780,10 +1780,10 @@
; CHECK-LABEL: fma_nxv4f32_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: fmla z0.s, p0/m, z1.s, z2.s
; CHECK-NEXT: fcmle p1.s, p0/z, z3.s, #0.0
-; CHECK-NEXT: not p0.b, p0/z, p1.b
-; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: fmla z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
; CHECK-NEXT: ret
entry:
%c = fcmp ugt %n, zeroinitializer
@@ -1797,10 +1797,10 @@
; CHECK-LABEL: fma_nxv8f16_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: fmla z0.h, p0/m, z1.h, z2.h
; CHECK-NEXT: fcmle p1.h, p0/z, z3.h, #0.0
-; CHECK-NEXT: not p0.b, p0/z, p1.b
-; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: fmla z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
; CHECK-NEXT: ret
entry:
%c = fcmp ugt %n, zeroinitializer
@@ -1814,10 +1814,10 @@
; CHECK-LABEL: fma_nxv2f64_y:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: fmla z0.d, p0/m, z1.d, z2.d
; CHECK-NEXT: fcmle p1.d, p0/z, z3.d, #0.0
-; CHECK-NEXT: not p0.b, p0/z, p1.b
-; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
+; CHECK-NEXT: fmla z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT: not p1.b, p0/z, p1.b
+; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
; CHECK-NEXT: ret
entry:
%c = fcmp ugt %n, zeroinitializer
diff --git a/llvm/test/CodeGen/AArch64/sve-ptest-removal-sink.ll b/llvm/test/CodeGen/AArch64/sve-ptest-removal-sink.ll
--- a/llvm/test/CodeGen/AArch64/sve-ptest-removal-sink.ll
+++ b/llvm/test/CodeGen/AArch64/sve-ptest-removal-sink.ll
@@ -15,9 +15,8 @@
; CHECK-NEXT: cntw x8
; CHECK-NEXT: .LBB0_2: // %for.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: add w10, w9, w8
; CHECK-NEXT: whilelt p0.s, w9, w0
-; CHECK-NEXT: mov w9, w10
+; CHECK-NEXT: add w9, w9, w8
; CHECK-NEXT: b.mi .LBB0_2
; CHECK-NEXT: .LBB0_3: // %exit
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-redundant-store.ll b/llvm/test/CodeGen/AArch64/sve-redundant-store.ll
--- a/llvm/test/CodeGen/AArch64/sve-redundant-store.ll
+++ b/llvm/test/CodeGen/AArch64/sve-redundant-store.ll
@@ -35,8 +35,8 @@
define void @keep_scalable_store(ptr writeonly %ptr, ptr %a, %b) {
; CHECK-LABEL: keep_scalable_store:
; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ldp q2, q1, [x1]
; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ldp q2, q1, [x1]
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
; CHECK-NEXT: stp q2, q1, [x0]
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-select.ll b/llvm/test/CodeGen/AArch64/sve-select.ll
--- a/llvm/test/CodeGen/AArch64/sve-select.ll
+++ b/llvm/test/CodeGen/AArch64/sve-select.ll
@@ -602,10 +602,10 @@
; CHECK-LABEL: select_f32_no_invert_2_op:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: fmul z2.s, z2.s, z3.s
+; CHECK-NEXT: fmul z1.s, z0.s, z1.s
; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0
-; CHECK-NEXT: fmul z0.s, z0.s, z1.s
-; CHECK-NEXT: sel z0.s, p0, z0.s, z2.s
+; CHECK-NEXT: fmul z0.s, z2.s, z3.s
+; CHECK-NEXT: mov z0.s, p0/m, z1.s
; CHECK-NEXT: ret
%p = fcmp oeq %a, zeroinitializer
%fmul1 = fmul %a, %b
diff --git a/llvm/test/CodeGen/AArch64/sve-sext-zext.ll b/llvm/test/CodeGen/AArch64/sve-sext-zext.ll
--- a/llvm/test/CodeGen/AArch64/sve-sext-zext.ll
+++ b/llvm/test/CodeGen/AArch64/sve-sext-zext.ll
@@ -240,11 +240,11 @@
; CHECK-LABEL: sext_b_to_d:
; CHECK: // %bb.0:
; CHECK-NEXT: sunpklo z1.h, z0.b
-; CHECK-NEXT: sunpkhi z6.h, z0.b
+; CHECK-NEXT: sunpkhi z0.h, z0.b
; CHECK-NEXT: sunpklo z2.s, z1.h
; CHECK-NEXT: sunpkhi z3.s, z1.h
-; CHECK-NEXT: sunpklo z5.s, z6.h
-; CHECK-NEXT: sunpkhi z7.s, z6.h
+; CHECK-NEXT: sunpklo z5.s, z0.h
+; CHECK-NEXT: sunpkhi z7.s, z0.h
; CHECK-NEXT: sunpklo z0.d, z2.s
; CHECK-NEXT: sunpkhi z1.d, z2.s
; CHECK-NEXT: sunpklo z2.d, z3.s
@@ -309,11 +309,11 @@
; CHECK-LABEL: zext_b_to_d:
; CHECK: // %bb.0:
; CHECK-NEXT: uunpklo z1.h, z0.b
-; CHECK-NEXT: uunpkhi z6.h, z0.b
+; CHECK-NEXT: uunpkhi z0.h, z0.b
; CHECK-NEXT: uunpklo z2.s, z1.h
; CHECK-NEXT: uunpkhi z3.s, z1.h
-; CHECK-NEXT: uunpklo z5.s, z6.h
-; CHECK-NEXT: uunpkhi z7.s, z6.h
+; CHECK-NEXT: uunpklo z5.s, z0.h
+; CHECK-NEXT: uunpkhi z7.s, z0.h
; CHECK-NEXT: uunpklo z0.d, z2.s
; CHECK-NEXT: uunpkhi z1.d, z2.s
; CHECK-NEXT: uunpklo z2.d, z3.s
diff --git a/llvm/test/CodeGen/AArch64/sve-smulo-sdnode.ll b/llvm/test/CodeGen/AArch64/sve-smulo-sdnode.ll
--- a/llvm/test/CodeGen/AArch64/sve-smulo-sdnode.ll
+++ b/llvm/test/CodeGen/AArch64/sve-smulo-sdnode.ll
@@ -92,18 +92,19 @@
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: movprfx z4, z1
-; CHECK-NEXT: smulh z4.b, p0/m, z4.b, z3.b
-; CHECK-NEXT: mul z1.b, p0/m, z1.b, z3.b
-; CHECK-NEXT: movprfx z3, z0
-; CHECK-NEXT: mul z3.b, p0/m, z3.b, z2.b
-; CHECK-NEXT: asr z5.b, z1.b, #7
+; CHECK-NEXT: mul z4.b, p0/m, z4.b, z3.b
+; CHECK-NEXT: movprfx z5, z0
+; CHECK-NEXT: mul z5.b, p0/m, z5.b, z2.b
+; CHECK-NEXT: smulh z1.b, p0/m, z1.b, z3.b
; CHECK-NEXT: smulh z0.b, p0/m, z0.b, z2.b
-; CHECK-NEXT: asr z2.b, z3.b, #7
-; CHECK-NEXT: cmpne p1.b, p0/z, z4.b, z5.b
-; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, z2.b
-; CHECK-NEXT: mov z1.b, p1/m, #0 // =0x0
-; CHECK-NEXT: mov z3.b, p0/m, #0 // =0x0
-; CHECK-NEXT: mov z0.d, z3.d
+; CHECK-NEXT: asr z2.b, z4.b, #7
+; CHECK-NEXT: asr z3.b, z5.b, #7
+; CHECK-NEXT: cmpne p1.b, p0/z, z1.b, z2.b
+; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, z3.b
+; CHECK-NEXT: mov z5.b, p0/m, #0 // =0x0
+; CHECK-NEXT: mov z4.b, p1/m, #0 // =0x0
+; CHECK-NEXT: mov z0.d, z5.d
+; CHECK-NEXT: mov z1.d, z4.d
; CHECK-NEXT: ret
%a = call { , } @llvm.smul.with.overflow.nxv32i8( %x, %y)
%b = extractvalue { , } %a, 0
@@ -119,31 +120,33 @@
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b
; CHECK-NEXT: movprfx z24, z3
-; CHECK-NEXT: smulh z24.b, p0/m, z24.b, z7.b
-; CHECK-NEXT: mul z3.b, p0/m, z3.b, z7.b
-; CHECK-NEXT: movprfx z7, z2
-; CHECK-NEXT: mul z7.b, p0/m, z7.b, z6.b
-; CHECK-NEXT: smulh z2.b, p0/m, z2.b, z6.b
-; CHECK-NEXT: asr z6.b, z7.b, #7
-; CHECK-NEXT: cmpne p2.b, p0/z, z2.b, z6.b
-; CHECK-NEXT: movprfx z6, z1
-; CHECK-NEXT: smulh z6.b, p0/m, z6.b, z5.b
-; CHECK-NEXT: mul z1.b, p0/m, z1.b, z5.b
-; CHECK-NEXT: asr z25.b, z3.b, #7
-; CHECK-NEXT: asr z5.b, z1.b, #7
-; CHECK-NEXT: movprfx z2, z0
-; CHECK-NEXT: mul z2.b, p0/m, z2.b, z4.b
+; CHECK-NEXT: mul z24.b, p0/m, z24.b, z7.b
+; CHECK-NEXT: movprfx z25, z0
+; CHECK-NEXT: mul z25.b, p0/m, z25.b, z4.b
+; CHECK-NEXT: movprfx z26, z2
+; CHECK-NEXT: mul z26.b, p0/m, z26.b, z6.b
+; CHECK-NEXT: movprfx z27, z1
+; CHECK-NEXT: mul z27.b, p0/m, z27.b, z5.b
+; CHECK-NEXT: smulh z3.b, p0/m, z3.b, z7.b
; CHECK-NEXT: smulh z0.b, p0/m, z0.b, z4.b
-; CHECK-NEXT: asr z4.b, z2.b, #7
-; CHECK-NEXT: cmpne p1.b, p0/z, z24.b, z25.b
-; CHECK-NEXT: cmpne p3.b, p0/z, z6.b, z5.b
-; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, z4.b
-; CHECK-NEXT: mov z7.b, p2/m, #0 // =0x0
-; CHECK-NEXT: mov z2.b, p0/m, #0 // =0x0
-; CHECK-NEXT: mov z1.b, p3/m, #0 // =0x0
-; CHECK-NEXT: mov z3.b, p1/m, #0 // =0x0
-; CHECK-NEXT: mov z0.d, z2.d
-; CHECK-NEXT: mov z2.d, z7.d
+; CHECK-NEXT: asr z4.b, z25.b, #7
+; CHECK-NEXT: smulh z2.b, p0/m, z2.b, z6.b
+; CHECK-NEXT: smulh z1.b, p0/m, z1.b, z5.b
+; CHECK-NEXT: asr z5.b, z24.b, #7
+; CHECK-NEXT: asr z6.b, z26.b, #7
+; CHECK-NEXT: asr z7.b, z27.b, #7
+; CHECK-NEXT: cmpne p1.b, p0/z, z0.b, z4.b
+; CHECK-NEXT: cmpne p2.b, p0/z, z3.b, z5.b
+; CHECK-NEXT: cmpne p3.b, p0/z, z2.b, z6.b
+; CHECK-NEXT: cmpne p0.b, p0/z, z1.b, z7.b
p0.b, p0/z, z1.b, z7.b +; CHECK-NEXT: mov z25.b, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z24.b, p2/m, #0 // =0x0 +; CHECK-NEXT: mov z27.b, p0/m, #0 // =0x0 +; CHECK-NEXT: mov z26.b, p3/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, z25.d +; CHECK-NEXT: mov z3.d, z24.d +; CHECK-NEXT: mov z1.d, z27.d +; CHECK-NEXT: mov z2.d, z26.d ; CHECK-NEXT: ret %a = call { , } @llvm.smul.with.overflow.nxv64i8( %x, %y) %b = extractvalue { , } %a, 0 @@ -222,18 +225,19 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: movprfx z4, z1 -; CHECK-NEXT: smulh z4.h, p0/m, z4.h, z3.h -; CHECK-NEXT: mul z1.h, p0/m, z1.h, z3.h -; CHECK-NEXT: movprfx z3, z0 -; CHECK-NEXT: mul z3.h, p0/m, z3.h, z2.h -; CHECK-NEXT: asr z5.h, z1.h, #15 +; CHECK-NEXT: mul z4.h, p0/m, z4.h, z3.h +; CHECK-NEXT: movprfx z5, z0 +; CHECK-NEXT: mul z5.h, p0/m, z5.h, z2.h +; CHECK-NEXT: smulh z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: smulh z0.h, p0/m, z0.h, z2.h -; CHECK-NEXT: asr z2.h, z3.h, #15 -; CHECK-NEXT: cmpne p1.h, p0/z, z4.h, z5.h -; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, z2.h -; CHECK-NEXT: mov z1.h, p1/m, #0 // =0x0 -; CHECK-NEXT: mov z3.h, p0/m, #0 // =0x0 -; CHECK-NEXT: mov z0.d, z3.d +; CHECK-NEXT: asr z2.h, z4.h, #15 +; CHECK-NEXT: asr z3.h, z5.h, #15 +; CHECK-NEXT: cmpne p1.h, p0/z, z1.h, z2.h +; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, z3.h +; CHECK-NEXT: mov z5.h, p0/m, #0 // =0x0 +; CHECK-NEXT: mov z4.h, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, z5.d +; CHECK-NEXT: mov z1.d, z4.d ; CHECK-NEXT: ret %a = call { , } @llvm.smul.with.overflow.nxv16i16( %x, %y) %b = extractvalue { , } %a, 0 @@ -249,31 +253,33 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: movprfx z24, z3 -; CHECK-NEXT: smulh z24.h, p0/m, z24.h, z7.h -; CHECK-NEXT: mul z3.h, p0/m, z3.h, z7.h -; CHECK-NEXT: movprfx z7, z2 -; CHECK-NEXT: mul z7.h, p0/m, z7.h, z6.h -; CHECK-NEXT: smulh z2.h, p0/m, z2.h, z6.h -; CHECK-NEXT: asr z6.h, z7.h, #15 -; CHECK-NEXT: cmpne p2.h, p0/z, z2.h, z6.h -; CHECK-NEXT: movprfx z6, z1 -; CHECK-NEXT: smulh z6.h, p0/m, z6.h, z5.h -; CHECK-NEXT: mul z1.h, p0/m, z1.h, z5.h -; CHECK-NEXT: asr z25.h, z3.h, #15 -; CHECK-NEXT: asr z5.h, z1.h, #15 -; CHECK-NEXT: movprfx z2, z0 -; CHECK-NEXT: mul z2.h, p0/m, z2.h, z4.h +; CHECK-NEXT: mul z24.h, p0/m, z24.h, z7.h +; CHECK-NEXT: movprfx z25, z0 +; CHECK-NEXT: mul z25.h, p0/m, z25.h, z4.h +; CHECK-NEXT: movprfx z26, z2 +; CHECK-NEXT: mul z26.h, p0/m, z26.h, z6.h +; CHECK-NEXT: movprfx z27, z1 +; CHECK-NEXT: mul z27.h, p0/m, z27.h, z5.h +; CHECK-NEXT: smulh z3.h, p0/m, z3.h, z7.h ; CHECK-NEXT: smulh z0.h, p0/m, z0.h, z4.h -; CHECK-NEXT: asr z4.h, z2.h, #15 -; CHECK-NEXT: cmpne p1.h, p0/z, z24.h, z25.h -; CHECK-NEXT: cmpne p3.h, p0/z, z6.h, z5.h -; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, z4.h -; CHECK-NEXT: mov z7.h, p2/m, #0 // =0x0 -; CHECK-NEXT: mov z2.h, p0/m, #0 // =0x0 -; CHECK-NEXT: mov z1.h, p3/m, #0 // =0x0 -; CHECK-NEXT: mov z3.h, p1/m, #0 // =0x0 -; CHECK-NEXT: mov z0.d, z2.d -; CHECK-NEXT: mov z2.d, z7.d +; CHECK-NEXT: asr z4.h, z25.h, #15 +; CHECK-NEXT: smulh z2.h, p0/m, z2.h, z6.h +; CHECK-NEXT: smulh z1.h, p0/m, z1.h, z5.h +; CHECK-NEXT: asr z5.h, z24.h, #15 +; CHECK-NEXT: asr z6.h, z26.h, #15 +; CHECK-NEXT: asr z7.h, z27.h, #15 +; CHECK-NEXT: cmpne p1.h, p0/z, z0.h, z4.h +; CHECK-NEXT: cmpne p2.h, p0/z, z3.h, z5.h +; CHECK-NEXT: cmpne p3.h, p0/z, z2.h, z6.h +; CHECK-NEXT: cmpne p0.h, p0/z, z1.h, z7.h +; CHECK-NEXT: mov z25.h, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z24.h, p2/m, #0 // =0x0 +; CHECK-NEXT: mov z27.h, p0/m, #0 // =0x0 +; CHECK-NEXT: mov z26.h, p3/m, #0 // =0x0 +; CHECK-NEXT: mov 
z0.d, z25.d +; CHECK-NEXT: mov z3.d, z24.d +; CHECK-NEXT: mov z1.d, z27.d +; CHECK-NEXT: mov z2.d, z26.d ; CHECK-NEXT: ret %a = call { , } @llvm.smul.with.overflow.nxv32i16( %x, %y) %b = extractvalue { , } %a, 0 @@ -331,18 +337,19 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: movprfx z4, z1 -; CHECK-NEXT: smulh z4.s, p0/m, z4.s, z3.s -; CHECK-NEXT: mul z1.s, p0/m, z1.s, z3.s -; CHECK-NEXT: movprfx z3, z0 -; CHECK-NEXT: mul z3.s, p0/m, z3.s, z2.s -; CHECK-NEXT: asr z5.s, z1.s, #31 +; CHECK-NEXT: mul z4.s, p0/m, z4.s, z3.s +; CHECK-NEXT: movprfx z5, z0 +; CHECK-NEXT: mul z5.s, p0/m, z5.s, z2.s +; CHECK-NEXT: smulh z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: smulh z0.s, p0/m, z0.s, z2.s -; CHECK-NEXT: asr z2.s, z3.s, #31 -; CHECK-NEXT: cmpne p1.s, p0/z, z4.s, z5.s -; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, z2.s -; CHECK-NEXT: mov z1.s, p1/m, #0 // =0x0 -; CHECK-NEXT: mov z3.s, p0/m, #0 // =0x0 -; CHECK-NEXT: mov z0.d, z3.d +; CHECK-NEXT: asr z2.s, z4.s, #31 +; CHECK-NEXT: asr z3.s, z5.s, #31 +; CHECK-NEXT: cmpne p1.s, p0/z, z1.s, z2.s +; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, z3.s +; CHECK-NEXT: mov z5.s, p0/m, #0 // =0x0 +; CHECK-NEXT: mov z4.s, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, z5.d +; CHECK-NEXT: mov z1.d, z4.d ; CHECK-NEXT: ret %a = call { , } @llvm.smul.with.overflow.nxv8i32( %x, %y) %b = extractvalue { , } %a, 0 @@ -358,31 +365,33 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: movprfx z24, z3 -; CHECK-NEXT: smulh z24.s, p0/m, z24.s, z7.s -; CHECK-NEXT: mul z3.s, p0/m, z3.s, z7.s -; CHECK-NEXT: movprfx z7, z2 -; CHECK-NEXT: mul z7.s, p0/m, z7.s, z6.s -; CHECK-NEXT: smulh z2.s, p0/m, z2.s, z6.s -; CHECK-NEXT: asr z6.s, z7.s, #31 -; CHECK-NEXT: cmpne p2.s, p0/z, z2.s, z6.s -; CHECK-NEXT: movprfx z6, z1 -; CHECK-NEXT: smulh z6.s, p0/m, z6.s, z5.s -; CHECK-NEXT: mul z1.s, p0/m, z1.s, z5.s -; CHECK-NEXT: asr z25.s, z3.s, #31 -; CHECK-NEXT: asr z5.s, z1.s, #31 -; CHECK-NEXT: movprfx z2, z0 -; CHECK-NEXT: mul z2.s, p0/m, z2.s, z4.s +; CHECK-NEXT: mul z24.s, p0/m, z24.s, z7.s +; CHECK-NEXT: movprfx z25, z0 +; CHECK-NEXT: mul z25.s, p0/m, z25.s, z4.s +; CHECK-NEXT: movprfx z26, z2 +; CHECK-NEXT: mul z26.s, p0/m, z26.s, z6.s +; CHECK-NEXT: movprfx z27, z1 +; CHECK-NEXT: mul z27.s, p0/m, z27.s, z5.s +; CHECK-NEXT: smulh z3.s, p0/m, z3.s, z7.s ; CHECK-NEXT: smulh z0.s, p0/m, z0.s, z4.s -; CHECK-NEXT: asr z4.s, z2.s, #31 -; CHECK-NEXT: cmpne p1.s, p0/z, z24.s, z25.s -; CHECK-NEXT: cmpne p3.s, p0/z, z6.s, z5.s -; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, z4.s -; CHECK-NEXT: mov z7.s, p2/m, #0 // =0x0 -; CHECK-NEXT: mov z2.s, p0/m, #0 // =0x0 -; CHECK-NEXT: mov z1.s, p3/m, #0 // =0x0 -; CHECK-NEXT: mov z3.s, p1/m, #0 // =0x0 -; CHECK-NEXT: mov z0.d, z2.d -; CHECK-NEXT: mov z2.d, z7.d +; CHECK-NEXT: asr z4.s, z25.s, #31 +; CHECK-NEXT: smulh z2.s, p0/m, z2.s, z6.s +; CHECK-NEXT: smulh z1.s, p0/m, z1.s, z5.s +; CHECK-NEXT: asr z5.s, z24.s, #31 +; CHECK-NEXT: asr z6.s, z26.s, #31 +; CHECK-NEXT: asr z7.s, z27.s, #31 +; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, z4.s +; CHECK-NEXT: cmpne p2.s, p0/z, z3.s, z5.s +; CHECK-NEXT: cmpne p3.s, p0/z, z2.s, z6.s +; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, z7.s +; CHECK-NEXT: mov z25.s, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z24.s, p2/m, #0 // =0x0 +; CHECK-NEXT: mov z27.s, p0/m, #0 // =0x0 +; CHECK-NEXT: mov z26.s, p3/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, z25.d +; CHECK-NEXT: mov z3.d, z24.d +; CHECK-NEXT: mov z1.d, z27.d +; CHECK-NEXT: mov z2.d, z26.d ; CHECK-NEXT: ret %a = call { , } @llvm.smul.with.overflow.nxv16i32( %x, %y) %b = extractvalue { , } %a, 0 @@ 
-419,18 +428,19 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: movprfx z4, z1 -; CHECK-NEXT: smulh z4.d, p0/m, z4.d, z3.d -; CHECK-NEXT: mul z1.d, p0/m, z1.d, z3.d -; CHECK-NEXT: movprfx z3, z0 -; CHECK-NEXT: mul z3.d, p0/m, z3.d, z2.d -; CHECK-NEXT: asr z5.d, z1.d, #63 +; CHECK-NEXT: mul z4.d, p0/m, z4.d, z3.d +; CHECK-NEXT: movprfx z5, z0 +; CHECK-NEXT: mul z5.d, p0/m, z5.d, z2.d +; CHECK-NEXT: smulh z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: smulh z0.d, p0/m, z0.d, z2.d -; CHECK-NEXT: asr z2.d, z3.d, #63 -; CHECK-NEXT: cmpne p1.d, p0/z, z4.d, z5.d -; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, z2.d -; CHECK-NEXT: mov z1.d, p1/m, #0 // =0x0 -; CHECK-NEXT: mov z3.d, p0/m, #0 // =0x0 -; CHECK-NEXT: mov z0.d, z3.d +; CHECK-NEXT: asr z2.d, z4.d, #63 +; CHECK-NEXT: asr z3.d, z5.d, #63 +; CHECK-NEXT: cmpne p1.d, p0/z, z1.d, z2.d +; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, z3.d +; CHECK-NEXT: mov z5.d, p0/m, #0 // =0x0 +; CHECK-NEXT: mov z4.d, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, z5.d +; CHECK-NEXT: mov z1.d, z4.d ; CHECK-NEXT: ret %a = call { , } @llvm.smul.with.overflow.nxv4i64( %x, %y) %b = extractvalue { , } %a, 0 @@ -446,31 +456,33 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: movprfx z24, z3 -; CHECK-NEXT: smulh z24.d, p0/m, z24.d, z7.d -; CHECK-NEXT: mul z3.d, p0/m, z3.d, z7.d -; CHECK-NEXT: movprfx z7, z2 -; CHECK-NEXT: mul z7.d, p0/m, z7.d, z6.d -; CHECK-NEXT: smulh z2.d, p0/m, z2.d, z6.d -; CHECK-NEXT: asr z6.d, z7.d, #63 -; CHECK-NEXT: cmpne p2.d, p0/z, z2.d, z6.d -; CHECK-NEXT: movprfx z6, z1 -; CHECK-NEXT: smulh z6.d, p0/m, z6.d, z5.d -; CHECK-NEXT: mul z1.d, p0/m, z1.d, z5.d -; CHECK-NEXT: asr z25.d, z3.d, #63 -; CHECK-NEXT: asr z5.d, z1.d, #63 -; CHECK-NEXT: movprfx z2, z0 -; CHECK-NEXT: mul z2.d, p0/m, z2.d, z4.d +; CHECK-NEXT: mul z24.d, p0/m, z24.d, z7.d +; CHECK-NEXT: movprfx z25, z0 +; CHECK-NEXT: mul z25.d, p0/m, z25.d, z4.d +; CHECK-NEXT: movprfx z26, z2 +; CHECK-NEXT: mul z26.d, p0/m, z26.d, z6.d +; CHECK-NEXT: movprfx z27, z1 +; CHECK-NEXT: mul z27.d, p0/m, z27.d, z5.d +; CHECK-NEXT: smulh z3.d, p0/m, z3.d, z7.d ; CHECK-NEXT: smulh z0.d, p0/m, z0.d, z4.d -; CHECK-NEXT: asr z4.d, z2.d, #63 -; CHECK-NEXT: cmpne p1.d, p0/z, z24.d, z25.d -; CHECK-NEXT: cmpne p3.d, p0/z, z6.d, z5.d -; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, z4.d -; CHECK-NEXT: mov z7.d, p2/m, #0 // =0x0 -; CHECK-NEXT: mov z2.d, p0/m, #0 // =0x0 -; CHECK-NEXT: mov z1.d, p3/m, #0 // =0x0 -; CHECK-NEXT: mov z3.d, p1/m, #0 // =0x0 -; CHECK-NEXT: mov z0.d, z2.d -; CHECK-NEXT: mov z2.d, z7.d +; CHECK-NEXT: asr z4.d, z25.d, #63 +; CHECK-NEXT: smulh z2.d, p0/m, z2.d, z6.d +; CHECK-NEXT: smulh z1.d, p0/m, z1.d, z5.d +; CHECK-NEXT: asr z5.d, z24.d, #63 +; CHECK-NEXT: asr z6.d, z26.d, #63 +; CHECK-NEXT: asr z7.d, z27.d, #63 +; CHECK-NEXT: cmpne p1.d, p0/z, z0.d, z4.d +; CHECK-NEXT: cmpne p2.d, p0/z, z3.d, z5.d +; CHECK-NEXT: cmpne p3.d, p0/z, z2.d, z6.d +; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, z7.d +; CHECK-NEXT: mov z25.d, p1/m, #0 // =0x0 +; CHECK-NEXT: mov z24.d, p2/m, #0 // =0x0 +; CHECK-NEXT: mov z27.d, p0/m, #0 // =0x0 +; CHECK-NEXT: mov z26.d, p3/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, z25.d +; CHECK-NEXT: mov z3.d, z24.d +; CHECK-NEXT: mov z1.d, z27.d +; CHECK-NEXT: mov z2.d, z26.d ; CHECK-NEXT: ret %a = call { , } @llvm.smul.with.overflow.nxv8i64( %x, %y) %b = extractvalue { , } %a, 0 diff --git a/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll b/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll --- a/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll +++ 
b/llvm/test/CodeGen/AArch64/sve-split-extract-elt.ll @@ -22,15 +22,15 @@ ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: mov x8, #-1 -; CHECK-NEXT: mov w9, w0 ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: st1b { z1.b }, p0, [sp, #1, mul vl] -; CHECK-NEXT: st1b { z0.b }, p0, [sp] +; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov w9, w0 ; CHECK-NEXT: addvl x8, x8, #2 ; CHECK-NEXT: cmp x9, x8 ; CHECK-NEXT: csel x8, x9, x8, lo ; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: st1b { z1.b }, p0, [sp, #1, mul vl] +; CHECK-NEXT: st1b { z0.b }, p0, [sp] ; CHECK-NEXT: ldrb w0, [x9, x8] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -46,15 +46,15 @@ ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: mov x8, #-1 -; CHECK-NEXT: mov w9, w0 ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl] -; CHECK-NEXT: st1h { z0.h }, p0, [sp] +; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov w9, w0 ; CHECK-NEXT: addvl x8, x8, #1 ; CHECK-NEXT: cmp x9, x8 ; CHECK-NEXT: csel x8, x9, x8, lo ; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl] +; CHECK-NEXT: st1h { z0.h }, p0, [sp] ; CHECK-NEXT: ldrh w0, [x9, x8, lsl #1] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -70,14 +70,14 @@ ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG ; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: cnth x8 ; CHECK-NEXT: mov w9, w0 ; CHECK-NEXT: sub x8, x8, #1 -; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl] ; CHECK-NEXT: csel x8, x9, x8, lo ; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl] ; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: ldr w0, [x9, x8, lsl #2] ; CHECK-NEXT: addvl sp, sp, #2 @@ -94,14 +94,14 @@ ; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG ; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: cnth x8 ; CHECK-NEXT: mov w9, w0 ; CHECK-NEXT: sub x8, x8, #1 -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: cmp x9, x8 -; CHECK-NEXT: st1d { z3.d }, p0, [sp, #3, mul vl] ; CHECK-NEXT: csel x8, x9, x8, lo ; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: st1d { z3.d }, p0, [sp, #3, mul vl] ; CHECK-NEXT: st1d { z2.d }, p0, [sp, #2, mul vl] ; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl] ; CHECK-NEXT: st1d { z0.d }, p0, [sp] @@ -140,15 +140,15 @@ ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: mov x8, #-1 -; CHECK-NEXT: mov w9, #128 ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl] -; CHECK-NEXT: st1h { z0.h }, p0, [sp] +; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov w9, #128 // =0x80 ; CHECK-NEXT: addvl x8, x8, #1 ; CHECK-NEXT: cmp x8, #128 ; CHECK-NEXT: csel x8, x8, x9, lo ; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: st1h 
{ z1.h }, p0, [sp, #1, mul vl] +; CHECK-NEXT: st1h { z0.h }, p0, [sp] ; CHECK-NEXT: ldrh w0, [x9, x8, lsl #1] ; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -164,18 +164,18 @@ ; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: mov x8, #-1 -; CHECK-NEXT: mov w9, #34464 -; CHECK-NEXT: movk w9, #1, lsl #16 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: st1w { z3.s }, p0, [sp, #3, mul vl] -; CHECK-NEXT: st1w { z2.s }, p0, [sp, #2, mul vl] +; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov w9, #34464 // =0x86a0 +; CHECK-NEXT: movk w9, #1, lsl #16 ; CHECK-NEXT: addvl x8, x8, #1 -; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl] ; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: csel x8, x8, x9, lo ; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: st1w { z3.s }, p0, [sp, #3, mul vl] +; CHECK-NEXT: st1w { z2.s }, p0, [sp, #2, mul vl] +; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl] +; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: ldr w0, [x9, x8, lsl #2] ; CHECK-NEXT: addvl sp, sp, #4 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload @@ -191,14 +191,14 @@ ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG ; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: cntw x8 -; CHECK-NEXT: mov w9, #10 +; CHECK-NEXT: mov w9, #10 // =0xa ; CHECK-NEXT: sub x8, x8, #1 -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: cmp x8, #10 -; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl] ; CHECK-NEXT: csel x8, x8, x9, lo ; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl] ; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: ldr x0, [x9, x8, lsl #3] ; CHECK-NEXT: addvl sp, sp, #2 diff --git a/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll b/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll --- a/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll +++ b/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll @@ -42,7 +42,6 @@ ; CHECK-NEXT: uunpklo z2.d, z1.s ; CHECK-NEXT: uunpkhi z1.d, z1.s ; CHECK-NEXT: uunpklo z3.d, z0.s -; CHECK-NEXT: fcvt z1.d, p0/m, z1.h ; CHECK-NEXT: uunpkhi z4.d, z0.s ; CHECK-NEXT: movprfx z0, z2 ; CHECK-NEXT: fcvt z0.d, p0/m, z2.h @@ -50,6 +49,7 @@ ; CHECK-NEXT: fcvt z2.d, p0/m, z3.h ; CHECK-NEXT: movprfx z3, z4 ; CHECK-NEXT: fcvt z3.d, p0/m, z4.h +; CHECK-NEXT: fcvt z1.d, p0/m, z1.h ; CHECK-NEXT: ret %res = fpext %a to ret %res @@ -77,13 +77,13 @@ ; CHECK-NEXT: uunpkhi z3.d, z0.s ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: uunpklo z4.d, z1.s -; CHECK-NEXT: uunpkhi z5.d, z1.s ; CHECK-NEXT: movprfx z0, z2 ; CHECK-NEXT: fcvt z0.d, p0/m, z2.s -; CHECK-NEXT: movprfx z1, z3 -; CHECK-NEXT: fcvt z1.d, p0/m, z3.s ; CHECK-NEXT: movprfx z2, z4 ; CHECK-NEXT: fcvt z2.d, p0/m, z4.s +; CHECK-NEXT: uunpkhi z5.d, z1.s +; CHECK-NEXT: movprfx z1, z3 +; CHECK-NEXT: fcvt z1.d, p0/m, z3.s ; CHECK-NEXT: movprfx z3, z5 ; CHECK-NEXT: fcvt z3.d, p0/m, z5.s ; CHECK-NEXT: ret @@ -149,9 +149,9 @@ ; CHECK-LABEL: fcvts_nxv8f64: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.d -; CHECK-NEXT: fcvt z3.s, p0/m, z3.d ; CHECK-NEXT: fcvt z1.s, p0/m, z1.d ; CHECK-NEXT: fcvt z0.s, p0/m, z0.d +; CHECK-NEXT: fcvt z3.s, p0/m, z3.d ; CHECK-NEXT: fcvt z2.s, p0/m, z2.d ; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s ; CHECK-NEXT: uzp1 z1.s, z2.s, z3.s @@ -214,13 +214,13 @@ ; CHECK-NEXT: uunpkhi z3.s, z0.h ; 
CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: uunpklo z4.s, z1.h -; CHECK-NEXT: uunpkhi z5.s, z1.h ; CHECK-NEXT: movprfx z0, z2 ; CHECK-NEXT: fcvtzs z0.s, p0/m, z2.h -; CHECK-NEXT: movprfx z1, z3 -; CHECK-NEXT: fcvtzs z1.s, p0/m, z3.h ; CHECK-NEXT: movprfx z2, z4 ; CHECK-NEXT: fcvtzs z2.s, p0/m, z4.h +; CHECK-NEXT: uunpkhi z5.s, z1.h +; CHECK-NEXT: movprfx z1, z3 +; CHECK-NEXT: fcvtzs z1.s, p0/m, z3.h ; CHECK-NEXT: movprfx z3, z5 ; CHECK-NEXT: fcvtzs z3.s, p0/m, z5.h ; CHECK-NEXT: ret @@ -300,7 +300,6 @@ ; CHECK-NEXT: sunpklo z2.s, z1.h ; CHECK-NEXT: sunpkhi z1.s, z1.h ; CHECK-NEXT: sunpklo z3.s, z0.h -; CHECK-NEXT: scvtf z1.s, p0/m, z1.s ; CHECK-NEXT: sunpkhi z4.s, z0.h ; CHECK-NEXT: movprfx z0, z2 ; CHECK-NEXT: scvtf z0.s, p0/m, z2.s @@ -308,6 +307,7 @@ ; CHECK-NEXT: scvtf z2.s, p0/m, z3.s ; CHECK-NEXT: movprfx z3, z4 ; CHECK-NEXT: scvtf z3.s, p0/m, z4.s +; CHECK-NEXT: scvtf z1.s, p0/m, z1.s ; CHECK-NEXT: ret %res = sitofp %a to ret %res diff --git a/llvm/test/CodeGen/AArch64/sve-split-fp-reduce.ll b/llvm/test/CodeGen/AArch64/sve-split-fp-reduce.ll --- a/llvm/test/CodeGen/AArch64/sve-split-fp-reduce.ll +++ b/llvm/test/CodeGen/AArch64/sve-split-fp-reduce.ll @@ -6,8 +6,8 @@ define double @fadda_nxv8f64(double %init, %a) { ; CHECK-LABEL: fadda_nxv8f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: fadda d0, p0, d0, z1.d ; CHECK-NEXT: fadda d0, p0, d0, z2.d ; CHECK-NEXT: fadda d0, p0, d0, z3.d diff --git a/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll b/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll --- a/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll +++ b/llvm/test/CodeGen/AArch64/sve-split-insert-elt.ll @@ -6,9 +6,9 @@ define @promote_insert_8i8( %a, i8 %elt, i64 %idx) { ; CHECK-LABEL: promote_insert_8i8: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: index z1.h, #0, #1 ; CHECK-NEXT: mov z2.h, w1 -; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z2.h ; CHECK-NEXT: mov z0.h, p0/m, w0 ; CHECK-NEXT: ret @@ -23,14 +23,14 @@ ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: mov x8, #-1 -; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: st1b { z1.b }, p0, [sp, #1, mul vl] -; CHECK-NEXT: st1b { z0.b }, p0, [sp] +; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: addvl x8, x8, #2 ; CHECK-NEXT: cmp x1, x8 ; CHECK-NEXT: csel x8, x1, x8, lo +; CHECK-NEXT: st1b { z1.b }, p0, [sp, #1, mul vl] +; CHECK-NEXT: st1b { z0.b }, p0, [sp] ; CHECK-NEXT: strb w0, [x9, x8] ; CHECK-NEXT: ld1b { z0.b }, p0/z, [sp] ; CHECK-NEXT: ld1b { z1.b }, p0/z, [sp, #1, mul vl] @@ -48,13 +48,13 @@ ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG ; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: cnth x8 ; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: sub x8, x8, #1 -; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: cmp x0, x8 -; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl] ; CHECK-NEXT: csel x8, x0, x8, lo +; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl] ; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: str s2, [x9, x8, lsl #2] ; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp] @@ -73,13 +73,13 @@ ; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 
0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG ; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: cnth x8 ; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: sub x8, x8, #1 -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: cmp x1, x8 -; CHECK-NEXT: st1d { z3.d }, p0, [sp, #3, mul vl] ; CHECK-NEXT: csel x8, x1, x8, lo +; CHECK-NEXT: st1d { z3.d }, p0, [sp, #3, mul vl] ; CHECK-NEXT: st1d { z2.d }, p0, [sp, #2, mul vl] ; CHECK-NEXT: st1d { z1.d }, p0, [sp, #1, mul vl] ; CHECK-NEXT: st1d { z0.d }, p0, [sp] @@ -100,11 +100,11 @@ define @promote_insert_4i16( %a, i16 %elt) { ; CHECK-LABEL: promote_insert_4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #5 -; CHECK-NEXT: index z2.s, #0, #1 ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z1.s, w8 -; CHECK-NEXT: cmpeq p0.s, p0/z, z2.s, z1.s +; CHECK-NEXT: mov w8, #5 // =0x5 +; CHECK-NEXT: index z1.s, #0, #1 +; CHECK-NEXT: mov z2.s, w8 +; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s ; CHECK-NEXT: mov z0.s, p0/m, w0 ; CHECK-NEXT: ret %ins = insertelement %a, i16 %elt, i64 5 @@ -117,11 +117,11 @@ define @split_insert_32i8( %a, i8 %elt) { ; CHECK-LABEL: split_insert_32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #3 -; CHECK-NEXT: index z3.b, #0, #1 ; CHECK-NEXT: ptrue p0.b -; CHECK-NEXT: mov z2.b, w8 -; CHECK-NEXT: cmpeq p0.b, p0/z, z3.b, z2.b +; CHECK-NEXT: mov w8, #3 // =0x3 +; CHECK-NEXT: index z2.b, #0, #1 +; CHECK-NEXT: mov z3.b, w8 +; CHECK-NEXT: cmpeq p0.b, p0/z, z2.b, z3.b ; CHECK-NEXT: mov z0.b, p0/m, w0 ; CHECK-NEXT: ret %ins = insertelement %a, i8 %elt, i64 3 @@ -135,17 +135,17 @@ ; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x20, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 32 * VG ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: mov x8, #-1 -; CHECK-NEXT: mov w9, #128 ; CHECK-NEXT: ptrue p0.h -; CHECK-NEXT: st1h { z3.h }, p0, [sp, #3, mul vl] -; CHECK-NEXT: st1h { z2.h }, p0, [sp, #2, mul vl] +; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff +; CHECK-NEXT: mov w9, #128 // =0x80 ; CHECK-NEXT: addvl x8, x8, #2 -; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl] ; CHECK-NEXT: cmp x8, #128 -; CHECK-NEXT: st1h { z0.h }, p0, [sp] ; CHECK-NEXT: csel x8, x8, x9, lo ; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: st1h { z3.h }, p0, [sp, #3, mul vl] +; CHECK-NEXT: st1h { z2.h }, p0, [sp, #2, mul vl] +; CHECK-NEXT: st1h { z1.h }, p0, [sp, #1, mul vl] +; CHECK-NEXT: st1h { z0.h }, p0, [sp] ; CHECK-NEXT: strh w0, [x9, x8, lsl #1] ; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp] ; CHECK-NEXT: ld1h { z1.h }, p0/z, [sp, #1, mul vl] @@ -165,12 +165,12 @@ ; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG ; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: cnth x8 -; CHECK-NEXT: mov w9, #16960 +; CHECK-NEXT: mov w9, #16960 // =0x4240 ; CHECK-NEXT: movk w9, #15, lsl #16 ; CHECK-NEXT: sub x8, x8, #1 ; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: csel x8, x8, x9, lo ; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: st1w { z1.s }, p0, [sp, #1, mul vl] diff --git a/llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll b/llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll --- a/llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll +++ b/llvm/test/CodeGen/AArch64/sve-split-int-pred-reduce.ll @@ -18,20 +18,12 @@ define i1 @andv_nxv64i1( %a) { ; CHECK-LABEL: andv_nxv64i1: ; CHECK: // %bb.0: -; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill -; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG -; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: and p1.b, p1/z, p1.b, p3.b ; CHECK-NEXT: and p0.b, p0/z, p0.b, p2.b -; CHECK-NEXT: ptrue p4.b ; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b -; CHECK-NEXT: nots p0.b, p4/z, p0.b -; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ptrue p1.b +; CHECK-NEXT: nots p0.b, p1/z, p0.b ; CHECK-NEXT: cset w0, eq -; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %res = call i1 @llvm.vector.reduce.and.nxv64i1( %a) ret i1 %res diff --git a/llvm/test/CodeGen/AArch64/sve-split-load.ll b/llvm/test/CodeGen/AArch64/sve-split-load.ll --- a/llvm/test/CodeGen/AArch64/sve-split-load.ll +++ b/llvm/test/CodeGen/AArch64/sve-split-load.ll @@ -122,11 +122,11 @@ ; CHECK-NEXT: punpkhi p0.h, p0.b ; CHECK-NEXT: punpklo p2.h, p1.b ; CHECK-NEXT: punpkhi p1.h, p1.b -; CHECK-NEXT: ld1d { z0.d }, p2/z, [x0] -; CHECK-NEXT: punpklo p2.h, p0.b +; CHECK-NEXT: punpklo p3.h, p0.b ; CHECK-NEXT: punpkhi p0.h, p0.b +; CHECK-NEXT: ld1d { z0.d }, p2/z, [x0] ; CHECK-NEXT: ld1d { z1.d }, p1/z, [x0, #1, mul vl] -; CHECK-NEXT: ld1d { z2.d }, p2/z, [x0, #2, mul vl] +; CHECK-NEXT: ld1d { z2.d }, p3/z, [x0, #2, mul vl] ; CHECK-NEXT: ld1d { z3.d }, p0/z, [x0, #3, mul vl] ; CHECK-NEXT: ret %load = call @llvm.masked.load.nxv8i64( *%a, i32 1, %pg, undef) diff --git a/llvm/test/CodeGen/AArch64/sve-split-store.ll b/llvm/test/CodeGen/AArch64/sve-split-store.ll --- a/llvm/test/CodeGen/AArch64/sve-split-store.ll +++ b/llvm/test/CodeGen/AArch64/sve-split-store.ll @@ -80,11 +80,11 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: punpkhi p2.h, p1.b ; CHECK-NEXT: punpklo p1.h, p1.b +; CHECK-NEXT: punpkhi p3.h, p0.b +; CHECK-NEXT: punpklo p0.h, p0.b ; CHECK-NEXT: st1h { z3.h }, p2, [x0, #3, mul vl] ; CHECK-NEXT: st1h { z2.h }, p1, [x0, #2, mul vl] -; CHECK-NEXT: punpkhi p1.h, p0.b -; CHECK-NEXT: punpklo p0.h, p0.b -; CHECK-NEXT: st1h { z1.h }, p1, [x0, #1, mul vl] +; CHECK-NEXT: st1h { z1.h }, p3, [x0, #1, mul vl] ; CHECK-NEXT: st1h { z0.h }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.masked.store.nxv32i16( %data, *%a, i32 1, %pg) @@ -110,11 +110,11 @@ ; CHECK-NEXT: punpklo p0.h, p0.b ; CHECK-NEXT: punpkhi p2.h, p1.b ; CHECK-NEXT: punpklo p1.h, p1.b +; CHECK-NEXT: punpkhi p3.h, p0.b +; CHECK-NEXT: punpklo p0.h, p0.b ; CHECK-NEXT: st1d { z3.d }, p2, [x0, #3, mul vl] ; CHECK-NEXT: st1d { z2.d }, p1, [x0, #2, mul vl] -; CHECK-NEXT: punpkhi p1.h, p0.b -; CHECK-NEXT: punpklo p0.h, p0.b -; CHECK-NEXT: st1d { z1.d }, p1, [x0, #1, mul vl] +; CHECK-NEXT: st1d { z1.d }, p3, [x0, #1, mul vl] ; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.masked.store.nxv8i64( %data, *%a, i32 1, %pg) diff --git a/llvm/test/CodeGen/AArch64/sve-srem-combine-loop.ll b/llvm/test/CodeGen/AArch64/sve-srem-combine-loop.ll --- a/llvm/test/CodeGen/AArch64/sve-srem-combine-loop.ll +++ b/llvm/test/CodeGen/AArch64/sve-srem-combine-loop.ll @@ -6,10 +6,10 @@ define @srem_combine_loop( %a) #0 { ; CHECK-LABEL: srem_combine_loop: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z1.d, z0.d ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: asrd z1.s, p0/m, z1.s, #1 +; CHECK-NEXT: mov z1.d, z0.d ; CHECK-NEXT: mov z2.s, #2 // =0x2 +; CHECK-NEXT: asrd z1.s, p0/m, z1.s, #1 ; CHECK-NEXT: mls z0.s, p0/m, z1.s, z2.s ; CHECK-NEXT: ret %rem = srem 
%a, shufflevector ( insertelement ( poison, i32 2, i32 0), poison, zeroinitializer) diff --git a/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-imm.ll b/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-imm.ll --- a/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-st1-addressing-mode-reg-imm.ll @@ -39,8 +39,8 @@ define void @st1b_out_of_upper_bound( %data, * %a) { ; CHECK-LABEL: st1b_out_of_upper_bound: ; CHECK: // %bb.0: -; CHECK-NEXT: rdvl x8, #8 ; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: rdvl x8, #8 ; CHECK-NEXT: st1b { z0.b }, p0, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , * %a, i64 8 @@ -51,8 +51,8 @@ define void @st1b_out_of_lower_bound( %data, * %a) { ; CHECK-LABEL: st1b_out_of_lower_bound: ; CHECK: // %bb.0: -; CHECK-NEXT: rdvl x8, #-9 ; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: rdvl x8, #-9 ; CHECK-NEXT: st1b { z0.b }, p0, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , * %a, i64 -9 @@ -105,8 +105,8 @@ define void @store_nxv2f32(* %out) { ; CHECK-LABEL: store_nxv2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov z0.s, #1.00000000 ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: fmov z0.s, #1.00000000 ; CHECK-NEXT: st1w { z0.d }, p0, [x0] ; CHECK-NEXT: ret %ins = insertelement undef, float 1.0, i32 0 @@ -118,8 +118,8 @@ define void @store_nxv4f16(* %out) { ; CHECK-LABEL: store_nxv4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov z0.h, #1.00000000 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: fmov z0.h, #1.00000000 ; CHECK-NEXT: st1h { z0.s }, p0, [x0] ; CHECK-NEXT: ret %ins = insertelement undef, half 1.0, i32 0 @@ -133,9 +133,9 @@ define void @store_nxv6f32(* %out) { ; CHECK-LABEL: store_nxv6f32: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov z0.s, #1.00000000 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ptrue p1.s +; CHECK-NEXT: fmov z0.s, #1.00000000 ; CHECK-NEXT: st1w { z0.d }, p0, [x0, #2, mul vl] ; CHECK-NEXT: st1w { z0.s }, p1, [x0] ; CHECK-NEXT: ret @@ -148,9 +148,9 @@ define void @store_nxv12f16(* %out) { ; CHECK-LABEL: store_nxv12f16: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov z0.h, #1.00000000 ; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: ptrue p1.h +; CHECK-NEXT: fmov z0.h, #1.00000000 ; CHECK-NEXT: st1h { z0.s }, p0, [x0, #2, mul vl] ; CHECK-NEXT: st1h { z0.h }, p1, [x0] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-stepvector.ll b/llvm/test/CodeGen/AArch64/sve-stepvector.ll --- a/llvm/test/CodeGen/AArch64/sve-stepvector.ll +++ b/llvm/test/CodeGen/AArch64/sve-stepvector.ll @@ -49,8 +49,8 @@ ; CHECK-LABEL: stepvector_nxv6i64: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: index z0.d, #0, #1 -; CHECK-NEXT: mov z1.d, z0.d ; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: mov z1.d, z0.d ; CHECK-NEXT: incd z1.d ; CHECK-NEXT: incd z2.d, all, mul #2 ; CHECK-NEXT: ret @@ -209,10 +209,10 @@ ; CHECK-LABEL: multiple_use_stepvector_nxv4i32_1: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: mov z0.s, w0 -; CHECK-NEXT: index z1.s, w0, #1 -; CHECK-NEXT: mul z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: sub z0.s, z0.s, z1.s +; CHECK-NEXT: index z0.s, w0, #1 +; CHECK-NEXT: mov z1.s, w0 +; CHECK-NEXT: mul z1.s, p0/m, z1.s, z0.s +; CHECK-NEXT: sub z0.s, z1.s, z0.s ; CHECK-NEXT: ret entry: %0 = insertelement poison, i32 %data, i32 0 @@ -242,8 +242,8 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: index z0.d, #0, #1 ; CHECK-NEXT: mov z1.d, x0 -; CHECK-NEXT: add z1.d, z0.d, z1.d ; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: add z1.d, z0.d, z1.d ; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: ret entry: @@ -271,7 +271,7 @@ define @mul_stepvector_nxv2i64() { ; 
CHECK-LABEL: mul_stepvector_nxv2i64: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #2222 +; CHECK-NEXT: mov w8, #2222 // =0x8ae ; CHECK-NEXT: index z0.d, #0, x8 ; CHECK-NEXT: ret entry: @@ -285,7 +285,7 @@ define @mul_stepvector_bigconst_nxv2i64() { ; CHECK-LABEL: mul_stepvector_bigconst_nxv2i64: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov x8, #146028888064 +; CHECK-NEXT: mov x8, #146028888064 // =0x2200000000 ; CHECK-NEXT: index z0.d, #0, x8 ; CHECK-NEXT: ret entry: @@ -299,7 +299,7 @@ define @mul_add_stepvector_nxv2i64(i64 %x) { ; CHECK-LABEL: mul_add_stepvector_nxv2i64: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w8, #2222 +; CHECK-NEXT: mov w8, #2222 // =0x8ae ; CHECK-NEXT: index z0.d, x0, x8 ; CHECK-NEXT: ret entry: @@ -332,7 +332,7 @@ define @mul_add_stepvector_bigconst_nxv2i64(i64 %x) { ; CHECK-LABEL: mul_add_stepvector_bigconst_nxv2i64: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov x8, #146028888064 +; CHECK-NEXT: mov x8, #146028888064 // =0x2200000000 ; CHECK-NEXT: index z0.d, x0, x8 ; CHECK-NEXT: ret entry: @@ -425,12 +425,12 @@ ; CHECK-LABEL: split_sub_stepvector_nxv16i32: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: cntw x8 -; CHECK-NEXT: cnth x9 -; CHECK-NEXT: neg x8, x8 -; CHECK-NEXT: neg x9, x9 ; CHECK-NEXT: index z0.s, #0, #-1 +; CHECK-NEXT: neg x8, x8 ; CHECK-NEXT: mov z1.s, w8 -; CHECK-NEXT: mov z3.s, w9 +; CHECK-NEXT: cnth x8 +; CHECK-NEXT: neg x8, x8 +; CHECK-NEXT: mov z3.s, w8 ; CHECK-NEXT: add z1.s, z0.s, z1.s ; CHECK-NEXT: add z2.s, z0.s, z3.s ; CHECK-NEXT: add z3.s, z1.s, z3.s diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-and-combine.ll @@ -122,8 +122,8 @@ define <2 x i32> @vls_sve_and_2xi32(<2 x i32> %b) nounwind { ; CHECK-LABEL: vls_sve_and_2xi32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: index z1.s, #0, #-1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: and z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -164,8 +164,8 @@ define <2 x i64> @vls_sve_and_2xi64(<2 x i64> %b) nounwind { ; CHECK-LABEL: vls_sve_and_2xi64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: index z1.d, #0, #-1 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: and z0.d, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -176,9 +176,9 @@ define <4 x i64> @vls_sve_and_4xi64(<4 x i64> %b) nounwind { ; CHECK-LABEL: vls_sve_and_4xi64: ; CHECK: // %bb.0: +; CHECK-NEXT: index z2.d, #0, #-1 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 -; CHECK-NEXT: index z2.d, #0, #-1 ; CHECK-NEXT: and z0.d, z0.d, z2.d ; CHECK-NEXT: and z1.d, z1.d, z2.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bit-counting.ll @@ -10,8 +10,8 @@ define <4 x i8> @ctlz_v4i8(<4 x i8> %op) { ; CHECK-LABEL: ctlz_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; 
CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: and z0.h, z0.h, #0xff ; CHECK-NEXT: clz z0.h, p0/m, z0.h ; CHECK-NEXT: sub z0.h, z0.h, #8 // =0x8 @@ -24,8 +24,8 @@ define <8 x i8> @ctlz_v8i8(<8 x i8> %op) { ; CHECK-LABEL: ctlz_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl8 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: clz z0.b, p0/m, z0.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -36,8 +36,8 @@ define <16 x i8> @ctlz_v16i8(<16 x i8> %op) { ; CHECK-LABEL: ctlz_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: clz z0.b, p0/m, z0.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -48,8 +48,8 @@ define void @ctlz_v32i8(ptr %a) { ; CHECK-LABEL: ctlz_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: clz z0.b, p0/m, z0.b ; CHECK-NEXT: clz z1.b, p0/m, z1.b ; CHECK-NEXT: stp q0, q1, [x0] @@ -63,8 +63,8 @@ define <2 x i16> @ctlz_v2i16(<2 x i16> %op) { ; CHECK-LABEL: ctlz_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: and z0.s, z0.s, #0xffff ; CHECK-NEXT: clz z0.s, p0/m, z0.s ; CHECK-NEXT: sub z0.s, z0.s, #16 // =0x10 @@ -77,8 +77,8 @@ define <4 x i16> @ctlz_v4i16(<4 x i16> %op) { ; CHECK-LABEL: ctlz_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: clz z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -89,8 +89,8 @@ define <8 x i16> @ctlz_v8i16(<8 x i16> %op) { ; CHECK-LABEL: ctlz_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: clz z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -101,8 +101,8 @@ define void @ctlz_v16i16(ptr %a) { ; CHECK-LABEL: ctlz_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: clz z0.h, p0/m, z0.h ; CHECK-NEXT: clz z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] @@ -116,8 +116,8 @@ define <2 x i32> @ctlz_v2i32(<2 x i32> %op) { ; CHECK-LABEL: ctlz_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: clz z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -128,8 +128,8 @@ define <4 x i32> @ctlz_v4i32(<4 x i32> %op) { ; CHECK-LABEL: ctlz_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: clz z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -140,8 +140,8 @@ define void @ctlz_v8i32(ptr %a) { ; CHECK-LABEL: ctlz_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: clz z0.s, p0/m, z0.s ; CHECK-NEXT: clz z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] @@ -155,8 +155,8 @@ define <1 x i64> @ctlz_v1i64(<1 x i64> 
%op) { ; CHECK-LABEL: ctlz_v1i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: clz z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -167,8 +167,8 @@ define <2 x i64> @ctlz_v2i64(<2 x i64> %op) { ; CHECK-LABEL: ctlz_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: clz z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -179,8 +179,8 @@ define void @ctlz_v4i64(ptr %a) { ; CHECK-LABEL: ctlz_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: clz z0.d, p0/m, z0.d ; CHECK-NEXT: clz z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] @@ -198,8 +198,8 @@ define <4 x i8> @ctpop_v4i8(<4 x i8> %op) { ; CHECK-LABEL: ctpop_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: and z0.h, z0.h, #0xff ; CHECK-NEXT: cnt z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -211,8 +211,8 @@ define <8 x i8> @ctpop_v8i8(<8 x i8> %op) { ; CHECK-LABEL: ctpop_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl8 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: cnt z0.b, p0/m, z0.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -223,8 +223,8 @@ define <16 x i8> @ctpop_v16i8(<16 x i8> %op) { ; CHECK-LABEL: ctpop_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: cnt z0.b, p0/m, z0.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -235,8 +235,8 @@ define void @ctpop_v32i8(ptr %a) { ; CHECK-LABEL: ctpop_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: cnt z0.b, p0/m, z0.b ; CHECK-NEXT: cnt z1.b, p0/m, z1.b ; CHECK-NEXT: stp q0, q1, [x0] @@ -250,8 +250,8 @@ define <2 x i16> @ctpop_v2i16(<2 x i16> %op) { ; CHECK-LABEL: ctpop_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: and z0.s, z0.s, #0xffff ; CHECK-NEXT: cnt z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -263,8 +263,8 @@ define <4 x i16> @ctpop_v4i16(<4 x i16> %op) { ; CHECK-LABEL: ctpop_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: cnt z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -275,8 +275,8 @@ define <8 x i16> @ctpop_v8i16(<8 x i16> %op) { ; CHECK-LABEL: ctpop_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: cnt z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -287,8 +287,8 @@ define void @ctpop_v16i16(ptr %a) { ; CHECK-LABEL: ctpop_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 +; 
CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: cnt z0.h, p0/m, z0.h ; CHECK-NEXT: cnt z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] @@ -302,8 +302,8 @@ define <2 x i32> @ctpop_v2i32(<2 x i32> %op) { ; CHECK-LABEL: ctpop_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: cnt z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -314,8 +314,8 @@ define <4 x i32> @ctpop_v4i32(<4 x i32> %op) { ; CHECK-LABEL: ctpop_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: cnt z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -326,8 +326,8 @@ define void @ctpop_v8i32(ptr %a) { ; CHECK-LABEL: ctpop_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: cnt z0.s, p0/m, z0.s ; CHECK-NEXT: cnt z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] @@ -341,8 +341,8 @@ define <1 x i64> @ctpop_v1i64(<1 x i64> %op) { ; CHECK-LABEL: ctpop_v1i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: cnt z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -353,8 +353,8 @@ define <2 x i64> @ctpop_v2i64(<2 x i64> %op) { ; CHECK-LABEL: ctpop_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: cnt z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -365,8 +365,8 @@ define void @ctpop_v4i64(ptr %a) { ; CHECK-LABEL: ctpop_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: cnt z0.d, p0/m, z0.d ; CHECK-NEXT: cnt z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] @@ -384,8 +384,8 @@ define <4 x i8> @cttz_v4i8(<4 x i8> %op) { ; CHECK-LABEL: cttz_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: orr z0.h, z0.h, #0x100 ; CHECK-NEXT: rbit z0.h, p0/m, z0.h ; CHECK-NEXT: clz z0.h, p0/m, z0.h @@ -398,8 +398,8 @@ define <8 x i8> @cttz_v8i8(<8 x i8> %op) { ; CHECK-LABEL: cttz_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl8 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: rbit z0.b, p0/m, z0.b ; CHECK-NEXT: clz z0.b, p0/m, z0.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -411,8 +411,8 @@ define <16 x i8> @cttz_v16i8(<16 x i8> %op) { ; CHECK-LABEL: cttz_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: rbit z0.b, p0/m, z0.b ; CHECK-NEXT: clz z0.b, p0/m, z0.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -424,11 +424,11 @@ define void @cttz_v32i8(ptr %a) { ; CHECK-LABEL: cttz_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: rbit z0.b, p0/m, z0.b -; CHECK-NEXT: clz z0.b, p0/m, z0.b ; CHECK-NEXT: rbit z1.b, p0/m, z1.b +; CHECK-NEXT: clz z0.b, p0/m, z0.b 
; CHECK-NEXT: clz z1.b, p0/m, z1.b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -441,8 +441,8 @@ define <2 x i16> @cttz_v2i16(<2 x i16> %op) { ; CHECK-LABEL: cttz_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: orr z0.s, z0.s, #0x10000 ; CHECK-NEXT: rbit z0.s, p0/m, z0.s ; CHECK-NEXT: clz z0.s, p0/m, z0.s @@ -455,8 +455,8 @@ define <4 x i16> @cttz_v4i16(<4 x i16> %op) { ; CHECK-LABEL: cttz_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: rbit z0.h, p0/m, z0.h ; CHECK-NEXT: clz z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -468,8 +468,8 @@ define <8 x i16> @cttz_v8i16(<8 x i16> %op) { ; CHECK-LABEL: cttz_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: rbit z0.h, p0/m, z0.h ; CHECK-NEXT: clz z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -481,11 +481,11 @@ define void @cttz_v16i16(ptr %a) { ; CHECK-LABEL: cttz_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: rbit z0.h, p0/m, z0.h -; CHECK-NEXT: clz z0.h, p0/m, z0.h ; CHECK-NEXT: rbit z1.h, p0/m, z1.h +; CHECK-NEXT: clz z0.h, p0/m, z0.h ; CHECK-NEXT: clz z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -498,8 +498,8 @@ define <2 x i32> @cttz_v2i32(<2 x i32> %op) { ; CHECK-LABEL: cttz_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: rbit z0.s, p0/m, z0.s ; CHECK-NEXT: clz z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -511,8 +511,8 @@ define <4 x i32> @cttz_v4i32(<4 x i32> %op) { ; CHECK-LABEL: cttz_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: rbit z0.s, p0/m, z0.s ; CHECK-NEXT: clz z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -524,11 +524,11 @@ define void @cttz_v8i32(ptr %a) { ; CHECK-LABEL: cttz_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: rbit z0.s, p0/m, z0.s -; CHECK-NEXT: clz z0.s, p0/m, z0.s ; CHECK-NEXT: rbit z1.s, p0/m, z1.s +; CHECK-NEXT: clz z0.s, p0/m, z0.s ; CHECK-NEXT: clz z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -541,8 +541,8 @@ define <1 x i64> @cttz_v1i64(<1 x i64> %op) { ; CHECK-LABEL: cttz_v1i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: rbit z0.d, p0/m, z0.d ; CHECK-NEXT: clz z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -554,8 +554,8 @@ define <2 x i64> @cttz_v2i64(<2 x i64> %op) { ; CHECK-LABEL: cttz_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: rbit z0.d, p0/m, z0.d ; CHECK-NEXT: clz z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -567,11 +567,11 @@ define void @cttz_v4i64(ptr %a) { ; 
CHECK-LABEL: cttz_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: rbit z0.d, p0/m, z0.d -; CHECK-NEXT: clz z0.d, p0/m, z0.d ; CHECK-NEXT: rbit z1.d, p0/m, z1.d +; CHECK-NEXT: clz z0.d, p0/m, z0.d ; CHECK-NEXT: clz z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitcast.ll @@ -66,9 +66,9 @@ ; CHECK-NEXT: ldr d0, [sp] ; CHECK-NEXT: mov z1.s, z0.s[1] ; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: fmov w9, s1 ; CHECK-NEXT: strh w8, [sp, #8] -; CHECK-NEXT: strh w9, [sp, #10] +; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: strh w8, [sp, #10] ; CHECK-NEXT: ldr d0, [sp, #8] ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: str w8, [x1] diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-bitselect.ll @@ -12,22 +12,22 @@ define <8 x i32> @fixed_bitselect_v8i32(ptr %pre_cond_ptr, ptr %left_ptr, ptr %right_ptr) { ; CHECK-LABEL: fixed_bitselect_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q3, q2, [x0] ; CHECK-NEXT: mov z0.s, #-1 // =0xffffffffffffffff -; CHECK-NEXT: add z7.s, z3.s, z0.s -; CHECK-NEXT: subr z3.s, z3.s, #0 // =0x0 -; CHECK-NEXT: ldp q1, q4, [x1] +; CHECK-NEXT: ldp q2, q1, [x0] +; CHECK-NEXT: ldp q5, q4, [x1] +; CHECK-NEXT: ldp q6, q7, [x2] +; CHECK-NEXT: add z3.s, z1.s, z0.s +; CHECK-NEXT: subr z1.s, z1.s, #0 // =0x0 ; CHECK-NEXT: add z0.s, z2.s, z0.s ; CHECK-NEXT: subr z2.s, z2.s, #0 // =0x0 -; CHECK-NEXT: and z1.d, z3.d, z1.d -; CHECK-NEXT: ldp q5, q6, [x2] -; CHECK-NEXT: and z2.d, z2.d, z4.d -; CHECK-NEXT: and z3.d, z0.d, z6.d -; CHECK-NEXT: and z0.d, z7.d, z5.d -; CHECK-NEXT: orr z0.d, z0.d, z1.d -; CHECK-NEXT: orr z1.d, z3.d, z2.d -; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: and z1.d, z1.d, z4.d +; CHECK-NEXT: and z3.d, z3.d, z7.d +; CHECK-NEXT: and z0.d, z0.d, z6.d +; CHECK-NEXT: and z2.d, z2.d, z5.d +; CHECK-NEXT: orr z1.d, z3.d, z1.d +; CHECK-NEXT: orr z0.d, z0.d, z2.d ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret %pre_cond = load <8 x i32>, ptr %pre_cond_ptr %left = load <8 x i32>, ptr %left_ptr diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-build-vector.ll @@ -56,12 +56,12 @@ ; CHECK-LABEL: build_vector_minus2_dec32_v4i64: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x8, #-32 // =0xffffffffffffffe0 -; CHECK-NEXT: mov z0.d, #-66 // =0xffffffffffffffbe +; CHECK-NEXT: mov z1.d, #-66 // =0xffffffffffffffbe ; CHECK-NEXT: mov z2.d, #-2 // =0xfffffffffffffffe -; CHECK-NEXT: index z1.d, #0, x8 -; CHECK-NEXT: add z0.d, z1.d, z0.d -; CHECK-NEXT: add z1.d, z1.d, z2.d -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: index z0.d, #0, x8 +; CHECK-NEXT: add z1.d, z0.d, z1.d +; CHECK-NEXT: add z0.d, z0.d, z2.d +; CHECK-NEXT: stp q0, q1, 
[x0]
 ; CHECK-NEXT: ret
   store <4 x i64> , ptr %a, align 8
   ret void
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-concat.ll
@@ -13,29 +13,29 @@
 ; CHECK-NEXT: sub sp, sp, #16
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: fmov w8, s1
-; CHECK-NEXT: fmov w9, s0
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: mov z2.h, z1.h[3]
 ; CHECK-NEXT: mov z3.h, z1.h[2]
-; CHECK-NEXT: mov z4.h, z1.h[1]
-; CHECK-NEXT: fmov w10, s2
+; CHECK-NEXT: mov z1.h, z1.h[1]
+; CHECK-NEXT: mov z4.h, z0.h[3]
 ; CHECK-NEXT: strb w8, [sp, #12]
-; CHECK-NEXT: fmov w8, s3
-; CHECK-NEXT: strb w9, [sp, #8]
-; CHECK-NEXT: fmov w9, s4
-; CHECK-NEXT: mov z1.h, z0.h[3]
-; CHECK-NEXT: mov z5.h, z0.h[2]
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: strb w8, [sp, #8]
+; CHECK-NEXT: fmov w8, s2
+; CHECK-NEXT: mov z2.h, z0.h[2]
 ; CHECK-NEXT: mov z0.h, z0.h[1]
-; CHECK-NEXT: strb w10, [sp, #15]
-; CHECK-NEXT: fmov w10, s1
+; CHECK-NEXT: strb w8, [sp, #15]
+; CHECK-NEXT: fmov w8, s3
 ; CHECK-NEXT: strb w8, [sp, #14]
-; CHECK-NEXT: fmov w8, s5
-; CHECK-NEXT: strb w9, [sp, #13]
-; CHECK-NEXT: fmov w9, s0
-; CHECK-NEXT: strb w10, [sp, #11]
+; CHECK-NEXT: fmov w8, s1
+; CHECK-NEXT: strb w8, [sp, #13]
+; CHECK-NEXT: fmov w8, s4
+; CHECK-NEXT: strb w8, [sp, #11]
+; CHECK-NEXT: fmov w8, s2
 ; CHECK-NEXT: strb w8, [sp, #10]
-; CHECK-NEXT: strb w9, [sp, #9]
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: strb w8, [sp, #9]
 ; CHECK-NEXT: ldr d0, [sp, #8]
 ; CHECK-NEXT: add sp, sp, #16
 ; CHECK-NEXT: ret
@@ -46,8 +46,8 @@
 define <16 x i8> @concat_v16i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; CHECK-LABEL: concat_v16i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.b, vl8
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -78,9 +78,9 @@
 ; CHECK-LABEL: concat_v64i8:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldp q0, q1, [x1]
-; CHECK-NEXT: ldp q2, q3, [x0]
+; CHECK-NEXT: ldp q3, q2, [x0]
 ; CHECK-NEXT: stp q0, q1, [x2, #32]
-; CHECK-NEXT: stp q2, q3, [x2]
+; CHECK-NEXT: stp q3, q2, [x2]
 ; CHECK-NEXT: ret
   %op1 = load <32 x i8>, ptr %a
   %op2 = load <32 x i8>, ptr %b
@@ -107,16 +107,16 @@
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: mov z2.s, z1.s[1]
 ; CHECK-NEXT: fmov w8, s1
 ; CHECK-NEXT: fmov w9, s0
-; CHECK-NEXT: mov z1.s, z1.s[1]
-; CHECK-NEXT: mov z0.s, z0.s[1]
-; CHECK-NEXT: fmov w10, s1
-; CHECK-NEXT: fmov w11, s0
+; CHECK-NEXT: mov z1.s, z0.s[1]
 ; CHECK-NEXT: strh w8, [sp, #12]
+; CHECK-NEXT: fmov w8, s2
 ; CHECK-NEXT: strh w9, [sp, #8]
-; CHECK-NEXT: strh w10, [sp, #14]
-; CHECK-NEXT: strh w11, [sp, #10]
+; CHECK-NEXT: fmov w9, s1
+; CHECK-NEXT: strh w8, [sp, #14]
+; CHECK-NEXT: strh w9, [sp, #10]
 ; CHECK-NEXT: ldr d0, [sp, #8]
 ; CHECK-NEXT: add sp, sp, #16
 ; CHECK-NEXT: ret
@@ -128,8 +128,8 @@
 define <8 x i16> @concat_v8i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: concat_v8i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.h, vl4
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -157,9 +157,9 @@
 ; CHECK-LABEL: concat_v32i16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldp q0, q1, [x1]
-; CHECK-NEXT: ldp q2, q3, [x0]
+; CHECK-NEXT: ldp q3, q2, [x0]
 ; CHECK-NEXT: stp q0, q1, [x2, #32]
-; CHECK-NEXT: stp q2, q3, [x2]
+; CHECK-NEXT: stp q3, q2, [x2]
 ; CHECK-NEXT: ret
   %op1 = load <16 x i16>, ptr %a
   %op2 = load <16 x i16>, ptr %b
@@ -192,8 +192,8 @@
 define <4 x i32> @concat_v4i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: concat_v4i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.s, vl2
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -220,9 +220,9 @@
 ; CHECK-LABEL: concat_v16i32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldp q0, q1, [x1]
-; CHECK-NEXT: ldp q2, q3, [x0]
+; CHECK-NEXT: ldp q3, q2, [x0]
 ; CHECK-NEXT: stp q0, q1, [x2, #32]
-; CHECK-NEXT: stp q2, q3, [x2]
+; CHECK-NEXT: stp q3, q2, [x2]
 ; CHECK-NEXT: ret
   %op1 = load <8 x i32>, ptr %a
   %op2 = load <8 x i32>, ptr %b
@@ -240,8 +240,8 @@
 define <2 x i64> @concat_v2i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; CHECK-LABEL: concat_v2i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.d, vl1
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -268,9 +268,9 @@
 ; CHECK-LABEL: concat_v8i64:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldp q0, q1, [x1]
-; CHECK-NEXT: ldp q2, q3, [x0]
+; CHECK-NEXT: ldp q3, q2, [x0]
 ; CHECK-NEXT: stp q0, q1, [x2, #32]
-; CHECK-NEXT: stp q2, q3, [x2]
+; CHECK-NEXT: stp q3, q2, [x2]
 ; CHECK-NEXT: ret
   %op1 = load <4 x i64>, ptr %a
   %op2 = load <4 x i64>, ptr %b
@@ -290,12 +290,12 @@
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: mov z2.h, z1.h[1]
 ; CHECK-NEXT: str h1, [sp, #12]
+; CHECK-NEXT: mov z1.h, z0.h[1]
 ; CHECK-NEXT: str h0, [sp, #8]
-; CHECK-NEXT: mov z1.h, z1.h[1]
-; CHECK-NEXT: mov z0.h, z0.h[1]
-; CHECK-NEXT: str h1, [sp, #14]
-; CHECK-NEXT: str h0, [sp, #10]
+; CHECK-NEXT: str h2, [sp, #14]
+; CHECK-NEXT: str h1, [sp, #10]
 ; CHECK-NEXT: ldr d0, [sp, #8]
 ; CHECK-NEXT: add sp, sp, #16
 ; CHECK-NEXT: ret
@@ -306,8 +306,8 @@
 define <8 x half> @concat_v8f16(<4 x half> %op1, <4 x half> %op2) {
 ; CHECK-LABEL: concat_v8f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.h, vl4
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -335,9 +335,9 @@
 ; CHECK-LABEL: concat_v32f16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldp q0, q1, [x1]
-; CHECK-NEXT: ldp q2, q3, [x0]
+; CHECK-NEXT: ldp q3, q2, [x0]
 ; CHECK-NEXT: stp q0, q1, [x2, #32]
-; CHECK-NEXT: stp q2, q3, [x2]
+; CHECK-NEXT: stp q3, q2, [x2]
 ; CHECK-NEXT: ret
   %op1 = load <16 x half>, ptr %a
   %op2 = load <16 x half>, ptr %b
@@ -370,8 +370,8 @@
 define <4 x float> @concat_v4f32(<2 x float> %op1, <2 x float> %op2) {
 ; CHECK-LABEL: concat_v4f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.s, vl2
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -398,9 +398,9 @@
 ; CHECK-LABEL: concat_v16f32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldp q0, q1, [x1]
-; CHECK-NEXT: ldp q2, q3, [x0]
+; CHECK-NEXT: ldp q3, q2, [x0]
 ; CHECK-NEXT: stp q0, q1, [x2, #32]
-; CHECK-NEXT: stp q2, q3, [x2]
+; CHECK-NEXT: stp q3, q2, [x2]
 ; CHECK-NEXT: ret
   %op1 = load <8 x float>, ptr %a
   %op2 = load <8 x float>, ptr %b
@@ -418,8 +418,8 @@
 define <2 x double> @concat_v2f64(<1 x double> %op1, <1 x double> %op2) {
 ; CHECK-LABEL: concat_v2f64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.d, vl1
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT: splice z0.d, p0, z0.d, z1.d
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -446,9 +446,9 @@
 ; CHECK-LABEL: concat_v8f64:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldp q0, q1, [x1]
-; CHECK-NEXT: ldp q2, q3, [x0]
+; CHECK-NEXT: ldp q3, q2, [x0]
 ; CHECK-NEXT: stp q0, q1, [x2, #32]
-; CHECK-NEXT: stp q2, q3, [x2]
+; CHECK-NEXT: stp q3, q2, [x2]
 ; CHECK-NEXT: ret
   %op1 = load <4 x double>, ptr %a
   %op2 = load <4 x double>, ptr %b
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ext-loads.ll
@@ -50,8 +50,8 @@
 ; CHECK-NEXT: mov x6, xzr
 ; CHECK-NEXT: mov z1.d, z0.d[1]
 ; CHECK-NEXT: fmov x0, d0
-; CHECK-NEXT: fmov x4, d1
 ; CHECK-NEXT: mov x7, xzr
+; CHECK-NEXT: fmov x4, d1
 ; CHECK-NEXT: ret
   %a = load <2 x i64>, ptr %ap
   %val = zext <2 x i64> %a to <2 x i256>
@@ -61,18 +61,18 @@
 define <16 x i32> @load_sext_v16i8i32(ptr %ap) {
 ; CHECK-LABEL: load_sext_v16i8i32:
 ; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s, vl4
 ; CHECK-NEXT: mov w8, #4 // =0x4
 ; CHECK-NEXT: mov w9, #8 // =0x8
 ; CHECK-NEXT: mov w10, #12 // =0xc
-; CHECK-NEXT: ptrue p0.s, vl4
 ; CHECK-NEXT: ld1sb { z1.s }, p0/z, [x0, x8]
 ; CHECK-NEXT: ld1sb { z2.s }, p0/z, [x0, x9]
 ; CHECK-NEXT: ld1sb { z3.s }, p0/z, [x0, x10]
 ; CHECK-NEXT: ld1sb { z0.s }, p0/z, [x0]
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT: // kill: def $q2 killed $q2 killed $z2
 ; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z3
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT: ret
   %a = load <16 x i8>, ptr %ap
   %val = sext <16 x i8> %a to <16 x i32>
@@ -82,8 +82,8 @@
 define <8 x i32> @load_sext_v8i16i32(ptr %ap) {
 ; CHECK-LABEL: load_sext_v8i16i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #4 // =0x4
 ; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: mov x8, #4 // =0x4
 ; CHECK-NEXT: ld1sh { z1.s }, p0/z, [x0, x8, lsl #1]
 ; CHECK-NEXT: ld1sh { z0.s }, p0/z, [x0]
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -100,25 +100,25 @@
 ; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: sunpklo z1.d, z0.s
 ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: fmov x9, d1
 ; CHECK-NEXT: sunpklo z0.d, z0.s
+; CHECK-NEXT: fmov x9, d1
+; CHECK-NEXT: mov z1.d, z1.d[1]
 ; CHECK-NEXT: fmov x11, d0
 ; CHECK-NEXT: mov z0.d, z0.d[1]
 ; CHECK-NEXT: asr x10, x9, #63
-; CHECK-NEXT: asr x12, x11, #63
 ; CHECK-NEXT: stp x9, x10, [x8]
-; CHECK-NEXT: fmov x9, d0
-; CHECK-NEXT: mov z0.d, z1.d[1]
+; CHECK-NEXT: fmov x9, d1
+; CHECK-NEXT: asr x12, x11, #63
+; CHECK-NEXT: stp x10, x10, [x8, #16]
 ; CHECK-NEXT: stp x11, x12, [x8, #64]
 ; CHECK-NEXT: fmov x11, d0
-; CHECK-NEXT: stp x10, x10, [x8, #16]
-; CHECK-NEXT: stp x12, x12, [x8, #80]
 ; CHECK-NEXT: asr x10, x9, #63
+; CHECK-NEXT: stp x12, x12, [x8, #80]
+; CHECK-NEXT: stp x10, x10, [x8, #48]
 ; CHECK-NEXT: asr x12, x11, #63
-; CHECK-NEXT: stp x10, x10, [x8, #112]
-; CHECK-NEXT: stp x9, x10, [x8, #96]
-; CHECK-NEXT: stp x12, x12, [x8, #48]
-; CHECK-NEXT: stp x11, x12, [x8, #32]
+; CHECK-NEXT: stp x9, x10, [x8, #32]
+; CHECK-NEXT: stp x12, x12, [x8, #112]
+; CHECK-NEXT: stp x11, x12, [x8, #96]
 ; CHECK-NEXT: ret
   %a = load <4 x i32>, ptr %ap
   %val = sext <4 x i32> %a to <4 x i256>
@@ -130,28 +130,28 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: fmov x8, d0
-; CHECK-NEXT: mov z0.d, z0.d[1]
-; CHECK-NEXT: fmov x10, d0
+; CHECK-NEXT: mov z1.d, z0.d[1]
 ; CHECK-NEXT: asr x9, x8, #63
-; CHECK-NEXT: asr x11, x10, #63
+; CHECK-NEXT: fmov x10, d1
 ; CHECK-NEXT: stp x8, x9, [sp, #-32]!
 ; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: asr x8, x10, #63
 ; CHECK-NEXT: mov z0.d, x9
-; CHECK-NEXT: stp x10, x11, [sp, #16]
-; CHECK-NEXT: mov z1.d, z0.d[1]
+; CHECK-NEXT: stp x10, x8, [sp, #16]
+; CHECK-NEXT: mov z1.d, x8
+; CHECK-NEXT: ldp q2, q4, [sp], #32
+; CHECK-NEXT: mov z3.d, z0.d[1]
+; CHECK-NEXT: mov z5.d, z1.d[1]
+; CHECK-NEXT: mov z6.d, z2.d[1]
 ; CHECK-NEXT: fmov x2, d0
-; CHECK-NEXT: mov z0.d, x11
-; CHECK-NEXT: fmov x3, d1
-; CHECK-NEXT: ldp q1, q3, [sp], #32
-; CHECK-NEXT: mov z2.d, z0.d[1]
-; CHECK-NEXT: fmov x6, d0
-; CHECK-NEXT: mov z0.d, z1.d[1]
-; CHECK-NEXT: fmov x0, d1
-; CHECK-NEXT: mov z1.d, z3.d[1]
-; CHECK-NEXT: fmov x7, d2
-; CHECK-NEXT: fmov x4, d3
-; CHECK-NEXT: fmov x1, d0
-; CHECK-NEXT: fmov x5, d1
+; CHECK-NEXT: mov z0.d, z4.d[1]
+; CHECK-NEXT: fmov x6, d1
+; CHECK-NEXT: fmov x0, d2
+; CHECK-NEXT: fmov x4, d4
+; CHECK-NEXT: fmov x3, d3
+; CHECK-NEXT: fmov x7, d5
+; CHECK-NEXT: fmov x1, d6
+; CHECK-NEXT: fmov x5, d0
 ; CHECK-NEXT: ret
   %a = load <2 x i64>, ptr %ap
   %val = sext <2 x i64> %a to <2 x i256>
@@ -161,29 +161,29 @@
 define <16 x i64> @load_zext_v16i16i64(ptr %ap) {
 ; CHECK-LABEL: load_zext_v16i16i64:
 ; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d, vl2
 ; CHECK-NEXT: mov x8, #2 // =0x2
 ; CHECK-NEXT: mov x9, #4 // =0x4
-; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: mov x10, #6 // =0x6
-; CHECK-NEXT: mov x11, #8 // =0x8
-; CHECK-NEXT: mov x12, #10 // =0xa
 ; CHECK-NEXT: ld1h { z1.d }, p0/z, [x0, x8, lsl #1]
-; CHECK-NEXT: mov x8, #12 // =0xc
+; CHECK-NEXT: mov x8, #6 // =0x6
 ; CHECK-NEXT: ld1h { z2.d }, p0/z, [x0, x9, lsl #1]
-; CHECK-NEXT: mov x9, #14 // =0xe
-; CHECK-NEXT: ld1h { z3.d }, p0/z, [x0, x10, lsl #1]
-; CHECK-NEXT: ld1h { z4.d }, p0/z, [x0, x11, lsl #1]
-; CHECK-NEXT: ld1h { z5.d }, p0/z, [x0, x12, lsl #1]
-; CHECK-NEXT: ld1h { z6.d }, p0/z, [x0, x8, lsl #1]
-; CHECK-NEXT: ld1h { z7.d }, p0/z, [x0, x9, lsl #1]
-; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0]
-; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
+; CHECK-NEXT: mov x9, #8 // =0x8
+; CHECK-NEXT: ld1h { z3.d }, p0/z, [x0, x8, lsl #1]
+; CHECK-NEXT: mov x8, #10 // =0xa
+; CHECK-NEXT: ld1h { z4.d }, p0/z, [x0, x9, lsl #1]
+; CHECK-NEXT: mov x9, #12 // =0xc
+; CHECK-NEXT: ld1h { z5.d }, p0/z, [x0, x8, lsl #1]
+; CHECK-NEXT: mov x8, #14 // =0xe
+; CHECK-NEXT: ld1h { z6.d }, p0/z, [x0, x9, lsl #1]
 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT: // kill: def $q2 killed $q2 killed $z2
 ; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z3
 ; CHECK-NEXT: // kill: def $q4 killed $q4 killed $z4
 ; CHECK-NEXT: // kill: def $q5 killed $q5 killed $z5
+; CHECK-NEXT: ld1h { z7.d }, p0/z, [x0, x8, lsl #1]
+; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0]
 ; CHECK-NEXT: // kill: def $q6 killed $q6 killed $z6
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT: // kill: def $q7 killed $q7 killed $z7
 ; CHECK-NEXT: ret
   %a = load <16 x i16>, ptr %ap
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-extract-subvector.ll
@@ -13,16 +13,16 @@
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: mov z1.b, z0.b[7]
 ; CHECK-NEXT: mov z2.b, z0.b[6]
-; CHECK-NEXT: fmov w8, s1
-; CHECK-NEXT: mov z1.b, z0.b[5]
+; CHECK-NEXT: mov z3.b, z0.b[5]
 ; CHECK-NEXT: mov z0.b, z0.b[4]
+; CHECK-NEXT: fmov w8, s1
 ; CHECK-NEXT: fmov w9, s2
-; CHECK-NEXT: fmov w10, s1
-; CHECK-NEXT: fmov w11, s0
 ; CHECK-NEXT: strh w8, [sp, #14]
+; CHECK-NEXT: fmov w8, s3
 ; CHECK-NEXT: strh w9, [sp, #12]
-; CHECK-NEXT: strh w10, [sp, #10]
-; CHECK-NEXT: strh w11, [sp, #8]
+; CHECK-NEXT: fmov w9, s0
+; CHECK-NEXT: strh w8, [sp, #10]
+; CHECK-NEXT: strh w9, [sp, #8]
 ; CHECK-NEXT: ldr d0, [sp, #8]
 ; CHECK-NEXT: add sp, sp, #16
 ; CHECK-NEXT: ret
@@ -40,16 +40,16 @@
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: mov z1.b, z0.b[7]
 ; CHECK-NEXT: mov z2.b, z0.b[6]
-; CHECK-NEXT: fmov w8, s1
-; CHECK-NEXT: mov z1.b, z0.b[5]
+; CHECK-NEXT: mov z3.b, z0.b[5]
 ; CHECK-NEXT: mov z0.b, z0.b[4]
+; CHECK-NEXT: fmov w8, s1
 ; CHECK-NEXT: fmov w9, s2
-; CHECK-NEXT: fmov w10, s1
-; CHECK-NEXT: fmov w11, s0
 ; CHECK-NEXT: strh w8, [sp, #14]
+; CHECK-NEXT: fmov w8, s3
 ; CHECK-NEXT: strh w9, [sp, #12]
-; CHECK-NEXT: strh w10, [sp, #10]
-; CHECK-NEXT: strh w11, [sp, #8]
+; CHECK-NEXT: fmov w9, s0
+; CHECK-NEXT: strh w8, [sp, #10]
+; CHECK-NEXT: strh w9, [sp, #8]
 ; CHECK-NEXT: ldr d0, [sp, #8]
 ; CHECK-NEXT: add sp, sp, #16
 ; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fcopysign.ll
@@ -13,19 +13,19 @@
 ; SVE: // %bb.0:
 ; SVE-NEXT: ldr d0, [x0]
 ; SVE-NEXT: ldr d1, [x1]
-; SVE-NEXT: and z0.h, z0.h, #0x7fff
 ; SVE-NEXT: and z1.h, z1.h, #0x8000
+; SVE-NEXT: and z0.h, z0.h, #0x7fff
 ; SVE-NEXT: orr z0.d, z0.d, z1.d
 ; SVE-NEXT: str d0, [x0]
 ; SVE-NEXT: ret
 ;
 ; SVE2-LABEL: test_copysign_v4f16_v4f16:
 ; SVE2: // %bb.0:
-; SVE2-NEXT: ldr d0, [x0]
-; SVE2-NEXT: mov z2.h, #32767 // =0x7fff
-; SVE2-NEXT: ldr d1, [x1]
-; SVE2-NEXT: bsl z0.d, z0.d, z1.d, z2.d
-; SVE2-NEXT: str d0, [x0]
+; SVE2-NEXT: mov z0.h, #32767 // =0x7fff
+; SVE2-NEXT: ldr d1, [x0]
+; SVE2-NEXT: ldr d2, [x1]
+; SVE2-NEXT: bsl z1.d, z1.d, z2.d, z0.d
+; SVE2-NEXT: str d1, [x0]
 ; SVE2-NEXT: ret
   %a = load <4 x half>, ptr %ap
   %b = load <4 x half>, ptr %bp
@@ -39,19 +39,19 @@
 ; SVE: // %bb.0:
 ; SVE-NEXT: ldr q0, [x0]
 ; SVE-NEXT: ldr q1, [x1]
-; SVE-NEXT: and z0.h, z0.h, #0x7fff
 ; SVE-NEXT: and z1.h, z1.h, #0x8000
+; SVE-NEXT: and z0.h, z0.h, #0x7fff
 ; SVE-NEXT: orr z0.d, z0.d, z1.d
 ; SVE-NEXT: str q0, [x0]
 ; SVE-NEXT: ret
 ;
 ; SVE2-LABEL: test_copysign_v8f16_v8f16:
 ; SVE2: // %bb.0:
-; SVE2-NEXT: ldr q0, [x0]
-; SVE2-NEXT: mov z2.h, #32767 // =0x7fff
-; SVE2-NEXT: ldr q1, [x1]
-; SVE2-NEXT: bsl z0.d, z0.d, z1.d, z2.d
-; SVE2-NEXT: str q0, [x0]
+; SVE2-NEXT: mov z0.h, #32767 // =0x7fff
+; SVE2-NEXT: ldr q1, [x0]
+; SVE2-NEXT: ldr q2, [x1]
+; SVE2-NEXT: bsl z1.d, z1.d, z2.d, z0.d
+; SVE2-NEXT: str q1, [x0]
 ; SVE2-NEXT: ret
   %a = load <8 x half>, ptr %ap
   %b = load <8 x half>, ptr %bp
@@ -63,25 +63,25 @@
 define void @test_copysign_v16f16_v16f16(ptr %ap, ptr %bp) {
 ; SVE-LABEL: test_copysign_v16f16_v16f16:
 ; SVE: // %bb.0:
-; SVE-NEXT: ldp q0, q1, [x1]
+; SVE-NEXT: ldp q0, q3, [x1]
+; SVE-NEXT: ldp q1, q2, [x0]
 ; SVE-NEXT: and z0.h, z0.h, #0x8000
-; SVE-NEXT: ldp q2, q3, [x0]
-; SVE-NEXT: and z1.h, z1.h, #0x8000
+; SVE-NEXT: and z3.h, z3.h, #0x8000
+; SVE-NEXT: and z1.h, z1.h, #0x7fff
 ; SVE-NEXT: and z2.h, z2.h, #0x7fff
-; SVE-NEXT: orr z0.d, z2.d, z0.d
-; SVE-NEXT: and z3.h, z3.h, #0x7fff
-; SVE-NEXT: orr z1.d, z3.d, z1.d
+; SVE-NEXT: orr z0.d, z1.d, z0.d
+; SVE-NEXT: orr z1.d, z2.d, z3.d
 ; SVE-NEXT: stp q0, q1, [x0]
 ; SVE-NEXT: ret
 ;
 ; SVE2-LABEL: test_copysign_v16f16_v16f16:
 ; SVE2: // %bb.0:
-; SVE2-NEXT: ldp q1, q2, [x0]
 ; SVE2-NEXT: mov z0.h, #32767 // =0x7fff
-; SVE2-NEXT: ldp q3, q4, [x1]
-; SVE2-NEXT: bsl z1.d, z1.d, z3.d, z0.d
-; SVE2-NEXT: bsl z2.d, z2.d, z4.d, z0.d
-; SVE2-NEXT: stp q1, q2, [x0]
+; SVE2-NEXT: ldp q1, q4, [x1]
+; SVE2-NEXT: ldp q2, q3, [x0]
+; SVE2-NEXT: bsl z2.d, z2.d, z1.d, z0.d
+; SVE2-NEXT: bsl z3.d, z3.d, z4.d, z0.d
+; SVE2-NEXT: stp q2, q3, [x0]
 ; SVE2-NEXT: ret
   %a = load <16 x half>, ptr %ap
   %b = load <16 x half>, ptr %bp
@@ -97,19 +97,19 @@
 ; SVE: // %bb.0:
 ; SVE-NEXT: ldr d0, [x0]
 ; SVE-NEXT: ldr d1, [x1]
-; SVE-NEXT: and z0.s, z0.s, #0x7fffffff
 ; SVE-NEXT: and z1.s, z1.s, #0x80000000
+; SVE-NEXT: and z0.s, z0.s, #0x7fffffff
 ; SVE-NEXT: orr z0.d, z0.d, z1.d
 ; SVE-NEXT: str d0, [x0]
 ; SVE-NEXT: ret
 ;
 ; SVE2-LABEL: test_copysign_v2f32_v2f32:
 ; SVE2: // %bb.0:
-; SVE2-NEXT: ldr d0, [x0]
-; SVE2-NEXT: mov z2.s, #0x7fffffff
-; SVE2-NEXT: ldr d1, [x1]
-; SVE2-NEXT: bsl z0.d, z0.d, z1.d, z2.d
-; SVE2-NEXT: str d0, [x0]
+; SVE2-NEXT: mov z0.s, #0x7fffffff
+; SVE2-NEXT: ldr d1, [x0]
+; SVE2-NEXT: ldr d2, [x1]
+; SVE2-NEXT: bsl z1.d, z1.d, z2.d, z0.d
+; SVE2-NEXT: str d1, [x0]
 ; SVE2-NEXT: ret
   %a = load <2 x float>, ptr %ap
   %b = load <2 x float>, ptr %bp
@@ -123,19 +123,19 @@
 ; SVE: // %bb.0:
 ; SVE-NEXT: ldr q0, [x0]
 ; SVE-NEXT: ldr q1, [x1]
-; SVE-NEXT: and z0.s, z0.s, #0x7fffffff
 ; SVE-NEXT: and z1.s, z1.s, #0x80000000
+; SVE-NEXT: and z0.s, z0.s, #0x7fffffff
 ; SVE-NEXT: orr z0.d, z0.d, z1.d
 ; SVE-NEXT: str q0, [x0]
 ; SVE-NEXT: ret
 ;
 ; SVE2-LABEL: test_copysign_v4f32_v4f32:
 ; SVE2: // %bb.0:
-; SVE2-NEXT: ldr q0, [x0]
-; SVE2-NEXT: mov z2.s, #0x7fffffff
-; SVE2-NEXT: ldr q1, [x1]
-; SVE2-NEXT: bsl z0.d, z0.d, z1.d, z2.d
-; SVE2-NEXT: str q0, [x0]
+; SVE2-NEXT: mov z0.s, #0x7fffffff
+; SVE2-NEXT: ldr q1, [x0]
+; SVE2-NEXT: ldr q2, [x1]
+; SVE2-NEXT: bsl z1.d, z1.d, z2.d, z0.d
+; SVE2-NEXT: str q1, [x0]
 ; SVE2-NEXT: ret
   %a = load <4 x float>, ptr %ap
   %b = load <4 x float>, ptr %bp
@@ -147,25 +147,25 @@
 define void @test_copysign_v8f32_v8f32(ptr %ap, ptr %bp) {
 ; SVE-LABEL: test_copysign_v8f32_v8f32:
 ; SVE: // %bb.0:
-; SVE-NEXT: ldp q0, q1, [x1]
+; SVE-NEXT: ldp q0, q3, [x1]
+; SVE-NEXT: ldp q1, q2, [x0]
 ; SVE-NEXT: and z0.s, z0.s, #0x80000000
-; SVE-NEXT: ldp q2, q3, [x0]
-; SVE-NEXT: and z1.s, z1.s, #0x80000000
+; SVE-NEXT: and z3.s, z3.s, #0x80000000
+; SVE-NEXT: and z1.s, z1.s, #0x7fffffff
 ; SVE-NEXT: and z2.s, z2.s, #0x7fffffff
-; SVE-NEXT: orr z0.d, z2.d, z0.d
-; SVE-NEXT: and z3.s, z3.s, #0x7fffffff
-; SVE-NEXT: orr z1.d, z3.d, z1.d
+; SVE-NEXT: orr z0.d, z1.d, z0.d
+; SVE-NEXT: orr z1.d, z2.d, z3.d
 ; SVE-NEXT: stp q0, q1, [x0]
 ; SVE-NEXT: ret
 ;
 ; SVE2-LABEL: test_copysign_v8f32_v8f32:
 ; SVE2: // %bb.0:
-; SVE2-NEXT: ldp q1, q2, [x0]
 ; SVE2-NEXT: mov z0.s, #0x7fffffff
-; SVE2-NEXT: ldp q3, q4, [x1]
-; SVE2-NEXT: bsl z1.d, z1.d, z3.d, z0.d
-; SVE2-NEXT: bsl z2.d, z2.d, z4.d, z0.d
-; SVE2-NEXT: stp q1, q2, [x0]
+; SVE2-NEXT: ldp q1, q4, [x1]
+; SVE2-NEXT: ldp q2, q3, [x0]
+; SVE2-NEXT: bsl z2.d, z2.d, z1.d, z0.d
+; SVE2-NEXT: bsl z3.d, z3.d, z4.d, z0.d
+; SVE2-NEXT: stp q2, q3, [x0]
 ; SVE2-NEXT: ret
   %a = load <8 x float>, ptr %ap
   %b = load <8 x float>, ptr %bp
@@ -181,19 +181,19 @@
 ; SVE: // %bb.0:
 ; SVE-NEXT: ldr q0, [x0]
 ; SVE-NEXT: ldr q1, [x1]
-; SVE-NEXT: and z0.d, z0.d, #0x7fffffffffffffff
 ; SVE-NEXT: and z1.d, z1.d, #0x8000000000000000
+; SVE-NEXT: and z0.d, z0.d, #0x7fffffffffffffff
 ; SVE-NEXT: orr z0.d, z0.d, z1.d
 ; SVE-NEXT: str q0, [x0]
 ; SVE-NEXT: ret
 ;
 ; SVE2-LABEL: test_copysign_v2f64_v2f64:
 ; SVE2: // %bb.0:
-; SVE2-NEXT: ldr q0, [x0]
-; SVE2-NEXT: mov z2.d, #0x7fffffffffffffff
-; SVE2-NEXT: ldr q1, [x1]
-; SVE2-NEXT: bsl z0.d, z0.d, z1.d, z2.d
-; SVE2-NEXT: str q0, [x0]
+; SVE2-NEXT: mov z0.d, #0x7fffffffffffffff
+; SVE2-NEXT: ldr q1, [x0]
+; SVE2-NEXT: ldr q2, [x1]
+; SVE2-NEXT: bsl z1.d, z1.d, z2.d, z0.d
+; SVE2-NEXT: str q1, [x0]
 ; SVE2-NEXT: ret
   %a = load <2 x double>, ptr %ap
   %b = load <2 x double>, ptr %bp
@@ -205,25 +205,25 @@
 define void @test_copysign_v4f64_v4f64(ptr %ap, ptr %bp) {
 ; SVE-LABEL: test_copysign_v4f64_v4f64:
 ; SVE: // %bb.0:
-; SVE-NEXT: ldp q0, q1, [x1]
+; SVE-NEXT: ldp q0, q3, [x1]
+; SVE-NEXT: ldp q1, q2, [x0]
 ; SVE-NEXT: and z0.d, z0.d, #0x8000000000000000
-; SVE-NEXT: ldp q2, q3, [x0]
-; SVE-NEXT: and z1.d, z1.d, #0x8000000000000000
+; SVE-NEXT: and z3.d, z3.d, #0x8000000000000000
+; SVE-NEXT: and z1.d, z1.d, #0x7fffffffffffffff
 ; SVE-NEXT: and z2.d, z2.d, #0x7fffffffffffffff
-; SVE-NEXT: orr z0.d, z2.d, z0.d
-; SVE-NEXT: and z3.d, z3.d, #0x7fffffffffffffff
-; SVE-NEXT: orr z1.d, z3.d, z1.d
+; SVE-NEXT: orr z0.d, z1.d, z0.d
+; SVE-NEXT: orr z1.d, z2.d, z3.d
 ; SVE-NEXT: stp q0, q1, [x0]
 ; SVE-NEXT: ret
 ;
 ; SVE2-LABEL: test_copysign_v4f64_v4f64:
 ; SVE2: // %bb.0:
-; SVE2-NEXT: ldp q1, q2, [x0]
 ; SVE2-NEXT: mov z0.d, #0x7fffffffffffffff
-; SVE2-NEXT: ldp q3, q4, [x1]
-; SVE2-NEXT: bsl z1.d, z1.d, z3.d, z0.d
-; SVE2-NEXT: bsl z2.d, z2.d, z4.d, z0.d
-; SVE2-NEXT: stp q1, q2, [x0]
+; SVE2-NEXT: ldp q1, q4, [x1]
+; SVE2-NEXT: ldp q2, q3, [x0]
+; SVE2-NEXT: bsl z2.d, z2.d, z1.d, z0.d
+; SVE2-NEXT: bsl z3.d, z3.d, z4.d, z0.d
+; SVE2-NEXT: stp q2, q3, [x0]
 ; SVE2-NEXT: ret
   %a = load <4 x double>, ptr %ap
   %b = load <4 x double>, ptr %bp
@@ -237,12 +237,12 @@
 define void @test_copysign_v2f32_v2f64(ptr %ap, ptr %bp) {
 ; SVE-LABEL: test_copysign_v2f32_v2f64:
 ; SVE: // %bb.0:
-; SVE-NEXT: ldr q0, [x1]
 ; SVE-NEXT: ptrue p0.d
+; SVE-NEXT: ldr q0, [x1]
 ; SVE-NEXT: ldr d1, [x0]
+; SVE-NEXT: and z1.s, z1.s, #0x7fffffff
 ; SVE-NEXT: fcvt z0.s, p0/m, z0.d
 ; SVE-NEXT: uzp1 z0.s, z0.s, z0.s
-; SVE-NEXT: and z1.s, z1.s, #0x7fffffff
 ; SVE-NEXT: and z0.s, z0.s, #0x80000000
 ; SVE-NEXT: orr z0.d, z1.d, z0.d
 ; SVE-NEXT: str d0, [x0]
@@ -250,14 +250,14 @@
 ;
 ; SVE2-LABEL: test_copysign_v2f32_v2f64:
 ; SVE2: // %bb.0:
-; SVE2-NEXT: ldr q0, [x1]
 ; SVE2-NEXT: ptrue p0.d
-; SVE2-NEXT: ldr d1, [x0]
-; SVE2-NEXT: mov z2.s, #0x7fffffff
+; SVE2-NEXT: ldr q0, [x1]
+; SVE2-NEXT: mov z1.s, #0x7fffffff
+; SVE2-NEXT: ldr d2, [x0]
 ; SVE2-NEXT: fcvt z0.s, p0/m, z0.d
 ; SVE2-NEXT: uzp1 z0.s, z0.s, z0.s
-; SVE2-NEXT: bsl z1.d, z1.d, z0.d, z2.d
-; SVE2-NEXT: str d1, [x0]
+; SVE2-NEXT: bsl z2.d, z2.d, z0.d, z1.d
+; SVE2-NEXT: str d2, [x0]
 ; SVE2-NEXT: ret
   %a = load <2 x float>, ptr %ap
   %b = load <2 x double>, ptr %bp
@@ -273,34 +273,34 @@
 define void @test_copysign_v4f32_v4f64(ptr %ap, ptr %bp) {
 ; SVE-LABEL: test_copysign_v4f32_v4f64:
 ; SVE: // %bb.0:
-; SVE-NEXT: ldp q1, q0, [x1]
 ; SVE-NEXT: ptrue p0.d
+; SVE-NEXT: ldp q0, q1, [x1]
 ; SVE-NEXT: fcvt z1.s, p0/m, z1.d
-; SVE-NEXT: uzp1 z1.s, z1.s, z1.s
-; SVE-NEXT: ldr q2, [x0]
 ; SVE-NEXT: fcvt z0.s, p0/m, z0.d
-; SVE-NEXT: uzp1 z0.s, z0.s, z0.s
 ; SVE-NEXT: ptrue p0.s, vl2
-; SVE-NEXT: splice z1.s, p0, z1.s, z0.s
-; SVE-NEXT: and z1.s, z1.s, #0x80000000
-; SVE-NEXT: and z2.s, z2.s, #0x7fffffff
-; SVE-NEXT: orr z0.d, z2.d, z1.d
+; SVE-NEXT: uzp1 z1.s, z1.s, z1.s
+; SVE-NEXT: uzp1 z0.s, z0.s, z0.s
+; SVE-NEXT: splice z0.s, p0, z0.s, z1.s
+; SVE-NEXT: ldr q1, [x0]
+; SVE-NEXT: and z1.s, z1.s, #0x7fffffff
+; SVE-NEXT: and z0.s, z0.s, #0x80000000
+; SVE-NEXT: orr z0.d, z1.d, z0.d
 ; SVE-NEXT: str q0, [x0]
 ; SVE-NEXT: ret
 ;
 ; SVE2-LABEL: test_copysign_v4f32_v4f64:
 ; SVE2: // %bb.0:
-; SVE2-NEXT: ldp q1, q0, [x1]
 ; SVE2-NEXT: ptrue p0.d
-; SVE2-NEXT: fcvt z1.s, p0/m, z1.d
-; SVE2-NEXT: uzp1 z1.s, z1.s, z1.s
+; SVE2-NEXT: ldp q0, q1, [x1]
 ; SVE2-NEXT: ldr q2, [x0]
+; SVE2-NEXT: fcvt z1.s, p0/m, z1.d
 ; SVE2-NEXT: fcvt z0.s, p0/m, z0.d
-; SVE2-NEXT: uzp1 z0.s, z0.s, z0.s
 ; SVE2-NEXT: ptrue p0.s, vl2
-; SVE2-NEXT: splice z1.s, p0, z1.s, z0.s
-; SVE2-NEXT: mov z0.s, #0x7fffffff
-; SVE2-NEXT: bsl z2.d, z2.d, z1.d, z0.d
+; SVE2-NEXT: uzp1 z1.s, z1.s, z1.s
+; SVE2-NEXT: uzp1 z0.s, z0.s, z0.s
+; SVE2-NEXT: splice z0.s, p0, z0.s, z1.s
+; SVE2-NEXT: mov z1.s, #0x7fffffff
+; SVE2-NEXT: bsl z2.d, z2.d, z0.d, z1.d
 ; SVE2-NEXT: str q2, [x0]
 ; SVE2-NEXT: ret
   %a = load <4 x float>, ptr %ap
@@ -318,8 +318,8 @@
 ; SVE: // %bb.0:
 ; SVE-NEXT: ptrue p0.d, vl2
 ; SVE-NEXT: ldr q0, [x0]
-; SVE-NEXT: ld1w { z1.d }, p0/z, [x1]
 ; SVE-NEXT: and z0.d, z0.d, #0x7fffffffffffffff
+; SVE-NEXT: ld1w { z1.d }, p0/z, [x1]
 ; SVE-NEXT: fcvt z1.d, p0/m, z1.s
 ; SVE-NEXT: and z1.d, z1.d, #0x8000000000000000
 ; SVE-NEXT: orr z0.d, z0.d, z1.d
@@ -330,8 +330,8 @@
 ; SVE2: // %bb.0:
 ; SVE2-NEXT: ptrue p0.d, vl2
 ; SVE2-NEXT: ldr q0, [x0]
-; SVE2-NEXT: ld1w { z1.d }, p0/z, [x1]
 ; SVE2-NEXT: mov z2.d, #0x7fffffffffffffff
+; SVE2-NEXT: ld1w { z1.d }, p0/z, [x1]
 ; SVE2-NEXT: fcvt z1.d, p0/m, z1.s
 ; SVE2-NEXT: bsl z0.d, z0.d, z1.d, z2.d
 ; SVE2-NEXT: str q0, [x0]
@@ -350,15 +350,15 @@
 define void @test_copysign_v4f64_v4f32(ptr %ap, ptr %bp) {
 ; SVE-LABEL: test_copysign_v4f64_v4f32:
 ; SVE: // %bb.0:
-; SVE-NEXT: mov x8, #2 // =0x2
 ; SVE-NEXT: ptrue p0.d, vl2
+; SVE-NEXT: mov x8, #2 // =0x2
 ; SVE-NEXT: ldp q0, q1, [x0]
+; SVE-NEXT: and z0.d, z0.d, #0x7fffffffffffffff
+; SVE-NEXT: and z1.d, z1.d, #0x7fffffffffffffff
 ; SVE-NEXT: ld1w { z2.d }, p0/z, [x1, x8, lsl #2]
 ; SVE-NEXT: ld1w { z3.d }, p0/z, [x1]
-; SVE-NEXT: and z0.d, z0.d, #0x7fffffffffffffff
 ; SVE-NEXT: fcvt z3.d, p0/m, z3.s
 ; SVE-NEXT: fcvt z2.d, p0/m, z2.s
-; SVE-NEXT: and z1.d, z1.d, #0x7fffffffffffffff
 ; SVE-NEXT: and z3.d, z3.d, #0x8000000000000000
 ; SVE-NEXT: and z2.d, z2.d, #0x8000000000000000
 ; SVE-NEXT: orr z0.d, z0.d, z3.d
@@ -368,10 +368,10 @@
 ;
 ; SVE2-LABEL: test_copysign_v4f64_v4f32:
 ; SVE2: // %bb.0:
-; SVE2-NEXT: mov x8, #2 // =0x2
 ; SVE2-NEXT: ptrue p0.d, vl2
-; SVE2-NEXT: ldp q0, q1, [x0]
+; SVE2-NEXT: mov x8, #2 // =0x2
 ; SVE2-NEXT: mov z4.d, #0x7fffffffffffffff
+; SVE2-NEXT: ldp q0, q1, [x0]
 ; SVE2-NEXT: ld1w { z2.d }, p0/z, [x1, x8, lsl #2]
 ; SVE2-NEXT: ld1w { z3.d }, p0/z, [x1]
 ; SVE2-NEXT: fcvt z3.d, p0/m, z3.s
@@ -393,12 +393,12 @@
 define void @test_copysign_v4f16_v4f32(ptr %ap, ptr %bp) {
 ; SVE-LABEL: test_copysign_v4f16_v4f32:
 ; SVE: // %bb.0:
-; SVE-NEXT: ldr q0, [x1]
 ; SVE-NEXT: ptrue p0.s
+; SVE-NEXT: ldr q0, [x1]
 ; SVE-NEXT: ldr d1, [x0]
+; SVE-NEXT: and z1.h, z1.h, #0x7fff
 ; SVE-NEXT: fcvt z0.h, p0/m, z0.s
 ; SVE-NEXT: uzp1 z0.h, z0.h, z0.h
-; SVE-NEXT: and z1.h, z1.h, #0x7fff
 ; SVE-NEXT: and z0.h, z0.h, #0x8000
 ; SVE-NEXT: orr z0.d, z1.d, z0.d
 ; SVE-NEXT: str d0, [x0]
@@ -406,14 +406,14 @@
 ;
 ; SVE2-LABEL: test_copysign_v4f16_v4f32:
 ; SVE2: // %bb.0:
-; SVE2-NEXT: ldr q0, [x1]
 ; SVE2-NEXT: ptrue p0.s
-; SVE2-NEXT: ldr d1, [x0]
-; SVE2-NEXT: mov z2.h, #32767 // =0x7fff
+; SVE2-NEXT: ldr q0, [x1]
+; SVE2-NEXT: mov z1.h, #32767 // =0x7fff
+; SVE2-NEXT: ldr d2, [x0]
 ; SVE2-NEXT: fcvt z0.h, p0/m, z0.s
 ; SVE2-NEXT: uzp1 z0.h, z0.h, z0.h
-; SVE2-NEXT: bsl z1.d, z1.d, z0.d, z2.d
-; SVE2-NEXT: str d1, [x0]
+; SVE2-NEXT: bsl z2.d, z2.d, z0.d, z1.d
+; SVE2-NEXT: str d2, [x0]
 ; SVE2-NEXT: ret
   %a = load <4 x half>, ptr %ap
   %b = load <4 x float>, ptr %bp
@@ -429,18 +429,18 @@
 ; SVE-NEXT: sub sp, sp, #16
 ; SVE-NEXT: .cfi_def_cfa_offset 16
 ; SVE-NEXT: ldp q1, q0, [x1]
-; SVE-NEXT: fcvt h3, d1
-; SVE-NEXT: mov z1.d, z1.d[1]
-; SVE-NEXT: fcvt h1, d1
-; SVE-NEXT: fcvt h2, d0
-; SVE-NEXT: mov z0.d, z0.d[1]
-; SVE-NEXT: fcvt h0, d0
 ; SVE-NEXT: ldr d4, [x0]
-; SVE-NEXT: str h3, [sp, #8]
-; SVE-NEXT: str h1, [sp, #10]
-; SVE-NEXT: str h2, [sp, #12]
 ; SVE-NEXT: and z4.h, z4.h, #0x7fff
-; SVE-NEXT: str h0, [sp, #14]
+; SVE-NEXT: mov z2.d, z0.d[1]
+; SVE-NEXT: mov z3.d, z1.d[1]
+; SVE-NEXT: fcvt h0, d0
+; SVE-NEXT: fcvt h1, d1
+; SVE-NEXT: fcvt h2, d2
+; SVE-NEXT: fcvt h3, d3
+; SVE-NEXT: str h0, [sp, #12]
+; SVE-NEXT: str h1, [sp, #8]
+; SVE-NEXT: str h2, [sp, #14]
+; SVE-NEXT: str h3, [sp, #10]
 ; SVE-NEXT: ldr d0, [sp, #8]
 ; SVE-NEXT: and z0.h, z0.h, #0x8000
 ; SVE-NEXT: orr z0.d, z4.d, z0.d
@@ -452,22 +452,22 @@
 ; SVE2: // %bb.0:
 ; SVE2-NEXT: sub sp, sp, #16
 ; SVE2-NEXT: .cfi_def_cfa_offset 16
-; SVE2-NEXT: ldp q1, q0, [x1]
-; SVE2-NEXT: fcvt h3, d1
-; SVE2-NEXT: mov z1.d, z1.d[1]
+; SVE2-NEXT: ldp q2, q1, [x1]
+; SVE2-NEXT: mov z0.h, #32767 // =0x7fff
+; SVE2-NEXT: ldr d5, [x0]
+; SVE2-NEXT: mov z3.d, z1.d[1]
+; SVE2-NEXT: mov z4.d, z2.d[1]
 ; SVE2-NEXT: fcvt h1, d1
-; SVE2-NEXT: fcvt h2, d0
-; SVE2-NEXT: mov z0.d, z0.d[1]
-; SVE2-NEXT: fcvt h0, d0
-; SVE2-NEXT: ldr d4, [x0]
-; SVE2-NEXT: str h3, [sp, #8]
-; SVE2-NEXT: str h1, [sp, #10]
-; SVE2-NEXT: mov z1.h, #32767 // =0x7fff
-; SVE2-NEXT: str h2, [sp, #12]
-; SVE2-NEXT: str h0, [sp, #14]
-; SVE2-NEXT: ldr d0, [sp, #8]
-; SVE2-NEXT: bsl z4.d, z4.d, z0.d, z1.d
-; SVE2-NEXT: str d4, [x0]
+; SVE2-NEXT: fcvt h2, d2
+; SVE2-NEXT: fcvt h3, d3
+; SVE2-NEXT: fcvt h4, d4
+; SVE2-NEXT: str h1, [sp, #12]
+; SVE2-NEXT: str h2, [sp, #8]
+; SVE2-NEXT: str h3, [sp, #14]
+; SVE2-NEXT: str h4, [sp, #10]
+; SVE2-NEXT: ldr d1, [sp, #8]
+; SVE2-NEXT: bsl z5.d, z5.d, z1.d, z0.d
+; SVE2-NEXT: str d5, [x0]
 ; SVE2-NEXT: add sp, sp, #16
 ; SVE2-NEXT: ret
   %a = load <4 x half>, ptr %ap
@@ -483,34 +483,34 @@
 define void @test_copysign_v8f16_v8f32(ptr %ap, ptr %bp) {
 ; SVE-LABEL: test_copysign_v8f16_v8f32:
 ; SVE: // %bb.0:
-; SVE-NEXT: ldp q1, q0, [x1]
 ; SVE-NEXT: ptrue p0.s
+; SVE-NEXT: ldp q0, q1, [x1]
 ; SVE-NEXT: fcvt z1.h, p0/m, z1.s
-; SVE-NEXT: uzp1 z1.h, z1.h, z1.h
-; SVE-NEXT: ldr q2, [x0]
 ; SVE-NEXT: fcvt z0.h, p0/m, z0.s
-; SVE-NEXT: uzp1 z0.h, z0.h, z0.h
 ; SVE-NEXT: ptrue p0.h, vl4
-; SVE-NEXT: splice z1.h, p0, z1.h, z0.h
-; SVE-NEXT: and z1.h, z1.h, #0x8000
-; SVE-NEXT: and z2.h, z2.h, #0x7fff
-; SVE-NEXT: orr z0.d, z2.d, z1.d
+; SVE-NEXT: uzp1 z1.h, z1.h, z1.h
+; SVE-NEXT: uzp1 z0.h, z0.h, z0.h
+; SVE-NEXT: splice z0.h, p0, z0.h, z1.h
+; SVE-NEXT: ldr q1, [x0]
+; SVE-NEXT: and z1.h, z1.h, #0x7fff
+; SVE-NEXT: and z0.h, z0.h, #0x8000
+; SVE-NEXT: orr z0.d, z1.d, z0.d
 ; SVE-NEXT: str q0, [x0]
 ; SVE-NEXT: ret
 ;
 ; SVE2-LABEL: test_copysign_v8f16_v8f32:
 ; SVE2: // %bb.0:
-; SVE2-NEXT: ldp q1, q0, [x1]
 ; SVE2-NEXT: ptrue p0.s
-; SVE2-NEXT: fcvt z1.h, p0/m, z1.s
-; SVE2-NEXT: uzp1 z1.h, z1.h, z1.h
+; SVE2-NEXT: ldp q0, q1, [x1]
 ; SVE2-NEXT: ldr q2, [x0]
+; SVE2-NEXT: fcvt z1.h, p0/m, z1.s
 ; SVE2-NEXT: fcvt z0.h, p0/m, z0.s
-; SVE2-NEXT: uzp1 z0.h, z0.h, z0.h
 ; SVE2-NEXT: ptrue p0.h, vl4
-; SVE2-NEXT: splice z1.h, p0, z1.h, z0.h
-; SVE2-NEXT: mov z0.h, #32767 // =0x7fff
-; SVE2-NEXT: bsl z2.d, z2.d, z1.d, z0.d
+; SVE2-NEXT: uzp1 z1.h, z1.h, z1.h
+; SVE2-NEXT: uzp1 z0.h, z0.h, z0.h
+; SVE2-NEXT: splice z0.h, p0, z0.h, z1.h
+; SVE2-NEXT: mov z1.h, #32767 // =0x7fff
+; SVE2-NEXT: bsl z2.d, z2.d, z0.d, z1.d
 ; SVE2-NEXT: str q2, [x0]
 ; SVE2-NEXT: ret
   %a = load <8 x half>, ptr %ap
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-arith.ll
@@ -10,8 +10,8 @@
 define <2 x half> @fadd_v2f16(<2 x half> %op1, <2 x half> %op2) {
 ; CHECK-LABEL: fadd_v2f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.h, vl4
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -23,8 +23,8 @@
 define <4 x half> @fadd_v4f16(<4 x half> %op1, <4 x half> %op2) {
 ; CHECK-LABEL: fadd_v4f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.h, vl4
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -36,8 +36,8 @@
 define <8 x half> @fadd_v8f16(<8 x half> %op1, <8 x half> %op2) {
 ; CHECK-LABEL: fadd_v8f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -49,10 +49,11 @@
 define void @fadd_v16f16(ptr %a, ptr %b) {
 ; CHECK-LABEL: fadd_v16f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: ptrue p0.h, vl8
-; CHECK-NEXT: ldp q2, q3, [x1]
-; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z2.h
+; CHECK-NEXT: ldp q0, q3, [x1]
+; CHECK-NEXT: ldp q1, q2, [x0]
+; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: movprfx z1, z2
 ; CHECK-NEXT: fadd z1.h, p0/m, z1.h, z3.h
 ; CHECK-NEXT: stp q0, q1, [x0]
 ; CHECK-NEXT: ret
@@ -66,8 +67,8 @@
 define <2 x float> @fadd_v2f32(<2 x float> %op1, <2 x float> %op2) {
 ; CHECK-LABEL: fadd_v2f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.s, vl2
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -79,8 +80,8 @@
 define <4 x float> @fadd_v4f32(<4 x float> %op1, <4 x float> %op2) {
 ; CHECK-LABEL: fadd_v4f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -92,10 +93,11 @@
 define void @fadd_v8f32(ptr %a, ptr %b) {
 ; CHECK-LABEL: fadd_v8f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ldp q2, q3, [x1]
-; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z2.s
+; CHECK-NEXT: ldp q0, q3, [x1]
+; CHECK-NEXT: ldp q1, q2, [x0]
+; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: movprfx z1, z2
 ; CHECK-NEXT: fadd z1.s, p0/m, z1.s, z3.s
 ; CHECK-NEXT: stp q0, q1, [x0]
 ; CHECK-NEXT: ret
@@ -109,8 +111,8 @@
 define <2 x double> @fadd_v2f64(<2 x double> %op1, <2 x double> %op2) {
 ; CHECK-LABEL: fadd_v2f64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -122,10 +124,11 @@
 define void @fadd_v4f64(ptr %a, ptr %b) {
 ; CHECK-LABEL: fadd_v4f64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: ldp q2, q3, [x1]
-; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z2.d
+; CHECK-NEXT: ldp q0, q3, [x1]
+; CHECK-NEXT: ldp q1, q2, [x0]
+; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: movprfx z1, z2
 ; CHECK-NEXT: fadd z1.d, p0/m, z1.d, z3.d
 ; CHECK-NEXT: stp q0, q1, [x0]
 ; CHECK-NEXT: ret
@@ -143,8 +146,8 @@
 define <2 x half> @fdiv_v2f16(<2 x half> %op1, <2 x half> %op2) {
 ; CHECK-LABEL: fdiv_v2f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.h, vl4
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT: fdiv z0.h, p0/m, z0.h, z1.h
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -156,8 +159,8 @@
 define <4 x half> @fdiv_v4f16(<4 x half> %op1, <4 x half> %op2) {
 ; CHECK-LABEL: fdiv_v4f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.h, vl4
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT: fdiv z0.h, p0/m, z0.h, z1.h
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -169,8 +172,8 @@
 define <8 x half> @fdiv_v8f16(<8 x half> %op1, <8 x half> %op2) {
 ; CHECK-LABEL: fdiv_v8f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT: fdiv z0.h, p0/m, z0.h, z1.h
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -182,10 +185,11 @@
 define void @fdiv_v16f16(ptr %a, ptr %b) {
 ; CHECK-LABEL: fdiv_v16f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: ptrue p0.h, vl8
-; CHECK-NEXT: ldp q2, q3, [x1]
-; CHECK-NEXT: fdiv z0.h, p0/m, z0.h, z2.h
+; CHECK-NEXT: ldp q0, q3, [x1]
+; CHECK-NEXT: ldp q1, q2, [x0]
+; CHECK-NEXT: fdivr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: movprfx z1, z2
 ; CHECK-NEXT: fdiv z1.h, p0/m, z1.h, z3.h
 ; CHECK-NEXT: stp q0, q1, [x0]
 ; CHECK-NEXT: ret
@@ -199,8 +203,8 @@
 define <2 x float> @fdiv_v2f32(<2 x float> %op1, <2 x float> %op2) {
 ; CHECK-LABEL: fdiv_v2f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.s, vl2
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT: fdiv z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -212,8 +216,8 @@
 define <4 x float> @fdiv_v4f32(<4 x float> %op1, <4 x float> %op2) {
 ; CHECK-LABEL: fdiv_v4f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT: fdiv z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -225,10 +229,11 @@
 define void @fdiv_v8f32(ptr %a, ptr %b) {
 ; CHECK-LABEL: fdiv_v8f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ldp q2, q3, [x1]
-; CHECK-NEXT: fdiv z0.s, p0/m, z0.s, z2.s
+; CHECK-NEXT: ldp q0, q3, [x1]
+; CHECK-NEXT: ldp q1, q2, [x0]
+; CHECK-NEXT: fdivr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: movprfx z1, z2
 ; CHECK-NEXT: fdiv z1.s, p0/m, z1.s, z3.s
 ; CHECK-NEXT: stp q0, q1, [x0]
 ; CHECK-NEXT: ret
@@ -242,8 +247,8 @@
 define <2 x double> @fdiv_v2f64(<2 x double> %op1, <2 x double> %op2) {
 ; CHECK-LABEL: fdiv_v2f64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT: fdiv z0.d, p0/m, z0.d, z1.d
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -255,10 +260,11 @@
 define void @fdiv_v4f64(ptr %a, ptr %b) {
 ; CHECK-LABEL: fdiv_v4f64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: ldp q2, q3, [x1]
-; CHECK-NEXT: fdiv z0.d, p0/m, z0.d, z2.d
+; CHECK-NEXT: ldp q0, q3, [x1]
+; CHECK-NEXT: ldp q1, q2, [x0]
+; CHECK-NEXT: fdivr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: movprfx z1, z2
 ; CHECK-NEXT: fdiv z1.d, p0/m, z1.d, z3.d
 ; CHECK-NEXT: stp q0, q1, [x0]
 ; CHECK-NEXT: ret
@@ -276,8 +282,8 @@
 define <2 x half> @fma_v2f16(<2 x half> %op1, <2 x half> %op2, <2 x half> %op3) {
 ; CHECK-LABEL: fma_v2f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.h, vl4
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2
 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z2.h
@@ -290,8 +296,8 @@
 define <4 x half> @fma_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x half> %op3) {
 ; CHECK-LABEL: fma_v4f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.h, vl4
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2
 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z2.h
@@ -304,8 +310,8 @@
 define <8 x half> @fma_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x half> %op3) {
 ; CHECK-LABEL: fma_v8f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2
 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z2.h
@@ -318,13 +324,13 @@
 define void @fma_v16f16(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: fma_v16f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q3, [x1]
 ; CHECK-NEXT: ptrue p0.h, vl8
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldp q4, q5, [x2]
-; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z4.h
+; CHECK-NEXT: ldp q0, q4, [x1]
+; CHECK-NEXT: ldp q1, q5, [x2]
+; CHECK-NEXT: ldp q2, q3, [x0]
+; CHECK-NEXT: fmad z0.h, p0/m, z2.h, z1.h
 ; CHECK-NEXT: movprfx z1, z5
-; CHECK-NEXT: fmla z1.h, p0/m, z2.h, z3.h
+; CHECK-NEXT: fmla z1.h, p0/m, z3.h, z4.h
 ; CHECK-NEXT: stp q0, q1, [x0]
 ; CHECK-NEXT: ret
   %op1 = load <16 x half>, ptr %a
@@ -338,8 +344,8 @@
 define <2 x float> @fma_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x float> %op3) {
 ; CHECK-LABEL: fma_v2f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.s, vl2
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2
 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT: fmad z0.s, p0/m, z1.s, z2.s
@@ -352,8 +358,8 @@
 define <4 x float> @fma_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x float> %op3) {
 ; CHECK-LABEL: fma_v4f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2
 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT: fmad z0.s, p0/m, z1.s, z2.s
@@ -366,13 +372,13 @@
 define void @fma_v8f32(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: fma_v8f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q3, [x1]
 ; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldp q4, q5, [x2]
-; CHECK-NEXT: fmad z0.s, p0/m, z1.s, z4.s
+; CHECK-NEXT: ldp q0, q4, [x1]
+; CHECK-NEXT: ldp q1, q5, [x2]
+; CHECK-NEXT: ldp q2, q3, [x0]
+; CHECK-NEXT: fmad z0.s, p0/m, z2.s, z1.s
 ; CHECK-NEXT: movprfx z1, z5
-; CHECK-NEXT: fmla z1.s, p0/m, z2.s, z3.s
+; CHECK-NEXT: fmla z1.s, p0/m, z3.s, z4.s
 ; CHECK-NEXT: stp q0, q1, [x0]
 ; CHECK-NEXT: ret
   %op1 = load <8 x float>, ptr %a
@@ -386,8 +392,8 @@
 define <2 x double> @fma_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x double> %op3) {
 ; CHECK-LABEL: fma_v2f64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2
 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT: fmad z0.d, p0/m, z1.d, z2.d
@@ -400,13 +406,13 @@
 define void @fma_v4f64(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: fma_v4f64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q3, [x1]
 ; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: ldp q4, q5, [x2]
-; CHECK-NEXT: fmad z0.d, p0/m, z1.d, z4.d
+; CHECK-NEXT: ldp q0, q4, [x1]
+; CHECK-NEXT: ldp q1, q5, [x2]
+; CHECK-NEXT: ldp q2, q3, [x0]
+; CHECK-NEXT: fmad z0.d, p0/m, z2.d, z1.d
 ; CHECK-NEXT: movprfx z1, z5
-; CHECK-NEXT: fmla z1.d, p0/m, z2.d, z3.d
+; CHECK-NEXT: fmla z1.d, p0/m, z3.d, z4.d
 ; CHECK-NEXT: stp q0, q1, [x0]
 ; CHECK-NEXT: ret
   %op1 = load <4 x double>, ptr %a
@@ -424,8 +430,8 @@
 define <2 x half> @fmul_v2f16(<2 x half> %op1, <2 x half> %op2) {
 ; CHECK-LABEL: fmul_v2f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.h, vl4
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT: fmul z0.h, p0/m, z0.h, z1.h
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -437,8 +443,8 @@
 define <4 x half> @fmul_v4f16(<4 x half> %op1, <4 x half> %op2) {
 ; CHECK-LABEL: fmul_v4f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.h, vl4
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT: fmul z0.h, p0/m, z0.h, z1.h
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -450,8 +456,8 @@
 define <8 x half> @fmul_v8f16(<8 x half> %op1, <8 x half> %op2) {
 ; CHECK-LABEL: fmul_v8f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT: fmul z0.h, p0/m, z0.h, z1.h
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -463,10 +469,11 @@
 define void @fmul_v16f16(ptr %a, ptr %b) {
 ; CHECK-LABEL: fmul_v16f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: ptrue p0.h, vl8
-; CHECK-NEXT: ldp q2, q3, [x1]
-; CHECK-NEXT: fmul z0.h, p0/m, z0.h, z2.h
+; CHECK-NEXT: ldp q0, q3, [x1]
+; CHECK-NEXT: ldp q1, q2, [x0]
+; CHECK-NEXT: fmul z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: movprfx z1, z2
 ; CHECK-NEXT: fmul z1.h, p0/m, z1.h, z3.h
 ; CHECK-NEXT: stp q0, q1, [x0]
 ; CHECK-NEXT: ret
@@ -480,8 +487,8 @@
 define <2 x float> @fmul_v2f32(<2 x float> %op1, <2 x float> %op2) {
 ; CHECK-LABEL: fmul_v2f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.s, vl2
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT: fmul z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -493,8 +500,8 @@
 define <4 x float> @fmul_v4f32(<4 x float> %op1, <4 x float> %op2) {
 ; CHECK-LABEL: fmul_v4f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT: fmul z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -506,10 +513,11 @@
 define void @fmul_v8f32(ptr %a, ptr %b) {
 ; CHECK-LABEL: fmul_v8f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ldp q2, q3, [x1]
-; CHECK-NEXT: fmul z0.s, p0/m, z0.s, z2.s
+; CHECK-NEXT: ldp q0, q3, [x1]
+; CHECK-NEXT: ldp q1, q2, [x0]
+; CHECK-NEXT: fmul z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: movprfx z1, z2
 ; CHECK-NEXT: fmul z1.s, p0/m, z1.s, z3.s
 ; CHECK-NEXT: stp q0, q1, [x0]
 ; CHECK-NEXT: ret
@@ -523,8 +531,8 @@
 define <2 x double> @fmul_v2f64(<2 x double> %op1, <2 x double> %op2) {
 ; CHECK-LABEL: fmul_v2f64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT: fmul z0.d, p0/m, z0.d, z1.d
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -536,10 +544,11 @@
 define void @fmul_v4f64(ptr %a, ptr %b) {
 ; CHECK-LABEL: fmul_v4f64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: ldp q2, q3, [x1]
-; CHECK-NEXT: fmul z0.d, p0/m, z0.d, z2.d
+; CHECK-NEXT: ldp q0, q3, [x1]
+; CHECK-NEXT: ldp q1, q2, [x0]
+; CHECK-NEXT: fmul z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: movprfx z1, z2
 ; CHECK-NEXT: fmul z1.d, p0/m, z1.d, z3.d
 ; CHECK-NEXT: stp q0, q1, [x0]
 ; CHECK-NEXT: ret
@@ -557,8 +566,8 @@
 define <2 x half> @fneg_v2f16(<2 x half> %op) {
 ; CHECK-LABEL: fneg_v2f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.h, vl4
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: fneg z0.h, p0/m, z0.h
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT: ret
@@ -569,8 +578,8 @@
 define <4 x half> @fneg_v4f16(<4 x half> %op) {
 ; CHECK-LABEL: fneg_v4f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.h, vl4
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: fneg z0.h, p0/m, z0.h
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT: ret
@@ -581,8 +590,8 @@
 define <8 x half> @fneg_v8f16(<8 x half> %op) {
 ; CHECK-LABEL: fneg_v8f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: fneg z0.h, p0/m, z0.h
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT: ret
@@ -593,8 +602,8 @@
 define void @fneg_v16f16(ptr %a, ptr %b) {
 ; CHECK-LABEL: fneg_v16f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: fneg z0.h, p0/m, z0.h
 ; CHECK-NEXT: fneg z1.h, p0/m, z1.h
 ; CHECK-NEXT: stp q0, q1, [x0]
@@ -608,8 +617,8 @@
 define <2 x float> @fneg_v2f32(<2 x float> %op) {
 ; CHECK-LABEL: fneg_v2f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.s, vl2
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: fneg z0.s, p0/m, z0.s
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT: ret
@@ -620,8 +629,8 @@
 define <4 x float> @fneg_v4f32(<4 x float> %op) {
 ; CHECK-LABEL: fneg_v4f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: fneg z0.s, p0/m, z0.s
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT: ret
@@ -632,8 +641,8 @@
 define void @fneg_v8f32(ptr %a) {
 ; CHECK-LABEL: fneg_v8f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: fneg z0.s, p0/m, z0.s
 ; CHECK-NEXT: fneg z1.s, p0/m, z1.s
 ; CHECK-NEXT: stp q0, q1, [x0]
@@ -647,8 +656,8 @@
 define <2 x double> @fneg_v2f64(<2 x double> %op) {
 ; CHECK-LABEL: fneg_v2f64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: fneg z0.d, p0/m, z0.d
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT: ret
@@ -659,8 +668,8 @@
 define void @fneg_v4f64(ptr %a) {
 ; CHECK-LABEL: fneg_v4f64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: fneg z0.d, p0/m, z0.d
 ; CHECK-NEXT: fneg z1.d, p0/m, z1.d
 ; CHECK-NEXT: stp q0, q1, [x0]
@@ -678,8 +687,8 @@
 define <2 x half> @fsqrt_v2f16(<2 x half> %op) {
 ; CHECK-LABEL: fsqrt_v2f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.h, vl4
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: fsqrt z0.h, p0/m, z0.h
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT: ret
@@ -690,8 +699,8 @@
 define <4 x half> @fsqrt_v4f16(<4 x half> %op) {
 ; CHECK-LABEL: fsqrt_v4f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.h, vl4
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: fsqrt z0.h, p0/m, z0.h
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT: ret
@@ -702,8 +711,8 @@
 define <8 x half> @fsqrt_v8f16(<8 x half> %op) {
 ; CHECK-LABEL: fsqrt_v8f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: fsqrt z0.h, p0/m, z0.h
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT: ret
@@ -714,8 +723,8 @@
 define void @fsqrt_v16f16(ptr %a, ptr %b) {
 ; CHECK-LABEL: fsqrt_v16f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: fsqrt z0.h, p0/m, z0.h
 ; CHECK-NEXT: fsqrt z1.h, p0/m, z1.h
 ; CHECK-NEXT: stp q0, q1, [x0]
@@ -729,8 +738,8 @@
 define <2 x float> @fsqrt_v2f32(<2 x float> %op) {
 ; CHECK-LABEL: fsqrt_v2f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.s, vl2
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: fsqrt z0.s, p0/m, z0.s
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT: ret
@@ -741,8 +750,8 @@
 define <4 x float> @fsqrt_v4f32(<4 x float> %op) {
 ; CHECK-LABEL: fsqrt_v4f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: fsqrt z0.s, p0/m, z0.s
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT: ret
@@ -753,8 +762,8 @@
 define void @fsqrt_v8f32(ptr %a) {
 ; CHECK-LABEL: fsqrt_v8f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: fsqrt z0.s, p0/m, z0.s
 ; CHECK-NEXT: fsqrt z1.s, p0/m, z1.s
 ; CHECK-NEXT: stp q0, q1, [x0]
@@ -768,8 +777,8 @@
 define <2 x double> @fsqrt_v2f64(<2 x double> %op) {
 ; CHECK-LABEL: fsqrt_v2f64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: fsqrt z0.d, p0/m, z0.d
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT: ret
@@ -780,8 +789,8 @@
 define void @fsqrt_v4f64(ptr %a) {
 ; CHECK-LABEL: fsqrt_v4f64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: fsqrt z0.d, p0/m, z0.d
 ; CHECK-NEXT: fsqrt z1.d, p0/m, z1.d
 ; CHECK-NEXT: stp q0, q1, [x0]
@@ -799,8 +808,8 @@
 define <2 x half> @fsub_v2f16(<2 x half> %op1, <2 x half> %op2) {
 ; CHECK-LABEL: fsub_v2f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.h, vl4
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT: fsub z0.h, p0/m, z0.h, z1.h
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -812,8 +821,8 @@
 define <4 x half> @fsub_v4f16(<4 x half> %op1, <4 x half> %op2) {
 ; CHECK-LABEL: fsub_v4f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.h, vl4
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT: fsub z0.h, p0/m, z0.h, z1.h
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -825,8 +834,8 @@
 define <8 x half> @fsub_v8f16(<8 x half> %op1, <8 x half> %op2) {
 ; CHECK-LABEL: fsub_v8f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT: fsub z0.h, p0/m, z0.h, z1.h
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -838,10 +847,11 @@
 define void @fsub_v16f16(ptr %a, ptr %b) {
 ; CHECK-LABEL: fsub_v16f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: ptrue p0.h, vl8
-; CHECK-NEXT: ldp q2, q3, [x1]
-; CHECK-NEXT: fsub z0.h, p0/m, z0.h, z2.h
+; CHECK-NEXT: ldp q0, q3, [x1]
+; CHECK-NEXT: ldp q1, q2, [x0]
+; CHECK-NEXT: fsubr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: movprfx z1, z2
 ; CHECK-NEXT: fsub z1.h, p0/m, z1.h, z3.h
 ; CHECK-NEXT: stp q0, q1, [x0]
 ; CHECK-NEXT: ret
@@ -855,8 +865,8 @@
 define <2 x float> @fsub_v2f32(<2 x float> %op1, <2 x float> %op2) {
 ; CHECK-LABEL: fsub_v2f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.s, vl2
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT: fsub z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -868,8 +878,8 @@
 define <4 x float> @fsub_v4f32(<4 x float> %op1, <4 x float> %op2) {
 ; CHECK-LABEL: fsub_v4f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT: fsub z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -881,10 +891,11 @@
 define void @fsub_v8f32(ptr %a, ptr %b) {
 ; CHECK-LABEL: fsub_v8f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ldp q2, q3, [x1]
-; CHECK-NEXT: fsub z0.s, p0/m, z0.s, z2.s
+; CHECK-NEXT: ldp q0, q3, [x1]
+; CHECK-NEXT: ldp q1, q2, [x0]
+; CHECK-NEXT: fsubr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: movprfx z1, z2
 ; CHECK-NEXT: fsub z1.s, p0/m, z1.s, z3.s
 ; CHECK-NEXT: stp q0, q1, [x0]
 ; CHECK-NEXT: ret
@@ -898,8 +909,8 @@
 define <2 x double> @fsub_v2f64(<2 x double> %op1, <2 x double> %op2) {
 ; CHECK-LABEL: fsub_v2f64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
 ; CHECK-NEXT: fsub z0.d, p0/m, z0.d, z1.d
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -911,10 +922,11 @@
 define void @fsub_v4f64(ptr %a, ptr %b) {
 ; CHECK-LABEL: fsub_v4f64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: ldp q2, q3, [x1]
-; CHECK-NEXT: fsub z0.d, p0/m, z0.d, z2.d
+; CHECK-NEXT: ldp q0, q3, [x1]
+; CHECK-NEXT: ldp q1, q2, [x0]
+; CHECK-NEXT: fsubr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: movprfx z1, z2
 ; CHECK-NEXT: fsub z1.d, p0/m, z1.d, z3.d
 ; CHECK-NEXT: stp q0, q1, [x0]
 ; CHECK-NEXT: ret
@@ -932,8 +944,8 @@
 define <2 x half> @fabs_v2f16(<2 x half> %op) {
 ; CHECK-LABEL: fabs_v2f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.h, vl4
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: fabs z0.h, p0/m, z0.h
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT: ret
@@ -944,8 +956,8 @@
 define <4 x half> @fabs_v4f16(<4 x half> %op) {
 ; CHECK-LABEL: fabs_v4f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.h, vl4
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: fabs z0.h, p0/m, z0.h
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT: ret
@@ -956,8 +968,8 @@
 define <8 x half> @fabs_v8f16(<8 x half> %op) {
 ; CHECK-LABEL: fabs_v8f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: fabs z0.h, p0/m, z0.h
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT: ret
@@ -968,8 +980,8 @@
 define void @fabs_v16f16(ptr %a) {
 ; CHECK-LABEL: fabs_v16f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: fabs z0.h, p0/m, z0.h
 ; CHECK-NEXT: fabs z1.h, p0/m, z1.h
 ; CHECK-NEXT: stp q0, q1, [x0]
@@ -983,8 +995,8 @@
 define <2 x float> @fabs_v2f32(<2 x float> %op) {
 ; CHECK-LABEL: fabs_v2f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.s, vl2
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: fabs z0.s, p0/m, z0.s
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT: ret
@@ -995,8 +1007,8 @@
 define <4 x float> @fabs_v4f32(<4 x float> %op) {
 ; CHECK-LABEL: fabs_v4f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: fabs z0.s, p0/m, z0.s
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT: ret
@@ -1007,8 +1019,8 @@
 define void @fabs_v8f32(ptr %a) {
 ; CHECK-LABEL: fabs_v8f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: fabs z0.s, p0/m, z0.s
 ; CHECK-NEXT: fabs z1.s, p0/m, z1.s
 ; CHECK-NEXT: stp q0, q1, [x0]
@@ -1022,8 +1034,8 @@
 define <2 x double> @fabs_v2f64(<2 x double> %op) {
 ; CHECK-LABEL: fabs_v2f64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: fabs z0.d, p0/m, z0.d
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT: ret
@@ -1034,8 +1046,8 @@
 define void @fabs_v4f64(ptr %a) {
 ; CHECK-LABEL: fabs_v4f64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: fabs z0.d, p0/m, z0.d
 ; CHECK-NEXT: fabs z1.d, p0/m, z1.d
 ; CHECK-NEXT: stp q0, q1, [x0]
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-compares.ll
@@ -56,12 +56,12 @@
 define void @fcmp_oeq_v16f16(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: fcmp_oeq_v16f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q3, [x1]
 ; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: ldp q0, q3, [x1]
 ; CHECK-NEXT: ldp q1, q2, [x0]
 ; CHECK-NEXT: fcmeq p1.h, p0/z, z1.h, z0.h
-; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: fcmeq p0.h, p0/z, z2.h, z3.h
+; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: stp q0, q1, [x2]
 ; CHECK-NEXT: ret
@@ -106,12 +106,12 @@
 define void @fcmp_oeq_v8f32(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: fcmp_oeq_v8f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q3, [x1]
 ; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: ldp q0, q3, [x1]
 ; CHECK-NEXT: ldp q1, q2, [x0]
 ; CHECK-NEXT: fcmeq p1.s, p0/z, z1.s, z0.s
-; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: fcmeq p0.s, p0/z, z2.s, z3.s
+; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: stp q0, q1, [x2]
 ; CHECK-NEXT: ret
@@ -156,12 +156,12 @@
 define void @fcmp_oeq_v4f64(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: fcmp_oeq_v4f64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q3, [x1]
 ; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: ldp q0, q3, [x1]
 ; CHECK-NEXT: ldp q1, q2, [x0]
 ; CHECK-NEXT: fcmeq p1.d, p0/z, z1.d, z0.d
-; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: fcmeq p0.d, p0/z, z2.d, z3.d
+; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: stp q0, q1, [x2]
 ; CHECK-NEXT: ret
@@ -180,15 +180,15 @@
 define void @fcmp_ueq_v16f16(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: fcmp_ueq_v16f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q3, [x1]
 ; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: ldp q0, q3, [x1]
 ; CHECK-NEXT: ldp q1, q2, [x0]
 ; CHECK-NEXT: fcmuo p1.h, p0/z, z1.h, z0.h
 ; CHECK-NEXT: fcmeq p2.h, p0/z, z1.h, z0.h
-; CHECK-NEXT: mov p1.b, p2/m, p2.b
-; CHECK-NEXT: fcmuo p2.h, p0/z, z2.h, z3.h
+; CHECK-NEXT: fcmuo p3.h, p0/z, z2.h, z3.h
 ; CHECK-NEXT: fcmeq p0.h, p0/z, z2.h, z3.h
-; CHECK-NEXT: sel p0.b, p0, p0.b, p2.b
+; CHECK-NEXT: mov p1.b, p2/m, p2.b
+; CHECK-NEXT: sel p0.b, p0, p0.b, p3.b
 ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: stp q0, q1, [x2]
@@ -208,15 +208,15 @@
 define void @fcmp_one_v16f16(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: fcmp_one_v16f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q3, [x1]
 ; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: ldp q0, q3, [x1]
 ; CHECK-NEXT: ldp q1, q2, [x0]
 ; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z1.h
 ; CHECK-NEXT: fcmgt p2.h, p0/z, z1.h, z0.h
-; CHECK-NEXT: mov p1.b, p2/m, p2.b
-; CHECK-NEXT: fcmgt p2.h, p0/z, z3.h, z2.h
+; CHECK-NEXT: fcmgt p3.h, p0/z, z3.h, z2.h
 ; CHECK-NEXT: fcmgt p0.h, p0/z, z2.h, z3.h
-; CHECK-NEXT: sel p0.b, p0, p0.b, p2.b
+; CHECK-NEXT: mov p1.b, p2/m, p2.b
+; CHECK-NEXT: sel p0.b, p0, p0.b, p3.b
 ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: stp q0, q1, [x2]
@@ -236,12 +236,12 @@
 define void @fcmp_une_v16f16(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: fcmp_une_v16f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q3, [x1]
 ; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: ldp q0, q3, [x1]
 ; CHECK-NEXT: ldp q1, q2, [x0]
 ; CHECK-NEXT: fcmne p1.h, p0/z, z1.h, z0.h
-; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: fcmne p0.h, p0/z, z2.h, z3.h
+; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: stp q0, q1, [x2]
 ; CHECK-NEXT: ret
@@ -260,12 +260,12 @@
 define void @fcmp_ogt_v16f16(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: fcmp_ogt_v16f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q3, [x1]
 ; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: ldp q0, q3, [x1]
 ; CHECK-NEXT: ldp q1, q2, [x0]
 ; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z0.h
-; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: fcmgt p0.h, p0/z, z2.h, z3.h
+; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: stp q0, q1, [x2]
 ; CHECK-NEXT: ret
@@ -284,15 +284,15 @@
 define void @fcmp_ugt_v16f16(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: fcmp_ugt_v16f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q3, [x1]
 ; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: ldp q0, q3, [x1]
 ; CHECK-NEXT: ldp q1, q2, [x0]
 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: fcmge p0.h, p0/z, z3.h, z2.h
 ; CHECK-NEXT: mov z0.h, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: mov z1.h, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: fcmge p0.h, p0/z, z3.h, z2.h
-; CHECK-NEXT: eor z1.d, z1.d, z0.d
 ; CHECK-NEXT: mov z2.h, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: eor z1.d, z1.d, z0.d
 ; CHECK-NEXT: eor z0.d, z2.d, z0.d
 ; CHECK-NEXT: stp q1, q0, [x2]
 ; CHECK-NEXT: ret
@@ -311,12 +311,12 @@
 define void @fcmp_olt_v16f16(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: fcmp_olt_v16f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q3, [x1]
 ; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: ldp q0, q3, [x1]
 ; CHECK-NEXT: ldp q1, q2, [x0]
 ; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: fcmgt p0.h, p0/z, z3.h, z2.h
+; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: stp q0, q1, [x2]
 ; CHECK-NEXT: ret
@@ -335,15 +335,15 @@
 define void @fcmp_ult_v16f16(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: fcmp_ult_v16f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q3, [x1]
 ; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: ldp q0, q3, [x1]
 ; CHECK-NEXT: ldp q1, q2, [x0]
 ; CHECK-NEXT: fcmge p1.h, p0/z, z1.h, z0.h
+; CHECK-NEXT: fcmge p0.h, p0/z, z2.h, z3.h
 ; CHECK-NEXT: mov z0.h, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: mov z1.h, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: fcmge p0.h, p0/z, z2.h, z3.h
-; CHECK-NEXT: eor z1.d, z1.d, z0.d
 ; CHECK-NEXT: mov z2.h, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: eor z1.d, z1.d, z0.d
 ; CHECK-NEXT: eor z0.d, z2.d, z0.d
 ; CHECK-NEXT: stp q1, q0, [x2]
 ; CHECK-NEXT: ret
@@ -362,12 +362,12 @@
 define void @fcmp_oge_v16f16(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: fcmp_oge_v16f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q3, [x1]
 ; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: ldp q0, q3, [x1]
 ; CHECK-NEXT: ldp q1, q2, [x0]
 ; CHECK-NEXT: fcmge p1.h, p0/z, z1.h, z0.h
-; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: fcmge p0.h, p0/z, z2.h, z3.h
+; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: stp q0, q1, [x2]
 ; CHECK-NEXT: ret
@@ -386,15 +386,15 @@
 define void @fcmp_uge_v16f16(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: fcmp_uge_v16f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q3, [x1]
 ; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: ldp q0, q3, [x1]
 ; CHECK-NEXT: ldp q1, q2, [x0]
 ; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: fcmgt p0.h, p0/z, z3.h, z2.h
 ; CHECK-NEXT: mov z0.h, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: mov z1.h, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: fcmgt p0.h, p0/z, z3.h, z2.h
-; CHECK-NEXT: eor z1.d, z1.d, z0.d
 ; CHECK-NEXT: mov z2.h, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: eor z1.d, z1.d, z0.d
 ; CHECK-NEXT: eor z0.d, z2.d, z0.d
 ; CHECK-NEXT: stp q1, q0, [x2]
 ; CHECK-NEXT: ret
@@ -413,12 +413,12 @@
 define void @fcmp_ole_v16f16(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: fcmp_ole_v16f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q3, [x1]
 ; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: ldp q0, q3, [x1]
 ; CHECK-NEXT: ldp q1, q2, [x0]
 ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h
-; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: fcmge p0.h, p0/z, z3.h, z2.h
+; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: stp q0, q1, [x2]
 ; CHECK-NEXT: ret
@@ -437,15 +437,15 @@
 define void @fcmp_ule_v16f16(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: fcmp_ule_v16f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q3, [x1]
 ; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: ldp q0, q3, [x1]
 ; CHECK-NEXT: ldp q1, q2, [x0]
 ; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z0.h
+; CHECK-NEXT: fcmgt p0.h, p0/z, z2.h, z3.h
 ; CHECK-NEXT: mov z0.h, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: mov z1.h, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: fcmgt p0.h, p0/z, z2.h, z3.h
-; CHECK-NEXT: eor z1.d, z1.d, z0.d
 ; CHECK-NEXT: mov z2.h, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: eor z1.d, z1.d, z0.d
 ; CHECK-NEXT: eor z0.d, z2.d, z0.d
 ; CHECK-NEXT: stp q1, q0, [x2]
 ; CHECK-NEXT: ret
@@ -464,12 +464,12 @@
 define void @fcmp_uno_v16f16(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: fcmp_uno_v16f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q3, [x1]
 ; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: ldp q0, q3, [x1]
 ; CHECK-NEXT: ldp q1, q2, [x0]
 ; CHECK-NEXT: fcmuo p1.h, p0/z, z1.h, z0.h
-; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: fcmuo p0.h, p0/z, z2.h, z3.h
+; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: stp q0, q1, [x2]
 ; CHECK-NEXT: ret
@@ -488,15 +488,15 @@
 define void @fcmp_ord_v16f16(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: fcmp_ord_v16f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q3, [x1]
 ; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: ldp q0, q3, [x1]
 ; CHECK-NEXT: ldp q1, q2, [x0]
 ; CHECK-NEXT: fcmuo p1.h, p0/z, z1.h, z0.h
+; CHECK-NEXT: fcmuo p0.h, p0/z, z2.h, z3.h
 ; CHECK-NEXT: mov z0.h, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: mov z1.h, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: fcmuo p0.h, p0/z, z2.h, z3.h
-; CHECK-NEXT: eor z1.d, z1.d, z0.d
 ; CHECK-NEXT: mov z2.h, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: eor z1.d, z1.d, z0.d
 ; CHECK-NEXT: eor z0.d, z2.d, z0.d
 ; CHECK-NEXT: stp q1, q0, [x2]
 ; CHECK-NEXT: ret
@@ -515,12 +515,12 @@
 define void @fcmp_eq_v16f16(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: fcmp_eq_v16f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q3, [x1]
 ; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: ldp q0, q3, [x1]
 ; CHECK-NEXT: ldp q1, q2, [x0]
 ; CHECK-NEXT: fcmeq p1.h, p0/z, z1.h, z0.h
-; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: fcmeq p0.h, p0/z, z2.h, z3.h
+; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: stp q0, q1, [x2]
 ; CHECK-NEXT: ret
@@ -539,12 +539,12 @@
 define void @fcmp_ne_v16f16(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: fcmp_ne_v16f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q3, [x1]
 ; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: ldp q0, q3, [x1]
 ; CHECK-NEXT: ldp q1, q2, [x0]
 ; CHECK-NEXT: fcmne p1.h, p0/z, z1.h, z0.h
-; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: fcmne p0.h, p0/z, z2.h, z3.h
+; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: stp q0, q1, [x2]
 ; CHECK-NEXT: ret
@@ -563,12 +563,12 @@
 define void @fcmp_gt_v16f16(ptr %a, ptr %b, ptr %c) {
 ; CHECK-LABEL: fcmp_gt_v16f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q3, [x1]
 ; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: ldp q0, q3, [x1]
 ; CHECK-NEXT: ldp q1, q2, [x0]
 ; CHECK-NEXT: fcmgt p1.h, p0/z, z1.h, z0.h
-; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: fcmgt p0.h, p0/z, z2.h, z3.h
+; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT:
mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x2] ; CHECK-NEXT: ret @@ -587,12 +587,12 @@ define void @fcmp_lt_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-LABEL: fcmp_lt_v16f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q3, [x1] ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: ldp q0, q3, [x1] ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: fcmgt p1.h, p0/z, z0.h, z1.h -; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: fcmgt p0.h, p0/z, z3.h, z2.h +; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x2] ; CHECK-NEXT: ret @@ -611,12 +611,12 @@ define void @fcmp_ge_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-LABEL: fcmp_ge_v16f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q3, [x1] ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: ldp q0, q3, [x1] ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: fcmge p1.h, p0/z, z1.h, z0.h -; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: fcmge p0.h, p0/z, z2.h, z3.h +; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x2] ; CHECK-NEXT: ret @@ -635,12 +635,12 @@ define void @fcmp_le_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-LABEL: fcmp_le_v16f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q3, [x1] ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: ldp q0, q3, [x1] ; CHECK-NEXT: ldp q1, q2, [x0] ; CHECK-NEXT: fcmge p1.h, p0/z, z0.h, z1.h -; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: fcmge p0.h, p0/z, z3.h, z2.h +; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x2] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-convert.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-convert.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-convert.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-convert.ll @@ -7,14 +7,14 @@ define void @fp_convert_combine_crash(ptr %a, ptr %b) { ; CHECK-LABEL: fp_convert_combine_crash: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: fmov z2.s, #8.00000000 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: fmov z0.s, #8.00000000 +; CHECK-NEXT: fmul z1.s, p0/m, z1.s, z0.s ; CHECK-NEXT: fmul z0.s, p0/m, z0.s, z2.s -; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s -; CHECK-NEXT: fmul z1.s, p0/m, z1.s, z2.s ; CHECK-NEXT: fcvtzs z1.s, p0/m, z1.s -; CHECK-NEXT: stp q0, q1, [x1] +; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s +; CHECK-NEXT: stp q1, q0, [x1] ; CHECK-NEXT: ret %f = load <8 x float>, ptr %a %mul.i = fmul <8 x float> %f, %a, ptr %b) { ; CHECK-LABEL: fcvt_v2f16_to_v2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: fcvt z0.s, p0/m, z0.h ; CHECK-NEXT: str d0, [x0] @@ -24,8 +24,8 @@ define void @fcvt_v4f16_to_v4f32(<4 x half> %a, ptr %b) { ; CHECK-LABEL: fcvt_v4f16_to_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: fcvt z0.s, p0/m, z0.h ; CHECK-NEXT: str q0, [x0] @@ -42,8 +42,8 @@ ; CHECK-NEXT: uunpklo z1.s, z0.h ; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; 
CHECK-NEXT: fcvt z1.s, p0/m, z1.h ; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: fcvt z1.s, p0/m, z1.h ; CHECK-NEXT: fcvt z0.s, p0/m, z0.h ; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret @@ -59,18 +59,17 @@ ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: uunpklo z2.s, z1.h ; CHECK-NEXT: uunpklo z3.s, z0.h +; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: uunpklo z1.s, z1.h ; CHECK-NEXT: uunpklo z0.s, z0.h -; CHECK-NEXT: fcvt z1.s, p0/m, z1.h ; CHECK-NEXT: fcvt z2.s, p0/m, z2.h +; CHECK-NEXT: fcvt z3.s, p0/m, z3.h +; CHECK-NEXT: fcvt z1.s, p0/m, z1.h ; CHECK-NEXT: fcvt z0.s, p0/m, z0.h +; CHECK-NEXT: stp q3, q0, [x0] ; CHECK-NEXT: stp q2, q1, [x0, #32] -; CHECK-NEXT: movprfx z1, z3 -; CHECK-NEXT: fcvt z1.s, p0/m, z3.h -; CHECK-NEXT: stp q1, q0, [x0] ; CHECK-NEXT: ret %res = fpext <16 x half> %a to <16 x float> store <16 x float> %res, ptr %b @@ -113,8 +112,8 @@ define void @fcvt_v8f16_v8f32(ptr %a, ptr %b) { ; CHECK-LABEL: fcvt_v8f16_v8f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #4 // =0x4 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: mov x8, #4 // =0x4 ; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0, x8, lsl #1] ; CHECK-NEXT: ld1h { z1.s }, p0/z, [x0] ; CHECK-NEXT: fcvt z0.s, p0/m, z0.h @@ -130,22 +129,20 @@ define void @fcvt_v16f16_v16f32(ptr %a, ptr %b) { ; CHECK-LABEL: fcvt_v16f16_v16f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #8 // =0x8 -; CHECK-NEXT: mov x9, #12 // =0xc ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: mov x10, #4 // =0x4 +; CHECK-NEXT: mov x8, #8 // =0x8 ; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0, x8, lsl #1] -; CHECK-NEXT: ld1h { z1.s }, p0/z, [x0, x9, lsl #1] -; CHECK-NEXT: ld1h { z2.s }, p0/z, [x0, x10, lsl #1] +; CHECK-NEXT: mov x8, #12 // =0xc +; CHECK-NEXT: ld1h { z1.s }, p0/z, [x0, x8, lsl #1] +; CHECK-NEXT: mov x8, #4 // =0x4 +; CHECK-NEXT: ld1h { z2.s }, p0/z, [x0, x8, lsl #1] ; CHECK-NEXT: ld1h { z3.s }, p0/z, [x0] ; CHECK-NEXT: fcvt z0.s, p0/m, z0.h ; CHECK-NEXT: fcvt z1.s, p0/m, z1.h +; CHECK-NEXT: fcvt z3.s, p0/m, z3.h +; CHECK-NEXT: fcvt z2.s, p0/m, z2.h ; CHECK-NEXT: stp q0, q1, [x1, #32] -; CHECK-NEXT: movprfx z0, z3 -; CHECK-NEXT: fcvt z0.s, p0/m, z3.h -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: fcvt z1.s, p0/m, z2.h -; CHECK-NEXT: stp q0, q1, [x1] +; CHECK-NEXT: stp q3, q2, [x1] ; CHECK-NEXT: ret %op1 = load <16 x half>, ptr %a %res = fpext <16 x half> %op1 to <16 x float> @@ -187,8 +184,8 @@ define void @fcvt_v4f16_v4f64(ptr %a, ptr %b) { ; CHECK-LABEL: fcvt_v4f16_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #2 // =0x2 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: mov x8, #2 // =0x2 ; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, x8, lsl #1] ; CHECK-NEXT: ld1h { z1.d }, p0/z, [x0] ; CHECK-NEXT: fcvt z0.d, p0/m, z0.h @@ -204,22 +201,20 @@ define void @fcvt_v8f16_v8f64(ptr %a, ptr %b) { ; CHECK-LABEL: fcvt_v8f16_v8f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #4 // =0x4 -; CHECK-NEXT: mov x9, #6 // =0x6 ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: mov x10, #2 // =0x2 +; CHECK-NEXT: mov x8, #4 // =0x4 ; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, x8, lsl #1] -; CHECK-NEXT: ld1h { z1.d }, p0/z, [x0, x9, lsl #1] -; CHECK-NEXT: ld1h { z2.d }, p0/z, [x0, x10, lsl #1] +; CHECK-NEXT: mov x8, #6 // =0x6 +; CHECK-NEXT: ld1h { z1.d }, p0/z, [x0, x8, lsl #1] +; CHECK-NEXT: mov x8, #2 // =0x2 +; CHECK-NEXT: ld1h { z2.d }, p0/z, [x0, x8, lsl #1] ; CHECK-NEXT: ld1h { z3.d }, p0/z, [x0] ; CHECK-NEXT: fcvt z0.d, p0/m, z0.h ; CHECK-NEXT: fcvt z1.d, p0/m, z1.h +; 
CHECK-NEXT: fcvt z3.d, p0/m, z3.h +; CHECK-NEXT: fcvt z2.d, p0/m, z2.h ; CHECK-NEXT: stp q0, q1, [x1, #32] -; CHECK-NEXT: movprfx z0, z3 -; CHECK-NEXT: fcvt z0.d, p0/m, z3.h -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: fcvt z1.d, p0/m, z2.h -; CHECK-NEXT: stp q0, q1, [x1] +; CHECK-NEXT: stp q3, q2, [x1] ; CHECK-NEXT: ret %op1 = load <8 x half>, ptr %a %res = fpext <8 x half> %op1 to <8 x double> @@ -230,40 +225,37 @@ define void @fcvt_v16f16_v16f64(ptr %a, ptr %b) { ; CHECK-LABEL: fcvt_v16f16_v16f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x9, #14 // =0xe -; CHECK-NEXT: mov x10, #12 // =0xc ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: mov x8, #2 // =0x2 -; CHECK-NEXT: mov x11, #6 // =0x6 -; CHECK-NEXT: mov x12, #4 // =0x4 -; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, x9, lsl #1] -; CHECK-NEXT: ld1h { z1.d }, p0/z, [x0, x10, lsl #1] -; CHECK-NEXT: mov x9, #8 // =0x8 -; CHECK-NEXT: mov x10, #10 // =0xa +; CHECK-NEXT: mov x8, #12 // =0xc +; CHECK-NEXT: ld1h { z0.d }, p0/z, [x0, x8, lsl #1] +; CHECK-NEXT: mov x8, #14 // =0xe +; CHECK-NEXT: ld1h { z1.d }, p0/z, [x0, x8, lsl #1] +; CHECK-NEXT: mov x8, #8 // =0x8 ; CHECK-NEXT: ld1h { z2.d }, p0/z, [x0, x8, lsl #1] -; CHECK-NEXT: ld1h { z3.d }, p0/z, [x0, x11, lsl #1] -; CHECK-NEXT: ld1h { z5.d }, p0/z, [x0, x12, lsl #1] +; CHECK-NEXT: mov x8, #10 // =0xa +; CHECK-NEXT: ld1h { z3.d }, p0/z, [x0, x8, lsl #1] +; CHECK-NEXT: mov x8, #4 // =0x4 ; CHECK-NEXT: fcvt z0.d, p0/m, z0.h +; CHECK-NEXT: ld1h { z4.d }, p0/z, [x0, x8, lsl #1] +; CHECK-NEXT: mov x8, #6 // =0x6 ; CHECK-NEXT: fcvt z1.d, p0/m, z1.h -; CHECK-NEXT: ld1h { z4.d }, p0/z, [x0, x9, lsl #1] -; CHECK-NEXT: ld1h { z6.d }, p0/z, [x0, x10, lsl #1] +; CHECK-NEXT: ld1h { z5.d }, p0/z, [x0, x8, lsl #1] +; CHECK-NEXT: mov x8, #2 // =0x2 +; CHECK-NEXT: fcvt z2.d, p0/m, z2.h +; CHECK-NEXT: ld1h { z6.d }, p0/z, [x0, x8, lsl #1] ; CHECK-NEXT: ld1h { z7.d }, p0/z, [x0] -; CHECK-NEXT: stp q1, q0, [x1, #96] -; CHECK-NEXT: movprfx z1, z4 -; CHECK-NEXT: fcvt z1.d, p0/m, z4.h -; CHECK-NEXT: movprfx z0, z6 -; CHECK-NEXT: fcvt z0.d, p0/m, z6.h -; CHECK-NEXT: stp q1, q0, [x1, #64] -; CHECK-NEXT: movprfx z1, z5 -; CHECK-NEXT: fcvt z1.d, p0/m, z5.h -; CHECK-NEXT: movprfx z0, z3 -; CHECK-NEXT: fcvt z0.d, p0/m, z3.h -; CHECK-NEXT: stp q1, q0, [x1, #32] +; CHECK-NEXT: fcvt z3.d, p0/m, z3.h +; CHECK-NEXT: fcvt z4.d, p0/m, z4.h +; CHECK-NEXT: stp q0, q1, [x1, #96] +; CHECK-NEXT: movprfx z0, z5 +; CHECK-NEXT: fcvt z0.d, p0/m, z5.h ; CHECK-NEXT: movprfx z1, z7 ; CHECK-NEXT: fcvt z1.d, p0/m, z7.h -; CHECK-NEXT: movprfx z0, z2 -; CHECK-NEXT: fcvt z0.d, p0/m, z2.h -; CHECK-NEXT: stp q1, q0, [x1] +; CHECK-NEXT: stp q2, q3, [x1, #64] +; CHECK-NEXT: movprfx z2, z6 +; CHECK-NEXT: fcvt z2.d, p0/m, z6.h +; CHECK-NEXT: stp q1, q2, [x1] +; CHECK-NEXT: stp q4, q0, [x1, #32] ; CHECK-NEXT: ret %op1 = load <16 x half>, ptr %a %res = fpext <16 x half> %op1 to <16 x double> @@ -305,8 +297,8 @@ define void @fcvt_v4f32_v4f64(ptr %a, ptr %b) { ; CHECK-LABEL: fcvt_v4f32_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #2 // =0x2 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: mov x8, #2 // =0x2 ; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, x8, lsl #2] ; CHECK-NEXT: ld1w { z1.d }, p0/z, [x0] ; CHECK-NEXT: fcvt z0.d, p0/m, z0.s @@ -322,22 +314,20 @@ define void @fcvt_v8f32_v8f64(ptr %a, ptr %b) { ; CHECK-LABEL: fcvt_v8f32_v8f64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #4 // =0x4 -; CHECK-NEXT: mov x9, #6 // =0x6 ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: mov x10, #2 // =0x2 +; CHECK-NEXT: mov x8, #4 // =0x4 ; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0, x8, 
lsl #2] -; CHECK-NEXT: ld1w { z1.d }, p0/z, [x0, x9, lsl #2] -; CHECK-NEXT: ld1w { z2.d }, p0/z, [x0, x10, lsl #2] +; CHECK-NEXT: mov x8, #6 // =0x6 +; CHECK-NEXT: ld1w { z1.d }, p0/z, [x0, x8, lsl #2] +; CHECK-NEXT: mov x8, #2 // =0x2 +; CHECK-NEXT: ld1w { z2.d }, p0/z, [x0, x8, lsl #2] ; CHECK-NEXT: ld1w { z3.d }, p0/z, [x0] ; CHECK-NEXT: fcvt z0.d, p0/m, z0.s ; CHECK-NEXT: fcvt z1.d, p0/m, z1.s +; CHECK-NEXT: fcvt z3.d, p0/m, z3.s +; CHECK-NEXT: fcvt z2.d, p0/m, z2.s ; CHECK-NEXT: stp q0, q1, [x1, #32] -; CHECK-NEXT: movprfx z0, z3 -; CHECK-NEXT: fcvt z0.d, p0/m, z3.s -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: fcvt z1.d, p0/m, z2.s -; CHECK-NEXT: stp q0, q1, [x1] +; CHECK-NEXT: stp q3, q2, [x1] ; CHECK-NEXT: ret %op1 = load <8 x float>, ptr %a %res = fpext <8 x float> %op1 to <8 x double> @@ -352,8 +342,8 @@ define void @fcvt_v2f32_v2f16(ptr %a, ptr %b) { ; CHECK-LABEL: fcvt_v2f32_v2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: fcvt z0.h, p0/m, z0.s ; CHECK-NEXT: st1h { z0.s }, p0, [x1] ; CHECK-NEXT: ret @@ -366,8 +356,8 @@ define void @fcvt_v4f32_v4f16(ptr %a, ptr %b) { ; CHECK-LABEL: fcvt_v4f32_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: fcvt z0.h, p0/m, z0.s ; CHECK-NEXT: st1h { z0.s }, p0, [x1] ; CHECK-NEXT: ret @@ -380,13 +370,13 @@ define void @fcvt_v8f32_v8f16(ptr %a, ptr %b) { ; CHECK-LABEL: fcvt_v8f32_v8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: mov x8, #4 // =0x4 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: ldp q1, q0, [x0] +; CHECK-NEXT: mov x8, #4 // =0x4 ; CHECK-NEXT: fcvt z0.h, p0/m, z0.s -; CHECK-NEXT: st1h { z0.s }, p0, [x1] ; CHECK-NEXT: fcvt z1.h, p0/m, z1.s -; CHECK-NEXT: st1h { z1.s }, p0, [x1, x8, lsl #1] +; CHECK-NEXT: st1h { z0.s }, p0, [x1, x8, lsl #1] +; CHECK-NEXT: st1h { z1.s }, p0, [x1] ; CHECK-NEXT: ret %op1 = load <8 x float>, ptr %a %res = fptrunc <8 x float> %op1 to <8 x half> @@ -401,8 +391,8 @@ define void @fcvt_v1f64_v1f16(ptr %a, ptr %b) { ; CHECK-LABEL: fcvt_v1f64_v1f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: ptrue p0.d, vl1 +; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: fcvt z0.h, p0/m, z0.d ; CHECK-NEXT: st1h { z0.d }, p0, [x1] ; CHECK-NEXT: ret @@ -415,8 +405,8 @@ define void @fcvt_v2f64_v2f16(ptr %a, ptr %b) { ; CHECK-LABEL: fcvt_v2f64_v2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: fcvt z0.h, p0/m, z0.d ; CHECK-NEXT: st1h { z0.d }, p0, [x1] ; CHECK-NEXT: ret @@ -429,13 +419,13 @@ define void @fcvt_v4f64_v4f16(ptr %a, ptr %b) { ; CHECK-LABEL: fcvt_v4f64_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: mov x8, #2 // =0x2 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldp q1, q0, [x0] +; CHECK-NEXT: mov x8, #2 // =0x2 ; CHECK-NEXT: fcvt z0.h, p0/m, z0.d -; CHECK-NEXT: st1h { z0.d }, p0, [x1] ; CHECK-NEXT: fcvt z1.h, p0/m, z1.d -; CHECK-NEXT: st1h { z1.d }, p0, [x1, x8, lsl #1] +; CHECK-NEXT: st1h { z0.d }, p0, [x1, x8, lsl #1] +; CHECK-NEXT: st1h { z1.d }, p0, [x1] ; CHECK-NEXT: ret %op1 = load <4 x double>, ptr %a %res = fptrunc <4 x double> %op1 to <4 x half> @@ -450,8 +440,8 @@ define void @fcvt_v1f64_v1f32(<1 x double> %op1, ptr %b) { ; CHECK-LABEL: fcvt_v1f64_v1f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: fcvt z0.s, 
p0/m, z0.d ; CHECK-NEXT: st1w { z0.d }, p0, [x0] ; CHECK-NEXT: ret @@ -463,8 +453,8 @@ define void @fcvt_v2f64_v2f32(<2 x double> %op1, ptr %b) { ; CHECK-LABEL: fcvt_v2f64_v2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: fcvt z0.s, p0/m, z0.d ; CHECK-NEXT: st1w { z0.d }, p0, [x0] ; CHECK-NEXT: ret @@ -476,13 +466,13 @@ define void @fcvt_v4f64_v4f32(ptr %a, ptr %b) { ; CHECK-LABEL: fcvt_v4f64_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: mov x8, #2 // =0x2 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldp q1, q0, [x0] +; CHECK-NEXT: mov x8, #2 // =0x2 ; CHECK-NEXT: fcvt z0.s, p0/m, z0.d -; CHECK-NEXT: st1w { z0.d }, p0, [x1] ; CHECK-NEXT: fcvt z1.s, p0/m, z1.d -; CHECK-NEXT: st1w { z1.d }, p0, [x1, x8, lsl #2] +; CHECK-NEXT: st1w { z0.d }, p0, [x1, x8, lsl #2] +; CHECK-NEXT: st1w { z1.d }, p0, [x1] ; CHECK-NEXT: ret %op1 = load <4 x double>, ptr %a %res = fptrunc <4 x double> %op1 to <4 x float> diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-fma.ll @@ -10,8 +10,8 @@ define <4 x half> @fma_v4f16(<4 x half> %op1, <4 x half> %op2, <4 x half> %op3) { ; CHECK-LABEL: fma_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z2.h @@ -25,8 +25,8 @@ define <8 x half> @fma_v8f16(<8 x half> %op1, <8 x half> %op2, <8 x half> %op3) { ; CHECK-LABEL: fma_v8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z2.h @@ -40,13 +40,13 @@ define void @fma_v16f16(ptr %a, ptr %b, ptr %c) { ; CHECK-LABEL: fma_v16f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q3, [x1] ; CHECK-NEXT: ptrue p0.h, vl8 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldp q4, q5, [x2] -; CHECK-NEXT: fmad z0.h, p0/m, z1.h, z4.h +; CHECK-NEXT: ldp q0, q4, [x1] +; CHECK-NEXT: ldp q1, q5, [x2] +; CHECK-NEXT: ldp q2, q3, [x0] +; CHECK-NEXT: fmad z0.h, p0/m, z2.h, z1.h ; CHECK-NEXT: movprfx z1, z5 -; CHECK-NEXT: fmla z1.h, p0/m, z2.h, z3.h +; CHECK-NEXT: fmla z1.h, p0/m, z3.h, z4.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <16 x half>, ptr %a @@ -61,8 +61,8 @@ define <2 x float> @fma_v2f32(<2 x float> %op1, <2 x float> %op2, <2 x float> %op3) { ; CHECK-LABEL: fma_v2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: fmad z0.s, p0/m, z1.s, z2.s @@ -76,8 +76,8 @@ define <4 x float> @fma_v4f32(<4 x float> %op1, <4 x float> %op2, <4 x float> %op3) { ; CHECK-LABEL: fma_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q2 
killed $q2 def $z2 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: fmad z0.s, p0/m, z1.s, z2.s @@ -91,13 +91,13 @@ define void @fma_v8f32(ptr %a, ptr %b, ptr %c) { ; CHECK-LABEL: fma_v8f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q3, [x1] ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldp q4, q5, [x2] -; CHECK-NEXT: fmad z0.s, p0/m, z1.s, z4.s +; CHECK-NEXT: ldp q0, q4, [x1] +; CHECK-NEXT: ldp q1, q5, [x2] +; CHECK-NEXT: ldp q2, q3, [x0] +; CHECK-NEXT: fmad z0.s, p0/m, z2.s, z1.s ; CHECK-NEXT: movprfx z1, z5 -; CHECK-NEXT: fmla z1.s, p0/m, z2.s, z3.s +; CHECK-NEXT: fmla z1.s, p0/m, z3.s, z4.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <8 x float>, ptr %a @@ -125,8 +125,8 @@ define <2 x double> @fma_v2f64(<2 x double> %op1, <2 x double> %op2, <2 x double> %op3) { ; CHECK-LABEL: fma_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q2 killed $q2 def $z2 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: fmad z0.d, p0/m, z1.d, z2.d @@ -140,13 +140,13 @@ define void @fma_v4f64(ptr %a, ptr %b, ptr %c) { ; CHECK-LABEL: fma_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q3, [x1] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: ldp q4, q5, [x2] -; CHECK-NEXT: fmad z0.d, p0/m, z1.d, z4.d +; CHECK-NEXT: ldp q0, q4, [x1] +; CHECK-NEXT: ldp q1, q5, [x2] +; CHECK-NEXT: ldp q2, q3, [x0] +; CHECK-NEXT: fmad z0.d, p0/m, z2.d, z1.d ; CHECK-NEXT: movprfx z1, z5 -; CHECK-NEXT: fmla z1.d, p0/m, z2.d, z3.d +; CHECK-NEXT: fmla z1.d, p0/m, z3.d, z4.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <4 x double>, ptr %a diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-minmax.ll @@ -10,8 +10,8 @@ define <4 x half> @fmaxnm_v4f16(<4 x half> %op1, <4 x half> %op2) { ; CHECK-LABEL: fmaxnm_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -23,8 +23,8 @@ define <8 x half> @fmaxnm_v8f16(<8 x half> %op1, <8 x half> %op2) { ; CHECK-LABEL: fmaxnm_v8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -36,10 +36,11 @@ define void @fmaxnm_v16f16(ptr %a, ptr %b) { ; CHECK-LABEL: fmaxnm_v16f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, z2.h +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: fmaxnm z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -53,8 +54,8 @@ define <2 x float> @fmaxnm_v2f32(<2 x float> %op1, <2 x float> %op2) { ; CHECK-LABEL: fmaxnm_v2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: 
// kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -66,8 +67,8 @@ define <4 x float> @fmaxnm_v4f32(<4 x float> %op1, <4 x float> %op2) { ; CHECK-LABEL: fmaxnm_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -79,10 +80,11 @@ define void @fmaxnm_v8f32(ptr %a, ptr %b) { ; CHECK-LABEL: fmaxnm_v8f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, z2.s +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: fmaxnm z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -107,8 +109,8 @@ define <2 x double> @fmaxnm_v2f64(<2 x double> %op1, <2 x double> %op2) { ; CHECK-LABEL: fmaxnm_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: fmaxnm z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -120,10 +122,11 @@ define void @fmaxnm_v4f64(ptr %a, ptr %b) { ; CHECK-LABEL: fmaxnm_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: fmaxnm z0.d, p0/m, z0.d, z2.d +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: fmaxnm z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: fmaxnm z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -141,8 +144,8 @@ define <4 x half> @fminnm_v4f16(<4 x half> %op1, <4 x half> %op2) { ; CHECK-LABEL: fminnm_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -154,8 +157,8 @@ define <8 x half> @fminnm_v8f16(<8 x half> %op1, <8 x half> %op2) { ; CHECK-LABEL: fminnm_v8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -167,10 +170,11 @@ define void @fminnm_v16f16(ptr %a, ptr %b) { ; CHECK-LABEL: fminnm_v16f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, z2.h +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: fminnm z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -184,8 +188,8 @@ define <2 x float> @fminnm_v2f32(<2 x float> %op1, <2 x float> %op2) { ; CHECK-LABEL: fminnm_v2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def 
$d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -197,8 +201,8 @@ define <4 x float> @fminnm_v4f32(<4 x float> %op1, <4 x float> %op2) { ; CHECK-LABEL: fminnm_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -210,10 +214,11 @@ define void @fminnm_v8f32(ptr %a, ptr %b) { ; CHECK-LABEL: fminnm_v8f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, z2.s +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: fminnm z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -238,8 +243,8 @@ define <2 x double> @fminnm_v2f64(<2 x double> %op1, <2 x double> %op2) { ; CHECK-LABEL: fminnm_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -251,10 +256,11 @@ define void @fminnm_v4f64(ptr %a, ptr %b) { ; CHECK-LABEL: fminnm_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, z2.d +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: fminnm z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -272,8 +278,8 @@ define <4 x half> @fmax_v4f16(<4 x half> %op1, <4 x half> %op2) { ; CHECK-LABEL: fmax_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: fmax z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -285,8 +291,8 @@ define <8 x half> @fmax_v8f16(<8 x half> %op1, <8 x half> %op2) { ; CHECK-LABEL: fmax_v8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: fmax z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -298,10 +304,11 @@ define void @fmax_v16f16(ptr %a, ptr %b) { ; CHECK-LABEL: fmax_v16f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: fmax z0.h, p0/m, z0.h, z2.h +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: fmax z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: fmax z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -315,8 +322,8 @@ define <2 x float> @fmax_v2f32(<2 x float> %op1, <2 x float> %op2) { ; CHECK-LABEL: fmax_v2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: 
ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: fmax z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -328,8 +335,8 @@ define <4 x float> @fmax_v4f32(<4 x float> %op1, <4 x float> %op2) { ; CHECK-LABEL: fmax_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: fmax z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -341,10 +348,11 @@ define void @fmax_v8f32(ptr %a, ptr %b) { ; CHECK-LABEL: fmax_v8f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: fmax z0.s, p0/m, z0.s, z2.s +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: fmax z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: fmax z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -369,8 +377,8 @@ define <2 x double> @fmax_v2f64(<2 x double> %op1, <2 x double> %op2) { ; CHECK-LABEL: fmax_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: fmax z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -382,10 +390,11 @@ define void @fmax_v4f64(ptr %a, ptr %b) { ; CHECK-LABEL: fmax_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: fmax z0.d, p0/m, z0.d, z2.d +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: fmax z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: fmax z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -403,8 +412,8 @@ define <4 x half> @fmin_v4f16(<4 x half> %op1, <4 x half> %op2) { ; CHECK-LABEL: fmin_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: fmin z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -416,8 +425,8 @@ define <8 x half> @fmin_v8f16(<8 x half> %op1, <8 x half> %op2) { ; CHECK-LABEL: fmin_v8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: fmin z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -429,10 +438,11 @@ define void @fmin_v16f16(ptr %a, ptr %b) { ; CHECK-LABEL: fmin_v16f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: fmin z0.h, p0/m, z0.h, z2.h +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: fmin z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: fmin z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -446,8 +456,8 @@ define <2 x float> @fmin_v2f32(<2 x float> %op1, <2 x float> %op2) { ; CHECK-LABEL: fmin_v2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; 
CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: fmin z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -459,8 +469,8 @@ define <4 x float> @fmin_v4f32(<4 x float> %op1, <4 x float> %op2) { ; CHECK-LABEL: fmin_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: fmin z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -472,10 +482,11 @@ define void @fmin_v8f32(ptr %a, ptr %b) { ; CHECK-LABEL: fmin_v8f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: fmin z0.s, p0/m, z0.s, z2.s +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: fmin z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: fmin z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -500,8 +511,8 @@ define <2 x double> @fmin_v2f64(<2 x double> %op1, <2 x double> %op2) { ; CHECK-LABEL: fmin_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: fmin z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -513,10 +524,11 @@ define void @fmin_v4f64(ptr %a, ptr %b) { ; CHECK-LABEL: fmin_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: fmin z0.d, p0/m, z0.d, z2.d +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: fmin z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: fmin z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-reduce.ll @@ -10,8 +10,8 @@ define half @fadda_v4f16(half %start, <4 x half> %a) { ; CHECK-LABEL: fadda_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: fadda h0, p0, h0, z1.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 @@ -23,8 +23,8 @@ define half @fadda_v8f16(half %start, <8 x half> %a) { ; CHECK-LABEL: fadda_v8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: fadda h0, p0, h0, z1.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 @@ -36,11 +36,12 @@ define half @fadda_v16f16(half %start, ptr %a) { ; CHECK-LABEL: fadda_v16f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: // kill: def $h0 killed $h0 def $z0 +; CHECK-NEXT: fadda h0, p0, h0, z1.h +; CHECK-NEXT: ldr q1, [x0, #16] ; CHECK-NEXT: fadda h0, p0, h0, z1.h -; CHECK-NEXT: fadda h0, p0, h0, z2.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret %op = load 
<16 x half>, ptr %a @@ -51,8 +52,8 @@ define float @fadda_v2f32(float %start, <2 x float> %a) { ; CHECK-LABEL: fadda_v2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: fadda s0, p0, s0, z1.s ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 @@ -64,8 +65,8 @@ define float @fadda_v4f32(float %start, <4 x float> %a) { ; CHECK-LABEL: fadda_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: fadda s0, p0, s0, z1.s ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 @@ -77,11 +78,12 @@ define float @fadda_v8f32(float %start, ptr %a) { ; CHECK-LABEL: fadda_v8f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: // kill: def $s0 killed $s0 def $z0 +; CHECK-NEXT: fadda s0, p0, s0, z1.s +; CHECK-NEXT: ldr q1, [x0, #16] ; CHECK-NEXT: fadda s0, p0, s0, z1.s -; CHECK-NEXT: fadda s0, p0, s0, z2.s ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret %op = load <8 x float>, ptr %a @@ -102,8 +104,8 @@ define double @fadda_v2f64(double %start, <2 x double> %a) { ; CHECK-LABEL: fadda_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: fadda d0, p0, d0, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -115,11 +117,12 @@ define double @fadda_v4f64(double %start, ptr %a) { ; CHECK-LABEL: fadda_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: fadda d0, p0, d0, z1.d +; CHECK-NEXT: ldr q1, [x0, #16] ; CHECK-NEXT: fadda d0, p0, d0, z1.d -; CHECK-NEXT: fadda d0, p0, d0, z2.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %op = load <4 x double>, ptr %a @@ -134,8 +137,8 @@ define half @faddv_v4f16(half %start, <4 x half> %a) { ; CHECK-LABEL: faddv_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: faddv h1, p0, z1.h ; CHECK-NEXT: fadd h0, h0, h1 ; CHECK-NEXT: ret @@ -146,8 +149,8 @@ define half @faddv_v8f16(half %start, <8 x half> %a) { ; CHECK-LABEL: faddv_v8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: faddv h1, p0, z1.h ; CHECK-NEXT: fadd h0, h0, h1 ; CHECK-NEXT: ret @@ -158,8 +161,8 @@ define half @faddv_v16f16(half %start, ptr %a) { ; CHECK-LABEL: faddv_v16f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q2, q1, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: ldp q2, q1, [x0] ; CHECK-NEXT: fadd z1.h, p0/m, z1.h, z2.h ; CHECK-NEXT: faddv h1, p0, z1.h ; CHECK-NEXT: fadd h0, h0, h1 @@ -172,8 +175,8 @@ define float @faddv_v2f32(float %start, <2 x float> %a) { ; CHECK-LABEL: faddv_v2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d1 
killed $d1 def $z1 ; CHECK-NEXT: faddv s1, p0, z1.s ; CHECK-NEXT: fadd s0, s0, s1 ; CHECK-NEXT: ret @@ -184,8 +187,8 @@ define float @faddv_v4f32(float %start, <4 x float> %a) { ; CHECK-LABEL: faddv_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: faddv s1, p0, z1.s ; CHECK-NEXT: fadd s0, s0, s1 ; CHECK-NEXT: ret @@ -196,8 +199,8 @@ define float @faddv_v8f32(float %start, ptr %a) { ; CHECK-LABEL: faddv_v8f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q2, q1, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: ldp q2, q1, [x0] ; CHECK-NEXT: fadd z1.s, p0/m, z1.s, z2.s ; CHECK-NEXT: faddv s1, p0, z1.s ; CHECK-NEXT: fadd s0, s0, s1 @@ -220,8 +223,8 @@ define double @faddv_v2f64(double %start, <2 x double> %a) { ; CHECK-LABEL: faddv_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: faddv d1, p0, z1.d ; CHECK-NEXT: fadd d0, d0, d1 ; CHECK-NEXT: ret @@ -232,8 +235,8 @@ define double @faddv_v4f64(double %start, ptr %a) { ; CHECK-LABEL: faddv_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q2, q1, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldp q2, q1, [x0] ; CHECK-NEXT: fadd z1.d, p0/m, z1.d, z2.d ; CHECK-NEXT: faddv d1, p0, z1.d ; CHECK-NEXT: fadd d0, d0, d1 @@ -250,8 +253,8 @@ define half @fmaxv_v4f16(<4 x half> %a) { ; CHECK-LABEL: fmaxv_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: fmaxnmv h0, p0, z0.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret @@ -262,8 +265,8 @@ define half @fmaxv_v8f16(<8 x half> %a) { ; CHECK-LABEL: fmaxv_v8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: fmaxnmv h0, p0, z0.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret @@ -274,8 +277,8 @@ define half @fmaxv_v16f16(ptr %a) { ; CHECK-LABEL: fmaxv_v16f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: fmaxnm z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: fmaxnmv h0, p0, z0.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 @@ -288,8 +291,8 @@ define float @fmaxv_v2f32(<2 x float> %a) { ; CHECK-LABEL: fmaxv_v2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: fmaxnmv s0, p0, z0.s ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret @@ -300,8 +303,8 @@ define float @fmaxv_v4f32(<4 x float> %a) { ; CHECK-LABEL: fmaxv_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: fmaxnmv s0, p0, z0.s ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret @@ -312,8 +315,8 @@ define float @fmaxv_v8f32(ptr %a) { ; CHECK-LABEL: fmaxv_v8f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: fmaxnm z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: fmaxnmv s0, p0, z0.s ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 @@ -336,8 +339,8 @@ define double @fmaxv_v2f64(<2 x double> %a) { ; CHECK-LABEL: 
fmaxv_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: fmaxnmv d0, p0, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -348,8 +351,8 @@ define double @fmaxv_v4f64(ptr %a) { ; CHECK-LABEL: fmaxv_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: fmaxnm z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: fmaxnmv d0, p0, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -366,8 +369,8 @@ define half @fminv_v4f16(<4 x half> %a) { ; CHECK-LABEL: fminv_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: fminnmv h0, p0, z0.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret @@ -378,8 +381,8 @@ define half @fminv_v8f16(<8 x half> %a) { ; CHECK-LABEL: fminv_v8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: fminnmv h0, p0, z0.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret @@ -390,8 +393,8 @@ define half @fminv_v16f16(ptr %a) { ; CHECK-LABEL: fminv_v16f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: fminnm z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: fminnmv h0, p0, z0.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 @@ -404,8 +407,8 @@ define float @fminv_v2f32(<2 x float> %a) { ; CHECK-LABEL: fminv_v2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: fminnmv s0, p0, z0.s ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret @@ -416,8 +419,8 @@ define float @fminv_v4f32(<4 x float> %a) { ; CHECK-LABEL: fminv_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: fminnmv s0, p0, z0.s ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret @@ -428,8 +431,8 @@ define float @fminv_v8f32(ptr %a) { ; CHECK-LABEL: fminv_v8f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: fminnm z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: fminnmv s0, p0, z0.s ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 @@ -452,8 +455,8 @@ define double @fminv_v2f64(<2 x double> %a) { ; CHECK-LABEL: fminv_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: fminnmv d0, p0, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -464,8 +467,8 @@ define double @fminv_v4f64(ptr %a) { ; CHECK-LABEL: fminv_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: fminnm z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: fminnmv d0, p0, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -482,8 +485,8 @@ define half @fmaximumv_v4f16(<4 x half> %a) { ; CHECK-LABEL: fmaximumv_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: 
def $d0 killed $d0 def $z0 ; CHECK-NEXT: fmaxv h0, p0, z0.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret @@ -494,8 +497,8 @@ define half @fmaximumv_v8f16(<8 x half> %a) { ; CHECK-LABEL: fmaximumv_v8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: fmaxv h0, p0, z0.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret @@ -506,8 +509,8 @@ define half @fmaximumv_v16f16(ptr %a) { ; CHECK-LABEL: fmaximumv_v16f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: fmax z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: fmaxv h0, p0, z0.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 @@ -520,8 +523,8 @@ define float @fmaximumv_v2f32(<2 x float> %a) { ; CHECK-LABEL: fmaximumv_v2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: fmaxv s0, p0, z0.s ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret @@ -532,8 +535,8 @@ define float @fmaximumv_v4f32(<4 x float> %a) { ; CHECK-LABEL: fmaximumv_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: fmaxv s0, p0, z0.s ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret @@ -544,8 +547,8 @@ define float @fmaximumv_v8f32(ptr %a) { ; CHECK-LABEL: fmaximumv_v8f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: fmax z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: fmaxv s0, p0, z0.s ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 @@ -568,8 +571,8 @@ define double @fmaximumv_v2f64(<2 x double> %a) { ; CHECK-LABEL: fmaximumv_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: fmaxv d0, p0, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -580,8 +583,8 @@ define double @fmaximumv_v4f64(ptr %a) { ; CHECK-LABEL: fmaximumv_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: fmax z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: fmaxv d0, p0, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -598,8 +601,8 @@ define half @fminimumv_v4f16(<4 x half> %a) { ; CHECK-LABEL: fminimumv_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: fminv h0, p0, z0.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret @@ -610,8 +613,8 @@ define half @fminimumv_v8f16(<8 x half> %a) { ; CHECK-LABEL: fminimumv_v8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: fminv h0, p0, z0.h ; CHECK-NEXT: // kill: def $h0 killed $h0 killed $z0 ; CHECK-NEXT: ret @@ -622,8 +625,8 @@ define half @fminimumv_v16f16(ptr %a) { ; CHECK-LABEL: fminimumv_v16f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: fmin z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: fminv h0, p0, z0.h ; CHECK-NEXT: 
// kill: def $h0 killed $h0 killed $z0 @@ -636,8 +639,8 @@ define float @fminimumv_v2f32(<2 x float> %a) { ; CHECK-LABEL: fminimumv_v2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: fminv s0, p0, z0.s ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret @@ -648,8 +651,8 @@ define float @fminimumv_v4f32(<4 x float> %a) { ; CHECK-LABEL: fminimumv_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: fminv s0, p0, z0.s ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 ; CHECK-NEXT: ret @@ -660,8 +663,8 @@ define float @fminimumv_v8f32(ptr %a) { ; CHECK-LABEL: fminimumv_v8f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: fmin z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: fminv s0, p0, z0.s ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $z0 @@ -684,8 +687,8 @@ define double @fminimumv_v2f64(<2 x double> %a) { ; CHECK-LABEL: fminimumv_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: fminv d0, p0, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -696,8 +699,8 @@ define double @fminimumv_v4f64(ptr %a) { ; CHECK-LABEL: fminimumv_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: fmin z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: fminv d0, p0, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-rounding.ll @@ -10,8 +10,8 @@ define <2 x half> @frintp_v2f16(<2 x half> %op) { ; CHECK-LABEL: frintp_v2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: frintp z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -22,8 +22,8 @@ define <4 x half> @frintp_v4f16(<4 x half> %op) { ; CHECK-LABEL: frintp_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: frintp z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -34,8 +34,8 @@ define <8 x half> @frintp_v8f16(<8 x half> %op) { ; CHECK-LABEL: frintp_v8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: frintp z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -46,8 +46,8 @@ define void @frintp_v16f16(ptr %a) { ; CHECK-LABEL: frintp_v16f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: frintp z0.h, p0/m, z0.h ; CHECK-NEXT: frintp z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] @@ -61,8 +61,8 @@ define <2 x float> @frintp_v2f32(<2 x float> %op) { ; CHECK-LABEL: 
frintp_v2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: frintp z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -73,8 +73,8 @@ define <4 x float> @frintp_v4f32(<4 x float> %op) { ; CHECK-LABEL: frintp_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: frintp z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -85,8 +85,8 @@ define void @frintp_v8f32(ptr %a) { ; CHECK-LABEL: frintp_v8f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: frintp z0.s, p0/m, z0.s ; CHECK-NEXT: frintp z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] @@ -110,8 +110,8 @@ define <2 x double> @frintp_v2f64(<2 x double> %op) { ; CHECK-LABEL: frintp_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: frintp z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -122,8 +122,8 @@ define void @frintp_v4f64(ptr %a) { ; CHECK-LABEL: frintp_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: frintp z0.d, p0/m, z0.d ; CHECK-NEXT: frintp z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] @@ -141,8 +141,8 @@ define <2 x half> @frintm_v2f16(<2 x half> %op) { ; CHECK-LABEL: frintm_v2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: frintm z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -153,8 +153,8 @@ define <4 x half> @frintm_v4f16(<4 x half> %op) { ; CHECK-LABEL: frintm_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: frintm z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -165,8 +165,8 @@ define <8 x half> @frintm_v8f16(<8 x half> %op) { ; CHECK-LABEL: frintm_v8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: frintm z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -177,8 +177,8 @@ define void @frintm_v16f16(ptr %a) { ; CHECK-LABEL: frintm_v16f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: frintm z0.h, p0/m, z0.h ; CHECK-NEXT: frintm z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] @@ -192,8 +192,8 @@ define <2 x float> @frintm_v2f32(<2 x float> %op) { ; CHECK-LABEL: frintm_v2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: frintm z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -204,8 +204,8 @@ define <4 x float> @frintm_v4f32(<4 x float> %op) { ; CHECK-LABEL: frintm_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; 
CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: frintm z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -216,8 +216,8 @@ define void @frintm_v8f32(ptr %a) { ; CHECK-LABEL: frintm_v8f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: frintm z0.s, p0/m, z0.s ; CHECK-NEXT: frintm z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] @@ -241,8 +241,8 @@ define <2 x double> @frintm_v2f64(<2 x double> %op) { ; CHECK-LABEL: frintm_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: frintm z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -253,8 +253,8 @@ define void @frintm_v4f64(ptr %a) { ; CHECK-LABEL: frintm_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: frintm z0.d, p0/m, z0.d ; CHECK-NEXT: frintm z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] @@ -272,8 +272,8 @@ define <2 x half> @frinti_v2f16(<2 x half> %op) { ; CHECK-LABEL: frinti_v2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: frinti z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -284,8 +284,8 @@ define <4 x half> @frinti_v4f16(<4 x half> %op) { ; CHECK-LABEL: frinti_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: frinti z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -296,8 +296,8 @@ define <8 x half> @frinti_v8f16(<8 x half> %op) { ; CHECK-LABEL: frinti_v8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: frinti z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -308,8 +308,8 @@ define void @frinti_v16f16(ptr %a) { ; CHECK-LABEL: frinti_v16f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: frinti z0.h, p0/m, z0.h ; CHECK-NEXT: frinti z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] @@ -323,8 +323,8 @@ define <2 x float> @frinti_v2f32(<2 x float> %op) { ; CHECK-LABEL: frinti_v2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: frinti z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -335,8 +335,8 @@ define <4 x float> @frinti_v4f32(<4 x float> %op) { ; CHECK-LABEL: frinti_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: frinti z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -347,8 +347,8 @@ define void @frinti_v8f32(ptr %a) { ; CHECK-LABEL: frinti_v8f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: frinti z0.s, p0/m, z0.s ; CHECK-NEXT: frinti z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] @@ -372,8 
+372,8 @@ define <2 x double> @frinti_v2f64(<2 x double> %op) { ; CHECK-LABEL: frinti_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: frinti z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -384,8 +384,8 @@ define void @frinti_v4f64(ptr %a) { ; CHECK-LABEL: frinti_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: frinti z0.d, p0/m, z0.d ; CHECK-NEXT: frinti z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] @@ -403,8 +403,8 @@ define <2 x half> @frintx_v2f16(<2 x half> %op) { ; CHECK-LABEL: frintx_v2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: frintx z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -415,8 +415,8 @@ define <4 x half> @frintx_v4f16(<4 x half> %op) { ; CHECK-LABEL: frintx_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: frintx z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -427,8 +427,8 @@ define <8 x half> @frintx_v8f16(<8 x half> %op) { ; CHECK-LABEL: frintx_v8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: frintx z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -439,8 +439,8 @@ define void @frintx_v16f16(ptr %a) { ; CHECK-LABEL: frintx_v16f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: frintx z0.h, p0/m, z0.h ; CHECK-NEXT: frintx z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] @@ -454,8 +454,8 @@ define <2 x float> @frintx_v2f32(<2 x float> %op) { ; CHECK-LABEL: frintx_v2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: frintx z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -466,8 +466,8 @@ define <4 x float> @frintx_v4f32(<4 x float> %op) { ; CHECK-LABEL: frintx_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: frintx z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -478,8 +478,8 @@ define void @frintx_v8f32(ptr %a) { ; CHECK-LABEL: frintx_v8f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: frintx z0.s, p0/m, z0.s ; CHECK-NEXT: frintx z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] @@ -503,8 +503,8 @@ define <2 x double> @frintx_v2f64(<2 x double> %op) { ; CHECK-LABEL: frintx_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: frintx z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -515,8 +515,8 @@ define void @frintx_v4f64(ptr %a) { ; CHECK-LABEL: frintx_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp 
q0, q1, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: frintx z0.d, p0/m, z0.d ; CHECK-NEXT: frintx z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] @@ -534,8 +534,8 @@ define <2 x half> @frinta_v2f16(<2 x half> %op) { ; CHECK-LABEL: frinta_v2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: frinta z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -546,8 +546,8 @@ define <4 x half> @frinta_v4f16(<4 x half> %op) { ; CHECK-LABEL: frinta_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: frinta z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -558,8 +558,8 @@ define <8 x half> @frinta_v8f16(<8 x half> %op) { ; CHECK-LABEL: frinta_v8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: frinta z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -570,8 +570,8 @@ define void @frinta_v16f16(ptr %a) { ; CHECK-LABEL: frinta_v16f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: frinta z0.h, p0/m, z0.h ; CHECK-NEXT: frinta z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] @@ -585,8 +585,8 @@ define <2 x float> @frinta_v2f32(<2 x float> %op) { ; CHECK-LABEL: frinta_v2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: frinta z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -597,8 +597,8 @@ define <4 x float> @frinta_v4f32(<4 x float> %op) { ; CHECK-LABEL: frinta_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: frinta z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -609,8 +609,8 @@ define void @frinta_v8f32(ptr %a) { ; CHECK-LABEL: frinta_v8f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: frinta z0.s, p0/m, z0.s ; CHECK-NEXT: frinta z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] @@ -634,8 +634,8 @@ define <2 x double> @frinta_v2f64(<2 x double> %op) { ; CHECK-LABEL: frinta_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: frinta z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -646,8 +646,8 @@ define void @frinta_v4f64(ptr %a) { ; CHECK-LABEL: frinta_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: frinta z0.d, p0/m, z0.d ; CHECK-NEXT: frinta z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] @@ -665,8 +665,8 @@ define <2 x half> @frintn_v2f16(<2 x half> %op) { ; CHECK-LABEL: frintn_v2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: frintn z0.h, p0/m, z0.h ; 
CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -677,8 +677,8 @@ define <4 x half> @frintn_v4f16(<4 x half> %op) { ; CHECK-LABEL: frintn_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: frintn z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -689,8 +689,8 @@ define <8 x half> @frintn_v8f16(<8 x half> %op) { ; CHECK-LABEL: frintn_v8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: frintn z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -701,8 +701,8 @@ define void @frintn_v16f16(ptr %a) { ; CHECK-LABEL: frintn_v16f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: frintn z0.h, p0/m, z0.h ; CHECK-NEXT: frintn z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] @@ -716,8 +716,8 @@ define <2 x float> @frintn_v2f32(<2 x float> %op) { ; CHECK-LABEL: frintn_v2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: frintn z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -728,8 +728,8 @@ define <4 x float> @frintn_v4f32(<4 x float> %op) { ; CHECK-LABEL: frintn_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: frintn z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -740,8 +740,8 @@ define void @frintn_v8f32(ptr %a) { ; CHECK-LABEL: frintn_v8f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: frintn z0.s, p0/m, z0.s ; CHECK-NEXT: frintn z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] @@ -765,8 +765,8 @@ define <2 x double> @frintn_v2f64(<2 x double> %op) { ; CHECK-LABEL: frintn_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: frintn z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -777,8 +777,8 @@ define void @frintn_v4f64(ptr %a) { ; CHECK-LABEL: frintn_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: frintn z0.d, p0/m, z0.d ; CHECK-NEXT: frintn z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] @@ -796,8 +796,8 @@ define <2 x half> @frintz_v2f16(<2 x half> %op) { ; CHECK-LABEL: frintz_v2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: frintz z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -808,8 +808,8 @@ define <4 x half> @frintz_v4f16(<4 x half> %op) { ; CHECK-LABEL: frintz_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: frintz z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -820,8 +820,8 @@ define <8 x half> 
@frintz_v8f16(<8 x half> %op) { ; CHECK-LABEL: frintz_v8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: frintz z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -832,8 +832,8 @@ define void @frintz_v16f16(ptr %a) { ; CHECK-LABEL: frintz_v16f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: frintz z0.h, p0/m, z0.h ; CHECK-NEXT: frintz z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x0] @@ -847,8 +847,8 @@ define <2 x float> @frintz_v2f32(<2 x float> %op) { ; CHECK-LABEL: frintz_v2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: frintz z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -859,8 +859,8 @@ define <4 x float> @frintz_v4f32(<4 x float> %op) { ; CHECK-LABEL: frintz_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: frintz z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -871,8 +871,8 @@ define void @frintz_v8f32(ptr %a) { ; CHECK-LABEL: frintz_v8f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: frintz z0.s, p0/m, z0.s ; CHECK-NEXT: frintz z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x0] @@ -896,8 +896,8 @@ define <2 x double> @frintz_v2f64(<2 x double> %op) { ; CHECK-LABEL: frintz_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: frintz z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -908,8 +908,8 @@ define void @frintz_v4f64(ptr %a) { ; CHECK-LABEL: frintz_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: frintz z0.d, p0/m, z0.d ; CHECK-NEXT: frintz z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x0] diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-select.ll @@ -6,8 +6,8 @@ define <2 x half> @select_v2f16(<2 x half> %op1, <2 x half> %op2, i1 %mask) { ; CHECK-LABEL: select_v2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x1 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: and w8, w0, #0x1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: mov z2.h, w8 @@ -22,8 +22,8 @@ define <4 x half> @select_v4f16(<4 x half> %op1, <4 x half> %op2, i1 %mask) { ; CHECK-LABEL: select_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x1 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: and w8, w0, #0x1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: mov z2.h, w8 @@ -38,8 +38,8 @@ define <8 x half> @select_v8f16(<8 x half> %op1, <8 x half> %op2, i1 %mask) { ; CHECK-LABEL: select_v8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x1 ; CHECK-NEXT: ptrue p0.h 
+; CHECK-NEXT: and w8, w0, #0x1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: mov z2.h, w8 @@ -54,14 +54,14 @@ define void @select_v16f16(ptr %a, ptr %b, i1 %mask) { ; CHECK-LABEL: select_v16f16: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: and w8, w2, #0x1 +; CHECK-NEXT: mov z0.h, w8 +; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x0, #16] -; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: ldr q2, [x1] ; CHECK-NEXT: ldr q3, [x1, #16] -; CHECK-NEXT: mov z4.h, w8 -; CHECK-NEXT: cmpne p0.h, p0/z, z4.h, #0 ; CHECK-NEXT: sel z0.h, p0, z0.h, z2.h ; CHECK-NEXT: sel z1.h, p0, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] @@ -76,8 +76,8 @@ define <2 x float> @select_v2f32(<2 x float> %op1, <2 x float> %op2, i1 %mask) { ; CHECK-LABEL: select_v2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x1 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: and w8, w0, #0x1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: mov z2.s, w8 @@ -92,8 +92,8 @@ define <4 x float> @select_v4f32(<4 x float> %op1, <4 x float> %op2, i1 %mask) { ; CHECK-LABEL: select_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x1 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: and w8, w0, #0x1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: mov z2.s, w8 @@ -108,14 +108,14 @@ define void @select_v8f32(ptr %a, ptr %b, i1 %mask) { ; CHECK-LABEL: select_v8f32: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: and w8, w2, #0x1 +; CHECK-NEXT: mov z0.s, w8 +; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0 ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x0, #16] -; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: ldr q2, [x1] ; CHECK-NEXT: ldr q3, [x1, #16] -; CHECK-NEXT: mov z4.s, w8 -; CHECK-NEXT: cmpne p0.s, p0/z, z4.s, #0 ; CHECK-NEXT: sel z0.s, p0, z0.s, z2.s ; CHECK-NEXT: sel z1.s, p0, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] @@ -149,9 +149,9 @@ define <2 x double> @select_v2f64(<2 x double> %op1, <2 x double> %op2, i1 %mask) { ; CHECK-LABEL: select_v2f64: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-NEXT: and x8, x0, #0x1 -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: mov z2.d, x8 @@ -166,15 +166,15 @@ define void @select_v4f64(ptr %a, ptr %b, i1 %mask) { ; CHECK-LABEL: select_v4f64: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2 ; CHECK-NEXT: and x8, x2, #0x1 +; CHECK-NEXT: mov z0.d, x8 +; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x0, #16] -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ldr q2, [x1] ; CHECK-NEXT: ldr q3, [x1, #16] -; CHECK-NEXT: mov z4.d, x8 -; CHECK-NEXT: cmpne p0.d, p0/z, z4.d, #0 ; CHECK-NEXT: sel z0.d, p0, z0.d, z2.d ; CHECK-NEXT: sel z1.d, p0, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-to-int.ll @@ -10,8 +10,8 @@ define <4 x i16> @fcvtzu_v4f16_v4i16(<4 x half> %op1) { ; CHECK-LABEL: fcvtzu_v4f16_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def 
$z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: fcvtzu z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -22,8 +22,8 @@ define void @fcvtzu_v8f16_v8i16(ptr %a, ptr %b) { ; CHECK-LABEL: fcvtzu_v8f16_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: fcvtzu z0.h, p0/m, z0.h ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret @@ -36,8 +36,8 @@ define void @fcvtzu_v16f16_v16i16(ptr %a, ptr %b) { ; CHECK-LABEL: fcvtzu_v16f16_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: fcvtzu z0.h, p0/m, z0.h ; CHECK-NEXT: fcvtzu z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x1] @@ -55,8 +55,8 @@ define <2 x i32> @fcvtzu_v2f16_v2i32(<2 x half> %op1) { ; CHECK-LABEL: fcvtzu_v2f16_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -68,8 +68,8 @@ define <4 x i32> @fcvtzu_v4f16_v4i32(<4 x half> %op1) { ; CHECK-LABEL: fcvtzu_v4f16_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -99,21 +99,20 @@ define void @fcvtzu_v16f16_v16i32(ptr %a, ptr %b) { ; CHECK-LABEL: fcvtzu_v16f16_v16i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: uunpklo z2.s, z0.h ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: uunpklo z3.s, z1.h ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 +; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: fcvtzu z2.s, p0/m, z2.h ; CHECK-NEXT: fcvtzu z3.s, p0/m, z3.h -; CHECK-NEXT: fcvtzu z1.s, p0/m, z1.h ; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.h -; CHECK-NEXT: stp q3, q1, [x1, #32] -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: fcvtzu z1.s, p0/m, z2.h -; CHECK-NEXT: stp q1, q0, [x1] +; CHECK-NEXT: fcvtzu z1.s, p0/m, z1.h +; CHECK-NEXT: stp q2, q0, [x1, #32] +; CHECK-NEXT: stp q3, q1, [x1] ; CHECK-NEXT: ret %op1 = load <16 x half>, ptr %a %res = fptoui <16 x half> %op1 to <16 x i32> @@ -139,9 +138,9 @@ ; CHECK-LABEL: fcvtzu_v2f16_v2i64: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: mov z1.h, z0.h[1] ; CHECK-NEXT: fcvtzu x8, h0 -; CHECK-NEXT: mov z0.h, z0.h[1] -; CHECK-NEXT: fcvtzu x9, h0 +; CHECK-NEXT: fcvtzu x9, h1 ; CHECK-NEXT: stp x8, x9, [sp, #-16]! ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: ldr q0, [sp], #16 @@ -156,10 +155,10 @@ ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: mov z1.h, z0.h[1] ; CHECK-NEXT: fcvtzu x8, h0 -; CHECK-NEXT: fcvtzu x9, h1 -; CHECK-NEXT: mov z1.h, z0.h[3] +; CHECK-NEXT: mov z2.h, z0.h[3] ; CHECK-NEXT: mov z0.h, z0.h[2] -; CHECK-NEXT: fcvtzu x10, h1 +; CHECK-NEXT: fcvtzu x9, h1 +; CHECK-NEXT: fcvtzu x10, h2 ; CHECK-NEXT: fcvtzu x11, h0 ; CHECK-NEXT: stp x8, x9, [sp, #-32]! 
; CHECK-NEXT: .cfi_def_cfa_offset 32 @@ -181,27 +180,27 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 64 ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: mov z1.h, z0.h[1] +; CHECK-NEXT: mov z2.h, z0.h[3] +; CHECK-NEXT: mov z3.h, z0.h[2] ; CHECK-NEXT: fcvtzu x8, h0 -; CHECK-NEXT: fcvtzu x9, h1 -; CHECK-NEXT: mov z1.h, z0.h[3] -; CHECK-NEXT: fcvtzu x10, h1 -; CHECK-NEXT: mov z1.h, z0.h[2] ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; CHECK-NEXT: fcvtzu x11, h1 +; CHECK-NEXT: fcvtzu x9, h1 +; CHECK-NEXT: fcvtzu x10, h2 +; CHECK-NEXT: fcvtzu x11, h3 ; CHECK-NEXT: mov z1.h, z0.h[1] +; CHECK-NEXT: mov z2.h, z0.h[3] ; CHECK-NEXT: fcvtzu x12, h0 +; CHECK-NEXT: mov z0.h, z0.h[2] ; CHECK-NEXT: stp x8, x9, [sp, #32] ; CHECK-NEXT: fcvtzu x8, h1 -; CHECK-NEXT: mov z1.h, z0.h[3] -; CHECK-NEXT: mov z0.h, z0.h[2] +; CHECK-NEXT: fcvtzu x9, h2 ; CHECK-NEXT: stp x11, x10, [sp, #48] -; CHECK-NEXT: fcvtzu x9, h1 ; CHECK-NEXT: fcvtzu x10, h0 +; CHECK-NEXT: ldp q2, q3, [sp, #32] ; CHECK-NEXT: stp x12, x8, [sp] -; CHECK-NEXT: ldp q3, q2, [sp, #32] ; CHECK-NEXT: stp x10, x9, [sp, #16] ; CHECK-NEXT: ldp q1, q0, [sp] -; CHECK-NEXT: stp q3, q2, [x1] +; CHECK-NEXT: stp q2, q3, [x1] ; CHECK-NEXT: stp q1, q0, [x1, #32] ; CHECK-NEXT: add sp, sp, #64 ; CHECK-NEXT: ret @@ -216,53 +215,54 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #128 ; CHECK-NEXT: .cfi_def_cfa_offset 128 -; CHECK-NEXT: ldp q1, q0, [x0] -; CHECK-NEXT: mov z2.h, z1.h[1] -; CHECK-NEXT: mov z3.h, z1.h[3] +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: mov z1.h, z0.h[1] +; CHECK-NEXT: mov z2.h, z0.h[3] +; CHECK-NEXT: fcvtzu x8, h0 +; CHECK-NEXT: mov z3.h, z0.h[2] +; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 +; CHECK-NEXT: fcvtzu x9, h1 +; CHECK-NEXT: fcvtzu x10, h2 +; CHECK-NEXT: ldr q1, [x0, #16] +; CHECK-NEXT: fcvtzu x11, h3 +; CHECK-NEXT: mov z2.h, z0.h[1] +; CHECK-NEXT: mov z3.h, z0.h[3] +; CHECK-NEXT: fcvtzu x12, h1 +; CHECK-NEXT: stp x8, x9, [sp, #32] +; CHECK-NEXT: fcvtzu x8, h0 +; CHECK-NEXT: mov z0.h, z0.h[2] ; CHECK-NEXT: fcvtzu x9, h2 -; CHECK-NEXT: mov z2.h, z1.h[2] -; CHECK-NEXT: fcvtzu x8, h1 +; CHECK-NEXT: stp x11, x10, [sp, #48] ; CHECK-NEXT: fcvtzu x10, h3 -; CHECK-NEXT: fcvtzu x11, h2 -; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 -; CHECK-NEXT: fcvtzu x12, h1 ; CHECK-NEXT: mov z2.h, z1.h[1] ; CHECK-NEXT: mov z3.h, z1.h[3] -; CHECK-NEXT: mov z1.h, z1.h[2] -; CHECK-NEXT: stp x8, x9, [sp, #32] -; CHECK-NEXT: fcvtzu x9, h3 -; CHECK-NEXT: stp x11, x10, [sp, #48] -; CHECK-NEXT: fcvtzu x10, h1 +; CHECK-NEXT: fcvtzu x11, h0 +; CHECK-NEXT: mov z0.h, z1.h[2] +; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 +; CHECK-NEXT: stp x8, x9, [sp] ; CHECK-NEXT: fcvtzu x8, h2 -; CHECK-NEXT: mov z1.h, z0.h[1] -; CHECK-NEXT: stp x10, x9, [sp, #16] -; CHECK-NEXT: fcvtzu x9, h1 -; CHECK-NEXT: mov z1.h, z0.h[3] -; CHECK-NEXT: stp x12, x8, [sp] -; CHECK-NEXT: fcvtzu x8, h0 -; CHECK-NEXT: fcvtzu x10, h1 -; CHECK-NEXT: mov z1.h, z0.h[2] -; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 +; CHECK-NEXT: fcvtzu x9, h3 +; CHECK-NEXT: mov z2.h, z1.h[1] +; CHECK-NEXT: stp x11, x10, [sp, #16] +; CHECK-NEXT: fcvtzu x10, h0 +; CHECK-NEXT: mov z0.h, z1.h[3] ; CHECK-NEXT: fcvtzu x11, h1 -; CHECK-NEXT: mov z1.h, z0.h[1] -; CHECK-NEXT: stp x8, x9, [sp, #96] +; CHECK-NEXT: mov z1.h, z1.h[2] +; CHECK-NEXT: stp x12, x8, [sp, #96] +; CHECK-NEXT: fcvtzu x12, h2 ; CHECK-NEXT: fcvtzu x8, h0 +; CHECK-NEXT: ldp q3, q4, [sp] +; CHECK-NEXT: stp x10, x9, [sp, #112] ; CHECK-NEXT: fcvtzu x9, h1 -; CHECK-NEXT: mov z1.h, z0.h[3] -; CHECK-NEXT: mov z0.h, z0.h[2] -; CHECK-NEXT: stp x11, x10, [sp, #112] -; CHECK-NEXT: fcvtzu x10, h1 -; CHECK-NEXT: 
fcvtzu x11, h0 -; CHECK-NEXT: stp x8, x9, [sp, #64] ; CHECK-NEXT: ldp q0, q1, [sp, #32] -; CHECK-NEXT: stp x11, x10, [sp, #80] -; CHECK-NEXT: ldp q2, q3, [sp] -; CHECK-NEXT: ldp q5, q4, [sp, #64] -; CHECK-NEXT: ldp q7, q6, [sp, #96] +; CHECK-NEXT: stp x11, x12, [sp, #64] +; CHECK-NEXT: ldp q6, q7, [sp, #96] +; CHECK-NEXT: stp x9, x8, [sp, #80] +; CHECK-NEXT: ldp q5, q2, [sp, #64] ; CHECK-NEXT: stp q0, q1, [x1] -; CHECK-NEXT: stp q2, q3, [x1, #32] -; CHECK-NEXT: stp q5, q4, [x1, #96] -; CHECK-NEXT: stp q7, q6, [x1, #64] +; CHECK-NEXT: stp q3, q4, [x1, #32] +; CHECK-NEXT: stp q6, q7, [x1, #64] +; CHECK-NEXT: stp q5, q2, [x1, #96] ; CHECK-NEXT: add sp, sp, #128 ; CHECK-NEXT: ret %op1 = load <16 x half>, ptr %a @@ -278,8 +278,8 @@ define <2 x i16> @fcvtzu_v2f32_v2i16(<2 x float> %op1) { ; CHECK-LABEL: fcvtzu_v2f32_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -290,8 +290,8 @@ define <4 x i16> @fcvtzu_v4f32_v4i16(<4 x float> %op1) { ; CHECK-LABEL: fcvtzu_v4f32_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -303,14 +303,14 @@ define <8 x i16> @fcvtzu_v8f32_v8i16(ptr %a) { ; CHECK-LABEL: fcvtzu_v8f32_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: fcvtzu z1.s, p0/m, z1.s ; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: uzp1 z2.h, z0.h, z0.h -; CHECK-NEXT: uzp1 z0.h, z1.h, z1.h -; CHECK-NEXT: splice z0.h, p0, z0.h, z2.h +; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h +; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h +; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret %op1 = load <8 x float>, ptr %a @@ -321,21 +321,21 @@ define void @fcvtzu_v16f32_v16i16(ptr %a, ptr %b) { ; CHECK-LABEL: fcvtzu_v16f32_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ptrue p1.h, vl4 -; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s -; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h -; CHECK-NEXT: ldp q3, q2, [x0, #32] +; CHECK-NEXT: ldp q0, q1, [x0, #32] +; CHECK-NEXT: ldp q2, q3, [x0] ; CHECK-NEXT: fcvtzu z1.s, p0/m, z1.s -; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h -; CHECK-NEXT: splice z0.h, p1, z0.h, z1.h +; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s ; CHECK-NEXT: fcvtzu z3.s, p0/m, z3.s -; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h ; CHECK-NEXT: fcvtzu z2.s, p0/m, z2.s +; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h +; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h +; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h ; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h -; CHECK-NEXT: splice z3.h, p1, z3.h, z2.h -; CHECK-NEXT: stp q0, q3, [x1] +; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h +; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h +; CHECK-NEXT: stp q2, q0, [x1] ; CHECK-NEXT: ret %op1 = load <16 x float>, ptr %a %res = fptoui <16 x float> %op1 to <16 x i16> @@ -350,8 +350,8 @@ define <2 x i32> @fcvtzu_v2f32_v2i32(<2 x float> %op1) { ; CHECK-LABEL: fcvtzu_v2f32_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; 
CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -362,8 +362,8 @@ define <4 x i32> @fcvtzu_v4f32_v4i32(<4 x float> %op1) { ; CHECK-LABEL: fcvtzu_v4f32_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -374,8 +374,8 @@ define void @fcvtzu_v8f32_v8i32(ptr %a, ptr %b) { ; CHECK-LABEL: fcvtzu_v8f32_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: fcvtzu z0.s, p0/m, z0.s ; CHECK-NEXT: fcvtzu z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x1] @@ -393,8 +393,8 @@ define <1 x i64> @fcvtzu_v1f32_v1i64(<1 x float> %op1) { ; CHECK-LABEL: fcvtzu_v1f32_v1i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: uunpklo z0.d, z0.s ; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -406,8 +406,8 @@ define <2 x i64> @fcvtzu_v2f32_v2i64(<2 x float> %op1) { ; CHECK-LABEL: fcvtzu_v2f32_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: uunpklo z0.d, z0.s ; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -437,21 +437,20 @@ define void @fcvtzu_v8f32_v8i64(ptr %a, ptr %b) { ; CHECK-LABEL: fcvtzu_v8f32_v8i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: uunpklo z2.d, z0.s ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; CHECK-NEXT: uunpklo z0.d, z0.s ; CHECK-NEXT: uunpklo z3.d, z1.s ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 +; CHECK-NEXT: uunpklo z0.d, z0.s ; CHECK-NEXT: uunpklo z1.d, z1.s +; CHECK-NEXT: fcvtzu z2.d, p0/m, z2.s ; CHECK-NEXT: fcvtzu z3.d, p0/m, z3.s -; CHECK-NEXT: fcvtzu z1.d, p0/m, z1.s ; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.s -; CHECK-NEXT: stp q3, q1, [x1, #32] -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: fcvtzu z1.d, p0/m, z2.s -; CHECK-NEXT: stp q1, q0, [x1] +; CHECK-NEXT: fcvtzu z1.d, p0/m, z1.s +; CHECK-NEXT: stp q2, q0, [x1, #32] +; CHECK-NEXT: stp q3, q1, [x1] ; CHECK-NEXT: ret %op1 = load <8 x float>, ptr %a %res = fptoui <8 x float> %op1 to <8 x i64> @@ -478,8 +477,8 @@ define <2 x i16> @fcvtzu_v2f64_v2i16(<2 x double> %op1) { ; CHECK-LABEL: fcvtzu_v2f64_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -493,21 +492,21 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d -; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d -; CHECK-NEXT: fmov w9, s1 +; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s -; CHECK-NEXT: mov z1.s, z1.s[1] +; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s ; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: mov z0.s, z0.s[1] -; CHECK-NEXT: fmov w10, s0 -; CHECK-NEXT: strh w9, [sp, #8] +; CHECK-NEXT: mov 
z2.s, z0.s[1] +; CHECK-NEXT: mov z0.s, z1.s[1] ; CHECK-NEXT: strh w8, [sp, #12] ; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: strh w10, [sp, #14] +; CHECK-NEXT: strh w8, [sp, #8] +; CHECK-NEXT: fmov w8, s2 +; CHECK-NEXT: strh w8, [sp, #14] +; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: strh w8, [sp, #10] ; CHECK-NEXT: ldr d0, [sp, #8] ; CHECK-NEXT: add sp, sp, #16 @@ -522,37 +521,37 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 -; CHECK-NEXT: ldp q0, q1, [x0, #32] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d -; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s +; CHECK-NEXT: ldp q1, q0, [x0, #32] ; CHECK-NEXT: ldp q3, q2, [x0] +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d ; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: mov z4.s, z1.s[1] +; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d ; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.d -; CHECK-NEXT: strh w9, [sp, #8] +; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s +; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s +; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s ; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s -; CHECK-NEXT: fmov w9, s4 -; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: mov z0.s, z0.s[1] ; CHECK-NEXT: strh w8, [sp, #12] -; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s +; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: mov z1.s, z1.s[1] +; CHECK-NEXT: strh w8, [sp, #8] +; CHECK-NEXT: fmov w8, s2 +; CHECK-NEXT: mov z2.s, z2.s[1] +; CHECK-NEXT: strh w8, [sp, #4] ; CHECK-NEXT: fmov w8, s3 -; CHECK-NEXT: fmov w10, s2 -; CHECK-NEXT: mov z1.s, z0.s[1] -; CHECK-NEXT: mov z0.s, z2.s[1] -; CHECK-NEXT: mov z2.s, z3.s[1] +; CHECK-NEXT: mov z3.s, z3.s[1] ; CHECK-NEXT: strh w8, [sp] ; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: strh w10, [sp, #4] -; CHECK-NEXT: fmov w10, s1 -; CHECK-NEXT: strh w9, [sp, #14] -; CHECK-NEXT: fmov w9, s2 +; CHECK-NEXT: strh w8, [sp, #14] +; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: strh w8, [sp, #10] +; CHECK-NEXT: fmov w8, s2 ; CHECK-NEXT: strh w8, [sp, #6] -; CHECK-NEXT: strh w10, [sp, #10] -; CHECK-NEXT: strh w9, [sp, #2] +; CHECK-NEXT: fmov w8, s3 +; CHECK-NEXT: strh w8, [sp, #2] ; CHECK-NEXT: ldr q0, [sp], #16 ; CHECK-NEXT: ret %op1 = load <8 x double>, ptr %a @@ -565,67 +564,68 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #32 ; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: ldp q2, q3, [x0, #32] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldp q0, q1, [x0, #32] +; CHECK-NEXT: ldp q3, q2, [x0] +; CHECK-NEXT: ldr q6, [x0, #112] +; CHECK-NEXT: ldp q4, q5, [x0, #80] +; CHECK-NEXT: ldr q7, [x0, #64] +; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d +; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d ; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d -; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s -; CHECK-NEXT: ldp q4, q5, [x0] ; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.d -; CHECK-NEXT: fmov w9, s2 -; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s -; CHECK-NEXT: fmov w8, s3 -; CHECK-NEXT: mov z6.s, z3.s[1] -; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.d -; CHECK-NEXT: mov z3.s, z2.s[1] -; CHECK-NEXT: uzp1 z4.s, z4.s, z4.s -; CHECK-NEXT: ldp q0, q1, [x0, #64] +; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.d ; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.d -; CHECK-NEXT: uzp1 z5.s, z5.s, z5.s -; CHECK-NEXT: fmov w10, s5 -; CHECK-NEXT: mov z5.s, z5.s[1] -; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d +; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.d +; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s -; CHECK-NEXT: ldp q2, q7, [x0, #96] +; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s +; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s +; CHECK-NEXT: uzp1 z6.s, z6.s, 
z6.s +; CHECK-NEXT: uzp1 z5.s, z5.s, z5.s +; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: mov z16.s, z1.s[1] +; CHECK-NEXT: mov z1.s, z0.s[1] ; CHECK-NEXT: strh w8, [sp, #12] -; CHECK-NEXT: fmov w8, s4 -; CHECK-NEXT: strh w9, [sp, #8] -; CHECK-NEXT: fmov w9, s6 -; CHECK-NEXT: mov z4.s, z4.s[1] -; CHECK-NEXT: strh w10, [sp, #4] -; CHECK-NEXT: strh w8, [sp] +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: mov z0.s, z2.s[1] +; CHECK-NEXT: strh w8, [sp, #8] +; CHECK-NEXT: fmov w8, s2 +; CHECK-NEXT: mov z2.s, z3.s[1] +; CHECK-NEXT: strh w8, [sp, #4] ; CHECK-NEXT: fmov w8, s3 ; CHECK-NEXT: movprfx z3, z7 ; CHECK-NEXT: fcvtzs z3.d, p0/m, z7.d -; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d -; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s -; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s +; CHECK-NEXT: strh w8, [sp] +; CHECK-NEXT: fmov w8, s16 +; CHECK-NEXT: strh w8, [sp, #14] +; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: uzp1 z1.s, z4.s, z4.s ; CHECK-NEXT: strh w8, [sp, #10] -; CHECK-NEXT: fmov w8, s3 -; CHECK-NEXT: strh w9, [sp, #14] -; CHECK-NEXT: fmov w9, s5 -; CHECK-NEXT: fmov w10, s4 -; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d -; CHECK-NEXT: strh w8, [sp, #28] +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: uzp1 z0.s, z3.s, z3.s +; CHECK-NEXT: mov z3.s, z5.s[1] +; CHECK-NEXT: strh w8, [sp, #6] ; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: mov z3.s, z3.s[1] -; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s -; CHECK-NEXT: strh w9, [sp, #6] -; CHECK-NEXT: fmov w9, s1 -; CHECK-NEXT: strh w10, [sp, #2] -; CHECK-NEXT: fmov w10, s0 +; CHECK-NEXT: mov z2.s, z6.s[1] +; CHECK-NEXT: strh w8, [sp, #2] +; CHECK-NEXT: fmov w8, s6 +; CHECK-NEXT: strh w8, [sp, #28] +; CHECK-NEXT: fmov w8, s5 ; CHECK-NEXT: strh w8, [sp, #24] -; CHECK-NEXT: fmov w8, s3 -; CHECK-NEXT: mov z4.s, z2.s[1] -; CHECK-NEXT: mov z2.s, z1.s[1] -; CHECK-NEXT: mov z1.s, z0.s[1] -; CHECK-NEXT: strh w9, [sp, #20] -; CHECK-NEXT: fmov w9, s4 -; CHECK-NEXT: strh w10, [sp, #16] -; CHECK-NEXT: fmov w10, s2 +; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: mov z1.s, z1.s[1] +; CHECK-NEXT: strh w8, [sp, #20] +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: mov z0.s, z0.s[1] +; CHECK-NEXT: strh w8, [sp, #16] +; CHECK-NEXT: fmov w8, s2 ; CHECK-NEXT: strh w8, [sp, #30] +; CHECK-NEXT: fmov w8, s3 +; CHECK-NEXT: strh w8, [sp, #26] ; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: strh w9, [sp, #26] -; CHECK-NEXT: strh w10, [sp, #22] +; CHECK-NEXT: strh w8, [sp, #22] +; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: strh w8, [sp, #18] ; CHECK-NEXT: ldp q1, q0, [sp] ; CHECK-NEXT: stp q1, q0, [x1] @@ -644,8 +644,8 @@ define <1 x i32> @fcvtzu_v1f64_v1i32(<1 x double> %op1) { ; CHECK-LABEL: fcvtzu_v1f64_v1i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -657,8 +657,8 @@ define <2 x i32> @fcvtzu_v2f64_v2i32(<2 x double> %op1) { ; CHECK-LABEL: fcvtzu_v2f64_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -670,14 +670,14 @@ define <4 x i32> @fcvtzu_v4f64_v4i32(ptr %a) { ; CHECK-LABEL: fcvtzu_v4f64_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: fcvtzu z1.d, p0/m, z1.d ; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d ; CHECK-NEXT: 
ptrue p0.s, vl2 -; CHECK-NEXT: uzp1 z2.s, z0.s, z0.s -; CHECK-NEXT: uzp1 z0.s, z1.s, z1.s -; CHECK-NEXT: splice z0.s, p0, z0.s, z2.s +; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s +; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s +; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret %op1 = load <4 x double>, ptr %a @@ -688,21 +688,21 @@ define void @fcvtzu_v8f64_v8i32(ptr %a, ptr %b) { ; CHECK-LABEL: fcvtzu_v8f64_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ptrue p1.s, vl2 -; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d -; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s -; CHECK-NEXT: ldp q3, q2, [x0, #32] +; CHECK-NEXT: ldp q0, q1, [x0, #32] +; CHECK-NEXT: ldp q2, q3, [x0] ; CHECK-NEXT: fcvtzu z1.d, p0/m, z1.d -; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s -; CHECK-NEXT: splice z0.s, p1, z0.s, z1.s +; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d ; CHECK-NEXT: fcvtzu z3.d, p0/m, z3.d -; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s ; CHECK-NEXT: fcvtzu z2.d, p0/m, z2.d +; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s +; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s +; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s ; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s -; CHECK-NEXT: splice z3.s, p1, z3.s, z2.s -; CHECK-NEXT: stp q0, q3, [x1] +; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s +; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s +; CHECK-NEXT: stp q2, q0, [x1] ; CHECK-NEXT: ret %op1 = load <8 x double>, ptr %a %res = fptoui <8 x double> %op1 to <8 x i32> @@ -717,8 +717,8 @@ define <1 x i64> @fcvtzu_v1f64_v1i64(<1 x double> %op1) { ; CHECK-LABEL: fcvtzu_v1f64_v1i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -729,8 +729,8 @@ define <2 x i64> @fcvtzu_v2f64_v2i64(<2 x double> %op1) { ; CHECK-LABEL: fcvtzu_v2f64_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -741,8 +741,8 @@ define void @fcvtzu_v4f64_v4i64(ptr %a, ptr %b) { ; CHECK-LABEL: fcvtzu_v4f64_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: fcvtzu z0.d, p0/m, z0.d ; CHECK-NEXT: fcvtzu z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x1] @@ -760,8 +760,8 @@ define <4 x i16> @fcvtzs_v4f16_v4i16(<4 x half> %op1) { ; CHECK-LABEL: fcvtzs_v4f16_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: fcvtzs z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -772,8 +772,8 @@ define void @fcvtzs_v8f16_v8i16(ptr %a, ptr %b) { ; CHECK-LABEL: fcvtzs_v8f16_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: fcvtzs z0.h, p0/m, z0.h ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret @@ -786,8 +786,8 @@ define void @fcvtzs_v16f16_v16i16(ptr %a, ptr %b) { ; CHECK-LABEL: fcvtzs_v16f16_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: fcvtzs z0.h, p0/m, z0.h ; CHECK-NEXT: fcvtzs z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, 
[x1] @@ -805,8 +805,8 @@ define <2 x i32> @fcvtzs_v2f16_v2i32(<2 x half> %op1) { ; CHECK-LABEL: fcvtzs_v2f16_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -818,8 +818,8 @@ define <4 x i32> @fcvtzs_v4f16_v4i32(<4 x half> %op1) { ; CHECK-LABEL: fcvtzs_v4f16_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -849,21 +849,20 @@ define void @fcvtzs_v16f16_v16i32(ptr %a, ptr %b) { ; CHECK-LABEL: fcvtzs_v16f16_v16i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: uunpklo z2.s, z0.h ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: uunpklo z3.s, z1.h ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 +; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: fcvtzs z2.s, p0/m, z2.h ; CHECK-NEXT: fcvtzs z3.s, p0/m, z3.h -; CHECK-NEXT: fcvtzs z1.s, p0/m, z1.h ; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.h -; CHECK-NEXT: stp q3, q1, [x1, #32] -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: fcvtzs z1.s, p0/m, z2.h -; CHECK-NEXT: stp q1, q0, [x1] +; CHECK-NEXT: fcvtzs z1.s, p0/m, z1.h +; CHECK-NEXT: stp q2, q0, [x1, #32] +; CHECK-NEXT: stp q3, q1, [x1] ; CHECK-NEXT: ret %op1 = load <16 x half>, ptr %a %res = fptosi <16 x half> %op1 to <16 x i32> @@ -890,9 +889,9 @@ ; CHECK-LABEL: fcvtzs_v2f16_v2i64: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: mov z1.h, z0.h[1] ; CHECK-NEXT: fcvtzs x8, h0 -; CHECK-NEXT: mov z0.h, z0.h[1] -; CHECK-NEXT: fcvtzs x9, h0 +; CHECK-NEXT: fcvtzs x9, h1 ; CHECK-NEXT: stp x8, x9, [sp, #-16]! ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: ldr q0, [sp], #16 @@ -907,10 +906,10 @@ ; CHECK-NEXT: ldr d0, [x0] ; CHECK-NEXT: mov z1.h, z0.h[1] ; CHECK-NEXT: fcvtzs x8, h0 -; CHECK-NEXT: fcvtzs x9, h1 -; CHECK-NEXT: mov z1.h, z0.h[3] +; CHECK-NEXT: mov z2.h, z0.h[3] ; CHECK-NEXT: mov z0.h, z0.h[2] -; CHECK-NEXT: fcvtzs x10, h1 +; CHECK-NEXT: fcvtzs x9, h1 +; CHECK-NEXT: fcvtzs x10, h2 ; CHECK-NEXT: fcvtzs x11, h0 ; CHECK-NEXT: stp x8, x9, [sp, #-32]! 
; CHECK-NEXT: .cfi_def_cfa_offset 32 @@ -932,27 +931,27 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 64 ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: mov z1.h, z0.h[1] +; CHECK-NEXT: mov z2.h, z0.h[3] +; CHECK-NEXT: mov z3.h, z0.h[2] ; CHECK-NEXT: fcvtzs x8, h0 -; CHECK-NEXT: fcvtzs x9, h1 -; CHECK-NEXT: mov z1.h, z0.h[3] -; CHECK-NEXT: fcvtzs x10, h1 -; CHECK-NEXT: mov z1.h, z0.h[2] ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; CHECK-NEXT: fcvtzs x11, h1 +; CHECK-NEXT: fcvtzs x9, h1 +; CHECK-NEXT: fcvtzs x10, h2 +; CHECK-NEXT: fcvtzs x11, h3 ; CHECK-NEXT: mov z1.h, z0.h[1] +; CHECK-NEXT: mov z2.h, z0.h[3] ; CHECK-NEXT: fcvtzs x12, h0 +; CHECK-NEXT: mov z0.h, z0.h[2] ; CHECK-NEXT: stp x8, x9, [sp, #32] ; CHECK-NEXT: fcvtzs x8, h1 -; CHECK-NEXT: mov z1.h, z0.h[3] -; CHECK-NEXT: mov z0.h, z0.h[2] +; CHECK-NEXT: fcvtzs x9, h2 ; CHECK-NEXT: stp x11, x10, [sp, #48] -; CHECK-NEXT: fcvtzs x9, h1 ; CHECK-NEXT: fcvtzs x10, h0 +; CHECK-NEXT: ldp q2, q3, [sp, #32] ; CHECK-NEXT: stp x12, x8, [sp] -; CHECK-NEXT: ldp q3, q2, [sp, #32] ; CHECK-NEXT: stp x10, x9, [sp, #16] ; CHECK-NEXT: ldp q1, q0, [sp] -; CHECK-NEXT: stp q3, q2, [x1] +; CHECK-NEXT: stp q2, q3, [x1] ; CHECK-NEXT: stp q1, q0, [x1, #32] ; CHECK-NEXT: add sp, sp, #64 ; CHECK-NEXT: ret @@ -967,53 +966,54 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #128 ; CHECK-NEXT: .cfi_def_cfa_offset 128 -; CHECK-NEXT: ldp q1, q0, [x0] -; CHECK-NEXT: mov z2.h, z1.h[1] -; CHECK-NEXT: mov z3.h, z1.h[3] +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: mov z1.h, z0.h[1] +; CHECK-NEXT: mov z2.h, z0.h[3] +; CHECK-NEXT: fcvtzs x8, h0 +; CHECK-NEXT: mov z3.h, z0.h[2] +; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 +; CHECK-NEXT: fcvtzs x9, h1 +; CHECK-NEXT: fcvtzs x10, h2 +; CHECK-NEXT: ldr q1, [x0, #16] +; CHECK-NEXT: fcvtzs x11, h3 +; CHECK-NEXT: mov z2.h, z0.h[1] +; CHECK-NEXT: mov z3.h, z0.h[3] +; CHECK-NEXT: fcvtzs x12, h1 +; CHECK-NEXT: stp x8, x9, [sp, #32] +; CHECK-NEXT: fcvtzs x8, h0 +; CHECK-NEXT: mov z0.h, z0.h[2] ; CHECK-NEXT: fcvtzs x9, h2 -; CHECK-NEXT: mov z2.h, z1.h[2] -; CHECK-NEXT: fcvtzs x8, h1 +; CHECK-NEXT: stp x11, x10, [sp, #48] ; CHECK-NEXT: fcvtzs x10, h3 -; CHECK-NEXT: fcvtzs x11, h2 -; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 -; CHECK-NEXT: fcvtzs x12, h1 ; CHECK-NEXT: mov z2.h, z1.h[1] ; CHECK-NEXT: mov z3.h, z1.h[3] -; CHECK-NEXT: mov z1.h, z1.h[2] -; CHECK-NEXT: stp x8, x9, [sp, #32] -; CHECK-NEXT: fcvtzs x9, h3 -; CHECK-NEXT: stp x11, x10, [sp, #48] -; CHECK-NEXT: fcvtzs x10, h1 +; CHECK-NEXT: fcvtzs x11, h0 +; CHECK-NEXT: mov z0.h, z1.h[2] +; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 +; CHECK-NEXT: stp x8, x9, [sp] ; CHECK-NEXT: fcvtzs x8, h2 -; CHECK-NEXT: mov z1.h, z0.h[1] -; CHECK-NEXT: stp x10, x9, [sp, #16] -; CHECK-NEXT: fcvtzs x9, h1 -; CHECK-NEXT: mov z1.h, z0.h[3] -; CHECK-NEXT: stp x12, x8, [sp] -; CHECK-NEXT: fcvtzs x8, h0 -; CHECK-NEXT: fcvtzs x10, h1 -; CHECK-NEXT: mov z1.h, z0.h[2] -; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 +; CHECK-NEXT: fcvtzs x9, h3 +; CHECK-NEXT: mov z2.h, z1.h[1] +; CHECK-NEXT: stp x11, x10, [sp, #16] +; CHECK-NEXT: fcvtzs x10, h0 +; CHECK-NEXT: mov z0.h, z1.h[3] ; CHECK-NEXT: fcvtzs x11, h1 -; CHECK-NEXT: mov z1.h, z0.h[1] -; CHECK-NEXT: stp x8, x9, [sp, #96] +; CHECK-NEXT: mov z1.h, z1.h[2] +; CHECK-NEXT: stp x12, x8, [sp, #96] +; CHECK-NEXT: fcvtzs x12, h2 ; CHECK-NEXT: fcvtzs x8, h0 +; CHECK-NEXT: ldp q3, q4, [sp] +; CHECK-NEXT: stp x10, x9, [sp, #112] ; CHECK-NEXT: fcvtzs x9, h1 -; CHECK-NEXT: mov z1.h, z0.h[3] -; CHECK-NEXT: mov z0.h, z0.h[2] -; CHECK-NEXT: stp x11, x10, [sp, #112] -; CHECK-NEXT: fcvtzs x10, h1 -; CHECK-NEXT: 
fcvtzs x11, h0 -; CHECK-NEXT: stp x8, x9, [sp, #64] ; CHECK-NEXT: ldp q0, q1, [sp, #32] -; CHECK-NEXT: stp x11, x10, [sp, #80] -; CHECK-NEXT: ldp q2, q3, [sp] -; CHECK-NEXT: ldp q5, q4, [sp, #64] -; CHECK-NEXT: ldp q7, q6, [sp, #96] +; CHECK-NEXT: stp x11, x12, [sp, #64] +; CHECK-NEXT: ldp q6, q7, [sp, #96] +; CHECK-NEXT: stp x9, x8, [sp, #80] +; CHECK-NEXT: ldp q5, q2, [sp, #64] ; CHECK-NEXT: stp q0, q1, [x1] -; CHECK-NEXT: stp q2, q3, [x1, #32] -; CHECK-NEXT: stp q5, q4, [x1, #96] -; CHECK-NEXT: stp q7, q6, [x1, #64] +; CHECK-NEXT: stp q3, q4, [x1, #32] +; CHECK-NEXT: stp q6, q7, [x1, #64] +; CHECK-NEXT: stp q5, q2, [x1, #96] ; CHECK-NEXT: add sp, sp, #128 ; CHECK-NEXT: ret %op1 = load <16 x half>, ptr %a @@ -1029,8 +1029,8 @@ define <2 x i16> @fcvtzs_v2f32_v2i16(<2 x float> %op1) { ; CHECK-LABEL: fcvtzs_v2f32_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -1041,8 +1041,8 @@ define <4 x i16> @fcvtzs_v4f32_v4i16(<4 x float> %op1) { ; CHECK-LABEL: fcvtzs_v4f32_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -1054,14 +1054,14 @@ define <8 x i16> @fcvtzs_v8f32_v8i16(ptr %a) { ; CHECK-LABEL: fcvtzs_v8f32_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: fcvtzs z1.s, p0/m, z1.s ; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: uzp1 z2.h, z0.h, z0.h -; CHECK-NEXT: uzp1 z0.h, z1.h, z1.h -; CHECK-NEXT: splice z0.h, p0, z0.h, z2.h +; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h +; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h +; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret %op1 = load <8 x float>, ptr %a @@ -1072,21 +1072,21 @@ define void @fcvtzs_v16f32_v16i16(ptr %a, ptr %b) { ; CHECK-LABEL: fcvtzs_v16f32_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ptrue p1.h, vl4 -; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s -; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h -; CHECK-NEXT: ldp q3, q2, [x0, #32] +; CHECK-NEXT: ldp q0, q1, [x0, #32] +; CHECK-NEXT: ldp q2, q3, [x0] ; CHECK-NEXT: fcvtzs z1.s, p0/m, z1.s -; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h -; CHECK-NEXT: splice z0.h, p1, z0.h, z1.h +; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s ; CHECK-NEXT: fcvtzs z3.s, p0/m, z3.s -; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h ; CHECK-NEXT: fcvtzs z2.s, p0/m, z2.s +; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h +; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h +; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h ; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h -; CHECK-NEXT: splice z3.h, p1, z3.h, z2.h -; CHECK-NEXT: stp q0, q3, [x1] +; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h +; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h +; CHECK-NEXT: stp q2, q0, [x1] ; CHECK-NEXT: ret %op1 = load <16 x float>, ptr %a %res = fptosi <16 x float> %op1 to <16 x i16> @@ -1101,8 +1101,8 @@ define <2 x i32> @fcvtzs_v2f32_v2i32(<2 x float> %op1) { ; CHECK-LABEL: fcvtzs_v2f32_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 
 ; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT: ret
@@ -1113,8 +1113,8 @@
 define <4 x i32> @fcvtzs_v4f32_v4i32(<4 x float> %op1) {
 ; CHECK-LABEL: fcvtzs_v4f32_v4i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT: ret
@@ -1125,8 +1125,8 @@
 define void @fcvtzs_v8f32_v8i32(ptr %a, ptr %b) {
 ; CHECK-LABEL: fcvtzs_v8f32_v8i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: fcvtzs z0.s, p0/m, z0.s
 ; CHECK-NEXT: fcvtzs z1.s, p0/m, z1.s
 ; CHECK-NEXT: stp q0, q1, [x1]
@@ -1144,8 +1144,8 @@
 define <1 x i64> @fcvtzs_v1f32_v1i64(<1 x float> %op1) {
 ; CHECK-LABEL: fcvtzs_v1f32_v1i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: uunpklo z0.d, z0.s
 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -1157,8 +1157,8 @@
 define <2 x i64> @fcvtzs_v2f32_v2i64(<2 x float> %op1) {
 ; CHECK-LABEL: fcvtzs_v2f32_v2i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: uunpklo z0.d, z0.s
 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -1188,21 +1188,20 @@
 define void @fcvtzs_v8f32_v8i64(ptr %a, ptr %b) {
 ; CHECK-LABEL: fcvtzs_v8f32_v8i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: ldp q1, q0, [x0]
 ; CHECK-NEXT: ptrue p0.d, vl2
 ; CHECK-NEXT: uunpklo z2.d, z0.s
 ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: uunpklo z0.d, z0.s
 ; CHECK-NEXT: uunpklo z3.d, z1.s
 ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
+; CHECK-NEXT: uunpklo z0.d, z0.s
 ; CHECK-NEXT: uunpklo z1.d, z1.s
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.s
 ; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.s
-; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.s
 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.s
-; CHECK-NEXT: stp q3, q1, [x1, #32]
-; CHECK-NEXT: movprfx z1, z2
-; CHECK-NEXT: fcvtzs z1.d, p0/m, z2.s
-; CHECK-NEXT: stp q1, q0, [x1]
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.s
+; CHECK-NEXT: stp q2, q0, [x1, #32]
+; CHECK-NEXT: stp q3, q1, [x1]
 ; CHECK-NEXT: ret
 %op1 = load <8 x float>, ptr %a
 %res = fptosi <8 x float> %op1 to <8 x i64>
@@ -1231,8 +1230,8 @@
 define <2 x i16> @fcvtzs_v2f64_v2i16(<2 x double> %op1) {
 ; CHECK-LABEL: fcvtzs_v2f64_v2i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
 ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -1246,21 +1245,21 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: sub sp, sp, #16
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: ldp q1, q0, [x0]
 ; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d
-; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
+; CHECK-NEXT: ldp q1, q0, [x0]
 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
-; CHECK-NEXT: fmov w9, s1
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d
 ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT: mov z1.s, z1.s[1]
+; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
 ; CHECK-NEXT: fmov w8, s0
-; CHECK-NEXT: mov z0.s, z0.s[1]
-; CHECK-NEXT: fmov w10, s0
-; CHECK-NEXT: strh w9, [sp, #8]
+; CHECK-NEXT: mov z2.s, z0.s[1]
+; CHECK-NEXT: mov z0.s, z1.s[1]
 ; CHECK-NEXT: strh w8, [sp, #12]
 ; CHECK-NEXT: fmov w8, s1
-; CHECK-NEXT: strh w10, [sp, #14]
+; CHECK-NEXT: strh w8, [sp, #8]
+; CHECK-NEXT: fmov w8, s2
+; CHECK-NEXT: strh w8, [sp, #14]
+; CHECK-NEXT: fmov w8, s0
 ; CHECK-NEXT: strh w8, [sp, #10]
 ; CHECK-NEXT: ldr d0, [sp, #8]
 ; CHECK-NEXT: add sp, sp, #16
@@ -1275,37 +1274,37 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: sub sp, sp, #16
 ; CHECK-NEXT: .cfi_def_cfa_offset 16
-; CHECK-NEXT: ldp q0, q1, [x0, #32]
 ; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
-; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT: ldp q1, q0, [x0, #32]
 ; CHECK-NEXT: ldp q3, q2, [x0]
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
 ; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d
-; CHECK-NEXT: fmov w9, s0
-; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
-; CHECK-NEXT: fmov w8, s1
-; CHECK-NEXT: mov z4.s, z1.s[1]
+; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d
 ; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.d
-; CHECK-NEXT: strh w9, [sp, #8]
+; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
+; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
 ; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s
-; CHECK-NEXT: fmov w9, s4
-; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: mov z0.s, z0.s[1]
 ; CHECK-NEXT: strh w8, [sp, #12]
-; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
+; CHECK-NEXT: fmov w8, s1
+; CHECK-NEXT: mov z1.s, z1.s[1]
+; CHECK-NEXT: strh w8, [sp, #8]
+; CHECK-NEXT: fmov w8, s2
+; CHECK-NEXT: mov z2.s, z2.s[1]
+; CHECK-NEXT: strh w8, [sp, #4]
 ; CHECK-NEXT: fmov w8, s3
-; CHECK-NEXT: fmov w10, s2
-; CHECK-NEXT: mov z1.s, z0.s[1]
-; CHECK-NEXT: mov z0.s, z2.s[1]
-; CHECK-NEXT: mov z2.s, z3.s[1]
+; CHECK-NEXT: mov z3.s, z3.s[1]
 ; CHECK-NEXT: strh w8, [sp]
 ; CHECK-NEXT: fmov w8, s0
-; CHECK-NEXT: strh w10, [sp, #4]
-; CHECK-NEXT: fmov w10, s1
-; CHECK-NEXT: strh w9, [sp, #14]
-; CHECK-NEXT: fmov w9, s2
+; CHECK-NEXT: strh w8, [sp, #14]
+; CHECK-NEXT: fmov w8, s1
+; CHECK-NEXT: strh w8, [sp, #10]
+; CHECK-NEXT: fmov w8, s2
 ; CHECK-NEXT: strh w8, [sp, #6]
-; CHECK-NEXT: strh w10, [sp, #10]
-; CHECK-NEXT: strh w9, [sp, #2]
+; CHECK-NEXT: fmov w8, s3
+; CHECK-NEXT: strh w8, [sp, #2]
 ; CHECK-NEXT: ldr q0, [sp], #16
 ; CHECK-NEXT: ret
 %op1 = load <8 x double>, ptr %a
@@ -1318,67 +1317,68 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: sub sp, sp, #32
 ; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: ldp q2, q3, [x0, #32]
 ; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: ldp q0, q1, [x0, #32]
+; CHECK-NEXT: ldp q3, q2, [x0]
+; CHECK-NEXT: ldr q6, [x0, #112]
+; CHECK-NEXT: ldp q4, q5, [x0, #80]
+; CHECK-NEXT: ldr q7, [x0, #64]
+; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
 ; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d
-; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
-; CHECK-NEXT: ldp q4, q5, [x0]
 ; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.d
-; CHECK-NEXT: fmov w9, s2
-; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s
-; CHECK-NEXT: fmov w8, s3
-; CHECK-NEXT: mov z6.s, z3.s[1]
-; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.d
-; CHECK-NEXT: mov z3.s, z2.s[1]
-; CHECK-NEXT: uzp1 z4.s, z4.s, z4.s
-; CHECK-NEXT: ldp q0, q1, [x0, #64]
+; CHECK-NEXT: fcvtzs z6.d, p0/m, z6.d
 ; CHECK-NEXT: fcvtzs z5.d, p0/m, z5.d
-; CHECK-NEXT: uzp1 z5.s, z5.s, z5.s
-; CHECK-NEXT: fmov w10, s5
-; CHECK-NEXT: mov z5.s, z5.s[1]
-; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
+; CHECK-NEXT: fcvtzs z4.d, p0/m, z4.d
+; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
 ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT: ldp q2, q7, [x0, #96]
+; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
+; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s
+; CHECK-NEXT: uzp1 z6.s, z6.s, z6.s
+; CHECK-NEXT: uzp1 z5.s, z5.s, z5.s
+; CHECK-NEXT: fmov w8, s1
+; CHECK-NEXT: mov z16.s, z1.s[1]
+; CHECK-NEXT: mov z1.s, z0.s[1]
 ; CHECK-NEXT: strh w8, [sp, #12]
-; CHECK-NEXT: fmov w8, s4
-; CHECK-NEXT: strh w9, [sp, #8]
-; CHECK-NEXT: fmov w9, s6
-; CHECK-NEXT: mov z4.s, z4.s[1]
-; CHECK-NEXT: strh w10, [sp, #4]
-; CHECK-NEXT: strh w8, [sp]
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: mov z0.s, z2.s[1]
+; CHECK-NEXT: strh w8, [sp, #8]
+; CHECK-NEXT: fmov w8, s2
+; CHECK-NEXT: mov z2.s, z3.s[1]
+; CHECK-NEXT: strh w8, [sp, #4]
 ; CHECK-NEXT: fmov w8, s3
 ; CHECK-NEXT: movprfx z3, z7
 ; CHECK-NEXT: fcvtzs z3.d, p0/m, z7.d
-; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d
-; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s
-; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
+; CHECK-NEXT: strh w8, [sp]
+; CHECK-NEXT: fmov w8, s16
+; CHECK-NEXT: strh w8, [sp, #14]
+; CHECK-NEXT: fmov w8, s1
+; CHECK-NEXT: uzp1 z1.s, z4.s, z4.s
 ; CHECK-NEXT: strh w8, [sp, #10]
-; CHECK-NEXT: fmov w8, s3
-; CHECK-NEXT: strh w9, [sp, #14]
-; CHECK-NEXT: fmov w9, s5
-; CHECK-NEXT: fmov w10, s4
-; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d
-; CHECK-NEXT: strh w8, [sp, #28]
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: uzp1 z0.s, z3.s, z3.s
+; CHECK-NEXT: mov z3.s, z5.s[1]
+; CHECK-NEXT: strh w8, [sp, #6]
 ; CHECK-NEXT: fmov w8, s2
-; CHECK-NEXT: mov z3.s, z3.s[1]
-; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
-; CHECK-NEXT: strh w9, [sp, #6]
-; CHECK-NEXT: fmov w9, s1
-; CHECK-NEXT: strh w10, [sp, #2]
-; CHECK-NEXT: fmov w10, s0
+; CHECK-NEXT: mov z2.s, z6.s[1]
+; CHECK-NEXT: strh w8, [sp, #2]
+; CHECK-NEXT: fmov w8, s6
+; CHECK-NEXT: strh w8, [sp, #28]
+; CHECK-NEXT: fmov w8, s5
 ; CHECK-NEXT: strh w8, [sp, #24]
-; CHECK-NEXT: fmov w8, s3
-; CHECK-NEXT: mov z4.s, z2.s[1]
-; CHECK-NEXT: mov z2.s, z1.s[1]
-; CHECK-NEXT: mov z1.s, z0.s[1]
-; CHECK-NEXT: strh w9, [sp, #20]
-; CHECK-NEXT: fmov w9, s4
-; CHECK-NEXT: strh w10, [sp, #16]
-; CHECK-NEXT: fmov w10, s2
+; CHECK-NEXT: fmov w8, s1
+; CHECK-NEXT: mov z1.s, z1.s[1]
+; CHECK-NEXT: strh w8, [sp, #20]
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: mov z0.s, z0.s[1]
+; CHECK-NEXT: strh w8, [sp, #16]
+; CHECK-NEXT: fmov w8, s2
 ; CHECK-NEXT: strh w8, [sp, #30]
+; CHECK-NEXT: fmov w8, s3
+; CHECK-NEXT: strh w8, [sp, #26]
 ; CHECK-NEXT: fmov w8, s1
-; CHECK-NEXT: strh w9, [sp, #26]
-; CHECK-NEXT: strh w10, [sp, #22]
+; CHECK-NEXT: strh w8, [sp, #22]
+; CHECK-NEXT: fmov w8, s0
 ; CHECK-NEXT: strh w8, [sp, #18]
 ; CHECK-NEXT: ldp q1, q0, [sp]
 ; CHECK-NEXT: stp q1, q0, [x1]
@@ -1397,8 +1397,8 @@
 define <1 x i32> @fcvtzs_v1f64_v1i32(<1 x double> %op1) {
 ; CHECK-LABEL: fcvtzs_v1f64_v1i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
 ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -1410,8 +1410,8 @@
 define <2 x i32> @fcvtzs_v2f64_v2i32(<2 x double> %op1) {
 ; CHECK-LABEL: fcvtzs_v2f64_v2i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
 ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -1423,14 +1423,14 @@
 define <4 x i32> @fcvtzs_v4f64_v4i32(ptr %a) {
 ; CHECK-LABEL: fcvtzs_v4f64_v4i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q1, q0, [x0]
 ; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d
 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
 ; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: uzp1 z2.s, z0.s, z0.s
-; CHECK-NEXT: uzp1 z0.s, z1.s, z1.s
-; CHECK-NEXT: splice z0.s, p0, z0.s, z2.s
+; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
+; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT: ret
 %op1 = load <4 x double>, ptr %a
@@ -1441,21 +1441,21 @@
 define void @fcvtzs_v8f64_v8i32(ptr %a, ptr %b) {
 ; CHECK-LABEL: fcvtzs_v8f64_v8i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: ptrue p1.s, vl2
-; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
-; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT: ldp q3, q2, [x0, #32]
+; CHECK-NEXT: ldp q0, q1, [x0, #32]
+; CHECK-NEXT: ldp q2, q3, [x0]
 ; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d
-; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
-; CHECK-NEXT: splice z0.s, p1, z0.s, z1.s
+; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
 ; CHECK-NEXT: fcvtzs z3.d, p0/m, z3.d
-; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s
 ; CHECK-NEXT: fcvtzs z2.d, p0/m, z2.d
+; CHECK-NEXT: ptrue p0.s, vl2
+; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
+; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s
 ; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
-; CHECK-NEXT: splice z3.s, p1, z3.s, z2.s
-; CHECK-NEXT: stp q0, q3, [x1]
+; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s
+; CHECK-NEXT: stp q2, q0, [x1]
 ; CHECK-NEXT: ret
 %op1 = load <8 x double>, ptr %a
 %res = fptosi <8 x double> %op1 to <8 x i32>
@@ -1470,8 +1470,8 @@
 define <1 x i64> @fcvtzs_v1f64_v1i64(<1 x double> %op1) {
 ; CHECK-LABEL: fcvtzs_v1f64_v1i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.d, vl1
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT: ret
@@ -1482,8 +1482,8 @@
 define <2 x i64> @fcvtzs_v2f64_v2i64(<2 x double> %op1) {
 ; CHECK-LABEL: fcvtzs_v2f64_v2i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT: ret
@@ -1494,8 +1494,8 @@
 define void @fcvtzs_v4f64_v4i64(ptr %a, ptr %b) {
 ; CHECK-LABEL: fcvtzs_v4f64_v4i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: fcvtzs z0.d, p0/m, z0.d
 ; CHECK-NEXT: fcvtzs z1.d, p0/m, z1.d
 ; CHECK-NEXT: stp q0, q1, [x1]
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-fp-vselect.ll
@@ -11,12 +11,12 @@
 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $z2
 ; CHECK-NEXT: mov z3.s, z2.s[1]
 ; CHECK-NEXT: fmov w8, s2
-; CHECK-NEXT: fmov w9, s3
-; CHECK-NEXT: ptrue p0.h
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
+; CHECK-NEXT: ptrue p0.h
 ; CHECK-NEXT: strh w8, [sp, #8]
-; CHECK-NEXT: strh w9, [sp, #10]
+; CHECK-NEXT: fmov w8, s3
+; CHECK-NEXT: strh w8, [sp, #10]
 ; CHECK-NEXT: ldr d2, [sp, #8]
 ; CHECK-NEXT: lsl z2.h, z2.h, #15
 ; CHECK-NEXT: asr z2.h, z2.h, #15
@@ -70,14 +70,14 @@
 define void @select_v16f16(ptr %a, ptr %b) {
 ; CHECK-LABEL: select_v16f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q1, q0, [x0]
 ; CHECK-NEXT: ptrue p0.h, vl8
-; CHECK-NEXT: ldp q2, q3, [x1]
-; CHECK-NEXT: fcmeq p1.h, p0/z, z1.h, z2.h
-; CHECK-NEXT: sel z1.h, p1, z1.h, z2.h
-; CHECK-NEXT: fcmeq p0.h, p0/z, z0.h, z3.h
-; CHECK-NEXT: sel z0.h, p0, z0.h, z3.h
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: ldp q0, q2, [x0]
+; CHECK-NEXT: ldp q1, q3, [x1]
+; CHECK-NEXT: fcmeq p1.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: fcmeq p0.h, p0/z, z2.h, z3.h
+; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
+; CHECK-NEXT: sel z1.h, p0, z2.h, z3.h
+; CHECK-NEXT: stp q0, q1, [x0]
 ; CHECK-NEXT: ret
 %op1 = load <16 x half>, ptr %a
 %op2 = load <16 x half>, ptr %b
@@ -127,14 +127,14 @@
 define void @select_v8f32(ptr %a, ptr %b) {
 ; CHECK-LABEL: select_v8f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q1, q0, [x0]
 ; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ldp q2, q3, [x1]
-; CHECK-NEXT: fcmeq p1.s, p0/z, z1.s, z2.s
-; CHECK-NEXT: sel z1.s, p1, z1.s, z2.s
-; CHECK-NEXT: fcmeq p0.s, p0/z, z0.s, z3.s
-; CHECK-NEXT: sel z0.s, p0, z0.s, z3.s
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: ldp q0, q2, [x0]
+; CHECK-NEXT: ldp q1, q3, [x1]
+; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: fcmeq p0.s, p0/z, z2.s, z3.s
+; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
+; CHECK-NEXT: sel z1.s, p0, z2.s, z3.s
+; CHECK-NEXT: stp q0, q1, [x0]
 ; CHECK-NEXT: ret
 %op1 = load <8 x float>, ptr %a
 %op2 = load <8 x float>, ptr %b
@@ -185,14 +185,14 @@
 define void @select_v4f64(ptr %a, ptr %b) {
 ; CHECK-LABEL: select_v4f64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q1, q0, [x0]
 ; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: ldp q2, q3, [x1]
-; CHECK-NEXT: fcmeq p1.d, p0/z, z1.d, z2.d
-; CHECK-NEXT: sel z1.d, p1, z1.d, z2.d
-; CHECK-NEXT: fcmeq p0.d, p0/z, z0.d, z3.d
-; CHECK-NEXT: sel z0.d, p0, z0.d, z3.d
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: ldp q0, q2, [x0]
+; CHECK-NEXT: ldp q1, q3, [x1]
+; CHECK-NEXT: fcmeq p1.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: fcmeq p0.d, p0/z, z2.d, z3.d
+; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
+; CHECK-NEXT: sel z1.d, p0, z2.d, z3.d
+; CHECK-NEXT: stp q0, q1, [x0]
 ; CHECK-NEXT: ret
 %op1 = load <4 x double>, ptr %a
 %op2 = load <4 x double>, ptr %b
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-insert-vector-elt.ll
@@ -11,14 +11,14 @@
 define <4 x i8> @insertelement_v4i8(<4 x i8> %op1) {
 ; CHECK-LABEL: insertelement_v4i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #3 // =0x3
-; CHECK-NEXT: mov w9, #5 // =0x5
-; CHECK-NEXT: index z2.h, #0, #1
 ; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: mov w8, #3 // =0x3
+; CHECK-NEXT: index z1.h, #0, #1
+; CHECK-NEXT: mov z2.h, w8
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: mov z1.h, w8
-; CHECK-NEXT: cmpeq p0.h, p0/z, z2.h, z1.h
-; CHECK-NEXT: mov z0.h, p0/m, w9
+; CHECK-NEXT: mov w8, #5 // =0x5
+; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z2.h
+; CHECK-NEXT: mov z0.h, p0/m, w8
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT: ret
 %r = insertelement <4 x i8> %op1, i8 5, i64 3
@@ -28,14 +28,14 @@
 define <8 x i8> @insertelement_v8i8(<8 x i8> %op1) {
 ; CHECK-LABEL: insertelement_v8i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #7 // =0x7
-; CHECK-NEXT: mov w9, #5 // =0x5
-; CHECK-NEXT: index z2.b, #0, #1
 ; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: mov w8, #7 // =0x7
+; CHECK-NEXT: index z1.b, #0, #1
+; CHECK-NEXT: mov z2.b, w8
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: mov z1.b, w8
-; CHECK-NEXT: cmpeq p0.b, p0/z, z2.b, z1.b
-; CHECK-NEXT: mov z0.b, p0/m, w9
+; CHECK-NEXT: mov w8, #5 // =0x5
+; CHECK-NEXT: cmpeq p0.b, p0/z, z1.b, z2.b
+; CHECK-NEXT: mov z0.b, p0/m, w8
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT: ret
 %r = insertelement <8 x i8> %op1, i8 5, i64 7
@@ -45,14 +45,14 @@
 define <16 x i8> @insertelement_v16i8(<16 x i8> %op1) {
 ; CHECK-LABEL: insertelement_v16i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #15 // =0xf
-; CHECK-NEXT: mov w9, #5 // =0x5
-; CHECK-NEXT: index z2.b, #0, #1
 ; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: mov w8, #15 // =0xf
+; CHECK-NEXT: index z1.b, #0, #1
+; CHECK-NEXT: mov z2.b, w8
 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: mov z1.b, w8
-; CHECK-NEXT: cmpeq p0.b, p0/z, z2.b, z1.b
-; CHECK-NEXT: mov z0.b, p0/m, w9
+; CHECK-NEXT: mov w8, #5 // =0x5
+; CHECK-NEXT: cmpeq p0.b, p0/z, z1.b, z2.b
+; CHECK-NEXT: mov z0.b, p0/m, w8
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT: ret
 %r = insertelement <16 x i8> %op1, i8 5, i64 15
@@ -62,14 +62,14 @@
 define <32 x i8> @insertelement_v32i8(<32 x i8> %op1) {
 ; CHECK-LABEL: insertelement_v32i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #15 // =0xf
-; CHECK-NEXT: mov w9, #5 // =0x5
-; CHECK-NEXT: index z3.b, #0, #1
 ; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: mov w8, #15 // =0xf
+; CHECK-NEXT: index z2.b, #0, #1
+; CHECK-NEXT: mov z3.b, w8
 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT: mov z2.b, w8
-; CHECK-NEXT: cmpeq p0.b, p0/z, z3.b, z2.b
-; CHECK-NEXT: mov z1.b, p0/m, w9
+; CHECK-NEXT: mov w8, #5 // =0x5
+; CHECK-NEXT: cmpeq p0.b, p0/z, z2.b, z3.b
+; CHECK-NEXT: mov z1.b, p0/m, w8
 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT: ret
 %r = insertelement <32 x i8> %op1, i8 5, i64 31
@@ -80,14 +80,14 @@
 define <2 x i16> @insertelement_v2i16(<2 x i16> %op1) {
 ; CHECK-LABEL: insertelement_v2i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #1 // =0x1
-; CHECK-NEXT: mov w9, #5 // =0x5
-; CHECK-NEXT: index z2.s, #0, #1
 ; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: mov w8, #1 // =0x1
+; CHECK-NEXT: index z1.s, #0, #1
+; CHECK-NEXT: mov z2.s, w8
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: mov z1.s, w8
-; CHECK-NEXT: cmpeq p0.s, p0/z, z2.s, z1.s
-; CHECK-NEXT: mov z0.s, p0/m, w9
+; CHECK-NEXT: mov w8, #5 // =0x5
+; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s
+; CHECK-NEXT: mov z0.s, p0/m, w8
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT: ret
 %r = insertelement <2 x i16> %op1, i16 5, i64 1
@@ -97,14 +97,14 @@
 define <4 x i16> @insertelement_v4i16(<4 x i16> %op1) {
 ; CHECK-LABEL: insertelement_v4i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #3 // =0x3
-; CHECK-NEXT: mov w9, #5 // =0x5
-; CHECK-NEXT: index z2.h, #0, #1
 ; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: mov w8, #3 // =0x3
+; CHECK-NEXT: index z1.h, #0, #1
+; CHECK-NEXT: mov z2.h, w8
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: mov z1.h, w8
-; CHECK-NEXT: cmpeq p0.h, p0/z, z2.h, z1.h
-; CHECK-NEXT: mov z0.h, p0/m, w9
+; CHECK-NEXT: mov w8, #5 // =0x5
+; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z2.h
+; CHECK-NEXT: mov z0.h, p0/m, w8
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT: ret
 %r = insertelement <4 x i16> %op1, i16 5, i64 3
@@ -114,14 +114,14 @@
 define <8 x i16> @insertelement_v8i16(<8 x i16> %op1) {
 ; CHECK-LABEL: insertelement_v8i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #7 // =0x7
-; CHECK-NEXT: mov w9, #5 // =0x5
-; CHECK-NEXT: index z2.h, #0, #1
 ; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: mov w8, #7 // =0x7
+; CHECK-NEXT: index z1.h, #0, #1
+; CHECK-NEXT: mov z2.h, w8
 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: mov z1.h, w8
-; CHECK-NEXT: cmpeq p0.h, p0/z, z2.h, z1.h
-; CHECK-NEXT: mov z0.h, p0/m, w9
+; CHECK-NEXT: mov w8, #5 // =0x5
+; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z2.h
+; CHECK-NEXT: mov z0.h, p0/m, w8
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT: ret
 %r = insertelement <8 x i16> %op1, i16 5, i64 7
@@ -131,14 +131,14 @@
 define <16 x i16> @insertelement_v16i16(<16 x i16> %op1) {
 ; CHECK-LABEL: insertelement_v16i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #7 // =0x7
-; CHECK-NEXT: mov w9, #5 // =0x5
-; CHECK-NEXT: index z3.h, #0, #1
 ; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: mov w8, #7 // =0x7
+; CHECK-NEXT: index z2.h, #0, #1
+; CHECK-NEXT: mov z3.h, w8
 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT: mov z2.h, w8
-; CHECK-NEXT: cmpeq p0.h, p0/z, z3.h, z2.h
-; CHECK-NEXT: mov z1.h, p0/m, w9
+; CHECK-NEXT: mov w8, #5 // =0x5
+; CHECK-NEXT: cmpeq p0.h, p0/z, z2.h, z3.h
+; CHECK-NEXT: mov z1.h, p0/m, w8
 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT: ret
 %r = insertelement <16 x i16> %op1, i16 5, i64 15
@@ -149,14 +149,14 @@
 define <2 x i32> @insertelement_v2i32(<2 x i32> %op1) {
 ; CHECK-LABEL: insertelement_v2i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #1 // =0x1
-; CHECK-NEXT: mov w9, #5 // =0x5
-; CHECK-NEXT: index z2.s, #0, #1
 ; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: mov w8, #1 // =0x1
+; CHECK-NEXT: index z1.s, #0, #1
+; CHECK-NEXT: mov z2.s, w8
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: mov z1.s, w8
-; CHECK-NEXT: cmpeq p0.s, p0/z, z2.s, z1.s
-; CHECK-NEXT: mov z0.s, p0/m, w9
+; CHECK-NEXT: mov w8, #5 // =0x5
+; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s
+; CHECK-NEXT: mov z0.s, p0/m, w8
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT: ret
 %r = insertelement <2 x i32> %op1, i32 5, i64 1
@@ -166,14 +166,14 @@
 define <4 x i32> @insertelement_v4i32(<4 x i32> %op1) {
 ; CHECK-LABEL: insertelement_v4i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #3 // =0x3
-; CHECK-NEXT: mov w9, #5 // =0x5
-; CHECK-NEXT: index z2.s, #0, #1
 ; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: mov w8, #3 // =0x3
+; CHECK-NEXT: index z1.s, #0, #1
+; CHECK-NEXT: mov z2.s, w8
 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: mov z1.s, w8
-; CHECK-NEXT: cmpeq p0.s, p0/z, z2.s, z1.s
-; CHECK-NEXT: mov z0.s, p0/m, w9
+; CHECK-NEXT: mov w8, #5 // =0x5
+; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s
+; CHECK-NEXT: mov z0.s, p0/m, w8
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT: ret
 %r = insertelement <4 x i32> %op1, i32 5, i64 3
@@ -183,13 +183,13 @@
 define <8 x i32> @insertelement_v8i32(ptr %a) {
 ; CHECK-LABEL: insertelement_v8i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #3 // =0x3
-; CHECK-NEXT: index z3.s, #0, #1
-; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: mov z2.s, w8
+; CHECK-NEXT: mov w8, #3 // =0x3
+; CHECK-NEXT: index z0.s, #0, #1
+; CHECK-NEXT: mov z1.s, w8
 ; CHECK-NEXT: mov w8, #5 // =0x5
-; CHECK-NEXT: cmpeq p0.s, p0/z, z3.s, z2.s
+; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: mov z1.s, p0/m, w8
 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT: ret
@@ -212,14 +212,14 @@
 define <2 x i64> @insertelement_v2i64(<2 x i64> %op1) {
 ; CHECK-LABEL: insertelement_v2i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #1 // =0x1
-; CHECK-NEXT: mov w9, #5 // =0x5
-; CHECK-NEXT: index z2.d, #0, #1
 ; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: mov w8, #1 // =0x1
+; CHECK-NEXT: index z1.d, #0, #1
+; CHECK-NEXT: mov z2.d, x8
 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: mov z1.d, x8
-; CHECK-NEXT: cmpeq p0.d, p0/z, z2.d, z1.d
-; CHECK-NEXT: mov z0.d, p0/m, x9
+; CHECK-NEXT: mov w8, #5 // =0x5
+; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
+; CHECK-NEXT: mov z0.d, p0/m, x8
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT: ret
 %r = insertelement <2 x i64> %op1, i64 5, i64 1
@@ -229,13 +229,13 @@
 define <4 x i64> @insertelement_v4i64(ptr %a) {
 ; CHECK-LABEL: insertelement_v4i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #1 // =0x1
-; CHECK-NEXT: index z3.d, #0, #1
-; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z2.d, x8
+; CHECK-NEXT: mov w8, #1 // =0x1
+; CHECK-NEXT: index z0.d, #0, #1
+; CHECK-NEXT: mov z1.d, x8
 ; CHECK-NEXT: mov w8, #5 // =0x5
-; CHECK-NEXT: cmpeq p0.d, p0/z, z3.d, z2.d
+; CHECK-NEXT: cmpeq p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: mov z1.d, p0/m, x8
 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT: ret
@@ -264,13 +264,13 @@
 define <4 x half> @insertelement_v4f16(<4 x half> %op1) {
 ; CHECK-LABEL: insertelement_v4f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #3 // =0x3
-; CHECK-NEXT: fmov h1, #5.00000000
-; CHECK-NEXT: index z3.h, #0, #1
 ; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: mov w8, #3 // =0x3
+; CHECK-NEXT: index z1.h, #0, #1
 ; CHECK-NEXT: mov z2.h, w8
-; CHECK-NEXT: cmpeq p0.h, p0/z, z3.h, z2.h
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z2.h
+; CHECK-NEXT: fmov h1, #5.00000000
 ; CHECK-NEXT: mov z0.h, p0/m, h1
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT: ret
@@ -281,13 +281,13 @@
 define <8 x half> @insertelement_v8f16(<8 x half> %op1) {
 ; CHECK-LABEL: insertelement_v8f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #7 // =0x7
-; CHECK-NEXT: fmov h1, #5.00000000
-; CHECK-NEXT: index z3.h, #0, #1
 ; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: mov w8, #7 // =0x7
+; CHECK-NEXT: index z1.h, #0, #1
 ; CHECK-NEXT: mov z2.h, w8
-; CHECK-NEXT: cmpeq p0.h, p0/z, z3.h, z2.h
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z2.h
+; CHECK-NEXT: fmov h1, #5.00000000
 ; CHECK-NEXT: mov z0.h, p0/m, h1
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT: ret
@@ -298,14 +298,14 @@
 define <16 x half> @insertelement_v16f16(ptr %a) {
 ; CHECK-LABEL: insertelement_v16f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
-; CHECK-NEXT: mov w8, #7 // =0x7
-; CHECK-NEXT: fmov h3, #5.00000000
-; CHECK-NEXT: index z4.h, #0, #1
 ; CHECK-NEXT: ptrue p0.h
-; CHECK-NEXT: mov z2.h, w8
-; CHECK-NEXT: cmpeq p0.h, p0/z, z4.h, z2.h
-; CHECK-NEXT: mov z1.h, p0/m, h3
+; CHECK-NEXT: mov w8, #7 // =0x7
+; CHECK-NEXT: index z0.h, #0, #1
+; CHECK-NEXT: mov z1.h, w8
+; CHECK-NEXT: fmov h2, #5.00000000
+; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, z1.h
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: mov z1.h, p0/m, h2
 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT: ret
 %op1 = load <16 x half>, ptr %a
@@ -317,13 +317,13 @@
 define <2 x float> @insertelement_v2f32(<2 x float> %op1) {
 ; CHECK-LABEL: insertelement_v2f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #1 // =0x1
-; CHECK-NEXT: fmov s1, #5.00000000
-; CHECK-NEXT: index z3.s, #0, #1
 ; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: mov w8, #1 // =0x1
+; CHECK-NEXT: index z1.s, #0, #1
 ; CHECK-NEXT: mov z2.s, w8
-; CHECK-NEXT: cmpeq p0.s, p0/z, z3.s, z2.s
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
+; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s
+; CHECK-NEXT: fmov s1, #5.00000000
 ; CHECK-NEXT: mov z0.s, p0/m, s1
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT: ret
@@ -334,13 +334,13 @@
 define <4 x float> @insertelement_v4f32(<4 x float> %op1) {
 ; CHECK-LABEL: insertelement_v4f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #3 // =0x3
-; CHECK-NEXT: fmov s1, #5.00000000
-; CHECK-NEXT: index z3.s, #0, #1
 ; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: mov w8, #3 // =0x3
+; CHECK-NEXT: index z1.s, #0, #1
 ; CHECK-NEXT: mov z2.s, w8
-; CHECK-NEXT: cmpeq p0.s, p0/z, z3.s, z2.s
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z2.s
+; CHECK-NEXT: fmov s1, #5.00000000
 ; CHECK-NEXT: mov z0.s, p0/m, s1
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT: ret
@@ -351,14 +351,14 @@
 define <8 x float> @insertelement_v8f32(ptr %a) {
 ; CHECK-LABEL: insertelement_v8f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
-; CHECK-NEXT: mov w8, #3 // =0x3
-; CHECK-NEXT: fmov s4, #5.00000000
-; CHECK-NEXT: index z2.s, #0, #1
 ; CHECK-NEXT: ptrue p0.s
-; CHECK-NEXT: mov z3.s, w8
-; CHECK-NEXT: cmpeq p0.s, p0/z, z2.s, z3.s
-; CHECK-NEXT: mov z1.s, p0/m, s4
+; CHECK-NEXT: mov w8, #3 // =0x3
+; CHECK-NEXT: index z0.s, #0, #1
+; CHECK-NEXT: mov z1.s, w8
+; CHECK-NEXT: fmov s2, #5.00000000
+; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: mov z1.s, p0/m, s2
 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT: ret
 %op1 = load <8 x float>, ptr %a
@@ -379,13 +379,13 @@
 define <2 x double> @insertelement_v2f64(<2 x double> %op1) {
 ; CHECK-LABEL: insertelement_v2f64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #1 // =0x1
-; CHECK-NEXT: fmov d1, #5.00000000
-; CHECK-NEXT: index z3.d, #0, #1
 ; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: mov w8, #1 // =0x1
+; CHECK-NEXT: index z1.d, #0, #1
 ; CHECK-NEXT: mov z2.d, x8
-; CHECK-NEXT: cmpeq p0.d, p0/z, z3.d, z2.d
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
+; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z2.d
+; CHECK-NEXT: fmov d1, #5.00000000
 ; CHECK-NEXT: mov z0.d, p0/m, d1
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT: ret
@@ -396,14 +396,14 @@
 define <4 x double> @insertelement_v4f64(ptr %a) {
 ; CHECK-LABEL: insertelement_v4f64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
-; CHECK-NEXT: mov w8, #1 // =0x1
-; CHECK-NEXT: fmov d4, #5.00000000
-; CHECK-NEXT: index z2.d, #0, #1
 ; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: mov z3.d, x8
-; CHECK-NEXT: cmpeq p0.d, p0/z, z2.d, z3.d
-; CHECK-NEXT: mov z1.d, p0/m, d4
+; CHECK-NEXT: mov w8, #1 // =0x1
+; CHECK-NEXT: index z0.d, #0, #1
+; CHECK-NEXT: mov z1.d, x8
+; CHECK-NEXT: fmov d2, #5.00000000
+; CHECK-NEXT: cmpeq p0.d, p0/z, z0.d, z1.d
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: mov z1.d, p0/m, d2
 ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1
 ; CHECK-NEXT: ret
 %op1 = load <4 x double>, ptr %a
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-arith.ll
@@ -46,10 +46,10 @@
 define void @add_v32i8(ptr %a, ptr %b) {
 ; CHECK-LABEL: add_v32i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
-; CHECK-NEXT: ldp q2, q3, [x1]
-; CHECK-NEXT: add z0.b, z0.b, z2.b
-; CHECK-NEXT: add z1.b, z1.b, z3.b
+; CHECK-NEXT: ldp q0, q3, [x1]
+; CHECK-NEXT: ldp q1, q2, [x0]
+; CHECK-NEXT: add z0.b, z1.b, z0.b
+; CHECK-NEXT: add z1.b, z2.b, z3.b
 ; CHECK-NEXT: stp q0, q1, [x0]
 ; CHECK-NEXT: ret
 %op1 = load <32 x i8>, ptr %a
@@ -98,10 +98,10 @@
 define void @add_v16i16(ptr %a, ptr %b) {
 ; CHECK-LABEL: add_v16i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
-; CHECK-NEXT: ldp q2, q3, [x1]
-; CHECK-NEXT: add z0.h, z0.h, z2.h
-; CHECK-NEXT: add z1.h, z1.h, z3.h
+; CHECK-NEXT: ldp q0, q3, [x1]
+; CHECK-NEXT: ldp q1, q2, [x0]
+; CHECK-NEXT: add z0.h, z1.h, z0.h
+; CHECK-NEXT: add z1.h, z2.h, z3.h
 ; CHECK-NEXT: stp q0, q1, [x0]
 ; CHECK-NEXT: ret
 %op1 = load <16 x i16>, ptr %a
@@ -138,10 +138,10 @@
 define void @add_v8i32(ptr %a, ptr %b) {
 ; CHECK-LABEL: add_v8i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
-; CHECK-NEXT: ldp q2, q3, [x1]
-; CHECK-NEXT: add z0.s, z0.s, z2.s
-; CHECK-NEXT: add z1.s, z1.s, z3.s
+; CHECK-NEXT: ldp q0, q3, [x1]
+; CHECK-NEXT: ldp q1, q2, [x0]
+; CHECK-NEXT: add z0.s, z1.s, z0.s
+; CHECK-NEXT: add z1.s, z2.s, z3.s
 ; CHECK-NEXT: stp q0, q1, [x0]
 ; CHECK-NEXT: ret
 %op1 = load <8 x i32>, ptr %a
@@ -178,10 +178,10 @@
 define void @add_v4i64(ptr %a, ptr %b) {
 ; CHECK-LABEL: add_v4i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
-; CHECK-NEXT: ldp q2, q3, [x1]
-; CHECK-NEXT: add z0.d, z0.d, z2.d
-; CHECK-NEXT: add z1.d, z1.d, z3.d
+; CHECK-NEXT: ldp q0, q3, [x1]
+; CHECK-NEXT: ldp q1, q2, [x0]
+; CHECK-NEXT: add z0.d, z1.d, z0.d
+; CHECK-NEXT: add z1.d, z2.d, z3.d
 ; CHECK-NEXT: stp q0, q1, [x0]
 ; CHECK-NEXT: ret
 %op1 = load <4 x i64>, ptr %a
@@ -198,8 +198,8 @@
 define <4 x i8> @mul_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; SVE-LABEL: mul_v4i8:
 ; SVE: // %bb.0:
-; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
 ; SVE-NEXT: ptrue p0.h, vl4
+; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT: mul z0.h, p0/m, z0.h, z1.h
 ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -219,8 +219,8 @@
 define <8 x i8> @mul_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
 ; SVE-LABEL: mul_v8i8:
 ; SVE: // %bb.0:
-; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
 ; SVE-NEXT: ptrue p0.b, vl8
+; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT: mul z0.b, p0/m, z0.b, z1.b
 ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -240,8 +240,8 @@
 define <16 x i8> @mul_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
 ; SVE-LABEL: mul_v16i8:
 ; SVE: // %bb.0:
-; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
 ; SVE-NEXT: ptrue p0.b, vl16
+; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
 ; SVE-NEXT: // kill: def $q1 killed $q1 def $z1
 ; SVE-NEXT: mul z0.b, p0/m, z0.b, z1.b
 ; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -261,20 +261,21 @@
 define void @mul_v32i8(ptr %a, ptr %b) {
 ; SVE-LABEL: mul_v32i8:
 ; SVE: // %bb.0:
-; SVE-NEXT: ldp q0, q1, [x0]
 ; SVE-NEXT: ptrue p0.b, vl16
-; SVE-NEXT: ldp q2, q3, [x1]
-; SVE-NEXT: mul z0.b, p0/m, z0.b, z2.b
+; SVE-NEXT: ldp q0, q3, [x1]
+; SVE-NEXT: ldp q1, q2, [x0]
+; SVE-NEXT: mul z0.b, p0/m, z0.b, z1.b
+; SVE-NEXT: movprfx z1, z2
 ; SVE-NEXT: mul z1.b, p0/m, z1.b, z3.b
 ; SVE-NEXT: stp q0, q1, [x0]
 ; SVE-NEXT: ret
 ;
 ; SVE2-LABEL: mul_v32i8:
 ; SVE2: // %bb.0:
-; SVE2-NEXT: ldp q0, q1, [x0]
-; SVE2-NEXT: ldp q2, q3, [x1]
-; SVE2-NEXT: mul z0.b, z0.b, z2.b
-; SVE2-NEXT: mul z1.b, z1.b, z3.b
+; SVE2-NEXT: ldp q0, q3, [x1]
+; SVE2-NEXT: ldp q1, q2, [x0]
+; SVE2-NEXT: mul z0.b, z1.b, z0.b
+; SVE2-NEXT: mul z1.b, z2.b, z3.b
 ; SVE2-NEXT: stp q0, q1, [x0]
 ; SVE2-NEXT: ret
 %op1 = load <32 x i8>, ptr %a
@@ -287,8 +288,8 @@
 define <2 x i16> @mul_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 ; SVE-LABEL: mul_v2i16:
 ; SVE: // %bb.0:
-; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
 ; SVE-NEXT: ptrue p0.s, vl2
+; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT: mul z0.s, p0/m, z0.s, z1.s
 ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -308,8 +309,8 @@
 define <4 x i16> @mul_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; SVE-LABEL: mul_v4i16:
 ; SVE: // %bb.0:
-; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
 ; SVE-NEXT: ptrue p0.h, vl4
+; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT: mul z0.h, p0/m, z0.h, z1.h
 ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -329,8 +330,8 @@
 define <8 x i16> @mul_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
 ; SVE-LABEL: mul_v8i16:
 ; SVE: // %bb.0:
-; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
 ; SVE-NEXT: ptrue p0.h, vl8
+; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
 ; SVE-NEXT: // kill: def $q1 killed $q1 def $z1
 ; SVE-NEXT: mul z0.h, p0/m, z0.h, z1.h
 ; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -350,20 +351,21 @@
 define void @mul_v16i16(ptr %a, ptr %b) {
 ; SVE-LABEL: mul_v16i16:
 ; SVE: // %bb.0:
-; SVE-NEXT: ldp q0, q1, [x0]
 ; SVE-NEXT: ptrue p0.h, vl8
-; SVE-NEXT: ldp q2, q3, [x1]
-; SVE-NEXT: mul z0.h, p0/m, z0.h, z2.h
+; SVE-NEXT: ldp q0, q3, [x1]
+; SVE-NEXT: ldp q1, q2, [x0]
+; SVE-NEXT: mul z0.h, p0/m, z0.h, z1.h
+; SVE-NEXT: movprfx z1, z2
 ; SVE-NEXT: mul z1.h, p0/m, z1.h, z3.h
 ; SVE-NEXT: stp q0, q1, [x0]
 ; SVE-NEXT: ret
 ;
 ; SVE2-LABEL: mul_v16i16:
 ; SVE2: // %bb.0:
-; SVE2-NEXT: ldp q0, q1, [x0]
-; SVE2-NEXT: ldp q2, q3, [x1]
-; SVE2-NEXT: mul z0.h, z0.h, z2.h
-; SVE2-NEXT: mul z1.h, z1.h, z3.h
+; SVE2-NEXT: ldp q0, q3, [x1]
+; SVE2-NEXT: ldp q1, q2, [x0]
+; SVE2-NEXT: mul z0.h, z1.h, z0.h
+; SVE2-NEXT: mul z1.h, z2.h, z3.h
 ; SVE2-NEXT: stp q0, q1, [x0]
 ; SVE2-NEXT: ret
 %op1 = load <16 x i16>, ptr %a
@@ -376,8 +378,8 @@
 define <2 x i32> @mul_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; SVE-LABEL: mul_v2i32:
 ; SVE: // %bb.0:
-; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
 ; SVE-NEXT: ptrue p0.s, vl2
+; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT: mul z0.s, p0/m, z0.s, z1.s
 ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -397,8 +399,8 @@
 define <4 x i32> @mul_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
 ; SVE-LABEL: mul_v4i32:
 ; SVE: // %bb.0:
-; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
 ; SVE-NEXT: ptrue p0.s, vl4
+; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
 ; SVE-NEXT: // kill: def $q1 killed $q1 def $z1
 ; SVE-NEXT: mul z0.s, p0/m, z0.s, z1.s
 ; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -418,20 +420,21 @@
 define void @mul_v8i32(ptr %a, ptr %b) {
 ; SVE-LABEL: mul_v8i32:
 ; SVE: // %bb.0:
-; SVE-NEXT: ldp q0, q1, [x0]
 ; SVE-NEXT: ptrue p0.s, vl4
-; SVE-NEXT: ldp q2, q3, [x1]
-; SVE-NEXT: mul z0.s, p0/m, z0.s, z2.s
+; SVE-NEXT: ldp q0, q3, [x1]
+; SVE-NEXT: ldp q1, q2, [x0]
+; SVE-NEXT: mul z0.s, p0/m, z0.s, z1.s
+; SVE-NEXT: movprfx z1, z2
 ; SVE-NEXT: mul z1.s, p0/m, z1.s, z3.s
 ; SVE-NEXT: stp q0, q1, [x0]
 ; SVE-NEXT: ret
 ;
 ; SVE2-LABEL: mul_v8i32:
 ; SVE2: // %bb.0:
-; SVE2-NEXT: ldp q0, q1, [x0]
-; SVE2-NEXT: ldp q2, q3, [x1]
-; SVE2-NEXT: mul z0.s, z0.s, z2.s
-; SVE2-NEXT: mul z1.s, z1.s, z3.s
+; SVE2-NEXT: ldp q0, q3, [x1]
+; SVE2-NEXT: ldp q1, q2, [x0]
+; SVE2-NEXT: mul z0.s, z1.s, z0.s
+; SVE2-NEXT: mul z1.s, z2.s, z3.s
 ; SVE2-NEXT: stp q0, q1, [x0]
 ; SVE2-NEXT: ret
 %op1 = load <8 x i32>, ptr %a
@@ -444,8 +447,8 @@
 define <1 x i64> @mul_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
 ; SVE-LABEL: mul_v1i64:
 ; SVE: // %bb.0:
-; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
 ; SVE-NEXT: ptrue p0.d, vl1
+; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1
 ; SVE-NEXT: mul z0.d, p0/m, z0.d, z1.d
 ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -465,8 +468,8 @@
 define <2 x i64> @mul_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
 ; SVE-LABEL: mul_v2i64:
 ; SVE: // %bb.0:
-; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
 ; SVE-NEXT: ptrue p0.d, vl2
+; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
 ; SVE-NEXT: // kill: def $q1 killed $q1 def $z1
 ; SVE-NEXT: mul z0.d, p0/m, z0.d, z1.d
 ; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
@@ -486,20 +489,21 @@
 define void @mul_v4i64(ptr %a, ptr %b) {
 ; SVE-LABEL: mul_v4i64:
 ; SVE: // %bb.0:
-; SVE-NEXT: ldp q0, q1, [x0]
 ; SVE-NEXT: ptrue p0.d, vl2
-; SVE-NEXT: ldp q2, q3, [x1]
-; SVE-NEXT: mul z0.d, p0/m, z0.d, z2.d
+; SVE-NEXT: ldp q0, q3, [x1]
+; SVE-NEXT: ldp q1, q2, [x0]
+; SVE-NEXT: mul z0.d, p0/m, z0.d, z1.d
+; SVE-NEXT: movprfx z1, z2
 ; SVE-NEXT: mul z1.d, p0/m, z1.d, z3.d
 ; SVE-NEXT: stp q0, q1, [x0]
 ; SVE-NEXT: ret
 ;
 ; SVE2-LABEL: mul_v4i64:
 ; SVE2: // %bb.0:
-; SVE2-NEXT: ldp q0, q1, [x0]
-; SVE2-NEXT: ldp q2, q3, [x1]
-; SVE2-NEXT: mul z0.d, z0.d, z2.d
-; SVE2-NEXT: mul z1.d, z1.d, z3.d
+; SVE2-NEXT: ldp q0, q3, [x1]
+; SVE2-NEXT: ldp q1, q2, [x0]
+; SVE2-NEXT: mul z0.d, z1.d, z0.d
+; SVE2-NEXT: mul z1.d, z2.d, z3.d
 ; SVE2-NEXT: stp q0, q1, [x0]
 ; SVE2-NEXT: ret
 %op1 = load <4 x i64>, ptr %a
@@ -552,10 +556,10 @@
 define void @sub_v32i8(ptr %a, ptr %b) {
 ; CHECK-LABEL: sub_v32i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
-; CHECK-NEXT: ldp q2, q3, [x1]
-; CHECK-NEXT: sub z0.b, z0.b, z2.b
-; CHECK-NEXT: sub z1.b, z1.b, z3.b
+; CHECK-NEXT: ldp q0, q3, [x1]
+; CHECK-NEXT: ldp q1, q2, [x0]
+; CHECK-NEXT: sub z0.b, z1.b, z0.b
+; CHECK-NEXT: sub z1.b, z2.b, z3.b
 ; CHECK-NEXT: stp q0, q1, [x0]
 ; CHECK-NEXT: ret
 %op1 = load <32 x i8>, ptr %a
@@ -604,10 +608,10 @@
 define void @sub_v16i16(ptr %a, ptr %b) {
 ; CHECK-LABEL: sub_v16i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
-; CHECK-NEXT: ldp q2, q3, [x1]
-; CHECK-NEXT: sub z0.h, z0.h, z2.h
-; CHECK-NEXT: sub z1.h, z1.h, z3.h
+; CHECK-NEXT: ldp q0, q3, [x1]
+; CHECK-NEXT: ldp q1, q2, [x0]
+; CHECK-NEXT: sub z0.h, z1.h, z0.h
+; CHECK-NEXT: sub z1.h, z2.h, z3.h
 ; CHECK-NEXT: stp q0, q1, [x0]
 ; CHECK-NEXT: ret
 %op1 = load <16 x i16>, ptr %a
@@ -644,10 +648,10 @@
 define void @sub_v8i32(ptr %a, ptr %b) {
 ; CHECK-LABEL: sub_v8i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
-; CHECK-NEXT: ldp q2, q3, [x1]
-; CHECK-NEXT: sub z0.s, z0.s, z2.s
-; CHECK-NEXT: sub z1.s, z1.s, z3.s
+; CHECK-NEXT: ldp q0, q3, [x1]
+; CHECK-NEXT: ldp q1, q2, [x0]
+; CHECK-NEXT: sub z0.s, z1.s, z0.s
+; CHECK-NEXT: sub z1.s, z2.s, z3.s
 ; CHECK-NEXT: stp q0, q1, [x0]
 ; CHECK-NEXT: ret
 %op1 = load <8 x i32>, ptr %a
@@ -684,10 +688,10 @@
 define void @sub_v4i64(ptr %a, ptr %b) {
 ; CHECK-LABEL: sub_v4i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
-; CHECK-NEXT: ldp q2, q3, [x1]
-; CHECK-NEXT: sub z0.d, z0.d, z2.d
-; CHECK-NEXT: sub z1.d, z1.d, z3.d
+; CHECK-NEXT: ldp q0, q3, [x1]
+; CHECK-NEXT: ldp q1, q2, [x0]
+; CHECK-NEXT: sub z0.d, z1.d, z0.d
+; CHECK-NEXT: sub z1.d, z2.d, z3.d
 ; CHECK-NEXT: stp q0, q1, [x0]
 ; CHECK-NEXT: ret
 %op1 = load <4 x i64>, ptr %a
@@ -704,8 +708,8 @@
 define <4 x i8> @abs_v4i8(<4 x i8> %op1) {
 ; CHECK-LABEL: abs_v4i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.h, vl4
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: sxtb z0.h, p0/m, z0.h
 ; CHECK-NEXT: abs z0.h, p0/m, z0.h
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -717,8 +721,8 @@
 define <8 x i8> @abs_v8i8(<8 x i8> %op1) {
 ; CHECK-LABEL: abs_v8i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.b, vl8
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: abs z0.b, p0/m, z0.b
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT: ret
@@ -729,8 +733,8 @@
 define <16 x i8> @abs_v16i8(<16 x i8> %op1) {
 ; CHECK-LABEL: abs_v16i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.b, vl16
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: abs z0.b, p0/m, z0.b
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT: ret
@@ -741,8 +745,8 @@
 define void @abs_v32i8(ptr %a) {
 ; CHECK-LABEL: abs_v32i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: ptrue p0.b, vl16
+; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: abs z0.b, p0/m, z0.b
 ; CHECK-NEXT: abs z1.b, p0/m, z1.b
 ; CHECK-NEXT: stp q0, q1, [x0]
@@ -756,8 +760,8 @@
 define <2 x i16> @abs_v2i16(<2 x i16> %op1) {
 ; CHECK-LABEL: abs_v2i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.s, vl2
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: sxth z0.s, p0/m, z0.s
 ; CHECK-NEXT: abs z0.s, p0/m, z0.s
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -769,8 +773,8 @@
 define <4 x i16> @abs_v4i16(<4 x i16> %op1) {
 ; CHECK-LABEL: abs_v4i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.h, vl4
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: abs z0.h, p0/m, z0.h
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT: ret
@@ -781,8 +785,8 @@
 define <8 x i16> @abs_v8i16(<8 x i16> %op1) {
 ; CHECK-LABEL: abs_v8i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: abs z0.h, p0/m, z0.h
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT: ret
@@ -793,8 +797,8 @@
 define void @abs_v16i16(ptr %a) {
 ; CHECK-LABEL: abs_v16i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: abs z0.h, p0/m, z0.h
 ; CHECK-NEXT: abs z1.h, p0/m, z1.h
 ; CHECK-NEXT: stp q0, q1, [x0]
@@ -808,8 +812,8 @@
 define <2 x i32> @abs_v2i32(<2 x i32> %op1) {
 ; CHECK-LABEL: abs_v2i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.s, vl2
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: abs z0.s, p0/m, z0.s
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT: ret
@@ -820,8 +824,8 @@
 define <4 x i32> @abs_v4i32(<4 x i32> %op1) {
 ; CHECK-LABEL: abs_v4i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: abs z0.s, p0/m, z0.s
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT: ret
@@ -832,8 +836,8 @@
 define void @abs_v8i32(ptr %a) {
 ; CHECK-LABEL: abs_v8i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: abs z0.s, p0/m, z0.s
 ; CHECK-NEXT: abs z1.s, p0/m, z1.s
 ; CHECK-NEXT: stp q0, q1, [x0]
@@ -847,8 +851,8 @@
 define <1 x i64> @abs_v1i64(<1 x i64> %op1) {
 ; CHECK-LABEL: abs_v1i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.d, vl1
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: abs z0.d, p0/m, z0.d
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT: ret
@@ -859,8 +863,8 @@
 define <2 x i64> @abs_v2i64(<2 x i64> %op1) {
 ; CHECK-LABEL: abs_v2i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: abs z0.d, p0/m, z0.d
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT: ret
@@ -871,8 +875,8 @@
 define void @abs_v4i64(ptr %a) {
 ; CHECK-LABEL: abs_v4i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: abs z0.d, p0/m, z0.d
 ; CHECK-NEXT: abs z1.d, p0/m, z1.d
 ; CHECK-NEXT: stp q0, q1, [x0]
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-compares.ll
@@ -40,12 +40,12 @@
 define void @icmp_eq_v32i8(ptr %a, ptr %b) {
 ; CHECK-LABEL: icmp_eq_v32i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: ptrue p0.b, vl16
-; CHECK-NEXT: ldp q2, q3, [x1]
-; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, z2.b
+; CHECK-NEXT: ldp q0, q3, [x1]
+; CHECK-NEXT: ldp q1, q2, [x0]
+; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z0.b
+; CHECK-NEXT: cmpeq p0.b, p0/z, z2.b, z3.b
 ; CHECK-NEXT: mov z0.b, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: cmpeq p0.b, p0/z, z1.b, z3.b
 ; CHECK-NEXT: mov z1.b, p0/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: stp q0, q1, [x0]
 ; CHECK-NEXT: ret
@@ -90,12 +90,12 @@
 define void @icmp_eq_v16i16(ptr %a, ptr %b) {
 ; CHECK-LABEL: icmp_eq_v16i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: ptrue p0.h, vl8
-; CHECK-NEXT: ldp q2, q3, [x1]
-; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, z2.h
+; CHECK-NEXT: ldp q0, q3, [x1]
+; CHECK-NEXT: ldp q1, q2, [x0]
+; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z0.h
+; CHECK-NEXT: cmpeq p0.h, p0/z, z2.h, z3.h
 ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: cmpeq p0.h, p0/z, z1.h, z3.h
 ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: stp q0, q1, [x0]
 ; CHECK-NEXT: ret
@@ -140,12 +140,12 @@
 define void @icmp_eq_v8i32(ptr %a, ptr %b) {
 ; CHECK-LABEL: icmp_eq_v8i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ldp q2, q3, [x1]
-; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, z2.s
+; CHECK-NEXT: ldp q0, q3, [x1]
+; CHECK-NEXT: ldp q1, q2, [x0]
+; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z0.s
+; CHECK-NEXT: cmpeq p0.s, p0/z, z2.s, z3.s
 ; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: cmpeq p0.s, p0/z, z1.s, z3.s
 ; CHECK-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: stp q0, q1, [x0]
 ; CHECK-NEXT: ret
@@ -190,12 +190,12 @@
 define void @icmp_eq_v4i64(ptr %a, ptr %b) {
 ; CHECK-LABEL: icmp_eq_v4i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: ldp q2, q3, [x1]
-; CHECK-NEXT: cmpeq p1.d, p0/z, z0.d, z2.d
+; CHECK-NEXT: ldp q0, q3, [x1]
+; CHECK-NEXT: ldp q1, q2, [x0]
+; CHECK-NEXT: cmpeq p1.d, p0/z, z1.d, z0.d
+; CHECK-NEXT: cmpeq p0.d, p0/z, z2.d, z3.d
 ; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: cmpeq p0.d, p0/z, z1.d, z3.d
 ; CHECK-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: stp q0, q1, [x0]
 ; CHECK-NEXT: ret
@@ -214,12 +214,12 @@
 define void @icmp_ne_v32i8(ptr %a, ptr %b) {
 ; CHECK-LABEL: icmp_ne_v32i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: ptrue p0.b, vl16
-; CHECK-NEXT: ldp q2, q3, [x1]
-; CHECK-NEXT: cmpne p1.b, p0/z, z0.b, z2.b
+; CHECK-NEXT: ldp q0, q3, [x1]
+; CHECK-NEXT: ldp q1, q2, [x0]
+; CHECK-NEXT: cmpne p1.b, p0/z, z1.b, z0.b
+; CHECK-NEXT: cmpne p0.b, p0/z, z2.b, z3.b
 ; CHECK-NEXT: mov z0.b, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: cmpne p0.b, p0/z, z1.b, z3.b
 ; CHECK-NEXT: mov z1.b, p0/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: stp q0, q1, [x0]
 ; CHECK-NEXT: ret
@@ -238,8 +238,8 @@
 define void @icmp_sge_v8i16(ptr %a, ptr %b) {
 ; CHECK-LABEL: icmp_sge_v8i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: ldr q1, [x1]
 ; CHECK-NEXT: cmpge p0.h, p0/z, z0.h, z1.h
 ; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff
@@ -260,12 +260,12 @@
 define void @icmp_sgt_v16i16(ptr %a, ptr %b) {
 ; CHECK-LABEL: icmp_sgt_v16i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: ptrue p0.h, vl8
-; CHECK-NEXT: ldp q2, q3, [x1]
-; CHECK-NEXT: cmpgt p1.h, p0/z, z0.h, z2.h
+; CHECK-NEXT: ldp q0, q3, [x1]
+; CHECK-NEXT: ldp q1, q2, [x0]
+; CHECK-NEXT: cmpgt p1.h, p0/z, z1.h, z0.h
+; CHECK-NEXT: cmpgt p0.h, p0/z, z2.h, z3.h
 ; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: cmpgt p0.h, p0/z, z1.h, z3.h
 ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: stp q0, q1, [x0]
 ; CHECK-NEXT: ret
@@ -284,8 +284,8 @@
 define void @icmp_sle_v4i32(ptr %a, ptr %b) {
 ; CHECK-LABEL: icmp_sle_v4i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: ldr q1, [x1]
 ; CHECK-NEXT: cmpge p0.s, p0/z, z1.s, z0.s
 ; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff
@@ -306,12 +306,12 @@
 define void @icmp_slt_v8i32(ptr %a, ptr %b) {
 ; CHECK-LABEL: icmp_slt_v8i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
 ; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ldp q2, q3, [x1]
-; CHECK-NEXT: cmpgt p1.s, p0/z, z2.s, z0.s
+; CHECK-NEXT: ldp q0, q3, [x1]
+; CHECK-NEXT: ldp q1, q2, [x0]
+; CHECK-NEXT: cmpgt p1.s, p0/z, z0.s, z1.s
+; CHECK-NEXT: cmpgt p0.s, p0/z, z3.s, z2.s
 ; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: cmpgt p0.s, p0/z, z3.s, z1.s
 ; CHECK-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff
 ; CHECK-NEXT: stp q0, q1, [x0]
 ; CHECK-NEXT: ret
@@ -330,8 +330,8 @@
 define void @icmp_uge_v2i64(ptr %a, ptr %b) {
 ; CHECK-LABEL: icmp_uge_v2i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: ldr q1, [x1]
 ; CHECK-NEXT: cmphs p0.d, p0/z, z0.d, z1.d
 ; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff
@@ -352,8 +352,8 @@
 define void @icmp_ugt_v2i64(ptr %a, ptr %b) {
 ; CHECK-LABEL: icmp_ugt_v2i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: ldr q1, [x1]
 ; CHECK-NEXT: cmphi p0.d, p0/z, z0.d, z1.d
 ; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff
@@ -374,8 +374,8 @@
 define void @icmp_ule_v2i64(ptr %a, ptr %b) {
 ; CHECK-LABEL: icmp_ule_v2i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: ldr q1, [x1]
 ; CHECK-NEXT: cmphs p0.d, p0/z, z1.d, z0.d
 ; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff
@@ -396,8 +396,8 @@
 define void @icmp_ult_v2i64(ptr %a, ptr %b) {
 ; CHECK-LABEL: icmp_ult_v2i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: ldr q0, [x0]
 ; CHECK-NEXT: ldr q1, [x1]
 ; CHECK-NEXT: cmphi p0.d, p0/z, z1.d, z0.d
 ; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-div.ll
@@ -11,14 +11,14 @@
 define <4 x i8> @sdiv_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
 ; CHECK-LABEL: sdiv_v4i8:
 ; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h, vl4
 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: sxtb z1.h, p0/m, z1.h
 ; CHECK-NEXT: sxtb z0.h, p0/m, z0.h
+; CHECK-NEXT: sxtb z1.h, p0/m, z1.h
+; CHECK-NEXT: ptrue p0.s, vl4
 ; CHECK-NEXT: sunpklo z1.s, z1.h
 ; CHECK-NEXT: sunpklo z0.s, z0.h
-; CHECK-NEXT: ptrue p0.s, vl4
 ; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -43,11 +43,11 @@
 ; CHECK-NEXT: sunpklo z0.s, z0.h
 ; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z3.s
 ; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
 ; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: splice z2.h, p0, z2.h, z0.h
-; CHECK-NEXT: uzp1 z0.b, z2.b, z2.b
+; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h
+; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT: splice z1.h, p0, z1.h, z0.h
+; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT: ret
 %res = sdiv <8 x i8> %op1, %op2
@@ -61,41 +61,40 @@
 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: mov z2.d, z1.d
 ; CHECK-NEXT: mov z3.d, z0.d
-; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
-; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
+; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: ext z2.b, z2.b, z1.b, #8
+; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
+; CHECK-NEXT: sunpklo z1.h, z1.b
+; CHECK-NEXT: sunpklo z0.h, z0.b
 ; CHECK-NEXT: sunpklo z2.h, z2.b
 ; CHECK-NEXT: sunpklo z3.h, z3.b
 ; CHECK-NEXT: sunpklo z4.s, z2.h
 ; CHECK-NEXT: sunpklo z5.s, z3.h
 ; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
 ; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
-; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: sunpklo z1.h, z1.b
-; CHECK-NEXT: sunpklo z0.h, z0.b
 ; CHECK-NEXT: sunpklo z2.s, z2.h
 ; CHECK-NEXT: sunpklo z3.s, z3.h
 ; CHECK-NEXT: sdivr z4.s, p0/m, z4.s, z5.s
-; CHECK-NEXT: sunpklo z5.s, z1.h
+; CHECK-NEXT: sunpklo z5.s, z0.h
+; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
+; CHECK-NEXT: sunpklo z0.s, z0.h
 ; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT: sunpklo z3.s, z0.h
+; CHECK-NEXT: sunpklo z3.s, z1.h
 ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
 ; CHECK-NEXT: sunpklo z1.s, z1.h
-; CHECK-NEXT: sunpklo z0.s, z0.h
-; CHECK-NEXT: ptrue p1.h, vl4
-; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
+; CHECK-NEXT: sdivr z3.s, p0/m, z3.s, z5.s
 ; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
 ; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: splice z4.h, p1, z4.h, z2.h
-; CHECK-NEXT: movprfx z2, z3
-; CHECK-NEXT: sdiv z2.s, p0/m, z2.s, z5.s
-; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h
+; CHECK-NEXT: ptrue p0.h, vl4
+; CHECK-NEXT: uzp1 z1.h, z4.h, z4.h
+; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h
+; CHECK-NEXT: splice z1.h, p0, z1.h, z2.h
+; CHECK-NEXT: uzp1 z1.b, z1.b, z1.b
 ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: uzp1 z2.b, z4.b, z4.b
-; CHECK-NEXT: splice z1.h, p1, z1.h, z0.h
+; CHECK-NEXT: splice z3.h, p0, z3.h, z0.h
 ; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b
-; CHECK-NEXT: splice z0.b, p0, z0.b, z2.b
+; CHECK-NEXT: uzp1 z0.b, z3.b, z3.b
+; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT: ret
 %res = sdiv <16 x i8> %op1, %op2
@@ -105,76 +104,78 @@
 define void @sdiv_v32i8(ptr %a, ptr %b) {
 ; CHECK-LABEL: sdiv_v32i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q2, [x0]
+; CHECK-NEXT: ldp q6, q2, [x0]
 ; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ptrue p1.h, vl4
-; CHECK-NEXT: ptrue p2.b, vl8
-; CHECK-NEXT: ldp q1, q3, [x1]
-; CHECK-NEXT: mov z5.d, z2.d
+; CHECK-NEXT: ldp q7, q3, [x1]
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: mov z16.d, z6.d
+; CHECK-NEXT: mov z0.d, z3.d
+; CHECK-NEXT: ext z1.b, z1.b, z2.b, #8
 ; CHECK-NEXT: sunpklo z2.h, z2.b
-; CHECK-NEXT: ext z5.b, z5.b, z5.b, #8
-; CHECK-NEXT: sunpklo z5.h, z5.b
-; CHECK-NEXT: sunpklo z7.s, z5.h
-; CHECK-NEXT: ext z5.b, z5.b, z5.b, #8
-; CHECK-NEXT: sunpklo z5.s, z5.h
-; CHECK-NEXT: mov z4.d, z3.d
+; CHECK-NEXT: ext z16.b, z16.b, z6.b, #8
+; CHECK-NEXT: ext z0.b, z0.b, z3.b, #8
 ; CHECK-NEXT: sunpklo z3.h, z3.b
+; CHECK-NEXT: sunpklo z6.h, z6.b
+; CHECK-NEXT: sunpklo z1.h, z1.b
+; CHECK-NEXT: sunpklo z16.h, z16.b
+; CHECK-NEXT: sunpklo z4.h, z0.b
+; CHECK-NEXT: sunpklo z5.s, z1.h
+; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
+; CHECK-NEXT: sunpklo z18.s, z16.h
+; CHECK-NEXT: sunpklo z0.s, z4.h
 ; CHECK-NEXT: ext z4.b, z4.b, z4.b, #8
-; CHECK-NEXT: sunpklo z4.h, z4.b
-; CHECK-NEXT: sunpklo z6.s, z4.h
-; CHECK-NEXT: ext z4.b, z4.b, z4.b, #8
+; CHECK-NEXT: ext z16.b, z16.b, z16.b, #8
+; CHECK-NEXT: sunpklo z1.s, z1.h
 ; CHECK-NEXT: sunpklo z4.s, z4.h
+; CHECK-NEXT: sunpklo z16.s, z16.h
-; CHECK-NEXT: sdivr z6.s, p0/m, z6.s, z7.s
-; CHECK-NEXT: sdivr z4.s, p0/m, z4.s, z5.s
-; CHECK-NEXT: sunpklo z7.s, z3.h
+; CHECK-NEXT: sdivr z0.s, p0/m, z0.s, z5.s
 ; CHECK-NEXT: sunpklo z5.s, z2.h
-; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
 ; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
-; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h
-; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
-; CHECK-NEXT: sunpklo z3.s, z3.h
-; CHECK-NEXT: splice z6.h, p1, z6.h, z4.h
 ; CHECK-NEXT: sunpklo z2.s, z2.h
-; CHECK-NEXT: uzp1 z4.b, z6.b, z6.b
-; CHECK-NEXT: sdiv z5.s, p0/m, z5.s, z7.s
-; CHECK-NEXT: sdiv z2.s, p0/m, z2.s, z3.s
-; CHECK-NEXT: sunpklo z3.h, z1.b
-; CHECK-NEXT: sunpklo z6.h, z0.b
-; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
-; CHECK-NEXT: sunpklo z1.h, z1.b
-; CHECK-NEXT: sunpklo z0.h, z0.b
-; CHECK-NEXT: splice z5.h, p1, z5.h, z2.h
-; CHECK-NEXT: sunpklo z2.s, z1.h
-; CHECK-NEXT: sunpklo z7.s, z0.h
-; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: sunpklo z1.s, z1.h
-; CHECK-NEXT: sunpklo z0.s, z0.h
-; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z7.s
-; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h
-; CHECK-NEXT: sunpklo z2.s, z3.h
-; CHECK-NEXT: sunpklo z7.s, z6.h
+; CHECK-NEXT: sdiv z1.s, p0/m, z1.s, z4.s
+; CHECK-NEXT: sunpklo z4.s, z3.h
 ; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
-; CHECK-NEXT: ext z6.b, z6.b, z6.b, #8
 ; CHECK-NEXT: sunpklo z3.s, z3.h
-; CHECK-NEXT: sunpklo z6.s, z6.h
-; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z7.s
-; CHECK-NEXT: sdivr z3.s, p0/m, z3.s, z6.s
 ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT: sdivr z4.s, p0/m, z4.s, z5.s
+; CHECK-NEXT: mov z5.d, z7.d
+; CHECK-NEXT: ext z5.b, z5.b, z7.b, #8
+; CHECK-NEXT: sunpklo z7.h, z7.b
+; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
+; CHECK-NEXT: sunpklo z5.h, z5.b
+; CHECK-NEXT: sunpklo z17.s, z5.h
+; CHECK-NEXT: ext z5.b, z5.b, z5.b, #8
+; CHECK-NEXT: sdiv z2.s, p0/m, z2.s, z3.s
+; CHECK-NEXT: sunpklo z5.s, z5.h
+; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
+; CHECK-NEXT: sdivr z17.s, p0/m, z17.s, z18.s
+; CHECK-NEXT: sunpklo z18.s, z6.h
+; CHECK-NEXT: ext z6.b, z6.b, z6.b, #8
+; CHECK-NEXT: sunpklo z6.s, z6.h
 ; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
+; CHECK-NEXT: sdivr z5.s, p0/m, z5.s, z16.s
+; CHECK-NEXT: sunpklo z16.s, z7.h
+; CHECK-NEXT: ext z7.b, z7.b, z7.b, #8
+; CHECK-NEXT: sunpklo z7.s, z7.h
+; CHECK-NEXT: uzp1 z3.h, z17.h, z17.h
+; CHECK-NEXT: sdivr z16.s, p0/m, z16.s, z18.s
+; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h
+; CHECK-NEXT: sdiv z6.s, p0/m, z6.s, z7.s
+; CHECK-NEXT: ptrue p0.h, vl4
+; CHECK-NEXT: uzp1 z7.h, z16.h, z16.h
+; CHECK-NEXT: splice z3.h, p0, z3.h, z5.h
+; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: splice z4.h, p0, z4.h, z2.h
+; CHECK-NEXT: uzp1 z1.b, z3.b, z3.b
+; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT: uzp1 z3.b, z4.b, z4.b
+; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h
+; CHECK-NEXT: splice z7.h, p0, z7.h, z6.h
+; CHECK-NEXT: ptrue p0.b, vl8
+; CHECK-NEXT: uzp1 z2.b, z7.b, z7.b
+; CHECK-NEXT: splice z3.b, p0, z3.b, z0.b
+; CHECK-NEXT: splice z2.b, p0, z2.b, z1.b
+; CHECK-NEXT: stp q2, q3, [x0]
 ; CHECK-NEXT: ret
 %op1 = load <32 x i8>, ptr %a
 %op2 = load <32 x i8>, ptr %b
@@ -186,9 +187,9 @@
 define <2 x i16> @sdiv_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 ; CHECK-LABEL: sdiv_v2i16:
 ; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s, vl2
 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: ptrue p0.s, vl2
 ; CHECK-NEXT: sxth z1.s, p0/m, z1.s
 ; CHECK-NEXT: sxth z0.s, p0/m, z0.s
 ; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
@@ -201,9 +202,9 @@
 define <4 x i16> @sdiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
 ; CHECK-LABEL: sdiv_v4i16:
 ; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s, vl4
 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: ptrue p0.s, vl4
 ; CHECK-NEXT: sunpklo z1.s, z1.h
 ; CHECK-NEXT: sunpklo z0.s, z0.h
 ; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
@@ -221,18 +222,18 @@
 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
 ; CHECK-NEXT: mov z2.d, z1.d
 ; CHECK-NEXT: mov z3.d, z0.d
-; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
-; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
 ; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: ext z2.b, z2.b, z1.b, #8
+; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
 ; CHECK-NEXT: sunpklo z1.s, z1.h
+; CHECK-NEXT: sunpklo z0.s, z0.h
 ; CHECK-NEXT: sunpklo z2.s, z2.h
 ; CHECK-NEXT: sunpklo z3.s, z3.h
-; CHECK-NEXT: sunpklo z0.s, z0.h
-; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z3.s
 ; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
-; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z3.s
 ; CHECK-NEXT: ptrue p0.h, vl4
+; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h
 ; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
 ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT: ret
@@ -243,34 +244,38 @@
 define void @sdiv_v16i16(ptr %a, ptr %b) {
 ; CHECK-LABEL: sdiv_v16i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q3, q0, [x1]
+; CHECK-NEXT: ldp q4, q1, [x1]
 ; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ptrue p1.h, vl4
-; CHECK-NEXT: sunpklo z6.s, z3.h
-; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: sunpklo z4.s, z0.h
-; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
-; CHECK-NEXT: sunpklo z0.s, z0.h
-; CHECK-NEXT: sunpklo z7.s, z1.h
-; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8
+; CHECK-NEXT: ldr q0, [x0, #16]
+; CHECK-NEXT: mov z2.d, z1.d
+; CHECK-NEXT: mov z3.d, z0.d
+; CHECK-NEXT: mov z5.d, z4.d
+; CHECK-NEXT: ext z2.b, z2.b, z1.b, #8
+; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8
+; CHECK-NEXT: ext z5.b, z5.b, z4.b, #8
+; CHECK-NEXT: sunpklo z4.s, z4.h
 ; CHECK-NEXT: sunpklo z1.s, z1.h
-; CHECK-NEXT: sunpklo z5.s, z2.h
-; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8
+; CHECK-NEXT: sunpklo z0.s, z0.h
 ; CHECK-NEXT: sunpklo z2.s, z2.h
-; CHECK-NEXT: sdivr z4.s, p0/m, z4.s, z5.s
-; CHECK-NEXT: sdivr z0.s, p0/m, z0.s, z2.s
-; CHECK-NEXT: sunpklo z2.s, z3.h
-; CHECK-NEXT: sdiv z1.s, p0/m, z1.s, z2.s
-; CHECK-NEXT: movprfx z2, z7
-; CHECK-NEXT: sdiv z2.s, p0/m, z2.s, z6.s
+; CHECK-NEXT: sunpklo z3.s, z3.h
+; CHECK-NEXT: sunpklo z5.s, z5.h
+; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z3.s
+; CHECK-NEXT: ldr q3, [x0]
+; CHECK-NEXT: mov z6.d, z3.d
 ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
+; CHECK-NEXT: ext z6.b, z6.b, z3.b, #8
+; CHECK-NEXT: sunpklo z3.s, z3.h
+; CHECK-NEXT: sunpklo z6.s, z6.h
+; CHECK-NEXT: sdivr z5.s, p0/m, z5.s, z6.s
 ; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
-; CHECK-NEXT: uzp1 z3.h, z4.h, z4.h
-; CHECK-NEXT: splice z2.h, p1, z2.h, z1.h
-; CHECK-NEXT: splice z3.h, p1, z3.h, z0.h
-; CHECK-NEXT: stp q2, q3, [x0]
+; CHECK-NEXT: sdiv z3.s, p0/m, z3.s, z4.s
+; CHECK-NEXT: ptrue p0.h, vl4
+; CHECK-NEXT: uzp1 z1.h, z5.h, z5.h
+; CHECK-NEXT: splice z0.h, p0, z0.h, z2.h
+; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h
+; CHECK-NEXT: splice z3.h, p0, z3.h, z1.h
+; CHECK-NEXT: stp q3, q0, [x0]
 ; CHECK-NEXT: ret
 %op1 = load <16 x i16>, ptr %a
 %op2 = load <16 x i16>, ptr %b
@@ -282,8 +287,8 @@
 define <2 x i32> @sdiv_v2i32(<2 x i32> %op1, <2 x i32> %op2) {
 ; CHECK-LABEL: sdiv_v2i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.s, vl2
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
 ; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -295,8 +300,8 @@
 define <4 x i32> @sdiv_v4i32(<4 x i32> %op1, <4 x i32> %op2) {
;
CHECK-LABEL: sdiv_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -308,10 +313,11 @@ define void @sdiv_v8i32(ptr %a, ptr %b) { ; CHECK-LABEL: sdiv_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: sdiv z0.s, p0/m, z0.s, z2.s +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: sdivr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: sdiv z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -325,8 +331,8 @@ define <1 x i64> @sdiv_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-LABEL: sdiv_v1i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: sdiv z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -338,8 +344,8 @@ define <2 x i64> @sdiv_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-LABEL: sdiv_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: sdiv z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -351,10 +357,11 @@ define void @sdiv_v4i64(ptr %a, ptr %b) { ; CHECK-LABEL: sdiv_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: sdiv z0.d, p0/m, z0.d, z2.d +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: sdivr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: sdiv z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -403,11 +410,11 @@ ; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z3.s ; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h -; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: splice z2.h, p0, z2.h, z0.h -; CHECK-NEXT: uzp1 z0.b, z2.b, z2.b +; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h +; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h +; CHECK-NEXT: splice z1.h, p0, z1.h, z0.h +; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %res = udiv <8 x i8> %op1, %op2 @@ -421,41 +428,40 @@ ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: mov z3.d, z0.d -; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8 -; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8 +; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: ext z2.b, z2.b, z1.b, #8 +; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8 +; CHECK-NEXT: uunpklo z1.h, z1.b +; CHECK-NEXT: uunpklo z0.h, z0.b ; CHECK-NEXT: uunpklo z2.h, z2.b ; CHECK-NEXT: uunpklo z3.h, z3.b ; CHECK-NEXT: uunpklo z4.s, z2.h ; CHECK-NEXT: uunpklo z5.s, z3.h ; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8 ; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8 -; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: uunpklo z1.h, z1.b -; CHECK-NEXT: uunpklo z0.h, z0.b ; CHECK-NEXT: uunpklo z2.s, z2.h ; CHECK-NEXT: uunpklo z3.s, z3.h ; CHECK-NEXT: udivr z4.s, p0/m, z4.s, z5.s -; CHECK-NEXT: uunpklo z5.s, z1.h +; CHECK-NEXT: uunpklo z5.s, z0.h +; 
CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 +; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z3.s -; CHECK-NEXT: uunpklo z3.s, z0.h +; CHECK-NEXT: uunpklo z3.s, z1.h ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 -; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 ; CHECK-NEXT: uunpklo z1.s, z1.h -; CHECK-NEXT: uunpklo z0.s, z0.h -; CHECK-NEXT: ptrue p1.h, vl4 -; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h +; CHECK-NEXT: udivr z3.s, p0/m, z3.s, z5.s ; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h ; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: splice z4.h, p1, z4.h, z2.h -; CHECK-NEXT: movprfx z2, z3 -; CHECK-NEXT: udiv z2.s, p0/m, z2.s, z5.s -; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h +; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: uzp1 z1.h, z4.h, z4.h +; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h +; CHECK-NEXT: splice z1.h, p0, z1.h, z2.h +; CHECK-NEXT: uzp1 z1.b, z1.b, z1.b ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h -; CHECK-NEXT: uzp1 z2.b, z4.b, z4.b -; CHECK-NEXT: splice z1.h, p1, z1.h, z0.h +; CHECK-NEXT: splice z3.h, p0, z3.h, z0.h ; CHECK-NEXT: ptrue p0.b, vl8 -; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b -; CHECK-NEXT: splice z0.b, p0, z0.b, z2.b +; CHECK-NEXT: uzp1 z0.b, z3.b, z3.b +; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret %res = udiv <16 x i8> %op1, %op2 @@ -465,76 +471,78 @@ define void @udiv_v32i8(ptr %a, ptr %b) { ; CHECK-LABEL: udiv_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q2, [x0] +; CHECK-NEXT: ldp q6, q2, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ptrue p1.h, vl4 -; CHECK-NEXT: ptrue p2.b, vl8 -; CHECK-NEXT: ldp q1, q3, [x1] -; CHECK-NEXT: mov z5.d, z2.d +; CHECK-NEXT: ldp q7, q3, [x1] +; CHECK-NEXT: mov z1.d, z2.d +; CHECK-NEXT: mov z16.d, z6.d +; CHECK-NEXT: mov z0.d, z3.d +; CHECK-NEXT: ext z1.b, z1.b, z2.b, #8 ; CHECK-NEXT: uunpklo z2.h, z2.b -; CHECK-NEXT: ext z5.b, z5.b, z5.b, #8 -; CHECK-NEXT: uunpklo z5.h, z5.b -; CHECK-NEXT: uunpklo z7.s, z5.h -; CHECK-NEXT: ext z5.b, z5.b, z5.b, #8 -; CHECK-NEXT: uunpklo z5.s, z5.h -; CHECK-NEXT: mov z4.d, z3.d +; CHECK-NEXT: ext z16.b, z16.b, z6.b, #8 +; CHECK-NEXT: ext z0.b, z0.b, z3.b, #8 ; CHECK-NEXT: uunpklo z3.h, z3.b +; CHECK-NEXT: uunpklo z6.h, z6.b +; CHECK-NEXT: uunpklo z1.h, z1.b +; CHECK-NEXT: uunpklo z16.h, z16.b +; CHECK-NEXT: uunpklo z4.h, z0.b +; CHECK-NEXT: uunpklo z5.s, z1.h +; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 +; CHECK-NEXT: uunpklo z18.s, z16.h +; CHECK-NEXT: uunpklo z0.s, z4.h ; CHECK-NEXT: ext z4.b, z4.b, z4.b, #8 -; CHECK-NEXT: uunpklo z4.h, z4.b -; CHECK-NEXT: uunpklo z6.s, z4.h -; CHECK-NEXT: ext z4.b, z4.b, z4.b, #8 +; CHECK-NEXT: ext z16.b, z16.b, z16.b, #8 +; CHECK-NEXT: uunpklo z1.s, z1.h ; CHECK-NEXT: uunpklo z4.s, z4.h -; CHECK-NEXT: udivr z6.s, p0/m, z6.s, z7.s -; CHECK-NEXT: udivr z4.s, p0/m, z4.s, z5.s -; CHECK-NEXT: uunpklo z7.s, z3.h +; CHECK-NEXT: uunpklo z16.s, z16.h +; CHECK-NEXT: udivr z0.s, p0/m, z0.s, z5.s ; CHECK-NEXT: uunpklo z5.s, z2.h -; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8 ; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8 -; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h -; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h -; CHECK-NEXT: uunpklo z3.s, z3.h -; CHECK-NEXT: splice z6.h, p1, z6.h, z4.h ; CHECK-NEXT: uunpklo z2.s, z2.h -; CHECK-NEXT: uzp1 z4.b, z6.b, z6.b -; CHECK-NEXT: udiv z5.s, p0/m, z5.s, z7.s -; CHECK-NEXT: udiv z2.s, p0/m, z2.s, z3.s -; CHECK-NEXT: uunpklo z3.h, z1.b -; CHECK-NEXT: uunpklo z6.h, z0.b -; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 -; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h -; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h -; 
CHECK-NEXT: uunpklo z1.h, z1.b -; CHECK-NEXT: uunpklo z0.h, z0.b -; CHECK-NEXT: splice z5.h, p1, z5.h, z2.h -; CHECK-NEXT: uunpklo z2.s, z1.h -; CHECK-NEXT: uunpklo z7.s, z0.h -; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 -; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; CHECK-NEXT: uunpklo z1.s, z1.h -; CHECK-NEXT: uunpklo z0.s, z0.h -; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z7.s -; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h -; CHECK-NEXT: uunpklo z2.s, z3.h -; CHECK-NEXT: uunpklo z7.s, z6.h +; CHECK-NEXT: udiv z1.s, p0/m, z1.s, z4.s +; CHECK-NEXT: uunpklo z4.s, z3.h ; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8 -; CHECK-NEXT: ext z6.b, z6.b, z6.b, #8 ; CHECK-NEXT: uunpklo z3.s, z3.h -; CHECK-NEXT: uunpklo z6.s, z6.h -; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z7.s -; CHECK-NEXT: udivr z3.s, p0/m, z3.s, z6.s ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h +; CHECK-NEXT: udivr z4.s, p0/m, z4.s, z5.s +; CHECK-NEXT: mov z5.d, z7.d +; CHECK-NEXT: ext z5.b, z5.b, z7.b, #8 +; CHECK-NEXT: uunpklo z7.h, z7.b +; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h +; CHECK-NEXT: uunpklo z5.h, z5.b +; CHECK-NEXT: uunpklo z17.s, z5.h +; CHECK-NEXT: ext z5.b, z5.b, z5.b, #8 +; CHECK-NEXT: udiv z2.s, p0/m, z2.s, z3.s +; CHECK-NEXT: uunpklo z5.s, z5.h +; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h +; CHECK-NEXT: udivr z17.s, p0/m, z17.s, z18.s +; CHECK-NEXT: uunpklo z18.s, z6.h +; CHECK-NEXT: ext z6.b, z6.b, z6.b, #8 +; CHECK-NEXT: uunpklo z6.s, z6.h ; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h -; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h -; CHECK-NEXT: splice z1.h, p1, z1.h, z0.h -; CHECK-NEXT: splice z2.h, p1, z2.h, z3.h -; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b -; CHECK-NEXT: uzp1 z1.b, z2.b, z2.b -; CHECK-NEXT: uzp1 z2.b, z5.b, z5.b -; CHECK-NEXT: splice z1.b, p2, z1.b, z0.b -; CHECK-NEXT: splice z2.b, p2, z2.b, z4.b -; CHECK-NEXT: stp q1, q2, [x0] +; CHECK-NEXT: udivr z5.s, p0/m, z5.s, z16.s +; CHECK-NEXT: uunpklo z16.s, z7.h +; CHECK-NEXT: ext z7.b, z7.b, z7.b, #8 +; CHECK-NEXT: uunpklo z7.s, z7.h +; CHECK-NEXT: uzp1 z3.h, z17.h, z17.h +; CHECK-NEXT: udivr z16.s, p0/m, z16.s, z18.s +; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h +; CHECK-NEXT: udiv z6.s, p0/m, z6.s, z7.s +; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: uzp1 z7.h, z16.h, z16.h +; CHECK-NEXT: splice z3.h, p0, z3.h, z5.h +; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h +; CHECK-NEXT: splice z4.h, p0, z4.h, z2.h +; CHECK-NEXT: uzp1 z1.b, z3.b, z3.b +; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b +; CHECK-NEXT: uzp1 z3.b, z4.b, z4.b +; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h +; CHECK-NEXT: splice z7.h, p0, z7.h, z6.h +; CHECK-NEXT: ptrue p0.b, vl8 +; CHECK-NEXT: uzp1 z2.b, z7.b, z7.b +; CHECK-NEXT: splice z3.b, p0, z3.b, z0.b +; CHECK-NEXT: splice z2.b, p0, z2.b, z1.b +; CHECK-NEXT: stp q2, q3, [x0] ; CHECK-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b @@ -546,9 +554,9 @@ define <2 x i16> @udiv_v2i16(<2 x i16> %op1, <2 x i16> %op2) { ; CHECK-LABEL: udiv_v2i16: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s, vl2 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: ptrue p0.s, vl2 ; CHECK-NEXT: and z1.s, z1.s, #0xffff ; CHECK-NEXT: and z0.s, z0.s, #0xffff ; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s @@ -561,9 +569,9 @@ define <4 x i16> @udiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-LABEL: udiv_v4i16: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: uunpklo z1.s, z1.h ; CHECK-NEXT: uunpklo z0.s, z0.h 
; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s @@ -581,18 +589,18 @@ ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: mov z3.d, z0.d -; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8 -; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: ext z2.b, z2.b, z1.b, #8 +; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8 ; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: uunpklo z2.s, z2.h ; CHECK-NEXT: uunpklo z3.s, z3.h -; CHECK-NEXT: uunpklo z0.s, z0.h -; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z3.s ; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s -; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h -; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h +; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z3.s ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h +; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h ; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -603,34 +611,38 @@ define void @udiv_v16i16(ptr %a, ptr %b) { ; CHECK-LABEL: udiv_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q3, q0, [x1] +; CHECK-NEXT: ldp q4, q1, [x1] ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ptrue p1.h, vl4 -; CHECK-NEXT: uunpklo z6.s, z3.h -; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8 -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: uunpklo z4.s, z0.h -; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; CHECK-NEXT: uunpklo z0.s, z0.h -; CHECK-NEXT: uunpklo z7.s, z1.h -; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 +; CHECK-NEXT: ldr q0, [x0, #16] +; CHECK-NEXT: mov z2.d, z1.d +; CHECK-NEXT: mov z3.d, z0.d +; CHECK-NEXT: mov z5.d, z4.d +; CHECK-NEXT: ext z2.b, z2.b, z1.b, #8 +; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8 +; CHECK-NEXT: ext z5.b, z5.b, z4.b, #8 +; CHECK-NEXT: uunpklo z4.s, z4.h ; CHECK-NEXT: uunpklo z1.s, z1.h -; CHECK-NEXT: uunpklo z5.s, z2.h -; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8 +; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: uunpklo z2.s, z2.h -; CHECK-NEXT: udivr z4.s, p0/m, z4.s, z5.s -; CHECK-NEXT: udivr z0.s, p0/m, z0.s, z2.s -; CHECK-NEXT: uunpklo z2.s, z3.h -; CHECK-NEXT: udiv z1.s, p0/m, z1.s, z2.s -; CHECK-NEXT: movprfx z2, z7 -; CHECK-NEXT: udiv z2.s, p0/m, z2.s, z6.s +; CHECK-NEXT: uunpklo z3.s, z3.h +; CHECK-NEXT: uunpklo z5.s, z5.h +; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z3.s +; CHECK-NEXT: ldr q3, [x0] +; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h -; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h +; CHECK-NEXT: ext z6.b, z6.b, z3.b, #8 +; CHECK-NEXT: uunpklo z3.s, z3.h +; CHECK-NEXT: uunpklo z6.s, z6.h +; CHECK-NEXT: udivr z5.s, p0/m, z5.s, z6.s ; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h -; CHECK-NEXT: uzp1 z3.h, z4.h, z4.h -; CHECK-NEXT: splice z2.h, p1, z2.h, z1.h -; CHECK-NEXT: splice z3.h, p1, z3.h, z0.h -; CHECK-NEXT: stp q2, q3, [x0] +; CHECK-NEXT: udiv z3.s, p0/m, z3.s, z4.s +; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: uzp1 z1.h, z5.h, z5.h +; CHECK-NEXT: splice z0.h, p0, z0.h, z2.h +; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h +; CHECK-NEXT: splice z3.h, p0, z3.h, z1.h +; CHECK-NEXT: stp q3, q0, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b @@ -642,8 +654,8 @@ define <2 x i32> @udiv_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-LABEL: udiv_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -655,8 
+667,8 @@ define <4 x i32> @udiv_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-LABEL: udiv_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -668,10 +680,11 @@ define void @udiv_v8i32(ptr %a, ptr %b) { ; CHECK-LABEL: udiv_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: udiv z0.s, p0/m, z0.s, z2.s +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: udivr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: udiv z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -685,8 +698,8 @@ define <1 x i64> @udiv_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-LABEL: udiv_v1i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: udiv z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -698,8 +711,8 @@ define <2 x i64> @udiv_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-LABEL: udiv_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: udiv z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -711,10 +724,11 @@ define void @udiv_v4i64(ptr %a, ptr %b) { ; CHECK-LABEL: udiv_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: udiv z0.d, p0/m, z0.d, z2.d +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: udivr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: udiv z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -728,40 +742,40 @@ define void @udiv_constantsplat_v8i32(ptr %a) { ; SVE-LABEL: udiv_constantsplat_v8i32: ; SVE: // %bb.0: -; SVE-NEXT: ldp q0, q1, [x0] -; SVE-NEXT: mov w8, #8969 // =0x2309 ; SVE-NEXT: ptrue p0.s, vl4 +; SVE-NEXT: mov w8, #8969 // =0x2309 ; SVE-NEXT: movk w8, #22765, lsl #16 -; SVE-NEXT: mov z2.s, w8 -; SVE-NEXT: movprfx z3, z0 -; SVE-NEXT: umulh z3.s, p0/m, z3.s, z2.s -; SVE-NEXT: umulh z2.s, p0/m, z2.s, z1.s -; SVE-NEXT: sub z0.s, z0.s, z3.s -; SVE-NEXT: sub z1.s, z1.s, z2.s -; SVE-NEXT: lsr z0.s, z0.s, #1 +; SVE-NEXT: ldp q1, q2, [x0] +; SVE-NEXT: mov z0.s, w8 +; SVE-NEXT: movprfx z3, z1 +; SVE-NEXT: umulh z3.s, p0/m, z3.s, z0.s +; SVE-NEXT: sub z1.s, z1.s, z3.s +; SVE-NEXT: umulh z0.s, p0/m, z0.s, z2.s ; SVE-NEXT: lsr z1.s, z1.s, #1 -; SVE-NEXT: add z0.s, z0.s, z3.s -; SVE-NEXT: add z1.s, z1.s, z2.s -; SVE-NEXT: lsr z0.s, z0.s, #6 +; SVE-NEXT: sub z2.s, z2.s, z0.s +; SVE-NEXT: add z1.s, z1.s, z3.s +; SVE-NEXT: lsr z2.s, z2.s, #1 ; SVE-NEXT: lsr z1.s, z1.s, #6 -; SVE-NEXT: stp q0, q1, [x0] +; SVE-NEXT: add z0.s, z2.s, z0.s +; SVE-NEXT: lsr z0.s, z0.s, #6 +; SVE-NEXT: stp q1, q0, [x0] ; SVE-NEXT: ret ; ; SVE2-LABEL: udiv_constantsplat_v8i32: ; SVE2: // %bb.0: -; SVE2-NEXT: ldp q0, q1, [x0] ; SVE2-NEXT: mov w8, #8969 // =0x2309 +; SVE2-NEXT: ldp q1, q2, [x0] ; SVE2-NEXT: movk w8, #22765, lsl #16 -; SVE2-NEXT: mov z2.s, w8 -; SVE2-NEXT: umulh z3.s, 
z0.s, z2.s -; SVE2-NEXT: umulh z2.s, z1.s, z2.s -; SVE2-NEXT: sub z0.s, z0.s, z3.s -; SVE2-NEXT: sub z1.s, z1.s, z2.s -; SVE2-NEXT: usra z3.s, z0.s, #1 -; SVE2-NEXT: usra z2.s, z1.s, #1 -; SVE2-NEXT: lsr z0.s, z3.s, #6 -; SVE2-NEXT: lsr z1.s, z2.s, #6 -; SVE2-NEXT: stp q0, q1, [x0] +; SVE2-NEXT: mov z0.s, w8 +; SVE2-NEXT: umulh z3.s, z1.s, z0.s +; SVE2-NEXT: umulh z0.s, z2.s, z0.s +; SVE2-NEXT: sub z1.s, z1.s, z3.s +; SVE2-NEXT: sub z2.s, z2.s, z0.s +; SVE2-NEXT: usra z3.s, z1.s, #1 +; SVE2-NEXT: usra z0.s, z2.s, #1 +; SVE2-NEXT: lsr z1.s, z3.s, #6 +; SVE2-NEXT: lsr z0.s, z0.s, #6 +; SVE2-NEXT: stp q1, q0, [x0] ; SVE2-NEXT: ret %op1 = load <8 x i32>, ptr %a %res = udiv <8 x i32> %op1, diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-extends.ll @@ -78,17 +78,17 @@ define void @sext_v32i8_v32i16(ptr %in, ptr %out) { ; CHECK-LABEL: sext_v32i8_v32i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: add z0.b, z0.b, z0.b +; CHECK-NEXT: add z1.b, z1.b, z1.b ; CHECK-NEXT: sunpklo z2.h, z0.b ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; CHECK-NEXT: add z1.b, z1.b, z1.b -; CHECK-NEXT: sunpklo z0.h, z0.b ; CHECK-NEXT: sunpklo z3.h, z1.b ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 +; CHECK-NEXT: sunpklo z0.h, z0.b ; CHECK-NEXT: sunpklo z1.h, z1.b -; CHECK-NEXT: stp q2, q0, [x1] -; CHECK-NEXT: stp q3, q1, [x1, #32] +; CHECK-NEXT: stp q2, q0, [x1, #32] +; CHECK-NEXT: stp q3, q1, [x1] ; CHECK-NEXT: ret %a = load <32 x i8>, ptr %in %b = add <32 x i8> %a, %a @@ -122,15 +122,15 @@ ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: sunpklo z1.h, z0.b ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; CHECK-NEXT: sunpklo z2.s, z1.h ; CHECK-NEXT: sunpklo z0.h, z0.b +; CHECK-NEXT: sunpklo z2.s, z1.h ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 +; CHECK-NEXT: sunpklo z1.s, z1.h ; CHECK-NEXT: sunpklo z3.s, z0.h ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 ; CHECK-NEXT: sunpklo z0.s, z0.h +; CHECK-NEXT: stp q2, q1, [x0] ; CHECK-NEXT: stp q3, q0, [x0, #32] -; CHECK-NEXT: sunpklo z0.s, z1.h -; CHECK-NEXT: stp q2, q0, [x0] ; CHECK-NEXT: ret %b = sext <16 x i8> %a to <16 x i32> store <16 x i32> %b, ptr %out @@ -140,31 +140,31 @@ define void @sext_v32i8_v32i32(ptr %in, ptr %out) { ; CHECK-LABEL: sext_v32i8_v32i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: add z0.b, z0.b, z0.b +; CHECK-NEXT: add z1.b, z1.b, z1.b ; CHECK-NEXT: sunpklo z2.h, z0.b ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; CHECK-NEXT: add z1.b, z1.b, z1.b -; CHECK-NEXT: sunpklo z0.h, z0.b ; CHECK-NEXT: sunpklo z3.h, z1.b ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 -; CHECK-NEXT: sunpklo z5.s, z3.h -; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8 +; CHECK-NEXT: sunpklo z0.h, z0.b ; CHECK-NEXT: sunpklo z1.h, z1.b -; CHECK-NEXT: sunpklo z3.s, z3.h ; CHECK-NEXT: sunpklo z4.s, z2.h ; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8 +; CHECK-NEXT: sunpklo z5.s, z3.h +; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8 ; CHECK-NEXT: sunpklo z6.s, z0.h -; CHECK-NEXT: stp q5, q3, [x1, #64] -; CHECK-NEXT: sunpklo z5.s, z1.h -; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 ; CHECK-NEXT: sunpklo z2.s, z2.h -; CHECK-NEXT: sunpklo z1.s, z1.h +; CHECK-NEXT: sunpklo z7.s, z1.h +; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 +; CHECK-NEXT: sunpklo 
z3.s, z3.h ; CHECK-NEXT: sunpklo z0.s, z0.h -; CHECK-NEXT: stp q4, q2, [x1] -; CHECK-NEXT: stp q6, q0, [x1, #32] -; CHECK-NEXT: stp q5, q1, [x1, #96] +; CHECK-NEXT: sunpklo z1.s, z1.h +; CHECK-NEXT: stp q4, q2, [x1, #64] +; CHECK-NEXT: stp q5, q3, [x1] +; CHECK-NEXT: stp q6, q0, [x1, #96] +; CHECK-NEXT: stp q7, q1, [x1, #32] ; CHECK-NEXT: ret %a = load <32 x i8>, ptr %in %b = add <32 x i8> %a, %a @@ -226,29 +226,31 @@ ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: sunpklo z1.h, z0.b ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; CHECK-NEXT: sunpklo z2.s, z1.h ; CHECK-NEXT: sunpklo z0.h, z0.b +; CHECK-NEXT: sunpklo z2.s, z1.h ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 -; CHECK-NEXT: sunpklo z3.s, z0.h -; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; CHECK-NEXT: sunpklo z0.s, z0.h ; CHECK-NEXT: sunpklo z1.s, z1.h -; CHECK-NEXT: sunpklo z6.d, z0.s +; CHECK-NEXT: sunpklo z3.s, z0.h ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 ; CHECK-NEXT: sunpklo z4.d, z2.s +; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8 +; CHECK-NEXT: sunpklo z0.s, z0.h +; CHECK-NEXT: mov z7.d, z1.d +; CHECK-NEXT: sunpklo z2.d, z2.s ; CHECK-NEXT: sunpklo z5.d, z3.s ; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8 -; CHECK-NEXT: sunpklo z7.d, z1.s -; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8 -; CHECK-NEXT: sunpklo z0.d, z0.s -; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 +; CHECK-NEXT: ext z7.b, z7.b, z1.b, #8 +; CHECK-NEXT: sunpklo z1.d, z1.s +; CHECK-NEXT: mov z6.d, z0.d ; CHECK-NEXT: sunpklo z3.d, z3.s -; CHECK-NEXT: sunpklo z2.d, z2.s -; CHECK-NEXT: stp q5, q3, [x0, #64] ; CHECK-NEXT: stp q4, q2, [x0] -; CHECK-NEXT: stp q6, q0, [x0, #96] -; CHECK-NEXT: sunpklo z0.d, z1.s -; CHECK-NEXT: stp q7, q0, [x0, #32] +; CHECK-NEXT: sunpklo z4.d, z7.s +; CHECK-NEXT: ext z6.b, z6.b, z0.b, #8 +; CHECK-NEXT: sunpklo z0.d, z0.s +; CHECK-NEXT: stp q5, q3, [x0, #64] +; CHECK-NEXT: sunpklo z2.d, z6.s +; CHECK-NEXT: stp q1, q4, [x0, #32] +; CHECK-NEXT: stp q0, q2, [x0, #96] ; CHECK-NEXT: ret %b = sext <16 x i8> %a to <16 x i64> store <16 x i64> %b, ptr %out @@ -258,59 +260,65 @@ define void @sext_v32i8_v32i64(ptr %in, ptr %out) { ; CHECK-LABEL: sext_v32i8_v32i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: add z0.b, z0.b, z0.b -; CHECK-NEXT: sunpklo z2.h, z0.b -; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 ; CHECK-NEXT: add z1.b, z1.b, z1.b +; CHECK-NEXT: mov z2.d, z0.d ; CHECK-NEXT: sunpklo z0.h, z0.b -; CHECK-NEXT: sunpklo z3.h, z1.b -; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 -; CHECK-NEXT: sunpklo z5.s, z3.h +; CHECK-NEXT: mov z3.d, z1.d ; CHECK-NEXT: sunpklo z1.h, z1.b -; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8 -; CHECK-NEXT: sunpklo z4.s, z2.h ; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8 -; CHECK-NEXT: sunpklo z6.s, z0.h -; CHECK-NEXT: sunpklo z7.s, z1.h +; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8 +; CHECK-NEXT: sunpklo z4.s, z0.h ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 +; CHECK-NEXT: sunpklo z5.s, z1.h ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 -; CHECK-NEXT: sunpklo z3.s, z3.h -; CHECK-NEXT: sunpklo z2.s, z2.h +; CHECK-NEXT: sunpklo z2.h, z2.b +; CHECK-NEXT: sunpklo z3.h, z3.b ; CHECK-NEXT: sunpklo z0.s, z0.h -; CHECK-NEXT: sunpklo z1.s, z1.h -; CHECK-NEXT: sunpklo z20.d, z3.s -; CHECK-NEXT: sunpklo z22.d, z4.s +; CHECK-NEXT: sunpklo z16.d, z4.s ; CHECK-NEXT: ext z4.b, z4.b, z4.b, #8 -; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8 -; CHECK-NEXT: sunpklo z16.d, z2.s -; CHECK-NEXT: sunpklo z17.d, z6.s -; CHECK-NEXT: sunpklo z18.d, z0.s -; CHECK-NEXT: sunpklo z19.d, z1.s -; CHECK-NEXT: sunpklo z21.d, z7.s -; CHECK-NEXT: sunpklo z23.d, z5.s +; 
CHECK-NEXT: sunpklo z1.s, z1.h +; CHECK-NEXT: sunpklo z17.d, z5.s ; CHECK-NEXT: ext z5.b, z5.b, z5.b, #8 -; CHECK-NEXT: sunpklo z4.d, z4.s -; CHECK-NEXT: ext z7.b, z7.b, z7.b, #8 -; CHECK-NEXT: sunpklo z3.d, z3.s -; CHECK-NEXT: ext z6.b, z6.b, z6.b, #8 +; CHECK-NEXT: sunpklo z6.s, z2.h +; CHECK-NEXT: sunpklo z7.s, z3.h ; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8 -; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 -; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 +; CHECK-NEXT: sunpklo z4.d, z4.s +; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8 +; CHECK-NEXT: sunpklo z19.d, z0.s ; CHECK-NEXT: sunpklo z5.d, z5.s -; CHECK-NEXT: stp q22, q4, [x1] -; CHECK-NEXT: sunpklo z4.d, z7.s -; CHECK-NEXT: stp q23, q5, [x1, #128] -; CHECK-NEXT: sunpklo z2.d, z2.s -; CHECK-NEXT: stp q20, q3, [x1, #160] -; CHECK-NEXT: sunpklo z3.d, z6.s -; CHECK-NEXT: sunpklo z1.d, z1.s +; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 +; CHECK-NEXT: sunpklo z2.s, z2.h +; CHECK-NEXT: sunpklo z18.d, z6.s +; CHECK-NEXT: ext z6.b, z6.b, z6.b, #8 +; CHECK-NEXT: sunpklo z3.s, z3.h +; CHECK-NEXT: stp q16, q4, [x1, #128] +; CHECK-NEXT: mov z16.d, z7.d ; CHECK-NEXT: sunpklo z0.d, z0.s -; CHECK-NEXT: stp q16, q2, [x1, #32] -; CHECK-NEXT: stp q17, q3, [x1, #64] -; CHECK-NEXT: stp q18, q0, [x1, #96] -; CHECK-NEXT: stp q21, q4, [x1, #192] -; CHECK-NEXT: stp q19, q1, [x1, #224] +; CHECK-NEXT: stp q17, q5, [x1] +; CHECK-NEXT: sunpklo z5.d, z7.s +; CHECK-NEXT: sunpklo z4.d, z6.s +; CHECK-NEXT: mov z6.d, z1.d +; CHECK-NEXT: ext z16.b, z16.b, z7.b, #8 +; CHECK-NEXT: mov z7.d, z2.d +; CHECK-NEXT: stp q19, q0, [x1, #160] +; CHECK-NEXT: sunpklo z0.d, z2.s +; CHECK-NEXT: ext z6.b, z6.b, z1.b, #8 +; CHECK-NEXT: sunpklo z1.d, z1.s +; CHECK-NEXT: stp q18, q4, [x1, #192] +; CHECK-NEXT: mov z4.d, z3.d +; CHECK-NEXT: ext z7.b, z7.b, z2.b, #8 +; CHECK-NEXT: sunpklo z16.d, z16.s +; CHECK-NEXT: sunpklo z6.d, z6.s +; CHECK-NEXT: ext z4.b, z4.b, z3.b, #8 +; CHECK-NEXT: sunpklo z2.d, z7.s +; CHECK-NEXT: sunpklo z3.d, z3.s +; CHECK-NEXT: stp q5, q16, [x1, #64] +; CHECK-NEXT: stp q1, q6, [x1, #32] +; CHECK-NEXT: sunpklo z1.d, z4.s +; CHECK-NEXT: stp q0, q2, [x1, #224] +; CHECK-NEXT: stp q3, q1, [x1, #96] ; CHECK-NEXT: ret %a = load <32 x i8>, ptr %in %b = add <32 x i8> %a, %a @@ -340,17 +348,17 @@ define void @sext_v16i16_v16i32(ptr %in, ptr %out) { ; CHECK-LABEL: sext_v16i16_v16i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: add z0.h, z0.h, z0.h +; CHECK-NEXT: add z1.h, z1.h, z1.h ; CHECK-NEXT: sunpklo z2.s, z0.h ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; CHECK-NEXT: add z1.h, z1.h, z1.h -; CHECK-NEXT: sunpklo z0.s, z0.h ; CHECK-NEXT: sunpklo z3.s, z1.h ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 +; CHECK-NEXT: sunpklo z0.s, z0.h ; CHECK-NEXT: sunpklo z1.s, z1.h -; CHECK-NEXT: stp q2, q0, [x1] -; CHECK-NEXT: stp q3, q1, [x1, #32] +; CHECK-NEXT: stp q2, q0, [x1, #32] +; CHECK-NEXT: stp q3, q1, [x1] ; CHECK-NEXT: ret %a = load <16 x i16>, ptr %in %b = add <16 x i16> %a, %a @@ -384,15 +392,15 @@ ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: sunpklo z1.s, z0.h ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; CHECK-NEXT: sunpklo z2.d, z1.s ; CHECK-NEXT: sunpklo z0.s, z0.h +; CHECK-NEXT: sunpklo z2.d, z1.s ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 +; CHECK-NEXT: sunpklo z1.d, z1.s ; CHECK-NEXT: sunpklo z3.d, z0.s ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 ; CHECK-NEXT: sunpklo z0.d, z0.s +; CHECK-NEXT: stp q2, q1, [x0] ; CHECK-NEXT: stp q3, q0, [x0, #32] -; CHECK-NEXT: sunpklo z0.d, z1.s -; CHECK-NEXT: stp q2, q0, [x0] ; CHECK-NEXT: ret %b = sext <8 x i16> 
%a to <8 x i64> store <8 x i64>%b, ptr %out @@ -402,31 +410,31 @@ define void @sext_v16i16_v16i64(ptr %in, ptr %out) { ; CHECK-LABEL: sext_v16i16_v16i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: add z0.h, z0.h, z0.h +; CHECK-NEXT: add z1.h, z1.h, z1.h ; CHECK-NEXT: sunpklo z2.s, z0.h ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; CHECK-NEXT: add z1.h, z1.h, z1.h -; CHECK-NEXT: sunpklo z0.s, z0.h ; CHECK-NEXT: sunpklo z3.s, z1.h ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 -; CHECK-NEXT: sunpklo z5.d, z3.s -; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8 +; CHECK-NEXT: sunpklo z0.s, z0.h ; CHECK-NEXT: sunpklo z1.s, z1.h -; CHECK-NEXT: sunpklo z3.d, z3.s ; CHECK-NEXT: sunpklo z4.d, z2.s ; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8 +; CHECK-NEXT: sunpklo z5.d, z3.s +; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8 ; CHECK-NEXT: sunpklo z6.d, z0.s -; CHECK-NEXT: stp q5, q3, [x1, #64] -; CHECK-NEXT: sunpklo z5.d, z1.s -; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 ; CHECK-NEXT: sunpklo z2.d, z2.s -; CHECK-NEXT: sunpklo z1.d, z1.s +; CHECK-NEXT: sunpklo z7.d, z1.s +; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 +; CHECK-NEXT: sunpklo z3.d, z3.s ; CHECK-NEXT: sunpklo z0.d, z0.s -; CHECK-NEXT: stp q4, q2, [x1] -; CHECK-NEXT: stp q6, q0, [x1, #32] -; CHECK-NEXT: stp q5, q1, [x1, #96] +; CHECK-NEXT: sunpklo z1.d, z1.s +; CHECK-NEXT: stp q4, q2, [x1, #64] +; CHECK-NEXT: stp q5, q3, [x1] +; CHECK-NEXT: stp q6, q0, [x1, #96] +; CHECK-NEXT: stp q7, q1, [x1, #32] ; CHECK-NEXT: ret %a = load <16 x i16>, ptr %in %b = add <16 x i16> %a, %a @@ -456,17 +464,17 @@ define void @sext_v8i32_v8i64(ptr %in, ptr %out) { ; CHECK-LABEL: sext_v8i32_v8i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: add z0.s, z0.s, z0.s +; CHECK-NEXT: add z1.s, z1.s, z1.s ; CHECK-NEXT: sunpklo z2.d, z0.s ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; CHECK-NEXT: add z1.s, z1.s, z1.s -; CHECK-NEXT: sunpklo z0.d, z0.s ; CHECK-NEXT: sunpklo z3.d, z1.s ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 +; CHECK-NEXT: sunpklo z0.d, z0.s ; CHECK-NEXT: sunpklo z1.d, z1.s -; CHECK-NEXT: stp q2, q0, [x1] -; CHECK-NEXT: stp q3, q1, [x1, #32] +; CHECK-NEXT: stp q2, q0, [x1, #32] +; CHECK-NEXT: stp q3, q1, [x1] ; CHECK-NEXT: ret %a = load <8 x i32>, ptr %in %b = add <8 x i32> %a, %a @@ -497,17 +505,17 @@ define void @zext_v32i8_v32i16(ptr %in, ptr %out) { ; CHECK-LABEL: zext_v32i8_v32i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: add z0.b, z0.b, z0.b +; CHECK-NEXT: add z1.b, z1.b, z1.b ; CHECK-NEXT: uunpklo z2.h, z0.b ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; CHECK-NEXT: add z1.b, z1.b, z1.b -; CHECK-NEXT: uunpklo z0.h, z0.b ; CHECK-NEXT: uunpklo z3.h, z1.b ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 +; CHECK-NEXT: uunpklo z0.h, z0.b ; CHECK-NEXT: uunpklo z1.h, z1.b -; CHECK-NEXT: stp q2, q0, [x1] -; CHECK-NEXT: stp q3, q1, [x1, #32] +; CHECK-NEXT: stp q2, q0, [x1, #32] +; CHECK-NEXT: stp q3, q1, [x1] ; CHECK-NEXT: ret %a = load <32 x i8>, ptr %in %b = add <32 x i8> %a, %a @@ -541,15 +549,15 @@ ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: uunpklo z1.h, z0.b ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; CHECK-NEXT: uunpklo z2.s, z1.h ; CHECK-NEXT: uunpklo z0.h, z0.b +; CHECK-NEXT: uunpklo z2.s, z1.h ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 +; CHECK-NEXT: uunpklo z1.s, z1.h ; CHECK-NEXT: uunpklo z3.s, z0.h ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 ; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: stp q2, q1, [x0] ; 
CHECK-NEXT: stp q3, q0, [x0, #32] -; CHECK-NEXT: uunpklo z0.s, z1.h -; CHECK-NEXT: stp q2, q0, [x0] ; CHECK-NEXT: ret %b = zext <16 x i8> %a to <16 x i32> store <16 x i32> %b, ptr %out @@ -559,31 +567,31 @@ define void @zext_v32i8_v32i32(ptr %in, ptr %out) { ; CHECK-LABEL: zext_v32i8_v32i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: add z0.b, z0.b, z0.b +; CHECK-NEXT: add z1.b, z1.b, z1.b ; CHECK-NEXT: uunpklo z2.h, z0.b ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; CHECK-NEXT: add z1.b, z1.b, z1.b -; CHECK-NEXT: uunpklo z0.h, z0.b ; CHECK-NEXT: uunpklo z3.h, z1.b ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 -; CHECK-NEXT: uunpklo z5.s, z3.h -; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8 +; CHECK-NEXT: uunpklo z0.h, z0.b ; CHECK-NEXT: uunpklo z1.h, z1.b -; CHECK-NEXT: uunpklo z3.s, z3.h ; CHECK-NEXT: uunpklo z4.s, z2.h ; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8 +; CHECK-NEXT: uunpklo z5.s, z3.h +; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8 ; CHECK-NEXT: uunpklo z6.s, z0.h -; CHECK-NEXT: stp q5, q3, [x1, #64] -; CHECK-NEXT: uunpklo z5.s, z1.h -; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 ; CHECK-NEXT: uunpklo z2.s, z2.h -; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: uunpklo z7.s, z1.h +; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 +; CHECK-NEXT: uunpklo z3.s, z3.h ; CHECK-NEXT: uunpklo z0.s, z0.h -; CHECK-NEXT: stp q4, q2, [x1] -; CHECK-NEXT: stp q6, q0, [x1, #32] -; CHECK-NEXT: stp q5, q1, [x1, #96] +; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: stp q4, q2, [x1, #64] +; CHECK-NEXT: stp q5, q3, [x1] +; CHECK-NEXT: stp q6, q0, [x1, #96] +; CHECK-NEXT: stp q7, q1, [x1, #32] ; CHECK-NEXT: ret %a = load <32 x i8>, ptr %in %b = add <32 x i8> %a, %a @@ -643,29 +651,31 @@ ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: uunpklo z1.h, z0.b ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; CHECK-NEXT: uunpklo z2.s, z1.h ; CHECK-NEXT: uunpklo z0.h, z0.b +; CHECK-NEXT: uunpklo z2.s, z1.h ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 -; CHECK-NEXT: uunpklo z3.s, z0.h -; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: uunpklo z1.s, z1.h -; CHECK-NEXT: uunpklo z6.d, z0.s +; CHECK-NEXT: uunpklo z3.s, z0.h ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 ; CHECK-NEXT: uunpklo z4.d, z2.s +; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8 +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: mov z7.d, z1.d +; CHECK-NEXT: uunpklo z2.d, z2.s ; CHECK-NEXT: uunpklo z5.d, z3.s ; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8 -; CHECK-NEXT: uunpklo z7.d, z1.s -; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8 -; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 +; CHECK-NEXT: ext z7.b, z7.b, z1.b, #8 +; CHECK-NEXT: uunpklo z1.d, z1.s +; CHECK-NEXT: mov z6.d, z0.d ; CHECK-NEXT: uunpklo z3.d, z3.s -; CHECK-NEXT: uunpklo z2.d, z2.s -; CHECK-NEXT: stp q5, q3, [x0, #64] ; CHECK-NEXT: stp q4, q2, [x0] -; CHECK-NEXT: stp q6, q0, [x0, #96] -; CHECK-NEXT: uunpklo z0.d, z1.s -; CHECK-NEXT: stp q7, q0, [x0, #32] +; CHECK-NEXT: uunpklo z4.d, z7.s +; CHECK-NEXT: ext z6.b, z6.b, z0.b, #8 +; CHECK-NEXT: uunpklo z0.d, z0.s +; CHECK-NEXT: stp q5, q3, [x0, #64] +; CHECK-NEXT: uunpklo z2.d, z6.s +; CHECK-NEXT: stp q1, q4, [x0, #32] +; CHECK-NEXT: stp q0, q2, [x0, #96] ; CHECK-NEXT: ret %b = zext <16 x i8> %a to <16 x i64> store <16 x i64> %b, ptr %out @@ -675,59 +685,65 @@ define void @zext_v32i8_v32i64(ptr %in, ptr %out) { ; CHECK-LABEL: zext_v32i8_v32i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: add z0.b, 
z0.b, z0.b -; CHECK-NEXT: uunpklo z2.h, z0.b -; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 ; CHECK-NEXT: add z1.b, z1.b, z1.b +; CHECK-NEXT: mov z2.d, z0.d ; CHECK-NEXT: uunpklo z0.h, z0.b -; CHECK-NEXT: uunpklo z3.h, z1.b -; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 -; CHECK-NEXT: uunpklo z5.s, z3.h +; CHECK-NEXT: mov z3.d, z1.d ; CHECK-NEXT: uunpklo z1.h, z1.b -; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8 -; CHECK-NEXT: uunpklo z4.s, z2.h ; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8 -; CHECK-NEXT: uunpklo z6.s, z0.h -; CHECK-NEXT: uunpklo z7.s, z1.h +; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8 +; CHECK-NEXT: uunpklo z4.s, z0.h ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 +; CHECK-NEXT: uunpklo z5.s, z1.h ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 -; CHECK-NEXT: uunpklo z3.s, z3.h -; CHECK-NEXT: uunpklo z2.s, z2.h +; CHECK-NEXT: uunpklo z2.h, z2.b +; CHECK-NEXT: uunpklo z3.h, z3.b ; CHECK-NEXT: uunpklo z0.s, z0.h -; CHECK-NEXT: uunpklo z1.s, z1.h -; CHECK-NEXT: uunpklo z20.d, z3.s -; CHECK-NEXT: uunpklo z22.d, z4.s +; CHECK-NEXT: uunpklo z16.d, z4.s ; CHECK-NEXT: ext z4.b, z4.b, z4.b, #8 -; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8 -; CHECK-NEXT: uunpklo z16.d, z2.s -; CHECK-NEXT: uunpklo z17.d, z6.s -; CHECK-NEXT: uunpklo z18.d, z0.s -; CHECK-NEXT: uunpklo z19.d, z1.s -; CHECK-NEXT: uunpklo z21.d, z7.s -; CHECK-NEXT: uunpklo z23.d, z5.s +; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: uunpklo z17.d, z5.s ; CHECK-NEXT: ext z5.b, z5.b, z5.b, #8 -; CHECK-NEXT: uunpklo z4.d, z4.s -; CHECK-NEXT: ext z7.b, z7.b, z7.b, #8 -; CHECK-NEXT: uunpklo z3.d, z3.s -; CHECK-NEXT: ext z6.b, z6.b, z6.b, #8 +; CHECK-NEXT: uunpklo z6.s, z2.h +; CHECK-NEXT: uunpklo z7.s, z3.h ; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8 -; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 -; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 +; CHECK-NEXT: uunpklo z4.d, z4.s +; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8 +; CHECK-NEXT: uunpklo z19.d, z0.s ; CHECK-NEXT: uunpklo z5.d, z5.s -; CHECK-NEXT: stp q22, q4, [x1] -; CHECK-NEXT: uunpklo z4.d, z7.s -; CHECK-NEXT: stp q23, q5, [x1, #128] -; CHECK-NEXT: uunpklo z2.d, z2.s -; CHECK-NEXT: stp q20, q3, [x1, #160] -; CHECK-NEXT: uunpklo z3.d, z6.s -; CHECK-NEXT: uunpklo z1.d, z1.s +; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 +; CHECK-NEXT: uunpklo z2.s, z2.h +; CHECK-NEXT: uunpklo z18.d, z6.s +; CHECK-NEXT: ext z6.b, z6.b, z6.b, #8 +; CHECK-NEXT: uunpklo z3.s, z3.h +; CHECK-NEXT: stp q16, q4, [x1, #128] +; CHECK-NEXT: mov z16.d, z7.d ; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: stp q16, q2, [x1, #32] -; CHECK-NEXT: stp q17, q3, [x1, #64] -; CHECK-NEXT: stp q18, q0, [x1, #96] -; CHECK-NEXT: stp q21, q4, [x1, #192] -; CHECK-NEXT: stp q19, q1, [x1, #224] +; CHECK-NEXT: stp q17, q5, [x1] +; CHECK-NEXT: uunpklo z5.d, z7.s +; CHECK-NEXT: uunpklo z4.d, z6.s +; CHECK-NEXT: mov z6.d, z1.d +; CHECK-NEXT: ext z16.b, z16.b, z7.b, #8 +; CHECK-NEXT: mov z7.d, z2.d +; CHECK-NEXT: stp q19, q0, [x1, #160] +; CHECK-NEXT: uunpklo z0.d, z2.s +; CHECK-NEXT: ext z6.b, z6.b, z1.b, #8 +; CHECK-NEXT: uunpklo z1.d, z1.s +; CHECK-NEXT: stp q18, q4, [x1, #192] +; CHECK-NEXT: mov z4.d, z3.d +; CHECK-NEXT: ext z7.b, z7.b, z2.b, #8 +; CHECK-NEXT: uunpklo z16.d, z16.s +; CHECK-NEXT: uunpklo z6.d, z6.s +; CHECK-NEXT: ext z4.b, z4.b, z3.b, #8 +; CHECK-NEXT: uunpklo z2.d, z7.s +; CHECK-NEXT: uunpklo z3.d, z3.s +; CHECK-NEXT: stp q5, q16, [x1, #64] +; CHECK-NEXT: stp q1, q6, [x1, #32] +; CHECK-NEXT: uunpklo z1.d, z4.s +; CHECK-NEXT: stp q0, q2, [x1, #224] +; CHECK-NEXT: stp q3, q1, [x1, #96] ; CHECK-NEXT: ret %a = load <32 x i8>, ptr %in %b = add <32 x i8> %a, %a @@ -757,17 +773,17 @@ define 
void @zext_v16i16_v16i32(ptr %in, ptr %out) { ; CHECK-LABEL: zext_v16i16_v16i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: add z0.h, z0.h, z0.h +; CHECK-NEXT: add z1.h, z1.h, z1.h ; CHECK-NEXT: uunpklo z2.s, z0.h ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; CHECK-NEXT: add z1.h, z1.h, z1.h -; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: uunpklo z3.s, z1.h ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 +; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: uunpklo z1.s, z1.h -; CHECK-NEXT: stp q2, q0, [x1] -; CHECK-NEXT: stp q3, q1, [x1, #32] +; CHECK-NEXT: stp q2, q0, [x1, #32] +; CHECK-NEXT: stp q3, q1, [x1] ; CHECK-NEXT: ret %a = load <16 x i16>, ptr %in %b = add <16 x i16> %a, %a @@ -801,15 +817,15 @@ ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: uunpklo z1.s, z0.h ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; CHECK-NEXT: uunpklo z2.d, z1.s ; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: uunpklo z2.d, z1.s ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 +; CHECK-NEXT: uunpklo z1.d, z1.s ; CHECK-NEXT: uunpklo z3.d, z0.s ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 ; CHECK-NEXT: uunpklo z0.d, z0.s +; CHECK-NEXT: stp q2, q1, [x0] ; CHECK-NEXT: stp q3, q0, [x0, #32] -; CHECK-NEXT: uunpklo z0.d, z1.s -; CHECK-NEXT: stp q2, q0, [x0] ; CHECK-NEXT: ret %b = zext <8 x i16> %a to <8 x i64> store <8 x i64>%b, ptr %out @@ -819,31 +835,31 @@ define void @zext_v16i16_v16i64(ptr %in, ptr %out) { ; CHECK-LABEL: zext_v16i16_v16i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: add z0.h, z0.h, z0.h +; CHECK-NEXT: add z1.h, z1.h, z1.h ; CHECK-NEXT: uunpklo z2.s, z0.h ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; CHECK-NEXT: add z1.h, z1.h, z1.h -; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: uunpklo z3.s, z1.h ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 -; CHECK-NEXT: uunpklo z5.d, z3.s -; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8 +; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: uunpklo z1.s, z1.h -; CHECK-NEXT: uunpklo z3.d, z3.s ; CHECK-NEXT: uunpklo z4.d, z2.s ; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8 +; CHECK-NEXT: uunpklo z5.d, z3.s +; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8 ; CHECK-NEXT: uunpklo z6.d, z0.s -; CHECK-NEXT: stp q5, q3, [x1, #64] -; CHECK-NEXT: uunpklo z5.d, z1.s -; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 ; CHECK-NEXT: uunpklo z2.d, z2.s -; CHECK-NEXT: uunpklo z1.d, z1.s +; CHECK-NEXT: uunpklo z7.d, z1.s +; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 +; CHECK-NEXT: uunpklo z3.d, z3.s ; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: stp q4, q2, [x1] -; CHECK-NEXT: stp q6, q0, [x1, #32] -; CHECK-NEXT: stp q5, q1, [x1, #96] +; CHECK-NEXT: uunpklo z1.d, z1.s +; CHECK-NEXT: stp q4, q2, [x1, #64] +; CHECK-NEXT: stp q5, q3, [x1] +; CHECK-NEXT: stp q6, q0, [x1, #96] +; CHECK-NEXT: stp q7, q1, [x1, #32] ; CHECK-NEXT: ret %a = load <16 x i16>, ptr %in %b = add <16 x i16> %a, %a @@ -873,17 +889,17 @@ define void @zext_v8i32_v8i64(ptr %in, ptr %out) { ; CHECK-LABEL: zext_v8i32_v8i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: add z0.s, z0.s, z0.s +; CHECK-NEXT: add z1.s, z1.s, z1.s ; CHECK-NEXT: uunpklo z2.d, z0.s ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; CHECK-NEXT: add z1.s, z1.s, z1.s -; CHECK-NEXT: uunpklo z0.d, z0.s ; CHECK-NEXT: uunpklo z3.d, z1.s ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 +; CHECK-NEXT: uunpklo z0.d, z0.s ; CHECK-NEXT: uunpklo z1.d, z1.s -; CHECK-NEXT: stp q2, q0, [x1] -; CHECK-NEXT: stp q3, q1, [x1, #32] +; CHECK-NEXT: stp q2, q0, 
[x1, #32] +; CHECK-NEXT: stp q3, q1, [x1] ; CHECK-NEXT: ret %a = load <8 x i32>, ptr %in %b = add <8 x i32> %a, %a @@ -896,8 +912,8 @@ ; SVE-LABEL: extend_and_mul: ; SVE: // %bb.0: ; SVE-NEXT: mov z1.s, w0 -; SVE-NEXT: // kill: def $q0 killed $q0 def $z0 ; SVE-NEXT: ptrue p0.d, vl2 +; SVE-NEXT: // kill: def $q0 killed $q0 def $z0 ; SVE-NEXT: uunpklo z1.d, z1.s ; SVE-NEXT: mul z0.d, p0/m, z0.d, z1.d ; SVE-NEXT: str q0, [x1] diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-immediates.ll @@ -220,11 +220,11 @@ define void @icmp_eq_v32i8(ptr %a) { ; CHECK-LABEL: icmp_eq_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, #7 -; CHECK-NEXT: mov z0.b, p1/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: cmpeq p0.b, p0/z, z1.b, #7 +; CHECK-NEXT: mov z0.b, p1/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z1.b, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -240,11 +240,11 @@ define void @icmp_sge_v16i16(ptr %a) { ; CHECK-LABEL: icmp_sge_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: cmpge p1.h, p0/z, z0.h, #15 -; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: cmpge p0.h, p0/z, z1.h, #15 +; CHECK-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -260,11 +260,11 @@ define void @icmp_sgt_v8i32(ptr %a) { ; CHECK-LABEL: icmp_sgt_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: cmpgt p1.s, p0/z, z0.s, #-8 -; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: cmpgt p0.s, p0/z, z1.s, #-8 +; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -280,11 +280,11 @@ define void @icmp_ult_v4i64(ptr %a) { ; CHECK-LABEL: icmp_ult_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: cmplo p1.d, p0/z, z0.d, #63 -; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: cmplo p0.d, p0/z, z1.d, #63 +; CHECK-NEXT: mov z0.d, p1/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-log.ll @@ -34,10 +34,10 @@ define void @and_v32i8(ptr %a, ptr %b) { ; CHECK-LABEL: and_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: and z0.d, z0.d, z2.d -; CHECK-NEXT: and z1.d, z1.d, z3.d +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: and z0.d, z1.d, z0.d +; CHECK-NEXT: and z1.d, z2.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <32 x i8>, ptr 
%a @@ -74,10 +74,10 @@ define void @and_v16i16(ptr %a, ptr %b) { ; CHECK-LABEL: and_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: and z0.d, z0.d, z2.d -; CHECK-NEXT: and z1.d, z1.d, z3.d +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: and z0.d, z1.d, z0.d +; CHECK-NEXT: and z1.d, z2.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i16>, ptr %a @@ -114,10 +114,10 @@ define void @and_v8i32(ptr %a, ptr %b) { ; CHECK-LABEL: and_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: and z0.d, z0.d, z2.d -; CHECK-NEXT: and z1.d, z1.d, z3.d +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: and z0.d, z1.d, z0.d +; CHECK-NEXT: and z1.d, z2.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <8 x i32>, ptr %a @@ -154,10 +154,10 @@ define void @and_v4i64(ptr %a, ptr %b) { ; CHECK-LABEL: and_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: and z0.d, z0.d, z2.d -; CHECK-NEXT: and z1.d, z1.d, z3.d +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: and z0.d, z1.d, z0.d +; CHECK-NEXT: and z1.d, z2.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <4 x i64>, ptr %a @@ -198,10 +198,10 @@ define void @or_v32i8(ptr %a, ptr %b) { ; CHECK-LABEL: or_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: orr z0.d, z0.d, z2.d -; CHECK-NEXT: orr z1.d, z1.d, z3.d +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: orr z0.d, z1.d, z0.d +; CHECK-NEXT: orr z1.d, z2.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <32 x i8>, ptr %a @@ -238,10 +238,10 @@ define void @or_v16i16(ptr %a, ptr %b) { ; CHECK-LABEL: or_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: orr z0.d, z0.d, z2.d -; CHECK-NEXT: orr z1.d, z1.d, z3.d +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: orr z0.d, z1.d, z0.d +; CHECK-NEXT: orr z1.d, z2.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i16>, ptr %a @@ -278,10 +278,10 @@ define void @or_v8i32(ptr %a, ptr %b) { ; CHECK-LABEL: or_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: orr z0.d, z0.d, z2.d -; CHECK-NEXT: orr z1.d, z1.d, z3.d +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: orr z0.d, z1.d, z0.d +; CHECK-NEXT: orr z1.d, z2.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <8 x i32>, ptr %a @@ -318,10 +318,10 @@ define void @or_v4i64(ptr %a, ptr %b) { ; CHECK-LABEL: or_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: orr z0.d, z0.d, z2.d -; CHECK-NEXT: orr z1.d, z1.d, z3.d +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: orr z0.d, z1.d, z0.d +; CHECK-NEXT: orr z1.d, z2.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <4 x i64>, ptr %a @@ -362,10 +362,10 @@ define void @xor_v32i8(ptr %a, ptr %b) { ; CHECK-LABEL: xor_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: eor z0.d, z0.d, z2.d -; CHECK-NEXT: eor z1.d, z1.d, z3.d +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: eor z0.d, z1.d, 
z0.d +; CHECK-NEXT: eor z1.d, z2.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <32 x i8>, ptr %a @@ -402,10 +402,10 @@ define void @xor_v16i16(ptr %a, ptr %b) { ; CHECK-LABEL: xor_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: eor z0.d, z0.d, z2.d -; CHECK-NEXT: eor z1.d, z1.d, z3.d +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: eor z0.d, z1.d, z0.d +; CHECK-NEXT: eor z1.d, z2.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i16>, ptr %a @@ -442,10 +442,10 @@ define void @xor_v8i32(ptr %a, ptr %b) { ; CHECK-LABEL: xor_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: eor z0.d, z0.d, z2.d -; CHECK-NEXT: eor z1.d, z1.d, z3.d +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: eor z0.d, z1.d, z0.d +; CHECK-NEXT: eor z1.d, z2.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <8 x i32>, ptr %a @@ -482,10 +482,10 @@ define void @xor_v4i64(ptr %a, ptr %b) { ; CHECK-LABEL: xor_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: eor z0.d, z0.d, z2.d -; CHECK-NEXT: eor z1.d, z1.d, z3.d +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: eor z0.d, z1.d, z0.d +; CHECK-NEXT: eor z1.d, z2.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <4 x i64>, ptr %a diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-minmax.ll @@ -10,8 +10,8 @@ define <8 x i8> @smax_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-LABEL: smax_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl8 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: smax z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -23,8 +23,8 @@ define <16 x i8> @smax_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-LABEL: smax_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: smax z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -36,10 +36,11 @@ define void @smax_v32i8(ptr %a, ptr %b) { ; CHECK-LABEL: smax_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: smax z0.b, p0/m, z0.b, z2.b +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: smax z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: smax z1.b, p0/m, z1.b, z3.b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -53,8 +54,8 @@ define <4 x i16> @smax_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-LABEL: smax_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: smax z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -66,8 +67,8 @@ define <8 x i16> @smax_v8i16(<8 x i16> 
%op1, <8 x i16> %op2) { ; CHECK-LABEL: smax_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: smax z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -79,10 +80,11 @@ define void @smax_v16i16(ptr %a, ptr %b) { ; CHECK-LABEL: smax_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: smax z0.h, p0/m, z0.h, z2.h +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: smax z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: smax z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -96,8 +98,8 @@ define <2 x i32> @smax_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-LABEL: smax_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: smax z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -109,8 +111,8 @@ define <4 x i32> @smax_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-LABEL: smax_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: smax z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -122,10 +124,11 @@ define void @smax_v8i32(ptr %a, ptr %b) { ; CHECK-LABEL: smax_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: smax z0.s, p0/m, z0.s, z2.s +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: smax z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: smax z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -140,8 +143,8 @@ define <1 x i64> @smax_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-LABEL: smax_v1i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -154,8 +157,8 @@ define <2 x i64> @smax_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-LABEL: smax_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -167,10 +170,11 @@ define void @smax_v4i64(ptr %a, ptr %b) { ; CHECK-LABEL: smax_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: smax z0.d, p0/m, z0.d, z2.d +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: smax z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -188,8 +192,8 @@ define <8 x i8> @smin_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-LABEL: smin_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 
killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl8 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: smin z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -201,8 +205,8 @@ define <16 x i8> @smin_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-LABEL: smin_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: smin z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -214,10 +218,11 @@ define void @smin_v32i8(ptr %a, ptr %b) { ; CHECK-LABEL: smin_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: smin z0.b, p0/m, z0.b, z2.b +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: smin z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: smin z1.b, p0/m, z1.b, z3.b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -231,8 +236,8 @@ define <4 x i16> @smin_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-LABEL: smin_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: smin z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -244,8 +249,8 @@ define <8 x i16> @smin_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-LABEL: smin_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: smin z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -257,10 +262,11 @@ define void @smin_v16i16(ptr %a, ptr %b) { ; CHECK-LABEL: smin_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: smin z0.h, p0/m, z0.h, z2.h +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: smin z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: smin z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -274,8 +280,8 @@ define <2 x i32> @smin_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-LABEL: smin_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -287,8 +293,8 @@ define <4 x i32> @smin_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-LABEL: smin_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -300,10 +306,11 @@ define void @smin_v8i32(ptr %a, ptr %b) { ; CHECK-LABEL: smin_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: smin z0.s, p0/m, z0.s, z2.s +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: 
smin z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: smin z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -318,8 +325,8 @@ define <1 x i64> @smin_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-LABEL: smin_v1i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -332,8 +339,8 @@ define <2 x i64> @smin_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-LABEL: smin_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -345,10 +352,11 @@ define void @smin_v4i64(ptr %a, ptr %b) { ; CHECK-LABEL: smin_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: smin z0.d, p0/m, z0.d, z2.d +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: smin z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -366,8 +374,8 @@ define <8 x i8> @umax_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-LABEL: umax_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl8 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: umax z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -379,8 +387,8 @@ define <16 x i8> @umax_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-LABEL: umax_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: umax z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -392,10 +400,11 @@ define void @umax_v32i8(ptr %a, ptr %b) { ; CHECK-LABEL: umax_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: umax z0.b, p0/m, z0.b, z2.b +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: umax z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: umax z1.b, p0/m, z1.b, z3.b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -409,8 +418,8 @@ define <4 x i16> @umax_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-LABEL: umax_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -422,8 +431,8 @@ define <8 x i16> @umax_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-LABEL: umax_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -435,10 +444,11 @@ 
define void @umax_v16i16(ptr %a, ptr %b) { ; CHECK-LABEL: umax_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: umax z0.h, p0/m, z0.h, z2.h +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: umax z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -452,8 +462,8 @@ define <2 x i32> @umax_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-LABEL: umax_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: umax z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -465,8 +475,8 @@ define <4 x i32> @umax_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-LABEL: umax_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: umax z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -478,10 +488,11 @@ define void @umax_v8i32(ptr %a, ptr %b) { ; CHECK-LABEL: umax_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: umax z0.s, p0/m, z0.s, z2.s +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: umax z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: umax z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -496,8 +507,8 @@ define <1 x i64> @umax_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-LABEL: umax_v1i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -510,8 +521,8 @@ define <2 x i64> @umax_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-LABEL: umax_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -523,10 +534,11 @@ define void @umax_v4i64(ptr %a, ptr %b) { ; CHECK-LABEL: umax_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: umax z0.d, p0/m, z0.d, z2.d +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: umax z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -544,8 +556,8 @@ define <8 x i8> @umin_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-LABEL: umin_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl8 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: umin z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -557,8 +569,8 @@ define <16 x i8> @umin_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-LABEL: umin_v16i8: ; CHECK: // 
%bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: umin z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -570,10 +582,11 @@ define void @umin_v32i8(ptr %a, ptr %b) { ; CHECK-LABEL: umin_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: umin z0.b, p0/m, z0.b, z2.b +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: umin z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: umin z1.b, p0/m, z1.b, z3.b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -587,8 +600,8 @@ define <4 x i16> @umin_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-LABEL: umin_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: umin z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -600,8 +613,8 @@ define <8 x i16> @umin_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-LABEL: umin_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: umin z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -613,10 +626,11 @@ define void @umin_v16i16(ptr %a, ptr %b) { ; CHECK-LABEL: umin_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: umin z0.h, p0/m, z0.h, z2.h +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: umin z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: umin z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -630,8 +644,8 @@ define <2 x i32> @umin_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-LABEL: umin_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: umin z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -643,8 +657,8 @@ define <4 x i32> @umin_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-LABEL: umin_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: umin z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -656,10 +670,11 @@ define void @umin_v8i32(ptr %a, ptr %b) { ; CHECK-LABEL: umin_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: umin z0.s, p0/m, z0.s, z2.s +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: umin z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: umin z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -674,8 +689,8 @@ define <1 x i64> @umin_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-LABEL: umin_v1i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl1 +; 
CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -688,8 +703,8 @@ define <2 x i64> @umin_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-LABEL: umin_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -701,10 +716,11 @@ define void @umin_v4i64(ptr %a, ptr %b) { ; CHECK-LABEL: umin_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: umin z0.d, p0/m, z0.d, z2.d +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: umin z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-mulh.ll @@ -14,9 +14,9 @@ define <4 x i8> @smulh_v4i8(<4 x i8> %op1, <4 x i8> %op2) { ; SVE-LABEL: smulh_v4i8: ; SVE: // %bb.0: +; SVE-NEXT: ptrue p0.h, vl4 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1 ; SVE-NEXT: // kill: def $d0 killed $d0 def $z0 -; SVE-NEXT: ptrue p0.h, vl4 ; SVE-NEXT: sxtb z0.h, p0/m, z0.h ; SVE-NEXT: sxtb z1.h, p0/m, z1.h ; SVE-NEXT: mul z0.h, p0/m, z0.h, z1.h @@ -26,9 +26,9 @@ ; ; SVE2-LABEL: smulh_v4i8: ; SVE2: // %bb.0: +; SVE2-NEXT: ptrue p0.h, vl4 ; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1 ; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0 -; SVE2-NEXT: ptrue p0.h, vl4 ; SVE2-NEXT: sxtb z0.h, p0/m, z0.h ; SVE2-NEXT: sxtb z1.h, p0/m, z1.h ; SVE2-NEXT: mul z0.h, z0.h, z1.h @@ -48,8 +48,8 @@ define <8 x i8> @smulh_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; SVE-LABEL: smulh_v8i8: ; SVE: // %bb.0: -; SVE-NEXT: // kill: def $d0 killed $d0 def $z0 ; SVE-NEXT: ptrue p0.b, vl8 +; SVE-NEXT: // kill: def $d0 killed $d0 def $z0 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1 ; SVE-NEXT: smulh z0.b, p0/m, z0.b, z1.b ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -75,8 +75,8 @@ define <16 x i8> @smulh_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; SVE-LABEL: smulh_v16i8: ; SVE: // %bb.0: -; SVE-NEXT: // kill: def $q0 killed $q0 def $z0 ; SVE-NEXT: ptrue p0.b, vl16 +; SVE-NEXT: // kill: def $q0 killed $q0 def $z0 ; SVE-NEXT: // kill: def $q1 killed $q1 def $z1 ; SVE-NEXT: smulh z0.b, p0/m, z0.b, z1.b ; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -100,20 +100,21 @@ define void @smulh_v32i8(ptr %a, ptr %b) { ; SVE-LABEL: smulh_v32i8: ; SVE: // %bb.0: -; SVE-NEXT: ldp q0, q1, [x0] ; SVE-NEXT: ptrue p0.b, vl16 -; SVE-NEXT: ldp q2, q3, [x1] -; SVE-NEXT: smulh z0.b, p0/m, z0.b, z2.b +; SVE-NEXT: ldp q0, q3, [x1] +; SVE-NEXT: ldp q1, q2, [x0] +; SVE-NEXT: smulh z0.b, p0/m, z0.b, z1.b +; SVE-NEXT: movprfx z1, z2 ; SVE-NEXT: smulh z1.b, p0/m, z1.b, z3.b ; SVE-NEXT: stp q0, q1, [x0] ; SVE-NEXT: ret ; ; SVE2-LABEL: smulh_v32i8: ; SVE2: // %bb.0: -; SVE2-NEXT: ldp q0, q1, [x0] -; SVE2-NEXT: ldp q2, q3, [x1] -; SVE2-NEXT: smulh z0.b, z0.b, z2.b -; SVE2-NEXT: smulh z1.b, z1.b, z3.b +; SVE2-NEXT: ldp q0, q3, [x1] +; SVE2-NEXT: ldp 
q1, q2, [x0] +; SVE2-NEXT: smulh z0.b, z1.b, z0.b +; SVE2-NEXT: smulh z1.b, z2.b, z3.b ; SVE2-NEXT: stp q0, q1, [x0] ; SVE2-NEXT: ret %op1 = load <32 x i8>, ptr %a @@ -130,9 +131,9 @@ define <2 x i16> @smulh_v2i16(<2 x i16> %op1, <2 x i16> %op2) { ; SVE-LABEL: smulh_v2i16: ; SVE: // %bb.0: +; SVE-NEXT: ptrue p0.s, vl2 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1 ; SVE-NEXT: // kill: def $d0 killed $d0 def $z0 -; SVE-NEXT: ptrue p0.s, vl2 ; SVE-NEXT: sxth z0.s, p0/m, z0.s ; SVE-NEXT: sxth z1.s, p0/m, z1.s ; SVE-NEXT: mul z0.s, p0/m, z0.s, z1.s @@ -142,9 +143,9 @@ ; ; SVE2-LABEL: smulh_v2i16: ; SVE2: // %bb.0: +; SVE2-NEXT: ptrue p0.s, vl2 ; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1 ; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0 -; SVE2-NEXT: ptrue p0.s, vl2 ; SVE2-NEXT: sxth z0.s, p0/m, z0.s ; SVE2-NEXT: sxth z1.s, p0/m, z1.s ; SVE2-NEXT: mul z0.s, z0.s, z1.s @@ -162,8 +163,8 @@ define <4 x i16> @smulh_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; SVE-LABEL: smulh_v4i16: ; SVE: // %bb.0: -; SVE-NEXT: // kill: def $d0 killed $d0 def $z0 ; SVE-NEXT: ptrue p0.h, vl4 +; SVE-NEXT: // kill: def $d0 killed $d0 def $z0 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1 ; SVE-NEXT: smulh z0.h, p0/m, z0.h, z1.h ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -187,8 +188,8 @@ define <8 x i16> @smulh_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; SVE-LABEL: smulh_v8i16: ; SVE: // %bb.0: -; SVE-NEXT: // kill: def $q0 killed $q0 def $z0 ; SVE-NEXT: ptrue p0.h, vl8 +; SVE-NEXT: // kill: def $q0 killed $q0 def $z0 ; SVE-NEXT: // kill: def $q1 killed $q1 def $z1 ; SVE-NEXT: smulh z0.h, p0/m, z0.h, z1.h ; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -212,20 +213,21 @@ define void @smulh_v16i16(ptr %a, ptr %b) { ; SVE-LABEL: smulh_v16i16: ; SVE: // %bb.0: -; SVE-NEXT: ldp q0, q1, [x0] ; SVE-NEXT: ptrue p0.h, vl8 -; SVE-NEXT: ldp q2, q3, [x1] -; SVE-NEXT: smulh z0.h, p0/m, z0.h, z2.h +; SVE-NEXT: ldp q0, q3, [x1] +; SVE-NEXT: ldp q1, q2, [x0] +; SVE-NEXT: smulh z0.h, p0/m, z0.h, z1.h +; SVE-NEXT: movprfx z1, z2 ; SVE-NEXT: smulh z1.h, p0/m, z1.h, z3.h ; SVE-NEXT: stp q0, q1, [x0] ; SVE-NEXT: ret ; ; SVE2-LABEL: smulh_v16i16: ; SVE2: // %bb.0: -; SVE2-NEXT: ldp q0, q1, [x0] -; SVE2-NEXT: ldp q2, q3, [x1] -; SVE2-NEXT: smulh z0.h, z0.h, z2.h -; SVE2-NEXT: smulh z1.h, z1.h, z3.h +; SVE2-NEXT: ldp q0, q3, [x1] +; SVE2-NEXT: ldp q1, q2, [x0] +; SVE2-NEXT: smulh z0.h, z1.h, z0.h +; SVE2-NEXT: smulh z1.h, z2.h, z3.h ; SVE2-NEXT: stp q0, q1, [x0] ; SVE2-NEXT: ret %op1 = load <16 x i16>, ptr %a @@ -242,8 +244,8 @@ define <2 x i32> @smulh_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; SVE-LABEL: smulh_v2i32: ; SVE: // %bb.0: -; SVE-NEXT: // kill: def $d0 killed $d0 def $z0 ; SVE-NEXT: ptrue p0.s, vl2 +; SVE-NEXT: // kill: def $d0 killed $d0 def $z0 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1 ; SVE-NEXT: smulh z0.s, p0/m, z0.s, z1.s ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -267,8 +269,8 @@ define <4 x i32> @smulh_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; SVE-LABEL: smulh_v4i32: ; SVE: // %bb.0: -; SVE-NEXT: // kill: def $q0 killed $q0 def $z0 ; SVE-NEXT: ptrue p0.s, vl4 +; SVE-NEXT: // kill: def $q0 killed $q0 def $z0 ; SVE-NEXT: // kill: def $q1 killed $q1 def $z1 ; SVE-NEXT: smulh z0.s, p0/m, z0.s, z1.s ; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -292,20 +294,21 @@ define void @smulh_v8i32(ptr %a, ptr %b) { ; SVE-LABEL: smulh_v8i32: ; SVE: // %bb.0: -; SVE-NEXT: ldp q0, q1, [x0] ; SVE-NEXT: ptrue p0.s, vl4 -; SVE-NEXT: ldp q2, q3, [x1] -; SVE-NEXT: smulh z0.s, p0/m, z0.s, z2.s +; 
SVE-NEXT: ldp q0, q3, [x1] +; SVE-NEXT: ldp q1, q2, [x0] +; SVE-NEXT: smulh z0.s, p0/m, z0.s, z1.s +; SVE-NEXT: movprfx z1, z2 ; SVE-NEXT: smulh z1.s, p0/m, z1.s, z3.s ; SVE-NEXT: stp q0, q1, [x0] ; SVE-NEXT: ret ; ; SVE2-LABEL: smulh_v8i32: ; SVE2: // %bb.0: -; SVE2-NEXT: ldp q0, q1, [x0] -; SVE2-NEXT: ldp q2, q3, [x1] -; SVE2-NEXT: smulh z0.s, z0.s, z2.s -; SVE2-NEXT: smulh z1.s, z1.s, z3.s +; SVE2-NEXT: ldp q0, q3, [x1] +; SVE2-NEXT: ldp q1, q2, [x0] +; SVE2-NEXT: smulh z0.s, z1.s, z0.s +; SVE2-NEXT: smulh z1.s, z2.s, z3.s ; SVE2-NEXT: stp q0, q1, [x0] ; SVE2-NEXT: ret %op1 = load <8 x i32>, ptr %a @@ -322,8 +325,8 @@ define <1 x i64> @smulh_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; SVE-LABEL: smulh_v1i64: ; SVE: // %bb.0: -; SVE-NEXT: // kill: def $d0 killed $d0 def $z0 ; SVE-NEXT: ptrue p0.d, vl1 +; SVE-NEXT: // kill: def $d0 killed $d0 def $z0 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1 ; SVE-NEXT: smulh z0.d, p0/m, z0.d, z1.d ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -349,8 +352,8 @@ define <2 x i64> @smulh_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; SVE-LABEL: smulh_v2i64: ; SVE: // %bb.0: -; SVE-NEXT: // kill: def $q0 killed $q0 def $z0 ; SVE-NEXT: ptrue p0.d, vl2 +; SVE-NEXT: // kill: def $q0 killed $q0 def $z0 ; SVE-NEXT: // kill: def $q1 killed $q1 def $z1 ; SVE-NEXT: smulh z0.d, p0/m, z0.d, z1.d ; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -374,20 +377,21 @@ define void @smulh_v4i64(ptr %a, ptr %b) { ; SVE-LABEL: smulh_v4i64: ; SVE: // %bb.0: -; SVE-NEXT: ldp q0, q1, [x0] ; SVE-NEXT: ptrue p0.d, vl2 -; SVE-NEXT: ldp q2, q3, [x1] -; SVE-NEXT: smulh z0.d, p0/m, z0.d, z2.d +; SVE-NEXT: ldp q0, q3, [x1] +; SVE-NEXT: ldp q1, q2, [x0] +; SVE-NEXT: smulh z0.d, p0/m, z0.d, z1.d +; SVE-NEXT: movprfx z1, z2 ; SVE-NEXT: smulh z1.d, p0/m, z1.d, z3.d ; SVE-NEXT: stp q0, q1, [x0] ; SVE-NEXT: ret ; ; SVE2-LABEL: smulh_v4i64: ; SVE2: // %bb.0: -; SVE2-NEXT: ldp q0, q1, [x0] -; SVE2-NEXT: ldp q2, q3, [x1] -; SVE2-NEXT: smulh z0.d, z0.d, z2.d -; SVE2-NEXT: smulh z1.d, z1.d, z3.d +; SVE2-NEXT: ldp q0, q3, [x1] +; SVE2-NEXT: ldp q1, q2, [x0] +; SVE2-NEXT: smulh z0.d, z1.d, z0.d +; SVE2-NEXT: smulh z1.d, z2.d, z3.d ; SVE2-NEXT: stp q0, q1, [x0] ; SVE2-NEXT: ret %op1 = load <4 x i64>, ptr %a @@ -408,9 +412,9 @@ define <4 x i8> @umulh_v4i8(<4 x i8> %op1, <4 x i8> %op2) { ; SVE-LABEL: umulh_v4i8: ; SVE: // %bb.0: +; SVE-NEXT: ptrue p0.h, vl4 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1 ; SVE-NEXT: // kill: def $d0 killed $d0 def $z0 -; SVE-NEXT: ptrue p0.h, vl4 ; SVE-NEXT: and z0.h, z0.h, #0xff ; SVE-NEXT: and z1.h, z1.h, #0xff ; SVE-NEXT: mul z0.h, p0/m, z0.h, z1.h @@ -439,8 +443,8 @@ define <8 x i8> @umulh_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; SVE-LABEL: umulh_v8i8: ; SVE: // %bb.0: -; SVE-NEXT: // kill: def $d0 killed $d0 def $z0 ; SVE-NEXT: ptrue p0.b, vl8 +; SVE-NEXT: // kill: def $d0 killed $d0 def $z0 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1 ; SVE-NEXT: umulh z0.b, p0/m, z0.b, z1.b ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -464,8 +468,8 @@ define <16 x i8> @umulh_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; SVE-LABEL: umulh_v16i8: ; SVE: // %bb.0: -; SVE-NEXT: // kill: def $q0 killed $q0 def $z0 ; SVE-NEXT: ptrue p0.b, vl16 +; SVE-NEXT: // kill: def $q0 killed $q0 def $z0 ; SVE-NEXT: // kill: def $q1 killed $q1 def $z1 ; SVE-NEXT: umulh z0.b, p0/m, z0.b, z1.b ; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -489,20 +493,21 @@ define void @umulh_v32i8(ptr %a, ptr %b) { ; SVE-LABEL: umulh_v32i8: ; SVE: // %bb.0: -; SVE-NEXT: ldp q0, q1, [x0] ; 
SVE-NEXT: ptrue p0.b, vl16 -; SVE-NEXT: ldp q2, q3, [x1] -; SVE-NEXT: umulh z0.b, p0/m, z0.b, z2.b +; SVE-NEXT: ldp q0, q3, [x1] +; SVE-NEXT: ldp q1, q2, [x0] +; SVE-NEXT: umulh z0.b, p0/m, z0.b, z1.b +; SVE-NEXT: movprfx z1, z2 ; SVE-NEXT: umulh z1.b, p0/m, z1.b, z3.b ; SVE-NEXT: stp q0, q1, [x0] ; SVE-NEXT: ret ; ; SVE2-LABEL: umulh_v32i8: ; SVE2: // %bb.0: -; SVE2-NEXT: ldp q0, q1, [x0] -; SVE2-NEXT: ldp q2, q3, [x1] -; SVE2-NEXT: umulh z0.b, z0.b, z2.b -; SVE2-NEXT: umulh z1.b, z1.b, z3.b +; SVE2-NEXT: ldp q0, q3, [x1] +; SVE2-NEXT: ldp q1, q2, [x0] +; SVE2-NEXT: umulh z0.b, z1.b, z0.b +; SVE2-NEXT: umulh z1.b, z2.b, z3.b ; SVE2-NEXT: stp q0, q1, [x0] ; SVE2-NEXT: ret %op1 = load <32 x i8>, ptr %a @@ -519,9 +524,9 @@ define <2 x i16> @umulh_v2i16(<2 x i16> %op1, <2 x i16> %op2) { ; SVE-LABEL: umulh_v2i16: ; SVE: // %bb.0: +; SVE-NEXT: ptrue p0.s, vl2 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1 ; SVE-NEXT: // kill: def $d0 killed $d0 def $z0 -; SVE-NEXT: ptrue p0.s, vl2 ; SVE-NEXT: and z0.s, z0.s, #0xffff ; SVE-NEXT: and z1.s, z1.s, #0xffff ; SVE-NEXT: mul z0.s, p0/m, z0.s, z1.s @@ -550,8 +555,8 @@ define <4 x i16> @umulh_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; SVE-LABEL: umulh_v4i16: ; SVE: // %bb.0: -; SVE-NEXT: // kill: def $d0 killed $d0 def $z0 ; SVE-NEXT: ptrue p0.h, vl4 +; SVE-NEXT: // kill: def $d0 killed $d0 def $z0 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1 ; SVE-NEXT: umulh z0.h, p0/m, z0.h, z1.h ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -575,8 +580,8 @@ define <8 x i16> @umulh_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; SVE-LABEL: umulh_v8i16: ; SVE: // %bb.0: -; SVE-NEXT: // kill: def $q0 killed $q0 def $z0 ; SVE-NEXT: ptrue p0.h, vl8 +; SVE-NEXT: // kill: def $q0 killed $q0 def $z0 ; SVE-NEXT: // kill: def $q1 killed $q1 def $z1 ; SVE-NEXT: umulh z0.h, p0/m, z0.h, z1.h ; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -600,20 +605,21 @@ define void @umulh_v16i16(ptr %a, ptr %b) { ; SVE-LABEL: umulh_v16i16: ; SVE: // %bb.0: -; SVE-NEXT: ldp q0, q1, [x0] ; SVE-NEXT: ptrue p0.h, vl8 -; SVE-NEXT: ldp q2, q3, [x1] -; SVE-NEXT: umulh z0.h, p0/m, z0.h, z2.h +; SVE-NEXT: ldp q0, q3, [x1] +; SVE-NEXT: ldp q1, q2, [x0] +; SVE-NEXT: umulh z0.h, p0/m, z0.h, z1.h +; SVE-NEXT: movprfx z1, z2 ; SVE-NEXT: umulh z1.h, p0/m, z1.h, z3.h ; SVE-NEXT: stp q0, q1, [x0] ; SVE-NEXT: ret ; ; SVE2-LABEL: umulh_v16i16: ; SVE2: // %bb.0: -; SVE2-NEXT: ldp q0, q1, [x0] -; SVE2-NEXT: ldp q2, q3, [x1] -; SVE2-NEXT: umulh z0.h, z0.h, z2.h -; SVE2-NEXT: umulh z1.h, z1.h, z3.h +; SVE2-NEXT: ldp q0, q3, [x1] +; SVE2-NEXT: ldp q1, q2, [x0] +; SVE2-NEXT: umulh z0.h, z1.h, z0.h +; SVE2-NEXT: umulh z1.h, z2.h, z3.h ; SVE2-NEXT: stp q0, q1, [x0] ; SVE2-NEXT: ret %op1 = load <16 x i16>, ptr %a @@ -630,8 +636,8 @@ define <2 x i32> @umulh_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; SVE-LABEL: umulh_v2i32: ; SVE: // %bb.0: -; SVE-NEXT: // kill: def $d0 killed $d0 def $z0 ; SVE-NEXT: ptrue p0.s, vl2 +; SVE-NEXT: // kill: def $d0 killed $d0 def $z0 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1 ; SVE-NEXT: umulh z0.s, p0/m, z0.s, z1.s ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -655,8 +661,8 @@ define <4 x i32> @umulh_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; SVE-LABEL: umulh_v4i32: ; SVE: // %bb.0: -; SVE-NEXT: // kill: def $q0 killed $q0 def $z0 ; SVE-NEXT: ptrue p0.s, vl4 +; SVE-NEXT: // kill: def $q0 killed $q0 def $z0 ; SVE-NEXT: // kill: def $q1 killed $q1 def $z1 ; SVE-NEXT: umulh z0.s, p0/m, z0.s, z1.s ; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -680,20 +686,21 @@ 
define void @umulh_v8i32(ptr %a, ptr %b) { ; SVE-LABEL: umulh_v8i32: ; SVE: // %bb.0: -; SVE-NEXT: ldp q0, q1, [x0] ; SVE-NEXT: ptrue p0.s, vl4 -; SVE-NEXT: ldp q2, q3, [x1] -; SVE-NEXT: umulh z0.s, p0/m, z0.s, z2.s +; SVE-NEXT: ldp q0, q3, [x1] +; SVE-NEXT: ldp q1, q2, [x0] +; SVE-NEXT: umulh z0.s, p0/m, z0.s, z1.s +; SVE-NEXT: movprfx z1, z2 ; SVE-NEXT: umulh z1.s, p0/m, z1.s, z3.s ; SVE-NEXT: stp q0, q1, [x0] ; SVE-NEXT: ret ; ; SVE2-LABEL: umulh_v8i32: ; SVE2: // %bb.0: -; SVE2-NEXT: ldp q0, q1, [x0] -; SVE2-NEXT: ldp q2, q3, [x1] -; SVE2-NEXT: umulh z0.s, z0.s, z2.s -; SVE2-NEXT: umulh z1.s, z1.s, z3.s +; SVE2-NEXT: ldp q0, q3, [x1] +; SVE2-NEXT: ldp q1, q2, [x0] +; SVE2-NEXT: umulh z0.s, z1.s, z0.s +; SVE2-NEXT: umulh z1.s, z2.s, z3.s ; SVE2-NEXT: stp q0, q1, [x0] ; SVE2-NEXT: ret %op1 = load <8 x i32>, ptr %a @@ -712,8 +719,8 @@ define <1 x i64> @umulh_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; SVE-LABEL: umulh_v1i64: ; SVE: // %bb.0: -; SVE-NEXT: // kill: def $d0 killed $d0 def $z0 ; SVE-NEXT: ptrue p0.d, vl1 +; SVE-NEXT: // kill: def $d0 killed $d0 def $z0 ; SVE-NEXT: // kill: def $d1 killed $d1 def $z1 ; SVE-NEXT: umulh z0.d, p0/m, z0.d, z1.d ; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -737,8 +744,8 @@ define <2 x i64> @umulh_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; SVE-LABEL: umulh_v2i64: ; SVE: // %bb.0: -; SVE-NEXT: // kill: def $q0 killed $q0 def $z0 ; SVE-NEXT: ptrue p0.d, vl2 +; SVE-NEXT: // kill: def $q0 killed $q0 def $z0 ; SVE-NEXT: // kill: def $q1 killed $q1 def $z1 ; SVE-NEXT: umulh z0.d, p0/m, z0.d, z1.d ; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -762,20 +769,21 @@ define void @umulh_v4i64(ptr %a, ptr %b) { ; SVE-LABEL: umulh_v4i64: ; SVE: // %bb.0: -; SVE-NEXT: ldp q0, q1, [x0] ; SVE-NEXT: ptrue p0.d, vl2 -; SVE-NEXT: ldp q2, q3, [x1] -; SVE-NEXT: umulh z0.d, p0/m, z0.d, z2.d +; SVE-NEXT: ldp q0, q3, [x1] +; SVE-NEXT: ldp q1, q2, [x0] +; SVE-NEXT: umulh z0.d, p0/m, z0.d, z1.d +; SVE-NEXT: movprfx z1, z2 ; SVE-NEXT: umulh z1.d, p0/m, z1.d, z3.d ; SVE-NEXT: stp q0, q1, [x0] ; SVE-NEXT: ret ; ; SVE2-LABEL: umulh_v4i64: ; SVE2: // %bb.0: -; SVE2-NEXT: ldp q0, q1, [x0] -; SVE2-NEXT: ldp q2, q3, [x1] -; SVE2-NEXT: umulh z0.d, z0.d, z2.d -; SVE2-NEXT: umulh z1.d, z1.d, z3.d +; SVE2-NEXT: ldp q0, q3, [x1] +; SVE2-NEXT: ldp q1, q2, [x0] +; SVE2-NEXT: umulh z0.d, z1.d, z0.d +; SVE2-NEXT: umulh z1.d, z2.d, z3.d ; SVE2-NEXT: stp q0, q1, [x0] ; SVE2-NEXT: ret %op1 = load <4 x i64>, ptr %a diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-reduce.ll @@ -10,8 +10,8 @@ define i8 @uaddv_v8i8(<8 x i8> %a) { ; CHECK-LABEL: uaddv_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl8 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: uaddv d0, p0, z0.b ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 @@ -23,8 +23,8 @@ define i8 @uaddv_v16i8(<16 x i8> %a) { ; CHECK-LABEL: uaddv_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: uaddv d0, p0, z0.b ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 @@ -36,8 +36,8 @@ define i8 @uaddv_v32i8(ptr %a) { ; CHECK-LABEL: uaddv_v32i8: ; 
CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: add z0.b, z1.b, z0.b ; CHECK-NEXT: uaddv d0, p0, z0.b ; CHECK-NEXT: fmov x0, d0 @@ -51,8 +51,8 @@ define i16 @uaddv_v4i16(<4 x i16> %a) { ; CHECK-LABEL: uaddv_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: uaddv d0, p0, z0.h ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 @@ -64,8 +64,8 @@ define i16 @uaddv_v8i16(<8 x i16> %a) { ; CHECK-LABEL: uaddv_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: uaddv d0, p0, z0.h ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 @@ -77,8 +77,8 @@ define i16 @uaddv_v16i16(ptr %a) { ; CHECK-LABEL: uaddv_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: add z0.h, z1.h, z0.h ; CHECK-NEXT: uaddv d0, p0, z0.h ; CHECK-NEXT: fmov x0, d0 @@ -92,8 +92,8 @@ define i32 @uaddv_v2i32(<2 x i32> %a) { ; CHECK-LABEL: uaddv_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: uaddv d0, p0, z0.s ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 @@ -105,8 +105,8 @@ define i32 @uaddv_v4i32(<4 x i32> %a) { ; CHECK-LABEL: uaddv_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: uaddv d0, p0, z0.s ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0 @@ -118,8 +118,8 @@ define i32 @uaddv_v8i32(ptr %a) { ; CHECK-LABEL: uaddv_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: add z0.s, z1.s, z0.s ; CHECK-NEXT: uaddv d0, p0, z0.s ; CHECK-NEXT: fmov x0, d0 @@ -133,8 +133,8 @@ define i64 @uaddv_v2i64(<2 x i64> %a) { ; CHECK-LABEL: uaddv_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: uaddv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret @@ -145,8 +145,8 @@ define i64 @uaddv_v4i64(ptr %a) { ; CHECK-LABEL: uaddv_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: add z0.d, z1.d, z0.d ; CHECK-NEXT: uaddv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 @@ -163,8 +163,8 @@ define i8 @smaxv_v8i8(<8 x i8> %a) { ; CHECK-LABEL: smaxv_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl8 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: smaxv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -175,8 +175,8 @@ define i8 @smaxv_v16i8(<16 x i8> %a) { ; CHECK-LABEL: smaxv_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: smaxv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -187,8 +187,8 @@ define i8 @smaxv_v32i8(ptr %a) { ; CHECK-LABEL: smaxv_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; 
CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: smax z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: smaxv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 @@ -201,8 +201,8 @@ define i16 @smaxv_v4i16(<4 x i16> %a) { ; CHECK-LABEL: smaxv_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: smaxv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -213,8 +213,8 @@ define i16 @smaxv_v8i16(<8 x i16> %a) { ; CHECK-LABEL: smaxv_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: smaxv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -225,8 +225,8 @@ define i16 @smaxv_v16i16(ptr %a) { ; CHECK-LABEL: smaxv_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: smax z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: smaxv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 @@ -239,8 +239,8 @@ define i32 @smaxv_v2i32(<2 x i32> %a) { ; CHECK-LABEL: smaxv_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: smaxv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -251,8 +251,8 @@ define i32 @smaxv_v4i32(<4 x i32> %a) { ; CHECK-LABEL: smaxv_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: smaxv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -263,8 +263,8 @@ define i32 @smaxv_v8i32(ptr %a) { ; CHECK-LABEL: smaxv_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: smax z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: smaxv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 @@ -278,8 +278,8 @@ define i64 @smaxv_v2i64(<2 x i64> %a) { ; CHECK-LABEL: smaxv_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: smaxv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret @@ -290,8 +290,8 @@ define i64 @smaxv_v4i64(ptr %a) { ; CHECK-LABEL: smaxv_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: smax z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: smaxv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 @@ -308,8 +308,8 @@ define i8 @sminv_v8i8(<8 x i8> %a) { ; CHECK-LABEL: sminv_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl8 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: sminv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -320,8 +320,8 @@ define i8 @sminv_v16i8(<16 x i8> %a) { ; CHECK-LABEL: sminv_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: sminv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -332,8 +332,8 @@ define i8 @sminv_v32i8(ptr %a) { ; CHECK-LABEL: sminv_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: smin z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: sminv b0, p0, z0.b ; CHECK-NEXT: fmov w0, 
s0 @@ -346,8 +346,8 @@ define i16 @sminv_v4i16(<4 x i16> %a) { ; CHECK-LABEL: sminv_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: sminv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -358,8 +358,8 @@ define i16 @sminv_v8i16(<8 x i16> %a) { ; CHECK-LABEL: sminv_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: sminv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -370,8 +370,8 @@ define i16 @sminv_v16i16(ptr %a) { ; CHECK-LABEL: sminv_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: smin z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: sminv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 @@ -384,8 +384,8 @@ define i32 @sminv_v2i32(<2 x i32> %a) { ; CHECK-LABEL: sminv_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: sminv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -396,8 +396,8 @@ define i32 @sminv_v4i32(<4 x i32> %a) { ; CHECK-LABEL: sminv_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: sminv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -408,8 +408,8 @@ define i32 @sminv_v8i32(ptr %a) { ; CHECK-LABEL: sminv_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: sminv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 @@ -423,8 +423,8 @@ define i64 @sminv_v2i64(<2 x i64> %a) { ; CHECK-LABEL: sminv_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: sminv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret @@ -435,8 +435,8 @@ define i64 @sminv_v4i64(ptr %a) { ; CHECK-LABEL: sminv_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: sminv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 @@ -453,8 +453,8 @@ define i8 @umaxv_v8i8(<8 x i8> %a) { ; CHECK-LABEL: umaxv_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl8 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: umaxv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -465,8 +465,8 @@ define i8 @umaxv_v16i8(<16 x i8> %a) { ; CHECK-LABEL: umaxv_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: umaxv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -477,8 +477,8 @@ define i8 @umaxv_v32i8(ptr %a) { ; CHECK-LABEL: umaxv_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: umax z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: umaxv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 @@ -491,8 +491,8 @@ define i16 @umaxv_v4i16(<4 x i16> %a) { ; CHECK-LABEL: umaxv_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 
; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: umaxv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -503,8 +503,8 @@ define i16 @umaxv_v8i16(<8 x i16> %a) { ; CHECK-LABEL: umaxv_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: umaxv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -515,8 +515,8 @@ define i16 @umaxv_v16i16(ptr %a) { ; CHECK-LABEL: umaxv_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: umaxv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 @@ -529,8 +529,8 @@ define i32 @umaxv_v2i32(<2 x i32> %a) { ; CHECK-LABEL: umaxv_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: umaxv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -541,8 +541,8 @@ define i32 @umaxv_v4i32(<4 x i32> %a) { ; CHECK-LABEL: umaxv_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: umaxv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -553,8 +553,8 @@ define i32 @umaxv_v8i32(ptr %a) { ; CHECK-LABEL: umaxv_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: umax z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: umaxv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 @@ -568,8 +568,8 @@ define i64 @umaxv_v2i64(<2 x i64> %a) { ; CHECK-LABEL: umaxv_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: umaxv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret @@ -580,8 +580,8 @@ define i64 @umaxv_v4i64(ptr %a) { ; CHECK-LABEL: umaxv_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: umaxv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 @@ -598,8 +598,8 @@ define i8 @uminv_v8i8(<8 x i8> %a) { ; CHECK-LABEL: uminv_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl8 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: uminv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -610,8 +610,8 @@ define i8 @uminv_v16i8(<16 x i8> %a) { ; CHECK-LABEL: uminv_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: uminv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -622,8 +622,8 @@ define i8 @uminv_v32i8(ptr %a) { ; CHECK-LABEL: uminv_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: umin z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: uminv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 @@ -636,8 +636,8 @@ define i16 @uminv_v4i16(<4 x i16> %a) { ; CHECK-LABEL: uminv_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: uminv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ 
-648,8 +648,8 @@ define i16 @uminv_v8i16(<8 x i16> %a) { ; CHECK-LABEL: uminv_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: uminv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -660,8 +660,8 @@ define i16 @uminv_v16i16(ptr %a) { ; CHECK-LABEL: uminv_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: umin z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: uminv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 @@ -674,8 +674,8 @@ define i32 @uminv_v2i32(<2 x i32> %a) { ; CHECK-LABEL: uminv_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: uminv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -686,8 +686,8 @@ define i32 @uminv_v4i32(<4 x i32> %a) { ; CHECK-LABEL: uminv_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: uminv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -698,8 +698,8 @@ define i32 @uminv_v8i32(ptr %a) { ; CHECK-LABEL: uminv_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: umin z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: uminv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 @@ -713,8 +713,8 @@ define i64 @uminv_v2i64(<2 x i64> %a) { ; CHECK-LABEL: uminv_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: uminv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret @@ -725,8 +725,8 @@ define i64 @uminv_v4i64(ptr %a) { ; CHECK-LABEL: uminv_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: uminv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-rem.ll @@ -10,12 +10,12 @@ define <4 x i8> @srem_v4i8(<4 x i8> %op1, <4 x i8> %op2) { ; CHECK-LABEL: srem_v4i8: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: ptrue p1.s, vl4 -; CHECK-NEXT: sxtb z1.h, p0/m, z1.h ; CHECK-NEXT: sxtb z0.h, p0/m, z0.h +; CHECK-NEXT: sxtb z1.h, p0/m, z1.h ; CHECK-NEXT: sunpklo z2.s, z1.h ; CHECK-NEXT: sunpklo z3.s, z0.h ; CHECK-NEXT: sdivr z2.s, p1/m, z2.s, z3.s @@ -34,21 +34,21 @@ ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: sunpklo z2.h, z1.b ; CHECK-NEXT: sunpklo z3.h, z0.b +; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: sunpklo z4.s, z2.h ; CHECK-NEXT: sunpklo z5.s, z3.h ; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8 ; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8 -; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: sunpklo z2.s, z2.h ; CHECK-NEXT: sunpklo z3.s, z3.h ; CHECK-NEXT: sdivr z4.s, p0/m, z4.s, z5.s ; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z3.s -; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h -; CHECK-NEXT: uzp1 z2.h, z2.h, 
z2.h ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: splice z4.h, p0, z4.h, z2.h +; CHECK-NEXT: uzp1 z3.h, z4.h, z4.h +; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h +; CHECK-NEXT: splice z3.h, p0, z3.h, z2.h ; CHECK-NEXT: ptrue p0.b, vl8 -; CHECK-NEXT: uzp1 z2.b, z4.b, z4.b +; CHECK-NEXT: uzp1 z2.b, z3.b, z3.b ; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -63,43 +63,42 @@ ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: mov z3.d, z0.d +; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: ptrue p1.b, vl16 ; CHECK-NEXT: ext z2.b, z2.b, z1.b, #8 ; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8 ; CHECK-NEXT: sunpklo z2.h, z2.b ; CHECK-NEXT: sunpklo z3.h, z3.b -; CHECK-NEXT: sunpklo z5.s, z2.h -; CHECK-NEXT: sunpklo z6.s, z3.h +; CHECK-NEXT: sunpklo z4.s, z2.h +; CHECK-NEXT: sunpklo z5.s, z3.h ; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8 ; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8 -; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: sunpklo z2.s, z2.h ; CHECK-NEXT: sunpklo z3.s, z3.h -; CHECK-NEXT: sdivr z5.s, p0/m, z5.s, z6.s +; CHECK-NEXT: sdivr z4.s, p0/m, z4.s, z5.s +; CHECK-NEXT: sunpklo z5.h, z0.b +; CHECK-NEXT: sunpklo z7.s, z5.h +; CHECK-NEXT: ext z5.b, z5.b, z5.b, #8 +; CHECK-NEXT: sunpklo z5.s, z5.h ; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z3.s -; CHECK-NEXT: ptrue p1.h, vl4 -; CHECK-NEXT: sunpklo z4.h, z1.b -; CHECK-NEXT: sunpklo z6.h, z0.b -; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h -; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h -; CHECK-NEXT: sunpklo z3.s, z4.h -; CHECK-NEXT: splice z5.h, p1, z5.h, z2.h -; CHECK-NEXT: sunpklo z2.s, z6.h -; CHECK-NEXT: ext z4.b, z4.b, z4.b, #8 -; CHECK-NEXT: ext z6.b, z6.b, z6.b, #8 -; CHECK-NEXT: sunpklo z4.s, z4.h -; CHECK-NEXT: sunpklo z6.s, z6.h -; CHECK-NEXT: sdiv z2.s, p0/m, z2.s, z3.s -; CHECK-NEXT: movprfx z3, z6 -; CHECK-NEXT: sdiv z3.s, p0/m, z3.s, z4.s +; CHECK-NEXT: sunpklo z3.h, z1.b +; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h +; CHECK-NEXT: sunpklo z6.s, z3.h +; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8 +; CHECK-NEXT: sunpklo z3.s, z3.h +; CHECK-NEXT: sdivr z6.s, p0/m, z6.s, z7.s ; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h +; CHECK-NEXT: sdivr z3.s, p0/m, z3.s, z5.s +; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: uzp1 z5.h, z6.h, z6.h +; CHECK-NEXT: splice z4.h, p0, z4.h, z2.h +; CHECK-NEXT: uzp1 z2.b, z4.b, z4.b ; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h -; CHECK-NEXT: uzp1 z4.b, z5.b, z5.b -; CHECK-NEXT: splice z2.h, p1, z2.h, z3.h +; CHECK-NEXT: splice z5.h, p0, z5.h, z3.h ; CHECK-NEXT: ptrue p0.b, vl8 -; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b -; CHECK-NEXT: splice z2.b, p0, z2.b, z4.b -; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b +; CHECK-NEXT: uzp1 z3.b, z5.b, z5.b +; CHECK-NEXT: splice z3.b, p0, z3.b, z2.b +; CHECK-NEXT: mls z0.b, p1/m, z3.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret %res = srem <16 x i8> %op1, %op2 @@ -109,80 +108,83 @@ define void @srem_v32i8(ptr %a, ptr %b) { ; CHECK-LABEL: srem_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q2, q0, [x0] +; CHECK-NEXT: ldr q0, [x0, #16] +; CHECK-NEXT: ldr q1, [x1, #16] ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ptrue p1.h, vl4 -; CHECK-NEXT: ldp q3, q1, [x1] -; CHECK-NEXT: mov z5.d, z0.d -; CHECK-NEXT: sunpklo z7.h, z0.b -; CHECK-NEXT: ext z5.b, z5.b, z0.b, #8 -; CHECK-NEXT: sunpklo z5.h, z5.b -; CHECK-NEXT: sunpklo z18.s, z5.h -; CHECK-NEXT: ext z5.b, z5.b, z5.b, #8 -; CHECK-NEXT: sunpklo z5.s, z5.h -; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: ptrue p1.b, vl16 +; CHECK-NEXT: mov z2.d, z1.d +; CHECK-NEXT: 
mov z3.d, z0.d ; CHECK-NEXT: sunpklo z6.h, z1.b -; CHECK-NEXT: ext z4.b, z4.b, z1.b, #8 -; CHECK-NEXT: sunpklo z16.s, z6.h -; CHECK-NEXT: sunpklo z4.h, z4.b -; CHECK-NEXT: ext z6.b, z6.b, z6.b, #8 -; CHECK-NEXT: sunpklo z17.s, z4.h -; CHECK-NEXT: ext z4.b, z4.b, z4.b, #8 -; CHECK-NEXT: sunpklo z4.s, z4.h -; CHECK-NEXT: sdivr z17.s, p0/m, z17.s, z18.s -; CHECK-NEXT: sdivr z4.s, p0/m, z4.s, z5.s -; CHECK-NEXT: sunpklo z18.s, z7.h -; CHECK-NEXT: uzp1 z17.h, z17.h, z17.h -; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h -; CHECK-NEXT: ext z7.b, z7.b, z7.b, #8 -; CHECK-NEXT: sunpklo z5.s, z6.h -; CHECK-NEXT: splice z17.h, p1, z17.h, z4.h -; CHECK-NEXT: sunpklo z4.s, z7.h -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: mov z7.d, z2.d -; CHECK-NEXT: ext z6.b, z6.b, z3.b, #8 -; CHECK-NEXT: ext z7.b, z7.b, z2.b, #8 -; CHECK-NEXT: sdivr z16.s, p0/m, z16.s, z18.s -; CHECK-NEXT: sunpklo z6.h, z6.b -; CHECK-NEXT: sunpklo z7.h, z7.b -; CHECK-NEXT: sdiv z4.s, p0/m, z4.s, z5.s -; CHECK-NEXT: uzp1 z5.h, z16.h, z16.h +; CHECK-NEXT: sunpklo z7.h, z0.b +; CHECK-NEXT: ext z2.b, z2.b, z1.b, #8 +; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8 ; CHECK-NEXT: sunpklo z16.s, z6.h -; CHECK-NEXT: sunpklo z18.s, z7.h ; CHECK-NEXT: ext z6.b, z6.b, z6.b, #8 +; CHECK-NEXT: sunpklo z17.s, z7.h ; CHECK-NEXT: ext z7.b, z7.b, z7.b, #8 +; CHECK-NEXT: sunpklo z4.h, z2.b +; CHECK-NEXT: sunpklo z3.h, z3.b ; CHECK-NEXT: sunpklo z6.s, z6.h ; CHECK-NEXT: sunpklo z7.s, z7.h -; CHECK-NEXT: sdivr z16.s, p0/m, z16.s, z18.s -; CHECK-NEXT: sdivr z6.s, p0/m, z6.s, z7.s -; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h -; CHECK-NEXT: uzp1 z7.h, z16.h, z16.h -; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h -; CHECK-NEXT: splice z5.h, p1, z5.h, z4.h -; CHECK-NEXT: splice z7.h, p1, z7.h, z6.h -; CHECK-NEXT: sunpklo z4.h, z3.b -; CHECK-NEXT: sunpklo z6.h, z2.b -; CHECK-NEXT: sunpklo z16.s, z4.h -; CHECK-NEXT: sunpklo z18.s, z6.h +; CHECK-NEXT: sdivr z16.s, p0/m, z16.s, z17.s +; CHECK-NEXT: sunpklo z2.s, z4.h +; CHECK-NEXT: sunpklo z5.s, z3.h ; CHECK-NEXT: ext z4.b, z4.b, z4.b, #8 -; CHECK-NEXT: ext z6.b, z6.b, z6.b, #8 +; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8 ; CHECK-NEXT: sunpklo z4.s, z4.h -; CHECK-NEXT: sunpklo z6.s, z6.h -; CHECK-NEXT: sdivr z16.s, p0/m, z16.s, z18.s -; CHECK-NEXT: sdivr z4.s, p0/m, z4.s, z6.s +; CHECK-NEXT: sunpklo z3.s, z3.h +; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z5.s +; CHECK-NEXT: ldr q5, [x1] +; CHECK-NEXT: mov z17.d, z5.d ; CHECK-NEXT: uzp1 z16.h, z16.h, z16.h -; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h -; CHECK-NEXT: splice z16.h, p1, z16.h, z4.h -; CHECK-NEXT: uzp1 z6.b, z17.b, z17.b -; CHECK-NEXT: uzp1 z5.b, z5.b, z5.b -; CHECK-NEXT: ptrue p0.b, vl8 -; CHECK-NEXT: uzp1 z4.b, z7.b, z7.b +; CHECK-NEXT: ext z17.b, z17.b, z5.b, #8 +; CHECK-NEXT: sunpklo z17.h, z17.b +; CHECK-NEXT: sdiv z3.s, p0/m, z3.s, z4.s +; CHECK-NEXT: ldr q4, [x0] +; CHECK-NEXT: sunpklo z19.s, z17.h +; CHECK-NEXT: ext z17.b, z17.b, z17.b, #8 +; CHECK-NEXT: mov z18.d, z4.d +; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h +; CHECK-NEXT: sunpklo z17.s, z17.h +; CHECK-NEXT: ext z18.b, z18.b, z4.b, #8 +; CHECK-NEXT: sunpklo z18.h, z18.b +; CHECK-NEXT: sunpklo z20.s, z18.h +; CHECK-NEXT: ext z18.b, z18.b, z18.b, #8 +; CHECK-NEXT: sdivr z6.s, p0/m, z6.s, z7.s +; CHECK-NEXT: sunpklo z18.s, z18.h +; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h +; CHECK-NEXT: sdivr z19.s, p0/m, z19.s, z20.s +; CHECK-NEXT: sunpklo z20.h, z4.b +; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h +; CHECK-NEXT: sunpklo z22.s, z20.h +; CHECK-NEXT: ext z20.b, z20.b, z20.b, #8 +; CHECK-NEXT: sunpklo z20.s, z20.h +; CHECK-NEXT: sdivr z17.s, p0/m, z17.s, z18.s +; CHECK-NEXT: 
sunpklo z18.h, z5.b +; CHECK-NEXT: uzp1 z7.h, z19.h, z19.h +; CHECK-NEXT: sunpklo z21.s, z18.h +; CHECK-NEXT: ext z18.b, z18.b, z18.b, #8 +; CHECK-NEXT: sunpklo z18.s, z18.h +; CHECK-NEXT: sdivr z21.s, p0/m, z21.s, z22.s +; CHECK-NEXT: uzp1 z17.h, z17.h, z17.h +; CHECK-NEXT: sdivr z18.s, p0/m, z18.s, z20.s +; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: uzp1 z19.h, z21.h, z21.h +; CHECK-NEXT: splice z7.h, p0, z7.h, z17.h +; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h +; CHECK-NEXT: splice z16.h, p0, z16.h, z6.h +; CHECK-NEXT: uzp1 z3.b, z7.b, z7.b +; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b ; CHECK-NEXT: uzp1 z7.b, z16.b, z16.b -; CHECK-NEXT: ptrue p1.b, vl16 -; CHECK-NEXT: splice z7.b, p0, z7.b, z4.b -; CHECK-NEXT: splice z5.b, p0, z5.b, z6.b -; CHECK-NEXT: mls z2.b, p1/m, z7.b, z3.b -; CHECK-NEXT: mls z0.b, p1/m, z5.b, z1.b +; CHECK-NEXT: uzp1 z18.h, z18.h, z18.h +; CHECK-NEXT: splice z19.h, p0, z19.h, z18.h +; CHECK-NEXT: ptrue p0.b, vl8 +; CHECK-NEXT: uzp1 z6.b, z19.b, z19.b +; CHECK-NEXT: splice z7.b, p0, z7.b, z2.b +; CHECK-NEXT: splice z6.b, p0, z6.b, z3.b +; CHECK-NEXT: movprfx z2, z4 +; CHECK-NEXT: mls z2.b, p1/m, z6.b, z5.b +; CHECK-NEXT: mls z0.b, p1/m, z7.b, z1.b ; CHECK-NEXT: stp q2, q0, [x0] ; CHECK-NEXT: ret %op1 = load <32 x i8>, ptr %a @@ -195,9 +197,9 @@ define <4 x i16> @srem_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-LABEL: srem_v4i16: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: sunpklo z2.s, z1.h ; CHECK-NEXT: sunpklo z3.s, z0.h ; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z3.s @@ -217,21 +219,21 @@ ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: mov z3.d, z0.d +; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: sunpklo z4.s, z0.h +; CHECK-NEXT: ptrue p1.h, vl8 ; CHECK-NEXT: ext z2.b, z2.b, z1.b, #8 ; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8 -; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: sunpklo z2.s, z2.h ; CHECK-NEXT: sunpklo z3.s, z3.h -; CHECK-NEXT: sunpklo z4.s, z1.h ; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z3.s -; CHECK-NEXT: sunpklo z3.s, z0.h -; CHECK-NEXT: sdiv z3.s, p0/m, z3.s, z4.s +; CHECK-NEXT: sunpklo z3.s, z1.h +; CHECK-NEXT: sdivr z3.s, p0/m, z3.s, z4.s +; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h ; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h -; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: splice z3.h, p0, z3.h, z2.h -; CHECK-NEXT: ptrue p0.h, vl8 -; CHECK-NEXT: mls z0.h, p0/m, z3.h, z1.h +; CHECK-NEXT: mls z0.h, p1/m, z3.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret %res = srem <8 x i16> %op1, %op2 @@ -241,40 +243,41 @@ define void @srem_v16i16(ptr %a, ptr %b) { ; CHECK-LABEL: srem_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q2, q0, [x0] +; CHECK-NEXT: ldp q4, q1, [x1] ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: ldr q0, [x0, #16] ; CHECK-NEXT: ptrue p1.h, vl8 -; CHECK-NEXT: mov z17.d, z2.d -; CHECK-NEXT: ext z17.b, z17.b, z2.b, #8 -; CHECK-NEXT: ldp q3, q1, [x1] -; CHECK-NEXT: mov z5.d, z0.d -; CHECK-NEXT: sunpklo z7.s, z0.h -; CHECK-NEXT: ext z5.b, z5.b, z0.b, #8 +; CHECK-NEXT: mov z2.d, z1.d +; CHECK-NEXT: mov z3.d, z0.d +; CHECK-NEXT: mov z5.d, z4.d +; CHECK-NEXT: sunpklo z16.s, z0.h +; CHECK-NEXT: ext z2.b, z2.b, z1.b, #8 +; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8 +; CHECK-NEXT: ext z5.b, z5.b, z4.b, #8 +; CHECK-NEXT: sunpklo z2.s, z2.h +; CHECK-NEXT: sunpklo z3.s, z3.h ; CHECK-NEXT: sunpklo z5.s, z5.h -; CHECK-NEXT: mov z16.d, z3.d -; 
CHECK-NEXT: ext z16.b, z16.b, z3.b, #8 -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: sunpklo z6.s, z1.h -; CHECK-NEXT: ext z4.b, z4.b, z1.b, #8 -; CHECK-NEXT: sdivr z6.s, p0/m, z6.s, z7.s -; CHECK-NEXT: sunpklo z4.s, z4.h -; CHECK-NEXT: sunpklo z7.s, z16.h -; CHECK-NEXT: sunpklo z16.s, z17.h -; CHECK-NEXT: sdivr z4.s, p0/m, z4.s, z5.s -; CHECK-NEXT: movprfx z5, z16 -; CHECK-NEXT: sdiv z5.s, p0/m, z5.s, z7.s +; CHECK-NEXT: sdivr z2.s, p0/m, z2.s, z3.s +; CHECK-NEXT: ldr q3, [x0] +; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: sunpklo z7.s, z3.h -; CHECK-NEXT: sunpklo z16.s, z2.h +; CHECK-NEXT: ext z6.b, z6.b, z3.b, #8 +; CHECK-NEXT: sunpklo z6.s, z6.h +; CHECK-NEXT: sdivr z5.s, p0/m, z5.s, z6.s +; CHECK-NEXT: sunpklo z6.s, z4.h +; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h +; CHECK-NEXT: sdivr z6.s, p0/m, z6.s, z7.s +; CHECK-NEXT: sunpklo z7.s, z1.h ; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h ; CHECK-NEXT: sdivr z7.s, p0/m, z7.s, z16.s ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h +; CHECK-NEXT: splice z6.h, p0, z6.h, z5.h ; CHECK-NEXT: uzp1 z7.h, z7.h, z7.h -; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h -; CHECK-NEXT: splice z7.h, p0, z7.h, z5.h -; CHECK-NEXT: uzp1 z5.h, z6.h, z6.h -; CHECK-NEXT: splice z5.h, p0, z5.h, z4.h -; CHECK-NEXT: mls z2.h, p1/m, z7.h, z3.h -; CHECK-NEXT: mls z0.h, p1/m, z5.h, z1.h +; CHECK-NEXT: splice z7.h, p0, z7.h, z2.h +; CHECK-NEXT: movprfx z2, z3 +; CHECK-NEXT: mls z2.h, p1/m, z6.h, z4.h +; CHECK-NEXT: mls z0.h, p1/m, z7.h, z1.h ; CHECK-NEXT: stp q2, q0, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i16>, ptr %a @@ -287,8 +290,8 @@ define <2 x i32> @srem_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-LABEL: srem_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: movprfx z2, z0 ; CHECK-NEXT: sdiv z2.s, p0/m, z2.s, z1.s @@ -302,8 +305,8 @@ define <4 x i32> @srem_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-LABEL: srem_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: movprfx z2, z0 ; CHECK-NEXT: sdiv z2.s, p0/m, z2.s, z1.s @@ -317,14 +320,15 @@ define void @srem_v8i32(ptr %a, ptr %b) { ; CHECK-LABEL: srem_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: movprfx z4, z0 -; CHECK-NEXT: sdiv z4.s, p0/m, z4.s, z2.s -; CHECK-NEXT: movprfx z5, z1 +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: movprfx z4, z1 +; CHECK-NEXT: sdiv z4.s, p0/m, z4.s, z0.s +; CHECK-NEXT: movprfx z5, z2 ; CHECK-NEXT: sdiv z5.s, p0/m, z5.s, z3.s -; CHECK-NEXT: mls z0.s, p0/m, z4.s, z2.s +; CHECK-NEXT: msb z0.s, p0/m, z4.s, z1.s +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: mls z1.s, p0/m, z5.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -338,8 +342,8 @@ define <1 x i64> @srem_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-LABEL: srem_v1i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: movprfx z2, z0 ; CHECK-NEXT: sdiv z2.d, p0/m, z2.d, z1.d @@ -353,8 +357,8 @@ define <2 x i64> @srem_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-LABEL: srem_v2i64: ; CHECK: // %bb.0: -; 
CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: movprfx z2, z0 ; CHECK-NEXT: sdiv z2.d, p0/m, z2.d, z1.d @@ -368,14 +372,15 @@ define void @srem_v4i64(ptr %a, ptr %b) { ; CHECK-LABEL: srem_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: movprfx z4, z0 -; CHECK-NEXT: sdiv z4.d, p0/m, z4.d, z2.d -; CHECK-NEXT: movprfx z5, z1 +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: movprfx z4, z1 +; CHECK-NEXT: sdiv z4.d, p0/m, z4.d, z0.d +; CHECK-NEXT: movprfx z5, z2 ; CHECK-NEXT: sdiv z5.d, p0/m, z5.d, z3.d -; CHECK-NEXT: mls z0.d, p0/m, z4.d, z2.d +; CHECK-NEXT: msb z0.d, p0/m, z4.d, z1.d +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: mls z1.d, p0/m, z5.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -417,21 +422,21 @@ ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: uunpklo z2.h, z1.b ; CHECK-NEXT: uunpklo z3.h, z0.b +; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: uunpklo z4.s, z2.h ; CHECK-NEXT: uunpklo z5.s, z3.h ; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8 ; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8 -; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: uunpklo z2.s, z2.h ; CHECK-NEXT: uunpklo z3.s, z3.h ; CHECK-NEXT: udivr z4.s, p0/m, z4.s, z5.s ; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z3.s -; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h -; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: splice z4.h, p0, z4.h, z2.h +; CHECK-NEXT: uzp1 z3.h, z4.h, z4.h +; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h +; CHECK-NEXT: splice z3.h, p0, z3.h, z2.h ; CHECK-NEXT: ptrue p0.b, vl8 -; CHECK-NEXT: uzp1 z2.b, z4.b, z4.b +; CHECK-NEXT: uzp1 z2.b, z3.b, z3.b ; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -446,43 +451,42 @@ ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: mov z3.d, z0.d +; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: ptrue p1.b, vl16 ; CHECK-NEXT: ext z2.b, z2.b, z1.b, #8 ; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8 ; CHECK-NEXT: uunpklo z2.h, z2.b ; CHECK-NEXT: uunpklo z3.h, z3.b -; CHECK-NEXT: uunpklo z5.s, z2.h -; CHECK-NEXT: uunpklo z6.s, z3.h +; CHECK-NEXT: uunpklo z4.s, z2.h +; CHECK-NEXT: uunpklo z5.s, z3.h ; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8 ; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8 -; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: uunpklo z2.s, z2.h ; CHECK-NEXT: uunpklo z3.s, z3.h -; CHECK-NEXT: udivr z5.s, p0/m, z5.s, z6.s +; CHECK-NEXT: udivr z4.s, p0/m, z4.s, z5.s +; CHECK-NEXT: uunpklo z5.h, z0.b +; CHECK-NEXT: uunpklo z7.s, z5.h +; CHECK-NEXT: ext z5.b, z5.b, z5.b, #8 +; CHECK-NEXT: uunpklo z5.s, z5.h ; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z3.s -; CHECK-NEXT: ptrue p1.h, vl4 -; CHECK-NEXT: uunpklo z4.h, z1.b -; CHECK-NEXT: uunpklo z6.h, z0.b -; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h -; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h -; CHECK-NEXT: uunpklo z3.s, z4.h -; CHECK-NEXT: splice z5.h, p1, z5.h, z2.h -; CHECK-NEXT: uunpklo z2.s, z6.h -; CHECK-NEXT: ext z4.b, z4.b, z4.b, #8 -; CHECK-NEXT: ext z6.b, z6.b, z6.b, #8 -; CHECK-NEXT: uunpklo z4.s, z4.h -; CHECK-NEXT: uunpklo z6.s, z6.h -; CHECK-NEXT: udiv z2.s, p0/m, z2.s, z3.s -; CHECK-NEXT: movprfx z3, z6 -; CHECK-NEXT: udiv z3.s, p0/m, z3.s, z4.s +; CHECK-NEXT: uunpklo z3.h, z1.b +; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h +; CHECK-NEXT: uunpklo z6.s, z3.h +; CHECK-NEXT: ext z3.b, z3.b, 
z3.b, #8 +; CHECK-NEXT: uunpklo z3.s, z3.h +; CHECK-NEXT: udivr z6.s, p0/m, z6.s, z7.s ; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h +; CHECK-NEXT: udivr z3.s, p0/m, z3.s, z5.s +; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: uzp1 z5.h, z6.h, z6.h +; CHECK-NEXT: splice z4.h, p0, z4.h, z2.h +; CHECK-NEXT: uzp1 z2.b, z4.b, z4.b ; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h -; CHECK-NEXT: uzp1 z4.b, z5.b, z5.b -; CHECK-NEXT: splice z2.h, p1, z2.h, z3.h +; CHECK-NEXT: splice z5.h, p0, z5.h, z3.h ; CHECK-NEXT: ptrue p0.b, vl8 -; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b -; CHECK-NEXT: splice z2.b, p0, z2.b, z4.b -; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: mls z0.b, p0/m, z2.b, z1.b +; CHECK-NEXT: uzp1 z3.b, z5.b, z5.b +; CHECK-NEXT: splice z3.b, p0, z3.b, z2.b +; CHECK-NEXT: mls z0.b, p1/m, z3.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret %res = urem <16 x i8> %op1, %op2 @@ -492,80 +496,83 @@ define void @urem_v32i8(ptr %a, ptr %b) { ; CHECK-LABEL: urem_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q2, q0, [x0] +; CHECK-NEXT: ldr q0, [x0, #16] +; CHECK-NEXT: ldr q1, [x1, #16] ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ptrue p1.h, vl4 -; CHECK-NEXT: ldp q3, q1, [x1] -; CHECK-NEXT: mov z5.d, z0.d -; CHECK-NEXT: uunpklo z7.h, z0.b -; CHECK-NEXT: ext z5.b, z5.b, z0.b, #8 -; CHECK-NEXT: uunpklo z5.h, z5.b -; CHECK-NEXT: uunpklo z18.s, z5.h -; CHECK-NEXT: ext z5.b, z5.b, z5.b, #8 -; CHECK-NEXT: uunpklo z5.s, z5.h -; CHECK-NEXT: mov z4.d, z1.d +; CHECK-NEXT: ptrue p1.b, vl16 +; CHECK-NEXT: mov z2.d, z1.d +; CHECK-NEXT: mov z3.d, z0.d ; CHECK-NEXT: uunpklo z6.h, z1.b -; CHECK-NEXT: ext z4.b, z4.b, z1.b, #8 -; CHECK-NEXT: uunpklo z16.s, z6.h -; CHECK-NEXT: uunpklo z4.h, z4.b -; CHECK-NEXT: ext z6.b, z6.b, z6.b, #8 -; CHECK-NEXT: uunpklo z17.s, z4.h -; CHECK-NEXT: ext z4.b, z4.b, z4.b, #8 -; CHECK-NEXT: uunpklo z4.s, z4.h -; CHECK-NEXT: udivr z17.s, p0/m, z17.s, z18.s -; CHECK-NEXT: udivr z4.s, p0/m, z4.s, z5.s -; CHECK-NEXT: uunpklo z18.s, z7.h -; CHECK-NEXT: uzp1 z17.h, z17.h, z17.h -; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h -; CHECK-NEXT: ext z7.b, z7.b, z7.b, #8 -; CHECK-NEXT: uunpklo z5.s, z6.h -; CHECK-NEXT: splice z17.h, p1, z17.h, z4.h -; CHECK-NEXT: uunpklo z4.s, z7.h -; CHECK-NEXT: mov z6.d, z3.d -; CHECK-NEXT: mov z7.d, z2.d -; CHECK-NEXT: ext z6.b, z6.b, z3.b, #8 -; CHECK-NEXT: ext z7.b, z7.b, z2.b, #8 -; CHECK-NEXT: udivr z16.s, p0/m, z16.s, z18.s -; CHECK-NEXT: uunpklo z6.h, z6.b -; CHECK-NEXT: uunpklo z7.h, z7.b -; CHECK-NEXT: udiv z4.s, p0/m, z4.s, z5.s -; CHECK-NEXT: uzp1 z5.h, z16.h, z16.h +; CHECK-NEXT: uunpklo z7.h, z0.b +; CHECK-NEXT: ext z2.b, z2.b, z1.b, #8 +; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8 ; CHECK-NEXT: uunpklo z16.s, z6.h -; CHECK-NEXT: uunpklo z18.s, z7.h ; CHECK-NEXT: ext z6.b, z6.b, z6.b, #8 +; CHECK-NEXT: uunpklo z17.s, z7.h ; CHECK-NEXT: ext z7.b, z7.b, z7.b, #8 +; CHECK-NEXT: uunpklo z4.h, z2.b +; CHECK-NEXT: uunpklo z3.h, z3.b ; CHECK-NEXT: uunpklo z6.s, z6.h ; CHECK-NEXT: uunpklo z7.s, z7.h -; CHECK-NEXT: udivr z16.s, p0/m, z16.s, z18.s -; CHECK-NEXT: udivr z6.s, p0/m, z6.s, z7.s -; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h -; CHECK-NEXT: uzp1 z7.h, z16.h, z16.h -; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h -; CHECK-NEXT: splice z5.h, p1, z5.h, z4.h -; CHECK-NEXT: splice z7.h, p1, z7.h, z6.h -; CHECK-NEXT: uunpklo z4.h, z3.b -; CHECK-NEXT: uunpklo z6.h, z2.b -; CHECK-NEXT: uunpklo z16.s, z4.h -; CHECK-NEXT: uunpklo z18.s, z6.h +; CHECK-NEXT: udivr z16.s, p0/m, z16.s, z17.s +; CHECK-NEXT: uunpklo z2.s, z4.h +; CHECK-NEXT: uunpklo z5.s, z3.h ; CHECK-NEXT: ext z4.b, z4.b, z4.b, #8 -; 
CHECK-NEXT: ext z6.b, z6.b, z6.b, #8 +; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8 ; CHECK-NEXT: uunpklo z4.s, z4.h -; CHECK-NEXT: uunpklo z6.s, z6.h -; CHECK-NEXT: udivr z16.s, p0/m, z16.s, z18.s -; CHECK-NEXT: udivr z4.s, p0/m, z4.s, z6.s +; CHECK-NEXT: uunpklo z3.s, z3.h +; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z5.s +; CHECK-NEXT: ldr q5, [x1] +; CHECK-NEXT: mov z17.d, z5.d ; CHECK-NEXT: uzp1 z16.h, z16.h, z16.h -; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h -; CHECK-NEXT: splice z16.h, p1, z16.h, z4.h -; CHECK-NEXT: uzp1 z6.b, z17.b, z17.b -; CHECK-NEXT: uzp1 z5.b, z5.b, z5.b -; CHECK-NEXT: ptrue p0.b, vl8 -; CHECK-NEXT: uzp1 z4.b, z7.b, z7.b +; CHECK-NEXT: ext z17.b, z17.b, z5.b, #8 +; CHECK-NEXT: uunpklo z17.h, z17.b +; CHECK-NEXT: udiv z3.s, p0/m, z3.s, z4.s +; CHECK-NEXT: ldr q4, [x0] +; CHECK-NEXT: uunpklo z19.s, z17.h +; CHECK-NEXT: ext z17.b, z17.b, z17.b, #8 +; CHECK-NEXT: mov z18.d, z4.d +; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h +; CHECK-NEXT: uunpklo z17.s, z17.h +; CHECK-NEXT: ext z18.b, z18.b, z4.b, #8 +; CHECK-NEXT: uunpklo z18.h, z18.b +; CHECK-NEXT: uunpklo z20.s, z18.h +; CHECK-NEXT: ext z18.b, z18.b, z18.b, #8 +; CHECK-NEXT: udivr z6.s, p0/m, z6.s, z7.s +; CHECK-NEXT: uunpklo z18.s, z18.h +; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h +; CHECK-NEXT: udivr z19.s, p0/m, z19.s, z20.s +; CHECK-NEXT: uunpklo z20.h, z4.b +; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h +; CHECK-NEXT: uunpklo z22.s, z20.h +; CHECK-NEXT: ext z20.b, z20.b, z20.b, #8 +; CHECK-NEXT: uunpklo z20.s, z20.h +; CHECK-NEXT: udivr z17.s, p0/m, z17.s, z18.s +; CHECK-NEXT: uunpklo z18.h, z5.b +; CHECK-NEXT: uzp1 z7.h, z19.h, z19.h +; CHECK-NEXT: uunpklo z21.s, z18.h +; CHECK-NEXT: ext z18.b, z18.b, z18.b, #8 +; CHECK-NEXT: uunpklo z18.s, z18.h +; CHECK-NEXT: udivr z21.s, p0/m, z21.s, z22.s +; CHECK-NEXT: uzp1 z17.h, z17.h, z17.h +; CHECK-NEXT: udivr z18.s, p0/m, z18.s, z20.s +; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: uzp1 z19.h, z21.h, z21.h +; CHECK-NEXT: splice z7.h, p0, z7.h, z17.h +; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h +; CHECK-NEXT: splice z16.h, p0, z16.h, z6.h +; CHECK-NEXT: uzp1 z3.b, z7.b, z7.b +; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b ; CHECK-NEXT: uzp1 z7.b, z16.b, z16.b -; CHECK-NEXT: ptrue p1.b, vl16 -; CHECK-NEXT: splice z7.b, p0, z7.b, z4.b -; CHECK-NEXT: splice z5.b, p0, z5.b, z6.b -; CHECK-NEXT: mls z2.b, p1/m, z7.b, z3.b -; CHECK-NEXT: mls z0.b, p1/m, z5.b, z1.b +; CHECK-NEXT: uzp1 z18.h, z18.h, z18.h +; CHECK-NEXT: splice z19.h, p0, z19.h, z18.h +; CHECK-NEXT: ptrue p0.b, vl8 +; CHECK-NEXT: uzp1 z6.b, z19.b, z19.b +; CHECK-NEXT: splice z7.b, p0, z7.b, z2.b +; CHECK-NEXT: splice z6.b, p0, z6.b, z3.b +; CHECK-NEXT: movprfx z2, z4 +; CHECK-NEXT: mls z2.b, p1/m, z6.b, z5.b +; CHECK-NEXT: mls z0.b, p1/m, z7.b, z1.b ; CHECK-NEXT: stp q2, q0, [x0] ; CHECK-NEXT: ret %op1 = load <32 x i8>, ptr %a @@ -578,9 +585,9 @@ define <4 x i16> @urem_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-LABEL: urem_v4i16: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: uunpklo z2.s, z1.h ; CHECK-NEXT: uunpklo z3.s, z0.h ; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z3.s @@ -600,21 +607,21 @@ ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: mov z3.d, z0.d +; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: uunpklo z4.s, z0.h +; CHECK-NEXT: ptrue p1.h, vl8 ; CHECK-NEXT: ext z2.b, z2.b, z1.b, #8 ; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8 -; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: 
uunpklo z2.s, z2.h ; CHECK-NEXT: uunpklo z3.s, z3.h -; CHECK-NEXT: uunpklo z4.s, z1.h ; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z3.s -; CHECK-NEXT: uunpklo z3.s, z0.h -; CHECK-NEXT: udiv z3.s, p0/m, z3.s, z4.s +; CHECK-NEXT: uunpklo z3.s, z1.h +; CHECK-NEXT: udivr z3.s, p0/m, z3.s, z4.s +; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h ; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h -; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: splice z3.h, p0, z3.h, z2.h -; CHECK-NEXT: ptrue p0.h, vl8 -; CHECK-NEXT: mls z0.h, p0/m, z3.h, z1.h +; CHECK-NEXT: mls z0.h, p1/m, z3.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret %res = urem <8 x i16> %op1, %op2 @@ -624,40 +631,41 @@ define void @urem_v16i16(ptr %a, ptr %b) { ; CHECK-LABEL: urem_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q2, q0, [x0] +; CHECK-NEXT: ldp q4, q1, [x1] ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: ldr q0, [x0, #16] ; CHECK-NEXT: ptrue p1.h, vl8 -; CHECK-NEXT: mov z17.d, z2.d -; CHECK-NEXT: ext z17.b, z17.b, z2.b, #8 -; CHECK-NEXT: ldp q3, q1, [x1] -; CHECK-NEXT: mov z5.d, z0.d -; CHECK-NEXT: uunpklo z7.s, z0.h -; CHECK-NEXT: ext z5.b, z5.b, z0.b, #8 +; CHECK-NEXT: mov z2.d, z1.d +; CHECK-NEXT: mov z3.d, z0.d +; CHECK-NEXT: mov z5.d, z4.d +; CHECK-NEXT: uunpklo z16.s, z0.h +; CHECK-NEXT: ext z2.b, z2.b, z1.b, #8 +; CHECK-NEXT: ext z3.b, z3.b, z0.b, #8 +; CHECK-NEXT: ext z5.b, z5.b, z4.b, #8 +; CHECK-NEXT: uunpklo z2.s, z2.h +; CHECK-NEXT: uunpklo z3.s, z3.h ; CHECK-NEXT: uunpklo z5.s, z5.h -; CHECK-NEXT: mov z16.d, z3.d -; CHECK-NEXT: ext z16.b, z16.b, z3.b, #8 -; CHECK-NEXT: mov z4.d, z1.d -; CHECK-NEXT: uunpklo z6.s, z1.h -; CHECK-NEXT: ext z4.b, z4.b, z1.b, #8 -; CHECK-NEXT: udivr z6.s, p0/m, z6.s, z7.s -; CHECK-NEXT: uunpklo z4.s, z4.h -; CHECK-NEXT: uunpklo z7.s, z16.h -; CHECK-NEXT: uunpklo z16.s, z17.h -; CHECK-NEXT: udivr z4.s, p0/m, z4.s, z5.s -; CHECK-NEXT: movprfx z5, z16 -; CHECK-NEXT: udiv z5.s, p0/m, z5.s, z7.s +; CHECK-NEXT: udivr z2.s, p0/m, z2.s, z3.s +; CHECK-NEXT: ldr q3, [x0] +; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: uunpklo z7.s, z3.h -; CHECK-NEXT: uunpklo z16.s, z2.h +; CHECK-NEXT: ext z6.b, z6.b, z3.b, #8 +; CHECK-NEXT: uunpklo z6.s, z6.h +; CHECK-NEXT: udivr z5.s, p0/m, z5.s, z6.s +; CHECK-NEXT: uunpklo z6.s, z4.h +; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h +; CHECK-NEXT: udivr z6.s, p0/m, z6.s, z7.s +; CHECK-NEXT: uunpklo z7.s, z1.h ; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h ; CHECK-NEXT: udivr z7.s, p0/m, z7.s, z16.s ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h +; CHECK-NEXT: splice z6.h, p0, z6.h, z5.h ; CHECK-NEXT: uzp1 z7.h, z7.h, z7.h -; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h -; CHECK-NEXT: splice z7.h, p0, z7.h, z5.h -; CHECK-NEXT: uzp1 z5.h, z6.h, z6.h -; CHECK-NEXT: splice z5.h, p0, z5.h, z4.h -; CHECK-NEXT: mls z2.h, p1/m, z7.h, z3.h -; CHECK-NEXT: mls z0.h, p1/m, z5.h, z1.h +; CHECK-NEXT: splice z7.h, p0, z7.h, z2.h +; CHECK-NEXT: movprfx z2, z3 +; CHECK-NEXT: mls z2.h, p1/m, z6.h, z4.h +; CHECK-NEXT: mls z0.h, p1/m, z7.h, z1.h ; CHECK-NEXT: stp q2, q0, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i16>, ptr %a @@ -670,8 +678,8 @@ define <2 x i32> @urem_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-LABEL: urem_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: movprfx z2, z0 ; CHECK-NEXT: udiv z2.s, p0/m, z2.s, z1.s @@ -685,8 +693,8 @@ define <4 x i32> @urem_v4i32(<4 x i32> %op1, <4 x i32> %op2) 
{ ; CHECK-LABEL: urem_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: movprfx z2, z0 ; CHECK-NEXT: udiv z2.s, p0/m, z2.s, z1.s @@ -700,14 +708,15 @@ define void @urem_v8i32(ptr %a, ptr %b) { ; CHECK-LABEL: urem_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: movprfx z4, z0 -; CHECK-NEXT: udiv z4.s, p0/m, z4.s, z2.s -; CHECK-NEXT: movprfx z5, z1 +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: movprfx z4, z1 +; CHECK-NEXT: udiv z4.s, p0/m, z4.s, z0.s +; CHECK-NEXT: movprfx z5, z2 ; CHECK-NEXT: udiv z5.s, p0/m, z5.s, z3.s -; CHECK-NEXT: mls z0.s, p0/m, z4.s, z2.s +; CHECK-NEXT: msb z0.s, p0/m, z4.s, z1.s +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: mls z1.s, p0/m, z5.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -721,8 +730,8 @@ define <1 x i64> @urem_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-LABEL: urem_v1i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: movprfx z2, z0 ; CHECK-NEXT: udiv z2.d, p0/m, z2.d, z1.d @@ -736,8 +745,8 @@ define <2 x i64> @urem_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-LABEL: urem_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: movprfx z2, z0 ; CHECK-NEXT: udiv z2.d, p0/m, z2.d, z1.d @@ -751,14 +760,15 @@ define void @urem_v4i64(ptr %a, ptr %b) { ; CHECK-LABEL: urem_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: movprfx z4, z0 -; CHECK-NEXT: udiv z4.d, p0/m, z4.d, z2.d -; CHECK-NEXT: movprfx z5, z1 +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: movprfx z4, z1 +; CHECK-NEXT: udiv z4.d, p0/m, z4.d, z0.d +; CHECK-NEXT: movprfx z5, z2 ; CHECK-NEXT: udiv z5.d, p0/m, z5.d, z3.d -; CHECK-NEXT: mls z0.d, p0/m, z4.d, z2.d +; CHECK-NEXT: msb z0.d, p0/m, z4.d, z1.d +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: mls z1.d, p0/m, z5.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-select.ll @@ -6,8 +6,8 @@ define <4 x i8> @select_v4i8(<4 x i8> %op1, <4 x i8> %op2, i1 %mask) { ; CHECK-LABEL: select_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x1 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: and w8, w0, #0x1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: mov z2.h, w8 @@ -22,8 +22,8 @@ define <8 x i8> @select_v8i8(<8 x i8> %op1, <8 x i8> %op2, i1 %mask) { ; CHECK-LABEL: select_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x1 ; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: and w8, w0, #0x1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: mov z2.b, w8 @@ -38,8 +38,8 @@ define <16 x 
i8> @select_v16i8(<16 x i8> %op1, <16 x i8> %op2, i1 %mask) { ; CHECK-LABEL: select_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x1 ; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: and w8, w0, #0x1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: mov z2.b, w8 @@ -54,14 +54,14 @@ define void @select_v32i8(ptr %a, ptr %b, i1 %mask) { ; CHECK-LABEL: select_v32i8: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: and w8, w2, #0x1 +; CHECK-NEXT: mov z0.b, w8 +; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0 ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x0, #16] -; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: ldr q2, [x1] ; CHECK-NEXT: ldr q3, [x1, #16] -; CHECK-NEXT: mov z4.b, w8 -; CHECK-NEXT: cmpne p0.b, p0/z, z4.b, #0 ; CHECK-NEXT: sel z0.b, p0, z0.b, z2.b ; CHECK-NEXT: sel z1.b, p0, z1.b, z3.b ; CHECK-NEXT: stp q0, q1, [x0] @@ -76,8 +76,8 @@ define <2 x i16> @select_v2i16(<2 x i16> %op1, <2 x i16> %op2, i1 %mask) { ; CHECK-LABEL: select_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x1 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: and w8, w0, #0x1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: mov z2.s, w8 @@ -92,8 +92,8 @@ define <4 x i16> @select_v4i16(<4 x i16> %op1, <4 x i16> %op2, i1 %mask) { ; CHECK-LABEL: select_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x1 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: and w8, w0, #0x1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: mov z2.h, w8 @@ -108,8 +108,8 @@ define <8 x i16> @select_v8i16(<8 x i16> %op1, <8 x i16> %op2, i1 %mask) { ; CHECK-LABEL: select_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x1 ; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: and w8, w0, #0x1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: mov z2.h, w8 @@ -124,14 +124,14 @@ define void @select_v16i16(ptr %a, ptr %b, i1 %mask) { ; CHECK-LABEL: select_v16i16: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: and w8, w2, #0x1 +; CHECK-NEXT: mov z0.h, w8 +; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x0, #16] -; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: ldr q2, [x1] ; CHECK-NEXT: ldr q3, [x1, #16] -; CHECK-NEXT: mov z4.h, w8 -; CHECK-NEXT: cmpne p0.h, p0/z, z4.h, #0 ; CHECK-NEXT: sel z0.h, p0, z0.h, z2.h ; CHECK-NEXT: sel z1.h, p0, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] @@ -146,8 +146,8 @@ define <2 x i32> @select_v2i32(<2 x i32> %op1, <2 x i32> %op2, i1 %mask) { ; CHECK-LABEL: select_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x1 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: and w8, w0, #0x1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: mov z2.s, w8 @@ -162,8 +162,8 @@ define <4 x i32> @select_v4i32(<4 x i32> %op1, <4 x i32> %op2, i1 %mask) { ; CHECK-LABEL: select_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0x1 ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: and w8, w0, #0x1 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: mov z2.s, w8 @@ -178,14 +178,14 @@ define void @select_v8i32(ptr %a, ptr %b, i1 %mask) { ; CHECK-LABEL: select_v8i32: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: and w8, w2, #0x1 +; CHECK-NEXT: mov z0.s, w8 +; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0 ; CHECK-NEXT: ldr q0, [x0] ; 
CHECK-NEXT: ldr q1, [x0, #16] -; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: ldr q2, [x1] ; CHECK-NEXT: ldr q3, [x1, #16] -; CHECK-NEXT: mov z4.s, w8 -; CHECK-NEXT: cmpne p0.s, p0/z, z4.s, #0 ; CHECK-NEXT: sel z0.s, p0, z0.s, z2.s ; CHECK-NEXT: sel z1.s, p0, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] @@ -200,9 +200,9 @@ define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> %op2, i1 %mask) { ; CHECK-LABEL: select_v1i64: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-NEXT: and x8, x0, #0x1 -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: mov z2.d, x8 @@ -217,9 +217,9 @@ define <2 x i64> @select_v2i64(<2 x i64> %op1, <2 x i64> %op2, i1 %mask) { ; CHECK-LABEL: select_v2i64: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-NEXT: and x8, x0, #0x1 -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: mov z2.d, x8 @@ -234,15 +234,15 @@ define void @select_v4i64(ptr %a, ptr %b, i1 %mask) { ; CHECK-LABEL: select_v4i64: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: // kill: def $w2 killed $w2 def $x2 ; CHECK-NEXT: and x8, x2, #0x1 +; CHECK-NEXT: mov z0.d, x8 +; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ldr q1, [x0, #16] -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: ldr q2, [x1] ; CHECK-NEXT: ldr q3, [x1, #16] -; CHECK-NEXT: mov z4.d, x8 -; CHECK-NEXT: cmpne p0.d, p0/z, z4.d, #0 ; CHECK-NEXT: sel z0.d, p0, z0.d, z2.d ; CHECK-NEXT: sel z1.d, p0, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-shifts.ll @@ -10,11 +10,11 @@ define <4 x i8> @ashr_v4i8(<4 x i8> %op1, <4 x i8> %op2) { ; CHECK-LABEL: ashr_v4i8: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: sxtb z0.h, p0/m, z0.h ; CHECK-NEXT: and z1.h, z1.h, #0xff +; CHECK-NEXT: sxtb z0.h, p0/m, z0.h ; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -25,8 +25,8 @@ define <8 x i8> @ashr_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-LABEL: ashr_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl8 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: asr z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -38,8 +38,8 @@ define <16 x i8> @ashr_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-LABEL: ashr_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: asr z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -51,10 +51,11 @@ define void @ashr_v32i8(ptr %a, ptr %b) { ; CHECK-LABEL: ashr_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: ldp q2, q3, [x1] -; 
CHECK-NEXT: asr z0.b, p0/m, z0.b, z2.b +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: asrr z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: asr z1.b, p0/m, z1.b, z3.b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -68,11 +69,11 @@ define <2 x i16> @ashr_v2i16(<2 x i16> %op1, <2 x i16> %op2) { ; CHECK-LABEL: ashr_v2i16: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s, vl2 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: sxth z0.s, p0/m, z0.s ; CHECK-NEXT: and z1.s, z1.s, #0xffff +; CHECK-NEXT: sxth z0.s, p0/m, z0.s ; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -83,8 +84,8 @@ define <4 x i16> @ashr_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-LABEL: ashr_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -96,8 +97,8 @@ define <8 x i16> @ashr_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-LABEL: ashr_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -109,10 +110,11 @@ define void @ashr_v16i16(ptr %a, ptr %b) { ; CHECK-LABEL: ashr_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: asr z0.h, p0/m, z0.h, z2.h +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: asrr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: asr z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -126,8 +128,8 @@ define <2 x i32> @ashr_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-LABEL: ashr_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -139,8 +141,8 @@ define <4 x i32> @ashr_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-LABEL: ashr_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -152,10 +154,11 @@ define void @ashr_v8i32(ptr %a, ptr %b) { ; CHECK-LABEL: ashr_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: asr z0.s, p0/m, z0.s, z2.s +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: asrr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: asr z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -169,8 +172,8 @@ define <1 x i64> @ashr_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-LABEL: ashr_v1i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl1 
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: asr z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -182,8 +185,8 @@ define <2 x i64> @ashr_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-LABEL: ashr_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: asr z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -195,10 +198,11 @@ define void @ashr_v4i64(ptr %a, ptr %b) { ; CHECK-LABEL: ashr_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: asr z0.d, p0/m, z0.d, z2.d +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: asrr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: asr z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -216,9 +220,9 @@ define <4 x i8> @lshr_v4i8(<4 x i8> %op1, <4 x i8> %op2) { ; CHECK-LABEL: lshr_v4i8: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: and z1.h, z1.h, #0xff ; CHECK-NEXT: and z0.h, z0.h, #0xff ; CHECK-NEXT: lsr z0.h, p0/m, z0.h, z1.h @@ -231,8 +235,8 @@ define <8 x i8> @lshr_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-LABEL: lshr_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl8 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: lsr z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -244,8 +248,8 @@ define <16 x i8> @lshr_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-LABEL: lshr_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: lsr z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -257,10 +261,11 @@ define void @lshr_v32i8(ptr %a, ptr %b) { ; CHECK-LABEL: lshr_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: lsr z0.b, p0/m, z0.b, z2.b +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: lsrr z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: lsr z1.b, p0/m, z1.b, z3.b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -274,9 +279,9 @@ define <2 x i16> @lshr_v2i16(<2 x i16> %op1, <2 x i16> %op2) { ; CHECK-LABEL: lshr_v2i16: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s, vl2 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: ptrue p0.s, vl2 ; CHECK-NEXT: and z1.s, z1.s, #0xffff ; CHECK-NEXT: and z0.s, z0.s, #0xffff ; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z1.s @@ -289,8 +294,8 @@ define <4 x i16> @lshr_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-LABEL: lshr_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: lsr z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 
killed $z0 @@ -302,8 +307,8 @@ define <8 x i16> @lshr_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-LABEL: lshr_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: lsr z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -315,10 +320,11 @@ define void @lshr_v16i16(ptr %a, ptr %b) { ; CHECK-LABEL: lshr_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: lsr z0.h, p0/m, z0.h, z2.h +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: lsrr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: lsr z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -332,8 +338,8 @@ define <2 x i32> @lshr_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-LABEL: lshr_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -345,8 +351,8 @@ define <4 x i32> @lshr_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-LABEL: lshr_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -358,10 +364,11 @@ define void @lshr_v8i32(ptr %a, ptr %b) { ; CHECK-LABEL: lshr_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z2.s +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: lsrr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: lsr z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -375,8 +382,8 @@ define <1 x i64> @lshr_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-LABEL: lshr_v1i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: lsr z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -388,8 +395,8 @@ define <2 x i64> @lshr_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-LABEL: lshr_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: lsr z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -401,10 +408,11 @@ define void @lshr_v4i64(ptr %a, ptr %b) { ; CHECK-LABEL: lshr_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: lsr z0.d, p0/m, z0.d, z2.d +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: lsrr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: lsr z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -422,9 +430,9 @@ define <2 x i8> @shl_v2i8(<2 x i8> %op1, <2 x i8> %op2) { ; CHECK-LABEL: 
shl_v2i8: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.s, vl2 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: ptrue p0.s, vl2 ; CHECK-NEXT: and z1.s, z1.s, #0xff ; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -436,9 +444,9 @@ define <4 x i8> @shl_v4i8(<4 x i8> %op1, <4 x i8> %op2) { ; CHECK-LABEL: shl_v4i8: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: and z1.h, z1.h, #0xff ; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -450,8 +458,8 @@ define <8 x i8> @shl_v8i8(<8 x i8> %op1, <8 x i8> %op2) { ; CHECK-LABEL: shl_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl8 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: lsl z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -463,8 +471,8 @@ define <16 x i8> @shl_v16i8(<16 x i8> %op1, <16 x i8> %op2) { ; CHECK-LABEL: shl_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: lsl z0.b, p0/m, z0.b, z1.b ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -476,10 +484,11 @@ define void @shl_v32i8(ptr %a, ptr %b) { ; CHECK-LABEL: shl_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: lsl z0.b, p0/m, z0.b, z2.b +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: lslr z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: lsl z1.b, p0/m, z1.b, z3.b ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -493,8 +502,8 @@ define <4 x i16> @shl_v4i16(<4 x i16> %op1, <4 x i16> %op2) { ; CHECK-LABEL: shl_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -506,8 +515,8 @@ define <8 x i16> @shl_v8i16(<8 x i16> %op1, <8 x i16> %op2) { ; CHECK-LABEL: shl_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -519,10 +528,11 @@ define void @shl_v16i16(ptr %a, ptr %b) { ; CHECK-LABEL: shl_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z2.h +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: lslr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: lsl z1.h, p0/m, z1.h, z3.h ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -536,8 +546,8 @@ define <2 x i32> @shl_v2i32(<2 x i32> %op1, <2 x i32> %op2) { ; CHECK-LABEL: shl_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; 
CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -549,8 +559,8 @@ define <4 x i32> @shl_v4i32(<4 x i32> %op1, <4 x i32> %op2) { ; CHECK-LABEL: shl_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -562,10 +572,11 @@ define void @shl_v8i32(ptr %a, ptr %b) { ; CHECK-LABEL: shl_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z2.s +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: lslr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: lsl z1.s, p0/m, z1.s, z3.s ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret @@ -579,8 +590,8 @@ define <1 x i64> @shl_v1i64(<1 x i64> %op1, <1 x i64> %op2) { ; CHECK-LABEL: shl_v1i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl1 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -592,8 +603,8 @@ define <2 x i64> @shl_v2i64(<2 x i64> %op1, <2 x i64> %op2) { ; CHECK-LABEL: shl_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1 ; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -605,10 +616,11 @@ define void @shl_v4i64(ptr %a, ptr %b) { ; CHECK-LABEL: shl_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z2.d +; CHECK-NEXT: ldp q0, q3, [x1] +; CHECK-NEXT: ldp q1, q2, [x0] +; CHECK-NEXT: lslr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: movprfx z1, z2 ; CHECK-NEXT: lsl z1.d, p0/m, z1.d, z3.d ; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-to-fp.ll @@ -10,8 +10,8 @@ define <4 x half> @ucvtf_v4i16_v4f16(<4 x i16> %op1) { ; CHECK-LABEL: ucvtf_v4i16_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ucvtf z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -22,8 +22,8 @@ define void @ucvtf_v8i16_v8f16(ptr %a, ptr %b) { ; CHECK-LABEL: ucvtf_v8i16_v8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ucvtf z0.h, p0/m, z0.h ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret @@ -36,8 +36,8 @@ define void @ucvtf_v16i16_v16f16(ptr %a, ptr %b) { ; CHECK-LABEL: ucvtf_v16i16_v16f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ucvtf z0.h, p0/m, z0.h ; CHECK-NEXT: ucvtf z1.h, p0/m, z1.h ; 
CHECK-NEXT: stp q0, q1, [x1] @@ -55,8 +55,8 @@ define <2 x float> @ucvtf_v2i16_v2f32(<2 x i16> %op1) { ; CHECK-LABEL: ucvtf_v2i16_v2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: and z0.s, z0.s, #0xffff ; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -68,8 +68,8 @@ define <4 x float> @ucvtf_v4i16_v4f32(<4 x i16> %op1) { ; CHECK-LABEL: ucvtf_v4i16_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -99,21 +99,20 @@ define void @ucvtf_v16i16_v16f32(ptr %a, ptr %b) { ; CHECK-LABEL: ucvtf_v16i16_v16f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: uunpklo z2.s, z0.h ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: uunpklo z3.s, z1.h ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 +; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: uunpklo z1.s, z1.h +; CHECK-NEXT: ucvtf z2.s, p0/m, z2.s ; CHECK-NEXT: ucvtf z3.s, p0/m, z3.s -; CHECK-NEXT: ucvtf z1.s, p0/m, z1.s ; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s -; CHECK-NEXT: stp q3, q1, [x1, #32] -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: ucvtf z1.s, p0/m, z2.s -; CHECK-NEXT: stp q1, q0, [x1] +; CHECK-NEXT: ucvtf z1.s, p0/m, z1.s +; CHECK-NEXT: stp q2, q0, [x1, #32] +; CHECK-NEXT: stp q3, q1, [x1] ; CHECK-NEXT: ret %op1 = load <16 x i16>, ptr %a %res = uitofp <16 x i16> %op1 to <16 x float> @@ -179,18 +178,17 @@ ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 ; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: uunpklo z2.d, z1.s +; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 ; CHECK-NEXT: uunpklo z3.d, z0.s ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 +; CHECK-NEXT: uunpklo z1.d, z1.s +; CHECK-NEXT: ucvtf z2.d, p0/m, z2.d ; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 ; CHECK-NEXT: ucvtf z3.d, p0/m, z3.d +; CHECK-NEXT: ucvtf z1.d, p0/m, z1.d ; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d -; CHECK-NEXT: ucvtf z2.d, p0/m, z2.d -; CHECK-NEXT: uunpklo z1.d, z1.s +; CHECK-NEXT: stp q2, q1, [x1] ; CHECK-NEXT: stp q3, q0, [x1, #32] -; CHECK-NEXT: movprfx z0, z1 -; CHECK-NEXT: ucvtf z0.d, p0/m, z1.d -; CHECK-NEXT: stp q2, q0, [x1] ; CHECK-NEXT: ret %op1 = load <8 x i16>, ptr %a %res = uitofp <8 x i16> %op1 to <8 x double> @@ -201,42 +199,44 @@ define void @ucvtf_v16i16_v16f64(ptr %a, ptr %b) { ; CHECK-LABEL: ucvtf_v16i16_v16f64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: uunpklo z2.s, z0.h ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; CHECK-NEXT: uunpklo z0.s, z0.h -; CHECK-NEXT: uunpklo z4.d, z2.s ; CHECK-NEXT: uunpklo z3.s, z1.h ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 +; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: uunpklo z1.s, z1.h -; CHECK-NEXT: uunpklo z6.d, z0.s -; CHECK-NEXT: uunpklo z7.d, z1.s -; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 -; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; CHECK-NEXT: uunpklo z1.d, z1.s -; CHECK-NEXT: uunpklo z5.d, z3.s -; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8 +; CHECK-NEXT: mov z4.d, z2.d +; CHECK-NEXT: mov z7.d, z3.d +; CHECK-NEXT: mov z5.d, z0.d +; CHECK-NEXT: ext z4.b, z4.b, z2.b, #8 +; CHECK-NEXT: uunpklo z2.d, z2.s +; CHECK-NEXT: mov z6.d, z1.d +; CHECK-NEXT: ext 
z7.b, z7.b, z3.b, #8 +; CHECK-NEXT: uunpklo z3.d, z3.s +; CHECK-NEXT: ext z5.b, z5.b, z0.b, #8 +; CHECK-NEXT: uunpklo z4.d, z4.s ; CHECK-NEXT: uunpklo z0.d, z0.s -; CHECK-NEXT: ucvtf z7.d, p0/m, z7.d +; CHECK-NEXT: ext z6.b, z6.b, z1.b, #8 +; CHECK-NEXT: uunpklo z1.d, z1.s +; CHECK-NEXT: ucvtf z2.d, p0/m, z2.d +; CHECK-NEXT: ucvtf z3.d, p0/m, z3.d +; CHECK-NEXT: uunpklo z7.d, z7.s +; CHECK-NEXT: uunpklo z5.d, z5.s +; CHECK-NEXT: ucvtf z4.d, p0/m, z4.d +; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d +; CHECK-NEXT: uunpklo z6.d, z6.s ; CHECK-NEXT: ucvtf z1.d, p0/m, z1.d -; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8 -; CHECK-NEXT: uunpklo z3.d, z3.s -; CHECK-NEXT: stp q7, q1, [x1, #96] ; CHECK-NEXT: ucvtf z5.d, p0/m, z5.d -; CHECK-NEXT: movprfx z1, z3 -; CHECK-NEXT: ucvtf z1.d, p0/m, z3.d -; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d -; CHECK-NEXT: uunpklo z2.d, z2.s -; CHECK-NEXT: stp q5, q1, [x1, #64] -; CHECK-NEXT: movprfx z3, z6 -; CHECK-NEXT: ucvtf z3.d, p0/m, z6.d -; CHECK-NEXT: stp q3, q0, [x1, #32] -; CHECK-NEXT: movprfx z1, z4 -; CHECK-NEXT: ucvtf z1.d, p0/m, z4.d -; CHECK-NEXT: movprfx z0, z2 -; CHECK-NEXT: ucvtf z0.d, p0/m, z2.d -; CHECK-NEXT: stp q1, q0, [x1] +; CHECK-NEXT: stp q2, q4, [x1, #64] +; CHECK-NEXT: movprfx z2, z6 +; CHECK-NEXT: ucvtf z2.d, p0/m, z6.d +; CHECK-NEXT: stp q1, q2, [x1, #32] +; CHECK-NEXT: stp q0, q5, [x1, #96] +; CHECK-NEXT: movprfx z0, z7 +; CHECK-NEXT: ucvtf z0.d, p0/m, z7.d +; CHECK-NEXT: stp q3, q0, [x1] ; CHECK-NEXT: ret %op1 = load <16 x i16>, ptr %a %res = uitofp <16 x i16> %op1 to <16 x double> @@ -251,8 +251,8 @@ define <2 x half> @ucvtf_v2i32_v2f16(<2 x i32> %op1) { ; CHECK-LABEL: ucvtf_v2i32_v2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -264,8 +264,8 @@ define <4 x half> @ucvtf_v4i32_v4f16(<4 x i32> %op1) { ; CHECK-LABEL: ucvtf_v4i32_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -277,14 +277,14 @@ define <8 x half> @ucvtf_v8i32_v8f16(ptr %a) { ; CHECK-LABEL: ucvtf_v8i32_v8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ucvtf z1.h, p0/m, z1.s ; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: uzp1 z2.h, z0.h, z0.h -; CHECK-NEXT: uzp1 z0.h, z1.h, z1.h -; CHECK-NEXT: splice z0.h, p0, z0.h, z2.h +; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h +; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h +; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret %op1 = load <8 x i32>, ptr %a @@ -295,21 +295,21 @@ define void @ucvtf_v16i32_v16f16(ptr %a, ptr %b) { ; CHECK-LABEL: ucvtf_v16i32_v16f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ptrue p1.h, vl4 -; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s -; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h -; CHECK-NEXT: ldp q3, q2, [x0, #32] +; CHECK-NEXT: ldp q0, q1, [x0, #32] +; CHECK-NEXT: ldp q2, q3, [x0] ; CHECK-NEXT: ucvtf z1.h, p0/m, z1.s -; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h -; CHECK-NEXT: splice z0.h, p1, z0.h, z1.h +; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s ; CHECK-NEXT: ucvtf z3.h, p0/m, 
z3.s -; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h ; CHECK-NEXT: ucvtf z2.h, p0/m, z2.s +; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h +; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h +; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h ; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h -; CHECK-NEXT: splice z3.h, p1, z3.h, z2.h -; CHECK-NEXT: stp q0, q3, [x1] +; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h +; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h +; CHECK-NEXT: stp q2, q0, [x1] ; CHECK-NEXT: ret %op1 = load <16 x i32>, ptr %a %res = uitofp <16 x i32> %op1 to <16 x half> @@ -324,8 +324,8 @@ define <2 x float> @ucvtf_v2i32_v2f32(<2 x i32> %op1) { ; CHECK-LABEL: ucvtf_v2i32_v2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -336,8 +336,8 @@ define <4 x float> @ucvtf_v4i32_v4f32(<4 x i32> %op1) { ; CHECK-LABEL: ucvtf_v4i32_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -348,8 +348,8 @@ define void @ucvtf_v8i32_v8f32(ptr %a, ptr %b) { ; CHECK-LABEL: ucvtf_v8i32_v8f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s ; CHECK-NEXT: ucvtf z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x1] @@ -367,8 +367,8 @@ define <2 x double> @ucvtf_v2i32_v2f64(<2 x i32> %op1) { ; CHECK-LABEL: ucvtf_v2i32_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: uunpklo z0.d, z0.s ; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -398,21 +398,20 @@ define void @ucvtf_v8i32_v8f64(ptr %a, ptr %b) { ; CHECK-LABEL: ucvtf_v8i32_v8f64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: uunpklo z2.d, z0.s ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; CHECK-NEXT: uunpklo z0.d, z0.s ; CHECK-NEXT: uunpklo z3.d, z1.s ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 +; CHECK-NEXT: uunpklo z0.d, z0.s ; CHECK-NEXT: uunpklo z1.d, z1.s +; CHECK-NEXT: ucvtf z2.d, p0/m, z2.d ; CHECK-NEXT: ucvtf z3.d, p0/m, z3.d -; CHECK-NEXT: ucvtf z1.d, p0/m, z1.d ; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d -; CHECK-NEXT: stp q3, q1, [x1, #32] -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: ucvtf z1.d, p0/m, z2.d -; CHECK-NEXT: stp q1, q0, [x1] +; CHECK-NEXT: ucvtf z1.d, p0/m, z1.d +; CHECK-NEXT: stp q2, q0, [x1, #32] +; CHECK-NEXT: stp q3, q1, [x1] ; CHECK-NEXT: ret %op1 = load <8 x i32>, ptr %a %res = uitofp <8 x i32> %op1 to <8 x double> @@ -432,9 +431,9 @@ ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: mov z1.d, z0.d[1] ; CHECK-NEXT: fmov x8, d0 -; CHECK-NEXT: fmov x9, d1 ; CHECK-NEXT: ucvtf h0, x8 -; CHECK-NEXT: ucvtf h1, x9 +; CHECK-NEXT: fmov x8, d1 +; CHECK-NEXT: ucvtf h1, x8 ; CHECK-NEXT: str h0, [sp, #8] ; CHECK-NEXT: str h1, [sp, #10] ; CHECK-NEXT: ldr d0, [sp, #8] @@ -447,17 +446,16 @@ define <4 x half> @ucvtf_v4i64_v4f16(ptr %a) { ; CHECK-LABEL: ucvtf_v4i64_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: 
ucvtf z1.s, p0/m, z1.d -; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s ; CHECK-NEXT: ucvtf z0.s, p0/m, z0.d ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s -; CHECK-NEXT: splice z1.s, p0, z1.s, z0.s -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: movprfx z0, z1 -; CHECK-NEXT: fcvt z0.h, p0/m, z1.s +; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s +; CHECK-NEXT: fcvt z0.h, p1/m, z0.s ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -469,25 +467,26 @@ define <8 x half> @ucvtf_v8i64_v8f16(ptr %a) { ; CHECK-LABEL: ucvtf_v8i64_v8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0, #32] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ptrue p1.s, vl2 -; CHECK-NEXT: ptrue p2.s +; CHECK-NEXT: ldp q1, q0, [x0, #32] +; CHECK-NEXT: ldp q2, q3, [x0] +; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: ucvtf z0.s, p0/m, z0.d -; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s -; CHECK-NEXT: ldp q3, q2, [x0] ; CHECK-NEXT: ucvtf z1.s, p0/m, z1.d -; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s -; CHECK-NEXT: splice z0.s, p1, z0.s, z1.s ; CHECK-NEXT: ucvtf z3.s, p0/m, z3.d -; CHECK-NEXT: fcvt z0.h, p2/m, z0.s -; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s ; CHECK-NEXT: ucvtf z2.s, p0/m, z2.d -; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s +; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s +; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s ; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s -; CHECK-NEXT: splice z3.s, p1, z3.s, z2.s -; CHECK-NEXT: movprfx z1, z3 -; CHECK-NEXT: fcvt z1.h, p2/m, z3.s +; CHECK-NEXT: splice z1.s, p0, z1.s, z0.s +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: fcvt z0.h, p1/m, z1.s +; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s +; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: movprfx z1, z2 +; CHECK-NEXT: fcvt z1.h, p1/m, z2.s ; CHECK-NEXT: uzp1 z2.h, z0.h, z0.h ; CHECK-NEXT: uzp1 z0.h, z1.h, z1.h ; CHECK-NEXT: splice z0.h, p0, z0.h, z2.h @@ -505,8 +504,8 @@ define <2 x float> @ucvtf_v2i64_v2f32(<2 x i64> %op1) { ; CHECK-LABEL: ucvtf_v2i64_v2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ucvtf z0.s, p0/m, z0.d ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -518,14 +517,14 @@ define <4 x float> @ucvtf_v4i64_v4f32(ptr %a) { ; CHECK-LABEL: ucvtf_v4i64_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ucvtf z1.s, p0/m, z1.d ; CHECK-NEXT: ucvtf z0.s, p0/m, z0.d ; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: uzp1 z2.s, z0.s, z0.s -; CHECK-NEXT: uzp1 z0.s, z1.s, z1.s -; CHECK-NEXT: splice z0.s, p0, z0.s, z2.s +; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s +; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s +; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret %op1 = load <4 x i64>, ptr %a @@ -536,21 +535,21 @@ define void @ucvtf_v8i64_v8f32(ptr %a, ptr %b) { ; CHECK-LABEL: ucvtf_v8i64_v8f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ptrue p1.s, vl2 -; CHECK-NEXT: ucvtf z0.s, p0/m, z0.d -; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s -; CHECK-NEXT: ldp q3, q2, [x0, #32] +; CHECK-NEXT: ldp q0, q1, [x0, #32] +; CHECK-NEXT: ldp q2, q3, [x0] ; CHECK-NEXT: ucvtf z1.s, p0/m, z1.d -; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s -; CHECK-NEXT: splice z0.s, p1, z0.s, z1.s +; CHECK-NEXT: ucvtf z0.s, p0/m, z0.d ; CHECK-NEXT: ucvtf z3.s, p0/m, z3.d -; 
CHECK-NEXT: uzp1 z3.s, z3.s, z3.s ; CHECK-NEXT: ucvtf z2.s, p0/m, z2.d +; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s +; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s +; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s ; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s -; CHECK-NEXT: splice z3.s, p1, z3.s, z2.s -; CHECK-NEXT: stp q0, q3, [x1] +; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s +; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s +; CHECK-NEXT: stp q2, q0, [x1] ; CHECK-NEXT: ret %op1 = load <8 x i64>, ptr %a %res = uitofp <8 x i64> %op1 to <8 x float> @@ -565,8 +564,8 @@ define <2 x double> @ucvtf_v2i64_v2f64(<2 x i64> %op1) { ; CHECK-LABEL: ucvtf_v2i64_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -577,8 +576,8 @@ define void @ucvtf_v4i64_v4f64(ptr %a, ptr %b) { ; CHECK-LABEL: ucvtf_v4i64_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d ; CHECK-NEXT: ucvtf z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x1] @@ -596,8 +595,8 @@ define <4 x half> @scvtf_v4i16_v4f16(<4 x i16> %op1) { ; CHECK-LABEL: scvtf_v4i16_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: scvtf z0.h, p0/m, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -608,8 +607,8 @@ define void @scvtf_v8i16_v8f16(ptr %a, ptr %b) { ; CHECK-LABEL: scvtf_v8i16_v8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: scvtf z0.h, p0/m, z0.h ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret @@ -622,8 +621,8 @@ define void @scvtf_v16i16_v16f16(ptr %a, ptr %b) { ; CHECK-LABEL: scvtf_v16i16_v16f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: scvtf z0.h, p0/m, z0.h ; CHECK-NEXT: scvtf z1.h, p0/m, z1.h ; CHECK-NEXT: stp q0, q1, [x1] @@ -640,8 +639,8 @@ define <2 x float> @scvtf_v2i16_v2f32(<2 x i16> %op1) { ; CHECK-LABEL: scvtf_v2i16_v2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: sxth z0.s, p0/m, z0.s ; CHECK-NEXT: scvtf z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -653,8 +652,8 @@ define <4 x float> @scvtf_v4i16_v4f32(<4 x i16> %op1) { ; CHECK-LABEL: scvtf_v4i16_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: sunpklo z0.s, z0.h ; CHECK-NEXT: scvtf z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -684,21 +683,20 @@ define void @scvtf_v16i16_v16f32(ptr %a, ptr %b) { ; CHECK-LABEL: scvtf_v16i16_v16f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: sunpklo z2.s, z0.h ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; CHECK-NEXT: sunpklo z0.s, z0.h ; CHECK-NEXT: sunpklo z3.s, z1.h ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 +; CHECK-NEXT: sunpklo z0.s, z0.h ; CHECK-NEXT: sunpklo z1.s, z1.h +; CHECK-NEXT: scvtf z2.s, p0/m, z2.s ; CHECK-NEXT: scvtf z3.s, p0/m, z3.s -; CHECK-NEXT: scvtf z1.s, p0/m, 
z1.s ; CHECK-NEXT: scvtf z0.s, p0/m, z0.s -; CHECK-NEXT: stp q3, q1, [x1, #32] -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: scvtf z1.s, p0/m, z2.s -; CHECK-NEXT: stp q1, q0, [x1] +; CHECK-NEXT: scvtf z1.s, p0/m, z1.s +; CHECK-NEXT: stp q2, q0, [x1, #32] +; CHECK-NEXT: stp q3, q1, [x1] ; CHECK-NEXT: ret %op1 = load <16 x i16>, ptr %a %res = sitofp <16 x i16> %op1 to <16 x float> @@ -713,8 +711,8 @@ define <2 x double> @scvtf_v2i16_v2f64(<2 x i16> %op1) { ; CHECK-LABEL: scvtf_v2i16_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: sxth z0.s, p0/m, z0.s ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: sunpklo z0.d, z0.s @@ -753,18 +751,17 @@ ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 ; CHECK-NEXT: sunpklo z0.s, z0.h ; CHECK-NEXT: sunpklo z2.d, z1.s +; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 ; CHECK-NEXT: sunpklo z3.d, z0.s ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 +; CHECK-NEXT: sunpklo z1.d, z1.s +; CHECK-NEXT: scvtf z2.d, p0/m, z2.d ; CHECK-NEXT: sunpklo z0.d, z0.s -; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 ; CHECK-NEXT: scvtf z3.d, p0/m, z3.d +; CHECK-NEXT: scvtf z1.d, p0/m, z1.d ; CHECK-NEXT: scvtf z0.d, p0/m, z0.d -; CHECK-NEXT: scvtf z2.d, p0/m, z2.d -; CHECK-NEXT: sunpklo z1.d, z1.s +; CHECK-NEXT: stp q2, q1, [x1] ; CHECK-NEXT: stp q3, q0, [x1, #32] -; CHECK-NEXT: movprfx z0, z1 -; CHECK-NEXT: scvtf z0.d, p0/m, z1.d -; CHECK-NEXT: stp q2, q0, [x1] ; CHECK-NEXT: ret %op1 = load <8 x i16>, ptr %a %res = sitofp <8 x i16> %op1 to <8 x double> @@ -775,42 +772,44 @@ define void @scvtf_v16i16_v16f64(ptr %a, ptr %b) { ; CHECK-LABEL: scvtf_v16i16_v16f64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: sunpklo z2.s, z0.h ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; CHECK-NEXT: sunpklo z0.s, z0.h -; CHECK-NEXT: sunpklo z4.d, z2.s ; CHECK-NEXT: sunpklo z3.s, z1.h ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 +; CHECK-NEXT: sunpklo z0.s, z0.h ; CHECK-NEXT: sunpklo z1.s, z1.h -; CHECK-NEXT: sunpklo z6.d, z0.s -; CHECK-NEXT: sunpklo z7.d, z1.s -; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 -; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; CHECK-NEXT: sunpklo z1.d, z1.s -; CHECK-NEXT: sunpklo z5.d, z3.s -; CHECK-NEXT: ext z3.b, z3.b, z3.b, #8 +; CHECK-NEXT: mov z4.d, z2.d +; CHECK-NEXT: mov z7.d, z3.d +; CHECK-NEXT: mov z5.d, z0.d +; CHECK-NEXT: ext z4.b, z4.b, z2.b, #8 +; CHECK-NEXT: sunpklo z2.d, z2.s +; CHECK-NEXT: mov z6.d, z1.d +; CHECK-NEXT: ext z7.b, z7.b, z3.b, #8 +; CHECK-NEXT: sunpklo z3.d, z3.s +; CHECK-NEXT: ext z5.b, z5.b, z0.b, #8 +; CHECK-NEXT: sunpklo z4.d, z4.s ; CHECK-NEXT: sunpklo z0.d, z0.s -; CHECK-NEXT: scvtf z7.d, p0/m, z7.d +; CHECK-NEXT: ext z6.b, z6.b, z1.b, #8 +; CHECK-NEXT: sunpklo z1.d, z1.s +; CHECK-NEXT: scvtf z2.d, p0/m, z2.d +; CHECK-NEXT: scvtf z3.d, p0/m, z3.d +; CHECK-NEXT: sunpklo z7.d, z7.s +; CHECK-NEXT: sunpklo z5.d, z5.s +; CHECK-NEXT: scvtf z4.d, p0/m, z4.d +; CHECK-NEXT: scvtf z0.d, p0/m, z0.d +; CHECK-NEXT: sunpklo z6.d, z6.s ; CHECK-NEXT: scvtf z1.d, p0/m, z1.d -; CHECK-NEXT: ext z2.b, z2.b, z2.b, #8 -; CHECK-NEXT: sunpklo z3.d, z3.s -; CHECK-NEXT: stp q7, q1, [x1, #96] ; CHECK-NEXT: scvtf z5.d, p0/m, z5.d -; CHECK-NEXT: movprfx z1, z3 -; CHECK-NEXT: scvtf z1.d, p0/m, z3.d -; CHECK-NEXT: scvtf z0.d, p0/m, z0.d -; CHECK-NEXT: sunpklo z2.d, z2.s -; CHECK-NEXT: stp q5, q1, [x1, #64] -; CHECK-NEXT: movprfx z3, z6 -; CHECK-NEXT: scvtf z3.d, p0/m, z6.d -; CHECK-NEXT: stp q3, q0, [x1, #32] -; CHECK-NEXT: 
movprfx z1, z4 -; CHECK-NEXT: scvtf z1.d, p0/m, z4.d -; CHECK-NEXT: movprfx z0, z2 -; CHECK-NEXT: scvtf z0.d, p0/m, z2.d -; CHECK-NEXT: stp q1, q0, [x1] +; CHECK-NEXT: stp q2, q4, [x1, #64] +; CHECK-NEXT: movprfx z2, z6 +; CHECK-NEXT: scvtf z2.d, p0/m, z6.d +; CHECK-NEXT: stp q1, q2, [x1, #32] +; CHECK-NEXT: stp q0, q5, [x1, #96] +; CHECK-NEXT: movprfx z0, z7 +; CHECK-NEXT: scvtf z0.d, p0/m, z7.d +; CHECK-NEXT: stp q3, q0, [x1] ; CHECK-NEXT: ret %op1 = load <16 x i16>, ptr %a %res = sitofp <16 x i16> %op1 to <16 x double> @@ -825,8 +824,8 @@ define <2 x half> @scvtf_v2i32_v2f16(<2 x i32> %op1) { ; CHECK-LABEL: scvtf_v2i32_v2f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: scvtf z0.h, p0/m, z0.s ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -838,8 +837,8 @@ define <4 x half> @scvtf_v4i32_v4f16(<4 x i32> %op1) { ; CHECK-LABEL: scvtf_v4i32_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: scvtf z0.h, p0/m, z0.s ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -851,14 +850,14 @@ define <8 x half> @scvtf_v8i32_v8f16(ptr %a) { ; CHECK-LABEL: scvtf_v8i32_v8f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: scvtf z1.h, p0/m, z1.s ; CHECK-NEXT: scvtf z0.h, p0/m, z0.s ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: uzp1 z2.h, z0.h, z0.h -; CHECK-NEXT: uzp1 z0.h, z1.h, z1.h -; CHECK-NEXT: splice z0.h, p0, z0.h, z2.h +; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h +; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h +; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret %op1 = load <8 x i32>, ptr %a @@ -873,8 +872,8 @@ define <2 x float> @scvtf_v2i32_v2f32(<2 x i32> %op1) { ; CHECK-LABEL: scvtf_v2i32_v2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: scvtf z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -885,8 +884,8 @@ define <4 x float> @scvtf_v4i32_v4f32(<4 x i32> %op1) { ; CHECK-LABEL: scvtf_v4i32_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: scvtf z0.s, p0/m, z0.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -897,8 +896,8 @@ define void @scvtf_v8i32_v8f32(ptr %a, ptr %b) { ; CHECK-LABEL: scvtf_v8i32_v8f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: scvtf z0.s, p0/m, z0.s ; CHECK-NEXT: scvtf z1.s, p0/m, z1.s ; CHECK-NEXT: stp q0, q1, [x1] @@ -916,8 +915,8 @@ define <2 x double> @scvtf_v2i32_v2f64(<2 x i32> %op1) { ; CHECK-LABEL: scvtf_v2i32_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: sunpklo z0.d, z0.s ; CHECK-NEXT: scvtf z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 @@ -947,21 +946,20 @@ define void @scvtf_v8i32_v8f64(ptr %a, ptr %b) { ; CHECK-LABEL: scvtf_v8i32_v8f64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] 
+; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 ; CHECK-NEXT: sunpklo z2.d, z0.s ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; CHECK-NEXT: sunpklo z0.d, z0.s ; CHECK-NEXT: sunpklo z3.d, z1.s ; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 +; CHECK-NEXT: sunpklo z0.d, z0.s ; CHECK-NEXT: sunpklo z1.d, z1.s +; CHECK-NEXT: scvtf z2.d, p0/m, z2.d ; CHECK-NEXT: scvtf z3.d, p0/m, z3.d -; CHECK-NEXT: scvtf z1.d, p0/m, z1.d ; CHECK-NEXT: scvtf z0.d, p0/m, z0.d -; CHECK-NEXT: stp q3, q1, [x1, #32] -; CHECK-NEXT: movprfx z1, z2 -; CHECK-NEXT: scvtf z1.d, p0/m, z2.d -; CHECK-NEXT: stp q1, q0, [x1] +; CHECK-NEXT: scvtf z1.d, p0/m, z1.d +; CHECK-NEXT: stp q2, q0, [x1, #32] +; CHECK-NEXT: stp q3, q1, [x1] ; CHECK-NEXT: ret %op1 = load <8 x i32>, ptr %a %res = sitofp <8 x i32> %op1 to <8 x double> @@ -972,38 +970,40 @@ define void @scvtf_v16i32_v16f64(ptr %a, ptr %b) { ; CHECK-LABEL: scvtf_v16i32_v16f64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q2, q3, [x0, #32] +; CHECK-NEXT: ldp q1, q0, [x0, #32] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: mov z6.d, z2.d +; CHECK-NEXT: ldp q5, q4, [x0] +; CHECK-NEXT: mov z2.d, z0.d +; CHECK-NEXT: mov z3.d, z1.d +; CHECK-NEXT: mov z6.d, z4.d +; CHECK-NEXT: mov z7.d, z5.d +; CHECK-NEXT: ext z2.b, z2.b, z0.b, #8 +; CHECK-NEXT: ext z3.b, z3.b, z1.b, #8 +; CHECK-NEXT: sunpklo z0.d, z0.s +; CHECK-NEXT: sunpklo z1.d, z1.s +; CHECK-NEXT: ext z6.b, z6.b, z4.b, #8 +; CHECK-NEXT: sunpklo z4.d, z4.s +; CHECK-NEXT: ext z7.b, z7.b, z5.b, #8 +; CHECK-NEXT: sunpklo z5.d, z5.s ; CHECK-NEXT: sunpklo z2.d, z2.s -; CHECK-NEXT: ext z6.b, z6.b, z6.b, #8 -; CHECK-NEXT: scvtf z2.d, p0/m, z2.d -; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: mov z7.d, z3.d ; CHECK-NEXT: sunpklo z3.d, z3.s -; CHECK-NEXT: ext z7.b, z7.b, z7.b, #8 -; CHECK-NEXT: scvtf z3.d, p0/m, z3.d -; CHECK-NEXT: sunpklo z7.d, z7.s +; CHECK-NEXT: scvtf z0.d, p0/m, z0.d ; CHECK-NEXT: sunpklo z6.d, z6.s -; CHECK-NEXT: sunpklo z4.d, z0.s -; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; CHECK-NEXT: sunpklo z0.d, z0.s -; CHECK-NEXT: scvtf z7.d, p0/m, z7.d -; CHECK-NEXT: sunpklo z5.d, z1.s -; CHECK-NEXT: ext z1.b, z1.b, z1.b, #8 -; CHECK-NEXT: sunpklo z1.d, z1.s -; CHECK-NEXT: stp q3, q7, [x1, #96] -; CHECK-NEXT: movprfx z3, z6 -; CHECK-NEXT: scvtf z3.d, p0/m, z6.d -; CHECK-NEXT: stp q2, q3, [x1, #64] +; CHECK-NEXT: scvtf z1.d, p0/m, z1.d +; CHECK-NEXT: scvtf z4.d, p0/m, z4.d +; CHECK-NEXT: sunpklo z7.d, z7.s +; CHECK-NEXT: scvtf z2.d, p0/m, z2.d +; CHECK-NEXT: scvtf z3.d, p0/m, z3.d +; CHECK-NEXT: stp q1, q3, [x1, #64] +; CHECK-NEXT: movprfx z1, z7 +; CHECK-NEXT: scvtf z1.d, p0/m, z7.d +; CHECK-NEXT: stp q0, q2, [x1, #96] +; CHECK-NEXT: movprfx z0, z6 +; CHECK-NEXT: scvtf z0.d, p0/m, z6.d ; CHECK-NEXT: movprfx z2, z5 ; CHECK-NEXT: scvtf z2.d, p0/m, z5.d -; CHECK-NEXT: scvtf z1.d, p0/m, z1.d -; CHECK-NEXT: scvtf z0.d, p0/m, z0.d -; CHECK-NEXT: stp q2, q1, [x1, #32] -; CHECK-NEXT: movprfx z2, z4 -; CHECK-NEXT: scvtf z2.d, p0/m, z4.d -; CHECK-NEXT: stp q2, q0, [x1] +; CHECK-NEXT: stp q2, q1, [x1] +; CHECK-NEXT: stp q4, q0, [x1, #32] ; CHECK-NEXT: ret %op1 = load <16 x i32>, ptr %a %res = sitofp <16 x i32> %op1 to <16 x double> @@ -1023,9 +1023,9 @@ ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: mov z1.d, z0.d[1] ; CHECK-NEXT: fmov x8, d0 -; CHECK-NEXT: fmov x9, d1 ; CHECK-NEXT: scvtf h0, x8 -; CHECK-NEXT: scvtf h1, x9 +; CHECK-NEXT: fmov x8, d1 +; CHECK-NEXT: scvtf h1, x8 ; CHECK-NEXT: str h0, [sp, #8] ; CHECK-NEXT: str h1, [sp, #10] ; CHECK-NEXT: ldr d0, [sp, #8] @@ -1038,17 +1038,16 @@ define <4 x half> @scvtf_v4i64_v4f16(ptr 
%a) { ; CHECK-LABEL: scvtf_v4i64_v4f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldp q0, q1, [x0] +; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: scvtf z1.s, p0/m, z1.d -; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s ; CHECK-NEXT: scvtf z0.s, p0/m, z0.d ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s -; CHECK-NEXT: splice z1.s, p0, z1.s, z0.s -; CHECK-NEXT: ptrue p0.s -; CHECK-NEXT: movprfx z0, z1 -; CHECK-NEXT: fcvt z0.h, p0/m, z1.s +; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s +; CHECK-NEXT: fcvt z0.h, p1/m, z0.s ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret @@ -1064,8 +1063,8 @@ define <2 x float> @scvtf_v2i64_v2f32(<2 x i64> %op1) { ; CHECK-LABEL: scvtf_v2i64_v2f32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: scvtf z0.s, p0/m, z0.d ; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 @@ -1077,14 +1076,14 @@ define <4 x float> @scvtf_v4i64_v4f32(ptr %a) { ; CHECK-LABEL: scvtf_v4i64_v4f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: scvtf z1.s, p0/m, z1.d ; CHECK-NEXT: scvtf z0.s, p0/m, z0.d ; CHECK-NEXT: ptrue p0.s, vl2 -; CHECK-NEXT: uzp1 z2.s, z0.s, z0.s -; CHECK-NEXT: uzp1 z0.s, z1.s, z1.s -; CHECK-NEXT: splice z0.s, p0, z0.s, z2.s +; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s +; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s +; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret %op1 = load <4 x i64>, ptr %a @@ -1099,8 +1098,8 @@ define <2 x double> @scvtf_v2i64_v2f64(<2 x i64> %op1) { ; CHECK-LABEL: scvtf_v2i64_v2f64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: scvtf z0.d, p0/m, z0.d ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: ret @@ -1111,8 +1110,8 @@ define void @scvtf_v4i64_v4f64(ptr %a, ptr %b) { ; CHECK-LABEL: scvtf_v4i64_v4f64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldp q0, q1, [x0] ; CHECK-NEXT: scvtf z0.d, p0/m, z0.d ; CHECK-NEXT: scvtf z1.d, p0/m, z1.d ; CHECK-NEXT: stp q0, q1, [x1] diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-int-vselect.ll @@ -60,14 +60,14 @@ define void @select_v32i8(ptr %a, ptr %b) { ; CHECK-LABEL: select_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: cmpeq p1.b, p0/z, z1.b, z2.b -; CHECK-NEXT: sel z1.b, p1, z1.b, z2.b -; CHECK-NEXT: cmpeq p0.b, p0/z, z0.b, z3.b -; CHECK-NEXT: sel z0.b, p0, z0.b, z3.b -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: ldp q0, q2, [x0] +; CHECK-NEXT: ldp q1, q3, [x1] +; CHECK-NEXT: cmpeq p1.b, p0/z, z0.b, z1.b +; CHECK-NEXT: cmpeq p0.b, p0/z, z2.b, z3.b +; CHECK-NEXT: sel z0.b, p1, z0.b, z1.b +; CHECK-NEXT: sel z1.b, p0, z2.b, z3.b +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <32 x i8>, ptr %a %op2 = load <32 x i8>, ptr %b @@ -135,14 +135,14 @@ define 
void @select_v16i16(ptr %a, ptr %b) { ; CHECK-LABEL: select_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: cmpeq p1.h, p0/z, z1.h, z2.h -; CHECK-NEXT: sel z1.h, p1, z1.h, z2.h -; CHECK-NEXT: cmpeq p0.h, p0/z, z0.h, z3.h -; CHECK-NEXT: sel z0.h, p0, z0.h, z3.h -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: ldp q0, q2, [x0] +; CHECK-NEXT: ldp q1, q3, [x1] +; CHECK-NEXT: cmpeq p1.h, p0/z, z0.h, z1.h +; CHECK-NEXT: cmpeq p0.h, p0/z, z2.h, z3.h +; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h +; CHECK-NEXT: sel z1.h, p0, z2.h, z3.h +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i16>, ptr %a %op2 = load <16 x i16>, ptr %b @@ -192,14 +192,14 @@ define void @select_v8i32(ptr %a, ptr %b) { ; CHECK-LABEL: select_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: cmpeq p1.s, p0/z, z1.s, z2.s -; CHECK-NEXT: sel z1.s, p1, z1.s, z2.s -; CHECK-NEXT: cmpeq p0.s, p0/z, z0.s, z3.s -; CHECK-NEXT: sel z0.s, p0, z0.s, z3.s -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: ldp q0, q2, [x0] +; CHECK-NEXT: ldp q1, q3, [x1] +; CHECK-NEXT: cmpeq p1.s, p0/z, z0.s, z1.s +; CHECK-NEXT: cmpeq p0.s, p0/z, z2.s, z3.s +; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s +; CHECK-NEXT: sel z1.s, p0, z2.s, z3.s +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <8 x i32>, ptr %a %op2 = load <8 x i32>, ptr %b @@ -212,9 +212,9 @@ define <1 x i64> @select_v1i64(<1 x i64> %op1, <1 x i64> %op2, <1 x i1> %mask) { ; CHECK-LABEL: select_v1i64: ; CHECK: // %bb.0: +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-NEXT: and x8, x0, #0x1 -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1 ; CHECK-NEXT: mov z2.d, x8 @@ -248,14 +248,14 @@ define void @select_v4i64(ptr %a, ptr %b) { ; CHECK-LABEL: select_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldp q2, q3, [x1] -; CHECK-NEXT: cmpeq p1.d, p0/z, z1.d, z2.d -; CHECK-NEXT: sel z1.d, p1, z1.d, z2.d -; CHECK-NEXT: cmpeq p0.d, p0/z, z0.d, z3.d -; CHECK-NEXT: sel z0.d, p0, z0.d, z3.d -; CHECK-NEXT: stp q1, q0, [x0] +; CHECK-NEXT: ldp q0, q2, [x0] +; CHECK-NEXT: ldp q1, q3, [x1] +; CHECK-NEXT: cmpeq p1.d, p0/z, z0.d, z1.d +; CHECK-NEXT: cmpeq p0.d, p0/z, z2.d, z3.d +; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d +; CHECK-NEXT: sel z1.d, p0, z2.d, z3.d +; CHECK-NEXT: stp q0, q1, [x0] ; CHECK-NEXT: ret %op1 = load <4 x i64>, ptr %a %op2 = load <4 x i64>, ptr %b diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ld2-alloca.ll @@ -49,22 +49,22 @@ ; CHECK-NEXT: ptrue p0.b, vl3 ; CHECK-NEXT: ld2b { z0.b, z1.b }, p0/z, [x20] ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: fmov w8, s1 ; CHECK-NEXT: mov z2.b, z1.b[3] +; CHECK-NEXT: fmov w8, s1 ; CHECK-NEXT: mov z3.b, z1.b[2] -; CHECK-NEXT: mov z0.b, z1.b[1] -; CHECK-NEXT: fmov w9, s2 -; CHECK-NEXT: fmov w10, s3 +; CHECK-NEXT: mov z4.b, z1.b[1] ; CHECK-NEXT: strh w8, [sp] -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: strh w9, [sp, #6] -; CHECK-NEXT: strh w10, [sp, #4] +; CHECK-NEXT: fmov w8, s2 +; CHECK-NEXT: fmov w9, s3 +; CHECK-NEXT: strh w8, [sp, #6] +; CHECK-NEXT: fmov w8, s4 +; 
CHECK-NEXT: strh w9, [sp, #4] ; CHECK-NEXT: strh w8, [sp, #2] ; CHECK-NEXT: add x8, sp, #12 ; CHECK-NEXT: ldr d0, [sp] ; CHECK-NEXT: st1b { z0.h }, p0, [x8] ; CHECK-NEXT: ldrh w8, [sp, #12] -; CHECK-NEXT: strb w10, [x19, #2] +; CHECK-NEXT: strb w9, [x19, #2] ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: strh w8, [x19] ; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload @@ -86,32 +86,32 @@ ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: add x0, sp, #16 ; CHECK-NEXT: bl def -; CHECK-NEXT: ldp q0, q1, [sp, #16] -; CHECK-NEXT: mov z2.b, z0.b[14] -; CHECK-NEXT: mov z3.b, z0.b[12] +; CHECK-NEXT: ldp q0, q3, [sp, #16] +; CHECK-NEXT: mov z1.b, z0.b[14] ; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: fmov w9, s2 -; CHECK-NEXT: fmov w10, s3 +; CHECK-NEXT: mov z2.b, z0.b[12] ; CHECK-NEXT: mov z4.b, z0.b[10] ; CHECK-NEXT: mov z5.b, z0.b[8] -; CHECK-NEXT: mov z6.b, z0.b[6] ; CHECK-NEXT: strb w8, [sp] -; CHECK-NEXT: fmov w8, s4 -; CHECK-NEXT: strb w9, [sp, #7] -; CHECK-NEXT: fmov w9, s5 -; CHECK-NEXT: strb w10, [sp, #6] -; CHECK-NEXT: fmov w10, s6 -; CHECK-NEXT: mov z7.b, z0.b[4] +; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: mov z1.b, z0.b[6] +; CHECK-NEXT: strb w8, [sp, #7] +; CHECK-NEXT: fmov w8, s2 +; CHECK-NEXT: mov z2.b, z0.b[4] ; CHECK-NEXT: mov z0.b, z0.b[2] +; CHECK-NEXT: strb w8, [sp, #6] +; CHECK-NEXT: fmov w8, s4 ; CHECK-NEXT: strb w8, [sp, #5] -; CHECK-NEXT: fmov w8, s7 -; CHECK-NEXT: strb w9, [sp, #4] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strb w10, [sp, #3] -; CHECK-NEXT: fmov w10, s1 +; CHECK-NEXT: fmov w8, s5 +; CHECK-NEXT: strb w8, [sp, #4] +; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: strb w8, [sp, #3] +; CHECK-NEXT: fmov w8, s2 ; CHECK-NEXT: strb w8, [sp, #2] -; CHECK-NEXT: strb w9, [sp, #1] -; CHECK-NEXT: strb w10, [x19, #8] +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: strb w8, [sp, #1] +; CHECK-NEXT: fmov w8, s3 +; CHECK-NEXT: strb w8, [x19, #8] ; CHECK-NEXT: ldr q0, [sp] ; CHECK-NEXT: fmov x8, d0 ; CHECK-NEXT: str x8, [x19] @@ -137,8 +137,8 @@ ; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: mov x20, sp ; CHECK-NEXT: bl def -; CHECK-NEXT: mov x8, #4 // =0x4 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: mov x8, #4 // =0x4 ; CHECK-NEXT: ld2d { z0.d, z1.d }, p0/z, [x20] ; CHECK-NEXT: ld2d { z2.d, z3.d }, p0/z, [x20, x8, lsl #3] ; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-limit-duplane.ll @@ -6,16 +6,16 @@ define <4 x i32> @test(ptr %arg1, ptr %arg2) { ; CHECK-LABEL: test: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldp q2, q1, [x0, #32] -; CHECK-NEXT: add z2.s, z2.s, z2.s +; CHECK-NEXT: ldp q0, q1, [x0, #32] ; CHECK-NEXT: ldp q3, q4, [x0] +; CHECK-NEXT: add z2.s, z0.s, z0.s +; CHECK-NEXT: add z5.s, z1.s, z1.s ; CHECK-NEXT: mov z0.s, z1.s[2] -; CHECK-NEXT: add z1.s, z1.s, z1.s -; CHECK-NEXT: stp q2, q1, [x0, #32] +; CHECK-NEXT: add z1.s, z3.s, z3.s +; CHECK-NEXT: add z3.s, z4.s, z4.s ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 -; CHECK-NEXT: add z2.s, z3.s, z3.s -; CHECK-NEXT: add z1.s, z4.s, z4.s -; CHECK-NEXT: stp q2, q1, [x0] +; CHECK-NEXT: stp q2, q5, [x0, #32] +; CHECK-NEXT: stp q1, q3, [x0] ; CHECK-NEXT: ret entry: %0 = load <16 x i32>, ptr %arg1, align 256 @@ -30,16 +30,16 @@ ; CHECK-LABEL: test2: ; CHECK: // %bb.0: // 
%entry ; CHECK-NEXT: ldp q1, q0, [x0, #32] -; CHECK-NEXT: add z1.s, z1.s, z1.s -; CHECK-NEXT: ldp q2, q3, [x0] -; CHECK-NEXT: add z4.s, z0.s, z0.s +; CHECK-NEXT: ldp q3, q4, [x0] +; CHECK-NEXT: add z2.s, z0.s, z0.s ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 +; CHECK-NEXT: add z1.s, z1.s, z1.s +; CHECK-NEXT: add z3.s, z3.s, z3.s +; CHECK-NEXT: add z4.s, z4.s, z4.s ; CHECK-NEXT: mov z0.s, s0 -; CHECK-NEXT: stp q1, q4, [x0, #32] +; CHECK-NEXT: stp q1, q2, [x0, #32] +; CHECK-NEXT: stp q3, q4, [x0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 -; CHECK-NEXT: add z1.s, z2.s, z2.s -; CHECK-NEXT: add z2.s, z3.s, z3.s -; CHECK-NEXT: stp q1, q2, [x0] ; CHECK-NEXT: ret entry: %0 = load <16 x i32>, ptr %arg1, align 256 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-log-reduce.ll @@ -11,8 +11,8 @@ define i8 @andv_v4i8(<4 x i8> %a) { ; CHECK-LABEL: andv_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: andv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -23,8 +23,8 @@ define i8 @andv_v8i8(<8 x i8> %a) { ; CHECK-LABEL: andv_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl8 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: andv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -35,8 +35,8 @@ define i8 @andv_v16i8(<16 x i8> %a) { ; CHECK-LABEL: andv_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: andv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -47,8 +47,8 @@ define i8 @andv_v32i8(ptr %a) { ; CHECK-LABEL: andv_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: and z0.d, z1.d, z0.d ; CHECK-NEXT: andv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 @@ -61,8 +61,8 @@ define i16 @andv_v2i16(<2 x i16> %a) { ; CHECK-LABEL: andv_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: andv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -73,8 +73,8 @@ define i16 @andv_v4i16(<4 x i16> %a) { ; CHECK-LABEL: andv_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: andv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -85,8 +85,8 @@ define i16 @andv_v8i16(<8 x i16> %a) { ; CHECK-LABEL: andv_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: andv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -97,8 +97,8 @@ define i16 @andv_v16i16(ptr %a) { ; CHECK-LABEL: andv_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: and z0.d, z1.d, z0.d ; CHECK-NEXT: andv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 @@ -111,8 +111,8 @@ define i32 @andv_v2i32(<2 x i32> %a) { ; CHECK-LABEL: andv_v2i32: ; 
CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: andv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -123,8 +123,8 @@ define i32 @andv_v4i32(<4 x i32> %a) { ; CHECK-LABEL: andv_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: andv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -135,8 +135,8 @@ define i32 @andv_v8i32(ptr %a) { ; CHECK-LABEL: andv_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: and z0.d, z1.d, z0.d ; CHECK-NEXT: andv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 @@ -149,8 +149,8 @@ define i64 @andv_v2i64(<2 x i64> %a) { ; CHECK-LABEL: andv_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: andv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret @@ -161,8 +161,8 @@ define i64 @andv_v4i64(ptr %a) { ; CHECK-LABEL: andv_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: and z0.d, z1.d, z0.d ; CHECK-NEXT: andv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 @@ -179,8 +179,8 @@ define i8 @eorv_v4i8(<4 x i8> %a) { ; CHECK-LABEL: eorv_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: eorv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -191,8 +191,8 @@ define i8 @eorv_v8i8(<8 x i8> %a) { ; CHECK-LABEL: eorv_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl8 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: eorv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -203,8 +203,8 @@ define i8 @eorv_v16i8(<16 x i8> %a) { ; CHECK-LABEL: eorv_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: eorv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -215,8 +215,8 @@ define i8 @eorv_v32i8(ptr %a) { ; CHECK-LABEL: eorv_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: eor z0.d, z1.d, z0.d ; CHECK-NEXT: eorv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 @@ -229,8 +229,8 @@ define i16 @eorv_v2i16(<2 x i16> %a) { ; CHECK-LABEL: eorv_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: eorv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -241,8 +241,8 @@ define i16 @eorv_v4i16(<4 x i16> %a) { ; CHECK-LABEL: eorv_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: eorv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -253,8 +253,8 @@ define i16 @eorv_v8i16(<8 x i16> %a) { ; CHECK-LABEL: eorv_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: eorv h0, p0, z0.h ; CHECK-NEXT: fmov 
w0, s0 ; CHECK-NEXT: ret @@ -265,8 +265,8 @@ define i16 @eorv_v16i16(ptr %a) { ; CHECK-LABEL: eorv_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: eor z0.d, z1.d, z0.d ; CHECK-NEXT: eorv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 @@ -279,8 +279,8 @@ define i32 @eorv_v2i32(<2 x i32> %a) { ; CHECK-LABEL: eorv_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: eorv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -291,8 +291,8 @@ define i32 @eorv_v4i32(<4 x i32> %a) { ; CHECK-LABEL: eorv_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: eorv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -303,8 +303,8 @@ define i32 @eorv_v8i32(ptr %a) { ; CHECK-LABEL: eorv_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: eor z0.d, z1.d, z0.d ; CHECK-NEXT: eorv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 @@ -317,8 +317,8 @@ define i64 @eorv_v2i64(<2 x i64> %a) { ; CHECK-LABEL: eorv_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: eorv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret @@ -329,8 +329,8 @@ define i64 @eorv_v4i64(ptr %a) { ; CHECK-LABEL: eorv_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: eor z0.d, z1.d, z0.d ; CHECK-NEXT: eorv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 @@ -347,8 +347,8 @@ define i8 @orv_v4i8(<4 x i8> %a) { ; CHECK-LABEL: orv_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: orv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -359,8 +359,8 @@ define i8 @orv_v8i8(<8 x i8> %a) { ; CHECK-LABEL: orv_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl8 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: orv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -371,8 +371,8 @@ define i8 @orv_v16i8(<16 x i8> %a) { ; CHECK-LABEL: orv_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: orv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -383,8 +383,8 @@ define i8 @orv_v32i8(ptr %a) { ; CHECK-LABEL: orv_v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: orr z0.d, z1.d, z0.d ; CHECK-NEXT: orv b0, p0, z0.b ; CHECK-NEXT: fmov w0, s0 @@ -397,8 +397,8 @@ define i16 @orv_v2i16(<2 x i16> %a) { ; CHECK-LABEL: orv_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: orv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -409,8 +409,8 @@ define i16 @orv_v4i16(<4 x i16> %a) { ; CHECK-LABEL: orv_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 +; CHECK-NEXT: // kill: def $d0 
killed $d0 def $z0 ; CHECK-NEXT: orv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -421,8 +421,8 @@ define i16 @orv_v8i16(<8 x i16> %a) { ; CHECK-LABEL: orv_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: orv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -433,8 +433,8 @@ define i16 @orv_v16i16(ptr %a) { ; CHECK-LABEL: orv_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.h, vl8 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: orr z0.d, z1.d, z0.d ; CHECK-NEXT: orv h0, p0, z0.h ; CHECK-NEXT: fmov w0, s0 @@ -447,8 +447,8 @@ define i32 @orv_v2i32(<2 x i32> %a) { ; CHECK-LABEL: orv_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl2 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: orv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -459,8 +459,8 @@ define i32 @orv_v4i32(<4 x i32> %a) { ; CHECK-LABEL: orv_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: orv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -471,8 +471,8 @@ define i32 @orv_v8i32(ptr %a) { ; CHECK-LABEL: orv_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: orr z0.d, z1.d, z0.d ; CHECK-NEXT: orv s0, p0, z0.s ; CHECK-NEXT: fmov w0, s0 @@ -485,8 +485,8 @@ define i64 @orv_v2i64(<2 x i64> %a) { ; CHECK-LABEL: orv_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: orv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret @@ -497,8 +497,8 @@ define i64 @orv_v4i64(ptr %a) { ; CHECK-LABEL: orv_v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: orr z0.d, z1.d, z0.d ; CHECK-NEXT: orv d0, p0, z0.d ; CHECK-NEXT: fmov x0, d0 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-load.ll @@ -59,69 +59,69 @@ ; CHECK-NEXT: sub sp, sp, #32 ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: ldr w8, [sp, #224] -; CHECK-NEXT: strb w7, [sp, #6] ; CHECK-NEXT: ldr w9, [sp, #216] -; CHECK-NEXT: strb w6, [sp, #5] -; CHECK-NEXT: ldr w10, [sp, #208] -; CHECK-NEXT: strb w5, [sp, #4] +; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: strb w7, [sp, #6] ; CHECK-NEXT: strb w8, [sp, #31] -; CHECK-NEXT: ldr w8, [sp, #200] +; CHECK-NEXT: ldr w8, [sp, #208] ; CHECK-NEXT: strb w9, [sp, #30] -; CHECK-NEXT: ldr w9, [sp, #192] -; CHECK-NEXT: strb w10, [sp, #29] -; CHECK-NEXT: ldr w10, [sp, #184] -; CHECK-NEXT: strb w8, [sp, #28] +; CHECK-NEXT: ldr w9, [sp, #200] +; CHECK-NEXT: strb w8, [sp, #29] +; CHECK-NEXT: ldr w8, [sp, #192] +; CHECK-NEXT: strb w9, [sp, #28] +; CHECK-NEXT: ldr w9, [sp, #184] +; CHECK-NEXT: strb w8, [sp, #27] ; CHECK-NEXT: ldr w8, [sp, #176] -; CHECK-NEXT: strb w9, [sp, #27] +; CHECK-NEXT: strb w9, [sp, #26] ; CHECK-NEXT: ldr w9, [sp, #168] -; CHECK-NEXT: strb w10, [sp, #26] -; CHECK-NEXT: ldr w10, [sp, #160] ; CHECK-NEXT: strb 
w8, [sp, #25] -; CHECK-NEXT: ldr w8, [sp, #152] +; CHECK-NEXT: ldr w8, [sp, #160] ; CHECK-NEXT: strb w9, [sp, #24] -; CHECK-NEXT: ldr w9, [sp, #144] -; CHECK-NEXT: strb w10, [sp, #23] -; CHECK-NEXT: ldr w10, [sp, #136] -; CHECK-NEXT: strb w8, [sp, #22] +; CHECK-NEXT: ldr w9, [sp, #152] +; CHECK-NEXT: strb w8, [sp, #23] +; CHECK-NEXT: ldr w8, [sp, #144] +; CHECK-NEXT: strb w9, [sp, #22] +; CHECK-NEXT: ldr w9, [sp, #136] +; CHECK-NEXT: strb w8, [sp, #21] ; CHECK-NEXT: ldr w8, [sp, #128] -; CHECK-NEXT: strb w9, [sp, #21] +; CHECK-NEXT: strb w9, [sp, #20] ; CHECK-NEXT: ldr w9, [sp, #120] -; CHECK-NEXT: strb w10, [sp, #20] -; CHECK-NEXT: ldr w10, [sp, #112] ; CHECK-NEXT: strb w8, [sp, #19] -; CHECK-NEXT: ldr w8, [sp, #104] +; CHECK-NEXT: ldr w8, [sp, #112] ; CHECK-NEXT: strb w9, [sp, #18] -; CHECK-NEXT: ldr w9, [sp, #96] -; CHECK-NEXT: strb w10, [sp, #17] -; CHECK-NEXT: ldr w10, [sp, #88] -; CHECK-NEXT: strb w8, [sp, #16] +; CHECK-NEXT: ldr w9, [sp, #104] +; CHECK-NEXT: strb w8, [sp, #17] +; CHECK-NEXT: ldr w8, [sp, #96] +; CHECK-NEXT: strb w9, [sp, #16] +; CHECK-NEXT: ldr w9, [sp, #88] +; CHECK-NEXT: strb w8, [sp, #15] ; CHECK-NEXT: ldr w8, [sp, #80] -; CHECK-NEXT: strb w9, [sp, #15] +; CHECK-NEXT: strb w9, [sp, #14] ; CHECK-NEXT: ldr w9, [sp, #72] -; CHECK-NEXT: strb w10, [sp, #14] -; CHECK-NEXT: ldr w10, [sp, #64] ; CHECK-NEXT: strb w8, [sp, #13] -; CHECK-NEXT: ldr w8, [sp, #56] +; CHECK-NEXT: ldr w8, [sp, #64] ; CHECK-NEXT: strb w9, [sp, #12] -; CHECK-NEXT: ldr w9, [sp, #48] -; CHECK-NEXT: strb w10, [sp, #11] -; CHECK-NEXT: ldr w10, [sp, #40] -; CHECK-NEXT: strb w8, [sp, #10] +; CHECK-NEXT: ldr w9, [sp, #56] +; CHECK-NEXT: strb w8, [sp, #11] +; CHECK-NEXT: ldr w8, [sp, #48] +; CHECK-NEXT: strb w9, [sp, #10] +; CHECK-NEXT: ldr w9, [sp, #40] +; CHECK-NEXT: strb w8, [sp, #9] ; CHECK-NEXT: ldr w8, [sp, #32] -; CHECK-NEXT: strb w9, [sp, #9] -; CHECK-NEXT: ptrue p0.b, vl16 -; CHECK-NEXT: strb w10, [sp, #8] +; CHECK-NEXT: strb w9, [sp, #8] ; CHECK-NEXT: strb w8, [sp, #7] ; CHECK-NEXT: mov w8, #16 // =0x10 +; CHECK-NEXT: strb w6, [sp, #5] +; CHECK-NEXT: strb w5, [sp, #4] ; CHECK-NEXT: strb w4, [sp, #3] ; CHECK-NEXT: strb w3, [sp, #2] ; CHECK-NEXT: strb w2, [sp, #1] ; CHECK-NEXT: strb w1, [sp] ; CHECK-NEXT: ldp q1, q0, [sp] -; CHECK-NEXT: lsl z1.b, z1.b, #7 -; CHECK-NEXT: asr z1.b, z1.b, #7 ; CHECK-NEXT: lsl z0.b, z0.b, #7 +; CHECK-NEXT: lsl z1.b, z1.b, #7 ; CHECK-NEXT: asr z0.b, z0.b, #7 +; CHECK-NEXT: asr z1.b, z1.b, #7 ; CHECK-NEXT: cmpne p1.b, p0/z, z0.b, #0 ; CHECK-NEXT: cmpne p0.b, p0/z, z1.b, #0 ; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0] @@ -140,13 +140,13 @@ ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: mov z1.s, z0.s[1] ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: str wzr, [sp, #12] -; CHECK-NEXT: mov z0.s, z0.s[1] ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: fmov w9, s0 ; CHECK-NEXT: strh w8, [sp, #8] -; CHECK-NEXT: strh w9, [sp, #10] +; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: strh w8, [sp, #10] ; CHECK-NEXT: ldr d0, [sp, #8] ; CHECK-NEXT: lsl z0.h, z0.h, #15 ; CHECK-NEXT: asr z0.h, z0.h, #15 @@ -195,19 +195,19 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: uunpklo z1.h, z0.b +; CHECK-NEXT: ptrue p0.h, vl8 ; CHECK-NEXT: mov x8, #8 // =0x8 ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 -; CHECK-NEXT: lsl z1.h, z1.h, #15 ; CHECK-NEXT: uunpklo z0.h, z0.b +; CHECK-NEXT: lsl z1.h, z1.h, #15 ; CHECK-NEXT: asr z1.h, z1.h, #15 ; CHECK-NEXT: lsl z0.h, z0.h, #15 -; CHECK-NEXT: ptrue p0.h, vl8 
-; CHECK-NEXT: asr z0.h, z0.h, #15 ; CHECK-NEXT: cmpne p1.h, p0/z, z1.h, #0 +; CHECK-NEXT: asr z0.h, z0.h, #15 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 ; CHECK-NEXT: ld1h { z0.h }, p1/z, [x0] -; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0, x8, lsl #1] ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0, x8, lsl #1] ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: ret %load = call <16 x half> @llvm.masked.load.v16f16(ptr %src, i32 8, <16 x i1> %mask, <16 x half> zeroinitializer) @@ -250,31 +250,31 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: mov z1.b, z0.b[3] ; CHECK-NEXT: mov z2.b, z0.b[2] -; CHECK-NEXT: ptrue p0.s, vl4 ; CHECK-NEXT: mov z3.b, z0.b[1] ; CHECK-NEXT: mov z4.b, z0.b[7] -; CHECK-NEXT: mov z5.b, z0.b[6] -; CHECK-NEXT: mov z6.b, z0.b[5] -; CHECK-NEXT: fmov w9, s1 -; CHECK-NEXT: mov z0.b, z0.b[4] -; CHECK-NEXT: fmov w10, s2 ; CHECK-NEXT: strh w8, [sp, #-16]! ; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: mov z1.b, z0.b[6] +; CHECK-NEXT: strh w8, [sp, #6] +; CHECK-NEXT: fmov w8, s2 +; CHECK-NEXT: mov z2.b, z0.b[5] +; CHECK-NEXT: mov z0.b, z0.b[4] +; CHECK-NEXT: strh w8, [sp, #4] ; CHECK-NEXT: fmov w8, s3 -; CHECK-NEXT: strh w9, [sp, #6] -; CHECK-NEXT: fmov w9, s4 -; CHECK-NEXT: strh w10, [sp, #4] -; CHECK-NEXT: fmov w10, s5 ; CHECK-NEXT: strh w8, [sp, #2] -; CHECK-NEXT: fmov w8, s6 -; CHECK-NEXT: strh w9, [sp, #14] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strh w10, [sp, #12] +; CHECK-NEXT: fmov w8, s4 +; CHECK-NEXT: strh w8, [sp, #14] +; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: strh w8, [sp, #12] +; CHECK-NEXT: fmov w8, s2 ; CHECK-NEXT: strh w8, [sp, #10] +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: strh w8, [sp, #8] ; CHECK-NEXT: mov x8, #4 // =0x4 -; CHECK-NEXT: strh w9, [sp, #8] ; CHECK-NEXT: ldp d0, d1, [sp] ; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: uunpklo z1.s, z1.h @@ -314,8 +314,8 @@ ; CHECK-LABEL: masked_load_v4f64: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 -; CHECK-NEXT: mov x8, #2 // =0x2 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: mov x8, #2 // =0x2 ; CHECK-NEXT: uunpklo z0.s, z0.h ; CHECK-NEXT: uunpklo z1.d, z0.s ; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8 @@ -327,8 +327,8 @@ ; CHECK-NEXT: cmpne p1.d, p0/z, z1.d, #0 ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 ; CHECK-NEXT: ld1d { z0.d }, p1/z, [x0] -; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, x8, lsl #3] ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0, x8, lsl #3] ; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z1 ; CHECK-NEXT: ret %load = call <4 x double> @llvm.masked.load.v4f64(ptr %src, i32 8, <4 x i1> %mask, <4 x double> zeroinitializer) diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-masked-store.ll @@ -59,74 +59,74 @@ ; CHECK-NEXT: sub sp, sp, #32 ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: ldr w8, [sp, #96] -; CHECK-NEXT: strb w7, [sp, #6] ; CHECK-NEXT: ldr w9, [sp, #88] -; CHECK-NEXT: strb w6, [sp, #5] -; CHECK-NEXT: ldr w10, [sp, #80] -; CHECK-NEXT: strb w5, [sp, #4] +; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: ldr w10, [sp, #120] +; CHECK-NEXT: strb w7, [sp, #6] ; CHECK-NEXT: strb w8, [sp, #15] -; 
CHECK-NEXT: ldr w8, [sp, #72] +; CHECK-NEXT: ldr w8, [sp, #80] ; CHECK-NEXT: strb w9, [sp, #14] -; CHECK-NEXT: ldr w9, [sp, #64] -; CHECK-NEXT: strb w10, [sp, #13] -; CHECK-NEXT: ldr w10, [sp, #56] -; CHECK-NEXT: strb w8, [sp, #12] +; CHECK-NEXT: ldr w9, [sp, #72] +; CHECK-NEXT: strb w8, [sp, #13] +; CHECK-NEXT: ldr w8, [sp, #64] +; CHECK-NEXT: strb w9, [sp, #12] +; CHECK-NEXT: ldr w9, [sp, #56] +; CHECK-NEXT: strb w8, [sp, #11] ; CHECK-NEXT: ldr w8, [sp, #48] -; CHECK-NEXT: strb w9, [sp, #11] +; CHECK-NEXT: strb w9, [sp, #10] ; CHECK-NEXT: ldr w9, [sp, #40] -; CHECK-NEXT: strb w10, [sp, #10] -; CHECK-NEXT: ldr w10, [sp, #32] ; CHECK-NEXT: strb w8, [sp, #9] -; CHECK-NEXT: ldr w8, [sp, #224] +; CHECK-NEXT: ldr w8, [sp, #32] ; CHECK-NEXT: strb w9, [sp, #8] ; CHECK-NEXT: ldr w9, [sp, #216] -; CHECK-NEXT: strb w10, [sp, #7] -; CHECK-NEXT: ldr w10, [sp, #208] -; CHECK-NEXT: strb w8, [sp, #31] -; CHECK-NEXT: ldr w8, [sp, #200] +; CHECK-NEXT: strb w8, [sp, #7] +; CHECK-NEXT: ldr w8, [sp, #224] ; CHECK-NEXT: strb w9, [sp, #30] -; CHECK-NEXT: ldr w9, [sp, #192] -; CHECK-NEXT: strb w10, [sp, #29] -; CHECK-NEXT: ldr w10, [sp, #184] -; CHECK-NEXT: strb w8, [sp, #28] -; CHECK-NEXT: ldr w8, [sp, #176] -; CHECK-NEXT: strb w9, [sp, #27] +; CHECK-NEXT: ldr w9, [sp, #200] +; CHECK-NEXT: strb w8, [sp, #31] +; CHECK-NEXT: ldr w8, [sp, #208] +; CHECK-NEXT: strb w9, [sp, #28] +; CHECK-NEXT: ldr w9, [sp, #184] +; CHECK-NEXT: strb w8, [sp, #29] +; CHECK-NEXT: ldr w8, [sp, #192] +; CHECK-NEXT: strb w9, [sp, #26] ; CHECK-NEXT: ldr w9, [sp, #168] -; CHECK-NEXT: strb w10, [sp, #26] -; CHECK-NEXT: ldr w10, [sp, #160] -; CHECK-NEXT: strb w8, [sp, #25] -; CHECK-NEXT: ldr w8, [sp, #152] +; CHECK-NEXT: strb w8, [sp, #27] +; CHECK-NEXT: ldr w8, [sp, #176] ; CHECK-NEXT: strb w9, [sp, #24] -; CHECK-NEXT: ldr w9, [sp, #144] -; CHECK-NEXT: strb w10, [sp, #23] -; CHECK-NEXT: ldr w10, [sp, #136] -; CHECK-NEXT: strb w8, [sp, #22] +; CHECK-NEXT: ldr w9, [sp, #152] +; CHECK-NEXT: strb w8, [sp, #25] +; CHECK-NEXT: ldr w8, [sp, #160] +; CHECK-NEXT: strb w9, [sp, #22] +; CHECK-NEXT: ldr w9, [sp, #136] +; CHECK-NEXT: strb w8, [sp, #23] +; CHECK-NEXT: ldr w8, [sp, #144] +; CHECK-NEXT: strb w9, [sp, #20] +; CHECK-NEXT: ldr w9, [sp, #112] +; CHECK-NEXT: strb w8, [sp, #21] ; CHECK-NEXT: ldr w8, [sp, #128] -; CHECK-NEXT: strb w9, [sp, #21] -; CHECK-NEXT: ldr w9, [sp, #120] -; CHECK-NEXT: strb w10, [sp, #20] -; CHECK-NEXT: ldr w10, [sp, #112] +; CHECK-NEXT: strb w6, [sp, #5] ; CHECK-NEXT: strb w8, [sp, #19] ; CHECK-NEXT: ldr w8, [sp, #104] +; CHECK-NEXT: strb w5, [sp, #4] ; CHECK-NEXT: strb w4, [sp, #3] -; CHECK-NEXT: ptrue p0.b, vl16 ; CHECK-NEXT: strb w3, [sp, #2] ; CHECK-NEXT: strb w2, [sp, #1] ; CHECK-NEXT: strb w1, [sp] -; CHECK-NEXT: strb w9, [sp, #18] -; CHECK-NEXT: strb w10, [sp, #17] +; CHECK-NEXT: strb w10, [sp, #18] +; CHECK-NEXT: strb w9, [sp, #17] ; CHECK-NEXT: strb w8, [sp, #16] ; CHECK-NEXT: mov w8, #16 // =0x10 -; CHECK-NEXT: ldp q0, q1, [sp] +; CHECK-NEXT: ldp q1, q0, [sp] ; CHECK-NEXT: lsl z0.b, z0.b, #7 -; CHECK-NEXT: asr z0.b, z0.b, #7 ; CHECK-NEXT: lsl z1.b, z1.b, #7 +; CHECK-NEXT: asr z0.b, z0.b, #7 +; CHECK-NEXT: asr z1.b, z1.b, #7 ; CHECK-NEXT: cmpne p1.b, p0/z, z0.b, #0 -; CHECK-NEXT: asr z0.b, z1.b, #7 -; CHECK-NEXT: cmpne p0.b, p0/z, z0.b, #0 ; CHECK-NEXT: mov z0.b, #0 // =0x0 -; CHECK-NEXT: st1b { z0.b }, p0, [x0, x8] -; CHECK-NEXT: st1b { z0.b }, p1, [x0] +; CHECK-NEXT: cmpne p0.b, p0/z, z1.b, #0 +; CHECK-NEXT: st1b { z0.b }, p1, [x0, x8] +; CHECK-NEXT: st1b { z0.b }, p0, [x0] ; CHECK-NEXT: add sp, sp, #32 ; 
CHECK-NEXT: ret call void @llvm.masked.store.v32i8(<32 x i8> zeroinitializer, ptr %dst, i32 8, <32 x i1> %mask) @@ -139,13 +139,13 @@ ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: .cfi_def_cfa_offset 16 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 +; CHECK-NEXT: mov z1.s, z0.s[1] ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: str wzr, [sp, #12] -; CHECK-NEXT: mov z0.s, z0.s[1] ; CHECK-NEXT: ptrue p0.h, vl4 -; CHECK-NEXT: fmov w9, s0 ; CHECK-NEXT: strh w8, [sp, #8] -; CHECK-NEXT: strh w9, [sp, #10] +; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: strh w8, [sp, #10] ; CHECK-NEXT: ldr d0, [sp, #8] ; CHECK-NEXT: lsl z0.h, z0.h, #15 ; CHECK-NEXT: asr z0.h, z0.h, #15 @@ -194,15 +194,15 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: mov z1.d, z0.d +; CHECK-NEXT: ptrue p0.h, vl8 ; CHECK-NEXT: mov x8, #8 // =0x8 ; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8 ; CHECK-NEXT: uunpklo z0.h, z0.b ; CHECK-NEXT: uunpklo z1.h, z1.b ; CHECK-NEXT: lsl z0.h, z0.h, #15 ; CHECK-NEXT: lsl z1.h, z1.h, #15 -; CHECK-NEXT: ptrue p0.h, vl8 -; CHECK-NEXT: asr z1.h, z1.h, #15 ; CHECK-NEXT: asr z0.h, z0.h, #15 +; CHECK-NEXT: asr z1.h, z1.h, #15 ; CHECK-NEXT: cmpne p1.h, p0/z, z1.h, #0 ; CHECK-NEXT: mov z1.h, #0 // =0x0 ; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 @@ -237,42 +237,42 @@ ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: mov z1.b, z0.b[7] ; CHECK-NEXT: mov z2.b, z0.b[6] -; CHECK-NEXT: fmov w9, s1 -; CHECK-NEXT: mov z1.b, z0.b[5] -; CHECK-NEXT: fmov w10, s2 -; CHECK-NEXT: mov z2.b, z0.b[4] -; CHECK-NEXT: fmov w11, s1 +; CHECK-NEXT: mov z3.b, z0.b[5] +; CHECK-NEXT: mov z4.b, z0.b[4] +; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: strh w8, [sp, #14] +; CHECK-NEXT: fmov w8, s2 +; CHECK-NEXT: mov z2.s, #0 // =0x0 +; CHECK-NEXT: strh w8, [sp, #12] +; CHECK-NEXT: fmov w8, s3 ; CHECK-NEXT: mov z3.b, z0.b[2] -; CHECK-NEXT: strh w9, [sp, #14] -; CHECK-NEXT: fmov w9, s2 -; CHECK-NEXT: strh w10, [sp, #12] -; CHECK-NEXT: mov z2.b, z0.b[3] -; CHECK-NEXT: strh w11, [sp, #10] +; CHECK-NEXT: strh w8, [sp, #10] +; CHECK-NEXT: fmov w8, s4 ; CHECK-NEXT: mov z4.b, z0.b[1] -; CHECK-NEXT: strh w9, [sp, #8] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: ldr d1, [sp, #8] +; CHECK-NEXT: strh w8, [sp, #8] ; CHECK-NEXT: mov x8, #4 // =0x4 -; CHECK-NEXT: ptrue p0.s, vl4 -; CHECK-NEXT: fmov w10, s2 -; CHECK-NEXT: uunpklo z0.s, z1.h -; CHECK-NEXT: lsl z0.s, z0.s, #31 -; CHECK-NEXT: asr z0.s, z0.s, #31 -; CHECK-NEXT: cmpne p1.s, p0/z, z0.s, #0 -; CHECK-NEXT: mov z0.s, #0 // =0x0 -; CHECK-NEXT: st1w { z0.s }, p1, [x0, x8, lsl #2] -; CHECK-NEXT: fmov w8, s3 -; CHECK-NEXT: strh w9, [sp] -; CHECK-NEXT: fmov w9, s4 -; CHECK-NEXT: strh w10, [sp, #6] -; CHECK-NEXT: strh w8, [sp, #4] -; CHECK-NEXT: strh w9, [sp, #2] -; CHECK-NEXT: ldr d1, [sp] +; CHECK-NEXT: ldr d1, [sp, #8] ; CHECK-NEXT: uunpklo z1.s, z1.h ; CHECK-NEXT: lsl z1.s, z1.s, #31 ; CHECK-NEXT: asr z1.s, z1.s, #31 -; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0 -; CHECK-NEXT: st1w { z0.s }, p0, [x0] +; CHECK-NEXT: cmpne p1.s, p0/z, z1.s, #0 +; CHECK-NEXT: mov z1.b, z0.b[3] +; CHECK-NEXT: st1w { z2.s }, p1, [x0, x8, lsl #2] +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: strh w8, [sp] +; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: strh w8, [sp, #6] +; CHECK-NEXT: fmov w8, s3 +; CHECK-NEXT: strh w8, [sp, #4] +; CHECK-NEXT: fmov w8, s4 +; CHECK-NEXT: strh w8, [sp, #2] +; CHECK-NEXT: ldr d0, [sp] +; CHECK-NEXT: uunpklo z0.s, z0.h +; CHECK-NEXT: lsl z0.s, z0.s, #31 +; CHECK-NEXT: asr z0.s, z0.s, #31 +; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0 +; CHECK-NEXT: st1w 
{ z2.s }, p0, [x0]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
call void @llvm.masked.store.v8f32(<8 x float> zeroinitializer, ptr %dst, i32 8, <8 x i1> %mask)
@@ -299,8 +299,8 @@
; CHECK-LABEL: masked_store_v4f64:
; CHECK: // %bb.0:
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: mov x8, #2 // =0x2
; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: mov x8, #2 // =0x2
; CHECK-NEXT: uunpklo z0.s, z0.h
; CHECK-NEXT: uunpklo z1.d, z0.s
; CHECK-NEXT: ext z0.b, z0.b, z0.b, #8
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-optimize-ptrue.ll
@@ -53,10 +53,10 @@
define void @add_v32i8(ptr %a, ptr %b) {
; CHECK-LABEL: add_v32i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
-; CHECK-NEXT: ldp q2, q3, [x1]
-; CHECK-NEXT: add z0.b, z0.b, z2.b
-; CHECK-NEXT: add z1.b, z1.b, z3.b
+; CHECK-NEXT: ldp q0, q3, [x1]
+; CHECK-NEXT: ldp q1, q2, [x0]
+; CHECK-NEXT: add z0.b, z1.b, z0.b
+; CHECK-NEXT: add z1.b, z2.b, z3.b
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <32 x i8>, ptr %a
@@ -125,10 +125,10 @@
define void @add_v16i16(ptr %a, ptr %b, ptr %c) {
; CHECK-LABEL: add_v16i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
-; CHECK-NEXT: ldp q2, q3, [x1]
-; CHECK-NEXT: add z0.h, z0.h, z2.h
-; CHECK-NEXT: add z1.h, z1.h, z3.h
+; CHECK-NEXT: ldp q0, q3, [x1]
+; CHECK-NEXT: ldp q1, q2, [x0]
+; CHECK-NEXT: add z0.h, z1.h, z0.h
+; CHECK-NEXT: add z1.h, z2.h, z3.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%op1 = load <16 x i16>, ptr %a
@@ -141,8 +141,8 @@
define void @abs_v2i32(ptr %a) {
; CHECK-LABEL: abs_v2i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ptrue p0.s, vl2
+; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: abs z0.s, p0/m, z0.s
; CHECK-NEXT: str d0, [x0]
; CHECK-NEXT: ret
@@ -155,8 +155,8 @@
define void @abs_v4i32(ptr %a) {
; CHECK-LABEL: abs_v4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: abs z0.s, p0/m, z0.s
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
@@ -169,8 +169,8 @@
define void @abs_v8i32(ptr %a) {
; CHECK-LABEL: abs_v8i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: abs z0.s, p0/m, z0.s
; CHECK-NEXT: abs z1.s, p0/m, z1.s
; CHECK-NEXT: stp q0, q1, [x0]
@@ -184,8 +184,8 @@
define void @abs_v2i64(ptr %a) {
; CHECK-LABEL: abs_v2i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: abs z0.d, p0/m, z0.d
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: ret
@@ -198,8 +198,8 @@
define void @abs_v4i64(ptr %a) {
; CHECK-LABEL: abs_v4i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: abs z0.d, p0/m, z0.d
; CHECK-NEXT: abs z1.d, p0/m, z1.d
; CHECK-NEXT: stp q0, q1, [x0]
@@ -213,8 +213,8 @@
define void @fadd_v2f16(ptr %a, ptr %b) {
; CHECK-LABEL: fadd_v2f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr s0, [x0]
; CHECK-NEXT: ptrue p0.h, vl4
+; CHECK-NEXT: ldr s0, [x0]
; CHECK-NEXT: ldr s1, [x1]
; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: fmov w8, s0
@@ -230,8 +230,8 @@
define void @fadd_v4f16(ptr %a, ptr %b) {
; CHECK-LABEL: fadd_v4f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ptrue p0.h, vl4
+; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: str d0, [x0]
@@ -246,8 +246,8 @@
define void @fadd_v8f16(ptr %a, ptr %b) {
; CHECK-LABEL: fadd_v8f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: str q0, [x0]
@@ -262,10 +262,11 @@
define void @fadd_v16f16(ptr %a, ptr %b) {
; CHECK-LABEL: fadd_v16f16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.h, vl8
-; CHECK-NEXT: ldp q2, q3, [x1]
-; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z2.h
+; CHECK-NEXT: ldp q0, q3, [x1]
+; CHECK-NEXT: ldp q1, q2, [x0]
+; CHECK-NEXT: fadd z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: movprfx z1, z2
; CHECK-NEXT: fadd z1.h, p0/m, z1.h, z3.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
@@ -279,8 +280,8 @@
define void @fadd_v2f32(ptr %a, ptr %b) {
; CHECK-LABEL: fadd_v2f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ptrue p0.s, vl2
+; CHECK-NEXT: ldr d0, [x0]
; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: str d0, [x0]
@@ -295,8 +296,8 @@
define void @fadd_v4f32(ptr %a, ptr %b) {
; CHECK-LABEL: fadd_v4f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: str q0, [x0]
@@ -311,10 +312,11 @@
define void @fadd_v8f32(ptr %a, ptr %b) {
; CHECK-LABEL: fadd_v8f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ldp q2, q3, [x1]
-; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z2.s
+; CHECK-NEXT: ldp q0, q3, [x1]
+; CHECK-NEXT: ldp q1, q2, [x0]
+; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: movprfx z1, z2
; CHECK-NEXT: fadd z1.s, p0/m, z1.s, z3.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
@@ -328,8 +330,8 @@
define void @fadd_v2f64(ptr %a, ptr %b) {
; CHECK-LABEL: fadd_v2f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: str q0, [x0]
@@ -344,10 +346,11 @@
define void @fadd_v4f64(ptr %a, ptr %b) {
; CHECK-LABEL: fadd_v4f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: ldp q2, q3, [x1]
-; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z2.d
+; CHECK-NEXT: ldp q0, q3, [x1]
+; CHECK-NEXT: ldp q1, q2, [x0]
+; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: movprfx z1, z2
; CHECK-NEXT: fadd z1.d, p0/m, z1.d, z3.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-rev.ll
@@ -8,8 +8,8 @@
define void @test_revbv16i16(ptr %a) {
; CHECK-LABEL: test_revbv16i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: revb z0.h, p0/m, z0.h
; CHECK-NEXT: revb z1.h, p0/m, z1.h
; CHECK-NEXT: stp q0, q1, [x0]
@@ -24,8 +24,8 @@
define void @test_revbv8i32(ptr %a) {
; CHECK-LABEL: test_revbv8i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: revb z0.s, p0/m, z0.s
; CHECK-NEXT: revb z1.s, p0/m, z1.s
; CHECK-NEXT: stp q0, q1, [x0]
@@ -40,8 +40,8 @@
define void @test_revbv4i64(ptr %a) {
; CHECK-LABEL: test_revbv4i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: revb z0.d, p0/m, z0.d
; CHECK-NEXT: revb z1.d, p0/m, z1.d
; CHECK-NEXT: stp q0, q1, [x0]
@@ -56,8 +56,8 @@
define void @test_revhv8i32(ptr %a) {
; CHECK-LABEL: test_revhv8i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: revh z0.s, p0/m, z0.s
; CHECK-NEXT: revh z1.s, p0/m, z1.s
; CHECK-NEXT: stp q0, q1, [x0]
@@ -72,8 +72,8 @@
define void @test_revhv8f32(ptr %a) {
; CHECK-LABEL: test_revhv8f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: revh z0.s, p0/m, z0.s
; CHECK-NEXT: revh z1.s, p0/m, z1.s
; CHECK-NEXT: stp q0, q1, [x0]
@@ -88,8 +88,8 @@
define void @test_revhv4i64(ptr %a) {
; CHECK-LABEL: test_revhv4i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: revh z0.d, p0/m, z0.d
; CHECK-NEXT: revh z1.d, p0/m, z1.d
; CHECK-NEXT: stp q0, q1, [x0]
@@ -104,8 +104,8 @@
define void @test_revwv4i64(ptr %a) {
; CHECK-LABEL: test_revwv4i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: revw z0.d, p0/m, z0.d
; CHECK-NEXT: revw z1.d, p0/m, z1.d
; CHECK-NEXT: stp q0, q1, [x0]
@@ -120,8 +120,8 @@
define void @test_revwv4f64(ptr %a) {
; CHECK-LABEL: test_revwv4f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: revw z0.d, p0/m, z0.d
; CHECK-NEXT: revw z1.d, p0/m, z1.d
; CHECK-NEXT: stp q0, q1, [x0]
@@ -135,8 +135,8 @@
define <16 x i8> @test_revv16i8(ptr %a) {
; CHECK-LABEL: test_revv16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: revb z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
@@ -149,8 +149,8 @@
define void @test_revwv8i32v8i32(ptr %a, ptr %b) {
; CHECK-LABEL: test_revwv8i32v8i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x1]
; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ldp q0, q1, [x1]
; CHECK-NEXT: revw z0.d, p0/m, z0.d
; CHECK-NEXT: revw z1.d, p0/m, z1.d
; CHECK-NEXT: stp q0, q1, [x0]
@@ -165,15 +165,15 @@
define void @test_revhv32i16(ptr %a) {
; CHECK-LABEL: test_revhv32i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0, #32]
; CHECK-NEXT: ptrue p0.d
-; CHECK-NEXT: revh z0.d, p0/m, z0.d
+; CHECK-NEXT: ldp q0, q1, [x0, #32]
; CHECK-NEXT: ldp q2, q3, [x0]
+; CHECK-NEXT: revh z0.d, p0/m, z0.d
; CHECK-NEXT: revh z1.d, p0/m, z1.d
+; CHECK-NEXT: revh z2.d, p0/m, z2.d
+; CHECK-NEXT: revh z3.d, p0/m, z3.d
; CHECK-NEXT: stp q0, q1, [x0, #32]
-; CHECK-NEXT: revh z0.d, p0/m, z2.d
-; CHECK-NEXT: revh z1.d, p0/m, z3.d
-; CHECK-NEXT: stp q0, q1, [x0]
+; CHECK-NEXT: stp q2, q3, [x0]
; CHECK-NEXT: ret
%tmp1 = load <32 x i16>, ptr %a
%tmp2 = shufflevector <32 x i16> %tmp1, <32 x i16> undef, <32 x i32>
@@ -185,15 +185,15 @@
; CHECK-LABEL: test_rev_elts_fail:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0]
-; CHECK-NEXT: fmov x10, d1
; CHECK-NEXT: mov z2.d, z0.d[1]
; CHECK-NEXT: fmov x8, d0
-; CHECK-NEXT: fmov x9, d2
; CHECK-NEXT: mov z0.d, z1.d[1]
-; CHECK-NEXT: fmov x11, d0
+; CHECK-NEXT: fmov x9, d2
; CHECK-NEXT: stp x9, x8, [sp, #-32]!
; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: stp x11, x10, [sp, #16]
+; CHECK-NEXT: fmov x8, d1
+; CHECK-NEXT: fmov x9, d0
+; CHECK-NEXT: stp x9, x8, [sp, #16]
; CHECK-NEXT: ldp q1, q0, [sp]
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: add sp, sp, #32
@@ -209,8 +209,8 @@
define void @test_revdv4i64_sve2p1(ptr %a) #1 {
; CHECK-LABEL: test_revdv4i64_sve2p1:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: revd z0.q, p0/m, z0.q
; CHECK-NEXT: revd z1.q, p0/m, z1.q
; CHECK-NEXT: stp q0, q1, [x0]
@@ -224,8 +224,8 @@
define void @test_revdv4f64_sve2p1(ptr %a) #1 {
; CHECK-LABEL: test_revdv4f64_sve2p1:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: revd z0.q, p0/m, z0.q
; CHECK-NEXT: revd z1.q, p0/m, z1.q
; CHECK-NEXT: stp q0, q1, [x0]
@@ -241,25 +241,25 @@
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #32
; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: ldp q0, q1, [x0]
-; CHECK-NEXT: mov z2.s, z0.s[1]
-; CHECK-NEXT: mov z3.s, z0.s[2]
+; CHECK-NEXT: ldp q0, q3, [x0]
+; CHECK-NEXT: mov z1.s, z0.s[1]
+; CHECK-NEXT: mov z2.s, z0.s[2]
; CHECK-NEXT: mov z4.s, z0.s[3]
; CHECK-NEXT: fmov w8, s0
-; CHECK-NEXT: fmov w9, s2
-; CHECK-NEXT: fmov w10, s3
-; CHECK-NEXT: fmov w11, s4
-; CHECK-NEXT: mov z0.s, z1.s[1]
-; CHECK-NEXT: mov z2.s, z1.s[2]
-; CHECK-NEXT: mov z3.s, z1.s[3]
+; CHECK-NEXT: mov z0.s, z3.s[1]
+; CHECK-NEXT: fmov w9, s1
+; CHECK-NEXT: mov z1.s, z3.s[2]
; CHECK-NEXT: stp w9, w8, [sp, #24]
-; CHECK-NEXT: fmov w8, s1
+; CHECK-NEXT: fmov w8, s2
+; CHECK-NEXT: fmov w9, s4
+; CHECK-NEXT: mov z2.s, z3.s[3]
+; CHECK-NEXT: stp w9, w8, [sp, #16]
+; CHECK-NEXT: fmov w8, s3
; CHECK-NEXT: fmov w9, s0
-; CHECK-NEXT: stp w11, w10, [sp, #16]
-; CHECK-NEXT: fmov w10, s2
-; CHECK-NEXT: fmov w11, s3
; CHECK-NEXT: stp w9, w8, [sp, #8]
-; CHECK-NEXT: stp w11, w10, [sp]
+; CHECK-NEXT: fmov w8, s1
+; CHECK-NEXT: fmov w9, s2
+; CHECK-NEXT: stp w9, w8, [sp]
; CHECK-NEXT: ldp q0, q1, [sp]
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: add sp, sp, #32
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-permute-zip-uzp-trn.ll
@@ -14,57 +14,57 @@
; CHECK-NEXT: ldr q1, [x1, #16]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: mov z2.b, z0.b[15]
+; CHECK-NEXT: mov z3.b, z0.b[14]
+; CHECK-NEXT: mov z4.b, z0.b[13]
; CHECK-NEXT: fmov w8, s2
-; CHECK-NEXT: mov z2.b, z0.b[14]
-; CHECK-NEXT: fmov w9, s2
-; CHECK-NEXT: mov z2.b, z0.b[13]
-; CHECK-NEXT: fmov w10, s2
; CHECK-NEXT: mov z2.b, z0.b[12]
; CHECK-NEXT: strb w8, [sp, #14]
+; CHECK-NEXT: fmov w8, s3
+; CHECK-NEXT: mov z3.b, z0.b[11]
+; CHECK-NEXT: strb w8, [sp, #12]
+; CHECK-NEXT: fmov w8, s4
+; CHECK-NEXT: mov z4.b, z0.b[10]
+; CHECK-NEXT: strb w8, [sp, #10]
; CHECK-NEXT: fmov w8, s2
-; CHECK-NEXT: mov z2.b, z0.b[11]
-; CHECK-NEXT: strb w9, [sp, #12]
-; CHECK-NEXT: fmov w9, s2
-; CHECK-NEXT: mov z2.b, z0.b[10]
-; CHECK-NEXT: strb w10, [sp, #10]
-; CHECK-NEXT: fmov w10, s2
; CHECK-NEXT: mov z2.b, z0.b[9]
; CHECK-NEXT: strb w8, [sp, #8]
-; CHECK-NEXT: fmov w8, s2
-; CHECK-NEXT: mov z2.b, z0.b[8]
-; CHECK-NEXT: strb w9, [sp, #6]
-; CHECK-NEXT: fmov w9, s2
-; CHECK-NEXT: mov z2.b, z1.b[15]
-; CHECK-NEXT: strb w10, [sp, #4]
-; CHECK-NEXT: strb w8, [sp, #2]
+; CHECK-NEXT: fmov w8, s3
+; CHECK-NEXT: mov z3.b, z0.b[8]
+; CHECK-NEXT: zip1 z0.b, z0.b, z1.b
+; CHECK-NEXT: strb w8, [sp, #6]
+; CHECK-NEXT: fmov w8, s4
+; CHECK-NEXT: mov z4.b, z1.b[15]
+; CHECK-NEXT: strb w8, [sp, #4]
; CHECK-NEXT: fmov w8, s2
; CHECK-NEXT: mov z2.b, z1.b[14]
-; CHECK-NEXT: strb w9, [sp]
-; CHECK-NEXT: fmov w9, s2
-; CHECK-NEXT: mov z2.b, z1.b[13]
-; CHECK-NEXT: fmov w10, s2
-; CHECK-NEXT: mov z2.b, z1.b[12]
+; CHECK-NEXT: strb w8, [sp, #2]
+; CHECK-NEXT: fmov w8, s3
+; CHECK-NEXT: mov z3.b, z1.b[13]
+; CHECK-NEXT: strb w8, [sp]
+; CHECK-NEXT: fmov w8, s4
+; CHECK-NEXT: mov z4.b, z1.b[12]
; CHECK-NEXT: strb w8, [sp, #15]
; CHECK-NEXT: fmov w8, s2
; CHECK-NEXT: mov z2.b, z1.b[11]
-; CHECK-NEXT: strb w9, [sp, #13]
-; CHECK-NEXT: strb w10, [sp, #11]
-; CHECK-NEXT: zip1 z0.b, z0.b, z1.b
+; CHECK-NEXT: strb w8, [sp, #13]
+; CHECK-NEXT: fmov w8, s3
+; CHECK-NEXT: mov z3.b, z1.b[10]
+; CHECK-NEXT: strb w8, [sp, #11]
+; CHECK-NEXT: fmov w8, s4
+; CHECK-NEXT: mov z4.b, z1.b[9]
; CHECK-NEXT: strb w8, [sp, #9]
; CHECK-NEXT: fmov w8, s2
-; CHECK-NEXT: mov z2.b, z1.b[10]
-; CHECK-NEXT: fmov w9, s2
-; CHECK-NEXT: mov z2.b, z1.b[9]
-; CHECK-NEXT: fmov w10, s2
; CHECK-NEXT: mov z2.b, z1.b[8]
; CHECK-NEXT: strb w8, [sp, #7]
+; CHECK-NEXT: fmov w8, s3
+; CHECK-NEXT: strb w8, [sp, #5]
+; CHECK-NEXT: fmov w8, s4
+; CHECK-NEXT: strb w8, [sp, #3]
; CHECK-NEXT: fmov w8, s2
-; CHECK-NEXT: strb w9, [sp, #5]
-; CHECK-NEXT: strb w10, [sp, #3]
; CHECK-NEXT: strb w8, [sp, #1]
-; CHECK-NEXT: ldr q2, [sp]
+; CHECK-NEXT: ldr q1, [sp]
; CHECK-NEXT: str q0, [x0]
-; CHECK-NEXT: str q2, [x0, #16]
+; CHECK-NEXT: str q1, [x0, #16]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%tmp1 = load volatile <32 x i8>, ptr %a
@@ -79,119 +79,119 @@
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #64
; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: ldp q2, q5, [x1]
-; CHECK-NEXT: ldp q4, q7, [x0]
-; CHECK-NEXT: mov z16.h, z5.h[7]
+; CHECK-NEXT: ldp q1, q3, [x1]
+; CHECK-NEXT: ldp q0, q4, [x0]
+; CHECK-NEXT: ldp q2, q5, [x0, #32]
+; CHECK-NEXT: mov z16.h, z3.h[7]
+; CHECK-NEXT: mov z18.h, z3.h[6]
+; CHECK-NEXT: mov z17.h, z4.h[7]
+; CHECK-NEXT: ldp q6, q7, [x1, #32]
+; CHECK-NEXT: mov z19.h, z4.h[6]
; CHECK-NEXT: fmov w8, s16
-; CHECK-NEXT: mov z16.h, z5.h[6]
-; CHECK-NEXT: fmov w10, s16
-; CHECK-NEXT: mov z16.h, z5.h[5]
-; CHECK-NEXT: mov z17.h, z7.h[7]
-; CHECK-NEXT: fmov w9, s17
-; CHECK-NEXT: mov z17.h, z7.h[6]
-; CHECK-NEXT: ldp q0, q1, [x0, #32]
-; CHECK-NEXT: ldp q3, q6, [x1, #32]
+; CHECK-NEXT: mov z16.h, z3.h[5]
; CHECK-NEXT: strh w8, [sp, #30]
; CHECK-NEXT: fmov w8, s17
-; CHECK-NEXT: strh w9, [sp, #28]
-; CHECK-NEXT: strh w10, [sp, #26]
+; CHECK-NEXT: mov z17.h, z4.h[5]
+; CHECK-NEXT: strh w8, [sp, #28]
+; CHECK-NEXT: fmov w8, s18
+; CHECK-NEXT: mov z18.h, z3.h[4]
+; CHECK-NEXT: zip1 z3.h, z4.h, z3.h
+; CHECK-NEXT: strh w8, [sp, #26]
+; CHECK-NEXT: fmov w8, s19
+; CHECK-NEXT: mov z19.h, z7.h[6]
; CHECK-NEXT: strh w8, [sp, #24]
; CHECK-NEXT: fmov w8, s16
-; CHECK-NEXT: mov z16.h, z7.h[5]
-; CHECK-NEXT: fmov w9, s16
-; CHECK-NEXT: mov z16.h, z5.h[4]
-; CHECK-NEXT: fmov w10, s16
-; CHECK-NEXT: mov z16.h, z7.h[4]
+; CHECK-NEXT: mov z16.h, z4.h[4]
+; CHECK-NEXT: zip1 z4.h, z5.h, z7.h
; CHECK-NEXT: strh w8, [sp, #22]
+; CHECK-NEXT: fmov w8, s17
+; CHECK-NEXT: mov z17.h, z1.h[7]
+; CHECK-NEXT: add z3.h, z3.h, z4.h
+; CHECK-NEXT: strh w8, [sp, #20]
+; CHECK-NEXT: fmov w8, s18
+; CHECK-NEXT: mov z18.h, z0.h[7]
+; CHECK-NEXT: strh w8, [sp, #18]
; CHECK-NEXT: fmov w8, s16
-; CHECK-NEXT: mov z16.h, z2.h[7]
-; CHECK-NEXT: strh w9, [sp, #20]
-; CHECK-NEXT: strh w10, [sp, #18]
-; CHECK-NEXT: mov z18.h, z6.h[7]
+; CHECK-NEXT: mov z16.h, z1.h[6]
; CHECK-NEXT: strh w8, [sp, #16]
-; CHECK-NEXT: fmov w8, s16
-; CHECK-NEXT: mov z16.h, z4.h[7]
-; CHECK-NEXT: ldr q17, [sp, #16]
-; CHECK-NEXT: fmov w9, s16
-; CHECK-NEXT: mov z16.h, z2.h[6]
-; CHECK-NEXT: fmov w10, s16
-; CHECK-NEXT: mov z16.h, z4.h[6]
+; CHECK-NEXT: fmov w8, s17
+; CHECK-NEXT: mov z17.h, z0.h[6]
; CHECK-NEXT: strh w8, [sp, #62]
+; CHECK-NEXT: fmov w8, s18
+; CHECK-NEXT: mov z18.h, z1.h[5]
+; CHECK-NEXT: strh w8, [sp, #60]
; CHECK-NEXT: fmov w8, s16
-; CHECK-NEXT: mov z16.h, z2.h[5]
-; CHECK-NEXT: strh w9, [sp, #60]
-; CHECK-NEXT: strh w10, [sp, #58]
-; CHECK-NEXT: zip1 z5.h, z7.h, z5.h
+; CHECK-NEXT: mov z16.h, z0.h[5]
+; CHECK-NEXT: strh w8, [sp, #58]
+; CHECK-NEXT: fmov w8, s17
+; CHECK-NEXT: mov z17.h, z1.h[4]
; CHECK-NEXT: strh w8, [sp, #56]
-; CHECK-NEXT: fmov w8, s16
-; CHECK-NEXT: mov z16.h, z4.h[5]
-; CHECK-NEXT: fmov w9, s16
-; CHECK-NEXT: mov z16.h, z2.h[4]
-; CHECK-NEXT: fmov w10, s16
-; CHECK-NEXT: mov z16.h, z4.h[4]
+; CHECK-NEXT: fmov w8, s18
+; CHECK-NEXT: mov z18.h, z0.h[4]
+; CHECK-NEXT: zip1 z0.h, z0.h, z1.h
+; CHECK-NEXT: zip1 z1.h, z2.h, z6.h
; CHECK-NEXT: strh w8, [sp, #54]
; CHECK-NEXT: fmov w8, s16
-; CHECK-NEXT: strh w9, [sp, #52]
-; CHECK-NEXT: zip1 z2.h, z4.h, z2.h
-; CHECK-NEXT: strh w10, [sp, #50]
-; CHECK-NEXT: strh w8, [sp, #48]
+; CHECK-NEXT: mov z16.h, z7.h[7]
+; CHECK-NEXT: add z0.h, z0.h, z1.h
+; CHECK-NEXT: strh w8, [sp, #52]
+; CHECK-NEXT: fmov w8, s17
+; CHECK-NEXT: mov z17.h, z5.h[7]
+; CHECK-NEXT: strh w8, [sp, #50]
; CHECK-NEXT: fmov w8, s18
-; CHECK-NEXT: mov z18.h, z1.h[7]
-; CHECK-NEXT: ldr q16, [sp, #48]
-; CHECK-NEXT: fmov w9, s18
-; CHECK-NEXT: mov z18.h, z6.h[6]
-; CHECK-NEXT: fmov w10, s18
-; CHECK-NEXT: mov z18.h, z1.h[6]
+; CHECK-NEXT: ldr q18, [sp, #16]
+; CHECK-NEXT: strh w8, [sp, #48]
+; CHECK-NEXT: fmov w8, s16
+; CHECK-NEXT: mov z16.h, z5.h[6]
+; CHECK-NEXT: ldr q20, [sp, #48]
; CHECK-NEXT: strh w8, [sp, #46]
-; CHECK-NEXT: fmov w8, s18
-; CHECK-NEXT: mov z18.h, z6.h[5]
-; CHECK-NEXT: strh w9, [sp, #44]
-; CHECK-NEXT: strh w10, [sp, #42]
+; CHECK-NEXT: fmov w8, s17
+; CHECK-NEXT: mov z17.h, z7.h[5]
+; CHECK-NEXT: strh w8, [sp, #44]
+; CHECK-NEXT: fmov w8, s19
+; CHECK-NEXT: mov z19.h, z5.h[5]
+; CHECK-NEXT: strh w8, [sp, #42]
+; CHECK-NEXT: fmov w8, s16
+; CHECK-NEXT: mov z16.h, z7.h[4]
; CHECK-NEXT: strh w8, [sp, #40]
-; CHECK-NEXT: fmov w8, s18
-; CHECK-NEXT: mov z18.h, z1.h[5]
-; CHECK-NEXT: fmov w9, s18
-; CHECK-NEXT: mov z18.h, z6.h[4]
-; CHECK-NEXT: fmov w10, s18
-; CHECK-NEXT: mov z18.h, z1.h[4]
+; CHECK-NEXT: fmov w8, s17
+; CHECK-NEXT: mov z17.h, z5.h[4]
; CHECK-NEXT: strh w8, [sp, #38]
-; CHECK-NEXT: fmov w8, s18
-; CHECK-NEXT: mov z18.h, z3.h[7]
-; CHECK-NEXT: strh w9, [sp, #36]
-; CHECK-NEXT: strh w10, [sp, #34]
-; CHECK-NEXT: zip1 z1.h, z1.h, z6.h
+; CHECK-NEXT: fmov w8, s19
+; CHECK-NEXT: mov z19.h, z6.h[7]
+; CHECK-NEXT: strh w8, [sp, #36]
+; CHECK-NEXT: fmov w8, s16
+; CHECK-NEXT: mov z16.h, z2.h[7]
+; CHECK-NEXT: strh w8, [sp, #34]
+; CHECK-NEXT: fmov w8, s17
+; CHECK-NEXT: mov z17.h, z6.h[6]
; CHECK-NEXT: strh w8, [sp, #32]
-; CHECK-NEXT: fmov w8, s18
-; CHECK-NEXT: mov z18.h, z0.h[7]
-; CHECK-NEXT: ldr q4, [sp, #32]
-; CHECK-NEXT: fmov w9, s18
-; CHECK-NEXT: mov z18.h, z3.h[6]
-; CHECK-NEXT: fmov w10, s18
-; CHECK-NEXT: mov z18.h, z0.h[6]
+; CHECK-NEXT: fmov w8, s19
+; CHECK-NEXT: mov z19.h, z2.h[6]
; CHECK-NEXT: strh w8, [sp, #14]
-; CHECK-NEXT: fmov w8, s18
-; CHECK-NEXT: mov z18.h, z3.h[5]
-; CHECK-NEXT: strh w9, [sp, #12]
-; CHECK-NEXT: strh w10, [sp, #10]
-; CHECK-NEXT: add z1.h, z5.h, z1.h
+; CHECK-NEXT: fmov w8, s16
+; CHECK-NEXT: mov z16.h, z6.h[5]
+; CHECK-NEXT: strh w8, [sp, #12]
+; CHECK-NEXT: fmov w8, s17
+; CHECK-NEXT: mov z17.h, z2.h[5]
+; CHECK-NEXT: strh w8, [sp, #10]
+; CHECK-NEXT: fmov w8, s19
+; CHECK-NEXT: mov z19.h, z6.h[4]
; CHECK-NEXT: strh w8, [sp, #8]
-; CHECK-NEXT: fmov w8, s18
-; CHECK-NEXT: mov z18.h, z0.h[5]
-; CHECK-NEXT: add z4.h, z17.h, z4.h
-; CHECK-NEXT: fmov w9, s18
-; CHECK-NEXT: mov z18.h, z3.h[4]
-; CHECK-NEXT: fmov w10, s18
-; CHECK-NEXT: mov z18.h, z0.h[4]
+; CHECK-NEXT: fmov w8, s16
+; CHECK-NEXT: mov z16.h, z2.h[4]
+; CHECK-NEXT: ldr q2, [sp, #32]
; CHECK-NEXT: strh w8, [sp, #6]
-; CHECK-NEXT: fmov w8, s18
-; CHECK-NEXT: strh w9, [sp, #4]
-; CHECK-NEXT: zip1 z0.h, z0.h, z3.h
-; CHECK-NEXT: strh w10, [sp, #2]
-; CHECK-NEXT: add z0.h, z2.h, z0.h
+; CHECK-NEXT: fmov w8, s17
+; CHECK-NEXT: add z2.h, z18.h, z2.h
+; CHECK-NEXT: strh w8, [sp, #4]
+; CHECK-NEXT: fmov w8, s19
+; CHECK-NEXT: strh w8, [sp, #2]
+; CHECK-NEXT: fmov w8, s16
; CHECK-NEXT: strh w8, [sp]
-; CHECK-NEXT: ldr q3, [sp]
-; CHECK-NEXT: stp q1, q4, [x0, #32]
-; CHECK-NEXT: add z1.h, z16.h, z3.h
+; CHECK-NEXT: ldr q4, [sp]
+; CHECK-NEXT: stp q3, q2, [x0, #32]
+; CHECK-NEXT: add z1.h, z20.h, z4.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: add sp, sp, #64
; CHECK-NEXT: ret
@@ -214,33 +214,33 @@
; CHECK-NEXT: ldr q1, [x1, #16]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: mov z2.h, z0.h[7]
+; CHECK-NEXT: mov z3.h, z0.h[6]
+; CHECK-NEXT: mov z4.h, z0.h[5]
; CHECK-NEXT: fmov w8, s2
-; CHECK-NEXT: mov z2.h, z0.h[6]
-; CHECK-NEXT: fmov w9, s2
-; CHECK-NEXT: mov z2.h, z0.h[5]
-; CHECK-NEXT: fmov w10, s2
; CHECK-NEXT: mov z2.h, z0.h[4]
+; CHECK-NEXT: zip1 z0.h, z0.h, z1.h
; CHECK-NEXT: strh w8, [sp, #12]
+; CHECK-NEXT: fmov w8, s3
+; CHECK-NEXT: mov z3.h, z1.h[7]
+; CHECK-NEXT: strh w8, [sp, #8]
+; CHECK-NEXT: fmov w8, s4
+; CHECK-NEXT: mov z4.h, z1.h[6]
+; CHECK-NEXT: strh w8, [sp, #4]
; CHECK-NEXT: fmov w8, s2
-; CHECK-NEXT: mov z2.h, z1.h[7]
-; CHECK-NEXT: strh w9, [sp, #8]
-; CHECK-NEXT: fmov w9, s2
-; CHECK-NEXT: mov z2.h, z1.h[6]
-; CHECK-NEXT: strh w10, [sp, #4]
-; CHECK-NEXT: fmov w10, s2
; CHECK-NEXT: mov z2.h, z1.h[5]
; CHECK-NEXT: strh w8, [sp]
+; CHECK-NEXT: fmov w8, s3
+; CHECK-NEXT: mov z3.h, z1.h[4]
+; CHECK-NEXT: strh w8, [sp, #14]
+; CHECK-NEXT: fmov w8, s4
+; CHECK-NEXT: strh w8, [sp, #10]
; CHECK-NEXT: fmov w8, s2
-; CHECK-NEXT: mov z2.h, z1.h[4]
-; CHECK-NEXT: strh w9, [sp, #14]
-; CHECK-NEXT: fmov w9, s2
-; CHECK-NEXT: strh w10, [sp, #10]
-; CHECK-NEXT: zip1 z0.h, z0.h, z1.h
; CHECK-NEXT: strh w8, [sp, #6]
-; CHECK-NEXT: strh w9, [sp, #2]
-; CHECK-NEXT: ldr q2, [sp]
+; CHECK-NEXT: fmov w8, s3
+; CHECK-NEXT: strh w8, [sp, #2]
+; CHECK-NEXT: ldr q1, [sp]
; CHECK-NEXT: str q0, [x0]
-; CHECK-NEXT: str q2, [x0, #16]
+; CHECK-NEXT: str q1, [x0, #16]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%tmp1 = load volatile <16 x i16>, ptr %a
@@ -260,19 +260,19 @@
; CHECK-NEXT: ldr q1, [x1, #16]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: mov z2.s, z0.s[3]
+; CHECK-NEXT: mov z4.s, z0.s[2]
+; CHECK-NEXT: mov z3.s, z1.s[3]
+; CHECK-NEXT: zip1 z0.s, z0.s, z1.s
; CHECK-NEXT: fmov w8, s2
-; CHECK-NEXT: mov z2.s, z1.s[3]
-; CHECK-NEXT: fmov w9, s2
-; CHECK-NEXT: mov z2.s, z0.s[2]
-; CHECK-NEXT: fmov w10, s2
; CHECK-NEXT: mov z2.s, z1.s[2]
-; CHECK-NEXT: fmov w11, s2
-; CHECK-NEXT: zip1 z0.s, z0.s, z1.s
+; CHECK-NEXT: fmov w9, s3
; CHECK-NEXT: stp w8, w9, [sp, #8]
-; CHECK-NEXT: stp w10, w11, [sp]
-; CHECK-NEXT: ldr q2, [sp]
+; CHECK-NEXT: fmov w8, s4
+; CHECK-NEXT: fmov w9, s2
+; CHECK-NEXT: stp w8, w9, [sp]
+; CHECK-NEXT: ldr q1, [sp]
; CHECK-NEXT: str q0, [x0]
-; CHECK-NEXT: str q2, [x0, #16]
+; CHECK-NEXT: str q1, [x0, #16]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%tmp1 = load volatile <8 x i32>, ptr %a
@@ -287,12 +287,13 @@
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: ldp q2, q3, [x1]
-; CHECK-NEXT: zip1 z4.d, z1.d, z2.d
-; CHECK-NEXT: trn2 z1.d, z1.d, z2.d
-; CHECK-NEXT: zip1 z2.d, z0.d, z3.d
-; CHECK-NEXT: trn2 z0.d, z0.d, z3.d
-; CHECK-NEXT: fadd z2.d, p0/m, z2.d, z4.d
+; CHECK-NEXT: ldp q3, q2, [x1]
+; CHECK-NEXT: zip1 z4.d, z1.d, z3.d
+; CHECK-NEXT: zip1 z5.d, z0.d, z2.d
+; CHECK-NEXT: trn2 z1.d, z1.d, z3.d
+; CHECK-NEXT: trn2 z0.d, z0.d, z2.d
+; CHECK-NEXT: movprfx z2, z4
+; CHECK-NEXT: fadd z2.d, p0/m, z2.d, z5.d
; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: stp q2, q0, [x0]
; CHECK-NEXT: ret
@@ -314,17 +315,17 @@
; CHECK-NEXT: ldr q1, [x0]
; CHECK-NEXT: mov z2.s, z0.s[3]
; CHECK-NEXT: mov z3.s, z1.s[3]
+; CHECK-NEXT: mov z4.s, z0.s[2]
+; CHECK-NEXT: zip1 z0.s, z1.s, z0.s
; CHECK-NEXT: fmov w8, s2
-; CHECK-NEXT: mov z2.s, z0.s[2]
+; CHECK-NEXT: mov z2.s, z1.s[2]
; CHECK-NEXT: fmov w9, s3
-; CHECK-NEXT: mov z3.s, z1.s[2]
-; CHECK-NEXT: fmov w10, s2
-; CHECK-NEXT: fmov w11, s3
-; CHECK-NEXT: zip1 z0.s, z1.s, z0.s
; CHECK-NEXT: stp w9, w8, [sp, #8]
-; CHECK-NEXT: stp w11, w10, [sp]
-; CHECK-NEXT: ldr q2, [sp]
-; CHECK-NEXT: add z0.s, z0.s, z2.s
+; CHECK-NEXT: fmov w8, s4
+; CHECK-NEXT: fmov w9, s2
+; CHECK-NEXT: stp w9, w8, [sp]
+; CHECK-NEXT: ldr q1, [sp]
+; CHECK-NEXT: add z0.s, z0.s, z1.s
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
@@ -345,10 +346,10 @@
; CHECK-NEXT: ldr q0, [x0, #16]
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: mov z1.s, z0.s[3]
-; CHECK-NEXT: fmov w8, s1
-; CHECK-NEXT: mov z1.s, z0.s[2]
-; CHECK-NEXT: fmov w9, s1
+; CHECK-NEXT: mov z2.s, z0.s[2]
; CHECK-NEXT: zip1 z0.s, z0.s, z0.s
+; CHECK-NEXT: fmov w8, s1
+; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: stp w8, w8, [sp, #8]
; CHECK-NEXT: stp w9, w9, [sp]
; CHECK-NEXT: ldr q1, [sp]
@@ -365,15 +366,15 @@
define void @trn_v32i8(ptr %a, ptr %b) {
; CHECK-LABEL: trn_v32i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q1, q0, [x0]
-; CHECK-NEXT: ldp q2, q3, [x1]
-; CHECK-NEXT: trn1 z4.b, z1.b, z2.b
-; CHECK-NEXT: trn2 z1.b, z1.b, z2.b
-; CHECK-NEXT: add z1.b, z4.b, z1.b
-; CHECK-NEXT: trn1 z5.b, z0.b, z3.b
-; CHECK-NEXT: trn2 z0.b, z0.b, z3.b
-; CHECK-NEXT: add z0.b, z5.b, z0.b
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: ldp q0, q2, [x0]
+; CHECK-NEXT: ldp q1, q3, [x1]
+; CHECK-NEXT: trn1 z4.b, z0.b, z1.b
+; CHECK-NEXT: trn2 z0.b, z0.b, z1.b
+; CHECK-NEXT: trn1 z1.b, z2.b, z3.b
+; CHECK-NEXT: trn2 z2.b, z2.b, z3.b
+; CHECK-NEXT: add z0.b, z4.b, z0.b
+; CHECK-NEXT: add z1.b, z1.b, z2.b
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%tmp1 = load <32 x i8>, ptr %a
%tmp2 = load <32 x i8>, ptr %b
@@ -391,33 +392,33 @@
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: mov z1.h, z0.h[3]
; CHECK-NEXT: mov z2.h, z0.h[1]
-; CHECK-NEXT: mov z6.h, z0.h[2]
; CHECK-NEXT: mov z3.h, z0.h[5]
; CHECK-NEXT: mov z4.h, z0.h[4]
-; CHECK-NEXT: mov z5.h, z0.h[6]
-; CHECK-NEXT: fmov w9, s1
-; CHECK-NEXT: mov z0.h, z0.h[7]
-; CHECK-NEXT: fmov w10, s2
-; CHECK-NEXT: fmov w11, s6
; CHECK-NEXT: strh w8, [sp, #-32]!
; CHECK-NEXT: .cfi_def_cfa_offset 32
-; CHECK-NEXT: fmov w8, s3
-; CHECK-NEXT: fmov w12, s4
-; CHECK-NEXT: fmov w13, s5
-; CHECK-NEXT: strh w11, [sp, #4]
-; CHECK-NEXT: fmov w11, s0
-; CHECK-NEXT: strh w9, [sp, #14]
-; CHECK-NEXT: strh w10, [sp, #12]
-; CHECK-NEXT: strh w8, [sp, #10]
-; CHECK-NEXT: strh w12, [sp, #8]
+; CHECK-NEXT: fmov w8, s1
+; CHECK-NEXT: mov z1.h, z0.h[2]
+; CHECK-NEXT: fmov w9, s2
+; CHECK-NEXT: mov z2.h, z0.h[6]
+; CHECK-NEXT: mov z0.h, z0.h[7]
+; CHECK-NEXT: fmov w10, s3
+; CHECK-NEXT: fmov w11, s4
+; CHECK-NEXT: fmov w12, s1
+; CHECK-NEXT: strh w8, [sp, #14]
+; CHECK-NEXT: fmov w13, s2
+; CHECK-NEXT: strh w9, [sp, #12]
+; CHECK-NEXT: strh w10, [sp, #10]
+; CHECK-NEXT: strh w12, [sp, #4]
+; CHECK-NEXT: fmov w12, s0
+; CHECK-NEXT: strh w11, [sp, #8]
; CHECK-NEXT: strh w13, [sp, #6]
-; CHECK-NEXT: strh w11, [sp, #2]
-; CHECK-NEXT: strh w11, [sp, #28]
-; CHECK-NEXT: strh w12, [sp, #26]
-; CHECK-NEXT: strh w8, [sp, #22]
-; CHECK-NEXT: strh w9, [sp, #20]
+; CHECK-NEXT: strh w12, [sp, #2]
+; CHECK-NEXT: strh w12, [sp, #28]
+; CHECK-NEXT: strh w11, [sp, #26]
+; CHECK-NEXT: strh w10, [sp, #22]
+; CHECK-NEXT: strh w8, [sp, #20]
; CHECK-NEXT: strh w13, [sp, #18]
-; CHECK-NEXT: strh w10, [sp, #16]
+; CHECK-NEXT: strh w9, [sp, #16]
; CHECK-NEXT: ldp q0, q1, [sp]
; CHECK-NEXT: add z0.h, z0.h, z1.h
; CHECK-NEXT: str q0, [x0]
@@ -435,15 +436,15 @@
define void @trn_v16i16(ptr %a, ptr %b) {
; CHECK-LABEL: trn_v16i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q1, q0, [x0]
-; CHECK-NEXT: ldp q2, q3, [x1]
-; CHECK-NEXT: trn1 z4.h, z1.h, z2.h
-; CHECK-NEXT: trn2 z1.h, z1.h, z2.h
-; CHECK-NEXT: add z1.h, z4.h, z1.h
-; CHECK-NEXT: trn1 z5.h, z0.h, z3.h
-; CHECK-NEXT: trn2 z0.h, z0.h, z3.h
-; CHECK-NEXT: add z0.h, z5.h, z0.h
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: ldp q0, q2, [x0]
+; CHECK-NEXT: ldp q1, q3, [x1]
+; CHECK-NEXT: trn1 z4.h, z0.h, z1.h
+; CHECK-NEXT: trn2 z0.h, z0.h, z1.h
+; CHECK-NEXT: trn1 z1.h, z2.h, z3.h
+; CHECK-NEXT: trn2 z2.h, z2.h, z3.h
+; CHECK-NEXT: add z0.h, z4.h, z0.h
+; CHECK-NEXT: add z1.h, z1.h, z2.h
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%tmp1 = load <16 x i16>, ptr %a
%tmp2 = load <16 x i16>, ptr %b
@@ -457,15 +458,15 @@
define void @trn_v8i32(ptr %a, ptr %b) {
; CHECK-LABEL: trn_v8i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q1, q0, [x0]
-; CHECK-NEXT: ldp q2, q3, [x1]
-; CHECK-NEXT: zip1 z4.s, z1.s, z2.s
-; CHECK-NEXT: trn2 z1.s, z1.s, z2.s
-; CHECK-NEXT: add z1.s, z4.s, z1.s
-; CHECK-NEXT: trn1 z5.s, z0.s, z3.s
-; CHECK-NEXT: trn2 z0.s, z0.s, z3.s
-; CHECK-NEXT: add z0.s, z5.s, z0.s
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: ldp q0, q2, [x0]
+; CHECK-NEXT: ldp q1, q3, [x1]
+; CHECK-NEXT: zip1 z4.s, z0.s, z1.s
+; CHECK-NEXT: trn2 z0.s, z0.s, z1.s
+; CHECK-NEXT: trn1 z1.s, z2.s, z3.s
+; CHECK-NEXT: trn2 z2.s, z2.s, z3.s
+; CHECK-NEXT: add z0.s, z4.s, z0.s
+; CHECK-NEXT: add z1.s, z1.s, z2.s
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%tmp1 = load <8 x i32>, ptr %a
%tmp2 = load <8 x i32>, ptr %b
@@ -479,16 +480,16 @@
define void @trn_v4f64(ptr %a, ptr %b) {
; CHECK-LABEL: trn_v4f64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q1, q0, [x0]
+; CHECK-NEXT: ldp q0, q2, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: ldp q2, q3, [x1]
-; CHECK-NEXT: zip1 z4.d, z1.d, z2.d
-; CHECK-NEXT: trn2 z1.d, z1.d, z2.d
-; CHECK-NEXT: fadd z1.d, p0/m, z1.d, z4.d
-; CHECK-NEXT: zip1 z5.d, z0.d, z3.d
-; CHECK-NEXT: trn2 z0.d, z0.d, z3.d
-; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z5.d
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: ldp q1, q3, [x1]
+; CHECK-NEXT: zip1 z4.d, z0.d, z1.d
+; CHECK-NEXT: trn2 z0.d, z0.d, z1.d
+; CHECK-NEXT: zip1 z1.d, z2.d, z3.d
+; CHECK-NEXT: trn2 z2.d, z2.d, z3.d
+; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z4.d
+; CHECK-NEXT: fadd z1.d, p0/m, z1.d, z2.d
+; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%tmp1 = load <4 x double>, ptr %a
%tmp2 = load <4 x double>, ptr %b
@@ -502,8 +503,8 @@
define void @trn_v4f32(ptr %a, ptr %b) {
; CHECK-LABEL: trn_v4f32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: trn1 z2.s, z0.s, z1.s
; CHECK-NEXT: trn2 z0.s, z0.s, z1.s
@@ -525,9 +526,9 @@
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: trn1 z2.s, z0.s, z0.s
; CHECK-NEXT: trn2 z0.s, z0.s, z0.s
-; CHECK-NEXT: add z0.s, z2.s, z0.s
; CHECK-NEXT: trn1 z3.s, z1.s, z1.s
; CHECK-NEXT: trn2 z1.s, z1.s, z1.s
+; CHECK-NEXT: add z0.s, z2.s, z0.s
; CHECK-NEXT: add z1.s, z3.s, z1.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
@@ -549,57 +550,57 @@
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: ldr q1, [x1, #16]
; CHECK-NEXT: mov z2.b, z0.b[15]
+; CHECK-NEXT: mov z3.b, z0.b[14]
+; CHECK-NEXT: mov z4.b, z0.b[13]
; CHECK-NEXT: fmov w8, s2
-; CHECK-NEXT: mov z2.b, z0.b[14]
-; CHECK-NEXT: fmov w9, s2
-; CHECK-NEXT: mov z2.b, z0.b[13]
-; CHECK-NEXT: fmov w10, s2
; CHECK-NEXT: mov z2.b, z0.b[12]
; CHECK-NEXT: strb w8, [sp, #14]
+; CHECK-NEXT: fmov w8, s3
+; CHECK-NEXT: mov z3.b, z0.b[11]
+; CHECK-NEXT: strb w8, [sp, #12]
+; CHECK-NEXT: fmov w8, s4
+; CHECK-NEXT: mov z4.b, z0.b[10]
+; CHECK-NEXT: strb w8, [sp, #10]
; CHECK-NEXT: fmov w8, s2
-; CHECK-NEXT: mov z2.b, z0.b[11]
-; CHECK-NEXT: strb w9, [sp, #12]
-; CHECK-NEXT: fmov w9, s2
-; CHECK-NEXT: mov z2.b, z0.b[10]
-; CHECK-NEXT: strb w10, [sp, #10]
-; CHECK-NEXT: fmov w10, s2
; CHECK-NEXT: mov z2.b, z0.b[9]
; CHECK-NEXT: strb w8, [sp, #8]
-; CHECK-NEXT: fmov w8, s2
-; CHECK-NEXT: mov z2.b, z0.b[8]
-; CHECK-NEXT: strb w9, [sp, #6]
-; CHECK-NEXT: fmov w9, s2
-; CHECK-NEXT: mov z2.b, z1.b[15]
-; CHECK-NEXT: strb w10, [sp, #4]
-; CHECK-NEXT: strb w8, [sp, #2]
+; CHECK-NEXT: fmov w8, s3
+; CHECK-NEXT: mov z3.b, z0.b[8]
+; CHECK-NEXT: zip1 z0.b, z0.b, z1.b
+; CHECK-NEXT: strb w8, [sp, #6]
+; CHECK-NEXT: fmov w8, s4
+; CHECK-NEXT: mov z4.b, z1.b[15]
+; CHECK-NEXT: strb w8, [sp, #4]
; CHECK-NEXT: fmov w8, s2
; CHECK-NEXT: mov z2.b, z1.b[14]
-; CHECK-NEXT: strb w9, [sp]
-; CHECK-NEXT: fmov w9, s2
-; CHECK-NEXT: mov z2.b, z1.b[13]
-; CHECK-NEXT: fmov w10, s2
-; CHECK-NEXT: mov z2.b, z1.b[12]
+; CHECK-NEXT: strb w8, [sp, #2]
+; CHECK-NEXT: fmov w8, s3
+; CHECK-NEXT: mov z3.b, z1.b[13]
+; CHECK-NEXT: strb w8, [sp]
+; CHECK-NEXT: fmov w8, s4
+; CHECK-NEXT: mov z4.b, z1.b[12]
; CHECK-NEXT: strb w8, [sp, #15]
; CHECK-NEXT: fmov w8, s2
; CHECK-NEXT: mov z2.b, z1.b[11]
-; CHECK-NEXT: strb w9, [sp, #13]
-; CHECK-NEXT: strb w10, [sp, #11]
-; CHECK-NEXT: zip1 z0.b, z0.b, z1.b
+; CHECK-NEXT: strb w8, [sp, #13]
+; CHECK-NEXT: fmov w8, s3
+; CHECK-NEXT: mov z3.b, z1.b[10]
+; CHECK-NEXT: strb w8, [sp, #11]
+; CHECK-NEXT: fmov w8, s4
+; CHECK-NEXT: mov z4.b, z1.b[9]
; CHECK-NEXT: strb w8, [sp, #9]
; CHECK-NEXT: fmov w8, s2
-; CHECK-NEXT: mov z2.b, z1.b[10]
-; CHECK-NEXT: fmov w9, s2
-; CHECK-NEXT: mov z2.b, z1.b[9]
-; CHECK-NEXT: fmov w10, s2
; CHECK-NEXT: mov z2.b, z1.b[8]
; CHECK-NEXT: strb w8, [sp, #7]
+; CHECK-NEXT: fmov w8, s3
+; CHECK-NEXT: strb w8, [sp, #5]
+; CHECK-NEXT: fmov w8, s4
+; CHECK-NEXT: strb w8, [sp, #3]
; CHECK-NEXT: fmov w8, s2
-; CHECK-NEXT: strb w9, [sp, #5]
-; CHECK-NEXT: strb w10, [sp, #3]
; CHECK-NEXT: strb w8, [sp, #1]
-; CHECK-NEXT: ldr q2, [sp]
+; CHECK-NEXT: ldr q1, [sp]
; CHECK-NEXT: str q0, [x0]
-; CHECK-NEXT: str q2, [x0, #16]
+; CHECK-NEXT: str q1, [x0, #16]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%tmp1 = load volatile <32 x i8>, ptr %a
@@ -619,33 +620,33 @@
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: ldr q1, [x1, #16]
; CHECK-NEXT: mov z2.h, z0.h[7]
+; CHECK-NEXT: mov z3.h, z0.h[6]
+; CHECK-NEXT: mov z4.h, z0.h[5]
; CHECK-NEXT: fmov w8, s2
-; CHECK-NEXT: mov z2.h, z0.h[6]
-; CHECK-NEXT: fmov w9, s2
-; CHECK-NEXT: mov z2.h, z0.h[5]
-; CHECK-NEXT: fmov w10, s2
; CHECK-NEXT: mov z2.h, z0.h[4]
+; CHECK-NEXT: zip1 z0.h, z0.h, z1.h
; CHECK-NEXT: strh w8, [sp, #12]
+; CHECK-NEXT: fmov w8, s3
+; CHECK-NEXT: mov z3.h, z1.h[7]
+; CHECK-NEXT: strh w8, [sp, #8]
+; CHECK-NEXT: fmov w8, s4
+; CHECK-NEXT: mov z4.h, z1.h[6]
+; CHECK-NEXT: strh w8, [sp, #4]
; CHECK-NEXT: fmov w8, s2
-; CHECK-NEXT: mov z2.h, z1.h[7]
-; CHECK-NEXT: strh w9, [sp, #8]
-; CHECK-NEXT: fmov w9, s2
-; CHECK-NEXT: mov z2.h, z1.h[6]
-; CHECK-NEXT: strh w10, [sp, #4]
-; CHECK-NEXT: fmov w10, s2
; CHECK-NEXT: mov z2.h, z1.h[5]
; CHECK-NEXT: strh w8, [sp]
+; CHECK-NEXT: fmov w8, s3
+; CHECK-NEXT: mov z3.h, z1.h[4]
+; CHECK-NEXT: strh w8, [sp, #14]
+; CHECK-NEXT: fmov w8, s4
+; CHECK-NEXT: strh w8, [sp, #10]
; CHECK-NEXT: fmov w8, s2
-; CHECK-NEXT: mov z2.h, z1.h[4]
-; CHECK-NEXT: strh w9, [sp, #14]
-; CHECK-NEXT: fmov w9, s2
-; CHECK-NEXT: strh w10, [sp, #10]
-; CHECK-NEXT: zip1 z0.h, z0.h, z1.h
; CHECK-NEXT: strh w8, [sp, #6]
-; CHECK-NEXT: strh w9, [sp, #2]
-; CHECK-NEXT: ldr q2, [sp]
+; CHECK-NEXT: fmov w8, s3
+; CHECK-NEXT: strh w8, [sp, #2]
+; CHECK-NEXT: ldr q1, [sp]
; CHECK-NEXT: str q0, [x0]
-; CHECK-NEXT: str q2, [x0, #16]
+; CHECK-NEXT: str q1, [x0, #16]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%tmp1 = load volatile <16 x i16>, ptr %a
@@ -665,19 +666,19 @@
; CHECK-NEXT: ldr q1, [x1]
; CHECK-NEXT: ldr q1, [x1, #16]
; CHECK-NEXT: mov z2.s, z0.s[3]
+; CHECK-NEXT: mov z4.s, z0.s[2]
+; CHECK-NEXT: mov z3.s, z1.s[3]
+; CHECK-NEXT: zip1 z0.s, z0.s, z1.s
; CHECK-NEXT: fmov w8, s2
-; CHECK-NEXT: mov z2.s, z1.s[3]
-; CHECK-NEXT: fmov w9, s2
-; CHECK-NEXT: mov z2.s, z0.s[2]
-; CHECK-NEXT: fmov w10, s2
; CHECK-NEXT: mov z2.s, z1.s[2]
-; CHECK-NEXT: fmov w11, s2
-; CHECK-NEXT: zip1 z0.s, z0.s, z1.s
+; CHECK-NEXT: fmov w9, s3
; CHECK-NEXT: stp w8, w9, [sp, #8]
-; CHECK-NEXT: stp w10, w11, [sp]
-; CHECK-NEXT: ldr q2, [sp]
+; CHECK-NEXT: fmov w8, s4
+; CHECK-NEXT: fmov w9, s2
+; CHECK-NEXT: stp w8, w9, [sp]
+; CHECK-NEXT: ldr q1, [sp]
; CHECK-NEXT: str q0, [x0]
-; CHECK-NEXT: str q2, [x0, #16]
+; CHECK-NEXT: str q1, [x0, #16]
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ret
%tmp1 = load volatile <8 x i32>, ptr %a
@@ -695,10 +696,10 @@
; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ldr q0, [x0, #16]
; CHECK-NEXT: mov z1.s, z0.s[3]
-; CHECK-NEXT: fmov w8, s1
-; CHECK-NEXT: mov z1.s, z0.s[2]
-; CHECK-NEXT: fmov w9, s1
+; CHECK-NEXT: mov z2.s, z0.s[2]
; CHECK-NEXT: zip1 z0.s, z0.s, z0.s
+; CHECK-NEXT: fmov w8, s1
+; CHECK-NEXT: fmov w9, s2
; CHECK-NEXT: stp w8, w8, [sp, #8]
; CHECK-NEXT: stp w9, w9, [sp]
; CHECK-NEXT: ldr q1, [sp]
@@ -715,213 +716,197 @@
define void @uzp_v32i8(ptr %a, ptr %b) #0{
; CHECK-LABEL: uzp_v32i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #128
-; CHECK-NEXT: stp d15, d14, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: stp d13, d12, [sp, #80] // 16-byte Folded Spill
-; CHECK-NEXT: stp d11, d10, [sp, #96] // 16-byte Folded Spill
-; CHECK-NEXT: stp d9, d8, [sp, #112] // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 128
-; CHECK-NEXT: .cfi_offset b8, -8
-; CHECK-NEXT: .cfi_offset b9, -16
-; CHECK-NEXT: .cfi_offset b10, -24
-; CHECK-NEXT: .cfi_offset b11, -32
-; CHECK-NEXT: .cfi_offset b12, -40
-; CHECK-NEXT: .cfi_offset b13, -48
-; CHECK-NEXT: .cfi_offset b14, -56
-; CHECK-NEXT: .cfi_offset b15, -64
-; CHECK-NEXT: ldp q0, q3, [x0]
-; CHECK-NEXT: fmov w9, s0
-; CHECK-NEXT: mov z27.b, z0.b[14]
-; CHECK-NEXT: mov z28.b, z0.b[12]
-; CHECK-NEXT: mov z30.b, z0.b[8]
+; CHECK-NEXT: sub sp, sp, #64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: ldp q2, q3, [x0]
+; CHECK-NEXT: ldp q0, q1, [x1]
; CHECK-NEXT: fmov w8, s3
-; CHECK-NEXT: mov z2.b, z3.b[12]
-; CHECK-NEXT: mov z4.b, z3.b[10]
-; CHECK-NEXT: mov z1.b, z3.b[14]
-; CHECK-NEXT: ldp q10, q11, [x1]
+; CHECK-NEXT: mov z4.b, z3.b[14]
+; CHECK-NEXT: mov z5.b, z3.b[12]
+; CHECK-NEXT: mov z6.b, z3.b[10]
+; CHECK-NEXT: mov z7.b, z3.b[8]
+; CHECK-NEXT: mov z16.b, z3.b[11]
+; CHECK-NEXT: mov z17.b, z3.b[9]
+; CHECK-NEXT: mov z18.b, z3.b[7]
; CHECK-NEXT: strb w8, [sp, #40]
; CHECK-NEXT: fmov w8, s2
-; CHECK-NEXT: strb w9, [sp, #32]
-; CHECK-NEXT: fmov w9, s4
-; CHECK-NEXT: mov z6.b, z3.b[6]
-; CHECK-NEXT: mov z7.b, z3.b[4]
-; CHECK-NEXT: fmov w10, s1
+; CHECK-NEXT: strb w8, [sp, #32]
+; CHECK-NEXT: fmov w8, s4
+; CHECK-NEXT: mov z4.b, z3.b[6]
+; CHECK-NEXT: strb w8, [sp, #47]
+; CHECK-NEXT: fmov w8, s5
+; CHECK-NEXT: mov z5.b, z3.b[4]
; CHECK-NEXT: strb w8, [sp, #46]
; CHECK-NEXT: fmov w8, s6
-; CHECK-NEXT: strb w9, [sp, #45]
-; CHECK-NEXT: fmov w9, s7
-; CHECK-NEXT: mov z5.b, z3.b[8]
-; CHECK-NEXT: strb w10, [sp, #47]
-; CHECK-NEXT: fmov w10, s5
+; CHECK-NEXT: mov z6.b, z3.b[2]
+; CHECK-NEXT: strb w8, [sp, #45]
+; CHECK-NEXT: fmov w8, s7
+; CHECK-NEXT: mov z7.b, z2.b[14]
+; CHECK-NEXT: strb w8, [sp, #44]
+; CHECK-NEXT: fmov w8, s4
+; CHECK-NEXT: mov z4.b, z2.b[12]
; CHECK-NEXT: strb w8, [sp, #43]
-; CHECK-NEXT: fmov w8, s27
-; CHECK-NEXT: strb w9, [sp, #42]
-; CHECK-NEXT: fmov w9, s28
-; CHECK-NEXT: mov z16.b, z3.b[2]
-; CHECK-NEXT: mov z31.b, z0.b[6]
-; CHECK-NEXT: strb w10, [sp, #44]
-; CHECK-NEXT: fmov w10, s16
+; CHECK-NEXT: fmov w8, s5
+; CHECK-NEXT: mov z5.b, z2.b[10]
+; CHECK-NEXT: strb w8, [sp, #42]
+; CHECK-NEXT: fmov w8, s6
+; CHECK-NEXT: mov z6.b, z2.b[8]
+; CHECK-NEXT: strb w8, [sp, #41]
+; CHECK-NEXT: fmov w8, s7
+; CHECK-NEXT: mov z7.b, z2.b[6]
; CHECK-NEXT: strb w8, [sp, #39]
-; CHECK-NEXT: fmov w8, s30
-; CHECK-NEXT: strb w9, [sp, #38]
-; CHECK-NEXT: fmov w9, s31
-; CHECK-NEXT: mov z29.b, z0.b[10]
-; CHECK-NEXT: mov z9.b, z0.b[2]
-; CHECK-NEXT: strb w10, [sp, #41]
-; CHECK-NEXT: fmov w10, s29
+; CHECK-NEXT: fmov w8, s4
+; CHECK-NEXT: mov z4.b, z2.b[4]
+; CHECK-NEXT: strb w8, [sp, #38]
+; CHECK-NEXT: fmov w8, s5
+; CHECK-NEXT: mov z5.b, z2.b[2]
+; CHECK-NEXT: strb w8, [sp, #37]
+; CHECK-NEXT: fmov w8, s6
+; CHECK-NEXT: mov z6.b, z1.b[10]
; CHECK-NEXT: strb w8, [sp, #36]
-; CHECK-NEXT: fmov w8, s9
-; CHECK-NEXT: strb w9, [sp, #35]
-; CHECK-NEXT: fmov w9, s11
-; CHECK-NEXT: mov z8.b, z0.b[4]
-; CHECK-NEXT: mov z16.b, z11.b[4]
-; CHECK-NEXT: mov z27.b, z11.b[2]
-; CHECK-NEXT: strb w10, [sp, #37]
-; CHECK-NEXT: fmov w10, s8
+; CHECK-NEXT: fmov w8, s7
+; CHECK-NEXT: mov z7.b, z1.b[8]
+; CHECK-NEXT: strb w8, [sp, #35]
+; CHECK-NEXT: fmov w8, s4
+; CHECK-NEXT: mov z4.b, z1.b[14]
+; CHECK-NEXT: strb w8, [sp, #34]
+; CHECK-NEXT: fmov w8, s5
+; CHECK-NEXT: mov z5.b, z1.b[12]
; CHECK-NEXT: strb w8, [sp, #33]
-; CHECK-NEXT: fmov w8, s16
-; CHECK-NEXT: strb w9, [sp, #8]
-; CHECK-NEXT: fmov w9, s27
-; CHECK-NEXT: mov z5.b, z11.b[10]
-; CHECK-NEXT: mov z6.b, z11.b[8]
-; CHECK-NEXT: mov z2.b, z11.b[14]
-; CHECK-NEXT: fmov w12, s5
-; CHECK-NEXT: fmov w13, s6
-; CHECK-NEXT: mov z5.b, z10.b[10]
-; CHECK-NEXT: mov z6.b, z10.b[8]
-; CHECK-NEXT: strb w10, [sp, #34]
-; CHECK-NEXT: fmov w10, s2
+; CHECK-NEXT: fmov w8, s1
+; CHECK-NEXT: strb w8, [sp, #8]
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: strb w8, [sp]
+; CHECK-NEXT: fmov w8, s4
+; CHECK-NEXT: mov z4.b, z1.b[6]
+; CHECK-NEXT: strb w8, [sp, #15]
+; CHECK-NEXT: fmov w8, s5
+; CHECK-NEXT: mov z5.b, z1.b[4]
+; CHECK-NEXT: strb w8, [sp, #14]
+; CHECK-NEXT: fmov w8, s6
+; CHECK-NEXT: mov z6.b, z1.b[2]
+; CHECK-NEXT: strb w8, [sp, #13]
+; CHECK-NEXT: fmov w8, s7
+; CHECK-NEXT: mov z7.b, z0.b[14]
+; CHECK-NEXT: strb w8, [sp, #12]
+; CHECK-NEXT: fmov w8, s4
+; CHECK-NEXT: mov z4.b, z0.b[12]
+; CHECK-NEXT: strb w8, [sp, #11]
+; CHECK-NEXT: fmov w8, s5
+; CHECK-NEXT: mov z5.b, z0.b[10]
; CHECK-NEXT: strb w8, [sp, #10]
+; CHECK-NEXT: fmov w8, s6
+; CHECK-NEXT: mov z6.b, z0.b[8]
+; CHECK-NEXT: strb w8, [sp, #9]
+; CHECK-NEXT: fmov w8, s7
+; CHECK-NEXT: mov z7.b, z0.b[6]
+; CHECK-NEXT: strb w8, [sp, #7]
+; CHECK-NEXT: fmov w8, s4
+; CHECK-NEXT: mov z4.b, z0.b[4]
+; CHECK-NEXT: strb w8, [sp, #6]
; CHECK-NEXT: fmov w8, s5
-; CHECK-NEXT: strb w9, [sp, #9]
-; CHECK-NEXT: fmov w9, s6
-; CHECK-NEXT: mov z4.b, z11.b[12]
-; CHECK-NEXT: mov z7.b, z11.b[6]
-; CHECK-NEXT: mov z28.b, z11.b[15]
-; CHECK-NEXT: mov z29.b, z11.b[13]
-; CHECK-NEXT: mov z30.b, z11.b[11]
-; CHECK-NEXT: mov z31.b, z11.b[9]
-; CHECK-NEXT: mov z8.b, z11.b[7]
-; CHECK-NEXT: mov z9.b, z11.b[5]
-; CHECK-NEXT: mov z12.b, z11.b[3]
-; CHECK-NEXT: mov z13.b, z11.b[1]
-; CHECK-NEXT: mov z2.b, z10.b[14]
-; CHECK-NEXT: mov z11.b, z10.b[4]
-; CHECK-NEXT: mov z14.b, z10.b[2]
-; CHECK-NEXT: strb w10, [sp, #15]
-; CHECK-NEXT: fmov w10, s2
+; CHECK-NEXT: mov z5.b, z0.b[2]
; CHECK-NEXT: strb w8, [sp, #5]
-; CHECK-NEXT: fmov w8, s11
-; CHECK-NEXT: strb w9, [sp, #4]
-; CHECK-NEXT: fmov w9, s14
-; CHECK-NEXT: mov z17.b, z3.b[15]
-; CHECK-NEXT: mov z18.b, z3.b[13]
-; CHECK-NEXT: fmov w14, s7
-; CHECK-NEXT: mov z7.b, z10.b[6]
-; CHECK-NEXT: strb w10, [sp, #7]
-; CHECK-NEXT: fmov w10, s7
+; CHECK-NEXT: fmov w8, s6
+; CHECK-NEXT: mov z6.b, z3.b[15]
+; CHECK-NEXT: strb w8, [sp, #4]
+; CHECK-NEXT: fmov w8, s7
+; CHECK-NEXT: mov z7.b, z3.b[13]
+; CHECK-NEXT: strb w8, [sp, #3]
+; CHECK-NEXT: fmov w8, s4
+; CHECK-NEXT: ldr q4, [sp, #32]
; CHECK-NEXT: strb w8, [sp, #2]
-; CHECK-NEXT: fmov w8, s17
-; CHECK-NEXT: strb w9, [sp, #1]
-; CHECK-NEXT: fmov w9, s18
-; CHECK-NEXT: mov z19.b, z3.b[11]
-; CHECK-NEXT: mov z20.b, z3.b[9]
-; CHECK-NEXT: mov z21.b, z3.b[7]
-; CHECK-NEXT: strb w10, [sp, #3]
-; CHECK-NEXT: fmov w10, s19
+; CHECK-NEXT: fmov w8, s5
+; CHECK-NEXT: strb w8, [sp, #1]
+; CHECK-NEXT: fmov w8, s6
+; CHECK-NEXT: mov z6.b, z3.b[5]
+; CHECK-NEXT: mov z3.b, z3.b[3]
+; CHECK-NEXT: ldr q5, [sp]
; CHECK-NEXT: strb w8, [sp, #63]
-; CHECK-NEXT: fmov w8, s20
-; CHECK-NEXT: strb w9, [sp, #62]
-; CHECK-NEXT: fmov w9, s21
-; CHECK-NEXT: mov z22.b, z3.b[5]
-; CHECK-NEXT: mov z23.b, z3.b[3]
-; CHECK-NEXT: mov z3.b, z0.b[13]
-; CHECK-NEXT: strb w10, [sp, #61]
-; CHECK-NEXT: fmov w10, s22
+; CHECK-NEXT: fmov w8, s7
+; CHECK-NEXT: mov z7.b, z2.b[13]
+; CHECK-NEXT: strb w8, [sp, #62]
+; CHECK-NEXT: fmov w8, s16
+; CHECK-NEXT: mov z16.b, z2.b[11]
+; CHECK-NEXT: strb w8, [sp, #61]
+; CHECK-NEXT: fmov w8, s17
; CHECK-NEXT: strb w8, [sp, #60]
-; CHECK-NEXT: fmov w8, s23
-; CHECK-NEXT: strb w9, [sp, #59]
-; CHECK-NEXT: fmov w9, s3
-; CHECK-NEXT: mov z24.b, z0.b[11]
-; CHECK-NEXT: mov z25.b, z0.b[9]
-; CHECK-NEXT: mov z26.b, z0.b[5]
-; CHECK-NEXT: strb w10, [sp, #58]
-; CHECK-NEXT: fmov w10, s24
+; CHECK-NEXT: fmov w8, s18
+; CHECK-NEXT: strb w8, [sp, #59]
+; CHECK-NEXT: fmov w8, s6
+; CHECK-NEXT: mov z6.b, z2.b[9]
+; CHECK-NEXT: strb w8, [sp, #58]
+; CHECK-NEXT: fmov w8, s3
+; CHECK-NEXT: mov z3.b, z2.b[5]
; CHECK-NEXT: strb w8, [sp, #57]
-; CHECK-NEXT: fmov w8, s25
-; CHECK-NEXT: strb w9, [sp, #54]
-; CHECK-NEXT: fmov w9, s26
-; CHECK-NEXT: mov z1.b, z0.b[3]
-; CHECK-NEXT: mov z0.b, z0.b[1]
-; CHECK-NEXT: strb w10, [sp, #53]
-; CHECK-NEXT: fmov w10, s1
+; CHECK-NEXT: fmov w8, s7
+; CHECK-NEXT: mov z7.b, z2.b[3]
+; CHECK-NEXT: mov z2.b, z2.b[1]
+; CHECK-NEXT: strb w8, [sp, #54]
+; CHECK-NEXT: fmov w8, s16
+; CHECK-NEXT: strb w8, [sp, #53]
+; CHECK-NEXT: fmov w8, s6
+; CHECK-NEXT: mov z6.b, z1.b[15]
; CHECK-NEXT: strb w8, [sp, #52]
-; CHECK-NEXT: fmov w8, s0
-; CHECK-NEXT: strb w9, [sp, #50]
-; CHECK-NEXT: fmov w9, s28
-; CHECK-NEXT: strb w10, [sp, #49]
-; CHECK-NEXT: fmov w10, s29
+; CHECK-NEXT: fmov w8, s3
+; CHECK-NEXT: mov z3.b, z1.b[13]
+; CHECK-NEXT: strb w8, [sp, #50]
+; CHECK-NEXT: fmov w8, s7
+; CHECK-NEXT: mov z7.b, z1.b[11]
+; CHECK-NEXT: strb w8, [sp, #49]
+; CHECK-NEXT: fmov w8, s2
+; CHECK-NEXT: mov z2.b, z1.b[9]
; CHECK-NEXT: strb w8, [sp, #48]
-; CHECK-NEXT: fmov w8, s30
-; CHECK-NEXT: strb w9, [sp, #31]
-; CHECK-NEXT: fmov w9, s31
-; CHECK-NEXT: strb w10, [sp, #30]
-; CHECK-NEXT: fmov w10, s8
+; CHECK-NEXT: fmov w8, s6
+; CHECK-NEXT: mov z6.b, z1.b[7]
+; CHECK-NEXT: strb w8, [sp, #31]
+; CHECK-NEXT: fmov w8, s3
+; CHECK-NEXT: mov z3.b, z1.b[5]
+; CHECK-NEXT: strb w8, [sp, #30]
+; CHECK-NEXT: fmov w8, s7
+; CHECK-NEXT: mov z7.b, z1.b[3]
+; CHECK-NEXT: mov z1.b, z1.b[1]
; CHECK-NEXT: strb w8, [sp, #29]
-; CHECK-NEXT: fmov w8, s9
-; CHECK-NEXT: strb w9, [sp, #28]
-; CHECK-NEXT: fmov w9, s12
-; CHECK-NEXT: fmov w11, s4
-; CHECK-NEXT: mov z15.b, z10.b[15]
-; CHECK-NEXT: mov z16.b, z10.b[13]
-; CHECK-NEXT: strb w10, [sp, #27]
-; CHECK-NEXT: fmov w10, s13
+; CHECK-NEXT: fmov w8, s2
+; CHECK-NEXT: mov z2.b, z0.b[15]
+; CHECK-NEXT: strb w8, [sp, #28]
+; CHECK-NEXT: fmov w8, s6
+; CHECK-NEXT: mov z6.b, z0.b[11]
+; CHECK-NEXT: strb w8, [sp, #27]
+; CHECK-NEXT: fmov w8, s3
+; CHECK-NEXT: mov z3.b, z0.b[13]
; CHECK-NEXT: strb w8, [sp, #26]
-; CHECK-NEXT: fmov w8, s15
-; CHECK-NEXT: strb w9, [sp, #25]
-; CHECK-NEXT: fmov w9, s16
-; CHECK-NEXT: mov z4.b, z10.b[12]
-; CHECK-NEXT: mov z27.b, z10.b[11]
-; CHECK-NEXT: strb w11, [sp, #14]
-; CHECK-NEXT: mov z2.b, z10.b[9]
-; CHECK-NEXT: fmov w11, s4
-; CHECK-NEXT: mov z4.b, z10.b[7]
-; CHECK-NEXT: strb w10, [sp, #24]
-; CHECK-NEXT: fmov w10, s27
-; CHECK-NEXT: strb w8, [sp, #23]
+; CHECK-NEXT: fmov w8, s7
+; CHECK-NEXT: strb w8, [sp, #25]
+; CHECK-NEXT: fmov w8, s1
+; CHECK-NEXT: mov z1.b, z0.b[9]
+; CHECK-NEXT: strb w8, [sp, #24]
; CHECK-NEXT: fmov w8, s2
-; CHECK-NEXT: strb w9, [sp, #22]
-; CHECK-NEXT: fmov w9, s4
-; CHECK-NEXT: mov z5.b, z10.b[5]
-; CHECK-NEXT: mov z6.b, z10.b[3]
-; CHECK-NEXT: mov z7.b, z10.b[1]
-; CHECK-NEXT: fmov w15, s10
-; CHECK-NEXT: strb w10, [sp, #21]
-; CHECK-NEXT: fmov w10, s5
+; CHECK-NEXT: mov z2.b, z0.b[7]
+; CHECK-NEXT: strb w8, [sp, #23]
+; CHECK-NEXT: fmov w8, s3
+; CHECK-NEXT: mov z3.b, z0.b[5]
+; CHECK-NEXT: strb w8, [sp, #22]
+; CHECK-NEXT: fmov w8, s6
+; CHECK-NEXT: mov z6.b, z0.b[3]
+; CHECK-NEXT: mov z0.b, z0.b[1]
+; CHECK-NEXT: strb w8, [sp, #21]
+; CHECK-NEXT: fmov w8, s1
; CHECK-NEXT: strb w8, [sp, #20]
+; CHECK-NEXT: fmov w8, s2
+; CHECK-NEXT: strb w8, [sp, #19]
+; CHECK-NEXT: fmov w8, s3
+; CHECK-NEXT: strb w8, [sp, #18]
; CHECK-NEXT: fmov w8, s6
-; CHECK-NEXT: strb w9, [sp, #19]
-; CHECK-NEXT: fmov w9, s7
-; CHECK-NEXT: strb w15, [sp]
-; CHECK-NEXT: strb w12, [sp, #13]
-; CHECK-NEXT: ldr q17, [sp, #32]
-; CHECK-NEXT: strb w13, [sp, #12]
-; CHECK-NEXT: ldr q0, [sp, #48]
-; CHECK-NEXT: strb w14, [sp, #11]
-; CHECK-NEXT: strb w11, [sp, #6]
-; CHECK-NEXT: strb w10, [sp, #18]
-; CHECK-NEXT: ldr q18, [sp]
; CHECK-NEXT: strb w8, [sp, #17]
-; CHECK-NEXT: add z0.b, z17.b, z0.b
-; CHECK-NEXT: strb w9, [sp, #16]
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: ldr q0, [sp, #48]
+; CHECK-NEXT: add z0.b, z4.b, z0.b
+; CHECK-NEXT: strb w8, [sp, #16]
; CHECK-NEXT: ldr q1, [sp, #16]
-; CHECK-NEXT: ldp d9, d8, [sp, #112] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d11, d10, [sp, #96] // 16-byte Folded Reload
-; CHECK-NEXT: add z1.b, z18.b, z1.b
-; CHECK-NEXT: ldp d13, d12, [sp, #80] // 16-byte Folded Reload
+; CHECK-NEXT: add z1.b, z5.b, z1.b
; CHECK-NEXT: stp q0, q1, [x0]
-; CHECK-NEXT: ldp d15, d14, [sp, #64] // 16-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #128
+; CHECK-NEXT: add sp, sp, #64
; CHECK-NEXT: ret
%tmp1 = load <32 x i8>, ptr %a
%tmp2 = load <32 x i8>, ptr %b
@@ -936,21 +921,21 @@
; CHECK-LABEL: uzp_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldr d0, [x0]
-; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: mov z1.h, z0.h[1]
+; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: mov z2.h, z0.h[2]
-; CHECK-NEXT: mov z0.h, z0.h[3]
+; CHECK-NEXT: mov z3.h, z0.h[3]
; CHECK-NEXT: fmov w9, s1
-; CHECK-NEXT: fmov w10, s2
-; CHECK-NEXT: fmov w11, s0
; CHECK-NEXT: strh w8, [sp, #-16]!
; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: fmov w10, s2
+; CHECK-NEXT: fmov w11, s3
; CHECK-NEXT: strh w9, [sp, #6]
+; CHECK-NEXT: strh w8, [sp, #10]
+; CHECK-NEXT: strh w9, [sp, #8]
; CHECK-NEXT: strh w10, [sp, #4]
; CHECK-NEXT: strh w11, [sp, #2]
-; CHECK-NEXT: strh w8, [sp, #10]
; CHECK-NEXT: strh w10, [sp, #12]
-; CHECK-NEXT: strh w9, [sp, #8]
; CHECK-NEXT: ldp d0, d1, [sp]
; CHECK-NEXT: add z0.h, z0.h, z1.h
; CHECK-NEXT: str d0, [x0]
@@ -970,106 +955,106 @@
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #64
; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: ldp q0, q1, [x0]
-; CHECK-NEXT: mov z17.h, z0.h[4]
-; CHECK-NEXT: fmov w9, s0
-; CHECK-NEXT: mov z18.h, z0.h[2]
-; CHECK-NEXT: mov z19.h, z0.h[7]
-; CHECK-NEXT: fmov w8, s1
-; CHECK-NEXT: mov z3.h, z1.h[4]
-; CHECK-NEXT: ldp q21, q22, [x1]
-; CHECK-NEXT: mov z2.h, z1.h[6]
-; CHECK-NEXT: mov z4.h, z1.h[2]
-; CHECK-NEXT: strh w8, [sp, #40]
+; CHECK-NEXT: ldp q1, q3, [x0]
+; CHECK-NEXT: ldp q0, q2, [x1]
; CHECK-NEXT: fmov w8, s3
-; CHECK-NEXT: fmov w10, s2
-; CHECK-NEXT: mov z5.h, z1.h[7]
-; CHECK-NEXT: mov z6.h, z1.h[5]
-; CHECK-NEXT: mov z7.h, z1.h[3]
+; CHECK-NEXT: mov z4.h, z3.h[6]
+; CHECK-NEXT: mov z5.h, z3.h[4]
+; CHECK-NEXT: mov z6.h, z3.h[2]
+; CHECK-NEXT: mov z7.h, z1.h[6]
+; CHECK-NEXT: strh w8, [sp, #40]
+; CHECK-NEXT: fmov w8, s1
+; CHECK-NEXT: strh w8, [sp, #32]
+; CHECK-NEXT: fmov w8, s4
+; CHECK-NEXT: mov z4.h, z1.h[4]
+; CHECK-NEXT: strh w8, [sp, #46]
+; CHECK-NEXT: fmov w8, s5
+; CHECK-NEXT: mov z5.h, z1.h[2]
; CHECK-NEXT: strh w8, [sp, #44]
-; CHECK-NEXT: fmov w8, s17
-; CHECK-NEXT: mov z16.h, z1.h[1]
-; CHECK-NEXT: mov z1.h, z0.h[6]
-; CHECK-NEXT: strh w9, [sp, #32]
-; CHECK-NEXT: fmov w9, s4
-; CHECK-NEXT: strh w10, [sp, #46]
-; CHECK-NEXT: fmov w10, s1
+; CHECK-NEXT: fmov w8, s6
+; CHECK-NEXT: mov z6.h, z2.h[2]
+; CHECK-NEXT: strh w8, [sp, #42]
+; CHECK-NEXT: fmov w8, s7
+; CHECK-NEXT: mov z7.h, z0.h[6]
+; CHECK-NEXT: strh w8, [sp, #38]
+; CHECK-NEXT: fmov w8, s4
+; CHECK-NEXT: mov z4.h, z2.h[6]
; CHECK-NEXT: strh w8, [sp, #36]
-; CHECK-NEXT: fmov w8, s18
-; CHECK-NEXT: mov z2.h, z22.h[6]
-; CHECK-NEXT: strh w9, [sp, #42]
-; CHECK-NEXT: strh w10, [sp, #38]
-; CHECK-NEXT: fmov w9, s22
-; CHECK-NEXT: fmov w10, s21
+; CHECK-NEXT: fmov w8, s5
+; CHECK-NEXT: mov z5.h, z2.h[4]
; CHECK-NEXT: strh w8, [sp, #34]
; CHECK-NEXT: fmov w8, s2
-; CHECK-NEXT: mov z3.h, z22.h[4]
-; CHECK-NEXT: mov z4.h, z22.h[2]
-; CHECK-NEXT: mov z17.h, z22.h[7]
-; CHECK-NEXT: mov z18.h, z22.h[5]
-; CHECK-NEXT: mov z23.h, z22.h[3]
-; CHECK-NEXT: mov z24.h, z22.h[1]
-; CHECK-NEXT: mov z22.h, z21.h[6]
-; CHECK-NEXT: strh w9, [sp, #8]
-; CHECK-NEXT: fmov w9, s3
-; CHECK-NEXT: strh w10, [sp]
-; CHECK-NEXT: fmov w10, s4
+; CHECK-NEXT: ldr q16, [sp, #32]
+; CHECK-NEXT: strh w8, [sp, #8]
+; CHECK-NEXT: fmov w8, s0
+; CHECK-NEXT: strh w8, [sp]
+; CHECK-NEXT: fmov w8, s4
+; CHECK-NEXT: mov z4.h, z0.h[4]
; CHECK-NEXT: strh w8, [sp, #14]
-; CHECK-NEXT: fmov w8, s22
-; CHECK-NEXT: mov z25.h, z21.h[4]
-; CHECK-NEXT: mov z26.h, z21.h[2]
-; CHECK-NEXT: strh w9, [sp, #12]
-; CHECK-NEXT: fmov w9, s25
-; CHECK-NEXT: strh w10, [sp, #10]
-; CHECK-NEXT: fmov w10, s26
+; CHECK-NEXT: fmov w8, s5
+; CHECK-NEXT: mov z5.h, z0.h[2]
+; CHECK-NEXT: strh w8, [sp, #12]
+; CHECK-NEXT: fmov w8, s6
+; CHECK-NEXT: mov z6.h, z3.h[7]
+; CHECK-NEXT: strh w8, [sp, #10]
+; CHECK-NEXT: fmov w8, s7
+; CHECK-NEXT: mov z7.h, z1.h[7]
; CHECK-NEXT: strh w8, [sp, #6]
+; CHECK-NEXT: fmov w8, s4
+; CHECK-NEXT: mov z4.h, z3.h[5]
+; CHECK-NEXT: strh w8, [sp, #4]
; CHECK-NEXT: fmov w8, s5
-; CHECK-NEXT: strh w9, [sp, #4]
-; CHECK-NEXT: fmov w9, s6
-; CHECK-NEXT: strh w10, [sp, #2]
-; CHECK-NEXT: fmov w10, s7
+; CHECK-NEXT: mov z5.h, z3.h[3]
+; CHECK-NEXT: mov z3.h, z3.h[1]
+; CHECK-NEXT: strh w8, [sp, #2]
+; CHECK-NEXT: fmov w8, s6
+; CHECK-NEXT: ldr q6, [sp]
; CHECK-NEXT: strh w8, [sp, #62]
-; CHECK-NEXT: fmov w8, s16
-; CHECK-NEXT: mov z20.h, z0.h[5]
-; CHECK-NEXT: mov z1.h, z0.h[3]
-; CHECK-NEXT: strh w9, [sp, #60]
-; CHECK-NEXT: fmov w9, s19
-; CHECK-NEXT: strh w10, [sp, #58]
-; CHECK-NEXT: fmov w10, s20
+; CHECK-NEXT: fmov w8, s4
+; CHECK-NEXT: mov z4.h, z1.h[5]
+; CHECK-NEXT: strh w8, [sp, #60]
+; CHECK-NEXT: fmov w8, s5
+; CHECK-NEXT: mov z5.h, z1.h[3]
+; CHECK-NEXT: mov z1.h, z1.h[1]
+; CHECK-NEXT: strh w8, [sp, #58]
+; CHECK-NEXT: fmov w8, s3
+; CHECK-NEXT: mov z3.h, z2.h[7]
; CHECK-NEXT: strh w8, [sp, #56]
+; CHECK-NEXT: fmov w8, s7
+; CHECK-NEXT: strh w8, [sp, #54]
+; CHECK-NEXT: fmov w8, s4
+; CHECK-NEXT: mov z4.h, z2.h[5]
+; CHECK-NEXT: strh w8, [sp, #52]
+; CHECK-NEXT: fmov w8, s5
+; CHECK-NEXT: mov z5.h, z2.h[3]
+; CHECK-NEXT: strh w8, [sp, #50]
; CHECK-NEXT: fmov w8, s1
+; CHECK-NEXT: mov z1.h, z2.h[1]
+; CHECK-NEXT: mov z2.h, z0.h[7]
+; CHECK-NEXT: strh w8, [sp, #48]
+; CHECK-NEXT: fmov w8, s3
+; CHECK-NEXT: mov z3.h, z0.h[5]
+; CHECK-NEXT: strh w8, [sp, #30]
+; CHECK-NEXT: fmov w8, s4
+; CHECK-NEXT: mov z4.h, z0.h[3]
; CHECK-NEXT: mov z0.h, z0.h[1]
-; CHECK-NEXT: strh w9, [sp, #54]
-; CHECK-NEXT: fmov w9, s0
-; CHECK-NEXT: strh w10, [sp, #52]
-; CHECK-NEXT: fmov w10, s17
-; CHECK-NEXT: strh w8, [sp, #50]
-; CHECK-NEXT: fmov w8, s18
-; CHECK-NEXT: mov z27.h, z21.h[7]
-; CHECK-NEXT: strh w9, [sp, #48]
-; CHECK-NEXT: fmov w9, s23
-; CHECK-NEXT: strh w10, [sp, #30]
-; CHECK-NEXT: fmov w10, s24
; CHECK-NEXT: strh w8, [sp, #28]
-; CHECK-NEXT: fmov w8, s27
-; CHECK-NEXT: mov z28.h, z21.h[5]
-; CHECK-NEXT: mov z2.h, z21.h[3]
-; CHECK-NEXT: mov z3.h, z21.h[1]
-; CHECK-NEXT: strh w9, [sp, #26]
-; CHECK-NEXT: fmov w9, s28
-; CHECK-NEXT: strh w10, [sp, #24]
-; CHECK-NEXT: fmov w10, s2
+; CHECK-NEXT: fmov w8, s5
+; CHECK-NEXT: strh w8, [sp, #26]
+; CHECK-NEXT: fmov w8, s1
+; CHECK-NEXT: strh w8, [sp, #24]
+; CHECK-NEXT: fmov w8, s2
; CHECK-NEXT: strh w8, [sp, #22]
; CHECK-NEXT: fmov w8, s3
-; CHECK-NEXT: ldr q4, [sp, #32]
-; CHECK-NEXT: strh w9, [sp, #20]
-; CHECK-NEXT: ldr q5, [sp]
-; CHECK-NEXT: strh w10, [sp, #18]
+; CHECK-NEXT: strh w8, [sp, #20]
+; CHECK-NEXT: fmov w8, s4
+; CHECK-NEXT: strh w8, [sp, #18]
+; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: ldr q0, [sp, #48]
+; CHECK-NEXT: add z0.h, z16.h, z0.h
; CHECK-NEXT: strh w8, [sp, #16]
; CHECK-NEXT: ldr q1, [sp, #16]
-; CHECK-NEXT: add z0.h, z4.h, z0.h
-; CHECK-NEXT: add z1.h, z5.h, z1.h
+; CHECK-NEXT: add z1.h, z6.h, z1.h
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: add sp, sp, #64
; CHECK-NEXT: ret
@@ -1087,27 +1072,27 @@
; CHECK: // %bb.0:
; CHECK-NEXT: sub sp, sp, #64
; CHECK-NEXT: .cfi_def_cfa_offset 64
-; CHECK-NEXT: ldp q1, q0, [x0]
+; CHECK-NEXT: ldp q2, q0, [x0]
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ldp q3, q2, [x1]
-; CHECK-NEXT: mov z4.s, z0.s[2]
-; CHECK-NEXT: stp s0, s4, [sp, #24]
-; CHECK-NEXT: mov z4.s, z3.s[2]
-; CHECK-NEXT: mov z5.s, z2.s[2]
-; CHECK-NEXT: stp s4, s2, [sp, #4]
-; CHECK-NEXT: stp s5, s1, [sp, #12]
+; CHECK-NEXT: ldp q4, q1, [x1]
+; CHECK-NEXT: mov z3.s, z0.s[2]
+; CHECK-NEXT: mov z5.s, z1.s[2]
+; CHECK-NEXT: stp s0, s3, [sp, #24]
+; CHECK-NEXT: mov z3.s, z4.s[2]
+; CHECK-NEXT: stp s5, s2, [sp, #12]
; CHECK-NEXT: mov z5.s, z0.s[3]
; CHECK-NEXT: mov z0.s, z0.s[1]
-; CHECK-NEXT: mov z1.s, z1.s[1]
+; CHECK-NEXT: stp s3, s1, [sp, #4]
+; CHECK-NEXT: mov z1.s, z2.s[1]
; CHECK-NEXT: stp s0, s5, [sp, #40]
-; CHECK-NEXT: mov z0.s, z3.s[3]
+; CHECK-NEXT: mov z5.s, z4.s[3]
+; CHECK-NEXT: mov z4.s, z4.s[1]
+; CHECK-NEXT: ldp q3, q2, [sp]
; CHECK-NEXT: str s1, [sp, #32]
-; CHECK-NEXT: mov z1.s, z3.s[1]
-; CHECK-NEXT: stp s1, s0, [sp, #48]
-; CHECK-NEXT: ldp q4, q2, [sp]
+; CHECK-NEXT: stp s4, s5, [sp, #48]
; CHECK-NEXT: ldp q0, q1, [sp, #32]
; CHECK-NEXT: fadd z0.s, p0/m, z0.s, z2.s
-; CHECK-NEXT: fadd z1.s, p0/m, z1.s, z4.s
+; CHECK-NEXT: fadd z1.s, p0/m, z1.s, z3.s
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: add sp, sp, #64
; CHECK-NEXT: ret
@@ -1127,10 +1112,10 @@
; CHECK-NEXT: ldp q3, q2, [x1]
; CHECK-NEXT: zip1 z4.d, z1.d, z0.d
; CHECK-NEXT: trn2 z0.d, z1.d, z0.d
+; CHECK-NEXT: zip1 z1.d, z3.d, z2.d
+; CHECK-NEXT: trn2 z2.d, z3.d, z2.d
; CHECK-NEXT: add z0.d, z4.d, z0.d
-; CHECK-NEXT: zip1 z5.d, z3.d, z2.d
-; CHECK-NEXT: trn2 z1.d, z3.d, z2.d
-; CHECK-NEXT: add z1.d, z5.d, z1.d
+; CHECK-NEXT: add z1.d, z1.d, z2.d
; CHECK-NEXT: stp q0, q1, [x0]
; CHECK-NEXT: ret
%tmp1 = load <4 x i64>, ptr %a
@@ -1152,51 +1137,51 @@
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: mov z2.h, z0.h[6]
; CHECK-NEXT: mov z3.h, z0.h[4]
-; CHECK-NEXT: fmov w9, s1
-; CHECK-NEXT: fmov w10, s2
; CHECK-NEXT: mov z4.h, z0.h[2]
+; CHECK-NEXT: mov z5.h, z1.h[6]
+; CHECK-NEXT: mov z6.h, z1.h[4]
; CHECK-NEXT: strh w8, [sp, #8]
+; CHECK-NEXT: fmov w8, s1
+; CHECK-NEXT: strh w8, [sp]
+; CHECK-NEXT: fmov w8, s2
+; CHECK-NEXT: mov z2.h, z1.h[2]
+; CHECK-NEXT: strh w8, [sp, #14]
; CHECK-NEXT: fmov w8, s3
-; CHECK-NEXT: mov z5.h, z0.h[7]
-; CHECK-NEXT: mov z6.h, z0.h[5]
-; CHECK-NEXT: mov z7.h, z0.h[3]
-; CHECK-NEXT: mov z16.h, z0.h[1]
-; CHECK-NEXT: mov z0.h, z1.h[6]
-; CHECK-NEXT: mov z17.h, z1.h[4]
-; CHECK-NEXT: strh w9, [sp]
-; CHECK-NEXT: fmov w9, s4
-; CHECK-NEXT: strh w10, [sp, #14]
-; CHECK-NEXT: fmov w10, s0
+; CHECK-NEXT: mov z3.h, z0.h[7]
; CHECK-NEXT: strh w8, [sp, #12]
-; CHECK-NEXT: fmov w8, s17
-; CHECK-NEXT: mov z18.h, z1.h[2]
-; CHECK-NEXT: strh w9, [sp, #10]
-; CHECK-NEXT: fmov w9, s18
-; CHECK-NEXT: strh w10, [sp, #6]
-; CHECK-NEXT: fmov w10, s5
-; CHECK-NEXT: strh w8, [sp, #4]
+; CHECK-NEXT: fmov w8, s4
+; CHECK-NEXT: mov z4.h, z0.h[5]
+; CHECK-NEXT: strh w8, [sp, #10]
+; CHECK-NEXT: fmov w8, s5
+; CHECK-NEXT: mov z5.h, z0.h[3]
+; CHECK-NEXT: mov z0.h, z0.h[1]
+; CHECK-NEXT: strh w8, [sp, #6]
; CHECK-NEXT: fmov w8, s6
-; CHECK-NEXT: mov z19.h, z1.h[7]
-; CHECK-NEXT: strh w9, [sp, #2]
-; CHECK-NEXT: fmov w9, s7
-; CHECK-NEXT: strh w10, [sp, #30]
-; CHECK-NEXT: fmov w10, s16
+; CHECK-NEXT: strh w8, [sp, #4]
+; CHECK-NEXT: fmov w8, s2
+; CHECK-NEXT: mov z2.h, z1.h[7]
+; CHECK-NEXT: strh w8, [sp, #2]
+; CHECK-NEXT: fmov w8, s3
+; CHECK-NEXT: strh w8, [sp, #30]
+; CHECK-NEXT: fmov w8, s4
+; CHECK-NEXT: mov z4.h, z1.h[5]
; CHECK-NEXT: strh w8, [sp, #28]
-; CHECK-NEXT: fmov w8, s19
-; CHECK-NEXT: mov z20.h, z1.h[5]
-; CHECK-NEXT: mov z21.h, z1.h[3]
+; CHECK-NEXT: fmov w8, s5
+; CHECK-NEXT: mov z5.h, z1.h[3]
+; CHECK-NEXT: strh w8, [sp, #26]
+; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: mov z0.h, z1.h[1]
-; CHECK-NEXT: strh w9, [sp, #26]
-; CHECK-NEXT: fmov w9, s20
-; CHECK-NEXT: strh w10, [sp, #24]
-; CHECK-NEXT: fmov w10, s21
+; CHECK-NEXT: strh w8, [sp, #24]
+; CHECK-NEXT: fmov w8, s2
; CHECK-NEXT: strh w8, [sp, #22]
+; CHECK-NEXT: fmov w8, s4
+; CHECK-NEXT: strh w8, [sp, #20]
+; CHECK-NEXT: fmov w8, s5
+; CHECK-NEXT: strh w8, [sp, #18]
; CHECK-NEXT: fmov w8, s0
-; CHECK-NEXT: strh w9, [sp, #20]
-; CHECK-NEXT: strh w10, [sp, #18]
; CHECK-NEXT: strh w8, [sp, #16]
-; CHECK-NEXT: ldp q1, q0, [sp]
-; CHECK-NEXT: add z0.h, z1.h, z0.h
+; CHECK-NEXT: ldp q3, q0, [sp]
+; CHECK-NEXT: add z0.h, z3.h, z0.h
; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: add sp, sp, #32
; CHECK-NEXT: ret
@@ -1215,24 +1200,24 @@
; CHECK-NEXT: sub sp, sp, #32
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: ldp q1, q0, [x0]
-; CHECK-NEXT: fmov w10, s1
-; CHECK-NEXT: mov z5.s, z1.s[3]
; CHECK-NEXT: mov z2.s, z0.s[2]
-; CHECK-NEXT: mov z3.s, z0.s[3]
-; CHECK-NEXT: mov z4.s, z0.s[1]
; CHECK-NEXT: fmov w8, s0
-; CHECK-NEXT: mov z0.s, z1.s[2]
+; CHECK-NEXT: mov z3.s, z1.s[2]
+; CHECK-NEXT: mov z4.s, z0.s[3]
+; CHECK-NEXT: mov z0.s, z0.s[1]
; CHECK-NEXT: fmov w9, s2
-; CHECK-NEXT: fmov w11, s0
-; CHECK-NEXT: mov z2.s, z1.s[1]
-; CHECK-NEXT: fmov w12, s3
+; CHECK-NEXT: mov z2.s, z1.s[3]
; CHECK-NEXT: stp w8, w9, [sp, #8]
+; CHECK-NEXT: fmov w8, s1
+; CHECK-NEXT: fmov w9, s3
+; CHECK-NEXT: mov z1.s, z1.s[1]
+; CHECK-NEXT: stp w8, w9, [sp]
; CHECK-NEXT: fmov w8, s4
-; CHECK-NEXT: fmov w9, s5
-; CHECK-NEXT: stp w10, w11, [sp]
-; CHECK-NEXT: fmov w10, s2
-; CHECK-NEXT: stp w8, w12, [sp, #24]
-; CHECK-NEXT: stp w10, w9, [sp, #16]
+; CHECK-NEXT: fmov w9, s0
+; CHECK-NEXT: stp w9, w8, [sp, #24]
+; CHECK-NEXT: fmov w8, s2
+; CHECK-NEXT: fmov w9, s1
+; CHECK-NEXT: stp w9, w8, [sp, #16]
; CHECK-NEXT: ldp q0, q1, [sp]
; CHECK-NEXT: add z0.s, z0.s, z1.s
; CHECK-NEXT: stp q0, q0, [x0]
@@ -1251,12 +1236,13 @@
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
-; CHECK-NEXT: ldp q2, q3, [x1]
-; CHECK-NEXT: zip1 z4.d, z1.d, z2.d
-; CHECK-NEXT: trn2 z1.d, z1.d, z2.d
-; CHECK-NEXT: zip1 z2.d, z0.d, z3.d
-; CHECK-NEXT: trn2 z0.d, z0.d, z3.d
-; CHECK-NEXT: fadd z2.d, p0/m, z2.d, z4.d
+; CHECK-NEXT: ldp q3, q2, [x1]
+; CHECK-NEXT: zip1 z4.d, z1.d, z3.d
+; CHECK-NEXT: zip1 z5.d, z0.d, z2.d
+; CHECK-NEXT: trn2 z1.d, z1.d, z3.d
+; CHECK-NEXT: trn2 z0.d, z0.d, z2.d
+; CHECK-NEXT: movprfx z2, z4
+; CHECK-NEXT: fadd z2.d, p0/m, z2.d, z5.d
; CHECK-NEXT: fadd z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: stp q2, q0, [x0]
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-ptest.ll
@@ -7,30 +7,30 @@
define i1 @ptest_v16i1(ptr %a, ptr %b) {
; CHECK-LABEL: ptest_v16i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0, #32]
; CHECK-NEXT: ptrue p0.s, vl4
-; CHECK-NEXT: ptrue p1.h, vl4
+; CHECK-NEXT: ldp q1, q0, [x0, #32]
; CHECK-NEXT: ldp q2, q3, [x0]
+; CHECK-NEXT: fcmne p1.s, p0/z, z0.s, #0.0
; CHECK-NEXT: fcmne p2.s, p0/z, z1.s, #0.0
-; CHECK-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: fcmne p2.s, p0/z, z0.s, #0.0
-; CHECK-NEXT: mov z0.s, p2/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: splice z0.h, p1, z0.h, z1.h
-; CHECK-NEXT: fcmne p2.s, p0/z, z3.s, #0.0
+; CHECK-NEXT: fcmne p3.s, p0/z, z3.s, #0.0
; CHECK-NEXT: fcmne p0.s, p0/z, z2.s, #0.0
-; CHECK-NEXT: mov z2.s, p2/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z2.s, p3/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: mov z3.s, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: ptrue p0.h, vl4
+; CHECK-NEXT: ptrue p1.b, vl16
+; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT: splice z3.h, p1, z3.h, z2.h
-; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT: uzp1 z1.b, z3.b, z3.b
+; CHECK-NEXT: splice z1.h, p0, z1.h, z0.h
+; CHECK-NEXT: splice z3.h, p0, z3.h, z2.h
; CHECK-NEXT: ptrue p0.b, vl8
+; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b
+; CHECK-NEXT: uzp1 z1.b, z3.b, z3.b
; CHECK-NEXT: splice z1.b, p0, z1.b, z0.b
-; CHECK-NEXT: ptrue p0.b, vl16
-; CHECK-NEXT: umaxv b0, p0, z1.b
+; CHECK-NEXT: umaxv b0, p1, z1.b
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
; CHECK-NEXT: ret
@@ -44,49 +44,49 @@
define i1 @ptest_or_v16i1(ptr %a, ptr %b) {
; CHECK-LABEL: ptest_or_v16i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0, #32]
; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: ldp q2, q3, [x0, #32]
+; CHECK-NEXT: ldp q4, q5, [x1]
+; CHECK-NEXT: ldp q6, q7, [x1, #32]
+; CHECK-NEXT: fcmne p1.s, p0/z, z3.s, #0.0
+; CHECK-NEXT: fcmne p2.s, p0/z, z2.s, #0.0
+; CHECK-NEXT: fcmne p3.s, p0/z, z1.s, #0.0
+; CHECK-NEXT: fcmne p4.s, p0/z, z0.s, #0.0
+; CHECK-NEXT: fcmne p5.s, p0/z, z7.s, #0.0
+; CHECK-NEXT: fcmne p6.s, p0/z, z6.s, #0.0
+; CHECK-NEXT: fcmne p7.s, p0/z, z5.s, #0.0
+; CHECK-NEXT: fcmne p0.s, p0/z, z4.s, #0.0
+; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z2.s, p3/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z3.s, p4/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z4.s, p5/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z5.s, p6/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z6.s, p7/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z7.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ptrue p1.h, vl4
-; CHECK-NEXT: fcmne p3.s, p0/z, z0.s, #0.0
-; CHECK-NEXT: ldp q2, q3, [x0]
-; CHECK-NEXT: fcmne p2.s, p0/z, z1.s, #0.0
-; CHECK-NEXT: mov z1.s, p3/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z0.s, p2/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: splice z1.h, p1, z1.h, z0.h
-; CHECK-NEXT: fcmne p3.s, p0/z, z2.s, #0.0
-; CHECK-NEXT: mov z4.s, p3/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: fcmne p2.s, p0/z, z3.s, #0.0
-; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
-; CHECK-NEXT: ldp q3, q0, [x1, #32]
-; CHECK-NEXT: mov z2.s, p2/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
-; CHECK-NEXT: splice z4.h, p1, z4.h, z2.h
-; CHECK-NEXT: fcmne p3.s, p0/z, z3.s, #0.0
-; CHECK-NEXT: mov z3.s, p3/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: ldp q5, q6, [x1]
-; CHECK-NEXT: fcmne p2.s, p0/z, z0.s, #0.0
; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT: mov z0.s, p2/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: splice z3.h, p1, z3.h, z0.h
-; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b
-; CHECK-NEXT: uzp1 z1.b, z4.b, z4.b
-; CHECK-NEXT: fcmne p2.s, p0/z, z6.s, #0.0
-; CHECK-NEXT: fcmne p0.s, p0/z, z5.s, #0.0
-; CHECK-NEXT: mov z2.s, p2/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z4.s, p0/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
-; CHECK-NEXT: splice z4.h, p1, z4.h, z2.h
+; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h
+; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h
+; CHECK-NEXT: uzp1 z7.h, z7.h, z7.h
; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: uzp1 z2.b, z3.b, z3.b
-; CHECK-NEXT: uzp1 z3.b, z4.b, z4.b
+; CHECK-NEXT: splice z1.h, p1, z1.h, z0.h
+; CHECK-NEXT: splice z3.h, p1, z3.h, z2.h
+; CHECK-NEXT: splice z5.h, p1, z5.h, z4.h
+; CHECK-NEXT: splice z7.h, p1, z7.h, z6.h
+; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b
+; CHECK-NEXT: uzp1 z1.b, z3.b, z3.b
+; CHECK-NEXT: uzp1 z2.b, z5.b, z5.b
+; CHECK-NEXT: uzp1 z3.b, z7.b, z7.b
; CHECK-NEXT: splice z1.b, p0, z1.b, z0.b
; CHECK-NEXT: splice z3.b, p0, z3.b, z2.b
-; CHECK-NEXT: orr z0.d, z1.d, z3.d
; CHECK-NEXT: ptrue p0.b, vl16
+; CHECK-NEXT: orr z0.d, z1.d, z3.d
; CHECK-NEXT: umaxv b0, p0, z0.b
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: and w0, w8, #0x1
@@ -111,49 +111,49 @@
define i1 @ptest_and_v16i1(ptr %a, ptr %b) {
; CHECK-LABEL: ptest_and_v16i1:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0, #32]
; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: ldp q0, q1, [x0]
+; CHECK-NEXT: ldp q2, q3, [x0, #32]
+; CHECK-NEXT: ldp q4, q5, [x1]
+; CHECK-NEXT: ldp q6, q7, [x1, #32]
+; CHECK-NEXT: fcmne p1.s, p0/z, z3.s, #0.0
+; CHECK-NEXT: fcmne p2.s, p0/z, z2.s, #0.0
+; CHECK-NEXT: fcmne p3.s, p0/z, z1.s, #0.0
+; CHECK-NEXT: fcmne p4.s, p0/z, z0.s, #0.0
+; CHECK-NEXT: fcmne p5.s, p0/z, z7.s, #0.0
+; CHECK-NEXT: fcmne p6.s, p0/z, z6.s, #0.0
+; CHECK-NEXT: fcmne p7.s, p0/z, z5.s, #0.0
+; CHECK-NEXT: fcmne p0.s, p0/z, z4.s, #0.0
+; CHECK-NEXT: mov z0.s, p1/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z2.s, p3/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z3.s, p4/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z4.s, p5/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z5.s, p6/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z6.s, p7/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT: mov z7.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT: ptrue p1.h, vl4
-; CHECK-NEXT: fcmne p3.s, p0/z, z0.s, #0.0
-; CHECK-NEXT: ldp q2, q3, [x0]
-; CHECK-NEXT: fcmne p2.s, p0/z, z1.s, #0.0
-; CHECK-NEXT: mov z1.s, p3/z, #-1 // =0xffffffffffffffff
-; CHECK-NEXT: mov z0.s,
p2/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h ; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h -; CHECK-NEXT: splice z1.h, p1, z1.h, z0.h -; CHECK-NEXT: fcmne p3.s, p0/z, z2.s, #0.0 -; CHECK-NEXT: mov z4.s, p3/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: fcmne p2.s, p0/z, z3.s, #0.0 -; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h -; CHECK-NEXT: ldp q3, q0, [x1, #32] -; CHECK-NEXT: mov z2.s, p2/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h ; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h -; CHECK-NEXT: splice z4.h, p1, z4.h, z2.h -; CHECK-NEXT: fcmne p3.s, p0/z, z3.s, #0.0 -; CHECK-NEXT: mov z3.s, p3/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: ldp q5, q6, [x1] -; CHECK-NEXT: fcmne p2.s, p0/z, z0.s, #0.0 ; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h -; CHECK-NEXT: mov z0.s, p2/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h -; CHECK-NEXT: splice z3.h, p1, z3.h, z0.h -; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b -; CHECK-NEXT: uzp1 z1.b, z4.b, z4.b -; CHECK-NEXT: fcmne p2.s, p0/z, z6.s, #0.0 -; CHECK-NEXT: fcmne p0.s, p0/z, z5.s, #0.0 -; CHECK-NEXT: mov z2.s, p2/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: mov z4.s, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h ; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h -; CHECK-NEXT: splice z4.h, p1, z4.h, z2.h +; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h +; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h +; CHECK-NEXT: uzp1 z7.h, z7.h, z7.h ; CHECK-NEXT: ptrue p0.b, vl8 -; CHECK-NEXT: uzp1 z2.b, z3.b, z3.b -; CHECK-NEXT: uzp1 z3.b, z4.b, z4.b +; CHECK-NEXT: splice z1.h, p1, z1.h, z0.h +; CHECK-NEXT: splice z3.h, p1, z3.h, z2.h +; CHECK-NEXT: splice z5.h, p1, z5.h, z4.h +; CHECK-NEXT: splice z7.h, p1, z7.h, z6.h +; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b +; CHECK-NEXT: uzp1 z1.b, z3.b, z3.b +; CHECK-NEXT: uzp1 z2.b, z5.b, z5.b +; CHECK-NEXT: uzp1 z3.b, z7.b, z7.b ; CHECK-NEXT: splice z1.b, p0, z1.b, z0.b ; CHECK-NEXT: splice z3.b, p0, z3.b, z2.b -; CHECK-NEXT: and z0.d, z1.d, z3.d ; CHECK-NEXT: ptrue p0.b, vl16 +; CHECK-NEXT: and z0.d, z1.d, z3.d ; CHECK-NEXT: uminv b0, p0, z0.b ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: and w0, w8, #0x1 diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reshuffle.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reshuffle.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reshuffle.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-reshuffle.ll @@ -15,14 +15,14 @@ ; CHECK-NEXT: mov z1.s, z0.s[3] ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: mov z2.s, z0.s[2] -; CHECK-NEXT: mov z0.s, z0.s[1] -; CHECK-NEXT: fmov w9, s1 -; CHECK-NEXT: fmov w10, s2 -; CHECK-NEXT: fmov w11, s0 +; CHECK-NEXT: mov z3.s, z0.s[1] ; CHECK-NEXT: strh w8, [sp, #8] -; CHECK-NEXT: strh w9, [sp, #14] -; CHECK-NEXT: strh w10, [sp, #12] -; CHECK-NEXT: strh w11, [sp, #10] +; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: fmov w9, s2 +; CHECK-NEXT: strh w8, [sp, #14] +; CHECK-NEXT: fmov w8, s3 +; CHECK-NEXT: strh w9, [sp, #12] +; CHECK-NEXT: strh w8, [sp, #10] ; CHECK-NEXT: ldr d0, [sp, #8] ; CHECK-NEXT: add sp, sp, #16 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll --- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll +++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-rev.ll @@ -11,8 +11,8 @@ define <4 x i8> @bitreverse_v4i8(<4 x i8> %op) { ; CHECK-LABEL: bitreverse_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 ; CHECK-NEXT: ptrue p0.h, vl4 
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: rbit z0.h, p0/m, z0.h
; CHECK-NEXT: lsr z0.h, z0.h, #8
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -24,8 +24,8 @@
define <8 x i8> @bitreverse_v8i8(<8 x i8> %op) {
; CHECK-LABEL: bitreverse_v8i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.b, vl8
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: rbit z0.b, p0/m, z0.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -36,8 +36,8 @@
define <16 x i8> @bitreverse_v16i8(<16 x i8> %op) {
; CHECK-LABEL: bitreverse_v16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: ptrue p0.b, vl16
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: rbit z0.b, p0/m, z0.b
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
@@ -48,8 +48,8 @@
define void @bitreverse_v32i8(ptr %a) {
; CHECK-LABEL: bitreverse_v32i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.b, vl16
+; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: rbit z0.b, p0/m, z0.b
; CHECK-NEXT: rbit z1.b, p0/m, z1.b
; CHECK-NEXT: stp q0, q1, [x0]
@@ -63,8 +63,8 @@
define <2 x i16> @bitreverse_v2i16(<2 x i16> %op) {
; CHECK-LABEL: bitreverse_v2i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.s, vl2
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: rbit z0.s, p0/m, z0.s
; CHECK-NEXT: lsr z0.s, z0.s, #16
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -76,8 +76,8 @@
define <4 x i16> @bitreverse_v4i16(<4 x i16> %op) {
; CHECK-LABEL: bitreverse_v4i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.h, vl4
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: rbit z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -88,8 +88,8 @@
define <8 x i16> @bitreverse_v8i16(<8 x i16> %op) {
; CHECK-LABEL: bitreverse_v8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: rbit z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
@@ -100,8 +100,8 @@
define void @bitreverse_v16i16(ptr %a) {
; CHECK-LABEL: bitreverse_v16i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: rbit z0.h, p0/m, z0.h
; CHECK-NEXT: rbit z1.h, p0/m, z1.h
; CHECK-NEXT: stp q0, q1, [x0]
@@ -115,8 +115,8 @@
define <2 x i32> @bitreverse_v2i32(<2 x i32> %op) {
; CHECK-LABEL: bitreverse_v2i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.s, vl2
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: rbit z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -127,8 +127,8 @@
define <4 x i32> @bitreverse_v4i32(<4 x i32> %op) {
; CHECK-LABEL: bitreverse_v4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: rbit z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
@@ -139,8 +139,8 @@
define void @bitreverse_v8i32(ptr %a) {
; CHECK-LABEL: bitreverse_v8i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: rbit z0.s, p0/m, z0.s
; CHECK-NEXT: rbit z1.s, p0/m, z1.s
; CHECK-NEXT: stp q0, q1, [x0]
@@ -154,8 +154,8 @@
define <1 x i64> @bitreverse_v1i64(<1 x i64> %op) {
; CHECK-LABEL: bitreverse_v1i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.d, vl1
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: rbit z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -166,8 +166,8 @@
define <2 x i64> @bitreverse_v2i64(<2 x i64> %op) {
; CHECK-LABEL: bitreverse_v2i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: rbit z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
@@ -178,8 +178,8 @@
define void @bitreverse_v4i64(ptr %a) {
; CHECK-LABEL: bitreverse_v4i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: rbit z0.d, p0/m, z0.d
; CHECK-NEXT: rbit z1.d, p0/m, z1.d
; CHECK-NEXT: stp q0, q1, [x0]
@@ -197,8 +197,8 @@
define <2 x i16> @bswap_v2i16(<2 x i16> %op) {
; CHECK-LABEL: bswap_v2i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.s, vl2
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: revb z0.s, p0/m, z0.s
; CHECK-NEXT: lsr z0.s, z0.s, #16
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -210,8 +210,8 @@
define <4 x i16> @bswap_v4i16(<4 x i16> %op) {
; CHECK-LABEL: bswap_v4i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.h, vl4
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: revb z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -222,8 +222,8 @@
define <8 x i16> @bswap_v8i16(<8 x i16> %op) {
; CHECK-LABEL: bswap_v8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: revb z0.h, p0/m, z0.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
@@ -234,8 +234,8 @@
define void @bswap_v16i16(ptr %a) {
; CHECK-LABEL: bswap_v16i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: revb z0.h, p0/m, z0.h
; CHECK-NEXT: revb z1.h, p0/m, z1.h
; CHECK-NEXT: stp q0, q1, [x0]
@@ -249,8 +249,8 @@
define <2 x i32> @bswap_v2i32(<2 x i32> %op) {
; CHECK-LABEL: bswap_v2i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.s, vl2
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: revb z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -261,8 +261,8 @@
define <4 x i32> @bswap_v4i32(<4 x i32> %op) {
; CHECK-LABEL: bswap_v4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: revb z0.s, p0/m, z0.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
@@ -273,8 +273,8 @@
define void @bswap_v8i32(ptr %a) {
; CHECK-LABEL: bswap_v8i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: revb z0.s, p0/m, z0.s
; CHECK-NEXT: revb z1.s, p0/m, z1.s
; CHECK-NEXT: stp q0, q1, [x0]
@@ -288,8 +288,8 @@
define <1 x i64> @bswap_v1i64(<1 x i64> %op) {
; CHECK-LABEL: bswap_v1i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.d, vl1
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: revb z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -300,8 +300,8 @@
define <2 x i64> @bswap_v2i64(<2 x i64> %op) {
; CHECK-LABEL: bswap_v2i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: revb z0.d, p0/m, z0.d
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
@@ -312,8 +312,8 @@
define void @bswap_v4i64(ptr %a) {
; CHECK-LABEL: bswap_v4i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: revb z0.d, p0/m, z0.d
; CHECK-NEXT: revb z1.d, p0/m, z1.d
; CHECK-NEXT: stp q0, q1, [x0]
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-sdiv-pow2.ll
@@ -7,8 +7,8 @@
define <4 x i8> @sdiv_v4i8(<4 x i8> %op1) {
; CHECK-LABEL: sdiv_v4i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.h, vl4
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: sxtb z0.h, p0/m, z0.h
; CHECK-NEXT: asrd z0.h, p0/m, z0.h, #5
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -20,8 +20,8 @@
define <8 x i8> @sdiv_v8i8(<8 x i8> %op1) {
; CHECK-LABEL: sdiv_v8i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.b, vl8
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: asrd z0.b, p0/m, z0.b, #5
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -32,8 +32,8 @@
define <16 x i8> @sdiv_v16i8(<16 x i8> %op1) {
; CHECK-LABEL: sdiv_v16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: ptrue p0.b, vl16
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: asrd z0.b, p0/m, z0.b, #5
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
@@ -44,8 +44,8 @@
define void @sdiv_v32i8(ptr %a) {
; CHECK-LABEL: sdiv_v32i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.b, vl16
+; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: asrd z0.b, p0/m, z0.b, #5
; CHECK-NEXT: asrd z1.b, p0/m, z1.b, #5
; CHECK-NEXT: stp q0, q1, [x0]
@@ -59,8 +59,8 @@
define <2 x i16> @sdiv_v2i16(<2 x i16> %op1) {
; CHECK-LABEL: sdiv_v2i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.s, vl2
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: sxth z0.s, p0/m, z0.s
; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #5
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -72,8 +72,8 @@
define <4 x i16> @sdiv_v4i16(<4 x i16> %op1) {
; CHECK-LABEL: sdiv_v4i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.h, vl4
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: asrd z0.h, p0/m, z0.h, #5
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -84,8 +84,8 @@
define <8 x i16> @sdiv_v8i16(<8 x i16> %op1) {
; CHECK-LABEL: sdiv_v8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: asrd z0.h, p0/m, z0.h, #5
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
@@ -96,8 +96,8 @@
define void @sdiv_v16i16(ptr %a) {
; CHECK-LABEL: sdiv_v16i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: asrd z0.h, p0/m, z0.h, #5
; CHECK-NEXT: asrd z1.h, p0/m, z1.h, #5
; CHECK-NEXT: stp q0, q1, [x0]
@@ -111,8 +111,8 @@
define <2 x i32> @sdiv_v2i32(<2 x i32> %op1) {
; CHECK-LABEL: sdiv_v2i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.s, vl2
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #5
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -123,8 +123,8 @@
define <4 x i32> @sdiv_v4i32(<4 x i32> %op1) {
; CHECK-LABEL: sdiv_v4i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #5
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
@@ -135,8 +135,8 @@
define void @sdiv_v8i32(ptr %a) {
; CHECK-LABEL: sdiv_v8i32:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: asrd z0.s, p0/m, z0.s, #5
; CHECK-NEXT: asrd z1.s, p0/m, z1.s, #5
; CHECK-NEXT: stp q0, q1, [x0]
@@ -150,8 +150,8 @@
define <1 x i64> @sdiv_v1i64(<1 x i64> %op1) {
; CHECK-LABEL: sdiv_v1i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: ptrue p0.d, vl1
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: asrd z0.d, p0/m, z0.d, #5
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
; CHECK-NEXT: ret
@@ -163,8 +163,8 @@
define <2 x i64> @sdiv_v2i64(<2 x i64> %op1) {
; CHECK-LABEL: sdiv_v2i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: asrd z0.d, p0/m, z0.d, #5
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
@@ -175,8 +175,8 @@
define void @sdiv_v4i64(ptr %a) {
; CHECK-LABEL: sdiv_v4i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: asrd z0.d, p0/m, z0.d, #5
; CHECK-NEXT: asrd z1.d, p0/m, z1.d, #5
; CHECK-NEXT: stp q0, q1, [x0]
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-shuffle.ll
@@ -22,8 +22,8 @@
define void @interleave_store_without_splat(ptr %a, <4 x i32> %v1, <4 x i32> %v2) {
; CHECK-LABEL: interleave_store_without_splat:
; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: // kill: def $q1 killed $q1 killed $z0_z1 def $z0_z1
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0_z1 def $z0_z1
; CHECK-NEXT: st2w { z0.s, z1.s }, p0, [x0]
; CHECK-NEXT: ret
@@ -36,12 +36,12 @@
define void @interleave_store_legalization(ptr %a, <8 x i32> %v1, <8 x i32> %v2) {
; CHECK-LABEL: interleave_store_legalization:
; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #8 // =0x8
-; CHECK-NEXT: // kill: def $q3 killed $q3 killed $z2_z3 def $z2_z3
+; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: mov z5.d, z2.d
-; CHECK-NEXT: mov z2.d, z1.d
+; CHECK-NEXT: // kill: def $q3 killed $q3 def $z2_z3
+; CHECK-NEXT: mov x8, #8 // =0x8
; CHECK-NEXT: mov z4.d, z0.d
-; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: mov z2.d, z1.d
; CHECK-NEXT: st2w { z4.s, z5.s }, p0, [x0]
; CHECK-NEXT: st2w { z2.s, z3.s }, p0, [x0, x8, lsl #2]
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc-stores.ll
@@ -7,8 +7,8 @@
define void @store_trunc_v8i16i8(ptr %ap, ptr %dest) {
; CHECK-LABEL: store_trunc_v8i16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ptrue p0.h, vl8
+; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: st1b { z0.h }, p0, [x1]
; CHECK-NEXT: ret
%a = load <8 x i16>, ptr %ap
@@ -20,8 +20,8 @@
define void @store_trunc_v4i32i8(ptr %ap, ptr %dest) {
; CHECK-LABEL: store_trunc_v4i32i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: st1b { z0.s }, p0, [x1]
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %ap
@@ -33,8 +33,8 @@
define void @store_trunc_v4i32i16(ptr %ap, ptr %dest) {
; CHECK-LABEL: store_trunc_v4i32i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ptrue p0.s, vl4
+; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: st1h { z0.s }, p0, [x1]
; CHECK-NEXT: ret
%a = load <4 x i32>, ptr %ap
@@ -46,8 +46,8 @@
define void @store_trunc_v2i64i8(ptr %ap, ptr %dest) {
; CHECK-LABEL: store_trunc_v2i64i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
+; CHECK-NEXT: ldr q0, [x0]
; CHECK-NEXT: st1w { z0.d }, p0, [x1]
; CHECK-NEXT: ret
%a = load <2 x i64>, ptr %ap
@@ -59,8 +59,8 @@
define void @store_trunc_v2i256i64(ptr %ap, ptr %dest) {
; CHECK-LABEL: store_trunc_v2i256i64:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0, #32]
; CHECK-NEXT: ptrue p0.d, vl1
+; CHECK-NEXT: ldr d0, [x0, #32]
; CHECK-NEXT: ldr d1, [x0]
; CHECK-NEXT: splice z1.d, p0, z1.d, z0.d
; CHECK-NEXT: str q1, [x1]
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-trunc.ll
@@ -13,8 +13,8 @@
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: uzp1 z1.b, z1.b, z1.b
+; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
@@ -29,15 +29,15 @@
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0, #32]
; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT: ldp q3, q2, [x0]
+; CHECK-NEXT: ldp q2, q3, [x0]
; CHECK-NEXT: uzp1 z1.b, z1.b, z1.b
-; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
-; CHECK-NEXT: add z0.b, z0.b, z0.b
+; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: uzp1 z3.b, z3.b, z3.b
; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b
-; CHECK-NEXT: splice z3.b, p0, z3.b, z2.b
-; CHECK-NEXT: add z1.b, z3.b, z3.b
+; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
+; CHECK-NEXT: splice z2.b, p0, z2.b, z3.b
+; CHECK-NEXT: add z0.b, z0.b, z0.b
+; CHECK-NEXT: add z1.b, z2.b, z2.b
; CHECK-NEXT: stp q1, q0, [x1]
; CHECK-NEXT: ret
%a = load <32 x i16>, ptr %in
@@ -53,27 +53,27 @@
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0, #64]
; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT: ldp q2, q3, [x0, #96]
+; CHECK-NEXT: ldp q2, q3, [x0]
+; CHECK-NEXT: ldp q4, q5, [x0, #96]
+; CHECK-NEXT: ldp q6, q7, [x0, #32]
; CHECK-NEXT: uzp1 z1.b, z1.b, z1.b
-; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
-; CHECK-NEXT: add z0.b, z0.b, z0.b
-; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b
-; CHECK-NEXT: ldp q4, q5, [x0]
+; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: uzp1 z3.b, z3.b, z3.b
-; CHECK-NEXT: splice z2.b, p0, z2.b, z3.b
+; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b
+; CHECK-NEXT: uzp1 z5.b, z5.b, z5.b
; CHECK-NEXT: uzp1 z4.b, z4.b, z4.b
-; CHECK-NEXT: ldp q6, q7, [x0, #32]
-; CHECK-NEXT: uzp1 z1.b, z5.b, z5.b
-; CHECK-NEXT: splice z4.b, p0, z4.b, z1.b
-; CHECK-NEXT: uzp1 z3.b, z6.b, z6.b
-; CHECK-NEXT: uzp1 z1.b, z7.b, z7.b
-; CHECK-NEXT: splice z3.b, p0, z3.b, z1.b
-; CHECK-NEXT: add z1.b, z2.b, z2.b
+; CHECK-NEXT: uzp1 z7.b, z7.b, z7.b
+; CHECK-NEXT: uzp1 z6.b, z6.b, z6.b
+; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
+; CHECK-NEXT: splice z2.b, p0, z2.b, z3.b
+; CHECK-NEXT: splice z4.b, p0, z4.b, z5.b
+; CHECK-NEXT: splice z6.b, p0, z6.b, z7.b
+; CHECK-NEXT: add z0.b, z0.b, z0.b
+; CHECK-NEXT: add z2.b, z2.b, z2.b
+; CHECK-NEXT: add z1.b, z4.b, z4.b
+; CHECK-NEXT: add z3.b, z6.b, z6.b
; CHECK-NEXT: stp q0, q1, [x1, #32]
-; CHECK-NEXT: add z0.b, z4.b, z4.b
-; CHECK-NEXT: add z1.b, z3.b, z3.b
-; CHECK-NEXT: stp q0, q1, [x1]
+; CHECK-NEXT: stp q2, q3, [x1]
; CHECK-NEXT: ret
%a = load <64 x i16>, ptr %in
%b = trunc <64 x i16> %a to <64 x i8>
@@ -88,49 +88,49 @@
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0, #192]
; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT: ldp q2, q3, [x0, #224]
+; CHECK-NEXT: ldp q6, q7, [x0, #224]
+; CHECK-NEXT: ldp q2, q3, [x0, #32]
; CHECK-NEXT: uzp1 z1.b, z1.b, z1.b
-; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
-; CHECK-NEXT: add z0.b, z0.b, z0.b
-; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b
-; CHECK-NEXT: ldp q6, q7, [x0, #128]
-; CHECK-NEXT: uzp1 z3.b, z3.b, z3.b
-; CHECK-NEXT: splice z2.b, p0, z2.b, z3.b
-; CHECK-NEXT: add z2.b, z2.b, z2.b
-; CHECK-NEXT: uzp1 z6.b, z6.b, z6.b
-; CHECK-NEXT: ldp q1, q3, [x0, #160]
+; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: uzp1 z7.b, z7.b, z7.b
-; CHECK-NEXT: splice z6.b, p0, z6.b, z7.b
-; CHECK-NEXT: uzp1 z1.b, z1.b, z1.b
-; CHECK-NEXT: ldp q16, q17, [x0, #64]
+; CHECK-NEXT: uzp1 z6.b, z6.b, z6.b
+; CHECK-NEXT: ldp q4, q5, [x0]
; CHECK-NEXT: uzp1 z3.b, z3.b, z3.b
-; CHECK-NEXT: splice z1.b, p0, z1.b, z3.b
-; CHECK-NEXT: add z1.b, z1.b, z1.b
+; CHECK-NEXT: ldp q16, q17, [x0, #64]
+; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b
+; CHECK-NEXT: ldp q18, q19, [x0, #128]
+; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
+; CHECK-NEXT: ldp q20, q21, [x0, #160]
+; CHECK-NEXT: splice z6.b, p0, z6.b, z7.b
+; CHECK-NEXT: ldp q22, q23, [x0, #96]
+; CHECK-NEXT: uzp1 z1.b, z17.b, z17.b
+; CHECK-NEXT: uzp1 z19.b, z19.b, z19.b
+; CHECK-NEXT: uzp1 z18.b, z18.b, z18.b
; CHECK-NEXT: uzp1 z16.b, z16.b, z16.b
-; CHECK-NEXT: ldp q7, q18, [x0, #96]
-; CHECK-NEXT: uzp1 z17.b, z17.b, z17.b
-; CHECK-NEXT: splice z16.b, p0, z16.b, z17.b
-; CHECK-NEXT: uzp1 z7.b, z7.b, z7.b
-; CHECK-NEXT: ldp q4, q5, [x0, #32]
-; CHECK-NEXT: uzp1 z3.b, z18.b, z18.b
-; CHECK-NEXT: splice z7.b, p0, z7.b, z3.b
+; CHECK-NEXT: uzp1 z21.b, z21.b, z21.b
+; CHECK-NEXT: uzp1 z20.b, z20.b, z20.b
+; CHECK-NEXT: uzp1 z5.b, z5.b, z5.b
+; CHECK-NEXT: uzp1 z7.b, z23.b, z23.b
+; CHECK-NEXT: uzp1 z17.b, z22.b, z22.b
; CHECK-NEXT: uzp1 z4.b, z4.b, z4.b
-; CHECK-NEXT: ldp q19, q20, [x0]
-; CHECK-NEXT: uzp1 z3.b, z5.b, z5.b
-; CHECK-NEXT: stp q0, q2, [x1, #96]
-; CHECK-NEXT: add z0.b, z6.b, z6.b
-; CHECK-NEXT: splice z4.b, p0, z4.b, z3.b
-; CHECK-NEXT: stp q0, q1, [x1, #64]
+; CHECK-NEXT: splice z2.b, p0, z2.b, z3.b
+; CHECK-NEXT: add z0.b, z0.b, z0.b
+; CHECK-NEXT: splice z18.b, p0, z18.b, z19.b
+; CHECK-NEXT: splice z16.b, p0, z16.b, z1.b
+; CHECK-NEXT: add z1.b, z6.b, z6.b
+; CHECK-NEXT: splice z20.b, p0, z20.b, z21.b
+; CHECK-NEXT: splice z17.b, p0, z17.b, z7.b
+; CHECK-NEXT: splice z4.b, p0, z4.b, z5.b
+; CHECK-NEXT: stp q0, q1, [x1, #96]
+; CHECK-NEXT: add z2.b, z2.b, z2.b
+; CHECK-NEXT: add z5.b, z18.b, z18.b
; CHECK-NEXT: add z0.b, z16.b, z16.b
-; CHECK-NEXT: uzp1 z18.b, z19.b, z19.b
-; CHECK-NEXT: add z1.b, z7.b, z7.b
+; CHECK-NEXT: add z3.b, z20.b, z20.b
+; CHECK-NEXT: add z1.b, z17.b, z17.b
+; CHECK-NEXT: add z4.b, z4.b, z4.b
+; CHECK-NEXT: stp q5, q3, [x1, #64]
+; CHECK-NEXT: stp q4, q2, [x1]
; CHECK-NEXT: stp q0, q1, [x1, #32]
-; CHECK-NEXT: add z1.b, z4.b, z4.b
-; CHECK-NEXT: uzp1 z17.b, z20.b, z20.b
-; CHECK-NEXT: splice z18.b, p0, z18.b, z17.b
-; CHECK-NEXT: add z0.b, z18.b, z18.b
-; CHECK-NEXT: stp q0, q1, [x1]
; CHECK-NEXT: ret
%a = load <128 x i16>, ptr %in
%b = trunc <128 x i16> %a to <128 x i8>
@@ -148,8 +148,8 @@
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
+; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -162,18 +162,18 @@
define <16 x i8> @trunc_v16i32_v16i8(ptr %in) nounwind {
; CHECK-LABEL: trunc_v16i32_v16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0, #32]
+; CHECK-NEXT: ldp q1, q0, [x0, #32]
; CHECK-NEXT: ptrue p0.h, vl4
+; CHECK-NEXT: ldp q2, q3, [x0]
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: ldp q3, q2, [x0]
; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
-; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
-; CHECK-NEXT: uzp1 z1.b, z0.b, z0.b
; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h
; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
-; CHECK-NEXT: splice z3.h, p0, z3.h, z2.h
+; CHECK-NEXT: splice z1.h, p0, z1.h, z0.h
+; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h
; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: uzp1 z0.b, z3.b, z3.b
+; CHECK-NEXT: uzp1 z1.b, z1.b, z1.b
+; CHECK-NEXT: uzp1 z0.b, z2.b, z2.b
; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
@@ -186,32 +186,32 @@
define void @trunc_v32i32_v32i8(ptr %in, ptr %out) nounwind {
; CHECK-LABEL: trunc_v32i32_v32i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0, #96]
+; CHECK-NEXT: ldp q0, q1, [x0, #32]
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: ptrue p1.b, vl8
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: ldp q2, q3, [x0, #64]
+; CHECK-NEXT: ldp q2, q3, [x0, #96]
+; CHECK-NEXT: ldp q4, q5, [x0, #64]
+; CHECK-NEXT: ldp q6, q7, [x0]
; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
-; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
-; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
-; CHECK-NEXT: ldp q4, q5, [x0]
; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h
+; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
+; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h
+; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
+; CHECK-NEXT: uzp1 z7.h, z7.h, z7.h
+; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h
; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h
+; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: splice z4.h, p0, z4.h, z5.h
+; CHECK-NEXT: splice z6.h, p0, z6.h, z7.h
+; CHECK-NEXT: ptrue p0.b, vl8
; CHECK-NEXT: uzp1 z1.b, z2.b, z2.b
-; CHECK-NEXT: splice z1.b, p1, z1.b, z0.b
-; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
-; CHECK-NEXT: ldp q6, q7, [x0, #32]
-; CHECK-NEXT: uzp1 z3.h, z5.h, z5.h
-; CHECK-NEXT: splice z4.h, p0, z4.h, z3.h
-; CHECK-NEXT: uzp1 z2.h, z6.h, z6.h
-; CHECK-NEXT: uzp1 z0.h, z7.h, z7.h
-; CHECK-NEXT: splice z2.h, p0, z2.h, z0.h
-; CHECK-NEXT: uzp1 z0.b, z2.b, z2.b
+; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT: uzp1 z2.b, z4.b, z4.b
-; CHECK-NEXT: splice z2.b, p1, z2.b, z0.b
-; CHECK-NEXT: add z0.b, z1.b, z1.b
-; CHECK-NEXT: add z1.b, z2.b, z2.b
+; CHECK-NEXT: uzp1 z3.b, z6.b, z6.b
+; CHECK-NEXT: splice z2.b, p0, z2.b, z1.b
+; CHECK-NEXT: splice z3.b, p0, z3.b, z0.b
+; CHECK-NEXT: add z0.b, z2.b, z2.b
+; CHECK-NEXT: add z1.b, z3.b, z3.b
; CHECK-NEXT: stp q1, q0, [x1]
; CHECK-NEXT: ret
%a = load <32 x i32>, ptr %in
@@ -225,58 +225,58 @@
define void @trunc_v64i32_v64i8(ptr %in, ptr %out) nounwind {
; CHECK-LABEL: trunc_v64i32_v64i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0, #128]
+; CHECK-NEXT: ldp q0, q1, [x0, #64]
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: ptrue p1.b, vl8
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: ldp q2, q3, [x0, #160]
+; CHECK-NEXT: ptrue p1.b, vl8
+; CHECK-NEXT: ldp q4, q5, [x0, #96]
+; CHECK-NEXT: ldp q6, q7, [x0]
; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
-; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
-; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
+; CHECK-NEXT: ldp q16, q17, [x0, #128]
; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h
-; CHECK-NEXT: uzp1 z2.b, z2.b, z2.b
-; CHECK-NEXT: ldp q1, q17, [x0, #224]
-; CHECK-NEXT: splice z0.b, p1, z0.b, z2.b
-; CHECK-NEXT: add z0.b, z0.b, z0.b
-; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
-; CHECK-NEXT: ldp q18, q2, [x0, #192]
+; CHECK-NEXT: ldp q18, q19, [x0, #192]
+; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
+; CHECK-NEXT: ldp q20, q21, [x0, #224]
+; CHECK-NEXT: uzp1 z7.h, z7.h, z7.h
+; CHECK-NEXT: ldp q22, q23, [x0, #32]
; CHECK-NEXT: uzp1 z17.h, z17.h, z17.h
-; CHECK-NEXT: splice z1.h, p0, z1.h, z17.h
-; CHECK-NEXT: uzp1 z1.b, z1.b, z1.b
+; CHECK-NEXT: uzp1 z16.h, z16.h, z16.h
+; CHECK-NEXT: uzp1 z19.h, z19.h, z19.h
; CHECK-NEXT: uzp1 z18.h, z18.h, z18.h
-; CHECK-NEXT: ldp q4, q5, [x0, #64]
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
-; CHECK-NEXT: splice z18.h, p0, z18.h, z2.h
-; CHECK-NEXT: uzp1 z2.b, z18.b, z18.b
-; CHECK-NEXT: splice z2.b, p1, z2.b, z1.b
-; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
-; CHECK-NEXT: ldp q6, q7, [x0, #96]
+; CHECK-NEXT: uzp1 z21.h, z21.h, z21.h
+; CHECK-NEXT: uzp1 z20.h, z20.h, z20.h
+; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h
+; CHECK-NEXT: uzp1 z23.h, z23.h, z23.h
+; CHECK-NEXT: uzp1 z22.h, z22.h, z22.h
; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h
+; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
+; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h
+; CHECK-NEXT: splice z16.h, p0, z16.h, z17.h
+; CHECK-NEXT: splice z18.h, p0, z18.h, z19.h
+; CHECK-NEXT: splice z20.h, p0, z20.h, z21.h
+; CHECK-NEXT: splice z6.h, p0, z6.h, z7.h
+; CHECK-NEXT: splice z22.h, p0, z22.h, z23.h
; CHECK-NEXT: splice z4.h, p0, z4.h, z5.h
+; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: uzp1 z1.b, z2.b, z2.b
+; CHECK-NEXT: uzp1 z2.b, z16.b, z16.b
+; CHECK-NEXT: uzp1 z5.b, z18.b, z18.b
+; CHECK-NEXT: uzp1 z3.b, z20.b, z20.b
+; CHECK-NEXT: uzp1 z6.b, z6.b, z6.b
+; CHECK-NEXT: uzp1 z7.b, z22.b, z22.b
; CHECK-NEXT: uzp1 z4.b, z4.b, z4.b
-; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h
-; CHECK-NEXT: ldp q3, q16, [x0]
-; CHECK-NEXT: uzp1 z1.h, z7.h, z7.h
-; CHECK-NEXT: splice z6.h, p0, z6.h, z1.h
-; CHECK-NEXT: uzp1 z1.b, z6.b, z6.b
-; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT: splice z4.b, p1, z4.b, z1.b
+; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT: splice z2.b, p1, z2.b, z1.b
+; CHECK-NEXT: splice z5.b, p1, z5.b, z3.b
+; CHECK-NEXT: splice z6.b, p1, z6.b, z7.b
+; CHECK-NEXT: splice z0.b, p1, z0.b, z4.b
; CHECK-NEXT: add z1.b, z2.b, z2.b
-; CHECK-NEXT: ldp q19, q20, [x0, #32]
-; CHECK-NEXT: uzp1 z16.h, z16.h, z16.h
-; CHECK-NEXT: stp q0, q1, [x1, #32]
-; CHECK-NEXT: splice z3.h, p0, z3.h, z16.h
-; CHECK-NEXT: add z1.b, z4.b, z4.b
-; CHECK-NEXT: uzp1 z3.b, z3.b, z3.b
-; CHECK-NEXT: uzp1 z18.h, z19.h, z19.h
-; CHECK-NEXT: uzp1 z17.h, z20.h, z20.h
-; CHECK-NEXT: splice z18.h, p0, z18.h, z17.h
-; CHECK-NEXT: uzp1 z16.b, z18.b, z18.b
-; CHECK-NEXT: splice z3.b, p1, z3.b, z16.b
-; CHECK-NEXT: add z0.b, z3.b, z3.b
-; CHECK-NEXT: stp q0, q1, [x1]
+; CHECK-NEXT: add z2.b, z5.b, z5.b
+; CHECK-NEXT: add z3.b, z6.b, z6.b
+; CHECK-NEXT: add z0.b, z0.b, z0.b
+; CHECK-NEXT: stp q1, q2, [x1, #32]
+; CHECK-NEXT: stp q3, q0, [x1]
; CHECK-NEXT: ret
%a = load <64 x i32>, ptr %in
%b = trunc <64 x i32> %a to <64 x i8>
@@ -294,8 +294,8 @@
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
+; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
@@ -310,15 +310,15 @@
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0, #32]
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: ldp q3, q2, [x0]
+; CHECK-NEXT: ldp q2, q3, [x0]
; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
-; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
-; CHECK-NEXT: add z0.h, z0.h, z0.h
+; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h
; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
-; CHECK-NEXT: splice z3.h, p0, z3.h, z2.h
-; CHECK-NEXT: add z1.h, z3.h, z3.h
+; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h
+; CHECK-NEXT: add z0.h, z0.h, z0.h
+; CHECK-NEXT: add z1.h, z2.h, z2.h
; CHECK-NEXT: stp q1, q0, [x1]
; CHECK-NEXT: ret
%a = load <16 x i32>, ptr %in
@@ -334,27 +334,27 @@
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0, #64]
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: ldp q2, q3, [x0, #96]
+; CHECK-NEXT: ldp q2, q3, [x0]
+; CHECK-NEXT: ldp q4, q5, [x0, #96]
+; CHECK-NEXT: ldp q6, q7, [x0, #32]
; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
-; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
-; CHECK-NEXT: add z0.h, z0.h, z0.h
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
-; CHECK-NEXT: ldp q4, q5, [x0]
+; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h
+; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
+; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h
; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
-; CHECK-NEXT: ldp q6, q7, [x0, #32]
-; CHECK-NEXT: uzp1 z1.h, z5.h, z5.h
-; CHECK-NEXT: splice z4.h, p0, z4.h, z1.h
-; CHECK-NEXT: uzp1 z3.h, z6.h, z6.h
-; CHECK-NEXT: uzp1 z1.h, z7.h, z7.h
-; CHECK-NEXT: splice z3.h, p0, z3.h, z1.h
-; CHECK-NEXT: add z1.h, z2.h, z2.h
+; CHECK-NEXT: uzp1 z7.h, z7.h, z7.h
+; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h
+; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h
+; CHECK-NEXT: splice z4.h, p0, z4.h, z5.h
+; CHECK-NEXT: splice z6.h, p0, z6.h, z7.h
+; CHECK-NEXT: add z0.h, z0.h, z0.h
+; CHECK-NEXT: add z2.h, z2.h, z2.h
+; CHECK-NEXT: add z1.h, z4.h, z4.h
+; CHECK-NEXT: add z3.h, z6.h, z6.h
; CHECK-NEXT: stp q0, q1, [x1, #32]
-; CHECK-NEXT: add z0.h, z4.h, z4.h
-; CHECK-NEXT: add z1.h, z3.h, z3.h
-; CHECK-NEXT: stp q0, q1, [x1]
+; CHECK-NEXT: stp q2, q3, [x1]
; CHECK-NEXT: ret
%a = load <32 x i32>, ptr %in
%b = trunc <32 x i32> %a to <32 x i16>
@@ -369,49 +369,49 @@
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0, #192]
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: ldp q2, q3, [x0, #224]
+; CHECK-NEXT: ldp q6, q7, [x0, #224]
+; CHECK-NEXT: ldp q2, q3, [x0, #32]
; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
-; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
-; CHECK-NEXT: add z0.h, z0.h, z0.h
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
-; CHECK-NEXT: ldp q6, q7, [x0, #128]
-; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h
-; CHECK-NEXT: add z2.h, z2.h, z2.h
-; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h
-; CHECK-NEXT: ldp q1, q3, [x0, #160]
+; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z7.h, z7.h, z7.h
-; CHECK-NEXT: splice z6.h, p0, z6.h, z7.h
-; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
-; CHECK-NEXT: ldp q16, q17, [x0, #64]
+; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h
+; CHECK-NEXT: ldp q4, q5, [x0]
; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT: splice z1.h, p0, z1.h, z3.h
-; CHECK-NEXT: add z1.h, z1.h, z1.h
+; CHECK-NEXT: ldp q16, q17, [x0, #64]
+; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
+; CHECK-NEXT: ldp q18, q19, [x0, #128]
+; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
+; CHECK-NEXT: ldp q20, q21, [x0, #160]
+; CHECK-NEXT: splice z6.h, p0, z6.h, z7.h
+; CHECK-NEXT: ldp q22, q23, [x0, #96]
+; CHECK-NEXT: uzp1 z1.h, z17.h, z17.h
+; CHECK-NEXT: uzp1 z19.h, z19.h, z19.h
+; CHECK-NEXT: uzp1 z18.h, z18.h, z18.h
; CHECK-NEXT: uzp1 z16.h, z16.h, z16.h
-; CHECK-NEXT: ldp q7, q18, [x0, #96]
-; CHECK-NEXT: uzp1 z17.h, z17.h, z17.h
-; CHECK-NEXT: splice z16.h, p0, z16.h, z17.h
-; CHECK-NEXT: uzp1 z7.h, z7.h, z7.h
-; CHECK-NEXT: ldp q4, q5, [x0, #32]
-; CHECK-NEXT: uzp1 z3.h, z18.h, z18.h
-; CHECK-NEXT: splice z7.h, p0, z7.h, z3.h
+; CHECK-NEXT: uzp1 z21.h, z21.h, z21.h
+; CHECK-NEXT: uzp1 z20.h, z20.h, z20.h
+; CHECK-NEXT: uzp1 z5.h, z5.h, z5.h
+; CHECK-NEXT: uzp1 z7.h, z23.h, z23.h
+; CHECK-NEXT: uzp1 z17.h, z22.h, z22.h
; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
-; CHECK-NEXT: ldp q19, q20, [x0]
-; CHECK-NEXT: uzp1 z3.h, z5.h, z5.h
-; CHECK-NEXT: stp q0, q2, [x1, #96]
-; CHECK-NEXT: add z0.h, z6.h, z6.h
-; CHECK-NEXT: splice z4.h, p0, z4.h, z3.h
-; CHECK-NEXT: stp q0, q1, [x1, #64]
+; CHECK-NEXT: splice z2.h, p0, z2.h, z3.h
+; CHECK-NEXT: add z0.h, z0.h, z0.h
+; CHECK-NEXT: splice z18.h, p0, z18.h, z19.h
+; CHECK-NEXT: splice z16.h, p0, z16.h, z1.h
+; CHECK-NEXT: add z1.h, z6.h, z6.h
+; CHECK-NEXT: splice z20.h, p0, z20.h, z21.h
+; CHECK-NEXT: splice z17.h, p0, z17.h, z7.h
+; CHECK-NEXT: splice z4.h, p0, z4.h, z5.h
+; CHECK-NEXT: stp q0, q1, [x1, #96]
+; CHECK-NEXT: add z2.h, z2.h, z2.h
+; CHECK-NEXT: add z5.h, z18.h, z18.h
; CHECK-NEXT: add z0.h, z16.h, z16.h
-; CHECK-NEXT: uzp1 z18.h, z19.h, z19.h
-; CHECK-NEXT: add z1.h, z7.h, z7.h
+; CHECK-NEXT: add z3.h, z20.h, z20.h
+; CHECK-NEXT: add z1.h, z17.h, z17.h
+; CHECK-NEXT: add z4.h, z4.h, z4.h
+; CHECK-NEXT: stp q5, q3, [x1, #64]
+; CHECK-NEXT: stp q4, q2, [x1]
; CHECK-NEXT: stp q0, q1, [x1, #32]
-; CHECK-NEXT: add z1.h, z4.h, z4.h
-; CHECK-NEXT: uzp1 z17.h, z20.h, z20.h
-; CHECK-NEXT: splice z18.h, p0, z18.h, z17.h
-; CHECK-NEXT: add z0.h, z18.h, z18.h
-; CHECK-NEXT: stp q0, q1, [x1]
; CHECK-NEXT: ret
%a = load <64 x i32>, ptr %in
%b = trunc <64 x i32> %a to <64 x i16>
@@ -430,8 +430,8 @@
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
+; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -444,18 +444,18 @@
define <8 x i8> @trunc_v8i64_v8i8(ptr %in) nounwind {
; CHECK-LABEL: trunc_v8i64_v8i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0, #32]
+; CHECK-NEXT: ldp q1, q0, [x0, #32]
; CHECK-NEXT: ptrue p0.s, vl2
+; CHECK-NEXT: ldp q2, q3, [x0]
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT: ldp q3, q2, [x0]
; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
-; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s
; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
-; CHECK-NEXT: splice z3.s, p0, z3.s, z2.s
+; CHECK-NEXT: splice z1.s, p0, z1.s, z0.s
+; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z1.h, z3.h, z3.h
+; CHECK-NEXT: uzp1 z0.h, z1.h, z1.h
+; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h
; CHECK-NEXT: splice z1.h, p0, z1.h, z0.h
; CHECK-NEXT: uzp1 z0.b, z1.b, z1.b
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -468,33 +468,33 @@
define <16 x i8> @trunc_v16i64_v16i8(ptr %in) nounwind {
; CHECK-LABEL: trunc_v16i64_v16i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0, #96]
+; CHECK-NEXT: ldp q0, q1, [x0, #32]
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: ptrue p1.h, vl4
-; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT: ldp q2, q3, [x0, #64]
+; CHECK-NEXT: ldp q2, q3, [x0, #96]
+; CHECK-NEXT: ldp q4, q5, [x0, #64]
+; CHECK-NEXT: ldp q6, q7, [x0]
; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
-; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
-; CHECK-NEXT: ldp q4, q5, [x0]
; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s
+; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
+; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT: uzp1 z5.s, z5.s, z5.s
+; CHECK-NEXT: uzp1 z4.s, z4.s, z4.s
+; CHECK-NEXT: uzp1 z7.s, z7.s, z7.s
+; CHECK-NEXT: uzp1 z6.s, z6.s, z6.s
; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s
+; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: splice z4.s, p0, z4.s, z5.s
+; CHECK-NEXT: splice z6.s, p0, z6.s, z7.s
+; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h
-; CHECK-NEXT: splice z1.h, p1, z1.h, z0.h
-; CHECK-NEXT: uzp1 z4.s, z4.s, z4.s
-; CHECK-NEXT: uzp1 z1.b, z1.b, z1.b
-; CHECK-NEXT: ldp q6, q7, [x0, #32]
-; CHECK-NEXT: uzp1 z3.s, z5.s, z5.s
-; CHECK-NEXT: splice z4.s, p0, z4.s, z3.s
-; CHECK-NEXT: uzp1 z2.s, z6.s, z6.s
-; CHECK-NEXT: uzp1 z0.s, z7.s, z7.s
-; CHECK-NEXT: splice z2.s, p0, z2.s, z0.s
-; CHECK-NEXT: ptrue p0.b, vl8
-; CHECK-NEXT: uzp1 z0.h, z2.h, z2.h
+; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z2.h, z4.h, z4.h
-; CHECK-NEXT: splice z2.h, p1, z2.h, z0.h
-; CHECK-NEXT: uzp1 z0.b, z2.b, z2.b
+; CHECK-NEXT: uzp1 z3.h, z6.h, z6.h
+; CHECK-NEXT: splice z2.h, p0, z2.h, z1.h
+; CHECK-NEXT: splice z3.h, p0, z3.h, z0.h
+; CHECK-NEXT: ptrue p0.b, vl8
+; CHECK-NEXT: uzp1 z1.b, z2.b, z2.b
+; CHECK-NEXT: uzp1 z0.b, z3.b, z3.b
; CHECK-NEXT: splice z0.b, p0, z0.b, z1.b
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
@@ -507,62 +507,62 @@
define void @trunc_v32i64_v32i8(ptr %in, ptr %out) nounwind {
; CHECK-LABEL: trunc_v32i64_v32i8:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0, #224]
+; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: ptrue p1.h, vl4
-; CHECK-NEXT: ptrue p2.b, vl8
-; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT: ldp q2, q3, [x0, #192]
+; CHECK-NEXT: ldp q2, q3, [x0, #224]
+; CHECK-NEXT: ldp q4, q5, [x0, #32]
+; CHECK-NEXT: ldp q6, q7, [x0, #64]
; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
-; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
-; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s
-; CHECK-NEXT: ldp q1, q16, [x0, #160]
-; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
-; CHECK-NEXT: splice z2.h, p1, z2.h, z0.h
-; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
-; CHECK-NEXT: uzp1 z0.b, z2.b, z2.b
-; CHECK-NEXT: ldp q3, q17, [x0, #128]
-; CHECK-NEXT: uzp1 z16.s, z16.s, z16.s
-; CHECK-NEXT: splice z1.s, p0, z1.s, z16.s
-; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
+; CHECK-NEXT: ldp q16, q17, [x0, #192]
; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s
+; CHECK-NEXT: ldp q18, q19, [x0, #128]
+; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
+; CHECK-NEXT: ldp q20, q21, [x0, #160]
+; CHECK-NEXT: uzp1 z7.s, z7.s, z7.s
+; CHECK-NEXT: ldp q22, q23, [x0, #96]
; CHECK-NEXT: uzp1 z17.s, z17.s, z17.s
-; CHECK-NEXT: splice z3.s, p0, z3.s, z17.s
-; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT: splice z3.h, p1, z3.h, z1.h
-; CHECK-NEXT: ldp q4, q5, [x0]
-; CHECK-NEXT: uzp1 z1.b, z3.b, z3.b
-; CHECK-NEXT: splice z1.b, p2, z1.b, z0.b
-; CHECK-NEXT: uzp1 z4.s, z4.s, z4.s
-; CHECK-NEXT: ldp q6, q7, [x0, #64]
-; CHECK-NEXT: uzp1 z5.s, z5.s, z5.s
-; CHECK-NEXT: splice z4.s, p0, z4.s, z5.s
+; CHECK-NEXT: uzp1 z16.s, z16.s, z16.s
+; CHECK-NEXT: uzp1 z19.s, z19.s, z19.s
+; CHECK-NEXT: uzp1 z18.s, z18.s, z18.s
+; CHECK-NEXT: uzp1 z21.s, z21.s, z21.s
+; CHECK-NEXT: uzp1 z20.s, z20.s, z20.s
; CHECK-NEXT: uzp1 z6.s, z6.s, z6.s
-; CHECK-NEXT: ldp q18, q19, [x0, #96]
-; CHECK-NEXT: uzp1 z7.s, z7.s, z7.s
+; CHECK-NEXT: uzp1 z23.s, z23.s, z23.s
+; CHECK-NEXT: uzp1 z22.s, z22.s, z22.s
+; CHECK-NEXT: uzp1 z5.s, z5.s, z5.s
+; CHECK-NEXT: uzp1 z4.s, z4.s, z4.s
+; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s
+; CHECK-NEXT: splice z16.s, p0, z16.s, z17.s
+; CHECK-NEXT: splice z18.s, p0, z18.s, z19.s
+; CHECK-NEXT: splice z20.s, p0, z20.s, z21.s
; CHECK-NEXT: splice z6.s, p0, z6.s, z7.s
+; CHECK-NEXT: splice z22.s, p0, z22.s, z23.s
+; CHECK-NEXT: splice z4.s, p0, z4.s, z5.s
+; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: ptrue p0.h, vl4
+; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h
+; CHECK-NEXT: uzp1 z2.h, z16.h, z16.h
+; CHECK-NEXT: uzp1 z5.h, z18.h, z18.h
+; CHECK-NEXT: uzp1 z3.h, z20.h, z20.h
; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h
-; CHECK-NEXT: uzp1 z16.s, z18.s, z18.s
-; CHECK-NEXT: ldp q2, q3, [x0, #32]
-; CHECK-NEXT: uzp1 z0.s, z19.s, z19.s
-; CHECK-NEXT: splice z16.s, p0, z16.s, z0.s
-; CHECK-NEXT: uzp1 z0.h, z16.h, z16.h
-; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
-; CHECK-NEXT: splice z6.h, p1, z6.h, z0.h
-; CHECK-NEXT: uzp1 z0.b, z6.b, z6.b
-; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s
-; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s
-; CHECK-NEXT: uzp1 z3.h, z4.h, z4.h
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
-; CHECK-NEXT: splice z3.h, p1, z3.h, z2.h
-; CHECK-NEXT: uzp1 z2.b, z3.b, z3.b
-; CHECK-NEXT: splice z2.b, p2, z2.b, z0.b
-; CHECK-NEXT: add z0.b, z1.b, z1.b
+; CHECK-NEXT: uzp1 z7.h, z22.h, z22.h
+; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
+; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT: splice z2.h, p0, z2.h, z1.h
+; CHECK-NEXT: splice z5.h, p0, z5.h, z3.h
+; CHECK-NEXT: splice z6.h, p0, z6.h, z7.h
+; CHECK-NEXT: splice z0.h, p0, z0.h, z4.h
+; CHECK-NEXT: ptrue p0.b, vl8
+; CHECK-NEXT: uzp1 z1.b, z2.b, z2.b
+; CHECK-NEXT: uzp1 z2.b, z5.b, z5.b
+; CHECK-NEXT: uzp1 z3.b, z6.b, z6.b
+; CHECK-NEXT: uzp1 z0.b, z0.b, z0.b
+; CHECK-NEXT: splice z2.b, p0, z2.b, z1.b
+; CHECK-NEXT: splice z0.b, p0, z0.b, z3.b
; CHECK-NEXT: add z1.b, z2.b, z2.b
-; CHECK-NEXT: stp q1, q0, [x1]
+; CHECK-NEXT: add z0.b, z0.b, z0.b
+; CHECK-NEXT: stp q0, q1, [x1]
; CHECK-NEXT: ret
%a = load <32 x i64>, ptr %in
%b = trunc <32 x i64> %a to <32 x i8>
@@ -580,8 +580,8 @@
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
+; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
@@ -594,18 +594,18 @@
define <8 x i16> @trunc_v8i64_v8i16(ptr %in) nounwind {
; CHECK-LABEL: trunc_v8i64_v8i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0, #32]
+; CHECK-NEXT: ldp q1, q0, [x0, #32]
; CHECK-NEXT: ptrue p0.s, vl2
+; CHECK-NEXT: ldp q2, q3, [x0]
; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT: ldp q3, q2, [x0]
; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
-; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
-; CHECK-NEXT: uzp1 z1.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s
; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
-; CHECK-NEXT: splice z3.s, p0, z3.s, z2.s
+; CHECK-NEXT: splice z1.s, p0, z1.s, z0.s
+; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s
; CHECK-NEXT: ptrue p0.h, vl4
-; CHECK-NEXT: uzp1 z0.h, z3.h, z3.h
+; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
+; CHECK-NEXT: uzp1 z0.h, z2.h, z2.h
; CHECK-NEXT: splice z0.h, p0, z0.h, z1.h
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
@@ -618,32 +618,32 @@
define void @trunc_v16i64_v16i16(ptr %in, ptr %out) nounwind {
; CHECK-LABEL: trunc_v16i64_v16i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0, #96]
+; CHECK-NEXT: ldp q0, q1, [x0, #32]
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: ptrue p1.h, vl4
-; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT: ldp q2, q3, [x0, #64]
+; CHECK-NEXT: ldp q2, q3, [x0, #96]
+; CHECK-NEXT: ldp q4, q5, [x0, #64]
+; CHECK-NEXT: ldp q6, q7, [x0]
; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
-; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
-; CHECK-NEXT: ldp q4, q5, [x0]
; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s
+; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
+; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT: uzp1 z5.s, z5.s, z5.s
+; CHECK-NEXT: uzp1 z4.s, z4.s, z4.s
+; CHECK-NEXT: uzp1 z7.s, z7.s, z7.s
+; CHECK-NEXT: uzp1 z6.s, z6.s, z6.s
; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s
+; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: splice z4.s, p0, z4.s, z5.s
+; CHECK-NEXT: splice z6.s, p0, z6.s, z7.s
+; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h
-; CHECK-NEXT: splice z1.h, p1, z1.h, z0.h
-; CHECK-NEXT: uzp1 z4.s, z4.s, z4.s
-; CHECK-NEXT: ldp q6, q7, [x0, #32]
-; CHECK-NEXT: uzp1 z3.s, z5.s, z5.s
-; CHECK-NEXT: splice z4.s, p0, z4.s, z3.s
-; CHECK-NEXT: uzp1 z2.s, z6.s, z6.s
-; CHECK-NEXT: uzp1 z0.s, z7.s, z7.s
-; CHECK-NEXT: splice z2.s, p0, z2.s, z0.s
-; CHECK-NEXT: uzp1 z0.h, z2.h, z2.h
+; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT: uzp1 z2.h, z4.h, z4.h
-; CHECK-NEXT: splice z2.h, p1, z2.h, z0.h
-; CHECK-NEXT: add z0.h, z1.h, z1.h
-; CHECK-NEXT: add z1.h, z2.h, z2.h
+; CHECK-NEXT: uzp1 z3.h, z6.h, z6.h
+; CHECK-NEXT: splice z2.h, p0, z2.h, z1.h
+; CHECK-NEXT: splice z3.h, p0, z3.h, z0.h
+; CHECK-NEXT: add z0.h, z2.h, z2.h
+; CHECK-NEXT: add z1.h, z3.h, z3.h
; CHECK-NEXT: stp q1, q0, [x1]
; CHECK-NEXT: ret
%a = load <16 x i64>, ptr %in
@@ -657,58 +657,58 @@
define void @trunc_v32i64_v32i16(ptr %in, ptr %out) nounwind {
; CHECK-LABEL: trunc_v32i64_v32i16:
; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0, #128]
+; CHECK-NEXT: ldp q0, q1, [x0, #64]
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: ptrue p1.h, vl4
-; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: ldp q2, q3, [x0, #160]
+; CHECK-NEXT: ptrue p1.h, vl4
+; CHECK-NEXT: ldp q4, q5, [x0, #96]
+; CHECK-NEXT: ldp q6, q7, [x0]
; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
-; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
-; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
-; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
+; CHECK-NEXT: ldp q16, q17, [x0, #128]
; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s
-; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s
-; CHECK-NEXT: uzp1 z2.h, z2.h, z2.h
-; CHECK-NEXT: ldp q1, q17, [x0, #224]
-; CHECK-NEXT: splice z0.h, p1, z0.h, z2.h
-; CHECK-NEXT: add z0.h, z0.h, z0.h
-; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
-; CHECK-NEXT: ldp q18, q2, [x0, #192]
+; CHECK-NEXT: ldp q18, q19, [x0, #192]
+; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
+; CHECK-NEXT: ldp q20, q21, [x0, #224]
+; CHECK-NEXT: uzp1 z7.s, z7.s, z7.s
+; CHECK-NEXT: ldp q22, q23, [x0, #32]
; CHECK-NEXT: uzp1 z17.s, z17.s, z17.s
-; CHECK-NEXT: splice z1.s, p0, z1.s, z17.s
-; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
+; CHECK-NEXT: uzp1 z16.s, z16.s, z16.s
+; CHECK-NEXT: uzp1 z19.s, z19.s, z19.s
; CHECK-NEXT: uzp1 z18.s, z18.s, z18.s
-; CHECK-NEXT: ldp q4, q5, [x0, #64]
-; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
-; CHECK-NEXT: splice z18.s, p0, z18.s, z2.s
-; CHECK-NEXT: uzp1 z2.h, z18.h, z18.h
-; CHECK-NEXT: splice z2.h, p1, z2.h, z1.h
-; CHECK-NEXT: uzp1 z4.s, z4.s, z4.s
-; CHECK-NEXT: ldp q6, q7, [x0, #96]
+; CHECK-NEXT: uzp1 z21.s, z21.s, z21.s
+; CHECK-NEXT: uzp1 z20.s, z20.s, z20.s
+; CHECK-NEXT: uzp1 z6.s, z6.s, z6.s
+; CHECK-NEXT: uzp1 z23.s, z23.s, z23.s
+; CHECK-NEXT: uzp1 z22.s, z22.s, z22.s
; CHECK-NEXT: uzp1 z5.s, z5.s, z5.s
+; CHECK-NEXT: uzp1 z4.s, z4.s, z4.s
+; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
+; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s
+; CHECK-NEXT: splice z16.s, p0, z16.s, z17.s
+; CHECK-NEXT: splice z18.s, p0, z18.s, z19.s
+; CHECK-NEXT: splice z20.s, p0, z20.s, z21.s
+; CHECK-NEXT: splice z6.s, p0, z6.s, z7.s
+; CHECK-NEXT: splice z22.s, p0, z22.s, z23.s
; CHECK-NEXT: splice z4.s, p0, z4.s, z5.s
+; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: uzp1 z1.h, z2.h, z2.h
+; CHECK-NEXT: uzp1 z2.h, z16.h, z16.h
+; CHECK-NEXT: uzp1 z5.h, z18.h, z18.h
+; CHECK-NEXT: uzp1 z3.h, z20.h, z20.h
+; CHECK-NEXT: uzp1 z6.h, z6.h, z6.h
+; CHECK-NEXT: uzp1 z7.h, z22.h, z22.h
; CHECK-NEXT: uzp1 z4.h, z4.h, z4.h
-; CHECK-NEXT: uzp1 z6.s, z6.s, z6.s
-; CHECK-NEXT: ldp q3, q16, [x0]
-; CHECK-NEXT: uzp1 z1.s, z7.s, z7.s
-; CHECK-NEXT: splice z6.s, p0, z6.s, z1.s
-; CHECK-NEXT: uzp1 z1.h, z6.h, z6.h
-; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s
-; CHECK-NEXT: splice z4.h, p1, z4.h, z1.h
+; CHECK-NEXT: uzp1 z0.h, z0.h, z0.h
+; CHECK-NEXT: splice z2.h, p1, z2.h, z1.h
+; CHECK-NEXT: splice z5.h, p1, z5.h, z3.h
+; CHECK-NEXT: splice z6.h, p1, z6.h, z7.h
+; CHECK-NEXT: splice z0.h, p1, z0.h, z4.h
; CHECK-NEXT: add z1.h, z2.h, z2.h
-; CHECK-NEXT: ldp q19, q20, [x0, #32]
-; CHECK-NEXT: uzp1 z16.s, z16.s, z16.s
-; CHECK-NEXT: stp q0, q1, [x1, #32]
-; CHECK-NEXT: splice z3.s, p0, z3.s, z16.s
-; CHECK-NEXT: add z1.h, z4.h, z4.h
-; CHECK-NEXT: uzp1 z3.h, z3.h, z3.h
-; CHECK-NEXT: uzp1 z18.s, z19.s, z19.s
-; CHECK-NEXT: uzp1 z17.s, z20.s, z20.s
-; CHECK-NEXT: splice z18.s, p0, z18.s, z17.s
-; CHECK-NEXT: uzp1 z16.h, z18.h, z18.h
-; CHECK-NEXT: splice z3.h, p1, z3.h, z16.h
-; CHECK-NEXT: add z0.h, z3.h, z3.h
-; CHECK-NEXT: stp q0, q1, [x1]
+; CHECK-NEXT: add z2.h, z5.h, z5.h
+; CHECK-NEXT: add z3.h, z6.h, z6.h
+; CHECK-NEXT: add z0.h, z0.h, z0.h
+; CHECK-NEXT: stp q1, q2, [x1, #32]
+; CHECK-NEXT: stp q3, q0, [x1]
; CHECK-NEXT: ret
%a = load <32 x i64>, ptr %in
%b = trunc <32 x i64> %a to <32 x i16>
@@ -726,8 +726,8 @@
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0]
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
+; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: ret
@@ -742,15 +742,15 @@
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0, #32]
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT: ldp q3, q2, [x0]
+; CHECK-NEXT: ldp q2, q3, [x0]
; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
-; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
-; CHECK-NEXT: add z0.s, z0.s, z0.s
+; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s
; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
-; CHECK-NEXT: splice z3.s, p0, z3.s, z2.s
-; CHECK-NEXT: add z1.s, z3.s, z3.s
+; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s
+; CHECK-NEXT: add z0.s, z0.s, z0.s
+; CHECK-NEXT: add z1.s, z2.s, z2.s
; CHECK-NEXT: stp q1, q0, [x1]
; CHECK-NEXT: ret
%a = load <8 x i64>, ptr %in
@@ -766,27 +766,27 @@
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0, #64]
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT: ldp q2, q3, [x0, #96]
+; CHECK-NEXT: ldp q2, q3, [x0]
+; CHECK-NEXT: ldp q4, q5, [x0, #96]
+; CHECK-NEXT: ldp q6, q7, [x0, #32]
; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
-; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
-; CHECK-NEXT: add z0.s, z0.s, z0.s
-; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
-; CHECK-NEXT: ldp q4, q5, [x0]
+; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s
-; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s
+; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
+; CHECK-NEXT: uzp1 z5.s, z5.s, z5.s
; CHECK-NEXT: uzp1 z4.s, z4.s, z4.s
-; CHECK-NEXT: ldp q6, q7, [x0, #32]
-; CHECK-NEXT: uzp1 z1.s, z5.s, z5.s
-; CHECK-NEXT: splice z4.s, p0, z4.s, z1.s
-; CHECK-NEXT: uzp1 z3.s, z6.s, z6.s
-; CHECK-NEXT: uzp1 z1.s, z7.s, z7.s
-; CHECK-NEXT: splice z3.s, p0, z3.s, z1.s
-; CHECK-NEXT: add z1.s, z2.s, z2.s
+; CHECK-NEXT: uzp1 z7.s, z7.s, z7.s
+; CHECK-NEXT: uzp1 z6.s, z6.s, z6.s
+; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s
+; CHECK-NEXT: splice z4.s, p0, z4.s, z5.s
+; CHECK-NEXT: splice z6.s, p0, z6.s, z7.s
+; CHECK-NEXT: add z0.s, z0.s, z0.s
+; CHECK-NEXT: add z2.s, z2.s, z2.s
+; CHECK-NEXT: add z1.s, z4.s, z4.s
+; CHECK-NEXT: add z3.s, z6.s, z6.s
; CHECK-NEXT: stp q0, q1, [x1, #32]
-; CHECK-NEXT: add z0.s, z4.s, z4.s
-; CHECK-NEXT: add z1.s, z3.s, z3.s
-; CHECK-NEXT: stp q0, q1, [x1]
+; CHECK-NEXT: stp q2, q3, [x1]
; CHECK-NEXT: ret
%a = load <16 x i64>, ptr %in
%b = trunc <16 x i64> %a to <16 x i32>
@@ -801,49 +801,49 @@
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q0, q1, [x0, #192]
; CHECK-NEXT: ptrue p0.s, vl2
-; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
-; CHECK-NEXT: ldp q2, q3, [x0, #224]
+; CHECK-NEXT: ldp q6, q7, [x0, #224]
+; CHECK-NEXT: ldp q2, q3, [x0, #32]
; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
-; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
-; CHECK-NEXT: add z0.s, z0.s, z0.s
-; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
-; CHECK-NEXT: ldp q6, q7, [x0, #128]
-; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s
-; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s
-; CHECK-NEXT: add z2.s, z2.s, z2.s
-; CHECK-NEXT: uzp1 z6.s, z6.s, z6.s
-; CHECK-NEXT: ldp q1, q3, [x0, #160]
+; CHECK-NEXT: uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT: uzp1 z7.s, z7.s, z7.s
-; CHECK-NEXT: splice z6.s, p0, z6.s, z7.s
-; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
-; CHECK-NEXT: ldp q16, q17, [x0, #64]
+; CHECK-NEXT: uzp1 z6.s, z6.s, z6.s
+; CHECK-NEXT: ldp q4, q5, [x0]
; CHECK-NEXT: uzp1 z3.s, z3.s, z3.s
-; CHECK-NEXT: splice z1.s, p0, z1.s, z3.s
-; CHECK-NEXT: add z1.s, z1.s, z1.s
+; CHECK-NEXT: ldp q16, q17, [x0, #64]
+; CHECK-NEXT: uzp1 z2.s, z2.s, z2.s
+; CHECK-NEXT: ldp q18, q19, [x0, #128]
+; CHECK-NEXT: splice z0.s, p0, z0.s, z1.s
+; CHECK-NEXT: ldp q20, q21, [x0, #160]
+; CHECK-NEXT: splice z6.s, p0, z6.s, z7.s
+; CHECK-NEXT: ldp q22, q23, [x0, #96]
+; CHECK-NEXT: uzp1 z1.s, z17.s, z17.s
+; CHECK-NEXT: uzp1 z19.s, z19.s, z19.s
+; CHECK-NEXT: uzp1 z18.s, z18.s, z18.s
; CHECK-NEXT: uzp1 z16.s, z16.s, z16.s
-; CHECK-NEXT: ldp q7, q18, [x0, #96]
-; CHECK-NEXT: uzp1 z17.s, z17.s, z17.s
-; CHECK-NEXT: splice z16.s, p0, z16.s, z17.s
-; CHECK-NEXT: uzp1 z7.s, z7.s, z7.s
-; CHECK-NEXT: ldp q4, q5, [x0, #32]
-; CHECK-NEXT: uzp1 z3.s, z18.s, z18.s
-; CHECK-NEXT: splice z7.s, p0, z7.s, z3.s
+; CHECK-NEXT: uzp1 z21.s, z21.s, z21.s
+; CHECK-NEXT: uzp1 z20.s, z20.s, z20.s
+; CHECK-NEXT: uzp1 z5.s, z5.s, z5.s
+; CHECK-NEXT: uzp1 z7.s, z23.s, z23.s
+; CHECK-NEXT: uzp1 z17.s, z22.s, z22.s
; CHECK-NEXT: uzp1 z4.s, z4.s, z4.s
-; CHECK-NEXT: ldp q19, q20, [x0]
-; CHECK-NEXT: uzp1 z3.s, z5.s, z5.s
-; CHECK-NEXT: stp q0, q2, [x1, #96]
-; CHECK-NEXT: add z0.s, z6.s, z6.s
-; CHECK-NEXT: splice z4.s, p0, z4.s, z3.s
-; CHECK-NEXT: stp q0, q1, [x1, #64]
+; CHECK-NEXT: splice z2.s, p0, z2.s, z3.s
+; CHECK-NEXT: add z0.s, z0.s, z0.s
+; CHECK-NEXT: splice z18.s, p0, z18.s, z19.s
+; CHECK-NEXT: splice z16.s, p0, z16.s, z1.s
+; CHECK-NEXT: add z1.s, z6.s, z6.s
+; CHECK-NEXT: splice z20.s, p0, z20.s, z21.s
+; CHECK-NEXT: splice z17.s, p0, z17.s, z7.s
+; CHECK-NEXT: splice z4.s, p0, z4.s, z5.s
+; CHECK-NEXT: stp q0, q1, [x1, #96]
+; CHECK-NEXT: add z2.s, z2.s, z2.s
+; CHECK-NEXT: add z5.s, z18.s, z18.s
; CHECK-NEXT: add z0.s, z16.s, z16.s
-; CHECK-NEXT: uzp1 z18.s, z19.s, z19.s
-; CHECK-NEXT: add z1.s, z7.s, z7.s
+; CHECK-NEXT: add z3.s, z20.s, z20.s
+; CHECK-NEXT: add z1.s, z17.s, z17.s
+; CHECK-NEXT: add z4.s, z4.s, z4.s
+; CHECK-NEXT: stp q5, q3, [x1, #64]
+; CHECK-NEXT: stp q4, q2, [x1]
; CHECK-NEXT: stp q0, q1, [x1, #32]
-; CHECK-NEXT: add z1.s, z4.s, z4.s
-; CHECK-NEXT: uzp1 z17.s, z20.s, z20.s
-; CHECK-NEXT: splice z18.s, p0, z18.s, z17.s
-; CHECK-NEXT: add z0.s, z18.s, z18.s
-; CHECK-NEXT: stp q0, q1, [x1]
; CHECK-NEXT: ret
%a = load <32 x i64>, ptr %in
%b = trunc <32 x i64> %a to <32 x i32>
diff --git a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll
--- a/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll
+++ b/llvm/test/CodeGen/AArch64/sve-streaming-mode-fixed-length-vector-shuffle.ll
@@ -13,14 +13,14 @@
; CHECK-NEXT: mov z1.h, z0.h[1]
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: mov z2.h, z0.h[2]
-; CHECK-NEXT: mov z0.h, z0.h[3]
-; CHECK-NEXT: fmov w9, s1
-; CHECK-NEXT: fmov w10, s2
-; CHECK-NEXT: fmov w11, s0
+; CHECK-NEXT: mov z3.h, z0.h[3]
;
-; CHECK-NEXT: strh w9, [sp, #14]
-; CHECK-NEXT: strh w10, [sp, #12]
-; CHECK-NEXT: strh w11, [sp, #10]
+; CHECK-NEXT: fmov w8, s1
+; CHECK-NEXT: fmov w9, s2
+; CHECK-NEXT: strh w8, [sp, #14]
+; CHECK-NEXT: fmov w8, s3
+; CHECK-NEXT: strh w9, [sp, #12]
+; CHECK-NEXT: strh w8, [sp, #10]
 ; CHECK-NEXT: ldr d0, [sp, #8]
 ; CHECK-NEXT: add sp, sp, #16
 ; CHECK-NEXT: ret
@@ -61,15 +61,14 @@
 ; CHECK-LABEL: shuffle_ext_byone_v32i8:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldr q0, [x0, #16]
-; CHECK-NEXT: ldr q1, [x1]
+; CHECK-NEXT: ldp q1, q3, [x1]
 ; CHECK-NEXT: mov z0.b, z0.b[15]
 ; CHECK-NEXT: mov z2.b, z1.b[15]
 ; CHECK-NEXT: fmov w8, s0
-; CHECK-NEXT: ldr q0, [x1, #16]
-; CHECK-NEXT: fmov w9, s2
 ; CHECK-NEXT: insr z1.b, w8
-; CHECK-NEXT: insr z0.b, w9
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: fmov w8, s2
+; CHECK-NEXT: insr z3.b, w8
+; CHECK-NEXT: stp q1, q3, [x0]
 ; CHECK-NEXT: ret
 %op1 = load <32 x i8>, ptr %a
 %op2 = load <32 x i8>, ptr %b
@@ -84,8 +83,8 @@
 define <2 x i16> @shuffle_ext_byone_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
 ; CHECK-LABEL: shuffle_ext_byone_v2i16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
 ; CHECK-NEXT: revw z0.d, p0/m, z0.d
 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT: ret
@@ -125,15 +124,14 @@
 ; CHECK-LABEL: shuffle_ext_byone_v16i16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldr q0, [x0, #16]
-; CHECK-NEXT: ldr q1, [x1]
+; CHECK-NEXT: ldp q1, q3, [x1]
 ; CHECK-NEXT: mov z0.h, z0.h[7]
 ; CHECK-NEXT: mov z2.h, z1.h[7]
 ; CHECK-NEXT: fmov w8, s0
-; CHECK-NEXT: ldr q0, [x1, #16]
-; CHECK-NEXT: fmov w9, s2
 ; CHECK-NEXT: insr z1.h, w8
-; CHECK-NEXT: insr z0.h, w9
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: fmov w8, s2
+; CHECK-NEXT: insr z3.h, w8
+; CHECK-NEXT: stp q1, q3, [x0]
 ; CHECK-NEXT: ret
 %op1 = load <16 x i16>, ptr %a
 %op2 = load <16 x i16>, ptr %b
@@ -175,15 +173,14 @@
 ; CHECK-LABEL: shuffle_ext_byone_v8i32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldr q0, [x0, #16]
-; CHECK-NEXT: ldr q1, [x1]
+; CHECK-NEXT: ldp q1, q3, [x1]
 ; CHECK-NEXT: mov z0.s, z0.s[3]
 ; CHECK-NEXT: mov z2.s, z1.s[3]
 ; CHECK-NEXT: fmov w8, s0
-; CHECK-NEXT: ldr q0, [x1, #16]
-; CHECK-NEXT: fmov w9, s2
 ; CHECK-NEXT: insr z1.s, w8
-; CHECK-NEXT: insr z0.s, w9
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: fmov w8, s2
+; CHECK-NEXT: insr z3.s, w8
+; CHECK-NEXT: stp q1, q3, [x0]
 ; CHECK-NEXT: ret
 %op1 = load <8 x i32>, ptr %a
 %op2 = load <8 x i32>, ptr %b
@@ -210,15 +207,14 @@
 ; CHECK-LABEL: shuffle_ext_byone_v4i64:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldr q0, [x0, #16]
-; CHECK-NEXT: ldr q1, [x1]
+; CHECK-NEXT: ldp q1, q3, [x1]
 ; CHECK-NEXT: mov z0.d, z0.d[1]
 ; CHECK-NEXT: mov z2.d, z1.d[1]
 ; CHECK-NEXT: fmov x8, d0
-; CHECK-NEXT: ldr q0, [x1, #16]
-; CHECK-NEXT: fmov x9, d2
 ; CHECK-NEXT: insr z1.d, x8
-; CHECK-NEXT: insr z0.d, x9
-; CHECK-NEXT: stp q1, q0, [x0]
+; CHECK-NEXT: fmov x8, d2
+; CHECK-NEXT: insr z3.d, x8
+; CHECK-NEXT: stp q1, q3, [x0]
 ; CHECK-NEXT: ret
 %op1 = load <4 x i64>, ptr %a
 %op2 = load <4 x i64>, ptr %b
@@ -232,10 +228,10 @@
 ; CHECK-LABEL: shuffle_ext_byone_v4f16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT: mov z0.h, z0.h[3]
-; CHECK-NEXT: insr z1.h, h0
+; CHECK-NEXT: mov z2.h, z0.h[3]
 ; CHECK-NEXT: fmov d0, d1
+; CHECK-NEXT: insr z0.h, h2
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT: ret
 %ret = shufflevector <4 x half> %op1, <4 x half> %op2, <4 x i32>
 ret <4 x half> %ret
@@ -245,10 +241,10 @@
 ; CHECK-LABEL: shuffle_ext_byone_v8f16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT: mov z0.h, z0.h[7]
-; CHECK-NEXT: insr z1.h, h0
+; CHECK-NEXT: mov z2.h, z0.h[7]
 ; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: insr z0.h, h2
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT: ret
 %ret = shufflevector <8 x half> %op1, <8 x half> %op2, <8 x i32>
 ret <8 x half> %ret
@@ -257,13 +253,13 @@
 define void @shuffle_ext_byone_v16f16(ptr %a, ptr %b) {
 ; CHECK-LABEL: shuffle_ext_byone_v16f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q1, q2, [x1]
-; CHECK-NEXT: mov z3.h, z1.h[7]
+; CHECK-NEXT: ldp q1, q3, [x1]
 ; CHECK-NEXT: ldr q0, [x0, #16]
-; CHECK-NEXT: insr z2.h, h3
 ; CHECK-NEXT: mov z0.h, z0.h[7]
+; CHECK-NEXT: mov z2.h, z1.h[7]
 ; CHECK-NEXT: insr z1.h, h0
-; CHECK-NEXT: stp q1, q2, [x0]
+; CHECK-NEXT: insr z3.h, h2
+; CHECK-NEXT: stp q1, q3, [x0]
 ; CHECK-NEXT: ret
 %op1 = load <16 x half>, ptr %a
 %op2 = load <16 x half>, ptr %b
@@ -277,10 +273,10 @@
 ; CHECK-LABEL: shuffle_ext_byone_v2f32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
-; CHECK-NEXT: // kill: def $d1 killed $d1 def $z1
-; CHECK-NEXT: mov z0.s, z0.s[1]
-; CHECK-NEXT: insr z1.s, s0
+; CHECK-NEXT: mov z2.s, z0.s[1]
 ; CHECK-NEXT: fmov d0, d1
+; CHECK-NEXT: insr z0.s, s2
+; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0
 ; CHECK-NEXT: ret
 %ret = shufflevector <2 x float> %op1, <2 x float> %op2, <2 x i32>
 ret <2 x float> %ret
@@ -290,10 +286,10 @@
 ; CHECK-LABEL: shuffle_ext_byone_v4f32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT: mov z0.s, z0.s[3]
-; CHECK-NEXT: insr z1.s, s0
+; CHECK-NEXT: mov z2.s, z0.s[3]
 ; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: insr z0.s, s2
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT: ret
 %ret = shufflevector <4 x float> %op1, <4 x float> %op2, <4 x i32>
 ret <4 x float> %ret
@@ -302,13 +298,13 @@
 define void @shuffle_ext_byone_v8f32(ptr %a, ptr %b) {
 ; CHECK-LABEL: shuffle_ext_byone_v8f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q1, q2, [x1]
-; CHECK-NEXT: mov z3.s, z1.s[3]
+; CHECK-NEXT: ldp q1, q3, [x1]
 ; CHECK-NEXT: ldr q0, [x0, #16]
-; CHECK-NEXT: insr z2.s, s3
 ; CHECK-NEXT: mov z0.s, z0.s[3]
+; CHECK-NEXT: mov z2.s, z1.s[3]
 ; CHECK-NEXT: insr z1.s, s0
-; CHECK-NEXT: stp q1, q2, [x0]
+; CHECK-NEXT: insr z3.s, s2
+; CHECK-NEXT: stp q1, q3, [x0]
 ; CHECK-NEXT: ret
 %op1 = load <8 x float>, ptr %a
 %op2 = load <8 x float>, ptr %b
@@ -321,10 +317,10 @@
 ; CHECK-LABEL: shuffle_ext_byone_v2f64:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
-; CHECK-NEXT: // kill: def $q1 killed $q1 def $z1
-; CHECK-NEXT: mov z0.d, z0.d[1]
-; CHECK-NEXT: insr z1.d, d0
+; CHECK-NEXT: mov z2.d, z0.d[1]
 ; CHECK-NEXT: mov z0.d, z1.d
+; CHECK-NEXT: insr z0.d, d2
+; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
 ; CHECK-NEXT: ret
 %ret = shufflevector <2 x double> %op1, <2 x double> %op2, <2 x i32>
 ret <2 x double> %ret
@@ -333,13 +329,13 @@
 define void @shuffle_ext_byone_v4f64(ptr %a, ptr %b) {
 ; CHECK-LABEL: shuffle_ext_byone_v4f64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q1, q2, [x1]
-; CHECK-NEXT: mov z3.d, z1.d[1]
+; CHECK-NEXT: ldp q1, q3, [x1]
 ; CHECK-NEXT: ldr q0, [x0, #16]
-; CHECK-NEXT: insr z2.d, d3
 ; CHECK-NEXT: mov z0.d, z0.d[1]
+; CHECK-NEXT: mov z2.d, z1.d[1]
 ; CHECK-NEXT: insr z1.d, d0
-; CHECK-NEXT: stp q1, q2, [x0]
+; CHECK-NEXT: insr z3.d, d2
+; CHECK-NEXT: stp q1, q3, [x0]
 ; CHECK-NEXT: ret
 %op1 = load <4 x double>, ptr %a
 %op2 = load <4 x double>, ptr %b
@@ -351,13 +347,13 @@
 define void @shuffle_ext_byone_reverse(ptr %a, ptr %b) {
 ; CHECK-LABEL: shuffle_ext_byone_reverse:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q1, q2, [x0]
-; CHECK-NEXT: mov z3.d, z1.d[1]
+; CHECK-NEXT: ldp q1, q3, [x0]
 ; CHECK-NEXT: ldr q0, [x1, #16]
-; CHECK-NEXT: insr z2.d, d3
 ; CHECK-NEXT: mov z0.d, z0.d[1]
+; CHECK-NEXT: mov z2.d, z1.d[1]
 ; CHECK-NEXT: insr z1.d, d0
-; CHECK-NEXT: stp q1, q2, [x0]
+; CHECK-NEXT: insr z3.d, d2
+; CHECK-NEXT: stp q1, q3, [x0]
 ; CHECK-NEXT: ret
 %op1 = load <4 x double>, ptr %a
 %op2 = load <4 x double>, ptr %b
diff --git a/llvm/test/CodeGen/AArch64/sve-tailcall.ll b/llvm/test/CodeGen/AArch64/sve-tailcall.ll
--- a/llvm/test/CodeGen/AArch64/sve-tailcall.ll
+++ b/llvm/test/CodeGen/AArch64/sve-tailcall.ll
@@ -84,33 +84,33 @@
 ; CHECK-NEXT: //NO_APP
 ; CHECK-NEXT: bl non_sve_callee
 ; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
-; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
-; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
-; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
-; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
-; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
-; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
-; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
-; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
-; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
-; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
-; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
 ; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
 ; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
 ; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
 ; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
 ; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
 ; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
 ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
 ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT: addvl sp, sp, #18
 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
 ; CHECK-NEXT: ret
@@ -159,33 +159,33 @@
 ; CHECK-NEXT: //NO_APP
 ; CHECK-NEXT: bl non_sve_callee
 ; CHECK-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload
-; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
-; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
-; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
-; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
-; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
-; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
-; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
-; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
-; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
-; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
-; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload
 ; CHECK-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload
 ; CHECK-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload
 ; CHECK-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload
 ; CHECK-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload
 ; CHECK-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload
 ; CHECK-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload
 ; CHECK-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload
 ; CHECK-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload
+; CHECK-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT: addvl sp, sp, #18
 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
 ; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve-trunc.ll b/llvm/test/CodeGen/AArch64/sve-trunc.ll
--- a/llvm/test/CodeGen/AArch64/sve-trunc.ll
+++ b/llvm/test/CodeGen/AArch64/sve-trunc.ll
@@ -111,35 +111,37 @@
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
 ; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
 ; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
 ; CHECK-NEXT: .cfi_offset w29, -16
-; CHECK-NEXT: and z7.d, z7.d, #0x1
 ; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: and z7.d, z7.d, #0x1
 ; CHECK-NEXT: and z6.d, z6.d, #0x1
 ; CHECK-NEXT: and z5.d, z5.d, #0x1
 ; CHECK-NEXT: and z4.d, z4.d, #0x1
-; CHECK-NEXT: cmpne p1.d, p0/z, z7.d, #0
-; CHECK-NEXT: cmpne p2.d, p0/z, z6.d, #0
-; CHECK-NEXT: cmpne p3.d, p0/z, z5.d, #0
-; CHECK-NEXT: cmpne p4.d, p0/z, z4.d, #0
 ; CHECK-NEXT: and z3.d, z3.d, #0x1
 ; CHECK-NEXT: and z2.d, z2.d, #0x1
 ; CHECK-NEXT: and z1.d, z1.d, #0x1
 ; CHECK-NEXT: and z0.d, z0.d, #0x1
+; CHECK-NEXT: cmpne p1.d, p0/z, z7.d, #0
+; CHECK-NEXT: cmpne p2.d, p0/z, z6.d, #0
+; CHECK-NEXT: cmpne p3.d, p0/z, z5.d, #0
+; CHECK-NEXT: cmpne p4.d, p0/z, z4.d, #0
+; CHECK-NEXT: cmpne p5.d, p0/z, z3.d, #0
+; CHECK-NEXT: cmpne p6.d, p0/z, z2.d, #0
 ; CHECK-NEXT: uzp1 p1.s, p2.s, p1.s
-; CHECK-NEXT: uzp1 p2.s, p4.s, p3.s
-; CHECK-NEXT: cmpne p3.d, p0/z, z3.d, #0
-; CHECK-NEXT: cmpne p4.d, p0/z, z2.d, #0
-; CHECK-NEXT: cmpne p5.d, p0/z, z1.d, #0
+; CHECK-NEXT: cmpne p2.d, p0/z, z1.d, #0
 ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0
 ; CHECK-NEXT: uzp1 p3.s, p4.s, p3.s
-; CHECK-NEXT: uzp1 p0.s, p0.s, p5.s
+; CHECK-NEXT: uzp1 p4.s, p6.s, p5.s
+; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: uzp1 p0.s, p0.s, p2.s
 ; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload
+; CHECK-NEXT: uzp1 p1.h, p3.h, p1.h
+; CHECK-NEXT: uzp1 p0.h, p0.h, p4.h
 ; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
-; CHECK-NEXT: uzp1 p1.h, p2.h, p1.h
-; CHECK-NEXT: uzp1 p0.h, p0.h, p3.h
 ; CHECK-NEXT: uzp1 p0.b, p0.b, p1.b
 ; CHECK-NEXT: addvl sp, sp, #1
 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/sve-umulo-sdnode.ll b/llvm/test/CodeGen/AArch64/sve-umulo-sdnode.ll
--- a/llvm/test/CodeGen/AArch64/sve-umulo-sdnode.ll
+++ b/llvm/test/CodeGen/AArch64/sve-umulo-sdnode.ll
@@ -87,16 +87,15 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.b
 ; CHECK-NEXT: movprfx z4, z1
-; CHECK-NEXT: mul z4.b, p0/m, z4.b, z3.b
-; CHECK-NEXT: umulh z1.b, p0/m, z1.b, z3.b
-; CHECK-NEXT: movprfx z3, z0
-; CHECK-NEXT: umulh z3.b, p0/m, z3.b, z2.b
-; CHECK-NEXT: cmpne p1.b, p0/z, z1.b, #0
+; CHECK-NEXT: umulh z4.b, p0/m, z4.b, z3.b
+; CHECK-NEXT: movprfx z5, z0
+; CHECK-NEXT: umulh z5.b, p0/m, z5.b, z2.b
+; CHECK-NEXT: mul z1.b, p0/m, z1.b, z3.b
 ; CHECK-NEXT: mul z0.b, p0/m, z0.b, z2.b
-; CHECK-NEXT: cmpne p0.b, p0/z, z3.b, #0
-; CHECK-NEXT: mov z4.b, p1/m, #0 // =0x0
+; CHECK-NEXT: cmpne p1.b, p0/z, z4.b, #0
+; CHECK-NEXT: cmpne p0.b, p0/z, z5.b, #0
 ; CHECK-NEXT: mov z0.b, p0/m, #0 // =0x0
-; CHECK-NEXT: mov z1.d, z4.d
+; CHECK-NEXT: mov z1.b, p1/m, #0 // =0x0
 ; CHECK-NEXT: ret
 %a = call { , } @llvm.umul.with.overflow.nxv32i8( %x, %y)
 %b = extractvalue { , } %a, 0
@@ -112,27 +111,25 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.b
 ; CHECK-NEXT: movprfx z24, z3
-; CHECK-NEXT: mul z24.b, p0/m, z24.b, z7.b
-; CHECK-NEXT: umulh z3.b, p0/m, z3.b, z7.b
-; CHECK-NEXT: cmpne p1.b, p0/z, z3.b, #0
-; CHECK-NEXT: movprfx z3, z2
-; CHECK-NEXT: umulh z3.b, p0/m, z3.b, z6.b
-; CHECK-NEXT: cmpne p2.b, p0/z, z3.b, #0
-; CHECK-NEXT: movprfx z3, z1
-; CHECK-NEXT: mul z3.b, p0/m, z3.b, z5.b
-; CHECK-NEXT: umulh z1.b, p0/m, z1.b, z5.b
-; CHECK-NEXT: mul z2.b, p0/m, z2.b, z6.b
-; CHECK-NEXT: cmpne p3.b, p0/z, z1.b, #0
-; CHECK-NEXT: movprfx z1, z0
-; CHECK-NEXT: umulh z1.b, p0/m, z1.b, z4.b
+; CHECK-NEXT: umulh z24.b, p0/m, z24.b, z7.b
+; CHECK-NEXT: movprfx z25, z0
+; CHECK-NEXT: umulh z25.b, p0/m, z25.b, z4.b
+; CHECK-NEXT: movprfx z26, z2
+; CHECK-NEXT: umulh z26.b, p0/m, z26.b, z6.b
+; CHECK-NEXT: movprfx z27, z1
+; CHECK-NEXT: umulh z27.b, p0/m, z27.b, z5.b
+; CHECK-NEXT: mul z3.b, p0/m, z3.b, z7.b
 ; CHECK-NEXT: mul z0.b, p0/m, z0.b, z4.b
-; CHECK-NEXT: cmpne p0.b, p0/z, z1.b, #0
-; CHECK-NEXT: mov z3.b, p3/m, #0 // =0x0
-; CHECK-NEXT: mov z24.b, p1/m, #0 // =0x0
-; CHECK-NEXT: mov z0.b, p0/m, #0 // =0x0
-; CHECK-NEXT: mov z2.b, p2/m, #0 // =0x0
-; CHECK-NEXT: mov z1.d, z3.d
-; CHECK-NEXT: mov z3.d, z24.d
+; CHECK-NEXT: cmpne p1.b, p0/z, z25.b, #0
+; CHECK-NEXT: mul z2.b, p0/m, z2.b, z6.b
+; CHECK-NEXT: mul z1.b, p0/m, z1.b, z5.b
+; CHECK-NEXT: cmpne p2.b, p0/z, z24.b, #0
+; CHECK-NEXT: cmpne p3.b, p0/z, z26.b, #0
+; CHECK-NEXT: cmpne p0.b, p0/z, z27.b, #0
+; CHECK-NEXT: mov z0.b, p1/m, #0 // =0x0
+; CHECK-NEXT: mov z3.b, p2/m, #0 // =0x0
+; CHECK-NEXT: mov z1.b, p0/m, #0 // =0x0
+; CHECK-NEXT: mov z2.b, p3/m, #0 // =0x0
 ; CHECK-NEXT: ret
 %a = call { , } @llvm.umul.with.overflow.nxv64i8( %x, %y)
 %b = extractvalue { , } %a, 0
@@ -207,16 +204,15 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.h
 ; CHECK-NEXT: movprfx z4, z1
-; CHECK-NEXT: mul z4.h, p0/m, z4.h, z3.h
-; CHECK-NEXT: umulh z1.h, p0/m, z1.h, z3.h
-; CHECK-NEXT: movprfx z3, z0
-; CHECK-NEXT: umulh z3.h, p0/m, z3.h, z2.h
-; CHECK-NEXT: cmpne p1.h, p0/z, z1.h, #0
+; CHECK-NEXT: umulh z4.h, p0/m, z4.h, z3.h
+; CHECK-NEXT: movprfx z5, z0
+; CHECK-NEXT: umulh z5.h, p0/m, z5.h, z2.h
+; CHECK-NEXT: mul z1.h, p0/m, z1.h, z3.h
 ; CHECK-NEXT: mul z0.h, p0/m, z0.h, z2.h
-; CHECK-NEXT: cmpne p0.h, p0/z, z3.h, #0
-; CHECK-NEXT: mov z4.h, p1/m, #0 // =0x0
+; CHECK-NEXT: cmpne p1.h, p0/z, z4.h, #0
+; CHECK-NEXT: cmpne p0.h, p0/z, z5.h, #0
 ; CHECK-NEXT: mov z0.h, p0/m, #0 // =0x0
-; CHECK-NEXT: mov z1.d, z4.d
+; CHECK-NEXT: mov z1.h, p1/m, #0 // =0x0
 ; CHECK-NEXT: ret
 %a = call { , } @llvm.umul.with.overflow.nxv16i16( %x, %y)
 %b = extractvalue { , } %a, 0
@@ -232,27 +228,25 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.h
 ; CHECK-NEXT: movprfx z24, z3
-; CHECK-NEXT: mul z24.h, p0/m, z24.h, z7.h
-; CHECK-NEXT: umulh z3.h, p0/m, z3.h, z7.h
-; CHECK-NEXT: cmpne p1.h, p0/z, z3.h, #0
-; CHECK-NEXT: movprfx z3, z2
-; CHECK-NEXT: umulh z3.h, p0/m, z3.h, z6.h
-; CHECK-NEXT: cmpne p2.h, p0/z, z3.h, #0
-; CHECK-NEXT: movprfx z3, z1
-; CHECK-NEXT: mul z3.h, p0/m, z3.h, z5.h
-; CHECK-NEXT: umulh z1.h, p0/m, z1.h, z5.h
-; CHECK-NEXT: mul z2.h, p0/m, z2.h, z6.h
-; CHECK-NEXT: cmpne p3.h, p0/z, z1.h, #0
-; CHECK-NEXT: movprfx z1, z0
-; CHECK-NEXT: umulh z1.h, p0/m, z1.h, z4.h
+; CHECK-NEXT: umulh z24.h, p0/m, z24.h, z7.h
+; CHECK-NEXT: movprfx z25, z0
+; CHECK-NEXT: umulh z25.h, p0/m, z25.h, z4.h
+; CHECK-NEXT: movprfx z26, z2
+; CHECK-NEXT: umulh z26.h, p0/m, z26.h, z6.h
+; CHECK-NEXT: movprfx z27, z1
+; CHECK-NEXT: umulh z27.h, p0/m, z27.h, z5.h
+; CHECK-NEXT: mul z3.h, p0/m, z3.h, z7.h
 ; CHECK-NEXT: mul z0.h, p0/m, z0.h, z4.h
-; CHECK-NEXT: cmpne p0.h, p0/z, z1.h, #0
-; CHECK-NEXT: mov z3.h, p3/m, #0 // =0x0
-; CHECK-NEXT: mov z24.h, p1/m, #0 // =0x0
-; CHECK-NEXT: mov z0.h, p0/m, #0 // =0x0
-; CHECK-NEXT: mov z2.h, p2/m, #0 // =0x0
-; CHECK-NEXT: mov z1.d, z3.d
-; CHECK-NEXT: mov z3.d, z24.d
+; CHECK-NEXT: cmpne p1.h, p0/z, z25.h, #0
+; CHECK-NEXT: mul z2.h, p0/m, z2.h, z6.h
+; CHECK-NEXT: mul z1.h, p0/m, z1.h, z5.h
+; CHECK-NEXT: cmpne p2.h, p0/z, z24.h, #0
+; CHECK-NEXT: cmpne p3.h, p0/z, z26.h, #0
+; CHECK-NEXT: cmpne p0.h, p0/z, z27.h, #0
+; CHECK-NEXT: mov z0.h, p1/m, #0 // =0x0
+; CHECK-NEXT: mov z3.h, p2/m, #0 // =0x0
+; CHECK-NEXT: mov z1.h, p0/m, #0 // =0x0
+; CHECK-NEXT: mov z2.h, p3/m, #0 // =0x0
 ; CHECK-NEXT: ret
 %a = call { , } @llvm.umul.with.overflow.nxv32i16( %x, %y)
 %b = extractvalue { , } %a, 0
@@ -307,16 +301,15 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.s
 ; CHECK-NEXT: movprfx z4, z1
-; CHECK-NEXT: mul z4.s, p0/m, z4.s, z3.s
-; CHECK-NEXT: umulh z1.s, p0/m, z1.s, z3.s
-; CHECK-NEXT: movprfx z3, z0
-; CHECK-NEXT: umulh z3.s, p0/m, z3.s, z2.s
-; CHECK-NEXT: cmpne p1.s, p0/z, z1.s, #0
+; CHECK-NEXT: umulh z4.s, p0/m, z4.s, z3.s
+; CHECK-NEXT: movprfx z5, z0
+; CHECK-NEXT: umulh z5.s, p0/m, z5.s, z2.s
+; CHECK-NEXT: mul z1.s, p0/m, z1.s, z3.s
 ; CHECK-NEXT: mul z0.s, p0/m, z0.s, z2.s
-; CHECK-NEXT: cmpne p0.s, p0/z, z3.s, #0
-; CHECK-NEXT: mov z4.s, p1/m, #0 // =0x0
+; CHECK-NEXT: cmpne p1.s, p0/z, z4.s, #0
+; CHECK-NEXT: cmpne p0.s, p0/z, z5.s, #0
 ; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0
-; CHECK-NEXT: mov z1.d, z4.d
+; CHECK-NEXT: mov z1.s, p1/m, #0 // =0x0
 ; CHECK-NEXT: ret
 %a = call { , } @llvm.umul.with.overflow.nxv8i32( %x, %y)
 %b = extractvalue { , } %a, 0
@@ -332,27 +325,25 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.s
 ; CHECK-NEXT: movprfx z24, z3
-; CHECK-NEXT: mul z24.s, p0/m, z24.s, z7.s
-; CHECK-NEXT: umulh z3.s, p0/m, z3.s, z7.s
-; CHECK-NEXT: cmpne p1.s, p0/z, z3.s, #0
-; CHECK-NEXT: movprfx z3, z2
-; CHECK-NEXT: umulh z3.s, p0/m, z3.s, z6.s
-; CHECK-NEXT: cmpne p2.s, p0/z, z3.s, #0
-; CHECK-NEXT: movprfx z3, z1
-; CHECK-NEXT: mul z3.s, p0/m, z3.s, z5.s
-; CHECK-NEXT: umulh z1.s, p0/m, z1.s, z5.s
-; CHECK-NEXT: mul z2.s, p0/m, z2.s, z6.s
-; CHECK-NEXT: cmpne p3.s, p0/z, z1.s, #0
-; CHECK-NEXT: movprfx z1, z0
-; CHECK-NEXT: umulh z1.s, p0/m, z1.s, z4.s
+; CHECK-NEXT: umulh z24.s, p0/m, z24.s, z7.s
+; CHECK-NEXT: movprfx z25, z0
+; CHECK-NEXT: umulh z25.s, p0/m, z25.s, z4.s
+; CHECK-NEXT: movprfx z26, z2
+; CHECK-NEXT: umulh z26.s, p0/m, z26.s, z6.s
+; CHECK-NEXT: movprfx z27, z1
+; CHECK-NEXT: umulh z27.s, p0/m, z27.s, z5.s
+; CHECK-NEXT: mul z3.s, p0/m, z3.s, z7.s
 ; CHECK-NEXT: mul z0.s, p0/m, z0.s, z4.s
-; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0
-; CHECK-NEXT: mov z3.s, p3/m, #0 // =0x0
-; CHECK-NEXT: mov z24.s, p1/m, #0 // =0x0
-; CHECK-NEXT: mov z0.s, p0/m, #0 // =0x0
-; CHECK-NEXT: mov z2.s, p2/m, #0 // =0x0
-; CHECK-NEXT: mov z1.d, z3.d
-; CHECK-NEXT: mov z3.d, z24.d
+; CHECK-NEXT: cmpne p1.s, p0/z, z25.s, #0
+; CHECK-NEXT: mul z2.s, p0/m, z2.s, z6.s
+; CHECK-NEXT: mul z1.s, p0/m, z1.s, z5.s
+; CHECK-NEXT: cmpne p2.s, p0/z, z24.s, #0
+; CHECK-NEXT: cmpne p3.s, p0/z, z26.s, #0
+; CHECK-NEXT: cmpne p0.s, p0/z, z27.s, #0
+; CHECK-NEXT: mov z0.s, p1/m, #0 // =0x0
+; CHECK-NEXT: mov z3.s, p2/m, #0 // =0x0
+; CHECK-NEXT: mov z1.s, p0/m, #0 // =0x0
+; CHECK-NEXT: mov z2.s, p3/m, #0 // =0x0
 ; CHECK-NEXT: ret
 %a = call { , } @llvm.umul.with.overflow.nxv16i32( %x, %y)
 %b = extractvalue { , } %a, 0
@@ -387,16 +378,15 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.d
 ; CHECK-NEXT: movprfx z4, z1
-; CHECK-NEXT: mul z4.d, p0/m, z4.d, z3.d
-; CHECK-NEXT: umulh z1.d, p0/m, z1.d, z3.d
-; CHECK-NEXT: movprfx z3, z0
-; CHECK-NEXT: umulh z3.d, p0/m, z3.d, z2.d
-; CHECK-NEXT: cmpne p1.d, p0/z, z1.d, #0
+; CHECK-NEXT: umulh z4.d, p0/m, z4.d, z3.d
+; CHECK-NEXT: movprfx z5, z0
+; CHECK-NEXT: umulh z5.d, p0/m, z5.d, z2.d
+; CHECK-NEXT: mul z1.d, p0/m, z1.d, z3.d
 ; CHECK-NEXT: mul z0.d, p0/m, z0.d, z2.d
-; CHECK-NEXT: cmpne p0.d, p0/z, z3.d, #0
-; CHECK-NEXT: mov z4.d, p1/m, #0 // =0x0
+; CHECK-NEXT: cmpne p1.d, p0/z, z4.d, #0
+; CHECK-NEXT: cmpne p0.d, p0/z, z5.d, #0
 ; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0
-; CHECK-NEXT: mov z1.d, z4.d
+; CHECK-NEXT: mov z1.d, p1/m, #0 // =0x0
 ; CHECK-NEXT: ret
 %a = call { , } @llvm.umul.with.overflow.nxv4i64( %x, %y)
 %b = extractvalue { , } %a, 0
@@ -412,27 +402,25 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.d
 ; CHECK-NEXT: movprfx z24, z3
-; CHECK-NEXT: mul z24.d, p0/m, z24.d, z7.d
-; CHECK-NEXT: umulh z3.d, p0/m, z3.d, z7.d
-; CHECK-NEXT: cmpne p1.d, p0/z, z3.d, #0
-; CHECK-NEXT: movprfx z3, z2
-; CHECK-NEXT: umulh z3.d, p0/m, z3.d, z6.d
-; CHECK-NEXT: cmpne p2.d, p0/z, z3.d, #0
-; CHECK-NEXT: movprfx z3, z1
-; CHECK-NEXT: mul z3.d, p0/m, z3.d, z5.d
-; CHECK-NEXT: umulh z1.d, p0/m, z1.d, z5.d
-; CHECK-NEXT: mul z2.d, p0/m, z2.d, z6.d
-; CHECK-NEXT: cmpne p3.d, p0/z, z1.d, #0
-; CHECK-NEXT: movprfx z1, z0
-; CHECK-NEXT: umulh z1.d, p0/m, z1.d, z4.d
+; CHECK-NEXT: umulh z24.d, p0/m, z24.d, z7.d
+; CHECK-NEXT: movprfx z25, z0
+; CHECK-NEXT: umulh z25.d, p0/m, z25.d, z4.d
+; CHECK-NEXT: movprfx z26, z2
+; CHECK-NEXT: umulh z26.d, p0/m, z26.d, z6.d
+; CHECK-NEXT: movprfx z27, z1
+; CHECK-NEXT: umulh z27.d, p0/m, z27.d, z5.d
+; CHECK-NEXT: mul z3.d, p0/m, z3.d, z7.d
 ; CHECK-NEXT: mul z0.d, p0/m, z0.d, z4.d
-; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0
-; CHECK-NEXT: mov z3.d, p3/m, #0 // =0x0
-; CHECK-NEXT: mov z24.d, p1/m, #0 // =0x0
-; CHECK-NEXT: mov z0.d, p0/m, #0 // =0x0
-; CHECK-NEXT: mov z2.d, p2/m, #0 // =0x0
-; CHECK-NEXT: mov z1.d, z3.d
-; CHECK-NEXT: mov z3.d, z24.d
+; CHECK-NEXT: cmpne p1.d, p0/z, z25.d, #0
+; CHECK-NEXT: mul z2.d, p0/m, z2.d, z6.d
+; CHECK-NEXT: mul z1.d, p0/m, z1.d, z5.d
+; CHECK-NEXT: cmpne p2.d, p0/z, z24.d, #0
+; CHECK-NEXT: cmpne p3.d, p0/z, z26.d, #0
+; CHECK-NEXT: cmpne p0.d, p0/z, z27.d, #0
+; CHECK-NEXT: mov z0.d, p1/m, #0 // =0x0
+; CHECK-NEXT: mov z3.d, p2/m, #0 // =0x0
+; CHECK-NEXT: mov z1.d, p0/m, #0 // =0x0
+; CHECK-NEXT: mov z2.d, p3/m, #0 // =0x0
 ; CHECK-NEXT: ret
 %a = call { , } @llvm.umul.with.overflow.nxv8i64( %x, %y)
 %b = extractvalue { , } %a, 0
diff --git a/llvm/test/CodeGen/AArch64/sve-vecreduce-fold.ll b/llvm/test/CodeGen/AArch64/sve-vecreduce-fold.ll
--- a/llvm/test/CodeGen/AArch64/sve-vecreduce-fold.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vecreduce-fold.ll
@@ -80,8 +80,8 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: punpklo p3.h, p1.b
 ; CHECK-NEXT: punpkhi p1.h, p1.b
-; CHECK-NEXT: punpkhi p3.h, p3.b
 ; CHECK-NEXT: ptrue p2.b
+; CHECK-NEXT: punpkhi p3.h, p3.b
 ; CHECK-NEXT: uzp1 p0.h, p0.h, p3.h
 ; CHECK-NEXT: uzp1 p0.b, p0.b, p1.b
 ; CHECK-NEXT: nots p0.b, p2/z, p0.b
diff --git a/llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll b/llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll
--- a/llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vector-deinterleave.ll
@@ -185,17 +185,17 @@
 define {, } @vector_deinterleave_nxv8i64_nxv16i64( %vec) {
 ; CHECK-LABEL: vector_deinterleave_nxv8i64_nxv16i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: uzp1 z24.d, z4.d, z5.d
-; CHECK-NEXT: uzp1 z25.d, z2.d, z3.d
-; CHECK-NEXT: uzp1 z26.d, z0.d, z1.d
+; CHECK-NEXT: uzp1 z24.d, z2.d, z3.d
+; CHECK-NEXT: uzp1 z25.d, z0.d, z1.d
+; CHECK-NEXT: uzp1 z26.d, z4.d, z5.d
 ; CHECK-NEXT: uzp1 z27.d, z6.d, z7.d
 ; CHECK-NEXT: uzp2 z28.d, z0.d, z1.d
 ; CHECK-NEXT: uzp2 z29.d, z2.d, z3.d
 ; CHECK-NEXT: uzp2 z30.d, z4.d, z5.d
 ; CHECK-NEXT: uzp2 z7.d, z6.d, z7.d
-; CHECK-NEXT: mov z0.d, z26.d
-; CHECK-NEXT: mov z1.d, z25.d
-; CHECK-NEXT: mov z2.d, z24.d
+; CHECK-NEXT: mov z0.d, z25.d
+; CHECK-NEXT: mov z1.d, z24.d
+; CHECK-NEXT: mov z2.d, z26.d
 ; CHECK-NEXT: mov z3.d, z27.d
 ; CHECK-NEXT: mov z4.d, z28.d
 ; CHECK-NEXT: mov z5.d, z29.d
diff --git a/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll b/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll
--- a/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vector-interleave.ll
@@ -164,12 +164,13 @@
 define @interleave2_nxv16i32( %vec0, %vec1) {
 ; CHECK-LABEL: interleave2_nxv16i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: zip1 z4.s, z0.s, z2.s
-; CHECK-NEXT: zip2 z5.s, z0.s, z2.s
-; CHECK-NEXT: zip1 z2.s, z1.s, z3.s
+; CHECK-NEXT: zip1 z4.s, z1.s, z3.s
+; CHECK-NEXT: zip1 z5.s, z0.s, z2.s
+; CHECK-NEXT: zip2 z2.s, z0.s, z2.s
 ; CHECK-NEXT: zip2 z3.s, z1.s, z3.s
-; CHECK-NEXT: mov z0.d, z4.d
-; CHECK-NEXT: mov z1.d, z5.d
+; CHECK-NEXT: mov z0.d, z5.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: mov z2.d, z4.d
 ; CHECK-NEXT: ret
 %retval = call @llvm.experimental.vector.interleave2.nxv16i32( %vec0, %vec1)
 ret %retval
@@ -178,12 +179,13 @@
 define @interleave2_nxv8i64( %vec0, %vec1) {
 ; CHECK-LABEL: interleave2_nxv8i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: zip1 z4.d, z0.d, z2.d
-; CHECK-NEXT: zip2 z5.d, z0.d, z2.d
-; CHECK-NEXT: zip1 z2.d, z1.d, z3.d
+; CHECK-NEXT: zip1 z4.d, z1.d, z3.d
+; CHECK-NEXT: zip1 z5.d, z0.d, z2.d
+; CHECK-NEXT: zip2 z2.d, z0.d, z2.d
 ; CHECK-NEXT: zip2 z3.d, z1.d, z3.d
-; CHECK-NEXT: mov z0.d, z4.d
-; CHECK-NEXT: mov z1.d, z5.d
+; CHECK-NEXT: mov z0.d, z5.d
+; CHECK-NEXT: mov z1.d, z2.d
+; CHECK-NEXT: mov z2.d, z4.d
 ; CHECK-NEXT: ret
 %retval = call @llvm.experimental.vector.interleave2.nxv8i64( %vec0, %vec1)
 ret %retval
diff --git a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
--- a/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vector-splat.ll
@@ -536,7 +536,7 @@
 define @splat_nxv2f32_fmov_fold() {
 ; CHECK-LABEL: splat_nxv2f32_fmov_fold:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #1109917696
+; CHECK-NEXT: mov w8, #1109917696 // =0x42280000
 ; CHECK-NEXT: mov z0.s, w8
 ; CHECK-NEXT: ret
 %1 = insertelement undef, float 4.200000e+01, i32 0
@@ -547,7 +547,7 @@
 define @splat_nxv4f32_fmov_fold() {
 ; CHECK-LABEL: splat_nxv4f32_fmov_fold:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #1109917696
+; CHECK-NEXT: mov w8, #1109917696 // =0x42280000
 ; CHECK-NEXT: mov z0.s, w8
 ; CHECK-NEXT: ret
 %1 = insertelement undef, float 4.200000e+01, i32 0
@@ -558,7 +558,7 @@
 define @splat_nxv2f64_fmov_fold() {
 ; CHECK-LABEL: splat_nxv2f64_fmov_fold:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #4631107791820423168
+; CHECK-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
 ; CHECK-NEXT: mov z0.d, x8
 ; CHECK-NEXT: ret
 %1 = insertelement undef, double 4.200000e+01, i32 0
@@ -571,7 +571,7 @@
 define @splat_nxv2f32_imm_out_of_range() {
 ; CHECK-LABEL: splat_nxv2f32_imm_out_of_range:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #7864
+; CHECK-NEXT: mov w8, #7864 // =0x1eb8
 ; CHECK-NEXT: movk w8, #16469, lsl #16
 ; CHECK-NEXT: mov z0.s, w8
 ; CHECK-NEXT: ret
@@ -583,7 +583,7 @@
 define @splat_nxv4f32_imm_out_of_range() {
 ; CHECK-LABEL: splat_nxv4f32_imm_out_of_range:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #7864
+; CHECK-NEXT: mov w8, #7864 // =0x1eb8
 ; CHECK-NEXT: movk w8, #16469, lsl #16
 ; CHECK-NEXT: mov z0.s, w8
 ; CHECK-NEXT: ret
@@ -595,9 +595,9 @@
 define @splat_nxv2f64_imm_out_of_range() {
 ; CHECK-LABEL: splat_nxv2f64_imm_out_of_range:
 ; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
 ; CHECK-NEXT: adrp x8, .LCPI57_0
 ; CHECK-NEXT: add x8, x8, :lo12:.LCPI57_0
-; CHECK-NEXT: ptrue p0.d
 ; CHECK-NEXT: ld1rd { z0.d }, p0/z, [x8]
 ; CHECK-NEXT: ret
 %1 = insertelement undef, double 3.33, i32 0
diff --git a/llvm/test/CodeGen/AArch64/sve-vscale-attr.ll b/llvm/test/CodeGen/AArch64/sve-vscale-attr.ll
--- a/llvm/test/CodeGen/AArch64/sve-vscale-attr.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vscale-attr.ll
@@ -7,16 +7,16 @@
 define void @func_vscale_none(ptr %a, ptr %b) #0 {
 ; CHECK-NOARG-LABEL: func_vscale_none:
 ; CHECK-NOARG: // %bb.0:
-; CHECK-NOARG-NEXT: ldp q0, q1, [x0, #32]
-; CHECK-NOARG-NEXT: ldp q4, q5, [x1, #32]
-; CHECK-NOARG-NEXT: add v0.4s, v0.4s, v4.4s
-; CHECK-NOARG-NEXT: ldp q2, q3, [x0]
-; CHECK-NOARG-NEXT: add v1.4s, v1.4s, v5.4s
-; CHECK-NOARG-NEXT: ldp q6, q4, [x1]
-; CHECK-NOARG-NEXT: stp q0, q1, [x0, #32]
-; CHECK-NOARG-NEXT: add v2.4s, v2.4s, v6.4s
-; CHECK-NOARG-NEXT: add v0.4s, v3.4s, v4.4s
-; CHECK-NOARG-NEXT: stp q2, q0, [x0]
+; CHECK-NOARG-NEXT: ldp q0, q3, [x1, #32]
+; CHECK-NOARG-NEXT: ldp q1, q2, [x0, #32]
+; CHECK-NOARG-NEXT: ldp q4, q6, [x1]
+; CHECK-NOARG-NEXT: add v0.4s, v1.4s, v0.4s
+; CHECK-NOARG-NEXT: ldp q1, q5, [x0]
+; CHECK-NOARG-NEXT: add v2.4s, v2.4s, v3.4s
+; CHECK-NOARG-NEXT: add v1.4s, v1.4s, v4.4s
+; CHECK-NOARG-NEXT: add v3.4s, v5.4s, v6.4s
+; CHECK-NOARG-NEXT: stp q0, q2, [x0, #32]
+; CHECK-NOARG-NEXT: stp q1, q3, [x0]
 ; CHECK-NOARG-NEXT: ret
 ;
 ; CHECK-ARG-LABEL: func_vscale_none:
@@ -39,16 +39,16 @@
 define void @func_vscale1_1(ptr %a, ptr %b) #1 {
 ; CHECK-LABEL: func_vscale1_1:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldp q0, q1, [x0, #32]
-; CHECK-NEXT: ldp q4, q5, [x1, #32]
-; CHECK-NEXT: add v0.4s, v0.4s, v4.4s
-; CHECK-NEXT: ldp q2, q3, [x0]
-; CHECK-NEXT: add v1.4s, v1.4s, v5.4s
-; CHECK-NEXT: ldp q6, q4, [x1]
-; CHECK-NEXT: stp q0, q1, [x0, #32]
-; CHECK-NEXT: add v2.4s, v2.4s, v6.4s
-; CHECK-NEXT: add v0.4s, v3.4s, v4.4s
-; CHECK-NEXT: stp q2, q0, [x0]
+; CHECK-NEXT: ldp q0, q3, [x1, #32]
+; CHECK-NEXT: ldp q1, q2, [x0, #32]
+; CHECK-NEXT: ldp q4, q6, [x1]
+; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: ldp q1, q5, [x0]
+; CHECK-NEXT: add v2.4s, v2.4s, v3.4s
+; CHECK-NEXT: add v1.4s, v1.4s, v4.4s
+; CHECK-NEXT: add v3.4s, v5.4s, v6.4s
+; CHECK-NEXT: stp q0, q2, [x0, #32]
+; CHECK-NEXT: stp q1, q3, [x0]
 ; CHECK-NEXT: ret
 %op1 = load <16 x i32>, ptr %a
 %op2 = load <16 x i32>, ptr %b
@@ -62,8 +62,8 @@
 define void @func_vscale2_2(ptr %a, ptr %b) #2 {
 ; CHECK-LABEL: func_vscale2_2:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #8
 ; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: mov x8, #8 // =0x8
 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0]
 ; CHECK-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2]
@@ -85,8 +85,8 @@
 define void @func_vscale2_4(ptr %a, ptr %b) #3 {
 ; CHECK-LABEL: func_vscale2_4:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #8
 ; CHECK-NEXT: ptrue p0.s, vl8
+; CHECK-NEXT: mov x8, #8 // =0x8
 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
 ; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0]
 ; CHECK-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2]
diff --git a/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll b/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll
--- a/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll
+++ b/llvm/test/CodeGen/AArch64/sve-vselect-imm.ll
@@ -118,9 +118,9 @@
 define @sel_16_illegal_wrong_extension( %p) {
 ; CHECK-LABEL: sel_16_illegal_wrong_extension:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.h, #0 // =0x0
-; CHECK-NEXT: mov z1.h, #128 // =0x80
-; CHECK-NEXT: mov z0.h, p0/m, z1.h
+; CHECK-NEXT: mov z0.h, #128 // =0x80
+; CHECK-NEXT: mov z1.h, #0 // =0x0
+; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
 ; CHECK-NEXT: ret
 %vec = shufflevector insertelement ( undef, i16 128, i32 0), zeroinitializer, zeroinitializer
 %sel = select %p, %vec, zeroinitializer
@@ -130,9 +130,9 @@
 define @sel_32_illegal_wrong_extension( %p) {
 ; CHECK-LABEL: sel_32_illegal_wrong_extension:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.s, #0 // =0x0
-; CHECK-NEXT: mov z1.s, #128 // =0x80
-; CHECK-NEXT: mov z0.s, p0/m, z1.s
+; CHECK-NEXT: mov z0.s, #128 // =0x80
+; CHECK-NEXT: mov z1.s, #0 // =0x0
+; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
 ; CHECK-NEXT: ret
 %vec = shufflevector insertelement ( undef, i32 128, i32 0), zeroinitializer, zeroinitializer
 %sel = select %p, %vec, zeroinitializer
@@ -142,9 +142,9 @@
 define @sel_64_illegal_wrong_extension( %p) {
 ; CHECK-LABEL: sel_64_illegal_wrong_extension:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov z0.d, #0 // =0x0
-; CHECK-NEXT: mov z1.d, #128 // =0x80
-; CHECK-NEXT: mov z0.d, p0/m, z1.d
+; CHECK-NEXT: mov z0.d, #128 // =0x80
+; CHECK-NEXT: mov z1.d, #0 // =0x0
+; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
 ; CHECK-NEXT: ret
 %vec = shufflevector insertelement ( undef, i64 128, i32 0), zeroinitializer, zeroinitializer
 %sel = select %p, %vec, zeroinitializer
@@ -154,7 +154,7 @@
 define @sel_16_illegal_shifted( %p) {
 ; CHECK-LABEL: sel_16_illegal_shifted:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #513
+; CHECK-NEXT: mov w8, #513 // =0x201
 ; CHECK-NEXT: mov z1.h, #0 // =0x0
 ; CHECK-NEXT: mov z0.h, w8
 ; CHECK-NEXT: sel z0.h, p0, z0.h, z1.h
@@ -167,7 +167,7 @@
 define @sel_32_illegal_shifted( %p) {
 ; CHECK-LABEL: sel_32_illegal_shifted:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #513
+; CHECK-NEXT: mov w8, #513 // =0x201
 ; CHECK-NEXT: mov z1.s, #0 // =0x0
 ; CHECK-NEXT: mov z0.s, w8
 ; CHECK-NEXT: sel z0.s, p0, z0.s, z1.s
@@ -180,7 +180,7 @@
 define @sel_64_illegal_shifted( %p) {
 ; CHECK-LABEL: sel_64_illegal_shifted:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #513
+; CHECK-NEXT: mov w8, #513 // =0x201
 ; CHECK-NEXT: mov z1.d, #0 // =0x0
 ; CHECK-NEXT: mov z0.d, x8
 ; CHECK-NEXT: sel z0.d, p0, z0.d, z1.d
@@ -363,7 +363,7 @@
 define @sel_merge_nxv8f16_negative_zero( %p, %in) {
 ; CHECK-LABEL: sel_merge_nxv8f16_negative_zero:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #32768
+; CHECK-NEXT: mov w8, #32768 // =0x8000
 ; CHECK-NEXT: mov z1.h, w8
 ; CHECK-NEXT: mov z0.h, p0/m, z1.h
 ; CHECK-NEXT: ret
@@ -375,7 +375,7 @@
 define @sel_merge_nx4f16_negative_zero( %p, %in) {
 ; CHECK-LABEL: sel_merge_nx4f16_negative_zero:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #32768
+; CHECK-NEXT: mov w8, #32768 // =0x8000
 ; CHECK-NEXT: mov z1.h, w8
 ; CHECK-NEXT: mov z0.s, p0/m, z1.s
 ; CHECK-NEXT: ret
@@ -387,7 +387,7 @@
 define @sel_merge_nx2f16_negative_zero( %p, %in) {
 ; CHECK-LABEL: sel_merge_nx2f16_negative_zero:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #32768
+; CHECK-NEXT: mov w8, #32768 // =0x8000
 ; CHECK-NEXT: mov z1.h, w8
 ; CHECK-NEXT: mov z0.d, p0/m, z1.d
 ; CHECK-NEXT: ret
@@ -399,7 +399,7 @@
 define @sel_merge_nx4f32_negative_zero( %p, %in) {
 ; CHECK-LABEL: sel_merge_nx4f32_negative_zero:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #-2147483648
+; CHECK-NEXT: mov w8, #-2147483648 // =0x80000000
 ; CHECK-NEXT: mov z1.s, w8
 ; CHECK-NEXT: mov z0.s, p0/m, z1.s
 ; CHECK-NEXT: ret
@@ -411,7 +411,7 @@
 define @sel_merge_nx2f32_negative_zero( %p, %in) {
 ; CHECK-LABEL: sel_merge_nx2f32_negative_zero:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #-2147483648
+; CHECK-NEXT: mov w8, #-2147483648 // =0x80000000
 ; CHECK-NEXT: mov z1.s, w8
 ; CHECK-NEXT: mov z0.d, p0/m, z1.d
 ; CHECK-NEXT: ret
@@ -423,7 +423,7 @@
 define @sel_merge_nx2f64_negative_zero( %p, %in) {
 ; CHECK-LABEL: sel_merge_nx2f64_negative_zero:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov x8, #-9223372036854775808
+; CHECK-NEXT: mov x8, #-9223372036854775808 // =0x8000000000000000
 ; CHECK-NEXT: mov z1.d, x8
 ; CHECK-NEXT: mov z0.d, p0/m, z1.d
 ; CHECK-NEXT: ret
@@ -502,7 +502,7 @@
 define @sel_merge_16_illegal_shifted( %p, %in) {
 ; CHECK-LABEL: sel_merge_16_illegal_shifted:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #513
+; CHECK-NEXT: mov w8, #513 // =0x201
 ; CHECK-NEXT: mov z1.h, w8
 ; CHECK-NEXT: mov z0.h, p0/m, z1.h
 ; CHECK-NEXT: ret
@@ -514,7 +514,7 @@
 define @sel_merge_32_illegal_shifted( %p, %in) {
 ; CHECK-LABEL: sel_merge_32_illegal_shifted:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #513
+; CHECK-NEXT: mov w8, #513 // =0x201
 ; CHECK-NEXT: mov z1.s, w8
 ; CHECK-NEXT: mov z0.s, p0/m, z1.s
 ; CHECK-NEXT: ret
@@ -526,7 +526,7 @@
 define @sel_merge_64_illegal_shifted( %p, %in) {
 ; CHECK-LABEL: sel_merge_64_illegal_shifted:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #513
+; CHECK-NEXT: mov w8, #513 // =0x201
 ; CHECK-NEXT: mov z1.d, x8
 ; CHECK-NEXT: mov z0.d, p0/m, z1.d
 ; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/sve2-fcopysign.ll b/llvm/test/CodeGen/AArch64/sve2-fcopysign.ll
--- a/llvm/test/CodeGen/AArch64/sve2-fcopysign.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-fcopysign.ll
@@ -58,14 +58,14 @@
 ; CHECK_EXTEND_ROUND-LABEL: test_copysign_v4f32_v4f64:
 ; CHECK_EXTEND_ROUND: // %bb.0:
 ; CHECK_EXTEND_ROUND-NEXT: ptrue p0.d
-; CHECK_EXTEND_ROUND-NEXT: mov z3.s, #0x7fffffff
+; CHECK_EXTEND_ROUND-NEXT: uunpkhi z3.d, z0.s
+; CHECK_EXTEND_ROUND-NEXT: mov z4.s, #0x7fffffff
+; CHECK_EXTEND_ROUND-NEXT: uunpklo z0.d, z0.s
 ; CHECK_EXTEND_ROUND-NEXT: fcvt z2.s, p0/m, z2.d
-; CHECK_EXTEND_ROUND-NEXT: uunpkhi z4.d, z0.s
 ; CHECK_EXTEND_ROUND-NEXT: fcvt z1.s, p0/m, z1.d
-; CHECK_EXTEND_ROUND-NEXT: uunpklo z0.d, z0.s
-; CHECK_EXTEND_ROUND-NEXT: bsl z4.d, z4.d, z2.d, z3.d
-; CHECK_EXTEND_ROUND-NEXT: bsl z0.d, z0.d, z1.d, z3.d
-; CHECK_EXTEND_ROUND-NEXT: uzp1 z0.s, z0.s, z4.s
+; CHECK_EXTEND_ROUND-NEXT: bsl z3.d, z3.d, z2.d, z4.d
+; CHECK_EXTEND_ROUND-NEXT: bsl z0.d, z0.d, z1.d, z4.d
+; CHECK_EXTEND_ROUND-NEXT: uzp1 z0.s, z0.s, z3.s
 ; CHECK_EXTEND_ROUND-NEXT: ret
 %tmp0 = fptrunc %b to
 %r = call @llvm.copysign.v4f32( %a, %tmp0)
@@ -110,9 +110,9 @@
 ; CHECK_NO_EXTEND_ROUND-NEXT: ptrue p0.d
 ; CHECK_NO_EXTEND_ROUND-NEXT: uunpkhi z3.d, z2.s
 ; CHECK_NO_EXTEND_ROUND-NEXT: uunpklo z2.d, z2.s
+; CHECK_NO_EXTEND_ROUND-NEXT: mov z4.d, #0x7fffffffffffffff
 ; CHECK_NO_EXTEND_ROUND-NEXT: fcvt z3.d, p0/m, z3.s
 ; CHECK_NO_EXTEND_ROUND-NEXT: fcvt z2.d, p0/m, z2.s
-; CHECK_NO_EXTEND_ROUND-NEXT: mov z4.d, #0x7fffffffffffffff
 ; CHECK_NO_EXTEND_ROUND-NEXT: bsl z0.d, z0.d, z2.d, z4.d
 ; CHECK_NO_EXTEND_ROUND-NEXT: bsl z1.d, z1.d, z3.d, z4.d
 ; CHECK_NO_EXTEND_ROUND-NEXT: ret
@@ -120,13 +120,13 @@
 ; CHECK_EXTEND_ROUND-LABEL: test_copysign_v4f64_v4f32:
 ; CHECK_EXTEND_ROUND: // %bb.0:
 ; CHECK_EXTEND_ROUND-NEXT: ptrue p0.d
-; CHECK_EXTEND_ROUND-NEXT: uunpklo z3.d, z2.s
-; CHECK_EXTEND_ROUND-NEXT: uunpkhi z2.d, z2.s
-; CHECK_EXTEND_ROUND-NEXT: fcvt z3.d, p0/m, z3.s
+; CHECK_EXTEND_ROUND-NEXT: uunpkhi z3.d, z2.s
+; CHECK_EXTEND_ROUND-NEXT: uunpklo z2.d, z2.s
 ; CHECK_EXTEND_ROUND-NEXT: mov z4.d, #0x7fffffffffffffff
 ; CHECK_EXTEND_ROUND-NEXT: fcvt z2.d, p0/m, z2.s
-; CHECK_EXTEND_ROUND-NEXT: bsl z0.d, z0.d, z3.d, z4.d
-; CHECK_EXTEND_ROUND-NEXT: bsl z1.d, z1.d, z2.d, z4.d
+; CHECK_EXTEND_ROUND-NEXT: fcvt z3.d, p0/m, z3.s
+; CHECK_EXTEND_ROUND-NEXT: bsl z0.d, z0.d, z2.d, z4.d
+; CHECK_EXTEND_ROUND-NEXT: bsl z1.d, z1.d, z3.d, z4.d
 ; CHECK_EXTEND_ROUND-NEXT: ret
 %tmp0 = fpext %b to
 %r = call @llvm.copysign.v4f64( %a, %tmp0)
@@ -186,14 +186,14 @@
 ; CHECK_EXTEND_ROUND-LABEL: test_copysign_v4f16_v4f64:
 ; CHECK_EXTEND_ROUND: // %bb.0:
 ; CHECK_EXTEND_ROUND-NEXT: ptrue p0.d
-; CHECK_EXTEND_ROUND-NEXT: mov z3.h, #32767 // =0x7fff
+; CHECK_EXTEND_ROUND-NEXT: uunpkhi z3.d, z0.s
+; CHECK_EXTEND_ROUND-NEXT: mov z4.h, #32767 // =0x7fff
+; CHECK_EXTEND_ROUND-NEXT: uunpklo z0.d, z0.s
 ; CHECK_EXTEND_ROUND-NEXT: fcvt z2.h, p0/m, z2.d
-; CHECK_EXTEND_ROUND-NEXT: uunpkhi z4.d, z0.s
 ; CHECK_EXTEND_ROUND-NEXT: fcvt z1.h, p0/m, z1.d
-; CHECK_EXTEND_ROUND-NEXT: uunpklo z0.d, z0.s
-; CHECK_EXTEND_ROUND-NEXT: bsl z4.d, z4.d, z2.d, z3.d
-; CHECK_EXTEND_ROUND-NEXT: bsl z0.d, z0.d, z1.d, z3.d
-; CHECK_EXTEND_ROUND-NEXT: uzp1 z0.s, z0.s, z4.s
+; CHECK_EXTEND_ROUND-NEXT: bsl z3.d, z3.d, z2.d, z4.d
+; CHECK_EXTEND_ROUND-NEXT: bsl z0.d, z0.d, z1.d, z4.d
+; CHECK_EXTEND_ROUND-NEXT: uzp1 z0.s, z0.s, z3.s
 ; CHECK_EXTEND_ROUND-NEXT: ret
 %tmp0 = fptrunc %b to
 %r = call @llvm.copysign.v4f16( %a, %tmp0)
@@ -228,14 +228,14 @@
 ; CHECK_EXTEND_ROUND-LABEL: test_copysign_v8f16_v8f32:
 ; CHECK_EXTEND_ROUND: // %bb.0:
 ; CHECK_EXTEND_ROUND-NEXT: ptrue p0.s
-; CHECK_EXTEND_ROUND-NEXT: mov z3.h, #32767 // =0x7fff
+; CHECK_EXTEND_ROUND-NEXT: uunpkhi z3.s, z0.h
+; CHECK_EXTEND_ROUND-NEXT: mov z4.h, #32767 // =0x7fff
+; CHECK_EXTEND_ROUND-NEXT: uunpklo z0.s, z0.h
 ; CHECK_EXTEND_ROUND-NEXT: fcvt z2.h, p0/m, z2.s
-; CHECK_EXTEND_ROUND-NEXT: uunpkhi z4.s, z0.h
 ; CHECK_EXTEND_ROUND-NEXT: fcvt z1.h, p0/m, z1.s
-; CHECK_EXTEND_ROUND-NEXT: uunpklo z0.s, z0.h
-; CHECK_EXTEND_ROUND-NEXT: bsl z4.d, z4.d, z2.d, z3.d
-; CHECK_EXTEND_ROUND-NEXT: bsl z0.d, z0.d, z1.d, z3.d
-; CHECK_EXTEND_ROUND-NEXT: uzp1 z0.h, z0.h, z4.h
+; CHECK_EXTEND_ROUND-NEXT: bsl z3.d, z3.d, z2.d, z4.d
+; CHECK_EXTEND_ROUND-NEXT: bsl z0.d, z0.d, z1.d, z4.d
+; CHECK_EXTEND_ROUND-NEXT: uzp1 z0.h, z0.h, z3.h
 ; CHECK_EXTEND_ROUND-NEXT: ret
 %tmp0 = fptrunc %b to
 %r = call @llvm.copysign.v8f16( %a, %tmp0)
diff --git a/llvm/test/CodeGen/AArch64/sve2-fixed-length-fcopysign.ll b/llvm/test/CodeGen/AArch64/sve2-fixed-length-fcopysign.ll
--- a/llvm/test/CodeGen/AArch64/sve2-fixed-length-fcopysign.ll
+++ b/llvm/test/CodeGen/AArch64/sve2-fixed-length-fcopysign.ll
@@ -16,10 +16,10 @@
 define void @test_copysign_v4f16_v4f16(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
 ; CHECK-LABEL: test_copysign_v4f16_v4f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0]
-; CHECK-NEXT: mvni v2.4h, #128, lsl #8
-; CHECK-NEXT: ldr d1, [x1]
-; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-NEXT: mvni v0.4h, #128, lsl #8
+; CHECK-NEXT: ldr d1, [x0]
+; CHECK-NEXT: ldr d2, [x1]
+; CHECK-NEXT: bsl v0.8b, v1.8b, v2.8b
 ; CHECK-NEXT: str d0, [x0]
 ; CHECK-NEXT: ret
 %a = load <4 x half>, ptr %ap
@@ -32,10 +32,10 @@
 define void @test_copysign_v8f16_v8f16(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
 ; CHECK-LABEL: test_copysign_v8f16_v8f16:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: ldr q1, [x1]
-; CHECK-NEXT: mvni v2.8h, #128, lsl #8
-; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
+; CHECK-NEXT: mvni v0.8h, #128, lsl #8
+; CHECK-NEXT: ldr q1, [x0]
+; CHECK-NEXT: ldr q2, [x1]
+; CHECK-NEXT: bsl v0.16b, v1.16b, v2.16b
 ; CHECK-NEXT: str q0, [x0]
 ; CHECK-NEXT: ret
 %a = load <8 x half>, ptr %ap
@@ -49,11 +49,11 @@
 ; CHECK-LABEL: test_copysign_v16f16_v16f16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.h, vl16
-; CHECK-NEXT: mov z2.h, #32767 // =0x7fff
-; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
-; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
-; CHECK-NEXT: bsl z0.d, z0.d, z1.d, z2.d
-; CHECK-NEXT: st1h { z0.h }, p0, [x0]
+; CHECK-NEXT: mov z0.h, #32767 // =0x7fff
+; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0]
+; CHECK-NEXT: ld1h { z2.h }, p0/z, [x1]
+; CHECK-NEXT: bsl z1.d, z1.d, z2.d, z0.d
+; CHECK-NEXT: st1h { z1.h }, p0, [x0]
 ; CHECK-NEXT: ret
 %a = load <16 x half>, ptr %ap
 %b = load <16 x half>, ptr %bp
@@ -65,27 +65,27 @@
 define void @test_copysign_v32f16_v32f16(ptr %ap, ptr %bp) #0 {
 ; VBITS_GE_256-LABEL: test_copysign_v32f16_v32f16:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #16
 ; VBITS_GE_256-NEXT: ptrue p0.h, vl16
-; VBITS_GE_256-NEXT: mov z4.h, #32767 // =0x7fff
-; VBITS_GE_256-NEXT: ld1h { z0.h }, p0/z, [x0, x8, lsl #1]
-; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0]
-; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x1, x8, lsl #1]
-; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1]
-; VBITS_GE_256-NEXT: bsl z0.d, z0.d, z2.d, z4.d
-; VBITS_GE_256-NEXT: bsl z1.d, z1.d, z3.d, z4.d
-; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
-; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
+; VBITS_GE_256-NEXT: mov x8, #16 // =0x10
+; VBITS_GE_256-NEXT: mov z0.h, #32767 // =0x7fff
+; VBITS_GE_256-NEXT: ld1h { z1.h }, p0/z, [x0, x8, lsl #1]
+; VBITS_GE_256-NEXT: ld1h { z2.h }, p0/z, [x0]
+; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1, x8, lsl #1]
+; VBITS_GE_256-NEXT: ld1h { z4.h }, p0/z, [x1]
+; VBITS_GE_256-NEXT: bsl z1.d, z1.d, z3.d, z0.d
+; VBITS_GE_256-NEXT: bsl z2.d, z2.d, z4.d, z0.d
+; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0, x8, lsl #1]
+; VBITS_GE_256-NEXT: st1h { z2.h }, p0, [x0]
 ; VBITS_GE_256-NEXT: ret
 ;
 ; VBITS_GE_512-LABEL: test_copysign_v32f16_v32f16:
 ; VBITS_GE_512: // %bb.0:
 ; VBITS_GE_512-NEXT: ptrue p0.h, vl32
-; VBITS_GE_512-NEXT: mov z2.h, #32767 // =0x7fff
-; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
-; VBITS_GE_512-NEXT: ld1h { z1.h }, p0/z, [x1]
-; VBITS_GE_512-NEXT: bsl z0.d, z0.d, z1.d, z2.d
-; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
+; VBITS_GE_512-NEXT: mov z0.h, #32767 // =0x7fff
+; VBITS_GE_512-NEXT: ld1h { z1.h }, p0/z, [x0]
+; VBITS_GE_512-NEXT: ld1h { z2.h }, p0/z, [x1]
+; VBITS_GE_512-NEXT: bsl z1.d, z1.d, z2.d, z0.d
+; VBITS_GE_512-NEXT: st1h { z1.h }, p0, [x0]
 ; VBITS_GE_512-NEXT: ret
 %a = load <32 x half>, ptr %ap
 %b = load <32 x half>, ptr %bp
@@ -98,11 +98,11 @@
 ; CHECK-LABEL: test_copysign_v64f16_v64f16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.h, vl64
-; CHECK-NEXT: mov z2.h, #32767 // =0x7fff
-; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
-; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
-; CHECK-NEXT: bsl z0.d, z0.d, z1.d, z2.d
-; CHECK-NEXT: st1h { z0.h }, p0, [x0]
+; CHECK-NEXT: mov z0.h, #32767 // =0x7fff
+; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0]
+; CHECK-NEXT: ld1h { z2.h }, p0/z, [x1]
+; CHECK-NEXT: bsl z1.d, z1.d, z2.d, z0.d
+; CHECK-NEXT: st1h { z1.h }, p0, [x0]
 ; CHECK-NEXT: ret
 %a = load <64 x half>, ptr %ap
 %b = load <64 x half>, ptr %bp
@@ -115,11 +115,11 @@
 ; CHECK-LABEL: test_copysign_v128f16_v128f16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.h, vl128
-; CHECK-NEXT: mov z2.h, #32767 // =0x7fff
-; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
-; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
-; CHECK-NEXT: bsl z0.d, z0.d, z1.d, z2.d
-; CHECK-NEXT: st1h { z0.h }, p0, [x0]
+; CHECK-NEXT: mov z0.h, #32767 // =0x7fff
+; CHECK-NEXT: ld1h { z1.h }, p0/z, [x0]
+; CHECK-NEXT: ld1h { z2.h }, p0/z, [x1]
+; CHECK-NEXT: bsl z1.d, z1.d, z2.d, z0.d
+; CHECK-NEXT: st1h { z1.h }, p0, [x0]
 ; CHECK-NEXT: ret
 %a = load <128 x half>, ptr %ap
 %b = load <128 x half>, ptr %bp
@@ -133,10 +133,10 @@
 define void @test_copysign_v2f32_v2f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
 ; CHECK-LABEL: test_copysign_v2f32_v2f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr d0, [x0]
-; CHECK-NEXT: mvni v2.2s, #128, lsl #24
-; CHECK-NEXT: ldr d1, [x1]
-; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
+; CHECK-NEXT: mvni v0.2s, #128, lsl #24
+; CHECK-NEXT: ldr d1, [x0]
+; CHECK-NEXT: ldr d2, [x1]
+; CHECK-NEXT: bsl v0.8b, v1.8b, v2.8b
 ; CHECK-NEXT: str d0, [x0]
 ; CHECK-NEXT: ret
 %a = load <2 x float>, ptr %ap
@@ -149,10 +149,10 @@
 define void @test_copysign_v4f32_v4f32(ptr %ap, ptr %bp) vscale_range(2,0) #0 {
 ; CHECK-LABEL: test_copysign_v4f32_v4f32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: ldr q1, [x1]
-; CHECK-NEXT: mvni v2.4s, #128, lsl #24
-; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
+; CHECK-NEXT: mvni v0.4s, #128, lsl #24
+; CHECK-NEXT: ldr q1, [x0]
+; CHECK-NEXT: ldr q2, [x1]
+; CHECK-NEXT: bsl v0.16b, v1.16b, v2.16b
 ; CHECK-NEXT: str q0, [x0]
 ; CHECK-NEXT: ret
 %a = load <4 x float>, ptr %ap
@@ -166,11 +166,11 @@
 ; CHECK-LABEL: test_copysign_v8f32_v8f32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.s, vl8
-; CHECK-NEXT: mov z2.s, #0x7fffffff
-; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
-; CHECK-NEXT: bsl z0.d, z0.d, z1.d, z2.d
-; CHECK-NEXT: st1w { z0.s }, p0, [x0]
+; CHECK-NEXT: mov z0.s, #0x7fffffff
+; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0]
+; CHECK-NEXT: ld1w { z2.s }, p0/z, [x1]
+; CHECK-NEXT: bsl z1.d, z1.d, z2.d, z0.d
+; CHECK-NEXT: st1w { z1.s }, p0, [x0]
 ; CHECK-NEXT: ret
 %a = load <8 x float>, ptr %ap
 %b = load <8 x float>, ptr %bp
@@ -182,27 +182,27 @@
 define void @test_copysign_v16f32_v16f32(ptr %ap, ptr %bp) #0 {
 ; VBITS_GE_256-LABEL: test_copysign_v16f32_v16f32:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #8
 ; VBITS_GE_256-NEXT: ptrue p0.s, vl8
-; VBITS_GE_256-NEXT: mov z4.s, #0x7fffffff
-; VBITS_GE_256-NEXT: ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
-; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0]
-; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x1, x8, lsl #2]
-; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1]
-; VBITS_GE_256-NEXT: bsl z0.d, z0.d, z2.d, z4.d
-; VBITS_GE_256-NEXT: bsl z1.d, z1.d, z3.d, z4.d
-; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
-; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
+; VBITS_GE_256-NEXT: mov x8, #8 // =0x8
+; VBITS_GE_256-NEXT: mov z0.s, #0x7fffffff
+; VBITS_GE_256-NEXT: ld1w { z1.s }, p0/z, [x0, x8, lsl #2]
+; VBITS_GE_256-NEXT: ld1w { z2.s }, p0/z, [x0]
+; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1, x8, lsl #2]
+; VBITS_GE_256-NEXT: ld1w { z4.s }, p0/z, [x1]
+; VBITS_GE_256-NEXT: bsl z1.d, z1.d, z3.d, z0.d
+; VBITS_GE_256-NEXT: bsl z2.d, z2.d, z4.d, z0.d
+; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0, x8, lsl #2]
+; VBITS_GE_256-NEXT: st1w { z2.s }, p0, [x0]
 ; VBITS_GE_256-NEXT: ret
 ;
 ; VBITS_GE_512-LABEL: test_copysign_v16f32_v16f32:
 ; VBITS_GE_512: // %bb.0:
 ; VBITS_GE_512-NEXT: ptrue p0.s, vl16
-; VBITS_GE_512-NEXT: mov z2.s, #0x7fffffff
-; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
-; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x1]
-; VBITS_GE_512-NEXT: bsl z0.d, z0.d, z1.d, z2.d
-; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
+; VBITS_GE_512-NEXT: mov z0.s, #0x7fffffff
+; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x0]
+; VBITS_GE_512-NEXT: ld1w { z2.s }, p0/z, [x1]
+; VBITS_GE_512-NEXT: bsl z1.d, z1.d, z2.d, z0.d
+; VBITS_GE_512-NEXT: st1w { z1.s }, p0, [x0]
 ; VBITS_GE_512-NEXT: ret
 %a = load <16 x float>, ptr %ap
 %b = load <16 x float>, ptr %bp
@@ -215,11 +215,11 @@
 ; CHECK-LABEL: test_copysign_v32f32_v32f32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.s, vl32
-; CHECK-NEXT: mov z2.s, #0x7fffffff
-; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
-; CHECK-NEXT: bsl z0.d, z0.d, z1.d, z2.d
-; CHECK-NEXT: st1w { z0.s }, p0, [x0]
+; CHECK-NEXT: mov z0.s, #0x7fffffff
+; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0]
+; CHECK-NEXT: ld1w { z2.s }, p0/z, [x1]
+; CHECK-NEXT: bsl z1.d, z1.d, z2.d, z0.d
+; CHECK-NEXT: st1w { z1.s }, p0, [x0]
 ; CHECK-NEXT: ret
 %a = load <32 x float>, ptr %ap
 %b = load <32 x float>, ptr %bp
@@ -232,11 +232,11 @@
 ; CHECK-LABEL: test_copysign_v64f32_v64f32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.s, vl64
-; CHECK-NEXT: mov z2.s, #0x7fffffff
-; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
-; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
-; CHECK-NEXT: bsl z0.d, z0.d, z1.d, z2.d
-; CHECK-NEXT: st1w { z0.s }, p0, [x0]
+; CHECK-NEXT: mov z0.s, #0x7fffffff
+; CHECK-NEXT: ld1w { z1.s }, p0/z, [x0]
+; CHECK-NEXT: ld1w { z2.s }, p0/z, [x1]
+; CHECK-NEXT: bsl z1.d, z1.d, z2.d, z0.d
+; CHECK-NEXT: st1w { z1.s }, p0, [x0]
 ; CHECK-NEXT: ret
 %a = load <64 x float>, ptr %ap
 %b = load <64 x float>, ptr %bp
@@ -268,11 +268,11 @@
 ; CHECK-LABEL: test_copysign_v4f64_v4f64:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.d, vl4
-; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff
-; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
-; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
-; CHECK-NEXT: bsl z0.d, z0.d, z1.d, z2.d
-; CHECK-NEXT: st1d { z0.d }, p0, [x0]
+; CHECK-NEXT: mov z0.d, #0x7fffffffffffffff
+; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0]
+; CHECK-NEXT: ld1d { z2.d }, p0/z, [x1]
+; CHECK-NEXT: bsl z1.d, z1.d, z2.d, z0.d
+; CHECK-NEXT: st1d { z1.d }, p0, [x0]
 ; CHECK-NEXT: ret
 %a = load <4 x double>, ptr %ap
 %b = load <4 x double>, ptr %bp
@@ -284,27 +284,27 @@
 define void @test_copysign_v8f64_v8f64(ptr %ap, ptr %bp) #0 {
 ; VBITS_GE_256-LABEL: test_copysign_v8f64_v8f64:
 ; VBITS_GE_256: // %bb.0:
-; VBITS_GE_256-NEXT: mov x8, #4
 ; VBITS_GE_256-NEXT: ptrue p0.d, vl4
-; VBITS_GE_256-NEXT: mov z4.d, #0x7fffffffffffffff
-; VBITS_GE_256-NEXT: ld1d { z0.d }, p0/z, [x0, x8, lsl #3]
-; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0]
-; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3]
-; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1]
-; VBITS_GE_256-NEXT: bsl z0.d, z0.d, z2.d, z4.d
-; VBITS_GE_256-NEXT: bsl z1.d, z1.d, z3.d, z4.d
-; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
-; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
+; VBITS_GE_256-NEXT: mov x8, #4 // =0x4
+; VBITS_GE_256-NEXT: mov z0.d, #0x7fffffffffffffff
+; VBITS_GE_256-NEXT: ld1d { z1.d }, p0/z, [x0, x8, lsl #3]
+; VBITS_GE_256-NEXT: ld1d { z2.d }, p0/z, [x0]
+; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1, x8, lsl #3]
+; VBITS_GE_256-NEXT: ld1d { z4.d }, p0/z, [x1]
+; VBITS_GE_256-NEXT: bsl z1.d, z1.d, z3.d, z0.d
+; VBITS_GE_256-NEXT: bsl z2.d, z2.d, z4.d, z0.d
+; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0, x8, lsl #3]
+; VBITS_GE_256-NEXT: st1d { z2.d }, p0, [x0]
 ; VBITS_GE_256-NEXT: ret
 ;
 ; VBITS_GE_512-LABEL: test_copysign_v8f64_v8f64:
 ; VBITS_GE_512: // %bb.0:
 ; VBITS_GE_512-NEXT: ptrue p0.d, vl8
-; VBITS_GE_512-NEXT: mov z2.d, #0x7fffffffffffffff
-; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
-; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1]
-; VBITS_GE_512-NEXT: bsl z0.d, z0.d, z1.d, z2.d
-; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
+; VBITS_GE_512-NEXT: mov z0.d, #0x7fffffffffffffff
+; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x0]
+; VBITS_GE_512-NEXT: ld1d { z2.d }, p0/z, [x1]
+; VBITS_GE_512-NEXT: bsl z1.d, z1.d, z2.d, z0.d
+; VBITS_GE_512-NEXT: st1d { z1.d }, p0, [x0]
 ; VBITS_GE_512-NEXT: ret
 %a = load <8 x double>, ptr %ap
 %b = load <8 x double>, ptr %bp
@@ -317,11 +317,11 @@
 ; CHECK-LABEL: test_copysign_v16f64_v16f64:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.d, vl16
-; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff
-; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
-; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
-; CHECK-NEXT: bsl z0.d, z0.d, z1.d, z2.d
-; CHECK-NEXT: st1d { z0.d }, p0, [x0]
+; CHECK-NEXT: mov z0.d, #0x7fffffffffffffff
+; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0]
+; CHECK-NEXT: ld1d { z2.d }, p0/z, [x1]
+; CHECK-NEXT: bsl z1.d, z1.d, z2.d, z0.d
+; CHECK-NEXT: st1d { z1.d }, p0, [x0]
 ; CHECK-NEXT: ret
 %a = load <16 x double>, ptr %ap
 %b = load <16 x double>, ptr %bp
@@ -334,11 +334,11 @@
 ; CHECK-LABEL: test_copysign_v32f64_v32f64:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.d, vl32
-; CHECK-NEXT: mov z2.d, #0x7fffffffffffffff
-; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
-; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
-; CHECK-NEXT: bsl z0.d, z0.d, z1.d, z2.d
-; CHECK-NEXT: st1d { z0.d }, p0, [x0]
+; CHECK-NEXT: mov z0.d, #0x7fffffffffffffff
+; CHECK-NEXT: ld1d { z1.d }, p0/z, [x0]
+; CHECK-NEXT: ld1d { z2.d }, p0/z, [x1]
+; CHECK-NEXT: bsl z1.d, z1.d, z2.d, z0.d
+; CHECK-NEXT: st1d { z1.d }, p0, [x0]
 ; CHECK-NEXT: ret
 %a = load <32 x double>, ptr %ap
 %b = load <32 x double>, ptr %bp
@@ -353,10 +353,10 @@
 ; CHECK-LABEL: test_copysign_v2f32_v2f64:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldr q0, [x1]
-; CHECK-NEXT: mvni v2.2s, #128, lsl #24
-; CHECK-NEXT: ldr d1, [x0]
+; CHECK-NEXT: mvni v1.2s, #128, lsl #24
+; CHECK-NEXT: ldr d2, [x0]
 ; CHECK-NEXT: fcvtn v0.2s, v0.2d
-; CHECK-NEXT: bit v0.8b, v1.8b, v2.8b
+; CHECK-NEXT: bit v0.8b, v2.8b, v1.8b
 ; CHECK-NEXT: str d0, [x0]
 ; CHECK-NEXT: ret
 %a = load <2 x float>, ptr %ap
@@ -375,10 +375,10 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.d, vl4
 ; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
-; CHECK-NEXT: ptrue p0.d
 ; CHECK-NEXT: mvni v2.4s, #128, lsl #24
-; CHECK-NEXT: fcvt z1.s, p0/m, z1.d
+; CHECK-NEXT: ptrue p1.d
+; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
+; CHECK-NEXT: fcvt z1.s, p1/m, z1.d
 ; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
 ; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b
 ; CHECK-NEXT: str q0, [x0]
@@ -452,10 +452,10 @@
 ; CHECK-LABEL: test_copysign_v4f16_v4f32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ldr q0, [x1]
-; CHECK-NEXT: mvni v2.4h, #128, lsl #8
-; CHECK-NEXT: ldr d1, [x0]
+; CHECK-NEXT: mvni v1.4h, #128, lsl #8
+; CHECK-NEXT: ldr d2, [x0]
 ; CHECK-NEXT: fcvtn v0.4h, v0.4s
-; CHECK-NEXT: bit v0.8b, v1.8b, v2.8b
+; CHECK-NEXT: bit v0.8b, v2.8b, v1.8b
 ; CHECK-NEXT: str d0, [x0]
 ; CHECK-NEXT: ret
 %a = load <4 x half>, ptr %ap
@@ -471,10 +471,10 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.d, vl4
 ; CHECK-NEXT: ldr d0, [x0]
-; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
-; CHECK-NEXT: ptrue p0.d
 ; CHECK-NEXT: mvni v2.4h, #128, lsl #8
-; CHECK-NEXT: fcvt z1.h, p0/m, z1.d
+; CHECK-NEXT: ptrue p1.d
+; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
+; CHECK-NEXT: fcvt z1.h, p1/m, z1.d
 ; CHECK-NEXT: uzp1 z1.s, z1.s, z1.s
 ; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h
 ; CHECK-NEXT: bif v0.8b, v1.8b, v2.8b
@@ -498,10 +498,10 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.s, vl8
 ; CHECK-NEXT: ldr q0, [x0]
-; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
-; CHECK-NEXT: ptrue p0.s
 ; CHECK-NEXT: mvni v2.8h, #128, lsl #8
-; CHECK-NEXT: fcvt z1.h, p0/m, z1.s
+; CHECK-NEXT: ptrue p1.s +; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1] +; CHECK-NEXT: fcvt z1.h, p1/m, z1.s ; CHECK-NEXT: uzp1 z1.h, z1.h, z1.h ; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b ; CHECK-NEXT: str q0, [x0] diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx2.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx2.ll --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx2.ll +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx2.ll @@ -8,10 +8,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z5.d, z4.d -; CHECK-NEXT: mov z7.d, z2.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z5.d, z4.d +; CHECK-NEXT: mov z7.d, z2.d ; CHECK-NEXT: mov z4.d, z3.d ; CHECK-NEXT: mov z6.d, z1.d ; CHECK-NEXT: sel { z0.b, z1.b }, pn8, { z6.b, z7.b }, { z4.b, z5.b } @@ -28,10 +28,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z5.d, z4.d -; CHECK-NEXT: mov z7.d, z2.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z5.d, z4.d +; CHECK-NEXT: mov z7.d, z2.d ; CHECK-NEXT: mov z4.d, z3.d ; CHECK-NEXT: mov z6.d, z1.d ; CHECK-NEXT: sel { z0.h, z1.h }, pn8, { z6.h, z7.h }, { z4.h, z5.h } @@ -48,10 +48,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z5.d, z4.d -; CHECK-NEXT: mov z7.d, z2.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z5.d, z4.d +; CHECK-NEXT: mov z7.d, z2.d ; CHECK-NEXT: mov z4.d, z3.d ; CHECK-NEXT: mov z6.d, z1.d ; CHECK-NEXT: sel { z0.h, z1.h }, pn8, { z6.h, z7.h }, { z4.h, z5.h } @@ -68,10 +68,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z5.d, z4.d -; CHECK-NEXT: mov z7.d, z2.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z5.d, z4.d +; CHECK-NEXT: mov z7.d, z2.d ; CHECK-NEXT: mov z4.d, z3.d ; CHECK-NEXT: mov z6.d, z1.d ; CHECK-NEXT: sel { z0.h, z1.h }, pn8, { z6.h, z7.h }, { z4.h, z5.h } @@ -88,10 +88,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z5.d, z4.d -; CHECK-NEXT: mov z7.d, z2.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z5.d, z4.d +; CHECK-NEXT: mov z7.d, z2.d ; CHECK-NEXT: mov z4.d, z3.d ; CHECK-NEXT: mov z6.d, z1.d ; CHECK-NEXT: sel { z0.s, z1.s }, pn8, { z6.s, z7.s }, { z4.s, z5.s } @@ -108,10 +108,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z5.d, z4.d -; CHECK-NEXT: mov z7.d, z2.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z5.d, z4.d +; CHECK-NEXT: mov z7.d, z2.d ; CHECK-NEXT: mov z4.d, z3.d ; CHECK-NEXT: mov z6.d, z1.d ; CHECK-NEXT: sel { z0.s, z1.s }, pn8, { z6.s, z7.s }, { z4.s, z5.s } @@ -128,10 +128,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z5.d, z4.d -; CHECK-NEXT: mov z7.d, z2.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z5.d, z4.d +; CHECK-NEXT: mov z7.d, z2.d ; CHECK-NEXT: mov z4.d, z3.d ; CHECK-NEXT: mov z6.d, z1.d ; CHECK-NEXT: sel { z0.d, z1.d }, pn8, { z6.d, z7.d }, { z4.d, z5.d } @@ -148,10 +148,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z5.d, z4.d -; CHECK-NEXT: mov z7.d, z2.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z5.d, z4.d +; CHECK-NEXT: mov z7.d, z2.d ; CHECK-NEXT: mov z4.d, z3.d ; CHECK-NEXT: mov z6.d, z1.d ; CHECK-NEXT: sel { z0.d, z1.d }, pn8, { z6.d, z7.d }, { z4.d, z5.d } diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-selx4.ll @@ -8,17 +8,17 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov z25.d, z6.d ; CHECK-NEXT: ptrue p1.b -; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: ld1b { z27.b }, p1/z, [x0] +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z25.d, z6.d ; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z24.d, z5.d ; CHECK-NEXT: mov z29.d, z2.d -; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z28.d, z1.d +; CHECK-NEXT: ld1b { z27.b }, p1/z, [x0] ; CHECK-NEXT: sel { z0.b - z3.b }, pn8, { z28.b - z31.b }, { z24.b - z27.b } ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -33,17 +33,17 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov z25.d, z6.d ; CHECK-NEXT: ptrue p1.h -; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: ld1h { z27.h }, p1/z, [x0] +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z25.d, z6.d ; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z24.d, z5.d ; CHECK-NEXT: mov z29.d, z2.d -; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z28.d, z1.d +; CHECK-NEXT: ld1h { z27.h }, p1/z, [x0] ; CHECK-NEXT: sel { z0.h - z3.h }, pn8, { z28.h - z31.h }, { z24.h - z27.h } ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -58,17 +58,17 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov z25.d, z6.d ; CHECK-NEXT: ptrue p1.h -; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: ld1h { z27.h }, p1/z, [x0] +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z25.d, z6.d ; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z24.d, z5.d ; CHECK-NEXT: mov z29.d, z2.d -; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z28.d, z1.d +; CHECK-NEXT: ld1h { z27.h }, p1/z, [x0] ; CHECK-NEXT: sel { z0.h - z3.h }, pn8, { z28.h - z31.h }, { z24.h - z27.h } ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -83,17 +83,17 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov z25.d, z6.d ; CHECK-NEXT: ptrue p1.h -; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: ld1h { z27.h }, p1/z, [x0] +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z25.d, z6.d ; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z24.d, z5.d ; CHECK-NEXT: mov z29.d, z2.d -; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z28.d, z1.d +; CHECK-NEXT: ld1h { z27.h }, p1/z, [x0] ; CHECK-NEXT: sel { z0.h - z3.h }, pn8, { z28.h - z31.h }, { z24.h - z27.h } ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -108,17 +108,17 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov z25.d, z6.d ; CHECK-NEXT: ptrue p1.s -; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: ld1w { z27.s }, p1/z, [x0] +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z25.d, z6.d ; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z24.d, z5.d ; CHECK-NEXT: mov z29.d, z2.d -; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z28.d, z1.d +; CHECK-NEXT: ld1w { z27.s }, p1/z, [x0] ; CHECK-NEXT: sel { z0.s - z3.s }, pn8, { z28.s - z31.s }, { z24.s - z27.s } ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -133,17 +133,17 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov z25.d, z6.d ; CHECK-NEXT: ptrue p1.s -; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: ld1w { z27.s }, p1/z, [x0] +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z25.d, z6.d ; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z24.d, z5.d ; CHECK-NEXT: mov z29.d, z2.d -; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z28.d, z1.d +; CHECK-NEXT: ld1w { z27.s }, p1/z, [x0] ; CHECK-NEXT: sel { z0.s - z3.s }, pn8, { z28.s - z31.s }, { z24.s - z27.s } ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -158,17 +158,17 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov z25.d, z6.d ; CHECK-NEXT: ptrue p1.d -; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: ld1d { z27.d }, p1/z, [x0] +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z25.d, z6.d ; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z24.d, z5.d ; CHECK-NEXT: mov z29.d, z2.d -; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z28.d, z1.d +; CHECK-NEXT: ld1d { z27.d }, p1/z, [x0] ; CHECK-NEXT: sel { z0.d - z3.d }, pn8, { z28.d - z31.d }, { z24.d - z27.d } ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 @@ -183,17 +183,17 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z26.d, z7.d -; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov z25.d, z6.d ; CHECK-NEXT: ptrue p1.d -; CHECK-NEXT: mov z24.d, z5.d +; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: mov z26.d, z7.d ; CHECK-NEXT: mov z31.d, z4.d -; CHECK-NEXT: ld1d { z27.d }, p1/z, [x0] +; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z25.d, z6.d ; CHECK-NEXT: mov z30.d, z3.d +; CHECK-NEXT: mov z24.d, z5.d ; CHECK-NEXT: mov z29.d, z2.d -; CHECK-NEXT: mov p8.b, p0.b ; CHECK-NEXT: mov z28.d, z1.d +; CHECK-NEXT: ld1d { z27.d }, p1/z, [x0] ; CHECK-NEXT: sel { z0.d - z3.d }, pn8, { z28.d - z31.d }, { z24.d - z27.d } ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload ; CHECK-NEXT: addvl sp, sp, #1 diff --git a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-stores.ll b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-stores.ll --- a/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-stores.ll +++ b/llvm/test/CodeGen/AArch64/sve2p1-intrinsics-stores.ll @@ -9,9 +9,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z3.d, z2.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z3.d, z2.d ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: st1b { z2.b, z3.b }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload @@ -27,9 +27,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z3.d, z2.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z3.d, z2.d ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: st1h { z2.h, z3.h }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload @@ -45,9 +45,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z3.d, z2.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z3.d, z2.d ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: st1w { z2.s, z3.s }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload @@ -63,9 +63,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z3.d, z2.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z3.d, z2.d ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: st1d { z2.d, z3.d }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload @@ -81,9 +81,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z3.d, z2.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z3.d, z2.d ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: st1h { z2.h, z3.h }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload @@ -99,9 +99,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z3.d, z2.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z3.d, z2.d ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: st1h { z2.h, z3.h }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload @@ -117,9 +117,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z3.d, z2.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z3.d, z2.d ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: st1w { z2.s, z3.s }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload @@ -135,9 +135,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z3.d, z2.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z3.d, z2.d ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: st1d { z2.d, z3.d }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload @@ -153,10 +153,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z7.d, z4.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: st1b { z4.b - z7.b }, pn8, [x0] @@ -173,10 +173,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z7.d, z4.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: st1h { z4.h - z7.h }, pn8, [x0] @@ -193,10 +193,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z7.d, z4.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: st1w { z4.s - z7.s }, pn8, [x0] @@ -213,10 +213,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z7.d, z4.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: st1d { z4.d - z7.d }, pn8, [x0] @@ -233,10 +233,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z7.d, z4.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: st1h { z4.h - z7.h }, pn8, [x0] @@ -253,10 +253,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z7.d, z4.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: st1h { z4.h - z7.h }, pn8, [x0] @@ -273,10 +273,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z7.d, z4.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: st1w { z4.s - z7.s }, pn8, [x0] @@ -293,10 +293,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z7.d, z4.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: st1d { z4.d - z7.d }, pn8, [x0] @@ -315,9 +315,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z3.d, z2.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z3.d, z2.d ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: stnt1b { z2.b, z3.b }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload @@ -333,9 +333,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z3.d, z2.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z3.d, z2.d ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: stnt1h { z2.h, z3.h }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload @@ -351,9 +351,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z3.d, z2.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z3.d, z2.d ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: stnt1w { z2.s, z3.s }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload @@ -369,9 +369,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z3.d, z2.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z3.d, z2.d ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: stnt1d { z2.d, z3.d }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload @@ -387,9 +387,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z3.d, z2.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z3.d, z2.d ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: stnt1h { z2.h, z3.h }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload @@ -405,9 +405,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z3.d, z2.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z3.d, z2.d ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: stnt1h { z2.h, z3.h }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload @@ -423,9 +423,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z3.d, z2.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z3.d, z2.d ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: stnt1w { z2.s, z3.s }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload @@ -441,9 +441,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z3.d, z2.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z3.d, z2.d ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: stnt1d { z2.d, z3.d }, pn8, [x0] ; CHECK-NEXT: ldr p8, [sp, #7, mul vl] // 2-byte Folded Reload @@ -459,10 +459,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z7.d, z4.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: stnt1b { z4.b - z7.b }, pn8, [x0] @@ -479,10 +479,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z7.d, z4.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: stnt1h { z4.h - z7.h }, pn8, [x0] @@ -499,10 +499,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z7.d, z4.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: stnt1w { z4.s - z7.s }, pn8, [x0] @@ -519,10 +519,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z7.d, z4.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: stnt1d { z4.d - z7.d }, pn8, [x0] @@ -539,10 +539,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z7.d, z4.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: stnt1h { z4.h - z7.h }, pn8, [x0] @@ -559,10 +559,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z7.d, z4.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: stnt1h { z4.h - z7.h }, pn8, [x0] @@ -579,10 +579,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z7.d, z4.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: stnt1w { z4.s - z7.s }, pn8, [x0] @@ -599,10 +599,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: mov z7.d, z4.d ; CHECK-NEXT: str p8, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK-NEXT: mov z6.d, z3.d +; CHECK-NEXT: mov z7.d, z4.d ; CHECK-NEXT: mov p8.b, p0.b +; CHECK-NEXT: mov z6.d, z3.d ; CHECK-NEXT: mov z5.d, z2.d ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: stnt1d { z4.d - z7.d }, pn8, [x0] diff --git a/llvm/test/CodeGen/AArch64/swift-return.ll b/llvm/test/CodeGen/AArch64/swift-return.ll --- a/llvm/test/CodeGen/AArch64/swift-return.ll +++ b/llvm/test/CodeGen/AArch64/swift-return.ll @@ -1,17 +1,34 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck %s ; RUN: llc -O0 -fast-isel -verify-machineinstrs -mtriple=aarch64-apple-ios -o - %s | FileCheck %s --check-prefix=CHECK-O0 ; RUN: llc -verify-machineinstrs -mtriple=arm64_32-apple-ios -o - %s | FileCheck %s ; RUN: llc -O0 -fast-isel -verify-machineinstrs -mtriple=arm64_32-apple-ios -o - %s | FileCheck %s --check-prefix=CHECK-O0 - -; CHECK-LABEL: test1 -; CHECK: bl _gen -; CHECK: sxth [[TMP:w.*]], w0 -; CHECK: add w0, [[TMP]], w1, sxtb -; CHECK-O0-LABEL: test1 -; CHECK-O0: bl _gen -; CHECK-O0: sxth [[TMP:w.*]], w0 -; CHECK-O0: add {{w[0-9]+}}, [[TMP]], w1, sxtb define i16 @test1(i32) { +; +; CHECK-LABEL: test1: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: bl _gen +; CHECK-NEXT: sxth w8, w0 +; CHECK-NEXT: add w0, w8, w1, sxtb +; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-NEXT: ret +; +; CHECK-O0-LABEL: test1: +; CHECK-O0: ; %bb.0: ; %entry +; CHECK-O0-NEXT: stp x29, x30, [sp, #-16]! 
; 16-byte Folded Spill +; CHECK-O0-NEXT: .cfi_def_cfa_offset 16 +; CHECK-O0-NEXT: .cfi_offset w30, -8 +; CHECK-O0-NEXT: .cfi_offset w29, -16 +; CHECK-O0-NEXT: bl _gen +; CHECK-O0-NEXT: sxth w8, w0 +; CHECK-O0-NEXT: add w0, w8, w1, sxtb +; CHECK-O0-NEXT: ; kill: def $w1 killed $w0 +; CHECK-O0-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-O0-NEXT: ret entry: %call = call swiftcc { i16, i8 } @gen(i32 %0) %v3 = extractvalue { i16, i8 } %call, 0 @@ -22,48 +39,77 @@ %conv = trunc i32 %add to i16 ret i16 %conv } - declare swiftcc { i16, i8 } @gen(i32) - -; CHECK-LABEL: test2 -; CHECK: bl _gen2 -; CHECK: add [[TMP:x.*]], x0, x1 -; CHECK: add [[TMP2:x.*]], x2, x3 -; CHECK: add [[TMP]], [[TMP]], [[TMP2]] -; CHECK: add x0, [[TMP]], x4 -; CHECK-O0-LABEL: test2 -; CHECK-O0: bl _gen2 -; CHECK-O0: add [[TMP:x.*]], x0, x1 -; CHECK-O0: add [[TMP]], [[TMP]], x2 -; CHECK-O0: add [[TMP]], [[TMP]], x3 -; CHECK-O0: add x0, [[TMP]], x4 - define i64 @test2(i64 %key) { +; +; CHECK-LABEL: test2: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: sub sp, sp, #32 +; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: str x0, [sp, #8] +; CHECK-NEXT: bl _gen2 +; CHECK-NEXT: add x8, x0, x1 +; CHECK-NEXT: add x9, x2, x3 +; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; CHECK-NEXT: add x8, x8, x9 +; CHECK-NEXT: add x0, x8, x4 +; CHECK-NEXT: add sp, sp, #32 +; CHECK-NEXT: ret +; +; CHECK-O0-LABEL: test2: +; CHECK-O0: ; %bb.0: ; %entry +; CHECK-O0-NEXT: sub sp, sp, #32 +; CHECK-O0-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; CHECK-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-O0-NEXT: .cfi_offset w30, -8 +; CHECK-O0-NEXT: .cfi_offset w29, -16 +; CHECK-O0-NEXT: str x0, [sp, #8] +; CHECK-O0-NEXT: ldr x0, [sp, #8] +; CHECK-O0-NEXT: bl _gen2 +; CHECK-O0-NEXT: add x8, x0, x1 +; CHECK-O0-NEXT: add x8, x8, x2 +; CHECK-O0-NEXT: add x8, x8, x3 +; CHECK-O0-NEXT: add x0, x8, x4 +; CHECK-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; CHECK-O0-NEXT: add sp, sp, #32 +; CHECK-O0-NEXT: ret entry: %key.addr = alloca i64, align 4 store i64 %key, ptr %key.addr, align 4 %0 = load i64, ptr %key.addr, align 4 %call = call swiftcc { i64, i64, i64, i64, i64 } @gen2(i64 %0) - %v3 = extractvalue { i64, i64, i64, i64, i64 } %call, 0 %v5 = extractvalue { i64, i64, i64, i64, i64 } %call, 1 %v6 = extractvalue { i64, i64, i64, i64, i64 } %call, 2 %v7 = extractvalue { i64, i64, i64, i64, i64 } %call, 3 %v8 = extractvalue { i64, i64, i64, i64, i64 } %call, 4 - %add = add nsw i64 %v3, %v5 %add1 = add nsw i64 %add, %v6 %add2 = add nsw i64 %add1, %v7 %add3 = add nsw i64 %add2, %v8 ret i64 %add3 } -; CHECK-LABEL: gen2: -; CHECK: mov x1, x0 -; CHECK: mov x2, x0 -; CHECK: mov x3, x0 -; CHECK: mov x4, x0 -; CHECK: ret define swiftcc { i64, i64, i64, i64, i64 } @gen2(i64 %key) { +; +; CHECK-LABEL: gen2: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov x1, x0 +; CHECK-NEXT: mov x2, x0 +; CHECK-NEXT: mov x3, x0 +; CHECK-NEXT: mov x4, x0 +; CHECK-NEXT: ret +; +; CHECK-O0-LABEL: gen2: +; CHECK-O0: ; %bb.0: +; CHECK-O0-NEXT: mov x4, x0 +; CHECK-O0-NEXT: mov x0, x4 +; CHECK-O0-NEXT: mov x1, x4 +; CHECK-O0-NEXT: mov x2, x4 +; CHECK-O0-NEXT: mov x3, x4 +; CHECK-O0-NEXT: ret %Y = insertvalue { i64, i64, i64, i64, i64 } undef, i64 %key, 0 %Z = insertvalue { i64, i64, i64, i64, i64 } %Y, i64 %key, 1 %Z2 = insertvalue { i64, i64, i64, i64, i64 } %Z, i64 %key, 2 @@ -71,108 +117,158 @@ %Z4 = insertvalue { i64, i64, 
i64, i64, i64 } %Z3, i64 %key, 4 ret { i64, i64, i64, i64, i64 } %Z4 } - -; CHECK-LABEL: test3 -; CHECK: bl _gen3 -; CHECK: add [[TMP:w.*]], w0, w1 -; CHECK: add [[TMP2:w.*]], w2, w3 -; CHECK: add w0, [[TMP]], [[TMP2]] -; CHECK-O0-LABEL: test3 -; CHECK-O0: bl _gen3 -; CHECK-O0: add [[TMP:w.*]], w0, w1 -; CHECK-O0: add [[TMP]], [[TMP]], w2 -; CHECK-O0: add w0, [[TMP]], w3 define i32 @test3(i32) { +; +; CHECK-LABEL: test3: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: bl _gen3 +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: add w9, w2, w3 +; CHECK-NEXT: add w0, w8, w9 +; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-NEXT: ret +; +; CHECK-O0-LABEL: test3: +; CHECK-O0: ; %bb.0: ; %entry +; CHECK-O0-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-O0-NEXT: .cfi_def_cfa_offset 16 +; CHECK-O0-NEXT: .cfi_offset w30, -8 +; CHECK-O0-NEXT: .cfi_offset w29, -16 +; CHECK-O0-NEXT: bl _gen3 +; CHECK-O0-NEXT: add w8, w0, w1 +; CHECK-O0-NEXT: add w8, w8, w2 +; CHECK-O0-NEXT: add w0, w8, w3 +; CHECK-O0-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-O0-NEXT: ret entry: %call = call swiftcc { i32, i32, i32, i32 } @gen3(i32 %0) - %v3 = extractvalue { i32, i32, i32, i32 } %call, 0 %v5 = extractvalue { i32, i32, i32, i32 } %call, 1 %v6 = extractvalue { i32, i32, i32, i32 } %call, 2 %v7 = extractvalue { i32, i32, i32, i32 } %call, 3 - %add = add nsw i32 %v3, %v5 %add1 = add nsw i32 %add, %v6 %add2 = add nsw i32 %add1, %v7 ret i32 %add2 } - declare swiftcc { i32, i32, i32, i32 } @gen3(i32 %key) - -; CHECK-LABEL: test4 -; CHECK: bl _gen4 -; CHECK: fadd s0, s0, s1 -; CHECK: fadd s0, s0, s2 -; CHECK: fadd s0, s0, s3 -; CHECK-O0-LABEL: test4 -; CHECK-O0: bl _gen4 -; CHECK-O0: fadd s0, s0, s1 -; CHECK-O0: fadd s0, s0, s2 -; CHECK-O0: fadd s0, s0, s3 define float @test4(float) { +; +; CHECK-LABEL: test4: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: bl _gen4 +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: fadd s0, s0, s2 +; CHECK-NEXT: fadd s0, s0, s3 +; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-NEXT: ret +; +; CHECK-O0-LABEL: test4: +; CHECK-O0: ; %bb.0: ; %entry +; CHECK-O0-NEXT: stp x29, x30, [sp, #-16]! 
; 16-byte Folded Spill +; CHECK-O0-NEXT: .cfi_def_cfa_offset 16 +; CHECK-O0-NEXT: .cfi_offset w30, -8 +; CHECK-O0-NEXT: .cfi_offset w29, -16 +; CHECK-O0-NEXT: bl _gen4 +; CHECK-O0-NEXT: fadd s0, s0, s1 +; CHECK-O0-NEXT: fadd s0, s0, s2 +; CHECK-O0-NEXT: fadd s0, s0, s3 +; CHECK-O0-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-O0-NEXT: ret entry: %call = call swiftcc { float, float, float, float } @gen4(float %0) - %v3 = extractvalue { float, float, float, float } %call, 0 %v5 = extractvalue { float, float, float, float } %call, 1 %v6 = extractvalue { float, float, float, float } %call, 2 %v7 = extractvalue { float, float, float, float } %call, 3 - %add = fadd float %v3, %v5 %add1 = fadd float %add, %v6 %add2 = fadd float %add1, %v7 ret float %add2 } - declare swiftcc { float, float, float, float } @gen4(float %key) - -; CHECK-LABEL: test5 -; CHECK: bl _gen5 -; CHECK: fadd d0, d0, d1 -; CHECK: fadd d0, d0, d2 -; CHECK: fadd d0, d0, d3 -; CHECK-O0-LABEL: test5 -; CHECK-O0: bl _gen5 -; CHECK-O0: fadd d0, d0, d1 -; CHECK-O0: fadd d0, d0, d2 -; CHECK-O0: fadd d0, d0, d3 define swiftcc double @test5(){ +; +; CHECK-LABEL: test5: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: bl _gen5 +; CHECK-NEXT: fadd d0, d0, d1 +; CHECK-NEXT: fadd d0, d0, d2 +; CHECK-NEXT: fadd d0, d0, d3 +; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-NEXT: ret +; +; CHECK-O0-LABEL: test5: +; CHECK-O0: ; %bb.0: ; %entry +; CHECK-O0-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-O0-NEXT: .cfi_def_cfa_offset 16 +; CHECK-O0-NEXT: .cfi_offset w30, -8 +; CHECK-O0-NEXT: .cfi_offset w29, -16 +; CHECK-O0-NEXT: bl _gen5 +; CHECK-O0-NEXT: fadd d0, d0, d1 +; CHECK-O0-NEXT: fadd d0, d0, d2 +; CHECK-O0-NEXT: fadd d0, d0, d3 +; CHECK-O0-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-O0-NEXT: ret entry: %call = call swiftcc { double, double, double, double } @gen5() - %v3 = extractvalue { double, double, double, double } %call, 0 %v5 = extractvalue { double, double, double, double } %call, 1 %v6 = extractvalue { double, double, double, double } %call, 2 %v7 = extractvalue { double, double, double, double } %call, 3 - %add = fadd double %v3, %v5 %add1 = fadd double %add, %v6 %add2 = fadd double %add1, %v7 ret double %add2 } - declare swiftcc { double, double, double, double } @gen5() - -; CHECK-LABEL: test6 -; CHECK: bl _gen6 -; CHECK-DAG: fadd d0, d0, d1 -; CHECK-DAG: fadd d0, d0, d2 -; CHECK-DAG: fadd d0, d0, d3 -; CHECK-DAG: add [[TMP:w.*]], w0, w1 -; CHECK-DAG: add [[TMP2:w.*]], w2, w3 -; CHECK-DAG: add w0, [[TMP]], [[TMP2]] -; CHECK-O0-LABEL: test6 -; CHECK-O0: bl _gen6 -; CHECK-O0-DAG: fadd d0, d0, d1 -; CHECK-O0-DAG: fadd d0, d0, d2 -; CHECK-O0-DAG: fadd d0, d0, d3 -; CHECK-O0-DAG: add [[TMP:w.*]], w0, w1 -; CHECK-O0-DAG: add [[TMP]], [[TMP]], w2 -; CHECK-O0-DAG: add w0, [[TMP]], w3 define swiftcc { double, i32 } @test6() { +; +; CHECK-LABEL: test6: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
; 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: bl _gen6 +; CHECK-NEXT: fadd d0, d0, d1 +; CHECK-NEXT: add w8, w0, w1 +; CHECK-NEXT: add w9, w2, w3 +; CHECK-NEXT: add w0, w8, w9 +; CHECK-NEXT: fadd d0, d0, d2 +; CHECK-NEXT: fadd d0, d0, d3 +; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-NEXT: ret +; +; CHECK-O0-LABEL: test6: +; CHECK-O0: ; %bb.0: ; %entry +; CHECK-O0-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-O0-NEXT: .cfi_def_cfa_offset 16 +; CHECK-O0-NEXT: .cfi_offset w30, -8 +; CHECK-O0-NEXT: .cfi_offset w29, -16 +; CHECK-O0-NEXT: bl _gen6 +; CHECK-O0-NEXT: fadd d0, d0, d1 +; CHECK-O0-NEXT: fadd d0, d0, d2 +; CHECK-O0-NEXT: fadd d0, d0, d3 +; CHECK-O0-NEXT: add w8, w0, w1 +; CHECK-O0-NEXT: add w8, w8, w2 +; CHECK-O0-NEXT: add w0, w8, w3 +; CHECK-O0-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-O0-NEXT: ret entry: %call = call swiftcc { double, double, double, double, i32, i32, i32, i32 } @gen6() - %v3 = extractvalue { double, double, double, double, i32, i32, i32, i32 } %call, 0 %v5 = extractvalue { double, double, double, double, i32, i32, i32, i32 } %call, 1 %v6 = extractvalue { double, double, double, double, i32, i32, i32, i32 } %call, 2 @@ -181,67 +277,84 @@ %v5.i = extractvalue { double, double, double, double, i32, i32, i32, i32 } %call, 5 %v6.i = extractvalue { double, double, double, double, i32, i32, i32, i32 } %call, 6 %v7.i = extractvalue { double, double, double, double, i32, i32, i32, i32 } %call, 7 - %add = fadd double %v3, %v5 %add1 = fadd double %add, %v6 %add2 = fadd double %add1, %v7 - %add.i = add nsw i32 %v3.i, %v5.i %add1.i = add nsw i32 %add.i, %v6.i %add2.i = add nsw i32 %add1.i, %v7.i - %Y = insertvalue { double, i32 } undef, double %add2, 0 %Z = insertvalue { double, i32 } %Y, i32 %add2.i, 1 ret { double, i32} %Z } - declare swiftcc { double, double, double, double, i32, i32, i32, i32 } @gen6() - -; CHECK-LABEL: _gen7 -; CHECK-DAG: mov w1, w0 -; CHECK-DAG: mov w2, w0 -; CHECK-DAG: mov w3, w0 -; CHECK: ret -; CHECK-O0-LABEL: _gen7 -; CHECK-O0: mov w3, w0 -; CHECK-O0: mov w0, w3 -; CHECK-O0: mov w1, w3 -; CHECK-O0: mov w2, w3 define swiftcc { i32, i32, i32, i32 } @gen7(i32 %key) { +; +; CHECK-LABEL: gen7: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov w1, w0 +; CHECK-NEXT: mov w2, w0 +; CHECK-NEXT: mov w3, w0 +; CHECK-NEXT: ret +; +; CHECK-O0-LABEL: gen7: +; CHECK-O0: ; %bb.0: +; CHECK-O0-NEXT: mov w3, w0 +; CHECK-O0-NEXT: mov w0, w3 +; CHECK-O0-NEXT: mov w1, w3 +; CHECK-O0-NEXT: mov w2, w3 +; CHECK-O0-NEXT: ret %v0 = insertvalue { i32, i32, i32, i32 } undef, i32 %key, 0 %v1 = insertvalue { i32, i32, i32, i32 } %v0, i32 %key, 1 %v2 = insertvalue { i32, i32, i32, i32 } %v1, i32 %key, 2 %v3 = insertvalue { i32, i32, i32, i32 } %v2, i32 %key, 3 ret { i32, i32, i32, i32 } %v3 } - -; CHECK-LABEL: _gen9 -; CHECK: mov w1, w0 -; CHECK: mov w2, w0 -; CHECK: mov w3, w0 -; CHECK: ret -; CHECK-O0-LABEL: _gen9 -; CHECK-O0: mov w3, w0 -; CHECK-O0: mov w0, w3 -; CHECK-O0: mov w1, w3 -; CHECK-O0: mov w2, w3 define swiftcc { i8, i8, i8, i8 } @gen9(i8 %key) { +; +; CHECK-LABEL: gen9: +; CHECK: ; %bb.0: +; CHECK-NEXT: mov w1, w0 +; CHECK-NEXT: mov w2, w0 +; CHECK-NEXT: mov w3, w0 +; CHECK-NEXT: ret +; +; CHECK-O0-LABEL: gen9: +; CHECK-O0: ; %bb.0: +; CHECK-O0-NEXT: mov w3, w0 +; CHECK-O0-NEXT: mov w0, w3 +; CHECK-O0-NEXT: mov w1, w3 +; CHECK-O0-NEXT: mov w2, w3 +; CHECK-O0-NEXT: ret %v0 = insertvalue { i8, i8, i8, i8 } undef, i8 
%key, 0 %v1 = insertvalue { i8, i8, i8, i8 } %v0, i8 %key, 1 %v2 = insertvalue { i8, i8, i8, i8 } %v1, i8 %key, 2 %v3 = insertvalue { i8, i8, i8, i8 } %v2, i8 %key, 3 ret { i8, i8, i8, i8 } %v3 } - -; CHECK-LABEL: _gen10 -; CHECK: fmov d1, d0 -; CHECK: fmov d2, d0 -; CHECK: fmov d3, d0 -; CHECK: mov w1, w0 -; CHECK: mov w2, w0 -; CHECK: mov w3, w0 -; CHECK: ret define swiftcc { double, double, double, double, i32, i32, i32, i32 } @gen10(double %keyd, i32 %keyi) { +; +; CHECK-LABEL: gen10: +; CHECK: ; %bb.0: +; CHECK-NEXT: fmov d1, d0 +; CHECK-NEXT: fmov d2, d0 +; CHECK-NEXT: mov w1, w0 +; CHECK-NEXT: fmov d3, d0 +; CHECK-NEXT: mov w2, w0 +; CHECK-NEXT: mov w3, w0 +; CHECK-NEXT: ret +; +; CHECK-O0-LABEL: gen10: +; CHECK-O0: ; %bb.0: +; CHECK-O0-NEXT: mov w3, w0 +; CHECK-O0-NEXT: fmov d3, d0 +; CHECK-O0-NEXT: fmov d0, d3 +; CHECK-O0-NEXT: fmov d1, d3 +; CHECK-O0-NEXT: fmov d2, d3 +; CHECK-O0-NEXT: mov w0, w3 +; CHECK-O0-NEXT: mov w1, w3 +; CHECK-O0-NEXT: mov w2, w3 +; CHECK-O0-NEXT: ret %v0 = insertvalue { double, double, double, double, i32, i32, i32, i32 } undef, double %keyd, 0 %v1 = insertvalue { double, double, double, double, i32, i32, i32, i32 } %v0, double %keyd, 1 %v2 = insertvalue { double, double, double, double, i32, i32, i32, i32 } %v1, double %keyd, 2 @@ -252,47 +365,86 @@ %v7 = insertvalue { double, double, double, double, i32, i32, i32, i32 } %v6, i32 %keyi, 7 ret { double, double, double, double, i32, i32, i32, i32 } %v7 } - -; CHECK-LABEL: _test11 -; CHECK: bl _gen11 -; CHECK: fadd.4s v0, v0, v1 -; CHECK: fadd.4s v0, v0, v2 -; CHECK: fadd.4s v0, v0, v3 define swiftcc <4 x float> @test11() { +; +; CHECK-LABEL: test11: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: bl _gen11 +; CHECK-NEXT: fadd.4s v0, v0, v1 +; CHECK-NEXT: fadd.4s v0, v0, v2 +; CHECK-NEXT: fadd.4s v0, v0, v3 +; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-NEXT: ret +; +; CHECK-O0-LABEL: test11: +; CHECK-O0: ; %bb.0: ; %entry +; CHECK-O0-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill +; CHECK-O0-NEXT: .cfi_def_cfa_offset 16 +; CHECK-O0-NEXT: .cfi_offset w30, -8 +; CHECK-O0-NEXT: .cfi_offset w29, -16 +; CHECK-O0-NEXT: bl _gen11 +; CHECK-O0-NEXT: fadd.4s v0, v0, v1 +; CHECK-O0-NEXT: fadd.4s v0, v0, v2 +; CHECK-O0-NEXT: fadd.4s v0, v0, v3 +; CHECK-O0-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-O0-NEXT: ret entry: %call = call swiftcc { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @gen11() - %v3 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %call, 0 %v5 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %call, 1 %v6 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %call, 2 %v7 = extractvalue { <4 x float>, <4 x float>, <4 x float>, <4 x float> } %call, 3 - %add = fadd <4 x float> %v3, %v5 %add1 = fadd <4 x float> %add, %v6 %add2 = fadd <4 x float> %add1, %v7 ret <4 x float> %add2 } - declare swiftcc { <4 x float>, <4 x float>, <4 x float>, <4 x float> } @gen11() - -; CHECK-LABEL: _test12 -; CHECK: fadd.4s v0, v0, v1 -; CHECK: fmov s1, s3 -; CHECK: fadd.4s v0, v0, v2 define swiftcc { <4 x float>, float } @test12() #0 { +; +; CHECK-LABEL: test12: +; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
; 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset w29, -16 +; CHECK-NEXT: bl _gen12 +; CHECK-NEXT: fadd.4s v0, v0, v1 +; CHECK-NEXT: fmov s1, s3 +; CHECK-NEXT: fadd.4s v0, v0, v2 +; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload +; CHECK-NEXT: ret +; +; CHECK-O0-LABEL: test12: +; CHECK-O0: ; %bb.0: ; %entry +; CHECK-O0-NEXT: sub sp, sp, #32 +; CHECK-O0-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill +; CHECK-O0-NEXT: .cfi_def_cfa_offset 32 +; CHECK-O0-NEXT: .cfi_offset w30, -8 +; CHECK-O0-NEXT: .cfi_offset w29, -16 +; CHECK-O0-NEXT: bl _gen12 +; CHECK-O0-NEXT: str q1, [sp] ; 16-byte Folded Spill +; CHECK-O0-NEXT: fmov s1, s3 +; CHECK-O0-NEXT: ldr q3, [sp] ; 16-byte Folded Reload +; CHECK-O0-NEXT: fadd.4s v0, v0, v3 +; CHECK-O0-NEXT: fadd.4s v0, v0, v2 +; CHECK-O0-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload +; CHECK-O0-NEXT: add sp, sp, #32 +; CHECK-O0-NEXT: ret entry: %call = call swiftcc { <4 x float>, <4 x float>, <4 x float>, float } @gen12() - %v3 = extractvalue { <4 x float>, <4 x float>, <4 x float>, float } %call, 0 %v5 = extractvalue { <4 x float>, <4 x float>, <4 x float>, float } %call, 1 %v6 = extractvalue { <4 x float>, <4 x float>, <4 x float>, float } %call, 2 %v8 = extractvalue { <4 x float>, <4 x float>, <4 x float>, float } %call, 3 - %add = fadd <4 x float> %v3, %v5 %add1 = fadd <4 x float> %add, %v6 %res.0 = insertvalue { <4 x float>, float } undef, <4 x float> %add1, 0 %res = insertvalue { <4 x float>, float } %res.0, float %v8, 1 ret { <4 x float>, float } %res } - declare swiftcc { <4 x float>, <4 x float>, <4 x float>, float } @gen12() diff --git a/llvm/test/CodeGen/AArch64/swifterror.ll b/llvm/test/CodeGen/AArch64/swifterror.ll --- a/llvm/test/CodeGen/AArch64/swifterror.ll +++ b/llvm/test/CodeGen/AArch64/swifterror.ll @@ -18,10 +18,10 @@ ; CHECK-APPLE-NEXT: .cfi_def_cfa w29, 16 ; CHECK-APPLE-NEXT: .cfi_offset w30, -8 ; CHECK-APPLE-NEXT: .cfi_offset w29, -16 -; CHECK-APPLE-NEXT: mov w0, #16 +; CHECK-APPLE-NEXT: mov w0, #16 ; =0x10 ; CHECK-APPLE-NEXT: bl _malloc -; CHECK-APPLE-NEXT: mov w8, #1 ; CHECK-APPLE-NEXT: fmov s0, #1.00000000 +; CHECK-APPLE-NEXT: mov w8, #1 ; =0x1 ; CHECK-APPLE-NEXT: mov x21, x0 ; CHECK-APPLE-NEXT: strb w8, [x0, #8] ; CHECK-APPLE-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload @@ -34,11 +34,11 @@ ; CHECK-O0-AARCH64-NEXT: .cfi_def_cfa w29, 16 ; CHECK-O0-AARCH64-NEXT: .cfi_offset w30, -8 ; CHECK-O0-AARCH64-NEXT: .cfi_offset w29, -16 -; CHECK-O0-AARCH64-NEXT: mov w8, #16 +; CHECK-O0-AARCH64-NEXT: mov w8, #16 ; =0x10 ; CHECK-O0-AARCH64-NEXT: mov w0, w8 ; CHECK-O0-AARCH64-NEXT: bl _malloc ; CHECK-O0-AARCH64-NEXT: mov x21, x0 -; CHECK-O0-AARCH64-NEXT: mov w8, #1 +; CHECK-O0-AARCH64-NEXT: mov w8, #1 ; =0x1 ; CHECK-O0-AARCH64-NEXT: strb w8, [x0, #8] ; CHECK-O0-AARCH64-NEXT: fmov s0, #1.00000000 ; CHECK-O0-AARCH64-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload @@ -49,11 +49,11 @@ ; CHECK-O0-ARM64_32-NEXT: str x30, [sp, #-16]! 
; 8-byte Folded Spill ; CHECK-O0-ARM64_32-NEXT: .cfi_def_cfa_offset 16 ; CHECK-O0-ARM64_32-NEXT: .cfi_offset w30, -16 -; CHECK-O0-ARM64_32-NEXT: mov w8, #16 +; CHECK-O0-ARM64_32-NEXT: mov w8, #16 ; =0x10 ; CHECK-O0-ARM64_32-NEXT: mov w0, w8 ; CHECK-O0-ARM64_32-NEXT: bl _malloc ; CHECK-O0-ARM64_32-NEXT: mov x21, x0 -; CHECK-O0-ARM64_32-NEXT: mov w8, #1 +; CHECK-O0-ARM64_32-NEXT: mov w8, #1 ; =0x1 ; CHECK-O0-ARM64_32-NEXT: strb w8, [x0, #8] ; CHECK-O0-ARM64_32-NEXT: fmov s0, #1.00000000 ; CHECK-O0-ARM64_32-NEXT: ldr x30, [sp], #16 ; 8-byte Folded Reload @@ -231,8 +231,8 @@ ; CHECK-APPLE-AARCH64-NEXT: .cfi_offset w22, -48 ; CHECK-APPLE-AARCH64-NEXT: .cfi_offset b8, -56 ; CHECK-APPLE-AARCH64-NEXT: .cfi_offset b9, -64 -; CHECK-APPLE-AARCH64-NEXT: mov x19, x0 ; CHECK-APPLE-AARCH64-NEXT: fmov s8, #1.00000000 +; CHECK-APPLE-AARCH64-NEXT: mov x19, x0 ; CHECK-APPLE-AARCH64-NEXT: LBB2_1: ; %bb_loop ; CHECK-APPLE-AARCH64-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-APPLE-AARCH64-NEXT: mov x21, xzr @@ -313,8 +313,8 @@ ; CHECK-APPLE-ARM64_32-NEXT: .cfi_offset w22, -48 ; CHECK-APPLE-ARM64_32-NEXT: .cfi_offset b8, -56 ; CHECK-APPLE-ARM64_32-NEXT: .cfi_offset b9, -64 -; CHECK-APPLE-ARM64_32-NEXT: mov x19, x0 ; CHECK-APPLE-ARM64_32-NEXT: fmov s8, #1.00000000 +; CHECK-APPLE-ARM64_32-NEXT: mov x19, x0 ; CHECK-APPLE-ARM64_32-NEXT: LBB2_1: ; %bb_loop ; CHECK-APPLE-ARM64_32-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-APPLE-ARM64_32-NEXT: mov x21, xzr @@ -414,11 +414,11 @@ ; CHECK-APPLE-NEXT: .cfi_offset w29, -16 ; CHECK-APPLE-NEXT: cbz w0, LBB3_2 ; CHECK-APPLE-NEXT: ; %bb.1: ; %gen_error -; CHECK-APPLE-NEXT: mov w0, #16 +; CHECK-APPLE-NEXT: mov w0, #16 ; =0x10 ; CHECK-APPLE-NEXT: bl _malloc ; CHECK-APPLE-NEXT: mov x21, x0 -; CHECK-APPLE-NEXT: mov w8, #1 ; CHECK-APPLE-NEXT: fmov s0, #1.00000000 +; CHECK-APPLE-NEXT: mov w8, #1 ; =0x1 ; CHECK-APPLE-NEXT: strb w8, [x0, #8] ; CHECK-APPLE-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload ; CHECK-APPLE-NEXT: ret @@ -438,11 +438,11 @@ ; CHECK-O0-AARCH64-NEXT: str x21, [sp, #8] ; 8-byte Folded Spill ; CHECK-O0-AARCH64-NEXT: cbz w0, LBB3_2 ; CHECK-O0-AARCH64-NEXT: ; %bb.1: ; %gen_error -; CHECK-O0-AARCH64-NEXT: mov w8, #16 +; CHECK-O0-AARCH64-NEXT: mov w8, #16 ; =0x10 ; CHECK-O0-AARCH64-NEXT: mov w0, w8 ; CHECK-O0-AARCH64-NEXT: bl _malloc ; CHECK-O0-AARCH64-NEXT: mov x21, x0 -; CHECK-O0-AARCH64-NEXT: mov w8, #1 +; CHECK-O0-AARCH64-NEXT: mov w8, #1 ; =0x1 ; CHECK-O0-AARCH64-NEXT: strb w8, [x0, #8] ; CHECK-O0-AARCH64-NEXT: fmov s0, #1.00000000 ; CHECK-O0-AARCH64-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload @@ -464,11 +464,11 @@ ; CHECK-O0-ARM64_32-NEXT: str x21, [sp, #8] ; 8-byte Folded Spill ; CHECK-O0-ARM64_32-NEXT: cbz w0, LBB3_2 ; CHECK-O0-ARM64_32-NEXT: ; %bb.1: ; %gen_error -; CHECK-O0-ARM64_32-NEXT: mov w8, #16 +; CHECK-O0-ARM64_32-NEXT: mov w8, #16 ; =0x10 ; CHECK-O0-ARM64_32-NEXT: mov w0, w8 ; CHECK-O0-ARM64_32-NEXT: bl _malloc ; CHECK-O0-ARM64_32-NEXT: mov x21, x0 -; CHECK-O0-ARM64_32-NEXT: mov w8, #1 +; CHECK-O0-ARM64_32-NEXT: mov w8, #1 ; =0x1 ; CHECK-O0-ARM64_32-NEXT: strb w8, [x0, #8] ; CHECK-O0-ARM64_32-NEXT: fmov s0, #1.00000000 ; CHECK-O0-ARM64_32-NEXT: ldr x30, [sp, #16] ; 8-byte Folded Reload @@ -517,8 +517,8 @@ ; CHECK-APPLE-NEXT: fmov s8, s0 ; CHECK-APPLE-NEXT: mov w19, w0 ; CHECK-APPLE-NEXT: mov x0, x21 -; CHECK-APPLE-NEXT: mov w20, #1 ; CHECK-APPLE-NEXT: fmov s9, #1.00000000 +; CHECK-APPLE-NEXT: mov w20, #1 ; =0x1 ; CHECK-APPLE-NEXT: b LBB4_2 ; CHECK-APPLE-NEXT: LBB4_1: ; %bb_cont ; CHECK-APPLE-NEXT: ; in Loop: Header=BB4_2 
Depth=1 @@ -529,7 +529,7 @@ ; CHECK-APPLE-NEXT: cbz w19, LBB4_1 ; CHECK-APPLE-NEXT: ; %bb.3: ; %gen_error ; CHECK-APPLE-NEXT: ; in Loop: Header=BB4_2 Depth=1 -; CHECK-APPLE-NEXT: mov w0, #16 +; CHECK-APPLE-NEXT: mov w0, #16 ; =0x10 ; CHECK-APPLE-NEXT: bl _malloc ; CHECK-APPLE-NEXT: strb w20, [x0, #8] ; CHECK-APPLE-NEXT: b LBB4_1 @@ -561,11 +561,11 @@ ; CHECK-O0-AARCH64-NEXT: cbz w8, LBB4_3 ; CHECK-O0-AARCH64-NEXT: ; %bb.2: ; %gen_error ; CHECK-O0-AARCH64-NEXT: ; in Loop: Header=BB4_1 Depth=1 -; CHECK-O0-AARCH64-NEXT: mov w8, #16 +; CHECK-O0-AARCH64-NEXT: mov w8, #16 ; =0x10 ; CHECK-O0-AARCH64-NEXT: mov w0, w8 ; CHECK-O0-AARCH64-NEXT: bl _malloc ; CHECK-O0-AARCH64-NEXT: mov x9, x0 -; CHECK-O0-AARCH64-NEXT: mov w8, #1 +; CHECK-O0-AARCH64-NEXT: mov w8, #1 ; =0x1 ; CHECK-O0-AARCH64-NEXT: strb w8, [x9, #8] ; CHECK-O0-AARCH64-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill ; CHECK-O0-AARCH64-NEXT: LBB4_3: ; %bb_cont @@ -602,13 +602,13 @@ ; CHECK-O0-ARM64_32-NEXT: cbz w8, LBB4_3 ; CHECK-O0-ARM64_32-NEXT: ; %bb.2: ; %gen_error ; CHECK-O0-ARM64_32-NEXT: ; in Loop: Header=BB4_1 Depth=1 -; CHECK-O0-ARM64_32-NEXT: mov w8, #16 +; CHECK-O0-ARM64_32-NEXT: mov w8, #16 ; =0x10 ; CHECK-O0-ARM64_32-NEXT: mov w0, w8 ; CHECK-O0-ARM64_32-NEXT: bl _malloc ; CHECK-O0-ARM64_32-NEXT: mov x9, x0 ; CHECK-O0-ARM64_32-NEXT: ; kill: def $x0 killed $x9 ; CHECK-O0-ARM64_32-NEXT: mov x0, x9 -; CHECK-O0-ARM64_32-NEXT: mov w8, #1 +; CHECK-O0-ARM64_32-NEXT: mov w8, #1 ; =0x1 ; CHECK-O0-ARM64_32-NEXT: strb w8, [x9, #8] ; CHECK-O0-ARM64_32-NEXT: str x0, [sp, #8] ; 8-byte Folded Spill ; CHECK-O0-ARM64_32-NEXT: LBB4_3: ; %bb_cont @@ -671,11 +671,11 @@ ; CHECK-APPLE-NEXT: .cfi_offset w20, -32 ; CHECK-APPLE-NEXT: mov w19, w0 ; CHECK-APPLE-NEXT: mov x20, x8 -; CHECK-APPLE-NEXT: mov w0, #16 +; CHECK-APPLE-NEXT: mov w0, #16 ; =0x10 ; CHECK-APPLE-NEXT: bl _malloc -; CHECK-APPLE-NEXT: mov w8, #1 -; CHECK-APPLE-NEXT: mov x21, x0 +; CHECK-APPLE-NEXT: mov w8, #1 ; =0x1 ; CHECK-APPLE-NEXT: strb w8, [x0, #8] +; CHECK-APPLE-NEXT: mov x21, x0 ; CHECK-APPLE-NEXT: str w19, [x20, #4] ; CHECK-APPLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload ; CHECK-APPLE-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload @@ -691,14 +691,14 @@ ; CHECK-O0-AARCH64-NEXT: .cfi_offset w29, -16 ; CHECK-O0-AARCH64-NEXT: stur w0, [x29, #-4] ; 4-byte Folded Spill ; CHECK-O0-AARCH64-NEXT: str x8, [sp] ; 8-byte Folded Spill -; CHECK-O0-AARCH64-NEXT: mov w8, #16 +; CHECK-O0-AARCH64-NEXT: mov w8, #16 ; =0x10 ; CHECK-O0-AARCH64-NEXT: mov w0, w8 ; CHECK-O0-AARCH64-NEXT: bl _malloc ; CHECK-O0-AARCH64-NEXT: ldr x8, [sp] ; 8-byte Folded Reload ; CHECK-O0-AARCH64-NEXT: mov x10, x0 ; CHECK-O0-AARCH64-NEXT: ldur w0, [x29, #-4] ; 4-byte Folded Reload ; CHECK-O0-AARCH64-NEXT: mov x21, x10 -; CHECK-O0-AARCH64-NEXT: mov w9, #1 +; CHECK-O0-AARCH64-NEXT: mov w9, #1 ; =0x1 ; CHECK-O0-AARCH64-NEXT: strb w9, [x10, #8] ; CHECK-O0-AARCH64-NEXT: str w0, [x8, #4] ; CHECK-O0-AARCH64-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload @@ -713,14 +713,14 @@ ; CHECK-O0-ARM64_32-NEXT: .cfi_offset w30, -16 ; CHECK-O0-ARM64_32-NEXT: str w0, [sp, #12] ; 4-byte Folded Spill ; CHECK-O0-ARM64_32-NEXT: str x8, [sp] ; 8-byte Folded Spill -; CHECK-O0-ARM64_32-NEXT: mov w8, #16 +; CHECK-O0-ARM64_32-NEXT: mov w8, #16 ; =0x10 ; CHECK-O0-ARM64_32-NEXT: mov w0, w8 ; CHECK-O0-ARM64_32-NEXT: bl _malloc ; CHECK-O0-ARM64_32-NEXT: ldr x8, [sp] ; 8-byte Folded Reload ; CHECK-O0-ARM64_32-NEXT: mov x10, x0 ; CHECK-O0-ARM64_32-NEXT: ldr w0, [sp, #12] ; 4-byte Folded Reload ; CHECK-O0-ARM64_32-NEXT: mov x21, x10 
-; CHECK-O0-ARM64_32-NEXT: mov w9, #1 +; CHECK-O0-ARM64_32-NEXT: mov w9, #1 ; =0x1 ; CHECK-O0-ARM64_32-NEXT: strb w9, [x10, #8] ; CHECK-O0-ARM64_32-NEXT: str w0, [x8, #4] ; CHECK-O0-ARM64_32-NEXT: ldr x30, [sp, #16] ; 8-byte Folded Reload @@ -757,7 +757,7 @@ ; CHECK-APPLE-AARCH64-NEXT: .cfi_offset w22, -48 ; CHECK-APPLE-AARCH64-NEXT: mov x19, x0 ; CHECK-APPLE-AARCH64-NEXT: add x8, sp, #8 -; CHECK-APPLE-AARCH64-NEXT: mov w0, #1 +; CHECK-APPLE-AARCH64-NEXT: mov w0, #1 ; =0x1 ; CHECK-APPLE-AARCH64-NEXT: mov x21, xzr ; CHECK-APPLE-AARCH64-NEXT: bl _foo_sret ; CHECK-APPLE-AARCH64-NEXT: mov x0, x21 @@ -789,7 +789,7 @@ ; CHECK-O0-AARCH64-NEXT: str x0, [sp] ; 8-byte Folded Spill ; CHECK-O0-AARCH64-NEXT: mov x21, xzr ; CHECK-O0-AARCH64-NEXT: add x8, sp, #24 -; CHECK-O0-AARCH64-NEXT: mov w0, #1 +; CHECK-O0-AARCH64-NEXT: mov w0, #1 ; =0x1 ; CHECK-O0-AARCH64-NEXT: bl _foo_sret ; CHECK-O0-AARCH64-NEXT: str x21, [sp, #8] ; 8-byte Folded Spill ; CHECK-O0-AARCH64-NEXT: cbnz x21, LBB6_2 @@ -823,7 +823,7 @@ ; CHECK-APPLE-ARM64_32-NEXT: .cfi_offset w22, -48 ; CHECK-APPLE-ARM64_32-NEXT: mov x19, x0 ; CHECK-APPLE-ARM64_32-NEXT: add x8, sp, #8 -; CHECK-APPLE-ARM64_32-NEXT: mov w0, #1 +; CHECK-APPLE-ARM64_32-NEXT: mov w0, #1 ; =0x1 ; CHECK-APPLE-ARM64_32-NEXT: mov x21, xzr ; CHECK-APPLE-ARM64_32-NEXT: bl _foo_sret ; CHECK-APPLE-ARM64_32-NEXT: mov x0, x21 @@ -854,7 +854,7 @@ ; CHECK-O0-ARM64_32-NEXT: str x0, [sp] ; 8-byte Folded Spill ; CHECK-O0-ARM64_32-NEXT: mov x21, xzr ; CHECK-O0-ARM64_32-NEXT: add x8, sp, #24 -; CHECK-O0-ARM64_32-NEXT: mov w0, #1 +; CHECK-O0-ARM64_32-NEXT: mov w0, #1 ; =0x1 ; CHECK-O0-ARM64_32-NEXT: bl _foo_sret ; CHECK-O0-ARM64_32-NEXT: str x21, [sp, #8] ; 8-byte Folded Spill ; CHECK-O0-ARM64_32-NEXT: cmp x21, #0 @@ -908,21 +908,21 @@ ; CHECK-APPLE-AARCH64-NEXT: .cfi_def_cfa w29, 16 ; CHECK-APPLE-AARCH64-NEXT: .cfi_offset w30, -8 ; CHECK-APPLE-AARCH64-NEXT: .cfi_offset w29, -16 -; CHECK-APPLE-AARCH64-NEXT: mov w0, #16 +; CHECK-APPLE-AARCH64-NEXT: mov w0, #16 ; =0x10 ; CHECK-APPLE-AARCH64-NEXT: bl _malloc -; CHECK-APPLE-AARCH64-NEXT: mov w8, #1 -; CHECK-APPLE-AARCH64-NEXT: add x9, x29, #16 -; CHECK-APPLE-AARCH64-NEXT: ldr w10, [x29, #16] -; CHECK-APPLE-AARCH64-NEXT: orr x9, x9, #0x8 +; CHECK-APPLE-AARCH64-NEXT: mov w8, #1 ; =0x1 +; CHECK-APPLE-AARCH64-NEXT: ldr w9, [x29, #16] ; CHECK-APPLE-AARCH64-NEXT: strb w8, [x0, #8] -; CHECK-APPLE-AARCH64-NEXT: stur w10, [x29, #-12] -; CHECK-APPLE-AARCH64-NEXT: ldr w8, [x9], #8 -; CHECK-APPLE-AARCH64-NEXT: str w8, [sp, #16] -; CHECK-APPLE-AARCH64-NEXT: ldr w8, [x9], #8 +; CHECK-APPLE-AARCH64-NEXT: add x8, x29, #16 +; CHECK-APPLE-AARCH64-NEXT: orr x8, x8, #0x8 +; CHECK-APPLE-AARCH64-NEXT: stur w9, [x29, #-12] +; CHECK-APPLE-AARCH64-NEXT: ldr w9, [x8], #8 +; CHECK-APPLE-AARCH64-NEXT: str w9, [sp, #16] ; CHECK-APPLE-AARCH64-NEXT: fmov s0, #1.00000000 +; CHECK-APPLE-AARCH64-NEXT: ldr w9, [x8], #8 +; CHECK-APPLE-AARCH64-NEXT: stur x8, [x29, #-8] ; CHECK-APPLE-AARCH64-NEXT: mov x21, x0 -; CHECK-APPLE-AARCH64-NEXT: stur x9, [x29, #-8] -; CHECK-APPLE-AARCH64-NEXT: str w8, [sp, #12] +; CHECK-APPLE-AARCH64-NEXT: str w9, [sp, #12] ; CHECK-APPLE-AARCH64-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload ; CHECK-APPLE-AARCH64-NEXT: add sp, sp, #48 ; CHECK-APPLE-AARCH64-NEXT: ret @@ -935,11 +935,11 @@ ; CHECK-O0-AARCH64-NEXT: .cfi_def_cfa w29, 16 ; CHECK-O0-AARCH64-NEXT: .cfi_offset w30, -8 ; CHECK-O0-AARCH64-NEXT: .cfi_offset w29, -16 -; CHECK-O0-AARCH64-NEXT: mov w8, #16 +; CHECK-O0-AARCH64-NEXT: mov w8, #16 ; =0x10 ; CHECK-O0-AARCH64-NEXT: mov w0, w8 ; 
CHECK-O0-AARCH64-NEXT: bl _malloc ; CHECK-O0-AARCH64-NEXT: mov x21, x0 -; CHECK-O0-AARCH64-NEXT: mov w8, #1 +; CHECK-O0-AARCH64-NEXT: mov w8, #1 ; =0x1 ; CHECK-O0-AARCH64-NEXT: strb w8, [x0, #8] ; CHECK-O0-AARCH64-NEXT: add x8, x29, #16 ; CHECK-O0-AARCH64-NEXT: stur x8, [x29, #-8] @@ -971,23 +971,23 @@ ; CHECK-APPLE-ARM64_32-NEXT: .cfi_def_cfa w29, 16 ; CHECK-APPLE-ARM64_32-NEXT: .cfi_offset w30, -8 ; CHECK-APPLE-ARM64_32-NEXT: .cfi_offset w29, -16 -; CHECK-APPLE-ARM64_32-NEXT: mov w0, #16 +; CHECK-APPLE-ARM64_32-NEXT: mov w0, #16 ; =0x10 ; CHECK-APPLE-ARM64_32-NEXT: bl _malloc -; CHECK-APPLE-ARM64_32-NEXT: mov w8, #1 +; CHECK-APPLE-ARM64_32-NEXT: mov w8, #1 ; =0x1 ; CHECK-APPLE-ARM64_32-NEXT: add x9, x29, #16 -; CHECK-APPLE-ARM64_32-NEXT: orr w10, w9, #0x4 -; CHECK-APPLE-ARM64_32-NEXT: and x11, x9, #0xfffffff0 ; CHECK-APPLE-ARM64_32-NEXT: strb w8, [x0, #8] -; CHECK-APPLE-ARM64_32-NEXT: stur w10, [x29, #-8] -; CHECK-APPLE-ARM64_32-NEXT: ldr w8, [x11] -; CHECK-APPLE-ARM64_32-NEXT: orr w11, w9, #0x8 -; CHECK-APPLE-ARM64_32-NEXT: stp w8, w11, [x29, #-12] -; CHECK-APPLE-ARM64_32-NEXT: orr w8, w9, #0xc -; CHECK-APPLE-ARM64_32-NEXT: ldr w9, [x10] +; CHECK-APPLE-ARM64_32-NEXT: orr w8, w9, #0x4 +; CHECK-APPLE-ARM64_32-NEXT: and x10, x9, #0xfffffff0 ; CHECK-APPLE-ARM64_32-NEXT: stur w8, [x29, #-8] -; CHECK-APPLE-ARM64_32-NEXT: str w9, [sp, #16] -; CHECK-APPLE-ARM64_32-NEXT: ldr w8, [x11] +; CHECK-APPLE-ARM64_32-NEXT: ldr w11, [x10] +; CHECK-APPLE-ARM64_32-NEXT: orr w10, w9, #0x8 +; CHECK-APPLE-ARM64_32-NEXT: stp w11, w10, [x29, #-12] +; CHECK-APPLE-ARM64_32-NEXT: ldr w8, [x8] +; CHECK-APPLE-ARM64_32-NEXT: orr w9, w9, #0xc +; CHECK-APPLE-ARM64_32-NEXT: str w8, [sp, #16] +; CHECK-APPLE-ARM64_32-NEXT: stur w9, [x29, #-8] ; CHECK-APPLE-ARM64_32-NEXT: fmov s0, #1.00000000 +; CHECK-APPLE-ARM64_32-NEXT: ldr w8, [x10] ; CHECK-APPLE-ARM64_32-NEXT: mov x21, x0 ; CHECK-APPLE-ARM64_32-NEXT: str w8, [sp, #12] ; CHECK-APPLE-ARM64_32-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload @@ -1000,11 +1000,11 @@ ; CHECK-O0-ARM64_32-NEXT: str x30, [sp, #32] ; 8-byte Folded Spill ; CHECK-O0-ARM64_32-NEXT: .cfi_def_cfa_offset 48 ; CHECK-O0-ARM64_32-NEXT: .cfi_offset w30, -16 -; CHECK-O0-ARM64_32-NEXT: mov w8, #16 +; CHECK-O0-ARM64_32-NEXT: mov w8, #16 ; =0x10 ; CHECK-O0-ARM64_32-NEXT: mov w0, w8 ; CHECK-O0-ARM64_32-NEXT: bl _malloc ; CHECK-O0-ARM64_32-NEXT: mov x21, x0 -; CHECK-O0-ARM64_32-NEXT: mov w8, #1 +; CHECK-O0-ARM64_32-NEXT: mov w8, #1 ; =0x1 ; CHECK-O0-ARM64_32-NEXT: strb w8, [x0, #8] ; CHECK-O0-ARM64_32-NEXT: add x8, sp, #48 ; CHECK-O0-ARM64_32-NEXT: ; kill: def $w8 killed $w8 killed $x8 @@ -1079,10 +1079,10 @@ ; CHECK-APPLE-AARCH64-NEXT: .cfi_offset w21, -40 ; CHECK-APPLE-AARCH64-NEXT: .cfi_offset w22, -48 ; CHECK-APPLE-AARCH64-NEXT: mov x19, x0 -; CHECK-APPLE-AARCH64-NEXT: mov w8, #10 -; CHECK-APPLE-AARCH64-NEXT: mov w9, #11 -; CHECK-APPLE-AARCH64-NEXT: mov w10, #12 +; CHECK-APPLE-AARCH64-NEXT: mov w8, #10 ; =0xa +; CHECK-APPLE-AARCH64-NEXT: mov w9, #11 ; =0xb ; CHECK-APPLE-AARCH64-NEXT: stp w9, w8, [sp, #32] +; CHECK-APPLE-AARCH64-NEXT: mov w10, #12 ; =0xc ; CHECK-APPLE-AARCH64-NEXT: str w10, [sp, #28] ; CHECK-APPLE-AARCH64-NEXT: mov x21, xzr ; CHECK-APPLE-AARCH64-NEXT: stp x9, x10, [sp, #8] @@ -1116,11 +1116,11 @@ ; CHECK-O0-AARCH64-NEXT: ; implicit-def: $x1 ; CHECK-O0-AARCH64-NEXT: str x0, [sp, #24] ; 8-byte Folded Spill ; CHECK-O0-AARCH64-NEXT: mov x21, xzr -; CHECK-O0-AARCH64-NEXT: mov w8, #10 +; CHECK-O0-AARCH64-NEXT: mov w8, #10 ; =0xa ; CHECK-O0-AARCH64-NEXT: stur w8, [x29, #-28] -; 
CHECK-O0-AARCH64-NEXT: mov w8, #11 +; CHECK-O0-AARCH64-NEXT: mov w8, #11 ; =0xb ; CHECK-O0-AARCH64-NEXT: stur w8, [x29, #-32] -; CHECK-O0-AARCH64-NEXT: mov w8, #12 +; CHECK-O0-AARCH64-NEXT: mov w8, #12 ; =0xc ; CHECK-O0-AARCH64-NEXT: stur w8, [x29, #-36] ; CHECK-O0-AARCH64-NEXT: ldur w8, [x29, #-28] ; CHECK-O0-AARCH64-NEXT: ; kill: def $x8 killed $w8 @@ -1164,13 +1164,13 @@ ; CHECK-APPLE-ARM64_32-NEXT: .cfi_offset w21, -40 ; CHECK-APPLE-ARM64_32-NEXT: .cfi_offset w22, -48 ; CHECK-APPLE-ARM64_32-NEXT: mov x19, x0 -; CHECK-APPLE-ARM64_32-NEXT: mov w8, #10 -; CHECK-APPLE-ARM64_32-NEXT: mov w9, #11 -; CHECK-APPLE-ARM64_32-NEXT: mov w10, #12 +; CHECK-APPLE-ARM64_32-NEXT: mov w8, #10 ; =0xa +; CHECK-APPLE-ARM64_32-NEXT: mov w9, #11 ; =0xb ; CHECK-APPLE-ARM64_32-NEXT: stp w9, w8, [sp, #20] +; CHECK-APPLE-ARM64_32-NEXT: mov w10, #12 ; =0xc ; CHECK-APPLE-ARM64_32-NEXT: str w10, [sp, #16] ; CHECK-APPLE-ARM64_32-NEXT: mov x21, xzr -; CHECK-APPLE-ARM64_32-NEXT: mov x9, #11 +; CHECK-APPLE-ARM64_32-NEXT: mov x9, #11 ; =0xb ; CHECK-APPLE-ARM64_32-NEXT: movk x9, #12, lsl #32 ; CHECK-APPLE-ARM64_32-NEXT: stur x9, [sp, #4] ; CHECK-APPLE-ARM64_32-NEXT: str w8, [sp] @@ -1202,11 +1202,11 @@ ; CHECK-O0-ARM64_32-NEXT: ; implicit-def: $x1 ; CHECK-O0-ARM64_32-NEXT: str x0, [sp, #16] ; 8-byte Folded Spill ; CHECK-O0-ARM64_32-NEXT: mov x21, xzr -; CHECK-O0-ARM64_32-NEXT: mov w8, #10 +; CHECK-O0-ARM64_32-NEXT: mov w8, #10 ; =0xa ; CHECK-O0-ARM64_32-NEXT: str w8, [sp, #40] -; CHECK-O0-ARM64_32-NEXT: mov w8, #11 +; CHECK-O0-ARM64_32-NEXT: mov w8, #11 ; =0xb ; CHECK-O0-ARM64_32-NEXT: str w8, [sp, #36] -; CHECK-O0-ARM64_32-NEXT: mov w8, #12 +; CHECK-O0-ARM64_32-NEXT: mov w8, #12 ; =0xc ; CHECK-O0-ARM64_32-NEXT: str w8, [sp, #32] ; CHECK-O0-ARM64_32-NEXT: ldr w8, [sp, #40] ; CHECK-O0-ARM64_32-NEXT: ldr w10, [sp, #36] @@ -1467,14 +1467,14 @@ ; CHECK-APPLE-NEXT: mov x28, x2 ; CHECK-APPLE-NEXT: mov x19, x1 ; CHECK-APPLE-NEXT: mov x22, x0 -; CHECK-APPLE-NEXT: mov w0, #1 -; CHECK-APPLE-NEXT: mov w1, #2 -; CHECK-APPLE-NEXT: mov w2, #3 -; CHECK-APPLE-NEXT: mov w3, #4 -; CHECK-APPLE-NEXT: mov w4, #5 -; CHECK-APPLE-NEXT: mov w5, #6 -; CHECK-APPLE-NEXT: mov w6, #7 -; CHECK-APPLE-NEXT: mov w7, #8 +; CHECK-APPLE-NEXT: mov w0, #1 ; =0x1 +; CHECK-APPLE-NEXT: mov w1, #2 ; =0x2 +; CHECK-APPLE-NEXT: mov w2, #3 ; =0x3 +; CHECK-APPLE-NEXT: mov w3, #4 ; =0x4 +; CHECK-APPLE-NEXT: mov w4, #5 ; =0x5 +; CHECK-APPLE-NEXT: mov w5, #6 ; =0x6 +; CHECK-APPLE-NEXT: mov w6, #7 ; =0x7 +; CHECK-APPLE-NEXT: mov w7, #8 ; =0x8 ; CHECK-APPLE-NEXT: mov x20, xzr ; CHECK-APPLE-NEXT: mov x21, xzr ; CHECK-APPLE-NEXT: bl _params_in_reg2 @@ -1520,21 +1520,21 @@ ; CHECK-O0-AARCH64-NEXT: ; implicit-def: $x0 ; CHECK-O0-AARCH64-NEXT: mov x20, xzr ; CHECK-O0-AARCH64-NEXT: mov x21, x20 -; CHECK-O0-AARCH64-NEXT: mov w8, #1 +; CHECK-O0-AARCH64-NEXT: mov w8, #1 ; =0x1 ; CHECK-O0-AARCH64-NEXT: mov w0, w8 -; CHECK-O0-AARCH64-NEXT: mov w8, #2 +; CHECK-O0-AARCH64-NEXT: mov w8, #2 ; =0x2 ; CHECK-O0-AARCH64-NEXT: mov w1, w8 -; CHECK-O0-AARCH64-NEXT: mov w8, #3 +; CHECK-O0-AARCH64-NEXT: mov w8, #3 ; =0x3 ; CHECK-O0-AARCH64-NEXT: mov w2, w8 -; CHECK-O0-AARCH64-NEXT: mov w8, #4 +; CHECK-O0-AARCH64-NEXT: mov w8, #4 ; =0x4 ; CHECK-O0-AARCH64-NEXT: mov w3, w8 -; CHECK-O0-AARCH64-NEXT: mov w8, #5 +; CHECK-O0-AARCH64-NEXT: mov w8, #5 ; =0x5 ; CHECK-O0-AARCH64-NEXT: mov w4, w8 -; CHECK-O0-AARCH64-NEXT: mov w8, #6 +; CHECK-O0-AARCH64-NEXT: mov w8, #6 ; =0x6 ; CHECK-O0-AARCH64-NEXT: mov w5, w8 -; CHECK-O0-AARCH64-NEXT: mov w8, #7 +; CHECK-O0-AARCH64-NEXT: mov w8, #7 ; =0x7 ; 
CHECK-O0-AARCH64-NEXT: mov w6, w8 -; CHECK-O0-AARCH64-NEXT: mov w8, #8 +; CHECK-O0-AARCH64-NEXT: mov w8, #8 ; =0x8 ; CHECK-O0-AARCH64-NEXT: mov w7, w8 ; CHECK-O0-AARCH64-NEXT: bl _params_in_reg2 ; CHECK-O0-AARCH64-NEXT: ldr x20, [sp, #8] ; 8-byte Folded Reload @@ -1574,21 +1574,21 @@ ; CHECK-O0-ARM64_32-NEXT: ; implicit-def: $x0 ; CHECK-O0-ARM64_32-NEXT: mov x20, xzr ; CHECK-O0-ARM64_32-NEXT: mov x21, x20 -; CHECK-O0-ARM64_32-NEXT: mov w8, #1 +; CHECK-O0-ARM64_32-NEXT: mov w8, #1 ; =0x1 ; CHECK-O0-ARM64_32-NEXT: mov w0, w8 -; CHECK-O0-ARM64_32-NEXT: mov w8, #2 +; CHECK-O0-ARM64_32-NEXT: mov w8, #2 ; =0x2 ; CHECK-O0-ARM64_32-NEXT: mov w1, w8 -; CHECK-O0-ARM64_32-NEXT: mov w8, #3 +; CHECK-O0-ARM64_32-NEXT: mov w8, #3 ; =0x3 ; CHECK-O0-ARM64_32-NEXT: mov w2, w8 -; CHECK-O0-ARM64_32-NEXT: mov w8, #4 +; CHECK-O0-ARM64_32-NEXT: mov w8, #4 ; =0x4 ; CHECK-O0-ARM64_32-NEXT: mov w3, w8 -; CHECK-O0-ARM64_32-NEXT: mov w8, #5 +; CHECK-O0-ARM64_32-NEXT: mov w8, #5 ; =0x5 ; CHECK-O0-ARM64_32-NEXT: mov w4, w8 -; CHECK-O0-ARM64_32-NEXT: mov w8, #6 +; CHECK-O0-ARM64_32-NEXT: mov w8, #6 ; =0x6 ; CHECK-O0-ARM64_32-NEXT: mov w5, w8 -; CHECK-O0-ARM64_32-NEXT: mov w8, #7 +; CHECK-O0-ARM64_32-NEXT: mov w8, #7 ; =0x7 ; CHECK-O0-ARM64_32-NEXT: mov w6, w8 -; CHECK-O0-ARM64_32-NEXT: mov w8, #8 +; CHECK-O0-ARM64_32-NEXT: mov w8, #8 ; =0x8 ; CHECK-O0-ARM64_32-NEXT: mov w7, w8 ; CHECK-O0-ARM64_32-NEXT: bl _params_in_reg2 ; CHECK-O0-ARM64_32-NEXT: ldr x20, [sp, #8] ; 8-byte Folded Reload @@ -1646,14 +1646,14 @@ ; CHECK-APPLE-NEXT: mov x28, x2 ; CHECK-APPLE-NEXT: mov x19, x1 ; CHECK-APPLE-NEXT: mov x22, x0 -; CHECK-APPLE-NEXT: mov w0, #1 -; CHECK-APPLE-NEXT: mov w1, #2 -; CHECK-APPLE-NEXT: mov w2, #3 -; CHECK-APPLE-NEXT: mov w3, #4 -; CHECK-APPLE-NEXT: mov w4, #5 -; CHECK-APPLE-NEXT: mov w5, #6 -; CHECK-APPLE-NEXT: mov w6, #7 -; CHECK-APPLE-NEXT: mov w7, #8 +; CHECK-APPLE-NEXT: mov w0, #1 ; =0x1 +; CHECK-APPLE-NEXT: mov w1, #2 ; =0x2 +; CHECK-APPLE-NEXT: mov w2, #3 ; =0x3 +; CHECK-APPLE-NEXT: mov w3, #4 ; =0x4 +; CHECK-APPLE-NEXT: mov w4, #5 ; =0x5 +; CHECK-APPLE-NEXT: mov w5, #6 ; =0x6 +; CHECK-APPLE-NEXT: mov w6, #7 ; =0x7 +; CHECK-APPLE-NEXT: mov w7, #8 ; =0x8 ; CHECK-APPLE-NEXT: mov x20, xzr ; CHECK-APPLE-NEXT: mov x21, xzr ; CHECK-APPLE-NEXT: bl _params_in_reg2 @@ -1677,14 +1677,14 @@ ; CHECK-APPLE-NEXT: mov x28, x6 ; CHECK-APPLE-NEXT: mov x23, x7 ; CHECK-APPLE-NEXT: str x21, [sp, #24] ; 8-byte Folded Spill -; CHECK-APPLE-NEXT: mov w0, #1 -; CHECK-APPLE-NEXT: mov w1, #2 -; CHECK-APPLE-NEXT: mov w2, #3 -; CHECK-APPLE-NEXT: mov w3, #4 -; CHECK-APPLE-NEXT: mov w4, #5 -; CHECK-APPLE-NEXT: mov w5, #6 -; CHECK-APPLE-NEXT: mov w6, #7 -; CHECK-APPLE-NEXT: mov w7, #8 +; CHECK-APPLE-NEXT: mov w0, #1 ; =0x1 +; CHECK-APPLE-NEXT: mov w1, #2 ; =0x2 +; CHECK-APPLE-NEXT: mov w2, #3 ; =0x3 +; CHECK-APPLE-NEXT: mov w3, #4 ; =0x4 +; CHECK-APPLE-NEXT: mov w4, #5 ; =0x5 +; CHECK-APPLE-NEXT: mov w5, #6 ; =0x6 +; CHECK-APPLE-NEXT: mov w6, #7 ; =0x7 +; CHECK-APPLE-NEXT: mov w7, #8 ; =0x8 ; CHECK-APPLE-NEXT: mov x20, xzr ; CHECK-APPLE-NEXT: ldr x21, [sp, #8] ; 8-byte Folded Reload ; CHECK-APPLE-NEXT: bl _params_in_reg2 @@ -1730,28 +1730,28 @@ ; CHECK-O0-AARCH64-NEXT: mov x20, xzr ; CHECK-O0-AARCH64-NEXT: str x20, [sp, #80] ; 8-byte Folded Spill ; CHECK-O0-AARCH64-NEXT: mov x21, x20 -; CHECK-O0-AARCH64-NEXT: mov w8, #1 +; CHECK-O0-AARCH64-NEXT: mov w8, #1 ; =0x1 ; CHECK-O0-AARCH64-NEXT: mov w0, w8 ; CHECK-O0-AARCH64-NEXT: str x0, [sp, #88] ; 8-byte Folded Spill -; CHECK-O0-AARCH64-NEXT: mov w8, #2 +; CHECK-O0-AARCH64-NEXT: mov w8, #2 ; =0x2 ; 
CHECK-O0-AARCH64-NEXT: mov w1, w8 ; CHECK-O0-AARCH64-NEXT: str x1, [sp, #96] ; 8-byte Folded Spill -; CHECK-O0-AARCH64-NEXT: mov w8, #3 +; CHECK-O0-AARCH64-NEXT: mov w8, #3 ; =0x3 ; CHECK-O0-AARCH64-NEXT: mov w2, w8 ; CHECK-O0-AARCH64-NEXT: str x2, [sp, #104] ; 8-byte Folded Spill -; CHECK-O0-AARCH64-NEXT: mov w8, #4 +; CHECK-O0-AARCH64-NEXT: mov w8, #4 ; =0x4 ; CHECK-O0-AARCH64-NEXT: mov w3, w8 ; CHECK-O0-AARCH64-NEXT: str x3, [sp, #112] ; 8-byte Folded Spill -; CHECK-O0-AARCH64-NEXT: mov w8, #5 +; CHECK-O0-AARCH64-NEXT: mov w8, #5 ; =0x5 ; CHECK-O0-AARCH64-NEXT: mov w4, w8 ; CHECK-O0-AARCH64-NEXT: str x4, [sp, #120] ; 8-byte Folded Spill -; CHECK-O0-AARCH64-NEXT: mov w8, #6 +; CHECK-O0-AARCH64-NEXT: mov w8, #6 ; =0x6 ; CHECK-O0-AARCH64-NEXT: mov w5, w8 ; CHECK-O0-AARCH64-NEXT: str x5, [sp, #128] ; 8-byte Folded Spill -; CHECK-O0-AARCH64-NEXT: mov w8, #7 +; CHECK-O0-AARCH64-NEXT: mov w8, #7 ; =0x7 ; CHECK-O0-AARCH64-NEXT: mov w6, w8 ; CHECK-O0-AARCH64-NEXT: stur x6, [x29, #-120] ; 8-byte Folded Spill -; CHECK-O0-AARCH64-NEXT: mov w8, #8 +; CHECK-O0-AARCH64-NEXT: mov w8, #8 ; =0x8 ; CHECK-O0-AARCH64-NEXT: mov w7, w8 ; CHECK-O0-AARCH64-NEXT: stur x7, [x29, #-112] ; 8-byte Folded Spill ; CHECK-O0-AARCH64-NEXT: bl _params_in_reg2 @@ -1835,28 +1835,28 @@ ; CHECK-O0-ARM64_32-NEXT: mov x20, xzr ; CHECK-O0-ARM64_32-NEXT: str x20, [sp, #80] ; 8-byte Folded Spill ; CHECK-O0-ARM64_32-NEXT: mov x21, x20 -; CHECK-O0-ARM64_32-NEXT: mov w8, #1 +; CHECK-O0-ARM64_32-NEXT: mov w8, #1 ; =0x1 ; CHECK-O0-ARM64_32-NEXT: mov w0, w8 ; CHECK-O0-ARM64_32-NEXT: str x0, [sp, #88] ; 8-byte Folded Spill -; CHECK-O0-ARM64_32-NEXT: mov w8, #2 +; CHECK-O0-ARM64_32-NEXT: mov w8, #2 ; =0x2 ; CHECK-O0-ARM64_32-NEXT: mov w1, w8 ; CHECK-O0-ARM64_32-NEXT: str x1, [sp, #96] ; 8-byte Folded Spill -; CHECK-O0-ARM64_32-NEXT: mov w8, #3 +; CHECK-O0-ARM64_32-NEXT: mov w8, #3 ; =0x3 ; CHECK-O0-ARM64_32-NEXT: mov w2, w8 ; CHECK-O0-ARM64_32-NEXT: str x2, [sp, #104] ; 8-byte Folded Spill -; CHECK-O0-ARM64_32-NEXT: mov w8, #4 +; CHECK-O0-ARM64_32-NEXT: mov w8, #4 ; =0x4 ; CHECK-O0-ARM64_32-NEXT: mov w3, w8 ; CHECK-O0-ARM64_32-NEXT: str x3, [sp, #112] ; 8-byte Folded Spill -; CHECK-O0-ARM64_32-NEXT: mov w8, #5 +; CHECK-O0-ARM64_32-NEXT: mov w8, #5 ; =0x5 ; CHECK-O0-ARM64_32-NEXT: mov w4, w8 ; CHECK-O0-ARM64_32-NEXT: str x4, [sp, #120] ; 8-byte Folded Spill -; CHECK-O0-ARM64_32-NEXT: mov w8, #6 +; CHECK-O0-ARM64_32-NEXT: mov w8, #6 ; =0x6 ; CHECK-O0-ARM64_32-NEXT: mov w5, w8 ; CHECK-O0-ARM64_32-NEXT: str x5, [sp, #128] ; 8-byte Folded Spill -; CHECK-O0-ARM64_32-NEXT: mov w8, #7 +; CHECK-O0-ARM64_32-NEXT: mov w8, #7 ; =0x7 ; CHECK-O0-ARM64_32-NEXT: mov w6, w8 ; CHECK-O0-ARM64_32-NEXT: str x6, [sp, #136] ; 8-byte Folded Spill -; CHECK-O0-ARM64_32-NEXT: mov w8, #8 +; CHECK-O0-ARM64_32-NEXT: mov w8, #8 ; =0x8 ; CHECK-O0-ARM64_32-NEXT: mov w7, w8 ; CHECK-O0-ARM64_32-NEXT: str x7, [sp, #144] ; 8-byte Folded Spill ; CHECK-O0-ARM64_32-NEXT: bl _params_in_reg2 diff --git a/llvm/test/CodeGen/AArch64/tbl-loops.ll b/llvm/test/CodeGen/AArch64/tbl-loops.ll --- a/llvm/test/CodeGen/AArch64/tbl-loops.ll +++ b/llvm/test/CodeGen/AArch64/tbl-loops.ll @@ -16,28 +16,28 @@ ; CHECK-NEXT: b .LBB0_6 ; CHECK-NEXT: .LBB0_3: // %vector.ph ; CHECK-NEXT: add x11, x8, #1 -; CHECK-NEXT: mov w15, #1132396544 -; CHECK-NEXT: and x10, x11, #0x1fffffff8 +; CHECK-NEXT: mov w8, #1132396544 // =0x437f0000 ; CHECK-NEXT: add x12, x0, #4 -; CHECK-NEXT: add x9, x0, x10 +; CHECK-NEXT: and x10, x11, #0x1fffffff8 +; CHECK-NEXT: dup v0.4s, w8 ; CHECK-NEXT: add x13, x1, #16 ; CHECK-NEXT: 
add x8, x1, x10, lsl #2 +; CHECK-NEXT: add x9, x0, x10 ; CHECK-NEXT: mov x14, x10 -; CHECK-NEXT: dup v0.4s, w15 ; CHECK-NEXT: .LBB0_4: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldp q1, q2, [x13, #-16] ; CHECK-NEXT: subs x14, x14, #8 ; CHECK-NEXT: add x13, x13, #32 ; CHECK-NEXT: fcmgt v3.4s, v1.4s, v0.4s -; CHECK-NEXT: fcmlt v5.4s, v1.4s, #0.0 ; CHECK-NEXT: fcmgt v4.4s, v2.4s, v0.4s +; CHECK-NEXT: fcmlt v5.4s, v1.4s, #0.0 ; CHECK-NEXT: fcmlt v6.4s, v2.4s, #0.0 ; CHECK-NEXT: bit v1.16b, v0.16b, v3.16b ; CHECK-NEXT: bit v2.16b, v0.16b, v4.16b ; CHECK-NEXT: bic v1.16b, v1.16b, v5.16b -; CHECK-NEXT: fcvtzs v1.4s, v1.4s ; CHECK-NEXT: bic v2.16b, v2.16b, v6.16b +; CHECK-NEXT: fcvtzs v1.4s, v1.4s ; CHECK-NEXT: fcvtzs v2.4s, v2.4s ; CHECK-NEXT: xtn v1.4h, v1.4s ; CHECK-NEXT: xtn v2.4h, v2.4s @@ -53,11 +53,11 @@ ; CHECK-NEXT: .LBB0_6: // %for.body.preheader1 ; CHECK-NEXT: movi d0, #0000000000000000 ; CHECK-NEXT: sub w10, w2, w10 -; CHECK-NEXT: mov w11, #1132396544 +; CHECK-NEXT: mov w11, #1132396544 // =0x437f0000 ; CHECK-NEXT: .LBB0_7: // %for.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldr s1, [x8], #4 ; CHECK-NEXT: fmov s2, w11 +; CHECK-NEXT: ldr s1, [x8], #4 ; CHECK-NEXT: fcmp s1, s2 ; CHECK-NEXT: fcsel s2, s2, s1, gt ; CHECK-NEXT: fcmp s1, #0.0 @@ -166,23 +166,23 @@ ; CHECK-NEXT: .LBB1_5: // %for.body.preheader1 ; CHECK-NEXT: movi d0, #0000000000000000 ; CHECK-NEXT: sub w10, w2, w10 -; CHECK-NEXT: mov w11, #1132396544 +; CHECK-NEXT: mov w11, #1132396544 // =0x437f0000 ; CHECK-NEXT: .LBB1_6: // %for.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldp s2, s3, [x8], #8 -; CHECK-NEXT: fmov s1, w11 -; CHECK-NEXT: fcmp s2, s1 -; CHECK-NEXT: fcsel s4, s1, s2, gt -; CHECK-NEXT: fcmp s2, #0.0 -; CHECK-NEXT: fcsel s2, s0, s4, mi -; CHECK-NEXT: fcmp s3, s1 -; CHECK-NEXT: fcsel s1, s1, s3, gt +; CHECK-NEXT: ldp s1, s3, [x8], #8 +; CHECK-NEXT: fmov s2, w11 +; CHECK-NEXT: fcmp s1, s2 +; CHECK-NEXT: fcsel s4, s2, s1, gt +; CHECK-NEXT: fcmp s1, #0.0 +; CHECK-NEXT: fcsel s1, s0, s4, mi +; CHECK-NEXT: fcmp s3, s2 +; CHECK-NEXT: fcsel s2, s2, s3, gt ; CHECK-NEXT: fcmp s3, #0.0 -; CHECK-NEXT: fcvtzs w12, s2 -; CHECK-NEXT: fcsel s1, s0, s1, mi -; CHECK-NEXT: strb w12, [x9] +; CHECK-NEXT: fcvtzs w12, s1 +; CHECK-NEXT: fcsel s2, s0, s2, mi ; CHECK-NEXT: subs w10, w10, #1 -; CHECK-NEXT: fcvtzs w13, s1 +; CHECK-NEXT: strb w12, [x9] +; CHECK-NEXT: fcvtzs w13, s2 ; CHECK-NEXT: strb w13, [x9, #1] ; CHECK-NEXT: add x9, x9, #2 ; CHECK-NEXT: b.ne .LBB1_6 @@ -190,25 +190,25 @@ ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB1_8: // %vector.ph ; CHECK-NEXT: add x11, x8, #1 -; CHECK-NEXT: mov w13, #1132396544 +; CHECK-NEXT: mov w8, #1132396544 // =0x437f0000 ; CHECK-NEXT: and x10, x11, #0x1fffffffc -; CHECK-NEXT: mov x12, x10 +; CHECK-NEXT: dup v0.4s, w8 ; CHECK-NEXT: add x8, x1, x10, lsl #3 ; CHECK-NEXT: add x9, x0, x10, lsl #1 -; CHECK-NEXT: dup v0.4s, w13 +; CHECK-NEXT: mov x12, x10 ; CHECK-NEXT: .LBB1_9: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ld2 { v1.4s, v2.4s }, [x1], #32 ; CHECK-NEXT: fcmgt v3.4s, v1.4s, v0.4s -; CHECK-NEXT: subs x12, x12, #4 ; CHECK-NEXT: fcmgt v4.4s, v2.4s, v0.4s ; CHECK-NEXT: fcmlt v5.4s, v1.4s, #0.0 +; CHECK-NEXT: subs x12, x12, #4 ; CHECK-NEXT: bsl v3.16b, v0.16b, v1.16b -; CHECK-NEXT: bsl v4.16b, v0.16b, v2.16b ; CHECK-NEXT: fcmlt v1.4s, v2.4s, #0.0 +; CHECK-NEXT: bsl v4.16b, v0.16b, v2.16b ; CHECK-NEXT: bic v2.16b, v3.16b, v5.16b -; CHECK-NEXT: fcvtzs v2.4s, v2.4s ; CHECK-NEXT: 
bic v1.16b, v4.16b, v1.16b +; CHECK-NEXT: fcvtzs v2.4s, v2.4s ; CHECK-NEXT: fcvtzs v1.4s, v1.4s ; CHECK-NEXT: xtn v2.4h, v2.4s ; CHECK-NEXT: xtn v1.4h, v1.4s @@ -320,100 +320,97 @@ ; CHECK-LABEL: loop3: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: subs w8, w2, #1 -; CHECK-NEXT: b.lt .LBB2_7 +; CHECK-NEXT: b.lt .LBB2_9 ; CHECK-NEXT: // %bb.1: // %for.body.preheader ; CHECK-NEXT: cmp w8, #2 -; CHECK-NEXT: b.ls .LBB2_4 +; CHECK-NEXT: b.ls .LBB2_6 ; CHECK-NEXT: // %bb.2: // %vector.memcheck ; CHECK-NEXT: add x9, x8, w8, uxtw #1 ; CHECK-NEXT: add x9, x9, #3 ; CHECK-NEXT: add x10, x1, x9, lsl #2 +; CHECK-NEXT: add x9, x0, x9 ; CHECK-NEXT: cmp x10, x0 -; CHECK-NEXT: b.ls .LBB2_8 -; CHECK-NEXT: // %bb.3: // %vector.memcheck +; CHECK-NEXT: ccmp x9, x1, #0, hi +; CHECK-NEXT: b.hi .LBB2_6 +; CHECK-NEXT: // %bb.3: // %vector.ph +; CHECK-NEXT: add x11, x8, #1 +; CHECK-NEXT: mov w8, #1132396544 // =0x437f0000 +; CHECK-NEXT: adrp x12, .LCPI2_0 +; CHECK-NEXT: and x10, x11, #0x1fffffffc +; CHECK-NEXT: dup v0.4s, w8 +; CHECK-NEXT: ldr q1, [x12, :lo12:.LCPI2_0] +; CHECK-NEXT: add x9, x10, x10, lsl #1 +; CHECK-NEXT: mov x12, x10 +; CHECK-NEXT: add x8, x1, x9, lsl #2 ; CHECK-NEXT: add x9, x0, x9 -; CHECK-NEXT: cmp x9, x1 -; CHECK-NEXT: b.ls .LBB2_8 -; CHECK-NEXT: .LBB2_4: +; CHECK-NEXT: .LBB2_4: // %vector.body +; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ld3 { v2.4s, v3.4s, v4.4s }, [x1], #48 +; CHECK-NEXT: fcmgt v5.4s, v2.4s, v0.4s +; CHECK-NEXT: fcmgt v6.4s, v3.4s, v0.4s +; CHECK-NEXT: fcmgt v7.4s, v4.4s, v0.4s +; CHECK-NEXT: fcmlt v16.4s, v2.4s, #0.0 +; CHECK-NEXT: fcmlt v17.4s, v3.4s, #0.0 +; CHECK-NEXT: add x13, x0, #8 +; CHECK-NEXT: subs x12, x12, #4 +; CHECK-NEXT: bsl v5.16b, v0.16b, v2.16b +; CHECK-NEXT: fcmlt v2.4s, v4.4s, #0.0 +; CHECK-NEXT: bsl v6.16b, v0.16b, v3.16b +; CHECK-NEXT: bsl v7.16b, v0.16b, v4.16b +; CHECK-NEXT: bic v3.16b, v5.16b, v16.16b +; CHECK-NEXT: bic v4.16b, v6.16b, v17.16b +; CHECK-NEXT: bic v2.16b, v7.16b, v2.16b +; CHECK-NEXT: fcvtzs v3.4s, v3.4s +; CHECK-NEXT: fcvtzs v4.4s, v4.4s +; CHECK-NEXT: fcvtzs v2.4s, v2.4s +; CHECK-NEXT: xtn v5.4h, v3.4s +; CHECK-NEXT: xtn v6.4h, v4.4s +; CHECK-NEXT: xtn v7.4h, v2.4s +; CHECK-NEXT: tbl v2.16b, { v5.16b, v6.16b, v7.16b }, v1.16b +; CHECK-NEXT: st1 { v2.s }[2], [x13] +; CHECK-NEXT: str d2, [x0], #12 +; CHECK-NEXT: b.ne .LBB2_4 +; CHECK-NEXT: // %bb.5: // %middle.block +; CHECK-NEXT: cmp x11, x10 +; CHECK-NEXT: b.ne .LBB2_7 +; CHECK-NEXT: b .LBB2_9 +; CHECK-NEXT: .LBB2_6: ; CHECK-NEXT: mov w10, wzr ; CHECK-NEXT: mov x8, x1 ; CHECK-NEXT: mov x9, x0 -; CHECK-NEXT: .LBB2_5: // %for.body.preheader1 +; CHECK-NEXT: .LBB2_7: // %for.body.preheader1 ; CHECK-NEXT: movi d0, #0000000000000000 ; CHECK-NEXT: sub w10, w2, w10 -; CHECK-NEXT: mov w11, #1132396544 -; CHECK-NEXT: .LBB2_6: // %for.body +; CHECK-NEXT: mov w11, #1132396544 // =0x437f0000 +; CHECK-NEXT: .LBB2_8: // %for.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldp s2, s3, [x8] -; CHECK-NEXT: fmov s1, w11 -; CHECK-NEXT: fcmp s2, s1 -; CHECK-NEXT: fcsel s4, s1, s2, gt -; CHECK-NEXT: fcmp s2, #0.0 -; CHECK-NEXT: fcsel s2, s0, s4, mi -; CHECK-NEXT: fcmp s3, s1 -; CHECK-NEXT: fcsel s4, s1, s3, gt +; CHECK-NEXT: ldp s1, s3, [x8] +; CHECK-NEXT: fmov s2, w11 +; CHECK-NEXT: fcmp s1, s2 +; CHECK-NEXT: fcsel s4, s2, s1, gt +; CHECK-NEXT: fcmp s1, #0.0 +; CHECK-NEXT: fcsel s1, s0, s4, mi +; CHECK-NEXT: fcmp s3, s2 +; CHECK-NEXT: fcsel s4, s2, s3, gt ; CHECK-NEXT: fcmp s3, #0.0 ; CHECK-NEXT: ldr s3, [x8, #8] -; CHECK-NEXT: fcvtzs w12, s2 +; CHECK-NEXT: 
fcvtzs w12, s1 ; CHECK-NEXT: add x8, x8, #12 ; CHECK-NEXT: fcsel s4, s0, s4, mi -; CHECK-NEXT: fcmp s3, s1 +; CHECK-NEXT: fcmp s3, s2 ; CHECK-NEXT: strb w12, [x9] -; CHECK-NEXT: fcsel s1, s1, s3, gt +; CHECK-NEXT: fcsel s2, s2, s3, gt ; CHECK-NEXT: fcmp s3, #0.0 ; CHECK-NEXT: fcvtzs w13, s4 -; CHECK-NEXT: fcsel s1, s0, s1, mi -; CHECK-NEXT: strb w13, [x9, #1] +; CHECK-NEXT: fcsel s2, s0, s2, mi ; CHECK-NEXT: subs w10, w10, #1 -; CHECK-NEXT: fcvtzs w14, s1 +; CHECK-NEXT: strb w13, [x9, #1] +; CHECK-NEXT: fcvtzs w14, s2 ; CHECK-NEXT: strb w14, [x9, #2] ; CHECK-NEXT: add x9, x9, #3 -; CHECK-NEXT: b.ne .LBB2_6 -; CHECK-NEXT: .LBB2_7: // %for.cond.cleanup +; CHECK-NEXT: b.ne .LBB2_8 +; CHECK-NEXT: .LBB2_9: // %for.cond.cleanup ; CHECK-NEXT: ret -; CHECK-NEXT: .LBB2_8: // %vector.ph -; CHECK-NEXT: add x11, x8, #1 -; CHECK-NEXT: adrp x12, .LCPI2_0 -; CHECK-NEXT: and x10, x11, #0x1fffffffc -; CHECK-NEXT: mov w13, #1132396544 -; CHECK-NEXT: add x8, x10, x10, lsl #1 -; CHECK-NEXT: ldr q0, [x12, :lo12:.LCPI2_0] -; CHECK-NEXT: add x9, x0, x8 -; CHECK-NEXT: mov x12, x10 -; CHECK-NEXT: add x8, x1, x8, lsl #2 -; CHECK-NEXT: dup v1.4s, w13 -; CHECK-NEXT: .LBB2_9: // %vector.body -; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ld3 { v2.4s, v3.4s, v4.4s }, [x1], #48 -; CHECK-NEXT: fcmgt v5.4s, v2.4s, v1.4s -; CHECK-NEXT: add x13, x0, #8 -; CHECK-NEXT: fcmgt v7.4s, v3.4s, v1.4s -; CHECK-NEXT: subs x12, x12, #4 -; CHECK-NEXT: fcmgt v17.4s, v4.4s, v1.4s -; CHECK-NEXT: fcmlt v6.4s, v2.4s, #0.0 -; CHECK-NEXT: bsl v5.16b, v1.16b, v2.16b -; CHECK-NEXT: fcmlt v16.4s, v3.4s, #0.0 -; CHECK-NEXT: bsl v7.16b, v1.16b, v3.16b -; CHECK-NEXT: mov v2.16b, v17.16b -; CHECK-NEXT: bic v5.16b, v5.16b, v6.16b -; CHECK-NEXT: fcmlt v6.4s, v4.4s, #0.0 -; CHECK-NEXT: bsl v2.16b, v1.16b, v4.16b -; CHECK-NEXT: bic v3.16b, v7.16b, v16.16b -; CHECK-NEXT: fcvtzs v4.4s, v5.4s -; CHECK-NEXT: fcvtzs v3.4s, v3.4s -; CHECK-NEXT: bic v2.16b, v2.16b, v6.16b -; CHECK-NEXT: fcvtzs v2.4s, v2.4s -; CHECK-NEXT: xtn v4.4h, v4.4s -; CHECK-NEXT: xtn v5.4h, v3.4s -; CHECK-NEXT: xtn v6.4h, v2.4s -; CHECK-NEXT: tbl v2.16b, { v4.16b, v5.16b, v6.16b }, v0.16b -; CHECK-NEXT: str d2, [x0], #12 -; CHECK-NEXT: st1 { v2.s }[2], [x13] -; CHECK-NEXT: b.ne .LBB2_9 -; CHECK-NEXT: // %bb.10: // %middle.block -; CHECK-NEXT: cmp x11, x10 -; CHECK-NEXT: b.ne .LBB2_5 -; CHECK-NEXT: b .LBB2_7 entry: %cmp29 = icmp sgt i32 %width, 0 br i1 %cmp29, label %for.body.preheader, label %for.cond.cleanup @@ -553,82 +550,81 @@ ; CHECK-NEXT: .LBB3_5: // %for.body.preheader1 ; CHECK-NEXT: movi d0, #0000000000000000 ; CHECK-NEXT: sub w10, w2, w10 -; CHECK-NEXT: mov w11, #1132396544 +; CHECK-NEXT: mov w11, #1132396544 // =0x437f0000 ; CHECK-NEXT: .LBB3_6: // %for.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldp s2, s3, [x8] -; CHECK-NEXT: fmov s1, w11 -; CHECK-NEXT: fcmp s2, s1 -; CHECK-NEXT: fcsel s4, s1, s2, gt -; CHECK-NEXT: fcmp s2, #0.0 -; CHECK-NEXT: fcsel s2, s0, s4, mi -; CHECK-NEXT: fcmp s3, s1 -; CHECK-NEXT: fcsel s4, s1, s3, gt +; CHECK-NEXT: ldp s1, s3, [x8] +; CHECK-NEXT: fmov s2, w11 +; CHECK-NEXT: fcmp s1, s2 +; CHECK-NEXT: fcsel s4, s2, s1, gt +; CHECK-NEXT: fcmp s1, #0.0 +; CHECK-NEXT: fcsel s1, s0, s4, mi +; CHECK-NEXT: fcmp s3, s2 +; CHECK-NEXT: fcsel s4, s2, s3, gt ; CHECK-NEXT: fcmp s3, #0.0 ; CHECK-NEXT: ldp s3, s5, [x8, #8] -; CHECK-NEXT: fcvtzs w12, s2 +; CHECK-NEXT: fcvtzs w12, s1 ; CHECK-NEXT: add x8, x8, #16 ; CHECK-NEXT: fcsel s4, s0, s4, mi -; CHECK-NEXT: fcmp s3, s1 +; CHECK-NEXT: fcmp s3, s2 ; CHECK-NEXT: strb 
w12, [x9] -; CHECK-NEXT: fcsel s6, s1, s3, gt +; CHECK-NEXT: fcsel s6, s2, s3, gt ; CHECK-NEXT: fcmp s3, #0.0 ; CHECK-NEXT: fcvtzs w13, s4 ; CHECK-NEXT: fcsel s3, s0, s6, mi -; CHECK-NEXT: fcmp s5, s1 +; CHECK-NEXT: fcmp s5, s2 ; CHECK-NEXT: strb w13, [x9, #1] -; CHECK-NEXT: fcsel s1, s1, s5, gt +; CHECK-NEXT: fcsel s2, s2, s5, gt ; CHECK-NEXT: fcmp s5, #0.0 ; CHECK-NEXT: fcvtzs w14, s3 -; CHECK-NEXT: fcsel s1, s0, s1, mi -; CHECK-NEXT: strb w14, [x9, #2] +; CHECK-NEXT: fcsel s2, s0, s2, mi ; CHECK-NEXT: subs w10, w10, #1 -; CHECK-NEXT: fcvtzs w12, s1 -; CHECK-NEXT: strb w12, [x9, #3] +; CHECK-NEXT: strb w14, [x9, #2] +; CHECK-NEXT: fcvtzs w15, s2 +; CHECK-NEXT: strb w15, [x9, #3] ; CHECK-NEXT: add x9, x9, #4 ; CHECK-NEXT: b.ne .LBB3_6 ; CHECK-NEXT: .LBB3_7: // %for.cond.cleanup ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB3_8: // %vector.ph ; CHECK-NEXT: add x11, x8, #1 +; CHECK-NEXT: mov w8, #1132396544 // =0x437f0000 ; CHECK-NEXT: adrp x12, .LCPI3_0 ; CHECK-NEXT: and x10, x11, #0x1fffffffc -; CHECK-NEXT: mov w13, #1132396544 +; CHECK-NEXT: dup v0.4s, w8 +; CHECK-NEXT: ldr q1, [x12, :lo12:.LCPI3_0] ; CHECK-NEXT: add x8, x1, x10, lsl #4 ; CHECK-NEXT: add x9, x0, x10, lsl #2 -; CHECK-NEXT: ldr q0, [x12, :lo12:.LCPI3_0] ; CHECK-NEXT: mov x12, x10 -; CHECK-NEXT: dup v1.4s, w13 ; CHECK-NEXT: .LBB3_9: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ld4 { v2.4s, v3.4s, v4.4s, v5.4s }, [x1], #64 -; CHECK-NEXT: fcmgt v6.4s, v2.4s, v1.4s +; CHECK-NEXT: fcmgt v6.4s, v2.4s, v0.4s +; CHECK-NEXT: fcmgt v7.4s, v3.4s, v0.4s +; CHECK-NEXT: fcmgt v16.4s, v4.4s, v0.4s +; CHECK-NEXT: fcmgt v17.4s, v5.4s, v0.4s +; CHECK-NEXT: fcmlt v18.4s, v2.4s, #0.0 +; CHECK-NEXT: fcmlt v19.4s, v3.4s, #0.0 ; CHECK-NEXT: subs x12, x12, #4 -; CHECK-NEXT: fcmlt v7.4s, v2.4s, #0.0 -; CHECK-NEXT: fcmgt v16.4s, v3.4s, v1.4s -; CHECK-NEXT: fcmgt v19.4s, v4.4s, v1.4s -; CHECK-NEXT: bsl v6.16b, v1.16b, v2.16b -; CHECK-NEXT: fcmlt v17.4s, v3.4s, #0.0 -; CHECK-NEXT: bsl v16.16b, v1.16b, v3.16b -; CHECK-NEXT: fcmlt v18.4s, v4.4s, #0.0 -; CHECK-NEXT: bic v6.16b, v6.16b, v7.16b -; CHECK-NEXT: fcmgt v7.4s, v5.4s, v1.4s -; CHECK-NEXT: bsl v19.16b, v1.16b, v4.16b -; CHECK-NEXT: bic v16.16b, v16.16b, v17.16b -; CHECK-NEXT: fcmlt v17.4s, v5.4s, #0.0 -; CHECK-NEXT: mov v2.16b, v7.16b -; CHECK-NEXT: bsl v2.16b, v1.16b, v5.16b -; CHECK-NEXT: fcvtzs v4.4s, v6.4s -; CHECK-NEXT: bic v3.16b, v19.16b, v18.16b -; CHECK-NEXT: fcvtzs v5.4s, v16.4s +; CHECK-NEXT: fcmlt v20.4s, v4.4s, #0.0 +; CHECK-NEXT: bsl v6.16b, v0.16b, v2.16b +; CHECK-NEXT: fcmlt v2.4s, v5.4s, #0.0 +; CHECK-NEXT: bsl v7.16b, v0.16b, v3.16b +; CHECK-NEXT: bsl v16.16b, v0.16b, v4.16b +; CHECK-NEXT: bsl v17.16b, v0.16b, v5.16b +; CHECK-NEXT: bic v3.16b, v6.16b, v18.16b +; CHECK-NEXT: bic v4.16b, v7.16b, v19.16b +; CHECK-NEXT: bic v5.16b, v16.16b, v20.16b +; CHECK-NEXT: bic v2.16b, v17.16b, v2.16b ; CHECK-NEXT: fcvtzs v3.4s, v3.4s -; CHECK-NEXT: bic v2.16b, v2.16b, v17.16b +; CHECK-NEXT: fcvtzs v4.4s, v4.4s +; CHECK-NEXT: fcvtzs v5.4s, v5.4s ; CHECK-NEXT: fcvtzs v2.4s, v2.4s -; CHECK-NEXT: xtn v16.4h, v4.4s -; CHECK-NEXT: xtn v17.4h, v5.4s -; CHECK-NEXT: xtn v18.4h, v3.4s +; CHECK-NEXT: xtn v16.4h, v3.4s +; CHECK-NEXT: xtn v17.4h, v4.4s +; CHECK-NEXT: xtn v18.4h, v5.4s ; CHECK-NEXT: xtn v19.4h, v2.4s -; CHECK-NEXT: tbl v2.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v0.16b +; CHECK-NEXT: tbl v2.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b ; CHECK-NEXT: str q2, [x0], #16 ; CHECK-NEXT: b.ne .LBB3_9 ; CHECK-NEXT: // %bb.10: // %middle.block diff --git 
a/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll b/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll --- a/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll +++ b/llvm/test/CodeGen/AArch64/trunc-to-tbl.ll @@ -43,10 +43,10 @@ ; CHECK-LABEL: trunc_v16i32_to_v16i8_in_loop: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: Lloh0: -; CHECK-NEXT: adrp x9, lCPI0_0@PAGE -; CHECK-NEXT: mov x8, xzr +; CHECK-NEXT: adrp x8, lCPI0_0@PAGE ; CHECK-NEXT: Lloh1: -; CHECK-NEXT: ldr q0, [x9, lCPI0_0@PAGEOFF] +; CHECK-NEXT: ldr q0, [x8, lCPI0_0@PAGEOFF] +; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: LBB0_1: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add x9, x0, x8, lsl #6 @@ -71,10 +71,10 @@ ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-BE-NEXT: add x9, x0, x8, lsl #6 ; CHECK-BE-NEXT: add x10, x9, #16 -; CHECK-BE-NEXT: add x11, x9, #32 ; CHECK-BE-NEXT: ld1 { v1.16b }, [x9] -; CHECK-BE-NEXT: add x9, x9, #48 +; CHECK-BE-NEXT: add x11, x9, #32 ; CHECK-BE-NEXT: ld1 { v2.16b }, [x10] +; CHECK-BE-NEXT: add x9, x9, #48 ; CHECK-BE-NEXT: ld1 { v3.16b }, [x11] ; CHECK-BE-NEXT: ld1 { v4.16b }, [x9] ; CHECK-BE-NEXT: add x9, x1, x8, lsl #4 @@ -109,26 +109,26 @@ define void @trunc_v16i32_to_v16i8_no_loop(ptr %A, ptr %dst) { ; CHECK-LABEL: trunc_v16i32_to_v16i8_no_loop: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: ldp q1, q0, [x0, #32] -; CHECK-NEXT: ldp q3, q2, [x0] +; CHECK-NEXT: ldp q1, q0, [x0] +; CHECK-NEXT: ldp q3, q2, [x0, #32] ; CHECK-NEXT: uzp1.8h v0, v1, v0 -; CHECK-NEXT: uzp1.8h v1, v3, v2 -; CHECK-NEXT: uzp1.16b v0, v1, v0 +; CHECK-NEXT: uzp1.8h v2, v3, v2 +; CHECK-NEXT: uzp1.16b v0, v0, v2 ; CHECK-NEXT: str q0, [x1] ; CHECK-NEXT: ret ; ; CHECK-BE-LABEL: trunc_v16i32_to_v16i8_no_loop: ; CHECK-BE: // %bb.0: // %entry -; CHECK-BE-NEXT: add x8, x0, #48 -; CHECK-BE-NEXT: add x9, x0, #32 +; CHECK-BE-NEXT: add x8, x0, #16 +; CHECK-BE-NEXT: add x9, x0, #48 +; CHECK-BE-NEXT: add x10, x0, #32 ; CHECK-BE-NEXT: ld1 { v0.4s }, [x0] ; CHECK-BE-NEXT: ld1 { v1.4s }, [x8] -; CHECK-BE-NEXT: add x8, x0, #16 ; CHECK-BE-NEXT: ld1 { v2.4s }, [x9] -; CHECK-BE-NEXT: ld1 { v3.4s }, [x8] -; CHECK-BE-NEXT: uzp1 v1.8h, v2.8h, v1.8h -; CHECK-BE-NEXT: uzp1 v0.8h, v0.8h, v3.8h -; CHECK-BE-NEXT: uzp1 v0.16b, v0.16b, v1.16b +; CHECK-BE-NEXT: ld1 { v3.4s }, [x10] +; CHECK-BE-NEXT: uzp1 v0.8h, v0.8h, v1.8h +; CHECK-BE-NEXT: uzp1 v2.8h, v3.8h, v2.8h +; CHECK-BE-NEXT: uzp1 v0.16b, v0.16b, v2.16b ; CHECK-BE-NEXT: st1 { v0.16b }, [x1] ; CHECK-BE-NEXT: ret entry: @@ -179,10 +179,10 @@ ; CHECK-LABEL: trunc_v8i32_to_v8i8_in_loop: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: Lloh2: -; CHECK-NEXT: adrp x9, lCPI2_0@PAGE -; CHECK-NEXT: mov x8, xzr +; CHECK-NEXT: adrp x8, lCPI2_0@PAGE ; CHECK-NEXT: Lloh3: -; CHECK-NEXT: ldr q0, [x9, lCPI2_0@PAGEOFF] +; CHECK-NEXT: ldr q0, [x8, lCPI2_0@PAGEOFF] +; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: LBB2_1: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add x9, x0, x8, lsl #5 @@ -208,8 +208,8 @@ ; CHECK-BE-NEXT: add x10, x9, #16 ; CHECK-BE-NEXT: ld1 { v1.16b }, [x9] ; CHECK-BE-NEXT: add x9, x1, x8, lsl #3 -; CHECK-BE-NEXT: add x8, x8, #1 ; CHECK-BE-NEXT: ld1 { v2.16b }, [x10] +; CHECK-BE-NEXT: add x8, x8, #1 ; CHECK-BE-NEXT: cmp x8, #1000 ; CHECK-BE-NEXT: tbl v1.16b, { v1.16b, v2.16b }, v0.16b ; CHECK-BE-NEXT: st1 { v1.8b }, [x9] @@ -274,18 +274,18 @@ ; CHECK-LABEL: trunc_v16i64_to_v16i8_in_loop: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: Lloh4: -; CHECK-NEXT: adrp x9, lCPI3_0@PAGE -; CHECK-NEXT: mov x8, xzr +; CHECK-NEXT: adrp x8, lCPI3_0@PAGE ; CHECK-NEXT: Lloh5: -; CHECK-NEXT: ldr q0, [x9, 
lCPI3_0@PAGEOFF] +; CHECK-NEXT: ldr q0, [x8, lCPI3_0@PAGEOFF] +; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: LBB3_1: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add x9, x0, x8, lsl #7 ; CHECK-NEXT: ldp q1, q2, [x9] -; CHECK-NEXT: ldp q3, q4, [x9, #32] ; CHECK-NEXT: ldp q16, q17, [x9, #64] -; CHECK-NEXT: tbl.16b v1, { v1, v2, v3, v4 }, v0 +; CHECK-NEXT: ldp q3, q4, [x9, #32] ; CHECK-NEXT: ldp q18, q19, [x9, #96] +; CHECK-NEXT: tbl.16b v1, { v1, v2, v3, v4 }, v0 ; CHECK-NEXT: tbl.16b v2, { v16, v17, v18, v19 }, v0 ; CHECK-NEXT: mov.d v1[1], v2[0] ; CHECK-NEXT: str q1, [x1, x8, lsl #4] @@ -305,25 +305,25 @@ ; CHECK-BE-NEXT: .LBB3_1: // %loop ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-BE-NEXT: add x9, x0, x8, lsl #7 -; CHECK-BE-NEXT: add x10, x9, #16 -; CHECK-BE-NEXT: add x11, x9, #32 +; CHECK-BE-NEXT: add x13, x9, #64 +; CHECK-BE-NEXT: add x12, x9, #80 +; CHECK-BE-NEXT: add x14, x9, #16 ; CHECK-BE-NEXT: ld1 { v1.16b }, [x9] -; CHECK-BE-NEXT: ld1 { v2.16b }, [x10] -; CHECK-BE-NEXT: add x10, x9, #48 -; CHECK-BE-NEXT: ld1 { v3.16b }, [x11] -; CHECK-BE-NEXT: add x11, x9, #64 -; CHECK-BE-NEXT: ld1 { v4.16b }, [x10] -; CHECK-BE-NEXT: add x10, x9, #80 -; CHECK-BE-NEXT: ld1 { v16.16b }, [x11] +; CHECK-BE-NEXT: ld1 { v16.16b }, [x13] ; CHECK-BE-NEXT: add x11, x9, #96 -; CHECK-BE-NEXT: add x9, x9, #112 -; CHECK-BE-NEXT: ld1 { v17.16b }, [x10] -; CHECK-BE-NEXT: tbl v1.16b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.16b +; CHECK-BE-NEXT: add x13, x9, #32 +; CHECK-BE-NEXT: ld1 { v2.16b }, [x14] +; CHECK-BE-NEXT: ld1 { v17.16b }, [x12] +; CHECK-BE-NEXT: add x10, x9, #112 +; CHECK-BE-NEXT: add x9, x9, #48 +; CHECK-BE-NEXT: ld1 { v3.16b }, [x13] ; CHECK-BE-NEXT: ld1 { v18.16b }, [x11] -; CHECK-BE-NEXT: ld1 { v19.16b }, [x9] +; CHECK-BE-NEXT: ld1 { v4.16b }, [x9] ; CHECK-BE-NEXT: add x9, x1, x8, lsl #4 +; CHECK-BE-NEXT: ld1 { v19.16b }, [x10] ; CHECK-BE-NEXT: add x8, x8, #1 ; CHECK-BE-NEXT: cmp x8, #1000 +; CHECK-BE-NEXT: tbl v1.16b, { v1.16b, v2.16b, v3.16b, v4.16b }, v0.16b ; CHECK-BE-NEXT: tbl v2.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v0.16b ; CHECK-BE-NEXT: mov v1.d[1], v2.d[0] ; CHECK-BE-NEXT: st1 { v1.16b }, [x9] @@ -389,10 +389,10 @@ ; CHECK-LABEL: trunc_v8i64_to_v8i8_in_loop: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: Lloh6: -; CHECK-NEXT: adrp x9, lCPI4_0@PAGE -; CHECK-NEXT: mov x8, xzr +; CHECK-NEXT: adrp x8, lCPI4_0@PAGE ; CHECK-NEXT: Lloh7: -; CHECK-NEXT: ldr q0, [x9, lCPI4_0@PAGEOFF] +; CHECK-NEXT: ldr q0, [x8, lCPI4_0@PAGEOFF] +; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: LBB4_1: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add x9, x0, x8, lsl #6 @@ -417,10 +417,10 @@ ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-BE-NEXT: add x9, x0, x8, lsl #6 ; CHECK-BE-NEXT: add x10, x9, #16 -; CHECK-BE-NEXT: add x11, x9, #32 ; CHECK-BE-NEXT: ld1 { v1.16b }, [x9] -; CHECK-BE-NEXT: add x9, x9, #48 +; CHECK-BE-NEXT: add x11, x9, #32 ; CHECK-BE-NEXT: ld1 { v2.16b }, [x10] +; CHECK-BE-NEXT: add x9, x9, #48 ; CHECK-BE-NEXT: ld1 { v3.16b }, [x11] ; CHECK-BE-NEXT: ld1 { v4.16b }, [x9] ; CHECK-BE-NEXT: add x9, x1, x8, lsl #3 @@ -458,25 +458,25 @@ ; CHECK-NEXT: LBB5_1: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldp x10, x9, [x0] -; CHECK-NEXT: ldrb w11, [x0, #18] -; CHECK-NEXT: ldrh w13, [x0, #16] +; CHECK-NEXT: ldrb w13, [x0, #18] +; CHECK-NEXT: ldrh w14, [x0, #16] ; CHECK-NEXT: add x0, x0, #32 -; CHECK-NEXT: lsr x14, x10, #19 -; CHECK-NEXT: fmov s0, w10 ; CHECK-NEXT: ubfx x12, x9, #12, #20 +; CHECK-NEXT: fmov 
s0, w10 +; CHECK-NEXT: lsr x11, x10, #19 ; CHECK-NEXT: lsr x15, x9, #31 -; CHECK-NEXT: orr w11, w13, w11, lsl #16 -; CHECK-NEXT: lsr x13, x9, #50 -; CHECK-NEXT: mov.s v0[1], w14 ; CHECK-NEXT: fmov s1, w12 -; CHECK-NEXT: lsr x12, x10, #38 -; CHECK-NEXT: orr w13, w13, w11, lsl #14 +; CHECK-NEXT: lsr x12, x9, #50 +; CHECK-NEXT: mov.s v0[1], w11 +; CHECK-NEXT: orr w11, w14, w13, lsl #16 +; CHECK-NEXT: lsr x13, x10, #38 ; CHECK-NEXT: lsr x10, x10, #57 +; CHECK-NEXT: mov.s v1[1], w15 +; CHECK-NEXT: orr w12, w12, w11, lsl #14 ; CHECK-NEXT: orr w9, w10, w9, lsl #7 ; CHECK-NEXT: lsr w10, w11, #5 -; CHECK-NEXT: mov.s v1[1], w15 -; CHECK-NEXT: mov.s v0[2], w12 -; CHECK-NEXT: mov.s v1[2], w13 +; CHECK-NEXT: mov.s v0[2], w13 +; CHECK-NEXT: mov.s v1[2], w12 ; CHECK-NEXT: mov.s v0[3], w9 ; CHECK-NEXT: mov.s v1[3], w10 ; CHECK-NEXT: uzp1.8h v0, v0, v1 @@ -494,33 +494,33 @@ ; CHECK-BE-NEXT: .LBB5_1: // %loop ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-BE-NEXT: ldp x10, x9, [x0] -; CHECK-BE-NEXT: ldrh w11, [x0, #16] -; CHECK-BE-NEXT: lsr x13, x10, #45 -; CHECK-BE-NEXT: lsr x15, x10, #40 -; CHECK-BE-NEXT: lsr x12, x9, #40 -; CHECK-BE-NEXT: ubfx x14, x9, #33, #7 -; CHECK-BE-NEXT: ubfx x16, x10, #26, #14 -; CHECK-BE-NEXT: orr w12, w14, w12, lsl #7 -; CHECK-BE-NEXT: ldrb w14, [x0, #18] -; CHECK-BE-NEXT: orr w15, w16, w15, lsl #14 -; CHECK-BE-NEXT: fmov s0, w13 +; CHECK-BE-NEXT: ldrb w16, [x0, #18] +; CHECK-BE-NEXT: lsr x11, x9, #40 +; CHECK-BE-NEXT: ubfx x12, x9, #33, #7 +; CHECK-BE-NEXT: lsr x15, x10, #45 +; CHECK-BE-NEXT: lsr x13, x10, #40 +; CHECK-BE-NEXT: ubfx x14, x10, #26, #14 +; CHECK-BE-NEXT: orr w11, w12, w11, lsl #7 +; CHECK-BE-NEXT: ldrh w12, [x0, #16] +; CHECK-BE-NEXT: fmov s0, w15 +; CHECK-BE-NEXT: orr w13, w14, w13, lsl #14 +; CHECK-BE-NEXT: ubfx x14, x9, #14, #18 ; CHECK-BE-NEXT: add x0, x0, #32 -; CHECK-BE-NEXT: fmov s1, w12 -; CHECK-BE-NEXT: ubfx x12, x9, #14, #18 -; CHECK-BE-NEXT: orr w11, w14, w11, lsl #8 -; CHECK-BE-NEXT: mov v0.s[1], w15 -; CHECK-BE-NEXT: mov v1.s[1], w12 -; CHECK-BE-NEXT: extr x12, x10, x9, #40 -; CHECK-BE-NEXT: lsl x9, x9, #24 -; CHECK-BE-NEXT: ubfx x10, x10, #7, #25 -; CHECK-BE-NEXT: orr w9, w11, w9 -; CHECK-BE-NEXT: lsr w9, w9, #19 -; CHECK-BE-NEXT: mov v0.s[2], w10 -; CHECK-BE-NEXT: ubfx x10, x12, #12, #20 -; CHECK-BE-NEXT: mov v1.s[2], w9 +; CHECK-BE-NEXT: fmov s1, w11 +; CHECK-BE-NEXT: orr w11, w16, w12, lsl #8 +; CHECK-BE-NEXT: lsl x12, x9, #24 +; CHECK-BE-NEXT: mov v0.s[1], w13 +; CHECK-BE-NEXT: ubfx x13, x10, #7, #25 +; CHECK-BE-NEXT: extr x9, x10, x9, #40 +; CHECK-BE-NEXT: orr w12, w11, w12 +; CHECK-BE-NEXT: mov v1.s[1], w14 +; CHECK-BE-NEXT: lsr w12, w12, #19 +; CHECK-BE-NEXT: ubfx x9, x9, #12, #20 +; CHECK-BE-NEXT: mov v0.s[2], w13 +; CHECK-BE-NEXT: mov v1.s[2], w12 +; CHECK-BE-NEXT: mov v0.s[3], w9 ; CHECK-BE-NEXT: add x9, x1, x8, lsl #3 ; CHECK-BE-NEXT: add x8, x8, #1 -; CHECK-BE-NEXT: mov v0.s[3], w10 ; CHECK-BE-NEXT: cmp x8, #1000 ; CHECK-BE-NEXT: mov v1.s[3], w11 ; CHECK-BE-NEXT: uzp1 v0.8h, v0.8h, v1.8h @@ -554,24 +554,24 @@ ; CHECK-NEXT: mov w8, #1000 ; =0x3e8 ; CHECK-NEXT: LBB6_1: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldp q1, q0, [x0, #32] +; CHECK-NEXT: ldp q4, q1, [x0, #48] ; CHECK-NEXT: add x9, x1, #8 -; CHECK-NEXT: add x10, x1, #10 -; CHECK-NEXT: subs x8, x8, #1 ; CHECK-NEXT: ldp q3, q2, [x0] -; CHECK-NEXT: uzp1.4s v0, v1, v0 -; CHECK-NEXT: ldr d4, [x0, #80] -; CHECK-NEXT: ldr q1, [x0, #64] +; CHECK-NEXT: subs x8, x8, #1 +; CHECK-NEXT: ldr d0, [x0, #80] +; CHECK-NEXT: ldr q5, [x0, #32] ; 
CHECK-NEXT: add x0, x0, #128 +; CHECK-NEXT: uzp1.4s v4, v5, v4 ; CHECK-NEXT: uzp1.4s v2, v3, v2 -; CHECK-NEXT: uzp1.4s v1, v1, v4 -; CHECK-NEXT: uzp1.8h v0, v2, v0 -; CHECK-NEXT: xtn.4h v1, v1 -; CHECK-NEXT: uzp1.16b v0, v0, v1 -; CHECK-NEXT: xtn.8b v1, v1 -; CHECK-NEXT: st1.b { v1 }[2], [x10] -; CHECK-NEXT: str d0, [x1], #16 -; CHECK-NEXT: st1.h { v0 }[4], [x9] +; CHECK-NEXT: uzp1.4s v0, v1, v0 +; CHECK-NEXT: uzp1.8h v1, v2, v4 +; CHECK-NEXT: xtn.4h v0, v0 +; CHECK-NEXT: uzp1.16b v1, v1, v0 +; CHECK-NEXT: xtn.8b v0, v0 +; CHECK-NEXT: st1.h { v1 }[4], [x9] +; CHECK-NEXT: add x9, x1, #10 +; CHECK-NEXT: st1.b { v0 }[2], [x9] +; CHECK-NEXT: str d1, [x1], #16 ; CHECK-NEXT: b.eq LBB6_1 ; CHECK-NEXT: ; %bb.2: ; %exit ; CHECK-NEXT: ret @@ -581,32 +581,32 @@ ; CHECK-BE-NEXT: mov w8, #1000 // =0x3e8 ; CHECK-BE-NEXT: .LBB6_1: // %loop ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 +; CHECK-BE-NEXT: add x9, x0, #64 +; CHECK-BE-NEXT: add x10, x0, #16 +; CHECK-BE-NEXT: ld1 { v3.2d }, [x0] +; CHECK-BE-NEXT: ld1 { v0.2d }, [x9] ; CHECK-BE-NEXT: add x9, x0, #48 +; CHECK-BE-NEXT: ld1 { v1.2d }, [x10] ; CHECK-BE-NEXT: add x10, x0, #32 -; CHECK-BE-NEXT: ld1 { v0.2d }, [x0] -; CHECK-BE-NEXT: subs x8, x8, #1 -; CHECK-BE-NEXT: ld1 { v1.2d }, [x9] -; CHECK-BE-NEXT: add x9, x0, #16 -; CHECK-BE-NEXT: ld1 { v2.2d }, [x10] -; CHECK-BE-NEXT: add x10, x0, #64 -; CHECK-BE-NEXT: ld1 { v3.2d }, [x9] -; CHECK-BE-NEXT: add x9, x1, #10 +; CHECK-BE-NEXT: ld1 { v2.2d }, [x9] +; CHECK-BE-NEXT: ldr d5, [x0, #80] ; CHECK-BE-NEXT: ld1 { v4.2d }, [x10] -; CHECK-BE-NEXT: add x10, x1, #8 -; CHECK-BE-NEXT: uzp1 v1.4s, v2.4s, v1.4s -; CHECK-BE-NEXT: ldr d2, [x0, #80] +; CHECK-BE-NEXT: add x9, x1, #10 +; CHECK-BE-NEXT: subs x8, x8, #1 +; CHECK-BE-NEXT: uzp1 v1.4s, v3.4s, v1.4s +; CHECK-BE-NEXT: uzp1 v0.4s, v0.4s, v5.4s ; CHECK-BE-NEXT: add x0, x0, #128 -; CHECK-BE-NEXT: uzp1 v0.4s, v0.4s, v3.4s ; CHECK-BE-NEXT: uzp1 v2.4s, v4.4s, v2.4s -; CHECK-BE-NEXT: uzp1 v0.8h, v0.8h, v1.8h -; CHECK-BE-NEXT: xtn v1.4h, v2.4s -; CHECK-BE-NEXT: uzp1 v0.16b, v0.16b, v1.16b -; CHECK-BE-NEXT: xtn v1.8b, v1.8h -; CHECK-BE-NEXT: st1 { v1.b }[2], [x9] -; CHECK-BE-NEXT: rev64 v2.16b, v0.16b -; CHECK-BE-NEXT: rev16 v0.16b, v0.16b -; CHECK-BE-NEXT: str d2, [x1], #16 -; CHECK-BE-NEXT: st1 { v0.h }[4], [x10] +; CHECK-BE-NEXT: xtn v0.4h, v0.4s +; CHECK-BE-NEXT: uzp1 v1.8h, v1.8h, v2.8h +; CHECK-BE-NEXT: uzp1 v1.16b, v1.16b, v0.16b +; CHECK-BE-NEXT: xtn v0.8b, v0.8h +; CHECK-BE-NEXT: rev16 v2.16b, v1.16b +; CHECK-BE-NEXT: rev64 v1.16b, v1.16b +; CHECK-BE-NEXT: st1 { v0.b }[2], [x9] +; CHECK-BE-NEXT: add x9, x1, #8 +; CHECK-BE-NEXT: st1 { v2.h }[4], [x9] +; CHECK-BE-NEXT: str d1, [x1], #16 ; CHECK-BE-NEXT: b.eq .LBB6_1 ; CHECK-BE-NEXT: // %bb.2: // %exit ; CHECK-BE-NEXT: ret @@ -654,8 +654,8 @@ ; CHECK-BE-NEXT: add x10, x9, #16 ; CHECK-BE-NEXT: ld1 { v0.8h }, [x9] ; CHECK-BE-NEXT: add x9, x1, x8, lsl #4 -; CHECK-BE-NEXT: add x8, x8, #1 ; CHECK-BE-NEXT: ld1 { v1.8h }, [x10] +; CHECK-BE-NEXT: add x8, x8, #1 ; CHECK-BE-NEXT: cmp x8, #1000 ; CHECK-BE-NEXT: uzp1 v0.16b, v0.16b, v1.16b ; CHECK-BE-NEXT: st1 { v0.16b }, [x9] diff --git a/llvm/test/CodeGen/AArch64/typepromotion-overflow.ll b/llvm/test/CodeGen/AArch64/typepromotion-overflow.ll --- a/llvm/test/CodeGen/AArch64/typepromotion-overflow.ll +++ b/llvm/test/CodeGen/AArch64/typepromotion-overflow.ll @@ -4,12 +4,12 @@ define zeroext i16 @overflow_add(i16 zeroext %a, i16 zeroext %b) { ; CHECK-LABEL: overflow_add: ; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w1, w0 -; CHECK-NEXT: mov w9, #2 -; CHECK-NEXT: orr w8, 
w8, #0x1 -; CHECK-NEXT: and w8, w8, #0xffff -; CHECK-NEXT: cmp w8, #1024 -; CHECK-NEXT: mov w8, #5 +; CHECK-NEXT: add w9, w1, w0 +; CHECK-NEXT: mov w8, #5 // =0x5 +; CHECK-NEXT: orr w9, w9, #0x1 +; CHECK-NEXT: and w9, w9, #0xffff +; CHECK-NEXT: cmp w9, #1024 +; CHECK-NEXT: mov w9, #2 // =0x2 ; CHECK-NEXT: csel w0, w9, w8, hi ; CHECK-NEXT: ret %add = add i16 %b, %a @@ -22,12 +22,12 @@ define zeroext i16 @overflow_sub(i16 zeroext %a, i16 zeroext %b) { ; CHECK-LABEL: overflow_sub: ; CHECK: // %bb.0: -; CHECK-NEXT: sub w8, w0, w1 -; CHECK-NEXT: mov w9, #2 -; CHECK-NEXT: orr w8, w8, #0x1 -; CHECK-NEXT: and w8, w8, #0xffff -; CHECK-NEXT: cmp w8, #1024 -; CHECK-NEXT: mov w8, #5 +; CHECK-NEXT: sub w9, w0, w1 +; CHECK-NEXT: mov w8, #5 // =0x5 +; CHECK-NEXT: orr w9, w9, #0x1 +; CHECK-NEXT: and w9, w9, #0xffff +; CHECK-NEXT: cmp w9, #1024 +; CHECK-NEXT: mov w9, #2 // =0x2 ; CHECK-NEXT: csel w0, w9, w8, hi ; CHECK-NEXT: ret %add = sub i16 %a, %b @@ -41,11 +41,11 @@ ; CHECK-LABEL: overflow_mul: ; CHECK: // %bb.0: ; CHECK-NEXT: mul w9, w1, w0 -; CHECK-NEXT: mov w8, #5 +; CHECK-NEXT: mov w8, #5 // =0x5 ; CHECK-NEXT: orr w9, w9, #0x1 ; CHECK-NEXT: and w9, w9, #0xffff ; CHECK-NEXT: cmp w9, #1024 -; CHECK-NEXT: mov w9, #2 +; CHECK-NEXT: mov w9, #2 // =0x2 ; CHECK-NEXT: csel w0, w9, w8, hi ; CHECK-NEXT: ret %add = mul i16 %b, %a @@ -59,11 +59,11 @@ ; CHECK-LABEL: overflow_shl: ; CHECK: // %bb.0: ; CHECK-NEXT: lsl w9, w0, w1 -; CHECK-NEXT: mov w8, #5 +; CHECK-NEXT: mov w8, #5 // =0x5 ; CHECK-NEXT: orr w9, w9, #0x1 ; CHECK-NEXT: and w9, w9, #0xffff ; CHECK-NEXT: cmp w9, #1024 -; CHECK-NEXT: mov w9, #2 +; CHECK-NEXT: mov w9, #2 // =0x2 ; CHECK-NEXT: csel w0, w9, w8, hi ; CHECK-NEXT: ret %add = shl i16 %a, %b @@ -76,10 +76,10 @@ define i32 @overflow_add_no_consts(i8 zeroext %a, i8 zeroext %b, i8 zeroext %limit) { ; CHECK-LABEL: overflow_add_no_consts: ; CHECK: // %bb.0: -; CHECK-NEXT: add w9, w1, w0 -; CHECK-NEXT: mov w8, #16 -; CHECK-NEXT: cmp w2, w9, uxtb -; CHECK-NEXT: mov w9, #8 +; CHECK-NEXT: add w8, w1, w0 +; CHECK-NEXT: mov w9, #8 // =0x8 +; CHECK-NEXT: cmp w2, w8, uxtb +; CHECK-NEXT: mov w8, #16 // =0x10 ; CHECK-NEXT: csel w0, w9, w8, lo ; CHECK-NEXT: ret %add = add i8 %b, %a @@ -91,11 +91,11 @@ define i32 @overflow_add_const_limit(i8 zeroext %a, i8 zeroext %b) { ; CHECK-LABEL: overflow_add_const_limit: ; CHECK: // %bb.0: -; CHECK-NEXT: add w8, w1, w0 -; CHECK-NEXT: mov w9, #8 -; CHECK-NEXT: and w8, w8, #0xff -; CHECK-NEXT: cmp w8, #128 -; CHECK-NEXT: mov w8, #16 +; CHECK-NEXT: add w9, w1, w0 +; CHECK-NEXT: mov w8, #16 // =0x10 +; CHECK-NEXT: and w9, w9, #0xff +; CHECK-NEXT: cmp w9, #128 +; CHECK-NEXT: mov w9, #8 // =0x8 ; CHECK-NEXT: csel w0, w9, w8, hi ; CHECK-NEXT: ret %add = add i8 %b, %a @@ -107,10 +107,10 @@ define i32 @overflow_add_positive_const_limit(i8 zeroext %a) { ; CHECK-LABEL: overflow_add_positive_const_limit: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-1 -; CHECK-NEXT: mov w9, #8 +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: mov w9, #8 // =0x8 ; CHECK-NEXT: cmp w8, w0, sxtb -; CHECK-NEXT: mov w8, #16 +; CHECK-NEXT: mov w8, #16 // =0x10 ; CHECK-NEXT: csel w0, w9, w8, gt ; CHECK-NEXT: ret %cmp = icmp slt i8 %a, -1 @@ -121,9 +121,9 @@ define i32 @unsafe_add_underflow(i8 zeroext %a) { ; CHECK-LABEL: unsafe_add_underflow: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #16 +; CHECK-NEXT: mov w8, #16 // =0x10 ; CHECK-NEXT: cmp w0, #1 -; CHECK-NEXT: mov w9, #8 +; CHECK-NEXT: mov w9, #8 // =0x8 ; CHECK-NEXT: csel w0, w9, w8, eq ; CHECK-NEXT: ret %cmp = icmp eq i8 %a, 1 @@ -134,9 +134,9 @@ 
define i32 @safe_add_underflow(i8 zeroext %a) { ; CHECK-LABEL: safe_add_underflow: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #16 +; CHECK-NEXT: mov w8, #16 // =0x10 ; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: mov w9, #8 +; CHECK-NEXT: mov w9, #8 // =0x8 ; CHECK-NEXT: csel w0, w9, w8, eq ; CHECK-NEXT: ret %cmp = icmp eq i8 %a, 0 @@ -148,9 +148,9 @@ ; CHECK-LABEL: safe_add_underflow_neg: ; CHECK: // %bb.0: ; CHECK-NEXT: sub w9, w0, #2 -; CHECK-NEXT: mov w8, #16 +; CHECK-NEXT: mov w8, #16 // =0x10 ; CHECK-NEXT: cmp w9, #251 -; CHECK-NEXT: mov w9, #8 +; CHECK-NEXT: mov w9, #8 // =0x8 ; CHECK-NEXT: csel w0, w9, w8, lo ; CHECK-NEXT: ret %add = add i8 %a, -2 @@ -162,10 +162,10 @@ define i32 @overflow_sub_negative_const_limit(i8 zeroext %a) { ; CHECK-LABEL: overflow_sub_negative_const_limit: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-1 -; CHECK-NEXT: mov w9, #8 +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: mov w9, #8 // =0x8 ; CHECK-NEXT: cmp w8, w0, sxtb -; CHECK-NEXT: mov w8, #16 +; CHECK-NEXT: mov w8, #16 // =0x10 ; CHECK-NEXT: csel w0, w9, w8, gt ; CHECK-NEXT: ret %cmp = icmp slt i8 %a, -1 @@ -178,9 +178,9 @@ ; CHECK-LABEL: sext_sub_underflow: ; CHECK: // %bb.0: ; CHECK-NEXT: sub w9, w0, #6 -; CHECK-NEXT: mov w8, #16 +; CHECK-NEXT: mov w8, #16 // =0x10 ; CHECK-NEXT: cmn w9, #6 -; CHECK-NEXT: mov w9, #8 +; CHECK-NEXT: mov w9, #8 // =0x8 ; CHECK-NEXT: csel w0, w9, w8, hi ; CHECK-NEXT: ret %sub = add i8 %a, -6 @@ -192,9 +192,9 @@ define i32 @safe_sub_underflow(i8 zeroext %a) { ; CHECK-LABEL: safe_sub_underflow: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #8 +; CHECK-NEXT: mov w8, #8 // =0x8 ; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: mov w9, #16 +; CHECK-NEXT: mov w9, #16 // =0x10 ; CHECK-NEXT: csel w0, w9, w8, eq ; CHECK-NEXT: ret %cmp.not = icmp eq i8 %a, 0 @@ -206,9 +206,9 @@ ; CHECK-LABEL: safe_sub_underflow_neg: ; CHECK: // %bb.0: ; CHECK-NEXT: sub w9, w0, #4 -; CHECK-NEXT: mov w8, #16 +; CHECK-NEXT: mov w8, #16 // =0x10 ; CHECK-NEXT: cmp w9, #250 -; CHECK-NEXT: mov w9, #8 +; CHECK-NEXT: mov w9, #8 // =0x8 ; CHECK-NEXT: csel w0, w9, w8, hi ; CHECK-NEXT: ret %sub = add i8 %a, -4 @@ -222,9 +222,9 @@ ; CHECK-LABEL: sext_sub_underflow_neg: ; CHECK: // %bb.0: ; CHECK-NEXT: sub w9, w0, #4 -; CHECK-NEXT: mov w8, #16 +; CHECK-NEXT: mov w8, #16 // =0x10 ; CHECK-NEXT: cmn w9, #3 -; CHECK-NEXT: mov w9, #8 +; CHECK-NEXT: mov w9, #8 // =0x8 ; CHECK-NEXT: csel w0, w9, w8, lo ; CHECK-NEXT: ret %sub = add i8 %a, -4 @@ -262,7 +262,7 @@ define i32 @safe_add_imm_var(ptr nocapture readnone %b) { ; CHECK-LABEL: safe_add_imm_var: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: mov w0, #1 // =0x1 ; CHECK-NEXT: ret entry: ret i32 1 @@ -271,7 +271,7 @@ define i32 @safe_add_var_imm(ptr nocapture readnone %b) { ; CHECK-LABEL: safe_add_var_imm: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov w0, #1 +; CHECK-NEXT: mov w0, #1 // =0x1 ; CHECK-NEXT: ret entry: ret i32 1 @@ -281,12 +281,12 @@ ; CHECK-LABEL: convert_add_order: ; CHECK: // %bb.0: ; CHECK-NEXT: orr w9, w0, #0x1 -; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: sub w10, w9, #40 ; CHECK-NEXT: cmp w10, #20 ; CHECK-NEXT: cinc w8, w8, hs ; CHECK-NEXT: cmp w9, #50 -; CHECK-NEXT: mov w9, #255 +; CHECK-NEXT: mov w9, #255 // =0xff ; CHECK-NEXT: csel w8, w8, w9, lo ; CHECK-NEXT: and w0, w8, w0 ; CHECK-NEXT: ret @@ -304,12 +304,12 @@ ; CHECK-LABEL: underflow_if_sub: ; CHECK: // %bb.0: ; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: mov w9, #100 -; CHECK-NEXT: cset w8, gt -; CHECK-NEXT: and w8, w8, w0 -; CHECK-NEXT: add w8, w8, #245 
-; CHECK-NEXT: cmp w8, w1 -; CHECK-NEXT: csel w0, w8, w9, lo +; CHECK-NEXT: mov w8, #100 // =0x64 +; CHECK-NEXT: cset w9, gt +; CHECK-NEXT: and w9, w9, w0 +; CHECK-NEXT: add w9, w9, #245 +; CHECK-NEXT: cmp w9, w1 +; CHECK-NEXT: csel w0, w9, w8, lo ; CHECK-NEXT: ret %cmp = icmp sgt i32 %arg, 0 %conv = zext i1 %cmp to i32 @@ -325,12 +325,12 @@ ; CHECK-LABEL: underflow_if_sub_signext: ; CHECK: // %bb.0: ; CHECK-NEXT: cmp w0, #0 -; CHECK-NEXT: mov w9, #100 -; CHECK-NEXT: cset w8, gt -; CHECK-NEXT: and w8, w8, w0 -; CHECK-NEXT: add w8, w8, #245 -; CHECK-NEXT: cmp w8, w1, uxtb -; CHECK-NEXT: csel w0, w8, w9, lo +; CHECK-NEXT: mov w8, #100 // =0x64 +; CHECK-NEXT: cset w9, gt +; CHECK-NEXT: and w9, w9, w0 +; CHECK-NEXT: add w9, w9, #245 +; CHECK-NEXT: cmp w9, w1, uxtb +; CHECK-NEXT: csel w0, w9, w8, lo ; CHECK-NEXT: ret %cmp = icmp sgt i32 %arg, 0 %conv = zext i1 %cmp to i32 diff --git a/llvm/test/CodeGen/AArch64/typepromotion-phisret.ll b/llvm/test/CodeGen/AArch64/typepromotion-phisret.ll --- a/llvm/test/CodeGen/AArch64/typepromotion-phisret.ll +++ b/llvm/test/CodeGen/AArch64/typepromotion-phisret.ll @@ -10,10 +10,10 @@ ; CHECK-NEXT: csel w8, w8, w9, hi ; CHECK-NEXT: .LBB0_1: // %loop ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: sub w9, w8, #2 -; CHECK-NEXT: lsl w10, w8, #1 +; CHECK-NEXT: lsl w9, w8, #1 +; CHECK-NEXT: sub w10, w8, #2 ; CHECK-NEXT: cmp w8, #254 -; CHECK-NEXT: csel w8, w9, w10, lo +; CHECK-NEXT: csel w8, w10, w9, lo ; CHECK-NEXT: cmp w8, #255 ; CHECK-NEXT: b.ne .LBB0_1 ; CHECK-NEXT: // %bb.2: // %exit @@ -58,10 +58,10 @@ ; CHECK-NEXT: csel w8, w0, w1, hi ; CHECK-NEXT: .LBB1_1: // %loop ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: sub w9, w8, #2 -; CHECK-NEXT: lsl w10, w8, #1 +; CHECK-NEXT: lsl w9, w8, #1 +; CHECK-NEXT: sub w10, w8, #2 ; CHECK-NEXT: cmp w8, #254 -; CHECK-NEXT: csel w8, w9, w10, lo +; CHECK-NEXT: csel w8, w10, w9, lo ; CHECK-NEXT: cmp w8, #255 ; CHECK-NEXT: b.ne .LBB1_1 ; CHECK-NEXT: // %bb.2: // %exit @@ -103,7 +103,7 @@ ; CHECK-LABEL: phi_i16: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: mov w9, #1 +; CHECK-NEXT: mov w9, #1 // =0x1 ; CHECK-NEXT: .LBB2_1: // %loop ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: cmp w8, #128 @@ -142,7 +142,7 @@ ; CHECK-LABEL: ret_i8: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: mov w0, wzr -; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: mov w8, #1 // =0x1 ; CHECK-NEXT: .LBB3_1: // %loop ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: cmp w0, #128 @@ -181,7 +181,7 @@ ; CHECK-LABEL: phi_multiple_undefs: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: mov w9, #1 +; CHECK-NEXT: mov w9, #1 // =0x1 ; CHECK-NEXT: .LBB4_1: // %loop ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: cmp w8, #128 @@ -237,21 +237,21 @@ define i16 @signext_bitcast_phi_select(i16 signext %start, ptr %in) { ; CHECK-LABEL: signext_bitcast_phi_select: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: mov w9, #-1 -; CHECK-NEXT: cmp w9, w8, sxth +; CHECK-NEXT: mov w8, #-1 // =0xffffffff +; CHECK-NEXT: and w9, w0, #0xffff +; CHECK-NEXT: cmp w8, w9, sxth ; CHECK-NEXT: b.lt .LBB6_3 ; CHECK-NEXT: .LBB6_1: // %if.then ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ldrh w0, [x1, w8, sxtw #1] -; CHECK-NEXT: cmp w0, w8 +; CHECK-NEXT: ldrh w0, [x1, w9, sxtw #1] +; CHECK-NEXT: cmp w0, w9 ; CHECK-NEXT: b.eq .LBB6_4 ; CHECK-NEXT: // %bb.2: // %if.else ; CHECK-NEXT: // in Loop: Header=BB6_1 Depth=1 -; 
CHECK-NEXT: lsr w10, w8, #15 +; CHECK-NEXT: lsr w10, w9, #15 ; CHECK-NEXT: eor w10, w10, #0x1 -; CHECK-NEXT: add w8, w10, w8 -; CHECK-NEXT: cmp w9, w8, sxth +; CHECK-NEXT: add w9, w10, w9 +; CHECK-NEXT: cmp w8, w9, sxth ; CHECK-NEXT: b.ge .LBB6_1 ; CHECK-NEXT: .LBB6_3: ; CHECK-NEXT: mov w0, wzr diff --git a/llvm/test/CodeGen/AArch64/typepromotion-signed.ll b/llvm/test/CodeGen/AArch64/typepromotion-signed.ll --- a/llvm/test/CodeGen/AArch64/typepromotion-signed.ll +++ b/llvm/test/CodeGen/AArch64/typepromotion-signed.ll @@ -57,11 +57,11 @@ ; CHECK-LABEL: test_signext_b: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ldrb w9, [x0] -; CHECK-NEXT: mov w8, #20894 +; CHECK-NEXT: mov w8, #20894 // =0x519e ; CHECK-NEXT: add w9, w9, w1 ; CHECK-NEXT: sxtb w9, w9 ; CHECK-NEXT: cmp w9, #0 -; CHECK-NEXT: mov w9, #42 +; CHECK-NEXT: mov w9, #42 // =0x2a ; CHECK-NEXT: csel w0, w9, w8, ge ; CHECK-NEXT: ret entry: @@ -75,12 +75,12 @@ define i32 @test_signext_b_ult_slt(ptr nocapture readonly %ptr, i8 signext %arg) { ; CHECK-LABEL: test_signext_b_ult_slt: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ldrb w8, [x0] -; CHECK-NEXT: add w9, w8, w1, uxtb -; CHECK-NEXT: cmp w9, #127 -; CHECK-NEXT: mov w9, #42 -; CHECK-NEXT: ccmp w8, #0, #0, ne -; CHECK-NEXT: mov w8, #57 +; CHECK-NEXT: ldrb w9, [x0] +; CHECK-NEXT: mov w8, #57 // =0x39 +; CHECK-NEXT: add w10, w9, w1, uxtb +; CHECK-NEXT: cmp w10, #127 +; CHECK-NEXT: ccmp w9, #0, #0, ne +; CHECK-NEXT: mov w9, #42 // =0x2a ; CHECK-NEXT: csel w0, w9, w8, eq ; CHECK-NEXT: ret entry: @@ -97,11 +97,11 @@ ; CHECK-LABEL: test_signext_h: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ldrh w9, [x0] -; CHECK-NEXT: mov w8, #20894 +; CHECK-NEXT: mov w8, #20894 // =0x519e ; CHECK-NEXT: add w9, w9, w1 ; CHECK-NEXT: sxth w9, w9 ; CHECK-NEXT: cmp w9, #0 -; CHECK-NEXT: mov w9, #42 +; CHECK-NEXT: mov w9, #42 // =0x2a ; CHECK-NEXT: csel w0, w9, w8, ge ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/uadd_sat.ll b/llvm/test/CodeGen/AArch64/uadd_sat.ll --- a/llvm/test/CodeGen/AArch64/uadd_sat.ll +++ b/llvm/test/CodeGen/AArch64/uadd_sat.ll @@ -31,7 +31,7 @@ ; CHECK-LABEL: func16: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: mov w9, #65535 +; CHECK-NEXT: mov w9, #65535 // =0xffff ; CHECK-NEXT: add w8, w8, w1, uxth ; CHECK-NEXT: cmp w8, w9 ; CHECK-NEXT: csel w0, w8, w9, lo @@ -43,11 +43,11 @@ define i8 @func8(i8 %x, i8 %y) nounwind { ; CHECK-LABEL: func8: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: mov w9, #255 -; CHECK-NEXT: add w8, w8, w1, uxtb -; CHECK-NEXT: cmp w8, #255 -; CHECK-NEXT: csel w0, w8, w9, lo +; CHECK-NEXT: and w9, w0, #0xff +; CHECK-NEXT: mov w8, #255 // =0xff +; CHECK-NEXT: add w9, w9, w1, uxtb +; CHECK-NEXT: cmp w9, #255 +; CHECK-NEXT: csel w0, w9, w8, lo ; CHECK-NEXT: ret %tmp = call i8 @llvm.uadd.sat.i8(i8 %x, i8 %y); ret i8 %tmp; @@ -56,12 +56,12 @@ define i4 @func3(i4 %x, i4 %y) nounwind { ; CHECK-LABEL: func3: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w1, #0xf -; CHECK-NEXT: and w9, w0, #0xf -; CHECK-NEXT: add w8, w9, w8 -; CHECK-NEXT: mov w9, #15 -; CHECK-NEXT: cmp w8, #15 -; CHECK-NEXT: csel w0, w8, w9, lo +; CHECK-NEXT: and w9, w1, #0xf +; CHECK-NEXT: and w10, w0, #0xf +; CHECK-NEXT: mov w8, #15 // =0xf +; CHECK-NEXT: add w9, w10, w9 +; CHECK-NEXT: cmp w9, #15 +; CHECK-NEXT: csel w0, w9, w8, lo ; CHECK-NEXT: ret %tmp = call i4 @llvm.uadd.sat.i4(i4 %x, i4 %y); ret i4 %tmp; diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_plus.ll b/llvm/test/CodeGen/AArch64/uadd_sat_plus.ll --- 
a/llvm/test/CodeGen/AArch64/uadd_sat_plus.ll +++ b/llvm/test/CodeGen/AArch64/uadd_sat_plus.ll @@ -33,12 +33,12 @@ define i16 @func16(i16 %x, i16 %y, i16 %z) nounwind { ; CHECK-LABEL: func16: ; CHECK: // %bb.0: -; CHECK-NEXT: mul w9, w1, w2 -; CHECK-NEXT: and w10, w0, #0xffff -; CHECK-NEXT: mov w8, #65535 -; CHECK-NEXT: add w9, w10, w9, uxth -; CHECK-NEXT: cmp w9, w8 -; CHECK-NEXT: csel w0, w9, w8, lo +; CHECK-NEXT: mul w8, w1, w2 +; CHECK-NEXT: and w9, w0, #0xffff +; CHECK-NEXT: add w8, w9, w8, uxth +; CHECK-NEXT: mov w9, #65535 // =0xffff +; CHECK-NEXT: cmp w8, w9 +; CHECK-NEXT: csel w0, w8, w9, lo ; CHECK-NEXT: ret %a = mul i16 %y, %z %tmp = call i16 @llvm.uadd.sat.i16(i16 %x, i16 %a) @@ -48,12 +48,12 @@ define i8 @func8(i8 %x, i8 %y, i8 %z) nounwind { ; CHECK-LABEL: func8: ; CHECK: // %bb.0: -; CHECK-NEXT: mul w9, w1, w2 -; CHECK-NEXT: and w10, w0, #0xff -; CHECK-NEXT: mov w8, #255 -; CHECK-NEXT: add w9, w10, w9, uxtb -; CHECK-NEXT: cmp w9, #255 -; CHECK-NEXT: csel w0, w9, w8, lo +; CHECK-NEXT: mul w8, w1, w2 +; CHECK-NEXT: and w9, w0, #0xff +; CHECK-NEXT: add w8, w9, w8, uxtb +; CHECK-NEXT: mov w9, #255 // =0xff +; CHECK-NEXT: cmp w8, #255 +; CHECK-NEXT: csel w0, w8, w9, lo ; CHECK-NEXT: ret %a = mul i8 %y, %z %tmp = call i8 @llvm.uadd.sat.i8(i8 %x, i8 %a) @@ -63,13 +63,13 @@ define i4 @func4(i4 %x, i4 %y, i4 %z) nounwind { ; CHECK-LABEL: func4: ; CHECK: // %bb.0: -; CHECK-NEXT: mul w9, w1, w2 -; CHECK-NEXT: and w10, w0, #0xf -; CHECK-NEXT: mov w8, #15 -; CHECK-NEXT: and w9, w9, #0xf -; CHECK-NEXT: add w9, w10, w9 -; CHECK-NEXT: cmp w9, #15 -; CHECK-NEXT: csel w0, w9, w8, lo +; CHECK-NEXT: mul w8, w1, w2 +; CHECK-NEXT: and w9, w0, #0xf +; CHECK-NEXT: and w8, w8, #0xf +; CHECK-NEXT: add w8, w9, w8 +; CHECK-NEXT: mov w9, #15 // =0xf +; CHECK-NEXT: cmp w8, #15 +; CHECK-NEXT: csel w0, w8, w9, lo ; CHECK-NEXT: ret %a = mul i4 %y, %z %tmp = call i4 @llvm.uadd.sat.i4(i4 %x, i4 %a) diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll --- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll @@ -44,8 +44,8 @@ define <32 x i8> @v32i8(<32 x i8> %x, <32 x i8> %y) nounwind { ; CHECK-LABEL: v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: uqadd v0.16b, v0.16b, v2.16b ; CHECK-NEXT: uqadd v1.16b, v1.16b, v3.16b +; CHECK-NEXT: uqadd v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %z = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %x, <32 x i8> %y) ret <32 x i8> %z @@ -75,8 +75,8 @@ define <16 x i16> @v16i16(<16 x i16> %x, <16 x i16> %y) nounwind { ; CHECK-LABEL: v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: uqadd v0.8h, v0.8h, v2.8h ; CHECK-NEXT: uqadd v1.8h, v1.8h, v3.8h +; CHECK-NEXT: uqadd v0.8h, v0.8h, v2.8h ; CHECK-NEXT: ret %z = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %x, <16 x i16> %y) ret <16 x i16> %z @@ -97,9 +97,9 @@ define void @v8i8(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-LABEL: v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: uqadd v0.8b, v1.8b, v0.8b +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: uqadd v0.8b, v0.8b, v1.8b ; CHECK-NEXT: str d0, [x2] ; CHECK-NEXT: ret %x = load <8 x i8>, ptr %px @@ -112,11 +112,11 @@ define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-LABEL: v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr s0, [x0] -; CHECK-NEXT: movi d2, #0xff00ff00ff00ff -; CHECK-NEXT: ldr s1, [x1] -; CHECK-NEXT: uaddl v0.8h, v0.8b, v1.8b -; CHECK-NEXT: umin v0.4h, v0.4h, v2.4h +; CHECK-NEXT: ldr s1, [x0] +; CHECK-NEXT: ldr s2, [x1] 
+; CHECK-NEXT: movi d0, #0xff00ff00ff00ff +; CHECK-NEXT: uaddl v1.8h, v1.8b, v2.8b +; CHECK-NEXT: umin v0.4h, v1.4h, v0.4h ; CHECK-NEXT: xtn v0.8b, v0.8h ; CHECK-NEXT: str s0, [x2] ; CHECK-NEXT: ret @@ -130,17 +130,17 @@ define void @v2i8(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-LABEL: v2i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x1] -; CHECK-NEXT: movi d0, #0x0000ff000000ff -; CHECK-NEXT: ldrb w9, [x0] -; CHECK-NEXT: ldrb w10, [x1, #1] -; CHECK-NEXT: fmov s2, w8 +; CHECK-NEXT: ldrb w8, [x0] +; CHECK-NEXT: ldrb w9, [x1] +; CHECK-NEXT: movi d2, #0x0000ff000000ff +; CHECK-NEXT: ldrb w10, [x0, #1] +; CHECK-NEXT: ldrb w11, [x1, #1] +; CHECK-NEXT: fmov s0, w8 ; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: ldrb w9, [x0, #1] -; CHECK-NEXT: mov v2.s[1], w10 -; CHECK-NEXT: mov v1.s[1], w9 -; CHECK-NEXT: add v1.2s, v1.2s, v2.2s -; CHECK-NEXT: umin v0.2s, v1.2s, v0.2s +; CHECK-NEXT: mov v0.s[1], w10 +; CHECK-NEXT: mov v1.s[1], w11 +; CHECK-NEXT: add v0.2s, v0.2s, v1.2s +; CHECK-NEXT: umin v0.2s, v0.2s, v2.2s ; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 ; CHECK-NEXT: strb w9, [x2] @@ -156,9 +156,9 @@ define void @v4i16(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-LABEL: v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: uqadd v0.4h, v1.4h, v0.4h +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: uqadd v0.4h, v0.4h, v1.4h ; CHECK-NEXT: str d0, [x2] ; CHECK-NEXT: ret %x = load <4 x i16>, ptr %px @@ -171,17 +171,17 @@ define void @v2i16(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-LABEL: v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x1] -; CHECK-NEXT: movi d0, #0x00ffff0000ffff -; CHECK-NEXT: ldrh w9, [x0] -; CHECK-NEXT: ldrh w10, [x1, #2] -; CHECK-NEXT: fmov s2, w8 +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: ldrh w9, [x1] +; CHECK-NEXT: movi d2, #0x00ffff0000ffff +; CHECK-NEXT: ldrh w10, [x0, #2] +; CHECK-NEXT: ldrh w11, [x1, #2] +; CHECK-NEXT: fmov s0, w8 ; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: ldrh w9, [x0, #2] -; CHECK-NEXT: mov v2.s[1], w10 -; CHECK-NEXT: mov v1.s[1], w9 -; CHECK-NEXT: add v1.2s, v1.2s, v2.2s -; CHECK-NEXT: umin v0.2s, v1.2s, v0.2s +; CHECK-NEXT: mov v0.s[1], w10 +; CHECK-NEXT: mov v1.s[1], w11 +; CHECK-NEXT: add v0.2s, v0.2s, v1.2s +; CHECK-NEXT: umin v0.2s, v0.2s, v2.2s ; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 ; CHECK-NEXT: strh w9, [x2] @@ -223,9 +223,9 @@ define void @v1i8(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-LABEL: v1i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr b0, [x1] -; CHECK-NEXT: ldr b1, [x0] -; CHECK-NEXT: uqadd v0.8b, v1.8b, v0.8b +; CHECK-NEXT: ldr b0, [x0] +; CHECK-NEXT: ldr b1, [x1] +; CHECK-NEXT: uqadd v0.8b, v0.8b, v1.8b ; CHECK-NEXT: st1 { v0.b }[0], [x2] ; CHECK-NEXT: ret %x = load <1 x i8>, ptr %px @@ -238,9 +238,9 @@ define void @v1i16(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-LABEL: v1i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr h0, [x1] -; CHECK-NEXT: ldr h1, [x0] -; CHECK-NEXT: uqadd v0.4h, v1.4h, v0.4h +; CHECK-NEXT: ldr h0, [x0] +; CHECK-NEXT: ldr h1, [x1] +; CHECK-NEXT: uqadd v0.4h, v0.4h, v1.4h ; CHECK-NEXT: str h0, [x2] ; CHECK-NEXT: ret %x = load <1 x i16>, ptr %px @@ -293,8 +293,8 @@ define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind { ; CHECK-LABEL: v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: uqadd v0.4s, v0.4s, v2.4s ; CHECK-NEXT: uqadd v1.4s, v1.4s, v3.4s +; CHECK-NEXT: uqadd v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %z = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> %x, <8 x i32> %y) ret <8 x i32> %z @@ -324,8 +324,8 @@ define <4 x 
i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind { ; CHECK-LABEL: v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: uqadd v0.2d, v0.2d, v2.2d ; CHECK-NEXT: uqadd v1.2d, v1.2d, v3.2d +; CHECK-NEXT: uqadd v0.2d, v0.2d, v2.2d ; CHECK-NEXT: ret %z = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> %x, <4 x i64> %y) ret <4 x i64> %z diff --git a/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll --- a/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll +++ b/llvm/test/CodeGen/AArch64/umulo-128-legalisation-lowering.ll @@ -4,19 +4,19 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 { ; AARCH-LABEL: muloti_test: ; AARCH: // %bb.0: // %start +; AARCH-NEXT: mul x9, x3, x0 ; AARCH-NEXT: cmp x1, #0 -; AARCH-NEXT: umulh x8, x1, x2 ; AARCH-NEXT: ccmp x3, #0, #4, ne -; AARCH-NEXT: umulh x9, x3, x0 +; AARCH-NEXT: umulh x8, x1, x2 +; AARCH-NEXT: umulh x10, x3, x0 +; AARCH-NEXT: madd x9, x1, x2, x9 ; AARCH-NEXT: ccmp xzr, x8, #0, eq -; AARCH-NEXT: mul x8, x3, x0 -; AARCH-NEXT: madd x8, x1, x2, x8 -; AARCH-NEXT: ccmp xzr, x9, #0, eq -; AARCH-NEXT: umulh x9, x0, x2 +; AARCH-NEXT: umulh x11, x0, x2 +; AARCH-NEXT: ccmp xzr, x10, #0, eq ; AARCH-NEXT: mul x0, x0, x2 -; AARCH-NEXT: cset w10, ne -; AARCH-NEXT: adds x1, x9, x8 -; AARCH-NEXT: csinc w2, w10, wzr, lo +; AARCH-NEXT: cset w8, ne +; AARCH-NEXT: adds x1, x11, x9 +; AARCH-NEXT: csinc w2, w8, wzr, lo ; AARCH-NEXT: ret start: %0 = tail call { i128, i1 } @llvm.umul.with.overflow.i128(i128 %l, i128 %r) #2 @@ -35,41 +35,41 @@ define i128 @__muloti4(i128 %0, i128 %1, ptr nocapture nonnull writeonly align 4 %2) #2 { ; AARCH-LABEL: __muloti4: ; AARCH: // %bb.0: // %Entry -; AARCH-NEXT: asr x9, x1, #63 -; AARCH-NEXT: asr x10, x3, #63 +; AARCH-NEXT: asr x10, x1, #63 +; AARCH-NEXT: asr x9, x3, #63 ; AARCH-NEXT: umulh x14, x0, x2 ; AARCH-NEXT: mov x8, x1 -; AARCH-NEXT: mul x11, x2, x9 ; AARCH-NEXT: str wzr, [x4] -; AARCH-NEXT: umulh x12, x10, x0 -; AARCH-NEXT: umulh x13, x2, x9 -; AARCH-NEXT: madd x12, x10, x1, x12 -; AARCH-NEXT: add x13, x13, x11 -; AARCH-NEXT: mul x10, x10, x0 -; AARCH-NEXT: madd x9, x3, x9, x13 -; AARCH-NEXT: add x12, x12, x10 -; AARCH-NEXT: adds x10, x10, x11 -; AARCH-NEXT: mul x11, x1, x2 -; AARCH-NEXT: adc x9, x12, x9 +; AARCH-NEXT: mul x12, x2, x10 +; AARCH-NEXT: umulh x13, x2, x10 +; AARCH-NEXT: umulh x11, x9, x0 +; AARCH-NEXT: mul x15, x1, x2 +; AARCH-NEXT: add x13, x13, x12 +; AARCH-NEXT: madd x11, x9, x1, x11 +; AARCH-NEXT: mul x9, x9, x0 +; AARCH-NEXT: madd x10, x3, x10, x13 ; AARCH-NEXT: umulh x13, x1, x2 -; AARCH-NEXT: mul x12, x0, x3 -; AARCH-NEXT: adds x11, x11, x14 -; AARCH-NEXT: umulh x14, x0, x3 +; AARCH-NEXT: add x11, x11, x9 +; AARCH-NEXT: adds x9, x9, x12 +; AARCH-NEXT: mul x16, x0, x3 +; AARCH-NEXT: adc x10, x11, x10 +; AARCH-NEXT: adds x11, x15, x14 +; AARCH-NEXT: umulh x17, x0, x3 ; AARCH-NEXT: cinc x13, x13, hs -; AARCH-NEXT: adds x1, x12, x11 -; AARCH-NEXT: mul x12, x8, x3 -; AARCH-NEXT: cinc x11, x14, hs +; AARCH-NEXT: mul x12, x1, x3 +; AARCH-NEXT: adds x1, x16, x11 +; AARCH-NEXT: umulh x11, x8, x3 +; AARCH-NEXT: cinc x14, x17, hs +; AARCH-NEXT: adds x13, x13, x14 ; AARCH-NEXT: mul x0, x0, x2 -; AARCH-NEXT: adds x11, x13, x11 -; AARCH-NEXT: umulh x13, x8, x3 ; AARCH-NEXT: cset w14, hs -; AARCH-NEXT: adds x11, x12, x11 -; AARCH-NEXT: adc x12, x13, x14 -; AARCH-NEXT: adds x10, x11, x10 -; AARCH-NEXT: asr x11, x1, #63 -; AARCH-NEXT: adc x9, x12, x9 -; AARCH-NEXT: cmp x10, x11 -; AARCH-NEXT: ccmp x9, x11, #0, eq +; AARCH-NEXT: adds x12, 
x12, x13 +; AARCH-NEXT: asr x13, x1, #63 +; AARCH-NEXT: adc x11, x11, x14 +; AARCH-NEXT: adds x9, x12, x9 +; AARCH-NEXT: adc x10, x11, x10 +; AARCH-NEXT: cmp x9, x13 +; AARCH-NEXT: ccmp x10, x13, #0, eq ; AARCH-NEXT: cset w9, ne ; AARCH-NEXT: tbz x8, #63, .LBB1_2 ; AARCH-NEXT: // %bb.1: // %Entry @@ -79,7 +79,7 @@ ; AARCH-NEXT: .LBB1_2: // %Else2 ; AARCH-NEXT: cbz w9, .LBB1_4 ; AARCH-NEXT: .LBB1_3: // %Then7 -; AARCH-NEXT: mov w8, #1 +; AARCH-NEXT: mov w8, #1 // =0x1 ; AARCH-NEXT: str w8, [x4] ; AARCH-NEXT: .LBB1_4: // %Block9 ; AARCH-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll b/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll --- a/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll +++ b/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-innerouter.ll @@ -205,8 +205,8 @@ ; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill ; CHECK-NEXT: eor w8, w0, w1 ; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: and w20, w8, #0xffff00 ; CHECK-NEXT: mov w19, w1 +; CHECK-NEXT: and w20, w8, #0xffff00 ; CHECK-NEXT: mov w0, w20 ; CHECK-NEXT: bl use32 ; CHECK-NEXT: eor w0, w20, w19 diff --git a/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbits.ll b/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbits.ll --- a/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbits.ll +++ b/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbits.ll @@ -9,8 +9,8 @@ define i8 @out8_constmask(i8 %x, i8 %y) { ; CHECK-LABEL: out8_constmask: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #85 -; CHECK-NEXT: mov w9, #-86 +; CHECK-NEXT: mov w8, #85 // =0x55 +; CHECK-NEXT: mov w9, #-86 // =0xffffffaa ; CHECK-NEXT: and w8, w0, w8 ; CHECK-NEXT: and w9, w1, w9 ; CHECK-NEXT: orr w0, w8, w9 @@ -24,8 +24,8 @@ define i16 @out16_constmask(i16 %x, i16 %y) { ; CHECK-LABEL: out16_constmask: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #21845 -; CHECK-NEXT: mov w9, #-21846 +; CHECK-NEXT: mov w8, #21845 // =0x5555 +; CHECK-NEXT: mov w9, #-21846 // =0xffffaaaa ; CHECK-NEXT: and w8, w0, w8 ; CHECK-NEXT: and w9, w1, w9 ; CHECK-NEXT: orr w0, w8, w9 @@ -69,9 +69,9 @@ define i8 @in8_constmask(i8 %x, i8 %y) { ; CHECK-LABEL: in8_constmask: ; CHECK: // %bb.0: -; CHECK-NEXT: eor w8, w0, w1 -; CHECK-NEXT: mov w9, #85 -; CHECK-NEXT: and w8, w8, w9 +; CHECK-NEXT: mov w8, #85 // =0x55 +; CHECK-NEXT: eor w9, w0, w1 +; CHECK-NEXT: and w8, w9, w8 ; CHECK-NEXT: eor w0, w8, w1 ; CHECK-NEXT: ret %n0 = xor i8 %x, %y @@ -83,9 +83,9 @@ define i16 @in16_constmask(i16 %x, i16 %y) { ; CHECK-LABEL: in16_constmask: ; CHECK: // %bb.0: -; CHECK-NEXT: eor w8, w0, w1 -; CHECK-NEXT: mov w9, #21845 -; CHECK-NEXT: and w8, w8, w9 +; CHECK-NEXT: mov w8, #21845 // =0x5555 +; CHECK-NEXT: eor w9, w0, w1 +; CHECK-NEXT: and w8, w9, w8 ; CHECK-NEXT: eor w0, w8, w1 ; CHECK-NEXT: ret %n0 = xor i16 %x, %y @@ -211,8 +211,8 @@ ; CHECK-NEXT: str x30, [sp, #-32]! 
// 8-byte Folded Spill ; CHECK-NEXT: eor w8, w0, w1 ; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: and w20, w8, #0x55555555 ; CHECK-NEXT: mov w19, w1 +; CHECK-NEXT: and w20, w8, #0x55555555 ; CHECK-NEXT: mov w0, w20 ; CHECK-NEXT: bl use32 ; CHECK-NEXT: eor w0, w20, w19 @@ -251,7 +251,7 @@ define i32 @n0_badconstmask(i32 %x, i32 %y) { ; CHECK-LABEL: n0_badconstmask: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #43691 +; CHECK-NEXT: mov w8, #43691 // =0xaaab ; CHECK-NEXT: and w9, w0, #0x55555555 ; CHECK-NEXT: movk w8, #43690, lsl #16 ; CHECK-NEXT: and w8, w1, w8 diff --git a/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll b/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll --- a/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll +++ b/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-interleavedbytehalves.ll @@ -21,8 +21,8 @@ define i16 @out16_constmask(i16 %x, i16 %y) { ; CHECK-LABEL: out16_constmask: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #3855 -; CHECK-NEXT: mov w9, #-3856 +; CHECK-NEXT: mov w8, #3855 // =0xf0f +; CHECK-NEXT: mov w9, #-3856 // =0xfffff0f0 ; CHECK-NEXT: and w8, w0, w8 ; CHECK-NEXT: and w9, w1, w9 ; CHECK-NEXT: orr w0, w8, w9 @@ -79,9 +79,9 @@ define i16 @in16_constmask(i16 %x, i16 %y) { ; CHECK-LABEL: in16_constmask: ; CHECK: // %bb.0: -; CHECK-NEXT: eor w8, w0, w1 -; CHECK-NEXT: mov w9, #3855 -; CHECK-NEXT: and w8, w8, w9 +; CHECK-NEXT: mov w8, #3855 // =0xf0f +; CHECK-NEXT: eor w9, w0, w1 +; CHECK-NEXT: and w8, w9, w8 ; CHECK-NEXT: eor w0, w8, w1 ; CHECK-NEXT: ret %n0 = xor i16 %x, %y @@ -207,8 +207,8 @@ ; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill ; CHECK-NEXT: eor w8, w0, w1 ; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: and w20, w8, #0xf0f0f0f ; CHECK-NEXT: mov w19, w1 +; CHECK-NEXT: and w20, w8, #0xf0f0f0f ; CHECK-NEXT: mov w0, w20 ; CHECK-NEXT: bl use32 ; CHECK-NEXT: eor w0, w20, w19 @@ -247,7 +247,7 @@ define i32 @n0_badconstmask(i32 %x, i32 %y) { ; CHECK-LABEL: n0_badconstmask: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #61681 +; CHECK-NEXT: mov w8, #61681 // =0xf0f1 ; CHECK-NEXT: and w9, w0, #0xf0f0f0f ; CHECK-NEXT: movk w8, #61680, lsl #16 ; CHECK-NEXT: and w8, w1, w8 diff --git a/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-lowhigh.ll b/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-lowhigh.ll --- a/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-lowhigh.ll +++ b/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-constmask-lowhigh.ll @@ -200,8 +200,8 @@ ; CHECK-NEXT: str x30, [sp, #-32]! 
// 8-byte Folded Spill ; CHECK-NEXT: eor w8, w0, w1 ; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: and w20, w8, #0xffff ; CHECK-NEXT: mov w19, w1 +; CHECK-NEXT: and w20, w8, #0xffff ; CHECK-NEXT: mov w0, w20 ; CHECK-NEXT: bl use32 ; CHECK-NEXT: eor w0, w20, w19 diff --git a/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll b/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll --- a/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll +++ b/llvm/test/CodeGen/AArch64/unfold-masked-merge-scalar-variablemask.ll @@ -6,9 +6,9 @@ define i8 @out8(i8 %x, i8 %y, i8 %mask) { ; CHECK-LABEL: out8: ; CHECK: // %bb.0: -; CHECK-NEXT: bic w8, w1, w2 -; CHECK-NEXT: and w9, w0, w2 -; CHECK-NEXT: orr w0, w9, w8 +; CHECK-NEXT: and w8, w0, w2 +; CHECK-NEXT: bic w9, w1, w2 +; CHECK-NEXT: orr w0, w8, w9 ; CHECK-NEXT: ret %mx = and i8 %x, %mask %notmask = xor i8 %mask, -1 @@ -20,9 +20,9 @@ define i16 @out16(i16 %x, i16 %y, i16 %mask) { ; CHECK-LABEL: out16: ; CHECK: // %bb.0: -; CHECK-NEXT: bic w8, w1, w2 -; CHECK-NEXT: and w9, w0, w2 -; CHECK-NEXT: orr w0, w9, w8 +; CHECK-NEXT: and w8, w0, w2 +; CHECK-NEXT: bic w9, w1, w2 +; CHECK-NEXT: orr w0, w8, w9 ; CHECK-NEXT: ret %mx = and i16 %x, %mask %notmask = xor i16 %mask, -1 @@ -34,9 +34,9 @@ define i32 @out32(i32 %x, i32 %y, i32 %mask) { ; CHECK-LABEL: out32: ; CHECK: // %bb.0: -; CHECK-NEXT: bic w8, w1, w2 -; CHECK-NEXT: and w9, w0, w2 -; CHECK-NEXT: orr w0, w9, w8 +; CHECK-NEXT: and w8, w0, w2 +; CHECK-NEXT: bic w9, w1, w2 +; CHECK-NEXT: orr w0, w8, w9 ; CHECK-NEXT: ret %mx = and i32 %x, %mask %notmask = xor i32 %mask, -1 @@ -48,9 +48,9 @@ define i64 @out64(i64 %x, i64 %y, i64 %mask) { ; CHECK-LABEL: out64: ; CHECK: // %bb.0: -; CHECK-NEXT: bic x8, x1, x2 -; CHECK-NEXT: and x9, x0, x2 -; CHECK-NEXT: orr x0, x9, x8 +; CHECK-NEXT: and x8, x0, x2 +; CHECK-NEXT: bic x9, x1, x2 +; CHECK-NEXT: orr x0, x8, x9 ; CHECK-NEXT: ret %mx = and i64 %x, %mask %notmask = xor i64 %mask, -1 @@ -155,9 +155,9 @@ define i32 @in_commutativity_1_0_0(i32 %x, i32 %y, i32 %mask) { ; CHECK-LABEL: in_commutativity_1_0_0: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w1, w2 -; CHECK-NEXT: bic w9, w0, w2 -; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: bic w8, w0, w2 +; CHECK-NEXT: and w9, w1, w2 +; CHECK-NEXT: orr w0, w9, w8 ; CHECK-NEXT: ret %n0 = xor i32 %x, %y %n1 = and i32 %n0, %mask @@ -167,9 +167,9 @@ define i32 @in_commutativity_1_0_1(i32 %x, i32 %y, i32 %mask) { ; CHECK-LABEL: in_commutativity_1_0_1: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w1, w2 -; CHECK-NEXT: bic w9, w0, w2 -; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: bic w8, w0, w2 +; CHECK-NEXT: and w9, w1, w2 +; CHECK-NEXT: orr w0, w9, w8 ; CHECK-NEXT: ret %n0 = xor i32 %x, %y %n1 = and i32 %mask, %n0 ; swapped @@ -179,9 +179,9 @@ define i32 @in_commutativity_1_1_0(i32 %x, i32 %y, i32 %mask) { ; CHECK-LABEL: in_commutativity_1_1_0: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w1, w2 -; CHECK-NEXT: bic w9, w0, w2 -; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: bic w8, w0, w2 +; CHECK-NEXT: and w9, w1, w2 +; CHECK-NEXT: orr w0, w9, w8 ; CHECK-NEXT: ret %n0 = xor i32 %x, %y %n1 = and i32 %n0, %mask @@ -191,9 +191,9 @@ define i32 @in_commutativity_1_1_1(i32 %x, i32 %y, i32 %mask) { ; CHECK-LABEL: in_commutativity_1_1_1: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w1, w2 -; CHECK-NEXT: bic w9, w0, w2 -; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: bic w8, w0, w2 +; CHECK-NEXT: and w9, w1, w2 +; CHECK-NEXT: orr w0, w9, w8 ; CHECK-NEXT: ret %n0 = xor i32 %x, %y %n1 = and 
i32 %mask, %n0 ; swapped @@ -268,11 +268,11 @@ define i32 @in_complex_y0_m0(i32 %x, i32 %y_hi, i32 %y_low, i32 %m_a, i32 %m_b) { ; CHECK-LABEL: in_complex_y0_m0: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w1, w2 -; CHECK-NEXT: eor w9, w3, w4 -; CHECK-NEXT: bic w8, w8, w9 -; CHECK-NEXT: and w9, w0, w9 -; CHECK-NEXT: orr w0, w9, w8 +; CHECK-NEXT: eor w8, w3, w4 +; CHECK-NEXT: and w9, w1, w2 +; CHECK-NEXT: bic w9, w9, w8 +; CHECK-NEXT: and w8, w0, w8 +; CHECK-NEXT: orr w0, w8, w9 ; CHECK-NEXT: ret %y = and i32 %y_hi, %y_low %mask = xor i32 %m_a, %m_b @@ -284,11 +284,11 @@ define i32 @in_complex_y1_m0(i32 %x, i32 %y_hi, i32 %y_low, i32 %m_a, i32 %m_b) { ; CHECK-LABEL: in_complex_y1_m0: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w1, w2 -; CHECK-NEXT: eor w9, w3, w4 -; CHECK-NEXT: bic w8, w8, w9 -; CHECK-NEXT: and w9, w0, w9 -; CHECK-NEXT: orr w0, w9, w8 +; CHECK-NEXT: eor w8, w3, w4 +; CHECK-NEXT: and w9, w1, w2 +; CHECK-NEXT: bic w9, w9, w8 +; CHECK-NEXT: and w8, w0, w8 +; CHECK-NEXT: orr w0, w8, w9 ; CHECK-NEXT: ret %y = and i32 %y_hi, %y_low %mask = xor i32 %m_a, %m_b @@ -300,11 +300,11 @@ define i32 @in_complex_y0_m1(i32 %x, i32 %y_hi, i32 %y_low, i32 %m_a, i32 %m_b) { ; CHECK-LABEL: in_complex_y0_m1: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w1, w2 -; CHECK-NEXT: eor w9, w3, w4 -; CHECK-NEXT: bic w8, w8, w9 -; CHECK-NEXT: and w9, w0, w9 -; CHECK-NEXT: orr w0, w9, w8 +; CHECK-NEXT: eor w8, w3, w4 +; CHECK-NEXT: and w9, w1, w2 +; CHECK-NEXT: bic w9, w9, w8 +; CHECK-NEXT: and w8, w0, w8 +; CHECK-NEXT: orr w0, w8, w9 ; CHECK-NEXT: ret %y = and i32 %y_hi, %y_low %mask = xor i32 %m_a, %m_b @@ -316,11 +316,11 @@ define i32 @in_complex_y1_m1(i32 %x, i32 %y_hi, i32 %y_low, i32 %m_a, i32 %m_b) { ; CHECK-LABEL: in_complex_y1_m1: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w1, w2 -; CHECK-NEXT: eor w9, w3, w4 -; CHECK-NEXT: bic w8, w8, w9 -; CHECK-NEXT: and w9, w0, w9 -; CHECK-NEXT: orr w0, w9, w8 +; CHECK-NEXT: eor w8, w3, w4 +; CHECK-NEXT: and w9, w1, w2 +; CHECK-NEXT: bic w9, w9, w8 +; CHECK-NEXT: and w8, w0, w8 +; CHECK-NEXT: orr w0, w8, w9 ; CHECK-NEXT: ret %y = and i32 %y_hi, %y_low %mask = xor i32 %m_a, %m_b @@ -384,7 +384,7 @@ define i32 @out_constant_varx_42(i32 %x, i32 %y, i32 %mask) { ; CHECK-LABEL: out_constant_varx_42: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #42 +; CHECK-NEXT: mov w8, #42 // =0x2a ; CHECK-NEXT: and w9, w2, w0 ; CHECK-NEXT: bic w8, w8, w2 ; CHECK-NEXT: orr w0, w9, w8 @@ -398,7 +398,7 @@ define i32 @in_constant_varx_42(i32 %x, i32 %y, i32 %mask) { ; CHECK-LABEL: in_constant_varx_42: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #42 +; CHECK-NEXT: mov w8, #42 // =0x2a ; CHECK-NEXT: and w9, w0, w2 ; CHECK-NEXT: bic w8, w8, w2 ; CHECK-NEXT: orr w0, w9, w8 @@ -412,7 +412,7 @@ define i32 @out_constant_varx_42_invmask(i32 %x, i32 %y, i32 %mask) { ; CHECK-LABEL: out_constant_varx_42_invmask: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #42 +; CHECK-NEXT: mov w8, #42 // =0x2a ; CHECK-NEXT: bic w9, w0, w2 ; CHECK-NEXT: and w8, w2, w8 ; CHECK-NEXT: orr w0, w9, w8 @@ -427,7 +427,7 @@ define i32 @in_constant_varx_42_invmask(i32 %x, i32 %y, i32 %mask) { ; CHECK-LABEL: in_constant_varx_42_invmask: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #42 +; CHECK-NEXT: mov w8, #42 // =0x2a ; CHECK-NEXT: bic w9, w0, w2 ; CHECK-NEXT: and w8, w2, w8 ; CHECK-NEXT: orr w0, w9, w8 @@ -487,7 +487,7 @@ define i32 @out_constant_42_vary(i32 %x, i32 %y, i32 %mask) { ; CHECK-LABEL: out_constant_42_vary: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #42 +; CHECK-NEXT: mov w8, #42 // =0x2a ; CHECK-NEXT: bic w9, w1, w2 ; 
CHECK-NEXT: and w8, w2, w8 ; CHECK-NEXT: orr w0, w8, w9 @@ -501,7 +501,7 @@ define i32 @in_constant_42_vary(i32 %x, i32 %y, i32 %mask) { ; CHECK-LABEL: in_constant_42_vary: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #42 +; CHECK-NEXT: mov w8, #42 // =0x2a ; CHECK-NEXT: bic w9, w1, w2 ; CHECK-NEXT: and w8, w2, w8 ; CHECK-NEXT: orr w0, w8, w9 @@ -515,7 +515,7 @@ define i32 @out_constant_42_vary_invmask(i32 %x, i32 %y, i32 %mask) { ; CHECK-LABEL: out_constant_42_vary_invmask: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #42 +; CHECK-NEXT: mov w8, #42 // =0x2a ; CHECK-NEXT: and w9, w2, w1 ; CHECK-NEXT: bic w8, w8, w2 ; CHECK-NEXT: orr w0, w8, w9 @@ -530,7 +530,7 @@ define i32 @in_constant_42_vary_invmask(i32 %x, i32 %y, i32 %mask) { ; CHECK-LABEL: in_constant_42_vary_invmask: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #42 +; CHECK-NEXT: mov w8, #42 // =0x2a ; CHECK-NEXT: and w9, w1, w2 ; CHECK-NEXT: bic w8, w8, w2 ; CHECK-NEXT: orr w0, w8, w9 @@ -552,8 +552,8 @@ ; CHECK-NEXT: str x30, [sp, #-32]! // 8-byte Folded Spill ; CHECK-NEXT: eor w8, w0, w1 ; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: and w20, w8, w3 ; CHECK-NEXT: mov w19, w1 +; CHECK-NEXT: and w20, w8, w3 ; CHECK-NEXT: mov w0, w20 ; CHECK-NEXT: bl use32 ; CHECK-NEXT: eor w0, w20, w19 @@ -589,9 +589,9 @@ define i32 @n0_badmask(i32 %x, i32 %y, i32 %mask, i32 %mask2) { ; CHECK-LABEL: n0_badmask: ; CHECK: // %bb.0: -; CHECK-NEXT: and w8, w0, w2 -; CHECK-NEXT: bic w9, w1, w3 -; CHECK-NEXT: orr w0, w8, w9 +; CHECK-NEXT: bic w8, w1, w3 +; CHECK-NEXT: and w9, w0, w2 +; CHECK-NEXT: orr w0, w9, w8 ; CHECK-NEXT: ret %mx = and i32 %x, %mask %notmask = xor i32 %mask2, -1 ; %mask2 instead of %mask diff --git a/llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask.ll b/llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask.ll --- a/llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask.ll +++ b/llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask.ll @@ -31,8 +31,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: movi d3, #0x0000ff000000ff ; CHECK-NEXT: and v0.8b, v0.8b, v2.8b -; CHECK-NEXT: eor v2.8b, v2.8b, v3.8b -; CHECK-NEXT: and v1.8b, v1.8b, v2.8b +; CHECK-NEXT: eor v3.8b, v2.8b, v3.8b +; CHECK-NEXT: and v1.8b, v1.8b, v3.8b ; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %mx = and <2 x i8> %x, %mask @@ -63,8 +63,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: movi d3, #0xff00ff00ff00ff ; CHECK-NEXT: and v0.8b, v0.8b, v2.8b -; CHECK-NEXT: eor v2.8b, v2.8b, v3.8b -; CHECK-NEXT: and v1.8b, v1.8b, v2.8b +; CHECK-NEXT: eor v3.8b, v2.8b, v3.8b +; CHECK-NEXT: and v1.8b, v1.8b, v3.8b ; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %mx = and <4 x i8> %x, %mask @@ -79,8 +79,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: movi d3, #0xff00ff00ff00ff ; CHECK-NEXT: and v0.8b, v0.8b, v2.8b -; CHECK-NEXT: eor v2.8b, v2.8b, v3.8b -; CHECK-NEXT: and v1.8b, v1.8b, v2.8b +; CHECK-NEXT: eor v3.8b, v2.8b, v3.8b +; CHECK-NEXT: and v1.8b, v1.8b, v3.8b ; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %mx = and <4 x i8> %x, %mask @@ -95,8 +95,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: movi d3, #0x00ffff0000ffff ; CHECK-NEXT: and v0.8b, v0.8b, v2.8b -; CHECK-NEXT: eor v2.8b, v2.8b, v3.8b -; CHECK-NEXT: and v1.8b, v1.8b, v2.8b +; CHECK-NEXT: eor v3.8b, v2.8b, v3.8b +; CHECK-NEXT: and v1.8b, v1.8b, v3.8b ; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %mx = and <2 x i16> %x, %mask diff --git a/llvm/test/CodeGen/AArch64/urem-lkk.ll b/llvm/test/CodeGen/AArch64/urem-lkk.ll --- 
a/llvm/test/CodeGen/AArch64/urem-lkk.ll +++ b/llvm/test/CodeGen/AArch64/urem-lkk.ll @@ -4,13 +4,13 @@ define i32 @fold_urem_positive_odd(i32 %x) { ; CHECK-LABEL: fold_urem_positive_odd: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #8969 +; CHECK-NEXT: mov w8, #8969 // =0x2309 ; CHECK-NEXT: movk w8, #22765, lsl #16 ; CHECK-NEXT: umull x8, w0, w8 ; CHECK-NEXT: lsr x8, x8, #32 ; CHECK-NEXT: sub w9, w0, w8 ; CHECK-NEXT: add w8, w8, w9, lsr #1 -; CHECK-NEXT: mov w9, #95 +; CHECK-NEXT: mov w9, #95 // =0x5f ; CHECK-NEXT: lsr w8, w8, #6 ; CHECK-NEXT: msub w0, w8, w9, w0 ; CHECK-NEXT: ret @@ -22,8 +22,8 @@ define i32 @fold_urem_positive_even(i32 %x) { ; CHECK-LABEL: fold_urem_positive_even: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #16323 -; CHECK-NEXT: mov w9, #1060 +; CHECK-NEXT: mov w8, #16323 // =0x3fc3 +; CHECK-NEXT: mov w9, #1060 // =0x424 ; CHECK-NEXT: movk w8, #63310, lsl #16 ; CHECK-NEXT: umull x8, w0, w8 ; CHECK-NEXT: lsr x8, x8, #42 @@ -38,13 +38,13 @@ define i32 @combine_urem_udiv(i32 %x) { ; CHECK-LABEL: combine_urem_udiv: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #8969 +; CHECK-NEXT: mov w8, #8969 // =0x2309 ; CHECK-NEXT: movk w8, #22765, lsl #16 ; CHECK-NEXT: umull x8, w0, w8 ; CHECK-NEXT: lsr x8, x8, #32 ; CHECK-NEXT: sub w9, w0, w8 ; CHECK-NEXT: add w8, w8, w9, lsr #1 -; CHECK-NEXT: mov w9, #95 +; CHECK-NEXT: mov w9, #95 // =0x5f ; CHECK-NEXT: lsr w8, w8, #6 ; CHECK-NEXT: msub w9, w8, w9, w0 ; CHECK-NEXT: add w0, w9, w8 @@ -88,13 +88,13 @@ define i64 @dont_fold_urem_i64(i64 %x) { ; CHECK-LABEL: dont_fold_urem_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #58849 -; CHECK-NEXT: lsr x9, x0, #1 -; CHECK-NEXT: movk x8, #48148, lsl #16 -; CHECK-NEXT: movk x8, #33436, lsl #32 -; CHECK-NEXT: movk x8, #21399, lsl #48 -; CHECK-NEXT: umulh x8, x9, x8 -; CHECK-NEXT: mov w9, #98 +; CHECK-NEXT: mov x9, #58849 // =0xe5e1 +; CHECK-NEXT: lsr x8, x0, #1 +; CHECK-NEXT: movk x9, #48148, lsl #16 +; CHECK-NEXT: movk x9, #33436, lsl #32 +; CHECK-NEXT: movk x9, #21399, lsl #48 +; CHECK-NEXT: umulh x8, x8, x9 +; CHECK-NEXT: mov w9, #98 // =0x62 ; CHECK-NEXT: lsr x8, x8, #4 ; CHECK-NEXT: msub x0, x8, x9, x0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/AArch64/urem-seteq-illegal-types.ll --- a/llvm/test/CodeGen/AArch64/urem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/AArch64/urem-seteq-illegal-types.ll @@ -67,25 +67,25 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: fmov s0, w0 ; CHECK-NEXT: adrp x8, .LCPI4_0 -; CHECK-NEXT: adrp x9, .LCPI4_1 -; CHECK-NEXT: mov v0.h[1], w1 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_0] -; CHECK-NEXT: ldr d2, [x9, :lo12:.LCPI4_1] -; CHECK-NEXT: adrp x8, .LCPI4_2 +; CHECK-NEXT: adrp x8, .LCPI4_1 +; CHECK-NEXT: mov v0.h[1], w1 ; CHECK-NEXT: mov v0.h[2], w2 ; CHECK-NEXT: sub v0.4h, v0.4h, v1.4h -; CHECK-NEXT: movi d1, #0x0000000000ffff -; CHECK-NEXT: mul v0.4h, v0.4h, v2.4h -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI4_2] +; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_1] +; CHECK-NEXT: adrp x8, .LCPI4_2 +; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI4_2] ; CHECK-NEXT: adrp x8, .LCPI4_3 -; CHECK-NEXT: add v3.4h, v0.4h, v0.4h +; CHECK-NEXT: mul v0.4h, v0.4h, v1.4h +; CHECK-NEXT: movi d1, #0x0000000000ffff +; CHECK-NEXT: add v2.4h, v0.4h, v0.4h ; CHECK-NEXT: bic v0.4h, #248, lsl #8 +; CHECK-NEXT: ushl v2.4h, v2.4h, v3.4h ; CHECK-NEXT: ushl v0.4h, v0.4h, v1.4h -; CHECK-NEXT: ushl v1.4h, v3.4h, v2.4h -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI4_3] -; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b +; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_3] +; CHECK-NEXT: orr v0.8b, v0.8b, 
v2.8b ; CHECK-NEXT: bic v0.4h, #248, lsl #8 -; CHECK-NEXT: cmhi v0.4h, v0.4h, v2.4h +; CHECK-NEXT: cmhi v0.4h, v0.4h, v1.4h ; CHECK-NEXT: umov w0, v0.h[0] ; CHECK-NEXT: umov w1, v0.h[1] ; CHECK-NEXT: umov w2, v0.h[2] diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll b/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll --- a/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll +++ b/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll @@ -7,7 +7,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI0_0 ; CHECK-NEXT: adrp x9, .LCPI0_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0] ; CHECK-NEXT: adrp x8, .LCPI0_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI0_2] @@ -16,10 +15,11 @@ ; CHECK-NEXT: adrp x8, .LCPI0_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI0_3] +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_3] +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -71,7 +71,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI3_0 ; CHECK-NEXT: adrp x9, .LCPI3_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0] ; CHECK-NEXT: adrp x8, .LCPI3_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI3_2] @@ -80,10 +79,11 @@ ; CHECK-NEXT: adrp x8, .LCPI3_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_3] +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_3] +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -95,7 +95,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI4_0 ; CHECK-NEXT: adrp x9, .LCPI4_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_0] ; CHECK-NEXT: adrp x8, .LCPI4_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI4_2] @@ -104,10 +103,11 @@ ; CHECK-NEXT: adrp x8, .LCPI4_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI4_3] +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: cmhi v0.4s, v0.4s, v2.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_3] +; CHECK-NEXT: cmhi v0.4s, v0.4s, v1.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp ne <4 x i32> %urem, @@ -121,7 +121,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI5_0 ; CHECK-NEXT: adrp x9, .LCPI5_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI5_0] ; CHECK-NEXT: adrp x8, .LCPI5_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI5_2] @@ -130,10 +129,11 @@ ; CHECK-NEXT: adrp x8, .LCPI5_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI5_3] +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI5_3] +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = 
icmp eq <4 x i32> %urem, @@ -145,7 +145,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI6_0 ; CHECK-NEXT: adrp x9, .LCPI6_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI6_0] ; CHECK-NEXT: adrp x8, .LCPI6_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI6_2] @@ -154,10 +153,11 @@ ; CHECK-NEXT: adrp x8, .LCPI6_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI6_3] +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: cmhi v0.4s, v0.4s, v2.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI6_3] +; CHECK-NEXT: cmhi v0.4s, v0.4s, v1.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp ne <4 x i32> %urem, @@ -173,7 +173,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI7_0 ; CHECK-NEXT: adrp x9, .LCPI7_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI7_0] ; CHECK-NEXT: adrp x8, .LCPI7_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI7_2] @@ -182,10 +181,11 @@ ; CHECK-NEXT: adrp x8, .LCPI7_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI7_3] +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI7_3] +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -199,7 +199,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI8_0 ; CHECK-NEXT: adrp x9, .LCPI8_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI8_0] ; CHECK-NEXT: adrp x8, .LCPI8_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI8_2] @@ -208,10 +207,11 @@ ; CHECK-NEXT: adrp x8, .LCPI8_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI8_3] +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI8_3] +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -225,7 +225,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI9_0 ; CHECK-NEXT: adrp x9, .LCPI9_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI9_0] ; CHECK-NEXT: adrp x8, .LCPI9_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI9_2] @@ -234,10 +233,11 @@ ; CHECK-NEXT: adrp x8, .LCPI9_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI9_3] +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI9_3] +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -251,9 +251,9 @@ define <4 x i32> @test_urem_odd_one(<4 x i32> %X) nounwind { ; CHECK-LABEL: test_urem_odd_one: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #52429 -; CHECK-NEXT: movk w8, #52428, lsl #16 +; CHECK-NEXT: mov w8, #52429 // =0xcccd ; CHECK-NEXT: movi v2.4s, #1 +; CHECK-NEXT: movk w8, #52428, lsl #16 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: adrp x8, .LCPI10_0 ; CHECK-NEXT: mul 
v0.4s, v0.4s, v1.4s @@ -271,18 +271,18 @@ define <4 x i32> @test_urem_even_one(<4 x i32> %X) nounwind { ; CHECK-LABEL: test_urem_even_one: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #28087 +; CHECK-NEXT: mov w8, #28087 // =0x6db7 +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: movk w8, #46811, lsl #16 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: adrp x8, .LCPI11_0 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI11_0] ; CHECK-NEXT: shl v1.4s, v0.4s, #31 ; CHECK-NEXT: ushr v0.4s, v0.4s, #1 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI11_0] +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -296,7 +296,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI12_0 ; CHECK-NEXT: adrp x9, .LCPI12_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI12_0] ; CHECK-NEXT: adrp x8, .LCPI12_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI12_2] @@ -305,10 +304,11 @@ ; CHECK-NEXT: adrp x8, .LCPI12_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI12_3] +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI12_3] +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -324,7 +324,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI13_0 ; CHECK-NEXT: adrp x9, .LCPI13_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI13_0] ; CHECK-NEXT: adrp x8, .LCPI13_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI13_2] @@ -333,10 +332,11 @@ ; CHECK-NEXT: adrp x8, .LCPI13_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI13_3] +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI13_3] +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -350,7 +350,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI14_0 ; CHECK-NEXT: adrp x9, .LCPI14_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI14_0] ; CHECK-NEXT: adrp x8, .LCPI14_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI14_2] @@ -359,10 +358,11 @@ ; CHECK-NEXT: adrp x8, .LCPI14_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI14_3] +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI14_3] +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -376,7 +376,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI15_0 ; CHECK-NEXT: adrp x9, .LCPI15_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_0] ; CHECK-NEXT: adrp x8, .LCPI15_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI15_2] @@ -385,10 +384,11 @@ ; CHECK-NEXT: adrp x8, .LCPI15_3 
; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI15_3] +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_3] +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -404,7 +404,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI16_0 ; CHECK-NEXT: adrp x9, .LCPI16_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] ; CHECK-NEXT: adrp x8, .LCPI16_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI16_2] @@ -413,10 +412,11 @@ ; CHECK-NEXT: adrp x8, .LCPI16_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI16_3] +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_3] +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -430,7 +430,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI17_0 ; CHECK-NEXT: adrp x9, .LCPI17_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_0] ; CHECK-NEXT: adrp x8, .LCPI17_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI17_2] @@ -439,10 +438,11 @@ ; CHECK-NEXT: adrp x8, .LCPI17_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI17_3] +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_3] +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -456,7 +456,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI18_0 ; CHECK-NEXT: adrp x9, .LCPI18_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI18_0] ; CHECK-NEXT: adrp x8, .LCPI18_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI18_2] @@ -465,10 +464,11 @@ ; CHECK-NEXT: adrp x8, .LCPI18_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI18_3] +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI18_3] +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -503,7 +503,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI20_0 ; CHECK-NEXT: adrp x9, .LCPI20_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI20_0] ; CHECK-NEXT: adrp x8, .LCPI20_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI20_2] @@ -512,10 +511,11 @@ ; CHECK-NEXT: adrp x8, .LCPI20_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI20_3] +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI20_3] +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, 
v0.16b, v2.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -529,7 +529,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI21_0 ; CHECK-NEXT: adrp x9, .LCPI21_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI21_0] ; CHECK-NEXT: adrp x8, .LCPI21_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI21_2] @@ -538,10 +537,11 @@ ; CHECK-NEXT: adrp x8, .LCPI21_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI21_3] +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI21_3] +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -557,7 +557,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI22_0 ; CHECK-NEXT: adrp x9, .LCPI22_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI22_0] ; CHECK-NEXT: adrp x8, .LCPI22_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI22_2] @@ -566,10 +565,11 @@ ; CHECK-NEXT: adrp x8, .LCPI22_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI22_3] +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI22_3] +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -583,7 +583,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI23_0 ; CHECK-NEXT: adrp x9, .LCPI23_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI23_0] ; CHECK-NEXT: adrp x8, .LCPI23_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI23_2] @@ -592,10 +591,11 @@ ; CHECK-NEXT: adrp x8, .LCPI23_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI23_3] +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI23_3] +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -609,7 +609,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI24_0 ; CHECK-NEXT: adrp x9, .LCPI24_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI24_0] ; CHECK-NEXT: adrp x8, .LCPI24_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI24_2] @@ -618,10 +617,11 @@ ; CHECK-NEXT: adrp x8, .LCPI24_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI24_3] +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI24_3] +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -636,7 +636,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI25_0 ; CHECK-NEXT: adrp x9, .LCPI25_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI25_0] ; CHECK-NEXT: adrp x8, .LCPI25_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI25_2] @@ -645,10 +644,11 @@ ; CHECK-NEXT: adrp 
x8, .LCPI25_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI25_3] +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI25_3] +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -661,7 +661,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI26_0 ; CHECK-NEXT: adrp x9, .LCPI26_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI26_0] ; CHECK-NEXT: adrp x8, .LCPI26_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI26_2] @@ -670,10 +669,11 @@ ; CHECK-NEXT: adrp x8, .LCPI26_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI26_3] +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI26_3] +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll b/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll --- a/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll +++ b/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll @@ -6,7 +6,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI0_0 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0] -; CHECK-NEXT: mov w8, #43691 +; CHECK-NEXT: mov w8, #43691 // =0xaaab ; CHECK-NEXT: movk w8, #43690, lsl #16 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: dup v1.4s, w8 @@ -26,11 +26,11 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI1_0 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_0] -; CHECK-NEXT: mov w8, #52429 +; CHECK-NEXT: mov w8, #52429 // =0xcccd ; CHECK-NEXT: movk w8, #52428, lsl #16 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: dup v1.4s, w8 -; CHECK-NEXT: mov w8, #13106 +; CHECK-NEXT: mov w8, #13106 // =0x3332 ; CHECK-NEXT: movk w8, #13107, lsl #16 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: dup v1.4s, w8 @@ -47,18 +47,18 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI2_0 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0] -; CHECK-NEXT: mov w8, #43691 +; CHECK-NEXT: mov w8, #43691 // =0xaaab ; CHECK-NEXT: movk w8, #43690, lsl #16 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: dup v1.4s, w8 -; CHECK-NEXT: mov w8, #43690 +; CHECK-NEXT: mov w8, #43690 // =0xaaaa ; CHECK-NEXT: movk w8, #10922, lsl #16 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s -; CHECK-NEXT: dup v2.4s, w8 ; CHECK-NEXT: shl v1.4s, v0.4s, #31 ; CHECK-NEXT: ushr v0.4s, v0.4s, #1 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: dup v1.4s, w8 +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, @@ -70,18 +70,18 @@ ; CHECK-LABEL: t32_6_part1: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI3_0 -; CHECK-NEXT: mov w9, #43691 -; CHECK-NEXT: movk w9, #43690, lsl #16 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0] -; CHECK-NEXT: adrp x8, .LCPI3_1 -; CHECK-NEXT: dup v2.4s, w9 +; CHECK-NEXT: mov w8, #43691 // =0xaaab +; CHECK-NEXT: movk w8, #43690, lsl #16 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s -; CHECK-NEXT: mul v0.4s, v0.4s, v2.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_1] +; CHECK-NEXT: dup v1.4s, w8 +; 
CHECK-NEXT: adrp x8, .LCPI3_1 +; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: shl v1.4s, v0.4s, #31 ; CHECK-NEXT: ushr v0.4s, v0.4s, #1 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_1] +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, @@ -94,7 +94,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI4_0 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_0] -; CHECK-NEXT: mov w8, #43691 +; CHECK-NEXT: mov w8, #43691 // =0xaaab ; CHECK-NEXT: movk w8, #43690, lsl #16 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: dup v1.4s, w8 diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll b/llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll --- a/llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll +++ b/llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll @@ -5,11 +5,11 @@ define <4 x i32> @test_urem_odd_25(<4 x i32> %X) nounwind { ; CHECK-LABEL: test_urem_odd_25: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #23593 -; CHECK-NEXT: movk w8, #49807, lsl #16 +; CHECK-NEXT: mov w8, #23593 // =0x5c29 ; CHECK-NEXT: movi v2.4s, #1 +; CHECK-NEXT: movk w8, #49807, lsl #16 ; CHECK-NEXT: dup v1.4s, w8 -; CHECK-NEXT: mov w8, #28835 +; CHECK-NEXT: mov w8, #28835 // =0x70a3 ; CHECK-NEXT: movk w8, #2621, lsl #16 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: dup v1.4s, w8 @@ -26,19 +26,19 @@ define <4 x i32> @test_urem_even_100(<4 x i32> %X) nounwind { ; CHECK-LABEL: test_urem_even_100: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #23593 +; CHECK-NEXT: mov w8, #23593 // =0x5c29 +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: movk w8, #49807, lsl #16 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: dup v1.4s, w8 -; CHECK-NEXT: mov w8, #23592 +; CHECK-NEXT: mov w8, #23592 // =0x5c28 ; CHECK-NEXT: movk w8, #655, lsl #16 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s -; CHECK-NEXT: dup v2.4s, w8 ; CHECK-NEXT: shl v1.4s, v0.4s, #30 ; CHECK-NEXT: ushr v0.4s, v0.4s, #2 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: dup v1.4s, w8 +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -72,16 +72,16 @@ ; CHECK-LABEL: test_urem_even_neg100: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI3_0 -; CHECK-NEXT: movi v3.4s, #1 +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0] ; CHECK-NEXT: adrp x8, .LCPI3_1 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_1] ; CHECK-NEXT: shl v1.4s, v0.4s, #30 ; CHECK-NEXT: ushr v0.4s, v0.4s, #2 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_1] +; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -96,7 +96,7 @@ define <4 x i32> @test_urem_odd_undef1(<4 x i32> %X) nounwind { ; CHECK-LABEL: test_urem_odd_undef1: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #34079 +; CHECK-NEXT: mov w8, #34079 // =0x851f ; CHECK-NEXT: movk w8, #20971, lsl #16 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: umull2 v2.2d, v0.4s, v1.4s @@ -118,7 +118,7 @@ define <4 x i32> @test_urem_even_undef1(<4 x i32> %X) nounwind { ; CHECK-LABEL: test_urem_even_undef1: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #34079 +; CHECK-NEXT: mov w8, #34079 // 
=0x851f ; CHECK-NEXT: movk w8, #20971, lsl #16 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: umull2 v2.2d, v0.4s, v1.4s @@ -167,10 +167,10 @@ ; CHECK-LABEL: test_urem_pow2: ; CHECK: // %bb.0: ; CHECK-NEXT: movi v1.4s, #15 -; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -182,8 +182,8 @@ define <4 x i32> @test_urem_int_min(<4 x i32> %X) nounwind { ; CHECK-LABEL: test_urem_int_min: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: bic v0.4s, #128, lsl #24 +; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-vec-tautological.ll b/llvm/test/CodeGen/AArch64/urem-seteq-vec-tautological.ll --- a/llvm/test/CodeGen/AArch64/urem-seteq-vec-tautological.ll +++ b/llvm/test/CodeGen/AArch64/urem-seteq-vec-tautological.ll @@ -5,11 +5,11 @@ ; CHECK-LABEL: t0_all_tautological: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI0_0 -; CHECK-NEXT: adrp x9, .LCPI0_1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0] -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI0_1] +; CHECK-NEXT: adrp x8, .LCPI0_1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b -; CHECK-NEXT: cmeq v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_1] +; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, @@ -20,7 +20,7 @@ define <4 x i1> @t1_all_odd_eq(<4 x i32> %X) nounwind { ; CHECK-LABEL: t1_all_odd_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #43691 +; CHECK-NEXT: mov w8, #43691 // =0xaaab ; CHECK-NEXT: movk w8, #43690, lsl #16 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: adrp x8, .LCPI1_0 @@ -39,7 +39,7 @@ define <4 x i1> @t1_all_odd_ne(<4 x i32> %X) nounwind { ; CHECK-LABEL: t1_all_odd_ne: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #43691 +; CHECK-NEXT: mov w8, #43691 // =0xaaab ; CHECK-NEXT: movk w8, #43690, lsl #16 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: adrp x8, .LCPI2_0 @@ -58,7 +58,7 @@ define <8 x i1> @t2_narrow(<8 x i16> %X) nounwind { ; CHECK-LABEL: t2_narrow: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #43691 +; CHECK-NEXT: mov w8, #43691 // =0xaaab ; CHECK-NEXT: dup v1.8h, w8 ; CHECK-NEXT: adrp x8, .LCPI3_0 ; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h @@ -76,16 +76,16 @@ define <2 x i1> @t3_wide(<2 x i64> %X) nounwind { ; CHECK-LABEL: t3_wide: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-6148914691236517206 -; CHECK-NEXT: fmov x9, d0 +; CHECK-NEXT: fmov x10, d0 +; CHECK-NEXT: mov x8, #-6148914691236517206 // =0xaaaaaaaaaaaaaaaa +; CHECK-NEXT: mov x9, v0.d[1] ; CHECK-NEXT: movk x8, #43691 -; CHECK-NEXT: mov x10, v0.d[1] -; CHECK-NEXT: mul x9, x9, x8 -; CHECK-NEXT: mul x8, x10, x8 -; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: adrp x9, .LCPI4_0 +; CHECK-NEXT: mul x10, x10, x8 +; CHECK-NEXT: mul x8, x9, x8 +; CHECK-NEXT: fmov d0, x10 ; CHECK-NEXT: mov v0.d[1], x8 -; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI4_0] +; CHECK-NEXT: adrp x8, .LCPI4_0 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_0] ; CHECK-NEXT: cmhs v0.2d, v1.2d, v0.2d ; CHECK-NEXT: movi d1, #0xffffffff00000000 ; CHECK-NEXT: xtn v0.2s, v0.2d diff --git a/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll b/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll --- a/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll +++ b/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll @@ -5,23 +5,23 @@ ; 
CHECK-LABEL: fold_urem_vec_1: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI0_0 -; CHECK-NEXT: adrp x9, .LCPI0_1 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_0] +; CHECK-NEXT: adrp x8, .LCPI0_1 +; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_1] ; CHECK-NEXT: adrp x8, .LCPI0_2 -; CHECK-NEXT: ldr d2, [x9, :lo12:.LCPI0_1] -; CHECK-NEXT: adrp x9, .LCPI0_4 ; CHECK-NEXT: ushl v1.4h, v0.4h, v1.4h -; CHECK-NEXT: umull v1.4s, v1.4h, v2.4h -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_2] +; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI0_2] ; CHECK-NEXT: adrp x8, .LCPI0_3 +; CHECK-NEXT: umull v1.4s, v1.4h, v2.4h ; CHECK-NEXT: shrn v1.4h, v1.4s, #16 -; CHECK-NEXT: sub v3.4h, v0.4h, v1.4h -; CHECK-NEXT: umull v2.4s, v3.4h, v2.4h -; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI0_3] +; CHECK-NEXT: sub v2.4h, v0.4h, v1.4h +; CHECK-NEXT: umull v2.4s, v2.4h, v3.4h ; CHECK-NEXT: shrn v2.4h, v2.4s, #16 ; CHECK-NEXT: add v1.4h, v2.4h, v1.4h -; CHECK-NEXT: ldr d2, [x9, :lo12:.LCPI0_4] -; CHECK-NEXT: ushl v1.4h, v1.4h, v3.4h +; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_3] +; CHECK-NEXT: adrp x8, .LCPI0_4 +; CHECK-NEXT: ushl v1.4h, v1.4h, v2.4h +; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_4] ; CHECK-NEXT: mls v0.4h, v1.4h, v2.4h ; CHECK-NEXT: ret %1 = urem <4 x i16> %x, @@ -69,15 +69,15 @@ ; CHECK-LABEL: dont_fold_urem_power_of_two: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI3_0 -; CHECK-NEXT: adrp x9, .LCPI3_2 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI3_0] ; CHECK-NEXT: adrp x8, .LCPI3_1 -; CHECK-NEXT: ldr d3, [x9, :lo12:.LCPI3_2] -; CHECK-NEXT: umull v1.4s, v0.4h, v1.4h ; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI3_1] +; CHECK-NEXT: adrp x8, .LCPI3_2 +; CHECK-NEXT: umull v1.4s, v0.4h, v1.4h ; CHECK-NEXT: shrn v1.4h, v1.4s, #16 ; CHECK-NEXT: ushl v1.4h, v1.4h, v2.4h -; CHECK-NEXT: mls v0.4h, v1.4h, v3.4h +; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI3_2] +; CHECK-NEXT: mls v0.4h, v1.4h, v2.4h ; CHECK-NEXT: ret %1 = urem <4 x i16> %x, ret <4 x i16> %1 @@ -88,26 +88,26 @@ ; CHECK-LABEL: dont_fold_urem_one: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI4_0 +; CHECK-NEXT: movi d4, #0x0000000000ffff ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_0] ; CHECK-NEXT: adrp x8, .LCPI4_1 -; CHECK-NEXT: umull v1.4s, v0.4h, v1.4h -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI4_1] +; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI4_1] ; CHECK-NEXT: adrp x8, .LCPI4_2 +; CHECK-NEXT: umull v1.4s, v0.4h, v1.4h ; CHECK-NEXT: shrn v1.4h, v1.4s, #16 -; CHECK-NEXT: ldr d4, [x8, :lo12:.LCPI4_2] -; CHECK-NEXT: adrp x8, .LCPI4_3 -; CHECK-NEXT: sub v3.4h, v0.4h, v1.4h -; CHECK-NEXT: umull v2.4s, v3.4h, v2.4h +; CHECK-NEXT: sub v2.4h, v0.4h, v1.4h +; CHECK-NEXT: umull v2.4s, v2.4h, v3.4h ; CHECK-NEXT: movi d3, #0xffffffffffff0000 ; CHECK-NEXT: shrn v2.4h, v2.4s, #16 ; CHECK-NEXT: add v1.4h, v2.4h, v1.4h -; CHECK-NEXT: movi d2, #0x0000000000ffff -; CHECK-NEXT: ushl v1.4h, v1.4h, v4.4h -; CHECK-NEXT: ldr d4, [x8, :lo12:.LCPI4_3] +; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI4_2] +; CHECK-NEXT: adrp x8, .LCPI4_3 +; CHECK-NEXT: ushl v1.4h, v1.4h, v2.4h +; CHECK-NEXT: and v2.8b, v0.8b, v4.8b ; CHECK-NEXT: and v1.8b, v1.8b, v3.8b -; CHECK-NEXT: and v2.8b, v0.8b, v2.8b ; CHECK-NEXT: orr v1.8b, v2.8b, v1.8b -; CHECK-NEXT: mls v0.4h, v1.4h, v4.4h +; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI4_3] +; CHECK-NEXT: mls v0.4h, v1.4h, v2.4h ; CHECK-NEXT: ret %1 = urem <4 x i16> %x, ret <4 x i16> %1 @@ -128,35 +128,35 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov x8, #17097 // =0x42c9 ; CHECK-NEXT: fmov x9, d1 +; CHECK-NEXT: mov x10, v1.d[1] ; CHECK-NEXT: movk x8, #45590, lsl #16 -; CHECK-NEXT: mov x13, #21445 // =0x53c5 -; CHECK-NEXT: movk x8, #34192, 
lsl #32 -; CHECK-NEXT: movk x13, #1603, lsl #16 -; CHECK-NEXT: movk x8, #25644, lsl #48 -; CHECK-NEXT: movk x13, #15432, lsl #32 -; CHECK-NEXT: mov x10, v0.d[1] -; CHECK-NEXT: movk x13, #25653, lsl #48 -; CHECK-NEXT: umulh x8, x9, x8 -; CHECK-NEXT: mov x11, v1.d[1] -; CHECK-NEXT: sub x12, x9, x8 -; CHECK-NEXT: lsr x14, x10, #1 -; CHECK-NEXT: add x8, x8, x12, lsr #1 +; CHECK-NEXT: mov x11, v0.d[1] ; CHECK-NEXT: mov x12, #12109 // =0x2f4d +; CHECK-NEXT: movk x8, #34192, lsl #32 ; CHECK-NEXT: movk x12, #52170, lsl #16 -; CHECK-NEXT: umulh x13, x14, x13 +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: movk x8, #25644, lsl #48 ; CHECK-NEXT: movk x12, #28749, lsl #32 -; CHECK-NEXT: mov w14, #23 // =0x17 +; CHECK-NEXT: umulh x8, x9, x8 ; CHECK-NEXT: movk x12, #49499, lsl #48 +; CHECK-NEXT: lsr x13, x11, #1 +; CHECK-NEXT: umulh x12, x10, x12 +; CHECK-NEXT: sub x14, x9, x8 +; CHECK-NEXT: add x8, x8, x14, lsr #1 +; CHECK-NEXT: mov x14, #21445 // =0x53c5 +; CHECK-NEXT: movk x14, #1603, lsl #16 +; CHECK-NEXT: movk x14, #15432, lsl #32 ; CHECK-NEXT: lsr x8, x8, #4 -; CHECK-NEXT: lsr x13, x13, #7 -; CHECK-NEXT: umulh x12, x11, x12 +; CHECK-NEXT: movk x14, #25653, lsl #48 +; CHECK-NEXT: umulh x13, x13, x14 +; CHECK-NEXT: mov w14, #23 // =0x17 ; CHECK-NEXT: msub x8, x8, x14, x9 -; CHECK-NEXT: mov w9, #5423 // =0x152f -; CHECK-NEXT: lsr x12, x12, #12 -; CHECK-NEXT: mov w14, #654 // =0x28e -; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: msub x9, x12, x9, x11 -; CHECK-NEXT: msub x10, x13, x14, x10 +; CHECK-NEXT: lsr x9, x12, #12 +; CHECK-NEXT: mov w12, #5423 // =0x152f +; CHECK-NEXT: msub x9, x9, x12, x10 +; CHECK-NEXT: mov w12, #654 // =0x28e +; CHECK-NEXT: lsr x10, x13, #7 +; CHECK-NEXT: msub x10, x10, x12, x11 ; CHECK-NEXT: fmov d1, x8 ; CHECK-NEXT: mov v1.d[1], x9 ; CHECK-NEXT: mov v0.d[1], x10 @@ -261,18 +261,18 @@ define <2 x i64> @fold_urem_v2i64(<2 x i64> %x) { ; CHECK-LABEL: fold_urem_v2i64: ; CHECK: // %bb.0: +; CHECK-NEXT: fmov x10, d0 ; CHECK-NEXT: mov x8, #-3689348814741910324 // =0xcccccccccccccccc -; CHECK-NEXT: fmov x9, d0 +; CHECK-NEXT: mov x9, v0.d[1] ; CHECK-NEXT: movk x8, #52429 ; CHECK-NEXT: mov w12, #10 // =0xa -; CHECK-NEXT: mov x10, v0.d[1] -; CHECK-NEXT: umulh x11, x9, x8 +; CHECK-NEXT: umulh x11, x10, x8 +; CHECK-NEXT: umulh x8, x9, x8 ; CHECK-NEXT: lsr x11, x11, #3 -; CHECK-NEXT: umulh x8, x10, x8 -; CHECK-NEXT: msub x9, x11, x12, x9 +; CHECK-NEXT: msub x10, x11, x12, x10 ; CHECK-NEXT: lsr x8, x8, #3 -; CHECK-NEXT: msub x8, x8, x12, x10 -; CHECK-NEXT: fmov d0, x9 +; CHECK-NEXT: msub x8, x8, x12, x9 +; CHECK-NEXT: fmov d0, x10 ; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %1 = urem <2 x i64> %x, @@ -283,10 +283,10 @@ ; CHECK-LABEL: fold_urem_v1i64: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov x8, #-3689348814741910324 // =0xcccccccccccccccc ; CHECK-NEXT: fmov x9, d0 -; CHECK-NEXT: movk x8, #52429 +; CHECK-NEXT: mov x8, #-3689348814741910324 // =0xcccccccccccccccc ; CHECK-NEXT: mov w10, #10 // =0xa +; CHECK-NEXT: movk x8, #52429 ; CHECK-NEXT: umulh x8, x9, x8 ; CHECK-NEXT: lsr x8, x8, #3 ; CHECK-NEXT: msub x8, x8, x10, x9 diff --git a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll --- a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll @@ -45,8 +45,8 @@ define <32 x i8> @v32i8(<32 x i8> %x, <32 x i8> %y) nounwind { ; CHECK-LABEL: v32i8: ; CHECK: // %bb.0: -; CHECK-NEXT: uqsub v0.16b, v0.16b, v2.16b ; CHECK-NEXT: uqsub v1.16b, v1.16b, 
v3.16b +; CHECK-NEXT: uqsub v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %z = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %x, <32 x i8> %y) ret <32 x i8> %z @@ -76,8 +76,8 @@ define <16 x i16> @v16i16(<16 x i16> %x, <16 x i16> %y) nounwind { ; CHECK-LABEL: v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: uqsub v0.8h, v0.8h, v2.8h ; CHECK-NEXT: uqsub v1.8h, v1.8h, v3.8h +; CHECK-NEXT: uqsub v0.8h, v0.8h, v2.8h ; CHECK-NEXT: ret %z = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %x, <16 x i16> %y) ret <16 x i16> %z @@ -98,9 +98,9 @@ define void @v8i8(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-LABEL: v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: uqsub v0.8b, v1.8b, v0.8b +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: uqsub v0.8b, v0.8b, v1.8b ; CHECK-NEXT: str d0, [x2] ; CHECK-NEXT: ret %x = load <8 x i8>, ptr %px @@ -131,14 +131,14 @@ define void @v2i8(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-LABEL: v2i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x1] -; CHECK-NEXT: ldrb w9, [x0] -; CHECK-NEXT: ldrb w10, [x1, #1] -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: ldrb w9, [x0, #1] -; CHECK-NEXT: mov v1.s[1], w10 -; CHECK-NEXT: mov v0.s[1], w9 +; CHECK-NEXT: ldrb w8, [x0] +; CHECK-NEXT: ldrb w9, [x1] +; CHECK-NEXT: ldrb w10, [x0, #1] +; CHECK-NEXT: ldrb w11, [x1, #1] +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: fmov s1, w9 +; CHECK-NEXT: mov v0.s[1], w10 +; CHECK-NEXT: mov v1.s[1], w11 ; CHECK-NEXT: uqsub v0.2s, v0.2s, v1.2s ; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 @@ -155,9 +155,9 @@ define void @v4i16(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-LABEL: v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x1] -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: uqsub v0.4h, v1.4h, v0.4h +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: uqsub v0.4h, v0.4h, v1.4h ; CHECK-NEXT: str d0, [x2] ; CHECK-NEXT: ret %x = load <4 x i16>, ptr %px @@ -170,14 +170,14 @@ define void @v2i16(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-LABEL: v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x1] -; CHECK-NEXT: ldrh w9, [x0] -; CHECK-NEXT: ldrh w10, [x1, #2] -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: ldrh w9, [x0, #2] -; CHECK-NEXT: mov v1.s[1], w10 -; CHECK-NEXT: mov v0.s[1], w9 +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: ldrh w9, [x1] +; CHECK-NEXT: ldrh w10, [x0, #2] +; CHECK-NEXT: ldrh w11, [x1, #2] +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: fmov s1, w9 +; CHECK-NEXT: mov v0.s[1], w10 +; CHECK-NEXT: mov v1.s[1], w11 ; CHECK-NEXT: uqsub v0.2s, v0.2s, v1.2s ; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 @@ -220,9 +220,9 @@ define void @v1i8(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-LABEL: v1i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr b0, [x1] -; CHECK-NEXT: ldr b1, [x0] -; CHECK-NEXT: uqsub v0.8b, v1.8b, v0.8b +; CHECK-NEXT: ldr b0, [x0] +; CHECK-NEXT: ldr b1, [x1] +; CHECK-NEXT: uqsub v0.8b, v0.8b, v1.8b ; CHECK-NEXT: st1 { v0.b }[0], [x2] ; CHECK-NEXT: ret %x = load <1 x i8>, ptr %px @@ -235,9 +235,9 @@ define void @v1i16(ptr %px, ptr %py, ptr %pz) nounwind { ; CHECK-LABEL: v1i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr h0, [x1] -; CHECK-NEXT: ldr h1, [x0] -; CHECK-NEXT: uqsub v0.4h, v1.4h, v0.4h +; CHECK-NEXT: ldr h0, [x0] +; CHECK-NEXT: ldr h1, [x1] +; CHECK-NEXT: uqsub v0.4h, v0.4h, v1.4h ; CHECK-NEXT: str h0, [x2] ; CHECK-NEXT: ret %x = load <1 x i16>, ptr %px @@ -291,8 +291,8 @@ define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind { ; 
CHECK-LABEL: v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: uqsub v0.4s, v0.4s, v2.4s ; CHECK-NEXT: uqsub v1.4s, v1.4s, v3.4s +; CHECK-NEXT: uqsub v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %z = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> %x, <8 x i32> %y) ret <8 x i32> %z @@ -322,8 +322,8 @@ define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind { ; CHECK-LABEL: v4i64: ; CHECK: // %bb.0: -; CHECK-NEXT: uqsub v0.2d, v0.2d, v2.2d ; CHECK-NEXT: uqsub v1.2d, v1.2d, v3.2d +; CHECK-NEXT: uqsub v0.2d, v0.2d, v2.2d ; CHECK-NEXT: ret %z = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> %x, <4 x i64> %y) ret <4 x i64> %z diff --git a/llvm/test/CodeGen/AArch64/vcvt-oversize.ll b/llvm/test/CodeGen/AArch64/vcvt-oversize.ll --- a/llvm/test/CodeGen/AArch64/vcvt-oversize.ll +++ b/llvm/test/CodeGen/AArch64/vcvt-oversize.ll @@ -4,14 +4,14 @@ define <8 x i8> @float_to_i8(ptr %in) { ; CHECK-LABEL: float_to_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: fadd v0.4s, v0.4s, v0.4s +; CHECK-NEXT: ldp q1, q0, [x0] ; CHECK-NEXT: fadd v1.4s, v1.4s, v1.4s +; CHECK-NEXT: fadd v0.4s, v0.4s, v0.4s ; CHECK-NEXT: fcvtzs v0.4s, v0.4s ; CHECK-NEXT: fcvtzs v1.4s, v1.4s ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: xtn v1.4h, v1.4s -; CHECK-NEXT: uzp1 v0.8b, v0.8b, v1.8b +; CHECK-NEXT: uzp1 v0.8b, v1.8b, v0.8b ; CHECK-NEXT: ret %l = load <8 x float>, ptr %in %scale = fmul <8 x float> %l, diff --git a/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll b/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll --- a/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll +++ b/llvm/test/CodeGen/AArch64/vec-combine-compare-to-bitmask.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -mtriple=aarch64-apple-darwin -mattr=+neon -verify-machineinstrs < %s | FileCheck %s ; Basic tests from input vector to bitmask @@ -6,40 +7,24 @@ define i16 @convert_to_bitmask16(<16 x i8> %vec) { ; Bits used in mask -; CHECK-LABEL: lCPI0_0 -; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 2 -; CHECK-NEXT: .byte 4 -; CHECK-NEXT: .byte 8 -; CHECK-NEXT: .byte 16 -; CHECK-NEXT: .byte 32 -; CHECK-NEXT: .byte 64 -; CHECK-NEXT: .byte 128 -; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 2 -; CHECK-NEXT: .byte 4 -; CHECK-NEXT: .byte 8 -; CHECK-NEXT: .byte 16 -; CHECK-NEXT: .byte 32 -; CHECK-NEXT: .byte 64 -; CHECK-NEXT: .byte 128 +; CHECK-LABEL: convert_to_bitmask16: +; CHECK: ; %bb.0: +; CHECK-NEXT: Lloh0: +; CHECK-NEXT: adrp x8, lCPI0_0@PAGE +; CHECK-NEXT: cmeq.16b v0, v0, #0 +; CHECK-NEXT: Lloh1: +; CHECK-NEXT: ldr q1, [x8, lCPI0_0@PAGEOFF] +; CHECK-NEXT: bic.16b v0, v1, v0 +; CHECK-NEXT: ext.16b v1, v0, v0, #8 +; CHECK-NEXT: addv.8b b0, v0 +; CHECK-NEXT: addv.8b b1, v1 +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: fmov w9, s1 +; CHECK-NEXT: orr w0, w8, w9, lsl #8 +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdr Lloh0, Lloh1 ; Actual conversion -; CHECK-LABEL: convert_to_bitmask16 -; CHECK: ; %bb.0: -; CHECK-NEXT: Lloh0: -; CHECK-NEXT: adrp x8, lCPI0_0@PAGE -; CHECK-NEXT: cmeq.16b v0, v0, #0 -; CHECK-NEXT: Lloh1: -; CHECK-NEXT: ldr q1, [x8, lCPI0_0@PAGEOFF] -; CHECK-NEXT: bic.16b v0, v1, v0 -; CHECK-NEXT: ext.16b v1, v0, v0, #8 -; CHECK-NEXT: addv.8b b0, v0 -; CHECK-NEXT: addv.8b b1, v1 -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: orr w0, w9, w8, lsl #8 -; CHECK-NEXT: ret %cmp_result = icmp ne <16 x i8> %vec, zeroinitializer %bitmask = bitcast <16 x i1> %cmp_result to i16 @@ -47,28 +32,20 @@ } define i16 @convert_to_bitmask8(<8 x i16> 
%vec) { -; CHECK-LABEL: lCPI1_0: -; CHECK-NEXT: .short 1 -; CHECK-NEXT: .short 2 -; CHECK-NEXT: .short 4 -; CHECK-NEXT: .short 8 -; CHECK-NEXT: .short 16 -; CHECK-NEXT: .short 32 -; CHECK-NEXT: .short 64 -; CHECK-NEXT: .short 128 - -; CHECK-LABEL: convert_to_bitmask8 +; CHECK-LABEL: convert_to_bitmask8: ; CHECK: ; %bb.0: -; CHECK-NEXT: Lloh2: -; CHECK-NEXT: adrp x8, lCPI1_0@PAGE -; CHECK-NEXT: cmeq.8h v0, v0, #0 -; CHECK-NEXT: Lloh3: -; CHECK-NEXT: ldr q1, [x8, lCPI1_0@PAGEOFF] -; CHECK-NEXT: bic.16b v0, v1, v0 -; CHECK-NEXT: addv.8h h0, v0 -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: and w0, w8, #0xff -; CHECK-NEXT: ret +; CHECK-NEXT: Lloh2: +; CHECK-NEXT: adrp x8, lCPI1_0@PAGE +; CHECK-NEXT: cmeq.8h v0, v0, #0 +; CHECK-NEXT: Lloh3: +; CHECK-NEXT: ldr q1, [x8, lCPI1_0@PAGEOFF] +; CHECK-NEXT: bic.16b v0, v1, v0 +; CHECK-NEXT: addv.8h h0, v0 +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: and w0, w8, #0xff +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdr Lloh2, Lloh3 + %cmp_result = icmp ne <8 x i16> %vec, zeroinitializer %bitmask = bitcast <8 x i1> %cmp_result to i8 @@ -77,23 +54,19 @@ } define i4 @convert_to_bitmask4(<4 x i32> %vec) { -; CHECK-LABEL: lCPI2_0: -; CHECK-NEXT: .long 1 -; CHECK-NEXT: .long 2 -; CHECK-NEXT: .long 4 -; CHECK-NEXT: .long 8 - -; CHECK-LABEL: convert_to_bitmask4 +; CHECK-LABEL: convert_to_bitmask4: ; CHECK: ; %bb.0: -; CHECK-NEXT: Lloh4: -; CHECK-NEXT: adrp x8, lCPI2_0@PAGE -; CHECK-NEXT: cmeq.4s v0, v0, #0 -; CHECK-NEXT: Lloh5: -; CHECK-NEXT: ldr q1, [x8, lCPI2_0@PAGEOFF] -; CHECK-NEXT: bic.16b v0, v1, v0 -; CHECK-NEXT: addv.4s s0, v0 -; CHECK-NEXT: fmov w0, s0 -; CHECK-NEXT: ret +; CHECK-NEXT: Lloh4: +; CHECK-NEXT: adrp x8, lCPI2_0@PAGE +; CHECK-NEXT: cmeq.4s v0, v0, #0 +; CHECK-NEXT: Lloh5: +; CHECK-NEXT: ldr q1, [x8, lCPI2_0@PAGEOFF] +; CHECK-NEXT: bic.16b v0, v1, v0 +; CHECK-NEXT: addv.4s s0, v0 +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdr Lloh4, Lloh5 + %cmp_result = icmp ne <4 x i32> %vec, zeroinitializer %bitmask = bitcast <4 x i1> %cmp_result to i4 @@ -101,22 +74,20 @@ } define i8 @convert_to_bitmask2(<2 x i64> %vec) { -; CHECK-LABEL: lCPI3_0: -; CHECK-NEXT: .quad 1 -; CHECK-NEXT: .quad 2 - -; CHECK-LABEL: convert_to_bitmask2 +; CHECK-LABEL: convert_to_bitmask2: ; CHECK: ; %bb.0: -; CHECK-NEXT: Lloh6: -; CHECK-NEXT: adrp x8, lCPI3_0@PAGE -; CHECK-NEXT: cmeq.2d v0, v0, #0 -; CHECK-NEXT: Lloh7: -; CHECK-NEXT: ldr q1, [x8, lCPI3_0@PAGEOFF] -; CHECK-NEXT: bic.16b v0, v1, v0 -; CHECK-NEXT: addp.2d d0, v0 -; CHECK-NEXT: fmov x8, d0 -; CHECK-NEXT: and w0, w8, #0x3 -; CHECK-NEXT: ret +; CHECK-NEXT: Lloh6: +; CHECK-NEXT: adrp x8, lCPI3_0@PAGE +; CHECK-NEXT: cmeq.2d v0, v0, #0 +; CHECK-NEXT: Lloh7: +; CHECK-NEXT: ldr q1, [x8, lCPI3_0@PAGEOFF] +; CHECK-NEXT: bic.16b v0, v1, v0 +; CHECK-NEXT: addp.2d d0, v0 +; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: and w0, w8, #0x3 +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdr Lloh6, Lloh7 + %cmp_result = icmp ne <2 x i64> %vec, zeroinitializer %bitmask = bitcast <2 x i1> %cmp_result to i2 @@ -126,23 +97,19 @@ ; Clang's __builtin_convertvector adds an undef vector concat for vectors with <8 elements. 
define i8 @clang_builtins_undef_concat_convert_to_bitmask4(<4 x i32> %vec) { -; CHECK-LABEL: lCPI4_0: -; CHECK-NEXT: .long 1 -; CHECK-NEXT: .long 2 -; CHECK-NEXT: .long 4 -; CHECK-NEXT: .long 8 - -; CHECK-LABEL: clang_builtins_undef_concat_convert_to_bitmask4 +; CHECK-LABEL: clang_builtins_undef_concat_convert_to_bitmask4: ; CHECK: ; %bb.0: -; CHECK-NEXT: Lloh8: -; CHECK-NEXT: adrp x8, lCPI4_0@PAGE -; CHECK-NEXT: cmeq.4s v0, v0, #0 -; CHECK-NEXT: Lloh9: -; CHECK-NEXT: ldr q1, [x8, lCPI4_0@PAGEOFF] -; CHECK-NEXT: bic.16b v0, v1, v0 -; CHECK-NEXT: addv.4s s0, v0 -; CHECK-NEXT: fmov w0, s0 -; CHECK-NEXT: ret +; CHECK-NEXT: Lloh8: +; CHECK-NEXT: adrp x8, lCPI4_0@PAGE +; CHECK-NEXT: cmeq.4s v0, v0, #0 +; CHECK-NEXT: Lloh9: +; CHECK-NEXT: ldr q1, [x8, lCPI4_0@PAGEOFF] +; CHECK-NEXT: bic.16b v0, v1, v0 +; CHECK-NEXT: addv.4s s0, v0 +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdr Lloh8, Lloh9 + %cmp_result = icmp ne <4 x i32> %vec, zeroinitializer %vector_pad = shufflevector <4 x i1> %cmp_result, <4 x i1> poison, <8 x i32> @@ -152,25 +119,21 @@ define i4 @convert_to_bitmask_no_compare(<4 x i32> %vec1, <4 x i32> %vec2) { -; CHECK-LABEL: lCPI5_0: -; CHECK-NEXT: .long 1 -; CHECK-NEXT: .long 2 -; CHECK-NEXT: .long 4 -; CHECK-NEXT: .long 8 - -; CHECK-LABEL: convert_to_bitmask_no_compare +; CHECK-LABEL: convert_to_bitmask_no_compare: ; CHECK: ; %bb.0: -; CHECK-NEXT: Lloh10: -; CHECK-NEXT: adrp x8, lCPI5_0@PAGE -; CHECK-NEXT: and.16b v0, v0, v1 -; CHECK-NEXT: shl.4s v0, v0, #31 -; CHECK-NEXT: Lloh11: -; CHECK-NEXT: ldr q1, [x8, lCPI5_0@PAGEOFF] -; CHECK-NEXT: cmlt.4s v0, v0, #0 -; CHECK-NEXT: and.16b v0, v0, v1 -; CHECK-NEXT: addv.4s s0, v0 -; CHECK-NEXT: fmov w0, s0 -; CHECK-NEXT: ret +; CHECK-NEXT: and.16b v0, v0, v1 +; CHECK-NEXT: Lloh10: +; CHECK-NEXT: adrp x8, lCPI5_0@PAGE +; CHECK-NEXT: Lloh11: +; CHECK-NEXT: ldr q1, [x8, lCPI5_0@PAGEOFF] +; CHECK-NEXT: shl.4s v0, v0, #31 +; CHECK-NEXT: cmlt.4s v0, v0, #0 +; CHECK-NEXT: and.16b v0, v0, v1 +; CHECK-NEXT: addv.4s s0, v0 +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdr Lloh10, Lloh11 + %cmp = and <4 x i32> %vec1, %vec2 %trunc = trunc <4 x i32> %cmp to <4 x i1> @@ -179,25 +142,21 @@ } define i4 @convert_to_bitmask_with_compare_chain(<4 x i32> %vec1, <4 x i32> %vec2) { -; CHECK-LABEL: lCPI6_0: -; CHECK-NEXT: .long 1 -; CHECK-NEXT: .long 2 -; CHECK-NEXT: .long 4 -; CHECK-NEXT: .long 8 - -; CHECK-LABEL: convert_to_bitmask_with_compare_chain +; CHECK-LABEL: convert_to_bitmask_with_compare_chain: ; CHECK: ; %bb.0: -; CHECK-NEXT: Lloh12: -; CHECK-NEXT: adrp x8, lCPI6_0@PAGE -; CHECK-NEXT: cmeq.4s v2, v0, #0 -; CHECK-NEXT: cmeq.4s v0, v0, v1 -; CHECK-NEXT: Lloh13: -; CHECK-NEXT: ldr q1, [x8, lCPI6_0@PAGEOFF] -; CHECK-NEXT: bic.16b v0, v0, v2 -; CHECK-NEXT: and.16b v0, v0, v1 -; CHECK-NEXT: addv.4s s0, v0 -; CHECK-NEXT: fmov w0, s0 -; CHECK-NEXT: ret +; CHECK-NEXT: cmeq.4s v2, v0, #0 +; CHECK-NEXT: cmeq.4s v0, v0, v1 +; CHECK-NEXT: Lloh12: +; CHECK-NEXT: adrp x8, lCPI6_0@PAGE +; CHECK-NEXT: Lloh13: +; CHECK-NEXT: ldr q1, [x8, lCPI6_0@PAGEOFF] +; CHECK-NEXT: bic.16b v0, v0, v2 +; CHECK-NEXT: and.16b v0, v0, v1 +; CHECK-NEXT: addv.4s s0, v0 +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdr Lloh12, Lloh13 + %cmp1 = icmp ne <4 x i32> %vec1, zeroinitializer %cmp2 = icmp eq <4 x i32> %vec1, %vec2 @@ -207,26 +166,22 @@ } define i4 @convert_to_bitmask_with_trunc_in_chain(<4 x i32> %vec1, <4 x i32> %vec2) { -; CHECK-LABEL: lCPI7_0: -; CHECK-NEXT: .long 1 -; CHECK-NEXT: .long 2 -; CHECK-NEXT: 
.long 4 -; CHECK-NEXT: .long 8 - -; CHECK-LABEL: convert_to_bitmask_with_trunc_in_chain +; CHECK-LABEL: convert_to_bitmask_with_trunc_in_chain: ; CHECK: ; %bb.0: -; CHECK-NEXT: cmeq.4s v0, v0, #0 -; CHECK-NEXT: Lloh14: -; CHECK-NEXT: adrp x8, lCPI7_0@PAGE -; CHECK-NEXT: bic.16b v0, v1, v0 -; CHECK-NEXT: shl.4s v0, v0, #31 -; CHECK-NEXT: Lloh15: -; CHECK-NEXT: ldr q1, [x8, lCPI7_0@PAGEOFF] -; CHECK-NEXT: cmlt.4s v0, v0, #0 -; CHECK-NEXT: and.16b v0, v0, v1 -; CHECK-NEXT: addv.4s s0, v0 -; CHECK-NEXT: fmov w0, s0 -; CHECK-NEXT: ret +; CHECK-NEXT: cmeq.4s v0, v0, #0 +; CHECK-NEXT: Lloh14: +; CHECK-NEXT: adrp x8, lCPI7_0@PAGE +; CHECK-NEXT: bic.16b v0, v1, v0 +; CHECK-NEXT: Lloh15: +; CHECK-NEXT: ldr q1, [x8, lCPI7_0@PAGEOFF] +; CHECK-NEXT: shl.4s v0, v0, #31 +; CHECK-NEXT: cmlt.4s v0, v0, #0 +; CHECK-NEXT: and.16b v0, v0, v1 +; CHECK-NEXT: addv.4s s0, v0 +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdr Lloh14, Lloh15 + %cmp1 = icmp ne <4 x i32> %vec1, zeroinitializer %trunc_vec = trunc <4 x i32> %vec2 to <4 x i1> @@ -236,38 +191,34 @@ } define i4 @convert_to_bitmask_with_unknown_type_in_long_chain(<4 x i32> %vec1, <4 x i32> %vec2) { -; CHECK-LABEL: lCPI8_0: -; CHECK-NEXT: .short 1 -; CHECK-NEXT: .short 2 -; CHECK-NEXT: .short 4 -; CHECK-NEXT: .short 8 - -; CHECK-LABEL: convert_to_bitmask_with_unknown_type_in_long_chain -; CHECK: ; %bb.0: -; CHECK-NEXT: cmeq.4s v0, v0, #0 -; CHECK-NEXT: Lloh16: -; CHECK-NEXT: adrp x8, lCPI8_0@PAGE -; CHECK-NEXT: cmeq.4s v1, v1, #0 -; CHECK-NEXT: movi d2, #0x000000ffffffff -; CHECK-NEXT: bic.16b v0, v1, v0 -; CHECK-NEXT: movi d1, #0xffff0000ffff0000 -; CHECK-NEXT: xtn.4h v0, v0 -; CHECK-NEXT: movi d3, #0x00ffffffffffff -; CHECK-NEXT: orr.8b v0, v0, v2 -; CHECK-NEXT: movi d2, #0x00ffffffff0000 -; CHECK-NEXT: eor.8b v1, v0, v1 -; CHECK-NEXT: mov.h v1[2], wzr -; CHECK-NEXT: eor.8b v0, v0, v2 -; CHECK-NEXT: orr.8b v0, v0, v3 -; CHECK-NEXT: orr.8b v0, v1, v0 -; CHECK-NEXT: Lloh17: -; CHECK-NEXT: ldr d1, [x8, lCPI8_0@PAGEOFF] -; CHECK-NEXT: shl.4h v0, v0, #15 -; CHECK-NEXT: cmlt.4h v0, v0, #0 -; CHECK-NEXT: and.8b v0, v0, v1 -; CHECK-NEXT: addv.4h h0, v0 -; CHECK-NEXT: fmov w0, s0 -; CHECK-NEXT: ret +; CHECK-LABEL: convert_to_bitmask_with_unknown_type_in_long_chain: +; CHECK: ; %bb.0: +; CHECK-NEXT: cmeq.4s v0, v0, #0 +; CHECK-NEXT: cmeq.4s v1, v1, #0 +; CHECK-NEXT: Lloh16: +; CHECK-NEXT: adrp x8, lCPI8_0@PAGE +; CHECK-NEXT: movi d2, #0x000000ffffffff +; CHECK-NEXT: movi d3, #0x00ffffffffffff +; CHECK-NEXT: bic.16b v0, v1, v0 +; CHECK-NEXT: movi d1, #0xffff0000ffff0000 +; CHECK-NEXT: xtn.4h v0, v0 +; CHECK-NEXT: orr.8b v0, v0, v2 +; CHECK-NEXT: movi d2, #0x00ffffffff0000 +; CHECK-NEXT: eor.8b v1, v0, v1 +; CHECK-NEXT: eor.8b v0, v0, v2 +; CHECK-NEXT: mov.h v1[2], wzr +; CHECK-NEXT: orr.8b v0, v0, v3 +; CHECK-NEXT: orr.8b v0, v1, v0 +; CHECK-NEXT: Lloh17: +; CHECK-NEXT: ldr d1, [x8, lCPI8_0@PAGEOFF] +; CHECK-NEXT: shl.4h v0, v0, #15 +; CHECK-NEXT: cmlt.4h v0, v0, #0 +; CHECK-NEXT: and.8b v0, v0, v1 +; CHECK-NEXT: addv.4h h0, v0 +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdr Lloh16, Lloh17 + %cmp1 = icmp ne <4 x i32> %vec1, zeroinitializer %cmp2 = icmp eq <4 x i32> %vec2, zeroinitializer @@ -285,26 +236,22 @@ } define i4 @convert_to_bitmask_with_different_types_in_chain(<4 x i16> %vec1, <4 x i32> %vec2) { -; CHECK-LABEL: lCPI9_0: -; CHECK-NEXT: .short 1 -; CHECK-NEXT: .short 2 -; CHECK-NEXT: .short 4 -; CHECK-NEXT: .short 8 - -; CHECK-LABEL: convert_to_bitmask_with_different_types_in_chain -; CHECK: ; %bb.0: -; 
CHECK-NEXT: Lloh18: -; CHECK-NEXT: adrp x8, lCPI9_0@PAGE -; CHECK-NEXT: cmeq.4h v0, v0, #0 -; CHECK-NEXT: cmeq.4s v1, v1, #0 -; CHECK-NEXT: xtn.4h v1, v1 -; CHECK-NEXT: Lloh19: -; CHECK-NEXT: ldr d2, [x8, lCPI9_0@PAGEOFF] -; CHECK-NEXT: orn.8b v0, v1, v0 -; CHECK-NEXT: and.8b v0, v0, v2 -; CHECK-NEXT: addv.4h h0, v0 -; CHECK-NEXT: fmov w0, s0 -; CHECK-NEXT: ret +; CHECK-LABEL: convert_to_bitmask_with_different_types_in_chain: +; CHECK: ; %bb.0: +; CHECK-NEXT: cmeq.4s v1, v1, #0 +; CHECK-NEXT: cmeq.4h v0, v0, #0 +; CHECK-NEXT: Lloh18: +; CHECK-NEXT: adrp x8, lCPI9_0@PAGE +; CHECK-NEXT: xtn.4h v1, v1 +; CHECK-NEXT: orn.8b v0, v1, v0 +; CHECK-NEXT: Lloh19: +; CHECK-NEXT: ldr d1, [x8, lCPI9_0@PAGEOFF] +; CHECK-NEXT: and.8b v0, v0, v1 +; CHECK-NEXT: addv.4h h0, v0 +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdr Lloh18, Lloh19 + %cmp1 = icmp ne <4 x i16> %vec1, zeroinitializer %cmp2 = icmp eq <4 x i32> %vec2, zeroinitializer @@ -316,37 +263,39 @@ define i16 @convert_to_bitmask_without_knowing_type(<16 x i1> %vec) { ; CHECK-LABEL: convert_to_bitmask_without_knowing_type: ; CHECK: ; %bb.0: +; CHECK-NEXT: shl.16b v0, v0, #7 ; CHECK-NEXT: Lloh20: ; CHECK-NEXT: adrp x8, lCPI10_0@PAGE -; CHECK-NEXT: shl.16b v0, v0, #7 -; CHECK-NEXT: cmlt.16b v0, v0, #0 ; CHECK-NEXT: Lloh21: ; CHECK-NEXT: ldr q1, [x8, lCPI10_0@PAGEOFF] +; CHECK-NEXT: cmlt.16b v0, v0, #0 ; CHECK-NEXT: and.16b v0, v0, v1 ; CHECK-NEXT: ext.16b v1, v0, v0, #8 ; CHECK-NEXT: addv.8b b0, v0 ; CHECK-NEXT: addv.8b b1, v1 -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: orr w0, w9, w8, lsl #8 +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: fmov w9, s1 +; CHECK-NEXT: orr w0, w8, w9, lsl #8 ; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdr Lloh20, Lloh21 %bitmask = bitcast <16 x i1> %vec to i16 ret i16 %bitmask } define i2 @convert_to_bitmask_2xi32(<2 x i32> %vec) { -; CHECK-LABEL: convert_to_bitmask_2xi32 +; CHECK-LABEL: convert_to_bitmask_2xi32: ; CHECK: ; %bb.0: ; CHECK-NEXT: Lloh22: -; CHECK-NEXT: adrp x8, lCPI11_0@PAGE -; CHECK-NEXT: cmeq.2s v0, v0, #0 +; CHECK-NEXT: adrp x8, lCPI11_0@PAGE +; CHECK-NEXT: cmeq.2s v0, v0, #0 ; CHECK-NEXT: Lloh23: -; CHECK-NEXT: ldr d1, [x8, lCPI11_0@PAGEOFF] -; CHECK-NEXT: bic.8b v0, v1, v0 -; CHECK-NEXT: addp.2s v0, v0, v0 -; CHECK-NEXT: fmov w0, s0 -; CHECK-NEXT: ret +; CHECK-NEXT: ldr d1, [x8, lCPI11_0@PAGEOFF] +; CHECK-NEXT: bic.8b v0, v1, v0 +; CHECK-NEXT: addp.2s v0, v0, v0 +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdr Lloh22, Lloh23 %cmp_result = icmp ne <2 x i32> %vec, zeroinitializer %bitmask = bitcast <2 x i1> %cmp_result to i2 @@ -354,18 +303,19 @@ } define i4 @convert_to_bitmask_4xi8(<4 x i8> %vec) { -; CHECK-LABEL: convert_to_bitmask_4xi8 +; CHECK-LABEL: convert_to_bitmask_4xi8: ; CHECK: ; %bb.0: +; CHECK-NEXT: bic.4h v0, #255, lsl #8 ; CHECK-NEXT: Lloh24: -; CHECK-NEXT: adrp x8, lCPI12_0@PAGE -; CHECK-NEXT: bic.4h v0, #255, lsl #8 -; CHECK-NEXT: cmeq.4h v0, v0, #0 +; CHECK-NEXT: adrp x8, lCPI12_0@PAGE ; CHECK-NEXT: Lloh25: -; CHECK-NEXT: ldr d1, [x8, lCPI12_0@PAGEOFF] -; CHECK-NEXT: bic.8b v0, v1, v0 -; CHECK-NEXT: addv.4h h0, v0 -; CHECK-NEXT: fmov w0, s0 -; CHECK-NEXT: ret +; CHECK-NEXT: ldr d1, [x8, lCPI12_0@PAGEOFF] +; CHECK-NEXT: cmeq.4h v0, v0, #0 +; CHECK-NEXT: bic.8b v0, v1, v0 +; CHECK-NEXT: addv.4h h0, v0 +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdr Lloh24, Lloh25 %cmp_result = icmp ne <4 x i8> %vec, zeroinitializer %bitmask = bitcast <4 x i1> %cmp_result to i4 @@ -373,19 +323,20 @@ } define i8 
@convert_to_bitmask_8xi2(<8 x i2> %vec) { -; CHECK-LABEL: convert_to_bitmask_8xi2 +; CHECK-LABEL: convert_to_bitmask_8xi2: ; CHECK: ; %bb.0: -; CHECK-NEXT: movi.8b v1, #3 +; CHECK-NEXT: movi.8b v1, #3 ; CHECK-NEXT: Lloh26: -; CHECK-NEXT: adrp x8, lCPI13_0@PAGE -; CHECK-NEXT: and.8b v0, v0, v1 +; CHECK-NEXT: adrp x8, lCPI13_0@PAGE +; CHECK-NEXT: and.8b v0, v0, v1 ; CHECK-NEXT: Lloh27: -; CHECK-NEXT: ldr d1, [x8, lCPI13_0@PAGEOFF] -; CHECK-NEXT: cmeq.8b v0, v0, #0 -; CHECK-NEXT: bic.8b v0, v1, v0 -; CHECK-NEXT: addv.8b b0, v0 -; CHECK-NEXT: fmov w0, s0 -; CHECK-NEXT: ret +; CHECK-NEXT: ldr d1, [x8, lCPI13_0@PAGEOFF] +; CHECK-NEXT: cmeq.8b v0, v0, #0 +; CHECK-NEXT: bic.8b v0, v1, v0 +; CHECK-NEXT: addv.8b b0, v0 +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdr Lloh26, Lloh27 %cmp_result = icmp ne <8 x i2> %vec, zeroinitializer %bitmask = bitcast <8 x i1> %cmp_result to i8 @@ -393,25 +344,21 @@ } define i4 @convert_to_bitmask_float(<4 x float> %vec) { -; CHECK-LABEL: lCPI14_0: -; CHECK-NEXT: .long 1 -; CHECK-NEXT: .long 2 -; CHECK-NEXT: .long 4 -; CHECK-NEXT: .long 8 - -; CHECK-LABEL: convert_to_bitmask_float +; CHECK-LABEL: convert_to_bitmask_float: ; CHECK: ; %bb.0: -; CHECK-NEXT: Lloh28: -; CHECK-NEXT: adrp x8, lCPI14_0@PAGE -; CHECK-NEXT: fcmgt.4s v1, v0, #0.0 -; CHECK-NEXT: fcmlt.4s v0, v0, #0.0 -; CHECK-NEXT: Lloh29: -; CHECK-NEXT: ldr q2, [x8, lCPI14_0@PAGEOFF] -; CHECK-NEXT: orr.16b v0, v0, v1 -; CHECK-NEXT: and.16b v0, v0, v2 -; CHECK-NEXT: addv.4s s0, v0 -; CHECK-NEXT: fmov w0, s0 -; CHECK-NEXT: ret +; CHECK-NEXT: fcmgt.4s v1, v0, #0.0 +; CHECK-NEXT: fcmlt.4s v0, v0, #0.0 +; CHECK-NEXT: Lloh28: +; CHECK-NEXT: adrp x8, lCPI14_0@PAGE +; CHECK-NEXT: orr.16b v0, v0, v1 +; CHECK-NEXT: Lloh29: +; CHECK-NEXT: ldr q1, [x8, lCPI14_0@PAGEOFF] +; CHECK-NEXT: and.16b v0, v0, v1 +; CHECK-NEXT: addv.4s s0, v0 +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdr Lloh28, Lloh29 + %cmp_result = fcmp one <4 x float> %vec, zeroinitializer %bitmask = bitcast <4 x i1> %cmp_result to i4 @@ -421,30 +368,25 @@ ; Larger vector types don't map directly, but they can be split/truncated and then converted. ; After the comparison against 0, this is truncated to <8 x i16>, which is valid again.
define i8 @convert_large_vector(<8 x i32> %vec) { -; CHECK-LABEL: lCPI15_0: -; CHECK-NEXT: .short 1 -; CHECK-NEXT: .short 2 -; CHECK-NEXT: .short 4 -; CHECK-NEXT: .short 8 -; CHECK-NEXT: .short 16 -; CHECK-NEXT: .short 32 -; CHECK-NEXT: .short 64 -; CHECK-NEXT: .short 128 - ; CHECK-LABEL: convert_large_vector: -; CHECK: Lloh30: -; CHECK-NEXT: adrp x8, lCPI15_0@PAGE -; CHECK-NEXT: cmeq.4s v1, v1, #0 -; CHECK-NEXT: cmeq.4s v0, v0, #0 -; CHECK-NEXT: uzp1.8h v0, v0, v1 -; CHECK-NEXT: Lloh31: -; CHECK-NEXT: ldr q1, [x8, lCPI15_0@PAGEOFF] -; CHECK-NEXT: bic.16b v0, v1, v0 -; CHECK-NEXT: addv.8h h0, v0 -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: and w0, w8, #0xff -; CHECK-NEXT: add sp, sp, #16 -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: cmeq.4s v1, v1, #0 +; CHECK-NEXT: cmeq.4s v0, v0, #0 +; CHECK-NEXT: Lloh30: +; CHECK-NEXT: adrp x8, lCPI15_0@PAGE +; CHECK-NEXT: uzp1.8h v0, v0, v1 +; CHECK-NEXT: Lloh31: +; CHECK-NEXT: ldr q1, [x8, lCPI15_0@PAGEOFF] +; CHECK-NEXT: bic.16b v0, v1, v0 +; CHECK-NEXT: addv.8h h0, v0 +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: and w0, w8, #0xff +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdr Lloh30, Lloh31 + %cmp_result = icmp ne <8 x i32> %vec, zeroinitializer %bitmask = bitcast <8 x i1> %cmp_result to i8 @@ -452,19 +394,20 @@ } define i4 @convert_legalized_illegal_element_size(<4 x i22> %vec) { -; CHECK-LABEL: convert_legalized_illegal_element_size +; CHECK-LABEL: convert_legalized_illegal_element_size: ; CHECK: ; %bb.0: -; CHECK-NEXT: movi.4s v1, #63, msl #16 +; CHECK-NEXT: movi.4s v1, #63, msl #16 ; CHECK-NEXT: Lloh32: -; CHECK-NEXT: adrp x8, lCPI16_0@PAGE -; CHECK-NEXT: cmtst.4s v0, v0, v1 +; CHECK-NEXT: adrp x8, lCPI16_0@PAGE +; CHECK-NEXT: cmtst.4s v0, v0, v1 ; CHECK-NEXT: Lloh33: -; CHECK-NEXT: ldr d1, [x8, lCPI16_0@PAGEOFF] -; CHECK-NEXT: xtn.4h v0, v0 -; CHECK-NEXT: and.8b v0, v0, v1 -; CHECK-NEXT: addv.4h h0, v0 -; CHECK-NEXT: fmov w0, s0 -; CHECK-NEXT: ret +; CHECK-NEXT: ldr d1, [x8, lCPI16_0@PAGEOFF] +; CHECK-NEXT: xtn.4h v0, v0 +; CHECK-NEXT: and.8b v0, v0, v1 +; CHECK-NEXT: addv.4h h0, v0 +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdr Lloh32, Lloh33 %cmp_result = icmp ne <4 x i22> %vec, zeroinitializer %bitmask = bitcast <4 x i1> %cmp_result to i4 @@ -474,8 +417,28 @@ ; This may still be converted as a v8i8 after the vector concat (but not as v4iX). 
define i8 @no_direct_convert_for_bad_concat(<4 x i32> %vec) { ; CHECK-LABEL: no_direct_convert_for_bad_concat: -; CHECK: cmtst.4s v0, v0, v0 -; CHECK-NOT: addv.4 +; CHECK: ; %bb.0: +; CHECK-NEXT: cmtst.4s v0, v0, v0 +; CHECK-NEXT: Lloh34: +; CHECK-NEXT: adrp x8, lCPI17_0@PAGE +; CHECK-NEXT: xtn.4h v0, v0 +; CHECK-NEXT: umov.h w9, v0[0] +; CHECK-NEXT: mov.b v1[4], w9 +; CHECK-NEXT: umov.h w9, v0[1] +; CHECK-NEXT: mov.b v1[5], w9 +; CHECK-NEXT: umov.h w9, v0[2] +; CHECK-NEXT: mov.b v1[6], w9 +; CHECK-NEXT: umov.h w9, v0[3] +; CHECK-NEXT: mov.b v1[7], w9 +; CHECK-NEXT: shl.8b v0, v1, #7 +; CHECK-NEXT: Lloh35: +; CHECK-NEXT: ldr d1, [x8, lCPI17_0@PAGEOFF] +; CHECK-NEXT: cmlt.8b v0, v0, #0 +; CHECK-NEXT: and.8b v0, v0, v1 +; CHECK-NEXT: addv.8b b0, v0 +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdr Lloh34, Lloh35 %cmp_result = icmp ne <4 x i32> %vec, zeroinitializer %vector_pad = shufflevector <4 x i1> poison, <4 x i1> %cmp_result, <8 x i32> @@ -485,16 +448,46 @@ define <8 x i1> @no_convert_without_direct_bitcast(<8 x i16> %vec) { ; CHECK-LABEL: no_convert_without_direct_bitcast: -; CHECK: cmtst.8h v0, v0, v0 -; CHECK-NOT: addv.4s s0, v0 +; CHECK: ; %bb.0: +; CHECK-NEXT: cmtst.8h v0, v0, v0 +; CHECK-NEXT: xtn.8b v0, v0 +; CHECK-NEXT: ret %cmp_result = icmp ne <8 x i16> %vec, zeroinitializer ret <8 x i1> %cmp_result } define i6 @no_combine_illegal_num_elements(<6 x i32> %vec) { -; CHECK-LABEL: no_combine_illegal_num_elements -; CHECK-NOT: addv +; CHECK-LABEL: no_combine_illegal_num_elements: +; CHECK: ; %bb.0: +; CHECK-NEXT: sub sp, sp, #16 +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: fmov s1, w4 +; CHECK-NEXT: mov.s v0[1], w1 +; CHECK-NEXT: mov.s v1[1], w5 +; CHECK-NEXT: mov.s v0[2], w2 +; CHECK-NEXT: cmeq.4s v1, v1, #0 +; CHECK-NEXT: mov.s v0[3], w3 +; CHECK-NEXT: cmeq.4s v0, v0, #0 +; CHECK-NEXT: uzp1.8h v0, v0, v1 +; CHECK-NEXT: mvn.16b v0, v0 +; CHECK-NEXT: xtn.8b v0, v0 +; CHECK-NEXT: umov.b w8, v0[0] +; CHECK-NEXT: umov.b w9, v0[1] +; CHECK-NEXT: umov.b w10, v0[2] +; CHECK-NEXT: and w8, w8, #0x1 +; CHECK-NEXT: bfi w8, w9, #1, #1 +; CHECK-NEXT: umov.b w9, v0[3] +; CHECK-NEXT: bfi w8, w10, #2, #1 +; CHECK-NEXT: umov.b w10, v0[4] +; CHECK-NEXT: bfi w8, w9, #3, #1 +; CHECK-NEXT: umov.b w9, v0[5] +; CHECK-NEXT: bfi w8, w10, #4, #1 +; CHECK-NEXT: orr w8, w8, w9, lsl #5 +; CHECK-NEXT: and w0, w8, #0x3f +; CHECK-NEXT: add sp, sp, #16 +; CHECK-NEXT: ret %cmp_result = icmp ne <6 x i32> %vec, zeroinitializer %bitmask = bitcast <6 x i1> %cmp_result to i6 diff --git a/llvm/test/CodeGen/AArch64/vec-combine-compare-truncate-store.ll b/llvm/test/CodeGen/AArch64/vec-combine-compare-truncate-store.ll --- a/llvm/test/CodeGen/AArch64/vec-combine-compare-truncate-store.ll +++ b/llvm/test/CodeGen/AArch64/vec-combine-compare-truncate-store.ll @@ -1,42 +1,27 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 ; RUN: llc -mtriple=aarch64-apple-darwin -mattr=+neon -verify-machineinstrs < %s | FileCheck %s define void @store_16_elements(<16 x i8> %vec, ptr %out) { ; Bits used in mask -; CHECK-LABEL: lCPI0_0 -; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 2 -; CHECK-NEXT: .byte 4 -; CHECK-NEXT: .byte 8 -; CHECK-NEXT: .byte 16 -; CHECK-NEXT: .byte 32 -; CHECK-NEXT: .byte 64 -; CHECK-NEXT: .byte 128 -; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 2 -; CHECK-NEXT: .byte 4 -; CHECK-NEXT: .byte 8 -; CHECK-NEXT: .byte 16 -; CHECK-NEXT: .byte 32 -; CHECK-NEXT: .byte 64 -; CHECK-NEXT: .byte 128 - -; Actual conversion -; 
CHECK-LABEL: store_16_elements +; CHECK-LABEL: store_16_elements: ; CHECK: ; %bb.0: ; CHECK-NEXT: Lloh0: -; CHECK-NEXT: adrp x8, lCPI0_0@PAGE -; CHECK-NEXT: cmeq.16b v0, v0, #0 +; CHECK-NEXT: adrp x8, lCPI0_0@PAGE +; CHECK-NEXT: cmeq.16b v0, v0, #0 ; CHECK-NEXT: Lloh1: -; CHECK-NEXT: ldr q1, [x8, lCPI0_0@PAGEOFF] -; CHECK-NEXT: bic.16b v0, v1, v0 -; CHECK-NEXT: ext.16b v1, v0, v0, #8 -; CHECK-NEXT: addv.8b b0, v0 -; CHECK-NEXT: addv.8b b1, v1 -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: orr w8, w9, w8, lsl #8 -; CHECK-NEXT: strh w8, [x0] -; CHECK-NEXT: ret +; CHECK-NEXT: ldr q1, [x8, lCPI0_0@PAGEOFF] +; CHECK-NEXT: bic.16b v0, v1, v0 +; CHECK-NEXT: ext.16b v1, v0, v0, #8 +; CHECK-NEXT: addv.8b b0, v0 +; CHECK-NEXT: addv.8b b1, v1 +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: fmov w9, s1 +; CHECK-NEXT: orr w8, w8, w9, lsl #8 +; CHECK-NEXT: strh w8, [x0] +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdr Lloh0, Lloh1 + +; Actual conversion %cmp_result = icmp ne <16 x i8> %vec, zeroinitializer store <16 x i1> %cmp_result, ptr %out @@ -44,28 +29,20 @@ } define void @store_8_elements(<8 x i16> %vec, ptr %out) { -; CHECK-LABEL: lCPI1_0: -; CHECK-NEXT: .short 1 -; CHECK-NEXT: .short 2 -; CHECK-NEXT: .short 4 -; CHECK-NEXT: .short 8 -; CHECK-NEXT: .short 16 -; CHECK-NEXT: .short 32 -; CHECK-NEXT: .short 64 -; CHECK-NEXT: .short 128 - -; CHECK-LABEL: store_8_elements +; CHECK-LABEL: store_8_elements: ; CHECK: ; %bb.0: ; CHECK-NEXT: Lloh2: -; CHECK-NEXT: adrp x8, lCPI1_0@PAGE -; CHECK-NEXT: cmeq.8h v0, v0, #0 +; CHECK-NEXT: adrp x8, lCPI1_0@PAGE +; CHECK-NEXT: cmeq.8h v0, v0, #0 ; CHECK-NEXT: Lloh3: -; CHECK-NEXT: ldr q1, [x8, lCPI1_0@PAGEOFF] -; CHECK-NEXT: bic.16b v0, v1, v0 -; CHECK-NEXT: addv.8h h0, v0 -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: strb w8, [x0] -; CHECK-NEXT: ret +; CHECK-NEXT: ldr q1, [x8, lCPI1_0@PAGEOFF] +; CHECK-NEXT: bic.16b v0, v1, v0 +; CHECK-NEXT: addv.8h h0, v0 +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: strb w8, [x0] +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdr Lloh2, Lloh3 + %cmp_result = icmp ne <8 x i16> %vec, zeroinitializer store <8 x i1> %cmp_result, ptr %out @@ -73,24 +50,20 @@ } define void @store_4_elements(<4 x i32> %vec, ptr %out) { -; CHECK-LABEL: lCPI2_0: -; CHECK-NEXT: .long 1 -; CHECK-NEXT: .long 2 -; CHECK-NEXT: .long 4 -; CHECK-NEXT: .long 8 - -; CHECK-LABEL: store_4_elements +; CHECK-LABEL: store_4_elements: ; CHECK: ; %bb.0: ; CHECK-NEXT: Lloh4: -; CHECK-NEXT: adrp x8, lCPI2_0@PAGE -; CHECK-NEXT: cmeq.4s v0, v0, #0 +; CHECK-NEXT: adrp x8, lCPI2_0@PAGE +; CHECK-NEXT: cmeq.4s v0, v0, #0 ; CHECK-NEXT: Lloh5: -; CHECK-NEXT: ldr q1, [x8, lCPI2_0@PAGEOFF] -; CHECK-NEXT: bic.16b v0, v1, v0 -; CHECK-NEXT: addv.4s s0, v0 -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: strb w8, [x0] -; CHECK-NEXT: ret +; CHECK-NEXT: ldr q1, [x8, lCPI2_0@PAGEOFF] +; CHECK-NEXT: bic.16b v0, v1, v0 +; CHECK-NEXT: addv.4s s0, v0 +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: strb w8, [x0] +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdr Lloh4, Lloh5 + %cmp_result = icmp ne <4 x i32> %vec, zeroinitializer store <4 x i1> %cmp_result, ptr %out @@ -98,22 +71,20 @@ } define void @store_2_elements(<2 x i64> %vec, ptr %out) { -; CHECK-LABEL: lCPI3_0: -; CHECK-NEXT: .quad 1 -; CHECK-NEXT: .quad 2 - -; CHECK-LABEL: store_2_elements +; CHECK-LABEL: store_2_elements: ; CHECK: ; %bb.0: ; CHECK-NEXT: Lloh6: -; CHECK-NEXT: adrp x8, lCPI3_0@PAGE -; CHECK-NEXT: cmeq.2d v0, v0, #0 +; CHECK-NEXT: adrp x8, lCPI3_0@PAGE +; CHECK-NEXT: cmeq.2d v0, v0, #0 ; CHECK-NEXT: Lloh7: -; CHECK-NEXT: ldr q1, [x8, 
lCPI3_0@PAGEOFF] -; CHECK-NEXT: bic.16b v0, v1, v0 -; CHECK-NEXT: addp.2d d0, v0 -; CHECK-NEXT: fmov x8, d0 -; CHECK-NEXT: strb w8, [x0] -; CHECK-NEXT: ret +; CHECK-NEXT: ldr q1, [x8, lCPI3_0@PAGEOFF] +; CHECK-NEXT: bic.16b v0, v1, v0 +; CHECK-NEXT: addp.2d d0, v0 +; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: strb w8, [x0] +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdr Lloh6, Lloh7 + %cmp_result = icmp ne <2 x i64> %vec, zeroinitializer store <2 x i1> %cmp_result, ptr %out @@ -121,25 +92,21 @@ } define void @add_trunc_compare_before_store(<4 x i32> %vec, ptr %out) { -; CHECK-LABEL: lCPI4_0: -; CHECK-NEXT: .long 1 -; CHECK-NEXT: .long 2 -; CHECK-NEXT: .long 4 -; CHECK-NEXT: .long 8 - -; CHECK-LABEL: add_trunc_compare_before_store +; CHECK-LABEL: add_trunc_compare_before_store: ; CHECK: ; %bb.0: +; CHECK-NEXT: shl.4s v0, v0, #31 ; CHECK-NEXT: Lloh8: -; CHECK-NEXT: adrp x8, lCPI4_0@PAGE -; CHECK-NEXT: shl.4s v0, v0, #31 -; CHECK-NEXT: cmlt.4s v0, v0, #0 +; CHECK-NEXT: adrp x8, lCPI4_0@PAGE ; CHECK-NEXT: Lloh9: -; CHECK-NEXT: ldr q1, [x8, lCPI4_0@PAGEOFF] -; CHECK-NEXT: and.16b v0, v0, v1 -; CHECK-NEXT: addv.4s s0, v0 -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: strb w8, [x0] -; CHECK-NEXT: ret +; CHECK-NEXT: ldr q1, [x8, lCPI4_0@PAGEOFF] +; CHECK-NEXT: cmlt.4s v0, v0, #0 +; CHECK-NEXT: and.16b v0, v0, v1 +; CHECK-NEXT: addv.4s s0, v0 +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: strb w8, [x0] +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdr Lloh8, Lloh9 + %trunc = trunc <4 x i32> %vec to <4 x i1> store <4 x i1> %trunc, ptr %out @@ -147,52 +114,40 @@ } define void @add_trunc_mask_unknown_vector_type(<4 x i1> %vec, ptr %out) { -; CHECK-LABEL: lCPI5_0: -; CHECK: .short 1 -; CHECK: .short 2 -; CHECK: .short 4 -; CHECK: .short 8 - -; CHECK-LABEL: add_trunc_mask_unknown_vector_type +; CHECK-LABEL: add_trunc_mask_unknown_vector_type: ; CHECK: ; %bb.0: -; CHECK-NEXT: Lloh10: -; CHECK-NEXT: adrp x8, lCPI5_0@PAGE -; CHECK-NEXT: shl.4h v0, v0, #15 -; CHECK-NEXT: cmlt.4h v0, v0, #0 -; CHECK-NEXT: Lloh11: -; CHECK-NEXT: ldr d1, [x8, lCPI5_0@PAGEOFF] -; CHECK-NEXT: and.8b v0, v0, v1 -; CHECK-NEXT: addv.4h h0, v0 -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: strb w8, [x0] -; CHECK-NEXT: ret +; CHECK-NEXT: shl.4h v0, v0, #15 +; CHECK-NEXT: Lloh10: +; CHECK-NEXT: adrp x8, lCPI5_0@PAGE +; CHECK-NEXT: Lloh11: +; CHECK-NEXT: ldr d1, [x8, lCPI5_0@PAGEOFF] +; CHECK-NEXT: cmlt.4h v0, v0, #0 +; CHECK-NEXT: and.8b v0, v0, v1 +; CHECK-NEXT: addv.4h h0, v0 +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: strb w8, [x0] +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdr Lloh10, Lloh11 + store <4 x i1> %vec, ptr %out ret void } define void @store_8_elements_64_bit_vector(<8 x i8> %vec, ptr %out) { -; CHECK-LABEL: lCPI6_0: -; CHECK-NEXT: .byte 1 -; CHECK-NEXT: .byte 2 -; CHECK-NEXT: .byte 4 -; CHECK-NEXT: .byte 8 -; CHECK-NEXT: .byte 16 -; CHECK-NEXT: .byte 32 -; CHECK-NEXT: .byte 64 -; CHECK-NEXT: .byte 128 - -; CHECK-LABEL: store_8_elements_64_bit_vector +; CHECK-LABEL: store_8_elements_64_bit_vector: ; CHECK: ; %bb.0: ; CHECK-NEXT: Lloh12: -; CHECK-NEXT: adrp x8, lCPI6_0@PAGE -; CHECK-NEXT: cmeq.8b v0, v0, #0 +; CHECK-NEXT: adrp x8, lCPI6_0@PAGE +; CHECK-NEXT: cmeq.8b v0, v0, #0 ; CHECK-NEXT: Lloh13: -; CHECK-NEXT: ldr d1, [x8, lCPI6_0@PAGEOFF] -; CHECK-NEXT: bic.8b v0, v1, v0 -; CHECK-NEXT: addv.8b b0, v0 -; CHECK-NEXT: st1.b { v0 }[0], [x0] -; CHECK-NEXT: ret +; CHECK-NEXT: ldr d1, [x8, lCPI6_0@PAGEOFF] +; CHECK-NEXT: bic.8b v0, v1, v0 +; CHECK-NEXT: addv.8b b0, v0 +; CHECK-NEXT: st1.b { v0 }[0], [x0] +; CHECK-NEXT: ret +; CHECK-NEXT: .loh 
AdrpLdr Lloh12, Lloh13 + %cmp_result = icmp ne <8 x i8> %vec, zeroinitializer store <8 x i1> %cmp_result, ptr %out @@ -200,24 +155,20 @@ } define void @store_4_elements_64_bit_vector(<4 x i16> %vec, ptr %out) { -; CHECK-LABEL: lCPI7_0: -; CHECK-NEXT: .short 1 -; CHECK-NEXT: .short 2 -; CHECK-NEXT: .short 4 -; CHECK-NEXT: .short 8 - -; CHECK-LABEL: store_4_elements_64_bit_vector +; CHECK-LABEL: store_4_elements_64_bit_vector: ; CHECK: ; %bb.0: ; CHECK-NEXT: Lloh14: -; CHECK-NEXT: adrp x8, lCPI7_0@PAGE -; CHECK-NEXT: cmeq.4h v0, v0, #0 +; CHECK-NEXT: adrp x8, lCPI7_0@PAGE +; CHECK-NEXT: cmeq.4h v0, v0, #0 ; CHECK-NEXT: Lloh15: -; CHECK-NEXT: ldr d1, [x8, lCPI7_0@PAGEOFF] -; CHECK-NEXT: bic.8b v0, v1, v0 -; CHECK-NEXT: addv.4h h0, v0 -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: strb w8, [x0] -; CHECK-NEXT: ret +; CHECK-NEXT: ldr d1, [x8, lCPI7_0@PAGEOFF] +; CHECK-NEXT: bic.8b v0, v1, v0 +; CHECK-NEXT: addv.4h h0, v0 +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: strb w8, [x0] +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdr Lloh14, Lloh15 + %cmp_result = icmp ne <4 x i16> %vec, zeroinitializer store <4 x i1> %cmp_result, ptr %out @@ -225,22 +176,20 @@ } define void @store_2_elements_64_bit_vector(<2 x i32> %vec, ptr %out) { -; CHECK-LABEL: lCPI8_0: -; CHECK-NEXT: .long 1 -; CHECK-NEXT: .long 2 - -; CHECK-LABEL: store_2_elements_64_bit_vector +; CHECK-LABEL: store_2_elements_64_bit_vector: ; CHECK: ; %bb.0: ; CHECK-NEXT: Lloh16: -; CHECK-NEXT: adrp x8, lCPI8_0@PAGE -; CHECK-NEXT: cmeq.2s v0, v0, #0 +; CHECK-NEXT: adrp x8, lCPI8_0@PAGE +; CHECK-NEXT: cmeq.2s v0, v0, #0 ; CHECK-NEXT: Lloh17: -; CHECK-NEXT: ldr d1, [x8, lCPI8_0@PAGEOFF] -; CHECK-NEXT: bic.8b v0, v1, v0 -; CHECK-NEXT: addp.2s v0, v0, v0 -; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: strb w8, [x0] -; CHECK-NEXT: ret +; CHECK-NEXT: ldr d1, [x8, lCPI8_0@PAGEOFF] +; CHECK-NEXT: bic.8b v0, v1, v0 +; CHECK-NEXT: addp.2s v0, v0, v0 +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: strb w8, [x0] +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdr Lloh16, Lloh17 + %cmp_result = icmp ne <2 x i32> %vec, zeroinitializer store <2 x i1> %cmp_result, ptr %out @@ -248,9 +197,11 @@ } define void @no_combine_without_truncate(<16 x i8> %vec, ptr %out) { -; CHECK-LABEL: no_combine_without_truncate -; CHECK: cmtst.16b v0, v0, v0 -; CHECK-NOT: addv.8b b0, v0 +; CHECK-LABEL: no_combine_without_truncate: +; CHECK: ; %bb.0: +; CHECK-NEXT: cmtst.16b v0, v0, v0 +; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: ret %cmp_result = icmp ne <16 x i8> %vec, zeroinitializer %extended_result = sext <16 x i1> %cmp_result to <16 x i8> @@ -259,9 +210,12 @@ } define void @no_combine_for_non_bool_truncate(<4 x i32> %vec, ptr %out) { -; CHECK-LABEL: no_combine_for_non_bool_truncate -; CHECK: xtn.4h v0, v0 -; CHECK-NOT: addv.4s s0, v0 +; CHECK-LABEL: no_combine_for_non_bool_truncate: +; CHECK: ; %bb.0: +; CHECK-NEXT: xtn.4h v0, v0 +; CHECK-NEXT: xtn.8b v0, v0 +; CHECK-NEXT: str s0, [x0] +; CHECK-NEXT: ret %trunc = trunc <4 x i32> %vec to <4 x i8> store <4 x i8> %trunc, ptr %out @@ -269,8 +223,13 @@ } define void @no_combine_for_build_vector(i1 %a, i1 %b, i1 %c, i1 %d, ptr %out) { -; CHECK-LABEL: no_combine_for_build_vector -; CHECK-NOT: addv +; CHECK-LABEL: no_combine_for_build_vector: +; CHECK: ; %bb.0: +; CHECK-NEXT: orr w8, w0, w1, lsl #1 +; CHECK-NEXT: orr w8, w8, w2, lsl #2 +; CHECK-NEXT: orr w8, w8, w3, lsl #3 +; CHECK-NEXT: strb w8, [x4] +; CHECK-NEXT: ret %1 = insertelement <4 x i1> undef, i1 %a, i64 0 %2 = insertelement <4 x i1> %1, i1 %b, i64 1 diff --git 
a/llvm/test/CodeGen/AArch64/vec-libcalls.ll b/llvm/test/CodeGen/AArch64/vec-libcalls.ll --- a/llvm/test/CodeGen/AArch64/vec-libcalls.ll +++ b/llvm/test/CodeGen/AArch64/vec-libcalls.ll @@ -144,11 +144,11 @@ ; CHECK-NEXT: str d12, [sp, #-48]! // 8-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #8] // 16-byte Folded Spill ; CHECK-NEXT: fmov s10, s2 +; CHECK-NEXT: fmov s11, s1 ; CHECK-NEXT: stp d9, d8, [sp, #24] // 16-byte Folded Spill ; CHECK-NEXT: fmov s8, s4 -; CHECK-NEXT: str x30, [sp, #40] // 8-byte Folded Spill ; CHECK-NEXT: fmov s9, s3 -; CHECK-NEXT: fmov s11, s1 +; CHECK-NEXT: str x30, [sp, #40] // 8-byte Folded Spill ; CHECK-NEXT: bl sinf ; CHECK-NEXT: fmov s12, s0 ; CHECK-NEXT: fmov s0, s11 @@ -164,12 +164,12 @@ ; CHECK-NEXT: bl sinf ; CHECK-NEXT: fmov s1, s11 ; CHECK-NEXT: fmov s2, s10 -; CHECK-NEXT: fmov s3, s9 ; CHECK-NEXT: ldr x30, [sp, #40] // 8-byte Folded Reload +; CHECK-NEXT: fmov s3, s9 ; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #8] // 16-byte Folded Reload ; CHECK-NEXT: fmov s4, s0 ; CHECK-NEXT: fmov s0, s12 -; CHECK-NEXT: ldp d11, d10, [sp, #8] // 16-byte Folded Reload ; CHECK-NEXT: ldr d12, [sp], #48 // 8-byte Folded Reload ; CHECK-NEXT: ret %r = call <5 x float> @llvm.sin.v5f32(<5 x float> %x) @@ -182,11 +182,11 @@ ; CHECK-NEXT: stp d13, d12, [sp, #-64]! // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: fmov s10, s3 +; CHECK-NEXT: fmov s11, s2 ; CHECK-NEXT: stp d9, d8, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: fmov s8, s5 -; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill ; CHECK-NEXT: fmov s9, s4 -; CHECK-NEXT: fmov s11, s2 +; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill ; CHECK-NEXT: fmov s12, s1 ; CHECK-NEXT: bl sinf ; CHECK-NEXT: fmov s13, s0 @@ -206,12 +206,12 @@ ; CHECK-NEXT: bl sinf ; CHECK-NEXT: fmov s2, s11 ; CHECK-NEXT: fmov s3, s10 -; CHECK-NEXT: fmov s4, s9 ; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload +; CHECK-NEXT: fmov s4, s9 ; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: fmov s5, s0 ; CHECK-NEXT: fmov s0, s13 -; CHECK-NEXT: ldp d11, d10, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: fmov s1, s12 ; CHECK-NEXT: ldp d13, d12, [sp], #64 // 16-byte Folded Reload ; CHECK-NEXT: ret @@ -225,8 +225,8 @@ ; CHECK-NEXT: str d10, [sp, #-32]! 
// 8-byte Folded Spill
 ; CHECK-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill
 ; CHECK-NEXT: fmov d8, d2
-; CHECK-NEXT: str x30, [sp, #24] // 8-byte Folded Spill
 ; CHECK-NEXT: fmov d9, d1
+; CHECK-NEXT: str x30, [sp, #24] // 8-byte Folded Spill
 ; CHECK-NEXT: bl sin
 ; CHECK-NEXT: fmov d10, d0
 ; CHECK-NEXT: fmov d0, d9
@@ -235,8 +235,8 @@
 ; CHECK-NEXT: fmov d0, d8
 ; CHECK-NEXT: bl sin
 ; CHECK-NEXT: fmov d1, d9
-; CHECK-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload
 ; CHECK-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload
 ; CHECK-NEXT: fmov d2, d0
 ; CHECK-NEXT: fmov d0, d10
 ; CHECK-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload
diff --git a/llvm/test/CodeGen/AArch64/vec_cttz.ll b/llvm/test/CodeGen/AArch64/vec_cttz.ll
--- a/llvm/test/CodeGen/AArch64/vec_cttz.ll
+++ b/llvm/test/CodeGen/AArch64/vec_cttz.ll
@@ -54,7 +54,7 @@
 define <1 x i64> @cttz_v1i64(<1 x i64> %a) nounwind {
 ; CHECK-LABEL: cttz_v1i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #1
+; CHECK-NEXT: mov w8, #1 // =0x1
 ; CHECK-NEXT: fmov d1, x8
 ; CHECK-NEXT: sub d1, d0, d1
 ; CHECK-NEXT: bic v0.8b, v1.8b, v0.8b
@@ -85,8 +85,8 @@
 ; CHECK-NEXT: movi v1.8h, #1
 ; CHECK-NEXT: sub v1.8h, v0.8h, v1.8h
 ; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b
-; CHECK-NEXT: clz v0.8h, v0.8h
 ; CHECK-NEXT: movi v1.8h, #16
+; CHECK-NEXT: clz v0.8h, v0.8h
 ; CHECK-NEXT: sub v0.8h, v1.8h, v0.8h
 ; CHECK-NEXT: ret
 %b = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
@@ -99,8 +99,8 @@
 ; CHECK-NEXT: movi v1.4s, #1
 ; CHECK-NEXT: sub v1.4s, v0.4s, v1.4s
 ; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b
-; CHECK-NEXT: clz v0.4s, v0.4s
 ; CHECK-NEXT: movi v1.4s, #32
+; CHECK-NEXT: clz v0.4s, v0.4s
 ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s
 ; CHECK-NEXT: ret
 %b = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
@@ -110,7 +110,7 @@
 define <2 x i64> @cttz_v2i64(<2 x i64> %a) nounwind {
 ; CHECK-LABEL: cttz_v2i64:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #1
+; CHECK-NEXT: mov w8, #1 // =0x1
 ; CHECK-NEXT: dup v1.2d, x8
 ; CHECK-NEXT: sub v1.2d, v0.2d, v1.2d
 ; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b
diff --git a/llvm/test/CodeGen/AArch64/vec_uaddo.ll b/llvm/test/CodeGen/AArch64/vec_uaddo.ll
--- a/llvm/test/CodeGen/AArch64/vec_uaddo.ll
+++ b/llvm/test/CodeGen/AArch64/vec_uaddo.ll
@@ -52,8 +52,8 @@
 ; CHECK-NEXT: add v1.4s, v0.4s, v1.4s
 ; CHECK-NEXT: add x8, x0, #8
 ; CHECK-NEXT: cmhi v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: str d1, [x0]
 ; CHECK-NEXT: st1 { v1.s }[2], [x8]
+; CHECK-NEXT: str d1, [x0]
 ; CHECK-NEXT: ret
 %t = call {<3 x i32>, <3 x i1>} @llvm.uadd.with.overflow.v3i32(<3 x i32> %a0, <3 x i32> %a1)
 %val = extractvalue {<3 x i32>, <3 x i1>} %t, 0
@@ -81,34 +81,34 @@
 define <6 x i32> @uaddo_v6i32(<6 x i32> %a0, <6 x i32> %a1, ptr %p2) nounwind {
 ; CHECK-LABEL: uaddo_v6i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: fmov s0, w6
-; CHECK-NEXT: fmov s1, w0
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: fmov s1, w6
 ; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: fmov s3, w4
 ; CHECK-NEXT: ldr s2, [sp, #16]
 ; CHECK-NEXT: add x9, sp, #24
-; CHECK-NEXT: add x10, sp, #8
-; CHECK-NEXT: mov v0.s[1], w7
-; CHECK-NEXT: fmov s3, w4
-; CHECK-NEXT: mov v1.s[1], w1
+; CHECK-NEXT: mov v0.s[1], w1
+; CHECK-NEXT: mov v1.s[1], w7
 ; CHECK-NEXT: ld1 { v2.s }[1], [x9]
 ; CHECK-NEXT: mov v3.s[1], w5
-; CHECK-NEXT: ld1 { v0.s }[2], [x8]
-; CHECK-NEXT: mov v1.s[2], w2
-; CHECK-NEXT: ldr x8, [sp, #32]
+; CHECK-NEXT: mov v0.s[2], w2
+; CHECK-NEXT: ld1 { v1.s }[2], [x8]
+; CHECK-NEXT: add x8, sp, #8
 ; CHECK-NEXT: add v2.4s, v3.4s, v2.4s
-; CHECK-NEXT: ld1 { v0.s }[3], [x10]
-; CHECK-NEXT: mov v1.s[3], w3
-; CHECK-NEXT: str d2, [x8, #16]
+; CHECK-NEXT: ld1 { v1.s }[3], [x8]
+; CHECK-NEXT: ldr x8, [sp, #32]
+; CHECK-NEXT: mov v0.s[3], w3
 ; CHECK-NEXT: cmhi v3.4s, v3.4s, v2.4s
+; CHECK-NEXT: str d2, [x8, #16]
+; CHECK-NEXT: add v1.4s, v0.4s, v1.4s
 ; CHECK-NEXT: mov w5, v3.s[1]
 ; CHECK-NEXT: fmov w4, s3
-; CHECK-NEXT: add v0.4s, v1.4s, v0.4s
-; CHECK-NEXT: cmhi v1.4s, v1.4s, v0.4s
-; CHECK-NEXT: str q0, [x8]
-; CHECK-NEXT: mov w1, v1.s[1]
-; CHECK-NEXT: mov w2, v1.s[2]
-; CHECK-NEXT: mov w3, v1.s[3]
-; CHECK-NEXT: fmov w0, s1
+; CHECK-NEXT: cmhi v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: str q1, [x8]
+; CHECK-NEXT: mov w1, v0.s[1]
+; CHECK-NEXT: mov w2, v0.s[2]
+; CHECK-NEXT: mov w3, v0.s[3]
+; CHECK-NEXT: fmov w0, s0
 ; CHECK-NEXT: ret
 %t = call {<6 x i32>, <6 x i1>} @llvm.uadd.with.overflow.v6i32(<6 x i32> %a0, <6 x i32> %a1)
 %val = extractvalue {<6 x i32>, <6 x i1>} %t, 0
@@ -121,10 +121,10 @@
 define <8 x i32> @uaddo_v8i32(<8 x i32> %a0, <8 x i32> %a1, ptr %p2) nounwind {
 ; CHECK-LABEL: uaddo_v8i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: add v3.4s, v1.4s, v3.4s
 ; CHECK-NEXT: add v2.4s, v0.4s, v2.4s
-; CHECK-NEXT: cmhi v1.4s, v1.4s, v3.4s
+; CHECK-NEXT: add v3.4s, v1.4s, v3.4s
 ; CHECK-NEXT: cmhi v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: cmhi v1.4s, v1.4s, v3.4s
 ; CHECK-NEXT: stp q2, q3, [x0]
 ; CHECK-NEXT: ret
 %t = call {<8 x i32>, <8 x i1>} @llvm.uadd.with.overflow.v8i32(<8 x i32> %a0, <8 x i32> %a1)
@@ -141,23 +141,23 @@
 ; CHECK-NEXT: add v4.16b, v0.16b, v1.16b
 ; CHECK-NEXT: cmhi v0.16b, v0.16b, v4.16b
 ; CHECK-NEXT: str q4, [x0]
-; CHECK-NEXT: zip1 v1.8b, v0.8b, v0.8b
-; CHECK-NEXT: zip2 v2.8b, v0.8b, v0.8b
-; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: ushll v1.4s, v1.4h, #0
-; CHECK-NEXT: zip1 v3.8b, v0.8b, v0.8b
-; CHECK-NEXT: zip2 v5.8b, v0.8b, v0.8b
-; CHECK-NEXT: shl v0.4s, v1.4s, #31
+; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: zip1 v2.8b, v0.8b, v0.8b
+; CHECK-NEXT: zip2 v0.8b, v0.8b, v0.8b
+; CHECK-NEXT: zip1 v3.8b, v1.8b, v0.8b
+; CHECK-NEXT: zip2 v1.8b, v1.8b, v0.8b
 ; CHECK-NEXT: ushll v2.4s, v2.4h, #0
-; CHECK-NEXT: cmlt v0.4s, v0.4s, #0
-; CHECK-NEXT: shl v1.4s, v2.4s, #31
-; CHECK-NEXT: ushll v2.4s, v3.4h, #0
-; CHECK-NEXT: ushll v3.4s, v5.4h, #0
+; CHECK-NEXT: ushll v0.4s, v0.4h, #0
 ; CHECK-NEXT: shl v2.4s, v2.4s, #31
+; CHECK-NEXT: ushll v3.4s, v3.4h, #0
+; CHECK-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-NEXT: shl v5.4s, v0.4s, #31
+; CHECK-NEXT: cmlt v0.4s, v2.4s, #0
 ; CHECK-NEXT: shl v3.4s, v3.4s, #31
-; CHECK-NEXT: cmlt v1.4s, v1.4s, #0
-; CHECK-NEXT: cmlt v2.4s, v2.4s, #0
-; CHECK-NEXT: cmlt v3.4s, v3.4s, #0
+; CHECK-NEXT: shl v6.4s, v1.4s, #31
+; CHECK-NEXT: cmlt v1.4s, v5.4s, #0
+; CHECK-NEXT: cmlt v2.4s, v3.4s, #0
+; CHECK-NEXT: cmlt v3.4s, v6.4s, #0
 ; CHECK-NEXT: ret
 %t = call {<16 x i8>, <16 x i1>} @llvm.uadd.with.overflow.v16i8(<16 x i8> %a0, <16 x i8> %a1)
 %val = extractvalue {<16 x i8>, <16 x i1>} %t, 0
@@ -213,26 +213,26 @@
 ; CHECK-NEXT: bic v1.4s, #255, lsl #24
 ; CHECK-NEXT: bic v0.4s, #255, lsl #24
 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: mov v1.16b, v0.16b
 ; CHECK-NEXT: mov w8, v0.s[3]
 ; CHECK-NEXT: mov w9, v0.s[2]
 ; CHECK-NEXT: mov w10, v0.s[1]
 ; CHECK-NEXT: fmov w11, s0
-; CHECK-NEXT: mov v1.16b, v0.16b
 ; CHECK-NEXT: bic v1.4s, #1, lsl #24
 ; CHECK-NEXT: sturh w8, [x0, #9]
 ; CHECK-NEXT: lsr w8, w8, #16
-; CHECK-NEXT: cmeq v1.4s, v1.4s, v0.4s
 ; CHECK-NEXT: strh w9, [x0, #6]
-; CHECK-NEXT: sturh w10, [x0, #3]
 ; CHECK-NEXT: lsr w9, w9, #16
-; CHECK-NEXT: lsr w10, w10, #16
+; CHECK-NEXT: cmeq v1.4s, v1.4s, v0.4s
 ; CHECK-NEXT: strb w8, [x0, #11]
-; CHECK-NEXT: lsr w8, w11, #16
-; CHECK-NEXT: strh w11, [x0]
-; CHECK-NEXT: mvn v0.16b, v1.16b
+; CHECK-NEXT: lsr w8, w10, #16
 ; CHECK-NEXT: strb w9, [x0, #8]
-; CHECK-NEXT: strb w10, [x0, #5]
-; CHECK-NEXT: strb w8, [x0, #2]
+; CHECK-NEXT: lsr w9, w11, #16
+; CHECK-NEXT: sturh w10, [x0, #3]
+; CHECK-NEXT: mvn v0.16b, v1.16b
+; CHECK-NEXT: strh w11, [x0]
+; CHECK-NEXT: strb w8, [x0, #5]
+; CHECK-NEXT: strb w9, [x0, #2]
 ; CHECK-NEXT: ret
 %t = call {<4 x i24>, <4 x i1>} @llvm.uadd.with.overflow.v4i24(<4 x i24> %a0, <4 x i24> %a1)
 %val = extractvalue {<4 x i24>, <4 x i1>} %t, 0
@@ -247,16 +247,16 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: movi v2.4h, #1
 ; CHECK-NEXT: adrp x8, .LCPI10_0
-; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI10_0]
 ; CHECK-NEXT: and v1.8b, v1.8b, v2.8b
 ; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
 ; CHECK-NEXT: add v0.4h, v0.4h, v1.4h
 ; CHECK-NEXT: fmov d1, d0
 ; CHECK-NEXT: shl v2.4h, v0.4h, #15
+; CHECK-NEXT: cmlt v2.4h, v2.4h, #0
 ; CHECK-NEXT: bic v1.4h, #2
 ; CHECK-NEXT: cmeq v0.4h, v1.4h, v0.4h
-; CHECK-NEXT: cmlt v1.4h, v2.4h, #0
-; CHECK-NEXT: and v1.8b, v1.8b, v3.8b
+; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI10_0]
+; CHECK-NEXT: and v1.8b, v2.8b, v1.8b
 ; CHECK-NEXT: mvn v0.8b, v0.8b
 ; CHECK-NEXT: addv h1, v1.4h
 ; CHECK-NEXT: sshll v0.4s, v0.4h, #0
@@ -284,8 +284,8 @@
 ; CHECK-NEXT: mov v0.s[1], w10
 ; CHECK-NEXT: ldr x10, [sp]
 ; CHECK-NEXT: stp x8, x9, [x10, #16]
-; CHECK-NEXT: shl v0.2s, v0.2s, #31
 ; CHECK-NEXT: stp x11, x12, [x10]
+; CHECK-NEXT: shl v0.2s, v0.2s, #31
 ; CHECK-NEXT: cmlt v0.2s, v0.2s, #0
 ; CHECK-NEXT: ret
 %t = call {<2 x i128>, <2 x i1>} @llvm.uadd.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1)
diff --git a/llvm/test/CodeGen/AArch64/vec_umulo.ll b/llvm/test/CodeGen/AArch64/vec_umulo.ll
--- a/llvm/test/CodeGen/AArch64/vec_umulo.ll
+++ b/llvm/test/CodeGen/AArch64/vec_umulo.ll
@@ -54,8 +54,8 @@
 ; CHECK-LABEL: umulo_v3i32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: umull2 v2.2d, v0.4s, v1.4s
-; CHECK-NEXT: add x8, x0, #8
 ; CHECK-NEXT: umull v3.2d, v0.2s, v1.2s
+; CHECK-NEXT: add x8, x0, #8
 ; CHECK-NEXT: mul v1.4s, v0.4s, v1.4s
 ; CHECK-NEXT: uzp2 v2.4s, v3.4s, v2.4s
 ; CHECK-NEXT: st1 { v1.s }[2], [x8]
@@ -93,40 +93,40 @@
 define <6 x i32> @umulo_v6i32(<6 x i32> %a0, <6 x i32> %a1, ptr %p2) nounwind {
 ; CHECK-LABEL: umulo_v6i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: fmov s0, w6
-; CHECK-NEXT: fmov s1, w0
+; CHECK-NEXT: fmov s0, w0
+; CHECK-NEXT: fmov s1, w6
 ; CHECK-NEXT: mov x8, sp
+; CHECK-NEXT: fmov s3, w4
 ; CHECK-NEXT: ldr s2, [sp, #16]
 ; CHECK-NEXT: add x9, sp, #24
-; CHECK-NEXT: add x10, sp, #8
-; CHECK-NEXT: mov v0.s[1], w7
-; CHECK-NEXT: fmov s3, w4
-; CHECK-NEXT: mov v1.s[1], w1
+; CHECK-NEXT: mov v0.s[1], w1
+; CHECK-NEXT: mov v1.s[1], w7
 ; CHECK-NEXT: ld1 { v2.s }[1], [x9]
 ; CHECK-NEXT: mov v3.s[1], w5
-; CHECK-NEXT: ld1 { v0.s }[2], [x8]
-; CHECK-NEXT: mov v1.s[2], w2
-; CHECK-NEXT: ldr x8, [sp, #32]
-; CHECK-NEXT: umull2 v4.2d, v3.4s, v2.4s
-; CHECK-NEXT: ld1 { v0.s }[3], [x10]
-; CHECK-NEXT: mov v1.s[3], w3
+; CHECK-NEXT: mov v0.s[2], w2
+; CHECK-NEXT: ld1 { v1.s }[2], [x8]
+; CHECK-NEXT: add x8, sp, #8
+; CHECK-NEXT: umull2 v6.2d, v3.4s, v2.4s
 ; CHECK-NEXT: umull v7.2d, v3.2s, v2.2s
 ; CHECK-NEXT: mul v2.4s, v3.4s, v2.4s
-; CHECK-NEXT: umull2 v5.2d, v1.4s, v0.4s
-; CHECK-NEXT: umull v6.2d, v1.2s, v0.2s
-; CHECK-NEXT: uzp2 v4.4s, v7.4s, v4.4s
+; CHECK-NEXT: ld1 { v1.s }[3], [x8]
+; CHECK-NEXT: ldr x8, [sp, #32]
+; CHECK-NEXT: mov v0.s[3], w3
 ; CHECK-NEXT: str d2, [x8, #16]
-; CHECK-NEXT: mul v0.4s, v1.4s, v0.4s
-; CHECK-NEXT: uzp2 v5.4s, v6.4s, v5.4s
-; CHECK-NEXT: cmtst v4.4s, v4.4s, v4.4s
+; CHECK-NEXT: umull2 v4.2d, v0.4s, v1.4s
+; CHECK-NEXT: umull v5.2d, v0.2s, v1.2s
+; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: uzp2 v4.4s, v5.4s, v4.4s
+; CHECK-NEXT: uzp2 v5.4s, v7.4s, v6.4s
 ; CHECK-NEXT: str q0, [x8]
+; CHECK-NEXT: cmtst v4.4s, v4.4s, v4.4s
 ; CHECK-NEXT: cmtst v5.4s, v5.4s, v5.4s
-; CHECK-NEXT: mov w5, v4.s[1]
-; CHECK-NEXT: fmov w4, s4
-; CHECK-NEXT: mov w1, v5.s[1]
-; CHECK-NEXT: mov w2, v5.s[2]
-; CHECK-NEXT: mov w3, v5.s[3]
-; CHECK-NEXT: fmov w0, s5
+; CHECK-NEXT: mov w1, v4.s[1]
+; CHECK-NEXT: mov w2, v4.s[2]
+; CHECK-NEXT: mov w5, v5.s[1]
+; CHECK-NEXT: mov w3, v4.s[3]
+; CHECK-NEXT: fmov w4, s5
+; CHECK-NEXT: fmov w0, s4
 ; CHECK-NEXT: ret
 %t = call {<6 x i32>, <6 x i1>} @llvm.umul.with.overflow.v6i32(<6 x i32> %a0, <6 x i32> %a1)
 %val = extractvalue {<6 x i32>, <6 x i1>} %t, 0
@@ -139,17 +139,17 @@
 define <8 x i32> @umulo_v8i32(<8 x i32> %a0, <8 x i32> %a1, ptr %p2) nounwind {
 ; CHECK-LABEL: umulo_v8i32:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: umull2 v4.2d, v1.4s, v3.4s
-; CHECK-NEXT: umull2 v5.2d, v0.4s, v2.4s
-; CHECK-NEXT: umull v6.2d, v0.2s, v2.2s
+; CHECK-NEXT: umull2 v4.2d, v0.4s, v2.4s
+; CHECK-NEXT: umull v5.2d, v0.2s, v2.2s
+; CHECK-NEXT: umull2 v6.2d, v1.4s, v3.4s
 ; CHECK-NEXT: umull v7.2d, v1.2s, v3.2s
-; CHECK-NEXT: mul v3.4s, v1.4s, v3.4s
+; CHECK-NEXT: mul v1.4s, v1.4s, v3.4s
 ; CHECK-NEXT: mul v2.4s, v0.4s, v2.4s
-; CHECK-NEXT: uzp2 v5.4s, v6.4s, v5.4s
-; CHECK-NEXT: uzp2 v6.4s, v7.4s, v4.4s
-; CHECK-NEXT: stp q2, q3, [x0]
-; CHECK-NEXT: cmtst v4.4s, v5.4s, v5.4s
-; CHECK-NEXT: cmtst v5.4s, v6.4s, v6.4s
+; CHECK-NEXT: uzp2 v4.4s, v5.4s, v4.4s
+; CHECK-NEXT: uzp2 v5.4s, v7.4s, v6.4s
+; CHECK-NEXT: stp q2, q1, [x0]
+; CHECK-NEXT: cmtst v4.4s, v4.4s, v4.4s
+; CHECK-NEXT: cmtst v5.4s, v5.4s, v5.4s
 ; CHECK-NEXT: mov v0.16b, v4.16b
 ; CHECK-NEXT: mov v1.16b, v5.16b
 ; CHECK-NEXT: ret
@@ -170,23 +170,23 @@
 ; CHECK-NEXT: uzp2 v2.16b, v3.16b, v2.16b
 ; CHECK-NEXT: str q6, [x0]
 ; CHECK-NEXT: cmtst v2.16b, v2.16b, v2.16b
-; CHECK-NEXT: zip1 v3.8b, v2.8b, v0.8b
-; CHECK-NEXT: zip2 v4.8b, v2.8b, v0.8b
-; CHECK-NEXT: ext v2.16b, v2.16b, v2.16b, #8
-; CHECK-NEXT: ushll v3.4s, v3.4h, #0
-; CHECK-NEXT: zip1 v5.8b, v2.8b, v0.8b
+; CHECK-NEXT: ext v3.16b, v2.16b, v2.16b, #8
+; CHECK-NEXT: zip1 v4.8b, v2.8b, v0.8b
 ; CHECK-NEXT: zip2 v2.8b, v2.8b, v0.8b
-; CHECK-NEXT: shl v3.4s, v3.4s, #31
+; CHECK-NEXT: zip1 v5.8b, v3.8b, v0.8b
+; CHECK-NEXT: zip2 v3.8b, v3.8b, v0.8b
 ; CHECK-NEXT: ushll v4.4s, v4.4h, #0
-; CHECK-NEXT: cmlt v0.4s, v3.4s, #0
-; CHECK-NEXT: ushll v3.4s, v5.4h, #0
 ; CHECK-NEXT: ushll v2.4s, v2.4h, #0
-; CHECK-NEXT: shl v1.4s, v4.4s, #31
+; CHECK-NEXT: shl v4.4s, v4.4s, #31
+; CHECK-NEXT: ushll v5.4s, v5.4h, #0
+; CHECK-NEXT: ushll v3.4s, v3.4h, #0
+; CHECK-NEXT: shl v2.4s, v2.4s, #31
+; CHECK-NEXT: cmlt v0.4s, v4.4s, #0
+; CHECK-NEXT: shl v5.4s, v5.4s, #31
 ; CHECK-NEXT: shl v3.4s, v3.4s, #31
-; CHECK-NEXT: shl v4.4s, v2.4s, #31
-; CHECK-NEXT: cmlt v1.4s, v1.4s, #0
-; CHECK-NEXT: cmlt v2.4s, v3.4s, #0
-; CHECK-NEXT: cmlt v3.4s, v4.4s, #0
+; CHECK-NEXT: cmlt v1.4s, v2.4s, #0
+; CHECK-NEXT: cmlt v2.4s, v5.4s, #0
+; CHECK-NEXT: cmlt v3.4s, v3.4s, #0
 ; CHECK-NEXT: ret
 %t = call {<16 x i8>, <16 x i1>} @llvm.umul.with.overflow.v16i8(<16 x i8> %a0, <16 x i8> %a1)
 %val = extractvalue {<16 x i8>, <16 x i1>} %t, 0
@@ -201,7 +201,9 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: umull2 v2.4s, v0.8h, v1.8h
 ; CHECK-NEXT: umull v3.4s, v0.4h, v1.4h
+; CHECK-NEXT: mul v4.8h, v0.8h, v1.8h
 ; CHECK-NEXT: uzp2 v2.8h, v3.8h, v2.8h
+; CHECK-NEXT: str q4, [x0]
 ; CHECK-NEXT: cmtst v2.8h, v2.8h, v2.8h
 ; CHECK-NEXT: xtn v2.8b, v2.8h
 ; CHECK-NEXT: zip1 v3.8b, v2.8b, v0.8b
@@ -209,13 +211,9 @@
 ; CHECK-NEXT: ushll v3.4s, v3.4h, #0
 ; CHECK-NEXT: ushll v2.4s, v2.4h, #0
 ; CHECK-NEXT: shl v3.4s, v3.4s, #31
-; CHECK-NEXT: shl v4.4s, v2.4s, #31
-; CHECK-NEXT: cmlt v2.4s, v3.4s, #0
-; CHECK-NEXT: cmlt v3.4s, v4.4s, #0
-; CHECK-NEXT: mul v4.8h, v0.8h, v1.8h
-; CHECK-NEXT: mov v0.16b, v2.16b
-; CHECK-NEXT: mov v1.16b, v3.16b
-; CHECK-NEXT: str q4, [x0]
+; CHECK-NEXT: shl v2.4s, v2.4s, #31
+; CHECK-NEXT: cmlt v0.4s, v3.4s, #0
+; CHECK-NEXT: cmlt v1.4s, v2.4s, #0
 ; CHECK-NEXT: ret
 %t = call {<8 x i16>, <8 x i1>} @llvm.umul.with.overflow.v8i16(<8 x i16> %a0, <8 x i16> %a1)
 %val = extractvalue {<8 x i16>, <8 x i1>} %t, 0
@@ -229,23 +227,23 @@
 ; CHECK-LABEL: umulo_v2i64:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: mov x8, v1.d[1]
-; CHECK-NEXT: fmov x10, d1
 ; CHECK-NEXT: mov x9, v0.d[1]
-; CHECK-NEXT: fmov x11, d0
-; CHECK-NEXT: umulh x12, x9, x8
-; CHECK-NEXT: umulh x13, x11, x10
-; CHECK-NEXT: cmp xzr, x12
-; CHECK-NEXT: mul x10, x11, x10
-; CHECK-NEXT: csetm x12, ne
+; CHECK-NEXT: fmov x11, d1
+; CHECK-NEXT: fmov x12, d0
+; CHECK-NEXT: umulh x10, x9, x8
+; CHECK-NEXT: umulh x13, x12, x11
+; CHECK-NEXT: mul x11, x12, x11
+; CHECK-NEXT: cmp xzr, x10
+; CHECK-NEXT: csetm x10, ne
+; CHECK-NEXT: mul x8, x9, x8
 ; CHECK-NEXT: cmp xzr, x13
 ; CHECK-NEXT: csetm x13, ne
-; CHECK-NEXT: mul x8, x9, x8
-; CHECK-NEXT: fmov d1, x10
 ; CHECK-NEXT: fmov d0, x13
+; CHECK-NEXT: fmov d1, x11
+; CHECK-NEXT: mov v0.d[1], x10
 ; CHECK-NEXT: mov v1.d[1], x8
-; CHECK-NEXT: mov v0.d[1], x12
-; CHECK-NEXT: str q1, [x0]
 ; CHECK-NEXT: xtn v0.2s, v0.2d
+; CHECK-NEXT: str q1, [x0]
 ; CHECK-NEXT: ret
 %t = call {<2 x i64>, <2 x i1>} @llvm.umul.with.overflow.v2i64(<2 x i64> %a0, <2 x i64> %a1)
 %val = extractvalue {<2 x i64>, <2 x i1>} %t, 0
@@ -260,30 +258,30 @@
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: bic v1.4s, #255, lsl #24
 ; CHECK-NEXT: bic v0.4s, #255, lsl #24
-; CHECK-NEXT: mul v2.4s, v0.4s, v1.4s
-; CHECK-NEXT: umull2 v3.2d, v0.4s, v1.4s
-; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s
-; CHECK-NEXT: mov w8, v2.s[3]
-; CHECK-NEXT: mov w10, v2.s[2]
-; CHECK-NEXT: mov w11, v2.s[1]
-; CHECK-NEXT: ushr v1.4s, v2.4s, #24
-; CHECK-NEXT: uzp2 v0.4s, v0.4s, v3.4s
-; CHECK-NEXT: fmov w9, s2
-; CHECK-NEXT: cmtst v1.4s, v1.4s, v1.4s
+; CHECK-NEXT: umull2 v2.2d, v0.4s, v1.4s
+; CHECK-NEXT: umull v3.2d, v0.2s, v1.2s
+; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: uzp2 v1.4s, v3.4s, v2.4s
+; CHECK-NEXT: ushr v2.4s, v0.4s, #24
+; CHECK-NEXT: mov w8, v0.s[3]
+; CHECK-NEXT: mov w9, v0.s[2]
+; CHECK-NEXT: mov w10, v0.s[1]
+; CHECK-NEXT: fmov w11, s0
+; CHECK-NEXT: cmtst v2.4s, v2.4s, v2.4s
+; CHECK-NEXT: cmeq v1.4s, v1.4s, #0
 ; CHECK-NEXT: sturh w8, [x0, #9]
 ; CHECK-NEXT: lsr w8, w8, #16
-; CHECK-NEXT: strh w10, [x0, #6]
-; CHECK-NEXT: lsr w10, w10, #16
-; CHECK-NEXT: cmeq v0.4s, v0.4s, #0
-; CHECK-NEXT: sturh w11, [x0, #3]
-; CHECK-NEXT: lsr w11, w11, #16
+; CHECK-NEXT: strh w9, [x0, #6]
+; CHECK-NEXT: lsr w9, w9, #16
 ; CHECK-NEXT: strb w8, [x0, #11]
-; CHECK-NEXT: lsr w8, w9, #16
-; CHECK-NEXT: strh w9, [x0]
-; CHECK-NEXT: orn v0.16b, v1.16b, v0.16b
-; CHECK-NEXT: strb w10, [x0, #8]
-; CHECK-NEXT: strb w11, [x0, #5]
-; CHECK-NEXT: strb w8, [x0, #2]
+; CHECK-NEXT: lsr w8, w10, #16
+; CHECK-NEXT: orn v0.16b, v2.16b, v1.16b
+; CHECK-NEXT: strb w9, [x0, #8]
+; CHECK-NEXT: lsr w9, w11, #16
+; CHECK-NEXT: sturh w10, [x0, #3]
+; CHECK-NEXT: strh w11, [x0]
+; CHECK-NEXT: strb w8, [x0, #5]
+; CHECK-NEXT: strb w9, [x0, #2]
 ; CHECK-NEXT: ret
 %t = call {<4 x i24>, <4 x i1>} @llvm.umul.with.overflow.v4i24(<4 x i24> %a0, <4 x i24> %a1)
 %val = extractvalue {<4 x i24>, <4 x i1>} %t, 0
@@ -296,10 +294,10 @@
 define <4 x i32> @umulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, ptr %p2) nounwind {
 ; CHECK-LABEL: umulo_v4i1:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: adrp x8, .LCPI10_0
 ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
-; CHECK-NEXT: shl v0.4h, v0.4h, #15
+; CHECK-NEXT: adrp x8, .LCPI10_0
 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI10_0]
+; CHECK-NEXT: shl v0.4h, v0.4h, #15
 ; CHECK-NEXT: cmlt v0.4h, v0.4h, #0
 ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
 ; CHECK-NEXT: addv h1, v0.4h
@@ -318,38 +316,38 @@
 define <2 x i32> @umulo_v2i128(<2 x i128> %a0, <2 x i128> %a1, ptr %p2) nounwind {
 ; CHECK-LABEL: umulo_v2i128:
 ; CHECK: // %bb.0:
+; CHECK-NEXT: mul x9, x7, x2
 ; CHECK-NEXT: cmp x3, #0
-; CHECK-NEXT: umulh x8, x3, x6
 ; CHECK-NEXT: ccmp x7, #0, #4, ne
-; CHECK-NEXT: umulh x9, x7, x2
-; CHECK-NEXT: umulh x11, x5, x0
+; CHECK-NEXT: umulh x10, x3, x6
+; CHECK-NEXT: umulh x8, x7, x2
+; CHECK-NEXT: madd x9, x3, x6, x9
+; CHECK-NEXT: ccmp xzr, x10, #0, eq
+; CHECK-NEXT: umulh x11, x2, x6
 ; CHECK-NEXT: ccmp xzr, x8, #0, eq
-; CHECK-NEXT: mul x8, x7, x2
-; CHECK-NEXT: madd x8, x3, x6, x8
-; CHECK-NEXT: ccmp xzr, x9, #0, eq
-; CHECK-NEXT: umulh x9, x2, x6
-; CHECK-NEXT: cset w10, ne
-; CHECK-NEXT: adds x8, x9, x8
-; CHECK-NEXT: csinc w9, w10, wzr, lo
+; CHECK-NEXT: mul x13, x5, x0
+; CHECK-NEXT: cset w8, ne
+; CHECK-NEXT: umulh x14, x1, x4
+; CHECK-NEXT: adds x9, x11, x9
+; CHECK-NEXT: umulh x12, x5, x0
+; CHECK-NEXT: csinc w8, w8, wzr, lo
 ; CHECK-NEXT: cmp x1, #0
 ; CHECK-NEXT: ccmp x5, #0, #4, ne
-; CHECK-NEXT: umulh x10, x1, x4
-; CHECK-NEXT: ccmp xzr, x10, #0, eq
-; CHECK-NEXT: mul x10, x5, x0
-; CHECK-NEXT: madd x10, x1, x4, x10
-; CHECK-NEXT: ccmp xzr, x11, #0, eq
+; CHECK-NEXT: madd x10, x1, x4, x13
+; CHECK-NEXT: ccmp xzr, x14, #0, eq
 ; CHECK-NEXT: umulh x11, x0, x4
+; CHECK-NEXT: ccmp xzr, x12, #0, eq
 ; CHECK-NEXT: cset w12, ne
 ; CHECK-NEXT: adds x10, x11, x10
 ; CHECK-NEXT: csinc w11, w12, wzr, lo
-; CHECK-NEXT: mul x12, x0, x4
+; CHECK-NEXT: ldr x12, [sp]
 ; CHECK-NEXT: fmov s0, w11
-; CHECK-NEXT: ldr x11, [sp]
-; CHECK-NEXT: mov v0.s[1], w9
-; CHECK-NEXT: mul x9, x2, x6
-; CHECK-NEXT: stp x12, x10, [x11]
+; CHECK-NEXT: mul x11, x0, x4
+; CHECK-NEXT: mov v0.s[1], w8
+; CHECK-NEXT: mul x8, x2, x6
+; CHECK-NEXT: stp x11, x10, [x12]
 ; CHECK-NEXT: shl v0.2s, v0.2s, #31
-; CHECK-NEXT: stp x9, x8, [x11, #16]
+; CHECK-NEXT: stp x8, x9, [x12, #16]
 ; CHECK-NEXT: cmlt v0.2s, v0.2s, #0
 ; CHECK-NEXT: ret
 %t = call {<2 x i128>, <2 x i1>} @llvm.umul.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1)
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-add.ll b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
--- a/llvm/test/CodeGen/AArch64/vecreduce-add.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-add.ll
@@ -276,10 +276,10 @@
 ;
 ; CHECK-DOT-LABEL: add_v8i8_v8i32_zext:
 ; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: movi v1.8b, #1
-; CHECK-DOT-NEXT: movi v2.2d, #0000000000000000
-; CHECK-DOT-NEXT: udot v2.2s, v0.8b, v1.8b
-; CHECK-DOT-NEXT: addp v0.2s, v2.2s, v2.2s
+; CHECK-DOT-NEXT: movi v1.2d, #0000000000000000
+; CHECK-DOT-NEXT: movi v2.8b, #1
+; CHECK-DOT-NEXT: udot v1.2s, v0.8b, v2.8b
+; CHECK-DOT-NEXT: addp v0.2s, v1.2s, v1.2s
 ; CHECK-DOT-NEXT: fmov w0, s0
 ; CHECK-DOT-NEXT: ret
 entry:
@@ -298,10 +298,10 @@
 ;
 ; CHECK-DOT-LABEL: add_v8i8_v8i32_sext:
 ; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: movi v1.8b, #1
-; CHECK-DOT-NEXT: movi v2.2d, #0000000000000000
-; CHECK-DOT-NEXT: sdot v2.2s, v0.8b, v1.8b
-; CHECK-DOT-NEXT: addp v0.2s, v2.2s, v2.2s
+; CHECK-DOT-NEXT: movi v1.2d, #0000000000000000
+; CHECK-DOT-NEXT: movi v2.8b, #1
+; CHECK-DOT-NEXT: sdot v1.2s, v0.8b, v2.8b
+; CHECK-DOT-NEXT: addp v0.2s, v1.2s, v1.2s
 ; CHECK-DOT-NEXT: fmov w0, s0
 ; CHECK-DOT-NEXT: ret
 entry:
@@ -407,17 +407,17 @@
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: ushll2 v1.8h, v0.16b, #0
 ; CHECK-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEXT: ushll v2.4s, v1.4h, #0
-; CHECK-NEXT: ushll2 v1.4s, v1.8h, #0
+; CHECK-NEXT: ushll2 v2.4s, v1.8h, #0
 ; CHECK-NEXT: ushll2 v3.4s, v0.8h, #0
+; CHECK-NEXT: ushll v1.4s, v1.4h, #0
 ; CHECK-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-NEXT: uaddl2 v4.2d, v3.4s, v1.4s
-; CHECK-NEXT: uaddl2 v5.2d, v0.4s, v2.4s
-; CHECK-NEXT: uaddl v1.2d, v3.2s, v1.2s
-; CHECK-NEXT: uaddl v0.2d, v0.2s, v2.2s
-; CHECK-NEXT: add v2.2d, v5.2d, v4.2d
-; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: uaddl2 v4.2d, v3.4s, v2.4s
+; CHECK-NEXT: uaddl v2.2d, v3.2s, v2.2s
+; CHECK-NEXT: uaddl2 v5.2d, v0.4s, v1.4s
+; CHECK-NEXT: uaddl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT: add v1.2d, v5.2d, v4.2d
 ; CHECK-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
 ; CHECK-NEXT: addp d0, v0.2d
 ; CHECK-NEXT: fmov x0, d0
 ; CHECK-NEXT: ret
@@ -432,17 +432,17 @@
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: sshll2 v1.8h, v0.16b, #0
 ; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: sshll v2.4s, v1.4h, #0
-; CHECK-NEXT: sshll2 v1.4s, v1.8h, #0
+; CHECK-NEXT: sshll2 v2.4s, v1.8h, #0
 ; CHECK-NEXT: sshll2 v3.4s, v0.8h, #0
+; CHECK-NEXT: sshll v1.4s, v1.4h, #0
 ; CHECK-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-NEXT: saddl2 v4.2d, v3.4s, v1.4s
-; CHECK-NEXT: saddl2 v5.2d, v0.4s, v2.4s
-; CHECK-NEXT: saddl v1.2d, v3.2s, v1.2s
-; CHECK-NEXT: saddl v0.2d, v0.2s, v2.2s
-; CHECK-NEXT: add v2.2d, v5.2d, v4.2d
-; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: saddl2 v4.2d, v3.4s, v2.4s
+; CHECK-NEXT: saddl v2.2d, v3.2s, v2.2s
+; CHECK-NEXT: saddl2 v5.2d, v0.4s, v1.4s
+; CHECK-NEXT: saddl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT: add v1.2d, v5.2d, v4.2d
 ; CHECK-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
 ; CHECK-NEXT: addp d0, v0.2d
 ; CHECK-NEXT: fmov x0, d0
 ; CHECK-NEXT: ret
@@ -876,10 +876,10 @@
 ;
 ; CHECK-DOT-LABEL: add_v8i8_v8i32_acc_zext:
 ; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: movi v1.8b, #1
-; CHECK-DOT-NEXT: movi v2.2d, #0000000000000000
-; CHECK-DOT-NEXT: udot v2.2s, v0.8b, v1.8b
-; CHECK-DOT-NEXT: addp v0.2s, v2.2s, v2.2s
+; CHECK-DOT-NEXT: movi v1.2d, #0000000000000000
+; CHECK-DOT-NEXT: movi v2.8b, #1
+; CHECK-DOT-NEXT: udot v1.2s, v0.8b, v2.8b
+; CHECK-DOT-NEXT: addp v0.2s, v1.2s, v1.2s
 ; CHECK-DOT-NEXT: fmov w8, s0
 ; CHECK-DOT-NEXT: add w0, w8, w0
 ; CHECK-DOT-NEXT: ret
@@ -901,10 +901,10 @@
 ;
 ; CHECK-DOT-LABEL: add_v8i8_v8i32_acc_sext:
 ; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: movi v1.8b, #1
-; CHECK-DOT-NEXT: movi v2.2d, #0000000000000000
-; CHECK-DOT-NEXT: sdot v2.2s, v0.8b, v1.8b
-; CHECK-DOT-NEXT: addp v0.2s, v2.2s, v2.2s
+; CHECK-DOT-NEXT: movi v1.2d, #0000000000000000
+; CHECK-DOT-NEXT: movi v2.8b, #1
+; CHECK-DOT-NEXT: sdot v1.2s, v0.8b, v2.8b
+; CHECK-DOT-NEXT: addp v0.2s, v1.2s, v1.2s
 ; CHECK-DOT-NEXT: fmov w8, s0
 ; CHECK-DOT-NEXT: add w0, w8, w0
 ; CHECK-DOT-NEXT: ret
@@ -1029,17 +1029,17 @@
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: ushll2 v1.8h, v0.16b, #0
 ; CHECK-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEXT: ushll v2.4s, v1.4h, #0
-; CHECK-NEXT: ushll2 v1.4s, v1.8h, #0
+; CHECK-NEXT: ushll2 v2.4s, v1.8h, #0
 ; CHECK-NEXT: ushll2 v3.4s, v0.8h, #0
+; CHECK-NEXT: ushll v1.4s, v1.4h, #0
 ; CHECK-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-NEXT: uaddl2 v4.2d, v3.4s, v1.4s
-; CHECK-NEXT: uaddl2 v5.2d, v0.4s, v2.4s
-; CHECK-NEXT: uaddl v1.2d, v3.2s, v1.2s
-; CHECK-NEXT: uaddl v0.2d, v0.2s, v2.2s
-; CHECK-NEXT: add v2.2d, v5.2d, v4.2d
-; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: uaddl2 v4.2d, v3.4s, v2.4s
+; CHECK-NEXT: uaddl v2.2d, v3.2s, v2.2s
+; CHECK-NEXT: uaddl2 v5.2d, v0.4s, v1.4s
+; CHECK-NEXT: uaddl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT: add v1.2d, v5.2d, v4.2d
 ; CHECK-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
 ; CHECK-NEXT: addp d0, v0.2d
 ; CHECK-NEXT: fmov x8, d0
 ; CHECK-NEXT: add x0, x8, x0
@@ -1056,17 +1056,17 @@
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: sshll2 v1.8h, v0.16b, #0
 ; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: sshll v2.4s, v1.4h, #0
-; CHECK-NEXT: sshll2 v1.4s, v1.8h, #0
+; CHECK-NEXT: sshll2 v2.4s, v1.8h, #0
 ; CHECK-NEXT: sshll2 v3.4s, v0.8h, #0
+; CHECK-NEXT: sshll v1.4s, v1.4h, #0
 ; CHECK-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-NEXT: saddl2 v4.2d, v3.4s, v1.4s
-; CHECK-NEXT: saddl2 v5.2d, v0.4s, v2.4s
-; CHECK-NEXT: saddl v1.2d, v3.2s, v1.2s
-; CHECK-NEXT: saddl v0.2d, v0.2s, v2.2s
-; CHECK-NEXT: add v2.2d, v5.2d, v4.2d
-; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: saddl2 v4.2d, v3.4s, v2.4s
+; CHECK-NEXT: saddl v2.2d, v3.2s, v2.2s
+; CHECK-NEXT: saddl2 v5.2d, v0.4s, v1.4s
+; CHECK-NEXT: saddl v0.2d, v0.2s, v1.2s
+; CHECK-NEXT: add v1.2d, v5.2d, v4.2d
 ; CHECK-NEXT: add v0.2d, v0.2d, v2.2d
+; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
 ; CHECK-NEXT: addp d0, v0.2d
 ; CHECK-NEXT: fmov x8, d0
 ; CHECK-NEXT: add x0, x8, x0
@@ -1577,11 +1577,11 @@
 ;
 ; CHECK-DOT-LABEL: add_pair_v8i8_v8i32_zext:
 ; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: movi v2.8b, #1
-; CHECK-DOT-NEXT: movi v3.2d, #0000000000000000
-; CHECK-DOT-NEXT: udot v3.2s, v1.8b, v2.8b
-; CHECK-DOT-NEXT: udot v3.2s, v0.8b, v2.8b
-; CHECK-DOT-NEXT: addp v0.2s, v3.2s, v3.2s
+; CHECK-DOT-NEXT: movi v2.2d, #0000000000000000
+; CHECK-DOT-NEXT: movi v3.8b, #1
+; CHECK-DOT-NEXT: udot v2.2s, v1.8b, v3.8b
+; CHECK-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
+; CHECK-DOT-NEXT: addp v0.2s, v2.2s, v2.2s
 ; CHECK-DOT-NEXT: fmov w0, s0
 ; CHECK-DOT-NEXT: ret
 entry:
@@ -1606,11 +1606,11 @@
 ;
 ; CHECK-DOT-LABEL: add_pair_v8i8_v8i32_sext:
 ; CHECK-DOT: // %bb.0: // %entry
-; CHECK-DOT-NEXT: movi v2.8b, #1
-; CHECK-DOT-NEXT: movi v3.2d, #0000000000000000
-; CHECK-DOT-NEXT: sdot v3.2s, v1.8b, v2.8b
-; CHECK-DOT-NEXT: sdot v3.2s, v0.8b, v2.8b
-; CHECK-DOT-NEXT: addp v0.2s, v3.2s, v3.2s
+; CHECK-DOT-NEXT: movi v2.2d, #0000000000000000
+; CHECK-DOT-NEXT: movi v3.8b, #1
+; CHECK-DOT-NEXT: sdot v2.2s, v1.8b, v3.8b
+; CHECK-DOT-NEXT: sdot v2.2s, v0.8b, v3.8b
+; CHECK-DOT-NEXT: addp v0.2s, v2.2s, v2.2s
 ; CHECK-DOT-NEXT: fmov w0, s0
 ; CHECK-DOT-NEXT: ret
 entry:
@@ -1746,29 +1746,29 @@
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: ushll2 v2.8h, v0.16b, #0
 ; CHECK-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-NEXT: ushll v3.4s, v2.4h, #0
+; CHECK-NEXT: ushll2 v3.8h, v1.16b, #0
+; CHECK-NEXT: ushll v1.8h, v1.8b, #0
+; CHECK-NEXT: ushll v4.4s, v2.4h, #0
 ; CHECK-NEXT: ushll2 v2.4s, v2.8h, #0
-; CHECK-NEXT: ushll2 v4.4s, v0.8h, #0
+; CHECK-NEXT: ushll2 v5.4s, v0.8h, #0
 ; CHECK-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-NEXT: uaddl2 v5.2d, v4.4s, v2.4s
-; CHECK-NEXT: uaddl2 v6.2d, v0.4s, v3.4s
-; CHECK-NEXT: ushll2 v7.8h, v1.16b, #0
-; CHECK-NEXT: ushll v1.8h, v1.8b, #0
-; CHECK-NEXT: uaddl v2.2d, v4.2s, v2.2s
-; CHECK-NEXT: add v4.2d, v6.2d, v5.2d
-; CHECK-NEXT: uaddl v0.2d, v0.2s, v3.2s
-; CHECK-NEXT: ushll v3.4s, v7.4h, #0
-; CHECK-NEXT: ushll2 v5.4s, v7.8h, #0
-; CHECK-NEXT: ushll2 v6.4s, v1.8h, #0
+; CHECK-NEXT: ushll2 v6.4s, v3.8h, #0
+; CHECK-NEXT: ushll2 v7.4s, v1.8h, #0
+; CHECK-NEXT: ushll v3.4s, v3.4h, #0
 ; CHECK-NEXT: ushll v1.4s, v1.4h, #0
-; CHECK-NEXT: uaddl2 v7.2d, v6.4s, v5.4s
-; CHECK-NEXT: uaddl v5.2d, v6.2s, v5.2s
-; CHECK-NEXT: uaddl2 v6.2d, v1.4s, v3.4s
+; CHECK-NEXT: uaddl2 v16.2d, v5.4s, v2.4s
+; CHECK-NEXT: uaddl v2.2d, v5.2s, v2.2s
+; CHECK-NEXT: uaddl2 v5.2d, v0.4s, v4.4s
+; CHECK-NEXT: uaddl v0.2d, v0.2s, v4.2s
+; CHECK-NEXT: uaddl2 v4.2d, v7.4s, v6.4s
+; CHECK-NEXT: uaddl v6.2d, v7.2s, v6.2s
+; CHECK-NEXT: uaddl2 v7.2d, v1.4s, v3.4s
 ; CHECK-NEXT: uaddl v1.2d, v1.2s, v3.2s
+; CHECK-NEXT: add v3.2d, v5.2d, v16.2d
 ; CHECK-NEXT: add v0.2d, v0.2d, v2.2d
-; CHECK-NEXT: add v2.2d, v6.2d, v7.2d
-; CHECK-NEXT: add v1.2d, v1.2d, v5.2d
-; CHECK-NEXT: add v0.2d, v0.2d, v4.2d
+; CHECK-NEXT: add v2.2d, v7.2d, v4.2d
+; CHECK-NEXT: add v1.2d, v1.2d, v6.2d
+; CHECK-NEXT: add v0.2d, v0.2d, v3.2d
 ; CHECK-NEXT: add v1.2d, v1.2d, v2.2d
 ; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
 ; CHECK-NEXT: addp d0, v0.2d
@@ -1788,29 +1788,29 @@
 ; CHECK: // %bb.0: // %entry
 ; CHECK-NEXT: sshll2 v2.8h, v0.16b, #0
 ; CHECK-NEXT: sshll v0.8h, v0.8b, #0
-; CHECK-NEXT: sshll v3.4s, v2.4h, #0
+; CHECK-NEXT: sshll2 v3.8h, v1.16b, #0
+; CHECK-NEXT: sshll v1.8h, v1.8b, #0
+; CHECK-NEXT: sshll v4.4s, v2.4h, #0
 ; CHECK-NEXT: sshll2 v2.4s, v2.8h, #0
-; CHECK-NEXT: sshll2 v4.4s, v0.8h, #0
+; CHECK-NEXT: sshll2 v5.4s, v0.8h, #0
 ; CHECK-NEXT: sshll v0.4s, v0.4h, #0
-; CHECK-NEXT: saddl2 v5.2d, v4.4s, v2.4s
-; CHECK-NEXT: saddl2 v6.2d, v0.4s, v3.4s
-; CHECK-NEXT: sshll2 v7.8h, v1.16b, #0
-; CHECK-NEXT: sshll v1.8h, v1.8b, #0
-; CHECK-NEXT: saddl v2.2d, v4.2s, v2.2s
-; CHECK-NEXT: add v4.2d, v6.2d, v5.2d
-; CHECK-NEXT: saddl v0.2d, v0.2s, v3.2s
-; CHECK-NEXT: sshll v3.4s, v7.4h, #0
-; CHECK-NEXT: sshll2 v5.4s, v7.8h, #0
-; CHECK-NEXT: sshll2 v6.4s, v1.8h, #0
+; CHECK-NEXT: sshll2 v6.4s, v3.8h, #0
+; CHECK-NEXT: sshll2 v7.4s, v1.8h, #0
+; CHECK-NEXT: sshll v3.4s, v3.4h, #0
 ; CHECK-NEXT: sshll v1.4s, v1.4h, #0
-; CHECK-NEXT: saddl2 v7.2d, v6.4s, v5.4s
-; CHECK-NEXT: saddl v5.2d, v6.2s, v5.2s
-; CHECK-NEXT: saddl2 v6.2d, v1.4s, v3.4s
+; CHECK-NEXT: saddl2 v16.2d, v5.4s, v2.4s
+; CHECK-NEXT: saddl v2.2d, v5.2s, v2.2s
+; CHECK-NEXT: saddl2 v5.2d, v0.4s, v4.4s
+; CHECK-NEXT: saddl v0.2d, v0.2s, v4.2s
+; CHECK-NEXT: saddl2 v4.2d, v7.4s, v6.4s
+; CHECK-NEXT: saddl v6.2d, v7.2s, v6.2s
+; CHECK-NEXT: saddl2 v7.2d, v1.4s, v3.4s
 ; CHECK-NEXT: saddl v1.2d, v1.2s, v3.2s
+; CHECK-NEXT: add v3.2d, v5.2d, v16.2d
 ; CHECK-NEXT: add v0.2d, v0.2d, v2.2d
-; CHECK-NEXT: add v2.2d, v6.2d, v7.2d
-; CHECK-NEXT: add v1.2d, v1.2d, v5.2d
-; CHECK-NEXT: add v0.2d, v0.2d, v4.2d
+; CHECK-NEXT: add v2.2d, v7.2d, v4.2d
+; CHECK-NEXT: add v1.2d, v1.2d, v6.2d
+; CHECK-NEXT: add v0.2d, v0.2d, v3.2d
 ; CHECK-NEXT: add v1.2d, v1.2d, v2.2d
 ; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
 ; CHECK-NEXT: addp d0, v0.2d
@@ -1905,21 +1905,21 @@
 define i64 @add_pair_v4i8_v4i64_sext(<4 x i8> %x, <4 x i8> %y) {
 ; CHECK-LABEL: add_pair_v4i8_v4i64_sext:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: ushll v1.4s, v1.4h, #0
 ; CHECK-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-NEXT: ushll v2.2d, v1.2s, #0
-; CHECK-NEXT: ushll v3.2d, v0.2s, #0
-; CHECK-NEXT: ushll2 v1.2d, v1.4s, #0
+; CHECK-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-NEXT: ushll v2.2d, v0.2s, #0
+; CHECK-NEXT: ushll v3.2d, v1.2s, #0
 ; CHECK-NEXT: ushll2 v0.2d, v0.4s, #0
-; CHECK-NEXT: shl v3.2d, v3.2d, #56
+; CHECK-NEXT: ushll2 v1.2d, v1.4s, #0
 ; CHECK-NEXT: shl v2.2d, v2.2d, #56
+; CHECK-NEXT: shl v3.2d, v3.2d, #56
 ; CHECK-NEXT: shl v0.2d, v0.2d, #56
-; CHECK-NEXT: sshr v3.2d, v3.2d, #56
 ; CHECK-NEXT: shl v1.2d, v1.2d, #56
 ; CHECK-NEXT: sshr v2.2d, v2.2d, #56
-; CHECK-NEXT: ssra v3.2d, v0.2d, #56
-; CHECK-NEXT: ssra v2.2d, v1.2d, #56
-; CHECK-NEXT: add v0.2d, v3.2d, v2.2d
+; CHECK-NEXT: sshr v3.2d, v3.2d, #56
+; CHECK-NEXT: ssra v2.2d, v0.2d, #56
+; CHECK-NEXT: ssra v3.2d, v1.2d, #56
+; CHECK-NEXT: add v0.2d, v2.2d, v3.2d
 ; CHECK-NEXT: addp d0, v0.2d
 ; CHECK-NEXT: fmov x0, d0
 ; CHECK-NEXT: ret
@@ -1975,12 +1975,12 @@
 define i32 @add_pair_v8i8_v8i32_double_sext_zext(<8 x i8> %ax, <8 x i8> %ay, <8 x i8> %bx, <8 x i8> %by) {
 ; CHECK-BASE-LABEL: add_pair_v8i8_v8i32_double_sext_zext:
 ; CHECK-BASE: // %bb.0: // %entry
-; CHECK-BASE-NEXT: sshll v3.8h, v3.8b, #0
 ; CHECK-BASE-NEXT: ushll v1.8h, v1.8b, #0
-; CHECK-BASE-NEXT: saddlp v3.4s, v3.8h
-; CHECK-BASE-NEXT: uaddlp v1.4s, v1.8h
+; CHECK-BASE-NEXT: sshll v3.8h, v3.8b, #0
 ; CHECK-BASE-NEXT: ushll v0.8h, v0.8b, #0
 ; CHECK-BASE-NEXT: sshll v2.8h, v2.8b, #0
+; CHECK-BASE-NEXT: uaddlp v1.4s, v1.8h
+; CHECK-BASE-NEXT: saddlp v3.4s, v3.8h
 ; CHECK-BASE-NEXT: uadalp v1.4s, v0.8h
 ; CHECK-BASE-NEXT: sadalp v3.4s, v2.8h
 ; CHECK-BASE-NEXT: add v0.4s, v3.4s, v1.4s
@@ -1993,10 +1993,10 @@
 ; CHECK-DOT-NEXT: movi v4.2d, #0000000000000000
 ; CHECK-DOT-NEXT: movi v5.8b, #1
 ; CHECK-DOT-NEXT: movi v6.2d, #0000000000000000
-; CHECK-DOT-NEXT: sdot v4.2s, v3.8b, v5.8b
 ; CHECK-DOT-NEXT: udot v6.2s, v1.8b, v5.8b
-; CHECK-DOT-NEXT: sdot v4.2s, v2.8b, v5.8b
+; CHECK-DOT-NEXT: sdot v4.2s, v3.8b, v5.8b
 ; CHECK-DOT-NEXT: udot v6.2s, v0.8b, v5.8b
+; CHECK-DOT-NEXT: sdot v4.2s, v2.8b, v5.8b
 ; CHECK-DOT-NEXT: add v0.2s, v6.2s, v4.2s
 ; CHECK-DOT-NEXT: addp v0.2s, v0.2s, v0.2s
 ; CHECK-DOT-NEXT: fmov w0, s0
@@ -2019,10 +2019,10 @@
 define i32 @add_pair_v8i16_v4i32_double_sext_zext_shuffle(<8 x i16> %ax, <8 x i16> %ay, <8 x i16> %bx, <8 x i16> %by) {
 ; CHECK-LABEL: add_pair_v8i16_v4i32_double_sext_zext_shuffle:
 ; CHECK: // %bb.0: // %entry
-; CHECK-NEXT: uaddlp v3.4s, v3.8h
 ; CHECK-NEXT: uaddlp v1.4s, v1.8h
-; CHECK-NEXT: uadalp v3.4s, v2.8h
+; CHECK-NEXT: uaddlp v3.4s, v3.8h
 ; CHECK-NEXT: uadalp v1.4s, v0.8h
+; CHECK-NEXT: uadalp v3.4s, v2.8h
 ; CHECK-NEXT: add v0.4s, v3.4s, v1.4s
 ; CHECK-NEXT: addv s0, v0.4s
 ; CHECK-NEXT: fmov w0, s0
@@ -2068,53 +2068,53 @@
 define i32 @full(ptr %p1, i32 noundef %s1, ptr %p2, i32 noundef %s2) {
 ; CHECK-BASE-LABEL: full:
 ; CHECK-BASE: // %bb.0: // %entry
+; CHECK-BASE-NEXT: ldr d0, [x2]
+; CHECK-BASE-NEXT: ldr d1, [x0]
 ; CHECK-BASE-NEXT: // kill: def $w3 killed $w3 def $x3
 ; CHECK-BASE-NEXT: // kill: def $w1 killed $w1 def $x1
-; CHECK-BASE-NEXT: sxtw x8, w1
-; CHECK-BASE-NEXT: sxtw x10, w3
-; CHECK-BASE-NEXT: add x9, x0, x8
-; CHECK-BASE-NEXT: ldr d0, [x0]
-; CHECK-BASE-NEXT: ldr d1, [x2]
-; CHECK-BASE-NEXT: add x11, x2, x10
-; CHECK-BASE-NEXT: ldr d2, [x9]
-; CHECK-BASE-NEXT: add x9, x9, x8
-; CHECK-BASE-NEXT: uabdl v0.8h, v0.8b, v1.8b
-; CHECK-BASE-NEXT: ldr d1, [x11]
-; CHECK-BASE-NEXT: add x11, x11, x10
+; CHECK-BASE-NEXT: sxtw x8, w3
+; CHECK-BASE-NEXT: sxtw x9, w1
+; CHECK-BASE-NEXT: uabdl v0.8h, v1.8b, v0.8b
+; CHECK-BASE-NEXT: add x11, x2, x8
+; CHECK-BASE-NEXT: add x10, x0, x9
+; CHECK-BASE-NEXT: ldr d2, [x11]
+; CHECK-BASE-NEXT: add x11, x11, x8
+; CHECK-BASE-NEXT: ldr d1, [x10]
+; CHECK-BASE-NEXT: add x10, x10, x9
 ; CHECK-BASE-NEXT: uaddlp v0.4s, v0.8h
-; CHECK-BASE-NEXT: uabdl v1.8h, v2.8b, v1.8b
-; CHECK-BASE-NEXT: ldr d2, [x9]
-; CHECK-BASE-NEXT: ldr d3, [x11]
-; CHECK-BASE-NEXT: add x9, x9, x8
-; CHECK-BASE-NEXT: add x11, x11, x10
+; CHECK-BASE-NEXT: uabdl v1.8h, v1.8b, v2.8b
+; CHECK-BASE-NEXT: ldr d2, [x11]
+; CHECK-BASE-NEXT: add x11, x11, x8
+; CHECK-BASE-NEXT: uadalp v0.4s, v1.8h
+; CHECK-BASE-NEXT: ldr d1, [x10]
+; CHECK-BASE-NEXT: add x10, x10, x9
+; CHECK-BASE-NEXT: uabdl v1.8h, v1.8b, v2.8b
+; CHECK-BASE-NEXT: ldr d2, [x11]
+; CHECK-BASE-NEXT: add x11, x11, x8
 ; CHECK-BASE-NEXT: uadalp v0.4s, v1.8h
-; CHECK-BASE-NEXT: uabdl v1.8h, v2.8b, v3.8b
-; CHECK-BASE-NEXT: ldr d2, [x9]
-; CHECK-BASE-NEXT: ldr d3, [x11]
-; CHECK-BASE-NEXT: add x9, x9, x8
-; CHECK-BASE-NEXT: add x11, x11, x10
+; CHECK-BASE-NEXT: ldr d1, [x10]
+; CHECK-BASE-NEXT: add x10, x10, x9
+; CHECK-BASE-NEXT: uabdl v1.8h, v1.8b, v2.8b
+; CHECK-BASE-NEXT: ldr d2, [x11]
+; CHECK-BASE-NEXT: add x11, x11, x8
 ; CHECK-BASE-NEXT: uadalp v0.4s, v1.8h
-; CHECK-BASE-NEXT: uabdl v1.8h, v2.8b, v3.8b
-; CHECK-BASE-NEXT: ldr d2, [x9]
-; CHECK-BASE-NEXT: ldr d3, [x11]
-; CHECK-BASE-NEXT: add x9, x9, x8
-; CHECK-BASE-NEXT: add x11, x11, x10
+; CHECK-BASE-NEXT: ldr d1, [x10]
+; CHECK-BASE-NEXT: add x10, x10, x9
+; CHECK-BASE-NEXT: uabdl v1.8h, v1.8b, v2.8b
+; CHECK-BASE-NEXT: ldr d2, [x11]
+; CHECK-BASE-NEXT: add x11, x11, x8
 ; CHECK-BASE-NEXT: uadalp v0.4s, v1.8h
-; CHECK-BASE-NEXT: uabdl v1.8h, v2.8b, v3.8b
-; CHECK-BASE-NEXT: ldr d2, [x9]
-; CHECK-BASE-NEXT: ldr d3, [x11]
-; CHECK-BASE-NEXT: add x9, x9, x8
-; CHECK-BASE-NEXT: add x11, x11, x10
+; CHECK-BASE-NEXT: ldr d1, [x10]
+; CHECK-BASE-NEXT: add x10, x10, x9
+; CHECK-BASE-NEXT: uabdl v1.8h, v1.8b, v2.8b
+; CHECK-BASE-NEXT: ldr d2, [x11]
 ; CHECK-BASE-NEXT: uadalp v0.4s, v1.8h
-; CHECK-BASE-NEXT: uabdl v1.8h, v2.8b, v3.8b
-; CHECK-BASE-NEXT: ldr d2, [x9]
-; CHECK-BASE-NEXT: ldr d3, [x11]
+; CHECK-BASE-NEXT: ldr d1, [x10]
+; CHECK-BASE-NEXT: uabdl v1.8h, v1.8b, v2.8b
+; CHECK-BASE-NEXT: ldr d2, [x11, x8]
 ; CHECK-BASE-NEXT: uadalp v0.4s, v1.8h
-; CHECK-BASE-NEXT: ldr d1, [x9, x8]
-; CHECK-BASE-NEXT: uabdl v2.8h, v2.8b, v3.8b
-; CHECK-BASE-NEXT: ldr d3, [x11, x10]
-; CHECK-BASE-NEXT: uadalp v0.4s, v2.8h
-; CHECK-BASE-NEXT: uabdl v1.8h, v1.8b, v3.8b
+; CHECK-BASE-NEXT: ldr d1, [x10, x9]
+; CHECK-BASE-NEXT: uabdl v1.8h, v1.8b, v2.8b
 ; CHECK-BASE-NEXT: uadalp v0.4s, v1.8h
 ; CHECK-BASE-NEXT: addv s0, v0.4s
 ; CHECK-BASE-NEXT: fmov w0, s0
@@ -2122,21 +2122,21 @@
 ;
 ; CHECK-DOT-LABEL: full:
 ; CHECK-DOT: // %bb.0: // %entry
+; CHECK-DOT-NEXT: ldr d0, [x0]
+; CHECK-DOT-NEXT: ldr d1, [x2]
 ; CHECK-DOT-NEXT: // kill: def $w3 killed $w3 def $x3
 ; CHECK-DOT-NEXT: // kill: def $w1 killed $w1 def $x1
 ; CHECK-DOT-NEXT: sxtw x8, w3
 ; CHECK-DOT-NEXT: sxtw x9, w1
-; CHECK-DOT-NEXT: ldr d0, [x0]
-; CHECK-DOT-NEXT: add x10, x0, x9
-; CHECK-DOT-NEXT: ldr d1, [x2]
-; CHECK-DOT-NEXT: add x11, x2, x8
 ; CHECK-DOT-NEXT: movi v2.2d, #0000000000000000
 ; CHECK-DOT-NEXT: movi v3.8b, #1
 ; CHECK-DOT-NEXT: uabd v0.8b, v0.8b, v1.8b
-; CHECK-DOT-NEXT: ldr d1, [x10]
+; CHECK-DOT-NEXT: add x11, x2, x8
+; CHECK-DOT-NEXT: add x10, x0, x9
 ; CHECK-DOT-NEXT: ldr d4, [x11]
-; CHECK-DOT-NEXT: add x10, x10, x9
 ; CHECK-DOT-NEXT: add x11, x11, x8
+; CHECK-DOT-NEXT: ldr d1, [x10]
+; CHECK-DOT-NEXT: add x10, x10, x9
 ; CHECK-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
 ; CHECK-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b
 ; CHECK-DOT-NEXT: ldr d1, [x10]
@@ -2166,11 +2166,11 @@
 ; CHECK-DOT-NEXT: ldr d1, [x10]
 ; CHECK-DOT-NEXT: ldr d4, [x11]
 ; CHECK-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
-; CHECK-DOT-NEXT: ldr d0, [x10, x9]
-; CHECK-DOT-NEXT: uabd v1.8b, v1.8b, v4.8b
+; CHECK-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b
+; CHECK-DOT-NEXT: ldr d1, [x10, x9]
 ; CHECK-DOT-NEXT: ldr d4, [x11, x8]
-; CHECK-DOT-NEXT: udot v2.2s, v1.8b, v3.8b
-; CHECK-DOT-NEXT: uabd v0.8b, v0.8b, v4.8b
+; CHECK-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
+; CHECK-DOT-NEXT: uabd v0.8b, v1.8b, v4.8b
 ; CHECK-DOT-NEXT: udot v2.2s, v0.8b, v3.8b
 ; CHECK-DOT-NEXT: addp v0.2s, v2.2s, v2.2s
 ; CHECK-DOT-NEXT: fmov w0, s0
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
--- a/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll
@@ -101,8 +101,8 @@
 define i8 @test_v9i8(<9 x i8> %a) nounwind {
 ; CHECK-LABEL: test_v9i8:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT: mov v1.16b, v0.16b
+; CHECK-NEXT: mov w8, #-1 // =0xffffffff
 ; CHECK-NEXT: mov v1.b[9], w8
 ; CHECK-NEXT: mov v1.b[10], w8
 ; CHECK-NEXT: mov v1.b[11], w8
@@ -165,8 +165,8 @@
 define i128 @test_v2i128(<2 x i128> %a) nounwind {
 ; CHECK-LABEL: test_v2i128:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: and x0, x0, x2
 ; CHECK-NEXT: and x1, x1, x3
+; CHECK-NEXT: and x0, x0, x2
 ; CHECK-NEXT: ret
 %b = call i128 @llvm.vector.reduce.and.v2i128(<2 x i128> %a)
 ret i128 %b
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll
--- a/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll
@@ -144,8 +144,8 @@
 ; CHECK-NEXT: sub sp, sp, #32
 ; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill
 ; CHECK-NEXT: mov v1.16b, v0.16b
-; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
 ; CHECK-NEXT: mov v0.16b, v2.16b
+; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
 ; CHECK-NEXT: bl __addtf3
 ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
@@ -170,7 +170,7 @@
 ; CHECK-NEXT: fadd s4, s4, s0
 ; CHECK-NEXT: mov s7, v0.s[2]
 ; CHECK-NEXT: mov s0, v0.s[3]
-; CHECK-NEXT: mov s5, v3.s[1]
+; CHECK-NEXT: mov s5, v2.s[1]
 ; CHECK-NEXT: fadd s4, s4, s6
 ; CHECK-NEXT: mov s6, v1.s[2]
 ; CHECK-NEXT: fadd s4, s4, s7
@@ -179,21 +179,21 @@
 ; CHECK-NEXT: fadd s0, s0, s1
 ; CHECK-NEXT: mov s1, v1.s[3]
 ; CHECK-NEXT: fadd s0, s0, s4
-; CHECK-NEXT: mov s4, v2.s[2]
 ; CHECK-NEXT: fadd s0, s0, s6
 ; CHECK-NEXT: fadd s0, s0, s1
-; CHECK-NEXT: mov s1, v2.s[1]
+; CHECK-NEXT: mov s1, v2.s[2]
 ; CHECK-NEXT: fadd s0, s0, s2
+; CHECK-NEXT: mov s2, v2.s[3]
+; CHECK-NEXT: fadd s0, s0, s5
 ; CHECK-NEXT: fadd s0, s0, s1
-; CHECK-NEXT: mov s1, v2.s[3]
-; CHECK-NEXT: mov s2, v3.s[3]
-; CHECK-NEXT: fadd s0, s0, s4
-; CHECK-NEXT: fadd s0, s0, s1
-; CHECK-NEXT: mov s1, v3.s[2]
+; CHECK-NEXT: mov s1, v3.s[1]
+; CHECK-NEXT: fadd s0, s0, s2
+; CHECK-NEXT: mov s2, v3.s[2]
 ; CHECK-NEXT: fadd s0, s0, s3
-; CHECK-NEXT: fadd s0, s0, s5
 ; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: mov s1, v3.s[3]
 ; CHECK-NEXT: fadd s0, s0, s2
+; CHECK-NEXT: fadd s0, s0, s1
 ; CHECK-NEXT: ret
 %b = call float @llvm.vector.reduce.fadd.f32.v16f32(float %s, <16 x float> %a)
 ret float %b
@@ -205,24 +205,24 @@
 ; CHECK-NEXT: mov s5, v0.s[2]
 ; CHECK-NEXT: faddp s6, v0.2s
 ; CHECK-NEXT: mov s0, v0.s[3]
-; CHECK-NEXT: mov s4, v2.s[1]
+; CHECK-NEXT: mov s4, v1.s[1]
 ; CHECK-NEXT: fadd s5, s6, s5
-; CHECK-NEXT: mov s6, v1.s[2]
 ; CHECK-NEXT: fadd s0, s5, s0
-; CHECK-NEXT: mov s5, v1.s[1]
+; CHECK-NEXT: mov s5, v1.s[2]
 ; CHECK-NEXT: fadd s0, s0, s1
 ; CHECK-NEXT: mov s1, v1.s[3]
+; CHECK-NEXT: fadd s0, s0, s4
+; CHECK-NEXT: mov s4, v2.s[2]
 ; CHECK-NEXT: fadd s0, s0, s5
-; CHECK-NEXT: fadd s0, s0, s6
 ; CHECK-NEXT: fadd s0, s0, s1
-; CHECK-NEXT: mov s1, v2.s[2]
+; CHECK-NEXT: mov s1, v2.s[1]
 ; CHECK-NEXT: fadd s0, s0, s2
-; CHECK-NEXT: mov s2, v2.s[3]
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: mov s1, v2.s[3]
+; CHECK-NEXT: mov s2, v3.s[2]
 ; CHECK-NEXT: fadd s0, s0, s4
 ; CHECK-NEXT: fadd s0, s0, s1
 ; CHECK-NEXT: mov s1, v3.s[1]
-; CHECK-NEXT: fadd s0, s0, s2
-; CHECK-NEXT: mov s2, v3.s[2]
 ; CHECK-NEXT: fadd s0, s0, s3
 ; CHECK-NEXT: fadd s0, s0, s1
 ; CHECK-NEXT: mov s1, v3.s[3]
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
--- a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
@@ -172,8 +172,8 @@
 ; CHECKNOFP16-NEXT: mov h5, v0.h[6]
 ; CHECKNOFP16-NEXT: mov h0, v0.h[7]
 ; CHECKNOFP16-NEXT: fadd s2, s2, s3
-; CHECKNOFP16-NEXT: mov h3, v1.h[6]
-; CHECKNOFP16-NEXT: fcvt h4, s4
+; CHECKNOFP16-NEXT: fcvt h3, s4
+; CHECKNOFP16-NEXT: mov h4, v1.h[6]
 ; CHECKNOFP16-NEXT: fcvt s5, h5
 ; CHECKNOFP16-NEXT: mov h1, v1.h[7]
 ; CHECKNOFP16-NEXT: fcvt s0, h0
@@ -182,15 +182,15 @@
 ; CHECKNOFP16-NEXT: fcvt s4, h4
 ; CHECKNOFP16-NEXT: fcvt s1, h1
 ; CHECKNOFP16-NEXT: fcvt s2, h2
-; CHECKNOFP16-NEXT: fadd s3, s5, s3
 ; CHECKNOFP16-NEXT: fadd s0, s0, s1
-; CHECKNOFP16-NEXT: fadd s2, s2, s4
-; CHECKNOFP16-NEXT: fcvt h3, s3
+; CHECKNOFP16-NEXT: fadd s2, s2, s3
+; CHECKNOFP16-NEXT: fadd s3, s5, s4
 ; CHECKNOFP16-NEXT: fcvt h0, s0
 ; CHECKNOFP16-NEXT: fcvt h2, s2
-; CHECKNOFP16-NEXT: fcvt s3, h3
+; CHECKNOFP16-NEXT: fcvt h3, s3
 ; CHECKNOFP16-NEXT: fcvt s0, h0
 ; CHECKNOFP16-NEXT: fcvt s2, h2
+; CHECKNOFP16-NEXT: fcvt s3, h3
 ; CHECKNOFP16-NEXT: fadd s2, s2, s3
 ; CHECKNOFP16-NEXT: fcvt h1, s2
 ; CHECKNOFP16-NEXT: fcvt s1, h1
@@ -494,8 +494,8 @@
 ; CHECKNOFP16-NEXT: fadd s3, s3, s5
 ; CHECKNOFP16-NEXT: mov h4, v0.h[6]
 ; CHECKNOFP16-NEXT: mov h5, v1.h[6]
-; CHECKNOFP16-NEXT: mov h0, v0.h[7]
 ; CHECKNOFP16-NEXT: mov h1, v1.h[7]
+; CHECKNOFP16-NEXT: mov h0, v0.h[7]
 ; CHECKNOFP16-NEXT: fcvt h2, s2
 ; CHECKNOFP16-NEXT: fcvt h3, s3
 ; CHECKNOFP16-NEXT: fcvt s4, h4
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll
@@ -116,30 +116,30 @@
 ; CHECK-NOFP-NEXT: fcvt s2, h2
 ; CHECK-NOFP-NEXT: adrp x8, .LCPI6_0
 ; CHECK-NOFP-NEXT: fcvt s16, h16
-; CHECK-NOFP-NEXT: fcvt s3, h3
 ; CHECK-NOFP-NEXT: fcvt s17, h17
+; CHECK-NOFP-NEXT: fcvt s3, h3
 ; CHECK-NOFP-NEXT: fcmp s1, s16
 ; CHECK-NOFP-NEXT: fcsel s1, s1, s16, gt
 ; CHECK-NOFP-NEXT: fcmp s0, s17
 ; CHECK-NOFP-NEXT: ldr h16, [sp, #16]
+; CHECK-NOFP-NEXT: fcvt s16, h16
 ; CHECK-NOFP-NEXT: fcsel s0, s0, s17, gt
 ; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fcvt s16, h16
+; CHECK-NOFP-NEXT: fcmp s2, s16
 ; CHECK-NOFP-NEXT: fcvt h0, s0
 ; CHECK-NOFP-NEXT: fcvt s1, h1
-; CHECK-NOFP-NEXT: fcmp s2, s16
 ; CHECK-NOFP-NEXT: fcvt s0, h0
 ; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
 ; CHECK-NOFP-NEXT: fcsel s1, s2, s16, gt
 ; CHECK-NOFP-NEXT: ldr h2, [x8, :lo12:.LCPI6_0]
 ; CHECK-NOFP-NEXT: mov w8, #-8388608 // =0xff800000
-; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcvt h1, s1
 ; CHECK-NOFP-NEXT: fcvt s2, h2
 ; CHECK-NOFP-NEXT: fmov s16, w8
+; CHECK-NOFP-NEXT: fcvt h0, s0
+; CHECK-NOFP-NEXT: fcvt h1, s1
+; CHECK-NOFP-NEXT: fcmp s3, s2
 ; CHECK-NOFP-NEXT: fcvt s0, h0
 ; CHECK-NOFP-NEXT: fcvt s1, h1
-; CHECK-NOFP-NEXT: fcmp s3, s2
 ; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
 ; CHECK-NOFP-NEXT: fcsel s1, s3, s16, gt
 ; CHECK-NOFP-NEXT: fcvt s3, h4
@@ -189,8 +189,8 @@
 ; CHECK-FP-NEXT: // kill: def $h2 killed $h2 def $q2
 ; CHECK-FP-NEXT: // kill: def $h3 killed $h3 def $q3
 ; CHECK-FP-NEXT: // kill: def $h4 killed $h4 def $q4
-; CHECK-FP-NEXT: mov x8, sp
 ; CHECK-FP-NEXT: // kill: def $h5 killed $h5 def $q5
+; CHECK-FP-NEXT: mov x8, sp
 ; CHECK-FP-NEXT: // kill: def $h6 killed $h6 def $q6
 ; CHECK-FP-NEXT: // kill: def $h7 killed $h7 def $q7
 ; CHECK-FP-NEXT: mov v0.h[1], v1.h[0]
@@ -198,11 +198,11 @@
 ; CHECK-FP-NEXT: mov v0.h[2], v2.h[0]
 ; CHECK-FP-NEXT: ld1 { v1.h }[0], [x8]
 ; CHECK-FP-NEXT: add x8, sp, #8
-; CHECK-FP-NEXT: mov v0.h[3], v3.h[0]
 ; CHECK-FP-NEXT: ld1 { v1.h }[1], [x8]
 ; CHECK-FP-NEXT: add x8, sp, #16
-; CHECK-FP-NEXT: mov v0.h[4], v4.h[0]
+; CHECK-FP-NEXT: mov v0.h[3], v3.h[0]
 ; CHECK-FP-NEXT: ld1 { v1.h }[2], [x8]
+; CHECK-FP-NEXT: mov v0.h[4], v4.h[0]
 ; CHECK-FP-NEXT: mov v0.h[5], v5.h[0]
 ; CHECK-FP-NEXT: mov v0.h[6], v6.h[0]
 ; CHECK-FP-NEXT: mov v0.h[7], v7.h[0]
@@ -223,31 +223,31 @@
 ; CHECK-NOFP-NEXT: fcvt s2, h2
 ; CHECK-NOFP-NEXT: adrp x8, .LCPI7_0
 ; CHECK-NOFP-NEXT: fcvt s16, h16
-; CHECK-NOFP-NEXT: fcvt s3, h3
 ; CHECK-NOFP-NEXT: fcvt s17, h17
+; CHECK-NOFP-NEXT: fcvt s3, h3
 ; CHECK-NOFP-NEXT: fcmp s1, s16
 ; CHECK-NOFP-NEXT: fcsel s1, s1, s16, gt
 ; CHECK-NOFP-NEXT: fcmp s0, s17
 ; CHECK-NOFP-NEXT: ldr h16, [sp, #16]
+; CHECK-NOFP-NEXT: fcvt s16, h16
 ; CHECK-NOFP-NEXT: fcsel s0, s0, s17, gt
 ; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fcvt s16, h16
+; CHECK-NOFP-NEXT: fcmp s2, s16
 ; CHECK-NOFP-NEXT: fcvt h0, s0
 ; CHECK-NOFP-NEXT: fcvt s1, h1
-; CHECK-NOFP-NEXT: fcmp s2, s16
 ; CHECK-NOFP-NEXT: fcvt s0, h0
 ; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
 ; CHECK-NOFP-NEXT: fcsel s1, s2, s16, gt
 ; CHECK-NOFP-NEXT: ldr h2, [x8, :lo12:.LCPI7_0]
 ; CHECK-NOFP-NEXT: mov w8, #57344 // =0xe000
+; CHECK-NOFP-NEXT: fcvt s2, h2
 ; CHECK-NOFP-NEXT: movk w8, #51071, lsl #16
+; CHECK-NOFP-NEXT: fmov s16, w8
 ; CHECK-NOFP-NEXT: fcvt h0, s0
 ; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fcvt s2, h2
-; CHECK-NOFP-NEXT: fmov s16, w8
+; CHECK-NOFP-NEXT: fcmp s3, s2
 ; CHECK-NOFP-NEXT: fcvt s0, h0
 ; CHECK-NOFP-NEXT: fcvt s1, h1
-; CHECK-NOFP-NEXT: fcmp s3, s2
 ; CHECK-NOFP-NEXT: fmaxnm s0, s0, s1
 ; CHECK-NOFP-NEXT: fcsel s1, s3, s16, gt
 ; CHECK-NOFP-NEXT: fcvt s3, h4
@@ -297,8 +297,8 @@
 ; CHECK-FP-NEXT: // kill: def $h2 killed $h2 def $q2
 ; CHECK-FP-NEXT: // kill: def $h3 killed $h3 def $q3
 ; CHECK-FP-NEXT: // kill: def $h4 killed $h4 def $q4
-; CHECK-FP-NEXT: mov x8, sp
 ; CHECK-FP-NEXT: // kill: def $h5 killed $h5 def $q5
+; CHECK-FP-NEXT: mov x8, sp
 ; CHECK-FP-NEXT: // kill: def $h6 killed $h6 def $q6
 ; CHECK-FP-NEXT: // kill: def $h7 killed $h7 def $q7
 ; CHECK-FP-NEXT: mov v0.h[1], v1.h[0]
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmaximum.ll
@@ -81,15 +81,15 @@
 ; CHECK-NOFP-LABEL: test_v11f16:
 ; CHECK-NOFP: // %bb.0:
 ; CHECK-NOFP-NEXT: ldr h16, [sp, #8]
-; CHECK-NOFP-NEXT: fcvt s1, h1
 ; CHECK-NOFP-NEXT: ldr h17, [sp]
+; CHECK-NOFP-NEXT: fcvt s1, h1
 ; CHECK-NOFP-NEXT: fcvt s0, h0
 ; CHECK-NOFP-NEXT: fcvt s2, h2
 ; CHECK-NOFP-NEXT: fcvt s16, h16
 ; CHECK-NOFP-NEXT: fcvt s17, h17
 ; CHECK-NOFP-NEXT: fmax s1, s1, s16
-; CHECK-NOFP-NEXT: ldr h16, [sp, #16]
 ; CHECK-NOFP-NEXT: fmax s0, s0, s17
+; CHECK-NOFP-NEXT: ldr h16, [sp, #16]
 ; CHECK-NOFP-NEXT: fcvt s16, h16
 ; CHECK-NOFP-NEXT: fcvt h1, s1
 ; CHECK-NOFP-NEXT: fcvt h0, s0
@@ -132,8 +132,8 @@
 ; CHECK-FP-NEXT: // kill: def $h2 killed $h2 def $q2
 ; CHECK-FP-NEXT: // kill: def $h3 killed $h3 def $q3
 ; CHECK-FP-NEXT: // kill: def $h4 killed $h4 def $q4
-; CHECK-FP-NEXT: mov x8, sp
 ; CHECK-FP-NEXT: // kill: def $h5 killed $h5 def $q5
+; CHECK-FP-NEXT: mov x8, sp
 ; CHECK-FP-NEXT: // kill: def $h6 killed $h6 def $q6
 ; CHECK-FP-NEXT: // kill: def $h7 killed $h7 def $q7
 ; CHECK-FP-NEXT: mov v0.h[1], v1.h[0]
@@ -141,11 +141,11 @@
 ; CHECK-FP-NEXT: mov v0.h[2], v2.h[0]
 ; CHECK-FP-NEXT: ld1 { v1.h }[0], [x8]
 ; CHECK-FP-NEXT: add x8, sp, #8
-; CHECK-FP-NEXT: mov v0.h[3], v3.h[0]
 ; CHECK-FP-NEXT: ld1 { v1.h }[1], [x8]
 ; CHECK-FP-NEXT: add x8, sp, #16
-; CHECK-FP-NEXT: mov v0.h[4], v4.h[0]
+; CHECK-FP-NEXT: mov v0.h[3], v3.h[0]
 ; CHECK-FP-NEXT: ld1 { v1.h }[2], [x8]
+; CHECK-FP-NEXT: mov v0.h[4], v4.h[0]
 ; CHECK-FP-NEXT: mov v0.h[5], v5.h[0]
 ; CHECK-FP-NEXT: mov v0.h[6], v6.h[0]
 ; CHECK-FP-NEXT: mov v0.h[7], v7.h[0]
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll
@@ -116,30 +116,30 @@
 ; CHECK-NOFP-NEXT: fcvt s2, h2
 ; CHECK-NOFP-NEXT: adrp x8, .LCPI6_0
 ; CHECK-NOFP-NEXT: fcvt s16, h16
-; CHECK-NOFP-NEXT: fcvt s3, h3
 ; CHECK-NOFP-NEXT: fcvt s17, h17
+; CHECK-NOFP-NEXT: fcvt s3, h3
 ; CHECK-NOFP-NEXT: fcmp s1, s16
 ; CHECK-NOFP-NEXT: fcsel s1, s1, s16, lt
 ; CHECK-NOFP-NEXT: fcmp s0, s17
 ; CHECK-NOFP-NEXT: ldr h16, [sp, #16]
+; CHECK-NOFP-NEXT: fcvt s16, h16
 ; CHECK-NOFP-NEXT: fcsel s0, s0, s17, lt
 ; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fcvt s16, h16
+; CHECK-NOFP-NEXT: fcmp s2, s16
 ; CHECK-NOFP-NEXT: fcvt h0, s0
 ; CHECK-NOFP-NEXT: fcvt s1, h1
-; CHECK-NOFP-NEXT: fcmp s2, s16
 ; CHECK-NOFP-NEXT: fcvt s0, h0
 ; CHECK-NOFP-NEXT: fminnm s0, s0, s1
 ; CHECK-NOFP-NEXT: fcsel s1, s2, s16, lt
 ; CHECK-NOFP-NEXT: ldr h2, [x8, :lo12:.LCPI6_0]
 ; CHECK-NOFP-NEXT: mov w8, #2139095040 // =0x7f800000
-; CHECK-NOFP-NEXT: fcvt h0, s0
-; CHECK-NOFP-NEXT: fcvt h1, s1
 ; CHECK-NOFP-NEXT: fcvt s2, h2
 ; CHECK-NOFP-NEXT: fmov s16, w8
+; CHECK-NOFP-NEXT: fcvt h0, s0
+; CHECK-NOFP-NEXT: fcvt h1, s1
+; CHECK-NOFP-NEXT: fcmp s3, s2
 ; CHECK-NOFP-NEXT: fcvt s0, h0
 ; CHECK-NOFP-NEXT: fcvt s1, h1
-; CHECK-NOFP-NEXT: fcmp s3, s2
 ; CHECK-NOFP-NEXT: fminnm s0, s0, s1
 ; CHECK-NOFP-NEXT: fcsel s1, s3, s16, lt
 ; CHECK-NOFP-NEXT: fcvt s3, h4
@@ -189,8 +189,8 @@
 ; CHECK-FP-NEXT: // kill: def $h2 killed $h2 def $q2
 ; CHECK-FP-NEXT: // kill: def $h3 killed $h3 def $q3
 ; CHECK-FP-NEXT: // kill: def $h4 killed $h4 def $q4
-; CHECK-FP-NEXT: mov x8, sp
 ; CHECK-FP-NEXT: // kill: def $h5 killed $h5 def $q5
+; CHECK-FP-NEXT: mov x8, sp
 ; CHECK-FP-NEXT: // kill: def $h6 killed $h6 def $q6
 ; CHECK-FP-NEXT: // kill: def $h7 killed $h7 def $q7
 ; CHECK-FP-NEXT: mov v0.h[1], v1.h[0]
@@ -198,11 +198,11 @@
 ; CHECK-FP-NEXT: mov v0.h[2], v2.h[0]
 ; CHECK-FP-NEXT: ld1 { v1.h }[0], [x8]
 ; CHECK-FP-NEXT: add x8, sp, #8
-; CHECK-FP-NEXT: mov v0.h[3], v3.h[0]
 ; CHECK-FP-NEXT: ld1 { v1.h }[1], [x8]
 ; CHECK-FP-NEXT: add x8, sp, #16
-; CHECK-FP-NEXT: mov v0.h[4], v4.h[0]
+; CHECK-FP-NEXT: mov v0.h[3], v3.h[0]
 ; CHECK-FP-NEXT: ld1 { v1.h }[2], [x8]
+; CHECK-FP-NEXT: mov v0.h[4], v4.h[0]
 ; CHECK-FP-NEXT: mov v0.h[5], v5.h[0]
 ; CHECK-FP-NEXT: mov v0.h[6], v6.h[0]
 ; CHECK-FP-NEXT: mov v0.h[7], v7.h[0]
@@ -223,31 +223,31 @@
 ; CHECK-NOFP-NEXT: fcvt s2, h2
 ; CHECK-NOFP-NEXT: adrp x8, .LCPI7_0
 ; CHECK-NOFP-NEXT: fcvt s16, h16
-; CHECK-NOFP-NEXT: fcvt s3, h3
 ; CHECK-NOFP-NEXT: fcvt s17, h17
+; CHECK-NOFP-NEXT: fcvt s3, h3
 ; CHECK-NOFP-NEXT: fcmp s1, s16
 ; CHECK-NOFP-NEXT: fcsel s1, s1, s16, lt
 ; CHECK-NOFP-NEXT: fcmp s0, s17
 ; CHECK-NOFP-NEXT: ldr h16, [sp, #16]
+; CHECK-NOFP-NEXT: fcvt s16, h16
 ; CHECK-NOFP-NEXT: fcsel s0, s0, s17, lt
 ; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fcvt s16, h16
+; CHECK-NOFP-NEXT: fcmp s2, s16
 ; CHECK-NOFP-NEXT: fcvt h0, s0
 ; CHECK-NOFP-NEXT: fcvt s1, h1
-; CHECK-NOFP-NEXT: fcmp s2, s16
 ; CHECK-NOFP-NEXT: fcvt s0, h0
 ; CHECK-NOFP-NEXT: fminnm s0, s0, s1
 ; CHECK-NOFP-NEXT: fcsel s1, s2, s16, lt
 ; CHECK-NOFP-NEXT: ldr h2, [x8, :lo12:.LCPI7_0]
 ; CHECK-NOFP-NEXT: mov w8, #57344 // =0xe000
+; CHECK-NOFP-NEXT: fcvt s2, h2
 ; CHECK-NOFP-NEXT: movk w8, #18303, lsl #16
+; CHECK-NOFP-NEXT: fmov s16, w8
 ; CHECK-NOFP-NEXT: fcvt h0, s0
 ; CHECK-NOFP-NEXT: fcvt h1, s1
-; CHECK-NOFP-NEXT: fcvt s2, h2
-; CHECK-NOFP-NEXT: fmov s16, w8
+; CHECK-NOFP-NEXT: fcmp s3, s2
 ; CHECK-NOFP-NEXT: fcvt s0, h0
 ; CHECK-NOFP-NEXT: fcvt s1, h1
-; CHECK-NOFP-NEXT: fcmp s3, s2
 ; CHECK-NOFP-NEXT: fminnm s0, s0, s1
 ; CHECK-NOFP-NEXT: fcsel s1, s3, s16, lt
 ; CHECK-NOFP-NEXT: fcvt s3, h4
@@ -297,8 +297,8 @@
 ; CHECK-FP-NEXT: // kill: def $h2 killed $h2 def $q2
 ; CHECK-FP-NEXT: // kill: def $h3 killed $h3 def $q3
 ; CHECK-FP-NEXT: // kill: def $h4 killed $h4 def $q4
-; CHECK-FP-NEXT: mov x8, sp
 ; CHECK-FP-NEXT: // kill: def $h5 killed $h5 def $q5
+; CHECK-FP-NEXT: mov x8, sp
 ; CHECK-FP-NEXT: // kill: def $h6 killed $h6 def $q6
 ; CHECK-FP-NEXT: // kill: def $h7 killed $h7 def $q7
 ; CHECK-FP-NEXT: mov v0.h[1], v1.h[0]
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fminimum.ll b/llvm/test/CodeGen/AArch64/vecreduce-fminimum.ll
--- a/llvm/test/CodeGen/AArch64/vecreduce-fminimum.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fminimum.ll
@@ -81,15 +81,15 @@
 ; CHECK-NOFP-LABEL: test_v11f16:
 ; CHECK-NOFP: // %bb.0:
 ; CHECK-NOFP-NEXT: ldr h16, [sp, #8]
-; CHECK-NOFP-NEXT: fcvt s1, h1
 ; CHECK-NOFP-NEXT: ldr h17, [sp]
+; CHECK-NOFP-NEXT: fcvt s1, h1
 ; CHECK-NOFP-NEXT: fcvt s0, h0
 ; CHECK-NOFP-NEXT: fcvt s2, h2
 ; CHECK-NOFP-NEXT: fcvt s16, h16
 ; CHECK-NOFP-NEXT: fcvt s17, h17
 ; CHECK-NOFP-NEXT: fmin s1, s1, s16
-; CHECK-NOFP-NEXT: ldr h16, [sp, #16]
 ; CHECK-NOFP-NEXT: fmin s0, s0, s17
+; CHECK-NOFP-NEXT: ldr h16, [sp, #16]
 ; CHECK-NOFP-NEXT: fcvt s16, h16
 ; CHECK-NOFP-NEXT: fcvt h1, s1
 ; CHECK-NOFP-NEXT: fcvt h0, s0
@@ -132,8 +132,8 @@
 ; CHECK-FP-NEXT: // kill: def $h2 killed $h2 def $q2
 ; CHECK-FP-NEXT: // kill: def $h3 killed $h3 def $q3
 ; CHECK-FP-NEXT: // kill: def $h4 killed $h4 def $q4
-; CHECK-FP-NEXT: mov x8, sp
 ; CHECK-FP-NEXT: // kill: def $h5 killed $h5 def $q5
+; CHECK-FP-NEXT: mov x8, sp
 ; CHECK-FP-NEXT: // kill: def $h6 killed $h6 def $q6
 ; CHECK-FP-NEXT: // kill: def $h7 killed $h7 def $q7
 ; CHECK-FP-NEXT: mov v0.h[1], v1.h[0]
@@ -141,11 +141,11 @@
 ; CHECK-FP-NEXT: mov v0.h[2], v2.h[0]
 ; CHECK-FP-NEXT: ld1 { v1.h }[0], [x8]
 ; CHECK-FP-NEXT: add x8, sp, #8
-; CHECK-FP-NEXT: mov v0.h[3], v3.h[0]
 ; CHECK-FP-NEXT: ld1 { v1.h }[1], [x8]
 ; CHECK-FP-NEXT: add x8, sp, #16
-; CHECK-FP-NEXT: mov v0.h[4], v4.h[0]
+; CHECK-FP-NEXT: mov v0.h[3], v3.h[0]
 ; CHECK-FP-NEXT: ld1 { v1.h }[2], [x8]
+; CHECK-FP-NEXT: mov v0.h[4], v4.h[0]
 ; CHECK-FP-NEXT: mov v0.h[5], v5.h[0]
 ; CHECK-FP-NEXT: mov v0.h[6], v6.h[0]
 ; CHECK-FP-NEXT: mov v0.h[7], v7.h[0]
diff --git a/llvm/test/CodeGen/AArch64/vector-fcopysign.ll b/llvm/test/CodeGen/AArch64/vector-fcopysign.ll
--- a/llvm/test/CodeGen/AArch64/vector-fcopysign.ll
+++ b/llvm/test/CodeGen/AArch64/vector-fcopysign.ll
@@ -40,8 +40,8 @@
 ; CHECK-LABEL: test_copysign_v1f64_v1f32:
 ; CHECK: ; %bb.0:
 ; CHECK-NEXT: movi.2d v2, #0xffffffffffffffff
-; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT: fcvtl v1.2d, v1.2s
+; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT: fneg.2d v2, v2
 ; CHECK-NEXT: bif.16b v0, v1, v2
 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0
@@ -156,11 +156,11 @@
 ; CHECK-LABEL: test_copysign_v4f64_v4f32:
 ; CHECK: ; %bb.0:
 ; CHECK-NEXT: movi.2d v3, #0xffffffffffffffff
-; CHECK-NEXT: fcvtl2 v4.2d, v2.4s
-; CHECK-NEXT: fcvtl v2.2d, v2.2s
+; CHECK-NEXT: fcvtl v4.2d, v2.2s
+; CHECK-NEXT: fcvtl2 v2.2d, v2.4s
 ; CHECK-NEXT: fneg.2d v3, v3
-; CHECK-NEXT: bif.16b v1, v4, v3
-; CHECK-NEXT: bif.16b v0, v2, v3
+; CHECK-NEXT: bif.16b v1, v2, v3
+; CHECK-NEXT: bif.16b v0, v4, v3
 ; CHECK-NEXT: ret
 %tmp0 = fpext <4 x float> %b to <4 x double>
 %r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %tmp0)
@@ -191,28 +191,29 @@
 ; NOFP16-NEXT: ; kill: def $d0 killed $d0 def $q0
 ; NOFP16-NEXT: mov h3, v1[1]
 ; NOFP16-NEXT: mov h4, v0[1]
-; NOFP16-NEXT: fcvt s5, h1
-; NOFP16-NEXT: fcvt s6, h0
-; NOFP16-NEXT: mov h7, v1[2]
-; NOFP16-NEXT: mov h16, v0[2]
+; NOFP16-NEXT: mov h5, v1[2]
+; NOFP16-NEXT: mov h6, v0[2]
 ; NOFP16-NEXT: mvni.4s v2, #128, lsl #24
+; NOFP16-NEXT: fcvt s7, h1
+; NOFP16-NEXT: fcvt s16, h0
 ; NOFP16-NEXT: mov h1, v1[3]
 ; NOFP16-NEXT: fcvt s3, h3
 ; NOFP16-NEXT: fcvt s4, h4
-; NOFP16-NEXT: bit.16b v5, v6, v2
-; NOFP16-NEXT: fcvt s6, h7
-; NOFP16-NEXT: fcvt s7, h16
 ; NOFP16-NEXT: fcvt s1, h1
 ; NOFP16-NEXT: bit.16b v3, v4, v2
-; NOFP16-NEXT: mov h4, v0[3]
-; NOFP16-NEXT: fcvt h0, s5
-; NOFP16-NEXT: bit.16b v6, v7, v2
+; NOFP16-NEXT: fcvt s4, h5
+; NOFP16-NEXT: fcvt s5, h6
+; NOFP16-NEXT: mov.16b v6, v2
+; NOFP16-NEXT: bsl.16b v6, v16, v7
+; NOFP16-NEXT: mov h7, v0[3]
+; NOFP16-NEXT: bit.16b v4, v5, v2
 ; NOFP16-NEXT: fcvt h3, s3
-; NOFP16-NEXT: fcvt s4, h4
-; NOFP16-NEXT: fcvt h5, s6
+; NOFP16-NEXT: fcvt h0, s6
+; NOFP16-NEXT: fcvt s5, h7
 ; NOFP16-NEXT: mov.h v0[1], v3[0]
-; NOFP16-NEXT: bit.16b v1, v4, v2
-; NOFP16-NEXT: mov.h v0[2], v5[0]
+; NOFP16-NEXT: fcvt h3, s4
+; NOFP16-NEXT: bit.16b v1, v5, v2
+; NOFP16-NEXT: mov.h v0[2], v3[0]
 ; NOFP16-NEXT: fcvt h1, s1
 ; NOFP16-NEXT: mov.h v0[3], v1[0]
 ; NOFP16-NEXT: ; kill: def $d0 killed $d0 killed $q0
@@ -233,29 +234,30 @@
 ; NOFP16-NEXT: fcvtn v1.4h, v1.4s
 ; NOFP16-NEXT: ; kill: def $d0 killed $d0 def $q0
 ; NOFP16-NEXT: mov h3, v0[1]
-; NOFP16-NEXT: fcvt s5, h0
-; NOFP16-NEXT: mov h7, v0[2]
+; NOFP16-NEXT: mov h5, v0[2]
 ; NOFP16-NEXT: mvni.4s v2, #128, lsl #24
+; NOFP16-NEXT: fcvt s7, h0
 ; NOFP16-NEXT: mov h4, v1[1]
-; NOFP16-NEXT: fcvt s6, h1
-; NOFP16-NEXT: mov h16, v1[2]
 ; NOFP16-NEXT: fcvt s3, h3
+; NOFP16-NEXT: mov h6, v1[2]
+; NOFP16-NEXT: fcvt s16, h1
 ; NOFP16-NEXT: mov h1, v1[3]
 ; NOFP16-NEXT: fcvt s4, h4
-; NOFP16-NEXT: bif.16b v5, v6, v2
-; NOFP16-NEXT: fcvt s6, h7
-; NOFP16-NEXT: fcvt s7, h16
 ; NOFP16-NEXT: fcvt s1, h1
 ; NOFP16-NEXT: bif.16b v3, v4, v2
-; NOFP16-NEXT: mov h4, v0[3]
-; NOFP16-NEXT: fcvt h0, s5
-; NOFP16-NEXT: bif.16b v6, v7, v2
+; NOFP16-NEXT: fcvt s4, h5
+; NOFP16-NEXT: fcvt s5, h6
+; NOFP16-NEXT: mov.16b v6, v2
+; NOFP16-NEXT: bsl.16b v6, v7, v16
+; NOFP16-NEXT: mov h7, v0[3]
+; NOFP16-NEXT: bif.16b v4, v5, v2
 ; NOFP16-NEXT: fcvt h3, s3
-; NOFP16-NEXT: fcvt s4, h4
-; NOFP16-NEXT: fcvt h5, s6
+; NOFP16-NEXT: fcvt h0, s6
+; NOFP16-NEXT: fcvt s5, h7
 ; NOFP16-NEXT: mov.h v0[1], v3[0]
-; NOFP16-NEXT: bit.16b v1, v4, v2
-; NOFP16-NEXT: mov.h v0[2], v5[0]
+; NOFP16-NEXT: fcvt h3, s4
+; NOFP16-NEXT: bit.16b v1, v5, v2
+; NOFP16-NEXT: mov.h v0[2], v3[0]
 ; NOFP16-NEXT: fcvt h1, s1
 ; NOFP16-NEXT: mov.h v0[3], v1[0]
 ; NOFP16-NEXT: ; kill: def $d0 killed $d0 killed $q0
@@ -276,31 +278,31 @@
 ; NOFP16-LABEL: test_copysign_v4f16_v4f64:
 ; NOFP16: ; %bb.0:
 ; NOFP16-NEXT: ; kill: def $d0 killed $d0 def $q0
-; NOFP16-NEXT: mov d4, v1[1]
-; NOFP16-NEXT: mov h5, v0[1]
+; NOFP16-NEXT: mov d3, v1[1]
+; NOFP16-NEXT: mov h4, v0[1]
 ; NOFP16-NEXT: fcvt s1, d1
-; NOFP16-NEXT: fcvt s6, h0
+; NOFP16-NEXT: fcvt s5, h0
 ; NOFP16-NEXT: mov h7, v0[2]
-; NOFP16-NEXT: mvni.4s v3, #128, lsl #24
-; NOFP16-NEXT: fcvt s4, d4
-; NOFP16-NEXT: fcvt s5, h5
-; NOFP16-NEXT: bit.16b v1, v6, v3
-; NOFP16-NEXT: fcvt s6, d2
+; NOFP16-NEXT: mvni.4s v6, #128, lsl #24
+; NOFP16-NEXT: fcvt s3, d3
+; NOFP16-NEXT: fcvt s4, h4
+; NOFP16-NEXT: bit.16b v1, v5, v6
 ; NOFP16-NEXT: fcvt s7, h7
-; NOFP16-NEXT: mov d2, v2[1]
-; NOFP16-NEXT: bit.16b v4, v5, v3
 ; NOFP16-NEXT: mov h5, v0[3]
-; NOFP16-NEXT: fcvt h0, s1
-; NOFP16-NEXT: bit.16b v6, v7, v3
+; NOFP16-NEXT: bit.16b v3, v4, v6
+; NOFP16-NEXT: mov d4, v2[1]
 ; NOFP16-NEXT: fcvt s2, d2
-; NOFP16-NEXT: fcvt h1, s4
+; NOFP16-NEXT: fcvt h0, s1
+; NOFP16-NEXT: fcvt h1, s3
+; NOFP16-NEXT: bit.16b v2, v7, v6
+; NOFP16-NEXT: fcvt s3, d4
 ; NOFP16-NEXT: fcvt s4, h5
-; NOFP16-NEXT: fcvt h5, s6
 ; NOFP16-NEXT: mov.h v0[1], v1[0]
-; NOFP16-NEXT: mov.16b v1, v3
-; NOFP16-NEXT: bsl.16b v1, v4, v2
-; NOFP16-NEXT: mov.h v0[2], v5[0]
-; NOFP16-NEXT: fcvt h1, s1
+; NOFP16-NEXT: fcvt h1, s2
+; NOFP16-NEXT: mov.16b v2, v6
+; NOFP16-NEXT: bsl.16b v2, v4, v3
+; NOFP16-NEXT: mov.h v0[2], v1[0]
+; NOFP16-NEXT: fcvt h1, s2
 ; NOFP16-NEXT: mov.h v0[3], v1[0]
 ; NOFP16-NEXT: ; kill: def $d0 killed $d0 killed $q0
 ; NOFP16-NEXT: ret
@@ -331,63 +333,61 @@
 define <8 x half> @test_copysign_v8f16_v8f16(<8 x half> %a, <8 x half> %b) #0 {
 ; NOFP16-LABEL: test_copysign_v8f16_v8f16:
 ; NOFP16: ; %bb.0:
-; NOFP16-NEXT: mov h5, v1[1]
-; NOFP16-NEXT: mov h6, v0[1]
-; NOFP16-NEXT: fcvt s2, h1
-; NOFP16-NEXT: fcvt s4, h0
+; NOFP16-NEXT: mov h2, v1[1]
+; NOFP16-NEXT: mov h4, v0[1]
+; NOFP16-NEXT: fcvt s5, h1
+; NOFP16-NEXT: fcvt s6, h0
+; NOFP16-NEXT: mvni.4s v3, #128, lsl #24
 ; NOFP16-NEXT: mov h7, v1[2]
 ; NOFP16-NEXT: mov h16, v0[2]
-; NOFP16-NEXT: mvni.4s v3, #128, lsl #24
-; NOFP16-NEXT: mov h17, v0[3]
-; NOFP16-NEXT: fcvt s5, h5
-; NOFP16-NEXT: fcvt s6, h6
-; NOFP16-NEXT: mov h18, v1[5]
-; NOFP16-NEXT: bit.16b v2, v4, v3
-; NOFP16-NEXT: mov h4, v1[3]
+; NOFP16-NEXT: mov h17, v1[3]
+; NOFP16-NEXT: fcvt s2, h2
+; NOFP16-NEXT: fcvt s4, h4
+; NOFP16-NEXT: bit.16b v5, v6, v3
+; NOFP16-NEXT: mov h6, v0[3]
 ; NOFP16-NEXT: fcvt s7, h7
 ; NOFP16-NEXT: fcvt s16, h16
 ; NOFP16-NEXT: fcvt s17, h17
-; NOFP16-NEXT: bit.16b v5, v6, v3
-; NOFP16-NEXT: fcvt s18, h18
-; NOFP16-NEXT: mov.16b v6, v3
-; NOFP16-NEXT: fcvt s4, h4
-; NOFP16-NEXT: bsl.16b v6, v16, v7
+; NOFP16-NEXT: bif.16b v4, v2, v3
+; NOFP16-NEXT: fcvt h2, s5
+; NOFP16-NEXT: mov.16b v5, v3
+; NOFP16-NEXT: fcvt s6, h6
+; NOFP16-NEXT: bsl.16b v5, v16, v7
+; NOFP16-NEXT: fcvt h4, s4
 ; NOFP16-NEXT: mov h7, v1[4]
 ; NOFP16-NEXT: mov h16, v0[4]
-; NOFP16-NEXT: fcvt h2, s2
-; NOFP16-NEXT: fcvt h5, s5
-; NOFP16-NEXT: bit.16b v4, v17, v3
+; NOFP16-NEXT: bif.16b v6, v17, v3
 ; NOFP16-NEXT: mov h17, v0[5]
-; NOFP16-NEXT: fcvt s7, h7
-; NOFP16-NEXT: fcvt s16, h16
+; NOFP16-NEXT: fcvt h5, s5
+; NOFP16-NEXT: mov.h v2[1], v4[0]
+; NOFP16-NEXT: fcvt s4, h7
+; NOFP16-NEXT: fcvt s7, h16
+; NOFP16-NEXT: mov h16, v1[5]
 ; NOFP16-NEXT: fcvt h6, s6
-; NOFP16-NEXT: mov.h v2[1], v5[0]
-; NOFP16-NEXT: mov.16b v5, v3
 ; NOFP16-NEXT: fcvt s17, h17
-; NOFP16-NEXT: bsl.16b v5, v16, v7
-; NOFP16-NEXT: mov h7, v1[6]
-; NOFP16-NEXT: mov h16, v0[6]
-; NOFP16-NEXT: mov.h v2[2], v6[0]
-; NOFP16-NEXT: fcvt h4, s4
-; NOFP16-NEXT: mov.16b v6, v3
-; NOFP16-NEXT: bsl.16b v6, v17, v18
-; NOFP16-NEXT: fcvt s7, h7
-; NOFP16-NEXT: fcvt s16, h16
+; NOFP16-NEXT: mov.h v2[2], v5[0]
+; NOFP16-NEXT: mov h5, v1[6]
 ; NOFP16-NEXT: mov h1, v1[7]
-; NOFP16-NEXT: mov.h v2[3], v4[0]
-; NOFP16-NEXT: fcvt h4, s5
+; NOFP16-NEXT: bit.16b v4, v7, v3
+; NOFP16-NEXT: mov h7, v0[6]
+; NOFP16-NEXT: fcvt s16, h16
 ; NOFP16-NEXT: mov h0, v0[7]
-; NOFP16-NEXT: mov.16b v5, v3
-; NOFP16-NEXT: bsl.16b v5, v16, v7
-; NOFP16-NEXT: mov.h v2[4], v4[0]
-; NOFP16-NEXT: fcvt h4, s6
+; NOFP16-NEXT: mov.h v2[3], v6[0]
+; NOFP16-NEXT: fcvt s5, h5
 ; NOFP16-NEXT: fcvt s1, h1
+; NOFP16-NEXT: fcvt s6, h7
+; NOFP16-NEXT: mov.16b v7, v3
+; NOFP16-NEXT: fcvt h4, s4
 ; NOFP16-NEXT: fcvt s0, h0
-; NOFP16-NEXT: fcvt h5, s5
-; NOFP16-NEXT: mov.h v2[5], v4[0]
+; NOFP16-NEXT: bsl.16b v7, v17, v16
+; NOFP16-NEXT: bit.16b v5, v6, v3
+; NOFP16-NEXT: mov.h v2[4], v4[0]
 ; NOFP16-NEXT: bif.16b v0, v1, v3
-; NOFP16-NEXT: mov.h v2[6], v5[0]
+; NOFP16-NEXT: fcvt h4, s7
 ; NOFP16-NEXT: fcvt h0, s0
+; NOFP16-NEXT: mov.h v2[5], v4[0]
+; NOFP16-NEXT: fcvt h4, s5
+; NOFP16-NEXT: mov.h v2[6], v4[0]
 ; NOFP16-NEXT: mov.h v2[7], v0[0]
 ; NOFP16-NEXT: mov.16b v0, v2
 ; NOFP16-NEXT: ret
@@ -407,59 +407,58 @@
 ; NOFP16-NEXT: fcvtn v1.4h, v1.4s
 ; NOFP16-NEXT: mov h4, v0[1]
 ; NOFP16-NEXT: fcvt s6, h0
-; NOFP16-NEXT: mov h16, v0[2]
 ; NOFP16-NEXT: mvni.4s v3, #128, lsl #24
+; NOFP16-NEXT: mov h7, v0[2]
 ; NOFP16-NEXT: fcvtn v2.4h, v2.4s
 ; NOFP16-NEXT: mov h5, v1[1]
-; NOFP16-NEXT: fcvt s7, h1
+; NOFP16-NEXT: fcvt s16, h1
 ; NOFP16-NEXT: fcvt s4, h4
 ; NOFP16-NEXT: mov h17, v1[2]
 ; NOFP16-NEXT: mov h1, v1[3]
-; NOFP16-NEXT: fcvt s16, h16
+; NOFP16-NEXT: fcvt s7, h7
 ; NOFP16-NEXT: fcvt s5, h5
-; NOFP16-NEXT: bif.16b v6, v7, v3
-; NOFP16-NEXT: mov h7, v0[3]
+; NOFP16-NEXT: bif.16b v6, v16, v3
+; NOFP16-NEXT: mov h16, v0[3]
 ; NOFP16-NEXT: fcvt s17, h17
 ; NOFP16-NEXT: fcvt s18, h1
 ; NOFP16-NEXT: bif.16b v4, v5, v3
-; NOFP16-NEXT: mov h5, v0[4]
 ; NOFP16-NEXT: fcvt h1, s6
 ; NOFP16-NEXT: mov.16b v6, v3
-; NOFP16-NEXT: fcvt s7, h7
-; NOFP16-NEXT: bsl.16b v6, v16, v17
+; NOFP16-NEXT: mov h5, v0[4]
+; NOFP16-NEXT: fcvt s16, h16
+; NOFP16-NEXT: bsl.16b v6, v7, v17
+; NOFP16-NEXT: mov h7, v0[5]
+; NOFP16-NEXT: mov h17, v2[1]
 ; NOFP16-NEXT: fcvt h4, s4
-; NOFP16-NEXT: mov h16, v0[5]
 ; NOFP16-NEXT: fcvt s5, h5
-; NOFP16-NEXT: fcvt s17, h2
-; NOFP16-NEXT: bif.16b v7, v18, v3
-; NOFP16-NEXT: mov h18, v2[1]
-; NOFP16-NEXT: mov.h v1[1], v4[0]
+; NOFP16-NEXT: bif.16b v16, v18, v3
 ; NOFP16-NEXT: fcvt h6, s6
-; NOFP16-NEXT: fcvt s4, h16
-; NOFP16-NEXT: bif.16b v5, v17, v3
-; NOFP16-NEXT: fcvt s16, h18
-; NOFP16-NEXT: mov h17, v0[6]
-; NOFP16-NEXT: mov h18, v2[2]
-; NOFP16-NEXT: mov h0, v0[7]
+; NOFP16-NEXT: fcvt s7, h7
+; NOFP16-NEXT: fcvt s17, h17
+; NOFP16-NEXT: mov.h v1[1], v4[0]
+; NOFP16-NEXT: fcvt s4, h2
+; NOFP16-NEXT: bif.16b v7, v17, v3
+; NOFP16-NEXT: bit.16b v4, v5, v3
+; NOFP16-NEXT: fcvt h5, s16
 ; NOFP16-NEXT: mov.h v1[2], v6[0]
-; NOFP16-NEXT: fcvt h6, s7
-; NOFP16-NEXT: bif.16b v4, v16, v3
-; NOFP16-NEXT: fcvt s7, h17
-; NOFP16-NEXT: fcvt s16, h18
-; NOFP16-NEXT: fcvt h5, s5
-; NOFP16-NEXT: mov.h v1[3], v6[0]
+; NOFP16-NEXT: mov h6, v0[6]
+; NOFP16-NEXT: mov h16, v2[2]
+; NOFP16-NEXT: mov h0, v0[7]
 ; NOFP16-NEXT: mov h2, v2[3]
-; NOFP16-NEXT: mov.16b v6, v3
+; NOFP16-NEXT: mov.h v1[3], v5[0]
 ; NOFP16-NEXT: fcvt h4, s4
-; NOFP16-NEXT: bsl.16b v6, v7, v16
+; NOFP16-NEXT: fcvt s5, h6
+; NOFP16-NEXT: fcvt s6, h16
 ; NOFP16-NEXT: fcvt s0, h0
-; NOFP16-NEXT: mov.h v1[4], v5[0]
 ; NOFP16-NEXT: fcvt s2, h2
-; NOFP16-NEXT: fcvt h5, s6
-; NOFP16-NEXT: mov.h v1[5], v4[0]
+; NOFP16-NEXT: mov.h v1[4], v4[0]
+; NOFP16-NEXT: fcvt h4, s7
+; NOFP16-NEXT: bif.16b v5, v6, v3
 ; NOFP16-NEXT: bif.16b v0, v2, v3
-; NOFP16-NEXT: mov.h v1[6], v5[0]
+; NOFP16-NEXT: mov.h v1[5], v4[0]
+; NOFP16-NEXT: fcvt h4, s5
 ; NOFP16-NEXT: fcvt h0, s0
+; NOFP16-NEXT: mov.h v1[6], v4[0]
 ; NOFP16-NEXT: mov.h v1[7], v0[0]
 ; NOFP16-NEXT: mov.16b v0, v1
 ; NOFP16-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/vector-fcvt.ll b/llvm/test/CodeGen/AArch64/vector-fcvt.ll
--- a/llvm/test/CodeGen/AArch64/vector-fcvt.ll
+++ b/llvm/test/CodeGen/AArch64/vector-fcvt.ll
@@ -36,24 +36,24 @@
 define <16 x float> @sitofp_v16i8_float(<16 x i8> %a) {
 ; CHECK-LABEL: sitofp_v16i8_float:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: zip1 v1.8b, v0.8b, v0.8b
-; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: zip1 v2.8b, v0.8b, v0.8b
 ; CHECK-NEXT: zip2 v0.8b, v0.8b, v0.8b
-; CHECK-NEXT: shl v1.4h, v1.4h, #8
-; CHECK-NEXT: zip1 v3.8b, v2.8b, v0.8b
+; CHECK-NEXT: zip1 v3.8b, v1.8b, v0.8b
+; CHECK-NEXT: zip2 v1.8b, v1.8b, v0.8b
+; CHECK-NEXT: shl v2.4h, v2.4h, #8
 ; CHECK-NEXT: shl v0.4h, v0.4h, #8
-; CHECK-NEXT: sshr v1.4h, v1.4h, #8
-; CHECK-NEXT: zip2 v2.8b, v2.8b, v0.8b
-; CHECK-NEXT: sshr v0.4h, v0.4h, #8
-; CHECK-NEXT: sshll v1.4s, v1.4h, #0
+; CHECK-NEXT: sshr v2.4h, v2.4h, #8
 ; CHECK-NEXT: shl v3.4h, v3.4h, #8
+; CHECK-NEXT: shl v1.4h, v1.4h, #8
+; CHECK-NEXT: sshr v0.4h, v0.4h, #8
+; CHECK-NEXT: sshll v2.4s, v2.4h, #0
+; CHECK-NEXT: sshr v3.4h, v3.4h, #8
+; CHECK-NEXT: sshr v1.4h, v1.4h, #8
 ; CHECK-NEXT: sshll v4.4s, v0.4h, #0
-; CHECK-NEXT: shl v2.4h, v2.4h, #8
-; CHECK-NEXT: sshr v0.4h, v3.4h, #8
-; CHECK-NEXT: sshr v2.4h, v2.4h, #8
-; CHECK-NEXT: sshll v3.4s, v0.4h, #0
-; CHECK-NEXT: scvtf v0.4s, v1.4s
-; CHECK-NEXT: sshll v5.4s, v2.4h, #0
+; CHECK-NEXT: scvtf v0.4s, v2.4s
+; CHECK-NEXT: sshll v3.4s, v3.4h, #0
+; CHECK-NEXT: sshll v5.4s, v1.4h, #0
 ; CHECK-NEXT: scvtf v1.4s, v4.4s
 ; CHECK-NEXT: scvtf v2.4s, v3.4s
 ; CHECK-NEXT: scvtf v3.4s, v5.4s
@@ -87,8 +87,8 @@
 define <8 x float> @sitofp_i64_float(<8 x i64> %a) {
 ; CHECK-LABEL: sitofp_i64_float:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: scvtf v2.2d, v2.2d
 ; CHECK-NEXT: scvtf v0.2d, v0.2d
+; CHECK-NEXT: scvtf v2.2d, v2.2d
 ; CHECK-NEXT: scvtf v4.2d, v1.2d
 ; CHECK-NEXT: fcvtn v0.2s, v0.2d
 ; CHECK-NEXT: fcvtn v1.2s, v2.2d
@@ -130,23 +130,23 @@
 define <16 x float> @uitofp_v16i8_float(<16 x i8> %a) {
 ; CHECK-LABEL: uitofp_v16i8_float:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: zip1 v1.8b, v0.8b, v0.8b
-; CHECK-NEXT: zip2 v2.8b, v0.8b, v0.8b
-; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8
-; CHECK-NEXT: bic v1.4h, #255, lsl #8
-; CHECK-NEXT: bic v2.4h, #255, lsl #8
-; CHECK-NEXT: zip1 v3.8b, v0.8b, v0.8b
+; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: zip1 v2.8b, v0.8b, v0.8b
 ; CHECK-NEXT: zip2 v0.8b, v0.8b, v0.8b
-; CHECK-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-NEXT: zip1 v3.8b, v1.8b, v0.8b
+; CHECK-NEXT: zip2 v1.8b, v1.8b, v0.8b
+; CHECK-NEXT: bic v2.4h, #255, lsl #8
+; CHECK-NEXT: bic v0.4h, #255, lsl #8
 ; CHECK-NEXT: ushll v2.4s, v2.4h, #0
 ; CHECK-NEXT: bic v3.4h, #255, lsl #8
-; CHECK-NEXT: bic v0.4h, #255, lsl #8
-; CHECK-NEXT: ushll v3.4s, v3.4h, #0
+; CHECK-NEXT: bic v1.4h, #255, lsl #8
 ; CHECK-NEXT: ushll v4.4s, v0.4h, #0
-; CHECK-NEXT: ucvtf v0.4s, v1.4s
-; CHECK-NEXT: ucvtf v1.4s, v2.4s
+; CHECK-NEXT: ucvtf v0.4s, v2.4s
+; CHECK-NEXT: ushll v3.4s, v3.4h, #0
+; CHECK-NEXT: ushll v5.4s, v1.4h, #0
+; CHECK-NEXT: ucvtf v1.4s, v4.4s
 ; CHECK-NEXT: ucvtf v2.4s, v3.4s
-; CHECK-NEXT: ucvtf v3.4s, v4.4s
+; CHECK-NEXT: ucvtf v3.4s, v5.4s
 ; CHECK-NEXT: ret
 %1 = uitofp <16 x i8> %a to <16 x float>
 ret <16 x float> %1
@@ -177,8 +177,8 @@
 define <8 x float> @uitofp_i64_float(<8 x i64> %a) {
 ; CHECK-LABEL: uitofp_i64_float:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ucvtf v2.2d, v2.2d
 ; CHECK-NEXT: ucvtf v0.2d, v0.2d
+; CHECK-NEXT: ucvtf v2.2d, v2.2d
 ; CHECK-NEXT: ucvtf v4.2d, v1.2d
 ; CHECK-NEXT: fcvtn v0.2s, v0.2d
 ; CHECK-NEXT: fcvtn v1.2s, v2.2d
@@ -215,25 +215,25 @@
 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
 ; CHECK-NEXT: umov w8, v0.b[0]
 ; CHECK-NEXT: umov w9, v0.b[2]
-; CHECK-NEXT: umov w10, v0.b[4]
-; CHECK-NEXT: umov w11, v0.b[6]
-; CHECK-NEXT: fmov s1, w8
-; CHECK-NEXT: umov w8, v0.b[1]
-; CHECK-NEXT: fmov s2, w9
-; CHECK-NEXT: umov w9, v0.b[3]
-; CHECK-NEXT: fmov s3, w10
-; CHECK-NEXT: umov w10, v0.b[5]
-; CHECK-NEXT: fmov s4, w11
-; CHECK-NEXT: umov w11, v0.b[7]
-; CHECK-NEXT: mov v1.s[1], w8
-; CHECK-NEXT: mov v2.s[1], w9
-; CHECK-NEXT: mov v3.s[1], w10
-; CHECK-NEXT: mov v4.s[1], w11
-; CHECK-NEXT: shl v0.2s, v1.2s, #24
-; CHECK-NEXT: shl v1.2s, v2.2s, #24
-; CHECK-NEXT: shl v2.2s, v3.2s, #24
+; CHECK-NEXT: umov w11, v0.b[4]
+; CHECK-NEXT: umov w12, v0.b[6]
+; CHECK-NEXT: umov w10, v0.b[1]
+; CHECK-NEXT: umov w13, v0.b[3]
+; CHECK-NEXT: umov w14, v0.b[5]
+; CHECK-NEXT: umov w15, v0.b[7]
+; CHECK-NEXT: fmov s0, w8
+; CHECK-NEXT: fmov s1, w9
+; CHECK-NEXT: fmov s2, w11
+; CHECK-NEXT: fmov s3, w12
+; CHECK-NEXT: mov v0.s[1], w10
+; CHECK-NEXT: mov v1.s[1], w13
+; CHECK-NEXT: mov v2.s[1], w14
+; CHECK-NEXT: mov v3.s[1], w15
+; CHECK-NEXT: shl v0.2s, v0.2s, #24
+; CHECK-NEXT: shl v1.2s, v1.2s, #24
+; CHECK-NEXT: shl v2.2s, v2.2s, #24
+; CHECK-NEXT: shl v3.2s, v3.2s, #24
 ; CHECK-NEXT: sshr v0.2s, v0.2s, #24
-; CHECK-NEXT: shl v3.2s, v4.2s, #24
 ; CHECK-NEXT: sshr v1.2s, v1.2s, #24
 ; CHECK-NEXT: sshr v2.2s, v2.2s, #24
 ; CHECK-NEXT: sshr v3.2s, v3.2s, #24
@@ -253,71 +253,71 @@
 define <16 x double> @sitofp_v16i8_double(<16 x i8> %a) {
 ; CHECK-LABEL: sitofp_v16i8_double:
 ; CHECK: // %bb.0:
-; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
 ; CHECK-NEXT: umov w8, v0.b[0]
-; CHECK-NEXT: umov w9, v0.b[2]
-; CHECK-NEXT: umov w11, v0.b[1]
+; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
+; CHECK-NEXT: umov w9, v0.b[1]
+; CHECK-NEXT: umov w10, v0.b[2]
 ; CHECK-NEXT: umov w12, v0.b[4]
-; CHECK-NEXT: umov w10, v1.b[0]
+; CHECK-NEXT: umov w14, v0.b[6]
+; CHECK-NEXT: umov w11, v0.b[3]
+; CHECK-NEXT: umov w13, v0.b[5]
 ; CHECK-NEXT: fmov s2, w8
-; CHECK-NEXT: umov w8, v1.b[2]
-; CHECK-NEXT: fmov s3, w9
-; CHECK-NEXT: umov w9, v1.b[1]
-; CHECK-NEXT: fmov s7, w12
-; CHECK-NEXT: mov v2.s[1], w11
-; CHECK-NEXT: umov w11, v1.b[3]
-; CHECK-NEXT: fmov s4, w10
-; CHECK-NEXT: umov w10, v1.b[4]
-; CHECK-NEXT: fmov s5, w8
-; CHECK-NEXT: umov w8, v1.b[6]
-; CHECK-NEXT: umov w12, v0.b[7]
-; CHECK-NEXT: mov v4.s[1], w9
-; CHECK-NEXT: umov w9, v1.b[5]
-; CHECK-NEXT: mov v5.s[1], w11
-; CHECK-NEXT: fmov s6, w10
-; CHECK-NEXT: umov w10, v0.b[6]
-; CHECK-NEXT: umov w11, v1.b[7]
-; CHECK-NEXT: fmov s1, w8
-; CHECK-NEXT: umov w8, v0.b[3]
-; CHECK-NEXT: mov v6.s[1], w9
-; CHECK-NEXT: umov w9, v0.b[5]
-; CHECK-NEXT: shl v4.
v4.2s, #24 +; CHECK-NEXT: umov w15, v1.b[0] +; CHECK-NEXT: umov w17, v1.b[2] +; CHECK-NEXT: umov w0, v1.b[4] +; CHECK-NEXT: umov w16, v1.b[1] +; CHECK-NEXT: umov w18, v1.b[3] +; CHECK-NEXT: umov w8, v0.b[7] ; CHECK-NEXT: fmov s0, w10 -; CHECK-NEXT: shl v5.2s, v5.2s, #24 -; CHECK-NEXT: mov v1.s[1], w11 +; CHECK-NEXT: umov w10, v1.b[5] +; CHECK-NEXT: mov v2.s[1], w9 +; CHECK-NEXT: umov w9, v1.b[6] +; CHECK-NEXT: fmov s3, w12 +; CHECK-NEXT: umov w12, v1.b[7] +; CHECK-NEXT: fmov s1, w14 +; CHECK-NEXT: fmov s4, w15 +; CHECK-NEXT: fmov s5, w17 +; CHECK-NEXT: fmov s6, w0 +; CHECK-NEXT: mov v0.s[1], w11 +; CHECK-NEXT: mov v3.s[1], w13 +; CHECK-NEXT: fmov s7, w9 +; CHECK-NEXT: mov v1.s[1], w8 +; CHECK-NEXT: mov v4.s[1], w16 +; CHECK-NEXT: mov v5.s[1], w18 +; CHECK-NEXT: mov v6.s[1], w10 ; CHECK-NEXT: shl v2.2s, v2.2s, #24 -; CHECK-NEXT: mov v3.s[1], w8 -; CHECK-NEXT: mov v7.s[1], w9 -; CHECK-NEXT: mov v0.s[1], w12 -; CHECK-NEXT: shl v6.2s, v6.2s, #24 -; CHECK-NEXT: shl v1.2s, v1.2s, #24 +; CHECK-NEXT: shl v0.2s, v0.2s, #24 +; CHECK-NEXT: mov v7.s[1], w12 ; CHECK-NEXT: shl v3.2s, v3.2s, #24 +; CHECK-NEXT: shl v1.2s, v1.2s, #24 +; CHECK-NEXT: shl v4.2s, v4.2s, #24 +; CHECK-NEXT: sshr v2.2s, v2.2s, #24 +; CHECK-NEXT: shl v5.2s, v5.2s, #24 +; CHECK-NEXT: shl v6.2s, v6.2s, #24 +; CHECK-NEXT: sshr v0.2s, v0.2s, #24 +; CHECK-NEXT: sshr v3.2s, v3.2s, #24 ; CHECK-NEXT: shl v7.2s, v7.2s, #24 ; CHECK-NEXT: sshr v4.2s, v4.2s, #24 -; CHECK-NEXT: shl v0.2s, v0.2s, #24 +; CHECK-NEXT: sshr v1.2s, v1.2s, #24 ; CHECK-NEXT: sshr v5.2s, v5.2s, #24 ; CHECK-NEXT: sshr v6.2s, v6.2s, #24 -; CHECK-NEXT: sshr v1.2s, v1.2s, #24 -; CHECK-NEXT: sshr v2.2s, v2.2s, #24 -; CHECK-NEXT: sshr v3.2s, v3.2s, #24 +; CHECK-NEXT: sshll v2.2d, v2.2s, #0 +; CHECK-NEXT: sshll v16.2d, v0.2s, #0 +; CHECK-NEXT: sshll v3.2d, v3.2s, #0 ; CHECK-NEXT: sshr v7.2s, v7.2s, #24 -; CHECK-NEXT: sshr v0.2s, v0.2s, #24 ; CHECK-NEXT: sshll v4.2d, v4.2s, #0 +; CHECK-NEXT: sshll v17.2d, v1.2s, #0 ; CHECK-NEXT: sshll v5.2d, v5.2s, #0 ; CHECK-NEXT: sshll v6.2d, v6.2s, #0 -; CHECK-NEXT: sshll v16.2d, v1.2s, #0 -; CHECK-NEXT: sshll v1.2d, v2.2s, #0 -; CHECK-NEXT: sshll v2.2d, v3.2s, #0 -; CHECK-NEXT: sshll v3.2d, v7.2s, #0 -; CHECK-NEXT: sshll v7.2d, v0.2s, #0 -; CHECK-NEXT: scvtf v0.2d, v1.2d -; CHECK-NEXT: scvtf v1.2d, v2.2d +; CHECK-NEXT: scvtf v0.2d, v2.2d +; CHECK-NEXT: scvtf v1.2d, v16.2d ; CHECK-NEXT: scvtf v2.2d, v3.2d -; CHECK-NEXT: scvtf v3.2d, v7.2d +; CHECK-NEXT: sshll v7.2d, v7.2s, #0 ; CHECK-NEXT: scvtf v4.2d, v4.2d +; CHECK-NEXT: scvtf v3.2d, v17.2d ; CHECK-NEXT: scvtf v5.2d, v5.2d ; CHECK-NEXT: scvtf v6.2d, v6.2d -; CHECK-NEXT: scvtf v7.2d, v16.2d +; CHECK-NEXT: scvtf v7.2d, v7.2d ; CHECK-NEXT: ret %1 = sitofp <16 x i8> %a to <16 x double> ret <16 x double> %1 @@ -326,15 +326,15 @@ define <8 x double> @sitofp_i16_double(<8 x i16> %a) { ; CHECK-LABEL: sitofp_i16_double: ; CHECK: // %bb.0: -; CHECK-NEXT: sshll2 v1.4s, v0.8h, #0 -; CHECK-NEXT: sshll v0.4s, v0.4h, #0 -; CHECK-NEXT: sshll2 v2.2d, v1.4s, #0 +; CHECK-NEXT: sshll v1.4s, v0.4h, #0 +; CHECK-NEXT: sshll2 v0.4s, v0.8h, #0 +; CHECK-NEXT: sshll v2.2d, v1.2s, #0 ; CHECK-NEXT: sshll2 v3.2d, v0.4s, #0 -; CHECK-NEXT: sshll v4.2d, v1.2s, #0 -; CHECK-NEXT: sshll v0.2d, v0.2s, #0 -; CHECK-NEXT: scvtf v1.2d, v3.2d -; CHECK-NEXT: scvtf v0.2d, v0.2d -; CHECK-NEXT: scvtf v3.2d, v2.2d +; CHECK-NEXT: sshll2 v1.2d, v1.4s, #0 +; CHECK-NEXT: sshll v4.2d, v0.2s, #0 +; CHECK-NEXT: scvtf v0.2d, v2.2d +; CHECK-NEXT: scvtf v3.2d, v3.2d +; CHECK-NEXT: scvtf v1.2d, v1.2d ; CHECK-NEXT: scvtf v2.2d, v4.2d ; 
CHECK-NEXT: ret %1 = sitofp <8 x i16> %a to <8 x double> @@ -344,14 +344,14 @@ define <8 x double> @sitofp_i32_double(<8 x i32> %a) { ; CHECK-LABEL: sitofp_i32_double: ; CHECK: // %bb.0: -; CHECK-NEXT: sshll2 v2.2d, v0.4s, #0 -; CHECK-NEXT: sshll2 v3.2d, v1.4s, #0 -; CHECK-NEXT: sshll v0.2d, v0.2s, #0 -; CHECK-NEXT: sshll v4.2d, v1.2s, #0 -; CHECK-NEXT: scvtf v1.2d, v2.2d -; CHECK-NEXT: scvtf v0.2d, v0.2d -; CHECK-NEXT: scvtf v3.2d, v3.2d -; CHECK-NEXT: scvtf v2.2d, v4.2d +; CHECK-NEXT: sshll v2.2d, v0.2s, #0 +; CHECK-NEXT: sshll2 v3.2d, v0.4s, #0 +; CHECK-NEXT: sshll2 v4.2d, v1.4s, #0 +; CHECK-NEXT: sshll v5.2d, v1.2s, #0 +; CHECK-NEXT: scvtf v0.2d, v2.2d +; CHECK-NEXT: scvtf v1.2d, v3.2d +; CHECK-NEXT: scvtf v3.2d, v4.2d +; CHECK-NEXT: scvtf v2.2d, v5.2d ; CHECK-NEXT: ret %1 = sitofp <8 x i32> %a to <8 x double> ret <8 x double> %1 @@ -376,8 +376,8 @@ ; CHECK-NEXT: movi d1, #0x0000ff000000ff ; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b -; CHECK-NEXT: ushll v0.2d, v0.2s, #0 ; CHECK-NEXT: and v1.8b, v2.8b, v1.8b +; CHECK-NEXT: ushll v0.2d, v0.2s, #0 ; CHECK-NEXT: ushll v1.2d, v1.2s, #0 ; CHECK-NEXT: ucvtf v0.2d, v0.2d ; CHECK-NEXT: ucvtf v1.2d, v1.2d @@ -392,25 +392,25 @@ ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: umov w8, v0.b[0] ; CHECK-NEXT: umov w9, v0.b[2] -; CHECK-NEXT: umov w10, v0.b[4] -; CHECK-NEXT: umov w11, v0.b[6] +; CHECK-NEXT: umov w11, v0.b[4] +; CHECK-NEXT: umov w12, v0.b[6] +; CHECK-NEXT: umov w10, v0.b[1] +; CHECK-NEXT: umov w13, v0.b[3] +; CHECK-NEXT: umov w14, v0.b[5] +; CHECK-NEXT: umov w15, v0.b[7] ; CHECK-NEXT: movi d1, #0x0000ff000000ff -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: umov w8, v0.b[1] -; CHECK-NEXT: fmov s3, w9 -; CHECK-NEXT: umov w9, v0.b[3] -; CHECK-NEXT: fmov s4, w10 -; CHECK-NEXT: umov w10, v0.b[5] -; CHECK-NEXT: fmov s5, w11 -; CHECK-NEXT: umov w11, v0.b[7] -; CHECK-NEXT: mov v2.s[1], w8 -; CHECK-NEXT: mov v3.s[1], w9 -; CHECK-NEXT: mov v4.s[1], w10 -; CHECK-NEXT: mov v5.s[1], w11 -; CHECK-NEXT: and v0.8b, v2.8b, v1.8b -; CHECK-NEXT: and v2.8b, v3.8b, v1.8b -; CHECK-NEXT: and v3.8b, v4.8b, v1.8b -; CHECK-NEXT: and v1.8b, v5.8b, v1.8b +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: fmov s2, w9 +; CHECK-NEXT: fmov s3, w11 +; CHECK-NEXT: fmov s4, w12 +; CHECK-NEXT: mov v0.s[1], w10 +; CHECK-NEXT: mov v2.s[1], w13 +; CHECK-NEXT: mov v3.s[1], w14 +; CHECK-NEXT: mov v4.s[1], w15 +; CHECK-NEXT: and v0.8b, v0.8b, v1.8b +; CHECK-NEXT: and v2.8b, v2.8b, v1.8b +; CHECK-NEXT: and v3.8b, v3.8b, v1.8b +; CHECK-NEXT: and v1.8b, v4.8b, v1.8b ; CHECK-NEXT: ushll v0.2d, v0.2s, #0 ; CHECK-NEXT: ushll v2.2d, v2.2s, #0 ; CHECK-NEXT: ushll v3.2d, v3.2s, #0 @@ -428,60 +428,60 @@ ; CHECK-LABEL: uitofp_v16i8_double: ; CHECK: // %bb.0: ; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: umov w9, v0.b[0] -; CHECK-NEXT: umov w11, v0.b[1] +; CHECK-NEXT: umov w8, v0.b[0] +; CHECK-NEXT: umov w10, v0.b[2] +; CHECK-NEXT: umov w9, v0.b[1] +; CHECK-NEXT: umov w12, v0.b[4] +; CHECK-NEXT: umov w11, v0.b[3] +; CHECK-NEXT: umov w13, v0.b[5] +; CHECK-NEXT: umov w18, v0.b[6] ; CHECK-NEXT: movi d1, #0x0000ff000000ff -; CHECK-NEXT: umov w8, v2.b[0] -; CHECK-NEXT: umov w10, v2.b[2] -; CHECK-NEXT: umov w12, v2.b[1] -; CHECK-NEXT: fmov s4, w9 -; CHECK-NEXT: umov w9, v2.b[3] +; CHECK-NEXT: umov w14, v2.b[0] +; CHECK-NEXT: umov w16, v2.b[2] +; CHECK-NEXT: umov w0, v2.b[4] ; CHECK-NEXT: fmov s3, w8 -; CHECK-NEXT: umov w8, v2.b[4] -; CHECK-NEXT: fmov s5, w10 +; CHECK-NEXT: umov w8, v0.b[7] +; CHECK-NEXT: fmov s0, w10 ; CHECK-NEXT: umov w10, 
v2.b[6] -; CHECK-NEXT: mov v4.s[1], w11 -; CHECK-NEXT: mov v3.s[1], w12 +; CHECK-NEXT: umov w15, v2.b[1] +; CHECK-NEXT: umov w17, v2.b[3] +; CHECK-NEXT: fmov s4, w12 ; CHECK-NEXT: umov w12, v2.b[5] -; CHECK-NEXT: fmov s6, w8 -; CHECK-NEXT: umov w8, v0.b[2] -; CHECK-NEXT: mov v5.s[1], w9 +; CHECK-NEXT: fmov s7, w18 +; CHECK-NEXT: mov v3.s[1], w9 ; CHECK-NEXT: umov w9, v2.b[7] -; CHECK-NEXT: fmov s2, w10 -; CHECK-NEXT: umov w10, v0.b[4] -; CHECK-NEXT: and v3.8b, v3.8b, v1.8b -; CHECK-NEXT: mov v6.s[1], w12 -; CHECK-NEXT: umov w12, v0.b[6] -; CHECK-NEXT: fmov s7, w8 -; CHECK-NEXT: umov w8, v0.b[3] -; CHECK-NEXT: and v5.8b, v5.8b, v1.8b -; CHECK-NEXT: mov v2.s[1], w9 -; CHECK-NEXT: umov w9, v0.b[5] +; CHECK-NEXT: fmov s2, w14 +; CHECK-NEXT: fmov s5, w16 +; CHECK-NEXT: fmov s6, w0 +; CHECK-NEXT: mov v0.s[1], w11 ; CHECK-NEXT: fmov s16, w10 -; CHECK-NEXT: umov w10, v0.b[7] -; CHECK-NEXT: fmov s0, w12 +; CHECK-NEXT: mov v4.s[1], w13 ; CHECK-NEXT: mov v7.s[1], w8 -; CHECK-NEXT: and v6.8b, v6.8b, v1.8b +; CHECK-NEXT: mov v2.s[1], w15 +; CHECK-NEXT: mov v5.s[1], w17 +; CHECK-NEXT: mov v6.s[1], w12 +; CHECK-NEXT: and v3.8b, v3.8b, v1.8b ; CHECK-NEXT: mov v16.s[1], w9 -; CHECK-NEXT: and v2.8b, v2.8b, v1.8b -; CHECK-NEXT: mov v0.s[1], w10 +; CHECK-NEXT: and v0.8b, v0.8b, v1.8b ; CHECK-NEXT: and v4.8b, v4.8b, v1.8b ; CHECK-NEXT: and v7.8b, v7.8b, v1.8b -; CHECK-NEXT: and v16.8b, v16.8b, v1.8b -; CHECK-NEXT: ushll v17.2d, v3.2s, #0 -; CHECK-NEXT: and v0.8b, v0.8b, v1.8b +; CHECK-NEXT: and v2.8b, v2.8b, v1.8b +; CHECK-NEXT: ushll v3.2d, v3.2s, #0 +; CHECK-NEXT: and v5.8b, v5.8b, v1.8b +; CHECK-NEXT: and v6.8b, v6.8b, v1.8b +; CHECK-NEXT: and v1.8b, v16.8b, v1.8b +; CHECK-NEXT: ushll v16.2d, v0.2s, #0 +; CHECK-NEXT: ushll v17.2d, v4.2s, #0 +; CHECK-NEXT: ushll v2.2d, v2.2s, #0 +; CHECK-NEXT: ushll v7.2d, v7.2s, #0 +; CHECK-NEXT: ucvtf v0.2d, v3.2d ; CHECK-NEXT: ushll v5.2d, v5.2s, #0 ; CHECK-NEXT: ushll v6.2d, v6.2s, #0 -; CHECK-NEXT: ushll v18.2d, v2.2s, #0 -; CHECK-NEXT: ushll v1.2d, v4.2s, #0 -; CHECK-NEXT: ushll v2.2d, v7.2s, #0 -; CHECK-NEXT: ushll v3.2d, v16.2s, #0 -; CHECK-NEXT: ushll v4.2d, v0.2s, #0 -; CHECK-NEXT: ucvtf v0.2d, v1.2d -; CHECK-NEXT: ucvtf v1.2d, v2.2d -; CHECK-NEXT: ucvtf v2.2d, v3.2d -; CHECK-NEXT: ucvtf v3.2d, v4.2d -; CHECK-NEXT: ucvtf v4.2d, v17.2d +; CHECK-NEXT: ushll v18.2d, v1.2s, #0 +; CHECK-NEXT: ucvtf v1.2d, v16.2d +; CHECK-NEXT: ucvtf v4.2d, v2.2d +; CHECK-NEXT: ucvtf v2.2d, v17.2d +; CHECK-NEXT: ucvtf v3.2d, v7.2d ; CHECK-NEXT: ucvtf v5.2d, v5.2d ; CHECK-NEXT: ucvtf v6.2d, v6.2d ; CHECK-NEXT: ucvtf v7.2d, v18.2d @@ -493,15 +493,15 @@ define <8 x double> @uitofp_i16_double(<8 x i16> %a) { ; CHECK-LABEL: uitofp_i16_double: ; CHECK: // %bb.0: -; CHECK-NEXT: ushll2 v1.4s, v0.8h, #0 -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-NEXT: ushll2 v2.2d, v1.4s, #0 +; CHECK-NEXT: ushll v1.4s, v0.4h, #0 +; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0 +; CHECK-NEXT: ushll v2.2d, v1.2s, #0 ; CHECK-NEXT: ushll2 v3.2d, v0.4s, #0 -; CHECK-NEXT: ushll v4.2d, v1.2s, #0 -; CHECK-NEXT: ushll v0.2d, v0.2s, #0 -; CHECK-NEXT: ucvtf v1.2d, v3.2d -; CHECK-NEXT: ucvtf v0.2d, v0.2d -; CHECK-NEXT: ucvtf v3.2d, v2.2d +; CHECK-NEXT: ushll2 v1.2d, v1.4s, #0 +; CHECK-NEXT: ushll v4.2d, v0.2s, #0 +; CHECK-NEXT: ucvtf v0.2d, v2.2d +; CHECK-NEXT: ucvtf v3.2d, v3.2d +; CHECK-NEXT: ucvtf v1.2d, v1.2d ; CHECK-NEXT: ucvtf v2.2d, v4.2d ; CHECK-NEXT: ret %1 = uitofp <8 x i16> %a to <8 x double> @@ -511,14 +511,14 @@ define <8 x double> @uitofp_i32_double(<8 x i32> %a) { ; CHECK-LABEL: uitofp_i32_double: ; CHECK: // %bb.0: 
-; CHECK-NEXT: ushll2 v2.2d, v0.4s, #0 -; CHECK-NEXT: ushll2 v3.2d, v1.4s, #0 -; CHECK-NEXT: ushll v0.2d, v0.2s, #0 -; CHECK-NEXT: ushll v4.2d, v1.2s, #0 -; CHECK-NEXT: ucvtf v1.2d, v2.2d -; CHECK-NEXT: ucvtf v0.2d, v0.2d -; CHECK-NEXT: ucvtf v3.2d, v3.2d -; CHECK-NEXT: ucvtf v2.2d, v4.2d +; CHECK-NEXT: ushll v2.2d, v0.2s, #0 +; CHECK-NEXT: ushll2 v3.2d, v0.4s, #0 +; CHECK-NEXT: ushll2 v4.2d, v1.4s, #0 +; CHECK-NEXT: ushll v5.2d, v1.2s, #0 +; CHECK-NEXT: ucvtf v0.2d, v2.2d +; CHECK-NEXT: ucvtf v1.2d, v3.2d +; CHECK-NEXT: ucvtf v3.2d, v4.2d +; CHECK-NEXT: ucvtf v2.2d, v5.2d ; CHECK-NEXT: ret %1 = uitofp <8 x i32> %a to <8 x double> ret <8 x double> %1 diff --git a/llvm/test/CodeGen/AArch64/vector-gep.ll b/llvm/test/CodeGen/AArch64/vector-gep.ll --- a/llvm/test/CodeGen/AArch64/vector-gep.ll +++ b/llvm/test/CodeGen/AArch64/vector-gep.ll @@ -13,11 +13,11 @@ ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: Lloh0: ; CHECK-NEXT: adrp x8, lCPI0_0@PAGE -; CHECK-NEXT: movi v2.2d, #0x000000ffffffff ; CHECK-NEXT: Lloh1: ; CHECK-NEXT: ldr q1, [x8, lCPI0_0@PAGEOFF] ; CHECK-NEXT: add v0.2d, v0.2d, v1.2d -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x000000ffffffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret ; CHECK-NEXT: .loh AdrpLdr Lloh0, Lloh1 entry: diff --git a/llvm/test/CodeGen/AArch64/vldn_shuffle.ll b/llvm/test/CodeGen/AArch64/vldn_shuffle.ll --- a/llvm/test/CodeGen/AArch64/vldn_shuffle.ll +++ b/llvm/test/CodeGen/AArch64/vldn_shuffle.ll @@ -136,11 +136,11 @@ ; CHECK-NEXT: .LBB3_1: // %vector.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add x9, x0, x8 -; CHECK-NEXT: ld2 { v0.4s, v1.4s }, [x9] -; CHECK-NEXT: add x9, x1, x8 +; CHECK-NEXT: add x10, x1, x8 ; CHECK-NEXT: add x8, x8, #32 +; CHECK-NEXT: ld2 { v0.4s, v1.4s }, [x9] ; CHECK-NEXT: cmp x8, #2, lsl #12 // =8192 -; CHECK-NEXT: ld2 { v2.4s, v3.4s }, [x9] +; CHECK-NEXT: ld2 { v2.4s, v3.4s }, [x10] ; CHECK-NEXT: fmul v4.4s, v2.4s, v0.4s ; CHECK-NEXT: fmla v4.4s, v1.4s, v3.4s ; CHECK-NEXT: str q4, [x2], #16 @@ -301,18 +301,18 @@ ; CHECK-LABEL: transpose_s16_8x8_simpler: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ldp q0, q1, [x0] -; CHECK-NEXT: ldp q2, q3, [x0, #32] +; CHECK-NEXT: ldp q2, q3, [x0, #64] +; CHECK-NEXT: ldp q4, q5, [x0, #32] +; CHECK-NEXT: ldp q6, q7, [x0, #96] ; CHECK-NEXT: trn1 v0.8h, v0.8h, v1.8h -; CHECK-NEXT: ldp q4, q5, [x0, #64] -; CHECK-NEXT: trn1 v2.8h, v2.8h, v3.8h -; CHECK-NEXT: ldp q6, q1, [x0, #96] -; CHECK-NEXT: trn1 v3.8h, v4.8h, v5.8h -; CHECK-NEXT: trn1 v3.4s, v0.4s, v3.4s -; CHECK-NEXT: trn1 v1.8h, v6.8h, v1.8h -; CHECK-NEXT: trn1 v4.4s, v2.4s, v1.4s -; CHECK-NEXT: zip2 v0.4s, v3.4s, v4.4s -; CHECK-NEXT: st2 { v3.2s, v4.2s }, [x0] -; CHECK-NEXT: str q0, [x0, #64] +; CHECK-NEXT: trn1 v1.8h, v2.8h, v3.8h +; CHECK-NEXT: trn1 v2.8h, v4.8h, v5.8h +; CHECK-NEXT: trn1 v3.8h, v6.8h, v7.8h +; CHECK-NEXT: trn1 v0.4s, v0.4s, v1.4s +; CHECK-NEXT: trn1 v1.4s, v2.4s, v3.4s +; CHECK-NEXT: zip2 v2.4s, v0.4s, v1.4s +; CHECK-NEXT: st2 { v0.2s, v1.2s }, [x0] +; CHECK-NEXT: str q2, [x0, #64] ; CHECK-NEXT: ret entry: %0 = load <8 x i16>, ptr %a, align 16 @@ -352,14 +352,14 @@ ; CHECK-LABEL: transpose_s16_8x8_simpler2: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ldp q0, q2, [x0] -; CHECK-NEXT: ldp q3, q4, [x0, #32] +; CHECK-NEXT: ldp q3, q4, [x0, #64] +; CHECK-NEXT: ldp q5, q6, [x0, #32] +; CHECK-NEXT: ldp q7, q16, [x0, #96] ; CHECK-NEXT: mov v0.h[5], v2.h[4] -; CHECK-NEXT: ldp q5, q6, [x0, #64] -; CHECK-NEXT: zip1 v3.8h, v3.8h, v4.8h -; CHECK-NEXT: ldp q7, q2, [x0, #96] -; 
CHECK-NEXT: zip1 v4.8h, v5.8h, v6.8h -; CHECK-NEXT: mov v0.s[1], v4.s[0] -; CHECK-NEXT: mov v7.h[5], v2.h[4] +; CHECK-NEXT: zip1 v2.8h, v3.8h, v4.8h +; CHECK-NEXT: zip1 v3.8h, v5.8h, v6.8h +; CHECK-NEXT: mov v7.h[5], v16.h[4] +; CHECK-NEXT: mov v0.s[1], v2.s[0] ; CHECK-NEXT: uzp1 v1.4s, v3.4s, v7.4s ; CHECK-NEXT: zip2 v2.4s, v0.4s, v1.4s ; CHECK-NEXT: st2 { v0.2s, v1.2s }, [x0] @@ -402,36 +402,36 @@ define void @transpose_s16_8x8(ptr nocapture noundef %0, ptr nocapture noundef %1, ptr nocapture noundef %2, ptr nocapture noundef %3, ptr nocapture noundef %4, ptr nocapture noundef %5, ptr nocapture noundef %6, ptr nocapture noundef %7) { ; CHECK-LABEL: transpose_s16_8x8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr q1, [x0] -; CHECK-NEXT: ldr q2, [x1] -; CHECK-NEXT: ldr q0, [x4] -; CHECK-NEXT: ldr q3, [x2] -; CHECK-NEXT: ldr q4, [x3] -; CHECK-NEXT: ldr q6, [x5] -; CHECK-NEXT: trn1 v5.8h, v1.8h, v2.8h -; CHECK-NEXT: ldr q16, [x7] -; CHECK-NEXT: trn2 v1.8h, v1.8h, v2.8h -; CHECK-NEXT: ldr q2, [x6] -; CHECK-NEXT: trn1 v7.8h, v3.8h, v4.8h -; CHECK-NEXT: trn2 v3.8h, v3.8h, v4.8h -; CHECK-NEXT: trn1 v4.8h, v0.8h, v6.8h -; CHECK-NEXT: trn2 v0.8h, v0.8h, v6.8h -; CHECK-NEXT: trn1 v17.8h, v2.8h, v16.8h -; CHECK-NEXT: trn2 v2.8h, v2.8h, v16.8h -; CHECK-NEXT: trn1 v18.4s, v5.4s, v4.4s -; CHECK-NEXT: trn1 v20.4s, v1.4s, v0.4s -; CHECK-NEXT: trn2 v4.4s, v5.4s, v4.4s -; CHECK-NEXT: trn2 v0.4s, v1.4s, v0.4s -; CHECK-NEXT: trn1 v19.4s, v7.4s, v17.4s -; CHECK-NEXT: trn1 v21.4s, v3.4s, v2.4s -; CHECK-NEXT: trn2 v5.4s, v7.4s, v17.4s -; CHECK-NEXT: trn2 v1.4s, v3.4s, v2.4s -; CHECK-NEXT: st2 { v18.2s, v19.2s }, [x0] -; CHECK-NEXT: zip2 v2.4s, v18.4s, v19.4s -; CHECK-NEXT: st2 { v20.2s, v21.2s }, [x1] -; CHECK-NEXT: zip2 v3.4s, v20.4s, v21.4s -; CHECK-NEXT: st2 { v4.2s, v5.2s }, [x2] -; CHECK-NEXT: zip2 v4.4s, v4.4s, v5.4s +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: ldr q3, [x4] +; CHECK-NEXT: ldr q4, [x5] +; CHECK-NEXT: ldr q2, [x2] +; CHECK-NEXT: ldr q5, [x3] +; CHECK-NEXT: trn1 v16.8h, v0.8h, v1.8h +; CHECK-NEXT: trn2 v0.8h, v0.8h, v1.8h +; CHECK-NEXT: ldr q6, [x6] +; CHECK-NEXT: ldr q7, [x7] +; CHECK-NEXT: trn1 v17.8h, v3.8h, v4.8h +; CHECK-NEXT: trn2 v1.8h, v3.8h, v4.8h +; CHECK-NEXT: trn1 v18.8h, v2.8h, v5.8h +; CHECK-NEXT: trn2 v2.8h, v2.8h, v5.8h +; CHECK-NEXT: trn1 v19.8h, v6.8h, v7.8h +; CHECK-NEXT: trn2 v3.8h, v6.8h, v7.8h +; CHECK-NEXT: trn1 v4.4s, v16.4s, v17.4s +; CHECK-NEXT: trn1 v6.4s, v0.4s, v1.4s +; CHECK-NEXT: trn2 v16.4s, v16.4s, v17.4s +; CHECK-NEXT: trn2 v0.4s, v0.4s, v1.4s +; CHECK-NEXT: trn1 v5.4s, v18.4s, v19.4s +; CHECK-NEXT: trn1 v7.4s, v2.4s, v3.4s +; CHECK-NEXT: trn2 v17.4s, v18.4s, v19.4s +; CHECK-NEXT: trn2 v1.4s, v2.4s, v3.4s +; CHECK-NEXT: st2 { v4.2s, v5.2s }, [x0] +; CHECK-NEXT: zip2 v2.4s, v4.4s, v5.4s +; CHECK-NEXT: zip2 v3.4s, v6.4s, v7.4s +; CHECK-NEXT: zip2 v4.4s, v16.4s, v17.4s +; CHECK-NEXT: st2 { v6.2s, v7.2s }, [x1] +; CHECK-NEXT: st2 { v16.2s, v17.2s }, [x2] ; CHECK-NEXT: st2 { v0.2s, v1.2s }, [x3] ; CHECK-NEXT: zip2 v0.4s, v0.4s, v1.4s ; CHECK-NEXT: str q2, [x4] @@ -494,40 +494,40 @@ ; CHECK-LABEL: transpose_s16_8x8_: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x8, x0 +; CHECK-NEXT: ldp q4, q5, [x0, #64] ; CHECK-NEXT: mov x9, x0 -; CHECK-NEXT: ldp q1, q2, [x0, #64] +; CHECK-NEXT: ldr q0, [x8, #16]! ; CHECK-NEXT: mov x10, x0 +; CHECK-NEXT: ldr q3, [x0] ; CHECK-NEXT: ldp q6, q7, [x0, #96] -; CHECK-NEXT: trn1 v16.8h, v1.8h, v2.8h -; CHECK-NEXT: trn2 v1.8h, v1.8h, v2.8h -; CHECK-NEXT: ldr q0, [x0] -; CHECK-NEXT: ldr q3, [x8, #16]! 
-; CHECK-NEXT: ldr q4, [x9, #32]! -; CHECK-NEXT: ldr q5, [x10, #48]! -; CHECK-NEXT: trn1 v2.8h, v6.8h, v7.8h -; CHECK-NEXT: trn2 v6.8h, v6.8h, v7.8h -; CHECK-NEXT: trn1 v7.8h, v0.8h, v3.8h -; CHECK-NEXT: trn2 v0.8h, v0.8h, v3.8h ; CHECK-NEXT: trn1 v17.8h, v4.8h, v5.8h -; CHECK-NEXT: trn2 v3.8h, v4.8h, v5.8h -; CHECK-NEXT: trn1 v4.4s, v7.4s, v16.4s -; CHECK-NEXT: trn1 v18.4s, v0.4s, v1.4s -; CHECK-NEXT: trn2 v20.4s, v7.4s, v16.4s -; CHECK-NEXT: trn2 v0.4s, v0.4s, v1.4s -; CHECK-NEXT: trn1 v5.4s, v17.4s, v2.4s -; CHECK-NEXT: trn1 v19.4s, v3.4s, v6.4s -; CHECK-NEXT: trn2 v21.4s, v17.4s, v2.4s -; CHECK-NEXT: trn2 v1.4s, v3.4s, v6.4s -; CHECK-NEXT: st2 { v4.2s, v5.2s }, [x0] -; CHECK-NEXT: zip2 v2.4s, v4.4s, v5.4s +; CHECK-NEXT: ldr q1, [x9, #32]! +; CHECK-NEXT: trn1 v16.8h, v3.8h, v0.8h +; CHECK-NEXT: ldr q2, [x10, #48]! +; CHECK-NEXT: trn2 v4.8h, v4.8h, v5.8h +; CHECK-NEXT: trn1 v19.8h, v6.8h, v7.8h +; CHECK-NEXT: trn2 v0.8h, v3.8h, v0.8h +; CHECK-NEXT: trn2 v3.8h, v6.8h, v7.8h +; CHECK-NEXT: trn1 v18.8h, v1.8h, v2.8h +; CHECK-NEXT: trn2 v1.8h, v1.8h, v2.8h +; CHECK-NEXT: trn1 v5.4s, v16.4s, v17.4s +; CHECK-NEXT: trn2 v16.4s, v16.4s, v17.4s +; CHECK-NEXT: trn1 v20.4s, v0.4s, v4.4s +; CHECK-NEXT: trn1 v6.4s, v18.4s, v19.4s +; CHECK-NEXT: trn2 v17.4s, v18.4s, v19.4s +; CHECK-NEXT: trn2 v18.4s, v0.4s, v4.4s +; CHECK-NEXT: trn1 v21.4s, v1.4s, v3.4s +; CHECK-NEXT: trn2 v19.4s, v1.4s, v3.4s +; CHECK-NEXT: zip2 v0.4s, v5.4s, v6.4s +; CHECK-NEXT: zip2 v2.4s, v16.4s, v17.4s +; CHECK-NEXT: st2 { v5.2s, v6.2s }, [x0] +; CHECK-NEXT: zip2 v1.4s, v20.4s, v21.4s ; CHECK-NEXT: zip2 v3.4s, v18.4s, v19.4s -; CHECK-NEXT: st2 { v18.2s, v19.2s }, [x8] -; CHECK-NEXT: zip2 v4.4s, v20.4s, v21.4s -; CHECK-NEXT: st2 { v0.2s, v1.2s }, [x10] -; CHECK-NEXT: zip2 v0.4s, v0.4s, v1.4s -; CHECK-NEXT: st2 { v20.2s, v21.2s }, [x9] -; CHECK-NEXT: stp q2, q3, [x0, #64] -; CHECK-NEXT: stp q4, q0, [x0, #96] +; CHECK-NEXT: st2 { v20.2s, v21.2s }, [x8] +; CHECK-NEXT: st2 { v16.2s, v17.2s }, [x9] +; CHECK-NEXT: st2 { v18.2s, v19.2s }, [x10] +; CHECK-NEXT: stp q0, q1, [x0, #64] +; CHECK-NEXT: stp q2, q3, [x0, #96] ; CHECK-NEXT: ret %2 = load <8 x i16>, ptr %0, align 16 %3 = getelementptr inbounds <8 x i16>, ptr %0, i64 1 @@ -644,8 +644,8 @@ ; CHECK-NEXT: ext v6.16b, v1.16b, v2.16b, #12 ; CHECK-NEXT: zip2 v3.4s, v0.4s, v3.4s ; CHECK-NEXT: mov v3.s[0], v0.s[0] -; CHECK-NEXT: zip2 v4.4s, v1.4s, v6.4s ; CHECK-NEXT: ext v0.16b, v2.16b, v0.16b, #12 +; CHECK-NEXT: zip2 v4.4s, v1.4s, v6.4s ; CHECK-NEXT: mov v4.s[0], v1.s[0] ; CHECK-NEXT: zip2 v5.4s, v2.4s, v0.4s ; CHECK-NEXT: mov v5.s[0], v2.s[0] diff --git a/llvm/test/CodeGen/AArch64/vselect-constants.ll b/llvm/test/CodeGen/AArch64/vselect-constants.ll --- a/llvm/test/CodeGen/AArch64/vselect-constants.ll +++ b/llvm/test/CodeGen/AArch64/vselect-constants.ll @@ -10,12 +10,12 @@ define <4 x i32> @sel_C1_or_C2_vec(<4 x i1> %cond) { ; CHECK-LABEL: sel_C1_or_C2_vec: ; CHECK: // %bb.0: +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-NEXT: adrp x8, .LCPI0_0 ; CHECK-NEXT: adrp x9, .LCPI0_1 -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-NEXT: shl v0.4s, v0.4s, #31 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0] ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI0_1] +; CHECK-NEXT: shl v0.4s, v0.4s, #31 ; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 ; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b ; CHECK-NEXT: ret @@ -27,11 +27,11 @@ ; CHECK-LABEL: cmp_sel_C1_or_C2_vec: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI1_0 -; CHECK-NEXT: adrp x9, .LCPI1_1 ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI1_0] -; 
CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI1_1] -; CHECK-NEXT: bsl v0.16b, v3.16b, v2.16b +; CHECK-NEXT: adrp x9, .LCPI1_1 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_0] +; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI1_1] +; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b ; CHECK-NEXT: ret %cond = icmp eq <4 x i32> %x, %y %add = select <4 x i1> %cond, <4 x i32> , <4 x i32> @@ -41,12 +41,12 @@ define <4 x i32> @sel_Cplus1_or_C_vec(<4 x i1> %cond) { ; CHECK-LABEL: sel_Cplus1_or_C_vec: ; CHECK: // %bb.0: +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-NEXT: adrp x8, .LCPI2_0 ; CHECK-NEXT: adrp x9, .LCPI2_1 -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-NEXT: shl v0.4s, v0.4s, #31 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0] ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI2_1] +; CHECK-NEXT: shl v0.4s, v0.4s, #31 ; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 ; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b ; CHECK-NEXT: ret @@ -58,11 +58,11 @@ ; CHECK-LABEL: cmp_sel_Cplus1_or_C_vec: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI3_0 -; CHECK-NEXT: adrp x9, .LCPI3_1 ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_0] -; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI3_1] -; CHECK-NEXT: bsl v0.16b, v3.16b, v2.16b +; CHECK-NEXT: adrp x9, .LCPI3_1 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0] +; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI3_1] +; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b ; CHECK-NEXT: ret %cond = icmp eq <4 x i32> %x, %y %add = select <4 x i1> %cond, <4 x i32> , <4 x i32> @@ -72,12 +72,12 @@ define <4 x i32> @sel_Cminus1_or_C_vec(<4 x i1> %cond) { ; CHECK-LABEL: sel_Cminus1_or_C_vec: ; CHECK: // %bb.0: +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-NEXT: adrp x8, .LCPI4_0 ; CHECK-NEXT: adrp x9, .LCPI4_1 -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-NEXT: shl v0.4s, v0.4s, #31 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_0] ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI4_1] +; CHECK-NEXT: shl v0.4s, v0.4s, #31 ; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 ; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b ; CHECK-NEXT: ret @@ -89,11 +89,11 @@ ; CHECK-LABEL: cmp_sel_Cminus1_or_C_vec: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI5_0 -; CHECK-NEXT: adrp x9, .LCPI5_1 ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI5_0] -; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI5_1] -; CHECK-NEXT: bsl v0.16b, v3.16b, v2.16b +; CHECK-NEXT: adrp x9, .LCPI5_1 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI5_0] +; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI5_1] +; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b ; CHECK-NEXT: ret %cond = icmp eq <4 x i32> %x, %y %add = select <4 x i1> %cond, <4 x i32> , <4 x i32> @@ -370,9 +370,10 @@ ; CHECK-LABEL: signbit_mask_xor_nxv16i8: ; CHECK: // %bb.0: ; CHECK-NEXT: ptrue p0.b +; CHECK-NEXT: eor z1.d, z0.d, z1.d ; CHECK-NEXT: cmplt p0.b, p0/z, z0.b, #0 -; CHECK-NEXT: eor z0.d, z0.d, z1.d -; CHECK-NEXT: mov z0.b, p0/m, #0 // =0x0 +; CHECK-NEXT: mov z1.b, p0/m, #0 // =0x0 +; CHECK-NEXT: mov z0.d, z1.d ; CHECK-NEXT: ret %cond = icmp slt %a, zeroinitializer %xor = xor %a, %b diff --git a/llvm/test/CodeGen/AArch64/vselect-ext.ll b/llvm/test/CodeGen/AArch64/vselect-ext.ll --- a/llvm/test/CodeGen/AArch64/vselect-ext.ll +++ b/llvm/test/CodeGen/AArch64/vselect-ext.ll @@ -8,15 +8,14 @@ ; CHECK-NEXT: cmhi.16b v0, v0, v5 ; CHECK-NEXT: sshll.8h v5, v0, #0 ; CHECK-NEXT: sshll2.8h v0, v0, #0 +; CHECK-NEXT: sshll2.4s v16, v0, #0 ; CHECK-NEXT: sshll.4s v6, v5, #0 ; CHECK-NEXT: sshll.4s v7, v0, #0 -; CHECK-NEXT: sshll2.4s v0, v0, #0 ; CHECK-NEXT: sshll2.4s v5, v5, #0 -; CHECK-NEXT: and.16b v4, v4, v0 -; CHECK-NEXT: and.16b v5, v2, v5 -; CHECK-NEXT: and.16b v2, v3, 
v7 +; CHECK-NEXT: and.16b v4, v4, v16 ; CHECK-NEXT: and.16b v0, v1, v6 -; CHECK-NEXT: mov.16b v1, v5 +; CHECK-NEXT: and.16b v1, v2, v5 +; CHECK-NEXT: and.16b v2, v3, v7 ; CHECK-NEXT: mov.16b v3, v4 ; CHECK-NEXT: ret entry: @@ -28,23 +27,23 @@ define <16 x i32> @second_compare_operand_not_splat(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: second_compare_operand_not_splat: ; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: cmgt.16b v1, v0, v1 ; CHECK-NEXT: ushll.8h v2, v0, #0 -; CHECK-NEXT: ushll2.8h v3, v0, #0 -; CHECK-NEXT: cmgt.16b v0, v0, v1 +; CHECK-NEXT: ushll2.8h v0, v0, #0 +; CHECK-NEXT: sshll.8h v3, v1, #0 +; CHECK-NEXT: sshll2.8h v1, v1, #0 ; CHECK-NEXT: ushll.4s v4, v2, #0 -; CHECK-NEXT: ushll.4s v5, v3, #0 -; CHECK-NEXT: ushll2.4s v1, v2, #0 -; CHECK-NEXT: ushll2.4s v2, v3, #0 -; CHECK-NEXT: sshll.8h v3, v0, #0 -; CHECK-NEXT: sshll2.8h v0, v0, #0 -; CHECK-NEXT: sshll.4s v6, v3, #0 -; CHECK-NEXT: sshll.4s v7, v0, #0 -; CHECK-NEXT: sshll2.4s v0, v0, #0 +; CHECK-NEXT: ushll.4s v5, v0, #0 +; CHECK-NEXT: ushll2.4s v2, v2, #0 +; CHECK-NEXT: ushll2.4s v6, v0, #0 +; CHECK-NEXT: sshll.4s v0, v3, #0 +; CHECK-NEXT: sshll.4s v7, v1, #0 ; CHECK-NEXT: sshll2.4s v16, v3, #0 -; CHECK-NEXT: and.16b v3, v2, v0 -; CHECK-NEXT: and.16b v1, v1, v16 +; CHECK-NEXT: sshll2.4s v1, v1, #0 +; CHECK-NEXT: and.16b v0, v4, v0 +; CHECK-NEXT: and.16b v3, v6, v1 +; CHECK-NEXT: and.16b v1, v2, v16 ; CHECK-NEXT: and.16b v2, v5, v7 -; CHECK-NEXT: and.16b v0, v4, v6 ; CHECK-NEXT: ret entry: %ext = zext <16 x i8> %a to <16 x i32> @@ -58,22 +57,22 @@ ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: movi.16b v1, #10 ; CHECK-NEXT: ushll.8h v2, v0, #0 -; CHECK-NEXT: ushll2.8h v3, v0, #0 ; CHECK-NEXT: ushll.4s v4, v2, #0 -; CHECK-NEXT: cmgt.16b v0, v0, v1 -; CHECK-NEXT: ushll.4s v5, v3, #0 -; CHECK-NEXT: ushll2.4s v1, v3, #0 -; CHECK-NEXT: sshll.8h v3, v0, #0 -; CHECK-NEXT: sshll2.8h v0, v0, #0 ; CHECK-NEXT: ushll2.4s v2, v2, #0 -; CHECK-NEXT: sshll.4s v6, v3, #0 -; CHECK-NEXT: sshll.4s v7, v0, #0 -; CHECK-NEXT: sshll2.4s v0, v0, #0 +; CHECK-NEXT: cmgt.16b v1, v0, v1 +; CHECK-NEXT: ushll2.8h v0, v0, #0 +; CHECK-NEXT: sshll.8h v3, v1, #0 +; CHECK-NEXT: sshll2.8h v1, v1, #0 +; CHECK-NEXT: ushll.4s v5, v0, #0 +; CHECK-NEXT: ushll2.4s v6, v0, #0 +; CHECK-NEXT: sshll.4s v0, v3, #0 +; CHECK-NEXT: sshll.4s v7, v1, #0 ; CHECK-NEXT: sshll2.4s v16, v3, #0 -; CHECK-NEXT: and.16b v3, v1, v0 +; CHECK-NEXT: sshll2.4s v1, v1, #0 +; CHECK-NEXT: and.16b v0, v4, v0 +; CHECK-NEXT: and.16b v3, v6, v1 ; CHECK-NEXT: and.16b v1, v2, v16 ; CHECK-NEXT: and.16b v2, v5, v7 -; CHECK-NEXT: and.16b v0, v4, v6 ; CHECK-NEXT: ret entry: %ext = zext <16 x i8> %a to <16 x i32> @@ -86,22 +85,22 @@ ; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v8i64: ; CHECK: ; %bb.0: ; CHECK-NEXT: ushll.8h v0, v0, #0 -; CHECK-NEXT: mov w8, #10 -; CHECK-NEXT: ushll2.4s v2, v0, #0 -; CHECK-NEXT: ushll.4s v0, v0, #0 -; CHECK-NEXT: dup.2d v1, x8 -; CHECK-NEXT: ushll2.2d v3, v2, #0 +; CHECK-NEXT: mov w8, #10 ; =0xa +; CHECK-NEXT: dup.2d v2, x8 +; CHECK-NEXT: ushll.4s v1, v0, #0 +; CHECK-NEXT: ushll2.4s v0, v0, #0 +; CHECK-NEXT: ushll.2d v3, v1, #0 ; CHECK-NEXT: ushll2.2d v4, v0, #0 -; CHECK-NEXT: ushll.2d v0, v0, #0 -; CHECK-NEXT: ushll.2d v2, v2, #0 -; CHECK-NEXT: cmhi.2d v5, v0, v1 -; CHECK-NEXT: cmhi.2d v6, v2, v1 -; CHECK-NEXT: cmhi.2d v7, v3, v1 -; CHECK-NEXT: cmhi.2d v1, v4, v1 -; CHECK-NEXT: and.16b v3, v3, v7 -; CHECK-NEXT: and.16b v1, v4, v1 -; CHECK-NEXT: and.16b v2, v2, v6 -; CHECK-NEXT: and.16b v0, v0, v5 +; CHECK-NEXT: ushll2.2d v1, v1, #0 +; CHECK-NEXT: ushll.2d v5, 
v0, #0 +; CHECK-NEXT: cmhi.2d v0, v3, v2 +; CHECK-NEXT: cmhi.2d v7, v1, v2 +; CHECK-NEXT: cmhi.2d v6, v5, v2 +; CHECK-NEXT: cmhi.2d v2, v4, v2 +; CHECK-NEXT: and.16b v0, v3, v0 +; CHECK-NEXT: and.16b v1, v1, v7 +; CHECK-NEXT: and.16b v3, v4, v2 +; CHECK-NEXT: and.16b v2, v5, v6 ; CHECK-NEXT: ret %ext = zext <8 x i8> %a to <8 x i64> %cmp = icmp ugt <8 x i8> %a, @@ -113,21 +112,21 @@ define <16 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v16i32(<16 x i8> %a) { ; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v16i32: ; CHECK: ; %bb.0: +; CHECK-NEXT: ushll.8h v2, v0, #0 +; CHECK-NEXT: ushll2.8h v0, v0, #0 ; CHECK-NEXT: movi.4s v1, #10 -; CHECK-NEXT: ushll2.8h v2, v0, #0 -; CHECK-NEXT: ushll.8h v0, v0, #0 -; CHECK-NEXT: ushll2.4s v3, v2, #0 +; CHECK-NEXT: ushll.4s v3, v2, #0 ; CHECK-NEXT: ushll2.4s v4, v0, #0 -; CHECK-NEXT: ushll.4s v0, v0, #0 -; CHECK-NEXT: ushll.4s v2, v2, #0 -; CHECK-NEXT: cmhi.4s v5, v0, v1 -; CHECK-NEXT: cmhi.4s v6, v2, v1 -; CHECK-NEXT: cmhi.4s v7, v3, v1 +; CHECK-NEXT: ushll2.4s v2, v2, #0 +; CHECK-NEXT: ushll.4s v5, v0, #0 +; CHECK-NEXT: cmhi.4s v0, v3, v1 +; CHECK-NEXT: cmhi.4s v7, v2, v1 +; CHECK-NEXT: cmhi.4s v6, v5, v1 ; CHECK-NEXT: cmhi.4s v1, v4, v1 -; CHECK-NEXT: and.16b v3, v3, v7 -; CHECK-NEXT: and.16b v1, v4, v1 -; CHECK-NEXT: and.16b v2, v2, v6 -; CHECK-NEXT: and.16b v0, v0, v5 +; CHECK-NEXT: and.16b v0, v3, v0 +; CHECK-NEXT: and.16b v3, v4, v1 +; CHECK-NEXT: and.16b v1, v2, v7 +; CHECK-NEXT: and.16b v2, v5, v6 ; CHECK-NEXT: ret %ext = zext <16 x i8> %a to <16 x i32> %cmp = icmp ugt <16 x i8> %a, @@ -138,14 +137,14 @@ define <8 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v8i32(<8 x i8> %a) { ; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v8i32: ; CHECK: ; %bb.0: -; CHECK-NEXT: movi.4s v1, #10 ; CHECK-NEXT: ushll.8h v0, v0, #0 +; CHECK-NEXT: movi.4s v1, #10 ; CHECK-NEXT: ushll2.4s v2, v0, #0 ; CHECK-NEXT: ushll.4s v0, v0, #0 -; CHECK-NEXT: cmhi.4s v3, v2, v1 -; CHECK-NEXT: cmhi.4s v4, v0, v1 -; CHECK-NEXT: and.16b v1, v2, v3 -; CHECK-NEXT: and.16b v0, v0, v4 +; CHECK-NEXT: cmhi.4s v3, v0, v1 +; CHECK-NEXT: cmhi.4s v1, v2, v1 +; CHECK-NEXT: and.16b v1, v2, v1 +; CHECK-NEXT: and.16b v0, v0, v3 ; CHECK-NEXT: ret %ext = zext <8 x i8> %a to <8 x i32> %cmp = icmp ugt <8 x i8> %a, @@ -159,10 +158,10 @@ ; CHECK-NEXT: movi.4s v1, #10 ; CHECK-NEXT: ushll2.4s v2, v0, #0 ; CHECK-NEXT: ushll.4s v0, v0, #0 -; CHECK-NEXT: cmhi.4s v3, v2, v1 -; CHECK-NEXT: cmhi.4s v4, v0, v1 -; CHECK-NEXT: and.16b v1, v2, v3 -; CHECK-NEXT: and.16b v0, v0, v4 +; CHECK-NEXT: cmhi.4s v3, v0, v1 +; CHECK-NEXT: cmhi.4s v1, v2, v1 +; CHECK-NEXT: and.16b v1, v2, v1 +; CHECK-NEXT: and.16b v0, v0, v3 ; CHECK-NEXT: ret %ext = zext <8 x i16> %a to <8 x i32> %cmp = icmp ugt <8 x i16> %a, @@ -174,14 +173,14 @@ define <8 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v8i32_from_v8i15(<8 x i15> %a) { ; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v8i32_from_v8i15: ; CHECK: ; %bb.0: -; CHECK-NEXT: movi.4s v1, #10 ; CHECK-NEXT: bic.8h v0, #128, lsl #8 +; CHECK-NEXT: movi.4s v1, #10 ; CHECK-NEXT: ushll2.4s v2, v0, #0 ; CHECK-NEXT: ushll.4s v0, v0, #0 -; CHECK-NEXT: cmhi.4s v3, v2, v1 -; CHECK-NEXT: cmhi.4s v4, v0, v1 -; CHECK-NEXT: and.16b v1, v2, v3 -; CHECK-NEXT: and.16b v0, v0, v4 +; CHECK-NEXT: cmhi.4s v3, v0, v1 +; CHECK-NEXT: cmhi.4s v1, v2, v1 +; CHECK-NEXT: and.16b v1, v2, v1 +; CHECK-NEXT: and.16b v0, v0, v3 ; CHECK-NEXT: ret %ext = zext <8 x i15> %a to <8 x i32> %cmp = icmp ugt <8 x i15> %a, @@ -193,20 +192,20 @@ ; CHECK-LABEL: 
same_zext_used_in_cmp_unsigned_pred_and_select_v7i32: ; CHECK: ; %bb.0: ; CHECK-NEXT: movi.8h v1, #10 -; CHECK-NEXT: ushll2.4s v2, v0, #0 +; CHECK-NEXT: ushll.4s v2, v0, #0 ; CHECK-NEXT: cmhi.8h v1, v0, v1 -; CHECK-NEXT: ushll.4s v0, v0, #0 +; CHECK-NEXT: ushll2.4s v0, v0, #0 ; CHECK-NEXT: sshll.4s v3, v1, #0 ; CHECK-NEXT: sshll2.4s v1, v1, #0 -; CHECK-NEXT: and.16b v0, v0, v3 -; CHECK-NEXT: and.16b v1, v2, v1 -; CHECK-NEXT: mov.s w1, v0[1] -; CHECK-NEXT: mov.s w2, v0[2] -; CHECK-NEXT: mov.s w3, v0[3] -; CHECK-NEXT: mov.s w5, v1[1] -; CHECK-NEXT: mov.s w6, v1[2] -; CHECK-NEXT: fmov w0, s0 -; CHECK-NEXT: fmov w4, s1 +; CHECK-NEXT: and.16b v2, v2, v3 +; CHECK-NEXT: and.16b v0, v0, v1 +; CHECK-NEXT: mov.s w1, v2[1] +; CHECK-NEXT: mov.s w2, v2[2] +; CHECK-NEXT: mov.s w3, v2[3] +; CHECK-NEXT: mov.s w5, v0[1] +; CHECK-NEXT: mov.s w6, v0[2] +; CHECK-NEXT: fmov w0, s2 +; CHECK-NEXT: fmov w4, s0 ; CHECK-NEXT: ret %ext = zext <7 x i16> %a to <7 x i32> %cmp = icmp ugt <7 x i16> %a, @@ -220,17 +219,17 @@ ; CHECK-NEXT: fmov s0, w0 ; CHECK-NEXT: Lloh0: ; CHECK-NEXT: adrp x8, lCPI9_0@PAGE -; CHECK-NEXT: mov.h v0[1], w1 ; CHECK-NEXT: Lloh1: ; CHECK-NEXT: ldr d2, [x8, lCPI9_0@PAGEOFF] +; CHECK-NEXT: mov.h v0[1], w1 ; CHECK-NEXT: mov.h v0[2], w2 ; CHECK-NEXT: fmov d1, d0 ; CHECK-NEXT: bic.4h v1, #255, lsl #8 ; CHECK-NEXT: cmhi.4h v1, v1, v2 -; CHECK-NEXT: movi.2d v2, #0x0000ff000000ff ; CHECK-NEXT: and.8b v0, v0, v1 +; CHECK-NEXT: movi.2d v1, #0x0000ff000000ff ; CHECK-NEXT: ushll.4s v0, v0, #0 -; CHECK-NEXT: and.16b v0, v0, v2 +; CHECK-NEXT: and.16b v0, v0, v1 ; CHECK-NEXT: ret ; CHECK-NEXT: .loh AdrpLdr Lloh0, Lloh1 %ext = zext <3 x i8> %a to <3 x i32> @@ -274,10 +273,10 @@ ; CHECK-NEXT: movi.4s v1, #10 ; CHECK-NEXT: ushll2.4s v2, v0, #0 ; CHECK-NEXT: ushll.4s v0, v0, #0 -; CHECK-NEXT: cmeq.4s v3, v2, v1 -; CHECK-NEXT: cmeq.4s v4, v0, v1 -; CHECK-NEXT: and.16b v1, v2, v3 -; CHECK-NEXT: and.16b v0, v0, v4 +; CHECK-NEXT: cmeq.4s v3, v0, v1 +; CHECK-NEXT: cmeq.4s v1, v2, v1 +; CHECK-NEXT: and.16b v1, v2, v1 +; CHECK-NEXT: and.16b v0, v0, v3 ; CHECK-NEXT: ret %ext = zext <8 x i16> %a to <8 x i32> %cmp = icmp eq <8 x i16> %a, @@ -288,14 +287,14 @@ define <8 x i32> @same_zext_used_in_cmp_eq_and_select_v8i32_from_v8i13(<8 x i13> %a) { ; CHECK-LABEL: same_zext_used_in_cmp_eq_and_select_v8i32_from_v8i13: ; CHECK: ; %bb.0: -; CHECK-NEXT: movi.4s v1, #10 ; CHECK-NEXT: bic.8h v0, #224, lsl #8 +; CHECK-NEXT: movi.4s v1, #10 ; CHECK-NEXT: ushll2.4s v2, v0, #0 ; CHECK-NEXT: ushll.4s v0, v0, #0 -; CHECK-NEXT: cmeq.4s v3, v2, v1 -; CHECK-NEXT: cmeq.4s v4, v0, v1 -; CHECK-NEXT: and.16b v1, v2, v3 -; CHECK-NEXT: and.16b v0, v0, v4 +; CHECK-NEXT: cmeq.4s v3, v0, v1 +; CHECK-NEXT: cmeq.4s v1, v2, v1 +; CHECK-NEXT: and.16b v1, v2, v1 +; CHECK-NEXT: and.16b v0, v0, v3 ; CHECK-NEXT: ret %ext = zext <8 x i13> %a to <8 x i32> %cmp = icmp eq <8 x i13> %a, @@ -306,21 +305,21 @@ define <16 x i32> @same_zext_used_in_cmp_ne_and_select_v8i32(<16 x i8> %a) { ; CHECK-LABEL: same_zext_used_in_cmp_ne_and_select_v8i32: ; CHECK: ; %bb.0: +; CHECK-NEXT: ushll.8h v2, v0, #0 +; CHECK-NEXT: ushll2.8h v0, v0, #0 ; CHECK-NEXT: movi.4s v1, #10 -; CHECK-NEXT: ushll2.8h v2, v0, #0 -; CHECK-NEXT: ushll.8h v0, v0, #0 -; CHECK-NEXT: ushll2.4s v3, v2, #0 +; CHECK-NEXT: ushll.4s v3, v2, #0 ; CHECK-NEXT: ushll2.4s v4, v0, #0 -; CHECK-NEXT: ushll.4s v0, v0, #0 -; CHECK-NEXT: ushll.4s v2, v2, #0 -; CHECK-NEXT: cmeq.4s v5, v0, v1 -; CHECK-NEXT: cmeq.4s v6, v2, v1 -; CHECK-NEXT: cmeq.4s v7, v3, v1 +; CHECK-NEXT: ushll2.4s v2, v2, #0 +; CHECK-NEXT: ushll.4s v5, 
v0, #0 +; CHECK-NEXT: cmeq.4s v0, v3, v1 +; CHECK-NEXT: cmeq.4s v7, v2, v1 +; CHECK-NEXT: cmeq.4s v6, v5, v1 ; CHECK-NEXT: cmeq.4s v1, v4, v1 -; CHECK-NEXT: bic.16b v3, v3, v7 -; CHECK-NEXT: bic.16b v1, v4, v1 -; CHECK-NEXT: bic.16b v2, v2, v6 -; CHECK-NEXT: bic.16b v0, v0, v5 +; CHECK-NEXT: bic.16b v0, v3, v0 +; CHECK-NEXT: bic.16b v3, v4, v1 +; CHECK-NEXT: bic.16b v1, v2, v7 +; CHECK-NEXT: bic.16b v2, v5, v6 ; CHECK-NEXT: ret %ext = zext <16 x i8> %a to <16 x i32> %cmp = icmp ne <16 x i8> %a, @@ -333,47 +332,47 @@ define <16 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_other_use(<16 x i8> %a, <16 x i64> %v, ptr %ptr) { ; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_other_use: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: mov.16b v16, v2 -; CHECK-NEXT: movi.16b v2, #10 -; CHECK-NEXT: ushll.8h v18, v0, #0 -; CHECK-NEXT: ushll2.8h v20, v0, #0 -; CHECK-NEXT: mov.16b v17, v1 -; CHECK-NEXT: ldr q1, [sp] -; CHECK-NEXT: cmhi.16b v0, v0, v2 -; CHECK-NEXT: ushll.4s v19, v18, #0 -; CHECK-NEXT: sshll2.8h v21, v0, #0 -; CHECK-NEXT: sshll.8h v0, v0, #0 -; CHECK-NEXT: sshll2.4s v22, v21, #0 -; CHECK-NEXT: sshll.4s v21, v21, #0 -; CHECK-NEXT: sshll2.2d v23, v22, #0 -; CHECK-NEXT: sshll.2d v24, v22, #0 -; CHECK-NEXT: sshll2.4s v25, v0, #0 -; CHECK-NEXT: sshll2.2d v26, v21, #0 -; CHECK-NEXT: sshll.2d v28, v21, #0 -; CHECK-NEXT: sshll2.2d v27, v25, #0 -; CHECK-NEXT: sshll.4s v0, v0, #0 -; CHECK-NEXT: and.16b v1, v1, v23 -; CHECK-NEXT: and.16b v7, v7, v24 -; CHECK-NEXT: sshll.2d v29, v25, #0 -; CHECK-NEXT: stp q7, q1, [x0, #96] -; CHECK-NEXT: and.16b v1, v6, v26 -; CHECK-NEXT: and.16b v5, v5, v28 -; CHECK-NEXT: ushll.4s v2, v20, #0 -; CHECK-NEXT: stp q5, q1, [x0, #64] -; CHECK-NEXT: ushll2.4s v18, v18, #0 -; CHECK-NEXT: ushll2.4s v20, v20, #0 -; CHECK-NEXT: and.16b v1, v4, v27 -; CHECK-NEXT: sshll2.2d v4, v0, #0 -; CHECK-NEXT: sshll.2d v5, v0, #0 -; CHECK-NEXT: and.16b v3, v3, v29 -; CHECK-NEXT: stp q3, q1, [x0, #32] -; CHECK-NEXT: and.16b v3, v20, v22 -; CHECK-NEXT: and.16b v1, v18, v25 -; CHECK-NEXT: and.16b v2, v2, v21 -; CHECK-NEXT: and.16b v0, v19, v0 -; CHECK-NEXT: and.16b v4, v16, v4 -; CHECK-NEXT: and.16b v5, v17, v5 -; CHECK-NEXT: stp q5, q4, [x0] +; CHECK-NEXT: movi.16b v16, #10 +; CHECK-NEXT: ushll.8h v19, v0, #0 +; CHECK-NEXT: ldr q21, [sp] +; CHECK-NEXT: ushll.4s v24, v19, #0 +; CHECK-NEXT: ushll2.4s v19, v19, #0 +; CHECK-NEXT: cmhi.16b v16, v0, v16 +; CHECK-NEXT: ushll2.8h v0, v0, #0 +; CHECK-NEXT: sshll2.8h v17, v16, #0 +; CHECK-NEXT: sshll.8h v16, v16, #0 +; CHECK-NEXT: ushll.4s v25, v0, #0 +; CHECK-NEXT: ushll2.4s v0, v0, #0 +; CHECK-NEXT: sshll2.4s v18, v17, #0 +; CHECK-NEXT: sshll.4s v17, v17, #0 +; CHECK-NEXT: sshll2.4s v22, v16, #0 +; CHECK-NEXT: sshll.4s v16, v16, #0 +; CHECK-NEXT: sshll2.2d v20, v18, #0 +; CHECK-NEXT: sshll.2d v23, v18, #0 +; CHECK-NEXT: sshll2.2d v26, v17, #0 +; CHECK-NEXT: sshll.2d v27, v17, #0 +; CHECK-NEXT: and.16b v20, v21, v20 +; CHECK-NEXT: sshll2.2d v21, v22, #0 +; CHECK-NEXT: and.16b v7, v7, v23 +; CHECK-NEXT: sshll.2d v23, v22, #0 +; CHECK-NEXT: and.16b v6, v6, v26 +; CHECK-NEXT: sshll2.2d v26, v16, #0 +; CHECK-NEXT: and.16b v5, v5, v27 +; CHECK-NEXT: stp q7, q20, [x0, #96] +; CHECK-NEXT: sshll.2d v20, v16, #0 +; CHECK-NEXT: and.16b v21, v4, v21 +; CHECK-NEXT: and.16b v4, v0, v18 +; CHECK-NEXT: and.16b v7, v3, v23 +; CHECK-NEXT: and.16b v3, v19, v22 +; CHECK-NEXT: stp q5, q6, [x0, #64] +; CHECK-NEXT: and.16b v0, v24, v16 +; CHECK-NEXT: and.16b v6, v2, v26 +; CHECK-NEXT: and.16b v2, v25, v17 +; CHECK-NEXT: and.16b v5, v1, v20 +; 
CHECK-NEXT: mov.16b v1, v3 +; CHECK-NEXT: mov.16b v3, v4 +; CHECK-NEXT: stp q7, q21, [x0, #32] +; CHECK-NEXT: stp q5, q6, [x0] ; CHECK-NEXT: ret entry: %ext = zext <16 x i8> %a to <16 x i32> @@ -387,21 +386,21 @@ define <16 x i32> @same_sext_used_in_cmp_signed_pred_and_select_v16i32(<16 x i8> %a) { ; CHECK-LABEL: same_sext_used_in_cmp_signed_pred_and_select_v16i32: ; CHECK: ; %bb.0: ; %entry +; CHECK-NEXT: sshll.8h v2, v0, #0 +; CHECK-NEXT: sshll2.8h v0, v0, #0 ; CHECK-NEXT: movi.4s v1, #10 -; CHECK-NEXT: sshll2.8h v2, v0, #0 -; CHECK-NEXT: sshll.8h v0, v0, #0 -; CHECK-NEXT: sshll2.4s v3, v2, #0 +; CHECK-NEXT: sshll.4s v3, v2, #0 ; CHECK-NEXT: sshll2.4s v4, v0, #0 -; CHECK-NEXT: sshll.4s v0, v0, #0 -; CHECK-NEXT: sshll.4s v2, v2, #0 -; CHECK-NEXT: cmgt.4s v5, v0, v1 -; CHECK-NEXT: cmgt.4s v6, v2, v1 -; CHECK-NEXT: cmgt.4s v7, v3, v1 +; CHECK-NEXT: sshll2.4s v2, v2, #0 +; CHECK-NEXT: sshll.4s v5, v0, #0 +; CHECK-NEXT: cmgt.4s v0, v3, v1 +; CHECK-NEXT: cmgt.4s v7, v2, v1 +; CHECK-NEXT: cmgt.4s v6, v5, v1 ; CHECK-NEXT: cmgt.4s v1, v4, v1 -; CHECK-NEXT: and.16b v3, v3, v7 -; CHECK-NEXT: and.16b v1, v4, v1 -; CHECK-NEXT: and.16b v2, v2, v6 -; CHECK-NEXT: and.16b v0, v0, v5 +; CHECK-NEXT: and.16b v0, v3, v0 +; CHECK-NEXT: and.16b v3, v4, v1 +; CHECK-NEXT: and.16b v1, v2, v7 +; CHECK-NEXT: and.16b v2, v5, v6 ; CHECK-NEXT: ret entry: %ext = sext <16 x i8> %a to <16 x i32> @@ -416,10 +415,10 @@ ; CHECK-NEXT: movi.4s v1, #10 ; CHECK-NEXT: sshll2.4s v2, v0, #0 ; CHECK-NEXT: sshll.4s v0, v0, #0 -; CHECK-NEXT: cmeq.4s v3, v2, v1 -; CHECK-NEXT: cmeq.4s v4, v0, v1 -; CHECK-NEXT: and.16b v1, v2, v3 -; CHECK-NEXT: and.16b v0, v0, v4 +; CHECK-NEXT: cmeq.4s v3, v0, v1 +; CHECK-NEXT: cmeq.4s v1, v2, v1 +; CHECK-NEXT: and.16b v1, v2, v1 +; CHECK-NEXT: and.16b v0, v0, v3 ; CHECK-NEXT: ret %ext = sext <8 x i16> %a to <8 x i32> %cmp = icmp eq <8 x i16> %a, @@ -430,17 +429,17 @@ define <8 x i32> @same_sext_used_in_cmp_eq_and_select_v8i32_from_v8i13(<8 x i13> %a) { ; CHECK-LABEL: same_sext_used_in_cmp_eq_and_select_v8i32_from_v8i13: ; CHECK: ; %bb.0: -; CHECK-NEXT: ushll2.4s v2, v0, #0 -; CHECK-NEXT: ushll.4s v0, v0, #0 +; CHECK-NEXT: ushll.4s v2, v0, #0 +; CHECK-NEXT: ushll2.4s v0, v0, #0 ; CHECK-NEXT: movi.4s v1, #10 -; CHECK-NEXT: shl.4s v2, v2, #19 ; CHECK-NEXT: shl.4s v0, v0, #19 -; CHECK-NEXT: sshr.4s v2, v2, #19 +; CHECK-NEXT: shl.4s v2, v2, #19 ; CHECK-NEXT: sshr.4s v0, v0, #19 +; CHECK-NEXT: sshr.4s v2, v2, #19 ; CHECK-NEXT: cmeq.4s v3, v2, v1 -; CHECK-NEXT: cmeq.4s v4, v0, v1 -; CHECK-NEXT: and.16b v1, v2, v3 -; CHECK-NEXT: and.16b v0, v0, v4 +; CHECK-NEXT: cmeq.4s v1, v0, v1 +; CHECK-NEXT: and.16b v1, v0, v1 +; CHECK-NEXT: and.16b v0, v2, v3 ; CHECK-NEXT: ret %ext = sext <8 x i13> %a to <8 x i32> %cmp = icmp eq <8 x i13> %a, @@ -451,21 +450,21 @@ define <16 x i32> @same_sext_used_in_cmp_ne_and_select_v8i32(<16 x i8> %a) { ; CHECK-LABEL: same_sext_used_in_cmp_ne_and_select_v8i32: ; CHECK: ; %bb.0: +; CHECK-NEXT: sshll.8h v2, v0, #0 +; CHECK-NEXT: sshll2.8h v0, v0, #0 ; CHECK-NEXT: movi.4s v1, #10 -; CHECK-NEXT: sshll2.8h v2, v0, #0 -; CHECK-NEXT: sshll.8h v0, v0, #0 -; CHECK-NEXT: sshll2.4s v3, v2, #0 +; CHECK-NEXT: sshll.4s v3, v2, #0 ; CHECK-NEXT: sshll2.4s v4, v0, #0 -; CHECK-NEXT: sshll.4s v0, v0, #0 -; CHECK-NEXT: sshll.4s v2, v2, #0 -; CHECK-NEXT: cmeq.4s v5, v0, v1 -; CHECK-NEXT: cmeq.4s v6, v2, v1 -; CHECK-NEXT: cmeq.4s v7, v3, v1 +; CHECK-NEXT: sshll2.4s v2, v2, #0 +; CHECK-NEXT: sshll.4s v5, v0, #0 +; CHECK-NEXT: cmeq.4s v0, v3, v1 +; CHECK-NEXT: cmeq.4s v7, v2, v1 +; CHECK-NEXT: cmeq.4s v6, v5, 
v1 ; CHECK-NEXT: cmeq.4s v1, v4, v1 -; CHECK-NEXT: bic.16b v3, v3, v7 -; CHECK-NEXT: bic.16b v1, v4, v1 -; CHECK-NEXT: bic.16b v2, v2, v6 -; CHECK-NEXT: bic.16b v0, v0, v5 +; CHECK-NEXT: bic.16b v0, v3, v0 +; CHECK-NEXT: bic.16b v3, v4, v1 +; CHECK-NEXT: bic.16b v1, v2, v7 +; CHECK-NEXT: bic.16b v2, v5, v6 ; CHECK-NEXT: ret %ext = sext <16 x i8> %a to <16 x i32> %cmp = icmp ne <16 x i8> %a, @@ -479,10 +478,10 @@ ; CHECK-NEXT: movi.4s v1, #10 ; CHECK-NEXT: sshll2.4s v2, v0, #0 ; CHECK-NEXT: sshll.4s v0, v0, #0 -; CHECK-NEXT: cmgt.4s v3, v2, v1 -; CHECK-NEXT: cmgt.4s v4, v0, v1 -; CHECK-NEXT: and.16b v1, v2, v3 -; CHECK-NEXT: and.16b v0, v0, v4 +; CHECK-NEXT: cmgt.4s v3, v0, v1 +; CHECK-NEXT: cmgt.4s v1, v2, v1 +; CHECK-NEXT: and.16b v1, v2, v1 +; CHECK-NEXT: and.16b v0, v0, v3 ; CHECK-NEXT: ret entry: %ext = sext <8 x i16> %a to <8 x i32> @@ -494,17 +493,17 @@ define <8 x i32> @same_sext_used_in_cmp_unsigned_pred_and_select_v8i32_from_v8i15(<8 x i15> %a) { ; CHECK-LABEL: same_sext_used_in_cmp_unsigned_pred_and_select_v8i32_from_v8i15: ; CHECK: ; %bb.0: -; CHECK-NEXT: ushll2.4s v2, v0, #0 -; CHECK-NEXT: ushll.4s v0, v0, #0 +; CHECK-NEXT: ushll.4s v2, v0, #0 +; CHECK-NEXT: ushll2.4s v0, v0, #0 ; CHECK-NEXT: movi.4s v1, #10 -; CHECK-NEXT: shl.4s v2, v2, #17 ; CHECK-NEXT: shl.4s v0, v0, #17 -; CHECK-NEXT: sshr.4s v2, v2, #17 +; CHECK-NEXT: shl.4s v2, v2, #17 ; CHECK-NEXT: sshr.4s v0, v0, #17 +; CHECK-NEXT: sshr.4s v2, v2, #17 ; CHECK-NEXT: cmge.4s v3, v2, v1 -; CHECK-NEXT: cmge.4s v4, v0, v1 -; CHECK-NEXT: and.16b v1, v2, v3 -; CHECK-NEXT: and.16b v0, v0, v4 +; CHECK-NEXT: cmge.4s v1, v0, v1 +; CHECK-NEXT: and.16b v1, v0, v1 +; CHECK-NEXT: and.16b v0, v2, v3 ; CHECK-NEXT: ret %ext = sext <8 x i15> %a to <8 x i32> %cmp = icmp sge <8 x i15> %a, @@ -516,23 +515,23 @@ ; CHECK-LABEL: same_sext_used_in_cmp_unsigned_pred_and_select: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: movi.16b v1, #10 -; CHECK-NEXT: sshll.8h v3, v0, #0 -; CHECK-NEXT: sshll2.8h v2, v0, #0 -; CHECK-NEXT: cmhi.16b v0, v0, v1 -; CHECK-NEXT: ext.16b v1, v3, v3, #8 -; CHECK-NEXT: sshll.8h v5, v0, #0 -; CHECK-NEXT: sshll2.8h v0, v0, #0 +; CHECK-NEXT: sshll.8h v2, v0, #0 ; CHECK-NEXT: ext.16b v4, v2, v2, #8 -; CHECK-NEXT: ext.16b v6, v5, v5, #8 -; CHECK-NEXT: ext.16b v7, v0, v0, #8 -; CHECK-NEXT: and.8b v0, v2, v0 -; CHECK-NEXT: sshll.4s v2, v0, #0 -; CHECK-NEXT: and.8b v0, v3, v5 -; CHECK-NEXT: and.8b v1, v1, v6 -; CHECK-NEXT: and.8b v3, v4, v7 -; CHECK-NEXT: sshll.4s v0, v0, #0 -; CHECK-NEXT: sshll.4s v1, v1, #0 +; CHECK-NEXT: cmhi.16b v1, v0, v1 +; CHECK-NEXT: sshll2.8h v0, v0, #0 +; CHECK-NEXT: sshll.8h v3, v1, #0 +; CHECK-NEXT: sshll2.8h v1, v1, #0 +; CHECK-NEXT: ext.16b v5, v0, v0, #8 +; CHECK-NEXT: ext.16b v6, v3, v3, #8 +; CHECK-NEXT: ext.16b v7, v1, v1, #8 +; CHECK-NEXT: and.8b v2, v2, v3 +; CHECK-NEXT: and.8b v1, v0, v1 +; CHECK-NEXT: sshll.4s v0, v2, #0 +; CHECK-NEXT: and.8b v3, v5, v7 +; CHECK-NEXT: and.8b v4, v4, v6 +; CHECK-NEXT: sshll.4s v2, v1, #0 ; CHECK-NEXT: sshll.4s v3, v3, #0 +; CHECK-NEXT: sshll.4s v1, v4, #0 ; CHECK-NEXT: ret entry: %ext = sext <16 x i8> %a to <16 x i32> @@ -546,22 +545,22 @@ ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: movi.2d v1, #0xffffffffffffffff ; CHECK-NEXT: ushll.8h v2, v0, #0 -; CHECK-NEXT: ushll2.8h v3, v0, #0 ; CHECK-NEXT: ushll.4s v4, v2, #0 -; CHECK-NEXT: cmgt.16b v0, v0, v1 -; CHECK-NEXT: ushll.4s v5, v3, #0 -; CHECK-NEXT: ushll2.4s v1, v3, #0 -; CHECK-NEXT: sshll.8h v3, v0, #0 -; CHECK-NEXT: sshll2.8h v0, v0, #0 ; CHECK-NEXT: ushll2.4s v2, v2, #0 -; CHECK-NEXT: sshll.4s v6, v3, #0 -; 
CHECK-NEXT: sshll.4s v7, v0, #0
-; CHECK-NEXT: sshll2.4s v0, v0, #0
+; CHECK-NEXT: cmgt.16b v1, v0, v1
+; CHECK-NEXT: ushll2.8h v0, v0, #0
+; CHECK-NEXT: sshll.8h v3, v1, #0
+; CHECK-NEXT: sshll2.8h v1, v1, #0
+; CHECK-NEXT: ushll.4s v5, v0, #0
+; CHECK-NEXT: ushll2.4s v6, v0, #0
+; CHECK-NEXT: sshll.4s v0, v3, #0
+; CHECK-NEXT: sshll.4s v7, v1, #0
; CHECK-NEXT: sshll2.4s v16, v3, #0
-; CHECK-NEXT: and.16b v3, v1, v0
+; CHECK-NEXT: sshll2.4s v1, v1, #0
+; CHECK-NEXT: and.16b v0, v4, v0
+; CHECK-NEXT: and.16b v3, v6, v1
; CHECK-NEXT: and.16b v1, v2, v16
; CHECK-NEXT: and.16b v2, v5, v7
-; CHECK-NEXT: and.16b v0, v4, v6
; CHECK-NEXT: ret
entry:
%ext = zext <16 x i8> %a to <16 x i32>
@@ -574,52 +573,53 @@
; CHECK-LABEL: extension_in_loop_v16i8_to_v16i32:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: Lloh2:
-; CHECK-NEXT: adrp x9, lCPI24_0@PAGE
+; CHECK-NEXT: adrp x8, lCPI24_0@PAGE
+; CHECK-NEXT: movi.2d v0, #0xffffffffffffffff
; CHECK-NEXT: Lloh3:
-; CHECK-NEXT: adrp x10, lCPI24_1@PAGE
+; CHECK-NEXT: adrp x9, lCPI24_2@PAGE
; CHECK-NEXT: Lloh4:
-; CHECK-NEXT: adrp x11, lCPI24_2@PAGE
+; CHECK-NEXT: ldr q1, [x8, lCPI24_0@PAGEOFF]
; CHECK-NEXT: Lloh5:
-; CHECK-NEXT: adrp x12, lCPI24_3@PAGE
-; CHECK-NEXT: movi.2d v2, #0xffffffffffffffff
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: adrp x8, lCPI24_1@PAGE
; CHECK-NEXT: Lloh6:
-; CHECK-NEXT: ldr q0, [x9, lCPI24_0@PAGEOFF]
+; CHECK-NEXT: adrp x10, lCPI24_3@PAGE
; CHECK-NEXT: Lloh7:
-; CHECK-NEXT: ldr q1, [x10, lCPI24_1@PAGEOFF]
+; CHECK-NEXT: ldr q2, [x8, lCPI24_1@PAGEOFF]
; CHECK-NEXT: Lloh8:
-; CHECK-NEXT: ldr q3, [x11, lCPI24_2@PAGEOFF]
+; CHECK-NEXT: ldr q3, [x9, lCPI24_2@PAGEOFF]
; CHECK-NEXT: Lloh9:
-; CHECK-NEXT: ldr q4, [x12, lCPI24_3@PAGEOFF]
+; CHECK-NEXT: ldr q4, [x10, lCPI24_3@PAGEOFF]
+; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: LBB24_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr q5, [x0, x8]
; CHECK-NEXT: add x8, x8, #16
; CHECK-NEXT: cmp x8, #128
-; CHECK-NEXT: cmgt.16b v6, v5, v2
-; CHECK-NEXT: tbl.16b v7, { v5 }, v0
+; CHECK-NEXT: cmgt.16b v6, v5, v0
; CHECK-NEXT: tbl.16b v16, { v5 }, v1
-; CHECK-NEXT: sshll2.8h v18, v6, #0
-; CHECK-NEXT: tbl.16b v17, { v5 }, v3
-; CHECK-NEXT: sshll2.4s v19, v18, #0
-; CHECK-NEXT: sshll.4s v18, v18, #0
+; CHECK-NEXT: tbl.16b v17, { v5 }, v2
+; CHECK-NEXT: tbl.16b v19, { v5 }, v3
; CHECK-NEXT: tbl.16b v5, { v5 }, v4
+; CHECK-NEXT: sshll2.8h v7, v6, #0
; CHECK-NEXT: sshll.8h v6, v6, #0
-; CHECK-NEXT: and.16b v7, v7, v19
-; CHECK-NEXT: and.16b v16, v16, v18
-; CHECK-NEXT: stp q16, q7, [x1, #32]
-; CHECK-NEXT: sshll2.4s v7, v6, #0
+; CHECK-NEXT: sshll2.4s v18, v7, #0
+; CHECK-NEXT: sshll.4s v7, v7, #0
+; CHECK-NEXT: sshll2.4s v20, v6, #0
; CHECK-NEXT: sshll.4s v6, v6, #0
+; CHECK-NEXT: and.16b v16, v16, v18
; CHECK-NEXT: and.16b v7, v17, v7
+; CHECK-NEXT: and.16b v17, v19, v20
; CHECK-NEXT: and.16b v5, v5, v6
-; CHECK-NEXT: stp q5, q7, [x1], #64
+; CHECK-NEXT: stp q7, q16, [x1, #32]
+; CHECK-NEXT: stp q5, q17, [x1], #64
; CHECK-NEXT: b.ne LBB24_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
-; CHECK-NEXT: .loh AdrpLdr Lloh5, Lloh9
-; CHECK-NEXT: .loh AdrpLdr Lloh4, Lloh8
-; CHECK-NEXT: .loh AdrpLdr Lloh3, Lloh7
-; CHECK-NEXT: .loh AdrpLdr Lloh2, Lloh6
+; CHECK-NEXT: .loh AdrpLdr Lloh6, Lloh9
+; CHECK-NEXT: .loh AdrpLdr Lloh5, Lloh7
+; CHECK-NEXT: .loh AdrpLdr Lloh3, Lloh8
+; CHECK-NEXT: .loh AdrpAdrp Lloh2, Lloh5
+; CHECK-NEXT: .loh AdrpLdr Lloh2, Lloh4
entry:
br label %loop
@@ -644,52 +644,53 @@
; CHECK-LABEL: extension_in_loop_as_shuffle_v16i8_to_v16i32:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: Lloh10:
-; CHECK-NEXT: adrp x9, lCPI25_0@PAGE
+; CHECK-NEXT: adrp x8, lCPI25_0@PAGE
+; CHECK-NEXT: movi.2d v0, #0xffffffffffffffff
; CHECK-NEXT: Lloh11:
-; CHECK-NEXT: adrp x10, lCPI25_1@PAGE
+; CHECK-NEXT: adrp x9, lCPI25_2@PAGE
; CHECK-NEXT: Lloh12:
-; CHECK-NEXT: adrp x11, lCPI25_2@PAGE
+; CHECK-NEXT: ldr q1, [x8, lCPI25_0@PAGEOFF]
; CHECK-NEXT: Lloh13:
-; CHECK-NEXT: adrp x12, lCPI25_3@PAGE
-; CHECK-NEXT: movi.2d v2, #0xffffffffffffffff
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: adrp x8, lCPI25_1@PAGE
; CHECK-NEXT: Lloh14:
-; CHECK-NEXT: ldr q0, [x9, lCPI25_0@PAGEOFF]
+; CHECK-NEXT: adrp x10, lCPI25_3@PAGE
; CHECK-NEXT: Lloh15:
-; CHECK-NEXT: ldr q1, [x10, lCPI25_1@PAGEOFF]
+; CHECK-NEXT: ldr q2, [x8, lCPI25_1@PAGEOFF]
; CHECK-NEXT: Lloh16:
-; CHECK-NEXT: ldr q3, [x11, lCPI25_2@PAGEOFF]
+; CHECK-NEXT: ldr q3, [x9, lCPI25_2@PAGEOFF]
; CHECK-NEXT: Lloh17:
-; CHECK-NEXT: ldr q4, [x12, lCPI25_3@PAGEOFF]
+; CHECK-NEXT: ldr q4, [x10, lCPI25_3@PAGEOFF]
+; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: LBB25_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr q5, [x0, x8]
; CHECK-NEXT: add x8, x8, #16
; CHECK-NEXT: cmp x8, #128
-; CHECK-NEXT: cmgt.16b v6, v5, v2
-; CHECK-NEXT: tbl.16b v7, { v5 }, v0
+; CHECK-NEXT: cmgt.16b v6, v5, v0
; CHECK-NEXT: tbl.16b v16, { v5 }, v1
-; CHECK-NEXT: sshll2.8h v18, v6, #0
-; CHECK-NEXT: tbl.16b v17, { v5 }, v3
-; CHECK-NEXT: sshll2.4s v19, v18, #0
-; CHECK-NEXT: sshll.4s v18, v18, #0
+; CHECK-NEXT: tbl.16b v17, { v5 }, v2
+; CHECK-NEXT: tbl.16b v19, { v5 }, v3
; CHECK-NEXT: tbl.16b v5, { v5 }, v4
+; CHECK-NEXT: sshll2.8h v7, v6, #0
; CHECK-NEXT: sshll.8h v6, v6, #0
-; CHECK-NEXT: and.16b v7, v7, v19
-; CHECK-NEXT: and.16b v16, v16, v18
-; CHECK-NEXT: stp q16, q7, [x1, #32]
-; CHECK-NEXT: sshll2.4s v7, v6, #0
+; CHECK-NEXT: sshll2.4s v18, v7, #0
+; CHECK-NEXT: sshll.4s v7, v7, #0
+; CHECK-NEXT: sshll2.4s v20, v6, #0
; CHECK-NEXT: sshll.4s v6, v6, #0
+; CHECK-NEXT: and.16b v16, v16, v18
; CHECK-NEXT: and.16b v7, v17, v7
+; CHECK-NEXT: and.16b v17, v19, v20
; CHECK-NEXT: and.16b v5, v5, v6
-; CHECK-NEXT: stp q5, q7, [x1], #64
+; CHECK-NEXT: stp q7, q16, [x1, #32]
+; CHECK-NEXT: stp q5, q17, [x1], #64
; CHECK-NEXT: b.ne LBB25_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
-; CHECK-NEXT: .loh AdrpLdr Lloh13, Lloh17
-; CHECK-NEXT: .loh AdrpLdr Lloh12, Lloh16
-; CHECK-NEXT: .loh AdrpLdr Lloh11, Lloh15
-; CHECK-NEXT: .loh AdrpLdr Lloh10, Lloh14
+; CHECK-NEXT: .loh AdrpLdr Lloh14, Lloh17
+; CHECK-NEXT: .loh AdrpLdr Lloh13, Lloh15
+; CHECK-NEXT: .loh AdrpLdr Lloh11, Lloh16
+; CHECK-NEXT: .loh AdrpAdrp Lloh10, Lloh13
+; CHECK-NEXT: .loh AdrpLdr Lloh10, Lloh12
entry:
br label %loop
@@ -715,52 +716,53 @@
; CHECK-LABEL: shuffle_in_loop_is_no_extend_v16i8_to_v16i32:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: Lloh18:
-; CHECK-NEXT: adrp x9, lCPI26_0@PAGE
+; CHECK-NEXT: adrp x8, lCPI26_0@PAGE
+; CHECK-NEXT: movi.2d v0, #0xffffffffffffffff
; CHECK-NEXT: Lloh19:
-; CHECK-NEXT: adrp x10, lCPI26_1@PAGE
+; CHECK-NEXT: adrp x9, lCPI26_2@PAGE
; CHECK-NEXT: Lloh20:
-; CHECK-NEXT: adrp x11, lCPI26_2@PAGE
+; CHECK-NEXT: ldr q1, [x8, lCPI26_0@PAGEOFF]
; CHECK-NEXT: Lloh21:
-; CHECK-NEXT: adrp x12, lCPI26_3@PAGE
-; CHECK-NEXT: movi.2d v2, #0xffffffffffffffff
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: adrp x8, lCPI26_1@PAGE
; CHECK-NEXT: Lloh22:
-; CHECK-NEXT: ldr q0, [x9, lCPI26_0@PAGEOFF]
+; CHECK-NEXT: adrp x10, lCPI26_3@PAGE
; CHECK-NEXT: Lloh23:
-; CHECK-NEXT: ldr q1, [x10, lCPI26_1@PAGEOFF]
+; CHECK-NEXT: ldr q2, [x8, lCPI26_1@PAGEOFF]
; CHECK-NEXT: Lloh24:
-; CHECK-NEXT: ldr q3, [x11, lCPI26_2@PAGEOFF]
+; CHECK-NEXT: ldr q3, [x9, lCPI26_2@PAGEOFF]
; CHECK-NEXT: Lloh25:
-; CHECK-NEXT: ldr q4, [x12, lCPI26_3@PAGEOFF]
+; CHECK-NEXT: ldr q4, [x10, lCPI26_3@PAGEOFF]
+; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: LBB26_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr q5, [x0, x8]
; CHECK-NEXT: add x8, x8, #16
; CHECK-NEXT: cmp x8, #128
-; CHECK-NEXT: cmgt.16b v6, v5, v2
-; CHECK-NEXT: tbl.16b v7, { v5 }, v0
+; CHECK-NEXT: cmgt.16b v6, v5, v0
; CHECK-NEXT: tbl.16b v16, { v5 }, v1
-; CHECK-NEXT: sshll2.8h v18, v6, #0
-; CHECK-NEXT: tbl.16b v17, { v5 }, v3
-; CHECK-NEXT: sshll2.4s v19, v18, #0
-; CHECK-NEXT: sshll.4s v18, v18, #0
+; CHECK-NEXT: tbl.16b v17, { v5 }, v2
+; CHECK-NEXT: tbl.16b v19, { v5 }, v3
; CHECK-NEXT: tbl.16b v5, { v5 }, v4
+; CHECK-NEXT: sshll2.8h v7, v6, #0
; CHECK-NEXT: sshll.8h v6, v6, #0
-; CHECK-NEXT: and.16b v7, v7, v19
-; CHECK-NEXT: and.16b v16, v16, v18
-; CHECK-NEXT: stp q16, q7, [x1, #32]
-; CHECK-NEXT: sshll2.4s v7, v6, #0
+; CHECK-NEXT: sshll2.4s v18, v7, #0
+; CHECK-NEXT: sshll.4s v7, v7, #0
+; CHECK-NEXT: sshll2.4s v20, v6, #0
; CHECK-NEXT: sshll.4s v6, v6, #0
+; CHECK-NEXT: and.16b v16, v16, v18
; CHECK-NEXT: and.16b v7, v17, v7
+; CHECK-NEXT: and.16b v17, v19, v20
; CHECK-NEXT: and.16b v5, v5, v6
-; CHECK-NEXT: stp q5, q7, [x1], #64
+; CHECK-NEXT: stp q7, q16, [x1, #32]
+; CHECK-NEXT: stp q5, q17, [x1], #64
; CHECK-NEXT: b.ne LBB26_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
-; CHECK-NEXT: .loh AdrpLdr Lloh21, Lloh25
-; CHECK-NEXT: .loh AdrpLdr Lloh20, Lloh24
-; CHECK-NEXT: .loh AdrpLdr Lloh19, Lloh23
-; CHECK-NEXT: .loh AdrpLdr Lloh18, Lloh22
+; CHECK-NEXT: .loh AdrpLdr Lloh22, Lloh25
+; CHECK-NEXT: .loh AdrpLdr Lloh21, Lloh23
+; CHECK-NEXT: .loh AdrpLdr Lloh19, Lloh24
+; CHECK-NEXT: .loh AdrpAdrp Lloh18, Lloh21
+; CHECK-NEXT: .loh AdrpLdr Lloh18, Lloh20
entry:
br label %loop
diff --git a/llvm/test/CodeGen/AArch64/wide-scalar-shift-by-byte-multiple-legalization.ll b/llvm/test/CodeGen/AArch64/wide-scalar-shift-by-byte-multiple-legalization.ll
--- a/llvm/test/CodeGen/AArch64/wide-scalar-shift-by-byte-multiple-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/wide-scalar-shift-by-byte-multiple-legalization.ll
@@ -105,14 +105,14 @@
; ALL-NEXT: ldr x8, [x1]
; ALL-NEXT: ldp x10, x9, [x0]
; ALL-NEXT: lsl x8, x8, #3
-; ALL-NEXT: and x11, x8, #0x38
-; ALL-NEXT: mvn w12, w8
+; ALL-NEXT: lsl x11, x9, #1
+; ALL-NEXT: and x12, x8, #0x38
+; ALL-NEXT: mvn w13, w8
; ALL-NEXT: tst x8, #0x40
-; ALL-NEXT: lsl x13, x9, #1
-; ALL-NEXT: lsr x10, x10, x11
-; ALL-NEXT: lsl x12, x13, x12
-; ALL-NEXT: lsr x9, x9, x11
-; ALL-NEXT: orr x8, x12, x10
+; ALL-NEXT: lsr x10, x10, x12
+; ALL-NEXT: lsl x11, x11, x13
+; ALL-NEXT: lsr x9, x9, x12
+; ALL-NEXT: orr x8, x11, x10
; ALL-NEXT: csel x10, xzr, x9, ne
; ALL-NEXT: csel x8, x9, x8, ne
; ALL-NEXT: stp x8, x10, [x2]
@@ -130,14 +130,14 @@
; ALL-NEXT: ldr x8, [x1]
; ALL-NEXT: ldp x9, x10, [x0]
; ALL-NEXT: lsl x8, x8, #3
-; ALL-NEXT: and x11, x8, #0x38
-; ALL-NEXT: mvn w12, w8
-; ALL-NEXT: lsr x13, x9, #1
+; ALL-NEXT: lsr x11, x9, #1
+; ALL-NEXT: and x12, x8, #0x38
+; ALL-NEXT: mvn w13, w8
; ALL-NEXT: tst x8, #0x40
-; ALL-NEXT: lsl x10, x10, x11
-; ALL-NEXT: lsr x12, x13, x12
-; ALL-NEXT: lsl x9, x9, x11
-; ALL-NEXT: orr x8, x10, x12
+; ALL-NEXT: lsl x10, x10, x12
+; ALL-NEXT: lsr x11, x11, x13
+; ALL-NEXT: lsl x9, x9, x12
+; ALL-NEXT: orr x8, x10, x11
; ALL-NEXT: csel x10, xzr, x9, ne
; ALL-NEXT: csel x8, x9, x8, ne
; ALL-NEXT: stp x10, x8, [x2]
@@ -155,18 +155,18 @@
; ALL-NEXT: ldr x8, [x1]
; ALL-NEXT: ldp x10, x9, [x0]
; ALL-NEXT: lsl x8, x8, #3
-; ALL-NEXT: and x11, x8, #0x38
-; ALL-NEXT: mvn w12, w8
+; ALL-NEXT: lsl x11, x9, #1
+; ALL-NEXT: and x12, x8, #0x38
+; ALL-NEXT: mvn w13, w8
; ALL-NEXT: tst x8, #0x40
-; ALL-NEXT: lsl x13, x9, #1
-; ALL-NEXT: asr x8, x9, #63
-; ALL-NEXT: lsr x10, x10, x11
-; ALL-NEXT: lsl x12, x13, x12
-; ALL-NEXT: asr x11, x9, x11
-; ALL-NEXT: orr x9, x12, x10
-; ALL-NEXT: csel x8, x8, x11, ne
-; ALL-NEXT: csel x9, x11, x9, ne
-; ALL-NEXT: stp x9, x8, [x2]
+; ALL-NEXT: lsr x10, x10, x12
+; ALL-NEXT: lsl x11, x11, x13
+; ALL-NEXT: asr x12, x9, x12
+; ALL-NEXT: asr x9, x9, #63
+; ALL-NEXT: orr x8, x11, x10
+; ALL-NEXT: csel x9, x9, x12, ne
+; ALL-NEXT: csel x8, x12, x8, ne
+; ALL-NEXT: stp x8, x9, [x2]
; ALL-NEXT: ret
%src = load i128, ptr %src.ptr, align 1
%byteOff = load i128, ptr %byteOff.ptr, align 1
@@ -180,20 +180,20 @@
; ALL-LABEL: lshr_32bytes:
; ALL: // %bb.0:
; ALL-NEXT: sub sp, sp, #64
-; ALL-NEXT: ldr x9, [x1]
-; ALL-NEXT: mov x8, sp
-; ALL-NEXT: ldp x10, x11, [x0, #16]
+; ALL-NEXT: ldp x9, x8, [x0, #16]
; ALL-NEXT: movi v0.2d, #0000000000000000
+; ALL-NEXT: ldr x10, [x1]
; ALL-NEXT: ldr q1, [x0]
-; ALL-NEXT: and x9, x9, #0x1f
+; ALL-NEXT: stp x9, x8, [sp, #16]
+; ALL-NEXT: mov x8, sp
+; ALL-NEXT: and x9, x10, #0x1f
+; ALL-NEXT: str q1, [sp]
; ALL-NEXT: add x8, x8, x9
; ALL-NEXT: stp q0, q0, [sp, #32]
-; ALL-NEXT: stp x10, x11, [sp, #16]
-; ALL-NEXT: str q1, [sp]
; ALL-NEXT: ldp x10, x9, [x8, #16]
; ALL-NEXT: ldr q0, [x8]
-; ALL-NEXT: stp x10, x9, [x2, #16]
; ALL-NEXT: str q0, [x2]
+; ALL-NEXT: stp x10, x9, [x2, #16]
; ALL-NEXT: add sp, sp, #64
; ALL-NEXT: ret
%src = load i256, ptr %src.ptr, align 1
@@ -207,21 +207,21 @@
; ALL-LABEL: shl_32bytes:
; ALL: // %bb.0:
; ALL-NEXT: sub sp, sp, #64
-; ALL-NEXT: ldr x9, [x1]
-; ALL-NEXT: mov x8, sp
-; ALL-NEXT: ldp x10, x11, [x0, #16]
+; ALL-NEXT: ldp x9, x8, [x0, #16]
; ALL-NEXT: movi v0.2d, #0000000000000000
-; ALL-NEXT: add x8, x8, #32
+; ALL-NEXT: ldr x10, [x1]
; ALL-NEXT: ldr q1, [x0]
-; ALL-NEXT: and x9, x9, #0x1f
-; ALL-NEXT: sub x8, x8, x9
+; ALL-NEXT: stp x9, x8, [sp, #48]
+; ALL-NEXT: mov x8, sp
+; ALL-NEXT: and x9, x10, #0x1f
+; ALL-NEXT: add x8, x8, #32
; ALL-NEXT: stp q0, q0, [sp]
-; ALL-NEXT: stp x10, x11, [sp, #48]
; ALL-NEXT: str q1, [sp, #32]
+; ALL-NEXT: sub x8, x8, x9
; ALL-NEXT: ldp x9, x10, [x8, #16]
; ALL-NEXT: ldr q0, [x8]
-; ALL-NEXT: stp x9, x10, [x2, #16]
; ALL-NEXT: str q0, [x2]
+; ALL-NEXT: stp x9, x10, [x2, #16]
; ALL-NEXT: add sp, sp, #64
; ALL-NEXT: ret
%src = load i256, ptr %src.ptr, align 1
@@ -235,21 +235,21 @@
; ALL-LABEL: ashr_32bytes:
; ALL: // %bb.0:
; ALL-NEXT: sub sp, sp, #64
-; ALL-NEXT: ldp x11, x10, [x0, #16]
-; ALL-NEXT: mov x8, sp
-; ALL-NEXT: ldr x9, [x1]
+; ALL-NEXT: ldp x9, x8, [x0, #16]
+; ALL-NEXT: ldr x10, [x1]
; ALL-NEXT: ldr q0, [x0]
-; ALL-NEXT: asr x12, x10, #63
-; ALL-NEXT: and x9, x9, #0x1f
-; ALL-NEXT: add x8, x8, x9
-; ALL-NEXT: stp x11, x10, [sp, #16]
+; ALL-NEXT: and x10, x10, #0x1f
+; ALL-NEXT: stp x9, x8, [sp, #16]
+; ALL-NEXT: asr x8, x8, #63
+; ALL-NEXT: mov x9, sp
; ALL-NEXT: str q0, [sp]
-; ALL-NEXT: stp x12, x12, [sp, #48]
-; ALL-NEXT: stp x12, x12, [sp, #32]
+; ALL-NEXT: stp x8, x8, [sp, #48]
+; ALL-NEXT: stp x8, x8, [sp, #32]
+; ALL-NEXT: add x8, x9, x10
; ALL-NEXT: ldp x10, x9, [x8, #16]
; ALL-NEXT: ldr q0, [x8]
-; ALL-NEXT: stp x10, x9, [x2, #16]
; ALL-NEXT: str q0, [x2]
+; ALL-NEXT: stp x10, x9, [x2, #16]
; ALL-NEXT: add sp, sp, #64
; ALL-NEXT: ret
%src = load i256, ptr %src.ptr, align 1
diff --git a/llvm/test/CodeGen/AArch64/wide-scalar-shift-legalization.ll b/llvm/test/CodeGen/AArch64/wide-scalar-shift-legalization.ll
--- a/llvm/test/CodeGen/AArch64/wide-scalar-shift-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/wide-scalar-shift-legalization.ll
@@ -1,12 +1,11 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=ALL
-
define void @lshr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; ALL-LABEL: lshr_4bytes:
; ALL: // %bb.0:
-; ALL-NEXT: ldr w8, [x1]
-; ALL-NEXT: ldr w9, [x0]
-; ALL-NEXT: lsr w8, w9, w8
+; ALL-NEXT: ldr w8, [x0]
+; ALL-NEXT: ldr w9, [x1]
+; ALL-NEXT: lsr w8, w8, w9
; ALL-NEXT: str w8, [x2]
; ALL-NEXT: ret
%src = load i32, ptr %src.ptr, align 1
@@ -18,9 +17,9 @@
define void @shl_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; ALL-LABEL: shl_4bytes:
; ALL: // %bb.0:
-; ALL-NEXT: ldr w8, [x1]
-; ALL-NEXT: ldr w9, [x0]
-; ALL-NEXT: lsl w8, w9, w8
+; ALL-NEXT: ldr w8, [x0]
+; ALL-NEXT: ldr w9, [x1]
+; ALL-NEXT: lsl w8, w8, w9
; ALL-NEXT: str w8, [x2]
; ALL-NEXT: ret
%src = load i32, ptr %src.ptr, align 1
@@ -32,9 +31,9 @@
define void @ashr_4bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; ALL-LABEL: ashr_4bytes:
; ALL: // %bb.0:
-; ALL-NEXT: ldr w8, [x1]
-; ALL-NEXT: ldr w9, [x0]
-; ALL-NEXT: asr w8, w9, w8
+; ALL-NEXT: ldr w8, [x0]
+; ALL-NEXT: ldr w9, [x1]
+; ALL-NEXT: asr w8, w8, w9
; ALL-NEXT: str w8, [x2]
; ALL-NEXT: ret
%src = load i32, ptr %src.ptr, align 1
@@ -43,13 +42,12 @@
store i32 %res, ptr %dst, align 1
ret void
}
-
define void @lshr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; ALL-LABEL: lshr_8bytes:
; ALL: // %bb.0:
-; ALL-NEXT: ldr x8, [x1]
-; ALL-NEXT: ldr x9, [x0]
-; ALL-NEXT: lsr x8, x9, x8
+; ALL-NEXT: ldr x8, [x0]
+; ALL-NEXT: ldr x9, [x1]
+; ALL-NEXT: lsr x8, x8, x9
; ALL-NEXT: str x8, [x2]
; ALL-NEXT: ret
%src = load i64, ptr %src.ptr, align 1
@@ -61,9 +59,9 @@
define void @shl_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; ALL-LABEL: shl_8bytes:
; ALL: // %bb.0:
-; ALL-NEXT: ldr x8, [x1]
-; ALL-NEXT: ldr x9, [x0]
-; ALL-NEXT: lsl x8, x9, x8
+; ALL-NEXT: ldr x8, [x0]
+; ALL-NEXT: ldr x9, [x1]
+; ALL-NEXT: lsl x8, x8, x9
; ALL-NEXT: str x8, [x2]
; ALL-NEXT: ret
%src = load i64, ptr %src.ptr, align 1
@@ -75,9 +73,9 @@
define void @ashr_8bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; ALL-LABEL: ashr_8bytes:
; ALL: // %bb.0:
-; ALL-NEXT: ldr x8, [x1]
-; ALL-NEXT: ldr x9, [x0]
-; ALL-NEXT: asr x8, x9, x8
+; ALL-NEXT: ldr x8, [x0]
+; ALL-NEXT: ldr x9, [x1]
+; ALL-NEXT: asr x8, x8, x9
; ALL-NEXT: str x8, [x2]
; ALL-NEXT: ret
%src = load i64, ptr %src.ptr, align 1
@@ -86,21 +84,20 @@
store i64 %res, ptr %dst, align 1
ret void
}
-
define void @lshr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; ALL-LABEL: lshr_16bytes:
; ALL: // %bb.0:
-; ALL-NEXT: ldr x8, [x1]
-; ALL-NEXT: ldp x10, x9, [x0]
-; ALL-NEXT: mvn w11, w8
-; ALL-NEXT: tst x8, #0x40
-; ALL-NEXT: lsr x10, x10, x8
-; ALL-NEXT: lsl x12, x9, #1
-; ALL-NEXT: lsr x9, x9, x8
-; ALL-NEXT: lsl x11, x12, x11
-; ALL-NEXT: orr x8, x11, x10
-; ALL-NEXT: csel x10, xzr, x9, ne
-; ALL-NEXT: csel x8, x9, x8, ne
+; ALL-NEXT: ldp x10, x8, [x0]
+; ALL-NEXT: ldr x9, [x1]
+; ALL-NEXT: mvn w12, w9
+; ALL-NEXT: tst x9, #0x40
+; ALL-NEXT: lsl x11, x8, #1
+; ALL-NEXT: lsr x10, x10, x9
+; ALL-NEXT: lsr x8, x8, x9
+; ALL-NEXT: lsl x11, x11, x12
+; ALL-NEXT: orr x9, x11, x10
+; ALL-NEXT: csel x10, xzr, x8, ne
+; ALL-NEXT: csel x8, x8, x9, ne
; ALL-NEXT: stp x8, x10, [x2]
; ALL-NEXT: ret
%src = load i128, ptr %src.ptr, align 1
@@ -112,17 +109,17 @@
define void @shl_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; ALL-LABEL: shl_16bytes:
; ALL: // %bb.0:
-; ALL-NEXT: ldr x8, [x1]
-; ALL-NEXT: ldp x9, x10, [x0]
-; ALL-NEXT: mvn w11, w8
-; ALL-NEXT: tst x8, #0x40
-; ALL-NEXT: lsr x12, x9, #1
-; ALL-NEXT: lsl x9, x9, x8
-; ALL-NEXT: lsl x10, x10, x8
-; ALL-NEXT: lsr x11, x12, x11
-; ALL-NEXT: orr x8, x10, x11
-; ALL-NEXT: csel x10, xzr, x9, ne
-; ALL-NEXT: csel x8, x9, x8, ne
+; ALL-NEXT: ldp x8, x10, [x0]
+; ALL-NEXT: ldr x9, [x1]
+; ALL-NEXT: mvn w12, w9
+; ALL-NEXT: tst x9, #0x40
+; ALL-NEXT: lsr x11, x8, #1
+; ALL-NEXT: lsl x10, x10, x9
+; ALL-NEXT: lsl x8, x8, x9
+; ALL-NEXT: lsr x11, x11, x12
+; ALL-NEXT: orr x9, x10, x11
+; ALL-NEXT: csel x10, xzr, x8, ne
+; ALL-NEXT: csel x8, x8, x9, ne
; ALL-NEXT: stp x10, x8, [x2]
; ALL-NEXT: ret
%src = load i128, ptr %src.ptr, align 1
@@ -134,16 +131,16 @@
define void @ashr_16bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; ALL-LABEL: ashr_16bytes:
; ALL: // %bb.0:
-; ALL-NEXT: ldr x8, [x1]
-; ALL-NEXT: ldp x10, x9, [x0]
-; ALL-NEXT: mvn w11, w8
-; ALL-NEXT: tst x8, #0x40
-; ALL-NEXT: lsr x10, x10, x8
-; ALL-NEXT: lsl x12, x9, #1
-; ALL-NEXT: lsl x11, x12, x11
-; ALL-NEXT: asr x12, x9, x8
-; ALL-NEXT: asr x8, x9, #63
-; ALL-NEXT: orr x9, x11, x10
+; ALL-NEXT: ldp x9, x8, [x0]
+; ALL-NEXT: ldr x10, [x1]
+; ALL-NEXT: mvn w12, w10
+; ALL-NEXT: tst x10, #0x40
+; ALL-NEXT: lsl x11, x8, #1
+; ALL-NEXT: lsr x9, x9, x10
+; ALL-NEXT: lsl x11, x11, x12
+; ALL-NEXT: asr x12, x8, x10
+; ALL-NEXT: asr x8, x8, #63
+; ALL-NEXT: orr x9, x11, x9
; ALL-NEXT: csel x8, x8, x12, ne
; ALL-NEXT: csel x9, x12, x9, ne
; ALL-NEXT: stp x9, x8, [x2]
@@ -154,41 +151,39 @@
store i128 %res, ptr %dst, align 1
ret void
}
-
define void @lshr_32bytes(ptr %src.ptr, ptr %bitOff.ptr, ptr %dst) nounwind {
; ALL-LABEL: lshr_32bytes:
; ALL: // %bb.0:
; ALL-NEXT: sub sp, sp, #64
-; ALL-NEXT: ldr x9, [x1]
-; ALL-NEXT: mov x8, sp
-; ALL-NEXT: ldp x10, x11, [x0, #16]
+; ALL-NEXT: ldp x9, x8, [x0, #16]
; ALL-NEXT: movi v0.2d, #0000000000000000
+; ALL-NEXT: ldr x10, [x1]
; ALL-NEXT: ldr q1, [x0]
-; ALL-NEXT: ubfx x12, x9, #3, #5
-; ALL-NEXT: add x8, x8, x12
-; ALL-NEXT: and x9, x9, #0x7
-; ALL-NEXT: mvn w13, w9
-; ALL-NEXT: stp q0, q0, [sp, #32]
-; ALL-NEXT: stp x10, x11, [sp, #16]
+; ALL-NEXT: stp x9, x8, [sp, #16]
+; ALL-NEXT: ubfx x8, x10, #3, #5
+; ALL-NEXT: mov x9, sp
; ALL-NEXT: str q1, [sp]
-; ALL-NEXT: ldp x11, x10, [x8, #8]
-; ALL-NEXT: ldr x12, [x8]
-; ALL-NEXT: ldr x8, [x8, #24]
-; ALL-NEXT: lsr x15, x11, x9
-; ALL-NEXT: lsl x11, x11, #1
-; ALL-NEXT: lsl x14, x10, #1
-; ALL-NEXT: lsr x10, x10, x9
-; ALL-NEXT: lsr x12, x12, x9
-; ALL-NEXT: lsr x9, x8, x9
-; ALL-NEXT: lsl x8, x8, #1
-; ALL-NEXT: lsl x11, x11, x13
-; ALL-NEXT: lsl x8, x8, x13
-; ALL-NEXT: orr x11, x11, x12
-; ALL-NEXT: orr x8, x8, x10
-; ALL-NEXT: lsl x10, x14, x13
-; ALL-NEXT: orr x10, x15, x10
-; ALL-NEXT: stp x8, x9, [x2, #16]
-; ALL-NEXT: stp x11, x10, [x2]
+; ALL-NEXT: and x10, x10, #0x7
+; ALL-NEXT: stp q0, q0, [sp, #32]
+; ALL-NEXT: add x8, x9, x8
+; ALL-NEXT: mvn w13, w10
+; ALL-NEXT: ldp x11, x9, [x8, #16]
+; ALL-NEXT: ldp x8, x12, [x8]
+; ALL-NEXT: lsl x14, x9, #1
+; ALL-NEXT: lsl x15, x11, #1
+; ALL-NEXT: lsr x11, x11, x10
+; ALL-NEXT: lsl x16, x12, #1
+; ALL-NEXT: lsr x9, x9, x10
+; ALL-NEXT: lsr x12, x12, x10
+; ALL-NEXT: lsl x14, x14, x13
+; ALL-NEXT: lsr x8, x8, x10
+; ALL-NEXT: lsl x10, x16, x13
+; ALL-NEXT: lsl x13, x15, x13
+; ALL-NEXT: orr x11, x14, x11
+; ALL-NEXT: stp x11, x9, [x2, #16]
+; ALL-NEXT: orr x8, x10, x8
+; ALL-NEXT: orr x9, x12, x13
+; ALL-NEXT: stp x8, x9, [x2]
; ALL-NEXT: add sp, sp, #64
; ALL-NEXT: ret
%src = load i256, ptr %src.ptr, align 1
@@ -201,35 +196,35 @@
; ALL-LABEL: shl_32bytes:
; ALL: // %bb.0:
; ALL-NEXT: sub sp, sp, #64
-; ALL-NEXT: ldr x9, [x1]
-; ALL-NEXT: mov x8, sp
-; ALL-NEXT: ldp x10, x11, [x0, #16]
+; ALL-NEXT: ldp x9, x8, [x0, #16]
; ALL-NEXT: movi v0.2d, #0000000000000000
-; ALL-NEXT: add x8, x8, #32
+; ALL-NEXT: ldr x10, [x1]
; ALL-NEXT: ldr q1, [x0]
-; ALL-NEXT: ubfx x12, x9, #3, #5
-; ALL-NEXT: sub x8, x8, x12
-; ALL-NEXT: and x9, x9, #0x7
-; ALL-NEXT: mvn w12, w9
-; ALL-NEXT: stp q0, q0, [sp]
-; ALL-NEXT: stp x10, x11, [sp, #48]
-; ALL-NEXT: str q1, [sp, #32]
-; ALL-NEXT: ldp x10, x11, [x8]
-; ALL-NEXT: ldp x13, x8, [x8, #16]
-; ALL-NEXT: lsr x14, x10, #1
-; ALL-NEXT: lsl x10, x10, x9
-; ALL-NEXT: lsl x15, x11, x9
-; ALL-NEXT: lsr x11, x11, #1
-; ALL-NEXT: lsr x14, x14, x12
-; ALL-NEXT: lsr x11, x11, x12
-; ALL-NEXT: lsl x8, x8, x9
-; ALL-NEXT: lsl x9, x13, x9
-; ALL-NEXT: lsr x13, x13, #1
-; ALL-NEXT: orr x14, x15, x14
-; ALL-NEXT: lsr x13, x13, x12
-; ALL-NEXT: orr x9, x9, x11
-; ALL-NEXT: orr x8, x8, x13
-; ALL-NEXT: stp x10, x14, [x2]
+; ALL-NEXT: stp x9, x8, [sp, #48]
+; ALL-NEXT: mov x8, sp
+; ALL-NEXT: ubfx x9, x10, #3, #5
+; ALL-NEXT: add x8, x8, #32
+; ALL-NEXT: stp q0, q1, [sp, #16]
+; ALL-NEXT: and x10, x10, #0x7
+; ALL-NEXT: str q0, [sp]
+; ALL-NEXT: sub x8, x8, x9
+; ALL-NEXT: mvn w13, w10
+; ALL-NEXT: ldp x9, x11, [x8]
+; ALL-NEXT: ldp x12, x8, [x8, #16]
+; ALL-NEXT: lsr x14, x9, #1
+; ALL-NEXT: lsr x15, x11, #1
+; ALL-NEXT: lsl x11, x11, x10
+; ALL-NEXT: lsr x16, x12, #1
+; ALL-NEXT: lsl x9, x9, x10
+; ALL-NEXT: lsl x12, x12, x10
+; ALL-NEXT: lsr x14, x14, x13
+; ALL-NEXT: lsl x8, x8, x10
+; ALL-NEXT: lsr x10, x16, x13
+; ALL-NEXT: lsr x13, x15, x13
+; ALL-NEXT: orr x11, x11, x14
+; ALL-NEXT: stp x9, x11, [x2]
+; ALL-NEXT: orr x8, x8, x10
+; ALL-NEXT: orr x9, x12, x13
; ALL-NEXT: stp x9, x8, [x2, #16]
; ALL-NEXT: add sp, sp, #64
; ALL-NEXT: ret
@@ -243,35 +238,35 @@
; ALL-LABEL: ashr_32bytes:
; ALL: // %bb.0:
; ALL-NEXT: sub sp, sp, #64
-; ALL-NEXT: ldp x11, x10, [x0, #16]
-; ALL-NEXT: mov x8, sp
-; ALL-NEXT: ldr x9, [x1]
+; ALL-NEXT: ldp x9, x8, [x0, #16]
+; ALL-NEXT: mov x11, sp
+; ALL-NEXT: ldr x10, [x1]
; ALL-NEXT: ldr q0, [x0]
-; ALL-NEXT: asr x12, x10, #63
-; ALL-NEXT: stp x11, x10, [sp, #16]
-; ALL-NEXT: ubfx x10, x9, #3, #5
+; ALL-NEXT: stp x9, x8, [sp, #16]
+; ALL-NEXT: asr x8, x8, #63
+; ALL-NEXT: ubfx x9, x10, #3, #5
; ALL-NEXT: str q0, [sp]
-; ALL-NEXT: add x8, x8, x10
-; ALL-NEXT: and x9, x9, #0x7
-; ALL-NEXT: stp x12, x12, [sp, #48]
-; ALL-NEXT: stp x12, x12, [sp, #32]
-; ALL-NEXT: mvn w12, w9
-; ALL-NEXT: ldp x10, x11, [x8, #16]
-; ALL-NEXT: ldp x8, x13, [x8]
-; ALL-NEXT: lsl x14, x10, #1
-; ALL-NEXT: lsr x10, x10, x9
+; ALL-NEXT: and x10, x10, #0x7
+; ALL-NEXT: stp x8, x8, [sp, #48]
+; ALL-NEXT: add x9, x11, x9
+; ALL-NEXT: mvn w13, w10
+; ALL-NEXT: stp x8, x8, [sp, #32]
+; ALL-NEXT: ldp x11, x8, [x9, #16]
+; ALL-NEXT: ldp x9, x12, [x9]
+; ALL-NEXT: lsl x14, x8, #1
; ALL-NEXT: lsl x15, x11, #1
-; ALL-NEXT: asr x11, x11, x9
-; ALL-NEXT: lsl x15, x15, x12
-; ALL-NEXT: lsl x14, x14, x12
-; ALL-NEXT: orr x10, x15, x10
-; ALL-NEXT: lsl x15, x13, #1
-; ALL-NEXT: lsl x12, x15, x12
-; ALL-NEXT: lsr x8, x8, x9
-; ALL-NEXT: lsr x9, x13, x9
-; ALL-NEXT: orr x8, x12, x8
-; ALL-NEXT: orr x9, x9, x14
-; ALL-NEXT: stp x10, x11, [x2, #16]
+; ALL-NEXT: lsr x11, x11, x10
+; ALL-NEXT: lsl x16, x12, #1
+; ALL-NEXT: asr x8, x8, x10
+; ALL-NEXT: lsr x12, x12, x10
+; ALL-NEXT: lsl x14, x14, x13
+; ALL-NEXT: lsr x9, x9, x10
+; ALL-NEXT: lsl x10, x16, x13
+; ALL-NEXT: lsl x13, x15, x13
+; ALL-NEXT: orr x11, x14, x11
+; ALL-NEXT: stp x11, x8, [x2, #16]
+; ALL-NEXT: orr x8, x10, x9
+; ALL-NEXT: orr x9, x12, x13
; ALL-NEXT: stp x8, x9, [x2]
; ALL-NEXT: add sp, sp, #64
; ALL-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/win64_vararg.ll b/llvm/test/CodeGen/AArch64/win64_vararg.ll
--- a/llvm/test/CodeGen/AArch64/win64_vararg.ll
+++ b/llvm/test/CodeGen/AArch64/win64_vararg.ll
@@ -7,8 +7,8 @@
; CHECK-NEXT: str x30, [sp, #-80]! // 8-byte Folded Spill
; CHECK-NEXT: add x8, sp, #24
; CHECK-NEXT: add x0, sp, #24
-; CHECK-NEXT: stp x3, x4, [sp, #40]
; CHECK-NEXT: stp x1, x2, [sp, #24]
+; CHECK-NEXT: stp x3, x4, [sp, #40]
; CHECK-NEXT: stp x5, x6, [sp, #56]
; CHECK-NEXT: str x7, [sp, #72]
; CHECK-NEXT: str x8, [sp, #8]
@@ -82,8 +82,8 @@
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: sub sp, sp, #80
; CHECK-NEXT: add x8, sp, #24
-; CHECK-NEXT: stp x3, x4, [sp, #40]
; CHECK-NEXT: stp x1, x2, [sp, #24]
+; CHECK-NEXT: stp x3, x4, [sp, #40]
; CHECK-NEXT: stp x5, x6, [sp, #56]
; CHECK-NEXT: str x7, [sp, #72]
; CHECK-NEXT: stp x8, x8, [sp], #80
@@ -178,17 +178,17 @@
; CHECK-NEXT: add x29, sp, #40
; CHECK-NEXT: .seh_add_fp 40
; CHECK-NEXT: .seh_endprologue
-; CHECK-NEXT: add x8, x29, #24
; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
+; CHECK-NEXT: add x8, x29, #24
; CHECK-NEXT: mov w9, w0
; CHECK-NEXT: mov x19, x1
-; CHECK-NEXT: mov x23, sp
-; CHECK-NEXT: stp x3, x4, [x29, #32]
-; CHECK-NEXT: stp x8, x2, [x29, #16]
+; CHECK-NEXT: str x8, [x29, #16]
; CHECK-NEXT: add x8, x9, #15
+; CHECK-NEXT: mov x23, sp
; CHECK-NEXT: lsr x15, x8, #4
-; CHECK-NEXT: stp x5, x6, [x29, #48]
-; CHECK-NEXT: str x7, [x29, #64]
+; CHECK-NEXT: stp x2, x3, [x29, #24]
+; CHECK-NEXT: stp x4, x5, [x29, #40]
+; CHECK-NEXT: stp x6, x7, [x29, #56]
; CHECK-NEXT: bl __chkstk
; CHECK-NEXT: sub x20, sp, x15, lsl #4
; CHECK-NEXT: mov sp, x20
diff --git a/llvm/test/CodeGen/AArch64/win64_vararg_float.ll b/llvm/test/CodeGen/AArch64/win64_vararg_float.ll
--- a/llvm/test/CodeGen/AArch64/win64_vararg_float.ll
+++ b/llvm/test/CodeGen/AArch64/win64_vararg_float.ll
@@ -7,13 +7,13 @@
; DAGISEL-LABEL: float_va_fn:
; DAGISEL: // %bb.0: // %entry
; DAGISEL-NEXT: str x30, [sp, #-64]! // 8-byte Folded Spill
-; DAGISEL-NEXT: add x8, sp, #16
; DAGISEL-NEXT: fmov s0, w0
+; DAGISEL-NEXT: add x8, sp, #16
; DAGISEL-NEXT: add x0, sp, #16
-; DAGISEL-NEXT: stp x3, x4, [sp, #24]
-; DAGISEL-NEXT: stp x5, x6, [sp, #40]
-; DAGISEL-NEXT: stp x8, x2, [sp, #8]
-; DAGISEL-NEXT: str x7, [sp, #56]
+; DAGISEL-NEXT: stp x2, x3, [sp, #16]
+; DAGISEL-NEXT: stp x4, x5, [sp, #32]
+; DAGISEL-NEXT: stp x6, x7, [sp, #48]
+; DAGISEL-NEXT: str x8, [sp, #8]
; DAGISEL-NEXT: bl f_va_list
; DAGISEL-NEXT: ldr x30, [sp], #64 // 8-byte Folded Reload
; DAGISEL-NEXT: ret
@@ -57,13 +57,13 @@
; DAGISEL-LABEL: double_va_fn:
; DAGISEL: // %bb.0: // %entry
; DAGISEL-NEXT: str x30, [sp, #-64]! // 8-byte Folded Spill
-; DAGISEL-NEXT: add x8, sp, #16
; DAGISEL-NEXT: fmov d0, x0
+; DAGISEL-NEXT: add x8, sp, #16
; DAGISEL-NEXT: add x0, sp, #16
-; DAGISEL-NEXT: stp x3, x4, [sp, #24]
-; DAGISEL-NEXT: stp x5, x6, [sp, #40]
-; DAGISEL-NEXT: stp x8, x2, [sp, #8]
-; DAGISEL-NEXT: str x7, [sp, #56]
+; DAGISEL-NEXT: stp x2, x3, [sp, #16]
+; DAGISEL-NEXT: stp x4, x5, [sp, #32]
+; DAGISEL-NEXT: stp x6, x7, [sp, #48]
+; DAGISEL-NEXT: str x8, [sp, #8]
; DAGISEL-NEXT: bl d_va_list
; DAGISEL-NEXT: ldr x30, [sp], #64 // 8-byte Folded Reload
; DAGISEL-NEXT: ret
@@ -102,28 +102,28 @@
define void @call_f_va() nounwind {
; DAGISEL-LABEL: call_f_va:
; DAGISEL: // %bb.0: // %entry
-; DAGISEL-NEXT: mov w0, #1065353216
-; DAGISEL-NEXT: mov w1, #2
-; DAGISEL-NEXT: mov x2, #4613937818241073152
-; DAGISEL-NEXT: mov w3, #4
+; DAGISEL-NEXT: mov w0, #1065353216 // =0x3f800000
+; DAGISEL-NEXT: mov w1, #2 // =0x2
+; DAGISEL-NEXT: mov x2, #4613937818241073152 // =0x4008000000000000
+; DAGISEL-NEXT: mov w3, #4 // =0x4
; DAGISEL-NEXT: b other_f_va_fn
;
; FASTISEL-LABEL: call_f_va:
; FASTISEL: // %bb.0: // %entry
-; FASTISEL-NEXT: mov w0, #1065353216
-; FASTISEL-NEXT: mov w1, #2
-; FASTISEL-NEXT: mov x2, #4613937818241073152
-; FASTISEL-NEXT: mov w3, #4
+; FASTISEL-NEXT: mov w0, #1065353216 // =0x3f800000
+; FASTISEL-NEXT: mov w1, #2 // =0x2
+; FASTISEL-NEXT: mov x2, #4613937818241073152 // =0x4008000000000000
+; FASTISEL-NEXT: mov w3, #4 // =0x4
; FASTISEL-NEXT: b other_f_va_fn
;
; GISEL-LABEL: call_f_va:
; GISEL: // %bb.0: // %entry
; GISEL-NEXT: fmov s0, #1.00000000
; GISEL-NEXT: fmov w0, s0
-; GISEL-NEXT: mov w1, #2
+; GISEL-NEXT: mov w1, #2 // =0x2
; GISEL-NEXT: fmov d0, #3.00000000
; GISEL-NEXT: fmov x2, d0
-; GISEL-NEXT: mov w3, #4
+; GISEL-NEXT: mov w3, #4 // =0x4
; GISEL-NEXT: b other_f_va_fn
entry:
tail call void (float, i32, ...) @other_f_va_fn(float 1.000000e+00, i32 2, double 3.000000e+00, i32 4)
@@ -135,28 +135,28 @@
define void @call_d_va() nounwind {
; DAGISEL-LABEL: call_d_va:
; DAGISEL: // %bb.0: // %entry
-; DAGISEL-NEXT: mov x0, #4607182418800017408
-; DAGISEL-NEXT: mov w1, #2
-; DAGISEL-NEXT: mov x2, #4613937818241073152
-; DAGISEL-NEXT: mov w3, #4
+; DAGISEL-NEXT: mov x0, #4607182418800017408 // =0x3ff0000000000000
+; DAGISEL-NEXT: mov w1, #2 // =0x2
+; DAGISEL-NEXT: mov x2, #4613937818241073152 // =0x4008000000000000
+; DAGISEL-NEXT: mov w3, #4 // =0x4
; DAGISEL-NEXT: b other_d_va_fn
;
; FASTISEL-LABEL: call_d_va:
; FASTISEL: // %bb.0: // %entry
-; FASTISEL-NEXT: mov x0, #4607182418800017408
-; FASTISEL-NEXT: mov w1, #2
-; FASTISEL-NEXT: mov x2, #4613937818241073152
-; FASTISEL-NEXT: mov w3, #4
+; FASTISEL-NEXT: mov x0, #4607182418800017408 // =0x3ff0000000000000
+; FASTISEL-NEXT: mov w1, #2 // =0x2
+; FASTISEL-NEXT: mov x2, #4613937818241073152 // =0x4008000000000000
+; FASTISEL-NEXT: mov w3, #4 // =0x4
; FASTISEL-NEXT: b other_d_va_fn
;
; GISEL-LABEL: call_d_va:
; GISEL: // %bb.0: // %entry
; GISEL-NEXT: fmov d0, #1.00000000
; GISEL-NEXT: fmov x0, d0
-; GISEL-NEXT: mov w1, #2
+; GISEL-NEXT: mov w1, #2 // =0x2
; GISEL-NEXT: fmov d0, #3.00000000
; GISEL-NEXT: fmov x2, d0
-; GISEL-NEXT: mov w3, #4
+; GISEL-NEXT: mov w3, #4 // =0x4
; GISEL-NEXT: b other_d_va_fn
entry:
tail call void (double, i32, ...) @other_d_va_fn(double 1.000000e+00, i32 2, double 3.000000e+00, i32 4)
@@ -170,16 +170,16 @@
; DAGISEL: // %bb.0: // %entry
; DAGISEL-NEXT: fmov d0, #1.00000000
; DAGISEL-NEXT: fmov d1, #3.00000000
-; DAGISEL-NEXT: mov w0, #2
-; DAGISEL-NEXT: mov w1, #4
+; DAGISEL-NEXT: mov w0, #2 // =0x2
+; DAGISEL-NEXT: mov w1, #4 // =0x4
; DAGISEL-NEXT: b other_d_non_va_fn
;
; O0-LABEL: call_d_non_va:
; O0: // %bb.0: // %entry
; O0-NEXT: fmov d0, #1.00000000
-; O0-NEXT: mov w0, #2
+; O0-NEXT: mov w0, #2 // =0x2
; O0-NEXT: fmov d1, #3.00000000
-; O0-NEXT: mov w1, #4
+; O0-NEXT: mov w1, #4 // =0x4
; O0-NEXT: b other_d_non_va_fn
entry:
tail call void (double, i32, double, i32) @other_d_non_va_fn(double 1.000000e+00, i32 2, double 3.000000e+00, i32 4)
diff --git a/llvm/test/CodeGen/AArch64/win64_vararg_float_cc.ll b/llvm/test/CodeGen/AArch64/win64_vararg_float_cc.ll
--- a/llvm/test/CodeGen/AArch64/win64_vararg_float_cc.ll
+++ b/llvm/test/CodeGen/AArch64/win64_vararg_float_cc.ll
@@ -5,9 +5,13 @@
; Check that non-vararg functions compilation is not broken
define win64cc float @foo(float %arg) nounwind {
-; GISEL-LABEL: foo:
-; GISEL-NEXT: // %bb.0: // %entry
-; GISEL-NEXT: ret
+; DAGISEL-LABEL: foo:
+; DAGISEL: // %bb.0: // %entry
+; DAGISEL-NEXT: ret
+;
+; O0-LABEL: foo:
+; O0: // %bb.0: // %entry
+; O0-NEXT: ret
entry:
ret float %arg
}
@@ -16,13 +20,13 @@
; DAGISEL-LABEL: float_va_fn:
; DAGISEL: // %bb.0: // %entry
; DAGISEL-NEXT: str x30, [sp, #-64]! // 8-byte Folded Spill
-; DAGISEL-NEXT: add x8, sp, #16
; DAGISEL-NEXT: fmov s0, w0
+; DAGISEL-NEXT: add x8, sp, #16
; DAGISEL-NEXT: add x0, sp, #16
-; DAGISEL-NEXT: stp x3, x4, [sp, #24]
-; DAGISEL-NEXT: stp x5, x6, [sp, #40]
-; DAGISEL-NEXT: stp x8, x2, [sp, #8]
-; DAGISEL-NEXT: str x7, [sp, #56]
+; DAGISEL-NEXT: stp x2, x3, [sp, #16]
+; DAGISEL-NEXT: stp x4, x5, [sp, #32]
+; DAGISEL-NEXT: stp x6, x7, [sp, #48]
+; DAGISEL-NEXT: str x8, [sp, #8]
; DAGISEL-NEXT: bl f_va_list
; DAGISEL-NEXT: ldr x30, [sp], #64 // 8-byte Folded Reload
; DAGISEL-NEXT: ret
@@ -66,13 +70,13 @@
; DAGISEL-LABEL: double_va_fn:
; DAGISEL: // %bb.0: // %entry
; DAGISEL-NEXT: str x30, [sp, #-64]! // 8-byte Folded Spill
-; DAGISEL-NEXT: add x8, sp, #16
; DAGISEL-NEXT: fmov d0, x0
+; DAGISEL-NEXT: add x8, sp, #16
; DAGISEL-NEXT: add x0, sp, #16
-; DAGISEL-NEXT: stp x3, x4, [sp, #24]
-; DAGISEL-NEXT: stp x5, x6, [sp, #40]
-; DAGISEL-NEXT: stp x8, x2, [sp, #8]
-; DAGISEL-NEXT: str x7, [sp, #56]
+; DAGISEL-NEXT: stp x2, x3, [sp, #16]
+; DAGISEL-NEXT: stp x4, x5, [sp, #32]
+; DAGISEL-NEXT: stp x6, x7, [sp, #48]
+; DAGISEL-NEXT: str x8, [sp, #8]
; DAGISEL-NEXT: bl d_va_list
; DAGISEL-NEXT: ldr x30, [sp], #64 // 8-byte Folded Reload
; DAGISEL-NEXT: ret
@@ -112,10 +116,10 @@
; DAGISEL-LABEL: call_f_va:
; DAGISEL: // %bb.0: // %entry
; DAGISEL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; DAGISEL-NEXT: mov w0, #1065353216
-; DAGISEL-NEXT: mov w1, #2
-; DAGISEL-NEXT: mov x2, #4613937818241073152
-; DAGISEL-NEXT: mov w3, #4
+; DAGISEL-NEXT: mov w0, #1065353216 // =0x3f800000
+; DAGISEL-NEXT: mov w1, #2 // =0x2
+; DAGISEL-NEXT: mov x2, #4613937818241073152 // =0x4008000000000000
+; DAGISEL-NEXT: mov w3, #4 // =0x4
; DAGISEL-NEXT: bl other_f_va_fn
; DAGISEL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; DAGISEL-NEXT: ret
@@ -123,10 +127,10 @@
; FASTISEL-LABEL: call_f_va:
; FASTISEL: // %bb.0: // %entry
; FASTISEL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; FASTISEL-NEXT: mov w0, #1065353216
-; FASTISEL-NEXT: mov w1, #2
-; FASTISEL-NEXT: mov x2, #4613937818241073152
-; FASTISEL-NEXT: mov w3, #4
+; FASTISEL-NEXT: mov w0, #1065353216 // =0x3f800000
+; FASTISEL-NEXT: mov w1, #2 // =0x2
+; FASTISEL-NEXT: mov x2, #4613937818241073152 // =0x4008000000000000
+; FASTISEL-NEXT: mov w3, #4 // =0x4
; FASTISEL-NEXT: bl other_f_va_fn
; FASTISEL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; FASTISEL-NEXT: ret
@@ -136,10 +140,10 @@
; GISEL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; GISEL-NEXT: fmov s0, #1.00000000
; GISEL-NEXT: fmov w0, s0
-; GISEL-NEXT: mov w1, #2
+; GISEL-NEXT: mov w1, #2 // =0x2
; GISEL-NEXT: fmov d0, #3.00000000
; GISEL-NEXT: fmov x2, d0
-; GISEL-NEXT: mov w3, #4
+; GISEL-NEXT: mov w3, #4 // =0x4
; GISEL-NEXT: bl other_f_va_fn
; GISEL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; GISEL-NEXT: ret
@@ -154,10 +158,10 @@
; DAGISEL-LABEL: call_d_va:
; DAGISEL: // %bb.0: // %entry
; DAGISEL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; DAGISEL-NEXT: mov x0, #4607182418800017408
-; DAGISEL-NEXT: mov w1, #2
-; DAGISEL-NEXT: mov x2, #4613937818241073152
-; DAGISEL-NEXT: mov w3, #4
+; DAGISEL-NEXT: mov x0, #4607182418800017408 // =0x3ff0000000000000
+; DAGISEL-NEXT: mov w1, #2 // =0x2
+; DAGISEL-NEXT: mov x2, #4613937818241073152 // =0x4008000000000000
+; DAGISEL-NEXT: mov w3, #4 // =0x4
; DAGISEL-NEXT: bl other_d_va_fn
; DAGISEL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; DAGISEL-NEXT: ret
@@ -165,10 +169,10 @@
; FASTISEL-LABEL: call_d_va:
; FASTISEL: // %bb.0: // %entry
; FASTISEL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; FASTISEL-NEXT: mov x0, #4607182418800017408
-; FASTISEL-NEXT: mov w1, #2
-; FASTISEL-NEXT: mov x2, #4613937818241073152
-; FASTISEL-NEXT: mov w3, #4
+; FASTISEL-NEXT: mov x0, #4607182418800017408 // =0x3ff0000000000000
+; FASTISEL-NEXT: mov w1, #2 // =0x2
+; FASTISEL-NEXT: mov x2, #4613937818241073152 // =0x4008000000000000
+; FASTISEL-NEXT: mov w3, #4 // =0x4
; FASTISEL-NEXT: bl other_d_va_fn
; FASTISEL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; FASTISEL-NEXT: ret
@@ -178,10 +182,10 @@
; GISEL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; GISEL-NEXT: fmov d0, #1.00000000
; GISEL-NEXT: fmov x0, d0
-; GISEL-NEXT: mov w1, #2
+; GISEL-NEXT: mov w1, #2 // =0x2
; GISEL-NEXT: fmov d0, #3.00000000
; GISEL-NEXT: fmov x2, d0
-; GISEL-NEXT: mov w3, #4
+; GISEL-NEXT: mov w3, #4 // =0x4
; GISEL-NEXT: bl other_d_va_fn
; GISEL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; GISEL-NEXT: ret
@@ -198,8 +202,8 @@
; DAGISEL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; DAGISEL-NEXT: fmov d0, #1.00000000
; DAGISEL-NEXT: fmov d1, #3.00000000
-; DAGISEL-NEXT: mov w0, #2
-; DAGISEL-NEXT: mov w1, #4
+; DAGISEL-NEXT: mov w0, #2 // =0x2
+; DAGISEL-NEXT: mov w1, #4 // =0x4
; DAGISEL-NEXT: bl other_d_non_va_fn
; DAGISEL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; DAGISEL-NEXT: ret
@@ -208,9 +212,9 @@
; O0: // %bb.0: // %entry
; O0-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; O0-NEXT: fmov d0, #1.00000000
-; O0-NEXT: mov w0, #2
+; O0-NEXT: mov w0, #2 // =0x2
; O0-NEXT: fmov d1, #3.00000000
-; O0-NEXT: mov w1, #4
+; O0-NEXT: mov w1, #4 // =0x4
; O0-NEXT: bl other_d_non_va_fn
; O0-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; O0-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/wineh-bti.ll b/llvm/test/CodeGen/AArch64/wineh-bti.ll
--- a/llvm/test/CodeGen/AArch64/wineh-bti.ll
+++ b/llvm/test/CodeGen/AArch64/wineh-bti.ll
@@ -43,7 +43,7 @@
; CHECK: .LBB0_2:
; CHECK-NEXT: hint #36
-; CHECK-NEXT: mov w0, #1
+; CHECK: mov w0, #1
; CHECK: .LBB0_3:
; CHECK-NEXT: hint #36
diff --git a/llvm/test/CodeGen/AArch64/zero-call-used-regs.ll b/llvm/test/CodeGen/AArch64/zero-call-used-regs.ll
--- a/llvm/test/CodeGen/AArch64/zero-call-used-regs.ll
+++ b/llvm/test/CodeGen/AArch64/zero-call-used-regs.ll
@@ -22,8 +22,8 @@
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mul w8, w1, w0
; CHECK-NEXT: orr w0, w8, w2
-; CHECK-NEXT: mov x1, #0
-; CHECK-NEXT: mov x2, #0
+; CHECK-NEXT: mov x1, #0 // =0x0
+; CHECK-NEXT: mov x2, #0 // =0x0
; CHECK-NEXT: ret
entry:
@@ -37,9 +37,9 @@
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mul w8, w1, w0
; CHECK-NEXT: orr w0, w8, w2
-; CHECK-NEXT: mov x1, #0
-; CHECK-NEXT: mov x2, #0
-; CHECK-NEXT: mov x8, #0
+; CHECK-NEXT: mov x1, #0 // =0x0
+; CHECK-NEXT: mov x2, #0 // =0x0
+; CHECK-NEXT: mov x8, #0 // =0x0
; CHECK-NEXT: ret
entry:
@@ -53,8 +53,8 @@
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mul w8, w1, w0
; CHECK-NEXT: orr w0, w8, w2
-; CHECK-NEXT: mov x1, #0
-; CHECK-NEXT: mov x2, #0
+; CHECK-NEXT: mov x1, #0 // =0x0
+; CHECK-NEXT: mov x2, #0 // =0x0
; CHECK-NEXT: ret
entry:
@@ -68,9 +68,9 @@
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mul w8, w1, w0
; CHECK-NEXT: orr w0, w8, w2
-; CHECK-NEXT: mov x1, #0
-; CHECK-NEXT: mov x2, #0
-; CHECK-NEXT: mov x8, #0
+; CHECK-NEXT: mov x1, #0 // =0x0
+; CHECK-NEXT: mov x2, #0 // =0x0
+; CHECK-NEXT: mov x8, #0 // =0x0
; CHECK-NEXT: ret
entry:
@@ -83,16 +83,16 @@
; CHECK-LABEL: all_gpr_arg:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mul w8, w1, w0
-; CHECK-NEXT: mov x1, #0
-; CHECK-NEXT: mov x3, #0
-; CHECK-NEXT: mov x4, #0
+; CHECK-NEXT: mov x1, #0 // =0x0
+; CHECK-NEXT: mov x3, #0 // =0x0
+; CHECK-NEXT: mov x4, #0 // =0x0
+; CHECK-NEXT: mov x5, #0 // =0x0
+; CHECK-NEXT: mov x6, #0 // =0x0
+; CHECK-NEXT: mov x7, #0 // =0x0
+; CHECK-NEXT: mov x18, #0 // =0x0
; CHECK-NEXT: orr w0, w8, w2
-; CHECK-NEXT: mov x2, #0
-; CHECK-NEXT: mov x5, #0
-; CHECK-NEXT: mov x6, #0
-; CHECK-NEXT: mov x7, #0
-; CHECK-NEXT: mov x8, #0
-; CHECK-NEXT: mov x18, #0
+; CHECK-NEXT: mov x2, #0 // =0x0
+; CHECK-NEXT: mov x8, #0 // =0x0
; CHECK-NEXT: ret
entry:
@@ -105,25 +105,25 @@
; CHECK-LABEL: all_gpr:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: mul w8, w1, w0
-; CHECK-NEXT: mov x1, #0
-; CHECK-NEXT: mov x3, #0
-; CHECK-NEXT: mov x4, #0
+; CHECK-NEXT: mov x1, #0 // =0x0
+; CHECK-NEXT: mov x3, #0 // =0x0
+; CHECK-NEXT: mov x4, #0 // =0x0
+; CHECK-NEXT: mov x5, #0 // =0x0
+; CHECK-NEXT: mov x6, #0 // =0x0
+; CHECK-NEXT: mov x7, #0 // =0x0
+; CHECK-NEXT: mov x9, #0 // =0x0
+; CHECK-NEXT: mov x10, #0 // =0x0
; CHECK-NEXT: orr w0, w8, w2
-; CHECK-NEXT: mov x2, #0
-; CHECK-NEXT: mov x5, #0
-; CHECK-NEXT: mov x6, #0
-; CHECK-NEXT: mov x7, #0
-; CHECK-NEXT: mov x8, #0
-; CHECK-NEXT: mov x9, #0
-; CHECK-NEXT: mov x10, #0
-; CHECK-NEXT: mov x11, #0
-; CHECK-NEXT: mov x12, #0
-; CHECK-NEXT: mov x13, #0
-; CHECK-NEXT: mov x14, #0
-; CHECK-NEXT: mov x15, #0
-; CHECK-NEXT: mov x16, #0
-; CHECK-NEXT: mov x17, #0
-; CHECK-NEXT: mov x18, #0
+; CHECK-NEXT: mov x2, #0 // =0x0
+; CHECK-NEXT: mov x8, #0 // =0x0
+; CHECK-NEXT: mov x11, #0 // =0x0
+; CHECK-NEXT: mov x12, #0 // =0x0
+; CHECK-NEXT: mov x13, #0 // =0x0
+; CHECK-NEXT: mov x14, #0 // =0x0
+; CHECK-NEXT: mov x15, #0 // =0x0
+; CHECK-NEXT: mov x16, #0 // =0x0
+; CHECK-NEXT: mov x17, #0 // =0x0
+; CHECK-NEXT: mov x18, #0 // =0x0
; CHECK-NEXT: ret
entry:
@@ -136,17 +136,17 @@
; DEFAULT-LABEL: all_arg:
; DEFAULT: // %bb.0: // %entry
; DEFAULT-NEXT: mul w8, w1, w0
-; DEFAULT-NEXT: mov x1, #0
-; DEFAULT-NEXT: mov x3, #0
-; DEFAULT-NEXT: mov x4, #0
-; DEFAULT-NEXT: orr w0, w8, w2
-; DEFAULT-NEXT: mov x2, #0
-; DEFAULT-NEXT: mov x5, #0
-; DEFAULT-NEXT: mov x6, #0
-; DEFAULT-NEXT: mov x7, #0
-; DEFAULT-NEXT: mov x8, #0
-; DEFAULT-NEXT: mov x18, #0
+; DEFAULT-NEXT: mov x1, #0 // =0x0
+; DEFAULT-NEXT: mov x3, #0 // =0x0
+; DEFAULT-NEXT: mov x4, #0 // =0x0
+; DEFAULT-NEXT: mov x5, #0 // =0x0
+; DEFAULT-NEXT: mov x6, #0 // =0x0
+; DEFAULT-NEXT: mov x7, #0 // =0x0
+; DEFAULT-NEXT: mov x18, #0 // =0x0
; DEFAULT-NEXT: movi v0.2d, #0000000000000000
+; DEFAULT-NEXT: orr w0, w8, w2
+; DEFAULT-NEXT: mov x2, #0 // =0x0
+; DEFAULT-NEXT: mov x8, #0 // =0x0
; DEFAULT-NEXT: movi v1.2d, #0000000000000000
; DEFAULT-NEXT: movi v2.2d, #0000000000000000
; DEFAULT-NEXT: movi v3.2d, #0000000000000000
@@ -159,17 +159,17 @@
; SVE-LABEL: all_arg:
; SVE: // %bb.0: // %entry
; SVE-NEXT: mul w8, w1, w0
-; SVE-NEXT: mov x1, #0
-; SVE-NEXT: mov x3, #0
-; SVE-NEXT: mov x4, #0
-; SVE-NEXT: orr w0, w8, w2
-; SVE-NEXT: mov x2, #0
-; SVE-NEXT: mov x5, #0
-; SVE-NEXT: mov x6, #0
-; SVE-NEXT: mov x7, #0
-; SVE-NEXT: mov x8, #0
-; SVE-NEXT: mov x18, #0
+; SVE-NEXT: mov x1, #0 // =0x0
+; SVE-NEXT: mov x3, #0 // =0x0
+; SVE-NEXT: mov x4, #0 // =0x0
+; SVE-NEXT: mov x5, #0 // =0x0
+; SVE-NEXT: mov x6, #0 // =0x0
+; SVE-NEXT: mov x7, #0 // =0x0
+; SVE-NEXT: mov x18, #0 // =0x0
; SVE-NEXT: mov z0.d, #0 // =0x0
+; SVE-NEXT: orr w0, w8, w2
+; SVE-NEXT: mov x2, #0 // =0x0
+; SVE-NEXT: mov x8, #0 // =0x0
; SVE-NEXT: mov z1.d, #0 // =0x0
; SVE-NEXT: mov z2.d, #0 // =0x0
; SVE-NEXT: mov z3.d, #0 // =0x0
@@ -193,25 +193,25 @@
; DEFAULT-LABEL: all:
; DEFAULT: // %bb.0: // %entry
; DEFAULT-NEXT: mul w8, w1, w0
-; DEFAULT-NEXT: mov x1, #0
-; DEFAULT-NEXT: mov x3, #0
-; DEFAULT-NEXT: mov x4, #0
+; DEFAULT-NEXT: mov x1, #0 // =0x0
+; DEFAULT-NEXT: mov x3, #0 // =0x0
+; DEFAULT-NEXT: mov x4, #0 // =0x0
+; DEFAULT-NEXT: mov x5, #0 // =0x0
+; DEFAULT-NEXT: mov x6, #0 // =0x0
+; DEFAULT-NEXT: mov x7, #0 // =0x0
+; DEFAULT-NEXT: mov x9, #0 // =0x0
+; DEFAULT-NEXT: mov x10, #0 // =0x0
; DEFAULT-NEXT: orr w0, w8, w2
-; DEFAULT-NEXT: mov x2, #0
-; DEFAULT-NEXT: mov x5, #0
-; DEFAULT-NEXT: mov x6, #0
-; DEFAULT-NEXT: mov x7, #0
-; DEFAULT-NEXT: mov x8, #0
-; DEFAULT-NEXT: mov x9, #0
-; DEFAULT-NEXT: mov x10, #0
-; DEFAULT-NEXT: mov x11, #0
-; DEFAULT-NEXT: mov x12, #0
-; DEFAULT-NEXT: mov x13, #0
-; DEFAULT-NEXT: mov x14, #0
-; DEFAULT-NEXT: mov x15, #0
-; DEFAULT-NEXT: mov x16, #0
-; DEFAULT-NEXT: mov x17, #0
-; DEFAULT-NEXT: mov x18, #0
+; DEFAULT-NEXT: mov x2, #0 // =0x0
+; DEFAULT-NEXT: mov x8, #0 // =0x0
+; DEFAULT-NEXT: mov x11, #0 // =0x0
+; DEFAULT-NEXT: mov x12, #0 // =0x0
+; DEFAULT-NEXT: mov x13, #0 // =0x0
+; DEFAULT-NEXT: mov x14, #0 // =0x0
+; DEFAULT-NEXT: mov x15, #0 // =0x0
+; DEFAULT-NEXT: mov x16, #0 // =0x0
+; DEFAULT-NEXT: mov x17, #0 // =0x0
+; DEFAULT-NEXT: mov x18, #0 // =0x0
; DEFAULT-NEXT: movi v0.2d, #0000000000000000
; DEFAULT-NEXT: movi v1.2d, #0000000000000000
; DEFAULT-NEXT: movi v2.2d, #0000000000000000
@@ -241,25 +241,25 @@
; SVE-LABEL: all:
; SVE: // %bb.0: // %entry
; SVE-NEXT: mul w8, w1, w0
-; SVE-NEXT: mov x1, #0
-; SVE-NEXT: mov x3, #0
-; SVE-NEXT: mov x4, #0
+; SVE-NEXT: mov x1, #0 // =0x0
+; SVE-NEXT: mov x3, #0 // =0x0
+; SVE-NEXT: mov x4, #0 // =0x0
+; SVE-NEXT: mov x5, #0 // =0x0
+; SVE-NEXT: mov x6, #0 // =0x0
+; SVE-NEXT: mov x7, #0 // =0x0
+; SVE-NEXT: mov x9, #0 // =0x0
+; SVE-NEXT: mov x10, #0 // =0x0
; SVE-NEXT: orr w0, w8, w2
-; SVE-NEXT: mov x2, #0
-; SVE-NEXT: mov x5, #0
-; SVE-NEXT: mov x6, #0
-; SVE-NEXT: mov x7, #0
-; SVE-NEXT: mov x8, #0
-; SVE-NEXT: mov x9, #0
-; SVE-NEXT: mov x10, #0
-; SVE-NEXT: mov x11, #0
-; SVE-NEXT: mov x12, #0
-; SVE-NEXT: mov x13, #0
-; SVE-NEXT: mov x14, #0
-; SVE-NEXT: mov x15, #0
-; SVE-NEXT: mov x16, #0
-; SVE-NEXT: mov x17, #0
-; SVE-NEXT: mov x18, #0
+; SVE-NEXT: mov x2, #0 // =0x0
+; SVE-NEXT: mov x8, #0 // =0x0
+; SVE-NEXT: mov x11, #0 // =0x0
+; SVE-NEXT: mov x12, #0 // =0x0
+; SVE-NEXT: mov x13, #0 // =0x0
+; SVE-NEXT: mov x14, #0 // =0x0
+; SVE-NEXT: mov x15, #0 // =0x0
+; SVE-NEXT: mov x16, #0 // =0x0
+; SVE-NEXT: mov x17, #0 // =0x0
+; SVE-NEXT: mov x18, #0 // =0x0
; SVE-NEXT: mov z0.d, #0 // =0x0
; SVE-NEXT: mov z1.d, #0 // =0x0
; SVE-NEXT: mov z2.d, #0 // =0x0
@@ -394,16 +394,16 @@
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvt d1, s1
; CHECK-NEXT: fmul d0, d1, d0
-; CHECK-NEXT: mov x0, #0
-; CHECK-NEXT: mov x1, #0
-; CHECK-NEXT: mov x2, #0
-; CHECK-NEXT: mov x3, #0
-; CHECK-NEXT: mov x4, #0
-; CHECK-NEXT: mov x5, #0
-; CHECK-NEXT: mov x6, #0
-; CHECK-NEXT: mov x7, #0
-; CHECK-NEXT: mov x8, #0
-; CHECK-NEXT: mov x18, #0
+; CHECK-NEXT: mov x0, #0 // =0x0
+; CHECK-NEXT: mov x1, #0 // =0x0
+; CHECK-NEXT: mov x2, #0 // =0x0
+; CHECK-NEXT: mov x3, #0 // =0x0
+; CHECK-NEXT: mov x4, #0 // =0x0
+; CHECK-NEXT: mov x5, #0 // =0x0
+; CHECK-NEXT: mov x6, #0 // =0x0
+; CHECK-NEXT: mov x7, #0 // =0x0
+; CHECK-NEXT: mov x8, #0 // =0x0
+; CHECK-NEXT: mov x18, #0 // =0x0
; CHECK-NEXT: ret
entry:
@@ -417,25 +417,25 @@
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: fcvt d1, s1
; CHECK-NEXT: fmul d0, d1, d0
-; CHECK-NEXT: mov x0, #0
-; CHECK-NEXT: mov x1, #0
-; CHECK-NEXT: mov x2, #0
-; CHECK-NEXT: mov x3, #0
-; CHECK-NEXT: mov x4, #0
-; CHECK-NEXT: mov x5, #0
-; CHECK-NEXT: mov x6, #0
-; CHECK-NEXT: mov x7, #0
-; CHECK-NEXT: mov x8, #0
-; CHECK-NEXT: mov x9, #0
-; CHECK-NEXT: mov x10, #0
-; CHECK-NEXT: mov x11, #0
-; CHECK-NEXT: mov x12, #0
-; CHECK-NEXT: mov x13, #0
-; CHECK-NEXT: mov x14, #0
-; CHECK-NEXT: mov x15, #0
-; CHECK-NEXT: mov x16, #0
-; CHECK-NEXT: mov x17, #0
-; CHECK-NEXT: mov x18, #0
+; CHECK-NEXT: mov x0, #0 // =0x0
+; CHECK-NEXT: mov x1, #0 // =0x0
+; CHECK-NEXT: mov x2, #0 // =0x0
+; CHECK-NEXT: mov x3, #0 // =0x0
+; CHECK-NEXT: mov x4, #0 // =0x0
+; CHECK-NEXT: mov x5, #0 // =0x0
+; CHECK-NEXT: mov x6, #0 // =0x0
+; CHECK-NEXT: mov x7, #0 // =0x0
+; CHECK-NEXT: mov x8, #0 // =0x0
+; CHECK-NEXT: mov x9, #0 // =0x0
+; CHECK-NEXT: mov x10, #0 // =0x0
+; CHECK-NEXT: mov x11, #0 // =0x0
+; CHECK-NEXT: mov x12, #0 // =0x0
+; CHECK-NEXT: mov x13, #0 // =0x0
+; CHECK-NEXT: mov x14, #0 // =0x0
+; CHECK-NEXT: mov x15, #0 // =0x0
+; CHECK-NEXT: mov x16, #0 // =0x0
+; CHECK-NEXT: mov x17, #0 // =0x0
+; CHECK-NEXT: mov x18, #0 // =0x0
; CHECK-NEXT: ret
entry:
@@ -449,16 +449,16 @@
; DEFAULT: // %bb.0: // %entry
; DEFAULT-NEXT: fcvt d1, s1
; DEFAULT-NEXT: fmul d0, d1, d0
-; DEFAULT-NEXT: mov x0, #0
-; DEFAULT-NEXT: mov x1, #0
-; DEFAULT-NEXT: mov x2, #0
-; DEFAULT-NEXT: mov x3, #0
-; DEFAULT-NEXT: mov x4, #0
-; DEFAULT-NEXT: mov x5, #0
-; DEFAULT-NEXT: mov x6, #0
-; DEFAULT-NEXT: mov x7, #0
-; DEFAULT-NEXT: mov x8, #0
-; DEFAULT-NEXT: mov x18, #0
+; DEFAULT-NEXT: mov x0, #0 // =0x0
+; DEFAULT-NEXT: mov x1, #0 // =0x0
+; DEFAULT-NEXT: mov x2, #0 // =0x0
+; DEFAULT-NEXT: mov x3, #0 // =0x0
+; DEFAULT-NEXT: mov x4, #0 // =0x0
+; DEFAULT-NEXT: mov x5, #0 // =0x0
+; DEFAULT-NEXT: mov x6, #0 // =0x0
+; DEFAULT-NEXT: mov x7, #0 // =0x0
+; DEFAULT-NEXT: mov x8, #0 // =0x0
+; DEFAULT-NEXT: mov x18, #0 // =0x0
; DEFAULT-NEXT: movi v1.2d, #0000000000000000
; DEFAULT-NEXT: movi v2.2d, #0000000000000000
; DEFAULT-NEXT: movi v3.2d, #0000000000000000
@@ -472,16 +472,16 @@
; SVE: // %bb.0: // %entry
; SVE-NEXT: fcvt d1, s1
; SVE-NEXT: fmul d0, d1, d0
-; SVE-NEXT: mov x0, #0
-; SVE-NEXT: mov x1, #0
-; SVE-NEXT: mov x2, #0
-; SVE-NEXT: mov x3, #0
-; SVE-NEXT: mov x4, #0
-; SVE-NEXT: mov x5, #0
-; SVE-NEXT: mov x6, #0
-; SVE-NEXT: mov x7, #0
-; SVE-NEXT: mov x8, #0
-; SVE-NEXT: mov x18, #0
+; SVE-NEXT: mov x0, #0 // =0x0
+; SVE-NEXT: mov x1, #0 // =0x0
+; SVE-NEXT: mov x2, #0 // =0x0
+; SVE-NEXT: mov x3, #0 // =0x0
+; SVE-NEXT: mov x4, #0 // =0x0
+; SVE-NEXT: mov x5, #0 // =0x0
+; SVE-NEXT: mov x6, #0 // =0x0
+; SVE-NEXT: mov x7, #0 // =0x0
+; SVE-NEXT: mov x8, #0 // =0x0
+; SVE-NEXT: mov x18, #0 // =0x0
; SVE-NEXT: mov z1.d, #0 // =0x0
; SVE-NEXT: mov z2.d, #0 // =0x0
; SVE-NEXT: mov z3.d, #0 // =0x0
@@ -506,25 +506,25 @@
; DEFAULT: // %bb.0: // %entry
; DEFAULT-NEXT: fcvt d1, s1
; DEFAULT-NEXT: fmul d0, d1, d0
-; DEFAULT-NEXT: mov x0, #0
-; DEFAULT-NEXT: mov x1, #0
-; DEFAULT-NEXT: mov x2, #0
-; DEFAULT-NEXT: mov x3, #0
-; DEFAULT-NEXT: mov x4, #0
-; DEFAULT-NEXT: mov x5, #0
-; DEFAULT-NEXT: mov x6, #0
-; DEFAULT-NEXT: mov x7, #0
-; DEFAULT-NEXT: mov x8, #0
-; DEFAULT-NEXT: mov x9, #0
-; DEFAULT-NEXT: mov x10, #0
-; DEFAULT-NEXT: mov x11, #0
-; DEFAULT-NEXT: mov x12, #0
-; DEFAULT-NEXT: mov x13, #0
-; DEFAULT-NEXT: mov x14, #0
-; DEFAULT-NEXT: mov x15, #0
-; DEFAULT-NEXT: mov x16, #0
-; DEFAULT-NEXT: mov x17, #0
-; DEFAULT-NEXT: mov x18, #0
+; DEFAULT-NEXT: mov x0, #0 // =0x0
+; DEFAULT-NEXT: mov x1, #0 // =0x0
+; DEFAULT-NEXT: mov x2, #0 // =0x0
+; DEFAULT-NEXT: mov x3, #0 // =0x0
+; DEFAULT-NEXT: mov x4, #0 // =0x0
+; DEFAULT-NEXT: mov x5, #0 // =0x0
+; DEFAULT-NEXT: mov x6, #0 // =0x0
+; DEFAULT-NEXT: mov x7, #0 // =0x0
+; DEFAULT-NEXT: mov x8, #0 // =0x0
+; DEFAULT-NEXT: mov x9, #0 // =0x0
+; DEFAULT-NEXT: mov x10, #0 // =0x0
+; DEFAULT-NEXT: mov x11, #0 // =0x0
+; DEFAULT-NEXT: mov x12, #0 // =0x0
+; DEFAULT-NEXT: mov x13, #0 // =0x0
+; DEFAULT-NEXT: mov x14, #0 // =0x0
+; DEFAULT-NEXT: mov x15, #0 // =0x0
+; DEFAULT-NEXT: mov x16, #0 // =0x0
+; DEFAULT-NEXT: mov x17, #0 // =0x0
+; DEFAULT-NEXT: mov x18, #0 // =0x0
; DEFAULT-NEXT: movi v1.2d, #0000000000000000
; DEFAULT-NEXT: movi v2.2d, #0000000000000000
; DEFAULT-NEXT: movi v3.2d, #0000000000000000
@@ -554,25 +554,25 @@
; SVE: // %bb.0: // %entry
; SVE-NEXT: fcvt d1, s1
; SVE-NEXT: fmul d0, d1, d0
-; SVE-NEXT: mov x0, #0
-; SVE-NEXT: mov x1, #0
-; SVE-NEXT: mov x2, #0
-; SVE-NEXT: mov x3, #0
-; SVE-NEXT: mov x4, #0
-; SVE-NEXT: mov x5, #0
-; SVE-NEXT: mov x6, #0
-; SVE-NEXT: mov x7, #0
-; SVE-NEXT: mov x8, #0
-; SVE-NEXT: mov x9, #0
-; SVE-NEXT: mov x10, #0
-; SVE-NEXT: mov x11, #0
-; SVE-NEXT: mov x12, #0
-; SVE-NEXT: mov x13, #0
-; SVE-NEXT: mov x14, #0
-; SVE-NEXT: mov x15, #0
-; SVE-NEXT: mov x16, #0
-; SVE-NEXT: mov x17, #0
-; SVE-NEXT: mov x18, #0
+; SVE-NEXT: mov x0, #0 // =0x0
+; SVE-NEXT: mov x1, #0 // =0x0
+; SVE-NEXT: mov x2, #0 // =0x0
+; SVE-NEXT: mov x3, #0 // =0x0
+; SVE-NEXT: mov x4, #0 // =0x0
+; SVE-NEXT: mov x5, #0 // =0x0
+; SVE-NEXT: mov x6, #0 // =0x0
+; SVE-NEXT: mov x7, #0 // =0x0
+; SVE-NEXT: mov x8, #0 // =0x0
+; SVE-NEXT: mov x9, #0 // =0x0
+; SVE-NEXT: mov x10, #0 // =0x0
+; SVE-NEXT: mov x11, #0 // =0x0
+; SVE-NEXT: mov x12, #0 // =0x0
+; SVE-NEXT: mov x13, #0 // =0x0
+; SVE-NEXT: mov x14, #0 // =0x0
+; SVE-NEXT: mov x15, #0 // =0x0
+; SVE-NEXT: mov x16, #0 // =0x0
+; SVE-NEXT: mov x17, #0 // =0x0
+; SVE-NEXT: mov x18, #0 // =0x0
; SVE-NEXT: mov z1.d, #0 // =0x0
; SVE-NEXT: mov z2.d, #0 // =0x0
; SVE-NEXT: mov z3.d, #0 // =0x0
diff --git a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
--- a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
+++ b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll
@@ -2,166 +2,28 @@
; RUN: llc -mtriple=arm64-apple-ios -mattr=+sve -o - %s | FileCheck %s
; RUN: llc -mtriple=aarch64_be-unknown-linux -mattr=+sve -o - %s | FileCheck --check-prefix=CHECK-BE %s
-; CHECK-LABEL: lCPI0_0:
-; CHECK-NEXT: .byte 0 ; 0x0
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 1 ; 0x1
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 2 ; 0x2
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 3 ; 0x3
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT:lCPI0_1:
-; CHECK-NEXT: .byte 4 ; 0x4
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 5 ; 0x5
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 6 ; 0x6
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 7 ; 0x7
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT:lCPI0_2:
-; CHECK-NEXT: .byte 8 ; 0x8
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 9 ; 0x9
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 10 ; 0xa
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 11 ; 0xb
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT:lCPI0_3:
-; CHECK-NEXT: .byte 12 ; 0xc
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 13 ; 0xd
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 14 ; 0xe
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 15 ; 0xf
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-
-; CHECK-BE: .LCPI0_0:
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 0 // 0x0
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 1 // 0x1
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 2 // 0x2
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 3 // 0x3
-; CHECK-BE-NEXT: .LCPI0_1:
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 4 // 0x4
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 5 // 0x5
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 6 // 0x6
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 7 // 0x7
-; CHECK-BE-NEXT: .LCPI0_2:
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 8 // 0x8
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 9 // 0x9
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 10 // 0xa
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 11 // 0xb
-; CHECK-BE-NEXT: .LCPI0_3:
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 12 // 0xc
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 13 // 0xd
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 14 // 0xe
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 15 // 0xf
-
; It's profitable to convert the zext to a shuffle, which in turn will be
; lowered to 4 tbl instructions. The masks are materialized outside the loop.
define void @zext_v16i8_to_v16i32_in_loop(ptr %src, ptr %dst) {
; CHECK-LABEL: zext_v16i8_to_v16i32_in_loop:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: Lloh0:
-; CHECK-NEXT: adrp x9, lCPI0_0@PAGE
+; CHECK-NEXT: adrp x8, lCPI0_0@PAGE
; CHECK-NEXT: Lloh1:
-; CHECK-NEXT: adrp x10, lCPI0_1@PAGE
+; CHECK-NEXT: adrp x9, lCPI0_1@PAGE
; CHECK-NEXT: Lloh2:
-; CHECK-NEXT: adrp x11, lCPI0_2@PAGE
+; CHECK-NEXT: adrp x10, lCPI0_2@PAGE
; CHECK-NEXT: Lloh3:
-; CHECK-NEXT: adrp x12, lCPI0_3@PAGE
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: ldr q0, [x8, lCPI0_0@PAGEOFF]
; CHECK-NEXT: Lloh4:
-; CHECK-NEXT: ldr q0, [x9, lCPI0_0@PAGEOFF]
+; CHECK-NEXT: adrp x8, lCPI0_3@PAGE
; CHECK-NEXT: Lloh5:
-; CHECK-NEXT: ldr q1, [x10, lCPI0_1@PAGEOFF]
+; CHECK-NEXT: ldr q1, [x9, lCPI0_1@PAGEOFF]
; CHECK-NEXT: Lloh6:
-; CHECK-NEXT: ldr q2, [x11, lCPI0_2@PAGEOFF]
+; CHECK-NEXT: ldr q2, [x10, lCPI0_2@PAGEOFF]
; CHECK-NEXT: Lloh7:
-; CHECK-NEXT: ldr q3, [x12, lCPI0_3@PAGEOFF]
+; CHECK-NEXT: ldr q3, [x8, lCPI0_3@PAGEOFF]
+; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: LBB0_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr q4, [x0, x8]
@@ -176,10 +38,11 @@
; CHECK-NEXT: b.ne LBB0_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
-; CHECK-NEXT: .loh AdrpLdr Lloh3, Lloh7
+; CHECK-NEXT: .loh AdrpLdr Lloh4, Lloh7
; CHECK-NEXT: .loh AdrpLdr Lloh2, Lloh6
; CHECK-NEXT: .loh AdrpLdr Lloh1, Lloh5
-; CHECK-NEXT: .loh AdrpLdr Lloh0, Lloh4
+; CHECK-NEXT: .loh AdrpAdrp Lloh0, Lloh4
+; CHECK-NEXT: .loh AdrpLdr Lloh0, Lloh3
;
; CHECK-BE-LABEL: zext_v16i8_to_v16i32_in_loop:
; CHECK-BE: // %bb.0: // %entry
@@ -199,27 +62,26 @@
; CHECK-BE-NEXT: .LBB0_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-BE-NEXT: add x9, x0, x8
-; CHECK-BE-NEXT: add x10, x1, #32
; CHECK-BE-NEXT: add x8, x8, #16
-; CHECK-BE-NEXT: cmp x8, #128
; CHECK-BE-NEXT: ld1 { v4.16b }, [x9]
; CHECK-BE-NEXT: add x9, x1, #48
+; CHECK-BE-NEXT: cmp x8, #128
; CHECK-BE-NEXT: tbl v5.16b, { v4.16b }, v3.16b
-; CHECK-BE-NEXT: tbl v6.16b, { v4.16b }, v0.16b
-; CHECK-BE-NEXT: tbl v7.16b, { v4.16b }, v2.16b
-; CHECK-BE-NEXT: tbl v4.16b, { v4.16b }, v1.16b
+; CHECK-BE-NEXT: tbl v6.16b, { v4.16b }, v2.16b
+; CHECK-BE-NEXT: tbl v7.16b, { v4.16b }, v1.16b
+; CHECK-BE-NEXT: tbl v4.16b, { v4.16b }, v0.16b
; CHECK-BE-NEXT: st1 { v5.16b }, [x9]
+; CHECK-BE-NEXT: add x9, x1, #32
+; CHECK-BE-NEXT: st1 { v6.16b }, [x9]
; CHECK-BE-NEXT: add x9, x1, #16
-; CHECK-BE-NEXT: st1 { v6.16b }, [x1]
+; CHECK-BE-NEXT: st1 { v4.16b }, [x1]
; CHECK-BE-NEXT: add x1, x1, #64
-; CHECK-BE-NEXT: st1 { v7.16b }, [x10]
-; CHECK-BE-NEXT: st1 { v4.16b }, [x9]
+; CHECK-BE-NEXT: st1 { v7.16b }, [x9]
; CHECK-BE-NEXT: b.ne .LBB0_1
; CHECK-BE-NEXT: // %bb.2: // %exit
; CHECK-BE-NEXT: ret
entry:
br label %loop
-
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%src.gep = getelementptr i8, ptr %src, i64 %iv
@@ -230,12 +92,11 @@
%iv.next = add nuw i64 %iv, 16
%ec = icmp eq i64 %iv.next, 128
br i1 %ec, label %exit, label %loop
-
exit:
ret void
}
-
define void @zext_v16i8_to_v16i32_in_loop_not_header(ptr %src, ptr %dst, i1 %c) {
+;
; CHECK-LABEL: zext_v16i8_to_v16i32_in_loop_not_header:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: mov x8, xzr
@@ -281,7 +142,6 @@
; CHECK-BE-NEXT: // in Loop: Header=BB1_2 Depth=1
; CHECK-BE-NEXT: add x9, x0, x8
; CHECK-BE-NEXT: add x10, x1, #32
-; CHECK-BE-NEXT: add x11, x1, #16
; CHECK-BE-NEXT: ld1 { v0.16b }, [x9]
; CHECK-BE-NEXT: add x9, x1, #48
; CHECK-BE-NEXT: ushll2 v1.8h, v0.16b, #0
@@ -289,21 +149,20 @@
; CHECK-BE-NEXT: ushll2 v2.4s, v1.8h, #0
; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0
; CHECK-BE-NEXT: ushll2 v3.4s, v0.8h, #0
-; CHECK-BE-NEXT: st1 { v2.4s }, [x9]
; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0
+; CHECK-BE-NEXT: st1 { v2.4s }, [x9]
+; CHECK-BE-NEXT: add x9, x1, #16
; CHECK-BE-NEXT: st1 { v1.4s }, [x10]
-; CHECK-BE-NEXT: st1 { v3.4s }, [x11]
+; CHECK-BE-NEXT: st1 { v3.4s }, [x9]
; CHECK-BE-NEXT: st1 { v0.4s }, [x1]
; CHECK-BE-NEXT: b .LBB1_1
; CHECK-BE-NEXT: .LBB1_4: // %exit
; CHECK-BE-NEXT: ret
entry:
br label %loop
-
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
br i1 %c, label %then, label %loop.latch
-
then:
%src.gep = getelementptr i8, ptr %src, i64 %iv
%load = load <16 x i8>, ptr %src.gep
@@ -311,19 +170,17 @@
%dst.gep = getelementptr i32, ptr %dst, i64 %iv
store <16 x i32> %ext, ptr %dst.gep
br label %loop.latch
-
loop.latch:
%iv.next = add nuw i64 %iv, 16
%ec = icmp eq i64 %iv.next, 128
br i1 %ec, label %exit, label %loop
-
exit:
ret void
}
-
; Not profitable to use shuffle/tbl, as 4 tbls + materializing the masks
; require more instructions than lowering zext directly.
define void @zext_v16i8_to_v16i32_no_loop(ptr %src, ptr %dst) {
+;
; CHECK-LABEL: zext_v16i8_to_v16i32_no_loop:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: ldr q0, [x0]
@@ -345,13 +202,13 @@
; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-BE-NEXT: ushll2 v2.4s, v1.8h, #0
; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-BE-NEXT: ushll2 v3.4s, v0.8h, #0
+; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-BE-NEXT: st1 { v2.4s }, [x8]
; CHECK-BE-NEXT: add x8, x1, #32
; CHECK-BE-NEXT: st1 { v1.4s }, [x8]
; CHECK-BE-NEXT: add x8, x1, #16
-; CHECK-BE-NEXT: ushll2 v1.4s, v0.8h, #0
-; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0
-; CHECK-BE-NEXT: st1 { v1.4s }, [x8]
+; CHECK-BE-NEXT: st1 { v3.4s }, [x8]
; CHECK-BE-NEXT: st1 { v0.4s }, [x1]
; CHECK-BE-NEXT: ret
entry:
@@ -360,9 +217,9 @@
store <16 x i32> %ext, ptr %dst
ret void
}
-
; Avoid using tbl when optimizing for size.
define void @zext_v16i8_to_v16i32_in_loop_optsize(ptr %src, ptr %dst) optsize {
+;
; CHECK-LABEL: zext_v16i8_to_v16i32_in_loop_optsize:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: mov x8, xzr
@@ -389,29 +246,28 @@
; CHECK-BE-NEXT: .LBB3_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-BE-NEXT: add x9, x0, x8
-; CHECK-BE-NEXT: add x10, x1, #32
; CHECK-BE-NEXT: add x8, x8, #16
-; CHECK-BE-NEXT: cmp x8, #128
; CHECK-BE-NEXT: ld1 { v0.16b }, [x9]
; CHECK-BE-NEXT: add x9, x1, #48
+; CHECK-BE-NEXT: cmp x8, #128
; CHECK-BE-NEXT: ushll2 v1.8h, v0.16b, #0
; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-BE-NEXT: ushll2 v2.4s, v1.8h, #0
; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-BE-NEXT: ushll2 v3.4s, v0.8h, #0
+; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-BE-NEXT: st1 { v2.4s }, [x9]
+; CHECK-BE-NEXT: add x9, x1, #32
+; CHECK-BE-NEXT: st1 { v1.4s }, [x9]
; CHECK-BE-NEXT: add x9, x1, #16
-; CHECK-BE-NEXT: ushll v2.4s, v0.4h, #0
-; CHECK-BE-NEXT: st1 { v1.4s }, [x10]
-; CHECK-BE-NEXT: ushll2 v0.4s, v0.8h, #0
-; CHECK-BE-NEXT: st1 { v2.4s }, [x1]
+; CHECK-BE-NEXT: st1 { v0.4s }, [x1]
; CHECK-BE-NEXT: add x1, x1, #64
-; CHECK-BE-NEXT: st1 { v0.4s }, [x9]
+; CHECK-BE-NEXT: st1 { v3.4s }, [x9]
; CHECK-BE-NEXT: b.ne .LBB3_1
; CHECK-BE-NEXT: // %bb.2: // %exit
; CHECK-BE-NEXT: ret
entry:
br label %loop
-
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%src.gep = getelementptr i8, ptr %src, i64 %iv
@@ -422,13 +278,12 @@
%iv.next = add nuw i64 %iv, 16
%ec = icmp eq i64 %iv.next, 128
br i1 %ec, label %exit, label %loop
-
exit:
ret void
}
-
; Avoid using tbl when optimizing for size.
define void @zext_v16i8_to_v16i32_in_loop_minsize(ptr %src, ptr %dst) minsize {
+;
; CHECK-LABEL: zext_v16i8_to_v16i32_in_loop_minsize:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: mov x8, xzr
@@ -455,29 +310,28 @@
; CHECK-BE-NEXT: .LBB4_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-BE-NEXT: add x9, x0, x8
-; CHECK-BE-NEXT: add x10, x1, #32
; CHECK-BE-NEXT: add x8, x8, #16
-; CHECK-BE-NEXT: cmp x8, #128
; CHECK-BE-NEXT: ld1 { v0.16b }, [x9]
; CHECK-BE-NEXT: add x9, x1, #48
+; CHECK-BE-NEXT: cmp x8, #128
; CHECK-BE-NEXT: ushll2 v1.8h, v0.16b, #0
; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
; CHECK-BE-NEXT: ushll2 v2.4s, v1.8h, #0
; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0
+; CHECK-BE-NEXT: ushll2 v3.4s, v0.8h, #0
+; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-BE-NEXT: st1 { v2.4s }, [x9]
+; CHECK-BE-NEXT: add x9, x1, #32
+; CHECK-BE-NEXT: st1 { v1.4s }, [x9]
; CHECK-BE-NEXT: add x9, x1, #16
-; CHECK-BE-NEXT: ushll v2.4s, v0.4h, #0
-; CHECK-BE-NEXT: st1 { v1.4s }, [x10]
-; CHECK-BE-NEXT: ushll2 v0.4s, v0.8h, #0
-; CHECK-BE-NEXT: st1 { v2.4s }, [x1]
+; CHECK-BE-NEXT: st1 { v0.4s }, [x1]
; CHECK-BE-NEXT: add x1, x1, #64
-; CHECK-BE-NEXT: st1 { v0.4s }, [x9]
+; CHECK-BE-NEXT: st1 { v3.4s }, [x9]
; CHECK-BE-NEXT: b.ne .LBB4_1
; CHECK-BE-NEXT: // %bb.2: // %exit
; CHECK-BE-NEXT: ret
entry:
br label %loop
-
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%src.gep = getelementptr i8, ptr %src, i64 %iv
@@ -488,12 +342,11 @@
%iv.next = add nuw i64 %iv, 16
%ec = icmp eq i64 %iv.next, 128
br i1 %ec, label %exit, label %loop
-
exit:
ret void
}
-
define void @zext_v16i8_to_v16i16_in_loop(ptr %src, ptr %dst) {
+;
; CHECK-LABEL: zext_v16i8_to_v16i16_in_loop:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: mov x8, xzr
@@ -516,22 +369,19 @@
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-BE-NEXT: add x9, x0, x8
; CHECK-BE-NEXT: add x8, x8, #16
-; CHECK-BE-NEXT: cmp x8, #128
; CHECK-BE-NEXT: ld1 { v0.16b }, [x9]
; CHECK-BE-NEXT: add x9, x1, #16
-; CHECK-BE-NEXT: ushll v1.8h, v0.8b, #0
-; CHECK-BE-NEXT: ushll2 v0.8h, v0.16b, #0
-; CHECK-BE-NEXT: st1 { v1.8h }, [x1]
+; CHECK-BE-NEXT: cmp x8, #128
+; CHECK-BE-NEXT: ushll2 v1.8h, v0.16b, #0
+; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
+; CHECK-BE-NEXT: st1 { v0.8h }, [x1]
; CHECK-BE-NEXT: add x1, x1, #32
-; CHECK-BE-NEXT: st1 { v0.8h }, [x9]
+; CHECK-BE-NEXT: st1 { v1.8h }, [x9]
; CHECK-BE-NEXT: b.ne .LBB5_1
; CHECK-BE-NEXT: // %bb.2: // %exit
; CHECK-BE-NEXT: ret
-
-
entry:
br label %loop
-
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%src.gep = getelementptr i8, ptr %src, i64 %iv
@@ -542,93 +392,22 @@
%iv.next = add nuw i64 %iv, 16
%ec = icmp eq i64 %iv.next, 128
br i1 %ec, label %exit, label %loop
-
exit:
ret void
}
-
-; CHECK-LABEL: lCPI6_0:
-; CHECK-NEXT: .byte 0 ; 0x0
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 1 ; 0x1
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 2 ; 0x2
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 3 ; 0x3
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: lCPI6_1:
-; CHECK-NEXT: .byte 4 ; 0x4
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 5 ; 0x5
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 6 ; 0x6
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 7 ; 0x7
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-
-; CHECK-BE: .LCPI6_0:
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 0 // 0x0
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 1 // 0x1
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 2 // 0x2
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 3 // 0x3
-; CHECK-BE-NEXT: .LCPI6_1:
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 4 // 0x4
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 5 // 0x5
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 6 // 0x6
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 7 // 0x7
-
define void @zext_v8i8_to_v8i32_in_loop(ptr %src, ptr %dst) {
+;
; CHECK-LABEL: zext_v8i8_to_v8i32_in_loop:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: Lloh8:
-; CHECK-NEXT: adrp x9, lCPI6_0@PAGE
+; CHECK-NEXT: adrp x8, lCPI6_0@PAGE
; CHECK-NEXT: Lloh9:
-; CHECK-NEXT: adrp x10, lCPI6_1@PAGE
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: adrp x9, lCPI6_1@PAGE
; CHECK-NEXT: Lloh10:
-; CHECK-NEXT: ldr q0, [x9, lCPI6_0@PAGEOFF]
+; CHECK-NEXT: ldr q0, [x8, lCPI6_0@PAGEOFF]
; CHECK-NEXT: Lloh11:
-; CHECK-NEXT: ldr q1,
[x10, lCPI6_1@PAGEOFF] +; CHECK-NEXT: ldr q1, [x9, lCPI6_1@PAGEOFF] +; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: LBB6_1: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr d2, [x0, x8] @@ -656,20 +435,19 @@ ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-BE-NEXT: add x9, x0, x8 ; CHECK-BE-NEXT: add x8, x8, #16 -; CHECK-BE-NEXT: cmp x8, #128 ; CHECK-BE-NEXT: ld1 { v2.8b }, [x9] ; CHECK-BE-NEXT: add x9, x1, #16 -; CHECK-BE-NEXT: tbl v3.16b, { v2.16b }, v0.16b -; CHECK-BE-NEXT: tbl v2.16b, { v2.16b }, v1.16b -; CHECK-BE-NEXT: st1 { v3.16b }, [x1] +; CHECK-BE-NEXT: cmp x8, #128 +; CHECK-BE-NEXT: tbl v3.16b, { v2.16b }, v1.16b +; CHECK-BE-NEXT: tbl v2.16b, { v2.16b }, v0.16b +; CHECK-BE-NEXT: st1 { v2.16b }, [x1] ; CHECK-BE-NEXT: add x1, x1, #64 -; CHECK-BE-NEXT: st1 { v2.16b }, [x9] +; CHECK-BE-NEXT: st1 { v3.16b }, [x9] ; CHECK-BE-NEXT: b.ne .LBB6_1 ; CHECK-BE-NEXT: // %bb.2: // %exit ; CHECK-BE-NEXT: ret entry: br label %loop - loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %src.gep = getelementptr i8, ptr %src, i64 %iv @@ -680,12 +458,11 @@ %iv.next = add nuw i64 %iv, 16 %ec = icmp eq i64 %iv.next, 128 br i1 %ec, label %exit, label %loop - exit: ret void } - define void @zext_v16i8_to_v16i64_in_loop(ptr %src, ptr %dst) { +; ; CHECK-LABEL: zext_v16i8_to_v16i64_in_loop: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: mov x8, xzr @@ -694,24 +471,24 @@ ; CHECK-NEXT: ldr q0, [x0, x8] ; CHECK-NEXT: add x8, x8, #16 ; CHECK-NEXT: cmp x8, #128 -; CHECK-NEXT: ushll.8h v1, v0, #0 -; CHECK-NEXT: ushll2.8h v0, v0, #0 +; CHECK-NEXT: ushll2.8h v1, v0, #0 +; CHECK-NEXT: ushll.8h v0, v0, #0 ; CHECK-NEXT: ushll2.4s v2, v1, #0 -; CHECK-NEXT: ushll2.4s v3, v0, #0 +; CHECK-NEXT: ushll.4s v1, v1, #0 +; CHECK-NEXT: ushll2.4s v4, v0, #0 ; CHECK-NEXT: ushll.4s v0, v0, #0 -; CHECK-NEXT: ushll2.2d v4, v3, #0 -; CHECK-NEXT: ushll2.2d v5, v0, #0 -; CHECK-NEXT: ushll.2d v0, v0, #0 -; CHECK-NEXT: ushll.2d v3, v3, #0 -; CHECK-NEXT: stp q0, q5, [x1, #64] -; CHECK-NEXT: ushll.4s v0, v1, #0 -; CHECK-NEXT: stp q3, q4, [x1, #96] ; CHECK-NEXT: ushll2.2d v3, v2, #0 ; CHECK-NEXT: ushll.2d v2, v2, #0 -; CHECK-NEXT: ushll2.2d v1, v0, #0 +; CHECK-NEXT: ushll2.2d v5, v1, #0 +; CHECK-NEXT: ushll.2d v1, v1, #0 +; CHECK-NEXT: stp q2, q3, [x1, #96] +; CHECK-NEXT: ushll2.2d v3, v4, #0 +; CHECK-NEXT: ushll.2d v2, v4, #0 +; CHECK-NEXT: ushll2.2d v4, v0, #0 ; CHECK-NEXT: ushll.2d v0, v0, #0 +; CHECK-NEXT: stp q1, q5, [x1, #64] ; CHECK-NEXT: stp q2, q3, [x1, #32] -; CHECK-NEXT: stp q0, q1, [x1], #128 +; CHECK-NEXT: stp q0, q4, [x1], #128 ; CHECK-NEXT: b.ne LBB7_1 ; CHECK-NEXT: ; %bb.2: ; %exit ; CHECK-NEXT: ret @@ -722,47 +499,44 @@ ; CHECK-BE-NEXT: .LBB7_1: // %loop ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-BE-NEXT: add x9, x0, x8 -; CHECK-BE-NEXT: add x10, x1, #96 ; CHECK-BE-NEXT: add x8, x8, #16 -; CHECK-BE-NEXT: cmp x8, #128 ; CHECK-BE-NEXT: ld1 { v0.16b }, [x9] ; CHECK-BE-NEXT: add x9, x1, #112 +; CHECK-BE-NEXT: cmp x8, #128 ; CHECK-BE-NEXT: ushll2 v1.8h, v0.16b, #0 ; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-BE-NEXT: ushll2 v2.4s, v1.8h, #0 ; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-BE-NEXT: ushll2 v4.4s, v0.8h, #0 +; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-BE-NEXT: ushll2 v3.2d, v2.4s, #0 ; CHECK-BE-NEXT: ushll v2.2d, v2.2s, #0 +; CHECK-BE-NEXT: ushll2 v5.2d, v1.4s, #0 +; CHECK-BE-NEXT: ushll v1.2d, v1.2s, #0 ; CHECK-BE-NEXT: st1 { v3.2d }, [x9] +; CHECK-BE-NEXT: add x9, x1, #96 +; CHECK-BE-NEXT: ushll2 v3.2d, v4.4s, #0 +; CHECK-BE-NEXT: st1 { v2.2d }, [x9] ; 
CHECK-BE-NEXT: add x9, x1, #80 -; CHECK-BE-NEXT: ushll2 v3.2d, v1.4s, #0 -; CHECK-BE-NEXT: st1 { v2.2d }, [x10] -; CHECK-BE-NEXT: ushll2 v2.4s, v0.8h, #0 -; CHECK-BE-NEXT: add x10, x1, #48 +; CHECK-BE-NEXT: ushll v2.2d, v4.2s, #0 +; CHECK-BE-NEXT: st1 { v5.2d }, [x9] +; CHECK-BE-NEXT: add x9, x1, #48 ; CHECK-BE-NEXT: st1 { v3.2d }, [x9] +; CHECK-BE-NEXT: ushll2 v3.2d, v0.4s, #0 +; CHECK-BE-NEXT: ushll v0.2d, v0.2s, #0 ; CHECK-BE-NEXT: add x9, x1, #64 -; CHECK-BE-NEXT: ushll v1.2d, v1.2s, #0 -; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-BE-NEXT: ushll2 v4.2d, v2.4s, #0 ; CHECK-BE-NEXT: st1 { v1.2d }, [x9] -; CHECK-BE-NEXT: ushll v1.2d, v0.2s, #0 +; CHECK-BE-NEXT: add x9, x1, #32 +; CHECK-BE-NEXT: st1 { v2.2d }, [x9] ; CHECK-BE-NEXT: add x9, x1, #16 -; CHECK-BE-NEXT: st1 { v4.2d }, [x10] -; CHECK-BE-NEXT: add x10, x1, #32 -; CHECK-BE-NEXT: st1 { v1.2d }, [x1] +; CHECK-BE-NEXT: st1 { v0.2d }, [x1] ; CHECK-BE-NEXT: add x1, x1, #128 -; CHECK-BE-NEXT: ushll2 v0.2d, v0.4s, #0 -; CHECK-BE-NEXT: ushll v2.2d, v2.2s, #0 -; CHECK-BE-NEXT: st1 { v0.2d }, [x9] -; CHECK-BE-NEXT: st1 { v2.2d }, [x10] +; CHECK-BE-NEXT: st1 { v3.2d }, [x9] ; CHECK-BE-NEXT: b.ne .LBB7_1 ; CHECK-BE-NEXT: // %bb.2: // %exit ; CHECK-BE-NEXT: ret - - entry: br label %loop - loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %src.gep = getelementptr i8, ptr %src, i64 %iv @@ -773,12 +547,11 @@ %iv.next = add nuw i64 %iv, 16 %ec = icmp eq i64 %iv.next, 128 br i1 %ec, label %exit, label %loop - exit: ret void } - define void @zext_v8i8_to_v8i64_in_loop(ptr %src, ptr %dst) { +; ; CHECK-LABEL: zext_v8i8_to_v8i64_in_loop: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: mov x8, xzr @@ -806,32 +579,29 @@ ; CHECK-BE-NEXT: .LBB8_1: // %loop ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-BE-NEXT: add x9, x0, x8 -; CHECK-BE-NEXT: add x10, x1, #32 ; CHECK-BE-NEXT: add x8, x8, #16 -; CHECK-BE-NEXT: cmp x8, #128 ; CHECK-BE-NEXT: ld1 { v0.8b }, [x9] ; CHECK-BE-NEXT: add x9, x1, #48 +; CHECK-BE-NEXT: cmp x8, #128 ; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-BE-NEXT: ushll2 v1.4s, v0.8h, #0 ; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-BE-NEXT: ushll2 v2.2d, v1.4s, #0 ; CHECK-BE-NEXT: ushll v1.2d, v1.2s, #0 +; CHECK-BE-NEXT: ushll2 v3.2d, v0.4s, #0 +; CHECK-BE-NEXT: ushll v0.2d, v0.2s, #0 ; CHECK-BE-NEXT: st1 { v2.2d }, [x9] +; CHECK-BE-NEXT: add x9, x1, #32 +; CHECK-BE-NEXT: st1 { v1.2d }, [x9] ; CHECK-BE-NEXT: add x9, x1, #16 -; CHECK-BE-NEXT: ushll v2.2d, v0.2s, #0 -; CHECK-BE-NEXT: st1 { v1.2d }, [x10] -; CHECK-BE-NEXT: ushll2 v0.2d, v0.4s, #0 -; CHECK-BE-NEXT: st1 { v2.2d }, [x1] +; CHECK-BE-NEXT: st1 { v0.2d }, [x1] ; CHECK-BE-NEXT: add x1, x1, #128 -; CHECK-BE-NEXT: st1 { v0.2d }, [x9] +; CHECK-BE-NEXT: st1 { v3.2d }, [x9] ; CHECK-BE-NEXT: b.ne .LBB8_1 ; CHECK-BE-NEXT: // %bb.2: // %exit ; CHECK-BE-NEXT: ret - - entry: br label %loop - loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %src.gep = getelementptr i8, ptr %src, i64 %iv @@ -842,12 +612,11 @@ %iv.next = add nuw i64 %iv, 16 %ec = icmp eq i64 %iv.next, 128 br i1 %ec, label %exit, label %loop - exit: ret void } - define void @zext_v8i8_to_v8i16_in_loop(ptr %src, ptr %dst) { +; ; CHECK-LABEL: zext_v8i8_to_v8i16_in_loop: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: mov x8, xzr @@ -869,20 +638,16 @@ ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-BE-NEXT: add x9, x0, x8 ; CHECK-BE-NEXT: add x8, x8, #16 -; CHECK-BE-NEXT: cmp x8, #128 ; CHECK-BE-NEXT: ld1 { v0.8b }, [x9] +; CHECK-BE-NEXT: cmp x8, #128 ; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 ; 
CHECK-BE-NEXT: st1 { v0.8h }, [x1] ; CHECK-BE-NEXT: add x1, x1, #32 ; CHECK-BE-NEXT: b.ne .LBB9_1 ; CHECK-BE-NEXT: // %bb.2: // %exit ; CHECK-BE-NEXT: ret - - - entry: br label %loop - loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %src.gep = getelementptr i8, ptr %src, i64 %iv @@ -893,12 +658,11 @@ %iv.next = add nuw i64 %iv, 16 %ec = icmp eq i64 %iv.next, 128 br i1 %ec, label %exit, label %loop - exit: ret void } - define void @zext_v8i8_to_v8i20_in_loop(ptr %src, ptr %dst) { +; ; CHECK-LABEL: zext_v8i8_to_v8i20_in_loop: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: mov x8, xzr @@ -910,24 +674,24 @@ ; CHECK-NEXT: ushll.8h v0, v0, #0 ; CHECK-NEXT: ushll2.4s v1, v0, #0 ; CHECK-NEXT: ushll.4s v0, v0, #0 -; CHECK-NEXT: mov.s w11, v1[1] -; CHECK-NEXT: mov.s w13, v0[1] -; CHECK-NEXT: fmov w12, s1 -; CHECK-NEXT: mov.s w14, v1[2] -; CHECK-NEXT: fmov w15, s0 +; CHECK-NEXT: mov.s w9, v1[1] +; CHECK-NEXT: mov.s w11, v0[1] +; CHECK-NEXT: fmov w10, s1 +; CHECK-NEXT: fmov w14, s0 +; CHECK-NEXT: mov.s w13, v1[2] ; CHECK-NEXT: mov.s w16, v0[2] -; CHECK-NEXT: mov.s w9, v1[3] -; CHECK-NEXT: mov.s w10, v0[3] -; CHECK-NEXT: orr x11, x12, x11, lsl #20 -; CHECK-NEXT: orr x12, x15, x13, lsl #20 -; CHECK-NEXT: orr x11, x11, x14, lsl #40 -; CHECK-NEXT: orr x12, x12, x16, lsl #40 -; CHECK-NEXT: lsr w13, w9, #4 -; CHECK-NEXT: lsr w14, w10, #4 -; CHECK-NEXT: orr x9, x11, x9, lsl #60 -; CHECK-NEXT: orr x10, x12, x10, lsl #60 -; CHECK-NEXT: strh w13, [x1, #18] -; CHECK-NEXT: strh w14, [x1, #8] +; CHECK-NEXT: mov.s w12, v1[3] +; CHECK-NEXT: mov.s w15, v0[3] +; CHECK-NEXT: orr x9, x10, x9, lsl #20 +; CHECK-NEXT: orr x10, x14, x11, lsl #20 +; CHECK-NEXT: orr x9, x9, x13, lsl #40 +; CHECK-NEXT: orr x10, x10, x16, lsl #40 +; CHECK-NEXT: lsr w11, w12, #4 +; CHECK-NEXT: lsr w13, w15, #4 +; CHECK-NEXT: orr x9, x9, x12, lsl #60 +; CHECK-NEXT: orr x10, x10, x15, lsl #60 +; CHECK-NEXT: strh w11, [x1, #18] +; CHECK-NEXT: strh w13, [x1, #8] ; CHECK-NEXT: stur x9, [x1, #10] ; CHECK-NEXT: str x10, [x1], #64 ; CHECK-NEXT: b.ne LBB10_1 @@ -941,43 +705,40 @@ ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-BE-NEXT: add x9, x0, x8 ; CHECK-BE-NEXT: add x8, x8, #16 -; CHECK-BE-NEXT: cmp x8, #128 ; CHECK-BE-NEXT: ld1 { v0.8b }, [x9] +; CHECK-BE-NEXT: cmp x8, #128 ; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-BE-NEXT: ushll2 v1.4s, v0.8h, #0 ; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-BE-NEXT: mov w9, v1.s[1] -; CHECK-BE-NEXT: mov w11, v1.s[2] +; CHECK-BE-NEXT: mov w10, v0.s[1] ; CHECK-BE-NEXT: fmov w12, s1 -; CHECK-BE-NEXT: mov w13, v0.s[1] -; CHECK-BE-NEXT: mov w15, v0.s[2] -; CHECK-BE-NEXT: mov w10, v1.s[3] -; CHECK-BE-NEXT: mov w14, v0.s[3] +; CHECK-BE-NEXT: fmov w14, s0 +; CHECK-BE-NEXT: mov w11, v1.s[2] +; CHECK-BE-NEXT: mov w13, v0.s[2] +; CHECK-BE-NEXT: mov w15, v1.s[3] ; CHECK-BE-NEXT: lsl x9, x9, #40 +; CHECK-BE-NEXT: lsl x10, x10, #40 ; CHECK-BE-NEXT: orr x9, x9, x12, lsl #60 +; CHECK-BE-NEXT: orr x10, x10, x14, lsl #60 ; CHECK-BE-NEXT: lsr x12, x12, #4 +; CHECK-BE-NEXT: strh w15, [x1, #18] ; CHECK-BE-NEXT: orr x9, x9, x11, lsl #20 -; CHECK-BE-NEXT: fmov w11, s0 -; CHECK-BE-NEXT: lsl x13, x13, #40 +; CHECK-BE-NEXT: orr x10, x10, x13, lsl #20 +; CHECK-BE-NEXT: mov w11, v0.s[3] +; CHECK-BE-NEXT: lsr x13, x14, #4 ; CHECK-BE-NEXT: lsr x9, x9, #16 +; CHECK-BE-NEXT: lsr x10, x10, #16 ; CHECK-BE-NEXT: bfi x9, x12, #48, #4 -; CHECK-BE-NEXT: strh w10, [x1, #18] -; CHECK-BE-NEXT: orr x13, x13, x11, lsl #60 -; CHECK-BE-NEXT: lsr x11, x11, #4 -; CHECK-BE-NEXT: orr x13, x13, x15, lsl #20 -; CHECK-BE-NEXT: strh w14, [x1, 
#8] -; CHECK-BE-NEXT: lsr x12, x13, #16 +; CHECK-BE-NEXT: bfi x10, x13, #48, #4 +; CHECK-BE-NEXT: strh w11, [x1, #8] ; CHECK-BE-NEXT: stur x9, [x1, #10] -; CHECK-BE-NEXT: bfi x12, x11, #48, #4 -; CHECK-BE-NEXT: str x12, [x1], #64 +; CHECK-BE-NEXT: str x10, [x1], #64 ; CHECK-BE-NEXT: b.ne .LBB10_1 ; CHECK-BE-NEXT: // %bb.2: // %exit ; CHECK-BE-NEXT: ret - - entry: br label %loop - loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %src.gep = getelementptr i8, ptr %src, i64 %iv @@ -988,12 +749,11 @@ %iv.next = add nuw i64 %iv, 16 %ec = icmp eq i64 %iv.next, 128 br i1 %ec, label %exit, label %loop - exit: ret void } - define void @zext_v4i8_to_v4i32_in_loop(ptr %src, ptr %dst) { +; ; CHECK-LABEL: zext_v4i8_to_v4i32_in_loop: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: mov x8, xzr @@ -1027,11 +787,8 @@ ; CHECK-BE-NEXT: b.ne .LBB11_1 ; CHECK-BE-NEXT: // %bb.2: // %exit ; CHECK-BE-NEXT: ret - - entry: br label %loop - loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %src.gep = getelementptr i8, ptr %src, i64 %iv @@ -1042,131 +799,26 @@ %iv.next = add nuw i64 %iv, 16 %ec = icmp eq i64 %iv.next, 128 br i1 %ec, label %exit, label %loop - exit: ret void } - -; CHECK-LABEL: lCPI12_0: -; CHECK-NEXT: .byte 0 ; 0x0 -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 1 ; 0x1 -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 2 ; 0x2 -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 3 ; 0x3 -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: lCPI12_1: -; CHECK-NEXT: .byte 4 ; 0x4 -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 5 ; 0x5 -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 6 ; 0x6 -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 7 ; 0x7 -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: lCPI12_2: -; CHECK-NEXT: .byte 8 ; 0x8 -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 9 ; 0x9 -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 10 ; 0xa -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 11 ; 0xb -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 255 ; 0xff -; CHECK-NEXT: .byte 255 ; 0xff - -; CHECK-BE-LABEL: .LCPI12_0: -; CHECK-BE-NEXT: .byte 255 // 0xff -; CHECK-BE-NEXT: .byte 255 // 0xff -; CHECK-BE-NEXT: .byte 255 // 0xff -; CHECK-BE-NEXT: .byte 0 // 0x0 -; CHECK-BE-NEXT: .byte 255 // 0xff -; CHECK-BE-NEXT: .byte 255 // 0xff -; CHECK-BE-NEXT: .byte 255 // 0xff -; CHECK-BE-NEXT: .byte 1 // 0x1 -; CHECK-BE-NEXT: .byte 255 // 0xff -; CHECK-BE-NEXT: .byte 255 // 0xff -; CHECK-BE-NEXT: .byte 255 // 0xff -; CHECK-BE-NEXT: .byte 2 // 0x2 -; CHECK-BE-NEXT: .byte 255 // 0xff -; CHECK-BE-NEXT: .byte 255 // 0xff -; CHECK-BE-NEXT: .byte 255 // 0xff -; CHECK-BE-NEXT: .byte 3 // 0x3 -; CHECK-BE-NEXT: .LCPI12_1: -; CHECK-BE-NEXT: .byte 255 // 0xff -; CHECK-BE-NEXT: .byte 255 // 0xff -; CHECK-BE-NEXT: .byte 255 // 0xff -; CHECK-BE-NEXT: .byte 4 // 0x4 -; CHECK-BE-NEXT: .byte 255 // 0xff -; 
CHECK-BE-NEXT: .byte 255 // 0xff -; CHECK-BE-NEXT: .byte 255 // 0xff -; CHECK-BE-NEXT: .byte 5 // 0x5 -; CHECK-BE-NEXT: .byte 255 // 0xff -; CHECK-BE-NEXT: .byte 255 // 0xff -; CHECK-BE-NEXT: .byte 255 // 0xff -; CHECK-BE-NEXT: .byte 6 // 0x6 -; CHECK-BE-NEXT: .byte 255 // 0xff -; CHECK-BE-NEXT: .byte 255 // 0xff -; CHECK-BE-NEXT: .byte 255 // 0xff -; CHECK-BE-NEXT: .byte 7 // 0x7 -; CHECK-BE-NEXT: .LCPI12_2: -; CHECK-BE-NEXT: .byte 255 // 0xff -; CHECK-BE-NEXT: .byte 255 // 0xff -; CHECK-BE-NEXT: .byte 255 // 0xff -; CHECK-BE-NEXT: .byte 8 // 0x8 -; CHECK-BE-NEXT: .byte 255 // 0xff -; CHECK-BE-NEXT: .byte 255 // 0xff -; CHECK-BE-NEXT: .byte 255 // 0xff -; CHECK-BE-NEXT: .byte 9 // 0x9 -; CHECK-BE-NEXT: .byte 255 // 0xff -; CHECK-BE-NEXT: .byte 255 // 0xff -; CHECK-BE-NEXT: .byte 255 // 0xff -; CHECK-BE-NEXT: .byte 10 // 0xa -; CHECK-BE-NEXT: .byte 255 // 0xff -; CHECK-BE-NEXT: .byte 255 // 0xff -; CHECK-BE-NEXT: .byte 255 // 0xff -; CHECK-BE-NEXT: .byte 11 // 0xb - define void @zext_v12i8_to_v12i32_in_loop(ptr %src, ptr %dst) { +; ; CHECK-LABEL: zext_v12i8_to_v12i32_in_loop: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: Lloh12: -; CHECK-NEXT: adrp x9, lCPI12_0@PAGE +; CHECK-NEXT: adrp x8, lCPI12_0@PAGE ; CHECK-NEXT: Lloh13: -; CHECK-NEXT: adrp x10, lCPI12_1@PAGE +; CHECK-NEXT: adrp x9, lCPI12_1@PAGE ; CHECK-NEXT: Lloh14: -; CHECK-NEXT: adrp x11, lCPI12_2@PAGE -; CHECK-NEXT: mov x8, xzr +; CHECK-NEXT: adrp x10, lCPI12_2@PAGE ; CHECK-NEXT: Lloh15: -; CHECK-NEXT: ldr q0, [x9, lCPI12_0@PAGEOFF] +; CHECK-NEXT: ldr q0, [x8, lCPI12_0@PAGEOFF] ; CHECK-NEXT: Lloh16: -; CHECK-NEXT: ldr q1, [x10, lCPI12_1@PAGEOFF] +; CHECK-NEXT: ldr q1, [x9, lCPI12_1@PAGEOFF] ; CHECK-NEXT: Lloh17: -; CHECK-NEXT: ldr q2, [x11, lCPI12_2@PAGEOFF] +; CHECK-NEXT: ldr q2, [x10, lCPI12_2@PAGEOFF] +; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: LBB12_1: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr q3, [x0, x8] @@ -1199,26 +851,23 @@ ; CHECK-BE-NEXT: .LBB12_1: // %loop ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-BE-NEXT: add x9, x0, x8 -; CHECK-BE-NEXT: add x10, x1, #16 ; CHECK-BE-NEXT: add x8, x8, #16 -; CHECK-BE-NEXT: cmp x8, #128 +; CHECK-BE-NEXT: add x10, x1, #16 ; CHECK-BE-NEXT: ld1 { v3.16b }, [x9] ; CHECK-BE-NEXT: add x9, x1, #32 -; CHECK-BE-NEXT: tbl v4.16b, { v3.16b }, v0.16b -; CHECK-BE-NEXT: tbl v5.16b, { v3.16b }, v2.16b -; CHECK-BE-NEXT: tbl v3.16b, { v3.16b }, v1.16b -; CHECK-BE-NEXT: st1 { v4.16b }, [x1] +; CHECK-BE-NEXT: cmp x8, #128 +; CHECK-BE-NEXT: tbl v4.16b, { v3.16b }, v2.16b +; CHECK-BE-NEXT: tbl v5.16b, { v3.16b }, v1.16b +; CHECK-BE-NEXT: tbl v3.16b, { v3.16b }, v0.16b +; CHECK-BE-NEXT: st1 { v3.16b }, [x1] ; CHECK-BE-NEXT: add x1, x1, #64 -; CHECK-BE-NEXT: st1 { v5.16b }, [x9] -; CHECK-BE-NEXT: st1 { v3.16b }, [x10] +; CHECK-BE-NEXT: st1 { v4.16b }, [x9] +; CHECK-BE-NEXT: st1 { v5.16b }, [x10] ; CHECK-BE-NEXT: b.ne .LBB12_1 ; CHECK-BE-NEXT: // %bb.2: // %exit ; CHECK-BE-NEXT: ret - - entry: br label %loop - loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %src.gep = getelementptr i8, ptr %src, i64 %iv @@ -1229,12 +878,11 @@ %iv.next = add nuw i64 %iv, 16 %ec = icmp eq i64 %iv.next, 128 br i1 %ec, label %exit, label %loop - exit: ret void } - define void @zext_v16i4_to_v16i32_in_loop(ptr %src, ptr %dst) { +; ; CHECK-LABEL: zext_v16i4_to_v16i32_in_loop: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: movi.4s v0, #15 @@ -1244,11 +892,11 @@ ; CHECK-NEXT: ldr x9, [x0, x8] ; CHECK-NEXT: add x8, x8, #16 ; CHECK-NEXT: cmp x8, #128 -; CHECK-NEXT: and w10, w9, #0xf -; 
CHECK-NEXT: ubfx w11, w9, #4, #4 -; CHECK-NEXT: fmov s1, w10 +; CHECK-NEXT: and w11, w9, #0xf +; CHECK-NEXT: ubfx w10, w9, #4, #4 +; CHECK-NEXT: fmov s1, w11 +; CHECK-NEXT: mov.b v1[1], w10 ; CHECK-NEXT: ubfx w10, w9, #8, #4 -; CHECK-NEXT: mov.b v1[1], w11 ; CHECK-NEXT: mov.b v1[2], w10 ; CHECK-NEXT: ubfx w10, w9, #12, #4 ; CHECK-NEXT: mov.b v1[3], w10 @@ -1279,18 +927,18 @@ ; CHECK-NEXT: ext.16b v2, v1, v1, #8 ; CHECK-NEXT: zip2.8b v3, v1, v0 ; CHECK-NEXT: zip1.8b v1, v1, v0 -; CHECK-NEXT: zip1.8b v4, v2, v0 -; CHECK-NEXT: zip2.8b v2, v2, v0 +; CHECK-NEXT: zip2.8b v4, v2, v0 +; CHECK-NEXT: zip1.8b v2, v2, v0 ; CHECK-NEXT: ushll.4s v3, v3, #0 ; CHECK-NEXT: ushll.4s v1, v1, #0 ; CHECK-NEXT: and.16b v3, v3, v0 +; CHECK-NEXT: ushll.4s v4, v4, #0 +; CHECK-NEXT: ushll.4s v2, v2, #0 ; CHECK-NEXT: and.16b v1, v1, v0 -; CHECK-NEXT: stp q1, q3, [x1] -; CHECK-NEXT: ushll.4s v1, v2, #0 -; CHECK-NEXT: ushll.4s v2, v4, #0 -; CHECK-NEXT: and.16b v1, v1, v0 +; CHECK-NEXT: and.16b v4, v4, v0 ; CHECK-NEXT: and.16b v2, v2, v0 -; CHECK-NEXT: stp q2, q1, [x1, #32] +; CHECK-NEXT: stp q1, q3, [x1] +; CHECK-NEXT: stp q2, q4, [x1, #32] ; CHECK-NEXT: add x1, x1, #64 ; CHECK-NEXT: b.ne LBB13_1 ; CHECK-NEXT: ; %bb.2: ; %exit @@ -1310,7 +958,6 @@ ; CHECK-BE-NEXT: fmov s1, w10 ; CHECK-BE-NEXT: ubfx x10, x9, #52, #4 ; CHECK-BE-NEXT: mov v1.b[1], w11 -; CHECK-BE-NEXT: add x11, x1, #32 ; CHECK-BE-NEXT: mov v1.b[2], w10 ; CHECK-BE-NEXT: ubfx x10, x9, #48, #4 ; CHECK-BE-NEXT: mov v1.b[3], w10 @@ -1337,39 +984,37 @@ ; CHECK-BE-NEXT: ubfx w10, w9, #4, #4 ; CHECK-BE-NEXT: and w9, w9, #0xf ; CHECK-BE-NEXT: mov v1.b[14], w10 -; CHECK-BE-NEXT: add x10, x1, #48 +; CHECK-BE-NEXT: add x10, x1, #32 ; CHECK-BE-NEXT: mov v1.b[15], w9 ; CHECK-BE-NEXT: add x9, x1, #16 ; CHECK-BE-NEXT: ext v2.16b, v1.16b, v1.16b, #8 ; CHECK-BE-NEXT: zip2 v3.8b, v1.8b, v0.8b ; CHECK-BE-NEXT: zip1 v1.8b, v1.8b, v0.8b -; CHECK-BE-NEXT: zip1 v4.8b, v2.8b, v0.8b -; CHECK-BE-NEXT: zip2 v2.8b, v2.8b, v0.8b -; CHECK-BE-NEXT: rev16 v1.8b, v1.8b +; CHECK-BE-NEXT: zip2 v4.8b, v2.8b, v0.8b +; CHECK-BE-NEXT: zip1 v2.8b, v2.8b, v0.8b ; CHECK-BE-NEXT: rev16 v3.8b, v3.8b +; CHECK-BE-NEXT: rev16 v1.8b, v1.8b ; CHECK-BE-NEXT: rev16 v4.8b, v4.8b ; CHECK-BE-NEXT: rev16 v2.8b, v2.8b -; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-BE-NEXT: ushll v3.4s, v3.4h, #0 -; CHECK-BE-NEXT: and v1.16b, v1.16b, v0.16b -; CHECK-BE-NEXT: st1 { v1.4s }, [x1] -; CHECK-BE-NEXT: add x1, x1, #64 -; CHECK-BE-NEXT: ushll v1.4s, v2.4h, #0 -; CHECK-BE-NEXT: ushll v2.4s, v4.4h, #0 +; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-BE-NEXT: and v3.16b, v3.16b, v0.16b +; CHECK-BE-NEXT: ushll v4.4s, v4.4h, #0 +; CHECK-BE-NEXT: ushll v2.4s, v2.4h, #0 ; CHECK-BE-NEXT: and v1.16b, v1.16b, v0.16b ; CHECK-BE-NEXT: st1 { v3.4s }, [x9] +; CHECK-BE-NEXT: add x9, x1, #48 +; CHECK-BE-NEXT: and v4.16b, v4.16b, v0.16b ; CHECK-BE-NEXT: and v2.16b, v2.16b, v0.16b -; CHECK-BE-NEXT: st1 { v1.4s }, [x10] -; CHECK-BE-NEXT: st1 { v2.4s }, [x11] +; CHECK-BE-NEXT: st1 { v1.4s }, [x1] +; CHECK-BE-NEXT: add x1, x1, #64 +; CHECK-BE-NEXT: st1 { v4.4s }, [x9] +; CHECK-BE-NEXT: st1 { v2.4s }, [x10] ; CHECK-BE-NEXT: b.ne .LBB13_1 ; CHECK-BE-NEXT: // %bb.2: // %exit ; CHECK-BE-NEXT: ret - - entry: br label %loop - loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %src.gep = getelementptr i4, ptr %src, i64 %iv @@ -1380,12 +1025,11 @@ %iv.next = add nuw i64 %iv, 16 %ec = icmp eq i64 %iv.next, 128 br i1 %ec, label %exit, label %loop - exit: ret void } - define void @zext_v16i16_to_v16i64_in_loop(ptr %src, ptr %dst) { +; ; 
CHECK-LABEL: zext_v16i16_to_v16i64_in_loop: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: mov x8, xzr @@ -1393,24 +1037,24 @@ ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add x9, x0, x8 ; CHECK-NEXT: add x8, x8, #32 +; CHECK-NEXT: ldp q1, q0, [x9] ; CHECK-NEXT: cmp x8, #256 -; CHECK-NEXT: ldp q0, q1, [x9] -; CHECK-NEXT: ushll.4s v2, v0, #0 -; CHECK-NEXT: ushll2.4s v0, v0, #0 -; CHECK-NEXT: ushll.4s v3, v1, #0 -; CHECK-NEXT: ushll2.4s v1, v1, #0 -; CHECK-NEXT: ushll2.2d v5, v0, #0 -; CHECK-NEXT: ushll2.2d v4, v1, #0 -; CHECK-NEXT: ushll.2d v1, v1, #0 -; CHECK-NEXT: ushll.2d v0, v0, #0 -; CHECK-NEXT: stp q1, q4, [x1, #96] -; CHECK-NEXT: ushll2.2d v1, v3, #0 -; CHECK-NEXT: stp q0, q5, [x1, #32] +; CHECK-NEXT: ushll2.4s v2, v0, #0 +; CHECK-NEXT: ushll2.4s v3, v1, #0 +; CHECK-NEXT: ushll.4s v0, v0, #0 +; CHECK-NEXT: ushll.4s v1, v1, #0 +; CHECK-NEXT: ushll2.2d v4, v2, #0 +; CHECK-NEXT: ushll.2d v2, v2, #0 +; CHECK-NEXT: ushll2.2d v5, v3, #0 ; CHECK-NEXT: ushll.2d v3, v3, #0 -; CHECK-NEXT: ushll2.2d v0, v2, #0 -; CHECK-NEXT: stp q3, q1, [x1, #64] -; CHECK-NEXT: ushll.2d v1, v2, #0 -; CHECK-NEXT: stp q1, q0, [x1], #128 +; CHECK-NEXT: stp q2, q4, [x1, #96] +; CHECK-NEXT: ushll2.2d v4, v0, #0 +; CHECK-NEXT: ushll.2d v0, v0, #0 +; CHECK-NEXT: ushll2.2d v2, v1, #0 +; CHECK-NEXT: ushll.2d v1, v1, #0 +; CHECK-NEXT: stp q3, q5, [x1, #32] +; CHECK-NEXT: stp q0, q4, [x1, #64] +; CHECK-NEXT: stp q1, q2, [x1], #128 ; CHECK-NEXT: b.ne LBB14_1 ; CHECK-NEXT: ; %bb.2: ; %exit ; CHECK-NEXT: ret @@ -1421,47 +1065,44 @@ ; CHECK-BE-NEXT: .LBB14_1: // %loop ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-BE-NEXT: add x9, x0, x8 -; CHECK-BE-NEXT: add x10, x1, #48 ; CHECK-BE-NEXT: add x8, x8, #32 -; CHECK-BE-NEXT: cmp x8, #256 ; CHECK-BE-NEXT: ld1 { v0.8h }, [x9] ; CHECK-BE-NEXT: add x9, x9, #16 -; CHECK-BE-NEXT: ld1 { v2.8h }, [x9] -; CHECK-BE-NEXT: add x9, x1, #32 -; CHECK-BE-NEXT: ushll2 v1.4s, v0.8h, #0 +; CHECK-BE-NEXT: cmp x8, #256 +; CHECK-BE-NEXT: ld1 { v1.8h }, [x9] +; CHECK-BE-NEXT: add x9, x1, #48 +; CHECK-BE-NEXT: ushll2 v2.4s, v0.8h, #0 ; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-BE-NEXT: ushll2 v3.2d, v1.4s, #0 -; CHECK-BE-NEXT: ushll v1.2d, v1.2s, #0 -; CHECK-BE-NEXT: st1 { v3.2d }, [x10] -; CHECK-BE-NEXT: add x10, x1, #112 -; CHECK-BE-NEXT: st1 { v1.2d }, [x9] +; CHECK-BE-NEXT: ushll2 v3.4s, v1.8h, #0 +; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-BE-NEXT: ushll2 v4.2d, v2.4s, #0 +; CHECK-BE-NEXT: ushll v2.2d, v2.2s, #0 +; CHECK-BE-NEXT: ushll2 v5.2d, v0.4s, #0 +; CHECK-BE-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-BE-NEXT: st1 { v4.2d }, [x9] +; CHECK-BE-NEXT: add x9, x1, #32 +; CHECK-BE-NEXT: ushll2 v4.2d, v3.4s, #0 +; CHECK-BE-NEXT: st1 { v2.2d }, [x9] ; CHECK-BE-NEXT: add x9, x1, #16 -; CHECK-BE-NEXT: ushll2 v3.2d, v0.4s, #0 -; CHECK-BE-NEXT: ushll2 v1.4s, v2.8h, #0 -; CHECK-BE-NEXT: st1 { v3.2d }, [x9] +; CHECK-BE-NEXT: ushll v2.2d, v3.2s, #0 +; CHECK-BE-NEXT: st1 { v5.2d }, [x9] +; CHECK-BE-NEXT: add x9, x1, #112 +; CHECK-BE-NEXT: ushll2 v3.2d, v1.4s, #0 +; CHECK-BE-NEXT: st1 { v4.2d }, [x9] ; CHECK-BE-NEXT: add x9, x1, #96 -; CHECK-BE-NEXT: ushll2 v4.2d, v1.4s, #0 -; CHECK-BE-NEXT: ushll v0.2d, v0.2s, #0 ; CHECK-BE-NEXT: ushll v1.2d, v1.2s, #0 -; CHECK-BE-NEXT: st1 { v4.2d }, [x10] -; CHECK-BE-NEXT: ushll v2.4s, v2.4h, #0 -; CHECK-BE-NEXT: add x10, x1, #80 -; CHECK-BE-NEXT: st1 { v0.2d }, [x1] -; CHECK-BE-NEXT: st1 { v1.2d }, [x9] +; CHECK-BE-NEXT: st1 { v2.2d }, [x9] +; CHECK-BE-NEXT: add x9, x1, #80 +; CHECK-BE-NEXT: st1 { v3.2d }, [x9] ; CHECK-BE-NEXT: add x9, 
x1, #64 +; CHECK-BE-NEXT: st1 { v0.2d }, [x1] ; CHECK-BE-NEXT: add x1, x1, #128 -; CHECK-BE-NEXT: ushll v3.2d, v2.2s, #0 -; CHECK-BE-NEXT: ushll2 v2.2d, v2.4s, #0 -; CHECK-BE-NEXT: st1 { v3.2d }, [x9] -; CHECK-BE-NEXT: st1 { v2.2d }, [x10] +; CHECK-BE-NEXT: st1 { v1.2d }, [x9] ; CHECK-BE-NEXT: b.ne .LBB14_1 ; CHECK-BE-NEXT: // %bb.2: // %exit ; CHECK-BE-NEXT: ret - - entry: br label %loop - loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %src.gep = getelementptr i16, ptr %src, i64 %iv @@ -1472,12 +1113,11 @@ %iv.next = add nuw i64 %iv, 16 %ec = icmp eq i64 %iv.next, 128 br i1 %ec, label %exit, label %loop - exit: ret void } - define void @zext_v16i32_to_v16i64_in_loop(ptr %src, ptr %dst) { +; ; CHECK-LABEL: zext_v16i32_to_v16i64_in_loop: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: mov x8, xzr @@ -1485,21 +1125,21 @@ ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add x9, x0, x8 ; CHECK-NEXT: add x8, x8, #64 -; CHECK-NEXT: cmp x8, #512 ; CHECK-NEXT: ldp q1, q0, [x9, #32] -; CHECK-NEXT: ushll2.2d v5, v1, #0 -; CHECK-NEXT: ushll.2d v1, v1, #0 -; CHECK-NEXT: ldp q3, q2, [x9] -; CHECK-NEXT: ushll2.2d v4, v0, #0 -; CHECK-NEXT: stp q1, q5, [x1, #64] +; CHECK-NEXT: cmp x8, #512 +; CHECK-NEXT: ldp q5, q4, [x9] +; CHECK-NEXT: ushll2.2d v2, v0, #0 ; CHECK-NEXT: ushll.2d v0, v0, #0 -; CHECK-NEXT: stp q0, q4, [x1, #96] -; CHECK-NEXT: ushll2.2d v1, v3, #0 -; CHECK-NEXT: ushll2.2d v0, v2, #0 -; CHECK-NEXT: ushll.2d v2, v2, #0 -; CHECK-NEXT: stp q2, q0, [x1, #32] -; CHECK-NEXT: ushll.2d v0, v3, #0 -; CHECK-NEXT: stp q0, q1, [x1], #128 +; CHECK-NEXT: ushll2.2d v3, v1, #0 +; CHECK-NEXT: ushll.2d v1, v1, #0 +; CHECK-NEXT: stp q0, q2, [x1, #96] +; CHECK-NEXT: ushll2.2d v2, v4, #0 +; CHECK-NEXT: ushll.2d v0, v4, #0 +; CHECK-NEXT: stp q1, q3, [x1, #64] +; CHECK-NEXT: ushll2.2d v3, v5, #0 +; CHECK-NEXT: ushll.2d v1, v5, #0 +; CHECK-NEXT: stp q0, q2, [x1, #32] +; CHECK-NEXT: stp q1, q3, [x1], #128 ; CHECK-NEXT: b.ne LBB15_1 ; CHECK-NEXT: ; %bb.2: ; %exit ; CHECK-NEXT: ret @@ -1511,46 +1151,43 @@ ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-BE-NEXT: add x9, x0, x8 ; CHECK-BE-NEXT: add x8, x8, #64 +; CHECK-BE-NEXT: ld1 { v0.4s }, [x9] ; CHECK-BE-NEXT: add x10, x9, #48 -; CHECK-BE-NEXT: add x11, x9, #32 ; CHECK-BE-NEXT: cmp x8, #512 -; CHECK-BE-NEXT: ld1 { v0.4s }, [x9] -; CHECK-BE-NEXT: add x9, x9, #16 ; CHECK-BE-NEXT: ld1 { v1.4s }, [x10] -; CHECK-BE-NEXT: add x10, x1, #16 -; CHECK-BE-NEXT: ld1 { v2.4s }, [x11] -; CHECK-BE-NEXT: ushll2 v3.2d, v0.4s, #0 +; CHECK-BE-NEXT: add x10, x9, #32 +; CHECK-BE-NEXT: add x9, x9, #16 ; CHECK-BE-NEXT: ld1 { v4.4s }, [x9] -; CHECK-BE-NEXT: add x9, x1, #112 -; CHECK-BE-NEXT: st1 { v3.2d }, [x10] +; CHECK-BE-NEXT: ld1 { v2.4s }, [x10] +; CHECK-BE-NEXT: add x9, x1, #16 +; CHECK-BE-NEXT: ushll2 v3.2d, v0.4s, #0 +; CHECK-BE-NEXT: ushll v0.2d, v0.2s, #0 ; CHECK-BE-NEXT: add x10, x1, #80 -; CHECK-BE-NEXT: ushll2 v3.2d, v1.4s, #0 -; CHECK-BE-NEXT: ushll2 v5.2d, v2.4s, #0 +; CHECK-BE-NEXT: ushll2 v5.2d, v1.4s, #0 +; CHECK-BE-NEXT: ushll2 v6.2d, v2.4s, #0 ; CHECK-BE-NEXT: st1 { v3.2d }, [x9] -; CHECK-BE-NEXT: add x9, x1, #48 -; CHECK-BE-NEXT: st1 { v5.2d }, [x10] -; CHECK-BE-NEXT: add x10, x1, #96 -; CHECK-BE-NEXT: ushll v0.2d, v0.2s, #0 -; CHECK-BE-NEXT: ushll v3.2d, v4.2s, #0 -; CHECK-BE-NEXT: ushll2 v4.2d, v4.4s, #0 +; CHECK-BE-NEXT: ushll2 v3.2d, v4.4s, #0 +; CHECK-BE-NEXT: add x9, x1, #112 ; CHECK-BE-NEXT: st1 { v0.2d }, [x1] -; CHECK-BE-NEXT: ushll v1.2d, v1.2s, #0 -; CHECK-BE-NEXT: st1 { v4.2d }, [x9] +; CHECK-BE-NEXT: ushll v0.2d, v1.2s, #0 +; 
CHECK-BE-NEXT: ushll v1.2d, v2.2s, #0 +; CHECK-BE-NEXT: st1 { v5.2d }, [x9] +; CHECK-BE-NEXT: add x9, x1, #48 +; CHECK-BE-NEXT: ushll v2.2d, v4.2s, #0 +; CHECK-BE-NEXT: st1 { v3.2d }, [x9] ; CHECK-BE-NEXT: add x9, x1, #64 -; CHECK-BE-NEXT: st1 { v1.2d }, [x10] -; CHECK-BE-NEXT: add x10, x1, #32 +; CHECK-BE-NEXT: st1 { v6.2d }, [x10] +; CHECK-BE-NEXT: add x10, x1, #96 +; CHECK-BE-NEXT: st1 { v1.2d }, [x9] +; CHECK-BE-NEXT: add x9, x1, #32 ; CHECK-BE-NEXT: add x1, x1, #128 -; CHECK-BE-NEXT: ushll v2.2d, v2.2s, #0 -; CHECK-BE-NEXT: st1 { v3.2d }, [x10] +; CHECK-BE-NEXT: st1 { v0.2d }, [x10] ; CHECK-BE-NEXT: st1 { v2.2d }, [x9] ; CHECK-BE-NEXT: b.ne .LBB15_1 ; CHECK-BE-NEXT: // %bb.2: // %exit ; CHECK-BE-NEXT: ret - - entry: br label %loop - loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %src.gep = getelementptr i32, ptr %src, i64 %iv @@ -1561,12 +1198,11 @@ %iv.next = add nuw i64 %iv, 16 %ec = icmp eq i64 %iv.next, 128 br i1 %ec, label %exit, label %loop - exit: ret void } - define void @zext_v8i8_to_v8i128_in_loop(ptr %src, ptr %dst) { +; ; CHECK-LABEL: zext_v8i8_to_v8i128_in_loop: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: mov x8, xzr @@ -1574,35 +1210,35 @@ ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr d0, [x0, x8] ; CHECK-NEXT: add x9, x1, #112 -; CHECK-NEXT: add x10, x1, #80 -; CHECK-NEXT: str xzr, [x1, #120] -; CHECK-NEXT: str xzr, [x1, #104] ; CHECK-NEXT: add x8, x8, #16 -; CHECK-NEXT: str xzr, [x1, #88] +; CHECK-NEXT: str xzr, [x1, #120] ; CHECK-NEXT: cmp x8, #128 ; CHECK-NEXT: ushll.8h v0, v0, #0 +; CHECK-NEXT: str xzr, [x1, #104] +; CHECK-NEXT: str xzr, [x1, #88] ; CHECK-NEXT: str xzr, [x1, #72] -; CHECK-NEXT: str xzr, [x1, #56] ; CHECK-NEXT: ushll2.4s v1, v0, #0 -; CHECK-NEXT: str xzr, [x1, #40] ; CHECK-NEXT: ushll.4s v0, v0, #0 +; CHECK-NEXT: str xzr, [x1, #56] +; CHECK-NEXT: str xzr, [x1, #40] ; CHECK-NEXT: str xzr, [x1, #24] ; CHECK-NEXT: ushll2.2d v2, v1, #0 -; CHECK-NEXT: str xzr, [x1, #8] ; CHECK-NEXT: ushll.2d v1, v1, #0 +; CHECK-NEXT: ushll2.2d v3, v0, #0 +; CHECK-NEXT: ushll.2d v0, v0, #0 +; CHECK-NEXT: str xzr, [x1, #8] ; CHECK-NEXT: st1.d { v2 }[1], [x9] +; CHECK-NEXT: add x9, x1, #80 +; CHECK-NEXT: st1.d { v1 }[1], [x9] ; CHECK-NEXT: add x9, x1, #48 ; CHECK-NEXT: str d2, [x1, #96] -; CHECK-NEXT: ushll2.2d v2, v0, #0 -; CHECK-NEXT: st1.d { v1 }[1], [x10] -; CHECK-NEXT: ushll.2d v0, v0, #0 +; CHECK-NEXT: st1.d { v3 }[1], [x9] +; CHECK-NEXT: add x9, x1, #16 ; CHECK-NEXT: str d1, [x1, #64] -; CHECK-NEXT: str d2, [x1, #32] -; CHECK-NEXT: add x10, x1, #16 +; CHECK-NEXT: str d3, [x1, #32] ; CHECK-NEXT: str d0, [x1] ; CHECK-NEXT: add x1, x1, #256 -; CHECK-NEXT: st1.d { v2 }[1], [x9] -; CHECK-NEXT: st1.d { v0 }[1], [x10] +; CHECK-NEXT: st1.d { v0 }[1], [x9] ; CHECK-NEXT: b.ne LBB16_1 ; CHECK-NEXT: ; %bb.2: ; %exit ; CHECK-NEXT: ret @@ -1613,45 +1249,42 @@ ; CHECK-BE-NEXT: .LBB16_1: // %loop ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-BE-NEXT: add x9, x0, x8 -; CHECK-BE-NEXT: add x10, x1, #88 ; CHECK-BE-NEXT: add x8, x8, #16 -; CHECK-BE-NEXT: cmp x8, #128 ; CHECK-BE-NEXT: ld1 { v0.8b }, [x9] ; CHECK-BE-NEXT: add x9, x1, #120 ; CHECK-BE-NEXT: str xzr, [x1, #112] ; CHECK-BE-NEXT: str xzr, [x1, #96] +; CHECK-BE-NEXT: cmp x8, #128 ; CHECK-BE-NEXT: str xzr, [x1, #80] ; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-BE-NEXT: str xzr, [x1, #64] ; CHECK-BE-NEXT: str xzr, [x1, #48] -; CHECK-BE-NEXT: ushll2 v1.4s, v0.8h, #0 ; CHECK-BE-NEXT: str xzr, [x1, #32] +; CHECK-BE-NEXT: ushll2 v1.4s, v0.8h, #0 ; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0 ; 
CHECK-BE-NEXT: str xzr, [x1, #16] -; CHECK-BE-NEXT: ushll2 v2.2d, v1.4s, #0 ; CHECK-BE-NEXT: str xzr, [x1] +; CHECK-BE-NEXT: ushll2 v2.2d, v1.4s, #0 ; CHECK-BE-NEXT: ushll v1.2d, v1.2s, #0 +; CHECK-BE-NEXT: ushll2 v3.2d, v0.4s, #0 +; CHECK-BE-NEXT: ushll v0.2d, v0.2s, #0 ; CHECK-BE-NEXT: st1 { v2.d }[1], [x9] +; CHECK-BE-NEXT: add x9, x1, #88 +; CHECK-BE-NEXT: st1 { v1.d }[1], [x9] ; CHECK-BE-NEXT: add x9, x1, #56 ; CHECK-BE-NEXT: str d2, [x1, #104] -; CHECK-BE-NEXT: ushll2 v2.2d, v0.4s, #0 -; CHECK-BE-NEXT: st1 { v1.d }[1], [x10] -; CHECK-BE-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-BE-NEXT: st1 { v3.d }[1], [x9] +; CHECK-BE-NEXT: add x9, x1, #24 ; CHECK-BE-NEXT: str d1, [x1, #72] -; CHECK-BE-NEXT: str d2, [x1, #40] -; CHECK-BE-NEXT: add x10, x1, #24 +; CHECK-BE-NEXT: str d3, [x1, #40] ; CHECK-BE-NEXT: str d0, [x1, #8] ; CHECK-BE-NEXT: add x1, x1, #256 -; CHECK-BE-NEXT: st1 { v2.d }[1], [x9] -; CHECK-BE-NEXT: st1 { v0.d }[1], [x10] +; CHECK-BE-NEXT: st1 { v0.d }[1], [x9] ; CHECK-BE-NEXT: b.ne .LBB16_1 ; CHECK-BE-NEXT: // %bb.2: // %exit ; CHECK-BE-NEXT: ret - - entry: br label %loop - loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %src.gep = getelementptr i8, ptr %src, i64 %iv @@ -1662,13 +1295,12 @@ %iv.next = add nuw i64 %iv, 16 %ec = icmp eq i64 %iv.next, 128 br i1 %ec, label %exit, label %loop - exit: ret void } - ; multiple back-to-back 'zext' of similar type of vectors combined with arithmetic operations define void @zext_v8i8_to_v8i64_with_add_in_sequence_in_loop(ptr %src, ptr %dst) { +; ; CHECK-LABEL: zext_v8i8_to_v8i64_with_add_in_sequence_in_loop: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: Lloh18: @@ -1678,36 +1310,36 @@ ; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: Lloh20: ; CHECK-NEXT: ldr q0, [x9, lCPI17_0@PAGEOFF] -; CHECK-NEXT: add x9, x0, #8 ; CHECK-NEXT: Lloh21: ; CHECK-NEXT: ldr q1, [x10, lCPI17_1@PAGEOFF] +; CHECK-NEXT: add x9, x0, #8 ; CHECK-NEXT: LBB17_1: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 +; CHECK-NEXT: ldp d2, d4, [x9, #-8] ; CHECK-NEXT: add x10, x1, x8 +; CHECK-NEXT: ldp q6, q5, [x10, #32] ; CHECK-NEXT: add x8, x8, #128 -; CHECK-NEXT: ldp d2, d3, [x9, #-8] -; CHECK-NEXT: add x9, x9, #16 +; CHECK-NEXT: ldp q17, q16, [x10] ; CHECK-NEXT: cmp x8, #1024 -; CHECK-NEXT: ldp q5, q4, [x10, #32] -; CHECK-NEXT: tbl.16b v6, { v2 }, v1 +; CHECK-NEXT: tbl.16b v3, { v2 }, v1 ; CHECK-NEXT: tbl.16b v2, { v2 }, v0 -; CHECK-NEXT: tbl.16b v17, { v3 }, v0 -; CHECK-NEXT: tbl.16b v3, { v3 }, v1 -; CHECK-NEXT: ldp q16, q7, [x10] -; CHECK-NEXT: uaddw2.2d v4, v4, v6 -; CHECK-NEXT: uaddw.2d v5, v5, v6 -; CHECK-NEXT: stp q5, q4, [x10, #32] -; CHECK-NEXT: ldp q19, q18, [x10, #96] -; CHECK-NEXT: uaddw2.2d v7, v7, v2 -; CHECK-NEXT: uaddw.2d v2, v16, v2 -; CHECK-NEXT: stp q2, q7, [x10] -; CHECK-NEXT: ldp q6, q20, [x10, #64] -; CHECK-NEXT: uaddw2.2d v4, v18, v3 -; CHECK-NEXT: uaddw.2d v3, v19, v3 -; CHECK-NEXT: stp q3, q4, [x10, #96] -; CHECK-NEXT: uaddw2.2d v2, v20, v17 -; CHECK-NEXT: uaddw.2d v4, v6, v17 -; CHECK-NEXT: stp q4, q2, [x10, #64] +; CHECK-NEXT: tbl.16b v7, { v4 }, v1 +; CHECK-NEXT: tbl.16b v4, { v4 }, v0 +; CHECK-NEXT: add x9, x9, #16 +; CHECK-NEXT: uaddw2.2d v5, v5, v3 +; CHECK-NEXT: uaddw.2d v3, v6, v3 +; CHECK-NEXT: uaddw2.2d v6, v16, v2 +; CHECK-NEXT: ldp q18, q16, [x10, #96] +; CHECK-NEXT: uaddw.2d v2, v17, v2 +; CHECK-NEXT: stp q3, q5, [x10, #32] +; CHECK-NEXT: ldp q17, q5, [x10, #64] +; CHECK-NEXT: uaddw2.2d v16, v16, v7 +; CHECK-NEXT: uaddw.2d v7, v18, v7 +; CHECK-NEXT: stp q2, q6, [x10] +; CHECK-NEXT: uaddw2.2d v3, v5, v4 +; CHECK-NEXT: uaddw.2d v4, v17, 
v4 +; CHECK-NEXT: stp q7, q16, [x10, #96] +; CHECK-NEXT: stp q4, q3, [x10, #64] ; CHECK-NEXT: b.ne LBB17_1 ; CHECK-NEXT: ; %bb.2: ; %exit ; CHECK-NEXT: ret @@ -1726,68 +1358,65 @@ ; CHECK-BE-NEXT: add x9, x0, #8 ; CHECK-BE-NEXT: .LBB17_1: // %loop ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-BE-NEXT: sub x12, x9, #8 -; CHECK-BE-NEXT: add x10, x1, x8 +; CHECK-BE-NEXT: sub x10, x9, #8 ; CHECK-BE-NEXT: ld1 { v2.8b }, [x9] +; CHECK-BE-NEXT: add x9, x9, #16 +; CHECK-BE-NEXT: ld1 { v3.8b }, [x10] +; CHECK-BE-NEXT: add x10, x1, x8 +; CHECK-BE-NEXT: add x8, x8, #128 +; CHECK-BE-NEXT: add x15, x10, #96 ; CHECK-BE-NEXT: add x11, x10, #32 -; CHECK-BE-NEXT: add x13, x10, #48 -; CHECK-BE-NEXT: add x14, x10, #16 -; CHECK-BE-NEXT: ld1 { v4.8b }, [x12] -; CHECK-BE-NEXT: add x15, x10, #64 -; CHECK-BE-NEXT: ld1 { v3.2d }, [x11] -; CHECK-BE-NEXT: add x12, x10, #96 -; CHECK-BE-NEXT: tbl v6.16b, { v2.16b }, v1.16b -; CHECK-BE-NEXT: add x16, x10, #112 +; CHECK-BE-NEXT: add x14, x10, #64 +; CHECK-BE-NEXT: tbl v4.16b, { v2.16b }, v1.16b ; CHECK-BE-NEXT: tbl v2.16b, { v2.16b }, v0.16b -; CHECK-BE-NEXT: ld1 { v7.2d }, [x13] -; CHECK-BE-NEXT: tbl v16.16b, { v4.16b }, v0.16b +; CHECK-BE-NEXT: ld1 { v16.2d }, [x15] +; CHECK-BE-NEXT: tbl v5.16b, { v3.16b }, v1.16b +; CHECK-BE-NEXT: tbl v3.16b, { v3.16b }, v0.16b +; CHECK-BE-NEXT: ld1 { v6.2d }, [x10] +; CHECK-BE-NEXT: ld1 { v19.2d }, [x14] +; CHECK-BE-NEXT: ld1 { v21.2d }, [x11] +; CHECK-BE-NEXT: add x12, x10, #48 +; CHECK-BE-NEXT: add x13, x10, #16 +; CHECK-BE-NEXT: add x16, x10, #112 ; CHECK-BE-NEXT: add x17, x10, #80 -; CHECK-BE-NEXT: tbl v4.16b, { v4.16b }, v1.16b -; CHECK-BE-NEXT: ld1 { v18.2d }, [x14] -; CHECK-BE-NEXT: rev32 v17.8b, v6.8b -; CHECK-BE-NEXT: add x8, x8, #128 -; CHECK-BE-NEXT: ext v6.16b, v6.16b, v6.16b, #8 -; CHECK-BE-NEXT: ld1 { v5.2d }, [x10] -; CHECK-BE-NEXT: ext v23.16b, v16.16b, v16.16b, #8 -; CHECK-BE-NEXT: add x9, x9, #16 -; CHECK-BE-NEXT: ext v21.16b, v4.16b, v4.16b, #8 -; CHECK-BE-NEXT: ld1 { v20.2d }, [x12] -; CHECK-BE-NEXT: rev32 v4.8b, v4.8b +; CHECK-BE-NEXT: rev32 v7.8b, v4.8b +; CHECK-BE-NEXT: ext v4.16b, v4.16b, v4.16b, #8 +; CHECK-BE-NEXT: rev32 v17.8b, v2.8b +; CHECK-BE-NEXT: ext v18.16b, v5.16b, v5.16b, #8 +; CHECK-BE-NEXT: ext v20.16b, v3.16b, v3.16b, #8 +; CHECK-BE-NEXT: ext v2.16b, v2.16b, v2.16b, #8 +; CHECK-BE-NEXT: rev32 v5.8b, v5.8b +; CHECK-BE-NEXT: rev32 v3.8b, v3.8b ; CHECK-BE-NEXT: cmp x8, #1024 -; CHECK-BE-NEXT: ext v19.16b, v2.16b, v2.16b, #8 -; CHECK-BE-NEXT: ld1 { v22.2d }, [x15] +; CHECK-BE-NEXT: rev32 v4.8b, v4.8b +; CHECK-BE-NEXT: uaddw v7.2d, v16.2d, v7.2s +; CHECK-BE-NEXT: ld1 { v16.2d }, [x16] +; CHECK-BE-NEXT: rev32 v18.8b, v18.8b +; CHECK-BE-NEXT: rev32 v20.8b, v20.8b ; CHECK-BE-NEXT: rev32 v2.8b, v2.8b -; CHECK-BE-NEXT: rev32 v21.8b, v21.8b -; CHECK-BE-NEXT: ld1 { v24.2d }, [x16] -; CHECK-BE-NEXT: uaddw v3.2d, v3.2d, v4.2s -; CHECK-BE-NEXT: rev32 v4.8b, v23.8b -; CHECK-BE-NEXT: ld1 { v23.2d }, [x17] -; CHECK-BE-NEXT: rev32 v16.8b, v16.8b -; CHECK-BE-NEXT: rev32 v6.8b, v6.8b -; CHECK-BE-NEXT: rev32 v19.8b, v19.8b -; CHECK-BE-NEXT: st1 { v3.2d }, [x11] -; CHECK-BE-NEXT: uaddw v3.2d, v7.2d, v21.2s -; CHECK-BE-NEXT: uaddw v4.2d, v18.2d, v4.2s -; CHECK-BE-NEXT: uaddw v5.2d, v5.2d, v16.2s -; CHECK-BE-NEXT: uaddw v7.2d, v20.2d, v17.2s -; CHECK-BE-NEXT: st1 { v3.2d }, [x13] -; CHECK-BE-NEXT: uaddw v2.2d, v22.2d, v2.2s -; CHECK-BE-NEXT: st1 { v4.2d }, [x14] -; CHECK-BE-NEXT: uaddw v3.2d, v24.2d, v6.2s -; CHECK-BE-NEXT: st1 { v5.2d }, [x10] -; CHECK-BE-NEXT: uaddw v4.2d, v23.2d, v19.2s +; 
CHECK-BE-NEXT: uaddw v17.2d, v19.2d, v17.2s +; CHECK-BE-NEXT: ld1 { v19.2d }, [x12] +; CHECK-BE-NEXT: uaddw v5.2d, v21.2d, v5.2s +; CHECK-BE-NEXT: ld1 { v21.2d }, [x13] +; CHECK-BE-NEXT: uaddw v3.2d, v6.2d, v3.2s +; CHECK-BE-NEXT: ld1 { v6.2d }, [x17] +; CHECK-BE-NEXT: uaddw v4.2d, v16.2d, v4.2s +; CHECK-BE-NEXT: st1 { v7.2d }, [x15] +; CHECK-BE-NEXT: uaddw v7.2d, v19.2d, v18.2s +; CHECK-BE-NEXT: uaddw v16.2d, v21.2d, v20.2s +; CHECK-BE-NEXT: uaddw v2.2d, v6.2d, v2.2s +; CHECK-BE-NEXT: st1 { v17.2d }, [x14] +; CHECK-BE-NEXT: st1 { v5.2d }, [x11] +; CHECK-BE-NEXT: st1 { v3.2d }, [x10] +; CHECK-BE-NEXT: st1 { v4.2d }, [x16] ; CHECK-BE-NEXT: st1 { v7.2d }, [x12] -; CHECK-BE-NEXT: st1 { v2.2d }, [x15] -; CHECK-BE-NEXT: st1 { v3.2d }, [x16] -; CHECK-BE-NEXT: st1 { v4.2d }, [x17] +; CHECK-BE-NEXT: st1 { v16.2d }, [x13] +; CHECK-BE-NEXT: st1 { v2.2d }, [x17] ; CHECK-BE-NEXT: b.ne .LBB17_1 ; CHECK-BE-NEXT: // %bb.2: // %exit ; CHECK-BE-NEXT: ret - - entry: br label %loop - loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %src.gep = getelementptr i8, ptr %src, i64 %iv @@ -1807,13 +1436,12 @@ %iv.next = add nuw i64 %iv, 16 %ec = icmp eq i64 %iv.next, 128 br i1 %ec, label %exit, label %loop - exit: ret void } - ; multiple back-to-back 'zext' of similar type of vectors define void @zext_v16i8_to_v16i64_in_sequence_in_loop(ptr %src, ptr %dst) { +; ; CHECK-LABEL: zext_v16i8_to_v16i64_in_sequence_in_loop: ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: mov x8, xzr @@ -1822,44 +1450,44 @@ ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: add x10, x0, x8 ; CHECK-NEXT: add x8, x8, #16 -; CHECK-NEXT: cmp x8, #128 ; CHECK-NEXT: ldp q0, q1, [x10] -; CHECK-NEXT: ushll.8h v2, v0, #0 -; CHECK-NEXT: ushll2.8h v0, v0, #0 -; CHECK-NEXT: ushll.4s v4, v2, #0 +; CHECK-NEXT: cmp x8, #128 +; CHECK-NEXT: ushll2.8h v2, v0, #0 +; CHECK-NEXT: ushll.8h v0, v0, #0 +; CHECK-NEXT: ushll2.8h v6, v1, #0 +; CHECK-NEXT: ushll.8h v1, v1, #0 +; CHECK-NEXT: ushll2.4s v3, v2, #0 +; CHECK-NEXT: ushll.4s v2, v2, #0 ; CHECK-NEXT: ushll2.4s v5, v0, #0 -; CHECK-NEXT: ushll2.4s v2, v2, #0 ; CHECK-NEXT: ushll.4s v0, v0, #0 -; CHECK-NEXT: ushll2.2d v6, v5, #0 +; CHECK-NEXT: ushll2.2d v4, v3, #0 +; CHECK-NEXT: ushll.2d v3, v3, #0 +; CHECK-NEXT: ushll2.2d v7, v2, #0 +; CHECK-NEXT: ushll.2d v2, v2, #0 +; CHECK-NEXT: stp q3, q4, [x9, #-32] +; CHECK-NEXT: ushll2.2d v4, v5, #0 +; CHECK-NEXT: ushll2.4s v3, v6, #0 ; CHECK-NEXT: ushll.2d v5, v5, #0 -; CHECK-NEXT: ushll2.8h v3, v1, #0 +; CHECK-NEXT: stp q2, q7, [x9, #-64] ; CHECK-NEXT: ushll2.2d v7, v0, #0 -; CHECK-NEXT: stp q5, q6, [x9, #-32] ; CHECK-NEXT: ushll.2d v0, v0, #0 -; CHECK-NEXT: ushll2.2d v5, v2, #0 -; CHECK-NEXT: ushll.2d v2, v2, #0 -; CHECK-NEXT: stp q0, q7, [x9, #-64] -; CHECK-NEXT: ushll2.2d v0, v4, #0 -; CHECK-NEXT: stp q2, q5, [x9, #-96] -; CHECK-NEXT: ushll2.4s v5, v3, #0 -; CHECK-NEXT: ushll.2d v2, v4, #0 -; CHECK-NEXT: ushll2.2d v4, v5, #0 -; CHECK-NEXT: stp q2, q0, [x9, #-128] -; CHECK-NEXT: ushll.2d v0, v5, #0 -; CHECK-NEXT: ushll.4s v2, v3, #0 -; CHECK-NEXT: stp q0, q4, [x9, #96] -; CHECK-NEXT: ushll.8h v0, v1, #0 -; CHECK-NEXT: ushll2.2d v1, v2, #0 -; CHECK-NEXT: ushll.2d v2, v2, #0 -; CHECK-NEXT: ushll2.4s v3, v0, #0 -; CHECK-NEXT: stp q2, q1, [x9, #64] -; CHECK-NEXT: ushll2.2d v1, v3, #0 -; CHECK-NEXT: ushll.2d v2, v3, #0 -; CHECK-NEXT: ushll.4s v0, v0, #0 -; CHECK-NEXT: stp q2, q1, [x9, #32] -; CHECK-NEXT: ushll2.2d v1, v0, #0 +; CHECK-NEXT: ushll.4s v2, v6, #0 +; CHECK-NEXT: stp q5, q4, [x9, #-96] +; CHECK-NEXT: ushll2.2d v4, v3, #0 +; CHECK-NEXT: ushll2.4s v5, 
v1, #0 +; CHECK-NEXT: ushll.2d v3, v3, #0 +; CHECK-NEXT: stp q0, q7, [x9, #-128] +; CHECK-NEXT: ushll.4s v0, v1, #0 +; CHECK-NEXT: ushll2.2d v6, v2, #0 +; CHECK-NEXT: ushll.2d v1, v2, #0 +; CHECK-NEXT: ushll2.2d v2, v5, #0 +; CHECK-NEXT: stp q3, q4, [x9, #96] +; CHECK-NEXT: ushll.2d v3, v5, #0 +; CHECK-NEXT: ushll2.2d v4, v0, #0 ; CHECK-NEXT: ushll.2d v0, v0, #0 -; CHECK-NEXT: stp q0, q1, [x9], #128 +; CHECK-NEXT: stp q1, q6, [x9, #64] +; CHECK-NEXT: stp q3, q2, [x9, #32] +; CHECK-NEXT: stp q0, q4, [x9], #128 ; CHECK-NEXT: b.ne LBB18_1 ; CHECK-NEXT: ; %bb.2: ; %exit ; CHECK-NEXT: ret @@ -1871,79 +1499,76 @@ ; CHECK-BE-NEXT: .LBB18_1: // %loop ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-BE-NEXT: add x10, x0, x8 +; CHECK-BE-NEXT: sub x11, x9, #32 ; CHECK-BE-NEXT: add x8, x8, #16 -; CHECK-BE-NEXT: add x11, x10, #16 -; CHECK-BE-NEXT: cmp x8, #128 ; CHECK-BE-NEXT: ld1 { v0.16b }, [x10] +; CHECK-BE-NEXT: add x10, x10, #16 +; CHECK-BE-NEXT: cmp x8, #128 +; CHECK-BE-NEXT: ld1 { v5.16b }, [x10] ; CHECK-BE-NEXT: sub x10, x9, #16 -; CHECK-BE-NEXT: ld1 { v3.16b }, [x11] -; CHECK-BE-NEXT: sub x11, x9, #32 ; CHECK-BE-NEXT: ushll2 v1.8h, v0.16b, #0 ; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-BE-NEXT: ushll2 v2.4s, v1.8h, #0 +; CHECK-BE-NEXT: ushll2 v3.4s, v0.8h, #0 ; CHECK-BE-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-BE-NEXT: ushll2 v4.2d, v2.4s, #0 ; CHECK-BE-NEXT: ushll v2.2d, v2.2s, #0 +; CHECK-BE-NEXT: ushll2 v6.2d, v1.4s, #0 +; CHECK-BE-NEXT: ushll v1.2d, v1.2s, #0 ; CHECK-BE-NEXT: st1 { v4.2d }, [x10] -; CHECK-BE-NEXT: sub x10, x9, #48 -; CHECK-BE-NEXT: ushll2 v4.2d, v1.4s, #0 +; CHECK-BE-NEXT: ushll2 v4.2d, v3.4s, #0 +; CHECK-BE-NEXT: ushll v3.2d, v3.2s, #0 ; CHECK-BE-NEXT: st1 { v2.2d }, [x11] -; CHECK-BE-NEXT: ushll2 v2.4s, v0.8h, #0 +; CHECK-BE-NEXT: ushll2 v2.8h, v5.16b, #0 ; CHECK-BE-NEXT: sub x11, x9, #80 -; CHECK-BE-NEXT: st1 { v4.2d }, [x10] -; CHECK-BE-NEXT: sub x10, x9, #64 -; CHECK-BE-NEXT: ushll2 v5.2d, v2.4s, #0 -; CHECK-BE-NEXT: ushll v1.2d, v1.2s, #0 -; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-BE-NEXT: st1 { v5.2d }, [x11] +; CHECK-BE-NEXT: sub x10, x9, #48 +; CHECK-BE-NEXT: st1 { v4.2d }, [x11] +; CHECK-BE-NEXT: ushll v4.8h, v5.8b, #0 +; CHECK-BE-NEXT: sub x11, x9, #64 +; CHECK-BE-NEXT: ushll2 v5.4s, v2.8h, #0 +; CHECK-BE-NEXT: st1 { v1.2d }, [x11] ; CHECK-BE-NEXT: sub x11, x9, #96 -; CHECK-BE-NEXT: st1 { v1.2d }, [x10] -; CHECK-BE-NEXT: ushll v4.8h, v3.8b, #0 -; CHECK-BE-NEXT: sub x10, x9, #112 -; CHECK-BE-NEXT: ushll2 v3.8h, v3.16b, #0 -; CHECK-BE-NEXT: ushll v5.2d, v0.2s, #0 -; CHECK-BE-NEXT: ushll2 v0.2d, v0.4s, #0 -; CHECK-BE-NEXT: ushll v2.2d, v2.2s, #0 -; CHECK-BE-NEXT: ushll v1.4s, v4.4h, #0 +; CHECK-BE-NEXT: ushll2 v1.2d, v0.4s, #0 +; CHECK-BE-NEXT: ushll v2.4s, v2.4h, #0 +; CHECK-BE-NEXT: ushll v0.2d, v0.2s, #0 +; CHECK-BE-NEXT: st1 { v6.2d }, [x10] +; CHECK-BE-NEXT: sub x10, x9, #128 +; CHECK-BE-NEXT: st1 { v3.2d }, [x11] +; CHECK-BE-NEXT: ushll2 v3.4s, v4.8h, #0 +; CHECK-BE-NEXT: ushll2 v6.2d, v5.4s, #0 +; CHECK-BE-NEXT: sub x11, x9, #112 +; CHECK-BE-NEXT: ushll v5.2d, v5.2s, #0 ; CHECK-BE-NEXT: st1 { v0.2d }, [x10] -; CHECK-BE-NEXT: ushll2 v6.4s, v3.8h, #0 -; CHECK-BE-NEXT: st1 { v2.2d }, [x11] -; CHECK-BE-NEXT: sub x11, x9, #128 +; CHECK-BE-NEXT: st1 { v1.2d }, [x11] +; CHECK-BE-NEXT: ushll2 v1.2d, v2.4s, #0 ; CHECK-BE-NEXT: add x10, x9, #112 -; CHECK-BE-NEXT: ushll v2.2d, v1.2s, #0 -; CHECK-BE-NEXT: ushll2 v0.2d, v1.4s, #0 -; CHECK-BE-NEXT: ushll2 v1.2d, v6.4s, #0 -; CHECK-BE-NEXT: st1 { v5.2d }, [x11] 
-; CHECK-BE-NEXT: ushll v3.4s, v3.4h, #0 -; CHECK-BE-NEXT: add x11, x9, #96 -; CHECK-BE-NEXT: st1 { v1.2d }, [x10] +; CHECK-BE-NEXT: ushll v4.4s, v4.4h, #0 +; CHECK-BE-NEXT: ushll2 v0.2d, v3.4s, #0 +; CHECK-BE-NEXT: st1 { v6.2d }, [x10] +; CHECK-BE-NEXT: add x10, x9, #96 +; CHECK-BE-NEXT: ushll v2.2d, v2.2s, #0 +; CHECK-BE-NEXT: ushll v3.2d, v3.2s, #0 +; CHECK-BE-NEXT: st1 { v5.2d }, [x10] ; CHECK-BE-NEXT: add x10, x9, #80 -; CHECK-BE-NEXT: ushll v5.2d, v6.2s, #0 -; CHECK-BE-NEXT: st1 { v2.2d }, [x9] -; CHECK-BE-NEXT: ushll2 v1.4s, v4.8h, #0 -; CHECK-BE-NEXT: ushll2 v4.2d, v3.4s, #0 -; CHECK-BE-NEXT: st1 { v5.2d }, [x11] -; CHECK-BE-NEXT: add x11, x9, #48 -; CHECK-BE-NEXT: st1 { v4.2d }, [x10] +; CHECK-BE-NEXT: st1 { v1.2d }, [x10] +; CHECK-BE-NEXT: add x10, x9, #48 +; CHECK-BE-NEXT: ushll2 v1.2d, v4.4s, #0 +; CHECK-BE-NEXT: st1 { v0.2d }, [x10] +; CHECK-BE-NEXT: ushll v0.2d, v4.2s, #0 ; CHECK-BE-NEXT: add x10, x9, #64 -; CHECK-BE-NEXT: ushll2 v5.2d, v1.4s, #0 -; CHECK-BE-NEXT: ushll v3.2d, v3.2s, #0 -; CHECK-BE-NEXT: st1 { v5.2d }, [x11] -; CHECK-BE-NEXT: add x11, x9, #16 -; CHECK-BE-NEXT: st1 { v3.2d }, [x10] +; CHECK-BE-NEXT: st1 { v2.2d }, [x10] ; CHECK-BE-NEXT: add x10, x9, #32 +; CHECK-BE-NEXT: st1 { v3.2d }, [x10] +; CHECK-BE-NEXT: add x10, x9, #16 +; CHECK-BE-NEXT: st1 { v0.2d }, [x9] ; CHECK-BE-NEXT: add x9, x9, #128 -; CHECK-BE-NEXT: ushll v1.2d, v1.2s, #0 -; CHECK-BE-NEXT: st1 { v0.2d }, [x11] ; CHECK-BE-NEXT: st1 { v1.2d }, [x10] ; CHECK-BE-NEXT: b.ne .LBB18_1 ; CHECK-BE-NEXT: // %bb.2: // %exit ; CHECK-BE-NEXT: ret - - entry: br label %loop - loop: %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] %src.gep = getelementptr i8, ptr %src, i64 %iv @@ -1959,66 +1584,64 @@ %iv.next = add nuw i64 %iv, 16 %ec = icmp eq i64 %iv.next, 128 br i1 %ec, label %exit, label %loop - exit: ret void } - define void @zext_v16i8_to_v16i32_in_loop_scalable_vectors(ptr %src, ptr %dst) { +; ; CHECK-LABEL: zext_v16i8_to_v16i32_in_loop_scalable_vectors: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: LBB19_1: ; %loop ; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: add x9, x0, x8 ; CHECK-NEXT: ld1b { z0.s }, p0/z, [x0, x8] +; CHECK-NEXT: add x9, x0, x8 ; CHECK-NEXT: ld1b { z1.s }, p0/z, [x9, #2, mul vl] ; CHECK-NEXT: ld1b { z2.s }, p0/z, [x9, #3, mul vl] ; CHECK-NEXT: ld1b { z3.s }, p0/z, [x9, #1, mul vl] -; CHECK-NEXT: add z0.s, z0.s, z0.s ; CHECK-NEXT: add x9, x1, x8, lsl #2 +; CHECK-NEXT: add z0.s, z0.s, z0.s +; CHECK-NEXT: add z1.s, z1.s, z1.s +; CHECK-NEXT: add z2.s, z2.s, z2.s +; CHECK-NEXT: add z3.s, z3.s, z3.s ; CHECK-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2] ; CHECK-NEXT: add x8, x8, #16 ; CHECK-NEXT: cmp x8, #128 -; CHECK-NEXT: add z1.s, z1.s, z1.s -; CHECK-NEXT: add z0.s, z3.s, z3.s -; CHECK-NEXT: add z2.s, z2.s, z2.s ; CHECK-NEXT: st1w { z1.s }, p0, [x9, #2, mul vl] ; CHECK-NEXT: st1w { z2.s }, p0, [x9, #3, mul vl] -; CHECK-NEXT: st1w { z0.s }, p0, [x9, #1, mul vl] +; CHECK-NEXT: st1w { z3.s }, p0, [x9, #1, mul vl] ; CHECK-NEXT: b.ne LBB19_1 ; CHECK-NEXT: ; %bb.2: ; %exit ; CHECK-NEXT: ret ; ; CHECK-BE-LABEL: zext_v16i8_to_v16i32_in_loop_scalable_vectors: ; CHECK-BE: // %bb.0: // %entry -; CHECK-BE-NEXT: mov x8, xzr ; CHECK-BE-NEXT: ptrue p0.s +; CHECK-BE-NEXT: mov x8, xzr ; CHECK-BE-NEXT: .LBB19_1: // %loop ; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-BE-NEXT: add x9, x0, x8 ; CHECK-BE-NEXT: ld1b { z0.s }, p0/z, [x0, x8] +; CHECK-BE-NEXT: add x9, x0, x8 ; CHECK-BE-NEXT: ld1b { z1.s }, 
p0/z, [x9, #2, mul vl]
; CHECK-BE-NEXT: ld1b { z2.s }, p0/z, [x9, #3, mul vl]
; CHECK-BE-NEXT: ld1b { z3.s }, p0/z, [x9, #1, mul vl]
-; CHECK-BE-NEXT: add z0.s, z0.s, z0.s
; CHECK-BE-NEXT: add x9, x1, x8, lsl #2
+; CHECK-BE-NEXT: add z0.s, z0.s, z0.s
+; CHECK-BE-NEXT: add z1.s, z1.s, z1.s
+; CHECK-BE-NEXT: add z2.s, z2.s, z2.s
+; CHECK-BE-NEXT: add z3.s, z3.s, z3.s
; CHECK-BE-NEXT: st1w { z0.s }, p0, [x1, x8, lsl #2]
; CHECK-BE-NEXT: add x8, x8, #16
; CHECK-BE-NEXT: cmp x8, #128
-; CHECK-BE-NEXT: add z1.s, z1.s, z1.s
-; CHECK-BE-NEXT: add z0.s, z3.s, z3.s
-; CHECK-BE-NEXT: add z2.s, z2.s, z2.s
; CHECK-BE-NEXT: st1w { z1.s }, p0, [x9, #2, mul vl]
; CHECK-BE-NEXT: st1w { z2.s }, p0, [x9, #3, mul vl]
-; CHECK-BE-NEXT: st1w { z0.s }, p0, [x9, #1, mul vl]
+; CHECK-BE-NEXT: st1w { z3.s }, p0, [x9, #1, mul vl]
; CHECK-BE-NEXT: b.ne .LBB19_1
; CHECK-BE-NEXT: // %bb.2: // %exit
; CHECK-BE-NEXT: ret
entry:
br label %loop
-
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%src.gep = getelementptr i8, ptr %src, i64 %iv
@@ -2030,192 +1653,54 @@
%iv.next = add nuw i64 %iv, 16
%ec = icmp eq i64 %iv.next, 128
br i1 %ec, label %exit, label %loop
-
exit:
ret void
}
-
-; CHECK-LABEL: lCPI20_0:
-; CHECK-NEXT: .byte 0 ; 0x0
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 1 ; 0x1
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 2 ; 0x2
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 3 ; 0x3
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 4 ; 0x4
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 5 ; 0x5
-; CHECK-NEXT:lCPI20_1:
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 6 ; 0x6
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 7 ; 0x7
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 8 ; 0x8
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 9 ; 0x9
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 10 ; 0xa
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT:lCPI20_2:
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 11 ; 0xb
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 12 ; 0xc
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 13 ; 0xd
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 14 ; 0xe
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 15 ; 0xf
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT:lCPI20_3:
-; CHECK-NEXT: .byte 0 ; 0x0
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 1 ; 0x1
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 2 ; 0x2
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 3 ; 0x3
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-
-; CHECK-BE-LABEL: .LCPI20_0:
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 0 // 0x0
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 1 // 0x1
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 2 // 0x2
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 3 // 0x3
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .LCPI20_1:
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 0 // 0x0
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 1 // 0x1
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 2 // 0x2
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 3 // 0x3
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 4 // 0x4
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .LCPI20_2:
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 5 // 0x5
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 6 // 0x6
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 7 // 0x7
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 8 // 0x8
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 9 // 0x9
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .LCPI20_3:
-; CHECK-BE-NEXT: .byte 10 // 0xa
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 11 // 0xb
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 12 // 0xc
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 13 // 0xd
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 14 // 0xe
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 15 // 0xf
-
define void @zext_v20i8_to_v20i24_in_loop(ptr %src, ptr %dst) {
+;
; CHECK-LABEL: zext_v20i8_to_v20i24_in_loop:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: Lloh22:
-; CHECK-NEXT: adrp x9, lCPI20_0@PAGE
+; CHECK-NEXT: adrp x8, lCPI20_0@PAGE
; CHECK-NEXT: Lloh23:
-; CHECK-NEXT: adrp x10, lCPI20_1@PAGE
+; CHECK-NEXT: adrp x9, lCPI20_1@PAGE
; CHECK-NEXT: Lloh24:
-; CHECK-NEXT: adrp x11, lCPI20_2@PAGE
+; CHECK-NEXT: adrp x10, lCPI20_2@PAGE
; CHECK-NEXT: Lloh25:
-; CHECK-NEXT: adrp x12, lCPI20_3@PAGE
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: ldr q0, [x8, lCPI20_0@PAGEOFF]
; CHECK-NEXT: Lloh26:
-; CHECK-NEXT: ldr q0, [x9, lCPI20_0@PAGEOFF]
+; CHECK-NEXT: adrp x8, lCPI20_3@PAGE
; CHECK-NEXT: Lloh27:
-; CHECK-NEXT: ldr q1, [x10, lCPI20_1@PAGEOFF]
+; CHECK-NEXT: ldr q1, [x9, lCPI20_1@PAGEOFF]
; CHECK-NEXT: Lloh28:
-; CHECK-NEXT: ldr q2, [x11, lCPI20_2@PAGEOFF]
+; CHECK-NEXT: ldr q2, [x10, lCPI20_2@PAGEOFF]
; CHECK-NEXT: Lloh29:
-; CHECK-NEXT: ldr q3, [x12, lCPI20_3@PAGEOFF]
+; CHECK-NEXT: ldr q3, [x8, lCPI20_3@PAGEOFF]
+; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: LBB20_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: add x9, x0, x8
; CHECK-NEXT: add x8, x8, #16
-; CHECK-NEXT: cmp x8, #128
-; CHECK-NEXT: ldp q4, q5, [x9]
+; CHECK-NEXT: ldp q5, q4, [x9]
; CHECK-NEXT: add x9, x1, #56
-; CHECK-NEXT: tbl.16b v6, { v4 }, v2
-; CHECK-NEXT: tbl.16b v7, { v4 }, v1
-; CHECK-NEXT: tbl.16b v4, { v4 }, v0
-; CHECK-NEXT: tbl.16b v5, { v5 }, v3
+; CHECK-NEXT: cmp x8, #128
+; CHECK-NEXT: tbl.16b v4, { v4 }, v3
+; CHECK-NEXT: tbl.16b v6, { v5 }, v2
+; CHECK-NEXT: tbl.16b v7, { v5 }, v1
+; CHECK-NEXT: tbl.16b v5, { v5 }, v0
; CHECK-NEXT: stp q7, q6, [x1, #16]
-; CHECK-NEXT: str q4, [x1]
-; CHECK-NEXT: str d5, [x1, #48]
+; CHECK-NEXT: str q5, [x1]
+; CHECK-NEXT: str d4, [x1, #48]
; CHECK-NEXT: add x1, x1, #64
-; CHECK-NEXT: st1.s { v5 }[2], [x9]
+; CHECK-NEXT: st1.s { v4 }[2], [x9]
; CHECK-NEXT: b.ne LBB20_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
-; CHECK-NEXT: .loh AdrpLdr Lloh25, Lloh29
+; CHECK-NEXT: .loh AdrpLdr Lloh26, Lloh29
; CHECK-NEXT: .loh AdrpLdr Lloh24, Lloh28
; CHECK-NEXT: .loh AdrpLdr Lloh23, Lloh27
-; CHECK-NEXT: .loh AdrpLdr Lloh22, Lloh26
+; CHECK-NEXT: .loh AdrpAdrp Lloh22, Lloh26
+; CHECK-NEXT: .loh AdrpLdr Lloh22, Lloh25
;
; CHECK-BE-LABEL: zext_v20i8_to_v20i24_in_loop:
; CHECK-BE: // %bb.0: // %entry
@@ -2237,31 +1722,29 @@
; CHECK-BE-NEXT: add x9, x0, x8
; CHECK-BE-NEXT: add x8, x8, #16
; CHECK-BE-NEXT: add x10, x9, #16
-; CHECK-BE-NEXT: cmp x8, #128
; CHECK-BE-NEXT: ld1 { v5.16b }, [x9]
; CHECK-BE-NEXT: add x9, x1, #32
; CHECK-BE-NEXT: ld1 { v4.16b }, [x10]
-; CHECK-BE-NEXT: add x10, x1, #56
+; CHECK-BE-NEXT: cmp x8, #128
; CHECK-BE-NEXT: tbl v6.16b, { v5.16b }, v3.16b
-; CHECK-BE-NEXT: tbl v7.16b, { v5.16b }, v1.16b
+; CHECK-BE-NEXT: tbl v7.16b, { v5.16b }, v2.16b
+; CHECK-BE-NEXT: tbl v5.16b, { v5.16b }, v1.16b
; CHECK-BE-NEXT: tbl v4.16b, { v4.16b }, v0.16b
-; CHECK-BE-NEXT: tbl v5.16b, { v5.16b }, v2.16b
; CHECK-BE-NEXT: st1 { v6.16b }, [x9]
; CHECK-BE-NEXT: add x9, x1, #16
-; CHECK-BE-NEXT: st1 { v7.16b }, [x1]
-; CHECK-BE-NEXT: rev64 v16.16b, v4.16b
-; CHECK-BE-NEXT: rev32 v4.16b, v4.16b
-; CHECK-BE-NEXT: st1 { v5.16b }, [x9]
-; CHECK-BE-NEXT: str d16, [x1, #48]
+; CHECK-BE-NEXT: rev32 v16.16b, v4.16b
+; CHECK-BE-NEXT: rev64 v4.16b, v4.16b
+; CHECK-BE-NEXT: st1 { v7.16b }, [x9]
+; CHECK-BE-NEXT: add x9, x1, #56
+; CHECK-BE-NEXT: st1 { v5.16b }, [x1]
+; CHECK-BE-NEXT: str d4, [x1, #48]
; CHECK-BE-NEXT: add x1, x1, #64
-; CHECK-BE-NEXT: st1 { v4.s }[2], [x10]
+; CHECK-BE-NEXT: st1 { v16.s }[2], [x9]
; CHECK-BE-NEXT: b.ne .LBB20_1
; CHECK-BE-NEXT: // %bb.2: // %exit
; CHECK-BE-NEXT: ret
-
entry:
br label %loop
-
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%src.gep = getelementptr i8, ptr %src, i64 %iv
@@ -2272,296 +1755,72 @@
%iv.next = add nuw i64 %iv, 16
%ec = icmp eq i64 %iv.next, 128
br i1 %ec, label %exit, label %loop
-
exit:
ret void
}
-
-; CHECK-LABEL: lCPI21_0:
-; CHECK-NEXT: .byte 0 ; 0x0
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 1 ; 0x1
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 2 ; 0x2
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: lCPI21_1:
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 3 ; 0x3
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 4 ; 0x4
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 5 ; 0x5
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: lCPI21_2:
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 6 ; 0x6
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 7 ; 0x7
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: lCPI21_3:
-; CHECK-NEXT: .byte 8 ; 0x8
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 9 ; 0x9
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 10 ; 0xa
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: lCPI21_4:
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 11 ; 0xb
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 12 ; 0xc
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 13 ; 0xd
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: lCPI21_5:
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 14 ; 0xe
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 15 ; 0xf
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-; CHECK-NEXT: .byte 255 ; 0xff
-
-; CHECK-BE-LABEL: .LCPI21_0:
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 5 // 0x5
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 6 // 0x6
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .LCPI21_1:
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 0 // 0x0
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 1 // 0x1
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .LCPI21_2:
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 2 // 0x2
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 3 // 0x3
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 4 // 0x4
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .LCPI21_3:
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 5 // 0x5
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 6 // 0x6
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 7 // 0x7
-; CHECK-BE-NEXT: .LCPI21_4:
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 8 // 0x8
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 9 // 0x9
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .LCPI21_5:
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 10 // 0xa
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 11 // 0xb
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 12 // 0xc
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .LCPI21_6:
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 13 // 0xd
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 14 // 0xe
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 255 // 0xff
-; CHECK-BE-NEXT: .byte 15 // 0xf
-
define void @zext_v23i8_to_v23i48_in_loop(ptr %src, ptr %dst) {
+;
; CHECK-LABEL: zext_v23i8_to_v23i48_in_loop:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: Lloh30:
-; CHECK-NEXT: adrp x9, lCPI21_0@PAGE
+; CHECK-NEXT: adrp x8, lCPI21_0@PAGE
; CHECK-NEXT: Lloh31:
-; CHECK-NEXT: adrp x10, lCPI21_1@PAGE
+; CHECK-NEXT: adrp x9, lCPI21_1@PAGE
; CHECK-NEXT: Lloh32:
-; CHECK-NEXT: adrp x11, lCPI21_2@PAGE
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: adrp x10, lCPI21_2@PAGE
; CHECK-NEXT: Lloh33:
-; CHECK-NEXT: ldr q0, [x9, lCPI21_0@PAGEOFF]
+; CHECK-NEXT: ldr q0, [x8, lCPI21_0@PAGEOFF]
; CHECK-NEXT: Lloh34:
-; CHECK-NEXT: adrp x9, lCPI21_3@PAGE
+; CHECK-NEXT: ldr q1, [x9, lCPI21_1@PAGEOFF]
; CHECK-NEXT: Lloh35:
-; CHECK-NEXT: ldr q1, [x10, lCPI21_1@PAGEOFF]
+; CHECK-NEXT: ldr q2, [x10, lCPI21_2@PAGEOFF]
; CHECK-NEXT: Lloh36:
-; CHECK-NEXT: adrp x10, lCPI21_4@PAGE
+; CHECK-NEXT: adrp x8, lCPI21_3@PAGE
; CHECK-NEXT: Lloh37:
-; CHECK-NEXT: ldr q2, [x11, lCPI21_2@PAGEOFF]
+; CHECK-NEXT: adrp x9, lCPI21_4@PAGE
; CHECK-NEXT: Lloh38:
-; CHECK-NEXT: adrp x11, lCPI21_5@PAGE
+; CHECK-NEXT: adrp x10, lCPI21_5@PAGE
; CHECK-NEXT: Lloh39:
-; CHECK-NEXT: ldr q3, [x9, lCPI21_3@PAGEOFF]
+; CHECK-NEXT: ldr q3, [x8, lCPI21_3@PAGEOFF]
; CHECK-NEXT: Lloh40:
-; CHECK-NEXT: ldr q4, [x10, lCPI21_4@PAGEOFF]
+; CHECK-NEXT: ldr q4, [x9, lCPI21_4@PAGEOFF]
; CHECK-NEXT: Lloh41:
-; CHECK-NEXT: ldr q5, [x11, lCPI21_5@PAGEOFF]
+; CHECK-NEXT: ldr q5, [x10, lCPI21_5@PAGEOFF]
+; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: LBB21_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: add x9, x0, x8
+; CHECK-NEXT: movi.2d v19, #0000000000000000
; CHECK-NEXT: add x8, x8, #16
-; CHECK-NEXT: movi.2d v6, #0000000000000000
+; CHECK-NEXT: ldp q7, q6, [x9]
; CHECK-NEXT: cmp x8, #128
-; CHECK-NEXT: ldp q16, q7, [x9]
; CHECK-NEXT: strh wzr, [x1, #136]
-; CHECK-NEXT: tbl.16b v18, { v16 }, v5
-; CHECK-NEXT: tbl.16b v19, { v16 }, v4
-; CHECK-NEXT: mov.b v6[4], v7[6]
+; CHECK-NEXT: tbl.16b v16, { v6 }, v1
+; CHECK-NEXT: tbl.16b v17, { v6 }, v0
+; CHECK-NEXT: mov.b v19[4], v6[6]
+; CHECK-NEXT: tbl.16b v18, { v7 }, v5
+; CHECK-NEXT: tbl.16b v20, { v7 }, v4
+; CHECK-NEXT: tbl.16b v21, { v7 }, v3
+; CHECK-NEXT: stp q17, q16, [x1, #96]
+; CHECK-NEXT: tbl.16b v16, { v7 }, v2
; CHECK-NEXT: tbl.16b v17, { v7 }, v1
; CHECK-NEXT: tbl.16b v7, { v7 }, v0
-; CHECK-NEXT: tbl.16b v20, { v16 }, v3
-; CHECK-NEXT: stp q19, q18, [x1, #64]
-; CHECK-NEXT: fmov x9, d6
-; CHECK-NEXT: stp q7, q17, [x1, #96]
-; CHECK-NEXT: tbl.16b v17, { v16 }, v2
-; CHECK-NEXT: tbl.16b v7, { v16 }, v1
-; CHECK-NEXT: tbl.16b v16, { v16 }, v0
-; CHECK-NEXT: stp q17, q20, [x1, #32]
-; CHECK-NEXT: stp q16, q7, [x1]
+; CHECK-NEXT: fmov x9, d19
+; CHECK-NEXT: stp q20, q18, [x1, #64]
+; CHECK-NEXT: stp q16, q21, [x1, #32]
+; CHECK-NEXT: stp q7, q17, [x1]
; CHECK-NEXT: str x9, [x1, #128]!
; CHECK-NEXT: b.ne LBB21_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
; CHECK-NEXT: .loh AdrpLdr Lloh38, Lloh41
-; CHECK-NEXT: .loh AdrpLdr Lloh36, Lloh40
-; CHECK-NEXT: .loh AdrpLdr Lloh34, Lloh39
+; CHECK-NEXT: .loh AdrpLdr Lloh37, Lloh40
+; CHECK-NEXT: .loh AdrpLdr Lloh36, Lloh39
; CHECK-NEXT: .loh AdrpAdrp Lloh32, Lloh38
-; CHECK-NEXT: .loh AdrpLdr Lloh32, Lloh37
-; CHECK-NEXT: .loh AdrpAdrp Lloh31, Lloh36
-; CHECK-NEXT: .loh AdrpLdr Lloh31, Lloh35
-; CHECK-NEXT: .loh AdrpAdrp Lloh30, Lloh34
+; CHECK-NEXT: .loh AdrpLdr Lloh32, Lloh35
+; CHECK-NEXT: .loh AdrpAdrp Lloh31, Lloh37
+; CHECK-NEXT: .loh AdrpLdr Lloh31, Lloh34
+; CHECK-NEXT: .loh AdrpAdrp Lloh30, Lloh36
; CHECK-NEXT: .loh AdrpLdr Lloh30, Lloh33
;
; CHECK-BE-LABEL: zext_v23i8_to_v23i48_in_loop:
; CHECK-BE: // %bb.0: // %entry
@@ -2591,50 +1850,46 @@
; CHECK-BE-NEXT: .LBB21_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-BE-NEXT: add x9, x0, x8
-; CHECK-BE-NEXT: add x11, x1, #64
-; CHECK-BE-NEXT: add x10, x1, #80
; CHECK-BE-NEXT: add x8, x8, #16
-; CHECK-BE-NEXT: cmp x8, #128
; CHECK-BE-NEXT: ld1 { v7.16b }, [x9]
; CHECK-BE-NEXT: add x9, x9, #16
-; CHECK-BE-NEXT: ld1 { v18.16b }, [x9]
-; CHECK-BE-NEXT: add x9, x1, #48
-; CHECK-BE-NEXT: tbl v17.16b, { v7.16b }, v5.16b
+; CHECK-BE-NEXT: cmp x8, #128
+; CHECK-BE-NEXT: ld1 { v17.16b }, [x9]
+; CHECK-BE-NEXT: add x9, x1, #80
; CHECK-BE-NEXT: tbl v16.16b, { v7.16b }, v6.16b
-; CHECK-BE-NEXT: tbl v19.16b, { v7.16b }, v3.16b
-; CHECK-BE-NEXT: tbl v20.16b, { v18.16b }, v0.16b
-; CHECK-BE-NEXT: st1 { v17.16b }, [x11]
-; CHECK-BE-NEXT: add x11, x1, #16
-; CHECK-BE-NEXT: tbl v17.16b, { v7.16b }, v4.16b
-; CHECK-BE-NEXT: st1 { v16.16b }, [x10]
-; CHECK-BE-NEXT: add x10, x1, #32
-; CHECK-BE-NEXT: tbl v16.16b, { v7.16b }, v1.16b
-; CHECK-BE-NEXT: tbl v7.16b, { v7.16b }, v2.16b
-; CHECK-BE-NEXT: tbl v21.16b, { v18.16b }, v1.16b
-; CHECK-BE-NEXT: st1 { v17.16b }, [x9]
-; CHECK-BE-NEXT: tbl v17.16b, { v18.16b }, v2.16b
+; CHECK-BE-NEXT: tbl v18.16b, { v7.16b }, v5.16b
+; CHECK-BE-NEXT: tbl v19.16b, { v7.16b }, v4.16b
+; CHECK-BE-NEXT: tbl v20.16b, { v7.16b }, v3.16b
+; CHECK-BE-NEXT: tbl v21.16b, { v17.16b }, v0.16b
+; CHECK-BE-NEXT: st1 { v16.16b }, [x9]
+; CHECK-BE-NEXT: add x9, x1, #64
+; CHECK-BE-NEXT: tbl v16.16b, { v7.16b }, v2.16b
+; CHECK-BE-NEXT: st1 { v18.16b }, [x9]
+; CHECK-BE-NEXT: add x9, x1, #48
+; CHECK-BE-NEXT: tbl v18.16b, { v17.16b }, v2.16b
+; CHECK-BE-NEXT: st1 { v19.16b }, [x9]
+; CHECK-BE-NEXT: add x9, x1, #32
+; CHECK-BE-NEXT: tbl v17.16b, { v17.16b }, v1.16b
+; CHECK-BE-NEXT: st1 { v20.16b }, [x9]
+; CHECK-BE-NEXT: add x9, x1, #16
+; CHECK-BE-NEXT: rev64 v19.16b, v21.16b
+; CHECK-BE-NEXT: st1 { v16.16b }, [x9]
+; CHECK-BE-NEXT: rev16 v16.16b, v21.16b
; CHECK-BE-NEXT: add x9, x1, #112
-; CHECK-BE-NEXT: rev64 v18.16b, v20.16b
-; CHECK-BE-NEXT: st1 { v19.16b }, [x10]
-; CHECK-BE-NEXT: rev16 v19.16b, v20.16b
-; CHECK-BE-NEXT: add x10, x1, #96
-; CHECK-BE-NEXT: st1 { v7.16b }, [x11]
-; CHECK-BE-NEXT: add x11, x1, #136
+; CHECK-BE-NEXT: st1 { v18.16b }, [x9]
+; CHECK-BE-NEXT: add x9, x1, #96
+; CHECK-BE-NEXT: tbl v7.16b, { v7.16b }, v1.16b
; CHECK-BE-NEXT: st1 { v17.16b }, [x9]
-; CHECK-BE-NEXT: fmov x9, d18
-; CHECK-BE-NEXT: st1 { v21.16b }, [x10]
-; CHECK-BE-NEXT: st1 { v19.h }[4], [x11]
-; CHECK-BE-NEXT: st1 { v16.16b }, [x1]
+; CHECK-BE-NEXT: add x9, x1, #136
+; CHECK-BE-NEXT: st1 { v16.h }[4], [x9]
+; CHECK-BE-NEXT: fmov x9, d19
+; CHECK-BE-NEXT: st1 { v7.16b }, [x1]
; CHECK-BE-NEXT: str x9, [x1, #128]!
; CHECK-BE-NEXT: b.ne .LBB21_1
; CHECK-BE-NEXT: // %bb.2: // %exit
; CHECK-BE-NEXT: ret
-
-
-
entry:
br label %loop
-
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%src.gep = getelementptr i8, ptr %src, i64 %iv
@@ -2645,12 +1900,11 @@
%iv.next = add nuw i64 %iv, 16
%ec = icmp eq i64 %iv.next, 128
br i1 %ec, label %exit, label %loop
-
exit:
ret void
}
-
define void @zext_v8i8_to_v8i33_in_loop(ptr %src, ptr %dst) {
+;
; CHECK-LABEL: zext_v8i8_to_v8i33_in_loop:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: mov x8, xzr
@@ -2658,32 +1912,32 @@
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr d0, [x0, x8]
; CHECK-NEXT: add x8, x8, #16
-; CHECK-NEXT: cmp x8, #128
; CHECK-NEXT: strb wzr, [x1, #32]
+; CHECK-NEXT: cmp x8, #128
; CHECK-NEXT: ushll.8h v0, v0, #0
-; CHECK-NEXT: ushll.4s v1, v0, #0
-; CHECK-NEXT: ushll2.4s v0, v0, #0
-; CHECK-NEXT: ushll.2d v2, v1, #0
+; CHECK-NEXT: ushll2.4s v1, v0, #0
+; CHECK-NEXT: ushll.4s v0, v0, #0
+; CHECK-NEXT: ushll2.2d v2, v1, #0
+; CHECK-NEXT: ushll.2d v1, v1, #0
; CHECK-NEXT: ushll2.2d v3, v0, #0
-; CHECK-NEXT: ushll2.2d v1, v1, #0
-; CHECK-NEXT: mov.d x9, v3[1]
-; CHECK-NEXT: fmov x10, d3
-; CHECK-NEXT: mov.d x12, v1[1]
; CHECK-NEXT: ushll.2d v0, v0, #0
-; CHECK-NEXT: lsl x9, x9, #39
-; CHECK-NEXT: orr x9, x9, x10, lsl #6
-; CHECK-NEXT: fmov x10, d1
-; CHECK-NEXT: mov.d x11, v0[1]
-; CHECK-NEXT: lsl x12, x12, #35
-; CHECK-NEXT: mov.d x14, v2[1]
-; CHECK-NEXT: fmov x13, d0
-; CHECK-NEXT: orr x10, x12, x10, lsl #2
+; CHECK-NEXT: mov.d x9, v2[1]
+; CHECK-NEXT: mov.d x10, v1[1]
; CHECK-NEXT: fmov x12, d2
-; CHECK-NEXT: lsl x11, x11, #37
-; CHECK-NEXT: orr x11, x11, x13, lsl #4
-; CHECK-NEXT: orr x12, x12, x14, lsl #33
-; CHECK-NEXT: stp x11, x9, [x1, #16]
-; CHECK-NEXT: stp x12, x10, [x1], #128
+; CHECK-NEXT: mov.d x11, v3[1]
+; CHECK-NEXT: mov.d x13, v0[1]
+; CHECK-NEXT: lsl x9, x9, #39
+; CHECK-NEXT: lsl x10, x10, #37
+; CHECK-NEXT: lsl x11, x11, #35
+; CHECK-NEXT: orr x9, x9, x12, lsl #6
+; CHECK-NEXT: fmov x12, d1
+; CHECK-NEXT: orr x10, x10, x12, lsl #4
+; CHECK-NEXT: fmov x12, d3
+; CHECK-NEXT: stp x10, x9, [x1, #16]
+; CHECK-NEXT: orr x11, x11, x12, lsl #2
+; CHECK-NEXT: fmov x12, d0
+; CHECK-NEXT: orr x9, x12, x13, lsl #33
+; CHECK-NEXT: stp x9, x11, [x1], #128
; CHECK-NEXT: b.ne LBB22_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
@@ -2695,45 +1949,42 @@
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-BE-NEXT: add x9, x0, x8
; CHECK-BE-NEXT: add x8, x8, #16
-; CHECK-BE-NEXT: cmp x8, #128
; CHECK-BE-NEXT: ld1 { v0.8b }, [x9]
+; CHECK-BE-NEXT: cmp x8, #128
; CHECK-BE-NEXT: ushll v0.8h, v0.8b, #0
-; CHECK-BE-NEXT: ushll v1.4s, v0.4h, #0
-; CHECK-BE-NEXT: ushll2 v0.4s, v0.8h, #0
+; CHECK-BE-NEXT: ushll2 v1.4s, v0.8h, #0
+; CHECK-BE-NEXT: ushll v0.4s, v0.4h, #0
; CHECK-BE-NEXT: ushll v2.2d, v1.2s, #0
+; CHECK-BE-NEXT: ushll2 v1.2d, v1.4s, #0
; CHECK-BE-NEXT: ushll2 v3.2d, v0.4s, #0
; CHECK-BE-NEXT: ushll v0.2d, v0.2s, #0
-; CHECK-BE-NEXT: mov x9, v3.d[1]
-; CHECK-BE-NEXT: ushll2 v1.2d, v1.4s, #0
-; CHECK-BE-NEXT: fmov x10, d3
-; CHECK-BE-NEXT: mov x11, v0.d[1]
+; CHECK-BE-NEXT: mov x9, v2.d[1]
+; CHECK-BE-NEXT: mov x10, v1.d[1]
+; CHECK-BE-NEXT: fmov x13, d1
+; CHECK-BE-NEXT: mov x11, v3.d[1]
+; CHECK-BE-NEXT: mov x12, v0.d[1]
+; CHECK-BE-NEXT: fmov x14, d2
+; CHECK-BE-NEXT: fmov x15, d3
+; CHECK-BE-NEXT: lsl x9, x9, #2
+; CHECK-BE-NEXT: orr x13, x10, x13, lsl #33
+; CHECK-BE-NEXT: strb w10, [x1, #32]
+; CHECK-BE-NEXT: lsl x11, x11, #4
+; CHECK-BE-NEXT: lsl x12, x12, #6
+; CHECK-BE-NEXT: orr x14, x9, x14, lsl #35
+; CHECK-BE-NEXT: extr x9, x9, x13, #8
; CHECK-BE-NEXT: fmov x13, d0
-; CHECK-BE-NEXT: mov x12, v1.d[1]
-; CHECK-BE-NEXT: strb w9, [x1, #32]
-; CHECK-BE-NEXT: orr x10, x9, x10, lsl #33
-; CHECK-BE-NEXT: fmov x15, d1
-; CHECK-BE-NEXT: mov x14, v2.d[1]
-; CHECK-BE-NEXT: lsl x11, x11, #2
-; CHECK-BE-NEXT: lsl x12, x12, #4
-; CHECK-BE-NEXT: orr x13, x11, x13, lsl #35
-; CHECK-BE-NEXT: extr x10, x11, x10, #8
-; CHECK-BE-NEXT: fmov x11, d2
-; CHECK-BE-NEXT: orr x15, x12, x15, lsl #37
-; CHECK-BE-NEXT: lsl x14, x14, #6
-; CHECK-BE-NEXT: extr x9, x12, x13, #8
-; CHECK-BE-NEXT: orr x11, x14, x11, lsl #39
-; CHECK-BE-NEXT: extr x12, x14, x15, #8
-; CHECK-BE-NEXT: lsr x11, x11, #8
-; CHECK-BE-NEXT: stp x9, x10, [x1, #16]
-; CHECK-BE-NEXT: stp x11, x12, [x1], #128
+; CHECK-BE-NEXT: orr x15, x11, x15, lsl #37
+; CHECK-BE-NEXT: extr x10, x11, x14, #8
+; CHECK-BE-NEXT: orr x11, x12, x13, lsl #39
+; CHECK-BE-NEXT: extr x12, x12, x15, #8
+; CHECK-BE-NEXT: stp x10, x9, [x1, #16]
+; CHECK-BE-NEXT: lsr x9, x11, #8
+; CHECK-BE-NEXT: stp x9, x12, [x1], #128
; CHECK-BE-NEXT: b.ne .LBB22_1
; CHECK-BE-NEXT: // %bb.2: // %exit
; CHECK-BE-NEXT: ret
-
-
entry:
br label %loop
-
loop:
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
%src.gep = getelementptr i8, ptr %src, i64 %iv
@@ -2744,44 +1995,43 @@
%iv.next = add nuw i64 %iv, 16
%ec = icmp eq i64 %iv.next, 128
br i1 %ec, label %exit, label %loop
-
exit:
ret void
}
-
; FIXME: Widening instructions should be used instead of tbl.
define i32 @test_pr62620_widening_instr(ptr %p1, ptr %p2, i64 %lx, i32 %h) {
+;
; CHECK-LABEL: test_pr62620_widening_instr:
; CHECK: ; %bb.0: ; %entry
-; CHECK-NEXT: mov x8, x0
-; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: lsl x9, x2, #4
+; CHECK-NEXT: mov w8, wzr
; CHECK-NEXT: LBB23_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr q0, [x8, x9]
-; CHECK-NEXT: subs w3, w3, #1
+; CHECK-NEXT: ldr q0, [x0, x9]
; CHECK-NEXT: ldr q1, [x1, x9]
+; CHECK-NEXT: subs w3, w3, #1
; CHECK-NEXT: uabdl.8h v2, v0, v1
; CHECK-NEXT: uabal2.8h v2, v0, v1
; CHECK-NEXT: uaddlv.8h s0, v2
; CHECK-NEXT: fmov w10, s0
-; CHECK-NEXT: add w0, w10, w0
+; CHECK-NEXT: add w8, w10, w8
; CHECK-NEXT: b.ne LBB23_1
; CHECK-NEXT: ; %bb.2: ; %exit
+; CHECK-NEXT: mov w0, w8
; CHECK-NEXT: ret
;
; CHECK-BE-LABEL: test_pr62620_widening_instr:
; CHECK-BE: // %bb.0: // %entry
-; CHECK-BE-NEXT: mov x8, x0
; CHECK-BE-NEXT: lsl x9, x2, #4
+; CHECK-BE-NEXT: mov x8, x0
; CHECK-BE-NEXT: mov w0, wzr
; CHECK-BE-NEXT: add x8, x8, x9
; CHECK-BE-NEXT: add x9, x1, x9
; CHECK-BE-NEXT: .LBB23_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-BE-NEXT: ld1 { v0.16b }, [x8]
-; CHECK-BE-NEXT: subs w3, w3, #1
; CHECK-BE-NEXT: ld1 { v1.16b }, [x9]
+; CHECK-BE-NEXT: subs w3, w3, #1
; CHECK-BE-NEXT: uabdl v2.8h, v0.8b, v1.8b
; CHECK-BE-NEXT: uabal2 v2.8h, v0.16b, v1.16b
; CHECK-BE-NEXT: uaddlv s0, v2.8h
@@ -2792,7 +2042,6 @@
; CHECK-BE-NEXT: ret
entry:
br label %loop
-
loop:
%s0 = phi i32 [ 0, %entry ], [ %op.rdx, %loop ]
%j.0261 = phi i32 [ 0, %entry ], [ %inc, %loop ]
@@ -2809,36 +2058,33 @@
%inc = add nuw nsw i32 %j.0261, 1
%exitcond.not = icmp eq i32 %inc, %h
br i1 %exitcond.not, label %exit, label %loop
-
exit:
%s1 = phi i32 [ %op.rdx, %loop ]
ret i32 %s1
}
-
declare <16 x i32> @llvm.abs.v16i32(<16 x i32>, i1 immarg)
-
declare i32 @llvm.vector.reduce.add.v16i32(<16 x i32>)
-
define i32 @test_widening_instr_mull(ptr %p1, ptr %p2, i32 %h) {
+;
; CHECK-LABEL: test_widening_instr_mull:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: mov x8, x0
; CHECK-NEXT: LBB24_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr q0, [x1], #16
-; CHECK-NEXT: ldr q1, [x0]
-; CHECK-NEXT: subs w2, w2, #1
+; CHECK-NEXT: ldr q3, [x0]
; CHECK-NEXT: ldr q2, [x8, #16]!
-; CHECK-NEXT: ushll2.8h v3, v0, #0
+; CHECK-NEXT: subs w2, w2, #1
+; CHECK-NEXT: ushll2.8h v1, v0, #0
; CHECK-NEXT: ushll.8h v0, v0, #0
-; CHECK-NEXT: umull2.4s v4, v2, v3
-; CHECK-NEXT: umull.4s v2, v2, v3
-; CHECK-NEXT: umull.4s v3, v1, v0
-; CHECK-NEXT: umull2.4s v0, v1, v0
-; CHECK-NEXT: stp q2, q4, [x0, #32]
-; CHECK-NEXT: str q3, [x0]
+; CHECK-NEXT: umull2.4s v4, v2, v1
+; CHECK-NEXT: umull.4s v1, v2, v1
+; CHECK-NEXT: umull2.4s v2, v3, v0
+; CHECK-NEXT: umull.4s v0, v3, v0
+; CHECK-NEXT: stp q1, q4, [x0, #32]
+; CHECK-NEXT: str q0, [x0]
; CHECK-NEXT: mov x0, x8
-; CHECK-NEXT: str q0, [x8]
+; CHECK-NEXT: str q2, [x8]
; CHECK-NEXT: b.ne LBB24_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: mov w0, wzr
@@ -2849,31 +2095,30 @@
; CHECK-BE-NEXT: .LBB24_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-BE-NEXT: ld1 { v0.16b }, [x1]
+; CHECK-BE-NEXT: ld1 { v1.8h }, [x0]
; CHECK-BE-NEXT: add x8, x0, #16
+; CHECK-BE-NEXT: ld1 { v3.8h }, [x8]
; CHECK-BE-NEXT: add x9, x0, #48
; CHECK-BE-NEXT: add x10, x0, #32
-; CHECK-BE-NEXT: ld1 { v1.8h }, [x0]
; CHECK-BE-NEXT: subs w2, w2, #1
; CHECK-BE-NEXT: add x1, x1, #16
-; CHECK-BE-NEXT: ld1 { v4.8h }, [x8]
; CHECK-BE-NEXT: ushll v2.8h, v0.8b, #0
; CHECK-BE-NEXT: ushll2 v0.8h, v0.16b, #0
-; CHECK-BE-NEXT: umull v3.4s, v1.4h, v2.4h
+; CHECK-BE-NEXT: umull v4.4s, v1.4h, v2.4h
+; CHECK-BE-NEXT: umull2 v5.4s, v3.8h, v0.8h
+; CHECK-BE-NEXT: umull v0.4s, v3.4h, v0.4h
; CHECK-BE-NEXT: umull2 v1.4s, v1.8h, v2.8h
-; CHECK-BE-NEXT: umull2 v2.4s, v4.8h, v0.8h
-; CHECK-BE-NEXT: umull v0.4s, v4.4h, v0.4h
-; CHECK-BE-NEXT: st1 { v3.4s }, [x0]
+; CHECK-BE-NEXT: st1 { v4.4s }, [x0]
; CHECK-BE-NEXT: mov x0, x8
-; CHECK-BE-NEXT: st1 { v1.4s }, [x8]
-; CHECK-BE-NEXT: st1 { v2.4s }, [x9]
+; CHECK-BE-NEXT: st1 { v5.4s }, [x9]
; CHECK-BE-NEXT: st1 { v0.4s }, [x10]
+; CHECK-BE-NEXT: st1 { v1.4s }, [x8]
; CHECK-BE-NEXT: b.ne .LBB24_1
; CHECK-BE-NEXT: // %bb.2: // %exit
; CHECK-BE-NEXT: mov w0, wzr
; CHECK-BE-NEXT: ret
entry:
br label %loop
-
loop:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
%gep.1 = getelementptr inbounds <16 x i8>, ptr %p1, i32 %iv
@@ -2887,12 +2132,11 @@
%iv.next= add nuw nsw i32 %iv, 1
%exitcond.not = icmp eq i32 %iv.next, %h
br i1 %exitcond.not, label %exit, label %loop
-
exit:
ret i32 0
}
-
define i32 @test_widening_instr_mull_64(ptr %p1, ptr %p2, i32 %h) {
+;
; CHECK-LABEL: test_widening_instr_mull_64:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: Lloh42:
@@ -2900,51 +2144,52 @@
; CHECK-NEXT: Lloh43:
; CHECK-NEXT: adrp x9, lCPI25_1@PAGE
; CHECK-NEXT: Lloh44:
-; CHECK-NEXT: adrp x10, lCPI25_2@PAGE
+; CHECK-NEXT: adrp x10, lCPI25_3@PAGE
; CHECK-NEXT: Lloh45:
-; CHECK-NEXT: adrp x11, lCPI25_3@PAGE
-; CHECK-NEXT: Lloh46:
; CHECK-NEXT: ldr q0, [x8, lCPI25_0@PAGEOFF]
-; CHECK-NEXT: mov x8, x1
+; CHECK-NEXT: Lloh46:
+; CHECK-NEXT: adrp x8, lCPI25_2@PAGE
; CHECK-NEXT: Lloh47:
; CHECK-NEXT: ldr q1, [x9, lCPI25_1@PAGEOFF]
; CHECK-NEXT: Lloh48:
-; CHECK-NEXT: ldr q2, [x10, lCPI25_2@PAGEOFF]
+; CHECK-NEXT: ldr q2, [x8, lCPI25_2@PAGEOFF]
; CHECK-NEXT: Lloh49:
-; CHECK-NEXT: ldr q3, [x11, lCPI25_3@PAGEOFF]
+; CHECK-NEXT: ldr q3, [x10, lCPI25_3@PAGEOFF]
+; CHECK-NEXT: mov x8, x1
; CHECK-NEXT: LBB25_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr q4, [x0]
+; CHECK-NEXT: ldp q16, q7, [x1, #32]
+; CHECK-NEXT: ldr q18, [x8, #16]!
; CHECK-NEXT: subs w2, w2, #1
-; CHECK-NEXT: ldp q7, q17, [x1, #32]
-; CHECK-NEXT: tbl.16b v16, { v4 }, v3
-; CHECK-NEXT: tbl.16b v18, { v4 }, v0
-; CHECK-NEXT: tbl.16b v19, { v4 }, v1
-; CHECK-NEXT: tbl.16b v4, { v4 }, v2
-; CHECK-NEXT: ldr q5, [x1]
-; CHECK-NEXT: ldr q6, [x8, #16]!
-; CHECK-NEXT: umull2.2d v20, v16, v17
+; CHECK-NEXT: tbl.16b v5, { v4 }, v3
+; CHECK-NEXT: tbl.16b v6, { v4 }, v0
+; CHECK-NEXT: tbl.16b v17, { v4 }, v2
+; CHECK-NEXT: tbl.16b v4, { v4 }, v1
+; CHECK-NEXT: umull2.2d v19, v5, v7
+; CHECK-NEXT: umull.2d v5, v5, v7
+; CHECK-NEXT: ldr q7, [x1]
+; CHECK-NEXT: umull2.2d v20, v6, v16
+; CHECK-NEXT: umull2.2d v21, v17, v18
+; CHECK-NEXT: umull.2d v17, v17, v18
+; CHECK-NEXT: umull2.2d v18, v4, v7
+; CHECK-NEXT: umull.2d v4, v4, v7
; CHECK-NEXT: mov x1, x8
-; CHECK-NEXT: umull2.2d v21, v18, v7
-; CHECK-NEXT: umull.2d v16, v16, v17
-; CHECK-NEXT: umull2.2d v17, v4, v6
-; CHECK-NEXT: umull.2d v4, v4, v6
-; CHECK-NEXT: umull2.2d v6, v19, v5
-; CHECK-NEXT: str q21, [x0, #80]
-; CHECK-NEXT: umull.2d v5, v19, v5
-; CHECK-NEXT: stp q16, q20, [x0, #96]
-; CHECK-NEXT: umull.2d v7, v18, v7
-; CHECK-NEXT: stp q4, q17, [x0, #32]
-; CHECK-NEXT: stp q5, q6, [x0]
-; CHECK-NEXT: str q7, [x0, #64]!
+; CHECK-NEXT: stp q5, q19, [x0, #96]
+; CHECK-NEXT: umull.2d v5, v6, v16
+; CHECK-NEXT: str q20, [x0, #80]
+; CHECK-NEXT: stp q4, q18, [x0]
+; CHECK-NEXT: stp q17, q21, [x0, #32]
+; CHECK-NEXT: str q5, [x0, #64]!
; CHECK-NEXT: b.ne LBB25_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
-; CHECK-NEXT: .loh AdrpLdr Lloh45, Lloh49
-; CHECK-NEXT: .loh AdrpLdr Lloh44, Lloh48
+; CHECK-NEXT: .loh AdrpLdr Lloh46, Lloh48
+; CHECK-NEXT: .loh AdrpLdr Lloh44, Lloh49
; CHECK-NEXT: .loh AdrpLdr Lloh43, Lloh47
-; CHECK-NEXT: .loh AdrpLdr Lloh42, Lloh46
+; CHECK-NEXT: .loh AdrpAdrp Lloh42, Lloh46
+; CHECK-NEXT: .loh AdrpLdr Lloh42, Lloh45
;
; CHECK-BE-LABEL: test_widening_instr_mull_64:
; CHECK-BE: // %bb.0: // %entry
@@ -2963,64 +2208,64 @@
; CHECK-BE-NEXT: .LBB25_1: // %loop
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-BE-NEXT: ld1 { v4.16b }, [x0]
-; CHECK-BE-NEXT: add x8, x1, #48
-; CHECK-BE-NEXT: add x9, x1, #32
-; CHECK-BE-NEXT: subs w2, w2, #1
-; CHECK-BE-NEXT: ld1 { v5.4s }, [x1]
+; CHECK-BE-NEXT: add x9, x1, #48
+; CHECK-BE-NEXT: add x8, x1, #32
+; CHECK-BE-NEXT: ld1 { v18.4s }, [x9]
+; CHECK-BE-NEXT: ld1 { v16.4s }, [x1]
; CHECK-BE-NEXT: add x1, x1, #16
-; CHECK-BE-NEXT: ld1 { v6.4s }, [x8]
+; CHECK-BE-NEXT: ld1 { v20.4s }, [x8]
+; CHECK-BE-NEXT: ld1 { v22.4s }, [x1]
; CHECK-BE-NEXT: add x8, x0, #96
-; CHECK-BE-NEXT: tbl v7.16b, { v4.16b }, v3.16b
-; CHECK-BE-NEXT: tbl v17.16b, { v4.16b }, v1.16b
-; CHECK-BE-NEXT: ld1 { v20.4s }, [x1]
-; CHECK-BE-NEXT: tbl v16.16b, { v4.16b }, v0.16b
-; CHECK-BE-NEXT: tbl v4.16b, { v4.16b }, v2.16b
-; CHECK-BE-NEXT: ld1 { v18.4s }, [x9]
-; CHECK-BE-NEXT: rev32 v19.8b, v7.8b
+; CHECK-BE-NEXT: tbl v5.16b, { v4.16b }, v3.16b
+; CHECK-BE-NEXT: tbl v6.16b, { v4.16b }, v2.16b
+; CHECK-BE-NEXT: tbl v7.16b, { v4.16b }, v1.16b
+; CHECK-BE-NEXT: tbl v4.16b, { v4.16b }, v0.16b
+; CHECK-BE-NEXT: ext v24.16b, v18.16b, v18.16b, #8
; CHECK-BE-NEXT: add x9, x0, #32
-; CHECK-BE-NEXT: ext v23.16b, v6.16b, v6.16b, #8
-; CHECK-BE-NEXT: rev32 v22.8b, v17.8b
+; CHECK-BE-NEXT: ext v25.16b, v20.16b, v20.16b, #8
+; CHECK-BE-NEXT: add x10, x0, #16
+; CHECK-BE-NEXT: subs w2, w2, #1
+; CHECK-BE-NEXT: ext v17.16b, v5.16b, v5.16b, #8
+; CHECK-BE-NEXT: ext v19.16b, v6.16b, v6.16b, #8
+; CHECK-BE-NEXT: rev32 v5.8b, v5.8b
+; CHECK-BE-NEXT: rev32 v21.8b, v7.8b
+; CHECK-BE-NEXT: rev32 v23.8b, v4.8b
; CHECK-BE-NEXT: ext v7.16b, v7.16b, v7.16b, #8
-; CHECK-BE-NEXT: ext v24.16b, v4.16b, v4.16b, #8
-; CHECK-BE-NEXT: umull v6.2d, v19.2s, v6.2s
-; CHECK-BE-NEXT: umull v19.2d, v22.2s, v20.2s
-; CHECK-BE-NEXT: ext v22.16b, v18.16b, v18.16b, #8
-; CHECK-BE-NEXT: ext v21.16b, v5.16b, v5.16b, #8
-; CHECK-BE-NEXT: st1 { v6.2d }, [x8]
-; CHECK-BE-NEXT: rev32 v6.8b, v7.8b
-; CHECK-BE-NEXT: ext v7.16b, v17.16b, v17.16b, #8
-; CHECK-BE-NEXT: rev32 v17.8b, v16.8b
+; CHECK-BE-NEXT: ext v4.16b, v4.16b, v4.16b, #8
+; CHECK-BE-NEXT: rev32 v6.8b, v6.8b
+; CHECK-BE-NEXT: rev32 v17.8b, v17.8b
+; CHECK-BE-NEXT: rev32 v19.8b, v19.8b
+; CHECK-BE-NEXT: umull v5.2d, v5.2s, v18.2s
+; CHECK-BE-NEXT: umull v18.2d, v21.2s, v22.2s
+; CHECK-BE-NEXT: ext v21.16b, v22.16b, v22.16b, #8
+; CHECK-BE-NEXT: rev32 v7.8b, v7.8b
+; CHECK-BE-NEXT: umull v22.2d, v23.2s, v16.2s
; CHECK-BE-NEXT: ext v16.16b, v16.16b, v16.16b, #8
-; CHECK-BE-NEXT: add x8, x0, #112
-; CHECK-BE-NEXT: st1 { v19.2d }, [x9]
-; CHECK-BE-NEXT: rev32 v19.8b, v24.8b
-; CHECK-BE-NEXT: umull v6.2d, v6.2s, v23.2s
; CHECK-BE-NEXT: rev32 v4.8b, v4.8b
-; CHECK-BE-NEXT: umull v5.2d, v17.2s, v5.2s
+; CHECK-BE-NEXT: umull v17.2d, v17.2s, v24.2s
+; CHECK-BE-NEXT: umull v19.2d, v19.2s, v25.2s
+; CHECK-BE-NEXT: st1 { v5.2d }, [x8]
+; CHECK-BE-NEXT: umull v5.2d, v6.2s, v20.2s
+; CHECK-BE-NEXT: umull v6.2d, v7.2s, v21.2s
+; CHECK-BE-NEXT: add x8, x0, #112
+; CHECK-BE-NEXT: umull v4.2d, v4.2s, v16.2s
+; CHECK-BE-NEXT: st1 { v18.2d }, [x9]
; CHECK-BE-NEXT: add x9, x0, #80
-; CHECK-BE-NEXT: rev32 v7.8b, v7.8b
-; CHECK-BE-NEXT: rev32 v16.8b, v16.8b
-; CHECK-BE-NEXT: st1 { v6.2d }, [x8]
-; CHECK-BE-NEXT: add x8, x0, #48
-; CHECK-BE-NEXT: ext v6.16b, v20.16b, v20.16b, #8
-; CHECK-BE-NEXT: st1 { v5.2d }, [x0]
-; CHECK-BE-NEXT: umull v17.2d, v19.2s, v22.2s
-; CHECK-BE-NEXT: umull v4.2d, v4.2s, v18.2s
-; CHECK-BE-NEXT: umull v5.2d, v7.2s, v6.2s
-; CHECK-BE-NEXT: umull v6.2d, v16.2s, v21.2s
-; CHECK-BE-NEXT: st1 { v17.2d }, [x9]
-; CHECK-BE-NEXT: add x9, x0, #16
-; CHECK-BE-NEXT: add x0, x0, #64
+; CHECK-BE-NEXT: st1 { v22.2d }, [x0]
+; CHECK-BE-NEXT: st1 { v17.2d }, [x8]
+; CHECK-BE-NEXT: add x8, x0, #64
+; CHECK-BE-NEXT: st1 { v19.2d }, [x9]
+; CHECK-BE-NEXT: add x9, x0, #48
+; CHECK-BE-NEXT: mov x0, x8
; CHECK-BE-NEXT: st1 { v5.2d }, [x8]
-; CHECK-BE-NEXT: st1 { v4.2d }, [x0]
; CHECK-BE-NEXT: st1 { v6.2d }, [x9]
+; CHECK-BE-NEXT: st1 { v4.2d }, [x10]
; CHECK-BE-NEXT: b.ne .LBB25_1
; CHECK-BE-NEXT: // %bb.2: // %exit
; CHECK-BE-NEXT: mov w0, wzr
; CHECK-BE-NEXT: ret
entry:
br label %loop
-
loop:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
%gep.1 = getelementptr inbounds <16 x i32>, ptr %p1, i32 %iv
@@ -3034,12 +2279,11 @@
%iv.next= add nuw nsw i32 %iv, 1
%exitcond.not = icmp eq i32 %iv.next, %h
br i1 %exitcond.not, label %exit, label %loop
-
exit:
ret i32 0
}
-
define i32 @test_widening_instr_mull_2(ptr %p1, ptr %p2, i32 %h) {
+;
; CHECK-LABEL: test_widening_instr_mull_2:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: Lloh50:
@@ -3047,33 +2291,33 @@
; CHECK-NEXT: Lloh51:
; CHECK-NEXT: adrp x9, lCPI26_1@PAGE
; CHECK-NEXT: Lloh52:
-; CHECK-NEXT: adrp x10, lCPI26_2@PAGE
+; CHECK-NEXT: adrp x10, lCPI26_3@PAGE
; CHECK-NEXT: Lloh53:
-; CHECK-NEXT: adrp x11, lCPI26_3@PAGE
-; CHECK-NEXT: Lloh54:
; CHECK-NEXT: ldr q0, [x8, lCPI26_0@PAGEOFF]
-; CHECK-NEXT: mov x8, x0
+; CHECK-NEXT: Lloh54:
+; CHECK-NEXT: adrp x8, lCPI26_2@PAGE
; CHECK-NEXT: Lloh55:
; CHECK-NEXT: ldr q1, [x9, lCPI26_1@PAGEOFF]
; CHECK-NEXT: Lloh56:
-; CHECK-NEXT: ldr q2, [x10, lCPI26_2@PAGEOFF]
+; CHECK-NEXT: ldr q2, [x8, lCPI26_2@PAGEOFF]
; CHECK-NEXT: Lloh57:
-; CHECK-NEXT: ldr q3, [x11, lCPI26_3@PAGEOFF]
+; CHECK-NEXT: ldr q3, [x10, lCPI26_3@PAGEOFF]
+; CHECK-NEXT: mov x8, x0
; CHECK-NEXT: LBB26_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr q4, [x1], #16
-; CHECK-NEXT: ldp q5, q6, [x0, #32]
+; CHECK-NEXT: ldr q18, [x0]
+; CHECK-NEXT: ldp q16, q17, [x0, #32]
; CHECK-NEXT: subs w2, w2, #1
-; CHECK-NEXT: tbl.16b v16, { v4 }, v0
-; CHECK-NEXT: tbl.16b v18, { v4 }, v1
-; CHECK-NEXT: tbl.16b v19, { v4 }, v2
+; CHECK-NEXT: tbl.16b v5, { v4 }, v0
+; CHECK-NEXT: tbl.16b v6, { v4 }, v1
+; CHECK-NEXT: tbl.16b v7, { v4 }, v2
; CHECK-NEXT: tbl.16b v4, { v4 }, v3
-; CHECK-NEXT: ldr q7, [x0]
-; CHECK-NEXT: ldr q17, [x8, #16]!
-; CHECK-NEXT: mul.4s v5, v5, v16
-; CHECK-NEXT: mul.4s v6, v6, v18
-; CHECK-NEXT: mul.4s v7, v7, v19
-; CHECK-NEXT: mul.4s v4, v17, v4
+; CHECK-NEXT: mul.4s v5, v16, v5
+; CHECK-NEXT: ldr q16, [x8, #16]!
+; CHECK-NEXT: mul.4s v6, v17, v6
+; CHECK-NEXT: mul.4s v7, v18, v7
+; CHECK-NEXT: mul.4s v4, v16, v4
; CHECK-NEXT: stp q5, q6, [x0, #32]
; CHECK-NEXT: str q7, [x0]
; CHECK-NEXT: mov x0, x8
@@ -3082,10 +2326,11 @@
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
-; CHECK-NEXT: .loh AdrpLdr Lloh53, Lloh57
-; CHECK-NEXT: .loh AdrpLdr Lloh52, Lloh56
+; CHECK-NEXT: .loh AdrpLdr Lloh54, Lloh56
+; CHECK-NEXT: .loh AdrpLdr Lloh52, Lloh57
; CHECK-NEXT: .loh AdrpLdr Lloh51, Lloh55
-; CHECK-NEXT: .loh AdrpLdr Lloh50, Lloh54
+; CHECK-NEXT: .loh AdrpAdrp Lloh50, Lloh54
+; CHECK-NEXT: .loh AdrpLdr Lloh50, Lloh53
;
; CHECK-BE-LABEL: test_widening_instr_mull_2:
; CHECK-BE: // %bb.0: // %entry
@@ -3105,38 +2350,37 @@
; CHECK-BE-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-BE-NEXT: ld1 { v4.16b }, [x1]
; CHECK-BE-NEXT: add x8, x0, #32
+; CHECK-BE-NEXT: ld1 { v16.4s }, [x0]
; CHECK-BE-NEXT: add x9, x0, #48
; CHECK-BE-NEXT: add x10, x0, #16
-; CHECK-BE-NEXT: ld1 { v6.4s }, [x0]
+; CHECK-BE-NEXT: ld1 { v17.4s }, [x8]
+; CHECK-BE-NEXT: ld1 { v18.4s }, [x9]
+; CHECK-BE-NEXT: ld1 { v19.4s }, [x10]
; CHECK-BE-NEXT: subs w2, w2, #1
-; CHECK-BE-NEXT: add x1, x1, #16
-; CHECK-BE-NEXT: ld1 { v16.4s }, [x8]
; CHECK-BE-NEXT: tbl v5.16b, { v4.16b }, v1.16b
-; CHECK-BE-NEXT: tbl v7.16b, { v4.16b }, v0.16b
-; CHECK-BE-NEXT: ld1 { v18.4s }, [x10]
-; CHECK-BE-NEXT: tbl v17.16b, { v4.16b }, v3.16b
-; CHECK-BE-NEXT: tbl v4.16b, { v4.16b }, v2.16b
+; CHECK-BE-NEXT: tbl v6.16b, { v4.16b }, v3.16b
+; CHECK-BE-NEXT: tbl v7.16b, { v4.16b }, v2.16b
+; CHECK-BE-NEXT: tbl v4.16b, { v4.16b }, v0.16b
+; CHECK-BE-NEXT: add x1, x1, #16
; CHECK-BE-NEXT: rev32 v5.16b, v5.16b
+; CHECK-BE-NEXT: rev32 v6.16b, v6.16b
; CHECK-BE-NEXT: rev32 v7.16b, v7.16b
-; CHECK-BE-NEXT: rev32 v17.16b, v17.16b
; CHECK-BE-NEXT: rev32 v4.16b, v4.16b
-; CHECK-BE-NEXT: mul v5.4s, v6.4s, v5.4s
-; CHECK-BE-NEXT: ld1 { v6.4s }, [x9]
+; CHECK-BE-NEXT: mul v5.4s, v16.4s, v5.4s
+; CHECK-BE-NEXT: mul v6.4s, v17.4s, v6.4s
; CHECK-BE-NEXT: mul v7.4s, v18.4s, v7.4s
+; CHECK-BE-NEXT: mul v4.4s, v19.4s, v4.4s
; CHECK-BE-NEXT: st1 { v5.4s }, [x0]
; CHECK-BE-NEXT: mov x0, x10
-; CHECK-BE-NEXT: mul v5.4s, v16.4s, v17.4s
-; CHECK-BE-NEXT: st1 { v7.4s }, [x10]
-; CHECK-BE-NEXT: mul v4.4s, v6.4s, v4.4s
-; CHECK-BE-NEXT: st1 { v5.4s }, [x8]
-; CHECK-BE-NEXT: st1 { v4.4s }, [x9]
+; CHECK-BE-NEXT: st1 { v6.4s }, [x8]
+; CHECK-BE-NEXT: st1 { v7.4s }, [x9]
+; CHECK-BE-NEXT: st1 { v4.4s }, [x10]
; CHECK-BE-NEXT: b.ne .LBB26_1
; CHECK-BE-NEXT: // %bb.2: // %exit
; CHECK-BE-NEXT: mov w0, wzr
; CHECK-BE-NEXT: ret
entry:
br label %loop
-
loop:
%iv = phi i32 [ 0, %entry ], [ %iv.next, %loop ]
%gep.1 = getelementptr inbounds <16 x i8>, ptr %p1, i32 %iv
@@ -3149,7 +2393,6 @@
%iv.next= add nuw nsw i32 %iv, 1
%exitcond.not = icmp eq i32 %iv.next, %h
br i1 %exitcond.not, label %exit, label %loop
-
exit:
ret i32 0
}
diff --git a/llvm/test/MC/AArch64/elf-globaladdress.ll b/llvm/test/MC/AArch64/elf-globaladdress.ll
--- a/llvm/test/MC/AArch64/elf-globaladdress.ll
+++ b/llvm/test/MC/AArch64/elf-globaladdress.ll
@@ -41,22 +41,22 @@
; OBJ: Relocations [
; OBJ: Section {{.*}} .rela.text {
-; OBJ: 0x{{[0-9,A-F]+}} R_AARCH64_ADR_PREL_PG_HI21 var8
-; OBJ: 0x{{[0-9,A-F]+}} R_AARCH64_ADR_PREL_PG_HI21 var16
-; OBJ: 0x{{[0-9,A-F]+}} R_AARCH64_LDST8_ABS_LO12_NC var8
-; OBJ: 0x{{[0-9,A-F]+}} R_AARCH64_ADR_PREL_PG_HI21 var32
-; OBJ: 0x{{[0-9,A-F]+}} R_AARCH64_LDST16_ABS_LO12_NC var16
-; OBJ: 0x{{[0-9,A-F]+}} R_AARCH64_ADR_PREL_PG_HI21 var64
-; OBJ: 0x{{[0-9,A-F]+}} R_AARCH64_LDST32_ABS_LO12_NC var32
-; OBJ: 0x{{[0-9,A-F]+}} R_AARCH64_LDST64_ABS_LO12_NC var64
-
-; This is on the store, so not really important, but it stops the next
-; match working.
-; OBJ: 0x{{[0-9,A-F]+}} R_AARCH64_LDST64_ABS_LO12_NC var64
-
-; Pure address-calculation against var64
-; OBJ: 0x{{[0-9,A-F]+}} R_AARCH64_ADR_PREL_PG_HI21 var64
-; OBJ: 0x{{[0-9,A-F]+}} R_AARCH64_ADD_ABS_LO12_NC var64
+; OBJ: 0x{{[0-9,A-F]+}} R_AARCH64_ADR_PREL_PG_HI21 var8 0x0
+; OBJ: 0x{{[0-9,A-F]+}} R_AARCH64_LDST8_ABS_LO12_NC var8 0x0
+; OBJ: 0x{{[0-9,A-F]+}} R_AARCH64_LDST8_ABS_LO12_NC var8 0x0
+; OBJ: 0x{{[0-9,A-F]+}} R_AARCH64_ADR_PREL_PG_HI21 var16 0x0
+; OBJ: 0x{{[0-9,A-F]+}} R_AARCH64_LDST16_ABS_LO12_NC var16 0x0
+; OBJ: 0x{{[0-9,A-F]+}} R_AARCH64_LDST16_ABS_LO12_NC var16 0x0
+; OBJ: 0x{{[0-9,A-F]+}} R_AARCH64_ADR_PREL_PG_HI21 var32 0x0
+; OBJ: 0x{{[0-9,A-F]+}} R_AARCH64_LDST32_ABS_LO12_NC var32 0x0
+; OBJ: 0x{{[0-9,A-F]+}} R_AARCH64_LDST32_ABS_LO12_NC var32 0x0
+; OBJ: 0x{{[0-9,A-F]+}} R_AARCH64_ADR_PREL_PG_HI21 var64 0x0
+; OBJ: 0x{{[0-9,A-F]+}} R_AARCH64_LDST64_ABS_LO12_NC var64 0x0
+; OBJ: 0x{{[0-9,A-F]+}} R_AARCH64_LDST64_ABS_LO12_NC var64 0x0
+; OBJ: 0x{{[0-9,A-F]+}} R_AARCH64_ADR_PREL_PG_HI21 globaddr 0x0
+; OBJ: 0x{{[0-9,A-F]+}} R_AARCH64_ADR_PREL_PG_HI21 var64 0x0
+; OBJ: 0x{{[0-9,A-F]+}} R_AARCH64_ADD_ABS_LO12_NC var64 0x0
+; OBJ: 0x{{[0-9,A-F]+}} R_AARCH64_LDST64_ABS_LO12_NC globaddr 0x0
; OBJ: }
; OBJ: ]
diff --git a/llvm/test/MachineVerifier/test_g_concat_vectors.mir b/llvm/test/MachineVerifier/test_g_concat_vectors.mir
--- a/llvm/test/MachineVerifier/test_g_concat_vectors.mir
+++ b/llvm/test/MachineVerifier/test_g_concat_vectors.mir
@@ -1,4 +1,4 @@
-#RUN: not --crash llc -o - -global-isel -run-pass=none -verify-machineinstrs %s 2>&1 | FileCheck %s
+#RUN: not --crash llc -o - -global-isel -mtriple=aarch64 -run-pass=none -verify-machineinstrs %s 2>&1 | FileCheck %s
# REQUIRES: aarch64-registered-target
---
name: g_concat_vectors
diff --git a/llvm/test/Transforms/CanonicalizeFreezeInLoops/aarch64.ll b/llvm/test/Transforms/CanonicalizeFreezeInLoops/aarch64.ll
--- a/llvm/test/Transforms/CanonicalizeFreezeInLoops/aarch64.ll
+++ b/llvm/test/Transforms/CanonicalizeFreezeInLoops/aarch64.ll
@@ -10,9 +10,9 @@
; CHECK-NEXT: add w8, w2, #1
; CHECK-NEXT: .LBB0_1: // %loop
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: subs w1, w1, #1
; CHECK-NEXT: strb wzr, [x0, w8, sxtw]
; CHECK-NEXT: add w8, w8, #1
-; CHECK-NEXT: subs w1, w1, #1
; CHECK-NEXT: b.ne .LBB0_1
; CHECK-NEXT: // %bb.2: // %exit
; CHECK-NEXT: ret
@@ -37,8 +37,8 @@
; CHECK-NEXT: add w8, w2, #1
; CHECK-NEXT: .LBB1_1: // %loop
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: strb wzr, [x0, w8, sxtw]
; CHECK-NEXT: subs w1, w1, #1
+; CHECK-NEXT: strb wzr, [x0, w8, sxtw]
; CHECK-NEXT: add w8, w8, #1
; CHECK-NEXT: b.ne .LBB1_1
; CHECK-NEXT: // %bb.2: // %exit
diff --git a/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll b/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll
--- a/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/AArch64/large-offset-gep.ll
@@ -14,9 +14,9 @@
; CHECK-NEXT: b.ge .LBB0_2
; CHECK-NEXT: .LBB0_1: // %while_body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: add w10, w8, #1
-; CHECK-NEXT: stp w10, w8, [x9]
-; CHECK-NEXT: mov w8, w10
+; CHECK-NEXT: str w8, [x9, #4]
+; CHECK-NEXT: add w8, w8, #1
+; CHECK-NEXT: str w8, [x9]
; CHECK-NEXT: cmp w8, w1
; CHECK-NEXT: b.lt .LBB0_1
; CHECK-NEXT: .LBB0_2: // %while_end
@@ -54,9 +54,9 @@
; CHECK-NEXT: b.ge .LBB1_3
; CHECK-NEXT: .LBB1_2: // %while_body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: add w10, w8, #1
-; CHECK-NEXT: stp w10, w8, [x9]
-; CHECK-NEXT: mov w8, w10
+; CHECK-NEXT: str w8, [x9, #4]
+; CHECK-NEXT: add w8, w8, #1
+; CHECK-NEXT: str w8, [x9]
; CHECK-NEXT: cmp w8, w1
; CHECK-NEXT: b.lt .LBB1_2
; CHECK-NEXT: .LBB1_3: // %while_end
@@ -96,9 +96,9 @@
; CHECK-NEXT: b.ge .LBB2_3
; CHECK-NEXT: .LBB2_2: // %while_body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: add w10, w8, #1
-; CHECK-NEXT: stp w10, w8, [x9]
-; CHECK-NEXT: mov w8, w10
+; CHECK-NEXT: str w8, [x9, #4]
+; CHECK-NEXT: add w8, w8, #1
+; CHECK-NEXT: str w8, [x9]
; CHECK-NEXT: cmp w8, w3
; CHECK-NEXT: b.lt .LBB2_2
; CHECK-NEXT: .LBB2_3: // %while_end
@@ -165,9 +165,9 @@
; CHECK-NEXT: b.ge .LBB3_4
; CHECK-NEXT: // %bb.3: // %while_body
; CHECK-NEXT: // in Loop: Header=BB3_1 Depth=1
-; CHECK-NEXT: add w9, w20, #1
-; CHECK-NEXT: stp w9, w20, [x8]
-; CHECK-NEXT: mov w20, w9
+; CHECK-NEXT: str w20, [x8, #4]
+; CHECK-NEXT: add w20, w20, #1
+; CHECK-NEXT: str w20, [x8]
; CHECK-NEXT: b .LBB3_1
; CHECK-NEXT: .LBB3_4: // %while_end
; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload
@@ -228,9 +228,9 @@
; CHECK-NEXT: b.ge .LBB4_2
; CHECK-NEXT: .LBB4_1: // %while_body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: add w10, w8, #1
-; CHECK-NEXT: stp w10, w8, [x9]
-; CHECK-NEXT: mov w8, w10
+; CHECK-NEXT: str w8, [x9, #4]
+; CHECK-NEXT: add w8, w8, #1
+; CHECK-NEXT: str w8, [x9]
; CHECK-NEXT: cmp w8, w1
; CHECK-NEXT: b.lt .LBB4_1
; CHECK-NEXT: .LBB4_2: // %while_end
diff --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-ldp.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-ldp.ll
--- a/llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-ldp.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/AArch64/lsr-ldp.ll
@@ -12,22 +12,22 @@
; CHECK-NEXT: add x12, x0, x3
; CHECK-NEXT: .LBB0_1: // %do.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: add x13, x1, x8
+; CHECK-NEXT: add x14, x0, x8
; CHECK-NEXT: ldr q0, [x11, x8]
-; CHECK-NEXT: add x13, x0, x8
+; CHECK-NEXT: ldp q2, q3, [x14]
; CHECK-NEXT: ldr q1, [x12, x8]
-; CHECK-NEXT: add x14, x1, x8
-; CHECK-NEXT: ldr q4, [x10, x8]
+; CHECK-NEXT: ldp q6, q7, [x13]
; CHECK-NEXT: subs w5, w5, #1
-; CHECK-NEXT: ldp q2, q3, [x13]
-; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
-; CHECK-NEXT: ldp q6, q1, [x14]
-; CHECK-NEXT: fadd v2.4s, v2.4s, v3.4s
-; CHECK-NEXT: fadd v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: ldr q4, [x10, x8]
; CHECK-NEXT: ldr q5, [x9, x8]
+; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: fadd v1.4s, v2.4s, v3.4s
; CHECK-NEXT: add x8, x8, #32
-; CHECK-NEXT: fadd v1.4s, v6.4s, v1.4s
-; CHECK-NEXT: fadd v3.4s, v4.4s, v5.4s
-; CHECK-NEXT: fadd v1.4s, v3.4s, v1.4s
+; CHECK-NEXT: fadd v2.4s, v4.4s, v5.4s
+; CHECK-NEXT: fadd v3.4s, v6.4s, v7.4s
+; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: fadd v1.4s, v2.4s, v3.4s
; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
; CHECK-NEXT: str q0, [x4], #16
; CHECK-NEXT: b.ne .LBB0_1
diff --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll
--- a/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/AArch64/pr53625.ll
@@ -135,8 +135,8 @@
; CHECK-NEXT: cbnz w9, .LBB2_5
; CHECK-NEXT: // %bb.3: // %for.cond
; CHECK-NEXT: // in Loop: Header=BB2_2 Depth=1
-; CHECK-NEXT: add x1, x1, #4
; CHECK-NEXT: subs x8, x8, #1
+; CHECK-NEXT: add x1, x1, #4
; CHECK-NEXT: b.ne .LBB2_2
; CHECK-NEXT: .LBB2_4:
; CHECK-NEXT: mov w0, wzr
diff --git a/llvm/test/Transforms/LoopStrengthReduce/AArch64/small-constant.ll b/llvm/test/Transforms/LoopStrengthReduce/AArch64/small-constant.ll
--- a/llvm/test/Transforms/LoopStrengthReduce/AArch64/small-constant.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/AArch64/small-constant.ll
@@ -67,9 +67,9 @@
; CHECK-NEXT: add x8, x0, #28
; CHECK-NEXT: .LBB1_2: // %for.body
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: scvtf s2, x1
+; CHECK-NEXT: scvtf s1, x1
+; CHECK-NEXT: fadd s2, s1, s0
; CHECK-NEXT: ldr s1, [x8, x1, lsl #2]
-; CHECK-NEXT: fadd s2, s2, s0
; CHECK-NEXT: fcmp s1, s2
; CHECK-NEXT: b.gt .LBB1_5
; CHECK-NEXT: // %bb.3: // %for.cond
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.generated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.generated.expected
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.generated.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.generated.expected
@@ -109,19 +109,19 @@
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: mov w8, #1 // =0x1
-; CHECK-NEXT: adrp x9, x
-; CHECK-NEXT: mov w10, #2 // =0x2
-; CHECK-NEXT: mov w11, #3 // =0x3
-; CHECK-NEXT: mov w0, wzr
-; CHECK-NEXT: str w8, [x9, :lo12:x]
-; CHECK-NEXT: mov w9, #4 // =0x4
+; CHECK-NEXT: adrp x11, x
+; CHECK-NEXT: mov w9, #2 // =0x2
+; CHECK-NEXT: mov w10, #3 // =0x3
+; CHECK-NEXT: str w8, [x11, :lo12:x]
+; CHECK-NEXT: mov w11, #4 // =0x4
; CHECK-NEXT: stp w8, wzr, [x29, #-8]
-; CHECK-NEXT: stur w10, [x29, #-12]
-; CHECK-NEXT: stp w9, w11, [sp, #12]
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: stur w9, [x29, #-12]
+; CHECK-NEXT: stp w11, w10, [sp, #12]
; CHECK-NEXT: //APP
; CHECK-NEXT: //NO_APP
-; CHECK-NEXT: stp w10, w8, [x29, #-12]
-; CHECK-NEXT: stp w9, w11, [sp, #12]
+; CHECK-NEXT: stp w9, w8, [x29, #-12]
+; CHECK-NEXT: stp w11, w10, [sp, #12]
; CHECK-NEXT: .cfi_def_cfa wsp, 48
; CHECK-NEXT: ldp x29, x30, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #48
@@ -134,7 +134,7 @@
; CHECK: // %bb.0:
; CHECK-NEXT: mov w9, #2 // =0x2
; CHECK-NEXT: mov w10, #3 // =0x3
-; CHECK-NEXT: mov w11, #4 // =0x4
; CHECK-NEXT: stp w9, w8, [x29, #-12]
-; CHECK-NEXT: stp w11, w10, [sp, #12]
+; CHECK-NEXT: mov w8, #4 // =0x4
+; CHECK-NEXT: stp w8, w10, [sp, #12]
; CHECK-NEXT: ret
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.nogenerated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.nogenerated.expected
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.nogenerated.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/aarch64_generated_funcs.ll.nogenerated.expected
@@ -86,19 +86,19 @@
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: mov w8, #1 // =0x1
-; CHECK-NEXT: adrp x9, x
-; CHECK-NEXT: mov w10, #2 // =0x2
-; CHECK-NEXT: mov w11, #3 // =0x3
-; CHECK-NEXT: mov w0, wzr
-; CHECK-NEXT: str w8, [x9, :lo12:x]
-; CHECK-NEXT: mov w9, #4 // =0x4
+; CHECK-NEXT: adrp x11, x
+; CHECK-NEXT: mov w9, #2 // =0x2
+; CHECK-NEXT: mov w10, #3 // =0x3
+; CHECK-NEXT: str w8, [x11, :lo12:x]
+; CHECK-NEXT: mov w11, #4 // =0x4
; CHECK-NEXT: stp w8, wzr, [x29, #-8]
-; CHECK-NEXT: stur w10, [x29, #-12]
-; CHECK-NEXT: stp w9, w11, [sp, #12]
+; CHECK-NEXT: mov w0, wzr
+; CHECK-NEXT: stur w9, [x29, #-12]
+; CHECK-NEXT: stp w11, w10, [sp, #12]
; CHECK-NEXT: //APP
; CHECK-NEXT: //NO_APP
-; CHECK-NEXT: stp w10, w8, [x29, #-12]
-; CHECK-NEXT: stp w9, w11, [sp, #12]
+; CHECK-NEXT: stp w9, w8, [x29, #-12]
+; CHECK-NEXT: stp w11, w10, [sp, #12]
; CHECK-NEXT: .cfi_def_cfa wsp, 48
; CHECK-NEXT: ldp x29, x30, [sp, #32] // 16-byte Folded Reload
; CHECK-NEXT: add sp, sp, #48