diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64InstPrinter.cpp @@ -1466,17 +1466,31 @@ Reg = MRI.getMatchingSuperReg(Reg, AArch64::dsub, &FPR128RC); } - for (unsigned i = 0; i < NumRegs; ++i, Reg = getNextVectorRegister(Reg)) { - if (MRI.getRegClass(AArch64::ZPRRegClassID).contains(Reg)) - printRegName(O, Reg); - else - printRegName(O, Reg, AArch64::vreg); + if (MRI.getRegClass(AArch64::ZPRRegClassID).contains(Reg) && NumRegs > 1 && + // Do not print the range when the last register is lower than the first. + // Because it is a wrap-around register. + Reg < getNextVectorRegister(Reg, NumRegs - 1)) { + printRegName(O, Reg); O << LayoutSuffix; - - if (i + 1 != NumRegs) - O << ", "; + if (NumRegs > 1) { + // Set of two sve registers should be separated by ',' + StringRef split_char = NumRegs == 2 ? ", " : " - "; + O << split_char; + printRegName(O, (getNextVectorRegister(Reg, NumRegs - 1))); + O << LayoutSuffix; + } + } else { + for (unsigned i = 0; i < NumRegs; ++i, Reg = getNextVectorRegister(Reg)) { + // wrap-around sve register + if (MRI.getRegClass(AArch64::ZPRRegClassID).contains(Reg)) + printRegName(O, Reg); + else + printRegName(O, Reg, AArch64::vreg); + O << LayoutSuffix; + if (i + 1 != NumRegs) + O << ", "; + } } - O << " }"; } diff --git a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll --- a/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll +++ b/llvm/test/CodeGen/AArch64/sve-calling-convention-mixed.ll @@ -13,8 +13,8 @@ ; CHECK-NEXT: addvl sp, sp, #-4 ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: fmov s0, #1.00000000 -; CHECK-NEXT: ld4d { z1.d, z2.d, z3.d, z4.d }, p0/z, [x0] -; CHECK-NEXT: ld4d { z16.d, z17.d, z18.d, z19.d }, p0/z, [x1] +; CHECK-NEXT: ld4d { z1.d - z4.d }, p0/z, [x0] +; CHECK-NEXT: ld4d { z16.d - z19.d }, p0/z, [x1] ; CHECK-NEXT: ld1d { z5.d }, p0/z, [x2] ; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: ptrue p0.d @@ -60,8 +60,8 @@ ; CHECK-NEXT: sub sp, sp, #16 ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: add x9, sp, #16 -; CHECK-NEXT: ld4d { z1.d, z2.d, z3.d, z4.d }, p0/z, [x0] -; CHECK-NEXT: ld4d { z16.d, z17.d, z18.d, z19.d }, p0/z, [x1] +; CHECK-NEXT: ld4d { z1.d - z4.d }, p0/z, [x0] +; CHECK-NEXT: ld4d { z16.d - z19.d }, p0/z, [x1] ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: fmov s0, #1.00000000 @@ -118,8 +118,8 @@ ; CHECK-NEXT: addvl sp, sp, #-3 ; CHECK-NEXT: ptrue p0.b ; CHECK-NEXT: fmov s0, #1.00000000 -; CHECK-NEXT: ld4d { z2.d, z3.d, z4.d, z5.d }, p0/z, [x0] -; CHECK-NEXT: ld3d { z16.d, z17.d, z18.d }, p0/z, [x1] +; CHECK-NEXT: ld4d { z2.d - z5.d }, p0/z, [x0] +; CHECK-NEXT: ld3d { z16.d - z18.d }, p0/z, [x1] ; CHECK-NEXT: ld1d { z6.d }, p0/z, [x2] ; CHECK-NEXT: fmov s1, #2.00000000 ; CHECK-NEXT: mov x0, sp @@ -234,7 +234,7 @@ ret double %x0 } -; Use AAVPCS, SVE register in z0-z7 used +; Use AAVPCS, SVE register in z0 - z7 used define void @aavpcs1(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5, i32 %s6, %s7, %s8, %s9, %s10, %s11, %s12, %s13, %s14, %s15, %s16, i32 * %ptr) nounwind { ; CHECK-LABEL: aavpcs1: @@ -267,7 +267,7 @@ ret void } -; Use AAVPCS, SVE register in z0-z7 used +; Use AAVPCS, SVE register in z0 - z7 used define void @aavpcs2(float %s0, float %s1, float %s2, float %s3, float %s4, float %s5, float %s6, %s7, %s8, %s9, %s10, %s11, %s12, %s13, %s14, %s15, %s16,float * %ptr) nounwind { ; CHECK-LABEL: aavpcs2: @@ -306,7 +306,7 @@ ret void } -; Use AAVPCS, no SVE register in z0-z7 used (floats occupy z0-z7) but predicate arg is used +; Use AAVPCS, no SVE register in z0 - z7 used (floats occupy z0 - z7) but predicate arg is used define void @aavpcs3(float %s0, float %s1, float %s2, float %s3, float %s4, float %s5, float %s6, float %s7, %s8, %s9, %s10, %s11, %s12, %s13, %s14, %s15, %s16, %s17, %p0, float * %ptr) nounwind { ; CHECK-LABEL: aavpcs3: @@ -347,7 +347,7 @@ ret void } -; use AAVPCS, SVE register in z0-z7 used (i32s dont occupy z0-z7) +; use AAVPCS, SVE register in z0 - z7 used (i32s dont occupy z0 - z7) define void @aavpcs4(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5, i32 %s6, i32 %s7, %s8, %s9, %s10, %s11, %s12, %s13, %s14, %s15, %s16, %s17, i32 * %ptr) nounwind { ; CHECK-LABEL: aavpcs4: diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+imm-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+imm-addr-mode.ll --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+imm-addr-mode.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+imm-addr-mode.ll @@ -173,7 +173,7 @@ ; CHECK-LABEL: ld3.nxv48i8: ; CHECK: // %bb.0: ; CHECK-NEXT: rdvl x8, #3 -; CHECK-NEXT: ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x8] +; CHECK-NEXT: ld3b { z0.b - z2.b }, p0/z, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 3 %base_ptr = bitcast * %base to i8 * @@ -185,7 +185,7 @@ ; CHECK-LABEL: ld3.nxv48i8_lower_bound: ; CHECK: // %bb.0: ; CHECK-NEXT: rdvl x8, #-24 -; CHECK-NEXT: ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x8] +; CHECK-NEXT: ld3b { z0.b - z2.b }, p0/z, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 -24 %base_ptr = bitcast * %base to i8 * @@ -197,7 +197,7 @@ ; CHECK-LABEL: ld3.nxv48i8_upper_bound: ; CHECK: // %bb.0: ; CHECK-NEXT: rdvl x8, #21 -; CHECK-NEXT: ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x8] +; CHECK-NEXT: ld3b { z0.b - z2.b }, p0/z, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 21 %base_ptr = bitcast * %base to i8 * @@ -209,7 +209,7 @@ ; CHECK-LABEL: ld3.nxv48i8_not_multiple_of_3_01: ; CHECK: // %bb.0: ; CHECK-NEXT: rdvl x8, #4 -; CHECK-NEXT: ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x8] +; CHECK-NEXT: ld3b { z0.b - z2.b }, p0/z, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 4 %base_ptr = bitcast * %base to i8 * @@ -221,7 +221,7 @@ ; CHECK-LABEL: ld3.nxv48i8_not_multiple_of_3_02: ; CHECK: // %bb.0: ; CHECK-NEXT: rdvl x8, #5 -; CHECK-NEXT: ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x8] +; CHECK-NEXT: ld3b { z0.b - z2.b }, p0/z, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 5 %base_ptr = bitcast * %base to i8 * @@ -233,7 +233,7 @@ ; CHECK-LABEL: ld3.nxv48i8_outside_lower_bound: ; CHECK: // %bb.0: ; CHECK-NEXT: rdvl x8, #-27 -; CHECK-NEXT: ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x8] +; CHECK-NEXT: ld3b { z0.b - z2.b }, p0/z, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 -27 %base_ptr = bitcast * %base to i8 * @@ -245,7 +245,7 @@ ; CHECK-LABEL: ld3.nxv48i8_outside_upper_bound: ; CHECK: // %bb.0: ; CHECK-NEXT: rdvl x8, #24 -; CHECK-NEXT: ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x8] +; CHECK-NEXT: ld3b { z0.b - z2.b }, p0/z, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 24 %base_ptr = bitcast * %base to i8 * @@ -258,7 +258,7 @@ ; CHECK-LABEL: ld3.nxv24i16: ; CHECK: // %bb.0: ; CHECK-NEXT: addvl x8, x0, #21 -; CHECK-NEXT: ld3h { z0.h, z1.h, z2.h }, p0/z, [x8] +; CHECK-NEXT: ld3h { z0.h - z2.h }, p0/z, [x8] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 21 %base_ptr = bitcast * %base to i16 * @@ -270,7 +270,7 @@ ; CHECK-LABEL: ld3.nxv24f16: ; CHECK: // %bb.0: ; CHECK-NEXT: addvl x8, x0, #21 -; CHECK-NEXT: ld3h { z0.h, z1.h, z2.h }, p0/z, [x8] +; CHECK-NEXT: ld3h { z0.h - z2.h }, p0/z, [x8] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 21 %base_ptr = bitcast * %base to half * @@ -282,7 +282,7 @@ ; CHECK-LABEL: ld3.nxv24bf16: ; CHECK: // %bb.0: ; CHECK-NEXT: addvl x8, x0, #-24 -; CHECK-NEXT: ld3h { z0.h, z1.h, z2.h }, p0/z, [x8] +; CHECK-NEXT: ld3h { z0.h - z2.h }, p0/z, [x8] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 -24 %base_ptr = bitcast * %base to bfloat * @@ -295,7 +295,7 @@ ; CHECK-LABEL: ld3.nxv12i32: ; CHECK: // %bb.0: ; CHECK-NEXT: addvl x8, x0, #21 -; CHECK-NEXT: ld3w { z0.s, z1.s, z2.s }, p0/z, [x8] +; CHECK-NEXT: ld3w { z0.s - z2.s }, p0/z, [x8] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 21 %base_ptr = bitcast * %base to i32 * @@ -307,7 +307,7 @@ ; CHECK-LABEL: ld3.nxv12f32: ; CHECK: // %bb.0: ; CHECK-NEXT: addvl x8, x0, #-24 -; CHECK-NEXT: ld3w { z0.s, z1.s, z2.s }, p0/z, [x8] +; CHECK-NEXT: ld3w { z0.s - z2.s }, p0/z, [x8] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 -24 %base_ptr = bitcast * %base to float * @@ -320,7 +320,7 @@ ; CHECK-LABEL: ld3.nxv6i64: ; CHECK: // %bb.0: ; CHECK-NEXT: addvl x8, x0, #21 -; CHECK-NEXT: ld3d { z0.d, z1.d, z2.d }, p0/z, [x8] +; CHECK-NEXT: ld3d { z0.d - z2.d }, p0/z, [x8] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 21 %base_ptr = bitcast * %base to i64 * @@ -332,7 +332,7 @@ ; CHECK-LABEL: ld3.nxv6f64: ; CHECK: // %bb.0: ; CHECK-NEXT: addvl x8, x0, #-24 -; CHECK-NEXT: ld3d { z0.d, z1.d, z2.d }, p0/z, [x8] +; CHECK-NEXT: ld3d { z0.d - z2.d }, p0/z, [x8] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 -24 %base_ptr = bitcast * %base to double * @@ -345,7 +345,7 @@ ; CHECK-LABEL: ld4.nxv64i8: ; CHECK: // %bb.0: ; CHECK-NEXT: rdvl x8, #4 -; CHECK-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x8] +; CHECK-NEXT: ld4b { z0.b - z3.b }, p0/z, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 4 %base_ptr = bitcast * %base to i8 * @@ -357,7 +357,7 @@ ; CHECK-LABEL: ld4.nxv64i8_lower_bound: ; CHECK: // %bb.0: ; CHECK-NEXT: rdvl x8, #-32 -; CHECK-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x8] +; CHECK-NEXT: ld4b { z0.b - z3.b }, p0/z, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 -32 %base_ptr = bitcast * %base to i8 * @@ -369,7 +369,7 @@ ; CHECK-LABEL: ld4.nxv64i8_upper_bound: ; CHECK: // %bb.0: ; CHECK-NEXT: rdvl x8, #28 -; CHECK-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x8] +; CHECK-NEXT: ld4b { z0.b - z3.b }, p0/z, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 28 %base_ptr = bitcast * %base to i8 * @@ -381,7 +381,7 @@ ; CHECK-LABEL: ld4.nxv64i8_not_multiple_of_4_01: ; CHECK: // %bb.0: ; CHECK-NEXT: rdvl x8, #5 -; CHECK-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x8] +; CHECK-NEXT: ld4b { z0.b - z3.b }, p0/z, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 5 %base_ptr = bitcast * %base to i8 * @@ -393,7 +393,7 @@ ; CHECK-LABEL: ld4.nxv64i8_not_multiple_of_4_02: ; CHECK: // %bb.0: ; CHECK-NEXT: rdvl x8, #6 -; CHECK-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x8] +; CHECK-NEXT: ld4b { z0.b - z3.b }, p0/z, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 6 %base_ptr = bitcast * %base to i8 * @@ -405,7 +405,7 @@ ; CHECK-LABEL: ld4.nxv64i8_not_multiple_of_4_03: ; CHECK: // %bb.0: ; CHECK-NEXT: rdvl x8, #7 -; CHECK-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x8] +; CHECK-NEXT: ld4b { z0.b - z3.b }, p0/z, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 7 %base_ptr = bitcast * %base to i8 * @@ -420,7 +420,7 @@ ; CHECK-NEXT: mov x9, #-576 ; CHECK-NEXT: lsr x8, x8, #4 ; CHECK-NEXT: mul x8, x8, x9 -; CHECK-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x8] +; CHECK-NEXT: ld4b { z0.b - z3.b }, p0/z, [x0, x8] ; CHECK-NEXT: ret ; FIXME: optimize OFFSET computation so that xOFFSET = (mul (RDVL #4) #9) ; xM = -9 * 2^6 @@ -439,7 +439,7 @@ ; CHECK-NEXT: mov w9, #512 ; CHECK-NEXT: lsr x8, x8, #4 ; CHECK-NEXT: mul x8, x8, x9 -; CHECK-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x8] +; CHECK-NEXT: ld4b { z0.b - z3.b }, p0/z, [x0, x8] ; CHECK-NEXT: ret ; FIXME: optimize OFFSET computation so that xOFFSET = (mul (RDVL #16) #2) ; xM = 2^9 @@ -456,7 +456,7 @@ ; CHECK-LABEL: ld4.nxv32i16: ; CHECK: // %bb.0: ; CHECK-NEXT: addvl x8, x0, #8 -; CHECK-NEXT: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x8] +; CHECK-NEXT: ld4h { z0.h - z3.h }, p0/z, [x8] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 8 %base_ptr = bitcast * %base to i16 * @@ -468,7 +468,7 @@ ; CHECK-LABEL: ld4.nxv32f16: ; CHECK: // %bb.0: ; CHECK-NEXT: addvl x8, x0, #28 -; CHECK-NEXT: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x8] +; CHECK-NEXT: ld4h { z0.h - z3.h }, p0/z, [x8] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 28 %base_ptr = bitcast * %base to half * @@ -480,7 +480,7 @@ ; CHECK-LABEL: ld4.nxv32bf16: ; CHECK: // %bb.0: ; CHECK-NEXT: addvl x8, x0, #-32 -; CHECK-NEXT: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x8] +; CHECK-NEXT: ld4h { z0.h - z3.h }, p0/z, [x8] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 -32 %base_ptr = bitcast * %base to bfloat * @@ -493,7 +493,7 @@ ; CHECK-LABEL: ld4.nxv16i32: ; CHECK: // %bb.0: ; CHECK-NEXT: addvl x8, x0, #28 -; CHECK-NEXT: ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x8] +; CHECK-NEXT: ld4w { z0.s - z3.s }, p0/z, [x8] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 28 %base_ptr = bitcast * %base to i32 * @@ -505,7 +505,7 @@ ; CHECK-LABEL: ld4.nxv16f32: ; CHECK: // %bb.0: ; CHECK-NEXT: addvl x8, x0, #-32 -; CHECK-NEXT: ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x8] +; CHECK-NEXT: ld4w { z0.s - z3.s }, p0/z, [x8] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 -32 %base_ptr = bitcast * %base to float * @@ -518,7 +518,7 @@ ; CHECK-LABEL: ld4.nxv8i64: ; CHECK: // %bb.0: ; CHECK-NEXT: addvl x8, x0, #28 -; CHECK-NEXT: ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x8] +; CHECK-NEXT: ld4d { z0.d - z3.d }, p0/z, [x8] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 28 %base_ptr = bitcast * %base to i64 * @@ -530,7 +530,7 @@ ; CHECK-LABEL: ld4.nxv8f64: ; CHECK: // %bb.0: ; CHECK-NEXT: addvl x8, x0, #-32 -; CHECK-NEXT: ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x8] +; CHECK-NEXT: ld4d { z0.d - z3.d }, p0/z, [x8] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 -32 %base_ptr = bitcast * %base to double * diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+reg-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+reg-addr-mode.ll --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+reg-addr-mode.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-ldN-sret-reg+reg-addr-mode.ll @@ -90,7 +90,7 @@ define { , , } @ld3.nxv48i8( %Pg, i8 *%addr, i64 %a) { ; CHECK-LABEL: ld3.nxv48i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x1] +; CHECK-NEXT: ld3b { z0.b - z2.b }, p0/z, [x0, x1] ; CHECK-NEXT: ret %addr2 = getelementptr i8, i8 * %addr, i64 %a %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv16i8( %Pg, i8 *%addr2) @@ -101,7 +101,7 @@ define { , , } @ld3.nxv24i16( %Pg, i16 *%addr, i64 %a) { ; CHECK-LABEL: ld3.nxv24i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, x1, lsl #1] +; CHECK-NEXT: ld3h { z0.h - z2.h }, p0/z, [x0, x1, lsl #1] ; CHECK-NEXT: ret %addr2 = getelementptr i16, i16 * %addr, i64 %a %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv8i16( %Pg, i16 *%addr2) @@ -111,7 +111,7 @@ define { , , } @ld3.nxv24f16( %Pg, half *%addr, i64 %a) { ; CHECK-LABEL: ld3.nxv24f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, x1, lsl #1] +; CHECK-NEXT: ld3h { z0.h - z2.h }, p0/z, [x0, x1, lsl #1] ; CHECK-NEXT: ret %addr2 = getelementptr half, half * %addr, i64 %a %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv8f16( %Pg, half *%addr2) @@ -121,7 +121,7 @@ define { , , } @ld3.nxv24bf16( %Pg, bfloat *%addr, i64 %a) #0 { ; CHECK-LABEL: ld3.nxv24bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, x1, lsl #1] +; CHECK-NEXT: ld3h { z0.h - z2.h }, p0/z, [x0, x1, lsl #1] ; CHECK-NEXT: ret %addr2 = getelementptr bfloat, bfloat * %addr, i64 %a %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv8bf16( %Pg, bfloat *%addr2) @@ -132,7 +132,7 @@ define { , , } @ld3.nxv12i32( %Pg, i32 *%addr, i64 %a) { ; CHECK-LABEL: ld3.nxv12i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ld3w { z0.s, z1.s, z2.s }, p0/z, [x0, x1, lsl #2] +; CHECK-NEXT: ld3w { z0.s - z2.s }, p0/z, [x0, x1, lsl #2] ; CHECK-NEXT: ret %addr2 = getelementptr i32, i32 * %addr, i64 %a %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv4i32( %Pg, i32 *%addr2) @@ -142,7 +142,7 @@ define { , , } @ld3.nxv12f32( %Pg, float *%addr, i64 %a) { ; CHECK-LABEL: ld3.nxv12f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ld3w { z0.s, z1.s, z2.s }, p0/z, [x0, x1, lsl #2] +; CHECK-NEXT: ld3w { z0.s - z2.s }, p0/z, [x0, x1, lsl #2] ; CHECK-NEXT: ret %addr2 = getelementptr float, float * %addr, i64 %a %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv4f32( %Pg, float *%addr2) @@ -153,7 +153,7 @@ define { , , } @ld3.nxv6i64( %Pg, i64 *%addr, i64 %a) { ; CHECK-LABEL: ld3.nxv6i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ld3d { z0.d, z1.d, z2.d }, p0/z, [x0, x1, lsl #3] +; CHECK-NEXT: ld3d { z0.d - z2.d }, p0/z, [x0, x1, lsl #3] ; CHECK-NEXT: ret %addr2 = getelementptr i64, i64 * %addr, i64 %a %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv2i64( %Pg, i64 *%addr2) @@ -163,7 +163,7 @@ define { , , } @ld3.nxv6f64( %Pg, double *%addr, i64 %a) { ; CHECK-LABEL: ld3.nxv6f64: ; CHECK: // %bb.0: -; CHECK-NEXT: ld3d { z0.d, z1.d, z2.d }, p0/z, [x0, x1, lsl #3] +; CHECK-NEXT: ld3d { z0.d - z2.d }, p0/z, [x0, x1, lsl #3] ; CHECK-NEXT: ret %addr2 = getelementptr double, double * %addr, i64 %a %res = call { , , } @llvm.aarch64.sve.ld3.sret.nxv2f64( %Pg, double *%addr2) @@ -174,7 +174,7 @@ define { , , , } @ld4.nxv64i8( %Pg, i8 *%addr, i64 %a) { ; CHECK-LABEL: ld4.nxv64i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x1] +; CHECK-NEXT: ld4b { z0.b - z3.b }, p0/z, [x0, x1] ; CHECK-NEXT: ret %addr2 = getelementptr i8, i8 * %addr, i64 %a %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv16i8( %Pg, i8 *%addr2) @@ -185,7 +185,7 @@ define { , , , } @ld4.nxv32i16( %Pg, i16 *%addr, i64 %a) { ; CHECK-LABEL: ld4.nxv32i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, x1, lsl #1] +; CHECK-NEXT: ld4h { z0.h - z3.h }, p0/z, [x0, x1, lsl #1] ; CHECK-NEXT: ret %addr2 = getelementptr i16, i16 * %addr, i64 %a %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8i16( %Pg, i16 *%addr2) @@ -195,7 +195,7 @@ define { , , , } @ld4.nxv32f16( %Pg, half *%addr, i64 %a) { ; CHECK-LABEL: ld4.nxv32f16: ; CHECK: // %bb.0: -; CHECK-NEXT: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, x1, lsl #1] +; CHECK-NEXT: ld4h { z0.h - z3.h }, p0/z, [x0, x1, lsl #1] ; CHECK-NEXT: ret %addr2 = getelementptr half, half * %addr, i64 %a %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8f16( %Pg, half *%addr2) @@ -205,7 +205,7 @@ define { , , , } @ld4.nxv32bf16( %Pg, bfloat *%addr, i64 %a) #0 { ; CHECK-LABEL: ld4.nxv32bf16: ; CHECK: // %bb.0: -; CHECK-NEXT: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, x1, lsl #1] +; CHECK-NEXT: ld4h { z0.h - z3.h }, p0/z, [x0, x1, lsl #1] ; CHECK-NEXT: ret %addr2 = getelementptr bfloat, bfloat * %addr, i64 %a %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv8bf16( %Pg, bfloat *%addr2) @@ -216,7 +216,7 @@ define { , , , } @ld4.nxv16i32( %Pg, i32 *%addr, i64 %a) { ; CHECK-LABEL: ld4.nxv16i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0, x1, lsl #2] +; CHECK-NEXT: ld4w { z0.s - z3.s }, p0/z, [x0, x1, lsl #2] ; CHECK-NEXT: ret %addr2 = getelementptr i32, i32 * %addr, i64 %a %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv4i32( %Pg, i32 *%addr2) @@ -226,7 +226,7 @@ define { , , , } @ld4.nxv16f32( %Pg, float *%addr, i64 %a) { ; CHECK-LABEL: ld4.nxv16f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0, x1, lsl #2] +; CHECK-NEXT: ld4w { z0.s - z3.s }, p0/z, [x0, x1, lsl #2] ; CHECK-NEXT: ret %addr2 = getelementptr float, float * %addr, i64 %a %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv4f32( %Pg, float *%addr2) @@ -237,7 +237,7 @@ define { , , , } @ld4.nxv8i64( %Pg, i64 *%addr, i64 %a) { ; CHECK-LABEL: ld4.nxv8i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0, x1, lsl #3] +; CHECK-NEXT: ld4d { z0.d - z3.d }, p0/z, [x0, x1, lsl #3] ; CHECK-NEXT: ret %addr2 = getelementptr i64, i64 * %addr, i64 %a %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv2i64( %Pg, i64 *%addr2) @@ -247,7 +247,7 @@ define { , , , } @ld4.nxv8f64( %Pg, double *%addr, i64 %a) { ; CHECK-LABEL: ld4.nxv8f64: ; CHECK: // %bb.0: -; CHECK-NEXT: ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0, x1, lsl #3] +; CHECK-NEXT: ld4d { z0.d - z3.d }, p0/z, [x0, x1, lsl #3] ; CHECK-NEXT: ret %addr2 = getelementptr double, double * %addr, i64 %a %res = call { , , , } @llvm.aarch64.sve.ld4.sret.nxv2f64( %Pg, double *%addr2) diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-imm-addr-mode.ll @@ -217,7 +217,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, #3, mul vl] +; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, #3, mul vl] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 3, i64 0 call void @llvm.aarch64.sve.st3.nxv16i8( %v0, @@ -235,7 +235,7 @@ ; CHECK-NEXT: rdvl x8, #4 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, x8] +; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 4, i64 0 call void @llvm.aarch64.sve.st3.nxv16i8( %v0, @@ -253,7 +253,7 @@ ; CHECK-NEXT: rdvl x8, #5 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, x8] +; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 5, i64 0 call void @llvm.aarch64.sve.st3.nxv16i8( %v0, @@ -271,7 +271,7 @@ ; CHECK-NEXT: rdvl x8, #-27 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, x8] +; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 -27, i64 0 call void @llvm.aarch64.sve.st3.nxv16i8( %v0, @@ -289,7 +289,7 @@ ; CHECK-NEXT: rdvl x8, #24 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, x8] +; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 24, i64 0 call void @llvm.aarch64.sve.st3.nxv16i8( %v0, @@ -306,7 +306,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, #-24, mul vl] +; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, #-24, mul vl] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 -24, i64 0 call void @llvm.aarch64.sve.st3.nxv16i8( %v0, @@ -323,7 +323,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, #21, mul vl] +; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, #21, mul vl] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 21, i64 0 call void @llvm.aarch64.sve.st3.nxv16i8( %v0, @@ -344,7 +344,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: st3h { z0.h, z1.h, z2.h }, p0, [x0, #6, mul vl] +; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [x0, #6, mul vl] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 6, i64 0 call void @llvm.aarch64.sve.st3.nxv8i16( %v0, @@ -361,7 +361,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: st3h { z0.h, z1.h, z2.h }, p0, [x0, #9, mul vl] +; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [x0, #9, mul vl] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 9, i64 0 call void @llvm.aarch64.sve.st3.nxv8f16( %v0, @@ -382,7 +382,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: st3w { z0.s, z1.s, z2.s }, p0, [x0, #12, mul vl] +; CHECK-NEXT: st3w { z0.s - z2.s }, p0, [x0, #12, mul vl] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 12, i64 0 call void @llvm.aarch64.sve.st3.nxv4i32( %v0, @@ -399,7 +399,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: st3w { z0.s, z1.s, z2.s }, p0, [x0, #15, mul vl] +; CHECK-NEXT: st3w { z0.s - z2.s }, p0, [x0, #15, mul vl] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 15, i64 0 call void @llvm.aarch64.sve.st3.nxv4f32( %v0, @@ -420,7 +420,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: st3d { z0.d, z1.d, z2.d }, p0, [x0, #18, mul vl] +; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [x0, #18, mul vl] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 18, i64 0 call void @llvm.aarch64.sve.st3.nxv2i64( %v0, @@ -437,7 +437,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: st3d { z0.d, z1.d, z2.d }, p0, [x0, #-3, mul vl] +; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [x0, #-3, mul vl] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 -3, i64 0 call void @llvm.aarch64.sve.st3.nxv2f64( %v0, @@ -459,7 +459,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, #4, mul vl] +; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, #4, mul vl] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 4, i64 0 call void @llvm.aarch64.sve.st4.nxv16i8( %v0, @@ -479,7 +479,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x8] +; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 5, i64 0 call void @llvm.aarch64.sve.st4.nxv16i8( %v0, @@ -499,7 +499,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x8] +; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 6, i64 0 call void @llvm.aarch64.sve.st4.nxv16i8( %v0, @@ -519,7 +519,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x8] +; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, x8] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 7, i64 0 call void @llvm.aarch64.sve.st4.nxv16i8( %v0, @@ -542,7 +542,7 @@ ; CHECK-NEXT: mul x8, x8, x9 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x8] +; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, x8] ; CHECK-NEXT: ret ; FIXME: optimize OFFSET computation so that xOFFSET = (mul (RDVL #4) #9) ; xM = -9 * 2^6 @@ -569,7 +569,7 @@ ; CHECK-NEXT: mul x8, x8, x9 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x8] +; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, x8] ; CHECK-NEXT: ret ; FIXME: optimize OFFSET computation so that xOFFSET = (shl (RDVL #16) #1) ; xM = 2^9 @@ -592,7 +592,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, #-32, mul vl] +; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, #-32, mul vl] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 -32, i64 0 call void @llvm.aarch64.sve.st4.nxv16i8( %v0, @@ -611,7 +611,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, #28, mul vl] +; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, #28, mul vl] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 28, i64 0 call void @llvm.aarch64.sve.st4.nxv16i8( %v0, @@ -634,7 +634,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, #8, mul vl] +; CHECK-NEXT: st4h { z0.h - z3.h }, p0, [x0, #8, mul vl] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 8, i64 0 call void @llvm.aarch64.sve.st4.nxv8i16( %v0, @@ -653,7 +653,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, #12, mul vl] +; CHECK-NEXT: st4h { z0.h - z3.h }, p0, [x0, #12, mul vl] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 12, i64 0 call void @llvm.aarch64.sve.st4.nxv8f16( %v0, @@ -676,7 +676,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, #16, mul vl] +; CHECK-NEXT: st4w { z0.s - z3.s }, p0, [x0, #16, mul vl] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 16, i64 0 call void @llvm.aarch64.sve.st4.nxv4i32( %v0, @@ -695,7 +695,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, #20, mul vl] +; CHECK-NEXT: st4w { z0.s - z3.s }, p0, [x0, #20, mul vl] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 20, i64 0 call void @llvm.aarch64.sve.st4.nxv4f32( %v0, @@ -718,7 +718,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, #24, mul vl] +; CHECK-NEXT: st4d { z0.d - z3.d }, p0, [x0, #24, mul vl] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 24, i64 0 call void @llvm.aarch64.sve.st4.nxv2i64( %v0, @@ -737,7 +737,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, #28, mul vl] +; CHECK-NEXT: st4d { z0.d - z3.d }, p0, [x0, #28, mul vl] ; CHECK-NEXT: ret %base = getelementptr , * %addr, i64 28, i64 0 call void @llvm.aarch64.sve.st4.nxv2f64( %v0, diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stN-reg-reg-addr-mode.ll @@ -133,7 +133,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0, x1] +; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0, x1] ; CHECK-NEXT: ret %1 = getelementptr i8, i8* %addr, i64 %offset call void @llvm.aarch64.sve.st3.nxv16i8( %v0, @@ -154,7 +154,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: st3h { z0.h, z1.h, z2.h }, p0, [x0, x1, lsl #1] +; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [x0, x1, lsl #1] ; CHECK-NEXT: ret %1 = getelementptr i16, i16* %addr, i64 %offset call void @llvm.aarch64.sve.st3.nxv8i16( %v0, @@ -171,7 +171,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: st3h { z0.h, z1.h, z2.h }, p0, [x0, x1, lsl #1] +; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [x0, x1, lsl #1] ; CHECK-NEXT: ret %1 = getelementptr half, half* %addr, i64 %offset call void @llvm.aarch64.sve.st3.nxv8f16( %v0, @@ -192,7 +192,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: st3w { z0.s, z1.s, z2.s }, p0, [x0, x1, lsl #2] +; CHECK-NEXT: st3w { z0.s - z2.s }, p0, [x0, x1, lsl #2] ; CHECK-NEXT: ret %1 = getelementptr i32, i32* %addr, i64 %offset call void @llvm.aarch64.sve.st3.nxv4i32( %v0, @@ -209,7 +209,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: st3w { z0.s, z1.s, z2.s }, p0, [x0, x1, lsl #2] +; CHECK-NEXT: st3w { z0.s - z2.s }, p0, [x0, x1, lsl #2] ; CHECK-NEXT: ret %1 = getelementptr float, float* %addr, i64 %offset call void @llvm.aarch64.sve.st3.nxv4f32( %v0, @@ -230,7 +230,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: st3d { z0.d, z1.d, z2.d }, p0, [x0, x1, lsl #3] +; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [x0, x1, lsl #3] ; CHECK-NEXT: ret %1 = getelementptr i64, i64* %addr, i64 %offset call void @llvm.aarch64.sve.st3.nxv2i64( %v0, @@ -247,7 +247,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: st3d { z0.d, z1.d, z2.d }, p0, [x0, x1, lsl #3] +; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [x0, x1, lsl #3] ; CHECK-NEXT: ret %1 = getelementptr double, double* %addr, i64 %offset call void @llvm.aarch64.sve.st3.nxv2f64( %v0, @@ -269,7 +269,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x1] +; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0, x1] ; CHECK-NEXT: ret %1 = getelementptr i8, i8* %addr, i64 %offset call void @llvm.aarch64.sve.st4.nxv16i8( %v0, @@ -292,7 +292,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, x1, lsl #1] +; CHECK-NEXT: st4h { z0.h - z3.h }, p0, [x0, x1, lsl #1] ; CHECK-NEXT: ret %1 = getelementptr i16, i16* %addr, i64 %offset call void @llvm.aarch64.sve.st4.nxv8i16( %v0, @@ -311,7 +311,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, x1, lsl #1] +; CHECK-NEXT: st4h { z0.h - z3.h }, p0, [x0, x1, lsl #1] ; CHECK-NEXT: ret %1 = getelementptr half, half* %addr, i64 %offset call void @llvm.aarch64.sve.st4.nxv8f16( %v0, @@ -334,7 +334,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, x1, lsl #2] +; CHECK-NEXT: st4w { z0.s - z3.s }, p0, [x0, x1, lsl #2] ; CHECK-NEXT: ret %1 = getelementptr i32, i32* %addr, i64 %offset call void @llvm.aarch64.sve.st4.nxv4i32( %v0, @@ -353,7 +353,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, x1, lsl #2] +; CHECK-NEXT: st4w { z0.s - z3.s }, p0, [x0, x1, lsl #2] ; CHECK-NEXT: ret %1 = getelementptr float, float* %addr, i64 %offset call void @llvm.aarch64.sve.st4.nxv4f32( %v0, @@ -376,7 +376,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, x1, lsl #3] +; CHECK-NEXT: st4d { z0.d - z3.d }, p0, [x0, x1, lsl #3] ; CHECK-NEXT: ret %1 = getelementptr i64, i64* %addr, i64 %offset call void @llvm.aarch64.sve.st4.nxv2i64( %v0, @@ -395,7 +395,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, x1, lsl #3] +; CHECK-NEXT: st4d { z0.d - z3.d }, p0, [x0, x1, lsl #3] ; CHECK-NEXT: ret %1 = getelementptr double, double* %addr, i64 %offset call void @llvm.aarch64.sve.st4.nxv2f64( %v0, diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-stores.ll @@ -154,7 +154,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x0] +; CHECK-NEXT: st3b { z0.b - z2.b }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st3.nxv16i8( %v0, %v1, @@ -174,7 +174,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: st3h { z0.h, z1.h, z2.h }, p0, [x0] +; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st3.nxv8i16( %v0, %v1, @@ -190,7 +190,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: st3h { z0.h, z1.h, z2.h }, p0, [x0] +; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st3.nxv8f16( %v0, %v1, @@ -206,7 +206,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: st3h { z0.h, z1.h, z2.h }, p0, [x0] +; CHECK-NEXT: st3h { z0.h - z2.h }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st3.nxv8bf16( %v0, %v1, @@ -226,7 +226,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: st3w { z0.s, z1.s, z2.s }, p0, [x0] +; CHECK-NEXT: st3w { z0.s - z2.s }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st3.nxv4i32( %v0, %v1, @@ -242,7 +242,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: st3w { z0.s, z1.s, z2.s }, p0, [x0] +; CHECK-NEXT: st3w { z0.s - z2.s }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st3.nxv4f32( %v0, %v1, @@ -262,7 +262,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: st3d { z0.d, z1.d, z2.d }, p0, [x0] +; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st3.nxv2i64( %v0, %v1, @@ -278,7 +278,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: st3d { z0.d, z1.d, z2.d }, p0, [x0] +; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st3.nxv2f64( %v0, %v1, @@ -294,7 +294,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2 def $z0_z1_z2 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2 def $z0_z1_z2 -; CHECK-NEXT: st3d { z0.d, z1.d, z2.d }, p0, [x0] +; CHECK-NEXT: st3d { z0.d - z2.d }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st3.nxv2p0i8( %v0, %v1, @@ -315,7 +315,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0] +; CHECK-NEXT: st4b { z0.b - z3.b }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st4.nxv16i8( %v0, %v1, @@ -337,7 +337,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0] +; CHECK-NEXT: st4h { z0.h - z3.h }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st4.nxv8i16( %v0, %v1, @@ -355,7 +355,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0] +; CHECK-NEXT: st4h { z0.h - z3.h }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st4.nxv8f16( %v0, %v1, @@ -373,7 +373,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0] +; CHECK-NEXT: st4h { z0.h - z3.h }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st4.nxv8bf16( %v0, %v1, @@ -395,7 +395,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0] +; CHECK-NEXT: st4w { z0.s - z3.s }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st4.nxv4i32( %v0, %v1, @@ -413,7 +413,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0] +; CHECK-NEXT: st4w { z0.s - z3.s }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st4.nxv4f32( %v0, %v1, @@ -435,7 +435,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0] +; CHECK-NEXT: st4d { z0.d - z3.d }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st4.nxv2i64( %v0, %v1, @@ -453,7 +453,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0] +; CHECK-NEXT: st4d { z0.d - z3.d }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st4.nxv2f64( %v0, %v1, @@ -471,7 +471,7 @@ ; CHECK-NEXT: // kill: def $z2 killed $z2 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z1 killed $z1 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 ; CHECK-NEXT: // kill: def $z0 killed $z0 killed $z0_z1_z2_z3 def $z0_z1_z2_z3 -; CHECK-NEXT: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0] +; CHECK-NEXT: st4d { z0.d - z3.d }, p0, [x0] ; CHECK-NEXT: ret call void @llvm.aarch64.sve.st4.nxv2p0i8( %v0, %v1, diff --git a/llvm/test/CodeGen/AArch64/sve-ldN.mir b/llvm/test/CodeGen/AArch64/sve-ldN.mir --- a/llvm/test/CodeGen/AArch64/sve-ldN.mir +++ b/llvm/test/CodeGen/AArch64/sve-ldN.mir @@ -59,22 +59,22 @@ ; CHECK-OFFSET-NEXT: ld2w { z0.s, z1.s }, p0/z, [sp, #14, mul vl] ; CHECK-OFFSET-NEXT: ld2d { z0.d, z1.d }, p0/z, [sp, #-16, mul vl] ; CHECK-OFFSET-NEXT: ld2d { z0.d, z1.d }, p0/z, [sp, #14, mul vl] - ; CHECK-OFFSET-NEXT: ld3b { z0.b, z1.b, z2.b }, p0/z, [sp, #-24, mul vl] - ; CHECK-OFFSET-NEXT: ld3b { z0.b, z1.b, z2.b }, p0/z, [sp, #21, mul vl] - ; CHECK-OFFSET-NEXT: ld3h { z0.h, z1.h, z2.h }, p0/z, [sp, #-24, mul vl] - ; CHECK-OFFSET-NEXT: ld3h { z0.h, z1.h, z2.h }, p0/z, [sp, #21, mul vl] - ; CHECK-OFFSET-NEXT: ld3w { z0.s, z1.s, z2.s }, p0/z, [sp, #-24, mul vl] - ; CHECK-OFFSET-NEXT: ld3w { z0.s, z1.s, z2.s }, p0/z, [sp, #21, mul vl] - ; CHECK-OFFSET-NEXT: ld3d { z0.d, z1.d, z2.d }, p0/z, [sp, #-24, mul vl] - ; CHECK-OFFSET-NEXT: ld3d { z0.d, z1.d, z2.d }, p0/z, [sp, #21, mul vl] - ; CHECK-OFFSET-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [sp, #-32, mul vl] - ; CHECK-OFFSET-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [sp, #28, mul vl] - ; CHECK-OFFSET-NEXT: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [sp, #-32, mul vl] - ; CHECK-OFFSET-NEXT: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [sp, #28, mul vl] - ; CHECK-OFFSET-NEXT: ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [sp, #-32, mul vl] - ; CHECK-OFFSET-NEXT: ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [sp, #28, mul vl] - ; CHECK-OFFSET-NEXT: ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [sp, #-32, mul vl] - ; CHECK-OFFSET-NEXT: ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [sp, #28, mul vl] + ; CHECK-OFFSET-NEXT: ld3b { z0.b - z2.b }, p0/z, [sp, #-24, mul vl] + ; CHECK-OFFSET-NEXT: ld3b { z0.b - z2.b }, p0/z, [sp, #21, mul vl] + ; CHECK-OFFSET-NEXT: ld3h { z0.h - z2.h }, p0/z, [sp, #-24, mul vl] + ; CHECK-OFFSET-NEXT: ld3h { z0.h - z2.h }, p0/z, [sp, #21, mul vl] + ; CHECK-OFFSET-NEXT: ld3w { z0.s - z2.s }, p0/z, [sp, #-24, mul vl] + ; CHECK-OFFSET-NEXT: ld3w { z0.s - z2.s }, p0/z, [sp, #21, mul vl] + ; CHECK-OFFSET-NEXT: ld3d { z0.d - z2.d }, p0/z, [sp, #-24, mul vl] + ; CHECK-OFFSET-NEXT: ld3d { z0.d - z2.d }, p0/z, [sp, #21, mul vl] + ; CHECK-OFFSET-NEXT: ld4b { z0.b - z3.b }, p0/z, [sp, #-32, mul vl] + ; CHECK-OFFSET-NEXT: ld4b { z0.b - z3.b }, p0/z, [sp, #28, mul vl] + ; CHECK-OFFSET-NEXT: ld4h { z0.h - z3.h }, p0/z, [sp, #-32, mul vl] + ; CHECK-OFFSET-NEXT: ld4h { z0.h - z3.h }, p0/z, [sp, #28, mul vl] + ; CHECK-OFFSET-NEXT: ld4w { z0.s - z3.s }, p0/z, [sp, #-32, mul vl] + ; CHECK-OFFSET-NEXT: ld4w { z0.s - z3.s }, p0/z, [sp, #28, mul vl] + ; CHECK-OFFSET-NEXT: ld4d { z0.d - z3.d }, p0/z, [sp, #-32, mul vl] + ; CHECK-OFFSET-NEXT: ld4d { z0.d - z3.d }, p0/z, [sp, #28, mul vl] ; CHECK-OFFSET-NEXT: addvl sp, sp, #31 ; CHECK-OFFSET-NEXT: addvl sp, sp, #1 ; CHECK-OFFSET-NEXT: ldr x29, [sp], #16 @@ -195,37 +195,37 @@ ; CHECK-OFFSET-NEXT: addvl x8, sp, #2 ; CHECK-OFFSET-NEXT: ld2d { z0.d, z1.d }, p0/z, [x8, #14, mul vl] ; CHECK-OFFSET-NEXT: addvl x8, sp, #-3 - ; CHECK-OFFSET-NEXT: ld3b { z0.b, z1.b, z2.b }, p0/z, [x8, #-24, mul vl] + ; CHECK-OFFSET-NEXT: ld3b { z0.b - z2.b }, p0/z, [x8, #-24, mul vl] ; CHECK-OFFSET-NEXT: addvl x8, sp, #3 - ; CHECK-OFFSET-NEXT: ld3b { z0.b, z1.b, z2.b }, p0/z, [x8, #21, mul vl] + ; CHECK-OFFSET-NEXT: ld3b { z0.b - z2.b }, p0/z, [x8, #21, mul vl] ; CHECK-OFFSET-NEXT: addvl x8, sp, #-3 - ; CHECK-OFFSET-NEXT: ld3h { z0.h, z1.h, z2.h }, p0/z, [x8, #-24, mul vl] + ; CHECK-OFFSET-NEXT: ld3h { z0.h - z2.h }, p0/z, [x8, #-24, mul vl] ; CHECK-OFFSET-NEXT: addvl x8, sp, #3 - ; CHECK-OFFSET-NEXT: ld3h { z0.h, z1.h, z2.h }, p0/z, [x8, #21, mul vl] + ; CHECK-OFFSET-NEXT: ld3h { z0.h - z2.h }, p0/z, [x8, #21, mul vl] ; CHECK-OFFSET-NEXT: addvl x8, sp, #-3 - ; CHECK-OFFSET-NEXT: ld3w { z0.s, z1.s, z2.s }, p0/z, [x8, #-24, mul vl] + ; CHECK-OFFSET-NEXT: ld3w { z0.s - z2.s }, p0/z, [x8, #-24, mul vl] ; CHECK-OFFSET-NEXT: addvl x8, sp, #3 - ; CHECK-OFFSET-NEXT: ld3w { z0.s, z1.s, z2.s }, p0/z, [x8, #21, mul vl] + ; CHECK-OFFSET-NEXT: ld3w { z0.s - z2.s }, p0/z, [x8, #21, mul vl] ; CHECK-OFFSET-NEXT: addvl x8, sp, #-3 - ; CHECK-OFFSET-NEXT: ld3d { z0.d, z1.d, z2.d }, p0/z, [x8, #-24, mul vl] + ; CHECK-OFFSET-NEXT: ld3d { z0.d - z2.d }, p0/z, [x8, #-24, mul vl] ; CHECK-OFFSET-NEXT: addvl x8, sp, #3 - ; CHECK-OFFSET-NEXT: ld3d { z0.d, z1.d, z2.d }, p0/z, [x8, #21, mul vl] + ; CHECK-OFFSET-NEXT: ld3d { z0.d - z2.d }, p0/z, [x8, #21, mul vl] ; CHECK-OFFSET-NEXT: addvl x8, sp, #-4 - ; CHECK-OFFSET-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x8, #-32, mul vl] + ; CHECK-OFFSET-NEXT: ld4b { z0.b - z3.b }, p0/z, [x8, #-32, mul vl] ; CHECK-OFFSET-NEXT: addvl x8, sp, #4 - ; CHECK-OFFSET-NEXT: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x8, #28, mul vl] + ; CHECK-OFFSET-NEXT: ld4b { z0.b - z3.b }, p0/z, [x8, #28, mul vl] ; CHECK-OFFSET-NEXT: addvl x8, sp, #-4 - ; CHECK-OFFSET-NEXT: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x8, #-32, mul vl] + ; CHECK-OFFSET-NEXT: ld4h { z0.h - z3.h }, p0/z, [x8, #-32, mul vl] ; CHECK-OFFSET-NEXT: addvl x8, sp, #4 - ; CHECK-OFFSET-NEXT: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x8, #28, mul vl] + ; CHECK-OFFSET-NEXT: ld4h { z0.h - z3.h }, p0/z, [x8, #28, mul vl] ; CHECK-OFFSET-NEXT: addvl x8, sp, #-4 - ; CHECK-OFFSET-NEXT: ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x8, #-32, mul vl] + ; CHECK-OFFSET-NEXT: ld4w { z0.s - z3.s }, p0/z, [x8, #-32, mul vl] ; CHECK-OFFSET-NEXT: addvl x8, sp, #4 - ; CHECK-OFFSET-NEXT: ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x8, #28, mul vl] + ; CHECK-OFFSET-NEXT: ld4w { z0.s - z3.s }, p0/z, [x8, #28, mul vl] ; CHECK-OFFSET-NEXT: addvl x8, sp, #-4 - ; CHECK-OFFSET-NEXT: ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x8, #-32, mul vl] + ; CHECK-OFFSET-NEXT: ld4d { z0.d - z3.d }, p0/z, [x8, #-32, mul vl] ; CHECK-OFFSET-NEXT: addvl x8, sp, #4 - ; CHECK-OFFSET-NEXT: ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x8, #28, mul vl] + ; CHECK-OFFSET-NEXT: ld4d { z0.d - z3.d }, p0/z, [x8, #28, mul vl] ; CHECK-OFFSET-NEXT: addvl sp, sp, #31 ; CHECK-OFFSET-NEXT: addvl sp, sp, #1 ; CHECK-OFFSET-NEXT: ldr x29, [sp], #16 diff --git a/llvm/test/CodeGen/AArch64/sve-stN.mir b/llvm/test/CodeGen/AArch64/sve-stN.mir --- a/llvm/test/CodeGen/AArch64/sve-stN.mir +++ b/llvm/test/CodeGen/AArch64/sve-stN.mir @@ -59,22 +59,22 @@ ; CHECK-OFFSET-NEXT: st2w { z0.s, z1.s }, p0, [sp, #14, mul vl] ; CHECK-OFFSET-NEXT: st2d { z0.d, z1.d }, p0, [sp, #-16, mul vl] ; CHECK-OFFSET-NEXT: st2d { z0.d, z1.d }, p0, [sp, #14, mul vl] - ; CHECK-OFFSET-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [sp, #-24, mul vl] - ; CHECK-OFFSET-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [sp, #21, mul vl] - ; CHECK-OFFSET-NEXT: st3h { z0.h, z1.h, z2.h }, p0, [sp, #-24, mul vl] - ; CHECK-OFFSET-NEXT: st3h { z0.h, z1.h, z2.h }, p0, [sp, #21, mul vl] - ; CHECK-OFFSET-NEXT: st3w { z0.s, z1.s, z2.s }, p0, [sp, #-24, mul vl] - ; CHECK-OFFSET-NEXT: st3w { z0.s, z1.s, z2.s }, p0, [sp, #21, mul vl] - ; CHECK-OFFSET-NEXT: st3d { z0.d, z1.d, z2.d }, p0, [sp, #-24, mul vl] - ; CHECK-OFFSET-NEXT: st3d { z0.d, z1.d, z2.d }, p0, [sp, #21, mul vl] - ; CHECK-OFFSET-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [sp, #-32, mul vl] - ; CHECK-OFFSET-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [sp, #28, mul vl] - ; CHECK-OFFSET-NEXT: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [sp, #-32, mul vl] - ; CHECK-OFFSET-NEXT: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [sp, #28, mul vl] - ; CHECK-OFFSET-NEXT: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [sp, #-32, mul vl] - ; CHECK-OFFSET-NEXT: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [sp, #28, mul vl] - ; CHECK-OFFSET-NEXT: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [sp, #-32, mul vl] - ; CHECK-OFFSET-NEXT: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [sp, #28, mul vl] + ; CHECK-OFFSET-NEXT: st3b { z0.b - z2.b }, p0, [sp, #-24, mul vl] + ; CHECK-OFFSET-NEXT: st3b { z0.b - z2.b }, p0, [sp, #21, mul vl] + ; CHECK-OFFSET-NEXT: st3h { z0.h - z2.h }, p0, [sp, #-24, mul vl] + ; CHECK-OFFSET-NEXT: st3h { z0.h - z2.h }, p0, [sp, #21, mul vl] + ; CHECK-OFFSET-NEXT: st3w { z0.s - z2.s }, p0, [sp, #-24, mul vl] + ; CHECK-OFFSET-NEXT: st3w { z0.s - z2.s }, p0, [sp, #21, mul vl] + ; CHECK-OFFSET-NEXT: st3d { z0.d - z2.d }, p0, [sp, #-24, mul vl] + ; CHECK-OFFSET-NEXT: st3d { z0.d - z2.d }, p0, [sp, #21, mul vl] + ; CHECK-OFFSET-NEXT: st4b { z0.b - z3.b }, p0, [sp, #-32, mul vl] + ; CHECK-OFFSET-NEXT: st4b { z0.b - z3.b }, p0, [sp, #28, mul vl] + ; CHECK-OFFSET-NEXT: st4h { z0.h - z3.h }, p0, [sp, #-32, mul vl] + ; CHECK-OFFSET-NEXT: st4h { z0.h - z3.h }, p0, [sp, #28, mul vl] + ; CHECK-OFFSET-NEXT: st4w { z0.s - z3.s }, p0, [sp, #-32, mul vl] + ; CHECK-OFFSET-NEXT: st4w { z0.s - z3.s }, p0, [sp, #28, mul vl] + ; CHECK-OFFSET-NEXT: st4d { z0.d - z3.d }, p0, [sp, #-32, mul vl] + ; CHECK-OFFSET-NEXT: st4d { z0.d - z3.d }, p0, [sp, #28, mul vl] ; CHECK-OFFSET-NEXT: addvl sp, sp, #31 ; CHECK-OFFSET-NEXT: addvl sp, sp, #1 ; CHECK-OFFSET-NEXT: ldr x29, [sp], #16 @@ -195,37 +195,37 @@ ; CHECK-OFFSET-NEXT: addvl x8, sp, #2 ; CHECK-OFFSET-NEXT: st2d { z0.d, z1.d }, p0, [x8, #14, mul vl] ; CHECK-OFFSET-NEXT: addvl x8, sp, #-3 - ; CHECK-OFFSET-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x8, #-24, mul vl] + ; CHECK-OFFSET-NEXT: st3b { z0.b - z2.b }, p0, [x8, #-24, mul vl] ; CHECK-OFFSET-NEXT: addvl x8, sp, #3 - ; CHECK-OFFSET-NEXT: st3b { z0.b, z1.b, z2.b }, p0, [x8, #21, mul vl] + ; CHECK-OFFSET-NEXT: st3b { z0.b - z2.b }, p0, [x8, #21, mul vl] ; CHECK-OFFSET-NEXT: addvl x8, sp, #-3 - ; CHECK-OFFSET-NEXT: st3h { z0.h, z1.h, z2.h }, p0, [x8, #-24, mul vl] + ; CHECK-OFFSET-NEXT: st3h { z0.h - z2.h }, p0, [x8, #-24, mul vl] ; CHECK-OFFSET-NEXT: addvl x8, sp, #3 - ; CHECK-OFFSET-NEXT: st3h { z0.h, z1.h, z2.h }, p0, [x8, #21, mul vl] + ; CHECK-OFFSET-NEXT: st3h { z0.h - z2.h }, p0, [x8, #21, mul vl] ; CHECK-OFFSET-NEXT: addvl x8, sp, #-3 - ; CHECK-OFFSET-NEXT: st3w { z0.s, z1.s, z2.s }, p0, [x8, #-24, mul vl] + ; CHECK-OFFSET-NEXT: st3w { z0.s - z2.s }, p0, [x8, #-24, mul vl] ; CHECK-OFFSET-NEXT: addvl x8, sp, #3 - ; CHECK-OFFSET-NEXT: st3w { z0.s, z1.s, z2.s }, p0, [x8, #21, mul vl] + ; CHECK-OFFSET-NEXT: st3w { z0.s - z2.s }, p0, [x8, #21, mul vl] ; CHECK-OFFSET-NEXT: addvl x8, sp, #-3 - ; CHECK-OFFSET-NEXT: st3d { z0.d, z1.d, z2.d }, p0, [x8, #-24, mul vl] + ; CHECK-OFFSET-NEXT: st3d { z0.d - z2.d }, p0, [x8, #-24, mul vl] ; CHECK-OFFSET-NEXT: addvl x8, sp, #3 - ; CHECK-OFFSET-NEXT: st3d { z0.d, z1.d, z2.d }, p0, [x8, #21, mul vl] + ; CHECK-OFFSET-NEXT: st3d { z0.d - z2.d }, p0, [x8, #21, mul vl] ; CHECK-OFFSET-NEXT: addvl x8, sp, #-4 - ; CHECK-OFFSET-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x8, #-32, mul vl] + ; CHECK-OFFSET-NEXT: st4b { z0.b - z3.b }, p0, [x8, #-32, mul vl] ; CHECK-OFFSET-NEXT: addvl x8, sp, #4 - ; CHECK-OFFSET-NEXT: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x8, #28, mul vl] + ; CHECK-OFFSET-NEXT: st4b { z0.b - z3.b }, p0, [x8, #28, mul vl] ; CHECK-OFFSET-NEXT: addvl x8, sp, #-4 - ; CHECK-OFFSET-NEXT: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x8, #-32, mul vl] + ; CHECK-OFFSET-NEXT: st4h { z0.h - z3.h }, p0, [x8, #-32, mul vl] ; CHECK-OFFSET-NEXT: addvl x8, sp, #4 - ; CHECK-OFFSET-NEXT: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x8, #28, mul vl] + ; CHECK-OFFSET-NEXT: st4h { z0.h - z3.h }, p0, [x8, #28, mul vl] ; CHECK-OFFSET-NEXT: addvl x8, sp, #-4 - ; CHECK-OFFSET-NEXT: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x8, #-32, mul vl] + ; CHECK-OFFSET-NEXT: st4w { z0.s - z3.s }, p0, [x8, #-32, mul vl] ; CHECK-OFFSET-NEXT: addvl x8, sp, #4 - ; CHECK-OFFSET-NEXT: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x8, #28, mul vl] + ; CHECK-OFFSET-NEXT: st4w { z0.s - z3.s }, p0, [x8, #28, mul vl] ; CHECK-OFFSET-NEXT: addvl x8, sp, #-4 - ; CHECK-OFFSET-NEXT: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x8, #-32, mul vl] + ; CHECK-OFFSET-NEXT: st4d { z0.d - z3.d }, p0, [x8, #-32, mul vl] ; CHECK-OFFSET-NEXT: addvl x8, sp, #4 - ; CHECK-OFFSET-NEXT: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x8, #28, mul vl] + ; CHECK-OFFSET-NEXT: st4d { z0.d - z3.d }, p0, [x8, #28, mul vl] ; CHECK-OFFSET-NEXT: addvl sp, sp, #31 ; CHECK-OFFSET-NEXT: addvl sp, sp, #1 ; CHECK-OFFSET-NEXT: ldr x29, [sp], #16 diff --git a/llvm/test/MC/AArch64/SVE/ld3b.s b/llvm/test/MC/AArch64/SVE/ld3b.s --- a/llvm/test/MC/AArch64/SVE/ld3b.s +++ b/llvm/test/MC/AArch64/SVE/ld3b.s @@ -10,31 +10,31 @@ // RUN: | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x0] -// CHECK-INST: ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x0] +// CHECK-INST: ld3b { z0.b - z2.b }, p0/z, [x0, x0] // CHECK-ENCODING: [0x00,0xc0,0x40,0xa4] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: a440c000 ld3b { z5.b, z6.b, z7.b }, p3/z, [x17, x16] -// CHECK-INST: ld3b { z5.b, z6.b, z7.b }, p3/z, [x17, x16] +// CHECK-INST: ld3b { z5.b - z7.b }, p3/z, [x17, x16] // CHECK-ENCODING: [0x25,0xce,0x50,0xa4] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: a450ce25 ld3b { z0.b, z1.b, z2.b }, p0/z, [x0] -// CHECK-INST: ld3b { z0.b, z1.b, z2.b }, p0/z, [x0] +// CHECK-INST: ld3b { z0.b - z2.b }, p0/z, [x0] // CHECK-ENCODING: [0x00,0xe0,0x40,0xa4] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: a440e000 ld3b { z23.b, z24.b, z25.b }, p3/z, [x13, #-24, mul vl] -// CHECK-INST: ld3b { z23.b, z24.b, z25.b }, p3/z, [x13, #-24, mul vl] +// CHECK-INST: ld3b { z23.b - z25.b }, p3/z, [x13, #-24, mul vl] // CHECK-ENCODING: [0xb7,0xed,0x48,0xa4] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: a448edb7 ld3b { z21.b, z22.b, z23.b }, p5/z, [x10, #15, mul vl] -// CHECK-INST: ld3b { z21.b, z22.b, z23.b }, p5/z, [x10, #15, mul vl] +// CHECK-INST: ld3b { z21.b - z23.b }, p5/z, [x10, #15, mul vl] // CHECK-ENCODING: [0x55,0xf5,0x45,0xa4] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: a445f555 diff --git a/llvm/test/MC/AArch64/SVE/ld3d.s b/llvm/test/MC/AArch64/SVE/ld3d.s --- a/llvm/test/MC/AArch64/SVE/ld3d.s +++ b/llvm/test/MC/AArch64/SVE/ld3d.s @@ -10,31 +10,31 @@ // RUN: | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN ld3d { z0.d, z1.d, z2.d }, p0/z, [x0, x0, lsl #3] -// CHECK-INST: ld3d { z0.d, z1.d, z2.d }, p0/z, [x0, x0, lsl #3] +// CHECK-INST: ld3d { z0.d - z2.d }, p0/z, [x0, x0, lsl #3] // CHECK-ENCODING: [0x00,0xc0,0xc0,0xa5] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: a5c0c000 ld3d { z5.d, z6.d, z7.d }, p3/z, [x17, x16, lsl #3] -// CHECK-INST: ld3d { z5.d, z6.d, z7.d }, p3/z, [x17, x16, lsl #3] +// CHECK-INST: ld3d { z5.d - z7.d }, p3/z, [x17, x16, lsl #3] // CHECK-ENCODING: [0x25,0xce,0xd0,0xa5] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: a5d0ce25 ld3d { z0.d, z1.d, z2.d }, p0/z, [x0] -// CHECK-INST: ld3d { z0.d, z1.d, z2.d }, p0/z, [x0] +// CHECK-INST: ld3d { z0.d - z2.d }, p0/z, [x0] // CHECK-ENCODING: [0x00,0xe0,0xc0,0xa5] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: a5c0e000 ld3d { z23.d, z24.d, z25.d }, p3/z, [x13, #-24, mul vl] -// CHECK-INST: ld3d { z23.d, z24.d, z25.d }, p3/z, [x13, #-24, mul vl] +// CHECK-INST: ld3d { z23.d - z25.d }, p3/z, [x13, #-24, mul vl] // CHECK-ENCODING: [0xb7,0xed,0xc8,0xa5] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: a5c8edb7 ld3d { z21.d, z22.d, z23.d }, p5/z, [x10, #15, mul vl] -// CHECK-INST: ld3d { z21.d, z22.d, z23.d }, p5/z, [x10, #15, mul vl] +// CHECK-INST: ld3d { z21.d - z23.d }, p5/z, [x10, #15, mul vl] // CHECK-ENCODING: [0x55,0xf5,0xc5,0xa5] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: a5c5f555 diff --git a/llvm/test/MC/AArch64/SVE/ld3h.s b/llvm/test/MC/AArch64/SVE/ld3h.s --- a/llvm/test/MC/AArch64/SVE/ld3h.s +++ b/llvm/test/MC/AArch64/SVE/ld3h.s @@ -10,31 +10,37 @@ // RUN: | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, x0, lsl #1] -// CHECK-INST: ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, x0, lsl #1] +// CHECK-INST: ld3h { z0.h - z2.h }, p0/z, [x0, x0, lsl #1] // CHECK-ENCODING: [0x00,0xc0,0xc0,0xa4] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: a4c0c000 ld3h { z5.h, z6.h, z7.h }, p3/z, [x17, x16, lsl #1] -// CHECK-INST: ld3h { z5.h, z6.h, z7.h }, p3/z, [x17, x16, lsl #1] +// CHECK-INST: ld3h { z5.h - z7.h }, p3/z, [x17, x16, lsl #1] // CHECK-ENCODING: [0x25,0xce,0xd0,0xa4] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: a4d0ce25 ld3h { z0.h, z1.h, z2.h }, p0/z, [x0] -// CHECK-INST: ld3h { z0.h, z1.h, z2.h }, p0/z, [x0] +// CHECK-INST: ld3h { z0.h - z2.h }, p0/z, [x0] // CHECK-ENCODING: [0x00,0xe0,0xc0,0xa4] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: a4c0e000 ld3h { z23.h, z24.h, z25.h }, p3/z, [x13, #-24, mul vl] -// CHECK-INST: ld3h { z23.h, z24.h, z25.h }, p3/z, [x13, #-24, mul vl] +// CHECK-INST: ld3h { z23.h - z25.h }, p3/z, [x13, #-24, mul vl] // CHECK-ENCODING: [0xb7,0xed,0xc8,0xa4] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: a4c8edb7 ld3h { z21.h, z22.h, z23.h }, p5/z, [x10, #15, mul vl] -// CHECK-INST: ld3h { z21.h, z22.h, z23.h }, p5/z, [x10, #15, mul vl] +// CHECK-INST: ld3h { z21.h - z23.h }, p5/z, [x10, #15, mul vl] // CHECK-ENCODING: [0x55,0xf5,0xc5,0xa4] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: a4c5f555 + +ld3h { z30.h, z31.h, z0.h }, p5/z, [x10, #15, mul vl] +// CHECK-INST: ld3h { z30.h, z31.h, z0.h }, p5/z, [x10, #15, mul vl] +// CHECK-ENCODING: [0x5e,0xf5,0xc5,0xa4] +// CHECK-ERROR: instruction requires: sve or sme +// CHECK-UNKNOWN: a4c5f55e diff --git a/llvm/test/MC/AArch64/SVE/ld3w.s b/llvm/test/MC/AArch64/SVE/ld3w.s --- a/llvm/test/MC/AArch64/SVE/ld3w.s +++ b/llvm/test/MC/AArch64/SVE/ld3w.s @@ -10,31 +10,31 @@ // RUN: | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN ld3w { z0.s, z1.s, z2.s }, p0/z, [x0, x0, lsl #2] -// CHECK-INST: ld3w { z0.s, z1.s, z2.s }, p0/z, [x0, x0, lsl #2] +// CHECK-INST: ld3w { z0.s - z2.s }, p0/z, [x0, x0, lsl #2] // CHECK-ENCODING: [0x00,0xc0,0x40,0xa5] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: a540c000 ld3w { z5.s, z6.s, z7.s }, p3/z, [x17, x16, lsl #2] -// CHECK-INST: ld3w { z5.s, z6.s, z7.s }, p3/z, [x17, x16, lsl #2] +// CHECK-INST: ld3w { z5.s - z7.s }, p3/z, [x17, x16, lsl #2] // CHECK-ENCODING: [0x25,0xce,0x50,0xa5] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: a550ce25 ld3w { z0.s, z1.s, z2.s }, p0/z, [x0] -// CHECK-INST: ld3w { z0.s, z1.s, z2.s }, p0/z, [x0] +// CHECK-INST: ld3w { z0.s - z2.s }, p0/z, [x0] // CHECK-ENCODING: [0x00,0xe0,0x40,0xa5] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: a540e000 ld3w { z23.s, z24.s, z25.s }, p3/z, [x13, #-24, mul vl] -// CHECK-INST: ld3w { z23.s, z24.s, z25.s }, p3/z, [x13, #-24, mul vl] +// CHECK-INST: ld3w { z23.s - z25.s }, p3/z, [x13, #-24, mul vl] // CHECK-ENCODING: [0xb7,0xed,0x48,0xa5] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: a548edb7 ld3w { z21.s, z22.s, z23.s }, p5/z, [x10, #15, mul vl] -// CHECK-INST: ld3w { z21.s, z22.s, z23.s }, p5/z, [x10, #15, mul vl] +// CHECK-INST: ld3w { z21.s - z23.s }, p5/z, [x10, #15, mul vl] // CHECK-ENCODING: [0x55,0xf5,0x45,0xa5] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: a545f555 diff --git a/llvm/test/MC/AArch64/SVE/ld4b.s b/llvm/test/MC/AArch64/SVE/ld4b.s --- a/llvm/test/MC/AArch64/SVE/ld4b.s +++ b/llvm/test/MC/AArch64/SVE/ld4b.s @@ -10,31 +10,31 @@ // RUN: | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x0] -// CHECK-INST: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x0] +// CHECK-INST: ld4b { z0.b - z3.b }, p0/z, [x0, x0] // CHECK-ENCODING: [0x00,0xc0,0x60,0xa4] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: a460c000 ld4b { z5.b, z6.b, z7.b, z8.b }, p3/z, [x17, x16] -// CHECK-INST: ld4b { z5.b, z6.b, z7.b, z8.b }, p3/z, [x17, x16] +// CHECK-INST: ld4b { z5.b - z8.b }, p3/z, [x17, x16] // CHECK-ENCODING: [0x25,0xce,0x70,0xa4] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: a470ce25 ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0] -// CHECK-INST: ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0] +// CHECK-INST: ld4b { z0.b - z3.b }, p0/z, [x0] // CHECK-ENCODING: [0x00,0xe0,0x60,0xa4] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: a460e000 ld4b { z23.b, z24.b, z25.b, z26.b }, p3/z, [x13, #-32, mul vl] -// CHECK-INST: ld4b { z23.b, z24.b, z25.b, z26.b }, p3/z, [x13, #-32, mul vl] +// CHECK-INST: ld4b { z23.b - z26.b }, p3/z, [x13, #-32, mul vl] // CHECK-ENCODING: [0xb7,0xed,0x68,0xa4] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: a468edb7 ld4b { z21.b, z22.b, z23.b, z24.b }, p5/z, [x10, #20, mul vl] -// CHECK-INST: ld4b { z21.b, z22.b, z23.b, z24.b }, p5/z, [x10, #20, mul vl] +// CHECK-INST: ld4b { z21.b - z24.b }, p5/z, [x10, #20, mul vl] // CHECK-ENCODING: [0x55,0xf5,0x65,0xa4] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: a465f555 diff --git a/llvm/test/MC/AArch64/SVE/ld4d.s b/llvm/test/MC/AArch64/SVE/ld4d.s --- a/llvm/test/MC/AArch64/SVE/ld4d.s +++ b/llvm/test/MC/AArch64/SVE/ld4d.s @@ -10,31 +10,31 @@ // RUN: | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0, x0, lsl #3] -// CHECK-INST: ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0, x0, lsl #3] +// CHECK-INST: ld4d { z0.d - z3.d }, p0/z, [x0, x0, lsl #3] // CHECK-ENCODING: [0x00,0xc0,0xe0,0xa5] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: a5e0c000 ld4d { z5.d, z6.d, z7.d, z8.d }, p3/z, [x17, x16, lsl #3] -// CHECK-INST: ld4d { z5.d, z6.d, z7.d, z8.d }, p3/z, [x17, x16, lsl #3] +// CHECK-INST: ld4d { z5.d - z8.d }, p3/z, [x17, x16, lsl #3] // CHECK-ENCODING: [0x25,0xce,0xf0,0xa5] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: a5f0ce25 ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0] -// CHECK-INST: ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0] +// CHECK-INST: ld4d { z0.d - z3.d }, p0/z, [x0] // CHECK-ENCODING: [0x00,0xe0,0xe0,0xa5] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: a5e0e000 ld4d { z23.d, z24.d, z25.d, z26.d }, p3/z, [x13, #-32, mul vl] -// CHECK-INST: ld4d { z23.d, z24.d, z25.d, z26.d }, p3/z, [x13, #-32, mul vl] +// CHECK-INST: ld4d { z23.d - z26.d }, p3/z, [x13, #-32, mul vl] // CHECK-ENCODING: [0xb7,0xed,0xe8,0xa5] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: a5e8edb7 ld4d { z21.d, z22.d, z23.d, z24.d }, p5/z, [x10, #20, mul vl] -// CHECK-INST: ld4d { z21.d, z22.d, z23.d, z24.d }, p5/z, [x10, #20, mul vl] +// CHECK-INST: ld4d { z21.d - z24.d }, p5/z, [x10, #20, mul vl] // CHECK-ENCODING: [0x55,0xf5,0xe5,0xa5] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: a5e5f555 diff --git a/llvm/test/MC/AArch64/SVE/ld4h.s b/llvm/test/MC/AArch64/SVE/ld4h.s --- a/llvm/test/MC/AArch64/SVE/ld4h.s +++ b/llvm/test/MC/AArch64/SVE/ld4h.s @@ -10,31 +10,37 @@ // RUN: | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, x0, lsl #1] -// CHECK-INST: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, x0, lsl #1] +// CHECK-INST: ld4h { z0.h - z3.h }, p0/z, [x0, x0, lsl #1] // CHECK-ENCODING: [0x00,0xc0,0xe0,0xa4] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: a4e0c000 ld4h { z5.h, z6.h, z7.h, z8.h }, p3/z, [x17, x16, lsl #1] -// CHECK-INST: ld4h { z5.h, z6.h, z7.h, z8.h }, p3/z, [x17, x16, lsl #1] +// CHECK-INST: ld4h { z5.h - z8.h }, p3/z, [x17, x16, lsl #1] // CHECK-ENCODING: [0x25,0xce,0xf0,0xa4] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: a4f0ce25 ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0] -// CHECK-INST: ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0] +// CHECK-INST: ld4h { z0.h - z3.h }, p0/z, [x0] // CHECK-ENCODING: [0x00,0xe0,0xe0,0xa4] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: a4e0e000 ld4h { z23.h, z24.h, z25.h, z26.h }, p3/z, [x13, #-32, mul vl] -// CHECK-INST: ld4h { z23.h, z24.h, z25.h, z26.h }, p3/z, [x13, #-32, mul vl] +// CHECK-INST: ld4h { z23.h - z26.h }, p3/z, [x13, #-32, mul vl] // CHECK-ENCODING: [0xb7,0xed,0xe8,0xa4] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: a4e8edb7 ld4h { z21.h, z22.h, z23.h, z24.h }, p5/z, [x10, #20, mul vl] -// CHECK-INST: ld4h { z21.h, z22.h, z23.h, z24.h }, p5/z, [x10, #20, mul vl] +// CHECK-INST: ld4h { z21.h - z24.h }, p5/z, [x10, #20, mul vl] // CHECK-ENCODING: [0x55,0xf5,0xe5,0xa4] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: a4e5f555 + +ld4h { z31.h, z0.h, z1.h, z2.h }, p5/z, [x10, #20, mul vl] +// CHECK-INST: ld4h { z31.h, z0.h, z1.h, z2.h }, p5/z, [x10, #20, mul vl] +// CHECK-ENCODING: [0x5f,0xf5,0xe5,0xa4] +// CHECK-ERROR: instruction requires: sve or sme +// CHECK-UNKNOWN: a4e5f55f diff --git a/llvm/test/MC/AArch64/SVE/ld4w.s b/llvm/test/MC/AArch64/SVE/ld4w.s --- a/llvm/test/MC/AArch64/SVE/ld4w.s +++ b/llvm/test/MC/AArch64/SVE/ld4w.s @@ -10,31 +10,31 @@ // RUN: | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0, x0, lsl #2] -// CHECK-INST: ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0, x0, lsl #2] +// CHECK-INST: ld4w { z0.s - z3.s }, p0/z, [x0, x0, lsl #2] // CHECK-ENCODING: [0x00,0xc0,0x60,0xa5] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: a560c000 ld4w { z5.s, z6.s, z7.s, z8.s }, p3/z, [x17, x16, lsl #2] -// CHECK-INST: ld4w { z5.s, z6.s, z7.s, z8.s }, p3/z, [x17, x16, lsl #2] +// CHECK-INST: ld4w { z5.s - z8.s }, p3/z, [x17, x16, lsl #2] // CHECK-ENCODING: [0x25,0xce,0x70,0xa5] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: a570ce25 ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0] -// CHECK-INST: ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0] +// CHECK-INST: ld4w { z0.s - z3.s }, p0/z, [x0] // CHECK-ENCODING: [0x00,0xe0,0x60,0xa5] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: a560e000 ld4w { z23.s, z24.s, z25.s, z26.s }, p3/z, [x13, #-32, mul vl] -// CHECK-INST: ld4w { z23.s, z24.s, z25.s, z26.s }, p3/z, [x13, #-32, mul vl] +// CHECK-INST: ld4w { z23.s - z26.s }, p3/z, [x13, #-32, mul vl] // CHECK-ENCODING: [0xb7,0xed,0x68,0xa5] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: a568edb7 ld4w { z21.s, z22.s, z23.s, z24.s }, p5/z, [x10, #20, mul vl] -// CHECK-INST: ld4w { z21.s, z22.s, z23.s, z24.s }, p5/z, [x10, #20, mul vl] +// CHECK-INST: ld4w { z21.s - z24.s }, p5/z, [x10, #20, mul vl] // CHECK-ENCODING: [0x55,0xf5,0x65,0xa5] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: a565f555 diff --git a/llvm/test/MC/AArch64/SVE/st3b.s b/llvm/test/MC/AArch64/SVE/st3b.s --- a/llvm/test/MC/AArch64/SVE/st3b.s +++ b/llvm/test/MC/AArch64/SVE/st3b.s @@ -10,31 +10,31 @@ // RUN: | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN st3b { z0.b, z1.b, z2.b }, p0, [x0, x0] -// CHECK-INST: st3b { z0.b, z1.b, z2.b }, p0, [x0, x0] +// CHECK-INST: st3b { z0.b - z2.b }, p0, [x0, x0] // CHECK-ENCODING: [0x00,0x60,0x40,0xe4] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: e4406000 st3b { z5.b, z6.b, z7.b }, p3, [x17, x16] -// CHECK-INST: st3b { z5.b, z6.b, z7.b }, p3, [x17, x16] +// CHECK-INST: st3b { z5.b - z7.b }, p3, [x17, x16] // CHECK-ENCODING: [0x25,0x6e,0x50,0xe4] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: e4506e25 st3b { z0.b, z1.b, z2.b }, p0, [x0] -// CHECK-INST: st3b { z0.b, z1.b, z2.b }, p0, [x0] +// CHECK-INST: st3b { z0.b - z2.b }, p0, [x0] // CHECK-ENCODING: [0x00,0xe0,0x50,0xe4] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: e450e000 st3b { z23.b, z24.b, z25.b }, p3, [x13, #-24, mul vl] -// CHECK-INST: st3b { z23.b, z24.b, z25.b }, p3, [x13, #-24, mul vl] +// CHECK-INST: st3b { z23.b - z25.b }, p3, [x13, #-24, mul vl] // CHECK-ENCODING: [0xb7,0xed,0x58,0xe4] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: e458edb7 st3b { z21.b, z22.b, z23.b }, p5, [x10, #15, mul vl] -// CHECK-INST: st3b { z21.b, z22.b, z23.b }, p5, [x10, #15, mul vl] +// CHECK-INST: st3b { z21.b - z23.b }, p5, [x10, #15, mul vl] // CHECK-ENCODING: [0x55,0xf5,0x55,0xe4] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: e455f555 diff --git a/llvm/test/MC/AArch64/SVE/st3d.s b/llvm/test/MC/AArch64/SVE/st3d.s --- a/llvm/test/MC/AArch64/SVE/st3d.s +++ b/llvm/test/MC/AArch64/SVE/st3d.s @@ -10,31 +10,31 @@ // RUN: | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN st3d { z0.d, z1.d, z2.d }, p0, [x0, x0, lsl #3] -// CHECK-INST: st3d { z0.d, z1.d, z2.d }, p0, [x0, x0, lsl #3] +// CHECK-INST: st3d { z0.d - z2.d }, p0, [x0, x0, lsl #3] // CHECK-ENCODING: [0x00,0x60,0xc0,0xe5] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: e5c06000 st3d { z5.d, z6.d, z7.d }, p3, [x17, x16, lsl #3] -// CHECK-INST: st3d { z5.d, z6.d, z7.d }, p3, [x17, x16, lsl #3] +// CHECK-INST: st3d { z5.d - z7.d }, p3, [x17, x16, lsl #3] // CHECK-ENCODING: [0x25,0x6e,0xd0,0xe5] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: e5d06e25 st3d { z0.d, z1.d, z2.d }, p0, [x0] -// CHECK-INST: st3d { z0.d, z1.d, z2.d }, p0, [x0] +// CHECK-INST: st3d { z0.d - z2.d }, p0, [x0] // CHECK-ENCODING: [0x00,0xe0,0xd0,0xe5] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: e5d0e000 st3d { z23.d, z24.d, z25.d }, p3, [x13, #-24, mul vl] -// CHECK-INST: st3d { z23.d, z24.d, z25.d }, p3, [x13, #-24, mul vl] +// CHECK-INST: st3d { z23.d - z25.d }, p3, [x13, #-24, mul vl] // CHECK-ENCODING: [0xb7,0xed,0xd8,0xe5] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: e5d8edb7 st3d { z21.d, z22.d, z23.d }, p5, [x10, #15, mul vl] -// CHECK-INST: st3d { z21.d, z22.d, z23.d }, p5, [x10, #15, mul vl] +// CHECK-INST: st3d { z21.d - z23.d }, p5, [x10, #15, mul vl] // CHECK-ENCODING: [0x55,0xf5,0xd5,0xe5] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: e5d5f555 diff --git a/llvm/test/MC/AArch64/SVE/st3h.s b/llvm/test/MC/AArch64/SVE/st3h.s --- a/llvm/test/MC/AArch64/SVE/st3h.s +++ b/llvm/test/MC/AArch64/SVE/st3h.s @@ -10,31 +10,37 @@ // RUN: | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN st3h { z0.h, z1.h, z2.h }, p0, [x0, x0, lsl #1] -// CHECK-INST: st3h { z0.h, z1.h, z2.h }, p0, [x0, x0, lsl #1] +// CHECK-INST: st3h { z0.h - z2.h }, p0, [x0, x0, lsl #1] // CHECK-ENCODING: [0x00,0x60,0xc0,0xe4] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: e4c06000 st3h { z5.h, z6.h, z7.h }, p3, [x17, x16, lsl #1] -// CHECK-INST: st3h { z5.h, z6.h, z7.h }, p3, [x17, x16, lsl #1] +// CHECK-INST: st3h { z5.h - z7.h }, p3, [x17, x16, lsl #1] // CHECK-ENCODING: [0x25,0x6e,0xd0,0xe4] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: e4d06e25 st3h { z0.h, z1.h, z2.h }, p0, [x0] -// CHECK-INST: st3h { z0.h, z1.h, z2.h }, p0, [x0] +// CHECK-INST: st3h { z0.h - z2.h }, p0, [x0] // CHECK-ENCODING: [0x00,0xe0,0xd0,0xe4] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: e4d0e000 st3h { z23.h, z24.h, z25.h }, p3, [x13, #-24, mul vl] -// CHECK-INST: st3h { z23.h, z24.h, z25.h }, p3, [x13, #-24, mul vl] +// CHECK-INST: st3h { z23.h - z25.h }, p3, [x13, #-24, mul vl] // CHECK-ENCODING: [0xb7,0xed,0xd8,0xe4] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: e4d8edb7 st3h { z21.h, z22.h, z23.h }, p5, [x10, #15, mul vl] -// CHECK-INST: st3h { z21.h, z22.h, z23.h }, p5, [x10, #15, mul vl] +// CHECK-INST: st3h { z21.h - z23.h }, p5, [x10, #15, mul vl] // CHECK-ENCODING: [0x55,0xf5,0xd5,0xe4] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: e4d5f555 + +st3h { z31.h, z0.h, z1.h }, p5, [x10, #15, mul vl] +// CHECK-INST: st3h { z31.h, z0.h, z1.h }, p5, [x10, #15, mul vl] +// CHECK-ENCODING: [0x5f,0xf5,0xd5,0xe4] +// CHECK-ERROR: instruction requires: sve or sme +// CHECK-UNKNOWN: e4d5f55f diff --git a/llvm/test/MC/AArch64/SVE/st3w.s b/llvm/test/MC/AArch64/SVE/st3w.s --- a/llvm/test/MC/AArch64/SVE/st3w.s +++ b/llvm/test/MC/AArch64/SVE/st3w.s @@ -10,31 +10,31 @@ // RUN: | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN st3w { z0.s, z1.s, z2.s }, p0, [x0, x0, lsl #2] -// CHECK-INST: st3w { z0.s, z1.s, z2.s }, p0, [x0, x0, lsl #2] +// CHECK-INST: st3w { z0.s - z2.s }, p0, [x0, x0, lsl #2] // CHECK-ENCODING: [0x00,0x60,0x40,0xe5] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: e5406000 st3w { z5.s, z6.s, z7.s }, p3, [x17, x16, lsl #2] -// CHECK-INST: st3w { z5.s, z6.s, z7.s }, p3, [x17, x16, lsl #2] +// CHECK-INST: st3w { z5.s - z7.s }, p3, [x17, x16, lsl #2] // CHECK-ENCODING: [0x25,0x6e,0x50,0xe5] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: e5506e25 st3w { z0.s, z1.s, z2.s }, p0, [x0] -// CHECK-INST: st3w { z0.s, z1.s, z2.s }, p0, [x0] +// CHECK-INST: st3w { z0.s - z2.s }, p0, [x0] // CHECK-ENCODING: [0x00,0xe0,0x50,0xe5] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: e550e000 st3w { z23.s, z24.s, z25.s }, p3, [x13, #-24, mul vl] -// CHECK-INST: st3w { z23.s, z24.s, z25.s }, p3, [x13, #-24, mul vl] +// CHECK-INST: st3w { z23.s - z25.s }, p3, [x13, #-24, mul vl] // CHECK-ENCODING: [0xb7,0xed,0x58,0xe5] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: e558edb7 st3w { z21.s, z22.s, z23.s }, p5, [x10, #15, mul vl] -// CHECK-INST: st3w { z21.s, z22.s, z23.s }, p5, [x10, #15, mul vl] +// CHECK-INST: st3w { z21.s - z23.s }, p5, [x10, #15, mul vl] // CHECK-ENCODING: [0x55,0xf5,0x55,0xe5] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: e555f555 diff --git a/llvm/test/MC/AArch64/SVE/st4b.s b/llvm/test/MC/AArch64/SVE/st4b.s --- a/llvm/test/MC/AArch64/SVE/st4b.s +++ b/llvm/test/MC/AArch64/SVE/st4b.s @@ -10,31 +10,31 @@ // RUN: | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x0] -// CHECK-INST: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x0] +// CHECK-INST: st4b { z0.b - z3.b }, p0, [x0, x0] // CHECK-ENCODING: [0x00,0x60,0x60,0xe4] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: e4606000 st4b { z5.b, z6.b, z7.b, z8.b }, p3, [x17, x16] -// CHECK-INST: st4b { z5.b, z6.b, z7.b, z8.b }, p3, [x17, x16] +// CHECK-INST: st4b { z5.b - z8.b }, p3, [x17, x16] // CHECK-ENCODING: [0x25,0x6e,0x70,0xe4] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: e4706e25 st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0] -// CHECK-INST: st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0] +// CHECK-INST: st4b { z0.b - z3.b }, p0, [x0] // CHECK-ENCODING: [0x00,0xe0,0x70,0xe4] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: e470e000 st4b { z23.b, z24.b, z25.b, z26.b }, p3, [x13, #-32, mul vl] -// CHECK-INST: st4b { z23.b, z24.b, z25.b, z26.b }, p3, [x13, #-32, mul vl] +// CHECK-INST: st4b { z23.b - z26.b }, p3, [x13, #-32, mul vl] // CHECK-ENCODING: [0xb7,0xed,0x78,0xe4] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: e478edb7 st4b { z21.b, z22.b, z23.b, z24.b }, p5, [x10, #20, mul vl] -// CHECK-INST: st4b { z21.b, z22.b, z23.b, z24.b }, p5, [x10, #20, mul vl] +// CHECK-INST: st4b { z21.b - z24.b }, p5, [x10, #20, mul vl] // CHECK-ENCODING: [0x55,0xf5,0x75,0xe4] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: e475f555 diff --git a/llvm/test/MC/AArch64/SVE/st4d.s b/llvm/test/MC/AArch64/SVE/st4d.s --- a/llvm/test/MC/AArch64/SVE/st4d.s +++ b/llvm/test/MC/AArch64/SVE/st4d.s @@ -10,31 +10,31 @@ // RUN: | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, x0, lsl #3] -// CHECK-INST: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, x0, lsl #3] +// CHECK-INST: st4d { z0.d - z3.d }, p0, [x0, x0, lsl #3] // CHECK-ENCODING: [0x00,0x60,0xe0,0xe5] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: e5e06000 st4d { z5.d, z6.d, z7.d, z8.d }, p3, [x17, x16, lsl #3] -// CHECK-INST: st4d { z5.d, z6.d, z7.d, z8.d }, p3, [x17, x16, lsl #3] +// CHECK-INST: st4d { z5.d - z8.d }, p3, [x17, x16, lsl #3] // CHECK-ENCODING: [0x25,0x6e,0xf0,0xe5] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: e5f06e25 st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0] -// CHECK-INST: st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0] +// CHECK-INST: st4d { z0.d - z3.d }, p0, [x0] // CHECK-ENCODING: [0x00,0xe0,0xf0,0xe5] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: e5f0e000 st4d { z23.d, z24.d, z25.d, z26.d }, p3, [x13, #-32, mul vl] -// CHECK-INST: st4d { z23.d, z24.d, z25.d, z26.d }, p3, [x13, #-32, mul vl] +// CHECK-INST: st4d { z23.d - z26.d }, p3, [x13, #-32, mul vl] // CHECK-ENCODING: [0xb7,0xed,0xf8,0xe5] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: e5f8edb7 st4d { z21.d, z22.d, z23.d, z24.d }, p5, [x10, #20, mul vl] -// CHECK-INST: st4d { z21.d, z22.d, z23.d, z24.d }, p5, [x10, #20, mul vl] +// CHECK-INST: st4d { z21.d - z24.d }, p5, [x10, #20, mul vl] // CHECK-ENCODING: [0x55,0xf5,0xf5,0xe5] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: e5f5f555 diff --git a/llvm/test/MC/AArch64/SVE/st4h.s b/llvm/test/MC/AArch64/SVE/st4h.s --- a/llvm/test/MC/AArch64/SVE/st4h.s +++ b/llvm/test/MC/AArch64/SVE/st4h.s @@ -10,31 +10,37 @@ // RUN: | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, x0, lsl #1] -// CHECK-INST: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, x0, lsl #1] +// CHECK-INST: st4h { z0.h - z3.h }, p0, [x0, x0, lsl #1] // CHECK-ENCODING: [0x00,0x60,0xe0,0xe4] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: e4e06000 st4h { z5.h, z6.h, z7.h, z8.h }, p3, [x17, x16, lsl #1] -// CHECK-INST: st4h { z5.h, z6.h, z7.h, z8.h }, p3, [x17, x16, lsl #1] +// CHECK-INST: st4h { z5.h - z8.h }, p3, [x17, x16, lsl #1] // CHECK-ENCODING: [0x25,0x6e,0xf0,0xe4] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: e4f06e25 st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0] -// CHECK-INST: st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0] +// CHECK-INST: st4h { z0.h - z3.h }, p0, [x0] // CHECK-ENCODING: [0x00,0xe0,0xf0,0xe4] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: e4f0e000 st4h { z23.h, z24.h, z25.h, z26.h }, p3, [x13, #-32, mul vl] -// CHECK-INST: st4h { z23.h, z24.h, z25.h, z26.h }, p3, [x13, #-32, mul vl] +// CHECK-INST: st4h { z23.h - z26.h }, p3, [x13, #-32, mul vl] // CHECK-ENCODING: [0xb7,0xed,0xf8,0xe4] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: e4f8edb7 st4h { z21.h, z22.h, z23.h, z24.h }, p5, [x10, #20, mul vl] -// CHECK-INST: st4h { z21.h, z22.h, z23.h, z24.h }, p5, [x10, #20, mul vl] +// CHECK-INST: st4h { z21.h - z24.h }, p5, [x10, #20, mul vl] // CHECK-ENCODING: [0x55,0xf5,0xf5,0xe4] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: e4f5f555 + +st4h { z29.h, z30.h, z31.h, z0.h }, p5, [x10, #20, mul vl] +// CHECK-INST: st4h { z29.h, z30.h, z31.h, z0.h }, p5, [x10, #20, mul vl] +// CHECK-ENCODING: [0x5d,0xf5,0xf5,0xe4] +// CHECK-ERROR: instruction requires: sve or sme +// CHECK-UNKNOWN: e4f5f55d diff --git a/llvm/test/MC/AArch64/SVE/st4w.s b/llvm/test/MC/AArch64/SVE/st4w.s --- a/llvm/test/MC/AArch64/SVE/st4w.s +++ b/llvm/test/MC/AArch64/SVE/st4w.s @@ -10,31 +10,31 @@ // RUN: | llvm-objdump -d --mattr=-sve - | FileCheck %s --check-prefix=CHECK-UNKNOWN st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, x0, lsl #2] -// CHECK-INST: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, x0, lsl #2] +// CHECK-INST: st4w { z0.s - z3.s }, p0, [x0, x0, lsl #2] // CHECK-ENCODING: [0x00,0x60,0x60,0xe5] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: e5606000 st4w { z5.s, z6.s, z7.s, z8.s }, p3, [x17, x16, lsl #2] -// CHECK-INST: st4w { z5.s, z6.s, z7.s, z8.s }, p3, [x17, x16, lsl #2] +// CHECK-INST: st4w { z5.s - z8.s }, p3, [x17, x16, lsl #2] // CHECK-ENCODING: [0x25,0x6e,0x70,0xe5] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: e5706e25 st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0] -// CHECK-INST: st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0] +// CHECK-INST: st4w { z0.s - z3.s }, p0, [x0] // CHECK-ENCODING: [0x00,0xe0,0x70,0xe5] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: e570e000 st4w { z23.s, z24.s, z25.s, z26.s }, p3, [x13, #-32, mul vl] -// CHECK-INST: st4w { z23.s, z24.s, z25.s, z26.s }, p3, [x13, #-32, mul vl] +// CHECK-INST: st4w { z23.s - z26.s }, p3, [x13, #-32, mul vl] // CHECK-ENCODING: [0xb7,0xed,0x78,0xe5] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: e578edb7 st4w { z21.s, z22.s, z23.s, z24.s }, p5, [x10, #20, mul vl] -// CHECK-INST: st4w { z21.s, z22.s, z23.s, z24.s }, p5, [x10, #20, mul vl] +// CHECK-INST: st4w { z21.s - z24.s }, p5, [x10, #20, mul vl] // CHECK-ENCODING: [0x55,0xf5,0x75,0xe5] // CHECK-ERROR: instruction requires: sve or sme // CHECK-UNKNOWN: e575f555 diff --git a/llvm/test/tools/llvm-mca/AArch64/A64FX/A64FX-sve-instructions.s b/llvm/test/tools/llvm-mca/AArch64/A64FX/A64FX-sve-instructions.s --- a/llvm/test/tools/llvm-mca/AArch64/A64FX/A64FX-sve-instructions.s +++ b/llvm/test/tools/llvm-mca/AArch64/A64FX/A64FX-sve-instructions.s @@ -3568,46 +3568,46 @@ # CHECK-NEXT: 2 11 1.00 * U ld2w { z21.s, z22.s }, p5/z, [x10, #10, mul vl] # CHECK-NEXT: 2 11 1.00 * U ld2w { z23.s, z24.s }, p3/z, [x13, #-16, mul vl] # CHECK-NEXT: 3 12 1.50 * U ld2w { z5.s, z6.s }, p3/z, [x17, x16, lsl #2] -# CHECK-NEXT: 4 15 6.50 * U ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x0] -# CHECK-NEXT: 4 15 6.50 * U ld3b { z0.b, z1.b, z2.b }, p0/z, [x0] -# CHECK-NEXT: 4 15 6.50 * U ld3b { z21.b, z22.b, z23.b }, p5/z, [x10, #15, mul vl] -# CHECK-NEXT: 4 15 6.50 * U ld3b { z23.b, z24.b, z25.b }, p3/z, [x13, #-24, mul vl] -# CHECK-NEXT: 4 15 6.50 * U ld3b { z5.b, z6.b, z7.b }, p3/z, [x17, x16] -# CHECK-NEXT: 4 12 2.00 * U ld3d { z0.d, z1.d, z2.d }, p0/z, [x0, x0, lsl #3] -# CHECK-NEXT: 3 11 1.50 * U ld3d { z0.d, z1.d, z2.d }, p0/z, [x0] -# CHECK-NEXT: 3 11 1.50 * U ld3d { z21.d, z22.d, z23.d }, p5/z, [x10, #15, mul vl] -# CHECK-NEXT: 3 11 1.50 * U ld3d { z23.d, z24.d, z25.d }, p3/z, [x13, #-24, mul vl] -# CHECK-NEXT: 4 12 2.00 * U ld3d { z5.d, z6.d, z7.d }, p3/z, [x17, x16, lsl #3] -# CHECK-NEXT: 4 15 6.50 * U ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, x0, lsl #1] -# CHECK-NEXT: 4 15 6.50 * U ld3h { z0.h, z1.h, z2.h }, p0/z, [x0] -# CHECK-NEXT: 4 15 6.50 * U ld3h { z21.h, z22.h, z23.h }, p5/z, [x10, #15, mul vl] -# CHECK-NEXT: 4 15 6.50 * U ld3h { z23.h, z24.h, z25.h }, p3/z, [x13, #-24, mul vl] -# CHECK-NEXT: 4 15 6.50 * U ld3h { z5.h, z6.h, z7.h }, p3/z, [x17, x16, lsl #1] -# CHECK-NEXT: 4 12 2.00 * U ld3w { z0.s, z1.s, z2.s }, p0/z, [x0, x0, lsl #2] -# CHECK-NEXT: 3 11 1.50 * U ld3w { z0.s, z1.s, z2.s }, p0/z, [x0] -# CHECK-NEXT: 3 11 1.50 * U ld3w { z21.s, z22.s, z23.s }, p5/z, [x10, #15, mul vl] -# CHECK-NEXT: 3 11 1.50 * U ld3w { z23.s, z24.s, z25.s }, p3/z, [x13, #-24, mul vl] -# CHECK-NEXT: 4 12 2.00 * U ld3w { z5.s, z6.s, z7.s }, p3/z, [x17, x16, lsl #2] -# CHECK-NEXT: 5 15 8.50 * U ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x0] -# CHECK-NEXT: 5 15 8.50 * U ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0] -# CHECK-NEXT: 5 15 8.50 * U ld4b { z21.b, z22.b, z23.b, z24.b }, p5/z, [x10, #20, mul vl] -# CHECK-NEXT: 5 15 8.50 * U ld4b { z23.b, z24.b, z25.b, z26.b }, p3/z, [x13, #-32, mul vl] -# CHECK-NEXT: 5 15 8.50 * U ld4b { z5.b, z6.b, z7.b, z8.b }, p3/z, [x17, x16] -# CHECK-NEXT: 5 12 2.50 * U ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0, x0, lsl #3] -# CHECK-NEXT: 4 11 2.00 * U ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0] -# CHECK-NEXT: 4 11 2.00 * U ld4d { z21.d, z22.d, z23.d, z24.d }, p5/z, [x10, #20, mul vl] -# CHECK-NEXT: 4 11 2.00 * U ld4d { z23.d, z24.d, z25.d, z26.d }, p3/z, [x13, #-32, mul vl] -# CHECK-NEXT: 5 12 2.50 * U ld4d { z5.d, z6.d, z7.d, z8.d }, p3/z, [x17, x16, lsl #3] -# CHECK-NEXT: 5 15 8.50 * U ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, x0, lsl #1] -# CHECK-NEXT: 5 15 8.50 * U ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0] -# CHECK-NEXT: 5 15 8.50 * U ld4h { z21.h, z22.h, z23.h, z24.h }, p5/z, [x10, #20, mul vl] -# CHECK-NEXT: 5 15 8.50 * U ld4h { z23.h, z24.h, z25.h, z26.h }, p3/z, [x13, #-32, mul vl] -# CHECK-NEXT: 5 15 8.50 * U ld4h { z5.h, z6.h, z7.h, z8.h }, p3/z, [x17, x16, lsl #1] -# CHECK-NEXT: 5 12 2.50 * U ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0, x0, lsl #2] -# CHECK-NEXT: 4 11 2.00 * U ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0] -# CHECK-NEXT: 4 11 2.00 * U ld4w { z21.s, z22.s, z23.s, z24.s }, p5/z, [x10, #20, mul vl] -# CHECK-NEXT: 4 11 2.00 * U ld4w { z23.s, z24.s, z25.s, z26.s }, p3/z, [x13, #-32, mul vl] -# CHECK-NEXT: 5 12 2.50 * U ld4w { z5.s, z6.s, z7.s, z8.s }, p3/z, [x17, x16, lsl #2] +# CHECK-NEXT: 4 15 6.50 * U ld3b { z0.b - z2.b }, p0/z, [x0, x0] +# CHECK-NEXT: 4 15 6.50 * U ld3b { z0.b - z2.b }, p0/z, [x0] +# CHECK-NEXT: 4 15 6.50 * U ld3b { z21.b - z23.b }, p5/z, [x10, #15, mul vl] +# CHECK-NEXT: 4 15 6.50 * U ld3b { z23.b - z25.b }, p3/z, [x13, #-24, mul vl] +# CHECK-NEXT: 4 15 6.50 * U ld3b { z5.b - z7.b }, p3/z, [x17, x16] +# CHECK-NEXT: 4 12 2.00 * U ld3d { z0.d - z2.d }, p0/z, [x0, x0, lsl #3] +# CHECK-NEXT: 3 11 1.50 * U ld3d { z0.d - z2.d }, p0/z, [x0] +# CHECK-NEXT: 3 11 1.50 * U ld3d { z21.d - z23.d }, p5/z, [x10, #15, mul vl] +# CHECK-NEXT: 3 11 1.50 * U ld3d { z23.d - z25.d }, p3/z, [x13, #-24, mul vl] +# CHECK-NEXT: 4 12 2.00 * U ld3d { z5.d - z7.d }, p3/z, [x17, x16, lsl #3] +# CHECK-NEXT: 4 15 6.50 * U ld3h { z0.h - z2.h }, p0/z, [x0, x0, lsl #1] +# CHECK-NEXT: 4 15 6.50 * U ld3h { z0.h - z2.h }, p0/z, [x0] +# CHECK-NEXT: 4 15 6.50 * U ld3h { z21.h - z23.h }, p5/z, [x10, #15, mul vl] +# CHECK-NEXT: 4 15 6.50 * U ld3h { z23.h - z25.h }, p3/z, [x13, #-24, mul vl] +# CHECK-NEXT: 4 15 6.50 * U ld3h { z5.h - z7.h }, p3/z, [x17, x16, lsl #1] +# CHECK-NEXT: 4 12 2.00 * U ld3w { z0.s - z2.s }, p0/z, [x0, x0, lsl #2] +# CHECK-NEXT: 3 11 1.50 * U ld3w { z0.s - z2.s }, p0/z, [x0] +# CHECK-NEXT: 3 11 1.50 * U ld3w { z21.s - z23.s }, p5/z, [x10, #15, mul vl] +# CHECK-NEXT: 3 11 1.50 * U ld3w { z23.s - z25.s }, p3/z, [x13, #-24, mul vl] +# CHECK-NEXT: 4 12 2.00 * U ld3w { z5.s - z7.s }, p3/z, [x17, x16, lsl #2] +# CHECK-NEXT: 5 15 8.50 * U ld4b { z0.b - z3.b }, p0/z, [x0, x0] +# CHECK-NEXT: 5 15 8.50 * U ld4b { z0.b - z3.b }, p0/z, [x0] +# CHECK-NEXT: 5 15 8.50 * U ld4b { z21.b - z24.b }, p5/z, [x10, #20, mul vl] +# CHECK-NEXT: 5 15 8.50 * U ld4b { z23.b - z26.b }, p3/z, [x13, #-32, mul vl] +# CHECK-NEXT: 5 15 8.50 * U ld4b { z5.b - z8.b }, p3/z, [x17, x16] +# CHECK-NEXT: 5 12 2.50 * U ld4d { z0.d - z3.d }, p0/z, [x0, x0, lsl #3] +# CHECK-NEXT: 4 11 2.00 * U ld4d { z0.d - z3.d }, p0/z, [x0] +# CHECK-NEXT: 4 11 2.00 * U ld4d { z21.d - z24.d }, p5/z, [x10, #20, mul vl] +# CHECK-NEXT: 4 11 2.00 * U ld4d { z23.d - z26.d }, p3/z, [x13, #-32, mul vl] +# CHECK-NEXT: 5 12 2.50 * U ld4d { z5.d - z8.d }, p3/z, [x17, x16, lsl #3] +# CHECK-NEXT: 5 15 8.50 * U ld4h { z0.h - z3.h }, p0/z, [x0, x0, lsl #1] +# CHECK-NEXT: 5 15 8.50 * U ld4h { z0.h - z3.h }, p0/z, [x0] +# CHECK-NEXT: 5 15 8.50 * U ld4h { z21.h - z24.h }, p5/z, [x10, #20, mul vl] +# CHECK-NEXT: 5 15 8.50 * U ld4h { z23.h - z26.h }, p3/z, [x13, #-32, mul vl] +# CHECK-NEXT: 5 15 8.50 * U ld4h { z5.h - z8.h }, p3/z, [x17, x16, lsl #1] +# CHECK-NEXT: 5 12 2.50 * U ld4w { z0.s - z3.s }, p0/z, [x0, x0, lsl #2] +# CHECK-NEXT: 4 11 2.00 * U ld4w { z0.s - z3.s }, p0/z, [x0] +# CHECK-NEXT: 4 11 2.00 * U ld4w { z21.s - z24.s }, p5/z, [x10, #20, mul vl] +# CHECK-NEXT: 4 11 2.00 * U ld4w { z23.s - z26.s }, p3/z, [x13, #-32, mul vl] +# CHECK-NEXT: 5 12 2.50 * U ld4w { z5.s - z8.s }, p3/z, [x17, x16, lsl #2] # CHECK-NEXT: 1 11 0.50 * U ldff1b { z0.d }, p0/z, [x0, x0] # CHECK-NEXT: 1 16 2.00 * U ldff1b { z0.d }, p0/z, [z0.d] # CHECK-NEXT: 1 11 0.50 * U ldff1b { z0.h }, p0/z, [x0, x0] @@ -4580,46 +4580,46 @@ # CHECK-NEXT: 3 12 2.00 * U st2w { z21.s, z22.s }, p5, [x10, #10, mul vl] # CHECK-NEXT: 3 12 2.00 * U st2w { z23.s, z24.s }, p3, [x13, #-16, mul vl] # CHECK-NEXT: 2 11 2.00 * U st2w { z5.s, z6.s }, p3, [x17, x16, lsl #2] -# CHECK-NEXT: 4 15 12.00 * U st3b { z0.b, z1.b, z2.b }, p0, [x0, x0] -# CHECK-NEXT: 4 15 12.00 * U st3b { z0.b, z1.b, z2.b }, p0, [x0] -# CHECK-NEXT: 4 15 12.00 * U st3b { z21.b, z22.b, z23.b }, p5, [x10, #15, mul vl] -# CHECK-NEXT: 4 15 12.00 * U st3b { z23.b, z24.b, z25.b }, p3, [x13, #-24, mul vl] -# CHECK-NEXT: 4 15 12.00 * U st3b { z5.b, z6.b, z7.b }, p3, [x17, x16] -# CHECK-NEXT: 3 11 3.00 * U st3d { z0.d, z1.d, z2.d }, p0, [x0, x0, lsl #3] -# CHECK-NEXT: 4 12 3.00 * U st3d { z0.d, z1.d, z2.d }, p0, [x0] -# CHECK-NEXT: 4 12 3.00 * U st3d { z21.d, z22.d, z23.d }, p5, [x10, #15, mul vl] -# CHECK-NEXT: 4 12 3.00 * U st3d { z23.d, z24.d, z25.d }, p3, [x13, #-24, mul vl] -# CHECK-NEXT: 3 11 3.00 * U st3d { z5.d, z6.d, z7.d }, p3, [x17, x16, lsl #3] -# CHECK-NEXT: 4 15 12.00 * U st3h { z0.h, z1.h, z2.h }, p0, [x0, x0, lsl #1] -# CHECK-NEXT: 4 15 12.00 * U st3h { z0.h, z1.h, z2.h }, p0, [x0] -# CHECK-NEXT: 4 15 12.00 * U st3h { z21.h, z22.h, z23.h }, p5, [x10, #15, mul vl] -# CHECK-NEXT: 4 15 12.00 * U st3h { z23.h, z24.h, z25.h }, p3, [x13, #-24, mul vl] -# CHECK-NEXT: 4 15 12.00 * U st3h { z5.h, z6.h, z7.h }, p3, [x17, x16, lsl #1] -# CHECK-NEXT: 3 11 3.00 * U st3w { z0.s, z1.s, z2.s }, p0, [x0, x0, lsl #2] -# CHECK-NEXT: 4 12 3.00 * U st3w { z0.s, z1.s, z2.s }, p0, [x0] -# CHECK-NEXT: 4 12 3.00 * U st3w { z21.s, z22.s, z23.s }, p5, [x10, #15, mul vl] -# CHECK-NEXT: 4 12 3.00 * U st3w { z23.s, z24.s, z25.s }, p3, [x13, #-24, mul vl] -# CHECK-NEXT: 3 11 3.00 * U st3w { z5.s, z6.s, z7.s }, p3, [x17, x16, lsl #2] -# CHECK-NEXT: 5 15 16.00 * U st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x0] -# CHECK-NEXT: 5 15 16.00 * U st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0] -# CHECK-NEXT: 5 15 16.00 * U st4b { z21.b, z22.b, z23.b, z24.b }, p5, [x10, #20, mul vl] -# CHECK-NEXT: 5 15 16.00 * U st4b { z23.b, z24.b, z25.b, z26.b }, p3, [x13, #-32, mul vl] -# CHECK-NEXT: 5 15 16.00 * U st4b { z5.b, z6.b, z7.b, z8.b }, p3, [x17, x16] -# CHECK-NEXT: 4 11 4.00 * U st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, x0, lsl #3] -# CHECK-NEXT: 5 12 4.00 * U st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0] -# CHECK-NEXT: 5 12 4.00 * U st4d { z21.d, z22.d, z23.d, z24.d }, p5, [x10, #20, mul vl] -# CHECK-NEXT: 5 12 4.00 * U st4d { z23.d, z24.d, z25.d, z26.d }, p3, [x13, #-32, mul vl] -# CHECK-NEXT: 4 11 4.00 * U st4d { z5.d, z6.d, z7.d, z8.d }, p3, [x17, x16, lsl #3] -# CHECK-NEXT: 5 15 16.00 * U st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, x0, lsl #1] -# CHECK-NEXT: 5 15 16.00 * U st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0] -# CHECK-NEXT: 5 15 16.00 * U st4h { z21.h, z22.h, z23.h, z24.h }, p5, [x10, #20, mul vl] -# CHECK-NEXT: 5 15 16.00 * U st4h { z23.h, z24.h, z25.h, z26.h }, p3, [x13, #-32, mul vl] -# CHECK-NEXT: 5 15 16.00 * U st4h { z5.h, z6.h, z7.h, z8.h }, p3, [x17, x16, lsl #1] -# CHECK-NEXT: 4 11 4.00 * U st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, x0, lsl #2] -# CHECK-NEXT: 5 12 4.00 * U st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0] -# CHECK-NEXT: 5 12 4.00 * U st4w { z21.s, z22.s, z23.s, z24.s }, p5, [x10, #20, mul vl] -# CHECK-NEXT: 5 12 4.00 * U st4w { z23.s, z24.s, z25.s, z26.s }, p3, [x13, #-32, mul vl] -# CHECK-NEXT: 4 11 4.00 * U st4w { z5.s, z6.s, z7.s, z8.s }, p3, [x17, x16, lsl #2] +# CHECK-NEXT: 4 15 12.00 * U st3b { z0.b - z2.b }, p0, [x0, x0] +# CHECK-NEXT: 4 15 12.00 * U st3b { z0.b - z2.b }, p0, [x0] +# CHECK-NEXT: 4 15 12.00 * U st3b { z21.b - z23.b }, p5, [x10, #15, mul vl] +# CHECK-NEXT: 4 15 12.00 * U st3b { z23.b - z25.b }, p3, [x13, #-24, mul vl] +# CHECK-NEXT: 4 15 12.00 * U st3b { z5.b - z7.b }, p3, [x17, x16] +# CHECK-NEXT: 3 11 3.00 * U st3d { z0.d - z2.d }, p0, [x0, x0, lsl #3] +# CHECK-NEXT: 4 12 3.00 * U st3d { z0.d - z2.d }, p0, [x0] +# CHECK-NEXT: 4 12 3.00 * U st3d { z21.d - z23.d }, p5, [x10, #15, mul vl] +# CHECK-NEXT: 4 12 3.00 * U st3d { z23.d - z25.d }, p3, [x13, #-24, mul vl] +# CHECK-NEXT: 3 11 3.00 * U st3d { z5.d - z7.d }, p3, [x17, x16, lsl #3] +# CHECK-NEXT: 4 15 12.00 * U st3h { z0.h - z2.h }, p0, [x0, x0, lsl #1] +# CHECK-NEXT: 4 15 12.00 * U st3h { z0.h - z2.h }, p0, [x0] +# CHECK-NEXT: 4 15 12.00 * U st3h { z21.h - z23.h }, p5, [x10, #15, mul vl] +# CHECK-NEXT: 4 15 12.00 * U st3h { z23.h - z25.h }, p3, [x13, #-24, mul vl] +# CHECK-NEXT: 4 15 12.00 * U st3h { z5.h - z7.h }, p3, [x17, x16, lsl #1] +# CHECK-NEXT: 3 11 3.00 * U st3w { z0.s - z2.s }, p0, [x0, x0, lsl #2] +# CHECK-NEXT: 4 12 3.00 * U st3w { z0.s - z2.s }, p0, [x0] +# CHECK-NEXT: 4 12 3.00 * U st3w { z21.s - z23.s }, p5, [x10, #15, mul vl] +# CHECK-NEXT: 4 12 3.00 * U st3w { z23.s - z25.s }, p3, [x13, #-24, mul vl] +# CHECK-NEXT: 3 11 3.00 * U st3w { z5.s - z7.s }, p3, [x17, x16, lsl #2] +# CHECK-NEXT: 5 15 16.00 * U st4b { z0.b - z3.b }, p0, [x0, x0] +# CHECK-NEXT: 5 15 16.00 * U st4b { z0.b - z3.b }, p0, [x0] +# CHECK-NEXT: 5 15 16.00 * U st4b { z21.b - z24.b }, p5, [x10, #20, mul vl] +# CHECK-NEXT: 5 15 16.00 * U st4b { z23.b - z26.b }, p3, [x13, #-32, mul vl] +# CHECK-NEXT: 5 15 16.00 * U st4b { z5.b - z8.b }, p3, [x17, x16] +# CHECK-NEXT: 4 11 4.00 * U st4d { z0.d - z3.d }, p0, [x0, x0, lsl #3] +# CHECK-NEXT: 5 12 4.00 * U st4d { z0.d - z3.d }, p0, [x0] +# CHECK-NEXT: 5 12 4.00 * U st4d { z21.d - z24.d }, p5, [x10, #20, mul vl] +# CHECK-NEXT: 5 12 4.00 * U st4d { z23.d - z26.d }, p3, [x13, #-32, mul vl] +# CHECK-NEXT: 4 11 4.00 * U st4d { z5.d - z8.d }, p3, [x17, x16, lsl #3] +# CHECK-NEXT: 5 15 16.00 * U st4h { z0.h - z3.h }, p0, [x0, x0, lsl #1] +# CHECK-NEXT: 5 15 16.00 * U st4h { z0.h - z3.h }, p0, [x0] +# CHECK-NEXT: 5 15 16.00 * U st4h { z21.h - z24.h }, p5, [x10, #20, mul vl] +# CHECK-NEXT: 5 15 16.00 * U st4h { z23.h - z26.h }, p3, [x13, #-32, mul vl] +# CHECK-NEXT: 5 15 16.00 * U st4h { z5.h - z8.h }, p3, [x17, x16, lsl #1] +# CHECK-NEXT: 4 11 4.00 * U st4w { z0.s - z3.s }, p0, [x0, x0, lsl #2] +# CHECK-NEXT: 5 12 4.00 * U st4w { z0.s - z3.s }, p0, [x0] +# CHECK-NEXT: 5 12 4.00 * U st4w { z21.s - z24.s }, p5, [x10, #20, mul vl] +# CHECK-NEXT: 5 12 4.00 * U st4w { z23.s - z26.s }, p3, [x13, #-32, mul vl] +# CHECK-NEXT: 4 11 4.00 * U st4w { z5.s - z8.s }, p3, [x17, x16, lsl #2] # CHECK-NEXT: 1 11 1.00 * stnt1b { z0.b }, p0, [x0, x0] # CHECK-NEXT: 1 11 1.00 * stnt1b { z0.b }, p0, [x0] # CHECK-NEXT: 1 11 1.00 * stnt1b { z21.b }, p5, [x10, #7, mul vl] @@ -6080,46 +6080,46 @@ # CHECK-NEXT: - 1.00 1.00 - - - - - ld2w { z21.s, z22.s }, p5/z, [x10, #10, mul vl] # CHECK-NEXT: - 1.00 1.00 - - - - - ld2w { z23.s, z24.s }, p3/z, [x13, #-16, mul vl] # CHECK-NEXT: - 1.50 1.50 - - - - - ld2w { z5.s, z6.s }, p3/z, [x17, x16, lsl #2] -# CHECK-NEXT: - 6.50 6.50 - - - - - ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x0] -# CHECK-NEXT: - 6.50 6.50 - - - - - ld3b { z0.b, z1.b, z2.b }, p0/z, [x0] -# CHECK-NEXT: - 6.50 6.50 - - - - - ld3b { z21.b, z22.b, z23.b }, p5/z, [x10, #15, mul vl] -# CHECK-NEXT: - 6.50 6.50 - - - - - ld3b { z23.b, z24.b, z25.b }, p3/z, [x13, #-24, mul vl] -# CHECK-NEXT: - 6.50 6.50 - - - - - ld3b { z5.b, z6.b, z7.b }, p3/z, [x17, x16] -# CHECK-NEXT: - 2.00 2.00 - - - - - ld3d { z0.d, z1.d, z2.d }, p0/z, [x0, x0, lsl #3] -# CHECK-NEXT: - 1.50 1.50 - - - - - ld3d { z0.d, z1.d, z2.d }, p0/z, [x0] -# CHECK-NEXT: - 1.50 1.50 - - - - - ld3d { z21.d, z22.d, z23.d }, p5/z, [x10, #15, mul vl] -# CHECK-NEXT: - 1.50 1.50 - - - - - ld3d { z23.d, z24.d, z25.d }, p3/z, [x13, #-24, mul vl] -# CHECK-NEXT: - 2.00 2.00 - - - - - ld3d { z5.d, z6.d, z7.d }, p3/z, [x17, x16, lsl #3] -# CHECK-NEXT: - 6.50 6.50 - - - - - ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, x0, lsl #1] -# CHECK-NEXT: - 6.50 6.50 - - - - - ld3h { z0.h, z1.h, z2.h }, p0/z, [x0] -# CHECK-NEXT: - 6.50 6.50 - - - - - ld3h { z21.h, z22.h, z23.h }, p5/z, [x10, #15, mul vl] -# CHECK-NEXT: - 6.50 6.50 - - - - - ld3h { z23.h, z24.h, z25.h }, p3/z, [x13, #-24, mul vl] -# CHECK-NEXT: - 6.50 6.50 - - - - - ld3h { z5.h, z6.h, z7.h }, p3/z, [x17, x16, lsl #1] -# CHECK-NEXT: - 2.00 2.00 - - - - - ld3w { z0.s, z1.s, z2.s }, p0/z, [x0, x0, lsl #2] -# CHECK-NEXT: - 1.50 1.50 - - - - - ld3w { z0.s, z1.s, z2.s }, p0/z, [x0] -# CHECK-NEXT: - 1.50 1.50 - - - - - ld3w { z21.s, z22.s, z23.s }, p5/z, [x10, #15, mul vl] -# CHECK-NEXT: - 1.50 1.50 - - - - - ld3w { z23.s, z24.s, z25.s }, p3/z, [x13, #-24, mul vl] -# CHECK-NEXT: - 2.00 2.00 - - - - - ld3w { z5.s, z6.s, z7.s }, p3/z, [x17, x16, lsl #2] -# CHECK-NEXT: - 8.50 8.50 - - - - - ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x0] -# CHECK-NEXT: - 8.50 8.50 - - - - - ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0] -# CHECK-NEXT: - 8.50 8.50 - - - - - ld4b { z21.b, z22.b, z23.b, z24.b }, p5/z, [x10, #20, mul vl] -# CHECK-NEXT: - 8.50 8.50 - - - - - ld4b { z23.b, z24.b, z25.b, z26.b }, p3/z, [x13, #-32, mul vl] -# CHECK-NEXT: - 8.50 8.50 - - - - - ld4b { z5.b, z6.b, z7.b, z8.b }, p3/z, [x17, x16] -# CHECK-NEXT: - 2.50 2.50 - - - - - ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0, x0, lsl #3] -# CHECK-NEXT: - 2.00 2.00 - - - - - ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0] -# CHECK-NEXT: - 2.00 2.00 - - - - - ld4d { z21.d, z22.d, z23.d, z24.d }, p5/z, [x10, #20, mul vl] -# CHECK-NEXT: - 2.00 2.00 - - - - - ld4d { z23.d, z24.d, z25.d, z26.d }, p3/z, [x13, #-32, mul vl] -# CHECK-NEXT: - 2.50 2.50 - - - - - ld4d { z5.d, z6.d, z7.d, z8.d }, p3/z, [x17, x16, lsl #3] -# CHECK-NEXT: - 8.50 8.50 - - - - - ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, x0, lsl #1] -# CHECK-NEXT: - 8.50 8.50 - - - - - ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0] -# CHECK-NEXT: - 8.50 8.50 - - - - - ld4h { z21.h, z22.h, z23.h, z24.h }, p5/z, [x10, #20, mul vl] -# CHECK-NEXT: - 8.50 8.50 - - - - - ld4h { z23.h, z24.h, z25.h, z26.h }, p3/z, [x13, #-32, mul vl] -# CHECK-NEXT: - 8.50 8.50 - - - - - ld4h { z5.h, z6.h, z7.h, z8.h }, p3/z, [x17, x16, lsl #1] -# CHECK-NEXT: - 2.50 2.50 - - - - - ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0, x0, lsl #2] -# CHECK-NEXT: - 2.00 2.00 - - - - - ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0] -# CHECK-NEXT: - 2.00 2.00 - - - - - ld4w { z21.s, z22.s, z23.s, z24.s }, p5/z, [x10, #20, mul vl] -# CHECK-NEXT: - 2.00 2.00 - - - - - ld4w { z23.s, z24.s, z25.s, z26.s }, p3/z, [x13, #-32, mul vl] -# CHECK-NEXT: - 2.50 2.50 - - - - - ld4w { z5.s, z6.s, z7.s, z8.s }, p3/z, [x17, x16, lsl #2] +# CHECK-NEXT: - 6.50 6.50 - - - - - ld3b { z0.b - z2.b }, p0/z, [x0, x0] +# CHECK-NEXT: - 6.50 6.50 - - - - - ld3b { z0.b - z2.b }, p0/z, [x0] +# CHECK-NEXT: - 6.50 6.50 - - - - - ld3b { z21.b - z23.b }, p5/z, [x10, #15, mul vl] +# CHECK-NEXT: - 6.50 6.50 - - - - - ld3b { z23.b - z25.b }, p3/z, [x13, #-24, mul vl] +# CHECK-NEXT: - 6.50 6.50 - - - - - ld3b { z5.b - z7.b }, p3/z, [x17, x16] +# CHECK-NEXT: - 2.00 2.00 - - - - - ld3d { z0.d - z2.d }, p0/z, [x0, x0, lsl #3] +# CHECK-NEXT: - 1.50 1.50 - - - - - ld3d { z0.d - z2.d }, p0/z, [x0] +# CHECK-NEXT: - 1.50 1.50 - - - - - ld3d { z21.d - z23.d }, p5/z, [x10, #15, mul vl] +# CHECK-NEXT: - 1.50 1.50 - - - - - ld3d { z23.d - z25.d }, p3/z, [x13, #-24, mul vl] +# CHECK-NEXT: - 2.00 2.00 - - - - - ld3d { z5.d - z7.d }, p3/z, [x17, x16, lsl #3] +# CHECK-NEXT: - 6.50 6.50 - - - - - ld3h { z0.h - z2.h }, p0/z, [x0, x0, lsl #1] +# CHECK-NEXT: - 6.50 6.50 - - - - - ld3h { z0.h - z2.h }, p0/z, [x0] +# CHECK-NEXT: - 6.50 6.50 - - - - - ld3h { z21.h - z23.h }, p5/z, [x10, #15, mul vl] +# CHECK-NEXT: - 6.50 6.50 - - - - - ld3h { z23.h - z25.h }, p3/z, [x13, #-24, mul vl] +# CHECK-NEXT: - 6.50 6.50 - - - - - ld3h { z5.h - z7.h }, p3/z, [x17, x16, lsl #1] +# CHECK-NEXT: - 2.00 2.00 - - - - - ld3w { z0.s - z2.s }, p0/z, [x0, x0, lsl #2] +# CHECK-NEXT: - 1.50 1.50 - - - - - ld3w { z0.s - z2.s }, p0/z, [x0] +# CHECK-NEXT: - 1.50 1.50 - - - - - ld3w { z21.s - z23.s }, p5/z, [x10, #15, mul vl] +# CHECK-NEXT: - 1.50 1.50 - - - - - ld3w { z23.s - z25.s }, p3/z, [x13, #-24, mul vl] +# CHECK-NEXT: - 2.00 2.00 - - - - - ld3w { z5.s - z7.s }, p3/z, [x17, x16, lsl #2] +# CHECK-NEXT: - 8.50 8.50 - - - - - ld4b { z0.b - z3.b }, p0/z, [x0, x0] +# CHECK-NEXT: - 8.50 8.50 - - - - - ld4b { z0.b - z3.b }, p0/z, [x0] +# CHECK-NEXT: - 8.50 8.50 - - - - - ld4b { z21.b - z24.b }, p5/z, [x10, #20, mul vl] +# CHECK-NEXT: - 8.50 8.50 - - - - - ld4b { z23.b - z26.b }, p3/z, [x13, #-32, mul vl] +# CHECK-NEXT: - 8.50 8.50 - - - - - ld4b { z5.b - z8.b }, p3/z, [x17, x16] +# CHECK-NEXT: - 2.50 2.50 - - - - - ld4d { z0.d - z3.d }, p0/z, [x0, x0, lsl #3] +# CHECK-NEXT: - 2.00 2.00 - - - - - ld4d { z0.d - z3.d }, p0/z, [x0] +# CHECK-NEXT: - 2.00 2.00 - - - - - ld4d { z21.d - z24.d }, p5/z, [x10, #20, mul vl] +# CHECK-NEXT: - 2.00 2.00 - - - - - ld4d { z23.d - z26.d }, p3/z, [x13, #-32, mul vl] +# CHECK-NEXT: - 2.50 2.50 - - - - - ld4d { z5.d - z8.d }, p3/z, [x17, x16, lsl #3] +# CHECK-NEXT: - 8.50 8.50 - - - - - ld4h { z0.h - z3.h }, p0/z, [x0, x0, lsl #1] +# CHECK-NEXT: - 8.50 8.50 - - - - - ld4h { z0.h - z3.h }, p0/z, [x0] +# CHECK-NEXT: - 8.50 8.50 - - - - - ld4h { z21.h - z24.h }, p5/z, [x10, #20, mul vl] +# CHECK-NEXT: - 8.50 8.50 - - - - - ld4h { z23.h - z26.h }, p3/z, [x13, #-32, mul vl] +# CHECK-NEXT: - 8.50 8.50 - - - - - ld4h { z5.h - z8.h }, p3/z, [x17, x16, lsl #1] +# CHECK-NEXT: - 2.50 2.50 - - - - - ld4w { z0.s - z3.s }, p0/z, [x0, x0, lsl #2] +# CHECK-NEXT: - 2.00 2.00 - - - - - ld4w { z0.s - z3.s }, p0/z, [x0] +# CHECK-NEXT: - 2.00 2.00 - - - - - ld4w { z21.s - z24.s }, p5/z, [x10, #20, mul vl] +# CHECK-NEXT: - 2.00 2.00 - - - - - ld4w { z23.s - z26.s }, p3/z, [x13, #-32, mul vl] +# CHECK-NEXT: - 2.50 2.50 - - - - - ld4w { z5.s - z8.s }, p3/z, [x17, x16, lsl #2] # CHECK-NEXT: - 0.50 0.50 - - - - - ldff1b { z0.d }, p0/z, [x0, x0] # CHECK-NEXT: - 2.00 2.00 - - 1.00 - - ldff1b { z0.d }, p0/z, [z0.d] # CHECK-NEXT: - 0.50 0.50 - - - - - ldff1b { z0.h }, p0/z, [x0, x0] @@ -7092,46 +7092,46 @@ # CHECK-NEXT: - 1.50 1.50 - - 2.00 - - st2w { z21.s, z22.s }, p5, [x10, #10, mul vl] # CHECK-NEXT: - 1.50 1.50 - - 2.00 - - st2w { z23.s, z24.s }, p3, [x13, #-16, mul vl] # CHECK-NEXT: - 1.00 1.00 - - 2.00 - - st2w { z5.s, z6.s }, p3, [x17, x16, lsl #2] -# CHECK-NEXT: - 6.50 6.50 - - 12.00 - - st3b { z0.b, z1.b, z2.b }, p0, [x0, x0] -# CHECK-NEXT: - 6.50 6.50 - - 12.00 - - st3b { z0.b, z1.b, z2.b }, p0, [x0] -# CHECK-NEXT: - 6.50 6.50 - - 12.00 - - st3b { z21.b, z22.b, z23.b }, p5, [x10, #15, mul vl] -# CHECK-NEXT: - 6.50 6.50 - - 12.00 - - st3b { z23.b, z24.b, z25.b }, p3, [x13, #-24, mul vl] -# CHECK-NEXT: - 6.50 6.50 - - 12.00 - - st3b { z5.b, z6.b, z7.b }, p3, [x17, x16] -# CHECK-NEXT: - 1.50 1.50 - - 3.00 - - st3d { z0.d, z1.d, z2.d }, p0, [x0, x0, lsl #3] -# CHECK-NEXT: - 2.00 2.00 - - 3.00 - - st3d { z0.d, z1.d, z2.d }, p0, [x0] -# CHECK-NEXT: - 2.00 2.00 - - 3.00 - - st3d { z21.d, z22.d, z23.d }, p5, [x10, #15, mul vl] -# CHECK-NEXT: - 2.00 2.00 - - 3.00 - - st3d { z23.d, z24.d, z25.d }, p3, [x13, #-24, mul vl] -# CHECK-NEXT: - 1.50 1.50 - - 3.00 - - st3d { z5.d, z6.d, z7.d }, p3, [x17, x16, lsl #3] -# CHECK-NEXT: - 6.50 6.50 - - 12.00 - - st3h { z0.h, z1.h, z2.h }, p0, [x0, x0, lsl #1] -# CHECK-NEXT: - 6.50 6.50 - - 12.00 - - st3h { z0.h, z1.h, z2.h }, p0, [x0] -# CHECK-NEXT: - 6.50 6.50 - - 12.00 - - st3h { z21.h, z22.h, z23.h }, p5, [x10, #15, mul vl] -# CHECK-NEXT: - 6.50 6.50 - - 12.00 - - st3h { z23.h, z24.h, z25.h }, p3, [x13, #-24, mul vl] -# CHECK-NEXT: - 6.50 6.50 - - 12.00 - - st3h { z5.h, z6.h, z7.h }, p3, [x17, x16, lsl #1] -# CHECK-NEXT: - 1.50 1.50 - - 3.00 - - st3w { z0.s, z1.s, z2.s }, p0, [x0, x0, lsl #2] -# CHECK-NEXT: - 2.00 2.00 - - 3.00 - - st3w { z0.s, z1.s, z2.s }, p0, [x0] -# CHECK-NEXT: - 2.00 2.00 - - 3.00 - - st3w { z21.s, z22.s, z23.s }, p5, [x10, #15, mul vl] -# CHECK-NEXT: - 2.00 2.00 - - 3.00 - - st3w { z23.s, z24.s, z25.s }, p3, [x13, #-24, mul vl] -# CHECK-NEXT: - 1.50 1.50 - - 3.00 - - st3w { z5.s, z6.s, z7.s }, p3, [x17, x16, lsl #2] -# CHECK-NEXT: - 8.50 8.50 - - 16.00 - - st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x0] -# CHECK-NEXT: - 8.50 8.50 - - 16.00 - - st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0] -# CHECK-NEXT: - 8.50 8.50 - - 16.00 - - st4b { z21.b, z22.b, z23.b, z24.b }, p5, [x10, #20, mul vl] -# CHECK-NEXT: - 8.50 8.50 - - 16.00 - - st4b { z23.b, z24.b, z25.b, z26.b }, p3, [x13, #-32, mul vl] -# CHECK-NEXT: - 8.50 8.50 - - 16.00 - - st4b { z5.b, z6.b, z7.b, z8.b }, p3, [x17, x16] -# CHECK-NEXT: - 2.00 2.00 - - 4.00 - - st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, x0, lsl #3] -# CHECK-NEXT: - 2.50 2.50 - - 4.00 - - st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0] -# CHECK-NEXT: - 2.50 2.50 - - 4.00 - - st4d { z21.d, z22.d, z23.d, z24.d }, p5, [x10, #20, mul vl] -# CHECK-NEXT: - 2.50 2.50 - - 4.00 - - st4d { z23.d, z24.d, z25.d, z26.d }, p3, [x13, #-32, mul vl] -# CHECK-NEXT: - 2.00 2.00 - - 4.00 - - st4d { z5.d, z6.d, z7.d, z8.d }, p3, [x17, x16, lsl #3] -# CHECK-NEXT: - 8.50 8.50 - - 16.00 - - st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, x0, lsl #1] -# CHECK-NEXT: - 8.50 8.50 - - 16.00 - - st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0] -# CHECK-NEXT: - 8.50 8.50 - - 16.00 - - st4h { z21.h, z22.h, z23.h, z24.h }, p5, [x10, #20, mul vl] -# CHECK-NEXT: - 8.50 8.50 - - 16.00 - - st4h { z23.h, z24.h, z25.h, z26.h }, p3, [x13, #-32, mul vl] -# CHECK-NEXT: - 8.50 8.50 - - 16.00 - - st4h { z5.h, z6.h, z7.h, z8.h }, p3, [x17, x16, lsl #1] -# CHECK-NEXT: - 2.00 2.00 - - 4.00 - - st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, x0, lsl #2] -# CHECK-NEXT: - 2.50 2.50 - - 4.00 - - st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0] -# CHECK-NEXT: - 2.50 2.50 - - 4.00 - - st4w { z21.s, z22.s, z23.s, z24.s }, p5, [x10, #20, mul vl] -# CHECK-NEXT: - 2.50 2.50 - - 4.00 - - st4w { z23.s, z24.s, z25.s, z26.s }, p3, [x13, #-32, mul vl] -# CHECK-NEXT: - 2.00 2.00 - - 4.00 - - st4w { z5.s, z6.s, z7.s, z8.s }, p3, [x17, x16, lsl #2] +# CHECK-NEXT: - 6.50 6.50 - - 12.00 - - st3b { z0.b - z2.b }, p0, [x0, x0] +# CHECK-NEXT: - 6.50 6.50 - - 12.00 - - st3b { z0.b - z2.b }, p0, [x0] +# CHECK-NEXT: - 6.50 6.50 - - 12.00 - - st3b { z21.b - z23.b }, p5, [x10, #15, mul vl] +# CHECK-NEXT: - 6.50 6.50 - - 12.00 - - st3b { z23.b - z25.b }, p3, [x13, #-24, mul vl] +# CHECK-NEXT: - 6.50 6.50 - - 12.00 - - st3b { z5.b - z7.b }, p3, [x17, x16] +# CHECK-NEXT: - 1.50 1.50 - - 3.00 - - st3d { z0.d - z2.d }, p0, [x0, x0, lsl #3] +# CHECK-NEXT: - 2.00 2.00 - - 3.00 - - st3d { z0.d - z2.d }, p0, [x0] +# CHECK-NEXT: - 2.00 2.00 - - 3.00 - - st3d { z21.d - z23.d }, p5, [x10, #15, mul vl] +# CHECK-NEXT: - 2.00 2.00 - - 3.00 - - st3d { z23.d - z25.d }, p3, [x13, #-24, mul vl] +# CHECK-NEXT: - 1.50 1.50 - - 3.00 - - st3d { z5.d - z7.d }, p3, [x17, x16, lsl #3] +# CHECK-NEXT: - 6.50 6.50 - - 12.00 - - st3h { z0.h - z2.h }, p0, [x0, x0, lsl #1] +# CHECK-NEXT: - 6.50 6.50 - - 12.00 - - st3h { z0.h - z2.h }, p0, [x0] +# CHECK-NEXT: - 6.50 6.50 - - 12.00 - - st3h { z21.h - z23.h }, p5, [x10, #15, mul vl] +# CHECK-NEXT: - 6.50 6.50 - - 12.00 - - st3h { z23.h - z25.h }, p3, [x13, #-24, mul vl] +# CHECK-NEXT: - 6.50 6.50 - - 12.00 - - st3h { z5.h - z7.h }, p3, [x17, x16, lsl #1] +# CHECK-NEXT: - 1.50 1.50 - - 3.00 - - st3w { z0.s - z2.s }, p0, [x0, x0, lsl #2] +# CHECK-NEXT: - 2.00 2.00 - - 3.00 - - st3w { z0.s - z2.s }, p0, [x0] +# CHECK-NEXT: - 2.00 2.00 - - 3.00 - - st3w { z21.s - z23.s }, p5, [x10, #15, mul vl] +# CHECK-NEXT: - 2.00 2.00 - - 3.00 - - st3w { z23.s - z25.s }, p3, [x13, #-24, mul vl] +# CHECK-NEXT: - 1.50 1.50 - - 3.00 - - st3w { z5.s - z7.s }, p3, [x17, x16, lsl #2] +# CHECK-NEXT: - 8.50 8.50 - - 16.00 - - st4b { z0.b - z3.b }, p0, [x0, x0] +# CHECK-NEXT: - 8.50 8.50 - - 16.00 - - st4b { z0.b - z3.b }, p0, [x0] +# CHECK-NEXT: - 8.50 8.50 - - 16.00 - - st4b { z21.b - z24.b }, p5, [x10, #20, mul vl] +# CHECK-NEXT: - 8.50 8.50 - - 16.00 - - st4b { z23.b - z26.b }, p3, [x13, #-32, mul vl] +# CHECK-NEXT: - 8.50 8.50 - - 16.00 - - st4b { z5.b - z8.b }, p3, [x17, x16] +# CHECK-NEXT: - 2.00 2.00 - - 4.00 - - st4d { z0.d - z3.d }, p0, [x0, x0, lsl #3] +# CHECK-NEXT: - 2.50 2.50 - - 4.00 - - st4d { z0.d - z3.d }, p0, [x0] +# CHECK-NEXT: - 2.50 2.50 - - 4.00 - - st4d { z21.d - z24.d }, p5, [x10, #20, mul vl] +# CHECK-NEXT: - 2.50 2.50 - - 4.00 - - st4d { z23.d - z26.d }, p3, [x13, #-32, mul vl] +# CHECK-NEXT: - 2.00 2.00 - - 4.00 - - st4d { z5.d - z8.d }, p3, [x17, x16, lsl #3] +# CHECK-NEXT: - 8.50 8.50 - - 16.00 - - st4h { z0.h - z3.h }, p0, [x0, x0, lsl #1] +# CHECK-NEXT: - 8.50 8.50 - - 16.00 - - st4h { z0.h - z3.h }, p0, [x0] +# CHECK-NEXT: - 8.50 8.50 - - 16.00 - - st4h { z21.h - z24.h }, p5, [x10, #20, mul vl] +# CHECK-NEXT: - 8.50 8.50 - - 16.00 - - st4h { z23.h - z26.h }, p3, [x13, #-32, mul vl] +# CHECK-NEXT: - 8.50 8.50 - - 16.00 - - st4h { z5.h - z8.h }, p3, [x17, x16, lsl #1] +# CHECK-NEXT: - 2.00 2.00 - - 4.00 - - st4w { z0.s - z3.s }, p0, [x0, x0, lsl #2] +# CHECK-NEXT: - 2.50 2.50 - - 4.00 - - st4w { z0.s - z3.s }, p0, [x0] +# CHECK-NEXT: - 2.50 2.50 - - 4.00 - - st4w { z21.s - z24.s }, p5, [x10, #20, mul vl] +# CHECK-NEXT: - 2.50 2.50 - - 4.00 - - st4w { z23.s - z26.s }, p3, [x13, #-32, mul vl] +# CHECK-NEXT: - 2.00 2.00 - - 4.00 - - st4w { z5.s - z8.s }, p3, [x17, x16, lsl #2] # CHECK-NEXT: - 0.50 0.50 - - 1.00 - - stnt1b { z0.b }, p0, [x0, x0] # CHECK-NEXT: - 0.50 0.50 - - 1.00 - - stnt1b { z0.b }, p0, [x0] # CHECK-NEXT: - 0.50 0.50 - - 1.00 - - stnt1b { z21.b }, p5, [x10, #7, mul vl] diff --git a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s --- a/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s +++ b/llvm/test/tools/llvm-mca/AArch64/Neoverse/N2-sve-instructions.s @@ -4624,46 +4624,46 @@ # CHECK-NEXT: 2 8 0.50 * U ld2w { z21.s, z22.s }, p5/z, [x10, #10, mul vl] # CHECK-NEXT: 2 8 0.50 * U ld2w { z23.s, z24.s }, p3/z, [x13, #-16, mul vl] # CHECK-NEXT: 2 9 0.50 * U ld2w { z5.s, z6.s }, p3/z, [x17, x16, lsl #2] -# CHECK-NEXT: 3 10 0.67 * U ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x0] -# CHECK-NEXT: 2 9 0.50 * U ld3b { z0.b, z1.b, z2.b }, p0/z, [x0] -# CHECK-NEXT: 2 9 0.50 * U ld3b { z21.b, z22.b, z23.b }, p5/z, [x10, #15, mul vl] -# CHECK-NEXT: 2 9 0.50 * U ld3b { z23.b, z24.b, z25.b }, p3/z, [x13, #-24, mul vl] -# CHECK-NEXT: 3 10 0.67 * U ld3b { z5.b, z6.b, z7.b }, p3/z, [x17, x16] -# CHECK-NEXT: 3 10 0.67 * U ld3d { z0.d, z1.d, z2.d }, p0/z, [x0, x0, lsl #3] -# CHECK-NEXT: 2 9 0.50 * U ld3d { z0.d, z1.d, z2.d }, p0/z, [x0] -# CHECK-NEXT: 2 9 0.50 * U ld3d { z21.d, z22.d, z23.d }, p5/z, [x10, #15, mul vl] -# CHECK-NEXT: 2 9 0.50 * U ld3d { z23.d, z24.d, z25.d }, p3/z, [x13, #-24, mul vl] -# CHECK-NEXT: 3 10 0.67 * U ld3d { z5.d, z6.d, z7.d }, p3/z, [x17, x16, lsl #3] -# CHECK-NEXT: 3 10 0.67 * U ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, x0, lsl #1] -# CHECK-NEXT: 2 9 0.50 * U ld3h { z0.h, z1.h, z2.h }, p0/z, [x0] -# CHECK-NEXT: 2 9 0.50 * U ld3h { z21.h, z22.h, z23.h }, p5/z, [x10, #15, mul vl] -# CHECK-NEXT: 2 9 0.50 * U ld3h { z23.h, z24.h, z25.h }, p3/z, [x13, #-24, mul vl] -# CHECK-NEXT: 3 10 0.67 * U ld3h { z5.h, z6.h, z7.h }, p3/z, [x17, x16, lsl #1] -# CHECK-NEXT: 3 10 0.67 * U ld3w { z0.s, z1.s, z2.s }, p0/z, [x0, x0, lsl #2] -# CHECK-NEXT: 2 9 0.50 * U ld3w { z0.s, z1.s, z2.s }, p0/z, [x0] -# CHECK-NEXT: 2 9 0.50 * U ld3w { z21.s, z22.s, z23.s }, p5/z, [x10, #15, mul vl] -# CHECK-NEXT: 2 9 0.50 * U ld3w { z23.s, z24.s, z25.s }, p3/z, [x13, #-24, mul vl] -# CHECK-NEXT: 3 10 0.67 * U ld3w { z5.s, z6.s, z7.s }, p3/z, [x17, x16, lsl #2] -# CHECK-NEXT: 6 10 1.00 * U ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x0] -# CHECK-NEXT: 4 9 1.00 * U ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0] -# CHECK-NEXT: 4 9 1.00 * U ld4b { z21.b, z22.b, z23.b, z24.b }, p5/z, [x10, #20, mul vl] -# CHECK-NEXT: 4 9 1.00 * U ld4b { z23.b, z24.b, z25.b, z26.b }, p3/z, [x13, #-32, mul vl] -# CHECK-NEXT: 6 10 1.00 * U ld4b { z5.b, z6.b, z7.b, z8.b }, p3/z, [x17, x16] -# CHECK-NEXT: 6 10 1.00 * U ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0, x0, lsl #3] -# CHECK-NEXT: 4 9 1.00 * U ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0] -# CHECK-NEXT: 4 9 1.00 * U ld4d { z21.d, z22.d, z23.d, z24.d }, p5/z, [x10, #20, mul vl] -# CHECK-NEXT: 4 9 1.00 * U ld4d { z23.d, z24.d, z25.d, z26.d }, p3/z, [x13, #-32, mul vl] -# CHECK-NEXT: 6 10 1.00 * U ld4d { z5.d, z6.d, z7.d, z8.d }, p3/z, [x17, x16, lsl #3] -# CHECK-NEXT: 6 10 1.00 * U ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, x0, lsl #1] -# CHECK-NEXT: 4 9 1.00 * U ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0] -# CHECK-NEXT: 4 9 1.00 * U ld4h { z21.h, z22.h, z23.h, z24.h }, p5/z, [x10, #20, mul vl] -# CHECK-NEXT: 4 9 1.00 * U ld4h { z23.h, z24.h, z25.h, z26.h }, p3/z, [x13, #-32, mul vl] -# CHECK-NEXT: 6 10 1.00 * U ld4h { z5.h, z6.h, z7.h, z8.h }, p3/z, [x17, x16, lsl #1] -# CHECK-NEXT: 6 10 1.00 * U ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0, x0, lsl #2] -# CHECK-NEXT: 4 9 1.00 * U ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0] -# CHECK-NEXT: 4 9 1.00 * U ld4w { z21.s, z22.s, z23.s, z24.s }, p5/z, [x10, #20, mul vl] -# CHECK-NEXT: 4 9 1.00 * U ld4w { z23.s, z24.s, z25.s, z26.s }, p3/z, [x13, #-32, mul vl] -# CHECK-NEXT: 6 10 1.00 * U ld4w { z5.s, z6.s, z7.s, z8.s }, p3/z, [x17, x16, lsl #2] +# CHECK-NEXT: 3 10 0.67 * U ld3b { z0.b - z2.b }, p0/z, [x0, x0] +# CHECK-NEXT: 2 9 0.50 * U ld3b { z0.b - z2.b }, p0/z, [x0] +# CHECK-NEXT: 2 9 0.50 * U ld3b { z21.b - z23.b }, p5/z, [x10, #15, mul vl] +# CHECK-NEXT: 2 9 0.50 * U ld3b { z23.b - z25.b }, p3/z, [x13, #-24, mul vl] +# CHECK-NEXT: 3 10 0.67 * U ld3b { z5.b - z7.b }, p3/z, [x17, x16] +# CHECK-NEXT: 3 10 0.67 * U ld3d { z0.d - z2.d }, p0/z, [x0, x0, lsl #3] +# CHECK-NEXT: 2 9 0.50 * U ld3d { z0.d - z2.d }, p0/z, [x0] +# CHECK-NEXT: 2 9 0.50 * U ld3d { z21.d - z23.d }, p5/z, [x10, #15, mul vl] +# CHECK-NEXT: 2 9 0.50 * U ld3d { z23.d - z25.d }, p3/z, [x13, #-24, mul vl] +# CHECK-NEXT: 3 10 0.67 * U ld3d { z5.d - z7.d }, p3/z, [x17, x16, lsl #3] +# CHECK-NEXT: 3 10 0.67 * U ld3h { z0.h - z2.h }, p0/z, [x0, x0, lsl #1] +# CHECK-NEXT: 2 9 0.50 * U ld3h { z0.h - z2.h }, p0/z, [x0] +# CHECK-NEXT: 2 9 0.50 * U ld3h { z21.h - z23.h }, p5/z, [x10, #15, mul vl] +# CHECK-NEXT: 2 9 0.50 * U ld3h { z23.h - z25.h }, p3/z, [x13, #-24, mul vl] +# CHECK-NEXT: 3 10 0.67 * U ld3h { z5.h - z7.h }, p3/z, [x17, x16, lsl #1] +# CHECK-NEXT: 3 10 0.67 * U ld3w { z0.s - z2.s }, p0/z, [x0, x0, lsl #2] +# CHECK-NEXT: 2 9 0.50 * U ld3w { z0.s - z2.s }, p0/z, [x0] +# CHECK-NEXT: 2 9 0.50 * U ld3w { z21.s - z23.s }, p5/z, [x10, #15, mul vl] +# CHECK-NEXT: 2 9 0.50 * U ld3w { z23.s - z25.s }, p3/z, [x13, #-24, mul vl] +# CHECK-NEXT: 3 10 0.67 * U ld3w { z5.s - z7.s }, p3/z, [x17, x16, lsl #2] +# CHECK-NEXT: 6 10 1.00 * U ld4b { z0.b - z3.b }, p0/z, [x0, x0] +# CHECK-NEXT: 4 9 1.00 * U ld4b { z0.b - z3.b }, p0/z, [x0] +# CHECK-NEXT: 4 9 1.00 * U ld4b { z21.b - z24.b }, p5/z, [x10, #20, mul vl] +# CHECK-NEXT: 4 9 1.00 * U ld4b { z23.b - z26.b }, p3/z, [x13, #-32, mul vl] +# CHECK-NEXT: 6 10 1.00 * U ld4b { z5.b - z8.b }, p3/z, [x17, x16] +# CHECK-NEXT: 6 10 1.00 * U ld4d { z0.d - z3.d }, p0/z, [x0, x0, lsl #3] +# CHECK-NEXT: 4 9 1.00 * U ld4d { z0.d - z3.d }, p0/z, [x0] +# CHECK-NEXT: 4 9 1.00 * U ld4d { z21.d - z24.d }, p5/z, [x10, #20, mul vl] +# CHECK-NEXT: 4 9 1.00 * U ld4d { z23.d - z26.d }, p3/z, [x13, #-32, mul vl] +# CHECK-NEXT: 6 10 1.00 * U ld4d { z5.d - z8.d }, p3/z, [x17, x16, lsl #3] +# CHECK-NEXT: 6 10 1.00 * U ld4h { z0.h - z3.h }, p0/z, [x0, x0, lsl #1] +# CHECK-NEXT: 4 9 1.00 * U ld4h { z0.h - z3.h }, p0/z, [x0] +# CHECK-NEXT: 4 9 1.00 * U ld4h { z21.h - z24.h }, p5/z, [x10, #20, mul vl] +# CHECK-NEXT: 4 9 1.00 * U ld4h { z23.h - z26.h }, p3/z, [x13, #-32, mul vl] +# CHECK-NEXT: 6 10 1.00 * U ld4h { z5.h - z8.h }, p3/z, [x17, x16, lsl #1] +# CHECK-NEXT: 6 10 1.00 * U ld4w { z0.s - z3.s }, p0/z, [x0, x0, lsl #2] +# CHECK-NEXT: 4 9 1.00 * U ld4w { z0.s - z3.s }, p0/z, [x0] +# CHECK-NEXT: 4 9 1.00 * U ld4w { z21.s - z24.s }, p5/z, [x10, #20, mul vl] +# CHECK-NEXT: 4 9 1.00 * U ld4w { z23.s - z26.s }, p3/z, [x13, #-32, mul vl] +# CHECK-NEXT: 6 10 1.00 * U ld4w { z5.s - z8.s }, p3/z, [x17, x16, lsl #2] # CHECK-NEXT: 2 6 0.50 * U ldff1b { z0.d }, p0/z, [x0, x0] # CHECK-NEXT: 4 9 1.00 * U ldff1b { z0.d }, p0/z, [z0.d] # CHECK-NEXT: 2 6 0.50 * U ldff1b { z0.h }, p0/z, [x0, x0] @@ -6124,46 +6124,46 @@ # CHECK-NEXT: 2 4 0.50 * U st2w { z21.s, z22.s }, p5, [x10, #10, mul vl] # CHECK-NEXT: 2 4 0.50 * U st2w { z23.s, z24.s }, p3, [x13, #-16, mul vl] # CHECK-NEXT: 2 4 0.50 * U st2w { z5.s, z6.s }, p3, [x17, x16, lsl #2] -# CHECK-NEXT: 15 7 2.50 * U st3b { z0.b, z1.b, z2.b }, p0, [x0, x0] -# CHECK-NEXT: 10 7 2.50 * U st3b { z0.b, z1.b, z2.b }, p0, [x0] -# CHECK-NEXT: 10 7 2.50 * U st3b { z21.b, z22.b, z23.b }, p5, [x10, #15, mul vl] -# CHECK-NEXT: 10 7 2.50 * U st3b { z23.b, z24.b, z25.b }, p3, [x13, #-24, mul vl] -# CHECK-NEXT: 15 7 2.50 * U st3b { z5.b, z6.b, z7.b }, p3, [x17, x16] -# CHECK-NEXT: 15 7 2.50 * U st3d { z0.d, z1.d, z2.d }, p0, [x0, x0, lsl #3] -# CHECK-NEXT: 10 7 2.50 * U st3d { z0.d, z1.d, z2.d }, p0, [x0] -# CHECK-NEXT: 10 7 2.50 * U st3d { z21.d, z22.d, z23.d }, p5, [x10, #15, mul vl] -# CHECK-NEXT: 10 7 2.50 * U st3d { z23.d, z24.d, z25.d }, p3, [x13, #-24, mul vl] -# CHECK-NEXT: 15 7 2.50 * U st3d { z5.d, z6.d, z7.d }, p3, [x17, x16, lsl #3] -# CHECK-NEXT: 15 7 2.50 * U st3h { z0.h, z1.h, z2.h }, p0, [x0, x0, lsl #1] -# CHECK-NEXT: 10 7 2.50 * U st3h { z0.h, z1.h, z2.h }, p0, [x0] -# CHECK-NEXT: 10 7 2.50 * U st3h { z21.h, z22.h, z23.h }, p5, [x10, #15, mul vl] -# CHECK-NEXT: 10 7 2.50 * U st3h { z23.h, z24.h, z25.h }, p3, [x13, #-24, mul vl] -# CHECK-NEXT: 15 7 2.50 * U st3h { z5.h, z6.h, z7.h }, p3, [x17, x16, lsl #1] -# CHECK-NEXT: 15 7 2.50 * U st3w { z0.s, z1.s, z2.s }, p0, [x0, x0, lsl #2] -# CHECK-NEXT: 10 7 2.50 * U st3w { z0.s, z1.s, z2.s }, p0, [x0] -# CHECK-NEXT: 10 7 2.50 * U st3w { z21.s, z22.s, z23.s }, p5, [x10, #15, mul vl] -# CHECK-NEXT: 10 7 2.50 * U st3w { z23.s, z24.s, z25.s }, p3, [x13, #-24, mul vl] -# CHECK-NEXT: 15 7 2.50 * U st3w { z5.s, z6.s, z7.s }, p3, [x17, x16, lsl #2] -# CHECK-NEXT: 27 11 4.50 * U st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x0] -# CHECK-NEXT: 18 11 4.50 * U st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0] -# CHECK-NEXT: 18 11 4.50 * U st4b { z21.b, z22.b, z23.b, z24.b }, p5, [x10, #20, mul vl] -# CHECK-NEXT: 18 11 4.50 * U st4b { z23.b, z24.b, z25.b, z26.b }, p3, [x13, #-32, mul vl] -# CHECK-NEXT: 27 11 4.50 * U st4b { z5.b, z6.b, z7.b, z8.b }, p3, [x17, x16] -# CHECK-NEXT: 27 11 4.50 * U st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, x0, lsl #3] -# CHECK-NEXT: 18 11 4.50 * U st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0] -# CHECK-NEXT: 18 11 4.50 * U st4d { z21.d, z22.d, z23.d, z24.d }, p5, [x10, #20, mul vl] -# CHECK-NEXT: 18 11 4.50 * U st4d { z23.d, z24.d, z25.d, z26.d }, p3, [x13, #-32, mul vl] -# CHECK-NEXT: 27 11 4.50 * U st4d { z5.d, z6.d, z7.d, z8.d }, p3, [x17, x16, lsl #3] -# CHECK-NEXT: 27 11 4.50 * U st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, x0, lsl #1] -# CHECK-NEXT: 18 11 4.50 * U st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0] -# CHECK-NEXT: 18 11 4.50 * U st4h { z21.h, z22.h, z23.h, z24.h }, p5, [x10, #20, mul vl] -# CHECK-NEXT: 18 11 4.50 * U st4h { z23.h, z24.h, z25.h, z26.h }, p3, [x13, #-32, mul vl] -# CHECK-NEXT: 27 11 4.50 * U st4h { z5.h, z6.h, z7.h, z8.h }, p3, [x17, x16, lsl #1] -# CHECK-NEXT: 27 11 4.50 * U st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, x0, lsl #2] -# CHECK-NEXT: 18 11 4.50 * U st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0] -# CHECK-NEXT: 18 11 4.50 * U st4w { z21.s, z22.s, z23.s, z24.s }, p5, [x10, #20, mul vl] -# CHECK-NEXT: 18 11 4.50 * U st4w { z23.s, z24.s, z25.s, z26.s }, p3, [x13, #-32, mul vl] -# CHECK-NEXT: 27 11 4.50 * U st4w { z5.s, z6.s, z7.s, z8.s }, p3, [x17, x16, lsl #2] +# CHECK-NEXT: 15 7 2.50 * U st3b { z0.b - z2.b }, p0, [x0, x0] +# CHECK-NEXT: 10 7 2.50 * U st3b { z0.b - z2.b }, p0, [x0] +# CHECK-NEXT: 10 7 2.50 * U st3b { z21.b - z23.b }, p5, [x10, #15, mul vl] +# CHECK-NEXT: 10 7 2.50 * U st3b { z23.b - z25.b }, p3, [x13, #-24, mul vl] +# CHECK-NEXT: 15 7 2.50 * U st3b { z5.b - z7.b }, p3, [x17, x16] +# CHECK-NEXT: 15 7 2.50 * U st3d { z0.d - z2.d }, p0, [x0, x0, lsl #3] +# CHECK-NEXT: 10 7 2.50 * U st3d { z0.d - z2.d }, p0, [x0] +# CHECK-NEXT: 10 7 2.50 * U st3d { z21.d - z23.d }, p5, [x10, #15, mul vl] +# CHECK-NEXT: 10 7 2.50 * U st3d { z23.d - z25.d }, p3, [x13, #-24, mul vl] +# CHECK-NEXT: 15 7 2.50 * U st3d { z5.d - z7.d }, p3, [x17, x16, lsl #3] +# CHECK-NEXT: 15 7 2.50 * U st3h { z0.h - z2.h }, p0, [x0, x0, lsl #1] +# CHECK-NEXT: 10 7 2.50 * U st3h { z0.h - z2.h }, p0, [x0] +# CHECK-NEXT: 10 7 2.50 * U st3h { z21.h - z23.h }, p5, [x10, #15, mul vl] +# CHECK-NEXT: 10 7 2.50 * U st3h { z23.h - z25.h }, p3, [x13, #-24, mul vl] +# CHECK-NEXT: 15 7 2.50 * U st3h { z5.h - z7.h }, p3, [x17, x16, lsl #1] +# CHECK-NEXT: 15 7 2.50 * U st3w { z0.s - z2.s }, p0, [x0, x0, lsl #2] +# CHECK-NEXT: 10 7 2.50 * U st3w { z0.s - z2.s }, p0, [x0] +# CHECK-NEXT: 10 7 2.50 * U st3w { z21.s - z23.s }, p5, [x10, #15, mul vl] +# CHECK-NEXT: 10 7 2.50 * U st3w { z23.s - z25.s }, p3, [x13, #-24, mul vl] +# CHECK-NEXT: 15 7 2.50 * U st3w { z5.s - z7.s }, p3, [x17, x16, lsl #2] +# CHECK-NEXT: 27 11 4.50 * U st4b { z0.b - z3.b }, p0, [x0, x0] +# CHECK-NEXT: 18 11 4.50 * U st4b { z0.b - z3.b }, p0, [x0] +# CHECK-NEXT: 18 11 4.50 * U st4b { z21.b - z24.b }, p5, [x10, #20, mul vl] +# CHECK-NEXT: 18 11 4.50 * U st4b { z23.b - z26.b }, p3, [x13, #-32, mul vl] +# CHECK-NEXT: 27 11 4.50 * U st4b { z5.b - z8.b }, p3, [x17, x16] +# CHECK-NEXT: 27 11 4.50 * U st4d { z0.d - z3.d }, p0, [x0, x0, lsl #3] +# CHECK-NEXT: 18 11 4.50 * U st4d { z0.d - z3.d }, p0, [x0] +# CHECK-NEXT: 18 11 4.50 * U st4d { z21.d - z24.d }, p5, [x10, #20, mul vl] +# CHECK-NEXT: 18 11 4.50 * U st4d { z23.d - z26.d }, p3, [x13, #-32, mul vl] +# CHECK-NEXT: 27 11 4.50 * U st4d { z5.d - z8.d }, p3, [x17, x16, lsl #3] +# CHECK-NEXT: 27 11 4.50 * U st4h { z0.h - z3.h }, p0, [x0, x0, lsl #1] +# CHECK-NEXT: 18 11 4.50 * U st4h { z0.h - z3.h }, p0, [x0] +# CHECK-NEXT: 18 11 4.50 * U st4h { z21.h - z24.h }, p5, [x10, #20, mul vl] +# CHECK-NEXT: 18 11 4.50 * U st4h { z23.h - z26.h }, p3, [x13, #-32, mul vl] +# CHECK-NEXT: 27 11 4.50 * U st4h { z5.h - z8.h }, p3, [x17, x16, lsl #1] +# CHECK-NEXT: 27 11 4.50 * U st4w { z0.s - z3.s }, p0, [x0, x0, lsl #2] +# CHECK-NEXT: 18 11 4.50 * U st4w { z0.s - z3.s }, p0, [x0] +# CHECK-NEXT: 18 11 4.50 * U st4w { z21.s - z24.s }, p5, [x10, #20, mul vl] +# CHECK-NEXT: 18 11 4.50 * U st4w { z23.s - z26.s }, p3, [x13, #-32, mul vl] +# CHECK-NEXT: 27 11 4.50 * U st4w { z5.s - z8.s }, p3, [x17, x16, lsl #2] # CHECK-NEXT: 2 2 0.50 * stnt1b { z0.b }, p0, [x0, x0] # CHECK-NEXT: 2 2 0.50 * stnt1b { z0.b }, p0, [x0] # CHECK-NEXT: 2 2 0.50 * stnt1b { z0.d }, p0, [z1.d] @@ -8055,46 +8055,46 @@ # CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld2w { z21.s, z22.s }, p5/z, [x10, #10, mul vl] # CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld2w { z23.s, z24.s }, p3/z, [x13, #-16, mul vl] # CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld2w { z5.s, z6.s }, p3/z, [x17, x16, lsl #2] -# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 ld3b { z0.b, z1.b, z2.b }, p0/z, [x0, x0] -# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3b { z0.b, z1.b, z2.b }, p0/z, [x0] -# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3b { z21.b, z22.b, z23.b }, p5/z, [x10, #15, mul vl] -# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3b { z23.b, z24.b, z25.b }, p3/z, [x13, #-24, mul vl] -# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 ld3b { z5.b, z6.b, z7.b }, p3/z, [x17, x16] -# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 ld3d { z0.d, z1.d, z2.d }, p0/z, [x0, x0, lsl #3] -# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3d { z0.d, z1.d, z2.d }, p0/z, [x0] -# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3d { z21.d, z22.d, z23.d }, p5/z, [x10, #15, mul vl] -# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3d { z23.d, z24.d, z25.d }, p3/z, [x13, #-24, mul vl] -# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 ld3d { z5.d, z6.d, z7.d }, p3/z, [x17, x16, lsl #3] -# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 ld3h { z0.h, z1.h, z2.h }, p0/z, [x0, x0, lsl #1] -# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3h { z0.h, z1.h, z2.h }, p0/z, [x0] -# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3h { z21.h, z22.h, z23.h }, p5/z, [x10, #15, mul vl] -# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3h { z23.h, z24.h, z25.h }, p3/z, [x13, #-24, mul vl] -# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 ld3h { z5.h, z6.h, z7.h }, p3/z, [x17, x16, lsl #1] -# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 ld3w { z0.s, z1.s, z2.s }, p0/z, [x0, x0, lsl #2] -# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3w { z0.s, z1.s, z2.s }, p0/z, [x0] -# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3w { z21.s, z22.s, z23.s }, p5/z, [x10, #15, mul vl] -# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3w { z23.s, z24.s, z25.s }, p3/z, [x13, #-24, mul vl] -# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 ld3w { z5.s, z6.s, z7.s }, p3/z, [x17, x16, lsl #2] -# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - 1.00 1.00 1.00 1.00 ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0, x0] -# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4b { z0.b, z1.b, z2.b, z3.b }, p0/z, [x0] -# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4b { z21.b, z22.b, z23.b, z24.b }, p5/z, [x10, #20, mul vl] -# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4b { z23.b, z24.b, z25.b, z26.b }, p3/z, [x13, #-32, mul vl] -# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - 1.00 1.00 1.00 1.00 ld4b { z5.b, z6.b, z7.b, z8.b }, p3/z, [x17, x16] -# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - 1.00 1.00 1.00 1.00 ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0, x0, lsl #3] -# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4d { z0.d, z1.d, z2.d, z3.d }, p0/z, [x0] -# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4d { z21.d, z22.d, z23.d, z24.d }, p5/z, [x10, #20, mul vl] -# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4d { z23.d, z24.d, z25.d, z26.d }, p3/z, [x13, #-32, mul vl] -# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - 1.00 1.00 1.00 1.00 ld4d { z5.d, z6.d, z7.d, z8.d }, p3/z, [x17, x16, lsl #3] -# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - 1.00 1.00 1.00 1.00 ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0, x0, lsl #1] -# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4h { z0.h, z1.h, z2.h, z3.h }, p0/z, [x0] -# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4h { z21.h, z22.h, z23.h, z24.h }, p5/z, [x10, #20, mul vl] -# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4h { z23.h, z24.h, z25.h, z26.h }, p3/z, [x13, #-32, mul vl] -# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - 1.00 1.00 1.00 1.00 ld4h { z5.h, z6.h, z7.h, z8.h }, p3/z, [x17, x16, lsl #1] -# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - 1.00 1.00 1.00 1.00 ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0, x0, lsl #2] -# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4w { z0.s, z1.s, z2.s, z3.s }, p0/z, [x0] -# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4w { z21.s, z22.s, z23.s, z24.s }, p5/z, [x10, #20, mul vl] -# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4w { z23.s, z24.s, z25.s, z26.s }, p3/z, [x13, #-32, mul vl] -# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - 1.00 1.00 1.00 1.00 ld4w { z5.s, z6.s, z7.s, z8.s }, p3/z, [x17, x16, lsl #2] +# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 ld3b { z0.b - z2.b }, p0/z, [x0, x0] +# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3b { z0.b - z2.b }, p0/z, [x0] +# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3b { z21.b - z23.b }, p5/z, [x10, #15, mul vl] +# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3b { z23.b - z25.b }, p3/z, [x13, #-24, mul vl] +# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 ld3b { z5.b - z7.b }, p3/z, [x17, x16] +# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 ld3d { z0.d - z2.d }, p0/z, [x0, x0, lsl #3] +# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3d { z0.d - z2.d }, p0/z, [x0] +# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3d { z21.d - z23.d }, p5/z, [x10, #15, mul vl] +# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3d { z23.d - z25.d }, p3/z, [x13, #-24, mul vl] +# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 ld3d { z5.d - z7.d }, p3/z, [x17, x16, lsl #3] +# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 ld3h { z0.h - z2.h }, p0/z, [x0, x0, lsl #1] +# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3h { z0.h - z2.h }, p0/z, [x0] +# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3h { z21.h - z23.h }, p5/z, [x10, #15, mul vl] +# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3h { z23.h - z25.h }, p3/z, [x13, #-24, mul vl] +# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 ld3h { z5.h - z7.h }, p3/z, [x17, x16, lsl #1] +# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 ld3w { z0.s - z2.s }, p0/z, [x0, x0, lsl #2] +# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3w { z0.s - z2.s }, p0/z, [x0] +# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3w { z21.s - z23.s }, p5/z, [x10, #15, mul vl] +# CHECK-NEXT: - - - - 0.33 0.33 0.33 - - - - 0.50 0.50 ld3w { z23.s - z25.s }, p3/z, [x13, #-24, mul vl] +# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 0.50 0.50 ld3w { z5.s - z7.s }, p3/z, [x17, x16, lsl #2] +# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - 1.00 1.00 1.00 1.00 ld4b { z0.b - z3.b }, p0/z, [x0, x0] +# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4b { z0.b - z3.b }, p0/z, [x0] +# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4b { z21.b - z24.b }, p5/z, [x10, #20, mul vl] +# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4b { z23.b - z26.b }, p3/z, [x13, #-32, mul vl] +# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - 1.00 1.00 1.00 1.00 ld4b { z5.b - z8.b }, p3/z, [x17, x16] +# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - 1.00 1.00 1.00 1.00 ld4d { z0.d - z3.d }, p0/z, [x0, x0, lsl #3] +# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4d { z0.d - z3.d }, p0/z, [x0] +# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4d { z21.d - z24.d }, p5/z, [x10, #20, mul vl] +# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4d { z23.d - z26.d }, p3/z, [x13, #-32, mul vl] +# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - 1.00 1.00 1.00 1.00 ld4d { z5.d - z8.d }, p3/z, [x17, x16, lsl #3] +# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - 1.00 1.00 1.00 1.00 ld4h { z0.h - z3.h }, p0/z, [x0, x0, lsl #1] +# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4h { z0.h - z3.h }, p0/z, [x0] +# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4h { z21.h - z24.h }, p5/z, [x10, #20, mul vl] +# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4h { z23.h - z26.h }, p3/z, [x13, #-32, mul vl] +# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - 1.00 1.00 1.00 1.00 ld4h { z5.h - z8.h }, p3/z, [x17, x16, lsl #1] +# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - 1.00 1.00 1.00 1.00 ld4w { z0.s - z3.s }, p0/z, [x0, x0, lsl #2] +# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4w { z0.s - z3.s }, p0/z, [x0] +# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4w { z21.s - z24.s }, p5/z, [x10, #20, mul vl] +# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ld4w { z23.s - z26.s }, p3/z, [x13, #-32, mul vl] +# CHECK-NEXT: - - - - 0.67 0.67 0.67 - - 1.00 1.00 1.00 1.00 ld4w { z5.s - z8.s }, p3/z, [x17, x16, lsl #2] # CHECK-NEXT: - - - - 0.33 0.33 0.33 - - 0.50 0.50 - - ldff1b { z0.d }, p0/z, [x0, x0] # CHECK-NEXT: - - - - 0.67 0.67 0.67 - - - - 1.00 1.00 ldff1b { z0.d }, p0/z, [z0.d] # CHECK-NEXT: - - - - 0.33 0.33 0.33 - - 0.50 0.50 - - ldff1b { z0.h }, p0/z, [x0, x0] @@ -9555,46 +9555,46 @@ # CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 st2w { z21.s, z22.s }, p5, [x10, #10, mul vl] # CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 st2w { z23.s, z24.s }, p3, [x13, #-16, mul vl] # CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 st2w { z5.s, z6.s }, p3, [x17, x16, lsl #2] -# CHECK-NEXT: - - - - - 2.50 2.50 - - 2.50 2.50 2.50 2.50 st3b { z0.b, z1.b, z2.b }, p0, [x0, x0] -# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3b { z0.b, z1.b, z2.b }, p0, [x0] -# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3b { z21.b, z22.b, z23.b }, p5, [x10, #15, mul vl] -# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3b { z23.b, z24.b, z25.b }, p3, [x13, #-24, mul vl] -# CHECK-NEXT: - - - - - 2.50 2.50 - - 2.50 2.50 2.50 2.50 st3b { z5.b, z6.b, z7.b }, p3, [x17, x16] -# CHECK-NEXT: - - - - - 2.50 2.50 - - 2.50 2.50 2.50 2.50 st3d { z0.d, z1.d, z2.d }, p0, [x0, x0, lsl #3] -# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3d { z0.d, z1.d, z2.d }, p0, [x0] -# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3d { z21.d, z22.d, z23.d }, p5, [x10, #15, mul vl] -# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3d { z23.d, z24.d, z25.d }, p3, [x13, #-24, mul vl] -# CHECK-NEXT: - - - - - 2.50 2.50 - - 2.50 2.50 2.50 2.50 st3d { z5.d, z6.d, z7.d }, p3, [x17, x16, lsl #3] -# CHECK-NEXT: - - - - - 2.50 2.50 - - 2.50 2.50 2.50 2.50 st3h { z0.h, z1.h, z2.h }, p0, [x0, x0, lsl #1] -# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3h { z0.h, z1.h, z2.h }, p0, [x0] -# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3h { z21.h, z22.h, z23.h }, p5, [x10, #15, mul vl] -# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3h { z23.h, z24.h, z25.h }, p3, [x13, #-24, mul vl] -# CHECK-NEXT: - - - - - 2.50 2.50 - - 2.50 2.50 2.50 2.50 st3h { z5.h, z6.h, z7.h }, p3, [x17, x16, lsl #1] -# CHECK-NEXT: - - - - - 2.50 2.50 - - 2.50 2.50 2.50 2.50 st3w { z0.s, z1.s, z2.s }, p0, [x0, x0, lsl #2] -# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3w { z0.s, z1.s, z2.s }, p0, [x0] -# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3w { z21.s, z22.s, z23.s }, p5, [x10, #15, mul vl] -# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3w { z23.s, z24.s, z25.s }, p3, [x13, #-24, mul vl] -# CHECK-NEXT: - - - - - 2.50 2.50 - - 2.50 2.50 2.50 2.50 st3w { z5.s, z6.s, z7.s }, p3, [x17, x16, lsl #2] -# CHECK-NEXT: - - - - - 4.50 4.50 - - 4.50 4.50 4.50 4.50 st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0, x0] -# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4b { z0.b, z1.b, z2.b, z3.b }, p0, [x0] -# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4b { z21.b, z22.b, z23.b, z24.b }, p5, [x10, #20, mul vl] -# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4b { z23.b, z24.b, z25.b, z26.b }, p3, [x13, #-32, mul vl] -# CHECK-NEXT: - - - - - 4.50 4.50 - - 4.50 4.50 4.50 4.50 st4b { z5.b, z6.b, z7.b, z8.b }, p3, [x17, x16] -# CHECK-NEXT: - - - - - 4.50 4.50 - - 4.50 4.50 4.50 4.50 st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0, x0, lsl #3] -# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4d { z0.d, z1.d, z2.d, z3.d }, p0, [x0] -# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4d { z21.d, z22.d, z23.d, z24.d }, p5, [x10, #20, mul vl] -# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4d { z23.d, z24.d, z25.d, z26.d }, p3, [x13, #-32, mul vl] -# CHECK-NEXT: - - - - - 4.50 4.50 - - 4.50 4.50 4.50 4.50 st4d { z5.d, z6.d, z7.d, z8.d }, p3, [x17, x16, lsl #3] -# CHECK-NEXT: - - - - - 4.50 4.50 - - 4.50 4.50 4.50 4.50 st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0, x0, lsl #1] -# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4h { z0.h, z1.h, z2.h, z3.h }, p0, [x0] -# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4h { z21.h, z22.h, z23.h, z24.h }, p5, [x10, #20, mul vl] -# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4h { z23.h, z24.h, z25.h, z26.h }, p3, [x13, #-32, mul vl] -# CHECK-NEXT: - - - - - 4.50 4.50 - - 4.50 4.50 4.50 4.50 st4h { z5.h, z6.h, z7.h, z8.h }, p3, [x17, x16, lsl #1] -# CHECK-NEXT: - - - - - 4.50 4.50 - - 4.50 4.50 4.50 4.50 st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0, x0, lsl #2] -# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4w { z0.s, z1.s, z2.s, z3.s }, p0, [x0] -# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4w { z21.s, z22.s, z23.s, z24.s }, p5, [x10, #20, mul vl] -# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4w { z23.s, z24.s, z25.s, z26.s }, p3, [x13, #-32, mul vl] -# CHECK-NEXT: - - - - - 4.50 4.50 - - 4.50 4.50 4.50 4.50 st4w { z5.s, z6.s, z7.s, z8.s }, p3, [x17, x16, lsl #2] +# CHECK-NEXT: - - - - - 2.50 2.50 - - 2.50 2.50 2.50 2.50 st3b { z0.b - z2.b }, p0, [x0, x0] +# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3b { z0.b - z2.b }, p0, [x0] +# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3b { z21.b - z23.b }, p5, [x10, #15, mul vl] +# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3b { z23.b - z25.b }, p3, [x13, #-24, mul vl] +# CHECK-NEXT: - - - - - 2.50 2.50 - - 2.50 2.50 2.50 2.50 st3b { z5.b - z7.b }, p3, [x17, x16] +# CHECK-NEXT: - - - - - 2.50 2.50 - - 2.50 2.50 2.50 2.50 st3d { z0.d - z2.d }, p0, [x0, x0, lsl #3] +# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3d { z0.d - z2.d }, p0, [x0] +# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3d { z21.d - z23.d }, p5, [x10, #15, mul vl] +# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3d { z23.d - z25.d }, p3, [x13, #-24, mul vl] +# CHECK-NEXT: - - - - - 2.50 2.50 - - 2.50 2.50 2.50 2.50 st3d { z5.d - z7.d }, p3, [x17, x16, lsl #3] +# CHECK-NEXT: - - - - - 2.50 2.50 - - 2.50 2.50 2.50 2.50 st3h { z0.h - z2.h }, p0, [x0, x0, lsl #1] +# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3h { z0.h - z2.h }, p0, [x0] +# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3h { z21.h - z23.h }, p5, [x10, #15, mul vl] +# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3h { z23.h - z25.h }, p3, [x13, #-24, mul vl] +# CHECK-NEXT: - - - - - 2.50 2.50 - - 2.50 2.50 2.50 2.50 st3h { z5.h - z7.h }, p3, [x17, x16, lsl #1] +# CHECK-NEXT: - - - - - 2.50 2.50 - - 2.50 2.50 2.50 2.50 st3w { z0.s - z2.s }, p0, [x0, x0, lsl #2] +# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3w { z0.s - z2.s }, p0, [x0] +# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3w { z21.s - z23.s }, p5, [x10, #15, mul vl] +# CHECK-NEXT: - - - - - 2.50 2.50 - - - - 2.50 2.50 st3w { z23.s - z25.s }, p3, [x13, #-24, mul vl] +# CHECK-NEXT: - - - - - 2.50 2.50 - - 2.50 2.50 2.50 2.50 st3w { z5.s - z7.s }, p3, [x17, x16, lsl #2] +# CHECK-NEXT: - - - - - 4.50 4.50 - - 4.50 4.50 4.50 4.50 st4b { z0.b - z3.b }, p0, [x0, x0] +# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4b { z0.b - z3.b }, p0, [x0] +# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4b { z21.b - z24.b }, p5, [x10, #20, mul vl] +# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4b { z23.b - z26.b }, p3, [x13, #-32, mul vl] +# CHECK-NEXT: - - - - - 4.50 4.50 - - 4.50 4.50 4.50 4.50 st4b { z5.b - z8.b }, p3, [x17, x16] +# CHECK-NEXT: - - - - - 4.50 4.50 - - 4.50 4.50 4.50 4.50 st4d { z0.d - z3.d }, p0, [x0, x0, lsl #3] +# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4d { z0.d - z3.d }, p0, [x0] +# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4d { z21.d - z24.d }, p5, [x10, #20, mul vl] +# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4d { z23.d - z26.d }, p3, [x13, #-32, mul vl] +# CHECK-NEXT: - - - - - 4.50 4.50 - - 4.50 4.50 4.50 4.50 st4d { z5.d - z8.d }, p3, [x17, x16, lsl #3] +# CHECK-NEXT: - - - - - 4.50 4.50 - - 4.50 4.50 4.50 4.50 st4h { z0.h - z3.h }, p0, [x0, x0, lsl #1] +# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4h { z0.h - z3.h }, p0, [x0] +# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4h { z21.h - z24.h }, p5, [x10, #20, mul vl] +# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4h { z23.h - z26.h }, p3, [x13, #-32, mul vl] +# CHECK-NEXT: - - - - - 4.50 4.50 - - 4.50 4.50 4.50 4.50 st4h { z5.h - z8.h }, p3, [x17, x16, lsl #1] +# CHECK-NEXT: - - - - - 4.50 4.50 - - 4.50 4.50 4.50 4.50 st4w { z0.s - z3.s }, p0, [x0, x0, lsl #2] +# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4w { z0.s - z3.s }, p0, [x0] +# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4w { z21.s - z24.s }, p5, [x10, #20, mul vl] +# CHECK-NEXT: - - - - - 4.50 4.50 - - - - 4.50 4.50 st4w { z23.s - z26.s }, p3, [x13, #-32, mul vl] +# CHECK-NEXT: - - - - - 4.50 4.50 - - 4.50 4.50 4.50 4.50 st4w { z5.s - z8.s }, p3, [x17, x16, lsl #2] # CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 stnt1b { z0.b }, p0, [x0, x0] # CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 stnt1b { z0.b }, p0, [x0] # CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 stnt1b { z0.d }, p0, [z1.d]