diff --git a/llvm/lib/Target/AArch64/AArch64SchedA55.td b/llvm/lib/Target/AArch64/AArch64SchedA55.td
--- a/llvm/lib/Target/AArch64/AArch64SchedA55.td
+++ b/llvm/lib/Target/AArch64/AArch64SchedA55.td
@@ -149,8 +149,36 @@
 def : WriteRes { let Latency = 4; }
 def : WriteRes { let Latency = 3; }
 def : WriteRes { let Latency = 3; }
-def : WriteRes { let Latency = 4; }
-def : WriteRes { let Latency = 4; let BeginGroup = 1; }
+
+// NEON
+class CortexA55WriteVd<int n, ProcResourceKind res> : SchedWriteRes<[res]> {
+  let Latency = n;
+}
+class CortexA55WriteVq<int n, ProcResourceKind res> : SchedWriteRes<[res, res]> {
+  let Latency = n;
+  let BeginGroup = 1;
+}
+class CortexA55WriteVqL<int n, ProcResourceKind res> : SchedWriteRes<[res, res, res, res]> {
+  let Latency = n;
+  let BeginGroup = 1;
+}
+def CortexA55WriteDotScVq_4 : CortexA55WriteVq<4, CortexA55UnitFPALU>;
+def CortexA55WriteDotVq_4 : CortexA55WriteVq<4, CortexA55UnitFPALU>;
+def CortexA55WriteDotVd_4 : CortexA55WriteVd<4, CortexA55UnitFPALU>;
+def CortexA55WriteMlaLVq_4 : CortexA55WriteVq<4, CortexA55UnitFPALU>;
+def CortexA55WriteMlaIxVq_4 : CortexA55WriteVq<4, CortexA55UnitFPALU>;
+def CortexA55WriteMlaVq_4 : CortexA55WriteVq<4, CortexA55UnitFPALU>;
+def CortexA55WriteMlaVd_4 : CortexA55WriteVd<4, CortexA55UnitFPALU>;
+def CortexA55WriteAluVq_4 : CortexA55WriteVq<4, CortexA55UnitFPALU>;
+def CortexA55WriteAluVd_3 : CortexA55WriteVd<3, CortexA55UnitFPALU>;
+def CortexA55WriteAluVq_3 : CortexA55WriteVq<3, CortexA55UnitFPALU>;
+def CortexA55WriteAluVd_2 : CortexA55WriteVd<2, CortexA55UnitFPALU>;
+def CortexA55WriteAluVq_2 : CortexA55WriteVq<2, CortexA55UnitFPALU>;
+def CortexA55WriteAluVd_1 : CortexA55WriteVd<1, CortexA55UnitFPALU>;
+def CortexA55WriteAluVq_1 : CortexA55WriteVq<1, CortexA55UnitFPALU>;
+def CortexA55WriteAluVqL_4 : CortexA55WriteVqL<4, CortexA55UnitFPALU>;
+def : SchedAlias<WriteVd, CortexA55WriteVd<4, CortexA55UnitFPALU>>;
+def : SchedAlias<WriteVq, CortexA55WriteVq<4, CortexA55UnitFPALU>>;
 // FP ALU specific new schedwrite definitions
 def CortexA55WriteFPALU_F2 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 2;}
@@ -229,6 +257,13 @@
 WriteID32,WriteID64,
 WriteIM32,WriteIM64]>;
+// NEON ALU/MAC forwarding paths
+def CortexA55ReadMla : SchedReadAdvance<3, [CortexA55WriteMlaVd_4, CortexA55WriteMlaVq_4]>;
+def CortexA55ReadMlaIx : SchedReadAdvance<3, [CortexA55WriteMlaIxVq_4]>;
+def CortexA55ReadMlaL : SchedReadAdvance<3, [CortexA55WriteMlaLVq_4]>;
+def CortexA55ReadDot : SchedReadAdvance<3, [CortexA55WriteDotVd_4, CortexA55WriteDotVq_4]>;
+def CortexA55ReadDotSc : SchedReadAdvance<3, [CortexA55WriteDotScVq_4]>;
+
 //===----------------------------------------------------------------------===//
 // Subtarget-specific InstRWs.
@@ -358,4 +393,99 @@
 def : InstRW<[CortexA55WriteFSqrtSP], (instregex "^.*SQRT.*32$")>;
 def : InstRW<[CortexA55WriteFSqrtDP], (instregex "^.*SQRT.*64$")>;
+// 4.15. Advanced SIMD integer instructions
+// ASIMD absolute diff
+def : InstRW<[CortexA55WriteAluVd_3], (instregex "[SU]ABDv(2i32|4i16|8i8)")>;
+def : InstRW<[CortexA55WriteAluVq_3], (instregex "[SU]ABDv(16i8|4i32|8i16)")>;
+// ASIMD absolute diff accum
+def : InstRW<[CortexA55WriteAluVqL_4], (instregex "[SU]ABAL?v")>;
+// ASIMD absolute diff long
+def : InstRW<[CortexA55WriteAluVq_3], (instregex "[SU]ABDLv")>;
+// ASIMD arith #1
+def : InstRW<[CortexA55WriteAluVd_2], (instregex "(ADD|SUB|NEG)v(1i64|2i32|4i16|8i8)",
+    "[SU]R?HADDv(2i32|4i16|8i8)", "[SU]HSUBv(2i32|4i16|8i8)")>;
+def : InstRW<[CortexA55WriteAluVq_2], (instregex "(ADD|SUB|NEG)v(2i64|4i32|8i16|16i8)",
+    "[SU]R?HADDv(8i16|4i32|16i8)", "[SU]HSUBv(8i16|4i32|16i8)")>;
+// ASIMD arith #2
+def : InstRW<[CortexA55WriteAluVd_3], (instregex "ABSv(1i64|2i32|4i16|8i8)$",
+    "[SU]ADDLPv(2i32_v1i64|4i16_v2i32|8i8_v4i16)$",
+    "([SU]QADD|[SU]QSUB|SQNEG|SUQADD|USQADD)v(1i16|1i32|1i64|1i8|2i32|4i16|8i8)$",
+    "ADDPv(2i32|4i16|8i8)$")>;
+def : InstRW<[CortexA55WriteAluVq_3], (instregex "ABSv(2i64|4i32|8i16|16i8)$",
+    "[SU]ADDLPv(16i8_v8i16|4i32_v2i64|8i16_v4i32)$",
+    "([SU]QADD|[SU]QSUB|SQNEG|SUQADD|USQADD)v(16i8|2i64|4i32|8i16)$",
+    "ADDPv(16i8|2i64|4i32|8i16)$")>;
+// ASIMD arith #3
+def : InstRW<[CortexA55WriteAluVq_3], (instregex "SADDLv", "UADDLv", "SADDWv",
+    "UADDWv", "SSUBLv", "USUBLv", "SSUBWv", "USUBWv", "ADDHNv", "SUBHNv")>;
+// ASIMD arith #5
+def : InstRW<[CortexA55WriteAluVqL_4], (instregex "RADDHNv", "RSUBHNv")>;
+// ASIMD arith, reduce
+def : InstRW<[CortexA55WriteAluVq_3], (instregex "ADDVv", "SADDLVv", "UADDLVv")>;
+// ASIMD compare #1
+def : InstRW<[CortexA55WriteAluVd_2], (instregex "CM(EQ|GE|GT|HI|HS|LE|LT)v(1i64|2i32|4i16|8i8)")>;
+def : InstRW<[CortexA55WriteAluVq_2], (instregex "CM(EQ|GE|GT|HI|HS|LE|LT)v(2i64|4i32|8i16|16i8)")>;
+// ASIMD compare #2
+def : InstRW<[CortexA55WriteAluVd_3], (instregex "CMTSTv(1i64|2i32|4i16|8i8)")>;
+def : InstRW<[CortexA55WriteAluVq_3], (instregex "CMTSTv(2i64|4i32|8i16|16i8)")>;
+// ASIMD logical #1
+def : InstRW<[CortexA55WriteAluVd_1], (instregex "(AND|EOR|NOT|ORN)v8i8",
+    "(ORR|BIC)v(2i32|4i16|8i8)$", "MVNIv(2i|2s|4i16)")>;
+def : InstRW<[CortexA55WriteAluVq_1], (instregex "(AND|EOR|NOT|ORN)v16i8",
+    "(ORR|BIC)v(16i8|4i32|8i16)$", "MVNIv(4i32|4s|8i16)")>;
+// ASIMD max/min, basic
+def : InstRW<[CortexA55WriteAluVd_2], (instregex "[SU](MIN|MAX)P?v(2i32|4i16|8i8)")>;
+def : InstRW<[CortexA55WriteAluVq_2], (instregex "[SU](MIN|MAX)P?v(16i8|4i32|8i16)")>;
+// ASIMD max/min, reduce
+def : InstRW<[CortexA55WriteAluVq_4], (instregex "[SU](MAX|MIN)Vv")>;
+// ASIMD multiply, by element
+def : InstRW<[CortexA55WriteAluVq_4], (instregex "MULv(2i32|4i16|4i32|8i16)_indexed$",
+    "SQR?DMULHv(1i16|1i32|2i32|4i16|4i32|8i16)_indexed$")>;
+// ASIMD multiply
+def : InstRW<[CortexA55WriteAluVd_3], (instrs PMULv8i8)>;
+def : InstRW<[CortexA55WriteAluVq_3], (instrs PMULv16i8)>;
+// ASIMD multiply accumulate
+def : InstRW<[CortexA55WriteMlaVd_4, CortexA55ReadMla], (instregex "ML[AS]v(2i32|4i16|8i8)$")>;
+def : InstRW<[CortexA55WriteMlaVq_4, CortexA55ReadMla], (instregex "ML[AS]v(16i8|4i32|8i16)$")>;
+def : InstRW<[CortexA55WriteMlaIxVq_4, CortexA55ReadMlaIx], (instregex "ML[AS]v(2i32|4i16|4i32|8i16)_indexed$")>;
+// ASIMD multiply accumulate half
+def : InstRW<[CortexA55WriteAluVq_4], (instregex "SQRDML[AS]H[vi]")>;
+// ASIMD multiply accumulate long
+def : InstRW<[CortexA55WriteMlaLVq_4, CortexA55ReadMlaL], (instregex "[SU]ML[AS]Lv")>;
+// ASIMD multiply accumulate long #2
+def : InstRW<[CortexA55WriteAluVq_4],
(instregex "SQDML[AS]L[iv]")>; +// ASIMD dot product +def : InstRW<[CortexA55WriteDotVd_4, CortexA55ReadDot], (instregex "[SU]DOTv8i8")>; +def : InstRW<[CortexA55WriteDotVq_4, CortexA55ReadDot], (instregex "[SU]DOTv16i8")>; +// ASIMD dot product, by scalar +def : InstRW<[CortexA55WriteDotScVq_4, CortexA55ReadDotSc], (instregex "[SU]DOTlanev")>; +// ASIMD multiply long +def : InstRW<[CortexA55WriteAluVq_4], (instregex "[SU]MULLv", "SQDMULL[iv]")>; +// ASIMD polynomial (8x8) multiply long +def : InstRW<[CortexA55WriteAluVq_3], (instrs PMULLv8i8, PMULLv16i8)>; +// ASIMD pairwise add and accumulate +def : InstRW<[CortexA55WriteAluVqL_4], (instregex "[SU]ADALPv")>; +// ASIMD shift accumulate +def : InstRW<[CortexA55WriteAluVd_3], (instregex "[SU]SRA(d|v2i32|v4i16|v8i8)")>; +def : InstRW<[CortexA55WriteAluVq_3], (instregex "[SU]SRAv(16i8|2i64|4i32|8i16)")>; +// ASIMD shift accumulate #2 +def : InstRW<[CortexA55WriteAluVqL_4], (instregex "[SU]RSRA[vd]")>; +// ASIMD shift by immed +def : InstRW<[CortexA55WriteAluVd_2], (instregex "SHLd$", "SHLv", + "SLId$", "SRId$", "[SU]SHR[vd]", "SHRNv")>; +// ASIMD shift by immed +// SXTL and UXTL are aliases for SHLL +def : InstRW<[CortexA55WriteAluVq_2], (instregex "[US]?SHLLv")>; +// ASIMD shift by immed #2 +def : InstRW<[CortexA55WriteAluVd_3], (instregex "[SU]RSHR(d|v2i32|v4i16|v8i8)", + "RSHRNv(2i32|4i16|8i8)")>; +def : InstRW<[CortexA55WriteAluVq_3], (instregex "[SU]RSHRv(16i8|2i64|4i32|8i16)", + "RSHRNv(16i8|4i32|8i16)")>; +// ASIMD shift by register +def : InstRW<[CortexA55WriteAluVd_2], (instregex "[SU]SHLv(1i64|2i32|4i16|8i8)")>; +def : InstRW<[CortexA55WriteAluVq_2], (instregex "[SU]SHLv(2i64|4i32|8i16|16i8)")>; +// ASIMD shift by register #2 +def : InstRW<[CortexA55WriteAluVd_3], (instregex "[SU]RSHLv(1i64|2i32|4i16|8i8)")>; +def : InstRW<[CortexA55WriteAluVq_3], (instregex "[SU]RSHLv(2i64|4i32|8i16|16i8)")>; + } diff --git a/llvm/test/Analysis/CostModel/AArch64/vector-select.ll b/llvm/test/Analysis/CostModel/AArch64/vector-select.ll --- a/llvm/test/Analysis/CostModel/AArch64/vector-select.ll +++ b/llvm/test/Analysis/CostModel/AArch64/vector-select.ll @@ -121,11 +121,11 @@ ; CODE-LABEL: v3i64_select_sle ; CODE: bb.0 ; CODE: mov -; CODE: ldr ; CODE: mov ; CODE: mov ; CODE: cmge ; CODE: cmge +; CODE: ldr ; CODE: bif ; CODE: bif ; CODE: ext diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll @@ -35,11 +35,11 @@ ; SDAG-LABEL: combine_vec_udiv_nonuniform: ; SDAG: // %bb.0: ; SDAG-NEXT: adrp x8, .LCPI1_0 +; SDAG-NEXT: adrp x9, .LCPI1_1 ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI1_0] -; SDAG-NEXT: adrp x8, .LCPI1_1 -; SDAG-NEXT: ushl v1.8h, v0.8h, v1.8h -; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI1_1] ; SDAG-NEXT: adrp x8, .LCPI1_2 +; SDAG-NEXT: ldr q2, [x9, :lo12:.LCPI1_1] +; SDAG-NEXT: ushl v1.8h, v0.8h, v1.8h ; SDAG-NEXT: umull2 v3.4s, v1.8h, v2.8h ; SDAG-NEXT: umull v1.4s, v1.4h, v2.4h ; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI1_2] @@ -48,41 +48,41 @@ ; SDAG-NEXT: sub v0.8h, v0.8h, v1.8h ; SDAG-NEXT: umull2 v3.4s, v0.8h, v2.8h ; SDAG-NEXT: umull v0.4s, v0.4h, v2.4h +; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI1_3] ; SDAG-NEXT: uzp2 v0.8h, v0.8h, v3.8h ; SDAG-NEXT: add v0.8h, v0.8h, v1.8h -; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI1_3] -; SDAG-NEXT: ushl v0.8h, v0.8h, v1.8h +; SDAG-NEXT: ushl v0.8h, v0.8h, v2.8h ; SDAG-NEXT: ret ; ; GISEL-LABEL: combine_vec_udiv_nonuniform: ; GISEL: // %bb.0: ; 
GISEL-NEXT: adrp x8, .LCPI1_4 -; GISEL-NEXT: adrp x10, .LCPI1_0 -; GISEL-NEXT: adrp x9, .LCPI1_1 +; GISEL-NEXT: adrp x9, .LCPI1_0 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI1_4] ; GISEL-NEXT: adrp x8, .LCPI1_3 -; GISEL-NEXT: ldr q5, [x10, :lo12:.LCPI1_0] -; GISEL-NEXT: ldr q6, [x9, :lo12:.LCPI1_1] +; GISEL-NEXT: ldr q5, [x9, :lo12:.LCPI1_0] ; GISEL-NEXT: neg v1.8h, v1.8h ; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI1_3] ; GISEL-NEXT: adrp x8, .LCPI1_2 ; GISEL-NEXT: ushl v1.8h, v0.8h, v1.8h ; GISEL-NEXT: umull2 v3.4s, v1.8h, v2.8h ; GISEL-NEXT: umull v1.4s, v1.4h, v2.4h -; GISEL-NEXT: uzp2 v1.8h, v1.8h, v3.8h -; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI1_2] +; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI1_2] ; GISEL-NEXT: adrp x8, .LCPI1_5 -; GISEL-NEXT: sub v2.8h, v0.8h, v1.8h -; GISEL-NEXT: umull2 v4.4s, v2.8h, v3.8h -; GISEL-NEXT: umull v2.4s, v2.4h, v3.4h +; GISEL-NEXT: uzp2 v1.8h, v1.8h, v3.8h +; GISEL-NEXT: sub v3.8h, v0.8h, v1.8h +; GISEL-NEXT: umull2 v4.4s, v3.8h, v2.8h +; GISEL-NEXT: umull v2.4s, v3.4h, v2.4h ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI1_5] +; GISEL-NEXT: adrp x8, .LCPI1_1 ; GISEL-NEXT: cmeq v3.8h, v3.8h, v5.8h ; GISEL-NEXT: uzp2 v2.8h, v2.8h, v4.8h -; GISEL-NEXT: neg v4.8h, v6.8h +; GISEL-NEXT: ldr q4, [x8, :lo12:.LCPI1_1] +; GISEL-NEXT: shl v3.8h, v3.8h, #15 ; GISEL-NEXT: add v1.8h, v2.8h, v1.8h -; GISEL-NEXT: shl v2.8h, v3.8h, #15 -; GISEL-NEXT: ushl v1.8h, v1.8h, v4.8h -; GISEL-NEXT: sshr v2.8h, v2.8h, #15 +; GISEL-NEXT: neg v2.8h, v4.8h +; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h +; GISEL-NEXT: sshr v2.8h, v3.8h, #15 ; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b ; GISEL-NEXT: ret %1 = udiv <8 x i16> %x, @@ -93,15 +93,15 @@ ; SDAG-LABEL: combine_vec_udiv_nonuniform2: ; SDAG: // %bb.0: ; SDAG-NEXT: adrp x8, .LCPI2_0 +; SDAG-NEXT: adrp x9, .LCPI2_1 ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI2_0] -; SDAG-NEXT: adrp x8, .LCPI2_1 -; SDAG-NEXT: ushl v0.8h, v0.8h, v1.8h -; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI2_1] ; SDAG-NEXT: adrp x8, .LCPI2_2 -; SDAG-NEXT: umull2 v2.4s, v0.8h, v1.8h -; SDAG-NEXT: umull v0.4s, v0.4h, v1.4h +; SDAG-NEXT: ldr q2, [x9, :lo12:.LCPI2_1] +; SDAG-NEXT: ushl v0.8h, v0.8h, v1.8h +; SDAG-NEXT: umull2 v1.4s, v0.8h, v2.8h +; SDAG-NEXT: umull v0.4s, v0.4h, v2.4h +; SDAG-NEXT: uzp2 v0.8h, v0.8h, v1.8h ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI2_2] -; SDAG-NEXT: uzp2 v0.8h, v0.8h, v2.8h ; SDAG-NEXT: ushl v0.8h, v0.8h, v1.8h ; SDAG-NEXT: ret ; @@ -112,21 +112,21 @@ ; GISEL-NEXT: adrp x10, .LCPI2_0 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI2_3] ; GISEL-NEXT: adrp x8, .LCPI2_2 -; GISEL-NEXT: ldr q3, [x9, :lo12:.LCPI2_4] ; GISEL-NEXT: ldr q4, [x10, :lo12:.LCPI2_0] ; GISEL-NEXT: neg v1.8h, v1.8h ; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI2_2] ; GISEL-NEXT: adrp x8, .LCPI2_1 -; GISEL-NEXT: cmeq v3.8h, v3.8h, v4.8h ; GISEL-NEXT: ushl v1.8h, v0.8h, v1.8h -; GISEL-NEXT: shl v3.8h, v3.8h, #15 -; GISEL-NEXT: umull2 v5.4s, v1.8h, v2.8h +; GISEL-NEXT: umull2 v3.4s, v1.8h, v2.8h +; GISEL-NEXT: ldr q5, [x8, :lo12:.LCPI2_1] ; GISEL-NEXT: umull v1.4s, v1.4h, v2.4h -; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI2_1] -; GISEL-NEXT: neg v2.8h, v2.8h -; GISEL-NEXT: uzp2 v1.8h, v1.8h, v5.8h -; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h -; GISEL-NEXT: sshr v2.8h, v3.8h, #15 +; GISEL-NEXT: ldr q2, [x9, :lo12:.LCPI2_4] +; GISEL-NEXT: cmeq v2.8h, v2.8h, v4.8h +; GISEL-NEXT: uzp2 v1.8h, v1.8h, v3.8h +; GISEL-NEXT: neg v3.8h, v5.8h +; GISEL-NEXT: shl v2.8h, v2.8h, #15 +; GISEL-NEXT: ushl v1.8h, v1.8h, v3.8h +; GISEL-NEXT: sshr v2.8h, v2.8h, #15 ; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b ; GISEL-NEXT: ret %1 = udiv <8 x i16> %x, @@ -151,21 +151,21 @@ ; 
GISEL-LABEL: combine_vec_udiv_nonuniform3: ; GISEL: // %bb.0: ; GISEL-NEXT: adrp x8, .LCPI3_2 -; GISEL-NEXT: adrp x10, .LCPI3_0 -; GISEL-NEXT: adrp x9, .LCPI3_1 +; GISEL-NEXT: adrp x9, .LCPI3_0 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI3_2] ; GISEL-NEXT: adrp x8, .LCPI3_3 -; GISEL-NEXT: ldr q3, [x10, :lo12:.LCPI3_0] -; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI3_1] +; GISEL-NEXT: ldr q3, [x9, :lo12:.LCPI3_0] ; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h ; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h ; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h ; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI3_3] +; GISEL-NEXT: adrp x8, .LCPI3_1 ; GISEL-NEXT: cmeq v2.8h, v2.8h, v3.8h -; GISEL-NEXT: sub v5.8h, v0.8h, v1.8h -; GISEL-NEXT: neg v3.8h, v4.8h +; GISEL-NEXT: sub v4.8h, v0.8h, v1.8h +; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI3_1] ; GISEL-NEXT: shl v2.8h, v2.8h, #15 -; GISEL-NEXT: usra v1.8h, v5.8h, #1 +; GISEL-NEXT: usra v1.8h, v4.8h, #1 +; GISEL-NEXT: neg v3.8h, v3.8h ; GISEL-NEXT: sshr v2.8h, v2.8h, #15 ; GISEL-NEXT: ushl v1.8h, v1.8h, v3.8h ; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b @@ -178,41 +178,41 @@ ; SDAG-LABEL: combine_vec_udiv_nonuniform4: ; SDAG: // %bb.0: ; SDAG-NEXT: adrp x8, .LCPI4_0 -; SDAG-NEXT: adrp x9, .LCPI4_3 +; SDAG-NEXT: adrp x9, .LCPI4_2 ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI4_0] ; SDAG-NEXT: adrp x8, .LCPI4_1 -; SDAG-NEXT: ldr q3, [x9, :lo12:.LCPI4_3] +; SDAG-NEXT: ldr q3, [x9, :lo12:.LCPI4_2] ; SDAG-NEXT: umull2 v2.8h, v0.16b, v1.16b ; SDAG-NEXT: umull v1.8h, v0.8b, v1.8b -; SDAG-NEXT: and v0.16b, v0.16b, v3.16b ; SDAG-NEXT: uzp2 v1.16b, v1.16b, v2.16b ; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI4_1] -; SDAG-NEXT: adrp x8, .LCPI4_2 +; SDAG-NEXT: adrp x8, .LCPI4_3 ; SDAG-NEXT: ushl v1.16b, v1.16b, v2.16b -; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI4_2] -; SDAG-NEXT: and v1.16b, v1.16b, v2.16b +; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI4_3] +; SDAG-NEXT: and v1.16b, v1.16b, v3.16b +; SDAG-NEXT: and v0.16b, v0.16b, v2.16b ; SDAG-NEXT: orr v0.16b, v0.16b, v1.16b ; SDAG-NEXT: ret ; ; GISEL-LABEL: combine_vec_udiv_nonuniform4: ; GISEL: // %bb.0: +; GISEL-NEXT: adrp x8, .LCPI4_2 +; GISEL-NEXT: adrp x9, .LCPI4_0 +; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI4_2] ; GISEL-NEXT: adrp x8, .LCPI4_3 -; GISEL-NEXT: adrp x9, .LCPI4_2 -; GISEL-NEXT: adrp x10, .LCPI4_1 -; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI4_3] -; GISEL-NEXT: adrp x8, .LCPI4_0 -; GISEL-NEXT: ldr q2, [x9, :lo12:.LCPI4_2] -; GISEL-NEXT: ldr q3, [x10, :lo12:.LCPI4_1] -; GISEL-NEXT: ldr q4, [x8, :lo12:.LCPI4_0] -; GISEL-NEXT: umull2 v5.8h, v0.16b, v2.16b -; GISEL-NEXT: umull v2.8h, v0.8b, v2.8b -; GISEL-NEXT: cmeq v1.16b, v1.16b, v4.16b -; GISEL-NEXT: neg v3.16b, v3.16b -; GISEL-NEXT: uzp2 v2.16b, v2.16b, v5.16b -; GISEL-NEXT: shl v1.16b, v1.16b, #7 -; GISEL-NEXT: ushl v2.16b, v2.16b, v3.16b -; GISEL-NEXT: sshr v1.16b, v1.16b, #7 -; GISEL-NEXT: bif v0.16b, v2.16b, v1.16b +; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI4_0] +; GISEL-NEXT: umull2 v2.8h, v0.16b, v1.16b +; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI4_3] +; GISEL-NEXT: umull v1.8h, v0.8b, v1.8b +; GISEL-NEXT: adrp x8, .LCPI4_1 +; GISEL-NEXT: cmeq v3.16b, v3.16b, v4.16b +; GISEL-NEXT: uzp2 v1.16b, v1.16b, v2.16b +; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI4_1] +; GISEL-NEXT: shl v3.16b, v3.16b, #7 +; GISEL-NEXT: neg v2.16b, v2.16b +; GISEL-NEXT: ushl v1.16b, v1.16b, v2.16b +; GISEL-NEXT: sshr v2.16b, v3.16b, #7 +; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b ; GISEL-NEXT: ret %div = udiv <16 x i8> %x, ret <16 x i8> %div @@ -222,54 +222,54 @@ ; SDAG-LABEL: pr38477: ; SDAG: // %bb.0: ; SDAG-NEXT: adrp x8, .LCPI5_0 -; SDAG-NEXT: adrp x9, .LCPI5_4 +; SDAG-NEXT: 
adrp x9, .LCPI5_3 ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI5_0] ; SDAG-NEXT: adrp x8, .LCPI5_1 ; SDAG-NEXT: umull2 v2.4s, v0.8h, v1.8h -; SDAG-NEXT: ldr q3, [x8, :lo12:.LCPI5_1] ; SDAG-NEXT: umull v1.4s, v0.4h, v1.4h -; SDAG-NEXT: adrp x8, .LCPI5_2 ; SDAG-NEXT: uzp2 v1.8h, v1.8h, v2.8h -; SDAG-NEXT: sub v2.8h, v0.8h, v1.8h -; SDAG-NEXT: umull2 v4.4s, v2.8h, v3.8h -; SDAG-NEXT: umull v2.4s, v2.4h, v3.4h -; SDAG-NEXT: ldr q3, [x9, :lo12:.LCPI5_4] -; SDAG-NEXT: and v0.16b, v0.16b, v3.16b +; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI5_1] +; SDAG-NEXT: adrp x8, .LCPI5_2 +; SDAG-NEXT: sub v3.8h, v0.8h, v1.8h +; SDAG-NEXT: umull2 v4.4s, v3.8h, v2.8h +; SDAG-NEXT: umull v2.4s, v3.4h, v2.4h +; SDAG-NEXT: ldr q3, [x8, :lo12:.LCPI5_2] +; SDAG-NEXT: adrp x8, .LCPI5_4 ; SDAG-NEXT: uzp2 v2.8h, v2.8h, v4.8h +; SDAG-NEXT: ldr q4, [x9, :lo12:.LCPI5_3] ; SDAG-NEXT: add v1.8h, v2.8h, v1.8h -; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI5_2] -; SDAG-NEXT: adrp x8, .LCPI5_3 -; SDAG-NEXT: ushl v1.8h, v1.8h, v2.8h -; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI5_3] -; SDAG-NEXT: and v1.16b, v1.16b, v2.16b +; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI5_4] +; SDAG-NEXT: ushl v1.8h, v1.8h, v3.8h +; SDAG-NEXT: and v0.16b, v0.16b, v2.16b +; SDAG-NEXT: and v1.16b, v1.16b, v4.16b ; SDAG-NEXT: orr v0.16b, v0.16b, v1.16b ; SDAG-NEXT: ret ; ; GISEL-LABEL: pr38477: ; GISEL: // %bb.0: ; GISEL-NEXT: adrp x8, .LCPI5_3 -; GISEL-NEXT: adrp x10, .LCPI5_0 -; GISEL-NEXT: adrp x9, .LCPI5_1 +; GISEL-NEXT: adrp x9, .LCPI5_0 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI5_3] ; GISEL-NEXT: adrp x8, .LCPI5_2 -; GISEL-NEXT: ldr q5, [x10, :lo12:.LCPI5_0] -; GISEL-NEXT: ldr q6, [x9, :lo12:.LCPI5_1] +; GISEL-NEXT: ldr q5, [x9, :lo12:.LCPI5_0] ; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h -; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI5_2] ; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h -; GISEL-NEXT: adrp x8, .LCPI5_4 ; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h -; GISEL-NEXT: sub v2.8h, v0.8h, v1.8h -; GISEL-NEXT: umull2 v4.4s, v2.8h, v3.8h -; GISEL-NEXT: umull v2.4s, v2.4h, v3.4h +; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI5_2] +; GISEL-NEXT: adrp x8, .LCPI5_4 +; GISEL-NEXT: sub v3.8h, v0.8h, v1.8h +; GISEL-NEXT: umull2 v4.4s, v3.8h, v2.8h +; GISEL-NEXT: umull v2.4s, v3.4h, v2.4h ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI5_4] +; GISEL-NEXT: adrp x8, .LCPI5_1 ; GISEL-NEXT: cmeq v3.8h, v3.8h, v5.8h ; GISEL-NEXT: uzp2 v2.8h, v2.8h, v4.8h -; GISEL-NEXT: neg v4.8h, v6.8h +; GISEL-NEXT: ldr q4, [x8, :lo12:.LCPI5_1] +; GISEL-NEXT: shl v3.8h, v3.8h, #15 ; GISEL-NEXT: add v1.8h, v2.8h, v1.8h -; GISEL-NEXT: shl v2.8h, v3.8h, #15 -; GISEL-NEXT: ushl v1.8h, v1.8h, v4.8h -; GISEL-NEXT: sshr v2.8h, v2.8h, #15 +; GISEL-NEXT: neg v2.8h, v4.8h +; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h +; GISEL-NEXT: sshr v2.8h, v3.8h, #15 ; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b ; GISEL-NEXT: ret %1 = udiv <8 x i16> %a0, diff --git a/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll b/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll --- a/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll @@ -98,10 +98,10 @@ define <2 x i16> @dupsext_v2i8_v2i16(i8 %src, <2 x i8> %b) { ; CHECK-LABEL: dupsext_v2i8_v2i16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: shl v0.2s, v0.2s, #24 ; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: dup v1.2s, w8 +; CHECK-NEXT: shl v0.2s, v0.2s, #24 ; CHECK-NEXT: sshr v0.2s, v0.2s, #24 +; CHECK-NEXT: dup v1.2s, w8 ; CHECK-NEXT: mul v0.2s, v1.2s, v0.2s ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/active_lane_mask.ll b/llvm/test/CodeGen/AArch64/active_lane_mask.ll --- 
a/llvm/test/CodeGen/AArch64/active_lane_mask.ll +++ b/llvm/test/CodeGen/AArch64/active_lane_mask.ll @@ -428,10 +428,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI24_0 ; CHECK-NEXT: dup v0.8b, w0 +; CHECK-NEXT: dup v2.8b, w1 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI24_0] ; CHECK-NEXT: uqadd v0.8b, v0.8b, v1.8b -; CHECK-NEXT: dup v1.8b, w1 -; CHECK-NEXT: cmhi v0.8b, v1.8b, v0.8b +; CHECK-NEXT: cmhi v0.8b, v2.8b, v0.8b ; CHECK-NEXT: ret %active.lane.mask = call <8 x i1> @llvm.get.active.lane.mask.v8i1.i8(i8 %index, i8 %TC) ret <8 x i1> %active.lane.mask @@ -440,16 +440,16 @@ define <4 x i1> @lane_mask_v4i1_i8(i8 %index, i8 %TC) { ; CHECK-LABEL: lane_mask_v4i1_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: dup v0.4h, w0 ; CHECK-NEXT: adrp x8, .LCPI25_0 -; CHECK-NEXT: dup v2.4h, w1 +; CHECK-NEXT: dup v0.4h, w0 +; CHECK-NEXT: movi d2, #0xff00ff00ff00ff +; CHECK-NEXT: dup v3.4h, w1 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI25_0] ; CHECK-NEXT: bic v0.4h, #255, lsl #8 -; CHECK-NEXT: bic v2.4h, #255, lsl #8 +; CHECK-NEXT: bic v3.4h, #255, lsl #8 ; CHECK-NEXT: add v0.4h, v0.4h, v1.4h -; CHECK-NEXT: movi d1, #0xff00ff00ff00ff -; CHECK-NEXT: umin v0.4h, v0.4h, v1.4h -; CHECK-NEXT: cmhi v0.4h, v2.4h, v0.4h +; CHECK-NEXT: umin v0.4h, v0.4h, v2.4h +; CHECK-NEXT: cmhi v0.4h, v3.4h, v0.4h ; CHECK-NEXT: ret %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i8(i8 %index, i8 %TC) ret <4 x i1> %active.lane.mask @@ -458,16 +458,16 @@ define <2 x i1> @lane_mask_v2i1_i8(i8 %index, i8 %TC) { ; CHECK-LABEL: lane_mask_v2i1_i8: ; CHECK: // %bb.0: +; CHECK-NEXT: adrp x8, .LCPI26_0 ; CHECK-NEXT: movi d0, #0x0000ff000000ff ; CHECK-NEXT: dup v1.2s, w0 -; CHECK-NEXT: adrp x8, .LCPI26_0 ; CHECK-NEXT: dup v3.2s, w1 -; CHECK-NEXT: and v1.8b, v1.8b, v0.8b ; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI26_0] +; CHECK-NEXT: and v1.8b, v1.8b, v0.8b ; CHECK-NEXT: add v1.2s, v1.2s, v2.2s -; CHECK-NEXT: and v2.8b, v3.8b, v0.8b -; CHECK-NEXT: umin v0.2s, v1.2s, v0.2s -; CHECK-NEXT: cmhi v0.2s, v2.2s, v0.2s +; CHECK-NEXT: umin v1.2s, v1.2s, v0.2s +; CHECK-NEXT: and v0.8b, v3.8b, v0.8b +; CHECK-NEXT: cmhi v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %active.lane.mask = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i8(i8 %index, i8 %TC) ret <2 x i1> %active.lane.mask diff --git a/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll b/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll --- a/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll +++ b/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll @@ -213,9 +213,9 @@ ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: bl vec_use -; CHECK-NEXT: mvni v0.4s, #5 ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: mvni v0.4s, #5 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret @@ -290,9 +290,9 @@ ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: bl vec_use -; CHECK-NEXT: mvni v0.4s, #5 ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: mvni v0.4s, #5 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll b/llvm/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll --- a/llvm/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll +++ b/llvm/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll @@ -68,16 +68,16 @@ define double @add_sub_su64(<2 x i64> 
%a, <2 x i64> %b) nounwind readnone { ; CHECK-LABEL: add_sub_su64: ; CHECK: // %bb.0: +; CHECK-NEXT: fmov d2, xzr ; CHECK-NEXT: add d0, d1, d0 -; CHECK-NEXT: fmov d1, xzr -; CHECK-NEXT: sub d0, d1, d0 +; CHECK-NEXT: sub d0, d2, d0 ; CHECK-NEXT: ret ; ; GENERIC-LABEL: add_sub_su64: ; GENERIC: // %bb.0: +; GENERIC-NEXT: fmov d2, xzr ; GENERIC-NEXT: add d0, d1, d0 -; GENERIC-NEXT: fmov d1, xzr -; GENERIC-NEXT: sub d0, d1, d0 +; GENERIC-NEXT: sub d0, d2, d0 ; GENERIC-NEXT: ret %vecext = extractelement <2 x i64> %a, i32 0 %vecext1 = extractelement <2 x i64> %b, i32 0 diff --git a/llvm/test/CodeGen/AArch64/arm64-fcopysign.ll b/llvm/test/CodeGen/AArch64/arm64-fcopysign.ll --- a/llvm/test/CodeGen/AArch64/arm64-fcopysign.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fcopysign.ll @@ -6,8 +6,8 @@ define float @test1(float %x, float %y) nounwind { ; CHECK-LABEL: test1: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: mvni.4s v2, #128, lsl #24 ; CHECK-NEXT: ; kill: def $s0 killed $s0 def $q0 +; CHECK-NEXT: mvni.4s v2, #128, lsl #24 ; CHECK-NEXT: ; kill: def $s1 killed $s1 def $q1 ; CHECK-NEXT: bif.16b v0, v1, v2 ; CHECK-NEXT: ; kill: def $s0 killed $s0 killed $q0 @@ -55,10 +55,10 @@ ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill ; CHECK-NEXT: bl _bar -; CHECK-NEXT: mvni.4s v1, #128, lsl #24 ; CHECK-NEXT: fcvt s0, d0 -; CHECK-NEXT: fmov s2, #0.50000000 -; CHECK-NEXT: bsl.16b v1, v2, v0 +; CHECK-NEXT: fmov s1, #0.50000000 +; CHECK-NEXT: mvni.4s v2, #128, lsl #24 +; CHECK-NEXT: bif.16b v1, v0, v2 ; CHECK-NEXT: fadd s0, s0, s1 ; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll b/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll --- a/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll +++ b/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll @@ -374,8 +374,8 @@ ; CHECK-LABEL: testLeftBad2x64: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x8, #10 -; CHECK-NEXT: movk x8, #1, lsl #48 ; CHECK-NEXT: shl.2d v1, v1, #48 +; CHECK-NEXT: movk x8, #1, lsl #48 ; CHECK-NEXT: dup.2d v2, x8 ; CHECK-NEXT: and.16b v0, v0, v2 ; CHECK-NEXT: orr.16b v0, v0, v1 @@ -405,8 +405,8 @@ ; CHECK-LABEL: testRightBad2x64: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x8, #10 -; CHECK-NEXT: movk x8, #1, lsl #48 ; CHECK-NEXT: ushr.2d v1, v1, #48 +; CHECK-NEXT: movk x8, #1, lsl #48 ; CHECK-NEXT: dup.2d v2, x8 ; CHECK-NEXT: and.16b v0, v0, v2 ; CHECK-NEXT: orr.16b v0, v0, v1 diff --git a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll --- a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll +++ b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll @@ -177,12 +177,12 @@ ; CHECK-LABEL: sext_v4i8_to_v4i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ushll.4s v0, v0, #0 -; CHECK-NEXT: ushll2.2d v1, v0, #0 -; CHECK-NEXT: ushll.2d v0, v0, #0 -; CHECK-NEXT: shl.2d v1, v1, #56 +; CHECK-NEXT: ushll.2d v1, v0, #0 +; CHECK-NEXT: ushll2.2d v0, v0, #0 +; CHECK-NEXT: shl.2d v2, v1, #56 ; CHECK-NEXT: shl.2d v0, v0, #56 -; CHECK-NEXT: sshr.2d v1, v1, #56 -; CHECK-NEXT: sshr.2d v0, v0, #56 +; CHECK-NEXT: sshr.2d v1, v0, #56 +; CHECK-NEXT: sshr.2d v0, v2, #56 ; CHECK-NEXT: ret %r = sext <4 x i8> %v0 to <4 x i64> ret <4 x i64> %r @@ -192,12 +192,12 @@ ; CHECK-LABEL: zext_v8i8_to_v8i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ushll.8h v0, v0, #0 -; CHECK-NEXT: ushll.4s v2, v0, #0 -; CHECK-NEXT: ushll2.4s v4, v0, #0 -; CHECK-NEXT: ushll2.2d v1, v2, #0 -; CHECK-NEXT: ushll.2d v0, v2, #0 -; CHECK-NEXT: ushll2.2d v3, v4, #0 -; 
CHECK-NEXT: ushll.2d v2, v4, #0 +; CHECK-NEXT: ushll2.4s v2, v0, #0 +; CHECK-NEXT: ushll.4s v0, v0, #0 +; CHECK-NEXT: ushll2.2d v3, v2, #0 +; CHECK-NEXT: ushll2.2d v1, v0, #0 +; CHECK-NEXT: ushll.2d v0, v0, #0 +; CHECK-NEXT: ushll.2d v2, v2, #0 ; CHECK-NEXT: ret %r = zext <8 x i8> %v0 to <8 x i64> ret <8 x i64> %r @@ -207,12 +207,12 @@ ; CHECK-LABEL: sext_v8i8_to_v8i64: ; CHECK: // %bb.0: ; CHECK-NEXT: sshll.8h v0, v0, #0 -; CHECK-NEXT: sshll.4s v2, v0, #0 -; CHECK-NEXT: sshll2.4s v4, v0, #0 -; CHECK-NEXT: sshll2.2d v1, v2, #0 -; CHECK-NEXT: sshll.2d v0, v2, #0 -; CHECK-NEXT: sshll2.2d v3, v4, #0 -; CHECK-NEXT: sshll.2d v2, v4, #0 +; CHECK-NEXT: sshll2.4s v2, v0, #0 +; CHECK-NEXT: sshll.4s v0, v0, #0 +; CHECK-NEXT: sshll2.2d v3, v2, #0 +; CHECK-NEXT: sshll2.2d v1, v0, #0 +; CHECK-NEXT: sshll.2d v0, v0, #0 +; CHECK-NEXT: sshll.2d v2, v2, #0 ; CHECK-NEXT: ret %r = sext <8 x i8> %v0 to <8 x i64> ret <8 x i64> %r @@ -496,129 +496,129 @@ ; CHECK-NEXT: ldr w9, [sp, #64] ; CHECK-NEXT: ldr w10, [sp, #192] ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: ldr w8, [sp, #72] -; CHECK-NEXT: fmov s2, w9 -; CHECK-NEXT: ldr w9, [sp, #200] -; CHECK-NEXT: fmov s1, w10 -; CHECK-NEXT: ldr w10, [sp, #328] +; CHECK-NEXT: ldr w8, [sp, #328] +; CHECK-NEXT: fmov s1, w9 +; CHECK-NEXT: ldr w9, [sp, #72] +; CHECK-NEXT: fmov s2, w10 +; CHECK-NEXT: ldr w10, [sp, #80] +; CHECK-NEXT: mov.b v0[1], w8 +; CHECK-NEXT: ldr w8, [sp, #200] +; CHECK-NEXT: mov.b v1[1], w9 +; CHECK-NEXT: ldr w9, [sp, #336] ; CHECK-NEXT: mov.b v3[1], w1 -; CHECK-NEXT: ldr w11, [sp, #344] +; CHECK-NEXT: ldr w11, [sp, #88] ; CHECK-NEXT: mov.b v2[1], w8 -; CHECK-NEXT: ldr w8, [sp, #336] -; CHECK-NEXT: mov.b v1[1], w9 -; CHECK-NEXT: ldr w9, [sp, #80] -; CHECK-NEXT: mov.b v0[1], w10 -; CHECK-NEXT: ldr w10, [sp, #208] +; CHECK-NEXT: ldr w8, [sp, #344] +; CHECK-NEXT: mov.b v0[2], w9 +; CHECK-NEXT: ldr w9, [sp, #208] +; CHECK-NEXT: mov.b v1[2], w10 +; CHECK-NEXT: ldr w10, [sp, #352] ; CHECK-NEXT: mov.b v3[2], w2 -; CHECK-NEXT: ldr w12, [sp, #360] +; CHECK-NEXT: ldr w12, [sp, #96] ; CHECK-NEXT: mov.b v2[2], w9 -; CHECK-NEXT: ldr w9, [sp, #352] -; CHECK-NEXT: mov.b v1[2], w10 -; CHECK-NEXT: ldr w10, [sp, #88] -; CHECK-NEXT: mov.b v0[2], w8 +; CHECK-NEXT: ldr w9, [sp, #360] +; CHECK-NEXT: mov.b v0[3], w8 ; CHECK-NEXT: ldr w8, [sp, #216] +; CHECK-NEXT: mov.b v1[3], w11 +; CHECK-NEXT: ldr w13, [sp, #104] ; CHECK-NEXT: mov.b v3[3], w3 -; CHECK-NEXT: ldr w13, [sp, #376] -; CHECK-NEXT: mov.b v2[3], w10 -; CHECK-NEXT: ldr w10, [sp, #368] -; CHECK-NEXT: mov.b v1[3], w8 -; CHECK-NEXT: ldr w8, [sp, #96] -; CHECK-NEXT: mov.b v0[3], w11 -; CHECK-NEXT: ldr w11, [sp, #224] +; CHECK-NEXT: ldr w11, [sp, #368] +; CHECK-NEXT: mov.b v2[3], w8 +; CHECK-NEXT: ldr w14, [sp, #112] +; CHECK-NEXT: mov.b v0[4], w10 +; CHECK-NEXT: ldr w10, [sp, #224] +; CHECK-NEXT: mov.b v1[4], w12 +; CHECK-NEXT: ldr w8, [sp, #376] ; CHECK-NEXT: mov.b v3[4], w4 -; CHECK-NEXT: ldr w14, [sp, #392] -; CHECK-NEXT: mov.b v2[4], w8 -; CHECK-NEXT: ldr w8, [sp, #384] -; CHECK-NEXT: mov.b v1[4], w11 -; CHECK-NEXT: ldr w11, [sp, #104] -; CHECK-NEXT: mov.b v0[4], w9 +; CHECK-NEXT: ldr w15, [sp, #120] +; CHECK-NEXT: mov.b v2[4], w10 +; CHECK-NEXT: ldr w12, [sp, #384] +; CHECK-NEXT: mov.b v0[5], w9 ; CHECK-NEXT: ldr w9, [sp, #232] +; CHECK-NEXT: mov.b v1[5], w13 +; CHECK-NEXT: ldr w16, [sp, #128] ; CHECK-NEXT: mov.b v3[5], w5 -; CHECK-NEXT: ldr w15, [sp, #408] -; CHECK-NEXT: mov.b v2[5], w11 -; CHECK-NEXT: ldr w11, [sp, #400] -; CHECK-NEXT: mov.b v1[5], w9 -; CHECK-NEXT: ldr w9, [sp, #112] -; CHECK-NEXT: mov.b v0[5], w12 
-; CHECK-NEXT: ldr w12, [sp, #240] +; CHECK-NEXT: ldr w10, [sp, #392] +; CHECK-NEXT: mov.b v2[5], w9 +; CHECK-NEXT: ldr w13, [sp, #400] +; CHECK-NEXT: mov.b v0[6], w11 +; CHECK-NEXT: ldr w11, [sp, #240] +; CHECK-NEXT: mov.b v1[6], w14 +; CHECK-NEXT: ldr w9, [sp, #408] ; CHECK-NEXT: mov.b v3[6], w6 -; CHECK-NEXT: ldr w16, [sp, #424] -; CHECK-NEXT: mov.b v2[6], w9 -; CHECK-NEXT: ldr w9, [sp, #416] -; CHECK-NEXT: mov.b v1[6], w12 -; CHECK-NEXT: ldr w12, [sp, #120] -; CHECK-NEXT: mov.b v0[6], w10 -; CHECK-NEXT: ldr w10, [sp, #248] +; CHECK-NEXT: ldr w14, [sp, #416] +; CHECK-NEXT: mov.b v2[6], w11 +; CHECK-NEXT: ldr w11, [sp, #424] +; CHECK-NEXT: mov.b v0[7], w8 +; CHECK-NEXT: ldr w8, [sp, #248] +; CHECK-NEXT: mov.b v1[7], w15 +; CHECK-NEXT: ldr w15, [sp, #432] ; CHECK-NEXT: mov.b v3[7], w7 -; CHECK-NEXT: mov.b v2[7], w12 -; CHECK-NEXT: ldr w12, [sp] -; CHECK-NEXT: mov.b v1[7], w10 -; CHECK-NEXT: ldr w10, [sp, #128] -; CHECK-NEXT: mov.b v0[7], w13 -; CHECK-NEXT: ldr w13, [sp, #256] -; CHECK-NEXT: mov.b v3[8], w12 -; CHECK-NEXT: ldr w12, [sp, #432] -; CHECK-NEXT: mov.b v2[8], w10 -; CHECK-NEXT: ldr w10, [sp, #8] -; CHECK-NEXT: mov.b v1[8], w13 -; CHECK-NEXT: ldr w13, [sp, #136] -; CHECK-NEXT: mov.b v0[8], w8 -; CHECK-NEXT: ldr w8, [sp, #264] -; CHECK-NEXT: mov.b v3[9], w10 -; CHECK-NEXT: ldr w10, [sp, #440] -; CHECK-NEXT: mov.b v2[9], w13 -; CHECK-NEXT: ldr w13, [sp, #16] +; CHECK-NEXT: mov.b v2[7], w8 +; CHECK-NEXT: ldr w8, [sp] +; CHECK-NEXT: mov.b v0[8], w12 +; CHECK-NEXT: ldr w12, [sp, #256] +; CHECK-NEXT: mov.b v1[8], w16 +; CHECK-NEXT: ldr w16, [sp, #440] +; CHECK-NEXT: mov.b v3[8], w8 +; CHECK-NEXT: ldr w8, [sp, #136] +; CHECK-NEXT: mov.b v2[8], w12 +; CHECK-NEXT: ldr w12, [sp, #8] +; CHECK-NEXT: mov.b v0[9], w10 +; CHECK-NEXT: ldr w10, [sp, #264] ; CHECK-NEXT: mov.b v1[9], w8 -; CHECK-NEXT: ldr w8, [sp, #144] -; CHECK-NEXT: mov.b v0[9], w14 -; CHECK-NEXT: ldr w14, [sp, #272] -; CHECK-NEXT: mov.b v3[10], w13 +; CHECK-NEXT: ldr w8, [sp, #272] +; CHECK-NEXT: mov.b v3[9], w12 +; CHECK-NEXT: ldr w12, [sp, #144] +; CHECK-NEXT: mov.b v2[9], w10 +; CHECK-NEXT: ldr w10, [sp, #16] +; CHECK-NEXT: mov.b v0[10], w13 ; CHECK-NEXT: ldr w13, [sp, #280] +; CHECK-NEXT: mov.b v1[10], w12 +; CHECK-NEXT: ldr w12, [sp, #152] +; CHECK-NEXT: mov.b v3[10], w10 +; CHECK-NEXT: ldr w10, [sp, #160] ; CHECK-NEXT: mov.b v2[10], w8 ; CHECK-NEXT: ldr w8, [sp, #24] -; CHECK-NEXT: mov.b v1[10], w14 -; CHECK-NEXT: ldr w14, [sp, #152] -; CHECK-NEXT: mov.b v0[10], w11 -; CHECK-NEXT: ldr w11, [sp, #288] +; CHECK-NEXT: mov.b v0[11], w9 +; CHECK-NEXT: ldr w9, [sp, #288] +; CHECK-NEXT: mov.b v1[11], w12 +; CHECK-NEXT: ldr w12, [sp, #296] ; CHECK-NEXT: mov.b v3[11], w8 ; CHECK-NEXT: ldr w8, [sp, #32] -; CHECK-NEXT: mov.b v2[11], w14 -; CHECK-NEXT: ldr w14, [sp, #296] -; CHECK-NEXT: mov.b v1[11], w13 -; CHECK-NEXT: ldr w13, [sp, #160] -; CHECK-NEXT: mov.b v0[11], w15 +; CHECK-NEXT: mov.b v2[11], w13 +; CHECK-NEXT: mov.b v0[12], w14 +; CHECK-NEXT: mov.b v1[12], w10 +; CHECK-NEXT: ldr w10, [sp, #168] ; CHECK-NEXT: mov.b v3[12], w8 ; CHECK-NEXT: ldr w8, [sp, #40] -; CHECK-NEXT: mov.b v2[12], w13 -; CHECK-NEXT: ldr w13, [sp, #312] -; CHECK-NEXT: mov.b v1[12], w11 -; CHECK-NEXT: ldr w11, [sp, #168] -; CHECK-NEXT: mov.b v0[12], w9 +; CHECK-NEXT: mov.b v2[12], w9 ; CHECK-NEXT: ldr w9, [sp, #304] +; CHECK-NEXT: mov.b v0[13], w11 +; CHECK-NEXT: ldr w11, [sp, #312] +; CHECK-NEXT: mov.b v1[13], w10 +; CHECK-NEXT: ldr w10, [sp, #176] ; CHECK-NEXT: mov.b v3[13], w8 ; CHECK-NEXT: ldr w8, [sp, #48] -; CHECK-NEXT: mov.b v2[13], w11 -; CHECK-NEXT: 
ldr w11, [sp, #176] -; CHECK-NEXT: mov.b v1[13], w14 -; CHECK-NEXT: mov.b v0[13], w16 +; CHECK-NEXT: mov.b v2[13], w12 +; CHECK-NEXT: mov.b v0[14], w15 +; CHECK-NEXT: mov.b v1[14], w10 +; CHECK-NEXT: ldr w10, [sp, #184] ; CHECK-NEXT: mov.b v3[14], w8 ; CHECK-NEXT: ldr w8, [sp, #56] -; CHECK-NEXT: mov.b v2[14], w11 -; CHECK-NEXT: mov.b v1[14], w9 -; CHECK-NEXT: ldr w9, [sp, #184] -; CHECK-NEXT: mov.b v0[14], w12 +; CHECK-NEXT: mov.b v2[14], w9 +; CHECK-NEXT: mov.b v0[15], w16 +; CHECK-NEXT: mov.b v1[15], w10 ; CHECK-NEXT: mov.b v3[15], w8 -; CHECK-NEXT: mov.b v2[15], w9 -; CHECK-NEXT: mov.b v1[15], w13 -; CHECK-NEXT: mov.b v0[15], w10 +; CHECK-NEXT: mov.b v2[15], w11 +; CHECK-NEXT: shl.16b v4, v0, #7 +; CHECK-NEXT: shl.16b v1, v1, #7 ; CHECK-NEXT: shl.16b v3, v3, #7 ; CHECK-NEXT: shl.16b v2, v2, #7 -; CHECK-NEXT: shl.16b v4, v1, #7 -; CHECK-NEXT: shl.16b v5, v0, #7 ; CHECK-NEXT: cmlt.16b v0, v3, #0 -; CHECK-NEXT: cmlt.16b v1, v2, #0 -; CHECK-NEXT: cmlt.16b v2, v4, #0 -; CHECK-NEXT: cmlt.16b v3, v5, #0 +; CHECK-NEXT: cmlt.16b v1, v1, #0 +; CHECK-NEXT: cmlt.16b v2, v2, #0 +; CHECK-NEXT: cmlt.16b v3, v4, #0 ; CHECK-NEXT: ret %res = sext <64 x i1> %arg to <64 x i8> ret <64 x i8> %res diff --git a/llvm/test/CodeGen/AArch64/arm64-vhadd.ll b/llvm/test/CodeGen/AArch64/arm64-vhadd.ll --- a/llvm/test/CodeGen/AArch64/arm64-vhadd.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vhadd.ll @@ -787,10 +787,10 @@ define <4 x i64> @hadd32_sext_lsr(<4 x i32> %src1, <4 x i32> %src2) nounwind { ; CHECK-LABEL: hadd32_sext_lsr: ; CHECK: // %bb.0: -; CHECK-NEXT: saddl2.2d v2, v0, v1 -; CHECK-NEXT: saddl.2d v0, v0, v1 -; CHECK-NEXT: ushr.2d v1, v2, #1 -; CHECK-NEXT: ushr.2d v0, v0, #1 +; CHECK-NEXT: saddl.2d v2, v0, v1 +; CHECK-NEXT: saddl2.2d v0, v0, v1 +; CHECK-NEXT: ushr.2d v1, v0, #1 +; CHECK-NEXT: ushr.2d v0, v2, #1 ; CHECK-NEXT: ret %zextsrc1 = sext <4 x i32> %src1 to <4 x i64> %zextsrc2 = sext <4 x i32> %src2 to <4 x i64> diff --git a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll --- a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll +++ b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll @@ -178,10 +178,10 @@ ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff ; CHECK-NEXT: cmlt v2.4s, v0.4s, #0 -; CHECK-NEXT: cmgt v0.4s, v0.4s, v1.4s ; CHECK-NEXT: orr v2.4s, #1 -; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: cmgt v1.4s, v0.4s, v1.4s ; CHECK-NEXT: str q2, [sp] // 16-byte Folded Spill +; CHECK-NEXT: xtn v0.4h, v1.4s ; CHECK-NEXT: bl use_4xi1 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload @@ -198,10 +198,10 @@ ; CHECK-LABEL: not_sign_4xi32: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI16_0 -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI16_0] -; CHECK-NEXT: cmgt v0.4s, v0.4s, v2.4s -; CHECK-NEXT: and v1.16b, v0.16b, v1.16b +; CHECK-NEXT: movi v2.4s, #1 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] +; CHECK-NEXT: cmgt v0.4s, v0.4s, v1.4s +; CHECK-NEXT: and v1.16b, v0.16b, v2.16b ; CHECK-NEXT: orn v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %c = icmp sgt <4 x i32> %a, @@ -229,10 +229,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff ; CHECK-NEXT: adrp x8, .LCPI18_0 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI18_0] +; CHECK-NEXT: movi v2.4s, #1 +; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI18_0] ; CHECK-NEXT: cmgt v0.4s, v0.4s, v1.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: bsl v0.16b, v1.16b, v2.16b +; CHECK-NEXT: bsl v0.16b, v2.16b, v3.16b ; CHECK-NEXT: ret %c = 
icmp sgt <4 x i32> %a, %res = select <4 x i1> %c, <4 x i32> , <4 x i32> diff --git a/llvm/test/CodeGen/AArch64/dag-numsignbits.ll b/llvm/test/CodeGen/AArch64/dag-numsignbits.ll --- a/llvm/test/CodeGen/AArch64/dag-numsignbits.ll +++ b/llvm/test/CodeGen/AArch64/dag-numsignbits.ll @@ -8,15 +8,15 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI0_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov w1, wzr +; CHECK-NEXT: movi v2.4h, #1 ; CHECK-NEXT: dup v0.4h, v0.h[0] +; CHECK-NEXT: mov w1, wzr ; CHECK-NEXT: mov w2, wzr ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_0] ; CHECK-NEXT: adrp x8, .LCPI0_1 ; CHECK-NEXT: add v0.4h, v0.4h, v1.4h -; CHECK-NEXT: movi v1.4h, #1 -; CHECK-NEXT: cmgt v0.4h, v1.4h, v0.4h ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_1] +; CHECK-NEXT: cmgt v0.4h, v2.4h, v0.4h ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b ; CHECK-NEXT: shl v0.4h, v0.4h, #15 ; CHECK-NEXT: cmlt v0.4h, v0.4h, #0 diff --git a/llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-signed.ll b/llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-signed.ll --- a/llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-signed.ll +++ b/llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-signed.ll @@ -228,13 +228,13 @@ ; ALL-NEXT: sdiv x9, x9, x8 ; ALL-NEXT: mul x8, x9, x8 ; ALL-NEXT: sdiv x11, x11, x10 +; ALL-NEXT: fmov d2, x9 ; ALL-NEXT: fmov d1, x8 ; ALL-NEXT: mul x10, x11, x10 +; ALL-NEXT: mov v2.d[1], x11 ; ALL-NEXT: mov v1.d[1], x10 +; ALL-NEXT: str q2, [x0] ; ALL-NEXT: sub v0.2d, v0.2d, v1.2d -; ALL-NEXT: fmov d1, x9 -; ALL-NEXT: mov v1.d[1], x11 -; ALL-NEXT: str q1, [x0] ; ALL-NEXT: ret %div = sdiv <2 x i64> %x, %y store <2 x i64> %div, <2 x i64>* %divdst, align 16 diff --git a/llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-unsigned.ll b/llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-unsigned.ll --- a/llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-unsigned.ll +++ b/llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-unsigned.ll @@ -228,13 +228,13 @@ ; ALL-NEXT: udiv x9, x9, x8 ; ALL-NEXT: mul x8, x9, x8 ; ALL-NEXT: udiv x11, x11, x10 +; ALL-NEXT: fmov d2, x9 ; ALL-NEXT: fmov d1, x8 ; ALL-NEXT: mul x10, x11, x10 +; ALL-NEXT: mov v2.d[1], x11 ; ALL-NEXT: mov v1.d[1], x10 +; ALL-NEXT: str q2, [x0] ; ALL-NEXT: sub v0.2d, v0.2d, v1.2d -; ALL-NEXT: fmov d1, x9 -; ALL-NEXT: mov v1.d[1], x11 -; ALL-NEXT: str q1, [x0] ; ALL-NEXT: ret %div = udiv <2 x i64> %x, %y store <2 x i64> %div, <2 x i64>* %divdst, align 16 diff --git a/llvm/test/CodeGen/AArch64/expand-vector-rot.ll b/llvm/test/CodeGen/AArch64/expand-vector-rot.ll --- a/llvm/test/CodeGen/AArch64/expand-vector-rot.ll +++ b/llvm/test/CodeGen/AArch64/expand-vector-rot.ll @@ -7,14 +7,14 @@ ; CHECK-LABEL: rotlv2_16: ; CHECK: // %bb.0: ; CHECK-NEXT: movi v2.2s, #15 -; CHECK-NEXT: neg v3.2s, v1.2s -; CHECK-NEXT: movi d4, #0x00ffff0000ffff -; CHECK-NEXT: and v3.8b, v3.8b, v2.8b +; CHECK-NEXT: movi d3, #0x00ffff0000ffff +; CHECK-NEXT: neg v4.2s, v1.2s +; CHECK-NEXT: and v4.8b, v4.8b, v2.8b +; CHECK-NEXT: and v3.8b, v0.8b, v3.8b +; CHECK-NEXT: neg v4.2s, v4.2s ; CHECK-NEXT: and v1.8b, v1.8b, v2.8b -; CHECK-NEXT: and v4.8b, v0.8b, v4.8b -; CHECK-NEXT: neg v3.2s, v3.2s ; CHECK-NEXT: ushl v0.2s, v0.2s, v1.2s -; CHECK-NEXT: ushl v2.2s, v4.2s, v3.2s +; CHECK-NEXT: ushl v2.2s, v3.2s, v4.2s ; CHECK-NEXT: orr v0.8b, v0.8b, v2.8b ; CHECK-NEXT: ret %1 = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %vec2_16, <2 x i16> %vec2_16, <2 x i16> %shift) diff --git a/llvm/test/CodeGen/AArch64/f16-instructions.ll b/llvm/test/CodeGen/AArch64/f16-instructions.ll --- 
a/llvm/test/CodeGen/AArch64/f16-instructions.ll +++ b/llvm/test/CodeGen/AArch64/f16-instructions.ll @@ -1101,9 +1101,9 @@ } ; CHECK-CVT-LABEL: test_copysign: -; CHECK-CVT-NEXT: mvni.4s v2, #128, lsl #24 ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: mvni.4s v2, #128, lsl #24 ; CHECK-CVT-NEXT: bif.16b v0, v1, v2 ; CHECK-CVT-NEXT: fcvt h0, s0 ; CHECK-CVT-NEXT: ret @@ -1119,15 +1119,15 @@ } ; CHECK-CVT-LABEL: test_copysign_f32: -; CHECK-CVT-NEXT: mvni.4s v2, #128, lsl #24 ; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: mvni.4s v2, #128, lsl #24 ; CHECK-CVT-NEXT: bif.16b v0, v1, v2 ; CHECK-CVT-NEXT: fcvt h0, s0 ; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_copysign_f32: -; CHECK-FP16-NEXT: mvni.8h v2, #128, lsl #8 ; CHECK-FP16-NEXT: fcvt h1, s1 +; CHECK-FP16-NEXT: mvni.8h v2, #128, lsl #8 ; CHECK-FP16-NEXT: bif.16b v0, v1, v2 ; CHECK-FP16-NEXT: ret @@ -1138,16 +1138,16 @@ } ; CHECK-CVT-LABEL: test_copysign_f64: -; CHECK-CVT-NEXT: mvni.4s v2, #128, lsl #24 ; CHECK-CVT-NEXT: fcvt s1, d1 ; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: mvni.4s v2, #128, lsl #24 ; CHECK-CVT-NEXT: bif.16b v0, v1, v2 ; CHECK-CVT-NEXT: fcvt h0, s0 ; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_copysign_f64: -; CHECK-FP16-NEXT: mvni.8h v2, #128, lsl #8 ; CHECK-FP16-NEXT: fcvt h1, d1 +; CHECK-FP16-NEXT: mvni.8h v2, #128, lsl #8 ; CHECK-FP16-NEXT: bif.16b v0, v1, v2 ; CHECK-FP16-NEXT: ret @@ -1161,9 +1161,9 @@ ; away the (fpext (fp_round )) here. ; CHECK-CVT-LABEL: test_copysign_extended: -; CHECK-CVT-NEXT: mvni.4s v2, #128, lsl #24 ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: mvni.4s v2, #128, lsl #24 ; CHECK-CVT-NEXT: bif.16b v0, v1, v2 ; CHECK-CVT-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/fcopysign.ll b/llvm/test/CodeGen/AArch64/fcopysign.ll --- a/llvm/test/CodeGen/AArch64/fcopysign.ll +++ b/llvm/test/CodeGen/AArch64/fcopysign.ll @@ -95,8 +95,8 @@ define float @copysign32(float %a, float %b) { ; CHECK-LABEL: copysign32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mvni v2.4s, #128, lsl #24 ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 +; CHECK-NEXT: mvni v2.4s, #128, lsl #24 ; CHECK-NEXT: // kill: def $s1 killed $s1 def $q1 ; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 @@ -142,9 +142,9 @@ define half @copysign16(half %a, half %b) { ; CHECK-LABEL: copysign16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mvni v2.4s, #128, lsl #24 ; CHECK-NEXT: fcvt s1, h1 ; CHECK-NEXT: fcvt s0, h0 +; CHECK-NEXT: mvni v2.4s, #128, lsl #24 ; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b ; CHECK-NEXT: fcvt h0, s0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll --- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll @@ -295,11 +295,11 @@ ; CHECK-NEXT: .cfi_offset w21, -24 ; CHECK-NEXT: .cfi_offset w22, -32 ; CHECK-NEXT: .cfi_offset w30, -48 -; CHECK-NEXT: mov v2.16b, v1.16b ; CHECK-NEXT: adrp x8, .LCPI15_0 ; CHECK-NEXT: stp q1, q0, [sp, #32] // 32-byte Folded Spill -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_0] +; CHECK-NEXT: mov v2.16b, v1.16b ; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_0] ; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: bl __getf2 ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload @@ -364,12 +364,12 @@ ; CHECK-NEXT: .cfi_offset w21, -24 ; CHECK-NEXT: .cfi_offset w22, -32 ; CHECK-NEXT: .cfi_offset w30, -48 -; CHECK-NEXT: stp 
q0, q2, [sp, #48] // 32-byte Folded Spill ; CHECK-NEXT: adrp x8, .LCPI16_0 +; CHECK-NEXT: stp q0, q2, [sp, #48] // 32-byte Folded Spill ; CHECK-NEXT: mov v2.16b, v1.16b ; CHECK-NEXT: str q1, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] ; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] ; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: bl __getf2 ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload @@ -454,11 +454,11 @@ ; CHECK-NEXT: .cfi_offset w21, -24 ; CHECK-NEXT: .cfi_offset w22, -32 ; CHECK-NEXT: .cfi_offset w30, -48 -; CHECK-NEXT: stp q2, q3, [sp, #64] // 32-byte Folded Spill ; CHECK-NEXT: adrp x8, .LCPI17_0 +; CHECK-NEXT: stp q2, q3, [sp, #64] // 32-byte Folded Spill ; CHECK-NEXT: mov v2.16b, v1.16b -; CHECK-NEXT: str q0, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_0] ; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: str q1, [sp, #32] // 16-byte Folded Spill @@ -697,9 +697,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: movi v2.2d, #0xffffffffffffffff ; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s -; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff -; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s +; CHECK-NEXT: smax v0.2s, v0.2s, v2.2s ; CHECK-NEXT: ret %x = call <2 x i1> @llvm.fptosi.sat.v2f32.v2i1(<2 x float> %f) ret <2 x i1> %x @@ -1628,9 +1628,9 @@ ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: movi v1.2d, #0000000000000000 ; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-FP16-NEXT: movi v2.2d, #0xffffffffffffffff ; CHECK-FP16-NEXT: smin v0.4h, v0.4h, v1.4h -; CHECK-FP16-NEXT: movi v1.2d, #0xffffffffffffffff -; CHECK-FP16-NEXT: smax v0.4h, v0.4h, v1.4h +; CHECK-FP16-NEXT: smax v0.4h, v0.4h, v2.4h ; CHECK-FP16-NEXT: ret %x = call <4 x i1> @llvm.fptosi.sat.v4f16.v4i1(<4 x half> %f) ret <4 x i1> %x @@ -1674,10 +1674,10 @@ ; ; CHECK-FP16-LABEL: test_signed_v4f16_v4i13: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: mvni v1.4h, #240, lsl #8 ; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h -; CHECK-FP16-NEXT: smin v0.4h, v0.4h, v1.4h ; CHECK-FP16-NEXT: movi v1.4h, #240, lsl #8 +; CHECK-FP16-NEXT: mvni v2.4h, #240, lsl #8 +; CHECK-FP16-NEXT: smin v0.4h, v0.4h, v2.4h ; CHECK-FP16-NEXT: smax v0.4h, v0.4h, v1.4h ; CHECK-FP16-NEXT: ret %x = call <4 x i13> @llvm.fptosi.sat.v4f16.v4i13(<4 x half> %f) @@ -2129,9 +2129,9 @@ ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: movi v1.2d, #0000000000000000 ; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-FP16-NEXT: movi v2.2d, #0xffffffffffffffff ; CHECK-FP16-NEXT: smin v0.8h, v0.8h, v1.8h -; CHECK-FP16-NEXT: movi v1.2d, #0xffffffffffffffff -; CHECK-FP16-NEXT: smax v0.8h, v0.8h, v1.8h +; CHECK-FP16-NEXT: smax v0.8h, v0.8h, v2.8h ; CHECK-FP16-NEXT: xtn v0.8b, v0.8h ; CHECK-FP16-NEXT: ret %x = call <8 x i1> @llvm.fptosi.sat.v8f16.v8i1(<8 x half> %f) @@ -2278,10 +2278,10 @@ ; ; CHECK-FP16-LABEL: test_signed_v8f16_v8i13: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: mvni v1.8h, #240, lsl #8 ; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h -; CHECK-FP16-NEXT: smin v0.8h, v0.8h, v1.8h ; CHECK-FP16-NEXT: movi v1.8h, #240, lsl #8 +; CHECK-FP16-NEXT: mvni v2.8h, #240, lsl #8 +; CHECK-FP16-NEXT: smin v0.8h, v0.8h, v2.8h ; CHECK-FP16-NEXT: smax v0.8h, v0.8h, v1.8h ; CHECK-FP16-NEXT: ret %x = call <8 x i13> @llvm.fptosi.sat.v8f16.v8i13(<8 x half> %f) @@ -2366,21 +2366,21 @@ ; CHECK-NEXT: fcvtl v2.4s, v0.4h ; CHECK-NEXT: fcvtl2 v0.4s, 
v0.8h ; CHECK-NEXT: movi v1.4s, #3, msl #16 -; CHECK-NEXT: mvni v3.4s, #3, msl #16 ; CHECK-NEXT: fcvtzs v2.4s, v2.4s ; CHECK-NEXT: fcvtzs v0.4s, v0.4s ; CHECK-NEXT: smin v2.4s, v2.4s, v1.4s ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: smax v1.4s, v2.4s, v3.4s -; CHECK-NEXT: smax v0.4s, v0.4s, v3.4s -; CHECK-NEXT: mov w1, v1.s[1] -; CHECK-NEXT: mov w2, v1.s[2] +; CHECK-NEXT: mvni v1.4s, #3, msl #16 +; CHECK-NEXT: smax v2.4s, v2.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s +; CHECK-NEXT: mov w1, v2.s[1] +; CHECK-NEXT: mov w2, v2.s[2] ; CHECK-NEXT: mov w5, v0.s[1] -; CHECK-NEXT: mov w3, v1.s[3] +; CHECK-NEXT: mov w3, v2.s[3] ; CHECK-NEXT: mov w6, v0.s[2] ; CHECK-NEXT: mov w7, v0.s[3] ; CHECK-NEXT: fmov w4, s0 -; CHECK-NEXT: fmov w0, s1 +; CHECK-NEXT: fmov w0, s2 ; CHECK-NEXT: ret %x = call <8 x i19> @llvm.fptosi.sat.v8f16.v8i19(<8 x half> %f) ret <8 x i19> %x @@ -2995,11 +2995,11 @@ ; CHECK-NEXT: movi v2.4s, #127 ; CHECK-NEXT: fcvtzs v1.4s, v1.4s ; CHECK-NEXT: fcvtzs v0.4s, v0.4s -; CHECK-NEXT: mvni v3.4s, #127 ; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s ; CHECK-NEXT: smin v0.4s, v0.4s, v2.4s -; CHECK-NEXT: smax v1.4s, v1.4s, v3.4s -; CHECK-NEXT: smax v0.4s, v0.4s, v3.4s +; CHECK-NEXT: mvni v2.4s, #127 +; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v2.4s ; CHECK-NEXT: xtn v1.4h, v1.4s ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: uzp1 v0.8b, v0.8b, v1.8b diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll --- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll @@ -285,11 +285,11 @@ ; CHECK-NEXT: .cfi_offset w19, -8 ; CHECK-NEXT: .cfi_offset w20, -16 ; CHECK-NEXT: .cfi_offset w30, -32 -; CHECK-NEXT: mov v2.16b, v1.16b ; CHECK-NEXT: adrp x8, .LCPI15_0 ; CHECK-NEXT: stp q1, q0, [sp, #32] // 32-byte Folded Spill -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_0] +; CHECK-NEXT: mov v2.16b, v1.16b ; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_0] ; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: bl __getf2 ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload @@ -338,12 +338,12 @@ ; CHECK-NEXT: .cfi_offset w19, -8 ; CHECK-NEXT: .cfi_offset w20, -16 ; CHECK-NEXT: .cfi_offset w30, -32 -; CHECK-NEXT: stp q0, q2, [sp, #48] // 32-byte Folded Spill ; CHECK-NEXT: adrp x8, .LCPI16_0 +; CHECK-NEXT: stp q0, q2, [sp, #48] // 32-byte Folded Spill ; CHECK-NEXT: mov v2.16b, v1.16b ; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] ; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] ; CHECK-NEXT: str q1, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: bl __getf2 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload @@ -406,13 +406,13 @@ ; CHECK-NEXT: .cfi_offset w19, -8 ; CHECK-NEXT: .cfi_offset w20, -16 ; CHECK-NEXT: .cfi_offset w30, -32 -; CHECK-NEXT: stp q0, q2, [sp, #16] // 32-byte Folded Spill ; CHECK-NEXT: adrp x8, .LCPI17_0 +; CHECK-NEXT: stp q0, q2, [sp, #16] // 32-byte Folded Spill ; CHECK-NEXT: mov v2.16b, v1.16b ; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: str q3, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_0] -; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: str q1, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: bl __getf2 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload @@ -1424,8 +1424,8 @@ ; ; CHECK-FP16-LABEL: test_unsigned_v4f16_v4i13: ; CHECK-FP16: // %bb.0: 
-; CHECK-FP16-NEXT: mvni v1.4h, #224, lsl #8
; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h
+; CHECK-FP16-NEXT: mvni v1.4h, #224, lsl #8
; CHECK-FP16-NEXT: umin v0.4h, v0.4h, v1.4h
; CHECK-FP16-NEXT: ret
%x = call <4 x i13> @llvm.fptoui.sat.v4f16.v4i13(<4 x half> %f)
@@ -1910,8 +1910,8 @@
;
; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i13:
; CHECK-FP16: // %bb.0:
-; CHECK-FP16-NEXT: mvni v1.8h, #224, lsl #8
; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h
+; CHECK-FP16-NEXT: mvni v1.8h, #224, lsl #8
; CHECK-FP16-NEXT: umin v0.8h, v0.8h, v1.8h
; CHECK-FP16-NEXT: ret
%x = call <8 x i13> @llvm.fptoui.sat.v8f16.v8i13(<8 x half> %f)
diff --git a/llvm/test/CodeGen/AArch64/funnel-shift-rot.ll b/llvm/test/CodeGen/AArch64/funnel-shift-rot.ll
--- a/llvm/test/CodeGen/AArch64/funnel-shift-rot.ll
+++ b/llvm/test/CodeGen/AArch64/funnel-shift-rot.ll
@@ -81,8 +81,8 @@
; CHECK-NEXT: neg v3.4s, v1.4s
; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
; CHECK-NEXT: and v2.16b, v3.16b, v2.16b
-; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s
; CHECK-NEXT: neg v2.4s, v2.4s
+; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s
; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s
; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b
; CHECK-NEXT: ret
@@ -170,8 +170,8 @@
; CHECK-NEXT: movi v2.4s, #31
; CHECK-NEXT: neg v3.4s, v1.4s
; CHECK-NEXT: and v1.16b, v1.16b, v2.16b
-; CHECK-NEXT: and v2.16b, v3.16b, v2.16b
; CHECK-NEXT: neg v1.4s, v1.4s
+; CHECK-NEXT: and v2.16b, v3.16b, v2.16b
; CHECK-NEXT: ushl v2.4s, v0.4s, v2.4s
; CHECK-NEXT: ushl v0.4s, v0.4s, v1.4s
; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
diff --git a/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll b/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll
--- a/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll
+++ b/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll
@@ -92,17 +92,17 @@
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
-; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: cnth x8
-; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0]
-; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: ptrue p0.h
; CHECK-NEXT: sub x8, x8, #8
; CHECK-NEXT: mov w9, #8
; CHECK-NEXT: cmp x8, #8
+; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0]
+; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: csel x8, x8, x9, lo
-; CHECK-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-NEXT: lsl x8, x8, #1
; CHECK-NEXT: mov x9, sp
+; CHECK-NEXT: ushll v1.8h, v1.8b, #0
; CHECK-NEXT: st1h { z0.h }, p0, [sp]
; CHECK-NEXT: str q1, [x9, x8]
; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp]
@@ -136,17 +136,17 @@
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
-; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: cntw x8
-; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
-; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: ptrue p0.s
; CHECK-NEXT: sub x8, x8, #4
; CHECK-NEXT: mov w9, #4
; CHECK-NEXT: cmp x8, #4
+; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0]
+; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: csel x8, x8, x9, lo
-; CHECK-NEXT: ushll v1.4s, v1.4h, #0
; CHECK-NEXT: lsl x8, x8, #2
; CHECK-NEXT: mov x9, sp
+; CHECK-NEXT: ushll v1.4s, v1.4h, #0
; CHECK-NEXT: st1w { z0.s }, p0, [sp]
; CHECK-NEXT: str q1, [x9, x8]
; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp]
@@ -180,17 +180,17 @@
; CHECK: // %bb.0:
; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
-; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: cntd x8
-; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
-; CHECK-NEXT: ldr d1, [x1]
+; CHECK-NEXT: ptrue p0.d
; CHECK-NEXT: sub x8, x8, #2
; CHECK-NEXT: mov w9, #2
; CHECK-NEXT: cmp x8, #2
+; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0]
+; CHECK-NEXT: ldr d1, [x1]
; CHECK-NEXT: csel x8, x8, x9, lo
-; CHECK-NEXT: ushll v1.2d, v1.2s, #0
; CHECK-NEXT: lsl x8, x8, #3
; CHECK-NEXT: mov x9, sp
+; CHECK-NEXT: ushll v1.2d, v1.2s, #0
; CHECK-NEXT: st1d { z0.d }, p0, [sp]
; CHECK-NEXT: str q1, [x9, x8]
; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp]
diff --git a/llvm/test/CodeGen/AArch64/lowerMUL-newload.ll b/llvm/test/CodeGen/AArch64/lowerMUL-newload.ll
--- a/llvm/test/CodeGen/AArch64/lowerMUL-newload.ll
+++ b/llvm/test/CodeGen/AArch64/lowerMUL-newload.ll
@@ -22,9 +22,9 @@
; CHECK-LABEL: mlai16_and:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: smull v0.4s, v1.4h, v0.4h
-; CHECK-NEXT: movi v3.2d, #0x00ffff0000ffff
+; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff
; CHECK-NEXT: uaddw v0.4s, v0.4s, v2.4h
-; CHECK-NEXT: and v0.16b, v0.16b, v3.16b
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
entry:
%v0 = sext <4 x i16> %vec0 to <4 x i32>
@@ -158,9 +158,9 @@
; CHECK-LABEL: mlai32_and:
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: smull v0.2d, v1.2s, v0.2s
-; CHECK-NEXT: movi v3.2d, #0x000000ffffffff
+; CHECK-NEXT: movi v1.2d, #0x000000ffffffff
; CHECK-NEXT: uaddw v0.2d, v0.2d, v2.2s
-; CHECK-NEXT: and v0.16b, v0.16b, v3.16b
+; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
entry:
%v0 = sext <2 x i32> %vec0 to <2 x i64>
diff --git a/llvm/test/CodeGen/AArch64/minmax-of-minmax.ll b/llvm/test/CodeGen/AArch64/minmax-of-minmax.ll
--- a/llvm/test/CodeGen/AArch64/minmax-of-minmax.ll
+++ b/llvm/test/CodeGen/AArch64/minmax-of-minmax.ll
@@ -1079,8 +1079,8 @@
; CHECK-LABEL: notted_smin_bc_ab:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v1.16b, v1.16b
-; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s
@@ -1101,8 +1101,8 @@
; CHECK-LABEL: notted_smin_bc_ba:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v1.16b, v1.16b
-; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s
; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s
@@ -1167,8 +1167,8 @@
; CHECK-LABEL: notted_smin_bc_ab_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v1.16b, v1.16b
-; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s
@@ -1189,8 +1189,8 @@
; CHECK-LABEL: notted_smin_bc_ba_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v1.16b, v1.16b
-; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s
; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s
@@ -1255,8 +1255,8 @@
; CHECK-LABEL: notted_smin_bc_ab_eq_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v1.16b, v1.16b
-; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s
@@ -1277,8 +1277,8 @@
; CHECK-LABEL: notted_smin_bc_ba_eq_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v1.16b, v1.16b
-; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s
; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s
@@ -1343,8 +1343,8 @@
; CHECK-LABEL: notted_smin_bc_ab_eq_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v1.16b, v1.16b
-; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s
@@ -1365,8 +1365,8 @@
; CHECK-LABEL: notted_smin_bc_ba_eq_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v1.16b, v1.16b
-; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s
; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s
@@ -1431,8 +1431,8 @@
; CHECK-LABEL: notted_smax_bc_ab:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v1.16b, v1.16b
-; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s
@@ -1453,8 +1453,8 @@
; CHECK-LABEL: notted_smax_bc_ba:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v1.16b, v1.16b
-; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s
; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s
@@ -1519,8 +1519,8 @@
; CHECK-LABEL: notted_smax_bc_ab_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v1.16b, v1.16b
-; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s
@@ -1541,8 +1541,8 @@
; CHECK-LABEL: notted_smax_bc_ba_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v1.16b, v1.16b
-; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s
; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s
@@ -1607,8 +1607,8 @@
; CHECK-LABEL: notted_smax_bc_ab_eq_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v1.16b, v1.16b
-; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s
@@ -1629,8 +1629,8 @@
; CHECK-LABEL: notted_smax_bc_ba_eq_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v1.16b, v1.16b
-; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s
; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s
@@ -1695,8 +1695,8 @@
; CHECK-LABEL: notted_smax_bc_ab_eq_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v1.16b, v1.16b
-; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s
; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s
@@ -1717,8 +1717,8 @@
; CHECK-LABEL: notted_smax_bc_ba_eq_swap_pred:
; CHECK: // %bb.0:
; CHECK-NEXT: mvn v1.16b, v1.16b
-; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: mvn v0.16b, v0.16b
+; CHECK-NEXT: mvn v2.16b, v2.16b
; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s
; CHECK-NEXT:
smax v0.4s, v1.4s, v0.4s ; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s @@ -1783,8 +1783,8 @@ ; CHECK-LABEL: notted_umin_bc_ab: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s @@ -1805,8 +1805,8 @@ ; CHECK-LABEL: notted_umin_bc_ba: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s ; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s @@ -1871,8 +1871,8 @@ ; CHECK-LABEL: notted_umin_bc_ab_swap_pred: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s @@ -1893,8 +1893,8 @@ ; CHECK-LABEL: notted_umin_bc_ba_swap_pred: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s ; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s @@ -1959,8 +1959,8 @@ ; CHECK-LABEL: notted_umin_bc_ab_eq_pred: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s @@ -1981,8 +1981,8 @@ ; CHECK-LABEL: notted_umin_bc_ba_eq_pred: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s ; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s @@ -2047,8 +2047,8 @@ ; CHECK-LABEL: notted_umin_bc_ab_eq_swap_pred: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s @@ -2069,8 +2069,8 @@ ; CHECK-LABEL: notted_umin_bc_ba_eq_swap_pred: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s ; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s @@ -2135,8 +2135,8 @@ ; CHECK-LABEL: notted_umax_bc_ab: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s @@ -2157,8 +2157,8 @@ ; CHECK-LABEL: notted_umax_bc_ba: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s ; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s @@ -2223,8 +2223,8 @@ ; CHECK-LABEL: notted_umax_bc_ab_swap_pred: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: 
mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s @@ -2245,8 +2245,8 @@ ; CHECK-LABEL: notted_umax_bc_ba_swap_pred: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s ; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s @@ -2311,8 +2311,8 @@ ; CHECK-LABEL: notted_umax_bc_ab_eq_pred: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s @@ -2333,8 +2333,8 @@ ; CHECK-LABEL: notted_umax_bc_ba_eq_pred: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s ; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s @@ -2399,8 +2399,8 @@ ; CHECK-LABEL: notted_umax_bc_ab_eq_swap_pred: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s @@ -2421,8 +2421,8 @@ ; CHECK-LABEL: notted_umax_bc_ba_eq_swap_pred: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s ; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s diff --git a/llvm/test/CodeGen/AArch64/minmax.ll b/llvm/test/CodeGen/AArch64/minmax.ll --- a/llvm/test/CodeGen/AArch64/minmax.ll +++ b/llvm/test/CodeGen/AArch64/minmax.ll @@ -122,10 +122,10 @@ define <16 x i8> @t12(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: t12: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.16b, #1 -; CHECK-NEXT: cmhi v3.16b, v1.16b, v0.16b -; CHECK-NEXT: bif v0.16b, v1.16b, v3.16b -; CHECK-NEXT: and v1.16b, v3.16b, v2.16b +; CHECK-NEXT: cmhi v2.16b, v1.16b, v0.16b +; CHECK-NEXT: movi v3.16b, #1 +; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b +; CHECK-NEXT: and v1.16b, v2.16b, v3.16b ; CHECK-NEXT: add v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %t1 = icmp ugt <16 x i8> %b, %a diff --git a/llvm/test/CodeGen/AArch64/overeager_mla_fusing.ll b/llvm/test/CodeGen/AArch64/overeager_mla_fusing.ll --- a/llvm/test/CodeGen/AArch64/overeager_mla_fusing.ll +++ b/llvm/test/CodeGen/AArch64/overeager_mla_fusing.ll @@ -13,9 +13,9 @@ ; CHECK-NEXT: mul v0.8h, v2.8h, v0.8h ; CHECK-NEXT: mul v1.8h, v3.8h, v1.8h ; CHECK-NEXT: add v2.8h, v0.8h, v1.8h -; CHECK-NEXT: sub v0.8h, v0.8h, v1.8h ; CHECK-NEXT: str q2, [x9, x8] ; CHECK-NEXT: ldr x9, [x2, #56] +; CHECK-NEXT: sub v0.8h, v0.8h, v1.8h ; CHECK-NEXT: str q0, [x9, x8] ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll --- a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll +++ b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll @@ -119,12 +119,12 @@ ; CHECK-NEXT: add v0.2d, v0.2d, v15.2d ; CHECK-NEXT: add v11.2d, v11.2d, v14.2d ; CHECK-NEXT: fmov d14, x3 -; CHECK-NEXT: add v9.2d, v9.2d, v1.2d ; CHECK-NEXT: str q0, [sp, 
#16] // 16-byte Folded Spill ; CHECK-NEXT: fmov d0, x13 +; CHECK-NEXT: add v9.2d, v9.2d, v1.2d ; CHECK-NEXT: mov v14.d[1], x15 -; CHECK-NEXT: add v31.2d, v31.2d, v1.2d ; CHECK-NEXT: mov v0.d[1], x12 +; CHECK-NEXT: add v31.2d, v31.2d, v1.2d ; CHECK-NEXT: add v26.2d, v26.2d, v1.2d ; CHECK-NEXT: add v23.2d, v23.2d, v1.2d ; CHECK-NEXT: add v21.2d, v21.2d, v1.2d diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll --- a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll @@ -97,9 +97,9 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind { ; CHECK-LABEL: v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: sqadd v0.8b, v0.8b, v1.8b +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: sqadd v0.8b, v1.8b, v0.8b ; CHECK-NEXT: str d0, [x2] ; CHECK-NEXT: ret %x = load <8 x i8>, <8 x i8>* %px @@ -158,9 +158,9 @@ define void @v4i16(<4 x i16>* %px, <4 x i16>* %py, <4 x i16>* %pz) nounwind { ; CHECK-LABEL: v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: sqadd v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: sqadd v0.4h, v1.4h, v0.4h ; CHECK-NEXT: str d0, [x2] ; CHECK-NEXT: ret %x = load <4 x i16>, <4 x i16>* %px @@ -224,9 +224,9 @@ define void @v1i8(<1 x i8>* %px, <1 x i8>* %py, <1 x i8>* %pz) nounwind { ; CHECK-LABEL: v1i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr b0, [x0] -; CHECK-NEXT: ldr b1, [x1] -; CHECK-NEXT: sqadd v0.8b, v0.8b, v1.8b +; CHECK-NEXT: ldr b0, [x1] +; CHECK-NEXT: ldr b1, [x0] +; CHECK-NEXT: sqadd v0.8b, v1.8b, v0.8b ; CHECK-NEXT: st1 { v0.b }[0], [x2] ; CHECK-NEXT: ret %x = load <1 x i8>, <1 x i8>* %px @@ -239,9 +239,9 @@ define void @v1i16(<1 x i16>* %px, <1 x i16>* %py, <1 x i16>* %pz) nounwind { ; CHECK-LABEL: v1i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr h0, [x0] -; CHECK-NEXT: ldr h1, [x1] -; CHECK-NEXT: sqadd v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ldr h0, [x1] +; CHECK-NEXT: ldr h1, [x0] +; CHECK-NEXT: sqadd v0.4h, v1.4h, v0.4h ; CHECK-NEXT: str h0, [x2] ; CHECK-NEXT: ret %x = load <1 x i16>, <1 x i16>* %px @@ -254,10 +254,10 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind { ; CHECK-LABEL: v16i4: ; CHECK: // %bb.0: -; CHECK-NEXT: shl v1.16b, v1.16b, #4 ; CHECK-NEXT: shl v0.16b, v0.16b, #4 -; CHECK-NEXT: sshr v1.16b, v1.16b, #4 +; CHECK-NEXT: shl v1.16b, v1.16b, #4 ; CHECK-NEXT: sshr v0.16b, v0.16b, #4 +; CHECK-NEXT: sshr v1.16b, v1.16b, #4 ; CHECK-NEXT: shl v1.16b, v1.16b, #4 ; CHECK-NEXT: shl v0.16b, v0.16b, #4 ; CHECK-NEXT: sqadd v0.16b, v0.16b, v1.16b diff --git a/llvm/test/CodeGen/AArch64/sat-add.ll b/llvm/test/CodeGen/AArch64/sat-add.ll --- a/llvm/test/CodeGen/AArch64/sat-add.ll +++ b/llvm/test/CodeGen/AArch64/sat-add.ll @@ -346,9 +346,9 @@ ; CHECK-LABEL: unsigned_sat_constant_v16i8_using_min: ; CHECK: // %bb.0: ; CHECK-NEXT: movi v1.16b, #213 +; CHECK-NEXT: movi v2.16b, #42 ; CHECK-NEXT: umin v0.16b, v0.16b, v1.16b -; CHECK-NEXT: movi v1.16b, #42 -; CHECK-NEXT: add v0.16b, v0.16b, v1.16b +; CHECK-NEXT: add v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %c = icmp ult <16 x i8> %x, %s = select <16 x i1> %c, <16 x i8> %x, <16 x i8> @@ -383,9 +383,9 @@ define <8 x i16> @unsigned_sat_constant_v8i16_using_min(<8 x i16> %x) { ; CHECK-LABEL: unsigned_sat_constant_v8i16_using_min: ; CHECK: // %bb.0: -; CHECK-NEXT: mvni v1.8h, #42 -; CHECK-NEXT: umin v0.8h, v0.8h, v1.8h ; CHECK-NEXT: movi v1.8h, #42 +; CHECK-NEXT: mvni v2.8h, 
#42 +; CHECK-NEXT: umin v0.8h, v0.8h, v2.8h ; CHECK-NEXT: add v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ret %c = icmp ult <8 x i16> %x, diff --git a/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll b/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll --- a/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll +++ b/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll @@ -165,8 +165,8 @@ define <16 x i8> @sel_shift_bool_v16i8(<16 x i1> %t) { ; CHECK-LABEL: sel_shift_bool_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: shl v0.16b, v0.16b, #7 ; CHECK-NEXT: movi v1.16b, #128 +; CHECK-NEXT: shl v0.16b, v0.16b, #7 ; CHECK-NEXT: cmlt v0.16b, v0.16b, #0 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -205,8 +205,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ushll v0.2d, v0.2s, #0 ; CHECK-NEXT: mov w8, #65536 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: shl v0.2d, v0.2d, #63 +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: cmlt v0.2d, v0.2d, #0 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/signbit-shift.ll b/llvm/test/CodeGen/AArch64/signbit-shift.ll --- a/llvm/test/CodeGen/AArch64/signbit-shift.ll +++ b/llvm/test/CodeGen/AArch64/signbit-shift.ll @@ -30,9 +30,9 @@ ; CHECK-LABEL: add_zext_ifpos_vec_splat: ; CHECK: // %bb.0: ; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff +; CHECK-NEXT: movi v2.4s, #41 ; CHECK-NEXT: cmgt v0.4s, v0.4s, v1.4s -; CHECK-NEXT: movi v1.4s, #41 -; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s +; CHECK-NEXT: sub v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ret %c = icmp sgt <4 x i32> %x, %e = zext <4 x i1> %c to <4 x i32> @@ -79,9 +79,9 @@ ; CHECK-LABEL: add_sext_ifpos_vec_splat: ; CHECK: // %bb.0: ; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff +; CHECK-NEXT: movi v2.4s, #42 ; CHECK-NEXT: cmgt v0.4s, v0.4s, v1.4s -; CHECK-NEXT: movi v1.4s, #42 -; CHECK-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-NEXT: add v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %c = icmp sgt <4 x i32> %x, %e = sext <4 x i1> %c to <4 x i32> diff --git a/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll b/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll --- a/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll +++ b/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll @@ -160,8 +160,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI12_0 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI12_0] -; CHECK-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI12_0] +; CHECK-NEXT: add v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %t0 = add <4 x i32> %a, ; constant always on RHS %r = add <4 x i32> %t0, %b @@ -172,8 +172,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI13_0 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI13_0] -; CHECK-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI13_0] +; CHECK-NEXT: add v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %t0 = add <4 x i32> %a, ; constant always on RHS %r = add <4 x i32> %b, %t0 @@ -188,8 +188,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI14_0 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI14_0] -; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI14_0] +; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %t0 = sub <4 x i32> %a, %r = add <4 x i32> %t0, %b @@ -200,8 +200,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI15_0 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_0] -; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI15_0] +; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s ; 
CHECK-NEXT: ret %t0 = sub <4 x i32> %a, %r = add <4 x i32> %b, %t0 @@ -216,8 +216,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI16_0 ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] -; CHECK-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI16_0] +; CHECK-NEXT: add v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %t0 = sub <4 x i32> , %a %r = add <4 x i32> %t0, %b @@ -228,8 +228,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI17_0 ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_0] -; CHECK-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI17_0] +; CHECK-NEXT: add v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %t0 = sub <4 x i32> , %a %r = add <4 x i32> %b, %t0 @@ -244,8 +244,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI18_0 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI18_0] -; CHECK-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI18_0] +; CHECK-NEXT: add v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %t0 = add <4 x i32> %a, ; constant always on RHS %r = sub <4 x i32> %t0, %b @@ -256,8 +256,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI19_0 ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI19_0] -; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI19_0] +; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %t0 = add <4 x i32> %a, ; constant always on RHS %r = sub <4 x i32> %b, %t0 @@ -272,8 +272,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI20_0 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI20_0] -; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI20_0] +; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %t0 = sub <4 x i32> %a, %r = sub <4 x i32> %t0, %b @@ -284,8 +284,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI21_0 ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI21_0] -; CHECK-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI21_0] +; CHECK-NEXT: add v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %t0 = sub <4 x i32> %a, %r = sub <4 x i32> %b, %t0 @@ -300,8 +300,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI22_0 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI22_0] -; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI22_0] +; CHECK-NEXT: sub v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ret %t0 = sub <4 x i32> , %a %r = sub <4 x i32> %t0, %b @@ -312,8 +312,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI23_0 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI23_0] -; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI23_0] +; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %t0 = sub <4 x i32> , %a %r = sub <4 x i32> %b, %t0 diff --git a/llvm/test/CodeGen/AArch64/sinksplat.ll b/llvm/test/CodeGen/AArch64/sinksplat.ll --- a/llvm/test/CodeGen/AArch64/sinksplat.ll +++ b/llvm/test/CodeGen/AArch64/sinksplat.ll @@ -305,10 +305,10 @@ ; CHECK-NEXT: dup v1.4s, v1.s[3] ; CHECK-NEXT: .LBB9_1: // %l1 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: mov v2.16b, v0.16b ; CHECK-NEXT: ldr q3, [x0] -; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: subs w8, w8, #1 +; CHECK-NEXT: mov v2.16b, v0.16b +; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: fmla v0.4s, v2.4s, v3.4s ; CHECK-NEXT: b.eq .LBB9_1 ; CHECK-NEXT: // %bb.2: // %l2 diff --git 
a/llvm/test/CodeGen/AArch64/sitofp-fixed-legal.ll b/llvm/test/CodeGen/AArch64/sitofp-fixed-legal.ll --- a/llvm/test/CodeGen/AArch64/sitofp-fixed-legal.ll +++ b/llvm/test/CodeGen/AArch64/sitofp-fixed-legal.ll @@ -5,20 +5,20 @@ ; CHECK-LABEL: test_sitofp_fixed: ; CHECK: ; %bb.0: ; CHECK-NEXT: sshll2.2d v4, v2, #0 -; CHECK-NEXT: sshll.2d v16, v1, #0 ; CHECK-NEXT: sshll2.2d v5, v0, #0 ; CHECK-NEXT: sshll2.2d v6, v1, #0 ; CHECK-NEXT: sshll2.2d v7, v3, #0 ; CHECK-NEXT: sshll.2d v0, v0, #0 +; CHECK-NEXT: sshll.2d v16, v1, #0 ; CHECK-NEXT: sshll.2d v17, v2, #0 ; CHECK-NEXT: sshll.2d v18, v3, #0 ; CHECK-NEXT: scvtf.2d v1, v5, #6 +; CHECK-NEXT: scvtf.2d v0, v0, #6 ; CHECK-NEXT: scvtf.2d v3, v6, #6 ; CHECK-NEXT: scvtf.2d v2, v16, #6 ; CHECK-NEXT: scvtf.2d v5, v4, #6 -; CHECK-NEXT: scvtf.2d v0, v0, #6 -; CHECK-NEXT: scvtf.2d v7, v7, #6 ; CHECK-NEXT: scvtf.2d v4, v17, #6 +; CHECK-NEXT: scvtf.2d v7, v7, #6 ; CHECK-NEXT: scvtf.2d v6, v18, #6 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll --- a/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll @@ -59,47 +59,47 @@ define <3 x i1> @test_srem_vec(<3 x i33> %X) nounwind { ; CHECK-LABEL: test_srem_vec: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x11, #7282 -; CHECK-NEXT: sbfx x10, x0, #0, #33 +; CHECK-NEXT: mov x8, #7282 +; CHECK-NEXT: sbfx x9, x0, #0, #33 +; CHECK-NEXT: movk x8, #29127, lsl #16 +; CHECK-NEXT: mov x11, #7281 +; CHECK-NEXT: movk x8, #50972, lsl #32 ; CHECK-NEXT: movk x11, #29127, lsl #16 -; CHECK-NEXT: mov x9, #7281 +; CHECK-NEXT: movk x8, #7281, lsl #48 ; CHECK-NEXT: movk x11, #50972, lsl #32 -; CHECK-NEXT: movk x9, #29127, lsl #16 +; CHECK-NEXT: sbfx x12, x1, #0, #33 +; CHECK-NEXT: sbfx x10, x2, #0, #33 +; CHECK-NEXT: smulh x13, x9, x8 ; CHECK-NEXT: movk x11, #7281, lsl #48 -; CHECK-NEXT: movk x9, #50972, lsl #32 -; CHECK-NEXT: sbfx x13, x1, #0, #33 -; CHECK-NEXT: sbfx x8, x2, #0, #33 -; CHECK-NEXT: smulh x12, x10, x11 -; CHECK-NEXT: movk x9, #7281, lsl #48 -; CHECK-NEXT: smulh x11, x13, x11 -; CHECK-NEXT: smulh x9, x8, x9 -; CHECK-NEXT: add x12, x12, x12, lsr #63 -; CHECK-NEXT: sub x9, x9, x8 -; CHECK-NEXT: add x11, x11, x11, lsr #63 -; CHECK-NEXT: add x12, x12, x12, lsl #3 -; CHECK-NEXT: asr x14, x9, #3 -; CHECK-NEXT: sub x10, x10, x12 -; CHECK-NEXT: add x9, x14, x9, lsr #63 +; CHECK-NEXT: smulh x8, x12, x8 +; CHECK-NEXT: smulh x11, x10, x11 +; CHECK-NEXT: add x13, x13, x13, lsr #63 +; CHECK-NEXT: sub x11, x11, x10 +; CHECK-NEXT: add x8, x8, x8, lsr #63 +; CHECK-NEXT: add x13, x13, x13, lsl #3 +; CHECK-NEXT: asr x14, x11, #3 +; CHECK-NEXT: sub x9, x9, x13 +; CHECK-NEXT: add x11, x14, x11, lsr #63 +; CHECK-NEXT: add x8, x8, x8, lsl #3 +; CHECK-NEXT: sub x8, x12, x8 ; CHECK-NEXT: add x11, x11, x11, lsl #3 -; CHECK-NEXT: sub x11, x13, x11 -; CHECK-NEXT: add x9, x9, x9, lsl #3 -; CHECK-NEXT: fmov d0, x10 -; CHECK-NEXT: add x8, x8, x9 +; CHECK-NEXT: fmov d0, x9 +; CHECK-NEXT: add x10, x10, x11 ; CHECK-NEXT: mov x9, #8589934591 -; CHECK-NEXT: mov v0.d[1], x11 -; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: adrp x11, .LCPI3_0 +; CHECK-NEXT: adrp x12, .LCPI3_1 +; CHECK-NEXT: mov v0.d[1], x8 +; CHECK-NEXT: fmov d1, x10 ; CHECK-NEXT: dup v2.2d, x9 -; CHECK-NEXT: adrp x8, .LCPI3_0 -; CHECK-NEXT: adrp x9, .LCPI3_1 +; CHECK-NEXT: ldr q3, [x11, :lo12:.LCPI3_0] +; CHECK-NEXT: ldr q4, [x12, :lo12:.LCPI3_1] ; CHECK-NEXT: and v1.16b, v1.16b, v2.16b ; CHECK-NEXT: and v0.16b, v0.16b, v2.16b -; CHECK-NEXT: ldr q2, 
[x8, :lo12:.LCPI3_0] -; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI3_1] -; CHECK-NEXT: cmeq v0.2d, v0.2d, v2.2d -; CHECK-NEXT: cmeq v1.2d, v1.2d, v3.2d +; CHECK-NEXT: cmeq v0.2d, v0.2d, v3.2d +; CHECK-NEXT: cmeq v1.2d, v1.2d, v4.2d ; CHECK-NEXT: mvn v0.16b, v0.16b -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: xtn v0.2s, v0.2d +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: xtn v1.2s, v1.2d ; CHECK-NEXT: mov w1, v0.s[1] ; CHECK-NEXT: fmov w0, s0 diff --git a/llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll b/llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll --- a/llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll +++ b/llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll @@ -7,6 +7,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI0_0 ; CHECK-NEXT: adrp x9, .LCPI0_1 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0] ; CHECK-NEXT: adrp x8, .LCPI0_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI0_1] @@ -17,11 +18,10 @@ ; CHECK-NEXT: adrp x8, .LCPI0_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI0_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -39,12 +39,12 @@ ; CHECK-NEXT: mov w9, #39321 ; CHECK-NEXT: movk w8, #52428, lsl #16 ; CHECK-NEXT: movk w9, #6553, lsl #16 -; CHECK-NEXT: adrp x10, .LCPI1_0 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 +; CHECK-NEXT: adrp x8, .LCPI1_0 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q0, [x10, :lo12:.LCPI1_0] ; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI1_0] ; CHECK-NEXT: cmhs v0.4s, v0.4s, v2.4s ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -60,12 +60,12 @@ ; CHECK-NEXT: mov w9, #39321 ; CHECK-NEXT: movk w8, #52428, lsl #16 ; CHECK-NEXT: movk w9, #6553, lsl #16 -; CHECK-NEXT: adrp x10, .LCPI2_0 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 +; CHECK-NEXT: adrp x8, .LCPI2_0 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q0, [x10, :lo12:.LCPI2_0] ; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI2_0] ; CHECK-NEXT: cmhi v0.4s, v2.4s, v0.4s ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -83,17 +83,17 @@ ; CHECK-NEXT: mov w9, #9362 ; CHECK-NEXT: movk w8, #46811, lsl #16 ; CHECK-NEXT: movk w9, #4681, lsl #16 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 ; CHECK-NEXT: adrp x8, .LCPI3_0 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: shl v0.4s, v2.4s, #31 ; CHECK-NEXT: ushr v1.4s, v2.4s, #1 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_0] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -107,17 +107,17 @@ ; CHECK-NEXT: mov w9, #9362 ; CHECK-NEXT: movk w8, #46811, lsl #16 ; CHECK-NEXT: movk w9, #4681, lsl #16 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 ; CHECK-NEXT: adrp x8, .LCPI4_0 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: shl v0.4s, v2.4s, #31 ; CHECK-NEXT: 
ushr v1.4s, v2.4s, #1 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI4_0] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_0] -; CHECK-NEXT: cmhi v0.4s, v0.4s, v1.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhi v0.4s, v0.4s, v2.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp ne <4 x i32> %srem, @@ -131,6 +131,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI5_0 ; CHECK-NEXT: adrp x9, .LCPI5_1 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI5_0] ; CHECK-NEXT: adrp x8, .LCPI5_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI5_1] @@ -141,11 +142,10 @@ ; CHECK-NEXT: adrp x8, .LCPI5_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI5_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI5_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -157,6 +157,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI6_0 ; CHECK-NEXT: adrp x9, .LCPI6_1 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI6_0] ; CHECK-NEXT: adrp x8, .LCPI6_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI6_1] @@ -167,11 +168,10 @@ ; CHECK-NEXT: adrp x8, .LCPI6_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI6_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI6_4] -; CHECK-NEXT: cmhi v0.4s, v0.4s, v1.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhi v0.4s, v0.4s, v2.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp ne <4 x i32> %srem, @@ -187,6 +187,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI7_0 ; CHECK-NEXT: adrp x9, .LCPI7_1 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI7_0] ; CHECK-NEXT: adrp x8, .LCPI7_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI7_1] @@ -197,11 +198,10 @@ ; CHECK-NEXT: adrp x8, .LCPI7_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI7_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI7_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -215,6 +215,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI8_0 ; CHECK-NEXT: adrp x9, .LCPI8_1 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI8_0] ; CHECK-NEXT: adrp x8, .LCPI8_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI8_1] @@ -225,11 +226,10 @@ ; CHECK-NEXT: adrp x8, .LCPI8_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI8_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI8_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, 
@@ -243,6 +243,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI9_0 ; CHECK-NEXT: adrp x9, .LCPI9_1 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI9_0] ; CHECK-NEXT: adrp x8, .LCPI9_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI9_1] @@ -253,11 +254,10 @@ ; CHECK-NEXT: adrp x8, .LCPI9_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI9_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI9_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -275,12 +275,12 @@ ; CHECK-NEXT: mov w9, #39321 ; CHECK-NEXT: movk w8, #52428, lsl #16 ; CHECK-NEXT: movk w9, #6553, lsl #16 -; CHECK-NEXT: adrp x10, .LCPI10_0 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 +; CHECK-NEXT: adrp x8, .LCPI10_0 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q0, [x10, :lo12:.LCPI10_0] ; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI10_0] ; CHECK-NEXT: cmhs v0.4s, v0.4s, v2.4s ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -298,17 +298,17 @@ ; CHECK-NEXT: mov w9, #9362 ; CHECK-NEXT: movk w8, #46811, lsl #16 ; CHECK-NEXT: movk w9, #4681, lsl #16 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 ; CHECK-NEXT: adrp x8, .LCPI11_0 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: shl v0.4s, v2.4s, #31 ; CHECK-NEXT: ushr v1.4s, v2.4s, #1 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI11_0] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI11_0] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -322,6 +322,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI12_0 ; CHECK-NEXT: adrp x9, .LCPI12_1 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI12_0] ; CHECK-NEXT: adrp x8, .LCPI12_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI12_1] @@ -332,11 +333,10 @@ ; CHECK-NEXT: adrp x8, .LCPI12_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI12_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI12_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -441,6 +441,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI16_0 ; CHECK-NEXT: adrp x9, .LCPI16_1 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] ; CHECK-NEXT: adrp x8, .LCPI16_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI16_1] @@ -451,11 +452,10 @@ ; CHECK-NEXT: adrp x8, .LCPI16_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI16_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b 
; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -469,6 +469,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI17_0 ; CHECK-NEXT: adrp x9, .LCPI17_1 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_0] ; CHECK-NEXT: adrp x8, .LCPI17_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI17_1] @@ -479,11 +480,10 @@ ; CHECK-NEXT: adrp x8, .LCPI17_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI17_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -497,6 +497,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI18_0 ; CHECK-NEXT: adrp x9, .LCPI18_1 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI18_0] ; CHECK-NEXT: adrp x8, .LCPI18_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI18_1] @@ -507,11 +508,10 @@ ; CHECK-NEXT: adrp x8, .LCPI18_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI18_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI18_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -529,12 +529,12 @@ ; CHECK-NEXT: mov w9, #39321 ; CHECK-NEXT: movk w8, #52428, lsl #16 ; CHECK-NEXT: movk w9, #6553, lsl #16 -; CHECK-NEXT: adrp x10, .LCPI19_0 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 +; CHECK-NEXT: adrp x8, .LCPI19_0 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q0, [x10, :lo12:.LCPI19_0] ; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI19_0] ; CHECK-NEXT: cmhs v0.4s, v0.4s, v2.4s ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -552,17 +552,17 @@ ; CHECK-NEXT: mov w9, #9362 ; CHECK-NEXT: movk w8, #46811, lsl #16 ; CHECK-NEXT: movk w9, #4681, lsl #16 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 ; CHECK-NEXT: adrp x8, .LCPI20_0 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: shl v0.4s, v2.4s, #31 ; CHECK-NEXT: ushr v1.4s, v2.4s, #1 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI20_0] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI20_0] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -576,6 +576,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI21_0 ; CHECK-NEXT: adrp x9, .LCPI21_1 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI21_0] ; CHECK-NEXT: adrp x8, .LCPI21_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI21_1] @@ -586,11 +587,10 @@ ; CHECK-NEXT: adrp x8, .LCPI21_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI21_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI21_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, 
v1.16b +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -606,6 +606,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI22_0 ; CHECK-NEXT: adrp x9, .LCPI22_1 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI22_0] ; CHECK-NEXT: adrp x8, .LCPI22_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI22_1] @@ -616,11 +617,10 @@ ; CHECK-NEXT: adrp x8, .LCPI22_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI22_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI22_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -634,6 +634,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI23_0 ; CHECK-NEXT: adrp x9, .LCPI23_1 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI23_0] ; CHECK-NEXT: adrp x8, .LCPI23_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI23_1] @@ -644,11 +645,10 @@ ; CHECK-NEXT: adrp x8, .LCPI23_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI23_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI23_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -662,6 +662,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI24_0 ; CHECK-NEXT: adrp x9, .LCPI24_1 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI24_0] ; CHECK-NEXT: adrp x8, .LCPI24_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI24_1] @@ -672,11 +673,10 @@ ; CHECK-NEXT: adrp x8, .LCPI24_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI24_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI24_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -691,6 +691,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI25_0 ; CHECK-NEXT: adrp x9, .LCPI25_1 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI25_0] ; CHECK-NEXT: adrp x8, .LCPI25_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI25_1] @@ -701,11 +702,10 @@ ; CHECK-NEXT: adrp x8, .LCPI25_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI25_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI25_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -718,6 +718,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI26_0 ; CHECK-NEXT: adrp x9, .LCPI26_1 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI26_0] ; CHECK-NEXT: adrp x8, .LCPI26_2 ; 
CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI26_1] @@ -728,11 +729,10 @@ ; CHECK-NEXT: adrp x8, .LCPI26_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI26_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI26_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, diff --git a/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll b/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll --- a/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll +++ b/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll @@ -33,6 +33,7 @@ ; CHECK-NEXT: mov w9, #47184 ; CHECK-NEXT: movk w8, #49807, lsl #16 ; CHECK-NEXT: movk w9, #1310, lsl #16 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 ; CHECK-NEXT: mov w8, #23592 @@ -40,11 +41,10 @@ ; CHECK-NEXT: movk w8, #655, lsl #16 ; CHECK-NEXT: shl v0.4s, v2.4s, #30 ; CHECK-NEXT: ushr v1.4s, v2.4s, #2 +; CHECK-NEXT: dup v2.4s, w8 ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: dup v1.4s, w8 -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -86,6 +86,7 @@ ; CHECK-NEXT: mov w9, #47184 ; CHECK-NEXT: movk w8, #49807, lsl #16 ; CHECK-NEXT: movk w9, #1310, lsl #16 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 ; CHECK-NEXT: mov w8, #23592 @@ -93,11 +94,10 @@ ; CHECK-NEXT: movk w8, #655, lsl #16 ; CHECK-NEXT: shl v0.4s, v2.4s, #30 ; CHECK-NEXT: ushr v1.4s, v2.4s, #2 +; CHECK-NEXT: dup v2.4s, w8 ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: dup v1.4s, w8 -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -114,15 +114,15 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #34079 ; CHECK-NEXT: movk w8, #20971, lsl #16 -; CHECK-NEXT: movi v1.4s, #25 -; CHECK-NEXT: dup v2.4s, w8 -; CHECK-NEXT: smull2 v3.2d, v0.4s, v2.4s -; CHECK-NEXT: smull v2.2d, v0.2s, v2.2s -; CHECK-NEXT: uzp2 v2.4s, v2.4s, v3.4s -; CHECK-NEXT: sshr v3.4s, v2.4s, #3 -; CHECK-NEXT: usra v3.4s, v2.4s, #31 -; CHECK-NEXT: mls v0.4s, v3.4s, v1.4s +; CHECK-NEXT: movi v3.4s, #25 +; CHECK-NEXT: dup v1.4s, w8 +; CHECK-NEXT: smull2 v2.2d, v0.4s, v1.4s +; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s +; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s +; CHECK-NEXT: sshr v2.4s, v1.4s, #3 +; CHECK-NEXT: usra v2.4s, v1.4s, #31 ; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: mls v0.4s, v2.4s, v3.4s ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -137,15 +137,15 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #34079 ; CHECK-NEXT: movk w8, #20971, lsl #16 -; CHECK-NEXT: movi v1.4s, #100 -; CHECK-NEXT: dup v2.4s, w8 -; CHECK-NEXT: smull2 v3.2d, v0.4s, v2.4s -; CHECK-NEXT: smull v2.2d, v0.2s, v2.2s -; CHECK-NEXT: uzp2 v2.4s, v2.4s, v3.4s -; CHECK-NEXT: sshr v3.4s, v2.4s, #5 -; CHECK-NEXT: usra v3.4s, v2.4s, #31 -; CHECK-NEXT: mls v0.4s, v3.4s, v1.4s +; CHECK-NEXT: movi v3.4s, #100 +; 
CHECK-NEXT: dup v1.4s, w8 +; CHECK-NEXT: smull2 v2.2d, v0.4s, v1.4s +; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s +; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s +; CHECK-NEXT: sshr v2.4s, v1.4s, #5 +; CHECK-NEXT: usra v2.4s, v1.4s, #31 ; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: mls v0.4s, v2.4s, v3.4s ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -184,12 +184,12 @@ define <4 x i32> @test_srem_pow2(<4 x i32> %X) nounwind { ; CHECK-LABEL: test_srem_pow2: ; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v2.4s, v0.4s, #0 -; CHECK-NEXT: mov v3.16b, v0.16b +; CHECK-NEXT: cmlt v3.4s, v0.4s, #0 +; CHECK-NEXT: mov v2.16b, v0.16b +; CHECK-NEXT: usra v2.4s, v3.4s, #28 ; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: usra v3.4s, v2.4s, #28 -; CHECK-NEXT: bic v3.4s, #15 -; CHECK-NEXT: sub v0.4s, v0.4s, v3.4s +; CHECK-NEXT: bic v2.4s, #15 +; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -204,10 +204,10 @@ ; CHECK-LABEL: test_srem_int_min: ; CHECK: // %bb.0: ; CHECK-NEXT: cmlt v2.4s, v0.4s, #0 -; CHECK-NEXT: mov v3.16b, v0.16b -; CHECK-NEXT: movi v1.4s, #128, lsl #24 -; CHECK-NEXT: usra v3.4s, v2.4s, #1 -; CHECK-NEXT: and v1.16b, v3.16b, v1.16b +; CHECK-NEXT: mov v1.16b, v0.16b +; CHECK-NEXT: movi v3.4s, #128, lsl #24 +; CHECK-NEXT: usra v1.4s, v2.4s, #1 +; CHECK-NEXT: and v1.16b, v1.16b, v3.16b ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll --- a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll @@ -98,9 +98,9 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind { ; CHECK-LABEL: v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: sqsub v0.8b, v0.8b, v1.8b +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: sqsub v0.8b, v1.8b, v0.8b ; CHECK-NEXT: str d0, [x2] ; CHECK-NEXT: ret %x = load <8 x i8>, <8 x i8>* %px @@ -159,9 +159,9 @@ define void @v4i16(<4 x i16>* %px, <4 x i16>* %py, <4 x i16>* %pz) nounwind { ; CHECK-LABEL: v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: sqsub v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: sqsub v0.4h, v1.4h, v0.4h ; CHECK-NEXT: str d0, [x2] ; CHECK-NEXT: ret %x = load <4 x i16>, <4 x i16>* %px @@ -225,9 +225,9 @@ define void @v1i8(<1 x i8>* %px, <1 x i8>* %py, <1 x i8>* %pz) nounwind { ; CHECK-LABEL: v1i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr b0, [x0] -; CHECK-NEXT: ldr b1, [x1] -; CHECK-NEXT: sqsub v0.8b, v0.8b, v1.8b +; CHECK-NEXT: ldr b0, [x1] +; CHECK-NEXT: ldr b1, [x0] +; CHECK-NEXT: sqsub v0.8b, v1.8b, v0.8b ; CHECK-NEXT: st1 { v0.b }[0], [x2] ; CHECK-NEXT: ret %x = load <1 x i8>, <1 x i8>* %px @@ -240,9 +240,9 @@ define void @v1i16(<1 x i16>* %px, <1 x i16>* %py, <1 x i16>* %pz) nounwind { ; CHECK-LABEL: v1i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr h0, [x0] -; CHECK-NEXT: ldr h1, [x1] -; CHECK-NEXT: sqsub v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ldr h0, [x1] +; CHECK-NEXT: ldr h1, [x0] +; CHECK-NEXT: sqsub v0.4h, v1.4h, v0.4h ; CHECK-NEXT: str h0, [x2] ; CHECK-NEXT: ret %x = load <1 x i16>, <1 x i16>* %px @@ -255,10 +255,10 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind { ; CHECK-LABEL: v16i4: ; CHECK: // %bb.0: -; CHECK-NEXT: shl v1.16b, v1.16b, #4 ; CHECK-NEXT: shl v0.16b, 
v0.16b, #4 -; CHECK-NEXT: sshr v1.16b, v1.16b, #4 +; CHECK-NEXT: shl v1.16b, v1.16b, #4 ; CHECK-NEXT: sshr v0.16b, v0.16b, #4 +; CHECK-NEXT: sshr v1.16b, v1.16b, #4 ; CHECK-NEXT: shl v1.16b, v1.16b, #4 ; CHECK-NEXT: shl v0.16b, v0.16b, #4 ; CHECK-NEXT: sqsub v0.16b, v0.16b, v1.16b diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-div.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-div.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-div.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-div.ll @@ -52,11 +52,11 @@ ; VBITS_EQ_128-LABEL: sdiv_v8i8: ; VBITS_EQ_128: sshll v1.8h, v1.8b, #0 -; VBITS_EQ_128-NEXT: sshll v0.8h, v0.8b, #0 ; VBITS_EQ_128-NEXT: ptrue p0.s, vl4 +; VBITS_EQ_128-NEXT: sshll v0.8h, v0.8b, #0 ; VBITS_EQ_128-NEXT: sunpkhi z2.s, z1.h -; VBITS_EQ_128-NEXT: sunpkhi z3.s, z0.h ; VBITS_EQ_128-NEXT: sunpklo z1.s, z1.h +; VBITS_EQ_128-NEXT: sunpkhi z3.s, z0.h ; VBITS_EQ_128-NEXT: sunpklo z0.s, z0.h ; VBITS_EQ_128-NEXT: sdivr z2.s, p0/m, z2.s, z3.s ; VBITS_EQ_128-NEXT: sdiv z0.s, p0/m, z0.s, z1.s @@ -350,8 +350,8 @@ define <4 x i16> @sdiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) #0 { ; CHECK-LABEL: sdiv_v4i16: ; CHECK: sshll v1.4s, v1.4h, #0 -; CHECK-NEXT: sshll v0.4s, v0.4h, #0 ; CHECK-NEXT: ptrue [[PG1:p[0-9]+]].s, vl4 +; CHECK-NEXT: sshll v0.4s, v0.4h, #0 ; CHECK-NEXT: sdivr [[DIV1:z[0-9]+]].s, [[PG1]]/m, [[OP2:z[0-9]+]].s, [[OP1:z[0-9]+]].s ; CHECK-NEXT: mov w8, v1.s[1] ; CHECK-NEXT: mov w9, v1.s[2] @@ -364,8 +364,8 @@ ; VBITS_EQ_128-LABEL: sdiv_v4i16: ; VBITS_EQ_128: sshll v1.4s, v1.4h, #0 -; VBITS_EQ_128-NEXT: sshll v0.4s, v0.4h, #0 ; VBITS_EQ_128-NEXT: ptrue p0.s, vl4 +; VBITS_EQ_128-NEXT: sshll v0.4s, v0.4h, #0 ; VBITS_EQ_128-NEXT: sdiv z0.s, p0/m, z0.s, z1.s ; VBITS_EQ_128-NEXT: xtn v0.4h, v0.4s ; VBITS_EQ_128-NEXT: ret @@ -744,11 +744,11 @@ ; VBITS_EQ_128-LABEL: udiv_v8i8: ; VBITS_EQ_128: ushll v1.8h, v1.8b, #0 -; VBITS_EQ_128-NEXT: ushll v0.8h, v0.8b, #0 ; VBITS_EQ_128-NEXT: ptrue p0.s, vl4 +; VBITS_EQ_128-NEXT: ushll v0.8h, v0.8b, #0 ; VBITS_EQ_128-NEXT: uunpkhi z2.s, z1.h -; VBITS_EQ_128-NEXT: uunpkhi z3.s, z0.h ; VBITS_EQ_128-NEXT: uunpklo z1.s, z1.h +; VBITS_EQ_128-NEXT: uunpkhi z3.s, z0.h ; VBITS_EQ_128-NEXT: uunpklo z0.s, z0.h ; VBITS_EQ_128-NEXT: udivr z2.s, p0/m, z2.s, z3.s ; VBITS_EQ_128-NEXT: udiv z0.s, p0/m, z0.s, z1.s @@ -1040,8 +1040,8 @@ define <4 x i16> @udiv_v4i16(<4 x i16> %op1, <4 x i16> %op2) #0 { ; CHECK-LABEL: udiv_v4i16: ; CHECK: ushll v1.4s, v1.4h, #0 -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-NEXT: ptrue [[PG1:p[0-9]+]].s, vl4 +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-NEXT: udivr [[DIV1:z[0-9]+]].s, [[PG1]]/m, [[OP2:z[0-9]+]].s, [[OP1:z[0-9]+]].s ; CHECK-NEXT: mov w8, v1.s[1] ; CHECK-NEXT: mov w9, v1.s[2] @@ -1054,8 +1054,8 @@ ; VBITS_EQ_128-LABEL: udiv_v4i16: ; VBITS_EQ_128: ushll v1.4s, v1.4h, #0 -; VBITS_EQ_128-NEXT: ushll v0.4s, v0.4h, #0 ; VBITS_EQ_128-NEXT: ptrue p0.s, vl4 +; VBITS_EQ_128-NEXT: ushll v0.4s, v0.4h, #0 ; VBITS_EQ_128-NEXT: udiv z0.s, p0/m, z0.s, z1.s ; VBITS_EQ_128-NEXT: xtn v0.4h, v0.4s ; VBITS_EQ_128-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll @@ -288,16 +288,16 @@ ; CHECK-LABEL: smulh_v2i32: ; CHECK: // %bb.0: ; CHECK-NEXT: sshll v0.2d, v0.2s, #0 -; CHECK-NEXT: sshll v1.2d, v1.2s, #0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: sshll v1.2d, v1.2s, #0 ; CHECK-NEXT: mul z0.d, p0/m, z0.d, 
z1.d ; CHECK-NEXT: shrn v0.2s, v0.2d, #32 ; CHECK-NEXT: ret ; VBITS_EQ_128-LABEL: smulh_v2i32: ; VBITS_EQ_128: sshll v0.2d, v0.2s, #0 -; VBITS_EQ_128-NEXT: sshll v1.2d, v1.2s, #0 ; VBITS_EQ_128-NEXT: ptrue p0.d, vl2 +; VBITS_EQ_128-NEXT: sshll v1.2d, v1.2s, #0 ; VBITS_EQ_128-NEXT: mul z0.d, p0/m, z0.d, z1.d ; VBITS_EQ_128-NEXT: shrn v0.2s, v0.2d, #32 ; VBITS_EQ_128-NEXT: ret @@ -785,16 +785,16 @@ ; CHECK-LABEL: umulh_v2i32: ; CHECK: // %bb.0: ; CHECK-NEXT: ushll v0.2d, v0.2s, #0 -; CHECK-NEXT: ushll v1.2d, v1.2s, #0 ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ushll v1.2d, v1.2s, #0 ; CHECK-NEXT: mul z0.d, p0/m, z0.d, z1.d ; CHECK-NEXT: shrn v0.2s, v0.2d, #32 ; CHECK-NEXT: ret ; VBITS_EQ_128-LABEL: umulh_v2i32: ; VBITS_EQ_128: ushll v0.2d, v0.2s, #0 -; VBITS_EQ_128-NEXT: ushll v1.2d, v1.2s, #0 ; VBITS_EQ_128-NEXT: ptrue p0.d, vl2 +; VBITS_EQ_128-NEXT: ushll v1.2d, v1.2s, #0 ; VBITS_EQ_128-NEXT: mul z0.d, p0/m, z0.d, z1.d ; VBITS_EQ_128-NEXT: shrn v0.2s, v0.2d, #32 ; VBITS_EQ_128-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll @@ -53,11 +53,11 @@ ; VBITS_EQ_128-LABEL: srem_v8i8: ; VBITS_EQ_128: sshll v2.8h, v1.8b, #0 -; VBITS_EQ_128-NEXT: sshll v3.8h, v0.8b, #0 ; VBITS_EQ_128-NEXT: ptrue p0.s, vl4 +; VBITS_EQ_128-NEXT: sshll v3.8h, v0.8b, #0 ; VBITS_EQ_128-NEXT: sunpkhi z4.s, z2.h -; VBITS_EQ_128-NEXT: sunpkhi z5.s, z3.h ; VBITS_EQ_128-NEXT: sunpklo z2.s, z2.h +; VBITS_EQ_128-NEXT: sunpkhi z5.s, z3.h ; VBITS_EQ_128-NEXT: sunpklo z3.s, z3.h ; VBITS_EQ_128-NEXT: sdivr z4.s, p0/m, z4.s, z5.s ; VBITS_EQ_128-NEXT: sdivr z2.s, p0/m, z2.s, z3.s @@ -364,8 +364,8 @@ define <4 x i16> @srem_v4i16(<4 x i16> %op1, <4 x i16> %op2) #0 { ; CHECK-LABEL: srem_v4i16: ; CHECK: sshll v2.4s, v1.4h, #0 -; CHECK-NEXT: sshll v3.4s, v0.4h, #0 ; CHECK-NEXT: ptrue [[PG1:p[0-9]+]].s, vl4 +; CHECK-NEXT: sshll v3.4s, v0.4h, #0 ; CHECK-NEXT: sdivr [[DIV1:z[0-9]+]].s, [[PG1]]/m, z2.s, z3.s ; CHECK-NEXT: mov [[SCALAR1:w[0-9]+]], [[VEC:v[0-9]+]].s[1] ; CHECK-NEXT: mov [[SCALAR2:w[0-9]+]], [[VEC]].s[2] @@ -379,8 +379,8 @@ ; VBITS_EQ_128-LABEL: srem_v4i16: ; VBITS_EQ_128: sshll v2.4s, v1.4h, #0 -; VBITS_EQ_128-NEXT: sshll v3.4s, v0.4h, #0 ; VBITS_EQ_128-NEXT: ptrue p0.s, vl4 +; VBITS_EQ_128-NEXT: sshll v3.4s, v0.4h, #0 ; VBITS_EQ_128-NEXT: sdivr z2.s, p0/m, z2.s, z3.s ; VBITS_EQ_128-NEXT: xtn v2.4h, v2.4s ; VBITS_EQ_128-NEXT: mls v0.4h, v2.4h, v1.4h @@ -812,11 +812,11 @@ ; VBITS_EQ_128-LABEL: urem_v8i8: ; VBITS_EQ_128: ushll v2.8h, v1.8b, #0 -; VBITS_EQ_128-NEXT: ushll v3.8h, v0.8b, #0 ; VBITS_EQ_128-NEXT: ptrue p0.s, vl4 +; VBITS_EQ_128-NEXT: ushll v3.8h, v0.8b, #0 ; VBITS_EQ_128-NEXT: uunpkhi z4.s, z2.h -; VBITS_EQ_128-NEXT: uunpkhi z5.s, z3.h ; VBITS_EQ_128-NEXT: uunpklo z2.s, z2.h +; VBITS_EQ_128-NEXT: uunpkhi z5.s, z3.h ; VBITS_EQ_128-NEXT: uunpklo z3.s, z3.h ; VBITS_EQ_128-NEXT: udivr z4.s, p0/m, z4.s, z5.s ; VBITS_EQ_128-NEXT: udivr z2.s, p0/m, z2.s, z3.s @@ -1121,8 +1121,8 @@ define <4 x i16> @urem_v4i16(<4 x i16> %op1, <4 x i16> %op2) #0 { ; CHECK-LABEL: urem_v4i16: ; CHECK: ushll v2.4s, v1.4h, #0 -; CHECK-NEXT: ushll v3.4s, v0.4h, #0 ; CHECK-NEXT: ptrue [[PG1:p[0-9]+]].s, vl4 +; CHECK-NEXT: ushll v3.4s, v0.4h, #0 ; CHECK-NEXT: udivr [[DIV1:z[0-9]+]].s, [[PG1]]/m, z2.s, z3.s ; CHECK-NEXT: mov [[SCALAR1:w[0-9]+]], [[VEC:v[0-9]+]].s[1] ; CHECK-NEXT: mov [[SCALAR2:w[0-9]+]], [[VEC]].s[2] @@ -1136,8 +1136,8 @@ ; 
VBITS_EQ_128-LABEL: urem_v4i16: ; VBITS_EQ_128: ushll v2.4s, v1.4h, #0 -; VBITS_EQ_128-NEXT: ushll v3.4s, v0.4h, #0 ; VBITS_EQ_128-NEXT: ptrue p0.s, vl4 +; VBITS_EQ_128-NEXT: ushll v3.4s, v0.4h, #0 ; VBITS_EQ_128-NEXT: udivr z2.s, p0/m, z2.s, z3.s ; VBITS_EQ_128-NEXT: xtn v2.4h, v2.4s ; VBITS_EQ_128-NEXT: mls v0.4h, v2.4h, v1.4h diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --function masked_scatter_v8i8,masked_scatter_v8i16,masked_scatter_v8i32,masked_scatter_v8i64 --prefix VBITS_EQ_256 ; RUN: llc -aarch64-sve-vector-bits-min=128 < %s | FileCheck %s -check-prefix=NO_SVE ; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_EQ_256 ; RUN: llc -aarch64-sve-vector-bits-min=384 < %s | FileCheck %s -check-prefixes=CHECK @@ -85,9 +86,9 @@ ; VBITS_EQ_256-NEXT: shl v2.4h, v2.4h, #8 ; VBITS_EQ_256-NEXT: shl v1.4h, v1.4h, #8 ; VBITS_EQ_256-NEXT: uunpklo z0.s, z0.h -; VBITS_EQ_256-NEXT: uunpklo z0.d, z0.s ; VBITS_EQ_256-NEXT: sshr v2.4h, v2.4h, #8 ; VBITS_EQ_256-NEXT: sshr v1.4h, v1.4h, #8 +; VBITS_EQ_256-NEXT: uunpklo z0.d, z0.s ; VBITS_EQ_256-NEXT: sunpklo z2.s, z2.h ; VBITS_EQ_256-NEXT: sunpklo z1.s, z1.h ; VBITS_EQ_256-NEXT: sunpklo z2.d, z2.s @@ -99,7 +100,6 @@ ; VBITS_EQ_256-NEXT: st1b { z1.d }, p1, [z4.d] ; VBITS_EQ_256-NEXT: st1b { z0.d }, p0, [z3.d] ; VBITS_EQ_256-NEXT: ret -; ; VBITS_GE_512-LABEL: masked_scatter_v8i8: ; VBITS_GE_512: // %bb.0: ; VBITS_GE_512-NEXT: ldr d0, [x0] @@ -108,8 +108,8 @@ ; VBITS_GE_512-NEXT: cmeq v2.8b, v0.8b, #0 ; VBITS_GE_512-NEXT: uunpklo z0.h, z0.b ; VBITS_GE_512-NEXT: uunpklo z0.s, z0.h -; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s ; VBITS_GE_512-NEXT: sunpklo z2.h, z2.b +; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s ; VBITS_GE_512-NEXT: sunpklo z2.s, z2.h ; VBITS_GE_512-NEXT: sunpklo z2.d, z2.s ; VBITS_GE_512-NEXT: cmpne p0.d, p0/z, z2.d, #0 @@ -131,8 +131,8 @@ ; VBITS_GE_1024-NEXT: cmeq v2.16b, v0.16b, #0 ; VBITS_GE_1024-NEXT: uunpklo z0.h, z0.b ; VBITS_GE_1024-NEXT: uunpklo z0.s, z0.h -; VBITS_GE_1024-NEXT: uunpklo z0.d, z0.s ; VBITS_GE_1024-NEXT: sunpklo z2.h, z2.b +; VBITS_GE_1024-NEXT: uunpklo z0.d, z0.s ; VBITS_GE_1024-NEXT: sunpklo z2.s, z2.h ; VBITS_GE_1024-NEXT: sunpklo z2.d, z2.s ; VBITS_GE_1024-NEXT: cmpne p0.d, p0/z, z2.d, #0 @@ -226,8 +226,8 @@ ; VBITS_EQ_256-NEXT: ld1d { z4.d }, p0/z, [x1, x8, lsl #3] ; VBITS_EQ_256-NEXT: ext v3.16b, v0.16b, v0.16b, #8 ; VBITS_EQ_256-NEXT: uunpklo z0.s, z0.h -; VBITS_EQ_256-NEXT: uunpklo z0.d, z0.s ; VBITS_EQ_256-NEXT: sunpklo z2.s, z1.h +; VBITS_EQ_256-NEXT: uunpklo z0.d, z0.s ; VBITS_EQ_256-NEXT: ext v1.16b, v1.16b, v1.16b, #8 ; VBITS_EQ_256-NEXT: sunpklo z2.d, z2.s ; VBITS_EQ_256-NEXT: cmpne p1.d, p0/z, z2.d, #0 @@ -240,7 +240,6 @@ ; VBITS_EQ_256-NEXT: uunpklo z1.d, z3.s ; VBITS_EQ_256-NEXT: st1h { z1.d }, p0, [z4.d] ; VBITS_EQ_256-NEXT: ret -; ; VBITS_GE_512-LABEL: masked_scatter_v8i16: ; VBITS_GE_512: // %bb.0: ; VBITS_GE_512-NEXT: ldr q0, [x0] @@ -369,7 +368,6 @@ ; VBITS_EQ_256-NEXT: st1w { z1.d }, p0, [z3.d] ; VBITS_EQ_256-NEXT: st1w { z0.d }, p1, [z2.d] ; VBITS_EQ_256-NEXT: ret -; ; VBITS_GE_512-LABEL: masked_scatter_v8i32: ; VBITS_GE_512: // %bb.0: ; VBITS_GE_512-NEXT: ptrue p0.s, vl8 @@ -455,10 +453,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; 
CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldr q2, [x1] -; CHECK-NEXT: cmeq v1.2d, v0.2d, #0 -; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0 -; CHECK-NEXT: st1d { z0.d }, p0, [z2.d] +; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: cmeq v2.2d, v0.2d, #0 +; CHECK-NEXT: cmpne p0.d, p0/z, z2.d, #0 +; CHECK-NEXT: st1d { z0.d }, p0, [z1.d] ; CHECK-NEXT: ret %vals = load <2 x i64>, <2 x i64>* %a %ptrs = load <2 x i64*>, <2 x i64*>* %b @@ -498,7 +496,6 @@ ; VBITS_EQ_256-NEXT: st1d { z1.d }, p0, [z3.d] ; VBITS_EQ_256-NEXT: st1d { z0.d }, p1, [z2.d] ; VBITS_EQ_256-NEXT: ret -; ; VBITS_GE_512-LABEL: masked_scatter_v8i64: ; VBITS_GE_512: // %bb.0: ; VBITS_GE_512-NEXT: ptrue p0.d, vl8 diff --git a/llvm/test/CodeGen/AArch64/sve-vscale-attr.ll b/llvm/test/CodeGen/AArch64/sve-vscale-attr.ll --- a/llvm/test/CodeGen/AArch64/sve-vscale-attr.ll +++ b/llvm/test/CodeGen/AArch64/sve-vscale-attr.ll @@ -15,8 +15,8 @@ ; CHECK-NOARG-NEXT: ldp q6, q4, [x1] ; CHECK-NOARG-NEXT: stp q0, q1, [x0, #32] ; CHECK-NOARG-NEXT: add v2.4s, v2.4s, v6.4s -; CHECK-NOARG-NEXT: add v3.4s, v3.4s, v4.4s -; CHECK-NOARG-NEXT: stp q2, q3, [x0] +; CHECK-NOARG-NEXT: add v0.4s, v3.4s, v4.4s +; CHECK-NOARG-NEXT: stp q2, q0, [x0] ; CHECK-NOARG-NEXT: ret ; ; CHECK-ARG-LABEL: func_vscale_none: @@ -47,8 +47,8 @@ ; CHECK-NEXT: ldp q6, q4, [x1] ; CHECK-NEXT: stp q0, q1, [x0, #32] ; CHECK-NEXT: add v2.4s, v2.4s, v6.4s -; CHECK-NEXT: add v3.4s, v3.4s, v4.4s -; CHECK-NEXT: stp q2, q3, [x0] +; CHECK-NEXT: add v0.4s, v3.4s, v4.4s +; CHECK-NEXT: stp q2, q0, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i32>, <16 x i32>* %a %op2 = load <16 x i32>, <16 x i32>* %b diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll --- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll @@ -97,9 +97,9 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind { ; CHECK-LABEL: v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: uqadd v0.8b, v0.8b, v1.8b +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: uqadd v0.8b, v1.8b, v0.8b ; CHECK-NEXT: str d0, [x2] ; CHECK-NEXT: ret %x = load <8 x i8>, <8 x i8>* %px @@ -158,9 +158,9 @@ define void @v4i16(<4 x i16>* %px, <4 x i16>* %py, <4 x i16>* %pz) nounwind { ; CHECK-LABEL: v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: uqadd v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: uqadd v0.4h, v1.4h, v0.4h ; CHECK-NEXT: str d0, [x2] ; CHECK-NEXT: ret %x = load <4 x i16>, <4 x i16>* %px @@ -225,9 +225,9 @@ define void @v1i8(<1 x i8>* %px, <1 x i8>* %py, <1 x i8>* %pz) nounwind { ; CHECK-LABEL: v1i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr b0, [x0] -; CHECK-NEXT: ldr b1, [x1] -; CHECK-NEXT: uqadd v0.8b, v0.8b, v1.8b +; CHECK-NEXT: ldr b0, [x1] +; CHECK-NEXT: ldr b1, [x0] +; CHECK-NEXT: uqadd v0.8b, v1.8b, v0.8b ; CHECK-NEXT: st1 { v0.b }[0], [x2] ; CHECK-NEXT: ret %x = load <1 x i8>, <1 x i8>* %px @@ -240,9 +240,9 @@ define void @v1i16(<1 x i16>* %px, <1 x i16>* %py, <1 x i16>* %pz) nounwind { ; CHECK-LABEL: v1i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr h0, [x0] -; CHECK-NEXT: ldr h1, [x1] -; CHECK-NEXT: uqadd v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ldr h0, [x1] +; CHECK-NEXT: ldr h1, [x0] +; CHECK-NEXT: uqadd v0.4h, v1.4h, v0.4h ; CHECK-NEXT: str h0, [x2] ; CHECK-NEXT: ret %x = load <1 x i16>, <1 x i16>* %px diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-illegal-types.ll 
b/llvm/test/CodeGen/AArch64/urem-seteq-illegal-types.ll --- a/llvm/test/CodeGen/AArch64/urem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/AArch64/urem-seteq-illegal-types.ll @@ -67,25 +67,25 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: fmov s0, w0 ; CHECK-NEXT: adrp x8, .LCPI4_0 +; CHECK-NEXT: adrp x9, .LCPI4_1 ; CHECK-NEXT: mov v0.h[1], w1 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_0] -; CHECK-NEXT: adrp x8, .LCPI4_1 +; CHECK-NEXT: ldr d2, [x9, :lo12:.LCPI4_1] +; CHECK-NEXT: adrp x8, .LCPI4_2 ; CHECK-NEXT: mov v0.h[2], w2 ; CHECK-NEXT: sub v0.4h, v0.4h, v1.4h -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_1] -; CHECK-NEXT: adrp x8, .LCPI4_2 -; CHECK-NEXT: mul v0.4h, v0.4h, v1.4h ; CHECK-NEXT: movi d1, #0x0000000000ffff -; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI4_2] +; CHECK-NEXT: mul v0.4h, v0.4h, v2.4h +; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI4_2] ; CHECK-NEXT: adrp x8, .LCPI4_3 -; CHECK-NEXT: shl v2.4h, v0.4h, #1 +; CHECK-NEXT: shl v3.4h, v0.4h, #1 ; CHECK-NEXT: bic v0.4h, #248, lsl #8 -; CHECK-NEXT: ushl v2.4h, v2.4h, v3.4h ; CHECK-NEXT: ushl v0.4h, v0.4h, v1.4h -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_3] -; CHECK-NEXT: orr v0.8b, v0.8b, v2.8b +; CHECK-NEXT: ushl v1.4h, v3.4h, v2.4h +; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI4_3] +; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b ; CHECK-NEXT: bic v0.4h, #248, lsl #8 -; CHECK-NEXT: cmhi v0.4h, v0.4h, v1.4h +; CHECK-NEXT: cmhi v0.4h, v0.4h, v2.4h ; CHECK-NEXT: umov w0, v0.h[0] ; CHECK-NEXT: umov w1, v0.h[1] ; CHECK-NEXT: umov w2, v0.h[2] diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll b/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll --- a/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll +++ b/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll @@ -7,6 +7,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI0_0 ; CHECK-NEXT: adrp x9, .LCPI0_2 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0] ; CHECK-NEXT: adrp x8, .LCPI0_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI0_2] @@ -15,11 +16,10 @@ ; CHECK-NEXT: adrp x8, .LCPI0_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI0_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -34,13 +34,13 @@ ; CHECK-LABEL: test_urem_odd_allones_eq: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI1_0 +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_0] ; CHECK-NEXT: adrp x8, .LCPI1_1 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_1] ; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -51,13 +51,13 @@ ; CHECK-LABEL: test_urem_odd_allones_ne: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI2_0 +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0] ; CHECK-NEXT: adrp x8, .LCPI2_1 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_1] ; CHECK-NEXT: cmhi v0.4s, v0.4s, v1.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp ne <4 x i32> %urem, @@ 
-71,6 +71,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI3_0 ; CHECK-NEXT: adrp x9, .LCPI3_2 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0] ; CHECK-NEXT: adrp x8, .LCPI3_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI3_2] @@ -79,11 +80,10 @@ ; CHECK-NEXT: adrp x8, .LCPI3_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -95,6 +95,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI4_0 ; CHECK-NEXT: adrp x9, .LCPI4_2 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_0] ; CHECK-NEXT: adrp x8, .LCPI4_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI4_2] @@ -103,11 +104,10 @@ ; CHECK-NEXT: adrp x8, .LCPI4_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI4_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_3] -; CHECK-NEXT: cmhi v0.4s, v0.4s, v1.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhi v0.4s, v0.4s, v2.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp ne <4 x i32> %urem, @@ -121,6 +121,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI5_0 ; CHECK-NEXT: adrp x9, .LCPI5_2 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI5_0] ; CHECK-NEXT: adrp x8, .LCPI5_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI5_2] @@ -129,11 +130,10 @@ ; CHECK-NEXT: adrp x8, .LCPI5_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI5_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI5_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -145,6 +145,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI6_0 ; CHECK-NEXT: adrp x9, .LCPI6_2 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI6_0] ; CHECK-NEXT: adrp x8, .LCPI6_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI6_2] @@ -153,11 +154,10 @@ ; CHECK-NEXT: adrp x8, .LCPI6_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI6_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI6_3] -; CHECK-NEXT: cmhi v0.4s, v0.4s, v1.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhi v0.4s, v0.4s, v2.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp ne <4 x i32> %urem, @@ -173,6 +173,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI7_0 ; CHECK-NEXT: adrp x9, .LCPI7_2 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI7_0] ; CHECK-NEXT: adrp x8, .LCPI7_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI7_2] @@ -181,11 +182,10 @@ ; CHECK-NEXT: adrp x8, .LCPI7_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI7_3] ; 
CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI7_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -199,6 +199,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI8_0 ; CHECK-NEXT: adrp x9, .LCPI8_2 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI8_0] ; CHECK-NEXT: adrp x8, .LCPI8_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI8_2] @@ -207,11 +208,10 @@ ; CHECK-NEXT: adrp x8, .LCPI8_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI8_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI8_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -225,6 +225,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI9_0 ; CHECK-NEXT: adrp x9, .LCPI9_2 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI9_0] ; CHECK-NEXT: adrp x8, .LCPI9_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI9_2] @@ -233,11 +234,10 @@ ; CHECK-NEXT: adrp x8, .LCPI9_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI9_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI9_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -252,14 +252,14 @@ ; CHECK-LABEL: test_urem_odd_one: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #52429 -; CHECK-NEXT: adrp x9, .LCPI10_0 ; CHECK-NEXT: movk w8, #52428, lsl #16 +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: dup v1.4s, w8 +; CHECK-NEXT: adrp x8, .LCPI10_0 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI10_0] +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI10_0] ; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -273,16 +273,16 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #28087 ; CHECK-NEXT: movk w8, #46811, lsl #16 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: adrp x8, .LCPI11_0 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI11_0] ; CHECK-NEXT: shl v1.4s, v0.4s, #31 ; CHECK-NEXT: ushr v0.4s, v0.4s, #1 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI11_0] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -296,6 +296,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI12_0 ; CHECK-NEXT: adrp x9, .LCPI12_2 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI12_0] ; CHECK-NEXT: adrp x8, .LCPI12_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI12_2] @@ -304,11 +305,10 @@ ; CHECK-NEXT: 
adrp x8, .LCPI12_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI12_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI12_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -324,6 +324,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI13_0 ; CHECK-NEXT: adrp x9, .LCPI13_2 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI13_0] ; CHECK-NEXT: adrp x8, .LCPI13_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI13_2] @@ -332,11 +333,10 @@ ; CHECK-NEXT: adrp x8, .LCPI13_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI13_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI13_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -350,6 +350,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI14_0 ; CHECK-NEXT: adrp x9, .LCPI14_2 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI14_0] ; CHECK-NEXT: adrp x8, .LCPI14_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI14_2] @@ -358,11 +359,10 @@ ; CHECK-NEXT: adrp x8, .LCPI14_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI14_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI14_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -376,6 +376,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI15_0 ; CHECK-NEXT: adrp x9, .LCPI15_2 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_0] ; CHECK-NEXT: adrp x8, .LCPI15_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI15_2] @@ -384,11 +385,10 @@ ; CHECK-NEXT: adrp x8, .LCPI15_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI15_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -404,6 +404,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI16_0 ; CHECK-NEXT: adrp x9, .LCPI16_2 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] ; CHECK-NEXT: adrp x8, .LCPI16_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI16_2] @@ -412,11 +413,10 @@ ; CHECK-NEXT: adrp x8, .LCPI16_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI16_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; 
CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -430,6 +430,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI17_0 ; CHECK-NEXT: adrp x9, .LCPI17_2 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_0] ; CHECK-NEXT: adrp x8, .LCPI17_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI17_2] @@ -438,11 +439,10 @@ ; CHECK-NEXT: adrp x8, .LCPI17_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI17_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -456,6 +456,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI18_0 ; CHECK-NEXT: adrp x9, .LCPI18_2 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI18_0] ; CHECK-NEXT: adrp x8, .LCPI18_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI18_2] @@ -464,11 +465,10 @@ ; CHECK-NEXT: adrp x8, .LCPI18_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI18_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI18_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -483,13 +483,13 @@ ; CHECK-LABEL: test_urem_odd_allones_and_one: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI19_0 +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI19_0] ; CHECK-NEXT: adrp x8, .LCPI19_1 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI19_1] ; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -503,6 +503,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI20_0 ; CHECK-NEXT: adrp x9, .LCPI20_2 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI20_0] ; CHECK-NEXT: adrp x8, .LCPI20_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI20_2] @@ -511,11 +512,10 @@ ; CHECK-NEXT: adrp x8, .LCPI20_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI20_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI20_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -529,6 +529,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI21_0 ; CHECK-NEXT: adrp x9, .LCPI21_2 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI21_0] ; CHECK-NEXT: adrp x8, .LCPI21_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI21_2] @@ -537,11 +538,10 @@ ; CHECK-NEXT: adrp x8, .LCPI21_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI21_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI21_3] -; 
CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -557,6 +557,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI22_0 ; CHECK-NEXT: adrp x9, .LCPI22_2 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI22_0] ; CHECK-NEXT: adrp x8, .LCPI22_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI22_2] @@ -565,11 +566,10 @@ ; CHECK-NEXT: adrp x8, .LCPI22_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI22_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI22_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -583,6 +583,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI23_0 ; CHECK-NEXT: adrp x9, .LCPI23_2 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI23_0] ; CHECK-NEXT: adrp x8, .LCPI23_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI23_2] @@ -591,11 +592,10 @@ ; CHECK-NEXT: adrp x8, .LCPI23_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI23_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI23_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -609,6 +609,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI24_0 ; CHECK-NEXT: adrp x9, .LCPI24_2 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI24_0] ; CHECK-NEXT: adrp x8, .LCPI24_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI24_2] @@ -617,11 +618,10 @@ ; CHECK-NEXT: adrp x8, .LCPI24_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI24_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI24_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -636,6 +636,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI25_0 ; CHECK-NEXT: adrp x9, .LCPI25_2 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI25_0] ; CHECK-NEXT: adrp x8, .LCPI25_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI25_2] @@ -644,11 +645,10 @@ ; CHECK-NEXT: adrp x8, .LCPI25_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI25_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI25_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -661,6 +661,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI26_0 ; CHECK-NEXT: adrp x9, .LCPI26_2 +; CHECK-NEXT: 
movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI26_0] ; CHECK-NEXT: adrp x8, .LCPI26_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI26_2] @@ -669,11 +670,10 @@ ; CHECK-NEXT: adrp x8, .LCPI26_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI26_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI26_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll b/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll --- a/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll +++ b/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll @@ -54,11 +54,11 @@ ; CHECK-NEXT: mov w8, #43690 ; CHECK-NEXT: movk w8, #10922, lsl #16 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s +; CHECK-NEXT: dup v2.4s, w8 ; CHECK-NEXT: shl v1.4s, v0.4s, #31 ; CHECK-NEXT: ushr v0.4s, v0.4s, #1 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: dup v1.4s, w8 -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, @@ -70,18 +70,18 @@ ; CHECK-LABEL: t32_6_part1: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI3_0 +; CHECK-NEXT: mov w9, #43691 +; CHECK-NEXT: movk w9, #43690, lsl #16 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0] -; CHECK-NEXT: mov w8, #43691 -; CHECK-NEXT: movk w8, #43690, lsl #16 -; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s -; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: adrp x8, .LCPI3_1 -; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s +; CHECK-NEXT: dup v2.4s, w9 +; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s +; CHECK-NEXT: mul v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_1] ; CHECK-NEXT: shl v1.4s, v0.4s, #31 ; CHECK-NEXT: ushr v0.4s, v0.4s, #1 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_1] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll b/llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll --- a/llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll +++ b/llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll @@ -7,14 +7,14 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #23593 ; CHECK-NEXT: movk w8, #49807, lsl #16 +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: mov w8, #28835 ; CHECK-NEXT: movk w8, #2621, lsl #16 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -28,17 +28,17 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #23593 ; CHECK-NEXT: movk w8, #49807, lsl #16 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: mov w8, #23592 ; CHECK-NEXT: movk w8, #655, lsl #16 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s +; CHECK-NEXT: dup v2.4s, w8 ; CHECK-NEXT: shl v1.4s, v0.4s, #30 ; CHECK-NEXT: ushr v0.4s, v0.4s, #2 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: dup v1.4s, w8 -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; 
CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -53,13 +53,13 @@ ; CHECK-LABEL: test_urem_odd_neg25: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI2_0 +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0] ; CHECK-NEXT: adrp x8, .LCPI2_1 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_1] ; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -72,16 +72,16 @@ ; CHECK-LABEL: test_urem_even_neg100: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI3_0 +; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0] ; CHECK-NEXT: adrp x8, .LCPI3_1 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_1] ; CHECK-NEXT: shl v1.4s, v0.4s, #30 ; CHECK-NEXT: ushr v0.4s, v0.4s, #2 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_1] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -98,13 +98,13 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #34079 ; CHECK-NEXT: movk w8, #20971, lsl #16 -; CHECK-NEXT: movi v1.4s, #25 -; CHECK-NEXT: dup v2.4s, w8 -; CHECK-NEXT: umull2 v3.2d, v0.4s, v2.4s -; CHECK-NEXT: umull v2.2d, v0.2s, v2.2s -; CHECK-NEXT: uzp2 v2.4s, v2.4s, v3.4s -; CHECK-NEXT: ushr v2.4s, v2.4s, #3 -; CHECK-NEXT: mls v0.4s, v2.4s, v1.4s +; CHECK-NEXT: dup v1.4s, w8 +; CHECK-NEXT: umull2 v2.2d, v0.4s, v1.4s +; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s +; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s +; CHECK-NEXT: movi v2.4s, #25 +; CHECK-NEXT: ushr v1.4s, v1.4s, #3 +; CHECK-NEXT: mls v0.4s, v1.4s, v2.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b @@ -120,13 +120,13 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #34079 ; CHECK-NEXT: movk w8, #20971, lsl #16 -; CHECK-NEXT: movi v1.4s, #100 -; CHECK-NEXT: dup v2.4s, w8 -; CHECK-NEXT: umull2 v3.2d, v0.4s, v2.4s -; CHECK-NEXT: umull v2.2d, v0.2s, v2.2s -; CHECK-NEXT: uzp2 v2.4s, v2.4s, v3.4s -; CHECK-NEXT: ushr v2.4s, v2.4s, #5 -; CHECK-NEXT: mls v0.4s, v2.4s, v1.4s +; CHECK-NEXT: dup v1.4s, w8 +; CHECK-NEXT: umull2 v2.2d, v0.4s, v1.4s +; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s +; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s +; CHECK-NEXT: movi v2.4s, #100 +; CHECK-NEXT: ushr v1.4s, v1.4s, #5 +; CHECK-NEXT: mls v0.4s, v1.4s, v2.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b @@ -167,10 +167,10 @@ ; CHECK-LABEL: test_urem_pow2: ; CHECK: // %bb.0: ; CHECK-NEXT: movi v1.4s, #15 +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b -; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 -; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -182,8 +182,8 @@ define <4 x i32> @test_urem_int_min(<4 x i32> %X) nounwind { ; CHECK-LABEL: test_urem_int_min: ; CHECK: // %bb.0: -; CHECK-NEXT: bic v0.4s, #128, lsl #24 ; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: bic v0.4s, #128, lsl #24 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 ; 
CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-vec-tautological.ll b/llvm/test/CodeGen/AArch64/urem-seteq-vec-tautological.ll --- a/llvm/test/CodeGen/AArch64/urem-seteq-vec-tautological.ll +++ b/llvm/test/CodeGen/AArch64/urem-seteq-vec-tautological.ll @@ -5,11 +5,11 @@ ; CHECK-LABEL: t0_all_tautological: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI0_0 +; CHECK-NEXT: adrp x9, .LCPI0_1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0] -; CHECK-NEXT: adrp x8, .LCPI0_1 +; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI0_1] ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_1] -; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s +; CHECK-NEXT: cmeq v0.4s, v0.4s, v2.4s ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, diff --git a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll --- a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll @@ -98,9 +98,9 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind { ; CHECK-LABEL: v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: uqsub v0.8b, v0.8b, v1.8b +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: uqsub v0.8b, v1.8b, v0.8b ; CHECK-NEXT: str d0, [x2] ; CHECK-NEXT: ret %x = load <8 x i8>, <8 x i8>* %px @@ -155,9 +155,9 @@ define void @v4i16(<4 x i16>* %px, <4 x i16>* %py, <4 x i16>* %pz) nounwind { ; CHECK-LABEL: v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: uqsub v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: uqsub v0.4h, v1.4h, v0.4h ; CHECK-NEXT: str d0, [x2] ; CHECK-NEXT: ret %x = load <4 x i16>, <4 x i16>* %px @@ -220,9 +220,9 @@ define void @v1i8(<1 x i8>* %px, <1 x i8>* %py, <1 x i8>* %pz) nounwind { ; CHECK-LABEL: v1i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr b0, [x0] -; CHECK-NEXT: ldr b1, [x1] -; CHECK-NEXT: uqsub v0.8b, v0.8b, v1.8b +; CHECK-NEXT: ldr b0, [x1] +; CHECK-NEXT: ldr b1, [x0] +; CHECK-NEXT: uqsub v0.8b, v1.8b, v0.8b ; CHECK-NEXT: st1 { v0.b }[0], [x2] ; CHECK-NEXT: ret %x = load <1 x i8>, <1 x i8>* %px @@ -235,9 +235,9 @@ define void @v1i16(<1 x i16>* %px, <1 x i16>* %py, <1 x i16>* %pz) nounwind { ; CHECK-LABEL: v1i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr h0, [x0] -; CHECK-NEXT: ldr h1, [x1] -; CHECK-NEXT: uqsub v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ldr h0, [x1] +; CHECK-NEXT: ldr h1, [x0] +; CHECK-NEXT: uqsub v0.4h, v1.4h, v0.4h ; CHECK-NEXT: str h0, [x2] ; CHECK-NEXT: ret %x = load <1 x i16>, <1 x i16>* %px diff --git a/llvm/test/CodeGen/AArch64/vec_cttz.ll b/llvm/test/CodeGen/AArch64/vec_cttz.ll --- a/llvm/test/CodeGen/AArch64/vec_cttz.ll +++ b/llvm/test/CodeGen/AArch64/vec_cttz.ll @@ -85,8 +85,8 @@ ; CHECK-NEXT: movi v1.8h, #1 ; CHECK-NEXT: sub v1.8h, v0.8h, v1.8h ; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b -; CHECK-NEXT: movi v1.8h, #16 ; CHECK-NEXT: clz v0.8h, v0.8h +; CHECK-NEXT: movi v1.8h, #16 ; CHECK-NEXT: sub v0.8h, v1.8h, v0.8h ; CHECK-NEXT: ret %b = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true) @@ -99,8 +99,8 @@ ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: sub v1.4s, v0.4s, v1.4s ; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b -; CHECK-NEXT: movi v1.4s, #32 ; CHECK-NEXT: clz v0.4s, v0.4s +; CHECK-NEXT: movi v1.4s, #32 ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s ; CHECK-NEXT: ret %b = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) diff --git 
a/llvm/test/CodeGen/AArch64/vec_uaddo.ll b/llvm/test/CodeGen/AArch64/vec_uaddo.ll --- a/llvm/test/CodeGen/AArch64/vec_uaddo.ll +++ b/llvm/test/CodeGen/AArch64/vec_uaddo.ll @@ -52,8 +52,8 @@ ; CHECK-NEXT: add v1.4s, v0.4s, v1.4s ; CHECK-NEXT: add x8, x0, #8 ; CHECK-NEXT: cmhi v0.4s, v0.4s, v1.4s -; CHECK-NEXT: st1 { v1.s }[2], [x8] ; CHECK-NEXT: str d1, [x0] +; CHECK-NEXT: st1 { v1.s }[2], [x8] ; CHECK-NEXT: ret %t = call {<3 x i32>, <3 x i1>} @llvm.uadd.with.overflow.v3i32(<3 x i32> %a0, <3 x i32> %a1) %val = extractvalue {<3 x i32>, <3 x i1>} %t, 0 @@ -84,27 +84,27 @@ ; CHECK-NEXT: fmov s0, w6 ; CHECK-NEXT: fmov s1, w0 ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: add x9, sp, #8 ; CHECK-NEXT: ldr s2, [sp, #16] -; CHECK-NEXT: fmov s3, w4 +; CHECK-NEXT: add x9, sp, #24 +; CHECK-NEXT: add x10, sp, #8 ; CHECK-NEXT: mov v0.s[1], w7 +; CHECK-NEXT: fmov s3, w4 ; CHECK-NEXT: mov v1.s[1], w1 +; CHECK-NEXT: ld1 { v2.s }[1], [x9] ; CHECK-NEXT: mov v3.s[1], w5 ; CHECK-NEXT: ld1 { v0.s }[2], [x8] -; CHECK-NEXT: add x8, sp, #24 ; CHECK-NEXT: mov v1.s[2], w2 -; CHECK-NEXT: ld1 { v2.s }[1], [x8] -; CHECK-NEXT: ld1 { v0.s }[3], [x9] -; CHECK-NEXT: mov v1.s[3], w3 ; CHECK-NEXT: ldr x8, [sp, #32] ; CHECK-NEXT: add v2.4s, v3.4s, v2.4s -; CHECK-NEXT: add v0.4s, v1.4s, v0.4s -; CHECK-NEXT: cmhi v3.4s, v3.4s, v2.4s +; CHECK-NEXT: ld1 { v0.s }[3], [x10] +; CHECK-NEXT: mov v1.s[3], w3 ; CHECK-NEXT: str d2, [x8, #16] -; CHECK-NEXT: cmhi v1.4s, v1.4s, v0.4s -; CHECK-NEXT: str q0, [x8] +; CHECK-NEXT: cmhi v3.4s, v3.4s, v2.4s ; CHECK-NEXT: mov w5, v3.s[1] ; CHECK-NEXT: fmov w4, s3 +; CHECK-NEXT: add v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhi v1.4s, v1.4s, v0.4s +; CHECK-NEXT: str q0, [x8] ; CHECK-NEXT: mov w1, v1.s[1] ; CHECK-NEXT: mov w2, v1.s[2] ; CHECK-NEXT: mov w3, v1.s[3] @@ -141,23 +141,23 @@ ; CHECK-NEXT: add v4.16b, v0.16b, v1.16b ; CHECK-NEXT: cmhi v0.16b, v0.16b, v4.16b ; CHECK-NEXT: str q4, [x0] -; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: zip1 v2.8b, v0.8b, v0.8b -; CHECK-NEXT: zip2 v0.8b, v0.8b, v0.8b +; CHECK-NEXT: zip1 v1.8b, v0.8b, v0.8b +; CHECK-NEXT: zip2 v2.8b, v0.8b, v0.8b +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-NEXT: ushll v2.4s, v2.4h, #0 -; CHECK-NEXT: zip1 v3.8b, v1.8b, v0.8b -; CHECK-NEXT: zip2 v1.8b, v1.8b, v0.8b -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: zip1 v3.8b, v0.8b, v0.8b +; CHECK-NEXT: zip2 v5.8b, v0.8b, v0.8b +; CHECK-NEXT: shl v1.4s, v1.4s, #31 +; CHECK-NEXT: shl v2.4s, v2.4s, #31 +; CHECK-NEXT: cmlt v0.4s, v1.4s, #0 +; CHECK-NEXT: cmlt v1.4s, v2.4s, #0 +; CHECK-NEXT: ushll v2.4s, v3.4h, #0 +; CHECK-NEXT: ushll v3.4s, v5.4h, #0 ; CHECK-NEXT: shl v2.4s, v2.4s, #31 -; CHECK-NEXT: ushll v3.4s, v3.4h, #0 -; CHECK-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-NEXT: shl v5.4s, v0.4s, #31 -; CHECK-NEXT: cmlt v0.4s, v2.4s, #0 ; CHECK-NEXT: shl v3.4s, v3.4s, #31 -; CHECK-NEXT: shl v6.4s, v1.4s, #31 -; CHECK-NEXT: cmlt v1.4s, v5.4s, #0 -; CHECK-NEXT: cmlt v2.4s, v3.4s, #0 -; CHECK-NEXT: cmlt v3.4s, v6.4s, #0 +; CHECK-NEXT: cmlt v2.4s, v2.4s, #0 +; CHECK-NEXT: cmlt v3.4s, v3.4s, #0 ; CHECK-NEXT: ret %t = call {<16 x i8>, <16 x i1>} @llvm.uadd.with.overflow.v16i8(<16 x i8> %a0, <16 x i8> %a1) %val = extractvalue {<16 x i8>, <16 x i1>} %t, 0 @@ -213,26 +213,26 @@ ; CHECK-NEXT: bic v1.4s, #255, lsl #24 ; CHECK-NEXT: bic v0.4s, #255, lsl #24 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s -; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: mov w8, v0.s[3] ; CHECK-NEXT: mov w9, v0.s[2] ; CHECK-NEXT: mov w10, v0.s[1] ; CHECK-NEXT: fmov w11, s0 +; 
CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: bic v1.4s, #255, lsl #24 ; CHECK-NEXT: sturh w8, [x0, #9] ; CHECK-NEXT: lsr w8, w8, #16 +; CHECK-NEXT: cmeq v1.4s, v1.4s, v0.4s ; CHECK-NEXT: strh w9, [x0, #6] ; CHECK-NEXT: sturh w10, [x0, #3] ; CHECK-NEXT: lsr w9, w9, #16 -; CHECK-NEXT: strh w11, [x0] -; CHECK-NEXT: cmeq v1.4s, v1.4s, v0.4s +; CHECK-NEXT: lsr w10, w10, #16 ; CHECK-NEXT: strb w8, [x0, #11] -; CHECK-NEXT: lsr w8, w10, #16 -; CHECK-NEXT: lsr w10, w11, #16 -; CHECK-NEXT: strb w9, [x0, #8] +; CHECK-NEXT: lsr w8, w11, #16 +; CHECK-NEXT: strh w11, [x0] ; CHECK-NEXT: mvn v0.16b, v1.16b -; CHECK-NEXT: strb w8, [x0, #5] -; CHECK-NEXT: strb w10, [x0, #2] +; CHECK-NEXT: strb w9, [x0, #8] +; CHECK-NEXT: strb w10, [x0, #5] +; CHECK-NEXT: strb w8, [x0, #2] ; CHECK-NEXT: ret %t = call {<4 x i24>, <4 x i1>} @llvm.uadd.with.overflow.v4i24(<4 x i24> %a0, <4 x i24> %a1) %val = extractvalue {<4 x i24>, <4 x i1>} %t, 0 @@ -249,20 +249,20 @@ ; CHECK-NEXT: and v1.8b, v1.8b, v2.8b ; CHECK-NEXT: and v0.8b, v0.8b, v2.8b ; CHECK-NEXT: add v0.4h, v0.4h, v1.4h -; CHECK-NEXT: and v1.8b, v0.8b, v2.8b ; CHECK-NEXT: umov w8, v0.h[1] ; CHECK-NEXT: umov w9, v0.h[2] ; CHECK-NEXT: umov w10, v0.h[0] ; CHECK-NEXT: umov w11, v0.h[3] -; CHECK-NEXT: cmeq v1.4h, v1.4h, v0.4h +; CHECK-NEXT: and v1.8b, v0.8b, v2.8b +; CHECK-NEXT: cmeq v0.4h, v1.4h, v0.4h ; CHECK-NEXT: and w8, w8, #0x1 ; CHECK-NEXT: and w9, w9, #0x1 +; CHECK-NEXT: mvn v0.8b, v0.8b +; CHECK-NEXT: sshll v0.4s, v0.4h, #0 ; CHECK-NEXT: bfi w10, w8, #1, #1 -; CHECK-NEXT: mvn v1.8b, v1.8b ; CHECK-NEXT: bfi w10, w9, #2, #1 ; CHECK-NEXT: bfi w10, w11, #3, #29 ; CHECK-NEXT: and w8, w10, #0xf -; CHECK-NEXT: sshll v0.4s, v1.4h, #0 ; CHECK-NEXT: strb w8, [x0] ; CHECK-NEXT: ret %t = call {<4 x i1>, <4 x i1>} @llvm.uadd.with.overflow.v4i1(<4 x i1> %a0, <4 x i1> %a1) diff --git a/llvm/test/CodeGen/AArch64/vec_umulo.ll b/llvm/test/CodeGen/AArch64/vec_umulo.ll --- a/llvm/test/CodeGen/AArch64/vec_umulo.ll +++ b/llvm/test/CodeGen/AArch64/vec_umulo.ll @@ -22,8 +22,8 @@ ; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s ; CHECK-NEXT: shrn v0.2s, v1.2d, #32 ; CHECK-NEXT: xtn v1.2s, v1.2d -; CHECK-NEXT: str s1, [x0] ; CHECK-NEXT: cmtst v0.2s, v0.2s, v0.2s +; CHECK-NEXT: str s1, [x0] ; CHECK-NEXT: ret %t = call {<1 x i32>, <1 x i1>} @llvm.umul.with.overflow.v1i32(<1 x i32> %a0, <1 x i32> %a1) %val = extractvalue {<1 x i32>, <1 x i1>} %t, 0 @@ -39,8 +39,8 @@ ; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s ; CHECK-NEXT: shrn v0.2s, v1.2d, #32 ; CHECK-NEXT: xtn v1.2s, v1.2d -; CHECK-NEXT: str d1, [x0] ; CHECK-NEXT: cmtst v0.2s, v0.2s, v0.2s +; CHECK-NEXT: str d1, [x0] ; CHECK-NEXT: ret %t = call {<2 x i32>, <2 x i1>} @llvm.umul.with.overflow.v2i32(<2 x i32> %a0, <2 x i32> %a1) %val = extractvalue {<2 x i32>, <2 x i1>} %t, 0 @@ -96,37 +96,37 @@ ; CHECK-NEXT: fmov s0, w6 ; CHECK-NEXT: fmov s1, w0 ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: add x9, sp, #8 ; CHECK-NEXT: ldr s2, [sp, #16] -; CHECK-NEXT: fmov s3, w4 +; CHECK-NEXT: add x9, sp, #24 +; CHECK-NEXT: add x10, sp, #8 ; CHECK-NEXT: mov v0.s[1], w7 +; CHECK-NEXT: fmov s3, w4 ; CHECK-NEXT: mov v1.s[1], w1 +; CHECK-NEXT: ld1 { v2.s }[1], [x9] ; CHECK-NEXT: mov v3.s[1], w5 ; CHECK-NEXT: ld1 { v0.s }[2], [x8] -; CHECK-NEXT: add x8, sp, #24 ; CHECK-NEXT: mov v1.s[2], w2 -; CHECK-NEXT: ld1 { v2.s }[1], [x8] -; CHECK-NEXT: ld1 { v0.s }[3], [x9] -; CHECK-NEXT: mov v1.s[3], w3 ; CHECK-NEXT: ldr x8, [sp, #32] -; CHECK-NEXT: umull2 v6.2d, v3.4s, v2.4s +; CHECK-NEXT: umull2 v4.2d, v3.4s, v2.4s +; CHECK-NEXT: ld1 { v0.s }[3], [x10] +; CHECK-NEXT: mov v1.s[3], w3 ; 
CHECK-NEXT: umull v7.2d, v3.2s, v2.2s -; CHECK-NEXT: umull2 v4.2d, v1.4s, v0.4s -; CHECK-NEXT: umull v5.2d, v1.2s, v0.2s ; CHECK-NEXT: mul v2.4s, v3.4s, v2.4s -; CHECK-NEXT: mul v0.4s, v1.4s, v0.4s -; CHECK-NEXT: uzp2 v4.4s, v5.4s, v4.4s -; CHECK-NEXT: uzp2 v5.4s, v7.4s, v6.4s +; CHECK-NEXT: umull2 v5.2d, v1.4s, v0.4s +; CHECK-NEXT: umull v6.2d, v1.2s, v0.2s +; CHECK-NEXT: uzp2 v4.4s, v7.4s, v4.4s ; CHECK-NEXT: str d2, [x8, #16] -; CHECK-NEXT: str q0, [x8] +; CHECK-NEXT: mul v0.4s, v1.4s, v0.4s +; CHECK-NEXT: uzp2 v5.4s, v6.4s, v5.4s ; CHECK-NEXT: cmtst v4.4s, v4.4s, v4.4s +; CHECK-NEXT: str q0, [x8] ; CHECK-NEXT: cmtst v3.4s, v5.4s, v5.4s -; CHECK-NEXT: mov w1, v4.s[1] -; CHECK-NEXT: mov w2, v4.s[2] -; CHECK-NEXT: mov w3, v4.s[3] -; CHECK-NEXT: mov w5, v3.s[1] -; CHECK-NEXT: fmov w0, s4 -; CHECK-NEXT: fmov w4, s3 +; CHECK-NEXT: mov w5, v4.s[1] +; CHECK-NEXT: fmov w4, s4 +; CHECK-NEXT: mov w1, v3.s[1] +; CHECK-NEXT: mov w2, v3.s[2] +; CHECK-NEXT: mov w3, v3.s[3] +; CHECK-NEXT: fmov w0, s3 ; CHECK-NEXT: ret %t = call {<6 x i32>, <6 x i1>} @llvm.umul.with.overflow.v6i32(<6 x i32> %a0, <6 x i32> %a1) %val = extractvalue {<6 x i32>, <6 x i1>} %t, 0 @@ -166,29 +166,27 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: umull2 v2.8h, v0.16b, v1.16b ; CHECK-NEXT: umull v3.8h, v0.8b, v1.8b +; CHECK-NEXT: mul v5.16b, v0.16b, v1.16b ; CHECK-NEXT: uzp2 v2.16b, v3.16b, v2.16b +; CHECK-NEXT: str q5, [x0] ; CHECK-NEXT: cmtst v2.16b, v2.16b, v2.16b -; CHECK-NEXT: ext v3.16b, v2.16b, v2.16b, #8 -; CHECK-NEXT: zip1 v4.8b, v2.8b, v0.8b -; CHECK-NEXT: zip2 v2.8b, v2.8b, v0.8b -; CHECK-NEXT: zip1 v5.8b, v3.8b, v0.8b -; CHECK-NEXT: zip2 v3.8b, v3.8b, v0.8b -; CHECK-NEXT: ushll v4.4s, v4.4h, #0 -; CHECK-NEXT: ushll v2.4s, v2.4h, #0 -; CHECK-NEXT: ushll v5.4s, v5.4h, #0 -; CHECK-NEXT: ushll v3.4s, v3.4h, #0 -; CHECK-NEXT: shl v4.4s, v4.4s, #31 +; CHECK-NEXT: zip1 v3.8b, v2.8b, v0.8b +; CHECK-NEXT: zip2 v4.8b, v2.8b, v0.8b +; CHECK-NEXT: ext v0.16b, v2.16b, v2.16b, #8 +; CHECK-NEXT: ushll v1.4s, v3.4h, #0 +; CHECK-NEXT: ushll v2.4s, v4.4h, #0 +; CHECK-NEXT: zip1 v3.8b, v0.8b, v0.8b +; CHECK-NEXT: zip2 v4.8b, v0.8b, v0.8b +; CHECK-NEXT: shl v1.4s, v1.4s, #31 +; CHECK-NEXT: shl v2.4s, v2.4s, #31 +; CHECK-NEXT: cmlt v0.4s, v1.4s, #0 +; CHECK-NEXT: cmlt v1.4s, v2.4s, #0 +; CHECK-NEXT: ushll v2.4s, v3.4h, #0 +; CHECK-NEXT: ushll v3.4s, v4.4h, #0 ; CHECK-NEXT: shl v2.4s, v2.4s, #31 -; CHECK-NEXT: shl v6.4s, v5.4s, #31 ; CHECK-NEXT: shl v3.4s, v3.4s, #31 -; CHECK-NEXT: cmlt v4.4s, v4.4s, #0 -; CHECK-NEXT: cmlt v5.4s, v2.4s, #0 -; CHECK-NEXT: cmlt v2.4s, v6.4s, #0 +; CHECK-NEXT: cmlt v2.4s, v2.4s, #0 ; CHECK-NEXT: cmlt v3.4s, v3.4s, #0 -; CHECK-NEXT: mul v6.16b, v0.16b, v1.16b -; CHECK-NEXT: mov v0.16b, v4.16b -; CHECK-NEXT: mov v1.16b, v5.16b -; CHECK-NEXT: str q6, [x0] ; CHECK-NEXT: ret %t = call {<16 x i8>, <16 x i1>} @llvm.umul.with.overflow.v16i8(<16 x i8> %a0, <16 x i8> %a1) %val = extractvalue {<16 x i8>, <16 x i1>} %t, 0 @@ -262,30 +260,30 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: bic v1.4s, #255, lsl #24 ; CHECK-NEXT: bic v0.4s, #255, lsl #24 -; CHECK-NEXT: umull2 v2.2d, v0.4s, v1.4s -; CHECK-NEXT: umull v3.2d, v0.2s, v1.2s -; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s -; CHECK-NEXT: uzp2 v1.4s, v3.4s, v2.4s -; CHECK-NEXT: ushr v2.4s, v0.4s, #24 -; CHECK-NEXT: mov w8, v0.s[3] -; CHECK-NEXT: mov w9, v0.s[2] -; CHECK-NEXT: mov w10, v0.s[1] -; CHECK-NEXT: cmeq v1.4s, v1.4s, #0 -; CHECK-NEXT: fmov w11, s0 -; CHECK-NEXT: cmtst v2.4s, v2.4s, v2.4s +; CHECK-NEXT: mul v2.4s, v0.4s, v1.4s +; CHECK-NEXT: umull2 v3.2d, v0.4s, v1.4s +; CHECK-NEXT: 
umull v0.2d, v0.2s, v1.2s +; CHECK-NEXT: mov w8, v2.s[3] +; CHECK-NEXT: mov w10, v2.s[2] +; CHECK-NEXT: mov w11, v2.s[1] +; CHECK-NEXT: ushr v1.4s, v2.4s, #24 +; CHECK-NEXT: uzp2 v0.4s, v0.4s, v3.4s +; CHECK-NEXT: fmov w9, s2 +; CHECK-NEXT: cmtst v1.4s, v1.4s, v1.4s ; CHECK-NEXT: sturh w8, [x0, #9] ; CHECK-NEXT: lsr w8, w8, #16 -; CHECK-NEXT: strh w9, [x0, #6] -; CHECK-NEXT: lsr w9, w9, #16 -; CHECK-NEXT: sturh w10, [x0, #3] -; CHECK-NEXT: orn v0.16b, v2.16b, v1.16b +; CHECK-NEXT: strh w10, [x0, #6] +; CHECK-NEXT: lsr w10, w10, #16 +; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 +; CHECK-NEXT: sturh w11, [x0, #3] +; CHECK-NEXT: lsr w11, w11, #16 ; CHECK-NEXT: strb w8, [x0, #11] -; CHECK-NEXT: lsr w8, w10, #16 -; CHECK-NEXT: lsr w10, w11, #16 -; CHECK-NEXT: strh w11, [x0] -; CHECK-NEXT: strb w9, [x0, #8] -; CHECK-NEXT: strb w8, [x0, #5] -; CHECK-NEXT: strb w10, [x0, #2] +; CHECK-NEXT: lsr w8, w9, #16 +; CHECK-NEXT: strh w9, [x0] +; CHECK-NEXT: orn v0.16b, v1.16b, v0.16b +; CHECK-NEXT: strb w10, [x0, #8] +; CHECK-NEXT: strb w11, [x0, #5] +; CHECK-NEXT: strb w8, [x0, #2] ; CHECK-NEXT: ret %t = call {<4 x i24>, <4 x i1>} @llvm.umul.with.overflow.v4i24(<4 x i24> %a0, <4 x i24> %a1) %val = extractvalue {<4 x i24>, <4 x i1>} %t, 0 diff --git a/llvm/test/CodeGen/AArch64/vecreduce-add.ll b/llvm/test/CodeGen/AArch64/vecreduce-add.ll --- a/llvm/test/CodeGen/AArch64/vecreduce-add.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-add.ll @@ -407,17 +407,17 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ushll2 v1.8h, v0.16b, #0 ; CHECK-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-NEXT: ushll2 v2.4s, v1.8h, #0 -; CHECK-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-NEXT: ushll v2.4s, v1.4h, #0 +; CHECK-NEXT: ushll2 v1.4s, v1.8h, #0 ; CHECK-NEXT: ushll2 v3.4s, v0.8h, #0 ; CHECK-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-NEXT: uaddl2 v4.2d, v3.4s, v2.4s -; CHECK-NEXT: uaddl2 v5.2d, v0.4s, v1.4s -; CHECK-NEXT: uaddl v2.2d, v3.2s, v2.2s -; CHECK-NEXT: uaddl v0.2d, v0.2s, v1.2s -; CHECK-NEXT: add v1.2d, v5.2d, v4.2d -; CHECK-NEXT: add v0.2d, v0.2d, v2.2d +; CHECK-NEXT: uaddl2 v4.2d, v3.4s, v1.4s +; CHECK-NEXT: uaddl2 v5.2d, v0.4s, v2.4s +; CHECK-NEXT: uaddl v1.2d, v3.2s, v1.2s +; CHECK-NEXT: uaddl v0.2d, v0.2s, v2.2s +; CHECK-NEXT: add v2.2d, v5.2d, v4.2d ; CHECK-NEXT: add v0.2d, v0.2d, v1.2d +; CHECK-NEXT: add v0.2d, v0.2d, v2.2d ; CHECK-NEXT: addp d0, v0.2d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret @@ -432,17 +432,17 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sshll2 v1.8h, v0.16b, #0 ; CHECK-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-NEXT: sshll2 v2.4s, v1.8h, #0 -; CHECK-NEXT: sshll v1.4s, v1.4h, #0 +; CHECK-NEXT: sshll v2.4s, v1.4h, #0 +; CHECK-NEXT: sshll2 v1.4s, v1.8h, #0 ; CHECK-NEXT: sshll2 v3.4s, v0.8h, #0 ; CHECK-NEXT: sshll v0.4s, v0.4h, #0 -; CHECK-NEXT: saddl2 v4.2d, v3.4s, v2.4s -; CHECK-NEXT: saddl2 v5.2d, v0.4s, v1.4s -; CHECK-NEXT: saddl v2.2d, v3.2s, v2.2s -; CHECK-NEXT: saddl v0.2d, v0.2s, v1.2s -; CHECK-NEXT: add v1.2d, v5.2d, v4.2d -; CHECK-NEXT: add v0.2d, v0.2d, v2.2d +; CHECK-NEXT: saddl2 v4.2d, v3.4s, v1.4s +; CHECK-NEXT: saddl2 v5.2d, v0.4s, v2.4s +; CHECK-NEXT: saddl v1.2d, v3.2s, v1.2s +; CHECK-NEXT: saddl v0.2d, v0.2s, v2.2s +; CHECK-NEXT: add v2.2d, v5.2d, v4.2d ; CHECK-NEXT: add v0.2d, v0.2d, v1.2d +; CHECK-NEXT: add v0.2d, v0.2d, v2.2d ; CHECK-NEXT: addp d0, v0.2d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret @@ -1029,17 +1029,17 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ushll2 v1.8h, v0.16b, #0 ; CHECK-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-NEXT: ushll2 v2.4s, v1.8h, #0 -; CHECK-NEXT: ushll v1.4s, v1.4h, #0 +; 
CHECK-NEXT: ushll v2.4s, v1.4h, #0 +; CHECK-NEXT: ushll2 v1.4s, v1.8h, #0 ; CHECK-NEXT: ushll2 v3.4s, v0.8h, #0 ; CHECK-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-NEXT: uaddl2 v4.2d, v3.4s, v2.4s -; CHECK-NEXT: uaddl2 v5.2d, v0.4s, v1.4s -; CHECK-NEXT: uaddl v2.2d, v3.2s, v2.2s -; CHECK-NEXT: uaddl v0.2d, v0.2s, v1.2s -; CHECK-NEXT: add v1.2d, v5.2d, v4.2d -; CHECK-NEXT: add v0.2d, v0.2d, v2.2d +; CHECK-NEXT: uaddl2 v4.2d, v3.4s, v1.4s +; CHECK-NEXT: uaddl2 v5.2d, v0.4s, v2.4s +; CHECK-NEXT: uaddl v1.2d, v3.2s, v1.2s +; CHECK-NEXT: uaddl v0.2d, v0.2s, v2.2s +; CHECK-NEXT: add v2.2d, v5.2d, v4.2d ; CHECK-NEXT: add v0.2d, v0.2d, v1.2d +; CHECK-NEXT: add v0.2d, v0.2d, v2.2d ; CHECK-NEXT: addp d0, v0.2d ; CHECK-NEXT: fmov x8, d0 ; CHECK-NEXT: add x0, x8, x0 @@ -1056,17 +1056,17 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sshll2 v1.8h, v0.16b, #0 ; CHECK-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-NEXT: sshll2 v2.4s, v1.8h, #0 -; CHECK-NEXT: sshll v1.4s, v1.4h, #0 +; CHECK-NEXT: sshll v2.4s, v1.4h, #0 +; CHECK-NEXT: sshll2 v1.4s, v1.8h, #0 ; CHECK-NEXT: sshll2 v3.4s, v0.8h, #0 ; CHECK-NEXT: sshll v0.4s, v0.4h, #0 -; CHECK-NEXT: saddl2 v4.2d, v3.4s, v2.4s -; CHECK-NEXT: saddl2 v5.2d, v0.4s, v1.4s -; CHECK-NEXT: saddl v2.2d, v3.2s, v2.2s -; CHECK-NEXT: saddl v0.2d, v0.2s, v1.2s -; CHECK-NEXT: add v1.2d, v5.2d, v4.2d -; CHECK-NEXT: add v0.2d, v0.2d, v2.2d +; CHECK-NEXT: saddl2 v4.2d, v3.4s, v1.4s +; CHECK-NEXT: saddl2 v5.2d, v0.4s, v2.4s +; CHECK-NEXT: saddl v1.2d, v3.2s, v1.2s +; CHECK-NEXT: saddl v0.2d, v0.2s, v2.2s +; CHECK-NEXT: add v2.2d, v5.2d, v4.2d ; CHECK-NEXT: add v0.2d, v0.2d, v1.2d +; CHECK-NEXT: add v0.2d, v0.2d, v2.2d ; CHECK-NEXT: addp d0, v0.2d ; CHECK-NEXT: fmov x8, d0 ; CHECK-NEXT: add x0, x8, x0 @@ -1766,29 +1766,29 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: ushll2 v2.8h, v0.16b, #0 ; CHECK-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-NEXT: ushll2 v3.8h, v1.16b, #0 -; CHECK-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-NEXT: ushll2 v4.4s, v2.8h, #0 -; CHECK-NEXT: ushll v2.4s, v2.4h, #0 -; CHECK-NEXT: ushll2 v5.4s, v0.8h, #0 +; CHECK-NEXT: ushll v3.4s, v2.4h, #0 +; CHECK-NEXT: ushll2 v2.4s, v2.8h, #0 +; CHECK-NEXT: ushll2 v4.4s, v0.8h, #0 ; CHECK-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-NEXT: ushll v6.4s, v3.4h, #0 -; CHECK-NEXT: ushll v7.4s, v1.4h, #0 -; CHECK-NEXT: ushll2 v3.4s, v3.8h, #0 -; CHECK-NEXT: ushll2 v1.4s, v1.8h, #0 -; CHECK-NEXT: uaddl2 v16.2d, v5.4s, v4.4s -; CHECK-NEXT: uaddl v4.2d, v5.2s, v4.2s -; CHECK-NEXT: uaddl2 v5.2d, v0.4s, v2.4s -; CHECK-NEXT: uaddl v0.2d, v0.2s, v2.2s -; CHECK-NEXT: uaddl2 v2.2d, v1.4s, v3.4s +; CHECK-NEXT: uaddl2 v5.2d, v4.4s, v2.4s +; CHECK-NEXT: uaddl2 v6.2d, v0.4s, v3.4s +; CHECK-NEXT: ushll2 v7.8h, v1.16b, #0 +; CHECK-NEXT: ushll v1.8h, v1.8b, #0 +; CHECK-NEXT: uaddl v2.2d, v4.2s, v2.2s +; CHECK-NEXT: add v4.2d, v6.2d, v5.2d +; CHECK-NEXT: uaddl v0.2d, v0.2s, v3.2s +; CHECK-NEXT: ushll v3.4s, v7.4h, #0 +; CHECK-NEXT: ushll2 v5.4s, v7.8h, #0 +; CHECK-NEXT: ushll2 v6.4s, v1.8h, #0 +; CHECK-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-NEXT: uaddl2 v7.2d, v6.4s, v5.4s +; CHECK-NEXT: uaddl v5.2d, v6.2s, v5.2s +; CHECK-NEXT: uaddl2 v6.2d, v1.4s, v3.4s ; CHECK-NEXT: uaddl v1.2d, v1.2s, v3.2s -; CHECK-NEXT: uaddl2 v3.2d, v7.4s, v6.4s -; CHECK-NEXT: uaddl v6.2d, v7.2s, v6.2s -; CHECK-NEXT: add v5.2d, v5.2d, v16.2d +; CHECK-NEXT: add v0.2d, v0.2d, v2.2d +; CHECK-NEXT: add v2.2d, v6.2d, v7.2d +; CHECK-NEXT: add v1.2d, v1.2d, v5.2d ; CHECK-NEXT: add v0.2d, v0.2d, v4.2d -; CHECK-NEXT: add v2.2d, v3.2d, v2.2d -; CHECK-NEXT: add v1.2d, v6.2d, v1.2d -; CHECK-NEXT: add v0.2d, 
v0.2d, v5.2d ; CHECK-NEXT: add v1.2d, v1.2d, v2.2d ; CHECK-NEXT: add v0.2d, v0.2d, v1.2d ; CHECK-NEXT: addp d0, v0.2d @@ -1808,29 +1808,29 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sshll2 v2.8h, v0.16b, #0 ; CHECK-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-NEXT: sshll2 v3.8h, v1.16b, #0 -; CHECK-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-NEXT: sshll2 v4.4s, v2.8h, #0 -; CHECK-NEXT: sshll v2.4s, v2.4h, #0 -; CHECK-NEXT: sshll2 v5.4s, v0.8h, #0 +; CHECK-NEXT: sshll v3.4s, v2.4h, #0 +; CHECK-NEXT: sshll2 v2.4s, v2.8h, #0 +; CHECK-NEXT: sshll2 v4.4s, v0.8h, #0 ; CHECK-NEXT: sshll v0.4s, v0.4h, #0 -; CHECK-NEXT: sshll v6.4s, v3.4h, #0 -; CHECK-NEXT: sshll v7.4s, v1.4h, #0 -; CHECK-NEXT: sshll2 v3.4s, v3.8h, #0 -; CHECK-NEXT: sshll2 v1.4s, v1.8h, #0 -; CHECK-NEXT: saddl2 v16.2d, v5.4s, v4.4s -; CHECK-NEXT: saddl v4.2d, v5.2s, v4.2s -; CHECK-NEXT: saddl2 v5.2d, v0.4s, v2.4s -; CHECK-NEXT: saddl v0.2d, v0.2s, v2.2s -; CHECK-NEXT: saddl2 v2.2d, v1.4s, v3.4s +; CHECK-NEXT: saddl2 v5.2d, v4.4s, v2.4s +; CHECK-NEXT: saddl2 v6.2d, v0.4s, v3.4s +; CHECK-NEXT: sshll2 v7.8h, v1.16b, #0 +; CHECK-NEXT: sshll v1.8h, v1.8b, #0 +; CHECK-NEXT: saddl v2.2d, v4.2s, v2.2s +; CHECK-NEXT: add v4.2d, v6.2d, v5.2d +; CHECK-NEXT: saddl v0.2d, v0.2s, v3.2s +; CHECK-NEXT: sshll v3.4s, v7.4h, #0 +; CHECK-NEXT: sshll2 v5.4s, v7.8h, #0 +; CHECK-NEXT: sshll2 v6.4s, v1.8h, #0 +; CHECK-NEXT: sshll v1.4s, v1.4h, #0 +; CHECK-NEXT: saddl2 v7.2d, v6.4s, v5.4s +; CHECK-NEXT: saddl v5.2d, v6.2s, v5.2s +; CHECK-NEXT: saddl2 v6.2d, v1.4s, v3.4s ; CHECK-NEXT: saddl v1.2d, v1.2s, v3.2s -; CHECK-NEXT: saddl2 v3.2d, v7.4s, v6.4s -; CHECK-NEXT: saddl v6.2d, v7.2s, v6.2s -; CHECK-NEXT: add v5.2d, v5.2d, v16.2d +; CHECK-NEXT: add v0.2d, v0.2d, v2.2d +; CHECK-NEXT: add v2.2d, v6.2d, v7.2d +; CHECK-NEXT: add v1.2d, v1.2d, v5.2d ; CHECK-NEXT: add v0.2d, v0.2d, v4.2d -; CHECK-NEXT: add v2.2d, v3.2d, v2.2d -; CHECK-NEXT: add v1.2d, v6.2d, v1.2d -; CHECK-NEXT: add v0.2d, v0.2d, v5.2d ; CHECK-NEXT: add v1.2d, v1.2d, v2.2d ; CHECK-NEXT: add v0.2d, v0.2d, v1.2d ; CHECK-NEXT: addp d0, v0.2d @@ -1925,21 +1925,21 @@ define i64 @add_pair_v4i8_v4i64_sext(<4 x i8> %x, <4 x i8> %y) { ; CHECK-LABEL: add_pair_v4i8_v4i64_sext: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-NEXT: ushll v2.2d, v0.2s, #0 -; CHECK-NEXT: ushll v3.2d, v1.2s, #0 -; CHECK-NEXT: ushll2 v0.2d, v0.4s, #0 +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: ushll v2.2d, v1.2s, #0 +; CHECK-NEXT: ushll v3.2d, v0.2s, #0 ; CHECK-NEXT: ushll2 v1.2d, v1.4s, #0 -; CHECK-NEXT: shl v2.2d, v2.2d, #56 +; CHECK-NEXT: ushll2 v0.2d, v0.4s, #0 ; CHECK-NEXT: shl v3.2d, v3.2d, #56 +; CHECK-NEXT: shl v2.2d, v2.2d, #56 ; CHECK-NEXT: shl v0.2d, v0.2d, #56 +; CHECK-NEXT: sshr v3.2d, v3.2d, #56 ; CHECK-NEXT: shl v1.2d, v1.2d, #56 ; CHECK-NEXT: sshr v2.2d, v2.2d, #56 -; CHECK-NEXT: sshr v3.2d, v3.2d, #56 -; CHECK-NEXT: ssra v2.2d, v0.2d, #56 -; CHECK-NEXT: ssra v3.2d, v1.2d, #56 -; CHECK-NEXT: add v0.2d, v2.2d, v3.2d +; CHECK-NEXT: ssra v3.2d, v0.2d, #56 +; CHECK-NEXT: ssra v2.2d, v1.2d, #56 +; CHECK-NEXT: add v0.2d, v3.2d, v2.2d ; CHECK-NEXT: addp d0, v0.2d ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll --- a/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll @@ -96,9 +96,9 @@ define i8 @test_v9i8(<9 x i8> %a) nounwind { ; CHECK-LABEL: test_v9i8: ; CHECK: // 
%bb.0: -; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: mov w8, #-1 ; CHECK-NEXT: umov w12, v0.b[4] +; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: mov v1.b[9], w8 ; CHECK-NEXT: mov v1.b[10], w8 ; CHECK-NEXT: mov v1.b[11], w8 @@ -129,8 +129,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: and v1.8b, v0.8b, v1.8b -; CHECK-NEXT: fmov w9, s1 +; CHECK-NEXT: and v0.8b, v0.8b, v1.8b +; CHECK-NEXT: fmov w9, s0 ; CHECK-NEXT: and w0, w9, w8 ; CHECK-NEXT: ret %b = call i32 @llvm.vector.reduce.and.v3i32(<3 x i32> %a) diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll --- a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll @@ -302,14 +302,14 @@ ; CHECK-FP-NEXT: // kill: def $h7 killed $h7 def $q7 ; CHECK-FP-NEXT: mov v0.h[1], v1.h[0] ; CHECK-FP-NEXT: mvni v1.8h, #4, lsl #8 -; CHECK-FP-NEXT: mov v0.h[2], v2.h[0] ; CHECK-FP-NEXT: ld1 { v1.h }[0], [x8] ; CHECK-FP-NEXT: add x8, sp, #8 -; CHECK-FP-NEXT: mov v0.h[3], v3.h[0] +; CHECK-FP-NEXT: mov v0.h[2], v2.h[0] ; CHECK-FP-NEXT: ld1 { v1.h }[1], [x8] ; CHECK-FP-NEXT: add x8, sp, #16 -; CHECK-FP-NEXT: mov v0.h[4], v4.h[0] +; CHECK-FP-NEXT: mov v0.h[3], v3.h[0] ; CHECK-FP-NEXT: ld1 { v1.h }[2], [x8] +; CHECK-FP-NEXT: mov v0.h[4], v4.h[0] ; CHECK-FP-NEXT: mov v0.h[5], v5.h[0] ; CHECK-FP-NEXT: mov v0.h[6], v6.h[0] ; CHECK-FP-NEXT: mov v0.h[7], v7.h[0] diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll --- a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll @@ -302,14 +302,14 @@ ; CHECK-FP-NEXT: // kill: def $h7 killed $h7 def $q7 ; CHECK-FP-NEXT: mov v0.h[1], v1.h[0] ; CHECK-FP-NEXT: mvni v1.8h, #132, lsl #8 -; CHECK-FP-NEXT: mov v0.h[2], v2.h[0] ; CHECK-FP-NEXT: ld1 { v1.h }[0], [x8] ; CHECK-FP-NEXT: add x8, sp, #8 -; CHECK-FP-NEXT: mov v0.h[3], v3.h[0] +; CHECK-FP-NEXT: mov v0.h[2], v2.h[0] ; CHECK-FP-NEXT: ld1 { v1.h }[1], [x8] ; CHECK-FP-NEXT: add x8, sp, #16 -; CHECK-FP-NEXT: mov v0.h[4], v4.h[0] +; CHECK-FP-NEXT: mov v0.h[3], v3.h[0] ; CHECK-FP-NEXT: ld1 { v1.h }[2], [x8] +; CHECK-FP-NEXT: mov v0.h[4], v4.h[0] ; CHECK-FP-NEXT: mov v0.h[5], v5.h[0] ; CHECK-FP-NEXT: mov v0.h[6], v6.h[0] ; CHECK-FP-NEXT: mov v0.h[7], v7.h[0] diff --git a/llvm/test/CodeGen/AArch64/vector-fcopysign.ll b/llvm/test/CodeGen/AArch64/vector-fcopysign.ll --- a/llvm/test/CodeGen/AArch64/vector-fcopysign.ll +++ b/llvm/test/CodeGen/AArch64/vector-fcopysign.ll @@ -82,8 +82,8 @@ define <2 x float> @test_copysign_v2f32_v2f64(<2 x float> %a, <2 x double> %b) #0 { ; CHECK-LABEL: test_copysign_v2f32_v2f64: ; CHECK: ; %bb.0: -; CHECK-NEXT: mvni.2s v2, #128, lsl #24 ; CHECK-NEXT: fcvtn v1.2s, v1.2d +; CHECK-NEXT: mvni.2s v2, #128, lsl #24 ; CHECK-NEXT: bif.8b v0, v1, v2 ; CHECK-NEXT: ret %tmp0 = fptrunc <2 x double> %b to <2 x float> @@ -110,9 +110,9 @@ ; CHECK-LABEL: test_copysign_v4f32_v4f64: ; CHECK: ; %bb.0: ; CHECK-NEXT: fcvtn v1.2s, v1.2d -; CHECK-NEXT: mvni.4s v3, #128, lsl #24 ; CHECK-NEXT: fcvtn2 v1.4s, v2.2d -; CHECK-NEXT: bif.16b v0, v1, v3 +; CHECK-NEXT: mvni.4s v2, #128, lsl #24 +; CHECK-NEXT: bif.16b v0, v1, v2 ; CHECK-NEXT: ret %tmp0 = fptrunc <4 x double> %b to <4 x float> %r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %tmp0) @@ -191,21 +191,21 @@ ; NOFP16-NEXT: ; kill: def $d0 killed $d0 def $q0 ; 
NOFP16-NEXT: mov h3, v1[1] ; NOFP16-NEXT: mov h4, v0[1] -; NOFP16-NEXT: mvni.4s v2, #128, lsl #24 ; NOFP16-NEXT: fcvt s5, h1 ; NOFP16-NEXT: fcvt s6, h0 ; NOFP16-NEXT: mov h7, v1[2] ; NOFP16-NEXT: mov h16, v0[2] +; NOFP16-NEXT: mvni.4s v2, #128, lsl #24 +; NOFP16-NEXT: mov h1, v1[3] ; NOFP16-NEXT: fcvt s3, h3 ; NOFP16-NEXT: fcvt s4, h4 -; NOFP16-NEXT: mov h1, v1[3] ; NOFP16-NEXT: bit.16b v5, v6, v2 ; NOFP16-NEXT: fcvt s6, h7 ; NOFP16-NEXT: fcvt s7, h16 +; NOFP16-NEXT: fcvt s1, h1 ; NOFP16-NEXT: bit.16b v3, v4, v2 ; NOFP16-NEXT: mov h4, v0[3] ; NOFP16-NEXT: fcvt h0, s5 -; NOFP16-NEXT: fcvt s1, h1 ; NOFP16-NEXT: bit.16b v6, v7, v2 ; NOFP16-NEXT: fcvt h3, s3 ; NOFP16-NEXT: fcvt s4, h4 @@ -233,9 +233,9 @@ ; NOFP16-NEXT: fcvtn v1.4h, v1.4s ; NOFP16-NEXT: ; kill: def $d0 killed $d0 def $q0 ; NOFP16-NEXT: mov h3, v0[1] -; NOFP16-NEXT: mvni.4s v2, #128, lsl #24 ; NOFP16-NEXT: fcvt s5, h0 ; NOFP16-NEXT: mov h7, v0[2] +; NOFP16-NEXT: mvni.4s v2, #128, lsl #24 ; NOFP16-NEXT: mov h4, v1[1] ; NOFP16-NEXT: fcvt s6, h1 ; NOFP16-NEXT: mov h16, v1[2] @@ -263,8 +263,8 @@ ; ; FP16-LABEL: test_copysign_v4f16_v4f32: ; FP16: ; %bb.0: -; FP16-NEXT: mvni.4h v2, #128, lsl #8 ; FP16-NEXT: fcvtn v1.4h, v1.4s +; FP16-NEXT: mvni.4h v2, #128, lsl #8 ; FP16-NEXT: bif.8b v0, v1, v2 ; FP16-NEXT: ret %tmp0 = fptrunc <4 x float> %b to <4 x half> @@ -278,28 +278,28 @@ ; NOFP16-NEXT: ; kill: def $d0 killed $d0 def $q0 ; NOFP16-NEXT: mov d4, v1[1] ; NOFP16-NEXT: mov h5, v0[1] -; NOFP16-NEXT: mvni.4s v3, #128, lsl #24 ; NOFP16-NEXT: fcvt s1, d1 ; NOFP16-NEXT: fcvt s6, h0 ; NOFP16-NEXT: mov h7, v0[2] +; NOFP16-NEXT: mvni.4s v3, #128, lsl #24 ; NOFP16-NEXT: fcvt s4, d4 ; NOFP16-NEXT: fcvt s5, h5 ; NOFP16-NEXT: bit.16b v1, v6, v3 ; NOFP16-NEXT: fcvt s6, d2 ; NOFP16-NEXT: fcvt s7, h7 -; NOFP16-NEXT: bit.16b v4, v5, v3 ; NOFP16-NEXT: mov d2, v2[1] +; NOFP16-NEXT: bit.16b v4, v5, v3 ; NOFP16-NEXT: mov h5, v0[3] ; NOFP16-NEXT: fcvt h0, s1 ; NOFP16-NEXT: bit.16b v6, v7, v3 -; NOFP16-NEXT: fcvt h1, s4 ; NOFP16-NEXT: fcvt s2, d2 +; NOFP16-NEXT: fcvt h1, s4 ; NOFP16-NEXT: fcvt s4, h5 ; NOFP16-NEXT: fcvt h5, s6 ; NOFP16-NEXT: mov.h v0[1], v1[0] ; NOFP16-NEXT: mov.16b v1, v3 -; NOFP16-NEXT: mov.h v0[2], v5[0] ; NOFP16-NEXT: bsl.16b v1, v4, v2 +; NOFP16-NEXT: mov.h v0[2], v5[0] ; NOFP16-NEXT: fcvt h1, s1 ; NOFP16-NEXT: mov.h v0[3], v1[0] ; NOFP16-NEXT: ; kill: def $d0 killed $d0 killed $q0 @@ -307,17 +307,17 @@ ; ; FP16-LABEL: test_copysign_v4f16_v4f64: ; FP16: ; %bb.0: -; FP16-NEXT: mov d4, v1[1] +; FP16-NEXT: mov d3, v1[1] ; FP16-NEXT: fcvt h1, d1 -; FP16-NEXT: mvni.4h v3, #128, lsl #8 -; FP16-NEXT: fcvt h4, d4 -; FP16-NEXT: mov.h v1[1], v4[0] -; FP16-NEXT: fcvt h4, d2 +; FP16-NEXT: fcvt h3, d3 +; FP16-NEXT: mov.h v1[1], v3[0] +; FP16-NEXT: fcvt h3, d2 ; FP16-NEXT: mov d2, v2[1] -; FP16-NEXT: mov.h v1[2], v4[0] +; FP16-NEXT: mov.h v1[2], v3[0] ; FP16-NEXT: fcvt h2, d2 ; FP16-NEXT: mov.h v1[3], v2[0] -; FP16-NEXT: bif.8b v0, v1, v3 +; FP16-NEXT: mvni.4h v2, #128, lsl #8 +; FP16-NEXT: bif.8b v0, v1, v2 ; FP16-NEXT: ret %tmp0 = fptrunc <4 x double> %b to <4 x half> %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %tmp0) @@ -333,33 +333,33 @@ ; NOFP16: ; %bb.0: ; NOFP16-NEXT: mov h5, v1[1] ; NOFP16-NEXT: mov h6, v0[1] -; NOFP16-NEXT: mvni.4s v3, #128, lsl #24 ; NOFP16-NEXT: fcvt s2, h1 ; NOFP16-NEXT: fcvt s4, h0 ; NOFP16-NEXT: mov h7, v1[2] ; NOFP16-NEXT: mov h16, v0[2] +; NOFP16-NEXT: mvni.4s v3, #128, lsl #24 +; NOFP16-NEXT: mov h17, v0[3] ; NOFP16-NEXT: fcvt s5, h5 ; NOFP16-NEXT: fcvt s6, h6 -; NOFP16-NEXT: mov h17, v0[3] 
; NOFP16-NEXT: mov h18, v0[5] ; NOFP16-NEXT: bit.16b v2, v4, v3 ; NOFP16-NEXT: mov h4, v1[3] ; NOFP16-NEXT: fcvt s7, h7 ; NOFP16-NEXT: fcvt s16, h16 -; NOFP16-NEXT: bit.16b v5, v6, v3 ; NOFP16-NEXT: fcvt s17, h17 +; NOFP16-NEXT: bit.16b v5, v6, v3 ; NOFP16-NEXT: mov.16b v6, v3 ; NOFP16-NEXT: fcvt s4, h4 -; NOFP16-NEXT: fcvt h2, s2 -; NOFP16-NEXT: fcvt h5, s5 ; NOFP16-NEXT: bsl.16b v6, v16, v7 ; NOFP16-NEXT: mov h7, v1[4] ; NOFP16-NEXT: mov h16, v0[4] +; NOFP16-NEXT: fcvt h2, s2 +; NOFP16-NEXT: fcvt h5, s5 ; NOFP16-NEXT: bit.16b v4, v17, v3 ; NOFP16-NEXT: mov h17, v1[5] -; NOFP16-NEXT: mov.h v2[1], v5[0] ; NOFP16-NEXT: fcvt s7, h7 ; NOFP16-NEXT: fcvt s16, h16 +; NOFP16-NEXT: mov.h v2[1], v5[0] ; NOFP16-NEXT: fcvt h5, s6 ; NOFP16-NEXT: fcvt s6, h17 ; NOFP16-NEXT: fcvt s17, h18 @@ -403,11 +403,11 @@ ; NOFP16: ; %bb.0: ; NOFP16-NEXT: fcvtn v1.4h, v1.4s ; NOFP16-NEXT: fcvtn v2.4h, v2.4s -; NOFP16-NEXT: mvni.4s v3, #128, lsl #24 ; NOFP16-NEXT: mov h4, v0[1] ; NOFP16-NEXT: mov h5, v0[4] ; NOFP16-NEXT: fcvt s7, h0 ; NOFP16-NEXT: mov h17, v0[2] +; NOFP16-NEXT: mvni.4s v3, #128, lsl #24 ; NOFP16-NEXT: mov h6, v1[1] ; NOFP16-NEXT: fcvt s16, h1 ; NOFP16-NEXT: fcvt s4, h4 @@ -425,29 +425,29 @@ ; NOFP16-NEXT: fcvt h1, s7 ; NOFP16-NEXT: mov.16b v7, v3 ; NOFP16-NEXT: fcvt h4, s4 +; NOFP16-NEXT: bsl.16b v7, v17, v18 ; NOFP16-NEXT: fcvt s6, h6 ; NOFP16-NEXT: fcvt s16, h16 -; NOFP16-NEXT: fcvt h5, s5 -; NOFP16-NEXT: bsl.16b v7, v17, v18 ; NOFP16-NEXT: mov h17, v0[5] ; NOFP16-NEXT: mov h18, v2[1] +; NOFP16-NEXT: fcvt h5, s5 ; NOFP16-NEXT: mov.h v1[1], v4[0] -; NOFP16-NEXT: bif.16b v6, v16, v3 ; NOFP16-NEXT: fcvt h4, s7 +; NOFP16-NEXT: bif.16b v6, v16, v3 ; NOFP16-NEXT: fcvt s7, h17 ; NOFP16-NEXT: fcvt s17, h18 -; NOFP16-NEXT: mov h16, v2[2] -; NOFP16-NEXT: mov h2, v2[3] -; NOFP16-NEXT: fcvt h6, s6 ; NOFP16-NEXT: mov.h v1[2], v4[0] ; NOFP16-NEXT: mov h4, v0[6] -; NOFP16-NEXT: bif.16b v7, v17, v3 -; NOFP16-NEXT: fcvt s16, h16 +; NOFP16-NEXT: mov h16, v2[2] +; NOFP16-NEXT: fcvt h6, s6 ; NOFP16-NEXT: mov h0, v0[7] -; NOFP16-NEXT: fcvt s2, h2 +; NOFP16-NEXT: bif.16b v7, v17, v3 +; NOFP16-NEXT: mov h2, v2[3] ; NOFP16-NEXT: fcvt s4, h4 +; NOFP16-NEXT: fcvt s16, h16 ; NOFP16-NEXT: mov.h v1[3], v6[0] ; NOFP16-NEXT: fcvt s0, h0 +; NOFP16-NEXT: fcvt s2, h2 ; NOFP16-NEXT: bif.16b v4, v16, v3 ; NOFP16-NEXT: mov.h v1[4], v5[0] ; NOFP16-NEXT: fcvt h5, s7 @@ -464,9 +464,9 @@ ; FP16: ; %bb.0: ; FP16-NEXT: fcvtn v2.4h, v2.4s ; FP16-NEXT: fcvtn v1.4h, v1.4s -; FP16-NEXT: mvni.8h v3, #128, lsl #8 ; FP16-NEXT: mov.d v1[1], v2[0] -; FP16-NEXT: bif.16b v0, v1, v3 +; FP16-NEXT: mvni.8h v2, #128, lsl #8 +; FP16-NEXT: bif.16b v0, v1, v2 ; FP16-NEXT: ret %tmp0 = fptrunc <8 x float> %b to <8 x half> %r = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %tmp0) diff --git a/llvm/test/CodeGen/AArch64/vselect-constants.ll b/llvm/test/CodeGen/AArch64/vselect-constants.ll --- a/llvm/test/CodeGen/AArch64/vselect-constants.ll +++ b/llvm/test/CodeGen/AArch64/vselect-constants.ll @@ -10,11 +10,11 @@ define <4 x i32> @sel_C1_or_C2_vec(<4 x i1> %cond) { ; CHECK-LABEL: sel_C1_or_C2_vec: ; CHECK: // %bb.0: -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-NEXT: adrp x8, .LCPI0_0 ; CHECK-NEXT: adrp x9, .LCPI0_1 -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0] +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-NEXT: shl v0.4s, v0.4s, #31 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0] ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI0_1] ; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 ; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b @@ -29,9 +29,9 @@ ; CHECK-NEXT: adrp x8, .LCPI1_0 ; 
CHECK-NEXT: adrp x9, .LCPI1_1 ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_0] -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI1_1] -; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI1_0] +; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI1_1] +; CHECK-NEXT: bsl v0.16b, v3.16b, v2.16b ; CHECK-NEXT: ret %cond = icmp eq <4 x i32> %x, %y %add = select <4 x i1> %cond, <4 x i32> , <4 x i32> @@ -41,11 +41,11 @@ define <4 x i32> @sel_Cplus1_or_C_vec(<4 x i1> %cond) { ; CHECK-LABEL: sel_Cplus1_or_C_vec: ; CHECK: // %bb.0: -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-NEXT: adrp x8, .LCPI2_0 ; CHECK-NEXT: adrp x9, .LCPI2_1 -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0] +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-NEXT: shl v0.4s, v0.4s, #31 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0] ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI2_1] ; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 ; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b @@ -60,9 +60,9 @@ ; CHECK-NEXT: adrp x8, .LCPI3_0 ; CHECK-NEXT: adrp x9, .LCPI3_1 ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0] -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI3_1] -; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_0] +; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI3_1] +; CHECK-NEXT: bsl v0.16b, v3.16b, v2.16b ; CHECK-NEXT: ret %cond = icmp eq <4 x i32> %x, %y %add = select <4 x i1> %cond, <4 x i32> , <4 x i32> @@ -72,11 +72,11 @@ define <4 x i32> @sel_Cminus1_or_C_vec(<4 x i1> %cond) { ; CHECK-LABEL: sel_Cminus1_or_C_vec: ; CHECK: // %bb.0: -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-NEXT: adrp x8, .LCPI4_0 ; CHECK-NEXT: adrp x9, .LCPI4_1 -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_0] +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-NEXT: shl v0.4s, v0.4s, #31 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_0] ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI4_1] ; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 ; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b @@ -91,9 +91,9 @@ ; CHECK-NEXT: adrp x8, .LCPI5_0 ; CHECK-NEXT: adrp x9, .LCPI5_1 ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI5_0] -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI5_1] -; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI5_0] +; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI5_1] +; CHECK-NEXT: bsl v0.16b, v3.16b, v2.16b ; CHECK-NEXT: ret %cond = icmp eq <4 x i32> %x, %y %add = select <4 x i1> %cond, <4 x i32> , <4 x i32> diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-neon-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-neon-instructions.s --- a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-neon-instructions.s +++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-neon-instructions.s @@ -1070,27 +1070,27 @@ # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 4 0.50 abs d29, d24 -# CHECK-NEXT: 1 4 1.00 abs v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 abs v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 abs v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 abs v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 abs v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 abs v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 abs v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 add d17, d31, d29 -# CHECK-NEXT: 1 4 0.50 add v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 addhn v0.2s, v0.2d, v0.2d -# CHECK-NEXT: 1 4 1.00 addhn v0.4h, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 addhn v0.8b, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 addhn2 v0.16b, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 addhn2 v0.4s, v0.2d, v0.2d -# CHECK-NEXT: 1 4 1.00 addhn2 v0.8h, v0.4s, v0.4s -# 
CHECK-NEXT: 1 4 1.00 addp v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 addp v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 0.50 and v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 0.50 bic v0.4h, #15, lsl #8 -# CHECK-NEXT: 1 4 0.50 bic v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 3 0.50 abs d29, d24 +# CHECK-NEXT: 1 3 1.00 abs v0.16b, v0.16b +# CHECK-NEXT: 1 3 1.00 abs v0.2d, v0.2d +# CHECK-NEXT: 1 3 0.50 abs v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 abs v0.4h, v0.4h +# CHECK-NEXT: 1 3 1.00 abs v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 abs v0.8b, v0.8b +# CHECK-NEXT: 1 3 1.00 abs v0.8h, v0.8h +# CHECK-NEXT: 1 2 0.50 add d17, d31, d29 +# CHECK-NEXT: 1 2 0.50 add v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 3 1.00 addhn v0.2s, v0.2d, v0.2d +# CHECK-NEXT: 1 3 1.00 addhn v0.4h, v0.4s, v0.4s +# CHECK-NEXT: 1 3 1.00 addhn v0.8b, v0.8h, v0.8h +# CHECK-NEXT: 1 3 1.00 addhn2 v0.16b, v0.8h, v0.8h +# CHECK-NEXT: 1 3 1.00 addhn2 v0.4s, v0.2d, v0.2d +# CHECK-NEXT: 1 3 1.00 addhn2 v0.8h, v0.4s, v0.4s +# CHECK-NEXT: 1 3 1.00 addp v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 3 0.50 addp v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 1 0.50 and v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 1 0.50 bic v0.4h, #15, lsl #8 +# CHECK-NEXT: 1 1 0.50 bic v0.8b, v0.8b, v0.8b # CHECK-NEXT: 1 4 1.00 bif v0.16b, v0.16b, v0.16b # CHECK-NEXT: 1 4 1.00 bit v0.16b, v0.16b, v0.16b # CHECK-NEXT: 1 4 0.50 bsl v0.8b, v0.8b, v0.8b @@ -1106,28 +1106,28 @@ # CHECK-NEXT: 1 4 1.00 clz v0.4s, v0.4s # CHECK-NEXT: 1 4 0.50 clz v0.8b, v0.8b # CHECK-NEXT: 1 4 1.00 clz v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 cmeq d20, d21, #0 -# CHECK-NEXT: 1 4 0.50 cmeq d20, d21, d22 -# CHECK-NEXT: 1 4 1.00 cmeq v0.16b, v0.16b, #0 -# CHECK-NEXT: 1 4 1.00 cmeq v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 cmge d20, d21, #0 -# CHECK-NEXT: 1 4 0.50 cmge d20, d21, d22 -# CHECK-NEXT: 1 4 0.50 cmge v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 cmge v0.8b, v0.8b, #0 -# CHECK-NEXT: 1 4 0.50 cmgt d20, d21, #0 -# CHECK-NEXT: 1 4 0.50 cmgt d20, d21, d22 -# CHECK-NEXT: 1 4 0.50 cmgt v0.2s, v0.2s, #0 -# CHECK-NEXT: 1 4 1.00 cmgt v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 cmhi d20, d21, d22 -# CHECK-NEXT: 1 4 1.00 cmhi v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 cmhs d20, d21, d22 -# CHECK-NEXT: 1 4 0.50 cmhs v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 0.50 cmle d20, d21, #0 -# CHECK-NEXT: 1 4 1.00 cmle v0.2d, v0.2d, #0 -# CHECK-NEXT: 1 4 0.50 cmlt d20, d21, #0 -# CHECK-NEXT: 1 4 1.00 cmlt v0.8h, v0.8h, #0 -# CHECK-NEXT: 1 4 0.50 cmtst d20, d21, d22 -# CHECK-NEXT: 1 4 0.50 cmtst v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 cmeq d20, d21, #0 +# CHECK-NEXT: 1 2 0.50 cmeq d20, d21, d22 +# CHECK-NEXT: 1 2 1.00 cmeq v0.16b, v0.16b, #0 +# CHECK-NEXT: 1 2 1.00 cmeq v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 cmge d20, d21, #0 +# CHECK-NEXT: 1 2 0.50 cmge d20, d21, d22 +# CHECK-NEXT: 1 2 0.50 cmge v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 2 0.50 cmge v0.8b, v0.8b, #0 +# CHECK-NEXT: 1 2 0.50 cmgt d20, d21, #0 +# CHECK-NEXT: 1 2 0.50 cmgt d20, d21, d22 +# CHECK-NEXT: 1 2 0.50 cmgt v0.2s, v0.2s, #0 +# CHECK-NEXT: 1 2 1.00 cmgt v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 cmhi d20, d21, d22 +# CHECK-NEXT: 1 2 1.00 cmhi v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 2 0.50 cmhs d20, d21, d22 +# CHECK-NEXT: 1 2 0.50 cmhs v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 2 0.50 cmle d20, d21, #0 +# CHECK-NEXT: 1 2 1.00 cmle v0.2d, v0.2d, #0 +# CHECK-NEXT: 1 2 0.50 cmlt d20, d21, #0 +# CHECK-NEXT: 1 2 1.00 cmlt v0.8h, v0.8h, #0 +# CHECK-NEXT: 1 3 0.50 cmtst d20, d21, d22 +# CHECK-NEXT: 1 3 0.50 cmtst v0.2s, v0.2s, v0.2s # CHECK-NEXT: 1 4 1.00 cnt v0.16b, v0.16b # CHECK-NEXT: 1 4 0.50 
cnt v0.8b, v0.8b # CHECK-NEXT: 1 2 0.50 dup v0.16b, w28 @@ -1137,7 +1137,7 @@ # CHECK-NEXT: 1 2 0.50 dup v0.4s, w28 # CHECK-NEXT: 1 4 0.50 dup v0.8b, w28 # CHECK-NEXT: 1 2 0.50 dup v0.8h, w28 -# CHECK-NEXT: 1 4 1.00 eor v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 1 1.00 eor v0.16b, v0.16b, v0.16b # CHECK-NEXT: 1 4 1.00 ext v0.16b, v0.16b, v0.16b, #3 # CHECK-NEXT: 1 4 0.50 ext v0.8b, v0.8b, v0.8b, #3 # CHECK-NEXT: 1 4 0.50 fabd d29, d24, d20 @@ -1429,8 +1429,8 @@ # CHECK-NEXT: 1 4 0.50 mov d6, v0.d[1] # CHECK-NEXT: 1 4 0.50 mov h2, v0.h[5] # CHECK-NEXT: 1 4 0.50 mov s17, v0.s[2] -# CHECK-NEXT: 1 4 1.00 mov v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 mov v0.8b, v0.8b +# CHECK-NEXT: 1 1 1.00 mov v0.16b, v0.16b +# CHECK-NEXT: 1 1 0.50 mov v0.8b, v0.8b # CHECK-NEXT: 1 4 0.50 movi d15, #0xff00ff00ff00ff # CHECK-NEXT: 1 4 1.00 movi v0.16b, #31 # CHECK-NEXT: 1 4 1.00 movi v0.2d, #0xff0000ff0000ffff @@ -1438,31 +1438,31 @@ # CHECK-NEXT: 1 4 1.00 movi v0.4s, #255, lsl #24 # CHECK-NEXT: 1 4 0.50 movi v0.8b, #255 # CHECK-NEXT: 1 4 0.50 mul v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 0.50 mvni v0.2s, #0 -# CHECK-NEXT: 1 4 1.00 mvni v0.4s, #16, msl #16 -# CHECK-NEXT: 1 4 0.50 neg d29, d24 -# CHECK-NEXT: 1 4 1.00 neg v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 neg v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 neg v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 neg v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 neg v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 neg v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 neg v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 mvn v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 mvn v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 orn v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 mov v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 orr v0.8h, #31 -# CHECK-NEXT: 1 4 1.00 pmul v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 pmul v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 pmull v0.8h, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 pmull2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 raddhn v0.2s, v0.2d, v0.2d -# CHECK-NEXT: 1 4 1.00 raddhn v0.4h, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 raddhn v0.8b, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 raddhn2 v0.16b, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 raddhn2 v0.4s, v0.2d, v0.2d -# CHECK-NEXT: 1 4 1.00 raddhn2 v0.8h, v0.4s, v0.4s +# CHECK-NEXT: 1 1 0.50 mvni v0.2s, #0 +# CHECK-NEXT: 1 1 1.00 mvni v0.4s, #16, msl #16 +# CHECK-NEXT: 1 2 0.50 neg d29, d24 +# CHECK-NEXT: 1 2 1.00 neg v0.16b, v0.16b +# CHECK-NEXT: 1 2 1.00 neg v0.2d, v0.2d +# CHECK-NEXT: 1 2 0.50 neg v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 neg v0.4h, v0.4h +# CHECK-NEXT: 1 2 1.00 neg v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 neg v0.8b, v0.8b +# CHECK-NEXT: 1 2 1.00 neg v0.8h, v0.8h +# CHECK-NEXT: 1 1 1.00 mvn v0.16b, v0.16b +# CHECK-NEXT: 1 1 0.50 mvn v0.8b, v0.8b +# CHECK-NEXT: 1 1 1.00 orn v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 1 1.00 mov v0.16b, v0.16b +# CHECK-NEXT: 1 1 1.00 orr v0.8h, #31 +# CHECK-NEXT: 1 3 1.00 pmul v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 3 0.50 pmul v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 3 1.00 pmull v0.8h, v0.8b, v0.8b +# CHECK-NEXT: 1 3 1.00 pmull2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: 1 4 2.00 raddhn v0.2s, v0.2d, v0.2d +# CHECK-NEXT: 1 4 2.00 raddhn v0.4h, v0.4s, v0.4s +# CHECK-NEXT: 1 4 2.00 raddhn v0.8b, v0.8h, v0.8h +# CHECK-NEXT: 1 4 2.00 raddhn2 v0.16b, v0.8h, v0.8h +# CHECK-NEXT: 1 4 2.00 raddhn2 v0.4s, v0.2d, v0.2d +# CHECK-NEXT: 1 4 2.00 raddhn2 v0.8h, v0.4s, v0.4s # CHECK-NEXT: 1 4 1.00 rbit v0.16b, v0.16b # CHECK-NEXT: 1 4 0.50 rbit v0.8b, v0.8b # CHECK-NEXT: 1 4 0.50 rev16 v21.8b, v1.8b @@ -1477,56 +1477,56 @@ # CHECK-NEXT: 1 4 1.00 rev64 v2.8h, v4.8h # CHECK-NEXT: 1 
4 0.50 rev64 v4.2s, v0.2s # CHECK-NEXT: 1 4 1.00 rev64 v6.4s, v8.4s -# CHECK-NEXT: 1 4 0.50 rshrn v0.2s, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 rshrn v0.4h, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 rshrn v0.8b, v0.8h, #3 -# CHECK-NEXT: 1 4 1.00 rshrn2 v0.16b, v0.8h, #3 -# CHECK-NEXT: 1 4 1.00 rshrn2 v0.4s, v0.2d, #3 -# CHECK-NEXT: 1 4 1.00 rshrn2 v0.8h, v0.4s, #3 -# CHECK-NEXT: 1 4 1.00 rsubhn v0.2s, v0.2d, v0.2d -# CHECK-NEXT: 1 4 1.00 rsubhn v0.4h, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 rsubhn v0.8b, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 rsubhn2 v0.16b, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 rsubhn2 v0.4s, v0.2d, v0.2d -# CHECK-NEXT: 1 4 1.00 rsubhn2 v0.8h, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 saba v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 sabal v0.2d, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 sabal v0.4s, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 sabal v0.8h, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 sabal2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 sabal2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 sabal2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 sabd v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 sabdl v0.2d, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 sabdl v0.4s, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 sabdl v0.8h, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 sabdl2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 sabdl2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 sabdl2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 sadalp v0.1d, v0.2s -# CHECK-NEXT: 1 4 1.00 sadalp v0.2d, v0.4s -# CHECK-NEXT: 1 4 0.50 sadalp v0.2s, v0.4h -# CHECK-NEXT: 1 4 0.50 sadalp v0.4h, v0.8b -# CHECK-NEXT: 1 4 1.00 sadalp v0.4s, v0.8h -# CHECK-NEXT: 1 4 1.00 sadalp v0.8h, v0.16b -# CHECK-NEXT: 1 4 1.00 saddl v0.2d, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 saddl v0.4s, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 saddl v0.8h, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 saddl2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 saddl2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 saddl2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 saddlp v0.1d, v0.2s -# CHECK-NEXT: 1 4 1.00 saddlp v0.2d, v0.4s -# CHECK-NEXT: 1 4 0.50 saddlp v0.2s, v0.4h -# CHECK-NEXT: 1 4 0.50 saddlp v0.4h, v0.8b -# CHECK-NEXT: 1 4 1.00 saddlp v0.4s, v0.8h -# CHECK-NEXT: 1 4 1.00 saddlp v0.8h, v0.16b -# CHECK-NEXT: 1 4 1.00 saddw v0.2d, v0.2d, v0.2s -# CHECK-NEXT: 1 4 1.00 saddw v0.4s, v0.4s, v0.4h -# CHECK-NEXT: 1 4 1.00 saddw v0.8h, v0.8h, v0.8b -# CHECK-NEXT: 1 4 1.00 saddw2 v0.2d, v0.2d, v0.4s -# CHECK-NEXT: 1 4 1.00 saddw2 v0.4s, v0.4s, v0.8h -# CHECK-NEXT: 1 4 1.00 saddw2 v0.8h, v0.8h, v0.16b +# CHECK-NEXT: 1 3 0.50 rshrn v0.2s, v0.2d, #3 +# CHECK-NEXT: 1 3 0.50 rshrn v0.4h, v0.4s, #3 +# CHECK-NEXT: 1 3 0.50 rshrn v0.8b, v0.8h, #3 +# CHECK-NEXT: 1 3 1.00 rshrn2 v0.16b, v0.8h, #3 +# CHECK-NEXT: 1 3 1.00 rshrn2 v0.4s, v0.2d, #3 +# CHECK-NEXT: 1 3 1.00 rshrn2 v0.8h, v0.4s, #3 +# CHECK-NEXT: 1 4 2.00 rsubhn v0.2s, v0.2d, v0.2d +# CHECK-NEXT: 1 4 2.00 rsubhn v0.4h, v0.4s, v0.4s +# CHECK-NEXT: 1 4 2.00 rsubhn v0.8b, v0.8h, v0.8h +# CHECK-NEXT: 1 4 2.00 rsubhn2 v0.16b, v0.8h, v0.8h +# CHECK-NEXT: 1 4 2.00 rsubhn2 v0.4s, v0.2d, v0.2d +# CHECK-NEXT: 1 4 2.00 rsubhn2 v0.8h, v0.4s, v0.4s +# CHECK-NEXT: 1 4 2.00 saba v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 4 2.00 sabal v0.2d, v0.2s, v0.2s +# CHECK-NEXT: 1 4 2.00 sabal v0.4s, v0.4h, v0.4h +# CHECK-NEXT: 1 4 2.00 sabal v0.8h, v0.8b, v0.8b +# CHECK-NEXT: 1 4 2.00 sabal2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: 1 4 2.00 sabal2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: 1 4 2.00 sabal2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: 1 3 0.50 sabd v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 3 1.00 sabdl v0.2d, 
v0.2s, v0.2s +# CHECK-NEXT: 1 3 1.00 sabdl v0.4s, v0.4h, v0.4h +# CHECK-NEXT: 1 3 1.00 sabdl v0.8h, v0.8b, v0.8b +# CHECK-NEXT: 1 3 1.00 sabdl2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: 1 3 1.00 sabdl2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: 1 3 1.00 sabdl2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: 1 4 2.00 sadalp v0.1d, v0.2s +# CHECK-NEXT: 1 4 2.00 sadalp v0.2d, v0.4s +# CHECK-NEXT: 1 4 2.00 sadalp v0.2s, v0.4h +# CHECK-NEXT: 1 4 2.00 sadalp v0.4h, v0.8b +# CHECK-NEXT: 1 4 2.00 sadalp v0.4s, v0.8h +# CHECK-NEXT: 1 4 2.00 sadalp v0.8h, v0.16b +# CHECK-NEXT: 1 3 1.00 saddl v0.2d, v0.2s, v0.2s +# CHECK-NEXT: 1 3 1.00 saddl v0.4s, v0.4h, v0.4h +# CHECK-NEXT: 1 3 1.00 saddl v0.8h, v0.8b, v0.8b +# CHECK-NEXT: 1 3 1.00 saddl2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: 1 3 1.00 saddl2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: 1 3 1.00 saddl2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: 1 3 0.50 saddlp v0.1d, v0.2s +# CHECK-NEXT: 1 3 1.00 saddlp v0.2d, v0.4s +# CHECK-NEXT: 1 3 0.50 saddlp v0.2s, v0.4h +# CHECK-NEXT: 1 3 0.50 saddlp v0.4h, v0.8b +# CHECK-NEXT: 1 3 1.00 saddlp v0.4s, v0.8h +# CHECK-NEXT: 1 3 1.00 saddlp v0.8h, v0.16b +# CHECK-NEXT: 1 3 1.00 saddw v0.2d, v0.2d, v0.2s +# CHECK-NEXT: 1 3 1.00 saddw v0.4s, v0.4s, v0.4h +# CHECK-NEXT: 1 3 1.00 saddw v0.8h, v0.8h, v0.8b +# CHECK-NEXT: 1 3 1.00 saddw2 v0.2d, v0.2d, v0.4s +# CHECK-NEXT: 1 3 1.00 saddw2 v0.4s, v0.4s, v0.8h +# CHECK-NEXT: 1 3 1.00 saddw2 v0.8h, v0.8h, v0.16b # CHECK-NEXT: 1 4 0.50 scvtf d21, d12 # CHECK-NEXT: 1 4 0.50 scvtf d21, d12, #64 # CHECK-NEXT: 1 4 0.50 scvtf s22, s13 @@ -1539,33 +1539,33 @@ # CHECK-NEXT: 1 4 0.50 scvtf v0.4s, v0.4s # CHECK-NEXT: 1 4 0.50 scvtf v0.4s, v0.4s, #3 # CHECK-NEXT: 1 4 0.50 scvtf v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 shadd v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 0.50 shl d7, d10, #12 -# CHECK-NEXT: 1 4 1.00 shl v0.16b, v0.16b, #3 -# CHECK-NEXT: 1 4 1.00 shl v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 shl v0.4h, v0.4h, #3 -# CHECK-NEXT: 1 4 1.00 shl v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 1.00 shll v0.2d, v0.2s, #32 -# CHECK-NEXT: 1 4 1.00 shll v0.4s, v0.4h, #16 -# CHECK-NEXT: 1 4 1.00 shll v0.8h, v0.8b, #8 -# CHECK-NEXT: 1 4 1.00 shll v0.2d, v0.2s, #32 -# CHECK-NEXT: 1 4 1.00 shll v0.4s, v0.4h, #16 -# CHECK-NEXT: 1 4 1.00 shll v0.8h, v0.8b, #8 -# CHECK-NEXT: 1 4 1.00 shll2 v0.2d, v0.4s, #32 -# CHECK-NEXT: 1 4 1.00 shll2 v0.4s, v0.8h, #16 -# CHECK-NEXT: 1 4 1.00 shll2 v0.8h, v0.16b, #8 -# CHECK-NEXT: 1 4 1.00 shll2 v0.2d, v0.4s, #32 -# CHECK-NEXT: 1 4 1.00 shll2 v0.4s, v0.8h, #16 -# CHECK-NEXT: 1 4 1.00 shll2 v0.8h, v0.16b, #8 -# CHECK-NEXT: 1 4 0.50 shrn v0.2s, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 shrn v0.4h, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 shrn v0.8b, v0.8h, #3 -# CHECK-NEXT: 1 4 1.00 shrn2 v0.16b, v0.8h, #3 -# CHECK-NEXT: 1 4 1.00 shrn2 v0.4s, v0.2d, #3 -# CHECK-NEXT: 1 4 1.00 shrn2 v0.8h, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 shsub v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 shsub v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 sli d10, d14, #12 +# CHECK-NEXT: 1 2 0.50 shadd v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 2 0.50 shl d7, d10, #12 +# CHECK-NEXT: 1 2 0.50 shl v0.16b, v0.16b, #3 +# CHECK-NEXT: 1 2 0.50 shl v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 2 0.50 shl v0.4h, v0.4h, #3 +# CHECK-NEXT: 1 2 0.50 shl v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 2 1.00 shll v0.2d, v0.2s, #32 +# CHECK-NEXT: 1 2 1.00 shll v0.4s, v0.4h, #16 +# CHECK-NEXT: 1 2 1.00 shll v0.8h, v0.8b, #8 +# CHECK-NEXT: 1 2 1.00 shll v0.2d, v0.2s, #32 +# CHECK-NEXT: 1 2 1.00 shll v0.4s, v0.4h, #16 +# CHECK-NEXT: 1 2 1.00 shll v0.8h, v0.8b, #8 +# CHECK-NEXT: 1 2 1.00 shll2 v0.2d, v0.4s, #32 +# 
CHECK-NEXT: 1 2 1.00 shll2 v0.4s, v0.8h, #16 +# CHECK-NEXT: 1 2 1.00 shll2 v0.8h, v0.16b, #8 +# CHECK-NEXT: 1 2 1.00 shll2 v0.2d, v0.4s, #32 +# CHECK-NEXT: 1 2 1.00 shll2 v0.4s, v0.8h, #16 +# CHECK-NEXT: 1 2 1.00 shll2 v0.8h, v0.16b, #8 +# CHECK-NEXT: 1 2 0.50 shrn v0.2s, v0.2d, #3 +# CHECK-NEXT: 1 2 0.50 shrn v0.4h, v0.4s, #3 +# CHECK-NEXT: 1 2 0.50 shrn v0.8b, v0.8h, #3 +# CHECK-NEXT: 1 2 0.50 shrn2 v0.16b, v0.8h, #3 +# CHECK-NEXT: 1 2 0.50 shrn2 v0.4s, v0.2d, #3 +# CHECK-NEXT: 1 2 0.50 shrn2 v0.8h, v0.4s, #3 +# CHECK-NEXT: 1 2 0.50 shsub v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 shsub v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 2 0.50 sli d10, d14, #12 # CHECK-NEXT: 1 4 1.00 sli v0.16b, v0.16b, #3 # CHECK-NEXT: 1 4 1.00 sli v0.2d, v0.2d, #3 # CHECK-NEXT: 1 4 0.50 sli v0.2s, v0.2s, #3 @@ -1573,18 +1573,18 @@ # CHECK-NEXT: 1 4 1.00 sli v0.4s, v0.4s, #3 # CHECK-NEXT: 1 4 0.50 sli v0.8b, v0.8b, #3 # CHECK-NEXT: 1 4 1.00 sli v0.8h, v0.8h, #3 -# CHECK-NEXT: 1 4 0.50 smax v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 smax v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 smax v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 0.50 smaxp v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 smaxp v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 smaxp v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 smin v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 smax v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 smax v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 2 0.50 smax v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 2 0.50 smaxp v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 smaxp v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 2 0.50 smaxp v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 2 1.00 smin v0.16b, v0.16b, v0.16b # CHECK-NEXT: 1 4 1.00 smin v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 smin v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 sminp v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 2 1.00 smin v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 2 1.00 sminp v0.16b, v0.16b, v0.16b # CHECK-NEXT: 1 4 1.00 sminp v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 sminp v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 2 1.00 sminp v0.8h, v0.8h, v0.8h # CHECK-NEXT: 1 4 1.00 smlal v0.2d, v0.2s, v0.2s # CHECK-NEXT: 1 4 1.00 smlal v0.4s, v0.4h, v0.4h # CHECK-NEXT: 1 4 1.00 smlal v0.8h, v0.8b, v0.8b @@ -1614,53 +1614,53 @@ # CHECK-NEXT: 1 4 1.00 sqabs v0.4s, v0.4s # CHECK-NEXT: 1 4 0.50 sqabs v0.8b, v0.8b # CHECK-NEXT: 1 4 1.00 sqabs v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 sqadd b20, b11, b15 -# CHECK-NEXT: 1 4 1.00 sqadd v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 sqadd v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 sqdmlal d19, s24, s12 -# CHECK-NEXT: 1 4 0.50 sqdmlal d8, s9, v0.s[1] -# CHECK-NEXT: 1 4 0.50 sqdmlal s0, h0, v0.h[3] -# CHECK-NEXT: 1 4 0.50 sqdmlal s17, h27, h12 +# CHECK-NEXT: 1 3 0.50 sqadd b20, b11, b15 +# CHECK-NEXT: 1 3 1.00 sqadd v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 3 0.50 sqadd v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 4 1.00 sqdmlal d19, s24, s12 +# CHECK-NEXT: 1 4 1.00 sqdmlal d8, s9, v0.s[1] +# CHECK-NEXT: 1 4 1.00 sqdmlal s0, h0, v0.h[3] +# CHECK-NEXT: 1 4 1.00 sqdmlal s17, h27, h12 # CHECK-NEXT: 1 4 1.00 sqdmlal v0.2d, v0.2s, v0.2s # CHECK-NEXT: 1 4 1.00 sqdmlal v0.4s, v0.4h, v0.4h # CHECK-NEXT: 1 4 1.00 sqdmlal2 v0.2d, v0.4s, v0.4s # CHECK-NEXT: 1 4 1.00 sqdmlal2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 sqdmlsl d12, s23, s13 -# CHECK-NEXT: 1 4 0.50 sqdmlsl d8, s9, v0.s[1] -# CHECK-NEXT: 1 4 0.50 sqdmlsl s0, h0, v0.h[3] -# CHECK-NEXT: 1 4 0.50 sqdmlsl s14, h12, h25 +# CHECK-NEXT: 1 4 1.00 sqdmlsl d12, s23, s13 +# CHECK-NEXT: 1 4 1.00 sqdmlsl d8, s9, v0.s[1] +# CHECK-NEXT: 1 4 1.00 sqdmlsl s0, h0, v0.h[3] +# 
CHECK-NEXT: 1 4 1.00 sqdmlsl s14, h12, h25 # CHECK-NEXT: 1 4 1.00 sqdmlsl v0.2d, v0.2s, v0.2s # CHECK-NEXT: 1 4 1.00 sqdmlsl v0.4s, v0.4h, v0.4h # CHECK-NEXT: 1 4 1.00 sqdmlsl2 v0.2d, v0.4s, v0.4s # CHECK-NEXT: 1 4 1.00 sqdmlsl2 v0.4s, v0.8h, v0.8h # CHECK-NEXT: 1 4 0.50 sqdmulh h10, h11, h12 -# CHECK-NEXT: 1 4 0.50 sqdmulh h7, h15, v0.h[3] -# CHECK-NEXT: 1 4 0.50 sqdmulh s15, s14, v0.s[1] +# CHECK-NEXT: 1 4 1.00 sqdmulh h7, h15, v0.h[3] +# CHECK-NEXT: 1 4 1.00 sqdmulh s15, s14, v0.s[1] # CHECK-NEXT: 1 4 0.50 sqdmulh s20, s21, s2 # CHECK-NEXT: 1 4 0.50 sqdmulh v0.2s, v0.2s, v0.2s # CHECK-NEXT: 1 4 1.00 sqdmulh v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 sqdmull d1, s1, v0.s[1] -# CHECK-NEXT: 1 4 0.50 sqdmull d15, s22, s12 -# CHECK-NEXT: 1 4 0.50 sqdmull s1, h1, v0.h[3] -# CHECK-NEXT: 1 4 0.50 sqdmull s12, h22, h12 +# CHECK-NEXT: 1 4 1.00 sqdmull d1, s1, v0.s[1] +# CHECK-NEXT: 1 4 1.00 sqdmull d15, s22, s12 +# CHECK-NEXT: 1 4 1.00 sqdmull s1, h1, v0.h[3] +# CHECK-NEXT: 1 4 1.00 sqdmull s12, h22, h12 # CHECK-NEXT: 1 4 1.00 sqdmull v0.2d, v0.2s, v0.2s # CHECK-NEXT: 1 4 1.00 sqdmull v0.4s, v0.4h, v0.4h # CHECK-NEXT: 1 4 1.00 sqdmull2 v0.2d, v0.4s, v0.4s # CHECK-NEXT: 1 4 1.00 sqdmull2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 sqneg b19, b14 -# CHECK-NEXT: 1 4 0.50 sqneg d18, d12 -# CHECK-NEXT: 1 4 0.50 sqneg h21, h15 -# CHECK-NEXT: 1 4 0.50 sqneg s20, s12 -# CHECK-NEXT: 1 4 1.00 sqneg v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 sqneg v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 sqneg v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 sqneg v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 sqneg v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 sqneg v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 sqneg v0.8h, v0.8h +# CHECK-NEXT: 1 3 0.50 sqneg b19, b14 +# CHECK-NEXT: 1 3 0.50 sqneg d18, d12 +# CHECK-NEXT: 1 3 0.50 sqneg h21, h15 +# CHECK-NEXT: 1 3 0.50 sqneg s20, s12 +# CHECK-NEXT: 1 3 1.00 sqneg v0.16b, v0.16b +# CHECK-NEXT: 1 3 1.00 sqneg v0.2d, v0.2d +# CHECK-NEXT: 1 3 0.50 sqneg v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 sqneg v0.4h, v0.4h +# CHECK-NEXT: 1 3 1.00 sqneg v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 sqneg v0.8b, v0.8b +# CHECK-NEXT: 1 3 1.00 sqneg v0.8h, v0.8h # CHECK-NEXT: 1 4 0.50 sqrdmulh h10, h11, h12 -# CHECK-NEXT: 1 4 0.50 sqrdmulh h7, h15, v0.h[3] -# CHECK-NEXT: 1 4 0.50 sqrdmulh s15, s14, v0.s[1] +# CHECK-NEXT: 1 4 1.00 sqrdmulh h7, h15, v0.h[3] +# CHECK-NEXT: 1 4 1.00 sqrdmulh s15, s14, v0.s[1] # CHECK-NEXT: 1 4 0.50 sqrdmulh s20, s21, s2 # CHECK-NEXT: 1 4 0.50 sqrdmulh v0.4h, v0.4h, v0.4h # CHECK-NEXT: 1 4 1.00 sqrdmulh v0.8h, v0.8h, v0.8h @@ -1732,10 +1732,10 @@ # CHECK-NEXT: 1 4 1.00 sqshrun2 v0.16b, v0.8h, #3 # CHECK-NEXT: 1 4 1.00 sqshrun2 v0.4s, v0.2d, #3 # CHECK-NEXT: 1 4 1.00 sqshrun2 v0.8h, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 sqsub s20, s10, s7 -# CHECK-NEXT: 1 4 1.00 sqsub v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 1.00 sqsub v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 sqsub v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 3 0.50 sqsub s20, s10, s7 +# CHECK-NEXT: 1 3 1.00 sqsub v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 3 1.00 sqsub v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 sqsub v0.8b, v0.8b, v0.8b # CHECK-NEXT: 1 4 0.50 sqxtn b18, h18 # CHECK-NEXT: 1 4 0.50 sqxtn h20, s17 # CHECK-NEXT: 1 4 0.50 sqxtn s19, d14 @@ -1754,10 +1754,10 @@ # CHECK-NEXT: 1 4 1.00 sqxtun2 v0.16b, v0.8h # CHECK-NEXT: 1 4 1.00 sqxtun2 v0.4s, v0.2d # CHECK-NEXT: 1 4 1.00 sqxtun2 v0.8h, v0.4s -# CHECK-NEXT: 1 4 0.50 srhadd v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 srhadd v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 srhadd v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 0.50 sri d10, d12, 
#14 +# CHECK-NEXT: 1 2 0.50 srhadd v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 srhadd v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 2 0.50 srhadd v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 2 0.50 sri d10, d12, #14 # CHECK-NEXT: 1 4 1.00 sri v0.16b, v0.16b, #3 # CHECK-NEXT: 1 4 1.00 sri v0.2d, v0.2d, #3 # CHECK-NEXT: 1 4 0.50 sri v0.2s, v0.2s, #3 @@ -1765,61 +1765,61 @@ # CHECK-NEXT: 1 4 1.00 sri v0.4s, v0.4s, #3 # CHECK-NEXT: 1 4 0.50 sri v0.8b, v0.8b, #3 # CHECK-NEXT: 1 4 1.00 sri v0.8h, v0.8h, #3 -# CHECK-NEXT: 1 4 0.50 srshl d16, d16, d16 -# CHECK-NEXT: 1 4 0.50 srshl v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 srshl v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 srshl v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 0.50 srshr d19, d18, #7 -# CHECK-NEXT: 1 4 1.00 srshr v0.16b, v0.16b, #3 -# CHECK-NEXT: 1 4 1.00 srshr v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 srshr v0.2s, v0.2s, #3 -# CHECK-NEXT: 1 4 0.50 srshr v0.4h, v0.4h, #3 -# CHECK-NEXT: 1 4 1.00 srshr v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 srshr v0.8b, v0.8b, #3 -# CHECK-NEXT: 1 4 1.00 srshr v0.8h, v0.8h, #3 -# CHECK-NEXT: 1 4 0.50 srsra d15, d11, #19 -# CHECK-NEXT: 1 4 1.00 srsra v0.16b, v0.16b, #3 -# CHECK-NEXT: 1 4 1.00 srsra v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 srsra v0.2s, v0.2s, #3 -# CHECK-NEXT: 1 4 0.50 srsra v0.4h, v0.4h, #3 -# CHECK-NEXT: 1 4 1.00 srsra v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 srsra v0.8b, v0.8b, #3 -# CHECK-NEXT: 1 4 1.00 srsra v0.8h, v0.8h, #3 -# CHECK-NEXT: 1 4 0.50 sshl d31, d31, d31 -# CHECK-NEXT: 1 4 1.00 sshl v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 sshl v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 sshl v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 sshl v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 0.50 sshll v0.2d, v0.2s, #3 -# CHECK-NEXT: 1 4 1.00 sshll2 v0.4s, v0.8h, #3 -# CHECK-NEXT: 1 4 0.50 sshr d15, d16, #12 -# CHECK-NEXT: 1 4 1.00 sshr v0.16b, v0.16b, #3 -# CHECK-NEXT: 1 4 1.00 sshr v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 sshr v0.2s, v0.2s, #3 -# CHECK-NEXT: 1 4 0.50 sshr v0.4h, v0.4h, #3 -# CHECK-NEXT: 1 4 1.00 sshr v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 sshr v0.8b, v0.8b, #3 -# CHECK-NEXT: 1 4 1.00 sshr v0.8h, v0.8h, #3 -# CHECK-NEXT: 1 4 0.50 ssra d18, d12, #21 -# CHECK-NEXT: 1 4 1.00 ssra v0.16b, v0.16b, #3 -# CHECK-NEXT: 1 4 1.00 ssra v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 ssra v0.2s, v0.2s, #3 -# CHECK-NEXT: 1 4 0.50 ssra v0.4h, v0.4h, #3 -# CHECK-NEXT: 1 4 1.00 ssra v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 ssra v0.8b, v0.8b, #3 -# CHECK-NEXT: 1 4 1.00 ssra v0.8h, v0.8h, #3 -# CHECK-NEXT: 1 4 1.00 ssubl v0.2d, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 ssubl v0.4s, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 ssubl v0.8h, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 ssubl2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 ssubl2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 ssubl2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 ssubw v0.2d, v0.2d, v0.2s -# CHECK-NEXT: 1 4 1.00 ssubw v0.4s, v0.4s, v0.4h -# CHECK-NEXT: 1 4 1.00 ssubw v0.8h, v0.8h, v0.8b -# CHECK-NEXT: 1 4 1.00 ssubw2 v0.2d, v0.2d, v0.4s -# CHECK-NEXT: 1 4 1.00 ssubw2 v0.4s, v0.4s, v0.8h -# CHECK-NEXT: 1 4 1.00 ssubw2 v0.8h, v0.8h, v0.16b +# CHECK-NEXT: 1 3 0.50 srshl d16, d16, d16 +# CHECK-NEXT: 1 3 0.50 srshl v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 srshl v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 3 0.50 srshl v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 3 0.50 srshr d19, d18, #7 +# CHECK-NEXT: 1 3 1.00 srshr v0.16b, v0.16b, #3 +# CHECK-NEXT: 1 3 1.00 srshr v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 3 0.50 srshr v0.2s, v0.2s, #3 +# CHECK-NEXT: 1 3 0.50 srshr v0.4h, v0.4h, #3 +# CHECK-NEXT: 1 3 
1.00 srshr v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 3 0.50 srshr v0.8b, v0.8b, #3 +# CHECK-NEXT: 1 3 1.00 srshr v0.8h, v0.8h, #3 +# CHECK-NEXT: 1 4 2.00 srsra d15, d11, #19 +# CHECK-NEXT: 1 4 2.00 srsra v0.16b, v0.16b, #3 +# CHECK-NEXT: 1 4 2.00 srsra v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 4 2.00 srsra v0.2s, v0.2s, #3 +# CHECK-NEXT: 1 4 2.00 srsra v0.4h, v0.4h, #3 +# CHECK-NEXT: 1 4 2.00 srsra v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 4 2.00 srsra v0.8b, v0.8b, #3 +# CHECK-NEXT: 1 4 2.00 srsra v0.8h, v0.8h, #3 +# CHECK-NEXT: 1 2 0.50 sshl d31, d31, d31 +# CHECK-NEXT: 1 2 1.00 sshl v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 2 0.50 sshl v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 sshl v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 2 0.50 sshl v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 2 1.00 sshll v0.2d, v0.2s, #3 +# CHECK-NEXT: 1 2 1.00 sshll2 v0.4s, v0.8h, #3 +# CHECK-NEXT: 1 2 0.50 sshr d15, d16, #12 +# CHECK-NEXT: 1 2 0.50 sshr v0.16b, v0.16b, #3 +# CHECK-NEXT: 1 2 0.50 sshr v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 2 0.50 sshr v0.2s, v0.2s, #3 +# CHECK-NEXT: 1 2 0.50 sshr v0.4h, v0.4h, #3 +# CHECK-NEXT: 1 2 0.50 sshr v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 2 0.50 sshr v0.8b, v0.8b, #3 +# CHECK-NEXT: 1 2 0.50 sshr v0.8h, v0.8h, #3 +# CHECK-NEXT: 1 3 0.50 ssra d18, d12, #21 +# CHECK-NEXT: 1 3 1.00 ssra v0.16b, v0.16b, #3 +# CHECK-NEXT: 1 3 1.00 ssra v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 3 0.50 ssra v0.2s, v0.2s, #3 +# CHECK-NEXT: 1 3 0.50 ssra v0.4h, v0.4h, #3 +# CHECK-NEXT: 1 3 1.00 ssra v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 3 0.50 ssra v0.8b, v0.8b, #3 +# CHECK-NEXT: 1 3 1.00 ssra v0.8h, v0.8h, #3 +# CHECK-NEXT: 1 3 1.00 ssubl v0.2d, v0.2s, v0.2s +# CHECK-NEXT: 1 3 1.00 ssubl v0.4s, v0.4h, v0.4h +# CHECK-NEXT: 1 3 1.00 ssubl v0.8h, v0.8b, v0.8b +# CHECK-NEXT: 1 3 1.00 ssubl2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: 1 3 1.00 ssubl2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: 1 3 1.00 ssubl2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: 1 3 1.00 ssubw v0.2d, v0.2d, v0.2s +# CHECK-NEXT: 1 3 1.00 ssubw v0.4s, v0.4s, v0.4h +# CHECK-NEXT: 1 3 1.00 ssubw v0.8h, v0.8h, v0.8b +# CHECK-NEXT: 1 3 1.00 ssubw2 v0.2d, v0.2d, v0.4s +# CHECK-NEXT: 1 3 1.00 ssubw2 v0.4s, v0.4s, v0.8h +# CHECK-NEXT: 1 3 1.00 ssubw2 v0.8h, v0.8h, v0.16b # CHECK-NEXT: 1 4 1.00 * st1 { v0.16b }, [x0] # CHECK-NEXT: 2 5 2.00 * st1 { v0.2d, v1.2d, v2.2d }, [x0], #48 # CHECK-NEXT: 1 5 4.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] @@ -1842,19 +1842,19 @@ # CHECK-NEXT: 2 5 4.00 * st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64 # CHECK-NEXT: 1 5 2.00 * st4 { v0.b, v1.b, v2.b, v3.b }[9], [x0] # CHECK-NEXT: 2 5 2.00 * st4 { v0.b, v1.b, v2.b, v3.b }[9], [x0], x5 -# CHECK-NEXT: 1 4 0.50 sub d15, d5, d16 -# CHECK-NEXT: 1 4 1.00 sub v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 suqadd b19, b14 -# CHECK-NEXT: 1 4 0.50 suqadd d18, d22 -# CHECK-NEXT: 1 4 0.50 suqadd h20, h15 -# CHECK-NEXT: 1 4 0.50 suqadd s21, s12 -# CHECK-NEXT: 1 4 1.00 suqadd v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 suqadd v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 suqadd v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 suqadd v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 suqadd v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 suqadd v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 suqadd v0.8h, v0.8h +# CHECK-NEXT: 1 2 0.50 sub d15, d5, d16 +# CHECK-NEXT: 1 2 1.00 sub v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 3 0.50 suqadd b19, b14 +# CHECK-NEXT: 1 3 0.50 suqadd d18, d22 +# CHECK-NEXT: 1 3 0.50 suqadd h20, h15 +# CHECK-NEXT: 1 3 0.50 suqadd s21, s12 +# CHECK-NEXT: 1 3 1.00 suqadd v0.16b, v0.16b +# CHECK-NEXT: 1 3 1.00 suqadd v0.2d, v0.2d +# CHECK-NEXT: 1 3 0.50 suqadd v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 suqadd 
v0.4h, v0.4h +# CHECK-NEXT: 1 3 1.00 suqadd v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 suqadd v0.8b, v0.8b +# CHECK-NEXT: 1 3 1.00 suqadd v0.8h, v0.8h # CHECK-NEXT: 1 4 1.00 tbl v0.16b, { v0.16b }, v0.16b # CHECK-NEXT: 1 4 1.00 tbl v0.16b, { v0.16b, v1.16b }, v0.16b # CHECK-NEXT: 1 4 1.00 tbl v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b @@ -1885,44 +1885,44 @@ # CHECK-NEXT: 1 4 1.00 trn2 v0.4s, v0.4s, v0.4s # CHECK-NEXT: 1 4 0.50 trn2 v0.8b, v0.8b, v0.8b # CHECK-NEXT: 1 4 1.00 trn2 v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 uaba v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 uabal v0.2d, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 uabal v0.4s, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 uabal v0.8h, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 uabal2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 uabal2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 uabal2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 uabd v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 uabdl v0.2d, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 uabdl v0.4s, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 uabdl v0.8h, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 uabdl2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 uabdl2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 uabdl2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 uadalp v0.1d, v0.2s -# CHECK-NEXT: 1 4 1.00 uadalp v0.2d, v0.4s -# CHECK-NEXT: 1 4 0.50 uadalp v0.2s, v0.4h -# CHECK-NEXT: 1 4 0.50 uadalp v0.4h, v0.8b -# CHECK-NEXT: 1 4 1.00 uadalp v0.4s, v0.8h -# CHECK-NEXT: 1 4 1.00 uadalp v0.8h, v0.16b -# CHECK-NEXT: 1 4 1.00 uaddl v0.2d, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 uaddl v0.4s, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 uaddl v0.8h, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 uaddl2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 uaddl2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 uaddl2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 uaddlp v0.1d, v0.2s -# CHECK-NEXT: 1 4 1.00 uaddlp v0.2d, v0.4s -# CHECK-NEXT: 1 4 0.50 uaddlp v0.2s, v0.4h -# CHECK-NEXT: 1 4 0.50 uaddlp v0.4h, v0.8b -# CHECK-NEXT: 1 4 1.00 uaddlp v0.4s, v0.8h -# CHECK-NEXT: 1 4 1.00 uaddlp v0.8h, v0.16b -# CHECK-NEXT: 1 4 1.00 uaddw v0.2d, v0.2d, v0.2s -# CHECK-NEXT: 1 4 1.00 uaddw v0.4s, v0.4s, v0.4h -# CHECK-NEXT: 1 4 1.00 uaddw v0.8h, v0.8h, v0.8b -# CHECK-NEXT: 1 4 1.00 uaddw2 v0.2d, v0.2d, v0.4s -# CHECK-NEXT: 1 4 1.00 uaddw2 v0.4s, v0.4s, v0.8h -# CHECK-NEXT: 1 4 1.00 uaddw2 v0.8h, v0.8h, v0.16b +# CHECK-NEXT: 1 4 2.00 uaba v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 4 2.00 uabal v0.2d, v0.2s, v0.2s +# CHECK-NEXT: 1 4 2.00 uabal v0.4s, v0.4h, v0.4h +# CHECK-NEXT: 1 4 2.00 uabal v0.8h, v0.8b, v0.8b +# CHECK-NEXT: 1 4 2.00 uabal2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: 1 4 2.00 uabal2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: 1 4 2.00 uabal2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: 1 3 0.50 uabd v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 3 1.00 uabdl v0.2d, v0.2s, v0.2s +# CHECK-NEXT: 1 3 1.00 uabdl v0.4s, v0.4h, v0.4h +# CHECK-NEXT: 1 3 1.00 uabdl v0.8h, v0.8b, v0.8b +# CHECK-NEXT: 1 3 1.00 uabdl2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: 1 3 1.00 uabdl2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: 1 3 1.00 uabdl2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: 1 4 2.00 uadalp v0.1d, v0.2s +# CHECK-NEXT: 1 4 2.00 uadalp v0.2d, v0.4s +# CHECK-NEXT: 1 4 2.00 uadalp v0.2s, v0.4h +# CHECK-NEXT: 1 4 2.00 uadalp v0.4h, v0.8b +# CHECK-NEXT: 1 4 2.00 uadalp v0.4s, v0.8h +# CHECK-NEXT: 1 4 2.00 uadalp v0.8h, v0.16b +# CHECK-NEXT: 1 3 1.00 uaddl v0.2d, v0.2s, v0.2s +# CHECK-NEXT: 1 3 1.00 uaddl v0.4s, v0.4h, v0.4h +# CHECK-NEXT: 1 3 1.00 uaddl v0.8h, v0.8b, v0.8b +# CHECK-NEXT: 1 3 1.00 uaddl2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: 1 3 1.00 uaddl2 
v0.4s, v0.8h, v0.8h +# CHECK-NEXT: 1 3 1.00 uaddl2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: 1 3 0.50 uaddlp v0.1d, v0.2s +# CHECK-NEXT: 1 3 1.00 uaddlp v0.2d, v0.4s +# CHECK-NEXT: 1 3 0.50 uaddlp v0.2s, v0.4h +# CHECK-NEXT: 1 3 0.50 uaddlp v0.4h, v0.8b +# CHECK-NEXT: 1 3 1.00 uaddlp v0.4s, v0.8h +# CHECK-NEXT: 1 3 1.00 uaddlp v0.8h, v0.16b +# CHECK-NEXT: 1 3 1.00 uaddw v0.2d, v0.2d, v0.2s +# CHECK-NEXT: 1 3 1.00 uaddw v0.4s, v0.4s, v0.4h +# CHECK-NEXT: 1 3 1.00 uaddw v0.8h, v0.8h, v0.8b +# CHECK-NEXT: 1 3 1.00 uaddw2 v0.2d, v0.2d, v0.4s +# CHECK-NEXT: 1 3 1.00 uaddw2 v0.4s, v0.4s, v0.8h +# CHECK-NEXT: 1 3 1.00 uaddw2 v0.8h, v0.8h, v0.16b # CHECK-NEXT: 1 4 0.50 ucvtf d21, d14 # CHECK-NEXT: 1 4 0.50 ucvtf d21, d14, #64 # CHECK-NEXT: 1 4 0.50 ucvtf s22, s13 @@ -1935,21 +1935,21 @@ # CHECK-NEXT: 1 4 0.50 ucvtf v0.4s, v0.4s # CHECK-NEXT: 1 4 0.50 ucvtf v0.4s, v0.4s, #3 # CHECK-NEXT: 1 4 0.50 ucvtf v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 uhadd v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 uhadd v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 uhsub v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 umax v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 2 1.00 uhadd v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 2 1.00 uhadd v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 2 1.00 uhsub v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 2 1.00 umax v0.16b, v0.16b, v0.16b # CHECK-NEXT: 1 4 1.00 umax v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 umax v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 umaxp v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 2 1.00 umax v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 2 1.00 umaxp v0.16b, v0.16b, v0.16b # CHECK-NEXT: 1 4 1.00 umaxp v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 umaxp v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 umin v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 umin v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 umin v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 0.50 uminp v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 uminp v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 uminp v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 2 1.00 umaxp v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 2 0.50 umin v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 umin v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 2 0.50 umin v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 2 0.50 uminp v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 uminp v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 2 0.50 uminp v0.8b, v0.8b, v0.8b # CHECK-NEXT: 1 4 1.00 umlal v0.2d, v0.2s, v0.2s # CHECK-NEXT: 1 4 1.00 umlal v0.4s, v0.4h, v0.4h # CHECK-NEXT: 1 4 1.00 umlal v0.8h, v0.8b, v0.8b @@ -1968,8 +1968,8 @@ # CHECK-NEXT: 1 4 1.00 umull2 v0.2d, v0.4s, v0.4s # CHECK-NEXT: 1 4 1.00 umull2 v0.4s, v0.8h, v0.8h # CHECK-NEXT: 1 4 1.00 umull2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 uqadd h0, h1, h5 -# CHECK-NEXT: 1 4 1.00 uqadd v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 3 0.50 uqadd h0, h1, h5 +# CHECK-NEXT: 1 3 1.00 uqadd v0.8h, v0.8h, v0.8h # CHECK-NEXT: 1 4 0.50 uqrshl b11, b20, b30 # CHECK-NEXT: 1 4 0.50 uqrshl s23, s20, s16 # CHECK-NEXT: 1 4 1.00 uqrshl v0.16b, v0.16b, v0.16b @@ -2011,8 +2011,8 @@ # CHECK-NEXT: 1 4 1.00 uqshrn2 v0.16b, v0.8h, #3 # CHECK-NEXT: 1 4 1.00 uqshrn2 v0.4s, v0.2d, #3 # CHECK-NEXT: 1 4 1.00 uqshrn2 v0.8h, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 uqsub d16, d16, d16 -# CHECK-NEXT: 1 4 0.50 uqsub v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 3 0.50 uqsub d16, d16, d16 +# CHECK-NEXT: 1 3 0.50 uqsub v0.4h, v0.4h, v0.4h # CHECK-NEXT: 1 4 0.50 uqxtn b18, h18 # CHECK-NEXT: 1 4 0.50 uqxtn h20, s17 # CHECK-NEXT: 1 4 0.50 uqxtn s19, d14 @@ -2024,77 +2024,77 @@ # CHECK-NEXT: 1 4 1.00 uqxtn2 v0.8h, v0.4s # CHECK-NEXT: 1 4 0.50 urecpe v0.2s, v0.2s # 
CHECK-NEXT: 1 4 1.00 urecpe v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 urhadd v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 urhadd v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 urhadd v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 urshl d8, d7, d4 -# CHECK-NEXT: 1 4 1.00 urshl v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 urshl v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 1.00 urshl v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 urshl v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 urshr d20, d23, #31 -# CHECK-NEXT: 1 4 1.00 urshr v0.16b, v0.16b, #3 -# CHECK-NEXT: 1 4 1.00 urshr v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 urshr v0.2s, v0.2s, #3 -# CHECK-NEXT: 1 4 0.50 urshr v0.4h, v0.4h, #3 -# CHECK-NEXT: 1 4 1.00 urshr v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 urshr v0.8b, v0.8b, #3 -# CHECK-NEXT: 1 4 1.00 urshr v0.8h, v0.8h, #3 +# CHECK-NEXT: 1 2 1.00 urhadd v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 2 1.00 urhadd v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 2 1.00 urhadd v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 3 0.50 urshl d8, d7, d4 +# CHECK-NEXT: 1 3 1.00 urshl v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 3 1.00 urshl v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 3 1.00 urshl v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 3 1.00 urshl v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 3 0.50 urshr d20, d23, #31 +# CHECK-NEXT: 1 3 1.00 urshr v0.16b, v0.16b, #3 +# CHECK-NEXT: 1 3 1.00 urshr v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 3 0.50 urshr v0.2s, v0.2s, #3 +# CHECK-NEXT: 1 3 0.50 urshr v0.4h, v0.4h, #3 +# CHECK-NEXT: 1 3 1.00 urshr v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 3 0.50 urshr v0.8b, v0.8b, #3 +# CHECK-NEXT: 1 3 1.00 urshr v0.8h, v0.8h, #3 # CHECK-NEXT: 1 12 9.00 ursqrte v0.2s, v0.2s # CHECK-NEXT: 1 12 9.00 ursqrte v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 ursra d18, d10, #13 -# CHECK-NEXT: 1 4 1.00 ursra v0.16b, v0.16b, #3 -# CHECK-NEXT: 1 4 1.00 ursra v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 ursra v0.2s, v0.2s, #3 -# CHECK-NEXT: 1 4 0.50 ursra v0.4h, v0.4h, #3 -# CHECK-NEXT: 1 4 1.00 ursra v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 ursra v0.8b, v0.8b, #3 -# CHECK-NEXT: 1 4 1.00 ursra v0.8h, v0.8h, #3 -# CHECK-NEXT: 1 4 0.50 ushl d0, d0, d0 -# CHECK-NEXT: 1 4 1.00 ushl v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 ushl v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 ushl v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 ushll v0.4s, v0.4h, #3 -# CHECK-NEXT: 1 4 1.00 ushll2 v0.8h, v0.16b, #3 -# CHECK-NEXT: 1 4 0.50 ushr d10, d17, #18 -# CHECK-NEXT: 1 4 1.00 ushr v0.16b, v0.16b, #3 -# CHECK-NEXT: 1 4 1.00 ushr v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 ushr v0.2s, v0.2s, #3 -# CHECK-NEXT: 1 4 0.50 ushr v0.4h, v0.4h, #3 -# CHECK-NEXT: 1 4 1.00 ushr v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 ushr v0.8b, v0.8b, #3 -# CHECK-NEXT: 1 4 1.00 ushr v0.8h, v0.8h, #3 -# CHECK-NEXT: 1 4 0.50 usqadd b19, b14 -# CHECK-NEXT: 1 4 0.50 usqadd d18, d22 -# CHECK-NEXT: 1 4 0.50 usqadd h20, h15 -# CHECK-NEXT: 1 4 0.50 usqadd s21, s12 -# CHECK-NEXT: 1 4 1.00 usqadd v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 usqadd v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 usqadd v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 usqadd v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 usqadd v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 usqadd v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 usqadd v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 usra d20, d13, #61 -# CHECK-NEXT: 1 4 1.00 usra v0.16b, v0.16b, #3 -# CHECK-NEXT: 1 4 1.00 usra v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 usra v0.2s, v0.2s, #3 -# CHECK-NEXT: 1 4 0.50 usra v0.4h, v0.4h, #3 -# CHECK-NEXT: 1 4 1.00 usra v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 usra v0.8b, v0.8b, #3 -# CHECK-NEXT: 1 4 1.00 usra v0.8h, v0.8h, #3 -# 
CHECK-NEXT: 1 4 1.00 usubl v0.2d, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 usubl v0.4s, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 usubl v0.8h, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 usubl2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 usubl2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 usubl2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 usubw v0.2d, v0.2d, v0.2s -# CHECK-NEXT: 1 4 1.00 usubw v0.4s, v0.4s, v0.4h -# CHECK-NEXT: 1 4 1.00 usubw v0.8h, v0.8h, v0.8b -# CHECK-NEXT: 1 4 1.00 usubw2 v0.2d, v0.2d, v0.4s -# CHECK-NEXT: 1 4 1.00 usubw2 v0.4s, v0.4s, v0.8h -# CHECK-NEXT: 1 4 1.00 usubw2 v0.8h, v0.8h, v0.16b +# CHECK-NEXT: 1 4 2.00 ursra d18, d10, #13 +# CHECK-NEXT: 1 4 2.00 ursra v0.16b, v0.16b, #3 +# CHECK-NEXT: 1 4 2.00 ursra v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 4 2.00 ursra v0.2s, v0.2s, #3 +# CHECK-NEXT: 1 4 2.00 ursra v0.4h, v0.4h, #3 +# CHECK-NEXT: 1 4 2.00 ursra v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 4 2.00 ursra v0.8b, v0.8b, #3 +# CHECK-NEXT: 1 4 2.00 ursra v0.8h, v0.8h, #3 +# CHECK-NEXT: 1 2 0.50 ushl d0, d0, d0 +# CHECK-NEXT: 1 2 1.00 ushl v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 2 1.00 ushl v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 2 1.00 ushl v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 2 1.00 ushll v0.4s, v0.4h, #3 +# CHECK-NEXT: 1 2 1.00 ushll2 v0.8h, v0.16b, #3 +# CHECK-NEXT: 1 2 0.50 ushr d10, d17, #18 +# CHECK-NEXT: 1 2 0.50 ushr v0.16b, v0.16b, #3 +# CHECK-NEXT: 1 2 0.50 ushr v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 2 0.50 ushr v0.2s, v0.2s, #3 +# CHECK-NEXT: 1 2 0.50 ushr v0.4h, v0.4h, #3 +# CHECK-NEXT: 1 2 0.50 ushr v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 2 0.50 ushr v0.8b, v0.8b, #3 +# CHECK-NEXT: 1 2 0.50 ushr v0.8h, v0.8h, #3 +# CHECK-NEXT: 1 3 0.50 usqadd b19, b14 +# CHECK-NEXT: 1 3 0.50 usqadd d18, d22 +# CHECK-NEXT: 1 3 0.50 usqadd h20, h15 +# CHECK-NEXT: 1 3 0.50 usqadd s21, s12 +# CHECK-NEXT: 1 3 1.00 usqadd v0.16b, v0.16b +# CHECK-NEXT: 1 3 1.00 usqadd v0.2d, v0.2d +# CHECK-NEXT: 1 3 0.50 usqadd v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 usqadd v0.4h, v0.4h +# CHECK-NEXT: 1 3 1.00 usqadd v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 usqadd v0.8b, v0.8b +# CHECK-NEXT: 1 3 1.00 usqadd v0.8h, v0.8h +# CHECK-NEXT: 1 3 0.50 usra d20, d13, #61 +# CHECK-NEXT: 1 3 1.00 usra v0.16b, v0.16b, #3 +# CHECK-NEXT: 1 3 1.00 usra v0.2d, v0.2d, #3 +# CHECK-NEXT: 1 3 0.50 usra v0.2s, v0.2s, #3 +# CHECK-NEXT: 1 3 0.50 usra v0.4h, v0.4h, #3 +# CHECK-NEXT: 1 3 1.00 usra v0.4s, v0.4s, #3 +# CHECK-NEXT: 1 3 0.50 usra v0.8b, v0.8b, #3 +# CHECK-NEXT: 1 3 1.00 usra v0.8h, v0.8h, #3 +# CHECK-NEXT: 1 3 1.00 usubl v0.2d, v0.2s, v0.2s +# CHECK-NEXT: 1 3 1.00 usubl v0.4s, v0.4h, v0.4h +# CHECK-NEXT: 1 3 1.00 usubl v0.8h, v0.8b, v0.8b +# CHECK-NEXT: 1 3 1.00 usubl2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: 1 3 1.00 usubl2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: 1 3 1.00 usubl2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: 1 3 1.00 usubw v0.2d, v0.2d, v0.2s +# CHECK-NEXT: 1 3 1.00 usubw v0.4s, v0.4s, v0.4h +# CHECK-NEXT: 1 3 1.00 usubw v0.8h, v0.8h, v0.8b +# CHECK-NEXT: 1 3 1.00 usubw2 v0.2d, v0.2d, v0.4s +# CHECK-NEXT: 1 3 1.00 usubw2 v0.4s, v0.4s, v0.8h +# CHECK-NEXT: 1 3 1.00 usubw2 v0.8h, v0.8h, v0.16b # CHECK-NEXT: 1 4 1.00 uzp1 v0.16b, v0.16b, v0.16b # CHECK-NEXT: 1 4 1.00 uzp1 v0.2d, v0.2d, v0.2d # CHECK-NEXT: 1 4 0.50 uzp1 v0.2s, v0.2s, v0.2s @@ -2146,7 +2146,7 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0.0] [0.1] [1] [2] [3.0] [3.1] [4] [5.0] [5.1] [6] [7] [8] -# CHECK-NEXT: - - - - 716.50 716.50 197.00 3.00 3.00 107.00 - 52.00 +# CHECK-NEXT: - - - - 780.00 780.00 197.00 3.00 3.00 107.00 - 52.00 # CHECK: Resource pressure by instruction: # 
CHECK-NEXT: [0.0] [0.1] [1] [2] [3.0] [3.1] [4] [5.0] [5.1] [6] [7] [8] Instructions: @@ -2537,12 +2537,12 @@ # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - pmul v0.8b, v0.8b, v0.8b # CHECK-NEXT: - - - - 1.00 1.00 - - - - - - pmull v0.8h, v0.8b, v0.8b # CHECK-NEXT: - - - - 1.00 1.00 - - - - - - pmull2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - raddhn v0.2s, v0.2d, v0.2d -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - raddhn v0.4h, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - raddhn v0.8b, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - raddhn2 v0.16b, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - raddhn2 v0.4s, v0.2d, v0.2d -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - raddhn2 v0.8h, v0.4s, v0.4s +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - raddhn v0.2s, v0.2d, v0.2d +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - raddhn v0.4h, v0.4s, v0.4s +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - raddhn v0.8b, v0.8h, v0.8h +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - raddhn2 v0.16b, v0.8h, v0.8h +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - raddhn2 v0.4s, v0.2d, v0.2d +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - raddhn2 v0.8h, v0.4s, v0.4s # CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rbit v0.16b, v0.16b # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - rbit v0.8b, v0.8b # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - rev16 v21.8b, v1.8b @@ -2563,19 +2563,19 @@ # CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rshrn2 v0.16b, v0.8h, #3 # CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rshrn2 v0.4s, v0.2d, #3 # CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rshrn2 v0.8h, v0.4s, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rsubhn v0.2s, v0.2d, v0.2d -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rsubhn v0.4h, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rsubhn v0.8b, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rsubhn2 v0.16b, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rsubhn2 v0.4s, v0.2d, v0.2d -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rsubhn2 v0.8h, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saba v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabal v0.2d, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabal v0.4s, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabal v0.8h, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabal2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabal2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabal2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - rsubhn v0.2s, v0.2d, v0.2d +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - rsubhn v0.4h, v0.4s, v0.4s +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - rsubhn v0.8b, v0.8h, v0.8h +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - rsubhn2 v0.16b, v0.8h, v0.8h +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - rsubhn2 v0.4s, v0.2d, v0.2d +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - rsubhn2 v0.8h, v0.4s, v0.4s +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - saba v0.16b, v0.16b, v0.16b +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - sabal v0.2d, v0.2s, v0.2s +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - sabal v0.4s, v0.4h, v0.4h +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - sabal v0.8h, v0.8b, v0.8b +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - sabal2 v0.2d, v0.4s, v0.4s +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - sabal2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - sabal2 v0.8h, v0.16b, v0.16b # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sabd v0.4h, 
v0.4h, v0.4h
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabdl v0.2d, v0.2s, v0.2s
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabdl v0.4s, v0.4h, v0.4h
@@ -2583,12 +2583,12 @@
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabdl2 v0.2d, v0.4s, v0.4s
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabdl2 v0.4s, v0.8h, v0.8h
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabdl2 v0.8h, v0.16b, v0.16b
-# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sadalp v0.1d, v0.2s
-# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sadalp v0.2d, v0.4s
-# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sadalp v0.2s, v0.4h
-# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sadalp v0.4h, v0.8b
-# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sadalp v0.4s, v0.8h
-# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sadalp v0.8h, v0.16b
+# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - sadalp v0.1d, v0.2s
+# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - sadalp v0.2d, v0.4s
+# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - sadalp v0.2s, v0.4h
+# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - sadalp v0.4h, v0.8b
+# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - sadalp v0.4s, v0.8h
+# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - sadalp v0.8h, v0.16b
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddl v0.2d, v0.2s, v0.2s
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddl v0.4s, v0.4h, v0.4h
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddl v0.8h, v0.8b, v0.8b
@@ -2621,10 +2621,10 @@
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - scvtf v0.8h, v0.8h
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shadd v0.8b, v0.8b, v0.8b
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shl d7, d10, #12
-# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shl v0.16b, v0.16b, #3
-# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shl v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shl v0.16b, v0.16b, #3
+# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shl v0.2d, v0.2d, #3
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shl v0.4h, v0.4h, #3
-# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shl v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shl v0.4s, v0.4s, #3
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shll v0.2d, v0.2s, #32
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shll v0.4s, v0.4h, #16
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shll v0.8h, v0.8b, #8
@@ -2640,9 +2640,9 @@
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shrn v0.2s, v0.2d, #3
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shrn v0.4h, v0.4s, #3
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shrn v0.8b, v0.8h, #3
-# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shrn2 v0.16b, v0.8h, #3
-# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shrn2 v0.4s, v0.2d, #3
-# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shrn2 v0.8h, v0.4s, #3
+# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shrn2 v0.16b, v0.8h, #3
+# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shrn2 v0.4s, v0.2d, #3
+# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shrn2 v0.8h, v0.4s, #3
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shsub v0.2s, v0.2s, v0.2s
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shsub v0.4h, v0.4h, v0.4h
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sli d10, d14, #12
@@ -2697,32 +2697,32 @@
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqadd b20, b11, b15
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqadd v0.16b, v0.16b, v0.16b
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqadd v0.2s, v0.2s, v0.2s
-# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmlal d19, s24, s12
-# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmlal d8, s9, v0.s[1]
-# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmlal s0, h0, v0.h[3]
-# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmlal s17, h27, h12
+# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal d19, s24, s12
+# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal d8, s9, v0.s[1]
+# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal s0, h0, v0.h[3]
+# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal s17, h27, h12
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal v0.2d, v0.2s, v0.2s
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal v0.4s, v0.4h, v0.4h
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal2 v0.2d, v0.4s, v0.4s
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal2 v0.4s, v0.8h, v0.8h
-# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmlsl d12, s23, s13
-# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmlsl d8, s9, v0.s[1]
-# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmlsl s0, h0, v0.h[3]
-# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmlsl s14, h12, h25
+# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl d12, s23, s13
+# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl d8, s9, v0.s[1]
+# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl s0, h0, v0.h[3]
+# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl s14, h12, h25
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl v0.2d, v0.2s, v0.2s
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl v0.4s, v0.4h, v0.4h
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl2 v0.2d, v0.4s, v0.4s
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl2 v0.4s, v0.8h, v0.8h
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmulh h10, h11, h12
-# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmulh h7, h15, v0.h[3]
-# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmulh s15, s14, v0.s[1]
+# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmulh h7, h15, v0.h[3]
+# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmulh s15, s14, v0.s[1]
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmulh s20, s21, s2
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmulh v0.2s, v0.2s, v0.2s
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmulh v0.4s, v0.4s, v0.4s
-# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmull d1, s1, v0.s[1]
-# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmull d15, s22, s12
-# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmull s1, h1, v0.h[3]
-# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmull s12, h22, h12
+# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull d1, s1, v0.s[1]
+# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull d15, s22, s12
+# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull s1, h1, v0.h[3]
+# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull s12, h22, h12
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull v0.2d, v0.2s, v0.2s
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull v0.4s, v0.4h, v0.4h
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull2 v0.2d, v0.4s, v0.4s
@@ -2739,8 +2739,8 @@
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqneg v0.8b, v0.8b
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqneg v0.8h, v0.8h
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrdmulh h10, h11, h12
-# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrdmulh h7, h15, v0.h[3]
-# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrdmulh s15, s14, v0.s[1]
+# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrdmulh h7, h15, v0.h[3]
+# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrdmulh s15, s14, v0.s[1]
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrdmulh s20, s21, s2
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrdmulh v0.4h, v0.4h, v0.4h
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrdmulh v0.8h, v0.8h, v0.8h
@@ -2857,29 +2857,29 @@
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - srshr v0.4s, v0.4s, #3
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srshr v0.8b, v0.8b, #3
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - srshr v0.8h, v0.8h, #3
-# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srsra d15, d11, #19
-# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - srsra v0.16b, v0.16b, #3
-# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - srsra v0.2d, v0.2d, #3
-# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srsra v0.2s, v0.2s, #3
-# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srsra v0.4h, v0.4h, #3
-# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - srsra v0.4s, v0.4s, #3
-# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srsra v0.8b, v0.8b, #3
-# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - srsra v0.8h, v0.8h, #3
+# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - srsra d15, d11, #19
+# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - srsra v0.16b, v0.16b, #3
+# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - srsra v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - srsra v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - srsra v0.4h, v0.4h, #3
+# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - srsra v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - srsra v0.8b, v0.8b, #3
+# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - srsra v0.8h, v0.8h, #3
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshl d31, d31, d31
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sshl v0.2d, v0.2d, v0.2d
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshl v0.2s, v0.2s, v0.2s
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshl v0.4h, v0.4h, v0.4h
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshl v0.8b, v0.8b, v0.8b
-# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshll v0.2d, v0.2s, #3
+# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sshll v0.2d, v0.2s, #3
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sshll2 v0.4s, v0.8h, #3
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshr d15, d16, #12
-# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sshr v0.16b, v0.16b, #3
-# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sshr v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshr v0.16b, v0.16b, #3
+# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshr v0.2d, v0.2d, #3
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshr v0.2s, v0.2s, #3
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshr v0.4h, v0.4h, #3
-# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sshr v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshr v0.4s, v0.4s, #3
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshr v0.8b, v0.8b, #3
-# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sshr v0.8h, v0.8h, #3
+# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshr v0.8h, v0.8h, #3
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ssra d18, d12, #21
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssra v0.16b, v0.16b, #3
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssra v0.2d, v0.2d, #3
@@ -2965,13 +2965,13 @@
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn2 v0.4s, v0.4s, v0.4s
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - trn2 v0.8b, v0.8b, v0.8b
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn2 v0.8h, v0.8h, v0.8h
-# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uaba v0.8b, v0.8b, v0.8b
-# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabal v0.2d, v0.2s, v0.2s
-# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabal v0.4s, v0.4h, v0.4h
-# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabal v0.8h, v0.8b, v0.8b
-# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabal2 v0.2d, v0.4s, v0.4s
-# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabal2 v0.4s, v0.8h, v0.8h
-# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabal2 v0.8h, v0.16b, v0.16b
+# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - uaba v0.8b, v0.8b, v0.8b
+# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - uabal v0.2d, v0.2s, v0.2s
+# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - uabal v0.4s, v0.4h, v0.4h
+# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - uabal v0.8h, v0.8b, v0.8b
+# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - uabal2 v0.2d, v0.4s, v0.4s
+# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - uabal2 v0.4s, v0.8h, v0.8h
+# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - uabal2 v0.8h, v0.16b, v0.16b
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uabd v0.4h, v0.4h, v0.4h
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabdl v0.2d, v0.2s, v0.2s
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabdl v0.4s, v0.4h, v0.4h
@@ -2979,12 +2979,12 @@
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabdl2 v0.2d, v0.4s, v0.4s
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabdl2 v0.4s, v0.8h, v0.8h
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabdl2 v0.8h, v0.16b, v0.16b
-# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uadalp v0.1d, v0.2s
-# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uadalp v0.2d, v0.4s
-# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uadalp v0.2s, v0.4h
-# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uadalp v0.4h, v0.8b
-# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uadalp v0.4s, v0.8h
-# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uadalp v0.8h, v0.16b
+# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - uadalp v0.1d, v0.2s
+# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - uadalp v0.2d, v0.4s
+# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - uadalp v0.2s, v0.4h
+# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - uadalp v0.4h, v0.8b
+# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - uadalp v0.4s, v0.8h
+# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - uadalp v0.8h, v0.16b
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddl v0.2d, v0.2s, v0.2s
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddl v0.4s, v0.4h, v0.4h
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddl v0.8h, v0.8b, v0.8b
@@ -3122,28 +3122,28 @@
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urshr v0.8h, v0.8h, #3
# CHECK-NEXT: - - - - - - 9.00 - - - - - ursqrte v0.2s, v0.2s
# CHECK-NEXT: - - - - - - 9.00 - - - - - ursqrte v0.4s, v0.4s
-# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ursra d18, d10, #13
-# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ursra v0.16b, v0.16b, #3
-# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ursra v0.2d, v0.2d, #3
-# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ursra v0.2s, v0.2s, #3
-# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ursra v0.4h, v0.4h, #3
-# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ursra v0.4s, v0.4s, #3
-# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ursra v0.8b, v0.8b, #3
-# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ursra v0.8h, v0.8h, #3
+# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - ursra d18, d10, #13
+# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - ursra v0.16b, v0.16b, #3
+# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - ursra v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - ursra v0.2s, v0.2s, #3
+# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - ursra v0.4h, v0.4h, #3
+# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - ursra v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - ursra v0.8b, v0.8b, #3
+# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - ursra v0.8h, v0.8h, #3
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ushl d0, d0, d0
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ushl v0.16b, v0.16b, v0.16b
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ushl v0.4s, v0.4s, v0.4s
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ushl v0.8h, v0.8h, v0.8h
-# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ushll v0.4s, v0.4h, #3
+# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ushll v0.4s, v0.4h, #3
# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ushll2 v0.8h, v0.16b, #3
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ushr d10, d17, #18
-# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ushr v0.16b, v0.16b, #3
-# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ushr v0.2d, v0.2d, #3
+# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ushr v0.16b, v0.16b, #3
+# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ushr v0.2d, v0.2d, #3
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ushr v0.2s, v0.2s, #3
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ushr v0.4h, v0.4h, #3
-# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ushr v0.4s, v0.4s, #3
+# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ushr v0.4s, v0.4s, #3
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ushr v0.8b, v0.8b, #3
-# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ushr v0.8h, v0.8h, #3
+# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ushr v0.8h, v0.8h, #3
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - usqadd b19, b14
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - usqadd d18, d22
# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - usqadd h20, h15