diff --git a/llvm/lib/Target/AArch64/AArch64SchedA55.td b/llvm/lib/Target/AArch64/AArch64SchedA55.td --- a/llvm/lib/Target/AArch64/AArch64SchedA55.td +++ b/llvm/lib/Target/AArch64/AArch64SchedA55.td @@ -24,9 +24,10 @@ // or 5. Setting it 4 looked to be good trade-off. let MispredictPenalty = 8; // A branch direction mispredict. let PostRAScheduler = 1; // Enable PostRA scheduler pass. - let CompleteModel = 0; // Covers instructions applicable to Cortex-A55. + let CompleteModel = 1; // Covers instructions applicable to Cortex-A55. - list UnsupportedFeatures = [HasSVE]; + list UnsupportedFeatures = !listconcat(SVEUnsupported.F, + PAUnsupported.F); // FIXME: Remove when all errors have been fixed. let FullInstRWOverlapCheck = 0; @@ -158,10 +159,19 @@ let Latency = n; let BeginGroup = 1; } +class CortexA55WriteVqDual : SchedWriteRes<[res, res]> { + let Latency = n; +} class CortexA55WriteVqL : SchedWriteRes<[res, res, res, res]> { let Latency = n; let BeginGroup = 1; } +// Used for tbl/tbx +class CortexA55WriteVqTab : SchedWriteRes<[res, res]> { + let Latency = n; + let BeginGroup = 1; + let ResourceCycles = [rc, rc]; +} def CortexA55WriteDotScVq_4 : CortexA55WriteVq<4, CortexA55UnitFPALU>; def CortexA55WriteDotVq_4 : CortexA55WriteVq<4, CortexA55UnitFPALU>; def CortexA55WriteDotVd_4 : CortexA55WriteVd<4, CortexA55UnitFPALU>; @@ -177,6 +187,15 @@ def CortexA55WriteAluVd_1 : CortexA55WriteVd<1, CortexA55UnitFPALU>; def CortexA55WriteAluVq_1 : CortexA55WriteVq<1, CortexA55UnitFPALU>; def CortexA55WriteAluVqL_4 : CortexA55WriteVqL<4, CortexA55UnitFPALU>; +def CortexA55WriteAluTab_2_1 : CortexA55WriteVqTab<2, 1, CortexA55UnitFPALU>; +def CortexA55WriteAluTab_3_2 : CortexA55WriteVqTab<3, 2, CortexA55UnitFPALU>; +def CortexA55WriteAluTab_4_3 : CortexA55WriteVqTab<4, 3, CortexA55UnitFPALU>; +def CortexA55WriteAluTab_5_4 : CortexA55WriteVqTab<5, 4, CortexA55UnitFPALU>; +def CortexA55WriteAluTab_6_5 : CortexA55WriteVqTab<6, 5, CortexA55UnitFPALU>; +// According to A55 optimization guide AESIMC/AESMC can be dual-issued. +def CortexA55WriteAesMixCol : CortexA55WriteVqDual<2, CortexA55UnitFPALU>; +def CortexA55WriteAesEnc : CortexA55WriteVq<2, CortexA55UnitFPALU>; +def CortexA55WriteAesDec : CortexA55WriteVq<2, CortexA55UnitFPALU>; def : SchedAlias>; def : SchedAlias>; @@ -263,6 +282,8 @@ def CortexA55ReadMlaL : SchedReadAdvance<3, [CortexA55WriteMlaLVq_4]>; def CortexA55ReadDot : SchedReadAdvance<3, [CortexA55WriteDotVd_4, CortexA55WriteDotVq_4]>; def CortexA55ReadDotSc : SchedReadAdvance<3, [CortexA55WriteDotScVq_4]>; +def CortexA55ReadAesMC : SchedReadAdvance<1, [CortexA55WriteAesEnc]>; +def CortexA55ReadAesIMC : SchedReadAdvance<1, [CortexA55WriteAesDec]>; //===----------------------------------------------------------------------===// // Subtarget-specific InstRWs. @@ -488,4 +509,114 @@ def : InstRW<[CortexA55WriteAluVd_3], (instregex "[SU]RSHLv(1i64|2i32|4i16|8i8)")>; def : InstRW<[CortexA55WriteAluVq_3], (instregex "[SU]RSHLv(2i64|4i32|8i16|16i8)")>; +// 4.16. Advanced SIMD floating-point instructions +// ASIMD FP compare +def : InstRW<[CortexA55WriteAluVd_2], (instregex "FAC(GT|GE)(16|32|64|v2f32|v4f16)", + "FCM(EQ|GT|GE)(16|32|64|v1i|v2i32|v4i16|v2f32|v4f16)", "FCM(LE|LT)(v1i|v2i32|v4i16)")>; +// , "FCM(LE|LT)(v2i64|v4i32|v8i16" +def : InstRW<[CortexA55WriteAluVq_2], (instregex "FAC(GT|GE)(v2f64|v4f32|v8f16)", + "FCM(EQ|GT|GE)(v2i64|v4i32|v2f64|v4f32|v8)", "FCM(LE|LT)(v2i64|v4i32|v8i16)")>; +// ASIMD FP divide, H-form +def : InstRW<[CortexA55WriteFDivHP], (instrs FDIVv4f16, FDIVv8f16)>; +// ASIMD FP divide, S-form +def : InstRW<[CortexA55WriteFDivSP], (instrs FDIVv2f32, FDIVv4f32)>; +// ASIMD FP divide, D-form +def : InstRW<[CortexA55WriteFDivDP], (instrs FDIVv2f64)>; +// ASIMD FP max/min, reduce +def : InstRW<[CortexA55WriteAluVq_4], (instregex "FMAX(NM)?Vv", "FMIN(NM)?Vv")>; +// ASIMD FP multiply, by element +def : InstRW<[CortexA55WriteAluVq_4], (instregex "FMULX?v[1248]i")>; + +// 4.17. Advanced SIMD miscellaneous instructions +// ASIMD bit reverse / ASIMD bitwise insert +def : InstRW<[CortexA55WriteAluVd_2], (instregex "R?BITv8", "BIFv8", "BSLv8")>; +def : InstRW<[CortexA55WriteAluVq_2], (instregex "R?BITv16", "BIFv16", "BSLv16")>; +// ASIMD count +def : InstRW<[CortexA55WriteAluVd_2], (instregex "CLZv(2i32|4i16|8i8)", "CNTv8")>; +def : InstRW<[CortexA55WriteAluVq_2], (instregex "CLZv(4i32|8i16|16i8)", "CNTv16")>; +// ASIMD count #2 +def : InstRW<[CortexA55WriteAluVd_3], (instregex "CLSv(2i32|4i16|8i8)")>; +def : InstRW<[CortexA55WriteAluVq_3], (instregex "CLSv(4i32|8i16|16i8)")>; +// ASIMD extract +def : InstRW<[CortexA55WriteAluVd_2], (instrs EXTv8i8)>; +def : InstRW<[CortexA55WriteAluVq_2], (instrs EXTv16i8)>; +// ASIMD extract narrow +//def : InstRW<[CortexA55WriteAluVd_1], (instregex "XTNv(2i32|4i16|8i8)")>; +//def : InstRW<[CortexA55WriteAluVq_1], (instregex "XTNv(4i32|8i16|16i8)")>; +// ASIMD extract narrow, saturating +// Cortex A55 optimization guide says all sqxtn/uqxtn instructions have +// throughput 2 +def : InstRW<[CortexA55WriteAluVd_2], (instregex "[SU]QXTNv", "SQXTUNv")>; +// ASIMD insert, element to element +def : InstRW<[CortexA55WriteAluVd_2], (instregex "INSvi(8|16|32|64)lane$")>; +// ASIMD move, integer immed +def : InstRW<[CortexA55WriteAluVd_1], (instregex "MOVI(D|v2i|v2s|v8b|v4i16)")>; +def : InstRW<[CortexA55WriteAluVq_1], (instregex "MOVIv(2d|4s|8i|16b|4i32)")>; +// ASIMD move, FP immed +def : InstRW<[CortexA55WriteAluVd_1], (instregex "FMOVv")>; +// ASIMD reverse +def : InstRW<[CortexA55WriteAluVd_2], (instregex "REV16v8i8", "REV32v(4i16|8i8)", + "REV64v(2i32|4i16|8i8)")>; +def : InstRW<[CortexA55WriteAluVq_2], (instregex "REV16v16i8", "REV32v(8i16|16i8)", + "REV64v(4i32|8i16|16i8)")>; +// ASIMD table lookup (TBL, 1 reg) +def : InstRW<[CortexA55WriteAluTab_2_1], (instrs TBLv8i8One, TBLv16i8One)>; +// ASIMD table lookup (TBL, 2 regs) +def : InstRW<[CortexA55WriteAluTab_3_2], (instrs TBLv8i8Two, TBLv16i8Two)>; +// ASIMD table lookup (TBL, 3 regs) +def : InstRW<[CortexA55WriteAluTab_4_3], (instrs TBLv8i8Three, TBLv16i8Three)>; +// ASIMD table lookup (TBL, 4 regs) +def : InstRW<[CortexA55WriteAluTab_5_4], (instrs TBLv8i8Four, TBLv16i8Four)>; +// ASIMD table lookup (TBX, 1 reg) +def : InstRW<[CortexA55WriteAluTab_3_2], (instrs TBXv8i8One, TBXv16i8One)>; +// ASIMD table lookup (TBX, 2 regs) +def : InstRW<[CortexA55WriteAluTab_4_3], (instrs TBXv8i8Two, TBXv16i8Two)>; +// ASIMD table lookup (TBX, 3 regs) +def : InstRW<[CortexA55WriteAluTab_5_4], (instrs TBXv8i8Three, TBXv16i8Three)>; +// ASIMD table lookup (TBX, 4 regs) +def : InstRW<[CortexA55WriteAluTab_6_5], (instrs TBXv8i8Four, TBXv16i8Four)>; +// ASIMD transfer, element to gen reg +// FIXME: AArch64 instruction definition has WriteVq for SMOV/UMOV, while +// A55 optimization guide says throughput is 2 +def : InstRW<[CortexA55WriteAluVd_2], (instregex "[SU]MOVv")>; +// ASIMD transfer, gen reg to element +// FIXME: AArch64 instruction definition has WriteVq for INS.*gpr, while +// A55 optimization guide says throughput is 2 +def : InstRW<[CortexA55WriteAluVd_2], (instregex "INSvi(8|16|32|64)gpr$")>; +// ASIMD transpose, 64-bit (.2D) +def : InstRW<[CortexA55WriteAluVq_2], (instrs TRN1v2i32, TRN2v2i32)>; +// ASIMD transpose, other +def : InstRW<[CortexA55WriteAluVd_2], (instregex "TRN[12]v(4i16|8i8)$")>; +def : InstRW<[CortexA55WriteAluVq_2], (instregex "TRN[12]v(2i64|4i32|8i16|16i8)$")>; +// ASIMD unzip/zip +def : InstRW<[CortexA55WriteAluVd_2], (instregex "(ZIP|UZP)[12]v(2i32|4i16|8i8)")>; +def : InstRW<[CortexA55WriteAluVq_2], (instregex "(ZIP|UZP)[12]v(2i64|4i32|8i16|16i8)")>; + +// 4.20. Cryptographic Extension +// Crypto AES ops +def : InstRW<[CortexA55WriteAesEnc], (instrs AESErr)>; +def : InstRW<[CortexA55WriteAesDec], (instrs AESDrr)>; +// Crypto AES ops #2 +def : InstRW<[CortexA55WriteAesMixCol, CortexA55ReadAesIMC], (instrs AESIMCrr, AESIMCrrTied)>; +def : InstRW<[CortexA55WriteAesMixCol, CortexA55ReadAesMC], (instrs AESMCrr, AESMCrrTied)>; +// Crypto polynomial (64x64) multiply long +def : InstRW<[CortexA55WriteAluVq_2], (instrs PMULLv1i64, PMULLv2i64)>; +// Crypto SHA1 xor ops +def : InstRW<[CortexA55WriteAluVq_2], (instrs SHA1SU0rrr)>; +// Crypto SHA1 schedule acceleration ops +def : InstRW<[CortexA55WriteAluVq_2], (instrs SHA1Hrr, SHA1SU1rr)>; +// Crypto SHA1 hash acceleration ops +def : InstRW<[CortexA55WriteAluVq_4], (instregex "SHA1[CMP]rrr$")>; +// Crypto SHA256 schedule acceleration ops +def : InstRW<[CortexA55WriteAluVq_3], (instrs SHA256SU0rr)>; +// Crypto SHA256 schedule acceleration ops +def : InstRW<[CortexA55WriteAluVq_4], (instrs SHA256SU1rrr)>; +// Crypto SHA256 hash acceleration ops +def : InstRW<[CortexA55WriteAluVq_4], (instregex "SHA256H2?rrr$")>; + +// 4.21. CRC +// CRC checksum ops +def : InstRW<[CortexA55WriteAluVd_2], (instregex "CRC32C?[BHX]rr$")>; +// CRC checksum ops #2 +def : InstRW<[CortexA55WriteAluVd_1], (instrs CRC32CWrr, CRC32Wrr)>; } diff --git a/llvm/test/Analysis/CostModel/AArch64/shuffle-select.ll b/llvm/test/Analysis/CostModel/AArch64/shuffle-select.ll --- a/llvm/test/Analysis/CostModel/AArch64/shuffle-select.ll +++ b/llvm/test/Analysis/CostModel/AArch64/shuffle-select.ll @@ -4,7 +4,7 @@ ; COST-LABEL: sel.v8i8 ; COST: Found an estimated cost of 42 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> ; CODE-LABEL: sel.v8i8 -; CODE: tbl v0.8b, { v0.16b }, v1.8b +; CODE: tbl v0.8b, { v0.16b }, v2.8b define <8 x i8> @sel.v8i8(<8 x i8> %v0, <8 x i8> %v1) { %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> ret <8 x i8> %tmp0 diff --git a/llvm/test/Analysis/CostModel/AArch64/vector-select.ll b/llvm/test/Analysis/CostModel/AArch64/vector-select.ll --- a/llvm/test/Analysis/CostModel/AArch64/vector-select.ll +++ b/llvm/test/Analysis/CostModel/AArch64/vector-select.ll @@ -123,9 +123,9 @@ ; CODE: mov ; CODE: mov ; CODE: mov +; CODE: ldr ; CODE: cmge ; CODE: cmge -; CODE: ldr ; CODE: bif ; CODE: bif ; CODE: ext diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll @@ -57,10 +57,11 @@ ; GISEL-LABEL: combine_vec_udiv_nonuniform: ; GISEL: // %bb.0: ; GISEL-NEXT: adrp x8, .LCPI1_4 -; GISEL-NEXT: adrp x9, .LCPI1_0 +; GISEL-NEXT: adrp x9, .LCPI1_5 +; GISEL-NEXT: adrp x10, .LCPI1_0 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI1_4] ; GISEL-NEXT: adrp x8, .LCPI1_3 -; GISEL-NEXT: ldr q5, [x9, :lo12:.LCPI1_0] +; GISEL-NEXT: ldr q5, [x10, :lo12:.LCPI1_0] ; GISEL-NEXT: neg v1.8h, v1.8h ; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI1_3] ; GISEL-NEXT: adrp x8, .LCPI1_2 @@ -68,19 +69,18 @@ ; GISEL-NEXT: umull2 v3.4s, v1.8h, v2.8h ; GISEL-NEXT: umull v1.4s, v1.4h, v2.4h ; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI1_2] -; GISEL-NEXT: adrp x8, .LCPI1_5 +; GISEL-NEXT: adrp x8, .LCPI1_1 ; GISEL-NEXT: uzp2 v1.8h, v1.8h, v3.8h +; GISEL-NEXT: ldr q6, [x8, :lo12:.LCPI1_1] ; GISEL-NEXT: sub v3.8h, v0.8h, v1.8h ; GISEL-NEXT: umull2 v4.4s, v3.8h, v2.8h ; GISEL-NEXT: umull v2.4s, v3.4h, v2.4h -; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI1_5] -; GISEL-NEXT: adrp x8, .LCPI1_1 +; GISEL-NEXT: ldr q3, [x9, :lo12:.LCPI1_5] ; GISEL-NEXT: cmeq v3.8h, v3.8h, v5.8h ; GISEL-NEXT: uzp2 v2.8h, v2.8h, v4.8h -; GISEL-NEXT: ldr q4, [x8, :lo12:.LCPI1_1] ; GISEL-NEXT: shl v3.8h, v3.8h, #15 ; GISEL-NEXT: add v1.8h, v2.8h, v1.8h -; GISEL-NEXT: neg v2.8h, v4.8h +; GISEL-NEXT: neg v2.8h, v6.8h ; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h ; GISEL-NEXT: sshr v2.8h, v3.8h, #15 ; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b @@ -100,33 +100,33 @@ ; SDAG-NEXT: ushl v0.8h, v0.8h, v1.8h ; SDAG-NEXT: umull2 v1.4s, v0.8h, v2.8h ; SDAG-NEXT: umull v0.4s, v0.4h, v2.4h +; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI2_2] ; SDAG-NEXT: uzp2 v0.8h, v0.8h, v1.8h -; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI2_2] -; SDAG-NEXT: ushl v0.8h, v0.8h, v1.8h +; SDAG-NEXT: ushl v0.8h, v0.8h, v2.8h ; SDAG-NEXT: ret ; ; GISEL-LABEL: combine_vec_udiv_nonuniform2: ; GISEL: // %bb.0: ; GISEL-NEXT: adrp x8, .LCPI2_3 ; GISEL-NEXT: adrp x9, .LCPI2_4 -; GISEL-NEXT: adrp x10, .LCPI2_0 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI2_3] ; GISEL-NEXT: adrp x8, .LCPI2_2 -; GISEL-NEXT: ldr q4, [x10, :lo12:.LCPI2_0] +; GISEL-NEXT: ldr q2, [x9, :lo12:.LCPI2_4] +; GISEL-NEXT: adrp x9, .LCPI2_0 ; GISEL-NEXT: neg v1.8h, v1.8h -; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI2_2] +; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI2_2] ; GISEL-NEXT: adrp x8, .LCPI2_1 +; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI2_0] ; GISEL-NEXT: ushl v1.8h, v0.8h, v1.8h -; GISEL-NEXT: umull2 v3.4s, v1.8h, v2.8h -; GISEL-NEXT: ldr q5, [x8, :lo12:.LCPI2_1] -; GISEL-NEXT: umull v1.4s, v1.4h, v2.4h -; GISEL-NEXT: ldr q2, [x9, :lo12:.LCPI2_4] +; GISEL-NEXT: umull2 v5.4s, v1.8h, v3.8h +; GISEL-NEXT: umull v1.4s, v1.4h, v3.4h +; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI2_1] ; GISEL-NEXT: cmeq v2.8h, v2.8h, v4.8h -; GISEL-NEXT: uzp2 v1.8h, v1.8h, v3.8h -; GISEL-NEXT: neg v3.8h, v5.8h +; GISEL-NEXT: neg v3.8h, v3.8h ; GISEL-NEXT: shl v2.8h, v2.8h, #15 -; GISEL-NEXT: ushl v1.8h, v1.8h, v3.8h +; GISEL-NEXT: uzp2 v1.8h, v1.8h, v5.8h ; GISEL-NEXT: sshr v2.8h, v2.8h, #15 +; GISEL-NEXT: ushl v1.8h, v1.8h, v3.8h ; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b ; GISEL-NEXT: ret %1 = udiv <8 x i16> %x, @@ -154,20 +154,20 @@ ; GISEL-NEXT: adrp x9, .LCPI3_0 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI3_2] ; GISEL-NEXT: adrp x8, .LCPI3_3 -; GISEL-NEXT: ldr q3, [x9, :lo12:.LCPI3_0] +; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI3_0] ; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h +; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI3_3] ; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h -; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h -; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI3_3] ; GISEL-NEXT: adrp x8, .LCPI3_1 -; GISEL-NEXT: cmeq v2.8h, v2.8h, v3.8h -; GISEL-NEXT: sub v4.8h, v0.8h, v1.8h -; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI3_1] -; GISEL-NEXT: shl v2.8h, v2.8h, #15 -; GISEL-NEXT: usra v1.8h, v4.8h, #1 -; GISEL-NEXT: neg v3.8h, v3.8h -; GISEL-NEXT: sshr v2.8h, v2.8h, #15 -; GISEL-NEXT: ushl v1.8h, v1.8h, v3.8h +; GISEL-NEXT: cmeq v3.8h, v3.8h, v4.8h +; GISEL-NEXT: ldr q4, [x8, :lo12:.LCPI3_1] +; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h +; GISEL-NEXT: shl v3.8h, v3.8h, #15 +; GISEL-NEXT: sub v2.8h, v0.8h, v1.8h +; GISEL-NEXT: usra v1.8h, v2.8h, #1 +; GISEL-NEXT: neg v2.8h, v4.8h +; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h +; GISEL-NEXT: sshr v2.8h, v3.8h, #15 ; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b ; GISEL-NEXT: ret %1 = udiv <8 x i16> %x, @@ -181,37 +181,37 @@ ; SDAG-NEXT: adrp x9, .LCPI4_2 ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI4_0] ; SDAG-NEXT: adrp x8, .LCPI4_1 -; SDAG-NEXT: ldr q3, [x9, :lo12:.LCPI4_2] ; SDAG-NEXT: umull2 v2.8h, v0.16b, v1.16b +; SDAG-NEXT: ldr q3, [x8, :lo12:.LCPI4_1] ; SDAG-NEXT: umull v1.8h, v0.8b, v1.8b -; SDAG-NEXT: uzp2 v1.16b, v1.16b, v2.16b -; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI4_1] ; SDAG-NEXT: adrp x8, .LCPI4_3 -; SDAG-NEXT: ushl v1.16b, v1.16b, v2.16b -; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI4_3] -; SDAG-NEXT: and v1.16b, v1.16b, v3.16b -; SDAG-NEXT: and v0.16b, v0.16b, v2.16b +; SDAG-NEXT: uzp2 v1.16b, v1.16b, v2.16b +; SDAG-NEXT: ldr q2, [x9, :lo12:.LCPI4_2] +; SDAG-NEXT: ushl v1.16b, v1.16b, v3.16b +; SDAG-NEXT: ldr q3, [x8, :lo12:.LCPI4_3] +; SDAG-NEXT: and v1.16b, v1.16b, v2.16b +; SDAG-NEXT: and v0.16b, v0.16b, v3.16b ; SDAG-NEXT: orr v0.16b, v0.16b, v1.16b ; SDAG-NEXT: ret ; ; GISEL-LABEL: combine_vec_udiv_nonuniform4: ; GISEL: // %bb.0: ; GISEL-NEXT: adrp x8, .LCPI4_2 -; GISEL-NEXT: adrp x9, .LCPI4_0 +; GISEL-NEXT: adrp x9, .LCPI4_3 +; GISEL-NEXT: adrp x10, .LCPI4_0 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI4_2] -; GISEL-NEXT: adrp x8, .LCPI4_3 -; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI4_0] -; GISEL-NEXT: umull2 v2.8h, v0.16b, v1.16b -; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI4_3] -; GISEL-NEXT: umull v1.8h, v0.8b, v1.8b ; GISEL-NEXT: adrp x8, .LCPI4_1 -; GISEL-NEXT: cmeq v3.16b, v3.16b, v4.16b -; GISEL-NEXT: uzp2 v1.16b, v1.16b, v2.16b -; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI4_1] -; GISEL-NEXT: shl v3.16b, v3.16b, #7 -; GISEL-NEXT: neg v2.16b, v2.16b -; GISEL-NEXT: ushl v1.16b, v1.16b, v2.16b -; GISEL-NEXT: sshr v2.16b, v3.16b, #7 +; GISEL-NEXT: ldr q2, [x9, :lo12:.LCPI4_3] +; GISEL-NEXT: ldr q3, [x10, :lo12:.LCPI4_0] +; GISEL-NEXT: umull2 v4.8h, v0.16b, v1.16b +; GISEL-NEXT: umull v1.8h, v0.8b, v1.8b +; GISEL-NEXT: cmeq v2.16b, v2.16b, v3.16b +; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI4_1] +; GISEL-NEXT: shl v2.16b, v2.16b, #7 +; GISEL-NEXT: uzp2 v1.16b, v1.16b, v4.16b +; GISEL-NEXT: neg v3.16b, v3.16b +; GISEL-NEXT: sshr v2.16b, v2.16b, #7 +; GISEL-NEXT: ushl v1.16b, v1.16b, v3.16b ; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b ; GISEL-NEXT: ret %div = udiv <16 x i8> %x, @@ -248,26 +248,26 @@ ; GISEL-LABEL: pr38477: ; GISEL: // %bb.0: ; GISEL-NEXT: adrp x8, .LCPI5_3 -; GISEL-NEXT: adrp x9, .LCPI5_0 +; GISEL-NEXT: adrp x9, .LCPI5_4 +; GISEL-NEXT: adrp x10, .LCPI5_0 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI5_3] ; GISEL-NEXT: adrp x8, .LCPI5_2 -; GISEL-NEXT: ldr q5, [x9, :lo12:.LCPI5_0] +; GISEL-NEXT: ldr q5, [x10, :lo12:.LCPI5_0] ; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h ; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h ; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h ; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI5_2] -; GISEL-NEXT: adrp x8, .LCPI5_4 +; GISEL-NEXT: adrp x8, .LCPI5_1 ; GISEL-NEXT: sub v3.8h, v0.8h, v1.8h ; GISEL-NEXT: umull2 v4.4s, v3.8h, v2.8h +; GISEL-NEXT: ldr q6, [x8, :lo12:.LCPI5_1] ; GISEL-NEXT: umull v2.4s, v3.4h, v2.4h -; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI5_4] -; GISEL-NEXT: adrp x8, .LCPI5_1 +; GISEL-NEXT: ldr q3, [x9, :lo12:.LCPI5_4] ; GISEL-NEXT: cmeq v3.8h, v3.8h, v5.8h ; GISEL-NEXT: uzp2 v2.8h, v2.8h, v4.8h -; GISEL-NEXT: ldr q4, [x8, :lo12:.LCPI5_1] ; GISEL-NEXT: shl v3.8h, v3.8h, #15 ; GISEL-NEXT: add v1.8h, v2.8h, v1.8h -; GISEL-NEXT: neg v2.8h, v4.8h +; GISEL-NEXT: neg v2.8h, v6.8h ; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h ; GISEL-NEXT: sshr v2.8h, v3.8h, #15 ; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b diff --git a/llvm/test/CodeGen/AArch64/aarch64-avoid-illegal-extract-subvector.ll b/llvm/test/CodeGen/AArch64/aarch64-avoid-illegal-extract-subvector.ll --- a/llvm/test/CodeGen/AArch64/aarch64-avoid-illegal-extract-subvector.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-avoid-illegal-extract-subvector.ll @@ -4,9 +4,10 @@ ; CHECK-LABEL: test1: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: mov w9, v0.s[2] -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: mov v0.d[1], x9 +; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: mov w8, v0.s[2] +; CHECK-NEXT: mov v1.d[1], x8 +; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: ret %i1 = extractelement <4 x i32> %x, i32 1 %zi1 = zext i32 %i1 to i64 @@ -24,9 +25,9 @@ ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_0] ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: mov w9, v0.s[2] ; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: mov v1.d[1], x9 +; CHECK-NEXT: mov w8, v0.s[2] +; CHECK-NEXT: mov v1.d[1], x8 ; CHECK-NEXT: ret entry: %1 = add <4 x i32> %0, diff --git a/llvm/test/CodeGen/AArch64/aarch64-be-bv.ll b/llvm/test/CodeGen/AArch64/aarch64-be-bv.ll --- a/llvm/test/CodeGen/AArch64/aarch64-be-bv.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-be-bv.ll @@ -167,8 +167,8 @@ ; CHECK-LABEL: fmov_modimm_t11: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, vec_v8i16 -; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16 ; CHECK-NEXT: fmov v1.4s, #3.00000000 +; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16 ; CHECK-NEXT: ld1 { v0.8h }, [x8] ; CHECK-NEXT: add v0.8h, v0.8h, v1.8h ; CHECK-NEXT: st1 { v0.8h }, [x8] @@ -183,8 +183,8 @@ ; CHECK-LABEL: fmov_modimm_t12: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, vec_v8i16 -; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16 ; CHECK-NEXT: fmov v1.2d, #0.17968750 +; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16 ; CHECK-NEXT: ld1 { v0.8h }, [x8] ; CHECK-NEXT: add v0.8h, v0.8h, v1.8h ; CHECK-NEXT: st1 { v0.8h }, [x8] diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll --- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll @@ -111,11 +111,11 @@ define <4 x i32> @amull_v4i16_v4i32(<4 x i16>* %A, <4 x i16>* %B) nounwind { ; CHECK-LABEL: amull_v4i16_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: ldr d2, [x1] -; CHECK-NEXT: movi v0.2d, #0x00ffff0000ffff -; CHECK-NEXT: smull v1.4s, v1.4h, v2.4h -; CHECK-NEXT: and v0.16b, v1.16b, v0.16b +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h +; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = load <4 x i16>, <4 x i16>* %B @@ -129,11 +129,11 @@ define <2 x i64> @amull_v2i32_v2i64(<2 x i32>* %A, <2 x i32>* %B) nounwind { ; CHECK-LABEL: amull_v2i32_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: ldr d2, [x1] -; CHECK-NEXT: movi v0.2d, #0x000000ffffffff -; CHECK-NEXT: smull v1.2d, v1.2s, v2.2s -; CHECK-NEXT: and v0.16b, v1.16b, v0.16b +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s +; CHECK-NEXT: movi v1.2d, #0x000000ffffffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = load <2 x i32>, <2 x i32>* %B @@ -275,12 +275,12 @@ define <4 x i32> @amlal_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { ; CHECK-LABEL: amlal_v4i16_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: ldr q2, [x0] -; CHECK-NEXT: ldr d3, [x2] +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr d2, [x2] +; CHECK-NEXT: smlal v1.4s, v0.4h, v2.4h ; CHECK-NEXT: movi v0.2d, #0x00ffff0000ffff -; CHECK-NEXT: smlal v2.4s, v1.4h, v3.4h -; CHECK-NEXT: and v0.16b, v2.16b, v0.16b +; CHECK-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = load <4 x i16>, <4 x i16>* %B @@ -296,12 +296,12 @@ define <2 x i64> @amlal_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { ; CHECK-LABEL: amlal_v2i32_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: ldr q2, [x0] -; CHECK-NEXT: ldr d3, [x2] +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr d2, [x2] +; CHECK-NEXT: smlal v1.2d, v0.2s, v2.2s ; CHECK-NEXT: movi v0.2d, #0x000000ffffffff -; CHECK-NEXT: smlal v2.2d, v1.2s, v3.2s -; CHECK-NEXT: and v0.16b, v2.16b, v0.16b +; CHECK-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = load <2 x i32>, <2 x i32>* %B @@ -445,12 +445,12 @@ define <4 x i32> @amlsl_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { ; CHECK-LABEL: amlsl_v4i16_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: ldr q2, [x0] -; CHECK-NEXT: ldr d3, [x2] +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr d2, [x2] +; CHECK-NEXT: smlsl v1.4s, v0.4h, v2.4h ; CHECK-NEXT: movi v0.2d, #0x00ffff0000ffff -; CHECK-NEXT: smlsl v2.4s, v1.4h, v3.4h -; CHECK-NEXT: and v0.16b, v2.16b, v0.16b +; CHECK-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = load <4 x i16>, <4 x i16>* %B @@ -466,12 +466,12 @@ define <2 x i64> @amlsl_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { ; CHECK-LABEL: amlsl_v2i32_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: ldr q2, [x0] -; CHECK-NEXT: ldr d3, [x2] +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr d2, [x2] +; CHECK-NEXT: smlsl v1.2d, v0.2s, v2.2s ; CHECK-NEXT: movi v0.2d, #0x000000ffffffff -; CHECK-NEXT: smlsl v2.2d, v1.2s, v3.2s -; CHECK-NEXT: and v0.16b, v2.16b, v0.16b +; CHECK-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = load <2 x i32>, <2 x i32>* %B @@ -599,9 +599,9 @@ ; CHECK-LABEL: amull_extvec_v4i16_v4i32: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #1234 +; CHECK-NEXT: dup v1.4h, w8 +; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h ; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff -; CHECK-NEXT: dup v2.4h, w8 -; CHECK-NEXT: smull v0.4s, v0.4h, v2.4h ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %tmp3 = zext <4 x i16> %arg to <4 x i32> @@ -614,9 +614,9 @@ ; CHECK-LABEL: amull_extvec_v2i32_v2i64: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #1234 +; CHECK-NEXT: dup v1.2s, w8 +; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s ; CHECK-NEXT: movi v1.2d, #0x000000ffffffff -; CHECK-NEXT: dup v2.2s, w8 -; CHECK-NEXT: smull v0.2d, v0.2s, v2.2s ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %tmp3 = zext <2 x i32> %arg to <2 x i64> @@ -768,9 +768,9 @@ define <8 x i32> @amull2_i16(<8 x i16> %arg1, <8 x i16> %arg2) { ; CHECK-LABEL: amull2_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.2d, #0x00ffff0000ffff ; CHECK-NEXT: smull2 v3.4s, v0.8h, v1.8h ; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h +; CHECK-NEXT: movi v2.2d, #0x00ffff0000ffff ; CHECK-NEXT: and v1.16b, v3.16b, v2.16b ; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret @@ -784,9 +784,9 @@ define <4 x i64> @amull2_i32(<4 x i32> %arg1, <4 x i32> %arg2) { ; CHECK-LABEL: amull2_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.2d, #0x000000ffffffff ; CHECK-NEXT: smull2 v3.2d, v0.4s, v1.4s ; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s +; CHECK-NEXT: movi v2.2d, #0x000000ffffffff ; CHECK-NEXT: and v1.16b, v3.16b, v2.16b ; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/aarch64-wide-mul.ll b/llvm/test/CodeGen/AArch64/aarch64-wide-mul.ll --- a/llvm/test/CodeGen/AArch64/aarch64-wide-mul.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-wide-mul.ll @@ -121,15 +121,15 @@ ; CHECK-NEXT: ushll2 v0.8h, v0.16b, #0 ; CHECK-NEXT: ushll v25.8h, v1.8b, #0 ; CHECK-NEXT: ushll2 v1.8h, v1.16b, #0 +; CHECK-NEXT: mov v16.16b, v7.16b +; CHECK-NEXT: mov v17.16b, v6.16b +; CHECK-NEXT: ldp q6, q7, [sp] ; CHECK-NEXT: ushll v19.4s, v18.4h, #0 ; CHECK-NEXT: ushll v20.4s, v0.4h, #0 ; CHECK-NEXT: ushll2 v18.4s, v18.8h, #0 ; CHECK-NEXT: ushll v26.4s, v25.4h, #0 ; CHECK-NEXT: ushll v27.4s, v1.4h, #0 ; CHECK-NEXT: ushll2 v25.4s, v25.8h, #0 -; CHECK-NEXT: mov v16.16b, v7.16b -; CHECK-NEXT: mov v17.16b, v6.16b -; CHECK-NEXT: ldp q6, q7, [sp] ; CHECK-NEXT: ushll2 v0.4s, v0.8h, #0 ; CHECK-NEXT: ushll2 v1.4s, v1.8h, #0 ; CHECK-NEXT: ext v21.16b, v19.16b, v19.16b, #8 diff --git a/llvm/test/CodeGen/AArch64/active_lane_mask.ll b/llvm/test/CodeGen/AArch64/active_lane_mask.ll --- a/llvm/test/CodeGen/AArch64/active_lane_mask.ll +++ b/llvm/test/CodeGen/AArch64/active_lane_mask.ll @@ -442,14 +442,14 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI25_0 ; CHECK-NEXT: dup v0.4h, w0 -; CHECK-NEXT: movi d2, #0xff00ff00ff00ff -; CHECK-NEXT: dup v3.4h, w1 +; CHECK-NEXT: dup v2.4h, w1 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI25_0] ; CHECK-NEXT: bic v0.4h, #255, lsl #8 -; CHECK-NEXT: bic v3.4h, #255, lsl #8 +; CHECK-NEXT: bic v2.4h, #255, lsl #8 ; CHECK-NEXT: add v0.4h, v0.4h, v1.4h -; CHECK-NEXT: umin v0.4h, v0.4h, v2.4h -; CHECK-NEXT: cmhi v0.4h, v3.4h, v0.4h +; CHECK-NEXT: movi d1, #0xff00ff00ff00ff +; CHECK-NEXT: umin v0.4h, v0.4h, v1.4h +; CHECK-NEXT: cmhi v0.4h, v2.4h, v0.4h ; CHECK-NEXT: ret %active.lane.mask = call <4 x i1> @llvm.get.active.lane.mask.v4i1.i8(i8 %index, i8 %TC) ret <4 x i1> %active.lane.mask @@ -459,15 +459,15 @@ ; CHECK-LABEL: lane_mask_v2i1_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI26_0 -; CHECK-NEXT: movi d0, #0x0000ff000000ff -; CHECK-NEXT: dup v1.2s, w0 +; CHECK-NEXT: dup v0.2s, w0 +; CHECK-NEXT: movi d2, #0x0000ff000000ff ; CHECK-NEXT: dup v3.2s, w1 -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI26_0] -; CHECK-NEXT: and v1.8b, v1.8b, v0.8b -; CHECK-NEXT: add v1.2s, v1.2s, v2.2s -; CHECK-NEXT: umin v1.2s, v1.2s, v0.2s -; CHECK-NEXT: and v0.8b, v3.8b, v0.8b -; CHECK-NEXT: cmhi v0.2s, v0.2s, v1.2s +; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI26_0] +; CHECK-NEXT: and v0.8b, v0.8b, v2.8b +; CHECK-NEXT: add v0.2s, v0.2s, v1.2s +; CHECK-NEXT: and v1.8b, v3.8b, v2.8b +; CHECK-NEXT: umin v0.2s, v0.2s, v2.2s +; CHECK-NEXT: cmhi v0.2s, v1.2s, v0.2s ; CHECK-NEXT: ret %active.lane.mask = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i8(i8 %index, i8 %TC) ret <2 x i1> %active.lane.mask diff --git a/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll b/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll --- a/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll +++ b/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll @@ -57,9 +57,9 @@ ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: bl vec_use -; CHECK-NEXT: movi v0.4s, #10 ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: movi v0.4s, #10 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret @@ -134,9 +134,9 @@ ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: bl vec_use -; CHECK-NEXT: movi v0.4s, #6 ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: movi v0.4s, #6 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret @@ -367,9 +367,9 @@ ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: bl vec_use -; CHECK-NEXT: movi v0.4s, #10 ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: movi v0.4s, #10 ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret @@ -446,9 +446,9 @@ ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: bl vec_use -; CHECK-NEXT: movi v0.4s, #2 ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: movi v0.4s, #2 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret @@ -526,9 +526,9 @@ ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s ; CHECK-NEXT: bl vec_use -; CHECK-NEXT: movi v0.4s, #10 ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: movi v0.4s, #10 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret @@ -606,9 +606,9 @@ ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s ; CHECK-NEXT: bl vec_use -; CHECK-NEXT: movi v0.4s, #6 ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: movi v0.4s, #6 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret @@ -685,9 +685,9 @@ ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: bl vec_use -; CHECK-NEXT: movi v0.4s, #2 ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: movi v0.4s, #2 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/arm64-dup.ll b/llvm/test/CodeGen/AArch64/arm64-dup.ll --- a/llvm/test/CodeGen/AArch64/arm64-dup.ll +++ b/llvm/test/CodeGen/AArch64/arm64-dup.ll @@ -446,11 +446,11 @@ define void @disguised_dup(<4 x float> %x, <4 x float>* %p1, <4 x float>* %p2) { ; CHECK-LABEL: disguised_dup: ; CHECK: // %bb.0: -; CHECK-NEXT: ext.16b v1, v0, v0, #12 -; CHECK-NEXT: dup.4s v0, v0[0] -; CHECK-NEXT: ext.16b v1, v1, v0, #8 -; CHECK-NEXT: str q1, [x0] -; CHECK-NEXT: str q0, [x1] +; CHECK-NEXT: dup.4s v1, v0[0] +; CHECK-NEXT: ext.16b v0, v0, v0, #12 +; CHECK-NEXT: ext.16b v0, v0, v1, #8 +; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: str q1, [x1] ; CHECK-NEXT: ret %shuf = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %dup = shufflevector <4 x float> %shuf, <4 x float> undef, <4 x i32> diff --git a/llvm/test/CodeGen/AArch64/arm64-fcopysign.ll b/llvm/test/CodeGen/AArch64/arm64-fcopysign.ll --- a/llvm/test/CodeGen/AArch64/arm64-fcopysign.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fcopysign.ll @@ -36,12 +36,12 @@ define double @test3(double %a, float %b, float %c) nounwind { ; CHECK-LABEL: test3: ; CHECK: ; %bb.0: -; CHECK-NEXT: movi.2d v3, #0xffffffffffffffff -; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: fadd s1, s1, s2 -; CHECK-NEXT: fneg.2d v2, v3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: movi.2d v3, #0xffffffffffffffff +; CHECK-NEXT: fneg.2d v3, v3 ; CHECK-NEXT: fcvt d1, s1 -; CHECK-NEXT: bif.16b v0, v1, v2 +; CHECK-NEXT: bif.16b v0, v1, v3 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %tmp1 = fadd float %b, %c diff --git a/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll b/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll --- a/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll +++ b/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll @@ -185,8 +185,8 @@ ; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: stp q0, q0, [sp] ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload @@ -206,9 +206,9 @@ ; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 64 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: str xzr, [sp, #32] +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: stp q0, q0, [sp] ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload @@ -228,8 +228,8 @@ ; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 80 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] ; CHECK-NEXT: bl something @@ -250,9 +250,9 @@ ; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 96 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: str xzr, [sp, #64] +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] ; CHECK-NEXT: bl something @@ -273,8 +273,8 @@ ; CHECK-NEXT: str x30, [sp, #128] // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 144 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: stp q0, q0, [sp, #96] ; CHECK-NEXT: stp q0, q0, [sp, #64] ; CHECK-NEXT: stp q0, q0, [sp, #32] @@ -298,8 +298,8 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 272 ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: stp q0, q0, [sp, #224] ; CHECK-NEXT: stp q0, q0, [sp, #192] ; CHECK-NEXT: stp q0, q0, [sp, #160] @@ -451,8 +451,8 @@ ; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: stp q0, q0, [sp] ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload @@ -472,11 +472,11 @@ ; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 64 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: mov x8, #-6148914691236517206 ; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: str x8, [sp, #32] +; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: str x8, [sp, #32] ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #64 @@ -495,8 +495,8 @@ ; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 80 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] ; CHECK-NEXT: bl something @@ -517,11 +517,11 @@ ; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 96 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: mov x8, #-6148914691236517206 ; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: str x8, [sp, #64] +; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: stp q0, q0, [sp, #32] +; CHECK-NEXT: str x8, [sp, #64] ; CHECK-NEXT: stp q0, q0, [sp] ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload @@ -541,8 +541,8 @@ ; CHECK-NEXT: str x30, [sp, #128] // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 144 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: stp q0, q0, [sp, #96] ; CHECK-NEXT: stp q0, q0, [sp, #64] ; CHECK-NEXT: stp q0, q0, [sp, #32] @@ -566,8 +566,8 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 272 ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: stp q0, q0, [sp, #224] ; CHECK-NEXT: stp q0, q0, [sp, #192] ; CHECK-NEXT: stp q0, q0, [sp, #160] diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll --- a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll @@ -117,9 +117,9 @@ define <4 x i32> @test_vaddl_a16(<4 x i16> %a, <4 x i16> %b) { ; CHECK-LABEL: test_vaddl_a16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x00ffff0000ffff ; CHECK-NEXT: uaddl v0.4s, v0.4h, v1.4h -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %vmovl.i.i = zext <4 x i16> %a to <4 x i32> @@ -132,9 +132,9 @@ define <2 x i64> @test_vaddl_a32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vaddl_a32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x000000ffffffff ; CHECK-NEXT: uaddl v0.2d, v0.2s, v1.2s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x000000ffffffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %vmovl.i.i = zext <2 x i32> %a to <2 x i64> @@ -247,9 +247,9 @@ define <4 x i32> @test_vaddl_high_a16(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: test_vaddl_high_a16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x00ffff0000ffff ; CHECK-NEXT: uaddl2 v0.4s, v0.8h, v1.8h -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> @@ -264,9 +264,9 @@ define <2 x i64> @test_vaddl_high_a32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test_vaddl_high_a32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x000000ffffffff ; CHECK-NEXT: uaddl2 v0.2d, v0.4s, v1.4s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x000000ffffffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> @@ -360,9 +360,9 @@ define <4 x i32> @test_vaddw_a16(<4 x i32> %a, <4 x i16> %b) { ; CHECK-LABEL: test_vaddw_a16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x00ffff0000ffff ; CHECK-NEXT: uaddw v0.4s, v0.4s, v1.4h -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %vmovl.i.i = zext <4 x i16> %b to <4 x i32> @@ -374,9 +374,9 @@ define <2 x i64> @test_vaddw_a32(<2 x i64> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vaddw_a32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x000000ffffffff ; CHECK-NEXT: uaddw v0.2d, v0.2d, v1.2s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x000000ffffffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %vmovl.i.i = zext <2 x i32> %b to <2 x i64> @@ -474,9 +474,9 @@ define <4 x i32> @test_vaddw_high_a16(<4 x i32> %a, <8 x i16> %b) { ; CHECK-LABEL: test_vaddw_high_a16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x00ffff0000ffff ; CHECK-NEXT: uaddw2 v0.4s, v0.4s, v1.8h -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> @@ -489,9 +489,9 @@ define <2 x i64> @test_vaddw_high_a32(<2 x i64> %a, <4 x i32> %b) { ; CHECK-LABEL: test_vaddw_high_a32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x000000ffffffff ; CHECK-NEXT: uaddw2 v0.2d, v0.2d, v1.4s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x000000ffffffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> @@ -590,9 +590,9 @@ define <4 x i32> @test_vsubl_a16(<4 x i16> %a, <4 x i16> %b) { ; CHECK-LABEL: test_vsubl_a16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x00ffff0000ffff ; CHECK-NEXT: usubl v0.4s, v0.4h, v1.4h -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %vmovl.i.i = zext <4 x i16> %a to <4 x i32> @@ -605,9 +605,9 @@ define <2 x i64> @test_vsubl_a32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vsubl_a32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x000000ffffffff ; CHECK-NEXT: usubl v0.2d, v0.2s, v1.2s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x000000ffffffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %vmovl.i.i = zext <2 x i32> %a to <2 x i64> @@ -720,9 +720,9 @@ define <4 x i32> @test_vsubl_high_a16(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: test_vsubl_high_a16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x00ffff0000ffff ; CHECK-NEXT: usubl2 v0.4s, v0.8h, v1.8h -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> @@ -737,9 +737,9 @@ define <2 x i64> @test_vsubl_high_a32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test_vsubl_high_a32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x000000ffffffff ; CHECK-NEXT: usubl2 v0.2d, v0.4s, v1.4s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x000000ffffffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> @@ -833,9 +833,9 @@ define <4 x i32> @test_vsubw_a16(<4 x i32> %a, <4 x i16> %b) { ; CHECK-LABEL: test_vsubw_a16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x00ffff0000ffff ; CHECK-NEXT: usubw v0.4s, v0.4s, v1.4h -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %vmovl.i.i = zext <4 x i16> %b to <4 x i32> @@ -847,9 +847,9 @@ define <2 x i64> @test_vsubw_a32(<2 x i64> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vsubw_a32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x000000ffffffff ; CHECK-NEXT: usubw v0.2d, v0.2d, v1.2s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x000000ffffffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %vmovl.i.i = zext <2 x i32> %b to <2 x i64> @@ -947,9 +947,9 @@ define <4 x i32> @test_vsubw_high_a16(<4 x i32> %a, <8 x i16> %b) { ; CHECK-LABEL: test_vsubw_high_a16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x00ffff0000ffff ; CHECK-NEXT: usubw2 v0.4s, v0.4s, v1.8h -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> @@ -962,9 +962,9 @@ define <2 x i64> @test_vsubw_high_a32(<2 x i64> %a, <4 x i32> %b) { ; CHECK-LABEL: test_vsubw_high_a32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x000000ffffffff ; CHECK-NEXT: usubw2 v0.2d, v0.2d, v1.4s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x000000ffffffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> @@ -2510,8 +2510,8 @@ ; CHECK-NEXT: fmov d0, x0 ; CHECK-NEXT: fmov d1, x1 ; CHECK-NEXT: pmull v0.1q, v0.1d, v1.1d -; CHECK-NEXT: mov x1, v0.d[1] ; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: mov x1, v0.d[1] ; CHECK-NEXT: ret entry: %vmull2.i = tail call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %a, i64 %b) @@ -2523,8 +2523,8 @@ ; CHECK-LABEL: test_vmull_high_p64: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: pmull2 v0.1q, v0.2d, v1.2d -; CHECK-NEXT: mov x1, v0.d[1] ; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: mov x1, v0.d[1] ; CHECK-NEXT: ret entry: %0 = extractelement <2 x i64> %a, i32 1 diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll b/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll --- a/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll @@ -200,9 +200,9 @@ ; CHECK-LABEL: test_sabd_v2i32_const: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI19_0 -; CHECK-NEXT: movi d0, #0x00ffffffff0000 -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI19_0] -; CHECK-NEXT: sabd v0.2s, v1.2s, v0.2s +; CHECK-NEXT: movi d1, #0x00ffffffff0000 +; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI19_0] +; CHECK-NEXT: sabd v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %1 = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32( <2 x i32> , diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll b/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll --- a/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll @@ -73,14 +73,14 @@ define <2 x i64> @mul2xi64(<2 x i64> %A, <2 x i64> %B) { ; CHECK-LABEL: mul2xi64: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov x9, d1 -; CHECK-NEXT: fmov x10, d0 -; CHECK-NEXT: mov x8, v1.d[1] -; CHECK-NEXT: mov x11, v0.d[1] +; CHECK-NEXT: fmov x8, d1 +; CHECK-NEXT: fmov x9, d0 +; CHECK-NEXT: mov x10, v0.d[1] +; CHECK-NEXT: mul x8, x9, x8 +; CHECK-NEXT: mov x9, v1.d[1] ; CHECK-NEXT: mul x9, x10, x9 -; CHECK-NEXT: mul x8, x11, x8 -; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: mov v0.d[1], x8 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: mov v0.d[1], x9 ; CHECK-NEXT: ret %tmp3 = mul <2 x i64> %A, %B; ret <2 x i64> %tmp3 @@ -162,32 +162,32 @@ ; CHECK-NEXT: smov w9, v0.b[1] ; CHECK-NEXT: smov w10, v0.b[0] ; CHECK-NEXT: smov w11, v0.b[2] -; CHECK-NEXT: smov w12, v0.b[3] -; CHECK-NEXT: smov w13, v0.b[4] ; CHECK-NEXT: sdiv w8, w9, w8 ; CHECK-NEXT: smov w9, v1.b[0] +; CHECK-NEXT: smov w12, v0.b[3] +; CHECK-NEXT: smov w13, v0.b[4] +; CHECK-NEXT: smov w14, v0.b[5] +; CHECK-NEXT: smov w15, v0.b[6] ; CHECK-NEXT: sdiv w9, w10, w9 ; CHECK-NEXT: smov w10, v1.b[2] ; CHECK-NEXT: sdiv w10, w11, w10 ; CHECK-NEXT: smov w11, v1.b[3] ; CHECK-NEXT: fmov s2, w9 -; CHECK-NEXT: smov w9, v1.b[5] ; CHECK-NEXT: mov v2.b[1], w8 +; CHECK-NEXT: smov w8, v1.b[7] ; CHECK-NEXT: sdiv w11, w12, w11 ; CHECK-NEXT: smov w12, v1.b[4] ; CHECK-NEXT: mov v2.b[2], w10 -; CHECK-NEXT: smov w10, v0.b[6] +; CHECK-NEXT: smov w10, v0.b[7] ; CHECK-NEXT: sdiv w12, w13, w12 -; CHECK-NEXT: smov w13, v0.b[5] +; CHECK-NEXT: smov w13, v1.b[5] ; CHECK-NEXT: mov v2.b[3], w11 -; CHECK-NEXT: smov w11, v0.b[7] -; CHECK-NEXT: sdiv w8, w13, w9 -; CHECK-NEXT: smov w9, v1.b[6] +; CHECK-NEXT: sdiv w13, w14, w13 +; CHECK-NEXT: smov w14, v1.b[6] ; CHECK-NEXT: mov v2.b[4], w12 -; CHECK-NEXT: sdiv w9, w10, w9 -; CHECK-NEXT: smov w10, v1.b[7] -; CHECK-NEXT: mov v2.b[5], w8 -; CHECK-NEXT: sdiv w8, w11, w10 +; CHECK-NEXT: sdiv w9, w15, w14 +; CHECK-NEXT: mov v2.b[5], w13 +; CHECK-NEXT: sdiv w8, w10, w8 ; CHECK-NEXT: mov v2.b[6], w9 ; CHECK-NEXT: mov v2.b[7], w8 ; CHECK-NEXT: fmov d0, d2 @@ -203,66 +203,66 @@ ; CHECK-NEXT: smov w9, v0.b[1] ; CHECK-NEXT: smov w10, v0.b[0] ; CHECK-NEXT: smov w11, v0.b[2] +; CHECK-NEXT: sdiv w8, w9, w8 +; CHECK-NEXT: smov w9, v1.b[0] ; CHECK-NEXT: smov w12, v0.b[3] ; CHECK-NEXT: smov w13, v0.b[4] ; CHECK-NEXT: smov w14, v0.b[5] ; CHECK-NEXT: smov w15, v0.b[6] -; CHECK-NEXT: sdiv w8, w9, w8 -; CHECK-NEXT: smov w9, v1.b[0] ; CHECK-NEXT: smov w16, v0.b[7] ; CHECK-NEXT: smov w17, v0.b[8] +; CHECK-NEXT: smov w18, v0.b[9] +; CHECK-NEXT: smov w0, v0.b[10] +; CHECK-NEXT: smov w1, v0.b[11] +; CHECK-NEXT: smov w2, v0.b[12] ; CHECK-NEXT: sdiv w9, w10, w9 ; CHECK-NEXT: smov w10, v1.b[2] ; CHECK-NEXT: sdiv w10, w11, w10 ; CHECK-NEXT: smov w11, v1.b[3] ; CHECK-NEXT: fmov s2, w9 -; CHECK-NEXT: smov w9, v1.b[9] ; CHECK-NEXT: mov v2.b[1], w8 +; CHECK-NEXT: smov w8, v1.b[13] ; CHECK-NEXT: sdiv w11, w12, w11 ; CHECK-NEXT: smov w12, v1.b[4] ; CHECK-NEXT: mov v2.b[2], w10 -; CHECK-NEXT: smov w10, v0.b[10] +; CHECK-NEXT: smov w10, v0.b[13] ; CHECK-NEXT: sdiv w12, w13, w12 ; CHECK-NEXT: smov w13, v1.b[5] ; CHECK-NEXT: mov v2.b[3], w11 -; CHECK-NEXT: smov w11, v0.b[11] +; CHECK-NEXT: smov w11, v0.b[14] ; CHECK-NEXT: sdiv w13, w14, w13 ; CHECK-NEXT: smov w14, v1.b[6] ; CHECK-NEXT: mov v2.b[4], w12 -; CHECK-NEXT: smov w12, v0.b[12] +; CHECK-NEXT: smov w12, v0.b[15] ; CHECK-NEXT: sdiv w14, w15, w14 ; CHECK-NEXT: smov w15, v1.b[7] ; CHECK-NEXT: mov v2.b[5], w13 -; CHECK-NEXT: smov w13, v0.b[13] ; CHECK-NEXT: sdiv w15, w16, w15 ; CHECK-NEXT: smov w16, v1.b[8] ; CHECK-NEXT: mov v2.b[6], w14 ; CHECK-NEXT: sdiv w16, w17, w16 -; CHECK-NEXT: smov w17, v0.b[9] +; CHECK-NEXT: smov w17, v1.b[9] ; CHECK-NEXT: mov v2.b[7], w15 -; CHECK-NEXT: sdiv w8, w17, w9 -; CHECK-NEXT: smov w9, v1.b[10] +; CHECK-NEXT: sdiv w17, w18, w17 +; CHECK-NEXT: smov w18, v1.b[10] ; CHECK-NEXT: mov v2.b[8], w16 -; CHECK-NEXT: sdiv w9, w10, w9 -; CHECK-NEXT: smov w10, v1.b[11] -; CHECK-NEXT: mov v2.b[9], w8 +; CHECK-NEXT: sdiv w18, w0, w18 +; CHECK-NEXT: smov w0, v1.b[11] +; CHECK-NEXT: mov v2.b[9], w17 +; CHECK-NEXT: sdiv w0, w1, w0 +; CHECK-NEXT: smov w1, v1.b[12] +; CHECK-NEXT: mov v2.b[10], w18 +; CHECK-NEXT: sdiv w9, w2, w1 +; CHECK-NEXT: mov v2.b[11], w0 +; CHECK-NEXT: sdiv w8, w10, w8 +; CHECK-NEXT: smov w10, v1.b[14] +; CHECK-NEXT: mov v2.b[12], w9 ; CHECK-NEXT: sdiv w10, w11, w10 -; CHECK-NEXT: smov w11, v1.b[12] -; CHECK-NEXT: mov v2.b[10], w9 -; CHECK-NEXT: smov w9, v1.b[14] -; CHECK-NEXT: sdiv w11, w12, w11 -; CHECK-NEXT: smov w12, v1.b[13] -; CHECK-NEXT: mov v2.b[11], w10 -; CHECK-NEXT: smov w10, v1.b[15] -; CHECK-NEXT: sdiv w8, w13, w12 -; CHECK-NEXT: smov w12, v0.b[14] -; CHECK-NEXT: mov v2.b[12], w11 -; CHECK-NEXT: smov w11, v0.b[15] -; CHECK-NEXT: sdiv w9, w12, w9 +; CHECK-NEXT: smov w11, v1.b[15] ; CHECK-NEXT: mov v2.b[13], w8 -; CHECK-NEXT: sdiv w8, w11, w10 -; CHECK-NEXT: mov v2.b[14], w9 -; CHECK-NEXT: mov v2.b[15], w8 +; CHECK-NEXT: sdiv w11, w12, w11 +; CHECK-NEXT: mov v2.b[14], w10 +; CHECK-NEXT: mov v2.b[15], w11 ; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %tmp3 = sdiv <16 x i8> %A, %B; @@ -292,18 +292,18 @@ ; CHECK-NEXT: smov w9, v0.h[1] ; CHECK-NEXT: smov w10, v0.h[0] ; CHECK-NEXT: smov w11, v0.h[2] -; CHECK-NEXT: smov w12, v0.h[3] ; CHECK-NEXT: sdiv w8, w9, w8 ; CHECK-NEXT: smov w9, v1.h[0] +; CHECK-NEXT: smov w12, v0.h[3] ; CHECK-NEXT: sdiv w9, w10, w9 ; CHECK-NEXT: smov w10, v1.h[2] ; CHECK-NEXT: sdiv w10, w11, w10 ; CHECK-NEXT: smov w11, v1.h[3] ; CHECK-NEXT: fmov s0, w9 ; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: sdiv w8, w12, w11 +; CHECK-NEXT: sdiv w11, w12, w11 ; CHECK-NEXT: mov v0.h[2], w10 -; CHECK-NEXT: mov v0.h[3], w8 +; CHECK-NEXT: mov v0.h[3], w11 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %tmp3 = sdiv <4 x i16> %A, %B; @@ -317,32 +317,32 @@ ; CHECK-NEXT: smov w9, v0.h[1] ; CHECK-NEXT: smov w10, v0.h[0] ; CHECK-NEXT: smov w11, v0.h[2] -; CHECK-NEXT: smov w12, v0.h[3] -; CHECK-NEXT: smov w13, v0.h[4] ; CHECK-NEXT: sdiv w8, w9, w8 ; CHECK-NEXT: smov w9, v1.h[0] +; CHECK-NEXT: smov w12, v0.h[3] +; CHECK-NEXT: smov w13, v0.h[4] +; CHECK-NEXT: smov w14, v0.h[5] +; CHECK-NEXT: smov w15, v0.h[6] ; CHECK-NEXT: sdiv w9, w10, w9 ; CHECK-NEXT: smov w10, v1.h[2] ; CHECK-NEXT: sdiv w10, w11, w10 ; CHECK-NEXT: smov w11, v1.h[3] ; CHECK-NEXT: fmov s2, w9 -; CHECK-NEXT: smov w9, v1.h[5] ; CHECK-NEXT: mov v2.h[1], w8 +; CHECK-NEXT: smov w8, v1.h[7] ; CHECK-NEXT: sdiv w11, w12, w11 ; CHECK-NEXT: smov w12, v1.h[4] ; CHECK-NEXT: mov v2.h[2], w10 -; CHECK-NEXT: smov w10, v0.h[6] +; CHECK-NEXT: smov w10, v0.h[7] ; CHECK-NEXT: sdiv w12, w13, w12 -; CHECK-NEXT: smov w13, v0.h[5] +; CHECK-NEXT: smov w13, v1.h[5] ; CHECK-NEXT: mov v2.h[3], w11 -; CHECK-NEXT: smov w11, v0.h[7] -; CHECK-NEXT: sdiv w8, w13, w9 -; CHECK-NEXT: smov w9, v1.h[6] +; CHECK-NEXT: sdiv w13, w14, w13 +; CHECK-NEXT: smov w14, v1.h[6] ; CHECK-NEXT: mov v2.h[4], w12 -; CHECK-NEXT: sdiv w9, w10, w9 -; CHECK-NEXT: smov w10, v1.h[7] -; CHECK-NEXT: mov v2.h[5], w8 -; CHECK-NEXT: sdiv w8, w11, w10 +; CHECK-NEXT: sdiv w9, w15, w14 +; CHECK-NEXT: mov v2.h[5], w13 +; CHECK-NEXT: sdiv w8, w10, w8 ; CHECK-NEXT: mov v2.h[6], w9 ; CHECK-NEXT: mov v2.h[7], w8 ; CHECK-NEXT: mov v0.16b, v2.16b @@ -391,18 +391,18 @@ ; CHECK-NEXT: mov w9, v0.s[1] ; CHECK-NEXT: fmov w10, s0 ; CHECK-NEXT: mov w11, v0.s[2] -; CHECK-NEXT: mov w12, v0.s[3] ; CHECK-NEXT: sdiv w8, w9, w8 ; CHECK-NEXT: fmov w9, s1 +; CHECK-NEXT: mov w12, v0.s[3] ; CHECK-NEXT: sdiv w9, w10, w9 ; CHECK-NEXT: mov w10, v1.s[2] ; CHECK-NEXT: sdiv w10, w11, w10 ; CHECK-NEXT: mov w11, v1.s[3] ; CHECK-NEXT: fmov s0, w9 ; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: sdiv w8, w12, w11 +; CHECK-NEXT: sdiv w11, w12, w11 ; CHECK-NEXT: mov v0.s[2], w10 -; CHECK-NEXT: mov v0.s[3], w8 +; CHECK-NEXT: mov v0.s[3], w11 ; CHECK-NEXT: ret %tmp3 = sdiv <4 x i32> %A, %B; ret <4 x i32> %tmp3 @@ -461,32 +461,32 @@ ; CHECK-NEXT: umov w9, v0.b[1] ; CHECK-NEXT: umov w10, v0.b[0] ; CHECK-NEXT: umov w11, v0.b[2] -; CHECK-NEXT: umov w12, v0.b[3] -; CHECK-NEXT: umov w13, v0.b[4] ; CHECK-NEXT: udiv w8, w9, w8 ; CHECK-NEXT: umov w9, v1.b[0] +; CHECK-NEXT: umov w12, v0.b[3] +; CHECK-NEXT: umov w13, v0.b[4] +; CHECK-NEXT: umov w14, v0.b[5] +; CHECK-NEXT: umov w15, v0.b[6] ; CHECK-NEXT: udiv w9, w10, w9 ; CHECK-NEXT: umov w10, v1.b[2] ; CHECK-NEXT: udiv w10, w11, w10 ; CHECK-NEXT: umov w11, v1.b[3] ; CHECK-NEXT: fmov s2, w9 -; CHECK-NEXT: umov w9, v1.b[5] ; CHECK-NEXT: mov v2.b[1], w8 +; CHECK-NEXT: umov w8, v1.b[7] ; CHECK-NEXT: udiv w11, w12, w11 ; CHECK-NEXT: umov w12, v1.b[4] ; CHECK-NEXT: mov v2.b[2], w10 -; CHECK-NEXT: umov w10, v0.b[6] +; CHECK-NEXT: umov w10, v0.b[7] ; CHECK-NEXT: udiv w12, w13, w12 -; CHECK-NEXT: umov w13, v0.b[5] +; CHECK-NEXT: umov w13, v1.b[5] ; CHECK-NEXT: mov v2.b[3], w11 -; CHECK-NEXT: umov w11, v0.b[7] -; CHECK-NEXT: udiv w8, w13, w9 -; CHECK-NEXT: umov w9, v1.b[6] +; CHECK-NEXT: udiv w13, w14, w13 +; CHECK-NEXT: umov w14, v1.b[6] ; CHECK-NEXT: mov v2.b[4], w12 -; CHECK-NEXT: udiv w9, w10, w9 -; CHECK-NEXT: umov w10, v1.b[7] -; CHECK-NEXT: mov v2.b[5], w8 -; CHECK-NEXT: udiv w8, w11, w10 +; CHECK-NEXT: udiv w9, w15, w14 +; CHECK-NEXT: mov v2.b[5], w13 +; CHECK-NEXT: udiv w8, w10, w8 ; CHECK-NEXT: mov v2.b[6], w9 ; CHECK-NEXT: mov v2.b[7], w8 ; CHECK-NEXT: fmov d0, d2 @@ -502,66 +502,66 @@ ; CHECK-NEXT: umov w9, v0.b[1] ; CHECK-NEXT: umov w10, v0.b[0] ; CHECK-NEXT: umov w11, v0.b[2] +; CHECK-NEXT: udiv w8, w9, w8 +; CHECK-NEXT: umov w9, v1.b[0] ; CHECK-NEXT: umov w12, v0.b[3] ; CHECK-NEXT: umov w13, v0.b[4] ; CHECK-NEXT: umov w14, v0.b[5] ; CHECK-NEXT: umov w15, v0.b[6] -; CHECK-NEXT: udiv w8, w9, w8 -; CHECK-NEXT: umov w9, v1.b[0] ; CHECK-NEXT: umov w16, v0.b[7] ; CHECK-NEXT: umov w17, v0.b[8] +; CHECK-NEXT: umov w18, v0.b[9] +; CHECK-NEXT: umov w0, v0.b[10] +; CHECK-NEXT: umov w1, v0.b[11] +; CHECK-NEXT: umov w2, v0.b[12] ; CHECK-NEXT: udiv w9, w10, w9 ; CHECK-NEXT: umov w10, v1.b[2] ; CHECK-NEXT: udiv w10, w11, w10 ; CHECK-NEXT: umov w11, v1.b[3] ; CHECK-NEXT: fmov s2, w9 -; CHECK-NEXT: umov w9, v1.b[9] ; CHECK-NEXT: mov v2.b[1], w8 +; CHECK-NEXT: umov w8, v1.b[13] ; CHECK-NEXT: udiv w11, w12, w11 ; CHECK-NEXT: umov w12, v1.b[4] ; CHECK-NEXT: mov v2.b[2], w10 -; CHECK-NEXT: umov w10, v0.b[10] +; CHECK-NEXT: umov w10, v0.b[13] ; CHECK-NEXT: udiv w12, w13, w12 ; CHECK-NEXT: umov w13, v1.b[5] ; CHECK-NEXT: mov v2.b[3], w11 -; CHECK-NEXT: umov w11, v0.b[11] +; CHECK-NEXT: umov w11, v0.b[14] ; CHECK-NEXT: udiv w13, w14, w13 ; CHECK-NEXT: umov w14, v1.b[6] ; CHECK-NEXT: mov v2.b[4], w12 -; CHECK-NEXT: umov w12, v0.b[12] +; CHECK-NEXT: umov w12, v0.b[15] ; CHECK-NEXT: udiv w14, w15, w14 ; CHECK-NEXT: umov w15, v1.b[7] ; CHECK-NEXT: mov v2.b[5], w13 -; CHECK-NEXT: umov w13, v0.b[13] ; CHECK-NEXT: udiv w15, w16, w15 ; CHECK-NEXT: umov w16, v1.b[8] ; CHECK-NEXT: mov v2.b[6], w14 ; CHECK-NEXT: udiv w16, w17, w16 -; CHECK-NEXT: umov w17, v0.b[9] +; CHECK-NEXT: umov w17, v1.b[9] ; CHECK-NEXT: mov v2.b[7], w15 -; CHECK-NEXT: udiv w8, w17, w9 -; CHECK-NEXT: umov w9, v1.b[10] +; CHECK-NEXT: udiv w17, w18, w17 +; CHECK-NEXT: umov w18, v1.b[10] ; CHECK-NEXT: mov v2.b[8], w16 -; CHECK-NEXT: udiv w9, w10, w9 -; CHECK-NEXT: umov w10, v1.b[11] -; CHECK-NEXT: mov v2.b[9], w8 +; CHECK-NEXT: udiv w18, w0, w18 +; CHECK-NEXT: umov w0, v1.b[11] +; CHECK-NEXT: mov v2.b[9], w17 +; CHECK-NEXT: udiv w0, w1, w0 +; CHECK-NEXT: umov w1, v1.b[12] +; CHECK-NEXT: mov v2.b[10], w18 +; CHECK-NEXT: udiv w9, w2, w1 +; CHECK-NEXT: mov v2.b[11], w0 +; CHECK-NEXT: udiv w8, w10, w8 +; CHECK-NEXT: umov w10, v1.b[14] +; CHECK-NEXT: mov v2.b[12], w9 ; CHECK-NEXT: udiv w10, w11, w10 -; CHECK-NEXT: umov w11, v1.b[12] -; CHECK-NEXT: mov v2.b[10], w9 -; CHECK-NEXT: umov w9, v1.b[14] -; CHECK-NEXT: udiv w11, w12, w11 -; CHECK-NEXT: umov w12, v1.b[13] -; CHECK-NEXT: mov v2.b[11], w10 -; CHECK-NEXT: umov w10, v1.b[15] -; CHECK-NEXT: udiv w8, w13, w12 -; CHECK-NEXT: umov w12, v0.b[14] -; CHECK-NEXT: mov v2.b[12], w11 -; CHECK-NEXT: umov w11, v0.b[15] -; CHECK-NEXT: udiv w9, w12, w9 +; CHECK-NEXT: umov w11, v1.b[15] ; CHECK-NEXT: mov v2.b[13], w8 -; CHECK-NEXT: udiv w8, w11, w10 -; CHECK-NEXT: mov v2.b[14], w9 -; CHECK-NEXT: mov v2.b[15], w8 +; CHECK-NEXT: udiv w11, w12, w11 +; CHECK-NEXT: mov v2.b[14], w10 +; CHECK-NEXT: mov v2.b[15], w11 ; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %tmp3 = udiv <16 x i8> %A, %B; @@ -591,18 +591,18 @@ ; CHECK-NEXT: umov w9, v0.h[1] ; CHECK-NEXT: umov w10, v0.h[0] ; CHECK-NEXT: umov w11, v0.h[2] -; CHECK-NEXT: umov w12, v0.h[3] ; CHECK-NEXT: udiv w8, w9, w8 ; CHECK-NEXT: umov w9, v1.h[0] +; CHECK-NEXT: umov w12, v0.h[3] ; CHECK-NEXT: udiv w9, w10, w9 ; CHECK-NEXT: umov w10, v1.h[2] ; CHECK-NEXT: udiv w10, w11, w10 ; CHECK-NEXT: umov w11, v1.h[3] ; CHECK-NEXT: fmov s0, w9 ; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: udiv w8, w12, w11 +; CHECK-NEXT: udiv w11, w12, w11 ; CHECK-NEXT: mov v0.h[2], w10 -; CHECK-NEXT: mov v0.h[3], w8 +; CHECK-NEXT: mov v0.h[3], w11 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %tmp3 = udiv <4 x i16> %A, %B; @@ -616,32 +616,32 @@ ; CHECK-NEXT: umov w9, v0.h[1] ; CHECK-NEXT: umov w10, v0.h[0] ; CHECK-NEXT: umov w11, v0.h[2] -; CHECK-NEXT: umov w12, v0.h[3] -; CHECK-NEXT: umov w13, v0.h[4] ; CHECK-NEXT: udiv w8, w9, w8 ; CHECK-NEXT: umov w9, v1.h[0] +; CHECK-NEXT: umov w12, v0.h[3] +; CHECK-NEXT: umov w13, v0.h[4] +; CHECK-NEXT: umov w14, v0.h[5] +; CHECK-NEXT: umov w15, v0.h[6] ; CHECK-NEXT: udiv w9, w10, w9 ; CHECK-NEXT: umov w10, v1.h[2] ; CHECK-NEXT: udiv w10, w11, w10 ; CHECK-NEXT: umov w11, v1.h[3] ; CHECK-NEXT: fmov s2, w9 -; CHECK-NEXT: umov w9, v1.h[5] ; CHECK-NEXT: mov v2.h[1], w8 +; CHECK-NEXT: umov w8, v1.h[7] ; CHECK-NEXT: udiv w11, w12, w11 ; CHECK-NEXT: umov w12, v1.h[4] ; CHECK-NEXT: mov v2.h[2], w10 -; CHECK-NEXT: umov w10, v0.h[6] +; CHECK-NEXT: umov w10, v0.h[7] ; CHECK-NEXT: udiv w12, w13, w12 -; CHECK-NEXT: umov w13, v0.h[5] +; CHECK-NEXT: umov w13, v1.h[5] ; CHECK-NEXT: mov v2.h[3], w11 -; CHECK-NEXT: umov w11, v0.h[7] -; CHECK-NEXT: udiv w8, w13, w9 -; CHECK-NEXT: umov w9, v1.h[6] +; CHECK-NEXT: udiv w13, w14, w13 +; CHECK-NEXT: umov w14, v1.h[6] ; CHECK-NEXT: mov v2.h[4], w12 -; CHECK-NEXT: udiv w9, w10, w9 -; CHECK-NEXT: umov w10, v1.h[7] -; CHECK-NEXT: mov v2.h[5], w8 -; CHECK-NEXT: udiv w8, w11, w10 +; CHECK-NEXT: udiv w9, w15, w14 +; CHECK-NEXT: mov v2.h[5], w13 +; CHECK-NEXT: udiv w8, w10, w8 ; CHECK-NEXT: mov v2.h[6], w9 ; CHECK-NEXT: mov v2.h[7], w8 ; CHECK-NEXT: mov v0.16b, v2.16b @@ -690,18 +690,18 @@ ; CHECK-NEXT: mov w9, v0.s[1] ; CHECK-NEXT: fmov w10, s0 ; CHECK-NEXT: mov w11, v0.s[2] -; CHECK-NEXT: mov w12, v0.s[3] ; CHECK-NEXT: udiv w8, w9, w8 ; CHECK-NEXT: fmov w9, s1 +; CHECK-NEXT: mov w12, v0.s[3] ; CHECK-NEXT: udiv w9, w10, w9 ; CHECK-NEXT: mov w10, v1.s[2] ; CHECK-NEXT: udiv w10, w11, w10 ; CHECK-NEXT: mov w11, v1.s[3] ; CHECK-NEXT: fmov s0, w9 ; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: udiv w8, w12, w11 +; CHECK-NEXT: udiv w11, w12, w11 ; CHECK-NEXT: mov v0.s[2], w10 -; CHECK-NEXT: mov v0.s[3], w8 +; CHECK-NEXT: mov v0.s[3], w11 ; CHECK-NEXT: ret %tmp3 = udiv <4 x i32> %A, %B; ret <4 x i32> %tmp3 @@ -755,49 +755,53 @@ define <8 x i8> @srem8x8(<8 x i8> %A, <8 x i8> %B) { ; CHECK-LABEL: srem8x8: ; CHECK: // %bb.0: +; CHECK-NEXT: str x19, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w19, -16 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: smov w11, v1.b[0] ; CHECK-NEXT: smov w12, v0.b[0] ; CHECK-NEXT: smov w8, v1.b[1] ; CHECK-NEXT: smov w9, v0.b[1] +; CHECK-NEXT: sdiv w13, w12, w11 ; CHECK-NEXT: smov w14, v1.b[2] ; CHECK-NEXT: smov w15, v0.b[2] ; CHECK-NEXT: smov w17, v1.b[3] ; CHECK-NEXT: smov w18, v0.b[3] -; CHECK-NEXT: sdiv w13, w12, w11 ; CHECK-NEXT: smov w1, v1.b[4] ; CHECK-NEXT: smov w2, v0.b[4] +; CHECK-NEXT: smov w4, v1.b[5] +; CHECK-NEXT: smov w5, v0.b[5] +; CHECK-NEXT: smov w7, v1.b[6] +; CHECK-NEXT: smov w19, v0.b[6] ; CHECK-NEXT: msub w11, w13, w11, w12 -; CHECK-NEXT: smov w12, v1.b[5] ; CHECK-NEXT: sdiv w10, w9, w8 -; CHECK-NEXT: smov w13, v0.b[5] ; CHECK-NEXT: fmov s2, w11 -; CHECK-NEXT: smov w11, v0.b[6] +; CHECK-NEXT: smov w11, v0.b[7] ; CHECK-NEXT: msub w8, w10, w8, w9 -; CHECK-NEXT: smov w10, v1.b[6] +; CHECK-NEXT: smov w10, v1.b[7] ; CHECK-NEXT: sdiv w16, w15, w14 ; CHECK-NEXT: mov v2.b[1], w8 -; CHECK-NEXT: msub w8, w16, w14, w15 -; CHECK-NEXT: smov w15, v1.b[7] +; CHECK-NEXT: msub w9, w16, w14, w15 ; CHECK-NEXT: sdiv w0, w18, w17 -; CHECK-NEXT: smov w16, v0.b[7] -; CHECK-NEXT: mov v2.b[2], w8 -; CHECK-NEXT: msub w14, w0, w17, w18 +; CHECK-NEXT: mov v2.b[2], w9 +; CHECK-NEXT: msub w13, w0, w17, w18 ; CHECK-NEXT: sdiv w3, w2, w1 -; CHECK-NEXT: mov v2.b[3], w14 +; CHECK-NEXT: mov v2.b[3], w13 ; CHECK-NEXT: msub w14, w3, w1, w2 -; CHECK-NEXT: sdiv w9, w13, w12 +; CHECK-NEXT: sdiv w6, w5, w4 ; CHECK-NEXT: mov v2.b[4], w14 -; CHECK-NEXT: msub w9, w9, w12, w13 -; CHECK-NEXT: sdiv w8, w11, w10 +; CHECK-NEXT: msub w9, w6, w4, w5 +; CHECK-NEXT: sdiv w12, w19, w7 ; CHECK-NEXT: mov v2.b[5], w9 +; CHECK-NEXT: msub w12, w12, w7, w19 +; CHECK-NEXT: sdiv w8, w11, w10 +; CHECK-NEXT: mov v2.b[6], w12 ; CHECK-NEXT: msub w8, w8, w10, w11 -; CHECK-NEXT: sdiv w12, w16, w15 -; CHECK-NEXT: mov v2.b[6], w8 -; CHECK-NEXT: msub w8, w12, w15, w16 ; CHECK-NEXT: mov v2.b[7], w8 ; CHECK-NEXT: fmov d0, d2 +; CHECK-NEXT: ldr x19, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %tmp3 = srem <8 x i8> %A, %B; ret <8 x i8> %tmp3 @@ -806,11 +810,14 @@ define <16 x i8> @srem16x8(<16 x i8> %A, <16 x i8> %B) { ; CHECK-LABEL: srem16x8: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x26, x25, [sp, #-64]! // 16-byte Folded Spill -; CHECK-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: sub sp, sp, #128 +; CHECK-NEXT: stp x29, x30, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp x28, x27, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp x26, x25, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: stp x24, x23, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: stp x22, x21, [sp, #96] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #112] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 128 ; CHECK-NEXT: .cfi_offset w19, -8 ; CHECK-NEXT: .cfi_offset w20, -16 ; CHECK-NEXT: .cfi_offset w21, -24 @@ -819,15 +826,20 @@ ; CHECK-NEXT: .cfi_offset w24, -48 ; CHECK-NEXT: .cfi_offset w25, -56 ; CHECK-NEXT: .cfi_offset w26, -64 -; CHECK-NEXT: smov w11, v1.b[0] -; CHECK-NEXT: smov w12, v0.b[0] +; CHECK-NEXT: .cfi_offset w27, -72 +; CHECK-NEXT: .cfi_offset w28, -80 +; CHECK-NEXT: .cfi_offset w30, -88 +; CHECK-NEXT: .cfi_offset w29, -96 ; CHECK-NEXT: smov w8, v1.b[1] -; CHECK-NEXT: smov w9, v0.b[1] -; CHECK-NEXT: smov w14, v1.b[2] +; CHECK-NEXT: smov w6, v0.b[1] +; CHECK-NEXT: smov w16, v1.b[2] ; CHECK-NEXT: smov w15, v0.b[2] +; CHECK-NEXT: str w8, [sp, #24] // 4-byte Folded Spill +; CHECK-NEXT: sdiv w8, w6, w8 ; CHECK-NEXT: smov w17, v1.b[3] ; CHECK-NEXT: smov w18, v0.b[3] -; CHECK-NEXT: sdiv w13, w12, w11 +; CHECK-NEXT: smov w3, v1.b[0] +; CHECK-NEXT: smov w0, v0.b[0] ; CHECK-NEXT: smov w1, v1.b[4] ; CHECK-NEXT: smov w2, v0.b[4] ; CHECK-NEXT: smov w4, v1.b[5] @@ -838,72 +850,84 @@ ; CHECK-NEXT: smov w22, v0.b[7] ; CHECK-NEXT: smov w24, v1.b[8] ; CHECK-NEXT: smov w25, v0.b[8] -; CHECK-NEXT: msub w11, w13, w11, w12 -; CHECK-NEXT: smov w12, v1.b[9] -; CHECK-NEXT: sdiv w10, w9, w8 -; CHECK-NEXT: smov w13, v0.b[9] -; CHECK-NEXT: fmov s2, w11 -; CHECK-NEXT: smov w11, v0.b[10] -; CHECK-NEXT: msub w8, w10, w8, w9 -; CHECK-NEXT: smov w10, v1.b[10] -; CHECK-NEXT: sdiv w16, w15, w14 -; CHECK-NEXT: mov v2.b[1], w8 -; CHECK-NEXT: msub w8, w16, w14, w15 -; CHECK-NEXT: smov w15, v1.b[11] -; CHECK-NEXT: sdiv w0, w18, w17 -; CHECK-NEXT: smov w16, v0.b[11] -; CHECK-NEXT: mov v2.b[2], w8 -; CHECK-NEXT: msub w14, w0, w17, w18 -; CHECK-NEXT: smov w18, v1.b[12] -; CHECK-NEXT: sdiv w3, w2, w1 -; CHECK-NEXT: smov w0, v0.b[12] -; CHECK-NEXT: mov v2.b[3], w14 -; CHECK-NEXT: msub w14, w3, w1, w2 -; CHECK-NEXT: smov w2, v1.b[13] -; CHECK-NEXT: sdiv w6, w5, w4 -; CHECK-NEXT: smov w3, v0.b[13] -; CHECK-NEXT: mov v2.b[4], w14 -; CHECK-NEXT: msub w17, w6, w4, w5 -; CHECK-NEXT: sdiv w20, w19, w7 -; CHECK-NEXT: mov v2.b[5], w17 -; CHECK-NEXT: msub w17, w20, w7, w19 -; CHECK-NEXT: sdiv w23, w22, w21 -; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: mov v2.b[6], w17 -; CHECK-NEXT: msub w1, w23, w21, w22 +; CHECK-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-NEXT: sdiv w9, w15, w16 +; CHECK-NEXT: smov w27, v1.b[9] +; CHECK-NEXT: smov w28, v0.b[9] +; CHECK-NEXT: smov w30, v1.b[10] +; CHECK-NEXT: smov w12, v0.b[10] +; CHECK-NEXT: smov w11, v1.b[11] +; CHECK-NEXT: smov w10, v0.b[11] +; CHECK-NEXT: sdiv w8, w18, w17 +; CHECK-NEXT: sdiv w20, w0, w3 +; CHECK-NEXT: stp w8, w9, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: msub w0, w20, w3, w0 +; CHECK-NEXT: sdiv w9, w2, w1 +; CHECK-NEXT: ldp w23, w20, [sp, #24] // 8-byte Folded Reload +; CHECK-NEXT: fmov s2, w0 +; CHECK-NEXT: msub w6, w20, w23, w6 +; CHECK-NEXT: ldp w20, w0, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: sdiv w8, w5, w4 +; CHECK-NEXT: mov v2.b[1], w6 +; CHECK-NEXT: msub w15, w0, w16, w15 +; CHECK-NEXT: smov w16, v1.b[13] +; CHECK-NEXT: msub w17, w20, w17, w18 +; CHECK-NEXT: smov w0, v0.b[13] +; CHECK-NEXT: mov v2.b[2], w15 +; CHECK-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: sdiv w9, w19, w7 +; CHECK-NEXT: ldr w6, [sp, #12] // 4-byte Folded Reload +; CHECK-NEXT: mov v2.b[3], w17 +; CHECK-NEXT: ldr w15, [sp, #8] // 4-byte Folded Reload +; CHECK-NEXT: msub w1, w6, w1, w2 +; CHECK-NEXT: smov w2, v0.b[14] +; CHECK-NEXT: msub w15, w15, w4, w5 +; CHECK-NEXT: sdiv w8, w22, w21 +; CHECK-NEXT: mov v2.b[4], w1 +; CHECK-NEXT: smov w1, v1.b[14] +; CHECK-NEXT: mov v2.b[5], w15 +; CHECK-NEXT: stp w8, w9, [sp] // 8-byte Folded Spill ; CHECK-NEXT: sdiv w26, w25, w24 -; CHECK-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: mov v2.b[7], w1 -; CHECK-NEXT: msub w1, w26, w24, w25 -; CHECK-NEXT: sdiv w9, w13, w12 -; CHECK-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: mov v2.b[8], w1 -; CHECK-NEXT: msub w9, w9, w12, w13 -; CHECK-NEXT: smov w13, v1.b[15] -; CHECK-NEXT: sdiv w8, w11, w10 -; CHECK-NEXT: mov v2.b[9], w9 -; CHECK-NEXT: smov w9, v1.b[14] -; CHECK-NEXT: msub w8, w8, w10, w11 -; CHECK-NEXT: smov w10, v0.b[14] -; CHECK-NEXT: sdiv w14, w16, w15 -; CHECK-NEXT: mov v2.b[10], w8 -; CHECK-NEXT: msub w11, w14, w15, w16 -; CHECK-NEXT: smov w14, v0.b[15] -; CHECK-NEXT: sdiv w17, w0, w18 -; CHECK-NEXT: mov v2.b[11], w11 -; CHECK-NEXT: msub w11, w17, w18, w0 -; CHECK-NEXT: sdiv w12, w3, w2 -; CHECK-NEXT: mov v2.b[12], w11 -; CHECK-NEXT: msub w12, w12, w2, w3 -; CHECK-NEXT: sdiv w8, w10, w9 -; CHECK-NEXT: mov v2.b[13], w12 -; CHECK-NEXT: msub w8, w8, w9, w10 -; CHECK-NEXT: sdiv w11, w14, w13 -; CHECK-NEXT: mov v2.b[14], w8 -; CHECK-NEXT: msub w8, w11, w13, w14 +; CHECK-NEXT: ldp w4, w17, [sp] // 8-byte Folded Reload +; CHECK-NEXT: smov w9, v1.b[12] +; CHECK-NEXT: smov w8, v0.b[12] +; CHECK-NEXT: msub w17, w17, w7, w19 +; CHECK-NEXT: msub w4, w4, w21, w22 +; CHECK-NEXT: msub w5, w26, w24, w25 +; CHECK-NEXT: sdiv w29, w28, w27 +; CHECK-NEXT: mov v2.b[6], w17 +; CHECK-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload +; CHECK-NEXT: mov v2.b[7], w4 +; CHECK-NEXT: smov w4, v0.b[15] +; CHECK-NEXT: ldp x22, x21, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: mov v2.b[8], w5 +; CHECK-NEXT: ldp x24, x23, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: msub w17, w29, w27, w28 +; CHECK-NEXT: sdiv w14, w12, w30 +; CHECK-NEXT: ldp x26, x25, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: mov v2.b[9], w17 +; CHECK-NEXT: ldp x28, x27, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: msub w12, w14, w30, w12 +; CHECK-NEXT: smov w14, v1.b[15] +; CHECK-NEXT: sdiv w13, w10, w11 +; CHECK-NEXT: ldp x29, x30, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: mov v2.b[10], w12 +; CHECK-NEXT: msub w10, w13, w11, w10 +; CHECK-NEXT: sdiv w3, w8, w9 +; CHECK-NEXT: mov v2.b[11], w10 +; CHECK-NEXT: msub w8, w3, w9, w8 +; CHECK-NEXT: sdiv w18, w0, w16 +; CHECK-NEXT: mov v2.b[12], w8 +; CHECK-NEXT: msub w9, w18, w16, w0 +; CHECK-NEXT: sdiv w15, w2, w1 +; CHECK-NEXT: mov v2.b[13], w9 +; CHECK-NEXT: msub w10, w15, w1, w2 +; CHECK-NEXT: sdiv w11, w4, w14 +; CHECK-NEXT: mov v2.b[14], w10 +; CHECK-NEXT: msub w8, w11, w14, w4 ; CHECK-NEXT: mov v2.b[15], w8 ; CHECK-NEXT: mov v0.16b, v2.16b -; CHECK-NEXT: ldp x26, x25, [sp], #64 // 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #128 ; CHECK-NEXT: ret %tmp3 = srem <16 x i8> %A, %B; ret <16 x i8> %tmp3 @@ -933,21 +957,21 @@ ; CHECK-NEXT: smov w12, v0.h[0] ; CHECK-NEXT: smov w8, v1.h[1] ; CHECK-NEXT: smov w9, v0.h[1] +; CHECK-NEXT: sdiv w13, w12, w11 ; CHECK-NEXT: smov w14, v1.h[2] ; CHECK-NEXT: smov w15, v0.h[2] -; CHECK-NEXT: sdiv w13, w12, w11 +; CHECK-NEXT: smov w17, v1.h[3] +; CHECK-NEXT: smov w18, v0.h[3] ; CHECK-NEXT: msub w11, w13, w11, w12 -; CHECK-NEXT: smov w12, v1.h[3] ; CHECK-NEXT: sdiv w10, w9, w8 -; CHECK-NEXT: smov w13, v0.h[3] ; CHECK-NEXT: fmov s0, w11 ; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: sdiv w16, w15, w14 ; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: msub w10, w16, w14, w15 -; CHECK-NEXT: sdiv w9, w13, w12 -; CHECK-NEXT: mov v0.h[2], w10 -; CHECK-NEXT: msub w8, w9, w12, w13 +; CHECK-NEXT: msub w9, w16, w14, w15 +; CHECK-NEXT: sdiv w12, w18, w17 +; CHECK-NEXT: mov v0.h[2], w9 +; CHECK-NEXT: msub w8, w12, w17, w18 ; CHECK-NEXT: mov v0.h[3], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret @@ -958,47 +982,51 @@ define <8 x i16> @srem8x16(<8 x i16> %A, <8 x i16> %B) { ; CHECK-LABEL: srem8x16: ; CHECK: // %bb.0: +; CHECK-NEXT: str x19, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w19, -16 ; CHECK-NEXT: smov w11, v1.h[0] ; CHECK-NEXT: smov w12, v0.h[0] ; CHECK-NEXT: smov w8, v1.h[1] ; CHECK-NEXT: smov w9, v0.h[1] +; CHECK-NEXT: sdiv w13, w12, w11 ; CHECK-NEXT: smov w14, v1.h[2] ; CHECK-NEXT: smov w15, v0.h[2] ; CHECK-NEXT: smov w17, v1.h[3] ; CHECK-NEXT: smov w18, v0.h[3] -; CHECK-NEXT: sdiv w13, w12, w11 ; CHECK-NEXT: smov w1, v1.h[4] ; CHECK-NEXT: smov w2, v0.h[4] +; CHECK-NEXT: smov w4, v1.h[5] +; CHECK-NEXT: smov w5, v0.h[5] +; CHECK-NEXT: smov w7, v1.h[6] +; CHECK-NEXT: smov w19, v0.h[6] ; CHECK-NEXT: msub w11, w13, w11, w12 -; CHECK-NEXT: smov w12, v1.h[5] ; CHECK-NEXT: sdiv w10, w9, w8 -; CHECK-NEXT: smov w13, v0.h[5] ; CHECK-NEXT: fmov s2, w11 -; CHECK-NEXT: smov w11, v0.h[6] +; CHECK-NEXT: smov w11, v0.h[7] ; CHECK-NEXT: msub w8, w10, w8, w9 -; CHECK-NEXT: smov w10, v1.h[6] +; CHECK-NEXT: smov w10, v1.h[7] ; CHECK-NEXT: sdiv w16, w15, w14 ; CHECK-NEXT: mov v2.h[1], w8 -; CHECK-NEXT: msub w8, w16, w14, w15 -; CHECK-NEXT: smov w15, v1.h[7] +; CHECK-NEXT: msub w9, w16, w14, w15 ; CHECK-NEXT: sdiv w0, w18, w17 -; CHECK-NEXT: smov w16, v0.h[7] -; CHECK-NEXT: mov v2.h[2], w8 -; CHECK-NEXT: msub w14, w0, w17, w18 +; CHECK-NEXT: mov v2.h[2], w9 +; CHECK-NEXT: msub w13, w0, w17, w18 ; CHECK-NEXT: sdiv w3, w2, w1 -; CHECK-NEXT: mov v2.h[3], w14 +; CHECK-NEXT: mov v2.h[3], w13 ; CHECK-NEXT: msub w14, w3, w1, w2 -; CHECK-NEXT: sdiv w9, w13, w12 +; CHECK-NEXT: sdiv w6, w5, w4 ; CHECK-NEXT: mov v2.h[4], w14 -; CHECK-NEXT: msub w9, w9, w12, w13 -; CHECK-NEXT: sdiv w8, w11, w10 +; CHECK-NEXT: msub w9, w6, w4, w5 +; CHECK-NEXT: sdiv w12, w19, w7 ; CHECK-NEXT: mov v2.h[5], w9 +; CHECK-NEXT: msub w12, w12, w7, w19 +; CHECK-NEXT: sdiv w8, w11, w10 +; CHECK-NEXT: mov v2.h[6], w12 ; CHECK-NEXT: msub w8, w8, w10, w11 -; CHECK-NEXT: sdiv w12, w16, w15 -; CHECK-NEXT: mov v2.h[6], w8 -; CHECK-NEXT: msub w8, w12, w15, w16 ; CHECK-NEXT: mov v2.h[7], w8 ; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ldr x19, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %tmp3 = srem <8 x i16> %A, %B; ret <8 x i16> %tmp3 @@ -1058,10 +1086,10 @@ ; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: sdiv w16, w15, w14 ; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: msub w10, w16, w14, w15 -; CHECK-NEXT: sdiv w9, w18, w17 -; CHECK-NEXT: mov v0.s[2], w10 -; CHECK-NEXT: msub w8, w9, w17, w18 +; CHECK-NEXT: msub w9, w16, w14, w15 +; CHECK-NEXT: sdiv w12, w18, w17 +; CHECK-NEXT: mov v0.s[2], w9 +; CHECK-NEXT: msub w8, w12, w17, w18 ; CHECK-NEXT: mov v0.s[3], w8 ; CHECK-NEXT: ret %tmp3 = srem <4 x i32> %A, %B; @@ -1119,49 +1147,53 @@ define <8 x i8> @urem8x8(<8 x i8> %A, <8 x i8> %B) { ; CHECK-LABEL: urem8x8: ; CHECK: // %bb.0: +; CHECK-NEXT: str x19, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w19, -16 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: umov w11, v1.b[0] ; CHECK-NEXT: umov w12, v0.b[0] ; CHECK-NEXT: umov w8, v1.b[1] ; CHECK-NEXT: umov w9, v0.b[1] +; CHECK-NEXT: udiv w13, w12, w11 ; CHECK-NEXT: umov w14, v1.b[2] ; CHECK-NEXT: umov w15, v0.b[2] ; CHECK-NEXT: umov w17, v1.b[3] ; CHECK-NEXT: umov w18, v0.b[3] -; CHECK-NEXT: udiv w13, w12, w11 ; CHECK-NEXT: umov w1, v1.b[4] ; CHECK-NEXT: umov w2, v0.b[4] +; CHECK-NEXT: umov w4, v1.b[5] +; CHECK-NEXT: umov w5, v0.b[5] +; CHECK-NEXT: umov w7, v1.b[6] +; CHECK-NEXT: umov w19, v0.b[6] ; CHECK-NEXT: msub w11, w13, w11, w12 -; CHECK-NEXT: umov w12, v1.b[5] ; CHECK-NEXT: udiv w10, w9, w8 -; CHECK-NEXT: umov w13, v0.b[5] ; CHECK-NEXT: fmov s2, w11 -; CHECK-NEXT: umov w11, v0.b[6] +; CHECK-NEXT: umov w11, v0.b[7] ; CHECK-NEXT: msub w8, w10, w8, w9 -; CHECK-NEXT: umov w10, v1.b[6] +; CHECK-NEXT: umov w10, v1.b[7] ; CHECK-NEXT: udiv w16, w15, w14 ; CHECK-NEXT: mov v2.b[1], w8 -; CHECK-NEXT: msub w8, w16, w14, w15 -; CHECK-NEXT: umov w15, v1.b[7] +; CHECK-NEXT: msub w9, w16, w14, w15 ; CHECK-NEXT: udiv w0, w18, w17 -; CHECK-NEXT: umov w16, v0.b[7] -; CHECK-NEXT: mov v2.b[2], w8 -; CHECK-NEXT: msub w14, w0, w17, w18 +; CHECK-NEXT: mov v2.b[2], w9 +; CHECK-NEXT: msub w13, w0, w17, w18 ; CHECK-NEXT: udiv w3, w2, w1 -; CHECK-NEXT: mov v2.b[3], w14 +; CHECK-NEXT: mov v2.b[3], w13 ; CHECK-NEXT: msub w14, w3, w1, w2 -; CHECK-NEXT: udiv w9, w13, w12 +; CHECK-NEXT: udiv w6, w5, w4 ; CHECK-NEXT: mov v2.b[4], w14 -; CHECK-NEXT: msub w9, w9, w12, w13 -; CHECK-NEXT: udiv w8, w11, w10 +; CHECK-NEXT: msub w9, w6, w4, w5 +; CHECK-NEXT: udiv w12, w19, w7 ; CHECK-NEXT: mov v2.b[5], w9 +; CHECK-NEXT: msub w12, w12, w7, w19 +; CHECK-NEXT: udiv w8, w11, w10 +; CHECK-NEXT: mov v2.b[6], w12 ; CHECK-NEXT: msub w8, w8, w10, w11 -; CHECK-NEXT: udiv w12, w16, w15 -; CHECK-NEXT: mov v2.b[6], w8 -; CHECK-NEXT: msub w8, w12, w15, w16 ; CHECK-NEXT: mov v2.b[7], w8 ; CHECK-NEXT: fmov d0, d2 +; CHECK-NEXT: ldr x19, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %tmp3 = urem <8 x i8> %A, %B; ret <8 x i8> %tmp3 @@ -1170,11 +1202,14 @@ define <16 x i8> @urem16x8(<16 x i8> %A, <16 x i8> %B) { ; CHECK-LABEL: urem16x8: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x26, x25, [sp, #-64]! // 16-byte Folded Spill -; CHECK-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: sub sp, sp, #128 +; CHECK-NEXT: stp x29, x30, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp x28, x27, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp x26, x25, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: stp x24, x23, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: stp x22, x21, [sp, #96] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #112] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 128 ; CHECK-NEXT: .cfi_offset w19, -8 ; CHECK-NEXT: .cfi_offset w20, -16 ; CHECK-NEXT: .cfi_offset w21, -24 @@ -1183,15 +1218,20 @@ ; CHECK-NEXT: .cfi_offset w24, -48 ; CHECK-NEXT: .cfi_offset w25, -56 ; CHECK-NEXT: .cfi_offset w26, -64 -; CHECK-NEXT: umov w11, v1.b[0] -; CHECK-NEXT: umov w12, v0.b[0] +; CHECK-NEXT: .cfi_offset w27, -72 +; CHECK-NEXT: .cfi_offset w28, -80 +; CHECK-NEXT: .cfi_offset w30, -88 +; CHECK-NEXT: .cfi_offset w29, -96 ; CHECK-NEXT: umov w8, v1.b[1] -; CHECK-NEXT: umov w9, v0.b[1] -; CHECK-NEXT: umov w14, v1.b[2] +; CHECK-NEXT: umov w6, v0.b[1] +; CHECK-NEXT: umov w16, v1.b[2] ; CHECK-NEXT: umov w15, v0.b[2] +; CHECK-NEXT: str w8, [sp, #24] // 4-byte Folded Spill +; CHECK-NEXT: udiv w8, w6, w8 ; CHECK-NEXT: umov w17, v1.b[3] ; CHECK-NEXT: umov w18, v0.b[3] -; CHECK-NEXT: udiv w13, w12, w11 +; CHECK-NEXT: umov w3, v1.b[0] +; CHECK-NEXT: umov w0, v0.b[0] ; CHECK-NEXT: umov w1, v1.b[4] ; CHECK-NEXT: umov w2, v0.b[4] ; CHECK-NEXT: umov w4, v1.b[5] @@ -1202,72 +1242,84 @@ ; CHECK-NEXT: umov w22, v0.b[7] ; CHECK-NEXT: umov w24, v1.b[8] ; CHECK-NEXT: umov w25, v0.b[8] -; CHECK-NEXT: msub w11, w13, w11, w12 -; CHECK-NEXT: umov w12, v1.b[9] -; CHECK-NEXT: udiv w10, w9, w8 -; CHECK-NEXT: umov w13, v0.b[9] -; CHECK-NEXT: fmov s2, w11 -; CHECK-NEXT: umov w11, v0.b[10] -; CHECK-NEXT: msub w8, w10, w8, w9 -; CHECK-NEXT: umov w10, v1.b[10] -; CHECK-NEXT: udiv w16, w15, w14 -; CHECK-NEXT: mov v2.b[1], w8 -; CHECK-NEXT: msub w8, w16, w14, w15 -; CHECK-NEXT: umov w15, v1.b[11] -; CHECK-NEXT: udiv w0, w18, w17 -; CHECK-NEXT: umov w16, v0.b[11] -; CHECK-NEXT: mov v2.b[2], w8 -; CHECK-NEXT: msub w14, w0, w17, w18 -; CHECK-NEXT: umov w18, v1.b[12] -; CHECK-NEXT: udiv w3, w2, w1 -; CHECK-NEXT: umov w0, v0.b[12] -; CHECK-NEXT: mov v2.b[3], w14 -; CHECK-NEXT: msub w14, w3, w1, w2 -; CHECK-NEXT: umov w2, v1.b[13] -; CHECK-NEXT: udiv w6, w5, w4 -; CHECK-NEXT: umov w3, v0.b[13] -; CHECK-NEXT: mov v2.b[4], w14 -; CHECK-NEXT: msub w17, w6, w4, w5 -; CHECK-NEXT: udiv w20, w19, w7 -; CHECK-NEXT: mov v2.b[5], w17 -; CHECK-NEXT: msub w17, w20, w7, w19 -; CHECK-NEXT: udiv w23, w22, w21 -; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: mov v2.b[6], w17 -; CHECK-NEXT: msub w1, w23, w21, w22 +; CHECK-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-NEXT: udiv w9, w15, w16 +; CHECK-NEXT: umov w27, v1.b[9] +; CHECK-NEXT: umov w28, v0.b[9] +; CHECK-NEXT: umov w30, v1.b[10] +; CHECK-NEXT: umov w12, v0.b[10] +; CHECK-NEXT: umov w11, v1.b[11] +; CHECK-NEXT: umov w10, v0.b[11] +; CHECK-NEXT: udiv w8, w18, w17 +; CHECK-NEXT: udiv w20, w0, w3 +; CHECK-NEXT: stp w8, w9, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: msub w0, w20, w3, w0 +; CHECK-NEXT: udiv w9, w2, w1 +; CHECK-NEXT: ldp w23, w20, [sp, #24] // 8-byte Folded Reload +; CHECK-NEXT: fmov s2, w0 +; CHECK-NEXT: msub w6, w20, w23, w6 +; CHECK-NEXT: ldp w20, w0, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: udiv w8, w5, w4 +; CHECK-NEXT: mov v2.b[1], w6 +; CHECK-NEXT: msub w15, w0, w16, w15 +; CHECK-NEXT: umov w16, v1.b[13] +; CHECK-NEXT: msub w17, w20, w17, w18 +; CHECK-NEXT: umov w0, v0.b[13] +; CHECK-NEXT: mov v2.b[2], w15 +; CHECK-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: udiv w9, w19, w7 +; CHECK-NEXT: ldr w6, [sp, #12] // 4-byte Folded Reload +; CHECK-NEXT: mov v2.b[3], w17 +; CHECK-NEXT: ldr w15, [sp, #8] // 4-byte Folded Reload +; CHECK-NEXT: msub w1, w6, w1, w2 +; CHECK-NEXT: umov w2, v0.b[14] +; CHECK-NEXT: msub w15, w15, w4, w5 +; CHECK-NEXT: udiv w8, w22, w21 +; CHECK-NEXT: mov v2.b[4], w1 +; CHECK-NEXT: umov w1, v1.b[14] +; CHECK-NEXT: mov v2.b[5], w15 +; CHECK-NEXT: stp w8, w9, [sp] // 8-byte Folded Spill ; CHECK-NEXT: udiv w26, w25, w24 -; CHECK-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: mov v2.b[7], w1 -; CHECK-NEXT: msub w1, w26, w24, w25 -; CHECK-NEXT: udiv w9, w13, w12 -; CHECK-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: mov v2.b[8], w1 -; CHECK-NEXT: msub w9, w9, w12, w13 -; CHECK-NEXT: umov w13, v1.b[15] -; CHECK-NEXT: udiv w8, w11, w10 -; CHECK-NEXT: mov v2.b[9], w9 -; CHECK-NEXT: umov w9, v1.b[14] -; CHECK-NEXT: msub w8, w8, w10, w11 -; CHECK-NEXT: umov w10, v0.b[14] -; CHECK-NEXT: udiv w14, w16, w15 -; CHECK-NEXT: mov v2.b[10], w8 -; CHECK-NEXT: msub w11, w14, w15, w16 -; CHECK-NEXT: umov w14, v0.b[15] -; CHECK-NEXT: udiv w17, w0, w18 -; CHECK-NEXT: mov v2.b[11], w11 -; CHECK-NEXT: msub w11, w17, w18, w0 -; CHECK-NEXT: udiv w12, w3, w2 -; CHECK-NEXT: mov v2.b[12], w11 -; CHECK-NEXT: msub w12, w12, w2, w3 -; CHECK-NEXT: udiv w8, w10, w9 -; CHECK-NEXT: mov v2.b[13], w12 -; CHECK-NEXT: msub w8, w8, w9, w10 -; CHECK-NEXT: udiv w11, w14, w13 -; CHECK-NEXT: mov v2.b[14], w8 -; CHECK-NEXT: msub w8, w11, w13, w14 +; CHECK-NEXT: ldp w4, w17, [sp] // 8-byte Folded Reload +; CHECK-NEXT: umov w9, v1.b[12] +; CHECK-NEXT: umov w8, v0.b[12] +; CHECK-NEXT: msub w17, w17, w7, w19 +; CHECK-NEXT: msub w4, w4, w21, w22 +; CHECK-NEXT: msub w5, w26, w24, w25 +; CHECK-NEXT: udiv w29, w28, w27 +; CHECK-NEXT: mov v2.b[6], w17 +; CHECK-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload +; CHECK-NEXT: mov v2.b[7], w4 +; CHECK-NEXT: umov w4, v0.b[15] +; CHECK-NEXT: ldp x22, x21, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: mov v2.b[8], w5 +; CHECK-NEXT: ldp x24, x23, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: msub w17, w29, w27, w28 +; CHECK-NEXT: udiv w14, w12, w30 +; CHECK-NEXT: ldp x26, x25, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: mov v2.b[9], w17 +; CHECK-NEXT: ldp x28, x27, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: msub w12, w14, w30, w12 +; CHECK-NEXT: umov w14, v1.b[15] +; CHECK-NEXT: udiv w13, w10, w11 +; CHECK-NEXT: ldp x29, x30, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: mov v2.b[10], w12 +; CHECK-NEXT: msub w10, w13, w11, w10 +; CHECK-NEXT: udiv w3, w8, w9 +; CHECK-NEXT: mov v2.b[11], w10 +; CHECK-NEXT: msub w8, w3, w9, w8 +; CHECK-NEXT: udiv w18, w0, w16 +; CHECK-NEXT: mov v2.b[12], w8 +; CHECK-NEXT: msub w9, w18, w16, w0 +; CHECK-NEXT: udiv w15, w2, w1 +; CHECK-NEXT: mov v2.b[13], w9 +; CHECK-NEXT: msub w10, w15, w1, w2 +; CHECK-NEXT: udiv w11, w4, w14 +; CHECK-NEXT: mov v2.b[14], w10 +; CHECK-NEXT: msub w8, w11, w14, w4 ; CHECK-NEXT: mov v2.b[15], w8 ; CHECK-NEXT: mov v0.16b, v2.16b -; CHECK-NEXT: ldp x26, x25, [sp], #64 // 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #128 ; CHECK-NEXT: ret %tmp3 = urem <16 x i8> %A, %B; ret <16 x i8> %tmp3 @@ -1297,21 +1349,21 @@ ; CHECK-NEXT: umov w12, v0.h[0] ; CHECK-NEXT: umov w8, v1.h[1] ; CHECK-NEXT: umov w9, v0.h[1] +; CHECK-NEXT: udiv w13, w12, w11 ; CHECK-NEXT: umov w14, v1.h[2] ; CHECK-NEXT: umov w15, v0.h[2] -; CHECK-NEXT: udiv w13, w12, w11 +; CHECK-NEXT: umov w17, v1.h[3] +; CHECK-NEXT: umov w18, v0.h[3] ; CHECK-NEXT: msub w11, w13, w11, w12 -; CHECK-NEXT: umov w12, v1.h[3] ; CHECK-NEXT: udiv w10, w9, w8 -; CHECK-NEXT: umov w13, v0.h[3] ; CHECK-NEXT: fmov s0, w11 ; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: udiv w16, w15, w14 ; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: msub w10, w16, w14, w15 -; CHECK-NEXT: udiv w9, w13, w12 -; CHECK-NEXT: mov v0.h[2], w10 -; CHECK-NEXT: msub w8, w9, w12, w13 +; CHECK-NEXT: msub w9, w16, w14, w15 +; CHECK-NEXT: udiv w12, w18, w17 +; CHECK-NEXT: mov v0.h[2], w9 +; CHECK-NEXT: msub w8, w12, w17, w18 ; CHECK-NEXT: mov v0.h[3], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret @@ -1322,47 +1374,51 @@ define <8 x i16> @urem8x16(<8 x i16> %A, <8 x i16> %B) { ; CHECK-LABEL: urem8x16: ; CHECK: // %bb.0: +; CHECK-NEXT: str x19, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w19, -16 ; CHECK-NEXT: umov w11, v1.h[0] ; CHECK-NEXT: umov w12, v0.h[0] ; CHECK-NEXT: umov w8, v1.h[1] ; CHECK-NEXT: umov w9, v0.h[1] +; CHECK-NEXT: udiv w13, w12, w11 ; CHECK-NEXT: umov w14, v1.h[2] ; CHECK-NEXT: umov w15, v0.h[2] ; CHECK-NEXT: umov w17, v1.h[3] ; CHECK-NEXT: umov w18, v0.h[3] -; CHECK-NEXT: udiv w13, w12, w11 ; CHECK-NEXT: umov w1, v1.h[4] ; CHECK-NEXT: umov w2, v0.h[4] +; CHECK-NEXT: umov w4, v1.h[5] +; CHECK-NEXT: umov w5, v0.h[5] +; CHECK-NEXT: umov w7, v1.h[6] +; CHECK-NEXT: umov w19, v0.h[6] ; CHECK-NEXT: msub w11, w13, w11, w12 -; CHECK-NEXT: umov w12, v1.h[5] ; CHECK-NEXT: udiv w10, w9, w8 -; CHECK-NEXT: umov w13, v0.h[5] ; CHECK-NEXT: fmov s2, w11 -; CHECK-NEXT: umov w11, v0.h[6] +; CHECK-NEXT: umov w11, v0.h[7] ; CHECK-NEXT: msub w8, w10, w8, w9 -; CHECK-NEXT: umov w10, v1.h[6] +; CHECK-NEXT: umov w10, v1.h[7] ; CHECK-NEXT: udiv w16, w15, w14 ; CHECK-NEXT: mov v2.h[1], w8 -; CHECK-NEXT: msub w8, w16, w14, w15 -; CHECK-NEXT: umov w15, v1.h[7] +; CHECK-NEXT: msub w9, w16, w14, w15 ; CHECK-NEXT: udiv w0, w18, w17 -; CHECK-NEXT: umov w16, v0.h[7] -; CHECK-NEXT: mov v2.h[2], w8 -; CHECK-NEXT: msub w14, w0, w17, w18 +; CHECK-NEXT: mov v2.h[2], w9 +; CHECK-NEXT: msub w13, w0, w17, w18 ; CHECK-NEXT: udiv w3, w2, w1 -; CHECK-NEXT: mov v2.h[3], w14 +; CHECK-NEXT: mov v2.h[3], w13 ; CHECK-NEXT: msub w14, w3, w1, w2 -; CHECK-NEXT: udiv w9, w13, w12 +; CHECK-NEXT: udiv w6, w5, w4 ; CHECK-NEXT: mov v2.h[4], w14 -; CHECK-NEXT: msub w9, w9, w12, w13 -; CHECK-NEXT: udiv w8, w11, w10 +; CHECK-NEXT: msub w9, w6, w4, w5 +; CHECK-NEXT: udiv w12, w19, w7 ; CHECK-NEXT: mov v2.h[5], w9 +; CHECK-NEXT: msub w12, w12, w7, w19 +; CHECK-NEXT: udiv w8, w11, w10 +; CHECK-NEXT: mov v2.h[6], w12 ; CHECK-NEXT: msub w8, w8, w10, w11 -; CHECK-NEXT: udiv w12, w16, w15 -; CHECK-NEXT: mov v2.h[6], w8 -; CHECK-NEXT: msub w8, w12, w15, w16 ; CHECK-NEXT: mov v2.h[7], w8 ; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ldr x19, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %tmp3 = urem <8 x i16> %A, %B; ret <8 x i16> %tmp3 @@ -1422,10 +1478,10 @@ ; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: udiv w16, w15, w14 ; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: msub w10, w16, w14, w15 -; CHECK-NEXT: udiv w9, w18, w17 -; CHECK-NEXT: mov v0.s[2], w10 -; CHECK-NEXT: msub w8, w9, w17, w18 +; CHECK-NEXT: msub w9, w16, w14, w15 +; CHECK-NEXT: udiv w12, w18, w17 +; CHECK-NEXT: mov v0.s[2], w9 +; CHECK-NEXT: msub w8, w12, w17, w18 ; CHECK-NEXT: mov v0.s[3], w8 ; CHECK-NEXT: ret %tmp3 = urem <4 x i32> %A, %B; diff --git a/llvm/test/CodeGen/AArch64/arm64-nvcast.ll b/llvm/test/CodeGen/AArch64/arm64-nvcast.ll --- a/llvm/test/CodeGen/AArch64/arm64-nvcast.ll +++ b/llvm/test/CodeGen/AArch64/arm64-nvcast.ll @@ -10,8 +10,8 @@ ; CHECK-NEXT: and x8, x1, #0x3 ; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: fmov.2d v0, #2.00000000 -; CHECK-NEXT: bfi x9, x8, #2, #2 ; CHECK-NEXT: str q0, [sp] +; CHECK-NEXT: bfi x9, x8, #2, #2 ; CHECK-NEXT: ldr s0, [x9] ; CHECK-NEXT: str s0, [x0] ; CHECK-NEXT: add sp, sp, #16 @@ -31,8 +31,8 @@ ; CHECK-NEXT: and x8, x1, #0x3 ; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: movi.16b v0, #63 -; CHECK-NEXT: bfi x9, x8, #2, #2 ; CHECK-NEXT: str q0, [sp] +; CHECK-NEXT: bfi x9, x8, #2, #2 ; CHECK-NEXT: ldr s0, [x9] ; CHECK-NEXT: str s0, [x0] ; CHECK-NEXT: add sp, sp, #16 diff --git a/llvm/test/CodeGen/AArch64/arm64-promote-const-complex-initializers.ll b/llvm/test/CodeGen/AArch64/arm64-promote-const-complex-initializers.ll --- a/llvm/test/CodeGen/AArch64/arm64-promote-const-complex-initializers.ll +++ b/llvm/test/CodeGen/AArch64/arm64-promote-const-complex-initializers.ll @@ -17,12 +17,13 @@ ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: _test1: ; @test1 ; CHECK-NEXT: .cfi_startproc -; CHECK-NEXT: ; %bb.0: -; CHECK-NEXT: Lloh0: -; CHECK-NEXT: adrp x8, lCPI0_0@PAGE -; CHECK-NEXT: Lloh1: -; CHECK-NEXT: ldr q0, [x8, lCPI0_0@PAGEOFF] -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: Lloh0: +; CHECK-NEXT: adrp x8, lCPI0_0@PAGE +; CHECK-NEXT: Lloh1: +; CHECK-NEXT: ldr q0, [x8, lCPI0_0@PAGEOFF] +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdr Lloh0, Lloh1 ret [1 x <4 x float>] [<4 x float> bitcast (<1 x i128> to <4 x float>)] } @@ -39,22 +40,23 @@ ; CHECK-NEXT: .p2align 2 ; CHECK-NEXT: _test2: ; @test2 ; CHECK-NEXT: .cfi_startproc -; CHECK-NEXT: ; %bb.0: -; CHECK-NEXT: Lloh2: -; CHECK-NEXT: adrp x8, lCPI1_0@PAGE -; CHECK-NEXT: Lloh3: -; CHECK-NEXT: ldr q1, [x8, lCPI1_0@PAGEOFF] -; CHECK-NEXT: mov s2, v1[1] -; CHECK-NEXT: fneg s0, s1 -; CHECK-NEXT: mov s3, v1[2] -; CHECK-NEXT: mov s1, v1[3] -; CHECK-NEXT: fneg s2, s2 -; CHECK-NEXT: fneg s1, s1 -; CHECK-NEXT: mov.s v0[1], v2[0] -; CHECK-NEXT: fneg s2, s3 -; CHECK-NEXT: mov.s v0[2], v2[0] -; CHECK-NEXT: mov.s v0[3], v1[0] -; CHECK-NEXT: ret +; CHECK: ; %bb.0: +; CHECK-NEXT: Lloh2: +; CHECK-NEXT: adrp x8, lCPI1_0@PAGE +; CHECK-NEXT: Lloh3: +; CHECK-NEXT: ldr q1, [x8, lCPI1_0@PAGEOFF] +; CHECK-NEXT: mov s0, v1[1] +; CHECK-NEXT: mov s2, v1[2] +; CHECK-NEXT: fneg s3, s0 +; CHECK-NEXT: fneg s0, s1 +; CHECK-NEXT: mov s1, v1[3] +; CHECK-NEXT: fneg s2, s2 +; CHECK-NEXT: mov.s v0[1], v3[0] +; CHECK-NEXT: fneg s1, s1 +; CHECK-NEXT: mov.s v0[2], v2[0] +; CHECK-NEXT: mov.s v0[3], v1[0] +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdr Lloh2, Lloh3 ; ret [1 x <4 x float>] [<4 x float> bitcast (<1 x i128> to <4 x float>), i32 0)), diff --git a/llvm/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll b/llvm/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll --- a/llvm/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll +++ b/llvm/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll @@ -4,9 +4,9 @@ define <4 x float> @foo(<4 x float> %val, <4 x float> %test) nounwind { ; CHECK-LABEL: foo: ; CHECK: ; %bb.0: -; CHECK-NEXT: fmov.4s v2, #1.00000000 ; CHECK-NEXT: fcmeq.4s v0, v0, v1 -; CHECK-NEXT: and.16b v0, v0, v2 +; CHECK-NEXT: fmov.4s v1, #1.00000000 +; CHECK-NEXT: and.16b v0, v0, v1 ; CHECK-NEXT: ret %cmp = fcmp oeq <4 x float> %val, %test %ext = zext <4 x i1> %cmp to <4 x i32> @@ -19,8 +19,8 @@ define void @foo1(<4 x float> %val, <4 x float> %test, <4 x double>* %p) nounwind { ; CHECK-LABEL: foo1: ; CHECK: ; %bb.0: -; CHECK-NEXT: movi.4s v2, #1 ; CHECK-NEXT: fcmeq.4s v0, v0, v1 +; CHECK-NEXT: movi.4s v2, #1 ; CHECK-NEXT: and.16b v0, v0, v2 ; CHECK-NEXT: ushll2.2d v1, v0, #0 ; CHECK-NEXT: ushll.2d v0, v0, #0 @@ -44,8 +44,8 @@ ; CHECK-NEXT: adrp x8, lCPI2_0@PAGE ; CHECK-NEXT: fcmeq.4s v0, v0, v1 ; CHECK-NEXT: Lloh1: -; CHECK-NEXT: ldr q1, [x8, lCPI2_0@PAGEOFF] -; CHECK-NEXT: and.16b v0, v0, v1 +; CHECK-NEXT: ldr q2, [x8, lCPI2_0@PAGEOFF] +; CHECK-NEXT: and.16b v0, v0, v2 ; CHECK-NEXT: ret ; CHECK-NEXT: .loh AdrpLdr Lloh0, Lloh1 %cmp = fcmp oeq <4 x float> %val, %test diff --git a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll --- a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll +++ b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll @@ -225,61 +225,61 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldr w8, [sp, #64] ; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: ldr w9, [sp] -; CHECK-NEXT: ldr w10, [sp, #8] +; CHECK-NEXT: ldr w9, [sp, #72] +; CHECK-NEXT: ldr w10, [sp] ; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: ldr w8, [sp, #72] +; CHECK-NEXT: ldr w8, [sp, #80] ; CHECK-NEXT: mov.b v0[1], w1 +; CHECK-NEXT: ldr w11, [sp, #8] ; CHECK-NEXT: movi.16b v2, #1 -; CHECK-NEXT: mov.b v1[1], w8 -; CHECK-NEXT: ldr w8, [sp, #80] +; CHECK-NEXT: mov.b v1[1], w9 +; CHECK-NEXT: ldr w9, [sp, #88] ; CHECK-NEXT: mov.b v0[2], w2 ; CHECK-NEXT: mov.b v1[2], w8 -; CHECK-NEXT: ldr w8, [sp, #88] -; CHECK-NEXT: mov.b v0[3], w3 -; CHECK-NEXT: mov.b v1[3], w8 ; CHECK-NEXT: ldr w8, [sp, #96] +; CHECK-NEXT: mov.b v0[3], w3 +; CHECK-NEXT: mov.b v1[3], w9 +; CHECK-NEXT: ldr w9, [sp, #104] ; CHECK-NEXT: mov.b v0[4], w4 ; CHECK-NEXT: mov.b v1[4], w8 -; CHECK-NEXT: ldr w8, [sp, #104] -; CHECK-NEXT: mov.b v0[5], w5 -; CHECK-NEXT: mov.b v1[5], w8 ; CHECK-NEXT: ldr w8, [sp, #112] +; CHECK-NEXT: mov.b v0[5], w5 +; CHECK-NEXT: mov.b v1[5], w9 +; CHECK-NEXT: ldr w9, [sp, #120] ; CHECK-NEXT: mov.b v0[6], w6 ; CHECK-NEXT: mov.b v1[6], w8 -; CHECK-NEXT: ldr w8, [sp, #120] -; CHECK-NEXT: mov.b v0[7], w7 -; CHECK-NEXT: mov.b v1[7], w8 ; CHECK-NEXT: ldr w8, [sp, #128] -; CHECK-NEXT: mov.b v0[8], w9 -; CHECK-NEXT: ldr w9, [sp, #16] +; CHECK-NEXT: mov.b v0[7], w7 +; CHECK-NEXT: mov.b v1[7], w9 +; CHECK-NEXT: ldr w9, [sp, #136] +; CHECK-NEXT: mov.b v0[8], w10 +; CHECK-NEXT: ldr w10, [sp, #16] ; CHECK-NEXT: mov.b v1[8], w8 -; CHECK-NEXT: ldr w8, [sp, #136] -; CHECK-NEXT: mov.b v0[9], w10 -; CHECK-NEXT: ldr w10, [sp, #24] -; CHECK-NEXT: mov.b v1[9], w8 ; CHECK-NEXT: ldr w8, [sp, #144] -; CHECK-NEXT: mov.b v0[10], w9 -; CHECK-NEXT: ldr w9, [sp, #32] +; CHECK-NEXT: mov.b v0[9], w11 +; CHECK-NEXT: ldr w11, [sp, #24] +; CHECK-NEXT: mov.b v1[9], w9 +; CHECK-NEXT: ldr w9, [sp, #152] +; CHECK-NEXT: mov.b v0[10], w10 +; CHECK-NEXT: ldr w10, [sp, #32] ; CHECK-NEXT: mov.b v1[10], w8 -; CHECK-NEXT: ldr w8, [sp, #152] -; CHECK-NEXT: mov.b v0[11], w10 -; CHECK-NEXT: ldr w10, [sp, #40] -; CHECK-NEXT: mov.b v1[11], w8 ; CHECK-NEXT: ldr w8, [sp, #160] -; CHECK-NEXT: mov.b v0[12], w9 -; CHECK-NEXT: ldr w9, [sp, #48] +; CHECK-NEXT: mov.b v0[11], w11 +; CHECK-NEXT: ldr w11, [sp, #40] +; CHECK-NEXT: mov.b v1[11], w9 +; CHECK-NEXT: ldr w9, [sp, #168] +; CHECK-NEXT: mov.b v0[12], w10 +; CHECK-NEXT: ldr w10, [sp, #48] ; CHECK-NEXT: mov.b v1[12], w8 -; CHECK-NEXT: ldr w8, [sp, #168] -; CHECK-NEXT: mov.b v0[13], w10 -; CHECK-NEXT: ldr w10, [sp, #56] -; CHECK-NEXT: mov.b v1[13], w8 ; CHECK-NEXT: ldr w8, [sp, #176] -; CHECK-NEXT: mov.b v0[14], w9 +; CHECK-NEXT: mov.b v0[13], w11 +; CHECK-NEXT: ldr w11, [sp, #56] +; CHECK-NEXT: mov.b v1[13], w9 +; CHECK-NEXT: ldr w9, [sp, #184] +; CHECK-NEXT: mov.b v0[14], w10 ; CHECK-NEXT: mov.b v1[14], w8 -; CHECK-NEXT: ldr w8, [sp, #184] -; CHECK-NEXT: mov.b v0[15], w10 -; CHECK-NEXT: mov.b v1[15], w8 +; CHECK-NEXT: mov.b v0[15], w11 +; CHECK-NEXT: mov.b v1[15], w9 ; CHECK-NEXT: and.16b v0, v0, v2 ; CHECK-NEXT: and.16b v1, v1, v2 ; CHECK-NEXT: ret @@ -292,59 +292,59 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldr w8, [sp, #64] ; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: ldr w9, [sp] -; CHECK-NEXT: ldr w10, [sp, #8] +; CHECK-NEXT: ldr w9, [sp, #80] +; CHECK-NEXT: ldr w10, [sp] ; CHECK-NEXT: fmov s1, w8 ; CHECK-NEXT: ldr w8, [sp, #72] ; CHECK-NEXT: mov.b v0[1], w1 +; CHECK-NEXT: ldr w11, [sp, #8] ; CHECK-NEXT: mov.b v1[1], w8 -; CHECK-NEXT: ldr w8, [sp, #80] -; CHECK-NEXT: mov.b v0[2], w2 -; CHECK-NEXT: mov.b v1[2], w8 ; CHECK-NEXT: ldr w8, [sp, #88] +; CHECK-NEXT: mov.b v0[2], w2 +; CHECK-NEXT: mov.b v1[2], w9 +; CHECK-NEXT: ldr w9, [sp, #96] ; CHECK-NEXT: mov.b v0[3], w3 ; CHECK-NEXT: mov.b v1[3], w8 -; CHECK-NEXT: ldr w8, [sp, #96] -; CHECK-NEXT: mov.b v0[4], w4 -; CHECK-NEXT: mov.b v1[4], w8 ; CHECK-NEXT: ldr w8, [sp, #104] +; CHECK-NEXT: mov.b v0[4], w4 +; CHECK-NEXT: mov.b v1[4], w9 +; CHECK-NEXT: ldr w9, [sp, #112] ; CHECK-NEXT: mov.b v0[5], w5 ; CHECK-NEXT: mov.b v1[5], w8 -; CHECK-NEXT: ldr w8, [sp, #112] -; CHECK-NEXT: mov.b v0[6], w6 -; CHECK-NEXT: mov.b v1[6], w8 ; CHECK-NEXT: ldr w8, [sp, #120] +; CHECK-NEXT: mov.b v0[6], w6 +; CHECK-NEXT: mov.b v1[6], w9 +; CHECK-NEXT: ldr w9, [sp, #128] ; CHECK-NEXT: mov.b v0[7], w7 ; CHECK-NEXT: mov.b v1[7], w8 -; CHECK-NEXT: ldr w8, [sp, #128] -; CHECK-NEXT: mov.b v0[8], w9 -; CHECK-NEXT: ldr w9, [sp, #16] -; CHECK-NEXT: mov.b v1[8], w8 ; CHECK-NEXT: ldr w8, [sp, #136] -; CHECK-NEXT: mov.b v0[9], w10 -; CHECK-NEXT: ldr w10, [sp, #24] +; CHECK-NEXT: mov.b v0[8], w10 +; CHECK-NEXT: ldr w10, [sp, #16] +; CHECK-NEXT: mov.b v1[8], w9 +; CHECK-NEXT: ldr w9, [sp, #144] +; CHECK-NEXT: mov.b v0[9], w11 +; CHECK-NEXT: ldr w11, [sp, #24] ; CHECK-NEXT: mov.b v1[9], w8 -; CHECK-NEXT: ldr w8, [sp, #144] -; CHECK-NEXT: mov.b v0[10], w9 -; CHECK-NEXT: ldr w9, [sp, #32] -; CHECK-NEXT: mov.b v1[10], w8 ; CHECK-NEXT: ldr w8, [sp, #152] -; CHECK-NEXT: mov.b v0[11], w10 -; CHECK-NEXT: ldr w10, [sp, #40] +; CHECK-NEXT: mov.b v0[10], w10 +; CHECK-NEXT: ldr w10, [sp, #32] +; CHECK-NEXT: mov.b v1[10], w9 +; CHECK-NEXT: ldr w9, [sp, #160] +; CHECK-NEXT: mov.b v0[11], w11 +; CHECK-NEXT: ldr w11, [sp, #40] ; CHECK-NEXT: mov.b v1[11], w8 -; CHECK-NEXT: ldr w8, [sp, #160] -; CHECK-NEXT: mov.b v0[12], w9 -; CHECK-NEXT: ldr w9, [sp, #48] -; CHECK-NEXT: mov.b v1[12], w8 ; CHECK-NEXT: ldr w8, [sp, #168] -; CHECK-NEXT: mov.b v0[13], w10 -; CHECK-NEXT: ldr w10, [sp, #56] +; CHECK-NEXT: mov.b v0[12], w10 +; CHECK-NEXT: ldr w10, [sp, #48] +; CHECK-NEXT: mov.b v1[12], w9 +; CHECK-NEXT: ldr w9, [sp, #176] +; CHECK-NEXT: mov.b v0[13], w11 +; CHECK-NEXT: ldr w11, [sp, #56] ; CHECK-NEXT: mov.b v1[13], w8 -; CHECK-NEXT: ldr w8, [sp, #176] -; CHECK-NEXT: mov.b v0[14], w9 -; CHECK-NEXT: mov.b v1[14], w8 ; CHECK-NEXT: ldr w8, [sp, #184] -; CHECK-NEXT: mov.b v0[15], w10 +; CHECK-NEXT: mov.b v0[14], w10 +; CHECK-NEXT: mov.b v1[14], w9 +; CHECK-NEXT: mov.b v0[15], w11 ; CHECK-NEXT: mov.b v1[15], w8 ; CHECK-NEXT: shl.16b v0, v0, #7 ; CHECK-NEXT: shl.16b v1, v1, #7 @@ -360,127 +360,127 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldr w8, [sp, #320] ; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: ldr w9, [sp, #64] -; CHECK-NEXT: ldr w10, [sp, #192] +; CHECK-NEXT: ldr w9, [sp, #192] +; CHECK-NEXT: ldr w10, [sp, #336] ; CHECK-NEXT: fmov s3, w8 ; CHECK-NEXT: ldr w8, [sp, #328] -; CHECK-NEXT: fmov s1, w9 +; CHECK-NEXT: fmov s2, w9 ; CHECK-NEXT: ldr w9, [sp, #200] -; CHECK-NEXT: fmov s2, w10 -; CHECK-NEXT: ldr w10, [sp, #336] -; CHECK-NEXT: mov.b v3[1], w8 -; CHECK-NEXT: ldr w8, [sp, #72] -; CHECK-NEXT: mov.b v0[1], w1 ; CHECK-NEXT: ldr w11, [sp, #352] -; CHECK-NEXT: mov.b v2[1], w9 -; CHECK-NEXT: ldr w9, [sp, #80] -; CHECK-NEXT: mov.b v1[1], w8 +; CHECK-NEXT: mov.b v0[1], w1 +; CHECK-NEXT: mov.b v3[1], w8 ; CHECK-NEXT: ldr w8, [sp, #344] +; CHECK-NEXT: mov.b v2[1], w9 +; CHECK-NEXT: ldr w9, [sp, #368] ; CHECK-NEXT: mov.b v3[2], w10 -; CHECK-NEXT: ldr w10, [sp, #208] +; CHECK-NEXT: ldr w10, [sp, #360] ; CHECK-NEXT: mov.b v0[2], w2 -; CHECK-NEXT: ldr w12, [sp, #368] -; CHECK-NEXT: ldr w13, [sp, #384] -; CHECK-NEXT: mov.b v1[2], w9 -; CHECK-NEXT: ldr w9, [sp, #360] -; CHECK-NEXT: mov.b v2[2], w10 -; CHECK-NEXT: ldr w10, [sp, #88] +; CHECK-NEXT: ldr w12, [sp, #272] ; CHECK-NEXT: mov.b v3[3], w8 -; CHECK-NEXT: ldr w8, [sp, #216] +; CHECK-NEXT: ldr w8, [sp, #208] ; CHECK-NEXT: mov.b v0[3], w3 -; CHECK-NEXT: ldr w14, [sp, #400] -; CHECK-NEXT: mov.b v1[3], w10 -; CHECK-NEXT: ldr w10, [sp, #376] -; CHECK-NEXT: mov.b v2[3], w8 -; CHECK-NEXT: ldr w8, [sp, #96] +; CHECK-NEXT: ldr w13, [sp, #136] ; CHECK-NEXT: mov.b v3[4], w11 -; CHECK-NEXT: ldr w11, [sp, #224] -; CHECK-NEXT: mov.b v0[4], w4 -; CHECK-NEXT: ldr w15, [sp, #416] -; CHECK-NEXT: mov.b v1[4], w8 -; CHECK-NEXT: ldr w8, [sp, #392] -; CHECK-NEXT: mov.b v2[4], w11 -; CHECK-NEXT: ldr w11, [sp, #104] -; CHECK-NEXT: mov.b v3[5], w9 +; CHECK-NEXT: ldr w11, [sp, #216] +; CHECK-NEXT: mov.b v2[2], w8 +; CHECK-NEXT: ldr w8, [sp, #376] +; CHECK-NEXT: mov.b v3[5], w10 +; CHECK-NEXT: ldr w10, [sp, #224] +; CHECK-NEXT: mov.b v2[3], w11 +; CHECK-NEXT: ldr w11, [sp, #384] +; CHECK-NEXT: mov.b v3[6], w9 ; CHECK-NEXT: ldr w9, [sp, #232] -; CHECK-NEXT: mov.b v0[5], w5 -; CHECK-NEXT: ldr w16, [sp, #432] -; CHECK-NEXT: mov.b v1[5], w11 -; CHECK-NEXT: ldr w11, [sp, #408] +; CHECK-NEXT: mov.b v2[4], w10 +; CHECK-NEXT: ldr w10, [sp, #392] +; CHECK-NEXT: mov.b v3[7], w8 +; CHECK-NEXT: ldr w8, [sp, #240] ; CHECK-NEXT: mov.b v2[5], w9 -; CHECK-NEXT: ldr w9, [sp, #112] -; CHECK-NEXT: mov.b v3[6], w12 -; CHECK-NEXT: ldr w12, [sp, #240] +; CHECK-NEXT: ldr w9, [sp, #400] +; CHECK-NEXT: mov.b v3[8], w11 +; CHECK-NEXT: ldr w11, [sp, #248] +; CHECK-NEXT: mov.b v2[6], w8 +; CHECK-NEXT: ldr w8, [sp, #408] +; CHECK-NEXT: mov.b v3[9], w10 +; CHECK-NEXT: ldr w10, [sp, #256] +; CHECK-NEXT: mov.b v2[7], w11 +; CHECK-NEXT: ldr w11, [sp, #416] +; CHECK-NEXT: mov.b v3[10], w9 +; CHECK-NEXT: ldr w9, [sp, #264] +; CHECK-NEXT: mov.b v2[8], w10 +; CHECK-NEXT: ldr w10, [sp, #424] +; CHECK-NEXT: mov.b v3[11], w8 +; CHECK-NEXT: ldr w8, [sp, #432] +; CHECK-NEXT: mov.b v2[9], w9 +; CHECK-NEXT: ldr w9, [sp, #64] +; CHECK-NEXT: mov.b v3[12], w11 +; CHECK-NEXT: mov.b v0[4], w4 +; CHECK-NEXT: ldr w11, [sp, #280] +; CHECK-NEXT: mov.b v2[10], w12 +; CHECK-NEXT: mov.b v3[13], w10 +; CHECK-NEXT: fmov s1, w9 +; CHECK-NEXT: ldr w9, [sp, #72] +; CHECK-NEXT: mov.b v0[5], w5 +; CHECK-NEXT: mov.b v3[14], w8 +; CHECK-NEXT: ldr w8, [sp, #80] ; CHECK-NEXT: mov.b v0[6], w6 -; CHECK-NEXT: mov.b v1[6], w9 -; CHECK-NEXT: ldr w9, [sp, #424] -; CHECK-NEXT: mov.b v2[6], w12 -; CHECK-NEXT: ldr w12, [sp, #120] -; CHECK-NEXT: mov.b v3[7], w10 -; CHECK-NEXT: ldr w10, [sp, #248] +; CHECK-NEXT: ldr w14, [sp, #144] +; CHECK-NEXT: mov.b v1[1], w9 +; CHECK-NEXT: ldr w9, [sp, #88] ; CHECK-NEXT: mov.b v0[7], w7 -; CHECK-NEXT: mov.b v1[7], w12 -; CHECK-NEXT: ldr w12, [sp] -; CHECK-NEXT: mov.b v2[7], w10 -; CHECK-NEXT: ldr w10, [sp, #128] -; CHECK-NEXT: mov.b v3[8], w13 -; CHECK-NEXT: ldr w13, [sp, #256] -; CHECK-NEXT: mov.b v0[8], w12 -; CHECK-NEXT: ldr w12, [sp, #440] -; CHECK-NEXT: mov.b v1[8], w10 -; CHECK-NEXT: ldr w10, [sp, #8] -; CHECK-NEXT: mov.b v2[8], w13 -; CHECK-NEXT: ldr w13, [sp, #136] -; CHECK-NEXT: mov.b v3[9], w8 -; CHECK-NEXT: ldr w8, [sp, #264] -; CHECK-NEXT: mov.b v0[9], w10 -; CHECK-NEXT: ldr w10, [sp, #272] +; CHECK-NEXT: ldr w10, [sp, #288] +; CHECK-NEXT: mov.b v1[2], w8 +; CHECK-NEXT: ldr w8, [sp, #96] +; CHECK-NEXT: mov.b v2[11], w11 +; CHECK-NEXT: ldr w11, [sp, #440] +; CHECK-NEXT: mov.b v1[3], w9 +; CHECK-NEXT: ldr w9, [sp, #104] +; CHECK-NEXT: mov.b v2[12], w10 +; CHECK-NEXT: ldr w10, [sp, #296] +; CHECK-NEXT: mov.b v1[4], w8 +; CHECK-NEXT: ldr w8, [sp, #112] +; CHECK-NEXT: ldr w12, [sp, #312] +; CHECK-NEXT: mov.b v3[15], w11 +; CHECK-NEXT: mov.b v1[5], w9 +; CHECK-NEXT: ldr w9, [sp, #120] +; CHECK-NEXT: mov.b v2[13], w10 +; CHECK-NEXT: ldr w10, [sp, #304] +; CHECK-NEXT: mov.b v1[6], w8 +; CHECK-NEXT: ldr w8, [sp, #128] +; CHECK-NEXT: movi.16b v4, #1 +; CHECK-NEXT: mov.b v1[7], w9 +; CHECK-NEXT: ldr w9, [sp] +; CHECK-NEXT: mov.b v2[14], w10 +; CHECK-NEXT: mov.b v1[8], w8 +; CHECK-NEXT: ldr w8, [sp, #8] +; CHECK-NEXT: mov.b v0[8], w9 +; CHECK-NEXT: ldr w9, [sp, #16] ; CHECK-NEXT: mov.b v1[9], w13 -; CHECK-NEXT: ldr w13, [sp, #16] -; CHECK-NEXT: mov.b v2[9], w8 -; CHECK-NEXT: ldr w8, [sp, #144] -; CHECK-NEXT: mov.b v3[10], w14 -; CHECK-NEXT: ldr w14, [sp, #280] -; CHECK-NEXT: mov.b v0[10], w13 -; CHECK-NEXT: ldr w13, [sp, #296] -; CHECK-NEXT: mov.b v1[10], w8 +; CHECK-NEXT: ldr w13, [sp, #152] +; CHECK-NEXT: mov.b v0[9], w8 ; CHECK-NEXT: ldr w8, [sp, #24] -; CHECK-NEXT: mov.b v2[10], w10 -; CHECK-NEXT: ldr w10, [sp, #152] -; CHECK-NEXT: mov.b v3[11], w11 -; CHECK-NEXT: ldr w11, [sp, #288] +; CHECK-NEXT: mov.b v1[10], w14 +; CHECK-NEXT: ldr w14, [sp, #160] +; CHECK-NEXT: mov.b v0[10], w9 +; CHECK-NEXT: ldr w9, [sp, #32] +; CHECK-NEXT: mov.b v1[11], w13 +; CHECK-NEXT: ldr w13, [sp, #168] ; CHECK-NEXT: mov.b v0[11], w8 -; CHECK-NEXT: ldr w8, [sp, #32] -; CHECK-NEXT: mov.b v1[11], w10 -; CHECK-NEXT: ldr w10, [sp, #160] -; CHECK-NEXT: mov.b v2[11], w14 -; CHECK-NEXT: mov.b v3[12], w15 -; CHECK-NEXT: mov.b v0[12], w8 ; CHECK-NEXT: ldr w8, [sp, #40] -; CHECK-NEXT: mov.b v1[12], w10 -; CHECK-NEXT: ldr w10, [sp, #168] -; CHECK-NEXT: mov.b v2[12], w11 -; CHECK-NEXT: ldr w11, [sp, #312] -; CHECK-NEXT: mov.b v3[13], w9 -; CHECK-NEXT: ldr w9, [sp, #304] +; CHECK-NEXT: mov.b v1[12], w14 +; CHECK-NEXT: ldr w14, [sp, #176] +; CHECK-NEXT: mov.b v0[12], w9 +; CHECK-NEXT: ldr w9, [sp, #48] +; CHECK-NEXT: mov.b v1[13], w13 +; CHECK-NEXT: ldr w13, [sp, #184] ; CHECK-NEXT: mov.b v0[13], w8 -; CHECK-NEXT: ldr w8, [sp, #48] -; CHECK-NEXT: mov.b v1[13], w10 -; CHECK-NEXT: ldr w10, [sp, #176] -; CHECK-NEXT: mov.b v2[13], w13 -; CHECK-NEXT: mov.b v3[14], w16 -; CHECK-NEXT: mov.b v0[14], w8 ; CHECK-NEXT: ldr w8, [sp, #56] -; CHECK-NEXT: mov.b v1[14], w10 -; CHECK-NEXT: mov.b v2[14], w9 -; CHECK-NEXT: ldr w9, [sp, #184] -; CHECK-NEXT: movi.16b v4, #1 +; CHECK-NEXT: mov.b v1[14], w14 +; CHECK-NEXT: mov.b v2[15], w12 +; CHECK-NEXT: mov.b v0[14], w9 +; CHECK-NEXT: mov.b v1[15], w13 ; CHECK-NEXT: mov.b v0[15], w8 -; CHECK-NEXT: mov.b v1[15], w9 -; CHECK-NEXT: mov.b v2[15], w11 -; CHECK-NEXT: mov.b v3[15], w12 -; CHECK-NEXT: and.16b v0, v0, v4 ; CHECK-NEXT: and.16b v1, v1, v4 +; CHECK-NEXT: and.16b v0, v0, v4 ; CHECK-NEXT: and.16b v2, v2, v4 ; CHECK-NEXT: and.16b v3, v3, v4 ; CHECK-NEXT: ret @@ -492,132 +492,132 @@ ; CHECK-LABEL: sext_v64i1: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr w8, [sp, #320] -; CHECK-NEXT: fmov s3, w0 -; CHECK-NEXT: ldr w9, [sp, #64] -; CHECK-NEXT: ldr w10, [sp, #192] -; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: ldr w10, [sp, #336] +; CHECK-NEXT: ldr w9, [sp, #192] +; CHECK-NEXT: fmov s2, w8 ; CHECK-NEXT: ldr w8, [sp, #328] -; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: ldr w9, [sp, #72] -; CHECK-NEXT: fmov s2, w10 -; CHECK-NEXT: ldr w10, [sp, #80] -; CHECK-NEXT: mov.b v0[1], w8 -; CHECK-NEXT: ldr w8, [sp, #200] -; CHECK-NEXT: mov.b v1[1], w9 -; CHECK-NEXT: ldr w9, [sp, #336] -; CHECK-NEXT: mov.b v3[1], w1 -; CHECK-NEXT: ldr w11, [sp, #88] +; CHECK-NEXT: ldr w11, [sp, #352] +; CHECK-NEXT: mov.b v0[1], w1 +; CHECK-NEXT: fmov s3, w9 +; CHECK-NEXT: ldr w9, [sp, #200] ; CHECK-NEXT: mov.b v2[1], w8 ; CHECK-NEXT: ldr w8, [sp, #344] -; CHECK-NEXT: mov.b v0[2], w9 -; CHECK-NEXT: ldr w9, [sp, #208] -; CHECK-NEXT: mov.b v1[2], w10 -; CHECK-NEXT: ldr w10, [sp, #352] -; CHECK-NEXT: mov.b v3[2], w2 -; CHECK-NEXT: ldr w12, [sp, #96] -; CHECK-NEXT: mov.b v2[2], w9 -; CHECK-NEXT: ldr w9, [sp, #360] -; CHECK-NEXT: mov.b v0[3], w8 -; CHECK-NEXT: ldr w8, [sp, #216] -; CHECK-NEXT: mov.b v1[3], w11 -; CHECK-NEXT: ldr w13, [sp, #104] -; CHECK-NEXT: mov.b v3[3], w3 -; CHECK-NEXT: ldr w11, [sp, #368] +; CHECK-NEXT: mov.b v0[2], w2 +; CHECK-NEXT: ldr w13, [sp, #128] +; CHECK-NEXT: mov.b v2[2], w10 +; CHECK-NEXT: ldr w10, [sp, #360] +; CHECK-NEXT: mov.b v3[1], w9 +; CHECK-NEXT: ldr w9, [sp, #368] ; CHECK-NEXT: mov.b v2[3], w8 -; CHECK-NEXT: ldr w14, [sp, #112] -; CHECK-NEXT: mov.b v0[4], w10 -; CHECK-NEXT: ldr w10, [sp, #224] -; CHECK-NEXT: mov.b v1[4], w12 +; CHECK-NEXT: ldr w8, [sp, #208] +; CHECK-NEXT: mov.b v0[3], w3 +; CHECK-NEXT: ldr w12, [sp, #264] +; CHECK-NEXT: mov.b v2[4], w11 +; CHECK-NEXT: ldr w11, [sp, #216] +; CHECK-NEXT: mov.b v3[2], w8 ; CHECK-NEXT: ldr w8, [sp, #376] -; CHECK-NEXT: mov.b v3[4], w4 -; CHECK-NEXT: ldr w15, [sp, #120] -; CHECK-NEXT: mov.b v2[4], w10 -; CHECK-NEXT: ldr w12, [sp, #384] -; CHECK-NEXT: mov.b v0[5], w9 +; CHECK-NEXT: mov.b v2[5], w10 +; CHECK-NEXT: ldr w10, [sp, #224] +; CHECK-NEXT: mov.b v3[3], w11 +; CHECK-NEXT: ldr w11, [sp, #384] +; CHECK-NEXT: mov.b v2[6], w9 ; CHECK-NEXT: ldr w9, [sp, #232] -; CHECK-NEXT: mov.b v1[5], w13 -; CHECK-NEXT: ldr w16, [sp, #128] -; CHECK-NEXT: mov.b v3[5], w5 +; CHECK-NEXT: mov.b v3[4], w10 ; CHECK-NEXT: ldr w10, [sp, #392] -; CHECK-NEXT: mov.b v2[5], w9 -; CHECK-NEXT: ldr w13, [sp, #400] -; CHECK-NEXT: mov.b v0[6], w11 -; CHECK-NEXT: ldr w11, [sp, #240] -; CHECK-NEXT: mov.b v1[6], w14 -; CHECK-NEXT: ldr w9, [sp, #408] -; CHECK-NEXT: mov.b v3[6], w6 -; CHECK-NEXT: ldr w14, [sp, #416] -; CHECK-NEXT: mov.b v2[6], w11 -; CHECK-NEXT: ldr w11, [sp, #424] -; CHECK-NEXT: mov.b v0[7], w8 -; CHECK-NEXT: ldr w8, [sp, #248] -; CHECK-NEXT: mov.b v1[7], w15 -; CHECK-NEXT: ldr w15, [sp, #432] -; CHECK-NEXT: mov.b v3[7], w7 ; CHECK-NEXT: mov.b v2[7], w8 -; CHECK-NEXT: ldr w8, [sp] -; CHECK-NEXT: mov.b v0[8], w12 -; CHECK-NEXT: ldr w12, [sp, #256] -; CHECK-NEXT: mov.b v1[8], w16 -; CHECK-NEXT: ldr w16, [sp, #440] -; CHECK-NEXT: mov.b v3[8], w8 -; CHECK-NEXT: ldr w8, [sp, #136] -; CHECK-NEXT: mov.b v2[8], w12 -; CHECK-NEXT: ldr w12, [sp, #8] -; CHECK-NEXT: mov.b v0[9], w10 -; CHECK-NEXT: ldr w10, [sp, #264] -; CHECK-NEXT: mov.b v1[9], w8 -; CHECK-NEXT: ldr w8, [sp, #272] -; CHECK-NEXT: mov.b v3[9], w12 -; CHECK-NEXT: ldr w12, [sp, #144] +; CHECK-NEXT: ldr w8, [sp, #240] +; CHECK-NEXT: mov.b v3[5], w9 +; CHECK-NEXT: ldr w9, [sp, #400] +; CHECK-NEXT: mov.b v2[8], w11 +; CHECK-NEXT: ldr w11, [sp, #248] +; CHECK-NEXT: mov.b v3[6], w8 +; CHECK-NEXT: ldr w8, [sp, #408] ; CHECK-NEXT: mov.b v2[9], w10 -; CHECK-NEXT: ldr w10, [sp, #16] -; CHECK-NEXT: mov.b v0[10], w13 -; CHECK-NEXT: ldr w13, [sp, #280] -; CHECK-NEXT: mov.b v1[10], w12 -; CHECK-NEXT: ldr w12, [sp, #152] +; CHECK-NEXT: mov.b v0[4], w4 +; CHECK-NEXT: mov.b v3[7], w11 +; CHECK-NEXT: ldr w11, [sp, #416] +; CHECK-NEXT: mov.b v2[10], w9 +; CHECK-NEXT: ldr w9, [sp, #424] +; CHECK-NEXT: mov.b v0[5], w5 +; CHECK-NEXT: ldr w10, [sp, #256] +; CHECK-NEXT: mov.b v2[11], w8 +; CHECK-NEXT: ldr w8, [sp, #432] +; CHECK-NEXT: mov.b v0[6], w6 +; CHECK-NEXT: ldr w14, [sp, #136] +; CHECK-NEXT: mov.b v2[12], w11 +; CHECK-NEXT: ldr w11, [sp, #64] +; CHECK-NEXT: mov.b v0[7], w7 +; CHECK-NEXT: mov.b v3[8], w10 +; CHECK-NEXT: mov.b v2[13], w9 +; CHECK-NEXT: ldr w9, [sp, #72] +; CHECK-NEXT: fmov s1, w11 +; CHECK-NEXT: ldr w10, [sp, #272] +; CHECK-NEXT: mov.b v2[14], w8 +; CHECK-NEXT: ldr w8, [sp, #80] +; CHECK-NEXT: mov.b v3[9], w12 +; CHECK-NEXT: ldr w12, [sp, #280] +; CHECK-NEXT: mov.b v1[1], w9 +; CHECK-NEXT: ldr w9, [sp, #88] ; CHECK-NEXT: mov.b v3[10], w10 -; CHECK-NEXT: ldr w10, [sp, #160] -; CHECK-NEXT: mov.b v2[10], w8 -; CHECK-NEXT: ldr w8, [sp, #24] -; CHECK-NEXT: mov.b v0[11], w9 -; CHECK-NEXT: ldr w9, [sp, #288] -; CHECK-NEXT: mov.b v1[11], w12 -; CHECK-NEXT: ldr w12, [sp, #296] -; CHECK-NEXT: mov.b v3[11], w8 +; CHECK-NEXT: ldr w10, [sp, #288] +; CHECK-NEXT: mov.b v1[2], w8 +; CHECK-NEXT: ldr w8, [sp, #96] +; CHECK-NEXT: mov.b v3[11], w12 +; CHECK-NEXT: ldr w12, [sp, #304] +; CHECK-NEXT: mov.b v1[3], w9 +; CHECK-NEXT: ldr w9, [sp, #104] +; CHECK-NEXT: mov.b v3[12], w10 +; CHECK-NEXT: ldr w10, [sp, #296] +; CHECK-NEXT: mov.b v1[4], w8 +; CHECK-NEXT: ldr w8, [sp, #112] +; CHECK-NEXT: ldr w11, [sp, #440] +; CHECK-NEXT: mov.b v1[5], w9 +; CHECK-NEXT: ldr w9, [sp, #120] +; CHECK-NEXT: mov.b v3[13], w10 +; CHECK-NEXT: mov.b v1[6], w8 +; CHECK-NEXT: ldr w8, [sp] +; CHECK-NEXT: mov.b v3[14], w12 +; CHECK-NEXT: mov.b v2[15], w11 +; CHECK-NEXT: mov.b v1[7], w9 +; CHECK-NEXT: ldr w9, [sp, #8] +; CHECK-NEXT: mov.b v0[8], w8 +; CHECK-NEXT: ldr w8, [sp, #16] +; CHECK-NEXT: mov.b v1[8], w13 +; CHECK-NEXT: ldr w13, [sp, #144] +; CHECK-NEXT: mov.b v0[9], w9 +; CHECK-NEXT: ldr w9, [sp, #24] +; CHECK-NEXT: mov.b v1[9], w14 +; CHECK-NEXT: ldr w14, [sp, #152] +; CHECK-NEXT: mov.b v0[10], w8 ; CHECK-NEXT: ldr w8, [sp, #32] -; CHECK-NEXT: mov.b v2[11], w13 -; CHECK-NEXT: mov.b v0[12], w14 -; CHECK-NEXT: mov.b v1[12], w10 -; CHECK-NEXT: ldr w10, [sp, #168] -; CHECK-NEXT: mov.b v3[12], w8 -; CHECK-NEXT: ldr w8, [sp, #40] -; CHECK-NEXT: mov.b v2[12], w9 -; CHECK-NEXT: ldr w9, [sp, #304] -; CHECK-NEXT: mov.b v0[13], w11 -; CHECK-NEXT: ldr w11, [sp, #312] -; CHECK-NEXT: mov.b v1[13], w10 -; CHECK-NEXT: ldr w10, [sp, #176] -; CHECK-NEXT: mov.b v3[13], w8 +; CHECK-NEXT: mov.b v1[10], w13 +; CHECK-NEXT: ldr w13, [sp, #160] +; CHECK-NEXT: mov.b v0[11], w9 +; CHECK-NEXT: ldr w9, [sp, #40] +; CHECK-NEXT: mov.b v1[11], w14 +; CHECK-NEXT: ldr w14, [sp, #168] +; CHECK-NEXT: mov.b v0[12], w8 ; CHECK-NEXT: ldr w8, [sp, #48] -; CHECK-NEXT: mov.b v2[13], w12 -; CHECK-NEXT: mov.b v0[14], w15 -; CHECK-NEXT: mov.b v1[14], w10 -; CHECK-NEXT: ldr w10, [sp, #184] -; CHECK-NEXT: mov.b v3[14], w8 -; CHECK-NEXT: ldr w8, [sp, #56] -; CHECK-NEXT: mov.b v2[14], w9 -; CHECK-NEXT: mov.b v0[15], w16 -; CHECK-NEXT: mov.b v1[15], w10 +; CHECK-NEXT: mov.b v1[12], w13 +; CHECK-NEXT: ldr w13, [sp, #176] +; CHECK-NEXT: mov.b v0[13], w9 +; CHECK-NEXT: ldr w9, [sp, #56] +; CHECK-NEXT: mov.b v1[13], w14 +; CHECK-NEXT: ldr w14, [sp, #184] +; CHECK-NEXT: mov.b v0[14], w8 +; CHECK-NEXT: ldr w8, [sp, #312] +; CHECK-NEXT: mov.b v1[14], w13 +; CHECK-NEXT: shl.16b v4, v2, #7 +; CHECK-NEXT: mov.b v0[15], w9 +; CHECK-NEXT: mov.b v1[15], w14 ; CHECK-NEXT: mov.b v3[15], w8 -; CHECK-NEXT: mov.b v2[15], w11 -; CHECK-NEXT: shl.16b v4, v0, #7 +; CHECK-NEXT: shl.16b v0, v0, #7 ; CHECK-NEXT: shl.16b v1, v1, #7 ; CHECK-NEXT: shl.16b v3, v3, #7 -; CHECK-NEXT: shl.16b v2, v2, #7 -; CHECK-NEXT: cmlt.16b v0, v3, #0 +; CHECK-NEXT: cmlt.16b v0, v0, #0 ; CHECK-NEXT: cmlt.16b v1, v1, #0 -; CHECK-NEXT: cmlt.16b v2, v2, #0 +; CHECK-NEXT: cmlt.16b v2, v3, #0 ; CHECK-NEXT: cmlt.16b v3, v4, #0 ; CHECK-NEXT: ret %res = sext <64 x i1> %arg to <64 x i8> diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll --- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll @@ -301,8 +301,8 @@ ; ; GISEL-LABEL: uabdl4s_rdx_i32: ; GISEL: // %bb.0: -; GISEL-NEXT: movi.2d v2, #0000000000000000 ; GISEL-NEXT: usubl.4s v0, v0, v1 +; GISEL-NEXT: movi.2d v2, #0000000000000000 ; GISEL-NEXT: cmgt.4s v1, v2, v0 ; GISEL-NEXT: neg.4s v2, v0 ; GISEL-NEXT: shl.4s v1, v1, #31 @@ -374,8 +374,8 @@ ; ; GISEL-LABEL: uabdl2d_rdx_i64: ; GISEL: // %bb.0: -; GISEL-NEXT: movi.2d v2, #0000000000000000 ; GISEL-NEXT: usubl.2d v0, v0, v1 +; GISEL-NEXT: movi.2d v2, #0000000000000000 ; GISEL-NEXT: cmgt.2d v1, v2, v0 ; GISEL-NEXT: neg.2d v2, v0 ; GISEL-NEXT: shl.2d v1, v1, #63 @@ -1640,8 +1640,8 @@ ; GISEL-LABEL: abspattern4: ; GISEL: // %bb.0: ; GISEL-NEXT: movi.2d v1, #0000000000000000 -; GISEL-NEXT: neg.4s v2, v0 ; GISEL-NEXT: cmge.4s v1, v0, v1 +; GISEL-NEXT: neg.4s v2, v0 ; GISEL-NEXT: shl.4s v1, v1, #31 ; GISEL-NEXT: sshr.4s v1, v1, #31 ; GISEL-NEXT: bif.16b v0, v2, v1 @@ -1662,8 +1662,8 @@ ; GISEL-LABEL: abspattern5: ; GISEL: // %bb.0: ; GISEL-NEXT: movi.2d v1, #0000000000000000 -; GISEL-NEXT: neg.8h v2, v0 ; GISEL-NEXT: cmgt.8h v1, v0, v1 +; GISEL-NEXT: neg.8h v2, v0 ; GISEL-NEXT: shl.8h v1, v1, #15 ; GISEL-NEXT: sshr.8h v1, v1, #15 ; GISEL-NEXT: bif.16b v0, v2, v1 @@ -1684,8 +1684,8 @@ ; GISEL-LABEL: abspattern6: ; GISEL: // %bb.0: ; GISEL-NEXT: movi.2d v1, #0000000000000000 -; GISEL-NEXT: neg.16b v2, v0 ; GISEL-NEXT: cmgt.16b v1, v1, v0 +; GISEL-NEXT: neg.16b v2, v0 ; GISEL-NEXT: shl.16b v1, v1, #7 ; GISEL-NEXT: sshr.16b v1, v1, #7 ; GISEL-NEXT: bit.16b v0, v2, v1 @@ -1706,8 +1706,8 @@ ; GISEL-LABEL: abspattern7: ; GISEL: // %bb.0: ; GISEL-NEXT: movi.2d v1, #0000000000000000 -; GISEL-NEXT: neg.2d v2, v0 ; GISEL-NEXT: cmge.2d v1, v1, v0 +; GISEL-NEXT: neg.2d v2, v0 ; GISEL-NEXT: shl.2d v1, v1, #63 ; GISEL-NEXT: sshr.2d v1, v1, #63 ; GISEL-NEXT: bit.16b v0, v2, v1 @@ -1727,8 +1727,8 @@ ; ; GISEL-LABEL: uabd_i32: ; GISEL: // %bb.0: -; GISEL-NEXT: movi.2d v2, #0000000000000000 ; GISEL-NEXT: ssubl.2d v0, v0, v1 +; GISEL-NEXT: movi.2d v2, #0000000000000000 ; GISEL-NEXT: cmgt.2d v1, v2, v0 ; GISEL-NEXT: neg.2d v2, v0 ; GISEL-NEXT: shl.2d v1, v1, #63 @@ -1755,21 +1755,21 @@ ; CHECK-NEXT: asr x12, x9, #63 ; CHECK-NEXT: asr x13, x11, #63 ; CHECK-NEXT: subs x9, x9, x11 -; CHECK-NEXT: sbcs x11, x12, x13 -; CHECK-NEXT: asr x12, x8, #63 +; CHECK-NEXT: asr x11, x8, #63 +; CHECK-NEXT: sbcs x12, x12, x13 ; CHECK-NEXT: asr x13, x10, #63 ; CHECK-NEXT: subs x8, x8, x10 -; CHECK-NEXT: sbcs x10, x12, x13 -; CHECK-NEXT: negs x12, x8 +; CHECK-NEXT: sbcs x10, x11, x13 +; CHECK-NEXT: negs x11, x8 ; CHECK-NEXT: ngcs x13, x10 ; CHECK-NEXT: cmp x10, #0 -; CHECK-NEXT: csel x2, x12, x8, lt +; CHECK-NEXT: csel x2, x11, x8, lt ; CHECK-NEXT: csel x3, x13, x10, lt ; CHECK-NEXT: negs x8, x9 -; CHECK-NEXT: ngcs x10, x11 -; CHECK-NEXT: cmp x11, #0 +; CHECK-NEXT: ngcs x10, x12 +; CHECK-NEXT: cmp x12, #0 ; CHECK-NEXT: csel x8, x8, x9, lt -; CHECK-NEXT: csel x1, x10, x11, lt +; CHECK-NEXT: csel x1, x10, x12, lt ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: mov.d v0[1], x1 ; CHECK-NEXT: fmov x0, d0 diff --git a/llvm/test/CodeGen/AArch64/arm64-vhadd.ll b/llvm/test/CodeGen/AArch64/arm64-vhadd.ll --- a/llvm/test/CodeGen/AArch64/arm64-vhadd.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vhadd.ll @@ -1062,8 +1062,8 @@ define <8 x i8> @andmaskv8i8(<8 x i16> %src1, <8 x i8> %src2) nounwind { ; CHECK-LABEL: andmaskv8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi.8b v2, #7 ; CHECK-NEXT: xtn.8b v0, v0 +; CHECK-NEXT: movi.8b v2, #7 ; CHECK-NEXT: and.8b v0, v0, v2 ; CHECK-NEXT: uhadd.8b v0, v0, v1 ; CHECK-NEXT: ret @@ -1078,9 +1078,9 @@ define <16 x i8> @andmaskv16i8(<16 x i16> %src1, <16 x i8> %src2) nounwind { ; CHECK-LABEL: andmaskv16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi.16b v3, #7 ; CHECK-NEXT: uzp1.16b v0, v0, v1 -; CHECK-NEXT: and.16b v0, v0, v3 +; CHECK-NEXT: movi.16b v1, #7 +; CHECK-NEXT: and.16b v0, v0, v1 ; CHECK-NEXT: uhadd.16b v0, v0, v2 ; CHECK-NEXT: ret %zextsrc1 = and <16 x i16> %src1, @@ -1094,10 +1094,10 @@ define <16 x i8> @andmask2v16i8(<16 x i16> %src1, <16 x i16> %src2) nounwind { ; CHECK-LABEL: andmask2v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi.16b v4, #3 -; CHECK-NEXT: movi.16b v5, #7 ; CHECK-NEXT: uzp1.16b v2, v2, v3 ; CHECK-NEXT: uzp1.16b v0, v0, v1 +; CHECK-NEXT: movi.16b v4, #3 +; CHECK-NEXT: movi.16b v5, #7 ; CHECK-NEXT: and.16b v1, v2, v4 ; CHECK-NEXT: and.16b v0, v0, v5 ; CHECK-NEXT: uhadd.16b v0, v0, v1 @@ -1113,8 +1113,8 @@ define <8 x i8> @andmask2v8i8(<8 x i16> %src1, <8 x i16> %src2) nounwind { ; CHECK-LABEL: andmask2v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi.8b v2, #7 ; CHECK-NEXT: xtn.8b v0, v0 +; CHECK-NEXT: movi.8b v2, #7 ; CHECK-NEXT: xtn.8b v1, v1 ; CHECK-NEXT: and.8b v0, v0, v2 ; CHECK-NEXT: uhadd.8b v0, v0, v1 @@ -1131,8 +1131,8 @@ ; CHECK-LABEL: andmask3v8i8: ; CHECK: // %bb.0: ; CHECK-NEXT: movi.8h v2, #7 -; CHECK-NEXT: bic.8h v1, #254, lsl #8 ; CHECK-NEXT: and.16b v0, v0, v2 +; CHECK-NEXT: bic.8h v1, #254, lsl #8 ; CHECK-NEXT: uhadd.8h v0, v0, v1 ; CHECK-NEXT: ret %zextsrc1 = and <8 x i16> %src1, diff --git a/llvm/test/CodeGen/AArch64/arm64-vmul.ll b/llvm/test/CodeGen/AArch64/arm64-vmul.ll --- a/llvm/test/CodeGen/AArch64/arm64-vmul.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vmul.ll @@ -988,14 +988,14 @@ define <2 x i64> @mul_2d(<2 x i64> %A, <2 x i64> %B) nounwind { ; CHECK-LABEL: mul_2d: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov x9, d1 -; CHECK-NEXT: fmov x10, d0 -; CHECK-NEXT: mov.d x8, v1[1] -; CHECK-NEXT: mov.d x11, v0[1] +; CHECK-NEXT: fmov x8, d1 +; CHECK-NEXT: fmov x9, d0 +; CHECK-NEXT: mov.d x10, v0[1] +; CHECK-NEXT: mul x8, x9, x8 +; CHECK-NEXT: mov.d x9, v1[1] ; CHECK-NEXT: mul x9, x10, x9 -; CHECK-NEXT: mul x8, x11, x8 -; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: mov.d v0[1], x8 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: mov.d v0[1], x9 ; CHECK-NEXT: ret %tmp1 = mul <2 x i64> %A, %B ret <2 x i64> %tmp1 diff --git a/llvm/test/CodeGen/AArch64/bitcast-promote-widen.ll b/llvm/test/CodeGen/AArch64/bitcast-promote-widen.ll --- a/llvm/test/CodeGen/AArch64/bitcast-promote-widen.ll +++ b/llvm/test/CodeGen/AArch64/bitcast-promote-widen.ll @@ -8,10 +8,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: umov w8, v0.h[0] -; CHECK-NEXT: umov w9, v0.h[1] -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: mov v0.s[1], w9 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: fmov s1, w8 +; CHECK-NEXT: umov w8, v0.h[1] +; CHECK-NEXT: mov v1.s[1], w8 +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret %y = bitcast <2 x half> %x to <2 x i16> ret <2 x i16> %y diff --git a/llvm/test/CodeGen/AArch64/build-vector-extract.ll b/llvm/test/CodeGen/AArch64/build-vector-extract.ll --- a/llvm/test/CodeGen/AArch64/build-vector-extract.ll +++ b/llvm/test/CodeGen/AArch64/build-vector-extract.ll @@ -16,10 +16,9 @@ define <2 x i64> @extract0_i32_zext_insert0_i64_zero(<4 x i32> %x) { ; CHECK-LABEL: extract0_i32_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <4 x i32> %x, i32 0 %z = zext i32 %e to i64 @@ -30,8 +29,8 @@ define <2 x i64> @extract1_i32_zext_insert0_i64_undef(<4 x i32> %x) { ; CHECK-LABEL: extract1_i32_zext_insert0_i64_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: zip1 v0.4s, v0.4s, v0.4s +; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #12 ; CHECK-NEXT: ret %e = extractelement <4 x i32> %x, i32 1 @@ -43,10 +42,9 @@ define <2 x i64> @extract1_i32_zext_insert0_i64_zero(<4 x i32> %x) { ; CHECK-LABEL: extract1_i32_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <4 x i32> %x, i32 1 %z = zext i32 %e to i64 @@ -57,8 +55,8 @@ define <2 x i64> @extract2_i32_zext_insert0_i64_undef(<4 x i32> %x) { ; CHECK-LABEL: extract2_i32_zext_insert0_i64_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s +; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #12 ; CHECK-NEXT: ret %e = extractelement <4 x i32> %x, i32 2 @@ -70,10 +68,9 @@ define <2 x i64> @extract2_i32_zext_insert0_i64_zero(<4 x i32> %x) { ; CHECK-LABEL: extract2_i32_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: mov w8, v0.s[2] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <4 x i32> %x, i32 2 %z = zext i32 %e to i64 @@ -96,10 +93,9 @@ define <2 x i64> @extract3_i32_zext_insert0_i64_zero(<4 x i32> %x) { ; CHECK-LABEL: extract3_i32_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: mov w8, v0.s[3] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <4 x i32> %x, i32 3 %z = zext i32 %e to i64 @@ -123,10 +119,9 @@ define <2 x i64> @extract0_i32_zext_insert1_i64_zero(<4 x i32> %x) { ; CHECK-LABEL: extract0_i32_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <4 x i32> %x, i32 0 %z = zext i32 %e to i64 @@ -137,8 +132,8 @@ define <2 x i64> @extract1_i32_zext_insert1_i64_undef(<4 x i32> %x) { ; CHECK-LABEL: extract1_i32_zext_insert1_i64_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #4 ; CHECK-NEXT: ret %e = extractelement <4 x i32> %x, i32 1 @@ -150,10 +145,9 @@ define <2 x i64> @extract1_i32_zext_insert1_i64_zero(<4 x i32> %x) { ; CHECK-LABEL: extract1_i32_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <4 x i32> %x, i32 1 %z = zext i32 %e to i64 @@ -175,10 +169,9 @@ define <2 x i64> @extract2_i32_zext_insert1_i64_zero(<4 x i32> %x) { ; CHECK-LABEL: extract2_i32_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: mov w8, v0.s[2] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <4 x i32> %x, i32 2 %z = zext i32 %e to i64 @@ -201,10 +194,9 @@ define <2 x i64> @extract3_i32_zext_insert1_i64_zero(<4 x i32> %x) { ; CHECK-LABEL: extract3_i32_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: mov w8, v0.s[3] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <4 x i32> %x, i32 3 %z = zext i32 %e to i64 @@ -227,10 +219,9 @@ define <2 x i64> @extract0_i16_zext_insert0_i64_zero(<8 x i16> %x) { ; CHECK-LABEL: extract0_i16_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.h[0] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <8 x i16> %x, i32 0 %z = zext i16 %e to i64 @@ -253,10 +244,9 @@ define <2 x i64> @extract1_i16_zext_insert0_i64_zero(<8 x i16> %x) { ; CHECK-LABEL: extract1_i16_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.h[1] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <8 x i16> %x, i32 1 %z = zext i16 %e to i64 @@ -279,10 +269,9 @@ define <2 x i64> @extract2_i16_zext_insert0_i64_zero(<8 x i16> %x) { ; CHECK-LABEL: extract2_i16_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.h[2] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <8 x i16> %x, i32 2 %z = zext i16 %e to i64 @@ -305,10 +294,9 @@ define <2 x i64> @extract3_i16_zext_insert0_i64_zero(<8 x i16> %x) { ; CHECK-LABEL: extract3_i16_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.h[3] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <8 x i16> %x, i32 3 %z = zext i16 %e to i64 @@ -331,10 +319,9 @@ define <2 x i64> @extract0_i16_zext_insert1_i64_zero(<8 x i16> %x) { ; CHECK-LABEL: extract0_i16_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.h[0] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <8 x i16> %x, i32 0 %z = zext i16 %e to i64 @@ -357,10 +344,9 @@ define <2 x i64> @extract1_i16_zext_insert1_i64_zero(<8 x i16> %x) { ; CHECK-LABEL: extract1_i16_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.h[1] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <8 x i16> %x, i32 1 %z = zext i16 %e to i64 @@ -383,10 +369,9 @@ define <2 x i64> @extract2_i16_zext_insert1_i64_zero(<8 x i16> %x) { ; CHECK-LABEL: extract2_i16_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.h[2] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <8 x i16> %x, i32 2 %z = zext i16 %e to i64 @@ -409,10 +394,9 @@ define <2 x i64> @extract3_i16_zext_insert1_i64_zero(<8 x i16> %x) { ; CHECK-LABEL: extract3_i16_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.h[3] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <8 x i16> %x, i32 3 %z = zext i16 %e to i64 @@ -437,10 +421,9 @@ define <2 x i64> @extract0_i8_zext_insert0_i64_zero(<16 x i8> %x) { ; CHECK-LABEL: extract0_i8_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.b[0] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <16 x i8> %x, i32 0 %z = zext i8 %e to i64 @@ -463,10 +446,9 @@ define <2 x i64> @extract1_i8_zext_insert0_i64_zero(<16 x i8> %x) { ; CHECK-LABEL: extract1_i8_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.b[1] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <16 x i8> %x, i32 1 %z = zext i8 %e to i64 @@ -489,10 +471,9 @@ define <2 x i64> @extract2_i8_zext_insert0_i64_zero(<16 x i8> %x) { ; CHECK-LABEL: extract2_i8_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.b[2] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <16 x i8> %x, i32 2 %z = zext i8 %e to i64 @@ -515,10 +496,9 @@ define <2 x i64> @extract3_i8_zext_insert0_i64_zero(<16 x i8> %x) { ; CHECK-LABEL: extract3_i8_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.b[3] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <16 x i8> %x, i32 3 %z = zext i8 %e to i64 @@ -541,10 +521,9 @@ define <2 x i64> @extract0_i8_zext_insert1_i64_zero(<16 x i8> %x) { ; CHECK-LABEL: extract0_i8_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.b[0] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <16 x i8> %x, i32 0 %z = zext i8 %e to i64 @@ -567,10 +546,9 @@ define <2 x i64> @extract1_i8_zext_insert1_i64_zero(<16 x i8> %x) { ; CHECK-LABEL: extract1_i8_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.b[1] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <16 x i8> %x, i32 1 %z = zext i8 %e to i64 @@ -593,10 +571,9 @@ define <2 x i64> @extract2_i8_zext_insert1_i64_zero(<16 x i8> %x) { ; CHECK-LABEL: extract2_i8_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.b[2] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <16 x i8> %x, i32 2 %z = zext i8 %e to i64 @@ -619,10 +596,9 @@ define <2 x i64> @extract3_i8_zext_insert1_i64_zero(<16 x i8> %x) { ; CHECK-LABEL: extract3_i8_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.b[3] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <16 x i8> %x, i32 3 %z = zext i8 %e to i64 diff --git a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll --- a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll +++ b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll @@ -114,8 +114,8 @@ define <7 x i8> @sign_7xi8(<7 x i8> %a) { ; CHECK-LABEL: sign_7xi8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.8b, #1 ; CHECK-NEXT: cmlt v0.8b, v0.8b, #0 +; CHECK-NEXT: movi v1.8b, #1 ; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %c = icmp sgt <7 x i8> %a, @@ -126,8 +126,8 @@ define <8 x i8> @sign_8xi8(<8 x i8> %a) { ; CHECK-LABEL: sign_8xi8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.8b, #1 ; CHECK-NEXT: cmlt v0.8b, v0.8b, #0 +; CHECK-NEXT: movi v1.8b, #1 ; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %c = icmp sgt <8 x i8> %a, @@ -138,8 +138,8 @@ define <16 x i8> @sign_16xi8(<16 x i8> %a) { ; CHECK-LABEL: sign_16xi8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #1 ; CHECK-NEXT: cmlt v0.16b, v0.16b, #0 +; CHECK-NEXT: movi v1.16b, #1 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %c = icmp sgt <16 x i8> %a, @@ -177,11 +177,11 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff +; CHECK-NEXT: cmgt v1.4s, v0.4s, v1.4s ; CHECK-NEXT: cmlt v2.4s, v0.4s, #0 +; CHECK-NEXT: xtn v0.4h, v1.4s ; CHECK-NEXT: orr v2.4s, #1 -; CHECK-NEXT: cmgt v1.4s, v0.4s, v1.4s ; CHECK-NEXT: str q2, [sp] // 16-byte Folded Spill -; CHECK-NEXT: xtn v0.4h, v1.4s ; CHECK-NEXT: bl use_4xi1 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload @@ -198,10 +198,10 @@ ; CHECK-LABEL: not_sign_4xi32: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI16_0 -; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] ; CHECK-NEXT: cmgt v0.4s, v0.4s, v1.4s -; CHECK-NEXT: and v1.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v1.16b, v0.16b, v1.16b ; CHECK-NEXT: orn v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %c = icmp sgt <4 x i32> %a, @@ -227,12 +227,12 @@ define <4 x i32> @not_sign_4xi32_3(<4 x i32> %a) { ; CHECK-LABEL: not_sign_4xi32_3: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff ; CHECK-NEXT: adrp x8, .LCPI18_0 -; CHECK-NEXT: movi v2.4s, #1 -; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI18_0] +; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff ; CHECK-NEXT: cmgt v0.4s, v0.4s, v1.4s -; CHECK-NEXT: bsl v0.16b, v2.16b, v3.16b +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI18_0] +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: bsl v0.16b, v1.16b, v2.16b ; CHECK-NEXT: ret %c = icmp sgt <4 x i32> %a, %res = select <4 x i1> %c, <4 x i32> , <4 x i32> @@ -244,18 +244,18 @@ ; CHECK-LABEL: sign_4xi65: ; CHECK: // %bb.0: ; CHECK-NEXT: sbfx x8, x1, #0, #1 -; CHECK-NEXT: sbfx x10, x5, #0, #1 +; CHECK-NEXT: sbfx x10, x7, #0, #1 ; CHECK-NEXT: orr x9, x8, #0x1 ; CHECK-NEXT: lsr x1, x8, #63 -; CHECK-NEXT: sbfx x8, x7, #0, #1 -; CHECK-NEXT: orr x4, x10, #0x1 -; CHECK-NEXT: lsr x5, x10, #63 -; CHECK-NEXT: orr x6, x8, #0x1 +; CHECK-NEXT: sbfx x8, x5, #0, #1 +; CHECK-NEXT: orr x6, x10, #0x1 +; CHECK-NEXT: orr x4, x8, #0x1 +; CHECK-NEXT: lsr x5, x8, #63 ; CHECK-NEXT: fmov d0, x9 ; CHECK-NEXT: sbfx x9, x3, #0, #1 ; CHECK-NEXT: orr x2, x9, #0x1 ; CHECK-NEXT: lsr x3, x9, #63 -; CHECK-NEXT: lsr x7, x8, #63 +; CHECK-NEXT: lsr x7, x10, #63 ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/combine-mul.ll b/llvm/test/CodeGen/AArch64/combine-mul.ll --- a/llvm/test/CodeGen/AArch64/combine-mul.ll +++ b/llvm/test/CodeGen/AArch64/combine-mul.ll @@ -28,8 +28,8 @@ define <4 x i1> @PR48683_vec_undef(<4 x i32> %x) { ; CHECK-LABEL: PR48683_vec_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.4s, #2 ; CHECK-NEXT: mul v0.4s, v0.4s, v0.4s +; CHECK-NEXT: movi v1.4s, #2 ; CHECK-NEXT: cmtst v0.4s, v0.4s, v1.4s ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/dag-combine-trunc-build-vec.ll b/llvm/test/CodeGen/AArch64/dag-combine-trunc-build-vec.ll --- a/llvm/test/CodeGen/AArch64/dag-combine-trunc-build-vec.ll +++ b/llvm/test/CodeGen/AArch64/dag-combine-trunc-build-vec.ll @@ -10,11 +10,11 @@ define void @no_combine(i32 %p) local_unnamed_addr { ; CHECK-LABEL: no_combine: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v0.4h, #4 -; CHECK-NEXT: dup v1.4s, w0 -; CHECK-NEXT: xtn v1.4h, v1.4s -; CHECK-NEXT: mov v1.d[1], v0.d[0] -; CHECK-NEXT: uzp1 v0.16b, v1.16b, v1.16b +; CHECK-NEXT: dup v0.4s, w0 +; CHECK-NEXT: movi v1.4h, #4 +; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: uzp1 v0.16b, v0.16b, v0.16b ; CHECK-NEXT: str q0, [x8] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/dag-numsignbits.ll b/llvm/test/CodeGen/AArch64/dag-numsignbits.ll --- a/llvm/test/CodeGen/AArch64/dag-numsignbits.ll +++ b/llvm/test/CodeGen/AArch64/dag-numsignbits.ll @@ -8,16 +8,16 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI0_0 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: movi v2.4h, #1 -; CHECK-NEXT: dup v0.4h, v0.h[0] ; CHECK-NEXT: mov w1, wzr +; CHECK-NEXT: dup v0.4h, v0.h[0] ; CHECK-NEXT: mov w2, wzr ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_0] ; CHECK-NEXT: adrp x8, .LCPI0_1 ; CHECK-NEXT: add v0.4h, v0.4h, v1.4h -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_1] -; CHECK-NEXT: cmgt v0.4h, v2.4h, v0.4h -; CHECK-NEXT: and v0.8b, v0.8b, v1.8b +; CHECK-NEXT: movi v1.4h, #1 +; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_1] +; CHECK-NEXT: cmgt v0.4h, v1.4h, v0.4h +; CHECK-NEXT: and v0.8b, v0.8b, v2.8b ; CHECK-NEXT: shl v0.4h, v0.4h, #15 ; CHECK-NEXT: cmlt v0.4h, v0.4h, #0 ; CHECK-NEXT: umov w0, v0.h[0] diff --git a/llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-signed.ll b/llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-signed.ll --- a/llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-signed.ll +++ b/llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-signed.ll @@ -74,66 +74,66 @@ ; ALL-NEXT: smov w9, v0.b[1] ; ALL-NEXT: smov w10, v0.b[0] ; ALL-NEXT: smov w11, v0.b[2] +; ALL-NEXT: sdiv w8, w9, w8 +; ALL-NEXT: smov w9, v1.b[0] ; ALL-NEXT: smov w12, v0.b[3] ; ALL-NEXT: smov w13, v0.b[4] ; ALL-NEXT: smov w14, v0.b[5] ; ALL-NEXT: smov w15, v0.b[6] -; ALL-NEXT: sdiv w8, w9, w8 -; ALL-NEXT: smov w9, v1.b[0] ; ALL-NEXT: smov w16, v0.b[7] ; ALL-NEXT: smov w17, v0.b[8] +; ALL-NEXT: smov w18, v0.b[9] +; ALL-NEXT: smov w1, v0.b[10] +; ALL-NEXT: smov w2, v0.b[11] +; ALL-NEXT: smov w3, v0.b[12] ; ALL-NEXT: sdiv w9, w10, w9 ; ALL-NEXT: smov w10, v1.b[2] ; ALL-NEXT: sdiv w10, w11, w10 ; ALL-NEXT: smov w11, v1.b[3] ; ALL-NEXT: fmov s2, w9 -; ALL-NEXT: smov w9, v1.b[9] ; ALL-NEXT: mov v2.b[1], w8 +; ALL-NEXT: smov w8, v1.b[13] ; ALL-NEXT: sdiv w11, w12, w11 ; ALL-NEXT: smov w12, v1.b[4] ; ALL-NEXT: mov v2.b[2], w10 -; ALL-NEXT: smov w10, v0.b[10] +; ALL-NEXT: smov w10, v0.b[13] ; ALL-NEXT: sdiv w12, w13, w12 ; ALL-NEXT: smov w13, v1.b[5] ; ALL-NEXT: mov v2.b[3], w11 -; ALL-NEXT: smov w11, v0.b[11] +; ALL-NEXT: smov w11, v0.b[14] ; ALL-NEXT: sdiv w13, w14, w13 ; ALL-NEXT: smov w14, v1.b[6] ; ALL-NEXT: mov v2.b[4], w12 -; ALL-NEXT: smov w12, v0.b[12] +; ALL-NEXT: smov w12, v0.b[15] ; ALL-NEXT: sdiv w14, w15, w14 ; ALL-NEXT: smov w15, v1.b[7] ; ALL-NEXT: mov v2.b[5], w13 -; ALL-NEXT: smov w13, v0.b[13] ; ALL-NEXT: sdiv w15, w16, w15 ; ALL-NEXT: smov w16, v1.b[8] ; ALL-NEXT: mov v2.b[6], w14 ; ALL-NEXT: sdiv w16, w17, w16 -; ALL-NEXT: smov w17, v0.b[9] +; ALL-NEXT: smov w17, v1.b[9] ; ALL-NEXT: mov v2.b[7], w15 -; ALL-NEXT: sdiv w8, w17, w9 -; ALL-NEXT: smov w9, v1.b[10] +; ALL-NEXT: sdiv w17, w18, w17 +; ALL-NEXT: smov w18, v1.b[10] ; ALL-NEXT: mov v2.b[8], w16 -; ALL-NEXT: sdiv w9, w10, w9 -; ALL-NEXT: smov w10, v1.b[11] -; ALL-NEXT: mov v2.b[9], w8 +; ALL-NEXT: sdiv w18, w1, w18 +; ALL-NEXT: smov w1, v1.b[11] +; ALL-NEXT: mov v2.b[9], w17 +; ALL-NEXT: sdiv w1, w2, w1 +; ALL-NEXT: smov w2, v1.b[12] +; ALL-NEXT: mov v2.b[10], w18 +; ALL-NEXT: sdiv w9, w3, w2 +; ALL-NEXT: mov v2.b[11], w1 +; ALL-NEXT: sdiv w8, w10, w8 +; ALL-NEXT: smov w10, v1.b[14] +; ALL-NEXT: mov v2.b[12], w9 ; ALL-NEXT: sdiv w10, w11, w10 -; ALL-NEXT: smov w11, v1.b[12] -; ALL-NEXT: mov v2.b[10], w9 -; ALL-NEXT: smov w9, v1.b[14] -; ALL-NEXT: sdiv w11, w12, w11 -; ALL-NEXT: smov w12, v1.b[13] -; ALL-NEXT: mov v2.b[11], w10 -; ALL-NEXT: smov w10, v1.b[15] -; ALL-NEXT: sdiv w8, w13, w12 -; ALL-NEXT: smov w12, v0.b[14] -; ALL-NEXT: mov v2.b[12], w11 -; ALL-NEXT: smov w11, v0.b[15] -; ALL-NEXT: sdiv w9, w12, w9 +; ALL-NEXT: smov w11, v1.b[15] ; ALL-NEXT: mov v2.b[13], w8 -; ALL-NEXT: sdiv w8, w11, w10 -; ALL-NEXT: mov v2.b[14], w9 -; ALL-NEXT: mov v2.b[15], w8 +; ALL-NEXT: sdiv w11, w12, w11 +; ALL-NEXT: mov v2.b[14], w10 +; ALL-NEXT: mov v2.b[15], w11 ; ALL-NEXT: mls v0.16b, v2.16b, v1.16b ; ALL-NEXT: str q2, [x0] ; ALL-NEXT: ret @@ -151,32 +151,32 @@ ; ALL-NEXT: smov w9, v0.h[1] ; ALL-NEXT: smov w10, v0.h[0] ; ALL-NEXT: smov w11, v0.h[2] -; ALL-NEXT: smov w12, v0.h[3] -; ALL-NEXT: smov w13, v0.h[4] ; ALL-NEXT: sdiv w8, w9, w8 ; ALL-NEXT: smov w9, v1.h[0] +; ALL-NEXT: smov w12, v0.h[3] +; ALL-NEXT: smov w13, v0.h[4] +; ALL-NEXT: smov w14, v0.h[5] +; ALL-NEXT: smov w15, v0.h[6] ; ALL-NEXT: sdiv w9, w10, w9 ; ALL-NEXT: smov w10, v1.h[2] ; ALL-NEXT: sdiv w10, w11, w10 ; ALL-NEXT: smov w11, v1.h[3] ; ALL-NEXT: fmov s2, w9 -; ALL-NEXT: smov w9, v1.h[5] ; ALL-NEXT: mov v2.h[1], w8 +; ALL-NEXT: smov w8, v1.h[7] ; ALL-NEXT: sdiv w11, w12, w11 ; ALL-NEXT: smov w12, v1.h[4] ; ALL-NEXT: mov v2.h[2], w10 -; ALL-NEXT: smov w10, v0.h[6] +; ALL-NEXT: smov w10, v0.h[7] ; ALL-NEXT: sdiv w12, w13, w12 -; ALL-NEXT: smov w13, v0.h[5] +; ALL-NEXT: smov w13, v1.h[5] ; ALL-NEXT: mov v2.h[3], w11 -; ALL-NEXT: smov w11, v0.h[7] -; ALL-NEXT: sdiv w8, w13, w9 -; ALL-NEXT: smov w9, v1.h[6] +; ALL-NEXT: sdiv w13, w14, w13 +; ALL-NEXT: smov w14, v1.h[6] ; ALL-NEXT: mov v2.h[4], w12 -; ALL-NEXT: sdiv w9, w10, w9 -; ALL-NEXT: smov w10, v1.h[7] -; ALL-NEXT: mov v2.h[5], w8 -; ALL-NEXT: sdiv w8, w11, w10 +; ALL-NEXT: sdiv w9, w15, w14 +; ALL-NEXT: mov v2.h[5], w13 +; ALL-NEXT: sdiv w8, w10, w8 ; ALL-NEXT: mov v2.h[6], w9 ; ALL-NEXT: mov v2.h[7], w8 ; ALL-NEXT: mls v0.8h, v2.8h, v1.8h @@ -196,18 +196,18 @@ ; ALL-NEXT: mov w9, v0.s[1] ; ALL-NEXT: fmov w10, s0 ; ALL-NEXT: mov w11, v0.s[2] -; ALL-NEXT: mov w12, v0.s[3] ; ALL-NEXT: sdiv w8, w9, w8 ; ALL-NEXT: fmov w9, s1 +; ALL-NEXT: mov w12, v0.s[3] ; ALL-NEXT: sdiv w9, w10, w9 ; ALL-NEXT: mov w10, v1.s[2] ; ALL-NEXT: sdiv w10, w11, w10 ; ALL-NEXT: mov w11, v1.s[3] ; ALL-NEXT: fmov s2, w9 ; ALL-NEXT: mov v2.s[1], w8 -; ALL-NEXT: sdiv w8, w12, w11 +; ALL-NEXT: sdiv w11, w12, w11 ; ALL-NEXT: mov v2.s[2], w10 -; ALL-NEXT: mov v2.s[3], w8 +; ALL-NEXT: mov v2.s[3], w11 ; ALL-NEXT: mls v0.4s, v2.4s, v1.4s ; ALL-NEXT: str q2, [x0] ; ALL-NEXT: ret @@ -228,13 +228,13 @@ ; ALL-NEXT: sdiv x9, x9, x8 ; ALL-NEXT: mul x8, x9, x8 ; ALL-NEXT: sdiv x11, x11, x10 -; ALL-NEXT: fmov d2, x9 ; ALL-NEXT: fmov d1, x8 ; ALL-NEXT: mul x10, x11, x10 -; ALL-NEXT: mov v2.d[1], x11 ; ALL-NEXT: mov v1.d[1], x10 -; ALL-NEXT: str q2, [x0] ; ALL-NEXT: sub v0.2d, v0.2d, v1.2d +; ALL-NEXT: fmov d1, x9 +; ALL-NEXT: mov v1.d[1], x11 +; ALL-NEXT: str q1, [x0] ; ALL-NEXT: ret %div = sdiv <2 x i64> %x, %y store <2 x i64> %div, <2 x i64>* %divdst, align 16 diff --git a/llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-unsigned.ll b/llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-unsigned.ll --- a/llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-unsigned.ll +++ b/llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-unsigned.ll @@ -74,66 +74,66 @@ ; ALL-NEXT: umov w9, v0.b[1] ; ALL-NEXT: umov w10, v0.b[0] ; ALL-NEXT: umov w11, v0.b[2] +; ALL-NEXT: udiv w8, w9, w8 +; ALL-NEXT: umov w9, v1.b[0] ; ALL-NEXT: umov w12, v0.b[3] ; ALL-NEXT: umov w13, v0.b[4] ; ALL-NEXT: umov w14, v0.b[5] ; ALL-NEXT: umov w15, v0.b[6] -; ALL-NEXT: udiv w8, w9, w8 -; ALL-NEXT: umov w9, v1.b[0] ; ALL-NEXT: umov w16, v0.b[7] ; ALL-NEXT: umov w17, v0.b[8] +; ALL-NEXT: umov w18, v0.b[9] +; ALL-NEXT: umov w1, v0.b[10] +; ALL-NEXT: umov w2, v0.b[11] +; ALL-NEXT: umov w3, v0.b[12] ; ALL-NEXT: udiv w9, w10, w9 ; ALL-NEXT: umov w10, v1.b[2] ; ALL-NEXT: udiv w10, w11, w10 ; ALL-NEXT: umov w11, v1.b[3] ; ALL-NEXT: fmov s2, w9 -; ALL-NEXT: umov w9, v1.b[9] ; ALL-NEXT: mov v2.b[1], w8 +; ALL-NEXT: umov w8, v1.b[13] ; ALL-NEXT: udiv w11, w12, w11 ; ALL-NEXT: umov w12, v1.b[4] ; ALL-NEXT: mov v2.b[2], w10 -; ALL-NEXT: umov w10, v0.b[10] +; ALL-NEXT: umov w10, v0.b[13] ; ALL-NEXT: udiv w12, w13, w12 ; ALL-NEXT: umov w13, v1.b[5] ; ALL-NEXT: mov v2.b[3], w11 -; ALL-NEXT: umov w11, v0.b[11] +; ALL-NEXT: umov w11, v0.b[14] ; ALL-NEXT: udiv w13, w14, w13 ; ALL-NEXT: umov w14, v1.b[6] ; ALL-NEXT: mov v2.b[4], w12 -; ALL-NEXT: umov w12, v0.b[12] +; ALL-NEXT: umov w12, v0.b[15] ; ALL-NEXT: udiv w14, w15, w14 ; ALL-NEXT: umov w15, v1.b[7] ; ALL-NEXT: mov v2.b[5], w13 -; ALL-NEXT: umov w13, v0.b[13] ; ALL-NEXT: udiv w15, w16, w15 ; ALL-NEXT: umov w16, v1.b[8] ; ALL-NEXT: mov v2.b[6], w14 ; ALL-NEXT: udiv w16, w17, w16 -; ALL-NEXT: umov w17, v0.b[9] +; ALL-NEXT: umov w17, v1.b[9] ; ALL-NEXT: mov v2.b[7], w15 -; ALL-NEXT: udiv w8, w17, w9 -; ALL-NEXT: umov w9, v1.b[10] +; ALL-NEXT: udiv w17, w18, w17 +; ALL-NEXT: umov w18, v1.b[10] ; ALL-NEXT: mov v2.b[8], w16 -; ALL-NEXT: udiv w9, w10, w9 -; ALL-NEXT: umov w10, v1.b[11] -; ALL-NEXT: mov v2.b[9], w8 +; ALL-NEXT: udiv w18, w1, w18 +; ALL-NEXT: umov w1, v1.b[11] +; ALL-NEXT: mov v2.b[9], w17 +; ALL-NEXT: udiv w1, w2, w1 +; ALL-NEXT: umov w2, v1.b[12] +; ALL-NEXT: mov v2.b[10], w18 +; ALL-NEXT: udiv w9, w3, w2 +; ALL-NEXT: mov v2.b[11], w1 +; ALL-NEXT: udiv w8, w10, w8 +; ALL-NEXT: umov w10, v1.b[14] +; ALL-NEXT: mov v2.b[12], w9 ; ALL-NEXT: udiv w10, w11, w10 -; ALL-NEXT: umov w11, v1.b[12] -; ALL-NEXT: mov v2.b[10], w9 -; ALL-NEXT: umov w9, v1.b[14] -; ALL-NEXT: udiv w11, w12, w11 -; ALL-NEXT: umov w12, v1.b[13] -; ALL-NEXT: mov v2.b[11], w10 -; ALL-NEXT: umov w10, v1.b[15] -; ALL-NEXT: udiv w8, w13, w12 -; ALL-NEXT: umov w12, v0.b[14] -; ALL-NEXT: mov v2.b[12], w11 -; ALL-NEXT: umov w11, v0.b[15] -; ALL-NEXT: udiv w9, w12, w9 +; ALL-NEXT: umov w11, v1.b[15] ; ALL-NEXT: mov v2.b[13], w8 -; ALL-NEXT: udiv w8, w11, w10 -; ALL-NEXT: mov v2.b[14], w9 -; ALL-NEXT: mov v2.b[15], w8 +; ALL-NEXT: udiv w11, w12, w11 +; ALL-NEXT: mov v2.b[14], w10 +; ALL-NEXT: mov v2.b[15], w11 ; ALL-NEXT: mls v0.16b, v2.16b, v1.16b ; ALL-NEXT: str q2, [x0] ; ALL-NEXT: ret @@ -151,32 +151,32 @@ ; ALL-NEXT: umov w9, v0.h[1] ; ALL-NEXT: umov w10, v0.h[0] ; ALL-NEXT: umov w11, v0.h[2] -; ALL-NEXT: umov w12, v0.h[3] -; ALL-NEXT: umov w13, v0.h[4] ; ALL-NEXT: udiv w8, w9, w8 ; ALL-NEXT: umov w9, v1.h[0] +; ALL-NEXT: umov w12, v0.h[3] +; ALL-NEXT: umov w13, v0.h[4] +; ALL-NEXT: umov w14, v0.h[5] +; ALL-NEXT: umov w15, v0.h[6] ; ALL-NEXT: udiv w9, w10, w9 ; ALL-NEXT: umov w10, v1.h[2] ; ALL-NEXT: udiv w10, w11, w10 ; ALL-NEXT: umov w11, v1.h[3] ; ALL-NEXT: fmov s2, w9 -; ALL-NEXT: umov w9, v1.h[5] ; ALL-NEXT: mov v2.h[1], w8 +; ALL-NEXT: umov w8, v1.h[7] ; ALL-NEXT: udiv w11, w12, w11 ; ALL-NEXT: umov w12, v1.h[4] ; ALL-NEXT: mov v2.h[2], w10 -; ALL-NEXT: umov w10, v0.h[6] +; ALL-NEXT: umov w10, v0.h[7] ; ALL-NEXT: udiv w12, w13, w12 -; ALL-NEXT: umov w13, v0.h[5] +; ALL-NEXT: umov w13, v1.h[5] ; ALL-NEXT: mov v2.h[3], w11 -; ALL-NEXT: umov w11, v0.h[7] -; ALL-NEXT: udiv w8, w13, w9 -; ALL-NEXT: umov w9, v1.h[6] +; ALL-NEXT: udiv w13, w14, w13 +; ALL-NEXT: umov w14, v1.h[6] ; ALL-NEXT: mov v2.h[4], w12 -; ALL-NEXT: udiv w9, w10, w9 -; ALL-NEXT: umov w10, v1.h[7] -; ALL-NEXT: mov v2.h[5], w8 -; ALL-NEXT: udiv w8, w11, w10 +; ALL-NEXT: udiv w9, w15, w14 +; ALL-NEXT: mov v2.h[5], w13 +; ALL-NEXT: udiv w8, w10, w8 ; ALL-NEXT: mov v2.h[6], w9 ; ALL-NEXT: mov v2.h[7], w8 ; ALL-NEXT: mls v0.8h, v2.8h, v1.8h @@ -196,18 +196,18 @@ ; ALL-NEXT: mov w9, v0.s[1] ; ALL-NEXT: fmov w10, s0 ; ALL-NEXT: mov w11, v0.s[2] -; ALL-NEXT: mov w12, v0.s[3] ; ALL-NEXT: udiv w8, w9, w8 ; ALL-NEXT: fmov w9, s1 +; ALL-NEXT: mov w12, v0.s[3] ; ALL-NEXT: udiv w9, w10, w9 ; ALL-NEXT: mov w10, v1.s[2] ; ALL-NEXT: udiv w10, w11, w10 ; ALL-NEXT: mov w11, v1.s[3] ; ALL-NEXT: fmov s2, w9 ; ALL-NEXT: mov v2.s[1], w8 -; ALL-NEXT: udiv w8, w12, w11 +; ALL-NEXT: udiv w11, w12, w11 ; ALL-NEXT: mov v2.s[2], w10 -; ALL-NEXT: mov v2.s[3], w8 +; ALL-NEXT: mov v2.s[3], w11 ; ALL-NEXT: mls v0.4s, v2.4s, v1.4s ; ALL-NEXT: str q2, [x0] ; ALL-NEXT: ret @@ -228,13 +228,13 @@ ; ALL-NEXT: udiv x9, x9, x8 ; ALL-NEXT: mul x8, x9, x8 ; ALL-NEXT: udiv x11, x11, x10 -; ALL-NEXT: fmov d2, x9 ; ALL-NEXT: fmov d1, x8 ; ALL-NEXT: mul x10, x11, x10 -; ALL-NEXT: mov v2.d[1], x11 ; ALL-NEXT: mov v1.d[1], x10 -; ALL-NEXT: str q2, [x0] ; ALL-NEXT: sub v0.2d, v0.2d, v1.2d +; ALL-NEXT: fmov d1, x9 +; ALL-NEXT: mov v1.d[1], x11 +; ALL-NEXT: str q1, [x0] ; ALL-NEXT: ret %div = udiv <2 x i64> %x, %y store <2 x i64> %div, <2 x i64>* %divdst, align 16 diff --git a/llvm/test/CodeGen/AArch64/expand-vector-rot.ll b/llvm/test/CodeGen/AArch64/expand-vector-rot.ll --- a/llvm/test/CodeGen/AArch64/expand-vector-rot.ll +++ b/llvm/test/CodeGen/AArch64/expand-vector-rot.ll @@ -6,15 +6,15 @@ define <2 x i16> @rotlv2_16(<2 x i16> %vec2_16, <2 x i16> %shift) { ; CHECK-LABEL: rotlv2_16: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.2s, #15 -; CHECK-NEXT: movi d3, #0x00ffff0000ffff -; CHECK-NEXT: neg v4.2s, v1.2s -; CHECK-NEXT: and v4.8b, v4.8b, v2.8b -; CHECK-NEXT: and v3.8b, v0.8b, v3.8b -; CHECK-NEXT: neg v4.2s, v4.2s -; CHECK-NEXT: and v1.8b, v1.8b, v2.8b +; CHECK-NEXT: neg v2.2s, v1.2s +; CHECK-NEXT: movi v3.2s, #15 +; CHECK-NEXT: movi d4, #0x00ffff0000ffff +; CHECK-NEXT: and v1.8b, v1.8b, v3.8b +; CHECK-NEXT: and v2.8b, v2.8b, v3.8b +; CHECK-NEXT: and v4.8b, v0.8b, v4.8b +; CHECK-NEXT: neg v2.2s, v2.2s ; CHECK-NEXT: ushl v0.2s, v0.2s, v1.2s -; CHECK-NEXT: ushl v2.2s, v3.2s, v4.2s +; CHECK-NEXT: ushl v2.2s, v4.2s, v2.2s ; CHECK-NEXT: orr v0.8b, v0.8b, v2.8b ; CHECK-NEXT: ret %1 = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %vec2_16, <2 x i16> %vec2_16, <2 x i16> %shift) diff --git a/llvm/test/CodeGen/AArch64/fcvt_combine.ll b/llvm/test/CodeGen/AArch64/fcvt_combine.ll --- a/llvm/test/CodeGen/AArch64/fcvt_combine.ll +++ b/llvm/test/CodeGen/AArch64/fcvt_combine.ll @@ -180,48 +180,48 @@ define <8 x i16> @test_v8f16(<8 x half> %in) { ; CHECK-NO16-LABEL: test_v8f16: ; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: mov h2, v0.h[4] -; CHECK-NO16-NEXT: mov h3, v0.h[5] -; CHECK-NO16-NEXT: mov h4, v0.h[1] -; CHECK-NO16-NEXT: mov h5, v0.h[2] -; CHECK-NO16-NEXT: mov h6, v0.h[6] +; CHECK-NO16-NEXT: mov h2, v0.h[1] +; CHECK-NO16-NEXT: mov h3, v0.h[4] +; CHECK-NO16-NEXT: mov h4, v0.h[5] +; CHECK-NO16-NEXT: mov h6, v0.h[2] +; CHECK-NO16-NEXT: mov h16, v0.h[6] +; CHECK-NO16-NEXT: mov h5, v0.h[3] ; CHECK-NO16-NEXT: fcvt s7, h0 -; CHECK-NO16-NEXT: fmov s1, #4.00000000 -; CHECK-NO16-NEXT: mov h16, v0.h[3] +; CHECK-NO16-NEXT: mov h0, v0.h[7] ; CHECK-NO16-NEXT: fcvt s2, h2 ; CHECK-NO16-NEXT: fcvt s3, h3 ; CHECK-NO16-NEXT: fcvt s4, h4 -; CHECK-NO16-NEXT: mov h0, v0.h[7] -; CHECK-NO16-NEXT: fcvt s5, h5 +; CHECK-NO16-NEXT: fmov s1, #4.00000000 ; CHECK-NO16-NEXT: fcvt s6, h6 -; CHECK-NO16-NEXT: fmul s7, s7, s1 ; CHECK-NO16-NEXT: fcvt s16, h16 +; CHECK-NO16-NEXT: fcvt s5, h5 +; CHECK-NO16-NEXT: fcvt s0, h0 +; CHECK-NO16-NEXT: fmul s7, s7, s1 ; CHECK-NO16-NEXT: fmul s2, s2, s1 ; CHECK-NO16-NEXT: fmul s3, s3, s1 ; CHECK-NO16-NEXT: fmul s4, s4, s1 -; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmul s5, s5, s1 ; CHECK-NO16-NEXT: fmul s6, s6, s1 -; CHECK-NO16-NEXT: fcvt h7, s7 ; CHECK-NO16-NEXT: fmul s16, s16, s1 -; CHECK-NO16-NEXT: fcvt h2, s2 -; CHECK-NO16-NEXT: fcvt h3, s3 -; CHECK-NO16-NEXT: fcvt h4, s4 +; CHECK-NO16-NEXT: fmul s5, s5, s1 ; CHECK-NO16-NEXT: fmul s0, s0, s1 -; CHECK-NO16-NEXT: fcvt h1, s5 -; CHECK-NO16-NEXT: fcvt h5, s6 -; CHECK-NO16-NEXT: mov v2.h[1], v3.h[0] -; CHECK-NO16-NEXT: fcvt h3, s16 -; CHECK-NO16-NEXT: mov v7.h[1], v4.h[0] +; CHECK-NO16-NEXT: fcvt h7, s7 +; CHECK-NO16-NEXT: fcvt h1, s2 +; CHECK-NO16-NEXT: fcvt h2, s3 +; CHECK-NO16-NEXT: fcvt h3, s4 +; CHECK-NO16-NEXT: fcvt h4, s6 +; CHECK-NO16-NEXT: fcvt h6, s16 +; CHECK-NO16-NEXT: fcvt h5, s5 ; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: mov v2.h[2], v5.h[0] -; CHECK-NO16-NEXT: mov v7.h[2], v1.h[0] +; CHECK-NO16-NEXT: mov v7.h[1], v1.h[0] +; CHECK-NO16-NEXT: mov v2.h[1], v3.h[0] +; CHECK-NO16-NEXT: mov v7.h[2], v4.h[0] +; CHECK-NO16-NEXT: mov v2.h[2], v6.h[0] +; CHECK-NO16-NEXT: mov v7.h[3], v5.h[0] ; CHECK-NO16-NEXT: mov v2.h[3], v0.h[0] -; CHECK-NO16-NEXT: mov v7.h[3], v3.h[0] -; CHECK-NO16-NEXT: fcvtl v0.4s, v2.4h ; CHECK-NO16-NEXT: fcvtl v1.4s, v7.4h -; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NO16-NEXT: fcvtl v0.4s, v2.4h ; CHECK-NO16-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s ; CHECK-NO16-NEXT: uzp1 v0.8h, v1.8h, v0.8h ; CHECK-NO16-NEXT: ret ; @@ -237,8 +237,8 @@ define <4 x i16> @test_v4f16(<4 x half> %in) { ; CHECK-NO16-LABEL: test_v4f16: ; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: fmov v1.4s, #4.00000000 ; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: fmov v1.4s, #4.00000000 ; CHECK-NO16-NEXT: fmul v0.4s, v0.4s, v1.4s ; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s ; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h @@ -258,8 +258,8 @@ define <4 x i32> @test_v4f16_i32(<4 x half> %in) { ; CHECK-NO16-LABEL: test_v4f16_i32: ; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: fmov v1.4s, #4.00000000 ; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: fmov v1.4s, #4.00000000 ; CHECK-NO16-NEXT: fmul v0.4s, v0.4s, v1.4s ; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s ; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h @@ -476,100 +476,100 @@ ; CHECK-NO16-NEXT: mov h2, v0.h[4] ; CHECK-NO16-NEXT: mov h3, v0.h[5] ; CHECK-NO16-NEXT: mov h4, v0.h[6] -; CHECK-NO16-NEXT: fmov s1, #4.00000000 ; CHECK-NO16-NEXT: mov h5, v0.h[7] +; CHECK-NO16-NEXT: fmov s1, #4.00000000 ; CHECK-NO16-NEXT: mov h6, v0.h[1] ; CHECK-NO16-NEXT: mov h7, v0.h[2] -; CHECK-NO16-NEXT: fcvt s16, h0 +; CHECK-NO16-NEXT: mov h16, v0.h[3] ; CHECK-NO16-NEXT: fcvt s2, h2 ; CHECK-NO16-NEXT: fcvt s3, h3 ; CHECK-NO16-NEXT: fcvt s4, h4 -; CHECK-NO16-NEXT: mov h0, v0.h[3] ; CHECK-NO16-NEXT: fcvt s5, h5 +; CHECK-NO16-NEXT: fcvt s0, h0 ; CHECK-NO16-NEXT: fcvt s6, h6 +; CHECK-NO16-NEXT: fcvt s7, h7 ; CHECK-NO16-NEXT: mov w9, #32767 -; CHECK-NO16-NEXT: mov w10, #-32768 ; CHECK-NO16-NEXT: fmul s2, s2, s1 ; CHECK-NO16-NEXT: fmul s3, s3, s1 ; CHECK-NO16-NEXT: fmul s4, s4, s1 -; CHECK-NO16-NEXT: fcvt s0, h0 ; CHECK-NO16-NEXT: fmul s5, s5, s1 +; CHECK-NO16-NEXT: fmul s0, s0, s1 ; CHECK-NO16-NEXT: fmul s6, s6, s1 +; CHECK-NO16-NEXT: mov w10, #-32768 ; CHECK-NO16-NEXT: fcvt h2, s2 ; CHECK-NO16-NEXT: fcvt h3, s3 -; CHECK-NO16-NEXT: fmul s0, s0, s1 +; CHECK-NO16-NEXT: fcvt h4, s4 ; CHECK-NO16-NEXT: fcvt h5, s5 -; CHECK-NO16-NEXT: fcvt h6, s6 -; CHECK-NO16-NEXT: mov v2.h[1], v3.h[0] -; CHECK-NO16-NEXT: fcvt h3, s4 -; CHECK-NO16-NEXT: fcvt s4, h7 -; CHECK-NO16-NEXT: fmul s7, s16, s1 ; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: mov v2.h[2], v3.h[0] -; CHECK-NO16-NEXT: fmul s3, s4, s1 -; CHECK-NO16-NEXT: fcvt h4, s7 +; CHECK-NO16-NEXT: mov v2.h[1], v3.h[0] +; CHECK-NO16-NEXT: fcvt s3, h16 +; CHECK-NO16-NEXT: mov v2.h[2], v4.h[0] +; CHECK-NO16-NEXT: fmul s4, s7, s1 ; CHECK-NO16-NEXT: mov v2.h[3], v5.h[0] -; CHECK-NO16-NEXT: fcvt h1, s3 -; CHECK-NO16-NEXT: mov v4.h[1], v6.h[0] +; CHECK-NO16-NEXT: fmul s1, s3, s1 +; CHECK-NO16-NEXT: fcvt h3, s6 ; CHECK-NO16-NEXT: fcvtl v2.4s, v2.4h -; CHECK-NO16-NEXT: mov v4.h[2], v1.h[0] -; CHECK-NO16-NEXT: mov s1, v2.s[1] +; CHECK-NO16-NEXT: fcvt h4, s4 +; CHECK-NO16-NEXT: fcvt h1, s1 +; CHECK-NO16-NEXT: mov v0.h[1], v3.h[0] +; CHECK-NO16-NEXT: mov s3, v2.s[1] ; CHECK-NO16-NEXT: fcvtzs w11, s2 -; CHECK-NO16-NEXT: mov v4.h[3], v0.h[0] -; CHECK-NO16-NEXT: mov s0, v2.s[2] +; CHECK-NO16-NEXT: mov v0.h[2], v4.h[0] +; CHECK-NO16-NEXT: mov v0.h[3], v1.h[0] +; CHECK-NO16-NEXT: mov s1, v2.s[2] +; CHECK-NO16-NEXT: fcvtzs w8, s3 ; CHECK-NO16-NEXT: mov s2, v2.s[3] -; CHECK-NO16-NEXT: fcvtzs w8, s1 -; CHECK-NO16-NEXT: fcvtl v1.4s, v4.4h -; CHECK-NO16-NEXT: fcvtzs w12, s0 +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h ; CHECK-NO16-NEXT: cmp w8, w9 -; CHECK-NO16-NEXT: fcvtzs w13, s2 +; CHECK-NO16-NEXT: fcvtzs w12, s1 ; CHECK-NO16-NEXT: csel w8, w8, w9, lt +; CHECK-NO16-NEXT: fcvtzs w13, s2 ; CHECK-NO16-NEXT: cmn w8, #8, lsl #12 // =32768 +; CHECK-NO16-NEXT: mov s1, v0.s[1] ; CHECK-NO16-NEXT: csel w8, w8, w10, gt ; CHECK-NO16-NEXT: cmp w11, w9 ; CHECK-NO16-NEXT: csel w11, w11, w9, lt -; CHECK-NO16-NEXT: mov s0, v1.s[1] +; CHECK-NO16-NEXT: fcvtzs w15, s0 ; CHECK-NO16-NEXT: cmn w11, #8, lsl #12 // =32768 -; CHECK-NO16-NEXT: fcvtzs w15, s1 ; CHECK-NO16-NEXT: csel w11, w11, w10, gt ; CHECK-NO16-NEXT: cmp w12, w9 ; CHECK-NO16-NEXT: csel w12, w12, w9, lt +; CHECK-NO16-NEXT: fcvtzs w14, s1 ; CHECK-NO16-NEXT: cmn w12, #8, lsl #12 // =32768 -; CHECK-NO16-NEXT: fcvtzs w14, s0 +; CHECK-NO16-NEXT: mov s1, v0.s[2] ; CHECK-NO16-NEXT: csel w12, w12, w10, gt ; CHECK-NO16-NEXT: cmp w13, w9 ; CHECK-NO16-NEXT: csel w13, w13, w9, lt -; CHECK-NO16-NEXT: mov s0, v1.s[2] +; CHECK-NO16-NEXT: mov s0, v0.s[3] ; CHECK-NO16-NEXT: cmn w13, #8, lsl #12 // =32768 -; CHECK-NO16-NEXT: fmov s2, w11 ; CHECK-NO16-NEXT: csel w13, w13, w10, gt ; CHECK-NO16-NEXT: cmp w14, w9 ; CHECK-NO16-NEXT: csel w14, w14, w9, lt +; CHECK-NO16-NEXT: fcvtzs w16, s1 ; CHECK-NO16-NEXT: cmn w14, #8, lsl #12 // =32768 +; CHECK-NO16-NEXT: fcvtzs w17, s0 ; CHECK-NO16-NEXT: csel w14, w14, w10, gt ; CHECK-NO16-NEXT: cmp w15, w9 ; CHECK-NO16-NEXT: csel w15, w15, w9, lt +; CHECK-NO16-NEXT: fmov s0, w11 ; CHECK-NO16-NEXT: cmn w15, #8, lsl #12 // =32768 -; CHECK-NO16-NEXT: csel w11, w15, w10, gt -; CHECK-NO16-NEXT: fcvtzs w15, s0 -; CHECK-NO16-NEXT: mov s0, v1.s[3] -; CHECK-NO16-NEXT: mov v2.s[1], w8 -; CHECK-NO16-NEXT: fmov s1, w11 -; CHECK-NO16-NEXT: cmp w15, w9 -; CHECK-NO16-NEXT: csel w8, w15, w9, lt -; CHECK-NO16-NEXT: fcvtzs w11, s0 -; CHECK-NO16-NEXT: cmn w8, #8, lsl #12 // =32768 +; CHECK-NO16-NEXT: csel w15, w15, w10, gt +; CHECK-NO16-NEXT: cmp w16, w9 +; CHECK-NO16-NEXT: csel w11, w16, w9, lt +; CHECK-NO16-NEXT: mov v0.s[1], w8 +; CHECK-NO16-NEXT: cmn w11, #8, lsl #12 // =32768 +; CHECK-NO16-NEXT: fmov s1, w15 +; CHECK-NO16-NEXT: csel w11, w11, w10, gt +; CHECK-NO16-NEXT: cmp w17, w9 +; CHECK-NO16-NEXT: mov v0.s[2], w12 +; CHECK-NO16-NEXT: csel w8, w17, w9, lt ; CHECK-NO16-NEXT: mov v1.s[1], w14 +; CHECK-NO16-NEXT: cmn w8, #8, lsl #12 // =32768 ; CHECK-NO16-NEXT: csel w8, w8, w10, gt -; CHECK-NO16-NEXT: mov v2.s[2], w12 -; CHECK-NO16-NEXT: cmp w11, w9 -; CHECK-NO16-NEXT: csel w9, w11, w9, lt -; CHECK-NO16-NEXT: mov v1.s[2], w8 -; CHECK-NO16-NEXT: cmn w9, #8, lsl #12 // =32768 -; CHECK-NO16-NEXT: csel w8, w9, w10, gt -; CHECK-NO16-NEXT: mov v2.s[3], w13 +; CHECK-NO16-NEXT: mov v0.s[3], w13 +; CHECK-NO16-NEXT: mov v1.s[2], w11 ; CHECK-NO16-NEXT: mov v1.s[3], w8 -; CHECK-NO16-NEXT: uzp1 v0.8h, v1.8h, v2.8h +; CHECK-NO16-NEXT: uzp1 v0.8h, v1.8h, v0.8h ; CHECK-NO16-NEXT: ret ; ; CHECK-FP16-LABEL: test_v8f16_sat: @@ -584,8 +584,8 @@ define <4 x i16> @test_v4f16_sat(<4 x half> %in) { ; CHECK-NO16-LABEL: test_v4f16_sat: ; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: fmov v1.4s, #4.00000000 ; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: fmov v1.4s, #4.00000000 ; CHECK-NO16-NEXT: fmul v0.4s, v0.4s, v1.4s ; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s ; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h @@ -605,8 +605,8 @@ define <4 x i32> @test_v4f16_i32_sat(<4 x half> %in) { ; CHECK-NO16-LABEL: test_v4f16_i32_sat: ; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: fmov v1.4s, #4.00000000 ; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: fmov v1.4s, #4.00000000 ; CHECK-NO16-NEXT: fmul v0.4s, v0.4s, v1.4s ; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s ; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h @@ -628,8 +628,8 @@ define <4 x i32> @test_extrasat(<4 x float> %f) { ; CHECK-LABEL: test_extrasat: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0xffffff00ffffff ; CHECK-NEXT: fcvtzu v0.4s, v0.4s, #3 +; CHECK-NEXT: movi v1.2d, #0xffffff00ffffff ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %mul.i = fmul <4 x float> %f, diff --git a/llvm/test/CodeGen/AArch64/fdiv-combine.ll b/llvm/test/CodeGen/AArch64/fdiv-combine.ll --- a/llvm/test/CodeGen/AArch64/fdiv-combine.ll +++ b/llvm/test/CodeGen/AArch64/fdiv-combine.ll @@ -100,8 +100,8 @@ define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) #0 { ; CHECK-LABEL: splat_three_fdiv_4xfloat: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov v4.4s, #1.00000000 ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 +; CHECK-NEXT: fmov v4.4s, #1.00000000 ; CHECK-NEXT: dup v0.4s, v0.s[0] ; CHECK-NEXT: fdiv v4.4s, v4.4s, v0.4s ; CHECK-NEXT: fmul v0.4s, v1.4s, v4.4s @@ -120,8 +120,8 @@ define <4 x float> @splat_fdiv_v4f32(float %D, <4 x float> %a) #1 { ; CHECK-LABEL: splat_fdiv_v4f32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov v2.4s, #1.00000000 ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 +; CHECK-NEXT: fmov v2.4s, #1.00000000 ; CHECK-NEXT: dup v0.4s, v0.s[0] ; CHECK-NEXT: fdiv v0.4s, v2.4s, v0.4s ; CHECK-NEXT: fmul v0.4s, v1.4s, v0.4s diff --git a/llvm/test/CodeGen/AArch64/fdiv_combine.ll b/llvm/test/CodeGen/AArch64/fdiv_combine.ll --- a/llvm/test/CodeGen/AArch64/fdiv_combine.ll +++ b/llvm/test/CodeGen/AArch64/fdiv_combine.ll @@ -29,8 +29,8 @@ define <2 x float> @test3(<2 x i32> %in) { ; CHECK-LABEL: test3: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov.2s v1, #9.00000000 ; CHECK-NEXT: scvtf.2s v0, v0 +; CHECK-NEXT: fmov.2s v1, #9.00000000 ; CHECK-NEXT: fdiv.2s v0, v0, v1 ; CHECK-NEXT: ret entry: @@ -43,8 +43,8 @@ define <2 x float> @test4(<2 x i32> %in) { ; CHECK-LABEL: test4: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi.2s v1, #80, lsl #24 ; CHECK-NEXT: scvtf.2s v0, v0 +; CHECK-NEXT: movi.2s v1, #80, lsl #24 ; CHECK-NEXT: fdiv.2s v0, v0, v1 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll b/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll --- a/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll +++ b/llvm/test/CodeGen/AArch64/fp-intrinsics-vector.ll @@ -241,15 +241,15 @@ ; CHECK-NEXT: mov s0, v0.s[3] ; CHECK-NEXT: csetm w8, eq ; CHECK-NEXT: fcmp s3, s2 -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: csetm w8, eq +; CHECK-NEXT: csetm w9, eq ; CHECK-NEXT: fcmp s5, s4 -; CHECK-NEXT: mov v2.s[1], w8 +; CHECK-NEXT: fmov s2, w8 ; CHECK-NEXT: csetm w8, eq ; CHECK-NEXT: fcmp s0, s1 +; CHECK-NEXT: mov v2.s[1], w9 +; CHECK-NEXT: csetm w9, eq ; CHECK-NEXT: mov v2.s[2], w8 -; CHECK-NEXT: csetm w8, eq -; CHECK-NEXT: mov v2.s[3], w8 +; CHECK-NEXT: mov v2.s[3], w9 ; CHECK-NEXT: xtn v0.4h, v2.4s ; CHECK-NEXT: ret entry: @@ -269,15 +269,15 @@ ; CHECK-NEXT: mov s0, v0.s[3] ; CHECK-NEXT: csetm w8, eq ; CHECK-NEXT: fcmpe s3, s2 -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: csetm w8, eq +; CHECK-NEXT: csetm w9, eq ; CHECK-NEXT: fcmpe s5, s4 -; CHECK-NEXT: mov v2.s[1], w8 +; CHECK-NEXT: fmov s2, w8 ; CHECK-NEXT: csetm w8, eq ; CHECK-NEXT: fcmpe s0, s1 +; CHECK-NEXT: mov v2.s[1], w9 +; CHECK-NEXT: csetm w9, eq ; CHECK-NEXT: mov v2.s[2], w8 -; CHECK-NEXT: csetm w8, eq -; CHECK-NEXT: mov v2.s[3], w8 +; CHECK-NEXT: mov v2.s[3], w9 ; CHECK-NEXT: xtn v0.4h, v2.4s ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll --- a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll +++ b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll @@ -13,48 +13,48 @@ ; CHECK-CVT-NEXT: mov h7, v0.h[2] ; CHECK-CVT-NEXT: mov h16, v1.h[3] ; CHECK-CVT-NEXT: mov h17, v0.h[3] +; CHECK-CVT-NEXT: mov h18, v1.h[4] +; CHECK-CVT-NEXT: mov h19, v0.h[4] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: fcvt s6, h6 +; CHECK-CVT-NEXT: fcvt s7, h7 +; CHECK-CVT-NEXT: mov h20, v1.h[5] ; CHECK-CVT-NEXT: fadd s4, s5, s4 -; CHECK-CVT-NEXT: fcvt s5, h6 -; CHECK-CVT-NEXT: fcvt s6, h7 -; CHECK-CVT-NEXT: fcvt s7, h16 -; CHECK-CVT-NEXT: fcvt s16, h17 -; CHECK-CVT-NEXT: fadd s3, s3, s2 +; CHECK-CVT-NEXT: mov h5, v0.h[5] +; CHECK-CVT-NEXT: fcvt s16, h16 +; CHECK-CVT-NEXT: fcvt s17, h17 +; CHECK-CVT-NEXT: fcvt s18, h18 +; CHECK-CVT-NEXT: fcvt s19, h19 +; CHECK-CVT-NEXT: fadd s2, s3, s2 +; CHECK-CVT-NEXT: fadd s3, s7, s6 +; CHECK-CVT-NEXT: fcvt s6, h20 +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: fadd s7, s17, s16 +; CHECK-CVT-NEXT: mov h16, v1.h[6] +; CHECK-CVT-NEXT: fadd s17, s19, s18 +; CHECK-CVT-NEXT: mov h18, v0.h[6] +; CHECK-CVT-NEXT: fcvt h19, s2 ; CHECK-CVT-NEXT: fcvt h2, s4 -; CHECK-CVT-NEXT: fadd s4, s6, s5 -; CHECK-CVT-NEXT: mov h5, v1.h[4] -; CHECK-CVT-NEXT: mov h6, v0.h[4] -; CHECK-CVT-NEXT: fadd s7, s16, s7 -; CHECK-CVT-NEXT: fcvt h3, s3 -; CHECK-CVT-NEXT: mov h16, v0.h[5] -; CHECK-CVT-NEXT: fcvt h7, s7 -; CHECK-CVT-NEXT: mov v2.h[1], v3.h[0] -; CHECK-CVT-NEXT: fcvt h3, s4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: fcvt s5, h6 -; CHECK-CVT-NEXT: mov h6, v1.h[5] -; CHECK-CVT-NEXT: mov v2.h[2], v3.h[0] -; CHECK-CVT-NEXT: fadd s3, s5, s4 -; CHECK-CVT-NEXT: fcvt s4, h6 -; CHECK-CVT-NEXT: fcvt s5, h16 -; CHECK-CVT-NEXT: mov h6, v1.h[6] -; CHECK-CVT-NEXT: mov h16, v0.h[6] ; CHECK-CVT-NEXT: mov h1, v1.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], v7.h[0] ; CHECK-CVT-NEXT: mov h0, v0.h[7] +; CHECK-CVT-NEXT: fadd s4, s5, s6 ; CHECK-CVT-NEXT: fcvt h3, s3 -; CHECK-CVT-NEXT: fadd s4, s5, s4 -; CHECK-CVT-NEXT: fcvt s5, h6 -; CHECK-CVT-NEXT: fcvt s6, h16 +; CHECK-CVT-NEXT: fcvt s5, h16 +; CHECK-CVT-NEXT: fcvt s6, h18 +; CHECK-CVT-NEXT: mov v2.h[1], v19.h[0] +; CHECK-CVT-NEXT: fcvt h7, s7 ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], v3.h[0] -; CHECK-CVT-NEXT: fcvt h3, s4 -; CHECK-CVT-NEXT: fadd s4, s6, s5 +; CHECK-CVT-NEXT: mov v2.h[2], v3.h[0] +; CHECK-CVT-NEXT: fadd s3, s6, s5 +; CHECK-CVT-NEXT: fcvt h5, s17 +; CHECK-CVT-NEXT: mov v2.h[3], v7.h[0] ; CHECK-CVT-NEXT: fadd s0, s0, s1 -; CHECK-CVT-NEXT: mov v2.h[5], v3.h[0] -; CHECK-CVT-NEXT: fcvt h3, s4 +; CHECK-CVT-NEXT: fcvt h1, s4 +; CHECK-CVT-NEXT: mov v2.h[4], v5.h[0] +; CHECK-CVT-NEXT: fcvt h3, s3 +; CHECK-CVT-NEXT: mov v2.h[5], v1.h[0] ; CHECK-CVT-NEXT: fcvt h0, s0 ; CHECK-CVT-NEXT: mov v2.h[6], v3.h[0] ; CHECK-CVT-NEXT: mov v2.h[7], v0.h[0] @@ -82,48 +82,48 @@ ; CHECK-CVT-NEXT: mov h7, v0.h[2] ; CHECK-CVT-NEXT: mov h16, v1.h[3] ; CHECK-CVT-NEXT: mov h17, v0.h[3] +; CHECK-CVT-NEXT: mov h18, v1.h[4] +; CHECK-CVT-NEXT: mov h19, v0.h[4] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: fcvt s6, h6 +; CHECK-CVT-NEXT: fcvt s7, h7 +; CHECK-CVT-NEXT: mov h20, v1.h[5] ; CHECK-CVT-NEXT: fsub s4, s5, s4 -; CHECK-CVT-NEXT: fcvt s5, h6 -; CHECK-CVT-NEXT: fcvt s6, h7 -; CHECK-CVT-NEXT: fcvt s7, h16 -; CHECK-CVT-NEXT: fcvt s16, h17 -; CHECK-CVT-NEXT: fsub s3, s3, s2 +; CHECK-CVT-NEXT: mov h5, v0.h[5] +; CHECK-CVT-NEXT: fcvt s16, h16 +; CHECK-CVT-NEXT: fcvt s17, h17 +; CHECK-CVT-NEXT: fcvt s18, h18 +; CHECK-CVT-NEXT: fcvt s19, h19 +; CHECK-CVT-NEXT: fsub s2, s3, s2 +; CHECK-CVT-NEXT: fsub s3, s7, s6 +; CHECK-CVT-NEXT: fcvt s6, h20 +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: fsub s7, s17, s16 +; CHECK-CVT-NEXT: mov h16, v1.h[6] +; CHECK-CVT-NEXT: fsub s17, s19, s18 +; CHECK-CVT-NEXT: mov h18, v0.h[6] +; CHECK-CVT-NEXT: fcvt h19, s2 ; CHECK-CVT-NEXT: fcvt h2, s4 -; CHECK-CVT-NEXT: fsub s4, s6, s5 -; CHECK-CVT-NEXT: mov h5, v1.h[4] -; CHECK-CVT-NEXT: mov h6, v0.h[4] -; CHECK-CVT-NEXT: fsub s7, s16, s7 -; CHECK-CVT-NEXT: fcvt h3, s3 -; CHECK-CVT-NEXT: mov h16, v0.h[5] -; CHECK-CVT-NEXT: fcvt h7, s7 -; CHECK-CVT-NEXT: mov v2.h[1], v3.h[0] -; CHECK-CVT-NEXT: fcvt h3, s4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: fcvt s5, h6 -; CHECK-CVT-NEXT: mov h6, v1.h[5] -; CHECK-CVT-NEXT: mov v2.h[2], v3.h[0] -; CHECK-CVT-NEXT: fsub s3, s5, s4 -; CHECK-CVT-NEXT: fcvt s4, h6 -; CHECK-CVT-NEXT: fcvt s5, h16 -; CHECK-CVT-NEXT: mov h6, v1.h[6] -; CHECK-CVT-NEXT: mov h16, v0.h[6] ; CHECK-CVT-NEXT: mov h1, v1.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], v7.h[0] ; CHECK-CVT-NEXT: mov h0, v0.h[7] +; CHECK-CVT-NEXT: fsub s4, s5, s6 ; CHECK-CVT-NEXT: fcvt h3, s3 -; CHECK-CVT-NEXT: fsub s4, s5, s4 -; CHECK-CVT-NEXT: fcvt s5, h6 -; CHECK-CVT-NEXT: fcvt s6, h16 +; CHECK-CVT-NEXT: fcvt s5, h16 +; CHECK-CVT-NEXT: fcvt s6, h18 +; CHECK-CVT-NEXT: mov v2.h[1], v19.h[0] +; CHECK-CVT-NEXT: fcvt h7, s7 ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], v3.h[0] -; CHECK-CVT-NEXT: fcvt h3, s4 -; CHECK-CVT-NEXT: fsub s4, s6, s5 +; CHECK-CVT-NEXT: mov v2.h[2], v3.h[0] +; CHECK-CVT-NEXT: fsub s3, s6, s5 +; CHECK-CVT-NEXT: fcvt h5, s17 +; CHECK-CVT-NEXT: mov v2.h[3], v7.h[0] ; CHECK-CVT-NEXT: fsub s0, s0, s1 -; CHECK-CVT-NEXT: mov v2.h[5], v3.h[0] -; CHECK-CVT-NEXT: fcvt h3, s4 +; CHECK-CVT-NEXT: fcvt h1, s4 +; CHECK-CVT-NEXT: mov v2.h[4], v5.h[0] +; CHECK-CVT-NEXT: fcvt h3, s3 +; CHECK-CVT-NEXT: mov v2.h[5], v1.h[0] ; CHECK-CVT-NEXT: fcvt h0, s0 ; CHECK-CVT-NEXT: mov v2.h[6], v3.h[0] ; CHECK-CVT-NEXT: mov v2.h[7], v0.h[0] @@ -149,50 +149,50 @@ ; CHECK-CVT-NEXT: fcvt s5, h0 ; CHECK-CVT-NEXT: mov h6, v1.h[2] ; CHECK-CVT-NEXT: mov h7, v0.h[2] -; CHECK-CVT-NEXT: mov h16, v0.h[3] +; CHECK-CVT-NEXT: mov h16, v1.h[3] +; CHECK-CVT-NEXT: mov h17, v0.h[3] +; CHECK-CVT-NEXT: mov h18, v1.h[4] +; CHECK-CVT-NEXT: mov h19, v0.h[4] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: fmul s4, s5, s4 -; CHECK-CVT-NEXT: mov h5, v1.h[3] ; CHECK-CVT-NEXT: fcvt s6, h6 ; CHECK-CVT-NEXT: fcvt s7, h7 -; CHECK-CVT-NEXT: fmul s3, s3, s2 -; CHECK-CVT-NEXT: fcvt h2, s4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: fcvt s5, h16 -; CHECK-CVT-NEXT: fmul s6, s7, s6 -; CHECK-CVT-NEXT: mov h7, v1.h[4] -; CHECK-CVT-NEXT: mov h16, v0.h[4] -; CHECK-CVT-NEXT: fcvt h3, s3 +; CHECK-CVT-NEXT: mov h20, v1.h[5] ; CHECK-CVT-NEXT: fmul s4, s5, s4 ; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: fcvt h6, s6 -; CHECK-CVT-NEXT: fcvt s7, h7 -; CHECK-CVT-NEXT: mov v2.h[1], v3.h[0] -; CHECK-CVT-NEXT: mov h3, v1.h[5] ; CHECK-CVT-NEXT: fcvt s16, h16 -; CHECK-CVT-NEXT: fcvt h4, s4 +; CHECK-CVT-NEXT: fcvt s17, h17 +; CHECK-CVT-NEXT: fcvt s18, h18 +; CHECK-CVT-NEXT: fcvt s19, h19 +; CHECK-CVT-NEXT: fmul s2, s3, s2 +; CHECK-CVT-NEXT: fmul s3, s7, s6 +; CHECK-CVT-NEXT: fcvt s6, h20 ; CHECK-CVT-NEXT: fcvt s5, h5 -; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov v2.h[2], v6.h[0] -; CHECK-CVT-NEXT: fmul s6, s16, s7 -; CHECK-CVT-NEXT: mov h7, v1.h[6] -; CHECK-CVT-NEXT: mov h16, v0.h[6] +; CHECK-CVT-NEXT: fmul s7, s17, s16 +; CHECK-CVT-NEXT: mov h16, v1.h[6] +; CHECK-CVT-NEXT: fmul s17, s19, s18 +; CHECK-CVT-NEXT: mov h18, v0.h[6] +; CHECK-CVT-NEXT: fcvt h19, s2 +; CHECK-CVT-NEXT: fcvt h2, s4 ; CHECK-CVT-NEXT: mov h1, v1.h[7] -; CHECK-CVT-NEXT: fmul s3, s5, s3 ; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], v4.h[0] -; CHECK-CVT-NEXT: fcvt h4, s6 -; CHECK-CVT-NEXT: fcvt s5, h7 -; CHECK-CVT-NEXT: fcvt s6, h16 -; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fmul s4, s5, s6 ; CHECK-CVT-NEXT: fcvt h3, s3 +; CHECK-CVT-NEXT: fcvt s5, h16 +; CHECK-CVT-NEXT: fcvt s6, h18 +; CHECK-CVT-NEXT: mov v2.h[1], v19.h[0] +; CHECK-CVT-NEXT: fcvt h7, s7 +; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], v4.h[0] -; CHECK-CVT-NEXT: fmul s4, s6, s5 +; CHECK-CVT-NEXT: mov v2.h[2], v3.h[0] +; CHECK-CVT-NEXT: fmul s3, s6, s5 +; CHECK-CVT-NEXT: fcvt h5, s17 +; CHECK-CVT-NEXT: mov v2.h[3], v7.h[0] ; CHECK-CVT-NEXT: fmul s0, s0, s1 -; CHECK-CVT-NEXT: mov v2.h[5], v3.h[0] -; CHECK-CVT-NEXT: fcvt h3, s4 +; CHECK-CVT-NEXT: fcvt h1, s4 +; CHECK-CVT-NEXT: mov v2.h[4], v5.h[0] +; CHECK-CVT-NEXT: fcvt h3, s3 +; CHECK-CVT-NEXT: mov v2.h[5], v1.h[0] ; CHECK-CVT-NEXT: fcvt h0, s0 ; CHECK-CVT-NEXT: mov v2.h[6], v3.h[0] ; CHECK-CVT-NEXT: mov v2.h[7], v0.h[0] @@ -233,39 +233,38 @@ ; CHECK-CVT-NEXT: fcvt s0, h0 ; CHECK-CVT-NEXT: fdiv s3, s4, s3 ; CHECK-CVT-NEXT: mov h4, v1.h[2] -; CHECK-CVT-NEXT: fcvt h18, s2 +; CHECK-CVT-NEXT: fcvt h2, s2 ; CHECK-CVT-NEXT: fcvt s4, h4 ; CHECK-CVT-NEXT: fdiv s4, s5, s4 ; CHECK-CVT-NEXT: mov h5, v1.h[3] -; CHECK-CVT-NEXT: fcvt h2, s3 ; CHECK-CVT-NEXT: fcvt s5, h5 -; CHECK-CVT-NEXT: mov v2.h[1], v18.h[0] ; CHECK-CVT-NEXT: fdiv s5, s6, s5 ; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: fcvt h4, s4 ; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: mov v2.h[2], v4.h[0] ; CHECK-CVT-NEXT: fdiv s6, s7, s6 ; CHECK-CVT-NEXT: mov h7, v1.h[5] -; CHECK-CVT-NEXT: fcvt h4, s5 ; CHECK-CVT-NEXT: fcvt s7, h7 -; CHECK-CVT-NEXT: mov v2.h[3], v4.h[0] ; CHECK-CVT-NEXT: fdiv s7, s16, s7 ; CHECK-CVT-NEXT: mov h16, v1.h[6] ; CHECK-CVT-NEXT: mov h1, v1.h[7] ; CHECK-CVT-NEXT: fcvt s16, h16 ; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fdiv s3, s17, s16 -; CHECK-CVT-NEXT: fdiv s0, s0, s1 -; CHECK-CVT-NEXT: fcvt h1, s6 -; CHECK-CVT-NEXT: mov v2.h[4], v1.h[0] -; CHECK-CVT-NEXT: fcvt h1, s7 -; CHECK-CVT-NEXT: mov v2.h[5], v1.h[0] -; CHECK-CVT-NEXT: fcvt h1, s3 -; CHECK-CVT-NEXT: mov v2.h[6], v1.h[0] -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: mov v2.h[7], v0.h[0] -; CHECK-CVT-NEXT: mov v0.16b, v2.16b +; CHECK-CVT-NEXT: fdiv s16, s17, s16 +; CHECK-CVT-NEXT: fdiv s1, s0, s1 +; CHECK-CVT-NEXT: fcvt h0, s3 +; CHECK-CVT-NEXT: fcvt h3, s4 +; CHECK-CVT-NEXT: mov v0.h[1], v2.h[0] +; CHECK-CVT-NEXT: fcvt h2, s5 +; CHECK-CVT-NEXT: mov v0.h[2], v3.h[0] +; CHECK-CVT-NEXT: fcvt h3, s6 +; CHECK-CVT-NEXT: mov v0.h[3], v2.h[0] +; CHECK-CVT-NEXT: fcvt h2, s7 +; CHECK-CVT-NEXT: mov v0.h[4], v3.h[0] +; CHECK-CVT-NEXT: fcvt h3, s16 +; CHECK-CVT-NEXT: mov v0.h[5], v2.h[0] +; CHECK-CVT-NEXT: fcvt h1, s1 +; CHECK-CVT-NEXT: mov v0.h[6], v3.h[0] +; CHECK-CVT-NEXT: mov v0.h[7], v1.h[0] ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: div_h: @@ -318,20 +317,20 @@ ; CHECK-NEXT: mov d5, v1.d[1] ; CHECK-NEXT: fcvt h1, d1 ; CHECK-NEXT: fcvt h4, d4 +; CHECK-NEXT: fcvt h5, d5 ; CHECK-NEXT: mov v0.h[1], v4.h[0] -; CHECK-NEXT: fcvt h4, d5 +; CHECK-NEXT: mov d4, v2.d[1] ; CHECK-NEXT: mov v0.h[2], v1.h[0] ; CHECK-NEXT: fcvt h1, d2 -; CHECK-NEXT: mov d2, v2.d[1] -; CHECK-NEXT: mov v0.h[3], v4.h[0] -; CHECK-NEXT: fcvt h2, d2 +; CHECK-NEXT: mov v0.h[3], v5.h[0] +; CHECK-NEXT: fcvt h2, d4 +; CHECK-NEXT: mov d4, v3.d[1] ; CHECK-NEXT: mov v0.h[4], v1.h[0] ; CHECK-NEXT: fcvt h1, d3 ; CHECK-NEXT: mov v0.h[5], v2.h[0] -; CHECK-NEXT: mov d2, v3.d[1] +; CHECK-NEXT: fcvt h2, d4 ; CHECK-NEXT: mov v0.h[6], v1.h[0] -; CHECK-NEXT: fcvt h1, d2 -; CHECK-NEXT: mov v0.h[7], v1.h[0] +; CHECK-NEXT: mov v0.h[7], v2.h[0] ; CHECK-NEXT: ret %1 = fptrunc <8 x double> %a to <8 x half> ret <8 x half> %1 @@ -350,25 +349,25 @@ define <8 x double> @h_to_d(<8 x half> %a) { ; CHECK-LABEL: h_to_d: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: mov h1, v0.h[1] -; CHECK-NEXT: mov h3, v0.h[3] -; CHECK-NEXT: mov h4, v0.h[2] +; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: mov h3, v0.h[1] +; CHECK-NEXT: mov h4, v0.h[3] +; CHECK-NEXT: mov h5, v0.h[2] +; CHECK-NEXT: mov h6, v1.h[1] +; CHECK-NEXT: mov h7, v1.h[3] +; CHECK-NEXT: mov h16, v1.h[2] ; CHECK-NEXT: fcvt d0, h0 -; CHECK-NEXT: mov h5, v2.h[1] -; CHECK-NEXT: mov h6, v2.h[3] -; CHECK-NEXT: mov h7, v2.h[2] -; CHECK-NEXT: fcvt d16, h1 +; CHECK-NEXT: fcvt d2, h1 ; CHECK-NEXT: fcvt d17, h3 -; CHECK-NEXT: fcvt d1, h4 -; CHECK-NEXT: fcvt d2, h2 -; CHECK-NEXT: fcvt d4, h5 +; CHECK-NEXT: fcvt d4, h4 +; CHECK-NEXT: fcvt d1, h5 ; CHECK-NEXT: fcvt d5, h6 -; CHECK-NEXT: fcvt d3, h7 -; CHECK-NEXT: mov v0.d[1], v16.d[0] -; CHECK-NEXT: mov v1.d[1], v17.d[0] -; CHECK-NEXT: mov v2.d[1], v4.d[0] -; CHECK-NEXT: mov v3.d[1], v5.d[0] +; CHECK-NEXT: fcvt d6, h7 +; CHECK-NEXT: fcvt d3, h16 +; CHECK-NEXT: mov v0.d[1], v17.d[0] +; CHECK-NEXT: mov v1.d[1], v4.d[0] +; CHECK-NEXT: mov v2.d[1], v5.d[0] +; CHECK-NEXT: mov v3.d[1], v6.d[0] ; CHECK-NEXT: ret %1 = fpext <8 x half> %a to <8 x double> ret <8 x double> %1 @@ -746,59 +745,59 @@ ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: mov h2, v1.h[1] ; CHECK-CVT-NEXT: mov h3, v0.h[1] -; CHECK-CVT-NEXT: fcvt s4, h1 -; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] +; CHECK-CVT-NEXT: mov h4, v1.h[2] +; CHECK-CVT-NEXT: mov h5, v0.h[2] +; CHECK-CVT-NEXT: fcvt s6, h1 +; CHECK-CVT-NEXT: fcvt s7, h0 ; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: fcvt s16, h16 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: mov h2, v1.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h2, v1.h[3] +; CHECK-CVT-NEXT: mov h3, v0.h[3] ; CHECK-CVT-NEXT: csetm w8, ne -; CHECK-CVT-NEXT: fcmp s5, s4 +; CHECK-CVT-NEXT: fcmp s7, s6 +; CHECK-CVT-NEXT: mov h6, v1.h[4] +; CHECK-CVT-NEXT: mov h7, v0.h[4] ; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: mov h4, v1.h[3] ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov h5, v0.h[3] +; CHECK-CVT-NEXT: csetm w9, ne +; CHECK-CVT-NEXT: fcmp s5, s4 +; CHECK-CVT-NEXT: mov h4, v0.h[5] +; CHECK-CVT-NEXT: fcvt s5, h6 +; CHECK-CVT-NEXT: fcvt s6, h7 +; CHECK-CVT-NEXT: fmov s7, w9 ; CHECK-CVT-NEXT: csetm w9, ne ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 -; CHECK-CVT-NEXT: csetm w8, ne -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 +; CHECK-CVT-NEXT: mov h2, v1.h[6] +; CHECK-CVT-NEXT: mov h3, v0.h[6] +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: mov v7.h[1], w8 ; CHECK-CVT-NEXT: mov h1, v1.h[7] ; CHECK-CVT-NEXT: csetm w8, ne -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] +; CHECK-CVT-NEXT: fcmp s6, s5 ; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 -; CHECK-CVT-NEXT: csetm w8, ne -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov v7.h[2], w9 +; CHECK-CVT-NEXT: csetm w9, ne +; CHECK-CVT-NEXT: fcmp s4, s16 ; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: mov v7.h[3], w8 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 -; CHECK-CVT-NEXT: csetm w8, ne -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 ; CHECK-CVT-NEXT: csetm w8, ne +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: mov v7.h[4], w9 +; CHECK-CVT-NEXT: csetm w9, ne ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 +; CHECK-CVT-NEXT: mov v7.h[5], w8 ; CHECK-CVT-NEXT: csetm w8, ne -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v7.h[6], w9 +; CHECK-CVT-NEXT: mov v7.h[7], w8 +; CHECK-CVT-NEXT: xtn v0.8b, v7.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_une: @@ -818,7 +817,7 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v0.h[4] +; CHECK-CVT-NEXT: mov h6, v1.h[4] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: fcmp s3, s2 @@ -827,56 +826,56 @@ ; CHECK-CVT-NEXT: csetm w8, eq ; CHECK-CVT-NEXT: csinv w8, w8, wzr, vc ; CHECK-CVT-NEXT: fcmp s5, s4 -; CHECK-CVT-NEXT: mov h4, v1.h[3] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov h4, v1.h[3] ; CHECK-CVT-NEXT: mov h5, v0.h[3] ; CHECK-CVT-NEXT: csetm w9, eq ; CHECK-CVT-NEXT: csinv w9, w9, wzr, vc -; CHECK-CVT-NEXT: fcvt s4, h4 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fcvt s2, h5 -; CHECK-CVT-NEXT: fmov s3, w9 -; CHECK-CVT-NEXT: mov h5, v1.h[4] -; CHECK-CVT-NEXT: csetm w9, eq -; CHECK-CVT-NEXT: mov v3.h[1], w8 -; CHECK-CVT-NEXT: csinv w8, w9, wzr, vc -; CHECK-CVT-NEXT: fcmp s2, s4 -; CHECK-CVT-NEXT: fcvt s2, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 -; CHECK-CVT-NEXT: mov h5, v1.h[5] -; CHECK-CVT-NEXT: mov h6, v0.h[5] -; CHECK-CVT-NEXT: csetm w9, eq -; CHECK-CVT-NEXT: mov v3.h[2], w8 -; CHECK-CVT-NEXT: csinv w8, w9, wzr, vc -; CHECK-CVT-NEXT: fcmp s4, s2 -; CHECK-CVT-NEXT: fcvt s2, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov h6, v0.h[6] -; CHECK-CVT-NEXT: csetm w9, eq +; CHECK-CVT-NEXT: mov h2, v0.h[4] +; CHECK-CVT-NEXT: fcvt s3, h4 +; CHECK-CVT-NEXT: fcvt s4, h5 +; CHECK-CVT-NEXT: fcvt s5, h6 +; CHECK-CVT-NEXT: csetm w10, eq +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: csinv w10, w10, wzr, vc +; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: mov h3, v1.h[5] +; CHECK-CVT-NEXT: mov h4, v0.h[5] +; CHECK-CVT-NEXT: csetm w11, eq +; CHECK-CVT-NEXT: csinv w11, w11, wzr, vc +; CHECK-CVT-NEXT: fcmp s2, s5 +; CHECK-CVT-NEXT: fcvt s2, h3 +; CHECK-CVT-NEXT: fcvt s3, h4 +; CHECK-CVT-NEXT: mov h4, v1.h[6] +; CHECK-CVT-NEXT: mov h5, v0.h[6] +; CHECK-CVT-NEXT: csetm w12, eq ; CHECK-CVT-NEXT: mov h1, v1.h[7] -; CHECK-CVT-NEXT: mov v3.h[3], w8 -; CHECK-CVT-NEXT: csinv w8, w9, wzr, vc -; CHECK-CVT-NEXT: fcmp s4, s2 -; CHECK-CVT-NEXT: fcvt s2, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: csinv w12, w12, wzr, vc ; CHECK-CVT-NEXT: mov h0, v0.h[7] +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: fmov s2, w9 +; CHECK-CVT-NEXT: fcvt s3, h4 +; CHECK-CVT-NEXT: fcvt s4, h5 ; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: csetm w9, eq -; CHECK-CVT-NEXT: mov v3.h[4], w8 -; CHECK-CVT-NEXT: csinv w8, w9, wzr, vc -; CHECK-CVT-NEXT: fcmp s4, s2 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v3.h[5], w8 +; CHECK-CVT-NEXT: mov v2.h[1], w8 ; CHECK-CVT-NEXT: csetm w8, eq ; CHECK-CVT-NEXT: csinv w8, w8, wzr, vc +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: mov v2.h[2], w10 +; CHECK-CVT-NEXT: mov v2.h[3], w11 +; CHECK-CVT-NEXT: csetm w9, eq +; CHECK-CVT-NEXT: csinv w9, w9, wzr, vc ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v3.h[6], w8 +; CHECK-CVT-NEXT: mov v2.h[4], w12 +; CHECK-CVT-NEXT: mov v2.h[5], w8 ; CHECK-CVT-NEXT: csetm w8, eq ; CHECK-CVT-NEXT: csinv w8, w8, wzr, vc -; CHECK-CVT-NEXT: mov v3.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v3.8h +; CHECK-CVT-NEXT: mov v2.h[6], w9 +; CHECK-CVT-NEXT: mov v2.h[7], w8 +; CHECK-CVT-NEXT: xtn v0.8b, v2.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_ueq: @@ -896,59 +895,59 @@ ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: mov h2, v1.h[1] ; CHECK-CVT-NEXT: mov h3, v0.h[1] -; CHECK-CVT-NEXT: fcvt s4, h1 -; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] +; CHECK-CVT-NEXT: mov h4, v1.h[2] +; CHECK-CVT-NEXT: mov h5, v0.h[2] +; CHECK-CVT-NEXT: fcvt s6, h1 +; CHECK-CVT-NEXT: fcvt s7, h0 ; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: fcvt s16, h16 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: mov h2, v1.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h2, v1.h[3] +; CHECK-CVT-NEXT: mov h3, v0.h[3] ; CHECK-CVT-NEXT: csetm w8, hi -; CHECK-CVT-NEXT: fcmp s5, s4 +; CHECK-CVT-NEXT: fcmp s7, s6 +; CHECK-CVT-NEXT: mov h6, v1.h[4] +; CHECK-CVT-NEXT: mov h7, v0.h[4] ; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: mov h4, v1.h[3] ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov h5, v0.h[3] +; CHECK-CVT-NEXT: csetm w9, hi +; CHECK-CVT-NEXT: fcmp s5, s4 +; CHECK-CVT-NEXT: mov h4, v0.h[5] +; CHECK-CVT-NEXT: fcvt s5, h6 +; CHECK-CVT-NEXT: fcvt s6, h7 +; CHECK-CVT-NEXT: fmov s7, w9 ; CHECK-CVT-NEXT: csetm w9, hi ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 -; CHECK-CVT-NEXT: csetm w8, hi -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 +; CHECK-CVT-NEXT: mov h2, v1.h[6] +; CHECK-CVT-NEXT: mov h3, v0.h[6] +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: mov v7.h[1], w8 ; CHECK-CVT-NEXT: mov h1, v1.h[7] ; CHECK-CVT-NEXT: csetm w8, hi -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] +; CHECK-CVT-NEXT: fcmp s6, s5 ; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 -; CHECK-CVT-NEXT: csetm w8, hi -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov v7.h[2], w9 +; CHECK-CVT-NEXT: csetm w9, hi +; CHECK-CVT-NEXT: fcmp s4, s16 ; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: mov v7.h[3], w8 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 -; CHECK-CVT-NEXT: csetm w8, hi -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 ; CHECK-CVT-NEXT: csetm w8, hi +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: mov v7.h[4], w9 +; CHECK-CVT-NEXT: csetm w9, hi ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 +; CHECK-CVT-NEXT: mov v7.h[5], w8 ; CHECK-CVT-NEXT: csetm w8, hi -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v7.h[6], w9 +; CHECK-CVT-NEXT: mov v7.h[7], w8 +; CHECK-CVT-NEXT: xtn v0.8b, v7.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_ugt: @@ -966,59 +965,59 @@ ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: mov h2, v1.h[1] ; CHECK-CVT-NEXT: mov h3, v0.h[1] -; CHECK-CVT-NEXT: fcvt s4, h1 -; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] +; CHECK-CVT-NEXT: mov h4, v1.h[2] +; CHECK-CVT-NEXT: mov h5, v0.h[2] +; CHECK-CVT-NEXT: fcvt s6, h1 +; CHECK-CVT-NEXT: fcvt s7, h0 ; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: fcvt s16, h16 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: mov h2, v1.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h2, v1.h[3] +; CHECK-CVT-NEXT: mov h3, v0.h[3] ; CHECK-CVT-NEXT: csetm w8, pl -; CHECK-CVT-NEXT: fcmp s5, s4 +; CHECK-CVT-NEXT: fcmp s7, s6 +; CHECK-CVT-NEXT: mov h6, v1.h[4] +; CHECK-CVT-NEXT: mov h7, v0.h[4] ; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: mov h4, v1.h[3] ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov h5, v0.h[3] +; CHECK-CVT-NEXT: csetm w9, pl +; CHECK-CVT-NEXT: fcmp s5, s4 +; CHECK-CVT-NEXT: mov h4, v0.h[5] +; CHECK-CVT-NEXT: fcvt s5, h6 +; CHECK-CVT-NEXT: fcvt s6, h7 +; CHECK-CVT-NEXT: fmov s7, w9 ; CHECK-CVT-NEXT: csetm w9, pl ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 -; CHECK-CVT-NEXT: csetm w8, pl -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 +; CHECK-CVT-NEXT: mov h2, v1.h[6] +; CHECK-CVT-NEXT: mov h3, v0.h[6] +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: mov v7.h[1], w8 ; CHECK-CVT-NEXT: mov h1, v1.h[7] ; CHECK-CVT-NEXT: csetm w8, pl -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] +; CHECK-CVT-NEXT: fcmp s6, s5 ; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 -; CHECK-CVT-NEXT: csetm w8, pl -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov v7.h[2], w9 +; CHECK-CVT-NEXT: csetm w9, pl +; CHECK-CVT-NEXT: fcmp s4, s16 ; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: mov v7.h[3], w8 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 -; CHECK-CVT-NEXT: csetm w8, pl -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 ; CHECK-CVT-NEXT: csetm w8, pl +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: mov v7.h[4], w9 +; CHECK-CVT-NEXT: csetm w9, pl ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 +; CHECK-CVT-NEXT: mov v7.h[5], w8 ; CHECK-CVT-NEXT: csetm w8, pl -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v7.h[6], w9 +; CHECK-CVT-NEXT: mov v7.h[7], w8 +; CHECK-CVT-NEXT: xtn v0.8b, v7.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_uge: @@ -1036,59 +1035,59 @@ ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: mov h2, v1.h[1] ; CHECK-CVT-NEXT: mov h3, v0.h[1] -; CHECK-CVT-NEXT: fcvt s4, h1 -; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] +; CHECK-CVT-NEXT: mov h4, v1.h[2] +; CHECK-CVT-NEXT: mov h5, v0.h[2] +; CHECK-CVT-NEXT: fcvt s6, h1 +; CHECK-CVT-NEXT: fcvt s7, h0 ; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: fcvt s16, h16 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: mov h2, v1.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h2, v1.h[3] +; CHECK-CVT-NEXT: mov h3, v0.h[3] ; CHECK-CVT-NEXT: csetm w8, lt -; CHECK-CVT-NEXT: fcmp s5, s4 +; CHECK-CVT-NEXT: fcmp s7, s6 +; CHECK-CVT-NEXT: mov h6, v1.h[4] +; CHECK-CVT-NEXT: mov h7, v0.h[4] ; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: mov h4, v1.h[3] ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov h5, v0.h[3] +; CHECK-CVT-NEXT: csetm w9, lt +; CHECK-CVT-NEXT: fcmp s5, s4 +; CHECK-CVT-NEXT: mov h4, v0.h[5] +; CHECK-CVT-NEXT: fcvt s5, h6 +; CHECK-CVT-NEXT: fcvt s6, h7 +; CHECK-CVT-NEXT: fmov s7, w9 ; CHECK-CVT-NEXT: csetm w9, lt ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 -; CHECK-CVT-NEXT: csetm w8, lt -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 +; CHECK-CVT-NEXT: mov h2, v1.h[6] +; CHECK-CVT-NEXT: mov h3, v0.h[6] +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: mov v7.h[1], w8 ; CHECK-CVT-NEXT: mov h1, v1.h[7] ; CHECK-CVT-NEXT: csetm w8, lt -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] +; CHECK-CVT-NEXT: fcmp s6, s5 ; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 -; CHECK-CVT-NEXT: csetm w8, lt -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov v7.h[2], w9 +; CHECK-CVT-NEXT: csetm w9, lt +; CHECK-CVT-NEXT: fcmp s4, s16 ; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: mov v7.h[3], w8 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 -; CHECK-CVT-NEXT: csetm w8, lt -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 ; CHECK-CVT-NEXT: csetm w8, lt +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: mov v7.h[4], w9 +; CHECK-CVT-NEXT: csetm w9, lt ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 +; CHECK-CVT-NEXT: mov v7.h[5], w8 ; CHECK-CVT-NEXT: csetm w8, lt -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v7.h[6], w9 +; CHECK-CVT-NEXT: mov v7.h[7], w8 +; CHECK-CVT-NEXT: xtn v0.8b, v7.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_ult: @@ -1106,59 +1105,59 @@ ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: mov h2, v1.h[1] ; CHECK-CVT-NEXT: mov h3, v0.h[1] -; CHECK-CVT-NEXT: fcvt s4, h1 -; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] +; CHECK-CVT-NEXT: mov h4, v1.h[2] +; CHECK-CVT-NEXT: mov h5, v0.h[2] +; CHECK-CVT-NEXT: fcvt s6, h1 +; CHECK-CVT-NEXT: fcvt s7, h0 ; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: fcvt s16, h16 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: mov h2, v1.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h2, v1.h[3] +; CHECK-CVT-NEXT: mov h3, v0.h[3] ; CHECK-CVT-NEXT: csetm w8, le -; CHECK-CVT-NEXT: fcmp s5, s4 +; CHECK-CVT-NEXT: fcmp s7, s6 +; CHECK-CVT-NEXT: mov h6, v1.h[4] +; CHECK-CVT-NEXT: mov h7, v0.h[4] ; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: mov h4, v1.h[3] ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov h5, v0.h[3] +; CHECK-CVT-NEXT: csetm w9, le +; CHECK-CVT-NEXT: fcmp s5, s4 +; CHECK-CVT-NEXT: mov h4, v0.h[5] +; CHECK-CVT-NEXT: fcvt s5, h6 +; CHECK-CVT-NEXT: fcvt s6, h7 +; CHECK-CVT-NEXT: fmov s7, w9 ; CHECK-CVT-NEXT: csetm w9, le ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 -; CHECK-CVT-NEXT: csetm w8, le -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 +; CHECK-CVT-NEXT: mov h2, v1.h[6] +; CHECK-CVT-NEXT: mov h3, v0.h[6] +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: mov v7.h[1], w8 ; CHECK-CVT-NEXT: mov h1, v1.h[7] ; CHECK-CVT-NEXT: csetm w8, le -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] +; CHECK-CVT-NEXT: fcmp s6, s5 ; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 -; CHECK-CVT-NEXT: csetm w8, le -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov v7.h[2], w9 +; CHECK-CVT-NEXT: csetm w9, le +; CHECK-CVT-NEXT: fcmp s4, s16 ; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: mov v7.h[3], w8 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 -; CHECK-CVT-NEXT: csetm w8, le -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 ; CHECK-CVT-NEXT: csetm w8, le +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: mov v7.h[4], w9 +; CHECK-CVT-NEXT: csetm w9, le ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 +; CHECK-CVT-NEXT: mov v7.h[5], w8 ; CHECK-CVT-NEXT: csetm w8, le -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v7.h[6], w9 +; CHECK-CVT-NEXT: mov v7.h[7], w8 +; CHECK-CVT-NEXT: xtn v0.8b, v7.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_ule: @@ -1176,59 +1175,59 @@ ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: mov h2, v1.h[1] ; CHECK-CVT-NEXT: mov h3, v0.h[1] -; CHECK-CVT-NEXT: fcvt s4, h1 -; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] +; CHECK-CVT-NEXT: mov h4, v1.h[2] +; CHECK-CVT-NEXT: mov h5, v0.h[2] +; CHECK-CVT-NEXT: fcvt s6, h1 +; CHECK-CVT-NEXT: fcvt s7, h0 ; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: fcvt s16, h16 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: mov h2, v1.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h2, v1.h[3] +; CHECK-CVT-NEXT: mov h3, v0.h[3] ; CHECK-CVT-NEXT: csetm w8, vs -; CHECK-CVT-NEXT: fcmp s5, s4 +; CHECK-CVT-NEXT: fcmp s7, s6 +; CHECK-CVT-NEXT: mov h6, v1.h[4] +; CHECK-CVT-NEXT: mov h7, v0.h[4] ; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: mov h4, v1.h[3] ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov h5, v0.h[3] +; CHECK-CVT-NEXT: csetm w9, vs +; CHECK-CVT-NEXT: fcmp s5, s4 +; CHECK-CVT-NEXT: mov h4, v0.h[5] +; CHECK-CVT-NEXT: fcvt s5, h6 +; CHECK-CVT-NEXT: fcvt s6, h7 +; CHECK-CVT-NEXT: fmov s7, w9 ; CHECK-CVT-NEXT: csetm w9, vs ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 -; CHECK-CVT-NEXT: csetm w8, vs -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 +; CHECK-CVT-NEXT: mov h2, v1.h[6] +; CHECK-CVT-NEXT: mov h3, v0.h[6] +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: mov v7.h[1], w8 ; CHECK-CVT-NEXT: mov h1, v1.h[7] ; CHECK-CVT-NEXT: csetm w8, vs -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] +; CHECK-CVT-NEXT: fcmp s6, s5 ; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 -; CHECK-CVT-NEXT: csetm w8, vs -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov v7.h[2], w9 +; CHECK-CVT-NEXT: csetm w9, vs +; CHECK-CVT-NEXT: fcmp s4, s16 ; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: mov v7.h[3], w8 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 -; CHECK-CVT-NEXT: csetm w8, vs -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 ; CHECK-CVT-NEXT: csetm w8, vs +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: mov v7.h[4], w9 +; CHECK-CVT-NEXT: csetm w9, vs ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 +; CHECK-CVT-NEXT: mov v7.h[5], w8 ; CHECK-CVT-NEXT: csetm w8, vs -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v7.h[6], w9 +; CHECK-CVT-NEXT: mov v7.h[7], w8 +; CHECK-CVT-NEXT: xtn v0.8b, v7.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_uno: @@ -1250,7 +1249,7 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v0.h[4] +; CHECK-CVT-NEXT: mov h6, v1.h[4] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: fcmp s3, s2 @@ -1259,56 +1258,56 @@ ; CHECK-CVT-NEXT: csetm w8, mi ; CHECK-CVT-NEXT: csinv w8, w8, wzr, le ; CHECK-CVT-NEXT: fcmp s5, s4 -; CHECK-CVT-NEXT: mov h4, v1.h[3] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov h4, v1.h[3] ; CHECK-CVT-NEXT: mov h5, v0.h[3] ; CHECK-CVT-NEXT: csetm w9, mi ; CHECK-CVT-NEXT: csinv w9, w9, wzr, le -; CHECK-CVT-NEXT: fcvt s4, h4 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fcvt s2, h5 -; CHECK-CVT-NEXT: fmov s3, w9 -; CHECK-CVT-NEXT: mov h5, v1.h[4] -; CHECK-CVT-NEXT: csetm w9, mi -; CHECK-CVT-NEXT: mov v3.h[1], w8 -; CHECK-CVT-NEXT: csinv w8, w9, wzr, le -; CHECK-CVT-NEXT: fcmp s2, s4 -; CHECK-CVT-NEXT: fcvt s2, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 -; CHECK-CVT-NEXT: mov h5, v1.h[5] -; CHECK-CVT-NEXT: mov h6, v0.h[5] -; CHECK-CVT-NEXT: csetm w9, mi -; CHECK-CVT-NEXT: mov v3.h[2], w8 -; CHECK-CVT-NEXT: csinv w8, w9, wzr, le -; CHECK-CVT-NEXT: fcmp s4, s2 -; CHECK-CVT-NEXT: fcvt s2, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov h6, v0.h[6] -; CHECK-CVT-NEXT: csetm w9, mi +; CHECK-CVT-NEXT: mov h2, v0.h[4] +; CHECK-CVT-NEXT: fcvt s3, h4 +; CHECK-CVT-NEXT: fcvt s4, h5 +; CHECK-CVT-NEXT: fcvt s5, h6 +; CHECK-CVT-NEXT: csetm w10, mi +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: csinv w10, w10, wzr, le +; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: mov h3, v1.h[5] +; CHECK-CVT-NEXT: mov h4, v0.h[5] +; CHECK-CVT-NEXT: csetm w11, mi +; CHECK-CVT-NEXT: csinv w11, w11, wzr, le +; CHECK-CVT-NEXT: fcmp s2, s5 +; CHECK-CVT-NEXT: fcvt s2, h3 +; CHECK-CVT-NEXT: fcvt s3, h4 +; CHECK-CVT-NEXT: mov h4, v1.h[6] +; CHECK-CVT-NEXT: mov h5, v0.h[6] +; CHECK-CVT-NEXT: csetm w12, mi ; CHECK-CVT-NEXT: mov h1, v1.h[7] -; CHECK-CVT-NEXT: mov v3.h[3], w8 -; CHECK-CVT-NEXT: csinv w8, w9, wzr, le -; CHECK-CVT-NEXT: fcmp s4, s2 -; CHECK-CVT-NEXT: fcvt s2, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: csinv w12, w12, wzr, le ; CHECK-CVT-NEXT: mov h0, v0.h[7] +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: fmov s2, w9 +; CHECK-CVT-NEXT: fcvt s3, h4 +; CHECK-CVT-NEXT: fcvt s4, h5 ; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: csetm w9, mi -; CHECK-CVT-NEXT: mov v3.h[4], w8 -; CHECK-CVT-NEXT: csinv w8, w9, wzr, le -; CHECK-CVT-NEXT: fcmp s4, s2 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v3.h[5], w8 +; CHECK-CVT-NEXT: mov v2.h[1], w8 ; CHECK-CVT-NEXT: csetm w8, mi ; CHECK-CVT-NEXT: csinv w8, w8, wzr, le +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: mov v2.h[2], w10 +; CHECK-CVT-NEXT: mov v2.h[3], w11 +; CHECK-CVT-NEXT: csetm w9, mi +; CHECK-CVT-NEXT: csinv w9, w9, wzr, le ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v3.h[6], w8 +; CHECK-CVT-NEXT: mov v2.h[4], w12 +; CHECK-CVT-NEXT: mov v2.h[5], w8 ; CHECK-CVT-NEXT: csetm w8, mi ; CHECK-CVT-NEXT: csinv w8, w8, wzr, le -; CHECK-CVT-NEXT: mov v3.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v3.8h +; CHECK-CVT-NEXT: mov v2.h[6], w9 +; CHECK-CVT-NEXT: mov v2.h[7], w8 +; CHECK-CVT-NEXT: xtn v0.8b, v2.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_one: @@ -1327,59 +1326,59 @@ ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: mov h2, v1.h[1] ; CHECK-CVT-NEXT: mov h3, v0.h[1] -; CHECK-CVT-NEXT: fcvt s4, h1 -; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] +; CHECK-CVT-NEXT: mov h4, v1.h[2] +; CHECK-CVT-NEXT: mov h5, v0.h[2] +; CHECK-CVT-NEXT: fcvt s6, h1 +; CHECK-CVT-NEXT: fcvt s7, h0 ; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: fcvt s16, h16 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: mov h2, v1.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h2, v1.h[3] +; CHECK-CVT-NEXT: mov h3, v0.h[3] ; CHECK-CVT-NEXT: csetm w8, eq -; CHECK-CVT-NEXT: fcmp s5, s4 +; CHECK-CVT-NEXT: fcmp s7, s6 +; CHECK-CVT-NEXT: mov h6, v1.h[4] +; CHECK-CVT-NEXT: mov h7, v0.h[4] ; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: mov h4, v1.h[3] ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov h5, v0.h[3] +; CHECK-CVT-NEXT: csetm w9, eq +; CHECK-CVT-NEXT: fcmp s5, s4 +; CHECK-CVT-NEXT: mov h4, v0.h[5] +; CHECK-CVT-NEXT: fcvt s5, h6 +; CHECK-CVT-NEXT: fcvt s6, h7 +; CHECK-CVT-NEXT: fmov s7, w9 ; CHECK-CVT-NEXT: csetm w9, eq ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 -; CHECK-CVT-NEXT: csetm w8, eq -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 +; CHECK-CVT-NEXT: mov h2, v1.h[6] +; CHECK-CVT-NEXT: mov h3, v0.h[6] +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: mov v7.h[1], w8 ; CHECK-CVT-NEXT: mov h1, v1.h[7] ; CHECK-CVT-NEXT: csetm w8, eq -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] +; CHECK-CVT-NEXT: fcmp s6, s5 ; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 -; CHECK-CVT-NEXT: csetm w8, eq -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov v7.h[2], w9 +; CHECK-CVT-NEXT: csetm w9, eq +; CHECK-CVT-NEXT: fcmp s4, s16 ; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: mov v7.h[3], w8 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 -; CHECK-CVT-NEXT: csetm w8, eq -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 ; CHECK-CVT-NEXT: csetm w8, eq +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: mov v7.h[4], w9 +; CHECK-CVT-NEXT: csetm w9, eq ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 +; CHECK-CVT-NEXT: mov v7.h[5], w8 ; CHECK-CVT-NEXT: csetm w8, eq -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v7.h[6], w9 +; CHECK-CVT-NEXT: mov v7.h[7], w8 +; CHECK-CVT-NEXT: xtn v0.8b, v7.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_oeq: @@ -1396,59 +1395,59 @@ ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: mov h2, v1.h[1] ; CHECK-CVT-NEXT: mov h3, v0.h[1] -; CHECK-CVT-NEXT: fcvt s4, h1 -; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] +; CHECK-CVT-NEXT: mov h4, v1.h[2] +; CHECK-CVT-NEXT: mov h5, v0.h[2] +; CHECK-CVT-NEXT: fcvt s6, h1 +; CHECK-CVT-NEXT: fcvt s7, h0 ; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: fcvt s16, h16 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: mov h2, v1.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h2, v1.h[3] +; CHECK-CVT-NEXT: mov h3, v0.h[3] ; CHECK-CVT-NEXT: csetm w8, gt -; CHECK-CVT-NEXT: fcmp s5, s4 +; CHECK-CVT-NEXT: fcmp s7, s6 +; CHECK-CVT-NEXT: mov h6, v1.h[4] +; CHECK-CVT-NEXT: mov h7, v0.h[4] ; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: mov h4, v1.h[3] ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov h5, v0.h[3] +; CHECK-CVT-NEXT: csetm w9, gt +; CHECK-CVT-NEXT: fcmp s5, s4 +; CHECK-CVT-NEXT: mov h4, v0.h[5] +; CHECK-CVT-NEXT: fcvt s5, h6 +; CHECK-CVT-NEXT: fcvt s6, h7 +; CHECK-CVT-NEXT: fmov s7, w9 ; CHECK-CVT-NEXT: csetm w9, gt ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 -; CHECK-CVT-NEXT: csetm w8, gt -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 +; CHECK-CVT-NEXT: mov h2, v1.h[6] +; CHECK-CVT-NEXT: mov h3, v0.h[6] +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: mov v7.h[1], w8 ; CHECK-CVT-NEXT: mov h1, v1.h[7] ; CHECK-CVT-NEXT: csetm w8, gt -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] +; CHECK-CVT-NEXT: fcmp s6, s5 ; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 -; CHECK-CVT-NEXT: csetm w8, gt -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov v7.h[2], w9 +; CHECK-CVT-NEXT: csetm w9, gt +; CHECK-CVT-NEXT: fcmp s4, s16 ; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: mov v7.h[3], w8 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 -; CHECK-CVT-NEXT: csetm w8, gt -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 ; CHECK-CVT-NEXT: csetm w8, gt +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: mov v7.h[4], w9 +; CHECK-CVT-NEXT: csetm w9, gt ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 +; CHECK-CVT-NEXT: mov v7.h[5], w8 ; CHECK-CVT-NEXT: csetm w8, gt -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v7.h[6], w9 +; CHECK-CVT-NEXT: mov v7.h[7], w8 +; CHECK-CVT-NEXT: xtn v0.8b, v7.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_ogt: @@ -1465,59 +1464,59 @@ ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: mov h2, v1.h[1] ; CHECK-CVT-NEXT: mov h3, v0.h[1] -; CHECK-CVT-NEXT: fcvt s4, h1 -; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] +; CHECK-CVT-NEXT: mov h4, v1.h[2] +; CHECK-CVT-NEXT: mov h5, v0.h[2] +; CHECK-CVT-NEXT: fcvt s6, h1 +; CHECK-CVT-NEXT: fcvt s7, h0 ; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: fcvt s16, h16 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: mov h2, v1.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h2, v1.h[3] +; CHECK-CVT-NEXT: mov h3, v0.h[3] ; CHECK-CVT-NEXT: csetm w8, ge -; CHECK-CVT-NEXT: fcmp s5, s4 +; CHECK-CVT-NEXT: fcmp s7, s6 +; CHECK-CVT-NEXT: mov h6, v1.h[4] +; CHECK-CVT-NEXT: mov h7, v0.h[4] ; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: mov h4, v1.h[3] ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov h5, v0.h[3] +; CHECK-CVT-NEXT: csetm w9, ge +; CHECK-CVT-NEXT: fcmp s5, s4 +; CHECK-CVT-NEXT: mov h4, v0.h[5] +; CHECK-CVT-NEXT: fcvt s5, h6 +; CHECK-CVT-NEXT: fcvt s6, h7 +; CHECK-CVT-NEXT: fmov s7, w9 ; CHECK-CVT-NEXT: csetm w9, ge ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 -; CHECK-CVT-NEXT: csetm w8, ge -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 +; CHECK-CVT-NEXT: mov h2, v1.h[6] +; CHECK-CVT-NEXT: mov h3, v0.h[6] +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: mov v7.h[1], w8 ; CHECK-CVT-NEXT: mov h1, v1.h[7] ; CHECK-CVT-NEXT: csetm w8, ge -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] +; CHECK-CVT-NEXT: fcmp s6, s5 ; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 -; CHECK-CVT-NEXT: csetm w8, ge -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov v7.h[2], w9 +; CHECK-CVT-NEXT: csetm w9, ge +; CHECK-CVT-NEXT: fcmp s4, s16 ; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: mov v7.h[3], w8 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 -; CHECK-CVT-NEXT: csetm w8, ge -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 ; CHECK-CVT-NEXT: csetm w8, ge +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: mov v7.h[4], w9 +; CHECK-CVT-NEXT: csetm w9, ge ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 +; CHECK-CVT-NEXT: mov v7.h[5], w8 ; CHECK-CVT-NEXT: csetm w8, ge -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v7.h[6], w9 +; CHECK-CVT-NEXT: mov v7.h[7], w8 +; CHECK-CVT-NEXT: xtn v0.8b, v7.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_oge: @@ -1534,59 +1533,59 @@ ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: mov h2, v1.h[1] ; CHECK-CVT-NEXT: mov h3, v0.h[1] -; CHECK-CVT-NEXT: fcvt s4, h1 -; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] +; CHECK-CVT-NEXT: mov h4, v1.h[2] +; CHECK-CVT-NEXT: mov h5, v0.h[2] +; CHECK-CVT-NEXT: fcvt s6, h1 +; CHECK-CVT-NEXT: fcvt s7, h0 ; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: fcvt s16, h16 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: mov h2, v1.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h2, v1.h[3] +; CHECK-CVT-NEXT: mov h3, v0.h[3] ; CHECK-CVT-NEXT: csetm w8, mi -; CHECK-CVT-NEXT: fcmp s5, s4 +; CHECK-CVT-NEXT: fcmp s7, s6 +; CHECK-CVT-NEXT: mov h6, v1.h[4] +; CHECK-CVT-NEXT: mov h7, v0.h[4] ; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: mov h4, v1.h[3] ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov h5, v0.h[3] +; CHECK-CVT-NEXT: csetm w9, mi +; CHECK-CVT-NEXT: fcmp s5, s4 +; CHECK-CVT-NEXT: mov h4, v0.h[5] +; CHECK-CVT-NEXT: fcvt s5, h6 +; CHECK-CVT-NEXT: fcvt s6, h7 +; CHECK-CVT-NEXT: fmov s7, w9 ; CHECK-CVT-NEXT: csetm w9, mi ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 -; CHECK-CVT-NEXT: csetm w8, mi -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 +; CHECK-CVT-NEXT: mov h2, v1.h[6] +; CHECK-CVT-NEXT: mov h3, v0.h[6] +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: mov v7.h[1], w8 ; CHECK-CVT-NEXT: mov h1, v1.h[7] ; CHECK-CVT-NEXT: csetm w8, mi -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] +; CHECK-CVT-NEXT: fcmp s6, s5 ; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 -; CHECK-CVT-NEXT: csetm w8, mi -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov v7.h[2], w9 +; CHECK-CVT-NEXT: csetm w9, mi +; CHECK-CVT-NEXT: fcmp s4, s16 ; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: mov v7.h[3], w8 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 -; CHECK-CVT-NEXT: csetm w8, mi -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 ; CHECK-CVT-NEXT: csetm w8, mi +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: mov v7.h[4], w9 +; CHECK-CVT-NEXT: csetm w9, mi ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 +; CHECK-CVT-NEXT: mov v7.h[5], w8 ; CHECK-CVT-NEXT: csetm w8, mi -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v7.h[6], w9 +; CHECK-CVT-NEXT: mov v7.h[7], w8 +; CHECK-CVT-NEXT: xtn v0.8b, v7.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_olt: @@ -1603,59 +1602,59 @@ ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: mov h2, v1.h[1] ; CHECK-CVT-NEXT: mov h3, v0.h[1] -; CHECK-CVT-NEXT: fcvt s4, h1 -; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] +; CHECK-CVT-NEXT: mov h4, v1.h[2] +; CHECK-CVT-NEXT: mov h5, v0.h[2] +; CHECK-CVT-NEXT: fcvt s6, h1 +; CHECK-CVT-NEXT: fcvt s7, h0 ; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: fcvt s16, h16 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: mov h2, v1.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h2, v1.h[3] +; CHECK-CVT-NEXT: mov h3, v0.h[3] ; CHECK-CVT-NEXT: csetm w8, ls -; CHECK-CVT-NEXT: fcmp s5, s4 +; CHECK-CVT-NEXT: fcmp s7, s6 +; CHECK-CVT-NEXT: mov h6, v1.h[4] +; CHECK-CVT-NEXT: mov h7, v0.h[4] ; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: mov h4, v1.h[3] ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov h5, v0.h[3] +; CHECK-CVT-NEXT: csetm w9, ls +; CHECK-CVT-NEXT: fcmp s5, s4 +; CHECK-CVT-NEXT: mov h4, v0.h[5] +; CHECK-CVT-NEXT: fcvt s5, h6 +; CHECK-CVT-NEXT: fcvt s6, h7 +; CHECK-CVT-NEXT: fmov s7, w9 ; CHECK-CVT-NEXT: csetm w9, ls ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 -; CHECK-CVT-NEXT: csetm w8, ls -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 +; CHECK-CVT-NEXT: mov h2, v1.h[6] +; CHECK-CVT-NEXT: mov h3, v0.h[6] +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: mov v7.h[1], w8 ; CHECK-CVT-NEXT: mov h1, v1.h[7] ; CHECK-CVT-NEXT: csetm w8, ls -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] +; CHECK-CVT-NEXT: fcmp s6, s5 ; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 -; CHECK-CVT-NEXT: csetm w8, ls -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov v7.h[2], w9 +; CHECK-CVT-NEXT: csetm w9, ls +; CHECK-CVT-NEXT: fcmp s4, s16 ; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: mov v7.h[3], w8 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 -; CHECK-CVT-NEXT: csetm w8, ls -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 ; CHECK-CVT-NEXT: csetm w8, ls +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: mov v7.h[4], w9 +; CHECK-CVT-NEXT: csetm w9, ls ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 +; CHECK-CVT-NEXT: mov v7.h[5], w8 ; CHECK-CVT-NEXT: csetm w8, ls -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v7.h[6], w9 +; CHECK-CVT-NEXT: mov v7.h[7], w8 +; CHECK-CVT-NEXT: xtn v0.8b, v7.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_ole: @@ -1672,59 +1671,59 @@ ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: mov h2, v1.h[1] ; CHECK-CVT-NEXT: mov h3, v0.h[1] -; CHECK-CVT-NEXT: fcvt s4, h1 -; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] +; CHECK-CVT-NEXT: mov h4, v1.h[2] +; CHECK-CVT-NEXT: mov h5, v0.h[2] +; CHECK-CVT-NEXT: fcvt s6, h1 +; CHECK-CVT-NEXT: fcvt s7, h0 ; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: fcvt s16, h16 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: mov h2, v1.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h2, v1.h[3] +; CHECK-CVT-NEXT: mov h3, v0.h[3] ; CHECK-CVT-NEXT: csetm w8, vc -; CHECK-CVT-NEXT: fcmp s5, s4 +; CHECK-CVT-NEXT: fcmp s7, s6 +; CHECK-CVT-NEXT: mov h6, v1.h[4] +; CHECK-CVT-NEXT: mov h7, v0.h[4] ; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: mov h4, v1.h[3] ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov h5, v0.h[3] +; CHECK-CVT-NEXT: csetm w9, vc +; CHECK-CVT-NEXT: fcmp s5, s4 +; CHECK-CVT-NEXT: mov h4, v0.h[5] +; CHECK-CVT-NEXT: fcvt s5, h6 +; CHECK-CVT-NEXT: fcvt s6, h7 +; CHECK-CVT-NEXT: fmov s7, w9 ; CHECK-CVT-NEXT: csetm w9, vc ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 -; CHECK-CVT-NEXT: csetm w8, vc -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 +; CHECK-CVT-NEXT: mov h2, v1.h[6] +; CHECK-CVT-NEXT: mov h3, v0.h[6] +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: mov v7.h[1], w8 ; CHECK-CVT-NEXT: mov h1, v1.h[7] ; CHECK-CVT-NEXT: csetm w8, vc -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] +; CHECK-CVT-NEXT: fcmp s6, s5 ; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 -; CHECK-CVT-NEXT: csetm w8, vc -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov v7.h[2], w9 +; CHECK-CVT-NEXT: csetm w9, vc +; CHECK-CVT-NEXT: fcmp s4, s16 ; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: mov v7.h[3], w8 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 -; CHECK-CVT-NEXT: csetm w8, vc -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 ; CHECK-CVT-NEXT: csetm w8, vc +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: mov v7.h[4], w9 +; CHECK-CVT-NEXT: csetm w9, vc ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 +; CHECK-CVT-NEXT: mov v7.h[5], w8 ; CHECK-CVT-NEXT: csetm w8, vc -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v7.h[6], w9 +; CHECK-CVT-NEXT: mov v7.h[7], w8 +; CHECK-CVT-NEXT: xtn v0.8b, v7.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_ord: diff --git a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll --- a/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/AArch64/fpclamptosat_vec.ll @@ -194,10 +194,10 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtzs v0.2d, v0.2d ; CHECK-NEXT: movi d1, #0x00ffff0000ffff -; CHECK-NEXT: movi v2.2d, #0000000000000000 ; CHECK-NEXT: xtn v0.2s, v0.2d ; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s -; CHECK-NEXT: smax v0.2s, v0.2s, v2.2s +; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret entry: %conv = fptosi <2 x double> %x to <2 x i32> @@ -857,10 +857,10 @@ ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fcvtzs v0.2d, v0.2d ; CHECK-NEXT: movi d1, #0x00ffff0000ffff -; CHECK-NEXT: movi v2.2d, #0000000000000000 ; CHECK-NEXT: xtn v0.2s, v0.2d ; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s -; CHECK-NEXT: smax v0.2s, v0.2s, v2.2s +; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret entry: %conv = fptosi <2 x double> %x to <2 x i32> diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll --- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll @@ -62,13 +62,13 @@ ; CHECK-NEXT: mov v0.s[1], v1.s[0] ; CHECK-NEXT: fcvtzs v4.4s, v4.4s ; CHECK-NEXT: mov v0.s[2], v2.s[0] -; CHECK-NEXT: fmov w4, s4 ; CHECK-NEXT: mov v0.s[3], v3.s[0] +; CHECK-NEXT: fmov w4, s4 ; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: mov w1, v0.s[1] ; CHECK-NEXT: mov w2, v0.s[2] ; CHECK-NEXT: mov w3, v0.s[3] -; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %x = call <5 x i32> @llvm.fptosi.sat.v5f32.v5i32(<5 x float> %f) ret <5 x i32> %x @@ -88,13 +88,13 @@ ; CHECK-NEXT: mov v0.s[2], v2.s[0] ; CHECK-NEXT: fcvtzs v1.4s, v4.4s ; CHECK-NEXT: mov v0.s[3], v3.s[0] -; CHECK-NEXT: mov w5, v1.s[1] -; CHECK-NEXT: fmov w4, s1 ; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: fmov w4, s1 +; CHECK-NEXT: mov w5, v1.s[1] +; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: mov w1, v0.s[1] ; CHECK-NEXT: mov w2, v0.s[2] ; CHECK-NEXT: mov w3, v0.s[3] -; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %x = call <6 x i32> @llvm.fptosi.sat.v6f32.v6i32(<6 x float> %f) ret <6 x i32> %x @@ -117,13 +117,13 @@ ; CHECK-NEXT: mov v0.s[3], v3.s[0] ; CHECK-NEXT: fcvtzs v1.4s, v4.4s ; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: fmov w4, s1 ; CHECK-NEXT: mov w5, v1.s[1] ; CHECK-NEXT: mov w6, v1.s[2] -; CHECK-NEXT: fmov w4, s1 +; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: mov w1, v0.s[1] ; CHECK-NEXT: mov w2, v0.s[2] ; CHECK-NEXT: mov w3, v0.s[3] -; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %x = call <7 x i32> @llvm.fptosi.sat.v7f32.v7i32(<7 x float> %f) ret <7 x i32> %x @@ -177,13 +177,13 @@ define <3 x i32> @test_signed_v3f64_v3i32(<3 x double> %f) { ; CHECK-LABEL: test_signed_v3f64_v3i32: ; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzs w8, d0 -; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: fcvtzs w9, d0 ; CHECK-NEXT: fcvtzs w8, d1 +; CHECK-NEXT: fmov s0, w9 +; CHECK-NEXT: fcvtzs w9, d2 ; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: fcvtzs w8, d2 -; CHECK-NEXT: mov v0.s[2], w8 ; CHECK-NEXT: fcvtzs w8, d0 +; CHECK-NEXT: mov v0.s[2], w9 ; CHECK-NEXT: mov v0.s[3], w8 ; CHECK-NEXT: ret %x = call <3 x i32> @llvm.fptosi.sat.v3f64.v3i32(<3 x double> %f) @@ -195,13 +195,13 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov d2, v0.d[1] ; CHECK-NEXT: fcvtzs w8, d0 +; CHECK-NEXT: fcvtzs w9, d1 ; CHECK-NEXT: fmov s0, w8 ; CHECK-NEXT: fcvtzs w8, d2 +; CHECK-NEXT: mov d2, v1.d[1] ; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: fcvtzs w8, d1 -; CHECK-NEXT: mov d1, v1.d[1] -; CHECK-NEXT: mov v0.s[2], w8 -; CHECK-NEXT: fcvtzs w8, d1 +; CHECK-NEXT: fcvtzs w8, d2 +; CHECK-NEXT: mov v0.s[2], w9 ; CHECK-NEXT: mov v0.s[3], w8 ; CHECK-NEXT: ret %x = call <4 x i32> @llvm.fptosi.sat.v4f64.v4i32(<4 x double> %f) @@ -619,11 +619,11 @@ ; CHECK-NEXT: fcvtl2 v0.4s, v0.8h ; CHECK-NEXT: fcvtzs v1.4s, v1.4s ; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: fmov w0, s1 +; CHECK-NEXT: fmov w4, s0 ; CHECK-NEXT: mov w1, v1.s[1] ; CHECK-NEXT: mov w2, v1.s[2] ; CHECK-NEXT: mov w3, v1.s[3] -; CHECK-NEXT: fmov w0, s1 -; CHECK-NEXT: fmov w4, s0 ; CHECK-NEXT: ret %x = call <5 x i32> @llvm.fptosi.sat.v5f16.v5i32(<5 x half> %f) ret <5 x i32> %x @@ -636,12 +636,12 @@ ; CHECK-NEXT: fcvtl2 v0.4s, v0.8h ; CHECK-NEXT: fcvtzs v1.4s, v1.4s ; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: fmov w0, s1 +; CHECK-NEXT: fmov w4, s0 ; CHECK-NEXT: mov w1, v1.s[1] ; CHECK-NEXT: mov w2, v1.s[2] ; CHECK-NEXT: mov w3, v1.s[3] ; CHECK-NEXT: mov w5, v0.s[1] -; CHECK-NEXT: fmov w0, s1 -; CHECK-NEXT: fmov w4, s0 ; CHECK-NEXT: ret %x = call <6 x i32> @llvm.fptosi.sat.v6f16.v6i32(<6 x half> %f) ret <6 x i32> %x @@ -650,17 +650,17 @@ define <7 x i32> @test_signed_v7f16_v7i32(<7 x half> %f) { ; CHECK-LABEL: test_signed_v7f16_v7i32: ; CHECK: // %bb.0: -; CHECK-NEXT: fcvtl v1.4s, v0.4h -; CHECK-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-NEXT: fcvtl v0.4s, v0.4h ; CHECK-NEXT: fcvtzs v1.4s, v1.4s ; CHECK-NEXT: fcvtzs v0.4s, v0.4s -; CHECK-NEXT: mov w1, v1.s[1] -; CHECK-NEXT: mov w2, v1.s[2] -; CHECK-NEXT: mov w3, v1.s[3] -; CHECK-NEXT: mov w5, v0.s[1] -; CHECK-NEXT: mov w6, v0.s[2] -; CHECK-NEXT: fmov w0, s1 -; CHECK-NEXT: fmov w4, s0 +; CHECK-NEXT: fmov w4, s1 +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: mov w5, v1.s[1] +; CHECK-NEXT: mov w1, v0.s[1] +; CHECK-NEXT: mov w2, v0.s[2] +; CHECK-NEXT: mov w3, v0.s[3] +; CHECK-NEXT: mov w6, v1.s[2] ; CHECK-NEXT: ret %x = call <7 x i32> @llvm.fptosi.sat.v7f16.v7i32(<7 x half> %f) ret <7 x i32> %x @@ -695,11 +695,11 @@ define <2 x i1> @test_signed_v2f32_v2i1(<2 x float> %f) { ; CHECK-LABEL: test_signed_v2f32_v2i1: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: fcvtzs v0.2s, v0.2s -; CHECK-NEXT: movi v2.2d, #0xffffffffffffffff +; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s -; CHECK-NEXT: smax v0.2s, v0.2s, v2.2s +; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff +; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %x = call <2 x i1> @llvm.fptosi.sat.v2f32.v2i1(<2 x float> %f) ret <2 x i1> %x @@ -708,8 +708,8 @@ define <2 x i8> @test_signed_v2f32_v2i8(<2 x float> %f) { ; CHECK-LABEL: test_signed_v2f32_v2i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2s, #127 ; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: movi v1.2s, #127 ; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s ; CHECK-NEXT: mvni v1.2s, #127 ; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s @@ -721,8 +721,8 @@ define <2 x i13> @test_signed_v2f32_v2i13(<2 x float> %f) { ; CHECK-LABEL: test_signed_v2f32_v2i13: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2s, #15, msl #8 ; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: movi v1.2s, #15, msl #8 ; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s ; CHECK-NEXT: mvni v1.2s, #15, msl #8 ; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s @@ -734,8 +734,8 @@ define <2 x i16> @test_signed_v2f32_v2i16(<2 x float> %f) { ; CHECK-LABEL: test_signed_v2f32_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2s, #127, msl #8 ; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: movi v1.2s, #127, msl #8 ; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s ; CHECK-NEXT: mvni v1.2s, #127, msl #8 ; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s @@ -747,8 +747,8 @@ define <2 x i19> @test_signed_v2f32_v2i19(<2 x float> %f) { ; CHECK-LABEL: test_signed_v2f32_v2i19: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2s, #3, msl #16 ; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: movi v1.2s, #3, msl #16 ; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s ; CHECK-NEXT: mvni v1.2s, #3, msl #16 ; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s @@ -955,8 +955,8 @@ define <4 x i1> @test_signed_v4f32_v4i1(<4 x float> %f) { ; CHECK-LABEL: test_signed_v4f32_v4i1: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s @@ -969,8 +969,8 @@ define <4 x i8> @test_signed_v4f32_v4i8(<4 x float> %f) { ; CHECK-LABEL: test_signed_v4f32_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.4s, #127 ; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: movi v1.4s, #127 ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: mvni v1.4s, #127 ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s @@ -983,8 +983,8 @@ define <4 x i13> @test_signed_v4f32_v4i13(<4 x float> %f) { ; CHECK-LABEL: test_signed_v4f32_v4i13: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.4s, #15, msl #8 ; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: movi v1.4s, #15, msl #8 ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: mvni v1.4s, #15, msl #8 ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s @@ -1007,8 +1007,8 @@ define <4 x i19> @test_signed_v4f32_v4i19(<4 x float> %f) { ; CHECK-LABEL: test_signed_v4f32_v4i19: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.4s, #3, msl #16 ; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: movi v1.4s, #3, msl #16 ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: mvni v1.4s, #3, msl #16 ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s @@ -1626,11 +1626,11 @@ ; ; CHECK-FP16-LABEL: test_signed_v4f16_v4i1: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: movi v1.2d, #0000000000000000 ; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h -; CHECK-FP16-NEXT: movi v2.2d, #0xffffffffffffffff +; CHECK-FP16-NEXT: movi v1.2d, #0000000000000000 ; CHECK-FP16-NEXT: smin v0.4h, v0.4h, v1.4h -; CHECK-FP16-NEXT: smax v0.4h, v0.4h, v2.4h +; CHECK-FP16-NEXT: movi v1.2d, #0xffffffffffffffff +; CHECK-FP16-NEXT: smax v0.4h, v0.4h, v1.4h ; CHECK-FP16-NEXT: ret %x = call <4 x i1> @llvm.fptosi.sat.v4f16.v4i1(<4 x half> %f) ret <4 x i1> %x @@ -1650,8 +1650,8 @@ ; ; CHECK-FP16-LABEL: test_signed_v4f16_v4i8: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: movi v1.4h, #127 ; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-FP16-NEXT: movi v1.4h, #127 ; CHECK-FP16-NEXT: smin v0.4h, v0.4h, v1.4h ; CHECK-FP16-NEXT: mvni v1.4h, #127 ; CHECK-FP16-NEXT: smax v0.4h, v0.4h, v1.4h @@ -1675,9 +1675,9 @@ ; CHECK-FP16-LABEL: test_signed_v4f16_v4i13: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-FP16-NEXT: mvni v1.4h, #240, lsl #8 +; CHECK-FP16-NEXT: smin v0.4h, v0.4h, v1.4h ; CHECK-FP16-NEXT: movi v1.4h, #240, lsl #8 -; CHECK-FP16-NEXT: mvni v2.4h, #240, lsl #8 -; CHECK-FP16-NEXT: smin v0.4h, v0.4h, v2.4h ; CHECK-FP16-NEXT: smax v0.4h, v0.4h, v1.4h ; CHECK-FP16-NEXT: ret %x = call <4 x i13> @llvm.fptosi.sat.v4f16.v4i13(<4 x half> %f) @@ -1931,8 +1931,8 @@ ; CHECK-NEXT: fmov d0, x9 ; CHECK-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: add sp, sp, #112 ; CHECK-NEXT: ret @@ -2039,8 +2039,8 @@ ; CHECK-NEXT: fmov d0, x9 ; CHECK-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: add sp, sp, #112 ; CHECK-NEXT: ret @@ -2093,45 +2093,45 @@ ; CHECK-CVT-NEXT: csinv w10, w10, wzr, ge ; CHECK-CVT-NEXT: cmp w11, #0 ; CHECK-CVT-NEXT: csel w11, w11, wzr, lt -; CHECK-CVT-NEXT: fmov s2, w9 +; CHECK-CVT-NEXT: fcvtzs w14, s1 ; CHECK-CVT-NEXT: cmp w11, #0 ; CHECK-CVT-NEXT: csinv w11, w11, wzr, ge ; CHECK-CVT-NEXT: cmp w12, #0 ; CHECK-CVT-NEXT: csel w12, w12, wzr, lt +; CHECK-CVT-NEXT: fcvtzs w15, s0 ; CHECK-CVT-NEXT: cmp w12, #0 +; CHECK-CVT-NEXT: fmov s0, w9 ; CHECK-CVT-NEXT: csinv w12, w12, wzr, ge ; CHECK-CVT-NEXT: cmp w13, #0 ; CHECK-CVT-NEXT: csel w13, w13, wzr, lt ; CHECK-CVT-NEXT: cmp w13, #0 -; CHECK-CVT-NEXT: csinv w9, w13, wzr, ge -; CHECK-CVT-NEXT: fcvtzs w13, s1 -; CHECK-CVT-NEXT: mov v2.s[1], w8 -; CHECK-CVT-NEXT: fmov s1, w9 -; CHECK-CVT-NEXT: cmp w13, #0 -; CHECK-CVT-NEXT: csel w8, w13, wzr, lt -; CHECK-CVT-NEXT: fcvtzs w9, s0 -; CHECK-CVT-NEXT: cmp w8, #0 +; CHECK-CVT-NEXT: mov v0.s[1], w8 +; CHECK-CVT-NEXT: csinv w13, w13, wzr, ge +; CHECK-CVT-NEXT: cmp w14, #0 +; CHECK-CVT-NEXT: csel w9, w14, wzr, lt +; CHECK-CVT-NEXT: mov v0.s[2], w10 +; CHECK-CVT-NEXT: cmp w9, #0 +; CHECK-CVT-NEXT: fmov s1, w13 +; CHECK-CVT-NEXT: csinv w9, w9, wzr, ge +; CHECK-CVT-NEXT: cmp w15, #0 +; CHECK-CVT-NEXT: mov v0.s[3], w11 +; CHECK-CVT-NEXT: csel w8, w15, wzr, lt ; CHECK-CVT-NEXT: mov v1.s[1], w12 +; CHECK-CVT-NEXT: cmp w8, #0 ; CHECK-CVT-NEXT: csinv w8, w8, wzr, ge -; CHECK-CVT-NEXT: cmp w9, #0 -; CHECK-CVT-NEXT: csel w9, w9, wzr, lt -; CHECK-CVT-NEXT: mov v2.s[2], w10 -; CHECK-CVT-NEXT: cmp w9, #0 -; CHECK-CVT-NEXT: mov v1.s[2], w8 -; CHECK-CVT-NEXT: csinv w8, w9, wzr, ge -; CHECK-CVT-NEXT: mov v2.s[3], w11 +; CHECK-CVT-NEXT: mov v1.s[2], w9 ; CHECK-CVT-NEXT: mov v1.s[3], w8 -; CHECK-CVT-NEXT: uzp1 v0.8h, v1.8h, v2.8h +; CHECK-CVT-NEXT: uzp1 v0.8h, v1.8h, v0.8h ; CHECK-CVT-NEXT: xtn v0.8b, v0.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_signed_v8f16_v8i1: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: movi v1.2d, #0000000000000000 ; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h -; CHECK-FP16-NEXT: movi v2.2d, #0xffffffffffffffff +; CHECK-FP16-NEXT: movi v1.2d, #0000000000000000 ; CHECK-FP16-NEXT: smin v0.8h, v0.8h, v1.8h -; CHECK-FP16-NEXT: smax v0.8h, v0.8h, v2.8h +; CHECK-FP16-NEXT: movi v1.2d, #0xffffffffffffffff +; CHECK-FP16-NEXT: smax v0.8h, v0.8h, v1.8h ; CHECK-FP16-NEXT: xtn v0.8b, v0.8h ; CHECK-FP16-NEXT: ret %x = call <8 x i1> @llvm.fptosi.sat.v8f16.v8i1(<8 x half> %f) @@ -2171,35 +2171,35 @@ ; CHECK-CVT-NEXT: csel w12, w12, w10, gt ; CHECK-CVT-NEXT: cmp w13, #127 ; CHECK-CVT-NEXT: csel w13, w13, w8, lt -; CHECK-CVT-NEXT: fmov s2, w11 +; CHECK-CVT-NEXT: fcvtzs w16, s1 ; CHECK-CVT-NEXT: cmn w13, #128 ; CHECK-CVT-NEXT: csel w13, w13, w10, gt ; CHECK-CVT-NEXT: cmp w14, #127 ; CHECK-CVT-NEXT: csel w14, w14, w8, lt +; CHECK-CVT-NEXT: fcvtzs w17, s0 ; CHECK-CVT-NEXT: cmn w14, #128 +; CHECK-CVT-NEXT: fmov s0, w11 ; CHECK-CVT-NEXT: csel w14, w14, w10, gt ; CHECK-CVT-NEXT: cmp w15, #127 ; CHECK-CVT-NEXT: csel w15, w15, w8, lt ; CHECK-CVT-NEXT: cmn w15, #128 -; CHECK-CVT-NEXT: csel w11, w15, w10, gt -; CHECK-CVT-NEXT: fcvtzs w15, s1 -; CHECK-CVT-NEXT: mov v2.s[1], w9 -; CHECK-CVT-NEXT: fmov s1, w11 -; CHECK-CVT-NEXT: cmp w15, #127 -; CHECK-CVT-NEXT: csel w9, w15, w8, lt -; CHECK-CVT-NEXT: fcvtzs w11, s0 -; CHECK-CVT-NEXT: cmn w9, #128 +; CHECK-CVT-NEXT: mov v0.s[1], w9 +; CHECK-CVT-NEXT: csel w15, w15, w10, gt +; CHECK-CVT-NEXT: cmp w16, #127 +; CHECK-CVT-NEXT: csel w11, w16, w8, lt +; CHECK-CVT-NEXT: mov v0.s[2], w12 +; CHECK-CVT-NEXT: cmn w11, #128 +; CHECK-CVT-NEXT: fmov s1, w15 +; CHECK-CVT-NEXT: csel w11, w11, w10, gt +; CHECK-CVT-NEXT: cmp w17, #127 +; CHECK-CVT-NEXT: mov v0.s[3], w13 +; CHECK-CVT-NEXT: csel w8, w17, w8, lt ; CHECK-CVT-NEXT: mov v1.s[1], w14 -; CHECK-CVT-NEXT: csel w9, w9, w10, gt -; CHECK-CVT-NEXT: cmp w11, #127 -; CHECK-CVT-NEXT: csel w8, w11, w8, lt -; CHECK-CVT-NEXT: mov v2.s[2], w12 ; CHECK-CVT-NEXT: cmn w8, #128 -; CHECK-CVT-NEXT: mov v1.s[2], w9 ; CHECK-CVT-NEXT: csel w8, w8, w10, gt -; CHECK-CVT-NEXT: mov v2.s[3], w13 +; CHECK-CVT-NEXT: mov v1.s[2], w11 ; CHECK-CVT-NEXT: mov v1.s[3], w8 -; CHECK-CVT-NEXT: uzp1 v0.8h, v1.8h, v2.8h +; CHECK-CVT-NEXT: uzp1 v0.8h, v1.8h, v0.8h ; CHECK-CVT-NEXT: xtn v0.8b, v0.8h ; CHECK-CVT-NEXT: ret ; @@ -2245,43 +2245,43 @@ ; CHECK-CVT-NEXT: csel w12, w12, w10, gt ; CHECK-CVT-NEXT: cmp w13, #4095 ; CHECK-CVT-NEXT: csel w13, w13, w8, lt -; CHECK-CVT-NEXT: fmov s2, w11 +; CHECK-CVT-NEXT: fcvtzs w16, s1 ; CHECK-CVT-NEXT: cmn w13, #1, lsl #12 // =4096 ; CHECK-CVT-NEXT: csel w13, w13, w10, gt ; CHECK-CVT-NEXT: cmp w14, #4095 ; CHECK-CVT-NEXT: csel w14, w14, w8, lt +; CHECK-CVT-NEXT: fcvtzs w17, s0 ; CHECK-CVT-NEXT: cmn w14, #1, lsl #12 // =4096 +; CHECK-CVT-NEXT: fmov s0, w11 ; CHECK-CVT-NEXT: csel w14, w14, w10, gt ; CHECK-CVT-NEXT: cmp w15, #4095 ; CHECK-CVT-NEXT: csel w15, w15, w8, lt ; CHECK-CVT-NEXT: cmn w15, #1, lsl #12 // =4096 -; CHECK-CVT-NEXT: csel w11, w15, w10, gt -; CHECK-CVT-NEXT: fcvtzs w15, s1 -; CHECK-CVT-NEXT: mov v2.s[1], w9 -; CHECK-CVT-NEXT: fmov s1, w11 -; CHECK-CVT-NEXT: cmp w15, #4095 -; CHECK-CVT-NEXT: csel w9, w15, w8, lt -; CHECK-CVT-NEXT: fcvtzs w11, s0 -; CHECK-CVT-NEXT: cmn w9, #1, lsl #12 // =4096 +; CHECK-CVT-NEXT: mov v0.s[1], w9 +; CHECK-CVT-NEXT: csel w15, w15, w10, gt +; CHECK-CVT-NEXT: cmp w16, #4095 +; CHECK-CVT-NEXT: csel w11, w16, w8, lt +; CHECK-CVT-NEXT: mov v0.s[2], w12 +; CHECK-CVT-NEXT: cmn w11, #1, lsl #12 // =4096 +; CHECK-CVT-NEXT: fmov s1, w15 +; CHECK-CVT-NEXT: csel w11, w11, w10, gt +; CHECK-CVT-NEXT: cmp w17, #4095 +; CHECK-CVT-NEXT: mov v0.s[3], w13 +; CHECK-CVT-NEXT: csel w8, w17, w8, lt ; CHECK-CVT-NEXT: mov v1.s[1], w14 -; CHECK-CVT-NEXT: csel w9, w9, w10, gt -; CHECK-CVT-NEXT: cmp w11, #4095 -; CHECK-CVT-NEXT: csel w8, w11, w8, lt -; CHECK-CVT-NEXT: mov v2.s[2], w12 ; CHECK-CVT-NEXT: cmn w8, #1, lsl #12 // =4096 -; CHECK-CVT-NEXT: mov v1.s[2], w9 ; CHECK-CVT-NEXT: csel w8, w8, w10, gt -; CHECK-CVT-NEXT: mov v2.s[3], w13 +; CHECK-CVT-NEXT: mov v1.s[2], w11 ; CHECK-CVT-NEXT: mov v1.s[3], w8 -; CHECK-CVT-NEXT: uzp1 v0.8h, v1.8h, v2.8h +; CHECK-CVT-NEXT: uzp1 v0.8h, v1.8h, v0.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_signed_v8f16_v8i13: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-FP16-NEXT: mvni v1.8h, #240, lsl #8 +; CHECK-FP16-NEXT: smin v0.8h, v0.8h, v1.8h ; CHECK-FP16-NEXT: movi v1.8h, #240, lsl #8 -; CHECK-FP16-NEXT: mvni v2.8h, #240, lsl #8 -; CHECK-FP16-NEXT: smin v0.8h, v0.8h, v2.8h ; CHECK-FP16-NEXT: smax v0.8h, v0.8h, v1.8h ; CHECK-FP16-NEXT: ret %x = call <8 x i13> @llvm.fptosi.sat.v8f16.v8i13(<8 x half> %f) @@ -2321,35 +2321,35 @@ ; CHECK-CVT-NEXT: csel w12, w12, w10, gt ; CHECK-CVT-NEXT: cmp w13, w8 ; CHECK-CVT-NEXT: csel w13, w13, w8, lt -; CHECK-CVT-NEXT: fmov s2, w11 +; CHECK-CVT-NEXT: fcvtzs w16, s1 ; CHECK-CVT-NEXT: cmn w13, #8, lsl #12 // =32768 ; CHECK-CVT-NEXT: csel w13, w13, w10, gt ; CHECK-CVT-NEXT: cmp w14, w8 ; CHECK-CVT-NEXT: csel w14, w14, w8, lt +; CHECK-CVT-NEXT: fcvtzs w17, s0 ; CHECK-CVT-NEXT: cmn w14, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: fmov s0, w11 ; CHECK-CVT-NEXT: csel w14, w14, w10, gt ; CHECK-CVT-NEXT: cmp w15, w8 ; CHECK-CVT-NEXT: csel w15, w15, w8, lt ; CHECK-CVT-NEXT: cmn w15, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: csel w11, w15, w10, gt -; CHECK-CVT-NEXT: fcvtzs w15, s1 -; CHECK-CVT-NEXT: mov v2.s[1], w9 -; CHECK-CVT-NEXT: fmov s1, w11 -; CHECK-CVT-NEXT: cmp w15, w8 -; CHECK-CVT-NEXT: csel w9, w15, w8, lt -; CHECK-CVT-NEXT: fcvtzs w11, s0 -; CHECK-CVT-NEXT: cmn w9, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: mov v0.s[1], w9 +; CHECK-CVT-NEXT: csel w15, w15, w10, gt +; CHECK-CVT-NEXT: cmp w16, w8 +; CHECK-CVT-NEXT: csel w11, w16, w8, lt +; CHECK-CVT-NEXT: mov v0.s[2], w12 +; CHECK-CVT-NEXT: cmn w11, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: fmov s1, w15 +; CHECK-CVT-NEXT: csel w11, w11, w10, gt +; CHECK-CVT-NEXT: cmp w17, w8 +; CHECK-CVT-NEXT: mov v0.s[3], w13 +; CHECK-CVT-NEXT: csel w8, w17, w8, lt ; CHECK-CVT-NEXT: mov v1.s[1], w14 -; CHECK-CVT-NEXT: csel w9, w9, w10, gt -; CHECK-CVT-NEXT: cmp w11, w8 -; CHECK-CVT-NEXT: csel w8, w11, w8, lt -; CHECK-CVT-NEXT: mov v2.s[2], w12 ; CHECK-CVT-NEXT: cmn w8, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: mov v1.s[2], w9 ; CHECK-CVT-NEXT: csel w8, w8, w10, gt -; CHECK-CVT-NEXT: mov v2.s[3], w13 +; CHECK-CVT-NEXT: mov v1.s[2], w11 ; CHECK-CVT-NEXT: mov v1.s[3], w8 -; CHECK-CVT-NEXT: uzp1 v0.8h, v1.8h, v2.8h +; CHECK-CVT-NEXT: uzp1 v0.8h, v1.8h, v0.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_signed_v8f16_v8i16: @@ -2363,24 +2363,24 @@ define <8 x i19> @test_signed_v8f16_v8i19(<8 x half> %f) { ; CHECK-LABEL: test_signed_v8f16_v8i19: ; CHECK: // %bb.0: -; CHECK-NEXT: fcvtl v2.4s, v0.4h -; CHECK-NEXT: fcvtl2 v0.4s, v0.8h -; CHECK-NEXT: movi v1.4s, #3, msl #16 -; CHECK-NEXT: fcvtzs v2.4s, v2.4s +; CHECK-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: movi v2.4s, #3, msl #16 +; CHECK-NEXT: fcvtzs v1.4s, v1.4s ; CHECK-NEXT: fcvtzs v0.4s, v0.4s -; CHECK-NEXT: smin v2.4s, v2.4s, v1.4s -; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: mvni v1.4s, #3, msl #16 -; CHECK-NEXT: smax v2.4s, v2.4s, v1.4s -; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: mov w1, v2.s[1] -; CHECK-NEXT: mov w2, v2.s[2] -; CHECK-NEXT: mov w5, v0.s[1] -; CHECK-NEXT: mov w3, v2.s[3] -; CHECK-NEXT: mov w6, v0.s[2] -; CHECK-NEXT: mov w7, v0.s[3] -; CHECK-NEXT: fmov w4, s0 -; CHECK-NEXT: fmov w0, s2 +; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v2.4s +; CHECK-NEXT: mvni v2.4s, #3, msl #16 +; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v2.4s +; CHECK-NEXT: fmov w4, s1 +; CHECK-NEXT: mov w5, v1.s[1] +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: mov w6, v1.s[2] +; CHECK-NEXT: mov w7, v1.s[3] +; CHECK-NEXT: mov w1, v0.s[1] +; CHECK-NEXT: mov w2, v0.s[2] +; CHECK-NEXT: mov w3, v0.s[3] ; CHECK-NEXT: ret %x = call <8 x i19> @llvm.fptosi.sat.v8f16.v8i19(<8 x half> %f) ret <8 x i19> %x @@ -2520,63 +2520,63 @@ ; CHECK-CVT-LABEL: test_signed_v8f16_v8i64: ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-CVT-NEXT: mov h4, v0.h[2] -; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: fcvt s2, h1 +; CHECK-CVT-NEXT: mov h2, v0.h[2] +; CHECK-CVT-NEXT: fcvt s4, h0 +; CHECK-CVT-NEXT: mov h5, v1.h[2] +; CHECK-CVT-NEXT: fcvt s7, h1 ; CHECK-CVT-NEXT: mov h3, v1.h[1] -; CHECK-CVT-NEXT: mov h6, v1.h[2] -; CHECK-CVT-NEXT: fcvt s4, h4 -; CHECK-CVT-NEXT: mov h1, v1.h[3] -; CHECK-CVT-NEXT: fcvtzs x9, s5 -; CHECK-CVT-NEXT: fcvtzs x8, s2 -; CHECK-CVT-NEXT: fcvt s2, h3 -; CHECK-CVT-NEXT: mov h3, v0.h[1] +; CHECK-CVT-NEXT: mov h6, v0.h[1] ; CHECK-CVT-NEXT: mov h0, v0.h[3] -; CHECK-CVT-NEXT: fcvt s5, h6 -; CHECK-CVT-NEXT: fcvt s6, h1 -; CHECK-CVT-NEXT: fcvtzs x10, s2 -; CHECK-CVT-NEXT: fmov d2, x8 +; CHECK-CVT-NEXT: mov h1, v1.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvtzs x8, s4 +; CHECK-CVT-NEXT: fcvt s4, h5 +; CHECK-CVT-NEXT: fcvtzs x9, s7 ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: fcvt s4, h0 -; CHECK-CVT-NEXT: fmov d0, x9 -; CHECK-CVT-NEXT: mov v2.d[1], x10 -; CHECK-CVT-NEXT: fcvtzs x10, s5 -; CHECK-CVT-NEXT: fmov d1, x8 -; CHECK-CVT-NEXT: fcvtzs x9, s3 -; CHECK-CVT-NEXT: fcvtzs x8, s4 -; CHECK-CVT-NEXT: fmov d3, x10 +; CHECK-CVT-NEXT: fcvt s5, h6 +; CHECK-CVT-NEXT: fcvt s6, h0 +; CHECK-CVT-NEXT: fcvt s7, h1 +; CHECK-CVT-NEXT: fcvtzs x10, s2 +; CHECK-CVT-NEXT: fmov d2, x9 +; CHECK-CVT-NEXT: fcvtzs x9, s4 +; CHECK-CVT-NEXT: fmov d0, x8 +; CHECK-CVT-NEXT: fcvtzs x11, s3 +; CHECK-CVT-NEXT: fcvtzs x8, s5 +; CHECK-CVT-NEXT: fmov d1, x10 ; CHECK-CVT-NEXT: fcvtzs x10, s6 -; CHECK-CVT-NEXT: mov v0.d[1], x9 -; CHECK-CVT-NEXT: mov v1.d[1], x8 -; CHECK-CVT-NEXT: mov v3.d[1], x10 +; CHECK-CVT-NEXT: fmov d3, x9 +; CHECK-CVT-NEXT: fcvtzs x9, s7 +; CHECK-CVT-NEXT: mov v0.d[1], x8 +; CHECK-CVT-NEXT: mov v2.d[1], x11 +; CHECK-CVT-NEXT: mov v1.d[1], x10 +; CHECK-CVT-NEXT: mov v3.d[1], x9 ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_signed_v8f16_v8i64: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-FP16-NEXT: mov h3, v0.h[2] +; CHECK-FP16-NEXT: ext v2.16b, v0.16b, v0.16b, #8 +; CHECK-FP16-NEXT: mov h1, v0.h[2] ; CHECK-FP16-NEXT: mov h5, v0.h[3] -; CHECK-FP16-NEXT: fcvtzs x9, h0 -; CHECK-FP16-NEXT: mov h2, v1.h[1] +; CHECK-FP16-NEXT: mov h4, v2.h[2] +; CHECK-FP16-NEXT: fcvtzs x9, h2 +; CHECK-FP16-NEXT: mov h3, v2.h[1] +; CHECK-FP16-NEXT: mov h6, v2.h[3] ; CHECK-FP16-NEXT: fcvtzs x8, h1 -; CHECK-FP16-NEXT: mov h4, v1.h[2] -; CHECK-FP16-NEXT: mov h6, v1.h[3] -; CHECK-FP16-NEXT: fcvtzs x10, h2 -; CHECK-FP16-NEXT: fmov d2, x8 -; CHECK-FP16-NEXT: fcvtzs x8, h3 -; CHECK-FP16-NEXT: mov h3, v0.h[1] -; CHECK-FP16-NEXT: fmov d0, x9 -; CHECK-FP16-NEXT: mov v2.d[1], x10 -; CHECK-FP16-NEXT: fcvtzs x10, h4 +; CHECK-FP16-NEXT: mov h1, v0.h[1] +; CHECK-FP16-NEXT: fcvtzs x10, h0 +; CHECK-FP16-NEXT: fmov d2, x9 +; CHECK-FP16-NEXT: fcvtzs x9, h4 +; CHECK-FP16-NEXT: fcvtzs x11, h3 +; CHECK-FP16-NEXT: fmov d0, x10 +; CHECK-FP16-NEXT: fcvtzs x10, h1 ; CHECK-FP16-NEXT: fmov d1, x8 -; CHECK-FP16-NEXT: fcvtzs x9, h3 ; CHECK-FP16-NEXT: fcvtzs x8, h5 -; CHECK-FP16-NEXT: fmov d3, x10 -; CHECK-FP16-NEXT: fcvtzs x10, h6 -; CHECK-FP16-NEXT: mov v0.d[1], x9 +; CHECK-FP16-NEXT: fmov d3, x9 +; CHECK-FP16-NEXT: fcvtzs x9, h6 +; CHECK-FP16-NEXT: mov v0.d[1], x10 +; CHECK-FP16-NEXT: mov v2.d[1], x11 ; CHECK-FP16-NEXT: mov v1.d[1], x8 -; CHECK-FP16-NEXT: mov v3.d[1], x10 +; CHECK-FP16-NEXT: mov v3.d[1], x9 ; CHECK-FP16-NEXT: ret %x = call <8 x i64> @llvm.fptosi.sat.v8f16.v8i64(<8 x half> %f) ret <8 x i64> %x @@ -2708,8 +2708,8 @@ ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csel x20, xzr, x8, vs -; CHECK-NEXT: csel x28, xzr, x9, vs +; CHECK-NEXT: csel x28, xzr, x8, vs +; CHECK-NEXT: csel x26, xzr, x9, vs ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload @@ -2723,59 +2723,59 @@ ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: csel x21, xzr, x8, vs -; CHECK-NEXT: csel x26, xzr, x9, vs +; CHECK-NEXT: csel x20, xzr, x9, vs ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: fmov d0, x20 ; CHECK-NEXT: fcmp s8, s10 -; CHECK-NEXT: ldr x11, [sp, #8] // 8-byte Folded Reload -; CHECK-NEXT: lsr x10, x28, #28 +; CHECK-NEXT: fmov d0, x28 ; CHECK-NEXT: ldr d1, [sp] // 8-byte Folded Reload -; CHECK-NEXT: lsr x12, x29, #28 -; CHECK-NEXT: mov v0.d[1], x28 +; CHECK-NEXT: lsr x10, x26, #28 +; CHECK-NEXT: ldr x12, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: lsr x11, x29, #28 ; CHECK-NEXT: csel x8, x25, x1, lt ; CHECK-NEXT: csel x9, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: stur x11, [x19, #75] -; CHECK-NEXT: ldr x13, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], x26 +; CHECK-NEXT: stur x12, [x19, #75] +; CHECK-NEXT: mov v1.d[1], x29 +; CHECK-NEXT: ldr d2, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: fmov x12, d0 ; CHECK-NEXT: csinv x9, x9, xzr, le ; CHECK-NEXT: csel x8, x23, x8, gt ; CHECK-NEXT: fcmp s8, s8 -; CHECK-NEXT: fmov x11, d0 -; CHECK-NEXT: stur x13, [x19, #50] -; CHECK-NEXT: mov v1.d[1], x29 -; CHECK-NEXT: ldr d0, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: csel x9, xzr, x9, vs ; CHECK-NEXT: strb w10, [x19, #49] -; CHECK-NEXT: extr x10, x28, x11, #28 +; CHECK-NEXT: strb w11, [x19, #24] +; CHECK-NEXT: fmov x10, d1 +; CHECK-NEXT: extr x11, x26, x12, #28 +; CHECK-NEXT: mov v2.d[1], x22 +; CHECK-NEXT: csel x9, xzr, x9, vs ; CHECK-NEXT: csel x8, xzr, x8, vs -; CHECK-NEXT: bfi x8, x11, #36, #28 -; CHECK-NEXT: strb w12, [x19, #24] +; CHECK-NEXT: bfi x8, x12, #36, #28 +; CHECK-NEXT: bfi x21, x10, #36, #28 ; CHECK-NEXT: stur x9, [x19, #25] -; CHECK-NEXT: fmov x12, d1 -; CHECK-NEXT: stur x10, [x19, #41] -; CHECK-NEXT: lsr x9, x22, #28 -; CHECK-NEXT: ldr d1, [sp, #24] // 8-byte Folded Reload +; CHECK-NEXT: ldr x9, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: stur x11, [x19, #41] +; CHECK-NEXT: stur x9, [x19, #50] +; CHECK-NEXT: extr x9, x29, x10, #28 +; CHECK-NEXT: ldr d0, [sp, #24] // 8-byte Folded Reload ; CHECK-NEXT: stur x8, [x19, #33] +; CHECK-NEXT: fmov x8, d2 +; CHECK-NEXT: str x9, [x19, #16] +; CHECK-NEXT: lsr x9, x22, #28 ; CHECK-NEXT: ldr x11, [sp, #72] // 8-byte Folded Reload -; CHECK-NEXT: extr x18, x29, x12, #28 -; CHECK-NEXT: mov v0.d[1], x22 -; CHECK-NEXT: bfi x21, x12, #36, #28 -; CHECK-NEXT: str x26, [x19] -; CHECK-NEXT: mov v1.d[1], x11 +; CHECK-NEXT: stp x20, x21, [x19] +; CHECK-NEXT: extr x12, x22, x8, #28 +; CHECK-NEXT: bfi x27, x8, #36, #28 +; CHECK-NEXT: strb w9, [x19, #99] +; CHECK-NEXT: mov v0.d[1], x11 ; CHECK-NEXT: lsr x10, x11, #28 ; CHECK-NEXT: mov x13, x11 -; CHECK-NEXT: stp x21, x18, [x19, #8] -; CHECK-NEXT: fmov x8, d0 -; CHECK-NEXT: strb w9, [x19, #99] +; CHECK-NEXT: stur x12, [x19, #91] +; CHECK-NEXT: fmov x11, d0 +; CHECK-NEXT: stur x27, [x19, #83] ; CHECK-NEXT: strb w10, [x19, #74] -; CHECK-NEXT: fmov x11, d1 -; CHECK-NEXT: extr x12, x22, x8, #28 -; CHECK-NEXT: bfi x27, x8, #36, #28 ; CHECK-NEXT: extr x8, x13, x11, #28 ; CHECK-NEXT: bfi x24, x11, #36, #28 -; CHECK-NEXT: stur x12, [x19, #91] -; CHECK-NEXT: stur x27, [x19, #83] ; CHECK-NEXT: stur x8, [x19, #66] ; CHECK-NEXT: stur x24, [x19, #58] ; CHECK-NEXT: ldp x20, x19, [sp, #176] // 16-byte Folded Reload @@ -2992,9 +2992,9 @@ define <8 x i8> @test_signed_v8f32_v8i8(<8 x float> %f) { ; CHECK-LABEL: test_signed_v8f32_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.4s, #127 ; CHECK-NEXT: fcvtzs v1.4s, v1.4s ; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: movi v2.4s, #127 ; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s ; CHECK-NEXT: smin v0.4s, v0.4s, v2.4s ; CHECK-NEXT: mvni v2.4s, #127 @@ -3011,47 +3011,47 @@ define <16 x i8> @test_signed_v16f32_v16i8(<16 x float> %f) { ; CHECK-LABEL: test_signed_v16f32_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v4.4s, #127 ; CHECK-NEXT: fcvtzs v0.4s, v0.4s -; CHECK-NEXT: mvni v5.4s, #127 ; CHECK-NEXT: fcvtzs v1.4s, v1.4s +; CHECK-NEXT: movi v4.4s, #127 ; CHECK-NEXT: fcvtzs v2.4s, v2.4s +; CHECK-NEXT: mvni v5.4s, #127 +; CHECK-NEXT: fcvtzs v3.4s, v3.4s ; CHECK-NEXT: smin v0.4s, v0.4s, v4.4s ; CHECK-NEXT: smin v1.4s, v1.4s, v4.4s ; CHECK-NEXT: smin v2.4s, v2.4s, v4.4s +; CHECK-NEXT: smin v3.4s, v3.4s, v4.4s ; CHECK-NEXT: smax v0.4s, v0.4s, v5.4s ; CHECK-NEXT: smax v1.4s, v1.4s, v5.4s ; CHECK-NEXT: smax v2.4s, v2.4s, v5.4s -; CHECK-NEXT: xtn v6.4h, v0.4s -; CHECK-NEXT: umov w8, v6.h[0] -; CHECK-NEXT: umov w9, v6.h[1] +; CHECK-NEXT: smax v3.4s, v3.4s, v5.4s +; CHECK-NEXT: xtn v4.4h, v0.4s ; CHECK-NEXT: xtn v1.4h, v1.4s +; CHECK-NEXT: umov w8, v4.h[0] +; CHECK-NEXT: umov w9, v4.h[2] +; CHECK-NEXT: xtn v2.4h, v2.4s ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: umov w8, v6.h[2] -; CHECK-NEXT: mov v0.b[1], w9 -; CHECK-NEXT: mov v0.b[2], w8 -; CHECK-NEXT: umov w8, v6.h[3] +; CHECK-NEXT: umov w8, v4.h[1] +; CHECK-NEXT: mov v0.b[1], w8 +; CHECK-NEXT: umov w8, v4.h[3] +; CHECK-NEXT: mov v0.b[2], w9 +; CHECK-NEXT: umov w9, v1.h[0] ; CHECK-NEXT: mov v0.b[3], w8 -; CHECK-NEXT: umov w8, v1.h[0] -; CHECK-NEXT: mov v0.b[4], w8 ; CHECK-NEXT: umov w8, v1.h[1] +; CHECK-NEXT: mov v0.b[4], w9 +; CHECK-NEXT: umov w9, v1.h[2] ; CHECK-NEXT: mov v0.b[5], w8 -; CHECK-NEXT: umov w8, v1.h[2] -; CHECK-NEXT: mov v0.b[6], w8 ; CHECK-NEXT: umov w8, v1.h[3] -; CHECK-NEXT: xtn v1.4h, v2.4s -; CHECK-NEXT: fcvtzs v2.4s, v3.4s +; CHECK-NEXT: xtn v1.4h, v3.4s +; CHECK-NEXT: mov v0.b[6], w9 +; CHECK-NEXT: umov w9, v2.h[0] ; CHECK-NEXT: mov v0.b[7], w8 -; CHECK-NEXT: umov w8, v1.h[0] -; CHECK-NEXT: smin v2.4s, v2.4s, v4.4s -; CHECK-NEXT: mov v0.b[8], w8 -; CHECK-NEXT: umov w8, v1.h[1] -; CHECK-NEXT: smax v2.4s, v2.4s, v5.4s +; CHECK-NEXT: umov w8, v2.h[1] +; CHECK-NEXT: mov v0.b[8], w9 +; CHECK-NEXT: umov w9, v2.h[2] ; CHECK-NEXT: mov v0.b[9], w8 -; CHECK-NEXT: umov w8, v1.h[2] -; CHECK-NEXT: mov v0.b[10], w8 -; CHECK-NEXT: umov w8, v1.h[3] -; CHECK-NEXT: xtn v1.4h, v2.4s +; CHECK-NEXT: umov w8, v2.h[3] +; CHECK-NEXT: mov v0.b[10], w9 ; CHECK-NEXT: mov v0.b[11], w8 ; CHECK-NEXT: umov w8, v1.h[0] ; CHECK-NEXT: mov v0.b[12], w8 @@ -3081,8 +3081,8 @@ define <16 x i16> @test_signed_v16f32_v16i16(<16 x float> %f) { ; CHECK-LABEL: test_signed_v16f32_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzs v0.4s, v0.4s ; CHECK-NEXT: fcvtzs v2.4s, v2.4s +; CHECK-NEXT: fcvtzs v0.4s, v0.4s ; CHECK-NEXT: fcvtzs v4.4s, v1.4s ; CHECK-NEXT: fcvtzs v3.4s, v3.4s ; CHECK-NEXT: sqxtn v0.4h, v0.4s @@ -3166,56 +3166,56 @@ ; CHECK-CVT-NEXT: csel w0, w0, w8, lt ; CHECK-CVT-NEXT: fcvtzs w2, s2 ; CHECK-CVT-NEXT: cmn w0, #128 -; CHECK-CVT-NEXT: fcvtzs w4, s0 +; CHECK-CVT-NEXT: mov s2, v0.s[2] ; CHECK-CVT-NEXT: csel w0, w0, w9, gt ; CHECK-CVT-NEXT: cmp w1, #127 ; CHECK-CVT-NEXT: csel w1, w1, w8, lt ; CHECK-CVT-NEXT: fcvtzs w3, s1 ; CHECK-CVT-NEXT: cmn w1, #128 -; CHECK-CVT-NEXT: mov s1, v0.s[2] +; CHECK-CVT-NEXT: fmov s1, w11 ; CHECK-CVT-NEXT: csel w1, w1, w9, gt ; CHECK-CVT-NEXT: cmp w2, #127 -; CHECK-CVT-NEXT: csel w2, w2, w8, lt -; CHECK-CVT-NEXT: fmov s2, w11 -; CHECK-CVT-NEXT: cmn w2, #128 -; CHECK-CVT-NEXT: fmov s3, w14 -; CHECK-CVT-NEXT: csel w2, w2, w9, gt +; CHECK-CVT-NEXT: csel w11, w2, w8, lt +; CHECK-CVT-NEXT: fcvtzs w2, s0 +; CHECK-CVT-NEXT: cmn w11, #128 +; CHECK-CVT-NEXT: mov v1.s[1], w10 +; CHECK-CVT-NEXT: csel w11, w11, w9, gt ; CHECK-CVT-NEXT: cmp w3, #127 ; CHECK-CVT-NEXT: csel w3, w3, w8, lt -; CHECK-CVT-NEXT: fcvtzs w14, s1 -; CHECK-CVT-NEXT: cmn w3, #128 ; CHECK-CVT-NEXT: mov s0, v0.s[3] -; CHECK-CVT-NEXT: csel w3, w3, w9, gt -; CHECK-CVT-NEXT: cmp w4, #127 -; CHECK-CVT-NEXT: csel w11, w4, w8, lt -; CHECK-CVT-NEXT: fmov s4, w0 -; CHECK-CVT-NEXT: cmn w11, #128 -; CHECK-CVT-NEXT: csel w11, w11, w9, gt +; CHECK-CVT-NEXT: cmn w3, #128 +; CHECK-CVT-NEXT: fmov s3, w14 +; CHECK-CVT-NEXT: csel w10, w3, w9, gt +; CHECK-CVT-NEXT: cmp w2, #127 +; CHECK-CVT-NEXT: csel w14, w2, w8, lt +; CHECK-CVT-NEXT: fcvtzs w2, s2 +; CHECK-CVT-NEXT: cmn w14, #128 +; CHECK-CVT-NEXT: mov v1.s[2], w13 +; CHECK-CVT-NEXT: csel w13, w14, w9, gt +; CHECK-CVT-NEXT: fcvtzs w14, s0 +; CHECK-CVT-NEXT: cmp w2, #127 +; CHECK-CVT-NEXT: mov v3.s[1], w15 +; CHECK-CVT-NEXT: csel w15, w2, w8, lt +; CHECK-CVT-NEXT: fmov s0, w0 +; CHECK-CVT-NEXT: fmov s2, w13 +; CHECK-CVT-NEXT: cmn w15, #128 +; CHECK-CVT-NEXT: csel w13, w15, w9, gt ; CHECK-CVT-NEXT: cmp w14, #127 +; CHECK-CVT-NEXT: csel w8, w14, w8, lt +; CHECK-CVT-NEXT: mov v0.s[1], w18 ; CHECK-CVT-NEXT: mov v2.s[1], w10 -; CHECK-CVT-NEXT: csel w10, w14, w8, lt -; CHECK-CVT-NEXT: mov v3.s[1], w15 -; CHECK-CVT-NEXT: cmn w10, #128 -; CHECK-CVT-NEXT: fmov s1, w11 -; CHECK-CVT-NEXT: csel w10, w10, w9, gt -; CHECK-CVT-NEXT: fcvtzs w11, s0 -; CHECK-CVT-NEXT: mov v4.s[1], w18 -; CHECK-CVT-NEXT: mov v1.s[1], w3 -; CHECK-CVT-NEXT: cmp w11, #127 -; CHECK-CVT-NEXT: csel w8, w11, w8, lt -; CHECK-CVT-NEXT: mov v2.s[2], w13 ; CHECK-CVT-NEXT: cmn w8, #128 -; CHECK-CVT-NEXT: mov v3.s[2], w16 ; CHECK-CVT-NEXT: csel w8, w8, w9, gt -; CHECK-CVT-NEXT: mov v4.s[2], w1 -; CHECK-CVT-NEXT: mov v1.s[2], w10 -; CHECK-CVT-NEXT: mov v2.s[3], w12 +; CHECK-CVT-NEXT: mov v3.s[2], w16 +; CHECK-CVT-NEXT: mov v0.s[2], w1 +; CHECK-CVT-NEXT: mov v2.s[2], w13 +; CHECK-CVT-NEXT: mov v1.s[3], w12 ; CHECK-CVT-NEXT: mov v3.s[3], w17 -; CHECK-CVT-NEXT: mov v4.s[3], w2 -; CHECK-CVT-NEXT: mov v1.s[3], w8 -; CHECK-CVT-NEXT: uzp1 v0.8h, v3.8h, v2.8h -; CHECK-CVT-NEXT: uzp1 v1.8h, v1.8h, v4.8h -; CHECK-CVT-NEXT: uzp1 v0.16b, v1.16b, v0.16b +; CHECK-CVT-NEXT: mov v0.s[3], w11 +; CHECK-CVT-NEXT: mov v2.s[3], w8 +; CHECK-CVT-NEXT: uzp1 v1.8h, v3.8h, v1.8h +; CHECK-CVT-NEXT: uzp1 v0.8h, v2.8h, v0.8h +; CHECK-CVT-NEXT: uzp1 v0.16b, v0.16b, v1.16b ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_signed_v16f16_v16i8: @@ -3299,55 +3299,55 @@ ; CHECK-CVT-NEXT: csel w0, w0, w8, lt ; CHECK-CVT-NEXT: fcvtzs w2, s2 ; CHECK-CVT-NEXT: cmn w0, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: fcvtzs w4, s0 +; CHECK-CVT-NEXT: mov s2, v0.s[2] ; CHECK-CVT-NEXT: csel w0, w0, w9, gt ; CHECK-CVT-NEXT: cmp w1, w8 ; CHECK-CVT-NEXT: csel w1, w1, w8, lt ; CHECK-CVT-NEXT: fcvtzs w3, s1 ; CHECK-CVT-NEXT: cmn w1, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: mov s1, v0.s[2] +; CHECK-CVT-NEXT: fmov s1, w11 ; CHECK-CVT-NEXT: csel w1, w1, w9, gt ; CHECK-CVT-NEXT: cmp w2, w8 -; CHECK-CVT-NEXT: csel w2, w2, w8, lt -; CHECK-CVT-NEXT: fmov s2, w11 -; CHECK-CVT-NEXT: cmn w2, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: fmov s3, w14 -; CHECK-CVT-NEXT: csel w2, w2, w9, gt +; CHECK-CVT-NEXT: csel w11, w2, w8, lt +; CHECK-CVT-NEXT: fcvtzs w2, s0 +; CHECK-CVT-NEXT: cmn w11, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: mov v1.s[1], w10 +; CHECK-CVT-NEXT: csel w11, w11, w9, gt ; CHECK-CVT-NEXT: cmp w3, w8 ; CHECK-CVT-NEXT: csel w3, w3, w8, lt -; CHECK-CVT-NEXT: fcvtzs w14, s1 -; CHECK-CVT-NEXT: cmn w3, #8, lsl #12 // =32768 ; CHECK-CVT-NEXT: mov s0, v0.s[3] -; CHECK-CVT-NEXT: csel w3, w3, w9, gt -; CHECK-CVT-NEXT: cmp w4, w8 -; CHECK-CVT-NEXT: csel w11, w4, w8, lt -; CHECK-CVT-NEXT: fmov s4, w0 -; CHECK-CVT-NEXT: cmn w11, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: csel w11, w11, w9, gt -; CHECK-CVT-NEXT: cmp w14, w8 -; CHECK-CVT-NEXT: mov v2.s[1], w10 -; CHECK-CVT-NEXT: csel w10, w14, w8, lt +; CHECK-CVT-NEXT: cmn w3, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: fmov s3, w14 +; CHECK-CVT-NEXT: csel w10, w3, w9, gt +; CHECK-CVT-NEXT: cmp w2, w8 +; CHECK-CVT-NEXT: csel w14, w2, w8, lt +; CHECK-CVT-NEXT: fcvtzs w2, s2 +; CHECK-CVT-NEXT: cmn w14, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: mov v1.s[2], w13 +; CHECK-CVT-NEXT: csel w13, w14, w9, gt +; CHECK-CVT-NEXT: fcvtzs w14, s0 +; CHECK-CVT-NEXT: cmp w2, w8 ; CHECK-CVT-NEXT: mov v3.s[1], w15 -; CHECK-CVT-NEXT: cmn w10, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: fmov s1, w11 -; CHECK-CVT-NEXT: csel w10, w10, w9, gt -; CHECK-CVT-NEXT: fcvtzs w11, s0 -; CHECK-CVT-NEXT: mov v4.s[1], w18 -; CHECK-CVT-NEXT: mov v1.s[1], w3 -; CHECK-CVT-NEXT: cmp w11, w8 -; CHECK-CVT-NEXT: csel w8, w11, w8, lt -; CHECK-CVT-NEXT: mov v2.s[2], w13 +; CHECK-CVT-NEXT: csel w15, w2, w8, lt +; CHECK-CVT-NEXT: fmov s2, w0 +; CHECK-CVT-NEXT: fmov s4, w13 +; CHECK-CVT-NEXT: cmn w15, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: csel w13, w15, w9, gt +; CHECK-CVT-NEXT: cmp w14, w8 +; CHECK-CVT-NEXT: csel w8, w14, w8, lt +; CHECK-CVT-NEXT: mov v2.s[1], w18 +; CHECK-CVT-NEXT: mov v4.s[1], w10 ; CHECK-CVT-NEXT: cmn w8, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: mov v3.s[2], w16 ; CHECK-CVT-NEXT: csel w8, w8, w9, gt -; CHECK-CVT-NEXT: mov v4.s[2], w1 -; CHECK-CVT-NEXT: mov v1.s[2], w10 -; CHECK-CVT-NEXT: mov v2.s[3], w12 +; CHECK-CVT-NEXT: mov v3.s[2], w16 +; CHECK-CVT-NEXT: mov v2.s[2], w1 +; CHECK-CVT-NEXT: mov v4.s[2], w13 +; CHECK-CVT-NEXT: mov v1.s[3], w12 ; CHECK-CVT-NEXT: mov v3.s[3], w17 -; CHECK-CVT-NEXT: mov v4.s[3], w2 -; CHECK-CVT-NEXT: mov v1.s[3], w8 -; CHECK-CVT-NEXT: uzp1 v0.8h, v3.8h, v2.8h -; CHECK-CVT-NEXT: uzp1 v1.8h, v1.8h, v4.8h +; CHECK-CVT-NEXT: mov v2.s[3], w11 +; CHECK-CVT-NEXT: mov v4.s[3], w8 +; CHECK-CVT-NEXT: uzp1 v0.8h, v3.8h, v1.8h +; CHECK-CVT-NEXT: uzp1 v1.8h, v4.8h, v2.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_signed_v16f16_v16i16: @@ -3368,11 +3368,11 @@ ; CHECK-NEXT: mov w9, #-128 ; CHECK-NEXT: mov d0, v2.d[1] ; CHECK-NEXT: fcvtzs w13, d1 -; CHECK-NEXT: fcvtzs w15, d3 +; CHECK-NEXT: fcvtzs w15, d2 ; CHECK-NEXT: fcvtzs w10, d4 ; CHECK-NEXT: mov d4, v1.d[1] -; CHECK-NEXT: mov d1, v3.d[1] ; CHECK-NEXT: fcvtzs w14, d0 +; CHECK-NEXT: mov d0, v3.d[1] ; CHECK-NEXT: cmp w10, #127 ; CHECK-NEXT: csel w10, w10, w8, lt ; CHECK-NEXT: fcvtzs w12, d4 @@ -3380,6 +3380,7 @@ ; CHECK-NEXT: csel w10, w10, w9, gt ; CHECK-NEXT: cmp w11, #127 ; CHECK-NEXT: csel w11, w11, w8, lt +; CHECK-NEXT: fcvtzs w16, d0 ; CHECK-NEXT: cmn w11, #128 ; CHECK-NEXT: csel w11, w11, w9, gt ; CHECK-NEXT: cmp w12, #127 @@ -3387,47 +3388,46 @@ ; CHECK-NEXT: cmn w12, #128 ; CHECK-NEXT: csel w12, w12, w9, gt ; CHECK-NEXT: cmp w13, #127 +; CHECK-NEXT: csel w13, w13, w8, lt ; CHECK-NEXT: fmov s0, w11 -; CHECK-NEXT: csel w11, w13, w8, lt -; CHECK-NEXT: cmn w11, #128 -; CHECK-NEXT: fcvtzs w13, d2 -; CHECK-NEXT: csel w11, w11, w9, gt +; CHECK-NEXT: cmn w13, #128 +; CHECK-NEXT: csel w13, w13, w9, gt ; CHECK-NEXT: cmp w14, #127 +; CHECK-NEXT: csel w14, w14, w8, lt ; CHECK-NEXT: mov v0.s[1], w10 -; CHECK-NEXT: csel w10, w14, w8, lt +; CHECK-NEXT: cmn w14, #128 +; CHECK-NEXT: csel w14, w14, w9, gt +; CHECK-NEXT: cmp w15, #127 +; CHECK-NEXT: csel w15, w15, w8, lt +; CHECK-NEXT: fmov s1, w13 +; CHECK-NEXT: cmn w15, #128 +; CHECK-NEXT: fcvtzs w13, d3 +; CHECK-NEXT: csel w11, w15, w9, gt +; CHECK-NEXT: cmp w16, #127 +; CHECK-NEXT: csel w10, w16, w8, lt +; CHECK-NEXT: mov v1.s[1], w12 ; CHECK-NEXT: cmn w10, #128 -; CHECK-NEXT: fmov s2, w11 ; CHECK-NEXT: csel w10, w10, w9, gt ; CHECK-NEXT: cmp w13, #127 +; CHECK-NEXT: csel w8, w13, w8, lt +; CHECK-NEXT: fmov s2, w11 ; CHECK-NEXT: mov w11, v0.s[1] -; CHECK-NEXT: csel w13, w13, w8, lt -; CHECK-NEXT: mov v2.s[1], w12 -; CHECK-NEXT: cmn w13, #128 -; CHECK-NEXT: fcvtzs w12, d1 -; CHECK-NEXT: csel w13, w13, w9, gt -; CHECK-NEXT: mov v0.b[1], w11 -; CHECK-NEXT: fmov w14, s2 -; CHECK-NEXT: cmp w12, #127 -; CHECK-NEXT: fmov s1, w13 -; CHECK-NEXT: csel w12, w12, w8, lt -; CHECK-NEXT: cmn w12, #128 -; CHECK-NEXT: mov w11, v2.s[1] -; CHECK-NEXT: mov v0.b[2], w14 -; CHECK-NEXT: csel w12, w12, w9, gt -; CHECK-NEXT: cmp w15, #127 -; CHECK-NEXT: mov v1.s[1], w10 -; CHECK-NEXT: csel w8, w15, w8, lt ; CHECK-NEXT: cmn w8, #128 ; CHECK-NEXT: csel w8, w8, w9, gt -; CHECK-NEXT: mov v0.b[3], w11 ; CHECK-NEXT: fmov w9, s1 -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: mov w8, v1.s[1] -; CHECK-NEXT: mov v0.b[4], w9 -; CHECK-NEXT: mov v2.s[1], w12 -; CHECK-NEXT: mov v0.b[5], w8 +; CHECK-NEXT: mov v0.b[1], w11 +; CHECK-NEXT: mov v2.s[1], w14 +; CHECK-NEXT: mov w11, v1.s[1] +; CHECK-NEXT: mov v0.b[2], w9 +; CHECK-NEXT: fmov s1, w8 ; CHECK-NEXT: fmov w8, s2 ; CHECK-NEXT: mov w9, v2.s[1] +; CHECK-NEXT: mov v0.b[3], w11 +; CHECK-NEXT: mov v1.s[1], w10 +; CHECK-NEXT: mov v0.b[4], w8 +; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: mov v0.b[5], w9 +; CHECK-NEXT: mov w9, v1.s[1] ; CHECK-NEXT: mov v0.b[6], w8 ; CHECK-NEXT: mov v0.b[7], w9 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 @@ -3441,138 +3441,138 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov d16, v0.d[1] ; CHECK-NEXT: mov w8, #127 -; CHECK-NEXT: fcvtzs w11, d0 +; CHECK-NEXT: fcvtzs w13, d0 ; CHECK-NEXT: mov w9, #-128 -; CHECK-NEXT: fcvtzs w13, d1 ; CHECK-NEXT: mov d0, v2.d[1] -; CHECK-NEXT: fcvtzs w14, d2 -; CHECK-NEXT: fcvtzs w10, d16 +; CHECK-NEXT: fcvtzs w10, d1 +; CHECK-NEXT: fcvtzs w11, d2 +; CHECK-NEXT: mov d2, v4.d[1] +; CHECK-NEXT: fcvtzs w12, d16 ; CHECK-NEXT: mov d16, v1.d[1] ; CHECK-NEXT: mov d1, v3.d[1] -; CHECK-NEXT: fcvtzs w15, d0 -; CHECK-NEXT: cmp w10, #127 -; CHECK-NEXT: csel w10, w10, w8, lt -; CHECK-NEXT: fcvtzs w12, d16 -; CHECK-NEXT: cmn w10, #128 -; CHECK-NEXT: csel w10, w10, w9, gt -; CHECK-NEXT: cmp w11, #127 -; CHECK-NEXT: csel w11, w11, w8, lt -; CHECK-NEXT: cmn w11, #128 -; CHECK-NEXT: csel w11, w11, w9, gt +; CHECK-NEXT: fcvtzs w15, d3 +; CHECK-NEXT: fcvtzs w17, d0 +; CHECK-NEXT: mov d3, v5.d[1] ; CHECK-NEXT: cmp w12, #127 +; CHECK-NEXT: fcvtzs w16, d4 ; CHECK-NEXT: csel w12, w12, w8, lt +; CHECK-NEXT: fcvtzs w14, d16 ; CHECK-NEXT: cmn w12, #128 ; CHECK-NEXT: csel w12, w12, w9, gt ; CHECK-NEXT: cmp w13, #127 ; CHECK-NEXT: csel w13, w13, w8, lt -; CHECK-NEXT: fmov s0, w11 ; CHECK-NEXT: cmn w13, #128 -; CHECK-NEXT: csel w11, w13, w9, gt -; CHECK-NEXT: cmp w15, #127 -; CHECK-NEXT: mov v0.s[1], w10 -; CHECK-NEXT: csel w10, w15, w8, lt -; CHECK-NEXT: cmn w10, #128 -; CHECK-NEXT: fcvtzs w13, d3 -; CHECK-NEXT: fmov s2, w11 -; CHECK-NEXT: csel w10, w10, w9, gt +; CHECK-NEXT: csel w13, w13, w9, gt ; CHECK-NEXT: cmp w14, #127 -; CHECK-NEXT: fcvtzs w11, d1 -; CHECK-NEXT: mov w15, v0.s[1] ; CHECK-NEXT: csel w14, w14, w8, lt -; CHECK-NEXT: mov v2.s[1], w12 ; CHECK-NEXT: cmn w14, #128 -; CHECK-NEXT: csel w12, w14, w9, gt +; CHECK-NEXT: csel w14, w14, w9, gt +; CHECK-NEXT: cmp w10, #127 +; CHECK-NEXT: csel w10, w10, w8, lt +; CHECK-NEXT: fmov s0, w13 +; CHECK-NEXT: cmn w10, #128 +; CHECK-NEXT: fcvtzs w13, d1 +; CHECK-NEXT: csel w10, w10, w9, gt +; CHECK-NEXT: cmp w17, #127 +; CHECK-NEXT: mov v0.s[1], w12 +; CHECK-NEXT: csel w12, w17, w8, lt +; CHECK-NEXT: cmn w12, #128 +; CHECK-NEXT: fcvtzs w17, d7 +; CHECK-NEXT: fmov s1, w10 +; CHECK-NEXT: csel w10, w12, w9, gt ; CHECK-NEXT: cmp w11, #127 +; CHECK-NEXT: mov w12, v0.s[1] ; CHECK-NEXT: csel w11, w11, w8, lt -; CHECK-NEXT: mov d1, v4.d[1] -; CHECK-NEXT: mov v0.b[1], w15 ; CHECK-NEXT: cmn w11, #128 -; CHECK-NEXT: fmov w14, s2 +; CHECK-NEXT: mov v1.s[1], w14 ; CHECK-NEXT: csel w11, w11, w9, gt -; CHECK-NEXT: fmov s3, w12 ; CHECK-NEXT: cmp w13, #127 -; CHECK-NEXT: mov w12, v2.s[1] -; CHECK-NEXT: csel w13, w13, w8, lt -; CHECK-NEXT: mov v0.b[2], w14 -; CHECK-NEXT: cmn w13, #128 -; CHECK-NEXT: mov v3.s[1], w10 -; CHECK-NEXT: csel w13, w13, w9, gt -; CHECK-NEXT: fcvtzs w15, d1 -; CHECK-NEXT: fcvtzs w14, d4 -; CHECK-NEXT: mov d1, v5.d[1] -; CHECK-NEXT: mov v0.b[3], w12 -; CHECK-NEXT: fmov s4, w13 +; CHECK-NEXT: mov v0.b[1], w12 +; CHECK-NEXT: csel w12, w13, w8, lt +; CHECK-NEXT: mov w13, v1.s[1] +; CHECK-NEXT: fmov w14, s1 +; CHECK-NEXT: cmn w12, #128 +; CHECK-NEXT: fmov s1, w11 +; CHECK-NEXT: csel w11, w12, w9, gt ; CHECK-NEXT: cmp w15, #127 -; CHECK-NEXT: fmov w13, s3 -; CHECK-NEXT: csel w10, w15, w8, lt -; CHECK-NEXT: mov w12, v3.s[1] -; CHECK-NEXT: cmn w10, #128 -; CHECK-NEXT: fcvtzs w15, d1 -; CHECK-NEXT: csel w10, w10, w9, gt -; CHECK-NEXT: cmp w14, #127 -; CHECK-NEXT: mov v0.b[4], w13 -; CHECK-NEXT: csel w14, w14, w8, lt -; CHECK-NEXT: mov v4.s[1], w11 +; CHECK-NEXT: fcvtzs w12, d2 +; CHECK-NEXT: csel w15, w15, w8, lt +; CHECK-NEXT: cmn w15, #128 +; CHECK-NEXT: mov v1.s[1], w10 +; CHECK-NEXT: mov v0.b[2], w14 +; CHECK-NEXT: csel w10, w15, w9, gt +; CHECK-NEXT: cmp w12, #127 +; CHECK-NEXT: fmov w14, s1 +; CHECK-NEXT: csel w12, w12, w8, lt +; CHECK-NEXT: mov v0.b[3], w13 +; CHECK-NEXT: cmn w12, #128 +; CHECK-NEXT: mov w13, v1.s[1] +; CHECK-NEXT: fmov s1, w10 +; CHECK-NEXT: csel w10, w12, w9, gt +; CHECK-NEXT: cmp w16, #127 +; CHECK-NEXT: fcvtzs w12, d3 +; CHECK-NEXT: mov v0.b[4], w14 +; CHECK-NEXT: csel w14, w16, w8, lt ; CHECK-NEXT: cmn w14, #128 -; CHECK-NEXT: csel w14, w14, w9, gt -; CHECK-NEXT: fcvtzs w13, d5 -; CHECK-NEXT: cmp w15, #127 ; CHECK-NEXT: mov d2, v6.d[1] -; CHECK-NEXT: mov v0.b[5], w12 -; CHECK-NEXT: csel w11, w15, w8, lt -; CHECK-NEXT: fmov w12, s4 -; CHECK-NEXT: cmn w11, #128 -; CHECK-NEXT: fmov s1, w14 -; CHECK-NEXT: csel w11, w11, w9, gt +; CHECK-NEXT: mov v1.s[1], w11 +; CHECK-NEXT: csel w11, w14, w9, gt +; CHECK-NEXT: cmp w12, #127 +; CHECK-NEXT: fcvtzs w15, d5 +; CHECK-NEXT: csel w12, w12, w8, lt +; CHECK-NEXT: mov v0.b[5], w13 +; CHECK-NEXT: cmn w12, #128 +; CHECK-NEXT: fcvtzs w13, d2 +; CHECK-NEXT: csel w12, w12, w9, gt +; CHECK-NEXT: cmp w15, #127 +; CHECK-NEXT: fmov w16, s1 +; CHECK-NEXT: csel w15, w15, w8, lt +; CHECK-NEXT: cmn w15, #128 +; CHECK-NEXT: mov d2, v7.d[1] +; CHECK-NEXT: mov w14, v1.s[1] +; CHECK-NEXT: fmov s1, w11 +; CHECK-NEXT: csel w11, w15, w9, gt ; CHECK-NEXT: cmp w13, #127 -; CHECK-NEXT: mov w14, v4.s[1] -; CHECK-NEXT: mov v0.b[6], w12 +; CHECK-NEXT: fcvtzs w15, d6 +; CHECK-NEXT: mov v0.b[6], w16 ; CHECK-NEXT: csel w13, w13, w8, lt ; CHECK-NEXT: mov v1.s[1], w10 ; CHECK-NEXT: cmn w13, #128 -; CHECK-NEXT: fcvtzs w15, d2 -; CHECK-NEXT: csel w13, w13, w9, gt -; CHECK-NEXT: fcvtzs w10, d6 ; CHECK-NEXT: mov v0.b[7], w14 +; CHECK-NEXT: csel w10, w13, w9, gt ; CHECK-NEXT: cmp w15, #127 -; CHECK-NEXT: fmov w14, s1 -; CHECK-NEXT: csel w12, w15, w8, lt -; CHECK-NEXT: fmov s2, w13 -; CHECK-NEXT: mov w13, v1.s[1] -; CHECK-NEXT: mov d1, v7.d[1] -; CHECK-NEXT: cmn w12, #128 -; CHECK-NEXT: fcvtzs w15, d7 -; CHECK-NEXT: csel w12, w12, w9, gt -; CHECK-NEXT: cmp w10, #127 -; CHECK-NEXT: mov v0.b[8], w14 -; CHECK-NEXT: csel w10, w10, w8, lt -; CHECK-NEXT: mov v2.s[1], w11 -; CHECK-NEXT: cmn w10, #128 -; CHECK-NEXT: fcvtzs w11, d1 -; CHECK-NEXT: csel w10, w10, w9, gt -; CHECK-NEXT: mov v0.b[9], w13 -; CHECK-NEXT: fmov w14, s2 -; CHECK-NEXT: cmp w11, #127 -; CHECK-NEXT: fmov s1, w10 -; CHECK-NEXT: csel w10, w11, w8, lt -; CHECK-NEXT: cmn w10, #128 -; CHECK-NEXT: mov w13, v2.s[1] -; CHECK-NEXT: mov v0.b[10], w14 -; CHECK-NEXT: csel w10, w10, w9, gt -; CHECK-NEXT: cmp w15, #127 +; CHECK-NEXT: fcvtzs w14, d2 +; CHECK-NEXT: csel w13, w15, w8, lt +; CHECK-NEXT: cmn w13, #128 +; CHECK-NEXT: mov w15, v1.s[1] +; CHECK-NEXT: csel w13, w13, w9, gt +; CHECK-NEXT: fmov w16, s1 +; CHECK-NEXT: cmp w14, #127 +; CHECK-NEXT: fmov s1, w11 +; CHECK-NEXT: csel w11, w14, w8, lt +; CHECK-NEXT: cmn w11, #128 +; CHECK-NEXT: mov v0.b[8], w16 +; CHECK-NEXT: csel w11, w11, w9, gt +; CHECK-NEXT: cmp w17, #127 ; CHECK-NEXT: mov v1.s[1], w12 -; CHECK-NEXT: csel w8, w15, w8, lt +; CHECK-NEXT: csel w8, w17, w8, lt +; CHECK-NEXT: fmov s2, w13 ; CHECK-NEXT: cmn w8, #128 ; CHECK-NEXT: csel w8, w8, w9, gt -; CHECK-NEXT: mov v0.b[11], w13 ; CHECK-NEXT: fmov w9, s1 -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: mov w8, v1.s[1] -; CHECK-NEXT: mov v0.b[12], w9 +; CHECK-NEXT: mov v0.b[9], w15 +; CHECK-NEXT: mov w12, v1.s[1] ; CHECK-NEXT: mov v2.s[1], w10 -; CHECK-NEXT: mov v0.b[13], w8 +; CHECK-NEXT: mov v0.b[10], w9 +; CHECK-NEXT: fmov s1, w8 ; CHECK-NEXT: fmov w8, s2 ; CHECK-NEXT: mov w9, v2.s[1] +; CHECK-NEXT: mov v0.b[11], w12 +; CHECK-NEXT: mov v1.s[1], w11 +; CHECK-NEXT: mov v0.b[12], w8 +; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: mov v0.b[13], w9 +; CHECK-NEXT: mov w9, v1.s[1] ; CHECK-NEXT: mov v0.b[14], w8 ; CHECK-NEXT: mov v0.b[15], w9 ; CHECK-NEXT: ret @@ -3589,11 +3589,11 @@ ; CHECK-NEXT: mov w11, #-32768 ; CHECK-NEXT: mov d0, v2.d[1] ; CHECK-NEXT: fcvtzs w13, d1 -; CHECK-NEXT: fcvtzs w15, d3 +; CHECK-NEXT: fcvtzs w15, d2 ; CHECK-NEXT: fcvtzs w9, d4 ; CHECK-NEXT: mov d4, v1.d[1] -; CHECK-NEXT: mov d1, v3.d[1] ; CHECK-NEXT: fcvtzs w14, d0 +; CHECK-NEXT: mov d0, v3.d[1] ; CHECK-NEXT: cmp w9, w8 ; CHECK-NEXT: csel w9, w9, w8, lt ; CHECK-NEXT: fcvtzs w12, d4 @@ -3601,6 +3601,7 @@ ; CHECK-NEXT: csel w9, w9, w11, gt ; CHECK-NEXT: cmp w10, w8 ; CHECK-NEXT: csel w10, w10, w8, lt +; CHECK-NEXT: fcvtzs w16, d0 ; CHECK-NEXT: cmn w10, #8, lsl #12 // =32768 ; CHECK-NEXT: csel w10, w10, w11, gt ; CHECK-NEXT: cmp w12, w8 @@ -3608,47 +3609,46 @@ ; CHECK-NEXT: cmn w12, #8, lsl #12 // =32768 ; CHECK-NEXT: csel w12, w12, w11, gt ; CHECK-NEXT: cmp w13, w8 +; CHECK-NEXT: csel w13, w13, w8, lt ; CHECK-NEXT: fmov s0, w10 -; CHECK-NEXT: csel w10, w13, w8, lt -; CHECK-NEXT: cmn w10, #8, lsl #12 // =32768 -; CHECK-NEXT: fcvtzs w13, d2 -; CHECK-NEXT: csel w10, w10, w11, gt +; CHECK-NEXT: cmn w13, #8, lsl #12 // =32768 +; CHECK-NEXT: csel w13, w13, w11, gt ; CHECK-NEXT: cmp w14, w8 +; CHECK-NEXT: csel w14, w14, w8, lt ; CHECK-NEXT: mov v0.s[1], w9 -; CHECK-NEXT: csel w9, w14, w8, lt +; CHECK-NEXT: cmn w14, #8, lsl #12 // =32768 +; CHECK-NEXT: csel w14, w14, w11, gt +; CHECK-NEXT: cmp w15, w8 +; CHECK-NEXT: csel w15, w15, w8, lt +; CHECK-NEXT: fmov s1, w13 +; CHECK-NEXT: cmn w15, #8, lsl #12 // =32768 +; CHECK-NEXT: fcvtzs w13, d3 +; CHECK-NEXT: csel w10, w15, w11, gt +; CHECK-NEXT: cmp w16, w8 +; CHECK-NEXT: csel w9, w16, w8, lt +; CHECK-NEXT: mov v1.s[1], w12 ; CHECK-NEXT: cmn w9, #8, lsl #12 // =32768 -; CHECK-NEXT: fmov s2, w10 ; CHECK-NEXT: csel w9, w9, w11, gt ; CHECK-NEXT: cmp w13, w8 +; CHECK-NEXT: csel w8, w13, w8, lt +; CHECK-NEXT: fmov s2, w10 ; CHECK-NEXT: mov w10, v0.s[1] -; CHECK-NEXT: csel w13, w13, w8, lt -; CHECK-NEXT: mov v2.s[1], w12 -; CHECK-NEXT: cmn w13, #8, lsl #12 // =32768 -; CHECK-NEXT: fcvtzs w12, d1 -; CHECK-NEXT: csel w13, w13, w11, gt -; CHECK-NEXT: mov v0.h[1], w10 -; CHECK-NEXT: fmov w14, s2 -; CHECK-NEXT: cmp w12, w8 -; CHECK-NEXT: fmov s1, w13 -; CHECK-NEXT: csel w12, w12, w8, lt -; CHECK-NEXT: cmn w12, #8, lsl #12 // =32768 -; CHECK-NEXT: mov w10, v2.s[1] -; CHECK-NEXT: mov v0.h[2], w14 -; CHECK-NEXT: csel w12, w12, w11, gt -; CHECK-NEXT: cmp w15, w8 -; CHECK-NEXT: mov v1.s[1], w9 -; CHECK-NEXT: csel w8, w15, w8, lt ; CHECK-NEXT: cmn w8, #8, lsl #12 // =32768 ; CHECK-NEXT: csel w8, w8, w11, gt -; CHECK-NEXT: mov v0.h[3], w10 -; CHECK-NEXT: fmov w9, s1 -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: mov w8, v1.s[1] -; CHECK-NEXT: mov v0.h[4], w9 -; CHECK-NEXT: mov v2.s[1], w12 -; CHECK-NEXT: mov v0.h[5], w8 +; CHECK-NEXT: fmov w11, s1 +; CHECK-NEXT: mov v0.h[1], w10 +; CHECK-NEXT: mov v2.s[1], w14 +; CHECK-NEXT: mov w10, v1.s[1] +; CHECK-NEXT: mov v0.h[2], w11 +; CHECK-NEXT: fmov s1, w8 ; CHECK-NEXT: fmov w8, s2 +; CHECK-NEXT: mov v0.h[3], w10 +; CHECK-NEXT: mov v1.s[1], w9 ; CHECK-NEXT: mov w9, v2.s[1] +; CHECK-NEXT: mov v0.h[4], w8 +; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: mov v0.h[5], w9 +; CHECK-NEXT: mov w9, v1.s[1] ; CHECK-NEXT: mov v0.h[6], w8 ; CHECK-NEXT: mov v0.h[7], w9 ; CHECK-NEXT: ret @@ -3659,139 +3659,139 @@ define <16 x i16> @test_signed_v16f64_v16i16(<16 x double> %f) { ; CHECK-LABEL: test_signed_v16f64_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d16, v0.d[1] -; CHECK-NEXT: mov w9, #32767 -; CHECK-NEXT: fcvtzs w11, d0 -; CHECK-NEXT: mov w8, #-32768 -; CHECK-NEXT: mov d0, v2.d[1] -; CHECK-NEXT: fcvtzs w12, d1 -; CHECK-NEXT: fcvtzs w14, d2 -; CHECK-NEXT: mov d2, v4.d[1] -; CHECK-NEXT: fcvtzs w10, d16 +; CHECK-NEXT: mov d17, v0.d[1] +; CHECK-NEXT: mov w8, #32767 ; CHECK-NEXT: mov d16, v1.d[1] +; CHECK-NEXT: fcvtzs w12, d0 +; CHECK-NEXT: mov w9, #-32768 +; CHECK-NEXT: mov d0, v2.d[1] +; CHECK-NEXT: fcvtzs w10, d1 ; CHECK-NEXT: mov d1, v3.d[1] -; CHECK-NEXT: fcvtzs w16, d3 -; CHECK-NEXT: fcvtzs w15, d0 -; CHECK-NEXT: mov d3, v6.d[1] -; CHECK-NEXT: cmp w10, w9 -; CHECK-NEXT: csel w10, w10, w9, lt -; CHECK-NEXT: fcvtzs w13, d16 -; CHECK-NEXT: cmn w10, #8, lsl #12 // =32768 -; CHECK-NEXT: fcvtzs w17, d1 -; CHECK-NEXT: csel w10, w10, w8, gt -; CHECK-NEXT: cmp w11, w9 -; CHECK-NEXT: csel w11, w11, w9, lt -; CHECK-NEXT: mov d1, v5.d[1] -; CHECK-NEXT: cmn w11, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w11, w11, w8, gt -; CHECK-NEXT: cmp w13, w9 -; CHECK-NEXT: csel w13, w13, w9, lt -; CHECK-NEXT: cmn w13, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w13, w13, w8, gt -; CHECK-NEXT: cmp w12, w9 -; CHECK-NEXT: csel w12, w12, w9, lt -; CHECK-NEXT: fmov s0, w11 -; CHECK-NEXT: cmn w12, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w12, w12, w8, gt -; CHECK-NEXT: cmp w15, w9 -; CHECK-NEXT: csel w15, w15, w9, lt -; CHECK-NEXT: cmn w15, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w11, w15, w8, gt -; CHECK-NEXT: cmp w14, w9 -; CHECK-NEXT: csel w14, w14, w9, lt -; CHECK-NEXT: fcvtzs w15, d4 +; CHECK-NEXT: fcvtzs w14, d17 +; CHECK-NEXT: fcvtzs w11, d2 +; CHECK-NEXT: fcvtzs w15, d16 +; CHECK-NEXT: mov d2, v4.d[1] +; CHECK-NEXT: fcvtzs w17, d0 +; CHECK-NEXT: fcvtzs w13, d3 +; CHECK-NEXT: cmp w14, w8 +; CHECK-NEXT: fcvtzs w18, d1 +; CHECK-NEXT: csel w14, w14, w8, lt +; CHECK-NEXT: mov d3, v5.d[1] ; CHECK-NEXT: cmn w14, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w14, w14, w8, gt -; CHECK-NEXT: cmp w17, w9 -; CHECK-NEXT: mov v0.s[1], w10 -; CHECK-NEXT: csel w10, w17, w9, lt -; CHECK-NEXT: cmn w10, #8, lsl #12 // =32768 -; CHECK-NEXT: fcvtzs w17, d2 -; CHECK-NEXT: csel w10, w10, w8, gt -; CHECK-NEXT: cmp w16, w9 -; CHECK-NEXT: fmov s2, w12 -; CHECK-NEXT: csel w12, w16, w9, lt +; CHECK-NEXT: fcvtzs w16, d4 +; CHECK-NEXT: csel w14, w14, w9, gt +; CHECK-NEXT: cmp w12, w8 +; CHECK-NEXT: csel w12, w12, w8, lt +; CHECK-NEXT: mov d4, v6.d[1] ; CHECK-NEXT: cmn w12, #8, lsl #12 // =32768 -; CHECK-NEXT: mov w16, v0.s[1] -; CHECK-NEXT: csel w12, w12, w8, gt -; CHECK-NEXT: cmp w17, w9 -; CHECK-NEXT: mov v2.s[1], w13 -; CHECK-NEXT: csel w13, w17, w9, lt -; CHECK-NEXT: cmn w13, #8, lsl #12 // =32768 -; CHECK-NEXT: fcvtzs w17, d1 -; CHECK-NEXT: csel w13, w13, w8, gt -; CHECK-NEXT: cmp w15, w9 -; CHECK-NEXT: csel w15, w15, w9, lt -; CHECK-NEXT: fmov s4, w14 +; CHECK-NEXT: csel w12, w12, w9, gt +; CHECK-NEXT: cmp w15, w8 +; CHECK-NEXT: csel w15, w15, w8, lt ; CHECK-NEXT: cmn w15, #8, lsl #12 // =32768 -; CHECK-NEXT: mov v0.h[1], w16 -; CHECK-NEXT: fcvtzs w16, d5 -; CHECK-NEXT: csel w15, w15, w8, gt -; CHECK-NEXT: cmp w17, w9 -; CHECK-NEXT: csel w17, w17, w9, lt +; CHECK-NEXT: csel w15, w15, w9, gt +; CHECK-NEXT: cmp w10, w8 +; CHECK-NEXT: csel w10, w10, w8, lt +; CHECK-NEXT: fmov s0, w12 +; CHECK-NEXT: cmn w10, #8, lsl #12 // =32768 +; CHECK-NEXT: fcvtzs w12, d2 +; CHECK-NEXT: csel w10, w10, w9, gt +; CHECK-NEXT: cmp w17, w8 +; CHECK-NEXT: csel w17, w17, w8, lt +; CHECK-NEXT: mov v0.s[1], w14 ; CHECK-NEXT: cmn w17, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w14, w17, w8, gt -; CHECK-NEXT: cmp w16, w9 -; CHECK-NEXT: fmov s1, w15 -; CHECK-NEXT: csel w15, w16, w9, lt -; CHECK-NEXT: fcvtzs w16, d3 -; CHECK-NEXT: cmn w15, #8, lsl #12 // =32768 -; CHECK-NEXT: mov v4.s[1], w11 -; CHECK-NEXT: csel w11, w15, w8, gt -; CHECK-NEXT: fcvtzs w15, d6 -; CHECK-NEXT: mov v1.s[1], w13 -; CHECK-NEXT: cmp w16, w9 -; CHECK-NEXT: fmov s3, w11 -; CHECK-NEXT: csel w16, w16, w9, lt -; CHECK-NEXT: fmov w11, s2 -; CHECK-NEXT: mov w13, v2.s[1] -; CHECK-NEXT: mov d2, v7.d[1] +; CHECK-NEXT: csel w14, w17, w9, gt +; CHECK-NEXT: cmp w11, w8 +; CHECK-NEXT: csel w11, w11, w8, lt +; CHECK-NEXT: fmov s1, w10 +; CHECK-NEXT: cmn w11, #8, lsl #12 // =32768 +; CHECK-NEXT: fcvtzs w17, d3 +; CHECK-NEXT: csel w10, w11, w9, gt +; CHECK-NEXT: cmp w18, w8 +; CHECK-NEXT: csel w18, w18, w8, lt +; CHECK-NEXT: mov w11, v0.s[1] +; CHECK-NEXT: cmn w18, #8, lsl #12 // =32768 +; CHECK-NEXT: mov v1.s[1], w15 +; CHECK-NEXT: csel w15, w18, w9, gt +; CHECK-NEXT: cmp w13, w8 +; CHECK-NEXT: mov v0.h[1], w11 +; CHECK-NEXT: csel w11, w13, w8, lt +; CHECK-NEXT: cmn w11, #8, lsl #12 // =32768 +; CHECK-NEXT: fmov s2, w10 +; CHECK-NEXT: csel w11, w11, w9, gt +; CHECK-NEXT: cmp w12, w8 +; CHECK-NEXT: csel w12, w12, w8, lt +; CHECK-NEXT: fmov w18, s1 +; CHECK-NEXT: cmn w12, #8, lsl #12 // =32768 +; CHECK-NEXT: mov v2.s[1], w14 +; CHECK-NEXT: csel w10, w12, w9, gt +; CHECK-NEXT: cmp w16, w8 +; CHECK-NEXT: csel w16, w16, w8, lt +; CHECK-NEXT: fcvtzs w12, d5 ; CHECK-NEXT: cmn w16, #8, lsl #12 // =32768 -; CHECK-NEXT: csel w16, w16, w8, gt -; CHECK-NEXT: cmp w15, w9 -; CHECK-NEXT: mov v0.h[2], w11 -; CHECK-NEXT: csel w11, w15, w9, lt -; CHECK-NEXT: mov w15, v1.s[1] +; CHECK-NEXT: mov v0.h[2], w18 +; CHECK-NEXT: csel w16, w16, w9, gt +; CHECK-NEXT: cmp w17, w8 +; CHECK-NEXT: csel w17, w17, w8, lt +; CHECK-NEXT: fcvtzs w18, d4 +; CHECK-NEXT: cmn w17, #8, lsl #12 // =32768 +; CHECK-NEXT: mov d3, v7.d[1] +; CHECK-NEXT: csel w14, w17, w9, gt +; CHECK-NEXT: cmp w12, w8 +; CHECK-NEXT: csel w12, w12, w8, lt +; CHECK-NEXT: fcvtzs w17, d6 +; CHECK-NEXT: cmn w12, #8, lsl #12 // =32768 +; CHECK-NEXT: fmov s4, w11 +; CHECK-NEXT: csel w12, w12, w9, gt +; CHECK-NEXT: cmp w18, w8 +; CHECK-NEXT: csel w11, w18, w8, lt +; CHECK-NEXT: mov w13, v1.s[1] ; CHECK-NEXT: cmn w11, #8, lsl #12 // =32768 +; CHECK-NEXT: fmov s1, w16 +; CHECK-NEXT: csel w11, w11, w9, gt +; CHECK-NEXT: cmp w17, w8 +; CHECK-NEXT: fcvtzs w16, d3 +; CHECK-NEXT: csel w17, w17, w8, lt +; CHECK-NEXT: fmov s3, w12 +; CHECK-NEXT: cmn w17, #8, lsl #12 // =32768 +; CHECK-NEXT: mov v1.s[1], w10 +; CHECK-NEXT: csel w10, w17, w9, gt +; CHECK-NEXT: fcvtzs w12, d7 +; CHECK-NEXT: cmp w16, w8 ; CHECK-NEXT: mov v3.s[1], w14 -; CHECK-NEXT: fcvtzs w14, d2 -; CHECK-NEXT: csel w11, w11, w8, gt -; CHECK-NEXT: mov v0.h[3], w13 -; CHECK-NEXT: mov v1.h[1], w15 -; CHECK-NEXT: cmp w14, w9 -; CHECK-NEXT: fmov w13, s3 -; CHECK-NEXT: csel w14, w14, w9, lt -; CHECK-NEXT: fcvtzs w15, d7 -; CHECK-NEXT: fmov s2, w11 +; CHECK-NEXT: csel w14, w16, w8, lt ; CHECK-NEXT: cmn w14, #8, lsl #12 // =32768 -; CHECK-NEXT: mov w11, v3.s[1] -; CHECK-NEXT: mov v1.h[2], w13 -; CHECK-NEXT: csel w13, w14, w8, gt -; CHECK-NEXT: cmp w15, w9 -; CHECK-NEXT: fmov s3, w12 -; CHECK-NEXT: mov v2.s[1], w16 -; CHECK-NEXT: csel w9, w15, w9, lt -; CHECK-NEXT: cmn w9, #8, lsl #12 // =32768 -; CHECK-NEXT: fmov w12, s4 -; CHECK-NEXT: csel w8, w9, w8, gt -; CHECK-NEXT: mov w14, v4.s[1] -; CHECK-NEXT: mov v1.h[3], w11 +; CHECK-NEXT: fmov s5, w10 +; CHECK-NEXT: csel w10, w14, w9, gt +; CHECK-NEXT: cmp w12, w8 +; CHECK-NEXT: csel w8, w12, w8, lt +; CHECK-NEXT: mov w14, v1.s[1] +; CHECK-NEXT: cmn w8, #8, lsl #12 // =32768 +; CHECK-NEXT: mov v5.s[1], w11 +; CHECK-NEXT: csel w8, w8, w9, gt +; CHECK-NEXT: fmov w9, s3 +; CHECK-NEXT: mov v1.h[1], w14 +; CHECK-NEXT: mov w12, v3.s[1] +; CHECK-NEXT: mov w14, v2.s[1] ; CHECK-NEXT: fmov w11, s2 -; CHECK-NEXT: mov w9, v2.s[1] +; CHECK-NEXT: mov v1.h[2], w9 ; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: mov v0.h[4], w12 -; CHECK-NEXT: mov v1.h[4], w11 -; CHECK-NEXT: mov v3.s[1], w10 -; CHECK-NEXT: mov v2.s[1], w13 +; CHECK-NEXT: fmov w9, s5 +; CHECK-NEXT: mov v0.h[3], w13 +; CHECK-NEXT: mov v1.h[3], w12 +; CHECK-NEXT: mov v4.s[1], w15 +; CHECK-NEXT: mov v2.s[1], w10 +; CHECK-NEXT: mov w8, v5.s[1] +; CHECK-NEXT: mov v0.h[4], w11 +; CHECK-NEXT: mov v1.h[4], w9 +; CHECK-NEXT: fmov w9, s4 +; CHECK-NEXT: fmov w10, s2 ; CHECK-NEXT: mov v0.h[5], w14 -; CHECK-NEXT: mov v1.h[5], w9 -; CHECK-NEXT: fmov w8, s3 -; CHECK-NEXT: fmov w9, s2 -; CHECK-NEXT: mov w10, v3.s[1] +; CHECK-NEXT: mov v1.h[5], w8 +; CHECK-NEXT: mov w8, v4.s[1] ; CHECK-NEXT: mov w11, v2.s[1] -; CHECK-NEXT: mov v0.h[6], w8 -; CHECK-NEXT: mov v1.h[6], w9 -; CHECK-NEXT: mov v0.h[7], w10 +; CHECK-NEXT: mov v0.h[6], w9 +; CHECK-NEXT: mov v1.h[6], w10 +; CHECK-NEXT: mov v0.h[7], w8 ; CHECK-NEXT: mov v1.h[7], w11 ; CHECK-NEXT: ret %x = call <16 x i16> @llvm.fptosi.sat.v16f64.v16i16(<16 x double> %f) diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll --- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll @@ -62,13 +62,13 @@ ; CHECK-NEXT: mov v0.s[1], v1.s[0] ; CHECK-NEXT: fcvtzu v4.4s, v4.4s ; CHECK-NEXT: mov v0.s[2], v2.s[0] -; CHECK-NEXT: fmov w4, s4 ; CHECK-NEXT: mov v0.s[3], v3.s[0] +; CHECK-NEXT: fmov w4, s4 ; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: mov w1, v0.s[1] ; CHECK-NEXT: mov w2, v0.s[2] ; CHECK-NEXT: mov w3, v0.s[3] -; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %x = call <5 x i32> @llvm.fptoui.sat.v5f32.v5i32(<5 x float> %f) ret <5 x i32> %x @@ -88,13 +88,13 @@ ; CHECK-NEXT: mov v0.s[2], v2.s[0] ; CHECK-NEXT: fcvtzu v1.4s, v4.4s ; CHECK-NEXT: mov v0.s[3], v3.s[0] -; CHECK-NEXT: mov w5, v1.s[1] -; CHECK-NEXT: fmov w4, s1 ; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: fmov w4, s1 +; CHECK-NEXT: mov w5, v1.s[1] +; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: mov w1, v0.s[1] ; CHECK-NEXT: mov w2, v0.s[2] ; CHECK-NEXT: mov w3, v0.s[3] -; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %x = call <6 x i32> @llvm.fptoui.sat.v6f32.v6i32(<6 x float> %f) ret <6 x i32> %x @@ -117,13 +117,13 @@ ; CHECK-NEXT: mov v0.s[3], v3.s[0] ; CHECK-NEXT: fcvtzu v1.4s, v4.4s ; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: fmov w4, s1 ; CHECK-NEXT: mov w5, v1.s[1] ; CHECK-NEXT: mov w6, v1.s[2] -; CHECK-NEXT: fmov w4, s1 +; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: mov w1, v0.s[1] ; CHECK-NEXT: mov w2, v0.s[2] ; CHECK-NEXT: mov w3, v0.s[3] -; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %x = call <7 x i32> @llvm.fptoui.sat.v7f32.v7i32(<7 x float> %f) ret <7 x i32> %x @@ -177,13 +177,13 @@ define <3 x i32> @test_unsigned_v3f64_v3i32(<3 x double> %f) { ; CHECK-LABEL: test_unsigned_v3f64_v3i32: ; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzu w8, d0 -; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: fcvtzu w9, d0 ; CHECK-NEXT: fcvtzu w8, d1 +; CHECK-NEXT: fmov s0, w9 +; CHECK-NEXT: fcvtzu w9, d2 ; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: fcvtzu w8, d2 -; CHECK-NEXT: mov v0.s[2], w8 ; CHECK-NEXT: fcvtzu w8, d0 +; CHECK-NEXT: mov v0.s[2], w9 ; CHECK-NEXT: mov v0.s[3], w8 ; CHECK-NEXT: ret %x = call <3 x i32> @llvm.fptoui.sat.v3f64.v3i32(<3 x double> %f) @@ -195,13 +195,13 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov d2, v0.d[1] ; CHECK-NEXT: fcvtzu w8, d0 +; CHECK-NEXT: fcvtzu w9, d1 ; CHECK-NEXT: fmov s0, w8 ; CHECK-NEXT: fcvtzu w8, d2 +; CHECK-NEXT: mov d2, v1.d[1] ; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: fcvtzu w8, d1 -; CHECK-NEXT: mov d1, v1.d[1] -; CHECK-NEXT: mov v0.s[2], w8 -; CHECK-NEXT: fcvtzu w8, d1 +; CHECK-NEXT: fcvtzu w8, d2 +; CHECK-NEXT: mov v0.s[2], w9 ; CHECK-NEXT: mov v0.s[3], w8 ; CHECK-NEXT: ret %x = call <4 x i32> @llvm.fptoui.sat.v4f64.v4i32(<4 x double> %f) @@ -548,11 +548,11 @@ ; CHECK-NEXT: fcvtl2 v0.4s, v0.8h ; CHECK-NEXT: fcvtzu v1.4s, v1.4s ; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: fmov w0, s1 +; CHECK-NEXT: fmov w4, s0 ; CHECK-NEXT: mov w1, v1.s[1] ; CHECK-NEXT: mov w2, v1.s[2] ; CHECK-NEXT: mov w3, v1.s[3] -; CHECK-NEXT: fmov w0, s1 -; CHECK-NEXT: fmov w4, s0 ; CHECK-NEXT: ret %x = call <5 x i32> @llvm.fptoui.sat.v5f16.v5i32(<5 x half> %f) ret <5 x i32> %x @@ -565,12 +565,12 @@ ; CHECK-NEXT: fcvtl2 v0.4s, v0.8h ; CHECK-NEXT: fcvtzu v1.4s, v1.4s ; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: fmov w0, s1 +; CHECK-NEXT: fmov w4, s0 ; CHECK-NEXT: mov w1, v1.s[1] ; CHECK-NEXT: mov w2, v1.s[2] ; CHECK-NEXT: mov w3, v1.s[3] ; CHECK-NEXT: mov w5, v0.s[1] -; CHECK-NEXT: fmov w0, s1 -; CHECK-NEXT: fmov w4, s0 ; CHECK-NEXT: ret %x = call <6 x i32> @llvm.fptoui.sat.v6f16.v6i32(<6 x half> %f) ret <6 x i32> %x @@ -579,17 +579,17 @@ define <7 x i32> @test_unsigned_v7f16_v7i32(<7 x half> %f) { ; CHECK-LABEL: test_unsigned_v7f16_v7i32: ; CHECK: // %bb.0: -; CHECK-NEXT: fcvtl v1.4s, v0.4h -; CHECK-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-NEXT: fcvtl v0.4s, v0.4h ; CHECK-NEXT: fcvtzu v1.4s, v1.4s ; CHECK-NEXT: fcvtzu v0.4s, v0.4s -; CHECK-NEXT: mov w1, v1.s[1] -; CHECK-NEXT: mov w2, v1.s[2] -; CHECK-NEXT: mov w3, v1.s[3] -; CHECK-NEXT: mov w5, v0.s[1] -; CHECK-NEXT: mov w6, v0.s[2] -; CHECK-NEXT: fmov w0, s1 -; CHECK-NEXT: fmov w4, s0 +; CHECK-NEXT: fmov w4, s1 +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: mov w5, v1.s[1] +; CHECK-NEXT: mov w1, v0.s[1] +; CHECK-NEXT: mov w2, v0.s[2] +; CHECK-NEXT: mov w3, v0.s[3] +; CHECK-NEXT: mov w6, v1.s[2] ; CHECK-NEXT: ret %x = call <7 x i32> @llvm.fptoui.sat.v7f16.v7i32(<7 x half> %f) ret <7 x i32> %x @@ -624,8 +624,8 @@ define <2 x i1> @test_unsigned_v2f32_v2i1(<2 x float> %f) { ; CHECK-LABEL: test_unsigned_v2f32_v2i1: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2s, #1 ; CHECK-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-NEXT: movi v1.2s, #1 ; CHECK-NEXT: umin v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %x = call <2 x i1> @llvm.fptoui.sat.v2f32.v2i1(<2 x float> %f) @@ -635,8 +635,8 @@ define <2 x i8> @test_unsigned_v2f32_v2i8(<2 x float> %f) { ; CHECK-LABEL: test_unsigned_v2f32_v2i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0x0000ff000000ff ; CHECK-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-NEXT: movi d1, #0x0000ff000000ff ; CHECK-NEXT: umin v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %x = call <2 x i8> @llvm.fptoui.sat.v2f32.v2i8(<2 x float> %f) @@ -646,8 +646,8 @@ define <2 x i13> @test_unsigned_v2f32_v2i13(<2 x float> %f) { ; CHECK-LABEL: test_unsigned_v2f32_v2i13: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2s, #31, msl #8 ; CHECK-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-NEXT: movi v1.2s, #31, msl #8 ; CHECK-NEXT: umin v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %x = call <2 x i13> @llvm.fptoui.sat.v2f32.v2i13(<2 x float> %f) @@ -657,8 +657,8 @@ define <2 x i16> @test_unsigned_v2f32_v2i16(<2 x float> %f) { ; CHECK-LABEL: test_unsigned_v2f32_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0x00ffff0000ffff ; CHECK-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-NEXT: movi d1, #0x00ffff0000ffff ; CHECK-NEXT: umin v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %x = call <2 x i16> @llvm.fptoui.sat.v2f32.v2i16(<2 x float> %f) @@ -668,8 +668,8 @@ define <2 x i19> @test_unsigned_v2f32_v2i19(<2 x float> %f) { ; CHECK-LABEL: test_unsigned_v2f32_v2i19: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2s, #7, msl #16 ; CHECK-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-NEXT: movi v1.2s, #7, msl #16 ; CHECK-NEXT: umin v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %x = call <2 x i19> @llvm.fptoui.sat.v2f32.v2i19(<2 x float> %f) @@ -837,8 +837,8 @@ define <4 x i1> @test_unsigned_v4f32_v4i1(<4 x float> %f) { ; CHECK-LABEL: test_unsigned_v4f32_v4i1: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret @@ -849,8 +849,8 @@ define <4 x i8> @test_unsigned_v4f32_v4i8(<4 x float> %f) { ; CHECK-LABEL: test_unsigned_v4f32_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0x0000ff000000ff ; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: movi v1.2d, #0x0000ff000000ff ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret @@ -861,8 +861,8 @@ define <4 x i13> @test_unsigned_v4f32_v4i13(<4 x float> %f) { ; CHECK-LABEL: test_unsigned_v4f32_v4i13: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.4s, #31, msl #8 ; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: movi v1.4s, #31, msl #8 ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret @@ -883,8 +883,8 @@ define <4 x i19> @test_unsigned_v4f32_v4i19(<4 x float> %f) { ; CHECK-LABEL: test_unsigned_v4f32_v4i19: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.4s, #7, msl #16 ; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: movi v1.4s, #7, msl #16 ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %x = call <4 x i19> @llvm.fptoui.sat.v4f32.v4i19(<4 x float> %f) @@ -906,9 +906,9 @@ ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: mov x8, #1125899906842623 ; CHECK-NEXT: mov s3, v0.s[1] -; CHECK-NEXT: fcvtzu x11, s0 ; CHECK-NEXT: mov s2, v1.s[1] ; CHECK-NEXT: fcvtzu x9, s1 +; CHECK-NEXT: fcvtzu x11, s0 ; CHECK-NEXT: fcvtzu x12, s3 ; CHECK-NEXT: cmp x9, x8 ; CHECK-NEXT: fcvtzu x10, s2 @@ -968,13 +968,13 @@ ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: mov w8, #1904214015 -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, #0.0 +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: mov x25, #68719476735 ; CHECK-NEXT: fmov s9, w8 -; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, xzr, x1, lt +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 @@ -1018,8 +1018,8 @@ ; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: ldp x30, x25, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: add sp, sp, #112 ; CHECK-NEXT: ret @@ -1051,12 +1051,12 @@ ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: mov w8, #2139095039 -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, #0.0 +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: fmov s9, w8 -; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: csel x8, xzr, x1, lt ; CHECK-NEXT: csel x9, xzr, x0, lt +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 @@ -1384,8 +1384,8 @@ ; ; CHECK-FP16-LABEL: test_unsigned_v4f16_v4i1: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: movi v1.4h, #1 ; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h +; CHECK-FP16-NEXT: movi v1.4h, #1 ; CHECK-FP16-NEXT: umin v0.4h, v0.4h, v1.4h ; CHECK-FP16-NEXT: ret %x = call <4 x i1> @llvm.fptoui.sat.v4f16.v4i1(<4 x half> %f) @@ -1404,8 +1404,8 @@ ; ; CHECK-FP16-LABEL: test_unsigned_v4f16_v4i8: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: movi d1, #0xff00ff00ff00ff ; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h +; CHECK-FP16-NEXT: movi d1, #0xff00ff00ff00ff ; CHECK-FP16-NEXT: umin v0.4h, v0.4h, v1.4h ; CHECK-FP16-NEXT: ret %x = call <4 x i8> @llvm.fptoui.sat.v4f16.v4i8(<4 x half> %f) @@ -1637,8 +1637,8 @@ ; CHECK-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: ldp x30, x25, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: add sp, sp, #96 @@ -1723,8 +1723,8 @@ ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload ; CHECK-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret @@ -1755,18 +1755,19 @@ ; CHECK-CVT-NEXT: mov s3, v1.s[2] ; CHECK-CVT-NEXT: mov s4, v1.s[3] ; CHECK-CVT-NEXT: mov s5, v0.s[1] -; CHECK-CVT-NEXT: fcvtzu w9, s1 +; CHECK-CVT-NEXT: fcvtzu w8, s1 ; CHECK-CVT-NEXT: fcvtzu w10, s0 ; CHECK-CVT-NEXT: mov s1, v0.s[2] ; CHECK-CVT-NEXT: mov s0, v0.s[3] -; CHECK-CVT-NEXT: fcvtzu w8, s2 +; CHECK-CVT-NEXT: fcvtzu w9, s2 ; CHECK-CVT-NEXT: fcvtzu w11, s3 ; CHECK-CVT-NEXT: fcvtzu w12, s4 ; CHECK-CVT-NEXT: fcvtzu w13, s5 -; CHECK-CVT-NEXT: cmp w8, #1 -; CHECK-CVT-NEXT: csinc w8, w8, wzr, lo ; CHECK-CVT-NEXT: cmp w9, #1 +; CHECK-CVT-NEXT: fcvtzu w14, s1 ; CHECK-CVT-NEXT: csinc w9, w9, wzr, lo +; CHECK-CVT-NEXT: cmp w8, #1 +; CHECK-CVT-NEXT: csinc w8, w8, wzr, lo ; CHECK-CVT-NEXT: cmp w11, #1 ; CHECK-CVT-NEXT: csinc w11, w11, wzr, lo ; CHECK-CVT-NEXT: cmp w12, #1 @@ -1775,28 +1776,27 @@ ; CHECK-CVT-NEXT: csinc w13, w13, wzr, lo ; CHECK-CVT-NEXT: cmp w10, #1 ; CHECK-CVT-NEXT: csinc w10, w10, wzr, lo -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvtzu w9, s1 -; CHECK-CVT-NEXT: fmov s3, w10 -; CHECK-CVT-NEXT: mov v2.s[1], w8 -; CHECK-CVT-NEXT: cmp w9, #1 -; CHECK-CVT-NEXT: csinc w8, w9, wzr, lo -; CHECK-CVT-NEXT: fcvtzu w9, s0 -; CHECK-CVT-NEXT: mov v3.s[1], w13 -; CHECK-CVT-NEXT: mov v2.s[2], w11 -; CHECK-CVT-NEXT: cmp w9, #1 -; CHECK-CVT-NEXT: mov v3.s[2], w8 -; CHECK-CVT-NEXT: csinc w8, w9, wzr, lo -; CHECK-CVT-NEXT: mov v2.s[3], w12 -; CHECK-CVT-NEXT: mov v3.s[3], w8 -; CHECK-CVT-NEXT: uzp1 v0.8h, v3.8h, v2.8h +; CHECK-CVT-NEXT: fmov s1, w8 +; CHECK-CVT-NEXT: fcvtzu w8, s0 +; CHECK-CVT-NEXT: cmp w14, #1 +; CHECK-CVT-NEXT: fmov s2, w10 +; CHECK-CVT-NEXT: mov v1.s[1], w9 +; CHECK-CVT-NEXT: csinc w9, w14, wzr, lo +; CHECK-CVT-NEXT: cmp w8, #1 +; CHECK-CVT-NEXT: csinc w8, w8, wzr, lo +; CHECK-CVT-NEXT: mov v1.s[2], w11 +; CHECK-CVT-NEXT: mov v2.s[1], w13 +; CHECK-CVT-NEXT: mov v1.s[3], w12 +; CHECK-CVT-NEXT: mov v2.s[2], w9 +; CHECK-CVT-NEXT: mov v2.s[3], w8 +; CHECK-CVT-NEXT: uzp1 v0.8h, v2.8h, v1.8h ; CHECK-CVT-NEXT: xtn v0.8b, v0.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i1: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: movi v1.8h, #1 ; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-FP16-NEXT: movi v1.8h, #1 ; CHECK-FP16-NEXT: umin v0.8h, v0.8h, v1.8h ; CHECK-FP16-NEXT: xtn v0.8b, v0.8h ; CHECK-FP16-NEXT: ret @@ -1814,18 +1814,19 @@ ; CHECK-CVT-NEXT: mov s3, v1.s[2] ; CHECK-CVT-NEXT: mov s4, v1.s[3] ; CHECK-CVT-NEXT: mov s5, v0.s[1] -; CHECK-CVT-NEXT: fcvtzu w10, s1 +; CHECK-CVT-NEXT: fcvtzu w9, s1 ; CHECK-CVT-NEXT: fcvtzu w11, s0 ; CHECK-CVT-NEXT: mov s1, v0.s[2] ; CHECK-CVT-NEXT: mov s0, v0.s[3] -; CHECK-CVT-NEXT: fcvtzu w9, s2 +; CHECK-CVT-NEXT: fcvtzu w10, s2 ; CHECK-CVT-NEXT: fcvtzu w12, s3 ; CHECK-CVT-NEXT: fcvtzu w13, s4 ; CHECK-CVT-NEXT: fcvtzu w14, s5 -; CHECK-CVT-NEXT: cmp w9, #255 -; CHECK-CVT-NEXT: csel w9, w9, w8, lo ; CHECK-CVT-NEXT: cmp w10, #255 +; CHECK-CVT-NEXT: fcvtzu w15, s1 ; CHECK-CVT-NEXT: csel w10, w10, w8, lo +; CHECK-CVT-NEXT: cmp w9, #255 +; CHECK-CVT-NEXT: csel w9, w9, w8, lo ; CHECK-CVT-NEXT: cmp w12, #255 ; CHECK-CVT-NEXT: csel w12, w12, w8, lo ; CHECK-CVT-NEXT: cmp w13, #255 @@ -1834,21 +1835,20 @@ ; CHECK-CVT-NEXT: csel w14, w14, w8, lo ; CHECK-CVT-NEXT: cmp w11, #255 ; CHECK-CVT-NEXT: csel w11, w11, w8, lo -; CHECK-CVT-NEXT: fmov s2, w10 -; CHECK-CVT-NEXT: fcvtzu w10, s1 -; CHECK-CVT-NEXT: fmov s3, w11 -; CHECK-CVT-NEXT: mov v2.s[1], w9 -; CHECK-CVT-NEXT: cmp w10, #255 -; CHECK-CVT-NEXT: csel w9, w10, w8, lo -; CHECK-CVT-NEXT: fcvtzu w10, s0 -; CHECK-CVT-NEXT: mov v3.s[1], w14 -; CHECK-CVT-NEXT: mov v2.s[2], w12 -; CHECK-CVT-NEXT: cmp w10, #255 -; CHECK-CVT-NEXT: csel w8, w10, w8, lo -; CHECK-CVT-NEXT: mov v3.s[2], w9 -; CHECK-CVT-NEXT: mov v2.s[3], w13 -; CHECK-CVT-NEXT: mov v3.s[3], w8 -; CHECK-CVT-NEXT: uzp1 v0.8h, v3.8h, v2.8h +; CHECK-CVT-NEXT: fmov s1, w9 +; CHECK-CVT-NEXT: fcvtzu w9, s0 +; CHECK-CVT-NEXT: cmp w15, #255 +; CHECK-CVT-NEXT: fmov s2, w11 +; CHECK-CVT-NEXT: mov v1.s[1], w10 +; CHECK-CVT-NEXT: csel w10, w15, w8, lo +; CHECK-CVT-NEXT: cmp w9, #255 +; CHECK-CVT-NEXT: csel w8, w9, w8, lo +; CHECK-CVT-NEXT: mov v1.s[2], w12 +; CHECK-CVT-NEXT: mov v2.s[1], w14 +; CHECK-CVT-NEXT: mov v1.s[3], w13 +; CHECK-CVT-NEXT: mov v2.s[2], w10 +; CHECK-CVT-NEXT: mov v2.s[3], w8 +; CHECK-CVT-NEXT: uzp1 v0.8h, v2.8h, v1.8h ; CHECK-CVT-NEXT: xtn v0.8b, v0.8h ; CHECK-CVT-NEXT: ret ; @@ -1871,18 +1871,19 @@ ; CHECK-CVT-NEXT: mov s3, v1.s[2] ; CHECK-CVT-NEXT: mov s4, v1.s[3] ; CHECK-CVT-NEXT: mov s5, v0.s[1] -; CHECK-CVT-NEXT: fcvtzu w10, s1 +; CHECK-CVT-NEXT: fcvtzu w9, s1 ; CHECK-CVT-NEXT: fcvtzu w11, s0 ; CHECK-CVT-NEXT: mov s1, v0.s[2] ; CHECK-CVT-NEXT: mov s0, v0.s[3] -; CHECK-CVT-NEXT: fcvtzu w9, s2 +; CHECK-CVT-NEXT: fcvtzu w10, s2 ; CHECK-CVT-NEXT: fcvtzu w12, s3 ; CHECK-CVT-NEXT: fcvtzu w13, s4 ; CHECK-CVT-NEXT: fcvtzu w14, s5 -; CHECK-CVT-NEXT: cmp w9, w8 -; CHECK-CVT-NEXT: csel w9, w9, w8, lo ; CHECK-CVT-NEXT: cmp w10, w8 +; CHECK-CVT-NEXT: fcvtzu w15, s1 ; CHECK-CVT-NEXT: csel w10, w10, w8, lo +; CHECK-CVT-NEXT: cmp w9, w8 +; CHECK-CVT-NEXT: csel w9, w9, w8, lo ; CHECK-CVT-NEXT: cmp w12, w8 ; CHECK-CVT-NEXT: csel w12, w12, w8, lo ; CHECK-CVT-NEXT: cmp w13, w8 @@ -1891,21 +1892,20 @@ ; CHECK-CVT-NEXT: csel w14, w14, w8, lo ; CHECK-CVT-NEXT: cmp w11, w8 ; CHECK-CVT-NEXT: csel w11, w11, w8, lo -; CHECK-CVT-NEXT: fmov s2, w10 -; CHECK-CVT-NEXT: fcvtzu w10, s1 -; CHECK-CVT-NEXT: fmov s3, w11 -; CHECK-CVT-NEXT: mov v2.s[1], w9 -; CHECK-CVT-NEXT: cmp w10, w8 -; CHECK-CVT-NEXT: csel w9, w10, w8, lo -; CHECK-CVT-NEXT: fcvtzu w10, s0 -; CHECK-CVT-NEXT: mov v3.s[1], w14 -; CHECK-CVT-NEXT: mov v2.s[2], w12 -; CHECK-CVT-NEXT: cmp w10, w8 -; CHECK-CVT-NEXT: csel w8, w10, w8, lo -; CHECK-CVT-NEXT: mov v3.s[2], w9 -; CHECK-CVT-NEXT: mov v2.s[3], w13 -; CHECK-CVT-NEXT: mov v3.s[3], w8 -; CHECK-CVT-NEXT: uzp1 v0.8h, v3.8h, v2.8h +; CHECK-CVT-NEXT: fmov s1, w9 +; CHECK-CVT-NEXT: fcvtzu w9, s0 +; CHECK-CVT-NEXT: cmp w15, w8 +; CHECK-CVT-NEXT: fmov s2, w11 +; CHECK-CVT-NEXT: mov v1.s[1], w10 +; CHECK-CVT-NEXT: csel w10, w15, w8, lo +; CHECK-CVT-NEXT: cmp w9, w8 +; CHECK-CVT-NEXT: csel w8, w9, w8, lo +; CHECK-CVT-NEXT: mov v1.s[2], w12 +; CHECK-CVT-NEXT: mov v2.s[1], w14 +; CHECK-CVT-NEXT: mov v1.s[3], w13 +; CHECK-CVT-NEXT: mov v2.s[2], w10 +; CHECK-CVT-NEXT: mov v2.s[3], w8 +; CHECK-CVT-NEXT: uzp1 v0.8h, v2.8h, v1.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i13: @@ -1928,18 +1928,19 @@ ; CHECK-CVT-NEXT: mov s3, v1.s[2] ; CHECK-CVT-NEXT: mov s4, v1.s[3] ; CHECK-CVT-NEXT: mov s5, v0.s[1] -; CHECK-CVT-NEXT: fcvtzu w10, s1 +; CHECK-CVT-NEXT: fcvtzu w9, s1 ; CHECK-CVT-NEXT: fcvtzu w11, s0 ; CHECK-CVT-NEXT: mov s1, v0.s[2] ; CHECK-CVT-NEXT: mov s0, v0.s[3] -; CHECK-CVT-NEXT: fcvtzu w9, s2 +; CHECK-CVT-NEXT: fcvtzu w10, s2 ; CHECK-CVT-NEXT: fcvtzu w12, s3 ; CHECK-CVT-NEXT: fcvtzu w13, s4 ; CHECK-CVT-NEXT: fcvtzu w14, s5 -; CHECK-CVT-NEXT: cmp w9, w8 -; CHECK-CVT-NEXT: csel w9, w9, w8, lo ; CHECK-CVT-NEXT: cmp w10, w8 +; CHECK-CVT-NEXT: fcvtzu w15, s1 ; CHECK-CVT-NEXT: csel w10, w10, w8, lo +; CHECK-CVT-NEXT: cmp w9, w8 +; CHECK-CVT-NEXT: csel w9, w9, w8, lo ; CHECK-CVT-NEXT: cmp w12, w8 ; CHECK-CVT-NEXT: csel w12, w12, w8, lo ; CHECK-CVT-NEXT: cmp w13, w8 @@ -1948,21 +1949,20 @@ ; CHECK-CVT-NEXT: csel w14, w14, w8, lo ; CHECK-CVT-NEXT: cmp w11, w8 ; CHECK-CVT-NEXT: csel w11, w11, w8, lo -; CHECK-CVT-NEXT: fmov s2, w10 -; CHECK-CVT-NEXT: fcvtzu w10, s1 -; CHECK-CVT-NEXT: fmov s3, w11 -; CHECK-CVT-NEXT: mov v2.s[1], w9 -; CHECK-CVT-NEXT: cmp w10, w8 -; CHECK-CVT-NEXT: csel w9, w10, w8, lo -; CHECK-CVT-NEXT: fcvtzu w10, s0 -; CHECK-CVT-NEXT: mov v3.s[1], w14 -; CHECK-CVT-NEXT: mov v2.s[2], w12 -; CHECK-CVT-NEXT: cmp w10, w8 -; CHECK-CVT-NEXT: csel w8, w10, w8, lo -; CHECK-CVT-NEXT: mov v3.s[2], w9 -; CHECK-CVT-NEXT: mov v2.s[3], w13 -; CHECK-CVT-NEXT: mov v3.s[3], w8 -; CHECK-CVT-NEXT: uzp1 v0.8h, v3.8h, v2.8h +; CHECK-CVT-NEXT: fmov s1, w9 +; CHECK-CVT-NEXT: fcvtzu w9, s0 +; CHECK-CVT-NEXT: cmp w15, w8 +; CHECK-CVT-NEXT: fmov s2, w11 +; CHECK-CVT-NEXT: mov v1.s[1], w10 +; CHECK-CVT-NEXT: csel w10, w15, w8, lo +; CHECK-CVT-NEXT: cmp w9, w8 +; CHECK-CVT-NEXT: csel w8, w9, w8, lo +; CHECK-CVT-NEXT: mov v1.s[2], w12 +; CHECK-CVT-NEXT: mov v2.s[1], w14 +; CHECK-CVT-NEXT: mov v1.s[3], w13 +; CHECK-CVT-NEXT: mov v2.s[2], w10 +; CHECK-CVT-NEXT: mov v2.s[3], w8 +; CHECK-CVT-NEXT: uzp1 v0.8h, v2.8h, v1.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i16: @@ -1976,21 +1976,21 @@ define <8 x i19> @test_unsigned_v8f16_v8i19(<8 x half> %f) { ; CHECK-LABEL: test_unsigned_v8f16_v8i19: ; CHECK: // %bb.0: -; CHECK-NEXT: fcvtl v2.4s, v0.4h -; CHECK-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NEXT: fcvtl2 v2.4s, v0.8h +; CHECK-NEXT: fcvtl v0.4s, v0.4h ; CHECK-NEXT: movi v1.4s, #7, msl #16 ; CHECK-NEXT: fcvtzu v2.4s, v2.4s ; CHECK-NEXT: fcvtzu v0.4s, v0.4s ; CHECK-NEXT: umin v2.4s, v2.4s, v1.4s ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: mov w1, v2.s[1] -; CHECK-NEXT: mov w2, v2.s[2] -; CHECK-NEXT: mov w5, v0.s[1] -; CHECK-NEXT: mov w3, v2.s[3] -; CHECK-NEXT: mov w6, v0.s[2] -; CHECK-NEXT: mov w7, v0.s[3] -; CHECK-NEXT: fmov w4, s0 -; CHECK-NEXT: fmov w0, s2 +; CHECK-NEXT: fmov w4, s2 +; CHECK-NEXT: mov w5, v2.s[1] +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: mov w6, v2.s[2] +; CHECK-NEXT: mov w7, v2.s[3] +; CHECK-NEXT: mov w1, v0.s[1] +; CHECK-NEXT: mov w2, v0.s[2] +; CHECK-NEXT: mov w3, v0.s[3] ; CHECK-NEXT: ret %x = call <8 x i19> @llvm.fptoui.sat.v8f16.v8i19(<8 x half> %f) ret <8 x i19> %x @@ -2014,26 +2014,26 @@ ; CHECK-CVT-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-CVT-NEXT: mov x8, #1125899906842623 ; CHECK-CVT-NEXT: mov h2, v0.h[1] +; CHECK-CVT-NEXT: mov h4, v1.h[1] +; CHECK-CVT-NEXT: mov h6, v1.h[2] ; CHECK-CVT-NEXT: mov h3, v0.h[2] ; CHECK-CVT-NEXT: mov h5, v0.h[3] ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov h4, v1.h[1] -; CHECK-CVT-NEXT: mov h6, v1.h[2] ; CHECK-CVT-NEXT: mov h7, v1.h[3] ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: fcvtzu x9, s0 -; CHECK-CVT-NEXT: fcvt s5, h5 ; CHECK-CVT-NEXT: fcvt s4, h4 ; CHECK-CVT-NEXT: fcvt s6, h6 +; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: fcvtzu x9, s0 ; CHECK-CVT-NEXT: fcvt s0, h7 ; CHECK-CVT-NEXT: fcvtzu x10, s1 ; CHECK-CVT-NEXT: fcvtzu x11, s2 -; CHECK-CVT-NEXT: fcvtzu x12, s3 -; CHECK-CVT-NEXT: fcvtzu x14, s5 ; CHECK-CVT-NEXT: fcvtzu x13, s4 ; CHECK-CVT-NEXT: fcvtzu x15, s6 +; CHECK-CVT-NEXT: fcvtzu x12, s3 +; CHECK-CVT-NEXT: fcvtzu x14, s5 ; CHECK-CVT-NEXT: cmp x10, x8 ; CHECK-CVT-NEXT: fcvtzu x16, s0 ; CHECK-CVT-NEXT: csel x4, x10, x8, lo @@ -2058,18 +2058,18 @@ ; CHECK-FP16-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-FP16-NEXT: mov x8, #1125899906842623 ; CHECK-FP16-NEXT: mov h2, v0.h[1] +; CHECK-FP16-NEXT: mov h4, v1.h[1] +; CHECK-FP16-NEXT: mov h6, v1.h[2] ; CHECK-FP16-NEXT: mov h3, v0.h[2] ; CHECK-FP16-NEXT: mov h5, v0.h[3] ; CHECK-FP16-NEXT: fcvtzu x9, h0 -; CHECK-FP16-NEXT: mov h4, v1.h[1] -; CHECK-FP16-NEXT: mov h6, v1.h[2] ; CHECK-FP16-NEXT: mov h0, v1.h[3] ; CHECK-FP16-NEXT: fcvtzu x10, h1 ; CHECK-FP16-NEXT: fcvtzu x11, h2 -; CHECK-FP16-NEXT: fcvtzu x12, h3 -; CHECK-FP16-NEXT: fcvtzu x14, h5 ; CHECK-FP16-NEXT: fcvtzu x13, h4 ; CHECK-FP16-NEXT: fcvtzu x15, h6 +; CHECK-FP16-NEXT: fcvtzu x12, h3 +; CHECK-FP16-NEXT: fcvtzu x14, h5 ; CHECK-FP16-NEXT: cmp x10, x8 ; CHECK-FP16-NEXT: fcvtzu x16, h0 ; CHECK-FP16-NEXT: csel x4, x10, x8, lo @@ -2096,63 +2096,63 @@ ; CHECK-CVT-LABEL: test_unsigned_v8f16_v8i64: ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-CVT-NEXT: mov h4, v0.h[2] -; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: fcvt s2, h1 +; CHECK-CVT-NEXT: mov h2, v0.h[2] +; CHECK-CVT-NEXT: fcvt s4, h0 +; CHECK-CVT-NEXT: mov h5, v1.h[2] +; CHECK-CVT-NEXT: fcvt s7, h1 ; CHECK-CVT-NEXT: mov h3, v1.h[1] -; CHECK-CVT-NEXT: mov h6, v1.h[2] -; CHECK-CVT-NEXT: fcvt s4, h4 -; CHECK-CVT-NEXT: mov h1, v1.h[3] -; CHECK-CVT-NEXT: fcvtzu x9, s5 -; CHECK-CVT-NEXT: fcvtzu x8, s2 -; CHECK-CVT-NEXT: fcvt s2, h3 -; CHECK-CVT-NEXT: mov h3, v0.h[1] +; CHECK-CVT-NEXT: mov h6, v0.h[1] ; CHECK-CVT-NEXT: mov h0, v0.h[3] -; CHECK-CVT-NEXT: fcvt s5, h6 -; CHECK-CVT-NEXT: fcvt s6, h1 -; CHECK-CVT-NEXT: fcvtzu x10, s2 -; CHECK-CVT-NEXT: fmov d2, x8 +; CHECK-CVT-NEXT: mov h1, v1.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvtzu x8, s4 +; CHECK-CVT-NEXT: fcvt s4, h5 +; CHECK-CVT-NEXT: fcvtzu x9, s7 ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: fcvt s4, h0 -; CHECK-CVT-NEXT: fmov d0, x9 -; CHECK-CVT-NEXT: mov v2.d[1], x10 -; CHECK-CVT-NEXT: fcvtzu x10, s5 -; CHECK-CVT-NEXT: fmov d1, x8 -; CHECK-CVT-NEXT: fcvtzu x9, s3 -; CHECK-CVT-NEXT: fcvtzu x8, s4 -; CHECK-CVT-NEXT: fmov d3, x10 +; CHECK-CVT-NEXT: fcvt s5, h6 +; CHECK-CVT-NEXT: fcvt s6, h0 +; CHECK-CVT-NEXT: fcvt s7, h1 +; CHECK-CVT-NEXT: fcvtzu x10, s2 +; CHECK-CVT-NEXT: fmov d2, x9 +; CHECK-CVT-NEXT: fcvtzu x9, s4 +; CHECK-CVT-NEXT: fmov d0, x8 +; CHECK-CVT-NEXT: fcvtzu x11, s3 +; CHECK-CVT-NEXT: fcvtzu x8, s5 +; CHECK-CVT-NEXT: fmov d1, x10 ; CHECK-CVT-NEXT: fcvtzu x10, s6 -; CHECK-CVT-NEXT: mov v0.d[1], x9 -; CHECK-CVT-NEXT: mov v1.d[1], x8 -; CHECK-CVT-NEXT: mov v3.d[1], x10 +; CHECK-CVT-NEXT: fmov d3, x9 +; CHECK-CVT-NEXT: fcvtzu x9, s7 +; CHECK-CVT-NEXT: mov v0.d[1], x8 +; CHECK-CVT-NEXT: mov v2.d[1], x11 +; CHECK-CVT-NEXT: mov v1.d[1], x10 +; CHECK-CVT-NEXT: mov v3.d[1], x9 ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i64: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-FP16-NEXT: mov h3, v0.h[2] +; CHECK-FP16-NEXT: ext v2.16b, v0.16b, v0.16b, #8 +; CHECK-FP16-NEXT: mov h1, v0.h[2] ; CHECK-FP16-NEXT: mov h5, v0.h[3] -; CHECK-FP16-NEXT: fcvtzu x9, h0 -; CHECK-FP16-NEXT: mov h2, v1.h[1] +; CHECK-FP16-NEXT: mov h4, v2.h[2] +; CHECK-FP16-NEXT: fcvtzu x9, h2 +; CHECK-FP16-NEXT: mov h3, v2.h[1] +; CHECK-FP16-NEXT: mov h6, v2.h[3] ; CHECK-FP16-NEXT: fcvtzu x8, h1 -; CHECK-FP16-NEXT: mov h4, v1.h[2] -; CHECK-FP16-NEXT: mov h6, v1.h[3] -; CHECK-FP16-NEXT: fcvtzu x10, h2 -; CHECK-FP16-NEXT: fmov d2, x8 -; CHECK-FP16-NEXT: fcvtzu x8, h3 -; CHECK-FP16-NEXT: mov h3, v0.h[1] -; CHECK-FP16-NEXT: fmov d0, x9 -; CHECK-FP16-NEXT: mov v2.d[1], x10 -; CHECK-FP16-NEXT: fcvtzu x10, h4 +; CHECK-FP16-NEXT: mov h1, v0.h[1] +; CHECK-FP16-NEXT: fcvtzu x10, h0 +; CHECK-FP16-NEXT: fmov d2, x9 +; CHECK-FP16-NEXT: fcvtzu x9, h4 +; CHECK-FP16-NEXT: fcvtzu x11, h3 +; CHECK-FP16-NEXT: fmov d0, x10 +; CHECK-FP16-NEXT: fcvtzu x10, h1 ; CHECK-FP16-NEXT: fmov d1, x8 -; CHECK-FP16-NEXT: fcvtzu x9, h3 ; CHECK-FP16-NEXT: fcvtzu x8, h5 -; CHECK-FP16-NEXT: fmov d3, x10 -; CHECK-FP16-NEXT: fcvtzu x10, h6 -; CHECK-FP16-NEXT: mov v0.d[1], x9 +; CHECK-FP16-NEXT: fmov d3, x9 +; CHECK-FP16-NEXT: fcvtzu x9, h6 +; CHECK-FP16-NEXT: mov v0.d[1], x10 +; CHECK-FP16-NEXT: mov v2.d[1], x11 ; CHECK-FP16-NEXT: mov v1.d[1], x8 -; CHECK-FP16-NEXT: mov v3.d[1], x10 +; CHECK-FP16-NEXT: mov v3.d[1], x9 ; CHECK-FP16-NEXT: ret %x = call <8 x i64> @llvm.fptoui.sat.v8f16.v8i64(<8 x half> %f) ret <8 x i64> %x @@ -2195,7 +2195,7 @@ ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: mov w8, #1904214015 ; CHECK-NEXT: fcmp s8, #0.0 -; CHECK-NEXT: mov x21, #68719476735 +; CHECK-NEXT: mov x24, #68719476735 ; CHECK-NEXT: mov h0, v0.h[3] ; CHECK-NEXT: fmov s9, w8 ; CHECK-NEXT: csel x8, xzr, x1, lt @@ -2203,7 +2203,7 @@ ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: csinv x9, x9, xzr, le -; CHECK-NEXT: csel x20, x21, x8, gt +; CHECK-NEXT: csel x20, x24, x8, gt ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: str x9, [sp, #24] // 8-byte Folded Spill ; CHECK-NEXT: bl __fixunssfti @@ -2214,7 +2214,7 @@ ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: csinv x9, x9, xzr, le -; CHECK-NEXT: csel x23, x21, x8, gt +; CHECK-NEXT: csel x22, x24, x8, gt ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: str x9, [sp, #16] // 8-byte Folded Spill ; CHECK-NEXT: bl __fixunssfti @@ -2226,7 +2226,7 @@ ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: csinv x8, x8, xzr, le -; CHECK-NEXT: csel x24, x21, x9, gt +; CHECK-NEXT: csel x23, x24, x9, gt ; CHECK-NEXT: str x8, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti @@ -2238,7 +2238,7 @@ ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: csinv x8, x8, xzr, le -; CHECK-NEXT: csel x26, x21, x9, gt +; CHECK-NEXT: csel x26, x24, x9, gt ; CHECK-NEXT: str x8, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti @@ -2249,8 +2249,8 @@ ; CHECK-NEXT: csel x9, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csinv x29, x9, xzr, le -; CHECK-NEXT: csel x28, x21, x8, gt +; CHECK-NEXT: csinv x27, x9, xzr, le +; CHECK-NEXT: csel x29, x24, x8, gt ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: fcmp s8, #0.0 @@ -2259,8 +2259,8 @@ ; CHECK-NEXT: csel x9, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csinv x27, x9, xzr, le -; CHECK-NEXT: csel x22, x21, x8, gt +; CHECK-NEXT: csinv x28, x9, xzr, le +; CHECK-NEXT: csel x25, x24, x8, gt ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload @@ -2271,57 +2271,58 @@ ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: csinv x8, x8, xzr, le -; CHECK-NEXT: csel x25, x21, x9, gt +; CHECK-NEXT: csel x21, x24, x9, gt ; CHECK-NEXT: str x8, [sp] // 8-byte Folded Spill ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: ldr x11, [sp, #8] // 8-byte Folded Reload -; CHECK-NEXT: fmov d0, x27 -; CHECK-NEXT: fmov d1, x29 ; CHECK-NEXT: fcmp s8, #0.0 -; CHECK-NEXT: lsr x10, x22, #28 -; CHECK-NEXT: stur x11, [x19, #75] -; CHECK-NEXT: lsr x11, x28, #28 -; CHECK-NEXT: mov v0.d[1], x22 -; CHECK-NEXT: ldr x12, [sp, #32] // 8-byte Folded Reload -; CHECK-NEXT: mov v1.d[1], x28 -; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, xzr, x1, lt +; CHECK-NEXT: lsr x8, x25, #28 +; CHECK-NEXT: fmov d0, x28 +; CHECK-NEXT: lsr x11, x29, #28 +; CHECK-NEXT: fmov d1, x27 +; CHECK-NEXT: csel x9, xzr, x0, lt +; CHECK-NEXT: csel x10, xzr, x1, lt ; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: stur x12, [x19, #50] -; CHECK-NEXT: fmov x12, d0 -; CHECK-NEXT: fmov x13, d1 -; CHECK-NEXT: csinv x8, x8, xzr, le -; CHECK-NEXT: ldp d0, d1, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: csel x9, x21, x9, gt -; CHECK-NEXT: strb w10, [x19, #49] -; CHECK-NEXT: extr x10, x22, x12, #28 -; CHECK-NEXT: bfi x9, x12, #36, #28 -; CHECK-NEXT: stur x8, [x19, #25] -; CHECK-NEXT: extr x8, x28, x13, #28 -; CHECK-NEXT: mov v0.d[1], x23 +; CHECK-NEXT: strb w8, [x19, #49] +; CHECK-NEXT: mov v0.d[1], x25 ; CHECK-NEXT: strb w11, [x19, #24] -; CHECK-NEXT: mov v1.d[1], x20 -; CHECK-NEXT: stur x10, [x19, #41] -; CHECK-NEXT: stur x9, [x19, #33] -; CHECK-NEXT: bfi x25, x13, #36, #28 -; CHECK-NEXT: str x8, [x19, #16] -; CHECK-NEXT: lsr x9, x23, #28 +; CHECK-NEXT: ldr x11, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: mov v1.d[1], x29 +; CHECK-NEXT: csinv x8, x9, xzr, le +; CHECK-NEXT: fmov x9, d0 +; CHECK-NEXT: csel x10, x24, x10, gt +; CHECK-NEXT: stur x11, [x19, #75] +; CHECK-NEXT: fmov x11, d1 +; CHECK-NEXT: stur x8, [x19, #25] +; CHECK-NEXT: extr x8, x25, x9, #28 +; CHECK-NEXT: bfi x10, x9, #36, #28 +; CHECK-NEXT: ldr x9, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: ldr d0, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: bfi x21, x11, #36, #28 +; CHECK-NEXT: stur x8, [x19, #41] +; CHECK-NEXT: stur x9, [x19, #50] +; CHECK-NEXT: extr x9, x29, x11, #28 +; CHECK-NEXT: ldr d1, [sp, #24] // 8-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], x22 +; CHECK-NEXT: stur x10, [x19, #33] +; CHECK-NEXT: lsr x10, x20, #28 +; CHECK-NEXT: str x9, [x19, #16] ; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: mov v1.d[1], x20 ; CHECK-NEXT: ldr x12, [sp] // 8-byte Folded Reload +; CHECK-NEXT: lsr x9, x22, #28 +; CHECK-NEXT: strb w10, [x19, #74] ; CHECK-NEXT: fmov x11, d1 -; CHECK-NEXT: lsr x10, x20, #28 -; CHECK-NEXT: strb w9, [x19, #99] -; CHECK-NEXT: stp x12, x25, [x19] -; CHECK-NEXT: extr x12, x23, x8, #28 ; CHECK-NEXT: bfi x26, x8, #36, #28 +; CHECK-NEXT: stp x12, x21, [x19] +; CHECK-NEXT: extr x12, x22, x8, #28 +; CHECK-NEXT: strb w9, [x19, #99] ; CHECK-NEXT: extr x8, x20, x11, #28 -; CHECK-NEXT: bfi x24, x11, #36, #28 -; CHECK-NEXT: strb w10, [x19, #74] +; CHECK-NEXT: bfi x23, x11, #36, #28 ; CHECK-NEXT: stur x12, [x19, #91] ; CHECK-NEXT: stur x26, [x19, #83] ; CHECK-NEXT: stur x8, [x19, #66] -; CHECK-NEXT: stur x24, [x19, #58] +; CHECK-NEXT: stur x23, [x19, #58] ; CHECK-NEXT: ldp x20, x19, [sp, #160] // 16-byte Folded Reload ; CHECK-NEXT: ldp x22, x21, [sp, #144] // 16-byte Folded Reload ; CHECK-NEXT: ldp x24, x23, [sp, #128] // 16-byte Folded Reload @@ -2498,9 +2499,9 @@ define <8 x i8> @test_unsigned_v8f32_v8i8(<8 x float> %f) { ; CHECK-LABEL: test_unsigned_v8f32_v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.2d, #0x0000ff000000ff ; CHECK-NEXT: fcvtzu v1.4s, v1.4s ; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: movi v2.2d, #0x0000ff000000ff ; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s ; CHECK-NEXT: umin v0.4s, v0.4s, v2.4s ; CHECK-NEXT: xtn v1.4h, v1.4s @@ -2514,42 +2515,42 @@ define <16 x i8> @test_unsigned_v16f32_v16i8(<16 x float> %f) { ; CHECK-LABEL: test_unsigned_v16f32_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v4.2d, #0x0000ff000000ff ; CHECK-NEXT: fcvtzu v0.4s, v0.4s ; CHECK-NEXT: fcvtzu v1.4s, v1.4s +; CHECK-NEXT: movi v4.2d, #0x0000ff000000ff ; CHECK-NEXT: fcvtzu v2.4s, v2.4s +; CHECK-NEXT: fcvtzu v3.4s, v3.4s ; CHECK-NEXT: umin v0.4s, v0.4s, v4.4s ; CHECK-NEXT: umin v1.4s, v1.4s, v4.4s ; CHECK-NEXT: umin v2.4s, v2.4s, v4.4s +; CHECK-NEXT: umin v3.4s, v3.4s, v4.4s ; CHECK-NEXT: xtn v5.4h, v0.4s ; CHECK-NEXT: xtn v1.4h, v1.4s ; CHECK-NEXT: umov w8, v5.h[0] -; CHECK-NEXT: umov w9, v5.h[1] +; CHECK-NEXT: umov w9, v5.h[2] +; CHECK-NEXT: xtn v2.4h, v2.4s ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: umov w8, v5.h[2] -; CHECK-NEXT: mov v0.b[1], w9 -; CHECK-NEXT: mov v0.b[2], w8 +; CHECK-NEXT: umov w8, v5.h[1] +; CHECK-NEXT: mov v0.b[1], w8 ; CHECK-NEXT: umov w8, v5.h[3] +; CHECK-NEXT: mov v0.b[2], w9 +; CHECK-NEXT: umov w9, v1.h[0] ; CHECK-NEXT: mov v0.b[3], w8 -; CHECK-NEXT: umov w8, v1.h[0] -; CHECK-NEXT: mov v0.b[4], w8 ; CHECK-NEXT: umov w8, v1.h[1] +; CHECK-NEXT: mov v0.b[4], w9 +; CHECK-NEXT: umov w9, v1.h[2] ; CHECK-NEXT: mov v0.b[5], w8 -; CHECK-NEXT: umov w8, v1.h[2] -; CHECK-NEXT: mov v0.b[6], w8 ; CHECK-NEXT: umov w8, v1.h[3] -; CHECK-NEXT: xtn v1.4h, v2.4s -; CHECK-NEXT: fcvtzu v2.4s, v3.4s +; CHECK-NEXT: xtn v1.4h, v3.4s +; CHECK-NEXT: mov v0.b[6], w9 +; CHECK-NEXT: umov w9, v2.h[0] ; CHECK-NEXT: mov v0.b[7], w8 -; CHECK-NEXT: umov w8, v1.h[0] -; CHECK-NEXT: umin v2.4s, v2.4s, v4.4s -; CHECK-NEXT: mov v0.b[8], w8 -; CHECK-NEXT: umov w8, v1.h[1] +; CHECK-NEXT: umov w8, v2.h[1] +; CHECK-NEXT: mov v0.b[8], w9 +; CHECK-NEXT: umov w9, v2.h[2] ; CHECK-NEXT: mov v0.b[9], w8 -; CHECK-NEXT: umov w8, v1.h[2] -; CHECK-NEXT: mov v0.b[10], w8 -; CHECK-NEXT: umov w8, v1.h[3] -; CHECK-NEXT: xtn v1.4h, v2.4s +; CHECK-NEXT: umov w8, v2.h[3] +; CHECK-NEXT: mov v0.b[10], w9 ; CHECK-NEXT: mov v0.b[11], w8 ; CHECK-NEXT: umov w8, v1.h[0] ; CHECK-NEXT: mov v0.b[12], w8 @@ -2567,9 +2568,9 @@ define <8 x i16> @test_unsigned_v8f32_v8i16(<8 x float> %f) { ; CHECK-LABEL: test_unsigned_v8f32_v8i16: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.2d, #0x00ffff0000ffff ; CHECK-NEXT: fcvtzu v1.4s, v1.4s ; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: movi v2.2d, #0x00ffff0000ffff ; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s ; CHECK-NEXT: umin v0.4s, v0.4s, v2.4s ; CHECK-NEXT: uzp1 v0.8h, v0.8h, v1.8h @@ -2581,11 +2582,11 @@ define <16 x i16> @test_unsigned_v16f32_v16i16(<16 x float> %f) { ; CHECK-LABEL: test_unsigned_v16f32_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v4.2d, #0x00ffff0000ffff ; CHECK-NEXT: fcvtzu v1.4s, v1.4s ; CHECK-NEXT: fcvtzu v0.4s, v0.4s ; CHECK-NEXT: fcvtzu v3.4s, v3.4s ; CHECK-NEXT: fcvtzu v2.4s, v2.4s +; CHECK-NEXT: movi v4.2d, #0x00ffff0000ffff ; CHECK-NEXT: umin v1.4s, v1.4s, v4.4s ; CHECK-NEXT: umin v0.4s, v0.4s, v4.4s ; CHECK-NEXT: umin v3.4s, v3.4s, v4.4s @@ -2604,95 +2605,95 @@ ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: fcvtl2 v2.4s, v1.8h ; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h -; CHECK-CVT-NEXT: fcvtl2 v5.4s, v0.8h +; CHECK-CVT-NEXT: fcvtl2 v4.4s, v0.8h ; CHECK-CVT-NEXT: mov w8, #255 ; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h ; CHECK-CVT-NEXT: mov s3, v2.s[1] -; CHECK-CVT-NEXT: mov s4, v2.s[2] +; CHECK-CVT-NEXT: mov s5, v2.s[2] ; CHECK-CVT-NEXT: fcvtzu w9, s2 ; CHECK-CVT-NEXT: mov s2, v2.s[3] -; CHECK-CVT-NEXT: fcvtzu w12, s1 -; CHECK-CVT-NEXT: fcvtzu w16, s5 +; CHECK-CVT-NEXT: mov s6, v1.s[1] +; CHECK-CVT-NEXT: fcvtzu w10, s1 +; CHECK-CVT-NEXT: fcvtzu w14, s4 ; CHECK-CVT-NEXT: fcvtzu w2, s0 -; CHECK-CVT-NEXT: fcvtzu w10, s3 -; CHECK-CVT-NEXT: mov s3, v1.s[1] -; CHECK-CVT-NEXT: fcvtzu w11, s4 -; CHECK-CVT-NEXT: mov s4, v1.s[2] +; CHECK-CVT-NEXT: fcvtzu w11, s3 +; CHECK-CVT-NEXT: mov s3, v1.s[2] +; CHECK-CVT-NEXT: fcvtzu w12, s5 ; CHECK-CVT-NEXT: mov s1, v1.s[3] ; CHECK-CVT-NEXT: fcvtzu w13, s2 -; CHECK-CVT-NEXT: cmp w10, #255 -; CHECK-CVT-NEXT: mov s2, v5.s[1] -; CHECK-CVT-NEXT: fcvtzu w14, s3 -; CHECK-CVT-NEXT: csel w10, w10, w8, lo -; CHECK-CVT-NEXT: cmp w9, #255 -; CHECK-CVT-NEXT: fcvtzu w15, s4 -; CHECK-CVT-NEXT: csel w9, w9, w8, lo +; CHECK-CVT-NEXT: mov s5, v4.s[1] ; CHECK-CVT-NEXT: cmp w11, #255 +; CHECK-CVT-NEXT: fcvtzu w15, s6 ; CHECK-CVT-NEXT: csel w11, w11, w8, lo +; CHECK-CVT-NEXT: cmp w9, #255 +; CHECK-CVT-NEXT: csel w9, w9, w8, lo +; CHECK-CVT-NEXT: cmp w12, #255 +; CHECK-CVT-NEXT: fcvtzu w16, s3 +; CHECK-CVT-NEXT: csel w12, w12, w8, lo ; CHECK-CVT-NEXT: cmp w13, #255 -; CHECK-CVT-NEXT: mov s3, v5.s[2] +; CHECK-CVT-NEXT: mov s2, v4.s[2] ; CHECK-CVT-NEXT: fcvtzu w17, s1 ; CHECK-CVT-NEXT: csel w13, w13, w8, lo -; CHECK-CVT-NEXT: cmp w14, #255 -; CHECK-CVT-NEXT: mov s4, v5.s[3] -; CHECK-CVT-NEXT: fcvtzu w18, s2 -; CHECK-CVT-NEXT: csel w14, w14, w8, lo -; CHECK-CVT-NEXT: cmp w12, #255 -; CHECK-CVT-NEXT: mov s1, v0.s[1] -; CHECK-CVT-NEXT: csel w12, w12, w8, lo ; CHECK-CVT-NEXT: cmp w15, #255 -; CHECK-CVT-NEXT: fcvtzu w0, s3 +; CHECK-CVT-NEXT: mov s4, v4.s[3] +; CHECK-CVT-NEXT: fcvtzu w18, s5 ; CHECK-CVT-NEXT: csel w15, w15, w8, lo +; CHECK-CVT-NEXT: cmp w10, #255 +; CHECK-CVT-NEXT: mov s1, v0.s[1] +; CHECK-CVT-NEXT: csel w10, w10, w8, lo +; CHECK-CVT-NEXT: cmp w16, #255 +; CHECK-CVT-NEXT: csel w16, w16, w8, lo ; CHECK-CVT-NEXT: cmp w17, #255 +; CHECK-CVT-NEXT: fcvtzu w0, s2 ; CHECK-CVT-NEXT: csel w17, w17, w8, lo ; CHECK-CVT-NEXT: cmp w18, #255 +; CHECK-CVT-NEXT: fcvtzu w1, s4 +; CHECK-CVT-NEXT: csel w18, w18, w8, lo +; CHECK-CVT-NEXT: cmp w14, #255 ; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: csel w9, w18, w8, lo -; CHECK-CVT-NEXT: fcvtzu w18, s4 -; CHECK-CVT-NEXT: cmp w16, #255 -; CHECK-CVT-NEXT: fcvtzu w1, s1 -; CHECK-CVT-NEXT: csel w16, w16, w8, lo +; CHECK-CVT-NEXT: csel w9, w14, w8, lo +; CHECK-CVT-NEXT: fcvtzu w14, s1 ; CHECK-CVT-NEXT: cmp w0, #255 -; CHECK-CVT-NEXT: mov s1, v0.s[2] ; CHECK-CVT-NEXT: csel w0, w0, w8, lo -; CHECK-CVT-NEXT: cmp w18, #255 -; CHECK-CVT-NEXT: mov v2.s[1], w10 -; CHECK-CVT-NEXT: csel w10, w18, w8, lo ; CHECK-CVT-NEXT: cmp w1, #255 -; CHECK-CVT-NEXT: fmov s3, w12 -; CHECK-CVT-NEXT: csel w18, w1, w8, lo -; CHECK-CVT-NEXT: cmp w2, #255 -; CHECK-CVT-NEXT: csel w1, w2, w8, lo -; CHECK-CVT-NEXT: fmov s4, w16 -; CHECK-CVT-NEXT: mov v2.s[2], w11 -; CHECK-CVT-NEXT: fcvtzu w11, s1 +; CHECK-CVT-NEXT: mov s1, v0.s[2] +; CHECK-CVT-NEXT: csel w1, w1, w8, lo +; CHECK-CVT-NEXT: cmp w14, #255 ; CHECK-CVT-NEXT: mov s0, v0.s[3] -; CHECK-CVT-NEXT: fmov s1, w1 -; CHECK-CVT-NEXT: mov v3.s[1], w14 -; CHECK-CVT-NEXT: cmp w11, #255 -; CHECK-CVT-NEXT: mov v4.s[1], w9 -; CHECK-CVT-NEXT: csel w9, w11, w8, lo +; CHECK-CVT-NEXT: csel w14, w14, w8, lo +; CHECK-CVT-NEXT: cmp w2, #255 +; CHECK-CVT-NEXT: mov v2.s[1], w11 +; CHECK-CVT-NEXT: csel w11, w2, w8, lo +; CHECK-CVT-NEXT: fmov s3, w10 +; CHECK-CVT-NEXT: fcvtzu w10, s1 +; CHECK-CVT-NEXT: fmov s1, w9 +; CHECK-CVT-NEXT: fcvtzu w9, s0 +; CHECK-CVT-NEXT: fmov s4, w11 +; CHECK-CVT-NEXT: mov v2.s[2], w12 +; CHECK-CVT-NEXT: cmp w10, #255 +; CHECK-CVT-NEXT: mov v3.s[1], w15 +; CHECK-CVT-NEXT: csel w10, w10, w8, lo ; CHECK-CVT-NEXT: mov v1.s[1], w18 -; CHECK-CVT-NEXT: fcvtzu w11, s0 -; CHECK-CVT-NEXT: mov v3.s[2], w15 -; CHECK-CVT-NEXT: mov v4.s[2], w0 -; CHECK-CVT-NEXT: mov v1.s[2], w9 -; CHECK-CVT-NEXT: cmp w11, #255 -; CHECK-CVT-NEXT: csel w8, w11, w8, lo +; CHECK-CVT-NEXT: mov v4.s[1], w14 +; CHECK-CVT-NEXT: cmp w9, #255 +; CHECK-CVT-NEXT: csel w8, w9, w8, lo +; CHECK-CVT-NEXT: mov v3.s[2], w16 +; CHECK-CVT-NEXT: mov v1.s[2], w0 +; CHECK-CVT-NEXT: mov v4.s[2], w10 ; CHECK-CVT-NEXT: mov v2.s[3], w13 ; CHECK-CVT-NEXT: mov v3.s[3], w17 -; CHECK-CVT-NEXT: mov v4.s[3], w10 -; CHECK-CVT-NEXT: mov v1.s[3], w8 +; CHECK-CVT-NEXT: mov v1.s[3], w1 +; CHECK-CVT-NEXT: mov v4.s[3], w8 ; CHECK-CVT-NEXT: uzp1 v0.8h, v3.8h, v2.8h -; CHECK-CVT-NEXT: uzp1 v1.8h, v1.8h, v4.8h +; CHECK-CVT-NEXT: uzp1 v1.8h, v4.8h, v1.8h ; CHECK-CVT-NEXT: uzp1 v0.16b, v1.16b, v0.16b ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_unsigned_v16f16_v16i8: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: movi v2.2d, #0xff00ff00ff00ff ; CHECK-FP16-NEXT: fcvtzu v1.8h, v1.8h ; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-FP16-NEXT: movi v2.2d, #0xff00ff00ff00ff ; CHECK-FP16-NEXT: umin v1.8h, v1.8h, v2.8h ; CHECK-FP16-NEXT: umin v0.8h, v0.8h, v2.8h ; CHECK-FP16-NEXT: uzp1 v0.16b, v0.16b, v1.16b @@ -2706,84 +2707,84 @@ ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h ; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: fcvtl2 v5.4s, v1.8h +; CHECK-CVT-NEXT: fcvtl2 v4.4s, v1.8h ; CHECK-CVT-NEXT: mov w8, #65535 ; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h ; CHECK-CVT-NEXT: mov s3, v2.s[1] -; CHECK-CVT-NEXT: mov s4, v2.s[2] +; CHECK-CVT-NEXT: mov s5, v2.s[2] ; CHECK-CVT-NEXT: fcvtzu w9, s2 ; CHECK-CVT-NEXT: mov s2, v2.s[3] -; CHECK-CVT-NEXT: fcvtzu w12, s0 -; CHECK-CVT-NEXT: fcvtzu w16, s5 +; CHECK-CVT-NEXT: mov s6, v0.s[1] +; CHECK-CVT-NEXT: fcvtzu w10, s0 +; CHECK-CVT-NEXT: fcvtzu w14, s4 ; CHECK-CVT-NEXT: fcvtzu w2, s1 -; CHECK-CVT-NEXT: fcvtzu w10, s3 -; CHECK-CVT-NEXT: mov s3, v0.s[1] -; CHECK-CVT-NEXT: fcvtzu w11, s4 -; CHECK-CVT-NEXT: mov s4, v0.s[2] +; CHECK-CVT-NEXT: fcvtzu w11, s3 +; CHECK-CVT-NEXT: mov s3, v0.s[2] +; CHECK-CVT-NEXT: fcvtzu w12, s5 ; CHECK-CVT-NEXT: mov s0, v0.s[3] ; CHECK-CVT-NEXT: fcvtzu w13, s2 -; CHECK-CVT-NEXT: cmp w10, w8 -; CHECK-CVT-NEXT: mov s2, v5.s[1] -; CHECK-CVT-NEXT: fcvtzu w14, s3 -; CHECK-CVT-NEXT: csel w10, w10, w8, lo -; CHECK-CVT-NEXT: cmp w9, w8 -; CHECK-CVT-NEXT: fcvtzu w15, s4 -; CHECK-CVT-NEXT: csel w9, w9, w8, lo +; CHECK-CVT-NEXT: mov s5, v4.s[1] ; CHECK-CVT-NEXT: cmp w11, w8 +; CHECK-CVT-NEXT: fcvtzu w15, s6 ; CHECK-CVT-NEXT: csel w11, w11, w8, lo +; CHECK-CVT-NEXT: cmp w9, w8 +; CHECK-CVT-NEXT: csel w9, w9, w8, lo +; CHECK-CVT-NEXT: cmp w12, w8 +; CHECK-CVT-NEXT: fcvtzu w16, s3 +; CHECK-CVT-NEXT: csel w12, w12, w8, lo ; CHECK-CVT-NEXT: cmp w13, w8 -; CHECK-CVT-NEXT: mov s3, v5.s[2] +; CHECK-CVT-NEXT: mov s2, v4.s[2] ; CHECK-CVT-NEXT: fcvtzu w17, s0 ; CHECK-CVT-NEXT: csel w13, w13, w8, lo -; CHECK-CVT-NEXT: cmp w14, w8 -; CHECK-CVT-NEXT: mov s4, v5.s[3] -; CHECK-CVT-NEXT: fcvtzu w18, s2 -; CHECK-CVT-NEXT: csel w14, w14, w8, lo -; CHECK-CVT-NEXT: cmp w12, w8 -; CHECK-CVT-NEXT: mov s0, v1.s[1] -; CHECK-CVT-NEXT: csel w12, w12, w8, lo ; CHECK-CVT-NEXT: cmp w15, w8 -; CHECK-CVT-NEXT: fcvtzu w0, s3 +; CHECK-CVT-NEXT: mov s4, v4.s[3] +; CHECK-CVT-NEXT: fcvtzu w18, s5 ; CHECK-CVT-NEXT: csel w15, w15, w8, lo +; CHECK-CVT-NEXT: cmp w10, w8 +; CHECK-CVT-NEXT: mov s0, v1.s[1] +; CHECK-CVT-NEXT: csel w10, w10, w8, lo +; CHECK-CVT-NEXT: cmp w16, w8 +; CHECK-CVT-NEXT: csel w16, w16, w8, lo ; CHECK-CVT-NEXT: cmp w17, w8 +; CHECK-CVT-NEXT: fcvtzu w0, s2 ; CHECK-CVT-NEXT: csel w17, w17, w8, lo ; CHECK-CVT-NEXT: cmp w18, w8 +; CHECK-CVT-NEXT: fcvtzu w1, s4 +; CHECK-CVT-NEXT: csel w18, w18, w8, lo +; CHECK-CVT-NEXT: cmp w14, w8 ; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: csel w9, w18, w8, lo -; CHECK-CVT-NEXT: fcvtzu w18, s4 -; CHECK-CVT-NEXT: cmp w16, w8 -; CHECK-CVT-NEXT: fcvtzu w1, s0 -; CHECK-CVT-NEXT: csel w16, w16, w8, lo +; CHECK-CVT-NEXT: csel w9, w14, w8, lo +; CHECK-CVT-NEXT: fcvtzu w14, s0 ; CHECK-CVT-NEXT: cmp w0, w8 -; CHECK-CVT-NEXT: mov s0, v1.s[2] ; CHECK-CVT-NEXT: csel w0, w0, w8, lo -; CHECK-CVT-NEXT: cmp w18, w8 -; CHECK-CVT-NEXT: mov v2.s[1], w10 -; CHECK-CVT-NEXT: csel w10, w18, w8, lo ; CHECK-CVT-NEXT: cmp w1, w8 -; CHECK-CVT-NEXT: fmov s3, w12 -; CHECK-CVT-NEXT: csel w18, w1, w8, lo +; CHECK-CVT-NEXT: mov s0, v1.s[2] +; CHECK-CVT-NEXT: csel w1, w1, w8, lo +; CHECK-CVT-NEXT: cmp w14, w8 +; CHECK-CVT-NEXT: mov s1, v1.s[3] +; CHECK-CVT-NEXT: csel w14, w14, w8, lo ; CHECK-CVT-NEXT: cmp w2, w8 -; CHECK-CVT-NEXT: csel w1, w2, w8, lo -; CHECK-CVT-NEXT: fmov s4, w16 -; CHECK-CVT-NEXT: mov v2.s[2], w11 -; CHECK-CVT-NEXT: fcvtzu w11, s0 -; CHECK-CVT-NEXT: mov s0, v1.s[3] -; CHECK-CVT-NEXT: fmov s5, w1 -; CHECK-CVT-NEXT: mov v3.s[1], w14 -; CHECK-CVT-NEXT: cmp w11, w8 -; CHECK-CVT-NEXT: mov v4.s[1], w9 -; CHECK-CVT-NEXT: csel w9, w11, w8, lo -; CHECK-CVT-NEXT: mov v5.s[1], w18 -; CHECK-CVT-NEXT: fcvtzu w11, s0 -; CHECK-CVT-NEXT: mov v3.s[2], w15 +; CHECK-CVT-NEXT: mov v2.s[1], w11 +; CHECK-CVT-NEXT: csel w11, w2, w8, lo +; CHECK-CVT-NEXT: fmov s3, w10 +; CHECK-CVT-NEXT: fcvtzu w10, s0 +; CHECK-CVT-NEXT: fmov s4, w9 +; CHECK-CVT-NEXT: fcvtzu w9, s1 +; CHECK-CVT-NEXT: fmov s5, w11 +; CHECK-CVT-NEXT: mov v2.s[2], w12 +; CHECK-CVT-NEXT: cmp w10, w8 +; CHECK-CVT-NEXT: mov v3.s[1], w15 +; CHECK-CVT-NEXT: csel w10, w10, w8, lo +; CHECK-CVT-NEXT: mov v4.s[1], w18 +; CHECK-CVT-NEXT: mov v5.s[1], w14 +; CHECK-CVT-NEXT: cmp w9, w8 +; CHECK-CVT-NEXT: csel w8, w9, w8, lo +; CHECK-CVT-NEXT: mov v3.s[2], w16 ; CHECK-CVT-NEXT: mov v4.s[2], w0 -; CHECK-CVT-NEXT: mov v5.s[2], w9 -; CHECK-CVT-NEXT: cmp w11, w8 -; CHECK-CVT-NEXT: csel w8, w11, w8, lo +; CHECK-CVT-NEXT: mov v5.s[2], w10 ; CHECK-CVT-NEXT: mov v2.s[3], w13 ; CHECK-CVT-NEXT: mov v3.s[3], w17 -; CHECK-CVT-NEXT: mov v4.s[3], w10 +; CHECK-CVT-NEXT: mov v4.s[3], w1 ; CHECK-CVT-NEXT: mov v5.s[3], w8 ; CHECK-CVT-NEXT: uzp1 v0.8h, v3.8h, v2.8h ; CHECK-CVT-NEXT: uzp1 v1.8h, v5.8h, v4.8h @@ -2802,54 +2803,54 @@ ; CHECK-LABEL: test_unsigned_v8f64_v8i8: ; CHECK: // %bb.0: ; CHECK-NEXT: mov d5, v0.d[1] +; CHECK-NEXT: mov d6, v1.d[1] ; CHECK-NEXT: fcvtzu w10, d0 -; CHECK-NEXT: mov d0, v1.d[1] -; CHECK-NEXT: mov w8, #255 -; CHECK-NEXT: fcvtzu w12, d1 ; CHECK-NEXT: mov d4, v2.d[1] -; CHECK-NEXT: fcvtzu w13, d3 +; CHECK-NEXT: mov w8, #255 +; CHECK-NEXT: fcvtzu w11, d1 +; CHECK-NEXT: mov d1, v3.d[1] +; CHECK-NEXT: fcvtzu w12, d2 ; CHECK-NEXT: fcvtzu w9, d5 -; CHECK-NEXT: fcvtzu w11, d0 +; CHECK-NEXT: fcvtzu w13, d6 +; CHECK-NEXT: fcvtzu w14, d4 ; CHECK-NEXT: cmp w9, #255 ; CHECK-NEXT: csel w9, w9, w8, lo ; CHECK-NEXT: cmp w10, #255 ; CHECK-NEXT: csel w10, w10, w8, lo +; CHECK-NEXT: cmp w13, #255 +; CHECK-NEXT: csel w13, w13, w8, lo ; CHECK-NEXT: cmp w11, #255 +; CHECK-NEXT: csel w11, w11, w8, lo +; CHECK-NEXT: cmp w14, #255 ; CHECK-NEXT: fmov s0, w10 -; CHECK-NEXT: csel w10, w11, w8, lo +; CHECK-NEXT: csel w10, w14, w8, lo ; CHECK-NEXT: cmp w12, #255 -; CHECK-NEXT: csel w11, w12, w8, lo +; CHECK-NEXT: fmov s2, w11 +; CHECK-NEXT: fcvtzu w11, d1 +; CHECK-NEXT: csel w12, w12, w8, lo ; CHECK-NEXT: mov v0.s[1], w9 -; CHECK-NEXT: fcvtzu w9, d4 -; CHECK-NEXT: fmov s1, w11 -; CHECK-NEXT: fcvtzu w11, d2 -; CHECK-NEXT: cmp w9, #255 -; CHECK-NEXT: mov d2, v3.d[1] -; CHECK-NEXT: mov w12, v0.s[1] -; CHECK-NEXT: csel w9, w9, w8, lo -; CHECK-NEXT: mov v1.s[1], w10 +; CHECK-NEXT: fcvtzu w9, d3 +; CHECK-NEXT: mov v2.s[1], w13 ; CHECK-NEXT: cmp w11, #255 -; CHECK-NEXT: csel w10, w11, w8, lo +; CHECK-NEXT: fmov s1, w12 +; CHECK-NEXT: mov w12, v0.s[1] +; CHECK-NEXT: csel w11, w11, w8, lo +; CHECK-NEXT: cmp w9, #255 +; CHECK-NEXT: csel w8, w9, w8, lo +; CHECK-NEXT: fmov w9, s2 ; CHECK-NEXT: mov v0.b[1], w12 -; CHECK-NEXT: fmov w11, s1 -; CHECK-NEXT: fmov s4, w10 -; CHECK-NEXT: fcvtzu w10, d2 -; CHECK-NEXT: mov w12, v1.s[1] -; CHECK-NEXT: mov v0.b[2], w11 -; CHECK-NEXT: mov v4.s[1], w9 -; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: csel w9, w10, w8, lo -; CHECK-NEXT: cmp w13, #255 -; CHECK-NEXT: csel w8, w13, w8, lo -; CHECK-NEXT: mov v0.b[3], w12 -; CHECK-NEXT: fmov w10, s4 -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: mov w8, v4.s[1] -; CHECK-NEXT: mov v0.b[4], w10 -; CHECK-NEXT: mov v1.s[1], w9 -; CHECK-NEXT: mov v0.b[5], w8 +; CHECK-NEXT: mov v1.s[1], w10 +; CHECK-NEXT: mov w12, v2.s[1] +; CHECK-NEXT: mov v0.b[2], w9 +; CHECK-NEXT: fmov s2, w8 ; CHECK-NEXT: fmov w8, s1 ; CHECK-NEXT: mov w9, v1.s[1] +; CHECK-NEXT: mov v0.b[3], w12 +; CHECK-NEXT: mov v2.s[1], w11 +; CHECK-NEXT: mov v0.b[4], w8 +; CHECK-NEXT: fmov w8, s2 +; CHECK-NEXT: mov v0.b[5], w9 +; CHECK-NEXT: mov w9, v2.s[1] ; CHECK-NEXT: mov v0.b[6], w8 ; CHECK-NEXT: mov v0.b[7], w9 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 @@ -2862,106 +2863,106 @@ ; CHECK-LABEL: test_unsigned_v16f64_v16i8: ; CHECK: // %bb.0: ; CHECK-NEXT: mov d16, v0.d[1] -; CHECK-NEXT: fcvtzu w10, d0 -; CHECK-NEXT: mov d0, v1.d[1] +; CHECK-NEXT: mov d17, v1.d[1] +; CHECK-NEXT: fcvtzu w9, d0 +; CHECK-NEXT: mov d0, v2.d[1] ; CHECK-NEXT: mov w8, #255 -; CHECK-NEXT: fcvtzu w12, d1 -; CHECK-NEXT: mov d1, v2.d[1] -; CHECK-NEXT: fcvtzu w9, d16 -; CHECK-NEXT: fcvtzu w11, d0 -; CHECK-NEXT: cmp w9, #255 -; CHECK-NEXT: csel w9, w9, w8, lo +; CHECK-NEXT: fcvtzu w11, d1 +; CHECK-NEXT: mov d1, v3.d[1] +; CHECK-NEXT: fcvtzu w12, d2 +; CHECK-NEXT: fcvtzu w10, d16 +; CHECK-NEXT: fcvtzu w13, d17 +; CHECK-NEXT: fcvtzu w14, d0 +; CHECK-NEXT: fcvtzu w15, d3 +; CHECK-NEXT: mov d2, v4.d[1] ; CHECK-NEXT: cmp w10, #255 ; CHECK-NEXT: csel w10, w10, w8, lo +; CHECK-NEXT: cmp w9, #255 +; CHECK-NEXT: csel w9, w9, w8, lo +; CHECK-NEXT: cmp w13, #255 +; CHECK-NEXT: csel w13, w13, w8, lo ; CHECK-NEXT: cmp w11, #255 -; CHECK-NEXT: fmov s0, w10 -; CHECK-NEXT: csel w10, w11, w8, lo -; CHECK-NEXT: cmp w12, #255 -; CHECK-NEXT: csel w11, w12, w8, lo -; CHECK-NEXT: mov v0.s[1], w9 +; CHECK-NEXT: csel w11, w11, w8, lo +; CHECK-NEXT: cmp w14, #255 +; CHECK-NEXT: fmov s0, w9 ; CHECK-NEXT: fcvtzu w9, d1 +; CHECK-NEXT: csel w14, w14, w8, lo +; CHECK-NEXT: cmp w12, #255 ; CHECK-NEXT: fmov s1, w11 -; CHECK-NEXT: fcvtzu w11, d2 +; CHECK-NEXT: csel w11, w12, w8, lo +; CHECK-NEXT: mov v0.s[1], w10 ; CHECK-NEXT: cmp w9, #255 -; CHECK-NEXT: mov d2, v3.d[1] -; CHECK-NEXT: mov w12, v0.s[1] ; CHECK-NEXT: csel w9, w9, w8, lo -; CHECK-NEXT: mov v1.s[1], w10 -; CHECK-NEXT: cmp w11, #255 -; CHECK-NEXT: csel w11, w11, w8, lo -; CHECK-NEXT: fcvtzu w10, d2 -; CHECK-NEXT: mov d2, v4.d[1] +; CHECK-NEXT: cmp w15, #255 +; CHECK-NEXT: mov w12, v0.s[1] +; CHECK-NEXT: mov v1.s[1], w13 +; CHECK-NEXT: fcvtzu w13, d2 +; CHECK-NEXT: fmov s2, w11 ; CHECK-NEXT: mov v0.b[1], w12 -; CHECK-NEXT: fmov w13, s1 -; CHECK-NEXT: mov w12, v1.s[1] -; CHECK-NEXT: fmov s1, w11 -; CHECK-NEXT: fcvtzu w11, d3 -; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: mov v0.b[2], w13 -; CHECK-NEXT: mov v1.s[1], w9 -; CHECK-NEXT: csel w9, w10, w8, lo -; CHECK-NEXT: cmp w11, #255 -; CHECK-NEXT: fcvtzu w10, d2 -; CHECK-NEXT: csel w11, w11, w8, lo -; CHECK-NEXT: mov d2, v5.d[1] -; CHECK-NEXT: mov v0.b[3], w12 ; CHECK-NEXT: fmov w12, s1 +; CHECK-NEXT: csel w15, w15, w8, lo +; CHECK-NEXT: fcvtzu w10, d4 +; CHECK-NEXT: mov w11, v1.s[1] +; CHECK-NEXT: mov v2.s[1], w14 +; CHECK-NEXT: mov v0.b[2], w12 +; CHECK-NEXT: cmp w13, #255 +; CHECK-NEXT: fmov s1, w15 +; CHECK-NEXT: mov w12, v2.s[1] +; CHECK-NEXT: fmov w14, s2 +; CHECK-NEXT: mov v0.b[3], w11 +; CHECK-NEXT: mov d2, v5.d[1] +; CHECK-NEXT: csel w11, w13, w8, lo ; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: mov w13, v1.s[1] -; CHECK-NEXT: fmov s1, w11 -; CHECK-NEXT: fcvtzu w11, d4 -; CHECK-NEXT: mov v0.b[4], w12 ; CHECK-NEXT: mov v1.s[1], w9 ; CHECK-NEXT: csel w9, w10, w8, lo -; CHECK-NEXT: cmp w11, #255 -; CHECK-NEXT: csel w10, w11, w8, lo -; CHECK-NEXT: mov v0.b[5], w13 -; CHECK-NEXT: fcvtzu w13, d2 -; CHECK-NEXT: fmov w11, s1 -; CHECK-NEXT: mov w12, v1.s[1] -; CHECK-NEXT: fmov s1, w10 -; CHECK-NEXT: fcvtzu w10, d5 -; CHECK-NEXT: cmp w13, #255 -; CHECK-NEXT: mov v0.b[6], w11 +; CHECK-NEXT: mov v0.b[4], w14 +; CHECK-NEXT: fmov w10, s1 +; CHECK-NEXT: mov w13, v1.s[1] +; CHECK-NEXT: mov v0.b[5], w12 +; CHECK-NEXT: fcvtzu w12, d5 +; CHECK-NEXT: fmov s3, w9 +; CHECK-NEXT: fcvtzu w9, d2 ; CHECK-NEXT: mov d2, v6.d[1] -; CHECK-NEXT: mov v1.s[1], w9 -; CHECK-NEXT: csel w9, w13, w8, lo -; CHECK-NEXT: cmp w10, #255 -; CHECK-NEXT: fcvtzu w13, d6 -; CHECK-NEXT: csel w10, w10, w8, lo -; CHECK-NEXT: mov v0.b[7], w12 -; CHECK-NEXT: fcvtzu w12, d2 -; CHECK-NEXT: fmov w11, s1 +; CHECK-NEXT: mov v0.b[6], w10 +; CHECK-NEXT: mov d1, v7.d[1] +; CHECK-NEXT: fcvtzu w15, d7 +; CHECK-NEXT: cmp w9, #255 +; CHECK-NEXT: mov v3.s[1], w11 +; CHECK-NEXT: csel w9, w9, w8, lo +; CHECK-NEXT: cmp w12, #255 +; CHECK-NEXT: fcvtzu w11, d2 +; CHECK-NEXT: csel w10, w12, w8, lo +; CHECK-NEXT: fcvtzu w12, d6 +; CHECK-NEXT: mov v0.b[7], w13 +; CHECK-NEXT: fmov w13, s3 +; CHECK-NEXT: mov w14, v3.s[1] +; CHECK-NEXT: cmp w11, #255 ; CHECK-NEXT: fmov s2, w10 -; CHECK-NEXT: mov w10, v1.s[1] +; CHECK-NEXT: csel w10, w11, w8, lo ; CHECK-NEXT: cmp w12, #255 -; CHECK-NEXT: mov d1, v7.d[1] -; CHECK-NEXT: mov v0.b[8], w11 +; CHECK-NEXT: csel w11, w12, w8, lo +; CHECK-NEXT: fcvtzu w12, d1 ; CHECK-NEXT: mov v2.s[1], w9 +; CHECK-NEXT: mov v0.b[8], w13 +; CHECK-NEXT: cmp w12, #255 +; CHECK-NEXT: fmov s1, w11 ; CHECK-NEXT: csel w9, w12, w8, lo -; CHECK-NEXT: cmp w13, #255 -; CHECK-NEXT: csel w11, w13, w8, lo -; CHECK-NEXT: fcvtzu w13, d7 -; CHECK-NEXT: mov v0.b[9], w10 -; CHECK-NEXT: fmov w10, s2 -; CHECK-NEXT: fmov s3, w11 -; CHECK-NEXT: fcvtzu w11, d1 +; CHECK-NEXT: cmp w15, #255 +; CHECK-NEXT: csel w8, w15, w8, lo +; CHECK-NEXT: fmov w11, s2 +; CHECK-NEXT: mov v0.b[9], w14 +; CHECK-NEXT: mov v1.s[1], w10 ; CHECK-NEXT: mov w12, v2.s[1] -; CHECK-NEXT: mov v0.b[10], w10 -; CHECK-NEXT: mov v3.s[1], w9 -; CHECK-NEXT: cmp w11, #255 -; CHECK-NEXT: csel w9, w11, w8, lo -; CHECK-NEXT: cmp w13, #255 -; CHECK-NEXT: csel w8, w13, w8, lo -; CHECK-NEXT: mov v0.b[11], w12 -; CHECK-NEXT: fmov w10, s3 -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: mov w8, v3.s[1] -; CHECK-NEXT: mov v0.b[12], w10 -; CHECK-NEXT: mov v1.s[1], w9 -; CHECK-NEXT: mov v0.b[13], w8 +; CHECK-NEXT: mov v0.b[10], w11 +; CHECK-NEXT: fmov s2, w8 ; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: mov v0.b[11], w12 +; CHECK-NEXT: mov v2.s[1], w9 ; CHECK-NEXT: mov w9, v1.s[1] +; CHECK-NEXT: mov v0.b[12], w8 +; CHECK-NEXT: fmov w8, s2 +; CHECK-NEXT: mov v0.b[13], w9 +; CHECK-NEXT: mov w9, v2.s[1] ; CHECK-NEXT: mov v0.b[14], w8 ; CHECK-NEXT: mov v0.b[15], w9 ; CHECK-NEXT: ret @@ -2973,54 +2974,54 @@ ; CHECK-LABEL: test_unsigned_v8f64_v8i16: ; CHECK: // %bb.0: ; CHECK-NEXT: mov d5, v0.d[1] +; CHECK-NEXT: mov d6, v1.d[1] ; CHECK-NEXT: fcvtzu w10, d0 -; CHECK-NEXT: mov d0, v1.d[1] ; CHECK-NEXT: mov w8, #65535 -; CHECK-NEXT: fcvtzu w12, d1 ; CHECK-NEXT: mov d4, v2.d[1] -; CHECK-NEXT: fcvtzu w13, d3 +; CHECK-NEXT: fcvtzu w11, d1 +; CHECK-NEXT: mov d1, v3.d[1] +; CHECK-NEXT: fcvtzu w12, d2 ; CHECK-NEXT: fcvtzu w9, d5 -; CHECK-NEXT: fcvtzu w11, d0 +; CHECK-NEXT: fcvtzu w13, d6 +; CHECK-NEXT: fcvtzu w14, d4 ; CHECK-NEXT: cmp w9, w8 ; CHECK-NEXT: csel w9, w9, w8, lo ; CHECK-NEXT: cmp w10, w8 ; CHECK-NEXT: csel w10, w10, w8, lo +; CHECK-NEXT: cmp w13, w8 +; CHECK-NEXT: csel w13, w13, w8, lo ; CHECK-NEXT: cmp w11, w8 +; CHECK-NEXT: csel w11, w11, w8, lo +; CHECK-NEXT: cmp w14, w8 ; CHECK-NEXT: fmov s0, w10 -; CHECK-NEXT: csel w10, w11, w8, lo +; CHECK-NEXT: csel w10, w14, w8, lo ; CHECK-NEXT: cmp w12, w8 -; CHECK-NEXT: csel w11, w12, w8, lo +; CHECK-NEXT: fmov s2, w11 +; CHECK-NEXT: fcvtzu w11, d1 +; CHECK-NEXT: csel w12, w12, w8, lo ; CHECK-NEXT: mov v0.s[1], w9 -; CHECK-NEXT: fcvtzu w9, d4 -; CHECK-NEXT: fmov s1, w11 -; CHECK-NEXT: fcvtzu w11, d2 -; CHECK-NEXT: cmp w9, w8 -; CHECK-NEXT: mov d2, v3.d[1] -; CHECK-NEXT: mov w12, v0.s[1] -; CHECK-NEXT: csel w9, w9, w8, lo -; CHECK-NEXT: mov v1.s[1], w10 +; CHECK-NEXT: fcvtzu w9, d3 +; CHECK-NEXT: mov v2.s[1], w13 ; CHECK-NEXT: cmp w11, w8 -; CHECK-NEXT: csel w10, w11, w8, lo +; CHECK-NEXT: fmov s1, w12 +; CHECK-NEXT: mov w12, v0.s[1] +; CHECK-NEXT: csel w11, w11, w8, lo +; CHECK-NEXT: cmp w9, w8 +; CHECK-NEXT: csel w8, w9, w8, lo +; CHECK-NEXT: fmov w9, s2 ; CHECK-NEXT: mov v0.h[1], w12 -; CHECK-NEXT: fmov w11, s1 -; CHECK-NEXT: fmov s4, w10 -; CHECK-NEXT: fcvtzu w10, d2 -; CHECK-NEXT: mov w12, v1.s[1] -; CHECK-NEXT: mov v0.h[2], w11 -; CHECK-NEXT: mov v4.s[1], w9 -; CHECK-NEXT: cmp w10, w8 -; CHECK-NEXT: csel w9, w10, w8, lo -; CHECK-NEXT: cmp w13, w8 -; CHECK-NEXT: csel w8, w13, w8, lo -; CHECK-NEXT: mov v0.h[3], w12 -; CHECK-NEXT: fmov w10, s4 -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: mov w8, v4.s[1] -; CHECK-NEXT: mov v0.h[4], w10 -; CHECK-NEXT: mov v1.s[1], w9 -; CHECK-NEXT: mov v0.h[5], w8 +; CHECK-NEXT: mov v1.s[1], w10 +; CHECK-NEXT: mov w12, v2.s[1] +; CHECK-NEXT: mov v0.h[2], w9 +; CHECK-NEXT: fmov s2, w8 ; CHECK-NEXT: fmov w8, s1 ; CHECK-NEXT: mov w9, v1.s[1] +; CHECK-NEXT: mov v0.h[3], w12 +; CHECK-NEXT: mov v2.s[1], w11 +; CHECK-NEXT: mov v0.h[4], w8 +; CHECK-NEXT: fmov w8, s2 +; CHECK-NEXT: mov v0.h[5], w9 +; CHECK-NEXT: mov w9, v2.s[1] ; CHECK-NEXT: mov v0.h[6], w8 ; CHECK-NEXT: mov v0.h[7], w9 ; CHECK-NEXT: ret @@ -3031,22 +3032,22 @@ define <16 x i16> @test_unsigned_v16f64_v16i16(<16 x double> %f) { ; CHECK-LABEL: test_unsigned_v16f64_v16i16: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d16, v0.d[1] +; CHECK-NEXT: mov d17, v0.d[1] +; CHECK-NEXT: mov d18, v1.d[1] ; CHECK-NEXT: fcvtzu w9, d0 -; CHECK-NEXT: mov d0, v1.d[1] -; CHECK-NEXT: mov d17, v2.d[1] -; CHECK-NEXT: fcvtzu w10, d1 -; CHECK-NEXT: mov d1, v3.d[1] ; CHECK-NEXT: mov w8, #65535 +; CHECK-NEXT: fcvtzu w10, d1 +; CHECK-NEXT: mov d16, v2.d[1] +; CHECK-NEXT: mov d0, v3.d[1] +; CHECK-NEXT: mov d1, v4.d[1] +; CHECK-NEXT: fcvtzu w11, d17 +; CHECK-NEXT: fcvtzu w13, d18 ; CHECK-NEXT: fcvtzu w12, d2 -; CHECK-NEXT: fcvtzu w11, d16 -; CHECK-NEXT: mov d2, v4.d[1] -; CHECK-NEXT: fcvtzu w13, d0 -; CHECK-NEXT: fcvtzu w14, d17 -; CHECK-NEXT: fcvtzu w15, d1 -; CHECK-NEXT: fcvtzu w16, d3 +; CHECK-NEXT: mov d2, v5.d[1] +; CHECK-NEXT: fcvtzu w14, d16 +; CHECK-NEXT: fcvtzu w15, d3 ; CHECK-NEXT: cmp w11, w8 -; CHECK-NEXT: mov d1, v5.d[1] +; CHECK-NEXT: fcvtzu w16, d0 ; CHECK-NEXT: csel w11, w11, w8, lo ; CHECK-NEXT: cmp w9, w8 ; CHECK-NEXT: csel w9, w9, w8, lo @@ -3054,83 +3055,83 @@ ; CHECK-NEXT: csel w13, w13, w8, lo ; CHECK-NEXT: cmp w10, w8 ; CHECK-NEXT: csel w10, w10, w8, lo +; CHECK-NEXT: fcvtzu w17, d1 +; CHECK-NEXT: fmov s0, w9 ; CHECK-NEXT: cmp w14, w8 -; CHECK-NEXT: csel w14, w14, w8, lo +; CHECK-NEXT: csel w9, w14, w8, lo ; CHECK-NEXT: cmp w12, w8 -; CHECK-NEXT: csel w12, w12, w8, lo -; CHECK-NEXT: cmp w15, w8 -; CHECK-NEXT: fcvtzu w17, d2 -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: csel w9, w15, w8, lo -; CHECK-NEXT: fcvtzu w15, d4 -; CHECK-NEXT: cmp w16, w8 -; CHECK-NEXT: fcvtzu w18, d1 -; CHECK-NEXT: csel w16, w16, w8, lo -; CHECK-NEXT: cmp w17, w8 -; CHECK-NEXT: csel w17, w17, w8, lo -; CHECK-NEXT: cmp w15, w8 +; CHECK-NEXT: fmov s1, w10 +; CHECK-NEXT: csel w10, w12, w8, lo ; CHECK-NEXT: mov v0.s[1], w11 -; CHECK-NEXT: fcvtzu w0, d5 -; CHECK-NEXT: csel w11, w15, w8, lo -; CHECK-NEXT: fmov s2, w10 -; CHECK-NEXT: cmp w18, w8 -; CHECK-NEXT: mov d4, v6.d[1] -; CHECK-NEXT: csel w10, w18, w8, lo -; CHECK-NEXT: cmp w0, w8 -; CHECK-NEXT: fmov s1, w11 -; CHECK-NEXT: csel w11, w0, w8, lo -; CHECK-NEXT: mov v2.s[1], w13 -; CHECK-NEXT: mov w13, v0.s[1] -; CHECK-NEXT: fcvtzu w15, d4 -; CHECK-NEXT: mov v1.s[1], w17 -; CHECK-NEXT: fmov s3, w11 -; CHECK-NEXT: mov d4, v7.d[1] -; CHECK-NEXT: mov v0.h[1], w13 -; CHECK-NEXT: fmov w11, s2 -; CHECK-NEXT: mov v3.s[1], w10 +; CHECK-NEXT: cmp w16, w8 +; CHECK-NEXT: csel w11, w16, w8, lo ; CHECK-NEXT: cmp w15, w8 -; CHECK-NEXT: mov w10, v1.s[1] -; CHECK-NEXT: mov w13, v2.s[1] -; CHECK-NEXT: fmov s2, w12 -; CHECK-NEXT: mov v0.h[2], w11 -; CHECK-NEXT: fcvtzu w11, d6 -; CHECK-NEXT: csel w12, w15, w8, lo -; CHECK-NEXT: mov v1.h[1], w10 -; CHECK-NEXT: fmov w10, s3 -; CHECK-NEXT: cmp w11, w8 -; CHECK-NEXT: csel w11, w11, w8, lo -; CHECK-NEXT: mov v0.h[3], w13 -; CHECK-NEXT: fcvtzu w13, d7 -; CHECK-NEXT: mov v1.h[2], w10 -; CHECK-NEXT: fmov s5, w11 +; CHECK-NEXT: mov v1.s[1], w13 +; CHECK-NEXT: mov w12, v0.s[1] +; CHECK-NEXT: fmov s3, w10 ; CHECK-NEXT: fcvtzu w10, d4 -; CHECK-NEXT: mov w11, v3.s[1] -; CHECK-NEXT: mov v2.s[1], w14 -; CHECK-NEXT: fmov s3, w16 -; CHECK-NEXT: mov v5.s[1], w12 +; CHECK-NEXT: fmov w14, s1 +; CHECK-NEXT: mov v0.h[1], w12 +; CHECK-NEXT: fcvtzu w12, d2 +; CHECK-NEXT: csel w15, w15, w8, lo +; CHECK-NEXT: cmp w17, w8 +; CHECK-NEXT: mov w13, v1.s[1] +; CHECK-NEXT: mov v0.h[2], w14 +; CHECK-NEXT: fcvtzu w14, d5 +; CHECK-NEXT: mov d1, v6.d[1] +; CHECK-NEXT: csel w16, w17, w8, lo ; CHECK-NEXT: cmp w10, w8 +; CHECK-NEXT: mov d2, v7.d[1] ; CHECK-NEXT: csel w10, w10, w8, lo -; CHECK-NEXT: cmp w13, w8 -; CHECK-NEXT: csel w8, w13, w8, lo -; CHECK-NEXT: fmov w12, s2 -; CHECK-NEXT: mov v1.h[3], w11 -; CHECK-NEXT: fmov w13, s5 -; CHECK-NEXT: mov w14, v2.s[1] -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: mov w11, v5.s[1] -; CHECK-NEXT: mov v0.h[4], w12 -; CHECK-NEXT: mov v1.h[4], w13 +; CHECK-NEXT: cmp w12, w8 +; CHECK-NEXT: csel w12, w12, w8, lo +; CHECK-NEXT: cmp w14, w8 +; CHECK-NEXT: csel w14, w14, w8, lo +; CHECK-NEXT: fcvtzu w17, d1 ; CHECK-NEXT: mov v3.s[1], w9 +; CHECK-NEXT: fcvtzu w9, d6 +; CHECK-NEXT: fmov s1, w10 +; CHECK-NEXT: fmov s5, w15 +; CHECK-NEXT: fmov s4, w14 +; CHECK-NEXT: cmp w17, w8 +; CHECK-NEXT: csel w10, w17, w8, lo +; CHECK-NEXT: cmp w9, w8 +; CHECK-NEXT: fcvtzu w14, d2 +; CHECK-NEXT: csel w9, w9, w8, lo +; CHECK-NEXT: mov v4.s[1], w12 +; CHECK-NEXT: fcvtzu w12, d7 +; CHECK-NEXT: mov v1.s[1], w16 +; CHECK-NEXT: mov v0.h[3], w13 +; CHECK-NEXT: cmp w14, w8 +; CHECK-NEXT: fmov s2, w9 +; CHECK-NEXT: mov w15, v1.s[1] +; CHECK-NEXT: csel w14, w14, w8, lo +; CHECK-NEXT: cmp w12, w8 +; CHECK-NEXT: mov w9, v4.s[1] +; CHECK-NEXT: csel w8, w12, w8, lo +; CHECK-NEXT: fmov w12, s4 +; CHECK-NEXT: mov v1.h[1], w15 ; CHECK-NEXT: mov v2.s[1], w10 -; CHECK-NEXT: mov v0.h[5], w14 -; CHECK-NEXT: mov v1.h[5], w11 -; CHECK-NEXT: fmov w8, s3 -; CHECK-NEXT: fmov w9, s2 -; CHECK-NEXT: mov w10, v3.s[1] -; CHECK-NEXT: mov w11, v2.s[1] -; CHECK-NEXT: mov v0.h[6], w8 -; CHECK-NEXT: mov v1.h[6], w9 -; CHECK-NEXT: mov v0.h[7], w10 +; CHECK-NEXT: mov w15, v3.s[1] +; CHECK-NEXT: fmov w10, s3 +; CHECK-NEXT: mov v1.h[2], w12 +; CHECK-NEXT: fmov s3, w8 +; CHECK-NEXT: fmov w12, s2 +; CHECK-NEXT: mov v5.s[1], w11 +; CHECK-NEXT: mov v1.h[3], w9 +; CHECK-NEXT: mov w8, v2.s[1] +; CHECK-NEXT: mov v3.s[1], w14 +; CHECK-NEXT: mov v0.h[4], w10 +; CHECK-NEXT: mov v1.h[4], w12 +; CHECK-NEXT: fmov w9, s5 +; CHECK-NEXT: fmov w10, s3 +; CHECK-NEXT: mov v0.h[5], w15 +; CHECK-NEXT: mov v1.h[5], w8 +; CHECK-NEXT: mov w8, v5.s[1] +; CHECK-NEXT: mov w11, v3.s[1] +; CHECK-NEXT: mov v0.h[6], w9 +; CHECK-NEXT: mov v1.h[6], w10 +; CHECK-NEXT: mov v0.h[7], w8 ; CHECK-NEXT: mov v1.h[7], w11 ; CHECK-NEXT: ret %x = call <16 x i16> @llvm.fptoui.sat.v16f64.v16i16(<16 x double> %f) diff --git a/llvm/test/CodeGen/AArch64/funnel-shift-rot.ll b/llvm/test/CodeGen/AArch64/funnel-shift-rot.ll --- a/llvm/test/CodeGen/AArch64/funnel-shift-rot.ll +++ b/llvm/test/CodeGen/AArch64/funnel-shift-rot.ll @@ -77,8 +77,8 @@ define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %z) { ; CHECK-LABEL: rotl_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.4s, #31 ; CHECK-NEXT: neg v3.4s, v1.4s +; CHECK-NEXT: movi v2.4s, #31 ; CHECK-NEXT: and v1.16b, v1.16b, v2.16b ; CHECK-NEXT: and v2.16b, v3.16b, v2.16b ; CHECK-NEXT: neg v2.4s, v2.4s diff --git a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll --- a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll +++ b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll @@ -187,9 +187,9 @@ define <4 x i1> @vec_4xi32_splat_eq(<4 x i32> %x, <4 x i32> %y) nounwind { ; CHECK-LABEL: vec_4xi32_splat_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: ushl v0.4s, v0.4s, v1.4s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret @@ -219,9 +219,9 @@ define <4 x i1> @vec_4xi32_nonsplat_undef0_eq(<4 x i32> %x, <4 x i32> %y) nounwind { ; CHECK-LABEL: vec_4xi32_nonsplat_undef0_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: ushl v0.4s, v0.4s, v1.4s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret @@ -233,8 +233,8 @@ define <4 x i1> @vec_4xi32_nonsplat_undef1_eq(<4 x i32> %x, <4 x i32> %y) nounwind { ; CHECK-LABEL: vec_4xi32_nonsplat_undef1_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: neg v1.4s, v1.4s +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s ; CHECK-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 @@ -248,8 +248,8 @@ define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwind { ; CHECK-LABEL: vec_4xi32_nonsplat_undef2_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: neg v1.4s, v1.4s +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s ; CHECK-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 diff --git a/llvm/test/CodeGen/AArch64/lowerMUL-newload.ll b/llvm/test/CodeGen/AArch64/lowerMUL-newload.ll --- a/llvm/test/CodeGen/AArch64/lowerMUL-newload.ll +++ b/llvm/test/CodeGen/AArch64/lowerMUL-newload.ll @@ -90,9 +90,9 @@ ; CHECK-LABEL: addmuli16_and: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: smull v1.4s, v1.4h, v2.4h -; CHECK-NEXT: movi v3.2d, #0x00ffff0000ffff ; CHECK-NEXT: smlal v1.4s, v0.4h, v2.4h -; CHECK-NEXT: and v0.16b, v1.16b, v3.16b +; CHECK-NEXT: movi v0.2d, #0x00ffff0000ffff +; CHECK-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret entry: %v0 = sext <4 x i16> %vec0 to <4 x i32> @@ -226,9 +226,9 @@ ; CHECK-LABEL: addmuli32_and: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: smull v1.2d, v1.2s, v2.2s -; CHECK-NEXT: movi v3.2d, #0x000000ffffffff ; CHECK-NEXT: smlal v1.2d, v0.2s, v2.2s -; CHECK-NEXT: and v0.16b, v1.16b, v3.16b +; CHECK-NEXT: movi v0.2d, #0x000000ffffffff +; CHECK-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret entry: %v0 = sext <2 x i32> %vec0 to <2 x i64> diff --git a/llvm/test/CodeGen/AArch64/minmax.ll b/llvm/test/CodeGen/AArch64/minmax.ll --- a/llvm/test/CodeGen/AArch64/minmax.ll +++ b/llvm/test/CodeGen/AArch64/minmax.ll @@ -123,9 +123,9 @@ ; CHECK-LABEL: t12: ; CHECK: // %bb.0: ; CHECK-NEXT: cmhi v2.16b, v1.16b, v0.16b -; CHECK-NEXT: movi v3.16b, #1 ; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b -; CHECK-NEXT: and v1.16b, v2.16b, v3.16b +; CHECK-NEXT: movi v1.16b, #1 +; CHECK-NEXT: and v1.16b, v2.16b, v1.16b ; CHECK-NEXT: add v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %t1 = icmp ugt <16 x i8> %b, %a diff --git a/llvm/test/CodeGen/AArch64/named-vector-shuffles-neon.ll b/llvm/test/CodeGen/AArch64/named-vector-shuffles-neon.ll --- a/llvm/test/CodeGen/AArch64/named-vector-shuffles-neon.ll +++ b/llvm/test/CodeGen/AArch64/named-vector-shuffles-neon.ll @@ -55,8 +55,8 @@ ; CHECK-NEXT: ext v6.16b, v2.16b, v3.16b, #12 ; CHECK-NEXT: ext v0.16b, v1.16b, v2.16b, #12 ; CHECK-NEXT: ext v2.16b, v3.16b, v4.16b, #12 -; CHECK-NEXT: mov v3.16b, v5.16b ; CHECK-NEXT: mov v1.16b, v6.16b +; CHECK-NEXT: mov v3.16b, v5.16b ; CHECK-NEXT: ret %res = call <16 x float> @llvm.experimental.vector.splice.v16f32(<16 x float> %a, <16 x float> %b, i32 7) ret <16 x float> %res @@ -114,8 +114,8 @@ ; CHECK-NEXT: ext v6.16b, v2.16b, v3.16b, #12 ; CHECK-NEXT: ext v0.16b, v1.16b, v2.16b, #12 ; CHECK-NEXT: ext v2.16b, v3.16b, v4.16b, #12 -; CHECK-NEXT: mov v3.16b, v5.16b ; CHECK-NEXT: mov v1.16b, v6.16b +; CHECK-NEXT: mov v3.16b, v5.16b ; CHECK-NEXT: ret %res = call <16 x float> @llvm.experimental.vector.splice.v16f32(<16 x float> %a, <16 x float> %b, i32 -9) ret <16 x float> %res diff --git a/llvm/test/CodeGen/AArch64/neon-abd.ll b/llvm/test/CodeGen/AArch64/neon-abd.ll --- a/llvm/test/CodeGen/AArch64/neon-abd.ll +++ b/llvm/test/CodeGen/AArch64/neon-abd.ll @@ -145,16 +145,16 @@ ; CHECK-LABEL: sabd_2d: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x8, v0.d[1] -; CHECK-NEXT: fmov x10, d0 -; CHECK-NEXT: mov x9, v1.d[1] -; CHECK-NEXT: asr x11, x10, #63 +; CHECK-NEXT: mov x10, v1.d[1] +; CHECK-NEXT: fmov x9, d0 ; CHECK-NEXT: asr x12, x8, #63 -; CHECK-NEXT: asr x13, x9, #63 -; CHECK-NEXT: subs x8, x8, x9 -; CHECK-NEXT: fmov x9, d1 +; CHECK-NEXT: asr x13, x10, #63 +; CHECK-NEXT: subs x8, x8, x10 +; CHECK-NEXT: fmov x10, d1 ; CHECK-NEXT: sbcs x12, x12, x13 -; CHECK-NEXT: asr x13, x9, #63 -; CHECK-NEXT: subs x9, x10, x9 +; CHECK-NEXT: asr x11, x9, #63 +; CHECK-NEXT: asr x13, x10, #63 +; CHECK-NEXT: subs x9, x9, x10 ; CHECK-NEXT: sbcs x10, x11, x13 ; CHECK-NEXT: cmp x10, #0 ; CHECK-NEXT: cneg x9, x9, lt @@ -323,8 +323,8 @@ ; CHECK-LABEL: uabd_2d: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x8, v0.d[1] -; CHECK-NEXT: fmov x10, d0 ; CHECK-NEXT: mov x9, v1.d[1] +; CHECK-NEXT: fmov x10, d0 ; CHECK-NEXT: subs x8, x8, x9 ; CHECK-NEXT: fmov x9, d1 ; CHECK-NEXT: ngcs x11, xzr diff --git a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll --- a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll +++ b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll @@ -892,8 +892,8 @@ ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI89_0] -; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b +; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI89_0] +; CHECK-NEXT: tbl v0.8b, { v0.16b }, v2.8b ; CHECK-NEXT: ret %c = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> ret <8 x i8> %c diff --git a/llvm/test/CodeGen/AArch64/neon-dotreduce.ll b/llvm/test/CodeGen/AArch64/neon-dotreduce.ll --- a/llvm/test/CodeGen/AArch64/neon-dotreduce.ll +++ b/llvm/test/CodeGen/AArch64/neon-dotreduce.ll @@ -7,11 +7,11 @@ define i32 @test_udot_v8i8(i8* nocapture readonly %a, i8* nocapture readonly %b) { ; CHECK-LABEL: test_udot_v8i8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: ldr d2, [x1] -; CHECK-NEXT: udot v0.2s, v2.8b, v1.8b -; CHECK-NEXT: addp v0.2s, v0.2s, v0.2s +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: movi v2.2d, #0000000000000000 +; CHECK-NEXT: udot v2.2s, v1.8b, v0.8b +; CHECK-NEXT: addp v0.2s, v2.2s, v2.2s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret entry: @@ -29,11 +29,11 @@ define i32 @test_udot_v8i8_nomla(i8* nocapture readonly %a1) { ; CHECK-LABEL: test_udot_v8i8_nomla: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr d1, [x0] ; CHECK-NEXT: movi v0.8b, #1 -; CHECK-NEXT: ldr d2, [x0] -; CHECK-NEXT: movi v1.2d, #0000000000000000 -; CHECK-NEXT: udot v1.2s, v2.8b, v0.8b -; CHECK-NEXT: addp v0.2s, v1.2s, v1.2s +; CHECK-NEXT: movi v2.2d, #0000000000000000 +; CHECK-NEXT: udot v2.2s, v1.8b, v0.8b +; CHECK-NEXT: addp v0.2s, v2.2s, v2.2s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret entry: @@ -47,11 +47,11 @@ define i32 @test_sdot_v8i8(i8* nocapture readonly %a, i8* nocapture readonly %b) { ; CHECK-LABEL: test_sdot_v8i8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: ldr d2, [x1] -; CHECK-NEXT: sdot v0.2s, v2.8b, v1.8b -; CHECK-NEXT: addp v0.2s, v0.2s, v0.2s +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: movi v2.2d, #0000000000000000 +; CHECK-NEXT: sdot v2.2s, v1.8b, v0.8b +; CHECK-NEXT: addp v0.2s, v2.2s, v2.2s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret entry: @@ -69,11 +69,11 @@ define i32 @test_sdot_v8i8_nomla(i8* nocapture readonly %a1) { ; CHECK-LABEL: test_sdot_v8i8_nomla: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr d1, [x0] ; CHECK-NEXT: movi v0.8b, #1 -; CHECK-NEXT: ldr d2, [x0] -; CHECK-NEXT: movi v1.2d, #0000000000000000 -; CHECK-NEXT: sdot v1.2s, v2.8b, v0.8b -; CHECK-NEXT: addp v0.2s, v1.2s, v1.2s +; CHECK-NEXT: movi v2.2d, #0000000000000000 +; CHECK-NEXT: sdot v2.2s, v1.8b, v0.8b +; CHECK-NEXT: addp v0.2s, v2.2s, v2.2s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret entry: @@ -88,11 +88,11 @@ define i32 @test_udot_v16i8(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %sum) { ; CHECK-LABEL: test_udot_v16i8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: ldr q2, [x0] -; CHECK-NEXT: udot v0.4s, v1.16b, v2.16b -; CHECK-NEXT: addv s0, v0.4s +; CHECK-NEXT: ldr q0, [x1] +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: movi v2.2d, #0000000000000000 +; CHECK-NEXT: udot v2.4s, v0.16b, v1.16b +; CHECK-NEXT: addv s0, v2.4s ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: add w0, w8, w2 ; CHECK-NEXT: ret @@ -112,11 +112,11 @@ define i32 @test_udot_v16i8_nomla(i8* nocapture readonly %a1) { ; CHECK-LABEL: test_udot_v16i8_nomla: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v0.16b, #1 -; CHECK-NEXT: ldr q2, [x0] -; CHECK-NEXT: movi v1.2d, #0000000000000000 -; CHECK-NEXT: udot v1.4s, v2.16b, v0.16b -; CHECK-NEXT: addv s0, v1.4s +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: movi v1.16b, #1 +; CHECK-NEXT: movi v2.2d, #0000000000000000 +; CHECK-NEXT: udot v2.4s, v0.16b, v1.16b +; CHECK-NEXT: addv s0, v2.4s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret entry: @@ -130,11 +130,11 @@ define i32 @test_sdot_v16i8(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %sum) { ; CHECK-LABEL: test_sdot_v16i8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: ldr q2, [x0] -; CHECK-NEXT: sdot v0.4s, v1.16b, v2.16b -; CHECK-NEXT: addv s0, v0.4s +; CHECK-NEXT: ldr q0, [x1] +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: movi v2.2d, #0000000000000000 +; CHECK-NEXT: sdot v2.4s, v0.16b, v1.16b +; CHECK-NEXT: addv s0, v2.4s ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: add w0, w8, w2 ; CHECK-NEXT: ret @@ -154,11 +154,11 @@ define i32 @test_sdot_v16i8_nomla(i8* nocapture readonly %a1) { ; CHECK-LABEL: test_sdot_v16i8_nomla: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v0.16b, #1 -; CHECK-NEXT: ldr q2, [x0] -; CHECK-NEXT: movi v1.2d, #0000000000000000 -; CHECK-NEXT: sdot v1.4s, v2.16b, v0.16b -; CHECK-NEXT: addv s0, v1.4s +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: movi v1.16b, #1 +; CHECK-NEXT: movi v2.2d, #0000000000000000 +; CHECK-NEXT: sdot v2.4s, v0.16b, v1.16b +; CHECK-NEXT: addv s0, v2.4s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret entry: @@ -195,11 +195,11 @@ define i32 @test_udot_v8i8_double_nomla(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) { ; CHECK-LABEL: test_udot_v8i8_double_nomla: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v1.8b, #1 -; CHECK-NEXT: movi v3.2d, #0000000000000000 -; CHECK-NEXT: udot v3.2s, v2.8b, v1.8b -; CHECK-NEXT: udot v3.2s, v0.8b, v1.8b -; CHECK-NEXT: addp v0.2s, v3.2s, v3.2s +; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: movi v3.8b, #1 +; CHECK-NEXT: udot v1.2s, v2.8b, v3.8b +; CHECK-NEXT: udot v1.2s, v0.8b, v3.8b +; CHECK-NEXT: addp v0.2s, v1.2s, v1.2s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret entry: @@ -277,11 +277,11 @@ define i32 @test_sdot_v8i8_double_nomla(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) { ; CHECK-LABEL: test_sdot_v8i8_double_nomla: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v1.8b, #1 -; CHECK-NEXT: movi v3.2d, #0000000000000000 -; CHECK-NEXT: sdot v3.2s, v2.8b, v1.8b -; CHECK-NEXT: sdot v3.2s, v0.8b, v1.8b -; CHECK-NEXT: addp v0.2s, v3.2s, v3.2s +; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: movi v3.8b, #1 +; CHECK-NEXT: sdot v1.2s, v2.8b, v3.8b +; CHECK-NEXT: sdot v1.2s, v0.8b, v3.8b +; CHECK-NEXT: addp v0.2s, v1.2s, v1.2s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/neon-truncstore.ll b/llvm/test/CodeGen/AArch64/neon-truncstore.ll --- a/llvm/test/CodeGen/AArch64/neon-truncstore.ll +++ b/llvm/test/CodeGen/AArch64/neon-truncstore.ll @@ -42,10 +42,10 @@ ; CHECK-LABEL: v2i32_v2i16: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strh w9, [x0] +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: strh w8, [x0, #2] +; CHECK-NEXT: strh w9, [x0] ; CHECK-NEXT: ret %b = trunc <2 x i32> %a to <2 x i16> store <2 x i16> %b, <2 x i16>* %result @@ -90,10 +90,10 @@ ; CHECK-LABEL: v2i32_v2i8: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strb w9, [x0] +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: strb w8, [x0, #1] +; CHECK-NEXT: strb w9, [x0] ; CHECK-NEXT: ret %b = trunc <2 x i32> %a to <2 x i8> store <2 x i8> %b, <2 x i8>* %result @@ -157,10 +157,10 @@ ; CHECK-LABEL: v2i16_v2i8: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strb w9, [x0] +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: strb w8, [x0, #1] +; CHECK-NEXT: strb w9, [x0] ; CHECK-NEXT: ret %b = trunc <2 x i16> %a to <2 x i8> store <2 x i8> %b, <2 x i8>* %result diff --git a/llvm/test/CodeGen/AArch64/nontemporal.ll b/llvm/test/CodeGen/AArch64/nontemporal.ll --- a/llvm/test/CodeGen/AArch64/nontemporal.ll +++ b/llvm/test/CodeGen/AArch64/nontemporal.ll @@ -455,12 +455,12 @@ ; CHECK-NEXT: ldr s17, [sp] ; CHECK-NEXT: add x9, sp, #4 ; CHECK-NEXT: ; kill: def $s4 killed $s4 def $q4 -; CHECK-NEXT: ; kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: ; kill: def $s5 killed $s5 def $q5 -; CHECK-NEXT: ; kill: def $s1 killed $s1 def $q1 +; CHECK-NEXT: ; kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: ; kill: def $s6 killed $s6 def $q6 -; CHECK-NEXT: ; kill: def $s2 killed $s2 def $q2 +; CHECK-NEXT: ; kill: def $s1 killed $s1 def $q1 ; CHECK-NEXT: ; kill: def $s7 killed $s7 def $q7 +; CHECK-NEXT: ; kill: def $s2 killed $s2 def $q2 ; CHECK-NEXT: ; kill: def $s3 killed $s3 def $q3 ; CHECK-NEXT: ld1.s { v16 }[1], [x8] ; CHECK-NEXT: add x8, sp, #24 @@ -471,22 +471,22 @@ ; CHECK-NEXT: ld1.s { v16 }[2], [x8] ; CHECK-NEXT: add x8, sp, #28 ; CHECK-NEXT: ld1.s { v17 }[2], [x9] -; CHECK-NEXT: add x9, sp, #12 ; CHECK-NEXT: mov.s v4[2], v6[0] +; CHECK-NEXT: add x9, sp, #12 ; CHECK-NEXT: mov.s v0[2], v2[0] -; CHECK-NEXT: ld1.s { v16 }[3], [x8] -; CHECK-NEXT: ld1.s { v17 }[3], [x9] ; CHECK-NEXT: mov.s v4[3], v7[0] +; CHECK-NEXT: ld1.s { v16 }[3], [x8] ; CHECK-NEXT: mov.s v0[3], v3[0] -; CHECK-NEXT: mov d1, v16[1] -; CHECK-NEXT: mov d2, v17[1] -; CHECK-NEXT: mov d3, v4[1] -; CHECK-NEXT: mov d5, v0[1] -; CHECK-NEXT: stnp d16, d1, [x0, #48] +; CHECK-NEXT: ld1.s { v17 }[3], [x9] +; CHECK-NEXT: mov d1, v4[1] +; CHECK-NEXT: mov d2, v0[1] +; CHECK-NEXT: mov d3, v16[1] +; CHECK-NEXT: mov d5, v17[1] +; CHECK-NEXT: stnp d4, d1, [x0, #16] ; CHECK-NEXT: ldr s1, [sp, #32] -; CHECK-NEXT: stnp d17, d2, [x0, #32] -; CHECK-NEXT: stnp d4, d3, [x0, #16] -; CHECK-NEXT: stnp d0, d5, [x0] +; CHECK-NEXT: stnp d0, d2, [x0] +; CHECK-NEXT: stnp d16, d3, [x0, #48] +; CHECK-NEXT: stnp d17, d5, [x0, #32] ; CHECK-NEXT: str s1, [x0, #64] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/pow.ll b/llvm/test/CodeGen/AArch64/pow.ll --- a/llvm/test/CodeGen/AArch64/pow.ll +++ b/llvm/test/CodeGen/AArch64/pow.ll @@ -91,9 +91,9 @@ ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: mov v1.s[2], v0.s[0] ; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: mov s0, v0.s[3] ; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill ; CHECK-NEXT: fmov s1, #0.25000000 +; CHECK-NEXT: mov s0, v0.s[3] ; CHECK-NEXT: bl powf ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 diff --git a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll --- a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll +++ b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll @@ -8,12 +8,12 @@ define dso_local void @run_test() local_unnamed_addr #0 { ; CHECK-LABEL: run_test: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #96 -; CHECK-NEXT: stp d15, d14, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 96 +; CHECK-NEXT: sub sp, sp, #80 +; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 80 ; CHECK-NEXT: .cfi_offset b8, -8 ; CHECK-NEXT: .cfi_offset b9, -16 ; CHECK-NEXT: .cfi_offset b10, -24 @@ -22,13 +22,14 @@ ; CHECK-NEXT: .cfi_offset b13, -48 ; CHECK-NEXT: .cfi_offset b14, -56 ; CHECK-NEXT: .cfi_offset b15, -64 -; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: adrp x10, B+48 ; CHECK-NEXT: adrp x11, A ; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: mov x9, xzr ; CHECK-NEXT: add x10, x10, :lo12:B+48 ; CHECK-NEXT: add x11, x11, :lo12:A +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: // implicit-def: $q1 ; CHECK-NEXT: // implicit-def: $q2 ; CHECK-NEXT: // implicit-def: $q3 ; CHECK-NEXT: // implicit-def: $q4 @@ -57,9 +58,7 @@ ; CHECK-NEXT: // implicit-def: $q11 ; CHECK-NEXT: // implicit-def: $q12 ; CHECK-NEXT: // implicit-def: $q13 -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: // implicit-def: $q0 -; CHECK-NEXT: // kill: killed $q0 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: .LBB0_1: // %for.cond1.preheader ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: mov x12, xzr @@ -67,103 +66,99 @@ ; CHECK-NEXT: ldr q15, [x10], #64 ; CHECK-NEXT: add x15, x11, x8 ; CHECK-NEXT: add x9, x9, #1 +; CHECK-NEXT: ldr x13, [x12] +; CHECK-NEXT: fmov x14, d14 ; CHECK-NEXT: ldr q0, [x12] -; CHECK-NEXT: fmov x13, d14 -; CHECK-NEXT: ldr x12, [x12] -; CHECK-NEXT: fmov x0, d15 -; CHECK-NEXT: mov x14, v14.d[1] +; CHECK-NEXT: fmov x12, d15 +; CHECK-NEXT: mov x17, v14.d[1] ; CHECK-NEXT: ldr x15, [x15, #128] -; CHECK-NEXT: fmov x16, d0 -; CHECK-NEXT: mul x17, x13, x12 -; CHECK-NEXT: mov x18, v0.d[1] -; CHECK-NEXT: mul x4, x0, x12 -; CHECK-NEXT: mul x1, x16, x12 -; CHECK-NEXT: mul x3, x14, x12 -; CHECK-NEXT: fmov d0, x17 -; CHECK-NEXT: mul x5, x13, x15 -; CHECK-NEXT: mov x17, v15.d[1] -; CHECK-NEXT: fmov d15, x4 -; CHECK-NEXT: fmov d14, x1 -; CHECK-NEXT: mul x1, x18, x12 -; CHECK-NEXT: ldr x2, [x8], #8 -; CHECK-NEXT: mov v0.d[1], x3 -; CHECK-NEXT: mul x3, x16, x15 -; CHECK-NEXT: mul x12, x17, x12 -; CHECK-NEXT: fmov d1, x5 -; CHECK-NEXT: mul x13, x13, x2 -; CHECK-NEXT: cmp x8, #64 +; CHECK-NEXT: mul x16, x14, x13 +; CHECK-NEXT: mov x2, v15.d[1] +; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: mul x18, x12, x13 +; CHECK-NEXT: mul x1, x17, x13 +; CHECK-NEXT: mul x4, x14, x15 +; CHECK-NEXT: fmov d14, x16 +; CHECK-NEXT: mul x3, x0, x13 +; CHECK-NEXT: mov x16, v0.d[1] +; CHECK-NEXT: fmov d0, x18 +; CHECK-NEXT: mul x18, x2, x13 ; CHECK-NEXT: mov v14.d[1], x1 -; CHECK-NEXT: mul x1, x14, x15 -; CHECK-NEXT: add v12.2d, v12.2d, v0.2d -; CHECK-NEXT: mul x14, x14, x2 -; CHECK-NEXT: mov v15.d[1], x12 -; CHECK-NEXT: mul x12, x18, x2 -; CHECK-NEXT: mul x18, x18, x15 -; CHECK-NEXT: fmov d0, x3 -; CHECK-NEXT: mov v1.d[1], x1 -; CHECK-NEXT: mul x16, x16, x2 -; CHECK-NEXT: mul x3, x0, x15 -; CHECK-NEXT: add v10.2d, v10.2d, v15.2d -; CHECK-NEXT: fmov d15, x13 +; CHECK-NEXT: mul x13, x16, x13 +; CHECK-NEXT: fmov d15, x3 +; CHECK-NEXT: mul x3, x17, x15 +; CHECK-NEXT: add v12.2d, v12.2d, v14.2d +; CHECK-NEXT: fmov d14, x4 +; CHECK-NEXT: ldr x1, [x8], #8 +; CHECK-NEXT: mov v15.d[1], x13 +; CHECK-NEXT: mul x13, x0, x15 +; CHECK-NEXT: mov v14.d[1], x3 +; CHECK-NEXT: mul x3, x12, x15 ; CHECK-NEXT: mov v0.d[1], x18 -; CHECK-NEXT: mul x13, x0, x2 -; CHECK-NEXT: add v29.2d, v29.2d, v1.2d -; CHECK-NEXT: mul x15, x17, x15 -; CHECK-NEXT: mov v15.d[1], x14 -; CHECK-NEXT: fmov d1, x16 -; CHECK-NEXT: add v28.2d, v28.2d, v0.2d -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: add v13.2d, v13.2d, v14.2d -; CHECK-NEXT: mov v1.d[1], x12 -; CHECK-NEXT: mul x12, x17, x2 -; CHECK-NEXT: add v0.2d, v0.2d, v15.2d -; CHECK-NEXT: add v11.2d, v11.2d, v14.2d -; CHECK-NEXT: fmov d14, x3 -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: fmov d0, x13 -; CHECK-NEXT: add v9.2d, v9.2d, v1.2d -; CHECK-NEXT: mov v14.d[1], x15 -; CHECK-NEXT: mov v0.d[1], x12 -; CHECK-NEXT: add v31.2d, v31.2d, v1.2d -; CHECK-NEXT: add v26.2d, v26.2d, v1.2d -; CHECK-NEXT: add v23.2d, v23.2d, v1.2d -; CHECK-NEXT: add v21.2d, v21.2d, v1.2d -; CHECK-NEXT: add v19.2d, v19.2d, v1.2d -; CHECK-NEXT: add v17.2d, v17.2d, v1.2d -; CHECK-NEXT: add v7.2d, v7.2d, v1.2d -; CHECK-NEXT: add v5.2d, v5.2d, v1.2d -; CHECK-NEXT: add v3.2d, v3.2d, v1.2d -; CHECK-NEXT: add v2.2d, v2.2d, v1.2d -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-NEXT: add v27.2d, v27.2d, v14.2d -; CHECK-NEXT: add v8.2d, v8.2d, v15.2d -; CHECK-NEXT: add v25.2d, v25.2d, v15.2d -; CHECK-NEXT: add v22.2d, v22.2d, v15.2d -; CHECK-NEXT: add v18.2d, v18.2d, v15.2d -; CHECK-NEXT: add v6.2d, v6.2d, v15.2d +; CHECK-NEXT: mul x14, x14, x1 +; CHECK-NEXT: add v13.2d, v13.2d, v15.2d +; CHECK-NEXT: mul x17, x17, x1 +; CHECK-NEXT: add v11.2d, v11.2d, v15.2d +; CHECK-NEXT: mul x18, x16, x1 +; CHECK-NEXT: fmov d15, x13 +; CHECK-NEXT: mul x13, x16, x15 +; CHECK-NEXT: mul x15, x2, x15 +; CHECK-NEXT: cmp x8, #64 +; CHECK-NEXT: add v10.2d, v10.2d, v0.2d +; CHECK-NEXT: fmov d0, x3 +; CHECK-NEXT: add v29.2d, v29.2d, v14.2d +; CHECK-NEXT: mul x16, x0, x1 +; CHECK-NEXT: fmov d14, x14 +; CHECK-NEXT: mul x12, x12, x1 +; CHECK-NEXT: mov v0.d[1], x15 +; CHECK-NEXT: mov v15.d[1], x13 +; CHECK-NEXT: mul x13, x2, x1 +; CHECK-NEXT: add v27.2d, v27.2d, v0.2d +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov v14.d[1], x17 +; CHECK-NEXT: add v28.2d, v28.2d, v15.2d +; CHECK-NEXT: fmov d15, x16 +; CHECK-NEXT: add v0.2d, v0.2d, v14.2d +; CHECK-NEXT: add v8.2d, v8.2d, v14.2d +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill +; CHECK-NEXT: fmov d0, x12 +; CHECK-NEXT: mov v15.d[1], x18 +; CHECK-NEXT: add v25.2d, v25.2d, v14.2d +; CHECK-NEXT: mov v0.d[1], x13 +; CHECK-NEXT: add v22.2d, v22.2d, v14.2d +; CHECK-NEXT: add v18.2d, v18.2d, v14.2d +; CHECK-NEXT: add v6.2d, v6.2d, v14.2d +; CHECK-NEXT: add v9.2d, v9.2d, v15.2d +; CHECK-NEXT: add v31.2d, v31.2d, v15.2d +; CHECK-NEXT: add v26.2d, v26.2d, v15.2d +; CHECK-NEXT: add v23.2d, v23.2d, v15.2d +; CHECK-NEXT: add v21.2d, v21.2d, v15.2d +; CHECK-NEXT: add v19.2d, v19.2d, v15.2d +; CHECK-NEXT: add v17.2d, v17.2d, v15.2d +; CHECK-NEXT: add v7.2d, v7.2d, v15.2d +; CHECK-NEXT: add v5.2d, v5.2d, v15.2d +; CHECK-NEXT: add v3.2d, v3.2d, v15.2d +; CHECK-NEXT: add v2.2d, v2.2d, v15.2d ; CHECK-NEXT: add v30.2d, v30.2d, v0.2d ; CHECK-NEXT: add v24.2d, v24.2d, v0.2d ; CHECK-NEXT: add v20.2d, v20.2d, v0.2d ; CHECK-NEXT: add v16.2d, v16.2d, v0.2d ; CHECK-NEXT: add v4.2d, v4.2d, v0.2d ; CHECK-NEXT: add v1.2d, v1.2d, v0.2d -; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill ; CHECK-NEXT: b.ne .LBB0_1 ; CHECK-NEXT: // %bb.2: // %for.cond.cleanup ; CHECK-NEXT: adrp x8, C -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: add x8, x8, :lo12:C -; CHECK-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: stp q13, q12, [x8] ; CHECK-NEXT: stp q11, q10, [x8, #32] ; CHECK-NEXT: stp q9, q8, [x8, #64] -; CHECK-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: stp q0, q2, [x8, #464] -; CHECK-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: stp q31, q30, [x8, #96] -; CHECK-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: stp q29, q28, [x8, #144] -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: stp q27, q26, [x8, #176] ; CHECK-NEXT: str q25, [x8, #208] ; CHECK-NEXT: stp q24, q23, [x8, #240] @@ -173,8 +168,9 @@ ; CHECK-NEXT: stp q16, q7, [x8, #368] ; CHECK-NEXT: stp q6, q5, [x8, #400] ; CHECK-NEXT: stp q4, q3, [x8, #432] -; CHECK-NEXT: str q0, [x8, #496] -; CHECK-NEXT: add sp, sp, #96 +; CHECK-NEXT: stp q0, q2, [x8, #464] +; CHECK-NEXT: str q1, [x8, #496] +; CHECK-NEXT: add sp, sp, #80 ; CHECK-NEXT: ret entry: br label %for.cond1.preheader diff --git a/llvm/test/CodeGen/AArch64/reduce-and.ll b/llvm/test/CodeGen/AArch64/reduce-and.ll --- a/llvm/test/CodeGen/AArch64/reduce-and.ll +++ b/llvm/test/CodeGen/AArch64/reduce-and.ll @@ -21,8 +21,8 @@ ; CHECK-LABEL: test_redand_v2i1: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: and w8, w9, w8 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret @@ -81,17 +81,17 @@ ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] ; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] -; CHECK-NEXT: umov w13, v0.b[5] ; CHECK-NEXT: and w8, w9, w8 -; CHECK-NEXT: umov w9, v0.b[6] +; CHECK-NEXT: umov w9, v0.b[4] ; CHECK-NEXT: and w8, w8, w10 -; CHECK-NEXT: umov w10, v0.b[7] +; CHECK-NEXT: umov w10, v0.b[5] ; CHECK-NEXT: and w8, w8, w11 -; CHECK-NEXT: and w8, w8, w12 -; CHECK-NEXT: and w8, w8, w13 +; CHECK-NEXT: umov w11, v0.b[6] ; CHECK-NEXT: and w8, w8, w9 +; CHECK-NEXT: umov w9, v0.b[7] ; CHECK-NEXT: and w8, w8, w10 +; CHECK-NEXT: and w8, w8, w11 +; CHECK-NEXT: and w8, w8, w9 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret ; @@ -134,18 +134,18 @@ ; CHECK-NEXT: umov w8, v0.b[1] ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] -; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] ; CHECK-NEXT: and w8, w9, w8 +; CHECK-NEXT: umov w9, v0.b[3] +; CHECK-NEXT: and w8, w8, w10 +; CHECK-NEXT: umov w10, v0.b[4] +; CHECK-NEXT: and w8, w8, w9 ; CHECK-NEXT: umov w9, v0.b[5] ; CHECK-NEXT: and w8, w8, w10 ; CHECK-NEXT: umov w10, v0.b[6] -; CHECK-NEXT: and w8, w8, w11 -; CHECK-NEXT: umov w11, v0.b[7] -; CHECK-NEXT: and w8, w8, w12 ; CHECK-NEXT: and w8, w8, w9 +; CHECK-NEXT: umov w9, v0.b[7] ; CHECK-NEXT: and w8, w8, w10 -; CHECK-NEXT: and w8, w8, w11 +; CHECK-NEXT: and w8, w8, w9 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret ; @@ -275,17 +275,17 @@ ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] ; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] -; CHECK-NEXT: umov w13, v0.b[5] ; CHECK-NEXT: and w8, w9, w8 -; CHECK-NEXT: umov w9, v0.b[6] +; CHECK-NEXT: umov w9, v0.b[4] ; CHECK-NEXT: and w8, w8, w10 -; CHECK-NEXT: umov w10, v0.b[7] +; CHECK-NEXT: umov w10, v0.b[5] ; CHECK-NEXT: and w8, w8, w11 -; CHECK-NEXT: and w8, w8, w12 -; CHECK-NEXT: and w8, w8, w13 +; CHECK-NEXT: umov w11, v0.b[6] ; CHECK-NEXT: and w8, w8, w9 -; CHECK-NEXT: and w0, w8, w10 +; CHECK-NEXT: umov w9, v0.b[7] +; CHECK-NEXT: and w8, w8, w10 +; CHECK-NEXT: and w8, w8, w11 +; CHECK-NEXT: and w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redand_v8i8: @@ -326,18 +326,18 @@ ; CHECK-NEXT: umov w8, v0.b[1] ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] -; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] ; CHECK-NEXT: and w8, w9, w8 +; CHECK-NEXT: umov w9, v0.b[3] +; CHECK-NEXT: and w8, w8, w10 +; CHECK-NEXT: umov w10, v0.b[4] +; CHECK-NEXT: and w8, w8, w9 ; CHECK-NEXT: umov w9, v0.b[5] ; CHECK-NEXT: and w8, w8, w10 ; CHECK-NEXT: umov w10, v0.b[6] -; CHECK-NEXT: and w8, w8, w11 -; CHECK-NEXT: umov w11, v0.b[7] -; CHECK-NEXT: and w8, w8, w12 ; CHECK-NEXT: and w8, w8, w9 +; CHECK-NEXT: umov w9, v0.b[7] ; CHECK-NEXT: and w8, w8, w10 -; CHECK-NEXT: and w0, w8, w11 +; CHECK-NEXT: and w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redand_v16i8: @@ -380,18 +380,18 @@ ; CHECK-NEXT: umov w8, v0.b[1] ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] -; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] ; CHECK-NEXT: and w8, w9, w8 +; CHECK-NEXT: umov w9, v0.b[3] +; CHECK-NEXT: and w8, w8, w10 +; CHECK-NEXT: umov w10, v0.b[4] +; CHECK-NEXT: and w8, w8, w9 ; CHECK-NEXT: umov w9, v0.b[5] ; CHECK-NEXT: and w8, w8, w10 ; CHECK-NEXT: umov w10, v0.b[6] -; CHECK-NEXT: and w8, w8, w11 -; CHECK-NEXT: umov w11, v0.b[7] -; CHECK-NEXT: and w8, w8, w12 ; CHECK-NEXT: and w8, w8, w9 +; CHECK-NEXT: umov w9, v0.b[7] ; CHECK-NEXT: and w8, w8, w10 -; CHECK-NEXT: and w0, w8, w11 +; CHECK-NEXT: and w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redand_v32i8: @@ -465,10 +465,10 @@ ; CHECK-NEXT: umov w8, v0.h[1] ; CHECK-NEXT: umov w9, v0.h[0] ; CHECK-NEXT: umov w10, v0.h[2] -; CHECK-NEXT: umov w11, v0.h[3] ; CHECK-NEXT: and w8, w9, w8 +; CHECK-NEXT: umov w9, v0.h[3] ; CHECK-NEXT: and w8, w8, w10 -; CHECK-NEXT: and w0, w8, w11 +; CHECK-NEXT: and w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redand_v8i16: @@ -499,10 +499,10 @@ ; CHECK-NEXT: umov w8, v0.h[1] ; CHECK-NEXT: umov w9, v0.h[0] ; CHECK-NEXT: umov w10, v0.h[2] -; CHECK-NEXT: umov w11, v0.h[3] ; CHECK-NEXT: and w8, w9, w8 +; CHECK-NEXT: umov w9, v0.h[3] ; CHECK-NEXT: and w8, w8, w10 -; CHECK-NEXT: and w0, w8, w11 +; CHECK-NEXT: and w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redand_v16i16: @@ -529,8 +529,8 @@ ; CHECK-LABEL: test_redand_v2i32: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: and w0, w9, w8 ; CHECK-NEXT: ret ; @@ -551,8 +551,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: and w0, w9, w8 ; CHECK-NEXT: ret ; @@ -575,8 +575,8 @@ ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: and w0, w9, w8 ; CHECK-NEXT: ret ; diff --git a/llvm/test/CodeGen/AArch64/reduce-or.ll b/llvm/test/CodeGen/AArch64/reduce-or.ll --- a/llvm/test/CodeGen/AArch64/reduce-or.ll +++ b/llvm/test/CodeGen/AArch64/reduce-or.ll @@ -21,8 +21,8 @@ ; CHECK-LABEL: test_redor_v2i1: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: orr w8, w9, w8 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret @@ -81,17 +81,17 @@ ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] ; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] -; CHECK-NEXT: umov w13, v0.b[5] ; CHECK-NEXT: orr w8, w9, w8 -; CHECK-NEXT: umov w9, v0.b[6] +; CHECK-NEXT: umov w9, v0.b[4] ; CHECK-NEXT: orr w8, w8, w10 -; CHECK-NEXT: umov w10, v0.b[7] +; CHECK-NEXT: umov w10, v0.b[5] ; CHECK-NEXT: orr w8, w8, w11 -; CHECK-NEXT: orr w8, w8, w12 -; CHECK-NEXT: orr w8, w8, w13 +; CHECK-NEXT: umov w11, v0.b[6] ; CHECK-NEXT: orr w8, w8, w9 +; CHECK-NEXT: umov w9, v0.b[7] ; CHECK-NEXT: orr w8, w8, w10 +; CHECK-NEXT: orr w8, w8, w11 +; CHECK-NEXT: orr w8, w8, w9 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret ; @@ -134,18 +134,18 @@ ; CHECK-NEXT: umov w8, v0.b[1] ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] -; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] ; CHECK-NEXT: orr w8, w9, w8 +; CHECK-NEXT: umov w9, v0.b[3] +; CHECK-NEXT: orr w8, w8, w10 +; CHECK-NEXT: umov w10, v0.b[4] +; CHECK-NEXT: orr w8, w8, w9 ; CHECK-NEXT: umov w9, v0.b[5] ; CHECK-NEXT: orr w8, w8, w10 ; CHECK-NEXT: umov w10, v0.b[6] -; CHECK-NEXT: orr w8, w8, w11 -; CHECK-NEXT: umov w11, v0.b[7] -; CHECK-NEXT: orr w8, w8, w12 ; CHECK-NEXT: orr w8, w8, w9 +; CHECK-NEXT: umov w9, v0.b[7] ; CHECK-NEXT: orr w8, w8, w10 -; CHECK-NEXT: orr w8, w8, w11 +; CHECK-NEXT: orr w8, w8, w9 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret ; @@ -274,17 +274,17 @@ ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] ; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] -; CHECK-NEXT: umov w13, v0.b[5] ; CHECK-NEXT: orr w8, w9, w8 -; CHECK-NEXT: umov w9, v0.b[6] +; CHECK-NEXT: umov w9, v0.b[4] ; CHECK-NEXT: orr w8, w8, w10 -; CHECK-NEXT: umov w10, v0.b[7] +; CHECK-NEXT: umov w10, v0.b[5] ; CHECK-NEXT: orr w8, w8, w11 -; CHECK-NEXT: orr w8, w8, w12 -; CHECK-NEXT: orr w8, w8, w13 +; CHECK-NEXT: umov w11, v0.b[6] ; CHECK-NEXT: orr w8, w8, w9 -; CHECK-NEXT: orr w0, w8, w10 +; CHECK-NEXT: umov w9, v0.b[7] +; CHECK-NEXT: orr w8, w8, w10 +; CHECK-NEXT: orr w8, w8, w11 +; CHECK-NEXT: orr w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redor_v8i8: @@ -325,18 +325,18 @@ ; CHECK-NEXT: umov w8, v0.b[1] ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] -; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] ; CHECK-NEXT: orr w8, w9, w8 +; CHECK-NEXT: umov w9, v0.b[3] +; CHECK-NEXT: orr w8, w8, w10 +; CHECK-NEXT: umov w10, v0.b[4] +; CHECK-NEXT: orr w8, w8, w9 ; CHECK-NEXT: umov w9, v0.b[5] ; CHECK-NEXT: orr w8, w8, w10 ; CHECK-NEXT: umov w10, v0.b[6] -; CHECK-NEXT: orr w8, w8, w11 -; CHECK-NEXT: umov w11, v0.b[7] -; CHECK-NEXT: orr w8, w8, w12 ; CHECK-NEXT: orr w8, w8, w9 +; CHECK-NEXT: umov w9, v0.b[7] ; CHECK-NEXT: orr w8, w8, w10 -; CHECK-NEXT: orr w0, w8, w11 +; CHECK-NEXT: orr w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redor_v16i8: @@ -379,18 +379,18 @@ ; CHECK-NEXT: umov w8, v0.b[1] ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] -; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] ; CHECK-NEXT: orr w8, w9, w8 +; CHECK-NEXT: umov w9, v0.b[3] +; CHECK-NEXT: orr w8, w8, w10 +; CHECK-NEXT: umov w10, v0.b[4] +; CHECK-NEXT: orr w8, w8, w9 ; CHECK-NEXT: umov w9, v0.b[5] ; CHECK-NEXT: orr w8, w8, w10 ; CHECK-NEXT: umov w10, v0.b[6] -; CHECK-NEXT: orr w8, w8, w11 -; CHECK-NEXT: umov w11, v0.b[7] -; CHECK-NEXT: orr w8, w8, w12 ; CHECK-NEXT: orr w8, w8, w9 +; CHECK-NEXT: umov w9, v0.b[7] ; CHECK-NEXT: orr w8, w8, w10 -; CHECK-NEXT: orr w0, w8, w11 +; CHECK-NEXT: orr w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redor_v32i8: @@ -464,10 +464,10 @@ ; CHECK-NEXT: umov w8, v0.h[1] ; CHECK-NEXT: umov w9, v0.h[0] ; CHECK-NEXT: umov w10, v0.h[2] -; CHECK-NEXT: umov w11, v0.h[3] ; CHECK-NEXT: orr w8, w9, w8 +; CHECK-NEXT: umov w9, v0.h[3] ; CHECK-NEXT: orr w8, w8, w10 -; CHECK-NEXT: orr w0, w8, w11 +; CHECK-NEXT: orr w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redor_v8i16: @@ -498,10 +498,10 @@ ; CHECK-NEXT: umov w8, v0.h[1] ; CHECK-NEXT: umov w9, v0.h[0] ; CHECK-NEXT: umov w10, v0.h[2] -; CHECK-NEXT: umov w11, v0.h[3] ; CHECK-NEXT: orr w8, w9, w8 +; CHECK-NEXT: umov w9, v0.h[3] ; CHECK-NEXT: orr w8, w8, w10 -; CHECK-NEXT: orr w0, w8, w11 +; CHECK-NEXT: orr w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redor_v16i16: @@ -528,8 +528,8 @@ ; CHECK-LABEL: test_redor_v2i32: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: orr w0, w9, w8 ; CHECK-NEXT: ret ; @@ -550,8 +550,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: orr w0, w9, w8 ; CHECK-NEXT: ret ; @@ -574,8 +574,8 @@ ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: orr w0, w9, w8 ; CHECK-NEXT: ret ; diff --git a/llvm/test/CodeGen/AArch64/reduce-xor.ll b/llvm/test/CodeGen/AArch64/reduce-xor.ll --- a/llvm/test/CodeGen/AArch64/reduce-xor.ll +++ b/llvm/test/CodeGen/AArch64/reduce-xor.ll @@ -20,8 +20,8 @@ ; CHECK-LABEL: test_redxor_v2i1: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: eor w8, w9, w8 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret @@ -80,17 +80,17 @@ ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] ; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] -; CHECK-NEXT: umov w13, v0.b[5] ; CHECK-NEXT: eor w8, w9, w8 -; CHECK-NEXT: umov w9, v0.b[6] +; CHECK-NEXT: umov w9, v0.b[4] ; CHECK-NEXT: eor w8, w8, w10 -; CHECK-NEXT: umov w10, v0.b[7] +; CHECK-NEXT: umov w10, v0.b[5] ; CHECK-NEXT: eor w8, w8, w11 -; CHECK-NEXT: eor w8, w8, w12 -; CHECK-NEXT: eor w8, w8, w13 +; CHECK-NEXT: umov w11, v0.b[6] ; CHECK-NEXT: eor w8, w8, w9 +; CHECK-NEXT: umov w9, v0.b[7] ; CHECK-NEXT: eor w8, w8, w10 +; CHECK-NEXT: eor w8, w8, w11 +; CHECK-NEXT: eor w8, w8, w9 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret ; @@ -133,18 +133,18 @@ ; CHECK-NEXT: umov w8, v0.b[1] ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] -; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] ; CHECK-NEXT: eor w8, w9, w8 +; CHECK-NEXT: umov w9, v0.b[3] +; CHECK-NEXT: eor w8, w8, w10 +; CHECK-NEXT: umov w10, v0.b[4] +; CHECK-NEXT: eor w8, w8, w9 ; CHECK-NEXT: umov w9, v0.b[5] ; CHECK-NEXT: eor w8, w8, w10 ; CHECK-NEXT: umov w10, v0.b[6] -; CHECK-NEXT: eor w8, w8, w11 -; CHECK-NEXT: umov w11, v0.b[7] -; CHECK-NEXT: eor w8, w8, w12 ; CHECK-NEXT: eor w8, w8, w9 +; CHECK-NEXT: umov w9, v0.b[7] ; CHECK-NEXT: eor w8, w8, w10 -; CHECK-NEXT: eor w8, w8, w11 +; CHECK-NEXT: eor w8, w8, w9 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret ; @@ -273,17 +273,17 @@ ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] ; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] -; CHECK-NEXT: umov w13, v0.b[5] ; CHECK-NEXT: eor w8, w9, w8 -; CHECK-NEXT: umov w9, v0.b[6] +; CHECK-NEXT: umov w9, v0.b[4] ; CHECK-NEXT: eor w8, w8, w10 -; CHECK-NEXT: umov w10, v0.b[7] +; CHECK-NEXT: umov w10, v0.b[5] ; CHECK-NEXT: eor w8, w8, w11 -; CHECK-NEXT: eor w8, w8, w12 -; CHECK-NEXT: eor w8, w8, w13 +; CHECK-NEXT: umov w11, v0.b[6] ; CHECK-NEXT: eor w8, w8, w9 -; CHECK-NEXT: eor w0, w8, w10 +; CHECK-NEXT: umov w9, v0.b[7] +; CHECK-NEXT: eor w8, w8, w10 +; CHECK-NEXT: eor w8, w8, w11 +; CHECK-NEXT: eor w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redxor_v8i8: @@ -324,18 +324,18 @@ ; CHECK-NEXT: umov w8, v0.b[1] ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] -; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] ; CHECK-NEXT: eor w8, w9, w8 +; CHECK-NEXT: umov w9, v0.b[3] +; CHECK-NEXT: eor w8, w8, w10 +; CHECK-NEXT: umov w10, v0.b[4] +; CHECK-NEXT: eor w8, w8, w9 ; CHECK-NEXT: umov w9, v0.b[5] ; CHECK-NEXT: eor w8, w8, w10 ; CHECK-NEXT: umov w10, v0.b[6] -; CHECK-NEXT: eor w8, w8, w11 -; CHECK-NEXT: umov w11, v0.b[7] -; CHECK-NEXT: eor w8, w8, w12 ; CHECK-NEXT: eor w8, w8, w9 +; CHECK-NEXT: umov w9, v0.b[7] ; CHECK-NEXT: eor w8, w8, w10 -; CHECK-NEXT: eor w0, w8, w11 +; CHECK-NEXT: eor w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redxor_v16i8: @@ -378,18 +378,18 @@ ; CHECK-NEXT: umov w8, v0.b[1] ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] -; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] ; CHECK-NEXT: eor w8, w9, w8 +; CHECK-NEXT: umov w9, v0.b[3] +; CHECK-NEXT: eor w8, w8, w10 +; CHECK-NEXT: umov w10, v0.b[4] +; CHECK-NEXT: eor w8, w8, w9 ; CHECK-NEXT: umov w9, v0.b[5] ; CHECK-NEXT: eor w8, w8, w10 ; CHECK-NEXT: umov w10, v0.b[6] -; CHECK-NEXT: eor w8, w8, w11 -; CHECK-NEXT: umov w11, v0.b[7] -; CHECK-NEXT: eor w8, w8, w12 ; CHECK-NEXT: eor w8, w8, w9 +; CHECK-NEXT: umov w9, v0.b[7] ; CHECK-NEXT: eor w8, w8, w10 -; CHECK-NEXT: eor w0, w8, w11 +; CHECK-NEXT: eor w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redxor_v32i8: @@ -463,10 +463,10 @@ ; CHECK-NEXT: umov w8, v0.h[1] ; CHECK-NEXT: umov w9, v0.h[0] ; CHECK-NEXT: umov w10, v0.h[2] -; CHECK-NEXT: umov w11, v0.h[3] ; CHECK-NEXT: eor w8, w9, w8 +; CHECK-NEXT: umov w9, v0.h[3] ; CHECK-NEXT: eor w8, w8, w10 -; CHECK-NEXT: eor w0, w8, w11 +; CHECK-NEXT: eor w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redxor_v8i16: @@ -497,10 +497,10 @@ ; CHECK-NEXT: umov w8, v0.h[1] ; CHECK-NEXT: umov w9, v0.h[0] ; CHECK-NEXT: umov w10, v0.h[2] -; CHECK-NEXT: umov w11, v0.h[3] ; CHECK-NEXT: eor w8, w9, w8 +; CHECK-NEXT: umov w9, v0.h[3] ; CHECK-NEXT: eor w8, w8, w10 -; CHECK-NEXT: eor w0, w8, w11 +; CHECK-NEXT: eor w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redxor_v16i16: @@ -527,8 +527,8 @@ ; CHECK-LABEL: test_redxor_v2i32: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: eor w0, w9, w8 ; CHECK-NEXT: ret ; @@ -549,8 +549,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: eor w0, w9, w8 ; CHECK-NEXT: ret ; @@ -573,8 +573,8 @@ ; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: eor w0, w9, w8 ; CHECK-NEXT: ret ; diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll --- a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll @@ -143,10 +143,10 @@ ; CHECK-NEXT: shl v1.2s, v1.2s, #24 ; CHECK-NEXT: sqadd v0.2s, v1.2s, v0.2s ; CHECK-NEXT: ushr v0.2s, v0.2s, #24 -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strb w9, [x2] -; CHECK-NEXT: strb w8, [x2, #1] +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: mov w9, v0.s[1] +; CHECK-NEXT: strb w9, [x2, #1] +; CHECK-NEXT: strb w8, [x2] ; CHECK-NEXT: ret %x = load <2 x i8>, <2 x i8>* %px %y = load <2 x i8>, <2 x i8>* %py @@ -183,10 +183,10 @@ ; CHECK-NEXT: shl v1.2s, v1.2s, #16 ; CHECK-NEXT: sqadd v0.2s, v1.2s, v0.2s ; CHECK-NEXT: ushr v0.2s, v0.2s, #16 -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strh w9, [x2] -; CHECK-NEXT: strh w8, [x2, #2] +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: mov w9, v0.s[1] +; CHECK-NEXT: strh w9, [x2, #2] +; CHECK-NEXT: strh w8, [x2] ; CHECK-NEXT: ret %x = load <2 x i16>, <2 x i16>* %px %y = load <2 x i16>, <2 x i16>* %py diff --git a/llvm/test/CodeGen/AArch64/sat-add.ll b/llvm/test/CodeGen/AArch64/sat-add.ll --- a/llvm/test/CodeGen/AArch64/sat-add.ll +++ b/llvm/test/CodeGen/AArch64/sat-add.ll @@ -346,9 +346,9 @@ ; CHECK-LABEL: unsigned_sat_constant_v16i8_using_min: ; CHECK: // %bb.0: ; CHECK-NEXT: movi v1.16b, #213 -; CHECK-NEXT: movi v2.16b, #42 ; CHECK-NEXT: umin v0.16b, v0.16b, v1.16b -; CHECK-NEXT: add v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.16b, #42 +; CHECK-NEXT: add v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %c = icmp ult <16 x i8> %x, %s = select <16 x i1> %c, <16 x i8> %x, <16 x i8> @@ -383,9 +383,9 @@ define <8 x i16> @unsigned_sat_constant_v8i16_using_min(<8 x i16> %x) { ; CHECK-LABEL: unsigned_sat_constant_v8i16_using_min: ; CHECK: // %bb.0: +; CHECK-NEXT: mvni v1.8h, #42 +; CHECK-NEXT: umin v0.8h, v0.8h, v1.8h ; CHECK-NEXT: movi v1.8h, #42 -; CHECK-NEXT: mvni v2.8h, #42 -; CHECK-NEXT: umin v0.8h, v0.8h, v2.8h ; CHECK-NEXT: add v0.8h, v0.8h, v1.8h ; CHECK-NEXT: ret %c = icmp ult <8 x i16> %x, diff --git a/llvm/test/CodeGen/AArch64/select_cc.ll b/llvm/test/CodeGen/AArch64/select_cc.ll --- a/llvm/test/CodeGen/AArch64/select_cc.ll +++ b/llvm/test/CodeGen/AArch64/select_cc.ll @@ -56,9 +56,9 @@ define <2 x double> @select_olt_load_cmp(<2 x double> %a, <2 x float>* %src) { ; CHECK-LABEL: select_olt_load_cmp: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: ldr d2, [x0] -; CHECK-NEXT: fcmgt v1.2s, v2.2s, v1.2s +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: movi d2, #0000000000000000 +; CHECK-NEXT: fcmgt v1.2s, v1.2s, v2.2s ; CHECK-NEXT: sshll v1.2d, v1.2s, #0 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll b/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll --- a/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll +++ b/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll @@ -165,8 +165,8 @@ define <16 x i8> @sel_shift_bool_v16i8(<16 x i1> %t) { ; CHECK-LABEL: sel_shift_bool_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #128 ; CHECK-NEXT: shl v0.16b, v0.16b, #7 +; CHECK-NEXT: movi v1.16b, #128 ; CHECK-NEXT: cmlt v0.16b, v0.16b, #0 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/shift-mod.ll b/llvm/test/CodeGen/AArch64/shift-mod.ll --- a/llvm/test/CodeGen/AArch64/shift-mod.ll +++ b/llvm/test/CodeGen/AArch64/shift-mod.ll @@ -102,8 +102,8 @@ define <4 x i32> @ashr_add_shl_v4i8(<4 x i32> %r) { ; CHECK-LABEL: ashr_add_shl_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.4s, #1, lsl #24 ; CHECK-NEXT: shl v0.4s, v0.4s, #24 +; CHECK-NEXT: movi v1.4s, #1, lsl #24 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: sshr v0.4s, v0.4s, #24 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/signbit-shift.ll b/llvm/test/CodeGen/AArch64/signbit-shift.ll --- a/llvm/test/CodeGen/AArch64/signbit-shift.ll +++ b/llvm/test/CodeGen/AArch64/signbit-shift.ll @@ -30,9 +30,9 @@ ; CHECK-LABEL: add_zext_ifpos_vec_splat: ; CHECK: // %bb.0: ; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff -; CHECK-NEXT: movi v2.4s, #41 ; CHECK-NEXT: cmgt v0.4s, v0.4s, v1.4s -; CHECK-NEXT: sub v0.4s, v2.4s, v0.4s +; CHECK-NEXT: movi v1.4s, #41 +; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s ; CHECK-NEXT: ret %c = icmp sgt <4 x i32> %x, %e = zext <4 x i1> %c to <4 x i32> @@ -79,9 +79,9 @@ ; CHECK-LABEL: add_sext_ifpos_vec_splat: ; CHECK: // %bb.0: ; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff -; CHECK-NEXT: movi v2.4s, #42 ; CHECK-NEXT: cmgt v0.4s, v0.4s, v1.4s -; CHECK-NEXT: add v0.4s, v0.4s, v2.4s +; CHECK-NEXT: movi v1.4s, #42 +; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %c = icmp sgt <4 x i32> %x, %e = sext <4 x i1> %c to <4 x i32> diff --git a/llvm/test/CodeGen/AArch64/sinksplat.ll b/llvm/test/CodeGen/AArch64/sinksplat.ll --- a/llvm/test/CodeGen/AArch64/sinksplat.ll +++ b/llvm/test/CodeGen/AArch64/sinksplat.ll @@ -4,16 +4,17 @@ define <4 x i32> @smull(<4 x i16> %x, <4 x i16> *%y) { ; CHECK-LABEL: smull: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov d1, d0 ; CHECK-NEXT: mov w8, #1 -; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: .LBB0_1: // %l1 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr d2, [x0] ; CHECK-NEXT: subs w8, w8, #1 -; CHECK-NEXT: smlal v0.4s, v2.4h, v1.h[3] +; CHECK-NEXT: smlal v1.4s, v2.4h, v0.h[3] ; CHECK-NEXT: b.eq .LBB0_1 ; CHECK-NEXT: // %bb.2: // %l2 +; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: ret entry: %a = shufflevector <4 x i16> %x, <4 x i16> undef, <4 x i32> @@ -36,16 +37,17 @@ define <4 x i32> @umull(<4 x i16> %x, <4 x i16> *%y) { ; CHECK-LABEL: umull: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov d1, d0 ; CHECK-NEXT: mov w8, #1 -; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: .LBB1_1: // %l1 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr d2, [x0] ; CHECK-NEXT: subs w8, w8, #1 -; CHECK-NEXT: umlal v0.4s, v2.4h, v1.h[3] +; CHECK-NEXT: umlal v1.4s, v2.4h, v0.h[3] ; CHECK-NEXT: b.eq .LBB1_1 ; CHECK-NEXT: // %bb.2: // %l2 +; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: ret entry: %a = shufflevector <4 x i16> %x, <4 x i16> undef, <4 x i32> @@ -68,17 +70,17 @@ define <4 x i32> @sqadd(<4 x i32> %x, <4 x i32> *%y) { ; CHECK-LABEL: sqadd: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: mov w8, #1 -; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: .LBB2_1: // %l1 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr q2, [x0] ; CHECK-NEXT: subs w8, w8, #1 -; CHECK-NEXT: sqrdmulh v2.4s, v2.4s, v1.s[3] -; CHECK-NEXT: sqadd v0.4s, v0.4s, v2.4s +; CHECK-NEXT: sqrdmulh v2.4s, v2.4s, v0.s[3] +; CHECK-NEXT: sqadd v1.4s, v1.4s, v2.4s ; CHECK-NEXT: b.eq .LBB2_1 ; CHECK-NEXT: // %bb.2: // %l2 +; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: ret entry: %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> @@ -101,17 +103,17 @@ define <4 x i32> @sqsub(<4 x i32> %x, <4 x i32> *%y) { ; CHECK-LABEL: sqsub: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: mov w8, #1 -; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: .LBB3_1: // %l1 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr q2, [x0] ; CHECK-NEXT: subs w8, w8, #1 -; CHECK-NEXT: sqrdmulh v2.4s, v2.4s, v1.s[3] -; CHECK-NEXT: sqsub v0.4s, v0.4s, v2.4s +; CHECK-NEXT: sqrdmulh v2.4s, v2.4s, v0.s[3] +; CHECK-NEXT: sqsub v1.4s, v1.4s, v2.4s ; CHECK-NEXT: b.eq .LBB3_1 ; CHECK-NEXT: // %bb.2: // %l2 +; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: ret entry: %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> @@ -134,17 +136,17 @@ define <4 x i32> @sqdmulh(<4 x i32> %x, <4 x i32> *%y) { ; CHECK-LABEL: sqdmulh: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: mov w8, #1 -; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: .LBB4_1: // %l1 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr q2, [x0] ; CHECK-NEXT: subs w8, w8, #1 -; CHECK-NEXT: sqdmulh v2.4s, v2.4s, v1.s[3] -; CHECK-NEXT: add v0.4s, v0.4s, v2.4s +; CHECK-NEXT: sqdmulh v2.4s, v2.4s, v0.s[3] +; CHECK-NEXT: add v1.4s, v1.4s, v2.4s ; CHECK-NEXT: b.eq .LBB4_1 ; CHECK-NEXT: // %bb.2: // %l2 +; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: ret entry: %a = shufflevector <4 x i32> %x, <4 x i32> undef, <4 x i32> @@ -167,17 +169,18 @@ define <4 x i32> @sqdmull(<4 x i16> %x, <4 x i16> *%y) { ; CHECK-LABEL: sqdmull: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov d1, d0 ; CHECK-NEXT: mov w8, #1 -; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: .LBB5_1: // %l1 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr d2, [x0] ; CHECK-NEXT: subs w8, w8, #1 -; CHECK-NEXT: sqdmull v2.4s, v2.4h, v1.h[3] -; CHECK-NEXT: add v0.4s, v0.4s, v2.4s +; CHECK-NEXT: sqdmull v2.4s, v2.4h, v0.h[3] +; CHECK-NEXT: add v1.4s, v1.4s, v2.4s ; CHECK-NEXT: b.eq .LBB5_1 ; CHECK-NEXT: // %bb.2: // %l2 +; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: ret entry: %a = shufflevector <4 x i16> %x, <4 x i16> undef, <4 x i32> @@ -200,10 +203,9 @@ define <4 x i32> @mlal(<4 x i32> %x, <4 x i32> *%y) { ; CHECK-LABEL: mlal: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: dup v1.4s, v0.s[3] ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: dup v1.4s, v1.s[3] ; CHECK-NEXT: .LBB6_1: // %l1 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr q2, [x0] @@ -233,17 +235,17 @@ define <4 x float> @fmul(<4 x float> %x, <4 x float> *%y) { ; CHECK-LABEL: fmul: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: mov w8, #1 -; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: .LBB7_1: // %l1 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr q2, [x0] ; CHECK-NEXT: subs w8, w8, #1 -; CHECK-NEXT: fmul v2.4s, v2.4s, v1.s[3] -; CHECK-NEXT: fadd v0.4s, v2.4s, v0.4s +; CHECK-NEXT: fmul v2.4s, v2.4s, v0.s[3] +; CHECK-NEXT: fadd v1.4s, v2.4s, v1.4s ; CHECK-NEXT: b.eq .LBB7_1 ; CHECK-NEXT: // %bb.2: // %l2 +; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: ret entry: %a = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> @@ -266,10 +268,9 @@ define <4 x float> @fmuladd(<4 x float> %x, <4 x float> *%y) { ; CHECK-LABEL: fmuladd: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: dup v1.4s, v0.s[3] ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: dup v1.4s, v1.s[3] ; CHECK-NEXT: .LBB8_1: // %l1 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr q2, [x0] @@ -299,10 +300,9 @@ define <4 x float> @fma(<4 x float> %x, <4 x float> *%y) { ; CHECK-LABEL: fma: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: dup v1.4s, v0.s[3] ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: dup v1.4s, v1.s[3] ; CHECK-NEXT: .LBB9_1: // %l1 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr q3, [x0] @@ -333,12 +333,12 @@ define <4 x i32> @smull_nonsplat(<4 x i16> %x, <4 x i16> *%y) { ; CHECK-LABEL: smull_nonsplat: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov d1, d0 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: dup v1.4h, v0.h[3] ; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: ext v1.8b, v0.8b, v1.8b, #4 +; CHECK-NEXT: ext v1.8b, v0.8b, v1.8b, #6 ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: dup v2.4h, v1.h[3] -; CHECK-NEXT: ext v2.8b, v1.8b, v2.8b, #4 -; CHECK-NEXT: ext v1.8b, v1.8b, v2.8b, #6 ; CHECK-NEXT: .LBB10_1: // %l1 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr d2, [x0] diff --git a/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll --- a/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll @@ -59,50 +59,50 @@ define <3 x i1> @test_srem_vec(<3 x i33> %X) nounwind { ; CHECK-LABEL: test_srem_vec: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #7282 -; CHECK-NEXT: sbfx x9, x0, #0, #33 -; CHECK-NEXT: movk x8, #29127, lsl #16 -; CHECK-NEXT: mov x11, #7281 -; CHECK-NEXT: movk x8, #50972, lsl #32 +; CHECK-NEXT: mov x11, #7282 +; CHECK-NEXT: mov x9, #7281 ; CHECK-NEXT: movk x11, #29127, lsl #16 -; CHECK-NEXT: movk x8, #7281, lsl #48 +; CHECK-NEXT: movk x9, #29127, lsl #16 ; CHECK-NEXT: movk x11, #50972, lsl #32 -; CHECK-NEXT: sbfx x12, x1, #0, #33 -; CHECK-NEXT: sbfx x10, x2, #0, #33 -; CHECK-NEXT: smulh x13, x9, x8 +; CHECK-NEXT: movk x9, #50972, lsl #32 +; CHECK-NEXT: sbfx x10, x0, #0, #33 ; CHECK-NEXT: movk x11, #7281, lsl #48 -; CHECK-NEXT: smulh x8, x12, x8 -; CHECK-NEXT: smulh x11, x10, x11 +; CHECK-NEXT: sbfx x8, x2, #0, #33 +; CHECK-NEXT: movk x9, #7281, lsl #48 +; CHECK-NEXT: smulh x13, x10, x11 +; CHECK-NEXT: sbfx x12, x1, #0, #33 +; CHECK-NEXT: smulh x9, x8, x9 +; CHECK-NEXT: smulh x11, x12, x11 +; CHECK-NEXT: sub x9, x9, x8 ; CHECK-NEXT: add x13, x13, x13, lsr #63 -; CHECK-NEXT: sub x11, x11, x10 -; CHECK-NEXT: add x8, x8, x8, lsr #63 +; CHECK-NEXT: asr x14, x9, #3 +; CHECK-NEXT: add x9, x14, x9, lsr #63 ; CHECK-NEXT: add x13, x13, x13, lsl #3 -; CHECK-NEXT: asr x14, x11, #3 -; CHECK-NEXT: sub x9, x9, x13 -; CHECK-NEXT: add x11, x14, x11, lsr #63 -; CHECK-NEXT: add x8, x8, x8, lsl #3 -; CHECK-NEXT: sub x8, x12, x8 +; CHECK-NEXT: add x11, x11, x11, lsr #63 +; CHECK-NEXT: sub x10, x10, x13 +; CHECK-NEXT: add x9, x9, x9, lsl #3 +; CHECK-NEXT: add x8, x8, x9 ; CHECK-NEXT: add x11, x11, x11, lsl #3 -; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: add x10, x10, x11 ; CHECK-NEXT: mov x9, #8589934591 -; CHECK-NEXT: adrp x11, .LCPI3_0 -; CHECK-NEXT: adrp x12, .LCPI3_1 -; CHECK-NEXT: mov v0.d[1], x8 -; CHECK-NEXT: fmov d1, x10 +; CHECK-NEXT: sub x11, x12, x11 +; CHECK-NEXT: adrp x12, .LCPI3_0 +; CHECK-NEXT: fmov d0, x10 +; CHECK-NEXT: adrp x10, .LCPI3_1 +; CHECK-NEXT: fmov d1, x8 ; CHECK-NEXT: dup v2.2d, x9 -; CHECK-NEXT: ldr q3, [x11, :lo12:.LCPI3_0] -; CHECK-NEXT: ldr q4, [x12, :lo12:.LCPI3_1] +; CHECK-NEXT: ldr q3, [x12, :lo12:.LCPI3_0] +; CHECK-NEXT: mov v0.d[1], x11 +; CHECK-NEXT: ldr q4, [x10, :lo12:.LCPI3_1] ; CHECK-NEXT: and v1.16b, v1.16b, v2.16b ; CHECK-NEXT: and v0.16b, v0.16b, v2.16b -; CHECK-NEXT: cmeq v0.2d, v0.2d, v3.2d ; CHECK-NEXT: cmeq v1.2d, v1.2d, v4.2d +; CHECK-NEXT: cmeq v0.2d, v0.2d, v3.2d +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: xtn v0.2s, v0.2d -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: xtn v1.2s, v1.2d -; CHECK-NEXT: mov w1, v0.s[1] ; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: mov w1, v0.s[1] ; CHECK-NEXT: fmov w2, s1 ; CHECK-NEXT: ret %srem = srem <3 x i33> %X, diff --git a/llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll b/llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll --- a/llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll +++ b/llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll @@ -7,7 +7,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI0_0 ; CHECK-NEXT: adrp x9, .LCPI0_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0] ; CHECK-NEXT: adrp x8, .LCPI0_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI0_1] @@ -21,7 +20,8 @@ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI0_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -83,7 +83,6 @@ ; CHECK-NEXT: mov w9, #9362 ; CHECK-NEXT: movk w8, #46811, lsl #16 ; CHECK-NEXT: movk w9, #4681, lsl #16 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 ; CHECK-NEXT: adrp x8, .LCPI3_0 @@ -92,8 +91,9 @@ ; CHECK-NEXT: ushr v1.4s, v2.4s, #1 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_0] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b +; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -107,7 +107,6 @@ ; CHECK-NEXT: mov w9, #9362 ; CHECK-NEXT: movk w8, #46811, lsl #16 ; CHECK-NEXT: movk w9, #4681, lsl #16 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 ; CHECK-NEXT: adrp x8, .LCPI4_0 @@ -116,8 +115,9 @@ ; CHECK-NEXT: ushr v1.4s, v2.4s, #1 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI4_0] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b +; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: cmhi v0.4s, v0.4s, v2.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp ne <4 x i32> %srem, @@ -131,7 +131,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI5_0 ; CHECK-NEXT: adrp x9, .LCPI5_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI5_0] ; CHECK-NEXT: adrp x8, .LCPI5_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI5_1] @@ -145,7 +144,8 @@ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI5_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -157,7 +157,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI6_0 ; CHECK-NEXT: adrp x9, .LCPI6_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI6_0] ; CHECK-NEXT: adrp x8, .LCPI6_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI6_1] @@ -171,7 +170,8 @@ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI6_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b ; CHECK-NEXT: cmhi v0.4s, v0.4s, v2.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp ne <4 x i32> %srem, @@ -187,7 +187,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI7_0 ; CHECK-NEXT: adrp x9, .LCPI7_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI7_0] ; CHECK-NEXT: adrp x8, .LCPI7_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI7_1] @@ -201,7 +200,8 @@ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI7_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -215,7 +215,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI8_0 ; CHECK-NEXT: adrp x9, .LCPI8_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI8_0] ; CHECK-NEXT: adrp x8, .LCPI8_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI8_1] @@ -229,7 +228,8 @@ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI8_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -243,7 +243,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI9_0 ; CHECK-NEXT: adrp x9, .LCPI9_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI9_0] ; CHECK-NEXT: adrp x8, .LCPI9_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI9_1] @@ -257,7 +256,8 @@ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI9_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -298,7 +298,6 @@ ; CHECK-NEXT: mov w9, #9362 ; CHECK-NEXT: movk w8, #46811, lsl #16 ; CHECK-NEXT: movk w9, #4681, lsl #16 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 ; CHECK-NEXT: adrp x8, .LCPI11_0 @@ -307,8 +306,9 @@ ; CHECK-NEXT: ushr v1.4s, v2.4s, #1 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI11_0] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b +; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -322,7 +322,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI12_0 ; CHECK-NEXT: adrp x9, .LCPI12_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI12_0] ; CHECK-NEXT: adrp x8, .LCPI12_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI12_1] @@ -336,7 +335,8 @@ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI12_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -354,13 +354,13 @@ ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI13_0] ; CHECK-NEXT: adrp x8, .LCPI13_1 ; CHECK-NEXT: smull2 v2.2d, v0.4s, v1.4s +; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI13_1] ; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s -; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI13_1] ; CHECK-NEXT: adrp x8, .LCPI13_2 -; CHECK-NEXT: mla v1.4s, v0.4s, v2.4s +; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI13_2] ; CHECK-NEXT: adrp x8, .LCPI13_3 +; CHECK-NEXT: mla v1.4s, v0.4s, v3.4s ; CHECK-NEXT: sshl v2.4s, v1.4s, v2.4s ; CHECK-NEXT: usra v2.4s, v1.4s, #31 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI13_3] @@ -383,13 +383,13 @@ ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI14_0] ; CHECK-NEXT: adrp x8, .LCPI14_1 ; CHECK-NEXT: smull2 v2.2d, v0.4s, v1.4s +; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI14_1] ; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s -; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI14_1] ; CHECK-NEXT: adrp x8, .LCPI14_2 -; CHECK-NEXT: mla v1.4s, v0.4s, v2.4s +; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI14_2] ; CHECK-NEXT: adrp x8, .LCPI14_3 +; CHECK-NEXT: mla v1.4s, v0.4s, v3.4s ; CHECK-NEXT: sshl v2.4s, v1.4s, v2.4s ; CHECK-NEXT: usra v2.4s, v1.4s, #31 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI14_3] @@ -412,13 +412,13 @@ ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_0] ; CHECK-NEXT: adrp x8, .LCPI15_1 ; CHECK-NEXT: smull2 v2.2d, v0.4s, v1.4s +; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI15_1] ; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s -; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI15_1] ; CHECK-NEXT: adrp x8, .LCPI15_2 -; CHECK-NEXT: mla v1.4s, v0.4s, v2.4s +; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI15_2] ; CHECK-NEXT: adrp x8, .LCPI15_3 +; CHECK-NEXT: mla v1.4s, v0.4s, v3.4s ; CHECK-NEXT: sshl v2.4s, v1.4s, v2.4s ; CHECK-NEXT: usra v2.4s, v1.4s, #31 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_3] @@ -441,7 +441,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI16_0 ; CHECK-NEXT: adrp x9, .LCPI16_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] ; CHECK-NEXT: adrp x8, .LCPI16_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI16_1] @@ -455,7 +454,8 @@ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI16_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -469,7 +469,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI17_0 ; CHECK-NEXT: adrp x9, .LCPI17_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_0] ; CHECK-NEXT: adrp x8, .LCPI17_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI17_1] @@ -483,7 +482,8 @@ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI17_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -497,7 +497,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI18_0 ; CHECK-NEXT: adrp x9, .LCPI18_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI18_0] ; CHECK-NEXT: adrp x8, .LCPI18_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI18_1] @@ -511,7 +510,8 @@ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI18_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -552,7 +552,6 @@ ; CHECK-NEXT: mov w9, #9362 ; CHECK-NEXT: movk w8, #46811, lsl #16 ; CHECK-NEXT: movk w9, #4681, lsl #16 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 ; CHECK-NEXT: adrp x8, .LCPI20_0 @@ -561,8 +560,9 @@ ; CHECK-NEXT: ushr v1.4s, v2.4s, #1 ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI20_0] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b +; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -576,7 +576,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI21_0 ; CHECK-NEXT: adrp x9, .LCPI21_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI21_0] ; CHECK-NEXT: adrp x8, .LCPI21_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI21_1] @@ -590,7 +589,8 @@ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI21_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -606,7 +606,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI22_0 ; CHECK-NEXT: adrp x9, .LCPI22_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI22_0] ; CHECK-NEXT: adrp x8, .LCPI22_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI22_1] @@ -620,7 +619,8 @@ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI22_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -634,7 +634,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI23_0 ; CHECK-NEXT: adrp x9, .LCPI23_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI23_0] ; CHECK-NEXT: adrp x8, .LCPI23_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI23_1] @@ -648,7 +647,8 @@ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI23_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -662,7 +662,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI24_0 ; CHECK-NEXT: adrp x9, .LCPI24_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI24_0] ; CHECK-NEXT: adrp x8, .LCPI24_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI24_1] @@ -676,7 +675,8 @@ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI24_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -691,7 +691,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI25_0 ; CHECK-NEXT: adrp x9, .LCPI25_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI25_0] ; CHECK-NEXT: adrp x8, .LCPI25_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI25_1] @@ -705,7 +704,8 @@ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI25_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -718,7 +718,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI26_0 ; CHECK-NEXT: adrp x9, .LCPI26_1 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI26_0] ; CHECK-NEXT: adrp x8, .LCPI26_2 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI26_1] @@ -732,7 +731,8 @@ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI26_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, diff --git a/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll b/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll --- a/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll +++ b/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll @@ -33,7 +33,6 @@ ; CHECK-NEXT: mov w9, #47184 ; CHECK-NEXT: movk w8, #49807, lsl #16 ; CHECK-NEXT: movk w9, #1310, lsl #16 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 ; CHECK-NEXT: mov w8, #23592 @@ -44,7 +43,8 @@ ; CHECK-NEXT: dup v2.4s, w8 ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -86,7 +86,6 @@ ; CHECK-NEXT: mov w9, #47184 ; CHECK-NEXT: movk w8, #49807, lsl #16 ; CHECK-NEXT: movk w9, #1310, lsl #16 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 ; CHECK-NEXT: mov w8, #23592 @@ -97,7 +96,8 @@ ; CHECK-NEXT: dup v2.4s, w8 ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, %cmp = icmp eq <4 x i32> %srem, @@ -114,15 +114,15 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #34079 ; CHECK-NEXT: movk w8, #20971, lsl #16 -; CHECK-NEXT: movi v3.4s, #25 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: smull2 v2.2d, v0.4s, v1.4s ; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s ; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s ; CHECK-NEXT: sshr v2.4s, v1.4s, #3 ; CHECK-NEXT: usra v2.4s, v1.4s, #31 +; CHECK-NEXT: movi v1.4s, #25 +; CHECK-NEXT: mls v0.4s, v2.4s, v1.4s ; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: mls v0.4s, v2.4s, v3.4s ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -137,15 +137,15 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #34079 ; CHECK-NEXT: movk w8, #20971, lsl #16 -; CHECK-NEXT: movi v3.4s, #100 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: smull2 v2.2d, v0.4s, v1.4s ; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s ; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s ; CHECK-NEXT: sshr v2.4s, v1.4s, #5 ; CHECK-NEXT: usra v2.4s, v1.4s, #31 +; CHECK-NEXT: movi v1.4s, #100 +; CHECK-NEXT: mls v0.4s, v2.4s, v1.4s ; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: mls v0.4s, v2.4s, v3.4s ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -184,12 +184,12 @@ define <4 x i32> @test_srem_pow2(<4 x i32> %X) nounwind { ; CHECK-LABEL: test_srem_pow2: ; CHECK: // %bb.0: -; CHECK-NEXT: cmlt v3.4s, v0.4s, #0 -; CHECK-NEXT: mov v2.16b, v0.16b -; CHECK-NEXT: usra v2.4s, v3.4s, #28 +; CHECK-NEXT: cmlt v2.4s, v0.4s, #0 +; CHECK-NEXT: mov v1.16b, v0.16b +; CHECK-NEXT: usra v1.4s, v2.4s, #28 +; CHECK-NEXT: bic v1.4s, #15 +; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: bic v2.4s, #15 -; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -205,9 +205,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cmlt v2.4s, v0.4s, #0 ; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: movi v3.4s, #128, lsl #24 ; CHECK-NEXT: usra v1.4s, v2.4s, #1 -; CHECK-NEXT: and v1.16b, v1.16b, v3.16b +; CHECK-NEXT: movi v2.4s, #128, lsl #24 +; CHECK-NEXT: and v1.16b, v1.16b, v2.16b ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 diff --git a/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll b/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll --- a/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll +++ b/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll @@ -4,49 +4,49 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) { ; CHECK-LABEL: fold_srem_vec_1: ; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #33437 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: smov w8, v0.h[1] -; CHECK-NEXT: smov w9, v0.h[0] -; CHECK-NEXT: mov w10, #63421 -; CHECK-NEXT: mov w11, #37253 -; CHECK-NEXT: movk w10, #31710, lsl #16 -; CHECK-NEXT: movk w11, #44150, lsl #16 -; CHECK-NEXT: smov w13, v0.h[2] -; CHECK-NEXT: mov w12, #33437 -; CHECK-NEXT: smull x10, w8, w10 -; CHECK-NEXT: movk w12, #21399, lsl #16 -; CHECK-NEXT: smull x11, w9, w11 +; CHECK-NEXT: smov w9, v0.h[2] +; CHECK-NEXT: movk w8, #21399, lsl #16 +; CHECK-NEXT: mov w10, #37253 +; CHECK-NEXT: movk w10, #44150, lsl #16 +; CHECK-NEXT: smov w11, v0.h[0] +; CHECK-NEXT: smull x8, w9, w8 +; CHECK-NEXT: mov w12, #63421 +; CHECK-NEXT: smull x10, w11, w10 +; CHECK-NEXT: movk w12, #31710, lsl #16 +; CHECK-NEXT: lsr x13, x8, #63 +; CHECK-NEXT: asr x8, x8, #37 +; CHECK-NEXT: smov w14, v0.h[1] +; CHECK-NEXT: add w8, w8, w13 +; CHECK-NEXT: mov w13, #98 ; CHECK-NEXT: lsr x10, x10, #32 -; CHECK-NEXT: lsr x11, x11, #32 -; CHECK-NEXT: sub w10, w10, w8 -; CHECK-NEXT: add w11, w11, w9 -; CHECK-NEXT: asr w14, w10, #6 -; CHECK-NEXT: asr w15, w11, #6 -; CHECK-NEXT: add w10, w14, w10, lsr #31 -; CHECK-NEXT: add w11, w15, w11, lsr #31 -; CHECK-NEXT: mov w14, #95 -; CHECK-NEXT: mov w15, #-124 -; CHECK-NEXT: smull x12, w13, w12 -; CHECK-NEXT: msub w9, w11, w14, w9 -; CHECK-NEXT: msub w8, w10, w15, w8 -; CHECK-NEXT: lsr x10, x12, #63 -; CHECK-NEXT: asr x11, x12, #37 -; CHECK-NEXT: smov w12, v0.h[3] -; CHECK-NEXT: add w10, w11, w10 -; CHECK-NEXT: mov w11, #98 +; CHECK-NEXT: smull x12, w14, w12 +; CHECK-NEXT: add w10, w10, w11 +; CHECK-NEXT: msub w8, w8, w13, w9 +; CHECK-NEXT: asr w9, w10, #6 +; CHECK-NEXT: mov w13, #95 +; CHECK-NEXT: add w9, w9, w10, lsr #31 +; CHECK-NEXT: lsr x12, x12, #32 +; CHECK-NEXT: mov w10, #63249 +; CHECK-NEXT: sub w12, w12, w14 +; CHECK-NEXT: movk w10, #48808, lsl #16 +; CHECK-NEXT: msub w9, w9, w13, w11 +; CHECK-NEXT: smov w11, v0.h[3] +; CHECK-NEXT: asr w13, w12, #6 +; CHECK-NEXT: add w12, w13, w12, lsr #31 +; CHECK-NEXT: mov w13, #-124 +; CHECK-NEXT: smull x10, w11, w10 +; CHECK-NEXT: msub w12, w12, w13, w14 ; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: mov w9, #63249 -; CHECK-NEXT: movk w9, #48808, lsl #16 -; CHECK-NEXT: msub w10, w10, w11, w13 -; CHECK-NEXT: smull x9, w12, w9 -; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: lsr x8, x9, #63 -; CHECK-NEXT: asr x9, x9, #40 -; CHECK-NEXT: add w8, w9, w8 -; CHECK-NEXT: mov w9, #-1003 -; CHECK-NEXT: mov v0.h[2], w10 -; CHECK-NEXT: msub w8, w8, w9, w12 -; CHECK-NEXT: mov v0.h[3], w8 +; CHECK-NEXT: lsr x9, x10, #63 +; CHECK-NEXT: asr x10, x10, #40 +; CHECK-NEXT: add w9, w10, w9 +; CHECK-NEXT: mov w10, #-1003 +; CHECK-NEXT: mov v0.h[1], w12 +; CHECK-NEXT: msub w9, w9, w10, w11 +; CHECK-NEXT: mov v0.h[2], w8 +; CHECK-NEXT: mov v0.h[3], w9 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %1 = srem <4 x i16> %x, @@ -56,41 +56,41 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) { ; CHECK-LABEL: fold_srem_vec_2: ; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #37253 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: smov w9, v0.h[0] -; CHECK-NEXT: mov w8, #37253 ; CHECK-NEXT: movk w8, #44150, lsl #16 ; CHECK-NEXT: smov w10, v0.h[1] -; CHECK-NEXT: smov w14, v0.h[2] +; CHECK-NEXT: smov w13, v0.h[2] ; CHECK-NEXT: mov w12, #95 ; CHECK-NEXT: smull x11, w9, w8 -; CHECK-NEXT: smull x13, w10, w8 -; CHECK-NEXT: lsr x11, x11, #32 -; CHECK-NEXT: add w11, w11, w9 -; CHECK-NEXT: lsr x13, x13, #32 -; CHECK-NEXT: asr w15, w11, #6 -; CHECK-NEXT: add w13, w13, w10 -; CHECK-NEXT: add w11, w15, w11, lsr #31 ; CHECK-NEXT: smov w15, v0.h[3] -; CHECK-NEXT: asr w16, w13, #6 -; CHECK-NEXT: msub w9, w11, w12, w9 -; CHECK-NEXT: add w13, w16, w13, lsr #31 -; CHECK-NEXT: smull x11, w14, w8 -; CHECK-NEXT: msub w10, w13, w12, w10 +; CHECK-NEXT: smull x14, w10, w8 ; CHECK-NEXT: lsr x11, x11, #32 +; CHECK-NEXT: add w11, w11, w9 +; CHECK-NEXT: lsr x14, x14, #32 +; CHECK-NEXT: asr w16, w11, #6 +; CHECK-NEXT: add w14, w14, w10 +; CHECK-NEXT: add w11, w16, w11, lsr #31 +; CHECK-NEXT: smull x16, w13, w8 +; CHECK-NEXT: asr w17, w14, #6 ; CHECK-NEXT: smull x8, w15, w8 -; CHECK-NEXT: add w11, w11, w14 -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: asr w9, w11, #6 +; CHECK-NEXT: add w14, w17, w14, lsr #31 +; CHECK-NEXT: msub w9, w11, w12, w9 +; CHECK-NEXT: lsr x11, x16, #32 ; CHECK-NEXT: lsr x8, x8, #32 -; CHECK-NEXT: add w9, w9, w11, lsr #31 +; CHECK-NEXT: add w11, w11, w13 +; CHECK-NEXT: msub w10, w14, w12, w10 +; CHECK-NEXT: asr w14, w11, #6 ; CHECK-NEXT: add w8, w8, w15 +; CHECK-NEXT: add w11, w14, w11, lsr #31 +; CHECK-NEXT: fmov s0, w9 +; CHECK-NEXT: asr w9, w8, #6 +; CHECK-NEXT: msub w11, w11, w12, w13 +; CHECK-NEXT: add w8, w9, w8, lsr #31 ; CHECK-NEXT: mov v0.h[1], w10 -; CHECK-NEXT: asr w10, w8, #6 -; CHECK-NEXT: msub w9, w9, w12, w14 -; CHECK-NEXT: add w8, w10, w8, lsr #31 ; CHECK-NEXT: msub w8, w8, w12, w15 -; CHECK-NEXT: mov v0.h[2], w9 +; CHECK-NEXT: mov v0.h[2], w11 ; CHECK-NEXT: mov v0.h[3], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret @@ -103,46 +103,46 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) { ; CHECK-LABEL: combine_srem_sdiv: ; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #37253 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: smov w9, v0.h[0] -; CHECK-NEXT: mov w8, #37253 ; CHECK-NEXT: movk w8, #44150, lsl #16 -; CHECK-NEXT: smov w10, v0.h[1] -; CHECK-NEXT: smov w11, v0.h[2] -; CHECK-NEXT: smov w12, v0.h[3] -; CHECK-NEXT: mov w14, #95 +; CHECK-NEXT: smov w11, v0.h[1] +; CHECK-NEXT: smov w12, v0.h[2] +; CHECK-NEXT: mov w10, #95 ; CHECK-NEXT: smull x13, w9, w8 -; CHECK-NEXT: smull x15, w10, w8 +; CHECK-NEXT: smov w14, v0.h[3] +; CHECK-NEXT: smull x15, w11, w8 ; CHECK-NEXT: lsr x13, x13, #32 -; CHECK-NEXT: smull x16, w11, w8 +; CHECK-NEXT: smull x16, w12, w8 ; CHECK-NEXT: add w13, w13, w9 ; CHECK-NEXT: lsr x15, x15, #32 ; CHECK-NEXT: asr w17, w13, #6 -; CHECK-NEXT: add w15, w15, w10 +; CHECK-NEXT: add w15, w15, w11 ; CHECK-NEXT: add w13, w17, w13, lsr #31 +; CHECK-NEXT: smull x8, w14, w8 +; CHECK-NEXT: lsr x16, x16, #32 ; CHECK-NEXT: asr w17, w15, #6 +; CHECK-NEXT: msub w9, w13, w10, w9 +; CHECK-NEXT: add w16, w16, w12 ; CHECK-NEXT: add w15, w17, w15, lsr #31 -; CHECK-NEXT: smull x8, w12, w8 -; CHECK-NEXT: msub w9, w13, w14, w9 -; CHECK-NEXT: lsr x16, x16, #32 -; CHECK-NEXT: add w16, w16, w11 -; CHECK-NEXT: msub w10, w15, w14, w10 ; CHECK-NEXT: asr w17, w16, #6 ; CHECK-NEXT: lsr x8, x8, #32 -; CHECK-NEXT: fmov s1, w13 ; CHECK-NEXT: add w16, w17, w16, lsr #31 +; CHECK-NEXT: msub w11, w15, w10, w11 +; CHECK-NEXT: add w8, w8, w14 ; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: add w8, w8, w12 +; CHECK-NEXT: fmov s1, w13 ; CHECK-NEXT: asr w9, w8, #6 ; CHECK-NEXT: add w8, w9, w8, lsr #31 -; CHECK-NEXT: msub w9, w16, w14, w11 -; CHECK-NEXT: mov v0.h[1], w10 +; CHECK-NEXT: msub w9, w16, w10, w12 +; CHECK-NEXT: mov v0.h[1], w11 ; CHECK-NEXT: mov v1.h[1], w15 -; CHECK-NEXT: msub w10, w8, w14, w12 -; CHECK-NEXT: mov v0.h[2], w9 +; CHECK-NEXT: msub w10, w8, w10, w14 ; CHECK-NEXT: mov v1.h[2], w16 -; CHECK-NEXT: mov v0.h[3], w10 +; CHECK-NEXT: mov v0.h[2], w9 ; CHECK-NEXT: mov v1.h[3], w8 +; CHECK-NEXT: mov v0.h[3], w10 ; CHECK-NEXT: add v0.4h, v0.4h, v1.4h ; CHECK-NEXT: ret %1 = srem <4 x i16> %x, @@ -159,36 +159,36 @@ ; CHECK-NEXT: smov w9, v0.h[1] ; CHECK-NEXT: smov w10, v0.h[0] ; CHECK-NEXT: mov w8, #37253 -; CHECK-NEXT: movk w8, #44150, lsl #16 -; CHECK-NEXT: add w11, w9, #31 +; CHECK-NEXT: smov w11, v0.h[3] +; CHECK-NEXT: add w12, w9, #31 ; CHECK-NEXT: cmp w9, #0 -; CHECK-NEXT: add w12, w10, #63 -; CHECK-NEXT: csel w11, w11, w9, lt +; CHECK-NEXT: movk w8, #44150, lsl #16 +; CHECK-NEXT: add w13, w10, #63 +; CHECK-NEXT: csel w12, w12, w9, lt ; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: and w11, w11, #0xffffffe0 -; CHECK-NEXT: csel w12, w12, w10, lt -; CHECK-NEXT: sub w9, w9, w11 -; CHECK-NEXT: and w12, w12, #0xffffffc0 -; CHECK-NEXT: sub w10, w10, w12 -; CHECK-NEXT: smov w12, v0.h[3] -; CHECK-NEXT: fmov s1, w10 -; CHECK-NEXT: smov w10, v0.h[2] -; CHECK-NEXT: smull x8, w12, w8 -; CHECK-NEXT: mov v1.h[1], w9 +; CHECK-NEXT: and w12, w12, #0xffffffe0 +; CHECK-NEXT: csel w13, w13, w10, lt +; CHECK-NEXT: sub w9, w9, w12 +; CHECK-NEXT: smov w12, v0.h[2] +; CHECK-NEXT: and w13, w13, #0xffffffc0 +; CHECK-NEXT: smull x8, w11, w8 +; CHECK-NEXT: sub w10, w10, w13 +; CHECK-NEXT: add w13, w12, #7 +; CHECK-NEXT: cmp w12, #0 ; CHECK-NEXT: lsr x8, x8, #32 -; CHECK-NEXT: add w9, w10, #7 -; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: csel w9, w9, w10, lt -; CHECK-NEXT: add w8, w8, w12 -; CHECK-NEXT: and w9, w9, #0xfffffff8 -; CHECK-NEXT: sub w9, w10, w9 -; CHECK-NEXT: asr w10, w8, #6 -; CHECK-NEXT: add w8, w10, w8, lsr #31 -; CHECK-NEXT: mov w10, #95 -; CHECK-NEXT: mov v1.h[2], w9 -; CHECK-NEXT: msub w8, w8, w10, w12 -; CHECK-NEXT: mov v1.h[3], w8 -; CHECK-NEXT: fmov d0, d1 +; CHECK-NEXT: csel w13, w13, w12, lt +; CHECK-NEXT: add w8, w8, w11 +; CHECK-NEXT: fmov s0, w10 +; CHECK-NEXT: and w10, w13, #0xfffffff8 +; CHECK-NEXT: asr w13, w8, #6 +; CHECK-NEXT: sub w10, w12, w10 +; CHECK-NEXT: add w8, w13, w8, lsr #31 +; CHECK-NEXT: mov w13, #95 +; CHECK-NEXT: mov v0.h[1], w9 +; CHECK-NEXT: msub w8, w8, w13, w11 +; CHECK-NEXT: mov v0.h[2], w10 +; CHECK-NEXT: mov v0.h[3], w8 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %1 = srem <4 x i16> %x, ret <4 x i16> %1 @@ -198,40 +198,40 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) { ; CHECK-LABEL: dont_fold_srem_one: ; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #17097 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: smov w8, v0.h[1] ; CHECK-NEXT: smov w9, v0.h[2] +; CHECK-NEXT: movk w8, #45590, lsl #16 ; CHECK-NEXT: mov w10, #30865 -; CHECK-NEXT: mov w11, #17097 ; CHECK-NEXT: movk w10, #51306, lsl #16 -; CHECK-NEXT: movk w11, #45590, lsl #16 -; CHECK-NEXT: mov w12, #654 -; CHECK-NEXT: smull x10, w8, w10 -; CHECK-NEXT: smull x11, w9, w11 +; CHECK-NEXT: smov w11, v0.h[1] +; CHECK-NEXT: smull x8, w9, w8 +; CHECK-NEXT: mov w13, #23 +; CHECK-NEXT: smull x10, w11, w10 +; CHECK-NEXT: lsr x8, x8, #32 +; CHECK-NEXT: add w8, w8, w9 ; CHECK-NEXT: lsr x10, x10, #32 -; CHECK-NEXT: lsr x11, x11, #32 -; CHECK-NEXT: add w10, w10, w8 -; CHECK-NEXT: add w11, w11, w9 -; CHECK-NEXT: asr w13, w10, #9 -; CHECK-NEXT: add w10, w13, w10, lsr #31 -; CHECK-NEXT: asr w13, w11, #4 -; CHECK-NEXT: add w11, w13, w11, lsr #31 -; CHECK-NEXT: smov w13, v0.h[3] -; CHECK-NEXT: msub w8, w10, w12, w8 -; CHECK-NEXT: movi d0, #0000000000000000 +; CHECK-NEXT: asr w12, w8, #4 +; CHECK-NEXT: add w10, w10, w11 +; CHECK-NEXT: add w8, w12, w8, lsr #31 ; CHECK-NEXT: mov w12, #47143 -; CHECK-NEXT: mov w10, #23 ; CHECK-NEXT: movk w12, #24749, lsl #16 -; CHECK-NEXT: msub w9, w11, w10, w9 -; CHECK-NEXT: smull x10, w13, w12 -; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: lsr x8, x10, #63 -; CHECK-NEXT: asr x10, x10, #43 -; CHECK-NEXT: add w8, w10, w8 -; CHECK-NEXT: mov w10, #5423 -; CHECK-NEXT: mov v0.h[2], w9 -; CHECK-NEXT: msub w8, w8, w10, w13 -; CHECK-NEXT: mov v0.h[3], w8 +; CHECK-NEXT: msub w8, w8, w13, w9 +; CHECK-NEXT: smov w9, v0.h[3] +; CHECK-NEXT: asr w13, w10, #9 +; CHECK-NEXT: movi d0, #0000000000000000 +; CHECK-NEXT: add w10, w13, w10, lsr #31 +; CHECK-NEXT: mov w13, #654 +; CHECK-NEXT: smull x12, w9, w12 +; CHECK-NEXT: msub w10, w10, w13, w11 +; CHECK-NEXT: lsr x11, x12, #63 +; CHECK-NEXT: asr x12, x12, #43 +; CHECK-NEXT: add w11, w12, w11 +; CHECK-NEXT: mov w12, #5423 +; CHECK-NEXT: mov v0.h[1], w10 +; CHECK-NEXT: msub w9, w11, w12, w9 +; CHECK-NEXT: mov v0.h[2], w8 +; CHECK-NEXT: mov v0.h[3], w9 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %1 = srem <4 x i16> %x, @@ -242,38 +242,38 @@ define <4 x i16> @dont_fold_srem_i16_smax(<4 x i16> %x) { ; CHECK-LABEL: dont_fold_srem_i16_smax: ; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #17097 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: smov w8, v0.h[2] -; CHECK-NEXT: mov w9, #17097 -; CHECK-NEXT: smov w10, v0.h[1] -; CHECK-NEXT: movk w9, #45590, lsl #16 -; CHECK-NEXT: mov w11, #32767 -; CHECK-NEXT: smov w12, v0.h[3] -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: smull x9, w8, w9 -; CHECK-NEXT: add w11, w10, w11 -; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: lsr x9, x9, #32 -; CHECK-NEXT: csel w11, w11, w10, lt -; CHECK-NEXT: add w9, w9, w8 -; CHECK-NEXT: and w11, w11, #0xffff8000 -; CHECK-NEXT: asr w13, w9, #4 -; CHECK-NEXT: sub w10, w10, w11 +; CHECK-NEXT: smov w9, v0.h[2] +; CHECK-NEXT: movk w8, #45590, lsl #16 ; CHECK-NEXT: mov w11, #47143 -; CHECK-NEXT: add w9, w13, w9, lsr #31 -; CHECK-NEXT: mov w13, #23 +; CHECK-NEXT: smov w12, v0.h[1] +; CHECK-NEXT: mov w10, #32767 +; CHECK-NEXT: smull x8, w9, w8 ; CHECK-NEXT: movk w11, #24749, lsl #16 -; CHECK-NEXT: mov v1.h[1], w10 -; CHECK-NEXT: msub w8, w9, w13, w8 -; CHECK-NEXT: smull x9, w12, w11 -; CHECK-NEXT: lsr x10, x9, #63 -; CHECK-NEXT: asr x9, x9, #43 -; CHECK-NEXT: add w9, w9, w10 -; CHECK-NEXT: mov w10, #5423 -; CHECK-NEXT: mov v1.h[2], w8 -; CHECK-NEXT: msub w8, w9, w10, w12 -; CHECK-NEXT: mov v1.h[3], w8 -; CHECK-NEXT: fmov d0, d1 +; CHECK-NEXT: smov w13, v0.h[3] +; CHECK-NEXT: add w10, w12, w10 +; CHECK-NEXT: lsr x8, x8, #32 +; CHECK-NEXT: cmp w12, #0 +; CHECK-NEXT: add w8, w8, w9 +; CHECK-NEXT: csel w10, w10, w12, lt +; CHECK-NEXT: asr w14, w8, #4 +; CHECK-NEXT: smull x11, w13, w11 +; CHECK-NEXT: add w8, w14, w8, lsr #31 +; CHECK-NEXT: mov w14, #23 +; CHECK-NEXT: and w10, w10, #0xffff8000 +; CHECK-NEXT: movi d0, #0000000000000000 +; CHECK-NEXT: msub w8, w8, w14, w9 +; CHECK-NEXT: sub w9, w12, w10 +; CHECK-NEXT: lsr x10, x11, #63 +; CHECK-NEXT: asr x11, x11, #43 +; CHECK-NEXT: add w10, w11, w10 +; CHECK-NEXT: mov w11, #5423 +; CHECK-NEXT: mov v0.h[1], w9 +; CHECK-NEXT: msub w10, w10, w11, w13 +; CHECK-NEXT: mov v0.h[2], w8 +; CHECK-NEXT: mov v0.h[3], w10 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %1 = srem <4 x i16> %x, ret <4 x i16> %1 @@ -286,35 +286,35 @@ ; CHECK-NEXT: mov x8, #8549 ; CHECK-NEXT: fmov x9, d1 ; CHECK-NEXT: movk x8, #22795, lsl #16 -; CHECK-NEXT: mov x12, #6055 +; CHECK-NEXT: mov x11, #6055 ; CHECK-NEXT: movk x8, #17096, lsl #32 -; CHECK-NEXT: movk x12, #58853, lsl #16 +; CHECK-NEXT: movk x11, #58853, lsl #16 ; CHECK-NEXT: movk x8, #45590, lsl #48 -; CHECK-NEXT: mov x14, #21445 -; CHECK-NEXT: mov x10, v1.d[1] -; CHECK-NEXT: movk x12, #47142, lsl #32 +; CHECK-NEXT: mov x12, #21445 +; CHECK-NEXT: movk x11, #47142, lsl #32 +; CHECK-NEXT: movk x12, #1603, lsl #16 ; CHECK-NEXT: smulh x8, x9, x8 -; CHECK-NEXT: movk x14, #1603, lsl #16 -; CHECK-NEXT: mov x11, v0.d[1] -; CHECK-NEXT: movk x12, #24749, lsl #48 +; CHECK-NEXT: mov x10, v1.d[1] +; CHECK-NEXT: movk x11, #24749, lsl #48 +; CHECK-NEXT: movk x12, #15432, lsl #32 +; CHECK-NEXT: movk x12, #25653, lsl #48 +; CHECK-NEXT: mov x13, v0.d[1] ; CHECK-NEXT: add x8, x8, x9 -; CHECK-NEXT: movk x14, #15432, lsl #32 -; CHECK-NEXT: asr x13, x8, #4 -; CHECK-NEXT: movk x14, #25653, lsl #48 -; CHECK-NEXT: add x8, x13, x8, lsr #63 -; CHECK-NEXT: mov w13, #23 -; CHECK-NEXT: smulh x12, x10, x12 -; CHECK-NEXT: smulh x14, x11, x14 -; CHECK-NEXT: msub x8, x8, x13, x9 -; CHECK-NEXT: asr x13, x12, #11 -; CHECK-NEXT: add x12, x13, x12, lsr #63 -; CHECK-NEXT: asr x13, x14, #8 +; CHECK-NEXT: smulh x11, x10, x11 +; CHECK-NEXT: asr x14, x8, #4 +; CHECK-NEXT: mov w15, #23 +; CHECK-NEXT: add x8, x14, x8, lsr #63 +; CHECK-NEXT: smulh x12, x13, x12 +; CHECK-NEXT: asr x14, x11, #11 +; CHECK-NEXT: add x11, x14, x11, lsr #63 +; CHECK-NEXT: msub x8, x8, x15, x9 +; CHECK-NEXT: asr x14, x12, #8 ; CHECK-NEXT: mov w9, #5423 -; CHECK-NEXT: add x13, x13, x14, lsr #63 +; CHECK-NEXT: add x12, x14, x12, lsr #63 ; CHECK-NEXT: mov w14, #654 -; CHECK-NEXT: msub x9, x12, x9, x10 +; CHECK-NEXT: msub x9, x11, x9, x10 +; CHECK-NEXT: msub x10, x12, x14, x13 ; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: msub x10, x13, x14, x11 ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov v1.d[1], x9 ; CHECK-NEXT: mov v0.d[1], x10 diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll --- a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll @@ -144,10 +144,10 @@ ; CHECK-NEXT: shl v1.2s, v1.2s, #24 ; CHECK-NEXT: sqsub v0.2s, v1.2s, v0.2s ; CHECK-NEXT: ushr v0.2s, v0.2s, #24 -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strb w9, [x2] -; CHECK-NEXT: strb w8, [x2, #1] +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: mov w9, v0.s[1] +; CHECK-NEXT: strb w9, [x2, #1] +; CHECK-NEXT: strb w8, [x2] ; CHECK-NEXT: ret %x = load <2 x i8>, <2 x i8>* %px %y = load <2 x i8>, <2 x i8>* %py @@ -184,10 +184,10 @@ ; CHECK-NEXT: shl v1.2s, v1.2s, #16 ; CHECK-NEXT: sqsub v0.2s, v1.2s, v0.2s ; CHECK-NEXT: ushr v0.2s, v0.2s, #16 -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strh w9, [x2] -; CHECK-NEXT: strh w8, [x2, #2] +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: mov w9, v0.s[1] +; CHECK-NEXT: strh w9, [x2, #2] +; CHECK-NEXT: strh w8, [x2] ; CHECK-NEXT: ret %x = load <2 x i16>, <2 x i16>* %px %y = load <2 x i16>, <2 x i16>* %py diff --git a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll --- a/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll +++ b/llvm/test/CodeGen/AArch64/sve-extract-fixed-vector.ll @@ -367,9 +367,9 @@ define <2 x i1> @extract_v2i1_nxv2i1( %inmask) { ; CHECK-LABEL: extract_v2i1_nxv2i1: ; CHECK: // %bb.0: -; CHECK-NEXT: mov z0.d, p0/z, #1 // =0x1 -; CHECK-NEXT: fmov x0, d0 -; CHECK-NEXT: mov x8, v0.d[1] +; CHECK-NEXT: mov z1.d, p0/z, #1 // =0x1 +; CHECK-NEXT: fmov x0, d1 +; CHECK-NEXT: mov x8, v1.d[1] ; CHECK-NEXT: fmov s0, w0 ; CHECK-NEXT: mov v0.s[1], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll @@ -118,10 +118,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: umov w8, v0.h[2] -; CHECK-NEXT: umov w9, v0.h[3] -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: mov v0.s[1], w9 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: fmov s1, w8 +; CHECK-NEXT: umov w8, v0.h[3] +; CHECK-NEXT: mov v1.s[1], w8 +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret %ret = call <2 x i16> @llvm.experimental.vector.extract.v2i16.v4i16(<4 x i16> %op, i64 2) ret <2 x i16> %ret diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-div.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-div.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-div.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-div.ll @@ -33,10 +33,10 @@ ; CHECK-NEXT: sdiv [[DIV:z[0-9]+]].s, [[PG0]]/m, [[OP1_LO_LO]].s, [[OP2_LO_LO]].s ; CHECK-NEXT: uzp1 [[RES:z[0-9]+]].h, [[DIV]].h, [[DIV]].h ; CHECK-NEXT: umov [[SCALAR0:w[0-9]+]], [[VEC:v[0-9]+]].h[0] -; CHECK-NEXT: umov [[SCALAR1:w[0-9]+]], [[VEC]].h[1] ; CHECK-NEXT: fmov s0, [[SCALAR0]] -; CHECK-NEXT: umov [[SCALAR2:w[0-9]+]], [[VEC]].h[2] +; CHECK-NEXT: umov [[SCALAR1:w[0-9]+]], [[VEC]].h[1] ; CHECK-NEXT: mov [[FINAL:v[0-9]+]].b[1], [[SCALAR1]] +; CHECK-NEXT: umov [[SCALAR2:w[0-9]+]], [[VEC]].h[2] ; CHECK-NEXT: mov [[FINAL]].b[2], [[SCALAR2]] ; CHECK-NEXT: umov [[SCALAR3:w[0-9]+]], [[VEC]].h[3] ; CHECK-NEXT: mov [[FINAL]].b[3], [[SCALAR3]] @@ -725,10 +725,10 @@ ; CHECK-NEXT: udiv [[DIV:z[0-9]+]].s, [[PG0]]/m, [[OP1_LO_LO]].s, [[OP2_LO_LO]].s ; CHECK-NEXT: uzp1 [[RES:z[0-9]+]].h, [[DIV]].h, [[DIV]].h ; CHECK-NEXT: umov [[SCALAR0:w[0-9]+]], [[VEC:v[0-9]+]].h[0] -; CHECK-NEXT: umov [[SCALAR1:w[0-9]+]], [[VEC]].h[1] ; CHECK-NEXT: fmov s0, [[SCALAR0]] -; CHECK-NEXT: umov [[SCALAR2:w[0-9]+]], [[VEC]].h[2] +; CHECK-NEXT: umov [[SCALAR1:w[0-9]+]], [[VEC]].h[1] ; CHECK-NEXT: mov [[FINAL:v[0-9]+]].b[1], [[SCALAR1]] +; CHECK-NEXT: umov [[SCALAR2:w[0-9]+]], [[VEC]].h[2] ; CHECK-NEXT: mov [[FINAL]].b[2], [[SCALAR2]] ; CHECK-NEXT: umov [[SCALAR3:w[0-9]+]], [[VEC]].h[3] ; CHECK-NEXT: mov [[FINAL]].b[3], [[SCALAR3]] diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll @@ -37,10 +37,10 @@ ; CHECK-NEXT: smull v0.8h, v0.8b, v1.8b ; CHECK-NEXT: ushr v1.8h, v0.8h, #8 ; CHECK-NEXT: umov w8, v1.h[0] -; CHECK-NEXT: umov w9, v1.h[1] ; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: umov w8, v1.h[1] +; CHECK-NEXT: mov v0.b[1], w8 ; CHECK-NEXT: umov w8, v1.h[2] -; CHECK-NEXT: mov v0.b[1], w9 ; CHECK-NEXT: mov v0.b[2], w8 ; CHECK-NEXT: umov w8, v1.h[3] ; CHECK-NEXT: mov v0.b[3], w8 @@ -536,10 +536,10 @@ ; CHECK-NEXT: umull v0.8h, v0.8b, v1.8b ; CHECK-NEXT: ushr v1.8h, v0.8h, #8 ; CHECK-NEXT: umov w8, v1.h[0] -; CHECK-NEXT: umov w9, v1.h[1] ; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: umov w8, v1.h[1] +; CHECK-NEXT: mov v0.b[1], w8 ; CHECK-NEXT: umov w8, v1.h[2] -; CHECK-NEXT: mov v0.b[1], w9 ; CHECK-NEXT: mov v0.b[2], w8 ; CHECK-NEXT: umov w8, v1.h[3] ; CHECK-NEXT: mov v0.b[3], w8 diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll @@ -33,10 +33,10 @@ ; CHECK-NEXT: sdivr [[DIV1:z[0-9]+]].s, [[PG1]]/m, [[OP2_LO_LO]].s, [[OP1_LO_LO]].s ; CHECK-NEXT: uzp1 [[UZP1:z[0-9]+]].h, [[DIV1]].h, [[DIV1]].h ; CHECK-NEXT: umov [[SCALAR1:w[0-9]+]], [[VEC:v[0-9]+]].h[0] -; CHECK-NEXT: umov [[SCALAR2:w[0-9]+]], [[VEC]].h[1] ; CHECK-NEXT: fmov s3, [[SCALAR1]] -; CHECK-NEXT: umov [[SCALAR3:w[0-9]+]], [[VEC]].h[2] +; CHECK-NEXT: umov [[SCALAR2:w[0-9]+]], [[VEC]].h[1] ; CHECK-NEXT: mov [[FINAL:v[0-9]+]].b[1], [[SCALAR2]] +; CHECK-NEXT: umov [[SCALAR3:w[0-9]+]], [[VEC]].h[2] ; CHECK-NEXT: mov [[FINAL]].b[2], [[SCALAR3]] ; CHECK-NEXT: umov [[SCALAR4:w[0-9]+]], [[VEC]].h[3] ; CHECK-NEXT: mov [[FINAL]].b[3], [[SCALAR4]] @@ -792,10 +792,10 @@ ; CHECK-NEXT: udivr [[DIV1:z[0-9]+]].s, [[PG1]]/m, [[OP2_LO_LO]].s, [[OP1_LO_LO]].s ; CHECK-NEXT: uzp1 [[UZP1:z[0-9]+]].h, [[DIV1]].h, [[DIV1]].h ; CHECK-NEXT: umov [[SCALAR0:w[0-9]+]], [[VEC:v[0-9]+]].h[0] -; CHECK-NEXT: umov [[SCALAR1:w[0-9]+]], [[VEC]].h[1] ; CHECK-NEXT: fmov s3, [[SCALAR0]] -; CHECK-NEXT: umov [[SCALAR2:w[0-9]+]], [[VEC]].h[2] +; CHECK-NEXT: umov [[SCALAR1:w[0-9]+]], [[VEC]].h[1] ; CHECK-NEXT: mov [[FINAL:v[0-9]+]].b[1], [[SCALAR1]] +; CHECK-NEXT: umov [[SCALAR2:w[0-9]+]], [[VEC]].h[2] ; CHECK-NEXT: mov [[FINAL]].b[2], [[SCALAR2]] ; CHECK-NEXT: umov [[SCALAR3:w[0-9]+]], [[VEC]].h[3] ; CHECK-NEXT: mov [[FINAL]].b[3], [[SCALAR3]] diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll @@ -278,10 +278,10 @@ ; VBITS_EQ_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; VBITS_EQ_256-NEXT: uunpklo z0.s, z0.h ; VBITS_EQ_256-NEXT: uunpklo z0.d, z0.s -; VBITS_EQ_256-NEXT: ucvtf z0.d, p0/m, z0.d -; VBITS_EQ_256-NEXT: st1d { z0.d }, p0, [x1] ; VBITS_EQ_256-NEXT: uunpklo z1.s, z1.h +; VBITS_EQ_256-NEXT: ucvtf z0.d, p0/m, z0.d ; VBITS_EQ_256-NEXT: uunpklo z1.d, z1.s +; VBITS_EQ_256-NEXT: st1d { z0.d }, p0, [x1] ; VBITS_EQ_256-NEXT: ucvtf z1.d, p0/m, z1.d ; VBITS_EQ_256-NEXT: st1d { z1.d }, p0, [x1, x8, lsl #3] ; VBITS_EQ_256-NEXT: ret @@ -1221,10 +1221,10 @@ ; VBITS_EQ_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; VBITS_EQ_256-NEXT: sunpklo z0.s, z0.h ; VBITS_EQ_256-NEXT: sunpklo z0.d, z0.s -; VBITS_EQ_256-NEXT: scvtf z0.d, p0/m, z0.d -; VBITS_EQ_256-NEXT: st1d { z0.d }, p0, [x1] ; VBITS_EQ_256-NEXT: sunpklo z1.s, z1.h +; VBITS_EQ_256-NEXT: scvtf z0.d, p0/m, z0.d ; VBITS_EQ_256-NEXT: sunpklo z1.d, z1.s +; VBITS_EQ_256-NEXT: st1d { z0.d }, p0, [x1] ; VBITS_EQ_256-NEXT: scvtf z1.d, p0/m, z1.d ; VBITS_EQ_256-NEXT: st1d { z1.d }, p0, [x1, x8, lsl #3] ; VBITS_EQ_256-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll @@ -245,10 +245,10 @@ ; VBITS_EQ_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; VBITS_EQ_256-NEXT: sunpklo z0.s, z0.h ; VBITS_EQ_256-NEXT: sunpklo z0.d, z0.s -; VBITS_EQ_256-NEXT: cmpne p1.d, p0/z, z0.d, #0 -; VBITS_EQ_256-NEXT: ld1h { z0.d }, p1/z, [z3.d] ; VBITS_EQ_256-NEXT: sunpklo z1.s, z1.h +; VBITS_EQ_256-NEXT: cmpne p1.d, p0/z, z0.d, #0 ; VBITS_EQ_256-NEXT: sunpklo z1.d, z1.s +; VBITS_EQ_256-NEXT: ld1h { z0.d }, p1/z, [z3.d] ; VBITS_EQ_256-NEXT: cmpne p0.d, p0/z, z1.d, #0 ; VBITS_EQ_256-NEXT: ld1h { z1.d }, p0/z, [z2.d] ; VBITS_EQ_256-NEXT: uzp1 z0.s, z0.s, z0.s @@ -610,10 +610,10 @@ ; CHECK-NEXT: ptrue p0.d, vl4 ; CHECK-NEXT: fcmeq v1.4h, v1.4h, #0.0 ; CHECK-NEXT: umov w8, v1.h[0] -; CHECK-NEXT: umov w9, v1.h[1] -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: mov v1.s[1], w9 -; CHECK-NEXT: shl v1.2s, v1.2s, #16 +; CHECK-NEXT: fmov s2, w8 +; CHECK-NEXT: umov w8, v1.h[1] +; CHECK-NEXT: mov v2.s[1], w8 +; CHECK-NEXT: shl v1.2s, v2.2s, #16 ; CHECK-NEXT: sshr v1.2s, v1.2s, #16 ; CHECK-NEXT: fmov w8, s1 ; CHECK-NEXT: mov w9, v1.s[1] diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll @@ -32,10 +32,10 @@ ; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: fcmeq v1.4h, v1.4h, v2.4h ; CHECK-NEXT: umov w8, v1.h[0] -; CHECK-NEXT: umov w9, v1.h[1] -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: mov v1.s[1], w9 -; CHECK-NEXT: shl v1.2s, v1.2s, #16 +; CHECK-NEXT: fmov s2, w8 +; CHECK-NEXT: umov w8, v1.h[1] +; CHECK-NEXT: mov v2.s[1], w8 +; CHECK-NEXT: shl v1.2s, v2.2s, #16 ; CHECK-NEXT: sshr v1.2s, v1.2s, #16 ; CHECK-NEXT: fmov w8, s1 ; CHECK-NEXT: mov w9, v1.s[1] diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll @@ -1,4 +1,3 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --function masked_scatter_v8i8,masked_scatter_v8i16,masked_scatter_v8i32,masked_scatter_v8i64 --prefix VBITS_EQ_256 ; RUN: llc -aarch64-sve-vector-bits-min=128 < %s | FileCheck %s -check-prefix=NO_SVE ; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_EQ_256 ; RUN: llc -aarch64-sve-vector-bits-min=384 < %s | FileCheck %s -check-prefixes=CHECK @@ -77,9 +76,9 @@ ; VBITS_EQ_256-NEXT: mov x8, #4 ; VBITS_EQ_256-NEXT: ptrue p0.d, vl4 ; VBITS_EQ_256-NEXT: cmeq v1.8b, v0.8b, #0 -; VBITS_EQ_256-NEXT: zip1 v5.8b, v0.8b, v0.8b ; VBITS_EQ_256-NEXT: ld1d { z3.d }, p0/z, [x1, x8, lsl #3] ; VBITS_EQ_256-NEXT: ld1d { z4.d }, p0/z, [x1] +; VBITS_EQ_256-NEXT: zip1 v5.8b, v0.8b, v0.8b ; VBITS_EQ_256-NEXT: zip1 v2.8b, v1.8b, v0.8b ; VBITS_EQ_256-NEXT: zip2 v1.8b, v1.8b, v0.8b ; VBITS_EQ_256-NEXT: zip2 v0.8b, v0.8b, v0.8b @@ -223,22 +222,22 @@ ; VBITS_EQ_256-NEXT: mov x8, #4 ; VBITS_EQ_256-NEXT: ptrue p0.d, vl4 ; VBITS_EQ_256-NEXT: cmeq v1.8h, v0.8h, #0 -; VBITS_EQ_256-NEXT: ld1d { z4.d }, p0/z, [x1, x8, lsl #3] -; VBITS_EQ_256-NEXT: ext v3.16b, v0.16b, v0.16b, #8 -; VBITS_EQ_256-NEXT: uunpklo z0.s, z0.h -; VBITS_EQ_256-NEXT: sunpklo z2.s, z1.h -; VBITS_EQ_256-NEXT: uunpklo z0.d, z0.s +; VBITS_EQ_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] +; VBITS_EQ_256-NEXT: ld1d { z4.d }, p0/z, [x1] +; VBITS_EQ_256-NEXT: sunpklo z3.s, z1.h +; VBITS_EQ_256-NEXT: sunpklo z3.d, z3.s ; VBITS_EQ_256-NEXT: ext v1.16b, v1.16b, v1.16b, #8 -; VBITS_EQ_256-NEXT: sunpklo z2.d, z2.s -; VBITS_EQ_256-NEXT: cmpne p1.d, p0/z, z2.d, #0 -; VBITS_EQ_256-NEXT: ld1d { z2.d }, p0/z, [x1] -; VBITS_EQ_256-NEXT: uunpklo z3.s, z3.h +; VBITS_EQ_256-NEXT: cmpne p1.d, p0/z, z3.d, #0 +; VBITS_EQ_256-NEXT: uunpklo z3.s, z0.h +; VBITS_EQ_256-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; VBITS_EQ_256-NEXT: sunpklo z1.s, z1.h ; VBITS_EQ_256-NEXT: sunpklo z1.d, z1.s -; VBITS_EQ_256-NEXT: st1h { z0.d }, p1, [z2.d] +; VBITS_EQ_256-NEXT: uunpklo z3.d, z3.s +; VBITS_EQ_256-NEXT: uunpklo z0.s, z0.h ; VBITS_EQ_256-NEXT: cmpne p0.d, p0/z, z1.d, #0 -; VBITS_EQ_256-NEXT: uunpklo z1.d, z3.s -; VBITS_EQ_256-NEXT: st1h { z1.d }, p0, [z4.d] +; VBITS_EQ_256-NEXT: uunpklo z0.d, z0.s +; VBITS_EQ_256-NEXT: st1h { z3.d }, p1, [z4.d] +; VBITS_EQ_256-NEXT: st1h { z0.d }, p0, [z2.d] ; VBITS_EQ_256-NEXT: ret ; VBITS_GE_512-LABEL: masked_scatter_v8i16: ; VBITS_GE_512: // %bb.0: @@ -556,10 +555,10 @@ ; CHECK-NEXT: fcmeq v2.4h, v1.4h, #0.0 ; CHECK-NEXT: uunpklo z1.s, z1.h ; CHECK-NEXT: umov w8, v2.h[0] -; CHECK-NEXT: umov w9, v2.h[1] -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: mov v2.s[1], w9 -; CHECK-NEXT: shl v2.2s, v2.2s, #16 +; CHECK-NEXT: fmov s3, w8 +; CHECK-NEXT: umov w8, v2.h[1] +; CHECK-NEXT: mov v3.s[1], w8 +; CHECK-NEXT: shl v2.2s, v3.2s, #16 ; CHECK-NEXT: sshr v2.2s, v2.2s, #16 ; CHECK-NEXT: fmov w8, s2 ; CHECK-NEXT: mov w9, v2.s[1] @@ -793,10 +792,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldr q2, [x1] -; CHECK-NEXT: fcmeq v1.2d, v0.2d, #0.0 -; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0 -; CHECK-NEXT: st1d { z0.d }, p0, [z2.d] +; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: fcmeq v2.2d, v0.2d, #0.0 +; CHECK-NEXT: cmpne p0.d, p0/z, z2.d, #0 +; CHECK-NEXT: st1d { z0.d }, p0, [z1.d] ; CHECK-NEXT: ret %vals = load <2 x double>, <2 x double>* %a %ptrs = load <2 x double*>, <2 x double*>* %b diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll @@ -32,10 +32,10 @@ ; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: fcmeq v2.4h, v1.4h, v2.4h ; CHECK-NEXT: umov w8, v2.h[0] -; CHECK-NEXT: umov w9, v2.h[1] -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: mov v2.s[1], w9 -; CHECK-NEXT: shl v2.2s, v2.2s, #16 +; CHECK-NEXT: fmov s3, w8 +; CHECK-NEXT: umov w8, v2.h[1] +; CHECK-NEXT: mov v3.s[1], w8 +; CHECK-NEXT: shl v2.2s, v3.2s, #16 ; CHECK-NEXT: sshr v2.2s, v2.2s, #16 ; CHECK-NEXT: fmov w8, s2 ; CHECK-NEXT: mov w9, v2.s[1] diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-permute-rev.ll @@ -205,12 +205,12 @@ ; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0] ; CHECK-NEXT: mov z1.d, z0.d[2] ; CHECK-NEXT: mov z2.d, z0.d[3] -; CHECK-NEXT: mov x10, v0.d[1] ; CHECK-NEXT: fmov x8, d1 ; CHECK-NEXT: fmov x9, d2 -; CHECK-NEXT: fmov x11, d0 +; CHECK-NEXT: fmov x10, d0 ; CHECK-NEXT: stp x9, x8, [sp, #16] -; CHECK-NEXT: stp x10, x11, [sp] +; CHECK-NEXT: mov x8, v0.d[1] +; CHECK-NEXT: stp x8, x10, [sp] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp] ; CHECK-NEXT: st1d { z0.d }, p0, [x0] ; CHECK-NEXT: mov sp, x29 @@ -239,22 +239,22 @@ ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ptrue p0.s, vl8 ; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] +; CHECK-NEXT: fmov w11, s0 ; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: mov w9, v0.s[2] -; CHECK-NEXT: mov w11, v0.s[3] -; CHECK-NEXT: fmov w10, s0 +; CHECK-NEXT: mov w10, v0.s[3] ; CHECK-NEXT: mov z1.s, z0.s[4] ; CHECK-NEXT: mov z2.s, z0.s[5] ; CHECK-NEXT: mov z3.s, z0.s[6] ; CHECK-NEXT: mov z0.s, z0.s[7] -; CHECK-NEXT: stp w8, w10, [sp, #24] -; CHECK-NEXT: fmov w10, s1 -; CHECK-NEXT: fmov w8, s2 -; CHECK-NEXT: stp w11, w9, [sp, #16] -; CHECK-NEXT: fmov w9, s3 -; CHECK-NEXT: fmov w11, s0 -; CHECK-NEXT: stp w8, w10, [sp, #8] -; CHECK-NEXT: stp w11, w9, [sp] +; CHECK-NEXT: stp w10, w9, [sp, #16] +; CHECK-NEXT: fmov w9, s1 +; CHECK-NEXT: fmov w10, s2 +; CHECK-NEXT: stp w8, w11, [sp, #24] +; CHECK-NEXT: fmov w11, s3 +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: stp w10, w9, [sp, #8] +; CHECK-NEXT: stp w8, w11, [sp] ; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp] ; CHECK-NEXT: st1w { z0.s }, p0, [x0] ; CHECK-NEXT: mov sp, x29 @@ -355,46 +355,46 @@ ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] -; CHECK-NEXT: mov z1.h, z0.h[8] ; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: fmov w9, s1 -; CHECK-NEXT: mov z5.h, z0.h[12] +; CHECK-NEXT: mov z1.h, z0.h[8] ; CHECK-NEXT: mov z2.h, z0.h[9] ; CHECK-NEXT: mov z3.h, z0.h[10] -; CHECK-NEXT: mov z4.h, z0.h[11] -; CHECK-NEXT: fmov w11, s2 -; CHECK-NEXT: strh w9, [sp, #30] -; CHECK-NEXT: fmov w9, s5 -; CHECK-NEXT: fmov w12, s3 +; CHECK-NEXT: fmov w9, s1 +; CHECK-NEXT: fmov w10, s2 ; CHECK-NEXT: strh w8, [sp, #14] -; CHECK-NEXT: fmov w8, s4 +; CHECK-NEXT: fmov w8, s3 +; CHECK-NEXT: mov z4.h, z0.h[11] +; CHECK-NEXT: mov z5.h, z0.h[12] ; CHECK-NEXT: mov z6.h, z0.h[13] +; CHECK-NEXT: strh w9, [sp, #30] +; CHECK-NEXT: fmov w9, s4 +; CHECK-NEXT: strh w10, [sp, #28] +; CHECK-NEXT: fmov w10, s5 +; CHECK-NEXT: strh w8, [sp, #26] +; CHECK-NEXT: fmov w8, s6 ; CHECK-NEXT: mov z7.h, z0.h[14] -; CHECK-NEXT: mov z16.h, z0.h[15] -; CHECK-NEXT: umov w10, v0.h[1] -; CHECK-NEXT: strh w9, [sp, #22] +; CHECK-NEXT: mov z1.h, z0.h[15] +; CHECK-NEXT: strh w9, [sp, #24] +; CHECK-NEXT: fmov w9, s7 +; CHECK-NEXT: strh w10, [sp, #22] +; CHECK-NEXT: fmov w10, s1 +; CHECK-NEXT: strh w8, [sp, #20] +; CHECK-NEXT: umov w8, v0.h[1] +; CHECK-NEXT: strh w9, [sp, #18] ; CHECK-NEXT: umov w9, v0.h[2] -; CHECK-NEXT: strh w11, [sp, #28] -; CHECK-NEXT: fmov w11, s6 -; CHECK-NEXT: strh w12, [sp, #26] -; CHECK-NEXT: fmov w12, s7 -; CHECK-NEXT: strh w8, [sp, #24] -; CHECK-NEXT: fmov w8, s16 -; CHECK-NEXT: strh w10, [sp, #12] -; CHECK-NEXT: strh w11, [sp, #20] -; CHECK-NEXT: umov w11, v0.h[3] -; CHECK-NEXT: strh w12, [sp, #18] -; CHECK-NEXT: umov w12, v0.h[4] -; CHECK-NEXT: strh w8, [sp, #16] -; CHECK-NEXT: umov w8, v0.h[5] -; CHECK-NEXT: umov w10, v0.h[6] +; CHECK-NEXT: strh w10, [sp, #16] +; CHECK-NEXT: umov w10, v0.h[3] +; CHECK-NEXT: strh w8, [sp, #12] +; CHECK-NEXT: umov w8, v0.h[4] ; CHECK-NEXT: strh w9, [sp, #10] -; CHECK-NEXT: umov w9, v0.h[7] -; CHECK-NEXT: strh w11, [sp, #8] -; CHECK-NEXT: strh w12, [sp, #6] -; CHECK-NEXT: strh w8, [sp, #4] +; CHECK-NEXT: umov w9, v0.h[5] +; CHECK-NEXT: strh w10, [sp, #8] +; CHECK-NEXT: umov w10, v0.h[6] +; CHECK-NEXT: strh w8, [sp, #6] +; CHECK-NEXT: umov w8, v0.h[7] +; CHECK-NEXT: strh w9, [sp, #4] ; CHECK-NEXT: strh w10, [sp, #2] -; CHECK-NEXT: strh w9, [sp] +; CHECK-NEXT: strh w8, [sp] ; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp] ; CHECK-NEXT: st1h { z0.h }, p0, [x0] ; CHECK-NEXT: mov sp, x29 diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-shuffles.ll @@ -17,8 +17,8 @@ ; Ensure we don't crash when trying to lower a shuffle via and extract define void @crash_when_lowering_extract_shuffle(<32 x i32>* %dst, i1 %cond) #0 { ; CHECK-LABEL: crash_when_lowering_extract_shuffle: -; CHECK: ld1w { z3.s }, p0/z, [x0] -; CHECK: st1w { z3.s }, p0, [x0] +; CHECK: ld1w { z3.s }, p0/z +; CHECK: st1w { z3.s }, p0 %broadcast.splat = shufflevector <32 x i1> zeroinitializer, <32 x i1> zeroinitializer, <32 x i32> zeroinitializer br i1 %cond, label %exit, label %vector.body diff --git a/llvm/test/CodeGen/AArch64/sve-select.ll b/llvm/test/CodeGen/AArch64/sve-select.ll --- a/llvm/test/CodeGen/AArch64/sve-select.ll +++ b/llvm/test/CodeGen/AArch64/sve-select.ll @@ -641,8 +641,8 @@ define <4 x float> @select_f32_no_invert_not_scalable(<4 x float> %a, <4 x float> %b) #0 { ; CHECK-LABEL: select_f32_no_invert_not_scalable: ; CHECK: // %bb.0: -; CHECK-NEXT: fcmeq v2.4s, v0.4s, #0.0 ; CHECK-NEXT: fmul v1.4s, v0.4s, v1.4s +; CHECK-NEXT: fcmeq v2.4s, v0.4s, #0.0 ; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b ; CHECK-NEXT: ret %p = fcmp oeq <4 x float> %a, zeroinitializer diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll --- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll @@ -112,13 +112,13 @@ define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind { ; CHECK-LABEL: v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr s1, [x0] -; CHECK-NEXT: movi d0, #0xff00ff00ff00ff -; CHECK-NEXT: ldr s2, [x1] +; CHECK-NEXT: ldr s0, [x0] +; CHECK-NEXT: ldr s1, [x1] +; CHECK-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-NEXT: ushll v2.8h, v2.8b, #0 -; CHECK-NEXT: add v1.4h, v1.4h, v2.4h -; CHECK-NEXT: umin v0.4h, v1.4h, v0.4h +; CHECK-NEXT: add v0.4h, v0.4h, v1.4h +; CHECK-NEXT: movi d1, #0xff00ff00ff00ff +; CHECK-NEXT: umin v0.4h, v0.4h, v1.4h ; CHECK-NEXT: xtn v0.8b, v0.8h ; CHECK-NEXT: str s0, [x2] ; CHECK-NEXT: ret @@ -143,10 +143,10 @@ ; CHECK-NEXT: mov v1.s[1], w9 ; CHECK-NEXT: add v1.2s, v1.2s, v2.2s ; CHECK-NEXT: umin v0.2s, v1.2s, v0.2s -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strb w9, [x2] -; CHECK-NEXT: strb w8, [x2, #1] +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: mov w9, v0.s[1] +; CHECK-NEXT: strb w9, [x2, #1] +; CHECK-NEXT: strb w8, [x2] ; CHECK-NEXT: ret %x = load <2 x i8>, <2 x i8>* %px %y = load <2 x i8>, <2 x i8>* %py @@ -184,10 +184,10 @@ ; CHECK-NEXT: mov v1.s[1], w9 ; CHECK-NEXT: add v1.2s, v1.2s, v2.2s ; CHECK-NEXT: umin v0.2s, v1.2s, v0.2s -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strh w9, [x2] -; CHECK-NEXT: strh w8, [x2, #2] +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: mov w9, v0.s[1] +; CHECK-NEXT: strh w9, [x2, #2] +; CHECK-NEXT: strh w8, [x2] ; CHECK-NEXT: ret %x = load <2 x i16>, <2 x i16>* %px %y = load <2 x i16>, <2 x i16>* %py diff --git a/llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask.ll b/llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask.ll --- a/llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask.ll +++ b/llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask.ll @@ -31,8 +31,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: movi d3, #0x0000ff000000ff ; CHECK-NEXT: and v0.8b, v0.8b, v2.8b -; CHECK-NEXT: eor v2.8b, v2.8b, v3.8b -; CHECK-NEXT: and v1.8b, v1.8b, v2.8b +; CHECK-NEXT: eor v3.8b, v2.8b, v3.8b +; CHECK-NEXT: and v1.8b, v1.8b, v3.8b ; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %mx = and <2 x i8> %x, %mask @@ -63,8 +63,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: movi d3, #0xff00ff00ff00ff ; CHECK-NEXT: and v0.8b, v0.8b, v2.8b -; CHECK-NEXT: eor v2.8b, v2.8b, v3.8b -; CHECK-NEXT: and v1.8b, v1.8b, v2.8b +; CHECK-NEXT: eor v3.8b, v2.8b, v3.8b +; CHECK-NEXT: and v1.8b, v1.8b, v3.8b ; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %mx = and <4 x i8> %x, %mask @@ -79,8 +79,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: movi d3, #0xff00ff00ff00ff ; CHECK-NEXT: and v0.8b, v0.8b, v2.8b -; CHECK-NEXT: eor v2.8b, v2.8b, v3.8b -; CHECK-NEXT: and v1.8b, v1.8b, v2.8b +; CHECK-NEXT: eor v3.8b, v2.8b, v3.8b +; CHECK-NEXT: and v1.8b, v1.8b, v3.8b ; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %mx = and <4 x i8> %x, %mask @@ -95,8 +95,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: movi d3, #0x00ffff0000ffff ; CHECK-NEXT: and v0.8b, v0.8b, v2.8b -; CHECK-NEXT: eor v2.8b, v2.8b, v3.8b -; CHECK-NEXT: and v1.8b, v1.8b, v2.8b +; CHECK-NEXT: eor v3.8b, v2.8b, v3.8b +; CHECK-NEXT: and v1.8b, v1.8b, v3.8b ; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %mx = and <2 x i16> %x, %mask diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/AArch64/urem-seteq-illegal-types.ll --- a/llvm/test/CodeGen/AArch64/urem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/AArch64/urem-seteq-illegal-types.ll @@ -68,20 +68,20 @@ ; CHECK-NEXT: fmov s0, w0 ; CHECK-NEXT: adrp x8, .LCPI4_0 ; CHECK-NEXT: adrp x9, .LCPI4_1 +; CHECK-NEXT: movi d3, #0x0000000000ffff ; CHECK-NEXT: mov v0.h[1], w1 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_0] ; CHECK-NEXT: ldr d2, [x9, :lo12:.LCPI4_1] ; CHECK-NEXT: adrp x8, .LCPI4_2 ; CHECK-NEXT: mov v0.h[2], w2 ; CHECK-NEXT: sub v0.4h, v0.4h, v1.4h -; CHECK-NEXT: movi d1, #0x0000000000ffff -; CHECK-NEXT: mul v0.4h, v0.4h, v2.4h -; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI4_2] +; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_2] ; CHECK-NEXT: adrp x8, .LCPI4_3 -; CHECK-NEXT: shl v3.4h, v0.4h, #1 +; CHECK-NEXT: mul v0.4h, v0.4h, v2.4h +; CHECK-NEXT: shl v2.4h, v0.4h, #1 ; CHECK-NEXT: bic v0.4h, #248, lsl #8 -; CHECK-NEXT: ushl v0.4h, v0.4h, v1.4h -; CHECK-NEXT: ushl v1.4h, v3.4h, v2.4h +; CHECK-NEXT: ushl v0.4h, v0.4h, v3.4h +; CHECK-NEXT: ushl v1.4h, v2.4h, v1.4h ; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI4_3] ; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b ; CHECK-NEXT: bic v0.4h, #248, lsl #8 diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll b/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll --- a/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll +++ b/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll @@ -7,7 +7,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI0_0 ; CHECK-NEXT: adrp x9, .LCPI0_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0] ; CHECK-NEXT: adrp x8, .LCPI0_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI0_2] @@ -19,7 +18,8 @@ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI0_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -34,13 +34,13 @@ ; CHECK-LABEL: test_urem_odd_allones_eq: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI1_0 -; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_0] ; CHECK-NEXT: adrp x8, .LCPI1_1 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_1] ; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -51,13 +51,13 @@ ; CHECK-LABEL: test_urem_odd_allones_ne: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI2_0 -; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0] ; CHECK-NEXT: adrp x8, .LCPI2_1 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_1] ; CHECK-NEXT: cmhi v0.4s, v0.4s, v1.4s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp ne <4 x i32> %urem, @@ -71,7 +71,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI3_0 ; CHECK-NEXT: adrp x9, .LCPI3_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0] ; CHECK-NEXT: adrp x8, .LCPI3_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI3_2] @@ -83,7 +82,8 @@ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -95,7 +95,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI4_0 ; CHECK-NEXT: adrp x9, .LCPI4_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_0] ; CHECK-NEXT: adrp x8, .LCPI4_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI4_2] @@ -107,7 +106,8 @@ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI4_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: cmhi v0.4s, v0.4s, v2.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp ne <4 x i32> %urem, @@ -121,7 +121,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI5_0 ; CHECK-NEXT: adrp x9, .LCPI5_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI5_0] ; CHECK-NEXT: adrp x8, .LCPI5_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI5_2] @@ -133,7 +132,8 @@ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI5_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -145,7 +145,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI6_0 ; CHECK-NEXT: adrp x9, .LCPI6_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI6_0] ; CHECK-NEXT: adrp x8, .LCPI6_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI6_2] @@ -157,7 +156,8 @@ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI6_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: cmhi v0.4s, v0.4s, v2.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp ne <4 x i32> %urem, @@ -173,7 +173,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI7_0 ; CHECK-NEXT: adrp x9, .LCPI7_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI7_0] ; CHECK-NEXT: adrp x8, .LCPI7_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI7_2] @@ -185,7 +184,8 @@ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI7_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -199,7 +199,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI8_0 ; CHECK-NEXT: adrp x9, .LCPI8_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI8_0] ; CHECK-NEXT: adrp x8, .LCPI8_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI8_2] @@ -211,7 +210,8 @@ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI8_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -225,7 +225,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI9_0 ; CHECK-NEXT: adrp x9, .LCPI9_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI9_0] ; CHECK-NEXT: adrp x8, .LCPI9_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI9_2] @@ -237,7 +236,8 @@ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI9_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -253,13 +253,13 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #52429 ; CHECK-NEXT: movk w8, #52428, lsl #16 -; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: adrp x8, .LCPI10_0 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI10_0] ; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -273,7 +273,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #28087 ; CHECK-NEXT: movk w8, #46811, lsl #16 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: adrp x8, .LCPI11_0 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s @@ -282,7 +281,8 @@ ; CHECK-NEXT: ushr v0.4s, v0.4s, #1 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -296,7 +296,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI12_0 ; CHECK-NEXT: adrp x9, .LCPI12_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI12_0] ; CHECK-NEXT: adrp x8, .LCPI12_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI12_2] @@ -308,7 +307,8 @@ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI12_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -324,7 +324,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI13_0 ; CHECK-NEXT: adrp x9, .LCPI13_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI13_0] ; CHECK-NEXT: adrp x8, .LCPI13_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI13_2] @@ -336,7 +335,8 @@ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI13_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -350,7 +350,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI14_0 ; CHECK-NEXT: adrp x9, .LCPI14_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI14_0] ; CHECK-NEXT: adrp x8, .LCPI14_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI14_2] @@ -362,7 +361,8 @@ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI14_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -376,7 +376,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI15_0 ; CHECK-NEXT: adrp x9, .LCPI15_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_0] ; CHECK-NEXT: adrp x8, .LCPI15_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI15_2] @@ -388,7 +387,8 @@ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI15_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -404,7 +404,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI16_0 ; CHECK-NEXT: adrp x9, .LCPI16_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] ; CHECK-NEXT: adrp x8, .LCPI16_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI16_2] @@ -416,7 +415,8 @@ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI16_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -430,7 +430,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI17_0 ; CHECK-NEXT: adrp x9, .LCPI17_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_0] ; CHECK-NEXT: adrp x8, .LCPI17_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI17_2] @@ -442,7 +441,8 @@ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI17_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -456,7 +456,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI18_0 ; CHECK-NEXT: adrp x9, .LCPI18_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI18_0] ; CHECK-NEXT: adrp x8, .LCPI18_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI18_2] @@ -468,7 +467,8 @@ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI18_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -483,13 +483,13 @@ ; CHECK-LABEL: test_urem_odd_allones_and_one: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI19_0 -; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI19_0] ; CHECK-NEXT: adrp x8, .LCPI19_1 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI19_1] ; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -503,7 +503,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI20_0 ; CHECK-NEXT: adrp x9, .LCPI20_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI20_0] ; CHECK-NEXT: adrp x8, .LCPI20_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI20_2] @@ -515,7 +514,8 @@ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI20_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -529,7 +529,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI21_0 ; CHECK-NEXT: adrp x9, .LCPI21_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI21_0] ; CHECK-NEXT: adrp x8, .LCPI21_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI21_2] @@ -541,7 +540,8 @@ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI21_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -557,7 +557,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI22_0 ; CHECK-NEXT: adrp x9, .LCPI22_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI22_0] ; CHECK-NEXT: adrp x8, .LCPI22_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI22_2] @@ -569,7 +568,8 @@ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI22_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -583,7 +583,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI23_0 ; CHECK-NEXT: adrp x9, .LCPI23_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI23_0] ; CHECK-NEXT: adrp x8, .LCPI23_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI23_2] @@ -595,7 +594,8 @@ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI23_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -609,7 +609,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI24_0 ; CHECK-NEXT: adrp x9, .LCPI24_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI24_0] ; CHECK-NEXT: adrp x8, .LCPI24_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI24_2] @@ -621,7 +620,8 @@ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI24_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -636,7 +636,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI25_0 ; CHECK-NEXT: adrp x9, .LCPI25_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI25_0] ; CHECK-NEXT: adrp x8, .LCPI25_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI25_2] @@ -648,7 +647,8 @@ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI25_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -661,7 +661,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI26_0 ; CHECK-NEXT: adrp x9, .LCPI26_2 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI26_0] ; CHECK-NEXT: adrp x8, .LCPI26_1 ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI26_2] @@ -673,7 +672,8 @@ ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI26_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll b/llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll --- a/llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll +++ b/llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll @@ -7,14 +7,14 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #23593 ; CHECK-NEXT: movk w8, #49807, lsl #16 -; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: mov w8, #28835 ; CHECK-NEXT: movk w8, #2621, lsl #16 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -28,7 +28,6 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #23593 ; CHECK-NEXT: movk w8, #49807, lsl #16 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: mov w8, #23592 ; CHECK-NEXT: movk w8, #655, lsl #16 @@ -38,7 +37,8 @@ ; CHECK-NEXT: ushr v0.4s, v0.4s, #2 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -53,13 +53,13 @@ ; CHECK-LABEL: test_urem_odd_neg25: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI2_0 -; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0] ; CHECK-NEXT: adrp x8, .LCPI2_1 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_1] ; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -72,7 +72,6 @@ ; CHECK-LABEL: test_urem_even_neg100: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI3_0 -; CHECK-NEXT: movi v3.4s, #1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0] ; CHECK-NEXT: adrp x8, .LCPI3_1 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s @@ -81,7 +80,8 @@ ; CHECK-NEXT: ushr v0.4s, v0.4s, #2 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -167,10 +167,10 @@ ; CHECK-LABEL: test_urem_pow2: ; CHECK: // %bb.0: ; CHECK-NEXT: movi v1.4s, #15 -; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, %cmp = icmp eq <4 x i32> %urem, @@ -182,9 +182,9 @@ define <4 x i32> @test_urem_int_min(<4 x i32> %X) nounwind { ; CHECK-LABEL: test_urem_int_min: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: bic v0.4s, #128, lsl #24 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 +; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, @@ -197,8 +197,8 @@ define <4 x i32> @test_urem_allones(<4 x i32> %X) nounwind { ; CHECK-LABEL: test_urem_allones: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: neg v0.4s, v0.4s +; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-vec-tautological.ll b/llvm/test/CodeGen/AArch64/urem-seteq-vec-tautological.ll --- a/llvm/test/CodeGen/AArch64/urem-seteq-vec-tautological.ll +++ b/llvm/test/CodeGen/AArch64/urem-seteq-vec-tautological.ll @@ -82,10 +82,10 @@ ; CHECK-NEXT: mov x10, v0.d[1] ; CHECK-NEXT: mul x9, x9, x8 ; CHECK-NEXT: mul x8, x10, x8 +; CHECK-NEXT: adrp x10, .LCPI4_0 ; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: adrp x9, .LCPI4_0 +; CHECK-NEXT: ldr q1, [x10, :lo12:.LCPI4_0] ; CHECK-NEXT: mov v0.d[1], x8 -; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI4_0] ; CHECK-NEXT: cmhs v0.2d, v1.2d, v0.2d ; CHECK-NEXT: movi d1, #0xffffffff00000000 ; CHECK-NEXT: xtn v0.2s, v0.2d diff --git a/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll b/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll --- a/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll +++ b/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll @@ -4,42 +4,42 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) { ; CHECK-LABEL: fold_urem_vec_1: ; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #8969 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: umov w8, v0.h[0] -; CHECK-NEXT: mov w9, #8969 -; CHECK-NEXT: movk w9, #22765, lsl #16 +; CHECK-NEXT: umov w9, v0.h[0] +; CHECK-NEXT: movk w8, #22765, lsl #16 ; CHECK-NEXT: umov w10, v0.h[1] -; CHECK-NEXT: mov w12, #16913 -; CHECK-NEXT: mov w13, #95 -; CHECK-NEXT: movk w12, #8456, lsl #16 -; CHECK-NEXT: umull x9, w8, w9 -; CHECK-NEXT: ubfx w14, w10, #2, #14 -; CHECK-NEXT: lsr x9, x9, #32 -; CHECK-NEXT: sub w11, w8, w9 -; CHECK-NEXT: umull x12, w14, w12 -; CHECK-NEXT: add w9, w9, w11, lsr #1 -; CHECK-NEXT: umov w11, v0.h[2] -; CHECK-NEXT: lsr w9, w9, #6 -; CHECK-NEXT: lsr x12, x12, #34 -; CHECK-NEXT: msub w8, w9, w13, w8 -; CHECK-NEXT: mov w9, #33437 -; CHECK-NEXT: movk w9, #21399, lsl #16 +; CHECK-NEXT: mov w11, #16913 ; CHECK-NEXT: mov w13, #124 -; CHECK-NEXT: umull x9, w11, w9 -; CHECK-NEXT: msub w10, w12, w13, w10 -; CHECK-NEXT: umov w12, v0.h[3] -; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: umull x8, w9, w8 +; CHECK-NEXT: movk w11, #8456, lsl #16 +; CHECK-NEXT: ubfx w12, w10, #2, #14 +; CHECK-NEXT: mov w14, #95 +; CHECK-NEXT: lsr x8, x8, #32 +; CHECK-NEXT: umull x11, w12, w11 +; CHECK-NEXT: sub w12, w9, w8 +; CHECK-NEXT: lsr x11, x11, #34 +; CHECK-NEXT: add w8, w8, w12, lsr #1 +; CHECK-NEXT: mov w12, #33437 +; CHECK-NEXT: lsr w8, w8, #6 +; CHECK-NEXT: movk w12, #21399, lsl #16 +; CHECK-NEXT: msub w10, w11, w13, w10 +; CHECK-NEXT: umov w11, v0.h[2] +; CHECK-NEXT: msub w8, w8, w14, w9 ; CHECK-NEXT: mov w13, #2287 -; CHECK-NEXT: lsr x8, x9, #37 -; CHECK-NEXT: mov w9, #98 +; CHECK-NEXT: mov w14, #98 +; CHECK-NEXT: umull x9, w11, w12 +; CHECK-NEXT: umov w12, v0.h[3] ; CHECK-NEXT: movk w13, #16727, lsl #16 -; CHECK-NEXT: msub w8, w8, w9, w11 +; CHECK-NEXT: lsr x9, x9, #37 +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: umull x13, w12, w13 +; CHECK-NEXT: msub w9, w9, w14, w11 +; CHECK-NEXT: mov w11, #1003 +; CHECK-NEXT: lsr x8, x13, #40 ; CHECK-NEXT: mov v0.h[1], w10 -; CHECK-NEXT: umull x9, w12, w13 -; CHECK-NEXT: mov w10, #1003 -; CHECK-NEXT: lsr x9, x9, #40 -; CHECK-NEXT: mov v0.h[2], w8 -; CHECK-NEXT: msub w8, w9, w10, w12 +; CHECK-NEXT: msub w8, w8, w11, w12 +; CHECK-NEXT: mov v0.h[2], w9 ; CHECK-NEXT: mov v0.h[3], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret @@ -50,41 +50,41 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) { ; CHECK-LABEL: fold_urem_vec_2: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: umov w10, v0.h[0] ; CHECK-NEXT: mov w8, #8969 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: umov w9, v0.h[1] ; CHECK-NEXT: movk w8, #22765, lsl #16 +; CHECK-NEXT: umov w10, v0.h[0] ; CHECK-NEXT: umov w15, v0.h[2] -; CHECK-NEXT: umov w16, v0.h[3] -; CHECK-NEXT: umull x12, w10, w8 +; CHECK-NEXT: mov w16, #95 ; CHECK-NEXT: umull x11, w9, w8 -; CHECK-NEXT: lsr x12, x12, #32 +; CHECK-NEXT: umull x12, w10, w8 ; CHECK-NEXT: lsr x11, x11, #32 -; CHECK-NEXT: sub w14, w10, w12 +; CHECK-NEXT: lsr x12, x12, #32 ; CHECK-NEXT: sub w13, w9, w11 -; CHECK-NEXT: add w12, w12, w14, lsr #1 -; CHECK-NEXT: umull x14, w15, w8 +; CHECK-NEXT: sub w14, w10, w12 ; CHECK-NEXT: add w11, w11, w13, lsr #1 -; CHECK-NEXT: mov w13, #95 +; CHECK-NEXT: umull x13, w15, w8 +; CHECK-NEXT: add w12, w12, w14, lsr #1 +; CHECK-NEXT: umov w14, v0.h[3] ; CHECK-NEXT: lsr w12, w12, #6 +; CHECK-NEXT: lsr x13, x13, #32 +; CHECK-NEXT: umull x8, w14, w8 ; CHECK-NEXT: lsr w11, w11, #6 -; CHECK-NEXT: umull x8, w16, w8 -; CHECK-NEXT: msub w10, w12, w13, w10 -; CHECK-NEXT: lsr x12, x14, #32 -; CHECK-NEXT: msub w9, w11, w13, w9 -; CHECK-NEXT: sub w11, w15, w12 +; CHECK-NEXT: msub w10, w12, w16, w10 +; CHECK-NEXT: sub w12, w15, w13 ; CHECK-NEXT: lsr x8, x8, #32 +; CHECK-NEXT: msub w9, w11, w16, w9 +; CHECK-NEXT: add w12, w13, w12, lsr #1 +; CHECK-NEXT: sub w13, w14, w8 +; CHECK-NEXT: lsr w11, w12, #6 ; CHECK-NEXT: fmov s0, w10 -; CHECK-NEXT: add w10, w12, w11, lsr #1 -; CHECK-NEXT: lsr w10, w10, #6 -; CHECK-NEXT: sub w11, w16, w8 -; CHECK-NEXT: mov v0.h[1], w9 -; CHECK-NEXT: msub w9, w10, w13, w15 -; CHECK-NEXT: add w8, w8, w11, lsr #1 +; CHECK-NEXT: add w8, w8, w13, lsr #1 +; CHECK-NEXT: msub w10, w11, w16, w15 ; CHECK-NEXT: lsr w8, w8, #6 -; CHECK-NEXT: mov v0.h[2], w9 -; CHECK-NEXT: msub w8, w8, w13, w16 +; CHECK-NEXT: mov v0.h[1], w9 +; CHECK-NEXT: msub w8, w8, w16, w14 +; CHECK-NEXT: mov v0.h[2], w10 ; CHECK-NEXT: mov v0.h[3], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret @@ -97,42 +97,42 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) { ; CHECK-LABEL: combine_urem_udiv: ; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #8969 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: umov w9, v0.h[0] -; CHECK-NEXT: mov w8, #8969 ; CHECK-NEXT: movk w8, #22765, lsl #16 ; CHECK-NEXT: umov w10, v0.h[1] -; CHECK-NEXT: umov w11, v0.h[2] -; CHECK-NEXT: mov w15, #95 -; CHECK-NEXT: umov w13, v0.h[3] -; CHECK-NEXT: umull x12, w9, w8 +; CHECK-NEXT: umov w13, v0.h[2] +; CHECK-NEXT: mov w12, #95 +; CHECK-NEXT: umull x11, w9, w8 +; CHECK-NEXT: umov w15, v0.h[3] ; CHECK-NEXT: umull x14, w10, w8 -; CHECK-NEXT: lsr x12, x12, #32 -; CHECK-NEXT: umull x17, w11, w8 -; CHECK-NEXT: sub w16, w9, w12 +; CHECK-NEXT: lsr x11, x11, #32 +; CHECK-NEXT: umull x17, w13, w8 +; CHECK-NEXT: sub w16, w9, w11 ; CHECK-NEXT: lsr x14, x14, #32 +; CHECK-NEXT: umull x8, w15, w8 ; CHECK-NEXT: lsr x17, x17, #32 -; CHECK-NEXT: umull x8, w13, w8 -; CHECK-NEXT: add w12, w12, w16, lsr #1 +; CHECK-NEXT: add w11, w11, w16, lsr #1 ; CHECK-NEXT: sub w16, w10, w14 -; CHECK-NEXT: lsr w12, w12, #6 +; CHECK-NEXT: lsr w11, w11, #6 ; CHECK-NEXT: lsr x8, x8, #32 ; CHECK-NEXT: add w14, w14, w16, lsr #1 -; CHECK-NEXT: sub w16, w11, w17 -; CHECK-NEXT: msub w9, w12, w15, w9 +; CHECK-NEXT: sub w16, w13, w17 +; CHECK-NEXT: msub w9, w11, w12, w9 ; CHECK-NEXT: lsr w14, w14, #6 ; CHECK-NEXT: add w16, w17, w16, lsr #1 -; CHECK-NEXT: fmov s1, w12 -; CHECK-NEXT: msub w10, w14, w15, w10 -; CHECK-NEXT: sub w17, w13, w8 +; CHECK-NEXT: sub w17, w15, w8 +; CHECK-NEXT: msub w10, w14, w12, w10 +; CHECK-NEXT: fmov s1, w11 +; CHECK-NEXT: add w8, w8, w17, lsr #1 ; CHECK-NEXT: fmov s0, w9 ; CHECK-NEXT: lsr w9, w16, #6 -; CHECK-NEXT: mov v1.h[1], w14 -; CHECK-NEXT: add w8, w8, w17, lsr #1 -; CHECK-NEXT: msub w11, w9, w15, w11 ; CHECK-NEXT: lsr w8, w8, #6 +; CHECK-NEXT: mov v1.h[1], w14 +; CHECK-NEXT: msub w11, w9, w12, w13 ; CHECK-NEXT: mov v0.h[1], w10 -; CHECK-NEXT: msub w10, w8, w15, w13 +; CHECK-NEXT: msub w10, w8, w12, w15 ; CHECK-NEXT: mov v1.h[2], w9 ; CHECK-NEXT: mov v0.h[2], w11 ; CHECK-NEXT: mov v1.h[3], w8 @@ -150,26 +150,26 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) { ; CHECK-LABEL: dont_fold_urem_power_of_two: ; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #8969 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: umov w10, v0.h[0] ; CHECK-NEXT: umov w9, v0.h[3] -; CHECK-NEXT: mov w8, #8969 -; CHECK-NEXT: umov w11, v0.h[1] ; CHECK-NEXT: movk w8, #22765, lsl #16 -; CHECK-NEXT: and w10, w10, #0x3f +; CHECK-NEXT: umov w10, v0.h[0] +; CHECK-NEXT: umov w12, v0.h[1] ; CHECK-NEXT: umull x8, w9, w8 -; CHECK-NEXT: and w11, w11, #0x1f +; CHECK-NEXT: and w10, w10, #0x3f ; CHECK-NEXT: lsr x8, x8, #32 +; CHECK-NEXT: sub w11, w9, w8 ; CHECK-NEXT: fmov s1, w10 -; CHECK-NEXT: umov w10, v0.h[2] -; CHECK-NEXT: sub w12, w9, w8 -; CHECK-NEXT: mov v1.h[1], w11 -; CHECK-NEXT: add w8, w8, w12, lsr #1 -; CHECK-NEXT: and w10, w10, #0x7 +; CHECK-NEXT: and w10, w12, #0x1f +; CHECK-NEXT: mov w12, #95 +; CHECK-NEXT: add w8, w8, w11, lsr #1 +; CHECK-NEXT: umov w11, v0.h[2] ; CHECK-NEXT: lsr w8, w8, #6 -; CHECK-NEXT: mov w11, #95 -; CHECK-NEXT: msub w8, w8, w11, w9 -; CHECK-NEXT: mov v1.h[2], w10 +; CHECK-NEXT: and w11, w11, #0x7 +; CHECK-NEXT: mov v1.h[1], w10 +; CHECK-NEXT: msub w8, w8, w12, w9 +; CHECK-NEXT: mov v1.h[2], w11 ; CHECK-NEXT: mov v1.h[3], w8 ; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret @@ -186,29 +186,29 @@ ; CHECK-NEXT: mov w8, #30865 ; CHECK-NEXT: movk w8, #51306, lsl #16 ; CHECK-NEXT: umov w11, v0.h[2] +; CHECK-NEXT: ubfx w10, w9, #1, #15 ; CHECK-NEXT: mov w12, #654 -; CHECK-NEXT: movi d1, #0000000000000000 ; CHECK-NEXT: mov w13, #47143 -; CHECK-NEXT: ubfx w10, w9, #1, #15 -; CHECK-NEXT: movk w13, #24749, lsl #16 ; CHECK-NEXT: umull x8, w10, w8 ; CHECK-NEXT: mov w10, #17097 ; CHECK-NEXT: movk w10, #45590, lsl #16 +; CHECK-NEXT: movk w13, #24749, lsl #16 ; CHECK-NEXT: lsr x8, x8, #40 ; CHECK-NEXT: umull x10, w11, w10 ; CHECK-NEXT: msub w8, w8, w12, w9 ; CHECK-NEXT: umov w9, v0.h[3] -; CHECK-NEXT: lsr x10, x10, #36 ; CHECK-NEXT: mov w12, #23 +; CHECK-NEXT: lsr x10, x10, #36 +; CHECK-NEXT: umull x13, w9, w13 +; CHECK-NEXT: movi d0, #0000000000000000 ; CHECK-NEXT: msub w10, w10, w12, w11 -; CHECK-NEXT: mov w11, #5423 -; CHECK-NEXT: mov v1.h[1], w8 -; CHECK-NEXT: umull x8, w9, w13 -; CHECK-NEXT: lsr x8, x8, #43 -; CHECK-NEXT: mov v1.h[2], w10 -; CHECK-NEXT: msub w8, w8, w11, w9 -; CHECK-NEXT: mov v1.h[3], w8 -; CHECK-NEXT: fmov d0, d1 +; CHECK-NEXT: mov w12, #5423 +; CHECK-NEXT: lsr x11, x13, #43 +; CHECK-NEXT: mov v0.h[1], w8 +; CHECK-NEXT: msub w8, w11, w12, w9 +; CHECK-NEXT: mov v0.h[2], w10 +; CHECK-NEXT: mov v0.h[3], w8 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %1 = urem <4 x i16> %x, ret <4 x i16> %1 @@ -228,39 +228,39 @@ ; CHECK-LABEL: dont_fold_urem_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x8, #17097 -; CHECK-NEXT: fmov x9, d1 +; CHECK-NEXT: mov x11, #21445 ; CHECK-NEXT: movk x8, #45590, lsl #16 -; CHECK-NEXT: mov x13, #21445 +; CHECK-NEXT: fmov x9, d1 ; CHECK-NEXT: movk x8, #34192, lsl #32 -; CHECK-NEXT: movk x13, #1603, lsl #16 -; CHECK-NEXT: movk x8, #25644, lsl #48 -; CHECK-NEXT: movk x13, #15432, lsl #32 ; CHECK-NEXT: mov x10, v0.d[1] -; CHECK-NEXT: movk x13, #25653, lsl #48 +; CHECK-NEXT: movk x11, #1603, lsl #16 +; CHECK-NEXT: movk x8, #25644, lsl #48 +; CHECK-NEXT: movk x11, #15432, lsl #32 +; CHECK-NEXT: lsr x12, x10, #1 +; CHECK-NEXT: movk x11, #25653, lsl #48 ; CHECK-NEXT: umulh x8, x9, x8 -; CHECK-NEXT: mov x11, v1.d[1] -; CHECK-NEXT: sub x12, x9, x8 -; CHECK-NEXT: lsr x14, x10, #1 -; CHECK-NEXT: add x8, x8, x12, lsr #1 +; CHECK-NEXT: mov x14, v1.d[1] +; CHECK-NEXT: umulh x11, x12, x11 ; CHECK-NEXT: mov x12, #12109 +; CHECK-NEXT: sub x13, x9, x8 ; CHECK-NEXT: movk x12, #52170, lsl #16 -; CHECK-NEXT: umulh x13, x14, x13 ; CHECK-NEXT: movk x12, #28749, lsl #32 -; CHECK-NEXT: mov w14, #23 +; CHECK-NEXT: lsr x11, x11, #7 ; CHECK-NEXT: movk x12, #49499, lsl #48 +; CHECK-NEXT: add x8, x8, x13, lsr #1 +; CHECK-NEXT: mov w13, #23 ; CHECK-NEXT: lsr x8, x8, #4 -; CHECK-NEXT: lsr x13, x13, #7 -; CHECK-NEXT: umulh x12, x11, x12 -; CHECK-NEXT: msub x8, x8, x14, x9 -; CHECK-NEXT: mov w9, #5423 -; CHECK-NEXT: lsr x12, x12, #12 -; CHECK-NEXT: mov w14, #654 -; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: msub x9, x12, x9, x11 -; CHECK-NEXT: msub x10, x13, x14, x10 +; CHECK-NEXT: umulh x12, x14, x12 +; CHECK-NEXT: msub x8, x8, x13, x9 +; CHECK-NEXT: mov w13, #654 +; CHECK-NEXT: lsr x9, x12, #12 +; CHECK-NEXT: mov w12, #5423 +; CHECK-NEXT: msub x10, x11, x13, x10 +; CHECK-NEXT: msub x9, x9, x12, x14 ; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: mov v1.d[1], x9 +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov v0.d[1], x10 +; CHECK-NEXT: mov v1.d[1], x9 ; CHECK-NEXT: ret %1 = urem <4 x i64> %x, ret <4 x i64> %1 diff --git a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll --- a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll @@ -140,10 +140,10 @@ ; CHECK-NEXT: mov v1.s[1], w10 ; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: uqsub v0.2s, v0.2s, v1.2s -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strb w9, [x2] -; CHECK-NEXT: strb w8, [x2, #1] +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: mov w9, v0.s[1] +; CHECK-NEXT: strb w9, [x2, #1] +; CHECK-NEXT: strb w8, [x2] ; CHECK-NEXT: ret %x = load <2 x i8>, <2 x i8>* %px %y = load <2 x i8>, <2 x i8>* %py @@ -179,10 +179,10 @@ ; CHECK-NEXT: mov v1.s[1], w10 ; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: uqsub v0.2s, v0.2s, v1.2s -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strh w9, [x2] -; CHECK-NEXT: strh w8, [x2, #2] +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: mov w9, v0.s[1] +; CHECK-NEXT: strh w9, [x2, #2] +; CHECK-NEXT: strh w8, [x2] ; CHECK-NEXT: ret %x = load <2 x i16>, <2 x i16>* %px %y = load <2 x i16>, <2 x i16>* %py diff --git a/llvm/test/CodeGen/AArch64/vec-extract-branch.ll b/llvm/test/CodeGen/AArch64/vec-extract-branch.ll --- a/llvm/test/CodeGen/AArch64/vec-extract-branch.ll +++ b/llvm/test/CodeGen/AArch64/vec-extract-branch.ll @@ -6,8 +6,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: fcmgt v0.2d, v0.2d, #0.0 ; CHECK-NEXT: xtn v0.2s, v0.2d -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: and w8, w9, w8 ; CHECK-NEXT: tbz w8, #0, .LBB0_2 ; CHECK-NEXT: // %bb.1: // %true diff --git a/llvm/test/CodeGen/AArch64/vec-libcalls.ll b/llvm/test/CodeGen/AArch64/vec-libcalls.ll --- a/llvm/test/CodeGen/AArch64/vec-libcalls.ll +++ b/llvm/test/CodeGen/AArch64/vec-libcalls.ll @@ -124,8 +124,8 @@ ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: mov v1.s[2], v0.s[0] ; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: mov s0, v0.s[3] ; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov s0, v0.s[3] ; CHECK-NEXT: bl sinf ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 diff --git a/llvm/test/CodeGen/AArch64/vec_uaddo.ll b/llvm/test/CodeGen/AArch64/vec_uaddo.ll --- a/llvm/test/CodeGen/AArch64/vec_uaddo.ll +++ b/llvm/test/CodeGen/AArch64/vec_uaddo.ll @@ -82,33 +82,33 @@ ; CHECK-LABEL: uaddo_v6i32: ; CHECK: // %bb.0: ; CHECK-NEXT: fmov s0, w6 -; CHECK-NEXT: fmov s1, w0 ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: ldr s2, [sp, #16] +; CHECK-NEXT: fmov s2, w0 +; CHECK-NEXT: ldr s1, [sp, #16] ; CHECK-NEXT: add x9, sp, #24 -; CHECK-NEXT: add x10, sp, #8 -; CHECK-NEXT: mov v0.s[1], w7 ; CHECK-NEXT: fmov s3, w4 -; CHECK-NEXT: mov v1.s[1], w1 -; CHECK-NEXT: ld1 { v2.s }[1], [x9] -; CHECK-NEXT: mov v3.s[1], w5 +; CHECK-NEXT: mov v0.s[1], w7 +; CHECK-NEXT: mov v2.s[1], w1 ; CHECK-NEXT: ld1 { v0.s }[2], [x8] -; CHECK-NEXT: mov v1.s[2], w2 +; CHECK-NEXT: add x8, sp, #8 +; CHECK-NEXT: ld1 { v1.s }[1], [x9] +; CHECK-NEXT: mov v2.s[2], w2 +; CHECK-NEXT: mov v3.s[1], w5 +; CHECK-NEXT: mov v2.s[3], w3 +; CHECK-NEXT: ld1 { v0.s }[3], [x8] +; CHECK-NEXT: add v1.4s, v3.4s, v1.4s ; CHECK-NEXT: ldr x8, [sp, #32] -; CHECK-NEXT: add v2.4s, v3.4s, v2.4s -; CHECK-NEXT: ld1 { v0.s }[3], [x10] -; CHECK-NEXT: mov v1.s[3], w3 -; CHECK-NEXT: str d2, [x8, #16] -; CHECK-NEXT: cmhi v3.4s, v3.4s, v2.4s -; CHECK-NEXT: mov w5, v3.s[1] +; CHECK-NEXT: cmhi v3.4s, v3.4s, v1.4s +; CHECK-NEXT: add v0.4s, v2.4s, v0.4s +; CHECK-NEXT: str d1, [x8, #16] ; CHECK-NEXT: fmov w4, s3 -; CHECK-NEXT: add v0.4s, v1.4s, v0.4s -; CHECK-NEXT: cmhi v1.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhi v2.4s, v2.4s, v0.4s +; CHECK-NEXT: mov w5, v3.s[1] ; CHECK-NEXT: str q0, [x8] -; CHECK-NEXT: mov w1, v1.s[1] -; CHECK-NEXT: mov w2, v1.s[2] -; CHECK-NEXT: mov w3, v1.s[3] -; CHECK-NEXT: fmov w0, s1 +; CHECK-NEXT: fmov w0, s2 +; CHECK-NEXT: mov w1, v2.s[1] +; CHECK-NEXT: mov w2, v2.s[2] +; CHECK-NEXT: mov w3, v2.s[3] ; CHECK-NEXT: ret %t = call {<6 x i32>, <6 x i1>} @llvm.uadd.with.overflow.v6i32(<6 x i32> %a0, <6 x i32> %a1) %val = extractvalue {<6 x i32>, <6 x i1>} %t, 0 @@ -147,15 +147,15 @@ ; CHECK-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-NEXT: ushll v2.4s, v2.4h, #0 ; CHECK-NEXT: zip1 v3.8b, v0.8b, v0.8b -; CHECK-NEXT: zip2 v5.8b, v0.8b, v0.8b +; CHECK-NEXT: zip2 v0.8b, v0.8b, v0.8b ; CHECK-NEXT: shl v1.4s, v1.4s, #31 ; CHECK-NEXT: shl v2.4s, v2.4s, #31 +; CHECK-NEXT: ushll v3.4s, v3.4h, #0 +; CHECK-NEXT: ushll v5.4s, v0.4h, #0 ; CHECK-NEXT: cmlt v0.4s, v1.4s, #0 ; CHECK-NEXT: cmlt v1.4s, v2.4s, #0 -; CHECK-NEXT: ushll v2.4s, v3.4h, #0 -; CHECK-NEXT: ushll v3.4s, v5.4h, #0 -; CHECK-NEXT: shl v2.4s, v2.4s, #31 -; CHECK-NEXT: shl v3.4s, v3.4s, #31 +; CHECK-NEXT: shl v2.4s, v3.4s, #31 +; CHECK-NEXT: shl v3.4s, v5.4s, #31 ; CHECK-NEXT: cmlt v2.4s, v2.4s, #0 ; CHECK-NEXT: cmlt v3.4s, v3.4s, #0 ; CHECK-NEXT: ret @@ -214,17 +214,17 @@ ; CHECK-NEXT: bic v0.4s, #255, lsl #24 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: mov w8, v0.s[3] -; CHECK-NEXT: mov w9, v0.s[2] -; CHECK-NEXT: mov w10, v0.s[1] ; CHECK-NEXT: fmov w11, s0 ; CHECK-NEXT: mov v1.16b, v0.16b -; CHECK-NEXT: bic v1.4s, #255, lsl #24 +; CHECK-NEXT: mov w9, v0.s[2] ; CHECK-NEXT: sturh w8, [x0, #9] ; CHECK-NEXT: lsr w8, w8, #16 -; CHECK-NEXT: cmeq v1.4s, v1.4s, v0.4s +; CHECK-NEXT: mov w10, v0.s[1] ; CHECK-NEXT: strh w9, [x0, #6] -; CHECK-NEXT: sturh w10, [x0, #3] +; CHECK-NEXT: bic v1.4s, #255, lsl #24 ; CHECK-NEXT: lsr w9, w9, #16 +; CHECK-NEXT: cmeq v1.4s, v1.4s, v0.4s +; CHECK-NEXT: sturh w10, [x0, #3] ; CHECK-NEXT: lsr w10, w10, #16 ; CHECK-NEXT: strb w8, [x0, #11] ; CHECK-NEXT: lsr w8, w11, #16 @@ -252,17 +252,17 @@ ; CHECK-NEXT: umov w8, v0.h[1] ; CHECK-NEXT: umov w9, v0.h[2] ; CHECK-NEXT: umov w10, v0.h[0] -; CHECK-NEXT: umov w11, v0.h[3] ; CHECK-NEXT: and v1.8b, v0.8b, v2.8b -; CHECK-NEXT: cmeq v0.4h, v1.4h, v0.4h ; CHECK-NEXT: and w8, w8, #0x1 ; CHECK-NEXT: and w9, w9, #0x1 -; CHECK-NEXT: mvn v0.8b, v0.8b -; CHECK-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-NEXT: cmeq v1.4h, v1.4h, v0.4h ; CHECK-NEXT: bfi w10, w8, #1, #1 +; CHECK-NEXT: umov w8, v0.h[3] ; CHECK-NEXT: bfi w10, w9, #2, #1 -; CHECK-NEXT: bfi w10, w11, #3, #29 +; CHECK-NEXT: mvn v0.8b, v1.8b +; CHECK-NEXT: bfi w10, w8, #3, #29 ; CHECK-NEXT: and w8, w10, #0xf +; CHECK-NEXT: sshll v0.4s, v0.4h, #0 ; CHECK-NEXT: strb w8, [x0] ; CHECK-NEXT: ret %t = call {<4 x i1>, <4 x i1>} @llvm.uadd.with.overflow.v4i1(<4 x i1> %a0, <4 x i1> %a1) @@ -293,10 +293,10 @@ ; CHECK-NEXT: fmov s0, w13 ; CHECK-NEXT: mov v0.s[1], w10 ; CHECK-NEXT: ldr x10, [sp] -; CHECK-NEXT: stp x8, x9, [x10, #16] ; CHECK-NEXT: shl v0.2s, v0.2s, #31 -; CHECK-NEXT: stp x11, x12, [x10] +; CHECK-NEXT: stp x8, x9, [x10, #16] ; CHECK-NEXT: cmlt v0.2s, v0.2s, #0 +; CHECK-NEXT: stp x11, x12, [x10] ; CHECK-NEXT: ret %t = call {<2 x i128>, <2 x i1>} @llvm.uadd.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1) %val = extractvalue {<2 x i128>, <2 x i1>} %t, 0 diff --git a/llvm/test/CodeGen/AArch64/vec_umulo.ll b/llvm/test/CodeGen/AArch64/vec_umulo.ll --- a/llvm/test/CodeGen/AArch64/vec_umulo.ll +++ b/llvm/test/CodeGen/AArch64/vec_umulo.ll @@ -59,8 +59,8 @@ ; CHECK-NEXT: mul v1.4s, v0.4s, v1.4s ; CHECK-NEXT: uzp2 v2.4s, v3.4s, v2.4s ; CHECK-NEXT: st1 { v1.s }[2], [x8] -; CHECK-NEXT: str d1, [x0] ; CHECK-NEXT: cmtst v2.4s, v2.4s, v2.4s +; CHECK-NEXT: str d1, [x0] ; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %t = call {<3 x i32>, <3 x i1>} @llvm.umul.with.overflow.v3i32(<3 x i32> %a0, <3 x i32> %a1) @@ -94,39 +94,39 @@ ; CHECK-LABEL: umulo_v6i32: ; CHECK: // %bb.0: ; CHECK-NEXT: fmov s0, w6 -; CHECK-NEXT: fmov s1, w0 ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: ldr s2, [sp, #16] +; CHECK-NEXT: fmov s2, w0 +; CHECK-NEXT: ldr s1, [sp, #16] ; CHECK-NEXT: add x9, sp, #24 -; CHECK-NEXT: add x10, sp, #8 -; CHECK-NEXT: mov v0.s[1], w7 ; CHECK-NEXT: fmov s3, w4 -; CHECK-NEXT: mov v1.s[1], w1 -; CHECK-NEXT: ld1 { v2.s }[1], [x9] -; CHECK-NEXT: mov v3.s[1], w5 +; CHECK-NEXT: mov v0.s[1], w7 +; CHECK-NEXT: mov v2.s[1], w1 ; CHECK-NEXT: ld1 { v0.s }[2], [x8] -; CHECK-NEXT: mov v1.s[2], w2 +; CHECK-NEXT: add x8, sp, #8 +; CHECK-NEXT: ld1 { v1.s }[1], [x9] +; CHECK-NEXT: mov v2.s[2], w2 +; CHECK-NEXT: mov v3.s[1], w5 +; CHECK-NEXT: mov v2.s[3], w3 +; CHECK-NEXT: ld1 { v0.s }[3], [x8] +; CHECK-NEXT: umull2 v4.2d, v3.4s, v1.4s ; CHECK-NEXT: ldr x8, [sp, #32] -; CHECK-NEXT: umull2 v4.2d, v3.4s, v2.4s -; CHECK-NEXT: ld1 { v0.s }[3], [x10] -; CHECK-NEXT: mov v1.s[3], w3 -; CHECK-NEXT: umull v7.2d, v3.2s, v2.2s -; CHECK-NEXT: mul v2.4s, v3.4s, v2.4s -; CHECK-NEXT: umull2 v5.2d, v1.4s, v0.4s -; CHECK-NEXT: umull v6.2d, v1.2s, v0.2s -; CHECK-NEXT: uzp2 v4.4s, v7.4s, v4.4s -; CHECK-NEXT: str d2, [x8, #16] -; CHECK-NEXT: mul v0.4s, v1.4s, v0.4s -; CHECK-NEXT: uzp2 v5.4s, v6.4s, v5.4s +; CHECK-NEXT: umull v5.2d, v3.2s, v1.2s +; CHECK-NEXT: mul v1.4s, v3.4s, v1.4s +; CHECK-NEXT: umull2 v6.2d, v2.4s, v0.4s +; CHECK-NEXT: umull v7.2d, v2.2s, v0.2s +; CHECK-NEXT: uzp2 v3.4s, v5.4s, v4.4s +; CHECK-NEXT: mul v0.4s, v2.4s, v0.4s +; CHECK-NEXT: str d1, [x8, #16] +; CHECK-NEXT: cmtst v3.4s, v3.4s, v3.4s +; CHECK-NEXT: uzp2 v4.4s, v7.4s, v6.4s ; CHECK-NEXT: cmtst v4.4s, v4.4s, v4.4s +; CHECK-NEXT: fmov w4, s3 +; CHECK-NEXT: mov w5, v3.s[1] ; CHECK-NEXT: str q0, [x8] -; CHECK-NEXT: cmtst v3.4s, v5.4s, v5.4s -; CHECK-NEXT: mov w5, v4.s[1] -; CHECK-NEXT: fmov w4, s4 -; CHECK-NEXT: mov w1, v3.s[1] -; CHECK-NEXT: mov w2, v3.s[2] -; CHECK-NEXT: mov w3, v3.s[3] -; CHECK-NEXT: fmov w0, s3 +; CHECK-NEXT: fmov w0, s4 +; CHECK-NEXT: mov w1, v4.s[1] +; CHECK-NEXT: mov w2, v4.s[2] +; CHECK-NEXT: mov w3, v4.s[3] ; CHECK-NEXT: ret %t = call {<6 x i32>, <6 x i1>} @llvm.umul.with.overflow.v6i32(<6 x i32> %a0, <6 x i32> %a1) %val = extractvalue {<6 x i32>, <6 x i1>} %t, 0 @@ -147,9 +147,9 @@ ; CHECK-NEXT: mul v2.4s, v0.4s, v2.4s ; CHECK-NEXT: uzp2 v5.4s, v6.4s, v5.4s ; CHECK-NEXT: uzp2 v6.4s, v7.4s, v4.4s -; CHECK-NEXT: stp q2, q3, [x0] ; CHECK-NEXT: cmtst v4.4s, v5.4s, v5.4s ; CHECK-NEXT: cmtst v5.4s, v6.4s, v6.4s +; CHECK-NEXT: stp q2, q3, [x0] ; CHECK-NEXT: mov v0.16b, v4.16b ; CHECK-NEXT: mov v1.16b, v5.16b ; CHECK-NEXT: ret @@ -166,27 +166,27 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: umull2 v2.8h, v0.16b, v1.16b ; CHECK-NEXT: umull v3.8h, v0.8b, v1.8b -; CHECK-NEXT: mul v5.16b, v0.16b, v1.16b +; CHECK-NEXT: mul v4.16b, v0.16b, v1.16b ; CHECK-NEXT: uzp2 v2.16b, v3.16b, v2.16b -; CHECK-NEXT: str q5, [x0] +; CHECK-NEXT: str q4, [x0] ; CHECK-NEXT: cmtst v2.16b, v2.16b, v2.16b +; CHECK-NEXT: zip1 v0.8b, v2.8b, v0.8b +; CHECK-NEXT: zip2 v1.8b, v2.8b, v0.8b +; CHECK-NEXT: ext v2.16b, v2.16b, v2.16b, #8 +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-NEXT: zip1 v3.8b, v2.8b, v0.8b -; CHECK-NEXT: zip2 v4.8b, v2.8b, v0.8b -; CHECK-NEXT: ext v0.16b, v2.16b, v2.16b, #8 -; CHECK-NEXT: ushll v1.4s, v3.4h, #0 -; CHECK-NEXT: ushll v2.4s, v4.4h, #0 -; CHECK-NEXT: zip1 v3.8b, v0.8b, v0.8b -; CHECK-NEXT: zip2 v4.8b, v0.8b, v0.8b +; CHECK-NEXT: zip2 v2.8b, v2.8b, v0.8b +; CHECK-NEXT: shl v0.4s, v0.4s, #31 +; CHECK-NEXT: ushll v3.4s, v3.4h, #0 ; CHECK-NEXT: shl v1.4s, v1.4s, #31 -; CHECK-NEXT: shl v2.4s, v2.4s, #31 -; CHECK-NEXT: cmlt v0.4s, v1.4s, #0 -; CHECK-NEXT: cmlt v1.4s, v2.4s, #0 -; CHECK-NEXT: ushll v2.4s, v3.4h, #0 -; CHECK-NEXT: ushll v3.4s, v4.4h, #0 -; CHECK-NEXT: shl v2.4s, v2.4s, #31 +; CHECK-NEXT: ushll v2.4s, v2.4h, #0 ; CHECK-NEXT: shl v3.4s, v3.4s, #31 -; CHECK-NEXT: cmlt v2.4s, v2.4s, #0 -; CHECK-NEXT: cmlt v3.4s, v3.4s, #0 +; CHECK-NEXT: shl v5.4s, v2.4s, #31 +; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 +; CHECK-NEXT: cmlt v1.4s, v1.4s, #0 +; CHECK-NEXT: cmlt v2.4s, v3.4s, #0 +; CHECK-NEXT: cmlt v3.4s, v5.4s, #0 ; CHECK-NEXT: ret %t = call {<16 x i8>, <16 x i1>} @llvm.umul.with.overflow.v16i8(<16 x i8> %a0, <16 x i8> %a1) %val = extractvalue {<16 x i8>, <16 x i1>} %t, 0 @@ -229,17 +229,17 @@ ; CHECK-LABEL: umulo_v2i64: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x8, v1.d[1] -; CHECK-NEXT: fmov x10, d1 ; CHECK-NEXT: mov x9, v0.d[1] +; CHECK-NEXT: fmov x10, d1 ; CHECK-NEXT: fmov x11, d0 ; CHECK-NEXT: umulh x12, x9, x8 +; CHECK-NEXT: mul x8, x9, x8 ; CHECK-NEXT: umulh x13, x11, x10 ; CHECK-NEXT: cmp xzr, x12 -; CHECK-NEXT: mul x10, x11, x10 ; CHECK-NEXT: csetm x12, ne +; CHECK-NEXT: mul x10, x11, x10 ; CHECK-NEXT: cmp xzr, x13 ; CHECK-NEXT: csetm x13, ne -; CHECK-NEXT: mul x8, x9, x8 ; CHECK-NEXT: fmov d1, x10 ; CHECK-NEXT: fmov d0, x13 ; CHECK-NEXT: mov v1.d[1], x8 @@ -260,30 +260,30 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: bic v1.4s, #255, lsl #24 ; CHECK-NEXT: bic v0.4s, #255, lsl #24 -; CHECK-NEXT: mul v2.4s, v0.4s, v1.4s -; CHECK-NEXT: umull2 v3.2d, v0.4s, v1.4s -; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s -; CHECK-NEXT: mov w8, v2.s[3] -; CHECK-NEXT: mov w10, v2.s[2] -; CHECK-NEXT: mov w11, v2.s[1] -; CHECK-NEXT: ushr v1.4s, v2.4s, #24 -; CHECK-NEXT: uzp2 v0.4s, v0.4s, v3.4s -; CHECK-NEXT: fmov w9, s2 -; CHECK-NEXT: cmtst v1.4s, v1.4s, v1.4s +; CHECK-NEXT: umull2 v2.2d, v0.4s, v1.4s +; CHECK-NEXT: umull v3.2d, v0.2s, v1.2s +; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s +; CHECK-NEXT: uzp2 v1.4s, v3.4s, v2.4s +; CHECK-NEXT: mov w8, v0.s[3] +; CHECK-NEXT: ushr v2.4s, v0.4s, #24 +; CHECK-NEXT: fmov w11, s0 ; CHECK-NEXT: sturh w8, [x0, #9] ; CHECK-NEXT: lsr w8, w8, #16 -; CHECK-NEXT: strh w10, [x0, #6] -; CHECK-NEXT: lsr w10, w10, #16 -; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 -; CHECK-NEXT: sturh w11, [x0, #3] -; CHECK-NEXT: lsr w11, w11, #16 +; CHECK-NEXT: mov w9, v0.s[2] +; CHECK-NEXT: mov w10, v0.s[1] +; CHECK-NEXT: cmeq v0.4s, v1.4s, #0 +; CHECK-NEXT: strh w11, [x0] +; CHECK-NEXT: cmtst v1.4s, v2.4s, v2.4s +; CHECK-NEXT: strh w9, [x0, #6] +; CHECK-NEXT: sturh w10, [x0, #3] +; CHECK-NEXT: lsr w9, w9, #16 ; CHECK-NEXT: strb w8, [x0, #11] -; CHECK-NEXT: lsr w8, w9, #16 -; CHECK-NEXT: strh w9, [x0] +; CHECK-NEXT: lsr w8, w10, #16 +; CHECK-NEXT: lsr w10, w11, #16 ; CHECK-NEXT: orn v0.16b, v1.16b, v0.16b -; CHECK-NEXT: strb w10, [x0, #8] -; CHECK-NEXT: strb w11, [x0, #5] -; CHECK-NEXT: strb w8, [x0, #2] +; CHECK-NEXT: strb w9, [x0, #8] +; CHECK-NEXT: strb w8, [x0, #5] +; CHECK-NEXT: strb w10, [x0, #2] ; CHECK-NEXT: ret %t = call {<4 x i24>, <4 x i1>} @llvm.umul.with.overflow.v4i24(<4 x i24> %a0, <4 x i24> %a1) %val = extractvalue {<4 x i24>, <4 x i1>} %t, 0 @@ -296,19 +296,18 @@ define <4 x i32> @umulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind { ; CHECK-LABEL: umulo_v4i1: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov d2, d0 -; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: and v1.8b, v2.8b, v1.8b -; CHECK-NEXT: umov w8, v1.h[1] -; CHECK-NEXT: umov w9, v1.h[2] -; CHECK-NEXT: umov w10, v1.h[0] -; CHECK-NEXT: umov w11, v1.h[3] +; CHECK-NEXT: and v0.8b, v0.8b, v1.8b +; CHECK-NEXT: umov w8, v0.h[1] +; CHECK-NEXT: umov w9, v0.h[2] +; CHECK-NEXT: umov w10, v0.h[0] ; CHECK-NEXT: and w8, w8, #0x1 ; CHECK-NEXT: and w9, w9, #0x1 ; CHECK-NEXT: bfi w10, w8, #1, #1 +; CHECK-NEXT: umov w8, v0.h[3] ; CHECK-NEXT: bfi w10, w9, #2, #1 -; CHECK-NEXT: bfi w10, w11, #3, #29 +; CHECK-NEXT: bfi w10, w8, #3, #29 ; CHECK-NEXT: and w8, w10, #0xf +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: strb w8, [x0] ; CHECK-NEXT: ret %t = call {<4 x i1>, <4 x i1>} @llvm.umul.with.overflow.v4i1(<4 x i1> %a0, <4 x i1> %a1) @@ -363,8 +362,8 @@ ; CHECK-NEXT: mov v0.s[1], w8 ; CHECK-NEXT: mul x8, x2, x6 ; CHECK-NEXT: shl v0.2s, v0.2s, #31 -; CHECK-NEXT: stp x8, x10, [x9, #16] ; CHECK-NEXT: cmlt v0.2s, v0.2s, #0 +; CHECK-NEXT: stp x8, x10, [x9, #16] ; CHECK-NEXT: ret %t = call {<2 x i128>, <2 x i1>} @llvm.umul.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1) %val = extractvalue {<2 x i128>, <2 x i1>} %t, 0 diff --git a/llvm/test/CodeGen/AArch64/vecreduce-add.ll b/llvm/test/CodeGen/AArch64/vecreduce-add.ll --- a/llvm/test/CodeGen/AArch64/vecreduce-add.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-add.ll @@ -876,10 +876,10 @@ ; ; CHECK-DOT-LABEL: add_v8i8_v8i32_acc_zext: ; CHECK-DOT: // %bb.0: // %entry -; CHECK-DOT-NEXT: movi v1.8b, #1 -; CHECK-DOT-NEXT: movi v2.2d, #0000000000000000 -; CHECK-DOT-NEXT: udot v2.2s, v0.8b, v1.8b -; CHECK-DOT-NEXT: addp v0.2s, v2.2s, v2.2s +; CHECK-DOT-NEXT: movi v1.2d, #0000000000000000 +; CHECK-DOT-NEXT: movi v2.8b, #1 +; CHECK-DOT-NEXT: udot v1.2s, v0.8b, v2.8b +; CHECK-DOT-NEXT: addp v0.2s, v1.2s, v1.2s ; CHECK-DOT-NEXT: fmov w8, s0 ; CHECK-DOT-NEXT: add w0, w8, w0 ; CHECK-DOT-NEXT: ret @@ -901,10 +901,10 @@ ; ; CHECK-DOT-LABEL: add_v8i8_v8i32_acc_sext: ; CHECK-DOT: // %bb.0: // %entry -; CHECK-DOT-NEXT: movi v1.8b, #1 -; CHECK-DOT-NEXT: movi v2.2d, #0000000000000000 -; CHECK-DOT-NEXT: sdot v2.2s, v0.8b, v1.8b -; CHECK-DOT-NEXT: addp v0.2s, v2.2s, v2.2s +; CHECK-DOT-NEXT: movi v1.2d, #0000000000000000 +; CHECK-DOT-NEXT: movi v2.8b, #1 +; CHECK-DOT-NEXT: sdot v1.2s, v0.8b, v2.8b +; CHECK-DOT-NEXT: addp v0.2s, v1.2s, v1.2s ; CHECK-DOT-NEXT: fmov w8, s0 ; CHECK-DOT-NEXT: add w0, w8, w0 ; CHECK-DOT-NEXT: ret @@ -1580,11 +1580,11 @@ ; ; CHECK-DOT-LABEL: add_pair_v8i8_v8i32_zext: ; CHECK-DOT: // %bb.0: // %entry -; CHECK-DOT-NEXT: movi v2.8b, #1 -; CHECK-DOT-NEXT: movi v3.2d, #0000000000000000 -; CHECK-DOT-NEXT: udot v3.2s, v1.8b, v2.8b -; CHECK-DOT-NEXT: udot v3.2s, v0.8b, v2.8b -; CHECK-DOT-NEXT: addp v0.2s, v3.2s, v3.2s +; CHECK-DOT-NEXT: movi v2.2d, #0000000000000000 +; CHECK-DOT-NEXT: movi v3.8b, #1 +; CHECK-DOT-NEXT: udot v2.2s, v1.8b, v3.8b +; CHECK-DOT-NEXT: udot v2.2s, v0.8b, v3.8b +; CHECK-DOT-NEXT: addp v0.2s, v2.2s, v2.2s ; CHECK-DOT-NEXT: fmov w0, s0 ; CHECK-DOT-NEXT: ret entry: @@ -1609,11 +1609,11 @@ ; ; CHECK-DOT-LABEL: add_pair_v8i8_v8i32_sext: ; CHECK-DOT: // %bb.0: // %entry -; CHECK-DOT-NEXT: movi v2.8b, #1 -; CHECK-DOT-NEXT: movi v3.2d, #0000000000000000 -; CHECK-DOT-NEXT: sdot v3.2s, v1.8b, v2.8b -; CHECK-DOT-NEXT: sdot v3.2s, v0.8b, v2.8b -; CHECK-DOT-NEXT: addp v0.2s, v3.2s, v3.2s +; CHECK-DOT-NEXT: movi v2.2d, #0000000000000000 +; CHECK-DOT-NEXT: movi v3.8b, #1 +; CHECK-DOT-NEXT: sdot v2.2s, v1.8b, v3.8b +; CHECK-DOT-NEXT: sdot v2.2s, v0.8b, v3.8b +; CHECK-DOT-NEXT: addp v0.2s, v2.2s, v2.2s ; CHECK-DOT-NEXT: fmov w0, s0 ; CHECK-DOT-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll --- a/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll @@ -97,7 +97,6 @@ ; CHECK-LABEL: test_v9i8: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #-1 -; CHECK-NEXT: umov w12, v0.b[4] ; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: mov v1.b[9], w8 ; CHECK-NEXT: mov v1.b[10], w8 @@ -108,17 +107,18 @@ ; CHECK-NEXT: umov w8, v1.b[1] ; CHECK-NEXT: umov w9, v1.b[0] ; CHECK-NEXT: umov w10, v1.b[2] -; CHECK-NEXT: umov w11, v1.b[3] ; CHECK-NEXT: and w8, w9, w8 +; CHECK-NEXT: umov w9, v1.b[3] +; CHECK-NEXT: and w8, w8, w10 +; CHECK-NEXT: umov w10, v0.b[4] +; CHECK-NEXT: and w8, w8, w9 ; CHECK-NEXT: umov w9, v0.b[5] ; CHECK-NEXT: and w8, w8, w10 ; CHECK-NEXT: umov w10, v0.b[6] -; CHECK-NEXT: and w8, w8, w11 -; CHECK-NEXT: umov w11, v0.b[7] -; CHECK-NEXT: and w8, w8, w12 ; CHECK-NEXT: and w8, w8, w9 +; CHECK-NEXT: umov w9, v0.b[7] ; CHECK-NEXT: and w8, w8, w10 -; CHECK-NEXT: and w0, w8, w11 +; CHECK-NEXT: and w0, w8, w9 ; CHECK-NEXT: ret %b = call i8 @llvm.vector.reduce.and.v9i8(<9 x i8> %a) ret i8 %b @@ -129,8 +129,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: and v0.8b, v0.8b, v1.8b -; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: and v1.8b, v0.8b, v1.8b +; CHECK-NEXT: fmov w9, s1 ; CHECK-NEXT: and w0, w9, w8 ; CHECK-NEXT: ret %b = call i32 @llvm.vector.reduce.and.v3i32(<3 x i32> %a) @@ -159,8 +159,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: and w0, w9, w8 ; CHECK-NEXT: ret %b = call i24 @llvm.vector.reduce.and.v4i24(<4 x i24> %a) @@ -185,8 +185,8 @@ ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: and w0, w9, w8 ; CHECK-NEXT: ret %b = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> %a) diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll --- a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll @@ -184,11 +184,11 @@ ; CHECK-FP-LABEL: test_v11f16: ; CHECK-FP: // %bb.0: ; CHECK-FP-NEXT: // kill: def $h0 killed $h0 def $q0 +; CHECK-FP-NEXT: mov x8, sp ; CHECK-FP-NEXT: // kill: def $h1 killed $h1 def $q1 ; CHECK-FP-NEXT: // kill: def $h2 killed $h2 def $q2 ; CHECK-FP-NEXT: // kill: def $h3 killed $h3 def $q3 ; CHECK-FP-NEXT: // kill: def $h4 killed $h4 def $q4 -; CHECK-FP-NEXT: mov x8, sp ; CHECK-FP-NEXT: // kill: def $h5 killed $h5 def $q5 ; CHECK-FP-NEXT: // kill: def $h6 killed $h6 def $q6 ; CHECK-FP-NEXT: // kill: def $h7 killed $h7 def $q7 @@ -198,12 +198,12 @@ ; CHECK-FP-NEXT: ld1 { v1.h }[0], [x8] ; CHECK-FP-NEXT: add x8, sp, #8 ; CHECK-FP-NEXT: mov v0.h[3], v3.h[0] +; CHECK-FP-NEXT: mov v0.h[4], v4.h[0] ; CHECK-FP-NEXT: ld1 { v1.h }[1], [x8] ; CHECK-FP-NEXT: add x8, sp, #16 -; CHECK-FP-NEXT: mov v0.h[4], v4.h[0] -; CHECK-FP-NEXT: ld1 { v1.h }[2], [x8] ; CHECK-FP-NEXT: mov v0.h[5], v5.h[0] ; CHECK-FP-NEXT: mov v0.h[6], v6.h[0] +; CHECK-FP-NEXT: ld1 { v1.h }[2], [x8] ; CHECK-FP-NEXT: mov v0.h[7], v7.h[0] ; CHECK-FP-NEXT: fmaxnm v0.8h, v0.8h, v1.8h ; CHECK-FP-NEXT: fmaxnmv h0, v0.8h @@ -292,26 +292,26 @@ ; CHECK-FP-LABEL: test_v11f16_ninf: ; CHECK-FP: // %bb.0: ; CHECK-FP-NEXT: // kill: def $h0 killed $h0 def $q0 +; CHECK-FP-NEXT: mov x8, sp ; CHECK-FP-NEXT: // kill: def $h1 killed $h1 def $q1 ; CHECK-FP-NEXT: // kill: def $h2 killed $h2 def $q2 ; CHECK-FP-NEXT: // kill: def $h3 killed $h3 def $q3 ; CHECK-FP-NEXT: // kill: def $h4 killed $h4 def $q4 -; CHECK-FP-NEXT: mov x8, sp ; CHECK-FP-NEXT: // kill: def $h5 killed $h5 def $q5 ; CHECK-FP-NEXT: // kill: def $h6 killed $h6 def $q6 ; CHECK-FP-NEXT: // kill: def $h7 killed $h7 def $q7 ; CHECK-FP-NEXT: mov v0.h[1], v1.h[0] ; CHECK-FP-NEXT: mvni v1.8h, #4, lsl #8 +; CHECK-FP-NEXT: mov v0.h[2], v2.h[0] ; CHECK-FP-NEXT: ld1 { v1.h }[0], [x8] ; CHECK-FP-NEXT: add x8, sp, #8 -; CHECK-FP-NEXT: mov v0.h[2], v2.h[0] -; CHECK-FP-NEXT: ld1 { v1.h }[1], [x8] -; CHECK-FP-NEXT: add x8, sp, #16 ; CHECK-FP-NEXT: mov v0.h[3], v3.h[0] -; CHECK-FP-NEXT: ld1 { v1.h }[2], [x8] ; CHECK-FP-NEXT: mov v0.h[4], v4.h[0] +; CHECK-FP-NEXT: ld1 { v1.h }[1], [x8] +; CHECK-FP-NEXT: add x8, sp, #16 ; CHECK-FP-NEXT: mov v0.h[5], v5.h[0] ; CHECK-FP-NEXT: mov v0.h[6], v6.h[0] +; CHECK-FP-NEXT: ld1 { v1.h }[2], [x8] ; CHECK-FP-NEXT: mov v0.h[7], v7.h[0] ; CHECK-FP-NEXT: fmaxnm v0.8h, v0.8h, v1.8h ; CHECK-FP-NEXT: fmaxnmv h0, v0.8h diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll --- a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll @@ -184,11 +184,11 @@ ; CHECK-FP-LABEL: test_v11f16: ; CHECK-FP: // %bb.0: ; CHECK-FP-NEXT: // kill: def $h0 killed $h0 def $q0 +; CHECK-FP-NEXT: mov x8, sp ; CHECK-FP-NEXT: // kill: def $h1 killed $h1 def $q1 ; CHECK-FP-NEXT: // kill: def $h2 killed $h2 def $q2 ; CHECK-FP-NEXT: // kill: def $h3 killed $h3 def $q3 ; CHECK-FP-NEXT: // kill: def $h4 killed $h4 def $q4 -; CHECK-FP-NEXT: mov x8, sp ; CHECK-FP-NEXT: // kill: def $h5 killed $h5 def $q5 ; CHECK-FP-NEXT: // kill: def $h6 killed $h6 def $q6 ; CHECK-FP-NEXT: // kill: def $h7 killed $h7 def $q7 @@ -198,12 +198,12 @@ ; CHECK-FP-NEXT: ld1 { v1.h }[0], [x8] ; CHECK-FP-NEXT: add x8, sp, #8 ; CHECK-FP-NEXT: mov v0.h[3], v3.h[0] +; CHECK-FP-NEXT: mov v0.h[4], v4.h[0] ; CHECK-FP-NEXT: ld1 { v1.h }[1], [x8] ; CHECK-FP-NEXT: add x8, sp, #16 -; CHECK-FP-NEXT: mov v0.h[4], v4.h[0] -; CHECK-FP-NEXT: ld1 { v1.h }[2], [x8] ; CHECK-FP-NEXT: mov v0.h[5], v5.h[0] ; CHECK-FP-NEXT: mov v0.h[6], v6.h[0] +; CHECK-FP-NEXT: ld1 { v1.h }[2], [x8] ; CHECK-FP-NEXT: mov v0.h[7], v7.h[0] ; CHECK-FP-NEXT: fminnm v0.8h, v0.8h, v1.8h ; CHECK-FP-NEXT: fminnmv h0, v0.8h @@ -292,26 +292,26 @@ ; CHECK-FP-LABEL: test_v11f16_ninf: ; CHECK-FP: // %bb.0: ; CHECK-FP-NEXT: // kill: def $h0 killed $h0 def $q0 +; CHECK-FP-NEXT: mov x8, sp ; CHECK-FP-NEXT: // kill: def $h1 killed $h1 def $q1 ; CHECK-FP-NEXT: // kill: def $h2 killed $h2 def $q2 ; CHECK-FP-NEXT: // kill: def $h3 killed $h3 def $q3 ; CHECK-FP-NEXT: // kill: def $h4 killed $h4 def $q4 -; CHECK-FP-NEXT: mov x8, sp ; CHECK-FP-NEXT: // kill: def $h5 killed $h5 def $q5 ; CHECK-FP-NEXT: // kill: def $h6 killed $h6 def $q6 ; CHECK-FP-NEXT: // kill: def $h7 killed $h7 def $q7 ; CHECK-FP-NEXT: mov v0.h[1], v1.h[0] ; CHECK-FP-NEXT: mvni v1.8h, #132, lsl #8 +; CHECK-FP-NEXT: mov v0.h[2], v2.h[0] ; CHECK-FP-NEXT: ld1 { v1.h }[0], [x8] ; CHECK-FP-NEXT: add x8, sp, #8 -; CHECK-FP-NEXT: mov v0.h[2], v2.h[0] -; CHECK-FP-NEXT: ld1 { v1.h }[1], [x8] -; CHECK-FP-NEXT: add x8, sp, #16 ; CHECK-FP-NEXT: mov v0.h[3], v3.h[0] -; CHECK-FP-NEXT: ld1 { v1.h }[2], [x8] ; CHECK-FP-NEXT: mov v0.h[4], v4.h[0] +; CHECK-FP-NEXT: ld1 { v1.h }[1], [x8] +; CHECK-FP-NEXT: add x8, sp, #16 ; CHECK-FP-NEXT: mov v0.h[5], v5.h[0] ; CHECK-FP-NEXT: mov v0.h[6], v6.h[0] +; CHECK-FP-NEXT: ld1 { v1.h }[2], [x8] ; CHECK-FP-NEXT: mov v0.h[7], v7.h[0] ; CHECK-FP-NEXT: fminnm v0.8h, v0.8h, v1.8h ; CHECK-FP-NEXT: fminnmv h0, v0.8h diff --git a/llvm/test/CodeGen/AArch64/vector-fcopysign.ll b/llvm/test/CodeGen/AArch64/vector-fcopysign.ll --- a/llvm/test/CodeGen/AArch64/vector-fcopysign.ll +++ b/llvm/test/CodeGen/AArch64/vector-fcopysign.ll @@ -157,8 +157,8 @@ ; CHECK: ; %bb.0: ; CHECK-NEXT: movi.2d v3, #0xffffffffffffffff ; CHECK-NEXT: fcvtl2 v4.2d, v2.4s -; CHECK-NEXT: fcvtl v2.2d, v2.2s ; CHECK-NEXT: fneg.2d v3, v3 +; CHECK-NEXT: fcvtl v2.2d, v2.2s ; CHECK-NEXT: bif.16b v1, v4, v3 ; CHECK-NEXT: bif.16b v0, v2, v3 ; CHECK-NEXT: ret @@ -189,31 +189,31 @@ ; NOFP16: ; %bb.0: ; NOFP16-NEXT: ; kill: def $d1 killed $d1 def $q1 ; NOFP16-NEXT: ; kill: def $d0 killed $d0 def $q0 -; NOFP16-NEXT: mov h3, v1[1] -; NOFP16-NEXT: mov h4, v0[1] -; NOFP16-NEXT: fcvt s5, h1 -; NOFP16-NEXT: fcvt s6, h0 -; NOFP16-NEXT: mov h7, v1[2] -; NOFP16-NEXT: mov h16, v0[2] +; NOFP16-NEXT: fcvt s3, h1 +; NOFP16-NEXT: fcvt s4, h0 +; NOFP16-NEXT: mov h5, v1[1] +; NOFP16-NEXT: mov h6, v0[1] ; NOFP16-NEXT: mvni.4s v2, #128, lsl #24 +; NOFP16-NEXT: mov h7, v0[2] +; NOFP16-NEXT: mov h0, v0[3] +; NOFP16-NEXT: bit.16b v3, v4, v2 +; NOFP16-NEXT: mov h4, v1[2] +; NOFP16-NEXT: fcvt s5, h5 +; NOFP16-NEXT: fcvt s6, h6 ; NOFP16-NEXT: mov h1, v1[3] -; NOFP16-NEXT: fcvt s3, h3 +; NOFP16-NEXT: fcvt s7, h7 ; NOFP16-NEXT: fcvt s4, h4 ; NOFP16-NEXT: bit.16b v5, v6, v2 -; NOFP16-NEXT: fcvt s6, h7 -; NOFP16-NEXT: fcvt s7, h16 ; NOFP16-NEXT: fcvt s1, h1 -; NOFP16-NEXT: bit.16b v3, v4, v2 -; NOFP16-NEXT: mov h4, v0[3] -; NOFP16-NEXT: fcvt h0, s5 -; NOFP16-NEXT: bit.16b v6, v7, v2 -; NOFP16-NEXT: fcvt h3, s3 -; NOFP16-NEXT: fcvt s4, h4 -; NOFP16-NEXT: fcvt h5, s6 +; NOFP16-NEXT: fcvt s6, h0 +; NOFP16-NEXT: bit.16b v4, v7, v2 +; NOFP16-NEXT: fcvt h0, s3 +; NOFP16-NEXT: fcvt h3, s5 +; NOFP16-NEXT: bit.16b v1, v6, v2 +; NOFP16-NEXT: fcvt h2, s4 ; NOFP16-NEXT: mov.h v0[1], v3[0] -; NOFP16-NEXT: bit.16b v1, v4, v2 -; NOFP16-NEXT: mov.h v0[2], v5[0] ; NOFP16-NEXT: fcvt h1, s1 +; NOFP16-NEXT: mov.h v0[2], v2[0] ; NOFP16-NEXT: mov.h v0[3], v1[0] ; NOFP16-NEXT: ; kill: def $d0 killed $d0 killed $q0 ; NOFP16-NEXT: ret @@ -232,31 +232,31 @@ ; NOFP16: ; %bb.0: ; NOFP16-NEXT: fcvtn v1.4h, v1.4s ; NOFP16-NEXT: ; kill: def $d0 killed $d0 def $q0 -; NOFP16-NEXT: mov h3, v0[1] -; NOFP16-NEXT: fcvt s5, h0 -; NOFP16-NEXT: mov h7, v0[2] +; NOFP16-NEXT: fcvt s3, h0 +; NOFP16-NEXT: mov h5, v0[1] ; NOFP16-NEXT: mvni.4s v2, #128, lsl #24 -; NOFP16-NEXT: mov h4, v1[1] -; NOFP16-NEXT: fcvt s6, h1 -; NOFP16-NEXT: mov h16, v1[2] -; NOFP16-NEXT: fcvt s3, h3 +; NOFP16-NEXT: fcvt s4, h1 +; NOFP16-NEXT: mov h6, v1[1] +; NOFP16-NEXT: mov h7, v1[2] +; NOFP16-NEXT: fcvt s5, h5 ; NOFP16-NEXT: mov h1, v1[3] -; NOFP16-NEXT: fcvt s4, h4 -; NOFP16-NEXT: bif.16b v5, v6, v2 -; NOFP16-NEXT: fcvt s6, h7 -; NOFP16-NEXT: fcvt s7, h16 -; NOFP16-NEXT: fcvt s1, h1 ; NOFP16-NEXT: bif.16b v3, v4, v2 -; NOFP16-NEXT: mov h4, v0[3] -; NOFP16-NEXT: fcvt h0, s5 -; NOFP16-NEXT: bif.16b v6, v7, v2 -; NOFP16-NEXT: fcvt h3, s3 +; NOFP16-NEXT: mov h4, v0[2] +; NOFP16-NEXT: fcvt s6, h6 +; NOFP16-NEXT: mov h0, v0[3] +; NOFP16-NEXT: fcvt s7, h7 +; NOFP16-NEXT: fcvt s1, h1 ; NOFP16-NEXT: fcvt s4, h4 -; NOFP16-NEXT: fcvt h5, s6 +; NOFP16-NEXT: bif.16b v5, v6, v2 +; NOFP16-NEXT: fcvt s6, h0 +; NOFP16-NEXT: fcvt h0, s3 +; NOFP16-NEXT: fcvt h3, s5 +; NOFP16-NEXT: bif.16b v4, v7, v2 +; NOFP16-NEXT: bit.16b v1, v6, v2 +; NOFP16-NEXT: fcvt h2, s4 ; NOFP16-NEXT: mov.h v0[1], v3[0] -; NOFP16-NEXT: bit.16b v1, v4, v2 -; NOFP16-NEXT: mov.h v0[2], v5[0] ; NOFP16-NEXT: fcvt h1, s1 +; NOFP16-NEXT: mov.h v0[2], v2[0] ; NOFP16-NEXT: mov.h v0[3], v1[0] ; NOFP16-NEXT: ; kill: def $d0 killed $d0 killed $q0 ; NOFP16-NEXT: ret @@ -281,26 +281,25 @@ ; NOFP16-NEXT: fcvt s1, d1 ; NOFP16-NEXT: fcvt s6, h0 ; NOFP16-NEXT: mov h7, v0[2] +; NOFP16-NEXT: mov h0, v0[3] ; NOFP16-NEXT: mvni.4s v3, #128, lsl #24 ; NOFP16-NEXT: fcvt s4, d4 ; NOFP16-NEXT: fcvt s5, h5 ; NOFP16-NEXT: bit.16b v1, v6, v3 -; NOFP16-NEXT: fcvt s6, d2 +; NOFP16-NEXT: mov d6, v2[1] +; NOFP16-NEXT: fcvt s2, d2 ; NOFP16-NEXT: fcvt s7, h7 -; NOFP16-NEXT: mov d2, v2[1] ; NOFP16-NEXT: bit.16b v4, v5, v3 -; NOFP16-NEXT: mov h5, v0[3] +; NOFP16-NEXT: fcvt s5, d6 +; NOFP16-NEXT: fcvt s6, h0 ; NOFP16-NEXT: fcvt h0, s1 -; NOFP16-NEXT: bit.16b v6, v7, v3 -; NOFP16-NEXT: fcvt s2, d2 +; NOFP16-NEXT: bit.16b v2, v7, v3 ; NOFP16-NEXT: fcvt h1, s4 -; NOFP16-NEXT: fcvt s4, h5 -; NOFP16-NEXT: fcvt h5, s6 +; NOFP16-NEXT: fcvt h2, s2 +; NOFP16-NEXT: bsl.16b v3, v6, v5 ; NOFP16-NEXT: mov.h v0[1], v1[0] -; NOFP16-NEXT: mov.16b v1, v3 -; NOFP16-NEXT: bsl.16b v1, v4, v2 -; NOFP16-NEXT: mov.h v0[2], v5[0] -; NOFP16-NEXT: fcvt h1, s1 +; NOFP16-NEXT: fcvt h1, s3 +; NOFP16-NEXT: mov.h v0[2], v2[0] ; NOFP16-NEXT: mov.h v0[3], v1[0] ; NOFP16-NEXT: ; kill: def $d0 killed $d0 killed $q0 ; NOFP16-NEXT: ret @@ -309,14 +308,14 @@ ; FP16: ; %bb.0: ; FP16-NEXT: mov d3, v1[1] ; FP16-NEXT: fcvt h1, d1 +; FP16-NEXT: mov d4, v2[1] +; FP16-NEXT: fcvt h2, d2 ; FP16-NEXT: fcvt h3, d3 ; FP16-NEXT: mov.h v1[1], v3[0] -; FP16-NEXT: fcvt h3, d2 -; FP16-NEXT: mov d2, v2[1] -; FP16-NEXT: mov.h v1[2], v3[0] -; FP16-NEXT: fcvt h2, d2 -; FP16-NEXT: mov.h v1[3], v2[0] +; FP16-NEXT: fcvt h3, d4 +; FP16-NEXT: mov.h v1[2], v2[0] ; FP16-NEXT: mvni.4h v2, #128, lsl #8 +; FP16-NEXT: mov.h v1[3], v3[0] ; FP16-NEXT: bif.8b v0, v1, v2 ; FP16-NEXT: ret %tmp0 = fptrunc <4 x double> %b to <4 x half> @@ -331,62 +330,63 @@ define <8 x half> @test_copysign_v8f16_v8f16(<8 x half> %a, <8 x half> %b) #0 { ; NOFP16-LABEL: test_copysign_v8f16_v8f16: ; NOFP16: ; %bb.0: -; NOFP16-NEXT: mov h5, v1[1] -; NOFP16-NEXT: mov h6, v0[1] -; NOFP16-NEXT: fcvt s2, h1 -; NOFP16-NEXT: fcvt s4, h0 +; NOFP16-NEXT: mov h3, v1[1] +; NOFP16-NEXT: mov h4, v0[1] ; NOFP16-NEXT: mov h7, v1[2] ; NOFP16-NEXT: mov h16, v0[2] -; NOFP16-NEXT: mvni.4s v3, #128, lsl #24 -; NOFP16-NEXT: mov h17, v0[3] -; NOFP16-NEXT: fcvt s5, h5 -; NOFP16-NEXT: fcvt s6, h6 -; NOFP16-NEXT: mov h18, v0[5] -; NOFP16-NEXT: bit.16b v2, v4, v3 -; NOFP16-NEXT: mov h4, v1[3] +; NOFP16-NEXT: mov h17, v1[3] +; NOFP16-NEXT: mov h18, v0[3] +; NOFP16-NEXT: fcvt s5, h1 +; NOFP16-NEXT: fcvt s6, h0 +; NOFP16-NEXT: fcvt s3, h3 +; NOFP16-NEXT: fcvt s4, h4 +; NOFP16-NEXT: mov h19, v1[4] +; NOFP16-NEXT: mov h20, v0[4] ; NOFP16-NEXT: fcvt s7, h7 ; NOFP16-NEXT: fcvt s16, h16 ; NOFP16-NEXT: fcvt s17, h17 -; NOFP16-NEXT: bit.16b v5, v6, v3 -; NOFP16-NEXT: mov.16b v6, v3 -; NOFP16-NEXT: fcvt s4, h4 -; NOFP16-NEXT: bsl.16b v6, v16, v7 -; NOFP16-NEXT: mov h7, v1[4] -; NOFP16-NEXT: mov h16, v0[4] -; NOFP16-NEXT: fcvt h2, s2 -; NOFP16-NEXT: fcvt h5, s5 -; NOFP16-NEXT: bit.16b v4, v17, v3 +; NOFP16-NEXT: fcvt s18, h18 +; NOFP16-NEXT: mvni.4s v2, #128, lsl #24 +; NOFP16-NEXT: bit.16b v5, v6, v2 +; NOFP16-NEXT: bit.16b v3, v4, v2 +; NOFP16-NEXT: mov.16b v4, v2 +; NOFP16-NEXT: mov.16b v6, v2 +; NOFP16-NEXT: bsl.16b v4, v16, v7 +; NOFP16-NEXT: bsl.16b v6, v18, v17 +; NOFP16-NEXT: fcvt s7, h19 +; NOFP16-NEXT: fcvt s16, h20 ; NOFP16-NEXT: mov h17, v1[5] -; NOFP16-NEXT: fcvt s7, h7 -; NOFP16-NEXT: fcvt s16, h16 -; NOFP16-NEXT: mov.h v2[1], v5[0] -; NOFP16-NEXT: fcvt h5, s6 -; NOFP16-NEXT: fcvt s6, h17 -; NOFP16-NEXT: fcvt s17, h18 -; NOFP16-NEXT: fcvt h4, s4 -; NOFP16-NEXT: bit.16b v7, v16, v3 -; NOFP16-NEXT: mov h16, v0[6] -; NOFP16-NEXT: mov.h v2[2], v5[0] -; NOFP16-NEXT: mov h5, v1[6] -; NOFP16-NEXT: bit.16b v6, v17, v3 +; NOFP16-NEXT: mov h18, v0[5] +; NOFP16-NEXT: mov h19, v1[6] +; NOFP16-NEXT: mov h20, v0[6] ; NOFP16-NEXT: mov h1, v1[7] -; NOFP16-NEXT: fcvt s16, h16 -; NOFP16-NEXT: mov.h v2[3], v4[0] -; NOFP16-NEXT: fcvt h4, s7 -; NOFP16-NEXT: fcvt s5, h5 ; NOFP16-NEXT: mov h0, v0[7] +; NOFP16-NEXT: fcvt h3, s3 +; NOFP16-NEXT: bit.16b v7, v16, v2 +; NOFP16-NEXT: fcvt s16, h17 +; NOFP16-NEXT: fcvt s17, h18 +; NOFP16-NEXT: fcvt s18, h19 +; NOFP16-NEXT: fcvt s19, h20 ; NOFP16-NEXT: fcvt s1, h1 -; NOFP16-NEXT: mov.h v2[4], v4[0] -; NOFP16-NEXT: fcvt h4, s6 -; NOFP16-NEXT: bit.16b v5, v16, v3 -; NOFP16-NEXT: fcvt s0, h0 -; NOFP16-NEXT: mov.h v2[5], v4[0] -; NOFP16-NEXT: fcvt h5, s5 -; NOFP16-NEXT: bif.16b v0, v1, v3 -; NOFP16-NEXT: mov.h v2[6], v5[0] -; NOFP16-NEXT: fcvt h0, s0 -; NOFP16-NEXT: mov.h v2[7], v0[0] -; NOFP16-NEXT: mov.16b v0, v2 +; NOFP16-NEXT: bit.16b v16, v17, v2 +; NOFP16-NEXT: mov.16b v17, v2 +; NOFP16-NEXT: bsl.16b v17, v19, v18 +; NOFP16-NEXT: fcvt s18, h0 +; NOFP16-NEXT: fcvt h0, s5 +; NOFP16-NEXT: bit.16b v1, v18, v2 +; NOFP16-NEXT: fcvt h2, s4 +; NOFP16-NEXT: mov.h v0[1], v3[0] +; NOFP16-NEXT: fcvt h3, s6 +; NOFP16-NEXT: fcvt h1, s1 +; NOFP16-NEXT: mov.h v0[2], v2[0] +; NOFP16-NEXT: fcvt h2, s7 +; NOFP16-NEXT: mov.h v0[3], v3[0] +; NOFP16-NEXT: fcvt h3, s16 +; NOFP16-NEXT: mov.h v0[4], v2[0] +; NOFP16-NEXT: fcvt h2, s17 +; NOFP16-NEXT: mov.h v0[5], v3[0] +; NOFP16-NEXT: mov.h v0[6], v2[0] +; NOFP16-NEXT: mov.h v0[7], v1[0] ; NOFP16-NEXT: ret ; ; FP16-LABEL: test_copysign_v8f16_v8f16: @@ -402,60 +402,60 @@ ; NOFP16-LABEL: test_copysign_v8f16_v8f32: ; NOFP16: ; %bb.0: ; NOFP16-NEXT: fcvtn v1.4h, v1.4s +; NOFP16-NEXT: fcvt s4, h0 ; NOFP16-NEXT: fcvtn v2.4h, v2.4s -; NOFP16-NEXT: mov h4, v0[1] -; NOFP16-NEXT: mov h5, v0[4] -; NOFP16-NEXT: fcvt s7, h0 -; NOFP16-NEXT: mov h17, v0[2] +; NOFP16-NEXT: mov h6, v0[1] +; NOFP16-NEXT: mov h7, v0[4] +; NOFP16-NEXT: mov h16, v0[2] ; NOFP16-NEXT: mvni.4s v3, #128, lsl #24 -; NOFP16-NEXT: mov h6, v1[1] -; NOFP16-NEXT: fcvt s16, h1 -; NOFP16-NEXT: fcvt s4, h4 -; NOFP16-NEXT: mov h18, v1[2] -; NOFP16-NEXT: fcvt s5, h5 -; NOFP16-NEXT: fcvt s17, h17 -; NOFP16-NEXT: fcvt s6, h6 -; NOFP16-NEXT: bif.16b v7, v16, v3 -; NOFP16-NEXT: fcvt s16, h2 -; NOFP16-NEXT: fcvt s18, h18 -; NOFP16-NEXT: bif.16b v4, v6, v3 -; NOFP16-NEXT: mov h6, v0[3] -; NOFP16-NEXT: bif.16b v5, v16, v3 -; NOFP16-NEXT: mov h16, v1[3] -; NOFP16-NEXT: fcvt h1, s7 -; NOFP16-NEXT: mov.16b v7, v3 -; NOFP16-NEXT: fcvt h4, s4 -; NOFP16-NEXT: bsl.16b v7, v17, v18 +; NOFP16-NEXT: mov h19, v0[3] +; NOFP16-NEXT: fcvt s5, h1 +; NOFP16-NEXT: mov h17, v1[1] +; NOFP16-NEXT: fcvt s18, h2 +; NOFP16-NEXT: fcvt s7, h7 ; NOFP16-NEXT: fcvt s6, h6 ; NOFP16-NEXT: fcvt s16, h16 -; NOFP16-NEXT: mov h17, v0[5] +; NOFP16-NEXT: fcvt s19, h19 +; NOFP16-NEXT: bif.16b v4, v5, v3 +; NOFP16-NEXT: mov h5, v1[2] +; NOFP16-NEXT: fcvt s17, h17 +; NOFP16-NEXT: mov h1, v1[3] +; NOFP16-NEXT: bif.16b v7, v18, v3 ; NOFP16-NEXT: mov h18, v2[1] -; NOFP16-NEXT: fcvt h5, s5 -; NOFP16-NEXT: mov.h v1[1], v4[0] -; NOFP16-NEXT: fcvt h4, s7 -; NOFP16-NEXT: bif.16b v6, v16, v3 -; NOFP16-NEXT: fcvt s7, h17 +; NOFP16-NEXT: fcvt s5, h5 +; NOFP16-NEXT: bif.16b v6, v17, v3 +; NOFP16-NEXT: mov h17, v0[5] +; NOFP16-NEXT: fcvt s1, h1 +; NOFP16-NEXT: bit.16b v5, v16, v3 +; NOFP16-NEXT: fcvt s16, h17 ; NOFP16-NEXT: fcvt s17, h18 -; NOFP16-NEXT: mov.h v1[2], v4[0] -; NOFP16-NEXT: mov h4, v0[6] -; NOFP16-NEXT: mov h16, v2[2] -; NOFP16-NEXT: fcvt h6, s6 +; NOFP16-NEXT: mov.16b v18, v3 +; NOFP16-NEXT: bsl.16b v18, v19, v1 +; NOFP16-NEXT: fcvt h1, s4 +; NOFP16-NEXT: fcvt h4, s6 +; NOFP16-NEXT: mov h6, v0[6] +; NOFP16-NEXT: bif.16b v16, v17, v3 +; NOFP16-NEXT: mov h17, v2[2] ; NOFP16-NEXT: mov h0, v0[7] -; NOFP16-NEXT: bif.16b v7, v17, v3 ; NOFP16-NEXT: mov h2, v2[3] -; NOFP16-NEXT: fcvt s4, h4 -; NOFP16-NEXT: fcvt s16, h16 -; NOFP16-NEXT: mov.h v1[3], v6[0] +; NOFP16-NEXT: mov.h v1[1], v4[0] +; NOFP16-NEXT: fcvt h4, s5 +; NOFP16-NEXT: fcvt s5, h6 +; NOFP16-NEXT: fcvt s6, h17 ; NOFP16-NEXT: fcvt s0, h0 ; NOFP16-NEXT: fcvt s2, h2 -; NOFP16-NEXT: bif.16b v4, v16, v3 -; NOFP16-NEXT: mov.h v1[4], v5[0] -; NOFP16-NEXT: fcvt h5, s7 +; NOFP16-NEXT: mov.h v1[2], v4[0] +; NOFP16-NEXT: fcvt h4, s18 +; NOFP16-NEXT: bif.16b v5, v6, v3 +; NOFP16-NEXT: fcvt h6, s7 ; NOFP16-NEXT: bif.16b v0, v2, v3 -; NOFP16-NEXT: fcvt h4, s4 -; NOFP16-NEXT: mov.h v1[5], v5[0] +; NOFP16-NEXT: fcvt h2, s5 +; NOFP16-NEXT: mov.h v1[3], v4[0] +; NOFP16-NEXT: fcvt h4, s16 ; NOFP16-NEXT: fcvt h0, s0 -; NOFP16-NEXT: mov.h v1[6], v4[0] +; NOFP16-NEXT: mov.h v1[4], v6[0] +; NOFP16-NEXT: mov.h v1[5], v4[0] +; NOFP16-NEXT: mov.h v1[6], v2[0] ; NOFP16-NEXT: mov.h v1[7], v0[0] ; NOFP16-NEXT: mov.16b v0, v1 ; NOFP16-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/vector-gep.ll b/llvm/test/CodeGen/AArch64/vector-gep.ll --- a/llvm/test/CodeGen/AArch64/vector-gep.ll +++ b/llvm/test/CodeGen/AArch64/vector-gep.ll @@ -10,9 +10,9 @@ define <2 x i8*> @vector_gep(<2 x i8*> %0) { ; CHECK-LABEL: vector_gep: ; CHECK: adrp x[[REG8:[123]?[0-9]]], lCPI0_0@PAGE -; CHECK: movi v[[REG1:[0-9]+]].2d, #0x000000ffffffff ; CHECK: ldr q[[REG2:[0-9]+]], [x[[REG8]], lCPI0_0@PAGEOFF] ; CHECK: add v[[REG0:[0-9]+]].2d, v[[REG0]].2d, v[[REG2]].2d +; CHECK: movi v[[REG1:[0-9]+]].2d, #0x000000ffffffff ; CHECK: and v[[REG0]].16b, v[[REG0]].16b, v[[REG1]].16b ; CHECK: ret entry: diff --git a/llvm/test/CodeGen/AArch64/vector-popcnt-128-ult-ugt.ll b/llvm/test/CodeGen/AArch64/vector-popcnt-128-ult-ugt.ll --- a/llvm/test/CodeGen/AArch64/vector-popcnt-128-ult-ugt.ll +++ b/llvm/test/CodeGen/AArch64/vector-popcnt-128-ult-ugt.ll @@ -4,8 +4,8 @@ define <16 x i8> @ugt_1_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ugt_1_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #1 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #1 ; CHECK-NEXT: cmhi v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -17,8 +17,8 @@ define <16 x i8> @ult_2_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ult_2_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #2 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #2 ; CHECK-NEXT: cmhi v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -30,8 +30,8 @@ define <16 x i8> @ugt_2_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ugt_2_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #2 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #2 ; CHECK-NEXT: cmhi v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -43,8 +43,8 @@ define <16 x i8> @ult_3_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ult_3_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #3 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #3 ; CHECK-NEXT: cmhi v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -56,8 +56,8 @@ define <16 x i8> @ugt_3_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ugt_3_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #3 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #3 ; CHECK-NEXT: cmhi v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -69,8 +69,8 @@ define <16 x i8> @ult_4_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ult_4_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #4 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #4 ; CHECK-NEXT: cmhi v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -82,8 +82,8 @@ define <16 x i8> @ugt_4_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ugt_4_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #4 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #4 ; CHECK-NEXT: cmhi v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -95,8 +95,8 @@ define <16 x i8> @ult_5_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ult_5_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #5 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #5 ; CHECK-NEXT: cmhi v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -108,8 +108,8 @@ define <16 x i8> @ugt_5_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ugt_5_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #5 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #5 ; CHECK-NEXT: cmhi v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -121,8 +121,8 @@ define <16 x i8> @ult_6_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ult_6_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #6 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #6 ; CHECK-NEXT: cmhi v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -134,8 +134,8 @@ define <16 x i8> @ugt_6_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ugt_6_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #6 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #6 ; CHECK-NEXT: cmhi v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -147,8 +147,8 @@ define <16 x i8> @ult_7_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ult_7_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #7 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #7 ; CHECK-NEXT: cmhi v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -1477,8 +1477,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #2 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1494,8 +1494,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #3 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1511,8 +1511,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #3 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1528,8 +1528,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #4 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1545,8 +1545,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #4 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1562,8 +1562,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #5 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1579,8 +1579,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #5 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1596,8 +1596,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #6 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1613,8 +1613,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #6 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1630,8 +1630,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #7 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1647,8 +1647,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #7 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1664,8 +1664,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #8 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1681,8 +1681,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #8 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1698,8 +1698,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #9 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1715,8 +1715,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #9 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1732,8 +1732,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #10 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1749,8 +1749,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #10 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1766,8 +1766,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #11 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1783,8 +1783,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #11 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1800,8 +1800,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #12 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1817,8 +1817,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #12 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1834,8 +1834,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #13 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1851,8 +1851,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #13 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1868,8 +1868,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #14 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1885,8 +1885,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #14 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1902,8 +1902,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #15 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1919,8 +1919,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #15 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1936,8 +1936,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #16 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1953,8 +1953,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #16 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1970,8 +1970,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #17 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1987,8 +1987,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #17 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2004,8 +2004,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #18 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2021,8 +2021,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #18 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2038,8 +2038,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #19 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2055,8 +2055,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #19 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2072,8 +2072,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #20 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2089,8 +2089,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #20 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2106,8 +2106,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #21 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2123,8 +2123,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #21 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2140,8 +2140,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #22 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2157,8 +2157,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #22 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2174,8 +2174,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #23 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2191,8 +2191,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #23 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2208,8 +2208,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #24 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2225,8 +2225,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #24 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2242,8 +2242,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #25 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2259,8 +2259,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #25 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2276,8 +2276,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #26 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2293,8 +2293,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #26 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2310,8 +2310,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #27 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2327,8 +2327,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #27 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2344,8 +2344,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #28 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2361,8 +2361,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #28 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2378,8 +2378,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #29 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2395,8 +2395,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #29 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2412,8 +2412,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #30 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2429,8 +2429,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #30 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2446,8 +2446,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #31 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2463,8 +2463,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #31 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2480,8 +2480,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #32 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2497,8 +2497,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #32 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2514,8 +2514,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #33 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2531,8 +2531,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #33 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2548,8 +2548,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #34 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2565,8 +2565,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #34 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2582,8 +2582,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #35 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2599,8 +2599,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #35 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2616,8 +2616,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #36 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2633,8 +2633,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #36 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2650,8 +2650,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #37 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2667,8 +2667,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #37 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2684,8 +2684,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #38 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2701,8 +2701,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #38 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2718,8 +2718,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #39 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2735,8 +2735,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #39 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2752,8 +2752,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #40 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2769,8 +2769,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #40 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2786,8 +2786,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #41 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2803,8 +2803,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #41 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2820,8 +2820,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #42 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2837,8 +2837,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #42 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2854,8 +2854,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #43 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2871,8 +2871,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #43 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2888,8 +2888,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #44 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2905,8 +2905,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #44 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2922,8 +2922,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #45 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2939,8 +2939,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #45 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2956,8 +2956,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #46 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2973,8 +2973,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #46 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2990,8 +2990,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #47 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3007,8 +3007,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #47 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3024,8 +3024,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #48 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3041,8 +3041,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #48 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3058,8 +3058,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #49 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3075,8 +3075,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #49 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3092,8 +3092,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #50 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3109,8 +3109,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #50 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3126,8 +3126,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #51 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3143,8 +3143,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #51 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3160,8 +3160,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #52 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3177,8 +3177,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #52 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3194,8 +3194,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #53 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3211,8 +3211,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #53 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3228,8 +3228,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #54 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3245,8 +3245,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #54 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3262,8 +3262,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #55 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3279,8 +3279,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #55 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3296,8 +3296,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #56 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3313,8 +3313,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #56 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3330,8 +3330,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #57 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3347,8 +3347,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #57 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3364,8 +3364,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #58 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3381,8 +3381,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #58 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3398,8 +3398,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #59 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3415,8 +3415,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #59 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3432,8 +3432,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #60 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3449,8 +3449,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #60 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3466,8 +3466,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #61 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3483,8 +3483,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #61 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3500,8 +3500,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #62 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3517,8 +3517,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #62 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3534,8 +3534,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #63 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d diff --git a/llvm/test/CodeGen/AArch64/vselect-constants.ll b/llvm/test/CodeGen/AArch64/vselect-constants.ll --- a/llvm/test/CodeGen/AArch64/vselect-constants.ll +++ b/llvm/test/CodeGen/AArch64/vselect-constants.ll @@ -159,9 +159,9 @@ define <4 x i32> @cmp_sel_1_or_0_vec(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: cmp_sel_1_or_0_vec: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %cond = icmp eq <4 x i32> %x, %y %add = select <4 x i1> %cond, <4 x i32> , <4 x i32> @@ -184,9 +184,9 @@ define <4 x i32> @cmp_sel_0_or_1_vec(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: cmp_sel_0_or_1_vec: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s -; CHECK-NEXT: bic v0.16b, v2.16b, v0.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %cond = icmp eq <4 x i32> %x, %y %add = select <4 x i1> %cond, <4 x i32> , <4 x i32> diff --git a/llvm/test/CodeGen/AArch64/xor.ll b/llvm/test/CodeGen/AArch64/xor.ll --- a/llvm/test/CodeGen/AArch64/xor.ll +++ b/llvm/test/CodeGen/AArch64/xor.ll @@ -62,9 +62,9 @@ define <4 x i32> @vec_add_of_not_with_undef(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: vec_add_of_not_with_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.2d, #0xffffffffffffffff ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s -; CHECK-NEXT: add v0.4s, v0.4s, v2.4s +; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff +; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %t0 = sub <4 x i32> %x, %y %r = add <4 x i32> %t0, @@ -74,9 +74,9 @@ define <4 x i32> @vec_add_of_not_with_undef_decrement(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: vec_add_of_not_with_undef_decrement: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s -; CHECK-NEXT: add v0.4s, v0.4s, v2.4s +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %t0 = sub <4 x i32> %x, %y %r = add <4 x i32> %t0, diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-neon-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-neon-instructions.s --- a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-neon-instructions.s +++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-neon-instructions.s @@ -1091,21 +1091,21 @@ # CHECK-NEXT: 1 1 0.50 and v0.8b, v0.8b, v0.8b # CHECK-NEXT: 1 1 0.50 bic v0.4h, #15, lsl #8 # CHECK-NEXT: 1 1 0.50 bic v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 bif v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 bit v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 bsl v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 cls v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 cls v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 cls v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 cls v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 cls v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 cls v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 clz v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 clz v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 clz v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 clz v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 clz v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 clz v0.8h, v0.8h +# CHECK-NEXT: 1 2 1.00 bif v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 2 1.00 bit v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 bsl v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 3 1.00 cls v0.16b, v0.16b +# CHECK-NEXT: 1 3 0.50 cls v0.2s, v0.2s +# CHECK-NEXT: 1 3 0.50 cls v0.4h, v0.4h +# CHECK-NEXT: 1 3 1.00 cls v0.4s, v0.4s +# CHECK-NEXT: 1 3 0.50 cls v0.8b, v0.8b +# CHECK-NEXT: 1 3 1.00 cls v0.8h, v0.8h +# CHECK-NEXT: 1 2 1.00 clz v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 clz v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 clz v0.4h, v0.4h +# CHECK-NEXT: 1 2 1.00 clz v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 clz v0.8b, v0.8b +# CHECK-NEXT: 1 2 1.00 clz v0.8h, v0.8h # CHECK-NEXT: 1 2 0.50 cmeq d20, d21, #0 # CHECK-NEXT: 1 2 0.50 cmeq d20, d21, d22 # CHECK-NEXT: 1 2 1.00 cmeq v0.16b, v0.16b, #0 @@ -1128,8 +1128,8 @@ # CHECK-NEXT: 1 2 1.00 cmlt v0.8h, v0.8h, #0 # CHECK-NEXT: 1 3 0.50 cmtst d20, d21, d22 # CHECK-NEXT: 1 3 0.50 cmtst v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 cnt v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 cnt v0.8b, v0.8b +# CHECK-NEXT: 1 2 1.00 cnt v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 cnt v0.8b, v0.8b # CHECK-NEXT: 1 2 0.50 dup v0.16b, w28 # CHECK-NEXT: 1 2 0.50 dup v0.2d, x28 # CHECK-NEXT: 1 4 0.50 dup v0.2s, w28 @@ -1138,8 +1138,8 @@ # CHECK-NEXT: 1 4 0.50 dup v0.8b, w28 # CHECK-NEXT: 1 2 0.50 dup v0.8h, w28 # CHECK-NEXT: 1 1 1.00 eor v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 ext v0.16b, v0.16b, v0.16b, #3 -# CHECK-NEXT: 1 4 0.50 ext v0.8b, v0.8b, v0.8b, #3 +# CHECK-NEXT: 1 2 1.00 ext v0.16b, v0.16b, v0.16b, #3 +# CHECK-NEXT: 1 2 0.50 ext v0.8b, v0.8b, v0.8b, #3 # CHECK-NEXT: 1 4 0.50 fabd d29, d24, d20 # CHECK-NEXT: 1 4 0.50 fabd s29, s24, s20 # CHECK-NEXT: 1 4 1.00 fabd v0.4s, v0.4s, v0.4s @@ -1148,39 +1148,39 @@ # CHECK-NEXT: 1 4 0.50 fabs v0.4h, v0.4h # CHECK-NEXT: 1 4 1.00 fabs v0.4s, v0.4s # CHECK-NEXT: 1 4 1.00 fabs v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 facge d20, d21, d22 -# CHECK-NEXT: 1 4 0.50 facge s10, s11, s12 -# CHECK-NEXT: 1 4 1.00 facge v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 facgt d20, d21, d22 -# CHECK-NEXT: 1 4 0.50 facgt s10, s11, s12 -# CHECK-NEXT: 1 4 1.00 facgt v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 2 0.50 facge d20, d21, d22 +# CHECK-NEXT: 1 2 0.50 facge s10, s11, s12 +# CHECK-NEXT: 1 2 1.00 facge v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 facgt d20, d21, d22 +# CHECK-NEXT: 1 2 0.50 facgt s10, s11, s12 +# CHECK-NEXT: 1 2 1.00 facgt v0.2d, v0.2d, v0.2d # CHECK-NEXT: 1 4 1.00 fadd v0.4s, v0.4s, v0.4s # CHECK-NEXT: 1 4 0.50 faddp v0.2s, v0.2s, v0.2s # CHECK-NEXT: 1 4 1.00 faddp v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 fcmeq d20, d21, #0.0 -# CHECK-NEXT: 1 4 0.50 fcmeq d20, d21, d22 -# CHECK-NEXT: 1 4 0.50 fcmeq s10, s11, #0.0 -# CHECK-NEXT: 1 4 0.50 fcmeq s10, s11, s12 -# CHECK-NEXT: 1 4 0.50 fcmeq v0.2s, v0.2s, #0.0 -# CHECK-NEXT: 1 4 0.50 fcmeq v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 fcmge d20, d21, #0.0 -# CHECK-NEXT: 1 4 0.50 fcmge d20, d21, d22 -# CHECK-NEXT: 1 4 0.50 fcmge s10, s11, #0.0 -# CHECK-NEXT: 1 4 0.50 fcmge s10, s11, s12 -# CHECK-NEXT: 1 4 1.00 fcmge v0.2d, v0.2d, #0.0 -# CHECK-NEXT: 1 4 1.00 fcmge v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 fcmgt d20, d21, #0.0 -# CHECK-NEXT: 1 4 0.50 fcmgt d20, d21, d22 -# CHECK-NEXT: 1 4 0.50 fcmgt s10, s11, #0.0 -# CHECK-NEXT: 1 4 0.50 fcmgt s10, s11, s12 -# CHECK-NEXT: 1 4 1.00 fcmgt v0.4s, v0.4s, #0.0 -# CHECK-NEXT: 1 4 1.00 fcmgt v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 fcmle d20, d21, #0.0 -# CHECK-NEXT: 1 4 0.50 fcmle s10, s11, #0.0 -# CHECK-NEXT: 1 4 1.00 fcmle v0.2d, v0.2d, #0.0 -# CHECK-NEXT: 1 4 0.50 fcmlt d20, d21, #0.0 -# CHECK-NEXT: 1 4 0.50 fcmlt s10, s11, #0.0 -# CHECK-NEXT: 1 4 1.00 fcmlt v0.4s, v0.4s, #0.0 +# CHECK-NEXT: 1 2 0.50 fcmeq d20, d21, #0.0 +# CHECK-NEXT: 1 2 0.50 fcmeq d20, d21, d22 +# CHECK-NEXT: 1 2 0.50 fcmeq s10, s11, #0.0 +# CHECK-NEXT: 1 2 0.50 fcmeq s10, s11, s12 +# CHECK-NEXT: 1 2 0.50 fcmeq v0.2s, v0.2s, #0.0 +# CHECK-NEXT: 1 2 0.50 fcmeq v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 fcmge d20, d21, #0.0 +# CHECK-NEXT: 1 2 0.50 fcmge d20, d21, d22 +# CHECK-NEXT: 1 2 0.50 fcmge s10, s11, #0.0 +# CHECK-NEXT: 1 2 0.50 fcmge s10, s11, s12 +# CHECK-NEXT: 1 2 1.00 fcmge v0.2d, v0.2d, #0.0 +# CHECK-NEXT: 1 2 1.00 fcmge v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 fcmgt d20, d21, #0.0 +# CHECK-NEXT: 1 2 0.50 fcmgt d20, d21, d22 +# CHECK-NEXT: 1 2 0.50 fcmgt s10, s11, #0.0 +# CHECK-NEXT: 1 2 0.50 fcmgt s10, s11, s12 +# CHECK-NEXT: 1 2 1.00 fcmgt v0.4s, v0.4s, #0.0 +# CHECK-NEXT: 1 2 1.00 fcmgt v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 fcmle d20, d21, #0.0 +# CHECK-NEXT: 1 2 0.50 fcmle s10, s11, #0.0 +# CHECK-NEXT: 1 2 1.00 fcmle v0.2d, v0.2d, #0.0 +# CHECK-NEXT: 1 2 0.50 fcmlt d20, d21, #0.0 +# CHECK-NEXT: 1 2 0.50 fcmlt s10, s11, #0.0 +# CHECK-NEXT: 1 2 1.00 fcmlt v0.4s, v0.4s, #0.0 # CHECK-NEXT: 1 4 0.50 fcvtas d21, d14 # CHECK-NEXT: 1 4 0.50 fcvtas s12, s13 # CHECK-NEXT: 1 4 0.50 fcvtas v0.2d, v0.2d @@ -1303,16 +1303,16 @@ # CHECK-NEXT: 1 4 0.50 fmls d0, d4, v0.d[1] # CHECK-NEXT: 1 4 0.50 fmls s3, s5, v0.s[3] # CHECK-NEXT: 1 4 0.50 fmls v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 fmov v0.2d, #-1.25000000 -# CHECK-NEXT: 1 4 0.50 fmov v0.2s, #13.00000000 -# CHECK-NEXT: 1 4 1.00 fmov v0.4s, #1.00000000 -# CHECK-NEXT: 1 4 0.50 fmul d0, d1, v0.d[1] -# CHECK-NEXT: 1 4 0.50 fmul s0, s1, v0.s[3] +# CHECK-NEXT: 1 1 0.50 fmov v0.2d, #-1.25000000 +# CHECK-NEXT: 1 1 0.50 fmov v0.2s, #13.00000000 +# CHECK-NEXT: 1 1 0.50 fmov v0.4s, #1.00000000 +# CHECK-NEXT: 1 4 1.00 fmul d0, d1, v0.d[1] +# CHECK-NEXT: 1 4 1.00 fmul s0, s1, v0.s[3] # CHECK-NEXT: 1 4 0.50 fmul v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 fmulx d0, d4, v0.d[1] +# CHECK-NEXT: 1 4 1.00 fmulx d0, d4, v0.d[1] # CHECK-NEXT: 1 4 0.50 fmulx d23, d11, d1 # CHECK-NEXT: 1 4 0.50 fmulx s20, s22, s15 -# CHECK-NEXT: 1 4 0.50 fmulx s3, s5, v0.s[3] +# CHECK-NEXT: 1 4 1.00 fmulx s3, s5, v0.s[3] # CHECK-NEXT: 1 4 1.00 fmulx v0.2d, v0.2d, v0.2d # CHECK-NEXT: 1 4 0.50 fmulx v0.2s, v0.2s, v0.2s # CHECK-NEXT: 1 4 1.00 fmulx v0.4s, v0.4s, v0.4s @@ -1431,12 +1431,12 @@ # CHECK-NEXT: 1 4 0.50 mov s17, v0.s[2] # CHECK-NEXT: 1 1 1.00 mov v0.16b, v0.16b # CHECK-NEXT: 1 1 0.50 mov v0.8b, v0.8b -# CHECK-NEXT: 1 4 0.50 movi d15, #0xff00ff00ff00ff -# CHECK-NEXT: 1 4 1.00 movi v0.16b, #31 -# CHECK-NEXT: 1 4 1.00 movi v0.2d, #0xff0000ff0000ffff -# CHECK-NEXT: 1 4 0.50 movi v0.2s, #8, msl #8 -# CHECK-NEXT: 1 4 1.00 movi v0.4s, #255, lsl #24 -# CHECK-NEXT: 1 4 0.50 movi v0.8b, #255 +# CHECK-NEXT: 1 1 0.50 movi d15, #0xff00ff00ff00ff +# CHECK-NEXT: 1 1 1.00 movi v0.16b, #31 +# CHECK-NEXT: 1 1 1.00 movi v0.2d, #0xff0000ff0000ffff +# CHECK-NEXT: 1 1 0.50 movi v0.2s, #8, msl #8 +# CHECK-NEXT: 1 1 1.00 movi v0.4s, #255, lsl #24 +# CHECK-NEXT: 1 1 0.50 movi v0.8b, #255 # CHECK-NEXT: 1 4 0.50 mul v0.8b, v0.8b, v0.8b # CHECK-NEXT: 1 1 0.50 mvni v0.2s, #0 # CHECK-NEXT: 1 1 1.00 mvni v0.4s, #16, msl #16 @@ -1463,20 +1463,20 @@ # CHECK-NEXT: 1 4 2.00 raddhn2 v0.16b, v0.8h, v0.8h # CHECK-NEXT: 1 4 2.00 raddhn2 v0.4s, v0.2d, v0.2d # CHECK-NEXT: 1 4 2.00 raddhn2 v0.8h, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 rbit v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 rbit v0.8b, v0.8b -# CHECK-NEXT: 1 4 0.50 rev16 v21.8b, v1.8b -# CHECK-NEXT: 1 4 1.00 rev16 v30.16b, v31.16b -# CHECK-NEXT: 1 4 0.50 rev32 v0.4h, v9.4h -# CHECK-NEXT: 1 4 0.50 rev32 v21.8b, v1.8b -# CHECK-NEXT: 1 4 1.00 rev32 v30.16b, v31.16b -# CHECK-NEXT: 1 4 1.00 rev32 v4.8h, v7.8h -# CHECK-NEXT: 1 4 1.00 rev64 v0.16b, v31.16b -# CHECK-NEXT: 1 4 0.50 rev64 v1.8b, v9.8b -# CHECK-NEXT: 1 4 0.50 rev64 v13.4h, v21.4h -# CHECK-NEXT: 1 4 1.00 rev64 v2.8h, v4.8h -# CHECK-NEXT: 1 4 0.50 rev64 v4.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 rev64 v6.4s, v8.4s +# CHECK-NEXT: 1 2 1.00 rbit v0.16b, v0.16b +# CHECK-NEXT: 1 2 0.50 rbit v0.8b, v0.8b +# CHECK-NEXT: 1 2 0.50 rev16 v21.8b, v1.8b +# CHECK-NEXT: 1 2 1.00 rev16 v30.16b, v31.16b +# CHECK-NEXT: 1 2 0.50 rev32 v0.4h, v9.4h +# CHECK-NEXT: 1 2 0.50 rev32 v21.8b, v1.8b +# CHECK-NEXT: 1 2 1.00 rev32 v30.16b, v31.16b +# CHECK-NEXT: 1 2 1.00 rev32 v4.8h, v7.8h +# CHECK-NEXT: 1 2 1.00 rev64 v0.16b, v31.16b +# CHECK-NEXT: 1 2 0.50 rev64 v1.8b, v9.8b +# CHECK-NEXT: 1 2 0.50 rev64 v13.4h, v21.4h +# CHECK-NEXT: 1 2 1.00 rev64 v2.8h, v4.8h +# CHECK-NEXT: 1 2 0.50 rev64 v4.2s, v0.2s +# CHECK-NEXT: 1 2 1.00 rev64 v6.4s, v8.4s # CHECK-NEXT: 1 3 0.50 rshrn v0.2s, v0.2d, #3 # CHECK-NEXT: 1 3 0.50 rshrn v0.4h, v0.4s, #3 # CHECK-NEXT: 1 3 0.50 rshrn v0.8b, v0.8h, #3 @@ -1736,24 +1736,24 @@ # CHECK-NEXT: 1 3 1.00 sqsub v0.2d, v0.2d, v0.2d # CHECK-NEXT: 1 3 1.00 sqsub v0.4s, v0.4s, v0.4s # CHECK-NEXT: 1 3 0.50 sqsub v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 0.50 sqxtn b18, h18 -# CHECK-NEXT: 1 4 0.50 sqxtn h20, s17 -# CHECK-NEXT: 1 4 0.50 sqxtn s19, d14 -# CHECK-NEXT: 1 4 1.00 sqxtn v0.2s, v0.2d -# CHECK-NEXT: 1 4 1.00 sqxtn v0.4h, v0.4s -# CHECK-NEXT: 1 4 1.00 sqxtn v0.8b, v0.8h -# CHECK-NEXT: 1 4 1.00 sqxtn2 v0.16b, v0.8h -# CHECK-NEXT: 1 4 1.00 sqxtn2 v0.4s, v0.2d -# CHECK-NEXT: 1 4 1.00 sqxtn2 v0.8h, v0.4s -# CHECK-NEXT: 1 4 0.50 sqxtun b19, h14 -# CHECK-NEXT: 1 4 0.50 sqxtun h21, s15 -# CHECK-NEXT: 1 4 0.50 sqxtun s20, d12 -# CHECK-NEXT: 1 4 1.00 sqxtun v0.2s, v0.2d -# CHECK-NEXT: 1 4 1.00 sqxtun v0.4h, v0.4s -# CHECK-NEXT: 1 4 1.00 sqxtun v0.8b, v0.8h -# CHECK-NEXT: 1 4 1.00 sqxtun2 v0.16b, v0.8h -# CHECK-NEXT: 1 4 1.00 sqxtun2 v0.4s, v0.2d -# CHECK-NEXT: 1 4 1.00 sqxtun2 v0.8h, v0.4s +# CHECK-NEXT: 1 2 0.50 sqxtn b18, h18 +# CHECK-NEXT: 1 2 0.50 sqxtn h20, s17 +# CHECK-NEXT: 1 2 0.50 sqxtn s19, d14 +# CHECK-NEXT: 1 2 0.50 sqxtn v0.2s, v0.2d +# CHECK-NEXT: 1 2 0.50 sqxtn v0.4h, v0.4s +# CHECK-NEXT: 1 2 0.50 sqxtn v0.8b, v0.8h +# CHECK-NEXT: 1 2 0.50 sqxtn2 v0.16b, v0.8h +# CHECK-NEXT: 1 2 0.50 sqxtn2 v0.4s, v0.2d +# CHECK-NEXT: 1 2 0.50 sqxtn2 v0.8h, v0.4s +# CHECK-NEXT: 1 2 0.50 sqxtun b19, h14 +# CHECK-NEXT: 1 2 0.50 sqxtun h21, s15 +# CHECK-NEXT: 1 2 0.50 sqxtun s20, d12 +# CHECK-NEXT: 1 2 0.50 sqxtun v0.2s, v0.2d +# CHECK-NEXT: 1 2 0.50 sqxtun v0.4h, v0.4s +# CHECK-NEXT: 1 2 0.50 sqxtun v0.8b, v0.8h +# CHECK-NEXT: 1 2 0.50 sqxtun2 v0.16b, v0.8h +# CHECK-NEXT: 1 2 0.50 sqxtun2 v0.4s, v0.2d +# CHECK-NEXT: 1 2 0.50 sqxtun2 v0.8h, v0.4s # CHECK-NEXT: 1 2 0.50 srhadd v0.2s, v0.2s, v0.2s # CHECK-NEXT: 1 2 0.50 srhadd v0.4h, v0.4h, v0.4h # CHECK-NEXT: 1 2 0.50 srhadd v0.8b, v0.8b, v0.8b @@ -1855,36 +1855,36 @@ # CHECK-NEXT: 1 3 1.00 suqadd v0.4s, v0.4s # CHECK-NEXT: 1 3 0.50 suqadd v0.8b, v0.8b # CHECK-NEXT: 1 3 1.00 suqadd v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 tbl v0.16b, { v0.16b }, v0.16b -# CHECK-NEXT: 1 4 1.00 tbl v0.16b, { v0.16b, v1.16b }, v0.16b -# CHECK-NEXT: 1 4 1.00 tbl v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b -# CHECK-NEXT: 1 4 1.00 tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b -# CHECK-NEXT: 1 4 0.50 tbl v0.8b, { v0.16b }, v0.8b -# CHECK-NEXT: 1 4 0.50 tbl v0.8b, { v0.16b, v1.16b }, v0.8b -# CHECK-NEXT: 1 4 0.50 tbl v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b -# CHECK-NEXT: 1 4 0.50 tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b -# CHECK-NEXT: 1 4 1.00 tbx v0.16b, { v0.16b }, v0.16b -# CHECK-NEXT: 1 4 1.00 tbx v0.16b, { v0.16b, v1.16b }, v0.16b -# CHECK-NEXT: 1 4 1.00 tbx v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b -# CHECK-NEXT: 1 4 1.00 tbx v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b -# CHECK-NEXT: 1 4 0.50 tbx v0.8b, { v0.16b }, v0.8b -# CHECK-NEXT: 1 4 0.50 tbx v0.8b, { v0.16b, v1.16b }, v0.8b -# CHECK-NEXT: 1 4 0.50 tbx v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b -# CHECK-NEXT: 1 4 0.50 tbx v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b -# CHECK-NEXT: 1 4 1.00 trn1 v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 trn1 v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 trn1 v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 trn1 v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 trn1 v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 trn1 v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 trn1 v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 trn2 v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 trn2 v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 trn2 v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 trn2 v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 trn2 v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 trn2 v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 trn2 v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 2 1.00 tbl v0.16b, { v0.16b }, v0.16b +# CHECK-NEXT: 1 3 2.00 tbl v0.16b, { v0.16b, v1.16b }, v0.16b +# CHECK-NEXT: 1 4 3.00 tbl v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b +# CHECK-NEXT: 1 5 4.00 tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b +# CHECK-NEXT: 1 2 1.00 tbl v0.8b, { v0.16b }, v0.8b +# CHECK-NEXT: 1 3 2.00 tbl v0.8b, { v0.16b, v1.16b }, v0.8b +# CHECK-NEXT: 1 4 3.00 tbl v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b +# CHECK-NEXT: 1 5 4.00 tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b +# CHECK-NEXT: 1 3 2.00 tbx v0.16b, { v0.16b }, v0.16b +# CHECK-NEXT: 1 4 3.00 tbx v0.16b, { v0.16b, v1.16b }, v0.16b +# CHECK-NEXT: 1 5 4.00 tbx v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b +# CHECK-NEXT: 1 6 5.00 tbx v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b +# CHECK-NEXT: 1 3 2.00 tbx v0.8b, { v0.16b }, v0.8b +# CHECK-NEXT: 1 4 3.00 tbx v0.8b, { v0.16b, v1.16b }, v0.8b +# CHECK-NEXT: 1 5 4.00 tbx v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b +# CHECK-NEXT: 1 6 5.00 tbx v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b +# CHECK-NEXT: 1 2 1.00 trn1 v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 2 1.00 trn1 v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 2 1.00 trn1 v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 trn1 v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 2 1.00 trn1 v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 trn1 v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 2 1.00 trn1 v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 2 1.00 trn2 v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 2 1.00 trn2 v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 2 1.00 trn2 v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 trn2 v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 2 1.00 trn2 v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 trn2 v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 2 1.00 trn2 v0.8h, v0.8h, v0.8h # CHECK-NEXT: 1 4 2.00 uaba v0.8b, v0.8b, v0.8b # CHECK-NEXT: 1 4 2.00 uabal v0.2d, v0.2s, v0.2s # CHECK-NEXT: 1 4 2.00 uabal v0.4s, v0.4h, v0.4h @@ -2013,15 +2013,15 @@ # CHECK-NEXT: 1 4 1.00 uqshrn2 v0.8h, v0.4s, #3 # CHECK-NEXT: 1 3 0.50 uqsub d16, d16, d16 # CHECK-NEXT: 1 3 0.50 uqsub v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 uqxtn b18, h18 -# CHECK-NEXT: 1 4 0.50 uqxtn h20, s17 -# CHECK-NEXT: 1 4 0.50 uqxtn s19, d14 -# CHECK-NEXT: 1 4 1.00 uqxtn v0.2s, v0.2d -# CHECK-NEXT: 1 4 1.00 uqxtn v0.4h, v0.4s -# CHECK-NEXT: 1 4 1.00 uqxtn v0.8b, v0.8h -# CHECK-NEXT: 1 4 1.00 uqxtn2 v0.16b, v0.8h -# CHECK-NEXT: 1 4 1.00 uqxtn2 v0.4s, v0.2d -# CHECK-NEXT: 1 4 1.00 uqxtn2 v0.8h, v0.4s +# CHECK-NEXT: 1 2 0.50 uqxtn b18, h18 +# CHECK-NEXT: 1 2 0.50 uqxtn h20, s17 +# CHECK-NEXT: 1 2 0.50 uqxtn s19, d14 +# CHECK-NEXT: 1 2 0.50 uqxtn v0.2s, v0.2d +# CHECK-NEXT: 1 2 0.50 uqxtn v0.4h, v0.4s +# CHECK-NEXT: 1 2 0.50 uqxtn v0.8b, v0.8h +# CHECK-NEXT: 1 2 0.50 uqxtn2 v0.16b, v0.8h +# CHECK-NEXT: 1 2 0.50 uqxtn2 v0.4s, v0.2d +# CHECK-NEXT: 1 2 0.50 uqxtn2 v0.8h, v0.4s # CHECK-NEXT: 1 4 0.50 urecpe v0.2s, v0.2s # CHECK-NEXT: 1 4 1.00 urecpe v0.4s, v0.4s # CHECK-NEXT: 1 2 1.00 urhadd v0.16b, v0.16b, v0.16b @@ -2095,40 +2095,40 @@ # CHECK-NEXT: 1 3 1.00 usubw2 v0.2d, v0.2d, v0.4s # CHECK-NEXT: 1 3 1.00 usubw2 v0.4s, v0.4s, v0.8h # CHECK-NEXT: 1 3 1.00 usubw2 v0.8h, v0.8h, v0.16b -# CHECK-NEXT: 1 4 1.00 uzp1 v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 uzp1 v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 uzp1 v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 uzp1 v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 uzp1 v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 uzp1 v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 uzp1 v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 uzp2 v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 uzp2 v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 uzp2 v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 uzp2 v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 uzp2 v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 uzp2 v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 uzp2 v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 2 1.00 uzp1 v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 2 1.00 uzp1 v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 2 0.50 uzp1 v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 uzp1 v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 2 1.00 uzp1 v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 uzp1 v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 2 1.00 uzp1 v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 2 1.00 uzp2 v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 2 1.00 uzp2 v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 2 0.50 uzp2 v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 uzp2 v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 2 1.00 uzp2 v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 uzp2 v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 2 1.00 uzp2 v0.8h, v0.8h, v0.8h # CHECK-NEXT: 1 2 0.50 xtn v0.2s, v0.2d # CHECK-NEXT: 1 2 0.50 xtn v0.4h, v0.4s # CHECK-NEXT: 1 2 0.50 xtn v0.8b, v0.8h # CHECK-NEXT: 1 2 0.50 xtn2 v0.16b, v0.8h # CHECK-NEXT: 1 2 0.50 xtn2 v0.4s, v0.2d # CHECK-NEXT: 1 2 0.50 xtn2 v0.8h, v0.4s -# CHECK-NEXT: 1 4 1.00 zip1 v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 zip1 v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 zip1 v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 zip1 v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 zip1 v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 zip1 v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 zip1 v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 zip2 v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 zip2 v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 zip2 v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 zip2 v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 zip2 v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 zip2 v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 zip2 v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 2 1.00 zip1 v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 2 1.00 zip1 v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 2 0.50 zip1 v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 zip1 v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 2 1.00 zip1 v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 zip1 v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 2 1.00 zip1 v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 2 1.00 zip2 v0.16b, v0.16b, v0.16b +# CHECK-NEXT: 1 2 1.00 zip2 v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 2 0.50 zip2 v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 2 0.50 zip2 v0.4h, v0.4h, v0.4h +# CHECK-NEXT: 1 2 1.00 zip2 v0.4s, v0.4s, v0.4s +# CHECK-NEXT: 1 2 0.50 zip2 v0.8b, v0.8b, v0.8b +# CHECK-NEXT: 1 2 1.00 zip2 v0.8h, v0.8h, v0.8h # CHECK: Resources: # CHECK-NEXT: [0.0] - CortexA55UnitALU @@ -2146,7 +2146,7 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0.0] [0.1] [1] [2] [3.0] [3.1] [4] [5.0] [5.1] [6] [7] [8] -# CHECK-NEXT: - - - - 780.00 780.00 197.00 3.00 3.00 107.00 - 52.00 +# CHECK-NEXT: - - - - 809.00 809.00 197.00 3.00 3.00 107.00 - 52.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0.0] [0.1] [1] [2] [3.0] [3.1] [4] [5.0] [5.1] [6] [7] [8] Instructions: @@ -2383,16 +2383,16 @@ # CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmls d0, d4, v0.d[1] # CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmls s3, s5, v0.s[3] # CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmls v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmov v0.2d, #-1.25000000 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmov v0.2d, #-1.25000000 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmov v0.2s, #13.00000000 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmov v0.4s, #1.00000000 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmul d0, d1, v0.d[1] -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmul s0, s1, v0.s[3] +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmov v0.4s, #1.00000000 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmul d0, d1, v0.d[1] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmul s0, s1, v0.s[3] # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmul v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmulx d0, d4, v0.d[1] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmulx d0, d4, v0.d[1] # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmulx d23, d11, d1 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmulx s20, s22, s15 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmulx s3, s5, v0.s[3] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmulx s3, s5, v0.s[3] # CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmulx v0.2d, v0.2d, v0.2d # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmulx v0.2s, v0.2s, v0.2s # CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmulx v0.4s, v0.4s, v0.4s @@ -2819,21 +2819,21 @@ # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqxtn b18, h18 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqxtn h20, s17 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqxtn s19, d14 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqxtn v0.2s, v0.2d -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqxtn v0.4h, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqxtn v0.8b, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqxtn2 v0.16b, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqxtn2 v0.4s, v0.2d -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqxtn2 v0.8h, v0.4s +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqxtn v0.2s, v0.2d +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqxtn v0.4h, v0.4s +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqxtn v0.8b, v0.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqxtn2 v0.16b, v0.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqxtn2 v0.4s, v0.2d +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqxtn2 v0.8h, v0.4s # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqxtun b19, h14 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqxtun h21, s15 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqxtun s20, d12 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqxtun v0.2s, v0.2d -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqxtun v0.4h, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqxtun v0.8b, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqxtun2 v0.16b, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqxtun2 v0.4s, v0.2d -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqxtun2 v0.8h, v0.4s +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqxtun v0.2s, v0.2d +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqxtun v0.4h, v0.4s +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqxtun v0.8b, v0.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqxtun2 v0.16b, v0.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqxtun2 v0.4s, v0.2d +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqxtun2 v0.8h, v0.4s # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srhadd v0.2s, v0.2s, v0.2s # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srhadd v0.4h, v0.4h, v0.4h # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srhadd v0.8b, v0.8b, v0.8b @@ -2936,31 +2936,31 @@ # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - suqadd v0.8b, v0.8b # CHECK-NEXT: - - - - 1.00 1.00 - - - - - - suqadd v0.8h, v0.8h # CHECK-NEXT: - - - - 1.00 1.00 - - - - - - tbl v0.16b, { v0.16b }, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - tbl v0.16b, { v0.16b, v1.16b }, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - tbl v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - tbl v0.8b, { v0.16b }, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - tbl v0.8b, { v0.16b, v1.16b }, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - tbl v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - tbx v0.16b, { v0.16b }, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - tbx v0.16b, { v0.16b, v1.16b }, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - tbx v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - tbx v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - tbx v0.8b, { v0.16b }, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - tbx v0.8b, { v0.16b, v1.16b }, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - tbx v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - tbx v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - tbl v0.16b, { v0.16b, v1.16b }, v0.16b +# CHECK-NEXT: - - - - 3.00 3.00 - - - - - - tbl v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b +# CHECK-NEXT: - - - - 4.00 4.00 - - - - - - tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - tbl v0.8b, { v0.16b }, v0.8b +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - tbl v0.8b, { v0.16b, v1.16b }, v0.8b +# CHECK-NEXT: - - - - 3.00 3.00 - - - - - - tbl v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b +# CHECK-NEXT: - - - - 4.00 4.00 - - - - - - tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - tbx v0.16b, { v0.16b }, v0.16b +# CHECK-NEXT: - - - - 3.00 3.00 - - - - - - tbx v0.16b, { v0.16b, v1.16b }, v0.16b +# CHECK-NEXT: - - - - 4.00 4.00 - - - - - - tbx v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b +# CHECK-NEXT: - - - - 5.00 5.00 - - - - - - tbx v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - tbx v0.8b, { v0.16b }, v0.8b +# CHECK-NEXT: - - - - 3.00 3.00 - - - - - - tbx v0.8b, { v0.16b, v1.16b }, v0.8b +# CHECK-NEXT: - - - - 4.00 4.00 - - - - - - tbx v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b +# CHECK-NEXT: - - - - 5.00 5.00 - - - - - - tbx v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b # CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn1 v0.16b, v0.16b, v0.16b # CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn1 v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - trn1 v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn1 v0.2s, v0.2s, v0.2s # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - trn1 v0.4h, v0.4h, v0.4h # CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn1 v0.4s, v0.4s, v0.4s # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - trn1 v0.8b, v0.8b, v0.8b # CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn1 v0.8h, v0.8h, v0.8h # CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn2 v0.16b, v0.16b, v0.16b # CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn2 v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - trn2 v0.2s, v0.2s, v0.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn2 v0.2s, v0.2s, v0.2s # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - trn2 v0.4h, v0.4h, v0.4h # CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn2 v0.4s, v0.4s, v0.4s # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - trn2 v0.8b, v0.8b, v0.8b @@ -3096,12 +3096,12 @@ # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqxtn b18, h18 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqxtn h20, s17 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqxtn s19, d14 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqxtn v0.2s, v0.2d -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqxtn v0.4h, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqxtn v0.8b, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqxtn2 v0.16b, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqxtn2 v0.4s, v0.2d -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqxtn2 v0.8h, v0.4s +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqxtn v0.2s, v0.2d +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqxtn v0.4h, v0.4s +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqxtn v0.8b, v0.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqxtn2 v0.16b, v0.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqxtn2 v0.4s, v0.2d +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqxtn2 v0.8h, v0.4s # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - urecpe v0.2s, v0.2s # CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urecpe v0.4s, v0.4s # CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urhadd v0.16b, v0.16b, v0.16b