diff --git a/llvm/lib/Target/AArch64/AArch64SchedA55.td b/llvm/lib/Target/AArch64/AArch64SchedA55.td --- a/llvm/lib/Target/AArch64/AArch64SchedA55.td +++ b/llvm/lib/Target/AArch64/AArch64SchedA55.td @@ -24,9 +24,10 @@ // or 5. Setting it 4 looked to be good trade-off. let MispredictPenalty = 8; // A branch direction mispredict. let PostRAScheduler = 1; // Enable PostRA scheduler pass. - let CompleteModel = 0; // Covers instructions applicable to Cortex-A55. + let CompleteModel = 1; // Covers instructions applicable to Cortex-A55. - list UnsupportedFeatures = [HasSVE]; + list UnsupportedFeatures = !listconcat(SVEUnsupported.F, + PAUnsupported.F); // FIXME: Remove when all errors have been fixed. let FullInstRWOverlapCheck = 0; @@ -149,8 +150,54 @@ def : WriteRes { let Latency = 4; } def : WriteRes { let Latency = 3; } def : WriteRes { let Latency = 3; } -def : WriteRes { let Latency = 4; } -def : WriteRes { let Latency = 4; let BeginGroup = 1; } + +class CortexA55WriteVd : SchedWriteRes<[res]> { + let Latency = n; +} +class CortexA55WriteVq : SchedWriteRes<[res, res]> { + let Latency = n; + let BeginGroup = 1; +} +class CortexA55WriteV : SchedWriteVariant<[ + SchedVar, [CortexA55WriteVq]>, + SchedVar]> +]>; +class CortexA55WriteVSlot2 rcl, list resl> : SchedWriteRes { + let Latency = n; + let ResourceCycles = rcl; + let EndGroup = 1; +} +class CortexA55WriteVAlu : CortexA55WriteV {} +class CortexA55WriteVAlud : CortexA55WriteVd {} +class CortexA55WriteVAluSlot2 : + CortexA55WriteVSlot2 {} +class CortexA55WriteVMac : CortexA55WriteV {} +class CortexA55WriteVMacSlot2 : + CortexA55WriteVSlot2 {} +class CortexA55WriteVDivSlot2 : + CortexA55WriteVSlot2 {} +class CortexA55WriteCrypto : + SchedWriteRes<[CortexA55UnitFPALU, CortexA55UnitFPALU]> { let Latency = n; } + +def CortexA55WriteMLA : CortexA55WriteVMac<4>; +def CortexA55WriteMLAIx : CortexA55WriteVMacSlot2<4>; +def CortexA55WriteMLAL : CortexA55WriteVMacSlot2<4>; +def CortexA55WriteDOT : CortexA55WriteVMac<4>; +def CortexA55WriteDOTSc : CortexA55WriteVMacSlot2<4>; +def CortexA55WriteAESEncrypt : CortexA55WriteVAluSlot2<2, 1>; +def CortexA55WriteAESDecrypt : CortexA55WriteVAluSlot2<2, 1>; + +// NEON ALU/MAC forwarding paths +def CortexA55ReadMLA : SchedReadAdvance<3, [CortexA55WriteMLA]>; +def CortexA55ReadMLAIx : SchedReadAdvance<3, [CortexA55WriteMLAIx]>; +def CortexA55ReadMLAL : SchedReadAdvance<3, [CortexA55WriteMLAL]>; +def CortexA55ReadDOT : SchedReadAdvance<3, [CortexA55WriteDOT]>; +def CortexA55ReadDOTSc : SchedReadAdvance<3, [CortexA55WriteDOTSc]>; +def CortexA55ReadAESMC : SchedReadAdvance<1, [CortexA55WriteAESEncrypt]>; +def CortexA55ReadAESIMC : SchedReadAdvance<1, [CortexA55WriteAESDecrypt]>; + +def : SchedAlias>; +def : SchedAlias>; // FP ALU specific new schedwrite definitions def CortexA55WriteFPALU_F2 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 2;} @@ -235,10 +282,15 @@ //--- // Miscellaneous //--- -def : InstRW<[CortexA55WriteVLD1SI,CortexA55WriteLDP1], (instregex "LDPS?W")>; -def : InstRW<[CortexA55WriteVLD1,CortexA55WriteLDP1], (instregex "LDPS[^W]")>; -def : InstRW<[CortexA55WriteVLD1,CortexA55WriteLDP2], (instregex "LDP(X|D)")>; -def : InstRW<[CortexA55WriteVLD1,CortexA55WriteLDP4], (instregex "LDPQ")>; +def : InstRW<[CortexA55WriteVLD1SI,CortexA55WriteLDP1], (instregex "LDPS?Wi")>; +def : InstRW<[CortexA55WriteVLD1,CortexA55WriteLDP1], (instregex "LDPSi")>; +def : InstRW<[CortexA55WriteVLD1,CortexA55WriteLDP2], (instregex "LDP(X|D)i")>; +def : InstRW<[CortexA55WriteVLD1,CortexA55WriteLDP4], (instregex "LDPQi")>; +def : InstRW<[WriteAdr, CortexA55WriteVLD1SI,CortexA55WriteLDP1], (instregex "LDPS?W(pre|post)")>; +def : InstRW<[WriteAdr, CortexA55WriteVLD1,CortexA55WriteLDP1], (instregex "LDPS(pre|post)")>; +def : InstRW<[WriteAdr, CortexA55WriteVLD1,CortexA55WriteLDP2], (instregex "LDP(X|D)(pre|post)")>; +def : InstRW<[WriteAdr, CortexA55WriteVLD1,CortexA55WriteLDP4], (instregex "LDPQ(pre|post)")>; + def : InstRW<[WriteI], (instrs COPY)>; //--- // Vector Loads - 64-bit per cycle @@ -354,4 +406,181 @@ def : InstRW<[CortexA55WriteFSqrtSP], (instregex "^.*SQRT.*32$")>; def : InstRW<[CortexA55WriteFSqrtDP], (instregex "^.*SQRT.*64$")>; +// 4.15. Advanced SIMD integer instructions +// ASIMD absolute diff +def : InstRW<[CortexA55WriteVAlu<3>], (instregex "SABDv", "UABDv")>; +// ASIMD absolute diff accum +def : InstRW<[CortexA55WriteVAluSlot2<4, 2>], (instregex "SABAL?v", + "UABAL?v")>; +// ASIMD absolute diff long +def : InstRW<[CortexA55WriteVAluSlot2<3, 1>], (instregex "SABDLv", "UABDLv")>; +// ASIMD arith #1 +def : InstRW<[CortexA55WriteVAlu<2>], (instregex "ADDv", "SUBv", "NEGv", + "SR?HADDv", "UR?HADDv", "SHSUBv", "UHSUBv")>; +// ASIMD arith #2 +def : InstRW<[CortexA55WriteVAlu<3>], (instregex "ABSv", "SADDLPv", + "UADDLPv", "SQADDv", "UQADDv", "SQNEGv", "SQSUBv", "UQSUBv", "SUQADDv", + "USQADDv", "ADDPv(2i32|2i64|4i16|4i32|8i8|8i16|16i8)$")>; +// ASIMD arith #3 +def : InstRW<[CortexA55WriteVAluSlot2<3, 1>], (instregex "SADDLv", "UADDLv", "SADDWv", + "UADDWv", "SSUBLv", "USUBLv", "SSUBWv", "USUBWv", "ADDHNv", "SUBHNv")>; +// ASIMD arith #5 +def : InstRW<[CortexA55WriteVAluSlot2<4, 2>], (instregex "RADDHNv", "RSUBHNv")>; +// ASIMD arith, reduce +def : InstRW<[CortexA55WriteVAluSlot2<3, 1>], (instregex "ADDVv", "SADDLVv", "UADDLVv")>; +// ASIMD compare #1 +def : InstRW<[CortexA55WriteVAlu<2>], (instregex "CM(EQ|GE|GT|HI|HS|LE|LT)v")>; +// ASIMD compare #2 +def : InstRW<[CortexA55WriteVAlu<3>], (instregex "CMTSTv")>; +// ASIMD logical $1 +def : InstRW<[CortexA55WriteVAlu<1>], (instregex "ANDv", "EORv", "NOTv", + "ORNv", "ORRv(4i32|8i8|8i16|16i8)$", "BICv(4i32|8i8|8i16|16i8)$")>; +def : InstRW<[CortexA55WriteVAlu<1>], (instregex "BICv(2i32|4i16)$", + "ORRv(2i32|4i16)$", "MVNIv")>; +def : InstRW<[CortexA55WriteVAlu<2>], (instregex "CPY")>; +// ASIMD max/min, basic +def : InstRW<[CortexA55WriteVAlu<2>], (instregex "SMAXP?v", "SMINP?v", "UMAXP?v", + "UMINP?v", "UMINP?v")>; +// SIMD max/min, reduce +def : InstRW<[CortexA55WriteVAluSlot2<4, 1>], (instregex "SMAXVv", "SMINVv", "UMAXVv", + "UMINVv")>; +// ASIMD multiply, by element +def : InstRW<[CortexA55WriteVAluSlot2<4, 1>], (instregex + "MULv(2i32|4i16|4i32|8i16)_indexed$", + "SQR?DMULHv(1i16|1i32|2i32|4i16|4i32|8i16)_indexed$")>; +// ASIMD multiply +def : InstRW<[CortexA55WriteVAlu<3>], (instregex "PMULv")>; +// ASIMD multiply accumulate +def : InstRW<[CortexA55WriteMLA, CortexA55ReadMLA], (instregex "ML[AS]v(16i8|2i32|4i16|4i32|8i16|8i8)$")>; +def : InstRW<[CortexA55WriteMLAIx, CortexA55ReadMLAIx], (instregex "ML[AS]v(2i32|4i16|4i32|8i16)_indexed$")>; +// ASIMD multiply accumulate half +def : InstRW<[CortexA55WriteVAluSlot2<4, 1>], (instregex "SQRDML[AS]H[vi]")>; +// ASIMD multiply accumulate long +def : InstRW<[CortexA55WriteMLAL, CortexA55ReadMLAL], (instregex "[SU]ML[AS]Lv")>; +// ASIMD multiply accumulate long #2 +def : InstRW<[CortexA55WriteVAluSlot2<4, 1>], (instregex "SQDML[AS]L[iv]")>; +// ASIMD dot product +def : InstRW<[CortexA55WriteDOT, CortexA55ReadDOT], (instregex "[SU]DOTv")>; +// ASIMD dot product, by scalar +def : InstRW<[CortexA55WriteDOTSc, CortexA55ReadDOTSc], (instregex "[SU]DOTlanev")>; +// ASIMD multiply long +def : InstRW<[CortexA55WriteVAluSlot2<4, 1>], (instregex "[SU]MULLv", "SQDMULL[iv]")>; +// ASIMD polynomial (8x8) multiply long +def : InstRW<[CortexA55WriteVAluSlot2<3, 1>], (instrs PMULLv8i8, PMULLv16i8)>; +// ASIMD pairwise add and accumulate +def : InstRW<[CortexA55WriteVAluSlot2<4, 2>], (instregex "[SU]ADALPv")>; +// ASIMD shift accumulate +def : InstRW<[CortexA55WriteVAlu<3>], (instregex "[SU]SRA[vd]")>; +// ASIMD shift accumulate #2 +def : InstRW<[CortexA55WriteVAluSlot2<4, 2>], (instregex "[SU]RSRA[vd]")>; +// ASIMD shift by immed +def : InstRW<[CortexA55WriteVAlud<2>], (instregex "SHLd$", "SHLv", + "SLId$", "SRId$", "[SU]SHR[vd]", "SHRNv")>; +// ASIMD shift by immed and insert +def : InstRW<[CortexA55WriteVAlu<2>], (instregex "SLIv", "SRIv")>; +// ASIMD shift by immed +// SXTL and UXTL are aliases for SHLL +def : InstRW<[CortexA55WriteVAluSlot2<2, 1>], (instregex "[US]?SHLLv")>; +// ASIMD shift by immed #2 +def : InstRW<[CortexA55WriteVAlu<3>], (instregex "[SU]RSHR[vd]", "RSHRNv")>; +// ASIMD shift by register +def : InstRW<[CortexA55WriteVAlu<2>], (instregex "[SU]SHLv")>; +// ASIMD shift by register #2 +def : InstRW<[CortexA55WriteVAlu<3>], (instregex "[SU]RSHLv")>; + + +// 4.16. Advanced SIMD floating-point instructions +// ASIMD FP compare +def : InstRW<[CortexA55WriteVAlu<2>], (instregex "FAC(GT|GE)(16|32|64|v)", + "FCM(EQ|GT|GE)(16|32|64|v)", "FCM(LE|LT)v")>; +// ASIMD FP convert, long +//def : InstRW<[CortexA55WriteVAluSlot2<4, 2>], (instregex "FCVTLv")>; +// ASIMD FP convert, other +// ASIMD FP divide, H-form +def : InstRW<[CortexA55WriteVDivSlot2<8, 5>], (instrs FDIVv4f16, FDIVv8f16)>; +// ASIMD FP divide, S-form +def : InstRW<[CortexA55WriteVDivSlot2<13, 10>], (instrs FDIVv2f32, FDIVv4f32)>; +// ASIMD FP divide, D-form +def : InstRW<[CortexA55WriteVDivSlot2<22, 19>], (instrs FDIVv2f64)>; +// ASIMD FP max/min, reduce +def : InstRW<[CortexA55WriteVAluSlot2<4, 1>], (instregex "FMAX(NM)?Vv", "FMIN(NM)?Vv")>; +// ASIMD FP multiply, by element +def : InstRW<[CortexA55WriteVAluSlot2<4, 1>], (instregex "FMULX?v[1248]i")>; + +// 4.17. Advanced SIMD miscellaneous instructions +// ASIMD bit reverse / ASIMD bitwise insert +def : InstRW<[CortexA55WriteVAlu<2>], (instregex "R?BITv", "BIFv", "BSLv")>; +// ASIMD count +def : InstRW<[CortexA55WriteVAlu<2>], (instregex "CLZv", "CNTv")>; +// ASIMD count #2 +def : InstRW<[CortexA55WriteVAlu<3>], (instregex "CLSv")>; +// ASIMD extract +def : InstRW<[CortexA55WriteVAlu<2>], (instregex "EXTv")>; +// ASIMD extract narrow +def : InstRW<[CortexA55WriteVAlu<1>], (instregex "XTNv")>; +// ASIMD extract narrow, saturating +def : InstRW<[CortexA55WriteVAlud<4>], (instregex "[SU]QXTNv", "SQXTUNv")>; +// ASIMD insert, element to element +def : InstRW<[CortexA55WriteVAlud<2>], (instregex "INSvi(8|16|32|64)lane$")>; +// ASIMD move, integer immed +def : InstRW<[CortexA55WriteVAlu<1>], (instregex "MOVI[Dv]")>; +// ASIMD move, FP immed +def : InstRW<[CortexA55WriteVAlud<1>], (instregex "FMOVv")>; +// ASIMD reverse +def : InstRW<[CortexA55WriteVAlu<2>], (instregex "REV(16|32|64)v")>; +// ASIMD table lookup (TBL, 1 reg) +def : InstRW<[CortexA55WriteVAluSlot2<2, 1>], (instrs TBLv8i8One, TBLv16i8One)>; +// ASIMD table lookup (TBL, 2 regs) +def : InstRW<[CortexA55WriteVAluSlot2<3, 2>], (instrs TBLv8i8Two, TBLv16i8Two)>; +// ASIMD table lookup (TBL, 3 regs) +def : InstRW<[CortexA55WriteVAluSlot2<4, 3>], (instrs TBLv8i8Three, TBLv16i8Three)>; +// ASIMD table lookup (TBL, 4 regs) +def : InstRW<[CortexA55WriteVAluSlot2<5, 4>], (instrs TBLv8i8Four, TBLv16i8Four)>; +// ASIMD table lookup (TBX, 1 reg) +def : InstRW<[CortexA55WriteVAluSlot2<3, 2>], (instrs TBXv8i8One, TBXv16i8One)>; +// ASIMD table lookup (TBX, 2 regs) +def : InstRW<[CortexA55WriteVAluSlot2<4, 3>], (instrs TBXv8i8Two, TBXv16i8Two)>; +// ASIMD table lookup (TBX, 3 regs) +def : InstRW<[CortexA55WriteVAluSlot2<5, 4>], (instrs TBXv8i8Three, TBXv16i8Three)>; +// ASIMD table lookup (TBX, 4 regs) +def : InstRW<[CortexA55WriteVAluSlot2<6, 5>], (instrs TBXv8i8Four, TBXv16i8Four)>; +// ASIMD transfer, element to gen reg +def : InstRW<[CortexA55WriteVAlu<2>], (instregex "[SU]MOVv")>; +// ASIMD transfer, gen reg to element +def : InstRW<[CortexA55WriteVAlu<2>], (instregex "INSvi(8|16|32|64)gpr$")>; +// ASIMD transpose, 64-bit (.2D) +def : InstRW<[CortexA55WriteVAlu<2>], (instrs TRN1v2i32, TRN2v2i32)>; +// ASIMD transpose, other +def : InstRW<[CortexA55WriteVAlu<2>], (instregex "TRN[12]v(2i64|4i16|4i32|8i8|8i16|16i8)$")>; +// ASIMD unzip/zip +def : InstRW<[CortexA55WriteVAlu<2>], (instregex "UZP[12]v", "ZIP[12]v")>; + +// 4.20. Cryptographic Extension +// Crypto AES ops +def : InstRW<[CortexA55WriteAESEncrypt], (instrs AESErr)>; +def : InstRW<[CortexA55WriteAESDecrypt], (instrs AESDrr)>; +// Crypto AES ops #2 +def : InstRW<[CortexA55WriteCrypto<2>, CortexA55ReadAESIMC], (instrs AESIMCrr, AESIMCrrTied)>; +def : InstRW<[CortexA55WriteCrypto<2>, CortexA55ReadAESMC], (instrs AESMCrr, AESMCrrTied)>; +// Crypto polynomial (64x64) multiply long +def : InstRW<[CortexA55WriteCrypto<2>], (instrs PMULLv1i64, PMULLv2i64)>; +// Crypto SHA1 xor ops +def : InstRW<[CortexA55WriteCrypto<2>], (instrs SHA1SU0rrr)>; +// Crypto SHA1 schedule acceleration ops +def : InstRW<[CortexA55WriteCrypto<2>], (instrs SHA1Hrr, SHA1SU1rr)>; +// Crypto SHA1 hash acceleration ops +def : InstRW<[CortexA55WriteCrypto<4>], (instregex "SHA1[CMP]rrr$")>; +// Crypto SHA256 schedule acceleration ops +def : InstRW<[CortexA55WriteCrypto<3>], (instrs SHA256SU0rr)>; +// Crypto SHA256 schedule acceleration ops +def : InstRW<[CortexA55WriteCrypto<3>], (instrs SHA256SU1rrr)>; +// Crypto SHA256 hash acceleration ops +def : InstRW<[CortexA55WriteCrypto<4>], (instregex "SHA256H2?rrr$")>; + +// 4.21. CRC +// CRC checksum ops +def : InstRW<[CortexA55WriteCrypto<2>], (instregex "CRC32C?[BHX]rr$")>; +// CRC checksum ops #2 +def : InstRW<[CortexA55WriteCrypto<1>], (instrs CRC32CWrr, CRC32Wrr)>; + } diff --git a/llvm/test/Analysis/CostModel/AArch64/shuffle-select.ll b/llvm/test/Analysis/CostModel/AArch64/shuffle-select.ll --- a/llvm/test/Analysis/CostModel/AArch64/shuffle-select.ll +++ b/llvm/test/Analysis/CostModel/AArch64/shuffle-select.ll @@ -4,7 +4,7 @@ ; COST-LABEL: sel.v8i8 ; COST: Found an estimated cost of 42 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> ; CODE-LABEL: sel.v8i8 -; CODE: tbl v0.8b, { v0.16b }, v1.8b +; CODE: tbl v0.8b, { v0.16b }, v2.8b define <8 x i8> @sel.v8i8(<8 x i8> %v0, <8 x i8> %v1) { %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> ret <8 x i8> %tmp0 diff --git a/llvm/test/Analysis/CostModel/AArch64/vector-select.ll b/llvm/test/Analysis/CostModel/AArch64/vector-select.ll --- a/llvm/test/Analysis/CostModel/AArch64/vector-select.ll +++ b/llvm/test/Analysis/CostModel/AArch64/vector-select.ll @@ -120,10 +120,10 @@ ; CODE-LABEL: v3i64_select_sle ; CODE: bb.0 ; CODE: mov -; CODE: ldr ; CODE: mov ; CODE: mov ; CODE: cmge +; CODE: ldr ; CODE: cmge ; CODE: bif ; CODE: bif diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll @@ -35,11 +35,11 @@ ; SDAG-LABEL: combine_vec_udiv_nonuniform: ; SDAG: // %bb.0: ; SDAG-NEXT: adrp x8, .LCPI1_0 +; SDAG-NEXT: adrp x9, .LCPI1_1 ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI1_0] -; SDAG-NEXT: adrp x8, .LCPI1_1 -; SDAG-NEXT: ushl v1.8h, v0.8h, v1.8h -; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI1_1] ; SDAG-NEXT: adrp x8, .LCPI1_2 +; SDAG-NEXT: ldr q2, [x9, :lo12:.LCPI1_1] +; SDAG-NEXT: ushl v1.8h, v0.8h, v1.8h ; SDAG-NEXT: umull2 v3.4s, v1.8h, v2.8h ; SDAG-NEXT: umull v1.4s, v1.4h, v2.4h ; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI1_2] @@ -48,41 +48,41 @@ ; SDAG-NEXT: sub v0.8h, v0.8h, v1.8h ; SDAG-NEXT: umull2 v3.4s, v0.8h, v2.8h ; SDAG-NEXT: umull v0.4s, v0.4h, v2.4h +; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI1_3] ; SDAG-NEXT: uzp2 v0.8h, v0.8h, v3.8h ; SDAG-NEXT: add v0.8h, v0.8h, v1.8h -; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI1_3] -; SDAG-NEXT: ushl v0.8h, v0.8h, v1.8h +; SDAG-NEXT: ushl v0.8h, v0.8h, v2.8h ; SDAG-NEXT: ret ; ; GISEL-LABEL: combine_vec_udiv_nonuniform: ; GISEL: // %bb.0: ; GISEL-NEXT: adrp x8, .LCPI1_4 -; GISEL-NEXT: adrp x10, .LCPI1_0 -; GISEL-NEXT: adrp x9, .LCPI1_1 +; GISEL-NEXT: adrp x9, .LCPI1_0 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI1_4] ; GISEL-NEXT: adrp x8, .LCPI1_3 -; GISEL-NEXT: ldr q5, [x10, :lo12:.LCPI1_0] -; GISEL-NEXT: ldr q6, [x9, :lo12:.LCPI1_1] +; GISEL-NEXT: ldr q5, [x9, :lo12:.LCPI1_0] ; GISEL-NEXT: neg v1.8h, v1.8h ; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI1_3] ; GISEL-NEXT: adrp x8, .LCPI1_2 ; GISEL-NEXT: ushl v1.8h, v0.8h, v1.8h ; GISEL-NEXT: umull2 v3.4s, v1.8h, v2.8h ; GISEL-NEXT: umull v1.4s, v1.4h, v2.4h -; GISEL-NEXT: uzp2 v1.8h, v1.8h, v3.8h -; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI1_2] +; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI1_2] ; GISEL-NEXT: adrp x8, .LCPI1_5 -; GISEL-NEXT: sub v2.8h, v0.8h, v1.8h -; GISEL-NEXT: umull2 v4.4s, v2.8h, v3.8h -; GISEL-NEXT: umull v2.4s, v2.4h, v3.4h +; GISEL-NEXT: uzp2 v1.8h, v1.8h, v3.8h +; GISEL-NEXT: sub v3.8h, v0.8h, v1.8h +; GISEL-NEXT: umull2 v4.4s, v3.8h, v2.8h +; GISEL-NEXT: umull v2.4s, v3.4h, v2.4h ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI1_5] -; GISEL-NEXT: cmeq v3.8h, v3.8h, v5.8h +; GISEL-NEXT: adrp x8, .LCPI1_1 +; GISEL-NEXT: ldr q6, [x8, :lo12:.LCPI1_1] ; GISEL-NEXT: uzp2 v2.8h, v2.8h, v4.8h -; GISEL-NEXT: neg v4.8h, v6.8h +; GISEL-NEXT: cmeq v3.8h, v3.8h, v5.8h ; GISEL-NEXT: add v1.8h, v2.8h, v1.8h -; GISEL-NEXT: shl v2.8h, v3.8h, #15 -; GISEL-NEXT: ushl v1.8h, v1.8h, v4.8h -; GISEL-NEXT: sshr v2.8h, v2.8h, #15 +; GISEL-NEXT: neg v2.8h, v6.8h +; GISEL-NEXT: shl v3.8h, v3.8h, #15 +; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h +; GISEL-NEXT: sshr v2.8h, v3.8h, #15 ; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b ; GISEL-NEXT: ret %1 = udiv <8 x i16> %x, @@ -93,38 +93,38 @@ ; SDAG-LABEL: combine_vec_udiv_nonuniform2: ; SDAG: // %bb.0: ; SDAG-NEXT: adrp x8, .LCPI2_0 +; SDAG-NEXT: adrp x9, .LCPI2_1 ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI2_0] -; SDAG-NEXT: adrp x8, .LCPI2_1 -; SDAG-NEXT: ushl v0.8h, v0.8h, v1.8h -; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI2_1] ; SDAG-NEXT: adrp x8, .LCPI2_2 -; SDAG-NEXT: umull2 v2.4s, v0.8h, v1.8h -; SDAG-NEXT: umull v0.4s, v0.4h, v1.4h -; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI2_2] -; SDAG-NEXT: uzp2 v0.8h, v0.8h, v2.8h +; SDAG-NEXT: ldr q2, [x9, :lo12:.LCPI2_1] ; SDAG-NEXT: ushl v0.8h, v0.8h, v1.8h +; SDAG-NEXT: umull2 v1.4s, v0.8h, v2.8h +; SDAG-NEXT: umull v0.4s, v0.4h, v2.4h +; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI2_2] +; SDAG-NEXT: uzp2 v0.8h, v0.8h, v1.8h +; SDAG-NEXT: ushl v0.8h, v0.8h, v2.8h ; SDAG-NEXT: ret ; ; GISEL-LABEL: combine_vec_udiv_nonuniform2: ; GISEL: // %bb.0: ; GISEL-NEXT: adrp x8, .LCPI2_3 ; GISEL-NEXT: adrp x9, .LCPI2_4 -; GISEL-NEXT: adrp x10, .LCPI2_0 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI2_3] ; GISEL-NEXT: adrp x8, .LCPI2_2 ; GISEL-NEXT: ldr q3, [x9, :lo12:.LCPI2_4] -; GISEL-NEXT: ldr q4, [x10, :lo12:.LCPI2_0] +; GISEL-NEXT: adrp x9, .LCPI2_1 ; GISEL-NEXT: neg v1.8h, v1.8h ; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI2_2] -; GISEL-NEXT: adrp x8, .LCPI2_1 -; GISEL-NEXT: cmeq v3.8h, v3.8h, v4.8h +; GISEL-NEXT: adrp x8, .LCPI2_0 ; GISEL-NEXT: ushl v1.8h, v0.8h, v1.8h -; GISEL-NEXT: shl v3.8h, v3.8h, #15 -; GISEL-NEXT: umull2 v5.4s, v1.8h, v2.8h +; GISEL-NEXT: ldr q5, [x8, :lo12:.LCPI2_0] +; GISEL-NEXT: umull2 v4.4s, v1.8h, v2.8h ; GISEL-NEXT: umull v1.4s, v1.4h, v2.4h -; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI2_1] +; GISEL-NEXT: ldr q2, [x9, :lo12:.LCPI2_1] +; GISEL-NEXT: cmeq v3.8h, v3.8h, v5.8h +; GISEL-NEXT: uzp2 v1.8h, v1.8h, v4.8h +; GISEL-NEXT: shl v3.8h, v3.8h, #15 ; GISEL-NEXT: neg v2.8h, v2.8h -; GISEL-NEXT: uzp2 v1.8h, v1.8h, v5.8h ; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h ; GISEL-NEXT: sshr v2.8h, v3.8h, #15 ; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b @@ -151,23 +151,23 @@ ; GISEL-LABEL: combine_vec_udiv_nonuniform3: ; GISEL: // %bb.0: ; GISEL-NEXT: adrp x8, .LCPI3_2 -; GISEL-NEXT: adrp x10, .LCPI3_0 -; GISEL-NEXT: adrp x9, .LCPI3_1 +; GISEL-NEXT: adrp x9, .LCPI3_0 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI3_2] ; GISEL-NEXT: adrp x8, .LCPI3_3 -; GISEL-NEXT: ldr q3, [x10, :lo12:.LCPI3_0] -; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI3_1] +; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI3_0] ; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h ; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h +; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI3_3] +; GISEL-NEXT: adrp x8, .LCPI3_1 ; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h -; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI3_3] -; GISEL-NEXT: cmeq v2.8h, v2.8h, v3.8h +; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI3_1] +; GISEL-NEXT: cmeq v3.8h, v3.8h, v4.8h ; GISEL-NEXT: sub v5.8h, v0.8h, v1.8h -; GISEL-NEXT: neg v3.8h, v4.8h -; GISEL-NEXT: shl v2.8h, v2.8h, #15 +; GISEL-NEXT: neg v2.8h, v2.8h ; GISEL-NEXT: usra v1.8h, v5.8h, #1 -; GISEL-NEXT: sshr v2.8h, v2.8h, #15 -; GISEL-NEXT: ushl v1.8h, v1.8h, v3.8h +; GISEL-NEXT: shl v3.8h, v3.8h, #15 +; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h +; GISEL-NEXT: sshr v2.8h, v3.8h, #15 ; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b ; GISEL-NEXT: ret %1 = udiv <8 x i16> %x, @@ -178,41 +178,41 @@ ; SDAG-LABEL: combine_vec_udiv_nonuniform4: ; SDAG: // %bb.0: ; SDAG-NEXT: adrp x8, .LCPI4_0 -; SDAG-NEXT: adrp x9, .LCPI4_3 +; SDAG-NEXT: adrp x9, .LCPI4_2 +; SDAG-NEXT: adrp x10, .LCPI4_3 ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI4_0] ; SDAG-NEXT: adrp x8, .LCPI4_1 -; SDAG-NEXT: ldr q3, [x9, :lo12:.LCPI4_3] +; SDAG-NEXT: ldr q4, [x9, :lo12:.LCPI4_2] ; SDAG-NEXT: umull2 v2.8h, v0.16b, v1.16b ; SDAG-NEXT: umull v1.8h, v0.8b, v1.8b -; SDAG-NEXT: and v0.16b, v0.16b, v3.16b +; SDAG-NEXT: ldr q3, [x8, :lo12:.LCPI4_1] ; SDAG-NEXT: uzp2 v1.16b, v1.16b, v2.16b -; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI4_1] -; SDAG-NEXT: adrp x8, .LCPI4_2 -; SDAG-NEXT: ushl v1.16b, v1.16b, v2.16b -; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI4_2] -; SDAG-NEXT: and v1.16b, v1.16b, v2.16b +; SDAG-NEXT: ldr q2, [x10, :lo12:.LCPI4_3] +; SDAG-NEXT: ushl v1.16b, v1.16b, v3.16b +; SDAG-NEXT: and v0.16b, v0.16b, v2.16b +; SDAG-NEXT: and v1.16b, v1.16b, v4.16b ; SDAG-NEXT: orr v0.16b, v0.16b, v1.16b ; SDAG-NEXT: ret ; ; GISEL-LABEL: combine_vec_udiv_nonuniform4: ; GISEL: // %bb.0: -; GISEL-NEXT: adrp x8, .LCPI4_3 -; GISEL-NEXT: adrp x9, .LCPI4_2 -; GISEL-NEXT: adrp x10, .LCPI4_1 -; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI4_3] +; GISEL-NEXT: adrp x8, .LCPI4_2 +; GISEL-NEXT: adrp x9, .LCPI4_3 +; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI4_2] ; GISEL-NEXT: adrp x8, .LCPI4_0 -; GISEL-NEXT: ldr q2, [x9, :lo12:.LCPI4_2] -; GISEL-NEXT: ldr q3, [x10, :lo12:.LCPI4_1] +; GISEL-NEXT: ldr q2, [x9, :lo12:.LCPI4_3] +; GISEL-NEXT: adrp x9, .LCPI4_1 ; GISEL-NEXT: ldr q4, [x8, :lo12:.LCPI4_0] -; GISEL-NEXT: umull2 v5.8h, v0.16b, v2.16b -; GISEL-NEXT: umull v2.8h, v0.8b, v2.8b -; GISEL-NEXT: cmeq v1.16b, v1.16b, v4.16b -; GISEL-NEXT: neg v3.16b, v3.16b -; GISEL-NEXT: uzp2 v2.16b, v2.16b, v5.16b -; GISEL-NEXT: shl v1.16b, v1.16b, #7 -; GISEL-NEXT: ushl v2.16b, v2.16b, v3.16b -; GISEL-NEXT: sshr v1.16b, v1.16b, #7 -; GISEL-NEXT: bif v0.16b, v2.16b, v1.16b +; GISEL-NEXT: umull2 v3.8h, v0.16b, v1.16b +; GISEL-NEXT: umull v1.8h, v0.8b, v1.8b +; GISEL-NEXT: ldr q5, [x9, :lo12:.LCPI4_1] +; GISEL-NEXT: cmeq v2.16b, v2.16b, v4.16b +; GISEL-NEXT: uzp2 v1.16b, v1.16b, v3.16b +; GISEL-NEXT: shl v2.16b, v2.16b, #7 +; GISEL-NEXT: neg v3.16b, v5.16b +; GISEL-NEXT: sshr v2.16b, v2.16b, #7 +; GISEL-NEXT: ushl v1.16b, v1.16b, v3.16b +; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b ; GISEL-NEXT: ret %div = udiv <16 x i8> %x, ret <16 x i8> %div @@ -222,54 +222,54 @@ ; SDAG-LABEL: pr38477: ; SDAG: // %bb.0: ; SDAG-NEXT: adrp x8, .LCPI5_0 -; SDAG-NEXT: adrp x9, .LCPI5_4 +; SDAG-NEXT: adrp x9, .LCPI5_3 +; SDAG-NEXT: adrp x10, .LCPI5_4 ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI5_0] ; SDAG-NEXT: adrp x8, .LCPI5_1 ; SDAG-NEXT: umull2 v2.4s, v0.8h, v1.8h -; SDAG-NEXT: ldr q3, [x8, :lo12:.LCPI5_1] ; SDAG-NEXT: umull v1.4s, v0.4h, v1.4h -; SDAG-NEXT: adrp x8, .LCPI5_2 ; SDAG-NEXT: uzp2 v1.8h, v1.8h, v2.8h -; SDAG-NEXT: sub v2.8h, v0.8h, v1.8h -; SDAG-NEXT: umull2 v4.4s, v2.8h, v3.8h -; SDAG-NEXT: umull v2.4s, v2.4h, v3.4h -; SDAG-NEXT: ldr q3, [x9, :lo12:.LCPI5_4] -; SDAG-NEXT: and v0.16b, v0.16b, v3.16b +; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI5_1] +; SDAG-NEXT: adrp x8, .LCPI5_2 +; SDAG-NEXT: sub v3.8h, v0.8h, v1.8h +; SDAG-NEXT: umull2 v4.4s, v3.8h, v2.8h +; SDAG-NEXT: umull v2.4s, v3.4h, v2.4h +; SDAG-NEXT: ldr q3, [x8, :lo12:.LCPI5_2] ; SDAG-NEXT: uzp2 v2.8h, v2.8h, v4.8h +; SDAG-NEXT: ldr q4, [x9, :lo12:.LCPI5_3] ; SDAG-NEXT: add v1.8h, v2.8h, v1.8h -; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI5_2] -; SDAG-NEXT: adrp x8, .LCPI5_3 -; SDAG-NEXT: ushl v1.8h, v1.8h, v2.8h -; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI5_3] -; SDAG-NEXT: and v1.16b, v1.16b, v2.16b +; SDAG-NEXT: ldr q2, [x10, :lo12:.LCPI5_4] +; SDAG-NEXT: ushl v1.8h, v1.8h, v3.8h +; SDAG-NEXT: and v0.16b, v0.16b, v2.16b +; SDAG-NEXT: and v1.16b, v1.16b, v4.16b ; SDAG-NEXT: orr v0.16b, v0.16b, v1.16b ; SDAG-NEXT: ret ; ; GISEL-LABEL: pr38477: ; GISEL: // %bb.0: ; GISEL-NEXT: adrp x8, .LCPI5_3 -; GISEL-NEXT: adrp x10, .LCPI5_0 -; GISEL-NEXT: adrp x9, .LCPI5_1 +; GISEL-NEXT: adrp x9, .LCPI5_0 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI5_3] ; GISEL-NEXT: adrp x8, .LCPI5_2 -; GISEL-NEXT: ldr q5, [x10, :lo12:.LCPI5_0] -; GISEL-NEXT: ldr q6, [x9, :lo12:.LCPI5_1] +; GISEL-NEXT: ldr q5, [x9, :lo12:.LCPI5_0] ; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h -; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI5_2] ; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h -; GISEL-NEXT: adrp x8, .LCPI5_4 ; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h -; GISEL-NEXT: sub v2.8h, v0.8h, v1.8h -; GISEL-NEXT: umull2 v4.4s, v2.8h, v3.8h -; GISEL-NEXT: umull v2.4s, v2.4h, v3.4h +; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI5_2] +; GISEL-NEXT: adrp x8, .LCPI5_4 +; GISEL-NEXT: sub v3.8h, v0.8h, v1.8h +; GISEL-NEXT: umull2 v4.4s, v3.8h, v2.8h +; GISEL-NEXT: umull v2.4s, v3.4h, v2.4h ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI5_4] -; GISEL-NEXT: cmeq v3.8h, v3.8h, v5.8h +; GISEL-NEXT: adrp x8, .LCPI5_1 +; GISEL-NEXT: ldr q6, [x8, :lo12:.LCPI5_1] ; GISEL-NEXT: uzp2 v2.8h, v2.8h, v4.8h -; GISEL-NEXT: neg v4.8h, v6.8h +; GISEL-NEXT: cmeq v3.8h, v3.8h, v5.8h ; GISEL-NEXT: add v1.8h, v2.8h, v1.8h -; GISEL-NEXT: shl v2.8h, v3.8h, #15 -; GISEL-NEXT: ushl v1.8h, v1.8h, v4.8h -; GISEL-NEXT: sshr v2.8h, v2.8h, #15 +; GISEL-NEXT: neg v2.8h, v6.8h +; GISEL-NEXT: shl v3.8h, v3.8h, #15 +; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h +; GISEL-NEXT: sshr v2.8h, v3.8h, #15 ; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b ; GISEL-NEXT: ret %1 = udiv <8 x i16> %a0, diff --git a/llvm/test/CodeGen/AArch64/aarch64-addv.ll b/llvm/test/CodeGen/AArch64/aarch64-addv.ll --- a/llvm/test/CodeGen/AArch64/aarch64-addv.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-addv.ll @@ -91,9 +91,9 @@ ; CHECK-LABEL: oversized_ADDV_512: ; CHECK: // %bb.0: ; CHECK-NEXT: ldp q0, q1, [x0, #32] -; CHECK-NEXT: ldp q3, q2, [x0] -; CHECK-NEXT: add v0.4s, v3.4s, v0.4s -; CHECK-NEXT: add v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldp q2, q3, [x0] +; CHECK-NEXT: add v0.4s, v2.4s, v0.4s +; CHECK-NEXT: add v1.4s, v3.4s, v1.4s ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: addv s0, v0.4s ; CHECK-NEXT: fmov w0, s0 diff --git a/llvm/test/CodeGen/AArch64/aarch64-avoid-illegal-extract-subvector.ll b/llvm/test/CodeGen/AArch64/aarch64-avoid-illegal-extract-subvector.ll --- a/llvm/test/CodeGen/AArch64/aarch64-avoid-illegal-extract-subvector.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-avoid-illegal-extract-subvector.ll @@ -4,9 +4,10 @@ ; CHECK-LABEL: test1: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: mov w9, v0.s[2] -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: mov v0.d[1], x9 +; CHECK-NEXT: fmov d1, x8 +; CHECK-NEXT: mov w8, v0.s[2] +; CHECK-NEXT: mov v1.d[1], x8 +; CHECK-NEXT: mov v0.16b, v1.16b ; CHECK-NEXT: ret %i1 = extractelement <4 x i32> %x, i32 1 %zi1 = zext i32 %i1 to i64 @@ -24,9 +25,9 @@ ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_0] ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: mov w9, v0.s[2] ; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: mov v1.d[1], x9 +; CHECK-NEXT: mov w8, v0.s[2] +; CHECK-NEXT: mov v1.d[1], x8 ; CHECK-NEXT: ret entry: %1 = add <4 x i32> %0, diff --git a/llvm/test/CodeGen/AArch64/aarch64-be-bv.ll b/llvm/test/CodeGen/AArch64/aarch64-be-bv.ll --- a/llvm/test/CodeGen/AArch64/aarch64-be-bv.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-be-bv.ll @@ -167,8 +167,8 @@ ; CHECK-LABEL: fmov_modimm_t11: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, vec_v8i16 -; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16 ; CHECK-NEXT: fmov v1.4s, #3.00000000 +; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16 ; CHECK-NEXT: ld1 { v0.8h }, [x8] ; CHECK-NEXT: add v0.8h, v0.8h, v1.8h ; CHECK-NEXT: st1 { v0.8h }, [x8] @@ -183,8 +183,8 @@ ; CHECK-LABEL: fmov_modimm_t12: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, vec_v8i16 -; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16 ; CHECK-NEXT: fmov v1.2d, #0.17968750 +; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16 ; CHECK-NEXT: ld1 { v0.8h }, [x8] ; CHECK-NEXT: add v0.8h, v0.8h, v1.8h ; CHECK-NEXT: st1 { v0.8h }, [x8] diff --git a/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll b/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll --- a/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll @@ -98,10 +98,10 @@ define <2 x i16> @dupsext_v2i8_v2i16(i8 %src, <2 x i8> %b) { ; CHECK-LABEL: dupsext_v2i8_v2i16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: shl v0.2s, v0.2s, #24 ; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: dup v1.2s, w8 +; CHECK-NEXT: shl v0.2s, v0.2s, #24 ; CHECK-NEXT: sshr v0.2s, v0.2s, #24 +; CHECK-NEXT: dup v1.2s, w8 ; CHECK-NEXT: mul v0.2s, v1.2s, v0.2s ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll b/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll --- a/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll @@ -187,12 +187,12 @@ ; CHECK-BE: // %bb.0: ; CHECK-BE-NEXT: ld1 { v0.8b }, [x0] ; CHECK-BE-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-BE-NEXT: sshll2 v1.4s, v0.8h, #0 -; CHECK-BE-NEXT: sshll v0.4s, v0.4h, #0 -; CHECK-BE-NEXT: rev64 v1.4s, v1.4s +; CHECK-BE-NEXT: sshll v1.4s, v0.4h, #0 +; CHECK-BE-NEXT: sshll2 v0.4s, v0.8h, #0 +; CHECK-BE-NEXT: rev64 v2.4s, v1.4s ; CHECK-BE-NEXT: rev64 v0.4s, v0.4s -; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 -; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-BE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-BE-NEXT: ext v0.16b, v2.16b, v2.16b, #8 ; CHECK-BE-NEXT: ret %x = load <8 x i8>, <8 x i8>* %a %y = sext <8 x i8> %x to <8 x i32> @@ -344,12 +344,12 @@ ; CHECK-BE-LABEL: fsext_v16i16: ; CHECK-BE: // %bb.0: ; CHECK-BE-NEXT: ld1 { v0.16b }, [x0] -; CHECK-BE-NEXT: sshll2 v1.8h, v0.16b, #0 -; CHECK-BE-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-BE-NEXT: rev64 v1.8h, v1.8h +; CHECK-BE-NEXT: sshll v1.8h, v0.8b, #0 +; CHECK-BE-NEXT: sshll2 v0.8h, v0.16b, #0 +; CHECK-BE-NEXT: rev64 v2.8h, v1.8h ; CHECK-BE-NEXT: rev64 v0.8h, v0.8h -; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 -; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-BE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-BE-NEXT: ext v0.16b, v2.16b, v2.16b, #8 ; CHECK-BE-NEXT: ret %x = load <16 x i8>, <16 x i8>* %a %y = sext <16 x i8> %x to <16 x i16> diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll --- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll @@ -111,11 +111,11 @@ define <4 x i32> @amull_v4i16_v4i32(<4 x i16>* %A, <4 x i16>* %B) nounwind { ; CHECK-LABEL: amull_v4i16_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: ldr d2, [x1] -; CHECK-NEXT: movi v0.2d, #0x00ffff0000ffff -; CHECK-NEXT: smull v1.4s, v1.4h, v2.4h -; CHECK-NEXT: and v0.16b, v1.16b, v0.16b +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h +; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = load <4 x i16>, <4 x i16>* %B @@ -129,11 +129,11 @@ define <2 x i64> @amull_v2i32_v2i64(<2 x i32>* %A, <2 x i32>* %B) nounwind { ; CHECK-LABEL: amull_v2i32_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: ldr d2, [x1] -; CHECK-NEXT: movi v0.2d, #0x000000ffffffff -; CHECK-NEXT: smull v1.2d, v1.2s, v2.2s -; CHECK-NEXT: and v0.16b, v1.16b, v0.16b +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s +; CHECK-NEXT: movi v1.2d, #0x000000ffffffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = load <2 x i32>, <2 x i32>* %B @@ -275,12 +275,12 @@ define <4 x i32> @amlal_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { ; CHECK-LABEL: amlal_v4i16_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: ldr q2, [x0] -; CHECK-NEXT: ldr d3, [x2] +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr d2, [x2] +; CHECK-NEXT: smlal v1.4s, v0.4h, v2.4h ; CHECK-NEXT: movi v0.2d, #0x00ffff0000ffff -; CHECK-NEXT: smlal v2.4s, v1.4h, v3.4h -; CHECK-NEXT: and v0.16b, v2.16b, v0.16b +; CHECK-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = load <4 x i16>, <4 x i16>* %B @@ -296,12 +296,12 @@ define <2 x i64> @amlal_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { ; CHECK-LABEL: amlal_v2i32_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: ldr q2, [x0] -; CHECK-NEXT: ldr d3, [x2] +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr d2, [x2] +; CHECK-NEXT: smlal v1.2d, v0.2s, v2.2s ; CHECK-NEXT: movi v0.2d, #0x000000ffffffff -; CHECK-NEXT: smlal v2.2d, v1.2s, v3.2s -; CHECK-NEXT: and v0.16b, v2.16b, v0.16b +; CHECK-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = load <2 x i32>, <2 x i32>* %B @@ -445,12 +445,12 @@ define <4 x i32> @amlsl_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { ; CHECK-LABEL: amlsl_v4i16_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: ldr q2, [x0] -; CHECK-NEXT: ldr d3, [x2] +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr d2, [x2] +; CHECK-NEXT: smlsl v1.4s, v0.4h, v2.4h ; CHECK-NEXT: movi v0.2d, #0x00ffff0000ffff -; CHECK-NEXT: smlsl v2.4s, v1.4h, v3.4h -; CHECK-NEXT: and v0.16b, v2.16b, v0.16b +; CHECK-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = load <4 x i16>, <4 x i16>* %B @@ -466,12 +466,12 @@ define <2 x i64> @amlsl_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { ; CHECK-LABEL: amlsl_v2i32_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: ldr q2, [x0] -; CHECK-NEXT: ldr d3, [x2] +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr d2, [x2] +; CHECK-NEXT: smlsl v1.2d, v0.2s, v2.2s ; CHECK-NEXT: movi v0.2d, #0x000000ffffffff -; CHECK-NEXT: smlsl v2.2d, v1.2s, v3.2s -; CHECK-NEXT: and v0.16b, v2.16b, v0.16b +; CHECK-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = load <2 x i32>, <2 x i32>* %B @@ -599,9 +599,9 @@ ; CHECK-LABEL: amull_extvec_v4i16_v4i32: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #1234 +; CHECK-NEXT: dup v1.4h, w8 +; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h ; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff -; CHECK-NEXT: dup v2.4h, w8 -; CHECK-NEXT: smull v0.4s, v0.4h, v2.4h ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %tmp3 = zext <4 x i16> %arg to <4 x i32> @@ -614,9 +614,9 @@ ; CHECK-LABEL: amull_extvec_v2i32_v2i64: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #1234 +; CHECK-NEXT: dup v1.2s, w8 +; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s ; CHECK-NEXT: movi v1.2d, #0x000000ffffffff -; CHECK-NEXT: dup v2.2s, w8 -; CHECK-NEXT: smull v0.2d, v0.2s, v2.2s ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %tmp3 = zext <2 x i32> %arg to <2 x i64> @@ -752,11 +752,11 @@ define <16 x i16> @amull2_i8(<16 x i8> %arg1, <16 x i8> %arg2) { ; CHECK-LABEL: amull2_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: smull2 v2.8h, v0.16b, v1.16b -; CHECK-NEXT: smull v0.8h, v0.8b, v1.8b +; CHECK-NEXT: smull v2.8h, v0.8b, v1.8b +; CHECK-NEXT: smull2 v1.8h, v0.16b, v1.16b ; CHECK-NEXT: bic v2.8h, #255, lsl #8 -; CHECK-NEXT: bic v0.8h, #255, lsl #8 -; CHECK-NEXT: mov v1.16b, v2.16b +; CHECK-NEXT: bic v1.8h, #255, lsl #8 +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %arg1_ext = zext <16 x i8> %arg1 to <16 x i16> %arg2_ext = zext <16 x i8> %arg2 to <16 x i16> @@ -768,11 +768,11 @@ define <8 x i32> @amull2_i16(<8 x i16> %arg1, <8 x i16> %arg2) { ; CHECK-LABEL: amull2_i16: ; CHECK: // %bb.0: +; CHECK-NEXT: smull v3.4s, v0.4h, v1.4h +; CHECK-NEXT: smull2 v0.4s, v0.8h, v1.8h ; CHECK-NEXT: movi v2.2d, #0x00ffff0000ffff -; CHECK-NEXT: smull2 v3.4s, v0.8h, v1.8h -; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h -; CHECK-NEXT: and v1.16b, v3.16b, v2.16b -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: and v1.16b, v0.16b, v2.16b +; CHECK-NEXT: and v0.16b, v3.16b, v2.16b ; CHECK-NEXT: ret %arg1_ext = zext <8 x i16> %arg1 to <8 x i32> %arg2_ext = zext <8 x i16> %arg2 to <8 x i32> @@ -784,11 +784,11 @@ define <4 x i64> @amull2_i32(<4 x i32> %arg1, <4 x i32> %arg2) { ; CHECK-LABEL: amull2_i32: ; CHECK: // %bb.0: +; CHECK-NEXT: smull v3.2d, v0.2s, v1.2s +; CHECK-NEXT: smull2 v0.2d, v0.4s, v1.4s ; CHECK-NEXT: movi v2.2d, #0x000000ffffffff -; CHECK-NEXT: smull2 v3.2d, v0.4s, v1.4s -; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s -; CHECK-NEXT: and v1.16b, v3.16b, v2.16b -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: and v1.16b, v0.16b, v2.16b +; CHECK-NEXT: and v0.16b, v3.16b, v2.16b ; CHECK-NEXT: ret %arg1_ext = zext <4 x i32> %arg1 to <4 x i64> %arg2_ext = zext <4 x i32> %arg2 to <4 x i64> diff --git a/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll b/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll --- a/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll +++ b/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll @@ -57,9 +57,9 @@ ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: bl vec_use -; CHECK-NEXT: movi v0.4s, #10 ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: movi v0.4s, #10 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret @@ -134,9 +134,9 @@ ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: bl vec_use -; CHECK-NEXT: movi v0.4s, #6 ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: movi v0.4s, #6 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret @@ -213,9 +213,9 @@ ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: bl vec_use -; CHECK-NEXT: mvni v0.4s, #5 ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: mvni v0.4s, #5 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret @@ -290,9 +290,9 @@ ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: bl vec_use -; CHECK-NEXT: mvni v0.4s, #5 ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: mvni v0.4s, #5 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret @@ -367,9 +367,9 @@ ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: bl vec_use -; CHECK-NEXT: movi v0.4s, #10 ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: movi v0.4s, #10 ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret @@ -446,9 +446,9 @@ ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: bl vec_use -; CHECK-NEXT: movi v0.4s, #2 ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: movi v0.4s, #2 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret @@ -526,9 +526,9 @@ ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s ; CHECK-NEXT: bl vec_use -; CHECK-NEXT: movi v0.4s, #10 ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: movi v0.4s, #10 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret @@ -606,9 +606,9 @@ ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s ; CHECK-NEXT: bl vec_use -; CHECK-NEXT: movi v0.4s, #6 ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: movi v0.4s, #6 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret @@ -685,9 +685,9 @@ ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: bl vec_use -; CHECK-NEXT: movi v0.4s, #2 ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: movi v0.4s, #2 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll b/llvm/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll --- a/llvm/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll +++ b/llvm/test/CodeGen/AArch64/arm64-AdvSIMD-Scalar.ll @@ -68,16 +68,16 @@ define double @add_sub_su64(<2 x i64> %a, <2 x i64> %b) nounwind readnone { ; CHECK-LABEL: add_sub_su64: ; CHECK: // %bb.0: +; CHECK-NEXT: fmov d2, xzr ; CHECK-NEXT: add d0, d1, d0 -; CHECK-NEXT: fmov d1, xzr -; CHECK-NEXT: sub d0, d1, d0 +; CHECK-NEXT: sub d0, d2, d0 ; CHECK-NEXT: ret ; ; GENERIC-LABEL: add_sub_su64: ; GENERIC: // %bb.0: +; GENERIC-NEXT: fmov d2, xzr ; GENERIC-NEXT: add d0, d1, d0 -; GENERIC-NEXT: fmov d1, xzr -; GENERIC-NEXT: sub d0, d1, d0 +; GENERIC-NEXT: sub d0, d2, d0 ; GENERIC-NEXT: ret %vecext = extractelement <2 x i64> %a, i32 0 %vecext1 = extractelement <2 x i64> %b, i32 0 diff --git a/llvm/test/CodeGen/AArch64/arm64-dup.ll b/llvm/test/CodeGen/AArch64/arm64-dup.ll --- a/llvm/test/CodeGen/AArch64/arm64-dup.ll +++ b/llvm/test/CodeGen/AArch64/arm64-dup.ll @@ -446,11 +446,11 @@ define void @disguised_dup(<4 x float> %x, <4 x float>* %p1, <4 x float>* %p2) { ; CHECK-LABEL: disguised_dup: ; CHECK: // %bb.0: -; CHECK-NEXT: ext.16b v1, v0, v0, #12 -; CHECK-NEXT: dup.4s v0, v0[0] -; CHECK-NEXT: ext.16b v1, v1, v0, #8 -; CHECK-NEXT: str q1, [x0] -; CHECK-NEXT: str q0, [x1] +; CHECK-NEXT: dup.4s v1, v0[0] +; CHECK-NEXT: ext.16b v0, v0, v0, #12 +; CHECK-NEXT: ext.16b v0, v0, v1, #8 +; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: str q1, [x1] ; CHECK-NEXT: ret %shuf = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %dup = shufflevector <4 x float> %shuf, <4 x float> undef, <4 x i32> diff --git a/llvm/test/CodeGen/AArch64/arm64-fcopysign.ll b/llvm/test/CodeGen/AArch64/arm64-fcopysign.ll --- a/llvm/test/CodeGen/AArch64/arm64-fcopysign.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fcopysign.ll @@ -6,8 +6,8 @@ define float @test1(float %x, float %y) nounwind { ; CHECK-LABEL: test1: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: movi.4s v2, #128, lsl #24 ; CHECK-NEXT: ; kill: def $s0 killed $s0 def $q0 +; CHECK-NEXT: movi.4s v2, #128, lsl #24 ; CHECK-NEXT: ; kill: def $s1 killed $s1 def $q1 ; CHECK-NEXT: bit.16b v0, v1, v2 ; CHECK-NEXT: ; kill: def $s0 killed $s0 killed $q0 @@ -36,12 +36,12 @@ define double @test3(double %a, float %b, float %c) nounwind { ; CHECK-LABEL: test3: ; CHECK: ; %bb.0: -; CHECK-NEXT: movi.2d v3, #0000000000000000 -; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: fadd s1, s1, s2 -; CHECK-NEXT: fneg.2d v2, v3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: movi.2d v3, #0000000000000000 +; CHECK-NEXT: fneg.2d v3, v3 ; CHECK-NEXT: fcvt d1, s1 -; CHECK-NEXT: bit.16b v0, v1, v2 +; CHECK-NEXT: bit.16b v0, v1, v3 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %tmp1 = fadd float %b, %c @@ -55,11 +55,11 @@ ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill ; CHECK-NEXT: bl _bar -; CHECK-NEXT: movi.4s v1, #128, lsl #24 ; CHECK-NEXT: fcvt s0, d0 -; CHECK-NEXT: fmov s2, #0.50000000 -; CHECK-NEXT: bit.16b v2, v0, v1 -; CHECK-NEXT: fadd s0, s0, s2 +; CHECK-NEXT: fmov s1, #0.50000000 +; CHECK-NEXT: movi.4s v2, #128, lsl #24 +; CHECK-NEXT: bit.16b v1, v0, v2 +; CHECK-NEXT: fadd s0, s0, s1 ; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll b/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll --- a/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll +++ b/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll @@ -185,8 +185,8 @@ ; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: stp q0, q0, [sp] ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload @@ -206,9 +206,9 @@ ; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 64 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: str xzr, [sp, #32] +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: stp q0, q0, [sp] ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload @@ -228,8 +228,8 @@ ; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 80 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] ; CHECK-NEXT: bl something @@ -250,9 +250,9 @@ ; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 96 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: str xzr, [sp, #64] +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] ; CHECK-NEXT: bl something @@ -273,8 +273,8 @@ ; CHECK-NEXT: str x30, [sp, #128] // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 144 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: stp q0, q0, [sp, #96] ; CHECK-NEXT: stp q0, q0, [sp, #64] ; CHECK-NEXT: stp q0, q0, [sp, #32] @@ -298,8 +298,8 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 272 ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: stp q0, q0, [sp, #224] ; CHECK-NEXT: stp q0, q0, [sp, #192] ; CHECK-NEXT: stp q0, q0, [sp, #160] @@ -451,8 +451,8 @@ ; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: stp q0, q0, [sp] ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload @@ -472,11 +472,11 @@ ; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 64 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: mov x8, #-6148914691236517206 ; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: str x8, [sp, #32] +; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: str x8, [sp, #32] ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #64 @@ -495,8 +495,8 @@ ; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 80 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] ; CHECK-NEXT: bl something @@ -517,11 +517,11 @@ ; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 96 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: mov x8, #-6148914691236517206 ; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: str x8, [sp, #64] +; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: stp q0, q0, [sp, #32] +; CHECK-NEXT: str x8, [sp, #64] ; CHECK-NEXT: stp q0, q0, [sp] ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload @@ -541,8 +541,8 @@ ; CHECK-NEXT: str x30, [sp, #128] // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 144 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: stp q0, q0, [sp, #96] ; CHECK-NEXT: stp q0, q0, [sp, #64] ; CHECK-NEXT: stp q0, q0, [sp, #32] @@ -566,8 +566,8 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 272 ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: stp q0, q0, [sp, #224] ; CHECK-NEXT: stp q0, q0, [sp, #192] ; CHECK-NEXT: stp q0, q0, [sp, #160] diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll --- a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll @@ -117,9 +117,9 @@ define <4 x i32> @test_vaddl_a16(<4 x i16> %a, <4 x i16> %b) { ; CHECK-LABEL: test_vaddl_a16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x00ffff0000ffff ; CHECK-NEXT: uaddl v0.4s, v0.4h, v1.4h -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %vmovl.i.i = zext <4 x i16> %a to <4 x i32> @@ -132,9 +132,9 @@ define <2 x i64> @test_vaddl_a32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vaddl_a32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x000000ffffffff ; CHECK-NEXT: uaddl v0.2d, v0.2s, v1.2s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x000000ffffffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %vmovl.i.i = zext <2 x i32> %a to <2 x i64> @@ -247,9 +247,9 @@ define <4 x i32> @test_vaddl_high_a16(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: test_vaddl_high_a16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x00ffff0000ffff ; CHECK-NEXT: uaddl2 v0.4s, v0.8h, v1.8h -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> @@ -264,9 +264,9 @@ define <2 x i64> @test_vaddl_high_a32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test_vaddl_high_a32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x000000ffffffff ; CHECK-NEXT: uaddl2 v0.2d, v0.4s, v1.4s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x000000ffffffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> @@ -360,9 +360,9 @@ define <4 x i32> @test_vaddw_a16(<4 x i32> %a, <4 x i16> %b) { ; CHECK-LABEL: test_vaddw_a16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x00ffff0000ffff ; CHECK-NEXT: uaddw v0.4s, v0.4s, v1.4h -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %vmovl.i.i = zext <4 x i16> %b to <4 x i32> @@ -374,9 +374,9 @@ define <2 x i64> @test_vaddw_a32(<2 x i64> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vaddw_a32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x000000ffffffff ; CHECK-NEXT: uaddw v0.2d, v0.2d, v1.2s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x000000ffffffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %vmovl.i.i = zext <2 x i32> %b to <2 x i64> @@ -474,9 +474,9 @@ define <4 x i32> @test_vaddw_high_a16(<4 x i32> %a, <8 x i16> %b) { ; CHECK-LABEL: test_vaddw_high_a16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x00ffff0000ffff ; CHECK-NEXT: uaddw2 v0.4s, v0.4s, v1.8h -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> @@ -489,9 +489,9 @@ define <2 x i64> @test_vaddw_high_a32(<2 x i64> %a, <4 x i32> %b) { ; CHECK-LABEL: test_vaddw_high_a32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x000000ffffffff ; CHECK-NEXT: uaddw2 v0.2d, v0.2d, v1.4s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x000000ffffffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> @@ -590,9 +590,9 @@ define <4 x i32> @test_vsubl_a16(<4 x i16> %a, <4 x i16> %b) { ; CHECK-LABEL: test_vsubl_a16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x00ffff0000ffff ; CHECK-NEXT: usubl v0.4s, v0.4h, v1.4h -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %vmovl.i.i = zext <4 x i16> %a to <4 x i32> @@ -605,9 +605,9 @@ define <2 x i64> @test_vsubl_a32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vsubl_a32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x000000ffffffff ; CHECK-NEXT: usubl v0.2d, v0.2s, v1.2s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x000000ffffffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %vmovl.i.i = zext <2 x i32> %a to <2 x i64> @@ -720,9 +720,9 @@ define <4 x i32> @test_vsubl_high_a16(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: test_vsubl_high_a16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x00ffff0000ffff ; CHECK-NEXT: usubl2 v0.4s, v0.8h, v1.8h -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> @@ -737,9 +737,9 @@ define <2 x i64> @test_vsubl_high_a32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test_vsubl_high_a32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x000000ffffffff ; CHECK-NEXT: usubl2 v0.2d, v0.4s, v1.4s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x000000ffffffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> @@ -833,9 +833,9 @@ define <4 x i32> @test_vsubw_a16(<4 x i32> %a, <4 x i16> %b) { ; CHECK-LABEL: test_vsubw_a16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x00ffff0000ffff ; CHECK-NEXT: usubw v0.4s, v0.4s, v1.4h -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %vmovl.i.i = zext <4 x i16> %b to <4 x i32> @@ -847,9 +847,9 @@ define <2 x i64> @test_vsubw_a32(<2 x i64> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vsubw_a32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x000000ffffffff ; CHECK-NEXT: usubw v0.2d, v0.2d, v1.2s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x000000ffffffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %vmovl.i.i = zext <2 x i32> %b to <2 x i64> @@ -947,9 +947,9 @@ define <4 x i32> @test_vsubw_high_a16(<4 x i32> %a, <8 x i16> %b) { ; CHECK-LABEL: test_vsubw_high_a16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x00ffff0000ffff ; CHECK-NEXT: usubw2 v0.4s, v0.4s, v1.8h -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> @@ -962,9 +962,9 @@ define <2 x i64> @test_vsubw_high_a32(<2 x i64> %a, <4 x i32> %b) { ; CHECK-LABEL: test_vsubw_high_a32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x000000ffffffff ; CHECK-NEXT: usubw2 v0.2d, v0.2d, v1.4s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x000000ffffffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> @@ -2510,8 +2510,8 @@ ; CHECK-NEXT: fmov d0, x0 ; CHECK-NEXT: fmov d1, x1 ; CHECK-NEXT: pmull v0.1q, v0.1d, v1.1d -; CHECK-NEXT: mov x1, v0.d[1] ; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: mov x1, v0.d[1] ; CHECK-NEXT: ret entry: %vmull2.i = tail call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %a, i64 %b) @@ -2523,8 +2523,8 @@ ; CHECK-LABEL: test_vmull_high_p64: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: pmull2 v0.1q, v0.2d, v1.2d -; CHECK-NEXT: mov x1, v0.d[1] ; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: mov x1, v0.d[1] ; CHECK-NEXT: ret entry: %0 = extractelement <2 x i64> %a, i32 1 diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll b/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll --- a/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll @@ -200,9 +200,9 @@ ; CHECK-LABEL: test_sabd_v2i32_const: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI19_0 -; CHECK-NEXT: movi d0, #0x00ffffffff0000 -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI19_0] -; CHECK-NEXT: sabd v0.2s, v1.2s, v0.2s +; CHECK-NEXT: movi d1, #0x00ffffffff0000 +; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI19_0] +; CHECK-NEXT: sabd v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %1 = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32( <2 x i32> , diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll b/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll --- a/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll @@ -73,14 +73,14 @@ define <2 x i64> @mul2xi64(<2 x i64> %A, <2 x i64> %B) { ; CHECK-LABEL: mul2xi64: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov x9, d1 -; CHECK-NEXT: fmov x10, d0 -; CHECK-NEXT: mov x8, v1.d[1] -; CHECK-NEXT: mov x11, v0.d[1] +; CHECK-NEXT: fmov x8, d1 +; CHECK-NEXT: fmov x9, d0 +; CHECK-NEXT: mov x10, v0.d[1] +; CHECK-NEXT: mul x8, x9, x8 +; CHECK-NEXT: mov x9, v1.d[1] ; CHECK-NEXT: mul x9, x10, x9 -; CHECK-NEXT: mul x8, x11, x8 -; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: mov v0.d[1], x8 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: mov v0.d[1], x9 ; CHECK-NEXT: ret %tmp3 = mul <2 x i64> %A, %B; ret <2 x i64> %tmp3 @@ -162,32 +162,32 @@ ; CHECK-NEXT: smov w9, v0.b[1] ; CHECK-NEXT: smov w10, v0.b[0] ; CHECK-NEXT: smov w11, v0.b[2] -; CHECK-NEXT: smov w12, v0.b[3] -; CHECK-NEXT: smov w13, v0.b[4] ; CHECK-NEXT: sdiv w8, w9, w8 ; CHECK-NEXT: smov w9, v1.b[0] +; CHECK-NEXT: smov w12, v0.b[3] +; CHECK-NEXT: smov w13, v0.b[4] +; CHECK-NEXT: smov w14, v0.b[5] +; CHECK-NEXT: smov w15, v0.b[6] ; CHECK-NEXT: sdiv w9, w10, w9 ; CHECK-NEXT: smov w10, v1.b[2] ; CHECK-NEXT: sdiv w10, w11, w10 ; CHECK-NEXT: smov w11, v1.b[3] ; CHECK-NEXT: fmov s2, w9 -; CHECK-NEXT: smov w9, v1.b[5] ; CHECK-NEXT: mov v2.b[1], w8 +; CHECK-NEXT: smov w8, v1.b[7] ; CHECK-NEXT: sdiv w11, w12, w11 ; CHECK-NEXT: smov w12, v1.b[4] ; CHECK-NEXT: mov v2.b[2], w10 -; CHECK-NEXT: smov w10, v0.b[6] +; CHECK-NEXT: smov w10, v0.b[7] ; CHECK-NEXT: sdiv w12, w13, w12 -; CHECK-NEXT: smov w13, v0.b[5] +; CHECK-NEXT: smov w13, v1.b[5] ; CHECK-NEXT: mov v2.b[3], w11 -; CHECK-NEXT: smov w11, v0.b[7] -; CHECK-NEXT: sdiv w8, w13, w9 -; CHECK-NEXT: smov w9, v1.b[6] +; CHECK-NEXT: sdiv w13, w14, w13 +; CHECK-NEXT: smov w14, v1.b[6] ; CHECK-NEXT: mov v2.b[4], w12 -; CHECK-NEXT: sdiv w9, w10, w9 -; CHECK-NEXT: smov w10, v1.b[7] -; CHECK-NEXT: mov v2.b[5], w8 -; CHECK-NEXT: sdiv w8, w11, w10 +; CHECK-NEXT: sdiv w9, w15, w14 +; CHECK-NEXT: mov v2.b[5], w13 +; CHECK-NEXT: sdiv w8, w10, w8 ; CHECK-NEXT: mov v2.b[6], w9 ; CHECK-NEXT: mov v2.b[7], w8 ; CHECK-NEXT: fmov d0, d2 @@ -203,66 +203,66 @@ ; CHECK-NEXT: smov w9, v0.b[1] ; CHECK-NEXT: smov w10, v0.b[0] ; CHECK-NEXT: smov w11, v0.b[2] +; CHECK-NEXT: sdiv w8, w9, w8 +; CHECK-NEXT: smov w9, v1.b[0] ; CHECK-NEXT: smov w12, v0.b[3] ; CHECK-NEXT: smov w13, v0.b[4] ; CHECK-NEXT: smov w14, v0.b[5] ; CHECK-NEXT: smov w15, v0.b[6] -; CHECK-NEXT: sdiv w8, w9, w8 -; CHECK-NEXT: smov w9, v1.b[0] ; CHECK-NEXT: smov w16, v0.b[7] ; CHECK-NEXT: smov w17, v0.b[8] +; CHECK-NEXT: smov w18, v0.b[9] +; CHECK-NEXT: smov w0, v0.b[10] +; CHECK-NEXT: smov w1, v0.b[11] +; CHECK-NEXT: smov w2, v0.b[12] ; CHECK-NEXT: sdiv w9, w10, w9 ; CHECK-NEXT: smov w10, v1.b[2] ; CHECK-NEXT: sdiv w10, w11, w10 ; CHECK-NEXT: smov w11, v1.b[3] ; CHECK-NEXT: fmov s2, w9 -; CHECK-NEXT: smov w9, v1.b[9] ; CHECK-NEXT: mov v2.b[1], w8 +; CHECK-NEXT: smov w8, v1.b[13] ; CHECK-NEXT: sdiv w11, w12, w11 ; CHECK-NEXT: smov w12, v1.b[4] ; CHECK-NEXT: mov v2.b[2], w10 -; CHECK-NEXT: smov w10, v0.b[10] +; CHECK-NEXT: smov w10, v0.b[13] ; CHECK-NEXT: sdiv w12, w13, w12 ; CHECK-NEXT: smov w13, v1.b[5] ; CHECK-NEXT: mov v2.b[3], w11 -; CHECK-NEXT: smov w11, v0.b[11] +; CHECK-NEXT: smov w11, v0.b[14] ; CHECK-NEXT: sdiv w13, w14, w13 ; CHECK-NEXT: smov w14, v1.b[6] ; CHECK-NEXT: mov v2.b[4], w12 -; CHECK-NEXT: smov w12, v0.b[12] +; CHECK-NEXT: smov w12, v0.b[15] ; CHECK-NEXT: sdiv w14, w15, w14 ; CHECK-NEXT: smov w15, v1.b[7] ; CHECK-NEXT: mov v2.b[5], w13 -; CHECK-NEXT: smov w13, v0.b[13] ; CHECK-NEXT: sdiv w15, w16, w15 ; CHECK-NEXT: smov w16, v1.b[8] ; CHECK-NEXT: mov v2.b[6], w14 ; CHECK-NEXT: sdiv w16, w17, w16 -; CHECK-NEXT: smov w17, v0.b[9] +; CHECK-NEXT: smov w17, v1.b[9] ; CHECK-NEXT: mov v2.b[7], w15 -; CHECK-NEXT: sdiv w8, w17, w9 -; CHECK-NEXT: smov w9, v1.b[10] +; CHECK-NEXT: sdiv w17, w18, w17 +; CHECK-NEXT: smov w18, v1.b[10] ; CHECK-NEXT: mov v2.b[8], w16 -; CHECK-NEXT: sdiv w9, w10, w9 -; CHECK-NEXT: smov w10, v1.b[11] -; CHECK-NEXT: mov v2.b[9], w8 +; CHECK-NEXT: sdiv w18, w0, w18 +; CHECK-NEXT: smov w0, v1.b[11] +; CHECK-NEXT: mov v2.b[9], w17 +; CHECK-NEXT: sdiv w0, w1, w0 +; CHECK-NEXT: smov w1, v1.b[12] +; CHECK-NEXT: mov v2.b[10], w18 +; CHECK-NEXT: sdiv w9, w2, w1 +; CHECK-NEXT: mov v2.b[11], w0 +; CHECK-NEXT: sdiv w8, w10, w8 +; CHECK-NEXT: smov w10, v1.b[14] +; CHECK-NEXT: mov v2.b[12], w9 ; CHECK-NEXT: sdiv w10, w11, w10 -; CHECK-NEXT: smov w11, v1.b[12] -; CHECK-NEXT: mov v2.b[10], w9 -; CHECK-NEXT: smov w9, v1.b[14] -; CHECK-NEXT: sdiv w11, w12, w11 -; CHECK-NEXT: smov w12, v1.b[13] -; CHECK-NEXT: mov v2.b[11], w10 -; CHECK-NEXT: smov w10, v1.b[15] -; CHECK-NEXT: sdiv w8, w13, w12 -; CHECK-NEXT: smov w12, v0.b[14] -; CHECK-NEXT: mov v2.b[12], w11 -; CHECK-NEXT: smov w11, v0.b[15] -; CHECK-NEXT: sdiv w9, w12, w9 +; CHECK-NEXT: smov w11, v1.b[15] ; CHECK-NEXT: mov v2.b[13], w8 -; CHECK-NEXT: sdiv w8, w11, w10 -; CHECK-NEXT: mov v2.b[14], w9 -; CHECK-NEXT: mov v2.b[15], w8 +; CHECK-NEXT: sdiv w11, w12, w11 +; CHECK-NEXT: mov v2.b[14], w10 +; CHECK-NEXT: mov v2.b[15], w11 ; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %tmp3 = sdiv <16 x i8> %A, %B; @@ -292,18 +292,18 @@ ; CHECK-NEXT: smov w9, v0.h[1] ; CHECK-NEXT: smov w10, v0.h[0] ; CHECK-NEXT: smov w11, v0.h[2] -; CHECK-NEXT: smov w12, v0.h[3] ; CHECK-NEXT: sdiv w8, w9, w8 ; CHECK-NEXT: smov w9, v1.h[0] +; CHECK-NEXT: smov w12, v0.h[3] ; CHECK-NEXT: sdiv w9, w10, w9 ; CHECK-NEXT: smov w10, v1.h[2] ; CHECK-NEXT: sdiv w10, w11, w10 ; CHECK-NEXT: smov w11, v1.h[3] ; CHECK-NEXT: fmov s0, w9 ; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: sdiv w8, w12, w11 +; CHECK-NEXT: sdiv w11, w12, w11 ; CHECK-NEXT: mov v0.h[2], w10 -; CHECK-NEXT: mov v0.h[3], w8 +; CHECK-NEXT: mov v0.h[3], w11 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %tmp3 = sdiv <4 x i16> %A, %B; @@ -317,32 +317,32 @@ ; CHECK-NEXT: smov w9, v0.h[1] ; CHECK-NEXT: smov w10, v0.h[0] ; CHECK-NEXT: smov w11, v0.h[2] -; CHECK-NEXT: smov w12, v0.h[3] -; CHECK-NEXT: smov w13, v0.h[4] ; CHECK-NEXT: sdiv w8, w9, w8 ; CHECK-NEXT: smov w9, v1.h[0] +; CHECK-NEXT: smov w12, v0.h[3] +; CHECK-NEXT: smov w13, v0.h[4] +; CHECK-NEXT: smov w14, v0.h[5] +; CHECK-NEXT: smov w15, v0.h[6] ; CHECK-NEXT: sdiv w9, w10, w9 ; CHECK-NEXT: smov w10, v1.h[2] ; CHECK-NEXT: sdiv w10, w11, w10 ; CHECK-NEXT: smov w11, v1.h[3] ; CHECK-NEXT: fmov s2, w9 -; CHECK-NEXT: smov w9, v1.h[5] ; CHECK-NEXT: mov v2.h[1], w8 +; CHECK-NEXT: smov w8, v1.h[7] ; CHECK-NEXT: sdiv w11, w12, w11 ; CHECK-NEXT: smov w12, v1.h[4] ; CHECK-NEXT: mov v2.h[2], w10 -; CHECK-NEXT: smov w10, v0.h[6] +; CHECK-NEXT: smov w10, v0.h[7] ; CHECK-NEXT: sdiv w12, w13, w12 -; CHECK-NEXT: smov w13, v0.h[5] +; CHECK-NEXT: smov w13, v1.h[5] ; CHECK-NEXT: mov v2.h[3], w11 -; CHECK-NEXT: smov w11, v0.h[7] -; CHECK-NEXT: sdiv w8, w13, w9 -; CHECK-NEXT: smov w9, v1.h[6] +; CHECK-NEXT: sdiv w13, w14, w13 +; CHECK-NEXT: smov w14, v1.h[6] ; CHECK-NEXT: mov v2.h[4], w12 -; CHECK-NEXT: sdiv w9, w10, w9 -; CHECK-NEXT: smov w10, v1.h[7] -; CHECK-NEXT: mov v2.h[5], w8 -; CHECK-NEXT: sdiv w8, w11, w10 +; CHECK-NEXT: sdiv w9, w15, w14 +; CHECK-NEXT: mov v2.h[5], w13 +; CHECK-NEXT: sdiv w8, w10, w8 ; CHECK-NEXT: mov v2.h[6], w9 ; CHECK-NEXT: mov v2.h[7], w8 ; CHECK-NEXT: mov v0.16b, v2.16b @@ -391,18 +391,18 @@ ; CHECK-NEXT: mov w9, v0.s[1] ; CHECK-NEXT: fmov w10, s0 ; CHECK-NEXT: mov w11, v0.s[2] -; CHECK-NEXT: mov w12, v0.s[3] ; CHECK-NEXT: sdiv w8, w9, w8 ; CHECK-NEXT: fmov w9, s1 +; CHECK-NEXT: mov w12, v0.s[3] ; CHECK-NEXT: sdiv w9, w10, w9 ; CHECK-NEXT: mov w10, v1.s[2] ; CHECK-NEXT: sdiv w10, w11, w10 ; CHECK-NEXT: mov w11, v1.s[3] ; CHECK-NEXT: fmov s0, w9 ; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: sdiv w8, w12, w11 +; CHECK-NEXT: sdiv w11, w12, w11 ; CHECK-NEXT: mov v0.s[2], w10 -; CHECK-NEXT: mov v0.s[3], w8 +; CHECK-NEXT: mov v0.s[3], w11 ; CHECK-NEXT: ret %tmp3 = sdiv <4 x i32> %A, %B; ret <4 x i32> %tmp3 @@ -461,32 +461,32 @@ ; CHECK-NEXT: umov w9, v0.b[1] ; CHECK-NEXT: umov w10, v0.b[0] ; CHECK-NEXT: umov w11, v0.b[2] -; CHECK-NEXT: umov w12, v0.b[3] -; CHECK-NEXT: umov w13, v0.b[4] ; CHECK-NEXT: udiv w8, w9, w8 ; CHECK-NEXT: umov w9, v1.b[0] +; CHECK-NEXT: umov w12, v0.b[3] +; CHECK-NEXT: umov w13, v0.b[4] +; CHECK-NEXT: umov w14, v0.b[5] +; CHECK-NEXT: umov w15, v0.b[6] ; CHECK-NEXT: udiv w9, w10, w9 ; CHECK-NEXT: umov w10, v1.b[2] ; CHECK-NEXT: udiv w10, w11, w10 ; CHECK-NEXT: umov w11, v1.b[3] ; CHECK-NEXT: fmov s2, w9 -; CHECK-NEXT: umov w9, v1.b[5] ; CHECK-NEXT: mov v2.b[1], w8 +; CHECK-NEXT: umov w8, v1.b[7] ; CHECK-NEXT: udiv w11, w12, w11 ; CHECK-NEXT: umov w12, v1.b[4] ; CHECK-NEXT: mov v2.b[2], w10 -; CHECK-NEXT: umov w10, v0.b[6] +; CHECK-NEXT: umov w10, v0.b[7] ; CHECK-NEXT: udiv w12, w13, w12 -; CHECK-NEXT: umov w13, v0.b[5] +; CHECK-NEXT: umov w13, v1.b[5] ; CHECK-NEXT: mov v2.b[3], w11 -; CHECK-NEXT: umov w11, v0.b[7] -; CHECK-NEXT: udiv w8, w13, w9 -; CHECK-NEXT: umov w9, v1.b[6] +; CHECK-NEXT: udiv w13, w14, w13 +; CHECK-NEXT: umov w14, v1.b[6] ; CHECK-NEXT: mov v2.b[4], w12 -; CHECK-NEXT: udiv w9, w10, w9 -; CHECK-NEXT: umov w10, v1.b[7] -; CHECK-NEXT: mov v2.b[5], w8 -; CHECK-NEXT: udiv w8, w11, w10 +; CHECK-NEXT: udiv w9, w15, w14 +; CHECK-NEXT: mov v2.b[5], w13 +; CHECK-NEXT: udiv w8, w10, w8 ; CHECK-NEXT: mov v2.b[6], w9 ; CHECK-NEXT: mov v2.b[7], w8 ; CHECK-NEXT: fmov d0, d2 @@ -502,66 +502,66 @@ ; CHECK-NEXT: umov w9, v0.b[1] ; CHECK-NEXT: umov w10, v0.b[0] ; CHECK-NEXT: umov w11, v0.b[2] +; CHECK-NEXT: udiv w8, w9, w8 +; CHECK-NEXT: umov w9, v1.b[0] ; CHECK-NEXT: umov w12, v0.b[3] ; CHECK-NEXT: umov w13, v0.b[4] ; CHECK-NEXT: umov w14, v0.b[5] ; CHECK-NEXT: umov w15, v0.b[6] -; CHECK-NEXT: udiv w8, w9, w8 -; CHECK-NEXT: umov w9, v1.b[0] ; CHECK-NEXT: umov w16, v0.b[7] ; CHECK-NEXT: umov w17, v0.b[8] +; CHECK-NEXT: umov w18, v0.b[9] +; CHECK-NEXT: umov w0, v0.b[10] +; CHECK-NEXT: umov w1, v0.b[11] +; CHECK-NEXT: umov w2, v0.b[12] ; CHECK-NEXT: udiv w9, w10, w9 ; CHECK-NEXT: umov w10, v1.b[2] ; CHECK-NEXT: udiv w10, w11, w10 ; CHECK-NEXT: umov w11, v1.b[3] ; CHECK-NEXT: fmov s2, w9 -; CHECK-NEXT: umov w9, v1.b[9] ; CHECK-NEXT: mov v2.b[1], w8 +; CHECK-NEXT: umov w8, v1.b[13] ; CHECK-NEXT: udiv w11, w12, w11 ; CHECK-NEXT: umov w12, v1.b[4] ; CHECK-NEXT: mov v2.b[2], w10 -; CHECK-NEXT: umov w10, v0.b[10] +; CHECK-NEXT: umov w10, v0.b[13] ; CHECK-NEXT: udiv w12, w13, w12 ; CHECK-NEXT: umov w13, v1.b[5] ; CHECK-NEXT: mov v2.b[3], w11 -; CHECK-NEXT: umov w11, v0.b[11] +; CHECK-NEXT: umov w11, v0.b[14] ; CHECK-NEXT: udiv w13, w14, w13 ; CHECK-NEXT: umov w14, v1.b[6] ; CHECK-NEXT: mov v2.b[4], w12 -; CHECK-NEXT: umov w12, v0.b[12] +; CHECK-NEXT: umov w12, v0.b[15] ; CHECK-NEXT: udiv w14, w15, w14 ; CHECK-NEXT: umov w15, v1.b[7] ; CHECK-NEXT: mov v2.b[5], w13 -; CHECK-NEXT: umov w13, v0.b[13] ; CHECK-NEXT: udiv w15, w16, w15 ; CHECK-NEXT: umov w16, v1.b[8] ; CHECK-NEXT: mov v2.b[6], w14 ; CHECK-NEXT: udiv w16, w17, w16 -; CHECK-NEXT: umov w17, v0.b[9] +; CHECK-NEXT: umov w17, v1.b[9] ; CHECK-NEXT: mov v2.b[7], w15 -; CHECK-NEXT: udiv w8, w17, w9 -; CHECK-NEXT: umov w9, v1.b[10] +; CHECK-NEXT: udiv w17, w18, w17 +; CHECK-NEXT: umov w18, v1.b[10] ; CHECK-NEXT: mov v2.b[8], w16 -; CHECK-NEXT: udiv w9, w10, w9 -; CHECK-NEXT: umov w10, v1.b[11] -; CHECK-NEXT: mov v2.b[9], w8 +; CHECK-NEXT: udiv w18, w0, w18 +; CHECK-NEXT: umov w0, v1.b[11] +; CHECK-NEXT: mov v2.b[9], w17 +; CHECK-NEXT: udiv w0, w1, w0 +; CHECK-NEXT: umov w1, v1.b[12] +; CHECK-NEXT: mov v2.b[10], w18 +; CHECK-NEXT: udiv w9, w2, w1 +; CHECK-NEXT: mov v2.b[11], w0 +; CHECK-NEXT: udiv w8, w10, w8 +; CHECK-NEXT: umov w10, v1.b[14] +; CHECK-NEXT: mov v2.b[12], w9 ; CHECK-NEXT: udiv w10, w11, w10 -; CHECK-NEXT: umov w11, v1.b[12] -; CHECK-NEXT: mov v2.b[10], w9 -; CHECK-NEXT: umov w9, v1.b[14] -; CHECK-NEXT: udiv w11, w12, w11 -; CHECK-NEXT: umov w12, v1.b[13] -; CHECK-NEXT: mov v2.b[11], w10 -; CHECK-NEXT: umov w10, v1.b[15] -; CHECK-NEXT: udiv w8, w13, w12 -; CHECK-NEXT: umov w12, v0.b[14] -; CHECK-NEXT: mov v2.b[12], w11 -; CHECK-NEXT: umov w11, v0.b[15] -; CHECK-NEXT: udiv w9, w12, w9 +; CHECK-NEXT: umov w11, v1.b[15] ; CHECK-NEXT: mov v2.b[13], w8 -; CHECK-NEXT: udiv w8, w11, w10 -; CHECK-NEXT: mov v2.b[14], w9 -; CHECK-NEXT: mov v2.b[15], w8 +; CHECK-NEXT: udiv w11, w12, w11 +; CHECK-NEXT: mov v2.b[14], w10 +; CHECK-NEXT: mov v2.b[15], w11 ; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %tmp3 = udiv <16 x i8> %A, %B; @@ -591,18 +591,18 @@ ; CHECK-NEXT: umov w9, v0.h[1] ; CHECK-NEXT: umov w10, v0.h[0] ; CHECK-NEXT: umov w11, v0.h[2] -; CHECK-NEXT: umov w12, v0.h[3] ; CHECK-NEXT: udiv w8, w9, w8 ; CHECK-NEXT: umov w9, v1.h[0] +; CHECK-NEXT: umov w12, v0.h[3] ; CHECK-NEXT: udiv w9, w10, w9 ; CHECK-NEXT: umov w10, v1.h[2] ; CHECK-NEXT: udiv w10, w11, w10 ; CHECK-NEXT: umov w11, v1.h[3] ; CHECK-NEXT: fmov s0, w9 ; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: udiv w8, w12, w11 +; CHECK-NEXT: udiv w11, w12, w11 ; CHECK-NEXT: mov v0.h[2], w10 -; CHECK-NEXT: mov v0.h[3], w8 +; CHECK-NEXT: mov v0.h[3], w11 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %tmp3 = udiv <4 x i16> %A, %B; @@ -616,32 +616,32 @@ ; CHECK-NEXT: umov w9, v0.h[1] ; CHECK-NEXT: umov w10, v0.h[0] ; CHECK-NEXT: umov w11, v0.h[2] -; CHECK-NEXT: umov w12, v0.h[3] -; CHECK-NEXT: umov w13, v0.h[4] ; CHECK-NEXT: udiv w8, w9, w8 ; CHECK-NEXT: umov w9, v1.h[0] +; CHECK-NEXT: umov w12, v0.h[3] +; CHECK-NEXT: umov w13, v0.h[4] +; CHECK-NEXT: umov w14, v0.h[5] +; CHECK-NEXT: umov w15, v0.h[6] ; CHECK-NEXT: udiv w9, w10, w9 ; CHECK-NEXT: umov w10, v1.h[2] ; CHECK-NEXT: udiv w10, w11, w10 ; CHECK-NEXT: umov w11, v1.h[3] ; CHECK-NEXT: fmov s2, w9 -; CHECK-NEXT: umov w9, v1.h[5] ; CHECK-NEXT: mov v2.h[1], w8 +; CHECK-NEXT: umov w8, v1.h[7] ; CHECK-NEXT: udiv w11, w12, w11 ; CHECK-NEXT: umov w12, v1.h[4] ; CHECK-NEXT: mov v2.h[2], w10 -; CHECK-NEXT: umov w10, v0.h[6] +; CHECK-NEXT: umov w10, v0.h[7] ; CHECK-NEXT: udiv w12, w13, w12 -; CHECK-NEXT: umov w13, v0.h[5] +; CHECK-NEXT: umov w13, v1.h[5] ; CHECK-NEXT: mov v2.h[3], w11 -; CHECK-NEXT: umov w11, v0.h[7] -; CHECK-NEXT: udiv w8, w13, w9 -; CHECK-NEXT: umov w9, v1.h[6] +; CHECK-NEXT: udiv w13, w14, w13 +; CHECK-NEXT: umov w14, v1.h[6] ; CHECK-NEXT: mov v2.h[4], w12 -; CHECK-NEXT: udiv w9, w10, w9 -; CHECK-NEXT: umov w10, v1.h[7] -; CHECK-NEXT: mov v2.h[5], w8 -; CHECK-NEXT: udiv w8, w11, w10 +; CHECK-NEXT: udiv w9, w15, w14 +; CHECK-NEXT: mov v2.h[5], w13 +; CHECK-NEXT: udiv w8, w10, w8 ; CHECK-NEXT: mov v2.h[6], w9 ; CHECK-NEXT: mov v2.h[7], w8 ; CHECK-NEXT: mov v0.16b, v2.16b @@ -690,18 +690,18 @@ ; CHECK-NEXT: mov w9, v0.s[1] ; CHECK-NEXT: fmov w10, s0 ; CHECK-NEXT: mov w11, v0.s[2] -; CHECK-NEXT: mov w12, v0.s[3] ; CHECK-NEXT: udiv w8, w9, w8 ; CHECK-NEXT: fmov w9, s1 +; CHECK-NEXT: mov w12, v0.s[3] ; CHECK-NEXT: udiv w9, w10, w9 ; CHECK-NEXT: mov w10, v1.s[2] ; CHECK-NEXT: udiv w10, w11, w10 ; CHECK-NEXT: mov w11, v1.s[3] ; CHECK-NEXT: fmov s0, w9 ; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: udiv w8, w12, w11 +; CHECK-NEXT: udiv w11, w12, w11 ; CHECK-NEXT: mov v0.s[2], w10 -; CHECK-NEXT: mov v0.s[3], w8 +; CHECK-NEXT: mov v0.s[3], w11 ; CHECK-NEXT: ret %tmp3 = udiv <4 x i32> %A, %B; ret <4 x i32> %tmp3 @@ -755,49 +755,53 @@ define <8 x i8> @srem8x8(<8 x i8> %A, <8 x i8> %B) { ; CHECK-LABEL: srem8x8: ; CHECK: // %bb.0: +; CHECK-NEXT: str x19, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w19, -16 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: smov w11, v1.b[0] ; CHECK-NEXT: smov w12, v0.b[0] ; CHECK-NEXT: smov w8, v1.b[1] ; CHECK-NEXT: smov w9, v0.b[1] +; CHECK-NEXT: sdiv w13, w12, w11 ; CHECK-NEXT: smov w14, v1.b[2] ; CHECK-NEXT: smov w15, v0.b[2] ; CHECK-NEXT: smov w17, v1.b[3] ; CHECK-NEXT: smov w18, v0.b[3] -; CHECK-NEXT: sdiv w13, w12, w11 ; CHECK-NEXT: smov w1, v1.b[4] ; CHECK-NEXT: smov w2, v0.b[4] +; CHECK-NEXT: smov w4, v1.b[5] +; CHECK-NEXT: smov w5, v0.b[5] +; CHECK-NEXT: smov w7, v1.b[6] +; CHECK-NEXT: smov w19, v0.b[6] ; CHECK-NEXT: msub w11, w13, w11, w12 -; CHECK-NEXT: smov w12, v1.b[5] ; CHECK-NEXT: sdiv w10, w9, w8 -; CHECK-NEXT: smov w13, v0.b[5] ; CHECK-NEXT: fmov s2, w11 -; CHECK-NEXT: smov w11, v0.b[6] +; CHECK-NEXT: smov w11, v0.b[7] ; CHECK-NEXT: msub w8, w10, w8, w9 -; CHECK-NEXT: smov w10, v1.b[6] +; CHECK-NEXT: smov w10, v1.b[7] ; CHECK-NEXT: sdiv w16, w15, w14 ; CHECK-NEXT: mov v2.b[1], w8 -; CHECK-NEXT: msub w8, w16, w14, w15 -; CHECK-NEXT: smov w15, v1.b[7] +; CHECK-NEXT: msub w9, w16, w14, w15 ; CHECK-NEXT: sdiv w0, w18, w17 -; CHECK-NEXT: smov w16, v0.b[7] -; CHECK-NEXT: mov v2.b[2], w8 -; CHECK-NEXT: msub w14, w0, w17, w18 +; CHECK-NEXT: mov v2.b[2], w9 +; CHECK-NEXT: msub w13, w0, w17, w18 ; CHECK-NEXT: sdiv w3, w2, w1 -; CHECK-NEXT: mov v2.b[3], w14 +; CHECK-NEXT: mov v2.b[3], w13 ; CHECK-NEXT: msub w14, w3, w1, w2 -; CHECK-NEXT: sdiv w9, w13, w12 +; CHECK-NEXT: sdiv w6, w5, w4 ; CHECK-NEXT: mov v2.b[4], w14 -; CHECK-NEXT: msub w9, w9, w12, w13 -; CHECK-NEXT: sdiv w8, w11, w10 +; CHECK-NEXT: msub w9, w6, w4, w5 +; CHECK-NEXT: sdiv w12, w19, w7 ; CHECK-NEXT: mov v2.b[5], w9 +; CHECK-NEXT: msub w12, w12, w7, w19 +; CHECK-NEXT: sdiv w8, w11, w10 +; CHECK-NEXT: mov v2.b[6], w12 ; CHECK-NEXT: msub w8, w8, w10, w11 -; CHECK-NEXT: sdiv w12, w16, w15 -; CHECK-NEXT: mov v2.b[6], w8 -; CHECK-NEXT: msub w8, w12, w15, w16 ; CHECK-NEXT: mov v2.b[7], w8 ; CHECK-NEXT: fmov d0, d2 +; CHECK-NEXT: ldr x19, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %tmp3 = srem <8 x i8> %A, %B; ret <8 x i8> %tmp3 @@ -806,11 +810,14 @@ define <16 x i8> @srem16x8(<16 x i8> %A, <16 x i8> %B) { ; CHECK-LABEL: srem16x8: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x26, x25, [sp, #-64]! // 16-byte Folded Spill -; CHECK-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: sub sp, sp, #128 +; CHECK-NEXT: stp x29, x30, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp x28, x27, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp x26, x25, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: stp x24, x23, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: stp x22, x21, [sp, #96] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #112] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 128 ; CHECK-NEXT: .cfi_offset w19, -8 ; CHECK-NEXT: .cfi_offset w20, -16 ; CHECK-NEXT: .cfi_offset w21, -24 @@ -819,15 +826,20 @@ ; CHECK-NEXT: .cfi_offset w24, -48 ; CHECK-NEXT: .cfi_offset w25, -56 ; CHECK-NEXT: .cfi_offset w26, -64 -; CHECK-NEXT: smov w11, v1.b[0] -; CHECK-NEXT: smov w12, v0.b[0] +; CHECK-NEXT: .cfi_offset w27, -72 +; CHECK-NEXT: .cfi_offset w28, -80 +; CHECK-NEXT: .cfi_offset w30, -88 +; CHECK-NEXT: .cfi_offset w29, -96 ; CHECK-NEXT: smov w8, v1.b[1] -; CHECK-NEXT: smov w9, v0.b[1] -; CHECK-NEXT: smov w14, v1.b[2] +; CHECK-NEXT: smov w6, v0.b[1] +; CHECK-NEXT: smov w16, v1.b[2] ; CHECK-NEXT: smov w15, v0.b[2] +; CHECK-NEXT: str w8, [sp, #24] // 4-byte Folded Spill +; CHECK-NEXT: sdiv w8, w6, w8 ; CHECK-NEXT: smov w17, v1.b[3] ; CHECK-NEXT: smov w18, v0.b[3] -; CHECK-NEXT: sdiv w13, w12, w11 +; CHECK-NEXT: smov w3, v1.b[0] +; CHECK-NEXT: smov w0, v0.b[0] ; CHECK-NEXT: smov w1, v1.b[4] ; CHECK-NEXT: smov w2, v0.b[4] ; CHECK-NEXT: smov w4, v1.b[5] @@ -838,72 +850,84 @@ ; CHECK-NEXT: smov w22, v0.b[7] ; CHECK-NEXT: smov w24, v1.b[8] ; CHECK-NEXT: smov w25, v0.b[8] -; CHECK-NEXT: msub w11, w13, w11, w12 -; CHECK-NEXT: smov w12, v1.b[9] -; CHECK-NEXT: sdiv w10, w9, w8 -; CHECK-NEXT: smov w13, v0.b[9] -; CHECK-NEXT: fmov s2, w11 -; CHECK-NEXT: smov w11, v0.b[10] -; CHECK-NEXT: msub w8, w10, w8, w9 -; CHECK-NEXT: smov w10, v1.b[10] -; CHECK-NEXT: sdiv w16, w15, w14 -; CHECK-NEXT: mov v2.b[1], w8 -; CHECK-NEXT: msub w8, w16, w14, w15 -; CHECK-NEXT: smov w15, v1.b[11] -; CHECK-NEXT: sdiv w0, w18, w17 -; CHECK-NEXT: smov w16, v0.b[11] -; CHECK-NEXT: mov v2.b[2], w8 -; CHECK-NEXT: msub w14, w0, w17, w18 -; CHECK-NEXT: smov w18, v1.b[12] -; CHECK-NEXT: sdiv w3, w2, w1 -; CHECK-NEXT: smov w0, v0.b[12] -; CHECK-NEXT: mov v2.b[3], w14 -; CHECK-NEXT: msub w14, w3, w1, w2 -; CHECK-NEXT: smov w2, v1.b[13] -; CHECK-NEXT: sdiv w6, w5, w4 -; CHECK-NEXT: smov w3, v0.b[13] -; CHECK-NEXT: mov v2.b[4], w14 -; CHECK-NEXT: msub w17, w6, w4, w5 -; CHECK-NEXT: sdiv w20, w19, w7 -; CHECK-NEXT: mov v2.b[5], w17 -; CHECK-NEXT: msub w17, w20, w7, w19 -; CHECK-NEXT: sdiv w23, w22, w21 -; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: mov v2.b[6], w17 -; CHECK-NEXT: msub w1, w23, w21, w22 +; CHECK-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-NEXT: sdiv w9, w15, w16 +; CHECK-NEXT: smov w27, v1.b[9] +; CHECK-NEXT: smov w28, v0.b[9] +; CHECK-NEXT: smov w30, v1.b[10] +; CHECK-NEXT: smov w12, v0.b[10] +; CHECK-NEXT: smov w11, v1.b[11] +; CHECK-NEXT: smov w10, v0.b[11] +; CHECK-NEXT: sdiv w8, w18, w17 +; CHECK-NEXT: sdiv w20, w0, w3 +; CHECK-NEXT: stp w8, w9, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: msub w0, w20, w3, w0 +; CHECK-NEXT: sdiv w9, w2, w1 +; CHECK-NEXT: ldp w23, w20, [sp, #24] // 8-byte Folded Reload +; CHECK-NEXT: fmov s2, w0 +; CHECK-NEXT: msub w6, w20, w23, w6 +; CHECK-NEXT: ldp w20, w0, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: sdiv w8, w5, w4 +; CHECK-NEXT: mov v2.b[1], w6 +; CHECK-NEXT: msub w15, w0, w16, w15 +; CHECK-NEXT: smov w16, v1.b[13] +; CHECK-NEXT: msub w17, w20, w17, w18 +; CHECK-NEXT: smov w0, v0.b[13] +; CHECK-NEXT: mov v2.b[2], w15 +; CHECK-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: sdiv w9, w19, w7 +; CHECK-NEXT: ldr w6, [sp, #12] // 4-byte Folded Reload +; CHECK-NEXT: ldr w15, [sp, #8] // 4-byte Folded Reload +; CHECK-NEXT: mov v2.b[3], w17 +; CHECK-NEXT: msub w1, w6, w1, w2 +; CHECK-NEXT: smov w2, v0.b[14] +; CHECK-NEXT: msub w15, w15, w4, w5 +; CHECK-NEXT: sdiv w8, w22, w21 +; CHECK-NEXT: mov v2.b[4], w1 +; CHECK-NEXT: smov w1, v1.b[14] +; CHECK-NEXT: mov v2.b[5], w15 +; CHECK-NEXT: stp w8, w9, [sp] // 8-byte Folded Spill ; CHECK-NEXT: sdiv w26, w25, w24 -; CHECK-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: mov v2.b[7], w1 -; CHECK-NEXT: msub w1, w26, w24, w25 -; CHECK-NEXT: sdiv w9, w13, w12 -; CHECK-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: mov v2.b[8], w1 -; CHECK-NEXT: msub w9, w9, w12, w13 -; CHECK-NEXT: smov w13, v1.b[15] -; CHECK-NEXT: sdiv w8, w11, w10 -; CHECK-NEXT: mov v2.b[9], w9 -; CHECK-NEXT: smov w9, v1.b[14] -; CHECK-NEXT: msub w8, w8, w10, w11 -; CHECK-NEXT: smov w10, v0.b[14] -; CHECK-NEXT: sdiv w14, w16, w15 -; CHECK-NEXT: mov v2.b[10], w8 -; CHECK-NEXT: msub w11, w14, w15, w16 -; CHECK-NEXT: smov w14, v0.b[15] -; CHECK-NEXT: sdiv w17, w0, w18 -; CHECK-NEXT: mov v2.b[11], w11 -; CHECK-NEXT: msub w11, w17, w18, w0 -; CHECK-NEXT: sdiv w12, w3, w2 -; CHECK-NEXT: mov v2.b[12], w11 -; CHECK-NEXT: msub w12, w12, w2, w3 -; CHECK-NEXT: sdiv w8, w10, w9 -; CHECK-NEXT: mov v2.b[13], w12 -; CHECK-NEXT: msub w8, w8, w9, w10 -; CHECK-NEXT: sdiv w11, w14, w13 -; CHECK-NEXT: mov v2.b[14], w8 -; CHECK-NEXT: msub w8, w11, w13, w14 +; CHECK-NEXT: ldp w4, w17, [sp] // 8-byte Folded Reload +; CHECK-NEXT: smov w9, v1.b[12] +; CHECK-NEXT: smov w8, v0.b[12] +; CHECK-NEXT: msub w17, w17, w7, w19 +; CHECK-NEXT: msub w4, w4, w21, w22 +; CHECK-NEXT: msub w5, w26, w24, w25 +; CHECK-NEXT: sdiv w29, w28, w27 +; CHECK-NEXT: mov v2.b[6], w17 +; CHECK-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload +; CHECK-NEXT: mov v2.b[7], w4 +; CHECK-NEXT: smov w4, v0.b[15] +; CHECK-NEXT: ldp x22, x21, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: mov v2.b[8], w5 +; CHECK-NEXT: ldp x24, x23, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: msub w17, w29, w27, w28 +; CHECK-NEXT: sdiv w14, w12, w30 +; CHECK-NEXT: ldp x26, x25, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: mov v2.b[9], w17 +; CHECK-NEXT: ldp x28, x27, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: msub w12, w14, w30, w12 +; CHECK-NEXT: smov w14, v1.b[15] +; CHECK-NEXT: sdiv w13, w10, w11 +; CHECK-NEXT: ldp x29, x30, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: mov v2.b[10], w12 +; CHECK-NEXT: msub w10, w13, w11, w10 +; CHECK-NEXT: sdiv w3, w8, w9 +; CHECK-NEXT: mov v2.b[11], w10 +; CHECK-NEXT: msub w8, w3, w9, w8 +; CHECK-NEXT: sdiv w18, w0, w16 +; CHECK-NEXT: mov v2.b[12], w8 +; CHECK-NEXT: msub w9, w18, w16, w0 +; CHECK-NEXT: sdiv w15, w2, w1 +; CHECK-NEXT: mov v2.b[13], w9 +; CHECK-NEXT: msub w10, w15, w1, w2 +; CHECK-NEXT: sdiv w11, w4, w14 +; CHECK-NEXT: mov v2.b[14], w10 +; CHECK-NEXT: msub w8, w11, w14, w4 ; CHECK-NEXT: mov v2.b[15], w8 ; CHECK-NEXT: mov v0.16b, v2.16b -; CHECK-NEXT: ldp x26, x25, [sp], #64 // 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #128 ; CHECK-NEXT: ret %tmp3 = srem <16 x i8> %A, %B; ret <16 x i8> %tmp3 @@ -933,21 +957,21 @@ ; CHECK-NEXT: smov w12, v0.h[0] ; CHECK-NEXT: smov w8, v1.h[1] ; CHECK-NEXT: smov w9, v0.h[1] +; CHECK-NEXT: sdiv w13, w12, w11 ; CHECK-NEXT: smov w14, v1.h[2] ; CHECK-NEXT: smov w15, v0.h[2] -; CHECK-NEXT: sdiv w13, w12, w11 +; CHECK-NEXT: smov w17, v1.h[3] +; CHECK-NEXT: smov w18, v0.h[3] ; CHECK-NEXT: msub w11, w13, w11, w12 -; CHECK-NEXT: smov w12, v1.h[3] ; CHECK-NEXT: sdiv w10, w9, w8 -; CHECK-NEXT: smov w13, v0.h[3] ; CHECK-NEXT: fmov s0, w11 ; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: sdiv w16, w15, w14 ; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: msub w10, w16, w14, w15 -; CHECK-NEXT: sdiv w9, w13, w12 -; CHECK-NEXT: mov v0.h[2], w10 -; CHECK-NEXT: msub w8, w9, w12, w13 +; CHECK-NEXT: msub w9, w16, w14, w15 +; CHECK-NEXT: sdiv w12, w18, w17 +; CHECK-NEXT: mov v0.h[2], w9 +; CHECK-NEXT: msub w8, w12, w17, w18 ; CHECK-NEXT: mov v0.h[3], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret @@ -958,47 +982,51 @@ define <8 x i16> @srem8x16(<8 x i16> %A, <8 x i16> %B) { ; CHECK-LABEL: srem8x16: ; CHECK: // %bb.0: +; CHECK-NEXT: str x19, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w19, -16 ; CHECK-NEXT: smov w11, v1.h[0] ; CHECK-NEXT: smov w12, v0.h[0] ; CHECK-NEXT: smov w8, v1.h[1] ; CHECK-NEXT: smov w9, v0.h[1] +; CHECK-NEXT: sdiv w13, w12, w11 ; CHECK-NEXT: smov w14, v1.h[2] ; CHECK-NEXT: smov w15, v0.h[2] ; CHECK-NEXT: smov w17, v1.h[3] ; CHECK-NEXT: smov w18, v0.h[3] -; CHECK-NEXT: sdiv w13, w12, w11 ; CHECK-NEXT: smov w1, v1.h[4] ; CHECK-NEXT: smov w2, v0.h[4] +; CHECK-NEXT: smov w4, v1.h[5] +; CHECK-NEXT: smov w5, v0.h[5] +; CHECK-NEXT: smov w7, v1.h[6] +; CHECK-NEXT: smov w19, v0.h[6] ; CHECK-NEXT: msub w11, w13, w11, w12 -; CHECK-NEXT: smov w12, v1.h[5] ; CHECK-NEXT: sdiv w10, w9, w8 -; CHECK-NEXT: smov w13, v0.h[5] ; CHECK-NEXT: fmov s2, w11 -; CHECK-NEXT: smov w11, v0.h[6] +; CHECK-NEXT: smov w11, v0.h[7] ; CHECK-NEXT: msub w8, w10, w8, w9 -; CHECK-NEXT: smov w10, v1.h[6] +; CHECK-NEXT: smov w10, v1.h[7] ; CHECK-NEXT: sdiv w16, w15, w14 ; CHECK-NEXT: mov v2.h[1], w8 -; CHECK-NEXT: msub w8, w16, w14, w15 -; CHECK-NEXT: smov w15, v1.h[7] +; CHECK-NEXT: msub w9, w16, w14, w15 ; CHECK-NEXT: sdiv w0, w18, w17 -; CHECK-NEXT: smov w16, v0.h[7] -; CHECK-NEXT: mov v2.h[2], w8 -; CHECK-NEXT: msub w14, w0, w17, w18 +; CHECK-NEXT: mov v2.h[2], w9 +; CHECK-NEXT: msub w13, w0, w17, w18 ; CHECK-NEXT: sdiv w3, w2, w1 -; CHECK-NEXT: mov v2.h[3], w14 +; CHECK-NEXT: mov v2.h[3], w13 ; CHECK-NEXT: msub w14, w3, w1, w2 -; CHECK-NEXT: sdiv w9, w13, w12 +; CHECK-NEXT: sdiv w6, w5, w4 ; CHECK-NEXT: mov v2.h[4], w14 -; CHECK-NEXT: msub w9, w9, w12, w13 -; CHECK-NEXT: sdiv w8, w11, w10 +; CHECK-NEXT: msub w9, w6, w4, w5 +; CHECK-NEXT: sdiv w12, w19, w7 ; CHECK-NEXT: mov v2.h[5], w9 +; CHECK-NEXT: msub w12, w12, w7, w19 +; CHECK-NEXT: sdiv w8, w11, w10 +; CHECK-NEXT: mov v2.h[6], w12 ; CHECK-NEXT: msub w8, w8, w10, w11 -; CHECK-NEXT: sdiv w12, w16, w15 -; CHECK-NEXT: mov v2.h[6], w8 -; CHECK-NEXT: msub w8, w12, w15, w16 ; CHECK-NEXT: mov v2.h[7], w8 ; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ldr x19, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %tmp3 = srem <8 x i16> %A, %B; ret <8 x i16> %tmp3 @@ -1058,10 +1086,10 @@ ; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: sdiv w16, w15, w14 ; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: msub w10, w16, w14, w15 -; CHECK-NEXT: sdiv w9, w18, w17 -; CHECK-NEXT: mov v0.s[2], w10 -; CHECK-NEXT: msub w8, w9, w17, w18 +; CHECK-NEXT: msub w9, w16, w14, w15 +; CHECK-NEXT: sdiv w12, w18, w17 +; CHECK-NEXT: mov v0.s[2], w9 +; CHECK-NEXT: msub w8, w12, w17, w18 ; CHECK-NEXT: mov v0.s[3], w8 ; CHECK-NEXT: ret %tmp3 = srem <4 x i32> %A, %B; @@ -1119,49 +1147,53 @@ define <8 x i8> @urem8x8(<8 x i8> %A, <8 x i8> %B) { ; CHECK-LABEL: urem8x8: ; CHECK: // %bb.0: +; CHECK-NEXT: str x19, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w19, -16 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: umov w11, v1.b[0] ; CHECK-NEXT: umov w12, v0.b[0] ; CHECK-NEXT: umov w8, v1.b[1] ; CHECK-NEXT: umov w9, v0.b[1] +; CHECK-NEXT: udiv w13, w12, w11 ; CHECK-NEXT: umov w14, v1.b[2] ; CHECK-NEXT: umov w15, v0.b[2] ; CHECK-NEXT: umov w17, v1.b[3] ; CHECK-NEXT: umov w18, v0.b[3] -; CHECK-NEXT: udiv w13, w12, w11 ; CHECK-NEXT: umov w1, v1.b[4] ; CHECK-NEXT: umov w2, v0.b[4] +; CHECK-NEXT: umov w4, v1.b[5] +; CHECK-NEXT: umov w5, v0.b[5] +; CHECK-NEXT: umov w7, v1.b[6] +; CHECK-NEXT: umov w19, v0.b[6] ; CHECK-NEXT: msub w11, w13, w11, w12 -; CHECK-NEXT: umov w12, v1.b[5] ; CHECK-NEXT: udiv w10, w9, w8 -; CHECK-NEXT: umov w13, v0.b[5] ; CHECK-NEXT: fmov s2, w11 -; CHECK-NEXT: umov w11, v0.b[6] +; CHECK-NEXT: umov w11, v0.b[7] ; CHECK-NEXT: msub w8, w10, w8, w9 -; CHECK-NEXT: umov w10, v1.b[6] +; CHECK-NEXT: umov w10, v1.b[7] ; CHECK-NEXT: udiv w16, w15, w14 ; CHECK-NEXT: mov v2.b[1], w8 -; CHECK-NEXT: msub w8, w16, w14, w15 -; CHECK-NEXT: umov w15, v1.b[7] +; CHECK-NEXT: msub w9, w16, w14, w15 ; CHECK-NEXT: udiv w0, w18, w17 -; CHECK-NEXT: umov w16, v0.b[7] -; CHECK-NEXT: mov v2.b[2], w8 -; CHECK-NEXT: msub w14, w0, w17, w18 +; CHECK-NEXT: mov v2.b[2], w9 +; CHECK-NEXT: msub w13, w0, w17, w18 ; CHECK-NEXT: udiv w3, w2, w1 -; CHECK-NEXT: mov v2.b[3], w14 +; CHECK-NEXT: mov v2.b[3], w13 ; CHECK-NEXT: msub w14, w3, w1, w2 -; CHECK-NEXT: udiv w9, w13, w12 +; CHECK-NEXT: udiv w6, w5, w4 ; CHECK-NEXT: mov v2.b[4], w14 -; CHECK-NEXT: msub w9, w9, w12, w13 -; CHECK-NEXT: udiv w8, w11, w10 +; CHECK-NEXT: msub w9, w6, w4, w5 +; CHECK-NEXT: udiv w12, w19, w7 ; CHECK-NEXT: mov v2.b[5], w9 +; CHECK-NEXT: msub w12, w12, w7, w19 +; CHECK-NEXT: udiv w8, w11, w10 +; CHECK-NEXT: mov v2.b[6], w12 ; CHECK-NEXT: msub w8, w8, w10, w11 -; CHECK-NEXT: udiv w12, w16, w15 -; CHECK-NEXT: mov v2.b[6], w8 -; CHECK-NEXT: msub w8, w12, w15, w16 ; CHECK-NEXT: mov v2.b[7], w8 ; CHECK-NEXT: fmov d0, d2 +; CHECK-NEXT: ldr x19, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %tmp3 = urem <8 x i8> %A, %B; ret <8 x i8> %tmp3 @@ -1170,11 +1202,14 @@ define <16 x i8> @urem16x8(<16 x i8> %A, <16 x i8> %B) { ; CHECK-LABEL: urem16x8: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x26, x25, [sp, #-64]! // 16-byte Folded Spill -; CHECK-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: sub sp, sp, #128 +; CHECK-NEXT: stp x29, x30, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp x28, x27, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp x26, x25, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: stp x24, x23, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: stp x22, x21, [sp, #96] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #112] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 128 ; CHECK-NEXT: .cfi_offset w19, -8 ; CHECK-NEXT: .cfi_offset w20, -16 ; CHECK-NEXT: .cfi_offset w21, -24 @@ -1183,15 +1218,20 @@ ; CHECK-NEXT: .cfi_offset w24, -48 ; CHECK-NEXT: .cfi_offset w25, -56 ; CHECK-NEXT: .cfi_offset w26, -64 -; CHECK-NEXT: umov w11, v1.b[0] -; CHECK-NEXT: umov w12, v0.b[0] +; CHECK-NEXT: .cfi_offset w27, -72 +; CHECK-NEXT: .cfi_offset w28, -80 +; CHECK-NEXT: .cfi_offset w30, -88 +; CHECK-NEXT: .cfi_offset w29, -96 ; CHECK-NEXT: umov w8, v1.b[1] -; CHECK-NEXT: umov w9, v0.b[1] -; CHECK-NEXT: umov w14, v1.b[2] +; CHECK-NEXT: umov w6, v0.b[1] +; CHECK-NEXT: umov w16, v1.b[2] ; CHECK-NEXT: umov w15, v0.b[2] +; CHECK-NEXT: str w8, [sp, #24] // 4-byte Folded Spill +; CHECK-NEXT: udiv w8, w6, w8 ; CHECK-NEXT: umov w17, v1.b[3] ; CHECK-NEXT: umov w18, v0.b[3] -; CHECK-NEXT: udiv w13, w12, w11 +; CHECK-NEXT: umov w3, v1.b[0] +; CHECK-NEXT: umov w0, v0.b[0] ; CHECK-NEXT: umov w1, v1.b[4] ; CHECK-NEXT: umov w2, v0.b[4] ; CHECK-NEXT: umov w4, v1.b[5] @@ -1202,72 +1242,84 @@ ; CHECK-NEXT: umov w22, v0.b[7] ; CHECK-NEXT: umov w24, v1.b[8] ; CHECK-NEXT: umov w25, v0.b[8] -; CHECK-NEXT: msub w11, w13, w11, w12 -; CHECK-NEXT: umov w12, v1.b[9] -; CHECK-NEXT: udiv w10, w9, w8 -; CHECK-NEXT: umov w13, v0.b[9] -; CHECK-NEXT: fmov s2, w11 -; CHECK-NEXT: umov w11, v0.b[10] -; CHECK-NEXT: msub w8, w10, w8, w9 -; CHECK-NEXT: umov w10, v1.b[10] -; CHECK-NEXT: udiv w16, w15, w14 -; CHECK-NEXT: mov v2.b[1], w8 -; CHECK-NEXT: msub w8, w16, w14, w15 -; CHECK-NEXT: umov w15, v1.b[11] -; CHECK-NEXT: udiv w0, w18, w17 -; CHECK-NEXT: umov w16, v0.b[11] -; CHECK-NEXT: mov v2.b[2], w8 -; CHECK-NEXT: msub w14, w0, w17, w18 -; CHECK-NEXT: umov w18, v1.b[12] -; CHECK-NEXT: udiv w3, w2, w1 -; CHECK-NEXT: umov w0, v0.b[12] -; CHECK-NEXT: mov v2.b[3], w14 -; CHECK-NEXT: msub w14, w3, w1, w2 -; CHECK-NEXT: umov w2, v1.b[13] -; CHECK-NEXT: udiv w6, w5, w4 -; CHECK-NEXT: umov w3, v0.b[13] -; CHECK-NEXT: mov v2.b[4], w14 -; CHECK-NEXT: msub w17, w6, w4, w5 -; CHECK-NEXT: udiv w20, w19, w7 -; CHECK-NEXT: mov v2.b[5], w17 -; CHECK-NEXT: msub w17, w20, w7, w19 -; CHECK-NEXT: udiv w23, w22, w21 -; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: mov v2.b[6], w17 -; CHECK-NEXT: msub w1, w23, w21, w22 +; CHECK-NEXT: str w8, [sp, #28] // 4-byte Folded Spill +; CHECK-NEXT: udiv w9, w15, w16 +; CHECK-NEXT: umov w27, v1.b[9] +; CHECK-NEXT: umov w28, v0.b[9] +; CHECK-NEXT: umov w30, v1.b[10] +; CHECK-NEXT: umov w12, v0.b[10] +; CHECK-NEXT: umov w11, v1.b[11] +; CHECK-NEXT: umov w10, v0.b[11] +; CHECK-NEXT: udiv w8, w18, w17 +; CHECK-NEXT: udiv w20, w0, w3 +; CHECK-NEXT: stp w8, w9, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: msub w0, w20, w3, w0 +; CHECK-NEXT: udiv w9, w2, w1 +; CHECK-NEXT: ldp w23, w20, [sp, #24] // 8-byte Folded Reload +; CHECK-NEXT: fmov s2, w0 +; CHECK-NEXT: msub w6, w20, w23, w6 +; CHECK-NEXT: ldp w20, w0, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: udiv w8, w5, w4 +; CHECK-NEXT: mov v2.b[1], w6 +; CHECK-NEXT: msub w15, w0, w16, w15 +; CHECK-NEXT: umov w16, v1.b[13] +; CHECK-NEXT: msub w17, w20, w17, w18 +; CHECK-NEXT: umov w0, v0.b[13] +; CHECK-NEXT: mov v2.b[2], w15 +; CHECK-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: udiv w9, w19, w7 +; CHECK-NEXT: ldr w6, [sp, #12] // 4-byte Folded Reload +; CHECK-NEXT: ldr w15, [sp, #8] // 4-byte Folded Reload +; CHECK-NEXT: mov v2.b[3], w17 +; CHECK-NEXT: msub w1, w6, w1, w2 +; CHECK-NEXT: umov w2, v0.b[14] +; CHECK-NEXT: msub w15, w15, w4, w5 +; CHECK-NEXT: udiv w8, w22, w21 +; CHECK-NEXT: mov v2.b[4], w1 +; CHECK-NEXT: umov w1, v1.b[14] +; CHECK-NEXT: mov v2.b[5], w15 +; CHECK-NEXT: stp w8, w9, [sp] // 8-byte Folded Spill ; CHECK-NEXT: udiv w26, w25, w24 -; CHECK-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: mov v2.b[7], w1 -; CHECK-NEXT: msub w1, w26, w24, w25 -; CHECK-NEXT: udiv w9, w13, w12 -; CHECK-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: mov v2.b[8], w1 -; CHECK-NEXT: msub w9, w9, w12, w13 -; CHECK-NEXT: umov w13, v1.b[15] -; CHECK-NEXT: udiv w8, w11, w10 -; CHECK-NEXT: mov v2.b[9], w9 -; CHECK-NEXT: umov w9, v1.b[14] -; CHECK-NEXT: msub w8, w8, w10, w11 -; CHECK-NEXT: umov w10, v0.b[14] -; CHECK-NEXT: udiv w14, w16, w15 -; CHECK-NEXT: mov v2.b[10], w8 -; CHECK-NEXT: msub w11, w14, w15, w16 -; CHECK-NEXT: umov w14, v0.b[15] -; CHECK-NEXT: udiv w17, w0, w18 -; CHECK-NEXT: mov v2.b[11], w11 -; CHECK-NEXT: msub w11, w17, w18, w0 -; CHECK-NEXT: udiv w12, w3, w2 -; CHECK-NEXT: mov v2.b[12], w11 -; CHECK-NEXT: msub w12, w12, w2, w3 -; CHECK-NEXT: udiv w8, w10, w9 -; CHECK-NEXT: mov v2.b[13], w12 -; CHECK-NEXT: msub w8, w8, w9, w10 -; CHECK-NEXT: udiv w11, w14, w13 -; CHECK-NEXT: mov v2.b[14], w8 -; CHECK-NEXT: msub w8, w11, w13, w14 +; CHECK-NEXT: ldp w4, w17, [sp] // 8-byte Folded Reload +; CHECK-NEXT: umov w9, v1.b[12] +; CHECK-NEXT: umov w8, v0.b[12] +; CHECK-NEXT: msub w17, w17, w7, w19 +; CHECK-NEXT: msub w4, w4, w21, w22 +; CHECK-NEXT: msub w5, w26, w24, w25 +; CHECK-NEXT: udiv w29, w28, w27 +; CHECK-NEXT: mov v2.b[6], w17 +; CHECK-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload +; CHECK-NEXT: mov v2.b[7], w4 +; CHECK-NEXT: umov w4, v0.b[15] +; CHECK-NEXT: ldp x22, x21, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: mov v2.b[8], w5 +; CHECK-NEXT: ldp x24, x23, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: msub w17, w29, w27, w28 +; CHECK-NEXT: udiv w14, w12, w30 +; CHECK-NEXT: ldp x26, x25, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: mov v2.b[9], w17 +; CHECK-NEXT: ldp x28, x27, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: msub w12, w14, w30, w12 +; CHECK-NEXT: umov w14, v1.b[15] +; CHECK-NEXT: udiv w13, w10, w11 +; CHECK-NEXT: ldp x29, x30, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: mov v2.b[10], w12 +; CHECK-NEXT: msub w10, w13, w11, w10 +; CHECK-NEXT: udiv w3, w8, w9 +; CHECK-NEXT: mov v2.b[11], w10 +; CHECK-NEXT: msub w8, w3, w9, w8 +; CHECK-NEXT: udiv w18, w0, w16 +; CHECK-NEXT: mov v2.b[12], w8 +; CHECK-NEXT: msub w9, w18, w16, w0 +; CHECK-NEXT: udiv w15, w2, w1 +; CHECK-NEXT: mov v2.b[13], w9 +; CHECK-NEXT: msub w10, w15, w1, w2 +; CHECK-NEXT: udiv w11, w4, w14 +; CHECK-NEXT: mov v2.b[14], w10 +; CHECK-NEXT: msub w8, w11, w14, w4 ; CHECK-NEXT: mov v2.b[15], w8 ; CHECK-NEXT: mov v0.16b, v2.16b -; CHECK-NEXT: ldp x26, x25, [sp], #64 // 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #128 ; CHECK-NEXT: ret %tmp3 = urem <16 x i8> %A, %B; ret <16 x i8> %tmp3 @@ -1297,21 +1349,21 @@ ; CHECK-NEXT: umov w12, v0.h[0] ; CHECK-NEXT: umov w8, v1.h[1] ; CHECK-NEXT: umov w9, v0.h[1] +; CHECK-NEXT: udiv w13, w12, w11 ; CHECK-NEXT: umov w14, v1.h[2] ; CHECK-NEXT: umov w15, v0.h[2] -; CHECK-NEXT: udiv w13, w12, w11 +; CHECK-NEXT: umov w17, v1.h[3] +; CHECK-NEXT: umov w18, v0.h[3] ; CHECK-NEXT: msub w11, w13, w11, w12 -; CHECK-NEXT: umov w12, v1.h[3] ; CHECK-NEXT: udiv w10, w9, w8 -; CHECK-NEXT: umov w13, v0.h[3] ; CHECK-NEXT: fmov s0, w11 ; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: udiv w16, w15, w14 ; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: msub w10, w16, w14, w15 -; CHECK-NEXT: udiv w9, w13, w12 -; CHECK-NEXT: mov v0.h[2], w10 -; CHECK-NEXT: msub w8, w9, w12, w13 +; CHECK-NEXT: msub w9, w16, w14, w15 +; CHECK-NEXT: udiv w12, w18, w17 +; CHECK-NEXT: mov v0.h[2], w9 +; CHECK-NEXT: msub w8, w12, w17, w18 ; CHECK-NEXT: mov v0.h[3], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret @@ -1322,47 +1374,51 @@ define <8 x i16> @urem8x16(<8 x i16> %A, <8 x i16> %B) { ; CHECK-LABEL: urem8x16: ; CHECK: // %bb.0: +; CHECK-NEXT: str x19, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w19, -16 ; CHECK-NEXT: umov w11, v1.h[0] ; CHECK-NEXT: umov w12, v0.h[0] ; CHECK-NEXT: umov w8, v1.h[1] ; CHECK-NEXT: umov w9, v0.h[1] +; CHECK-NEXT: udiv w13, w12, w11 ; CHECK-NEXT: umov w14, v1.h[2] ; CHECK-NEXT: umov w15, v0.h[2] ; CHECK-NEXT: umov w17, v1.h[3] ; CHECK-NEXT: umov w18, v0.h[3] -; CHECK-NEXT: udiv w13, w12, w11 ; CHECK-NEXT: umov w1, v1.h[4] ; CHECK-NEXT: umov w2, v0.h[4] +; CHECK-NEXT: umov w4, v1.h[5] +; CHECK-NEXT: umov w5, v0.h[5] +; CHECK-NEXT: umov w7, v1.h[6] +; CHECK-NEXT: umov w19, v0.h[6] ; CHECK-NEXT: msub w11, w13, w11, w12 -; CHECK-NEXT: umov w12, v1.h[5] ; CHECK-NEXT: udiv w10, w9, w8 -; CHECK-NEXT: umov w13, v0.h[5] ; CHECK-NEXT: fmov s2, w11 -; CHECK-NEXT: umov w11, v0.h[6] +; CHECK-NEXT: umov w11, v0.h[7] ; CHECK-NEXT: msub w8, w10, w8, w9 -; CHECK-NEXT: umov w10, v1.h[6] +; CHECK-NEXT: umov w10, v1.h[7] ; CHECK-NEXT: udiv w16, w15, w14 ; CHECK-NEXT: mov v2.h[1], w8 -; CHECK-NEXT: msub w8, w16, w14, w15 -; CHECK-NEXT: umov w15, v1.h[7] +; CHECK-NEXT: msub w9, w16, w14, w15 ; CHECK-NEXT: udiv w0, w18, w17 -; CHECK-NEXT: umov w16, v0.h[7] -; CHECK-NEXT: mov v2.h[2], w8 -; CHECK-NEXT: msub w14, w0, w17, w18 +; CHECK-NEXT: mov v2.h[2], w9 +; CHECK-NEXT: msub w13, w0, w17, w18 ; CHECK-NEXT: udiv w3, w2, w1 -; CHECK-NEXT: mov v2.h[3], w14 +; CHECK-NEXT: mov v2.h[3], w13 ; CHECK-NEXT: msub w14, w3, w1, w2 -; CHECK-NEXT: udiv w9, w13, w12 +; CHECK-NEXT: udiv w6, w5, w4 ; CHECK-NEXT: mov v2.h[4], w14 -; CHECK-NEXT: msub w9, w9, w12, w13 -; CHECK-NEXT: udiv w8, w11, w10 +; CHECK-NEXT: msub w9, w6, w4, w5 +; CHECK-NEXT: udiv w12, w19, w7 ; CHECK-NEXT: mov v2.h[5], w9 +; CHECK-NEXT: msub w12, w12, w7, w19 +; CHECK-NEXT: udiv w8, w11, w10 +; CHECK-NEXT: mov v2.h[6], w12 ; CHECK-NEXT: msub w8, w8, w10, w11 -; CHECK-NEXT: udiv w12, w16, w15 -; CHECK-NEXT: mov v2.h[6], w8 -; CHECK-NEXT: msub w8, w12, w15, w16 ; CHECK-NEXT: mov v2.h[7], w8 ; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ldr x19, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %tmp3 = urem <8 x i16> %A, %B; ret <8 x i16> %tmp3 @@ -1422,10 +1478,10 @@ ; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: udiv w16, w15, w14 ; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: msub w10, w16, w14, w15 -; CHECK-NEXT: udiv w9, w18, w17 -; CHECK-NEXT: mov v0.s[2], w10 -; CHECK-NEXT: msub w8, w9, w17, w18 +; CHECK-NEXT: msub w9, w16, w14, w15 +; CHECK-NEXT: udiv w12, w18, w17 +; CHECK-NEXT: mov v0.s[2], w9 +; CHECK-NEXT: msub w8, w12, w17, w18 ; CHECK-NEXT: mov v0.s[3], w8 ; CHECK-NEXT: ret %tmp3 = urem <4 x i32> %A, %B; diff --git a/llvm/test/CodeGen/AArch64/arm64-nvcast.ll b/llvm/test/CodeGen/AArch64/arm64-nvcast.ll --- a/llvm/test/CodeGen/AArch64/arm64-nvcast.ll +++ b/llvm/test/CodeGen/AArch64/arm64-nvcast.ll @@ -10,8 +10,8 @@ ; CHECK-NEXT: and x8, x1, #0x3 ; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: fmov.2d v0, #2.00000000 -; CHECK-NEXT: bfi x9, x8, #2, #2 ; CHECK-NEXT: str q0, [sp] +; CHECK-NEXT: bfi x9, x8, #2, #2 ; CHECK-NEXT: ldr s0, [x9] ; CHECK-NEXT: str s0, [x0] ; CHECK-NEXT: add sp, sp, #16 @@ -31,8 +31,8 @@ ; CHECK-NEXT: and x8, x1, #0x3 ; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: movi.16b v0, #63 -; CHECK-NEXT: bfi x9, x8, #2, #2 ; CHECK-NEXT: str q0, [sp] +; CHECK-NEXT: bfi x9, x8, #2, #2 ; CHECK-NEXT: ldr s0, [x9] ; CHECK-NEXT: str s0, [x0] ; CHECK-NEXT: add sp, sp, #16 diff --git a/llvm/test/CodeGen/AArch64/arm64-promote-const-complex-initializers.ll b/llvm/test/CodeGen/AArch64/arm64-promote-const-complex-initializers.ll --- a/llvm/test/CodeGen/AArch64/arm64-promote-const-complex-initializers.ll +++ b/llvm/test/CodeGen/AArch64/arm64-promote-const-complex-initializers.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -o - %s | FileCheck %s ; AsmPrinter cannot lower floating point constant expressions in global @@ -8,54 +9,37 @@ target triple = "arm64-apple-ios14.0.0" define [1 x <4 x float>] @test1() { -; CHECK-LABEL: .p2align 4 ; -- Begin function test1 -; CHECK-NEXT: lCPI0_0: -; CHECK-NEXT: .quad 0 ; 0x0 -; CHECK-NEXT: .quad 4575657221408423936 ; 0x3f80000000000000 -; CHECK-NEXT: .section __TEXT,__text,regular,pure_instructions -; CHECK-NEXT: .globl _test1 -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: _test1: ; @test1 -; CHECK-NEXT: .cfi_startproc -; CHECK-NEXT: ; %bb.0: -; CHECK-NEXT: Lloh0: -; CHECK-NEXT: adrp x8, lCPI0_0@PAGE -; CHECK-NEXT: Lloh1: -; CHECK-NEXT: ldr q0, [x8, lCPI0_0@PAGEOFF] -; CHECK-NEXT: ret +; CHECK-LABEL: test1: +; CHECK: ; %bb.0: +; CHECK-NEXT: Lloh0: +; CHECK-NEXT: adrp x8, lCPI0_0@PAGE +; CHECK-NEXT: Lloh1: +; CHECK-NEXT: ldr q0, [x8, lCPI0_0@PAGEOFF] +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdr Lloh0, Lloh1 ret [1 x <4 x float>] [<4 x float> bitcast (<1 x i128> to <4 x float>)] } define [1 x <4 x float>] @test2() { -; CHECK-LABEL: .p2align 4 ; -- Begin function test2 -; CHECK-NEXT: lCPI1_0: -; CHECK-NEXT: .long 0x00000000 ; float 0 -; CHECK-NEXT: .long 0x00000000 ; float 0 -; CHECK-NEXT: .long 0x00000000 ; float 0 -; CHECK-NEXT: .long 0x3f800000 ; float 1 -; CHECK-NEXT: .section __TEXT,__text,regular,pure_instructions -; CHECK-NEXT: .globl _test2 -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: _test2: ; @test2 -; CHECK-NEXT: .cfi_startproc -; CHECK-NEXT: ; %bb.0: -; CHECK-NEXT: Lloh2: -; CHECK-NEXT: adrp x8, lCPI1_0@PAGE -; CHECK-NEXT: Lloh3: -; CHECK-NEXT: ldr q1, [x8, lCPI1_0@PAGEOFF] -; CHECK-NEXT: mov s2, v1[1] -; CHECK-NEXT: fneg s0, s1 -; CHECK-NEXT: mov s3, v1[2] -; CHECK-NEXT: mov s1, v1[3] -; CHECK-NEXT: fneg s2, s2 -; CHECK-NEXT: fneg s1, s1 -; CHECK-NEXT: mov.s v0[1], v2[0] -; CHECK-NEXT: fneg s2, s3 -; CHECK-NEXT: mov.s v0[2], v2[0] -; CHECK-NEXT: mov.s v0[3], v1[0] -; CHECK-NEXT: ret -; +; CHECK-LABEL: test2: +; CHECK: ; %bb.0: +; CHECK-NEXT: Lloh2: +; CHECK-NEXT: adrp x8, lCPI1_0@PAGE +; CHECK-NEXT: Lloh3: +; CHECK-NEXT: ldr q1, [x8, lCPI1_0@PAGEOFF] +; CHECK-NEXT: mov s0, v1[1] +; CHECK-NEXT: mov s3, v1[2] +; CHECK-NEXT: fneg s2, s0 +; CHECK-NEXT: fneg s0, s1 +; CHECK-NEXT: fneg s3, s3 +; CHECK-NEXT: mov s1, v1[3] +; CHECK-NEXT: fneg s1, s1 +; CHECK-NEXT: mov.s v0[1], v2[0] +; CHECK-NEXT: mov.s v0[2], v3[0] +; CHECK-NEXT: mov.s v0[3], v1[0] +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdr Lloh2, Lloh3 ret [1 x <4 x float>] [<4 x float> bitcast (<1 x i128> to <4 x float>), i32 0)), float fneg (float extractelement (<4 x float> bitcast (<1 x i128> to <4 x float>), i32 1)), diff --git a/llvm/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll b/llvm/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll --- a/llvm/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll +++ b/llvm/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll @@ -4,9 +4,9 @@ define <4 x float> @foo(<4 x float> %val, <4 x float> %test) nounwind { ; CHECK-LABEL: foo: ; CHECK: ; %bb.0: -; CHECK-NEXT: fmov.4s v2, #1.00000000 ; CHECK-NEXT: fcmeq.4s v0, v0, v1 -; CHECK-NEXT: and.16b v0, v0, v2 +; CHECK-NEXT: fmov.4s v1, #1.00000000 +; CHECK-NEXT: and.16b v0, v0, v1 ; CHECK-NEXT: ret %cmp = fcmp oeq <4 x float> %val, %test %ext = zext <4 x i1> %cmp to <4 x i32> @@ -19,8 +19,8 @@ define void @foo1(<4 x float> %val, <4 x float> %test, <4 x double>* %p) nounwind { ; CHECK-LABEL: foo1: ; CHECK: ; %bb.0: -; CHECK-NEXT: movi.4s v2, #1 ; CHECK-NEXT: fcmeq.4s v0, v0, v1 +; CHECK-NEXT: movi.4s v2, #1 ; CHECK-NEXT: and.16b v0, v0, v2 ; CHECK-NEXT: ushll2.2d v1, v0, #0 ; CHECK-NEXT: ushll.2d v0, v0, #0 @@ -44,8 +44,8 @@ ; CHECK-NEXT: adrp x8, lCPI2_0@PAGE ; CHECK-NEXT: fcmeq.4s v0, v0, v1 ; CHECK-NEXT: Lloh1: -; CHECK-NEXT: ldr q1, [x8, lCPI2_0@PAGEOFF] -; CHECK-NEXT: and.16b v0, v0, v1 +; CHECK-NEXT: ldr q2, [x8, lCPI2_0@PAGEOFF] +; CHECK-NEXT: and.16b v0, v0, v2 ; CHECK-NEXT: ret ; CHECK-NEXT: .loh AdrpLdr Lloh0, Lloh1 %cmp = fcmp oeq <4 x float> %val, %test diff --git a/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll b/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll --- a/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll +++ b/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll @@ -374,8 +374,8 @@ ; CHECK-LABEL: testLeftBad2x64: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x8, #10 -; CHECK-NEXT: movk x8, #1, lsl #48 ; CHECK-NEXT: shl.2d v1, v1, #48 +; CHECK-NEXT: movk x8, #1, lsl #48 ; CHECK-NEXT: dup.2d v2, x8 ; CHECK-NEXT: and.16b v0, v0, v2 ; CHECK-NEXT: orr.16b v0, v0, v1 @@ -405,8 +405,8 @@ ; CHECK-LABEL: testRightBad2x64: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x8, #10 -; CHECK-NEXT: movk x8, #1, lsl #48 ; CHECK-NEXT: ushr.2d v1, v1, #48 +; CHECK-NEXT: movk x8, #1, lsl #48 ; CHECK-NEXT: dup.2d v2, x8 ; CHECK-NEXT: and.16b v0, v0, v2 ; CHECK-NEXT: orr.16b v0, v0, v1 diff --git a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll --- a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll +++ b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll @@ -177,12 +177,12 @@ ; CHECK-LABEL: sext_v4i8_to_v4i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ushll.4s v0, v0, #0 -; CHECK-NEXT: ushll2.2d v1, v0, #0 -; CHECK-NEXT: ushll.2d v0, v0, #0 -; CHECK-NEXT: shl.2d v1, v1, #56 +; CHECK-NEXT: ushll.2d v1, v0, #0 +; CHECK-NEXT: ushll2.2d v0, v0, #0 +; CHECK-NEXT: shl.2d v2, v1, #56 ; CHECK-NEXT: shl.2d v0, v0, #56 -; CHECK-NEXT: sshr.2d v1, v1, #56 -; CHECK-NEXT: sshr.2d v0, v0, #56 +; CHECK-NEXT: sshr.2d v1, v0, #56 +; CHECK-NEXT: sshr.2d v0, v2, #56 ; CHECK-NEXT: ret %r = sext <4 x i8> %v0 to <4 x i64> ret <4 x i64> %r @@ -192,12 +192,12 @@ ; CHECK-LABEL: zext_v8i8_to_v8i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ushll.8h v0, v0, #0 -; CHECK-NEXT: ushll.4s v2, v0, #0 -; CHECK-NEXT: ushll2.4s v4, v0, #0 -; CHECK-NEXT: ushll2.2d v1, v2, #0 -; CHECK-NEXT: ushll.2d v0, v2, #0 -; CHECK-NEXT: ushll2.2d v3, v4, #0 -; CHECK-NEXT: ushll.2d v2, v4, #0 +; CHECK-NEXT: ushll2.4s v2, v0, #0 +; CHECK-NEXT: ushll.4s v0, v0, #0 +; CHECK-NEXT: ushll2.2d v3, v2, #0 +; CHECK-NEXT: ushll2.2d v1, v0, #0 +; CHECK-NEXT: ushll.2d v0, v0, #0 +; CHECK-NEXT: ushll.2d v2, v2, #0 ; CHECK-NEXT: ret %r = zext <8 x i8> %v0 to <8 x i64> ret <8 x i64> %r @@ -207,12 +207,12 @@ ; CHECK-LABEL: sext_v8i8_to_v8i64: ; CHECK: // %bb.0: ; CHECK-NEXT: sshll.8h v0, v0, #0 -; CHECK-NEXT: sshll.4s v2, v0, #0 -; CHECK-NEXT: sshll2.4s v4, v0, #0 -; CHECK-NEXT: sshll2.2d v1, v2, #0 -; CHECK-NEXT: sshll.2d v0, v2, #0 -; CHECK-NEXT: sshll2.2d v3, v4, #0 -; CHECK-NEXT: sshll.2d v2, v4, #0 +; CHECK-NEXT: sshll2.4s v2, v0, #0 +; CHECK-NEXT: sshll.4s v0, v0, #0 +; CHECK-NEXT: sshll2.2d v3, v2, #0 +; CHECK-NEXT: sshll2.2d v1, v0, #0 +; CHECK-NEXT: sshll.2d v0, v0, #0 +; CHECK-NEXT: sshll.2d v2, v2, #0 ; CHECK-NEXT: ret %r = sext <8 x i8> %v0 to <8 x i64> ret <8 x i64> %r @@ -225,60 +225,60 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldr w8, [sp, #64] ; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: ldr w9, [sp] -; CHECK-NEXT: ldr w10, [sp, #8] +; CHECK-NEXT: ldr w9, [sp, #80] +; CHECK-NEXT: ldr w10, [sp] ; CHECK-NEXT: fmov s1, w8 ; CHECK-NEXT: ldr w8, [sp, #72] ; CHECK-NEXT: mov.b v0[1], w1 +; CHECK-NEXT: ldr w11, [sp, #8] ; CHECK-NEXT: movi.16b v2, #1 ; CHECK-NEXT: mov.b v1[1], w8 -; CHECK-NEXT: ldr w8, [sp, #80] -; CHECK-NEXT: mov.b v0[2], w2 -; CHECK-NEXT: mov.b v1[2], w8 ; CHECK-NEXT: ldr w8, [sp, #88] +; CHECK-NEXT: mov.b v0[2], w2 +; CHECK-NEXT: mov.b v1[2], w9 +; CHECK-NEXT: ldr w9, [sp, #96] ; CHECK-NEXT: mov.b v0[3], w3 ; CHECK-NEXT: mov.b v1[3], w8 -; CHECK-NEXT: ldr w8, [sp, #96] -; CHECK-NEXT: mov.b v0[4], w4 -; CHECK-NEXT: mov.b v1[4], w8 ; CHECK-NEXT: ldr w8, [sp, #104] +; CHECK-NEXT: mov.b v0[4], w4 +; CHECK-NEXT: mov.b v1[4], w9 +; CHECK-NEXT: ldr w9, [sp, #112] ; CHECK-NEXT: mov.b v0[5], w5 ; CHECK-NEXT: mov.b v1[5], w8 -; CHECK-NEXT: ldr w8, [sp, #112] -; CHECK-NEXT: mov.b v0[6], w6 -; CHECK-NEXT: mov.b v1[6], w8 ; CHECK-NEXT: ldr w8, [sp, #120] +; CHECK-NEXT: mov.b v0[6], w6 +; CHECK-NEXT: mov.b v1[6], w9 +; CHECK-NEXT: ldr w9, [sp, #128] ; CHECK-NEXT: mov.b v0[7], w7 ; CHECK-NEXT: mov.b v1[7], w8 -; CHECK-NEXT: ldr w8, [sp, #128] -; CHECK-NEXT: mov.b v0[8], w9 -; CHECK-NEXT: ldr w9, [sp, #16] -; CHECK-NEXT: mov.b v1[8], w8 ; CHECK-NEXT: ldr w8, [sp, #136] -; CHECK-NEXT: mov.b v0[9], w10 -; CHECK-NEXT: ldr w10, [sp, #24] +; CHECK-NEXT: mov.b v0[8], w10 +; CHECK-NEXT: ldr w10, [sp, #16] +; CHECK-NEXT: mov.b v1[8], w9 +; CHECK-NEXT: ldr w9, [sp, #144] +; CHECK-NEXT: mov.b v0[9], w11 +; CHECK-NEXT: ldr w11, [sp, #24] ; CHECK-NEXT: mov.b v1[9], w8 -; CHECK-NEXT: ldr w8, [sp, #144] -; CHECK-NEXT: mov.b v0[10], w9 -; CHECK-NEXT: ldr w9, [sp, #32] -; CHECK-NEXT: mov.b v1[10], w8 ; CHECK-NEXT: ldr w8, [sp, #152] -; CHECK-NEXT: mov.b v0[11], w10 -; CHECK-NEXT: ldr w10, [sp, #40] +; CHECK-NEXT: mov.b v0[10], w10 +; CHECK-NEXT: ldr w10, [sp, #32] +; CHECK-NEXT: mov.b v1[10], w9 +; CHECK-NEXT: ldr w9, [sp, #160] +; CHECK-NEXT: mov.b v0[11], w11 +; CHECK-NEXT: ldr w11, [sp, #40] ; CHECK-NEXT: mov.b v1[11], w8 -; CHECK-NEXT: ldr w8, [sp, #160] -; CHECK-NEXT: mov.b v0[12], w9 -; CHECK-NEXT: ldr w9, [sp, #48] -; CHECK-NEXT: mov.b v1[12], w8 ; CHECK-NEXT: ldr w8, [sp, #168] -; CHECK-NEXT: mov.b v0[13], w10 -; CHECK-NEXT: ldr w10, [sp, #56] +; CHECK-NEXT: mov.b v0[12], w10 +; CHECK-NEXT: ldr w10, [sp, #48] +; CHECK-NEXT: mov.b v1[12], w9 +; CHECK-NEXT: ldr w9, [sp, #176] +; CHECK-NEXT: mov.b v0[13], w11 +; CHECK-NEXT: ldr w11, [sp, #56] ; CHECK-NEXT: mov.b v1[13], w8 -; CHECK-NEXT: ldr w8, [sp, #176] -; CHECK-NEXT: mov.b v0[14], w9 -; CHECK-NEXT: mov.b v1[14], w8 ; CHECK-NEXT: ldr w8, [sp, #184] -; CHECK-NEXT: mov.b v0[15], w10 +; CHECK-NEXT: mov.b v0[14], w10 +; CHECK-NEXT: mov.b v1[14], w9 +; CHECK-NEXT: mov.b v0[15], w11 ; CHECK-NEXT: mov.b v1[15], w8 ; CHECK-NEXT: and.16b v0, v0, v2 ; CHECK-NEXT: and.16b v1, v1, v2 @@ -291,65 +291,65 @@ ; CHECK-LABEL: sext_v32i1: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr w8, [sp, #64] -; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: ldr w9, [sp] -; CHECK-NEXT: ldr w10, [sp, #8] -; CHECK-NEXT: fmov s1, w8 +; CHECK-NEXT: fmov s1, w0 +; CHECK-NEXT: ldr w9, [sp, #80] +; CHECK-NEXT: ldr w10, [sp] +; CHECK-NEXT: fmov s0, w8 ; CHECK-NEXT: ldr w8, [sp, #72] -; CHECK-NEXT: mov.b v0[1], w1 -; CHECK-NEXT: mov.b v1[1], w8 -; CHECK-NEXT: ldr w8, [sp, #80] -; CHECK-NEXT: mov.b v0[2], w2 -; CHECK-NEXT: mov.b v1[2], w8 +; CHECK-NEXT: mov.b v1[1], w1 +; CHECK-NEXT: ldr w11, [sp, #8] +; CHECK-NEXT: mov.b v0[1], w8 ; CHECK-NEXT: ldr w8, [sp, #88] -; CHECK-NEXT: mov.b v0[3], w3 -; CHECK-NEXT: mov.b v1[3], w8 -; CHECK-NEXT: ldr w8, [sp, #96] -; CHECK-NEXT: mov.b v0[4], w4 -; CHECK-NEXT: mov.b v1[4], w8 +; CHECK-NEXT: mov.b v1[2], w2 +; CHECK-NEXT: mov.b v0[2], w9 +; CHECK-NEXT: ldr w9, [sp, #96] +; CHECK-NEXT: mov.b v1[3], w3 +; CHECK-NEXT: mov.b v0[3], w8 ; CHECK-NEXT: ldr w8, [sp, #104] -; CHECK-NEXT: mov.b v0[5], w5 -; CHECK-NEXT: mov.b v1[5], w8 -; CHECK-NEXT: ldr w8, [sp, #112] -; CHECK-NEXT: mov.b v0[6], w6 -; CHECK-NEXT: mov.b v1[6], w8 +; CHECK-NEXT: mov.b v1[4], w4 +; CHECK-NEXT: mov.b v0[4], w9 +; CHECK-NEXT: ldr w9, [sp, #112] +; CHECK-NEXT: mov.b v1[5], w5 +; CHECK-NEXT: mov.b v0[5], w8 ; CHECK-NEXT: ldr w8, [sp, #120] -; CHECK-NEXT: mov.b v0[7], w7 -; CHECK-NEXT: mov.b v1[7], w8 -; CHECK-NEXT: ldr w8, [sp, #128] -; CHECK-NEXT: mov.b v0[8], w9 -; CHECK-NEXT: ldr w9, [sp, #16] -; CHECK-NEXT: mov.b v1[8], w8 +; CHECK-NEXT: mov.b v1[6], w6 +; CHECK-NEXT: mov.b v0[6], w9 +; CHECK-NEXT: ldr w9, [sp, #128] +; CHECK-NEXT: mov.b v1[7], w7 +; CHECK-NEXT: mov.b v0[7], w8 ; CHECK-NEXT: ldr w8, [sp, #136] -; CHECK-NEXT: mov.b v0[9], w10 -; CHECK-NEXT: ldr w10, [sp, #24] -; CHECK-NEXT: mov.b v1[9], w8 -; CHECK-NEXT: ldr w8, [sp, #144] -; CHECK-NEXT: mov.b v0[10], w9 -; CHECK-NEXT: ldr w9, [sp, #32] -; CHECK-NEXT: mov.b v1[10], w8 +; CHECK-NEXT: mov.b v1[8], w10 +; CHECK-NEXT: ldr w10, [sp, #16] +; CHECK-NEXT: mov.b v0[8], w9 +; CHECK-NEXT: ldr w9, [sp, #144] +; CHECK-NEXT: mov.b v1[9], w11 +; CHECK-NEXT: ldr w11, [sp, #24] +; CHECK-NEXT: mov.b v0[9], w8 ; CHECK-NEXT: ldr w8, [sp, #152] -; CHECK-NEXT: mov.b v0[11], w10 -; CHECK-NEXT: ldr w10, [sp, #40] -; CHECK-NEXT: mov.b v1[11], w8 -; CHECK-NEXT: ldr w8, [sp, #160] -; CHECK-NEXT: mov.b v0[12], w9 -; CHECK-NEXT: ldr w9, [sp, #48] -; CHECK-NEXT: mov.b v1[12], w8 +; CHECK-NEXT: mov.b v1[10], w10 +; CHECK-NEXT: ldr w10, [sp, #32] +; CHECK-NEXT: mov.b v0[10], w9 +; CHECK-NEXT: ldr w9, [sp, #160] +; CHECK-NEXT: mov.b v1[11], w11 +; CHECK-NEXT: ldr w11, [sp, #40] +; CHECK-NEXT: mov.b v0[11], w8 ; CHECK-NEXT: ldr w8, [sp, #168] -; CHECK-NEXT: mov.b v0[13], w10 -; CHECK-NEXT: ldr w10, [sp, #56] -; CHECK-NEXT: mov.b v1[13], w8 -; CHECK-NEXT: ldr w8, [sp, #176] -; CHECK-NEXT: mov.b v0[14], w9 -; CHECK-NEXT: mov.b v1[14], w8 +; CHECK-NEXT: mov.b v1[12], w10 +; CHECK-NEXT: ldr w10, [sp, #48] +; CHECK-NEXT: mov.b v0[12], w9 +; CHECK-NEXT: ldr w9, [sp, #176] +; CHECK-NEXT: mov.b v1[13], w11 +; CHECK-NEXT: ldr w11, [sp, #56] +; CHECK-NEXT: mov.b v0[13], w8 ; CHECK-NEXT: ldr w8, [sp, #184] -; CHECK-NEXT: mov.b v0[15], w10 -; CHECK-NEXT: mov.b v1[15], w8 -; CHECK-NEXT: shl.16b v0, v0, #7 +; CHECK-NEXT: mov.b v1[14], w10 +; CHECK-NEXT: mov.b v0[14], w9 +; CHECK-NEXT: mov.b v1[15], w11 +; CHECK-NEXT: mov.b v0[15], w8 ; CHECK-NEXT: shl.16b v1, v1, #7 -; CHECK-NEXT: sshr.16b v0, v0, #7 -; CHECK-NEXT: sshr.16b v1, v1, #7 +; CHECK-NEXT: shl.16b v2, v0, #7 +; CHECK-NEXT: sshr.16b v0, v1, #7 +; CHECK-NEXT: sshr.16b v1, v2, #7 ; CHECK-NEXT: ret %res = sext <32 x i1> %arg to <32 x i8> ret <32 x i8> %res @@ -360,125 +360,125 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldr w8, [sp, #320] ; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: ldr w9, [sp, #64] -; CHECK-NEXT: ldr w10, [sp, #192] +; CHECK-NEXT: ldr w9, [sp, #192] +; CHECK-NEXT: ldr w10, [sp, #336] ; CHECK-NEXT: fmov s3, w8 ; CHECK-NEXT: ldr w8, [sp, #328] -; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: ldr w9, [sp, #200] -; CHECK-NEXT: fmov s2, w10 -; CHECK-NEXT: ldr w10, [sp, #336] -; CHECK-NEXT: mov.b v3[1], w8 -; CHECK-NEXT: ldr w8, [sp, #72] +; CHECK-NEXT: fmov s2, w9 +; CHECK-NEXT: ldr w9, [sp, #344] ; CHECK-NEXT: mov.b v0[1], w1 -; CHECK-NEXT: ldr w11, [sp, #352] -; CHECK-NEXT: mov.b v2[1], w9 -; CHECK-NEXT: ldr w9, [sp, #80] -; CHECK-NEXT: mov.b v1[1], w8 -; CHECK-NEXT: ldr w8, [sp, #344] -; CHECK-NEXT: mov.b v3[2], w10 -; CHECK-NEXT: ldr w10, [sp, #208] +; CHECK-NEXT: ldr w11, [sp, #96] +; CHECK-NEXT: mov.b v3[1], w8 +; CHECK-NEXT: ldr w8, [sp, #64] ; CHECK-NEXT: mov.b v0[2], w2 -; CHECK-NEXT: ldr w12, [sp, #368] -; CHECK-NEXT: ldr w13, [sp, #384] -; CHECK-NEXT: mov.b v1[2], w9 -; CHECK-NEXT: ldr w9, [sp, #360] -; CHECK-NEXT: mov.b v2[2], w10 -; CHECK-NEXT: ldr w10, [sp, #88] -; CHECK-NEXT: mov.b v3[3], w8 +; CHECK-NEXT: ldr w14, [sp, #136] +; CHECK-NEXT: mov.b v3[2], w10 +; CHECK-NEXT: ldr w10, [sp, #200] +; CHECK-NEXT: fmov s1, w8 +; CHECK-NEXT: ldr w8, [sp, #352] +; CHECK-NEXT: mov.b v3[3], w9 +; CHECK-NEXT: ldr w9, [sp, #208] +; CHECK-NEXT: mov.b v2[1], w10 +; CHECK-NEXT: ldr w10, [sp, #360] +; CHECK-NEXT: mov.b v3[4], w8 ; CHECK-NEXT: ldr w8, [sp, #216] -; CHECK-NEXT: mov.b v0[3], w3 -; CHECK-NEXT: ldr w14, [sp, #400] -; CHECK-NEXT: mov.b v1[3], w10 -; CHECK-NEXT: ldr w10, [sp, #376] +; CHECK-NEXT: mov.b v2[2], w9 +; CHECK-NEXT: ldr w9, [sp, #368] +; CHECK-NEXT: mov.b v3[5], w10 +; CHECK-NEXT: ldr w10, [sp, #224] ; CHECK-NEXT: mov.b v2[3], w8 -; CHECK-NEXT: ldr w8, [sp, #96] -; CHECK-NEXT: mov.b v3[4], w11 -; CHECK-NEXT: ldr w11, [sp, #224] -; CHECK-NEXT: mov.b v0[4], w4 -; CHECK-NEXT: ldr w15, [sp, #416] -; CHECK-NEXT: mov.b v1[4], w8 -; CHECK-NEXT: ldr w8, [sp, #392] -; CHECK-NEXT: mov.b v2[4], w11 -; CHECK-NEXT: ldr w11, [sp, #104] -; CHECK-NEXT: mov.b v3[5], w9 +; CHECK-NEXT: ldr w8, [sp, #376] +; CHECK-NEXT: mov.b v3[6], w9 ; CHECK-NEXT: ldr w9, [sp, #232] -; CHECK-NEXT: mov.b v0[5], w5 -; CHECK-NEXT: ldr w16, [sp, #432] -; CHECK-NEXT: mov.b v1[5], w11 -; CHECK-NEXT: ldr w11, [sp, #408] +; CHECK-NEXT: mov.b v2[4], w10 +; CHECK-NEXT: ldr w10, [sp, #384] +; CHECK-NEXT: mov.b v3[7], w8 +; CHECK-NEXT: ldr w8, [sp, #240] ; CHECK-NEXT: mov.b v2[5], w9 -; CHECK-NEXT: ldr w9, [sp, #112] -; CHECK-NEXT: mov.b v3[6], w12 -; CHECK-NEXT: ldr w12, [sp, #240] -; CHECK-NEXT: mov.b v0[6], w6 -; CHECK-NEXT: mov.b v1[6], w9 -; CHECK-NEXT: ldr w9, [sp, #424] -; CHECK-NEXT: mov.b v2[6], w12 -; CHECK-NEXT: ldr w12, [sp, #120] -; CHECK-NEXT: mov.b v3[7], w10 +; CHECK-NEXT: ldr w9, [sp, #392] +; CHECK-NEXT: mov.b v3[8], w10 ; CHECK-NEXT: ldr w10, [sp, #248] -; CHECK-NEXT: mov.b v0[7], w7 -; CHECK-NEXT: mov.b v1[7], w12 -; CHECK-NEXT: ldr w12, [sp] +; CHECK-NEXT: mov.b v2[6], w8 +; CHECK-NEXT: ldr w8, [sp, #400] +; CHECK-NEXT: mov.b v3[9], w9 +; CHECK-NEXT: ldr w9, [sp, #256] ; CHECK-NEXT: mov.b v2[7], w10 -; CHECK-NEXT: ldr w10, [sp, #128] -; CHECK-NEXT: mov.b v3[8], w13 -; CHECK-NEXT: ldr w13, [sp, #256] -; CHECK-NEXT: mov.b v0[8], w12 -; CHECK-NEXT: ldr w12, [sp, #440] -; CHECK-NEXT: mov.b v1[8], w10 -; CHECK-NEXT: ldr w10, [sp, #8] -; CHECK-NEXT: mov.b v2[8], w13 -; CHECK-NEXT: ldr w13, [sp, #136] -; CHECK-NEXT: mov.b v3[9], w8 +; CHECK-NEXT: ldr w10, [sp, #408] +; CHECK-NEXT: mov.b v3[10], w8 ; CHECK-NEXT: ldr w8, [sp, #264] -; CHECK-NEXT: mov.b v0[9], w10 +; CHECK-NEXT: mov.b v2[8], w9 +; CHECK-NEXT: ldr w9, [sp, #416] +; CHECK-NEXT: mov.b v3[11], w10 ; CHECK-NEXT: ldr w10, [sp, #272] -; CHECK-NEXT: mov.b v1[9], w13 -; CHECK-NEXT: ldr w13, [sp, #16] ; CHECK-NEXT: mov.b v2[9], w8 -; CHECK-NEXT: ldr w8, [sp, #144] -; CHECK-NEXT: mov.b v3[10], w14 -; CHECK-NEXT: ldr w14, [sp, #280] -; CHECK-NEXT: mov.b v0[10], w13 -; CHECK-NEXT: ldr w13, [sp, #296] -; CHECK-NEXT: mov.b v1[10], w8 -; CHECK-NEXT: ldr w8, [sp, #24] +; CHECK-NEXT: ldr w8, [sp, #424] +; CHECK-NEXT: mov.b v3[12], w9 +; CHECK-NEXT: ldr w9, [sp, #280] ; CHECK-NEXT: mov.b v2[10], w10 -; CHECK-NEXT: ldr w10, [sp, #152] -; CHECK-NEXT: mov.b v3[11], w11 -; CHECK-NEXT: ldr w11, [sp, #288] -; CHECK-NEXT: mov.b v0[11], w8 +; CHECK-NEXT: ldr w10, [sp, #432] +; CHECK-NEXT: mov.b v3[13], w8 +; CHECK-NEXT: ldr w8, [sp, #72] +; CHECK-NEXT: mov.b v2[11], w9 +; CHECK-NEXT: ldr w9, [sp, #80] +; CHECK-NEXT: mov.b v3[14], w10 +; CHECK-NEXT: ldr w10, [sp, #88] +; CHECK-NEXT: mov.b v1[1], w8 +; CHECK-NEXT: ldr w8, [sp, #288] +; CHECK-NEXT: mov.b v0[3], w3 +; CHECK-NEXT: ldr w15, [sp, #144] +; CHECK-NEXT: mov.b v1[2], w9 +; CHECK-NEXT: ldr w12, [sp, #304] +; CHECK-NEXT: mov.b v2[12], w8 +; CHECK-NEXT: ldr w8, [sp, #104] +; CHECK-NEXT: mov.b v1[3], w10 +; CHECK-NEXT: ldr w10, [sp, #296] +; CHECK-NEXT: mov.b v0[4], w4 +; CHECK-NEXT: ldr w9, [sp, #440] +; CHECK-NEXT: mov.b v1[4], w11 +; CHECK-NEXT: ldr w11, [sp, #112] +; CHECK-NEXT: mov.b v0[5], w5 +; CHECK-NEXT: ldr w13, [sp, #312] +; CHECK-NEXT: mov.b v1[5], w8 +; CHECK-NEXT: ldr w8, [sp, #120] +; CHECK-NEXT: mov.b v0[6], w6 +; CHECK-NEXT: mov.b v1[6], w11 +; CHECK-NEXT: ldr w11, [sp, #128] +; CHECK-NEXT: mov.b v0[7], w7 +; CHECK-NEXT: mov.b v1[7], w8 +; CHECK-NEXT: ldr w8, [sp] +; CHECK-NEXT: mov.b v2[13], w10 +; CHECK-NEXT: ldr w10, [sp, #56] +; CHECK-NEXT: mov.b v1[8], w11 +; CHECK-NEXT: ldr w11, [sp, #8] +; CHECK-NEXT: mov.b v0[8], w8 +; CHECK-NEXT: ldr w8, [sp, #16] +; CHECK-NEXT: mov.b v1[9], w14 +; CHECK-NEXT: ldr w14, [sp, #152] +; CHECK-NEXT: mov.b v0[9], w11 +; CHECK-NEXT: ldr w11, [sp, #24] +; CHECK-NEXT: mov.b v1[10], w15 +; CHECK-NEXT: ldr w15, [sp, #160] +; CHECK-NEXT: mov.b v0[10], w8 ; CHECK-NEXT: ldr w8, [sp, #32] -; CHECK-NEXT: mov.b v1[11], w10 -; CHECK-NEXT: ldr w10, [sp, #160] -; CHECK-NEXT: mov.b v2[11], w14 -; CHECK-NEXT: mov.b v3[12], w15 +; CHECK-NEXT: mov.b v1[11], w14 +; CHECK-NEXT: ldr w14, [sp, #168] +; CHECK-NEXT: mov.b v0[11], w11 +; CHECK-NEXT: ldr w11, [sp, #40] +; CHECK-NEXT: mov.b v1[12], w15 +; CHECK-NEXT: ldr w15, [sp, #176] ; CHECK-NEXT: mov.b v0[12], w8 -; CHECK-NEXT: ldr w8, [sp, #40] -; CHECK-NEXT: mov.b v1[12], w10 -; CHECK-NEXT: ldr w10, [sp, #168] -; CHECK-NEXT: mov.b v2[12], w11 -; CHECK-NEXT: ldr w11, [sp, #312] -; CHECK-NEXT: mov.b v3[13], w9 -; CHECK-NEXT: ldr w9, [sp, #304] -; CHECK-NEXT: mov.b v0[13], w8 ; CHECK-NEXT: ldr w8, [sp, #48] -; CHECK-NEXT: mov.b v1[13], w10 -; CHECK-NEXT: ldr w10, [sp, #176] -; CHECK-NEXT: mov.b v2[13], w13 -; CHECK-NEXT: mov.b v3[14], w16 +; CHECK-NEXT: mov.b v1[13], w14 +; CHECK-NEXT: ldr w14, [sp, #184] +; CHECK-NEXT: mov.b v0[13], w11 +; CHECK-NEXT: mov.b v1[14], w15 ; CHECK-NEXT: mov.b v0[14], w8 -; CHECK-NEXT: ldr w8, [sp, #56] -; CHECK-NEXT: mov.b v1[14], w10 -; CHECK-NEXT: mov.b v2[14], w9 -; CHECK-NEXT: ldr w9, [sp, #184] +; CHECK-NEXT: mov.b v2[14], w12 +; CHECK-NEXT: mov.b v3[15], w9 +; CHECK-NEXT: mov.b v1[15], w14 +; CHECK-NEXT: mov.b v0[15], w10 +; CHECK-NEXT: mov.b v2[15], w13 ; CHECK-NEXT: movi.16b v4, #1 -; CHECK-NEXT: mov.b v0[15], w8 -; CHECK-NEXT: mov.b v1[15], w9 -; CHECK-NEXT: mov.b v2[15], w11 -; CHECK-NEXT: mov.b v3[15], w12 ; CHECK-NEXT: and.16b v0, v0, v4 ; CHECK-NEXT: and.16b v1, v1, v4 ; CHECK-NEXT: and.16b v2, v2, v4 @@ -493,124 +493,124 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldr w8, [sp, #320] ; CHECK-NEXT: fmov s3, w0 -; CHECK-NEXT: ldr w9, [sp, #64] -; CHECK-NEXT: ldr w10, [sp, #192] +; CHECK-NEXT: ldr w9, [sp, #192] +; CHECK-NEXT: ldr w10, [sp, #336] ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: ldr w8, [sp, #72] -; CHECK-NEXT: fmov s2, w9 +; CHECK-NEXT: ldr w8, [sp, #328] +; CHECK-NEXT: fmov s1, w9 ; CHECK-NEXT: ldr w9, [sp, #200] -; CHECK-NEXT: fmov s1, w10 -; CHECK-NEXT: ldr w10, [sp, #328] -; CHECK-NEXT: mov.b v3[1], w1 -; CHECK-NEXT: ldr w11, [sp, #344] -; CHECK-NEXT: mov.b v2[1], w8 -; CHECK-NEXT: ldr w8, [sp, #336] +; CHECK-NEXT: ldr w11, [sp, #352] +; CHECK-NEXT: mov.b v0[1], w8 +; CHECK-NEXT: ldr w8, [sp, #344] ; CHECK-NEXT: mov.b v1[1], w9 -; CHECK-NEXT: ldr w9, [sp, #80] -; CHECK-NEXT: mov.b v0[1], w10 -; CHECK-NEXT: ldr w10, [sp, #208] +; CHECK-NEXT: ldr w9, [sp, #368] +; CHECK-NEXT: mov.b v0[2], w10 +; CHECK-NEXT: ldr w10, [sp, #360] +; CHECK-NEXT: ldr w12, [sp, #408] +; CHECK-NEXT: mov.b v0[3], w8 +; CHECK-NEXT: ldr w8, [sp, #208] +; CHECK-NEXT: mov.b v3[1], w1 +; CHECK-NEXT: ldr w15, [sp, #136] +; CHECK-NEXT: mov.b v0[4], w11 +; CHECK-NEXT: ldr w11, [sp, #216] +; CHECK-NEXT: mov.b v1[2], w8 +; CHECK-NEXT: ldr w8, [sp, #376] +; CHECK-NEXT: mov.b v0[5], w10 +; CHECK-NEXT: ldr w10, [sp, #224] +; CHECK-NEXT: mov.b v1[3], w11 +; CHECK-NEXT: ldr w11, [sp, #384] +; CHECK-NEXT: mov.b v0[6], w9 +; CHECK-NEXT: ldr w9, [sp, #232] +; CHECK-NEXT: mov.b v1[4], w10 +; CHECK-NEXT: ldr w10, [sp, #392] +; CHECK-NEXT: mov.b v0[7], w8 +; CHECK-NEXT: ldr w8, [sp, #240] +; CHECK-NEXT: mov.b v1[5], w9 +; CHECK-NEXT: ldr w9, [sp, #400] +; CHECK-NEXT: mov.b v0[8], w11 +; CHECK-NEXT: ldr w11, [sp, #248] +; CHECK-NEXT: mov.b v1[6], w8 +; CHECK-NEXT: ldr w8, [sp, #256] +; CHECK-NEXT: mov.b v0[9], w10 +; CHECK-NEXT: ldr w10, [sp, #416] +; CHECK-NEXT: mov.b v1[7], w11 +; CHECK-NEXT: ldr w11, [sp, #272] +; CHECK-NEXT: mov.b v0[10], w9 +; CHECK-NEXT: ldr w9, [sp, #264] +; CHECK-NEXT: mov.b v1[8], w8 +; CHECK-NEXT: ldr w8, [sp, #424] +; CHECK-NEXT: mov.b v0[11], w12 +; CHECK-NEXT: ldr w16, [sp, #144] +; CHECK-NEXT: mov.b v1[9], w9 +; CHECK-NEXT: ldr w9, [sp, #64] +; CHECK-NEXT: mov.b v0[12], w10 +; CHECK-NEXT: ldr w10, [sp, #280] ; CHECK-NEXT: mov.b v3[2], w2 -; CHECK-NEXT: ldr w12, [sp, #360] -; CHECK-NEXT: mov.b v2[2], w9 -; CHECK-NEXT: ldr w9, [sp, #352] -; CHECK-NEXT: mov.b v1[2], w10 -; CHECK-NEXT: ldr w10, [sp, #88] -; CHECK-NEXT: mov.b v0[2], w8 -; CHECK-NEXT: ldr w8, [sp, #216] +; CHECK-NEXT: ldr w12, [sp, #296] +; CHECK-NEXT: fmov s2, w9 +; CHECK-NEXT: ldr w9, [sp, #72] +; CHECK-NEXT: mov.b v0[13], w8 +; CHECK-NEXT: ldr w8, [sp, #80] ; CHECK-NEXT: mov.b v3[3], w3 -; CHECK-NEXT: ldr w13, [sp, #376] -; CHECK-NEXT: mov.b v2[3], w10 -; CHECK-NEXT: ldr w10, [sp, #368] -; CHECK-NEXT: mov.b v1[3], w8 -; CHECK-NEXT: ldr w8, [sp, #96] -; CHECK-NEXT: mov.b v0[3], w11 -; CHECK-NEXT: ldr w11, [sp, #224] +; CHECK-NEXT: ldr w13, [sp, #304] +; CHECK-NEXT: mov.b v2[1], w9 +; CHECK-NEXT: ldr w9, [sp, #88] ; CHECK-NEXT: mov.b v3[4], w4 -; CHECK-NEXT: ldr w14, [sp, #392] -; CHECK-NEXT: mov.b v2[4], w8 -; CHECK-NEXT: ldr w8, [sp, #384] -; CHECK-NEXT: mov.b v1[4], w11 -; CHECK-NEXT: ldr w11, [sp, #104] -; CHECK-NEXT: mov.b v0[4], w9 -; CHECK-NEXT: ldr w9, [sp, #232] +; CHECK-NEXT: ldr w14, [sp, #312] +; CHECK-NEXT: mov.b v2[2], w8 +; CHECK-NEXT: ldr w8, [sp, #96] ; CHECK-NEXT: mov.b v3[5], w5 -; CHECK-NEXT: ldr w15, [sp, #408] -; CHECK-NEXT: mov.b v2[5], w11 -; CHECK-NEXT: ldr w11, [sp, #400] -; CHECK-NEXT: mov.b v1[5], w9 -; CHECK-NEXT: ldr w9, [sp, #112] -; CHECK-NEXT: mov.b v0[5], w12 -; CHECK-NEXT: ldr w12, [sp, #240] +; CHECK-NEXT: mov.b v2[3], w9 +; CHECK-NEXT: ldr w9, [sp, #104] ; CHECK-NEXT: mov.b v3[6], w6 -; CHECK-NEXT: ldr w16, [sp, #424] -; CHECK-NEXT: mov.b v2[6], w9 -; CHECK-NEXT: ldr w9, [sp, #416] -; CHECK-NEXT: mov.b v1[6], w12 -; CHECK-NEXT: ldr w12, [sp, #120] -; CHECK-NEXT: mov.b v0[6], w10 -; CHECK-NEXT: ldr w10, [sp, #248] +; CHECK-NEXT: mov.b v2[4], w8 +; CHECK-NEXT: ldr w8, [sp, #112] ; CHECK-NEXT: mov.b v3[7], w7 -; CHECK-NEXT: mov.b v2[7], w12 -; CHECK-NEXT: ldr w12, [sp] -; CHECK-NEXT: mov.b v1[7], w10 -; CHECK-NEXT: ldr w10, [sp, #128] -; CHECK-NEXT: mov.b v0[7], w13 -; CHECK-NEXT: ldr w13, [sp, #256] -; CHECK-NEXT: mov.b v3[8], w12 -; CHECK-NEXT: ldr w12, [sp, #432] -; CHECK-NEXT: mov.b v2[8], w10 -; CHECK-NEXT: ldr w10, [sp, #8] -; CHECK-NEXT: mov.b v1[8], w13 -; CHECK-NEXT: ldr w13, [sp, #136] -; CHECK-NEXT: mov.b v0[8], w8 -; CHECK-NEXT: ldr w8, [sp, #264] -; CHECK-NEXT: mov.b v3[9], w10 -; CHECK-NEXT: ldr w10, [sp, #440] -; CHECK-NEXT: mov.b v2[9], w13 -; CHECK-NEXT: ldr w13, [sp, #16] -; CHECK-NEXT: mov.b v1[9], w8 -; CHECK-NEXT: ldr w8, [sp, #144] -; CHECK-NEXT: mov.b v0[9], w14 -; CHECK-NEXT: ldr w14, [sp, #272] -; CHECK-NEXT: mov.b v3[10], w13 -; CHECK-NEXT: ldr w13, [sp, #280] -; CHECK-NEXT: mov.b v2[10], w8 -; CHECK-NEXT: ldr w8, [sp, #24] -; CHECK-NEXT: mov.b v1[10], w14 -; CHECK-NEXT: ldr w14, [sp, #152] -; CHECK-NEXT: mov.b v0[10], w11 +; CHECK-NEXT: mov.b v2[5], w9 +; CHECK-NEXT: ldr w9, [sp, #120] +; CHECK-NEXT: mov.b v1[10], w11 ; CHECK-NEXT: ldr w11, [sp, #288] +; CHECK-NEXT: mov.b v2[6], w8 +; CHECK-NEXT: ldr w8, [sp, #128] +; CHECK-NEXT: mov.b v1[11], w10 +; CHECK-NEXT: ldr w10, [sp, #432] +; CHECK-NEXT: mov.b v2[7], w9 +; CHECK-NEXT: ldr w9, [sp] +; CHECK-NEXT: mov.b v1[12], w11 +; CHECK-NEXT: ldr w11, [sp, #440] +; CHECK-NEXT: mov.b v2[8], w8 +; CHECK-NEXT: ldr w8, [sp, #8] +; CHECK-NEXT: mov.b v3[8], w9 +; CHECK-NEXT: ldr w9, [sp, #16] +; CHECK-NEXT: mov.b v2[9], w15 +; CHECK-NEXT: ldr w15, [sp, #152] +; CHECK-NEXT: mov.b v3[9], w8 +; CHECK-NEXT: ldr w8, [sp, #24] +; CHECK-NEXT: mov.b v2[10], w16 +; CHECK-NEXT: ldr w16, [sp, #160] +; CHECK-NEXT: mov.b v3[10], w9 +; CHECK-NEXT: ldr w9, [sp, #32] +; CHECK-NEXT: mov.b v2[11], w15 +; CHECK-NEXT: ldr w15, [sp, #168] ; CHECK-NEXT: mov.b v3[11], w8 -; CHECK-NEXT: ldr w8, [sp, #32] -; CHECK-NEXT: mov.b v2[11], w14 -; CHECK-NEXT: ldr w14, [sp, #296] -; CHECK-NEXT: mov.b v1[11], w13 -; CHECK-NEXT: ldr w13, [sp, #160] -; CHECK-NEXT: mov.b v0[11], w15 -; CHECK-NEXT: mov.b v3[12], w8 ; CHECK-NEXT: ldr w8, [sp, #40] -; CHECK-NEXT: mov.b v2[12], w13 -; CHECK-NEXT: ldr w13, [sp, #312] -; CHECK-NEXT: mov.b v1[12], w11 -; CHECK-NEXT: ldr w11, [sp, #168] -; CHECK-NEXT: mov.b v0[12], w9 -; CHECK-NEXT: ldr w9, [sp, #304] +; CHECK-NEXT: mov.b v2[12], w16 +; CHECK-NEXT: ldr w16, [sp, #176] +; CHECK-NEXT: mov.b v3[12], w9 +; CHECK-NEXT: ldr w9, [sp, #48] +; CHECK-NEXT: mov.b v2[13], w15 +; CHECK-NEXT: ldr w15, [sp, #184] ; CHECK-NEXT: mov.b v3[13], w8 -; CHECK-NEXT: ldr w8, [sp, #48] -; CHECK-NEXT: mov.b v2[13], w11 -; CHECK-NEXT: ldr w11, [sp, #176] -; CHECK-NEXT: mov.b v1[13], w14 -; CHECK-NEXT: mov.b v0[13], w16 -; CHECK-NEXT: mov.b v3[14], w8 ; CHECK-NEXT: ldr w8, [sp, #56] -; CHECK-NEXT: mov.b v2[14], w11 -; CHECK-NEXT: mov.b v1[14], w9 -; CHECK-NEXT: ldr w9, [sp, #184] -; CHECK-NEXT: mov.b v0[14], w12 +; CHECK-NEXT: mov.b v1[13], w12 +; CHECK-NEXT: mov.b v2[14], w16 +; CHECK-NEXT: mov.b v3[14], w9 +; CHECK-NEXT: mov.b v0[14], w10 +; CHECK-NEXT: mov.b v1[14], w13 +; CHECK-NEXT: mov.b v2[15], w15 ; CHECK-NEXT: mov.b v3[15], w8 -; CHECK-NEXT: mov.b v2[15], w9 -; CHECK-NEXT: mov.b v1[15], w13 -; CHECK-NEXT: mov.b v0[15], w10 +; CHECK-NEXT: mov.b v0[15], w11 +; CHECK-NEXT: mov.b v1[15], w14 ; CHECK-NEXT: shl.16b v3, v3, #7 ; CHECK-NEXT: shl.16b v2, v2, #7 ; CHECK-NEXT: shl.16b v4, v1, #7 diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll --- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll @@ -307,8 +307,8 @@ ; ; GISEL-LABEL: uabdl4s_rdx_i32: ; GISEL: // %bb.0: -; GISEL-NEXT: movi.2d v2, #0000000000000000 ; GISEL-NEXT: usubl.4s v0, v0, v1 +; GISEL-NEXT: movi.2d v2, #0000000000000000 ; GISEL-NEXT: cmgt.4s v1, v2, v0 ; GISEL-NEXT: neg.4s v2, v0 ; GISEL-NEXT: shl.4s v1, v1, #31 @@ -384,8 +384,8 @@ ; ; GISEL-LABEL: uabdl2d_rdx_i64: ; GISEL: // %bb.0: -; GISEL-NEXT: movi.2d v2, #0000000000000000 ; GISEL-NEXT: usubl.2d v0, v0, v1 +; GISEL-NEXT: movi.2d v2, #0000000000000000 ; GISEL-NEXT: cmgt.2d v1, v2, v0 ; GISEL-NEXT: neg.2d v2, v0 ; GISEL-NEXT: shl.2d v1, v1, #63 @@ -1650,8 +1650,8 @@ ; GISEL-LABEL: abspattern4: ; GISEL: // %bb.0: ; GISEL-NEXT: movi.2d v1, #0000000000000000 -; GISEL-NEXT: neg.4s v2, v0 ; GISEL-NEXT: cmge.4s v1, v0, v1 +; GISEL-NEXT: neg.4s v2, v0 ; GISEL-NEXT: shl.4s v1, v1, #31 ; GISEL-NEXT: sshr.4s v1, v1, #31 ; GISEL-NEXT: bif.16b v0, v2, v1 @@ -1672,8 +1672,8 @@ ; GISEL-LABEL: abspattern5: ; GISEL: // %bb.0: ; GISEL-NEXT: movi.2d v1, #0000000000000000 -; GISEL-NEXT: neg.8h v2, v0 ; GISEL-NEXT: cmgt.8h v1, v0, v1 +; GISEL-NEXT: neg.8h v2, v0 ; GISEL-NEXT: shl.8h v1, v1, #15 ; GISEL-NEXT: sshr.8h v1, v1, #15 ; GISEL-NEXT: bif.16b v0, v2, v1 @@ -1694,8 +1694,8 @@ ; GISEL-LABEL: abspattern6: ; GISEL: // %bb.0: ; GISEL-NEXT: movi.2d v1, #0000000000000000 -; GISEL-NEXT: neg.16b v2, v0 ; GISEL-NEXT: cmgt.16b v1, v1, v0 +; GISEL-NEXT: neg.16b v2, v0 ; GISEL-NEXT: shl.16b v1, v1, #7 ; GISEL-NEXT: sshr.16b v1, v1, #7 ; GISEL-NEXT: bit.16b v0, v2, v1 @@ -1716,8 +1716,8 @@ ; GISEL-LABEL: abspattern7: ; GISEL: // %bb.0: ; GISEL-NEXT: movi.2d v1, #0000000000000000 -; GISEL-NEXT: neg.2d v2, v0 ; GISEL-NEXT: cmge.2d v1, v1, v0 +; GISEL-NEXT: neg.2d v2, v0 ; GISEL-NEXT: shl.2d v1, v1, #63 ; GISEL-NEXT: sshr.2d v1, v1, #63 ; GISEL-NEXT: bit.16b v0, v2, v1 @@ -1737,8 +1737,8 @@ ; ; GISEL-LABEL: uabd_i32: ; GISEL: // %bb.0: -; GISEL-NEXT: movi.2d v2, #0000000000000000 ; GISEL-NEXT: ssubl.2d v0, v0, v1 +; GISEL-NEXT: movi.2d v2, #0000000000000000 ; GISEL-NEXT: cmgt.2d v1, v2, v0 ; GISEL-NEXT: neg.2d v2, v0 ; GISEL-NEXT: shl.2d v1, v1, #63 @@ -1765,21 +1765,21 @@ ; CHECK-NEXT: asr x12, x9, #63 ; CHECK-NEXT: asr x13, x11, #63 ; CHECK-NEXT: subs x9, x9, x11 -; CHECK-NEXT: sbcs x11, x12, x13 -; CHECK-NEXT: asr x12, x8, #63 +; CHECK-NEXT: asr x11, x8, #63 +; CHECK-NEXT: sbcs x12, x12, x13 ; CHECK-NEXT: asr x13, x10, #63 ; CHECK-NEXT: subs x8, x8, x10 -; CHECK-NEXT: sbcs x10, x12, x13 -; CHECK-NEXT: negs x12, x8 +; CHECK-NEXT: sbcs x10, x11, x13 +; CHECK-NEXT: negs x11, x8 ; CHECK-NEXT: ngcs x13, x10 ; CHECK-NEXT: cmp x10, #0 -; CHECK-NEXT: csel x2, x12, x8, lt +; CHECK-NEXT: csel x2, x11, x8, lt ; CHECK-NEXT: csel x3, x13, x10, lt ; CHECK-NEXT: negs x8, x9 -; CHECK-NEXT: ngcs x10, x11 -; CHECK-NEXT: cmp x11, #0 +; CHECK-NEXT: ngcs x10, x12 +; CHECK-NEXT: cmp x12, #0 ; CHECK-NEXT: csel x8, x8, x9, lt -; CHECK-NEXT: csel x1, x10, x11, lt +; CHECK-NEXT: csel x1, x10, x12, lt ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: mov.d v0[1], x1 ; CHECK-NEXT: fmov x0, d0 diff --git a/llvm/test/CodeGen/AArch64/arm64-vhadd.ll b/llvm/test/CodeGen/AArch64/arm64-vhadd.ll --- a/llvm/test/CodeGen/AArch64/arm64-vhadd.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vhadd.ll @@ -759,10 +759,10 @@ define <4 x i64> @hadd32_sext_asr(<4 x i32> %src1, <4 x i32> %src2) nounwind { ; CHECK-LABEL: hadd32_sext_asr: ; CHECK: // %bb.0: -; CHECK-NEXT: saddl2.2d v2, v0, v1 -; CHECK-NEXT: saddl.2d v0, v0, v1 -; CHECK-NEXT: sshr.2d v1, v2, #1 -; CHECK-NEXT: sshr.2d v0, v0, #1 +; CHECK-NEXT: saddl.2d v2, v0, v1 +; CHECK-NEXT: saddl2.2d v0, v0, v1 +; CHECK-NEXT: sshr.2d v1, v0, #1 +; CHECK-NEXT: sshr.2d v0, v2, #1 ; CHECK-NEXT: ret %zextsrc1 = sext <4 x i32> %src1 to <4 x i64> %zextsrc2 = sext <4 x i32> %src2 to <4 x i64> @@ -774,10 +774,10 @@ define <4 x i64> @hadd32_zext_asr(<4 x i32> %src1, <4 x i32> %src2) nounwind { ; CHECK-LABEL: hadd32_zext_asr: ; CHECK: // %bb.0: -; CHECK-NEXT: uaddl2.2d v2, v0, v1 -; CHECK-NEXT: uaddl.2d v0, v0, v1 -; CHECK-NEXT: ushr.2d v1, v2, #1 -; CHECK-NEXT: ushr.2d v0, v0, #1 +; CHECK-NEXT: uaddl.2d v2, v0, v1 +; CHECK-NEXT: uaddl2.2d v0, v0, v1 +; CHECK-NEXT: ushr.2d v1, v0, #1 +; CHECK-NEXT: ushr.2d v0, v2, #1 ; CHECK-NEXT: ret %zextsrc1 = zext <4 x i32> %src1 to <4 x i64> %zextsrc2 = zext <4 x i32> %src2 to <4 x i64> @@ -789,10 +789,10 @@ define <4 x i64> @hadd32_sext_lsr(<4 x i32> %src1, <4 x i32> %src2) nounwind { ; CHECK-LABEL: hadd32_sext_lsr: ; CHECK: // %bb.0: -; CHECK-NEXT: saddl2.2d v2, v0, v1 -; CHECK-NEXT: saddl.2d v0, v0, v1 -; CHECK-NEXT: ushr.2d v1, v2, #1 -; CHECK-NEXT: ushr.2d v0, v0, #1 +; CHECK-NEXT: saddl.2d v2, v0, v1 +; CHECK-NEXT: saddl2.2d v0, v0, v1 +; CHECK-NEXT: ushr.2d v1, v0, #1 +; CHECK-NEXT: ushr.2d v0, v2, #1 ; CHECK-NEXT: ret %zextsrc1 = sext <4 x i32> %src1 to <4 x i64> %zextsrc2 = sext <4 x i32> %src2 to <4 x i64> @@ -804,10 +804,10 @@ define <4 x i64> @hadd32_zext_lsr(<4 x i32> %src1, <4 x i32> %src2) nounwind { ; CHECK-LABEL: hadd32_zext_lsr: ; CHECK: // %bb.0: -; CHECK-NEXT: uaddl2.2d v2, v0, v1 -; CHECK-NEXT: uaddl.2d v0, v0, v1 -; CHECK-NEXT: ushr.2d v1, v2, #1 -; CHECK-NEXT: ushr.2d v0, v0, #1 +; CHECK-NEXT: uaddl.2d v2, v0, v1 +; CHECK-NEXT: uaddl2.2d v0, v0, v1 +; CHECK-NEXT: ushr.2d v1, v0, #1 +; CHECK-NEXT: ushr.2d v0, v2, #1 ; CHECK-NEXT: ret %zextsrc1 = zext <4 x i32> %src1 to <4 x i64> %zextsrc2 = zext <4 x i32> %src2 to <4 x i64> diff --git a/llvm/test/CodeGen/AArch64/arm64-vmul.ll b/llvm/test/CodeGen/AArch64/arm64-vmul.ll --- a/llvm/test/CodeGen/AArch64/arm64-vmul.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vmul.ll @@ -904,14 +904,14 @@ define <2 x i64> @mul_2d(<2 x i64> %A, <2 x i64> %B) nounwind { ; CHECK-LABEL: mul_2d: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov x9, d1 -; CHECK-NEXT: fmov x10, d0 -; CHECK-NEXT: mov.d x8, v1[1] -; CHECK-NEXT: mov.d x11, v0[1] +; CHECK-NEXT: fmov x8, d1 +; CHECK-NEXT: fmov x9, d0 +; CHECK-NEXT: mov.d x10, v0[1] +; CHECK-NEXT: mul x8, x9, x8 +; CHECK-NEXT: mov.d x9, v1[1] ; CHECK-NEXT: mul x9, x10, x9 -; CHECK-NEXT: mul x8, x11, x8 -; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: mov.d v0[1], x8 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: mov.d v0[1], x9 ; CHECK-NEXT: ret %tmp1 = mul <2 x i64> %A, %B ret <2 x i64> %tmp1 diff --git a/llvm/test/CodeGen/AArch64/bitcast-promote-widen.ll b/llvm/test/CodeGen/AArch64/bitcast-promote-widen.ll --- a/llvm/test/CodeGen/AArch64/bitcast-promote-widen.ll +++ b/llvm/test/CodeGen/AArch64/bitcast-promote-widen.ll @@ -8,10 +8,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: umov w8, v0.h[0] -; CHECK-NEXT: umov w9, v0.h[1] -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: mov v0.s[1], w9 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: fmov s1, w8 +; CHECK-NEXT: umov w8, v0.h[1] +; CHECK-NEXT: mov v1.s[1], w8 +; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret %y = bitcast <2 x half> %x to <2 x i16> ret <2 x i16> %y diff --git a/llvm/test/CodeGen/AArch64/build-vector-extract.ll b/llvm/test/CodeGen/AArch64/build-vector-extract.ll --- a/llvm/test/CodeGen/AArch64/build-vector-extract.ll +++ b/llvm/test/CodeGen/AArch64/build-vector-extract.ll @@ -16,10 +16,9 @@ define <2 x i64> @extract0_i32_zext_insert0_i64_zero(<4 x i32> %x) { ; CHECK-LABEL: extract0_i32_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <4 x i32> %x, i32 0 %z = zext i32 %e to i64 @@ -30,8 +29,8 @@ define <2 x i64> @extract1_i32_zext_insert0_i64_undef(<4 x i32> %x) { ; CHECK-LABEL: extract1_i32_zext_insert0_i64_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: zip1 v0.4s, v0.4s, v0.4s +; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #12 ; CHECK-NEXT: ret %e = extractelement <4 x i32> %x, i32 1 @@ -43,10 +42,9 @@ define <2 x i64> @extract1_i32_zext_insert0_i64_zero(<4 x i32> %x) { ; CHECK-LABEL: extract1_i32_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <4 x i32> %x, i32 1 %z = zext i32 %e to i64 @@ -57,8 +55,8 @@ define <2 x i64> @extract2_i32_zext_insert0_i64_undef(<4 x i32> %x) { ; CHECK-LABEL: extract2_i32_zext_insert0_i64_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s +; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #12 ; CHECK-NEXT: ret %e = extractelement <4 x i32> %x, i32 2 @@ -70,10 +68,9 @@ define <2 x i64> @extract2_i32_zext_insert0_i64_zero(<4 x i32> %x) { ; CHECK-LABEL: extract2_i32_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: mov w8, v0.s[2] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <4 x i32> %x, i32 2 %z = zext i32 %e to i64 @@ -96,10 +93,9 @@ define <2 x i64> @extract3_i32_zext_insert0_i64_zero(<4 x i32> %x) { ; CHECK-LABEL: extract3_i32_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: mov w8, v0.s[3] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <4 x i32> %x, i32 3 %z = zext i32 %e to i64 @@ -123,10 +119,9 @@ define <2 x i64> @extract0_i32_zext_insert1_i64_zero(<4 x i32> %x) { ; CHECK-LABEL: extract0_i32_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <4 x i32> %x, i32 0 %z = zext i32 %e to i64 @@ -137,8 +132,8 @@ define <2 x i64> @extract1_i32_zext_insert1_i64_undef(<4 x i32> %x) { ; CHECK-LABEL: extract1_i32_zext_insert1_i64_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #4 ; CHECK-NEXT: ret %e = extractelement <4 x i32> %x, i32 1 @@ -150,10 +145,9 @@ define <2 x i64> @extract1_i32_zext_insert1_i64_zero(<4 x i32> %x) { ; CHECK-LABEL: extract1_i32_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <4 x i32> %x, i32 1 %z = zext i32 %e to i64 @@ -175,10 +169,9 @@ define <2 x i64> @extract2_i32_zext_insert1_i64_zero(<4 x i32> %x) { ; CHECK-LABEL: extract2_i32_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: mov w8, v0.s[2] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <4 x i32> %x, i32 2 %z = zext i32 %e to i64 @@ -201,10 +194,9 @@ define <2 x i64> @extract3_i32_zext_insert1_i64_zero(<4 x i32> %x) { ; CHECK-LABEL: extract3_i32_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: mov w8, v0.s[3] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <4 x i32> %x, i32 3 %z = zext i32 %e to i64 @@ -227,10 +219,9 @@ define <2 x i64> @extract0_i16_zext_insert0_i64_zero(<8 x i16> %x) { ; CHECK-LABEL: extract0_i16_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.h[0] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <8 x i16> %x, i32 0 %z = zext i16 %e to i64 @@ -253,10 +244,9 @@ define <2 x i64> @extract1_i16_zext_insert0_i64_zero(<8 x i16> %x) { ; CHECK-LABEL: extract1_i16_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.h[1] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <8 x i16> %x, i32 1 %z = zext i16 %e to i64 @@ -279,10 +269,9 @@ define <2 x i64> @extract2_i16_zext_insert0_i64_zero(<8 x i16> %x) { ; CHECK-LABEL: extract2_i16_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.h[2] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <8 x i16> %x, i32 2 %z = zext i16 %e to i64 @@ -305,10 +294,9 @@ define <2 x i64> @extract3_i16_zext_insert0_i64_zero(<8 x i16> %x) { ; CHECK-LABEL: extract3_i16_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.h[3] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <8 x i16> %x, i32 3 %z = zext i16 %e to i64 @@ -331,10 +319,9 @@ define <2 x i64> @extract0_i16_zext_insert1_i64_zero(<8 x i16> %x) { ; CHECK-LABEL: extract0_i16_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.h[0] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <8 x i16> %x, i32 0 %z = zext i16 %e to i64 @@ -357,10 +344,9 @@ define <2 x i64> @extract1_i16_zext_insert1_i64_zero(<8 x i16> %x) { ; CHECK-LABEL: extract1_i16_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.h[1] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <8 x i16> %x, i32 1 %z = zext i16 %e to i64 @@ -383,10 +369,9 @@ define <2 x i64> @extract2_i16_zext_insert1_i64_zero(<8 x i16> %x) { ; CHECK-LABEL: extract2_i16_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.h[2] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <8 x i16> %x, i32 2 %z = zext i16 %e to i64 @@ -409,10 +394,9 @@ define <2 x i64> @extract3_i16_zext_insert1_i64_zero(<8 x i16> %x) { ; CHECK-LABEL: extract3_i16_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.h[3] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <8 x i16> %x, i32 3 %z = zext i16 %e to i64 @@ -437,10 +421,9 @@ define <2 x i64> @extract0_i8_zext_insert0_i64_zero(<16 x i8> %x) { ; CHECK-LABEL: extract0_i8_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.b[0] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <16 x i8> %x, i32 0 %z = zext i8 %e to i64 @@ -463,10 +446,9 @@ define <2 x i64> @extract1_i8_zext_insert0_i64_zero(<16 x i8> %x) { ; CHECK-LABEL: extract1_i8_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.b[1] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <16 x i8> %x, i32 1 %z = zext i8 %e to i64 @@ -489,10 +471,9 @@ define <2 x i64> @extract2_i8_zext_insert0_i64_zero(<16 x i8> %x) { ; CHECK-LABEL: extract2_i8_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.b[2] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <16 x i8> %x, i32 2 %z = zext i8 %e to i64 @@ -515,10 +496,9 @@ define <2 x i64> @extract3_i8_zext_insert0_i64_zero(<16 x i8> %x) { ; CHECK-LABEL: extract3_i8_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.b[3] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <16 x i8> %x, i32 3 %z = zext i8 %e to i64 @@ -541,10 +521,9 @@ define <2 x i64> @extract0_i8_zext_insert1_i64_zero(<16 x i8> %x) { ; CHECK-LABEL: extract0_i8_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.b[0] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <16 x i8> %x, i32 0 %z = zext i8 %e to i64 @@ -567,10 +546,9 @@ define <2 x i64> @extract1_i8_zext_insert1_i64_zero(<16 x i8> %x) { ; CHECK-LABEL: extract1_i8_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.b[1] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <16 x i8> %x, i32 1 %z = zext i8 %e to i64 @@ -593,10 +571,9 @@ define <2 x i64> @extract2_i8_zext_insert1_i64_zero(<16 x i8> %x) { ; CHECK-LABEL: extract2_i8_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.b[2] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <16 x i8> %x, i32 2 %z = zext i8 %e to i64 @@ -619,10 +596,9 @@ define <2 x i64> @extract3_i8_zext_insert1_i64_zero(<16 x i8> %x) { ; CHECK-LABEL: extract3_i8_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.b[3] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <16 x i8> %x, i32 3 %z = zext i8 %e to i64 diff --git a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll --- a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll +++ b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll @@ -114,8 +114,8 @@ define <7 x i8> @sign_7xi8(<7 x i8> %a) { ; CHECK-LABEL: sign_7xi8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.8b, #1 ; CHECK-NEXT: sshr v0.8b, v0.8b, #7 +; CHECK-NEXT: movi v1.8b, #1 ; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %c = icmp sgt <7 x i8> %a, @@ -126,8 +126,8 @@ define <8 x i8> @sign_8xi8(<8 x i8> %a) { ; CHECK-LABEL: sign_8xi8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.8b, #1 ; CHECK-NEXT: sshr v0.8b, v0.8b, #7 +; CHECK-NEXT: movi v1.8b, #1 ; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %c = icmp sgt <8 x i8> %a, @@ -138,8 +138,8 @@ define <16 x i8> @sign_16xi8(<16 x i8> %a) { ; CHECK-LABEL: sign_16xi8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #1 ; CHECK-NEXT: sshr v0.16b, v0.16b, #7 +; CHECK-NEXT: movi v1.16b, #1 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %c = icmp sgt <16 x i8> %a, @@ -178,9 +178,9 @@ ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff ; CHECK-NEXT: sshr v2.4s, v0.4s, #31 -; CHECK-NEXT: cmgt v0.4s, v0.4s, v1.4s +; CHECK-NEXT: cmgt v1.4s, v0.4s, v1.4s ; CHECK-NEXT: orr v2.4s, #1 -; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: xtn v0.4h, v1.4s ; CHECK-NEXT: str q2, [sp] // 16-byte Folded Spill ; CHECK-NEXT: bl use_4xi1 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload @@ -198,9 +198,9 @@ ; CHECK-LABEL: not_sign_4xi32: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI16_0 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] +; CHECK-NEXT: cmgt v0.4s, v0.4s, v1.4s ; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI16_0] -; CHECK-NEXT: cmgt v0.4s, v0.4s, v2.4s ; CHECK-NEXT: and v1.16b, v0.16b, v1.16b ; CHECK-NEXT: orn v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret @@ -213,11 +213,11 @@ define <4 x i32> @not_sign_4xi32_2(<4 x i32> %a) { ; CHECK-LABEL: not_sign_4xi32_2: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff ; CHECK-NEXT: adrp x8, .LCPI17_0 +; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff ; CHECK-NEXT: cmgt v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_0] -; CHECK-NEXT: and v1.16b, v0.16b, v1.16b +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI17_0] +; CHECK-NEXT: and v1.16b, v0.16b, v2.16b ; CHECK-NEXT: orn v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %c = icmp sgt <4 x i32> %a, @@ -229,10 +229,10 @@ define <4 x i32> @not_sign_4xi32_3(<4 x i32> %a) { ; CHECK-LABEL: not_sign_4xi32_3: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff ; CHECK-NEXT: adrp x8, .LCPI18_0 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI18_0] +; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff ; CHECK-NEXT: cmgt v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI18_0] ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: bsl v0.16b, v1.16b, v2.16b ; CHECK-NEXT: ret @@ -246,18 +246,18 @@ ; CHECK-LABEL: sign_4xi65: ; CHECK: // %bb.0: ; CHECK-NEXT: sbfx x8, x1, #0, #1 -; CHECK-NEXT: sbfx x10, x5, #0, #1 +; CHECK-NEXT: sbfx x10, x7, #0, #1 ; CHECK-NEXT: orr x9, x8, #0x1 ; CHECK-NEXT: lsr x1, x8, #63 -; CHECK-NEXT: sbfx x8, x7, #0, #1 -; CHECK-NEXT: orr x4, x10, #0x1 -; CHECK-NEXT: lsr x5, x10, #63 -; CHECK-NEXT: orr x6, x8, #0x1 +; CHECK-NEXT: sbfx x8, x5, #0, #1 +; CHECK-NEXT: orr x6, x10, #0x1 +; CHECK-NEXT: orr x4, x8, #0x1 +; CHECK-NEXT: lsr x5, x8, #63 ; CHECK-NEXT: fmov d0, x9 ; CHECK-NEXT: sbfx x9, x3, #0, #1 ; CHECK-NEXT: orr x2, x9, #0x1 ; CHECK-NEXT: lsr x3, x9, #63 -; CHECK-NEXT: lsr x7, x8, #63 +; CHECK-NEXT: lsr x7, x10, #63 ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/dag-combine-trunc-build-vec.ll b/llvm/test/CodeGen/AArch64/dag-combine-trunc-build-vec.ll --- a/llvm/test/CodeGen/AArch64/dag-combine-trunc-build-vec.ll +++ b/llvm/test/CodeGen/AArch64/dag-combine-trunc-build-vec.ll @@ -10,13 +10,13 @@ define void @no_combine(i32 %p) local_unnamed_addr { ; CHECK-LABEL: no_combine: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v0.4h, #4 -; CHECK-NEXT: dup v1.4s, w0 -; CHECK-NEXT: xtn v1.4h, v1.4s -; CHECK-NEXT: mov v1.d[1], v0.d[0] -; CHECK-NEXT: xtn v0.8b, v1.8h -; CHECK-NEXT: xtn2 v0.16b, v1.8h -; CHECK-NEXT: str q0, [x8] +; CHECK-NEXT: dup v0.4s, w0 +; CHECK-NEXT: movi v1.4h, #4 +; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: xtn v1.8b, v0.8h +; CHECK-NEXT: xtn2 v1.16b, v0.8h +; CHECK-NEXT: str q1, [x8] ; CHECK-NEXT: ret ; The two shufflevector operations are needed to force the DAGCombine to happen diff --git a/llvm/test/CodeGen/AArch64/dag-numsignbits.ll b/llvm/test/CodeGen/AArch64/dag-numsignbits.ll --- a/llvm/test/CodeGen/AArch64/dag-numsignbits.ll +++ b/llvm/test/CodeGen/AArch64/dag-numsignbits.ll @@ -15,9 +15,9 @@ ; CHECK-NEXT: adrp x8, .LCPI0_1 ; CHECK-NEXT: add v0.4h, v0.4h, v1.4h ; CHECK-NEXT: movi v1.4h, #1 +; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_1] ; CHECK-NEXT: cmgt v0.4h, v1.4h, v0.4h -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_1] -; CHECK-NEXT: and v0.8b, v0.8b, v1.8b +; CHECK-NEXT: and v0.8b, v0.8b, v2.8b ; CHECK-NEXT: shl v0.4h, v0.4h, #15 ; CHECK-NEXT: sshr v0.4h, v0.4h, #15 ; CHECK-NEXT: umov w0, v0.h[0] diff --git a/llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-signed.ll b/llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-signed.ll --- a/llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-signed.ll +++ b/llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-signed.ll @@ -74,66 +74,66 @@ ; ALL-NEXT: smov w9, v0.b[1] ; ALL-NEXT: smov w10, v0.b[0] ; ALL-NEXT: smov w11, v0.b[2] +; ALL-NEXT: sdiv w8, w9, w8 +; ALL-NEXT: smov w9, v1.b[0] ; ALL-NEXT: smov w12, v0.b[3] ; ALL-NEXT: smov w13, v0.b[4] ; ALL-NEXT: smov w14, v0.b[5] ; ALL-NEXT: smov w15, v0.b[6] -; ALL-NEXT: sdiv w8, w9, w8 -; ALL-NEXT: smov w9, v1.b[0] ; ALL-NEXT: smov w16, v0.b[7] ; ALL-NEXT: smov w17, v0.b[8] +; ALL-NEXT: smov w18, v0.b[9] +; ALL-NEXT: smov w1, v0.b[10] +; ALL-NEXT: smov w2, v0.b[11] +; ALL-NEXT: smov w3, v0.b[12] ; ALL-NEXT: sdiv w9, w10, w9 ; ALL-NEXT: smov w10, v1.b[2] ; ALL-NEXT: sdiv w10, w11, w10 ; ALL-NEXT: smov w11, v1.b[3] ; ALL-NEXT: fmov s2, w9 -; ALL-NEXT: smov w9, v1.b[9] ; ALL-NEXT: mov v2.b[1], w8 +; ALL-NEXT: smov w8, v1.b[13] ; ALL-NEXT: sdiv w11, w12, w11 ; ALL-NEXT: smov w12, v1.b[4] ; ALL-NEXT: mov v2.b[2], w10 -; ALL-NEXT: smov w10, v0.b[10] +; ALL-NEXT: smov w10, v0.b[13] ; ALL-NEXT: sdiv w12, w13, w12 ; ALL-NEXT: smov w13, v1.b[5] ; ALL-NEXT: mov v2.b[3], w11 -; ALL-NEXT: smov w11, v0.b[11] +; ALL-NEXT: smov w11, v0.b[14] ; ALL-NEXT: sdiv w13, w14, w13 ; ALL-NEXT: smov w14, v1.b[6] ; ALL-NEXT: mov v2.b[4], w12 -; ALL-NEXT: smov w12, v0.b[12] +; ALL-NEXT: smov w12, v0.b[15] ; ALL-NEXT: sdiv w14, w15, w14 ; ALL-NEXT: smov w15, v1.b[7] ; ALL-NEXT: mov v2.b[5], w13 -; ALL-NEXT: smov w13, v0.b[13] ; ALL-NEXT: sdiv w15, w16, w15 ; ALL-NEXT: smov w16, v1.b[8] ; ALL-NEXT: mov v2.b[6], w14 ; ALL-NEXT: sdiv w16, w17, w16 -; ALL-NEXT: smov w17, v0.b[9] +; ALL-NEXT: smov w17, v1.b[9] ; ALL-NEXT: mov v2.b[7], w15 -; ALL-NEXT: sdiv w8, w17, w9 -; ALL-NEXT: smov w9, v1.b[10] +; ALL-NEXT: sdiv w17, w18, w17 +; ALL-NEXT: smov w18, v1.b[10] ; ALL-NEXT: mov v2.b[8], w16 -; ALL-NEXT: sdiv w9, w10, w9 -; ALL-NEXT: smov w10, v1.b[11] -; ALL-NEXT: mov v2.b[9], w8 +; ALL-NEXT: sdiv w18, w1, w18 +; ALL-NEXT: smov w1, v1.b[11] +; ALL-NEXT: mov v2.b[9], w17 +; ALL-NEXT: sdiv w1, w2, w1 +; ALL-NEXT: smov w2, v1.b[12] +; ALL-NEXT: mov v2.b[10], w18 +; ALL-NEXT: sdiv w9, w3, w2 +; ALL-NEXT: mov v2.b[11], w1 +; ALL-NEXT: sdiv w8, w10, w8 +; ALL-NEXT: smov w10, v1.b[14] +; ALL-NEXT: mov v2.b[12], w9 ; ALL-NEXT: sdiv w10, w11, w10 -; ALL-NEXT: smov w11, v1.b[12] -; ALL-NEXT: mov v2.b[10], w9 -; ALL-NEXT: smov w9, v1.b[14] -; ALL-NEXT: sdiv w11, w12, w11 -; ALL-NEXT: smov w12, v1.b[13] -; ALL-NEXT: mov v2.b[11], w10 -; ALL-NEXT: smov w10, v1.b[15] -; ALL-NEXT: sdiv w8, w13, w12 -; ALL-NEXT: smov w12, v0.b[14] -; ALL-NEXT: mov v2.b[12], w11 -; ALL-NEXT: smov w11, v0.b[15] -; ALL-NEXT: sdiv w9, w12, w9 +; ALL-NEXT: smov w11, v1.b[15] ; ALL-NEXT: mov v2.b[13], w8 -; ALL-NEXT: sdiv w8, w11, w10 -; ALL-NEXT: mov v2.b[14], w9 -; ALL-NEXT: mov v2.b[15], w8 +; ALL-NEXT: sdiv w11, w12, w11 +; ALL-NEXT: mov v2.b[14], w10 +; ALL-NEXT: mov v2.b[15], w11 ; ALL-NEXT: mls v0.16b, v2.16b, v1.16b ; ALL-NEXT: str q2, [x0] ; ALL-NEXT: ret @@ -151,32 +151,32 @@ ; ALL-NEXT: smov w9, v0.h[1] ; ALL-NEXT: smov w10, v0.h[0] ; ALL-NEXT: smov w11, v0.h[2] -; ALL-NEXT: smov w12, v0.h[3] -; ALL-NEXT: smov w13, v0.h[4] ; ALL-NEXT: sdiv w8, w9, w8 ; ALL-NEXT: smov w9, v1.h[0] +; ALL-NEXT: smov w12, v0.h[3] +; ALL-NEXT: smov w13, v0.h[4] +; ALL-NEXT: smov w14, v0.h[5] +; ALL-NEXT: smov w15, v0.h[6] ; ALL-NEXT: sdiv w9, w10, w9 ; ALL-NEXT: smov w10, v1.h[2] ; ALL-NEXT: sdiv w10, w11, w10 ; ALL-NEXT: smov w11, v1.h[3] ; ALL-NEXT: fmov s2, w9 -; ALL-NEXT: smov w9, v1.h[5] ; ALL-NEXT: mov v2.h[1], w8 +; ALL-NEXT: smov w8, v1.h[7] ; ALL-NEXT: sdiv w11, w12, w11 ; ALL-NEXT: smov w12, v1.h[4] ; ALL-NEXT: mov v2.h[2], w10 -; ALL-NEXT: smov w10, v0.h[6] +; ALL-NEXT: smov w10, v0.h[7] ; ALL-NEXT: sdiv w12, w13, w12 -; ALL-NEXT: smov w13, v0.h[5] +; ALL-NEXT: smov w13, v1.h[5] ; ALL-NEXT: mov v2.h[3], w11 -; ALL-NEXT: smov w11, v0.h[7] -; ALL-NEXT: sdiv w8, w13, w9 -; ALL-NEXT: smov w9, v1.h[6] +; ALL-NEXT: sdiv w13, w14, w13 +; ALL-NEXT: smov w14, v1.h[6] ; ALL-NEXT: mov v2.h[4], w12 -; ALL-NEXT: sdiv w9, w10, w9 -; ALL-NEXT: smov w10, v1.h[7] -; ALL-NEXT: mov v2.h[5], w8 -; ALL-NEXT: sdiv w8, w11, w10 +; ALL-NEXT: sdiv w9, w15, w14 +; ALL-NEXT: mov v2.h[5], w13 +; ALL-NEXT: sdiv w8, w10, w8 ; ALL-NEXT: mov v2.h[6], w9 ; ALL-NEXT: mov v2.h[7], w8 ; ALL-NEXT: mls v0.8h, v2.8h, v1.8h @@ -196,18 +196,18 @@ ; ALL-NEXT: mov w9, v0.s[1] ; ALL-NEXT: fmov w10, s0 ; ALL-NEXT: mov w11, v0.s[2] -; ALL-NEXT: mov w12, v0.s[3] ; ALL-NEXT: sdiv w8, w9, w8 ; ALL-NEXT: fmov w9, s1 +; ALL-NEXT: mov w12, v0.s[3] ; ALL-NEXT: sdiv w9, w10, w9 ; ALL-NEXT: mov w10, v1.s[2] ; ALL-NEXT: sdiv w10, w11, w10 ; ALL-NEXT: mov w11, v1.s[3] ; ALL-NEXT: fmov s2, w9 ; ALL-NEXT: mov v2.s[1], w8 -; ALL-NEXT: sdiv w8, w12, w11 +; ALL-NEXT: sdiv w11, w12, w11 ; ALL-NEXT: mov v2.s[2], w10 -; ALL-NEXT: mov v2.s[3], w8 +; ALL-NEXT: mov v2.s[3], w11 ; ALL-NEXT: mls v0.4s, v2.4s, v1.4s ; ALL-NEXT: str q2, [x0] ; ALL-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-unsigned.ll b/llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-unsigned.ll --- a/llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-unsigned.ll +++ b/llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-unsigned.ll @@ -74,66 +74,66 @@ ; ALL-NEXT: umov w9, v0.b[1] ; ALL-NEXT: umov w10, v0.b[0] ; ALL-NEXT: umov w11, v0.b[2] +; ALL-NEXT: udiv w8, w9, w8 +; ALL-NEXT: umov w9, v1.b[0] ; ALL-NEXT: umov w12, v0.b[3] ; ALL-NEXT: umov w13, v0.b[4] ; ALL-NEXT: umov w14, v0.b[5] ; ALL-NEXT: umov w15, v0.b[6] -; ALL-NEXT: udiv w8, w9, w8 -; ALL-NEXT: umov w9, v1.b[0] ; ALL-NEXT: umov w16, v0.b[7] ; ALL-NEXT: umov w17, v0.b[8] +; ALL-NEXT: umov w18, v0.b[9] +; ALL-NEXT: umov w1, v0.b[10] +; ALL-NEXT: umov w2, v0.b[11] +; ALL-NEXT: umov w3, v0.b[12] ; ALL-NEXT: udiv w9, w10, w9 ; ALL-NEXT: umov w10, v1.b[2] ; ALL-NEXT: udiv w10, w11, w10 ; ALL-NEXT: umov w11, v1.b[3] ; ALL-NEXT: fmov s2, w9 -; ALL-NEXT: umov w9, v1.b[9] ; ALL-NEXT: mov v2.b[1], w8 +; ALL-NEXT: umov w8, v1.b[13] ; ALL-NEXT: udiv w11, w12, w11 ; ALL-NEXT: umov w12, v1.b[4] ; ALL-NEXT: mov v2.b[2], w10 -; ALL-NEXT: umov w10, v0.b[10] +; ALL-NEXT: umov w10, v0.b[13] ; ALL-NEXT: udiv w12, w13, w12 ; ALL-NEXT: umov w13, v1.b[5] ; ALL-NEXT: mov v2.b[3], w11 -; ALL-NEXT: umov w11, v0.b[11] +; ALL-NEXT: umov w11, v0.b[14] ; ALL-NEXT: udiv w13, w14, w13 ; ALL-NEXT: umov w14, v1.b[6] ; ALL-NEXT: mov v2.b[4], w12 -; ALL-NEXT: umov w12, v0.b[12] +; ALL-NEXT: umov w12, v0.b[15] ; ALL-NEXT: udiv w14, w15, w14 ; ALL-NEXT: umov w15, v1.b[7] ; ALL-NEXT: mov v2.b[5], w13 -; ALL-NEXT: umov w13, v0.b[13] ; ALL-NEXT: udiv w15, w16, w15 ; ALL-NEXT: umov w16, v1.b[8] ; ALL-NEXT: mov v2.b[6], w14 ; ALL-NEXT: udiv w16, w17, w16 -; ALL-NEXT: umov w17, v0.b[9] +; ALL-NEXT: umov w17, v1.b[9] ; ALL-NEXT: mov v2.b[7], w15 -; ALL-NEXT: udiv w8, w17, w9 -; ALL-NEXT: umov w9, v1.b[10] +; ALL-NEXT: udiv w17, w18, w17 +; ALL-NEXT: umov w18, v1.b[10] ; ALL-NEXT: mov v2.b[8], w16 -; ALL-NEXT: udiv w9, w10, w9 -; ALL-NEXT: umov w10, v1.b[11] -; ALL-NEXT: mov v2.b[9], w8 +; ALL-NEXT: udiv w18, w1, w18 +; ALL-NEXT: umov w1, v1.b[11] +; ALL-NEXT: mov v2.b[9], w17 +; ALL-NEXT: udiv w1, w2, w1 +; ALL-NEXT: umov w2, v1.b[12] +; ALL-NEXT: mov v2.b[10], w18 +; ALL-NEXT: udiv w9, w3, w2 +; ALL-NEXT: mov v2.b[11], w1 +; ALL-NEXT: udiv w8, w10, w8 +; ALL-NEXT: umov w10, v1.b[14] +; ALL-NEXT: mov v2.b[12], w9 ; ALL-NEXT: udiv w10, w11, w10 -; ALL-NEXT: umov w11, v1.b[12] -; ALL-NEXT: mov v2.b[10], w9 -; ALL-NEXT: umov w9, v1.b[14] -; ALL-NEXT: udiv w11, w12, w11 -; ALL-NEXT: umov w12, v1.b[13] -; ALL-NEXT: mov v2.b[11], w10 -; ALL-NEXT: umov w10, v1.b[15] -; ALL-NEXT: udiv w8, w13, w12 -; ALL-NEXT: umov w12, v0.b[14] -; ALL-NEXT: mov v2.b[12], w11 -; ALL-NEXT: umov w11, v0.b[15] -; ALL-NEXT: udiv w9, w12, w9 +; ALL-NEXT: umov w11, v1.b[15] ; ALL-NEXT: mov v2.b[13], w8 -; ALL-NEXT: udiv w8, w11, w10 -; ALL-NEXT: mov v2.b[14], w9 -; ALL-NEXT: mov v2.b[15], w8 +; ALL-NEXT: udiv w11, w12, w11 +; ALL-NEXT: mov v2.b[14], w10 +; ALL-NEXT: mov v2.b[15], w11 ; ALL-NEXT: mls v0.16b, v2.16b, v1.16b ; ALL-NEXT: str q2, [x0] ; ALL-NEXT: ret @@ -151,32 +151,32 @@ ; ALL-NEXT: umov w9, v0.h[1] ; ALL-NEXT: umov w10, v0.h[0] ; ALL-NEXT: umov w11, v0.h[2] -; ALL-NEXT: umov w12, v0.h[3] -; ALL-NEXT: umov w13, v0.h[4] ; ALL-NEXT: udiv w8, w9, w8 ; ALL-NEXT: umov w9, v1.h[0] +; ALL-NEXT: umov w12, v0.h[3] +; ALL-NEXT: umov w13, v0.h[4] +; ALL-NEXT: umov w14, v0.h[5] +; ALL-NEXT: umov w15, v0.h[6] ; ALL-NEXT: udiv w9, w10, w9 ; ALL-NEXT: umov w10, v1.h[2] ; ALL-NEXT: udiv w10, w11, w10 ; ALL-NEXT: umov w11, v1.h[3] ; ALL-NEXT: fmov s2, w9 -; ALL-NEXT: umov w9, v1.h[5] ; ALL-NEXT: mov v2.h[1], w8 +; ALL-NEXT: umov w8, v1.h[7] ; ALL-NEXT: udiv w11, w12, w11 ; ALL-NEXT: umov w12, v1.h[4] ; ALL-NEXT: mov v2.h[2], w10 -; ALL-NEXT: umov w10, v0.h[6] +; ALL-NEXT: umov w10, v0.h[7] ; ALL-NEXT: udiv w12, w13, w12 -; ALL-NEXT: umov w13, v0.h[5] +; ALL-NEXT: umov w13, v1.h[5] ; ALL-NEXT: mov v2.h[3], w11 -; ALL-NEXT: umov w11, v0.h[7] -; ALL-NEXT: udiv w8, w13, w9 -; ALL-NEXT: umov w9, v1.h[6] +; ALL-NEXT: udiv w13, w14, w13 +; ALL-NEXT: umov w14, v1.h[6] ; ALL-NEXT: mov v2.h[4], w12 -; ALL-NEXT: udiv w9, w10, w9 -; ALL-NEXT: umov w10, v1.h[7] -; ALL-NEXT: mov v2.h[5], w8 -; ALL-NEXT: udiv w8, w11, w10 +; ALL-NEXT: udiv w9, w15, w14 +; ALL-NEXT: mov v2.h[5], w13 +; ALL-NEXT: udiv w8, w10, w8 ; ALL-NEXT: mov v2.h[6], w9 ; ALL-NEXT: mov v2.h[7], w8 ; ALL-NEXT: mls v0.8h, v2.8h, v1.8h @@ -196,18 +196,18 @@ ; ALL-NEXT: mov w9, v0.s[1] ; ALL-NEXT: fmov w10, s0 ; ALL-NEXT: mov w11, v0.s[2] -; ALL-NEXT: mov w12, v0.s[3] ; ALL-NEXT: udiv w8, w9, w8 ; ALL-NEXT: fmov w9, s1 +; ALL-NEXT: mov w12, v0.s[3] ; ALL-NEXT: udiv w9, w10, w9 ; ALL-NEXT: mov w10, v1.s[2] ; ALL-NEXT: udiv w10, w11, w10 ; ALL-NEXT: mov w11, v1.s[3] ; ALL-NEXT: fmov s2, w9 ; ALL-NEXT: mov v2.s[1], w8 -; ALL-NEXT: udiv w8, w12, w11 +; ALL-NEXT: udiv w11, w12, w11 ; ALL-NEXT: mov v2.s[2], w10 -; ALL-NEXT: mov v2.s[3], w8 +; ALL-NEXT: mov v2.s[3], w11 ; ALL-NEXT: mls v0.4s, v2.4s, v1.4s ; ALL-NEXT: str q2, [x0] ; ALL-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/expand-vector-rot.ll b/llvm/test/CodeGen/AArch64/expand-vector-rot.ll --- a/llvm/test/CodeGen/AArch64/expand-vector-rot.ll +++ b/llvm/test/CodeGen/AArch64/expand-vector-rot.ll @@ -6,15 +6,15 @@ define <2 x i16> @rotlv2_16(<2 x i16> %vec2_16, <2 x i16> %shift) { ; CHECK-LABEL: rotlv2_16: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.2s, #15 -; CHECK-NEXT: neg v3.2s, v1.2s +; CHECK-NEXT: neg v2.2s, v1.2s +; CHECK-NEXT: movi v3.2s, #15 ; CHECK-NEXT: movi d4, #0x00ffff0000ffff -; CHECK-NEXT: and v3.8b, v3.8b, v2.8b -; CHECK-NEXT: and v1.8b, v1.8b, v2.8b +; CHECK-NEXT: and v1.8b, v1.8b, v3.8b +; CHECK-NEXT: and v2.8b, v2.8b, v3.8b ; CHECK-NEXT: and v4.8b, v0.8b, v4.8b -; CHECK-NEXT: neg v3.2s, v3.2s +; CHECK-NEXT: neg v2.2s, v2.2s ; CHECK-NEXT: ushl v0.2s, v0.2s, v1.2s -; CHECK-NEXT: ushl v2.2s, v4.2s, v3.2s +; CHECK-NEXT: ushl v2.2s, v4.2s, v2.2s ; CHECK-NEXT: orr v0.8b, v0.8b, v2.8b ; CHECK-NEXT: ret %1 = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %vec2_16, <2 x i16> %vec2_16, <2 x i16> %shift) diff --git a/llvm/test/CodeGen/AArch64/f16-instructions.ll b/llvm/test/CodeGen/AArch64/f16-instructions.ll --- a/llvm/test/CodeGen/AArch64/f16-instructions.ll +++ b/llvm/test/CodeGen/AArch64/f16-instructions.ll @@ -1100,9 +1100,9 @@ } ; CHECK-CVT-LABEL: test_copysign: -; CHECK-CVT-NEXT: movi.4s v2, #128, lsl #24 ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: movi.4s v2, #128, lsl #24 ; CHECK-CVT-NEXT: bit.16b v0, v1, v2 ; CHECK-CVT-NEXT: fcvt h0, s0 ; CHECK-CVT-NEXT: ret @@ -1118,15 +1118,15 @@ } ; CHECK-CVT-LABEL: test_copysign_f32: -; CHECK-CVT-NEXT: movi.4s v2, #128, lsl #24 ; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: movi.4s v2, #128, lsl #24 ; CHECK-CVT-NEXT: bit.16b v0, v1, v2 ; CHECK-CVT-NEXT: fcvt h0, s0 ; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_copysign_f32: -; CHECK-FP16-NEXT: movi.8h v2, #128, lsl #8 ; CHECK-FP16-NEXT: fcvt h1, s1 +; CHECK-FP16-NEXT: movi.8h v2, #128, lsl #8 ; CHECK-FP16-NEXT: bit.16b v0, v1, v2 ; CHECK-FP16-NEXT: ret @@ -1137,16 +1137,16 @@ } ; CHECK-CVT-LABEL: test_copysign_f64: -; CHECK-CVT-NEXT: movi.4s v2, #128, lsl #24 ; CHECK-CVT-NEXT: fcvt s1, d1 ; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: movi.4s v2, #128, lsl #24 ; CHECK-CVT-NEXT: bit.16b v0, v1, v2 ; CHECK-CVT-NEXT: fcvt h0, s0 ; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_copysign_f64: -; CHECK-FP16-NEXT: movi.8h v2, #128, lsl #8 ; CHECK-FP16-NEXT: fcvt h1, d1 +; CHECK-FP16-NEXT: movi.8h v2, #128, lsl #8 ; CHECK-FP16-NEXT: bit.16b v0, v1, v2 ; CHECK-FP16-NEXT: ret @@ -1160,9 +1160,9 @@ ; away the (fpext (fp_round )) here. ; CHECK-CVT-LABEL: test_copysign_extended: -; CHECK-CVT-NEXT: movi.4s v2, #128, lsl #24 ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: movi.4s v2, #128, lsl #24 ; CHECK-CVT-NEXT: bit.16b v0, v1, v2 ; CHECK-CVT-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/fdiv_combine.ll b/llvm/test/CodeGen/AArch64/fdiv_combine.ll --- a/llvm/test/CodeGen/AArch64/fdiv_combine.ll +++ b/llvm/test/CodeGen/AArch64/fdiv_combine.ll @@ -29,8 +29,8 @@ define <2 x float> @test3(<2 x i32> %in) { ; CHECK-LABEL: test3: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov.2s v1, #9.00000000 ; CHECK-NEXT: scvtf.2s v0, v0 +; CHECK-NEXT: fmov.2s v1, #9.00000000 ; CHECK-NEXT: fdiv.2s v0, v0, v1 ; CHECK-NEXT: ret entry: @@ -43,8 +43,8 @@ define <2 x float> @test4(<2 x i32> %in) { ; CHECK-LABEL: test4: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi.2s v1, #80, lsl #24 ; CHECK-NEXT: scvtf.2s v0, v0 +; CHECK-NEXT: movi.2s v1, #80, lsl #24 ; CHECK-NEXT: fdiv.2s v0, v0, v1 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll --- a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll +++ b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll @@ -9,52 +9,52 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: mov h6, v1.h[2] ; CHECK-CVT-NEXT: mov h7, v0.h[2] ; CHECK-CVT-NEXT: mov h16, v1.h[3] ; CHECK-CVT-NEXT: mov h17, v0.h[3] -; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov h18, v1.h[4] ; CHECK-CVT-NEXT: fadd s4, s5, s4 -; CHECK-CVT-NEXT: fcvt s5, h6 -; CHECK-CVT-NEXT: fcvt s6, h7 -; CHECK-CVT-NEXT: fcvt s7, h16 -; CHECK-CVT-NEXT: fcvt s16, h17 +; CHECK-CVT-NEXT: mov h5, v0.h[4] +; CHECK-CVT-NEXT: fcvt s6, h6 +; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: fadd s3, s3, s2 +; CHECK-CVT-NEXT: fcvt s16, h16 +; CHECK-CVT-NEXT: fcvt s17, h17 +; CHECK-CVT-NEXT: fcvt s18, h18 +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: mov h19, v1.h[5] ; CHECK-CVT-NEXT: fcvt h2, s4 -; CHECK-CVT-NEXT: fadd s4, s6, s5 -; CHECK-CVT-NEXT: mov h5, v1.h[4] -; CHECK-CVT-NEXT: mov h6, v0.h[4] -; CHECK-CVT-NEXT: fadd s7, s16, s7 +; CHECK-CVT-NEXT: fadd s6, s7, s6 +; CHECK-CVT-NEXT: mov h7, v0.h[5] ; CHECK-CVT-NEXT: fcvt h3, s3 -; CHECK-CVT-NEXT: mov h16, v0.h[5] -; CHECK-CVT-NEXT: fcvt h7, s7 +; CHECK-CVT-NEXT: fadd s4, s17, s16 +; CHECK-CVT-NEXT: mov h16, v1.h[6] +; CHECK-CVT-NEXT: fadd s5, s5, s18 +; CHECK-CVT-NEXT: mov h17, v0.h[6] +; CHECK-CVT-NEXT: fcvt s18, h19 +; CHECK-CVT-NEXT: fcvt s7, h7 +; CHECK-CVT-NEXT: fcvt h6, s6 ; CHECK-CVT-NEXT: mov v2.h[1], v3.h[0] -; CHECK-CVT-NEXT: fcvt h3, s4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: fcvt s5, h6 -; CHECK-CVT-NEXT: mov h6, v1.h[5] -; CHECK-CVT-NEXT: mov v2.h[2], v3.h[0] -; CHECK-CVT-NEXT: fadd s3, s5, s4 -; CHECK-CVT-NEXT: fcvt s4, h6 -; CHECK-CVT-NEXT: fcvt s5, h16 -; CHECK-CVT-NEXT: mov h6, v1.h[6] -; CHECK-CVT-NEXT: mov h16, v0.h[6] +; CHECK-CVT-NEXT: fcvt s3, h16 +; CHECK-CVT-NEXT: fcvt s16, h17 ; CHECK-CVT-NEXT: mov h1, v1.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], v7.h[0] ; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: fcvt h3, s3 -; CHECK-CVT-NEXT: fadd s4, s5, s4 -; CHECK-CVT-NEXT: fcvt s5, h6 -; CHECK-CVT-NEXT: fcvt s6, h16 +; CHECK-CVT-NEXT: fcvt h4, s4 +; CHECK-CVT-NEXT: fadd s7, s7, s18 ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], v3.h[0] -; CHECK-CVT-NEXT: fcvt h3, s4 -; CHECK-CVT-NEXT: fadd s4, s6, s5 +; CHECK-CVT-NEXT: mov v2.h[2], v6.h[0] +; CHECK-CVT-NEXT: fadd s3, s16, s3 +; CHECK-CVT-NEXT: fcvt h5, s5 +; CHECK-CVT-NEXT: mov v2.h[3], v4.h[0] ; CHECK-CVT-NEXT: fadd s0, s0, s1 -; CHECK-CVT-NEXT: mov v2.h[5], v3.h[0] -; CHECK-CVT-NEXT: fcvt h3, s4 +; CHECK-CVT-NEXT: fcvt h1, s7 +; CHECK-CVT-NEXT: mov v2.h[4], v5.h[0] +; CHECK-CVT-NEXT: fcvt h3, s3 +; CHECK-CVT-NEXT: mov v2.h[5], v1.h[0] ; CHECK-CVT-NEXT: fcvt h0, s0 ; CHECK-CVT-NEXT: mov v2.h[6], v3.h[0] ; CHECK-CVT-NEXT: mov v2.h[7], v0.h[0] @@ -78,52 +78,52 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: mov h6, v1.h[2] ; CHECK-CVT-NEXT: mov h7, v0.h[2] ; CHECK-CVT-NEXT: mov h16, v1.h[3] ; CHECK-CVT-NEXT: mov h17, v0.h[3] -; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov h18, v1.h[4] ; CHECK-CVT-NEXT: fsub s4, s5, s4 -; CHECK-CVT-NEXT: fcvt s5, h6 -; CHECK-CVT-NEXT: fcvt s6, h7 -; CHECK-CVT-NEXT: fcvt s7, h16 -; CHECK-CVT-NEXT: fcvt s16, h17 +; CHECK-CVT-NEXT: mov h5, v0.h[4] +; CHECK-CVT-NEXT: fcvt s6, h6 +; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: fsub s3, s3, s2 +; CHECK-CVT-NEXT: fcvt s16, h16 +; CHECK-CVT-NEXT: fcvt s17, h17 +; CHECK-CVT-NEXT: fcvt s18, h18 +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: mov h19, v1.h[5] ; CHECK-CVT-NEXT: fcvt h2, s4 -; CHECK-CVT-NEXT: fsub s4, s6, s5 -; CHECK-CVT-NEXT: mov h5, v1.h[4] -; CHECK-CVT-NEXT: mov h6, v0.h[4] -; CHECK-CVT-NEXT: fsub s7, s16, s7 +; CHECK-CVT-NEXT: fsub s6, s7, s6 +; CHECK-CVT-NEXT: mov h7, v0.h[5] ; CHECK-CVT-NEXT: fcvt h3, s3 -; CHECK-CVT-NEXT: mov h16, v0.h[5] -; CHECK-CVT-NEXT: fcvt h7, s7 +; CHECK-CVT-NEXT: fsub s4, s17, s16 +; CHECK-CVT-NEXT: mov h16, v1.h[6] +; CHECK-CVT-NEXT: fsub s5, s5, s18 +; CHECK-CVT-NEXT: mov h17, v0.h[6] +; CHECK-CVT-NEXT: fcvt s18, h19 +; CHECK-CVT-NEXT: fcvt s7, h7 +; CHECK-CVT-NEXT: fcvt h6, s6 ; CHECK-CVT-NEXT: mov v2.h[1], v3.h[0] -; CHECK-CVT-NEXT: fcvt h3, s4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: fcvt s5, h6 -; CHECK-CVT-NEXT: mov h6, v1.h[5] -; CHECK-CVT-NEXT: mov v2.h[2], v3.h[0] -; CHECK-CVT-NEXT: fsub s3, s5, s4 -; CHECK-CVT-NEXT: fcvt s4, h6 -; CHECK-CVT-NEXT: fcvt s5, h16 -; CHECK-CVT-NEXT: mov h6, v1.h[6] -; CHECK-CVT-NEXT: mov h16, v0.h[6] +; CHECK-CVT-NEXT: fcvt s3, h16 +; CHECK-CVT-NEXT: fcvt s16, h17 ; CHECK-CVT-NEXT: mov h1, v1.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], v7.h[0] ; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: fcvt h3, s3 -; CHECK-CVT-NEXT: fsub s4, s5, s4 -; CHECK-CVT-NEXT: fcvt s5, h6 -; CHECK-CVT-NEXT: fcvt s6, h16 +; CHECK-CVT-NEXT: fcvt h4, s4 +; CHECK-CVT-NEXT: fsub s7, s7, s18 ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], v3.h[0] -; CHECK-CVT-NEXT: fcvt h3, s4 -; CHECK-CVT-NEXT: fsub s4, s6, s5 +; CHECK-CVT-NEXT: mov v2.h[2], v6.h[0] +; CHECK-CVT-NEXT: fsub s3, s16, s3 +; CHECK-CVT-NEXT: fcvt h5, s5 +; CHECK-CVT-NEXT: mov v2.h[3], v4.h[0] ; CHECK-CVT-NEXT: fsub s0, s0, s1 -; CHECK-CVT-NEXT: mov v2.h[5], v3.h[0] -; CHECK-CVT-NEXT: fcvt h3, s4 +; CHECK-CVT-NEXT: fcvt h1, s7 +; CHECK-CVT-NEXT: mov v2.h[4], v5.h[0] +; CHECK-CVT-NEXT: fcvt h3, s3 +; CHECK-CVT-NEXT: mov v2.h[5], v1.h[0] ; CHECK-CVT-NEXT: fcvt h0, s0 ; CHECK-CVT-NEXT: mov v2.h[6], v3.h[0] ; CHECK-CVT-NEXT: mov v2.h[7], v0.h[0] @@ -147,52 +147,52 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[2] -; CHECK-CVT-NEXT: mov h7, v0.h[2] -; CHECK-CVT-NEXT: mov h16, v0.h[3] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov h6, v1.h[2] +; CHECK-CVT-NEXT: mov h7, v0.h[2] +; CHECK-CVT-NEXT: mov h16, v1.h[3] +; CHECK-CVT-NEXT: mov h17, v0.h[3] +; CHECK-CVT-NEXT: mov h18, v1.h[4] ; CHECK-CVT-NEXT: fmul s4, s5, s4 -; CHECK-CVT-NEXT: mov h5, v1.h[3] +; CHECK-CVT-NEXT: mov h5, v0.h[4] ; CHECK-CVT-NEXT: fcvt s6, h6 ; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: fmul s3, s3, s2 +; CHECK-CVT-NEXT: fcvt s16, h16 +; CHECK-CVT-NEXT: fcvt s17, h17 +; CHECK-CVT-NEXT: fcvt s18, h18 +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: mov h19, v1.h[5] ; CHECK-CVT-NEXT: fcvt h2, s4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: fcvt s5, h16 ; CHECK-CVT-NEXT: fmul s6, s7, s6 -; CHECK-CVT-NEXT: mov h7, v1.h[4] -; CHECK-CVT-NEXT: mov h16, v0.h[4] +; CHECK-CVT-NEXT: mov h7, v0.h[5] ; CHECK-CVT-NEXT: fcvt h3, s3 -; CHECK-CVT-NEXT: fmul s4, s5, s4 -; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: fcvt h6, s6 +; CHECK-CVT-NEXT: fmul s4, s17, s16 +; CHECK-CVT-NEXT: mov h16, v1.h[6] +; CHECK-CVT-NEXT: fmul s5, s5, s18 +; CHECK-CVT-NEXT: mov h17, v0.h[6] +; CHECK-CVT-NEXT: fcvt s18, h19 ; CHECK-CVT-NEXT: fcvt s7, h7 +; CHECK-CVT-NEXT: fcvt h6, s6 ; CHECK-CVT-NEXT: mov v2.h[1], v3.h[0] -; CHECK-CVT-NEXT: mov h3, v1.h[5] -; CHECK-CVT-NEXT: fcvt s16, h16 -; CHECK-CVT-NEXT: fcvt h4, s4 -; CHECK-CVT-NEXT: fcvt s5, h5 -; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov v2.h[2], v6.h[0] -; CHECK-CVT-NEXT: fmul s6, s16, s7 -; CHECK-CVT-NEXT: mov h7, v1.h[6] -; CHECK-CVT-NEXT: mov h16, v0.h[6] +; CHECK-CVT-NEXT: fcvt s3, h16 +; CHECK-CVT-NEXT: fcvt s16, h17 ; CHECK-CVT-NEXT: mov h1, v1.h[7] -; CHECK-CVT-NEXT: fmul s3, s5, s3 ; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], v4.h[0] -; CHECK-CVT-NEXT: fcvt h4, s6 -; CHECK-CVT-NEXT: fcvt s5, h7 -; CHECK-CVT-NEXT: fcvt s6, h16 +; CHECK-CVT-NEXT: fcvt h4, s4 +; CHECK-CVT-NEXT: fmul s7, s7, s18 ; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt h3, s3 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], v4.h[0] -; CHECK-CVT-NEXT: fmul s4, s6, s5 +; CHECK-CVT-NEXT: mov v2.h[2], v6.h[0] +; CHECK-CVT-NEXT: fmul s3, s16, s3 +; CHECK-CVT-NEXT: fcvt h5, s5 +; CHECK-CVT-NEXT: mov v2.h[3], v4.h[0] ; CHECK-CVT-NEXT: fmul s0, s0, s1 -; CHECK-CVT-NEXT: mov v2.h[5], v3.h[0] -; CHECK-CVT-NEXT: fcvt h3, s4 +; CHECK-CVT-NEXT: fcvt h1, s7 +; CHECK-CVT-NEXT: mov v2.h[4], v5.h[0] +; CHECK-CVT-NEXT: fcvt h3, s3 +; CHECK-CVT-NEXT: mov v2.h[5], v1.h[0] ; CHECK-CVT-NEXT: fcvt h0, s0 ; CHECK-CVT-NEXT: mov v2.h[6], v3.h[0] ; CHECK-CVT-NEXT: mov v2.h[7], v0.h[0] @@ -216,56 +216,55 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h0 ; CHECK-CVT-NEXT: mov h5, v0.h[2] -; CHECK-CVT-NEXT: mov h6, v0.h[3] -; CHECK-CVT-NEXT: mov h7, v0.h[4] -; CHECK-CVT-NEXT: mov h16, v0.h[5] -; CHECK-CVT-NEXT: mov h17, v0.h[6] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: fcvt s5, h5 -; CHECK-CVT-NEXT: mov h0, v0.h[7] +; CHECK-CVT-NEXT: mov h6, v0.h[3] +; CHECK-CVT-NEXT: mov h7, v0.h[4] +; CHECK-CVT-NEXT: mov h16, v0.h[5] ; CHECK-CVT-NEXT: fcvt s6, h6 +; CHECK-CVT-NEXT: mov h17, v0.h[6] +; CHECK-CVT-NEXT: fdiv s2, s3, s2 +; CHECK-CVT-NEXT: fcvt s3, h1 ; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: fcvt s16, h16 ; CHECK-CVT-NEXT: fcvt s17, h17 -; CHECK-CVT-NEXT: fdiv s2, s3, s2 -; CHECK-CVT-NEXT: fcvt s3, h1 +; CHECK-CVT-NEXT: mov h0, v0.h[7] ; CHECK-CVT-NEXT: fcvt s0, h0 ; CHECK-CVT-NEXT: fdiv s3, s4, s3 ; CHECK-CVT-NEXT: mov h4, v1.h[2] -; CHECK-CVT-NEXT: fcvt h18, s2 ; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: fcvt h2, s2 ; CHECK-CVT-NEXT: fdiv s4, s5, s4 ; CHECK-CVT-NEXT: mov h5, v1.h[3] -; CHECK-CVT-NEXT: fcvt h2, s3 ; CHECK-CVT-NEXT: fcvt s5, h5 -; CHECK-CVT-NEXT: mov v2.h[1], v18.h[0] ; CHECK-CVT-NEXT: fdiv s5, s6, s5 ; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: fcvt h4, s4 ; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: mov v2.h[2], v4.h[0] ; CHECK-CVT-NEXT: fdiv s6, s7, s6 ; CHECK-CVT-NEXT: mov h7, v1.h[5] -; CHECK-CVT-NEXT: fcvt h4, s5 ; CHECK-CVT-NEXT: fcvt s7, h7 -; CHECK-CVT-NEXT: mov v2.h[3], v4.h[0] ; CHECK-CVT-NEXT: fdiv s7, s16, s7 ; CHECK-CVT-NEXT: mov h16, v1.h[6] ; CHECK-CVT-NEXT: mov h1, v1.h[7] ; CHECK-CVT-NEXT: fcvt s16, h16 ; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fdiv s3, s17, s16 -; CHECK-CVT-NEXT: fdiv s0, s0, s1 -; CHECK-CVT-NEXT: fcvt h1, s6 -; CHECK-CVT-NEXT: mov v2.h[4], v1.h[0] -; CHECK-CVT-NEXT: fcvt h1, s7 -; CHECK-CVT-NEXT: mov v2.h[5], v1.h[0] -; CHECK-CVT-NEXT: fcvt h1, s3 -; CHECK-CVT-NEXT: mov v2.h[6], v1.h[0] -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: mov v2.h[7], v0.h[0] -; CHECK-CVT-NEXT: mov v0.16b, v2.16b +; CHECK-CVT-NEXT: fdiv s16, s17, s16 +; CHECK-CVT-NEXT: fdiv s1, s0, s1 +; CHECK-CVT-NEXT: fcvt h0, s3 +; CHECK-CVT-NEXT: fcvt h3, s4 +; CHECK-CVT-NEXT: mov v0.h[1], v2.h[0] +; CHECK-CVT-NEXT: fcvt h2, s5 +; CHECK-CVT-NEXT: mov v0.h[2], v3.h[0] +; CHECK-CVT-NEXT: fcvt h3, s6 +; CHECK-CVT-NEXT: mov v0.h[3], v2.h[0] +; CHECK-CVT-NEXT: fcvt h2, s7 +; CHECK-CVT-NEXT: mov v0.h[4], v3.h[0] +; CHECK-CVT-NEXT: fcvt h3, s16 +; CHECK-CVT-NEXT: mov v0.h[5], v2.h[0] +; CHECK-CVT-NEXT: fcvt h1, s1 +; CHECK-CVT-NEXT: mov v0.h[6], v3.h[0] +; CHECK-CVT-NEXT: mov v0.h[7], v1.h[0] ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: div_h: @@ -313,24 +312,24 @@ define <8 x half> @d_to_h(<8 x double> %a) { ; CHECK-LABEL: d_to_h: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d4, v0.d[1] +; CHECK-NEXT: mov d5, v0.d[1] ; CHECK-NEXT: fcvt h0, d0 -; CHECK-NEXT: mov d5, v1.d[1] +; CHECK-NEXT: fcvt h6, d1 +; CHECK-NEXT: mov d1, v1.d[1] +; CHECK-NEXT: fcvt h5, d5 +; CHECK-NEXT: fcvt h4, d2 ; CHECK-NEXT: fcvt h1, d1 -; CHECK-NEXT: fcvt h4, d4 -; CHECK-NEXT: mov v0.h[1], v4.h[0] -; CHECK-NEXT: fcvt h4, d5 -; CHECK-NEXT: mov v0.h[2], v1.h[0] -; CHECK-NEXT: fcvt h1, d2 ; CHECK-NEXT: mov d2, v2.d[1] -; CHECK-NEXT: mov v0.h[3], v4.h[0] -; CHECK-NEXT: fcvt h2, d2 -; CHECK-NEXT: mov v0.h[4], v1.h[0] -; CHECK-NEXT: fcvt h1, d3 -; CHECK-NEXT: mov v0.h[5], v2.h[0] -; CHECK-NEXT: mov d2, v3.d[1] -; CHECK-NEXT: mov v0.h[6], v1.h[0] +; CHECK-NEXT: mov v0.h[1], v5.h[0] +; CHECK-NEXT: mov v0.h[2], v6.h[0] +; CHECK-NEXT: mov v0.h[3], v1.h[0] ; CHECK-NEXT: fcvt h1, d2 +; CHECK-NEXT: fcvt h2, d3 +; CHECK-NEXT: mov d3, v3.d[1] +; CHECK-NEXT: mov v0.h[4], v4.h[0] +; CHECK-NEXT: mov v0.h[5], v1.h[0] +; CHECK-NEXT: fcvt h1, d3 +; CHECK-NEXT: mov v0.h[6], v2.h[0] ; CHECK-NEXT: mov v0.h[7], v1.h[0] ; CHECK-NEXT: ret %1 = fptrunc <8 x double> %a to <8 x half> @@ -350,25 +349,26 @@ define <8 x double> @h_to_d(<8 x half> %a) { ; CHECK-LABEL: h_to_d: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: mov h1, v0.h[1] -; CHECK-NEXT: mov h3, v0.h[3] -; CHECK-NEXT: mov h4, v0.h[2] -; CHECK-NEXT: fcvt d0, h0 -; CHECK-NEXT: mov h5, v2.h[1] -; CHECK-NEXT: mov h6, v2.h[3] -; CHECK-NEXT: mov h7, v2.h[2] -; CHECK-NEXT: fcvt d16, h1 -; CHECK-NEXT: fcvt d17, h3 -; CHECK-NEXT: fcvt d1, h4 -; CHECK-NEXT: fcvt d2, h2 -; CHECK-NEXT: fcvt d4, h5 -; CHECK-NEXT: fcvt d5, h6 -; CHECK-NEXT: fcvt d3, h7 -; CHECK-NEXT: mov v0.d[1], v16.d[0] -; CHECK-NEXT: mov v1.d[1], v17.d[0] -; CHECK-NEXT: mov v2.d[1], v4.d[0] -; CHECK-NEXT: mov v3.d[1], v5.d[0] +; CHECK-NEXT: fcvt d4, h0 +; CHECK-NEXT: mov h2, v0.h[3] +; CHECK-NEXT: mov h3, v0.h[2] +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: fcvt d5, h1 +; CHECK-NEXT: fcvt d1, h3 +; CHECK-NEXT: mov h3, v0.h[1] +; CHECK-NEXT: mov h7, v0.h[3] +; CHECK-NEXT: mov h16, v0.h[2] +; CHECK-NEXT: fcvt d6, h2 +; CHECK-NEXT: fcvt d2, h0 +; CHECK-NEXT: fcvt d0, h3 +; CHECK-NEXT: fcvt d7, h7 +; CHECK-NEXT: fcvt d3, h16 +; CHECK-NEXT: mov v4.d[1], v5.d[0] +; CHECK-NEXT: mov v1.d[1], v6.d[0] +; CHECK-NEXT: mov v2.d[1], v0.d[0] +; CHECK-NEXT: mov v3.d[1], v7.d[0] +; CHECK-NEXT: mov v0.16b, v4.16b ; CHECK-NEXT: ret %1 = fpext <8 x half> %a to <8 x double> ret <8 x double> %1 @@ -641,57 +641,57 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] -; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov h6, v1.h[2] ; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: mov h2, v1.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h2, v0.h[2] +; CHECK-CVT-NEXT: mov h3, v1.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w8, ne ; CHECK-CVT-NEXT: fcmp s5, s4 -; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: mov h4, v1.h[3] +; CHECK-CVT-NEXT: mov h4, v0.h[3] ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov h5, v0.h[3] +; CHECK-CVT-NEXT: mov h5, v1.h[4] +; CHECK-CVT-NEXT: fcvt s4, h4 ; CHECK-CVT-NEXT: csetm w9, ne -; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 -; CHECK-CVT-NEXT: csetm w8, ne +; CHECK-CVT-NEXT: fcmp s2, s6 +; CHECK-CVT-NEXT: mov h2, v0.h[4] +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: mov h6, v1.h[5] +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: csetm w10, ne ; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 +; CHECK-CVT-NEXT: mov h3, v0.h[5] +; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: fmov s6, w9 +; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: csetm w9, ne +; CHECK-CVT-NEXT: fcmp s2, s5 +; CHECK-CVT-NEXT: mov h2, v1.h[6] +; CHECK-CVT-NEXT: mov h5, v0.h[6] ; CHECK-CVT-NEXT: mov h1, v1.h[7] +; CHECK-CVT-NEXT: mov v6.h[1], w8 +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w8, ne -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] -; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 -; CHECK-CVT-NEXT: csetm w8, ne -; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: fcmp s3, s4 ; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: mov h0, v0.h[7] +; CHECK-CVT-NEXT: mov v6.h[2], w10 ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 -; CHECK-CVT-NEXT: csetm w8, ne -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 +; CHECK-CVT-NEXT: mov v6.h[3], w9 +; CHECK-CVT-NEXT: csetm w9, ne +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: mov v6.h[4], w8 ; CHECK-CVT-NEXT: csetm w8, ne ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 -; CHECK-CVT-NEXT: csetm w8, ne -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v6.h[5], w9 +; CHECK-CVT-NEXT: csetm w9, ne +; CHECK-CVT-NEXT: mov v6.h[6], w8 +; CHECK-CVT-NEXT: mov v6.h[7], w9 +; CHECK-CVT-NEXT: xtn v0.8b, v6.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_une: @@ -711,65 +711,65 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v0.h[4] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: fcmp s3, s2 ; CHECK-CVT-NEXT: mov h2, v1.h[2] ; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w8, eq +; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: csinv w8, w8, wzr, vc ; CHECK-CVT-NEXT: fcmp s5, s4 ; CHECK-CVT-NEXT: mov h4, v1.h[3] -; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: mov h5, v0.h[3] ; CHECK-CVT-NEXT: csetm w9, eq ; CHECK-CVT-NEXT: csinv w9, w9, wzr, vc -; CHECK-CVT-NEXT: fcvt s4, h4 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fcvt s2, h5 -; CHECK-CVT-NEXT: fmov s3, w9 -; CHECK-CVT-NEXT: mov h5, v1.h[4] -; CHECK-CVT-NEXT: csetm w9, eq -; CHECK-CVT-NEXT: mov v3.h[1], w8 -; CHECK-CVT-NEXT: csinv w8, w9, wzr, vc -; CHECK-CVT-NEXT: fcmp s2, s4 -; CHECK-CVT-NEXT: fcvt s2, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 -; CHECK-CVT-NEXT: mov h5, v1.h[5] -; CHECK-CVT-NEXT: mov h6, v0.h[5] -; CHECK-CVT-NEXT: csetm w9, eq -; CHECK-CVT-NEXT: mov v3.h[2], w8 -; CHECK-CVT-NEXT: csinv w8, w9, wzr, vc -; CHECK-CVT-NEXT: fcmp s4, s2 -; CHECK-CVT-NEXT: fcvt s2, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov h6, v0.h[6] -; CHECK-CVT-NEXT: csetm w9, eq +; CHECK-CVT-NEXT: fcvt s3, h4 +; CHECK-CVT-NEXT: fcvt s4, h5 +; CHECK-CVT-NEXT: mov h2, v1.h[4] +; CHECK-CVT-NEXT: mov h5, v0.h[4] +; CHECK-CVT-NEXT: csetm w10, eq +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: csinv w10, w10, wzr, vc +; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: mov h3, v1.h[5] +; CHECK-CVT-NEXT: mov h4, v0.h[5] +; CHECK-CVT-NEXT: csetm w11, eq +; CHECK-CVT-NEXT: csinv w11, w11, wzr, vc +; CHECK-CVT-NEXT: fcmp s5, s2 +; CHECK-CVT-NEXT: fcvt s2, h3 +; CHECK-CVT-NEXT: fcvt s3, h4 +; CHECK-CVT-NEXT: mov h4, v1.h[6] +; CHECK-CVT-NEXT: mov h5, v0.h[6] +; CHECK-CVT-NEXT: csetm w12, eq ; CHECK-CVT-NEXT: mov h1, v1.h[7] -; CHECK-CVT-NEXT: mov v3.h[3], w8 -; CHECK-CVT-NEXT: csinv w8, w9, wzr, vc -; CHECK-CVT-NEXT: fcmp s4, s2 -; CHECK-CVT-NEXT: fcvt s2, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: csinv w12, w12, wzr, vc ; CHECK-CVT-NEXT: mov h0, v0.h[7] +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: fmov s2, w9 +; CHECK-CVT-NEXT: fcvt s3, h4 +; CHECK-CVT-NEXT: fcvt s4, h5 ; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: csetm w9, eq -; CHECK-CVT-NEXT: mov v3.h[4], w8 -; CHECK-CVT-NEXT: csinv w8, w9, wzr, vc -; CHECK-CVT-NEXT: fcmp s4, s2 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v3.h[5], w8 +; CHECK-CVT-NEXT: mov v2.h[1], w8 ; CHECK-CVT-NEXT: csetm w8, eq ; CHECK-CVT-NEXT: csinv w8, w8, wzr, vc +; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: mov v2.h[2], w10 +; CHECK-CVT-NEXT: mov v2.h[3], w11 +; CHECK-CVT-NEXT: csetm w9, eq +; CHECK-CVT-NEXT: csinv w9, w9, wzr, vc ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v3.h[6], w8 +; CHECK-CVT-NEXT: mov v2.h[4], w12 +; CHECK-CVT-NEXT: mov v2.h[5], w8 ; CHECK-CVT-NEXT: csetm w8, eq ; CHECK-CVT-NEXT: csinv w8, w8, wzr, vc -; CHECK-CVT-NEXT: mov v3.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v3.8h +; CHECK-CVT-NEXT: mov v2.h[6], w9 +; CHECK-CVT-NEXT: mov v2.h[7], w8 +; CHECK-CVT-NEXT: xtn v0.8b, v2.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_ueq: @@ -791,57 +791,57 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] -; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov h6, v1.h[2] ; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: mov h2, v1.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h2, v0.h[2] +; CHECK-CVT-NEXT: mov h3, v1.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w8, hi ; CHECK-CVT-NEXT: fcmp s5, s4 -; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: mov h4, v1.h[3] +; CHECK-CVT-NEXT: mov h4, v0.h[3] ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov h5, v0.h[3] +; CHECK-CVT-NEXT: mov h5, v1.h[4] +; CHECK-CVT-NEXT: fcvt s4, h4 ; CHECK-CVT-NEXT: csetm w9, hi -; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 -; CHECK-CVT-NEXT: csetm w8, hi +; CHECK-CVT-NEXT: fcmp s2, s6 +; CHECK-CVT-NEXT: mov h2, v0.h[4] +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: mov h6, v1.h[5] +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: csetm w10, hi ; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 +; CHECK-CVT-NEXT: mov h3, v0.h[5] +; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: fmov s6, w9 +; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: csetm w9, hi +; CHECK-CVT-NEXT: fcmp s2, s5 +; CHECK-CVT-NEXT: mov h2, v1.h[6] +; CHECK-CVT-NEXT: mov h5, v0.h[6] ; CHECK-CVT-NEXT: mov h1, v1.h[7] +; CHECK-CVT-NEXT: mov v6.h[1], w8 +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w8, hi -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] -; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 -; CHECK-CVT-NEXT: csetm w8, hi -; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: fcmp s3, s4 ; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: mov h0, v0.h[7] +; CHECK-CVT-NEXT: mov v6.h[2], w10 ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 -; CHECK-CVT-NEXT: csetm w8, hi -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 +; CHECK-CVT-NEXT: mov v6.h[3], w9 +; CHECK-CVT-NEXT: csetm w9, hi +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: mov v6.h[4], w8 ; CHECK-CVT-NEXT: csetm w8, hi ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 -; CHECK-CVT-NEXT: csetm w8, hi -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v6.h[5], w9 +; CHECK-CVT-NEXT: csetm w9, hi +; CHECK-CVT-NEXT: mov v6.h[6], w8 +; CHECK-CVT-NEXT: mov v6.h[7], w9 +; CHECK-CVT-NEXT: xtn v0.8b, v6.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_ugt: @@ -861,57 +861,57 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] -; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov h6, v1.h[2] ; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: mov h2, v1.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h2, v0.h[2] +; CHECK-CVT-NEXT: mov h3, v1.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w8, pl ; CHECK-CVT-NEXT: fcmp s5, s4 -; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: mov h4, v1.h[3] +; CHECK-CVT-NEXT: mov h4, v0.h[3] ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov h5, v0.h[3] +; CHECK-CVT-NEXT: mov h5, v1.h[4] +; CHECK-CVT-NEXT: fcvt s4, h4 ; CHECK-CVT-NEXT: csetm w9, pl -; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 -; CHECK-CVT-NEXT: csetm w8, pl +; CHECK-CVT-NEXT: fcmp s2, s6 +; CHECK-CVT-NEXT: mov h2, v0.h[4] +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: mov h6, v1.h[5] +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: csetm w10, pl ; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 +; CHECK-CVT-NEXT: mov h3, v0.h[5] +; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: fmov s6, w9 +; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: csetm w9, pl +; CHECK-CVT-NEXT: fcmp s2, s5 +; CHECK-CVT-NEXT: mov h2, v1.h[6] +; CHECK-CVT-NEXT: mov h5, v0.h[6] ; CHECK-CVT-NEXT: mov h1, v1.h[7] +; CHECK-CVT-NEXT: mov v6.h[1], w8 +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w8, pl -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] -; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 -; CHECK-CVT-NEXT: csetm w8, pl -; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: fcmp s3, s4 ; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: mov h0, v0.h[7] +; CHECK-CVT-NEXT: mov v6.h[2], w10 ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 -; CHECK-CVT-NEXT: csetm w8, pl -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 +; CHECK-CVT-NEXT: mov v6.h[3], w9 +; CHECK-CVT-NEXT: csetm w9, pl +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: mov v6.h[4], w8 ; CHECK-CVT-NEXT: csetm w8, pl ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 -; CHECK-CVT-NEXT: csetm w8, pl -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v6.h[5], w9 +; CHECK-CVT-NEXT: csetm w9, pl +; CHECK-CVT-NEXT: mov v6.h[6], w8 +; CHECK-CVT-NEXT: mov v6.h[7], w9 +; CHECK-CVT-NEXT: xtn v0.8b, v6.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_uge: @@ -931,57 +931,57 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] -; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov h6, v1.h[2] ; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: mov h2, v1.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h2, v0.h[2] +; CHECK-CVT-NEXT: mov h3, v1.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w8, lt ; CHECK-CVT-NEXT: fcmp s5, s4 -; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: mov h4, v1.h[3] +; CHECK-CVT-NEXT: mov h4, v0.h[3] ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov h5, v0.h[3] +; CHECK-CVT-NEXT: mov h5, v1.h[4] +; CHECK-CVT-NEXT: fcvt s4, h4 ; CHECK-CVT-NEXT: csetm w9, lt -; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 -; CHECK-CVT-NEXT: csetm w8, lt +; CHECK-CVT-NEXT: fcmp s2, s6 +; CHECK-CVT-NEXT: mov h2, v0.h[4] +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: mov h6, v1.h[5] +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: csetm w10, lt ; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 +; CHECK-CVT-NEXT: mov h3, v0.h[5] +; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: fmov s6, w9 +; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: csetm w9, lt +; CHECK-CVT-NEXT: fcmp s2, s5 +; CHECK-CVT-NEXT: mov h2, v1.h[6] +; CHECK-CVT-NEXT: mov h5, v0.h[6] ; CHECK-CVT-NEXT: mov h1, v1.h[7] +; CHECK-CVT-NEXT: mov v6.h[1], w8 +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w8, lt -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] -; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 -; CHECK-CVT-NEXT: csetm w8, lt -; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: fcmp s3, s4 ; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: mov h0, v0.h[7] +; CHECK-CVT-NEXT: mov v6.h[2], w10 ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 -; CHECK-CVT-NEXT: csetm w8, lt -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 +; CHECK-CVT-NEXT: mov v6.h[3], w9 +; CHECK-CVT-NEXT: csetm w9, lt +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: mov v6.h[4], w8 ; CHECK-CVT-NEXT: csetm w8, lt ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 -; CHECK-CVT-NEXT: csetm w8, lt -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v6.h[5], w9 +; CHECK-CVT-NEXT: csetm w9, lt +; CHECK-CVT-NEXT: mov v6.h[6], w8 +; CHECK-CVT-NEXT: mov v6.h[7], w9 +; CHECK-CVT-NEXT: xtn v0.8b, v6.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_ult: @@ -1001,57 +1001,57 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] -; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov h6, v1.h[2] ; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: mov h2, v1.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h2, v0.h[2] +; CHECK-CVT-NEXT: mov h3, v1.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w8, le ; CHECK-CVT-NEXT: fcmp s5, s4 -; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: mov h4, v1.h[3] +; CHECK-CVT-NEXT: mov h4, v0.h[3] ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov h5, v0.h[3] +; CHECK-CVT-NEXT: mov h5, v1.h[4] +; CHECK-CVT-NEXT: fcvt s4, h4 ; CHECK-CVT-NEXT: csetm w9, le -; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 -; CHECK-CVT-NEXT: csetm w8, le +; CHECK-CVT-NEXT: fcmp s2, s6 +; CHECK-CVT-NEXT: mov h2, v0.h[4] +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: mov h6, v1.h[5] +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: csetm w10, le ; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 +; CHECK-CVT-NEXT: mov h3, v0.h[5] +; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: fmov s6, w9 +; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: csetm w9, le +; CHECK-CVT-NEXT: fcmp s2, s5 +; CHECK-CVT-NEXT: mov h2, v1.h[6] +; CHECK-CVT-NEXT: mov h5, v0.h[6] ; CHECK-CVT-NEXT: mov h1, v1.h[7] +; CHECK-CVT-NEXT: mov v6.h[1], w8 +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w8, le -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] -; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 -; CHECK-CVT-NEXT: csetm w8, le -; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: fcmp s3, s4 ; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: mov h0, v0.h[7] +; CHECK-CVT-NEXT: mov v6.h[2], w10 ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 -; CHECK-CVT-NEXT: csetm w8, le -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 +; CHECK-CVT-NEXT: mov v6.h[3], w9 +; CHECK-CVT-NEXT: csetm w9, le +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: mov v6.h[4], w8 ; CHECK-CVT-NEXT: csetm w8, le ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 -; CHECK-CVT-NEXT: csetm w8, le -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v6.h[5], w9 +; CHECK-CVT-NEXT: csetm w9, le +; CHECK-CVT-NEXT: mov v6.h[6], w8 +; CHECK-CVT-NEXT: mov v6.h[7], w9 +; CHECK-CVT-NEXT: xtn v0.8b, v6.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_ule: @@ -1071,57 +1071,57 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] -; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov h6, v1.h[2] ; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: mov h2, v1.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h2, v0.h[2] +; CHECK-CVT-NEXT: mov h3, v1.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w8, vs ; CHECK-CVT-NEXT: fcmp s5, s4 -; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: mov h4, v1.h[3] +; CHECK-CVT-NEXT: mov h4, v0.h[3] ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov h5, v0.h[3] +; CHECK-CVT-NEXT: mov h5, v1.h[4] +; CHECK-CVT-NEXT: fcvt s4, h4 ; CHECK-CVT-NEXT: csetm w9, vs -; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 -; CHECK-CVT-NEXT: csetm w8, vs +; CHECK-CVT-NEXT: fcmp s2, s6 +; CHECK-CVT-NEXT: mov h2, v0.h[4] +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: mov h6, v1.h[5] +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: csetm w10, vs ; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 +; CHECK-CVT-NEXT: mov h3, v0.h[5] +; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: fmov s6, w9 +; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: csetm w9, vs +; CHECK-CVT-NEXT: fcmp s2, s5 +; CHECK-CVT-NEXT: mov h2, v1.h[6] +; CHECK-CVT-NEXT: mov h5, v0.h[6] ; CHECK-CVT-NEXT: mov h1, v1.h[7] +; CHECK-CVT-NEXT: mov v6.h[1], w8 +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w8, vs -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] -; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 -; CHECK-CVT-NEXT: csetm w8, vs -; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: fcmp s3, s4 ; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: mov h0, v0.h[7] +; CHECK-CVT-NEXT: mov v6.h[2], w10 ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 -; CHECK-CVT-NEXT: csetm w8, vs -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 +; CHECK-CVT-NEXT: mov v6.h[3], w9 +; CHECK-CVT-NEXT: csetm w9, vs +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: mov v6.h[4], w8 ; CHECK-CVT-NEXT: csetm w8, vs ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 -; CHECK-CVT-NEXT: csetm w8, vs -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v6.h[5], w9 +; CHECK-CVT-NEXT: csetm w9, vs +; CHECK-CVT-NEXT: mov v6.h[6], w8 +; CHECK-CVT-NEXT: mov v6.h[7], w9 +; CHECK-CVT-NEXT: xtn v0.8b, v6.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_uno: @@ -1143,65 +1143,65 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v0.h[4] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: fcmp s3, s2 ; CHECK-CVT-NEXT: mov h2, v1.h[2] ; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w8, mi +; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: csinv w8, w8, wzr, le ; CHECK-CVT-NEXT: fcmp s5, s4 ; CHECK-CVT-NEXT: mov h4, v1.h[3] -; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: mov h5, v0.h[3] ; CHECK-CVT-NEXT: csetm w9, mi ; CHECK-CVT-NEXT: csinv w9, w9, wzr, le -; CHECK-CVT-NEXT: fcvt s4, h4 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fcvt s2, h5 -; CHECK-CVT-NEXT: fmov s3, w9 -; CHECK-CVT-NEXT: mov h5, v1.h[4] -; CHECK-CVT-NEXT: csetm w9, mi -; CHECK-CVT-NEXT: mov v3.h[1], w8 -; CHECK-CVT-NEXT: csinv w8, w9, wzr, le -; CHECK-CVT-NEXT: fcmp s2, s4 -; CHECK-CVT-NEXT: fcvt s2, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 -; CHECK-CVT-NEXT: mov h5, v1.h[5] -; CHECK-CVT-NEXT: mov h6, v0.h[5] -; CHECK-CVT-NEXT: csetm w9, mi -; CHECK-CVT-NEXT: mov v3.h[2], w8 -; CHECK-CVT-NEXT: csinv w8, w9, wzr, le -; CHECK-CVT-NEXT: fcmp s4, s2 -; CHECK-CVT-NEXT: fcvt s2, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov h6, v0.h[6] -; CHECK-CVT-NEXT: csetm w9, mi +; CHECK-CVT-NEXT: fcvt s3, h4 +; CHECK-CVT-NEXT: fcvt s4, h5 +; CHECK-CVT-NEXT: mov h2, v1.h[4] +; CHECK-CVT-NEXT: mov h5, v0.h[4] +; CHECK-CVT-NEXT: csetm w10, mi +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: csinv w10, w10, wzr, le +; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: mov h3, v1.h[5] +; CHECK-CVT-NEXT: mov h4, v0.h[5] +; CHECK-CVT-NEXT: csetm w11, mi +; CHECK-CVT-NEXT: csinv w11, w11, wzr, le +; CHECK-CVT-NEXT: fcmp s5, s2 +; CHECK-CVT-NEXT: fcvt s2, h3 +; CHECK-CVT-NEXT: fcvt s3, h4 +; CHECK-CVT-NEXT: mov h4, v1.h[6] +; CHECK-CVT-NEXT: mov h5, v0.h[6] +; CHECK-CVT-NEXT: csetm w12, mi ; CHECK-CVT-NEXT: mov h1, v1.h[7] -; CHECK-CVT-NEXT: mov v3.h[3], w8 -; CHECK-CVT-NEXT: csinv w8, w9, wzr, le -; CHECK-CVT-NEXT: fcmp s4, s2 -; CHECK-CVT-NEXT: fcvt s2, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: csinv w12, w12, wzr, le ; CHECK-CVT-NEXT: mov h0, v0.h[7] +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: fmov s2, w9 +; CHECK-CVT-NEXT: fcvt s3, h4 +; CHECK-CVT-NEXT: fcvt s4, h5 ; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: csetm w9, mi -; CHECK-CVT-NEXT: mov v3.h[4], w8 -; CHECK-CVT-NEXT: csinv w8, w9, wzr, le -; CHECK-CVT-NEXT: fcmp s4, s2 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v3.h[5], w8 +; CHECK-CVT-NEXT: mov v2.h[1], w8 ; CHECK-CVT-NEXT: csetm w8, mi ; CHECK-CVT-NEXT: csinv w8, w8, wzr, le +; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: mov v2.h[2], w10 +; CHECK-CVT-NEXT: mov v2.h[3], w11 +; CHECK-CVT-NEXT: csetm w9, mi +; CHECK-CVT-NEXT: csinv w9, w9, wzr, le ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v3.h[6], w8 +; CHECK-CVT-NEXT: mov v2.h[4], w12 +; CHECK-CVT-NEXT: mov v2.h[5], w8 ; CHECK-CVT-NEXT: csetm w8, mi ; CHECK-CVT-NEXT: csinv w8, w8, wzr, le -; CHECK-CVT-NEXT: mov v3.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v3.8h +; CHECK-CVT-NEXT: mov v2.h[6], w9 +; CHECK-CVT-NEXT: mov v2.h[7], w8 +; CHECK-CVT-NEXT: xtn v0.8b, v2.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_one: @@ -1222,57 +1222,57 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] -; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov h6, v1.h[2] ; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: mov h2, v1.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h2, v0.h[2] +; CHECK-CVT-NEXT: mov h3, v1.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w8, eq ; CHECK-CVT-NEXT: fcmp s5, s4 -; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: mov h4, v1.h[3] +; CHECK-CVT-NEXT: mov h4, v0.h[3] ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov h5, v0.h[3] +; CHECK-CVT-NEXT: mov h5, v1.h[4] +; CHECK-CVT-NEXT: fcvt s4, h4 ; CHECK-CVT-NEXT: csetm w9, eq -; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 -; CHECK-CVT-NEXT: csetm w8, eq +; CHECK-CVT-NEXT: fcmp s2, s6 +; CHECK-CVT-NEXT: mov h2, v0.h[4] +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: mov h6, v1.h[5] +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: csetm w10, eq ; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 +; CHECK-CVT-NEXT: mov h3, v0.h[5] +; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: fmov s6, w9 +; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: csetm w9, eq +; CHECK-CVT-NEXT: fcmp s2, s5 +; CHECK-CVT-NEXT: mov h2, v1.h[6] +; CHECK-CVT-NEXT: mov h5, v0.h[6] ; CHECK-CVT-NEXT: mov h1, v1.h[7] +; CHECK-CVT-NEXT: mov v6.h[1], w8 +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w8, eq -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] -; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 -; CHECK-CVT-NEXT: csetm w8, eq -; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: fcmp s3, s4 ; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: mov h0, v0.h[7] +; CHECK-CVT-NEXT: mov v6.h[2], w10 ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 -; CHECK-CVT-NEXT: csetm w8, eq -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 +; CHECK-CVT-NEXT: mov v6.h[3], w9 +; CHECK-CVT-NEXT: csetm w9, eq +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: mov v6.h[4], w8 ; CHECK-CVT-NEXT: csetm w8, eq ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 -; CHECK-CVT-NEXT: csetm w8, eq -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v6.h[5], w9 +; CHECK-CVT-NEXT: csetm w9, eq +; CHECK-CVT-NEXT: mov v6.h[6], w8 +; CHECK-CVT-NEXT: mov v6.h[7], w9 +; CHECK-CVT-NEXT: xtn v0.8b, v6.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_oeq: @@ -1291,57 +1291,57 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] -; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov h6, v1.h[2] ; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: mov h2, v1.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h2, v0.h[2] +; CHECK-CVT-NEXT: mov h3, v1.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w8, gt ; CHECK-CVT-NEXT: fcmp s5, s4 -; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: mov h4, v1.h[3] +; CHECK-CVT-NEXT: mov h4, v0.h[3] ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov h5, v0.h[3] +; CHECK-CVT-NEXT: mov h5, v1.h[4] +; CHECK-CVT-NEXT: fcvt s4, h4 ; CHECK-CVT-NEXT: csetm w9, gt -; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 -; CHECK-CVT-NEXT: csetm w8, gt +; CHECK-CVT-NEXT: fcmp s2, s6 +; CHECK-CVT-NEXT: mov h2, v0.h[4] +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: mov h6, v1.h[5] +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: csetm w10, gt ; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 +; CHECK-CVT-NEXT: mov h3, v0.h[5] +; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: fmov s6, w9 +; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: csetm w9, gt +; CHECK-CVT-NEXT: fcmp s2, s5 +; CHECK-CVT-NEXT: mov h2, v1.h[6] +; CHECK-CVT-NEXT: mov h5, v0.h[6] ; CHECK-CVT-NEXT: mov h1, v1.h[7] +; CHECK-CVT-NEXT: mov v6.h[1], w8 +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w8, gt -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] -; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 -; CHECK-CVT-NEXT: csetm w8, gt -; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: fcmp s3, s4 ; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: mov h0, v0.h[7] +; CHECK-CVT-NEXT: mov v6.h[2], w10 ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 -; CHECK-CVT-NEXT: csetm w8, gt -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 +; CHECK-CVT-NEXT: mov v6.h[3], w9 +; CHECK-CVT-NEXT: csetm w9, gt +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: mov v6.h[4], w8 ; CHECK-CVT-NEXT: csetm w8, gt ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 -; CHECK-CVT-NEXT: csetm w8, gt -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v6.h[5], w9 +; CHECK-CVT-NEXT: csetm w9, gt +; CHECK-CVT-NEXT: mov v6.h[6], w8 +; CHECK-CVT-NEXT: mov v6.h[7], w9 +; CHECK-CVT-NEXT: xtn v0.8b, v6.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_ogt: @@ -1360,57 +1360,57 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] -; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov h6, v1.h[2] ; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: mov h2, v1.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h2, v0.h[2] +; CHECK-CVT-NEXT: mov h3, v1.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w8, ge ; CHECK-CVT-NEXT: fcmp s5, s4 -; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: mov h4, v1.h[3] +; CHECK-CVT-NEXT: mov h4, v0.h[3] ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov h5, v0.h[3] +; CHECK-CVT-NEXT: mov h5, v1.h[4] +; CHECK-CVT-NEXT: fcvt s4, h4 ; CHECK-CVT-NEXT: csetm w9, ge -; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 -; CHECK-CVT-NEXT: csetm w8, ge +; CHECK-CVT-NEXT: fcmp s2, s6 +; CHECK-CVT-NEXT: mov h2, v0.h[4] +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: mov h6, v1.h[5] +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: csetm w10, ge ; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 +; CHECK-CVT-NEXT: mov h3, v0.h[5] +; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: fmov s6, w9 +; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: csetm w9, ge +; CHECK-CVT-NEXT: fcmp s2, s5 +; CHECK-CVT-NEXT: mov h2, v1.h[6] +; CHECK-CVT-NEXT: mov h5, v0.h[6] ; CHECK-CVT-NEXT: mov h1, v1.h[7] +; CHECK-CVT-NEXT: mov v6.h[1], w8 +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w8, ge -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] -; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 -; CHECK-CVT-NEXT: csetm w8, ge -; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: fcmp s3, s4 ; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: mov h0, v0.h[7] +; CHECK-CVT-NEXT: mov v6.h[2], w10 ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 -; CHECK-CVT-NEXT: csetm w8, ge -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 +; CHECK-CVT-NEXT: mov v6.h[3], w9 +; CHECK-CVT-NEXT: csetm w9, ge +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: mov v6.h[4], w8 ; CHECK-CVT-NEXT: csetm w8, ge ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 -; CHECK-CVT-NEXT: csetm w8, ge -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v6.h[5], w9 +; CHECK-CVT-NEXT: csetm w9, ge +; CHECK-CVT-NEXT: mov v6.h[6], w8 +; CHECK-CVT-NEXT: mov v6.h[7], w9 +; CHECK-CVT-NEXT: xtn v0.8b, v6.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_oge: @@ -1429,57 +1429,57 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] -; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov h6, v1.h[2] ; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: mov h2, v1.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h2, v0.h[2] +; CHECK-CVT-NEXT: mov h3, v1.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w8, mi ; CHECK-CVT-NEXT: fcmp s5, s4 -; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: mov h4, v1.h[3] +; CHECK-CVT-NEXT: mov h4, v0.h[3] ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov h5, v0.h[3] +; CHECK-CVT-NEXT: mov h5, v1.h[4] +; CHECK-CVT-NEXT: fcvt s4, h4 ; CHECK-CVT-NEXT: csetm w9, mi -; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 -; CHECK-CVT-NEXT: csetm w8, mi +; CHECK-CVT-NEXT: fcmp s2, s6 +; CHECK-CVT-NEXT: mov h2, v0.h[4] +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: mov h6, v1.h[5] +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: csetm w10, mi ; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 +; CHECK-CVT-NEXT: mov h3, v0.h[5] +; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: fmov s6, w9 +; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: csetm w9, mi +; CHECK-CVT-NEXT: fcmp s2, s5 +; CHECK-CVT-NEXT: mov h2, v1.h[6] +; CHECK-CVT-NEXT: mov h5, v0.h[6] ; CHECK-CVT-NEXT: mov h1, v1.h[7] +; CHECK-CVT-NEXT: mov v6.h[1], w8 +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w8, mi -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] -; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 -; CHECK-CVT-NEXT: csetm w8, mi -; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: fcmp s3, s4 ; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: mov h0, v0.h[7] +; CHECK-CVT-NEXT: mov v6.h[2], w10 ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 -; CHECK-CVT-NEXT: csetm w8, mi -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 +; CHECK-CVT-NEXT: mov v6.h[3], w9 +; CHECK-CVT-NEXT: csetm w9, mi +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: mov v6.h[4], w8 ; CHECK-CVT-NEXT: csetm w8, mi ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 -; CHECK-CVT-NEXT: csetm w8, mi -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v6.h[5], w9 +; CHECK-CVT-NEXT: csetm w9, mi +; CHECK-CVT-NEXT: mov v6.h[6], w8 +; CHECK-CVT-NEXT: mov v6.h[7], w9 +; CHECK-CVT-NEXT: xtn v0.8b, v6.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_olt: @@ -1498,57 +1498,57 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] -; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov h6, v1.h[2] ; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: mov h2, v1.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h2, v0.h[2] +; CHECK-CVT-NEXT: mov h3, v1.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w8, ls ; CHECK-CVT-NEXT: fcmp s5, s4 -; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: mov h4, v1.h[3] +; CHECK-CVT-NEXT: mov h4, v0.h[3] ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov h5, v0.h[3] +; CHECK-CVT-NEXT: mov h5, v1.h[4] +; CHECK-CVT-NEXT: fcvt s4, h4 ; CHECK-CVT-NEXT: csetm w9, ls -; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 -; CHECK-CVT-NEXT: csetm w8, ls +; CHECK-CVT-NEXT: fcmp s2, s6 +; CHECK-CVT-NEXT: mov h2, v0.h[4] +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: mov h6, v1.h[5] +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: csetm w10, ls ; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 +; CHECK-CVT-NEXT: mov h3, v0.h[5] +; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: fmov s6, w9 +; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: csetm w9, ls +; CHECK-CVT-NEXT: fcmp s2, s5 +; CHECK-CVT-NEXT: mov h2, v1.h[6] +; CHECK-CVT-NEXT: mov h5, v0.h[6] ; CHECK-CVT-NEXT: mov h1, v1.h[7] +; CHECK-CVT-NEXT: mov v6.h[1], w8 +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w8, ls -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] -; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 -; CHECK-CVT-NEXT: csetm w8, ls -; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: fcmp s3, s4 ; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: mov h0, v0.h[7] +; CHECK-CVT-NEXT: mov v6.h[2], w10 ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 -; CHECK-CVT-NEXT: csetm w8, ls -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 +; CHECK-CVT-NEXT: mov v6.h[3], w9 +; CHECK-CVT-NEXT: csetm w9, ls +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: mov v6.h[4], w8 ; CHECK-CVT-NEXT: csetm w8, ls ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 -; CHECK-CVT-NEXT: csetm w8, ls -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v6.h[5], w9 +; CHECK-CVT-NEXT: csetm w9, ls +; CHECK-CVT-NEXT: mov v6.h[6], w8 +; CHECK-CVT-NEXT: mov v6.h[7], w9 +; CHECK-CVT-NEXT: xtn v0.8b, v6.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_ole: @@ -1567,57 +1567,57 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] -; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov h6, v1.h[2] ; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: mov h2, v1.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h2, v0.h[2] +; CHECK-CVT-NEXT: mov h3, v1.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w8, vc ; CHECK-CVT-NEXT: fcmp s5, s4 -; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: mov h4, v1.h[3] +; CHECK-CVT-NEXT: mov h4, v0.h[3] ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov h5, v0.h[3] +; CHECK-CVT-NEXT: mov h5, v1.h[4] +; CHECK-CVT-NEXT: fcvt s4, h4 ; CHECK-CVT-NEXT: csetm w9, vc -; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 -; CHECK-CVT-NEXT: csetm w8, vc +; CHECK-CVT-NEXT: fcmp s2, s6 +; CHECK-CVT-NEXT: mov h2, v0.h[4] +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: mov h6, v1.h[5] +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: csetm w10, vc ; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 +; CHECK-CVT-NEXT: mov h3, v0.h[5] +; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: fmov s6, w9 +; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: csetm w9, vc +; CHECK-CVT-NEXT: fcmp s2, s5 +; CHECK-CVT-NEXT: mov h2, v1.h[6] +; CHECK-CVT-NEXT: mov h5, v0.h[6] ; CHECK-CVT-NEXT: mov h1, v1.h[7] +; CHECK-CVT-NEXT: mov v6.h[1], w8 +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w8, vc -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] -; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 -; CHECK-CVT-NEXT: csetm w8, vc -; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: fcmp s3, s4 ; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: mov h0, v0.h[7] +; CHECK-CVT-NEXT: mov v6.h[2], w10 ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 -; CHECK-CVT-NEXT: csetm w8, vc -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 +; CHECK-CVT-NEXT: mov v6.h[3], w9 +; CHECK-CVT-NEXT: csetm w9, vc +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: mov v6.h[4], w8 ; CHECK-CVT-NEXT: csetm w8, vc ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 -; CHECK-CVT-NEXT: csetm w8, vc -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v6.h[5], w9 +; CHECK-CVT-NEXT: csetm w9, vc +; CHECK-CVT-NEXT: mov v6.h[6], w8 +; CHECK-CVT-NEXT: mov v6.h[7], w9 +; CHECK-CVT-NEXT: xtn v0.8b, v6.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_ord: diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll --- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll @@ -62,13 +62,13 @@ ; CHECK-NEXT: mov v0.s[1], v1.s[0] ; CHECK-NEXT: fcvtzs v4.4s, v4.4s ; CHECK-NEXT: mov v0.s[2], v2.s[0] -; CHECK-NEXT: fmov w4, s4 ; CHECK-NEXT: mov v0.s[3], v3.s[0] +; CHECK-NEXT: fmov w4, s4 ; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: mov w1, v0.s[1] ; CHECK-NEXT: mov w2, v0.s[2] ; CHECK-NEXT: mov w3, v0.s[3] -; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %x = call <5 x i32> @llvm.fptosi.sat.v5f32.v5i32(<5 x float> %f) ret <5 x i32> %x @@ -88,13 +88,13 @@ ; CHECK-NEXT: mov v0.s[2], v2.s[0] ; CHECK-NEXT: fcvtzs v1.4s, v4.4s ; CHECK-NEXT: mov v0.s[3], v3.s[0] -; CHECK-NEXT: mov w5, v1.s[1] -; CHECK-NEXT: fmov w4, s1 ; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: fmov w4, s1 +; CHECK-NEXT: mov w5, v1.s[1] +; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: mov w1, v0.s[1] ; CHECK-NEXT: mov w2, v0.s[2] ; CHECK-NEXT: mov w3, v0.s[3] -; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %x = call <6 x i32> @llvm.fptosi.sat.v6f32.v6i32(<6 x float> %f) ret <6 x i32> %x @@ -117,13 +117,13 @@ ; CHECK-NEXT: mov v0.s[3], v3.s[0] ; CHECK-NEXT: fcvtzs v1.4s, v4.4s ; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: fmov w4, s1 ; CHECK-NEXT: mov w5, v1.s[1] ; CHECK-NEXT: mov w6, v1.s[2] -; CHECK-NEXT: fmov w4, s1 +; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: mov w1, v0.s[1] ; CHECK-NEXT: mov w2, v0.s[2] ; CHECK-NEXT: mov w3, v0.s[3] -; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %x = call <7 x i32> @llvm.fptosi.sat.v7f32.v7i32(<7 x float> %f) ret <7 x i32> %x @@ -163,8 +163,8 @@ define <2 x i32> @test_signed_v2f64_v2i32(<2 x double> %f) { ; CHECK-LABEL: test_signed_v2f64_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d1, v0.d[1] ; CHECK-NEXT: fcvtzs w8, d0 +; CHECK-NEXT: mov d1, v0.d[1] ; CHECK-NEXT: fmov s0, w8 ; CHECK-NEXT: fcvtzs w8, d1 ; CHECK-NEXT: mov v0.s[1], w8 @@ -177,13 +177,13 @@ define <3 x i32> @test_signed_v3f64_v3i32(<3 x double> %f) { ; CHECK-LABEL: test_signed_v3f64_v3i32: ; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzs w8, d0 -; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: fcvtzs w9, d0 ; CHECK-NEXT: fcvtzs w8, d1 +; CHECK-NEXT: fmov s0, w9 +; CHECK-NEXT: fcvtzs w9, d2 ; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: fcvtzs w8, d2 -; CHECK-NEXT: mov v0.s[2], w8 ; CHECK-NEXT: fcvtzs w8, d0 +; CHECK-NEXT: mov v0.s[2], w9 ; CHECK-NEXT: mov v0.s[3], w8 ; CHECK-NEXT: ret %x = call <3 x i32> @llvm.fptosi.sat.v3f64.v3i32(<3 x double> %f) @@ -193,15 +193,15 @@ define <4 x i32> @test_signed_v4f64_v4i32(<4 x double> %f) { ; CHECK-LABEL: test_signed_v4f64_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d2, v0.d[1] ; CHECK-NEXT: fcvtzs w8, d0 +; CHECK-NEXT: mov d2, v0.d[1] +; CHECK-NEXT: fcvtzs w9, d1 +; CHECK-NEXT: mov d1, v1.d[1] ; CHECK-NEXT: fmov s0, w8 ; CHECK-NEXT: fcvtzs w8, d2 ; CHECK-NEXT: mov v0.s[1], w8 ; CHECK-NEXT: fcvtzs w8, d1 -; CHECK-NEXT: mov d1, v1.d[1] -; CHECK-NEXT: mov v0.s[2], w8 -; CHECK-NEXT: fcvtzs w8, d1 +; CHECK-NEXT: mov v0.s[2], w9 ; CHECK-NEXT: mov v0.s[3], w8 ; CHECK-NEXT: ret %x = call <4 x i32> @llvm.fptosi.sat.v4f64.v4i32(<4 x double> %f) @@ -295,11 +295,11 @@ ; CHECK-NEXT: .cfi_offset w21, -24 ; CHECK-NEXT: .cfi_offset w22, -32 ; CHECK-NEXT: .cfi_offset w30, -48 -; CHECK-NEXT: mov v2.16b, v1.16b ; CHECK-NEXT: adrp x8, .LCPI15_0 ; CHECK-NEXT: stp q1, q0, [sp, #32] // 32-byte Folded Spill -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_0] +; CHECK-NEXT: mov v2.16b, v1.16b ; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_0] ; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: bl __getf2 ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload @@ -364,12 +364,12 @@ ; CHECK-NEXT: .cfi_offset w21, -24 ; CHECK-NEXT: .cfi_offset w22, -32 ; CHECK-NEXT: .cfi_offset w30, -48 -; CHECK-NEXT: stp q0, q2, [sp, #48] // 32-byte Folded Spill ; CHECK-NEXT: adrp x8, .LCPI16_0 +; CHECK-NEXT: stp q0, q2, [sp, #48] // 32-byte Folded Spill ; CHECK-NEXT: mov v2.16b, v1.16b ; CHECK-NEXT: str q1, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] ; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] ; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: bl __getf2 ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload @@ -454,11 +454,11 @@ ; CHECK-NEXT: .cfi_offset w21, -24 ; CHECK-NEXT: .cfi_offset w22, -32 ; CHECK-NEXT: .cfi_offset w30, -48 -; CHECK-NEXT: stp q2, q3, [sp, #64] // 32-byte Folded Spill ; CHECK-NEXT: adrp x8, .LCPI17_0 +; CHECK-NEXT: stp q2, q3, [sp, #64] // 32-byte Folded Spill ; CHECK-NEXT: mov v2.16b, v1.16b -; CHECK-NEXT: str q0, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_0] ; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: str q1, [sp, #32] // 16-byte Folded Spill @@ -619,11 +619,11 @@ ; CHECK-NEXT: fcvtl2 v0.4s, v0.8h ; CHECK-NEXT: fcvtzs v1.4s, v1.4s ; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: fmov w0, s1 +; CHECK-NEXT: fmov w4, s0 ; CHECK-NEXT: mov w1, v1.s[1] ; CHECK-NEXT: mov w2, v1.s[2] ; CHECK-NEXT: mov w3, v1.s[3] -; CHECK-NEXT: fmov w0, s1 -; CHECK-NEXT: fmov w4, s0 ; CHECK-NEXT: ret %x = call <5 x i32> @llvm.fptosi.sat.v5f16.v5i32(<5 x half> %f) ret <5 x i32> %x @@ -636,12 +636,12 @@ ; CHECK-NEXT: fcvtl2 v0.4s, v0.8h ; CHECK-NEXT: fcvtzs v1.4s, v1.4s ; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: fmov w0, s1 +; CHECK-NEXT: fmov w4, s0 ; CHECK-NEXT: mov w1, v1.s[1] ; CHECK-NEXT: mov w2, v1.s[2] ; CHECK-NEXT: mov w3, v1.s[3] ; CHECK-NEXT: mov w5, v0.s[1] -; CHECK-NEXT: fmov w0, s1 -; CHECK-NEXT: fmov w4, s0 ; CHECK-NEXT: ret %x = call <6 x i32> @llvm.fptosi.sat.v6f16.v6i32(<6 x half> %f) ret <6 x i32> %x @@ -650,17 +650,17 @@ define <7 x i32> @test_signed_v7f16_v7i32(<7 x half> %f) { ; CHECK-LABEL: test_signed_v7f16_v7i32: ; CHECK: // %bb.0: -; CHECK-NEXT: fcvtl v1.4s, v0.4h -; CHECK-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-NEXT: fcvtl v0.4s, v0.4h ; CHECK-NEXT: fcvtzs v1.4s, v1.4s ; CHECK-NEXT: fcvtzs v0.4s, v0.4s -; CHECK-NEXT: mov w1, v1.s[1] -; CHECK-NEXT: mov w2, v1.s[2] -; CHECK-NEXT: mov w3, v1.s[3] -; CHECK-NEXT: mov w5, v0.s[1] -; CHECK-NEXT: mov w6, v0.s[2] -; CHECK-NEXT: fmov w0, s1 -; CHECK-NEXT: fmov w4, s0 +; CHECK-NEXT: fmov w4, s1 +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: mov w5, v1.s[1] +; CHECK-NEXT: mov w1, v0.s[1] +; CHECK-NEXT: mov w2, v0.s[2] +; CHECK-NEXT: mov w3, v0.s[3] +; CHECK-NEXT: mov w6, v1.s[2] ; CHECK-NEXT: ret %x = call <7 x i32> @llvm.fptosi.sat.v7f16.v7i32(<7 x half> %f) ret <7 x i32> %x @@ -695,8 +695,8 @@ define <2 x i1> @test_signed_v2f32_v2i1(<2 x float> %f) { ; CHECK-LABEL: test_signed_v2f32_v2i1: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s ; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff ; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s @@ -708,8 +708,8 @@ define <2 x i8> @test_signed_v2f32_v2i8(<2 x float> %f) { ; CHECK-LABEL: test_signed_v2f32_v2i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2s, #127 ; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: movi v1.2s, #127 ; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s ; CHECK-NEXT: mvni v1.2s, #127 ; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s @@ -721,8 +721,8 @@ define <2 x i13> @test_signed_v2f32_v2i13(<2 x float> %f) { ; CHECK-LABEL: test_signed_v2f32_v2i13: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2s, #15, msl #8 ; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: movi v1.2s, #15, msl #8 ; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s ; CHECK-NEXT: mvni v1.2s, #15, msl #8 ; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s @@ -734,8 +734,8 @@ define <2 x i16> @test_signed_v2f32_v2i16(<2 x float> %f) { ; CHECK-LABEL: test_signed_v2f32_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2s, #127, msl #8 ; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: movi v1.2s, #127, msl #8 ; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s ; CHECK-NEXT: mvni v1.2s, #127, msl #8 ; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s @@ -747,8 +747,8 @@ define <2 x i19> @test_signed_v2f32_v2i19(<2 x float> %f) { ; CHECK-LABEL: test_signed_v2f32_v2i19: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2s, #3, msl #16 ; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: movi v1.2s, #3, msl #16 ; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s ; CHECK-NEXT: mvni v1.2s, #3, msl #16 ; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s @@ -794,8 +794,8 @@ ; CHECK-LABEL: test_signed_v2f32_v2i64: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov s1, v0.s[1] ; CHECK-NEXT: fcvtzs x8, s0 +; CHECK-NEXT: mov s1, v0.s[1] ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: fcvtzs x8, s1 ; CHECK-NEXT: mov v0.d[1], x8 @@ -955,8 +955,8 @@ define <4 x i1> @test_signed_v4f32_v4i1(<4 x float> %f) { ; CHECK-LABEL: test_signed_v4f32_v4i1: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s @@ -969,8 +969,8 @@ define <4 x i8> @test_signed_v4f32_v4i8(<4 x float> %f) { ; CHECK-LABEL: test_signed_v4f32_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.4s, #127 ; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: movi v1.4s, #127 ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: mvni v1.4s, #127 ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s @@ -983,8 +983,8 @@ define <4 x i13> @test_signed_v4f32_v4i13(<4 x float> %f) { ; CHECK-LABEL: test_signed_v4f32_v4i13: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.4s, #15, msl #8 ; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: movi v1.4s, #15, msl #8 ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: mvni v1.4s, #15, msl #8 ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s @@ -1007,8 +1007,8 @@ define <4 x i19> @test_signed_v4f32_v4i19(<4 x float> %f) { ; CHECK-LABEL: test_signed_v4f32_v4i19: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.4s, #3, msl #16 ; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: movi v1.4s, #3, msl #16 ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: mvni v1.4s, #3, msl #16 ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s @@ -1033,17 +1033,17 @@ ; CHECK-NEXT: mov x9, #562949953421311 ; CHECK-NEXT: mov x10, #-562949953421312 ; CHECK-NEXT: fcvtzs x12, s0 -; CHECK-NEXT: mov s2, v1.s[1] ; CHECK-NEXT: fcvtzs x8, s1 -; CHECK-NEXT: mov s1, v0.s[1] +; CHECK-NEXT: mov s1, v1.s[1] +; CHECK-NEXT: mov s0, v0.s[1] +; CHECK-NEXT: fcvtzs x11, s1 ; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: fcvtzs x11, s2 ; CHECK-NEXT: csel x8, x8, x9, lt ; CHECK-NEXT: cmp x8, x10 ; CHECK-NEXT: csel x2, x8, x10, gt ; CHECK-NEXT: cmp x11, x9 ; CHECK-NEXT: csel x8, x11, x9, lt -; CHECK-NEXT: fcvtzs x11, s1 +; CHECK-NEXT: fcvtzs x11, s0 ; CHECK-NEXT: cmp x8, x10 ; CHECK-NEXT: csel x3, x8, x10, gt ; CHECK-NEXT: cmp x12, x9 @@ -1063,16 +1063,16 @@ ; CHECK-LABEL: test_signed_v4f32_v4i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: mov s3, v0.s[1] ; CHECK-NEXT: fcvtzs x9, s0 -; CHECK-NEXT: mov s2, v1.s[1] +; CHECK-NEXT: mov s2, v0.s[1] ; CHECK-NEXT: fcvtzs x8, s1 +; CHECK-NEXT: mov s1, v1.s[1] ; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: fcvtzs x9, s3 +; CHECK-NEXT: fcvtzs x10, s1 ; CHECK-NEXT: fmov d1, x8 ; CHECK-NEXT: fcvtzs x8, s2 -; CHECK-NEXT: mov v0.d[1], x9 -; CHECK-NEXT: mov v1.d[1], x8 +; CHECK-NEXT: mov v1.d[1], x10 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %x = call <4 x i64> @llvm.fptosi.sat.v4f32.v4i64(<4 x float> %f) ret <4 x i64> %x @@ -1423,8 +1423,8 @@ define <2 x i32> @test_signed_v2f64_v2i32_duplicate(<2 x double> %f) { ; CHECK-LABEL: test_signed_v2f64_v2i32_duplicate: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d1, v0.d[1] ; CHECK-NEXT: fcvtzs w8, d0 +; CHECK-NEXT: mov d1, v0.d[1] ; CHECK-NEXT: fmov s0, w8 ; CHECK-NEXT: fcvtzs w8, d1 ; CHECK-NEXT: mov v0.s[1], w8 @@ -1626,8 +1626,8 @@ ; ; CHECK-FP16-LABEL: test_signed_v4f16_v4i1: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: movi v1.2d, #0000000000000000 ; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-FP16-NEXT: movi v1.2d, #0000000000000000 ; CHECK-FP16-NEXT: smin v0.4h, v0.4h, v1.4h ; CHECK-FP16-NEXT: movi v1.2d, #0xffffffffffffffff ; CHECK-FP16-NEXT: smax v0.4h, v0.4h, v1.4h @@ -1650,8 +1650,8 @@ ; ; CHECK-FP16-LABEL: test_signed_v4f16_v4i8: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: movi v1.4h, #127 ; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-FP16-NEXT: movi v1.4h, #127 ; CHECK-FP16-NEXT: smin v0.4h, v0.4h, v1.4h ; CHECK-FP16-NEXT: mvni v1.4h, #127 ; CHECK-FP16-NEXT: smax v0.4h, v0.4h, v1.4h @@ -1674,8 +1674,8 @@ ; ; CHECK-FP16-LABEL: test_signed_v4f16_v4i13: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: mvni v1.4h, #240, lsl #8 ; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-FP16-NEXT: mvni v1.4h, #240, lsl #8 ; CHECK-FP16-NEXT: smin v0.4h, v0.4h, v1.4h ; CHECK-FP16-NEXT: movi v1.4h, #240, lsl #8 ; CHECK-FP16-NEXT: smax v0.4h, v0.4h, v1.4h @@ -1728,18 +1728,18 @@ ; CHECK-CVT-LABEL: test_signed_v4f16_v4i50: ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-CVT-NEXT: mov h1, v0.h[1] -; CHECK-CVT-NEXT: fcvt s2, h0 -; CHECK-CVT-NEXT: mov h3, v0.h[2] -; CHECK-CVT-NEXT: mov h0, v0.h[3] +; CHECK-CVT-NEXT: fcvt s1, h0 +; CHECK-CVT-NEXT: mov h2, v0.h[1] ; CHECK-CVT-NEXT: mov x8, #562949953421311 ; CHECK-CVT-NEXT: mov x11, #-562949953421312 +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: fcvtzs x9, s1 +; CHECK-CVT-NEXT: mov h1, v0.h[2] +; CHECK-CVT-NEXT: mov h0, v0.h[3] ; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvtzs x9, s2 -; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcvtzs x10, s2 ; CHECK-CVT-NEXT: cmp x9, x8 -; CHECK-CVT-NEXT: fcvtzs x10, s1 -; CHECK-CVT-NEXT: fcvt s1, h3 +; CHECK-CVT-NEXT: fcvt s0, h0 ; CHECK-CVT-NEXT: csel x9, x9, x8, lt ; CHECK-CVT-NEXT: cmp x9, x11 ; CHECK-CVT-NEXT: csel x0, x9, x11, gt @@ -1762,15 +1762,15 @@ ; CHECK-FP16-LABEL: test_signed_v4f16_v4i50: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-NEXT: mov h1, v0.h[1] ; CHECK-FP16-NEXT: fcvtzs x9, h0 +; CHECK-FP16-NEXT: mov h1, v0.h[1] ; CHECK-FP16-NEXT: mov x8, #562949953421311 ; CHECK-FP16-NEXT: mov x11, #-562949953421312 -; CHECK-FP16-NEXT: cmp x9, x8 ; CHECK-FP16-NEXT: fcvtzs x10, h1 ; CHECK-FP16-NEXT: mov h1, v0.h[2] -; CHECK-FP16-NEXT: csel x9, x9, x8, lt +; CHECK-FP16-NEXT: cmp x9, x8 ; CHECK-FP16-NEXT: mov h0, v0.h[3] +; CHECK-FP16-NEXT: csel x9, x9, x8, lt ; CHECK-FP16-NEXT: cmp x9, x11 ; CHECK-FP16-NEXT: csel x0, x9, x11, gt ; CHECK-FP16-NEXT: cmp x10, x8 @@ -1797,18 +1797,18 @@ ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-CVT-NEXT: mov h1, v0.h[2] -; CHECK-CVT-NEXT: mov h2, v0.h[1] -; CHECK-CVT-NEXT: fcvt s3, h0 +; CHECK-CVT-NEXT: fcvt s2, h0 +; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: mov h0, v0.h[3] ; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: fcvtzs x8, s3 -; CHECK-CVT-NEXT: fcvt s3, h0 +; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: fcvtzs x8, s2 +; CHECK-CVT-NEXT: fcvt s2, h0 ; CHECK-CVT-NEXT: fcvtzs x9, s1 ; CHECK-CVT-NEXT: fmov d0, x8 -; CHECK-CVT-NEXT: fcvtzs x8, s2 +; CHECK-CVT-NEXT: fcvtzs x8, s3 ; CHECK-CVT-NEXT: fmov d1, x9 -; CHECK-CVT-NEXT: fcvtzs x9, s3 +; CHECK-CVT-NEXT: fcvtzs x9, s2 ; CHECK-CVT-NEXT: mov v0.d[1], x8 ; CHECK-CVT-NEXT: mov v1.d[1], x9 ; CHECK-CVT-NEXT: ret @@ -1817,9 +1817,9 @@ ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-FP16-NEXT: mov h1, v0.h[2] +; CHECK-FP16-NEXT: fcvtzs x8, h0 ; CHECK-FP16-NEXT: mov h2, v0.h[1] ; CHECK-FP16-NEXT: mov h3, v0.h[3] -; CHECK-FP16-NEXT: fcvtzs x8, h0 ; CHECK-FP16-NEXT: fcvtzs x9, h1 ; CHECK-FP16-NEXT: fmov d0, x8 ; CHECK-FP16-NEXT: fcvtzs x8, h2 @@ -1882,12 +1882,12 @@ ; CHECK-NEXT: csel x20, xzr, x9, vs ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: mov h0, v0.h[3] +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, x25, x1, lt ; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: mov h0, v0.h[3] ; CHECK-NEXT: csel x9, x26, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 @@ -1931,8 +1931,8 @@ ; CHECK-NEXT: fmov d0, x9 ; CHECK-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: add sp, sp, #112 ; CHECK-NEXT: ret @@ -1990,12 +1990,12 @@ ; CHECK-NEXT: csel x20, xzr, x9, vs ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: mov h0, v0.h[3] +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, x25, x1, lt ; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: mov h0, v0.h[3] ; CHECK-NEXT: csel x9, x26, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 @@ -2039,8 +2039,8 @@ ; CHECK-NEXT: fmov d0, x9 ; CHECK-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: add sp, sp, #112 ; CHECK-NEXT: ret @@ -2073,29 +2073,30 @@ ; CHECK-CVT-NEXT: fcvtzs w8, s2 ; CHECK-CVT-NEXT: mov s2, v1.s[2] ; CHECK-CVT-NEXT: mov s1, v1.s[3] +; CHECK-CVT-NEXT: fcvtzs w10, s2 ; CHECK-CVT-NEXT: cmp w8, #0 +; CHECK-CVT-NEXT: fcvtzs w11, s1 ; CHECK-CVT-NEXT: csel w8, w8, wzr, lt -; CHECK-CVT-NEXT: fcvtzs w10, s2 +; CHECK-CVT-NEXT: mov s1, v0.s[1] ; CHECK-CVT-NEXT: cmp w8, #0 -; CHECK-CVT-NEXT: mov s2, v0.s[1] ; CHECK-CVT-NEXT: csinv w8, w8, wzr, ge ; CHECK-CVT-NEXT: cmp w9, #0 ; CHECK-CVT-NEXT: csel w9, w9, wzr, lt -; CHECK-CVT-NEXT: fcvtzs w11, s1 +; CHECK-CVT-NEXT: fcvtzs w12, s1 ; CHECK-CVT-NEXT: cmp w9, #0 ; CHECK-CVT-NEXT: mov s1, v0.s[2] ; CHECK-CVT-NEXT: csinv w9, w9, wzr, ge ; CHECK-CVT-NEXT: cmp w10, #0 ; CHECK-CVT-NEXT: csel w10, w10, wzr, lt -; CHECK-CVT-NEXT: fcvtzs w12, s2 +; CHECK-CVT-NEXT: fcvtzs w14, s1 ; CHECK-CVT-NEXT: cmp w10, #0 ; CHECK-CVT-NEXT: mov s0, v0.s[3] ; CHECK-CVT-NEXT: csinv w10, w10, wzr, ge ; CHECK-CVT-NEXT: cmp w11, #0 ; CHECK-CVT-NEXT: csel w11, w11, wzr, lt -; CHECK-CVT-NEXT: fcvtzs w14, s1 +; CHECK-CVT-NEXT: fcvtzs w15, s0 ; CHECK-CVT-NEXT: cmp w11, #0 -; CHECK-CVT-NEXT: fmov s2, w9 +; CHECK-CVT-NEXT: fmov s1, w9 ; CHECK-CVT-NEXT: csinv w11, w11, wzr, ge ; CHECK-CVT-NEXT: cmp w12, #0 ; CHECK-CVT-NEXT: csel w12, w12, wzr, lt @@ -2106,30 +2107,29 @@ ; CHECK-CVT-NEXT: cmp w13, #0 ; CHECK-CVT-NEXT: csinv w13, w13, wzr, ge ; CHECK-CVT-NEXT: cmp w14, #0 -; CHECK-CVT-NEXT: csel w9, w14, wzr, lt +; CHECK-CVT-NEXT: csel w14, w14, wzr, lt +; CHECK-CVT-NEXT: cmp w14, #0 +; CHECK-CVT-NEXT: fmov s0, w13 +; CHECK-CVT-NEXT: csinv w13, w14, wzr, ge +; CHECK-CVT-NEXT: cmp w15, #0 +; CHECK-CVT-NEXT: csel w9, w15, wzr, lt +; CHECK-CVT-NEXT: mov v0.s[1], w12 ; CHECK-CVT-NEXT: cmp w9, #0 -; CHECK-CVT-NEXT: fmov s1, w13 -; CHECK-CVT-NEXT: fcvtzs w13, s0 ; CHECK-CVT-NEXT: csinv w9, w9, wzr, ge -; CHECK-CVT-NEXT: mov v2.s[1], w8 -; CHECK-CVT-NEXT: mov v1.s[1], w12 -; CHECK-CVT-NEXT: cmp w13, #0 -; CHECK-CVT-NEXT: csel w8, w13, wzr, lt -; CHECK-CVT-NEXT: cmp w8, #0 -; CHECK-CVT-NEXT: csinv w8, w8, wzr, ge -; CHECK-CVT-NEXT: mov v1.s[2], w9 -; CHECK-CVT-NEXT: mov v2.s[2], w10 -; CHECK-CVT-NEXT: mov v1.s[3], w8 -; CHECK-CVT-NEXT: mov v2.s[3], w11 -; CHECK-CVT-NEXT: xtn v0.4h, v1.4s -; CHECK-CVT-NEXT: xtn2 v0.8h, v2.4s +; CHECK-CVT-NEXT: mov v1.s[1], w8 +; CHECK-CVT-NEXT: mov v0.s[2], w13 +; CHECK-CVT-NEXT: mov v1.s[2], w10 +; CHECK-CVT-NEXT: mov v0.s[3], w9 +; CHECK-CVT-NEXT: mov v1.s[3], w11 +; CHECK-CVT-NEXT: xtn v0.4h, v0.4s +; CHECK-CVT-NEXT: xtn2 v0.8h, v1.4s ; CHECK-CVT-NEXT: xtn v0.8b, v0.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_signed_v8f16_v8i1: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: movi v1.2d, #0000000000000000 ; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-FP16-NEXT: movi v1.2d, #0000000000000000 ; CHECK-FP16-NEXT: smin v0.8h, v0.8h, v1.8h ; CHECK-FP16-NEXT: movi v1.2d, #0xffffffffffffffff ; CHECK-FP16-NEXT: smax v0.8h, v0.8h, v1.8h @@ -2143,65 +2143,65 @@ ; CHECK-CVT-LABEL: test_signed_v8f16_v8i8: ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h -; CHECK-CVT-NEXT: mov w8, #127 -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-NEXT: mov w9, #127 ; CHECK-CVT-NEXT: mov w10, #-128 +; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h ; CHECK-CVT-NEXT: mov s2, v1.s[1] ; CHECK-CVT-NEXT: fcvtzs w11, s1 ; CHECK-CVT-NEXT: fcvtzs w15, s0 -; CHECK-CVT-NEXT: fcvtzs w9, s2 +; CHECK-CVT-NEXT: fcvtzs w8, s2 ; CHECK-CVT-NEXT: mov s2, v1.s[2] ; CHECK-CVT-NEXT: mov s1, v1.s[3] -; CHECK-CVT-NEXT: cmp w9, #127 -; CHECK-CVT-NEXT: csel w9, w9, w8, lt ; CHECK-CVT-NEXT: fcvtzs w12, s2 -; CHECK-CVT-NEXT: cmn w9, #128 -; CHECK-CVT-NEXT: mov s2, v0.s[1] -; CHECK-CVT-NEXT: csel w9, w9, w10, gt -; CHECK-CVT-NEXT: cmp w11, #127 -; CHECK-CVT-NEXT: csel w11, w11, w8, lt +; CHECK-CVT-NEXT: cmp w8, #127 ; CHECK-CVT-NEXT: fcvtzs w13, s1 +; CHECK-CVT-NEXT: csel w8, w8, w9, lt +; CHECK-CVT-NEXT: mov s1, v0.s[1] +; CHECK-CVT-NEXT: cmn w8, #128 +; CHECK-CVT-NEXT: csel w8, w8, w10, gt +; CHECK-CVT-NEXT: cmp w11, #127 +; CHECK-CVT-NEXT: csel w11, w11, w9, lt +; CHECK-CVT-NEXT: fcvtzs w14, s1 ; CHECK-CVT-NEXT: cmn w11, #128 ; CHECK-CVT-NEXT: mov s1, v0.s[2] ; CHECK-CVT-NEXT: csel w11, w11, w10, gt ; CHECK-CVT-NEXT: cmp w12, #127 -; CHECK-CVT-NEXT: csel w12, w12, w8, lt -; CHECK-CVT-NEXT: fcvtzs w14, s2 +; CHECK-CVT-NEXT: csel w12, w12, w9, lt +; CHECK-CVT-NEXT: fcvtzs w16, s1 ; CHECK-CVT-NEXT: cmn w12, #128 ; CHECK-CVT-NEXT: mov s0, v0.s[3] ; CHECK-CVT-NEXT: csel w12, w12, w10, gt ; CHECK-CVT-NEXT: cmp w13, #127 -; CHECK-CVT-NEXT: csel w13, w13, w8, lt -; CHECK-CVT-NEXT: fcvtzs w16, s1 +; CHECK-CVT-NEXT: csel w13, w13, w9, lt +; CHECK-CVT-NEXT: fcvtzs w17, s0 ; CHECK-CVT-NEXT: cmn w13, #128 -; CHECK-CVT-NEXT: fmov s2, w11 +; CHECK-CVT-NEXT: fmov s1, w11 ; CHECK-CVT-NEXT: csel w13, w13, w10, gt ; CHECK-CVT-NEXT: cmp w14, #127 -; CHECK-CVT-NEXT: csel w14, w14, w8, lt +; CHECK-CVT-NEXT: csel w14, w14, w9, lt ; CHECK-CVT-NEXT: cmn w14, #128 ; CHECK-CVT-NEXT: csel w14, w14, w10, gt ; CHECK-CVT-NEXT: cmp w15, #127 -; CHECK-CVT-NEXT: csel w15, w15, w8, lt +; CHECK-CVT-NEXT: csel w15, w15, w9, lt ; CHECK-CVT-NEXT: cmn w15, #128 ; CHECK-CVT-NEXT: csel w15, w15, w10, gt ; CHECK-CVT-NEXT: cmp w16, #127 -; CHECK-CVT-NEXT: csel w11, w16, w8, lt -; CHECK-CVT-NEXT: cmn w11, #128 -; CHECK-CVT-NEXT: fmov s1, w15 -; CHECK-CVT-NEXT: fcvtzs w15, s0 -; CHECK-CVT-NEXT: csel w11, w11, w10, gt -; CHECK-CVT-NEXT: mov v2.s[1], w9 -; CHECK-CVT-NEXT: mov v1.s[1], w14 -; CHECK-CVT-NEXT: cmp w15, #127 -; CHECK-CVT-NEXT: csel w8, w15, w8, lt -; CHECK-CVT-NEXT: cmn w8, #128 -; CHECK-CVT-NEXT: csel w8, w8, w10, gt -; CHECK-CVT-NEXT: mov v1.s[2], w11 -; CHECK-CVT-NEXT: mov v2.s[2], w12 -; CHECK-CVT-NEXT: mov v1.s[3], w8 -; CHECK-CVT-NEXT: mov v2.s[3], w13 -; CHECK-CVT-NEXT: xtn v0.4h, v1.4s -; CHECK-CVT-NEXT: xtn2 v0.8h, v2.4s +; CHECK-CVT-NEXT: csel w16, w16, w9, lt +; CHECK-CVT-NEXT: cmn w16, #128 +; CHECK-CVT-NEXT: fmov s0, w15 +; CHECK-CVT-NEXT: csel w15, w16, w10, gt +; CHECK-CVT-NEXT: cmp w17, #127 +; CHECK-CVT-NEXT: csel w9, w17, w9, lt +; CHECK-CVT-NEXT: mov v0.s[1], w14 +; CHECK-CVT-NEXT: cmn w9, #128 +; CHECK-CVT-NEXT: csel w9, w9, w10, gt +; CHECK-CVT-NEXT: mov v1.s[1], w8 +; CHECK-CVT-NEXT: mov v0.s[2], w15 +; CHECK-CVT-NEXT: mov v1.s[2], w12 +; CHECK-CVT-NEXT: mov v0.s[3], w9 +; CHECK-CVT-NEXT: mov v1.s[3], w13 +; CHECK-CVT-NEXT: xtn v0.4h, v0.4s +; CHECK-CVT-NEXT: xtn2 v0.8h, v1.4s ; CHECK-CVT-NEXT: xtn v0.8b, v0.8h ; CHECK-CVT-NEXT: ret ; @@ -2218,71 +2218,71 @@ ; CHECK-CVT-LABEL: test_signed_v8f16_v8i13: ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h -; CHECK-CVT-NEXT: mov w8, #4095 -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-NEXT: mov w9, #4095 ; CHECK-CVT-NEXT: mov w10, #-4096 +; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h ; CHECK-CVT-NEXT: mov s2, v1.s[1] ; CHECK-CVT-NEXT: fcvtzs w11, s1 ; CHECK-CVT-NEXT: fcvtzs w15, s0 -; CHECK-CVT-NEXT: fcvtzs w9, s2 +; CHECK-CVT-NEXT: fcvtzs w8, s2 ; CHECK-CVT-NEXT: mov s2, v1.s[2] ; CHECK-CVT-NEXT: mov s1, v1.s[3] -; CHECK-CVT-NEXT: cmp w9, #4095 -; CHECK-CVT-NEXT: csel w9, w9, w8, lt ; CHECK-CVT-NEXT: fcvtzs w12, s2 -; CHECK-CVT-NEXT: cmn w9, #1, lsl #12 // =4096 -; CHECK-CVT-NEXT: mov s2, v0.s[1] -; CHECK-CVT-NEXT: csel w9, w9, w10, gt -; CHECK-CVT-NEXT: cmp w11, #4095 -; CHECK-CVT-NEXT: csel w11, w11, w8, lt +; CHECK-CVT-NEXT: cmp w8, #4095 ; CHECK-CVT-NEXT: fcvtzs w13, s1 +; CHECK-CVT-NEXT: csel w8, w8, w9, lt +; CHECK-CVT-NEXT: mov s1, v0.s[1] +; CHECK-CVT-NEXT: cmn w8, #1, lsl #12 // =4096 +; CHECK-CVT-NEXT: csel w8, w8, w10, gt +; CHECK-CVT-NEXT: cmp w11, #4095 +; CHECK-CVT-NEXT: csel w11, w11, w9, lt +; CHECK-CVT-NEXT: fcvtzs w14, s1 ; CHECK-CVT-NEXT: cmn w11, #1, lsl #12 // =4096 ; CHECK-CVT-NEXT: mov s1, v0.s[2] ; CHECK-CVT-NEXT: csel w11, w11, w10, gt ; CHECK-CVT-NEXT: cmp w12, #4095 -; CHECK-CVT-NEXT: csel w12, w12, w8, lt -; CHECK-CVT-NEXT: fcvtzs w14, s2 +; CHECK-CVT-NEXT: csel w12, w12, w9, lt +; CHECK-CVT-NEXT: fcvtzs w16, s1 ; CHECK-CVT-NEXT: cmn w12, #1, lsl #12 // =4096 ; CHECK-CVT-NEXT: mov s0, v0.s[3] ; CHECK-CVT-NEXT: csel w12, w12, w10, gt ; CHECK-CVT-NEXT: cmp w13, #4095 -; CHECK-CVT-NEXT: csel w13, w13, w8, lt -; CHECK-CVT-NEXT: fcvtzs w16, s1 +; CHECK-CVT-NEXT: csel w13, w13, w9, lt +; CHECK-CVT-NEXT: fcvtzs w17, s0 ; CHECK-CVT-NEXT: cmn w13, #1, lsl #12 // =4096 -; CHECK-CVT-NEXT: fmov s2, w11 +; CHECK-CVT-NEXT: fmov s1, w11 ; CHECK-CVT-NEXT: csel w13, w13, w10, gt ; CHECK-CVT-NEXT: cmp w14, #4095 -; CHECK-CVT-NEXT: csel w14, w14, w8, lt +; CHECK-CVT-NEXT: csel w14, w14, w9, lt ; CHECK-CVT-NEXT: cmn w14, #1, lsl #12 // =4096 ; CHECK-CVT-NEXT: csel w14, w14, w10, gt ; CHECK-CVT-NEXT: cmp w15, #4095 -; CHECK-CVT-NEXT: csel w15, w15, w8, lt +; CHECK-CVT-NEXT: csel w15, w15, w9, lt ; CHECK-CVT-NEXT: cmn w15, #1, lsl #12 // =4096 ; CHECK-CVT-NEXT: csel w15, w15, w10, gt ; CHECK-CVT-NEXT: cmp w16, #4095 -; CHECK-CVT-NEXT: csel w11, w16, w8, lt -; CHECK-CVT-NEXT: cmn w11, #1, lsl #12 // =4096 -; CHECK-CVT-NEXT: fmov s1, w15 -; CHECK-CVT-NEXT: fcvtzs w15, s0 -; CHECK-CVT-NEXT: csel w11, w11, w10, gt -; CHECK-CVT-NEXT: mov v2.s[1], w9 -; CHECK-CVT-NEXT: mov v1.s[1], w14 -; CHECK-CVT-NEXT: cmp w15, #4095 -; CHECK-CVT-NEXT: csel w8, w15, w8, lt -; CHECK-CVT-NEXT: cmn w8, #1, lsl #12 // =4096 -; CHECK-CVT-NEXT: csel w8, w8, w10, gt -; CHECK-CVT-NEXT: mov v1.s[2], w11 -; CHECK-CVT-NEXT: mov v2.s[2], w12 -; CHECK-CVT-NEXT: mov v1.s[3], w8 -; CHECK-CVT-NEXT: mov v2.s[3], w13 -; CHECK-CVT-NEXT: xtn v0.4h, v1.4s -; CHECK-CVT-NEXT: xtn2 v0.8h, v2.4s +; CHECK-CVT-NEXT: csel w16, w16, w9, lt +; CHECK-CVT-NEXT: cmn w16, #1, lsl #12 // =4096 +; CHECK-CVT-NEXT: fmov s0, w15 +; CHECK-CVT-NEXT: csel w15, w16, w10, gt +; CHECK-CVT-NEXT: cmp w17, #4095 +; CHECK-CVT-NEXT: csel w9, w17, w9, lt +; CHECK-CVT-NEXT: mov v0.s[1], w14 +; CHECK-CVT-NEXT: cmn w9, #1, lsl #12 // =4096 +; CHECK-CVT-NEXT: csel w9, w9, w10, gt +; CHECK-CVT-NEXT: mov v1.s[1], w8 +; CHECK-CVT-NEXT: mov v0.s[2], w15 +; CHECK-CVT-NEXT: mov v1.s[2], w12 +; CHECK-CVT-NEXT: mov v0.s[3], w9 +; CHECK-CVT-NEXT: mov v1.s[3], w13 +; CHECK-CVT-NEXT: xtn v0.4h, v0.4s +; CHECK-CVT-NEXT: xtn2 v0.8h, v1.4s ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_signed_v8f16_v8i13: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: mvni v1.8h, #240, lsl #8 ; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-FP16-NEXT: mvni v1.8h, #240, lsl #8 ; CHECK-FP16-NEXT: smin v0.8h, v0.8h, v1.8h ; CHECK-FP16-NEXT: movi v1.8h, #240, lsl #8 ; CHECK-FP16-NEXT: smax v0.8h, v0.8h, v1.8h @@ -2295,65 +2295,65 @@ ; CHECK-CVT-LABEL: test_signed_v8f16_v8i16: ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h -; CHECK-CVT-NEXT: mov w8, #32767 -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-NEXT: mov w9, #32767 ; CHECK-CVT-NEXT: mov w10, #-32768 +; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h ; CHECK-CVT-NEXT: mov s2, v1.s[1] ; CHECK-CVT-NEXT: fcvtzs w11, s1 ; CHECK-CVT-NEXT: fcvtzs w15, s0 -; CHECK-CVT-NEXT: fcvtzs w9, s2 +; CHECK-CVT-NEXT: fcvtzs w8, s2 ; CHECK-CVT-NEXT: mov s2, v1.s[2] ; CHECK-CVT-NEXT: mov s1, v1.s[3] -; CHECK-CVT-NEXT: cmp w9, w8 -; CHECK-CVT-NEXT: csel w9, w9, w8, lt ; CHECK-CVT-NEXT: fcvtzs w12, s2 -; CHECK-CVT-NEXT: cmn w9, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: mov s2, v0.s[1] -; CHECK-CVT-NEXT: csel w9, w9, w10, gt -; CHECK-CVT-NEXT: cmp w11, w8 -; CHECK-CVT-NEXT: csel w11, w11, w8, lt +; CHECK-CVT-NEXT: cmp w8, w9 ; CHECK-CVT-NEXT: fcvtzs w13, s1 +; CHECK-CVT-NEXT: csel w8, w8, w9, lt +; CHECK-CVT-NEXT: mov s1, v0.s[1] +; CHECK-CVT-NEXT: cmn w8, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: csel w8, w8, w10, gt +; CHECK-CVT-NEXT: cmp w11, w9 +; CHECK-CVT-NEXT: csel w11, w11, w9, lt +; CHECK-CVT-NEXT: fcvtzs w14, s1 ; CHECK-CVT-NEXT: cmn w11, #8, lsl #12 // =32768 ; CHECK-CVT-NEXT: mov s1, v0.s[2] ; CHECK-CVT-NEXT: csel w11, w11, w10, gt -; CHECK-CVT-NEXT: cmp w12, w8 -; CHECK-CVT-NEXT: csel w12, w12, w8, lt -; CHECK-CVT-NEXT: fcvtzs w14, s2 +; CHECK-CVT-NEXT: cmp w12, w9 +; CHECK-CVT-NEXT: csel w12, w12, w9, lt +; CHECK-CVT-NEXT: fcvtzs w16, s1 ; CHECK-CVT-NEXT: cmn w12, #8, lsl #12 // =32768 ; CHECK-CVT-NEXT: mov s0, v0.s[3] ; CHECK-CVT-NEXT: csel w12, w12, w10, gt -; CHECK-CVT-NEXT: cmp w13, w8 -; CHECK-CVT-NEXT: csel w13, w13, w8, lt -; CHECK-CVT-NEXT: fcvtzs w16, s1 +; CHECK-CVT-NEXT: cmp w13, w9 +; CHECK-CVT-NEXT: csel w13, w13, w9, lt +; CHECK-CVT-NEXT: fcvtzs w17, s0 ; CHECK-CVT-NEXT: cmn w13, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: fmov s2, w11 +; CHECK-CVT-NEXT: fmov s1, w11 ; CHECK-CVT-NEXT: csel w13, w13, w10, gt -; CHECK-CVT-NEXT: cmp w14, w8 -; CHECK-CVT-NEXT: csel w14, w14, w8, lt +; CHECK-CVT-NEXT: cmp w14, w9 +; CHECK-CVT-NEXT: csel w14, w14, w9, lt ; CHECK-CVT-NEXT: cmn w14, #8, lsl #12 // =32768 ; CHECK-CVT-NEXT: csel w14, w14, w10, gt -; CHECK-CVT-NEXT: cmp w15, w8 -; CHECK-CVT-NEXT: csel w15, w15, w8, lt +; CHECK-CVT-NEXT: cmp w15, w9 +; CHECK-CVT-NEXT: csel w15, w15, w9, lt ; CHECK-CVT-NEXT: cmn w15, #8, lsl #12 // =32768 ; CHECK-CVT-NEXT: csel w15, w15, w10, gt -; CHECK-CVT-NEXT: cmp w16, w8 -; CHECK-CVT-NEXT: csel w11, w16, w8, lt -; CHECK-CVT-NEXT: cmn w11, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: fmov s1, w15 -; CHECK-CVT-NEXT: fcvtzs w15, s0 -; CHECK-CVT-NEXT: csel w11, w11, w10, gt -; CHECK-CVT-NEXT: mov v2.s[1], w9 -; CHECK-CVT-NEXT: mov v1.s[1], w14 -; CHECK-CVT-NEXT: cmp w15, w8 -; CHECK-CVT-NEXT: csel w8, w15, w8, lt -; CHECK-CVT-NEXT: cmn w8, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: csel w8, w8, w10, gt -; CHECK-CVT-NEXT: mov v1.s[2], w11 -; CHECK-CVT-NEXT: mov v2.s[2], w12 -; CHECK-CVT-NEXT: mov v1.s[3], w8 -; CHECK-CVT-NEXT: mov v2.s[3], w13 -; CHECK-CVT-NEXT: xtn v0.4h, v1.4s -; CHECK-CVT-NEXT: xtn2 v0.8h, v2.4s +; CHECK-CVT-NEXT: cmp w16, w9 +; CHECK-CVT-NEXT: csel w16, w16, w9, lt +; CHECK-CVT-NEXT: cmn w16, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: fmov s0, w15 +; CHECK-CVT-NEXT: csel w15, w16, w10, gt +; CHECK-CVT-NEXT: cmp w17, w9 +; CHECK-CVT-NEXT: csel w9, w17, w9, lt +; CHECK-CVT-NEXT: mov v0.s[1], w14 +; CHECK-CVT-NEXT: cmn w9, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: csel w9, w9, w10, gt +; CHECK-CVT-NEXT: mov v1.s[1], w8 +; CHECK-CVT-NEXT: mov v0.s[2], w15 +; CHECK-CVT-NEXT: mov v1.s[2], w12 +; CHECK-CVT-NEXT: mov v0.s[3], w9 +; CHECK-CVT-NEXT: mov v1.s[3], w13 +; CHECK-CVT-NEXT: xtn v0.4h, v0.4s +; CHECK-CVT-NEXT: xtn2 v0.8h, v1.4s ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_signed_v8f16_v8i16: @@ -2367,24 +2367,24 @@ define <8 x i19> @test_signed_v8f16_v8i19(<8 x half> %f) { ; CHECK-LABEL: test_signed_v8f16_v8i19: ; CHECK: // %bb.0: -; CHECK-NEXT: fcvtl v2.4s, v0.4h -; CHECK-NEXT: fcvtl2 v0.4s, v0.8h -; CHECK-NEXT: movi v1.4s, #3, msl #16 +; CHECK-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: movi v2.4s, #3, msl #16 ; CHECK-NEXT: mvni v3.4s, #3, msl #16 -; CHECK-NEXT: fcvtzs v2.4s, v2.4s +; CHECK-NEXT: fcvtzs v1.4s, v1.4s ; CHECK-NEXT: fcvtzs v0.4s, v0.4s -; CHECK-NEXT: smin v2.4s, v2.4s, v1.4s -; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: smax v1.4s, v2.4s, v3.4s +; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v2.4s +; CHECK-NEXT: smax v1.4s, v1.4s, v3.4s ; CHECK-NEXT: smax v0.4s, v0.4s, v3.4s -; CHECK-NEXT: mov w1, v1.s[1] -; CHECK-NEXT: mov w2, v1.s[2] -; CHECK-NEXT: mov w5, v0.s[1] -; CHECK-NEXT: mov w3, v1.s[3] -; CHECK-NEXT: mov w6, v0.s[2] -; CHECK-NEXT: mov w7, v0.s[3] -; CHECK-NEXT: fmov w4, s0 -; CHECK-NEXT: fmov w0, s1 +; CHECK-NEXT: fmov w4, s1 +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: mov w5, v1.s[1] +; CHECK-NEXT: mov w6, v1.s[2] +; CHECK-NEXT: mov w7, v1.s[3] +; CHECK-NEXT: mov w1, v0.s[1] +; CHECK-NEXT: mov w2, v0.s[2] +; CHECK-NEXT: mov w3, v0.s[3] ; CHECK-NEXT: ret %x = call <8 x i19> @llvm.fptosi.sat.v8f16.v8i19(<8 x half> %f) ret <8 x i19> %x @@ -2408,51 +2408,51 @@ ; CHECK-CVT-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-CVT-NEXT: mov x8, #562949953421311 ; CHECK-CVT-NEXT: mov x12, #-562949953421312 -; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h2, v1.h[1] -; CHECK-CVT-NEXT: fcvt s3, h1 -; CHECK-CVT-NEXT: mov h4, v1.h[2] +; CHECK-CVT-NEXT: fcvt s4, h0 +; CHECK-CVT-NEXT: fcvt s2, h1 +; CHECK-CVT-NEXT: mov h3, v1.h[1] +; CHECK-CVT-NEXT: mov h5, v1.h[2] ; CHECK-CVT-NEXT: mov h1, v1.h[3] -; CHECK-CVT-NEXT: fcvtzs x10, s5 -; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: fcvtzs x9, s3 -; CHECK-CVT-NEXT: fcvt s3, h4 +; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvtzs x10, s4 +; CHECK-CVT-NEXT: fcvtzs x9, s2 +; CHECK-CVT-NEXT: fcvt s2, h5 +; CHECK-CVT-NEXT: fcvtzs x11, s3 ; CHECK-CVT-NEXT: cmp x9, x8 -; CHECK-CVT-NEXT: fcvtzs x11, s2 ; CHECK-CVT-NEXT: csel x9, x9, x8, lt +; CHECK-CVT-NEXT: fcvtzs x13, s2 ; CHECK-CVT-NEXT: cmp x9, x12 -; CHECK-CVT-NEXT: fcvtzs x13, s3 -; CHECK-CVT-NEXT: csel x4, x9, x12, gt ; CHECK-CVT-NEXT: mov h2, v0.h[1] +; CHECK-CVT-NEXT: csel x4, x9, x12, gt ; CHECK-CVT-NEXT: cmp x11, x8 -; CHECK-CVT-NEXT: fcvtzs x9, s1 ; CHECK-CVT-NEXT: csel x11, x11, x8, lt -; CHECK-CVT-NEXT: mov h1, v0.h[2] +; CHECK-CVT-NEXT: fcvtzs x9, s1 ; CHECK-CVT-NEXT: cmp x11, x12 -; CHECK-CVT-NEXT: mov h0, v0.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csel x5, x11, x12, gt ; CHECK-CVT-NEXT: cmp x13, x8 ; CHECK-CVT-NEXT: csel x11, x13, x8, lt -; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: mov h1, v0.h[2] ; CHECK-CVT-NEXT: cmp x11, x12 -; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: mov h0, v0.h[3] ; CHECK-CVT-NEXT: csel x6, x11, x12, gt ; CHECK-CVT-NEXT: cmp x9, x8 ; CHECK-CVT-NEXT: csel x9, x9, x8, lt -; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: cmp x9, x12 ; CHECK-CVT-NEXT: fcvtzs x11, s2 ; CHECK-CVT-NEXT: csel x7, x9, x12, gt ; CHECK-CVT-NEXT: cmp x10, x8 ; CHECK-CVT-NEXT: csel x9, x10, x8, lt -; CHECK-CVT-NEXT: fcvtzs x10, s1 +; CHECK-CVT-NEXT: fcvt s0, h0 ; CHECK-CVT-NEXT: cmp x9, x12 +; CHECK-CVT-NEXT: fcvtzs x10, s1 ; CHECK-CVT-NEXT: csel x0, x9, x12, gt ; CHECK-CVT-NEXT: cmp x11, x8 ; CHECK-CVT-NEXT: csel x9, x11, x8, lt -; CHECK-CVT-NEXT: fcvtzs x11, s0 ; CHECK-CVT-NEXT: cmp x9, x12 +; CHECK-CVT-NEXT: fcvtzs x11, s0 ; CHECK-CVT-NEXT: csel x1, x9, x12, gt ; CHECK-CVT-NEXT: cmp x10, x8 ; CHECK-CVT-NEXT: csel x9, x10, x8, lt @@ -2467,54 +2467,54 @@ ; CHECK-FP16-LABEL: test_signed_v8f16_v8i50: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-FP16-NEXT: mov x8, #562949953421311 -; CHECK-FP16-NEXT: mov x11, #-562949953421312 +; CHECK-FP16-NEXT: mov x9, #562949953421311 +; CHECK-FP16-NEXT: mov x10, #-562949953421312 +; CHECK-FP16-NEXT: fcvtzs x8, h1 ; CHECK-FP16-NEXT: mov h2, v1.h[1] -; CHECK-FP16-NEXT: fcvtzs x9, h1 -; CHECK-FP16-NEXT: mov h3, v1.h[2] +; CHECK-FP16-NEXT: fcvtzs x11, h2 +; CHECK-FP16-NEXT: mov h2, v1.h[2] +; CHECK-FP16-NEXT: cmp x8, x9 ; CHECK-FP16-NEXT: mov h1, v1.h[3] -; CHECK-FP16-NEXT: cmp x9, x8 -; CHECK-FP16-NEXT: fcvtzs x10, h2 -; CHECK-FP16-NEXT: csel x9, x9, x8, lt -; CHECK-FP16-NEXT: cmp x9, x11 -; CHECK-FP16-NEXT: fcvtzs x12, h3 -; CHECK-FP16-NEXT: csel x4, x9, x11, gt +; CHECK-FP16-NEXT: csel x8, x8, x9, lt +; CHECK-FP16-NEXT: fcvtzs x12, h2 +; CHECK-FP16-NEXT: cmp x8, x10 ; CHECK-FP16-NEXT: mov h2, v0.h[2] -; CHECK-FP16-NEXT: cmp x10, x8 -; CHECK-FP16-NEXT: csel x9, x10, x8, lt -; CHECK-FP16-NEXT: fcvtzs x10, h1 -; CHECK-FP16-NEXT: cmp x9, x11 +; CHECK-FP16-NEXT: csel x4, x8, x10, gt +; CHECK-FP16-NEXT: cmp x11, x9 +; CHECK-FP16-NEXT: csel x8, x11, x9, lt +; CHECK-FP16-NEXT: fcvtzs x11, h1 +; CHECK-FP16-NEXT: cmp x8, x10 ; CHECK-FP16-NEXT: mov h1, v0.h[1] -; CHECK-FP16-NEXT: csel x5, x9, x11, gt -; CHECK-FP16-NEXT: cmp x12, x8 -; CHECK-FP16-NEXT: csel x9, x12, x8, lt -; CHECK-FP16-NEXT: fcvtzs x12, h0 -; CHECK-FP16-NEXT: cmp x9, x11 +; CHECK-FP16-NEXT: csel x5, x8, x10, gt +; CHECK-FP16-NEXT: cmp x12, x9 +; CHECK-FP16-NEXT: csel x12, x12, x9, lt +; CHECK-FP16-NEXT: fcvtzs x8, h0 +; CHECK-FP16-NEXT: cmp x12, x10 ; CHECK-FP16-NEXT: mov h0, v0.h[3] -; CHECK-FP16-NEXT: csel x6, x9, x11, gt -; CHECK-FP16-NEXT: cmp x10, x8 -; CHECK-FP16-NEXT: csel x9, x10, x8, lt -; CHECK-FP16-NEXT: fcvtzs x10, h1 -; CHECK-FP16-NEXT: cmp x9, x11 -; CHECK-FP16-NEXT: csel x7, x9, x11, gt -; CHECK-FP16-NEXT: cmp x12, x8 -; CHECK-FP16-NEXT: csel x9, x12, x8, lt -; CHECK-FP16-NEXT: fcvtzs x12, h2 -; CHECK-FP16-NEXT: cmp x9, x11 -; CHECK-FP16-NEXT: csel x0, x9, x11, gt -; CHECK-FP16-NEXT: cmp x10, x8 -; CHECK-FP16-NEXT: csel x9, x10, x8, lt -; CHECK-FP16-NEXT: fcvtzs x10, h0 -; CHECK-FP16-NEXT: cmp x9, x11 -; CHECK-FP16-NEXT: csel x1, x9, x11, gt -; CHECK-FP16-NEXT: cmp x12, x8 -; CHECK-FP16-NEXT: csel x9, x12, x8, lt -; CHECK-FP16-NEXT: cmp x9, x11 -; CHECK-FP16-NEXT: csel x2, x9, x11, gt -; CHECK-FP16-NEXT: cmp x10, x8 -; CHECK-FP16-NEXT: csel x8, x10, x8, lt -; CHECK-FP16-NEXT: cmp x8, x11 -; CHECK-FP16-NEXT: csel x3, x8, x11, gt +; CHECK-FP16-NEXT: csel x6, x12, x10, gt +; CHECK-FP16-NEXT: cmp x11, x9 +; CHECK-FP16-NEXT: csel x11, x11, x9, lt +; CHECK-FP16-NEXT: fcvtzs x12, h1 +; CHECK-FP16-NEXT: cmp x11, x10 +; CHECK-FP16-NEXT: csel x7, x11, x10, gt +; CHECK-FP16-NEXT: cmp x8, x9 +; CHECK-FP16-NEXT: csel x8, x8, x9, lt +; CHECK-FP16-NEXT: fcvtzs x11, h2 +; CHECK-FP16-NEXT: cmp x8, x10 +; CHECK-FP16-NEXT: csel x0, x8, x10, gt +; CHECK-FP16-NEXT: cmp x12, x9 +; CHECK-FP16-NEXT: csel x8, x12, x9, lt +; CHECK-FP16-NEXT: fcvtzs x12, h0 +; CHECK-FP16-NEXT: cmp x8, x10 +; CHECK-FP16-NEXT: csel x1, x8, x10, gt +; CHECK-FP16-NEXT: cmp x11, x9 +; CHECK-FP16-NEXT: csel x8, x11, x9, lt +; CHECK-FP16-NEXT: cmp x8, x10 +; CHECK-FP16-NEXT: csel x2, x8, x10, gt +; CHECK-FP16-NEXT: cmp x12, x9 +; CHECK-FP16-NEXT: csel x8, x12, x9, lt +; CHECK-FP16-NEXT: cmp x8, x10 +; CHECK-FP16-NEXT: csel x3, x8, x10, gt ; CHECK-FP16-NEXT: ret %x = call <8 x i50> @llvm.fptosi.sat.v8f16.v8i50(<8 x half> %f) ret <8 x i50> %x @@ -2523,64 +2523,64 @@ define <8 x i64> @test_signed_v8f16_v8i64(<8 x half> %f) { ; CHECK-CVT-LABEL: test_signed_v8f16_v8i64: ; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-CVT-NEXT: fcvt s2, h0 ; CHECK-CVT-NEXT: mov h4, v0.h[2] -; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: fcvt s2, h1 -; CHECK-CVT-NEXT: mov h3, v1.h[1] -; CHECK-CVT-NEXT: mov h6, v1.h[2] +; CHECK-CVT-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-CVT-NEXT: fcvt s4, h4 -; CHECK-CVT-NEXT: mov h1, v1.h[3] -; CHECK-CVT-NEXT: fcvtzs x9, s5 +; CHECK-CVT-NEXT: fcvt s3, h1 +; CHECK-CVT-NEXT: mov h5, v1.h[1] ; CHECK-CVT-NEXT: fcvtzs x8, s2 -; CHECK-CVT-NEXT: fcvt s2, h3 +; CHECK-CVT-NEXT: mov h2, v1.h[2] +; CHECK-CVT-NEXT: mov h1, v1.h[3] +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: fcvt s6, h2 +; CHECK-CVT-NEXT: fcvtzs x10, s4 +; CHECK-CVT-NEXT: fcvtzs x9, s3 ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: mov h0, v0.h[3] -; CHECK-CVT-NEXT: fcvt s5, h6 -; CHECK-CVT-NEXT: fcvt s6, h1 -; CHECK-CVT-NEXT: fcvtzs x10, s2 -; CHECK-CVT-NEXT: fmov d2, x8 -; CHECK-CVT-NEXT: fcvtzs x8, s4 +; CHECK-CVT-NEXT: fcvt s7, h1 ; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: fcvtzs x11, s5 ; CHECK-CVT-NEXT: fcvt s4, h0 -; CHECK-CVT-NEXT: fmov d0, x9 -; CHECK-CVT-NEXT: mov v2.d[1], x10 -; CHECK-CVT-NEXT: fcvtzs x10, s5 -; CHECK-CVT-NEXT: fmov d1, x8 -; CHECK-CVT-NEXT: fcvtzs x9, s3 -; CHECK-CVT-NEXT: fcvtzs x8, s4 -; CHECK-CVT-NEXT: fmov d3, x10 -; CHECK-CVT-NEXT: fcvtzs x10, s6 -; CHECK-CVT-NEXT: mov v0.d[1], x9 -; CHECK-CVT-NEXT: mov v1.d[1], x8 -; CHECK-CVT-NEXT: mov v3.d[1], x10 +; CHECK-CVT-NEXT: fmov d2, x9 +; CHECK-CVT-NEXT: fcvtzs x9, s6 +; CHECK-CVT-NEXT: fmov d0, x8 +; CHECK-CVT-NEXT: fmov d1, x10 +; CHECK-CVT-NEXT: fcvtzs x8, s3 +; CHECK-CVT-NEXT: fcvtzs x10, s4 +; CHECK-CVT-NEXT: fmov d3, x9 +; CHECK-CVT-NEXT: fcvtzs x9, s7 +; CHECK-CVT-NEXT: mov v0.d[1], x8 +; CHECK-CVT-NEXT: mov v1.d[1], x10 +; CHECK-CVT-NEXT: mov v2.d[1], x11 +; CHECK-CVT-NEXT: mov v3.d[1], x9 ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_signed_v8f16_v8i64: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-FP16-NEXT: mov h3, v0.h[2] -; CHECK-FP16-NEXT: mov h5, v0.h[3] -; CHECK-FP16-NEXT: fcvtzs x9, h0 -; CHECK-FP16-NEXT: mov h2, v1.h[1] -; CHECK-FP16-NEXT: fcvtzs x8, h1 -; CHECK-FP16-NEXT: mov h4, v1.h[2] -; CHECK-FP16-NEXT: mov h6, v1.h[3] +; CHECK-FP16-NEXT: mov h2, v0.h[2] +; CHECK-FP16-NEXT: fcvtzs x8, h0 +; CHECK-FP16-NEXT: fcvtzs x9, h1 +; CHECK-FP16-NEXT: mov h5, v1.h[2] ; CHECK-FP16-NEXT: fcvtzs x10, h2 -; CHECK-FP16-NEXT: fmov d2, x8 -; CHECK-FP16-NEXT: fcvtzs x8, h3 ; CHECK-FP16-NEXT: mov h3, v0.h[1] -; CHECK-FP16-NEXT: fmov d0, x9 -; CHECK-FP16-NEXT: mov v2.d[1], x10 +; CHECK-FP16-NEXT: mov h4, v0.h[3] +; CHECK-FP16-NEXT: mov h0, v1.h[1] +; CHECK-FP16-NEXT: fmov d2, x9 +; CHECK-FP16-NEXT: fcvtzs x9, h5 +; CHECK-FP16-NEXT: mov h6, v1.h[3] +; CHECK-FP16-NEXT: fcvtzs x11, h0 +; CHECK-FP16-NEXT: fmov d0, x8 +; CHECK-FP16-NEXT: fcvtzs x8, h3 +; CHECK-FP16-NEXT: fmov d1, x10 ; CHECK-FP16-NEXT: fcvtzs x10, h4 -; CHECK-FP16-NEXT: fmov d1, x8 -; CHECK-FP16-NEXT: fcvtzs x9, h3 -; CHECK-FP16-NEXT: fcvtzs x8, h5 -; CHECK-FP16-NEXT: fmov d3, x10 -; CHECK-FP16-NEXT: fcvtzs x10, h6 -; CHECK-FP16-NEXT: mov v0.d[1], x9 -; CHECK-FP16-NEXT: mov v1.d[1], x8 -; CHECK-FP16-NEXT: mov v3.d[1], x10 +; CHECK-FP16-NEXT: fmov d3, x9 +; CHECK-FP16-NEXT: fcvtzs x9, h6 +; CHECK-FP16-NEXT: mov v0.d[1], x8 +; CHECK-FP16-NEXT: mov v1.d[1], x10 +; CHECK-FP16-NEXT: mov v2.d[1], x11 +; CHECK-FP16-NEXT: mov v3.d[1], x9 ; CHECK-FP16-NEXT: ret %x = call <8 x i64> @llvm.fptosi.sat.v8f16.v8i64(<8 x half> %f) ret <8 x i64> %x @@ -2623,8 +2623,8 @@ ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: mov w8, #-251658240 -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: mov x25, #-34359738368 +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: mov x23, #34359738367 ; CHECK-NEXT: fmov s10, w8 ; CHECK-NEXT: mov w8, #1895825407 @@ -2658,12 +2658,12 @@ ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: str x8, [sp, #16] // 8-byte Folded Spill ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, s10 -; CHECK-NEXT: mov h0, v0.h[2] +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: csel x8, x25, x1, lt ; CHECK-NEXT: csel x9, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s9 +; CHECK-NEXT: mov h0, v0.h[2] ; CHECK-NEXT: csinv x9, x9, xzr, le ; CHECK-NEXT: csel x8, x23, x8, gt ; CHECK-NEXT: fcmp s8, s8 @@ -2673,12 +2673,12 @@ ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: str x8, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, s10 -; CHECK-NEXT: mov h0, v0.h[1] +; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: csel x8, x25, x1, lt ; CHECK-NEXT: csel x9, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s9 +; CHECK-NEXT: mov h0, v0.h[1] ; CHECK-NEXT: csinv x9, x9, xzr, le ; CHECK-NEXT: csel x8, x23, x8, gt ; CHECK-NEXT: fcmp s8, s8 @@ -2688,12 +2688,12 @@ ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: str x8, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, s10 -; CHECK-NEXT: mov h0, v0.h[3] +; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, x25, x1, lt ; CHECK-NEXT: fcmp s8, s9 +; CHECK-NEXT: mov h0, v0.h[3] ; CHECK-NEXT: csel x9, x23, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 @@ -2712,74 +2712,74 @@ ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csel x20, xzr, x8, vs -; CHECK-NEXT: csel x28, xzr, x9, vs +; CHECK-NEXT: csel x28, xzr, x8, vs +; CHECK-NEXT: csel x26, xzr, x9, vs ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, s10 -; CHECK-NEXT: mov h0, v0.h[2] +; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: csel x8, x25, x1, lt ; CHECK-NEXT: csel x9, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s9 +; CHECK-NEXT: mov h0, v0.h[2] ; CHECK-NEXT: csinv x9, x9, xzr, le ; CHECK-NEXT: csel x8, x23, x8, gt ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: csel x21, xzr, x8, vs -; CHECK-NEXT: csel x26, xzr, x9, vs +; CHECK-NEXT: csel x20, xzr, x9, vs ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: fmov d0, x20 ; CHECK-NEXT: fcmp s8, s10 -; CHECK-NEXT: ldr x11, [sp, #8] // 8-byte Folded Reload -; CHECK-NEXT: lsr x10, x28, #28 +; CHECK-NEXT: fmov d0, x28 ; CHECK-NEXT: ldr d1, [sp] // 8-byte Folded Reload -; CHECK-NEXT: lsr x12, x29, #28 -; CHECK-NEXT: mov v0.d[1], x28 +; CHECK-NEXT: lsr x10, x26, #28 +; CHECK-NEXT: ldr x12, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: lsr x11, x29, #28 ; CHECK-NEXT: csel x8, x25, x1, lt ; CHECK-NEXT: csel x9, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: stur x11, [x19, #75] -; CHECK-NEXT: ldr x13, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], x26 +; CHECK-NEXT: stur x12, [x19, #75] +; CHECK-NEXT: mov v1.d[1], x29 +; CHECK-NEXT: ldr d2, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: csinv x9, x9, xzr, le ; CHECK-NEXT: csel x8, x23, x8, gt ; CHECK-NEXT: fcmp s8, s8 -; CHECK-NEXT: fmov x11, d0 -; CHECK-NEXT: stur x13, [x19, #50] -; CHECK-NEXT: mov v1.d[1], x29 -; CHECK-NEXT: ldr d0, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: csel x9, xzr, x9, vs +; CHECK-NEXT: fmov x12, d0 ; CHECK-NEXT: strb w10, [x19, #49] -; CHECK-NEXT: extr x10, x28, x11, #28 +; CHECK-NEXT: fmov x10, d1 +; CHECK-NEXT: strb w11, [x19, #24] +; CHECK-NEXT: csel x9, xzr, x9, vs +; CHECK-NEXT: extr x11, x26, x12, #28 ; CHECK-NEXT: csel x8, xzr, x8, vs -; CHECK-NEXT: bfi x8, x11, #36, #28 -; CHECK-NEXT: strb w12, [x19, #24] +; CHECK-NEXT: bfi x21, x10, #36, #28 +; CHECK-NEXT: bfi x8, x12, #36, #28 ; CHECK-NEXT: stur x9, [x19, #25] -; CHECK-NEXT: fmov x12, d1 -; CHECK-NEXT: stur x10, [x19, #41] -; CHECK-NEXT: lsr x9, x22, #28 -; CHECK-NEXT: ldr d1, [sp, #24] // 8-byte Folded Reload +; CHECK-NEXT: ldr x9, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: stur x11, [x19, #41] +; CHECK-NEXT: mov v2.d[1], x22 +; CHECK-NEXT: stur x9, [x19, #50] +; CHECK-NEXT: extr x9, x29, x10, #28 +; CHECK-NEXT: ldr d0, [sp, #24] // 8-byte Folded Reload ; CHECK-NEXT: stur x8, [x19, #33] +; CHECK-NEXT: fmov x8, d2 +; CHECK-NEXT: str x9, [x19, #16] +; CHECK-NEXT: lsr x9, x22, #28 ; CHECK-NEXT: ldr x11, [sp, #72] // 8-byte Folded Reload -; CHECK-NEXT: extr x18, x29, x12, #28 -; CHECK-NEXT: mov v0.d[1], x22 -; CHECK-NEXT: bfi x21, x12, #36, #28 -; CHECK-NEXT: str x26, [x19] -; CHECK-NEXT: mov v1.d[1], x11 +; CHECK-NEXT: stp x20, x21, [x19] +; CHECK-NEXT: extr x12, x22, x8, #28 +; CHECK-NEXT: bfi x27, x8, #36, #28 +; CHECK-NEXT: strb w9, [x19, #99] +; CHECK-NEXT: mov v0.d[1], x11 ; CHECK-NEXT: lsr x10, x11, #28 ; CHECK-NEXT: mov x13, x11 -; CHECK-NEXT: stp x21, x18, [x19, #8] -; CHECK-NEXT: fmov x8, d0 -; CHECK-NEXT: strb w9, [x19, #99] +; CHECK-NEXT: stur x12, [x19, #91] +; CHECK-NEXT: fmov x11, d0 +; CHECK-NEXT: stur x27, [x19, #83] ; CHECK-NEXT: strb w10, [x19, #74] -; CHECK-NEXT: fmov x11, d1 -; CHECK-NEXT: extr x12, x22, x8, #28 -; CHECK-NEXT: bfi x27, x8, #36, #28 ; CHECK-NEXT: extr x8, x13, x11, #28 ; CHECK-NEXT: bfi x24, x11, #36, #28 -; CHECK-NEXT: stur x12, [x19, #91] -; CHECK-NEXT: stur x27, [x19, #83] ; CHECK-NEXT: stur x8, [x19, #66] ; CHECK-NEXT: stur x24, [x19, #58] ; CHECK-NEXT: ldp x20, x19, [sp, #176] // 16-byte Folded Reload @@ -2832,18 +2832,18 @@ ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: mov w8, #-16777216 +; CHECK-NEXT: mov x22, #-9223372036854775808 ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: mov x21, #-9223372036854775808 -; CHECK-NEXT: mov x22, #9223372036854775807 +; CHECK-NEXT: mov x23, #9223372036854775807 ; CHECK-NEXT: fmov s10, w8 ; CHECK-NEXT: mov w8, #2130706431 ; CHECK-NEXT: mov h0, v0.h[1] ; CHECK-NEXT: fcmp s8, s10 ; CHECK-NEXT: fmov s9, w8 ; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, x21, x1, lt +; CHECK-NEXT: csel x9, x22, x1, lt ; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: csel x9, x22, x9, gt +; CHECK-NEXT: csel x9, x23, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 @@ -2853,13 +2853,13 @@ ; CHECK-NEXT: csel x8, xzr, x9, vs ; CHECK-NEXT: str x8, [sp, #24] // 8-byte Folded Spill ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, s10 -; CHECK-NEXT: mov h0, v0.h[2] +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, x21, x1, lt +; CHECK-NEXT: csel x9, x22, x1, lt ; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: csel x9, x22, x9, gt +; CHECK-NEXT: mov h0, v0.h[2] +; CHECK-NEXT: csel x9, x23, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 @@ -2871,10 +2871,10 @@ ; CHECK-NEXT: fcmp s8, s10 ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, x21, x1, lt +; CHECK-NEXT: csel x9, x22, x1, lt ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: csel x9, x22, x9, gt +; CHECK-NEXT: csel x9, x23, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 @@ -2887,9 +2887,9 @@ ; CHECK-NEXT: fcmp s8, s10 ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, x21, x1, lt +; CHECK-NEXT: csel x9, x22, x1, lt ; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: csel x9, x22, x9, gt +; CHECK-NEXT: csel x9, x23, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 @@ -2897,27 +2897,27 @@ ; CHECK-NEXT: csel x29, xzr, x9, vs ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, s10 -; CHECK-NEXT: mov h0, v0.h[1] +; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, x21, x1, lt +; CHECK-NEXT: csel x9, x22, x1, lt ; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: csel x9, x22, x9, gt +; CHECK-NEXT: mov h0, v0.h[1] +; CHECK-NEXT: csel x9, x23, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: csel x20, xzr, x8, vs -; CHECK-NEXT: csel x23, xzr, x9, vs +; CHECK-NEXT: csel x21, xzr, x9, vs ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, s10 -; CHECK-NEXT: mov h0, v0.h[2] +; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, x21, x1, lt +; CHECK-NEXT: csel x9, x22, x1, lt ; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: csel x9, x22, x9, gt +; CHECK-NEXT: mov h0, v0.h[2] +; CHECK-NEXT: csel x9, x23, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 @@ -2925,13 +2925,13 @@ ; CHECK-NEXT: csel x25, xzr, x9, vs ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, s10 -; CHECK-NEXT: mov h0, v0.h[3] +; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, x21, x1, lt +; CHECK-NEXT: csel x9, x22, x1, lt ; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: csel x9, x22, x9, gt +; CHECK-NEXT: mov h0, v0.h[3] +; CHECK-NEXT: csel x9, x23, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 @@ -2942,13 +2942,13 @@ ; CHECK-NEXT: fcmp s8, s10 ; CHECK-NEXT: stp x26, x27, [x19, #32] ; CHECK-NEXT: stp x24, x25, [x19, #16] -; CHECK-NEXT: stp x20, x23, [x19] +; CHECK-NEXT: stp x20, x21, [x19] ; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, x21, x1, lt +; CHECK-NEXT: csel x9, x22, x1, lt ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: stp x28, x29, [x19, #112] ; CHECK-NEXT: ldr x10, [sp] // 8-byte Folded Reload -; CHECK-NEXT: csel x9, x22, x9, gt +; CHECK-NEXT: csel x9, x23, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: str x10, [x19, #104] diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll --- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll @@ -62,13 +62,13 @@ ; CHECK-NEXT: mov v0.s[1], v1.s[0] ; CHECK-NEXT: fcvtzu v4.4s, v4.4s ; CHECK-NEXT: mov v0.s[2], v2.s[0] -; CHECK-NEXT: fmov w4, s4 ; CHECK-NEXT: mov v0.s[3], v3.s[0] +; CHECK-NEXT: fmov w4, s4 ; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: mov w1, v0.s[1] ; CHECK-NEXT: mov w2, v0.s[2] ; CHECK-NEXT: mov w3, v0.s[3] -; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %x = call <5 x i32> @llvm.fptoui.sat.v5f32.v5i32(<5 x float> %f) ret <5 x i32> %x @@ -88,13 +88,13 @@ ; CHECK-NEXT: mov v0.s[2], v2.s[0] ; CHECK-NEXT: fcvtzu v1.4s, v4.4s ; CHECK-NEXT: mov v0.s[3], v3.s[0] -; CHECK-NEXT: mov w5, v1.s[1] -; CHECK-NEXT: fmov w4, s1 ; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: fmov w4, s1 +; CHECK-NEXT: mov w5, v1.s[1] +; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: mov w1, v0.s[1] ; CHECK-NEXT: mov w2, v0.s[2] ; CHECK-NEXT: mov w3, v0.s[3] -; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %x = call <6 x i32> @llvm.fptoui.sat.v6f32.v6i32(<6 x float> %f) ret <6 x i32> %x @@ -117,13 +117,13 @@ ; CHECK-NEXT: mov v0.s[3], v3.s[0] ; CHECK-NEXT: fcvtzu v1.4s, v4.4s ; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: fmov w4, s1 ; CHECK-NEXT: mov w5, v1.s[1] ; CHECK-NEXT: mov w6, v1.s[2] -; CHECK-NEXT: fmov w4, s1 +; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: mov w1, v0.s[1] ; CHECK-NEXT: mov w2, v0.s[2] ; CHECK-NEXT: mov w3, v0.s[3] -; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %x = call <7 x i32> @llvm.fptoui.sat.v7f32.v7i32(<7 x float> %f) ret <7 x i32> %x @@ -163,8 +163,8 @@ define <2 x i32> @test_unsigned_v2f64_v2i32(<2 x double> %f) { ; CHECK-LABEL: test_unsigned_v2f64_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d1, v0.d[1] ; CHECK-NEXT: fcvtzu w8, d0 +; CHECK-NEXT: mov d1, v0.d[1] ; CHECK-NEXT: fmov s0, w8 ; CHECK-NEXT: fcvtzu w8, d1 ; CHECK-NEXT: mov v0.s[1], w8 @@ -177,13 +177,13 @@ define <3 x i32> @test_unsigned_v3f64_v3i32(<3 x double> %f) { ; CHECK-LABEL: test_unsigned_v3f64_v3i32: ; CHECK: // %bb.0: -; CHECK-NEXT: fcvtzu w8, d0 -; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: fcvtzu w9, d0 ; CHECK-NEXT: fcvtzu w8, d1 +; CHECK-NEXT: fmov s0, w9 +; CHECK-NEXT: fcvtzu w9, d2 ; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: fcvtzu w8, d2 -; CHECK-NEXT: mov v0.s[2], w8 ; CHECK-NEXT: fcvtzu w8, d0 +; CHECK-NEXT: mov v0.s[2], w9 ; CHECK-NEXT: mov v0.s[3], w8 ; CHECK-NEXT: ret %x = call <3 x i32> @llvm.fptoui.sat.v3f64.v3i32(<3 x double> %f) @@ -193,15 +193,15 @@ define <4 x i32> @test_unsigned_v4f64_v4i32(<4 x double> %f) { ; CHECK-LABEL: test_unsigned_v4f64_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d2, v0.d[1] ; CHECK-NEXT: fcvtzu w8, d0 +; CHECK-NEXT: mov d2, v0.d[1] +; CHECK-NEXT: fcvtzu w9, d1 +; CHECK-NEXT: mov d1, v1.d[1] ; CHECK-NEXT: fmov s0, w8 ; CHECK-NEXT: fcvtzu w8, d2 ; CHECK-NEXT: mov v0.s[1], w8 ; CHECK-NEXT: fcvtzu w8, d1 -; CHECK-NEXT: mov d1, v1.d[1] -; CHECK-NEXT: mov v0.s[2], w8 -; CHECK-NEXT: fcvtzu w8, d1 +; CHECK-NEXT: mov v0.s[2], w9 ; CHECK-NEXT: mov v0.s[3], w8 ; CHECK-NEXT: ret %x = call <4 x i32> @llvm.fptoui.sat.v4f64.v4i32(<4 x double> %f) @@ -285,11 +285,11 @@ ; CHECK-NEXT: .cfi_offset w19, -8 ; CHECK-NEXT: .cfi_offset w20, -16 ; CHECK-NEXT: .cfi_offset w30, -32 -; CHECK-NEXT: mov v2.16b, v1.16b ; CHECK-NEXT: adrp x8, .LCPI15_0 ; CHECK-NEXT: stp q1, q0, [sp, #32] // 32-byte Folded Spill -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_0] +; CHECK-NEXT: mov v2.16b, v1.16b ; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_0] ; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: bl __getf2 ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload @@ -338,12 +338,12 @@ ; CHECK-NEXT: .cfi_offset w19, -8 ; CHECK-NEXT: .cfi_offset w20, -16 ; CHECK-NEXT: .cfi_offset w30, -32 -; CHECK-NEXT: stp q0, q2, [sp, #48] // 32-byte Folded Spill ; CHECK-NEXT: adrp x8, .LCPI16_0 +; CHECK-NEXT: stp q0, q2, [sp, #48] // 32-byte Folded Spill ; CHECK-NEXT: mov v2.16b, v1.16b ; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] ; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] ; CHECK-NEXT: str q1, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: bl __getf2 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload @@ -406,13 +406,13 @@ ; CHECK-NEXT: .cfi_offset w19, -8 ; CHECK-NEXT: .cfi_offset w20, -16 ; CHECK-NEXT: .cfi_offset w30, -32 -; CHECK-NEXT: stp q0, q2, [sp, #16] // 32-byte Folded Spill ; CHECK-NEXT: adrp x8, .LCPI17_0 +; CHECK-NEXT: stp q0, q2, [sp, #16] // 32-byte Folded Spill ; CHECK-NEXT: mov v2.16b, v1.16b ; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: str q3, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_0] -; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: str q1, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: bl __getf2 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload @@ -548,11 +548,11 @@ ; CHECK-NEXT: fcvtl2 v0.4s, v0.8h ; CHECK-NEXT: fcvtzu v1.4s, v1.4s ; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: fmov w0, s1 +; CHECK-NEXT: fmov w4, s0 ; CHECK-NEXT: mov w1, v1.s[1] ; CHECK-NEXT: mov w2, v1.s[2] ; CHECK-NEXT: mov w3, v1.s[3] -; CHECK-NEXT: fmov w0, s1 -; CHECK-NEXT: fmov w4, s0 ; CHECK-NEXT: ret %x = call <5 x i32> @llvm.fptoui.sat.v5f16.v5i32(<5 x half> %f) ret <5 x i32> %x @@ -565,12 +565,12 @@ ; CHECK-NEXT: fcvtl2 v0.4s, v0.8h ; CHECK-NEXT: fcvtzu v1.4s, v1.4s ; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: fmov w0, s1 +; CHECK-NEXT: fmov w4, s0 ; CHECK-NEXT: mov w1, v1.s[1] ; CHECK-NEXT: mov w2, v1.s[2] ; CHECK-NEXT: mov w3, v1.s[3] ; CHECK-NEXT: mov w5, v0.s[1] -; CHECK-NEXT: fmov w0, s1 -; CHECK-NEXT: fmov w4, s0 ; CHECK-NEXT: ret %x = call <6 x i32> @llvm.fptoui.sat.v6f16.v6i32(<6 x half> %f) ret <6 x i32> %x @@ -579,17 +579,17 @@ define <7 x i32> @test_unsigned_v7f16_v7i32(<7 x half> %f) { ; CHECK-LABEL: test_unsigned_v7f16_v7i32: ; CHECK: // %bb.0: -; CHECK-NEXT: fcvtl v1.4s, v0.4h -; CHECK-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-NEXT: fcvtl v0.4s, v0.4h ; CHECK-NEXT: fcvtzu v1.4s, v1.4s ; CHECK-NEXT: fcvtzu v0.4s, v0.4s -; CHECK-NEXT: mov w1, v1.s[1] -; CHECK-NEXT: mov w2, v1.s[2] -; CHECK-NEXT: mov w3, v1.s[3] -; CHECK-NEXT: mov w5, v0.s[1] -; CHECK-NEXT: mov w6, v0.s[2] -; CHECK-NEXT: fmov w0, s1 -; CHECK-NEXT: fmov w4, s0 +; CHECK-NEXT: fmov w4, s1 +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: mov w5, v1.s[1] +; CHECK-NEXT: mov w1, v0.s[1] +; CHECK-NEXT: mov w2, v0.s[2] +; CHECK-NEXT: mov w3, v0.s[3] +; CHECK-NEXT: mov w6, v1.s[2] ; CHECK-NEXT: ret %x = call <7 x i32> @llvm.fptoui.sat.v7f16.v7i32(<7 x half> %f) ret <7 x i32> %x @@ -624,8 +624,8 @@ define <2 x i1> @test_unsigned_v2f32_v2i1(<2 x float> %f) { ; CHECK-LABEL: test_unsigned_v2f32_v2i1: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2s, #1 ; CHECK-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-NEXT: movi v1.2s, #1 ; CHECK-NEXT: umin v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %x = call <2 x i1> @llvm.fptoui.sat.v2f32.v2i1(<2 x float> %f) @@ -635,8 +635,8 @@ define <2 x i8> @test_unsigned_v2f32_v2i8(<2 x float> %f) { ; CHECK-LABEL: test_unsigned_v2f32_v2i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0x0000ff000000ff ; CHECK-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-NEXT: movi d1, #0x0000ff000000ff ; CHECK-NEXT: umin v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %x = call <2 x i8> @llvm.fptoui.sat.v2f32.v2i8(<2 x float> %f) @@ -646,8 +646,8 @@ define <2 x i13> @test_unsigned_v2f32_v2i13(<2 x float> %f) { ; CHECK-LABEL: test_unsigned_v2f32_v2i13: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2s, #31, msl #8 ; CHECK-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-NEXT: movi v1.2s, #31, msl #8 ; CHECK-NEXT: umin v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %x = call <2 x i13> @llvm.fptoui.sat.v2f32.v2i13(<2 x float> %f) @@ -657,8 +657,8 @@ define <2 x i16> @test_unsigned_v2f32_v2i16(<2 x float> %f) { ; CHECK-LABEL: test_unsigned_v2f32_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0x00ffff0000ffff ; CHECK-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-NEXT: movi d1, #0x00ffff0000ffff ; CHECK-NEXT: umin v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %x = call <2 x i16> @llvm.fptoui.sat.v2f32.v2i16(<2 x float> %f) @@ -668,8 +668,8 @@ define <2 x i19> @test_unsigned_v2f32_v2i19(<2 x float> %f) { ; CHECK-LABEL: test_unsigned_v2f32_v2i19: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2s, #7, msl #16 ; CHECK-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-NEXT: movi v1.2s, #7, msl #16 ; CHECK-NEXT: umin v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %x = call <2 x i19> @llvm.fptoui.sat.v2f32.v2i19(<2 x float> %f) @@ -708,8 +708,8 @@ ; CHECK-LABEL: test_unsigned_v2f32_v2i64: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov s1, v0.s[1] ; CHECK-NEXT: fcvtzu x8, s0 +; CHECK-NEXT: mov s1, v0.s[1] ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: fcvtzu x8, s1 ; CHECK-NEXT: mov v0.d[1], x8 @@ -837,8 +837,8 @@ define <4 x i1> @test_unsigned_v4f32_v4i1(<4 x float> %f) { ; CHECK-LABEL: test_unsigned_v4f32_v4i1: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret @@ -849,8 +849,8 @@ define <4 x i8> @test_unsigned_v4f32_v4i8(<4 x float> %f) { ; CHECK-LABEL: test_unsigned_v4f32_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0x0000ff000000ff ; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: movi v1.2d, #0x0000ff000000ff ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret @@ -861,8 +861,8 @@ define <4 x i13> @test_unsigned_v4f32_v4i13(<4 x float> %f) { ; CHECK-LABEL: test_unsigned_v4f32_v4i13: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.4s, #31, msl #8 ; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: movi v1.4s, #31, msl #8 ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret @@ -883,8 +883,8 @@ define <4 x i19> @test_unsigned_v4f32_v4i19(<4 x float> %f) { ; CHECK-LABEL: test_unsigned_v4f32_v4i19: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.4s, #7, msl #16 ; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: movi v1.4s, #7, msl #16 ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %x = call <4 x i19> @llvm.fptoui.sat.v4f32.v4i19(<4 x float> %f) @@ -905,13 +905,13 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: mov x8, #1125899906842623 -; CHECK-NEXT: mov s3, v0.s[1] ; CHECK-NEXT: fcvtzu x11, s0 ; CHECK-NEXT: mov s2, v1.s[1] ; CHECK-NEXT: fcvtzu x9, s1 -; CHECK-NEXT: fcvtzu x12, s3 -; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: mov s1, v0.s[1] ; CHECK-NEXT: fcvtzu x10, s2 +; CHECK-NEXT: fcvtzu x12, s1 +; CHECK-NEXT: cmp x9, x8 ; CHECK-NEXT: csel x2, x9, x8, lo ; CHECK-NEXT: cmp x10, x8 ; CHECK-NEXT: csel x3, x10, x8, lo @@ -928,16 +928,16 @@ ; CHECK-LABEL: test_unsigned_v4f32_v4i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: mov s3, v0.s[1] ; CHECK-NEXT: fcvtzu x9, s0 -; CHECK-NEXT: mov s2, v1.s[1] +; CHECK-NEXT: mov s2, v0.s[1] ; CHECK-NEXT: fcvtzu x8, s1 +; CHECK-NEXT: mov s1, v1.s[1] ; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: fcvtzu x9, s3 +; CHECK-NEXT: fcvtzu x10, s1 ; CHECK-NEXT: fmov d1, x8 ; CHECK-NEXT: fcvtzu x8, s2 -; CHECK-NEXT: mov v0.d[1], x9 -; CHECK-NEXT: mov v1.d[1], x8 +; CHECK-NEXT: mov v1.d[1], x10 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %x = call <4 x i64> @llvm.fptoui.sat.v4f32.v4i64(<4 x float> %f) ret <4 x i64> %x @@ -968,13 +968,13 @@ ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: mov w8, #1904214015 -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, #0.0 +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: mov x25, #68719476735 ; CHECK-NEXT: fmov s9, w8 -; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, xzr, x1, lt +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 @@ -982,8 +982,8 @@ ; CHECK-NEXT: csinv x20, x8, xzr, le ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov s8, v0.s[1] ; CHECK-NEXT: fcmp s0, #0.0 +; CHECK-NEXT: mov s8, v0.s[1] ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, xzr, x1, lt ; CHECK-NEXT: fcmp s0, s9 @@ -1018,8 +1018,8 @@ ; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: ldp x30, x25, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: add sp, sp, #112 ; CHECK-NEXT: ret @@ -1051,12 +1051,12 @@ ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: mov w8, #2139095039 -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, #0.0 +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: fmov s9, w8 -; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: csel x8, xzr, x1, lt ; CHECK-NEXT: csel x9, xzr, x0, lt +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 @@ -1064,8 +1064,8 @@ ; CHECK-NEXT: csinv x20, x8, xzr, le ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov s8, v0.s[1] ; CHECK-NEXT: fcmp s0, #0.0 +; CHECK-NEXT: mov s8, v0.s[1] ; CHECK-NEXT: csel x8, xzr, x1, lt ; CHECK-NEXT: csel x9, xzr, x0, lt ; CHECK-NEXT: fcmp s0, s9 @@ -1220,8 +1220,8 @@ define <2 x i32> @test_unsigned_v2f64_v2i32_duplicate(<2 x double> %f) { ; CHECK-LABEL: test_unsigned_v2f64_v2i32_duplicate: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d1, v0.d[1] ; CHECK-NEXT: fcvtzu w8, d0 +; CHECK-NEXT: mov d1, v0.d[1] ; CHECK-NEXT: fmov s0, w8 ; CHECK-NEXT: fcvtzu w8, d1 ; CHECK-NEXT: mov v0.s[1], w8 @@ -1384,8 +1384,8 @@ ; ; CHECK-FP16-LABEL: test_unsigned_v4f16_v4i1: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: movi v1.4h, #1 ; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h +; CHECK-FP16-NEXT: movi v1.4h, #1 ; CHECK-FP16-NEXT: umin v0.4h, v0.4h, v1.4h ; CHECK-FP16-NEXT: ret %x = call <4 x i1> @llvm.fptoui.sat.v4f16.v4i1(<4 x half> %f) @@ -1404,8 +1404,8 @@ ; ; CHECK-FP16-LABEL: test_unsigned_v4f16_v4i8: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: movi d1, #0xff00ff00ff00ff ; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h +; CHECK-FP16-NEXT: movi d1, #0xff00ff00ff00ff ; CHECK-FP16-NEXT: umin v0.4h, v0.4h, v1.4h ; CHECK-FP16-NEXT: ret %x = call <4 x i8> @llvm.fptoui.sat.v4f16.v4i8(<4 x half> %f) @@ -1424,8 +1424,8 @@ ; ; CHECK-FP16-LABEL: test_unsigned_v4f16_v4i13: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: mvni v1.4h, #224, lsl #8 ; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h +; CHECK-FP16-NEXT: mvni v1.4h, #224, lsl #8 ; CHECK-FP16-NEXT: umin v0.4h, v0.4h, v1.4h ; CHECK-FP16-NEXT: ret %x = call <4 x i13> @llvm.fptoui.sat.v4f16.v4i13(<4 x half> %f) @@ -1475,18 +1475,18 @@ ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-CVT-NEXT: mov h1, v0.h[1] -; CHECK-CVT-NEXT: mov h2, v0.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[3] -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov x8, #1125899906842623 +; CHECK-CVT-NEXT: fcvt s2, h0 +; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h0, v0.h[3] ; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: mov x8, #1125899906842623 ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: fcvtzu x9, s0 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcvtzu x9, s2 ; CHECK-CVT-NEXT: fcvtzu x10, s1 -; CHECK-CVT-NEXT: fcvtzu x11, s2 +; CHECK-CVT-NEXT: fcvtzu x11, s3 ; CHECK-CVT-NEXT: cmp x9, x8 -; CHECK-CVT-NEXT: fcvtzu x12, s3 +; CHECK-CVT-NEXT: fcvtzu x12, s0 ; CHECK-CVT-NEXT: csel x0, x9, x8, lo ; CHECK-CVT-NEXT: cmp x10, x8 ; CHECK-CVT-NEXT: csel x1, x10, x8, lo @@ -1500,14 +1500,14 @@ ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-FP16-NEXT: mov h1, v0.h[1] -; CHECK-FP16-NEXT: mov h2, v0.h[2] -; CHECK-FP16-NEXT: mov h3, v0.h[3] ; CHECK-FP16-NEXT: fcvtzu x9, h0 -; CHECK-FP16-NEXT: mov x8, #1125899906842623 +; CHECK-FP16-NEXT: mov h2, v0.h[2] +; CHECK-FP16-NEXT: mov h0, v0.h[3] ; CHECK-FP16-NEXT: fcvtzu x10, h1 +; CHECK-FP16-NEXT: mov x8, #1125899906842623 ; CHECK-FP16-NEXT: fcvtzu x11, h2 ; CHECK-FP16-NEXT: cmp x9, x8 -; CHECK-FP16-NEXT: fcvtzu x12, h3 +; CHECK-FP16-NEXT: fcvtzu x12, h0 ; CHECK-FP16-NEXT: csel x0, x9, x8, lo ; CHECK-FP16-NEXT: cmp x10, x8 ; CHECK-FP16-NEXT: csel x1, x10, x8, lo @@ -1525,18 +1525,18 @@ ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-CVT-NEXT: mov h1, v0.h[2] -; CHECK-CVT-NEXT: mov h2, v0.h[1] -; CHECK-CVT-NEXT: fcvt s3, h0 +; CHECK-CVT-NEXT: fcvt s2, h0 +; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: mov h0, v0.h[3] ; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: fcvtzu x8, s3 -; CHECK-CVT-NEXT: fcvt s3, h0 +; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: fcvtzu x8, s2 +; CHECK-CVT-NEXT: fcvt s2, h0 ; CHECK-CVT-NEXT: fcvtzu x9, s1 ; CHECK-CVT-NEXT: fmov d0, x8 -; CHECK-CVT-NEXT: fcvtzu x8, s2 +; CHECK-CVT-NEXT: fcvtzu x8, s3 ; CHECK-CVT-NEXT: fmov d1, x9 -; CHECK-CVT-NEXT: fcvtzu x9, s3 +; CHECK-CVT-NEXT: fcvtzu x9, s2 ; CHECK-CVT-NEXT: mov v0.d[1], x8 ; CHECK-CVT-NEXT: mov v1.d[1], x9 ; CHECK-CVT-NEXT: ret @@ -1545,9 +1545,9 @@ ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-FP16-NEXT: mov h1, v0.h[2] +; CHECK-FP16-NEXT: fcvtzu x8, h0 ; CHECK-FP16-NEXT: mov h2, v0.h[1] ; CHECK-FP16-NEXT: mov h3, v0.h[3] -; CHECK-FP16-NEXT: fcvtzu x8, h0 ; CHECK-FP16-NEXT: fcvtzu x9, h1 ; CHECK-FP16-NEXT: fmov d0, x8 ; CHECK-FP16-NEXT: fcvtzu x8, h2 @@ -1586,12 +1586,12 @@ ; CHECK-NEXT: fcvt s8, h1 ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov w8, #1904214015 +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, #0.0 ; CHECK-NEXT: mov x25, #68719476735 -; CHECK-NEXT: mov h0, v0.h[1] ; CHECK-NEXT: fmov s9, w8 +; CHECK-NEXT: mov h0, v0.h[1] ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, xzr, x1, lt ; CHECK-NEXT: fcmp s8, s9 @@ -1637,8 +1637,8 @@ ; CHECK-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: ldp x30, x25, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: add sp, sp, #96 @@ -1672,11 +1672,11 @@ ; CHECK-NEXT: fcvt s8, h1 ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov w8, #2139095039 +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, #0.0 -; CHECK-NEXT: mov h0, v0.h[2] ; CHECK-NEXT: fmov s9, w8 +; CHECK-NEXT: mov h0, v0.h[2] ; CHECK-NEXT: csel x8, xzr, x1, lt ; CHECK-NEXT: csel x9, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s9 @@ -1723,8 +1723,8 @@ ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload ; CHECK-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret @@ -1753,20 +1753,19 @@ ; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h ; CHECK-CVT-NEXT: mov s2, v1.s[1] ; CHECK-CVT-NEXT: mov s3, v1.s[2] -; CHECK-CVT-NEXT: mov s4, v1.s[3] -; CHECK-CVT-NEXT: mov s5, v0.s[1] -; CHECK-CVT-NEXT: fcvtzu w9, s1 +; CHECK-CVT-NEXT: fcvtzu w8, s1 +; CHECK-CVT-NEXT: mov s1, v1.s[3] +; CHECK-CVT-NEXT: fcvtzu w9, s2 +; CHECK-CVT-NEXT: mov s2, v0.s[1] +; CHECK-CVT-NEXT: fcvtzu w11, s3 +; CHECK-CVT-NEXT: fcvtzu w12, s1 +; CHECK-CVT-NEXT: fcvtzu w13, s2 ; CHECK-CVT-NEXT: fcvtzu w10, s0 +; CHECK-CVT-NEXT: cmp w9, #1 ; CHECK-CVT-NEXT: mov s1, v0.s[2] -; CHECK-CVT-NEXT: mov s0, v0.s[3] -; CHECK-CVT-NEXT: fcvtzu w8, s2 -; CHECK-CVT-NEXT: fcvtzu w11, s3 -; CHECK-CVT-NEXT: fcvtzu w12, s4 -; CHECK-CVT-NEXT: fcvtzu w13, s5 +; CHECK-CVT-NEXT: csinc w9, w9, wzr, lo ; CHECK-CVT-NEXT: cmp w8, #1 ; CHECK-CVT-NEXT: csinc w8, w8, wzr, lo -; CHECK-CVT-NEXT: cmp w9, #1 -; CHECK-CVT-NEXT: csinc w9, w9, wzr, lo ; CHECK-CVT-NEXT: cmp w11, #1 ; CHECK-CVT-NEXT: csinc w11, w11, wzr, lo ; CHECK-CVT-NEXT: cmp w12, #1 @@ -1775,29 +1774,30 @@ ; CHECK-CVT-NEXT: csinc w13, w13, wzr, lo ; CHECK-CVT-NEXT: cmp w10, #1 ; CHECK-CVT-NEXT: csinc w10, w10, wzr, lo -; CHECK-CVT-NEXT: fmov s2, w10 -; CHECK-CVT-NEXT: fcvtzu w10, s1 -; CHECK-CVT-NEXT: fmov s1, w9 -; CHECK-CVT-NEXT: mov v2.s[1], w13 -; CHECK-CVT-NEXT: cmp w10, #1 -; CHECK-CVT-NEXT: csinc w9, w10, wzr, lo +; CHECK-CVT-NEXT: fcvtzu w14, s1 +; CHECK-CVT-NEXT: mov s0, v0.s[3] +; CHECK-CVT-NEXT: fmov s1, w10 ; CHECK-CVT-NEXT: fcvtzu w10, s0 -; CHECK-CVT-NEXT: mov v1.s[1], w8 -; CHECK-CVT-NEXT: mov v2.s[2], w9 +; CHECK-CVT-NEXT: cmp w14, #1 +; CHECK-CVT-NEXT: fmov s0, w8 +; CHECK-CVT-NEXT: csinc w8, w14, wzr, lo +; CHECK-CVT-NEXT: mov v1.s[1], w13 ; CHECK-CVT-NEXT: cmp w10, #1 -; CHECK-CVT-NEXT: csinc w8, w10, wzr, lo -; CHECK-CVT-NEXT: mov v1.s[2], w11 -; CHECK-CVT-NEXT: mov v2.s[3], w8 -; CHECK-CVT-NEXT: mov v1.s[3], w12 -; CHECK-CVT-NEXT: xtn v0.4h, v2.4s -; CHECK-CVT-NEXT: xtn2 v0.8h, v1.4s -; CHECK-CVT-NEXT: xtn v0.8b, v0.8h +; CHECK-CVT-NEXT: csinc w10, w10, wzr, lo +; CHECK-CVT-NEXT: mov v0.s[1], w9 +; CHECK-CVT-NEXT: mov v1.s[2], w8 +; CHECK-CVT-NEXT: mov v0.s[2], w11 +; CHECK-CVT-NEXT: mov v1.s[3], w10 +; CHECK-CVT-NEXT: mov v0.s[3], w12 +; CHECK-CVT-NEXT: xtn v1.4h, v1.4s +; CHECK-CVT-NEXT: xtn2 v1.8h, v0.4s +; CHECK-CVT-NEXT: xtn v0.8b, v1.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i1: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: movi v1.8h, #1 ; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-FP16-NEXT: movi v1.8h, #1 ; CHECK-FP16-NEXT: umin v0.8h, v0.8h, v1.8h ; CHECK-FP16-NEXT: xtn v0.8b, v0.8h ; CHECK-FP16-NEXT: ret @@ -1813,20 +1813,19 @@ ; CHECK-CVT-NEXT: mov w8, #255 ; CHECK-CVT-NEXT: mov s2, v1.s[1] ; CHECK-CVT-NEXT: mov s3, v1.s[2] -; CHECK-CVT-NEXT: mov s4, v1.s[3] -; CHECK-CVT-NEXT: mov s5, v0.s[1] -; CHECK-CVT-NEXT: fcvtzu w10, s1 +; CHECK-CVT-NEXT: fcvtzu w9, s1 +; CHECK-CVT-NEXT: mov s1, v1.s[3] +; CHECK-CVT-NEXT: fcvtzu w10, s2 +; CHECK-CVT-NEXT: mov s2, v0.s[1] +; CHECK-CVT-NEXT: fcvtzu w12, s3 +; CHECK-CVT-NEXT: fcvtzu w13, s1 +; CHECK-CVT-NEXT: fcvtzu w14, s2 ; CHECK-CVT-NEXT: fcvtzu w11, s0 +; CHECK-CVT-NEXT: cmp w10, #255 ; CHECK-CVT-NEXT: mov s1, v0.s[2] -; CHECK-CVT-NEXT: mov s0, v0.s[3] -; CHECK-CVT-NEXT: fcvtzu w9, s2 -; CHECK-CVT-NEXT: fcvtzu w12, s3 -; CHECK-CVT-NEXT: fcvtzu w13, s4 -; CHECK-CVT-NEXT: fcvtzu w14, s5 +; CHECK-CVT-NEXT: csel w10, w10, w8, lo ; CHECK-CVT-NEXT: cmp w9, #255 ; CHECK-CVT-NEXT: csel w9, w9, w8, lo -; CHECK-CVT-NEXT: cmp w10, #255 -; CHECK-CVT-NEXT: csel w10, w10, w8, lo ; CHECK-CVT-NEXT: cmp w12, #255 ; CHECK-CVT-NEXT: csel w12, w12, w8, lo ; CHECK-CVT-NEXT: cmp w13, #255 @@ -1835,23 +1834,24 @@ ; CHECK-CVT-NEXT: csel w14, w14, w8, lo ; CHECK-CVT-NEXT: cmp w11, #255 ; CHECK-CVT-NEXT: csel w11, w11, w8, lo -; CHECK-CVT-NEXT: fmov s2, w11 -; CHECK-CVT-NEXT: fcvtzu w11, s1 -; CHECK-CVT-NEXT: fmov s1, w10 -; CHECK-CVT-NEXT: mov v2.s[1], w14 -; CHECK-CVT-NEXT: cmp w11, #255 -; CHECK-CVT-NEXT: csel w10, w11, w8, lo +; CHECK-CVT-NEXT: fcvtzu w15, s1 +; CHECK-CVT-NEXT: mov s0, v0.s[3] +; CHECK-CVT-NEXT: fmov s1, w11 ; CHECK-CVT-NEXT: fcvtzu w11, s0 -; CHECK-CVT-NEXT: mov v1.s[1], w9 -; CHECK-CVT-NEXT: mov v2.s[2], w10 +; CHECK-CVT-NEXT: cmp w15, #255 +; CHECK-CVT-NEXT: fmov s0, w9 +; CHECK-CVT-NEXT: csel w9, w15, w8, lo +; CHECK-CVT-NEXT: mov v1.s[1], w14 ; CHECK-CVT-NEXT: cmp w11, #255 ; CHECK-CVT-NEXT: csel w8, w11, w8, lo -; CHECK-CVT-NEXT: mov v1.s[2], w12 -; CHECK-CVT-NEXT: mov v2.s[3], w8 -; CHECK-CVT-NEXT: mov v1.s[3], w13 -; CHECK-CVT-NEXT: xtn v0.4h, v2.4s -; CHECK-CVT-NEXT: xtn2 v0.8h, v1.4s -; CHECK-CVT-NEXT: xtn v0.8b, v0.8h +; CHECK-CVT-NEXT: mov v0.s[1], w10 +; CHECK-CVT-NEXT: mov v1.s[2], w9 +; CHECK-CVT-NEXT: mov v0.s[2], w12 +; CHECK-CVT-NEXT: mov v1.s[3], w8 +; CHECK-CVT-NEXT: mov v0.s[3], w13 +; CHECK-CVT-NEXT: xtn v1.4h, v1.4s +; CHECK-CVT-NEXT: xtn2 v1.8h, v0.4s +; CHECK-CVT-NEXT: xtn v0.8b, v1.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i8: @@ -1871,20 +1871,19 @@ ; CHECK-CVT-NEXT: mov w8, #8191 ; CHECK-CVT-NEXT: mov s2, v1.s[1] ; CHECK-CVT-NEXT: mov s3, v1.s[2] -; CHECK-CVT-NEXT: mov s4, v1.s[3] -; CHECK-CVT-NEXT: mov s5, v0.s[1] -; CHECK-CVT-NEXT: fcvtzu w10, s1 +; CHECK-CVT-NEXT: fcvtzu w9, s1 +; CHECK-CVT-NEXT: mov s1, v1.s[3] +; CHECK-CVT-NEXT: fcvtzu w10, s2 +; CHECK-CVT-NEXT: mov s2, v0.s[1] +; CHECK-CVT-NEXT: fcvtzu w12, s3 +; CHECK-CVT-NEXT: fcvtzu w13, s1 +; CHECK-CVT-NEXT: fcvtzu w14, s2 ; CHECK-CVT-NEXT: fcvtzu w11, s0 +; CHECK-CVT-NEXT: cmp w10, w8 ; CHECK-CVT-NEXT: mov s1, v0.s[2] -; CHECK-CVT-NEXT: mov s0, v0.s[3] -; CHECK-CVT-NEXT: fcvtzu w9, s2 -; CHECK-CVT-NEXT: fcvtzu w12, s3 -; CHECK-CVT-NEXT: fcvtzu w13, s4 -; CHECK-CVT-NEXT: fcvtzu w14, s5 +; CHECK-CVT-NEXT: csel w10, w10, w8, lo ; CHECK-CVT-NEXT: cmp w9, w8 ; CHECK-CVT-NEXT: csel w9, w9, w8, lo -; CHECK-CVT-NEXT: cmp w10, w8 -; CHECK-CVT-NEXT: csel w10, w10, w8, lo ; CHECK-CVT-NEXT: cmp w12, w8 ; CHECK-CVT-NEXT: csel w12, w12, w8, lo ; CHECK-CVT-NEXT: cmp w13, w8 @@ -1893,28 +1892,29 @@ ; CHECK-CVT-NEXT: csel w14, w14, w8, lo ; CHECK-CVT-NEXT: cmp w11, w8 ; CHECK-CVT-NEXT: csel w11, w11, w8, lo -; CHECK-CVT-NEXT: fmov s2, w11 -; CHECK-CVT-NEXT: fcvtzu w11, s1 -; CHECK-CVT-NEXT: fmov s1, w10 -; CHECK-CVT-NEXT: mov v2.s[1], w14 -; CHECK-CVT-NEXT: cmp w11, w8 -; CHECK-CVT-NEXT: csel w10, w11, w8, lo +; CHECK-CVT-NEXT: fcvtzu w15, s1 +; CHECK-CVT-NEXT: mov s0, v0.s[3] +; CHECK-CVT-NEXT: fmov s2, w9 +; CHECK-CVT-NEXT: fmov s1, w11 ; CHECK-CVT-NEXT: fcvtzu w11, s0 -; CHECK-CVT-NEXT: mov v1.s[1], w9 -; CHECK-CVT-NEXT: mov v2.s[2], w10 +; CHECK-CVT-NEXT: cmp w15, w8 +; CHECK-CVT-NEXT: csel w9, w15, w8, lo +; CHECK-CVT-NEXT: mov v1.s[1], w14 ; CHECK-CVT-NEXT: cmp w11, w8 ; CHECK-CVT-NEXT: csel w8, w11, w8, lo -; CHECK-CVT-NEXT: mov v1.s[2], w12 -; CHECK-CVT-NEXT: mov v2.s[3], w8 -; CHECK-CVT-NEXT: mov v1.s[3], w13 -; CHECK-CVT-NEXT: xtn v0.4h, v2.4s -; CHECK-CVT-NEXT: xtn2 v0.8h, v1.4s +; CHECK-CVT-NEXT: mov v2.s[1], w10 +; CHECK-CVT-NEXT: mov v1.s[2], w9 +; CHECK-CVT-NEXT: mov v2.s[2], w12 +; CHECK-CVT-NEXT: mov v1.s[3], w8 +; CHECK-CVT-NEXT: mov v2.s[3], w13 +; CHECK-CVT-NEXT: xtn v0.4h, v1.4s +; CHECK-CVT-NEXT: xtn2 v0.8h, v2.4s ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i13: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: mvni v1.8h, #224, lsl #8 ; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-FP16-NEXT: mvni v1.8h, #224, lsl #8 ; CHECK-FP16-NEXT: umin v0.8h, v0.8h, v1.8h ; CHECK-FP16-NEXT: ret %x = call <8 x i13> @llvm.fptoui.sat.v8f16.v8i13(<8 x half> %f) @@ -1929,20 +1929,19 @@ ; CHECK-CVT-NEXT: mov w8, #65535 ; CHECK-CVT-NEXT: mov s2, v1.s[1] ; CHECK-CVT-NEXT: mov s3, v1.s[2] -; CHECK-CVT-NEXT: mov s4, v1.s[3] -; CHECK-CVT-NEXT: mov s5, v0.s[1] -; CHECK-CVT-NEXT: fcvtzu w10, s1 +; CHECK-CVT-NEXT: fcvtzu w9, s1 +; CHECK-CVT-NEXT: mov s1, v1.s[3] +; CHECK-CVT-NEXT: fcvtzu w10, s2 +; CHECK-CVT-NEXT: mov s2, v0.s[1] +; CHECK-CVT-NEXT: fcvtzu w12, s3 +; CHECK-CVT-NEXT: fcvtzu w13, s1 +; CHECK-CVT-NEXT: fcvtzu w14, s2 ; CHECK-CVT-NEXT: fcvtzu w11, s0 +; CHECK-CVT-NEXT: cmp w10, w8 ; CHECK-CVT-NEXT: mov s1, v0.s[2] -; CHECK-CVT-NEXT: mov s0, v0.s[3] -; CHECK-CVT-NEXT: fcvtzu w9, s2 -; CHECK-CVT-NEXT: fcvtzu w12, s3 -; CHECK-CVT-NEXT: fcvtzu w13, s4 -; CHECK-CVT-NEXT: fcvtzu w14, s5 +; CHECK-CVT-NEXT: csel w10, w10, w8, lo ; CHECK-CVT-NEXT: cmp w9, w8 ; CHECK-CVT-NEXT: csel w9, w9, w8, lo -; CHECK-CVT-NEXT: cmp w10, w8 -; CHECK-CVT-NEXT: csel w10, w10, w8, lo ; CHECK-CVT-NEXT: cmp w12, w8 ; CHECK-CVT-NEXT: csel w12, w12, w8, lo ; CHECK-CVT-NEXT: cmp w13, w8 @@ -1951,22 +1950,23 @@ ; CHECK-CVT-NEXT: csel w14, w14, w8, lo ; CHECK-CVT-NEXT: cmp w11, w8 ; CHECK-CVT-NEXT: csel w11, w11, w8, lo -; CHECK-CVT-NEXT: fmov s2, w11 -; CHECK-CVT-NEXT: fcvtzu w11, s1 -; CHECK-CVT-NEXT: fmov s1, w10 -; CHECK-CVT-NEXT: mov v2.s[1], w14 -; CHECK-CVT-NEXT: cmp w11, w8 -; CHECK-CVT-NEXT: csel w10, w11, w8, lo +; CHECK-CVT-NEXT: fcvtzu w15, s1 +; CHECK-CVT-NEXT: mov s0, v0.s[3] +; CHECK-CVT-NEXT: fmov s2, w9 +; CHECK-CVT-NEXT: fmov s1, w11 ; CHECK-CVT-NEXT: fcvtzu w11, s0 -; CHECK-CVT-NEXT: mov v1.s[1], w9 -; CHECK-CVT-NEXT: mov v2.s[2], w10 +; CHECK-CVT-NEXT: cmp w15, w8 +; CHECK-CVT-NEXT: csel w9, w15, w8, lo +; CHECK-CVT-NEXT: mov v1.s[1], w14 ; CHECK-CVT-NEXT: cmp w11, w8 ; CHECK-CVT-NEXT: csel w8, w11, w8, lo -; CHECK-CVT-NEXT: mov v1.s[2], w12 -; CHECK-CVT-NEXT: mov v2.s[3], w8 -; CHECK-CVT-NEXT: mov v1.s[3], w13 -; CHECK-CVT-NEXT: xtn v0.4h, v2.4s -; CHECK-CVT-NEXT: xtn2 v0.8h, v1.4s +; CHECK-CVT-NEXT: mov v2.s[1], w10 +; CHECK-CVT-NEXT: mov v1.s[2], w9 +; CHECK-CVT-NEXT: mov v2.s[2], w12 +; CHECK-CVT-NEXT: mov v1.s[3], w8 +; CHECK-CVT-NEXT: mov v2.s[3], w13 +; CHECK-CVT-NEXT: xtn v0.4h, v1.4s +; CHECK-CVT-NEXT: xtn2 v0.8h, v2.4s ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i16: @@ -1980,21 +1980,21 @@ define <8 x i19> @test_unsigned_v8f16_v8i19(<8 x half> %f) { ; CHECK-LABEL: test_unsigned_v8f16_v8i19: ; CHECK: // %bb.0: -; CHECK-NEXT: fcvtl v2.4s, v0.4h -; CHECK-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NEXT: fcvtl2 v2.4s, v0.8h +; CHECK-NEXT: fcvtl v0.4s, v0.4h ; CHECK-NEXT: movi v1.4s, #7, msl #16 ; CHECK-NEXT: fcvtzu v2.4s, v2.4s ; CHECK-NEXT: fcvtzu v0.4s, v0.4s ; CHECK-NEXT: umin v2.4s, v2.4s, v1.4s ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: mov w1, v2.s[1] -; CHECK-NEXT: mov w2, v2.s[2] -; CHECK-NEXT: mov w5, v0.s[1] -; CHECK-NEXT: mov w3, v2.s[3] -; CHECK-NEXT: mov w6, v0.s[2] -; CHECK-NEXT: mov w7, v0.s[3] -; CHECK-NEXT: fmov w4, s0 -; CHECK-NEXT: fmov w0, s2 +; CHECK-NEXT: fmov w4, s2 +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: mov w5, v2.s[1] +; CHECK-NEXT: mov w6, v2.s[2] +; CHECK-NEXT: mov w7, v2.s[3] +; CHECK-NEXT: mov w1, v0.s[1] +; CHECK-NEXT: mov w2, v0.s[2] +; CHECK-NEXT: mov w3, v0.s[3] ; CHECK-NEXT: ret %x = call <8 x i19> @llvm.fptoui.sat.v8f16.v8i19(<8 x half> %f) ret <8 x i19> %x @@ -2017,35 +2017,35 @@ ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-CVT-NEXT: mov x8, #1125899906842623 -; CHECK-CVT-NEXT: mov h2, v0.h[1] -; CHECK-CVT-NEXT: mov h3, v0.h[2] -; CHECK-CVT-NEXT: mov h5, v0.h[3] -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov h4, v1.h[1] -; CHECK-CVT-NEXT: mov h6, v1.h[2] -; CHECK-CVT-NEXT: mov h7, v1.h[3] +; CHECK-CVT-NEXT: mov h3, v0.h[1] +; CHECK-CVT-NEXT: mov h6, v1.h[1] +; CHECK-CVT-NEXT: fcvt s5, h1 +; CHECK-CVT-NEXT: mov h7, v1.h[2] +; CHECK-CVT-NEXT: mov h1, v1.h[3] +; CHECK-CVT-NEXT: fcvt s6, h6 +; CHECK-CVT-NEXT: fcvt s2, h0 +; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: mov h4, v0.h[2] +; CHECK-CVT-NEXT: fcvtzu x10, s5 +; CHECK-CVT-NEXT: mov h0, v0.h[3] ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: fcvtzu x9, s0 -; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: fcvtzu x13, s6 ; CHECK-CVT-NEXT: fcvt s4, h4 -; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s0, h7 -; CHECK-CVT-NEXT: fcvtzu x10, s1 -; CHECK-CVT-NEXT: fcvtzu x11, s2 -; CHECK-CVT-NEXT: fcvtzu x12, s3 -; CHECK-CVT-NEXT: fcvtzu x14, s5 -; CHECK-CVT-NEXT: fcvtzu x13, s4 -; CHECK-CVT-NEXT: fcvtzu x15, s6 +; CHECK-CVT-NEXT: fcvtzu x15, s7 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcvtzu x16, s1 ; CHECK-CVT-NEXT: cmp x10, x8 -; CHECK-CVT-NEXT: fcvtzu x16, s0 +; CHECK-CVT-NEXT: fcvtzu x9, s2 ; CHECK-CVT-NEXT: csel x4, x10, x8, lo ; CHECK-CVT-NEXT: cmp x13, x8 +; CHECK-CVT-NEXT: fcvtzu x11, s3 ; CHECK-CVT-NEXT: csel x5, x13, x8, lo ; CHECK-CVT-NEXT: cmp x15, x8 +; CHECK-CVT-NEXT: fcvtzu x12, s4 ; CHECK-CVT-NEXT: csel x6, x15, x8, lo ; CHECK-CVT-NEXT: cmp x16, x8 +; CHECK-CVT-NEXT: fcvtzu x14, s0 ; CHECK-CVT-NEXT: csel x7, x16, x8, lo ; CHECK-CVT-NEXT: cmp x9, x8 ; CHECK-CVT-NEXT: csel x0, x9, x8, lo @@ -2063,25 +2063,25 @@ ; CHECK-FP16-NEXT: mov x8, #1125899906842623 ; CHECK-FP16-NEXT: mov h2, v0.h[1] ; CHECK-FP16-NEXT: mov h3, v0.h[2] -; CHECK-FP16-NEXT: mov h5, v0.h[3] -; CHECK-FP16-NEXT: fcvtzu x9, h0 -; CHECK-FP16-NEXT: mov h4, v1.h[1] -; CHECK-FP16-NEXT: mov h6, v1.h[2] -; CHECK-FP16-NEXT: mov h0, v1.h[3] ; CHECK-FP16-NEXT: fcvtzu x10, h1 ; CHECK-FP16-NEXT: fcvtzu x11, h2 +; CHECK-FP16-NEXT: mov h2, v1.h[1] ; CHECK-FP16-NEXT: fcvtzu x12, h3 -; CHECK-FP16-NEXT: fcvtzu x14, h5 -; CHECK-FP16-NEXT: fcvtzu x13, h4 -; CHECK-FP16-NEXT: fcvtzu x15, h6 +; CHECK-FP16-NEXT: mov h3, v1.h[2] +; CHECK-FP16-NEXT: fcvtzu x13, h2 +; CHECK-FP16-NEXT: mov h1, v1.h[3] +; CHECK-FP16-NEXT: fcvtzu x15, h3 ; CHECK-FP16-NEXT: cmp x10, x8 -; CHECK-FP16-NEXT: fcvtzu x16, h0 +; CHECK-FP16-NEXT: fcvtzu x16, h1 +; CHECK-FP16-NEXT: fcvtzu x9, h0 ; CHECK-FP16-NEXT: csel x4, x10, x8, lo ; CHECK-FP16-NEXT: cmp x13, x8 +; CHECK-FP16-NEXT: mov h0, v0.h[3] ; CHECK-FP16-NEXT: csel x5, x13, x8, lo ; CHECK-FP16-NEXT: cmp x15, x8 ; CHECK-FP16-NEXT: csel x6, x15, x8, lo ; CHECK-FP16-NEXT: cmp x16, x8 +; CHECK-FP16-NEXT: fcvtzu x14, h0 ; CHECK-FP16-NEXT: csel x7, x16, x8, lo ; CHECK-FP16-NEXT: cmp x9, x8 ; CHECK-FP16-NEXT: csel x0, x9, x8, lo @@ -2099,64 +2099,64 @@ define <8 x i64> @test_unsigned_v8f16_v8i64(<8 x half> %f) { ; CHECK-CVT-LABEL: test_unsigned_v8f16_v8i64: ; CHECK-CVT: // %bb.0: -; CHECK-CVT-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CHECK-CVT-NEXT: fcvt s2, h0 ; CHECK-CVT-NEXT: mov h4, v0.h[2] -; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: fcvt s2, h1 -; CHECK-CVT-NEXT: mov h3, v1.h[1] -; CHECK-CVT-NEXT: mov h6, v1.h[2] +; CHECK-CVT-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-CVT-NEXT: fcvt s4, h4 -; CHECK-CVT-NEXT: mov h1, v1.h[3] -; CHECK-CVT-NEXT: fcvtzu x9, s5 +; CHECK-CVT-NEXT: fcvt s3, h1 +; CHECK-CVT-NEXT: mov h5, v1.h[1] ; CHECK-CVT-NEXT: fcvtzu x8, s2 -; CHECK-CVT-NEXT: fcvt s2, h3 +; CHECK-CVT-NEXT: mov h2, v1.h[2] +; CHECK-CVT-NEXT: mov h1, v1.h[3] +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: fcvt s6, h2 +; CHECK-CVT-NEXT: fcvtzu x10, s4 +; CHECK-CVT-NEXT: fcvtzu x9, s3 ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: mov h0, v0.h[3] -; CHECK-CVT-NEXT: fcvt s5, h6 -; CHECK-CVT-NEXT: fcvt s6, h1 -; CHECK-CVT-NEXT: fcvtzu x10, s2 -; CHECK-CVT-NEXT: fmov d2, x8 -; CHECK-CVT-NEXT: fcvtzu x8, s4 +; CHECK-CVT-NEXT: fcvt s7, h1 ; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: fcvtzu x11, s5 ; CHECK-CVT-NEXT: fcvt s4, h0 -; CHECK-CVT-NEXT: fmov d0, x9 -; CHECK-CVT-NEXT: mov v2.d[1], x10 -; CHECK-CVT-NEXT: fcvtzu x10, s5 -; CHECK-CVT-NEXT: fmov d1, x8 -; CHECK-CVT-NEXT: fcvtzu x9, s3 -; CHECK-CVT-NEXT: fcvtzu x8, s4 -; CHECK-CVT-NEXT: fmov d3, x10 -; CHECK-CVT-NEXT: fcvtzu x10, s6 -; CHECK-CVT-NEXT: mov v0.d[1], x9 -; CHECK-CVT-NEXT: mov v1.d[1], x8 -; CHECK-CVT-NEXT: mov v3.d[1], x10 +; CHECK-CVT-NEXT: fmov d2, x9 +; CHECK-CVT-NEXT: fcvtzu x9, s6 +; CHECK-CVT-NEXT: fmov d0, x8 +; CHECK-CVT-NEXT: fmov d1, x10 +; CHECK-CVT-NEXT: fcvtzu x8, s3 +; CHECK-CVT-NEXT: fcvtzu x10, s4 +; CHECK-CVT-NEXT: fmov d3, x9 +; CHECK-CVT-NEXT: fcvtzu x9, s7 +; CHECK-CVT-NEXT: mov v0.d[1], x8 +; CHECK-CVT-NEXT: mov v1.d[1], x10 +; CHECK-CVT-NEXT: mov v2.d[1], x11 +; CHECK-CVT-NEXT: mov v3.d[1], x9 ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i64: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-FP16-NEXT: mov h3, v0.h[2] -; CHECK-FP16-NEXT: mov h5, v0.h[3] -; CHECK-FP16-NEXT: fcvtzu x9, h0 -; CHECK-FP16-NEXT: mov h2, v1.h[1] -; CHECK-FP16-NEXT: fcvtzu x8, h1 -; CHECK-FP16-NEXT: mov h4, v1.h[2] -; CHECK-FP16-NEXT: mov h6, v1.h[3] +; CHECK-FP16-NEXT: mov h2, v0.h[2] +; CHECK-FP16-NEXT: fcvtzu x8, h0 +; CHECK-FP16-NEXT: fcvtzu x9, h1 +; CHECK-FP16-NEXT: mov h5, v1.h[2] ; CHECK-FP16-NEXT: fcvtzu x10, h2 -; CHECK-FP16-NEXT: fmov d2, x8 -; CHECK-FP16-NEXT: fcvtzu x8, h3 ; CHECK-FP16-NEXT: mov h3, v0.h[1] -; CHECK-FP16-NEXT: fmov d0, x9 -; CHECK-FP16-NEXT: mov v2.d[1], x10 +; CHECK-FP16-NEXT: mov h4, v0.h[3] +; CHECK-FP16-NEXT: mov h0, v1.h[1] +; CHECK-FP16-NEXT: fmov d2, x9 +; CHECK-FP16-NEXT: fcvtzu x9, h5 +; CHECK-FP16-NEXT: mov h6, v1.h[3] +; CHECK-FP16-NEXT: fcvtzu x11, h0 +; CHECK-FP16-NEXT: fmov d0, x8 +; CHECK-FP16-NEXT: fcvtzu x8, h3 +; CHECK-FP16-NEXT: fmov d1, x10 ; CHECK-FP16-NEXT: fcvtzu x10, h4 -; CHECK-FP16-NEXT: fmov d1, x8 -; CHECK-FP16-NEXT: fcvtzu x9, h3 -; CHECK-FP16-NEXT: fcvtzu x8, h5 -; CHECK-FP16-NEXT: fmov d3, x10 -; CHECK-FP16-NEXT: fcvtzu x10, h6 -; CHECK-FP16-NEXT: mov v0.d[1], x9 -; CHECK-FP16-NEXT: mov v1.d[1], x8 -; CHECK-FP16-NEXT: mov v3.d[1], x10 +; CHECK-FP16-NEXT: fmov d3, x9 +; CHECK-FP16-NEXT: fcvtzu x9, h6 +; CHECK-FP16-NEXT: mov v0.d[1], x8 +; CHECK-FP16-NEXT: mov v1.d[1], x10 +; CHECK-FP16-NEXT: mov v2.d[1], x11 +; CHECK-FP16-NEXT: mov v3.d[1], x9 ; CHECK-FP16-NEXT: ret %x = call <8 x i64> @llvm.fptoui.sat.v8f16.v8i64(<8 x half> %f) ret <8 x i64> %x @@ -2196,18 +2196,18 @@ ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: mov w8, #1904214015 +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, #0.0 -; CHECK-NEXT: mov x21, #68719476735 -; CHECK-NEXT: mov h0, v0.h[3] +; CHECK-NEXT: mov x24, #68719476735 ; CHECK-NEXT: fmov s9, w8 +; CHECK-NEXT: mov h0, v0.h[3] ; CHECK-NEXT: csel x8, xzr, x1, lt ; CHECK-NEXT: csel x9, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: csinv x9, x9, xzr, le -; CHECK-NEXT: csel x20, x21, x8, gt +; CHECK-NEXT: csel x20, x24, x8, gt ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: str x9, [sp, #24] // 8-byte Folded Spill ; CHECK-NEXT: bl __fixunssfti @@ -2218,7 +2218,7 @@ ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: csinv x9, x9, xzr, le -; CHECK-NEXT: csel x23, x21, x8, gt +; CHECK-NEXT: csel x22, x24, x8, gt ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: str x9, [sp, #16] // 8-byte Folded Spill ; CHECK-NEXT: bl __fixunssfti @@ -2230,9 +2230,9 @@ ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: csinv x8, x8, xzr, le -; CHECK-NEXT: csel x24, x21, x9, gt -; CHECK-NEXT: str x8, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: csel x23, x24, x9, gt ; CHECK-NEXT: fmov s0, s8 +; CHECK-NEXT: str x8, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, #0.0 @@ -2242,9 +2242,9 @@ ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: csinv x8, x8, xzr, le -; CHECK-NEXT: csel x26, x21, x9, gt -; CHECK-NEXT: str x8, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: csel x26, x24, x9, gt ; CHECK-NEXT: fmov s0, s8 +; CHECK-NEXT: str x8, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, #0.0 @@ -2253,8 +2253,8 @@ ; CHECK-NEXT: csel x9, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csinv x29, x9, xzr, le -; CHECK-NEXT: csel x28, x21, x8, gt +; CHECK-NEXT: csinv x27, x9, xzr, le +; CHECK-NEXT: csel x29, x24, x8, gt ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: fcmp s8, #0.0 @@ -2263,8 +2263,8 @@ ; CHECK-NEXT: csel x9, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csinv x27, x9, xzr, le -; CHECK-NEXT: csel x22, x21, x8, gt +; CHECK-NEXT: csinv x28, x9, xzr, le +; CHECK-NEXT: csel x25, x24, x8, gt ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload @@ -2275,57 +2275,58 @@ ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: csinv x8, x8, xzr, le -; CHECK-NEXT: csel x25, x21, x9, gt -; CHECK-NEXT: str x8, [sp] // 8-byte Folded Spill +; CHECK-NEXT: csel x21, x24, x9, gt ; CHECK-NEXT: fmov s0, s8 +; CHECK-NEXT: str x8, [sp] // 8-byte Folded Spill ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: ldr x11, [sp, #8] // 8-byte Folded Reload -; CHECK-NEXT: fmov d0, x27 -; CHECK-NEXT: fmov d1, x29 ; CHECK-NEXT: fcmp s8, #0.0 -; CHECK-NEXT: lsr x10, x22, #28 -; CHECK-NEXT: stur x11, [x19, #75] -; CHECK-NEXT: lsr x11, x28, #28 -; CHECK-NEXT: mov v0.d[1], x22 -; CHECK-NEXT: ldr x12, [sp, #32] // 8-byte Folded Reload -; CHECK-NEXT: mov v1.d[1], x28 -; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, xzr, x1, lt +; CHECK-NEXT: lsr x8, x25, #28 +; CHECK-NEXT: fmov d0, x28 +; CHECK-NEXT: lsr x11, x29, #28 +; CHECK-NEXT: fmov d1, x27 +; CHECK-NEXT: csel x9, xzr, x0, lt +; CHECK-NEXT: csel x10, xzr, x1, lt ; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: stur x12, [x19, #50] -; CHECK-NEXT: fmov x12, d0 -; CHECK-NEXT: fmov x13, d1 -; CHECK-NEXT: csinv x8, x8, xzr, le -; CHECK-NEXT: ldp d0, d1, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: csel x9, x21, x9, gt -; CHECK-NEXT: strb w10, [x19, #49] -; CHECK-NEXT: extr x10, x22, x12, #28 -; CHECK-NEXT: bfi x9, x12, #36, #28 -; CHECK-NEXT: stur x8, [x19, #25] -; CHECK-NEXT: extr x8, x28, x13, #28 -; CHECK-NEXT: mov v0.d[1], x23 +; CHECK-NEXT: strb w8, [x19, #49] +; CHECK-NEXT: mov v0.d[1], x25 ; CHECK-NEXT: strb w11, [x19, #24] +; CHECK-NEXT: ldr x11, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: csinv x8, x9, xzr, le +; CHECK-NEXT: fmov x9, d0 +; CHECK-NEXT: csel x10, x24, x10, gt +; CHECK-NEXT: stur x11, [x19, #75] +; CHECK-NEXT: stur x8, [x19, #25] +; CHECK-NEXT: extr x8, x25, x9, #28 +; CHECK-NEXT: mov v1.d[1], x29 +; CHECK-NEXT: bfi x10, x9, #36, #28 +; CHECK-NEXT: ldr x9, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: fmov x11, d1 +; CHECK-NEXT: ldr d0, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: stur x8, [x19, #41] +; CHECK-NEXT: stur x9, [x19, #50] +; CHECK-NEXT: ldr d1, [sp, #24] // 8-byte Folded Reload +; CHECK-NEXT: extr x9, x29, x11, #28 +; CHECK-NEXT: mov v0.d[1], x22 +; CHECK-NEXT: stur x10, [x19, #33] +; CHECK-NEXT: bfi x21, x11, #36, #28 +; CHECK-NEXT: lsr x10, x20, #28 ; CHECK-NEXT: mov v1.d[1], x20 -; CHECK-NEXT: stur x10, [x19, #41] -; CHECK-NEXT: stur x9, [x19, #33] -; CHECK-NEXT: bfi x25, x13, #36, #28 -; CHECK-NEXT: str x8, [x19, #16] -; CHECK-NEXT: lsr x9, x23, #28 +; CHECK-NEXT: str x9, [x19, #16] ; CHECK-NEXT: fmov x8, d0 ; CHECK-NEXT: ldr x12, [sp] // 8-byte Folded Reload ; CHECK-NEXT: fmov x11, d1 -; CHECK-NEXT: lsr x10, x20, #28 -; CHECK-NEXT: strb w9, [x19, #99] -; CHECK-NEXT: stp x12, x25, [x19] -; CHECK-NEXT: extr x12, x23, x8, #28 +; CHECK-NEXT: lsr x9, x22, #28 +; CHECK-NEXT: strb w10, [x19, #74] +; CHECK-NEXT: stp x12, x21, [x19] +; CHECK-NEXT: extr x12, x22, x8, #28 ; CHECK-NEXT: bfi x26, x8, #36, #28 ; CHECK-NEXT: extr x8, x20, x11, #28 -; CHECK-NEXT: bfi x24, x11, #36, #28 -; CHECK-NEXT: strb w10, [x19, #74] +; CHECK-NEXT: bfi x23, x11, #36, #28 +; CHECK-NEXT: strb w9, [x19, #99] ; CHECK-NEXT: stur x12, [x19, #91] ; CHECK-NEXT: stur x26, [x19, #83] ; CHECK-NEXT: stur x8, [x19, #66] -; CHECK-NEXT: stur x24, [x19, #58] +; CHECK-NEXT: stur x23, [x19, #58] ; CHECK-NEXT: ldp x20, x19, [sp, #160] // 16-byte Folded Reload ; CHECK-NEXT: ldp x22, x21, [sp, #144] // 16-byte Folded Reload ; CHECK-NEXT: ldp x24, x23, [sp, #128] // 16-byte Folded Reload @@ -2372,11 +2373,11 @@ ; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: mov w8, #2139095039 +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, #0.0 -; CHECK-NEXT: mov h0, v0.h[1] ; CHECK-NEXT: fmov s9, w8 +; CHECK-NEXT: mov h0, v0.h[1] ; CHECK-NEXT: csel x8, xzr, x1, lt ; CHECK-NEXT: csel x9, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s9 diff --git a/llvm/test/CodeGen/AArch64/funnel-shift-rot.ll b/llvm/test/CodeGen/AArch64/funnel-shift-rot.ll --- a/llvm/test/CodeGen/AArch64/funnel-shift-rot.ll +++ b/llvm/test/CodeGen/AArch64/funnel-shift-rot.ll @@ -77,13 +77,13 @@ define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %z) { ; CHECK-LABEL: rotl_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.4s, #31 ; CHECK-NEXT: neg v3.4s, v1.4s +; CHECK-NEXT: movi v2.4s, #31 +; CHECK-NEXT: and v3.16b, v3.16b, v2.16b +; CHECK-NEXT: neg v3.4s, v3.4s ; CHECK-NEXT: and v1.16b, v1.16b, v2.16b -; CHECK-NEXT: and v2.16b, v3.16b, v2.16b ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s -; CHECK-NEXT: neg v2.4s, v2.4s -; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ushl v0.4s, v0.4s, v3.4s ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %f = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %z) @@ -170,8 +170,8 @@ ; CHECK-NEXT: movi v2.4s, #31 ; CHECK-NEXT: neg v3.4s, v1.4s ; CHECK-NEXT: and v1.16b, v1.16b, v2.16b -; CHECK-NEXT: and v2.16b, v3.16b, v2.16b ; CHECK-NEXT: neg v1.4s, v1.4s +; CHECK-NEXT: and v2.16b, v3.16b, v2.16b ; CHECK-NEXT: ushl v2.4s, v0.4s, v2.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v1.4s ; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b diff --git a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll --- a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll +++ b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll @@ -187,9 +187,9 @@ define <4 x i1> @vec_4xi32_splat_eq(<4 x i32> %x, <4 x i32> %y) nounwind { ; CHECK-LABEL: vec_4xi32_splat_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: ushl v0.4s, v0.4s, v1.4s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret @@ -219,9 +219,9 @@ define <4 x i1> @vec_4xi32_nonsplat_undef0_eq(<4 x i32> %x, <4 x i32> %y) nounwind { ; CHECK-LABEL: vec_4xi32_nonsplat_undef0_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: ushl v0.4s, v0.4s, v1.4s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret @@ -233,8 +233,8 @@ define <4 x i1> @vec_4xi32_nonsplat_undef1_eq(<4 x i32> %x, <4 x i32> %y) nounwind { ; CHECK-LABEL: vec_4xi32_nonsplat_undef1_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: neg v1.4s, v1.4s +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s ; CHECK-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 @@ -248,8 +248,8 @@ define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwind { ; CHECK-LABEL: vec_4xi32_nonsplat_undef2_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: neg v1.4s, v1.4s +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s ; CHECK-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 diff --git a/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll b/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll --- a/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll +++ b/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll @@ -97,17 +97,17 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: cnth x8 -; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0] -; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: sub x8, x8, #8 ; CHECK-NEXT: mov w9, #8 ; CHECK-NEXT: cmp x8, #8 +; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0] +; CHECK-NEXT: ldr d1, [x1] ; CHECK-NEXT: csel x8, x8, x9, lo -; CHECK-NEXT: ushll v1.8h, v1.8b, #0 ; CHECK-NEXT: lsl x8, x8, #1 ; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: ushll v1.8h, v1.8b, #0 ; CHECK-NEXT: st1h { z0.h }, p0, [sp] ; CHECK-NEXT: str q1, [x9, x8] ; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp] @@ -146,17 +146,17 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: cntw x8 -; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0] -; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: sub x8, x8, #4 ; CHECK-NEXT: mov w9, #4 ; CHECK-NEXT: cmp x8, #4 +; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0] +; CHECK-NEXT: ldr d1, [x1] ; CHECK-NEXT: csel x8, x8, x9, lo -; CHECK-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-NEXT: lsl x8, x8, #2 ; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: str q1, [x9, x8] ; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp] @@ -195,17 +195,17 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: cntd x8 -; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0] -; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: sub x8, x8, #2 ; CHECK-NEXT: mov w9, #2 ; CHECK-NEXT: cmp x8, #2 +; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0] +; CHECK-NEXT: ldr d1, [x1] ; CHECK-NEXT: csel x8, x8, x9, lo -; CHECK-NEXT: ushll v1.2d, v1.2s, #0 ; CHECK-NEXT: lsl x8, x8, #3 ; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: ushll v1.2d, v1.2s, #0 ; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: str q1, [x9, x8] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp] diff --git a/llvm/test/CodeGen/AArch64/lowerMUL-newload.ll b/llvm/test/CodeGen/AArch64/lowerMUL-newload.ll --- a/llvm/test/CodeGen/AArch64/lowerMUL-newload.ll +++ b/llvm/test/CodeGen/AArch64/lowerMUL-newload.ll @@ -22,9 +22,9 @@ ; CHECK-LABEL: mlai16_and: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: smull v0.4s, v1.4h, v0.4h -; CHECK-NEXT: movi v3.2d, #0x00ffff0000ffff +; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff ; CHECK-NEXT: uaddw v0.4s, v0.4s, v2.4h -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %v0 = sext <4 x i16> %vec0 to <4 x i32> @@ -90,9 +90,9 @@ ; CHECK-LABEL: addmuli16_and: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: smull v1.4s, v1.4h, v2.4h -; CHECK-NEXT: movi v3.2d, #0x00ffff0000ffff ; CHECK-NEXT: smlal v1.4s, v0.4h, v2.4h -; CHECK-NEXT: and v0.16b, v1.16b, v3.16b +; CHECK-NEXT: movi v0.2d, #0x00ffff0000ffff +; CHECK-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret entry: %v0 = sext <4 x i16> %vec0 to <4 x i32> @@ -158,9 +158,9 @@ ; CHECK-LABEL: mlai32_and: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: smull v0.2d, v1.2s, v0.2s -; CHECK-NEXT: movi v3.2d, #0x000000ffffffff +; CHECK-NEXT: movi v1.2d, #0x000000ffffffff ; CHECK-NEXT: uaddw v0.2d, v0.2d, v2.2s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %v0 = sext <2 x i32> %vec0 to <2 x i64> @@ -226,9 +226,9 @@ ; CHECK-LABEL: addmuli32_and: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: smull v1.2d, v1.2s, v2.2s -; CHECK-NEXT: movi v3.2d, #0x000000ffffffff ; CHECK-NEXT: smlal v1.2d, v0.2s, v2.2s -; CHECK-NEXT: and v0.16b, v1.16b, v3.16b +; CHECK-NEXT: movi v0.2d, #0x000000ffffffff +; CHECK-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret entry: %v0 = sext <2 x i32> %vec0 to <2 x i64> diff --git a/llvm/test/CodeGen/AArch64/minmax-of-minmax.ll b/llvm/test/CodeGen/AArch64/minmax-of-minmax.ll --- a/llvm/test/CodeGen/AArch64/minmax-of-minmax.ll +++ b/llvm/test/CodeGen/AArch64/minmax-of-minmax.ll @@ -1079,8 +1079,8 @@ ; CHECK-LABEL: notted_smin_bc_ab: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s @@ -1101,8 +1101,8 @@ ; CHECK-LABEL: notted_smin_bc_ba: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s ; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s @@ -1167,8 +1167,8 @@ ; CHECK-LABEL: notted_smin_bc_ab_swap_pred: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s @@ -1189,8 +1189,8 @@ ; CHECK-LABEL: notted_smin_bc_ba_swap_pred: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s ; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s @@ -1255,8 +1255,8 @@ ; CHECK-LABEL: notted_smin_bc_ab_eq_pred: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s @@ -1277,8 +1277,8 @@ ; CHECK-LABEL: notted_smin_bc_ba_eq_pred: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s ; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s @@ -1343,8 +1343,8 @@ ; CHECK-LABEL: notted_smin_bc_ab_eq_swap_pred: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s @@ -1365,8 +1365,8 @@ ; CHECK-LABEL: notted_smin_bc_ba_eq_swap_pred: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s ; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s @@ -1431,8 +1431,8 @@ ; CHECK-LABEL: notted_smax_bc_ab: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s @@ -1453,8 +1453,8 @@ ; CHECK-LABEL: notted_smax_bc_ba: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s ; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s @@ -1519,8 +1519,8 @@ ; CHECK-LABEL: notted_smax_bc_ab_swap_pred: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s @@ -1541,8 +1541,8 @@ ; CHECK-LABEL: notted_smax_bc_ba_swap_pred: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s ; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s @@ -1607,8 +1607,8 @@ ; CHECK-LABEL: notted_smax_bc_ab_eq_pred: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s @@ -1629,8 +1629,8 @@ ; CHECK-LABEL: notted_smax_bc_ba_eq_pred: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s ; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s @@ -1695,8 +1695,8 @@ ; CHECK-LABEL: notted_smax_bc_ab_eq_swap_pred: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s @@ -1717,8 +1717,8 @@ ; CHECK-LABEL: notted_smax_bc_ba_eq_swap_pred: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s ; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s @@ -1783,8 +1783,8 @@ ; CHECK-LABEL: notted_umin_bc_ab: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s @@ -1805,8 +1805,8 @@ ; CHECK-LABEL: notted_umin_bc_ba: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s ; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s @@ -1871,8 +1871,8 @@ ; CHECK-LABEL: notted_umin_bc_ab_swap_pred: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s @@ -1893,8 +1893,8 @@ ; CHECK-LABEL: notted_umin_bc_ba_swap_pred: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s ; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s @@ -1959,8 +1959,8 @@ ; CHECK-LABEL: notted_umin_bc_ab_eq_pred: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s @@ -1981,8 +1981,8 @@ ; CHECK-LABEL: notted_umin_bc_ba_eq_pred: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s ; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s @@ -2047,8 +2047,8 @@ ; CHECK-LABEL: notted_umin_bc_ab_eq_swap_pred: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s @@ -2069,8 +2069,8 @@ ; CHECK-LABEL: notted_umin_bc_ba_eq_swap_pred: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s ; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s @@ -2135,8 +2135,8 @@ ; CHECK-LABEL: notted_umax_bc_ab: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s @@ -2157,8 +2157,8 @@ ; CHECK-LABEL: notted_umax_bc_ba: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s ; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s @@ -2223,8 +2223,8 @@ ; CHECK-LABEL: notted_umax_bc_ab_swap_pred: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s @@ -2245,8 +2245,8 @@ ; CHECK-LABEL: notted_umax_bc_ba_swap_pred: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s ; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s @@ -2311,8 +2311,8 @@ ; CHECK-LABEL: notted_umax_bc_ab_eq_pred: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s @@ -2333,8 +2333,8 @@ ; CHECK-LABEL: notted_umax_bc_ba_eq_pred: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s ; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s @@ -2399,8 +2399,8 @@ ; CHECK-LABEL: notted_umax_bc_ab_eq_swap_pred: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s @@ -2421,8 +2421,8 @@ ; CHECK-LABEL: notted_umax_bc_ba_eq_swap_pred: ; CHECK: // %bb.0: ; CHECK-NEXT: mvn v1.16b, v1.16b -; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s ; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s diff --git a/llvm/test/CodeGen/AArch64/minmax.ll b/llvm/test/CodeGen/AArch64/minmax.ll --- a/llvm/test/CodeGen/AArch64/minmax.ll +++ b/llvm/test/CodeGen/AArch64/minmax.ll @@ -108,9 +108,9 @@ define <16 x i32> @t11(<16 x i32> %a, <16 x i32> %b) { ; CHECK-LABEL: t11: ; CHECK: // %bb.0: -; CHECK-NEXT: smin v2.4s, v2.4s, v6.4s ; CHECK-NEXT: smin v0.4s, v0.4s, v4.4s ; CHECK-NEXT: smin v1.4s, v1.4s, v5.4s +; CHECK-NEXT: smin v2.4s, v2.4s, v6.4s ; CHECK-NEXT: smin v3.4s, v3.4s, v7.4s ; CHECK-NEXT: ret %t1 = icmp sle <16 x i32> %a, %b @@ -122,10 +122,10 @@ define <16 x i8> @t12(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: t12: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.16b, #1 -; CHECK-NEXT: cmhi v3.16b, v1.16b, v0.16b -; CHECK-NEXT: bif v0.16b, v1.16b, v3.16b -; CHECK-NEXT: and v1.16b, v3.16b, v2.16b +; CHECK-NEXT: cmhi v2.16b, v1.16b, v0.16b +; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b +; CHECK-NEXT: movi v1.16b, #1 +; CHECK-NEXT: and v1.16b, v2.16b, v1.16b ; CHECK-NEXT: add v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %t1 = icmp ugt <16 x i8> %b, %a diff --git a/llvm/test/CodeGen/AArch64/named-vector-shuffles-neon.ll b/llvm/test/CodeGen/AArch64/named-vector-shuffles-neon.ll --- a/llvm/test/CodeGen/AArch64/named-vector-shuffles-neon.ll +++ b/llvm/test/CodeGen/AArch64/named-vector-shuffles-neon.ll @@ -55,8 +55,8 @@ ; CHECK-NEXT: ext v6.16b, v2.16b, v3.16b, #12 ; CHECK-NEXT: ext v0.16b, v1.16b, v2.16b, #12 ; CHECK-NEXT: ext v2.16b, v3.16b, v4.16b, #12 -; CHECK-NEXT: mov v3.16b, v5.16b ; CHECK-NEXT: mov v1.16b, v6.16b +; CHECK-NEXT: mov v3.16b, v5.16b ; CHECK-NEXT: ret %res = call <16 x float> @llvm.experimental.vector.splice.v16f32(<16 x float> %a, <16 x float> %b, i32 7) ret <16 x float> %res @@ -123,8 +123,8 @@ ; CHECK-NEXT: ext v6.16b, v2.16b, v3.16b, #12 ; CHECK-NEXT: ext v0.16b, v1.16b, v2.16b, #12 ; CHECK-NEXT: ext v2.16b, v3.16b, v4.16b, #12 -; CHECK-NEXT: mov v3.16b, v5.16b ; CHECK-NEXT: mov v1.16b, v6.16b +; CHECK-NEXT: mov v3.16b, v5.16b ; CHECK-NEXT: ret %res = call <16 x float> @llvm.experimental.vector.splice.v16f32(<16 x float> %a, <16 x float> %b, i32 -9) ret <16 x float> %res diff --git a/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll b/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll --- a/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll +++ b/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll @@ -530,8 +530,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov z0.d, p1/z, #1 // =0x1 ; CHECK-NEXT: mov z1.d, p0/z, #1 // =0x1 -; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8 ; CHECK-NEXT: ptrue p2.d +; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8 ; CHECK-NEXT: and z1.d, z1.d, #0x1 ; CHECK-NEXT: cmpne p0.d, p2/z, z1.d, #0 ; CHECK-NEXT: ret @@ -545,8 +545,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov z0.s, p1/z, #1 // =0x1 ; CHECK-NEXT: mov z1.s, p0/z, #1 // =0x1 -; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8 ; CHECK-NEXT: ptrue p2.s +; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8 ; CHECK-NEXT: and z1.s, z1.s, #0x1 ; CHECK-NEXT: cmpne p0.s, p2/z, z1.s, #0 ; CHECK-NEXT: ret @@ -560,8 +560,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov z0.h, p1/z, #1 // =0x1 ; CHECK-NEXT: mov z1.h, p0/z, #1 // =0x1 -; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8 ; CHECK-NEXT: ptrue p2.h +; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8 ; CHECK-NEXT: and z1.h, z1.h, #0x1 ; CHECK-NEXT: cmpne p0.h, p2/z, z1.h, #0 ; CHECK-NEXT: ret @@ -575,8 +575,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov z0.b, p1/z, #1 // =0x1 ; CHECK-NEXT: mov z1.b, p0/z, #1 // =0x1 -; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8 ; CHECK-NEXT: ptrue p2.b +; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8 ; CHECK-NEXT: and z1.b, z1.b, #0x1 ; CHECK-NEXT: cmpne p0.b, p2/z, z1.b, #0 ; CHECK-NEXT: ret @@ -1042,10 +1042,10 @@ define @splice_nxv2i1( %a, %b) #0 { ; CHECK-LABEL: splice_nxv2i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p2.d ; CHECK-NEXT: mov z0.d, p0/z, #1 // =0x1 -; CHECK-NEXT: lastb d0, p2, z0.d +; CHECK-NEXT: ptrue p2.d ; CHECK-NEXT: mov z1.d, p1/z, #1 // =0x1 +; CHECK-NEXT: lastb d0, p2, z0.d ; CHECK-NEXT: insr z1.d, d0 ; CHECK-NEXT: and z1.d, z1.d, #0x1 ; CHECK-NEXT: cmpne p0.d, p2/z, z1.d, #0 @@ -1058,10 +1058,10 @@ define @splice_nxv4i1( %a, %b) #0 { ; CHECK-LABEL: splice_nxv4i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p2.s ; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1 -; CHECK-NEXT: lastb s0, p2, z0.s +; CHECK-NEXT: ptrue p2.s ; CHECK-NEXT: mov z1.s, p1/z, #1 // =0x1 +; CHECK-NEXT: lastb s0, p2, z0.s ; CHECK-NEXT: insr z1.s, s0 ; CHECK-NEXT: and z1.s, z1.s, #0x1 ; CHECK-NEXT: cmpne p0.s, p2/z, z1.s, #0 @@ -1074,10 +1074,10 @@ define @splice_nxv8i1( %a, %b) #0 { ; CHECK-LABEL: splice_nxv8i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p2.h ; CHECK-NEXT: mov z0.h, p0/z, #1 // =0x1 -; CHECK-NEXT: lastb h0, p2, z0.h +; CHECK-NEXT: ptrue p2.h ; CHECK-NEXT: mov z1.h, p1/z, #1 // =0x1 +; CHECK-NEXT: lastb h0, p2, z0.h ; CHECK-NEXT: insr z1.h, h0 ; CHECK-NEXT: and z1.h, z1.h, #0x1 ; CHECK-NEXT: cmpne p0.h, p2/z, z1.h, #0 @@ -1090,10 +1090,10 @@ define @splice_nxv16i1( %a, %b) #0 { ; CHECK-LABEL: splice_nxv16i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p2.b ; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1 -; CHECK-NEXT: lastb b0, p2, z0.b +; CHECK-NEXT: ptrue p2.b ; CHECK-NEXT: mov z1.b, p1/z, #1 // =0x1 +; CHECK-NEXT: lastb b0, p2, z0.b ; CHECK-NEXT: insr z1.b, b0 ; CHECK-NEXT: and z1.b, z1.b, #0x1 ; CHECK-NEXT: cmpne p0.b, p2/z, z1.b, #0 diff --git a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll --- a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll +++ b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll @@ -895,8 +895,8 @@ ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI89_0] -; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b +; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI89_0] +; CHECK-NEXT: tbl v0.8b, { v0.16b }, v2.8b ; CHECK-NEXT: ret %c = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> ret <8 x i8> %c diff --git a/llvm/test/CodeGen/AArch64/neon-dotreduce.ll b/llvm/test/CodeGen/AArch64/neon-dotreduce.ll --- a/llvm/test/CodeGen/AArch64/neon-dotreduce.ll +++ b/llvm/test/CodeGen/AArch64/neon-dotreduce.ll @@ -7,11 +7,11 @@ define i32 @test_udot_v8i8(i8* nocapture readonly %a, i8* nocapture readonly %b) { ; CHECK-LABEL: test_udot_v8i8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: ldr d2, [x1] -; CHECK-NEXT: udot v0.2s, v2.8b, v1.8b -; CHECK-NEXT: addp v0.2s, v0.2s, v0.2s +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: movi v2.2d, #0000000000000000 +; CHECK-NEXT: udot v2.2s, v1.8b, v0.8b +; CHECK-NEXT: addp v0.2s, v2.2s, v2.2s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret entry: @@ -29,10 +29,10 @@ define i32 @test_udot_v8i8_nomla(i8* nocapture readonly %a1) { ; CHECK-LABEL: test_udot_v8i8_nomla: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v0.8b, #1 -; CHECK-NEXT: ldr d2, [x0] +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: movi v2.8b, #1 ; CHECK-NEXT: movi v1.2d, #0000000000000000 -; CHECK-NEXT: udot v1.2s, v2.8b, v0.8b +; CHECK-NEXT: udot v1.2s, v0.8b, v2.8b ; CHECK-NEXT: addp v0.2s, v1.2s, v1.2s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -47,11 +47,11 @@ define i32 @test_sdot_v8i8(i8* nocapture readonly %a, i8* nocapture readonly %b) { ; CHECK-LABEL: test_sdot_v8i8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: ldr d2, [x1] -; CHECK-NEXT: sdot v0.2s, v2.8b, v1.8b -; CHECK-NEXT: addp v0.2s, v0.2s, v0.2s +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: movi v2.2d, #0000000000000000 +; CHECK-NEXT: sdot v2.2s, v1.8b, v0.8b +; CHECK-NEXT: addp v0.2s, v2.2s, v2.2s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret entry: @@ -69,10 +69,10 @@ define i32 @test_sdot_v8i8_nomla(i8* nocapture readonly %a1) { ; CHECK-LABEL: test_sdot_v8i8_nomla: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v0.8b, #1 -; CHECK-NEXT: ldr d2, [x0] +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: movi v2.8b, #1 ; CHECK-NEXT: movi v1.2d, #0000000000000000 -; CHECK-NEXT: sdot v1.2s, v2.8b, v0.8b +; CHECK-NEXT: sdot v1.2s, v0.8b, v2.8b ; CHECK-NEXT: addp v0.2s, v1.2s, v1.2s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret @@ -88,11 +88,11 @@ define i32 @test_udot_v16i8(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %sum) { ; CHECK-LABEL: test_udot_v16i8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: ldr q2, [x0] -; CHECK-NEXT: udot v0.4s, v1.16b, v2.16b -; CHECK-NEXT: addv s0, v0.4s +; CHECK-NEXT: ldr q0, [x1] +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: movi v2.2d, #0000000000000000 +; CHECK-NEXT: udot v2.4s, v0.16b, v1.16b +; CHECK-NEXT: addv s0, v2.4s ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: add w0, w8, w2 ; CHECK-NEXT: ret @@ -112,11 +112,11 @@ define i32 @test_udot_v16i8_nomla(i8* nocapture readonly %a1) { ; CHECK-LABEL: test_udot_v16i8_nomla: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v0.16b, #1 -; CHECK-NEXT: ldr q2, [x0] -; CHECK-NEXT: movi v1.2d, #0000000000000000 -; CHECK-NEXT: udot v1.4s, v2.16b, v0.16b -; CHECK-NEXT: addv s0, v1.4s +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: movi v1.16b, #1 +; CHECK-NEXT: movi v2.2d, #0000000000000000 +; CHECK-NEXT: udot v2.4s, v0.16b, v1.16b +; CHECK-NEXT: addv s0, v2.4s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret entry: @@ -130,11 +130,11 @@ define i32 @test_sdot_v16i8(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %sum) { ; CHECK-LABEL: test_sdot_v16i8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: ldr q2, [x0] -; CHECK-NEXT: sdot v0.4s, v1.16b, v2.16b -; CHECK-NEXT: addv s0, v0.4s +; CHECK-NEXT: ldr q0, [x1] +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: movi v2.2d, #0000000000000000 +; CHECK-NEXT: sdot v2.4s, v0.16b, v1.16b +; CHECK-NEXT: addv s0, v2.4s ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: add w0, w8, w2 ; CHECK-NEXT: ret @@ -154,11 +154,11 @@ define i32 @test_sdot_v16i8_nomla(i8* nocapture readonly %a1) { ; CHECK-LABEL: test_sdot_v16i8_nomla: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v0.16b, #1 -; CHECK-NEXT: ldr q2, [x0] -; CHECK-NEXT: movi v1.2d, #0000000000000000 -; CHECK-NEXT: sdot v1.4s, v2.16b, v0.16b -; CHECK-NEXT: addv s0, v1.4s +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: movi v1.16b, #1 +; CHECK-NEXT: movi v2.2d, #0000000000000000 +; CHECK-NEXT: sdot v2.4s, v0.16b, v1.16b +; CHECK-NEXT: addv s0, v2.4s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret entry: @@ -195,11 +195,11 @@ define i32 @test_udot_v8i8_double_nomla(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) { ; CHECK-LABEL: test_udot_v8i8_double_nomla: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v1.8b, #1 -; CHECK-NEXT: movi v3.2d, #0000000000000000 -; CHECK-NEXT: udot v3.2s, v2.8b, v1.8b -; CHECK-NEXT: udot v3.2s, v0.8b, v1.8b -; CHECK-NEXT: addp v0.2s, v3.2s, v3.2s +; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: movi v3.8b, #1 +; CHECK-NEXT: udot v1.2s, v2.8b, v3.8b +; CHECK-NEXT: udot v1.2s, v0.8b, v3.8b +; CHECK-NEXT: addp v0.2s, v1.2s, v1.2s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret entry: @@ -277,11 +277,11 @@ define i32 @test_sdot_v8i8_double_nomla(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d) { ; CHECK-LABEL: test_sdot_v8i8_double_nomla: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v1.8b, #1 -; CHECK-NEXT: movi v3.2d, #0000000000000000 -; CHECK-NEXT: sdot v3.2s, v2.8b, v1.8b -; CHECK-NEXT: sdot v3.2s, v0.8b, v1.8b -; CHECK-NEXT: addp v0.2s, v3.2s, v3.2s +; CHECK-NEXT: movi v1.2d, #0000000000000000 +; CHECK-NEXT: movi v3.8b, #1 +; CHECK-NEXT: sdot v1.2s, v2.8b, v3.8b +; CHECK-NEXT: sdot v1.2s, v0.8b, v3.8b +; CHECK-NEXT: addp v0.2s, v1.2s, v1.2s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/neon-truncstore.ll b/llvm/test/CodeGen/AArch64/neon-truncstore.ll --- a/llvm/test/CodeGen/AArch64/neon-truncstore.ll +++ b/llvm/test/CodeGen/AArch64/neon-truncstore.ll @@ -45,10 +45,10 @@ ; CHECK-LABEL: v2i32_v2i16: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strh w9, [x0] +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: strh w8, [x0, #2] +; CHECK-NEXT: strh w9, [x0] ; CHECK-NEXT: ret %b = trunc <2 x i32> %a to <2 x i16> store <2 x i16> %b, <2 x i16>* %result @@ -96,10 +96,10 @@ ; CHECK-LABEL: v2i32_v2i8: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strb w9, [x0] +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: strb w8, [x0, #1] +; CHECK-NEXT: strb w9, [x0] ; CHECK-NEXT: ret %b = trunc <2 x i32> %a to <2 x i8> store <2 x i8> %b, <2 x i8>* %result @@ -173,10 +173,10 @@ ; CHECK-LABEL: v2i16_v2i8: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strb w9, [x0] +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: strb w8, [x0, #1] +; CHECK-NEXT: strb w9, [x0] ; CHECK-NEXT: ret %b = trunc <2 x i16> %a to <2 x i8> store <2 x i8> %b, <2 x i8>* %result diff --git a/llvm/test/CodeGen/AArch64/nontemporal.ll b/llvm/test/CodeGen/AArch64/nontemporal.ll --- a/llvm/test/CodeGen/AArch64/nontemporal.ll +++ b/llvm/test/CodeGen/AArch64/nontemporal.ll @@ -208,8 +208,8 @@ define void @test_stnp_v4f32_invalid_offset_4(i8* %p, <4 x float> %v) #0 { ; CHECK-LABEL: test_stnp_v4f32_invalid_offset_4: ; CHECK: ; %bb.0: -; CHECK-NEXT: mov d1, v0[1] ; CHECK-NEXT: add x8, x0, #4 +; CHECK-NEXT: mov d1, v0[1] ; CHECK-NEXT: stnp d0, d1, [x8] ; CHECK-NEXT: ret %tmp0 = getelementptr i8, i8* %p, i32 4 @@ -221,8 +221,8 @@ define void @test_stnp_v4f32_invalid_offset_neg_4(i8* %p, <4 x float> %v) #0 { ; CHECK-LABEL: test_stnp_v4f32_invalid_offset_neg_4: ; CHECK: ; %bb.0: -; CHECK-NEXT: mov d1, v0[1] ; CHECK-NEXT: sub x8, x0, #4 +; CHECK-NEXT: mov d1, v0[1] ; CHECK-NEXT: stnp d0, d1, [x8] ; CHECK-NEXT: ret %tmp0 = getelementptr i8, i8* %p, i32 -4 @@ -234,8 +234,8 @@ define void @test_stnp_v4f32_invalid_offset_512(i8* %p, <4 x float> %v) #0 { ; CHECK-LABEL: test_stnp_v4f32_invalid_offset_512: ; CHECK: ; %bb.0: -; CHECK-NEXT: mov d1, v0[1] ; CHECK-NEXT: add x8, x0, #512 +; CHECK-NEXT: mov d1, v0[1] ; CHECK-NEXT: stnp d0, d1, [x8] ; CHECK-NEXT: ret %tmp0 = getelementptr i8, i8* %p, i32 512 @@ -259,8 +259,8 @@ define void @test_stnp_v4f32_invalid_offset_508(i8* %p, <4 x float> %v) #0 { ; CHECK-LABEL: test_stnp_v4f32_invalid_offset_508: ; CHECK: ; %bb.0: -; CHECK-NEXT: mov d1, v0[1] ; CHECK-NEXT: add x8, x0, #508 +; CHECK-NEXT: mov d1, v0[1] ; CHECK-NEXT: stnp d0, d1, [x8] ; CHECK-NEXT: ret %tmp0 = getelementptr i8, i8* %p, i32 508 @@ -272,8 +272,8 @@ define void @test_stnp_v4f32_invalid_offset_neg_520(i8* %p, <4 x float> %v) #0 { ; CHECK-LABEL: test_stnp_v4f32_invalid_offset_neg_520: ; CHECK: ; %bb.0: -; CHECK-NEXT: mov d1, v0[1] ; CHECK-NEXT: sub x8, x0, #520 +; CHECK-NEXT: mov d1, v0[1] ; CHECK-NEXT: stnp d0, d1, [x8] ; CHECK-NEXT: ret %tmp0 = getelementptr i8, i8* %p, i32 -520 @@ -298,9 +298,9 @@ define void @test_stnp_v2f32_invalid_offset_256(i8* %p, <2 x float> %v) #0 { ; CHECK-LABEL: test_stnp_v2f32_invalid_offset_256: ; CHECK: ; %bb.0: +; CHECK-NEXT: add x8, x0, #256 ; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: mov s1, v0[1] -; CHECK-NEXT: add x8, x0, #256 ; CHECK-NEXT: stnp s0, s1, [x8] ; CHECK-NEXT: ret %tmp0 = getelementptr i8, i8* %p, i32 256 @@ -325,9 +325,9 @@ define void @test_stnp_v2f32_invalid_offset_neg_260(i8* %p, <2 x float> %v) #0 { ; CHECK-LABEL: test_stnp_v2f32_invalid_offset_neg_260: ; CHECK: ; %bb.0: +; CHECK-NEXT: sub x8, x0, #260 ; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: mov s1, v0[1] -; CHECK-NEXT: sub x8, x0, #260 ; CHECK-NEXT: stnp s0, s1, [x8] ; CHECK-NEXT: ret %tmp0 = getelementptr i8, i8* %p, i32 -260 @@ -355,8 +355,8 @@ ; CHECK-LABEL: test_stnp_v4f32_offset_alloca: ; CHECK: ; %bb.0: ; CHECK-NEXT: sub sp, sp, #32 -; CHECK-NEXT: mov d1, v0[1] ; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: mov d1, v0[1] ; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill ; CHECK-NEXT: stnp d0, d1, [sp] ; CHECK-NEXT: bl _dummy @@ -373,8 +373,8 @@ ; CHECK-LABEL: test_stnp_v4f32_offset_alloca_2: ; CHECK: ; %bb.0: ; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: mov d1, v0[1] ; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: mov d1, v0[1] ; CHECK-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill ; CHECK-NEXT: stnp d0, d1, [sp, #16] ; CHECK-NEXT: bl _dummy @@ -468,26 +468,26 @@ ; CHECK-NEXT: add x9, sp, #8 ; CHECK-NEXT: mov.s v4[1], v5[0] ; CHECK-NEXT: mov.s v0[1], v1[0] +; CHECK-NEXT: ldr s1, [sp, #32] ; CHECK-NEXT: ld1.s { v16 }[2], [x8] ; CHECK-NEXT: add x8, sp, #28 ; CHECK-NEXT: ld1.s { v17 }[2], [x9] ; CHECK-NEXT: add x9, sp, #12 ; CHECK-NEXT: mov.s v4[2], v6[0] ; CHECK-NEXT: mov.s v0[2], v2[0] +; CHECK-NEXT: str s1, [x0, #64] ; CHECK-NEXT: ld1.s { v16 }[3], [x8] -; CHECK-NEXT: ld1.s { v17 }[3], [x9] ; CHECK-NEXT: mov.s v4[3], v7[0] +; CHECK-NEXT: ld1.s { v17 }[3], [x9] ; CHECK-NEXT: mov.s v0[3], v3[0] -; CHECK-NEXT: mov d1, v16[1] -; CHECK-NEXT: mov d2, v17[1] -; CHECK-NEXT: mov d3, v4[1] -; CHECK-NEXT: mov d5, v0[1] -; CHECK-NEXT: stnp d16, d1, [x0, #48] -; CHECK-NEXT: ldr s1, [sp, #32] -; CHECK-NEXT: stnp d17, d2, [x0, #32] -; CHECK-NEXT: stnp d4, d3, [x0, #16] -; CHECK-NEXT: stnp d0, d5, [x0] -; CHECK-NEXT: str s1, [x0, #64] +; CHECK-NEXT: mov d5, v4[1] +; CHECK-NEXT: mov d2, v16[1] +; CHECK-NEXT: stnp d4, d5, [x0, #16] +; CHECK-NEXT: mov d3, v17[1] +; CHECK-NEXT: stnp d16, d2, [x0, #48] +; CHECK-NEXT: mov d2, v0[1] +; CHECK-NEXT: stnp d17, d3, [x0, #32] +; CHECK-NEXT: stnp d0, d2, [x0] ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/overeager_mla_fusing.ll b/llvm/test/CodeGen/AArch64/overeager_mla_fusing.ll --- a/llvm/test/CodeGen/AArch64/overeager_mla_fusing.ll +++ b/llvm/test/CodeGen/AArch64/overeager_mla_fusing.ll @@ -13,9 +13,9 @@ ; CHECK-NEXT: mul v0.8h, v2.8h, v0.8h ; CHECK-NEXT: mul v1.8h, v3.8h, v1.8h ; CHECK-NEXT: add v2.8h, v0.8h, v1.8h -; CHECK-NEXT: sub v0.8h, v0.8h, v1.8h ; CHECK-NEXT: str q2, [x9, x8] ; CHECK-NEXT: ldr x9, [x2, #56] +; CHECK-NEXT: sub v0.8h, v0.8h, v1.8h ; CHECK-NEXT: str q0, [x9, x8] ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/pow.ll b/llvm/test/CodeGen/AArch64/pow.ll --- a/llvm/test/CodeGen/AArch64/pow.ll +++ b/llvm/test/CodeGen/AArch64/pow.ll @@ -69,9 +69,9 @@ ; CHECK-LABEL: pow_v4f32_one_fourth_not_enough_fmf: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: fmov s1, #0.25000000 ; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: mov s0, v0.s[1] -; CHECK-NEXT: fmov s1, #0.25000000 ; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: bl powf ; CHECK-NEXT: fmov s1, #0.25000000 @@ -91,9 +91,9 @@ ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: mov v1.s[2], v0.s[0] ; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: mov s0, v0.s[3] ; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill ; CHECK-NEXT: fmov s1, #0.25000000 +; CHECK-NEXT: mov s0, v0.s[3] ; CHECK-NEXT: bl powf ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 @@ -110,9 +110,9 @@ ; CHECK-LABEL: pow_v2f64_one_fourth_not_enough_fmf: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: fmov d1, #0.25000000 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: mov d0, v0.d[1] -; CHECK-NEXT: fmov d1, #0.25000000 ; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: bl pow ; CHECK-NEXT: fmov d1, #0.25000000 diff --git a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll --- a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll +++ b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll @@ -8,12 +8,12 @@ define dso_local void @run_test() local_unnamed_addr #0 { ; CHECK-LABEL: run_test: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #96 -; CHECK-NEXT: stp d15, d14, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 96 +; CHECK-NEXT: sub sp, sp, #112 +; CHECK-NEXT: stp d15, d14, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp d13, d12, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #96] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 112 ; CHECK-NEXT: .cfi_offset b8, -8 ; CHECK-NEXT: .cfi_offset b9, -16 ; CHECK-NEXT: .cfi_offset b10, -24 @@ -22,14 +22,13 @@ ; CHECK-NEXT: .cfi_offset b13, -48 ; CHECK-NEXT: .cfi_offset b14, -56 ; CHECK-NEXT: .cfi_offset b15, -64 -; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: adrp x10, B+48 ; CHECK-NEXT: adrp x11, A ; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: mov x9, xzr ; CHECK-NEXT: add x10, x10, :lo12:B+48 ; CHECK-NEXT: add x11, x11, :lo12:A -; CHECK-NEXT: // implicit-def: $q2 +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: // implicit-def: $q3 ; CHECK-NEXT: // implicit-def: $q4 ; CHECK-NEXT: // implicit-def: $q5 @@ -57,7 +56,9 @@ ; CHECK-NEXT: // implicit-def: $q11 ; CHECK-NEXT: // implicit-def: $q12 ; CHECK-NEXT: // implicit-def: $q13 -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: // implicit-def: $q0 +; CHECK-NEXT: // kill: killed $q0 ; CHECK-NEXT: // implicit-def: $q0 ; CHECK-NEXT: // kill: killed $q0 ; CHECK-NEXT: .LBB0_1: // %for.cond1.preheader @@ -65,66 +66,77 @@ ; CHECK-NEXT: mov x12, xzr ; CHECK-NEXT: ldr q14, [x8] ; CHECK-NEXT: ldr q15, [x10], #64 -; CHECK-NEXT: add x15, x11, x8 -; CHECK-NEXT: add x9, x9, #1 +; CHECK-NEXT: add x4, x11, x8 +; CHECK-NEXT: ldr x15, [x8] ; CHECK-NEXT: ldr q0, [x12] -; CHECK-NEXT: fmov x13, d14 +; CHECK-NEXT: fmov x14, d14 ; CHECK-NEXT: ldr x12, [x12] -; CHECK-NEXT: fmov x0, d15 -; CHECK-NEXT: mov x14, v14.d[1] -; CHECK-NEXT: ldr x15, [x15, #128] -; CHECK-NEXT: fmov x16, d0 -; CHECK-NEXT: mul x17, x13, x12 -; CHECK-NEXT: mov x18, v0.d[1] -; CHECK-NEXT: mul x4, x0, x12 +; CHECK-NEXT: fmov x16, d15 +; CHECK-NEXT: mov x13, v14.d[1] +; CHECK-NEXT: mov x0, v15.d[1] +; CHECK-NEXT: fmov x17, d0 +; CHECK-NEXT: mov x2, v0.d[1] +; CHECK-NEXT: mul x18, x14, x12 +; CHECK-NEXT: ldr x4, [x4, #128] ; CHECK-NEXT: mul x1, x16, x12 -; CHECK-NEXT: mul x3, x14, x12 -; CHECK-NEXT: fmov d0, x17 -; CHECK-NEXT: mul x5, x13, x15 -; CHECK-NEXT: mov x17, v15.d[1] -; CHECK-NEXT: fmov d15, x4 +; CHECK-NEXT: add x9, x9, #1 +; CHECK-NEXT: mul x3, x17, x12 +; CHECK-NEXT: add x8, x8, #8 +; CHECK-NEXT: mul x5, x17, x15 +; CHECK-NEXT: cmp x8, #64 +; CHECK-NEXT: fmov d0, x18 +; CHECK-NEXT: mul x18, x13, x12 ; CHECK-NEXT: fmov d14, x1 -; CHECK-NEXT: mul x1, x18, x12 -; CHECK-NEXT: ldr x2, [x8], #8 -; CHECK-NEXT: mov v0.d[1], x3 -; CHECK-NEXT: mul x3, x16, x15 -; CHECK-NEXT: mul x12, x17, x12 +; CHECK-NEXT: mul x1, x0, x12 +; CHECK-NEXT: mul x12, x2, x12 +; CHECK-NEXT: fmov d15, x3 ; CHECK-NEXT: fmov d1, x5 -; CHECK-NEXT: mul x13, x13, x2 -; CHECK-NEXT: cmp x8, #64 +; CHECK-NEXT: mov v0.d[1], x18 +; CHECK-NEXT: mul x18, x14, x15 ; CHECK-NEXT: mov v14.d[1], x1 -; CHECK-NEXT: mul x1, x14, x15 -; CHECK-NEXT: add v12.2d, v12.2d, v0.2d -; CHECK-NEXT: mul x14, x14, x2 +; CHECK-NEXT: mul x1, x13, x4 ; CHECK-NEXT: mov v15.d[1], x12 -; CHECK-NEXT: mul x12, x18, x2 -; CHECK-NEXT: mul x18, x18, x15 -; CHECK-NEXT: fmov d0, x3 -; CHECK-NEXT: mov v1.d[1], x1 -; CHECK-NEXT: mul x16, x16, x2 -; CHECK-NEXT: mul x3, x0, x15 -; CHECK-NEXT: add v10.2d, v10.2d, v15.2d -; CHECK-NEXT: fmov d15, x13 -; CHECK-NEXT: mov v0.d[1], x18 -; CHECK-NEXT: mul x13, x0, x2 -; CHECK-NEXT: add v29.2d, v29.2d, v1.2d -; CHECK-NEXT: mul x15, x17, x15 -; CHECK-NEXT: mov v15.d[1], x14 -; CHECK-NEXT: fmov d1, x16 -; CHECK-NEXT: add v28.2d, v28.2d, v0.2d +; CHECK-NEXT: mul x12, x14, x4 +; CHECK-NEXT: add v12.2d, v12.2d, v0.2d +; CHECK-NEXT: mul x14, x17, x4 +; CHECK-NEXT: add v13.2d, v13.2d, v15.2d +; CHECK-NEXT: mul x13, x13, x15 +; CHECK-NEXT: add v11.2d, v11.2d, v15.2d +; CHECK-NEXT: mul x17, x2, x4 +; CHECK-NEXT: fmov d15, x12 +; CHECK-NEXT: mul x12, x16, x4 +; CHECK-NEXT: fmov d0, x18 +; CHECK-NEXT: mul x18, x0, x4 +; CHECK-NEXT: add v10.2d, v10.2d, v14.2d +; CHECK-NEXT: fmov d2, x14 +; CHECK-NEXT: mul x14, x2, x15 +; CHECK-NEXT: fmov d14, x12 +; CHECK-NEXT: mul x12, x16, x15 +; CHECK-NEXT: mov v0.d[1], x13 +; CHECK-NEXT: mul x13, x0, x15 +; CHECK-NEXT: mov v2.d[1], x17 +; CHECK-NEXT: mov v14.d[1], x18 +; CHECK-NEXT: add v8.2d, v8.2d, v0.2d +; CHECK-NEXT: add v27.2d, v27.2d, v14.2d +; CHECK-NEXT: ldr q14, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: add v25.2d, v25.2d, v0.2d +; CHECK-NEXT: add v22.2d, v22.2d, v0.2d +; CHECK-NEXT: add v18.2d, v18.2d, v0.2d +; CHECK-NEXT: add v6.2d, v6.2d, v0.2d +; CHECK-NEXT: add v14.2d, v14.2d, v0.2d +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: mov v1.d[1], x14 +; CHECK-NEXT: add v28.2d, v28.2d, v2.2d +; CHECK-NEXT: str q14, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: fmov d2, x12 +; CHECK-NEXT: add v0.2d, v0.2d, v1.2d +; CHECK-NEXT: mov v15.d[1], x1 +; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: add v13.2d, v13.2d, v14.2d -; CHECK-NEXT: mov v1.d[1], x12 -; CHECK-NEXT: mul x12, x17, x2 -; CHECK-NEXT: add v0.2d, v0.2d, v15.2d -; CHECK-NEXT: add v11.2d, v11.2d, v14.2d -; CHECK-NEXT: fmov d14, x3 +; CHECK-NEXT: mov v2.d[1], x13 +; CHECK-NEXT: add v29.2d, v29.2d, v15.2d ; CHECK-NEXT: add v9.2d, v9.2d, v1.2d -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: fmov d0, x13 -; CHECK-NEXT: mov v14.d[1], x15 ; CHECK-NEXT: add v31.2d, v31.2d, v1.2d -; CHECK-NEXT: mov v0.d[1], x12 ; CHECK-NEXT: add v26.2d, v26.2d, v1.2d ; CHECK-NEXT: add v23.2d, v23.2d, v1.2d ; CHECK-NEXT: add v21.2d, v21.2d, v1.2d @@ -133,37 +145,30 @@ ; CHECK-NEXT: add v7.2d, v7.2d, v1.2d ; CHECK-NEXT: add v5.2d, v5.2d, v1.2d ; CHECK-NEXT: add v3.2d, v3.2d, v1.2d -; CHECK-NEXT: add v2.2d, v2.2d, v1.2d -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-NEXT: add v27.2d, v27.2d, v14.2d -; CHECK-NEXT: add v8.2d, v8.2d, v15.2d -; CHECK-NEXT: add v25.2d, v25.2d, v15.2d -; CHECK-NEXT: add v22.2d, v22.2d, v15.2d -; CHECK-NEXT: add v18.2d, v18.2d, v15.2d -; CHECK-NEXT: add v6.2d, v6.2d, v15.2d -; CHECK-NEXT: add v30.2d, v30.2d, v0.2d -; CHECK-NEXT: add v24.2d, v24.2d, v0.2d -; CHECK-NEXT: add v20.2d, v20.2d, v0.2d -; CHECK-NEXT: add v16.2d, v16.2d, v0.2d -; CHECK-NEXT: add v4.2d, v4.2d, v0.2d -; CHECK-NEXT: add v1.2d, v1.2d, v0.2d -; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-NEXT: add v30.2d, v30.2d, v2.2d +; CHECK-NEXT: add v24.2d, v24.2d, v2.2d +; CHECK-NEXT: add v20.2d, v20.2d, v2.2d +; CHECK-NEXT: add v16.2d, v16.2d, v2.2d +; CHECK-NEXT: add v4.2d, v4.2d, v2.2d +; CHECK-NEXT: add v0.2d, v0.2d, v2.2d +; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: b.ne .LBB0_1 ; CHECK-NEXT: // %bb.2: // %for.cond.cleanup ; CHECK-NEXT: adrp x8, C -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldr q1, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: add x8, x8, :lo12:C -; CHECK-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: stp q13, q12, [x8] ; CHECK-NEXT: stp q11, q10, [x8, #32] ; CHECK-NEXT: stp q9, q8, [x8, #64] -; CHECK-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: stp q0, q2, [x8, #464] -; CHECK-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldp d9, d8, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: stp q1, q0, [x8, #464] +; CHECK-NEXT: ldp d11, d10, [sp, #80] // 16-byte Folded Reload ; CHECK-NEXT: stp q31, q30, [x8, #96] -; CHECK-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: stp q29, q28, [x8, #144] -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: stp q27, q26, [x8, #176] ; CHECK-NEXT: str q25, [x8, #208] ; CHECK-NEXT: stp q24, q23, [x8, #240] @@ -174,7 +179,7 @@ ; CHECK-NEXT: stp q6, q5, [x8, #400] ; CHECK-NEXT: stp q4, q3, [x8, #432] ; CHECK-NEXT: str q0, [x8, #496] -; CHECK-NEXT: add sp, sp, #96 +; CHECK-NEXT: add sp, sp, #112 ; CHECK-NEXT: ret entry: br label %for.cond1.preheader diff --git a/llvm/test/CodeGen/AArch64/reduce-and.ll b/llvm/test/CodeGen/AArch64/reduce-and.ll --- a/llvm/test/CodeGen/AArch64/reduce-and.ll +++ b/llvm/test/CodeGen/AArch64/reduce-and.ll @@ -21,8 +21,8 @@ ; CHECK-LABEL: test_redand_v2i1: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: and w8, w9, w8 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret @@ -81,17 +81,17 @@ ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] ; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] -; CHECK-NEXT: umov w13, v0.b[5] ; CHECK-NEXT: and w8, w9, w8 -; CHECK-NEXT: umov w9, v0.b[6] +; CHECK-NEXT: umov w9, v0.b[4] ; CHECK-NEXT: and w8, w8, w10 -; CHECK-NEXT: umov w10, v0.b[7] +; CHECK-NEXT: umov w10, v0.b[5] ; CHECK-NEXT: and w8, w8, w11 -; CHECK-NEXT: and w8, w8, w12 -; CHECK-NEXT: and w8, w8, w13 +; CHECK-NEXT: umov w11, v0.b[6] ; CHECK-NEXT: and w8, w8, w9 +; CHECK-NEXT: umov w9, v0.b[7] ; CHECK-NEXT: and w8, w8, w10 +; CHECK-NEXT: and w8, w8, w11 +; CHECK-NEXT: and w8, w8, w9 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret ; @@ -101,24 +101,24 @@ ; GISEL-NEXT: mov b1, v0.b[1] ; GISEL-NEXT: mov b2, v0.b[2] ; GISEL-NEXT: mov b3, v0.b[3] -; GISEL-NEXT: mov b4, v0.b[4] -; GISEL-NEXT: mov b5, v0.b[5] -; GISEL-NEXT: mov b6, v0.b[6] -; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: fmov w8, s0 ; GISEL-NEXT: fmov w9, s1 ; GISEL-NEXT: fmov w10, s2 ; GISEL-NEXT: fmov w11, s3 -; GISEL-NEXT: fmov w12, s4 -; GISEL-NEXT: fmov w13, s5 +; GISEL-NEXT: mov b4, v0.b[4] +; GISEL-NEXT: mov b5, v0.b[5] +; GISEL-NEXT: mov b6, v0.b[6] +; GISEL-NEXT: mov b7, v0.b[7] +; GISEL-NEXT: and w8, w8, w9 +; GISEL-NEXT: and w9, w10, w11 +; GISEL-NEXT: fmov w10, s4 +; GISEL-NEXT: fmov w11, s5 +; GISEL-NEXT: fmov w12, s6 +; GISEL-NEXT: fmov w13, s7 ; GISEL-NEXT: and w8, w8, w9 -; GISEL-NEXT: fmov w9, s6 ; GISEL-NEXT: and w10, w10, w11 -; GISEL-NEXT: fmov w11, s7 -; GISEL-NEXT: and w12, w12, w13 -; GISEL-NEXT: and w8, w8, w10 -; GISEL-NEXT: and w9, w9, w11 -; GISEL-NEXT: and w9, w12, w9 +; GISEL-NEXT: and w11, w12, w13 +; GISEL-NEXT: and w9, w10, w11 ; GISEL-NEXT: and w8, w8, w9 ; GISEL-NEXT: and w0, w8, #0x1 ; GISEL-NEXT: ret @@ -134,18 +134,18 @@ ; CHECK-NEXT: umov w8, v0.b[1] ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] -; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] ; CHECK-NEXT: and w8, w9, w8 +; CHECK-NEXT: umov w9, v0.b[3] +; CHECK-NEXT: and w8, w8, w10 +; CHECK-NEXT: umov w10, v0.b[4] +; CHECK-NEXT: and w8, w8, w9 ; CHECK-NEXT: umov w9, v0.b[5] ; CHECK-NEXT: and w8, w8, w10 ; CHECK-NEXT: umov w10, v0.b[6] -; CHECK-NEXT: and w8, w8, w11 -; CHECK-NEXT: umov w11, v0.b[7] -; CHECK-NEXT: and w8, w8, w12 ; CHECK-NEXT: and w8, w8, w9 +; CHECK-NEXT: umov w9, v0.b[7] ; CHECK-NEXT: and w8, w8, w10 -; CHECK-NEXT: and w8, w8, w11 +; CHECK-NEXT: and w8, w8, w9 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret ; @@ -154,45 +154,45 @@ ; GISEL-NEXT: mov b1, v0.b[1] ; GISEL-NEXT: mov b2, v0.b[2] ; GISEL-NEXT: mov b3, v0.b[3] -; GISEL-NEXT: mov b4, v0.b[4] -; GISEL-NEXT: mov b5, v0.b[5] -; GISEL-NEXT: mov b6, v0.b[6] -; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: fmov w8, s0 ; GISEL-NEXT: fmov w9, s1 ; GISEL-NEXT: fmov w10, s2 ; GISEL-NEXT: fmov w11, s3 -; GISEL-NEXT: mov b16, v0.b[8] -; GISEL-NEXT: mov b17, v0.b[9] -; GISEL-NEXT: mov b18, v0.b[10] -; GISEL-NEXT: mov b19, v0.b[11] +; GISEL-NEXT: mov b4, v0.b[4] +; GISEL-NEXT: mov b5, v0.b[5] +; GISEL-NEXT: mov b6, v0.b[6] +; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: and w8, w8, w9 ; GISEL-NEXT: and w9, w10, w11 ; GISEL-NEXT: fmov w10, s4 ; GISEL-NEXT: fmov w11, s5 ; GISEL-NEXT: fmov w12, s6 ; GISEL-NEXT: fmov w13, s7 -; GISEL-NEXT: mov b20, v0.b[12] -; GISEL-NEXT: mov b21, v0.b[13] -; GISEL-NEXT: mov b22, v0.b[14] -; GISEL-NEXT: mov b23, v0.b[15] +; GISEL-NEXT: mov b16, v0.b[8] +; GISEL-NEXT: mov b17, v0.b[9] +; GISEL-NEXT: mov b18, v0.b[10] +; GISEL-NEXT: mov b19, v0.b[11] ; GISEL-NEXT: and w10, w10, w11 ; GISEL-NEXT: and w11, w12, w13 ; GISEL-NEXT: fmov w12, s16 ; GISEL-NEXT: fmov w13, s17 ; GISEL-NEXT: fmov w14, s18 ; GISEL-NEXT: fmov w15, s19 -; GISEL-NEXT: fmov w16, s22 -; GISEL-NEXT: fmov w17, s23 -; GISEL-NEXT: and w8, w8, w9 +; GISEL-NEXT: mov b20, v0.b[12] +; GISEL-NEXT: mov b21, v0.b[13] +; GISEL-NEXT: mov b22, v0.b[14] +; GISEL-NEXT: mov b23, v0.b[15] ; GISEL-NEXT: and w12, w12, w13 -; GISEL-NEXT: and w9, w10, w11 ; GISEL-NEXT: and w13, w14, w15 ; GISEL-NEXT: fmov w14, s20 ; GISEL-NEXT: fmov w15, s21 -; GISEL-NEXT: and w10, w12, w13 +; GISEL-NEXT: fmov w16, s22 +; GISEL-NEXT: fmov w17, s23 ; GISEL-NEXT: and w8, w8, w9 +; GISEL-NEXT: and w9, w10, w11 +; GISEL-NEXT: and w10, w12, w13 ; GISEL-NEXT: and w14, w14, w15 +; GISEL-NEXT: and w8, w8, w9 ; GISEL-NEXT: and w15, w16, w17 ; GISEL-NEXT: and w11, w14, w15 ; GISEL-NEXT: and w9, w10, w11 @@ -275,17 +275,17 @@ ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] ; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] -; CHECK-NEXT: umov w13, v0.b[5] ; CHECK-NEXT: and w8, w9, w8 -; CHECK-NEXT: umov w9, v0.b[6] +; CHECK-NEXT: umov w9, v0.b[4] ; CHECK-NEXT: and w8, w8, w10 -; CHECK-NEXT: umov w10, v0.b[7] +; CHECK-NEXT: umov w10, v0.b[5] ; CHECK-NEXT: and w8, w8, w11 -; CHECK-NEXT: and w8, w8, w12 -; CHECK-NEXT: and w8, w8, w13 +; CHECK-NEXT: umov w11, v0.b[6] ; CHECK-NEXT: and w8, w8, w9 -; CHECK-NEXT: and w0, w8, w10 +; CHECK-NEXT: umov w9, v0.b[7] +; CHECK-NEXT: and w8, w8, w10 +; CHECK-NEXT: and w8, w8, w11 +; CHECK-NEXT: and w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redand_v8i8: @@ -294,24 +294,24 @@ ; GISEL-NEXT: mov b1, v0.b[1] ; GISEL-NEXT: mov b2, v0.b[2] ; GISEL-NEXT: mov b3, v0.b[3] -; GISEL-NEXT: mov b4, v0.b[4] -; GISEL-NEXT: mov b5, v0.b[5] -; GISEL-NEXT: mov b6, v0.b[6] -; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: fmov w8, s0 ; GISEL-NEXT: fmov w9, s1 ; GISEL-NEXT: fmov w10, s2 ; GISEL-NEXT: fmov w11, s3 -; GISEL-NEXT: fmov w12, s4 -; GISEL-NEXT: fmov w13, s5 +; GISEL-NEXT: mov b4, v0.b[4] +; GISEL-NEXT: mov b5, v0.b[5] +; GISEL-NEXT: mov b6, v0.b[6] +; GISEL-NEXT: mov b7, v0.b[7] +; GISEL-NEXT: and w8, w8, w9 +; GISEL-NEXT: and w9, w10, w11 +; GISEL-NEXT: fmov w10, s4 +; GISEL-NEXT: fmov w11, s5 +; GISEL-NEXT: fmov w12, s6 +; GISEL-NEXT: fmov w13, s7 ; GISEL-NEXT: and w8, w8, w9 -; GISEL-NEXT: fmov w9, s6 ; GISEL-NEXT: and w10, w10, w11 -; GISEL-NEXT: fmov w11, s7 -; GISEL-NEXT: and w12, w12, w13 -; GISEL-NEXT: and w8, w8, w10 -; GISEL-NEXT: and w9, w9, w11 -; GISEL-NEXT: and w9, w12, w9 +; GISEL-NEXT: and w11, w12, w13 +; GISEL-NEXT: and w9, w10, w11 ; GISEL-NEXT: and w0, w8, w9 ; GISEL-NEXT: ret %and_result = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> %a) @@ -326,18 +326,18 @@ ; CHECK-NEXT: umov w8, v0.b[1] ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] -; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] ; CHECK-NEXT: and w8, w9, w8 +; CHECK-NEXT: umov w9, v0.b[3] +; CHECK-NEXT: and w8, w8, w10 +; CHECK-NEXT: umov w10, v0.b[4] +; CHECK-NEXT: and w8, w8, w9 ; CHECK-NEXT: umov w9, v0.b[5] ; CHECK-NEXT: and w8, w8, w10 ; CHECK-NEXT: umov w10, v0.b[6] -; CHECK-NEXT: and w8, w8, w11 -; CHECK-NEXT: umov w11, v0.b[7] -; CHECK-NEXT: and w8, w8, w12 ; CHECK-NEXT: and w8, w8, w9 +; CHECK-NEXT: umov w9, v0.b[7] ; CHECK-NEXT: and w8, w8, w10 -; CHECK-NEXT: and w0, w8, w11 +; CHECK-NEXT: and w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redand_v16i8: @@ -347,22 +347,22 @@ ; GISEL-NEXT: mov b1, v0.b[1] ; GISEL-NEXT: mov b2, v0.b[2] ; GISEL-NEXT: mov b3, v0.b[3] -; GISEL-NEXT: mov b4, v0.b[4] -; GISEL-NEXT: mov b5, v0.b[5] -; GISEL-NEXT: mov b6, v0.b[6] -; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: fmov w8, s0 ; GISEL-NEXT: fmov w9, s1 ; GISEL-NEXT: fmov w10, s2 ; GISEL-NEXT: fmov w11, s3 +; GISEL-NEXT: mov b4, v0.b[4] +; GISEL-NEXT: mov b5, v0.b[5] +; GISEL-NEXT: mov b6, v0.b[6] +; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: fmov w12, s4 ; GISEL-NEXT: fmov w13, s5 ; GISEL-NEXT: and w8, w8, w9 ; GISEL-NEXT: fmov w9, s6 ; GISEL-NEXT: and w10, w10, w11 ; GISEL-NEXT: fmov w11, s7 -; GISEL-NEXT: and w12, w12, w13 ; GISEL-NEXT: and w8, w8, w10 +; GISEL-NEXT: and w12, w12, w13 ; GISEL-NEXT: and w9, w9, w11 ; GISEL-NEXT: and w9, w12, w9 ; GISEL-NEXT: and w0, w8, w9 @@ -380,18 +380,18 @@ ; CHECK-NEXT: umov w8, v0.b[1] ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] -; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] ; CHECK-NEXT: and w8, w9, w8 +; CHECK-NEXT: umov w9, v0.b[3] +; CHECK-NEXT: and w8, w8, w10 +; CHECK-NEXT: umov w10, v0.b[4] +; CHECK-NEXT: and w8, w8, w9 ; CHECK-NEXT: umov w9, v0.b[5] ; CHECK-NEXT: and w8, w8, w10 ; CHECK-NEXT: umov w10, v0.b[6] -; CHECK-NEXT: and w8, w8, w11 -; CHECK-NEXT: umov w11, v0.b[7] -; CHECK-NEXT: and w8, w8, w12 ; CHECK-NEXT: and w8, w8, w9 +; CHECK-NEXT: umov w9, v0.b[7] ; CHECK-NEXT: and w8, w8, w10 -; CHECK-NEXT: and w0, w8, w11 +; CHECK-NEXT: and w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redand_v32i8: @@ -402,22 +402,22 @@ ; GISEL-NEXT: mov b1, v0.b[1] ; GISEL-NEXT: mov b2, v0.b[2] ; GISEL-NEXT: mov b3, v0.b[3] -; GISEL-NEXT: mov b4, v0.b[4] -; GISEL-NEXT: mov b5, v0.b[5] -; GISEL-NEXT: mov b6, v0.b[6] -; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: fmov w8, s0 ; GISEL-NEXT: fmov w9, s1 ; GISEL-NEXT: fmov w10, s2 ; GISEL-NEXT: fmov w11, s3 +; GISEL-NEXT: mov b4, v0.b[4] +; GISEL-NEXT: mov b5, v0.b[5] +; GISEL-NEXT: mov b6, v0.b[6] +; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: fmov w12, s4 ; GISEL-NEXT: fmov w13, s5 ; GISEL-NEXT: and w8, w8, w9 ; GISEL-NEXT: fmov w9, s6 ; GISEL-NEXT: and w10, w10, w11 ; GISEL-NEXT: fmov w11, s7 -; GISEL-NEXT: and w12, w12, w13 ; GISEL-NEXT: and w8, w8, w10 +; GISEL-NEXT: and w12, w12, w13 ; GISEL-NEXT: and w9, w9, w11 ; GISEL-NEXT: and w9, w12, w9 ; GISEL-NEXT: and w0, w8, w9 @@ -465,10 +465,10 @@ ; CHECK-NEXT: umov w8, v0.h[1] ; CHECK-NEXT: umov w9, v0.h[0] ; CHECK-NEXT: umov w10, v0.h[2] -; CHECK-NEXT: umov w11, v0.h[3] ; CHECK-NEXT: and w8, w9, w8 +; CHECK-NEXT: umov w9, v0.h[3] ; CHECK-NEXT: and w8, w8, w10 -; CHECK-NEXT: and w0, w8, w11 +; CHECK-NEXT: and w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redand_v8i16: @@ -499,10 +499,10 @@ ; CHECK-NEXT: umov w8, v0.h[1] ; CHECK-NEXT: umov w9, v0.h[0] ; CHECK-NEXT: umov w10, v0.h[2] -; CHECK-NEXT: umov w11, v0.h[3] ; CHECK-NEXT: and w8, w9, w8 +; CHECK-NEXT: umov w9, v0.h[3] ; CHECK-NEXT: and w8, w8, w10 -; CHECK-NEXT: and w0, w8, w11 +; CHECK-NEXT: and w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redand_v16i16: @@ -529,8 +529,8 @@ ; CHECK-LABEL: test_redand_v2i32: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: and w0, w9, w8 ; CHECK-NEXT: ret ; @@ -551,8 +551,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: and w0, w9, w8 ; CHECK-NEXT: ret ; @@ -575,8 +575,8 @@ ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: and w0, w9, w8 ; CHECK-NEXT: ret ; diff --git a/llvm/test/CodeGen/AArch64/reduce-or.ll b/llvm/test/CodeGen/AArch64/reduce-or.ll --- a/llvm/test/CodeGen/AArch64/reduce-or.ll +++ b/llvm/test/CodeGen/AArch64/reduce-or.ll @@ -21,8 +21,8 @@ ; CHECK-LABEL: test_redor_v2i1: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: orr w8, w9, w8 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret @@ -81,17 +81,17 @@ ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] ; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] -; CHECK-NEXT: umov w13, v0.b[5] ; CHECK-NEXT: orr w8, w9, w8 -; CHECK-NEXT: umov w9, v0.b[6] +; CHECK-NEXT: umov w9, v0.b[4] ; CHECK-NEXT: orr w8, w8, w10 -; CHECK-NEXT: umov w10, v0.b[7] +; CHECK-NEXT: umov w10, v0.b[5] ; CHECK-NEXT: orr w8, w8, w11 -; CHECK-NEXT: orr w8, w8, w12 -; CHECK-NEXT: orr w8, w8, w13 +; CHECK-NEXT: umov w11, v0.b[6] ; CHECK-NEXT: orr w8, w8, w9 +; CHECK-NEXT: umov w9, v0.b[7] ; CHECK-NEXT: orr w8, w8, w10 +; CHECK-NEXT: orr w8, w8, w11 +; CHECK-NEXT: orr w8, w8, w9 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret ; @@ -101,24 +101,24 @@ ; GISEL-NEXT: mov b1, v0.b[1] ; GISEL-NEXT: mov b2, v0.b[2] ; GISEL-NEXT: mov b3, v0.b[3] -; GISEL-NEXT: mov b4, v0.b[4] -; GISEL-NEXT: mov b5, v0.b[5] -; GISEL-NEXT: mov b6, v0.b[6] -; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: fmov w8, s0 ; GISEL-NEXT: fmov w9, s1 ; GISEL-NEXT: fmov w10, s2 ; GISEL-NEXT: fmov w11, s3 -; GISEL-NEXT: fmov w12, s4 -; GISEL-NEXT: fmov w13, s5 +; GISEL-NEXT: mov b4, v0.b[4] +; GISEL-NEXT: mov b5, v0.b[5] +; GISEL-NEXT: mov b6, v0.b[6] +; GISEL-NEXT: mov b7, v0.b[7] +; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: orr w9, w10, w11 +; GISEL-NEXT: fmov w10, s4 +; GISEL-NEXT: fmov w11, s5 +; GISEL-NEXT: fmov w12, s6 +; GISEL-NEXT: fmov w13, s7 ; GISEL-NEXT: orr w8, w8, w9 -; GISEL-NEXT: fmov w9, s6 ; GISEL-NEXT: orr w10, w10, w11 -; GISEL-NEXT: fmov w11, s7 -; GISEL-NEXT: orr w12, w12, w13 -; GISEL-NEXT: orr w8, w8, w10 -; GISEL-NEXT: orr w9, w9, w11 -; GISEL-NEXT: orr w9, w12, w9 +; GISEL-NEXT: orr w11, w12, w13 +; GISEL-NEXT: orr w9, w10, w11 ; GISEL-NEXT: orr w8, w8, w9 ; GISEL-NEXT: and w0, w8, #0x1 ; GISEL-NEXT: ret @@ -134,18 +134,18 @@ ; CHECK-NEXT: umov w8, v0.b[1] ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] -; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] ; CHECK-NEXT: orr w8, w9, w8 +; CHECK-NEXT: umov w9, v0.b[3] +; CHECK-NEXT: orr w8, w8, w10 +; CHECK-NEXT: umov w10, v0.b[4] +; CHECK-NEXT: orr w8, w8, w9 ; CHECK-NEXT: umov w9, v0.b[5] ; CHECK-NEXT: orr w8, w8, w10 ; CHECK-NEXT: umov w10, v0.b[6] -; CHECK-NEXT: orr w8, w8, w11 -; CHECK-NEXT: umov w11, v0.b[7] -; CHECK-NEXT: orr w8, w8, w12 ; CHECK-NEXT: orr w8, w8, w9 +; CHECK-NEXT: umov w9, v0.b[7] ; CHECK-NEXT: orr w8, w8, w10 -; CHECK-NEXT: orr w8, w8, w11 +; CHECK-NEXT: orr w8, w8, w9 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret ; @@ -154,45 +154,45 @@ ; GISEL-NEXT: mov b1, v0.b[1] ; GISEL-NEXT: mov b2, v0.b[2] ; GISEL-NEXT: mov b3, v0.b[3] -; GISEL-NEXT: mov b4, v0.b[4] -; GISEL-NEXT: mov b5, v0.b[5] -; GISEL-NEXT: mov b6, v0.b[6] -; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: fmov w8, s0 ; GISEL-NEXT: fmov w9, s1 ; GISEL-NEXT: fmov w10, s2 ; GISEL-NEXT: fmov w11, s3 -; GISEL-NEXT: mov b16, v0.b[8] -; GISEL-NEXT: mov b17, v0.b[9] -; GISEL-NEXT: mov b18, v0.b[10] -; GISEL-NEXT: mov b19, v0.b[11] +; GISEL-NEXT: mov b4, v0.b[4] +; GISEL-NEXT: mov b5, v0.b[5] +; GISEL-NEXT: mov b6, v0.b[6] +; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: orr w8, w8, w9 ; GISEL-NEXT: orr w9, w10, w11 ; GISEL-NEXT: fmov w10, s4 ; GISEL-NEXT: fmov w11, s5 ; GISEL-NEXT: fmov w12, s6 ; GISEL-NEXT: fmov w13, s7 -; GISEL-NEXT: mov b20, v0.b[12] -; GISEL-NEXT: mov b21, v0.b[13] -; GISEL-NEXT: mov b22, v0.b[14] -; GISEL-NEXT: mov b23, v0.b[15] +; GISEL-NEXT: mov b16, v0.b[8] +; GISEL-NEXT: mov b17, v0.b[9] +; GISEL-NEXT: mov b18, v0.b[10] +; GISEL-NEXT: mov b19, v0.b[11] ; GISEL-NEXT: orr w10, w10, w11 ; GISEL-NEXT: orr w11, w12, w13 ; GISEL-NEXT: fmov w12, s16 ; GISEL-NEXT: fmov w13, s17 ; GISEL-NEXT: fmov w14, s18 ; GISEL-NEXT: fmov w15, s19 -; GISEL-NEXT: fmov w16, s22 -; GISEL-NEXT: fmov w17, s23 -; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: mov b20, v0.b[12] +; GISEL-NEXT: mov b21, v0.b[13] +; GISEL-NEXT: mov b22, v0.b[14] +; GISEL-NEXT: mov b23, v0.b[15] ; GISEL-NEXT: orr w12, w12, w13 -; GISEL-NEXT: orr w9, w10, w11 ; GISEL-NEXT: orr w13, w14, w15 ; GISEL-NEXT: fmov w14, s20 ; GISEL-NEXT: fmov w15, s21 -; GISEL-NEXT: orr w10, w12, w13 +; GISEL-NEXT: fmov w16, s22 +; GISEL-NEXT: fmov w17, s23 ; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: orr w9, w10, w11 +; GISEL-NEXT: orr w10, w12, w13 ; GISEL-NEXT: orr w14, w14, w15 +; GISEL-NEXT: orr w8, w8, w9 ; GISEL-NEXT: orr w15, w16, w17 ; GISEL-NEXT: orr w11, w14, w15 ; GISEL-NEXT: orr w9, w10, w11 @@ -274,17 +274,17 @@ ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] ; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] -; CHECK-NEXT: umov w13, v0.b[5] ; CHECK-NEXT: orr w8, w9, w8 -; CHECK-NEXT: umov w9, v0.b[6] +; CHECK-NEXT: umov w9, v0.b[4] ; CHECK-NEXT: orr w8, w8, w10 -; CHECK-NEXT: umov w10, v0.b[7] +; CHECK-NEXT: umov w10, v0.b[5] ; CHECK-NEXT: orr w8, w8, w11 -; CHECK-NEXT: orr w8, w8, w12 -; CHECK-NEXT: orr w8, w8, w13 +; CHECK-NEXT: umov w11, v0.b[6] ; CHECK-NEXT: orr w8, w8, w9 -; CHECK-NEXT: orr w0, w8, w10 +; CHECK-NEXT: umov w9, v0.b[7] +; CHECK-NEXT: orr w8, w8, w10 +; CHECK-NEXT: orr w8, w8, w11 +; CHECK-NEXT: orr w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redor_v8i8: @@ -293,24 +293,24 @@ ; GISEL-NEXT: mov b1, v0.b[1] ; GISEL-NEXT: mov b2, v0.b[2] ; GISEL-NEXT: mov b3, v0.b[3] -; GISEL-NEXT: mov b4, v0.b[4] -; GISEL-NEXT: mov b5, v0.b[5] -; GISEL-NEXT: mov b6, v0.b[6] -; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: fmov w8, s0 ; GISEL-NEXT: fmov w9, s1 ; GISEL-NEXT: fmov w10, s2 ; GISEL-NEXT: fmov w11, s3 -; GISEL-NEXT: fmov w12, s4 -; GISEL-NEXT: fmov w13, s5 +; GISEL-NEXT: mov b4, v0.b[4] +; GISEL-NEXT: mov b5, v0.b[5] +; GISEL-NEXT: mov b6, v0.b[6] +; GISEL-NEXT: mov b7, v0.b[7] +; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: orr w9, w10, w11 +; GISEL-NEXT: fmov w10, s4 +; GISEL-NEXT: fmov w11, s5 +; GISEL-NEXT: fmov w12, s6 +; GISEL-NEXT: fmov w13, s7 ; GISEL-NEXT: orr w8, w8, w9 -; GISEL-NEXT: fmov w9, s6 ; GISEL-NEXT: orr w10, w10, w11 -; GISEL-NEXT: fmov w11, s7 -; GISEL-NEXT: orr w12, w12, w13 -; GISEL-NEXT: orr w8, w8, w10 -; GISEL-NEXT: orr w9, w9, w11 -; GISEL-NEXT: orr w9, w12, w9 +; GISEL-NEXT: orr w11, w12, w13 +; GISEL-NEXT: orr w9, w10, w11 ; GISEL-NEXT: orr w0, w8, w9 ; GISEL-NEXT: ret %or_result = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %a) @@ -325,18 +325,18 @@ ; CHECK-NEXT: umov w8, v0.b[1] ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] -; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] ; CHECK-NEXT: orr w8, w9, w8 +; CHECK-NEXT: umov w9, v0.b[3] +; CHECK-NEXT: orr w8, w8, w10 +; CHECK-NEXT: umov w10, v0.b[4] +; CHECK-NEXT: orr w8, w8, w9 ; CHECK-NEXT: umov w9, v0.b[5] ; CHECK-NEXT: orr w8, w8, w10 ; CHECK-NEXT: umov w10, v0.b[6] -; CHECK-NEXT: orr w8, w8, w11 -; CHECK-NEXT: umov w11, v0.b[7] -; CHECK-NEXT: orr w8, w8, w12 ; CHECK-NEXT: orr w8, w8, w9 +; CHECK-NEXT: umov w9, v0.b[7] ; CHECK-NEXT: orr w8, w8, w10 -; CHECK-NEXT: orr w0, w8, w11 +; CHECK-NEXT: orr w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redor_v16i8: @@ -346,22 +346,22 @@ ; GISEL-NEXT: mov b1, v0.b[1] ; GISEL-NEXT: mov b2, v0.b[2] ; GISEL-NEXT: mov b3, v0.b[3] -; GISEL-NEXT: mov b4, v0.b[4] -; GISEL-NEXT: mov b5, v0.b[5] -; GISEL-NEXT: mov b6, v0.b[6] -; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: fmov w8, s0 ; GISEL-NEXT: fmov w9, s1 ; GISEL-NEXT: fmov w10, s2 ; GISEL-NEXT: fmov w11, s3 +; GISEL-NEXT: mov b4, v0.b[4] +; GISEL-NEXT: mov b5, v0.b[5] +; GISEL-NEXT: mov b6, v0.b[6] +; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: fmov w12, s4 ; GISEL-NEXT: fmov w13, s5 ; GISEL-NEXT: orr w8, w8, w9 ; GISEL-NEXT: fmov w9, s6 ; GISEL-NEXT: orr w10, w10, w11 ; GISEL-NEXT: fmov w11, s7 -; GISEL-NEXT: orr w12, w12, w13 ; GISEL-NEXT: orr w8, w8, w10 +; GISEL-NEXT: orr w12, w12, w13 ; GISEL-NEXT: orr w9, w9, w11 ; GISEL-NEXT: orr w9, w12, w9 ; GISEL-NEXT: orr w0, w8, w9 @@ -379,18 +379,18 @@ ; CHECK-NEXT: umov w8, v0.b[1] ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] -; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] ; CHECK-NEXT: orr w8, w9, w8 +; CHECK-NEXT: umov w9, v0.b[3] +; CHECK-NEXT: orr w8, w8, w10 +; CHECK-NEXT: umov w10, v0.b[4] +; CHECK-NEXT: orr w8, w8, w9 ; CHECK-NEXT: umov w9, v0.b[5] ; CHECK-NEXT: orr w8, w8, w10 ; CHECK-NEXT: umov w10, v0.b[6] -; CHECK-NEXT: orr w8, w8, w11 -; CHECK-NEXT: umov w11, v0.b[7] -; CHECK-NEXT: orr w8, w8, w12 ; CHECK-NEXT: orr w8, w8, w9 +; CHECK-NEXT: umov w9, v0.b[7] ; CHECK-NEXT: orr w8, w8, w10 -; CHECK-NEXT: orr w0, w8, w11 +; CHECK-NEXT: orr w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redor_v32i8: @@ -401,22 +401,22 @@ ; GISEL-NEXT: mov b1, v0.b[1] ; GISEL-NEXT: mov b2, v0.b[2] ; GISEL-NEXT: mov b3, v0.b[3] -; GISEL-NEXT: mov b4, v0.b[4] -; GISEL-NEXT: mov b5, v0.b[5] -; GISEL-NEXT: mov b6, v0.b[6] -; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: fmov w8, s0 ; GISEL-NEXT: fmov w9, s1 ; GISEL-NEXT: fmov w10, s2 ; GISEL-NEXT: fmov w11, s3 +; GISEL-NEXT: mov b4, v0.b[4] +; GISEL-NEXT: mov b5, v0.b[5] +; GISEL-NEXT: mov b6, v0.b[6] +; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: fmov w12, s4 ; GISEL-NEXT: fmov w13, s5 ; GISEL-NEXT: orr w8, w8, w9 ; GISEL-NEXT: fmov w9, s6 ; GISEL-NEXT: orr w10, w10, w11 ; GISEL-NEXT: fmov w11, s7 -; GISEL-NEXT: orr w12, w12, w13 ; GISEL-NEXT: orr w8, w8, w10 +; GISEL-NEXT: orr w12, w12, w13 ; GISEL-NEXT: orr w9, w9, w11 ; GISEL-NEXT: orr w9, w12, w9 ; GISEL-NEXT: orr w0, w8, w9 @@ -464,10 +464,10 @@ ; CHECK-NEXT: umov w8, v0.h[1] ; CHECK-NEXT: umov w9, v0.h[0] ; CHECK-NEXT: umov w10, v0.h[2] -; CHECK-NEXT: umov w11, v0.h[3] ; CHECK-NEXT: orr w8, w9, w8 +; CHECK-NEXT: umov w9, v0.h[3] ; CHECK-NEXT: orr w8, w8, w10 -; CHECK-NEXT: orr w0, w8, w11 +; CHECK-NEXT: orr w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redor_v8i16: @@ -498,10 +498,10 @@ ; CHECK-NEXT: umov w8, v0.h[1] ; CHECK-NEXT: umov w9, v0.h[0] ; CHECK-NEXT: umov w10, v0.h[2] -; CHECK-NEXT: umov w11, v0.h[3] ; CHECK-NEXT: orr w8, w9, w8 +; CHECK-NEXT: umov w9, v0.h[3] ; CHECK-NEXT: orr w8, w8, w10 -; CHECK-NEXT: orr w0, w8, w11 +; CHECK-NEXT: orr w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redor_v16i16: @@ -528,8 +528,8 @@ ; CHECK-LABEL: test_redor_v2i32: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: orr w0, w9, w8 ; CHECK-NEXT: ret ; @@ -550,8 +550,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: orr w0, w9, w8 ; CHECK-NEXT: ret ; @@ -574,8 +574,8 @@ ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: orr w0, w9, w8 ; CHECK-NEXT: ret ; diff --git a/llvm/test/CodeGen/AArch64/reduce-xor.ll b/llvm/test/CodeGen/AArch64/reduce-xor.ll --- a/llvm/test/CodeGen/AArch64/reduce-xor.ll +++ b/llvm/test/CodeGen/AArch64/reduce-xor.ll @@ -20,8 +20,8 @@ ; CHECK-LABEL: test_redxor_v2i1: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: eor w8, w9, w8 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret @@ -80,17 +80,17 @@ ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] ; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] -; CHECK-NEXT: umov w13, v0.b[5] ; CHECK-NEXT: eor w8, w9, w8 -; CHECK-NEXT: umov w9, v0.b[6] +; CHECK-NEXT: umov w9, v0.b[4] ; CHECK-NEXT: eor w8, w8, w10 -; CHECK-NEXT: umov w10, v0.b[7] +; CHECK-NEXT: umov w10, v0.b[5] ; CHECK-NEXT: eor w8, w8, w11 -; CHECK-NEXT: eor w8, w8, w12 -; CHECK-NEXT: eor w8, w8, w13 +; CHECK-NEXT: umov w11, v0.b[6] ; CHECK-NEXT: eor w8, w8, w9 +; CHECK-NEXT: umov w9, v0.b[7] ; CHECK-NEXT: eor w8, w8, w10 +; CHECK-NEXT: eor w8, w8, w11 +; CHECK-NEXT: eor w8, w8, w9 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret ; @@ -100,24 +100,24 @@ ; GISEL-NEXT: mov b1, v0.b[1] ; GISEL-NEXT: mov b2, v0.b[2] ; GISEL-NEXT: mov b3, v0.b[3] -; GISEL-NEXT: mov b4, v0.b[4] -; GISEL-NEXT: mov b5, v0.b[5] -; GISEL-NEXT: mov b6, v0.b[6] -; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: fmov w8, s0 ; GISEL-NEXT: fmov w9, s1 ; GISEL-NEXT: fmov w10, s2 ; GISEL-NEXT: fmov w11, s3 -; GISEL-NEXT: fmov w12, s4 -; GISEL-NEXT: fmov w13, s5 +; GISEL-NEXT: mov b4, v0.b[4] +; GISEL-NEXT: mov b5, v0.b[5] +; GISEL-NEXT: mov b6, v0.b[6] +; GISEL-NEXT: mov b7, v0.b[7] +; GISEL-NEXT: eor w8, w8, w9 +; GISEL-NEXT: eor w9, w10, w11 +; GISEL-NEXT: fmov w10, s4 +; GISEL-NEXT: fmov w11, s5 +; GISEL-NEXT: fmov w12, s6 +; GISEL-NEXT: fmov w13, s7 ; GISEL-NEXT: eor w8, w8, w9 -; GISEL-NEXT: fmov w9, s6 ; GISEL-NEXT: eor w10, w10, w11 -; GISEL-NEXT: fmov w11, s7 -; GISEL-NEXT: eor w12, w12, w13 -; GISEL-NEXT: eor w8, w8, w10 -; GISEL-NEXT: eor w9, w9, w11 -; GISEL-NEXT: eor w9, w12, w9 +; GISEL-NEXT: eor w11, w12, w13 +; GISEL-NEXT: eor w9, w10, w11 ; GISEL-NEXT: eor w8, w8, w9 ; GISEL-NEXT: and w0, w8, #0x1 ; GISEL-NEXT: ret @@ -133,18 +133,18 @@ ; CHECK-NEXT: umov w8, v0.b[1] ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] -; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] ; CHECK-NEXT: eor w8, w9, w8 +; CHECK-NEXT: umov w9, v0.b[3] +; CHECK-NEXT: eor w8, w8, w10 +; CHECK-NEXT: umov w10, v0.b[4] +; CHECK-NEXT: eor w8, w8, w9 ; CHECK-NEXT: umov w9, v0.b[5] ; CHECK-NEXT: eor w8, w8, w10 ; CHECK-NEXT: umov w10, v0.b[6] -; CHECK-NEXT: eor w8, w8, w11 -; CHECK-NEXT: umov w11, v0.b[7] -; CHECK-NEXT: eor w8, w8, w12 ; CHECK-NEXT: eor w8, w8, w9 +; CHECK-NEXT: umov w9, v0.b[7] ; CHECK-NEXT: eor w8, w8, w10 -; CHECK-NEXT: eor w8, w8, w11 +; CHECK-NEXT: eor w8, w8, w9 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret ; @@ -153,45 +153,45 @@ ; GISEL-NEXT: mov b1, v0.b[1] ; GISEL-NEXT: mov b2, v0.b[2] ; GISEL-NEXT: mov b3, v0.b[3] -; GISEL-NEXT: mov b4, v0.b[4] -; GISEL-NEXT: mov b5, v0.b[5] -; GISEL-NEXT: mov b6, v0.b[6] -; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: fmov w8, s0 ; GISEL-NEXT: fmov w9, s1 ; GISEL-NEXT: fmov w10, s2 ; GISEL-NEXT: fmov w11, s3 -; GISEL-NEXT: mov b16, v0.b[8] -; GISEL-NEXT: mov b17, v0.b[9] -; GISEL-NEXT: mov b18, v0.b[10] -; GISEL-NEXT: mov b19, v0.b[11] +; GISEL-NEXT: mov b4, v0.b[4] +; GISEL-NEXT: mov b5, v0.b[5] +; GISEL-NEXT: mov b6, v0.b[6] +; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: eor w8, w8, w9 ; GISEL-NEXT: eor w9, w10, w11 ; GISEL-NEXT: fmov w10, s4 ; GISEL-NEXT: fmov w11, s5 ; GISEL-NEXT: fmov w12, s6 ; GISEL-NEXT: fmov w13, s7 -; GISEL-NEXT: mov b20, v0.b[12] -; GISEL-NEXT: mov b21, v0.b[13] -; GISEL-NEXT: mov b22, v0.b[14] -; GISEL-NEXT: mov b23, v0.b[15] +; GISEL-NEXT: mov b16, v0.b[8] +; GISEL-NEXT: mov b17, v0.b[9] +; GISEL-NEXT: mov b18, v0.b[10] +; GISEL-NEXT: mov b19, v0.b[11] ; GISEL-NEXT: eor w10, w10, w11 ; GISEL-NEXT: eor w11, w12, w13 ; GISEL-NEXT: fmov w12, s16 ; GISEL-NEXT: fmov w13, s17 ; GISEL-NEXT: fmov w14, s18 ; GISEL-NEXT: fmov w15, s19 -; GISEL-NEXT: fmov w16, s22 -; GISEL-NEXT: fmov w17, s23 -; GISEL-NEXT: eor w8, w8, w9 +; GISEL-NEXT: mov b20, v0.b[12] +; GISEL-NEXT: mov b21, v0.b[13] +; GISEL-NEXT: mov b22, v0.b[14] +; GISEL-NEXT: mov b23, v0.b[15] ; GISEL-NEXT: eor w12, w12, w13 -; GISEL-NEXT: eor w9, w10, w11 ; GISEL-NEXT: eor w13, w14, w15 ; GISEL-NEXT: fmov w14, s20 ; GISEL-NEXT: fmov w15, s21 -; GISEL-NEXT: eor w10, w12, w13 +; GISEL-NEXT: fmov w16, s22 +; GISEL-NEXT: fmov w17, s23 ; GISEL-NEXT: eor w8, w8, w9 +; GISEL-NEXT: eor w9, w10, w11 +; GISEL-NEXT: eor w10, w12, w13 ; GISEL-NEXT: eor w14, w14, w15 +; GISEL-NEXT: eor w8, w8, w9 ; GISEL-NEXT: eor w15, w16, w17 ; GISEL-NEXT: eor w11, w14, w15 ; GISEL-NEXT: eor w9, w10, w11 @@ -273,17 +273,17 @@ ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] ; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] -; CHECK-NEXT: umov w13, v0.b[5] ; CHECK-NEXT: eor w8, w9, w8 -; CHECK-NEXT: umov w9, v0.b[6] +; CHECK-NEXT: umov w9, v0.b[4] ; CHECK-NEXT: eor w8, w8, w10 -; CHECK-NEXT: umov w10, v0.b[7] +; CHECK-NEXT: umov w10, v0.b[5] ; CHECK-NEXT: eor w8, w8, w11 -; CHECK-NEXT: eor w8, w8, w12 -; CHECK-NEXT: eor w8, w8, w13 +; CHECK-NEXT: umov w11, v0.b[6] ; CHECK-NEXT: eor w8, w8, w9 -; CHECK-NEXT: eor w0, w8, w10 +; CHECK-NEXT: umov w9, v0.b[7] +; CHECK-NEXT: eor w8, w8, w10 +; CHECK-NEXT: eor w8, w8, w11 +; CHECK-NEXT: eor w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redxor_v8i8: @@ -292,24 +292,24 @@ ; GISEL-NEXT: mov b1, v0.b[1] ; GISEL-NEXT: mov b2, v0.b[2] ; GISEL-NEXT: mov b3, v0.b[3] -; GISEL-NEXT: mov b4, v0.b[4] -; GISEL-NEXT: mov b5, v0.b[5] -; GISEL-NEXT: mov b6, v0.b[6] -; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: fmov w8, s0 ; GISEL-NEXT: fmov w9, s1 ; GISEL-NEXT: fmov w10, s2 ; GISEL-NEXT: fmov w11, s3 -; GISEL-NEXT: fmov w12, s4 -; GISEL-NEXT: fmov w13, s5 +; GISEL-NEXT: mov b4, v0.b[4] +; GISEL-NEXT: mov b5, v0.b[5] +; GISEL-NEXT: mov b6, v0.b[6] +; GISEL-NEXT: mov b7, v0.b[7] +; GISEL-NEXT: eor w8, w8, w9 +; GISEL-NEXT: eor w9, w10, w11 +; GISEL-NEXT: fmov w10, s4 +; GISEL-NEXT: fmov w11, s5 +; GISEL-NEXT: fmov w12, s6 +; GISEL-NEXT: fmov w13, s7 ; GISEL-NEXT: eor w8, w8, w9 -; GISEL-NEXT: fmov w9, s6 ; GISEL-NEXT: eor w10, w10, w11 -; GISEL-NEXT: fmov w11, s7 -; GISEL-NEXT: eor w12, w12, w13 -; GISEL-NEXT: eor w8, w8, w10 -; GISEL-NEXT: eor w9, w9, w11 -; GISEL-NEXT: eor w9, w12, w9 +; GISEL-NEXT: eor w11, w12, w13 +; GISEL-NEXT: eor w9, w10, w11 ; GISEL-NEXT: eor w0, w8, w9 ; GISEL-NEXT: ret %xor_result = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> %a) @@ -324,18 +324,18 @@ ; CHECK-NEXT: umov w8, v0.b[1] ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] -; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] ; CHECK-NEXT: eor w8, w9, w8 +; CHECK-NEXT: umov w9, v0.b[3] +; CHECK-NEXT: eor w8, w8, w10 +; CHECK-NEXT: umov w10, v0.b[4] +; CHECK-NEXT: eor w8, w8, w9 ; CHECK-NEXT: umov w9, v0.b[5] ; CHECK-NEXT: eor w8, w8, w10 ; CHECK-NEXT: umov w10, v0.b[6] -; CHECK-NEXT: eor w8, w8, w11 -; CHECK-NEXT: umov w11, v0.b[7] -; CHECK-NEXT: eor w8, w8, w12 ; CHECK-NEXT: eor w8, w8, w9 +; CHECK-NEXT: umov w9, v0.b[7] ; CHECK-NEXT: eor w8, w8, w10 -; CHECK-NEXT: eor w0, w8, w11 +; CHECK-NEXT: eor w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redxor_v16i8: @@ -345,22 +345,22 @@ ; GISEL-NEXT: mov b1, v0.b[1] ; GISEL-NEXT: mov b2, v0.b[2] ; GISEL-NEXT: mov b3, v0.b[3] -; GISEL-NEXT: mov b4, v0.b[4] -; GISEL-NEXT: mov b5, v0.b[5] -; GISEL-NEXT: mov b6, v0.b[6] -; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: fmov w8, s0 ; GISEL-NEXT: fmov w9, s1 ; GISEL-NEXT: fmov w10, s2 ; GISEL-NEXT: fmov w11, s3 +; GISEL-NEXT: mov b4, v0.b[4] +; GISEL-NEXT: mov b5, v0.b[5] +; GISEL-NEXT: mov b6, v0.b[6] +; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: fmov w12, s4 ; GISEL-NEXT: fmov w13, s5 ; GISEL-NEXT: eor w8, w8, w9 ; GISEL-NEXT: fmov w9, s6 ; GISEL-NEXT: eor w10, w10, w11 ; GISEL-NEXT: fmov w11, s7 -; GISEL-NEXT: eor w12, w12, w13 ; GISEL-NEXT: eor w8, w8, w10 +; GISEL-NEXT: eor w12, w12, w13 ; GISEL-NEXT: eor w9, w9, w11 ; GISEL-NEXT: eor w9, w12, w9 ; GISEL-NEXT: eor w0, w8, w9 @@ -378,18 +378,18 @@ ; CHECK-NEXT: umov w8, v0.b[1] ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] -; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] ; CHECK-NEXT: eor w8, w9, w8 +; CHECK-NEXT: umov w9, v0.b[3] +; CHECK-NEXT: eor w8, w8, w10 +; CHECK-NEXT: umov w10, v0.b[4] +; CHECK-NEXT: eor w8, w8, w9 ; CHECK-NEXT: umov w9, v0.b[5] ; CHECK-NEXT: eor w8, w8, w10 ; CHECK-NEXT: umov w10, v0.b[6] -; CHECK-NEXT: eor w8, w8, w11 -; CHECK-NEXT: umov w11, v0.b[7] -; CHECK-NEXT: eor w8, w8, w12 ; CHECK-NEXT: eor w8, w8, w9 +; CHECK-NEXT: umov w9, v0.b[7] ; CHECK-NEXT: eor w8, w8, w10 -; CHECK-NEXT: eor w0, w8, w11 +; CHECK-NEXT: eor w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redxor_v32i8: @@ -400,22 +400,22 @@ ; GISEL-NEXT: mov b1, v0.b[1] ; GISEL-NEXT: mov b2, v0.b[2] ; GISEL-NEXT: mov b3, v0.b[3] -; GISEL-NEXT: mov b4, v0.b[4] -; GISEL-NEXT: mov b5, v0.b[5] -; GISEL-NEXT: mov b6, v0.b[6] -; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: fmov w8, s0 ; GISEL-NEXT: fmov w9, s1 ; GISEL-NEXT: fmov w10, s2 ; GISEL-NEXT: fmov w11, s3 +; GISEL-NEXT: mov b4, v0.b[4] +; GISEL-NEXT: mov b5, v0.b[5] +; GISEL-NEXT: mov b6, v0.b[6] +; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: fmov w12, s4 ; GISEL-NEXT: fmov w13, s5 ; GISEL-NEXT: eor w8, w8, w9 ; GISEL-NEXT: fmov w9, s6 ; GISEL-NEXT: eor w10, w10, w11 ; GISEL-NEXT: fmov w11, s7 -; GISEL-NEXT: eor w12, w12, w13 ; GISEL-NEXT: eor w8, w8, w10 +; GISEL-NEXT: eor w12, w12, w13 ; GISEL-NEXT: eor w9, w9, w11 ; GISEL-NEXT: eor w9, w12, w9 ; GISEL-NEXT: eor w0, w8, w9 @@ -463,10 +463,10 @@ ; CHECK-NEXT: umov w8, v0.h[1] ; CHECK-NEXT: umov w9, v0.h[0] ; CHECK-NEXT: umov w10, v0.h[2] -; CHECK-NEXT: umov w11, v0.h[3] ; CHECK-NEXT: eor w8, w9, w8 +; CHECK-NEXT: umov w9, v0.h[3] ; CHECK-NEXT: eor w8, w8, w10 -; CHECK-NEXT: eor w0, w8, w11 +; CHECK-NEXT: eor w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redxor_v8i16: @@ -497,10 +497,10 @@ ; CHECK-NEXT: umov w8, v0.h[1] ; CHECK-NEXT: umov w9, v0.h[0] ; CHECK-NEXT: umov w10, v0.h[2] -; CHECK-NEXT: umov w11, v0.h[3] ; CHECK-NEXT: eor w8, w9, w8 +; CHECK-NEXT: umov w9, v0.h[3] ; CHECK-NEXT: eor w8, w8, w10 -; CHECK-NEXT: eor w0, w8, w11 +; CHECK-NEXT: eor w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redxor_v16i16: @@ -527,8 +527,8 @@ ; CHECK-LABEL: test_redxor_v2i32: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: eor w0, w9, w8 ; CHECK-NEXT: ret ; @@ -549,8 +549,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: eor w0, w9, w8 ; CHECK-NEXT: ret ; @@ -573,8 +573,8 @@ ; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: eor w0, w9, w8 ; CHECK-NEXT: ret ; diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll --- a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll @@ -54,9 +54,9 @@ define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind { ; CHECK-LABEL: v64i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sqadd v2.16b, v2.16b, v6.16b ; CHECK-NEXT: sqadd v0.16b, v0.16b, v4.16b ; CHECK-NEXT: sqadd v1.16b, v1.16b, v5.16b +; CHECK-NEXT: sqadd v2.16b, v2.16b, v6.16b ; CHECK-NEXT: sqadd v3.16b, v3.16b, v7.16b ; CHECK-NEXT: ret %z = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> %x, <64 x i8> %y) @@ -85,9 +85,9 @@ define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind { ; CHECK-LABEL: v32i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sqadd v2.8h, v2.8h, v6.8h ; CHECK-NEXT: sqadd v0.8h, v0.8h, v4.8h ; CHECK-NEXT: sqadd v1.8h, v1.8h, v5.8h +; CHECK-NEXT: sqadd v2.8h, v2.8h, v6.8h ; CHECK-NEXT: sqadd v3.8h, v3.8h, v7.8h ; CHECK-NEXT: ret %z = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> %x, <32 x i16> %y) @@ -97,9 +97,9 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind { ; CHECK-LABEL: v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: sqadd v0.8b, v0.8b, v1.8b +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: sqadd v0.8b, v1.8b, v0.8b ; CHECK-NEXT: str d0, [x2] ; CHECK-NEXT: ret %x = load <8 x i8>, <8 x i8>* %px @@ -143,10 +143,10 @@ ; CHECK-NEXT: shl v1.2s, v1.2s, #24 ; CHECK-NEXT: sqadd v0.2s, v1.2s, v0.2s ; CHECK-NEXT: ushr v0.2s, v0.2s, #24 -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strb w9, [x2] -; CHECK-NEXT: strb w8, [x2, #1] +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: mov w9, v0.s[1] +; CHECK-NEXT: strb w9, [x2, #1] +; CHECK-NEXT: strb w8, [x2] ; CHECK-NEXT: ret %x = load <2 x i8>, <2 x i8>* %px %y = load <2 x i8>, <2 x i8>* %py @@ -158,9 +158,9 @@ define void @v4i16(<4 x i16>* %px, <4 x i16>* %py, <4 x i16>* %pz) nounwind { ; CHECK-LABEL: v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: sqadd v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: sqadd v0.4h, v1.4h, v0.4h ; CHECK-NEXT: str d0, [x2] ; CHECK-NEXT: ret %x = load <4 x i16>, <4 x i16>* %px @@ -183,10 +183,10 @@ ; CHECK-NEXT: shl v1.2s, v1.2s, #16 ; CHECK-NEXT: sqadd v0.2s, v1.2s, v0.2s ; CHECK-NEXT: ushr v0.2s, v0.2s, #16 -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strh w9, [x2] -; CHECK-NEXT: strh w8, [x2, #2] +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: mov w9, v0.s[1] +; CHECK-NEXT: strh w9, [x2, #2] +; CHECK-NEXT: strh w8, [x2] ; CHECK-NEXT: ret %x = load <2 x i16>, <2 x i16>* %px %y = load <2 x i16>, <2 x i16>* %py @@ -224,9 +224,9 @@ define void @v1i8(<1 x i8>* %px, <1 x i8>* %py, <1 x i8>* %pz) nounwind { ; CHECK-LABEL: v1i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr b0, [x0] -; CHECK-NEXT: ldr b1, [x1] -; CHECK-NEXT: sqadd v0.8b, v0.8b, v1.8b +; CHECK-NEXT: ldr b0, [x1] +; CHECK-NEXT: ldr b1, [x0] +; CHECK-NEXT: sqadd v0.8b, v1.8b, v0.8b ; CHECK-NEXT: st1 { v0.b }[0], [x2] ; CHECK-NEXT: ret %x = load <1 x i8>, <1 x i8>* %px @@ -239,9 +239,9 @@ define void @v1i16(<1 x i16>* %px, <1 x i16>* %py, <1 x i16>* %pz) nounwind { ; CHECK-LABEL: v1i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr h0, [x0] -; CHECK-NEXT: ldr h1, [x1] -; CHECK-NEXT: sqadd v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ldr h0, [x1] +; CHECK-NEXT: ldr h1, [x0] +; CHECK-NEXT: sqadd v0.4h, v1.4h, v0.4h ; CHECK-NEXT: str h0, [x2] ; CHECK-NEXT: ret %x = load <1 x i16>, <1 x i16>* %px @@ -254,10 +254,10 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind { ; CHECK-LABEL: v16i4: ; CHECK: // %bb.0: -; CHECK-NEXT: shl v1.16b, v1.16b, #4 ; CHECK-NEXT: shl v0.16b, v0.16b, #4 -; CHECK-NEXT: sshr v1.16b, v1.16b, #4 +; CHECK-NEXT: shl v1.16b, v1.16b, #4 ; CHECK-NEXT: sshr v0.16b, v0.16b, #4 +; CHECK-NEXT: sshr v1.16b, v1.16b, #4 ; CHECK-NEXT: shl v1.16b, v1.16b, #4 ; CHECK-NEXT: shl v0.16b, v0.16b, #4 ; CHECK-NEXT: sqadd v0.16b, v0.16b, v1.16b @@ -307,9 +307,9 @@ define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind { ; CHECK-LABEL: v16i32: ; CHECK: // %bb.0: -; CHECK-NEXT: sqadd v2.4s, v2.4s, v6.4s ; CHECK-NEXT: sqadd v0.4s, v0.4s, v4.4s ; CHECK-NEXT: sqadd v1.4s, v1.4s, v5.4s +; CHECK-NEXT: sqadd v2.4s, v2.4s, v6.4s ; CHECK-NEXT: sqadd v3.4s, v3.4s, v7.4s ; CHECK-NEXT: ret %z = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> %x, <16 x i32> %y) @@ -338,9 +338,9 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind { ; CHECK-LABEL: v8i64: ; CHECK: // %bb.0: -; CHECK-NEXT: sqadd v2.2d, v2.2d, v6.2d ; CHECK-NEXT: sqadd v0.2d, v0.2d, v4.2d ; CHECK-NEXT: sqadd v1.2d, v1.2d, v5.2d +; CHECK-NEXT: sqadd v2.2d, v2.2d, v6.2d ; CHECK-NEXT: sqadd v3.2d, v3.2d, v7.2d ; CHECK-NEXT: ret %z = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> %x, <8 x i64> %y) diff --git a/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll b/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll --- a/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll +++ b/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll @@ -205,9 +205,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ushll v0.2d, v0.2s, #0 ; CHECK-NEXT: mov w8, #65536 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: shl v0.2d, v0.2d, #63 ; CHECK-NEXT: sshr v0.2d, v0.2d, #63 +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %shl = select <2 x i1> %t, <2 x i64> , <2 x i64> zeroinitializer diff --git a/llvm/test/CodeGen/AArch64/shift-mod.ll b/llvm/test/CodeGen/AArch64/shift-mod.ll --- a/llvm/test/CodeGen/AArch64/shift-mod.ll +++ b/llvm/test/CodeGen/AArch64/shift-mod.ll @@ -102,8 +102,8 @@ define <4 x i32> @ashr_add_shl_v4i8(<4 x i32> %r) { ; CHECK-LABEL: ashr_add_shl_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.4s, #1, lsl #24 ; CHECK-NEXT: shl v0.4s, v0.4s, #24 +; CHECK-NEXT: movi v1.4s, #1, lsl #24 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: sshr v0.4s, v0.4s, #24 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll b/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll --- a/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll +++ b/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll @@ -160,8 +160,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI12_0 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI12_0] -; CHECK-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI12_0] +; CHECK-NEXT: add v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %t0 = add <4 x i32> %a, ; constant always on RHS %r = add <4 x i32> %t0, %b @@ -172,8 +172,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI13_0 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI13_0] -; CHECK-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI13_0] +; CHECK-NEXT: add v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %t0 = add <4 x i32> %a, ; constant always on RHS %r = add <4 x i32> %b, %t0 @@ -188,8 +188,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI14_0 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI14_0] -; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI14_0] +; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %t0 = sub <4 x i32> %a, %r = add <4 x i32> %t0, %b @@ -200,8 +200,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI15_0 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_0] -; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI15_0] +; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %t0 = sub <4 x i32> %a, %r = add <4 x i32> %b, %t0 @@ -216,8 +216,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI16_0 ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] -; CHECK-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI16_0] +; CHECK-NEXT: add v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %t0 = sub <4 x i32> , %a %r = add <4 x i32> %t0, %b @@ -228,8 +228,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI17_0 ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_0] -; CHECK-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI17_0] +; CHECK-NEXT: add v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %t0 = sub <4 x i32> , %a %r = add <4 x i32> %b, %t0 @@ -244,8 +244,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI18_0 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI18_0] -; CHECK-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI18_0] +; CHECK-NEXT: add v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %t0 = add <4 x i32> %a, ; constant always on RHS %r = sub <4 x i32> %t0, %b @@ -256,8 +256,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI19_0 ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI19_0] -; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI19_0] +; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %t0 = add <4 x i32> %a, ; constant always on RHS %r = sub <4 x i32> %b, %t0 @@ -272,8 +272,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI20_0 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI20_0] -; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI20_0] +; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %t0 = sub <4 x i32> %a, %r = sub <4 x i32> %t0, %b @@ -284,8 +284,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI21_0 ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI21_0] -; CHECK-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI21_0] +; CHECK-NEXT: add v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %t0 = sub <4 x i32> %a, %r = sub <4 x i32> %b, %t0 @@ -300,8 +300,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI22_0 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI22_0] -; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI22_0] +; CHECK-NEXT: sub v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ret %t0 = sub <4 x i32> , %a %r = sub <4 x i32> %t0, %b @@ -312,8 +312,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI23_0 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI23_0] -; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI23_0] +; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %t0 = sub <4 x i32> , %a %r = sub <4 x i32> %b, %t0 diff --git a/llvm/test/CodeGen/AArch64/sitofp-fixed-legal.ll b/llvm/test/CodeGen/AArch64/sitofp-fixed-legal.ll --- a/llvm/test/CodeGen/AArch64/sitofp-fixed-legal.ll +++ b/llvm/test/CodeGen/AArch64/sitofp-fixed-legal.ll @@ -5,20 +5,20 @@ ; CHECK-LABEL: test_sitofp_fixed: ; CHECK: ; %bb.0: ; CHECK-NEXT: sshll2.2d v4, v2, #0 -; CHECK-NEXT: sshll.2d v16, v1, #0 ; CHECK-NEXT: sshll2.2d v5, v0, #0 ; CHECK-NEXT: sshll2.2d v6, v1, #0 ; CHECK-NEXT: sshll2.2d v7, v3, #0 ; CHECK-NEXT: sshll.2d v0, v0, #0 +; CHECK-NEXT: sshll.2d v16, v1, #0 ; CHECK-NEXT: sshll.2d v17, v2, #0 ; CHECK-NEXT: sshll.2d v18, v3, #0 ; CHECK-NEXT: scvtf.2d v1, v5, #6 +; CHECK-NEXT: scvtf.2d v0, v0, #6 ; CHECK-NEXT: scvtf.2d v3, v6, #6 ; CHECK-NEXT: scvtf.2d v2, v16, #6 ; CHECK-NEXT: scvtf.2d v5, v4, #6 -; CHECK-NEXT: scvtf.2d v0, v0, #6 -; CHECK-NEXT: scvtf.2d v7, v7, #6 ; CHECK-NEXT: scvtf.2d v4, v17, #6 +; CHECK-NEXT: scvtf.2d v7, v7, #6 ; CHECK-NEXT: scvtf.2d v6, v18, #6 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll --- a/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll @@ -60,49 +60,49 @@ ; CHECK-LABEL: test_srem_vec: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x11, #7282 -; CHECK-NEXT: sbfx x10, x0, #0, #33 -; CHECK-NEXT: movk x11, #29127, lsl #16 ; CHECK-NEXT: mov x9, #7281 -; CHECK-NEXT: movk x11, #50972, lsl #32 +; CHECK-NEXT: movk x11, #29127, lsl #16 ; CHECK-NEXT: movk x9, #29127, lsl #16 -; CHECK-NEXT: movk x11, #7281, lsl #48 +; CHECK-NEXT: movk x11, #50972, lsl #32 ; CHECK-NEXT: movk x9, #50972, lsl #32 -; CHECK-NEXT: sbfx x13, x1, #0, #33 +; CHECK-NEXT: sbfx x10, x0, #0, #33 +; CHECK-NEXT: movk x11, #7281, lsl #48 ; CHECK-NEXT: sbfx x8, x2, #0, #33 -; CHECK-NEXT: smulh x12, x10, x11 ; CHECK-NEXT: movk x9, #7281, lsl #48 -; CHECK-NEXT: smulh x11, x13, x11 +; CHECK-NEXT: smulh x13, x10, x11 +; CHECK-NEXT: sbfx x12, x1, #0, #33 ; CHECK-NEXT: smulh x9, x8, x9 -; CHECK-NEXT: add x12, x12, x12, lsr #63 +; CHECK-NEXT: smulh x11, x12, x11 ; CHECK-NEXT: sub x9, x9, x8 -; CHECK-NEXT: add x11, x11, x11, lsr #63 -; CHECK-NEXT: add x12, x12, x12, lsl #3 +; CHECK-NEXT: add x13, x13, x13, lsr #63 ; CHECK-NEXT: asr x14, x9, #3 -; CHECK-NEXT: sub x10, x10, x12 ; CHECK-NEXT: add x9, x14, x9, lsr #63 -; CHECK-NEXT: add x11, x11, x11, lsl #3 -; CHECK-NEXT: sub x11, x13, x11 +; CHECK-NEXT: add x13, x13, x13, lsl #3 +; CHECK-NEXT: add x11, x11, x11, lsr #63 +; CHECK-NEXT: sub x10, x10, x13 ; CHECK-NEXT: add x9, x9, x9, lsl #3 -; CHECK-NEXT: fmov d0, x10 +; CHECK-NEXT: mov x13, #8589934591 +; CHECK-NEXT: add x11, x11, x11, lsl #3 ; CHECK-NEXT: add x8, x8, x9 -; CHECK-NEXT: mov x9, #8589934591 -; CHECK-NEXT: mov v0.d[1], x11 -; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: dup v2.2d, x9 -; CHECK-NEXT: adrp x8, .LCPI3_0 +; CHECK-NEXT: sub x11, x12, x11 +; CHECK-NEXT: adrp x12, .LCPI3_0 +; CHECK-NEXT: fmov d0, x10 ; CHECK-NEXT: adrp x9, .LCPI3_1 -; CHECK-NEXT: and v1.16b, v1.16b, v2.16b -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_0] -; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI3_1] +; CHECK-NEXT: fmov d3, x8 +; CHECK-NEXT: dup v1.2d, x13 +; CHECK-NEXT: ldr q2, [x12, :lo12:.LCPI3_0] +; CHECK-NEXT: mov v0.d[1], x11 +; CHECK-NEXT: ldr q4, [x9, :lo12:.LCPI3_1] +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b +; CHECK-NEXT: and v1.16b, v3.16b, v1.16b ; CHECK-NEXT: cmeq v0.2d, v0.2d, v2.2d -; CHECK-NEXT: cmeq v1.2d, v1.2d, v3.2d +; CHECK-NEXT: cmeq v1.2d, v1.2d, v4.2d ; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: xtn v0.2s, v0.2d ; CHECK-NEXT: xtn v1.2s, v1.2d -; CHECK-NEXT: mov w1, v0.s[1] ; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: mov w1, v0.s[1] ; CHECK-NEXT: fmov w2, s1 ; CHECK-NEXT: ret %srem = srem <3 x i33> %X, diff --git a/llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll b/llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll --- a/llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll +++ b/llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll @@ -17,9 +17,9 @@ ; CHECK-NEXT: adrp x8, .LCPI0_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI0_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -39,12 +39,12 @@ ; CHECK-NEXT: mov w9, #39321 ; CHECK-NEXT: movk w8, #52428, lsl #16 ; CHECK-NEXT: movk w9, #6553, lsl #16 -; CHECK-NEXT: adrp x10, .LCPI1_0 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 +; CHECK-NEXT: adrp x8, .LCPI1_0 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q0, [x10, :lo12:.LCPI1_0] ; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI1_0] ; CHECK-NEXT: cmhs v0.4s, v0.4s, v2.4s ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -60,12 +60,12 @@ ; CHECK-NEXT: mov w9, #39321 ; CHECK-NEXT: movk w8, #52428, lsl #16 ; CHECK-NEXT: movk w9, #6553, lsl #16 -; CHECK-NEXT: adrp x10, .LCPI2_0 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 +; CHECK-NEXT: adrp x8, .LCPI2_0 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q0, [x10, :lo12:.LCPI2_0] ; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI2_0] ; CHECK-NEXT: cmhi v0.4s, v2.4s, v0.4s ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -89,10 +89,10 @@ ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: shl v0.4s, v2.4s, #31 ; CHECK-NEXT: ushr v1.4s, v2.4s, #1 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_0] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, @@ -113,10 +113,10 @@ ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: shl v0.4s, v2.4s, #31 ; CHECK-NEXT: ushr v1.4s, v2.4s, #1 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI4_0] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_0] -; CHECK-NEXT: cmhi v0.4s, v0.4s, v1.4s ; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: cmhi v0.4s, v0.4s, v2.4s ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, @@ -141,9 +141,9 @@ ; CHECK-NEXT: adrp x8, .LCPI5_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI5_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI5_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -167,9 +167,9 @@ ; CHECK-NEXT: adrp x8, .LCPI6_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI6_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI6_4] -; CHECK-NEXT: cmhi v0.4s, v0.4s, v1.4s +; CHECK-NEXT: cmhi v0.4s, v0.4s, v2.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -197,9 +197,9 @@ ; CHECK-NEXT: adrp x8, .LCPI7_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI7_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI7_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -225,9 +225,9 @@ ; CHECK-NEXT: adrp x8, .LCPI8_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI8_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI8_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -253,9 +253,9 @@ ; CHECK-NEXT: adrp x8, .LCPI9_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI9_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI9_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -275,12 +275,12 @@ ; CHECK-NEXT: mov w9, #39321 ; CHECK-NEXT: movk w8, #52428, lsl #16 ; CHECK-NEXT: movk w9, #6553, lsl #16 -; CHECK-NEXT: adrp x10, .LCPI10_0 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 +; CHECK-NEXT: adrp x8, .LCPI10_0 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q0, [x10, :lo12:.LCPI10_0] ; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI10_0] ; CHECK-NEXT: cmhs v0.4s, v0.4s, v2.4s ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -304,10 +304,10 @@ ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: shl v0.4s, v2.4s, #31 ; CHECK-NEXT: ushr v1.4s, v2.4s, #1 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI11_0] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI11_0] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, @@ -332,9 +332,9 @@ ; CHECK-NEXT: adrp x8, .LCPI12_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI12_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI12_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -355,12 +355,12 @@ ; CHECK-NEXT: adrp x8, .LCPI13_1 ; CHECK-NEXT: smull2 v2.2d, v0.4s, v1.4s ; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s -; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI13_1] +; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI13_1] ; CHECK-NEXT: adrp x8, .LCPI13_2 -; CHECK-NEXT: mla v1.4s, v0.4s, v2.4s +; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI13_2] ; CHECK-NEXT: adrp x8, .LCPI13_3 +; CHECK-NEXT: mla v1.4s, v0.4s, v3.4s ; CHECK-NEXT: sshl v2.4s, v1.4s, v2.4s ; CHECK-NEXT: usra v2.4s, v1.4s, #31 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI13_3] @@ -384,12 +384,12 @@ ; CHECK-NEXT: adrp x8, .LCPI14_1 ; CHECK-NEXT: smull2 v2.2d, v0.4s, v1.4s ; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s -; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI14_1] +; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI14_1] ; CHECK-NEXT: adrp x8, .LCPI14_2 -; CHECK-NEXT: mla v1.4s, v0.4s, v2.4s +; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI14_2] ; CHECK-NEXT: adrp x8, .LCPI14_3 +; CHECK-NEXT: mla v1.4s, v0.4s, v3.4s ; CHECK-NEXT: sshl v2.4s, v1.4s, v2.4s ; CHECK-NEXT: usra v2.4s, v1.4s, #31 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI14_3] @@ -413,12 +413,12 @@ ; CHECK-NEXT: adrp x8, .LCPI15_1 ; CHECK-NEXT: smull2 v2.2d, v0.4s, v1.4s ; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s -; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI15_1] +; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI15_1] ; CHECK-NEXT: adrp x8, .LCPI15_2 -; CHECK-NEXT: mla v1.4s, v0.4s, v2.4s +; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI15_2] ; CHECK-NEXT: adrp x8, .LCPI15_3 +; CHECK-NEXT: mla v1.4s, v0.4s, v3.4s ; CHECK-NEXT: sshl v2.4s, v1.4s, v2.4s ; CHECK-NEXT: usra v2.4s, v1.4s, #31 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_3] @@ -451,9 +451,9 @@ ; CHECK-NEXT: adrp x8, .LCPI16_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI16_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -479,9 +479,9 @@ ; CHECK-NEXT: adrp x8, .LCPI17_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI17_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -507,9 +507,9 @@ ; CHECK-NEXT: adrp x8, .LCPI18_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI18_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI18_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -529,12 +529,12 @@ ; CHECK-NEXT: mov w9, #39321 ; CHECK-NEXT: movk w8, #52428, lsl #16 ; CHECK-NEXT: movk w9, #6553, lsl #16 -; CHECK-NEXT: adrp x10, .LCPI19_0 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 +; CHECK-NEXT: adrp x8, .LCPI19_0 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q0, [x10, :lo12:.LCPI19_0] ; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI19_0] ; CHECK-NEXT: cmhs v0.4s, v0.4s, v2.4s ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -558,10 +558,10 @@ ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: shl v0.4s, v2.4s, #31 ; CHECK-NEXT: ushr v1.4s, v2.4s, #1 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI20_0] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI20_0] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, @@ -586,9 +586,9 @@ ; CHECK-NEXT: adrp x8, .LCPI21_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI21_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI21_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -616,9 +616,9 @@ ; CHECK-NEXT: adrp x8, .LCPI22_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI22_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI22_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -644,9 +644,9 @@ ; CHECK-NEXT: adrp x8, .LCPI23_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI23_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI23_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -672,9 +672,9 @@ ; CHECK-NEXT: adrp x8, .LCPI24_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI24_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI24_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -701,9 +701,9 @@ ; CHECK-NEXT: adrp x8, .LCPI25_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI25_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI25_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -728,9 +728,9 @@ ; CHECK-NEXT: adrp x8, .LCPI26_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI26_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI26_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll b/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll --- a/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll +++ b/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll @@ -40,9 +40,9 @@ ; CHECK-NEXT: movk w8, #655, lsl #16 ; CHECK-NEXT: shl v0.4s, v2.4s, #30 ; CHECK-NEXT: ushr v1.4s, v2.4s, #2 +; CHECK-NEXT: dup v2.4s, w8 ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: dup v1.4s, w8 -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -93,9 +93,9 @@ ; CHECK-NEXT: movk w8, #655, lsl #16 ; CHECK-NEXT: shl v0.4s, v2.4s, #30 ; CHECK-NEXT: ushr v1.4s, v2.4s, #2 +; CHECK-NEXT: dup v2.4s, w8 ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: dup v1.4s, w8 -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -114,14 +114,14 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #34079 ; CHECK-NEXT: movk w8, #20971, lsl #16 +; CHECK-NEXT: dup v1.4s, w8 +; CHECK-NEXT: smull2 v2.2d, v0.4s, v1.4s +; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s +; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s +; CHECK-NEXT: sshr v2.4s, v1.4s, #3 +; CHECK-NEXT: usra v2.4s, v1.4s, #31 ; CHECK-NEXT: movi v1.4s, #25 -; CHECK-NEXT: dup v2.4s, w8 -; CHECK-NEXT: smull2 v3.2d, v0.4s, v2.4s -; CHECK-NEXT: smull v2.2d, v0.2s, v2.2s -; CHECK-NEXT: uzp2 v2.4s, v2.4s, v3.4s -; CHECK-NEXT: sshr v3.4s, v2.4s, #3 -; CHECK-NEXT: usra v3.4s, v2.4s, #31 -; CHECK-NEXT: mls v0.4s, v3.4s, v1.4s +; CHECK-NEXT: mls v0.4s, v2.4s, v1.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b @@ -137,14 +137,14 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #34079 ; CHECK-NEXT: movk w8, #20971, lsl #16 +; CHECK-NEXT: dup v1.4s, w8 +; CHECK-NEXT: smull2 v2.2d, v0.4s, v1.4s +; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s +; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s +; CHECK-NEXT: sshr v2.4s, v1.4s, #5 +; CHECK-NEXT: usra v2.4s, v1.4s, #31 ; CHECK-NEXT: movi v1.4s, #100 -; CHECK-NEXT: dup v2.4s, w8 -; CHECK-NEXT: smull2 v3.2d, v0.4s, v2.4s -; CHECK-NEXT: smull v2.2d, v0.2s, v2.2s -; CHECK-NEXT: uzp2 v2.4s, v2.4s, v3.4s -; CHECK-NEXT: sshr v3.4s, v2.4s, #5 -; CHECK-NEXT: usra v3.4s, v2.4s, #31 -; CHECK-NEXT: mls v0.4s, v3.4s, v1.4s +; CHECK-NEXT: mls v0.4s, v2.4s, v1.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b @@ -185,11 +185,11 @@ ; CHECK-LABEL: test_srem_pow2: ; CHECK: // %bb.0: ; CHECK-NEXT: sshr v2.4s, v0.4s, #31 -; CHECK-NEXT: mov v3.16b, v0.16b +; CHECK-NEXT: mov v1.16b, v0.16b +; CHECK-NEXT: usra v1.4s, v2.4s, #28 +; CHECK-NEXT: bic v1.4s, #15 +; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: usra v3.4s, v2.4s, #28 -; CHECK-NEXT: bic v3.4s, #15 -; CHECK-NEXT: sub v0.4s, v0.4s, v3.4s ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -204,10 +204,10 @@ ; CHECK-LABEL: test_srem_int_min: ; CHECK: // %bb.0: ; CHECK-NEXT: sshr v2.4s, v0.4s, #31 -; CHECK-NEXT: mov v3.16b, v0.16b -; CHECK-NEXT: movi v1.4s, #128, lsl #24 -; CHECK-NEXT: usra v3.4s, v2.4s, #1 -; CHECK-NEXT: and v1.16b, v3.16b, v1.16b +; CHECK-NEXT: mov v1.16b, v0.16b +; CHECK-NEXT: usra v1.4s, v2.4s, #1 +; CHECK-NEXT: movi v2.4s, #128, lsl #24 +; CHECK-NEXT: and v1.16b, v1.16b, v2.16b ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 diff --git a/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll b/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll --- a/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll +++ b/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll @@ -4,49 +4,49 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) { ; CHECK-LABEL: fold_srem_vec_1: ; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #33437 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: smov w8, v0.h[1] -; CHECK-NEXT: smov w9, v0.h[0] -; CHECK-NEXT: mov w10, #63421 -; CHECK-NEXT: mov w11, #37253 -; CHECK-NEXT: movk w10, #31710, lsl #16 -; CHECK-NEXT: movk w11, #44150, lsl #16 -; CHECK-NEXT: smov w13, v0.h[2] -; CHECK-NEXT: mov w12, #33437 -; CHECK-NEXT: smull x10, w8, w10 -; CHECK-NEXT: movk w12, #21399, lsl #16 -; CHECK-NEXT: smull x11, w9, w11 +; CHECK-NEXT: smov w9, v0.h[2] +; CHECK-NEXT: movk w8, #21399, lsl #16 +; CHECK-NEXT: mov w10, #37253 +; CHECK-NEXT: movk w10, #44150, lsl #16 +; CHECK-NEXT: smov w11, v0.h[0] +; CHECK-NEXT: smull x8, w9, w8 +; CHECK-NEXT: mov w12, #63421 +; CHECK-NEXT: smull x10, w11, w10 +; CHECK-NEXT: movk w12, #31710, lsl #16 +; CHECK-NEXT: lsr x13, x8, #63 +; CHECK-NEXT: asr x8, x8, #37 +; CHECK-NEXT: smov w14, v0.h[1] +; CHECK-NEXT: add w8, w8, w13 +; CHECK-NEXT: mov w13, #98 ; CHECK-NEXT: lsr x10, x10, #32 -; CHECK-NEXT: lsr x11, x11, #32 -; CHECK-NEXT: sub w10, w10, w8 -; CHECK-NEXT: add w11, w11, w9 -; CHECK-NEXT: asr w14, w10, #6 -; CHECK-NEXT: asr w15, w11, #6 -; CHECK-NEXT: add w10, w14, w10, lsr #31 -; CHECK-NEXT: add w11, w15, w11, lsr #31 -; CHECK-NEXT: mov w14, #95 -; CHECK-NEXT: mov w15, #-124 -; CHECK-NEXT: smull x12, w13, w12 -; CHECK-NEXT: msub w9, w11, w14, w9 -; CHECK-NEXT: msub w8, w10, w15, w8 -; CHECK-NEXT: lsr x10, x12, #63 -; CHECK-NEXT: asr x11, x12, #37 -; CHECK-NEXT: smov w12, v0.h[3] -; CHECK-NEXT: add w10, w11, w10 -; CHECK-NEXT: mov w11, #98 +; CHECK-NEXT: smull x12, w14, w12 +; CHECK-NEXT: add w10, w10, w11 +; CHECK-NEXT: msub w8, w8, w13, w9 +; CHECK-NEXT: asr w9, w10, #6 +; CHECK-NEXT: mov w13, #95 +; CHECK-NEXT: add w9, w9, w10, lsr #31 +; CHECK-NEXT: lsr x12, x12, #32 +; CHECK-NEXT: mov w10, #63249 +; CHECK-NEXT: sub w12, w12, w14 +; CHECK-NEXT: movk w10, #48808, lsl #16 +; CHECK-NEXT: msub w9, w9, w13, w11 +; CHECK-NEXT: smov w11, v0.h[3] +; CHECK-NEXT: asr w13, w12, #6 +; CHECK-NEXT: add w12, w13, w12, lsr #31 +; CHECK-NEXT: mov w13, #-124 +; CHECK-NEXT: smull x10, w11, w10 +; CHECK-NEXT: msub w12, w12, w13, w14 ; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: mov w9, #63249 -; CHECK-NEXT: movk w9, #48808, lsl #16 -; CHECK-NEXT: msub w10, w10, w11, w13 -; CHECK-NEXT: smull x9, w12, w9 -; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: lsr x8, x9, #63 -; CHECK-NEXT: asr x9, x9, #40 -; CHECK-NEXT: add w8, w9, w8 -; CHECK-NEXT: mov w9, #-1003 -; CHECK-NEXT: mov v0.h[2], w10 -; CHECK-NEXT: msub w8, w8, w9, w12 -; CHECK-NEXT: mov v0.h[3], w8 +; CHECK-NEXT: lsr x9, x10, #63 +; CHECK-NEXT: asr x10, x10, #40 +; CHECK-NEXT: add w9, w10, w9 +; CHECK-NEXT: mov w10, #-1003 +; CHECK-NEXT: mov v0.h[1], w12 +; CHECK-NEXT: msub w9, w9, w10, w11 +; CHECK-NEXT: mov v0.h[2], w8 +; CHECK-NEXT: mov v0.h[3], w9 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %1 = srem <4 x i16> %x, @@ -56,41 +56,41 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) { ; CHECK-LABEL: fold_srem_vec_2: ; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #37253 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: smov w9, v0.h[0] -; CHECK-NEXT: mov w8, #37253 ; CHECK-NEXT: movk w8, #44150, lsl #16 ; CHECK-NEXT: smov w10, v0.h[1] -; CHECK-NEXT: smov w14, v0.h[2] +; CHECK-NEXT: smov w13, v0.h[2] ; CHECK-NEXT: mov w12, #95 ; CHECK-NEXT: smull x11, w9, w8 -; CHECK-NEXT: smull x13, w10, w8 -; CHECK-NEXT: lsr x11, x11, #32 -; CHECK-NEXT: add w11, w11, w9 -; CHECK-NEXT: lsr x13, x13, #32 -; CHECK-NEXT: asr w15, w11, #6 -; CHECK-NEXT: add w13, w13, w10 -; CHECK-NEXT: add w11, w15, w11, lsr #31 ; CHECK-NEXT: smov w15, v0.h[3] -; CHECK-NEXT: asr w16, w13, #6 -; CHECK-NEXT: msub w9, w11, w12, w9 -; CHECK-NEXT: add w13, w16, w13, lsr #31 -; CHECK-NEXT: smull x11, w14, w8 -; CHECK-NEXT: msub w10, w13, w12, w10 +; CHECK-NEXT: smull x14, w10, w8 ; CHECK-NEXT: lsr x11, x11, #32 +; CHECK-NEXT: add w11, w11, w9 +; CHECK-NEXT: lsr x14, x14, #32 +; CHECK-NEXT: asr w16, w11, #6 +; CHECK-NEXT: add w14, w14, w10 +; CHECK-NEXT: add w11, w16, w11, lsr #31 +; CHECK-NEXT: smull x16, w13, w8 +; CHECK-NEXT: asr w17, w14, #6 ; CHECK-NEXT: smull x8, w15, w8 -; CHECK-NEXT: add w11, w11, w14 -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: asr w9, w11, #6 +; CHECK-NEXT: add w14, w17, w14, lsr #31 +; CHECK-NEXT: msub w9, w11, w12, w9 +; CHECK-NEXT: lsr x11, x16, #32 ; CHECK-NEXT: lsr x8, x8, #32 -; CHECK-NEXT: add w9, w9, w11, lsr #31 +; CHECK-NEXT: add w11, w11, w13 +; CHECK-NEXT: msub w10, w14, w12, w10 +; CHECK-NEXT: asr w14, w11, #6 ; CHECK-NEXT: add w8, w8, w15 +; CHECK-NEXT: add w11, w14, w11, lsr #31 +; CHECK-NEXT: fmov s0, w9 +; CHECK-NEXT: asr w9, w8, #6 +; CHECK-NEXT: msub w11, w11, w12, w13 +; CHECK-NEXT: add w8, w9, w8, lsr #31 ; CHECK-NEXT: mov v0.h[1], w10 -; CHECK-NEXT: asr w10, w8, #6 -; CHECK-NEXT: msub w9, w9, w12, w14 -; CHECK-NEXT: add w8, w10, w8, lsr #31 ; CHECK-NEXT: msub w8, w8, w12, w15 -; CHECK-NEXT: mov v0.h[2], w9 +; CHECK-NEXT: mov v0.h[2], w11 ; CHECK-NEXT: mov v0.h[3], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret @@ -103,42 +103,42 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) { ; CHECK-LABEL: combine_srem_sdiv: ; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #37253 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: smov w9, v0.h[0] -; CHECK-NEXT: mov w8, #37253 ; CHECK-NEXT: movk w8, #44150, lsl #16 -; CHECK-NEXT: smov w10, v0.h[1] -; CHECK-NEXT: smov w11, v0.h[2] -; CHECK-NEXT: smov w12, v0.h[3] -; CHECK-NEXT: mov w14, #95 +; CHECK-NEXT: smov w11, v0.h[1] +; CHECK-NEXT: smov w12, v0.h[2] +; CHECK-NEXT: mov w10, #95 ; CHECK-NEXT: smull x13, w9, w8 -; CHECK-NEXT: smull x15, w10, w8 +; CHECK-NEXT: smov w14, v0.h[3] +; CHECK-NEXT: smull x15, w11, w8 ; CHECK-NEXT: lsr x13, x13, #32 -; CHECK-NEXT: smull x16, w11, w8 +; CHECK-NEXT: smull x16, w12, w8 ; CHECK-NEXT: add w13, w13, w9 ; CHECK-NEXT: lsr x15, x15, #32 ; CHECK-NEXT: asr w17, w13, #6 -; CHECK-NEXT: add w15, w15, w10 +; CHECK-NEXT: add w15, w15, w11 ; CHECK-NEXT: add w13, w17, w13, lsr #31 +; CHECK-NEXT: smull x8, w14, w8 +; CHECK-NEXT: lsr x16, x16, #32 ; CHECK-NEXT: asr w17, w15, #6 +; CHECK-NEXT: msub w9, w13, w10, w9 +; CHECK-NEXT: add w16, w16, w12 ; CHECK-NEXT: add w15, w17, w15, lsr #31 -; CHECK-NEXT: smull x8, w12, w8 -; CHECK-NEXT: msub w9, w13, w14, w9 -; CHECK-NEXT: lsr x16, x16, #32 -; CHECK-NEXT: add w16, w16, w11 -; CHECK-NEXT: msub w10, w15, w14, w10 ; CHECK-NEXT: asr w17, w16, #6 ; CHECK-NEXT: lsr x8, x8, #32 -; CHECK-NEXT: fmov s1, w13 ; CHECK-NEXT: add w16, w17, w16, lsr #31 +; CHECK-NEXT: msub w11, w15, w10, w11 +; CHECK-NEXT: add w8, w8, w14 ; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: add w8, w8, w12 +; CHECK-NEXT: fmov s1, w13 ; CHECK-NEXT: asr w9, w8, #6 ; CHECK-NEXT: add w8, w9, w8, lsr #31 -; CHECK-NEXT: msub w9, w16, w14, w11 -; CHECK-NEXT: mov v0.h[1], w10 +; CHECK-NEXT: msub w9, w16, w10, w12 +; CHECK-NEXT: mov v0.h[1], w11 +; CHECK-NEXT: msub w10, w8, w10, w14 ; CHECK-NEXT: mov v1.h[1], w15 -; CHECK-NEXT: msub w10, w8, w14, w12 ; CHECK-NEXT: mov v0.h[2], w9 ; CHECK-NEXT: mov v1.h[2], w16 ; CHECK-NEXT: mov v0.h[3], w10 @@ -159,36 +159,36 @@ ; CHECK-NEXT: smov w9, v0.h[1] ; CHECK-NEXT: smov w10, v0.h[0] ; CHECK-NEXT: mov w8, #37253 -; CHECK-NEXT: movk w8, #44150, lsl #16 -; CHECK-NEXT: add w11, w9, #31 +; CHECK-NEXT: smov w11, v0.h[3] +; CHECK-NEXT: add w12, w9, #31 ; CHECK-NEXT: cmp w9, #0 -; CHECK-NEXT: add w12, w10, #63 -; CHECK-NEXT: csel w11, w11, w9, lt +; CHECK-NEXT: movk w8, #44150, lsl #16 +; CHECK-NEXT: add w13, w10, #63 +; CHECK-NEXT: csel w12, w12, w9, lt ; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: and w11, w11, #0xffffffe0 -; CHECK-NEXT: csel w12, w12, w10, lt -; CHECK-NEXT: sub w9, w9, w11 -; CHECK-NEXT: and w12, w12, #0xffffffc0 -; CHECK-NEXT: sub w10, w10, w12 -; CHECK-NEXT: smov w12, v0.h[3] -; CHECK-NEXT: fmov s1, w10 -; CHECK-NEXT: smov w10, v0.h[2] -; CHECK-NEXT: smull x8, w12, w8 -; CHECK-NEXT: mov v1.h[1], w9 +; CHECK-NEXT: and w12, w12, #0xffffffe0 +; CHECK-NEXT: csel w13, w13, w10, lt +; CHECK-NEXT: sub w9, w9, w12 +; CHECK-NEXT: smov w12, v0.h[2] +; CHECK-NEXT: and w13, w13, #0xffffffc0 +; CHECK-NEXT: smull x8, w11, w8 +; CHECK-NEXT: sub w10, w10, w13 +; CHECK-NEXT: add w13, w12, #7 +; CHECK-NEXT: cmp w12, #0 ; CHECK-NEXT: lsr x8, x8, #32 -; CHECK-NEXT: add w9, w10, #7 -; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: csel w9, w9, w10, lt -; CHECK-NEXT: add w8, w8, w12 -; CHECK-NEXT: and w9, w9, #0xfffffff8 -; CHECK-NEXT: sub w9, w10, w9 -; CHECK-NEXT: asr w10, w8, #6 -; CHECK-NEXT: add w8, w10, w8, lsr #31 -; CHECK-NEXT: mov w10, #95 -; CHECK-NEXT: mov v1.h[2], w9 -; CHECK-NEXT: msub w8, w8, w10, w12 -; CHECK-NEXT: mov v1.h[3], w8 -; CHECK-NEXT: fmov d0, d1 +; CHECK-NEXT: csel w13, w13, w12, lt +; CHECK-NEXT: add w8, w8, w11 +; CHECK-NEXT: fmov s0, w10 +; CHECK-NEXT: and w10, w13, #0xfffffff8 +; CHECK-NEXT: asr w13, w8, #6 +; CHECK-NEXT: sub w10, w12, w10 +; CHECK-NEXT: add w8, w13, w8, lsr #31 +; CHECK-NEXT: mov w13, #95 +; CHECK-NEXT: mov v0.h[1], w9 +; CHECK-NEXT: msub w8, w8, w13, w11 +; CHECK-NEXT: mov v0.h[2], w10 +; CHECK-NEXT: mov v0.h[3], w8 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %1 = srem <4 x i16> %x, ret <4 x i16> %1 @@ -198,40 +198,40 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) { ; CHECK-LABEL: dont_fold_srem_one: ; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #17097 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: smov w8, v0.h[1] ; CHECK-NEXT: smov w9, v0.h[2] +; CHECK-NEXT: movk w8, #45590, lsl #16 ; CHECK-NEXT: mov w10, #30865 -; CHECK-NEXT: mov w11, #17097 ; CHECK-NEXT: movk w10, #51306, lsl #16 -; CHECK-NEXT: movk w11, #45590, lsl #16 -; CHECK-NEXT: mov w12, #654 -; CHECK-NEXT: smull x10, w8, w10 -; CHECK-NEXT: smull x11, w9, w11 +; CHECK-NEXT: smov w11, v0.h[1] +; CHECK-NEXT: smull x8, w9, w8 +; CHECK-NEXT: mov w13, #23 +; CHECK-NEXT: smull x10, w11, w10 +; CHECK-NEXT: lsr x8, x8, #32 +; CHECK-NEXT: add w8, w8, w9 ; CHECK-NEXT: lsr x10, x10, #32 -; CHECK-NEXT: lsr x11, x11, #32 -; CHECK-NEXT: add w10, w10, w8 -; CHECK-NEXT: add w11, w11, w9 -; CHECK-NEXT: asr w13, w10, #9 -; CHECK-NEXT: add w10, w13, w10, lsr #31 -; CHECK-NEXT: asr w13, w11, #4 -; CHECK-NEXT: add w11, w13, w11, lsr #31 -; CHECK-NEXT: smov w13, v0.h[3] -; CHECK-NEXT: msub w8, w10, w12, w8 -; CHECK-NEXT: movi d0, #0000000000000000 +; CHECK-NEXT: asr w12, w8, #4 +; CHECK-NEXT: add w10, w10, w11 +; CHECK-NEXT: add w8, w12, w8, lsr #31 ; CHECK-NEXT: mov w12, #47143 -; CHECK-NEXT: mov w10, #23 ; CHECK-NEXT: movk w12, #24749, lsl #16 -; CHECK-NEXT: msub w9, w11, w10, w9 -; CHECK-NEXT: smull x10, w13, w12 -; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: lsr x8, x10, #63 -; CHECK-NEXT: asr x10, x10, #43 -; CHECK-NEXT: add w8, w10, w8 -; CHECK-NEXT: mov w10, #5423 -; CHECK-NEXT: mov v0.h[2], w9 -; CHECK-NEXT: msub w8, w8, w10, w13 -; CHECK-NEXT: mov v0.h[3], w8 +; CHECK-NEXT: msub w8, w8, w13, w9 +; CHECK-NEXT: smov w9, v0.h[3] +; CHECK-NEXT: asr w13, w10, #9 +; CHECK-NEXT: movi d0, #0000000000000000 +; CHECK-NEXT: add w10, w13, w10, lsr #31 +; CHECK-NEXT: mov w13, #654 +; CHECK-NEXT: smull x12, w9, w12 +; CHECK-NEXT: msub w10, w10, w13, w11 +; CHECK-NEXT: lsr x11, x12, #63 +; CHECK-NEXT: asr x12, x12, #43 +; CHECK-NEXT: add w11, w12, w11 +; CHECK-NEXT: mov w12, #5423 +; CHECK-NEXT: mov v0.h[1], w10 +; CHECK-NEXT: msub w9, w11, w12, w9 +; CHECK-NEXT: mov v0.h[2], w8 +; CHECK-NEXT: mov v0.h[3], w9 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %1 = srem <4 x i16> %x, @@ -242,38 +242,38 @@ define <4 x i16> @dont_fold_srem_i16_smax(<4 x i16> %x) { ; CHECK-LABEL: dont_fold_srem_i16_smax: ; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #17097 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: smov w8, v0.h[2] -; CHECK-NEXT: mov w9, #17097 -; CHECK-NEXT: smov w10, v0.h[1] -; CHECK-NEXT: movk w9, #45590, lsl #16 -; CHECK-NEXT: mov w11, #32767 -; CHECK-NEXT: smov w12, v0.h[3] -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: smull x9, w8, w9 -; CHECK-NEXT: add w11, w10, w11 -; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: lsr x9, x9, #32 -; CHECK-NEXT: csel w11, w11, w10, lt -; CHECK-NEXT: add w9, w9, w8 -; CHECK-NEXT: and w11, w11, #0xffff8000 -; CHECK-NEXT: asr w13, w9, #4 -; CHECK-NEXT: sub w10, w10, w11 +; CHECK-NEXT: smov w9, v0.h[2] +; CHECK-NEXT: movk w8, #45590, lsl #16 ; CHECK-NEXT: mov w11, #47143 -; CHECK-NEXT: add w9, w13, w9, lsr #31 -; CHECK-NEXT: mov w13, #23 +; CHECK-NEXT: smov w12, v0.h[1] +; CHECK-NEXT: mov w10, #32767 +; CHECK-NEXT: smull x8, w9, w8 ; CHECK-NEXT: movk w11, #24749, lsl #16 -; CHECK-NEXT: mov v1.h[1], w10 -; CHECK-NEXT: msub w8, w9, w13, w8 -; CHECK-NEXT: smull x9, w12, w11 -; CHECK-NEXT: lsr x10, x9, #63 -; CHECK-NEXT: asr x9, x9, #43 -; CHECK-NEXT: add w9, w9, w10 -; CHECK-NEXT: mov w10, #5423 -; CHECK-NEXT: mov v1.h[2], w8 -; CHECK-NEXT: msub w8, w9, w10, w12 -; CHECK-NEXT: mov v1.h[3], w8 -; CHECK-NEXT: fmov d0, d1 +; CHECK-NEXT: smov w13, v0.h[3] +; CHECK-NEXT: add w10, w12, w10 +; CHECK-NEXT: lsr x8, x8, #32 +; CHECK-NEXT: cmp w12, #0 +; CHECK-NEXT: add w8, w8, w9 +; CHECK-NEXT: csel w10, w10, w12, lt +; CHECK-NEXT: asr w14, w8, #4 +; CHECK-NEXT: smull x11, w13, w11 +; CHECK-NEXT: add w8, w14, w8, lsr #31 +; CHECK-NEXT: mov w14, #23 +; CHECK-NEXT: and w10, w10, #0xffff8000 +; CHECK-NEXT: movi d0, #0000000000000000 +; CHECK-NEXT: msub w8, w8, w14, w9 +; CHECK-NEXT: sub w9, w12, w10 +; CHECK-NEXT: lsr x10, x11, #63 +; CHECK-NEXT: asr x11, x11, #43 +; CHECK-NEXT: add w10, w11, w10 +; CHECK-NEXT: mov w11, #5423 +; CHECK-NEXT: mov v0.h[1], w9 +; CHECK-NEXT: msub w10, w10, w11, w13 +; CHECK-NEXT: mov v0.h[2], w8 +; CHECK-NEXT: mov v0.h[3], w10 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %1 = srem <4 x i16> %x, ret <4 x i16> %1 @@ -286,35 +286,35 @@ ; CHECK-NEXT: mov x8, #8549 ; CHECK-NEXT: fmov x9, d1 ; CHECK-NEXT: movk x8, #22795, lsl #16 -; CHECK-NEXT: mov x12, #6055 +; CHECK-NEXT: mov x11, #6055 ; CHECK-NEXT: movk x8, #17096, lsl #32 -; CHECK-NEXT: movk x12, #58853, lsl #16 +; CHECK-NEXT: movk x11, #58853, lsl #16 ; CHECK-NEXT: movk x8, #45590, lsl #48 -; CHECK-NEXT: mov x14, #21445 -; CHECK-NEXT: mov x10, v1.d[1] -; CHECK-NEXT: movk x12, #47142, lsl #32 +; CHECK-NEXT: mov x12, #21445 +; CHECK-NEXT: movk x11, #47142, lsl #32 +; CHECK-NEXT: movk x12, #1603, lsl #16 ; CHECK-NEXT: smulh x8, x9, x8 -; CHECK-NEXT: movk x14, #1603, lsl #16 -; CHECK-NEXT: mov x11, v0.d[1] -; CHECK-NEXT: movk x12, #24749, lsl #48 +; CHECK-NEXT: mov x10, v1.d[1] +; CHECK-NEXT: movk x11, #24749, lsl #48 +; CHECK-NEXT: movk x12, #15432, lsl #32 +; CHECK-NEXT: movk x12, #25653, lsl #48 +; CHECK-NEXT: mov x13, v0.d[1] ; CHECK-NEXT: add x8, x8, x9 -; CHECK-NEXT: movk x14, #15432, lsl #32 -; CHECK-NEXT: asr x13, x8, #4 -; CHECK-NEXT: movk x14, #25653, lsl #48 -; CHECK-NEXT: add x8, x13, x8, lsr #63 -; CHECK-NEXT: mov w13, #23 -; CHECK-NEXT: smulh x12, x10, x12 -; CHECK-NEXT: smulh x14, x11, x14 -; CHECK-NEXT: msub x8, x8, x13, x9 -; CHECK-NEXT: asr x13, x12, #11 -; CHECK-NEXT: add x12, x13, x12, lsr #63 -; CHECK-NEXT: asr x13, x14, #8 +; CHECK-NEXT: smulh x11, x10, x11 +; CHECK-NEXT: asr x14, x8, #4 +; CHECK-NEXT: mov w15, #23 +; CHECK-NEXT: add x8, x14, x8, lsr #63 +; CHECK-NEXT: smulh x12, x13, x12 +; CHECK-NEXT: asr x14, x11, #11 +; CHECK-NEXT: add x11, x14, x11, lsr #63 +; CHECK-NEXT: msub x8, x8, x15, x9 +; CHECK-NEXT: asr x14, x12, #8 ; CHECK-NEXT: mov w9, #5423 -; CHECK-NEXT: add x13, x13, x14, lsr #63 +; CHECK-NEXT: add x12, x14, x12, lsr #63 ; CHECK-NEXT: mov w14, #654 -; CHECK-NEXT: msub x9, x12, x9, x10 +; CHECK-NEXT: msub x9, x11, x9, x10 +; CHECK-NEXT: msub x10, x12, x14, x13 ; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: msub x10, x13, x14, x11 ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov v1.d[1], x9 ; CHECK-NEXT: mov v0.d[1], x10 diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll --- a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll @@ -55,9 +55,9 @@ define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind { ; CHECK-LABEL: v64i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sqsub v2.16b, v2.16b, v6.16b ; CHECK-NEXT: sqsub v0.16b, v0.16b, v4.16b ; CHECK-NEXT: sqsub v1.16b, v1.16b, v5.16b +; CHECK-NEXT: sqsub v2.16b, v2.16b, v6.16b ; CHECK-NEXT: sqsub v3.16b, v3.16b, v7.16b ; CHECK-NEXT: ret %z = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> %x, <64 x i8> %y) @@ -86,9 +86,9 @@ define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind { ; CHECK-LABEL: v32i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sqsub v2.8h, v2.8h, v6.8h ; CHECK-NEXT: sqsub v0.8h, v0.8h, v4.8h ; CHECK-NEXT: sqsub v1.8h, v1.8h, v5.8h +; CHECK-NEXT: sqsub v2.8h, v2.8h, v6.8h ; CHECK-NEXT: sqsub v3.8h, v3.8h, v7.8h ; CHECK-NEXT: ret %z = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> %x, <32 x i16> %y) @@ -98,9 +98,9 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind { ; CHECK-LABEL: v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: sqsub v0.8b, v0.8b, v1.8b +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: sqsub v0.8b, v1.8b, v0.8b ; CHECK-NEXT: str d0, [x2] ; CHECK-NEXT: ret %x = load <8 x i8>, <8 x i8>* %px @@ -144,10 +144,10 @@ ; CHECK-NEXT: shl v1.2s, v1.2s, #24 ; CHECK-NEXT: sqsub v0.2s, v1.2s, v0.2s ; CHECK-NEXT: ushr v0.2s, v0.2s, #24 -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strb w9, [x2] -; CHECK-NEXT: strb w8, [x2, #1] +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: mov w9, v0.s[1] +; CHECK-NEXT: strb w9, [x2, #1] +; CHECK-NEXT: strb w8, [x2] ; CHECK-NEXT: ret %x = load <2 x i8>, <2 x i8>* %px %y = load <2 x i8>, <2 x i8>* %py @@ -159,9 +159,9 @@ define void @v4i16(<4 x i16>* %px, <4 x i16>* %py, <4 x i16>* %pz) nounwind { ; CHECK-LABEL: v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: sqsub v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: sqsub v0.4h, v1.4h, v0.4h ; CHECK-NEXT: str d0, [x2] ; CHECK-NEXT: ret %x = load <4 x i16>, <4 x i16>* %px @@ -184,10 +184,10 @@ ; CHECK-NEXT: shl v1.2s, v1.2s, #16 ; CHECK-NEXT: sqsub v0.2s, v1.2s, v0.2s ; CHECK-NEXT: ushr v0.2s, v0.2s, #16 -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strh w9, [x2] -; CHECK-NEXT: strh w8, [x2, #2] +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: mov w9, v0.s[1] +; CHECK-NEXT: strh w9, [x2, #2] +; CHECK-NEXT: strh w8, [x2] ; CHECK-NEXT: ret %x = load <2 x i16>, <2 x i16>* %px %y = load <2 x i16>, <2 x i16>* %py @@ -225,9 +225,9 @@ define void @v1i8(<1 x i8>* %px, <1 x i8>* %py, <1 x i8>* %pz) nounwind { ; CHECK-LABEL: v1i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr b0, [x0] -; CHECK-NEXT: ldr b1, [x1] -; CHECK-NEXT: sqsub v0.8b, v0.8b, v1.8b +; CHECK-NEXT: ldr b0, [x1] +; CHECK-NEXT: ldr b1, [x0] +; CHECK-NEXT: sqsub v0.8b, v1.8b, v0.8b ; CHECK-NEXT: st1 { v0.b }[0], [x2] ; CHECK-NEXT: ret %x = load <1 x i8>, <1 x i8>* %px @@ -240,9 +240,9 @@ define void @v1i16(<1 x i16>* %px, <1 x i16>* %py, <1 x i16>* %pz) nounwind { ; CHECK-LABEL: v1i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr h0, [x0] -; CHECK-NEXT: ldr h1, [x1] -; CHECK-NEXT: sqsub v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ldr h0, [x1] +; CHECK-NEXT: ldr h1, [x0] +; CHECK-NEXT: sqsub v0.4h, v1.4h, v0.4h ; CHECK-NEXT: str h0, [x2] ; CHECK-NEXT: ret %x = load <1 x i16>, <1 x i16>* %px @@ -255,10 +255,10 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind { ; CHECK-LABEL: v16i4: ; CHECK: // %bb.0: -; CHECK-NEXT: shl v1.16b, v1.16b, #4 ; CHECK-NEXT: shl v0.16b, v0.16b, #4 -; CHECK-NEXT: sshr v1.16b, v1.16b, #4 +; CHECK-NEXT: shl v1.16b, v1.16b, #4 ; CHECK-NEXT: sshr v0.16b, v0.16b, #4 +; CHECK-NEXT: sshr v1.16b, v1.16b, #4 ; CHECK-NEXT: shl v1.16b, v1.16b, #4 ; CHECK-NEXT: shl v0.16b, v0.16b, #4 ; CHECK-NEXT: sqsub v0.16b, v0.16b, v1.16b @@ -310,9 +310,9 @@ define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind { ; CHECK-LABEL: v16i32: ; CHECK: // %bb.0: -; CHECK-NEXT: sqsub v2.4s, v2.4s, v6.4s ; CHECK-NEXT: sqsub v0.4s, v0.4s, v4.4s ; CHECK-NEXT: sqsub v1.4s, v1.4s, v5.4s +; CHECK-NEXT: sqsub v2.4s, v2.4s, v6.4s ; CHECK-NEXT: sqsub v3.4s, v3.4s, v7.4s ; CHECK-NEXT: ret %z = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> %x, <16 x i32> %y) @@ -341,9 +341,9 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind { ; CHECK-LABEL: v8i64: ; CHECK: // %bb.0: -; CHECK-NEXT: sqsub v2.2d, v2.2d, v6.2d ; CHECK-NEXT: sqsub v0.2d, v0.2d, v4.2d ; CHECK-NEXT: sqsub v1.2d, v1.2d, v5.2d +; CHECK-NEXT: sqsub v2.2d, v2.2d, v6.2d ; CHECK-NEXT: sqsub v3.2d, v3.2d, v7.2d ; CHECK-NEXT: ret %z = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> %x, <8 x i64> %y) diff --git a/llvm/test/CodeGen/AArch64/sve-fcvt.ll b/llvm/test/CodeGen/AArch64/sve-fcvt.ll --- a/llvm/test/CodeGen/AArch64/sve-fcvt.ll +++ b/llvm/test/CodeGen/AArch64/sve-fcvt.ll @@ -454,9 +454,9 @@ define @scvtf_h_nxv2i1( %a) { ; CHECK-LABEL: scvtf_h_nxv2i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: scvtf z0.h, p1/m, z0.d +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: scvtf z0.h, p0/m, z0.d ; CHECK-NEXT: ret %res = sitofp %a to ret %res @@ -495,9 +495,9 @@ define @scvtf_h_nxv3i1( %a) { ; CHECK-LABEL: scvtf_h_nxv3i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: scvtf z0.h, p1/m, z0.s +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: scvtf z0.h, p0/m, z0.s ; CHECK-NEXT: ret %res = sitofp %a to ret %res @@ -516,9 +516,9 @@ define @scvtf_h_nxv4i1( %a) { ; CHECK-LABEL: scvtf_h_nxv4i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: scvtf z0.h, p1/m, z0.s +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: scvtf z0.h, p0/m, z0.s ; CHECK-NEXT: ret %res = sitofp %a to ret %res @@ -547,9 +547,9 @@ define @scvtf_h_nxv7i1( %a) { ; CHECK-LABEL: scvtf_h_nxv7i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.h ; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: scvtf z0.h, p1/m, z0.h +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: scvtf z0.h, p0/m, z0.h ; CHECK-NEXT: ret %res = sitofp %a to ret %res @@ -568,9 +568,9 @@ define @scvtf_h_nxv8i1( %a) { ; CHECK-LABEL: scvtf_h_nxv8i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.h ; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: scvtf z0.h, p1/m, z0.h +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: scvtf z0.h, p0/m, z0.h ; CHECK-NEXT: ret %res = sitofp %a to ret %res @@ -589,9 +589,9 @@ define @scvtf_s_nxv2i1( %a) { ; CHECK-LABEL: scvtf_s_nxv2i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: scvtf z0.s, p1/m, z0.d +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: scvtf z0.s, p0/m, z0.d ; CHECK-NEXT: ret %res = sitofp %a to ret %res @@ -620,9 +620,9 @@ define @scvtf_s_nxv3i1( %a) { ; CHECK-LABEL: scvtf_s_nxv3i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: scvtf z0.s, p1/m, z0.s +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: scvtf z0.s, p0/m, z0.s ; CHECK-NEXT: ret %res = sitofp %a to ret %res @@ -641,9 +641,9 @@ define @scvtf_s_nxv4i1( %a) { ; CHECK-LABEL: scvtf_s_nxv4i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: scvtf z0.s, p1/m, z0.s +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: scvtf z0.s, p0/m, z0.s ; CHECK-NEXT: ret %res = sitofp %a to ret %res @@ -662,9 +662,9 @@ define @scvtf_d_nxv2i1( %a) { ; CHECK-LABEL: scvtf_d_nxv2i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: scvtf z0.d, p1/m, z0.d +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: scvtf z0.d, p0/m, z0.d ; CHECK-NEXT: ret %res = sitofp %a to ret %res @@ -695,9 +695,9 @@ define @ucvtf_h_nxv2i1( %a) { ; CHECK-LABEL: ucvtf_h_nxv2i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: mov z0.d, p0/z, #1 // =0x1 -; CHECK-NEXT: ucvtf z0.h, p1/m, z0.d +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ucvtf z0.h, p0/m, z0.d ; CHECK-NEXT: ret %res = uitofp %a to ret %res @@ -736,9 +736,9 @@ define @ucvtf_h_nxv3i1( %a) { ; CHECK-LABEL: ucvtf_h_nxv3i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1 -; CHECK-NEXT: ucvtf z0.h, p1/m, z0.s +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s ; CHECK-NEXT: ret %res = uitofp %a to ret %res @@ -767,9 +767,9 @@ define @ucvtf_h_nxv4i1( %a) { ; CHECK-LABEL: ucvtf_h_nxv4i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1 -; CHECK-NEXT: ucvtf z0.h, p1/m, z0.s +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s ; CHECK-NEXT: ret %res = uitofp %a to ret %res @@ -798,9 +798,9 @@ define @ucvtf_h_nxv8i1( %a) { ; CHECK-LABEL: ucvtf_h_nxv8i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.h ; CHECK-NEXT: mov z0.h, p0/z, #1 // =0x1 -; CHECK-NEXT: ucvtf z0.h, p1/m, z0.h +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: ucvtf z0.h, p0/m, z0.h ; CHECK-NEXT: ret %res = uitofp %a to ret %res @@ -819,9 +819,9 @@ define @ucvtf_s_nxv2i1( %a) { ; CHECK-LABEL: ucvtf_s_nxv2i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: mov z0.d, p0/z, #1 // =0x1 -; CHECK-NEXT: ucvtf z0.s, p1/m, z0.d +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ucvtf z0.s, p0/m, z0.d ; CHECK-NEXT: ret %res = uitofp %a to ret %res @@ -850,9 +850,9 @@ define @ucvtf_s_nxv4i1( %a) { ; CHECK-LABEL: ucvtf_s_nxv4i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1 -; CHECK-NEXT: ucvtf z0.s, p1/m, z0.s +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s ; CHECK-NEXT: ret %res = uitofp %a to ret %res @@ -871,9 +871,9 @@ define @ucvtf_d_nxv2i1( %a) { ; CHECK-LABEL: ucvtf_d_nxv2i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: mov z0.d, p0/z, #1 // =0x1 -; CHECK-NEXT: ucvtf z0.d, p1/m, z0.d +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d ; CHECK-NEXT: ret %res = uitofp %a to ret %res diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-extract-subvector.ll @@ -116,13 +116,13 @@ define <2 x i16> @extract_subvector_v4i16(<4 x i16> %op) #0 { ; CHECK-LABEL: extract_subvector_v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: umov w8, v0.h[2] -; CHECK-NEXT: umov w9, v0.h[3] -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: mov v0.s[1], w9 -; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 -; CHECK-NEXT: ret +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: umov w8, v0.h[2] +; CHECK-NEXT: fmov s1, w8 +; CHECK-NEXT: umov w8, v0.h[3] +; CHECK-NEXT: mov v1.s[1], w8 +; CHECK-NEXT: fmov d0, d1 +; CHECK-NEXT: ret %ret = call <2 x i16> @llvm.experimental.vector.extract.v2i16.v4i16(<4 x i16> %op, i64 2) ret <2 x i16> %ret } diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-extend-trunc.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-extend-trunc.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-extend-trunc.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-extend-trunc.ll @@ -179,10 +179,10 @@ ; VBITS_EQ_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; VBITS_EQ_256-NEXT: uunpklo z0.s, z0.h ; VBITS_EQ_256-NEXT: uunpklo z0.d, z0.s -; VBITS_EQ_256-NEXT: fcvt z0.d, p0/m, z0.h -; VBITS_EQ_256-NEXT: st1d { z0.d }, p0, [x1] ; VBITS_EQ_256-NEXT: uunpklo z1.s, z1.h +; VBITS_EQ_256-NEXT: fcvt z0.d, p0/m, z0.h ; VBITS_EQ_256-NEXT: uunpklo z1.d, z1.s +; VBITS_EQ_256-NEXT: st1d { z0.d }, p0, [x1] ; VBITS_EQ_256-NEXT: fcvt z1.d, p0/m, z1.h ; VBITS_EQ_256-NEXT: st1d { z1.d }, p0, [x1, x8, lsl #3] ; VBITS_EQ_256-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-div.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-div.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-div.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-div.ll @@ -35,10 +35,10 @@ ; CHECK-NEXT: sdiv [[DIV:z[0-9]+]].s, [[PG0]]/m, [[OP1_LO_LO]].s, [[OP2_LO_LO]].s ; CHECK-NEXT: uzp1 [[RES:z[0-9]+]].h, [[DIV]].h, [[DIV]].h ; CHECK-NEXT: umov [[SCALAR0:w[0-9]+]], [[VEC:v[0-9]+]].h[0] -; CHECK-NEXT: umov [[SCALAR1:w[0-9]+]], [[VEC]].h[1] ; CHECK-NEXT: fmov s0, [[SCALAR0]] -; CHECK-NEXT: umov [[SCALAR2:w[0-9]+]], [[VEC]].h[2] +; CHECK-NEXT: umov [[SCALAR1:w[0-9]+]], [[VEC]].h[1] ; CHECK-NEXT: mov [[FINAL:v[0-9]+]].b[1], [[SCALAR1]] +; CHECK-NEXT: umov [[SCALAR2:w[0-9]+]], [[VEC]].h[2] ; CHECK-NEXT: mov [[FINAL]].b[2], [[SCALAR2]] ; CHECK-NEXT: umov [[SCALAR3:w[0-9]+]], [[VEC]].h[3] ; CHECK-NEXT: mov [[FINAL]].b[3], [[SCALAR3]] @@ -641,10 +641,10 @@ ; CHECK-NEXT: udiv [[DIV:z[0-9]+]].s, [[PG0]]/m, [[OP1_LO_LO]].s, [[OP2_LO_LO]].s ; CHECK-NEXT: uzp1 [[RES:z[0-9]+]].h, [[DIV]].h, [[DIV]].h ; CHECK-NEXT: umov [[SCALAR0:w[0-9]+]], [[VEC:v[0-9]+]].h[0] -; CHECK-NEXT: umov [[SCALAR1:w[0-9]+]], [[VEC]].h[1] ; CHECK-NEXT: fmov s0, [[SCALAR0]] -; CHECK-NEXT: umov [[SCALAR2:w[0-9]+]], [[VEC]].h[2] +; CHECK-NEXT: umov [[SCALAR1:w[0-9]+]], [[VEC]].h[1] ; CHECK-NEXT: mov [[FINAL:v[0-9]+]].b[1], [[SCALAR1]] +; CHECK-NEXT: umov [[SCALAR2:w[0-9]+]], [[VEC]].h[2] ; CHECK-NEXT: mov [[FINAL]].b[2], [[SCALAR2]] ; CHECK-NEXT: umov [[SCALAR3:w[0-9]+]], [[VEC]].h[3] ; CHECK-NEXT: mov [[FINAL]].b[3], [[SCALAR3]] diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll @@ -39,10 +39,10 @@ ; CHECK-NEXT: smull v0.8h, v0.8b, v1.8b ; CHECK-NEXT: ushr v1.8h, v0.8h, #8 ; CHECK-NEXT: umov w8, v1.h[0] -; CHECK-NEXT: umov w9, v1.h[1] ; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: umov w8, v1.h[1] +; CHECK-NEXT: mov v0.b[1], w8 ; CHECK-NEXT: umov w8, v1.h[2] -; CHECK-NEXT: mov v0.b[1], w9 ; CHECK-NEXT: mov v0.b[2], w8 ; CHECK-NEXT: umov w8, v1.h[3] ; CHECK-NEXT: mov v0.b[3], w8 @@ -667,10 +667,10 @@ ; CHECK-NEXT: umull v0.8h, v0.8b, v1.8b ; CHECK-NEXT: ushr v1.8h, v0.8h, #8 ; CHECK-NEXT: umov w8, v1.h[0] -; CHECK-NEXT: umov w9, v1.h[1] ; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: umov w8, v1.h[1] +; CHECK-NEXT: mov v0.b[1], w8 ; CHECK-NEXT: umov w8, v1.h[2] -; CHECK-NEXT: mov v0.b[1], w9 ; CHECK-NEXT: mov v0.b[2], w8 ; CHECK-NEXT: umov w8, v1.h[3] ; CHECK-NEXT: mov v0.b[3], w8 diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll @@ -35,10 +35,10 @@ ; CHECK-NEXT: sdivr [[DIV1:z[0-9]+]].s, [[PG1]]/m, [[OP2_LO_LO]].s, [[OP1_LO_LO]].s ; CHECK-NEXT: uzp1 [[UZP1:z[0-9]+]].h, [[DIV1]].h, [[DIV1]].h ; CHECK-NEXT: umov [[SCALAR1:w[0-9]+]], [[VEC:v[0-9]+]].h[0] -; CHECK-NEXT: umov [[SCALAR2:w[0-9]+]], [[VEC]].h[1] ; CHECK-NEXT: fmov s3, [[SCALAR1]] -; CHECK-NEXT: umov [[SCALAR3:w[0-9]+]], [[VEC]].h[2] +; CHECK-NEXT: umov [[SCALAR2:w[0-9]+]], [[VEC]].h[1] ; CHECK-NEXT: mov [[FINAL:v[0-9]+]].b[1], [[SCALAR2]] +; CHECK-NEXT: umov [[SCALAR3:w[0-9]+]], [[VEC]].h[2] ; CHECK-NEXT: mov [[FINAL]].b[2], [[SCALAR3]] ; CHECK-NEXT: umov [[SCALAR4:w[0-9]+]], [[VEC]].h[3] ; CHECK-NEXT: mov [[FINAL]].b[3], [[SCALAR4]] @@ -714,10 +714,10 @@ ; CHECK-NEXT: udivr [[DIV1:z[0-9]+]].s, [[PG1]]/m, [[OP2_LO_LO]].s, [[OP1_LO_LO]].s ; CHECK-NEXT: uzp1 [[UZP1:z[0-9]+]].h, [[DIV1]].h, [[DIV1]].h ; CHECK-NEXT: umov [[SCALAR0:w[0-9]+]], [[VEC:v[0-9]+]].h[0] -; CHECK-NEXT: umov [[SCALAR1:w[0-9]+]], [[VEC]].h[1] ; CHECK-NEXT: fmov s3, [[SCALAR0]] -; CHECK-NEXT: umov [[SCALAR2:w[0-9]+]], [[VEC]].h[2] +; CHECK-NEXT: umov [[SCALAR1:w[0-9]+]], [[VEC]].h[1] ; CHECK-NEXT: mov [[FINAL:v[0-9]+]].b[1], [[SCALAR1]] +; CHECK-NEXT: umov [[SCALAR2:w[0-9]+]], [[VEC]].h[2] ; CHECK-NEXT: mov [[FINAL]].b[2], [[SCALAR2]] ; CHECK-NEXT: umov [[SCALAR3:w[0-9]+]], [[VEC]].h[3] ; CHECK-NEXT: mov [[FINAL]].b[3], [[SCALAR3]] diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll @@ -278,10 +278,10 @@ ; VBITS_EQ_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; VBITS_EQ_256-NEXT: uunpklo z0.s, z0.h ; VBITS_EQ_256-NEXT: uunpklo z0.d, z0.s -; VBITS_EQ_256-NEXT: ucvtf z0.d, p0/m, z0.d -; VBITS_EQ_256-NEXT: st1d { z0.d }, p0, [x1] ; VBITS_EQ_256-NEXT: uunpklo z1.s, z1.h +; VBITS_EQ_256-NEXT: ucvtf z0.d, p0/m, z0.d ; VBITS_EQ_256-NEXT: uunpklo z1.d, z1.s +; VBITS_EQ_256-NEXT: st1d { z0.d }, p0, [x1] ; VBITS_EQ_256-NEXT: ucvtf z1.d, p0/m, z1.d ; VBITS_EQ_256-NEXT: st1d { z1.d }, p0, [x1, x8, lsl #3] ; VBITS_EQ_256-NEXT: ret @@ -1221,10 +1221,10 @@ ; VBITS_EQ_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; VBITS_EQ_256-NEXT: sunpklo z0.s, z0.h ; VBITS_EQ_256-NEXT: sunpklo z0.d, z0.s -; VBITS_EQ_256-NEXT: scvtf z0.d, p0/m, z0.d -; VBITS_EQ_256-NEXT: st1d { z0.d }, p0, [x1] ; VBITS_EQ_256-NEXT: sunpklo z1.s, z1.h +; VBITS_EQ_256-NEXT: scvtf z0.d, p0/m, z0.d ; VBITS_EQ_256-NEXT: sunpklo z1.d, z1.s +; VBITS_EQ_256-NEXT: st1d { z0.d }, p0, [x1] ; VBITS_EQ_256-NEXT: scvtf z1.d, p0/m, z1.d ; VBITS_EQ_256-NEXT: st1d { z1.d }, p0, [x1, x8, lsl #3] ; VBITS_EQ_256-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll @@ -105,7 +105,6 @@ ; VBITS_EQ_256-NEXT: uzp1 v0.8b, v1.8b, v0.8b ; VBITS_EQ_256-NEXT: str d0, [x0] ; VBITS_EQ_256-NEXT: ret -; ; VBITS_GE_512-LABEL: masked_gather_v8i8: ; VBITS_GE_512: // %bb.0: ; VBITS_GE_512-NEXT: ldr d0, [x0] @@ -159,15 +158,15 @@ ; VBITS_GE_2048-LABEL: masked_gather_v32i8: ; VBITS_GE_2048: // %bb.0: ; VBITS_GE_2048-NEXT: ptrue p0.b, vl32 -; VBITS_GE_2048-NEXT: ptrue p2.d, vl32 ; VBITS_GE_2048-NEXT: ld1b { z0.b }, p0/z, [x0] -; VBITS_GE_2048-NEXT: ld1d { z1.d }, p2/z, [x1] ; VBITS_GE_2048-NEXT: cmpeq p1.b, p0/z, z0.b, #0 ; VBITS_GE_2048-NEXT: mov z0.b, p1/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 +; VBITS_GE_2048-NEXT: ld1d { z1.d }, p1/z, [x1] ; VBITS_GE_2048-NEXT: uunpklo z0.h, z0.b ; VBITS_GE_2048-NEXT: uunpklo z0.s, z0.h ; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_2048-NEXT: cmpne p1.d, p2/z, z0.d, #0 +; VBITS_GE_2048-NEXT: cmpne p1.d, p1/z, z0.d, #0 ; VBITS_GE_2048-NEXT: ld1b { z0.d }, p1/z, [z1.d] ; VBITS_GE_2048-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_2048-NEXT: uzp1 z0.h, z0.h, z0.h @@ -247,10 +246,10 @@ ; VBITS_EQ_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; VBITS_EQ_256-NEXT: uunpklo z0.s, z0.h ; VBITS_EQ_256-NEXT: uunpklo z0.d, z0.s -; VBITS_EQ_256-NEXT: cmpne p1.d, p0/z, z0.d, #0 -; VBITS_EQ_256-NEXT: ld1h { z0.d }, p1/z, [z3.d] ; VBITS_EQ_256-NEXT: uunpklo z1.s, z1.h +; VBITS_EQ_256-NEXT: cmpne p1.d, p0/z, z0.d, #0 ; VBITS_EQ_256-NEXT: uunpklo z1.d, z1.s +; VBITS_EQ_256-NEXT: ld1h { z0.d }, p1/z, [z3.d] ; VBITS_EQ_256-NEXT: cmpne p0.d, p0/z, z1.d, #0 ; VBITS_EQ_256-NEXT: ld1h { z1.d }, p0/z, [z2.d] ; VBITS_EQ_256-NEXT: uzp1 z0.s, z0.s, z0.s @@ -260,7 +259,6 @@ ; VBITS_EQ_256-NEXT: mov v0.d[1], v1.d[0] ; VBITS_EQ_256-NEXT: str q0, [x0] ; VBITS_EQ_256-NEXT: ret -; ; VBITS_GE_512-LABEL: masked_gather_v8i16: ; VBITS_GE_512: // %bb.0: ; VBITS_GE_512-NEXT: ldr q0, [x0] @@ -287,15 +285,15 @@ ; VBITS_GE_1024-LABEL: masked_gather_v16i16: ; VBITS_GE_1024: // %bb.0: ; VBITS_GE_1024-NEXT: ptrue p0.h, vl16 -; VBITS_GE_1024-NEXT: ptrue p1.d, vl16 +; VBITS_GE_1024-NEXT: ptrue p2.d, vl16 ; VBITS_GE_1024-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_GE_1024-NEXT: cmpeq p2.h, p0/z, z0.h, #0 -; VBITS_GE_1024-NEXT: ld1d { z0.d }, p1/z, [x1] -; VBITS_GE_1024-NEXT: mov z1.h, p2/z, #-1 // =0xffffffffffffffff -; VBITS_GE_1024-NEXT: uunpklo z1.s, z1.h -; VBITS_GE_1024-NEXT: uunpklo z1.d, z1.s -; VBITS_GE_1024-NEXT: cmpne p1.d, p1/z, z1.d, #0 -; VBITS_GE_1024-NEXT: ld1h { z0.d }, p1/z, [z0.d] +; VBITS_GE_1024-NEXT: ld1d { z1.d }, p2/z, [x1] +; VBITS_GE_1024-NEXT: cmpeq p1.h, p0/z, z0.h, #0 +; VBITS_GE_1024-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff +; VBITS_GE_1024-NEXT: uunpklo z0.s, z0.h +; VBITS_GE_1024-NEXT: uunpklo z0.d, z0.s +; VBITS_GE_1024-NEXT: cmpne p1.d, p2/z, z0.d, #0 +; VBITS_GE_1024-NEXT: ld1h { z0.d }, p1/z, [z1.d] ; VBITS_GE_1024-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_1024-NEXT: uzp1 z0.h, z0.h, z0.h ; VBITS_GE_1024-NEXT: st1h { z0.h }, p0, [x0] @@ -312,15 +310,15 @@ ; VBITS_GE_2048-LABEL: masked_gather_v32i16: ; VBITS_GE_2048: // %bb.0: ; VBITS_GE_2048-NEXT: ptrue p0.h, vl32 -; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 +; VBITS_GE_2048-NEXT: ptrue p2.d, vl32 ; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_GE_2048-NEXT: cmpeq p2.h, p0/z, z0.h, #0 -; VBITS_GE_2048-NEXT: ld1d { z0.d }, p1/z, [x1] -; VBITS_GE_2048-NEXT: mov z1.h, p2/z, #-1 // =0xffffffffffffffff -; VBITS_GE_2048-NEXT: uunpklo z1.s, z1.h -; VBITS_GE_2048-NEXT: uunpklo z1.d, z1.s -; VBITS_GE_2048-NEXT: cmpne p1.d, p1/z, z1.d, #0 -; VBITS_GE_2048-NEXT: ld1h { z0.d }, p1/z, [z0.d] +; VBITS_GE_2048-NEXT: ld1d { z1.d }, p2/z, [x1] +; VBITS_GE_2048-NEXT: cmpeq p1.h, p0/z, z0.h, #0 +; VBITS_GE_2048-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: uunpklo z0.s, z0.h +; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s +; VBITS_GE_2048-NEXT: cmpne p1.d, p2/z, z0.d, #0 +; VBITS_GE_2048-NEXT: ld1h { z0.d }, p1/z, [z1.d] ; VBITS_GE_2048-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_2048-NEXT: uzp1 z0.h, z0.h, z0.h ; VBITS_GE_2048-NEXT: st1h { z0.h }, p0, [x0] @@ -388,34 +386,33 @@ ; VBITS_EQ_256-NEXT: ld1w { z0.s }, p0/z, [x0] ; VBITS_EQ_256-NEXT: ptrue p1.d, vl4 ; VBITS_EQ_256-NEXT: ld1d { z1.d }, p1/z, [x1, x8, lsl #3] -; VBITS_EQ_256-NEXT: ld1d { z2.d }, p1/z, [x1] ; VBITS_EQ_256-NEXT: cmpeq p2.s, p0/z, z0.s, #0 -; VBITS_EQ_256-NEXT: mov z0.s, p2/z, #-1 // =0xffffffffffffffff -; VBITS_EQ_256-NEXT: uunpklo z3.d, z0.s -; VBITS_EQ_256-NEXT: ext z0.b, z0.b, z0.b, #16 -; VBITS_EQ_256-NEXT: uunpklo z0.d, z0.s +; VBITS_EQ_256-NEXT: ld1d { z0.d }, p1/z, [x1] +; VBITS_EQ_256-NEXT: mov z2.s, p2/z, #-1 // =0xffffffffffffffff +; VBITS_EQ_256-NEXT: uunpklo z3.d, z2.s +; VBITS_EQ_256-NEXT: ext z2.b, z2.b, z2.b, #16 +; VBITS_EQ_256-NEXT: uunpklo z2.d, z2.s ; VBITS_EQ_256-NEXT: cmpne p2.d, p1/z, z3.d, #0 -; VBITS_EQ_256-NEXT: cmpne p1.d, p1/z, z0.d, #0 -; VBITS_EQ_256-NEXT: ld1w { z2.d }, p2/z, [z2.d] -; VBITS_EQ_256-NEXT: ld1w { z0.d }, p1/z, [z1.d] +; VBITS_EQ_256-NEXT: cmpne p1.d, p1/z, z2.d, #0 +; VBITS_EQ_256-NEXT: ld1w { z0.d }, p2/z, [z0.d] +; VBITS_EQ_256-NEXT: ld1w { z1.d }, p1/z, [z1.d] ; VBITS_EQ_256-NEXT: ptrue p1.s, vl4 -; VBITS_EQ_256-NEXT: uzp1 z1.s, z2.s, z2.s ; VBITS_EQ_256-NEXT: uzp1 z0.s, z0.s, z0.s -; VBITS_EQ_256-NEXT: splice z1.s, p1, z1.s, z0.s -; VBITS_EQ_256-NEXT: st1w { z1.s }, p0, [x0] +; VBITS_EQ_256-NEXT: uzp1 z1.s, z1.s, z1.s +; VBITS_EQ_256-NEXT: splice z0.s, p1, z0.s, z1.s +; VBITS_EQ_256-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_EQ_256-NEXT: ret -; ; VBITS_GE_512-LABEL: masked_gather_v8i32: ; VBITS_GE_512: // %bb.0: ; VBITS_GE_512-NEXT: ptrue p0.s, vl8 ; VBITS_GE_512-NEXT: ptrue p1.d, vl8 ; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0] -; VBITS_GE_512-NEXT: ld1d { z1.d }, p1/z, [x1] ; VBITS_GE_512-NEXT: cmpeq p2.s, p0/z, z0.s, #0 -; VBITS_GE_512-NEXT: mov z0.s, p2/z, #-1 // =0xffffffffffffffff -; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_512-NEXT: cmpne p1.d, p1/z, z0.d, #0 -; VBITS_GE_512-NEXT: ld1w { z0.d }, p1/z, [z1.d] +; VBITS_GE_512-NEXT: ld1d { z0.d }, p1/z, [x1] +; VBITS_GE_512-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff +; VBITS_GE_512-NEXT: uunpklo z1.d, z1.s +; VBITS_GE_512-NEXT: cmpne p1.d, p1/z, z1.d, #0 +; VBITS_GE_512-NEXT: ld1w { z0.d }, p1/z, [z0.d] ; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_512-NEXT: ret @@ -433,12 +430,12 @@ ; VBITS_GE_1024-NEXT: ptrue p0.s, vl16 ; VBITS_GE_1024-NEXT: ptrue p1.d, vl16 ; VBITS_GE_1024-NEXT: ld1w { z0.s }, p0/z, [x0] -; VBITS_GE_1024-NEXT: ld1d { z1.d }, p1/z, [x1] ; VBITS_GE_1024-NEXT: cmpeq p2.s, p0/z, z0.s, #0 -; VBITS_GE_1024-NEXT: mov z0.s, p2/z, #-1 // =0xffffffffffffffff -; VBITS_GE_1024-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_1024-NEXT: cmpne p1.d, p1/z, z0.d, #0 -; VBITS_GE_1024-NEXT: ld1w { z0.d }, p1/z, [z1.d] +; VBITS_GE_1024-NEXT: ld1d { z0.d }, p1/z, [x1] +; VBITS_GE_1024-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff +; VBITS_GE_1024-NEXT: uunpklo z1.d, z1.s +; VBITS_GE_1024-NEXT: cmpne p1.d, p1/z, z1.d, #0 +; VBITS_GE_1024-NEXT: ld1w { z0.d }, p1/z, [z0.d] ; VBITS_GE_1024-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_1024-NEXT: ret @@ -456,12 +453,12 @@ ; VBITS_GE_2048-NEXT: ptrue p0.s, vl32 ; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 ; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0] -; VBITS_GE_2048-NEXT: ld1d { z1.d }, p1/z, [x1] ; VBITS_GE_2048-NEXT: cmpeq p2.s, p0/z, z0.s, #0 -; VBITS_GE_2048-NEXT: mov z0.s, p2/z, #-1 // =0xffffffffffffffff -; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_2048-NEXT: cmpne p1.d, p1/z, z0.d, #0 -; VBITS_GE_2048-NEXT: ld1w { z0.d }, p1/z, [z1.d] +; VBITS_GE_2048-NEXT: ld1d { z0.d }, p1/z, [x1] +; VBITS_GE_2048-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: uunpklo z1.d, z1.s +; VBITS_GE_2048-NEXT: cmpne p1.d, p1/z, z1.d, #0 +; VBITS_GE_2048-NEXT: ld1w { z0.d }, p1/z, [z0.d] ; VBITS_GE_2048-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_2048-NEXT: ret @@ -554,7 +551,6 @@ ; VBITS_EQ_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3] ; VBITS_EQ_256-NEXT: st1d { z1.d }, p0, [x0] ; VBITS_EQ_256-NEXT: ret -; ; VBITS_GE_512-LABEL: masked_gather_v8i64: ; VBITS_GE_512: // %bb.0: ; VBITS_GE_512-NEXT: ptrue p0.d, vl8 @@ -621,10 +617,10 @@ ; CHECK-NEXT: ptrue p0.d, vl4 ; CHECK-NEXT: fcmeq v1.4h, v1.4h, #0.0 ; CHECK-NEXT: umov w8, v1.h[0] -; CHECK-NEXT: umov w9, v1.h[1] -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: mov v1.s[1], w9 -; CHECK-NEXT: shl v1.2s, v1.2s, #16 +; CHECK-NEXT: fmov s2, w8 +; CHECK-NEXT: umov w8, v1.h[1] +; CHECK-NEXT: mov v2.s[1], w8 +; CHECK-NEXT: shl v1.2s, v2.2s, #16 ; CHECK-NEXT: sshr v1.2s, v1.2s, #16 ; CHECK-NEXT: fmov w8, s1 ; CHECK-NEXT: mov w9, v1.s[1] @@ -699,15 +695,15 @@ ; VBITS_GE_1024-LABEL: masked_gather_v16f16: ; VBITS_GE_1024: // %bb.0: ; VBITS_GE_1024-NEXT: ptrue p0.h, vl16 -; VBITS_GE_1024-NEXT: ptrue p1.d, vl16 +; VBITS_GE_1024-NEXT: ptrue p2.d, vl16 ; VBITS_GE_1024-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_GE_1024-NEXT: fcmeq p2.h, p0/z, z0.h, #0.0 -; VBITS_GE_1024-NEXT: ld1d { z0.d }, p1/z, [x1] -; VBITS_GE_1024-NEXT: mov z1.h, p2/z, #-1 // =0xffffffffffffffff -; VBITS_GE_1024-NEXT: uunpklo z1.s, z1.h -; VBITS_GE_1024-NEXT: uunpklo z1.d, z1.s -; VBITS_GE_1024-NEXT: cmpne p1.d, p1/z, z1.d, #0 -; VBITS_GE_1024-NEXT: ld1h { z0.d }, p1/z, [z0.d] +; VBITS_GE_1024-NEXT: ld1d { z1.d }, p2/z, [x1] +; VBITS_GE_1024-NEXT: fcmeq p1.h, p0/z, z0.h, #0.0 +; VBITS_GE_1024-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff +; VBITS_GE_1024-NEXT: uunpklo z0.s, z0.h +; VBITS_GE_1024-NEXT: uunpklo z0.d, z0.s +; VBITS_GE_1024-NEXT: cmpne p1.d, p2/z, z0.d, #0 +; VBITS_GE_1024-NEXT: ld1h { z0.d }, p1/z, [z1.d] ; VBITS_GE_1024-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_1024-NEXT: uzp1 z0.h, z0.h, z0.h ; VBITS_GE_1024-NEXT: st1h { z0.h }, p0, [x0] @@ -724,15 +720,15 @@ ; VBITS_GE_2048-LABEL: masked_gather_v32f16: ; VBITS_GE_2048: // %bb.0: ; VBITS_GE_2048-NEXT: ptrue p0.h, vl32 -; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 +; VBITS_GE_2048-NEXT: ptrue p2.d, vl32 ; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_GE_2048-NEXT: fcmeq p2.h, p0/z, z0.h, #0.0 -; VBITS_GE_2048-NEXT: ld1d { z0.d }, p1/z, [x1] -; VBITS_GE_2048-NEXT: mov z1.h, p2/z, #-1 // =0xffffffffffffffff -; VBITS_GE_2048-NEXT: uunpklo z1.s, z1.h -; VBITS_GE_2048-NEXT: uunpklo z1.d, z1.s -; VBITS_GE_2048-NEXT: cmpne p1.d, p1/z, z1.d, #0 -; VBITS_GE_2048-NEXT: ld1h { z0.d }, p1/z, [z0.d] +; VBITS_GE_2048-NEXT: ld1d { z1.d }, p2/z, [x1] +; VBITS_GE_2048-NEXT: fcmeq p1.h, p0/z, z0.h, #0.0 +; VBITS_GE_2048-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: uunpklo z0.s, z0.h +; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s +; VBITS_GE_2048-NEXT: cmpne p1.d, p2/z, z0.d, #0 +; VBITS_GE_2048-NEXT: ld1h { z0.d }, p1/z, [z1.d] ; VBITS_GE_2048-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_2048-NEXT: uzp1 z0.h, z0.h, z0.h ; VBITS_GE_2048-NEXT: st1h { z0.h }, p0, [x0] @@ -797,12 +793,12 @@ ; VBITS_GE_512-NEXT: ptrue p0.s, vl8 ; VBITS_GE_512-NEXT: ptrue p1.d, vl8 ; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0] -; VBITS_GE_512-NEXT: ld1d { z1.d }, p1/z, [x1] ; VBITS_GE_512-NEXT: fcmeq p2.s, p0/z, z0.s, #0.0 -; VBITS_GE_512-NEXT: mov z0.s, p2/z, #-1 // =0xffffffffffffffff -; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_512-NEXT: cmpne p1.d, p1/z, z0.d, #0 -; VBITS_GE_512-NEXT: ld1w { z0.d }, p1/z, [z1.d] +; VBITS_GE_512-NEXT: ld1d { z0.d }, p1/z, [x1] +; VBITS_GE_512-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff +; VBITS_GE_512-NEXT: uunpklo z1.d, z1.s +; VBITS_GE_512-NEXT: cmpne p1.d, p1/z, z1.d, #0 +; VBITS_GE_512-NEXT: ld1w { z0.d }, p1/z, [z0.d] ; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_512-NEXT: ret @@ -820,12 +816,12 @@ ; VBITS_GE_1024-NEXT: ptrue p0.s, vl16 ; VBITS_GE_1024-NEXT: ptrue p1.d, vl16 ; VBITS_GE_1024-NEXT: ld1w { z0.s }, p0/z, [x0] -; VBITS_GE_1024-NEXT: ld1d { z1.d }, p1/z, [x1] ; VBITS_GE_1024-NEXT: fcmeq p2.s, p0/z, z0.s, #0.0 -; VBITS_GE_1024-NEXT: mov z0.s, p2/z, #-1 // =0xffffffffffffffff -; VBITS_GE_1024-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_1024-NEXT: cmpne p1.d, p1/z, z0.d, #0 -; VBITS_GE_1024-NEXT: ld1w { z0.d }, p1/z, [z1.d] +; VBITS_GE_1024-NEXT: ld1d { z0.d }, p1/z, [x1] +; VBITS_GE_1024-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff +; VBITS_GE_1024-NEXT: uunpklo z1.d, z1.s +; VBITS_GE_1024-NEXT: cmpne p1.d, p1/z, z1.d, #0 +; VBITS_GE_1024-NEXT: ld1w { z0.d }, p1/z, [z0.d] ; VBITS_GE_1024-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_1024-NEXT: ret @@ -843,12 +839,12 @@ ; VBITS_GE_2048-NEXT: ptrue p0.s, vl32 ; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 ; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0] -; VBITS_GE_2048-NEXT: ld1d { z1.d }, p1/z, [x1] ; VBITS_GE_2048-NEXT: fcmeq p2.s, p0/z, z0.s, #0.0 -; VBITS_GE_2048-NEXT: mov z0.s, p2/z, #-1 // =0xffffffffffffffff -; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_2048-NEXT: cmpne p1.d, p1/z, z0.d, #0 -; VBITS_GE_2048-NEXT: ld1w { z0.d }, p1/z, [z1.d] +; VBITS_GE_2048-NEXT: ld1d { z0.d }, p1/z, [x1] +; VBITS_GE_2048-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: uunpklo z1.d, z1.s +; VBITS_GE_2048-NEXT: cmpne p1.d, p1/z, z1.d, #0 +; VBITS_GE_2048-NEXT: ld1w { z0.d }, p1/z, [z0.d] ; VBITS_GE_2048-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_2048-NEXT: ret @@ -986,15 +982,15 @@ ; VBITS_GE_2048-LABEL: masked_gather_32b_scaled_sext_f16: ; VBITS_GE_2048: // %bb.0: ; VBITS_GE_2048-NEXT: ptrue p0.h, vl32 -; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 +; VBITS_GE_2048-NEXT: ptrue p2.d, vl32 ; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_GE_2048-NEXT: fcmeq p2.h, p0/z, z0.h, #0.0 -; VBITS_GE_2048-NEXT: ld1sw { z0.d }, p1/z, [x1] -; VBITS_GE_2048-NEXT: mov z1.h, p2/z, #-1 // =0xffffffffffffffff -; VBITS_GE_2048-NEXT: uunpklo z1.s, z1.h -; VBITS_GE_2048-NEXT: uunpklo z1.d, z1.s -; VBITS_GE_2048-NEXT: cmpne p1.d, p1/z, z1.d, #0 -; VBITS_GE_2048-NEXT: ld1h { z0.d }, p1/z, [x2, z0.d, lsl #1] +; VBITS_GE_2048-NEXT: ld1sw { z1.d }, p2/z, [x1] +; VBITS_GE_2048-NEXT: fcmeq p1.h, p0/z, z0.h, #0.0 +; VBITS_GE_2048-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: uunpklo z0.s, z0.h +; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s +; VBITS_GE_2048-NEXT: cmpne p1.d, p2/z, z0.d, #0 +; VBITS_GE_2048-NEXT: ld1h { z0.d }, p1/z, [x2, z1.d, lsl #1] ; VBITS_GE_2048-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_2048-NEXT: uzp1 z0.h, z0.h, z0.h ; VBITS_GE_2048-NEXT: st1h { z0.h }, p0, [x0] @@ -1016,12 +1012,12 @@ ; VBITS_GE_2048-NEXT: ptrue p0.s, vl32 ; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 ; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0] -; VBITS_GE_2048-NEXT: ld1sw { z1.d }, p1/z, [x1] ; VBITS_GE_2048-NEXT: fcmeq p2.s, p0/z, z0.s, #0.0 -; VBITS_GE_2048-NEXT: mov z0.s, p2/z, #-1 // =0xffffffffffffffff -; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_2048-NEXT: cmpne p1.d, p1/z, z0.d, #0 -; VBITS_GE_2048-NEXT: ld1w { z0.d }, p1/z, [x2, z1.d, lsl #2] +; VBITS_GE_2048-NEXT: ld1sw { z0.d }, p1/z, [x1] +; VBITS_GE_2048-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: uunpklo z1.d, z1.s +; VBITS_GE_2048-NEXT: cmpne p1.d, p1/z, z1.d, #0 +; VBITS_GE_2048-NEXT: ld1w { z0.d }, p1/z, [x2, z0.d, lsl #2] ; VBITS_GE_2048-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_2048-NEXT: ret @@ -1061,15 +1057,15 @@ ; VBITS_GE_2048-LABEL: masked_gather_32b_scaled_zext: ; VBITS_GE_2048: // %bb.0: ; VBITS_GE_2048-NEXT: ptrue p0.h, vl32 -; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 +; VBITS_GE_2048-NEXT: ptrue p2.d, vl32 ; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_GE_2048-NEXT: fcmeq p2.h, p0/z, z0.h, #0.0 -; VBITS_GE_2048-NEXT: ld1w { z0.d }, p1/z, [x1] -; VBITS_GE_2048-NEXT: mov z1.h, p2/z, #-1 // =0xffffffffffffffff -; VBITS_GE_2048-NEXT: uunpklo z1.s, z1.h -; VBITS_GE_2048-NEXT: uunpklo z1.d, z1.s -; VBITS_GE_2048-NEXT: cmpne p1.d, p1/z, z1.d, #0 -; VBITS_GE_2048-NEXT: ld1h { z0.d }, p1/z, [x2, z0.d, lsl #1] +; VBITS_GE_2048-NEXT: ld1w { z1.d }, p2/z, [x1] +; VBITS_GE_2048-NEXT: fcmeq p1.h, p0/z, z0.h, #0.0 +; VBITS_GE_2048-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: uunpklo z0.s, z0.h +; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s +; VBITS_GE_2048-NEXT: cmpne p1.d, p2/z, z0.d, #0 +; VBITS_GE_2048-NEXT: ld1h { z0.d }, p1/z, [x2, z1.d, lsl #1] ; VBITS_GE_2048-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_2048-NEXT: uzp1 z0.h, z0.h, z0.h ; VBITS_GE_2048-NEXT: st1h { z0.h }, p0, [x0] @@ -1089,15 +1085,15 @@ ; VBITS_GE_2048-LABEL: masked_gather_32b_unscaled_sext: ; VBITS_GE_2048: // %bb.0: ; VBITS_GE_2048-NEXT: ptrue p0.h, vl32 -; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 +; VBITS_GE_2048-NEXT: ptrue p2.d, vl32 ; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_GE_2048-NEXT: fcmeq p2.h, p0/z, z0.h, #0.0 -; VBITS_GE_2048-NEXT: ld1sw { z0.d }, p1/z, [x1] -; VBITS_GE_2048-NEXT: mov z1.h, p2/z, #-1 // =0xffffffffffffffff -; VBITS_GE_2048-NEXT: uunpklo z1.s, z1.h -; VBITS_GE_2048-NEXT: uunpklo z1.d, z1.s -; VBITS_GE_2048-NEXT: cmpne p1.d, p1/z, z1.d, #0 -; VBITS_GE_2048-NEXT: ld1h { z0.d }, p1/z, [x2, z0.d] +; VBITS_GE_2048-NEXT: ld1sw { z1.d }, p2/z, [x1] +; VBITS_GE_2048-NEXT: fcmeq p1.h, p0/z, z0.h, #0.0 +; VBITS_GE_2048-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: uunpklo z0.s, z0.h +; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s +; VBITS_GE_2048-NEXT: cmpne p1.d, p2/z, z0.d, #0 +; VBITS_GE_2048-NEXT: ld1h { z0.d }, p1/z, [x2, z1.d] ; VBITS_GE_2048-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_2048-NEXT: uzp1 z0.h, z0.h, z0.h ; VBITS_GE_2048-NEXT: st1h { z0.h }, p0, [x0] @@ -1118,15 +1114,15 @@ ; VBITS_GE_2048-LABEL: masked_gather_32b_unscaled_zext: ; VBITS_GE_2048: // %bb.0: ; VBITS_GE_2048-NEXT: ptrue p0.h, vl32 -; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 +; VBITS_GE_2048-NEXT: ptrue p2.d, vl32 ; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_GE_2048-NEXT: fcmeq p2.h, p0/z, z0.h, #0.0 -; VBITS_GE_2048-NEXT: ld1w { z0.d }, p1/z, [x1] -; VBITS_GE_2048-NEXT: mov z1.h, p2/z, #-1 // =0xffffffffffffffff -; VBITS_GE_2048-NEXT: uunpklo z1.s, z1.h -; VBITS_GE_2048-NEXT: uunpklo z1.d, z1.s -; VBITS_GE_2048-NEXT: cmpne p1.d, p1/z, z1.d, #0 -; VBITS_GE_2048-NEXT: ld1h { z0.d }, p1/z, [x2, z0.d] +; VBITS_GE_2048-NEXT: ld1w { z1.d }, p2/z, [x1] +; VBITS_GE_2048-NEXT: fcmeq p1.h, p0/z, z0.h, #0.0 +; VBITS_GE_2048-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: uunpklo z0.s, z0.h +; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s +; VBITS_GE_2048-NEXT: cmpne p1.d, p2/z, z0.d, #0 +; VBITS_GE_2048-NEXT: ld1h { z0.d }, p1/z, [x2, z1.d] ; VBITS_GE_2048-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_2048-NEXT: uzp1 z0.h, z0.h, z0.h ; VBITS_GE_2048-NEXT: st1h { z0.h }, p0, [x0] @@ -1148,12 +1144,12 @@ ; VBITS_GE_2048-NEXT: ptrue p0.s, vl32 ; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 ; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0] -; VBITS_GE_2048-NEXT: ld1d { z1.d }, p1/z, [x1] ; VBITS_GE_2048-NEXT: fcmeq p2.s, p0/z, z0.s, #0.0 -; VBITS_GE_2048-NEXT: mov z0.s, p2/z, #-1 // =0xffffffffffffffff -; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_2048-NEXT: cmpne p1.d, p1/z, z0.d, #0 -; VBITS_GE_2048-NEXT: ld1w { z0.d }, p1/z, [x2, z1.d, lsl #2] +; VBITS_GE_2048-NEXT: ld1d { z0.d }, p1/z, [x1] +; VBITS_GE_2048-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: uunpklo z1.d, z1.s +; VBITS_GE_2048-NEXT: cmpne p1.d, p1/z, z1.d, #0 +; VBITS_GE_2048-NEXT: ld1w { z0.d }, p1/z, [x2, z0.d, lsl #2] ; VBITS_GE_2048-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_2048-NEXT: ret @@ -1172,12 +1168,12 @@ ; VBITS_GE_2048-NEXT: ptrue p0.s, vl32 ; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 ; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0] -; VBITS_GE_2048-NEXT: ld1d { z1.d }, p1/z, [x1] ; VBITS_GE_2048-NEXT: fcmeq p2.s, p0/z, z0.s, #0.0 -; VBITS_GE_2048-NEXT: mov z0.s, p2/z, #-1 // =0xffffffffffffffff -; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_2048-NEXT: cmpne p1.d, p1/z, z0.d, #0 -; VBITS_GE_2048-NEXT: ld1w { z0.d }, p1/z, [x2, z1.d] +; VBITS_GE_2048-NEXT: ld1d { z0.d }, p1/z, [x1] +; VBITS_GE_2048-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: uunpklo z1.d, z1.s +; VBITS_GE_2048-NEXT: cmpne p1.d, p1/z, z1.d, #0 +; VBITS_GE_2048-NEXT: ld1w { z0.d }, p1/z, [x2, z0.d] ; VBITS_GE_2048-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_2048-NEXT: ret @@ -1253,15 +1249,15 @@ ; VBITS_GE_2048-NEXT: ptrue p0.s, vl32 ; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 ; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0] -; VBITS_GE_2048-NEXT: ld1d { z1.d }, p1/z, [x1] ; VBITS_GE_2048-NEXT: fcmeq p2.s, p0/z, z0.s, #0.0 -; VBITS_GE_2048-NEXT: mov z0.s, p2/z, #-1 // =0xffffffffffffffff -; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_2048-NEXT: cmpne p1.d, p1/z, z0.d, #0 -; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x2] -; VBITS_GE_2048-NEXT: ld1w { z1.d }, p1/z, [z1.d] -; VBITS_GE_2048-NEXT: uzp1 z1.s, z1.s, z1.s -; VBITS_GE_2048-NEXT: mov z0.s, p2/m, z1.s +; VBITS_GE_2048-NEXT: ld1d { z0.d }, p1/z, [x1] +; VBITS_GE_2048-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: uunpklo z1.d, z1.s +; VBITS_GE_2048-NEXT: cmpne p1.d, p1/z, z1.d, #0 +; VBITS_GE_2048-NEXT: ld1w { z1.s }, p0/z, [x2] +; VBITS_GE_2048-NEXT: ld1w { z0.d }, p1/z, [z0.d] +; VBITS_GE_2048-NEXT: uzp1 z0.s, z0.s, z0.s +; VBITS_GE_2048-NEXT: sel z0.s, p2, z0.s, z1.s ; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_2048-NEXT: ret %cvals = load <32 x float>, <32 x float>* %a @@ -1279,12 +1275,12 @@ ; VBITS_GE_2048-NEXT: ptrue p0.s, vl32 ; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 ; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0] -; VBITS_GE_2048-NEXT: ld1d { z1.d }, p1/z, [x1] ; VBITS_GE_2048-NEXT: fcmeq p2.s, p0/z, z0.s, #0.0 -; VBITS_GE_2048-NEXT: mov z0.s, p2/z, #-1 // =0xffffffffffffffff -; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_2048-NEXT: cmpne p1.d, p1/z, z0.d, #0 -; VBITS_GE_2048-NEXT: ld1w { z0.d }, p1/z, [z1.d] +; VBITS_GE_2048-NEXT: ld1d { z0.d }, p1/z, [x1] +; VBITS_GE_2048-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: uunpklo z1.d, z1.s +; VBITS_GE_2048-NEXT: cmpne p1.d, p1/z, z1.d, #0 +; VBITS_GE_2048-NEXT: ld1w { z0.d }, p1/z, [z0.d] ; VBITS_GE_2048-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_2048-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-loads.ll @@ -32,19 +32,19 @@ ; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: fcmeq v1.4h, v1.4h, v2.4h ; CHECK-NEXT: umov w8, v1.h[0] -; CHECK-NEXT: umov w9, v1.h[1] -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: mov v1.s[1], w9 -; CHECK-NEXT: shl v1.2s, v1.2s, #16 -; CHECK-NEXT: sshr v1.2s, v1.2s, #16 -; CHECK-NEXT: fmov w8, s1 -; CHECK-NEXT: mov w9, v1.s[1] -; CHECK-NEXT: mov v0.h[0], w8 -; CHECK-NEXT: mov v0.h[1], w9 -; CHECK-NEXT: shl v0.4h, v0.4h, #15 -; CHECK-NEXT: sshr v0.4h, v0.4h, #15 -; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 -; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] +; CHECK-NEXT: fmov s2, w8 +; CHECK-NEXT: umov w8, v1.h[1] +; CHECK-NEXT: mov v2.s[1], w8 +; CHECK-NEXT: shl v1.2s, v2.2s, #16 +; CHECK-NEXT: sshr v1.2s, v1.2s, #16 +; CHECK-NEXT: fmov w8, s1 +; CHECK-NEXT: mov w9, v1.s[1] +; CHECK-NEXT: mov v0.h[0], w8 +; CHECK-NEXT: mov v0.h[1], w9 +; CHECK-NEXT: shl v0.4h, v0.4h, #15 +; CHECK-NEXT: sshr v0.4h, v0.4h, #15 +; CHECK-NEXT: cmpne p0.h, p0/z, z0.h, #0 +; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0] ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $z0 ; CHECK-NEXT: ret %a = load <2 x half>, <2 x half>* %ap diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll @@ -76,18 +76,18 @@ ; VBITS_EQ_256-NEXT: mov x8, #4 ; VBITS_EQ_256-NEXT: ptrue p0.d, vl4 ; VBITS_EQ_256-NEXT: cmeq v1.8b, v0.8b, #0 -; VBITS_EQ_256-NEXT: zip1 v5.8b, v0.8b, v0.8b ; VBITS_EQ_256-NEXT: ld1d { z3.d }, p0/z, [x1, x8, lsl #3] ; VBITS_EQ_256-NEXT: ld1d { z4.d }, p0/z, [x1] +; VBITS_EQ_256-NEXT: zip1 v5.8b, v0.8b, v0.8b ; VBITS_EQ_256-NEXT: zip1 v2.8b, v1.8b, v0.8b ; VBITS_EQ_256-NEXT: zip2 v1.8b, v1.8b, v0.8b ; VBITS_EQ_256-NEXT: zip2 v0.8b, v0.8b, v0.8b ; VBITS_EQ_256-NEXT: shl v2.4h, v2.4h, #8 ; VBITS_EQ_256-NEXT: shl v1.4h, v1.4h, #8 ; VBITS_EQ_256-NEXT: uunpklo z0.s, z0.h -; VBITS_EQ_256-NEXT: uunpklo z0.d, z0.s ; VBITS_EQ_256-NEXT: sshr v2.4h, v2.4h, #8 ; VBITS_EQ_256-NEXT: sshr v1.4h, v1.4h, #8 +; VBITS_EQ_256-NEXT: uunpklo z0.d, z0.s ; VBITS_EQ_256-NEXT: uunpklo z2.s, z2.h ; VBITS_EQ_256-NEXT: uunpklo z1.s, z1.h ; VBITS_EQ_256-NEXT: uunpklo z2.d, z2.s @@ -99,7 +99,6 @@ ; VBITS_EQ_256-NEXT: st1b { z1.d }, p1, [z4.d] ; VBITS_EQ_256-NEXT: st1b { z0.d }, p0, [z3.d] ; VBITS_EQ_256-NEXT: ret -; ; VBITS_GE_512-LABEL: masked_scatter_v8i8: ; VBITS_GE_512: // %bb.0: ; VBITS_GE_512-NEXT: ldr d0, [x0] @@ -108,8 +107,8 @@ ; VBITS_GE_512-NEXT: cmeq v2.8b, v0.8b, #0 ; VBITS_GE_512-NEXT: uunpklo z0.h, z0.b ; VBITS_GE_512-NEXT: uunpklo z0.s, z0.h -; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s ; VBITS_GE_512-NEXT: uunpklo z2.h, z2.b +; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s ; VBITS_GE_512-NEXT: uunpklo z2.s, z2.h ; VBITS_GE_512-NEXT: uunpklo z2.d, z2.s ; VBITS_GE_512-NEXT: cmpne p0.d, p0/z, z2.d, #0 @@ -131,8 +130,8 @@ ; VBITS_GE_1024-NEXT: cmeq v2.16b, v0.16b, #0 ; VBITS_GE_1024-NEXT: uunpklo z0.h, z0.b ; VBITS_GE_1024-NEXT: uunpklo z0.s, z0.h -; VBITS_GE_1024-NEXT: uunpklo z0.d, z0.s ; VBITS_GE_1024-NEXT: uunpklo z2.h, z2.b +; VBITS_GE_1024-NEXT: uunpklo z0.d, z0.s ; VBITS_GE_1024-NEXT: uunpklo z2.s, z2.h ; VBITS_GE_1024-NEXT: uunpklo z2.d, z2.s ; VBITS_GE_1024-NEXT: cmpne p0.d, p0/z, z2.d, #0 @@ -149,18 +148,18 @@ ; VBITS_GE_2048-LABEL: masked_scatter_v32i8: ; VBITS_GE_2048: // %bb.0: ; VBITS_GE_2048-NEXT: ptrue p0.b, vl32 -; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 ; VBITS_GE_2048-NEXT: ld1b { z0.b }, p0/z, [x0] -; VBITS_GE_2048-NEXT: ld1d { z2.d }, p1/z, [x1] ; VBITS_GE_2048-NEXT: cmpeq p0.b, p0/z, z0.b, #0 ; VBITS_GE_2048-NEXT: uunpklo z0.h, z0.b ; VBITS_GE_2048-NEXT: mov z1.b, p0/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: ptrue p0.d, vl32 +; VBITS_GE_2048-NEXT: ld1d { z2.d }, p0/z, [x1] ; VBITS_GE_2048-NEXT: uunpklo z0.s, z0.h ; VBITS_GE_2048-NEXT: uunpklo z1.h, z1.b ; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s ; VBITS_GE_2048-NEXT: uunpklo z1.s, z1.h ; VBITS_GE_2048-NEXT: uunpklo z1.d, z1.s -; VBITS_GE_2048-NEXT: cmpne p0.d, p1/z, z1.d, #0 +; VBITS_GE_2048-NEXT: cmpne p0.d, p0/z, z1.d, #0 ; VBITS_GE_2048-NEXT: st1b { z0.d }, p0, [z2.d] ; VBITS_GE_2048-NEXT: ret %vals = load <32 x i8>, <32 x i8>* %a @@ -225,24 +224,23 @@ ; VBITS_EQ_256-NEXT: mov x8, #4 ; VBITS_EQ_256-NEXT: ptrue p0.d, vl4 ; VBITS_EQ_256-NEXT: cmeq v1.8h, v0.8h, #0 -; VBITS_EQ_256-NEXT: ld1d { z4.d }, p0/z, [x1, x8, lsl #3] -; VBITS_EQ_256-NEXT: ext v3.16b, v0.16b, v0.16b, #8 -; VBITS_EQ_256-NEXT: uunpklo z0.s, z0.h -; VBITS_EQ_256-NEXT: uunpklo z0.d, z0.s -; VBITS_EQ_256-NEXT: uunpklo z2.s, z1.h +; VBITS_EQ_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] +; VBITS_EQ_256-NEXT: ld1d { z4.d }, p0/z, [x1] +; VBITS_EQ_256-NEXT: uunpklo z3.s, z1.h +; VBITS_EQ_256-NEXT: uunpklo z3.d, z3.s ; VBITS_EQ_256-NEXT: ext v1.16b, v1.16b, v1.16b, #8 -; VBITS_EQ_256-NEXT: uunpklo z2.d, z2.s -; VBITS_EQ_256-NEXT: cmpne p1.d, p0/z, z2.d, #0 -; VBITS_EQ_256-NEXT: ld1d { z2.d }, p0/z, [x1] -; VBITS_EQ_256-NEXT: uunpklo z3.s, z3.h +; VBITS_EQ_256-NEXT: cmpne p1.d, p0/z, z3.d, #0 +; VBITS_EQ_256-NEXT: uunpklo z3.s, z0.h +; VBITS_EQ_256-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; VBITS_EQ_256-NEXT: uunpklo z1.s, z1.h ; VBITS_EQ_256-NEXT: uunpklo z1.d, z1.s -; VBITS_EQ_256-NEXT: st1h { z0.d }, p1, [z2.d] +; VBITS_EQ_256-NEXT: uunpklo z3.d, z3.s +; VBITS_EQ_256-NEXT: uunpklo z0.s, z0.h ; VBITS_EQ_256-NEXT: cmpne p0.d, p0/z, z1.d, #0 -; VBITS_EQ_256-NEXT: uunpklo z1.d, z3.s -; VBITS_EQ_256-NEXT: st1h { z1.d }, p0, [z4.d] +; VBITS_EQ_256-NEXT: uunpklo z0.d, z0.s +; VBITS_EQ_256-NEXT: st1h { z3.d }, p1, [z4.d] +; VBITS_EQ_256-NEXT: st1h { z0.d }, p0, [z2.d] ; VBITS_EQ_256-NEXT: ret -; ; VBITS_GE_512-LABEL: masked_scatter_v8i16: ; VBITS_GE_512: // %bb.0: ; VBITS_GE_512-NEXT: ldr q0, [x0] @@ -269,15 +267,15 @@ ; VBITS_GE_1024-NEXT: ptrue p0.h, vl16 ; VBITS_GE_1024-NEXT: ptrue p1.d, vl16 ; VBITS_GE_1024-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_GE_1024-NEXT: ld1d { z1.d }, p1/z, [x1] +; VBITS_GE_1024-NEXT: ld1d { z2.d }, p1/z, [x1] ; VBITS_GE_1024-NEXT: cmpeq p0.h, p0/z, z0.h, #0 ; VBITS_GE_1024-NEXT: uunpklo z0.s, z0.h -; VBITS_GE_1024-NEXT: mov z2.h, p0/z, #-1 // =0xffffffffffffffff +; VBITS_GE_1024-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; VBITS_GE_1024-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_1024-NEXT: uunpklo z2.s, z2.h -; VBITS_GE_1024-NEXT: uunpklo z2.d, z2.s -; VBITS_GE_1024-NEXT: cmpne p0.d, p1/z, z2.d, #0 -; VBITS_GE_1024-NEXT: st1h { z0.d }, p0, [z1.d] +; VBITS_GE_1024-NEXT: uunpklo z1.s, z1.h +; VBITS_GE_1024-NEXT: uunpklo z1.d, z1.s +; VBITS_GE_1024-NEXT: cmpne p0.d, p1/z, z1.d, #0 +; VBITS_GE_1024-NEXT: st1h { z0.d }, p0, [z2.d] ; VBITS_GE_1024-NEXT: ret %vals = load <16 x i16>, <16 x i16>* %a %ptrs = load <16 x i16*>, <16 x i16*>* %b @@ -292,15 +290,15 @@ ; VBITS_GE_2048-NEXT: ptrue p0.h, vl32 ; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 ; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_GE_2048-NEXT: ld1d { z1.d }, p1/z, [x1] +; VBITS_GE_2048-NEXT: ld1d { z2.d }, p1/z, [x1] ; VBITS_GE_2048-NEXT: cmpeq p0.h, p0/z, z0.h, #0 ; VBITS_GE_2048-NEXT: uunpklo z0.s, z0.h -; VBITS_GE_2048-NEXT: mov z2.h, p0/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_2048-NEXT: uunpklo z2.s, z2.h -; VBITS_GE_2048-NEXT: uunpklo z2.d, z2.s -; VBITS_GE_2048-NEXT: cmpne p0.d, p1/z, z2.d, #0 -; VBITS_GE_2048-NEXT: st1h { z0.d }, p0, [z1.d] +; VBITS_GE_2048-NEXT: uunpklo z1.s, z1.h +; VBITS_GE_2048-NEXT: uunpklo z1.d, z1.s +; VBITS_GE_2048-NEXT: cmpne p0.d, p1/z, z1.d, #0 +; VBITS_GE_2048-NEXT: st1h { z0.d }, p0, [z2.d] ; VBITS_GE_2048-NEXT: ret %vals = load <32 x i16>, <32 x i16>* %a %ptrs = load <32 x i16*>, <32 x i16*>* %b @@ -358,23 +356,22 @@ ; VBITS_EQ_256-NEXT: ptrue p0.s, vl8 ; VBITS_EQ_256-NEXT: mov x8, #4 ; VBITS_EQ_256-NEXT: ld1w { z0.s }, p0/z, [x0] -; VBITS_EQ_256-NEXT: ptrue p1.d, vl4 -; VBITS_EQ_256-NEXT: ld1d { z2.d }, p1/z, [x1, x8, lsl #3] -; VBITS_EQ_256-NEXT: ld1d { z4.d }, p1/z, [x1] ; VBITS_EQ_256-NEXT: cmpeq p0.s, p0/z, z0.s, #0 ; VBITS_EQ_256-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff +; VBITS_EQ_256-NEXT: ptrue p0.d, vl4 +; VBITS_EQ_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] +; VBITS_EQ_256-NEXT: ld1d { z4.d }, p0/z, [x1] ; VBITS_EQ_256-NEXT: uunpklo z3.d, z1.s ; VBITS_EQ_256-NEXT: ext z1.b, z1.b, z1.b, #16 -; VBITS_EQ_256-NEXT: cmpne p0.d, p1/z, z3.d, #0 +; VBITS_EQ_256-NEXT: cmpne p1.d, p0/z, z3.d, #0 ; VBITS_EQ_256-NEXT: uunpklo z3.d, z0.s ; VBITS_EQ_256-NEXT: uunpklo z1.d, z1.s ; VBITS_EQ_256-NEXT: ext z0.b, z0.b, z0.b, #16 -; VBITS_EQ_256-NEXT: cmpne p1.d, p1/z, z1.d, #0 +; VBITS_EQ_256-NEXT: cmpne p0.d, p0/z, z1.d, #0 ; VBITS_EQ_256-NEXT: uunpklo z0.d, z0.s -; VBITS_EQ_256-NEXT: st1w { z3.d }, p0, [z4.d] -; VBITS_EQ_256-NEXT: st1w { z0.d }, p1, [z2.d] +; VBITS_EQ_256-NEXT: st1w { z3.d }, p1, [z4.d] +; VBITS_EQ_256-NEXT: st1w { z0.d }, p0, [z2.d] ; VBITS_EQ_256-NEXT: ret -; ; VBITS_GE_512-LABEL: masked_scatter_v8i32: ; VBITS_GE_512: // %bb.0: ; VBITS_GE_512-NEXT: ptrue p0.s, vl8 @@ -466,10 +463,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldr q2, [x1] -; CHECK-NEXT: cmeq v1.2d, v0.2d, #0 -; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0 -; CHECK-NEXT: st1d { z0.d }, p0, [z2.d] +; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: cmeq v2.2d, v0.2d, #0 +; CHECK-NEXT: cmpne p0.d, p0/z, z2.d, #0 +; CHECK-NEXT: st1d { z0.d }, p0, [z1.d] ; CHECK-NEXT: ret %vals = load <2 x i64>, <2 x i64>* %a %ptrs = load <2 x i64*>, <2 x i64*>* %b @@ -570,10 +567,10 @@ ; CHECK-NEXT: fcmeq v2.4h, v1.4h, #0.0 ; CHECK-NEXT: uunpklo z1.s, z1.h ; CHECK-NEXT: umov w8, v2.h[0] -; CHECK-NEXT: umov w9, v2.h[1] -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: mov v2.s[1], w9 -; CHECK-NEXT: shl v2.2s, v2.2s, #16 +; CHECK-NEXT: fmov s3, w8 +; CHECK-NEXT: umov w8, v2.h[1] +; CHECK-NEXT: mov v3.s[1], w8 +; CHECK-NEXT: shl v2.2s, v3.2s, #16 ; CHECK-NEXT: sshr v2.2s, v2.2s, #16 ; CHECK-NEXT: fmov w8, s2 ; CHECK-NEXT: mov w9, v2.s[1] @@ -643,15 +640,15 @@ ; VBITS_GE_1024-NEXT: ptrue p0.h, vl16 ; VBITS_GE_1024-NEXT: ptrue p1.d, vl16 ; VBITS_GE_1024-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_GE_1024-NEXT: ld1d { z1.d }, p1/z, [x1] +; VBITS_GE_1024-NEXT: ld1d { z2.d }, p1/z, [x1] ; VBITS_GE_1024-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0 ; VBITS_GE_1024-NEXT: uunpklo z0.s, z0.h -; VBITS_GE_1024-NEXT: mov z2.h, p0/z, #-1 // =0xffffffffffffffff +; VBITS_GE_1024-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; VBITS_GE_1024-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_1024-NEXT: uunpklo z2.s, z2.h -; VBITS_GE_1024-NEXT: uunpklo z2.d, z2.s -; VBITS_GE_1024-NEXT: cmpne p0.d, p1/z, z2.d, #0 -; VBITS_GE_1024-NEXT: st1h { z0.d }, p0, [z1.d] +; VBITS_GE_1024-NEXT: uunpklo z1.s, z1.h +; VBITS_GE_1024-NEXT: uunpklo z1.d, z1.s +; VBITS_GE_1024-NEXT: cmpne p0.d, p1/z, z1.d, #0 +; VBITS_GE_1024-NEXT: st1h { z0.d }, p0, [z2.d] ; VBITS_GE_1024-NEXT: ret %vals = load <16 x half>, <16 x half>* %a %ptrs = load <16 x half*>, <16 x half*>* %b @@ -666,15 +663,15 @@ ; VBITS_GE_2048-NEXT: ptrue p0.h, vl32 ; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 ; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_GE_2048-NEXT: ld1d { z1.d }, p1/z, [x1] +; VBITS_GE_2048-NEXT: ld1d { z2.d }, p1/z, [x1] ; VBITS_GE_2048-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0 ; VBITS_GE_2048-NEXT: uunpklo z0.s, z0.h -; VBITS_GE_2048-NEXT: mov z2.h, p0/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_2048-NEXT: uunpklo z2.s, z2.h -; VBITS_GE_2048-NEXT: uunpklo z2.d, z2.s -; VBITS_GE_2048-NEXT: cmpne p0.d, p1/z, z2.d, #0 -; VBITS_GE_2048-NEXT: st1h { z0.d }, p0, [z1.d] +; VBITS_GE_2048-NEXT: uunpklo z1.s, z1.h +; VBITS_GE_2048-NEXT: uunpklo z1.d, z1.s +; VBITS_GE_2048-NEXT: cmpne p0.d, p1/z, z1.d, #0 +; VBITS_GE_2048-NEXT: st1h { z0.d }, p0, [z2.d] ; VBITS_GE_2048-NEXT: ret %vals = load <32 x half>, <32 x half>* %a %ptrs = load <32 x half*>, <32 x half*>* %b @@ -817,10 +814,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldr q2, [x1] -; CHECK-NEXT: fcmeq v1.2d, v0.2d, #0.0 -; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0 -; CHECK-NEXT: st1d { z0.d }, p0, [z2.d] +; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: fcmeq v2.2d, v0.2d, #0.0 +; CHECK-NEXT: cmpne p0.d, p0/z, z2.d, #0 +; CHECK-NEXT: st1d { z0.d }, p0, [z1.d] ; CHECK-NEXT: ret %vals = load <2 x double>, <2 x double>* %a %ptrs = load <2 x double*>, <2 x double*>* %b @@ -903,15 +900,15 @@ ; VBITS_GE_2048-NEXT: ptrue p0.h, vl32 ; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 ; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_GE_2048-NEXT: ld1sw { z1.d }, p1/z, [x1] +; VBITS_GE_2048-NEXT: ld1sw { z2.d }, p1/z, [x1] ; VBITS_GE_2048-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0 ; VBITS_GE_2048-NEXT: uunpklo z0.s, z0.h -; VBITS_GE_2048-NEXT: mov z2.h, p0/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_2048-NEXT: uunpklo z2.s, z2.h -; VBITS_GE_2048-NEXT: uunpklo z2.d, z2.s -; VBITS_GE_2048-NEXT: cmpne p0.d, p1/z, z2.d, #0 -; VBITS_GE_2048-NEXT: st1h { z0.d }, p0, [x2, z1.d, lsl #1] +; VBITS_GE_2048-NEXT: uunpklo z1.s, z1.h +; VBITS_GE_2048-NEXT: uunpklo z1.d, z1.s +; VBITS_GE_2048-NEXT: cmpne p0.d, p1/z, z1.d, #0 +; VBITS_GE_2048-NEXT: st1h { z0.d }, p0, [x2, z2.d, lsl #1] ; VBITS_GE_2048-NEXT: ret %vals = load <32 x half>, <32 x half>* %a %idxs = load <32 x i32>, <32 x i32>* %b @@ -972,15 +969,15 @@ ; VBITS_GE_2048-NEXT: ptrue p0.h, vl32 ; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 ; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_GE_2048-NEXT: ld1w { z1.d }, p1/z, [x1] +; VBITS_GE_2048-NEXT: ld1w { z2.d }, p1/z, [x1] ; VBITS_GE_2048-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0 ; VBITS_GE_2048-NEXT: uunpklo z0.s, z0.h -; VBITS_GE_2048-NEXT: mov z2.h, p0/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_2048-NEXT: uunpklo z2.s, z2.h -; VBITS_GE_2048-NEXT: uunpklo z2.d, z2.s -; VBITS_GE_2048-NEXT: cmpne p0.d, p1/z, z2.d, #0 -; VBITS_GE_2048-NEXT: st1h { z0.d }, p0, [x2, z1.d, lsl #1] +; VBITS_GE_2048-NEXT: uunpklo z1.s, z1.h +; VBITS_GE_2048-NEXT: uunpklo z1.d, z1.s +; VBITS_GE_2048-NEXT: cmpne p0.d, p1/z, z1.d, #0 +; VBITS_GE_2048-NEXT: st1h { z0.d }, p0, [x2, z2.d, lsl #1] ; VBITS_GE_2048-NEXT: ret %vals = load <32 x half>, <32 x half>* %a %idxs = load <32 x i32>, <32 x i32>* %b @@ -998,15 +995,15 @@ ; VBITS_GE_2048-NEXT: ptrue p0.h, vl32 ; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 ; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_GE_2048-NEXT: ld1sw { z1.d }, p1/z, [x1] +; VBITS_GE_2048-NEXT: ld1sw { z2.d }, p1/z, [x1] ; VBITS_GE_2048-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0 ; VBITS_GE_2048-NEXT: uunpklo z0.s, z0.h -; VBITS_GE_2048-NEXT: mov z2.h, p0/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_2048-NEXT: uunpklo z2.s, z2.h -; VBITS_GE_2048-NEXT: uunpklo z2.d, z2.s -; VBITS_GE_2048-NEXT: cmpne p0.d, p1/z, z2.d, #0 -; VBITS_GE_2048-NEXT: st1h { z0.d }, p0, [x2, z1.d] +; VBITS_GE_2048-NEXT: uunpklo z1.s, z1.h +; VBITS_GE_2048-NEXT: uunpklo z1.d, z1.s +; VBITS_GE_2048-NEXT: cmpne p0.d, p1/z, z1.d, #0 +; VBITS_GE_2048-NEXT: st1h { z0.d }, p0, [x2, z2.d] ; VBITS_GE_2048-NEXT: ret %vals = load <32 x half>, <32 x half>* %a %idxs = load <32 x i32>, <32 x i32>* %b @@ -1025,15 +1022,15 @@ ; VBITS_GE_2048-NEXT: ptrue p0.h, vl32 ; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 ; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_GE_2048-NEXT: ld1w { z1.d }, p1/z, [x1] +; VBITS_GE_2048-NEXT: ld1w { z2.d }, p1/z, [x1] ; VBITS_GE_2048-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0 ; VBITS_GE_2048-NEXT: uunpklo z0.s, z0.h -; VBITS_GE_2048-NEXT: mov z2.h, p0/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_2048-NEXT: uunpklo z2.s, z2.h -; VBITS_GE_2048-NEXT: uunpklo z2.d, z2.s -; VBITS_GE_2048-NEXT: cmpne p0.d, p1/z, z2.d, #0 -; VBITS_GE_2048-NEXT: st1h { z0.d }, p0, [x2, z1.d] +; VBITS_GE_2048-NEXT: uunpklo z1.s, z1.h +; VBITS_GE_2048-NEXT: uunpklo z1.d, z1.s +; VBITS_GE_2048-NEXT: cmpne p0.d, p1/z, z1.d, #0 +; VBITS_GE_2048-NEXT: st1h { z0.d }, p0, [x2, z2.d] ; VBITS_GE_2048-NEXT: ret %vals = load <32 x half>, <32 x half>* %a %idxs = load <32 x i32>, <32 x i32>* %b @@ -1098,12 +1095,12 @@ ; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 ; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0] ; VBITS_GE_2048-NEXT: ld1d { z1.d }, p1/z, [x1] -; VBITS_GE_2048-NEXT: mov z2.d, x2 +; VBITS_GE_2048-NEXT: mov z3.d, x2 ; VBITS_GE_2048-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0 -; VBITS_GE_2048-NEXT: add z1.d, p1/m, z1.d, z2.d -; VBITS_GE_2048-NEXT: mov z3.s, p0/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: add z1.d, p1/m, z1.d, z3.d +; VBITS_GE_2048-NEXT: mov z2.s, p0/z, #-1 // =0xffffffffffffffff ; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_2048-NEXT: uunpklo z2.d, z3.s +; VBITS_GE_2048-NEXT: uunpklo z2.d, z2.s ; VBITS_GE_2048-NEXT: cmpne p0.d, p1/z, z2.d, #0 ; VBITS_GE_2048-NEXT: st1w { z0.d }, p0, [z1.d] ; VBITS_GE_2048-NEXT: ret @@ -1124,12 +1121,12 @@ ; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 ; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0] ; VBITS_GE_2048-NEXT: ld1d { z1.d }, p1/z, [x1] -; VBITS_GE_2048-NEXT: mov z2.d, #4 // =0x4 +; VBITS_GE_2048-NEXT: mov z3.d, #4 // =0x4 ; VBITS_GE_2048-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0 -; VBITS_GE_2048-NEXT: add z1.d, p1/m, z1.d, z2.d -; VBITS_GE_2048-NEXT: mov z3.s, p0/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: add z1.d, p1/m, z1.d, z3.d +; VBITS_GE_2048-NEXT: mov z2.s, p0/z, #-1 // =0xffffffffffffffff ; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_2048-NEXT: uunpklo z2.d, z3.s +; VBITS_GE_2048-NEXT: uunpklo z2.d, z2.s ; VBITS_GE_2048-NEXT: cmpne p0.d, p1/z, z2.d, #0 ; VBITS_GE_2048-NEXT: st1w { z0.d }, p0, [z1.d] ; VBITS_GE_2048-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll @@ -32,10 +32,10 @@ ; CHECK-NEXT: ptrue p0.h, vl4 ; CHECK-NEXT: fcmeq v2.4h, v1.4h, v2.4h ; CHECK-NEXT: umov w8, v2.h[0] -; CHECK-NEXT: umov w9, v2.h[1] -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: mov v2.s[1], w9 -; CHECK-NEXT: shl v2.2s, v2.2s, #16 +; CHECK-NEXT: fmov s3, w8 +; CHECK-NEXT: umov w8, v2.h[1] +; CHECK-NEXT: mov v3.s[1], w8 +; CHECK-NEXT: shl v2.2s, v3.2s, #16 ; CHECK-NEXT: sshr v2.2s, v2.2s, #16 ; CHECK-NEXT: fmov w8, s2 ; CHECK-NEXT: mov w9, v2.s[1] @@ -162,10 +162,10 @@ ; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_512-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff ; VBITS_GE_512-NEXT: ptrue p0.b, vl8 -; VBITS_GE_512-NEXT: uzp1 z1.s, z1.s, z1.s ; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h -; VBITS_GE_512-NEXT: uzp1 z1.h, z1.h, z1.h +; VBITS_GE_512-NEXT: uzp1 z1.s, z1.s, z1.s ; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b +; VBITS_GE_512-NEXT: uzp1 z1.h, z1.h, z1.h ; VBITS_GE_512-NEXT: uzp1 z1.b, z1.b, z1.b ; VBITS_GE_512-NEXT: cmpne p0.b, p0/z, z1.b, #0 ; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x2] @@ -188,8 +188,8 @@ ; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_512-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff ; VBITS_GE_512-NEXT: ptrue p0.h, vl8 -; VBITS_GE_512-NEXT: uzp1 z1.s, z1.s, z1.s ; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h +; VBITS_GE_512-NEXT: uzp1 z1.s, z1.s, z1.s ; VBITS_GE_512-NEXT: uzp1 z1.h, z1.h, z1.h ; VBITS_GE_512-NEXT: cmpne p0.h, p0/z, z1.h, #0 ; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x2] @@ -234,8 +234,8 @@ ; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h ; VBITS_GE_512-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff ; VBITS_GE_512-NEXT: ptrue p0.b, vl16 -; VBITS_GE_512-NEXT: uzp1 z1.h, z1.h, z1.h ; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b +; VBITS_GE_512-NEXT: uzp1 z1.h, z1.h, z1.h ; VBITS_GE_512-NEXT: uzp1 z1.b, z1.b, z1.b ; VBITS_GE_512-NEXT: cmpne p0.b, p0/z, z1.b, #0 ; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x2] diff --git a/llvm/test/CodeGen/AArch64/sve-insert-element.ll b/llvm/test/CodeGen/AArch64/sve-insert-element.ll --- a/llvm/test/CodeGen/AArch64/sve-insert-element.ll +++ b/llvm/test/CodeGen/AArch64/sve-insert-element.ll @@ -357,10 +357,10 @@ define @test_predicate_insert_2xi1_immediate ( %val, i1 %elt) { ; CHECK-LABEL: test_predicate_insert_2xi1_immediate: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.d, vl1 ; CHECK-NEXT: mov z0.d, p0/z, #1 // =0x1 +; CHECK-NEXT: ptrue p0.d, vl1 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: mov z0.d, p1/m, x0 +; CHECK-NEXT: mov z0.d, p0/m, x0 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: and z0.d, z0.d, #0x1 ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 @@ -373,14 +373,14 @@ ; CHECK-LABEL: test_predicate_insert_4xi1_immediate: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #2 -; CHECK-NEXT: index z1.s, #0, #1 +; CHECK-NEXT: index z0.s, #0, #1 ; CHECK-NEXT: ptrue p1.s -; CHECK-NEXT: mov z0.s, w8 -; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z0.s -; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1 -; CHECK-NEXT: mov z0.s, p2/m, w0 -; CHECK-NEXT: and z0.s, z0.s, #0x1 -; CHECK-NEXT: cmpne p0.s, p1/z, z0.s, #0 +; CHECK-NEXT: mov z2.s, p0/z, #1 // =0x1 +; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: cmpeq p0.s, p1/z, z0.s, z1.s +; CHECK-NEXT: mov z2.s, p0/m, w0 +; CHECK-NEXT: and z2.s, z2.s, #0x1 +; CHECK-NEXT: cmpne p0.s, p1/z, z2.s, #0 ; CHECK-NEXT: ret %res = insertelement %val, i1 %elt, i32 2 ret %res @@ -392,14 +392,14 @@ ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-NEXT: sxtw x8, w0 ; CHECK-NEXT: mov w9, #1 -; CHECK-NEXT: index z1.h, #0, #1 +; CHECK-NEXT: index z0.h, #0, #1 ; CHECK-NEXT: ptrue p1.h -; CHECK-NEXT: mov z0.h, w8 -; CHECK-NEXT: cmpeq p2.h, p1/z, z1.h, z0.h -; CHECK-NEXT: mov z0.h, p0/z, #1 // =0x1 -; CHECK-NEXT: mov z0.h, p2/m, w9 -; CHECK-NEXT: and z0.h, z0.h, #0x1 -; CHECK-NEXT: cmpne p0.h, p1/z, z0.h, #0 +; CHECK-NEXT: mov z2.h, p0/z, #1 // =0x1 +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: cmpeq p0.h, p1/z, z0.h, z1.h +; CHECK-NEXT: mov z2.h, p0/m, w9 +; CHECK-NEXT: and z2.h, z2.h, #0x1 +; CHECK-NEXT: cmpne p0.h, p1/z, z2.h, #0 ; CHECK-NEXT: ret %res = insertelement %val, i1 1, i32 %idx ret %res @@ -410,14 +410,14 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w9, #4 ; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: index z1.b, #0, #1 +; CHECK-NEXT: index z0.b, #0, #1 ; CHECK-NEXT: ptrue p1.b -; CHECK-NEXT: mov z0.b, w9 -; CHECK-NEXT: cmpeq p2.b, p1/z, z1.b, z0.b -; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1 -; CHECK-NEXT: mov z0.b, p2/m, w8 -; CHECK-NEXT: and z0.b, z0.b, #0x1 -; CHECK-NEXT: cmpne p0.b, p1/z, z0.b, #0 +; CHECK-NEXT: mov z2.b, p0/z, #1 // =0x1 +; CHECK-NEXT: mov z1.b, w9 +; CHECK-NEXT: cmpeq p0.b, p1/z, z0.b, z1.b +; CHECK-NEXT: mov z2.b, p0/m, w8 +; CHECK-NEXT: and z2.b, z2.b, #0x1 +; CHECK-NEXT: cmpne p0.b, p1/z, z2.b, #0 ; CHECK-NEXT: ret %res = insertelement %val, i1 0, i32 4 ret %res @@ -429,15 +429,15 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: sxtw x8, w1 -; CHECK-NEXT: index z1.d, #0, #1 +; CHECK-NEXT: index z0.d, #0, #1 ; CHECK-NEXT: ptrue p1.d +; CHECK-NEXT: mov z2.d, p0/z, #1 // =0x1 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: mov z0.d, x8 -; CHECK-NEXT: cmpeq p2.d, p1/z, z1.d, z0.d -; CHECK-NEXT: mov z0.d, p0/z, #1 // =0x1 -; CHECK-NEXT: mov z0.d, p2/m, x0 -; CHECK-NEXT: and z0.d, z0.d, #0x1 -; CHECK-NEXT: cmpne p0.d, p1/z, z0.d, #0 +; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: cmpeq p0.d, p1/z, z0.d, z1.d +; CHECK-NEXT: mov z2.d, p0/m, x0 +; CHECK-NEXT: and z2.d, z2.d, #0x1 +; CHECK-NEXT: cmpne p0.d, p1/z, z2.d, #0 ; CHECK-NEXT: ret %res = insertelement %val, i1 %elt, i32 %idx ret %res @@ -448,14 +448,14 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: sxtw x8, w1 -; CHECK-NEXT: index z1.s, #0, #1 +; CHECK-NEXT: index z0.s, #0, #1 ; CHECK-NEXT: ptrue p1.s -; CHECK-NEXT: mov z0.s, w8 -; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z0.s -; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1 -; CHECK-NEXT: mov z0.s, p2/m, w0 -; CHECK-NEXT: and z0.s, z0.s, #0x1 -; CHECK-NEXT: cmpne p0.s, p1/z, z0.s, #0 +; CHECK-NEXT: mov z2.s, p0/z, #1 // =0x1 +; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: cmpeq p0.s, p1/z, z0.s, z1.s +; CHECK-NEXT: mov z2.s, p0/m, w0 +; CHECK-NEXT: and z2.s, z2.s, #0x1 +; CHECK-NEXT: cmpne p0.s, p1/z, z2.s, #0 ; CHECK-NEXT: ret %res = insertelement %val, i1 %elt, i32 %idx ret %res @@ -465,14 +465,14 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: sxtw x8, w1 -; CHECK-NEXT: index z1.h, #0, #1 +; CHECK-NEXT: index z0.h, #0, #1 ; CHECK-NEXT: ptrue p1.h -; CHECK-NEXT: mov z0.h, w8 -; CHECK-NEXT: cmpeq p2.h, p1/z, z1.h, z0.h -; CHECK-NEXT: mov z0.h, p0/z, #1 // =0x1 -; CHECK-NEXT: mov z0.h, p2/m, w0 -; CHECK-NEXT: and z0.h, z0.h, #0x1 -; CHECK-NEXT: cmpne p0.h, p1/z, z0.h, #0 +; CHECK-NEXT: mov z2.h, p0/z, #1 // =0x1 +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: cmpeq p0.h, p1/z, z0.h, z1.h +; CHECK-NEXT: mov z2.h, p0/m, w0 +; CHECK-NEXT: and z2.h, z2.h, #0x1 +; CHECK-NEXT: cmpne p0.h, p1/z, z2.h, #0 ; CHECK-NEXT: ret %res = insertelement %val, i1 %elt, i32 %idx ret %res @@ -483,14 +483,14 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: sxtw x8, w1 -; CHECK-NEXT: index z1.b, #0, #1 +; CHECK-NEXT: index z0.b, #0, #1 ; CHECK-NEXT: ptrue p1.b -; CHECK-NEXT: mov z0.b, w8 -; CHECK-NEXT: cmpeq p2.b, p1/z, z1.b, z0.b -; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1 -; CHECK-NEXT: mov z0.b, p2/m, w0 -; CHECK-NEXT: and z0.b, z0.b, #0x1 -; CHECK-NEXT: cmpne p0.b, p1/z, z0.b, #0 +; CHECK-NEXT: mov z2.b, p0/z, #1 // =0x1 +; CHECK-NEXT: mov z1.b, w8 +; CHECK-NEXT: cmpeq p0.b, p1/z, z0.b, z1.b +; CHECK-NEXT: mov z2.b, p0/m, w0 +; CHECK-NEXT: and z2.b, z2.b, #0x1 +; CHECK-NEXT: cmpne p0.b, p1/z, z2.b, #0 ; CHECK-NEXT: ret %res = insertelement %val, i1 %elt, i32 %idx ret %res @@ -508,13 +508,13 @@ ; CHECK-NEXT: sxtw x9, w1 ; CHECK-NEXT: mov x10, sp ; CHECK-NEXT: mov z0.b, p1/z, #1 // =0x1 +; CHECK-NEXT: mov z1.b, p0/z, #1 // =0x1 ; CHECK-NEXT: ptrue p1.b ; CHECK-NEXT: addvl x8, x8, #2 ; CHECK-NEXT: cmp x9, x8 ; CHECK-NEXT: st1b { z0.b }, p1, [x10, #1, mul vl] ; CHECK-NEXT: csel x8, x9, x8, lo -; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1 -; CHECK-NEXT: st1b { z0.b }, p1, [sp] +; CHECK-NEXT: st1b { z1.b }, p1, [sp] ; CHECK-NEXT: strb w0, [x10, x8] ; CHECK-NEXT: ld1b { z0.b }, p1/z, [sp] ; CHECK-NEXT: ld1b { z1.b }, p1/z, [x10, #1, mul vl] diff --git a/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll b/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll --- a/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll +++ b/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll @@ -307,9 +307,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: punpklo p2.h, p0.b ; CHECK-NEXT: punpkhi p0.h, p0.b -; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: mov z0.d, p2/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: scvtf z0.d, p1/m, z0.d ; CHECK-NEXT: scvtf z1.d, p1/m, z1.d ; CHECK-NEXT: ret @@ -367,9 +367,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: punpklo p2.h, p0.b ; CHECK-NEXT: punpkhi p0.h, p0.b -; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: mov z0.d, p2/z, #1 // =0x1 ; CHECK-NEXT: mov z1.d, p0/z, #1 // =0x1 +; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: ucvtf z0.d, p1/m, z0.d ; CHECK-NEXT: ucvtf z1.d, p1/m, z1.d ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-vscale-attr.ll b/llvm/test/CodeGen/AArch64/sve-vscale-attr.ll --- a/llvm/test/CodeGen/AArch64/sve-vscale-attr.ll +++ b/llvm/test/CodeGen/AArch64/sve-vscale-attr.ll @@ -15,8 +15,8 @@ ; CHECK-NOARG-NEXT: ldp q6, q4, [x1] ; CHECK-NOARG-NEXT: stp q0, q1, [x0, #32] ; CHECK-NOARG-NEXT: add v2.4s, v2.4s, v6.4s -; CHECK-NOARG-NEXT: add v3.4s, v3.4s, v4.4s -; CHECK-NOARG-NEXT: stp q2, q3, [x0] +; CHECK-NOARG-NEXT: add v0.4s, v3.4s, v4.4s +; CHECK-NOARG-NEXT: stp q2, q0, [x0] ; CHECK-NOARG-NEXT: ret ; ; CHECK-ARG-LABEL: func_vscale_none: @@ -47,8 +47,8 @@ ; CHECK-NEXT: ldp q6, q4, [x1] ; CHECK-NEXT: stp q0, q1, [x0, #32] ; CHECK-NEXT: add v2.4s, v2.4s, v6.4s -; CHECK-NEXT: add v3.4s, v3.4s, v4.4s -; CHECK-NEXT: stp q2, q3, [x0] +; CHECK-NEXT: add v0.4s, v3.4s, v4.4s +; CHECK-NEXT: stp q2, q0, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i32>, <16 x i32>* %a %op2 = load <16 x i32>, <16 x i32>* %b diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll --- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll @@ -54,9 +54,9 @@ define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind { ; CHECK-LABEL: v64i8: ; CHECK: // %bb.0: -; CHECK-NEXT: uqadd v2.16b, v2.16b, v6.16b ; CHECK-NEXT: uqadd v0.16b, v0.16b, v4.16b ; CHECK-NEXT: uqadd v1.16b, v1.16b, v5.16b +; CHECK-NEXT: uqadd v2.16b, v2.16b, v6.16b ; CHECK-NEXT: uqadd v3.16b, v3.16b, v7.16b ; CHECK-NEXT: ret %z = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> %x, <64 x i8> %y) @@ -85,9 +85,9 @@ define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind { ; CHECK-LABEL: v32i16: ; CHECK: // %bb.0: -; CHECK-NEXT: uqadd v2.8h, v2.8h, v6.8h ; CHECK-NEXT: uqadd v0.8h, v0.8h, v4.8h ; CHECK-NEXT: uqadd v1.8h, v1.8h, v5.8h +; CHECK-NEXT: uqadd v2.8h, v2.8h, v6.8h ; CHECK-NEXT: uqadd v3.8h, v3.8h, v7.8h ; CHECK-NEXT: ret %z = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> %x, <32 x i16> %y) @@ -97,9 +97,9 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind { ; CHECK-LABEL: v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: uqadd v0.8b, v0.8b, v1.8b +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: uqadd v0.8b, v1.8b, v0.8b ; CHECK-NEXT: str d0, [x2] ; CHECK-NEXT: ret %x = load <8 x i8>, <8 x i8>* %px @@ -112,13 +112,13 @@ define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind { ; CHECK-LABEL: v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr s1, [x0] -; CHECK-NEXT: movi d0, #0xff00ff00ff00ff -; CHECK-NEXT: ldr s2, [x1] +; CHECK-NEXT: ldr s0, [x0] +; CHECK-NEXT: ldr s1, [x1] +; CHECK-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-NEXT: ushll v2.8h, v2.8b, #0 -; CHECK-NEXT: add v1.4h, v1.4h, v2.4h -; CHECK-NEXT: umin v0.4h, v1.4h, v0.4h +; CHECK-NEXT: add v0.4h, v0.4h, v1.4h +; CHECK-NEXT: movi d1, #0xff00ff00ff00ff +; CHECK-NEXT: umin v0.4h, v0.4h, v1.4h ; CHECK-NEXT: xtn v0.8b, v0.8h ; CHECK-NEXT: str s0, [x2] ; CHECK-NEXT: ret @@ -143,10 +143,10 @@ ; CHECK-NEXT: mov v1.s[1], w9 ; CHECK-NEXT: add v1.2s, v1.2s, v2.2s ; CHECK-NEXT: umin v0.2s, v1.2s, v0.2s -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strb w9, [x2] -; CHECK-NEXT: strb w8, [x2, #1] +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: mov w9, v0.s[1] +; CHECK-NEXT: strb w9, [x2, #1] +; CHECK-NEXT: strb w8, [x2] ; CHECK-NEXT: ret %x = load <2 x i8>, <2 x i8>* %px %y = load <2 x i8>, <2 x i8>* %py @@ -158,9 +158,9 @@ define void @v4i16(<4 x i16>* %px, <4 x i16>* %py, <4 x i16>* %pz) nounwind { ; CHECK-LABEL: v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: uqadd v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: uqadd v0.4h, v1.4h, v0.4h ; CHECK-NEXT: str d0, [x2] ; CHECK-NEXT: ret %x = load <4 x i16>, <4 x i16>* %px @@ -184,10 +184,10 @@ ; CHECK-NEXT: mov v1.s[1], w9 ; CHECK-NEXT: add v1.2s, v1.2s, v2.2s ; CHECK-NEXT: umin v0.2s, v1.2s, v0.2s -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strh w9, [x2] -; CHECK-NEXT: strh w8, [x2, #2] +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: mov w9, v0.s[1] +; CHECK-NEXT: strh w9, [x2, #2] +; CHECK-NEXT: strh w8, [x2] ; CHECK-NEXT: ret %x = load <2 x i16>, <2 x i16>* %px %y = load <2 x i16>, <2 x i16>* %py @@ -225,9 +225,9 @@ define void @v1i8(<1 x i8>* %px, <1 x i8>* %py, <1 x i8>* %pz) nounwind { ; CHECK-LABEL: v1i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr b0, [x0] -; CHECK-NEXT: ldr b1, [x1] -; CHECK-NEXT: uqadd v0.8b, v0.8b, v1.8b +; CHECK-NEXT: ldr b0, [x1] +; CHECK-NEXT: ldr b1, [x0] +; CHECK-NEXT: uqadd v0.8b, v1.8b, v0.8b ; CHECK-NEXT: st1 { v0.b }[0], [x2] ; CHECK-NEXT: ret %x = load <1 x i8>, <1 x i8>* %px @@ -240,9 +240,9 @@ define void @v1i16(<1 x i16>* %px, <1 x i16>* %py, <1 x i16>* %pz) nounwind { ; CHECK-LABEL: v1i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr h0, [x0] -; CHECK-NEXT: ldr h1, [x1] -; CHECK-NEXT: uqadd v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ldr h0, [x1] +; CHECK-NEXT: ldr h1, [x0] +; CHECK-NEXT: uqadd v0.4h, v1.4h, v0.4h ; CHECK-NEXT: str h0, [x2] ; CHECK-NEXT: ret %x = load <1 x i16>, <1 x i16>* %px @@ -305,9 +305,9 @@ define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind { ; CHECK-LABEL: v16i32: ; CHECK: // %bb.0: -; CHECK-NEXT: uqadd v2.4s, v2.4s, v6.4s ; CHECK-NEXT: uqadd v0.4s, v0.4s, v4.4s ; CHECK-NEXT: uqadd v1.4s, v1.4s, v5.4s +; CHECK-NEXT: uqadd v2.4s, v2.4s, v6.4s ; CHECK-NEXT: uqadd v3.4s, v3.4s, v7.4s ; CHECK-NEXT: ret %z = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> %x, <16 x i32> %y) @@ -336,9 +336,9 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind { ; CHECK-LABEL: v8i64: ; CHECK: // %bb.0: -; CHECK-NEXT: uqadd v2.2d, v2.2d, v6.2d ; CHECK-NEXT: uqadd v0.2d, v0.2d, v4.2d ; CHECK-NEXT: uqadd v1.2d, v1.2d, v5.2d +; CHECK-NEXT: uqadd v2.2d, v2.2d, v6.2d ; CHECK-NEXT: uqadd v3.2d, v3.2d, v7.2d ; CHECK-NEXT: ret %z = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> %x, <8 x i64> %y) diff --git a/llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask.ll b/llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask.ll --- a/llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask.ll +++ b/llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask.ll @@ -31,8 +31,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: movi d3, #0x0000ff000000ff ; CHECK-NEXT: and v0.8b, v0.8b, v2.8b -; CHECK-NEXT: eor v2.8b, v2.8b, v3.8b -; CHECK-NEXT: and v1.8b, v1.8b, v2.8b +; CHECK-NEXT: eor v3.8b, v2.8b, v3.8b +; CHECK-NEXT: and v1.8b, v1.8b, v3.8b ; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %mx = and <2 x i8> %x, %mask @@ -63,8 +63,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: movi d3, #0xff00ff00ff00ff ; CHECK-NEXT: and v0.8b, v0.8b, v2.8b -; CHECK-NEXT: eor v2.8b, v2.8b, v3.8b -; CHECK-NEXT: and v1.8b, v1.8b, v2.8b +; CHECK-NEXT: eor v3.8b, v2.8b, v3.8b +; CHECK-NEXT: and v1.8b, v1.8b, v3.8b ; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %mx = and <4 x i8> %x, %mask @@ -79,8 +79,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: movi d3, #0xff00ff00ff00ff ; CHECK-NEXT: and v0.8b, v0.8b, v2.8b -; CHECK-NEXT: eor v2.8b, v2.8b, v3.8b -; CHECK-NEXT: and v1.8b, v1.8b, v2.8b +; CHECK-NEXT: eor v3.8b, v2.8b, v3.8b +; CHECK-NEXT: and v1.8b, v1.8b, v3.8b ; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %mx = and <4 x i8> %x, %mask @@ -95,8 +95,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: movi d3, #0x00ffff0000ffff ; CHECK-NEXT: and v0.8b, v0.8b, v2.8b -; CHECK-NEXT: eor v2.8b, v2.8b, v3.8b -; CHECK-NEXT: and v1.8b, v1.8b, v2.8b +; CHECK-NEXT: eor v3.8b, v2.8b, v3.8b +; CHECK-NEXT: and v1.8b, v1.8b, v3.8b ; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %mx = and <2 x i16> %x, %mask diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/AArch64/urem-seteq-illegal-types.ll --- a/llvm/test/CodeGen/AArch64/urem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/AArch64/urem-seteq-illegal-types.ll @@ -67,25 +67,25 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: fmov s0, w0 ; CHECK-NEXT: adrp x8, .LCPI4_0 +; CHECK-NEXT: adrp x9, .LCPI4_1 +; CHECK-NEXT: movi d3, #0x0000000000ffff ; CHECK-NEXT: mov v0.h[1], w1 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_0] -; CHECK-NEXT: adrp x8, .LCPI4_1 +; CHECK-NEXT: ldr d2, [x9, :lo12:.LCPI4_1] +; CHECK-NEXT: adrp x8, .LCPI4_2 ; CHECK-NEXT: mov v0.h[2], w2 ; CHECK-NEXT: sub v0.4h, v0.4h, v1.4h -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_1] -; CHECK-NEXT: adrp x8, .LCPI4_2 -; CHECK-NEXT: mul v0.4h, v0.4h, v1.4h -; CHECK-NEXT: movi d1, #0x0000000000ffff -; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI4_2] +; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_2] ; CHECK-NEXT: adrp x8, .LCPI4_3 +; CHECK-NEXT: mul v0.4h, v0.4h, v2.4h ; CHECK-NEXT: shl v2.4h, v0.4h, #1 ; CHECK-NEXT: bic v0.4h, #248, lsl #8 -; CHECK-NEXT: ushl v2.4h, v2.4h, v3.4h -; CHECK-NEXT: ushl v0.4h, v0.4h, v1.4h -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_3] -; CHECK-NEXT: orr v0.8b, v0.8b, v2.8b +; CHECK-NEXT: ushl v0.4h, v0.4h, v3.4h +; CHECK-NEXT: ushl v1.4h, v2.4h, v1.4h +; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI4_3] +; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b ; CHECK-NEXT: bic v0.4h, #248, lsl #8 -; CHECK-NEXT: cmhi v0.4h, v0.4h, v1.4h +; CHECK-NEXT: cmhi v0.4h, v0.4h, v2.4h ; CHECK-NEXT: umov w0, v0.h[0] ; CHECK-NEXT: umov w1, v0.h[1] ; CHECK-NEXT: umov w2, v0.h[2] diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll b/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll --- a/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll +++ b/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll @@ -15,9 +15,9 @@ ; CHECK-NEXT: adrp x8, .LCPI0_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI0_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -79,9 +79,9 @@ ; CHECK-NEXT: adrp x8, .LCPI3_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -103,9 +103,9 @@ ; CHECK-NEXT: adrp x8, .LCPI4_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI4_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_3] -; CHECK-NEXT: cmhi v0.4s, v0.4s, v1.4s +; CHECK-NEXT: cmhi v0.4s, v0.4s, v2.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -129,9 +129,9 @@ ; CHECK-NEXT: adrp x8, .LCPI5_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI5_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI5_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -153,9 +153,9 @@ ; CHECK-NEXT: adrp x8, .LCPI6_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI6_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI6_3] -; CHECK-NEXT: cmhi v0.4s, v0.4s, v1.4s +; CHECK-NEXT: cmhi v0.4s, v0.4s, v2.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -181,9 +181,9 @@ ; CHECK-NEXT: adrp x8, .LCPI7_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI7_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI7_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -207,9 +207,9 @@ ; CHECK-NEXT: adrp x8, .LCPI8_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI8_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI8_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -233,9 +233,9 @@ ; CHECK-NEXT: adrp x8, .LCPI9_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI9_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI9_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -252,11 +252,11 @@ ; CHECK-LABEL: test_urem_odd_one: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #52429 -; CHECK-NEXT: adrp x9, .LCPI10_0 ; CHECK-NEXT: movk w8, #52428, lsl #16 ; CHECK-NEXT: dup v1.4s, w8 +; CHECK-NEXT: adrp x8, .LCPI10_0 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI10_0] +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI10_0] ; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b @@ -276,11 +276,11 @@ ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: adrp x8, .LCPI11_0 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI11_0] ; CHECK-NEXT: shl v1.4s, v0.4s, #31 ; CHECK-NEXT: ushr v0.4s, v0.4s, #1 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI11_0] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -304,9 +304,9 @@ ; CHECK-NEXT: adrp x8, .LCPI12_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI12_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI12_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -332,9 +332,9 @@ ; CHECK-NEXT: adrp x8, .LCPI13_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI13_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI13_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -358,9 +358,9 @@ ; CHECK-NEXT: adrp x8, .LCPI14_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI14_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI14_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -384,9 +384,9 @@ ; CHECK-NEXT: adrp x8, .LCPI15_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI15_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -412,9 +412,9 @@ ; CHECK-NEXT: adrp x8, .LCPI16_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI16_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -438,9 +438,9 @@ ; CHECK-NEXT: adrp x8, .LCPI17_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI17_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -464,9 +464,9 @@ ; CHECK-NEXT: adrp x8, .LCPI18_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI18_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI18_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -511,9 +511,9 @@ ; CHECK-NEXT: adrp x8, .LCPI20_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI20_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI20_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -537,9 +537,9 @@ ; CHECK-NEXT: adrp x8, .LCPI21_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI21_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI21_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -565,9 +565,9 @@ ; CHECK-NEXT: adrp x8, .LCPI22_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI22_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI22_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -591,9 +591,9 @@ ; CHECK-NEXT: adrp x8, .LCPI23_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI23_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI23_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -617,9 +617,9 @@ ; CHECK-NEXT: adrp x8, .LCPI24_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI24_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI24_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -644,9 +644,9 @@ ; CHECK-NEXT: adrp x8, .LCPI25_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI25_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI25_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -669,9 +669,9 @@ ; CHECK-NEXT: adrp x8, .LCPI26_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI26_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI26_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll b/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll --- a/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll +++ b/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll @@ -54,11 +54,11 @@ ; CHECK-NEXT: mov w8, #43690 ; CHECK-NEXT: movk w8, #10922, lsl #16 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s +; CHECK-NEXT: dup v2.4s, w8 ; CHECK-NEXT: shl v1.4s, v0.4s, #31 ; CHECK-NEXT: ushr v0.4s, v0.4s, #1 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: dup v1.4s, w8 -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, @@ -70,18 +70,18 @@ ; CHECK-LABEL: t32_6_part1: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI3_0 +; CHECK-NEXT: mov w9, #43691 +; CHECK-NEXT: movk w9, #43690, lsl #16 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0] -; CHECK-NEXT: mov w8, #43691 -; CHECK-NEXT: movk w8, #43690, lsl #16 -; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s -; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: adrp x8, .LCPI3_1 -; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s +; CHECK-NEXT: dup v2.4s, w9 +; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s +; CHECK-NEXT: mul v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_1] ; CHECK-NEXT: shl v1.4s, v0.4s, #31 ; CHECK-NEXT: ushr v0.4s, v0.4s, #1 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_1] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll b/llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll --- a/llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll +++ b/llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll @@ -32,11 +32,11 @@ ; CHECK-NEXT: mov w8, #23592 ; CHECK-NEXT: movk w8, #655, lsl #16 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s +; CHECK-NEXT: dup v2.4s, w8 ; CHECK-NEXT: shl v1.4s, v0.4s, #30 ; CHECK-NEXT: ushr v0.4s, v0.4s, #2 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: dup v1.4s, w8 -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -75,11 +75,11 @@ ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0] ; CHECK-NEXT: adrp x8, .LCPI3_1 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_1] ; CHECK-NEXT: shl v1.4s, v0.4s, #30 ; CHECK-NEXT: ushr v0.4s, v0.4s, #2 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_1] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -98,13 +98,13 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #34079 ; CHECK-NEXT: movk w8, #20971, lsl #16 -; CHECK-NEXT: movi v1.4s, #25 -; CHECK-NEXT: dup v2.4s, w8 -; CHECK-NEXT: umull2 v3.2d, v0.4s, v2.4s -; CHECK-NEXT: umull v2.2d, v0.2s, v2.2s -; CHECK-NEXT: uzp2 v2.4s, v2.4s, v3.4s -; CHECK-NEXT: ushr v2.4s, v2.4s, #3 -; CHECK-NEXT: mls v0.4s, v2.4s, v1.4s +; CHECK-NEXT: dup v1.4s, w8 +; CHECK-NEXT: umull2 v2.2d, v0.4s, v1.4s +; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s +; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s +; CHECK-NEXT: movi v2.4s, #25 +; CHECK-NEXT: ushr v1.4s, v1.4s, #3 +; CHECK-NEXT: mls v0.4s, v1.4s, v2.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b @@ -120,13 +120,13 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #34079 ; CHECK-NEXT: movk w8, #20971, lsl #16 -; CHECK-NEXT: movi v1.4s, #100 -; CHECK-NEXT: dup v2.4s, w8 -; CHECK-NEXT: umull2 v3.2d, v0.4s, v2.4s -; CHECK-NEXT: umull v2.2d, v0.2s, v2.2s -; CHECK-NEXT: uzp2 v2.4s, v2.4s, v3.4s -; CHECK-NEXT: ushr v2.4s, v2.4s, #5 -; CHECK-NEXT: mls v0.4s, v2.4s, v1.4s +; CHECK-NEXT: dup v1.4s, w8 +; CHECK-NEXT: umull2 v2.2d, v0.4s, v1.4s +; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s +; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s +; CHECK-NEXT: movi v2.4s, #100 +; CHECK-NEXT: ushr v1.4s, v1.4s, #5 +; CHECK-NEXT: mls v0.4s, v1.4s, v2.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b @@ -168,8 +168,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: movi v1.4s, #15 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b -; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 +; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, @@ -183,8 +183,8 @@ ; CHECK-LABEL: test_urem_int_min: ; CHECK: // %bb.0: ; CHECK-NEXT: bic v0.4s, #128, lsl #24 -; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 +; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, @@ -197,8 +197,8 @@ define <4 x i32> @test_urem_allones(<4 x i32> %X) nounwind { ; CHECK-LABEL: test_urem_allones: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: neg v0.4s, v0.4s +; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-vec-tautological.ll b/llvm/test/CodeGen/AArch64/urem-seteq-vec-tautological.ll --- a/llvm/test/CodeGen/AArch64/urem-seteq-vec-tautological.ll +++ b/llvm/test/CodeGen/AArch64/urem-seteq-vec-tautological.ll @@ -5,11 +5,11 @@ ; CHECK-LABEL: t0_all_tautological: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI0_0 +; CHECK-NEXT: adrp x9, .LCPI0_1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0] -; CHECK-NEXT: adrp x8, .LCPI0_1 +; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI0_1] ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_1] -; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s +; CHECK-NEXT: cmeq v0.4s, v0.4s, v2.4s ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, @@ -82,10 +82,10 @@ ; CHECK-NEXT: mov x10, v0.d[1] ; CHECK-NEXT: mul x9, x9, x8 ; CHECK-NEXT: mul x8, x10, x8 +; CHECK-NEXT: adrp x10, .LCPI4_0 ; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: adrp x9, .LCPI4_0 +; CHECK-NEXT: ldr q1, [x10, :lo12:.LCPI4_0] ; CHECK-NEXT: mov v0.d[1], x8 -; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI4_0] ; CHECK-NEXT: cmhs v0.2d, v1.2d, v0.2d ; CHECK-NEXT: movi d1, #0xffffffff00000000 ; CHECK-NEXT: xtn v0.2s, v0.2d diff --git a/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll b/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll --- a/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll +++ b/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll @@ -4,42 +4,42 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) { ; CHECK-LABEL: fold_urem_vec_1: ; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #8969 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: umov w8, v0.h[0] -; CHECK-NEXT: mov w9, #8969 -; CHECK-NEXT: movk w9, #22765, lsl #16 +; CHECK-NEXT: umov w9, v0.h[0] +; CHECK-NEXT: movk w8, #22765, lsl #16 ; CHECK-NEXT: umov w10, v0.h[1] -; CHECK-NEXT: mov w12, #16913 -; CHECK-NEXT: mov w13, #95 -; CHECK-NEXT: movk w12, #8456, lsl #16 -; CHECK-NEXT: umull x9, w8, w9 -; CHECK-NEXT: ubfx w14, w10, #2, #14 -; CHECK-NEXT: lsr x9, x9, #32 -; CHECK-NEXT: sub w11, w8, w9 -; CHECK-NEXT: umull x12, w14, w12 -; CHECK-NEXT: add w9, w9, w11, lsr #1 -; CHECK-NEXT: umov w11, v0.h[2] -; CHECK-NEXT: lsr w9, w9, #6 -; CHECK-NEXT: lsr x12, x12, #34 -; CHECK-NEXT: msub w8, w9, w13, w8 -; CHECK-NEXT: mov w9, #33437 -; CHECK-NEXT: movk w9, #21399, lsl #16 +; CHECK-NEXT: mov w11, #16913 ; CHECK-NEXT: mov w13, #124 -; CHECK-NEXT: umull x9, w11, w9 -; CHECK-NEXT: msub w10, w12, w13, w10 -; CHECK-NEXT: umov w12, v0.h[3] -; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: umull x8, w9, w8 +; CHECK-NEXT: movk w11, #8456, lsl #16 +; CHECK-NEXT: ubfx w12, w10, #2, #14 +; CHECK-NEXT: mov w14, #95 +; CHECK-NEXT: lsr x8, x8, #32 +; CHECK-NEXT: umull x11, w12, w11 +; CHECK-NEXT: sub w12, w9, w8 +; CHECK-NEXT: lsr x11, x11, #34 +; CHECK-NEXT: add w8, w8, w12, lsr #1 +; CHECK-NEXT: mov w12, #33437 +; CHECK-NEXT: lsr w8, w8, #6 +; CHECK-NEXT: movk w12, #21399, lsl #16 +; CHECK-NEXT: msub w10, w11, w13, w10 +; CHECK-NEXT: umov w11, v0.h[2] +; CHECK-NEXT: msub w8, w8, w14, w9 ; CHECK-NEXT: mov w13, #2287 -; CHECK-NEXT: lsr x8, x9, #37 -; CHECK-NEXT: mov w9, #98 +; CHECK-NEXT: mov w14, #98 +; CHECK-NEXT: umull x9, w11, w12 +; CHECK-NEXT: umov w12, v0.h[3] ; CHECK-NEXT: movk w13, #16727, lsl #16 -; CHECK-NEXT: msub w8, w8, w9, w11 +; CHECK-NEXT: lsr x9, x9, #37 +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: umull x13, w12, w13 +; CHECK-NEXT: msub w9, w9, w14, w11 +; CHECK-NEXT: mov w11, #1003 +; CHECK-NEXT: lsr x8, x13, #40 ; CHECK-NEXT: mov v0.h[1], w10 -; CHECK-NEXT: umull x9, w12, w13 -; CHECK-NEXT: mov w10, #1003 -; CHECK-NEXT: lsr x9, x9, #40 -; CHECK-NEXT: mov v0.h[2], w8 -; CHECK-NEXT: msub w8, w9, w10, w12 +; CHECK-NEXT: msub w8, w8, w11, w12 +; CHECK-NEXT: mov v0.h[2], w9 ; CHECK-NEXT: mov v0.h[3], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret @@ -50,41 +50,41 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) { ; CHECK-LABEL: fold_urem_vec_2: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: umov w10, v0.h[0] ; CHECK-NEXT: mov w8, #8969 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: umov w9, v0.h[1] ; CHECK-NEXT: movk w8, #22765, lsl #16 +; CHECK-NEXT: umov w10, v0.h[0] ; CHECK-NEXT: umov w15, v0.h[2] -; CHECK-NEXT: umov w16, v0.h[3] -; CHECK-NEXT: umull x12, w10, w8 +; CHECK-NEXT: mov w16, #95 ; CHECK-NEXT: umull x11, w9, w8 -; CHECK-NEXT: lsr x12, x12, #32 +; CHECK-NEXT: umull x12, w10, w8 ; CHECK-NEXT: lsr x11, x11, #32 -; CHECK-NEXT: sub w14, w10, w12 +; CHECK-NEXT: lsr x12, x12, #32 ; CHECK-NEXT: sub w13, w9, w11 -; CHECK-NEXT: add w12, w12, w14, lsr #1 -; CHECK-NEXT: umull x14, w15, w8 +; CHECK-NEXT: sub w14, w10, w12 ; CHECK-NEXT: add w11, w11, w13, lsr #1 -; CHECK-NEXT: mov w13, #95 +; CHECK-NEXT: umull x13, w15, w8 +; CHECK-NEXT: add w12, w12, w14, lsr #1 +; CHECK-NEXT: umov w14, v0.h[3] ; CHECK-NEXT: lsr w12, w12, #6 +; CHECK-NEXT: lsr x13, x13, #32 +; CHECK-NEXT: umull x8, w14, w8 ; CHECK-NEXT: lsr w11, w11, #6 -; CHECK-NEXT: umull x8, w16, w8 -; CHECK-NEXT: msub w10, w12, w13, w10 -; CHECK-NEXT: lsr x12, x14, #32 -; CHECK-NEXT: msub w9, w11, w13, w9 -; CHECK-NEXT: sub w11, w15, w12 +; CHECK-NEXT: msub w10, w12, w16, w10 +; CHECK-NEXT: sub w12, w15, w13 ; CHECK-NEXT: lsr x8, x8, #32 +; CHECK-NEXT: msub w9, w11, w16, w9 +; CHECK-NEXT: add w12, w13, w12, lsr #1 +; CHECK-NEXT: sub w13, w14, w8 +; CHECK-NEXT: lsr w11, w12, #6 ; CHECK-NEXT: fmov s0, w10 -; CHECK-NEXT: add w10, w12, w11, lsr #1 -; CHECK-NEXT: lsr w10, w10, #6 -; CHECK-NEXT: sub w11, w16, w8 -; CHECK-NEXT: mov v0.h[1], w9 -; CHECK-NEXT: msub w9, w10, w13, w15 -; CHECK-NEXT: add w8, w8, w11, lsr #1 +; CHECK-NEXT: add w8, w8, w13, lsr #1 +; CHECK-NEXT: msub w10, w11, w16, w15 ; CHECK-NEXT: lsr w8, w8, #6 -; CHECK-NEXT: mov v0.h[2], w9 -; CHECK-NEXT: msub w8, w8, w13, w16 +; CHECK-NEXT: mov v0.h[1], w9 +; CHECK-NEXT: msub w8, w8, w16, w14 +; CHECK-NEXT: mov v0.h[2], w10 ; CHECK-NEXT: mov v0.h[3], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret @@ -97,46 +97,46 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) { ; CHECK-LABEL: combine_urem_udiv: ; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #8969 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: umov w9, v0.h[0] -; CHECK-NEXT: mov w8, #8969 ; CHECK-NEXT: movk w8, #22765, lsl #16 ; CHECK-NEXT: umov w10, v0.h[1] -; CHECK-NEXT: umov w11, v0.h[2] -; CHECK-NEXT: mov w15, #95 -; CHECK-NEXT: umov w13, v0.h[3] -; CHECK-NEXT: umull x12, w9, w8 +; CHECK-NEXT: umov w13, v0.h[2] +; CHECK-NEXT: mov w12, #95 +; CHECK-NEXT: umull x11, w9, w8 +; CHECK-NEXT: umov w15, v0.h[3] ; CHECK-NEXT: umull x14, w10, w8 -; CHECK-NEXT: lsr x12, x12, #32 -; CHECK-NEXT: umull x17, w11, w8 -; CHECK-NEXT: sub w16, w9, w12 +; CHECK-NEXT: lsr x11, x11, #32 +; CHECK-NEXT: umull x17, w13, w8 +; CHECK-NEXT: sub w16, w9, w11 ; CHECK-NEXT: lsr x14, x14, #32 +; CHECK-NEXT: umull x8, w15, w8 ; CHECK-NEXT: lsr x17, x17, #32 -; CHECK-NEXT: umull x8, w13, w8 -; CHECK-NEXT: add w12, w12, w16, lsr #1 +; CHECK-NEXT: add w11, w11, w16, lsr #1 ; CHECK-NEXT: sub w16, w10, w14 -; CHECK-NEXT: lsr w12, w12, #6 +; CHECK-NEXT: lsr w11, w11, #6 ; CHECK-NEXT: lsr x8, x8, #32 ; CHECK-NEXT: add w14, w14, w16, lsr #1 -; CHECK-NEXT: sub w16, w11, w17 -; CHECK-NEXT: msub w9, w12, w15, w9 +; CHECK-NEXT: sub w16, w13, w17 +; CHECK-NEXT: msub w9, w11, w12, w9 ; CHECK-NEXT: lsr w14, w14, #6 ; CHECK-NEXT: add w16, w17, w16, lsr #1 -; CHECK-NEXT: fmov s1, w12 -; CHECK-NEXT: msub w10, w14, w15, w10 -; CHECK-NEXT: sub w17, w13, w8 +; CHECK-NEXT: sub w17, w15, w8 +; CHECK-NEXT: msub w10, w14, w12, w10 +; CHECK-NEXT: fmov s1, w11 +; CHECK-NEXT: add w8, w8, w17, lsr #1 ; CHECK-NEXT: fmov s0, w9 ; CHECK-NEXT: lsr w9, w16, #6 -; CHECK-NEXT: mov v1.h[1], w14 -; CHECK-NEXT: add w8, w8, w17, lsr #1 -; CHECK-NEXT: msub w11, w9, w15, w11 ; CHECK-NEXT: lsr w8, w8, #6 +; CHECK-NEXT: msub w11, w9, w12, w13 ; CHECK-NEXT: mov v0.h[1], w10 -; CHECK-NEXT: msub w10, w8, w15, w13 -; CHECK-NEXT: mov v1.h[2], w9 +; CHECK-NEXT: msub w10, w8, w12, w15 +; CHECK-NEXT: mov v1.h[1], w14 ; CHECK-NEXT: mov v0.h[2], w11 -; CHECK-NEXT: mov v1.h[3], w8 +; CHECK-NEXT: mov v1.h[2], w9 ; CHECK-NEXT: mov v0.h[3], w10 +; CHECK-NEXT: mov v1.h[3], w8 ; CHECK-NEXT: add v0.4h, v0.4h, v1.4h ; CHECK-NEXT: ret %1 = urem <4 x i16> %x, @@ -150,26 +150,26 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) { ; CHECK-LABEL: dont_fold_urem_power_of_two: ; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #8969 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: umov w10, v0.h[0] ; CHECK-NEXT: umov w9, v0.h[3] -; CHECK-NEXT: mov w8, #8969 -; CHECK-NEXT: umov w11, v0.h[1] ; CHECK-NEXT: movk w8, #22765, lsl #16 -; CHECK-NEXT: and w10, w10, #0x3f +; CHECK-NEXT: umov w10, v0.h[0] +; CHECK-NEXT: umov w12, v0.h[1] ; CHECK-NEXT: umull x8, w9, w8 -; CHECK-NEXT: and w11, w11, #0x1f +; CHECK-NEXT: and w10, w10, #0x3f ; CHECK-NEXT: lsr x8, x8, #32 +; CHECK-NEXT: sub w11, w9, w8 ; CHECK-NEXT: fmov s1, w10 -; CHECK-NEXT: umov w10, v0.h[2] -; CHECK-NEXT: sub w12, w9, w8 -; CHECK-NEXT: mov v1.h[1], w11 -; CHECK-NEXT: add w8, w8, w12, lsr #1 -; CHECK-NEXT: and w10, w10, #0x7 +; CHECK-NEXT: and w10, w12, #0x1f +; CHECK-NEXT: mov w12, #95 +; CHECK-NEXT: add w8, w8, w11, lsr #1 +; CHECK-NEXT: umov w11, v0.h[2] ; CHECK-NEXT: lsr w8, w8, #6 -; CHECK-NEXT: mov w11, #95 -; CHECK-NEXT: msub w8, w8, w11, w9 -; CHECK-NEXT: mov v1.h[2], w10 +; CHECK-NEXT: and w11, w11, #0x7 +; CHECK-NEXT: msub w8, w8, w12, w9 +; CHECK-NEXT: mov v1.h[1], w10 +; CHECK-NEXT: mov v1.h[2], w11 ; CHECK-NEXT: mov v1.h[3], w8 ; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: ret @@ -186,29 +186,29 @@ ; CHECK-NEXT: mov w8, #30865 ; CHECK-NEXT: movk w8, #51306, lsl #16 ; CHECK-NEXT: umov w11, v0.h[2] +; CHECK-NEXT: ubfx w10, w9, #1, #15 ; CHECK-NEXT: mov w12, #654 -; CHECK-NEXT: movi d1, #0000000000000000 ; CHECK-NEXT: mov w13, #47143 -; CHECK-NEXT: ubfx w10, w9, #1, #15 -; CHECK-NEXT: movk w13, #24749, lsl #16 ; CHECK-NEXT: umull x8, w10, w8 ; CHECK-NEXT: mov w10, #17097 ; CHECK-NEXT: movk w10, #45590, lsl #16 +; CHECK-NEXT: movk w13, #24749, lsl #16 ; CHECK-NEXT: lsr x8, x8, #40 ; CHECK-NEXT: umull x10, w11, w10 ; CHECK-NEXT: msub w8, w8, w12, w9 ; CHECK-NEXT: umov w9, v0.h[3] -; CHECK-NEXT: lsr x10, x10, #36 ; CHECK-NEXT: mov w12, #23 +; CHECK-NEXT: lsr x10, x10, #36 +; CHECK-NEXT: umull x13, w9, w13 +; CHECK-NEXT: movi d0, #0000000000000000 ; CHECK-NEXT: msub w10, w10, w12, w11 -; CHECK-NEXT: mov w11, #5423 -; CHECK-NEXT: mov v1.h[1], w8 -; CHECK-NEXT: umull x8, w9, w13 -; CHECK-NEXT: lsr x8, x8, #43 -; CHECK-NEXT: mov v1.h[2], w10 -; CHECK-NEXT: msub w8, w8, w11, w9 -; CHECK-NEXT: mov v1.h[3], w8 -; CHECK-NEXT: fmov d0, d1 +; CHECK-NEXT: mov w12, #5423 +; CHECK-NEXT: lsr x11, x13, #43 +; CHECK-NEXT: mov v0.h[1], w8 +; CHECK-NEXT: msub w8, w11, w12, w9 +; CHECK-NEXT: mov v0.h[2], w10 +; CHECK-NEXT: mov v0.h[3], w8 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %1 = urem <4 x i16> %x, ret <4 x i16> %1 @@ -228,39 +228,39 @@ ; CHECK-LABEL: dont_fold_urem_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x8, #17097 -; CHECK-NEXT: fmov x9, d1 +; CHECK-NEXT: mov x11, #21445 ; CHECK-NEXT: movk x8, #45590, lsl #16 -; CHECK-NEXT: mov x13, #21445 +; CHECK-NEXT: fmov x9, d1 ; CHECK-NEXT: movk x8, #34192, lsl #32 -; CHECK-NEXT: movk x13, #1603, lsl #16 -; CHECK-NEXT: movk x8, #25644, lsl #48 -; CHECK-NEXT: movk x13, #15432, lsl #32 ; CHECK-NEXT: mov x10, v0.d[1] -; CHECK-NEXT: movk x13, #25653, lsl #48 +; CHECK-NEXT: movk x11, #1603, lsl #16 +; CHECK-NEXT: movk x8, #25644, lsl #48 +; CHECK-NEXT: movk x11, #15432, lsl #32 +; CHECK-NEXT: lsr x12, x10, #1 +; CHECK-NEXT: movk x11, #25653, lsl #48 ; CHECK-NEXT: umulh x8, x9, x8 -; CHECK-NEXT: mov x11, v1.d[1] -; CHECK-NEXT: sub x12, x9, x8 -; CHECK-NEXT: lsr x14, x10, #1 -; CHECK-NEXT: add x8, x8, x12, lsr #1 +; CHECK-NEXT: mov x14, v1.d[1] +; CHECK-NEXT: umulh x11, x12, x11 ; CHECK-NEXT: mov x12, #12109 +; CHECK-NEXT: sub x13, x9, x8 ; CHECK-NEXT: movk x12, #52170, lsl #16 -; CHECK-NEXT: umulh x13, x14, x13 ; CHECK-NEXT: movk x12, #28749, lsl #32 -; CHECK-NEXT: mov w14, #23 +; CHECK-NEXT: lsr x11, x11, #7 ; CHECK-NEXT: movk x12, #49499, lsl #48 +; CHECK-NEXT: add x8, x8, x13, lsr #1 +; CHECK-NEXT: mov w13, #23 ; CHECK-NEXT: lsr x8, x8, #4 -; CHECK-NEXT: lsr x13, x13, #7 -; CHECK-NEXT: umulh x12, x11, x12 -; CHECK-NEXT: msub x8, x8, x14, x9 -; CHECK-NEXT: mov w9, #5423 -; CHECK-NEXT: lsr x12, x12, #12 -; CHECK-NEXT: mov w14, #654 -; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: msub x9, x12, x9, x11 -; CHECK-NEXT: msub x10, x13, x14, x10 +; CHECK-NEXT: umulh x12, x14, x12 +; CHECK-NEXT: msub x8, x8, x13, x9 +; CHECK-NEXT: mov w13, #654 +; CHECK-NEXT: lsr x9, x12, #12 +; CHECK-NEXT: mov w12, #5423 +; CHECK-NEXT: msub x10, x11, x13, x10 +; CHECK-NEXT: msub x9, x9, x12, x14 ; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: mov v1.d[1], x9 +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov v0.d[1], x10 +; CHECK-NEXT: mov v1.d[1], x9 ; CHECK-NEXT: ret %1 = urem <4 x i64> %x, ret <4 x i64> %1 diff --git a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll --- a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll @@ -55,9 +55,9 @@ define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind { ; CHECK-LABEL: v64i8: ; CHECK: // %bb.0: -; CHECK-NEXT: uqsub v2.16b, v2.16b, v6.16b ; CHECK-NEXT: uqsub v0.16b, v0.16b, v4.16b ; CHECK-NEXT: uqsub v1.16b, v1.16b, v5.16b +; CHECK-NEXT: uqsub v2.16b, v2.16b, v6.16b ; CHECK-NEXT: uqsub v3.16b, v3.16b, v7.16b ; CHECK-NEXT: ret %z = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> %x, <64 x i8> %y) @@ -86,9 +86,9 @@ define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind { ; CHECK-LABEL: v32i16: ; CHECK: // %bb.0: -; CHECK-NEXT: uqsub v2.8h, v2.8h, v6.8h ; CHECK-NEXT: uqsub v0.8h, v0.8h, v4.8h ; CHECK-NEXT: uqsub v1.8h, v1.8h, v5.8h +; CHECK-NEXT: uqsub v2.8h, v2.8h, v6.8h ; CHECK-NEXT: uqsub v3.8h, v3.8h, v7.8h ; CHECK-NEXT: ret %z = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> %x, <32 x i16> %y) @@ -98,9 +98,9 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind { ; CHECK-LABEL: v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: uqsub v0.8b, v0.8b, v1.8b +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: uqsub v0.8b, v1.8b, v0.8b ; CHECK-NEXT: str d0, [x2] ; CHECK-NEXT: ret %x = load <8 x i8>, <8 x i8>* %px @@ -140,10 +140,10 @@ ; CHECK-NEXT: mov v1.s[1], w10 ; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: uqsub v0.2s, v0.2s, v1.2s -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strb w9, [x2] -; CHECK-NEXT: strb w8, [x2, #1] +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: mov w9, v0.s[1] +; CHECK-NEXT: strb w9, [x2, #1] +; CHECK-NEXT: strb w8, [x2] ; CHECK-NEXT: ret %x = load <2 x i8>, <2 x i8>* %px %y = load <2 x i8>, <2 x i8>* %py @@ -155,9 +155,9 @@ define void @v4i16(<4 x i16>* %px, <4 x i16>* %py, <4 x i16>* %pz) nounwind { ; CHECK-LABEL: v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: uqsub v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: uqsub v0.4h, v1.4h, v0.4h ; CHECK-NEXT: str d0, [x2] ; CHECK-NEXT: ret %x = load <4 x i16>, <4 x i16>* %px @@ -179,10 +179,10 @@ ; CHECK-NEXT: mov v1.s[1], w10 ; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: uqsub v0.2s, v0.2s, v1.2s -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strh w9, [x2] -; CHECK-NEXT: strh w8, [x2, #2] +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: mov w9, v0.s[1] +; CHECK-NEXT: strh w9, [x2, #2] +; CHECK-NEXT: strh w8, [x2] ; CHECK-NEXT: ret %x = load <2 x i16>, <2 x i16>* %px %y = load <2 x i16>, <2 x i16>* %py @@ -220,9 +220,9 @@ define void @v1i8(<1 x i8>* %px, <1 x i8>* %py, <1 x i8>* %pz) nounwind { ; CHECK-LABEL: v1i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr b0, [x0] -; CHECK-NEXT: ldr b1, [x1] -; CHECK-NEXT: uqsub v0.8b, v0.8b, v1.8b +; CHECK-NEXT: ldr b0, [x1] +; CHECK-NEXT: ldr b1, [x0] +; CHECK-NEXT: uqsub v0.8b, v1.8b, v0.8b ; CHECK-NEXT: st1 { v0.b }[0], [x2] ; CHECK-NEXT: ret %x = load <1 x i8>, <1 x i8>* %px @@ -235,9 +235,9 @@ define void @v1i16(<1 x i16>* %px, <1 x i16>* %py, <1 x i16>* %pz) nounwind { ; CHECK-LABEL: v1i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr h0, [x0] -; CHECK-NEXT: ldr h1, [x1] -; CHECK-NEXT: uqsub v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ldr h0, [x1] +; CHECK-NEXT: ldr h1, [x0] +; CHECK-NEXT: uqsub v0.4h, v1.4h, v0.4h ; CHECK-NEXT: str h0, [x2] ; CHECK-NEXT: ret %x = load <1 x i16>, <1 x i16>* %px @@ -301,9 +301,9 @@ define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind { ; CHECK-LABEL: v16i32: ; CHECK: // %bb.0: -; CHECK-NEXT: uqsub v2.4s, v2.4s, v6.4s ; CHECK-NEXT: uqsub v0.4s, v0.4s, v4.4s ; CHECK-NEXT: uqsub v1.4s, v1.4s, v5.4s +; CHECK-NEXT: uqsub v2.4s, v2.4s, v6.4s ; CHECK-NEXT: uqsub v3.4s, v3.4s, v7.4s ; CHECK-NEXT: ret %z = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> %x, <16 x i32> %y) @@ -332,9 +332,9 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind { ; CHECK-LABEL: v8i64: ; CHECK: // %bb.0: -; CHECK-NEXT: uqsub v2.2d, v2.2d, v6.2d ; CHECK-NEXT: uqsub v0.2d, v0.2d, v4.2d ; CHECK-NEXT: uqsub v1.2d, v1.2d, v5.2d +; CHECK-NEXT: uqsub v2.2d, v2.2d, v6.2d ; CHECK-NEXT: uqsub v3.2d, v3.2d, v7.2d ; CHECK-NEXT: ret %z = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> %x, <8 x i64> %y) diff --git a/llvm/test/CodeGen/AArch64/vec-extract-branch.ll b/llvm/test/CodeGen/AArch64/vec-extract-branch.ll --- a/llvm/test/CodeGen/AArch64/vec-extract-branch.ll +++ b/llvm/test/CodeGen/AArch64/vec-extract-branch.ll @@ -6,8 +6,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: fcmgt v0.2d, v0.2d, #0.0 ; CHECK-NEXT: xtn v0.2s, v0.2d -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: and w8, w9, w8 ; CHECK-NEXT: tbz w8, #0, .LBB0_2 ; CHECK-NEXT: // %bb.1: // %true diff --git a/llvm/test/CodeGen/AArch64/vec-libcalls.ll b/llvm/test/CodeGen/AArch64/vec-libcalls.ll --- a/llvm/test/CodeGen/AArch64/vec-libcalls.ll +++ b/llvm/test/CodeGen/AArch64/vec-libcalls.ll @@ -124,8 +124,8 @@ ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: mov v1.s[2], v0.s[0] ; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: mov s0, v0.s[3] ; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov s0, v0.s[3] ; CHECK-NEXT: bl sinf ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 diff --git a/llvm/test/CodeGen/AArch64/vec_cttz.ll b/llvm/test/CodeGen/AArch64/vec_cttz.ll --- a/llvm/test/CodeGen/AArch64/vec_cttz.ll +++ b/llvm/test/CodeGen/AArch64/vec_cttz.ll @@ -85,8 +85,8 @@ ; CHECK-NEXT: movi v1.8h, #1 ; CHECK-NEXT: sub v1.8h, v0.8h, v1.8h ; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b -; CHECK-NEXT: movi v1.8h, #16 ; CHECK-NEXT: clz v0.8h, v0.8h +; CHECK-NEXT: movi v1.8h, #16 ; CHECK-NEXT: sub v0.8h, v1.8h, v0.8h ; CHECK-NEXT: ret %b = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true) @@ -99,8 +99,8 @@ ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: sub v1.4s, v0.4s, v1.4s ; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b -; CHECK-NEXT: movi v1.4s, #32 ; CHECK-NEXT: clz v0.4s, v0.4s +; CHECK-NEXT: movi v1.4s, #32 ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s ; CHECK-NEXT: ret %b = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) diff --git a/llvm/test/CodeGen/AArch64/vec_uaddo.ll b/llvm/test/CodeGen/AArch64/vec_uaddo.ll --- a/llvm/test/CodeGen/AArch64/vec_uaddo.ll +++ b/llvm/test/CodeGen/AArch64/vec_uaddo.ll @@ -52,8 +52,8 @@ ; CHECK-NEXT: add v1.4s, v0.4s, v1.4s ; CHECK-NEXT: add x8, x0, #8 ; CHECK-NEXT: cmhi v0.4s, v0.4s, v1.4s -; CHECK-NEXT: st1 { v1.s }[2], [x8] ; CHECK-NEXT: str d1, [x0] +; CHECK-NEXT: st1 { v1.s }[2], [x8] ; CHECK-NEXT: ret %t = call {<3 x i32>, <3 x i1>} @llvm.uadd.with.overflow.v3i32(<3 x i32> %a0, <3 x i32> %a1) %val = extractvalue {<3 x i32>, <3 x i1>} %t, 0 @@ -82,33 +82,33 @@ ; CHECK-LABEL: uaddo_v6i32: ; CHECK: // %bb.0: ; CHECK-NEXT: fmov s0, w6 -; CHECK-NEXT: fmov s1, w0 ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: add x9, sp, #8 +; CHECK-NEXT: fmov s1, w0 ; CHECK-NEXT: ldr s2, [sp, #16] +; CHECK-NEXT: add x9, sp, #24 ; CHECK-NEXT: fmov s3, w4 ; CHECK-NEXT: mov v0.s[1], w7 ; CHECK-NEXT: mov v1.s[1], w1 -; CHECK-NEXT: mov v3.s[1], w5 ; CHECK-NEXT: ld1 { v0.s }[2], [x8] -; CHECK-NEXT: add x8, sp, #24 +; CHECK-NEXT: add x8, sp, #8 ; CHECK-NEXT: mov v1.s[2], w2 -; CHECK-NEXT: ld1 { v2.s }[1], [x8] -; CHECK-NEXT: ld1 { v0.s }[3], [x9] +; CHECK-NEXT: ld1 { v2.s }[1], [x9] +; CHECK-NEXT: mov v3.s[1], w5 ; CHECK-NEXT: mov v1.s[3], w3 -; CHECK-NEXT: ldr x8, [sp, #32] +; CHECK-NEXT: ld1 { v0.s }[3], [x8] ; CHECK-NEXT: add v2.4s, v3.4s, v2.4s -; CHECK-NEXT: add v0.4s, v1.4s, v0.4s +; CHECK-NEXT: ldr x8, [sp, #32] ; CHECK-NEXT: cmhi v3.4s, v3.4s, v2.4s +; CHECK-NEXT: add v0.4s, v1.4s, v0.4s ; CHECK-NEXT: str d2, [x8, #16] +; CHECK-NEXT: fmov w4, s3 ; CHECK-NEXT: cmhi v1.4s, v1.4s, v0.4s -; CHECK-NEXT: str q0, [x8] ; CHECK-NEXT: mov w5, v3.s[1] -; CHECK-NEXT: fmov w4, s3 +; CHECK-NEXT: str q0, [x8] +; CHECK-NEXT: fmov w0, s1 ; CHECK-NEXT: mov w1, v1.s[1] ; CHECK-NEXT: mov w2, v1.s[2] ; CHECK-NEXT: mov w3, v1.s[3] -; CHECK-NEXT: fmov w0, s1 ; CHECK-NEXT: ret %t = call {<6 x i32>, <6 x i1>} @llvm.uadd.with.overflow.v6i32(<6 x i32> %a0, <6 x i32> %a1) %val = extractvalue {<6 x i32>, <6 x i1>} %t, 0 @@ -141,23 +141,23 @@ ; CHECK-NEXT: add v4.16b, v0.16b, v1.16b ; CHECK-NEXT: cmhi v0.16b, v0.16b, v4.16b ; CHECK-NEXT: str q4, [x0] -; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: zip1 v2.8b, v0.8b, v0.8b +; CHECK-NEXT: zip1 v1.8b, v0.8b, v0.8b +; CHECK-NEXT: zip2 v2.8b, v0.8b, v0.8b +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-NEXT: zip1 v3.8b, v0.8b, v0.8b ; CHECK-NEXT: zip2 v0.8b, v0.8b, v0.8b ; CHECK-NEXT: ushll v2.4s, v2.4h, #0 -; CHECK-NEXT: zip1 v3.8b, v1.8b, v0.8b -; CHECK-NEXT: zip2 v1.8b, v1.8b, v0.8b +; CHECK-NEXT: ushll v3.4s, v3.4h, #0 ; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: shl v1.4s, v1.4s, #31 ; CHECK-NEXT: shl v2.4s, v2.4s, #31 -; CHECK-NEXT: ushll v3.4s, v3.4h, #0 -; CHECK-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-NEXT: shl v5.4s, v0.4s, #31 -; CHECK-NEXT: sshr v0.4s, v2.4s, #31 ; CHECK-NEXT: shl v3.4s, v3.4s, #31 -; CHECK-NEXT: shl v6.4s, v1.4s, #31 -; CHECK-NEXT: sshr v1.4s, v5.4s, #31 +; CHECK-NEXT: shl v5.4s, v0.4s, #31 +; CHECK-NEXT: sshr v0.4s, v1.4s, #31 +; CHECK-NEXT: sshr v1.4s, v2.4s, #31 ; CHECK-NEXT: sshr v2.4s, v3.4s, #31 -; CHECK-NEXT: sshr v3.4s, v6.4s, #31 +; CHECK-NEXT: sshr v3.4s, v5.4s, #31 ; CHECK-NEXT: ret %t = call {<16 x i8>, <16 x i1>} @llvm.uadd.with.overflow.v16i8(<16 x i8> %a0, <16 x i8> %a1) %val = extractvalue {<16 x i8>, <16 x i1>} %t, 0 @@ -213,26 +213,26 @@ ; CHECK-NEXT: bic v1.4s, #255, lsl #24 ; CHECK-NEXT: bic v0.4s, #255, lsl #24 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s -; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: mov w8, v0.s[3] -; CHECK-NEXT: mov w9, v0.s[2] -; CHECK-NEXT: mov w10, v0.s[1] ; CHECK-NEXT: fmov w11, s0 -; CHECK-NEXT: bic v1.4s, #255, lsl #24 +; CHECK-NEXT: mov v1.16b, v0.16b +; CHECK-NEXT: mov w9, v0.s[2] ; CHECK-NEXT: sturh w8, [x0, #9] ; CHECK-NEXT: lsr w8, w8, #16 +; CHECK-NEXT: mov w10, v0.s[1] ; CHECK-NEXT: strh w9, [x0, #6] -; CHECK-NEXT: sturh w10, [x0, #3] +; CHECK-NEXT: bic v1.4s, #255, lsl #24 ; CHECK-NEXT: lsr w9, w9, #16 -; CHECK-NEXT: strh w11, [x0] ; CHECK-NEXT: cmeq v1.4s, v1.4s, v0.4s +; CHECK-NEXT: sturh w10, [x0, #3] +; CHECK-NEXT: lsr w10, w10, #16 ; CHECK-NEXT: strb w8, [x0, #11] -; CHECK-NEXT: lsr w8, w10, #16 -; CHECK-NEXT: lsr w10, w11, #16 -; CHECK-NEXT: strb w9, [x0, #8] +; CHECK-NEXT: lsr w8, w11, #16 +; CHECK-NEXT: strh w11, [x0] ; CHECK-NEXT: mvn v0.16b, v1.16b -; CHECK-NEXT: strb w8, [x0, #5] -; CHECK-NEXT: strb w10, [x0, #2] +; CHECK-NEXT: strb w9, [x0, #8] +; CHECK-NEXT: strb w10, [x0, #5] +; CHECK-NEXT: strb w8, [x0, #2] ; CHECK-NEXT: ret %t = call {<4 x i24>, <4 x i1>} @llvm.uadd.with.overflow.v4i24(<4 x i24> %a0, <4 x i24> %a1) %val = extractvalue {<4 x i24>, <4 x i1>} %t, 0 @@ -249,20 +249,20 @@ ; CHECK-NEXT: and v1.8b, v1.8b, v2.8b ; CHECK-NEXT: and v0.8b, v0.8b, v2.8b ; CHECK-NEXT: add v0.4h, v0.4h, v1.4h -; CHECK-NEXT: and v1.8b, v0.8b, v2.8b ; CHECK-NEXT: umov w8, v0.h[1] ; CHECK-NEXT: umov w9, v0.h[2] ; CHECK-NEXT: umov w10, v0.h[0] -; CHECK-NEXT: umov w11, v0.h[3] -; CHECK-NEXT: cmeq v1.4h, v1.4h, v0.4h +; CHECK-NEXT: and v1.8b, v0.8b, v2.8b ; CHECK-NEXT: and w8, w8, #0x1 ; CHECK-NEXT: and w9, w9, #0x1 +; CHECK-NEXT: cmeq v1.4h, v1.4h, v0.4h ; CHECK-NEXT: bfi w10, w8, #1, #1 -; CHECK-NEXT: mvn v1.8b, v1.8b +; CHECK-NEXT: umov w8, v0.h[3] ; CHECK-NEXT: bfi w10, w9, #2, #1 -; CHECK-NEXT: bfi w10, w11, #3, #29 -; CHECK-NEXT: and w8, w10, #0xf +; CHECK-NEXT: mvn v1.8b, v1.8b +; CHECK-NEXT: bfi w10, w8, #3, #29 ; CHECK-NEXT: sshll v0.4s, v1.4h, #0 +; CHECK-NEXT: and w8, w10, #0xf ; CHECK-NEXT: strb w8, [x0] ; CHECK-NEXT: ret %t = call {<4 x i1>, <4 x i1>} @llvm.uadd.with.overflow.v4i1(<4 x i1> %a0, <4 x i1> %a1) @@ -293,10 +293,10 @@ ; CHECK-NEXT: fmov s0, w13 ; CHECK-NEXT: mov v0.s[1], w10 ; CHECK-NEXT: ldr x10, [sp] -; CHECK-NEXT: stp x8, x9, [x10, #16] ; CHECK-NEXT: shl v0.2s, v0.2s, #31 -; CHECK-NEXT: stp x11, x12, [x10] +; CHECK-NEXT: stp x8, x9, [x10, #16] ; CHECK-NEXT: sshr v0.2s, v0.2s, #31 +; CHECK-NEXT: stp x11, x12, [x10] ; CHECK-NEXT: ret %t = call {<2 x i128>, <2 x i1>} @llvm.uadd.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1) %val = extractvalue {<2 x i128>, <2 x i1>} %t, 0 diff --git a/llvm/test/CodeGen/AArch64/vec_umulo.ll b/llvm/test/CodeGen/AArch64/vec_umulo.ll --- a/llvm/test/CodeGen/AArch64/vec_umulo.ll +++ b/llvm/test/CodeGen/AArch64/vec_umulo.ll @@ -22,8 +22,8 @@ ; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s ; CHECK-NEXT: shrn v0.2s, v1.2d, #32 ; CHECK-NEXT: xtn v1.2s, v1.2d -; CHECK-NEXT: str s1, [x0] ; CHECK-NEXT: cmtst v0.2s, v0.2s, v0.2s +; CHECK-NEXT: str s1, [x0] ; CHECK-NEXT: ret %t = call {<1 x i32>, <1 x i1>} @llvm.umul.with.overflow.v1i32(<1 x i32> %a0, <1 x i32> %a1) %val = extractvalue {<1 x i32>, <1 x i1>} %t, 0 @@ -39,8 +39,8 @@ ; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s ; CHECK-NEXT: shrn v0.2s, v1.2d, #32 ; CHECK-NEXT: xtn v1.2s, v1.2d -; CHECK-NEXT: str d1, [x0] ; CHECK-NEXT: cmtst v0.2s, v0.2s, v0.2s +; CHECK-NEXT: str d1, [x0] ; CHECK-NEXT: ret %t = call {<2 x i32>, <2 x i1>} @llvm.umul.with.overflow.v2i32(<2 x i32> %a0, <2 x i32> %a1) %val = extractvalue {<2 x i32>, <2 x i1>} %t, 0 @@ -54,13 +54,13 @@ ; CHECK-LABEL: umulo_v3i32: ; CHECK: // %bb.0: ; CHECK-NEXT: umull2 v2.2d, v0.4s, v1.4s -; CHECK-NEXT: add x8, x0, #8 ; CHECK-NEXT: umull v3.2d, v0.2s, v1.2s ; CHECK-NEXT: mul v1.4s, v0.4s, v1.4s +; CHECK-NEXT: add x8, x0, #8 ; CHECK-NEXT: uzp2 v2.4s, v3.4s, v2.4s ; CHECK-NEXT: st1 { v1.s }[2], [x8] -; CHECK-NEXT: str d1, [x0] ; CHECK-NEXT: cmtst v2.4s, v2.4s, v2.4s +; CHECK-NEXT: str d1, [x0] ; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %t = call {<3 x i32>, <3 x i1>} @llvm.umul.with.overflow.v3i32(<3 x i32> %a0, <3 x i32> %a1) @@ -94,39 +94,39 @@ ; CHECK-LABEL: umulo_v6i32: ; CHECK: // %bb.0: ; CHECK-NEXT: fmov s0, w6 -; CHECK-NEXT: fmov s1, w0 ; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: add x9, sp, #8 +; CHECK-NEXT: fmov s1, w0 ; CHECK-NEXT: ldr s2, [sp, #16] +; CHECK-NEXT: add x9, sp, #24 ; CHECK-NEXT: fmov s3, w4 ; CHECK-NEXT: mov v0.s[1], w7 ; CHECK-NEXT: mov v1.s[1], w1 -; CHECK-NEXT: mov v3.s[1], w5 ; CHECK-NEXT: ld1 { v0.s }[2], [x8] -; CHECK-NEXT: add x8, sp, #24 +; CHECK-NEXT: add x8, sp, #8 ; CHECK-NEXT: mov v1.s[2], w2 -; CHECK-NEXT: ld1 { v2.s }[1], [x8] -; CHECK-NEXT: ld1 { v0.s }[3], [x9] +; CHECK-NEXT: ld1 { v2.s }[1], [x9] +; CHECK-NEXT: mov v3.s[1], w5 ; CHECK-NEXT: mov v1.s[3], w3 -; CHECK-NEXT: ldr x8, [sp, #32] +; CHECK-NEXT: ld1 { v0.s }[3], [x8] ; CHECK-NEXT: umull2 v6.2d, v3.4s, v2.4s ; CHECK-NEXT: umull v7.2d, v3.2s, v2.2s +; CHECK-NEXT: mul v2.4s, v3.4s, v2.4s +; CHECK-NEXT: ldr x8, [sp, #32] ; CHECK-NEXT: umull2 v4.2d, v1.4s, v0.4s ; CHECK-NEXT: umull v5.2d, v1.2s, v0.2s -; CHECK-NEXT: mul v2.4s, v3.4s, v2.4s ; CHECK-NEXT: mul v0.4s, v1.4s, v0.4s -; CHECK-NEXT: uzp2 v4.4s, v5.4s, v4.4s -; CHECK-NEXT: uzp2 v5.4s, v7.4s, v6.4s ; CHECK-NEXT: str d2, [x8, #16] +; CHECK-NEXT: uzp2 v3.4s, v5.4s, v4.4s +; CHECK-NEXT: uzp2 v4.4s, v7.4s, v6.4s ; CHECK-NEXT: str q0, [x8] -; CHECK-NEXT: cmtst v4.4s, v4.4s, v4.4s -; CHECK-NEXT: cmtst v3.4s, v5.4s, v5.4s -; CHECK-NEXT: mov w1, v4.s[1] -; CHECK-NEXT: mov w2, v4.s[2] -; CHECK-NEXT: mov w3, v4.s[3] -; CHECK-NEXT: mov w5, v3.s[1] -; CHECK-NEXT: fmov w0, s4 -; CHECK-NEXT: fmov w4, s3 +; CHECK-NEXT: cmtst v3.4s, v3.4s, v3.4s +; CHECK-NEXT: cmtst v1.4s, v4.4s, v4.4s +; CHECK-NEXT: fmov w0, s3 +; CHECK-NEXT: mov w1, v3.s[1] +; CHECK-NEXT: fmov w4, s1 +; CHECK-NEXT: mov w2, v3.s[2] +; CHECK-NEXT: mov w3, v3.s[3] +; CHECK-NEXT: mov w5, v1.s[1] ; CHECK-NEXT: ret %t = call {<6 x i32>, <6 x i1>} @llvm.umul.with.overflow.v6i32(<6 x i32> %a0, <6 x i32> %a1) %val = extractvalue {<6 x i32>, <6 x i1>} %t, 0 @@ -147,9 +147,9 @@ ; CHECK-NEXT: mul v2.4s, v0.4s, v2.4s ; CHECK-NEXT: uzp2 v5.4s, v6.4s, v5.4s ; CHECK-NEXT: uzp2 v6.4s, v7.4s, v4.4s -; CHECK-NEXT: stp q2, q3, [x0] ; CHECK-NEXT: cmtst v4.4s, v5.4s, v5.4s ; CHECK-NEXT: cmtst v5.4s, v6.4s, v6.4s +; CHECK-NEXT: stp q2, q3, [x0] ; CHECK-NEXT: mov v0.16b, v4.16b ; CHECK-NEXT: mov v1.16b, v5.16b ; CHECK-NEXT: ret @@ -166,29 +166,27 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: umull2 v2.8h, v0.16b, v1.16b ; CHECK-NEXT: umull v3.8h, v0.8b, v1.8b +; CHECK-NEXT: mul v6.16b, v0.16b, v1.16b ; CHECK-NEXT: uzp2 v2.16b, v3.16b, v2.16b +; CHECK-NEXT: str q6, [x0] ; CHECK-NEXT: cmtst v2.16b, v2.16b, v2.16b -; CHECK-NEXT: ext v3.16b, v2.16b, v2.16b, #8 -; CHECK-NEXT: zip1 v4.8b, v2.8b, v0.8b +; CHECK-NEXT: zip1 v3.8b, v2.8b, v0.8b +; CHECK-NEXT: zip2 v4.8b, v2.8b, v0.8b +; CHECK-NEXT: ext v2.16b, v2.16b, v2.16b, #8 +; CHECK-NEXT: ushll v3.4s, v3.4h, #0 +; CHECK-NEXT: zip1 v5.8b, v2.8b, v0.8b ; CHECK-NEXT: zip2 v2.8b, v2.8b, v0.8b -; CHECK-NEXT: zip1 v5.8b, v3.8b, v0.8b -; CHECK-NEXT: zip2 v3.8b, v3.8b, v0.8b ; CHECK-NEXT: ushll v4.4s, v4.4h, #0 -; CHECK-NEXT: ushll v2.4s, v2.4h, #0 ; CHECK-NEXT: ushll v5.4s, v5.4h, #0 -; CHECK-NEXT: ushll v3.4s, v3.4h, #0 -; CHECK-NEXT: shl v4.4s, v4.4s, #31 -; CHECK-NEXT: shl v2.4s, v2.4s, #31 -; CHECK-NEXT: shl v6.4s, v5.4s, #31 -; CHECK-NEXT: shl v3.4s, v3.4s, #31 -; CHECK-NEXT: sshr v4.4s, v4.4s, #31 -; CHECK-NEXT: sshr v5.4s, v2.4s, #31 -; CHECK-NEXT: sshr v2.4s, v6.4s, #31 -; CHECK-NEXT: sshr v3.4s, v3.4s, #31 -; CHECK-NEXT: mul v6.16b, v0.16b, v1.16b -; CHECK-NEXT: mov v0.16b, v4.16b -; CHECK-NEXT: mov v1.16b, v5.16b -; CHECK-NEXT: str q6, [x0] +; CHECK-NEXT: ushll v0.4s, v2.4h, #0 +; CHECK-NEXT: shl v1.4s, v3.4s, #31 +; CHECK-NEXT: shl v2.4s, v4.4s, #31 +; CHECK-NEXT: shl v3.4s, v5.4s, #31 +; CHECK-NEXT: shl v4.4s, v0.4s, #31 +; CHECK-NEXT: sshr v0.4s, v1.4s, #31 +; CHECK-NEXT: sshr v1.4s, v2.4s, #31 +; CHECK-NEXT: sshr v2.4s, v3.4s, #31 +; CHECK-NEXT: sshr v3.4s, v4.4s, #31 ; CHECK-NEXT: ret %t = call {<16 x i8>, <16 x i1>} @llvm.umul.with.overflow.v16i8(<16 x i8> %a0, <16 x i8> %a1) %val = extractvalue {<16 x i8>, <16 x i1>} %t, 0 @@ -231,17 +229,17 @@ ; CHECK-LABEL: umulo_v2i64: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x8, v1.d[1] -; CHECK-NEXT: fmov x10, d1 ; CHECK-NEXT: mov x9, v0.d[1] +; CHECK-NEXT: fmov x10, d1 ; CHECK-NEXT: fmov x11, d0 ; CHECK-NEXT: umulh x12, x9, x8 +; CHECK-NEXT: mul x8, x9, x8 ; CHECK-NEXT: umulh x13, x11, x10 ; CHECK-NEXT: cmp xzr, x12 -; CHECK-NEXT: mul x10, x11, x10 ; CHECK-NEXT: csetm x12, ne +; CHECK-NEXT: mul x10, x11, x10 ; CHECK-NEXT: cmp xzr, x13 ; CHECK-NEXT: csetm x13, ne -; CHECK-NEXT: mul x8, x9, x8 ; CHECK-NEXT: fmov d1, x10 ; CHECK-NEXT: fmov d0, x13 ; CHECK-NEXT: mov v1.d[1], x8 @@ -266,23 +264,23 @@ ; CHECK-NEXT: umull v3.2d, v0.2s, v1.2s ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s ; CHECK-NEXT: uzp2 v1.4s, v3.4s, v2.4s -; CHECK-NEXT: ushr v2.4s, v0.4s, #24 ; CHECK-NEXT: mov w8, v0.s[3] -; CHECK-NEXT: mov w9, v0.s[2] -; CHECK-NEXT: mov w10, v0.s[1] -; CHECK-NEXT: cmeq v1.4s, v1.4s, #0 +; CHECK-NEXT: ushr v2.4s, v0.4s, #24 ; CHECK-NEXT: fmov w11, s0 -; CHECK-NEXT: cmtst v2.4s, v2.4s, v2.4s ; CHECK-NEXT: sturh w8, [x0, #9] ; CHECK-NEXT: lsr w8, w8, #16 +; CHECK-NEXT: mov w9, v0.s[2] +; CHECK-NEXT: mov w10, v0.s[1] +; CHECK-NEXT: cmeq v0.4s, v1.4s, #0 +; CHECK-NEXT: strh w11, [x0] +; CHECK-NEXT: cmtst v1.4s, v2.4s, v2.4s ; CHECK-NEXT: strh w9, [x0, #6] -; CHECK-NEXT: lsr w9, w9, #16 ; CHECK-NEXT: sturh w10, [x0, #3] -; CHECK-NEXT: orn v0.16b, v2.16b, v1.16b +; CHECK-NEXT: lsr w9, w9, #16 ; CHECK-NEXT: strb w8, [x0, #11] ; CHECK-NEXT: lsr w8, w10, #16 ; CHECK-NEXT: lsr w10, w11, #16 -; CHECK-NEXT: strh w11, [x0] +; CHECK-NEXT: orn v0.16b, v1.16b, v0.16b ; CHECK-NEXT: strb w9, [x0, #8] ; CHECK-NEXT: strb w8, [x0, #5] ; CHECK-NEXT: strb w10, [x0, #2] @@ -298,19 +296,18 @@ define <4 x i32> @umulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind { ; CHECK-LABEL: umulo_v4i1: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov d2, d0 -; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: and v1.8b, v2.8b, v1.8b -; CHECK-NEXT: umov w8, v1.h[1] -; CHECK-NEXT: umov w9, v1.h[2] -; CHECK-NEXT: umov w10, v1.h[0] -; CHECK-NEXT: umov w11, v1.h[3] +; CHECK-NEXT: and v0.8b, v0.8b, v1.8b +; CHECK-NEXT: umov w8, v0.h[1] +; CHECK-NEXT: umov w9, v0.h[2] +; CHECK-NEXT: umov w10, v0.h[0] ; CHECK-NEXT: and w8, w8, #0x1 ; CHECK-NEXT: and w9, w9, #0x1 ; CHECK-NEXT: bfi w10, w8, #1, #1 +; CHECK-NEXT: umov w8, v0.h[3] ; CHECK-NEXT: bfi w10, w9, #2, #1 -; CHECK-NEXT: bfi w10, w11, #3, #29 +; CHECK-NEXT: bfi w10, w8, #3, #29 ; CHECK-NEXT: and w8, w10, #0xf +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: strb w8, [x0] ; CHECK-NEXT: ret %t = call {<4 x i1>, <4 x i1>} @llvm.umul.with.overflow.v4i1(<4 x i1> %a0, <4 x i1> %a1) @@ -369,8 +366,8 @@ ; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: mul x9, x2, x6 ; CHECK-NEXT: shl v0.2s, v0.2s, #31 -; CHECK-NEXT: stp x9, x8, [x10, #16] ; CHECK-NEXT: sshr v0.2s, v0.2s, #31 +; CHECK-NEXT: stp x9, x8, [x10, #16] ; CHECK-NEXT: ret %t = call {<2 x i128>, <2 x i1>} @llvm.umul.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1) %val = extractvalue {<2 x i128>, <2 x i1>} %t, 0 diff --git a/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll --- a/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll @@ -96,9 +96,8 @@ define i8 @test_v9i8(<9 x i8> %a) nounwind { ; CHECK-LABEL: test_v9i8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: mov w8, #-1 -; CHECK-NEXT: umov w12, v0.b[4] +; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: mov v1.b[9], w8 ; CHECK-NEXT: mov v1.b[10], w8 ; CHECK-NEXT: mov v1.b[11], w8 @@ -108,17 +107,18 @@ ; CHECK-NEXT: umov w8, v1.b[1] ; CHECK-NEXT: umov w9, v1.b[0] ; CHECK-NEXT: umov w10, v1.b[2] -; CHECK-NEXT: umov w11, v1.b[3] ; CHECK-NEXT: and w8, w9, w8 +; CHECK-NEXT: umov w9, v1.b[3] +; CHECK-NEXT: and w8, w8, w10 +; CHECK-NEXT: umov w10, v0.b[4] +; CHECK-NEXT: and w8, w8, w9 ; CHECK-NEXT: umov w9, v0.b[5] ; CHECK-NEXT: and w8, w8, w10 ; CHECK-NEXT: umov w10, v0.b[6] -; CHECK-NEXT: and w8, w8, w11 -; CHECK-NEXT: umov w11, v0.b[7] -; CHECK-NEXT: and w8, w8, w12 ; CHECK-NEXT: and w8, w8, w9 +; CHECK-NEXT: umov w9, v0.b[7] ; CHECK-NEXT: and w8, w8, w10 -; CHECK-NEXT: and w0, w8, w11 +; CHECK-NEXT: and w0, w8, w9 ; CHECK-NEXT: ret %b = call i8 @llvm.vector.reduce.and.v9i8(<9 x i8> %a) ret i8 %b @@ -159,8 +159,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: and w0, w9, w8 ; CHECK-NEXT: ret %b = call i24 @llvm.vector.reduce.and.v4i24(<4 x i24> %a) @@ -185,8 +185,8 @@ ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: and w0, w9, w8 ; CHECK-NEXT: ret %b = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> %a) diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll --- a/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll @@ -105,9 +105,9 @@ define float @test_v3f32_neutral(<3 x float> %a) nounwind { ; CHECK-LABEL: test_v3f32_neutral: ; CHECK: // %bb.0: -; CHECK-NEXT: mov s1, v0.s[2] -; CHECK-NEXT: faddp s0, v0.2s -; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: faddp s1, v0.2s +; CHECK-NEXT: mov s0, v0.s[2] +; CHECK-NEXT: fadd s0, s1, s0 ; CHECK-NEXT: ret %b = call float @llvm.vector.reduce.fadd.f32.v3f32(float -0.0, <3 x float> %a) ret float %b @@ -166,34 +166,34 @@ define float @test_v16f32(<16 x float> %a, float %s) nounwind { ; CHECK-LABEL: test_v16f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov s6, v0.s[1] ; CHECK-NEXT: fadd s4, s4, s0 -; CHECK-NEXT: mov s7, v0.s[2] +; CHECK-NEXT: mov s5, v0.s[1] +; CHECK-NEXT: fadd s4, s4, s5 +; CHECK-NEXT: mov s5, v0.s[2] ; CHECK-NEXT: mov s0, v0.s[3] -; CHECK-NEXT: mov s5, v3.s[1] -; CHECK-NEXT: fadd s4, s4, s6 -; CHECK-NEXT: mov s6, v1.s[2] -; CHECK-NEXT: fadd s4, s4, s7 +; CHECK-NEXT: fadd s4, s4, s5 ; CHECK-NEXT: fadd s0, s4, s0 ; CHECK-NEXT: mov s4, v1.s[1] ; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: fadd s0, s0, s4 +; CHECK-NEXT: mov s4, v1.s[2] ; CHECK-NEXT: mov s1, v1.s[3] ; CHECK-NEXT: fadd s0, s0, s4 -; CHECK-NEXT: mov s4, v2.s[2] -; CHECK-NEXT: fadd s0, s0, s6 ; CHECK-NEXT: fadd s0, s0, s1 ; CHECK-NEXT: mov s1, v2.s[1] ; CHECK-NEXT: fadd s0, s0, s2 ; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: mov s1, v2.s[2] +; CHECK-NEXT: fadd s0, s0, s1 ; CHECK-NEXT: mov s1, v2.s[3] -; CHECK-NEXT: mov s2, v3.s[3] -; CHECK-NEXT: fadd s0, s0, s4 ; CHECK-NEXT: fadd s0, s0, s1 -; CHECK-NEXT: mov s1, v3.s[2] +; CHECK-NEXT: mov s1, v3.s[1] ; CHECK-NEXT: fadd s0, s0, s3 -; CHECK-NEXT: fadd s0, s0, s5 ; CHECK-NEXT: fadd s0, s0, s1 -; CHECK-NEXT: fadd s0, s0, s2 +; CHECK-NEXT: mov s1, v3.s[2] +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: mov s1, v3.s[3] +; CHECK-NEXT: fadd s0, s0, s1 ; CHECK-NEXT: ret %b = call float @llvm.vector.reduce.fadd.f32.v16f32(float %s, <16 x float> %a) ret float %b @@ -202,31 +202,31 @@ define float @test_v16f32_neutral(<16 x float> %a) nounwind { ; CHECK-LABEL: test_v16f32_neutral: ; CHECK: // %bb.0: +; CHECK-NEXT: faddp s4, v0.2s ; CHECK-NEXT: mov s5, v0.s[2] -; CHECK-NEXT: faddp s6, v0.2s ; CHECK-NEXT: mov s0, v0.s[3] -; CHECK-NEXT: mov s4, v2.s[1] -; CHECK-NEXT: fadd s5, s6, s5 -; CHECK-NEXT: mov s6, v1.s[2] -; CHECK-NEXT: fadd s0, s5, s0 -; CHECK-NEXT: mov s5, v1.s[1] +; CHECK-NEXT: fadd s4, s4, s5 +; CHECK-NEXT: fadd s0, s4, s0 +; CHECK-NEXT: mov s4, v1.s[1] ; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: fadd s0, s0, s4 +; CHECK-NEXT: mov s4, v1.s[2] ; CHECK-NEXT: mov s1, v1.s[3] -; CHECK-NEXT: fadd s0, s0, s5 -; CHECK-NEXT: fadd s0, s0, s6 +; CHECK-NEXT: fadd s0, s0, s4 ; CHECK-NEXT: fadd s0, s0, s1 -; CHECK-NEXT: mov s1, v2.s[2] +; CHECK-NEXT: mov s1, v2.s[1] ; CHECK-NEXT: fadd s0, s0, s2 -; CHECK-NEXT: mov s2, v2.s[3] -; CHECK-NEXT: fadd s0, s0, s4 +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: mov s1, v2.s[2] +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: mov s1, v2.s[3] ; CHECK-NEXT: fadd s0, s0, s1 ; CHECK-NEXT: mov s1, v3.s[1] -; CHECK-NEXT: fadd s0, s0, s2 -; CHECK-NEXT: mov s2, v3.s[2] ; CHECK-NEXT: fadd s0, s0, s3 ; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: mov s1, v3.s[2] +; CHECK-NEXT: fadd s0, s0, s1 ; CHECK-NEXT: mov s1, v3.s[3] -; CHECK-NEXT: fadd s0, s0, s2 ; CHECK-NEXT: fadd s0, s0, s1 ; CHECK-NEXT: ret %b = call float @llvm.vector.reduce.fadd.f32.v16f32(float -0.0, <16 x float> %a) diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll --- a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll @@ -32,9 +32,9 @@ ; CHECKNOFP16-NEXT: fadd s1, s2, s1 ; CHECKNOFP16-NEXT: mov h2, v0.h[2] ; CHECKNOFP16-NEXT: mov h0, v0.h[3] -; CHECKNOFP16-NEXT: fcvt h1, s1 ; CHECKNOFP16-NEXT: fcvt s2, h2 ; CHECKNOFP16-NEXT: fcvt s0, h0 +; CHECKNOFP16-NEXT: fcvt h1, s1 ; CHECKNOFP16-NEXT: fcvt s1, h1 ; CHECKNOFP16-NEXT: fadd s1, s1, s2 ; CHECKNOFP16-NEXT: fcvt h1, s1 @@ -62,30 +62,30 @@ ; CHECKNOFP16-NEXT: fcvt s1, h1 ; CHECKNOFP16-NEXT: fadd s1, s2, s1 ; CHECKNOFP16-NEXT: mov h2, v0.h[2] -; CHECKNOFP16-NEXT: fcvt h1, s1 ; CHECKNOFP16-NEXT: fcvt s2, h2 +; CHECKNOFP16-NEXT: fcvt h1, s1 ; CHECKNOFP16-NEXT: fcvt s1, h1 ; CHECKNOFP16-NEXT: fadd s1, s1, s2 ; CHECKNOFP16-NEXT: mov h2, v0.h[3] -; CHECKNOFP16-NEXT: fcvt h1, s1 ; CHECKNOFP16-NEXT: fcvt s2, h2 +; CHECKNOFP16-NEXT: fcvt h1, s1 ; CHECKNOFP16-NEXT: fcvt s1, h1 ; CHECKNOFP16-NEXT: fadd s1, s1, s2 ; CHECKNOFP16-NEXT: mov h2, v0.h[4] -; CHECKNOFP16-NEXT: fcvt h1, s1 ; CHECKNOFP16-NEXT: fcvt s2, h2 +; CHECKNOFP16-NEXT: fcvt h1, s1 ; CHECKNOFP16-NEXT: fcvt s1, h1 ; CHECKNOFP16-NEXT: fadd s1, s1, s2 ; CHECKNOFP16-NEXT: mov h2, v0.h[5] -; CHECKNOFP16-NEXT: fcvt h1, s1 ; CHECKNOFP16-NEXT: fcvt s2, h2 +; CHECKNOFP16-NEXT: fcvt h1, s1 ; CHECKNOFP16-NEXT: fcvt s1, h1 ; CHECKNOFP16-NEXT: fadd s1, s1, s2 ; CHECKNOFP16-NEXT: mov h2, v0.h[6] ; CHECKNOFP16-NEXT: mov h0, v0.h[7] -; CHECKNOFP16-NEXT: fcvt h1, s1 ; CHECKNOFP16-NEXT: fcvt s2, h2 ; CHECKNOFP16-NEXT: fcvt s0, h0 +; CHECKNOFP16-NEXT: fcvt h1, s1 ; CHECKNOFP16-NEXT: fcvt s1, h1 ; CHECKNOFP16-NEXT: fadd s1, s1, s2 ; CHECKNOFP16-NEXT: fcvt h1, s1 @@ -147,65 +147,65 @@ ; CHECKNOFP16-NEXT: fadd s4, s5, s4 ; CHECKNOFP16-NEXT: mov h5, v0.h[2] ; CHECKNOFP16-NEXT: fadd s2, s3, s2 -; CHECKNOFP16-NEXT: mov h3, v1.h[2] -; CHECKNOFP16-NEXT: fcvt h4, s4 ; CHECKNOFP16-NEXT: fcvt s5, h5 +; CHECKNOFP16-NEXT: fcvt h3, s4 +; CHECKNOFP16-NEXT: mov h4, v1.h[2] ; CHECKNOFP16-NEXT: fcvt h2, s2 -; CHECKNOFP16-NEXT: fcvt s3, h3 ; CHECKNOFP16-NEXT: fcvt s4, h4 +; CHECKNOFP16-NEXT: fcvt s3, h3 ; CHECKNOFP16-NEXT: fcvt s2, h2 -; CHECKNOFP16-NEXT: fadd s3, s5, s3 +; CHECKNOFP16-NEXT: fadd s4, s5, s4 ; CHECKNOFP16-NEXT: mov h5, v0.h[3] -; CHECKNOFP16-NEXT: fadd s2, s4, s2 -; CHECKNOFP16-NEXT: mov h4, v1.h[3] -; CHECKNOFP16-NEXT: fcvt h3, s3 ; CHECKNOFP16-NEXT: fcvt s5, h5 -; CHECKNOFP16-NEXT: fcvt h2, s2 +; CHECKNOFP16-NEXT: fadd s2, s3, s2 +; CHECKNOFP16-NEXT: fcvt h3, s4 +; CHECKNOFP16-NEXT: mov h4, v1.h[3] ; CHECKNOFP16-NEXT: fcvt s4, h4 +; CHECKNOFP16-NEXT: fcvt h2, s2 ; CHECKNOFP16-NEXT: fcvt s3, h3 -; CHECKNOFP16-NEXT: fcvt s2, h2 ; CHECKNOFP16-NEXT: fadd s4, s5, s4 ; CHECKNOFP16-NEXT: mov h5, v0.h[4] -; CHECKNOFP16-NEXT: fadd s2, s2, s3 -; CHECKNOFP16-NEXT: mov h3, v1.h[4] -; CHECKNOFP16-NEXT: fcvt h4, s4 +; CHECKNOFP16-NEXT: fcvt s2, h2 ; CHECKNOFP16-NEXT: fcvt s5, h5 +; CHECKNOFP16-NEXT: fadd s2, s2, s3 +; CHECKNOFP16-NEXT: fcvt h3, s4 +; CHECKNOFP16-NEXT: mov h4, v1.h[4] +; CHECKNOFP16-NEXT: fcvt s4, h4 ; CHECKNOFP16-NEXT: fcvt h2, s2 ; CHECKNOFP16-NEXT: fcvt s3, h3 -; CHECKNOFP16-NEXT: fcvt s4, h4 -; CHECKNOFP16-NEXT: fcvt s2, h2 -; CHECKNOFP16-NEXT: fadd s3, s5, s3 +; CHECKNOFP16-NEXT: fadd s4, s5, s4 ; CHECKNOFP16-NEXT: mov h5, v0.h[5] -; CHECKNOFP16-NEXT: fadd s2, s2, s4 -; CHECKNOFP16-NEXT: mov h4, v1.h[5] -; CHECKNOFP16-NEXT: fcvt h3, s3 +; CHECKNOFP16-NEXT: fcvt s2, h2 ; CHECKNOFP16-NEXT: fcvt s5, h5 -; CHECKNOFP16-NEXT: fcvt h2, s2 +; CHECKNOFP16-NEXT: fadd s2, s2, s3 +; CHECKNOFP16-NEXT: fcvt h3, s4 +; CHECKNOFP16-NEXT: mov h4, v1.h[5] ; CHECKNOFP16-NEXT: fcvt s4, h4 +; CHECKNOFP16-NEXT: fcvt h2, s2 ; CHECKNOFP16-NEXT: fcvt s3, h3 -; CHECKNOFP16-NEXT: fcvt s2, h2 ; CHECKNOFP16-NEXT: fadd s4, s5, s4 ; CHECKNOFP16-NEXT: mov h5, v0.h[6] +; CHECKNOFP16-NEXT: fcvt s2, h2 ; CHECKNOFP16-NEXT: mov h0, v0.h[7] -; CHECKNOFP16-NEXT: fadd s2, s2, s3 -; CHECKNOFP16-NEXT: mov h3, v1.h[6] -; CHECKNOFP16-NEXT: fcvt h4, s4 ; CHECKNOFP16-NEXT: fcvt s5, h5 -; CHECKNOFP16-NEXT: mov h1, v1.h[7] ; CHECKNOFP16-NEXT: fcvt s0, h0 -; CHECKNOFP16-NEXT: fcvt h2, s2 -; CHECKNOFP16-NEXT: fcvt s3, h3 +; CHECKNOFP16-NEXT: fadd s2, s2, s3 +; CHECKNOFP16-NEXT: fcvt h3, s4 +; CHECKNOFP16-NEXT: mov h4, v1.h[6] +; CHECKNOFP16-NEXT: mov h1, v1.h[7] ; CHECKNOFP16-NEXT: fcvt s4, h4 ; CHECKNOFP16-NEXT: fcvt s1, h1 -; CHECKNOFP16-NEXT: fcvt s2, h2 -; CHECKNOFP16-NEXT: fadd s3, s5, s3 +; CHECKNOFP16-NEXT: fcvt h2, s2 +; CHECKNOFP16-NEXT: fcvt s3, h3 +; CHECKNOFP16-NEXT: fadd s4, s5, s4 ; CHECKNOFP16-NEXT: fadd s0, s0, s1 -; CHECKNOFP16-NEXT: fadd s2, s2, s4 -; CHECKNOFP16-NEXT: fcvt h3, s3 +; CHECKNOFP16-NEXT: fcvt s2, h2 ; CHECKNOFP16-NEXT: fcvt h0, s0 +; CHECKNOFP16-NEXT: fadd s2, s2, s3 +; CHECKNOFP16-NEXT: fcvt h3, s4 +; CHECKNOFP16-NEXT: fcvt s0, h0 ; CHECKNOFP16-NEXT: fcvt h2, s2 ; CHECKNOFP16-NEXT: fcvt s3, h3 -; CHECKNOFP16-NEXT: fcvt s0, h0 ; CHECKNOFP16-NEXT: fcvt s2, h2 ; CHECKNOFP16-NEXT: fadd s2, s2, s3 ; CHECKNOFP16-NEXT: fcvt h1, s2 diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll --- a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll @@ -57,9 +57,9 @@ ; CHECK-NOFP-NEXT: fmaxnm s1, s2, s1 ; CHECK-NOFP-NEXT: mov h2, v0.h[2] ; CHECK-NOFP-NEXT: mov h0, v0.h[3] -; CHECK-NOFP-NEXT: fcvt h1, s1 ; CHECK-NOFP-NEXT: fcvt s2, h2 ; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcvt h1, s1 ; CHECK-NOFP-NEXT: fcvt s1, h1 ; CHECK-NOFP-NEXT: fmaxnm s1, s1, s2 ; CHECK-NOFP-NEXT: fcvt h1, s1 @@ -86,9 +86,9 @@ ; CHECK-NOFP-NEXT: fmaxnm s1, s2, s1 ; CHECK-NOFP-NEXT: mov h2, v0.h[2] ; CHECK-NOFP-NEXT: mov h0, v0.h[3] -; CHECK-NOFP-NEXT: fcvt h1, s1 ; CHECK-NOFP-NEXT: fcvt s2, h2 ; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcvt h1, s1 ; CHECK-NOFP-NEXT: fcvt s1, h1 ; CHECK-NOFP-NEXT: fmaxnm s1, s1, s2 ; CHECK-NOFP-NEXT: fcvt h1, s1 @@ -184,11 +184,11 @@ ; CHECK-FP-LABEL: test_v11f16: ; CHECK-FP: // %bb.0: ; CHECK-FP-NEXT: // kill: def $h0 killed $h0 def $q0 +; CHECK-FP-NEXT: mov x8, sp ; CHECK-FP-NEXT: // kill: def $h1 killed $h1 def $q1 ; CHECK-FP-NEXT: // kill: def $h2 killed $h2 def $q2 ; CHECK-FP-NEXT: // kill: def $h3 killed $h3 def $q3 ; CHECK-FP-NEXT: // kill: def $h4 killed $h4 def $q4 -; CHECK-FP-NEXT: mov x8, sp ; CHECK-FP-NEXT: // kill: def $h5 killed $h5 def $q5 ; CHECK-FP-NEXT: // kill: def $h6 killed $h6 def $q6 ; CHECK-FP-NEXT: // kill: def $h7 killed $h7 def $q7 @@ -198,12 +198,12 @@ ; CHECK-FP-NEXT: ld1 { v1.h }[0], [x8] ; CHECK-FP-NEXT: add x8, sp, #8 ; CHECK-FP-NEXT: mov v0.h[3], v3.h[0] +; CHECK-FP-NEXT: mov v0.h[4], v4.h[0] ; CHECK-FP-NEXT: ld1 { v1.h }[1], [x8] ; CHECK-FP-NEXT: add x8, sp, #16 -; CHECK-FP-NEXT: mov v0.h[4], v4.h[0] -; CHECK-FP-NEXT: ld1 { v1.h }[2], [x8] ; CHECK-FP-NEXT: mov v0.h[5], v5.h[0] ; CHECK-FP-NEXT: mov v0.h[6], v6.h[0] +; CHECK-FP-NEXT: ld1 { v1.h }[2], [x8] ; CHECK-FP-NEXT: mov v0.h[7], v7.h[0] ; CHECK-FP-NEXT: fmaxnm v0.8h, v0.8h, v1.8h ; CHECK-FP-NEXT: fmaxnmv h0, v0.8h @@ -292,11 +292,11 @@ ; CHECK-FP-LABEL: test_v11f16_ninf: ; CHECK-FP: // %bb.0: ; CHECK-FP-NEXT: // kill: def $h0 killed $h0 def $q0 +; CHECK-FP-NEXT: mov x8, sp ; CHECK-FP-NEXT: // kill: def $h1 killed $h1 def $q1 ; CHECK-FP-NEXT: // kill: def $h2 killed $h2 def $q2 ; CHECK-FP-NEXT: // kill: def $h3 killed $h3 def $q3 ; CHECK-FP-NEXT: // kill: def $h4 killed $h4 def $q4 -; CHECK-FP-NEXT: mov x8, sp ; CHECK-FP-NEXT: // kill: def $h5 killed $h5 def $q5 ; CHECK-FP-NEXT: // kill: def $h6 killed $h6 def $q6 ; CHECK-FP-NEXT: // kill: def $h7 killed $h7 def $q7 @@ -306,12 +306,12 @@ ; CHECK-FP-NEXT: ld1 { v1.h }[0], [x8] ; CHECK-FP-NEXT: add x8, sp, #8 ; CHECK-FP-NEXT: mov v0.h[3], v3.h[0] +; CHECK-FP-NEXT: mov v0.h[4], v4.h[0] ; CHECK-FP-NEXT: ld1 { v1.h }[1], [x8] ; CHECK-FP-NEXT: add x8, sp, #16 -; CHECK-FP-NEXT: mov v0.h[4], v4.h[0] -; CHECK-FP-NEXT: ld1 { v1.h }[2], [x8] ; CHECK-FP-NEXT: mov v0.h[5], v5.h[0] ; CHECK-FP-NEXT: mov v0.h[6], v6.h[0] +; CHECK-FP-NEXT: ld1 { v1.h }[2], [x8] ; CHECK-FP-NEXT: mov v0.h[7], v7.h[0] ; CHECK-FP-NEXT: fmaxnm v0.8h, v0.8h, v1.8h ; CHECK-FP-NEXT: fmaxnmv h0, v0.8h diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll --- a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll @@ -57,9 +57,9 @@ ; CHECK-NOFP-NEXT: fminnm s1, s2, s1 ; CHECK-NOFP-NEXT: mov h2, v0.h[2] ; CHECK-NOFP-NEXT: mov h0, v0.h[3] -; CHECK-NOFP-NEXT: fcvt h1, s1 ; CHECK-NOFP-NEXT: fcvt s2, h2 ; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcvt h1, s1 ; CHECK-NOFP-NEXT: fcvt s1, h1 ; CHECK-NOFP-NEXT: fminnm s1, s1, s2 ; CHECK-NOFP-NEXT: fcvt h1, s1 @@ -86,9 +86,9 @@ ; CHECK-NOFP-NEXT: fminnm s1, s2, s1 ; CHECK-NOFP-NEXT: mov h2, v0.h[2] ; CHECK-NOFP-NEXT: mov h0, v0.h[3] -; CHECK-NOFP-NEXT: fcvt h1, s1 ; CHECK-NOFP-NEXT: fcvt s2, h2 ; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcvt h1, s1 ; CHECK-NOFP-NEXT: fcvt s1, h1 ; CHECK-NOFP-NEXT: fminnm s1, s1, s2 ; CHECK-NOFP-NEXT: fcvt h1, s1 @@ -184,11 +184,11 @@ ; CHECK-FP-LABEL: test_v11f16: ; CHECK-FP: // %bb.0: ; CHECK-FP-NEXT: // kill: def $h0 killed $h0 def $q0 +; CHECK-FP-NEXT: mov x8, sp ; CHECK-FP-NEXT: // kill: def $h1 killed $h1 def $q1 ; CHECK-FP-NEXT: // kill: def $h2 killed $h2 def $q2 ; CHECK-FP-NEXT: // kill: def $h3 killed $h3 def $q3 ; CHECK-FP-NEXT: // kill: def $h4 killed $h4 def $q4 -; CHECK-FP-NEXT: mov x8, sp ; CHECK-FP-NEXT: // kill: def $h5 killed $h5 def $q5 ; CHECK-FP-NEXT: // kill: def $h6 killed $h6 def $q6 ; CHECK-FP-NEXT: // kill: def $h7 killed $h7 def $q7 @@ -198,12 +198,12 @@ ; CHECK-FP-NEXT: ld1 { v1.h }[0], [x8] ; CHECK-FP-NEXT: add x8, sp, #8 ; CHECK-FP-NEXT: mov v0.h[3], v3.h[0] +; CHECK-FP-NEXT: mov v0.h[4], v4.h[0] ; CHECK-FP-NEXT: ld1 { v1.h }[1], [x8] ; CHECK-FP-NEXT: add x8, sp, #16 -; CHECK-FP-NEXT: mov v0.h[4], v4.h[0] -; CHECK-FP-NEXT: ld1 { v1.h }[2], [x8] ; CHECK-FP-NEXT: mov v0.h[5], v5.h[0] ; CHECK-FP-NEXT: mov v0.h[6], v6.h[0] +; CHECK-FP-NEXT: ld1 { v1.h }[2], [x8] ; CHECK-FP-NEXT: mov v0.h[7], v7.h[0] ; CHECK-FP-NEXT: fminnm v0.8h, v0.8h, v1.8h ; CHECK-FP-NEXT: fminnmv h0, v0.8h @@ -292,11 +292,11 @@ ; CHECK-FP-LABEL: test_v11f16_ninf: ; CHECK-FP: // %bb.0: ; CHECK-FP-NEXT: // kill: def $h0 killed $h0 def $q0 +; CHECK-FP-NEXT: mov x8, sp ; CHECK-FP-NEXT: // kill: def $h1 killed $h1 def $q1 ; CHECK-FP-NEXT: // kill: def $h2 killed $h2 def $q2 ; CHECK-FP-NEXT: // kill: def $h3 killed $h3 def $q3 ; CHECK-FP-NEXT: // kill: def $h4 killed $h4 def $q4 -; CHECK-FP-NEXT: mov x8, sp ; CHECK-FP-NEXT: // kill: def $h5 killed $h5 def $q5 ; CHECK-FP-NEXT: // kill: def $h6 killed $h6 def $q6 ; CHECK-FP-NEXT: // kill: def $h7 killed $h7 def $q7 @@ -306,12 +306,12 @@ ; CHECK-FP-NEXT: ld1 { v1.h }[0], [x8] ; CHECK-FP-NEXT: add x8, sp, #8 ; CHECK-FP-NEXT: mov v0.h[3], v3.h[0] +; CHECK-FP-NEXT: mov v0.h[4], v4.h[0] ; CHECK-FP-NEXT: ld1 { v1.h }[1], [x8] ; CHECK-FP-NEXT: add x8, sp, #16 -; CHECK-FP-NEXT: mov v0.h[4], v4.h[0] -; CHECK-FP-NEXT: ld1 { v1.h }[2], [x8] ; CHECK-FP-NEXT: mov v0.h[5], v5.h[0] ; CHECK-FP-NEXT: mov v0.h[6], v6.h[0] +; CHECK-FP-NEXT: ld1 { v1.h }[2], [x8] ; CHECK-FP-NEXT: mov v0.h[7], v7.h[0] ; CHECK-FP-NEXT: fminnm v0.8h, v0.8h, v1.8h ; CHECK-FP-NEXT: fminnmv h0, v0.8h diff --git a/llvm/test/CodeGen/AArch64/vector-fcopysign.ll b/llvm/test/CodeGen/AArch64/vector-fcopysign.ll --- a/llvm/test/CodeGen/AArch64/vector-fcopysign.ll +++ b/llvm/test/CodeGen/AArch64/vector-fcopysign.ll @@ -82,8 +82,8 @@ define <2 x float> @test_copysign_v2f32_v2f64(<2 x float> %a, <2 x double> %b) #0 { ; CHECK-LABEL: test_copysign_v2f32_v2f64: ; CHECK: ; %bb.0: -; CHECK-NEXT: movi.2s v2, #128, lsl #24 ; CHECK-NEXT: fcvtn v1.2s, v1.2d +; CHECK-NEXT: movi.2s v2, #128, lsl #24 ; CHECK-NEXT: bit.8b v0, v1, v2 ; CHECK-NEXT: ret %tmp0 = fptrunc <2 x double> %b to <2 x float> @@ -110,9 +110,9 @@ ; CHECK-LABEL: test_copysign_v4f32_v4f64: ; CHECK: ; %bb.0: ; CHECK-NEXT: fcvtn v1.2s, v1.2d -; CHECK-NEXT: movi.4s v3, #128, lsl #24 ; CHECK-NEXT: fcvtn2 v1.4s, v2.2d -; CHECK-NEXT: bit.16b v0, v1, v3 +; CHECK-NEXT: movi.4s v2, #128, lsl #24 +; CHECK-NEXT: bit.16b v0, v1, v2 ; CHECK-NEXT: ret %tmp0 = fptrunc <4 x double> %b to <4 x float> %r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %tmp0) @@ -156,11 +156,11 @@ ; CHECK-LABEL: test_copysign_v4f64_v4f32: ; CHECK: ; %bb.0: ; CHECK-NEXT: movi.2d v3, #0000000000000000 -; CHECK-NEXT: fcvtl2 v4.2d, v2.4s -; CHECK-NEXT: fcvtl v2.2d, v2.2s +; CHECK-NEXT: fcvtl v4.2d, v2.2s ; CHECK-NEXT: fneg.2d v3, v3 -; CHECK-NEXT: bit.16b v1, v4, v3 -; CHECK-NEXT: bit.16b v0, v2, v3 +; CHECK-NEXT: fcvtl2 v2.2d, v2.4s +; CHECK-NEXT: bit.16b v0, v4, v3 +; CHECK-NEXT: bit.16b v1, v2, v3 ; CHECK-NEXT: ret %tmp0 = fpext <4 x float> %b to <4 x double> %r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %tmp0) @@ -191,29 +191,29 @@ ; NOFP16-NEXT: ; kill: def $d0 killed $d0 def $q0 ; NOFP16-NEXT: mov h3, v1[1] ; NOFP16-NEXT: mov h4, v0[1] -; NOFP16-NEXT: movi.4s v2, #128, lsl #24 ; NOFP16-NEXT: fcvt s5, h1 ; NOFP16-NEXT: fcvt s6, h0 ; NOFP16-NEXT: mov h7, v1[2] ; NOFP16-NEXT: mov h16, v0[2] ; NOFP16-NEXT: fcvt s3, h3 ; NOFP16-NEXT: fcvt s4, h4 +; NOFP16-NEXT: fcvt s7, h7 +; NOFP16-NEXT: fcvt s16, h16 ; NOFP16-NEXT: mov h1, v1[3] +; NOFP16-NEXT: mov h0, v0[3] +; NOFP16-NEXT: movi.4s v2, #128, lsl #24 ; NOFP16-NEXT: bit.16b v6, v5, v2 -; NOFP16-NEXT: fcvt s5, h7 -; NOFP16-NEXT: fcvt s7, h16 ; NOFP16-NEXT: bit.16b v4, v3, v2 -; NOFP16-NEXT: mov h3, v0[3] -; NOFP16-NEXT: fcvt h0, s6 ; NOFP16-NEXT: fcvt s1, h1 -; NOFP16-NEXT: bit.16b v7, v5, v2 +; NOFP16-NEXT: fcvt s3, h0 ; NOFP16-NEXT: fcvt h4, s4 -; NOFP16-NEXT: fcvt s3, h3 -; NOFP16-NEXT: fcvt h5, s7 -; NOFP16-NEXT: mov.h v0[1], v4[0] +; NOFP16-NEXT: bit.16b v16, v7, v2 +; NOFP16-NEXT: fcvt h0, s6 +; NOFP16-NEXT: fcvt h5, s16 ; NOFP16-NEXT: bit.16b v3, v1, v2 -; NOFP16-NEXT: mov.h v0[2], v5[0] +; NOFP16-NEXT: mov.h v0[1], v4[0] ; NOFP16-NEXT: fcvt h1, s3 +; NOFP16-NEXT: mov.h v0[2], v5[0] ; NOFP16-NEXT: mov.h v0[3], v1[0] ; NOFP16-NEXT: ; kill: def $d0 killed $d0 killed $q0 ; NOFP16-NEXT: ret @@ -233,38 +233,38 @@ ; NOFP16-NEXT: fcvtn v1.4h, v1.4s ; NOFP16-NEXT: ; kill: def $d0 killed $d0 def $q0 ; NOFP16-NEXT: mov h3, v0[1] -; NOFP16-NEXT: movi.4s v2, #128, lsl #24 -; NOFP16-NEXT: fcvt s5, h0 -; NOFP16-NEXT: mov h7, v0[2] -; NOFP16-NEXT: mov h4, v1[1] -; NOFP16-NEXT: fcvt s6, h1 -; NOFP16-NEXT: mov h16, v1[2] +; NOFP16-NEXT: fcvt s4, h0 +; NOFP16-NEXT: mov h5, v0[2] ; NOFP16-NEXT: fcvt s3, h3 +; NOFP16-NEXT: mov h0, v0[3] +; NOFP16-NEXT: fcvt s5, h5 +; NOFP16-NEXT: mov h6, v1[1] +; NOFP16-NEXT: fcvt s7, h1 +; NOFP16-NEXT: mov h16, v1[2] ; NOFP16-NEXT: mov h1, v1[3] -; NOFP16-NEXT: fcvt s4, h4 -; NOFP16-NEXT: bit.16b v5, v6, v2 -; NOFP16-NEXT: fcvt s6, h7 -; NOFP16-NEXT: fcvt s7, h16 +; NOFP16-NEXT: fcvt s6, h6 +; NOFP16-NEXT: fcvt s16, h16 ; NOFP16-NEXT: fcvt s1, h1 -; NOFP16-NEXT: bit.16b v3, v4, v2 -; NOFP16-NEXT: mov h4, v0[3] -; NOFP16-NEXT: fcvt h0, s5 -; NOFP16-NEXT: bit.16b v6, v7, v2 +; NOFP16-NEXT: movi.4s v2, #128, lsl #24 +; NOFP16-NEXT: bit.16b v4, v7, v2 +; NOFP16-NEXT: bit.16b v3, v6, v2 +; NOFP16-NEXT: fcvt s6, h0 +; NOFP16-NEXT: bit.16b v5, v16, v2 ; NOFP16-NEXT: fcvt h3, s3 -; NOFP16-NEXT: fcvt s4, h4 -; NOFP16-NEXT: fcvt h5, s6 +; NOFP16-NEXT: fcvt h0, s4 +; NOFP16-NEXT: fcvt h4, s5 +; NOFP16-NEXT: bit.16b v6, v1, v2 ; NOFP16-NEXT: mov.h v0[1], v3[0] -; NOFP16-NEXT: bit.16b v4, v1, v2 -; NOFP16-NEXT: mov.h v0[2], v5[0] -; NOFP16-NEXT: fcvt h1, s4 +; NOFP16-NEXT: fcvt h1, s6 +; NOFP16-NEXT: mov.h v0[2], v4[0] ; NOFP16-NEXT: mov.h v0[3], v1[0] ; NOFP16-NEXT: ; kill: def $d0 killed $d0 killed $q0 ; NOFP16-NEXT: ret ; ; FP16-LABEL: test_copysign_v4f16_v4f32: ; FP16: ; %bb.0: -; FP16-NEXT: movi.4h v2, #128, lsl #8 ; FP16-NEXT: fcvtn v1.4h, v1.4s +; FP16-NEXT: movi.4h v2, #128, lsl #8 ; FP16-NEXT: bit.8b v0, v1, v2 ; FP16-NEXT: ret %tmp0 = fptrunc <4 x float> %b to <4 x half> @@ -278,45 +278,45 @@ ; NOFP16-NEXT: ; kill: def $d0 killed $d0 def $q0 ; NOFP16-NEXT: mov d4, v1[1] ; NOFP16-NEXT: mov h5, v0[1] -; NOFP16-NEXT: movi.4s v3, #128, lsl #24 ; NOFP16-NEXT: fcvt s1, d1 ; NOFP16-NEXT: fcvt s6, h0 -; NOFP16-NEXT: mov h7, v0[2] ; NOFP16-NEXT: fcvt s4, d4 ; NOFP16-NEXT: fcvt s5, h5 -; NOFP16-NEXT: bit.16b v6, v1, v3 -; NOFP16-NEXT: fcvt s1, d2 +; NOFP16-NEXT: mov h7, v0[2] +; NOFP16-NEXT: mov d16, v2[1] +; NOFP16-NEXT: fcvt s2, d2 +; NOFP16-NEXT: mov h0, v0[3] ; NOFP16-NEXT: fcvt s7, h7 +; NOFP16-NEXT: movi.4s v3, #128, lsl #24 +; NOFP16-NEXT: bit.16b v6, v1, v3 ; NOFP16-NEXT: bit.16b v5, v4, v3 -; NOFP16-NEXT: mov d2, v2[1] -; NOFP16-NEXT: mov h4, v0[3] +; NOFP16-NEXT: fcvt s1, d16 +; NOFP16-NEXT: fcvt s4, h0 +; NOFP16-NEXT: fcvt h5, s5 ; NOFP16-NEXT: fcvt h0, s6 -; NOFP16-NEXT: bit.16b v7, v1, v3 -; NOFP16-NEXT: fcvt h1, s5 -; NOFP16-NEXT: fcvt s2, d2 -; NOFP16-NEXT: fcvt s4, h4 -; NOFP16-NEXT: fcvt h5, s7 -; NOFP16-NEXT: mov.h v0[1], v1[0] -; NOFP16-NEXT: bit.16b v4, v2, v3 -; NOFP16-NEXT: mov.h v0[2], v5[0] +; NOFP16-NEXT: bit.16b v7, v2, v3 +; NOFP16-NEXT: fcvt h2, s7 +; NOFP16-NEXT: bit.16b v4, v1, v3 +; NOFP16-NEXT: mov.h v0[1], v5[0] ; NOFP16-NEXT: fcvt h1, s4 +; NOFP16-NEXT: mov.h v0[2], v2[0] ; NOFP16-NEXT: mov.h v0[3], v1[0] ; NOFP16-NEXT: ; kill: def $d0 killed $d0 killed $q0 ; NOFP16-NEXT: ret ; ; FP16-LABEL: test_copysign_v4f16_v4f64: ; FP16: ; %bb.0: -; FP16-NEXT: mov d4, v1[1] +; FP16-NEXT: mov d3, v1[1] ; FP16-NEXT: fcvt h1, d1 -; FP16-NEXT: movi.4h v3, #128, lsl #8 -; FP16-NEXT: fcvt h4, d4 -; FP16-NEXT: mov.h v1[1], v4[0] ; FP16-NEXT: fcvt h4, d2 ; FP16-NEXT: mov d2, v2[1] -; FP16-NEXT: mov.h v1[2], v4[0] +; FP16-NEXT: fcvt h3, d3 ; FP16-NEXT: fcvt h2, d2 +; FP16-NEXT: mov.h v1[1], v3[0] +; FP16-NEXT: mov.h v1[2], v4[0] ; FP16-NEXT: mov.h v1[3], v2[0] -; FP16-NEXT: bit.8b v0, v1, v3 +; FP16-NEXT: movi.4h v2, #128, lsl #8 +; FP16-NEXT: bit.8b v0, v1, v2 ; FP16-NEXT: ret %tmp0 = fptrunc <4 x double> %b to <4 x half> %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %tmp0) @@ -330,59 +330,59 @@ define <8 x half> @test_copysign_v8f16_v8f16(<8 x half> %a, <8 x half> %b) #0 { ; NOFP16-LABEL: test_copysign_v8f16_v8f16: ; NOFP16: ; %bb.0: -; NOFP16-NEXT: mov h5, v1[1] -; NOFP16-NEXT: mov h6, v0[1] -; NOFP16-NEXT: movi.4s v3, #128, lsl #24 -; NOFP16-NEXT: fcvt s2, h1 -; NOFP16-NEXT: fcvt s4, h0 +; NOFP16-NEXT: mov h2, v1[1] +; NOFP16-NEXT: mov h4, v0[1] +; NOFP16-NEXT: fcvt s5, h1 +; NOFP16-NEXT: fcvt s6, h0 +; NOFP16-NEXT: fcvt s2, h2 +; NOFP16-NEXT: fcvt s4, h4 ; NOFP16-NEXT: mov h7, v1[2] ; NOFP16-NEXT: mov h16, v0[2] -; NOFP16-NEXT: fcvt s5, h5 -; NOFP16-NEXT: fcvt s6, h6 -; NOFP16-NEXT: mov h17, v0[3] -; NOFP16-NEXT: bit.16b v4, v2, v3 -; NOFP16-NEXT: mov h2, v1[3] +; NOFP16-NEXT: movi.4s v3, #128, lsl #24 +; NOFP16-NEXT: mov h17, v1[3] ; NOFP16-NEXT: fcvt s7, h7 ; NOFP16-NEXT: fcvt s16, h16 +; NOFP16-NEXT: mov h18, v1[4] ; NOFP16-NEXT: bit.16b v6, v5, v3 -; NOFP16-NEXT: fcvt s17, h17 -; NOFP16-NEXT: fcvt s18, h2 -; NOFP16-NEXT: mov h5, v1[4] -; NOFP16-NEXT: fcvt h2, s4 +; NOFP16-NEXT: mov h5, v0[3] +; NOFP16-NEXT: bit.16b v4, v2, v3 +; NOFP16-NEXT: fcvt h2, s6 +; NOFP16-NEXT: fcvt h4, s4 +; NOFP16-NEXT: mov h6, v0[4] ; NOFP16-NEXT: bit.16b v16, v7, v3 -; NOFP16-NEXT: mov h7, v0[4] -; NOFP16-NEXT: fcvt h4, s6 -; NOFP16-NEXT: bit.16b v17, v18, v3 -; NOFP16-NEXT: mov h6, v1[5] -; NOFP16-NEXT: mov h18, v0[5] +; NOFP16-NEXT: fcvt s7, h17 ; NOFP16-NEXT: fcvt s5, h5 -; NOFP16-NEXT: fcvt s7, h7 +; NOFP16-NEXT: fcvt h16, s16 +; NOFP16-NEXT: mov h17, v1[5] ; NOFP16-NEXT: mov.h v2[1], v4[0] -; NOFP16-NEXT: fcvt h4, s16 +; NOFP16-NEXT: mov h4, v0[5] +; NOFP16-NEXT: fcvt s18, h18 ; NOFP16-NEXT: fcvt s6, h6 -; NOFP16-NEXT: fcvt s16, h18 -; NOFP16-NEXT: fcvt h17, s17 -; NOFP16-NEXT: bit.16b v7, v5, v3 -; NOFP16-NEXT: mov h5, v0[6] -; NOFP16-NEXT: mov.h v2[2], v4[0] -; NOFP16-NEXT: mov h4, v1[6] -; NOFP16-NEXT: bit.16b v16, v6, v3 -; NOFP16-NEXT: mov h1, v1[7] -; NOFP16-NEXT: fcvt s5, h5 -; NOFP16-NEXT: mov.h v2[3], v17[0] -; NOFP16-NEXT: fcvt h6, s7 +; NOFP16-NEXT: bit.16b v5, v7, v3 +; NOFP16-NEXT: mov h7, v1[6] +; NOFP16-NEXT: mov.h v2[2], v16[0] +; NOFP16-NEXT: mov h16, v0[6] +; NOFP16-NEXT: fcvt s17, h17 ; NOFP16-NEXT: fcvt s4, h4 +; NOFP16-NEXT: fcvt h5, s5 +; NOFP16-NEXT: bit.16b v6, v18, v3 +; NOFP16-NEXT: fcvt s7, h7 +; NOFP16-NEXT: fcvt s16, h16 +; NOFP16-NEXT: mov h1, v1[7] ; NOFP16-NEXT: mov h0, v0[7] +; NOFP16-NEXT: bit.16b v4, v17, v3 +; NOFP16-NEXT: fcvt h6, s6 ; NOFP16-NEXT: fcvt s1, h1 -; NOFP16-NEXT: mov.h v2[4], v6[0] -; NOFP16-NEXT: bit.16b v5, v4, v3 -; NOFP16-NEXT: fcvt h4, s16 ; NOFP16-NEXT: fcvt s0, h0 -; NOFP16-NEXT: fcvt h5, s5 -; NOFP16-NEXT: mov.h v2[5], v4[0] +; NOFP16-NEXT: mov.h v2[3], v5[0] +; NOFP16-NEXT: bit.16b v16, v7, v3 +; NOFP16-NEXT: fcvt h4, s4 +; NOFP16-NEXT: mov.h v2[4], v6[0] +; NOFP16-NEXT: fcvt h5, s16 ; NOFP16-NEXT: bit.16b v0, v1, v3 -; NOFP16-NEXT: mov.h v2[6], v5[0] +; NOFP16-NEXT: mov.h v2[5], v4[0] ; NOFP16-NEXT: fcvt h0, s0 +; NOFP16-NEXT: mov.h v2[6], v5[0] ; NOFP16-NEXT: mov.h v2[7], v0[0] ; NOFP16-NEXT: mov.16b v0, v2 ; NOFP16-NEXT: ret @@ -400,56 +400,56 @@ ; NOFP16-LABEL: test_copysign_v8f16_v8f32: ; NOFP16: ; %bb.0: ; NOFP16-NEXT: fcvtn v1.4h, v1.4s -; NOFP16-NEXT: fcvtn v2.4h, v2.4s -; NOFP16-NEXT: movi.4s v3, #128, lsl #24 +; NOFP16-NEXT: fcvt s5, h0 ; NOFP16-NEXT: mov h4, v0[1] -; NOFP16-NEXT: mov h5, v0[4] -; NOFP16-NEXT: fcvt s7, h0 -; NOFP16-NEXT: mov h17, v0[2] -; NOFP16-NEXT: mov h6, v1[1] -; NOFP16-NEXT: fcvt s16, h1 +; NOFP16-NEXT: mov h7, v0[2] +; NOFP16-NEXT: movi.4s v3, #128, lsl #24 +; NOFP16-NEXT: fcvtn v2.4h, v2.4s ; NOFP16-NEXT: fcvt s4, h4 -; NOFP16-NEXT: mov h18, v1[2] -; NOFP16-NEXT: fcvt s5, h5 +; NOFP16-NEXT: fcvt s6, h1 +; NOFP16-NEXT: mov h16, v1[1] +; NOFP16-NEXT: fcvt s7, h7 +; NOFP16-NEXT: mov h17, v0[3] +; NOFP16-NEXT: fcvt s16, h16 +; NOFP16-NEXT: mov h18, v0[4] ; NOFP16-NEXT: fcvt s17, h17 +; NOFP16-NEXT: bit.16b v5, v6, v3 +; NOFP16-NEXT: mov h6, v1[2] +; NOFP16-NEXT: mov h1, v1[3] ; NOFP16-NEXT: fcvt s6, h6 -; NOFP16-NEXT: bit.16b v7, v16, v3 -; NOFP16-NEXT: fcvt s16, h2 ; NOFP16-NEXT: fcvt s18, h18 -; NOFP16-NEXT: bit.16b v4, v6, v3 -; NOFP16-NEXT: mov h6, v0[3] -; NOFP16-NEXT: bit.16b v5, v16, v3 -; NOFP16-NEXT: mov h16, v1[3] -; NOFP16-NEXT: fcvt h1, s7 -; NOFP16-NEXT: mov h7, v0[5] -; NOFP16-NEXT: bit.16b v17, v18, v3 +; NOFP16-NEXT: bit.16b v4, v16, v3 +; NOFP16-NEXT: fcvt s19, h1 +; NOFP16-NEXT: fcvt h1, s5 ; NOFP16-NEXT: fcvt h4, s4 -; NOFP16-NEXT: fcvt s6, h6 -; NOFP16-NEXT: fcvt s16, h16 -; NOFP16-NEXT: mov h18, v2[1] -; NOFP16-NEXT: fcvt s7, h7 -; NOFP16-NEXT: fcvt h5, s5 +; NOFP16-NEXT: fcvt s16, h2 +; NOFP16-NEXT: bit.16b v7, v6, v3 +; NOFP16-NEXT: mov h5, v0[5] +; NOFP16-NEXT: bit.16b v17, v19, v3 +; NOFP16-NEXT: fcvt h6, s7 +; NOFP16-NEXT: mov h7, v2[1] ; NOFP16-NEXT: mov.h v1[1], v4[0] -; NOFP16-NEXT: fcvt h4, s17 -; NOFP16-NEXT: bit.16b v6, v16, v3 -; NOFP16-NEXT: fcvt s17, h18 -; NOFP16-NEXT: mov h16, v2[2] -; NOFP16-NEXT: mov.h v1[2], v4[0] ; NOFP16-NEXT: mov h4, v0[6] +; NOFP16-NEXT: fcvt s5, h5 +; NOFP16-NEXT: fcvt s7, h7 ; NOFP16-NEXT: mov h0, v0[7] -; NOFP16-NEXT: fcvt h6, s6 -; NOFP16-NEXT: mov h2, v2[3] -; NOFP16-NEXT: bit.16b v7, v17, v3 -; NOFP16-NEXT: fcvt s16, h16 +; NOFP16-NEXT: mov.h v1[2], v6[0] +; NOFP16-NEXT: mov h6, v2[2] +; NOFP16-NEXT: bit.16b v18, v16, v3 ; NOFP16-NEXT: fcvt s4, h4 +; NOFP16-NEXT: fcvt h16, s17 +; NOFP16-NEXT: mov h2, v2[3] +; NOFP16-NEXT: fcvt s6, h6 +; NOFP16-NEXT: bit.16b v5, v7, v3 +; NOFP16-NEXT: fcvt h7, s18 ; NOFP16-NEXT: fcvt s0, h0 -; NOFP16-NEXT: mov.h v1[3], v6[0] ; NOFP16-NEXT: fcvt s2, h2 -; NOFP16-NEXT: bit.16b v4, v16, v3 -; NOFP16-NEXT: mov.h v1[4], v5[0] -; NOFP16-NEXT: fcvt h5, s7 -; NOFP16-NEXT: bit.16b v0, v2, v3 +; NOFP16-NEXT: mov.h v1[3], v16[0] +; NOFP16-NEXT: bit.16b v4, v6, v3 +; NOFP16-NEXT: fcvt h5, s5 +; NOFP16-NEXT: mov.h v1[4], v7[0] ; NOFP16-NEXT: fcvt h4, s4 +; NOFP16-NEXT: bit.16b v0, v2, v3 ; NOFP16-NEXT: mov.h v1[5], v5[0] ; NOFP16-NEXT: fcvt h0, s0 ; NOFP16-NEXT: mov.h v1[6], v4[0] @@ -461,9 +461,9 @@ ; FP16: ; %bb.0: ; FP16-NEXT: fcvtn v2.4h, v2.4s ; FP16-NEXT: fcvtn v1.4h, v1.4s -; FP16-NEXT: movi.8h v3, #128, lsl #8 ; FP16-NEXT: mov.d v1[1], v2[0] -; FP16-NEXT: bit.16b v0, v1, v3 +; FP16-NEXT: movi.8h v2, #128, lsl #8 +; FP16-NEXT: bit.16b v0, v1, v2 ; FP16-NEXT: ret %tmp0 = fptrunc <8 x float> %b to <8 x half> %r = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %tmp0) diff --git a/llvm/test/CodeGen/AArch64/vector-gep.ll b/llvm/test/CodeGen/AArch64/vector-gep.ll --- a/llvm/test/CodeGen/AArch64/vector-gep.ll +++ b/llvm/test/CodeGen/AArch64/vector-gep.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=arm64_32-apple-watchos2.0.0 --aarch64-neon-syntax=generic | FileCheck %s target datalayout = "e-m:o-p:32:32-i64:64-i128:128-n32:64-S128" @@ -8,13 +9,6 @@ ; CHECK-NEXT: .quad 4804 define <2 x i8*> @vector_gep(<2 x i8*> %0) { -; CHECK-LABEL: vector_gep: -; CHECK: adrp x[[REG8:[123]?[0-9]]], lCPI0_0@PAGE -; CHECK: movi v[[REG1:[0-9]+]].2d, #0x000000ffffffff -; CHECK: ldr q[[REG2:[0-9]+]], [x[[REG8]], lCPI0_0@PAGEOFF] -; CHECK: add v[[REG0:[0-9]+]].2d, v[[REG0]].2d, v[[REG2]].2d -; CHECK: and v[[REG0]].16b, v[[REG0]].16b, v[[REG1]].16b -; CHECK: ret entry: %1 = getelementptr i8, <2 x i8*> %0, <2 x i32> ret <2 x i8*> %1 diff --git a/llvm/test/CodeGen/AArch64/vector-popcnt-128-ult-ugt.ll b/llvm/test/CodeGen/AArch64/vector-popcnt-128-ult-ugt.ll --- a/llvm/test/CodeGen/AArch64/vector-popcnt-128-ult-ugt.ll +++ b/llvm/test/CodeGen/AArch64/vector-popcnt-128-ult-ugt.ll @@ -4,8 +4,8 @@ define <16 x i8> @ugt_1_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ugt_1_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #1 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #1 ; CHECK-NEXT: cmhi v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -17,8 +17,8 @@ define <16 x i8> @ult_2_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ult_2_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #2 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #2 ; CHECK-NEXT: cmhi v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -30,8 +30,8 @@ define <16 x i8> @ugt_2_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ugt_2_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #2 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #2 ; CHECK-NEXT: cmhi v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -43,8 +43,8 @@ define <16 x i8> @ult_3_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ult_3_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #3 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #3 ; CHECK-NEXT: cmhi v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -56,8 +56,8 @@ define <16 x i8> @ugt_3_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ugt_3_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #3 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #3 ; CHECK-NEXT: cmhi v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -69,8 +69,8 @@ define <16 x i8> @ult_4_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ult_4_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #4 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #4 ; CHECK-NEXT: cmhi v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -82,8 +82,8 @@ define <16 x i8> @ugt_4_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ugt_4_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #4 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #4 ; CHECK-NEXT: cmhi v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -95,8 +95,8 @@ define <16 x i8> @ult_5_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ult_5_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #5 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #5 ; CHECK-NEXT: cmhi v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -108,8 +108,8 @@ define <16 x i8> @ugt_5_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ugt_5_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #5 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #5 ; CHECK-NEXT: cmhi v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -121,8 +121,8 @@ define <16 x i8> @ult_6_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ult_6_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #6 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #6 ; CHECK-NEXT: cmhi v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -134,8 +134,8 @@ define <16 x i8> @ugt_6_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ugt_6_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #6 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #6 ; CHECK-NEXT: cmhi v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -147,8 +147,8 @@ define <16 x i8> @ult_7_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ult_7_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #7 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #7 ; CHECK-NEXT: cmhi v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -1477,8 +1477,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #2 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1494,8 +1494,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #3 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1511,8 +1511,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #3 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1528,8 +1528,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #4 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1545,8 +1545,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #4 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1562,8 +1562,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #5 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1579,8 +1579,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #5 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1596,8 +1596,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #6 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1613,8 +1613,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #6 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1630,8 +1630,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #7 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1647,8 +1647,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #7 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1664,8 +1664,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #8 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1681,8 +1681,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #8 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1698,8 +1698,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #9 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1715,8 +1715,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #9 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1732,8 +1732,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #10 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1749,8 +1749,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #10 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1766,8 +1766,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #11 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1783,8 +1783,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #11 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1800,8 +1800,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #12 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1817,8 +1817,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #12 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1834,8 +1834,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #13 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1851,8 +1851,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #13 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1868,8 +1868,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #14 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1885,8 +1885,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #14 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1902,8 +1902,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #15 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1919,8 +1919,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #15 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1936,8 +1936,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #16 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1953,8 +1953,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #16 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1970,8 +1970,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #17 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1987,8 +1987,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #17 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2004,8 +2004,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #18 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2021,8 +2021,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #18 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2038,8 +2038,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #19 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2055,8 +2055,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #19 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2072,8 +2072,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #20 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2089,8 +2089,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #20 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2106,8 +2106,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #21 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2123,8 +2123,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #21 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2140,8 +2140,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #22 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2157,8 +2157,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #22 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2174,8 +2174,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #23 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2191,8 +2191,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #23 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2208,8 +2208,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #24 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2225,8 +2225,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #24 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2242,8 +2242,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #25 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2259,8 +2259,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #25 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2276,8 +2276,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #26 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2293,8 +2293,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #26 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2310,8 +2310,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #27 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2327,8 +2327,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #27 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2344,8 +2344,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #28 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2361,8 +2361,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #28 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2378,8 +2378,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #29 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2395,8 +2395,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #29 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2412,8 +2412,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #30 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2429,8 +2429,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #30 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2446,8 +2446,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #31 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2463,8 +2463,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #31 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2480,8 +2480,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #32 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2497,8 +2497,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #32 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2514,8 +2514,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #33 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2531,8 +2531,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #33 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2548,8 +2548,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #34 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2565,8 +2565,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #34 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2582,8 +2582,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #35 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2599,8 +2599,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #35 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2616,8 +2616,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #36 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2633,8 +2633,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #36 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2650,8 +2650,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #37 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2667,8 +2667,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #37 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2684,8 +2684,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #38 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2701,8 +2701,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #38 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2718,8 +2718,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #39 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2735,8 +2735,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #39 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2752,8 +2752,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #40 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2769,8 +2769,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #40 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2786,8 +2786,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #41 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2803,8 +2803,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #41 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2820,8 +2820,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #42 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2837,8 +2837,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #42 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2854,8 +2854,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #43 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2871,8 +2871,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #43 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2888,8 +2888,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #44 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2905,8 +2905,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #44 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2922,8 +2922,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #45 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2939,8 +2939,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #45 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2956,8 +2956,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #46 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2973,8 +2973,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #46 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2990,8 +2990,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #47 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3007,8 +3007,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #47 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3024,8 +3024,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #48 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3041,8 +3041,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #48 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3058,8 +3058,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #49 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3075,8 +3075,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #49 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3092,8 +3092,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #50 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3109,8 +3109,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #50 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3126,8 +3126,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #51 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3143,8 +3143,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #51 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3160,8 +3160,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #52 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3177,8 +3177,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #52 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3194,8 +3194,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #53 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3211,8 +3211,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #53 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3228,8 +3228,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #54 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3245,8 +3245,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #54 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3262,8 +3262,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #55 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3279,8 +3279,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #55 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3296,8 +3296,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #56 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3313,8 +3313,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #56 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3330,8 +3330,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #57 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3347,8 +3347,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #57 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3364,8 +3364,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #58 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3381,8 +3381,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #58 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3398,8 +3398,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #59 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3415,8 +3415,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #59 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3432,8 +3432,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #60 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3449,8 +3449,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #60 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3466,8 +3466,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #61 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3483,8 +3483,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #61 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3500,8 +3500,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #62 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3517,8 +3517,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #62 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3534,8 +3534,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #63 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d diff --git a/llvm/test/CodeGen/AArch64/vselect-constants.ll b/llvm/test/CodeGen/AArch64/vselect-constants.ll --- a/llvm/test/CodeGen/AArch64/vselect-constants.ll +++ b/llvm/test/CodeGen/AArch64/vselect-constants.ll @@ -10,11 +10,11 @@ define <4 x i32> @sel_C1_or_C2_vec(<4 x i1> %cond) { ; CHECK-LABEL: sel_C1_or_C2_vec: ; CHECK: // %bb.0: -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-NEXT: adrp x8, .LCPI0_0 +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-NEXT: adrp x9, .LCPI0_1 -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0] ; CHECK-NEXT: shl v0.4s, v0.4s, #31 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0] ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI0_1] ; CHECK-NEXT: sshr v0.4s, v0.4s, #31 ; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b @@ -29,9 +29,9 @@ ; CHECK-NEXT: adrp x8, .LCPI1_0 ; CHECK-NEXT: adrp x9, .LCPI1_1 ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_0] -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI1_1] -; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI1_0] +; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI1_1] +; CHECK-NEXT: bsl v0.16b, v3.16b, v2.16b ; CHECK-NEXT: ret %cond = icmp eq <4 x i32> %x, %y %add = select <4 x i1> %cond, <4 x i32> , <4 x i32> @@ -41,11 +41,11 @@ define <4 x i32> @sel_Cplus1_or_C_vec(<4 x i1> %cond) { ; CHECK-LABEL: sel_Cplus1_or_C_vec: ; CHECK: // %bb.0: -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-NEXT: adrp x8, .LCPI2_0 +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-NEXT: adrp x9, .LCPI2_1 -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0] ; CHECK-NEXT: shl v0.4s, v0.4s, #31 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0] ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI2_1] ; CHECK-NEXT: sshr v0.4s, v0.4s, #31 ; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b @@ -60,9 +60,9 @@ ; CHECK-NEXT: adrp x8, .LCPI3_0 ; CHECK-NEXT: adrp x9, .LCPI3_1 ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0] -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI3_1] -; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_0] +; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI3_1] +; CHECK-NEXT: bsl v0.16b, v3.16b, v2.16b ; CHECK-NEXT: ret %cond = icmp eq <4 x i32> %x, %y %add = select <4 x i1> %cond, <4 x i32> , <4 x i32> @@ -72,11 +72,11 @@ define <4 x i32> @sel_Cminus1_or_C_vec(<4 x i1> %cond) { ; CHECK-LABEL: sel_Cminus1_or_C_vec: ; CHECK: // %bb.0: -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-NEXT: adrp x8, .LCPI4_0 +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-NEXT: adrp x9, .LCPI4_1 -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_0] ; CHECK-NEXT: shl v0.4s, v0.4s, #31 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_0] ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI4_1] ; CHECK-NEXT: sshr v0.4s, v0.4s, #31 ; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b @@ -91,9 +91,9 @@ ; CHECK-NEXT: adrp x8, .LCPI5_0 ; CHECK-NEXT: adrp x9, .LCPI5_1 ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI5_0] -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI5_1] -; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI5_0] +; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI5_1] +; CHECK-NEXT: bsl v0.16b, v3.16b, v2.16b ; CHECK-NEXT: ret %cond = icmp eq <4 x i32> %x, %y %add = select <4 x i1> %cond, <4 x i32> , <4 x i32> @@ -159,9 +159,9 @@ define <4 x i32> @cmp_sel_1_or_0_vec(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: cmp_sel_1_or_0_vec: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %cond = icmp eq <4 x i32> %x, %y %add = select <4 x i1> %cond, <4 x i32> , <4 x i32> @@ -184,9 +184,9 @@ define <4 x i32> @cmp_sel_0_or_1_vec(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: cmp_sel_0_or_1_vec: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s -; CHECK-NEXT: bic v0.16b, v2.16b, v0.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %cond = icmp eq <4 x i32> %x, %y %add = select <4 x i1> %cond, <4 x i32> , <4 x i32> diff --git a/llvm/test/CodeGen/AArch64/xor.ll b/llvm/test/CodeGen/AArch64/xor.ll --- a/llvm/test/CodeGen/AArch64/xor.ll +++ b/llvm/test/CodeGen/AArch64/xor.ll @@ -62,9 +62,9 @@ define <4 x i32> @vec_add_of_not_with_undef(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: vec_add_of_not_with_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.2d, #0xffffffffffffffff ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s -; CHECK-NEXT: add v0.4s, v0.4s, v2.4s +; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff +; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %t0 = sub <4 x i32> %x, %y %r = add <4 x i32> %t0, @@ -74,9 +74,9 @@ define <4 x i32> @vec_add_of_not_with_undef_decrement(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: vec_add_of_not_with_undef_decrement: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s -; CHECK-NEXT: add v0.4s, v0.4s, v2.4s +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %t0 = sub <4 x i32> %x, %y %r = add <4 x i32> %t0, diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-basic-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-basic-instructions.s --- a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-basic-instructions.s +++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-basic-instructions.s @@ -2428,44 +2428,44 @@ # CHECK-NEXT: 1 1 1.00 * stp q3, q5, [sp] # CHECK-NEXT: 1 1 1.00 * stp q17, q19, [sp, #1008] # CHECK-NEXT: 2 6 6.00 * ldp q23, q29, [x1, #-1024] -# CHECK-NEXT: 2 4 1.00 * ldp w3, w5, [sp], #0 +# CHECK-NEXT: 3 4 1.00 * ldp w3, w5, [sp], #0 # CHECK-NEXT: 2 1 1.00 * stp wzr, w9, [sp], #252 -# CHECK-NEXT: 2 4 1.00 * ldp w2, wzr, [sp], #-256 -# CHECK-NEXT: 2 4 1.00 * ldp w9, w10, [sp], #4 -# CHECK-NEXT: 2 4 1.00 * ldpsw x9, x10, [sp], #4 -# CHECK-NEXT: 2 4 1.00 * ldpsw x9, x10, [x2], #-256 -# CHECK-NEXT: 2 4 1.00 * ldpsw x20, x30, [sp], #252 -# CHECK-NEXT: 2 5 2.00 * ldp x21, x29, [x2], #504 -# CHECK-NEXT: 2 5 2.00 * ldp x22, x23, [x3], #-512 -# CHECK-NEXT: 2 5 2.00 * ldp x24, x25, [x4], #8 -# CHECK-NEXT: 2 4 1.00 * ldp s29, s28, [sp], #252 +# CHECK-NEXT: 3 4 1.00 * ldp w2, wzr, [sp], #-256 +# CHECK-NEXT: 3 4 1.00 * ldp w9, w10, [sp], #4 +# CHECK-NEXT: 3 4 1.00 * ldpsw x9, x10, [sp], #4 +# CHECK-NEXT: 3 4 1.00 * ldpsw x9, x10, [x2], #-256 +# CHECK-NEXT: 3 4 1.00 * ldpsw x20, x30, [sp], #252 +# CHECK-NEXT: 3 5 2.00 * ldp x21, x29, [x2], #504 +# CHECK-NEXT: 3 5 2.00 * ldp x22, x23, [x3], #-512 +# CHECK-NEXT: 3 5 2.00 * ldp x24, x25, [x4], #8 +# CHECK-NEXT: 3 4 1.00 * ldp s29, s28, [sp], #252 # CHECK-NEXT: 2 1 1.00 * stp s27, s26, [sp], #-256 -# CHECK-NEXT: 2 4 1.00 * ldp s1, s2, [x3], #44 +# CHECK-NEXT: 3 4 1.00 * ldp s1, s2, [x3], #44 # CHECK-NEXT: 2 1 1.00 * stp d3, d5, [x9], #504 # CHECK-NEXT: 2 1 1.00 * stp d7, d11, [x10], #-512 -# CHECK-NEXT: 2 5 2.00 * ldp d2, d3, [x30], #-8 +# CHECK-NEXT: 3 5 2.00 * ldp d2, d3, [x30], #-8 # CHECK-NEXT: 2 1 1.00 * stp q3, q5, [sp], #0 # CHECK-NEXT: 2 1 1.00 * stp q17, q19, [sp], #1008 -# CHECK-NEXT: 2 6 6.00 * ldp q23, q29, [x1], #-1024 -# CHECK-NEXT: 2 4 1.00 * ldp w3, w5, [sp, #0]! +# CHECK-NEXT: 3 6 6.00 * ldp q23, q29, [x1], #-1024 +# CHECK-NEXT: 3 4 1.00 * ldp w3, w5, [sp, #0]! # CHECK-NEXT: 2 1 1.00 * stp wzr, w9, [sp, #252]! -# CHECK-NEXT: 2 4 1.00 * ldp w2, wzr, [sp, #-256]! -# CHECK-NEXT: 2 4 1.00 * ldp w9, w10, [sp, #4]! -# CHECK-NEXT: 2 4 1.00 * ldpsw x9, x10, [sp, #4]! -# CHECK-NEXT: 2 4 1.00 * ldpsw x9, x10, [x2, #-256]! -# CHECK-NEXT: 2 4 1.00 * ldpsw x20, x30, [sp, #252]! -# CHECK-NEXT: 2 5 2.00 * ldp x21, x29, [x2, #504]! -# CHECK-NEXT: 2 5 2.00 * ldp x22, x23, [x3, #-512]! -# CHECK-NEXT: 2 5 2.00 * ldp x24, x25, [x4, #8]! -# CHECK-NEXT: 2 4 1.00 * ldp s29, s28, [sp, #252]! +# CHECK-NEXT: 3 4 1.00 * ldp w2, wzr, [sp, #-256]! +# CHECK-NEXT: 3 4 1.00 * ldp w9, w10, [sp, #4]! +# CHECK-NEXT: 3 4 1.00 * ldpsw x9, x10, [sp, #4]! +# CHECK-NEXT: 3 4 1.00 * ldpsw x9, x10, [x2, #-256]! +# CHECK-NEXT: 3 4 1.00 * ldpsw x20, x30, [sp, #252]! +# CHECK-NEXT: 3 5 2.00 * ldp x21, x29, [x2, #504]! +# CHECK-NEXT: 3 5 2.00 * ldp x22, x23, [x3, #-512]! +# CHECK-NEXT: 3 5 2.00 * ldp x24, x25, [x4, #8]! +# CHECK-NEXT: 3 4 1.00 * ldp s29, s28, [sp, #252]! # CHECK-NEXT: 2 1 1.00 * stp s27, s26, [sp, #-256]! -# CHECK-NEXT: 2 4 1.00 * ldp s1, s2, [x3, #44]! +# CHECK-NEXT: 3 4 1.00 * ldp s1, s2, [x3, #44]! # CHECK-NEXT: 2 1 1.00 * stp d3, d5, [x9, #504]! # CHECK-NEXT: 2 1 1.00 * stp d7, d11, [x10, #-512]! -# CHECK-NEXT: 2 5 2.00 * ldp d2, d3, [x30, #-8]! +# CHECK-NEXT: 3 5 2.00 * ldp d2, d3, [x30, #-8]! # CHECK-NEXT: 2 1 1.00 * stp q3, q5, [sp, #0]! # CHECK-NEXT: 2 1 1.00 * stp q17, q19, [sp, #1008]! -# CHECK-NEXT: 2 6 6.00 * ldp q23, q29, [x1, #-1024]! +# CHECK-NEXT: 3 6 6.00 * ldp q23, q29, [x1, #-1024]! # CHECK-NEXT: 2 5 2.00 * ldnp w3, w5, [sp] # CHECK-NEXT: 1 1 1.00 * stnp wzr, w9, [sp, #252] # CHECK-NEXT: 2 5 2.00 * ldnp w2, wzr, [sp, #-256] diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-load-readadv.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-load-readadv.s --- a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-load-readadv.s +++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-load-readadv.s @@ -161,13 +161,13 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 15400 -# CHECK-NEXT: Total Cycles: 30104 -# CHECK-NEXT: Total uOps: 19900 +# CHECK-NEXT: Total Cycles: 30303 +# CHECK-NEXT: Total uOps: 20900 # CHECK: Dispatch Width: 2 -# CHECK-NEXT: uOps Per Cycle: 0.66 +# CHECK-NEXT: uOps Per Cycle: 0.69 # CHECK-NEXT: IPC: 0.51 -# CHECK-NEXT: Block RThroughput: 104.0 +# CHECK-NEXT: Block RThroughput: 104.5 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -305,33 +305,33 @@ # CHECK-NEXT: 1 3 0.50 add x2, x3, #1 # CHECK-NEXT: 2 5 2.00 * ldp d0, d1, [x2, #16] # CHECK-NEXT: 1 3 0.50 add x2, x3, #1 -# CHECK-NEXT: 2 5 2.00 * ldp d0, d1, [x2, #16]! +# CHECK-NEXT: 3 5 2.00 * ldp d0, d1, [x2, #16]! # CHECK-NEXT: 1 3 0.50 add x2, x3, #1 -# CHECK-NEXT: 2 5 2.00 * ldp d0, d1, [x2], #16 +# CHECK-NEXT: 3 5 2.00 * ldp d0, d1, [x2], #16 # CHECK-NEXT: 1 3 0.50 add x2, x3, #1 # CHECK-NEXT: 2 6 6.00 * ldp q0, q1, [x2, #16] # CHECK-NEXT: 1 3 0.50 add x2, x3, #1 -# CHECK-NEXT: 2 6 6.00 * ldp q0, q1, [x2, #16]! +# CHECK-NEXT: 3 6 6.00 * ldp q0, q1, [x2, #16]! # CHECK-NEXT: 1 3 0.50 add x2, x3, #1 -# CHECK-NEXT: 2 6 6.00 * ldp q0, q1, [x2], #16 +# CHECK-NEXT: 3 6 6.00 * ldp q0, q1, [x2], #16 # CHECK-NEXT: 1 3 0.50 add x2, x3, #1 # CHECK-NEXT: 2 4 1.00 * ldp s0, s1, [x2, #16] # CHECK-NEXT: 1 3 0.50 add x2, x3, #1 -# CHECK-NEXT: 2 4 1.00 * ldp s0, s1, [x2, #16]! +# CHECK-NEXT: 3 4 1.00 * ldp s0, s1, [x2, #16]! # CHECK-NEXT: 1 3 0.50 add x2, x3, #1 -# CHECK-NEXT: 2 4 1.00 * ldp s0, s1, [x2], #16 +# CHECK-NEXT: 3 4 1.00 * ldp s0, s1, [x2], #16 # CHECK-NEXT: 1 3 0.50 add x2, x3, #1 # CHECK-NEXT: 2 5 2.00 * ldp x0, x1, [x2, #16] # CHECK-NEXT: 1 3 0.50 add x2, x3, #1 -# CHECK-NEXT: 2 5 2.00 * ldp x0, x1, [x2, #16]! +# CHECK-NEXT: 3 5 2.00 * ldp x0, x1, [x2, #16]! # CHECK-NEXT: 1 3 0.50 add x2, x3, #1 -# CHECK-NEXT: 2 5 2.00 * ldp x0, x1, [x2], #16 +# CHECK-NEXT: 3 5 2.00 * ldp x0, x1, [x2], #16 # CHECK-NEXT: 1 3 0.50 add x2, x3, #1 # CHECK-NEXT: 2 4 1.00 * ldpsw x0, x1, [x2, #16] # CHECK-NEXT: 1 3 0.50 add x2, x3, #1 -# CHECK-NEXT: 2 4 1.00 * ldpsw x0, x1, [x2, #16]! +# CHECK-NEXT: 3 4 1.00 * ldpsw x0, x1, [x2, #16]! # CHECK-NEXT: 1 3 0.50 add x2, x3, #1 -# CHECK-NEXT: 2 4 1.00 * ldpsw x0, x1, [x2], #16 +# CHECK-NEXT: 3 4 1.00 * ldpsw x0, x1, [x2], #16 # CHECK: Resources: # CHECK-NEXT: [0.0] - CortexA55UnitALU @@ -510,162 +510,162 @@ # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 01234 +# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 012345 -# CHECK: [0,0] DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,1] . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr b0, [x2, #16] -# CHECK-NEXT: [0,2] . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,3] . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr b0, [x2, #16]! -# CHECK-NEXT: [0,4] . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,5] . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr b0, [x2], #16 -# CHECK-NEXT: [0,6] . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,7] . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr d0, [x2], #16 -# CHECK-NEXT: [0,8] . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,9] . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr d0, [x2, #16]! -# CHECK-NEXT: [0,10] . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,11] . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr d0, [x2, #16] -# CHECK-NEXT: [0,12] . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,13] . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr h0, [x2], #16 -# CHECK-NEXT: [0,14] . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,15] . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr h0, [x2, #16]! -# CHECK-NEXT: [0,16] . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,17] . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr h0, [x2, #16] -# CHECK-NEXT: [0,18] . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,19] . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr q0, [x2], #16 -# CHECK-NEXT: [0,20] . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,21] . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr q0, [x2, #16]! -# CHECK-NEXT: [0,22] . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,23] . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr q0, [x2, #16] -# CHECK-NEXT: [0,24] . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,25] . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr s0, [x2], #16 -# CHECK-NEXT: [0,26] . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,27] . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr s0, [x2, #16]! -# CHECK-NEXT: [0,28] . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,29] . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr s0, [x2, #16] -# CHECK-NEXT: [0,30] . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,31] . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr w0, [x2], #16 -# CHECK-NEXT: [0,32] . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,33] . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr w0, [x2, #16]! -# CHECK-NEXT: [0,34] . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,35] . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr w0, [x2, #16] -# CHECK-NEXT: [0,36] . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,37] . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr x0, [x2], #16 -# CHECK-NEXT: [0,38] . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,39] . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr x0, [x2, #16]! -# CHECK-NEXT: [0,40] . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,41] . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr x0, [x2, #16] -# CHECK-NEXT: [0,42] . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,43] . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrb w0, [x2], #16 -# CHECK-NEXT: [0,44] . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,45] . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrb w0, [x2, #16]! -# CHECK-NEXT: [0,46] . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,47] . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrb w0, [x2, #16] -# CHECK-NEXT: [0,48] . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,49] . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsb w0, [x2], #16 -# CHECK-NEXT: [0,50] . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,51] . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsb w0, [x2, #16]! -# CHECK-NEXT: [0,52] . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,53] . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsb w0, [x2, #16] -# CHECK-NEXT: [0,54] . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,55] . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrh w0, [x2], #16 -# CHECK-NEXT: [0,56] . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,57] . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrh w0, [x2, #16]! -# CHECK-NEXT: [0,58] . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,59] . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrh w0, [x2, #16] -# CHECK-NEXT: [0,60] . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,61] . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsh w0, [x2], #16 -# CHECK-NEXT: [0,62] . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,63] . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsh w0, [x2, #16]! -# CHECK-NEXT: [0,64] . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,65] . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsh w0, [x2, #16] -# CHECK-NEXT: [0,66] . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,67] . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsw x0, [x2], #16 -# CHECK-NEXT: [0,68] . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,69] . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsw x0, [x2, #16]! -# CHECK-NEXT: [0,70] . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,71] . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsw x0, [x2, #16] -# CHECK-NEXT: [0,72] . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,73] . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr d0, [x2, x2, lsl #3] -# CHECK-NEXT: [0,74] . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,75] . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr q0, [x2, w0, sxtw] -# CHECK-NEXT: [0,76] . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,77] . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr w0, [x2, w0, sxtw] -# CHECK-NEXT: [0,78] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,79] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr x0, [x2, w0, sxtw] -# CHECK-NEXT: [0,80] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,81] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrb w0, [x2, w0, sxtw] -# CHECK-NEXT: [0,82] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,83] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsb w0, [x2, w0, sxtw] -# CHECK-NEXT: [0,84] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,85] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrh w0, [x2, w0, sxtw] -# CHECK-NEXT: [0,86] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,87] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsh w0, [x2, w0, sxtw] -# CHECK-NEXT: [0,88] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,89] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsw x0, [x2, w0, sxtw] -# CHECK-NEXT: [0,90] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,91] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . ldur b0, [x2, #255] -# CHECK-NEXT: [0,92] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,93] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . ldur d0, [x2, #255] -# CHECK-NEXT: [0,94] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,95] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . ldur h0, [x2, #255] -# CHECK-NEXT: [0,96] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,97] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . ldur q0, [x2, #255] -# CHECK-NEXT: [0,98] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,99] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . ldur s0, [x2, #255] -# CHECK-NEXT: [0,100] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,101] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . ldur w0, [x2, #255] -# CHECK-NEXT: [0,102] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,103] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . ldurb w0, [x2, #255] -# CHECK-NEXT: [0,104] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,105] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . ldurh w0, [x2, #255] -# CHECK-NEXT: [0,106] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,107] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . ldursb w0, [x2, #255] -# CHECK-NEXT: [0,108] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,109] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . ldursh w0, [x2, #255] -# CHECK-NEXT: [0,110] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,111] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . ldursw x0, [x2, #255] -# CHECK-NEXT: [0,112] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,113] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . . . . . . . . ldnp d0, d1, [x2, #16] -# CHECK-NEXT: [0,114] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,115] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . . . . . . . ldnp q0, q1, [x2, #16] -# CHECK-NEXT: [0,116] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,117] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . . . . . . ldnp s0, s1, [x2, #16] -# CHECK-NEXT: [0,118] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,119] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . . . . . ldnp s0, s1, [x2, #16] -# CHECK-NEXT: [0,120] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,121] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . . . . ldnp w0, w1, [x2, #16] -# CHECK-NEXT: [0,122] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,123] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . . . ldnp x0, x1, [x2, #16] -# CHECK-NEXT: [0,124] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,125] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . . ldp d0, d1, [x2, #16] -# CHECK-NEXT: [0,126] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,127] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . ldp d0, d1, [x2, #16]! -# CHECK-NEXT: [0,128] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,129] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . ldp d0, d1, [x2], #16 -# CHECK-NEXT: [0,130] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,131] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeE . . . . . . . . . . . ldp q0, q1, [x2, #16] -# CHECK-NEXT: [0,132] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,133] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeE. . . . . . . . . . ldp q0, q1, [x2, #16]! -# CHECK-NEXT: [0,134] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,135] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeE . . . . . . . . ldp q0, q1, [x2], #16 -# CHECK-NEXT: [0,136] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,137] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE. . . . . . . . ldp s0, s1, [x2, #16] -# CHECK-NEXT: [0,138] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,139] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE . . . . . . . ldp s0, s1, [x2, #16]! -# CHECK-NEXT: [0,140] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,141] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE . . . . . . ldp s0, s1, [x2], #16 -# CHECK-NEXT: [0,142] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,143] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . ldp x0, x1, [x2, #16] -# CHECK-NEXT: [0,144] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,145] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . ldp x0, x1, [x2, #16]! -# CHECK-NEXT: [0,146] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . add x2, x3, #1 -# CHECK-NEXT: [0,147] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . ldp x0, x1, [x2], #16 -# CHECK-NEXT: [0,148] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . add x2, x3, #1 -# CHECK-NEXT: [0,149] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE . . ldpsw x0, x1, [x2, #16] -# CHECK-NEXT: [0,150] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . add x2, x3, #1 -# CHECK-NEXT: [0,151] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeeE . ldpsw x0, x1, [x2, #16]! -# CHECK-NEXT: [0,152] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . add x2, x3, #1 -# CHECK-NEXT: [0,153] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE ldpsw x0, x1, [x2], #16 +# CHECK: [0,0] DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,1] . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr b0, [x2, #16] +# CHECK-NEXT: [0,2] . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,3] . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr b0, [x2, #16]! +# CHECK-NEXT: [0,4] . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,5] . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr b0, [x2], #16 +# CHECK-NEXT: [0,6] . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,7] . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr d0, [x2], #16 +# CHECK-NEXT: [0,8] . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,9] . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr d0, [x2, #16]! +# CHECK-NEXT: [0,10] . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,11] . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr d0, [x2, #16] +# CHECK-NEXT: [0,12] . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,13] . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr h0, [x2], #16 +# CHECK-NEXT: [0,14] . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,15] . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr h0, [x2, #16]! +# CHECK-NEXT: [0,16] . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,17] . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr h0, [x2, #16] +# CHECK-NEXT: [0,18] . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,19] . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr q0, [x2], #16 +# CHECK-NEXT: [0,20] . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,21] . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr q0, [x2, #16]! +# CHECK-NEXT: [0,22] . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,23] . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr q0, [x2, #16] +# CHECK-NEXT: [0,24] . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,25] . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr s0, [x2], #16 +# CHECK-NEXT: [0,26] . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,27] . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr s0, [x2, #16]! +# CHECK-NEXT: [0,28] . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,29] . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr s0, [x2, #16] +# CHECK-NEXT: [0,30] . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,31] . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr w0, [x2], #16 +# CHECK-NEXT: [0,32] . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,33] . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr w0, [x2, #16]! +# CHECK-NEXT: [0,34] . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,35] . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr w0, [x2, #16] +# CHECK-NEXT: [0,36] . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,37] . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr x0, [x2], #16 +# CHECK-NEXT: [0,38] . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,39] . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr x0, [x2, #16]! +# CHECK-NEXT: [0,40] . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,41] . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr x0, [x2, #16] +# CHECK-NEXT: [0,42] . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,43] . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrb w0, [x2], #16 +# CHECK-NEXT: [0,44] . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,45] . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrb w0, [x2, #16]! +# CHECK-NEXT: [0,46] . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,47] . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrb w0, [x2, #16] +# CHECK-NEXT: [0,48] . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,49] . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsb w0, [x2], #16 +# CHECK-NEXT: [0,50] . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,51] . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsb w0, [x2, #16]! +# CHECK-NEXT: [0,52] . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,53] . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsb w0, [x2, #16] +# CHECK-NEXT: [0,54] . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,55] . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrh w0, [x2], #16 +# CHECK-NEXT: [0,56] . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,57] . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrh w0, [x2, #16]! +# CHECK-NEXT: [0,58] . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,59] . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrh w0, [x2, #16] +# CHECK-NEXT: [0,60] . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,61] . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsh w0, [x2], #16 +# CHECK-NEXT: [0,62] . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,63] . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsh w0, [x2, #16]! +# CHECK-NEXT: [0,64] . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,65] . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsh w0, [x2, #16] +# CHECK-NEXT: [0,66] . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,67] . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsw x0, [x2], #16 +# CHECK-NEXT: [0,68] . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,69] . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsw x0, [x2, #16]! +# CHECK-NEXT: [0,70] . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,71] . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsw x0, [x2, #16] +# CHECK-NEXT: [0,72] . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,73] . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr d0, [x2, x2, lsl #3] +# CHECK-NEXT: [0,74] . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,75] . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr q0, [x2, w0, sxtw] +# CHECK-NEXT: [0,76] . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,77] . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr w0, [x2, w0, sxtw] +# CHECK-NEXT: [0,78] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,79] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr x0, [x2, w0, sxtw] +# CHECK-NEXT: [0,80] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,81] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrb w0, [x2, w0, sxtw] +# CHECK-NEXT: [0,82] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,83] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsb w0, [x2, w0, sxtw] +# CHECK-NEXT: [0,84] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,85] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrh w0, [x2, w0, sxtw] +# CHECK-NEXT: [0,86] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,87] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsh w0, [x2, w0, sxtw] +# CHECK-NEXT: [0,88] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,89] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsw x0, [x2, w0, sxtw] +# CHECK-NEXT: [0,90] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,91] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . ldur b0, [x2, #255] +# CHECK-NEXT: [0,92] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,93] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . ldur d0, [x2, #255] +# CHECK-NEXT: [0,94] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,95] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . ldur h0, [x2, #255] +# CHECK-NEXT: [0,96] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,97] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . ldur q0, [x2, #255] +# CHECK-NEXT: [0,98] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,99] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . ldur s0, [x2, #255] +# CHECK-NEXT: [0,100] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,101] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . ldur w0, [x2, #255] +# CHECK-NEXT: [0,102] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,103] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . ldurb w0, [x2, #255] +# CHECK-NEXT: [0,104] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,105] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . ldurh w0, [x2, #255] +# CHECK-NEXT: [0,106] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,107] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . ldursb w0, [x2, #255] +# CHECK-NEXT: [0,108] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,109] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . ldursh w0, [x2, #255] +# CHECK-NEXT: [0,110] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,111] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . ldursw x0, [x2, #255] +# CHECK-NEXT: [0,112] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,113] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . . . . . . . . ldnp d0, d1, [x2, #16] +# CHECK-NEXT: [0,114] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,115] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . . . . . . . ldnp q0, q1, [x2, #16] +# CHECK-NEXT: [0,116] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,117] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . . . . . . ldnp s0, s1, [x2, #16] +# CHECK-NEXT: [0,118] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,119] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . . . . . ldnp s0, s1, [x2, #16] +# CHECK-NEXT: [0,120] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,121] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . . . . ldnp w0, w1, [x2, #16] +# CHECK-NEXT: [0,122] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,123] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . . . ldnp x0, x1, [x2, #16] +# CHECK-NEXT: [0,124] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,125] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . . ldp d0, d1, [x2, #16] +# CHECK-NEXT: [0,126] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,127] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . ldp d0, d1, [x2, #16]! +# CHECK-NEXT: [0,128] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,129] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . ldp d0, d1, [x2], #16 +# CHECK-NEXT: [0,130] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,131] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeE . . . . . . . . . . . ldp q0, q1, [x2, #16] +# CHECK-NEXT: [0,132] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,133] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeE. . . . . . . . . . ldp q0, q1, [x2, #16]! +# CHECK-NEXT: [0,134] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,135] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeE . . . . . . . . ldp q0, q1, [x2], #16 +# CHECK-NEXT: [0,136] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,137] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE. . . . . . . . ldp s0, s1, [x2, #16] +# CHECK-NEXT: [0,138] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,139] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE . . . . . . . ldp s0, s1, [x2, #16]! +# CHECK-NEXT: [0,140] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,141] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE . . . . . . ldp s0, s1, [x2], #16 +# CHECK-NEXT: [0,142] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,143] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . ldp x0, x1, [x2, #16] +# CHECK-NEXT: [0,144] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,145] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . ldp x0, x1, [x2, #16]! +# CHECK-NEXT: [0,146] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . add x2, x3, #1 +# CHECK-NEXT: [0,147] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . ldp x0, x1, [x2], #16 +# CHECK-NEXT: [0,148] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . add x2, x3, #1 +# CHECK-NEXT: [0,149] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE . . ldpsw x0, x1, [x2, #16] +# CHECK-NEXT: [0,150] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . add x2, x3, #1 +# CHECK-NEXT: [0,151] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeeE . ldpsw x0, x1, [x2, #16]! +# CHECK-NEXT: [0,152] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . add x2, x3, #1 +# CHECK-NEXT: [0,153] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeeE ldpsw x0, x1, [x2], #16 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-neon-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-neon-instructions.s --- a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-neon-instructions.s +++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-neon-instructions.s @@ -1,1065 +1,861 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=aarch64 -mcpu=cortex-a55 -instruction-tables < %s | FileCheck %s -abs d29, d24 -abs v0.16b, v0.16b -abs v0.2d, v0.2d -abs v0.2s, v0.2s -abs v0.4h, v0.4h -abs v0.4s, v0.4s -abs v0.8b, v0.8b -abs v0.8h, v0.8h -add d17, d31, d29 -add v0.8b, v0.8b, v0.8b -addhn v0.2s, v0.2d, v0.2d -addhn v0.4h, v0.4s, v0.4s -addhn v0.8b, v0.8h, v0.8h -addhn2 v0.16b, v0.8h, v0.8h -addhn2 v0.4s, v0.2d, v0.2d -addhn2 v0.8h, v0.4s, v0.4s -addp v0.2d, v0.2d, v0.2d -addp v0.8b, v0.8b, v0.8b -and v0.8b, v0.8b, v0.8b -bic v0.4h, #15, lsl #8 -bic v0.8b, v0.8b, v0.8b -bif v0.16b, v0.16b, v0.16b -bit v0.16b, v0.16b, v0.16b -bsl v0.8b, v0.8b, v0.8b -cls v0.16b, v0.16b -cls v0.2s, v0.2s -cls v0.4h, v0.4h -cls v0.4s, v0.4s -cls v0.8b, v0.8b -cls v0.8h, v0.8h -clz v0.16b, v0.16b -clz v0.2s, v0.2s -clz v0.4h, v0.4h -clz v0.4s, v0.4s -clz v0.8b, v0.8b -clz v0.8h, v0.8h -cmeq d20, d21, 0 -cmeq d20, d21, d22 -cmeq v0.16b, v0.16b, 0 -cmeq v0.16b, v0.16b, v0.16b -cmge d20, d21, 0 -cmge d20, d21, d22 -cmge v0.4h, v0.4h, v0.4h -cmge v0.8b, v0.8b, 0 -cmgt d20, d21, 0 -cmgt d20, d21, d22 -cmgt v0.2s, v0.2s, 0 -cmgt v0.4s, v0.4s, v0.4s -cmhi d20, d21, d22 -cmhi v0.8h, v0.8h, v0.8h -cmhs d20, d21, d22 -cmhs v0.8b, v0.8b, v0.8b -cmle d20, d21, 0 -cmle v0.2d, v0.2d, 0 -cmlt d20, d21, 0 -cmlt v0.8h, v0.8h, 0 -cmtst d20, d21, d22 -cmtst v0.2s, v0.2s, v0.2s -cnt v0.16b, v0.16b -cnt v0.8b, v0.8b -dup v0.16b,w28 -dup v0.2d,x28 -dup v0.2s,w28 -dup v0.4h,w28 -dup v0.4s,w28 -dup v0.8b,w28 -dup v0.8h,w28 -eor v0.16b, v0.16b, v0.16b -ext v0.16b, v0.16b, v0.16b, #3 -ext v0.8b, v0.8b, v0.8b, #3 -fabd d29, d24, d20 -fabd s29, s24, s20 -fabd v0.4s, v0.4s, v0.4s -fabs v0.2d, v0.2d -fabs v0.2s, v0.2s -fabs v0.4h, v0.4h -fabs v0.4s, v0.4s -fabs v0.8h, v0.8h -facge d20, d21, d22 -facge s10, s11, s12 -facge v0.4s, v0.4s, v0.4s -facgt d20, d21, d22 -facgt s10, s11, s12 -facgt v0.2d, v0.2d, v0.2d -fadd v0.4s, v0.4s, v0.4s -faddp v0.2s, v0.2s, v0.2s -faddp v0.4s, v0.4s, v0.4s -fcmeq d20, d21, #0.0 -fcmeq d20, d21, d22 -fcmeq s10, s11, #0.0 -fcmeq s10, s11, s12 -fcmeq v0.2s, v0.2s, #0.0 -fcmeq v0.2s, v0.2s, v0.2s -fcmge d20, d21, #0.0 -fcmge d20, d21, d22 -fcmge s10, s11, #0.0 -fcmge s10, s11, s12 -fcmge v0.2d, v0.2d, #0.0 -fcmge v0.4s, v0.4s, v0.4s -fcmgt d20, d21, #0.0 -fcmgt d20, d21, d22 -fcmgt s10, s11, #0.0 -fcmgt s10, s11, s12 -fcmgt v0.4s, v0.4s, #0.0 -fcmgt v0.4s, v0.4s, v0.4s -fcmle d20, d21, #0.0 -fcmle s10, s11, #0.0 -fcmle v0.2d, v0.2d, #0.0 -fcmlt d20, d21, #0.0 -fcmlt s10, s11, #0.0 -fcmlt v0.4s, v0.4s, #0.0 -fcvtas d21, d14 -fcvtas s12, s13 -fcvtas v0.2d, v0.2d -fcvtas v0.2s, v0.2s -fcvtas v0.4h, v0.4h -fcvtas v0.4s, v0.4s -fcvtas v0.8h, v0.8h -fcvtau d21, d14 -fcvtau s12, s13 -fcvtau v0.2d, v0.2d -fcvtau v0.2s, v0.2s -fcvtau v0.4h, v0.4h -fcvtau v0.4s, v0.4s -fcvtau v0.8h, v0.8h -fcvtl v0.2d, v0.2s -fcvtl v0.4s, v0.4h -fcvtl2 v0.2d, v0.4s -fcvtl2 v0.4s, v0.8h -fcvtms d21, d14 -fcvtms s22, s13 -fcvtms v0.2d, v0.2d -fcvtms v0.2s, v0.2s -fcvtms v0.4h, v0.4h -fcvtms v0.4s, v0.4s -fcvtms v0.8h, v0.8h -fcvtmu d21, d14 -fcvtmu s12, s13 -fcvtmu v0.2d, v0.2d -fcvtmu v0.2s, v0.2s -fcvtmu v0.4h, v0.4h -fcvtmu v0.4s, v0.4s -fcvtmu v0.8h, v0.8h -fcvtn v0.2s, v0.2d -fcvtn v0.4h, v0.4s -fcvtn2 v0.4s, v0.2d -fcvtn2 v0.8h, v0.4s -fcvtns d21, d14 -fcvtns s22, s13 -fcvtns v0.2d, v0.2d -fcvtns v0.2s, v0.2s -fcvtns v0.4h, v0.4h -fcvtns v0.4s, v0.4s -fcvtns v0.8h, v0.8h -fcvtnu d21, d14 -fcvtnu s12, s13 -fcvtnu v0.2d, v0.2d -fcvtnu v0.2s, v0.2s -fcvtnu v0.4h, v0.4h -fcvtnu v0.4s, v0.4s -fcvtnu v0.8h, v0.8h -fcvtps d21, d14 -fcvtps s22, s13 -fcvtps v0.2d, v0.2d -fcvtps v0.2s, v0.2s -fcvtps v0.4h, v0.4h -fcvtps v0.4s, v0.4s -fcvtps v0.8h, v0.8h -fcvtpu d21, d14 -fcvtpu s12, s13 -fcvtpu v0.2d, v0.2d -fcvtpu v0.2s, v0.2s -fcvtpu v0.4h, v0.4h -fcvtpu v0.4s, v0.4s -fcvtpu v0.8h, v0.8h -fcvtxn s22, d13 -fcvtxn v0.2s, v0.2d -fcvtxn2 v0.4s, v0.2d -fcvtzs d21, d12, #1 -fcvtzs d21, d14 -fcvtzs s12, s13 -fcvtzs s21, s12, #1 -fcvtzs v0.2d, v0.2d -fcvtzs v0.2d, v0.2d, #3 -fcvtzs v0.2s, v0.2s -fcvtzs v0.2s, v0.2s, #3 -fcvtzs v0.4h, v0.4h -fcvtzs v0.4s, v0.4s -fcvtzs v0.4s, v0.4s, #3 -fcvtzs v0.8h, v0.8h -fcvtzu d21, d12, #1 -fcvtzu d21, d14 -fcvtzu s12, s13 -fcvtzu s21, s12, #1 -fcvtzu v0.2d, v0.2d -fcvtzu v0.2d, v0.2d, #3 -fcvtzu v0.2s, v0.2s -fcvtzu v0.2s, v0.2s, #3 -fcvtzu v0.4h, v0.4h -fcvtzu v0.4s, v0.4s -fcvtzu v0.4s, v0.4s, #3 -fcvtzu v0.8h, v0.8h -fdiv v0.2s, v0.2s, v0.2s -fmax v0.2d, v0.2d, v0.2d -fmax v0.2s, v0.2s, v0.2s -fmax v0.4s, v0.4s, v0.4s -fmaxnm v0.2d, v0.2d, v0.2d -fmaxnm v0.2s, v0.2s, v0.2s -fmaxnm v0.4s, v0.4s, v0.4s -fmaxnmp v0.2d, v0.2d, v0.2d -fmaxnmp v0.2s, v0.2s, v0.2s -fmaxnmp v0.4s, v0.4s, v0.4s -fmaxp v0.2d, v0.2d, v0.2d -fmaxp v0.2s, v0.2s, v0.2s -fmaxp v0.4s, v0.4s, v0.4s -fmin v0.2d, v0.2d, v0.2d -fmin v0.2s, v0.2s, v0.2s -fmin v0.4s, v0.4s, v0.4s -fminnm v0.2d, v0.2d, v0.2d -fminnm v0.2s, v0.2s, v0.2s -fminnm v0.4s, v0.4s, v0.4s -fminnmp v0.2d, v0.2d, v0.2d -fminnmp v0.2s, v0.2s, v0.2s -fminnmp v0.4s, v0.4s, v0.4s -fminp v0.2d, v0.2d, v0.2d -fminp v0.2s, v0.2s, v0.2s -fminp v0.4s, v0.4s, v0.4s -fmla d0, d1, v0.d[1] -fmla s0, s1, v0.s[3] -fmla v0.2s, v0.2s, v0.2s -fmls d0, d4, v0.d[1] -fmls s3, s5, v0.s[3] -fmls v0.2s, v0.2s, v0.2s -fmov v0.2d, #-1.25 -fmov v0.2s, #13.0 -fmov v0.4s, #1.0 -fmul d0, d1, v0.d[1] -fmul s0, s1, v0.s[3] -fmul v0.2s, v0.2s, v0.2s -fmulx d0, d4, v0.d[1] -fmulx d23, d11, d1 -fmulx s20, s22, s15 -fmulx s3, s5, v0.s[3] -fmulx v0.2d, v0.2d, v0.2d -fmulx v0.2s, v0.2s, v0.2s -fmulx v0.4s, v0.4s, v0.4s -fneg v0.2d, v0.2d -fneg v0.2s, v0.2s -fneg v0.4h, v0.4h -fneg v0.4s, v0.4s -fneg v0.8h, v0.8h -frecpe d13, d13 -frecpe s19, s14 -frecpe v0.2d, v0.2d -frecpe v0.2s, v0.2s -frecpe v0.4h, v0.4h -frecpe v0.4s, v0.4s -frecpe v0.8h, v0.8h -frecps v0.4s, v0.4s, v0.4s -frecps d22, d30, d21 -frecps s21, s16, s13 -frecpx d16, d19 -frecpx s18, s10 -frinta v0.2d, v0.2d -frinta v0.2s, v0.2s -frinta v0.4h, v0.4h -frinta v0.4s, v0.4s -frinta v0.8h, v0.8h -frinti v0.2d, v0.2d -frinti v0.2s, v0.2s -frinti v0.4h, v0.4h -frinti v0.4s, v0.4s -frinti v0.8h, v0.8h -frintm v0.2d, v0.2d -frintm v0.2s, v0.2s -frintm v0.4h, v0.4h -frintm v0.4s, v0.4s -frintm v0.8h, v0.8h -frintn v0.2d, v0.2d -frintn v0.2s, v0.2s -frintn v0.4h, v0.4h -frintn v0.4s, v0.4s -frintn v0.8h, v0.8h -frintp v0.2d, v0.2d -frintp v0.2s, v0.2s -frintp v0.4h, v0.4h -frintp v0.4s, v0.4s -frintp v0.8h, v0.8h -frintx v0.2d, v0.2d -frintx v0.2s, v0.2s -frintx v0.4h, v0.4h -frintx v0.4s, v0.4s -frintx v0.8h, v0.8h -frintz v0.2d, v0.2d -frintz v0.2s, v0.2s -frintz v0.4h, v0.4h -frintz v0.4s, v0.4s -frintz v0.8h, v0.8h -frsqrte d21, d12 -frsqrte s22, s13 -frsqrte v0.2d, v0.2d -frsqrte v0.2s, v0.2s -frsqrte v0.4h, v0.4h -frsqrte v0.4s, v0.4s -frsqrte v0.8h, v0.8h -frsqrts d8, d22, d18 -frsqrts s21, s5, s12 -frsqrts v0.2d, v0.2d, v0.2d -fsqrt v0.2d, v0.2d -fsqrt v0.2s, v0.2s -fsqrt v0.4h, v0.4h -fsqrt v0.4s, v0.4s -fsqrt v0.8h, v0.8h -fsub v0.2s, v0.2s, v0.2s -ld1 { v0.16b }, [x0] -ld1 { v0.2d, v1.2d, v2.2d }, [x0], #48 -ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] -ld1 { v0.4s, v1.4s }, [sp], #32 -ld1 { v0.4s, v1.4s, v2.4s }, [sp] -ld1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3 -ld1 { v0.8h }, [x15], x2 -ld1 { v0.8h, v1.8h }, [x15] -ld1 { v0.b }[9], [x0] -ld1 { v0.b }[9], [x0], #1 -ld1r { v0.16b }, [x0] -ld1r { v0.16b }, [x0], #1 -ld1r { v0.8h }, [x15] -ld1r { v0.8h }, [x15], #2 -ld2 { v0.16b, v1.16b }, [x0], x1 -ld2 { v0.8b, v1.8b }, [x0] -ld2 { v0.h, v1.h }[7], [x15] -ld2 { v0.h, v1.h }[7], [x15], #4 -ld2r { v0.2d, v1.2d }, [x0] -ld2r { v0.2d, v1.2d }, [x0], #16 -ld2r { v0.4s, v1.4s }, [sp] -ld2r { v0.4s, v1.4s }, [sp], #8 -ld3 { v0.4h, v1.4h, v2.4h }, [x15] -ld3 { v0.8h, v1.8h, v2.8h }, [x15], x2 -ld3 { v0.s, v1.s, v2.s }[3], [sp] -ld3 { v0.s, v1.s, v2.s }[3], [sp], x3 -ld3r { v0.4h, v1.4h, v2.4h }, [x15] -ld3r { v0.4h, v1.4h, v2.4h }, [x15], #6 -ld3r { v0.8b, v1.8b, v2.8b }, [x0] -ld3r { v0.8b, v1.8b, v2.8b }, [x0], #3 -ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] -ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64 -ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0] -ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0], #32 -ld4 { v0.h, v1.h, v2.h, v3.h }[7], [x0], x0 -ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [sp] -ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [sp], x7 -ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] -ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x30 -mla v0.8b, v0.8b, v0.8b -mls v0.4h, v0.4h, v0.4h -mov b0, v0.b[15] -mov d6, v0.d[1] -mov h2, v0.h[5] -mov s17, v0.s[2] -mov v0.16b, v0.16b -mov v0.8b, v0.8b -movi d15, #0xff00ff00ff00ff -movi v0.16b, #31 -movi v0.2d, #0xff0000ff0000ffff -movi v0.2s, #8, msl #8 -movi v0.4s, #255, lsl #24 -movi v0.8b, #255 -mul v0.8b, v0.8b, v0.8b -mvni v0.2s, 0 -mvni v0.4s, #16, msl #16 -neg d29, d24 -neg v0.16b, v0.16b -neg v0.2d, v0.2d -neg v0.2s, v0.2s -neg v0.4h, v0.4h -neg v0.4s, v0.4s -neg v0.8b, v0.8b -neg v0.8h, v0.8h -not v0.16b, v0.16b -not v0.8b, v0.8b -orn v0.16b, v0.16b, v0.16b -orr v0.16b, v0.16b, v0.16b -orr v0.8h, #31 -pmul v0.16b, v0.16b, v0.16b -pmul v0.8b, v0.8b, v0.8b -pmull v0.8h, v0.8b, v0.8b -pmull2 v0.8h, v0.16b, v0.16b -raddhn v0.2s, v0.2d, v0.2d -raddhn v0.4h, v0.4s, v0.4s -raddhn v0.8b, v0.8h, v0.8h -raddhn2 v0.16b, v0.8h, v0.8h -raddhn2 v0.4s, v0.2d, v0.2d -raddhn2 v0.8h, v0.4s, v0.4s -rbit v0.16b, v0.16b -rbit v0.8b, v0.8b -rev16 v21.8b, v1.8b -rev16 v30.16b, v31.16b -rev32 v0.4h, v9.4h -rev32 v21.8b, v1.8b -rev32 v30.16b, v31.16b -rev32 v4.8h, v7.8h -rev64 v0.16b, v31.16b -rev64 v1.8b, v9.8b -rev64 v13.4h, v21.4h -rev64 v2.8h, v4.8h -rev64 v4.2s, v0.2s -rev64 v6.4s, v8.4s -rshrn v0.2s, v0.2d, #3 -rshrn v0.4h, v0.4s, #3 -rshrn v0.8b, v0.8h, #3 -rshrn2 v0.16b, v0.8h, #3 -rshrn2 v0.4s, v0.2d, #3 -rshrn2 v0.8h, v0.4s, #3 -rsubhn v0.2s, v0.2d, v0.2d -rsubhn v0.4h, v0.4s, v0.4s -rsubhn v0.8b, v0.8h, v0.8h -rsubhn2 v0.16b, v0.8h, v0.8h -rsubhn2 v0.4s, v0.2d, v0.2d -rsubhn2 v0.8h, v0.4s, v0.4s -saba v0.16b, v0.16b, v0.16b -sabal v0.2d, v0.2s, v0.2s -sabal v0.4s, v0.4h, v0.4h -sabal v0.8h, v0.8b, v0.8b -sabal2 v0.2d, v0.4s, v0.4s -sabal2 v0.4s, v0.8h, v0.8h -sabal2 v0.8h, v0.16b, v0.16b -sabd v0.4h, v0.4h, v0.4h -sabdl v0.2d, v0.2s, v0.2s -sabdl v0.4s, v0.4h, v0.4h -sabdl v0.8h, v0.8b, v0.8b -sabdl2 v0.2d, v0.4s, v0.4s -sabdl2 v0.4s, v0.8h, v0.8h -sabdl2 v0.8h, v0.16b, v0.16b -sadalp v0.1d, v0.2s -sadalp v0.2d, v0.4s -sadalp v0.2s, v0.4h -sadalp v0.4h, v0.8b -sadalp v0.4s, v0.8h -sadalp v0.8h, v0.16b -saddl v0.2d, v0.2s, v0.2s -saddl v0.4s, v0.4h, v0.4h -saddl v0.8h, v0.8b, v0.8b -saddl2 v0.2d, v0.4s, v0.4s -saddl2 v0.4s, v0.8h, v0.8h -saddl2 v0.8h, v0.16b, v0.16b -saddlp v0.1d, v0.2s -saddlp v0.2d, v0.4s -saddlp v0.2s, v0.4h -saddlp v0.4h, v0.8b -saddlp v0.4s, v0.8h -saddlp v0.8h, v0.16b -saddw v0.2d, v0.2d, v0.2s -saddw v0.4s, v0.4s, v0.4h -saddw v0.8h, v0.8h, v0.8b -saddw2 v0.2d, v0.2d, v0.4s -saddw2 v0.4s, v0.4s, v0.8h -saddw2 v0.8h, v0.8h, v0.16b -scvtf d21, d12 -scvtf d21, d12, #64 -scvtf s22, s13 -scvtf s22, s13, #32 -scvtf v0.2d, v0.2d -scvtf v0.2d, v0.2d, #3 -scvtf v0.2s, v0.2s -scvtf v0.2s, v0.2s, #3 -scvtf v0.4h, v0.4h -scvtf v0.4s, v0.4s -scvtf v0.4s, v0.4s, #3 -scvtf v0.8h, v0.8h -shadd v0.8b, v0.8b, v0.8b -shl d7, d10, #12 -shl v0.16b, v0.16b, #3 -shl v0.2d, v0.2d, #3 -shl v0.4h, v0.4h, #3 -shl v0.4s, v0.4s, #3 -shll v0.2d, v0.2s, #32 -shll v0.4s, v0.4h, #16 -shll v0.8h, v0.8b, #8 -shll v0.2d, v0.2s, #32 -shll v0.4s, v0.4h, #16 -shll v0.8h, v0.8b, #8 -shll2 v0.2d, v0.4s, #32 -shll2 v0.4s, v0.8h, #16 -shll2 v0.8h, v0.16b, #8 -shll2 v0.2d, v0.4s, #32 -shll2 v0.4s, v0.8h, #16 -shll2 v0.8h, v0.16b, #8 -shrn v0.2s, v0.2d, #3 -shrn v0.4h, v0.4s, #3 -shrn v0.8b, v0.8h, #3 -shrn2 v0.16b, v0.8h, #3 -shrn2 v0.4s, v0.2d, #3 -shrn2 v0.8h, v0.4s, #3 -shsub v0.2s, v0.2s, v0.2s -shsub v0.4h, v0.4h, v0.4h -sli d10, d14, #12 -sli v0.16b, v0.16b, #3 -sli v0.2d, v0.2d, #3 -sli v0.2s, v0.2s, #3 -sli v0.4h, v0.4h, #3 -sli v0.4s, v0.4s, #3 -sli v0.8b, v0.8b, #3 -sli v0.8h, v0.8h, #3 -smax v0.2s, v0.2s, v0.2s -smax v0.4h, v0.4h, v0.4h -smax v0.8b, v0.8b, v0.8b -smaxp v0.2s, v0.2s, v0.2s -smaxp v0.4h, v0.4h, v0.4h -smaxp v0.8b, v0.8b, v0.8b -smin v0.16b, v0.16b, v0.16b -smin v0.4s, v0.4s, v0.4s -smin v0.8h, v0.8h, v0.8h -sminp v0.16b, v0.16b, v0.16b -sminp v0.4s, v0.4s, v0.4s -sminp v0.8h, v0.8h, v0.8h -smlal v0.2d, v0.2s, v0.2s -smlal v0.4s, v0.4h, v0.4h -smlal v0.8h, v0.8b, v0.8b -smlal2 v0.2d, v0.4s, v0.4s -smlal2 v0.4s, v0.8h, v0.8h -smlal2 v0.8h, v0.16b, v0.16b -smlsl v0.2d, v0.2s, v0.2s -smlsl v0.4s, v0.4h, v0.4h -smlsl v0.8h, v0.8b, v0.8b -smlsl2 v0.2d, v0.4s, v0.4s -smlsl2 v0.4s, v0.8h, v0.8h -smlsl2 v0.8h, v0.16b, v0.16b -smull v0.2d, v0.2s, v0.2s -smull v0.4s, v0.4h, v0.4h -smull v0.8h, v0.8b, v0.8b -smull2 v0.2d, v0.4s, v0.4s -smull2 v0.4s, v0.8h, v0.8h -smull2 v0.8h, v0.16b, v0.16b -sqabs b19, b14 -sqabs d18, d12 -sqabs h21, h15 -sqabs s20, s12 -sqabs v0.16b, v0.16b -sqabs v0.2d, v0.2d -sqabs v0.2s, v0.2s -sqabs v0.4h, v0.4h -sqabs v0.4s, v0.4s -sqabs v0.8b, v0.8b -sqabs v0.8h, v0.8h -sqadd b20, b11, b15 -sqadd v0.16b, v0.16b, v0.16b -sqadd v0.2s, v0.2s, v0.2s -sqdmlal d19, s24, s12 -sqdmlal d8, s9, v0.s[1] -sqdmlal s0, h0, v0.h[3] -sqdmlal s17, h27, h12 -sqdmlal v0.2d, v0.2s, v0.2s -sqdmlal v0.4s, v0.4h, v0.4h -sqdmlal2 v0.2d, v0.4s, v0.4s -sqdmlal2 v0.4s, v0.8h, v0.8h -sqdmlsl d12, s23, s13 -sqdmlsl d8, s9, v0.s[1] -sqdmlsl s0, h0, v0.h[3] -sqdmlsl s14, h12, h25 -sqdmlsl v0.2d, v0.2s, v0.2s -sqdmlsl v0.4s, v0.4h, v0.4h -sqdmlsl2 v0.2d, v0.4s, v0.4s -sqdmlsl2 v0.4s, v0.8h, v0.8h -sqdmulh h10, h11, h12 -sqdmulh h7, h15, v0.h[3] -sqdmulh s15, s14, v0.s[1] -sqdmulh s20, s21, s2 -sqdmulh v0.2s, v0.2s, v0.2s -sqdmulh v0.4s, v0.4s, v0.4s -sqdmull d1, s1, v0.s[1] -sqdmull d15, s22, s12 -sqdmull s1, h1, v0.h[3] -sqdmull s12, h22, h12 -sqdmull v0.2d, v0.2s, v0.2s -sqdmull v0.4s, v0.4h, v0.4h -sqdmull2 v0.2d, v0.4s, v0.4s -sqdmull2 v0.4s, v0.8h, v0.8h -sqneg b19, b14 -sqneg d18, d12 -sqneg h21, h15 -sqneg s20, s12 -sqneg v0.16b, v0.16b -sqneg v0.2d, v0.2d -sqneg v0.2s, v0.2s -sqneg v0.4h, v0.4h -sqneg v0.4s, v0.4s -sqneg v0.8b, v0.8b -sqneg v0.8h, v0.8h -sqrdmulh h10, h11, h12 -sqrdmulh h7, h15, v0.h[3] -sqrdmulh s15, s14, v0.s[1] -sqrdmulh s20, s21, s2 -sqrdmulh v0.4h, v0.4h, v0.4h -sqrdmulh v0.8h, v0.8h, v0.8h -sqrshl d31, d31, d31 -sqrshl h3, h4, h15 -sqrshl v0.2s, v0.2s, v0.2s -sqrshl v0.4h, v0.4h, v0.4h -sqrshl v0.8b, v0.8b, v0.8b -sqrshrn b10, h13, #2 -sqrshrn h15, s10, #6 -sqrshrn s15, d12, #9 -sqrshrn v0.2s, v0.2d, #3 -sqrshrn v0.4h, v0.4s, #3 -sqrshrn v0.8b, v0.8h, #3 -sqrshrn2 v0.16b, v0.8h, #3 -sqrshrn2 v0.4s, v0.2d, #3 -sqrshrn2 v0.8h, v0.4s, #3 -sqrshrun b17, h10, #6 -sqrshrun h10, s13, #15 -sqrshrun s22, d16, #31 -sqrshrun v0.2s, v0.2d, #3 -sqrshrun v0.4h, v0.4s, #3 -sqrshrun v0.8b, v0.8h, #3 -sqrshrun2 v0.16b, v0.8h, #3 -sqrshrun2 v0.4s, v0.2d, #3 -sqrshrun2 v0.8h, v0.4s, #3 -sqshl b11, b19, #7 -sqshl d15, d16, #51 -sqshl d31, d31, d31 -sqshl h13, h18, #11 -sqshl h3, h4, h15 -sqshl s14, s17, #22 -sqshl v0.16b, v0.16b, #3 -sqshl v0.2d, v0.2d, #3 -sqshl v0.2s, v0.2s, #3 -sqshl v0.2s, v0.2s, v0.2s -sqshl v0.4h, v0.4h, #3 -sqshl v0.4h, v0.4h, v0.4h -sqshl v0.4s, v0.4s, #3 -sqshl v0.8b, v0.8b, #3 -sqshl v0.8b, v0.8b, v0.8b -sqshl v0.8h, v0.8h, #3 -sqshlu b15, b18, #6 -sqshlu d11, d13, #32 -sqshlu h19, h17, #6 -sqshlu s16, s14, #25 -sqshlu v0.16b, v0.16b, #3 -sqshlu v0.2d, v0.2d, #3 -sqshlu v0.2s, v0.2s, #3 -sqshlu v0.4h, v0.4h, #3 -sqshlu v0.4s, v0.4s, #3 -sqshlu v0.8b, v0.8b, #3 -sqshlu v0.8h, v0.8h, #3 -sqshrn b10, h15, #5 -sqshrn h17, s10, #4 -sqshrn s18, d10, #31 -sqshrn v0.2s, v0.2d, #3 -sqshrn v0.4h, v0.4s, #3 -sqshrn v0.8b, v0.8h, #3 -sqshrn2 v0.16b, v0.8h, #3 -sqshrn2 v0.4s, v0.2d, #3 -sqshrn2 v0.8h, v0.4s, #3 -sqshrun b15, h10, #7 -sqshrun h20, s14, #3 -sqshrun s10, d15, #15 -sqshrun v0.2s, v0.2d, #3 -sqshrun v0.4h, v0.4s, #3 -sqshrun v0.8b, v0.8h, #3 -sqshrun2 v0.16b, v0.8h, #3 -sqshrun2 v0.4s, v0.2d, #3 -sqshrun2 v0.8h, v0.4s, #3 -sqsub s20, s10, s7 -sqsub v0.2d, v0.2d, v0.2d -sqsub v0.4s, v0.4s, v0.4s -sqsub v0.8b, v0.8b, v0.8b -sqxtn b18, h18 -sqxtn h20, s17 -sqxtn s19, d14 -sqxtn v0.2s, v0.2d -sqxtn v0.4h, v0.4s -sqxtn v0.8b, v0.8h -sqxtn2 v0.16b, v0.8h -sqxtn2 v0.4s, v0.2d -sqxtn2 v0.8h, v0.4s -sqxtun b19, h14 -sqxtun h21, s15 -sqxtun s20, d12 -sqxtun v0.2s, v0.2d -sqxtun v0.4h, v0.4s -sqxtun v0.8b, v0.8h -sqxtun2 v0.16b, v0.8h -sqxtun2 v0.4s, v0.2d -sqxtun2 v0.8h, v0.4s -srhadd v0.2s, v0.2s, v0.2s -srhadd v0.4h, v0.4h, v0.4h -srhadd v0.8b, v0.8b, v0.8b -sri d10, d12, #14 -sri v0.16b, v0.16b, #3 -sri v0.2d, v0.2d, #3 -sri v0.2s, v0.2s, #3 -sri v0.4h, v0.4h, #3 -sri v0.4s, v0.4s, #3 -sri v0.8b, v0.8b, #3 -sri v0.8h, v0.8h, #3 -srshl d16, d16, d16 -srshl v0.2s, v0.2s, v0.2s -srshl v0.4h, v0.4h, v0.4h -srshl v0.8b, v0.8b, v0.8b -srshr d19, d18, #7 -srshr v0.16b, v0.16b, #3 -srshr v0.2d, v0.2d, #3 -srshr v0.2s, v0.2s, #3 -srshr v0.4h, v0.4h, #3 -srshr v0.4s, v0.4s, #3 -srshr v0.8b, v0.8b, #3 -srshr v0.8h, v0.8h, #3 -srsra d15, d11, #19 -srsra v0.16b, v0.16b, #3 -srsra v0.2d, v0.2d, #3 -srsra v0.2s, v0.2s, #3 -srsra v0.4h, v0.4h, #3 -srsra v0.4s, v0.4s, #3 -srsra v0.8b, v0.8b, #3 -srsra v0.8h, v0.8h, #3 -sshl d31, d31, d31 -sshl v0.2d, v0.2d, v0.2d -sshl v0.2s, v0.2s, v0.2s -sshl v0.4h, v0.4h, v0.4h -sshl v0.8b, v0.8b, v0.8b -sshll v0.2d, v0.2s, #3 -sshll2 v0.4s, v0.8h, #3 -sshr d15, d16, #12 -sshr v0.16b, v0.16b, #3 -sshr v0.2d, v0.2d, #3 -sshr v0.2s, v0.2s, #3 -sshr v0.4h, v0.4h, #3 -sshr v0.4s, v0.4s, #3 -sshr v0.8b, v0.8b, #3 -sshr v0.8h, v0.8h, #3 -ssra d18, d12, #21 -ssra v0.16b, v0.16b, #3 -ssra v0.2d, v0.2d, #3 -ssra v0.2s, v0.2s, #3 -ssra v0.4h, v0.4h, #3 -ssra v0.4s, v0.4s, #3 -ssra v0.8b, v0.8b, #3 -ssra v0.8h, v0.8h, #3 -ssubl v0.2d, v0.2s, v0.2s -ssubl v0.4s, v0.4h, v0.4h -ssubl v0.8h, v0.8b, v0.8b -ssubl2 v0.2d, v0.4s, v0.4s -ssubl2 v0.4s, v0.8h, v0.8h -ssubl2 v0.8h, v0.16b, v0.16b -ssubw v0.2d, v0.2d, v0.2s -ssubw v0.4s, v0.4s, v0.4h -ssubw v0.8h, v0.8h, v0.8b -ssubw2 v0.2d, v0.2d, v0.4s -ssubw2 v0.4s, v0.4s, v0.8h -ssubw2 v0.8h, v0.8h, v0.16b -st1 { v0.16b }, [x0] -st1 { v0.2d, v1.2d, v2.2d }, [x0], #48 -st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] -st1 { v0.4s, v1.4s }, [sp], #32 -st1 { v0.4s, v1.4s, v2.4s }, [sp] -st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3 -st1 { v0.8h }, [x15], x2 -st1 { v0.8h, v1.8h }, [x15] -st1 { v0.d }[1], [x0] -st1 { v0.d }[1], [x0], #8 -st2 { v0.16b, v1.16b }, [x0], x1 -st2 { v0.8b, v1.8b }, [x0] -st2 { v0.s, v1.s }[3], [sp] -st2 { v0.s, v1.s }[3], [sp], #8 -st3 { v0.4h, v1.4h, v2.4h }, [x15] -st3 { v0.8h, v1.8h, v2.8h }, [x15], x2 -st3 { v0.h, v1.h, v2.h }[7], [x15] -st3 { v0.h, v1.h, v2.h }[7], [x15], #6 -st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] -st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64 -st4 { v0.b, v1.b, v2.b, v3.b }[9], [x0] -st4 { v0.b, v1.b, v2.b, v3.b }[9], [x0], x5 -sub d15, d5, d16 -sub v0.2d, v0.2d, v0.2d -suqadd b19, b14 -suqadd d18, d22 -suqadd h20, h15 -suqadd s21, s12 -suqadd v0.16b, v0.16b -suqadd v0.2d, v0.2d -suqadd v0.2s, v0.2s -suqadd v0.4h, v0.4h -suqadd v0.4s, v0.4s -suqadd v0.8b, v0.8b -suqadd v0.8h, v0.8h -tbl v0.16b, { v0.16b }, v0.16b -tbl v0.16b, { v0.16b, v1.16b }, v0.16b -tbl v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b -tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b -tbl v0.8b, { v0.16b }, v0.8b -tbl v0.8b, { v0.16b, v1.16b }, v0.8b -tbl v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b -tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b -tbx v0.16b, { v0.16b }, v0.16b -tbx v0.16b, { v0.16b, v1.16b }, v0.16b -tbx v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b -tbx v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b -tbx v0.8b, { v0.16b }, v0.8b -tbx v0.8b, { v0.16b, v1.16b }, v0.8b -tbx v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b -tbx v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b -trn1 v0.16b, v0.16b, v0.16b -trn1 v0.2d, v0.2d, v0.2d -trn1 v0.2s, v0.2s, v0.2s -trn1 v0.4h, v0.4h, v0.4h -trn1 v0.4s, v0.4s, v0.4s -trn1 v0.8b, v0.8b, v0.8b -trn1 v0.8h, v0.8h, v0.8h -trn2 v0.16b, v0.16b, v0.16b -trn2 v0.2d, v0.2d, v0.2d -trn2 v0.2s, v0.2s, v0.2s -trn2 v0.4h, v0.4h, v0.4h -trn2 v0.4s, v0.4s, v0.4s -trn2 v0.8b, v0.8b, v0.8b -trn2 v0.8h, v0.8h, v0.8h -uaba v0.8b, v0.8b, v0.8b -uabal v0.2d, v0.2s, v0.2s -uabal v0.4s, v0.4h, v0.4h -uabal v0.8h, v0.8b, v0.8b -uabal2 v0.2d, v0.4s, v0.4s -uabal2 v0.4s, v0.8h, v0.8h -uabal2 v0.8h, v0.16b, v0.16b -uabd v0.4h, v0.4h, v0.4h -uabdl v0.2d, v0.2s, v0.2s -uabdl v0.4s, v0.4h, v0.4h -uabdl v0.8h, v0.8b, v0.8b -uabdl2 v0.2d, v0.4s, v0.4s -uabdl2 v0.4s, v0.8h, v0.8h -uabdl2 v0.8h, v0.16b, v0.16b -uadalp v0.1d, v0.2s -uadalp v0.2d, v0.4s -uadalp v0.2s, v0.4h -uadalp v0.4h, v0.8b -uadalp v0.4s, v0.8h -uadalp v0.8h, v0.16b -uaddl v0.2d, v0.2s, v0.2s -uaddl v0.4s, v0.4h, v0.4h -uaddl v0.8h, v0.8b, v0.8b -uaddl2 v0.2d, v0.4s, v0.4s -uaddl2 v0.4s, v0.8h, v0.8h -uaddl2 v0.8h, v0.16b, v0.16b -uaddlp v0.1d, v0.2s -uaddlp v0.2d, v0.4s -uaddlp v0.2s, v0.4h -uaddlp v0.4h, v0.8b -uaddlp v0.4s, v0.8h -uaddlp v0.8h, v0.16b -uaddw v0.2d, v0.2d, v0.2s -uaddw v0.4s, v0.4s, v0.4h -uaddw v0.8h, v0.8h, v0.8b -uaddw2 v0.2d, v0.2d, v0.4s -uaddw2 v0.4s, v0.4s, v0.8h -uaddw2 v0.8h, v0.8h, v0.16b -ucvtf d21, d14 -ucvtf d21, d14, #64 -ucvtf s22, s13 -ucvtf s22, s13, #32 -ucvtf v0.2d, v0.2d -ucvtf v0.2d, v0.2d, #3 -ucvtf v0.2s, v0.2s -ucvtf v0.2s, v0.2s, #3 -ucvtf v0.4h, v0.4h -ucvtf v0.4s, v0.4s -ucvtf v0.4s, v0.4s, #3 -ucvtf v0.8h, v0.8h -uhadd v0.16b, v0.16b, v0.16b -uhadd v0.8h, v0.8h, v0.8h -uhsub v0.4s, v0.4s, v0.4s -umax v0.16b, v0.16b, v0.16b -umax v0.4s, v0.4s, v0.4s -umax v0.8h, v0.8h, v0.8h -umaxp v0.16b, v0.16b, v0.16b -umaxp v0.4s, v0.4s, v0.4s -umaxp v0.8h, v0.8h, v0.8h -umin v0.2s, v0.2s, v0.2s -umin v0.4h, v0.4h, v0.4h -umin v0.8b, v0.8b, v0.8b -uminp v0.2s, v0.2s, v0.2s -uminp v0.4h, v0.4h, v0.4h -uminp v0.8b, v0.8b, v0.8b -umlal v0.2d, v0.2s, v0.2s -umlal v0.4s, v0.4h, v0.4h -umlal v0.8h, v0.8b, v0.8b -umlal2 v0.2d, v0.4s, v0.4s -umlal2 v0.4s, v0.8h, v0.8h -umlal2 v0.8h, v0.16b, v0.16b -umlsl v0.2d, v0.2s, v0.2s -umlsl v0.4s, v0.4h, v0.4h -umlsl v0.8h, v0.8b, v0.8b -umlsl2 v0.2d, v0.4s, v0.4s -umlsl2 v0.4s, v0.8h, v0.8h -umlsl2 v0.8h, v0.16b, v0.16b -umull v0.2d, v0.2s, v0.2s -umull v0.4s, v0.4h, v0.4h -umull v0.8h, v0.8b, v0.8b -umull2 v0.2d, v0.4s, v0.4s -umull2 v0.4s, v0.8h, v0.8h -umull2 v0.8h, v0.16b, v0.16b -uqadd h0, h1, h5 -uqadd v0.8h, v0.8h, v0.8h -uqrshl b11, b20, b30 -uqrshl s23, s20, s16 -uqrshl v0.16b, v0.16b, v0.16b -uqrshl v0.4s, v0.4s, v0.4s -uqrshl v0.4s, v0.4s, v0.4s -uqrshl v0.8h, v0.8h, v0.8h -uqrshrn b10, h12, #5 -uqrshrn h12, s10, #14 -uqrshrn s10, d10, #25 -uqrshrn v0.2s, v0.2d, #3 -uqrshrn v0.4h, v0.4s, #3 -uqrshrn v0.8b, v0.8h, #3 -uqrshrn2 v0.16b, v0.8h, #3 -uqrshrn2 v0.4s, v0.2d, #3 -uqrshrn2 v0.8h, v0.4s, #3 -uqshl b11, b20, b30 -uqshl b18, b15, #6 -uqshl d15, d12, #19 -uqshl h11, h18, #7 -uqshl s14, s19, #18 -uqshl s23, s20, s16 -uqshl v0.16b, v0.16b, #3 -uqshl v0.16b, v0.16b, v0.16b -uqshl v0.2d, v0.2d, #3 -uqshl v0.2d, v0.2d, v0.2d -uqshl v0.2s, v0.2s, #3 -uqshl v0.4h, v0.4h, #3 -uqshl v0.4s, v0.4s, #3 -uqshl v0.4s, v0.4s, v0.4s -uqshl v0.8b, v0.8b, #3 -uqshl v0.8h, v0.8h, #3 -uqshl v0.8h, v0.8h, v0.8h -uqshrn b12, h10, #7 -uqshrn h10, s14, #5 -uqshrn s10, d12, #13 -uqshrn v0.2s, v0.2d, #3 -uqshrn v0.4h, v0.4s, #3 -uqshrn v0.8b, v0.8h, #3 -uqshrn2 v0.16b, v0.8h, #3 -uqshrn2 v0.4s, v0.2d, #3 -uqshrn2 v0.8h, v0.4s, #3 -uqsub d16, d16, d16 -uqsub v0.4h, v0.4h, v0.4h -uqxtn b18, h18 -uqxtn h20, s17 -uqxtn s19, d14 -uqxtn v0.2s, v0.2d -uqxtn v0.4h, v0.4s -uqxtn v0.8b, v0.8h -uqxtn2 v0.16b, v0.8h -uqxtn2 v0.4s, v0.2d -uqxtn2 v0.8h, v0.4s -urecpe v0.2s, v0.2s -urecpe v0.4s, v0.4s -urhadd v0.16b, v0.16b, v0.16b -urhadd v0.4s, v0.4s, v0.4s -urhadd v0.8h, v0.8h, v0.8h -urshl d8, d7, d4 -urshl v0.16b, v0.16b, v0.16b -urshl v0.2d, v0.2d, v0.2d -urshl v0.4s, v0.4s, v0.4s -urshl v0.8h, v0.8h, v0.8h -urshr d20, d23, #31 -urshr v0.16b, v0.16b, #3 -urshr v0.2d, v0.2d, #3 -urshr v0.2s, v0.2s, #3 -urshr v0.4h, v0.4h, #3 -urshr v0.4s, v0.4s, #3 -urshr v0.8b, v0.8b, #3 -urshr v0.8h, v0.8h, #3 -ursqrte v0.2s, v0.2s -ursqrte v0.4s, v0.4s -ursra d18, d10, #13 -ursra v0.16b, v0.16b, #3 -ursra v0.2d, v0.2d, #3 -ursra v0.2s, v0.2s, #3 -ursra v0.4h, v0.4h, #3 -ursra v0.4s, v0.4s, #3 -ursra v0.8b, v0.8b, #3 -ursra v0.8h, v0.8h, #3 -ushl d0, d0, d0 -ushl v0.16b, v0.16b, v0.16b -ushl v0.4s, v0.4s, v0.4s -ushl v0.8h, v0.8h, v0.8h -ushll v0.4s, v0.4h, #3 -ushll2 v0.8h, v0.16b, #3 -ushr d10, d17, #18 -ushr v0.16b, v0.16b, #3 -ushr v0.2d, v0.2d, #3 -ushr v0.2s, v0.2s, #3 -ushr v0.4h, v0.4h, #3 -ushr v0.4s, v0.4s, #3 -ushr v0.8b, v0.8b, #3 -ushr v0.8h, v0.8h, #3 -usqadd b19, b14 -usqadd d18, d22 -usqadd h20, h15 -usqadd s21, s12 -usqadd v0.16b, v0.16b -usqadd v0.2d, v0.2d -usqadd v0.2s, v0.2s -usqadd v0.4h, v0.4h -usqadd v0.4s, v0.4s -usqadd v0.8b, v0.8b -usqadd v0.8h, v0.8h -usra d20, d13, #61 -usra v0.16b, v0.16b, #3 -usra v0.2d, v0.2d, #3 -usra v0.2s, v0.2s, #3 -usra v0.4h, v0.4h, #3 -usra v0.4s, v0.4s, #3 -usra v0.8b, v0.8b, #3 -usra v0.8h, v0.8h, #3 -usubl v0.2d, v0.2s, v0.2s -usubl v0.4s, v0.4h, v0.4h -usubl v0.8h, v0.8b, v0.8b -usubl2 v0.2d, v0.4s, v0.4s -usubl2 v0.4s, v0.8h, v0.8h -usubl2 v0.8h, v0.16b, v0.16b -usubw v0.2d, v0.2d, v0.2s -usubw v0.4s, v0.4s, v0.4h -usubw v0.8h, v0.8h, v0.8b -usubw2 v0.2d, v0.2d, v0.4s -usubw2 v0.4s, v0.4s, v0.8h -usubw2 v0.8h, v0.8h, v0.16b -uzp1 v0.16b, v0.16b, v0.16b -uzp1 v0.2d, v0.2d, v0.2d -uzp1 v0.2s, v0.2s, v0.2s -uzp1 v0.4h, v0.4h, v0.4h -uzp1 v0.4s, v0.4s, v0.4s -uzp1 v0.8b, v0.8b, v0.8b -uzp1 v0.8h, v0.8h, v0.8h -uzp2 v0.16b, v0.16b, v0.16b -uzp2 v0.2d, v0.2d, v0.2d -uzp2 v0.2s, v0.2s, v0.2s -uzp2 v0.4h, v0.4h, v0.4h -uzp2 v0.4s, v0.4s, v0.4s -uzp2 v0.8b, v0.8b, v0.8b -uzp2 v0.8h, v0.8h, v0.8h -xtn v0.2s, v0.2d -xtn v0.4h, v0.4s -xtn v0.8b, v0.8h -xtn2 v0.16b, v0.8h -xtn2 v0.4s, v0.2d -xtn2 v0.8h, v0.4s -zip1 v0.16b, v0.16b, v0.16b -zip1 v0.2d, v0.2d, v0.2d -zip1 v0.2s, v0.2s, v0.2s -zip1 v0.4h, v0.4h, v0.4h -zip1 v0.4s, v0.4s, v0.4s -zip1 v0.8b, v0.8b, v0.8b -zip1 v0.8h, v0.8h, v0.8h -zip2 v0.16b, v0.16b, v0.16b -zip2 v0.2d, v0.2d, v0.2d -zip2 v0.2s, v0.2s, v0.2s -zip2 v0.4h, v0.4h, v0.4h -zip2 v0.4s, v0.4s, v0.4s -zip2 v0.8b, v0.8b, v0.8b -zip2 v0.8h, v0.8h, v0.8h + .text + add v31.8b, v31.8b, v31.8b + sub v0.2d, v0.2d, v0.2d + fadd v0.4s, v0.4s, v0.4s + fsub v31.2s, v31.2s, v31.2s + mul v0.8b, v1.8b, v2.8b + fmul v0.2s, v1.2s, v2.2s + fdiv v31.2s, v31.2s, v31.2s + pmul v0.8b, v15.8b, v16.8b + pmul v31.16b, v7.16b, v8.16b + and v2.8b, v2.8b, v2.8b + orr v31.16b, v31.16b, v30.16b + eor v0.16b, v1.16b, v2.16b + orn v9.16b, v10.16b, v11.16b + bic v31.8b, v30.8b, v29.8b + bsl v0.8b, v1.8b, v2.8b + bit v31.16b, v31.16b, v31.16b + bif v0.16b, v1.16b, v2.16b + mla v0.8b, v1.8b, v2.8b + mls v31.4h, v31.4h, v31.4h + fmla v0.2s, v1.2s, v2.2s + fmls v31.2s, v31.2s, v31.2s + movi v31.4s, #255, lsl #24 + mvni v0.2s, #0 + bic v15.4h, #15, lsl #8 + orr v16.8h, #31 + movi v8.2s, #8, msl #8 + mvni v16.4s, #16, msl #16 + movi v16.8b, #255 + movi v31.16b, #31 + movi d15, #0xff00ff00ff00ff + movi v31.2d, #0xff0000ff0000ffff + fmov v0.2s, #13.00000000 + fmov v15.4s, #1.00000000 + fmov v31.2d, #-1.25000000 + mov v1.16b, v15.16b + mov v25.8b, v4.8b + uaba v0.8b, v1.8b, v2.8b + saba v31.16b, v30.16b, v29.16b + uabd v15.4h, v16.4h, v17.4h + sabd v5.4h, v4.4h, v6.4h + fabd v1.4s, v31.4s, v16.4s + add d17, d31, d29 + sub d15, d5, d16 + frsqrts v31.2d, v15.2d, v8.2d + frecps v5.4s, v7.4s, v16.4s + facge v0.4s, v31.4s, v16.4s + facgt v31.2d, v29.2d, v28.2d + cmeq v5.16b, v15.16b, v31.16b + cmhs v1.8b, v16.8b, v30.8b + cmge v20.4h, v11.4h, v23.4h + cmhi v13.8h, v3.8h, v27.8h + cmgt v9.4s, v4.4s, v28.4s + cmtst v21.2s, v19.2s, v18.2s + fcmeq v0.2s, v15.2s, v16.2s + fcmge v31.4s, v7.4s, v29.4s + fcmgt v17.4s, v8.4s, v25.4s + cmeq v31.16b, v15.16b, #0 + cmge v3.8b, v15.8b, #0 + cmgt v22.2s, v9.2s, #0 + cmle v5.2d, v14.2d, #0 + cmlt v13.8h, v11.8h, #0 + fcmeq v15.2s, v21.2s, #0.0 + fcmge v14.2d, v13.2d, #0.0 + fcmgt v9.4s, v23.4s, #0.0 + fcmle v11.2d, v6.2d, #0.0 + fcmlt v12.4s, v25.4s, #0.0 + shadd v0.8b, v31.8b, v29.8b + uhadd v15.16b, v16.16b, v17.16b + shsub v0.4h, v1.4h, v2.4h + uhadd v5.8h, v7.8h, v8.8h + shsub v9.2s, v11.2s, v21.2s + uhsub v22.4s, v30.4s, v19.4s + srhadd v3.8b, v5.8b, v7.8b + urhadd v7.16b, v17.16b, v27.16b + srhadd v10.4h, v11.4h, v13.4h + urhadd v1.8h, v2.8h, v3.8h + srhadd v4.2s, v5.2s, v6.2s + urhadd v7.4s, v7.4s, v7.4s + sqsub v0.8b, v1.8b, v2.8b + sqadd v0.16b, v1.16b, v2.16b + uqsub v0.4h, v1.4h, v2.4h + uqadd v0.8h, v1.8h, v2.8h + sqadd v0.2s, v1.2s, v2.2s + sqsub v0.4s, v1.4s, v2.4s + sqsub v0.2d, v1.2d, v2.2d + sqadd b20, b11, b15 + uqadd h0, h1, h5 + sqsub s20, s10, s7 + uqsub d16, d16, d16 + sshl v10.8b, v15.8b, v22.8b + ushl v10.16b, v5.16b, v2.16b + sshl v10.4h, v15.4h, v22.4h + ushl v10.8h, v5.8h, v2.8h + sshl v10.2s, v15.2s, v22.2s + ushl v10.4s, v5.4s, v2.4s + sshl v0.2d, v1.2d, v2.2d + sqshl v1.8b, v15.8b, v22.8b + uqshl v2.16b, v14.16b, v23.16b + sqshl v3.4h, v13.4h, v24.4h + uqshl v4.8h, v12.8h, v25.8h + sqshl v5.2s, v11.2s, v26.2s + uqshl v6.4s, v10.4s, v27.4s + uqshl v0.2d, v1.2d, v2.2d + srshl v10.8b, v5.8b, v22.8b + urshl v10.16b, v5.16b, v2.16b + srshl v1.4h, v5.4h, v31.4h + urshl v1.8h, v5.8h, v2.8h + srshl v10.2s, v15.2s, v2.2s + urshl v1.4s, v5.4s, v2.4s + urshl v0.2d, v1.2d, v2.2d + sqrshl v1.8b, v15.8b, v22.8b + uqrshl v2.16b, v14.16b, v23.16b + sqrshl v3.4h, v13.4h, v24.4h + uqrshl v4.8h, v12.8h, v25.8h + sqrshl v5.2s, v11.2s, v26.2s + uqrshl v6.4s, v10.4s, v27.4s + uqrshl v6.4s, v10.4s, v27.4s + sshl d31, d31, d31 + ushl d0, d0, d0 + sqshl d31, d31, d31 + uqshl s23, s20, s16 + sqshl h3, h4, h15 + uqshl b11, b20, b30 + srshl d16, d16, d16 + urshl d8, d7, d4 + sqrshl d31, d31, d31 + uqrshl s23, s20, s16 + sqrshl h3, h4, h15 + uqrshl b11, b20, b30 + smax v1.8b, v15.8b, v22.8b + umax v2.16b, v14.16b, v23.16b + smax v3.4h, v13.4h, v24.4h + umax v4.8h, v12.8h, v25.8h + smax v5.2s, v11.2s, v26.2s + umax v6.4s, v10.4s, v27.4s + umin v1.8b, v15.8b, v22.8b + smin v2.16b, v14.16b, v23.16b + umin v3.4h, v13.4h, v24.4h + smin v4.8h, v12.8h, v25.8h + umin v5.2s, v11.2s, v26.2s + smin v6.4s, v10.4s, v27.4s + fmax v29.2s, v28.2s, v25.2s + fmax v9.4s, v8.4s, v5.4s + fmax v11.2d, v10.2d, v7.2d + fmin v29.2s, v28.2s, v25.2s + fmin v9.4s, v8.4s, v5.4s + fmin v11.2d, v10.2d, v7.2d + fmaxnm v9.2s, v8.2s, v5.2s + fmaxnm v9.4s, v8.4s, v5.4s + fmaxnm v11.2d, v10.2d, v7.2d + fminnm v2.2s, v8.2s, v25.2s + fminnm v9.4s, v8.4s, v5.4s + fminnm v11.2d, v10.2d, v7.2d + smaxp v1.8b, v15.8b, v22.8b + umaxp v2.16b, v14.16b, v23.16b + smaxp v3.4h, v13.4h, v24.4h + umaxp v4.8h, v12.8h, v25.8h + smaxp v5.2s, v11.2s, v26.2s + umaxp v6.4s, v10.4s, v27.4s + uminp v1.8b, v15.8b, v22.8b + sminp v2.16b, v14.16b, v23.16b + uminp v3.4h, v13.4h, v24.4h + sminp v4.8h, v12.8h, v25.8h + uminp v5.2s, v11.2s, v26.2s + sminp v6.4s, v10.4s, v27.4s + fmaxp v29.2s, v28.2s, v25.2s + fmaxp v9.4s, v8.4s, v5.4s + fmaxp v11.2d, v10.2d, v7.2d + fminp v29.2s, v28.2s, v25.2s + fminp v9.4s, v8.4s, v5.4s + fminp v11.2d, v10.2d, v7.2d + fmaxnmp v9.2s, v8.2s, v5.2s + fmaxnmp v9.4s, v8.4s, v5.4s + fmaxnmp v11.2d, v10.2d, v7.2d + fminnmp v2.2s, v8.2s, v25.2s + fminnmp v9.4s, v8.4s, v5.4s + fminnmp v11.2d, v10.2d, v7.2d + addp v31.8b, v31.8b, v31.8b + addp v0.2d, v0.2d, v0.2d + faddp v0.4s, v0.4s, v0.4s + faddp v31.2s, v31.2s, v31.2s + sqdmulh v31.2s, v31.2s, v31.2s + sqdmulh v5.4s, v7.4s, v9.4s + sqrdmulh v31.4h, v3.4h, v13.4h + sqrdmulh v0.8h, v10.8h, v20.8h + fmulx v1.2s, v22.2s, v2.2s + fmulx v21.4s, v15.4s, v3.4s + fmulx v11.2d, v5.2d, v23.2d + shll2 v2.8h, v4.16b, #8 + shll2 v6.4s, v8.8h, #16 + shll2 v6.2d, v8.4s, #32 + shll v2.8h, v4.8b, #8 + shll v6.4s, v8.4h, #16 + shll v6.2d, v8.2s, #32 + shl v0.4h, v1.4h, #3 + shl v0.16b, v1.16b, #3 + shl v0.4s, v1.4s, #3 + shl v0.2d, v1.2d, #3 + sshll v0.2d, v1.2s, #3 + sshll2 v0.4s, v1.8h, #3 + ushll v0.4s, v1.4h, #3 + ushll2 v0.8h, v1.16b, #3 + sshr v0.8b, v1.8b, #3 + sshr v0.4h, v1.4h, #3 + sshr v0.2s, v1.2s, #3 + sshr v0.16b, v1.16b, #3 + sshr v0.8h, v1.8h, #3 + sshr v0.4s, v1.4s, #3 + sshr v0.2d, v1.2d, #3 + ushr v0.8b, v1.8b, #3 + ushr v0.4h, v1.4h, #3 + ushr v0.2s, v1.2s, #3 + ushr v0.16b, v1.16b, #3 + ushr v0.8h, v1.8h, #3 + ushr v0.4s, v1.4s, #3 + ushr v0.2d, v1.2d, #3 + ssra v0.8b, v1.8b, #3 + ssra v0.4h, v1.4h, #3 + ssra v0.2s, v1.2s, #3 + ssra v0.16b, v1.16b, #3 + ssra v0.8h, v1.8h, #3 + ssra v0.4s, v1.4s, #3 + ssra v0.2d, v1.2d, #3 + usra v0.8b, v1.8b, #3 + usra v0.4h, v1.4h, #3 + usra v0.2s, v1.2s, #3 + usra v0.16b, v1.16b, #3 + usra v0.8h, v1.8h, #3 + usra v0.4s, v1.4s, #3 + usra v0.2d, v1.2d, #3 + srshr v0.8b, v1.8b, #3 + srshr v0.4h, v1.4h, #3 + srshr v0.2s, v1.2s, #3 + srshr v0.16b, v1.16b, #3 + srshr v0.8h, v1.8h, #3 + srshr v0.4s, v1.4s, #3 + srshr v0.2d, v1.2d, #3 + urshr v0.8b, v1.8b, #3 + urshr v0.4h, v1.4h, #3 + urshr v0.2s, v1.2s, #3 + urshr v0.16b, v1.16b, #3 + urshr v0.8h, v1.8h, #3 + urshr v0.4s, v1.4s, #3 + urshr v0.2d, v1.2d, #3 + srsra v0.8b, v1.8b, #3 + srsra v0.4h, v1.4h, #3 + srsra v0.2s, v1.2s, #3 + srsra v0.16b, v1.16b, #3 + srsra v0.8h, v1.8h, #3 + srsra v0.4s, v1.4s, #3 + srsra v0.2d, v1.2d, #3 + ursra v0.8b, v1.8b, #3 + ursra v0.4h, v1.4h, #3 + ursra v0.2s, v1.2s, #3 + ursra v0.16b, v1.16b, #3 + ursra v0.8h, v1.8h, #3 + ursra v0.4s, v1.4s, #3 + ursra v0.2d, v1.2d, #3 + sri v0.8b, v1.8b, #3 + sri v0.4h, v1.4h, #3 + sri v0.2s, v1.2s, #3 + sri v0.16b, v1.16b, #3 + sri v0.8h, v1.8h, #3 + sri v0.4s, v1.4s, #3 + sri v0.2d, v1.2d, #3 + sli v0.8b, v1.8b, #3 + sli v0.4h, v1.4h, #3 + sli v0.2s, v1.2s, #3 + sli v0.16b, v1.16b, #3 + sli v0.8h, v1.8h, #3 + sli v0.4s, v1.4s, #3 + sli v0.2d, v1.2d, #3 + sqshlu v0.8b, v1.8b, #3 + sqshlu v0.4h, v1.4h, #3 + sqshlu v0.2s, v1.2s, #3 + sqshlu v0.16b, v1.16b, #3 + sqshlu v0.8h, v1.8h, #3 + sqshlu v0.4s, v1.4s, #3 + sqshlu v0.2d, v1.2d, #3 + sqshl v0.8b, v1.8b, #3 + sqshl v0.4h, v1.4h, #3 + sqshl v0.2s, v1.2s, #3 + sqshl v0.16b, v1.16b, #3 + sqshl v0.8h, v1.8h, #3 + sqshl v0.4s, v1.4s, #3 + sqshl v0.2d, v1.2d, #3 + uqshl v0.8b, v1.8b, #3 + uqshl v0.4h, v1.4h, #3 + uqshl v0.2s, v1.2s, #3 + uqshl v0.16b, v1.16b, #3 + uqshl v0.8h, v1.8h, #3 + uqshl v0.4s, v1.4s, #3 + uqshl v0.2d, v1.2d, #3 + shrn v0.8b, v1.8h, #3 + shrn v0.4h, v1.4s, #3 + shrn v0.2s, v1.2d, #3 + shrn2 v0.16b, v1.8h, #3 + shrn2 v0.8h, v1.4s, #3 + shrn2 v0.4s, v1.2d, #3 + sqshrun v0.8b, v1.8h, #3 + sqshrun v0.4h, v1.4s, #3 + sqshrun v0.2s, v1.2d, #3 + sqshrun2 v0.16b, v1.8h, #3 + sqshrun2 v0.8h, v1.4s, #3 + sqshrun2 v0.4s, v1.2d, #3 + rshrn v0.8b, v1.8h, #3 + rshrn v0.4h, v1.4s, #3 + rshrn v0.2s, v1.2d, #3 + rshrn2 v0.16b, v1.8h, #3 + rshrn2 v0.8h, v1.4s, #3 + rshrn2 v0.4s, v1.2d, #3 + sqrshrun v0.8b, v1.8h, #3 + sqrshrun v0.4h, v1.4s, #3 + sqrshrun v0.2s, v1.2d, #3 + sqrshrun2 v0.16b, v1.8h, #3 + sqrshrun2 v0.8h, v1.4s, #3 + sqrshrun2 v0.4s, v1.2d, #3 + sqshrn v0.8b, v1.8h, #3 + sqshrn v0.4h, v1.4s, #3 + sqshrn v0.2s, v1.2d, #3 + sqshrn2 v0.16b, v1.8h, #3 + sqshrn2 v0.8h, v1.4s, #3 + sqshrn2 v0.4s, v1.2d, #3 + uqshrn v0.8b, v1.8h, #3 + uqshrn v0.4h, v1.4s, #3 + uqshrn v0.2s, v1.2d, #3 + uqshrn2 v0.16b, v1.8h, #3 + uqshrn2 v0.8h, v1.4s, #3 + uqshrn2 v0.4s, v1.2d, #3 + sqrshrn v0.8b, v1.8h, #3 + sqrshrn v0.4h, v1.4s, #3 + sqrshrn v0.2s, v1.2d, #3 + sqrshrn2 v0.16b, v1.8h, #3 + sqrshrn2 v0.8h, v1.4s, #3 + sqrshrn2 v0.4s, v1.2d, #3 + uqrshrn v0.8b, v1.8h, #3 + uqrshrn v0.4h, v1.4s, #3 + uqrshrn v0.2s, v1.2d, #3 + uqrshrn2 v0.16b, v1.8h, #3 + uqrshrn2 v0.8h, v1.4s, #3 + uqrshrn2 v0.4s, v1.2d, #3 + scvtf v0.2s, v1.2s, #3 + scvtf v0.4s, v1.4s, #3 + scvtf v0.2d, v1.2d, #3 + ucvtf v0.2s, v1.2s, #3 + ucvtf v0.4s, v1.4s, #3 + ucvtf v0.2d, v1.2d, #3 + fcvtzs v0.2s, v1.2s, #3 + fcvtzs v0.4s, v1.4s, #3 + fcvtzs v0.2d, v1.2d, #3 + fcvtzu v0.2s, v1.2s, #3 + fcvtzu v0.4s, v1.4s, #3 + fcvtzu v0.2d, v1.2d, #3 + saddl v0.8h, v1.8b, v2.8b + saddl v0.4s, v1.4h, v2.4h + saddl v0.2d, v1.2s, v2.2s + saddl2 v0.4s, v1.8h, v2.8h + saddl2 v0.8h, v1.16b, v2.16b + saddl2 v0.2d, v1.4s, v2.4s + uaddl v0.8h, v1.8b, v2.8b + uaddl v0.4s, v1.4h, v2.4h + uaddl v0.2d, v1.2s, v2.2s + uaddl2 v0.8h, v1.16b, v2.16b + uaddl2 v0.4s, v1.8h, v2.8h + uaddl2 v0.2d, v1.4s, v2.4s + ssubl v0.8h, v1.8b, v2.8b + ssubl v0.4s, v1.4h, v2.4h + ssubl v0.2d, v1.2s, v2.2s + ssubl2 v0.8h, v1.16b, v2.16b + ssubl2 v0.4s, v1.8h, v2.8h + ssubl2 v0.2d, v1.4s, v2.4s + usubl v0.8h, v1.8b, v2.8b + usubl v0.4s, v1.4h, v2.4h + usubl v0.2d, v1.2s, v2.2s + usubl2 v0.8h, v1.16b, v2.16b + usubl2 v0.4s, v1.8h, v2.8h + usubl2 v0.2d, v1.4s, v2.4s + sabal v0.8h, v1.8b, v2.8b + sabal v0.4s, v1.4h, v2.4h + sabal v0.2d, v1.2s, v2.2s + sabal2 v0.8h, v1.16b, v2.16b + sabal2 v0.4s, v1.8h, v2.8h + sabal2 v0.2d, v1.4s, v2.4s + uabal v0.8h, v1.8b, v2.8b + uabal v0.4s, v1.4h, v2.4h + uabal v0.2d, v1.2s, v2.2s + uabal2 v0.8h, v1.16b, v2.16b + uabal2 v0.4s, v1.8h, v2.8h + uabal2 v0.2d, v1.4s, v2.4s + sabdl v0.8h, v1.8b, v2.8b + sabdl v0.4s, v1.4h, v2.4h + sabdl v0.2d, v1.2s, v2.2s + sabdl2 v0.8h, v1.16b, v2.16b + sabdl2 v0.4s, v1.8h, v2.8h + sabdl2 v0.2d, v1.4s, v2.4s + uabdl v0.8h, v1.8b, v2.8b + uabdl v0.4s, v1.4h, v2.4h + uabdl v0.2d, v1.2s, v2.2s + uabdl2 v0.8h, v1.16b, v2.16b + uabdl2 v0.4s, v1.8h, v2.8h + uabdl2 v0.2d, v1.4s, v2.4s + smlal v0.8h, v1.8b, v2.8b + smlal v0.4s, v1.4h, v2.4h + smlal v0.2d, v1.2s, v2.2s + smlal2 v0.8h, v1.16b, v2.16b + smlal2 v0.4s, v1.8h, v2.8h + smlal2 v0.2d, v1.4s, v2.4s + umlal v0.8h, v1.8b, v2.8b + umlal v0.4s, v1.4h, v2.4h + umlal v0.2d, v1.2s, v2.2s + umlal2 v0.8h, v1.16b, v2.16b + umlal2 v0.4s, v1.8h, v2.8h + umlal2 v0.2d, v1.4s, v2.4s + smlsl v0.8h, v1.8b, v2.8b + smlsl v0.4s, v1.4h, v2.4h + smlsl v0.2d, v1.2s, v2.2s + smlsl2 v0.8h, v1.16b, v2.16b + smlsl2 v0.4s, v1.8h, v2.8h + smlsl2 v0.2d, v1.4s, v2.4s + umlsl v0.8h, v1.8b, v2.8b + umlsl v0.4s, v1.4h, v2.4h + umlsl v0.2d, v1.2s, v2.2s + umlsl2 v0.8h, v1.16b, v2.16b + umlsl2 v0.4s, v1.8h, v2.8h + umlsl2 v0.2d, v1.4s, v2.4s + smull v0.8h, v1.8b, v2.8b + smull v0.4s, v1.4h, v2.4h + smull v0.2d, v1.2s, v2.2s + smull2 v0.8h, v1.16b, v2.16b + smull2 v0.4s, v1.8h, v2.8h + smull2 v0.2d, v1.4s, v2.4s + umull v0.8h, v1.8b, v2.8b + umull v0.4s, v1.4h, v2.4h + umull v0.2d, v1.2s, v2.2s + umull2 v0.8h, v1.16b, v2.16b + umull2 v0.4s, v1.8h, v2.8h + umull2 v0.2d, v1.4s, v2.4s + sqdmlal v0.4s, v1.4h, v2.4h + sqdmlal v0.2d, v1.2s, v2.2s + sqdmlal2 v0.4s, v1.8h, v2.8h + sqdmlal2 v0.2d, v1.4s, v2.4s + sqdmlsl v0.4s, v1.4h, v2.4h + sqdmlsl v0.2d, v1.2s, v2.2s + sqdmlsl2 v0.4s, v1.8h, v2.8h + sqdmlsl2 v0.2d, v1.4s, v2.4s + sqdmull v0.4s, v1.4h, v2.4h + sqdmull v0.2d, v1.2s, v2.2s + sqdmull2 v0.4s, v1.8h, v2.8h + sqdmull2 v0.2d, v1.4s, v2.4s + pmull v0.8h, v1.8b, v2.8b + pmull2 v0.8h, v1.16b, v2.16b + saddw v0.8h, v1.8h, v2.8b + saddw v0.4s, v1.4s, v2.4h + saddw v0.2d, v1.2d, v2.2s + saddw2 v0.8h, v1.8h, v2.16b + saddw2 v0.4s, v1.4s, v2.8h + saddw2 v0.2d, v1.2d, v2.4s + uaddw v0.8h, v1.8h, v2.8b + uaddw v0.4s, v1.4s, v2.4h + uaddw v0.2d, v1.2d, v2.2s + uaddw2 v0.8h, v1.8h, v2.16b + uaddw2 v0.4s, v1.4s, v2.8h + uaddw2 v0.2d, v1.2d, v2.4s + ssubw v0.8h, v1.8h, v2.8b + ssubw v0.4s, v1.4s, v2.4h + ssubw v0.2d, v1.2d, v2.2s + ssubw2 v0.8h, v1.8h, v2.16b + ssubw2 v0.4s, v1.4s, v2.8h + ssubw2 v0.2d, v1.2d, v2.4s + usubw v0.8h, v1.8h, v2.8b + usubw v0.4s, v1.4s, v2.4h + usubw v0.2d, v1.2d, v2.2s + usubw2 v0.8h, v1.8h, v2.16b + usubw2 v0.4s, v1.4s, v2.8h + usubw2 v0.2d, v1.2d, v2.4s + addhn v0.8b, v1.8h, v2.8h + addhn v0.4h, v1.4s, v2.4s + addhn v0.2s, v1.2d, v2.2d + addhn2 v0.16b, v1.8h, v2.8h + addhn2 v0.8h, v1.4s, v2.4s + addhn2 v0.4s, v1.2d, v2.2d + raddhn v0.8b, v1.8h, v2.8h + raddhn v0.4h, v1.4s, v2.4s + raddhn v0.2s, v1.2d, v2.2d + raddhn2 v0.16b, v1.8h, v2.8h + raddhn2 v0.8h, v1.4s, v2.4s + raddhn2 v0.4s, v1.2d, v2.2d + rsubhn v0.8b, v1.8h, v2.8h + rsubhn v0.4h, v1.4s, v2.4s + rsubhn v0.2s, v1.2d, v2.2d + rsubhn2 v0.16b, v1.8h, v2.8h + rsubhn2 v0.8h, v1.4s, v2.4s + rsubhn2 v0.4s, v1.2d, v2.2d + sqdmulh h10, h11, h12 + sqdmulh s20, s21, s2 + sqrdmulh h10, h11, h12 + sqrdmulh s20, s21, s2 + fmulx s20, s22, s15 + fmulx d23, d11, d1 + frecps s21, s16, s13 + frecps d22, d30, d21 + frsqrts s21, s5, s12 + frsqrts d8, d22, d18 + scvtf s22, s13 + scvtf d21, d12 + ucvtf s22, s13 + ucvtf d21, d14 + frecpe s19, s14 + frecpe d13, d13 + frecpx s18, s10 + frecpx d16, d19 + frsqrte s22, s13 + frsqrte d21, d12 + cmeq d20, d21, d22 + cmeq d20, d21, #0 + cmhs d20, d21, d22 + cmge d20, d21, d22 + cmge d20, d21, #0 + cmhi d20, d21, d22 + cmgt d20, d21, d22 + cmgt d20, d21, #0 + cmle d20, d21, #0 + cmlt d20, d21, #0 + cmtst d20, d21, d22 + fcmeq s10, s11, s12 + fcmeq d20, d21, d22 + fcmeq s10, s11, #0.0 + fcmeq d20, d21, #0.0 + fcmge s10, s11, s12 + fcmge d20, d21, d22 + fcmge s10, s11, #0.0 + fcmge d20, d21, #0.0 + fcmgt s10, s11, s12 + fcmgt d20, d21, d22 + fcmgt s10, s11, #0.0 + fcmgt d20, d21, #0.0 + fcmle s10, s11, #0.0 + fcmle d20, d21, #0.0 + fcmlt s10, s11, #0.0 + fcmlt d20, d21, #0.0 + facge s10, s11, s12 + facge d20, d21, d22 + facgt s10, s11, s12 + facgt d20, d21, d22 + abs d29, d24 + sqabs b19, b14 + sqabs h21, h15 + sqabs s20, s12 + sqabs d18, d12 + neg d29, d24 + sqneg b19, b14 + sqneg h21, h15 + sqneg s20, s12 + sqneg d18, d12 + suqadd b19, b14 + suqadd h20, h15 + suqadd s21, s12 + suqadd d18, d22 + usqadd b19, b14 + usqadd h20, h15 + usqadd s21, s12 + usqadd d18, d22 + sqdmlal s17, h27, h12 + sqdmlal d19, s24, s12 + sqdmlsl s14, h12, h25 + sqdmlsl d12, s23, s13 + sqdmull s12, h22, h12 + sqdmull d15, s22, s12 + sqxtun b19, h14 + sqxtun h21, s15 + sqxtun s20, d12 + sqxtn b18, h18 + sqxtn h20, s17 + sqxtn s19, d14 + uqxtn b18, h18 + uqxtn h20, s17 + uqxtn s19, d14 + sshr d15, d16, #12 + ushr d10, d17, #18 + srshr d19, d18, #7 + urshr d20, d23, #31 + ssra d18, d12, #21 + usra d20, d13, #61 + srsra d15, d11, #19 + ursra d18, d10, #13 + shl d7, d10, #12 + sqshl b11, b19, #7 + sqshl h13, h18, #11 + sqshl s14, s17, #22 + sqshl d15, d16, #51 + uqshl b18, b15, #6 + uqshl h11, h18, #7 + uqshl s14, s19, #18 + uqshl d15, d12, #19 + sqshlu b15, b18, #6 + sqshlu h19, h17, #6 + sqshlu s16, s14, #25 + sqshlu d11, d13, #32 + sri d10, d12, #14 + sli d10, d14, #12 + sqshrn b10, h15, #5 + sqshrn h17, s10, #4 + sqshrn s18, d10, #31 + uqshrn b12, h10, #7 + uqshrn h10, s14, #5 + uqshrn s10, d12, #13 + sqrshrn b10, h13, #2 + sqrshrn h15, s10, #6 + sqrshrn s15, d12, #9 + uqrshrn b10, h12, #5 + uqrshrn h12, s10, #14 + uqrshrn s10, d10, #25 + sqshrun b15, h10, #7 + sqshrun h20, s14, #3 + sqshrun s10, d15, #15 + sqrshrun b17, h10, #6 + sqrshrun h10, s13, #15 + sqrshrun s22, d16, #31 + scvtf s22, s13, #32 + scvtf d21, d12, #64 + ucvtf s22, s13, #32 + ucvtf d21, d14, #64 + fcvtzs s21, s12, #1 + fcvtzs d21, d12, #1 + fcvtzu s21, s12, #1 + fcvtzu d21, d12, #1 + ld1 { v0.16b }, [x0] + ld1 { v15.8h, v16.8h }, [x15] + ld1 { v31.4s, v0.4s, v1.4s }, [sp] + ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] + ld2 { v0.8b, v1.8b }, [x0] + ld3 { v15.4h, v16.4h, v17.4h }, [x15] + ld4 { v31.2s, v0.2s, v1.2s, v2.2s }, [sp] + st1 { v0.16b }, [x0] + st1 { v15.8h, v16.8h }, [x15] + st1 { v31.4s, v0.4s, v1.4s }, [sp] + st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] + st2 { v0.8b, v1.8b }, [x0] + st3 { v15.4h, v16.4h, v17.4h }, [x15] + st4 { v31.2s, v0.2s, v1.2s, v2.2s }, [sp] + ld1 { v15.8h }, [x15], x2 + ld1 { v31.4s, v0.4s }, [sp], #32 + ld1 { v0.2d, v1.2d, v2.2d }, [x0], #48 + ld1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3 + ld2 { v0.16b, v1.16b }, [x0], x1 + ld3 { v15.8h, v16.8h, v17.8h }, [x15], x2 + ld4 { v31.4s, v0.4s, v1.4s, v2.4s }, [sp], #64 + st1 { v15.8h }, [x15], x2 + st1 { v31.4s, v0.4s }, [sp], #32 + st1 { v0.2d, v1.2d, v2.2d }, [x0], #48 + st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3 + st2 { v0.16b, v1.16b }, [x0], x1 + st3 { v15.8h, v16.8h, v17.8h }, [x15], x2 + st4 { v31.4s, v0.4s, v1.4s, v2.4s }, [sp], #64 + ld1r { v0.16b }, [x0] + ld1r { v15.8h }, [x15] + ld2r { v31.4s, v0.4s }, [sp] + ld2r { v0.2d, v1.2d }, [x0] + ld3r { v0.8b, v1.8b, v2.8b }, [x0] + ld3r { v15.4h, v16.4h, v17.4h }, [x15] + ld4r { v31.2s, v0.2s, v1.2s, v2.2s }, [sp] + ld4r { v31.1d, v0.1d, v1.1d, v2.1d }, [sp] + ld1 { v0.b }[9], [x0] + ld2 { v15.h, v16.h }[7], [x15] + ld3 { v31.s, v0.s, v1.s }[3], [sp] + ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0] + st1 { v0.d }[1], [x0] + st2 { v31.s, v0.s }[3], [sp] + st3 { v15.h, v16.h, v17.h }[7], [x15] + st4 { v0.b, v1.b, v2.b, v3.b }[9], [x0] + ld1r { v0.16b }, [x0], #1 + ld1r { v15.8h }, [x15], #2 + ld2r { v31.4s, v0.4s }, [sp], #8 + ld2r { v0.2d, v1.2d }, [x0], #16 + ld3r { v0.8b, v1.8b, v2.8b }, [x0], #3 + ld3r { v15.4h, v16.4h, v17.4h }, [x15], #6 + ld4r { v31.2s, v0.2s, v1.2s, v2.2s }, [sp], x30 + ld4r { v31.1d, v0.1d, v1.1d, v2.1d }, [sp], x7 + ld1 { v0.b }[9], [x0], #1 + ld2 { v15.h, v16.h }[7], [x15], #4 + ld3 { v31.s, v0.s, v1.s }[3], [sp], x3 + ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0], #32 + ld4 { v0.h, v1.h, v2.h, v3.h }[7], [x0], x0 + st1 { v0.d }[1], [x0], #8 + st2 { v31.s, v0.s }[3], [sp], #8 + st3 { v15.h, v16.h, v17.h }[7], [x15], #6 + st4 { v0.b, v1.b, v2.b, v3.b }[9], [x0], x5 + ext v0.8b, v1.8b, v2.8b, #3 + ext v0.16b, v1.16b, v2.16b, #3 + uzp1 v1.8b, v1.8b, v2.8b + uzp1 v2.16b, v1.16b, v2.16b + uzp1 v3.4h, v1.4h, v2.4h + uzp1 v4.8h, v1.8h, v2.8h + uzp1 v5.2s, v1.2s, v2.2s + uzp1 v6.4s, v1.4s, v2.4s + uzp1 v7.2d, v1.2d, v2.2d + trn1 v8.8b, v1.8b, v2.8b + trn1 v9.16b, v1.16b, v2.16b + trn1 v10.4h, v1.4h, v2.4h + trn1 v27.8h, v7.8h, v2.8h + trn1 v12.2s, v7.2s, v2.2s + trn1 v29.4s, v6.4s, v2.4s + trn1 v14.2d, v6.2d, v2.2d + zip1 v31.8b, v5.8b, v2.8b + zip1 v0.16b, v5.16b, v2.16b + zip1 v17.4h, v4.4h, v2.4h + zip1 v2.8h, v4.8h, v2.8h + zip1 v19.2s, v3.2s, v2.2s + zip1 v4.4s, v3.4s, v2.4s + zip1 v21.2d, v2.2d, v2.2d + uzp2 v6.8b, v2.8b, v2.8b + uzp2 v23.16b, v1.16b, v2.16b + uzp2 v8.4h, v1.4h, v2.4h + uzp2 v25.8h, v0.8h, v2.8h + uzp2 v10.2s, v0.2s, v2.2s + uzp2 v27.4s, v7.4s, v2.4s + uzp2 v12.2d, v7.2d, v2.2d + trn2 v29.8b, v6.8b, v2.8b + trn2 v14.16b, v6.16b, v2.16b + trn2 v31.4h, v5.4h, v2.4h + trn2 v0.8h, v5.8h, v2.8h + trn2 v17.2s, v4.2s, v2.2s + trn2 v2.4s, v4.4s, v2.4s + trn2 v19.2d, v3.2d, v2.2d + zip2 v4.8b, v3.8b, v2.8b + zip2 v21.16b, v2.16b, v2.16b + zip2 v6.4h, v2.4h, v2.4h + zip2 v23.8h, v1.8h, v2.8h + zip2 v8.2s, v1.2s, v2.2s + zip2 v25.4s, v0.4s, v2.4s + zip2 v10.2d, v0.2d, v2.2d + fmul s0, s1, v1.s[0] + fmul s0, s1, v1.s[3] + fmul d0, d1, v1.d[0] + fmul d0, d1, v1.d[1] + fmul d15, d15, v15.d[1] + fmulx s3, s5, v7.s[0] + fmulx s3, s5, v7.s[3] + fmulx s3, s5, v15.s[3] + fmulx d0, d4, v8.d[0] + fmulx d0, d4, v8.d[1] + fmla s0, s1, v1.s[0] + fmla s0, s1, v1.s[3] + fmla d0, d1, v1.d[0] + fmla d0, d1, v1.d[1] + fmla d15, d15, v15.d[1] + fmls s3, s5, v7.s[0] + fmls s3, s5, v7.s[3] + fmls s3, s5, v15.s[3] + fmls d0, d4, v8.d[0] + fmls d0, d4, v8.d[1] + sqdmlal s0, h0, v0.h[0] + sqdmlal s0, h0, v0.h[1] + sqdmlal s0, h0, v0.h[2] + sqdmlal s0, h0, v0.h[3] + sqdmlal s0, h0, v0.h[4] + sqdmlal s0, h0, v0.h[5] + sqdmlal s0, h0, v0.h[6] + sqdmlal s0, h0, v0.h[7] + sqdmlal d8, s9, v15.s[0] + sqdmlal d8, s9, v15.s[1] + sqdmlal d8, s9, v15.s[2] + sqdmlal d8, s9, v15.s[3] + sqdmlsl s0, h0, v0.h[0] + sqdmlsl s0, h0, v0.h[1] + sqdmlsl s0, h0, v0.h[2] + sqdmlsl s0, h0, v0.h[3] + sqdmlsl s0, h0, v0.h[4] + sqdmlsl s0, h0, v0.h[5] + sqdmlsl s0, h0, v0.h[6] + sqdmlsl s0, h0, v0.h[7] + sqdmlsl d8, s9, v15.s[0] + sqdmlsl d8, s9, v15.s[1] + sqdmlsl d8, s9, v15.s[2] + sqdmlsl d8, s9, v15.s[3] + sqdmull s1, h1, v1.h[0] + sqdmull s1, h1, v1.h[1] + sqdmull s1, h1, v1.h[2] + sqdmull s1, h1, v1.h[3] + sqdmull s1, h1, v1.h[4] + sqdmull s1, h1, v1.h[5] + sqdmull s1, h1, v1.h[6] + sqdmull s1, h1, v1.h[7] + sqdmull d1, s1, v4.s[0] + sqdmull d1, s1, v4.s[1] + sqdmull d1, s1, v4.s[2] + sqdmull d1, s1, v4.s[3] + sqdmulh h7, h1, v14.h[0] + sqdmulh h7, h15, v8.h[1] + sqdmulh h7, h15, v8.h[2] + sqdmulh h7, h15, v8.h[3] + sqdmulh h7, h15, v8.h[4] + sqdmulh h7, h15, v8.h[5] + sqdmulh h7, h15, v8.h[6] + sqdmulh h7, h15, v8.h[7] + sqdmulh s15, s3, v4.s[0] + sqdmulh s15, s14, v16.s[1] + sqdmulh s15, s15, v16.s[2] + sqdmulh s15, s16, v17.s[3] + sqrdmulh h7, h1, v14.h[0] + sqrdmulh h7, h15, v8.h[1] + sqrdmulh h7, h15, v8.h[2] + sqrdmulh h7, h15, v8.h[3] + sqrdmulh h7, h15, v8.h[4] + sqrdmulh h7, h15, v8.h[5] + sqrdmulh h7, h15, v8.h[6] + sqrdmulh h7, h15, v8.h[7] + sqrdmulh s15, s3, v4.s[0] + sqrdmulh s15, s14, v16.s[1] + sqrdmulh s15, s15, v16.s[2] + sqrdmulh s15, s16, v17.s[3] + mov b0, v0.b[15] + mov h2, v31.h[5] + mov s17, v2.s[2] + mov d6, v12.d[1] + tbl v0.8b, { v1.16b }, v2.8b + tbl v16.8b, { v31.16b, v0.16b }, v2.8b + tbl v0.8b, { v1.16b, v2.16b, v3.16b }, v2.8b + tbl v16.8b, { v23.16b, v24.16b, v25.16b, v26.16b }, v2.8b + tbl v0.16b, { v1.16b }, v2.16b + tbl v16.16b, { v31.16b, v0.16b }, v2.16b + tbl v0.16b, { v1.16b, v2.16b, v3.16b }, v2.16b + tbl v0.16b, { v31.16b, v0.16b, v1.16b, v2.16b }, v2.16b + tbx v0.8b, { v1.16b }, v2.8b + tbx v16.8b, { v31.16b, v0.16b }, v2.8b + tbx v0.8b, { v1.16b, v2.16b, v3.16b }, v2.8b + tbx v16.8b, { v23.16b, v24.16b, v25.16b, v26.16b }, v2.8b + tbx v0.16b, { v1.16b }, v2.16b + tbx v16.16b, { v31.16b, v0.16b }, v2.16b + tbx v0.16b, { v1.16b, v2.16b, v3.16b }, v2.16b + tbx v16.16b, { v31.16b, v0.16b, v1.16b, v2.16b }, v2.16b + fcvtxn s22, d13 + fcvtas s12, s13 + fcvtas d21, d14 + fcvtau s12, s13 + fcvtau d21, d14 + fcvtms s22, s13 + fcvtms d21, d14 + fcvtmu s12, s13 + fcvtmu d21, d14 + fcvtns s22, s13 + fcvtns d21, d14 + fcvtnu s12, s13 + fcvtnu d21, d14 + fcvtps s22, s13 + fcvtps d21, d14 + fcvtpu s12, s13 + fcvtpu d21, d14 + fcvtzs s12, s13 + fcvtzs d21, d14 + fcvtzu s12, s13 + fcvtzu d21, d14 + fabd s29, s24, s20 + fabd d29, d24, d20 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -1070,1065 +866,860 @@ # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 4 0.50 abs d29, d24 -# CHECK-NEXT: 1 4 1.00 abs v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 abs v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 abs v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 abs v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 abs v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 abs v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 abs v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 add d17, d31, d29 -# CHECK-NEXT: 1 4 0.50 add v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 addhn v0.2s, v0.2d, v0.2d -# CHECK-NEXT: 1 4 1.00 addhn v0.4h, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 addhn v0.8b, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 addhn2 v0.16b, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 addhn2 v0.4s, v0.2d, v0.2d -# CHECK-NEXT: 1 4 1.00 addhn2 v0.8h, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 addp v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 addp v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 0.50 and v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 0.50 bic v0.4h, #15, lsl #8 -# CHECK-NEXT: 1 4 0.50 bic v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 bif v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 bit v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 bsl v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 cls v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 cls v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 cls v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 cls v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 cls v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 cls v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 clz v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 clz v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 clz v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 clz v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 clz v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 clz v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 cmeq d20, d21, #0 -# CHECK-NEXT: 1 4 0.50 cmeq d20, d21, d22 -# CHECK-NEXT: 1 4 1.00 cmeq v0.16b, v0.16b, #0 -# CHECK-NEXT: 1 4 1.00 cmeq v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 cmge d20, d21, #0 -# CHECK-NEXT: 1 4 0.50 cmge d20, d21, d22 -# CHECK-NEXT: 1 4 0.50 cmge v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 cmge v0.8b, v0.8b, #0 -# CHECK-NEXT: 1 4 0.50 cmgt d20, d21, #0 -# CHECK-NEXT: 1 4 0.50 cmgt d20, d21, d22 -# CHECK-NEXT: 1 4 0.50 cmgt v0.2s, v0.2s, #0 -# CHECK-NEXT: 1 4 1.00 cmgt v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 cmhi d20, d21, d22 -# CHECK-NEXT: 1 4 1.00 cmhi v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 cmhs d20, d21, d22 -# CHECK-NEXT: 1 4 0.50 cmhs v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 0.50 cmle d20, d21, #0 -# CHECK-NEXT: 1 4 1.00 cmle v0.2d, v0.2d, #0 -# CHECK-NEXT: 1 4 0.50 cmlt d20, d21, #0 -# CHECK-NEXT: 1 4 1.00 cmlt v0.8h, v0.8h, #0 -# CHECK-NEXT: 1 4 0.50 cmtst d20, d21, d22 -# CHECK-NEXT: 1 4 0.50 cmtst v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 cnt v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 cnt v0.8b, v0.8b -# CHECK-NEXT: 1 2 0.50 dup v0.16b, w28 -# CHECK-NEXT: 1 2 0.50 dup v0.2d, x28 -# CHECK-NEXT: 1 4 0.50 dup v0.2s, w28 -# CHECK-NEXT: 1 4 0.50 dup v0.4h, w28 -# CHECK-NEXT: 1 2 0.50 dup v0.4s, w28 -# CHECK-NEXT: 1 4 0.50 dup v0.8b, w28 -# CHECK-NEXT: 1 2 0.50 dup v0.8h, w28 -# CHECK-NEXT: 1 4 1.00 eor v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 ext v0.16b, v0.16b, v0.16b, #3 -# CHECK-NEXT: 1 4 0.50 ext v0.8b, v0.8b, v0.8b, #3 -# CHECK-NEXT: 1 4 0.50 fabd d29, d24, d20 -# CHECK-NEXT: 1 4 0.50 fabd s29, s24, s20 -# CHECK-NEXT: 1 4 1.00 fabd v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 fabs v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 fabs v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 fabs v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 fabs v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 fabs v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 facge d20, d21, d22 -# CHECK-NEXT: 1 4 0.50 facge s10, s11, s12 -# CHECK-NEXT: 1 4 1.00 facge v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 facgt d20, d21, d22 -# CHECK-NEXT: 1 4 0.50 facgt s10, s11, s12 -# CHECK-NEXT: 1 4 1.00 facgt v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 2 0.50 add v31.8b, v31.8b, v31.8b +# CHECK-NEXT: 1 2 1.00 sub v0.2d, v0.2d, v0.2d # CHECK-NEXT: 1 4 1.00 fadd v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 faddp v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 4 0.50 fsub v31.2s, v31.2s, v31.2s +# CHECK-NEXT: 1 4 0.50 mul v0.8b, v1.8b, v2.8b +# CHECK-NEXT: 1 4 0.50 fmul v0.2s, v1.2s, v2.2s +# CHECK-NEXT: 1 13 10.00 fdiv v31.2s, v31.2s, v31.2s +# CHECK-NEXT: 1 3 0.50 pmul v0.8b, v15.8b, v16.8b +# CHECK-NEXT: 1 3 1.00 pmul v31.16b, v7.16b, v8.16b +# CHECK-NEXT: 1 1 0.50 and v2.8b, v2.8b, v2.8b +# CHECK-NEXT: 1 1 1.00 orr v31.16b, v31.16b, v30.16b +# CHECK-NEXT: 1 1 1.00 eor v0.16b, v1.16b, v2.16b +# CHECK-NEXT: 1 1 1.00 orn v9.16b, v10.16b, v11.16b +# CHECK-NEXT: 1 1 0.50 bic v31.8b, v30.8b, v29.8b +# CHECK-NEXT: 1 2 0.50 bsl v0.8b, v1.8b, v2.8b +# CHECK-NEXT: 1 2 1.00 bit v31.16b, v31.16b, v31.16b +# CHECK-NEXT: 1 2 1.00 bif v0.16b, v1.16b, v2.16b +# CHECK-NEXT: 1 4 0.50 mla v0.8b, v1.8b, v2.8b +# CHECK-NEXT: 1 4 0.50 mls v31.4h, v31.4h, v31.4h +# CHECK-NEXT: 1 4 0.50 fmla v0.2s, v1.2s, v2.2s +# CHECK-NEXT: 1 4 0.50 fmls v31.2s, v31.2s, v31.2s +# CHECK-NEXT: 1 1 1.00 movi v31.4s, #255, lsl #24 +# CHECK-NEXT: 1 1 0.50 mvni v0.2s, #0 +# CHECK-NEXT: 1 1 0.50 bic v15.4h, #15, lsl #8 +# CHECK-NEXT: 1 1 1.00 orr v16.8h, #31 +# CHECK-NEXT: 1 1 0.50 movi v8.2s, #8, msl #8 +# CHECK-NEXT: 1 1 1.00 mvni v16.4s, #16, msl #16 +# CHECK-NEXT: 1 1 0.50 movi v16.8b, #255 +# CHECK-NEXT: 1 1 1.00 movi v31.16b, #31 +# CHECK-NEXT: 1 1 0.50 movi d15, #0xff00ff00ff00ff +# CHECK-NEXT: 1 1 1.00 movi v31.2d, #0xff0000ff0000ffff +# CHECK-NEXT: 1 1 0.50 fmov v0.2s, #13.00000000 +# CHECK-NEXT: 1 1 0.50 fmov v15.4s, #1.00000000 +# CHECK-NEXT: 1 1 0.50 fmov v31.2d, #-1.25000000 +# CHECK-NEXT: 1 1 1.00 mov v1.16b, v15.16b +# CHECK-NEXT: 1 1 0.50 mov v25.8b, v4.8b +# CHECK-NEXT: 1 4 2.00 uaba v0.8b, v1.8b, v2.8b +# CHECK-NEXT: 1 4 2.00 saba v31.16b, v30.16b, v29.16b +# CHECK-NEXT: 1 3 0.50 uabd v15.4h, v16.4h, v17.4h +# CHECK-NEXT: 1 3 0.50 sabd v5.4h, v4.4h, v6.4h +# CHECK-NEXT: 1 4 1.00 fabd v1.4s, v31.4s, v16.4s +# CHECK-NEXT: 1 2 0.50 add d17, d31, d29 +# CHECK-NEXT: 1 2 0.50 sub d15, d5, d16 +# CHECK-NEXT: 1 22 19.00 frsqrts v31.2d, v15.2d, v8.2d +# CHECK-NEXT: 1 4 1.00 frecps v5.4s, v7.4s, v16.4s +# CHECK-NEXT: 1 2 1.00 facge v0.4s, v31.4s, v16.4s +# CHECK-NEXT: 1 2 1.00 facgt v31.2d, v29.2d, v28.2d +# CHECK-NEXT: 1 2 1.00 cmeq v5.16b, v15.16b, v31.16b +# CHECK-NEXT: 1 2 0.50 cmhs v1.8b, v16.8b, v30.8b +# CHECK-NEXT: 1 2 0.50 cmge v20.4h, v11.4h, v23.4h +# CHECK-NEXT: 1 2 1.00 cmhi v13.8h, v3.8h, v27.8h +# CHECK-NEXT: 1 2 1.00 cmgt v9.4s, v4.4s, v28.4s +# CHECK-NEXT: 1 3 0.50 cmtst v21.2s, v19.2s, v18.2s +# CHECK-NEXT: 1 2 0.50 fcmeq v0.2s, v15.2s, v16.2s +# CHECK-NEXT: 1 2 1.00 fcmge v31.4s, v7.4s, v29.4s +# CHECK-NEXT: 1 2 1.00 fcmgt v17.4s, v8.4s, v25.4s +# CHECK-NEXT: 1 2 1.00 cmeq v31.16b, v15.16b, #0 +# CHECK-NEXT: 1 2 0.50 cmge v3.8b, v15.8b, #0 +# CHECK-NEXT: 1 2 0.50 cmgt v22.2s, v9.2s, #0 +# CHECK-NEXT: 1 2 1.00 cmle v5.2d, v14.2d, #0 +# CHECK-NEXT: 1 2 1.00 cmlt v13.8h, v11.8h, #0 +# CHECK-NEXT: 1 2 0.50 fcmeq v15.2s, v21.2s, #0.0 +# CHECK-NEXT: 1 2 1.00 fcmge v14.2d, v13.2d, #0.0 +# CHECK-NEXT: 1 2 1.00 fcmgt v9.4s, v23.4s, #0.0 +# CHECK-NEXT: 1 2 1.00 fcmle v11.2d, v6.2d, #0.0 +# CHECK-NEXT: 1 2 1.00 fcmlt v12.4s, v25.4s, #0.0 +# CHECK-NEXT: 1 2 0.50 shadd v0.8b, v31.8b, v29.8b +# CHECK-NEXT: 1 2 1.00 uhadd v15.16b, v16.16b, v17.16b +# CHECK-NEXT: 1 2 0.50 shsub v0.4h, v1.4h, v2.4h +# CHECK-NEXT: 1 2 1.00 uhadd v5.8h, v7.8h, v8.8h +# CHECK-NEXT: 1 2 0.50 shsub v9.2s, v11.2s, v21.2s +# CHECK-NEXT: 1 2 1.00 uhsub v22.4s, v30.4s, v19.4s +# CHECK-NEXT: 1 2 0.50 srhadd v3.8b, v5.8b, v7.8b +# CHECK-NEXT: 1 2 1.00 urhadd v7.16b, v17.16b, v27.16b +# CHECK-NEXT: 1 2 0.50 srhadd v10.4h, v11.4h, v13.4h +# CHECK-NEXT: 1 2 1.00 urhadd v1.8h, v2.8h, v3.8h +# CHECK-NEXT: 1 2 0.50 srhadd v4.2s, v5.2s, v6.2s +# CHECK-NEXT: 1 2 1.00 urhadd v7.4s, v7.4s, v7.4s +# CHECK-NEXT: 1 3 0.50 sqsub v0.8b, v1.8b, v2.8b +# CHECK-NEXT: 1 3 1.00 sqadd v0.16b, v1.16b, v2.16b +# CHECK-NEXT: 1 3 0.50 uqsub v0.4h, v1.4h, v2.4h +# CHECK-NEXT: 1 3 1.00 uqadd v0.8h, v1.8h, v2.8h +# CHECK-NEXT: 1 3 0.50 sqadd v0.2s, v1.2s, v2.2s +# CHECK-NEXT: 1 3 1.00 sqsub v0.4s, v1.4s, v2.4s +# CHECK-NEXT: 1 3 1.00 sqsub v0.2d, v1.2d, v2.2d +# CHECK-NEXT: 1 3 0.50 sqadd b20, b11, b15 +# CHECK-NEXT: 1 3 0.50 uqadd h0, h1, h5 +# CHECK-NEXT: 1 3 0.50 sqsub s20, s10, s7 +# CHECK-NEXT: 1 3 0.50 uqsub d16, d16, d16 +# CHECK-NEXT: 1 2 0.50 sshl v10.8b, v15.8b, v22.8b +# CHECK-NEXT: 1 2 1.00 ushl v10.16b, v5.16b, v2.16b +# CHECK-NEXT: 1 2 0.50 sshl v10.4h, v15.4h, v22.4h +# CHECK-NEXT: 1 2 1.00 ushl v10.8h, v5.8h, v2.8h +# CHECK-NEXT: 1 2 0.50 sshl v10.2s, v15.2s, v22.2s +# CHECK-NEXT: 1 2 1.00 ushl v10.4s, v5.4s, v2.4s +# CHECK-NEXT: 1 2 1.00 sshl v0.2d, v1.2d, v2.2d +# CHECK-NEXT: 1 4 0.50 sqshl v1.8b, v15.8b, v22.8b +# CHECK-NEXT: 1 4 1.00 uqshl v2.16b, v14.16b, v23.16b +# CHECK-NEXT: 1 4 0.50 sqshl v3.4h, v13.4h, v24.4h +# CHECK-NEXT: 1 4 1.00 uqshl v4.8h, v12.8h, v25.8h +# CHECK-NEXT: 1 4 0.50 sqshl v5.2s, v11.2s, v26.2s +# CHECK-NEXT: 1 4 1.00 uqshl v6.4s, v10.4s, v27.4s +# CHECK-NEXT: 1 4 1.00 uqshl v0.2d, v1.2d, v2.2d +# CHECK-NEXT: 1 3 0.50 srshl v10.8b, v5.8b, v22.8b +# CHECK-NEXT: 1 3 1.00 urshl v10.16b, v5.16b, v2.16b +# CHECK-NEXT: 1 3 0.50 srshl v1.4h, v5.4h, v31.4h +# CHECK-NEXT: 1 3 1.00 urshl v1.8h, v5.8h, v2.8h +# CHECK-NEXT: 1 3 0.50 srshl v10.2s, v15.2s, v2.2s +# CHECK-NEXT: 1 3 1.00 urshl v1.4s, v5.4s, v2.4s +# CHECK-NEXT: 1 3 1.00 urshl v0.2d, v1.2d, v2.2d +# CHECK-NEXT: 1 4 0.50 sqrshl v1.8b, v15.8b, v22.8b +# CHECK-NEXT: 1 4 1.00 uqrshl v2.16b, v14.16b, v23.16b +# CHECK-NEXT: 1 4 0.50 sqrshl v3.4h, v13.4h, v24.4h +# CHECK-NEXT: 1 4 1.00 uqrshl v4.8h, v12.8h, v25.8h +# CHECK-NEXT: 1 4 0.50 sqrshl v5.2s, v11.2s, v26.2s +# CHECK-NEXT: 1 4 1.00 uqrshl v6.4s, v10.4s, v27.4s +# CHECK-NEXT: 1 4 1.00 uqrshl v6.4s, v10.4s, v27.4s +# CHECK-NEXT: 1 2 0.50 sshl d31, d31, d31 +# CHECK-NEXT: 1 2 0.50 ushl d0, d0, d0 +# CHECK-NEXT: 1 4 0.50 sqshl d31, d31, d31 +# CHECK-NEXT: 1 4 0.50 uqshl s23, s20, s16 +# CHECK-NEXT: 1 4 0.50 sqshl h3, h4, h15 +# CHECK-NEXT: 1 4 0.50 uqshl b11, b20, b30 +# CHECK-NEXT: 1 3 0.50 srshl d16, d16, d16 +# CHECK-NEXT: 1 3 0.50 urshl d8, d7, d4 +# CHECK-NEXT: 1 4 0.50 sqrshl d31, d31, d31 +# CHECK-NEXT: 1 4 0.50 uqrshl s23, s20, s16 +# CHECK-NEXT: 1 4 0.50 sqrshl h3, h4, h15 +# CHECK-NEXT: 1 4 0.50 uqrshl b11, b20, b30 +# CHECK-NEXT: 1 2 0.50 smax v1.8b, v15.8b, v22.8b +# CHECK-NEXT: 1 2 1.00 umax v2.16b, v14.16b, v23.16b +# CHECK-NEXT: 1 2 0.50 smax v3.4h, v13.4h, v24.4h +# CHECK-NEXT: 1 2 1.00 umax v4.8h, v12.8h, v25.8h +# CHECK-NEXT: 1 2 0.50 smax v5.2s, v11.2s, v26.2s +# CHECK-NEXT: 1 2 1.00 umax v6.4s, v10.4s, v27.4s +# CHECK-NEXT: 1 2 0.50 umin v1.8b, v15.8b, v22.8b +# CHECK-NEXT: 1 2 1.00 smin v2.16b, v14.16b, v23.16b +# CHECK-NEXT: 1 2 0.50 umin v3.4h, v13.4h, v24.4h +# CHECK-NEXT: 1 2 1.00 smin v4.8h, v12.8h, v25.8h +# CHECK-NEXT: 1 2 0.50 umin v5.2s, v11.2s, v26.2s +# CHECK-NEXT: 1 2 1.00 smin v6.4s, v10.4s, v27.4s +# CHECK-NEXT: 1 4 0.50 fmax v29.2s, v28.2s, v25.2s +# CHECK-NEXT: 1 4 1.00 fmax v9.4s, v8.4s, v5.4s +# CHECK-NEXT: 1 4 1.00 fmax v11.2d, v10.2d, v7.2d +# CHECK-NEXT: 1 4 0.50 fmin v29.2s, v28.2s, v25.2s +# CHECK-NEXT: 1 4 1.00 fmin v9.4s, v8.4s, v5.4s +# CHECK-NEXT: 1 4 1.00 fmin v11.2d, v10.2d, v7.2d +# CHECK-NEXT: 1 4 0.50 fmaxnm v9.2s, v8.2s, v5.2s +# CHECK-NEXT: 1 4 1.00 fmaxnm v9.4s, v8.4s, v5.4s +# CHECK-NEXT: 1 4 1.00 fmaxnm v11.2d, v10.2d, v7.2d +# CHECK-NEXT: 1 4 0.50 fminnm v2.2s, v8.2s, v25.2s +# CHECK-NEXT: 1 4 1.00 fminnm v9.4s, v8.4s, v5.4s +# CHECK-NEXT: 1 4 1.00 fminnm v11.2d, v10.2d, v7.2d +# CHECK-NEXT: 1 2 0.50 smaxp v1.8b, v15.8b, v22.8b +# CHECK-NEXT: 1 2 1.00 umaxp v2.16b, v14.16b, v23.16b +# CHECK-NEXT: 1 2 0.50 smaxp v3.4h, v13.4h, v24.4h +# CHECK-NEXT: 1 2 1.00 umaxp v4.8h, v12.8h, v25.8h +# CHECK-NEXT: 1 2 0.50 smaxp v5.2s, v11.2s, v26.2s +# CHECK-NEXT: 1 2 1.00 umaxp v6.4s, v10.4s, v27.4s +# CHECK-NEXT: 1 2 0.50 uminp v1.8b, v15.8b, v22.8b +# CHECK-NEXT: 1 2 1.00 sminp v2.16b, v14.16b, v23.16b +# CHECK-NEXT: 1 2 0.50 uminp v3.4h, v13.4h, v24.4h +# CHECK-NEXT: 1 2 1.00 sminp v4.8h, v12.8h, v25.8h +# CHECK-NEXT: 1 2 0.50 uminp v5.2s, v11.2s, v26.2s +# CHECK-NEXT: 1 2 1.00 sminp v6.4s, v10.4s, v27.4s +# CHECK-NEXT: 1 4 0.50 fmaxp v29.2s, v28.2s, v25.2s +# CHECK-NEXT: 1 4 1.00 fmaxp v9.4s, v8.4s, v5.4s +# CHECK-NEXT: 1 4 1.00 fmaxp v11.2d, v10.2d, v7.2d +# CHECK-NEXT: 1 4 0.50 fminp v29.2s, v28.2s, v25.2s +# CHECK-NEXT: 1 4 1.00 fminp v9.4s, v8.4s, v5.4s +# CHECK-NEXT: 1 4 1.00 fminp v11.2d, v10.2d, v7.2d +# CHECK-NEXT: 1 4 0.50 fmaxnmp v9.2s, v8.2s, v5.2s +# CHECK-NEXT: 1 4 1.00 fmaxnmp v9.4s, v8.4s, v5.4s +# CHECK-NEXT: 1 4 1.00 fmaxnmp v11.2d, v10.2d, v7.2d +# CHECK-NEXT: 1 4 0.50 fminnmp v2.2s, v8.2s, v25.2s +# CHECK-NEXT: 1 4 1.00 fminnmp v9.4s, v8.4s, v5.4s +# CHECK-NEXT: 1 4 1.00 fminnmp v11.2d, v10.2d, v7.2d +# CHECK-NEXT: 1 3 0.50 addp v31.8b, v31.8b, v31.8b +# CHECK-NEXT: 1 3 1.00 addp v0.2d, v0.2d, v0.2d # CHECK-NEXT: 1 4 1.00 faddp v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 fcmeq d20, d21, #0.0 -# CHECK-NEXT: 1 4 0.50 fcmeq d20, d21, d22 -# CHECK-NEXT: 1 4 0.50 fcmeq s10, s11, #0.0 -# CHECK-NEXT: 1 4 0.50 fcmeq s10, s11, s12 -# CHECK-NEXT: 1 4 0.50 fcmeq v0.2s, v0.2s, #0.0 -# CHECK-NEXT: 1 4 0.50 fcmeq v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 fcmge d20, d21, #0.0 -# CHECK-NEXT: 1 4 0.50 fcmge d20, d21, d22 -# CHECK-NEXT: 1 4 0.50 fcmge s10, s11, #0.0 -# CHECK-NEXT: 1 4 0.50 fcmge s10, s11, s12 -# CHECK-NEXT: 1 4 1.00 fcmge v0.2d, v0.2d, #0.0 -# CHECK-NEXT: 1 4 1.00 fcmge v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 fcmgt d20, d21, #0.0 -# CHECK-NEXT: 1 4 0.50 fcmgt d20, d21, d22 -# CHECK-NEXT: 1 4 0.50 fcmgt s10, s11, #0.0 -# CHECK-NEXT: 1 4 0.50 fcmgt s10, s11, s12 -# CHECK-NEXT: 1 4 1.00 fcmgt v0.4s, v0.4s, #0.0 -# CHECK-NEXT: 1 4 1.00 fcmgt v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 fcmle d20, d21, #0.0 -# CHECK-NEXT: 1 4 0.50 fcmle s10, s11, #0.0 -# CHECK-NEXT: 1 4 1.00 fcmle v0.2d, v0.2d, #0.0 -# CHECK-NEXT: 1 4 0.50 fcmlt d20, d21, #0.0 -# CHECK-NEXT: 1 4 0.50 fcmlt s10, s11, #0.0 -# CHECK-NEXT: 1 4 1.00 fcmlt v0.4s, v0.4s, #0.0 -# CHECK-NEXT: 1 4 0.50 fcvtas d21, d14 -# CHECK-NEXT: 1 4 0.50 fcvtas s12, s13 -# CHECK-NEXT: 1 4 0.50 fcvtas v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 fcvtas v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 fcvtas v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 fcvtas v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 fcvtas v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 fcvtau d21, d14 -# CHECK-NEXT: 1 4 0.50 fcvtau s12, s13 -# CHECK-NEXT: 1 4 0.50 fcvtau v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 fcvtau v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 fcvtau v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 fcvtau v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 fcvtau v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 fcvtl v0.2d, v0.2s -# CHECK-NEXT: 1 4 0.50 fcvtl v0.4s, v0.4h -# CHECK-NEXT: 1 4 0.50 fcvtl2 v0.2d, v0.4s -# CHECK-NEXT: 1 4 0.50 fcvtl2 v0.4s, v0.8h -# CHECK-NEXT: 1 4 0.50 fcvtms d21, d14 -# CHECK-NEXT: 1 4 0.50 fcvtms s22, s13 -# CHECK-NEXT: 1 4 0.50 fcvtms v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 fcvtms v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 fcvtms v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 fcvtms v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 fcvtms v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 fcvtmu d21, d14 -# CHECK-NEXT: 1 4 0.50 fcvtmu s12, s13 -# CHECK-NEXT: 1 4 0.50 fcvtmu v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 fcvtmu v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 fcvtmu v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 fcvtmu v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 fcvtmu v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 fcvtn v0.2s, v0.2d -# CHECK-NEXT: 1 4 0.50 fcvtn v0.4h, v0.4s -# CHECK-NEXT: 1 4 0.50 fcvtn2 v0.4s, v0.2d -# CHECK-NEXT: 1 4 0.50 fcvtn2 v0.8h, v0.4s -# CHECK-NEXT: 1 4 0.50 fcvtns d21, d14 -# CHECK-NEXT: 1 4 0.50 fcvtns s22, s13 -# CHECK-NEXT: 1 4 0.50 fcvtns v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 fcvtns v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 fcvtns v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 fcvtns v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 fcvtns v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 fcvtnu d21, d14 -# CHECK-NEXT: 1 4 0.50 fcvtnu s12, s13 -# CHECK-NEXT: 1 4 0.50 fcvtnu v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 fcvtnu v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 fcvtnu v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 fcvtnu v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 fcvtnu v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 fcvtps d21, d14 -# CHECK-NEXT: 1 4 0.50 fcvtps s22, s13 -# CHECK-NEXT: 1 4 0.50 fcvtps v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 fcvtps v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 fcvtps v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 fcvtps v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 fcvtps v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 fcvtpu d21, d14 -# CHECK-NEXT: 1 4 0.50 fcvtpu s12, s13 -# CHECK-NEXT: 1 4 0.50 fcvtpu v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 fcvtpu v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 fcvtpu v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 fcvtpu v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 fcvtpu v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 fcvtxn s22, d13 -# CHECK-NEXT: 1 4 0.50 fcvtxn v0.2s, v0.2d -# CHECK-NEXT: 1 4 0.50 fcvtxn2 v0.4s, v0.2d -# CHECK-NEXT: 1 4 0.50 fcvtzs d21, d12, #1 -# CHECK-NEXT: 1 4 0.50 fcvtzs d21, d14 -# CHECK-NEXT: 1 4 0.50 fcvtzs s12, s13 -# CHECK-NEXT: 1 4 0.50 fcvtzs s21, s12, #1 -# CHECK-NEXT: 1 4 0.50 fcvtzs v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 fcvtzs v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 fcvtzs v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 fcvtzs v0.2s, v0.2s, #3 -# CHECK-NEXT: 1 4 0.50 fcvtzs v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 fcvtzs v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 fcvtzs v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 fcvtzs v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 fcvtzu d21, d12, #1 -# CHECK-NEXT: 1 4 0.50 fcvtzu d21, d14 -# CHECK-NEXT: 1 4 0.50 fcvtzu s12, s13 -# CHECK-NEXT: 1 4 0.50 fcvtzu s21, s12, #1 -# CHECK-NEXT: 1 4 0.50 fcvtzu v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 fcvtzu v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 fcvtzu v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 fcvtzu v0.2s, v0.2s, #3 -# CHECK-NEXT: 1 4 0.50 fcvtzu v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 fcvtzu v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 fcvtzu v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 fcvtzu v0.8h, v0.8h -# CHECK-NEXT: 1 13 10.00 fdiv v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 fmax v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 fmax v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 fmax v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 fmaxnm v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 fmaxnm v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 fmaxnm v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 fmaxnmp v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 fmaxnmp v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 fmaxnmp v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 fmaxp v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 fmaxp v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 fmaxp v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 fmin v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 fmin v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 fmin v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 fminnm v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 fminnm v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 fminnm v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 fminnmp v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 fminnmp v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 fminnmp v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 fminp v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 fminp v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 fminp v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 fmla d0, d1, v0.d[1] -# CHECK-NEXT: 1 4 0.50 fmla s0, s1, v0.s[3] -# CHECK-NEXT: 1 4 0.50 fmla v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 fmls d0, d4, v0.d[1] -# CHECK-NEXT: 1 4 0.50 fmls s3, s5, v0.s[3] -# CHECK-NEXT: 1 4 0.50 fmls v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 fmov v0.2d, #-1.25000000 -# CHECK-NEXT: 1 4 0.50 fmov v0.2s, #13.00000000 -# CHECK-NEXT: 1 4 1.00 fmov v0.4s, #1.00000000 -# CHECK-NEXT: 1 4 0.50 fmul d0, d1, v0.d[1] -# CHECK-NEXT: 1 4 0.50 fmul s0, s1, v0.s[3] -# CHECK-NEXT: 1 4 0.50 fmul v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 fmulx d0, d4, v0.d[1] -# CHECK-NEXT: 1 4 0.50 fmulx d23, d11, d1 +# CHECK-NEXT: 1 4 0.50 faddp v31.2s, v31.2s, v31.2s +# CHECK-NEXT: 1 4 0.50 sqdmulh v31.2s, v31.2s, v31.2s +# CHECK-NEXT: 1 4 1.00 sqdmulh v5.4s, v7.4s, v9.4s +# CHECK-NEXT: 1 4 0.50 sqrdmulh v31.4h, v3.4h, v13.4h +# CHECK-NEXT: 1 4 1.00 sqrdmulh v0.8h, v10.8h, v20.8h +# CHECK-NEXT: 1 4 0.50 fmulx v1.2s, v22.2s, v2.2s +# CHECK-NEXT: 1 4 1.00 fmulx v21.4s, v15.4s, v3.4s +# CHECK-NEXT: 1 4 1.00 fmulx v11.2d, v5.2d, v23.2d +# CHECK-NEXT: 1 2 1.00 shll2 v2.8h, v4.16b, #8 +# CHECK-NEXT: 1 2 1.00 shll2 v6.4s, v8.8h, #16 +# CHECK-NEXT: 1 2 1.00 shll2 v6.2d, v8.4s, #32 +# CHECK-NEXT: 1 2 1.00 shll v2.8h, v4.8b, #8 +# CHECK-NEXT: 1 2 1.00 shll v6.4s, v8.4h, #16 +# CHECK-NEXT: 1 2 1.00 shll v6.2d, v8.2s, #32 +# CHECK-NEXT: 1 2 0.50 shl v0.4h, v1.4h, #3 +# CHECK-NEXT: 1 2 0.50 shl v0.16b, v1.16b, #3 +# CHECK-NEXT: 1 2 0.50 shl v0.4s, v1.4s, #3 +# CHECK-NEXT: 1 2 0.50 shl v0.2d, v1.2d, #3 +# CHECK-NEXT: 1 2 1.00 sshll v0.2d, v1.2s, #3 +# CHECK-NEXT: 1 2 1.00 sshll2 v0.4s, v1.8h, #3 +# CHECK-NEXT: 1 2 1.00 ushll v0.4s, v1.4h, #3 +# CHECK-NEXT: 1 2 1.00 ushll2 v0.8h, v1.16b, #3 +# CHECK-NEXT: 1 2 0.50 sshr v0.8b, v1.8b, #3 +# CHECK-NEXT: 1 2 0.50 sshr v0.4h, v1.4h, #3 +# CHECK-NEXT: 1 2 0.50 sshr v0.2s, v1.2s, #3 +# CHECK-NEXT: 1 2 0.50 sshr v0.16b, v1.16b, #3 +# CHECK-NEXT: 1 2 0.50 sshr v0.8h, v1.8h, #3 +# CHECK-NEXT: 1 2 0.50 sshr v0.4s, v1.4s, #3 +# CHECK-NEXT: 1 2 0.50 sshr v0.2d, v1.2d, #3 +# CHECK-NEXT: 1 2 0.50 ushr v0.8b, v1.8b, #3 +# CHECK-NEXT: 1 2 0.50 ushr v0.4h, v1.4h, #3 +# CHECK-NEXT: 1 2 0.50 ushr v0.2s, v1.2s, #3 +# CHECK-NEXT: 1 2 0.50 ushr v0.16b, v1.16b, #3 +# CHECK-NEXT: 1 2 0.50 ushr v0.8h, v1.8h, #3 +# CHECK-NEXT: 1 2 0.50 ushr v0.4s, v1.4s, #3 +# CHECK-NEXT: 1 2 0.50 ushr v0.2d, v1.2d, #3 +# CHECK-NEXT: 1 3 0.50 ssra v0.8b, v1.8b, #3 +# CHECK-NEXT: 1 3 0.50 ssra v0.4h, v1.4h, #3 +# CHECK-NEXT: 1 3 0.50 ssra v0.2s, v1.2s, #3 +# CHECK-NEXT: 1 3 1.00 ssra v0.16b, v1.16b, #3 +# CHECK-NEXT: 1 3 1.00 ssra v0.8h, v1.8h, #3 +# CHECK-NEXT: 1 3 1.00 ssra v0.4s, v1.4s, #3 +# CHECK-NEXT: 1 3 1.00 ssra v0.2d, v1.2d, #3 +# CHECK-NEXT: 1 3 0.50 usra v0.8b, v1.8b, #3 +# CHECK-NEXT: 1 3 0.50 usra v0.4h, v1.4h, #3 +# CHECK-NEXT: 1 3 0.50 usra v0.2s, v1.2s, #3 +# CHECK-NEXT: 1 3 1.00 usra v0.16b, v1.16b, #3 +# CHECK-NEXT: 1 3 1.00 usra v0.8h, v1.8h, #3 +# CHECK-NEXT: 1 3 1.00 usra v0.4s, v1.4s, #3 +# CHECK-NEXT: 1 3 1.00 usra v0.2d, v1.2d, #3 +# CHECK-NEXT: 1 3 0.50 srshr v0.8b, v1.8b, #3 +# CHECK-NEXT: 1 3 0.50 srshr v0.4h, v1.4h, #3 +# CHECK-NEXT: 1 3 0.50 srshr v0.2s, v1.2s, #3 +# CHECK-NEXT: 1 3 1.00 srshr v0.16b, v1.16b, #3 +# CHECK-NEXT: 1 3 1.00 srshr v0.8h, v1.8h, #3 +# CHECK-NEXT: 1 3 1.00 srshr v0.4s, v1.4s, #3 +# CHECK-NEXT: 1 3 1.00 srshr v0.2d, v1.2d, #3 +# CHECK-NEXT: 1 3 0.50 urshr v0.8b, v1.8b, #3 +# CHECK-NEXT: 1 3 0.50 urshr v0.4h, v1.4h, #3 +# CHECK-NEXT: 1 3 0.50 urshr v0.2s, v1.2s, #3 +# CHECK-NEXT: 1 3 1.00 urshr v0.16b, v1.16b, #3 +# CHECK-NEXT: 1 3 1.00 urshr v0.8h, v1.8h, #3 +# CHECK-NEXT: 1 3 1.00 urshr v0.4s, v1.4s, #3 +# CHECK-NEXT: 1 3 1.00 urshr v0.2d, v1.2d, #3 +# CHECK-NEXT: 1 4 2.00 srsra v0.8b, v1.8b, #3 +# CHECK-NEXT: 1 4 2.00 srsra v0.4h, v1.4h, #3 +# CHECK-NEXT: 1 4 2.00 srsra v0.2s, v1.2s, #3 +# CHECK-NEXT: 1 4 2.00 srsra v0.16b, v1.16b, #3 +# CHECK-NEXT: 1 4 2.00 srsra v0.8h, v1.8h, #3 +# CHECK-NEXT: 1 4 2.00 srsra v0.4s, v1.4s, #3 +# CHECK-NEXT: 1 4 2.00 srsra v0.2d, v1.2d, #3 +# CHECK-NEXT: 1 4 2.00 ursra v0.8b, v1.8b, #3 +# CHECK-NEXT: 1 4 2.00 ursra v0.4h, v1.4h, #3 +# CHECK-NEXT: 1 4 2.00 ursra v0.2s, v1.2s, #3 +# CHECK-NEXT: 1 4 2.00 ursra v0.16b, v1.16b, #3 +# CHECK-NEXT: 1 4 2.00 ursra v0.8h, v1.8h, #3 +# CHECK-NEXT: 1 4 2.00 ursra v0.4s, v1.4s, #3 +# CHECK-NEXT: 1 4 2.00 ursra v0.2d, v1.2d, #3 +# CHECK-NEXT: 1 2 0.50 sri v0.8b, v1.8b, #3 +# CHECK-NEXT: 1 2 0.50 sri v0.4h, v1.4h, #3 +# CHECK-NEXT: 1 2 0.50 sri v0.2s, v1.2s, #3 +# CHECK-NEXT: 1 2 1.00 sri v0.16b, v1.16b, #3 +# CHECK-NEXT: 1 2 1.00 sri v0.8h, v1.8h, #3 +# CHECK-NEXT: 1 2 1.00 sri v0.4s, v1.4s, #3 +# CHECK-NEXT: 1 2 1.00 sri v0.2d, v1.2d, #3 +# CHECK-NEXT: 1 2 0.50 sli v0.8b, v1.8b, #3 +# CHECK-NEXT: 1 2 0.50 sli v0.4h, v1.4h, #3 +# CHECK-NEXT: 1 2 0.50 sli v0.2s, v1.2s, #3 +# CHECK-NEXT: 1 2 1.00 sli v0.16b, v1.16b, #3 +# CHECK-NEXT: 1 2 1.00 sli v0.8h, v1.8h, #3 +# CHECK-NEXT: 1 2 1.00 sli v0.4s, v1.4s, #3 +# CHECK-NEXT: 1 2 1.00 sli v0.2d, v1.2d, #3 +# CHECK-NEXT: 1 4 0.50 sqshlu v0.8b, v1.8b, #3 +# CHECK-NEXT: 1 4 0.50 sqshlu v0.4h, v1.4h, #3 +# CHECK-NEXT: 1 4 0.50 sqshlu v0.2s, v1.2s, #3 +# CHECK-NEXT: 1 4 1.00 sqshlu v0.16b, v1.16b, #3 +# CHECK-NEXT: 1 4 1.00 sqshlu v0.8h, v1.8h, #3 +# CHECK-NEXT: 1 4 1.00 sqshlu v0.4s, v1.4s, #3 +# CHECK-NEXT: 1 4 1.00 sqshlu v0.2d, v1.2d, #3 +# CHECK-NEXT: 1 4 0.50 sqshl v0.8b, v1.8b, #3 +# CHECK-NEXT: 1 4 0.50 sqshl v0.4h, v1.4h, #3 +# CHECK-NEXT: 1 4 0.50 sqshl v0.2s, v1.2s, #3 +# CHECK-NEXT: 1 4 1.00 sqshl v0.16b, v1.16b, #3 +# CHECK-NEXT: 1 4 1.00 sqshl v0.8h, v1.8h, #3 +# CHECK-NEXT: 1 4 1.00 sqshl v0.4s, v1.4s, #3 +# CHECK-NEXT: 1 4 1.00 sqshl v0.2d, v1.2d, #3 +# CHECK-NEXT: 1 4 0.50 uqshl v0.8b, v1.8b, #3 +# CHECK-NEXT: 1 4 0.50 uqshl v0.4h, v1.4h, #3 +# CHECK-NEXT: 1 4 0.50 uqshl v0.2s, v1.2s, #3 +# CHECK-NEXT: 1 4 1.00 uqshl v0.16b, v1.16b, #3 +# CHECK-NEXT: 1 4 1.00 uqshl v0.8h, v1.8h, #3 +# CHECK-NEXT: 1 4 1.00 uqshl v0.4s, v1.4s, #3 +# CHECK-NEXT: 1 4 1.00 uqshl v0.2d, v1.2d, #3 +# CHECK-NEXT: 1 2 0.50 shrn v0.8b, v1.8h, #3 +# CHECK-NEXT: 1 2 0.50 shrn v0.4h, v1.4s, #3 +# CHECK-NEXT: 1 2 0.50 shrn v0.2s, v1.2d, #3 +# CHECK-NEXT: 1 2 0.50 shrn2 v0.16b, v1.8h, #3 +# CHECK-NEXT: 1 2 0.50 shrn2 v0.8h, v1.4s, #3 +# CHECK-NEXT: 1 2 0.50 shrn2 v0.4s, v1.2d, #3 +# CHECK-NEXT: 1 4 0.50 sqshrun v0.8b, v1.8h, #3 +# CHECK-NEXT: 1 4 0.50 sqshrun v0.4h, v1.4s, #3 +# CHECK-NEXT: 1 4 0.50 sqshrun v0.2s, v1.2d, #3 +# CHECK-NEXT: 1 4 1.00 sqshrun2 v0.16b, v1.8h, #3 +# CHECK-NEXT: 1 4 1.00 sqshrun2 v0.8h, v1.4s, #3 +# CHECK-NEXT: 1 4 1.00 sqshrun2 v0.4s, v1.2d, #3 +# CHECK-NEXT: 1 3 0.50 rshrn v0.8b, v1.8h, #3 +# CHECK-NEXT: 1 3 0.50 rshrn v0.4h, v1.4s, #3 +# CHECK-NEXT: 1 3 0.50 rshrn v0.2s, v1.2d, #3 +# CHECK-NEXT: 1 3 1.00 rshrn2 v0.16b, v1.8h, #3 +# CHECK-NEXT: 1 3 1.00 rshrn2 v0.8h, v1.4s, #3 +# CHECK-NEXT: 1 3 1.00 rshrn2 v0.4s, v1.2d, #3 +# CHECK-NEXT: 1 4 0.50 sqrshrun v0.8b, v1.8h, #3 +# CHECK-NEXT: 1 4 0.50 sqrshrun v0.4h, v1.4s, #3 +# CHECK-NEXT: 1 4 0.50 sqrshrun v0.2s, v1.2d, #3 +# CHECK-NEXT: 1 4 1.00 sqrshrun2 v0.16b, v1.8h, #3 +# CHECK-NEXT: 1 4 1.00 sqrshrun2 v0.8h, v1.4s, #3 +# CHECK-NEXT: 1 4 1.00 sqrshrun2 v0.4s, v1.2d, #3 +# CHECK-NEXT: 1 4 0.50 sqshrn v0.8b, v1.8h, #3 +# CHECK-NEXT: 1 4 0.50 sqshrn v0.4h, v1.4s, #3 +# CHECK-NEXT: 1 4 0.50 sqshrn v0.2s, v1.2d, #3 +# CHECK-NEXT: 1 4 1.00 sqshrn2 v0.16b, v1.8h, #3 +# CHECK-NEXT: 1 4 1.00 sqshrn2 v0.8h, v1.4s, #3 +# CHECK-NEXT: 1 4 1.00 sqshrn2 v0.4s, v1.2d, #3 +# CHECK-NEXT: 1 4 0.50 uqshrn v0.8b, v1.8h, #3 +# CHECK-NEXT: 1 4 0.50 uqshrn v0.4h, v1.4s, #3 +# CHECK-NEXT: 1 4 0.50 uqshrn v0.2s, v1.2d, #3 +# CHECK-NEXT: 1 4 1.00 uqshrn2 v0.16b, v1.8h, #3 +# CHECK-NEXT: 1 4 1.00 uqshrn2 v0.8h, v1.4s, #3 +# CHECK-NEXT: 1 4 1.00 uqshrn2 v0.4s, v1.2d, #3 +# CHECK-NEXT: 1 4 0.50 sqrshrn v0.8b, v1.8h, #3 +# CHECK-NEXT: 1 4 0.50 sqrshrn v0.4h, v1.4s, #3 +# CHECK-NEXT: 1 4 0.50 sqrshrn v0.2s, v1.2d, #3 +# CHECK-NEXT: 1 4 1.00 sqrshrn2 v0.16b, v1.8h, #3 +# CHECK-NEXT: 1 4 1.00 sqrshrn2 v0.8h, v1.4s, #3 +# CHECK-NEXT: 1 4 1.00 sqrshrn2 v0.4s, v1.2d, #3 +# CHECK-NEXT: 1 4 0.50 uqrshrn v0.8b, v1.8h, #3 +# CHECK-NEXT: 1 4 0.50 uqrshrn v0.4h, v1.4s, #3 +# CHECK-NEXT: 1 4 0.50 uqrshrn v0.2s, v1.2d, #3 +# CHECK-NEXT: 1 4 1.00 uqrshrn2 v0.16b, v1.8h, #3 +# CHECK-NEXT: 1 4 1.00 uqrshrn2 v0.8h, v1.4s, #3 +# CHECK-NEXT: 1 4 1.00 uqrshrn2 v0.4s, v1.2d, #3 +# CHECK-NEXT: 1 4 0.50 scvtf v0.2s, v1.2s, #3 +# CHECK-NEXT: 1 4 0.50 scvtf v0.4s, v1.4s, #3 +# CHECK-NEXT: 1 4 0.50 scvtf v0.2d, v1.2d, #3 +# CHECK-NEXT: 1 4 0.50 ucvtf v0.2s, v1.2s, #3 +# CHECK-NEXT: 1 4 0.50 ucvtf v0.4s, v1.4s, #3 +# CHECK-NEXT: 1 4 0.50 ucvtf v0.2d, v1.2d, #3 +# CHECK-NEXT: 1 4 0.50 fcvtzs v0.2s, v1.2s, #3 +# CHECK-NEXT: 1 4 0.50 fcvtzs v0.4s, v1.4s, #3 +# CHECK-NEXT: 1 4 0.50 fcvtzs v0.2d, v1.2d, #3 +# CHECK-NEXT: 1 4 0.50 fcvtzu v0.2s, v1.2s, #3 +# CHECK-NEXT: 1 4 0.50 fcvtzu v0.4s, v1.4s, #3 +# CHECK-NEXT: 1 4 0.50 fcvtzu v0.2d, v1.2d, #3 +# CHECK-NEXT: 1 3 1.00 saddl v0.8h, v1.8b, v2.8b +# CHECK-NEXT: 1 3 1.00 saddl v0.4s, v1.4h, v2.4h +# CHECK-NEXT: 1 3 1.00 saddl v0.2d, v1.2s, v2.2s +# CHECK-NEXT: 1 3 1.00 saddl2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: 1 3 1.00 saddl2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: 1 3 1.00 saddl2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: 1 3 1.00 uaddl v0.8h, v1.8b, v2.8b +# CHECK-NEXT: 1 3 1.00 uaddl v0.4s, v1.4h, v2.4h +# CHECK-NEXT: 1 3 1.00 uaddl v0.2d, v1.2s, v2.2s +# CHECK-NEXT: 1 3 1.00 uaddl2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: 1 3 1.00 uaddl2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: 1 3 1.00 uaddl2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: 1 3 1.00 ssubl v0.8h, v1.8b, v2.8b +# CHECK-NEXT: 1 3 1.00 ssubl v0.4s, v1.4h, v2.4h +# CHECK-NEXT: 1 3 1.00 ssubl v0.2d, v1.2s, v2.2s +# CHECK-NEXT: 1 3 1.00 ssubl2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: 1 3 1.00 ssubl2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: 1 3 1.00 ssubl2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: 1 3 1.00 usubl v0.8h, v1.8b, v2.8b +# CHECK-NEXT: 1 3 1.00 usubl v0.4s, v1.4h, v2.4h +# CHECK-NEXT: 1 3 1.00 usubl v0.2d, v1.2s, v2.2s +# CHECK-NEXT: 1 3 1.00 usubl2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: 1 3 1.00 usubl2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: 1 3 1.00 usubl2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: 1 4 2.00 sabal v0.8h, v1.8b, v2.8b +# CHECK-NEXT: 1 4 2.00 sabal v0.4s, v1.4h, v2.4h +# CHECK-NEXT: 1 4 2.00 sabal v0.2d, v1.2s, v2.2s +# CHECK-NEXT: 1 4 2.00 sabal2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: 1 4 2.00 sabal2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: 1 4 2.00 sabal2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: 1 4 2.00 uabal v0.8h, v1.8b, v2.8b +# CHECK-NEXT: 1 4 2.00 uabal v0.4s, v1.4h, v2.4h +# CHECK-NEXT: 1 4 2.00 uabal v0.2d, v1.2s, v2.2s +# CHECK-NEXT: 1 4 2.00 uabal2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: 1 4 2.00 uabal2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: 1 4 2.00 uabal2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: 1 3 1.00 sabdl v0.8h, v1.8b, v2.8b +# CHECK-NEXT: 1 3 1.00 sabdl v0.4s, v1.4h, v2.4h +# CHECK-NEXT: 1 3 1.00 sabdl v0.2d, v1.2s, v2.2s +# CHECK-NEXT: 1 3 1.00 sabdl2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: 1 3 1.00 sabdl2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: 1 3 1.00 sabdl2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: 1 3 1.00 uabdl v0.8h, v1.8b, v2.8b +# CHECK-NEXT: 1 3 1.00 uabdl v0.4s, v1.4h, v2.4h +# CHECK-NEXT: 1 3 1.00 uabdl v0.2d, v1.2s, v2.2s +# CHECK-NEXT: 1 3 1.00 uabdl2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: 1 3 1.00 uabdl2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: 1 3 1.00 uabdl2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: 1 4 1.00 smlal v0.8h, v1.8b, v2.8b +# CHECK-NEXT: 1 4 1.00 smlal v0.4s, v1.4h, v2.4h +# CHECK-NEXT: 1 4 1.00 smlal v0.2d, v1.2s, v2.2s +# CHECK-NEXT: 1 4 1.00 smlal2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: 1 4 1.00 smlal2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: 1 4 1.00 smlal2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: 1 4 1.00 umlal v0.8h, v1.8b, v2.8b +# CHECK-NEXT: 1 4 1.00 umlal v0.4s, v1.4h, v2.4h +# CHECK-NEXT: 1 4 1.00 umlal v0.2d, v1.2s, v2.2s +# CHECK-NEXT: 1 4 1.00 umlal2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: 1 4 1.00 umlal2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: 1 4 1.00 umlal2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: 1 4 1.00 smlsl v0.8h, v1.8b, v2.8b +# CHECK-NEXT: 1 4 1.00 smlsl v0.4s, v1.4h, v2.4h +# CHECK-NEXT: 1 4 1.00 smlsl v0.2d, v1.2s, v2.2s +# CHECK-NEXT: 1 4 1.00 smlsl2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: 1 4 1.00 smlsl2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: 1 4 1.00 smlsl2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: 1 4 1.00 umlsl v0.8h, v1.8b, v2.8b +# CHECK-NEXT: 1 4 1.00 umlsl v0.4s, v1.4h, v2.4h +# CHECK-NEXT: 1 4 1.00 umlsl v0.2d, v1.2s, v2.2s +# CHECK-NEXT: 1 4 1.00 umlsl2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: 1 4 1.00 umlsl2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: 1 4 1.00 umlsl2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: 1 4 1.00 smull v0.8h, v1.8b, v2.8b +# CHECK-NEXT: 1 4 1.00 smull v0.4s, v1.4h, v2.4h +# CHECK-NEXT: 1 4 1.00 smull v0.2d, v1.2s, v2.2s +# CHECK-NEXT: 1 4 1.00 smull2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: 1 4 1.00 smull2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: 1 4 1.00 smull2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: 1 4 1.00 umull v0.8h, v1.8b, v2.8b +# CHECK-NEXT: 1 4 1.00 umull v0.4s, v1.4h, v2.4h +# CHECK-NEXT: 1 4 1.00 umull v0.2d, v1.2s, v2.2s +# CHECK-NEXT: 1 4 1.00 umull2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: 1 4 1.00 umull2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: 1 4 1.00 umull2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: 1 4 1.00 sqdmlal v0.4s, v1.4h, v2.4h +# CHECK-NEXT: 1 4 1.00 sqdmlal v0.2d, v1.2s, v2.2s +# CHECK-NEXT: 1 4 1.00 sqdmlal2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: 1 4 1.00 sqdmlal2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: 1 4 1.00 sqdmlsl v0.4s, v1.4h, v2.4h +# CHECK-NEXT: 1 4 1.00 sqdmlsl v0.2d, v1.2s, v2.2s +# CHECK-NEXT: 1 4 1.00 sqdmlsl2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: 1 4 1.00 sqdmlsl2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: 1 4 1.00 sqdmull v0.4s, v1.4h, v2.4h +# CHECK-NEXT: 1 4 1.00 sqdmull v0.2d, v1.2s, v2.2s +# CHECK-NEXT: 1 4 1.00 sqdmull2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: 1 4 1.00 sqdmull2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: 1 3 1.00 pmull v0.8h, v1.8b, v2.8b +# CHECK-NEXT: 1 3 1.00 pmull2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: 1 3 1.00 saddw v0.8h, v1.8h, v2.8b +# CHECK-NEXT: 1 3 1.00 saddw v0.4s, v1.4s, v2.4h +# CHECK-NEXT: 1 3 1.00 saddw v0.2d, v1.2d, v2.2s +# CHECK-NEXT: 1 3 1.00 saddw2 v0.8h, v1.8h, v2.16b +# CHECK-NEXT: 1 3 1.00 saddw2 v0.4s, v1.4s, v2.8h +# CHECK-NEXT: 1 3 1.00 saddw2 v0.2d, v1.2d, v2.4s +# CHECK-NEXT: 1 3 1.00 uaddw v0.8h, v1.8h, v2.8b +# CHECK-NEXT: 1 3 1.00 uaddw v0.4s, v1.4s, v2.4h +# CHECK-NEXT: 1 3 1.00 uaddw v0.2d, v1.2d, v2.2s +# CHECK-NEXT: 1 3 1.00 uaddw2 v0.8h, v1.8h, v2.16b +# CHECK-NEXT: 1 3 1.00 uaddw2 v0.4s, v1.4s, v2.8h +# CHECK-NEXT: 1 3 1.00 uaddw2 v0.2d, v1.2d, v2.4s +# CHECK-NEXT: 1 3 1.00 ssubw v0.8h, v1.8h, v2.8b +# CHECK-NEXT: 1 3 1.00 ssubw v0.4s, v1.4s, v2.4h +# CHECK-NEXT: 1 3 1.00 ssubw v0.2d, v1.2d, v2.2s +# CHECK-NEXT: 1 3 1.00 ssubw2 v0.8h, v1.8h, v2.16b +# CHECK-NEXT: 1 3 1.00 ssubw2 v0.4s, v1.4s, v2.8h +# CHECK-NEXT: 1 3 1.00 ssubw2 v0.2d, v1.2d, v2.4s +# CHECK-NEXT: 1 3 1.00 usubw v0.8h, v1.8h, v2.8b +# CHECK-NEXT: 1 3 1.00 usubw v0.4s, v1.4s, v2.4h +# CHECK-NEXT: 1 3 1.00 usubw v0.2d, v1.2d, v2.2s +# CHECK-NEXT: 1 3 1.00 usubw2 v0.8h, v1.8h, v2.16b +# CHECK-NEXT: 1 3 1.00 usubw2 v0.4s, v1.4s, v2.8h +# CHECK-NEXT: 1 3 1.00 usubw2 v0.2d, v1.2d, v2.4s +# CHECK-NEXT: 1 3 1.00 addhn v0.8b, v1.8h, v2.8h +# CHECK-NEXT: 1 3 1.00 addhn v0.4h, v1.4s, v2.4s +# CHECK-NEXT: 1 3 1.00 addhn v0.2s, v1.2d, v2.2d +# CHECK-NEXT: 1 3 1.00 addhn2 v0.16b, v1.8h, v2.8h +# CHECK-NEXT: 1 3 1.00 addhn2 v0.8h, v1.4s, v2.4s +# CHECK-NEXT: 1 3 1.00 addhn2 v0.4s, v1.2d, v2.2d +# CHECK-NEXT: 1 4 2.00 raddhn v0.8b, v1.8h, v2.8h +# CHECK-NEXT: 1 4 2.00 raddhn v0.4h, v1.4s, v2.4s +# CHECK-NEXT: 1 4 2.00 raddhn v0.2s, v1.2d, v2.2d +# CHECK-NEXT: 1 4 2.00 raddhn2 v0.16b, v1.8h, v2.8h +# CHECK-NEXT: 1 4 2.00 raddhn2 v0.8h, v1.4s, v2.4s +# CHECK-NEXT: 1 4 2.00 raddhn2 v0.4s, v1.2d, v2.2d +# CHECK-NEXT: 1 4 2.00 rsubhn v0.8b, v1.8h, v2.8h +# CHECK-NEXT: 1 4 2.00 rsubhn v0.4h, v1.4s, v2.4s +# CHECK-NEXT: 1 4 2.00 rsubhn v0.2s, v1.2d, v2.2d +# CHECK-NEXT: 1 4 2.00 rsubhn2 v0.16b, v1.8h, v2.8h +# CHECK-NEXT: 1 4 2.00 rsubhn2 v0.8h, v1.4s, v2.4s +# CHECK-NEXT: 1 4 2.00 rsubhn2 v0.4s, v1.2d, v2.2d +# CHECK-NEXT: 1 4 0.50 sqdmulh h10, h11, h12 +# CHECK-NEXT: 1 4 0.50 sqdmulh s20, s21, s2 +# CHECK-NEXT: 1 4 0.50 sqrdmulh h10, h11, h12 +# CHECK-NEXT: 1 4 0.50 sqrdmulh s20, s21, s2 # CHECK-NEXT: 1 4 0.50 fmulx s20, s22, s15 -# CHECK-NEXT: 1 4 0.50 fmulx s3, s5, v0.s[3] -# CHECK-NEXT: 1 4 1.00 fmulx v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 fmulx v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 fmulx v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 fneg v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 fneg v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 fneg v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 fneg v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 fneg v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 frecpe d13, d13 -# CHECK-NEXT: 1 4 0.50 frecpe s19, s14 -# CHECK-NEXT: 1 4 1.00 frecpe v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 frecpe v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 frecpe v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 frecpe v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 frecpe v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 frecps v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 frecps d22, d30, d21 +# CHECK-NEXT: 1 4 0.50 fmulx d23, d11, d1 # CHECK-NEXT: 1 4 0.50 frecps s21, s16, s13 -# CHECK-NEXT: 1 4 0.50 frecpx d16, d19 -# CHECK-NEXT: 1 4 0.50 frecpx s18, s10 -# CHECK-NEXT: 1 4 1.00 frinta v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 frinta v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 frinta v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 frinta v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 frinta v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 frinti v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 frinti v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 frinti v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 frinti v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 frinti v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 frintm v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 frintm v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 frintm v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 frintm v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 frintm v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 frintn v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 frintn v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 frintn v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 frintn v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 frintn v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 frintp v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 frintp v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 frintp v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 frintp v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 frintp v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 frintx v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 frintx v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 frintx v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 frintx v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 frintx v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 frintz v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 frintz v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 frintz v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 frintz v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 frintz v0.8h, v0.8h -# CHECK-NEXT: 1 22 19.00 frsqrte d21, d12 -# CHECK-NEXT: 1 12 9.00 frsqrte s22, s13 -# CHECK-NEXT: 1 22 19.00 frsqrte v0.2d, v0.2d -# CHECK-NEXT: 1 12 9.00 frsqrte v0.2s, v0.2s -# CHECK-NEXT: 1 8 5.00 frsqrte v0.4h, v0.4h -# CHECK-NEXT: 1 12 9.00 frsqrte v0.4s, v0.4s -# CHECK-NEXT: 1 8 5.00 frsqrte v0.8h, v0.8h -# CHECK-NEXT: 1 22 19.00 frsqrts d8, d22, d18 +# CHECK-NEXT: 1 4 0.50 frecps d22, d30, d21 # CHECK-NEXT: 1 12 9.00 frsqrts s21, s5, s12 -# CHECK-NEXT: 1 22 19.00 frsqrts v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 22 19.00 fsqrt v0.2d, v0.2d -# CHECK-NEXT: 1 12 9.00 fsqrt v0.2s, v0.2s -# CHECK-NEXT: 1 8 5.00 fsqrt v0.4h, v0.4h -# CHECK-NEXT: 1 12 9.00 fsqrt v0.4s, v0.4s -# CHECK-NEXT: 1 8 5.00 fsqrt v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 fsub v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 5 2.00 * ld1 { v0.16b }, [x0] -# CHECK-NEXT: 2 9 6.00 * ld1 { v0.2d, v1.2d, v2.2d }, [x0], #48 -# CHECK-NEXT: 1 11 8.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] -# CHECK-NEXT: 2 7 4.00 * ld1 { v0.4s, v1.4s }, [sp], #32 -# CHECK-NEXT: 1 9 6.00 * ld1 { v0.4s, v1.4s, v2.4s }, [sp] -# CHECK-NEXT: 2 7 4.00 * ld1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3 -# CHECK-NEXT: 2 5 2.00 * ld1 { v0.8h }, [x15], x2 -# CHECK-NEXT: 1 7 4.00 * ld1 { v0.8h, v1.8h }, [x15] -# CHECK-NEXT: 1 4 1.00 * ld1 { v0.b }[9], [x0] -# CHECK-NEXT: 2 4 1.00 * ld1 { v0.b }[9], [x0], #1 -# CHECK-NEXT: 1 4 1.00 * ld1r { v0.16b }, [x0] -# CHECK-NEXT: 2 4 1.00 * ld1r { v0.16b }, [x0], #1 -# CHECK-NEXT: 1 4 1.00 * ld1r { v0.8h }, [x15] -# CHECK-NEXT: 2 4 1.00 * ld1r { v0.8h }, [x15], #2 -# CHECK-NEXT: 2 7 4.00 * ld2 { v0.16b, v1.16b }, [x0], x1 -# CHECK-NEXT: 1 5 2.00 * ld2 { v0.8b, v1.8b }, [x0] -# CHECK-NEXT: 1 5 2.00 * ld2 { v0.h, v1.h }[7], [x15] -# CHECK-NEXT: 2 5 2.00 * ld2 { v0.h, v1.h }[7], [x15], #4 -# CHECK-NEXT: 1 5 2.00 * ld2r { v0.2d, v1.2d }, [x0] -# CHECK-NEXT: 2 5 2.00 * ld2r { v0.2d, v1.2d }, [x0], #16 -# CHECK-NEXT: 1 5 2.00 * ld2r { v0.4s, v1.4s }, [sp] -# CHECK-NEXT: 2 5 2.00 * ld2r { v0.4s, v1.4s }, [sp], #8 -# CHECK-NEXT: 1 6 3.00 * ld3 { v0.4h, v1.4h, v2.4h }, [x15] -# CHECK-NEXT: 2 9 6.00 * ld3 { v0.8h, v1.8h, v2.8h }, [x15], x2 -# CHECK-NEXT: 1 5 2.00 * ld3 { v0.s, v1.s, v2.s }[3], [sp] -# CHECK-NEXT: 2 5 2.00 * ld3 { v0.s, v1.s, v2.s }[3], [sp], x3 -# CHECK-NEXT: 1 5 2.00 * ld3r { v0.4h, v1.4h, v2.4h }, [x15] -# CHECK-NEXT: 2 5 2.00 * ld3r { v0.4h, v1.4h, v2.4h }, [x15], #6 -# CHECK-NEXT: 1 5 2.00 * ld3r { v0.8b, v1.8b, v2.8b }, [x0] -# CHECK-NEXT: 2 5 2.00 * ld3r { v0.8b, v1.8b, v2.8b }, [x0], #3 -# CHECK-NEXT: 1 7 4.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] -# CHECK-NEXT: 2 11 8.00 * ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64 -# CHECK-NEXT: 1 5 2.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0] -# CHECK-NEXT: 2 5 2.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0], #32 -# CHECK-NEXT: 2 5 2.00 * ld4 { v0.h, v1.h, v2.h, v3.h }[7], [x0], x0 -# CHECK-NEXT: 1 5 2.00 * ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [sp] -# CHECK-NEXT: 2 5 2.00 * ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [sp], x7 -# CHECK-NEXT: 1 5 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] -# CHECK-NEXT: 2 5 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x30 -# CHECK-NEXT: 1 4 0.50 mla v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 0.50 mls v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 mov b0, v0.b[15] -# CHECK-NEXT: 1 4 0.50 mov d6, v0.d[1] -# CHECK-NEXT: 1 4 0.50 mov h2, v0.h[5] -# CHECK-NEXT: 1 4 0.50 mov s17, v0.s[2] -# CHECK-NEXT: 1 4 1.00 mov v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 mov v0.8b, v0.8b -# CHECK-NEXT: 1 4 0.50 movi d15, #0xff00ff00ff00ff -# CHECK-NEXT: 1 4 1.00 movi v0.16b, #31 -# CHECK-NEXT: 1 4 1.00 movi v0.2d, #0xff0000ff0000ffff -# CHECK-NEXT: 1 4 0.50 movi v0.2s, #8, msl #8 -# CHECK-NEXT: 1 4 1.00 movi v0.4s, #255, lsl #24 -# CHECK-NEXT: 1 4 0.50 movi v0.8b, #255 -# CHECK-NEXT: 1 4 0.50 mul v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 0.50 mvni v0.2s, #0 -# CHECK-NEXT: 1 4 1.00 mvni v0.4s, #16, msl #16 -# CHECK-NEXT: 1 4 0.50 neg d29, d24 -# CHECK-NEXT: 1 4 1.00 neg v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 neg v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 neg v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 neg v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 neg v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 neg v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 neg v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 mvn v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 mvn v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 orn v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 mov v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 orr v0.8h, #31 -# CHECK-NEXT: 1 4 1.00 pmul v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 pmul v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 pmull v0.8h, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 pmull2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 raddhn v0.2s, v0.2d, v0.2d -# CHECK-NEXT: 1 4 1.00 raddhn v0.4h, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 raddhn v0.8b, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 raddhn2 v0.16b, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 raddhn2 v0.4s, v0.2d, v0.2d -# CHECK-NEXT: 1 4 1.00 raddhn2 v0.8h, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 rbit v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 rbit v0.8b, v0.8b -# CHECK-NEXT: 1 4 0.50 rev16 v21.8b, v1.8b -# CHECK-NEXT: 1 4 1.00 rev16 v30.16b, v31.16b -# CHECK-NEXT: 1 4 0.50 rev32 v0.4h, v9.4h -# CHECK-NEXT: 1 4 0.50 rev32 v21.8b, v1.8b -# CHECK-NEXT: 1 4 1.00 rev32 v30.16b, v31.16b -# CHECK-NEXT: 1 4 1.00 rev32 v4.8h, v7.8h -# CHECK-NEXT: 1 4 1.00 rev64 v0.16b, v31.16b -# CHECK-NEXT: 1 4 0.50 rev64 v1.8b, v9.8b -# CHECK-NEXT: 1 4 0.50 rev64 v13.4h, v21.4h -# CHECK-NEXT: 1 4 1.00 rev64 v2.8h, v4.8h -# CHECK-NEXT: 1 4 0.50 rev64 v4.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 rev64 v6.4s, v8.4s -# CHECK-NEXT: 1 4 0.50 rshrn v0.2s, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 rshrn v0.4h, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 rshrn v0.8b, v0.8h, #3 -# CHECK-NEXT: 1 4 1.00 rshrn2 v0.16b, v0.8h, #3 -# CHECK-NEXT: 1 4 1.00 rshrn2 v0.4s, v0.2d, #3 -# CHECK-NEXT: 1 4 1.00 rshrn2 v0.8h, v0.4s, #3 -# CHECK-NEXT: 1 4 1.00 rsubhn v0.2s, v0.2d, v0.2d -# CHECK-NEXT: 1 4 1.00 rsubhn v0.4h, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 rsubhn v0.8b, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 rsubhn2 v0.16b, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 rsubhn2 v0.4s, v0.2d, v0.2d -# CHECK-NEXT: 1 4 1.00 rsubhn2 v0.8h, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 saba v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 sabal v0.2d, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 sabal v0.4s, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 sabal v0.8h, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 sabal2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 sabal2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 sabal2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 sabd v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 sabdl v0.2d, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 sabdl v0.4s, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 sabdl v0.8h, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 sabdl2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 sabdl2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 sabdl2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 sadalp v0.1d, v0.2s -# CHECK-NEXT: 1 4 1.00 sadalp v0.2d, v0.4s -# CHECK-NEXT: 1 4 0.50 sadalp v0.2s, v0.4h -# CHECK-NEXT: 1 4 0.50 sadalp v0.4h, v0.8b -# CHECK-NEXT: 1 4 1.00 sadalp v0.4s, v0.8h -# CHECK-NEXT: 1 4 1.00 sadalp v0.8h, v0.16b -# CHECK-NEXT: 1 4 1.00 saddl v0.2d, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 saddl v0.4s, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 saddl v0.8h, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 saddl2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 saddl2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 saddl2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 saddlp v0.1d, v0.2s -# CHECK-NEXT: 1 4 1.00 saddlp v0.2d, v0.4s -# CHECK-NEXT: 1 4 0.50 saddlp v0.2s, v0.4h -# CHECK-NEXT: 1 4 0.50 saddlp v0.4h, v0.8b -# CHECK-NEXT: 1 4 1.00 saddlp v0.4s, v0.8h -# CHECK-NEXT: 1 4 1.00 saddlp v0.8h, v0.16b -# CHECK-NEXT: 1 4 1.00 saddw v0.2d, v0.2d, v0.2s -# CHECK-NEXT: 1 4 1.00 saddw v0.4s, v0.4s, v0.4h -# CHECK-NEXT: 1 4 1.00 saddw v0.8h, v0.8h, v0.8b -# CHECK-NEXT: 1 4 1.00 saddw2 v0.2d, v0.2d, v0.4s -# CHECK-NEXT: 1 4 1.00 saddw2 v0.4s, v0.4s, v0.8h -# CHECK-NEXT: 1 4 1.00 saddw2 v0.8h, v0.8h, v0.16b -# CHECK-NEXT: 1 4 0.50 scvtf d21, d12 -# CHECK-NEXT: 1 4 0.50 scvtf d21, d12, #64 +# CHECK-NEXT: 1 22 19.00 frsqrts d8, d22, d18 # CHECK-NEXT: 1 4 0.50 scvtf s22, s13 -# CHECK-NEXT: 1 4 0.50 scvtf s22, s13, #32 -# CHECK-NEXT: 1 4 0.50 scvtf v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 scvtf v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 scvtf v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 scvtf v0.2s, v0.2s, #3 -# CHECK-NEXT: 1 4 0.50 scvtf v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 scvtf v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 scvtf v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 scvtf v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 shadd v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 0.50 shl d7, d10, #12 -# CHECK-NEXT: 1 4 1.00 shl v0.16b, v0.16b, #3 -# CHECK-NEXT: 1 4 1.00 shl v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 shl v0.4h, v0.4h, #3 -# CHECK-NEXT: 1 4 1.00 shl v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 1.00 shll v0.2d, v0.2s, #32 -# CHECK-NEXT: 1 4 1.00 shll v0.4s, v0.4h, #16 -# CHECK-NEXT: 1 4 1.00 shll v0.8h, v0.8b, #8 -# CHECK-NEXT: 1 4 1.00 shll v0.2d, v0.2s, #32 -# CHECK-NEXT: 1 4 1.00 shll v0.4s, v0.4h, #16 -# CHECK-NEXT: 1 4 1.00 shll v0.8h, v0.8b, #8 -# CHECK-NEXT: 1 4 1.00 shll2 v0.2d, v0.4s, #32 -# CHECK-NEXT: 1 4 1.00 shll2 v0.4s, v0.8h, #16 -# CHECK-NEXT: 1 4 1.00 shll2 v0.8h, v0.16b, #8 -# CHECK-NEXT: 1 4 1.00 shll2 v0.2d, v0.4s, #32 -# CHECK-NEXT: 1 4 1.00 shll2 v0.4s, v0.8h, #16 -# CHECK-NEXT: 1 4 1.00 shll2 v0.8h, v0.16b, #8 -# CHECK-NEXT: 1 4 0.50 shrn v0.2s, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 shrn v0.4h, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 shrn v0.8b, v0.8h, #3 -# CHECK-NEXT: 1 4 1.00 shrn2 v0.16b, v0.8h, #3 -# CHECK-NEXT: 1 4 1.00 shrn2 v0.4s, v0.2d, #3 -# CHECK-NEXT: 1 4 1.00 shrn2 v0.8h, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 shsub v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 shsub v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 sli d10, d14, #12 -# CHECK-NEXT: 1 4 1.00 sli v0.16b, v0.16b, #3 -# CHECK-NEXT: 1 4 1.00 sli v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 sli v0.2s, v0.2s, #3 -# CHECK-NEXT: 1 4 0.50 sli v0.4h, v0.4h, #3 -# CHECK-NEXT: 1 4 1.00 sli v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 sli v0.8b, v0.8b, #3 -# CHECK-NEXT: 1 4 1.00 sli v0.8h, v0.8h, #3 -# CHECK-NEXT: 1 4 0.50 smax v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 smax v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 smax v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 0.50 smaxp v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 smaxp v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 smaxp v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 smin v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 smin v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 smin v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 sminp v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 sminp v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 sminp v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 smlal v0.2d, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 smlal v0.4s, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 smlal v0.8h, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 smlal2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 smlal2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 smlal2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 smlsl v0.2d, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 smlsl v0.4s, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 smlsl v0.8h, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 smlsl2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 smlsl2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 smlsl2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 smull v0.2d, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 smull v0.4s, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 smull v0.8h, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 smull2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 smull2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 smull2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: 1 4 0.50 scvtf d21, d12 +# CHECK-NEXT: 1 4 0.50 ucvtf s22, s13 +# CHECK-NEXT: 1 4 0.50 ucvtf d21, d14 +# CHECK-NEXT: 1 4 0.50 frecpe s19, s14 +# CHECK-NEXT: 1 4 0.50 frecpe d13, d13 +# CHECK-NEXT: 1 4 0.50 frecpx s18, s10 +# CHECK-NEXT: 1 4 0.50 frecpx d16, d19 +# CHECK-NEXT: 1 12 9.00 frsqrte s22, s13 +# CHECK-NEXT: 1 22 19.00 frsqrte d21, d12 +# CHECK-NEXT: 1 2 0.50 cmeq d20, d21, d22 +# CHECK-NEXT: 1 2 0.50 cmeq d20, d21, #0 +# CHECK-NEXT: 1 2 0.50 cmhs d20, d21, d22 +# CHECK-NEXT: 1 2 0.50 cmge d20, d21, d22 +# CHECK-NEXT: 1 2 0.50 cmge d20, d21, #0 +# CHECK-NEXT: 1 2 0.50 cmhi d20, d21, d22 +# CHECK-NEXT: 1 2 0.50 cmgt d20, d21, d22 +# CHECK-NEXT: 1 2 0.50 cmgt d20, d21, #0 +# CHECK-NEXT: 1 2 0.50 cmle d20, d21, #0 +# CHECK-NEXT: 1 2 0.50 cmlt d20, d21, #0 +# CHECK-NEXT: 1 3 0.50 cmtst d20, d21, d22 +# CHECK-NEXT: 1 2 0.50 fcmeq s10, s11, s12 +# CHECK-NEXT: 1 2 0.50 fcmeq d20, d21, d22 +# CHECK-NEXT: 1 2 0.50 fcmeq s10, s11, #0.0 +# CHECK-NEXT: 1 2 0.50 fcmeq d20, d21, #0.0 +# CHECK-NEXT: 1 2 0.50 fcmge s10, s11, s12 +# CHECK-NEXT: 1 2 0.50 fcmge d20, d21, d22 +# CHECK-NEXT: 1 2 0.50 fcmge s10, s11, #0.0 +# CHECK-NEXT: 1 2 0.50 fcmge d20, d21, #0.0 +# CHECK-NEXT: 1 2 0.50 fcmgt s10, s11, s12 +# CHECK-NEXT: 1 2 0.50 fcmgt d20, d21, d22 +# CHECK-NEXT: 1 2 0.50 fcmgt s10, s11, #0.0 +# CHECK-NEXT: 1 2 0.50 fcmgt d20, d21, #0.0 +# CHECK-NEXT: 1 2 0.50 fcmle s10, s11, #0.0 +# CHECK-NEXT: 1 2 0.50 fcmle d20, d21, #0.0 +# CHECK-NEXT: 1 2 0.50 fcmlt s10, s11, #0.0 +# CHECK-NEXT: 1 2 0.50 fcmlt d20, d21, #0.0 +# CHECK-NEXT: 1 2 0.50 facge s10, s11, s12 +# CHECK-NEXT: 1 2 0.50 facge d20, d21, d22 +# CHECK-NEXT: 1 2 0.50 facgt s10, s11, s12 +# CHECK-NEXT: 1 2 0.50 facgt d20, d21, d22 +# CHECK-NEXT: 1 3 0.50 abs d29, d24 # CHECK-NEXT: 1 4 0.50 sqabs b19, b14 -# CHECK-NEXT: 1 4 0.50 sqabs d18, d12 # CHECK-NEXT: 1 4 0.50 sqabs h21, h15 # CHECK-NEXT: 1 4 0.50 sqabs s20, s12 -# CHECK-NEXT: 1 4 1.00 sqabs v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 sqabs v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 sqabs v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 sqabs v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 sqabs v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 sqabs v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 sqabs v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 sqadd b20, b11, b15 -# CHECK-NEXT: 1 4 1.00 sqadd v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 sqadd v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 sqdmlal d19, s24, s12 -# CHECK-NEXT: 1 4 0.50 sqdmlal d8, s9, v0.s[1] -# CHECK-NEXT: 1 4 0.50 sqdmlal s0, h0, v0.h[3] -# CHECK-NEXT: 1 4 0.50 sqdmlal s17, h27, h12 -# CHECK-NEXT: 1 4 1.00 sqdmlal v0.2d, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 sqdmlal v0.4s, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 sqdmlal2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 sqdmlal2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 sqdmlsl d12, s23, s13 -# CHECK-NEXT: 1 4 0.50 sqdmlsl d8, s9, v0.s[1] -# CHECK-NEXT: 1 4 0.50 sqdmlsl s0, h0, v0.h[3] -# CHECK-NEXT: 1 4 0.50 sqdmlsl s14, h12, h25 -# CHECK-NEXT: 1 4 1.00 sqdmlsl v0.2d, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 sqdmlsl v0.4s, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 sqdmlsl2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 sqdmlsl2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 sqdmulh h10, h11, h12 -# CHECK-NEXT: 1 4 0.50 sqdmulh h7, h15, v0.h[3] -# CHECK-NEXT: 1 4 0.50 sqdmulh s15, s14, v0.s[1] -# CHECK-NEXT: 1 4 0.50 sqdmulh s20, s21, s2 -# CHECK-NEXT: 1 4 0.50 sqdmulh v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 sqdmulh v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 sqdmull d1, s1, v0.s[1] -# CHECK-NEXT: 1 4 0.50 sqdmull d15, s22, s12 -# CHECK-NEXT: 1 4 0.50 sqdmull s1, h1, v0.h[3] -# CHECK-NEXT: 1 4 0.50 sqdmull s12, h22, h12 -# CHECK-NEXT: 1 4 1.00 sqdmull v0.2d, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 sqdmull v0.4s, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 sqdmull2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 sqdmull2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 sqneg b19, b14 -# CHECK-NEXT: 1 4 0.50 sqneg d18, d12 -# CHECK-NEXT: 1 4 0.50 sqneg h21, h15 -# CHECK-NEXT: 1 4 0.50 sqneg s20, s12 -# CHECK-NEXT: 1 4 1.00 sqneg v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 sqneg v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 sqneg v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 sqneg v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 sqneg v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 sqneg v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 sqneg v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 sqrdmulh h10, h11, h12 -# CHECK-NEXT: 1 4 0.50 sqrdmulh h7, h15, v0.h[3] -# CHECK-NEXT: 1 4 0.50 sqrdmulh s15, s14, v0.s[1] -# CHECK-NEXT: 1 4 0.50 sqrdmulh s20, s21, s2 -# CHECK-NEXT: 1 4 0.50 sqrdmulh v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 sqrdmulh v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 sqrshl d31, d31, d31 -# CHECK-NEXT: 1 4 0.50 sqrshl h3, h4, h15 -# CHECK-NEXT: 1 4 0.50 sqrshl v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 sqrshl v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 sqrshl v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 0.50 sqrshrn b10, h13, #2 -# CHECK-NEXT: 1 4 0.50 sqrshrn h15, s10, #6 -# CHECK-NEXT: 1 4 0.50 sqrshrn s15, d12, #9 -# CHECK-NEXT: 1 4 0.50 sqrshrn v0.2s, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 sqrshrn v0.4h, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 sqrshrn v0.8b, v0.8h, #3 -# CHECK-NEXT: 1 4 1.00 sqrshrn2 v0.16b, v0.8h, #3 -# CHECK-NEXT: 1 4 1.00 sqrshrn2 v0.4s, v0.2d, #3 -# CHECK-NEXT: 1 4 1.00 sqrshrn2 v0.8h, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 sqrshrun b17, h10, #6 -# CHECK-NEXT: 1 4 0.50 sqrshrun h10, s13, #15 -# CHECK-NEXT: 1 4 0.50 sqrshrun s22, d16, #31 -# CHECK-NEXT: 1 4 0.50 sqrshrun v0.2s, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 sqrshrun v0.4h, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 sqrshrun v0.8b, v0.8h, #3 -# CHECK-NEXT: 1 4 1.00 sqrshrun2 v0.16b, v0.8h, #3 -# CHECK-NEXT: 1 4 1.00 sqrshrun2 v0.4s, v0.2d, #3 -# CHECK-NEXT: 1 4 1.00 sqrshrun2 v0.8h, v0.4s, #3 +# CHECK-NEXT: 1 4 0.50 sqabs d18, d12 +# CHECK-NEXT: 1 2 0.50 neg d29, d24 +# CHECK-NEXT: 1 3 0.50 sqneg b19, b14 +# CHECK-NEXT: 1 3 0.50 sqneg h21, h15 +# CHECK-NEXT: 1 3 0.50 sqneg s20, s12 +# CHECK-NEXT: 1 3 0.50 sqneg d18, d12 +# CHECK-NEXT: 1 3 0.50 suqadd b19, b14 +# CHECK-NEXT: 1 3 0.50 suqadd h20, h15 +# CHECK-NEXT: 1 3 0.50 suqadd s21, s12 +# CHECK-NEXT: 1 3 0.50 suqadd d18, d22 +# CHECK-NEXT: 1 3 0.50 usqadd b19, b14 +# CHECK-NEXT: 1 3 0.50 usqadd h20, h15 +# CHECK-NEXT: 1 3 0.50 usqadd s21, s12 +# CHECK-NEXT: 1 3 0.50 usqadd d18, d22 +# CHECK-NEXT: 1 4 1.00 sqdmlal s17, h27, h12 +# CHECK-NEXT: 1 4 1.00 sqdmlal d19, s24, s12 +# CHECK-NEXT: 1 4 1.00 sqdmlsl s14, h12, h25 +# CHECK-NEXT: 1 4 1.00 sqdmlsl d12, s23, s13 +# CHECK-NEXT: 1 4 1.00 sqdmull s12, h22, h12 +# CHECK-NEXT: 1 4 1.00 sqdmull d15, s22, s12 +# CHECK-NEXT: 1 4 0.50 sqxtun b19, h14 +# CHECK-NEXT: 1 4 0.50 sqxtun h21, s15 +# CHECK-NEXT: 1 4 0.50 sqxtun s20, d12 +# CHECK-NEXT: 1 4 0.50 sqxtn b18, h18 +# CHECK-NEXT: 1 4 0.50 sqxtn h20, s17 +# CHECK-NEXT: 1 4 0.50 sqxtn s19, d14 +# CHECK-NEXT: 1 4 0.50 uqxtn b18, h18 +# CHECK-NEXT: 1 4 0.50 uqxtn h20, s17 +# CHECK-NEXT: 1 4 0.50 uqxtn s19, d14 +# CHECK-NEXT: 1 2 0.50 sshr d15, d16, #12 +# CHECK-NEXT: 1 2 0.50 ushr d10, d17, #18 +# CHECK-NEXT: 1 3 0.50 srshr d19, d18, #7 +# CHECK-NEXT: 1 3 0.50 urshr d20, d23, #31 +# CHECK-NEXT: 1 3 0.50 ssra d18, d12, #21 +# CHECK-NEXT: 1 3 0.50 usra d20, d13, #61 +# CHECK-NEXT: 1 4 2.00 srsra d15, d11, #19 +# CHECK-NEXT: 1 4 2.00 ursra d18, d10, #13 +# CHECK-NEXT: 1 2 0.50 shl d7, d10, #12 # CHECK-NEXT: 1 4 0.50 sqshl b11, b19, #7 -# CHECK-NEXT: 1 4 0.50 sqshl d15, d16, #51 -# CHECK-NEXT: 1 4 0.50 sqshl d31, d31, d31 # CHECK-NEXT: 1 4 0.50 sqshl h13, h18, #11 -# CHECK-NEXT: 1 4 0.50 sqshl h3, h4, h15 # CHECK-NEXT: 1 4 0.50 sqshl s14, s17, #22 -# CHECK-NEXT: 1 4 1.00 sqshl v0.16b, v0.16b, #3 -# CHECK-NEXT: 1 4 1.00 sqshl v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 sqshl v0.2s, v0.2s, #3 -# CHECK-NEXT: 1 4 0.50 sqshl v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 sqshl v0.4h, v0.4h, #3 -# CHECK-NEXT: 1 4 0.50 sqshl v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 sqshl v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 sqshl v0.8b, v0.8b, #3 -# CHECK-NEXT: 1 4 0.50 sqshl v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 sqshl v0.8h, v0.8h, #3 +# CHECK-NEXT: 1 4 0.50 sqshl d15, d16, #51 +# CHECK-NEXT: 1 4 0.50 uqshl b18, b15, #6 +# CHECK-NEXT: 1 4 0.50 uqshl h11, h18, #7 +# CHECK-NEXT: 1 4 0.50 uqshl s14, s19, #18 +# CHECK-NEXT: 1 4 0.50 uqshl d15, d12, #19 # CHECK-NEXT: 1 4 0.50 sqshlu b15, b18, #6 -# CHECK-NEXT: 1 4 0.50 sqshlu d11, d13, #32 # CHECK-NEXT: 1 4 0.50 sqshlu h19, h17, #6 # CHECK-NEXT: 1 4 0.50 sqshlu s16, s14, #25 -# CHECK-NEXT: 1 4 1.00 sqshlu v0.16b, v0.16b, #3 -# CHECK-NEXT: 1 4 1.00 sqshlu v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 sqshlu v0.2s, v0.2s, #3 -# CHECK-NEXT: 1 4 0.50 sqshlu v0.4h, v0.4h, #3 -# CHECK-NEXT: 1 4 1.00 sqshlu v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 sqshlu v0.8b, v0.8b, #3 -# CHECK-NEXT: 1 4 1.00 sqshlu v0.8h, v0.8h, #3 +# CHECK-NEXT: 1 4 0.50 sqshlu d11, d13, #32 +# CHECK-NEXT: 1 2 0.50 sri d10, d12, #14 +# CHECK-NEXT: 1 2 0.50 sli d10, d14, #12 # CHECK-NEXT: 1 4 0.50 sqshrn b10, h15, #5 # CHECK-NEXT: 1 4 0.50 sqshrn h17, s10, #4 # CHECK-NEXT: 1 4 0.50 sqshrn s18, d10, #31 -# CHECK-NEXT: 1 4 0.50 sqshrn v0.2s, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 sqshrn v0.4h, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 sqshrn v0.8b, v0.8h, #3 -# CHECK-NEXT: 1 4 1.00 sqshrn2 v0.16b, v0.8h, #3 -# CHECK-NEXT: 1 4 1.00 sqshrn2 v0.4s, v0.2d, #3 -# CHECK-NEXT: 1 4 1.00 sqshrn2 v0.8h, v0.4s, #3 +# CHECK-NEXT: 1 4 0.50 uqshrn b12, h10, #7 +# CHECK-NEXT: 1 4 0.50 uqshrn h10, s14, #5 +# CHECK-NEXT: 1 4 0.50 uqshrn s10, d12, #13 +# CHECK-NEXT: 1 4 0.50 sqrshrn b10, h13, #2 +# CHECK-NEXT: 1 4 0.50 sqrshrn h15, s10, #6 +# CHECK-NEXT: 1 4 0.50 sqrshrn s15, d12, #9 +# CHECK-NEXT: 1 4 0.50 uqrshrn b10, h12, #5 +# CHECK-NEXT: 1 4 0.50 uqrshrn h12, s10, #14 +# CHECK-NEXT: 1 4 0.50 uqrshrn s10, d10, #25 # CHECK-NEXT: 1 4 0.50 sqshrun b15, h10, #7 # CHECK-NEXT: 1 4 0.50 sqshrun h20, s14, #3 # CHECK-NEXT: 1 4 0.50 sqshrun s10, d15, #15 -# CHECK-NEXT: 1 4 0.50 sqshrun v0.2s, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 sqshrun v0.4h, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 sqshrun v0.8b, v0.8h, #3 -# CHECK-NEXT: 1 4 1.00 sqshrun2 v0.16b, v0.8h, #3 -# CHECK-NEXT: 1 4 1.00 sqshrun2 v0.4s, v0.2d, #3 -# CHECK-NEXT: 1 4 1.00 sqshrun2 v0.8h, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 sqsub s20, s10, s7 -# CHECK-NEXT: 1 4 1.00 sqsub v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 1.00 sqsub v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 sqsub v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 0.50 sqxtn b18, h18 -# CHECK-NEXT: 1 4 0.50 sqxtn h20, s17 -# CHECK-NEXT: 1 4 0.50 sqxtn s19, d14 -# CHECK-NEXT: 1 4 1.00 sqxtn v0.2s, v0.2d -# CHECK-NEXT: 1 4 1.00 sqxtn v0.4h, v0.4s -# CHECK-NEXT: 1 4 1.00 sqxtn v0.8b, v0.8h -# CHECK-NEXT: 1 4 1.00 sqxtn2 v0.16b, v0.8h -# CHECK-NEXT: 1 4 1.00 sqxtn2 v0.4s, v0.2d -# CHECK-NEXT: 1 4 1.00 sqxtn2 v0.8h, v0.4s -# CHECK-NEXT: 1 4 0.50 sqxtun b19, h14 -# CHECK-NEXT: 1 4 0.50 sqxtun h21, s15 -# CHECK-NEXT: 1 4 0.50 sqxtun s20, d12 -# CHECK-NEXT: 1 4 1.00 sqxtun v0.2s, v0.2d -# CHECK-NEXT: 1 4 1.00 sqxtun v0.4h, v0.4s -# CHECK-NEXT: 1 4 1.00 sqxtun v0.8b, v0.8h -# CHECK-NEXT: 1 4 1.00 sqxtun2 v0.16b, v0.8h -# CHECK-NEXT: 1 4 1.00 sqxtun2 v0.4s, v0.2d -# CHECK-NEXT: 1 4 1.00 sqxtun2 v0.8h, v0.4s -# CHECK-NEXT: 1 4 0.50 srhadd v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 srhadd v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 srhadd v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 0.50 sri d10, d12, #14 -# CHECK-NEXT: 1 4 1.00 sri v0.16b, v0.16b, #3 -# CHECK-NEXT: 1 4 1.00 sri v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 sri v0.2s, v0.2s, #3 -# CHECK-NEXT: 1 4 0.50 sri v0.4h, v0.4h, #3 -# CHECK-NEXT: 1 4 1.00 sri v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 sri v0.8b, v0.8b, #3 -# CHECK-NEXT: 1 4 1.00 sri v0.8h, v0.8h, #3 -# CHECK-NEXT: 1 4 0.50 srshl d16, d16, d16 -# CHECK-NEXT: 1 4 0.50 srshl v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 srshl v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 srshl v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 0.50 srshr d19, d18, #7 -# CHECK-NEXT: 1 4 1.00 srshr v0.16b, v0.16b, #3 -# CHECK-NEXT: 1 4 1.00 srshr v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 srshr v0.2s, v0.2s, #3 -# CHECK-NEXT: 1 4 0.50 srshr v0.4h, v0.4h, #3 -# CHECK-NEXT: 1 4 1.00 srshr v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 srshr v0.8b, v0.8b, #3 -# CHECK-NEXT: 1 4 1.00 srshr v0.8h, v0.8h, #3 -# CHECK-NEXT: 1 4 0.50 srsra d15, d11, #19 -# CHECK-NEXT: 1 4 1.00 srsra v0.16b, v0.16b, #3 -# CHECK-NEXT: 1 4 1.00 srsra v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 srsra v0.2s, v0.2s, #3 -# CHECK-NEXT: 1 4 0.50 srsra v0.4h, v0.4h, #3 -# CHECK-NEXT: 1 4 1.00 srsra v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 srsra v0.8b, v0.8b, #3 -# CHECK-NEXT: 1 4 1.00 srsra v0.8h, v0.8h, #3 -# CHECK-NEXT: 1 4 0.50 sshl d31, d31, d31 -# CHECK-NEXT: 1 4 1.00 sshl v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 sshl v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 sshl v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 sshl v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 0.50 sshll v0.2d, v0.2s, #3 -# CHECK-NEXT: 1 4 1.00 sshll2 v0.4s, v0.8h, #3 -# CHECK-NEXT: 1 4 0.50 sshr d15, d16, #12 -# CHECK-NEXT: 1 4 1.00 sshr v0.16b, v0.16b, #3 -# CHECK-NEXT: 1 4 1.00 sshr v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 sshr v0.2s, v0.2s, #3 -# CHECK-NEXT: 1 4 0.50 sshr v0.4h, v0.4h, #3 -# CHECK-NEXT: 1 4 1.00 sshr v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 sshr v0.8b, v0.8b, #3 -# CHECK-NEXT: 1 4 1.00 sshr v0.8h, v0.8h, #3 -# CHECK-NEXT: 1 4 0.50 ssra d18, d12, #21 -# CHECK-NEXT: 1 4 1.00 ssra v0.16b, v0.16b, #3 -# CHECK-NEXT: 1 4 1.00 ssra v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 ssra v0.2s, v0.2s, #3 -# CHECK-NEXT: 1 4 0.50 ssra v0.4h, v0.4h, #3 -# CHECK-NEXT: 1 4 1.00 ssra v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 ssra v0.8b, v0.8b, #3 -# CHECK-NEXT: 1 4 1.00 ssra v0.8h, v0.8h, #3 -# CHECK-NEXT: 1 4 1.00 ssubl v0.2d, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 ssubl v0.4s, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 ssubl v0.8h, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 ssubl2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 ssubl2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 ssubl2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 ssubw v0.2d, v0.2d, v0.2s -# CHECK-NEXT: 1 4 1.00 ssubw v0.4s, v0.4s, v0.4h -# CHECK-NEXT: 1 4 1.00 ssubw v0.8h, v0.8h, v0.8b -# CHECK-NEXT: 1 4 1.00 ssubw2 v0.2d, v0.2d, v0.4s -# CHECK-NEXT: 1 4 1.00 ssubw2 v0.4s, v0.4s, v0.8h -# CHECK-NEXT: 1 4 1.00 ssubw2 v0.8h, v0.8h, v0.16b +# CHECK-NEXT: 1 4 0.50 sqrshrun b17, h10, #6 +# CHECK-NEXT: 1 4 0.50 sqrshrun h10, s13, #15 +# CHECK-NEXT: 1 4 0.50 sqrshrun s22, d16, #31 +# CHECK-NEXT: 1 4 0.50 scvtf s22, s13, #32 +# CHECK-NEXT: 1 4 0.50 scvtf d21, d12, #64 +# CHECK-NEXT: 1 4 0.50 ucvtf s22, s13, #32 +# CHECK-NEXT: 1 4 0.50 ucvtf d21, d14, #64 +# CHECK-NEXT: 1 4 0.50 fcvtzs s21, s12, #1 +# CHECK-NEXT: 1 4 0.50 fcvtzs d21, d12, #1 +# CHECK-NEXT: 1 4 0.50 fcvtzu s21, s12, #1 +# CHECK-NEXT: 1 4 0.50 fcvtzu d21, d12, #1 +# CHECK-NEXT: 1 5 2.00 * ld1 { v0.16b }, [x0] +# CHECK-NEXT: 1 7 4.00 * ld1 { v15.8h, v16.8h }, [x15] +# CHECK-NEXT: 1 9 6.00 * ld1 { v31.4s, v0.4s, v1.4s }, [sp] +# CHECK-NEXT: 1 11 8.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] +# CHECK-NEXT: 1 5 2.00 * ld2 { v0.8b, v1.8b }, [x0] +# CHECK-NEXT: 1 6 3.00 * ld3 { v15.4h, v16.4h, v17.4h }, [x15] +# CHECK-NEXT: 1 7 4.00 * ld4 { v31.2s, v0.2s, v1.2s, v2.2s }, [sp] # CHECK-NEXT: 1 4 1.00 * st1 { v0.16b }, [x0] -# CHECK-NEXT: 2 5 2.00 * st1 { v0.2d, v1.2d, v2.2d }, [x0], #48 +# CHECK-NEXT: 1 4 1.00 * st1 { v15.8h, v16.8h }, [x15] +# CHECK-NEXT: 1 5 2.00 * st1 { v31.4s, v0.4s, v1.4s }, [sp] # CHECK-NEXT: 1 5 4.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] -# CHECK-NEXT: 2 4 1.00 * st1 { v0.4s, v1.4s }, [sp], #32 -# CHECK-NEXT: 1 5 2.00 * st1 { v0.4s, v1.4s, v2.4s }, [sp] +# CHECK-NEXT: 1 5 2.00 * st2 { v0.8b, v1.8b }, [x0] +# CHECK-NEXT: 1 5 4.00 * st3 { v15.4h, v16.4h, v17.4h }, [x15] +# CHECK-NEXT: 1 5 4.00 * st4 { v31.2s, v0.2s, v1.2s, v2.2s }, [sp] +# CHECK-NEXT: 2 5 2.00 * ld1 { v15.8h }, [x15], x2 +# CHECK-NEXT: 2 7 4.00 * ld1 { v31.4s, v0.4s }, [sp], #32 +# CHECK-NEXT: 2 9 6.00 * ld1 { v0.2d, v1.2d, v2.2d }, [x0], #48 +# CHECK-NEXT: 2 7 4.00 * ld1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3 +# CHECK-NEXT: 2 7 4.00 * ld2 { v0.16b, v1.16b }, [x0], x1 +# CHECK-NEXT: 2 9 6.00 * ld3 { v15.8h, v16.8h, v17.8h }, [x15], x2 +# CHECK-NEXT: 2 11 8.00 * ld4 { v31.4s, v0.4s, v1.4s, v2.4s }, [sp], #64 +# CHECK-NEXT: 2 4 1.00 * st1 { v15.8h }, [x15], x2 +# CHECK-NEXT: 2 4 1.00 * st1 { v31.4s, v0.4s }, [sp], #32 +# CHECK-NEXT: 2 5 2.00 * st1 { v0.2d, v1.2d, v2.2d }, [x0], #48 # CHECK-NEXT: 2 5 4.00 * st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3 -# CHECK-NEXT: 2 4 1.00 * st1 { v0.8h }, [x15], x2 -# CHECK-NEXT: 1 4 1.00 * st1 { v0.8h, v1.8h }, [x15] -# CHECK-NEXT: 1 4 1.00 * st1 { v0.d }[1], [x0] -# CHECK-NEXT: 2 4 1.00 * st1 { v0.d }[1], [x0], #8 # CHECK-NEXT: 2 5 4.00 * st2 { v0.16b, v1.16b }, [x0], x1 -# CHECK-NEXT: 1 5 2.00 * st2 { v0.8b, v1.8b }, [x0] -# CHECK-NEXT: 1 5 2.00 * st2 { v0.s, v1.s }[3], [sp] -# CHECK-NEXT: 2 5 2.00 * st2 { v0.s, v1.s }[3], [sp], #8 -# CHECK-NEXT: 1 5 4.00 * st3 { v0.4h, v1.4h, v2.4h }, [x15] -# CHECK-NEXT: 2 5 4.00 * st3 { v0.8h, v1.8h, v2.8h }, [x15], x2 -# CHECK-NEXT: 1 5 2.00 * st3 { v0.h, v1.h, v2.h }[7], [x15] -# CHECK-NEXT: 2 5 2.00 * st3 { v0.h, v1.h, v2.h }[7], [x15], #6 -# CHECK-NEXT: 1 5 4.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] -# CHECK-NEXT: 2 5 4.00 * st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64 +# CHECK-NEXT: 2 5 4.00 * st3 { v15.8h, v16.8h, v17.8h }, [x15], x2 +# CHECK-NEXT: 2 5 4.00 * st4 { v31.4s, v0.4s, v1.4s, v2.4s }, [sp], #64 +# CHECK-NEXT: 1 4 1.00 * ld1r { v0.16b }, [x0] +# CHECK-NEXT: 1 4 1.00 * ld1r { v15.8h }, [x15] +# CHECK-NEXT: 1 5 2.00 * ld2r { v31.4s, v0.4s }, [sp] +# CHECK-NEXT: 1 5 2.00 * ld2r { v0.2d, v1.2d }, [x0] +# CHECK-NEXT: 1 5 2.00 * ld3r { v0.8b, v1.8b, v2.8b }, [x0] +# CHECK-NEXT: 1 5 2.00 * ld3r { v15.4h, v16.4h, v17.4h }, [x15] +# CHECK-NEXT: 1 5 2.00 * ld4r { v31.2s, v0.2s, v1.2s, v2.2s }, [sp] +# CHECK-NEXT: 1 5 2.00 * ld4r { v31.1d, v0.1d, v1.1d, v2.1d }, [sp] +# CHECK-NEXT: 1 4 1.00 * ld1 { v0.b }[9], [x0] +# CHECK-NEXT: 1 5 2.00 * ld2 { v15.h, v16.h }[7], [x15] +# CHECK-NEXT: 1 5 2.00 * ld3 { v31.s, v0.s, v1.s }[3], [sp] +# CHECK-NEXT: 1 5 2.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0] +# CHECK-NEXT: 1 4 1.00 * st1 { v0.d }[1], [x0] +# CHECK-NEXT: 1 5 2.00 * st2 { v31.s, v0.s }[3], [sp] +# CHECK-NEXT: 1 5 2.00 * st3 { v15.h, v16.h, v17.h }[7], [x15] # CHECK-NEXT: 1 5 2.00 * st4 { v0.b, v1.b, v2.b, v3.b }[9], [x0] +# CHECK-NEXT: 2 4 1.00 * ld1r { v0.16b }, [x0], #1 +# CHECK-NEXT: 2 4 1.00 * ld1r { v15.8h }, [x15], #2 +# CHECK-NEXT: 2 5 2.00 * ld2r { v31.4s, v0.4s }, [sp], #8 +# CHECK-NEXT: 2 5 2.00 * ld2r { v0.2d, v1.2d }, [x0], #16 +# CHECK-NEXT: 2 5 2.00 * ld3r { v0.8b, v1.8b, v2.8b }, [x0], #3 +# CHECK-NEXT: 2 5 2.00 * ld3r { v15.4h, v16.4h, v17.4h }, [x15], #6 +# CHECK-NEXT: 2 5 2.00 * ld4r { v31.2s, v0.2s, v1.2s, v2.2s }, [sp], x30 +# CHECK-NEXT: 2 5 2.00 * ld4r { v31.1d, v0.1d, v1.1d, v2.1d }, [sp], x7 +# CHECK-NEXT: 2 4 1.00 * ld1 { v0.b }[9], [x0], #1 +# CHECK-NEXT: 2 5 2.00 * ld2 { v15.h, v16.h }[7], [x15], #4 +# CHECK-NEXT: 2 5 2.00 * ld3 { v31.s, v0.s, v1.s }[3], [sp], x3 +# CHECK-NEXT: 2 5 2.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0], #32 +# CHECK-NEXT: 2 5 2.00 * ld4 { v0.h, v1.h, v2.h, v3.h }[7], [x0], x0 +# CHECK-NEXT: 2 4 1.00 * st1 { v0.d }[1], [x0], #8 +# CHECK-NEXT: 2 5 2.00 * st2 { v31.s, v0.s }[3], [sp], #8 +# CHECK-NEXT: 2 5 2.00 * st3 { v15.h, v16.h, v17.h }[7], [x15], #6 # CHECK-NEXT: 2 5 2.00 * st4 { v0.b, v1.b, v2.b, v3.b }[9], [x0], x5 -# CHECK-NEXT: 1 4 0.50 sub d15, d5, d16 -# CHECK-NEXT: 1 4 1.00 sub v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 suqadd b19, b14 -# CHECK-NEXT: 1 4 0.50 suqadd d18, d22 -# CHECK-NEXT: 1 4 0.50 suqadd h20, h15 -# CHECK-NEXT: 1 4 0.50 suqadd s21, s12 -# CHECK-NEXT: 1 4 1.00 suqadd v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 suqadd v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 suqadd v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 suqadd v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 suqadd v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 suqadd v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 suqadd v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 tbl v0.16b, { v0.16b }, v0.16b -# CHECK-NEXT: 1 4 1.00 tbl v0.16b, { v0.16b, v1.16b }, v0.16b -# CHECK-NEXT: 1 4 1.00 tbl v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b -# CHECK-NEXT: 1 4 1.00 tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b -# CHECK-NEXT: 1 4 0.50 tbl v0.8b, { v0.16b }, v0.8b -# CHECK-NEXT: 1 4 0.50 tbl v0.8b, { v0.16b, v1.16b }, v0.8b -# CHECK-NEXT: 1 4 0.50 tbl v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b -# CHECK-NEXT: 1 4 0.50 tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b -# CHECK-NEXT: 1 4 1.00 tbx v0.16b, { v0.16b }, v0.16b -# CHECK-NEXT: 1 4 1.00 tbx v0.16b, { v0.16b, v1.16b }, v0.16b -# CHECK-NEXT: 1 4 1.00 tbx v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b -# CHECK-NEXT: 1 4 1.00 tbx v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b -# CHECK-NEXT: 1 4 0.50 tbx v0.8b, { v0.16b }, v0.8b -# CHECK-NEXT: 1 4 0.50 tbx v0.8b, { v0.16b, v1.16b }, v0.8b -# CHECK-NEXT: 1 4 0.50 tbx v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b -# CHECK-NEXT: 1 4 0.50 tbx v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b -# CHECK-NEXT: 1 4 1.00 trn1 v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 trn1 v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 trn1 v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 trn1 v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 trn1 v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 trn1 v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 trn1 v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 trn2 v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 trn2 v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 trn2 v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 trn2 v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 trn2 v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 trn2 v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 trn2 v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 uaba v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 uabal v0.2d, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 uabal v0.4s, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 uabal v0.8h, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 uabal2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 uabal2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 uabal2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 uabd v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 uabdl v0.2d, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 uabdl v0.4s, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 uabdl v0.8h, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 uabdl2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 uabdl2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 uabdl2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 uadalp v0.1d, v0.2s -# CHECK-NEXT: 1 4 1.00 uadalp v0.2d, v0.4s -# CHECK-NEXT: 1 4 0.50 uadalp v0.2s, v0.4h -# CHECK-NEXT: 1 4 0.50 uadalp v0.4h, v0.8b -# CHECK-NEXT: 1 4 1.00 uadalp v0.4s, v0.8h -# CHECK-NEXT: 1 4 1.00 uadalp v0.8h, v0.16b -# CHECK-NEXT: 1 4 1.00 uaddl v0.2d, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 uaddl v0.4s, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 uaddl v0.8h, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 uaddl2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 uaddl2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 uaddl2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 uaddlp v0.1d, v0.2s -# CHECK-NEXT: 1 4 1.00 uaddlp v0.2d, v0.4s -# CHECK-NEXT: 1 4 0.50 uaddlp v0.2s, v0.4h -# CHECK-NEXT: 1 4 0.50 uaddlp v0.4h, v0.8b -# CHECK-NEXT: 1 4 1.00 uaddlp v0.4s, v0.8h -# CHECK-NEXT: 1 4 1.00 uaddlp v0.8h, v0.16b -# CHECK-NEXT: 1 4 1.00 uaddw v0.2d, v0.2d, v0.2s -# CHECK-NEXT: 1 4 1.00 uaddw v0.4s, v0.4s, v0.4h -# CHECK-NEXT: 1 4 1.00 uaddw v0.8h, v0.8h, v0.8b -# CHECK-NEXT: 1 4 1.00 uaddw2 v0.2d, v0.2d, v0.4s -# CHECK-NEXT: 1 4 1.00 uaddw2 v0.4s, v0.4s, v0.8h -# CHECK-NEXT: 1 4 1.00 uaddw2 v0.8h, v0.8h, v0.16b -# CHECK-NEXT: 1 4 0.50 ucvtf d21, d14 -# CHECK-NEXT: 1 4 0.50 ucvtf d21, d14, #64 -# CHECK-NEXT: 1 4 0.50 ucvtf s22, s13 -# CHECK-NEXT: 1 4 0.50 ucvtf s22, s13, #32 -# CHECK-NEXT: 1 4 0.50 ucvtf v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 ucvtf v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 ucvtf v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 ucvtf v0.2s, v0.2s, #3 -# CHECK-NEXT: 1 4 0.50 ucvtf v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 ucvtf v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 ucvtf v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 ucvtf v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 uhadd v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 uhadd v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 uhsub v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 umax v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 umax v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 umax v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 umaxp v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 umaxp v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 umaxp v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 umin v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 umin v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 umin v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 0.50 uminp v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 uminp v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 uminp v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 umlal v0.2d, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 umlal v0.4s, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 umlal v0.8h, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 umlal2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 umlal2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 umlal2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 umlsl v0.2d, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 umlsl v0.4s, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 umlsl v0.8h, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 umlsl2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 umlsl2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 umlsl2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 umull v0.2d, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 umull v0.4s, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 umull v0.8h, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 umull2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 umull2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 umull2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 uqadd h0, h1, h5 -# CHECK-NEXT: 1 4 1.00 uqadd v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 uqrshl b11, b20, b30 -# CHECK-NEXT: 1 4 0.50 uqrshl s23, s20, s16 -# CHECK-NEXT: 1 4 1.00 uqrshl v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 uqrshl v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 uqrshl v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 uqrshl v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 uqrshrn b10, h12, #5 -# CHECK-NEXT: 1 4 0.50 uqrshrn h12, s10, #14 -# CHECK-NEXT: 1 4 0.50 uqrshrn s10, d10, #25 -# CHECK-NEXT: 1 4 0.50 uqrshrn v0.2s, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 uqrshrn v0.4h, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 uqrshrn v0.8b, v0.8h, #3 -# CHECK-NEXT: 1 4 1.00 uqrshrn2 v0.16b, v0.8h, #3 -# CHECK-NEXT: 1 4 1.00 uqrshrn2 v0.4s, v0.2d, #3 -# CHECK-NEXT: 1 4 1.00 uqrshrn2 v0.8h, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 uqshl b11, b20, b30 -# CHECK-NEXT: 1 4 0.50 uqshl b18, b15, #6 -# CHECK-NEXT: 1 4 0.50 uqshl d15, d12, #19 -# CHECK-NEXT: 1 4 0.50 uqshl h11, h18, #7 -# CHECK-NEXT: 1 4 0.50 uqshl s14, s19, #18 -# CHECK-NEXT: 1 4 0.50 uqshl s23, s20, s16 -# CHECK-NEXT: 1 4 1.00 uqshl v0.16b, v0.16b, #3 -# CHECK-NEXT: 1 4 1.00 uqshl v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 uqshl v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 1.00 uqshl v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 uqshl v0.2s, v0.2s, #3 -# CHECK-NEXT: 1 4 0.50 uqshl v0.4h, v0.4h, #3 -# CHECK-NEXT: 1 4 1.00 uqshl v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 1.00 uqshl v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 uqshl v0.8b, v0.8b, #3 -# CHECK-NEXT: 1 4 1.00 uqshl v0.8h, v0.8h, #3 -# CHECK-NEXT: 1 4 1.00 uqshl v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 uqshrn b12, h10, #7 -# CHECK-NEXT: 1 4 0.50 uqshrn h10, s14, #5 -# CHECK-NEXT: 1 4 0.50 uqshrn s10, d12, #13 -# CHECK-NEXT: 1 4 0.50 uqshrn v0.2s, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 uqshrn v0.4h, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 uqshrn v0.8b, v0.8h, #3 -# CHECK-NEXT: 1 4 1.00 uqshrn2 v0.16b, v0.8h, #3 -# CHECK-NEXT: 1 4 1.00 uqshrn2 v0.4s, v0.2d, #3 -# CHECK-NEXT: 1 4 1.00 uqshrn2 v0.8h, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 uqsub d16, d16, d16 -# CHECK-NEXT: 1 4 0.50 uqsub v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 uqxtn b18, h18 -# CHECK-NEXT: 1 4 0.50 uqxtn h20, s17 -# CHECK-NEXT: 1 4 0.50 uqxtn s19, d14 -# CHECK-NEXT: 1 4 1.00 uqxtn v0.2s, v0.2d -# CHECK-NEXT: 1 4 1.00 uqxtn v0.4h, v0.4s -# CHECK-NEXT: 1 4 1.00 uqxtn v0.8b, v0.8h -# CHECK-NEXT: 1 4 1.00 uqxtn2 v0.16b, v0.8h -# CHECK-NEXT: 1 4 1.00 uqxtn2 v0.4s, v0.2d -# CHECK-NEXT: 1 4 1.00 uqxtn2 v0.8h, v0.4s -# CHECK-NEXT: 1 4 0.50 urecpe v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 urecpe v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 urhadd v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 urhadd v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 urhadd v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 urshl d8, d7, d4 -# CHECK-NEXT: 1 4 1.00 urshl v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 urshl v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 1.00 urshl v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 urshl v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 urshr d20, d23, #31 -# CHECK-NEXT: 1 4 1.00 urshr v0.16b, v0.16b, #3 -# CHECK-NEXT: 1 4 1.00 urshr v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 urshr v0.2s, v0.2s, #3 -# CHECK-NEXT: 1 4 0.50 urshr v0.4h, v0.4h, #3 -# CHECK-NEXT: 1 4 1.00 urshr v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 urshr v0.8b, v0.8b, #3 -# CHECK-NEXT: 1 4 1.00 urshr v0.8h, v0.8h, #3 -# CHECK-NEXT: 1 12 9.00 ursqrte v0.2s, v0.2s -# CHECK-NEXT: 1 12 9.00 ursqrte v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 ursra d18, d10, #13 -# CHECK-NEXT: 1 4 1.00 ursra v0.16b, v0.16b, #3 -# CHECK-NEXT: 1 4 1.00 ursra v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 ursra v0.2s, v0.2s, #3 -# CHECK-NEXT: 1 4 0.50 ursra v0.4h, v0.4h, #3 -# CHECK-NEXT: 1 4 1.00 ursra v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 ursra v0.8b, v0.8b, #3 -# CHECK-NEXT: 1 4 1.00 ursra v0.8h, v0.8h, #3 -# CHECK-NEXT: 1 4 0.50 ushl d0, d0, d0 -# CHECK-NEXT: 1 4 1.00 ushl v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 ushl v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 ushl v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 ushll v0.4s, v0.4h, #3 -# CHECK-NEXT: 1 4 1.00 ushll2 v0.8h, v0.16b, #3 -# CHECK-NEXT: 1 4 0.50 ushr d10, d17, #18 -# CHECK-NEXT: 1 4 1.00 ushr v0.16b, v0.16b, #3 -# CHECK-NEXT: 1 4 1.00 ushr v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 ushr v0.2s, v0.2s, #3 -# CHECK-NEXT: 1 4 0.50 ushr v0.4h, v0.4h, #3 -# CHECK-NEXT: 1 4 1.00 ushr v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 ushr v0.8b, v0.8b, #3 -# CHECK-NEXT: 1 4 1.00 ushr v0.8h, v0.8h, #3 -# CHECK-NEXT: 1 4 0.50 usqadd b19, b14 -# CHECK-NEXT: 1 4 0.50 usqadd d18, d22 -# CHECK-NEXT: 1 4 0.50 usqadd h20, h15 -# CHECK-NEXT: 1 4 0.50 usqadd s21, s12 -# CHECK-NEXT: 1 4 1.00 usqadd v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 usqadd v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 usqadd v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 usqadd v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 usqadd v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 usqadd v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 usqadd v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 usra d20, d13, #61 -# CHECK-NEXT: 1 4 1.00 usra v0.16b, v0.16b, #3 -# CHECK-NEXT: 1 4 1.00 usra v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 usra v0.2s, v0.2s, #3 -# CHECK-NEXT: 1 4 0.50 usra v0.4h, v0.4h, #3 -# CHECK-NEXT: 1 4 1.00 usra v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 usra v0.8b, v0.8b, #3 -# CHECK-NEXT: 1 4 1.00 usra v0.8h, v0.8h, #3 -# CHECK-NEXT: 1 4 1.00 usubl v0.2d, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 usubl v0.4s, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 usubl v0.8h, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 usubl2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 usubl2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 usubl2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 usubw v0.2d, v0.2d, v0.2s -# CHECK-NEXT: 1 4 1.00 usubw v0.4s, v0.4s, v0.4h -# CHECK-NEXT: 1 4 1.00 usubw v0.8h, v0.8h, v0.8b -# CHECK-NEXT: 1 4 1.00 usubw2 v0.2d, v0.2d, v0.4s -# CHECK-NEXT: 1 4 1.00 usubw2 v0.4s, v0.4s, v0.8h -# CHECK-NEXT: 1 4 1.00 usubw2 v0.8h, v0.8h, v0.16b -# CHECK-NEXT: 1 4 1.00 uzp1 v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 uzp1 v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 uzp1 v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 uzp1 v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 uzp1 v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 uzp1 v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 uzp1 v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 uzp2 v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 uzp2 v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 uzp2 v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 uzp2 v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 uzp2 v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 uzp2 v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 uzp2 v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 2 0.50 xtn v0.2s, v0.2d -# CHECK-NEXT: 1 2 0.50 xtn v0.4h, v0.4s -# CHECK-NEXT: 1 2 0.50 xtn v0.8b, v0.8h -# CHECK-NEXT: 1 2 0.50 xtn2 v0.16b, v0.8h -# CHECK-NEXT: 1 2 0.50 xtn2 v0.4s, v0.2d -# CHECK-NEXT: 1 2 0.50 xtn2 v0.8h, v0.4s -# CHECK-NEXT: 1 4 1.00 zip1 v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 zip1 v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 zip1 v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 zip1 v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 zip1 v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 zip1 v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 zip1 v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 zip2 v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 zip2 v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 zip2 v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 zip2 v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 zip2 v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 zip2 v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 zip2 v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 2 0.50 ext v0.8b, v1.8b, v2.8b, #3 +# CHECK-NEXT: 1 2 1.00 ext v0.16b, v1.16b, v2.16b, #3 +# CHECK-NEXT: 1 2 0.50 uzp1 v1.8b, v1.8b, v2.8b +# CHECK-NEXT: 1 2 1.00 uzp1 v2.16b, v1.16b, v2.16b +# CHECK-NEXT: 1 2 0.50 uzp1 v3.4h, v1.4h, v2.4h +# CHECK-NEXT: 1 2 1.00 uzp1 v4.8h, v1.8h, v2.8h +# CHECK-NEXT: 1 2 0.50 uzp1 v5.2s, v1.2s, v2.2s +# CHECK-NEXT: 1 2 1.00 uzp1 v6.4s, v1.4s, v2.4s +# CHECK-NEXT: 1 2 1.00 uzp1 v7.2d, v1.2d, v2.2d +# CHECK-NEXT: 1 2 0.50 trn1 v8.8b, v1.8b, v2.8b +# CHECK-NEXT: 1 2 1.00 trn1 v9.16b, v1.16b, v2.16b +# CHECK-NEXT: 1 2 0.50 trn1 v10.4h, v1.4h, v2.4h +# CHECK-NEXT: 1 2 1.00 trn1 v27.8h, v7.8h, v2.8h +# CHECK-NEXT: 1 2 0.50 trn1 v12.2s, v7.2s, v2.2s +# CHECK-NEXT: 1 2 1.00 trn1 v29.4s, v6.4s, v2.4s +# CHECK-NEXT: 1 2 1.00 trn1 v14.2d, v6.2d, v2.2d +# CHECK-NEXT: 1 2 0.50 zip1 v31.8b, v5.8b, v2.8b +# CHECK-NEXT: 1 2 1.00 zip1 v0.16b, v5.16b, v2.16b +# CHECK-NEXT: 1 2 0.50 zip1 v17.4h, v4.4h, v2.4h +# CHECK-NEXT: 1 2 1.00 zip1 v2.8h, v4.8h, v2.8h +# CHECK-NEXT: 1 2 0.50 zip1 v19.2s, v3.2s, v2.2s +# CHECK-NEXT: 1 2 1.00 zip1 v4.4s, v3.4s, v2.4s +# CHECK-NEXT: 1 2 1.00 zip1 v21.2d, v2.2d, v2.2d +# CHECK-NEXT: 1 2 0.50 uzp2 v6.8b, v2.8b, v2.8b +# CHECK-NEXT: 1 2 1.00 uzp2 v23.16b, v1.16b, v2.16b +# CHECK-NEXT: 1 2 0.50 uzp2 v8.4h, v1.4h, v2.4h +# CHECK-NEXT: 1 2 1.00 uzp2 v25.8h, v0.8h, v2.8h +# CHECK-NEXT: 1 2 0.50 uzp2 v10.2s, v0.2s, v2.2s +# CHECK-NEXT: 1 2 1.00 uzp2 v27.4s, v7.4s, v2.4s +# CHECK-NEXT: 1 2 1.00 uzp2 v12.2d, v7.2d, v2.2d +# CHECK-NEXT: 1 2 0.50 trn2 v29.8b, v6.8b, v2.8b +# CHECK-NEXT: 1 2 1.00 trn2 v14.16b, v6.16b, v2.16b +# CHECK-NEXT: 1 2 0.50 trn2 v31.4h, v5.4h, v2.4h +# CHECK-NEXT: 1 2 1.00 trn2 v0.8h, v5.8h, v2.8h +# CHECK-NEXT: 1 2 0.50 trn2 v17.2s, v4.2s, v2.2s +# CHECK-NEXT: 1 2 1.00 trn2 v2.4s, v4.4s, v2.4s +# CHECK-NEXT: 1 2 1.00 trn2 v19.2d, v3.2d, v2.2d +# CHECK-NEXT: 1 2 0.50 zip2 v4.8b, v3.8b, v2.8b +# CHECK-NEXT: 1 2 1.00 zip2 v21.16b, v2.16b, v2.16b +# CHECK-NEXT: 1 2 0.50 zip2 v6.4h, v2.4h, v2.4h +# CHECK-NEXT: 1 2 1.00 zip2 v23.8h, v1.8h, v2.8h +# CHECK-NEXT: 1 2 0.50 zip2 v8.2s, v1.2s, v2.2s +# CHECK-NEXT: 1 2 1.00 zip2 v25.4s, v0.4s, v2.4s +# CHECK-NEXT: 1 2 1.00 zip2 v10.2d, v0.2d, v2.2d +# CHECK-NEXT: 1 4 1.00 fmul s0, s1, v1.s[0] +# CHECK-NEXT: 1 4 1.00 fmul s0, s1, v1.s[3] +# CHECK-NEXT: 1 4 1.00 fmul d0, d1, v1.d[0] +# CHECK-NEXT: 1 4 1.00 fmul d0, d1, v1.d[1] +# CHECK-NEXT: 1 4 1.00 fmul d15, d15, v15.d[1] +# CHECK-NEXT: 1 4 1.00 fmulx s3, s5, v7.s[0] +# CHECK-NEXT: 1 4 1.00 fmulx s3, s5, v7.s[3] +# CHECK-NEXT: 1 4 1.00 fmulx s3, s5, v15.s[3] +# CHECK-NEXT: 1 4 1.00 fmulx d0, d4, v8.d[0] +# CHECK-NEXT: 1 4 1.00 fmulx d0, d4, v8.d[1] +# CHECK-NEXT: 1 4 0.50 fmla s0, s1, v1.s[0] +# CHECK-NEXT: 1 4 0.50 fmla s0, s1, v1.s[3] +# CHECK-NEXT: 1 4 0.50 fmla d0, d1, v1.d[0] +# CHECK-NEXT: 1 4 0.50 fmla d0, d1, v1.d[1] +# CHECK-NEXT: 1 4 0.50 fmla d15, d15, v15.d[1] +# CHECK-NEXT: 1 4 0.50 fmls s3, s5, v7.s[0] +# CHECK-NEXT: 1 4 0.50 fmls s3, s5, v7.s[3] +# CHECK-NEXT: 1 4 0.50 fmls s3, s5, v15.s[3] +# CHECK-NEXT: 1 4 0.50 fmls d0, d4, v8.d[0] +# CHECK-NEXT: 1 4 0.50 fmls d0, d4, v8.d[1] +# CHECK-NEXT: 1 4 1.00 sqdmlal s0, h0, v0.h[0] +# CHECK-NEXT: 1 4 1.00 sqdmlal s0, h0, v0.h[1] +# CHECK-NEXT: 1 4 1.00 sqdmlal s0, h0, v0.h[2] +# CHECK-NEXT: 1 4 1.00 sqdmlal s0, h0, v0.h[3] +# CHECK-NEXT: 1 4 1.00 sqdmlal s0, h0, v0.h[4] +# CHECK-NEXT: 1 4 1.00 sqdmlal s0, h0, v0.h[5] +# CHECK-NEXT: 1 4 1.00 sqdmlal s0, h0, v0.h[6] +# CHECK-NEXT: 1 4 1.00 sqdmlal s0, h0, v0.h[7] +# CHECK-NEXT: 1 4 1.00 sqdmlal d8, s9, v15.s[0] +# CHECK-NEXT: 1 4 1.00 sqdmlal d8, s9, v15.s[1] +# CHECK-NEXT: 1 4 1.00 sqdmlal d8, s9, v15.s[2] +# CHECK-NEXT: 1 4 1.00 sqdmlal d8, s9, v15.s[3] +# CHECK-NEXT: 1 4 1.00 sqdmlsl s0, h0, v0.h[0] +# CHECK-NEXT: 1 4 1.00 sqdmlsl s0, h0, v0.h[1] +# CHECK-NEXT: 1 4 1.00 sqdmlsl s0, h0, v0.h[2] +# CHECK-NEXT: 1 4 1.00 sqdmlsl s0, h0, v0.h[3] +# CHECK-NEXT: 1 4 1.00 sqdmlsl s0, h0, v0.h[4] +# CHECK-NEXT: 1 4 1.00 sqdmlsl s0, h0, v0.h[5] +# CHECK-NEXT: 1 4 1.00 sqdmlsl s0, h0, v0.h[6] +# CHECK-NEXT: 1 4 1.00 sqdmlsl s0, h0, v0.h[7] +# CHECK-NEXT: 1 4 1.00 sqdmlsl d8, s9, v15.s[0] +# CHECK-NEXT: 1 4 1.00 sqdmlsl d8, s9, v15.s[1] +# CHECK-NEXT: 1 4 1.00 sqdmlsl d8, s9, v15.s[2] +# CHECK-NEXT: 1 4 1.00 sqdmlsl d8, s9, v15.s[3] +# CHECK-NEXT: 1 4 1.00 sqdmull s1, h1, v1.h[0] +# CHECK-NEXT: 1 4 1.00 sqdmull s1, h1, v1.h[1] +# CHECK-NEXT: 1 4 1.00 sqdmull s1, h1, v1.h[2] +# CHECK-NEXT: 1 4 1.00 sqdmull s1, h1, v1.h[3] +# CHECK-NEXT: 1 4 1.00 sqdmull s1, h1, v1.h[4] +# CHECK-NEXT: 1 4 1.00 sqdmull s1, h1, v1.h[5] +# CHECK-NEXT: 1 4 1.00 sqdmull s1, h1, v1.h[6] +# CHECK-NEXT: 1 4 1.00 sqdmull s1, h1, v1.h[7] +# CHECK-NEXT: 1 4 1.00 sqdmull d1, s1, v4.s[0] +# CHECK-NEXT: 1 4 1.00 sqdmull d1, s1, v4.s[1] +# CHECK-NEXT: 1 4 1.00 sqdmull d1, s1, v4.s[2] +# CHECK-NEXT: 1 4 1.00 sqdmull d1, s1, v4.s[3] +# CHECK-NEXT: 1 4 1.00 sqdmulh h7, h1, v14.h[0] +# CHECK-NEXT: 1 4 1.00 sqdmulh h7, h15, v8.h[1] +# CHECK-NEXT: 1 4 1.00 sqdmulh h7, h15, v8.h[2] +# CHECK-NEXT: 1 4 1.00 sqdmulh h7, h15, v8.h[3] +# CHECK-NEXT: 1 4 1.00 sqdmulh h7, h15, v8.h[4] +# CHECK-NEXT: 1 4 1.00 sqdmulh h7, h15, v8.h[5] +# CHECK-NEXT: 1 4 1.00 sqdmulh h7, h15, v8.h[6] +# CHECK-NEXT: 1 4 1.00 sqdmulh h7, h15, v8.h[7] +# CHECK-NEXT: 1 4 1.00 sqdmulh s15, s3, v4.s[0] +# CHECK-NEXT: 1 4 1.00 sqdmulh s15, s14, v16.s[1] +# CHECK-NEXT: 1 4 1.00 sqdmulh s15, s15, v16.s[2] +# CHECK-NEXT: 1 4 1.00 sqdmulh s15, s16, v17.s[3] +# CHECK-NEXT: 1 4 1.00 sqrdmulh h7, h1, v14.h[0] +# CHECK-NEXT: 1 4 1.00 sqrdmulh h7, h15, v8.h[1] +# CHECK-NEXT: 1 4 1.00 sqrdmulh h7, h15, v8.h[2] +# CHECK-NEXT: 1 4 1.00 sqrdmulh h7, h15, v8.h[3] +# CHECK-NEXT: 1 4 1.00 sqrdmulh h7, h15, v8.h[4] +# CHECK-NEXT: 1 4 1.00 sqrdmulh h7, h15, v8.h[5] +# CHECK-NEXT: 1 4 1.00 sqrdmulh h7, h15, v8.h[6] +# CHECK-NEXT: 1 4 1.00 sqrdmulh h7, h15, v8.h[7] +# CHECK-NEXT: 1 4 1.00 sqrdmulh s15, s3, v4.s[0] +# CHECK-NEXT: 1 4 1.00 sqrdmulh s15, s14, v16.s[1] +# CHECK-NEXT: 1 4 1.00 sqrdmulh s15, s15, v16.s[2] +# CHECK-NEXT: 1 4 1.00 sqrdmulh s15, s16, v17.s[3] +# CHECK-NEXT: 1 2 0.50 mov b0, v0.b[15] +# CHECK-NEXT: 1 2 0.50 mov h2, v31.h[5] +# CHECK-NEXT: 1 2 0.50 mov s17, v2.s[2] +# CHECK-NEXT: 1 2 0.50 mov d6, v12.d[1] +# CHECK-NEXT: 1 2 1.00 tbl v0.8b, { v1.16b }, v2.8b +# CHECK-NEXT: 1 3 2.00 tbl v16.8b, { v31.16b, v0.16b }, v2.8b +# CHECK-NEXT: 1 4 3.00 tbl v0.8b, { v1.16b, v2.16b, v3.16b }, v2.8b +# CHECK-NEXT: 1 5 4.00 tbl v16.8b, { v23.16b, v24.16b, v25.16b, v26.16b }, v2.8b +# CHECK-NEXT: 1 2 1.00 tbl v0.16b, { v1.16b }, v2.16b +# CHECK-NEXT: 1 3 2.00 tbl v16.16b, { v31.16b, v0.16b }, v2.16b +# CHECK-NEXT: 1 4 3.00 tbl v0.16b, { v1.16b, v2.16b, v3.16b }, v2.16b +# CHECK-NEXT: 1 5 4.00 tbl v0.16b, { v31.16b, v0.16b, v1.16b, v2.16b }, v2.16b +# CHECK-NEXT: 1 3 2.00 tbx v0.8b, { v1.16b }, v2.8b +# CHECK-NEXT: 1 4 3.00 tbx v16.8b, { v31.16b, v0.16b }, v2.8b +# CHECK-NEXT: 1 5 4.00 tbx v0.8b, { v1.16b, v2.16b, v3.16b }, v2.8b +# CHECK-NEXT: 1 6 5.00 tbx v16.8b, { v23.16b, v24.16b, v25.16b, v26.16b }, v2.8b +# CHECK-NEXT: 1 3 2.00 tbx v0.16b, { v1.16b }, v2.16b +# CHECK-NEXT: 1 4 3.00 tbx v16.16b, { v31.16b, v0.16b }, v2.16b +# CHECK-NEXT: 1 5 4.00 tbx v0.16b, { v1.16b, v2.16b, v3.16b }, v2.16b +# CHECK-NEXT: 1 6 5.00 tbx v16.16b, { v31.16b, v0.16b, v1.16b, v2.16b }, v2.16b +# CHECK-NEXT: 1 4 0.50 fcvtxn s22, d13 +# CHECK-NEXT: 1 4 0.50 fcvtas s12, s13 +# CHECK-NEXT: 1 4 0.50 fcvtas d21, d14 +# CHECK-NEXT: 1 4 0.50 fcvtau s12, s13 +# CHECK-NEXT: 1 4 0.50 fcvtau d21, d14 +# CHECK-NEXT: 1 4 0.50 fcvtms s22, s13 +# CHECK-NEXT: 1 4 0.50 fcvtms d21, d14 +# CHECK-NEXT: 1 4 0.50 fcvtmu s12, s13 +# CHECK-NEXT: 1 4 0.50 fcvtmu d21, d14 +# CHECK-NEXT: 1 4 0.50 fcvtns s22, s13 +# CHECK-NEXT: 1 4 0.50 fcvtns d21, d14 +# CHECK-NEXT: 1 4 0.50 fcvtnu s12, s13 +# CHECK-NEXT: 1 4 0.50 fcvtnu d21, d14 +# CHECK-NEXT: 1 4 0.50 fcvtps s22, s13 +# CHECK-NEXT: 1 4 0.50 fcvtps d21, d14 +# CHECK-NEXT: 1 4 0.50 fcvtpu s12, s13 +# CHECK-NEXT: 1 4 0.50 fcvtpu d21, d14 +# CHECK-NEXT: 1 4 0.50 fcvtzs s12, s13 +# CHECK-NEXT: 1 4 0.50 fcvtzs d21, d14 +# CHECK-NEXT: 1 4 0.50 fcvtzu s12, s13 +# CHECK-NEXT: 1 4 0.50 fcvtzu d21, d14 +# CHECK-NEXT: 1 4 0.50 fabd s29, s24, s20 +# CHECK-NEXT: 1 4 0.50 fabd d29, d24, d20 # CHECK: Resources: # CHECK-NEXT: [0.0] - CortexA55UnitALU @@ -2146,1066 +1737,861 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0.0] [0.1] [1] [2] [3.0] [3.1] [4] [5.0] [5.1] [6] [7] [8] -# CHECK-NEXT: - - - - 716.50 716.50 197.00 3.00 3.00 107.00 - 52.00 +# CHECK-NEXT: - - - - 652.50 652.50 85.00 31.00 31.00 107.00 - 52.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0.0] [0.1] [1] [2] [3.0] [3.1] [4] [5.0] [5.1] [6] [7] [8] Instructions: -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - abs d29, d24 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - abs v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - abs v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - abs v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - abs v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - abs v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - abs v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - abs v0.8h, v0.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - add v31.8b, v31.8b, v31.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sub v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fadd v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fsub v31.2s, v31.2s, v31.2s +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - mul v0.8b, v1.8b, v2.8b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmul v0.2s, v1.2s, v2.2s +# CHECK-NEXT: - - - - - - 10.00 - - - - - fdiv v31.2s, v31.2s, v31.2s +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - pmul v0.8b, v15.8b, v16.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - pmul v31.16b, v7.16b, v8.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - and v2.8b, v2.8b, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - orr v31.16b, v31.16b, v30.16b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - eor v0.16b, v1.16b, v2.16b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - orn v9.16b, v10.16b, v11.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - bic v31.8b, v30.8b, v29.8b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - bsl v0.8b, v1.8b, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - bit v31.16b, v31.16b, v31.16b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - bif v0.16b, v1.16b, v2.16b +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - mla v0.8b, v1.8b, v2.8b +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - mls v31.4h, v31.4h, v31.4h +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmla v0.2s, v1.2s, v2.2s +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmls v31.2s, v31.2s, v31.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - movi v31.4s, #255, lsl #24 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - mvni v0.2s, #0 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - bic v15.4h, #15, lsl #8 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - orr v16.8h, #31 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - movi v8.2s, #8, msl #8 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - mvni v16.4s, #16, msl #16 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - movi v16.8b, #255 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - movi v31.16b, #31 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - movi d15, #0xff00ff00ff00ff +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - movi v31.2d, #0xff0000ff0000ffff +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmov v0.2s, #13.00000000 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmov v15.4s, #1.00000000 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmov v31.2d, #-1.25000000 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - mov v1.16b, v15.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - mov v25.8b, v4.8b +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - uaba v0.8b, v1.8b, v2.8b +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - saba v31.16b, v30.16b, v29.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uabd v15.4h, v16.4h, v17.4h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sabd v5.4h, v4.4h, v6.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fabd v1.4s, v31.4s, v16.4s # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - add d17, d31, d29 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - add v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - addhn v0.2s, v0.2d, v0.2d -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - addhn v0.4h, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - addhn v0.8b, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - addhn2 v0.16b, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - addhn2 v0.4s, v0.2d, v0.2d -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - addhn2 v0.8h, v0.4s, v0.4s +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sub d15, d5, d16 +# CHECK-NEXT: - - - - - - 19.00 - - - - - frsqrts v31.2d, v15.2d, v8.2d +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frecps v5.4s, v7.4s, v16.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - facge v0.4s, v31.4s, v16.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - facgt v31.2d, v29.2d, v28.2d +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - cmeq v5.16b, v15.16b, v31.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmhs v1.8b, v16.8b, v30.8b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmge v20.4h, v11.4h, v23.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - cmhi v13.8h, v3.8h, v27.8h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - cmgt v9.4s, v4.4s, v28.4s +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmtst v21.2s, v19.2s, v18.2s +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmeq v0.2s, v15.2s, v16.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fcmge v31.4s, v7.4s, v29.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fcmgt v17.4s, v8.4s, v25.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - cmeq v31.16b, v15.16b, #0 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmge v3.8b, v15.8b, #0 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmgt v22.2s, v9.2s, #0 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - cmle v5.2d, v14.2d, #0 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - cmlt v13.8h, v11.8h, #0 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmeq v15.2s, v21.2s, #0.0 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fcmge v14.2d, v13.2d, #0.0 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fcmgt v9.4s, v23.4s, #0.0 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fcmle v11.2d, v6.2d, #0.0 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fcmlt v12.4s, v25.4s, #0.0 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shadd v0.8b, v31.8b, v29.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uhadd v15.16b, v16.16b, v17.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shsub v0.4h, v1.4h, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uhadd v5.8h, v7.8h, v8.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shsub v9.2s, v11.2s, v21.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uhsub v22.4s, v30.4s, v19.4s +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srhadd v3.8b, v5.8b, v7.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urhadd v7.16b, v17.16b, v27.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srhadd v10.4h, v11.4h, v13.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urhadd v1.8h, v2.8h, v3.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srhadd v4.2s, v5.2s, v6.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urhadd v7.4s, v7.4s, v7.4s +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqsub v0.8b, v1.8b, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqadd v0.16b, v1.16b, v2.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqsub v0.4h, v1.4h, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqadd v0.8h, v1.8h, v2.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqadd v0.2s, v1.2s, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqsub v0.4s, v1.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqsub v0.2d, v1.2d, v2.2d +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqadd b20, b11, b15 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqadd h0, h1, h5 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqsub s20, s10, s7 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqsub d16, d16, d16 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshl v10.8b, v15.8b, v22.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ushl v10.16b, v5.16b, v2.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshl v10.4h, v15.4h, v22.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ushl v10.8h, v5.8h, v2.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshl v10.2s, v15.2s, v22.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ushl v10.4s, v5.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sshl v0.2d, v1.2d, v2.2d +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshl v1.8b, v15.8b, v22.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshl v2.16b, v14.16b, v23.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshl v3.4h, v13.4h, v24.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshl v4.8h, v12.8h, v25.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshl v5.2s, v11.2s, v26.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshl v6.4s, v10.4s, v27.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshl v0.2d, v1.2d, v2.2d +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srshl v10.8b, v5.8b, v22.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urshl v10.16b, v5.16b, v2.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srshl v1.4h, v5.4h, v31.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urshl v1.8h, v5.8h, v2.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srshl v10.2s, v15.2s, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urshl v1.4s, v5.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urshl v0.2d, v1.2d, v2.2d +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshl v1.8b, v15.8b, v22.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqrshl v2.16b, v14.16b, v23.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshl v3.4h, v13.4h, v24.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqrshl v4.8h, v12.8h, v25.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshl v5.2s, v11.2s, v26.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqrshl v6.4s, v10.4s, v27.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqrshl v6.4s, v10.4s, v27.4s +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshl d31, d31, d31 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ushl d0, d0, d0 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshl d31, d31, d31 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshl s23, s20, s16 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshl h3, h4, h15 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshl b11, b20, b30 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srshl d16, d16, d16 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - urshl d8, d7, d4 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshl d31, d31, d31 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqrshl s23, s20, s16 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshl h3, h4, h15 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqrshl b11, b20, b30 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - smax v1.8b, v15.8b, v22.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umax v2.16b, v14.16b, v23.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - smax v3.4h, v13.4h, v24.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umax v4.8h, v12.8h, v25.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - smax v5.2s, v11.2s, v26.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umax v6.4s, v10.4s, v27.4s +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - umin v1.8b, v15.8b, v22.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smin v2.16b, v14.16b, v23.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - umin v3.4h, v13.4h, v24.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smin v4.8h, v12.8h, v25.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - umin v5.2s, v11.2s, v26.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smin v6.4s, v10.4s, v27.4s +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmax v29.2s, v28.2s, v25.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmax v9.4s, v8.4s, v5.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmax v11.2d, v10.2d, v7.2d +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmin v29.2s, v28.2s, v25.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmin v9.4s, v8.4s, v5.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmin v11.2d, v10.2d, v7.2d +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmaxnm v9.2s, v8.2s, v5.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmaxnm v9.4s, v8.4s, v5.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmaxnm v11.2d, v10.2d, v7.2d +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fminnm v2.2s, v8.2s, v25.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fminnm v9.4s, v8.4s, v5.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fminnm v11.2d, v10.2d, v7.2d +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - smaxp v1.8b, v15.8b, v22.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umaxp v2.16b, v14.16b, v23.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - smaxp v3.4h, v13.4h, v24.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umaxp v4.8h, v12.8h, v25.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - smaxp v5.2s, v11.2s, v26.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umaxp v6.4s, v10.4s, v27.4s +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uminp v1.8b, v15.8b, v22.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sminp v2.16b, v14.16b, v23.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uminp v3.4h, v13.4h, v24.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sminp v4.8h, v12.8h, v25.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uminp v5.2s, v11.2s, v26.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sminp v6.4s, v10.4s, v27.4s +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmaxp v29.2s, v28.2s, v25.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmaxp v9.4s, v8.4s, v5.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmaxp v11.2d, v10.2d, v7.2d +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fminp v29.2s, v28.2s, v25.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fminp v9.4s, v8.4s, v5.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fminp v11.2d, v10.2d, v7.2d +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmaxnmp v9.2s, v8.2s, v5.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmaxnmp v9.4s, v8.4s, v5.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmaxnmp v11.2d, v10.2d, v7.2d +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fminnmp v2.2s, v8.2s, v25.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fminnmp v9.4s, v8.4s, v5.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fminnmp v11.2d, v10.2d, v7.2d +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - addp v31.8b, v31.8b, v31.8b # CHECK-NEXT: - - - - 1.00 1.00 - - - - - - addp v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - addp v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - and v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - bic v0.4h, #15, lsl #8 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - bic v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - bif v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - bit v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - bsl v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - cls v0.16b, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cls v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cls v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - cls v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cls v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - cls v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - clz v0.16b, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - clz v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - clz v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - clz v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - clz v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - clz v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmeq d20, d21, #0 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - faddp v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - faddp v31.2s, v31.2s, v31.2s +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmulh v31.2s, v31.2s, v31.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmulh v5.4s, v7.4s, v9.4s +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrdmulh v31.4h, v3.4h, v13.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrdmulh v0.8h, v10.8h, v20.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmulx v1.2s, v22.2s, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmulx v21.4s, v15.4s, v3.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmulx v11.2d, v5.2d, v23.2d +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shll2 v2.8h, v4.16b, #8 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shll2 v6.4s, v8.8h, #16 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shll2 v6.2d, v8.4s, #32 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shll v2.8h, v4.8b, #8 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shll v6.4s, v8.4h, #16 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shll v6.2d, v8.2s, #32 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shl v0.4h, v1.4h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shl v0.16b, v1.16b, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shl v0.4s, v1.4s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shl v0.2d, v1.2d, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sshll v0.2d, v1.2s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sshll2 v0.4s, v1.8h, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ushll v0.4s, v1.4h, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ushll2 v0.8h, v1.16b, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshr v0.8b, v1.8b, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshr v0.4h, v1.4h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshr v0.2s, v1.2s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshr v0.16b, v1.16b, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshr v0.8h, v1.8h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshr v0.4s, v1.4s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshr v0.2d, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ushr v0.8b, v1.8b, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ushr v0.4h, v1.4h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ushr v0.2s, v1.2s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ushr v0.16b, v1.16b, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ushr v0.8h, v1.8h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ushr v0.4s, v1.4s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ushr v0.2d, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ssra v0.8b, v1.8b, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ssra v0.4h, v1.4h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ssra v0.2s, v1.2s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssra v0.16b, v1.16b, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssra v0.8h, v1.8h, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssra v0.4s, v1.4s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssra v0.2d, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - usra v0.8b, v1.8b, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - usra v0.4h, v1.4h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - usra v0.2s, v1.2s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usra v0.16b, v1.16b, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usra v0.8h, v1.8h, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usra v0.4s, v1.4s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usra v0.2d, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srshr v0.8b, v1.8b, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srshr v0.4h, v1.4h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srshr v0.2s, v1.2s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - srshr v0.16b, v1.16b, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - srshr v0.8h, v1.8h, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - srshr v0.4s, v1.4s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - srshr v0.2d, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - urshr v0.8b, v1.8b, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - urshr v0.4h, v1.4h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - urshr v0.2s, v1.2s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urshr v0.16b, v1.16b, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urshr v0.8h, v1.8h, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urshr v0.4s, v1.4s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urshr v0.2d, v1.2d, #3 +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - srsra v0.8b, v1.8b, #3 +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - srsra v0.4h, v1.4h, #3 +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - srsra v0.2s, v1.2s, #3 +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - srsra v0.16b, v1.16b, #3 +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - srsra v0.8h, v1.8h, #3 +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - srsra v0.4s, v1.4s, #3 +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - srsra v0.2d, v1.2d, #3 +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - ursra v0.8b, v1.8b, #3 +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - ursra v0.4h, v1.4h, #3 +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - ursra v0.2s, v1.2s, #3 +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - ursra v0.16b, v1.16b, #3 +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - ursra v0.8h, v1.8h, #3 +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - ursra v0.4s, v1.4s, #3 +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - ursra v0.2d, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sri v0.8b, v1.8b, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sri v0.4h, v1.4h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sri v0.2s, v1.2s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sri v0.16b, v1.16b, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sri v0.8h, v1.8h, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sri v0.4s, v1.4s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sri v0.2d, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sli v0.8b, v1.8b, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sli v0.4h, v1.4h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sli v0.2s, v1.2s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sli v0.16b, v1.16b, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sli v0.8h, v1.8h, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sli v0.4s, v1.4s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sli v0.2d, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshlu v0.8b, v1.8b, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshlu v0.4h, v1.4h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshlu v0.2s, v1.2s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshlu v0.16b, v1.16b, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshlu v0.8h, v1.8h, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshlu v0.4s, v1.4s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshlu v0.2d, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshl v0.8b, v1.8b, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshl v0.4h, v1.4h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshl v0.2s, v1.2s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshl v0.16b, v1.16b, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshl v0.8h, v1.8h, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshl v0.4s, v1.4s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshl v0.2d, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshl v0.8b, v1.8b, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshl v0.4h, v1.4h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshl v0.2s, v1.2s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshl v0.16b, v1.16b, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshl v0.8h, v1.8h, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshl v0.4s, v1.4s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshl v0.2d, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shrn v0.8b, v1.8h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shrn v0.4h, v1.4s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shrn v0.2s, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shrn2 v0.16b, v1.8h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shrn2 v0.8h, v1.4s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shrn2 v0.4s, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshrun v0.8b, v1.8h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshrun v0.4h, v1.4s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshrun v0.2s, v1.2d, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshrun2 v0.16b, v1.8h, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshrun2 v0.8h, v1.4s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshrun2 v0.4s, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - rshrn v0.8b, v1.8h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - rshrn v0.4h, v1.4s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - rshrn v0.2s, v1.2d, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rshrn2 v0.16b, v1.8h, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rshrn2 v0.8h, v1.4s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rshrn2 v0.4s, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrun v0.8b, v1.8h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrun v0.4h, v1.4s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrun v0.2s, v1.2d, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrshrun2 v0.16b, v1.8h, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrshrun2 v0.8h, v1.4s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrshrun2 v0.4s, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshrn v0.8b, v1.8h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshrn v0.4h, v1.4s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshrn v0.2s, v1.2d, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshrn2 v0.16b, v1.8h, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshrn2 v0.8h, v1.4s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshrn2 v0.4s, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshrn v0.8b, v1.8h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshrn v0.4h, v1.4s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshrn v0.2s, v1.2d, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshrn2 v0.16b, v1.8h, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshrn2 v0.8h, v1.4s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshrn2 v0.4s, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrn v0.8b, v1.8h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrn v0.4h, v1.4s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrn v0.2s, v1.2d, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrshrn2 v0.16b, v1.8h, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrshrn2 v0.8h, v1.4s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrshrn2 v0.4s, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqrshrn v0.8b, v1.8h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqrshrn v0.4h, v1.4s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqrshrn v0.2s, v1.2d, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqrshrn2 v0.16b, v1.8h, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqrshrn2 v0.8h, v1.4s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqrshrn2 v0.4s, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - scvtf v0.2s, v1.2s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - scvtf v0.4s, v1.4s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - scvtf v0.2d, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ucvtf v0.2s, v1.2s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ucvtf v0.4s, v1.4s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ucvtf v0.2d, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzs v0.2s, v1.2s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzs v0.4s, v1.4s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzs v0.2d, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzu v0.2s, v1.2s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzu v0.4s, v1.4s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzu v0.2d, v1.2d, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddl v0.8h, v1.8b, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddl v0.4s, v1.4h, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddl v0.2d, v1.2s, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddl2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddl2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddl2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddl v0.8h, v1.8b, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddl v0.4s, v1.4h, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddl v0.2d, v1.2s, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddl2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddl2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddl2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubl v0.8h, v1.8b, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubl v0.4s, v1.4h, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubl v0.2d, v1.2s, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubl2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubl2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubl2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubl v0.8h, v1.8b, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubl v0.4s, v1.4h, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubl v0.2d, v1.2s, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubl2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubl2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubl2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - sabal v0.8h, v1.8b, v2.8b +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - sabal v0.4s, v1.4h, v2.4h +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - sabal v0.2d, v1.2s, v2.2s +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - sabal2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - sabal2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - sabal2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - uabal v0.8h, v1.8b, v2.8b +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - uabal v0.4s, v1.4h, v2.4h +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - uabal v0.2d, v1.2s, v2.2s +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - uabal2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - uabal2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - uabal2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabdl v0.8h, v1.8b, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabdl v0.4s, v1.4h, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabdl v0.2d, v1.2s, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabdl2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabdl2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabdl2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabdl v0.8h, v1.8b, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabdl v0.4s, v1.4h, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabdl v0.2d, v1.2s, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabdl2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabdl2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabdl2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - smlal v0.8h, v1.8b, v2.8b +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - smlal v0.4s, v1.4h, v2.4h +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - smlal v0.2d, v1.2s, v2.2s +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - smlal2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - smlal2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - smlal2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - umlal v0.8h, v1.8b, v2.8b +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - umlal v0.4s, v1.4h, v2.4h +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - umlal v0.2d, v1.2s, v2.2s +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - umlal2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - umlal2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - umlal2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - smlsl v0.8h, v1.8b, v2.8b +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - smlsl v0.4s, v1.4h, v2.4h +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - smlsl v0.2d, v1.2s, v2.2s +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - smlsl2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - smlsl2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - smlsl2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - umlsl v0.8h, v1.8b, v2.8b +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - umlsl v0.4s, v1.4h, v2.4h +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - umlsl v0.2d, v1.2s, v2.2s +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - umlsl2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - umlsl2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - umlsl2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smull v0.8h, v1.8b, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smull v0.4s, v1.4h, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smull v0.2d, v1.2s, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smull2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smull2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smull2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umull v0.8h, v1.8b, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umull v0.4s, v1.4h, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umull v0.2d, v1.2s, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umull2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umull2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umull2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal v0.4s, v1.4h, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal v0.2d, v1.2s, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl v0.4s, v1.4h, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl v0.2d, v1.2s, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull v0.4s, v1.4h, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull v0.2d, v1.2s, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - pmull v0.8h, v1.8b, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - pmull2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddw v0.8h, v1.8h, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddw v0.4s, v1.4s, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddw v0.2d, v1.2d, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddw2 v0.8h, v1.8h, v2.16b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddw2 v0.4s, v1.4s, v2.8h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddw2 v0.2d, v1.2d, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddw v0.8h, v1.8h, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddw v0.4s, v1.4s, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddw v0.2d, v1.2d, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddw2 v0.8h, v1.8h, v2.16b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddw2 v0.4s, v1.4s, v2.8h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddw2 v0.2d, v1.2d, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubw v0.8h, v1.8h, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubw v0.4s, v1.4s, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubw v0.2d, v1.2d, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubw2 v0.8h, v1.8h, v2.16b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubw2 v0.4s, v1.4s, v2.8h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubw2 v0.2d, v1.2d, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubw v0.8h, v1.8h, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubw v0.4s, v1.4s, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubw v0.2d, v1.2d, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubw2 v0.8h, v1.8h, v2.16b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubw2 v0.4s, v1.4s, v2.8h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubw2 v0.2d, v1.2d, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - addhn v0.8b, v1.8h, v2.8h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - addhn v0.4h, v1.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - addhn v0.2s, v1.2d, v2.2d +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - addhn2 v0.16b, v1.8h, v2.8h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - addhn2 v0.8h, v1.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - addhn2 v0.4s, v1.2d, v2.2d +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - raddhn v0.8b, v1.8h, v2.8h +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - raddhn v0.4h, v1.4s, v2.4s +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - raddhn v0.2s, v1.2d, v2.2d +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - raddhn2 v0.16b, v1.8h, v2.8h +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - raddhn2 v0.8h, v1.4s, v2.4s +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - raddhn2 v0.4s, v1.2d, v2.2d +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - rsubhn v0.8b, v1.8h, v2.8h +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - rsubhn v0.4h, v1.4s, v2.4s +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - rsubhn v0.2s, v1.2d, v2.2d +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - rsubhn2 v0.16b, v1.8h, v2.8h +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - rsubhn2 v0.8h, v1.4s, v2.4s +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - rsubhn2 v0.4s, v1.2d, v2.2d +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmulh h10, h11, h12 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmulh s20, s21, s2 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrdmulh h10, h11, h12 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrdmulh s20, s21, s2 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmulx s20, s22, s15 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmulx d23, d11, d1 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frecps s21, s16, s13 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frecps d22, d30, d21 +# CHECK-NEXT: - - - - - - 9.00 - - - - - frsqrts s21, s5, s12 +# CHECK-NEXT: - - - - - - 19.00 - - - - - frsqrts d8, d22, d18 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - scvtf s22, s13 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - scvtf d21, d12 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ucvtf s22, s13 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ucvtf d21, d14 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frecpe s19, s14 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frecpe d13, d13 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frecpx s18, s10 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frecpx d16, d19 +# CHECK-NEXT: - - - - - - 9.00 - - - - - frsqrte s22, s13 +# CHECK-NEXT: - - - - - - 19.00 - - - - - frsqrte d21, d12 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmeq d20, d21, d22 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - cmeq v0.16b, v0.16b, #0 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - cmeq v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmge d20, d21, #0 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmeq d20, d21, #0 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmhs d20, d21, d22 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmge d20, d21, d22 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmge v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmge v0.8b, v0.8b, #0 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmgt d20, d21, #0 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmgt d20, d21, d22 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmgt v0.2s, v0.2s, #0 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - cmgt v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmge d20, d21, #0 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmhi d20, d21, d22 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - cmhi v0.8h, v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmhs d20, d21, d22 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmhs v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmgt d20, d21, d22 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmgt d20, d21, #0 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmle d20, d21, #0 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - cmle v0.2d, v0.2d, #0 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmlt d20, d21, #0 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - cmlt v0.8h, v0.8h, #0 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmtst d20, d21, d22 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmtst v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - cnt v0.16b, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cnt v0.8b, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - dup v0.16b, w28 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - dup v0.2d, x28 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - dup v0.2s, w28 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - dup v0.4h, w28 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - dup v0.4s, w28 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - dup v0.8b, w28 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - dup v0.8h, w28 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - eor v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ext v0.16b, v0.16b, v0.16b, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ext v0.8b, v0.8b, v0.8b, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fabd d29, d24, d20 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fabd s29, s24, s20 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fabd v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fabs v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fabs v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fabs v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fabs v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fabs v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - facge d20, d21, d22 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - facge s10, s11, s12 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - facge v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - facgt d20, d21, d22 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - facgt s10, s11, s12 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - facgt v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fadd v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - faddp v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - faddp v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmeq d20, d21, #0.0 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmeq s10, s11, s12 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmeq d20, d21, d22 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmeq s10, s11, #0.0 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmeq s10, s11, s12 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmeq v0.2s, v0.2s, #0.0 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmeq v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmge d20, d21, #0.0 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmeq d20, d21, #0.0 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmge s10, s11, s12 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmge d20, d21, d22 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmge s10, s11, #0.0 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmge s10, s11, s12 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fcmge v0.2d, v0.2d, #0.0 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fcmge v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmgt d20, d21, #0.0 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmge d20, d21, #0.0 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmgt s10, s11, s12 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmgt d20, d21, d22 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmgt s10, s11, #0.0 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmgt s10, s11, s12 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fcmgt v0.4s, v0.4s, #0.0 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fcmgt v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmle d20, d21, #0.0 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmgt d20, d21, #0.0 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmle s10, s11, #0.0 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fcmle v0.2d, v0.2d, #0.0 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmlt d20, d21, #0.0 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmle d20, d21, #0.0 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmlt s10, s11, #0.0 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fcmlt v0.4s, v0.4s, #0.0 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtas d21, d14 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtas s12, s13 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtas v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtas v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtas v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtas v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtas v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtau d21, d14 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtau s12, s13 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtau v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtau v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtau v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtau v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtau v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtl v0.2d, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtl v0.4s, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtl2 v0.2d, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtl2 v0.4s, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtms d21, d14 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtms s22, s13 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtms v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtms v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtms v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtms v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtms v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtmu d21, d14 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtmu s12, s13 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtmu v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtmu v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtmu v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtmu v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtmu v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtn v0.2s, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtn v0.4h, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtn2 v0.4s, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtn2 v0.8h, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtns d21, d14 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtns s22, s13 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtns v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtns v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtns v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtns v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtns v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtnu d21, d14 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtnu s12, s13 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtnu v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtnu v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtnu v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtnu v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtnu v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtps d21, d14 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtps s22, s13 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtps v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtps v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtps v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtps v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtps v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtpu d21, d14 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtpu s12, s13 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtpu v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtpu v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtpu v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtpu v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtpu v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtxn s22, d13 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtxn v0.2s, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtxn2 v0.4s, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzs d21, d12, #1 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzs d21, d14 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzs s12, s13 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzs s21, s12, #1 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzs v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzs v0.2d, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzs v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzs v0.2s, v0.2s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzs v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzs v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzs v0.4s, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzs v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzu d21, d12, #1 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzu d21, d14 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzu s12, s13 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzu s21, s12, #1 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzu v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzu v0.2d, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzu v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzu v0.2s, v0.2s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzu v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzu v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzu v0.4s, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzu v0.8h, v0.8h -# CHECK-NEXT: - - - - - - 10.00 - - - - - fdiv v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmax v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmax v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmax v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmaxnm v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmaxnm v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmaxnm v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmaxnmp v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmaxnmp v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmaxnmp v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmaxp v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmaxp v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmaxp v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmin v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmin v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmin v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fminnm v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fminnm v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fminnm v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fminnmp v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fminnmp v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fminnmp v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fminp v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fminp v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fminp v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmla d0, d1, v0.d[1] -# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmla s0, s1, v0.s[3] -# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmla v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmls d0, d4, v0.d[1] -# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmls s3, s5, v0.s[3] -# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmls v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmov v0.2d, #-1.25000000 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmov v0.2s, #13.00000000 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmov v0.4s, #1.00000000 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmul d0, d1, v0.d[1] -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmul s0, s1, v0.s[3] -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmul v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmulx d0, d4, v0.d[1] -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmulx d23, d11, d1 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmulx s20, s22, s15 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmulx s3, s5, v0.s[3] -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmulx v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmulx v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmulx v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fneg v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fneg v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fneg v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fneg v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fneg v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frecpe d13, d13 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frecpe s19, s14 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frecpe v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frecpe v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frecpe v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frecpe v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frecpe v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frecps v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frecps d22, d30, d21 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frecps s21, s16, s13 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frecpx d16, d19 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frecpx s18, s10 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frinta v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frinta v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frinta v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frinta v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frinta v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frinti v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frinti v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frinti v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frinti v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frinti v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frintm v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frintm v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frintm v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frintm v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frintm v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frintn v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frintn v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frintn v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frintn v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frintn v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frintp v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frintp v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frintp v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frintp v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frintp v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frintx v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frintx v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frintx v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frintx v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frintx v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frintz v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frintz v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frintz v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frintz v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frintz v0.8h, v0.8h -# CHECK-NEXT: - - - - - - 19.00 - - - - - frsqrte d21, d12 -# CHECK-NEXT: - - - - - - 9.00 - - - - - frsqrte s22, s13 -# CHECK-NEXT: - - - - - - 19.00 - - - - - frsqrte v0.2d, v0.2d -# CHECK-NEXT: - - - - - - 9.00 - - - - - frsqrte v0.2s, v0.2s -# CHECK-NEXT: - - - - - - 5.00 - - - - - frsqrte v0.4h, v0.4h -# CHECK-NEXT: - - - - - - 9.00 - - - - - frsqrte v0.4s, v0.4s -# CHECK-NEXT: - - - - - - 5.00 - - - - - frsqrte v0.8h, v0.8h -# CHECK-NEXT: - - - - - - 19.00 - - - - - frsqrts d8, d22, d18 -# CHECK-NEXT: - - - - - - 9.00 - - - - - frsqrts s21, s5, s12 -# CHECK-NEXT: - - - - - - 19.00 - - - - - frsqrts v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - - - 19.00 - - - - - fsqrt v0.2d, v0.2d -# CHECK-NEXT: - - - - - - 9.00 - - - - - fsqrt v0.2s, v0.2s -# CHECK-NEXT: - - - - - - 5.00 - - - - - fsqrt v0.4h, v0.4h -# CHECK-NEXT: - - - - - - 9.00 - - - - - fsqrt v0.4s, v0.4s -# CHECK-NEXT: - - - - - - 5.00 - - - - - fsqrt v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fsub v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld1 { v0.16b }, [x0] -# CHECK-NEXT: - - - - - - - - - 6.00 - - ld1 { v0.2d, v1.2d, v2.2d }, [x0], #48 -# CHECK-NEXT: - - - - - - - - - 8.00 - - ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] -# CHECK-NEXT: - - - - - - - - - 4.00 - - ld1 { v0.4s, v1.4s }, [sp], #32 -# CHECK-NEXT: - - - - - - - - - 6.00 - - ld1 { v0.4s, v1.4s, v2.4s }, [sp] -# CHECK-NEXT: - - - - - - - - - 4.00 - - ld1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3 -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld1 { v0.8h }, [x15], x2 -# CHECK-NEXT: - - - - - - - - - 4.00 - - ld1 { v0.8h, v1.8h }, [x15] -# CHECK-NEXT: - - - - - - - - - 1.00 - - ld1 { v0.b }[9], [x0] -# CHECK-NEXT: - - - - - - - - - 1.00 - - ld1 { v0.b }[9], [x0], #1 -# CHECK-NEXT: - - - - - - - - - 1.00 - - ld1r { v0.16b }, [x0] -# CHECK-NEXT: - - - - - - - - - 1.00 - - ld1r { v0.16b }, [x0], #1 -# CHECK-NEXT: - - - - - - - - - 1.00 - - ld1r { v0.8h }, [x15] -# CHECK-NEXT: - - - - - - - - - 1.00 - - ld1r { v0.8h }, [x15], #2 -# CHECK-NEXT: - - - - - - - - - 4.00 - - ld2 { v0.16b, v1.16b }, [x0], x1 -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld2 { v0.8b, v1.8b }, [x0] -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld2 { v0.h, v1.h }[7], [x15] -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld2 { v0.h, v1.h }[7], [x15], #4 -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld2r { v0.2d, v1.2d }, [x0] -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld2r { v0.2d, v1.2d }, [x0], #16 -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld2r { v0.4s, v1.4s }, [sp] -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld2r { v0.4s, v1.4s }, [sp], #8 -# CHECK-NEXT: - - - - - - - - - 3.00 - - ld3 { v0.4h, v1.4h, v2.4h }, [x15] -# CHECK-NEXT: - - - - - - - - - 6.00 - - ld3 { v0.8h, v1.8h, v2.8h }, [x15], x2 -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld3 { v0.s, v1.s, v2.s }[3], [sp] -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld3 { v0.s, v1.s, v2.s }[3], [sp], x3 -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld3r { v0.4h, v1.4h, v2.4h }, [x15] -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld3r { v0.4h, v1.4h, v2.4h }, [x15], #6 -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld3r { v0.8b, v1.8b, v2.8b }, [x0] -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld3r { v0.8b, v1.8b, v2.8b }, [x0], #3 -# CHECK-NEXT: - - - - - - - - - 4.00 - - ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] -# CHECK-NEXT: - - - - - - - - - 8.00 - - ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64 -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0] -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0], #32 -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld4 { v0.h, v1.h, v2.h, v3.h }[7], [x0], x0 -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [sp] -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [sp], x7 -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x30 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - mla v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - mls v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - mov b0, v0.b[15] -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - mov d6, v0.d[1] -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - mov h2, v0.h[5] -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - mov s17, v0.s[2] -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - mov v0.16b, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - mov v0.8b, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - movi d15, #0xff00ff00ff00ff -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - movi v0.16b, #31 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - movi v0.2d, #0xff0000ff0000ffff -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - movi v0.2s, #8, msl #8 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - movi v0.4s, #255, lsl #24 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - movi v0.8b, #255 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - mul v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - mvni v0.2s, #0 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - mvni v0.4s, #16, msl #16 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - neg d29, d24 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - neg v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - neg v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - neg v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - neg v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - neg v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - neg v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - neg v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - mvn v0.16b, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - mvn v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - orn v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - mov v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - orr v0.8h, #31 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - pmul v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - pmul v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - pmull v0.8h, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - pmull2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - raddhn v0.2s, v0.2d, v0.2d -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - raddhn v0.4h, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - raddhn v0.8b, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - raddhn2 v0.16b, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - raddhn2 v0.4s, v0.2d, v0.2d -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - raddhn2 v0.8h, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rbit v0.16b, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - rbit v0.8b, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - rev16 v21.8b, v1.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rev16 v30.16b, v31.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - rev32 v0.4h, v9.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - rev32 v21.8b, v1.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rev32 v30.16b, v31.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rev32 v4.8h, v7.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rev64 v0.16b, v31.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - rev64 v1.8b, v9.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - rev64 v13.4h, v21.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rev64 v2.8h, v4.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - rev64 v4.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rev64 v6.4s, v8.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - rshrn v0.2s, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - rshrn v0.4h, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - rshrn v0.8b, v0.8h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rshrn2 v0.16b, v0.8h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rshrn2 v0.4s, v0.2d, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rshrn2 v0.8h, v0.4s, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rsubhn v0.2s, v0.2d, v0.2d -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rsubhn v0.4h, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rsubhn v0.8b, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rsubhn2 v0.16b, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rsubhn2 v0.4s, v0.2d, v0.2d -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rsubhn2 v0.8h, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saba v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabal v0.2d, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabal v0.4s, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabal v0.8h, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabal2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabal2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabal2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sabd v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabdl v0.2d, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabdl v0.4s, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabdl v0.8h, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabdl2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabdl2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabdl2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sadalp v0.1d, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sadalp v0.2d, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sadalp v0.2s, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sadalp v0.4h, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sadalp v0.4s, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sadalp v0.8h, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddl v0.2d, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddl v0.4s, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddl v0.8h, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddl2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddl2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddl2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - saddlp v0.1d, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddlp v0.2d, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - saddlp v0.2s, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - saddlp v0.4h, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddlp v0.4s, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddlp v0.8h, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddw v0.2d, v0.2d, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddw v0.4s, v0.4s, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddw v0.8h, v0.8h, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddw2 v0.2d, v0.2d, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddw2 v0.4s, v0.4s, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddw2 v0.8h, v0.8h, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - scvtf d21, d12 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - scvtf d21, d12, #64 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - scvtf s22, s13 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - scvtf s22, s13, #32 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - scvtf v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - scvtf v0.2d, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - scvtf v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - scvtf v0.2s, v0.2s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - scvtf v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - scvtf v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - scvtf v0.4s, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - scvtf v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shadd v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shl d7, d10, #12 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shl v0.16b, v0.16b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shl v0.2d, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shl v0.4h, v0.4h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shl v0.4s, v0.4s, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shll v0.2d, v0.2s, #32 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shll v0.4s, v0.4h, #16 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shll v0.8h, v0.8b, #8 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shll v0.2d, v0.2s, #32 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shll v0.4s, v0.4h, #16 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shll v0.8h, v0.8b, #8 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shll2 v0.2d, v0.4s, #32 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shll2 v0.4s, v0.8h, #16 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shll2 v0.8h, v0.16b, #8 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shll2 v0.2d, v0.4s, #32 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shll2 v0.4s, v0.8h, #16 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shll2 v0.8h, v0.16b, #8 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shrn v0.2s, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shrn v0.4h, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shrn v0.8b, v0.8h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shrn2 v0.16b, v0.8h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shrn2 v0.4s, v0.2d, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shrn2 v0.8h, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shsub v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shsub v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sli d10, d14, #12 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sli v0.16b, v0.16b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sli v0.2d, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sli v0.2s, v0.2s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sli v0.4h, v0.4h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sli v0.4s, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sli v0.8b, v0.8b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sli v0.8h, v0.8h, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - smax v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - smax v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - smax v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - smaxp v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - smaxp v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - smaxp v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smin v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smin v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smin v0.8h, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sminp v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sminp v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sminp v0.8h, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smlal v0.2d, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smlal v0.4s, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smlal v0.8h, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smlal2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smlal2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smlal2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smlsl v0.2d, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smlsl v0.4s, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smlsl v0.8h, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smlsl2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smlsl2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smlsl2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smull v0.2d, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smull v0.4s, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smull v0.8h, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smull2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smull2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smull2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmlt d20, d21, #0.0 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - facge s10, s11, s12 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - facge d20, d21, d22 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - facgt s10, s11, s12 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - facgt d20, d21, d22 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - abs d29, d24 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqabs b19, b14 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqabs d18, d12 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqabs h21, h15 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqabs s20, s12 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqabs v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqabs v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqabs v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqabs v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqabs v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqabs v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqabs v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqadd b20, b11, b15 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqadd v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqadd v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmlal d19, s24, s12 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmlal d8, s9, v0.s[1] -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmlal s0, h0, v0.h[3] -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmlal s17, h27, h12 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal v0.2d, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal v0.4s, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmlsl d12, s23, s13 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmlsl d8, s9, v0.s[1] -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmlsl s0, h0, v0.h[3] -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmlsl s14, h12, h25 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl v0.2d, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl v0.4s, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmulh h10, h11, h12 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmulh h7, h15, v0.h[3] -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmulh s15, s14, v0.s[1] -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmulh s20, s21, s2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmulh v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmulh v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmull d1, s1, v0.s[1] -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmull d15, s22, s12 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmull s1, h1, v0.h[3] -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmull s12, h22, h12 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull v0.2d, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull v0.4s, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqabs d18, d12 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - neg d29, d24 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqneg b19, b14 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqneg d18, d12 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqneg h21, h15 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqneg s20, s12 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqneg v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqneg v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqneg v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqneg v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqneg v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqneg v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqneg v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrdmulh h10, h11, h12 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrdmulh h7, h15, v0.h[3] -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrdmulh s15, s14, v0.s[1] -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrdmulh s20, s21, s2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrdmulh v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrdmulh v0.8h, v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshl d31, d31, d31 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshl h3, h4, h15 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshl v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshl v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshl v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrn b10, h13, #2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrn h15, s10, #6 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrn s15, d12, #9 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrn v0.2s, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrn v0.4h, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrn v0.8b, v0.8h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrshrn2 v0.16b, v0.8h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrshrn2 v0.4s, v0.2d, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrshrn2 v0.8h, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrun b17, h10, #6 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrun h10, s13, #15 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrun s22, d16, #31 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrun v0.2s, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrun v0.4h, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrun v0.8b, v0.8h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrshrun2 v0.16b, v0.8h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrshrun2 v0.4s, v0.2d, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrshrun2 v0.8h, v0.4s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqneg d18, d12 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - suqadd b19, b14 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - suqadd h20, h15 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - suqadd s21, s12 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - suqadd d18, d22 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - usqadd b19, b14 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - usqadd h20, h15 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - usqadd s21, s12 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - usqadd d18, d22 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal s17, h27, h12 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal d19, s24, s12 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl s14, h12, h25 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl d12, s23, s13 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull s12, h22, h12 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull d15, s22, s12 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqxtun b19, h14 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqxtun h21, s15 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqxtun s20, d12 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqxtn b18, h18 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqxtn h20, s17 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqxtn s19, d14 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqxtn b18, h18 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqxtn h20, s17 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqxtn s19, d14 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshr d15, d16, #12 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ushr d10, d17, #18 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srshr d19, d18, #7 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - urshr d20, d23, #31 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ssra d18, d12, #21 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - usra d20, d13, #61 +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - srsra d15, d11, #19 +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - ursra d18, d10, #13 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shl d7, d10, #12 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshl b11, b19, #7 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshl d15, d16, #51 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshl d31, d31, d31 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshl h13, h18, #11 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshl h3, h4, h15 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshl s14, s17, #22 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshl v0.16b, v0.16b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshl v0.2d, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshl v0.2s, v0.2s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshl v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshl v0.4h, v0.4h, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshl v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshl v0.4s, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshl v0.8b, v0.8b, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshl v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshl v0.8h, v0.8h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshl d15, d16, #51 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshl b18, b15, #6 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshl h11, h18, #7 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshl s14, s19, #18 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshl d15, d12, #19 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshlu b15, b18, #6 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshlu d11, d13, #32 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshlu h19, h17, #6 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshlu s16, s14, #25 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshlu v0.16b, v0.16b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshlu v0.2d, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshlu v0.2s, v0.2s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshlu v0.4h, v0.4h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshlu v0.4s, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshlu v0.8b, v0.8b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshlu v0.8h, v0.8h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshlu d11, d13, #32 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sri d10, d12, #14 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sli d10, d14, #12 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshrn b10, h15, #5 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshrn h17, s10, #4 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshrn s18, d10, #31 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshrn v0.2s, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshrn v0.4h, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshrn v0.8b, v0.8h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshrn2 v0.16b, v0.8h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshrn2 v0.4s, v0.2d, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshrn2 v0.8h, v0.4s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshrn b12, h10, #7 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshrn h10, s14, #5 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshrn s10, d12, #13 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrn b10, h13, #2 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrn h15, s10, #6 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrn s15, d12, #9 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqrshrn b10, h12, #5 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqrshrn h12, s10, #14 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqrshrn s10, d10, #25 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshrun b15, h10, #7 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshrun h20, s14, #3 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshrun s10, d15, #15 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshrun v0.2s, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshrun v0.4h, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshrun v0.8b, v0.8h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshrun2 v0.16b, v0.8h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshrun2 v0.4s, v0.2d, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshrun2 v0.8h, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqsub s20, s10, s7 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqsub v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqsub v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqsub v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqxtn b18, h18 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqxtn h20, s17 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqxtn s19, d14 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqxtn v0.2s, v0.2d -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqxtn v0.4h, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqxtn v0.8b, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqxtn2 v0.16b, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqxtn2 v0.4s, v0.2d -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqxtn2 v0.8h, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqxtun b19, h14 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqxtun h21, s15 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqxtun s20, d12 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqxtun v0.2s, v0.2d -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqxtun v0.4h, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqxtun v0.8b, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqxtun2 v0.16b, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqxtun2 v0.4s, v0.2d -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqxtun2 v0.8h, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srhadd v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srhadd v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srhadd v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sri d10, d12, #14 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sri v0.16b, v0.16b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sri v0.2d, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sri v0.2s, v0.2s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sri v0.4h, v0.4h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sri v0.4s, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sri v0.8b, v0.8b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sri v0.8h, v0.8h, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srshl d16, d16, d16 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srshl v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srshl v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srshl v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srshr d19, d18, #7 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - srshr v0.16b, v0.16b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - srshr v0.2d, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srshr v0.2s, v0.2s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srshr v0.4h, v0.4h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - srshr v0.4s, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srshr v0.8b, v0.8b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - srshr v0.8h, v0.8h, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srsra d15, d11, #19 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - srsra v0.16b, v0.16b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - srsra v0.2d, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srsra v0.2s, v0.2s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srsra v0.4h, v0.4h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - srsra v0.4s, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srsra v0.8b, v0.8b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - srsra v0.8h, v0.8h, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshl d31, d31, d31 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sshl v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshl v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshl v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshl v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshll v0.2d, v0.2s, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sshll2 v0.4s, v0.8h, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshr d15, d16, #12 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sshr v0.16b, v0.16b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sshr v0.2d, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshr v0.2s, v0.2s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshr v0.4h, v0.4h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sshr v0.4s, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshr v0.8b, v0.8b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sshr v0.8h, v0.8h, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ssra d18, d12, #21 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssra v0.16b, v0.16b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssra v0.2d, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ssra v0.2s, v0.2s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ssra v0.4h, v0.4h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssra v0.4s, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ssra v0.8b, v0.8b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssra v0.8h, v0.8h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubl v0.2d, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubl v0.4s, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubl v0.8h, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubl2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubl2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubl2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubw v0.2d, v0.2d, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubw v0.4s, v0.4s, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubw v0.8h, v0.8h, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubw2 v0.2d, v0.2d, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubw2 v0.4s, v0.4s, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubw2 v0.8h, v0.8h, v0.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrun b17, h10, #6 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrun h10, s13, #15 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrun s22, d16, #31 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - scvtf s22, s13, #32 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - scvtf d21, d12, #64 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ucvtf s22, s13, #32 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ucvtf d21, d14, #64 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzs s21, s12, #1 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzs d21, d12, #1 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzu s21, s12, #1 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzu d21, d12, #1 +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld1 { v0.16b }, [x0] +# CHECK-NEXT: - - - - - - - - - 4.00 - - ld1 { v15.8h, v16.8h }, [x15] +# CHECK-NEXT: - - - - - - - - - 6.00 - - ld1 { v31.4s, v0.4s, v1.4s }, [sp] +# CHECK-NEXT: - - - - - - - - - 8.00 - - ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld2 { v0.8b, v1.8b }, [x0] +# CHECK-NEXT: - - - - - - - - - 3.00 - - ld3 { v15.4h, v16.4h, v17.4h }, [x15] +# CHECK-NEXT: - - - - - - - - - 4.00 - - ld4 { v31.2s, v0.2s, v1.2s, v2.2s }, [sp] # CHECK-NEXT: - - - - - - - - - - - 1.00 st1 { v0.16b }, [x0] -# CHECK-NEXT: - - - - - - - - - - - 2.00 st1 { v0.2d, v1.2d, v2.2d }, [x0], #48 +# CHECK-NEXT: - - - - - - - - - - - 1.00 st1 { v15.8h, v16.8h }, [x15] +# CHECK-NEXT: - - - - - - - - - - - 2.00 st1 { v31.4s, v0.4s, v1.4s }, [sp] # CHECK-NEXT: - - - - - - - - - - - 4.00 st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] -# CHECK-NEXT: - - - - - - - - - - - 1.00 st1 { v0.4s, v1.4s }, [sp], #32 -# CHECK-NEXT: - - - - - - - - - - - 2.00 st1 { v0.4s, v1.4s, v2.4s }, [sp] +# CHECK-NEXT: - - - - - - - - - - - 2.00 st2 { v0.8b, v1.8b }, [x0] +# CHECK-NEXT: - - - - - - - - - - - 4.00 st3 { v15.4h, v16.4h, v17.4h }, [x15] +# CHECK-NEXT: - - - - - - - - - - - 4.00 st4 { v31.2s, v0.2s, v1.2s, v2.2s }, [sp] +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld1 { v15.8h }, [x15], x2 +# CHECK-NEXT: - - - - - - - - - 4.00 - - ld1 { v31.4s, v0.4s }, [sp], #32 +# CHECK-NEXT: - - - - - - - - - 6.00 - - ld1 { v0.2d, v1.2d, v2.2d }, [x0], #48 +# CHECK-NEXT: - - - - - - - - - 4.00 - - ld1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3 +# CHECK-NEXT: - - - - - - - - - 4.00 - - ld2 { v0.16b, v1.16b }, [x0], x1 +# CHECK-NEXT: - - - - - - - - - 6.00 - - ld3 { v15.8h, v16.8h, v17.8h }, [x15], x2 +# CHECK-NEXT: - - - - - - - - - 8.00 - - ld4 { v31.4s, v0.4s, v1.4s, v2.4s }, [sp], #64 +# CHECK-NEXT: - - - - - - - - - - - 1.00 st1 { v15.8h }, [x15], x2 +# CHECK-NEXT: - - - - - - - - - - - 1.00 st1 { v31.4s, v0.4s }, [sp], #32 +# CHECK-NEXT: - - - - - - - - - - - 2.00 st1 { v0.2d, v1.2d, v2.2d }, [x0], #48 # CHECK-NEXT: - - - - - - - - - - - 4.00 st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3 -# CHECK-NEXT: - - - - - - - - - - - 1.00 st1 { v0.8h }, [x15], x2 -# CHECK-NEXT: - - - - - - - - - - - 1.00 st1 { v0.8h, v1.8h }, [x15] -# CHECK-NEXT: - - - - - - - - - - - 1.00 st1 { v0.d }[1], [x0] -# CHECK-NEXT: - - - - - - - - - - - 1.00 st1 { v0.d }[1], [x0], #8 # CHECK-NEXT: - - - - - - - - - - - 4.00 st2 { v0.16b, v1.16b }, [x0], x1 -# CHECK-NEXT: - - - - - - - - - - - 2.00 st2 { v0.8b, v1.8b }, [x0] -# CHECK-NEXT: - - - - - - - - - - - 2.00 st2 { v0.s, v1.s }[3], [sp] -# CHECK-NEXT: - - - - - - - - - - - 2.00 st2 { v0.s, v1.s }[3], [sp], #8 -# CHECK-NEXT: - - - - - - - - - - - 4.00 st3 { v0.4h, v1.4h, v2.4h }, [x15] -# CHECK-NEXT: - - - - - - - - - - - 4.00 st3 { v0.8h, v1.8h, v2.8h }, [x15], x2 -# CHECK-NEXT: - - - - - - - - - - - 2.00 st3 { v0.h, v1.h, v2.h }[7], [x15] -# CHECK-NEXT: - - - - - - - - - - - 2.00 st3 { v0.h, v1.h, v2.h }[7], [x15], #6 -# CHECK-NEXT: - - - - - - - - - - - 4.00 st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] -# CHECK-NEXT: - - - - - - - - - - - 4.00 st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64 +# CHECK-NEXT: - - - - - - - - - - - 4.00 st3 { v15.8h, v16.8h, v17.8h }, [x15], x2 +# CHECK-NEXT: - - - - - - - - - - - 4.00 st4 { v31.4s, v0.4s, v1.4s, v2.4s }, [sp], #64 +# CHECK-NEXT: - - - - - - - - - 1.00 - - ld1r { v0.16b }, [x0] +# CHECK-NEXT: - - - - - - - - - 1.00 - - ld1r { v15.8h }, [x15] +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld2r { v31.4s, v0.4s }, [sp] +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld2r { v0.2d, v1.2d }, [x0] +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld3r { v0.8b, v1.8b, v2.8b }, [x0] +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld3r { v15.4h, v16.4h, v17.4h }, [x15] +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld4r { v31.2s, v0.2s, v1.2s, v2.2s }, [sp] +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld4r { v31.1d, v0.1d, v1.1d, v2.1d }, [sp] +# CHECK-NEXT: - - - - - - - - - 1.00 - - ld1 { v0.b }[9], [x0] +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld2 { v15.h, v16.h }[7], [x15] +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld3 { v31.s, v0.s, v1.s }[3], [sp] +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0] +# CHECK-NEXT: - - - - - - - - - - - 1.00 st1 { v0.d }[1], [x0] +# CHECK-NEXT: - - - - - - - - - - - 2.00 st2 { v31.s, v0.s }[3], [sp] +# CHECK-NEXT: - - - - - - - - - - - 2.00 st3 { v15.h, v16.h, v17.h }[7], [x15] # CHECK-NEXT: - - - - - - - - - - - 2.00 st4 { v0.b, v1.b, v2.b, v3.b }[9], [x0] +# CHECK-NEXT: - - - - - - - - - 1.00 - - ld1r { v0.16b }, [x0], #1 +# CHECK-NEXT: - - - - - - - - - 1.00 - - ld1r { v15.8h }, [x15], #2 +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld2r { v31.4s, v0.4s }, [sp], #8 +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld2r { v0.2d, v1.2d }, [x0], #16 +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld3r { v0.8b, v1.8b, v2.8b }, [x0], #3 +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld3r { v15.4h, v16.4h, v17.4h }, [x15], #6 +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld4r { v31.2s, v0.2s, v1.2s, v2.2s }, [sp], x30 +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld4r { v31.1d, v0.1d, v1.1d, v2.1d }, [sp], x7 +# CHECK-NEXT: - - - - - - - - - 1.00 - - ld1 { v0.b }[9], [x0], #1 +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld2 { v15.h, v16.h }[7], [x15], #4 +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld3 { v31.s, v0.s, v1.s }[3], [sp], x3 +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0], #32 +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld4 { v0.h, v1.h, v2.h, v3.h }[7], [x0], x0 +# CHECK-NEXT: - - - - - - - - - - - 1.00 st1 { v0.d }[1], [x0], #8 +# CHECK-NEXT: - - - - - - - - - - - 2.00 st2 { v31.s, v0.s }[3], [sp], #8 +# CHECK-NEXT: - - - - - - - - - - - 2.00 st3 { v15.h, v16.h, v17.h }[7], [x15], #6 # CHECK-NEXT: - - - - - - - - - - - 2.00 st4 { v0.b, v1.b, v2.b, v3.b }[9], [x0], x5 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sub d15, d5, d16 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sub v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - suqadd b19, b14 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - suqadd d18, d22 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - suqadd h20, h15 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - suqadd s21, s12 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - suqadd v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - suqadd v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - suqadd v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - suqadd v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - suqadd v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - suqadd v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - suqadd v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - tbl v0.16b, { v0.16b }, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - tbl v0.16b, { v0.16b, v1.16b }, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - tbl v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - tbl v0.8b, { v0.16b }, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - tbl v0.8b, { v0.16b, v1.16b }, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - tbl v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - tbx v0.16b, { v0.16b }, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - tbx v0.16b, { v0.16b, v1.16b }, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - tbx v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - tbx v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - tbx v0.8b, { v0.16b }, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - tbx v0.8b, { v0.16b, v1.16b }, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - tbx v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - tbx v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn1 v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn1 v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - trn1 v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - trn1 v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn1 v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - trn1 v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn1 v0.8h, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn2 v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn2 v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - trn2 v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - trn2 v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn2 v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - trn2 v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn2 v0.8h, v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uaba v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabal v0.2d, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabal v0.4s, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabal v0.8h, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabal2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabal2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabal2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uabd v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabdl v0.2d, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabdl v0.4s, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabdl v0.8h, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabdl2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabdl2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabdl2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uadalp v0.1d, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uadalp v0.2d, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uadalp v0.2s, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uadalp v0.4h, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uadalp v0.4s, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uadalp v0.8h, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddl v0.2d, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddl v0.4s, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddl v0.8h, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddl2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddl2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddl2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uaddlp v0.1d, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddlp v0.2d, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uaddlp v0.2s, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uaddlp v0.4h, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddlp v0.4s, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddlp v0.8h, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddw v0.2d, v0.2d, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddw v0.4s, v0.4s, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddw v0.8h, v0.8h, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddw2 v0.2d, v0.2d, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddw2 v0.4s, v0.4s, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddw2 v0.8h, v0.8h, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ucvtf d21, d14 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ucvtf d21, d14, #64 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ucvtf s22, s13 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ucvtf s22, s13, #32 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ucvtf v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ucvtf v0.2d, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ucvtf v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ucvtf v0.2s, v0.2s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ucvtf v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ucvtf v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ucvtf v0.4s, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ucvtf v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uhadd v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uhadd v0.8h, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uhsub v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umax v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umax v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umax v0.8h, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umaxp v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umaxp v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umaxp v0.8h, v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - umin v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - umin v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - umin v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uminp v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uminp v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uminp v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umlal v0.2d, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umlal v0.4s, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umlal v0.8h, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umlal2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umlal2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umlal2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umlsl v0.2d, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umlsl v0.4s, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umlsl v0.8h, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umlsl2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umlsl2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umlsl2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umull v0.2d, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umull v0.4s, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umull v0.8h, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umull2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umull2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umull2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqadd h0, h1, h5 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqadd v0.8h, v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqrshl b11, b20, b30 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqrshl s23, s20, s16 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqrshl v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqrshl v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqrshl v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqrshl v0.8h, v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqrshrn b10, h12, #5 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqrshrn h12, s10, #14 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqrshrn s10, d10, #25 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqrshrn v0.2s, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqrshrn v0.4h, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqrshrn v0.8b, v0.8h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqrshrn2 v0.16b, v0.8h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqrshrn2 v0.4s, v0.2d, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqrshrn2 v0.8h, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshl b11, b20, b30 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshl b18, b15, #6 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshl d15, d12, #19 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshl h11, h18, #7 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshl s14, s19, #18 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshl s23, s20, s16 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshl v0.16b, v0.16b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshl v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshl v0.2d, v0.2d, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshl v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshl v0.2s, v0.2s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshl v0.4h, v0.4h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshl v0.4s, v0.4s, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshl v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshl v0.8b, v0.8b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshl v0.8h, v0.8h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshl v0.8h, v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshrn b12, h10, #7 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshrn h10, s14, #5 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshrn s10, d12, #13 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshrn v0.2s, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshrn v0.4h, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshrn v0.8b, v0.8h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshrn2 v0.16b, v0.8h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshrn2 v0.4s, v0.2d, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshrn2 v0.8h, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqsub d16, d16, d16 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqsub v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqxtn b18, h18 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqxtn h20, s17 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqxtn s19, d14 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqxtn v0.2s, v0.2d -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqxtn v0.4h, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqxtn v0.8b, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqxtn2 v0.16b, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqxtn2 v0.4s, v0.2d -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqxtn2 v0.8h, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - urecpe v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urecpe v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urhadd v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urhadd v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urhadd v0.8h, v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - urshl d8, d7, d4 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urshl v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urshl v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urshl v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urshl v0.8h, v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - urshr d20, d23, #31 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urshr v0.16b, v0.16b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urshr v0.2d, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - urshr v0.2s, v0.2s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - urshr v0.4h, v0.4h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urshr v0.4s, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - urshr v0.8b, v0.8b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urshr v0.8h, v0.8h, #3 -# CHECK-NEXT: - - - - - - 9.00 - - - - - ursqrte v0.2s, v0.2s -# CHECK-NEXT: - - - - - - 9.00 - - - - - ursqrte v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ursra d18, d10, #13 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ursra v0.16b, v0.16b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ursra v0.2d, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ursra v0.2s, v0.2s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ursra v0.4h, v0.4h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ursra v0.4s, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ursra v0.8b, v0.8b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ursra v0.8h, v0.8h, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ushl d0, d0, d0 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ushl v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ushl v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ushl v0.8h, v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ushll v0.4s, v0.4h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ushll2 v0.8h, v0.16b, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ushr d10, d17, #18 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ushr v0.16b, v0.16b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ushr v0.2d, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ushr v0.2s, v0.2s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ushr v0.4h, v0.4h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ushr v0.4s, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ushr v0.8b, v0.8b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ushr v0.8h, v0.8h, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - usqadd b19, b14 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - usqadd d18, d22 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - usqadd h20, h15 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - usqadd s21, s12 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usqadd v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usqadd v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - usqadd v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - usqadd v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usqadd v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - usqadd v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usqadd v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - usra d20, d13, #61 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usra v0.16b, v0.16b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usra v0.2d, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - usra v0.2s, v0.2s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - usra v0.4h, v0.4h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usra v0.4s, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - usra v0.8b, v0.8b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usra v0.8h, v0.8h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubl v0.2d, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubl v0.4s, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubl v0.8h, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubl2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubl2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubl2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubw v0.2d, v0.2d, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubw v0.4s, v0.4s, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubw v0.8h, v0.8h, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubw2 v0.2d, v0.2d, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubw2 v0.4s, v0.4s, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubw2 v0.8h, v0.8h, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uzp1 v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uzp1 v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uzp1 v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uzp1 v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uzp1 v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uzp1 v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uzp1 v0.8h, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uzp2 v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uzp2 v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uzp2 v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uzp2 v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uzp2 v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uzp2 v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uzp2 v0.8h, v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - xtn v0.2s, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - xtn v0.4h, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - xtn v0.8b, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - xtn2 v0.16b, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - xtn2 v0.4s, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - xtn2 v0.8h, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - zip1 v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - zip1 v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - zip1 v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - zip1 v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - zip1 v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - zip1 v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - zip1 v0.8h, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - zip2 v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - zip2 v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - zip2 v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - zip2 v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - zip2 v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - zip2 v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - zip2 v0.8h, v0.8h, v0.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ext v0.8b, v1.8b, v2.8b, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ext v0.16b, v1.16b, v2.16b, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uzp1 v1.8b, v1.8b, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uzp1 v2.16b, v1.16b, v2.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uzp1 v3.4h, v1.4h, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uzp1 v4.8h, v1.8h, v2.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uzp1 v5.2s, v1.2s, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uzp1 v6.4s, v1.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uzp1 v7.2d, v1.2d, v2.2d +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - trn1 v8.8b, v1.8b, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn1 v9.16b, v1.16b, v2.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - trn1 v10.4h, v1.4h, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn1 v27.8h, v7.8h, v2.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - trn1 v12.2s, v7.2s, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn1 v29.4s, v6.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn1 v14.2d, v6.2d, v2.2d +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - zip1 v31.8b, v5.8b, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - zip1 v0.16b, v5.16b, v2.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - zip1 v17.4h, v4.4h, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - zip1 v2.8h, v4.8h, v2.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - zip1 v19.2s, v3.2s, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - zip1 v4.4s, v3.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - zip1 v21.2d, v2.2d, v2.2d +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uzp2 v6.8b, v2.8b, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uzp2 v23.16b, v1.16b, v2.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uzp2 v8.4h, v1.4h, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uzp2 v25.8h, v0.8h, v2.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uzp2 v10.2s, v0.2s, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uzp2 v27.4s, v7.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uzp2 v12.2d, v7.2d, v2.2d +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - trn2 v29.8b, v6.8b, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn2 v14.16b, v6.16b, v2.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - trn2 v31.4h, v5.4h, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn2 v0.8h, v5.8h, v2.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - trn2 v17.2s, v4.2s, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn2 v2.4s, v4.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn2 v19.2d, v3.2d, v2.2d +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - zip2 v4.8b, v3.8b, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - zip2 v21.16b, v2.16b, v2.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - zip2 v6.4h, v2.4h, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - zip2 v23.8h, v1.8h, v2.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - zip2 v8.2s, v1.2s, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - zip2 v25.4s, v0.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - zip2 v10.2d, v0.2d, v2.2d +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmul s0, s1, v1.s[0] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmul s0, s1, v1.s[3] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmul d0, d1, v1.d[0] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmul d0, d1, v1.d[1] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmul d15, d15, v15.d[1] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmulx s3, s5, v7.s[0] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmulx s3, s5, v7.s[3] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmulx s3, s5, v15.s[3] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmulx d0, d4, v8.d[0] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmulx d0, d4, v8.d[1] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmla s0, s1, v1.s[0] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmla s0, s1, v1.s[3] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmla d0, d1, v1.d[0] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmla d0, d1, v1.d[1] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmla d15, d15, v15.d[1] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmls s3, s5, v7.s[0] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmls s3, s5, v7.s[3] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmls s3, s5, v15.s[3] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmls d0, d4, v8.d[0] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmls d0, d4, v8.d[1] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal s0, h0, v0.h[0] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal s0, h0, v0.h[1] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal s0, h0, v0.h[2] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal s0, h0, v0.h[3] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal s0, h0, v0.h[4] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal s0, h0, v0.h[5] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal s0, h0, v0.h[6] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal s0, h0, v0.h[7] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal d8, s9, v15.s[0] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal d8, s9, v15.s[1] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal d8, s9, v15.s[2] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal d8, s9, v15.s[3] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl s0, h0, v0.h[0] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl s0, h0, v0.h[1] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl s0, h0, v0.h[2] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl s0, h0, v0.h[3] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl s0, h0, v0.h[4] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl s0, h0, v0.h[5] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl s0, h0, v0.h[6] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl s0, h0, v0.h[7] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl d8, s9, v15.s[0] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl d8, s9, v15.s[1] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl d8, s9, v15.s[2] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl d8, s9, v15.s[3] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull s1, h1, v1.h[0] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull s1, h1, v1.h[1] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull s1, h1, v1.h[2] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull s1, h1, v1.h[3] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull s1, h1, v1.h[4] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull s1, h1, v1.h[5] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull s1, h1, v1.h[6] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull s1, h1, v1.h[7] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull d1, s1, v4.s[0] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull d1, s1, v4.s[1] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull d1, s1, v4.s[2] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull d1, s1, v4.s[3] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmulh h7, h1, v14.h[0] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmulh h7, h15, v8.h[1] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmulh h7, h15, v8.h[2] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmulh h7, h15, v8.h[3] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmulh h7, h15, v8.h[4] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmulh h7, h15, v8.h[5] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmulh h7, h15, v8.h[6] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmulh h7, h15, v8.h[7] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmulh s15, s3, v4.s[0] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmulh s15, s14, v16.s[1] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmulh s15, s15, v16.s[2] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmulh s15, s16, v17.s[3] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrdmulh h7, h1, v14.h[0] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrdmulh h7, h15, v8.h[1] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrdmulh h7, h15, v8.h[2] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrdmulh h7, h15, v8.h[3] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrdmulh h7, h15, v8.h[4] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrdmulh h7, h15, v8.h[5] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrdmulh h7, h15, v8.h[6] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrdmulh h7, h15, v8.h[7] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrdmulh s15, s3, v4.s[0] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrdmulh s15, s14, v16.s[1] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrdmulh s15, s15, v16.s[2] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrdmulh s15, s16, v17.s[3] +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - mov b0, v0.b[15] +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - mov h2, v31.h[5] +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - mov s17, v2.s[2] +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - mov d6, v12.d[1] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - tbl v0.8b, { v1.16b }, v2.8b +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - tbl v16.8b, { v31.16b, v0.16b }, v2.8b +# CHECK-NEXT: - - - - 3.00 3.00 - - - - - - tbl v0.8b, { v1.16b, v2.16b, v3.16b }, v2.8b +# CHECK-NEXT: - - - - 4.00 4.00 - - - - - - tbl v16.8b, { v23.16b, v24.16b, v25.16b, v26.16b }, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - tbl v0.16b, { v1.16b }, v2.16b +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - tbl v16.16b, { v31.16b, v0.16b }, v2.16b +# CHECK-NEXT: - - - - 3.00 3.00 - - - - - - tbl v0.16b, { v1.16b, v2.16b, v3.16b }, v2.16b +# CHECK-NEXT: - - - - 4.00 4.00 - - - - - - tbl v0.16b, { v31.16b, v0.16b, v1.16b, v2.16b }, v2.16b +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - tbx v0.8b, { v1.16b }, v2.8b +# CHECK-NEXT: - - - - 3.00 3.00 - - - - - - tbx v16.8b, { v31.16b, v0.16b }, v2.8b +# CHECK-NEXT: - - - - 4.00 4.00 - - - - - - tbx v0.8b, { v1.16b, v2.16b, v3.16b }, v2.8b +# CHECK-NEXT: - - - - 5.00 5.00 - - - - - - tbx v16.8b, { v23.16b, v24.16b, v25.16b, v26.16b }, v2.8b +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - tbx v0.16b, { v1.16b }, v2.16b +# CHECK-NEXT: - - - - 3.00 3.00 - - - - - - tbx v16.16b, { v31.16b, v0.16b }, v2.16b +# CHECK-NEXT: - - - - 4.00 4.00 - - - - - - tbx v0.16b, { v1.16b, v2.16b, v3.16b }, v2.16b +# CHECK-NEXT: - - - - 5.00 5.00 - - - - - - tbx v16.16b, { v31.16b, v0.16b, v1.16b, v2.16b }, v2.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtxn s22, d13 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtas s12, s13 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtas d21, d14 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtau s12, s13 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtau d21, d14 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtms s22, s13 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtms d21, d14 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtmu s12, s13 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtmu d21, d14 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtns s22, s13 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtns d21, d14 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtnu s12, s13 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtnu d21, d14 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtps s22, s13 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtps d21, d14 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtpu s12, s13 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtpu d21, d14 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzs s12, s13 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzs d21, d14 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzu s12, s13 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzu d21, d14 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fabd s29, s24, s20 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fabd d29, d24, d20 diff --git a/llvm/utils/update_llc_test_checks.py b/llvm/utils/update_llc_test_checks.py --- a/llvm/utils/update_llc_test_checks.py +++ b/llvm/utils/update_llc_test_checks.py @@ -24,6 +24,8 @@ help='The "llc" binary to use to generate the test case') parser.add_argument( '--function', help='The function in the test file to update') + parser.add_argument( + '--prefix', help='The FileCheck tool prefix to update') parser.add_argument( '--extra_scrub', action='store_true', help='Always use additional regex to further reduce diffs between various subtargets') @@ -44,7 +46,8 @@ initial_args = common.parse_commandline_args(parser) script_name = os.path.basename(__file__) - + filter_prefixes = [] if initial_args.prefix is None else initial_args.prefix.split(",") + filter_functions = [] if initial_args.function is None else initial_args.function.split(",") for ti in common.itertests(initial_args.tests, parser, script_name='utils/' + script_name): triple_in_ir = None @@ -96,6 +99,8 @@ for item in m.group(1).split(',')] if not check_prefixes: check_prefixes = ['CHECK'] + if filter_prefixes: + check_prefixes = list(set(filter_prefixes) & set(check_prefixes)) # FIXME: We should use multiple check prefixes to common check lines. For # now, we just ignore all but the last. @@ -196,7 +201,8 @@ if not m: continue func_name = m.group(1) - if args.function is not None and func_name != args.function: + #if args.function is not None and func_name != args.function: + if filter_functions and func_name not in filter_functions: # When filtering on a specific function, skip all others. continue is_in_function = is_in_function_start = True