diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -3130,7 +3130,7 @@ if (Reg.isPhysical()) return AArch64::FPR128RegClass.contains(Reg); - const TargetRegisterClass *TRC = getRegClass(MI, Reg); + const TargetRegisterClass *TRC = ::getRegClass(MI, Reg); return TRC == &AArch64::FPR128RegClass || TRC == &AArch64::FPR128_loRegClass; } @@ -3145,7 +3145,7 @@ AArch64::FPR16RegClass.contains(Reg) || AArch64::FPR8RegClass.contains(Reg); - const TargetRegisterClass *TRC = getRegClass(MI, Reg); + const TargetRegisterClass *TRC = ::getRegClass(MI, Reg); return TRC == &AArch64::FPR128RegClass || TRC == &AArch64::FPR128_loRegClass || TRC == &AArch64::FPR64RegClass || TRC == &AArch64::FPR64_loRegClass || TRC == &AArch64::FPR32RegClass || diff --git a/llvm/lib/Target/AArch64/AArch64SchedA55.td b/llvm/lib/Target/AArch64/AArch64SchedA55.td --- a/llvm/lib/Target/AArch64/AArch64SchedA55.td +++ b/llvm/lib/Target/AArch64/AArch64SchedA55.td @@ -26,7 +26,8 @@ let PostRAScheduler = 1; // Enable PostRA scheduler pass. let CompleteModel = 0; // Covers instructions applicable to Cortex-A55. - list UnsupportedFeatures = [HasSVE]; + list UnsupportedFeatures = !listconcat(SVEUnsupported.F, + PAUnsupported.F); // FIXME: Remove when all errors have been fixed. let FullInstRWOverlapCheck = 0; @@ -149,8 +150,63 @@ def : WriteRes { let Latency = 4; } def : WriteRes { let Latency = 3; } def : WriteRes { let Latency = 3; } -def : WriteRes { let Latency = 4; } -def : WriteRes { let Latency = 4; let BeginGroup = 1; } + +class CortexA55WriteVd : SchedWriteRes<[res]> { + let Latency = n; +} +class CortexA55WriteVq : SchedWriteRes<[res, res]> { + let Latency = n; + let BeginGroup = 1; +} + +def CortexA55QFormPred : MCSchedPredicate; +def CortexA55FpInstPred : MCSchedPredicate; + +class CortexA55WriteV : SchedWriteVariant<[ + SchedVar]>, + SchedVar]> +]>; +class CortexA55WriteVSlot2 rcl, list resl> : SchedWriteRes { + let Latency = n; + let ResourceCycles = rcl; + let EndGroup = 1; +} +class CortexA55WriteVAlu : CortexA55WriteV {} +class CortexA55WriteVAlud : CortexA55WriteVd {} +class CortexA55WriteVAluSlot2 : + CortexA55WriteVSlot2 {} +class CortexA55WriteVMac : CortexA55WriteV {} +class CortexA55WriteVMacSlot2 : + CortexA55WriteVSlot2 {} +class CortexA55WriteVDivSlot2 : + CortexA55WriteVSlot2 {} +class CortexA55WriteCrypto : + SchedWriteRes<[CortexA55UnitFPALU, CortexA55UnitFPALU]> { let Latency = n; } + +def CortexA55WriteCOPY : SchedWriteVariant<[ + SchedVar]>, + SchedVar +]>; + +def CortexA55WriteMLA : CortexA55WriteVMac<4>; +def CortexA55WriteMLAIx : CortexA55WriteVMacSlot2<4>; +def CortexA55WriteMLAL : CortexA55WriteVMacSlot2<4>; +def CortexA55WriteDOT : CortexA55WriteVMac<4>; +def CortexA55WriteDOTSc : CortexA55WriteVMacSlot2<4>; +def CortexA55WriteAESEncrypt : CortexA55WriteVAluSlot2<2, 1>; +def CortexA55WriteAESDecrypt : CortexA55WriteVAluSlot2<2, 1>; + +// NEON ALU/MAC forwarding paths +def CortexA55ReadMLA : SchedReadAdvance<3, [CortexA55WriteMLA]>; +def CortexA55ReadMLAIx : SchedReadAdvance<3, [CortexA55WriteMLAIx]>; +def CortexA55ReadMLAL : SchedReadAdvance<3, [CortexA55WriteMLAL]>; +def CortexA55ReadDOT : SchedReadAdvance<3, [CortexA55WriteDOT]>; +def CortexA55ReadDOTSc : SchedReadAdvance<3, [CortexA55WriteDOTSc]>; +def CortexA55ReadAESMC : SchedReadAdvance<1, [CortexA55WriteAESEncrypt]>; +def CortexA55ReadAESIMC : SchedReadAdvance<1, [CortexA55WriteAESDecrypt]>; + +def : SchedAlias>; +def : SchedAlias>; // FP ALU specific new schedwrite definitions def CortexA55WriteFPALU_F2 : SchedWriteRes<[CortexA55UnitFPALU]> { let Latency = 2;} @@ -235,11 +291,15 @@ //--- // Miscellaneous //--- -def : InstRW<[CortexA55WriteVLD1SI,CortexA55WriteLDP1], (instregex "LDPS?W")>; -def : InstRW<[CortexA55WriteVLD1,CortexA55WriteLDP1], (instregex "LDPS[^W]")>; -def : InstRW<[CortexA55WriteVLD1,CortexA55WriteLDP2], (instregex "LDP(X|D)")>; -def : InstRW<[CortexA55WriteVLD1,CortexA55WriteLDP4], (instregex "LDPQ")>; -def : InstRW<[WriteI], (instrs COPY)>; +def : InstRW<[CortexA55WriteVLD1SI,CortexA55WriteLDP1], (instregex "LDPS?Wi")>; +def : InstRW<[CortexA55WriteVLD1,CortexA55WriteLDP1], (instregex "LDPSi")>; +def : InstRW<[CortexA55WriteVLD1,CortexA55WriteLDP2], (instregex "LDP(X|D)i")>; +def : InstRW<[CortexA55WriteVLD1,CortexA55WriteLDP4], (instregex "LDPQi")>; +def : InstRW<[WriteAdr, CortexA55WriteVLD1SI,CortexA55WriteLDP1], (instregex "LDPS?W(pre|post)")>; +def : InstRW<[WriteAdr, CortexA55WriteVLD1,CortexA55WriteLDP1], (instregex "LDPS(pre|post)")>; +def : InstRW<[WriteAdr, CortexA55WriteVLD1,CortexA55WriteLDP2], (instregex "LDP(X|D)(pre|post)")>; +def : InstRW<[WriteAdr, CortexA55WriteVLD1,CortexA55WriteLDP4], (instregex "LDPQ(pre|post)")>; + //--- // Vector Loads - 64-bit per cycle //--- @@ -354,4 +414,183 @@ def : InstRW<[CortexA55WriteFSqrtSP], (instregex "^.*SQRT.*32$")>; def : InstRW<[CortexA55WriteFSqrtDP], (instregex "^.*SQRT.*64$")>; +// 4.15. Advanced SIMD integer instructions +// ASIMD absolute diff +def : InstRW<[CortexA55WriteVAlu<3>], (instregex "SABDv", "UABDv")>; +// ASIMD absolute diff accum +def : InstRW<[CortexA55WriteVAluSlot2<4, 2>], (instregex "SABAL?v", + "UABAL?v")>; +// ASIMD absolute diff long +def : InstRW<[CortexA55WriteVAluSlot2<3, 1>], (instregex "SABDLv", "UABDLv")>; +// ASIMD arith #1 +def : InstRW<[CortexA55WriteVAlu<2>], (instregex "ADDv", "SUBv", "NEGv", + "SR?HADDv", "UR?HADDv", "SHSUBv", "UHSUBv")>; + +// ASIMD arith #2 +def : InstRW<[CortexA55WriteVAlu<3>], (instregex "ABSv", "SADDLPv", + "UADDLPv", "SQADDv", "UQADDv", "SQNEGv", "SQSUBv", "UQSUBv", "SUQADDv", + "USQADDv", "ADDPv(2i32|2i64|4i16|4i32|8i8|8i16|16i8)$")>; +// ASIMD arith #3 +def : InstRW<[CortexA55WriteVAluSlot2<3, 1>], (instregex "SADDLv", "UADDLv", "SADDWv", + "UADDWv", "SSUBLv", "USUBLv", "SSUBWv", "USUBWv", "ADDHNv", "SUBHNv")>; +// ASIMD arith #5 +def : InstRW<[CortexA55WriteVAluSlot2<4, 2>], (instregex "RADDHNv", "RSUBHNv")>; +// ASIMD arith, reduce +def : InstRW<[CortexA55WriteVAluSlot2<3, 1>], (instregex "ADDVv", "SADDLVv", "UADDLVv")>; +// ASIMD compare #1 +def : InstRW<[CortexA55WriteVAlu<2>], (instregex "CM(EQ|GE|GT|HI|HS|LE|LT)v")>; +// ASIMD compare #2 +def : InstRW<[CortexA55WriteVAlu<3>], (instregex "CMTSTv")>; +// ASIMD logical $1 +def : InstRW<[CortexA55WriteVAlu<1>], (instregex "ANDv", "EORv", "NOTv", + "ORNv", "ORRv(4i32|8i8|8i16|16i8)$", "BICv(4i32|8i8|8i16|16i8)$")>; +def : InstRW<[CortexA55WriteVAlu<1>], (instregex "BICv(2i32|4i16)$", + "ORRv(2i32|4i16)$", "MVNIv")>; +def : InstRW<[CortexA55WriteVd<2, CortexA55UnitFPALU>], (instregex "CPY")>; +// ASIMD max/min, basic +def : InstRW<[CortexA55WriteVAlu<2>], (instregex "SMAXP?v", "SMINP?v", "UMAXP?v", + "UMINP?v", "UMINP?v")>; +// SIMD max/min, reduce +def : InstRW<[CortexA55WriteVAluSlot2<4, 1>], (instregex "SMAXVv", "SMINVv", "UMAXVv", + "UMINVv")>; +// ASIMD multiply, by element +def : InstRW<[CortexA55WriteVAluSlot2<4, 1>], (instregex + "MULv(2i32|4i16|4i32|8i16)_indexed$", + "SQR?DMULHv(1i16|1i32|2i32|4i16|4i32|8i16)_indexed$")>; +// ASIMD multiply +def : InstRW<[CortexA55WriteVAlu<3>], (instregex "PMULv")>; +// ASIMD multiply accumulate +def : InstRW<[CortexA55WriteMLA, CortexA55ReadMLA], (instregex "ML[AS]v(16i8|2i32|4i16|4i32|8i16|8i8)$")>; +def : InstRW<[CortexA55WriteMLAIx, CortexA55ReadMLAIx], (instregex "ML[AS]v(2i32|4i16|4i32|8i16)_indexed$")>; +// ASIMD multiply accumulate half +def : InstRW<[CortexA55WriteVAluSlot2<4, 1>], (instregex "SQRDML[AS]H[vi]")>; +// ASIMD multiply accumulate long +def : InstRW<[CortexA55WriteMLAL, CortexA55ReadMLAL], (instregex "[SU]ML[AS]Lv")>; +// ASIMD multiply accumulate long #2 +def : InstRW<[CortexA55WriteVAluSlot2<4, 1>], (instregex "SQDML[AS]L[iv]")>; +// ASIMD dot product +def : InstRW<[CortexA55WriteDOT, CortexA55ReadDOT], (instregex "[SU]DOTv")>; +// ASIMD dot product, by scalar +def : InstRW<[CortexA55WriteDOTSc, CortexA55ReadDOTSc], (instregex "[SU]DOTlanev")>; +// ASIMD multiply long +def : InstRW<[CortexA55WriteVAluSlot2<4, 1>], (instregex "[SU]MULLv", "SQDMULL[iv]")>; +// ASIMD polynomial (8x8) multiply long +def : InstRW<[CortexA55WriteVAluSlot2<3, 1>], (instrs PMULLv8i8, PMULLv16i8)>; +// ASIMD pairwise add and accumulate +def : InstRW<[CortexA55WriteVAluSlot2<4, 2>], (instregex "[SU]ADALPv")>; +// ASIMD shift accumulate +def : InstRW<[CortexA55WriteVAlu<3>], (instregex "[SU]SRA[vd]")>; +// ASIMD shift accumulate #2 +def : InstRW<[CortexA55WriteVAluSlot2<4, 2>], (instregex "[SU]RSRA[vd]")>; +// ASIMD shift by immed +def : InstRW<[CortexA55WriteVAlud<2>], (instregex "SHLd$", "SHLv", + "SLId$", "SRId$", "[SU]SHR[vd]", "SHRNv")>; +// ASIMD shift by immed and insert +def : InstRW<[CortexA55WriteVAlu<2>], (instregex "SLIv", "SRIv")>; +// ASIMD shift by immed +// SXTL and UXTL are aliases for SHLL +def : InstRW<[CortexA55WriteVAluSlot2<2, 1>], (instregex "[US]?SHLLv")>; +// ASIMD shift by immed #2 +def : InstRW<[CortexA55WriteVAlu<3>], (instregex "[SU]RSHR[vd]", "RSHRNv")>; +// ASIMD shift by register +def : InstRW<[CortexA55WriteVAlu<2>], (instregex "[SU]SHLv")>; +// ASIMD shift by register #2 +def : InstRW<[CortexA55WriteVAlu<3>], (instregex "[SU]RSHLv")>; + +// 4.16. Advanced SIMD floating-point instructions +// ASIMD FP compare +def : InstRW<[CortexA55WriteVAlu<2>], (instregex "FAC(GT|GE)(16|32|64|v)", + "FCM(EQ|GT|GE)(16|32|64|v)", "FCM(LE|LT)v")>; +// ASIMD FP convert, long +//def : InstRW<[CortexA55WriteVAluSlot2<4, 2>], (instregex "FCVTLv")>; +// ASIMD FP convert, other +// ASIMD FP divide, H-form +def : InstRW<[CortexA55WriteVDivSlot2<8, 5>], (instrs FDIVv4f16, FDIVv8f16)>; +// ASIMD FP divide, S-form +def : InstRW<[CortexA55WriteVDivSlot2<13, 10>], (instrs FDIVv2f32, FDIVv4f32)>; +// ASIMD FP divide, D-form +def : InstRW<[CortexA55WriteVDivSlot2<22, 19>], (instrs FDIVv2f64)>; +// ASIMD FP max/min, reduce +def : InstRW<[CortexA55WriteVAluSlot2<4, 1>], (instregex "FMAX(NM)?Vv", "FMIN(NM)?Vv")>; +// ASIMD FP multiply, by element +def : InstRW<[CortexA55WriteVAluSlot2<4, 1>], (instregex "FMULX?v[1248]i")>; + +// 4.17. Advanced SIMD miscellaneous instructions +// ASIMD bit reverse / ASIMD bitwise insert +def : InstRW<[CortexA55WriteVAlu<2>], (instregex "R?BITv", "BIFv", "BSLv")>; +// ASIMD count +def : InstRW<[CortexA55WriteVAlu<2>], (instregex "CLZv", "CNTv")>; +// ASIMD count #2 +def : InstRW<[CortexA55WriteVAlu<3>], (instregex "CLSv")>; +// ASIMD extract +def : InstRW<[CortexA55WriteVAlu<2>], (instregex "EXTv")>; +// ASIMD extract narrow +def : InstRW<[CortexA55WriteVAlu<1>], (instregex "XTNv")>; +// ASIMD extract narrow, saturating +def : InstRW<[CortexA55WriteVAlud<4>], (instregex "[SU]QXTNv", "SQXTUNv")>; +// ASIMD insert, element to element +def : InstRW<[CortexA55WriteVAlud<2>], (instregex "INSvi(8|16|32|64)lane$")>; +// ASIMD move, integer immed +def : InstRW<[CortexA55WriteVAlu<1>], (instregex "MOVI[Dv]")>; +// ASIMD move, FP immed +def : InstRW<[CortexA55WriteVAlud<1>], (instregex "FMOVv")>; +// ASIMD reverse +def : InstRW<[CortexA55WriteVAlu<2>], (instregex "REV(16|32|64)v")>; +// ASIMD table lookup (TBL, 1 reg) +def : InstRW<[CortexA55WriteVAluSlot2<2, 1>], (instrs TBLv8i8One, TBLv16i8One)>; +// ASIMD table lookup (TBL, 2 regs) +def : InstRW<[CortexA55WriteVAluSlot2<3, 2>], (instrs TBLv8i8Two, TBLv16i8Two)>; +// ASIMD table lookup (TBL, 3 regs) +def : InstRW<[CortexA55WriteVAluSlot2<4, 3>], (instrs TBLv8i8Three, TBLv16i8Three)>; +// ASIMD table lookup (TBL, 4 regs) +def : InstRW<[CortexA55WriteVAluSlot2<5, 4>], (instrs TBLv8i8Four, TBLv16i8Four)>; +// ASIMD table lookup (TBX, 1 reg) +def : InstRW<[CortexA55WriteVAluSlot2<3, 2>], (instrs TBXv8i8One, TBXv16i8One)>; +// ASIMD table lookup (TBX, 2 regs) +def : InstRW<[CortexA55WriteVAluSlot2<4, 3>], (instrs TBXv8i8Two, TBXv16i8Two)>; +// ASIMD table lookup (TBX, 3 regs) +def : InstRW<[CortexA55WriteVAluSlot2<5, 4>], (instrs TBXv8i8Three, TBXv16i8Three)>; +// ASIMD table lookup (TBX, 4 regs) +def : InstRW<[CortexA55WriteVAluSlot2<6, 5>], (instrs TBXv8i8Four, TBXv16i8Four)>; +// ASIMD transfer, element to gen reg +def : InstRW<[CortexA55WriteVAlu<2>], (instregex "[SU]MOVv")>; +// ASIMD transfer, gen reg to element +def : InstRW<[CortexA55WriteVAlu<2>], (instregex "INSvi(8|16|32|64)gpr$")>; +// ASIMD transpose, 64-bit (.2D) +def : InstRW<[CortexA55WriteVAlu<2>], (instrs TRN1v2i32, TRN2v2i32)>; +// ASIMD transpose, other +def : InstRW<[CortexA55WriteVAlu<2>], (instregex "TRN[12]v(2i64|4i16|4i32|8i8|8i16|16i8)$")>; +// ASIMD unzip/zip +def : InstRW<[CortexA55WriteVAlu<2>], (instregex "UZP[12]v", "ZIP[12]v")>; + +// 4.20. Cryptographic Extension +// Crypto AES ops +def : InstRW<[CortexA55WriteAESEncrypt], (instrs AESErr)>; +def : InstRW<[CortexA55WriteAESDecrypt], (instrs AESDrr)>; +// Crypto AES ops #2 +def : InstRW<[CortexA55WriteCrypto<2>, CortexA55ReadAESIMC], (instrs AESIMCrr, AESIMCrrTied)>; +def : InstRW<[CortexA55WriteCrypto<2>, CortexA55ReadAESMC], (instrs AESMCrr, AESMCrrTied)>; +// Crypto polynomial (64x64) multiply long +def : InstRW<[CortexA55WriteCrypto<2>], (instrs PMULLv1i64, PMULLv2i64)>; +// Crypto SHA1 xor ops +def : InstRW<[CortexA55WriteCrypto<2>], (instrs SHA1SU0rrr)>; +// Crypto SHA1 schedule acceleration ops +def : InstRW<[CortexA55WriteCrypto<2>], (instrs SHA1Hrr, SHA1SU1rr)>; +// Crypto SHA1 hash acceleration ops +def : InstRW<[CortexA55WriteCrypto<4>], (instregex "SHA1[CMP]rrr$")>; +// Crypto SHA256 schedule acceleration ops +def : InstRW<[CortexA55WriteCrypto<3>], (instrs SHA256SU0rr)>; +// Crypto SHA256 schedule acceleration ops +def : InstRW<[CortexA55WriteCrypto<3>], (instrs SHA256SU1rrr)>; +// Crypto SHA256 hash acceleration ops +def : InstRW<[CortexA55WriteCrypto<4>], (instregex "SHA256H2?rrr$")>; + +// 4.21. CRC +// CRC checksum ops +def : InstRW<[CortexA55WriteCrypto<2>], (instregex "CRC32C?[BHX]rr$")>; +// CRC checksum ops #2 +def : InstRW<[CortexA55WriteCrypto<1>], (instrs CRC32CWrr, CRC32Wrr)>; + +// COPY +def : InstRW<[CortexA55WriteCOPY], (instrs COPY)>; } diff --git a/llvm/test/Analysis/CostModel/AArch64/free-widening-casts.ll b/llvm/test/Analysis/CostModel/AArch64/free-widening-casts.ll --- a/llvm/test/Analysis/CostModel/AArch64/free-widening-casts.ll +++ b/llvm/test/Analysis/CostModel/AArch64/free-widening-casts.ll @@ -4,9 +4,11 @@ ; COST-LABEL: uaddl_8h ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <8 x i8> %a to <8 x i16> ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <8 x i8> %b to <8 x i16> -; CODE-LABEL: uaddl_8h -; CODE: uaddl v0.8h, v0.8b, v1.8b define <8 x i16> @uaddl_8h(<8 x i8> %a, <8 x i8> %b) { +; CODE-LABEL: uaddl_8h: +; CODE: // %bb.0: +; CODE-NEXT: uaddl v0.8h, v0.8b, v1.8b +; CODE-NEXT: ret %tmp0 = zext <8 x i8> %a to <8 x i16> %tmp1 = zext <8 x i8> %b to <8 x i16> %tmp2 = add <8 x i16> %tmp0, %tmp1 @@ -16,9 +18,11 @@ ; COST-LABEL: uaddl_4s ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <4 x i16> %a to <4 x i32> ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <4 x i16> %b to <4 x i32> -; CODE-LABEL: uaddl_4s -; CODE: uaddl v0.4s, v0.4h, v1.4h define <4 x i32> @uaddl_4s(<4 x i16> %a, <4 x i16> %b) { +; CODE-LABEL: uaddl_4s: +; CODE: // %bb.0: +; CODE-NEXT: uaddl v0.4s, v0.4h, v1.4h +; CODE-NEXT: ret %tmp0 = zext <4 x i16> %a to <4 x i32> %tmp1 = zext <4 x i16> %b to <4 x i32> %tmp2 = add <4 x i32> %tmp0, %tmp1 @@ -28,9 +32,11 @@ ; COST-LABEL: uaddl_2d ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <2 x i32> %a to <2 x i64> ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <2 x i32> %b to <2 x i64> -; CODE-LABEL: uaddl_2d -; CODE: uaddl v0.2d, v0.2s, v1.2s define <2 x i64> @uaddl_2d(<2 x i32> %a, <2 x i32> %b) { +; CODE-LABEL: uaddl_2d: +; CODE: // %bb.0: +; CODE-NEXT: uaddl v0.2d, v0.2s, v1.2s +; CODE-NEXT: ret %tmp0 = zext <2 x i32> %a to <2 x i64> %tmp1 = zext <2 x i32> %b to <2 x i64> %tmp2 = add <2 x i64> %tmp0, %tmp1 @@ -40,10 +46,13 @@ ; COST-LABEL: uaddl2_8h ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <16 x i8> %a to <16 x i16> ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <16 x i8> %b to <16 x i16> -; CODE-LABEL: uaddl2_8h -; CODE: uaddl2 v2.8h, v0.16b, v1.16b -; CODE-NEXT: uaddl v0.8h, v0.8b, v1.8b define <16 x i16> @uaddl2_8h(<16 x i8> %a, <16 x i8> %b) { +; CODE-LABEL: uaddl2_8h: +; CODE: // %bb.0: +; CODE-NEXT: uaddl v2.8h, v0.8b, v1.8b +; CODE-NEXT: uaddl2 v1.8h, v0.16b, v1.16b +; CODE-NEXT: mov v0.16b, v2.16b +; CODE-NEXT: ret %tmp0 = zext <16 x i8> %a to <16 x i16> %tmp1 = zext <16 x i8> %b to <16 x i16> %tmp2 = add <16 x i16> %tmp0, %tmp1 @@ -53,10 +62,13 @@ ; COST-LABEL: uaddl2_4s ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <8 x i16> %a to <8 x i32> ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <8 x i16> %b to <8 x i32> -; CODE-LABEL: uaddl2_4s -; CODE: uaddl2 v2.4s, v0.8h, v1.8h -; CODE-NEXT: uaddl v0.4s, v0.4h, v1.4h define <8 x i32> @uaddl2_4s(<8 x i16> %a, <8 x i16> %b) { +; CODE-LABEL: uaddl2_4s: +; CODE: // %bb.0: +; CODE-NEXT: uaddl v2.4s, v0.4h, v1.4h +; CODE-NEXT: uaddl2 v1.4s, v0.8h, v1.8h +; CODE-NEXT: mov v0.16b, v2.16b +; CODE-NEXT: ret %tmp0 = zext <8 x i16> %a to <8 x i32> %tmp1 = zext <8 x i16> %b to <8 x i32> %tmp2 = add <8 x i32> %tmp0, %tmp1 @@ -66,10 +78,13 @@ ; COST-LABEL: uaddl2_2d ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <4 x i32> %a to <4 x i64> ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <4 x i32> %b to <4 x i64> -; CODE-LABEL: uaddl2_2d -; CODE: uaddl2 v2.2d, v0.4s, v1.4s -; CODE-NEXT: uaddl v0.2d, v0.2s, v1.2s define <4 x i64> @uaddl2_2d(<4 x i32> %a, <4 x i32> %b) { +; CODE-LABEL: uaddl2_2d: +; CODE: // %bb.0: +; CODE-NEXT: uaddl v2.2d, v0.2s, v1.2s +; CODE-NEXT: uaddl2 v1.2d, v0.4s, v1.4s +; CODE-NEXT: mov v0.16b, v2.16b +; CODE-NEXT: ret %tmp0 = zext <4 x i32> %a to <4 x i64> %tmp1 = zext <4 x i32> %b to <4 x i64> %tmp2 = add <4 x i64> %tmp0, %tmp1 @@ -79,9 +94,11 @@ ; COST-LABEL: saddl_8h ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <8 x i8> %a to <8 x i16> ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <8 x i8> %b to <8 x i16> -; CODE-LABEL: saddl_8h -; CODE: saddl v0.8h, v0.8b, v1.8b define <8 x i16> @saddl_8h(<8 x i8> %a, <8 x i8> %b) { +; CODE-LABEL: saddl_8h: +; CODE: // %bb.0: +; CODE-NEXT: saddl v0.8h, v0.8b, v1.8b +; CODE-NEXT: ret %tmp0 = sext <8 x i8> %a to <8 x i16> %tmp1 = sext <8 x i8> %b to <8 x i16> %tmp2 = add <8 x i16> %tmp0, %tmp1 @@ -91,9 +108,11 @@ ; COST-LABEL: saddl_4s ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <4 x i16> %a to <4 x i32> ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <4 x i16> %b to <4 x i32> -; CODE-LABEL: saddl_4s -; CODE: saddl v0.4s, v0.4h, v1.4h define <4 x i32> @saddl_4s(<4 x i16> %a, <4 x i16> %b) { +; CODE-LABEL: saddl_4s: +; CODE: // %bb.0: +; CODE-NEXT: saddl v0.4s, v0.4h, v1.4h +; CODE-NEXT: ret %tmp0 = sext <4 x i16> %a to <4 x i32> %tmp1 = sext <4 x i16> %b to <4 x i32> %tmp2 = add <4 x i32> %tmp0, %tmp1 @@ -103,9 +122,11 @@ ; COST-LABEL: saddl_2d ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <2 x i32> %a to <2 x i64> ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <2 x i32> %b to <2 x i64> -; CODE-LABEL: saddl_2d -; CODE: saddl v0.2d, v0.2s, v1.2s define <2 x i64> @saddl_2d(<2 x i32> %a, <2 x i32> %b) { +; CODE-LABEL: saddl_2d: +; CODE: // %bb.0: +; CODE-NEXT: saddl v0.2d, v0.2s, v1.2s +; CODE-NEXT: ret %tmp0 = sext <2 x i32> %a to <2 x i64> %tmp1 = sext <2 x i32> %b to <2 x i64> %tmp2 = add <2 x i64> %tmp0, %tmp1 @@ -115,10 +136,13 @@ ; COST-LABEL: saddl2_8h ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <16 x i8> %a to <16 x i16> ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <16 x i8> %b to <16 x i16> -; CODE-LABEL: saddl2_8h -; CODE: saddl2 v2.8h, v0.16b, v1.16b -; CODE-NEXT: saddl v0.8h, v0.8b, v1.8b define <16 x i16> @saddl2_8h(<16 x i8> %a, <16 x i8> %b) { +; CODE-LABEL: saddl2_8h: +; CODE: // %bb.0: +; CODE-NEXT: saddl v2.8h, v0.8b, v1.8b +; CODE-NEXT: saddl2 v1.8h, v0.16b, v1.16b +; CODE-NEXT: mov v0.16b, v2.16b +; CODE-NEXT: ret %tmp0 = sext <16 x i8> %a to <16 x i16> %tmp1 = sext <16 x i8> %b to <16 x i16> %tmp2 = add <16 x i16> %tmp0, %tmp1 @@ -128,10 +152,13 @@ ; COST-LABEL: saddl2_4s ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <8 x i16> %a to <8 x i32> ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <8 x i16> %b to <8 x i32> -; CODE-LABEL: saddl2_4s -; CODE: saddl2 v2.4s, v0.8h, v1.8h -; CODE-NEXT: saddl v0.4s, v0.4h, v1.4h define <8 x i32> @saddl2_4s(<8 x i16> %a, <8 x i16> %b) { +; CODE-LABEL: saddl2_4s: +; CODE: // %bb.0: +; CODE-NEXT: saddl v2.4s, v0.4h, v1.4h +; CODE-NEXT: saddl2 v1.4s, v0.8h, v1.8h +; CODE-NEXT: mov v0.16b, v2.16b +; CODE-NEXT: ret %tmp0 = sext <8 x i16> %a to <8 x i32> %tmp1 = sext <8 x i16> %b to <8 x i32> %tmp2 = add <8 x i32> %tmp0, %tmp1 @@ -141,10 +168,13 @@ ; COST-LABEL: saddl2_2d ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <4 x i32> %a to <4 x i64> ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <4 x i32> %b to <4 x i64> -; CODE-LABEL: saddl2_2d -; CODE: saddl2 v2.2d, v0.4s, v1.4s -; CODE-NEXT: saddl v0.2d, v0.2s, v1.2s define <4 x i64> @saddl2_2d(<4 x i32> %a, <4 x i32> %b) { +; CODE-LABEL: saddl2_2d: +; CODE: // %bb.0: +; CODE-NEXT: saddl v2.2d, v0.2s, v1.2s +; CODE-NEXT: saddl2 v1.2d, v0.4s, v1.4s +; CODE-NEXT: mov v0.16b, v2.16b +; CODE-NEXT: ret %tmp0 = sext <4 x i32> %a to <4 x i64> %tmp1 = sext <4 x i32> %b to <4 x i64> %tmp2 = add <4 x i64> %tmp0, %tmp1 @@ -154,9 +184,11 @@ ; COST-LABEL: usubl_8h ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <8 x i8> %a to <8 x i16> ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <8 x i8> %b to <8 x i16> -; CODE-LABEL: usubl_8h -; CODE: usubl v0.8h, v0.8b, v1.8b define <8 x i16> @usubl_8h(<8 x i8> %a, <8 x i8> %b) { +; CODE-LABEL: usubl_8h: +; CODE: // %bb.0: +; CODE-NEXT: usubl v0.8h, v0.8b, v1.8b +; CODE-NEXT: ret %tmp0 = zext <8 x i8> %a to <8 x i16> %tmp1 = zext <8 x i8> %b to <8 x i16> %tmp2 = sub <8 x i16> %tmp0, %tmp1 @@ -166,9 +198,11 @@ ; COST-LABEL: usubl_4s ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <4 x i16> %a to <4 x i32> ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <4 x i16> %b to <4 x i32> -; CODE-LABEL: usubl_4s -; CODE: usubl v0.4s, v0.4h, v1.4h define <4 x i32> @usubl_4s(<4 x i16> %a, <4 x i16> %b) { +; CODE-LABEL: usubl_4s: +; CODE: // %bb.0: +; CODE-NEXT: usubl v0.4s, v0.4h, v1.4h +; CODE-NEXT: ret %tmp0 = zext <4 x i16> %a to <4 x i32> %tmp1 = zext <4 x i16> %b to <4 x i32> %tmp2 = sub <4 x i32> %tmp0, %tmp1 @@ -178,9 +212,11 @@ ; COST-LABEL: usubl_2d ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <2 x i32> %a to <2 x i64> ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <2 x i32> %b to <2 x i64> -; CODE-LABEL: usubl_2d -; CODE: usubl v0.2d, v0.2s, v1.2s define <2 x i64> @usubl_2d(<2 x i32> %a, <2 x i32> %b) { +; CODE-LABEL: usubl_2d: +; CODE: // %bb.0: +; CODE-NEXT: usubl v0.2d, v0.2s, v1.2s +; CODE-NEXT: ret %tmp0 = zext <2 x i32> %a to <2 x i64> %tmp1 = zext <2 x i32> %b to <2 x i64> %tmp2 = sub <2 x i64> %tmp0, %tmp1 @@ -190,10 +226,13 @@ ; COST-LABEL: usubl2_8h ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <16 x i8> %a to <16 x i16> ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <16 x i8> %b to <16 x i16> -; CODE-LABEL: usubl2_8h -; CODE: usubl2 v2.8h, v0.16b, v1.16b -; CODE-NEXT: usubl v0.8h, v0.8b, v1.8b define <16 x i16> @usubl2_8h(<16 x i8> %a, <16 x i8> %b) { +; CODE-LABEL: usubl2_8h: +; CODE: // %bb.0: +; CODE-NEXT: usubl v2.8h, v0.8b, v1.8b +; CODE-NEXT: usubl2 v1.8h, v0.16b, v1.16b +; CODE-NEXT: mov v0.16b, v2.16b +; CODE-NEXT: ret %tmp0 = zext <16 x i8> %a to <16 x i16> %tmp1 = zext <16 x i8> %b to <16 x i16> %tmp2 = sub <16 x i16> %tmp0, %tmp1 @@ -203,10 +242,13 @@ ; COST-LABEL: usubl2_4s ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <8 x i16> %a to <8 x i32> ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <8 x i16> %b to <8 x i32> -; CODE-LABEL: usubl2_4s -; CODE: usubl2 v2.4s, v0.8h, v1.8h -; CODE-NEXT: usubl v0.4s, v0.4h, v1.4h define <8 x i32> @usubl2_4s(<8 x i16> %a, <8 x i16> %b) { +; CODE-LABEL: usubl2_4s: +; CODE: // %bb.0: +; CODE-NEXT: usubl v2.4s, v0.4h, v1.4h +; CODE-NEXT: usubl2 v1.4s, v0.8h, v1.8h +; CODE-NEXT: mov v0.16b, v2.16b +; CODE-NEXT: ret %tmp0 = zext <8 x i16> %a to <8 x i32> %tmp1 = zext <8 x i16> %b to <8 x i32> %tmp2 = sub <8 x i32> %tmp0, %tmp1 @@ -216,10 +258,13 @@ ; COST-LABEL: usubl2_2d ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <4 x i32> %a to <4 x i64> ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = zext <4 x i32> %b to <4 x i64> -; CODE-LABEL: usubl2_2d -; CODE: usubl2 v2.2d, v0.4s, v1.4s -; CODE-NEXT: usubl v0.2d, v0.2s, v1.2s define <4 x i64> @usubl2_2d(<4 x i32> %a, <4 x i32> %b) { +; CODE-LABEL: usubl2_2d: +; CODE: // %bb.0: +; CODE-NEXT: usubl v2.2d, v0.2s, v1.2s +; CODE-NEXT: usubl2 v1.2d, v0.4s, v1.4s +; CODE-NEXT: mov v0.16b, v2.16b +; CODE-NEXT: ret %tmp0 = zext <4 x i32> %a to <4 x i64> %tmp1 = zext <4 x i32> %b to <4 x i64> %tmp2 = sub <4 x i64> %tmp0, %tmp1 @@ -229,9 +274,11 @@ ; COST-LABEL: ssubl_8h ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <8 x i8> %a to <8 x i16> ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <8 x i8> %b to <8 x i16> -; CODE-LABEL: ssubl_8h -; CODE: ssubl v0.8h, v0.8b, v1.8b define <8 x i16> @ssubl_8h(<8 x i8> %a, <8 x i8> %b) { +; CODE-LABEL: ssubl_8h: +; CODE: // %bb.0: +; CODE-NEXT: ssubl v0.8h, v0.8b, v1.8b +; CODE-NEXT: ret %tmp0 = sext <8 x i8> %a to <8 x i16> %tmp1 = sext <8 x i8> %b to <8 x i16> %tmp2 = sub <8 x i16> %tmp0, %tmp1 @@ -241,9 +288,11 @@ ; COST-LABEL: ssubl_4s ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <4 x i16> %a to <4 x i32> ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <4 x i16> %b to <4 x i32> -; CODE-LABEL: ssubl_4s -; CODE: ssubl v0.4s, v0.4h, v1.4h define <4 x i32> @ssubl_4s(<4 x i16> %a, <4 x i16> %b) { +; CODE-LABEL: ssubl_4s: +; CODE: // %bb.0: +; CODE-NEXT: ssubl v0.4s, v0.4h, v1.4h +; CODE-NEXT: ret %tmp0 = sext <4 x i16> %a to <4 x i32> %tmp1 = sext <4 x i16> %b to <4 x i32> %tmp2 = sub <4 x i32> %tmp0, %tmp1 @@ -253,9 +302,11 @@ ; COST-LABEL: ssubl_2d ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <2 x i32> %a to <2 x i64> ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <2 x i32> %b to <2 x i64> -; CODE-LABEL: ssubl_2d -; CODE: ssubl v0.2d, v0.2s, v1.2s define <2 x i64> @ssubl_2d(<2 x i32> %a, <2 x i32> %b) { +; CODE-LABEL: ssubl_2d: +; CODE: // %bb.0: +; CODE-NEXT: ssubl v0.2d, v0.2s, v1.2s +; CODE-NEXT: ret %tmp0 = sext <2 x i32> %a to <2 x i64> %tmp1 = sext <2 x i32> %b to <2 x i64> %tmp2 = sub <2 x i64> %tmp0, %tmp1 @@ -265,10 +316,13 @@ ; COST-LABEL: ssubl2_8h ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <16 x i8> %a to <16 x i16> ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <16 x i8> %b to <16 x i16> -; CODE-LABEL: ssubl2_8h -; CODE: ssubl2 v2.8h, v0.16b, v1.16b -; CODE-NEXT: ssubl v0.8h, v0.8b, v1.8b define <16 x i16> @ssubl2_8h(<16 x i8> %a, <16 x i8> %b) { +; CODE-LABEL: ssubl2_8h: +; CODE: // %bb.0: +; CODE-NEXT: ssubl v2.8h, v0.8b, v1.8b +; CODE-NEXT: ssubl2 v1.8h, v0.16b, v1.16b +; CODE-NEXT: mov v0.16b, v2.16b +; CODE-NEXT: ret %tmp0 = sext <16 x i8> %a to <16 x i16> %tmp1 = sext <16 x i8> %b to <16 x i16> %tmp2 = sub <16 x i16> %tmp0, %tmp1 @@ -278,10 +332,13 @@ ; COST-LABEL: ssubl2_4s ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <8 x i16> %a to <8 x i32> ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <8 x i16> %b to <8 x i32> -; CODE-LABEL: ssubl2_4s -; CODE: ssubl2 v2.4s, v0.8h, v1.8h -; CODE-NEXT: ssubl v0.4s, v0.4h, v1.4h define <8 x i32> @ssubl2_4s(<8 x i16> %a, <8 x i16> %b) { +; CODE-LABEL: ssubl2_4s: +; CODE: // %bb.0: +; CODE-NEXT: ssubl v2.4s, v0.4h, v1.4h +; CODE-NEXT: ssubl2 v1.4s, v0.8h, v1.8h +; CODE-NEXT: mov v0.16b, v2.16b +; CODE-NEXT: ret %tmp0 = sext <8 x i16> %a to <8 x i32> %tmp1 = sext <8 x i16> %b to <8 x i32> %tmp2 = sub <8 x i32> %tmp0, %tmp1 @@ -291,10 +348,13 @@ ; COST-LABEL: ssubl2_2d ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <4 x i32> %a to <4 x i64> ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp1 = sext <4 x i32> %b to <4 x i64> -; CODE-LABEL: ssubl2_2d -; CODE: ssubl2 v2.2d, v0.4s, v1.4s -; CODE-NEXT: ssubl v0.2d, v0.2s, v1.2s define <4 x i64> @ssubl2_2d(<4 x i32> %a, <4 x i32> %b) { +; CODE-LABEL: ssubl2_2d: +; CODE: // %bb.0: +; CODE-NEXT: ssubl v2.2d, v0.2s, v1.2s +; CODE-NEXT: ssubl2 v1.2d, v0.4s, v1.4s +; CODE-NEXT: mov v0.16b, v2.16b +; CODE-NEXT: ret %tmp0 = sext <4 x i32> %a to <4 x i64> %tmp1 = sext <4 x i32> %b to <4 x i64> %tmp2 = sub <4 x i64> %tmp0, %tmp1 @@ -303,9 +363,11 @@ ; COST-LABEL: uaddw_8h ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <8 x i8> %a to <8 x i16> -; CODE-LABEL: uaddw_8h -; CODE: uaddw v0.8h, v1.8h, v0.8b define <8 x i16> @uaddw_8h(<8 x i8> %a, <8 x i16> %b) { +; CODE-LABEL: uaddw_8h: +; CODE: // %bb.0: +; CODE-NEXT: uaddw v0.8h, v1.8h, v0.8b +; CODE-NEXT: ret %tmp0 = zext <8 x i8> %a to <8 x i16> %tmp1 = add <8 x i16> %b, %tmp0 ret <8 x i16> %tmp1 @@ -313,9 +375,11 @@ ; COST-LABEL: uaddw_4s ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <4 x i16> %a to <4 x i32> -; CODE-LABEL: uaddw_4s -; CODE: uaddw v0.4s, v1.4s, v0.4h define <4 x i32> @uaddw_4s(<4 x i16> %a, <4 x i32> %b) { +; CODE-LABEL: uaddw_4s: +; CODE: // %bb.0: +; CODE-NEXT: uaddw v0.4s, v1.4s, v0.4h +; CODE-NEXT: ret %tmp0 = zext <4 x i16> %a to <4 x i32> %tmp1 = add <4 x i32> %b, %tmp0 ret <4 x i32> %tmp1 @@ -323,9 +387,11 @@ ; COST-LABEL: uaddw_2d ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <2 x i32> %a to <2 x i64> -; CODE-LABEL: uaddw_2d -; CODE: uaddw v0.2d, v1.2d, v0.2s define <2 x i64> @uaddw_2d(<2 x i32> %a, <2 x i64> %b) { +; CODE-LABEL: uaddw_2d: +; CODE: // %bb.0: +; CODE-NEXT: uaddw v0.2d, v1.2d, v0.2s +; CODE-NEXT: ret %tmp0 = zext <2 x i32> %a to <2 x i64> %tmp1 = add <2 x i64> %b, %tmp0 ret <2 x i64> %tmp1 @@ -333,10 +399,13 @@ ; COST-LABEL: uaddw2_8h ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <16 x i8> %a to <16 x i16> -; CODE-LABEL: uaddw2_8h -; CODE: uaddw2 v2.8h, v2.8h, v0.16b -; CODE-NEXT: uaddw v0.8h, v1.8h, v0.8b define <16 x i16> @uaddw2_8h(<16 x i8> %a, <16 x i16> %b) { +; CODE-LABEL: uaddw2_8h: +; CODE: // %bb.0: +; CODE-NEXT: uaddw v3.8h, v1.8h, v0.8b +; CODE-NEXT: uaddw2 v1.8h, v2.8h, v0.16b +; CODE-NEXT: mov v0.16b, v3.16b +; CODE-NEXT: ret %tmp0 = zext <16 x i8> %a to <16 x i16> %tmp1 = add <16 x i16> %b, %tmp0 ret <16 x i16> %tmp1 @@ -344,10 +413,13 @@ ; COST-LABEL: uaddw2_4s ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <8 x i16> %a to <8 x i32> -; CODE-LABEL: uaddw2_4s -; CODE: uaddw2 v2.4s, v2.4s, v0.8h -; CODE-NEXT: uaddw v0.4s, v1.4s, v0.4h define <8 x i32> @uaddw2_4s(<8 x i16> %a, <8 x i32> %b) { +; CODE-LABEL: uaddw2_4s: +; CODE: // %bb.0: +; CODE-NEXT: uaddw v3.4s, v1.4s, v0.4h +; CODE-NEXT: uaddw2 v1.4s, v2.4s, v0.8h +; CODE-NEXT: mov v0.16b, v3.16b +; CODE-NEXT: ret %tmp0 = zext <8 x i16> %a to <8 x i32> %tmp1 = add <8 x i32> %b, %tmp0 ret <8 x i32> %tmp1 @@ -355,10 +427,13 @@ ; COST-LABEL: uaddw2_2d ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <4 x i32> %a to <4 x i64> -; CODE-LABEL: uaddw2_2d -; CODE: uaddw2 v2.2d, v2.2d, v0.4s -; CODE-NEXT: uaddw v0.2d, v1.2d, v0.2s define <4 x i64> @uaddw2_2d(<4 x i32> %a, <4 x i64> %b) { +; CODE-LABEL: uaddw2_2d: +; CODE: // %bb.0: +; CODE-NEXT: uaddw v3.2d, v1.2d, v0.2s +; CODE-NEXT: uaddw2 v1.2d, v2.2d, v0.4s +; CODE-NEXT: mov v0.16b, v3.16b +; CODE-NEXT: ret %tmp0 = zext <4 x i32> %a to <4 x i64> %tmp1 = add <4 x i64> %b, %tmp0 ret <4 x i64> %tmp1 @@ -366,9 +441,11 @@ ; COST-LABEL: saddw_8h ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <8 x i8> %a to <8 x i16> -; CODE-LABEL: saddw_8h -; CODE: saddw v0.8h, v1.8h, v0.8b define <8 x i16> @saddw_8h(<8 x i8> %a, <8 x i16> %b) { +; CODE-LABEL: saddw_8h: +; CODE: // %bb.0: +; CODE-NEXT: saddw v0.8h, v1.8h, v0.8b +; CODE-NEXT: ret %tmp0 = sext <8 x i8> %a to <8 x i16> %tmp1 = add <8 x i16> %b, %tmp0 ret <8 x i16> %tmp1 @@ -376,9 +453,11 @@ ; COST-LABEL: saddw_4s ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <4 x i16> %a to <4 x i32> -; CODE-LABEL: saddw_4s -; CODE: saddw v0.4s, v1.4s, v0.4h define <4 x i32> @saddw_4s(<4 x i16> %a, <4 x i32> %b) { +; CODE-LABEL: saddw_4s: +; CODE: // %bb.0: +; CODE-NEXT: saddw v0.4s, v1.4s, v0.4h +; CODE-NEXT: ret %tmp0 = sext <4 x i16> %a to <4 x i32> %tmp1 = add <4 x i32> %b, %tmp0 ret <4 x i32> %tmp1 @@ -386,9 +465,11 @@ ; COST-LABEL: saddw_2d ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <2 x i32> %a to <2 x i64> -; CODE-LABEL: saddw_2d -; CODE: saddw v0.2d, v1.2d, v0.2s define <2 x i64> @saddw_2d(<2 x i32> %a, <2 x i64> %b) { +; CODE-LABEL: saddw_2d: +; CODE: // %bb.0: +; CODE-NEXT: saddw v0.2d, v1.2d, v0.2s +; CODE-NEXT: ret %tmp0 = sext <2 x i32> %a to <2 x i64> %tmp1 = add <2 x i64> %b, %tmp0 ret <2 x i64> %tmp1 @@ -396,10 +477,13 @@ ; COST-LABEL: saddw2_8h ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <16 x i8> %a to <16 x i16> -; CODE-LABEL: saddw2_8h -; CODE: saddw2 v2.8h, v2.8h, v0.16b -; CODE-NEXT: saddw v0.8h, v1.8h, v0.8b define <16 x i16> @saddw2_8h(<16 x i8> %a, <16 x i16> %b) { +; CODE-LABEL: saddw2_8h: +; CODE: // %bb.0: +; CODE-NEXT: saddw v3.8h, v1.8h, v0.8b +; CODE-NEXT: saddw2 v1.8h, v2.8h, v0.16b +; CODE-NEXT: mov v0.16b, v3.16b +; CODE-NEXT: ret %tmp0 = sext <16 x i8> %a to <16 x i16> %tmp1 = add <16 x i16> %b, %tmp0 ret <16 x i16> %tmp1 @@ -407,10 +491,13 @@ ; COST-LABEL: saddw2_4s ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <8 x i16> %a to <8 x i32> -; CODE-LABEL: saddw2_4s -; CODE: saddw2 v2.4s, v2.4s, v0.8h -; CODE-NEXT: saddw v0.4s, v1.4s, v0.4h define <8 x i32> @saddw2_4s(<8 x i16> %a, <8 x i32> %b) { +; CODE-LABEL: saddw2_4s: +; CODE: // %bb.0: +; CODE-NEXT: saddw v3.4s, v1.4s, v0.4h +; CODE-NEXT: saddw2 v1.4s, v2.4s, v0.8h +; CODE-NEXT: mov v0.16b, v3.16b +; CODE-NEXT: ret %tmp0 = sext <8 x i16> %a to <8 x i32> %tmp1 = add <8 x i32> %b, %tmp0 ret <8 x i32> %tmp1 @@ -418,10 +505,13 @@ ; COST-LABEL: saddw2_2d ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <4 x i32> %a to <4 x i64> -; CODE-LABEL: saddw2_2d -; CODE: saddw2 v2.2d, v2.2d, v0.4s -; CODE-NEXT: saddw v0.2d, v1.2d, v0.2s define <4 x i64> @saddw2_2d(<4 x i32> %a, <4 x i64> %b) { +; CODE-LABEL: saddw2_2d: +; CODE: // %bb.0: +; CODE-NEXT: saddw v3.2d, v1.2d, v0.2s +; CODE-NEXT: saddw2 v1.2d, v2.2d, v0.4s +; CODE-NEXT: mov v0.16b, v3.16b +; CODE-NEXT: ret %tmp0 = sext <4 x i32> %a to <4 x i64> %tmp1 = add <4 x i64> %b, %tmp0 ret <4 x i64> %tmp1 @@ -429,9 +519,11 @@ ; COST-LABEL: usubw_8h ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <8 x i8> %a to <8 x i16> -; CODE-LABEL: usubw_8h -; CODE: usubw v0.8h, v1.8h, v0.8b define <8 x i16> @usubw_8h(<8 x i8> %a, <8 x i16> %b) { +; CODE-LABEL: usubw_8h: +; CODE: // %bb.0: +; CODE-NEXT: usubw v0.8h, v1.8h, v0.8b +; CODE-NEXT: ret %tmp0 = zext <8 x i8> %a to <8 x i16> %tmp1 = sub <8 x i16> %b, %tmp0 ret <8 x i16> %tmp1 @@ -439,9 +531,11 @@ ; COST-LABEL: usubw_4s ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <4 x i16> %a to <4 x i32> -; CODE-LABEL: usubw_4s -; CODE: usubw v0.4s, v1.4s, v0.4h define <4 x i32> @usubw_4s(<4 x i16> %a, <4 x i32> %b) { +; CODE-LABEL: usubw_4s: +; CODE: // %bb.0: +; CODE-NEXT: usubw v0.4s, v1.4s, v0.4h +; CODE-NEXT: ret %tmp0 = zext <4 x i16> %a to <4 x i32> %tmp1 = sub <4 x i32> %b, %tmp0 ret <4 x i32> %tmp1 @@ -449,9 +543,11 @@ ; COST-LABEL: usubw_2d ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <2 x i32> %a to <2 x i64> -; CODE-LABEL: usubw_2d -; CODE: usubw v0.2d, v1.2d, v0.2s define <2 x i64> @usubw_2d(<2 x i32> %a, <2 x i64> %b) { +; CODE-LABEL: usubw_2d: +; CODE: // %bb.0: +; CODE-NEXT: usubw v0.2d, v1.2d, v0.2s +; CODE-NEXT: ret %tmp0 = zext <2 x i32> %a to <2 x i64> %tmp1 = sub <2 x i64> %b, %tmp0 ret <2 x i64> %tmp1 @@ -459,10 +555,13 @@ ; COST-LABEL: usubw2_8h ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <16 x i8> %a to <16 x i16> -; CODE-LABEL: usubw2_8h -; CODE: usubw2 v2.8h, v2.8h, v0.16b -; CODE-NEXT: usubw v0.8h, v1.8h, v0.8b define <16 x i16> @usubw2_8h(<16 x i8> %a, <16 x i16> %b) { +; CODE-LABEL: usubw2_8h: +; CODE: // %bb.0: +; CODE-NEXT: usubw v3.8h, v1.8h, v0.8b +; CODE-NEXT: usubw2 v1.8h, v2.8h, v0.16b +; CODE-NEXT: mov v0.16b, v3.16b +; CODE-NEXT: ret %tmp0 = zext <16 x i8> %a to <16 x i16> %tmp1 = sub <16 x i16> %b, %tmp0 ret <16 x i16> %tmp1 @@ -470,10 +569,13 @@ ; COST-LABEL: usubw2_4s ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <8 x i16> %a to <8 x i32> -; CODE-LABEL: usubw2_4s -; CODE: usubw2 v2.4s, v2.4s, v0.8h -; CODE-NEXT: usubw v0.4s, v1.4s, v0.4h define <8 x i32> @usubw2_4s(<8 x i16> %a, <8 x i32> %b) { +; CODE-LABEL: usubw2_4s: +; CODE: // %bb.0: +; CODE-NEXT: usubw v3.4s, v1.4s, v0.4h +; CODE-NEXT: usubw2 v1.4s, v2.4s, v0.8h +; CODE-NEXT: mov v0.16b, v3.16b +; CODE-NEXT: ret %tmp0 = zext <8 x i16> %a to <8 x i32> %tmp1 = sub <8 x i32> %b, %tmp0 ret <8 x i32> %tmp1 @@ -481,10 +583,13 @@ ; COST-LABEL: usubw2_2d ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = zext <4 x i32> %a to <4 x i64> -; CODE-LABEL: usubw2_2d -; CODE: usubw2 v2.2d, v2.2d, v0.4s -; CODE-NEXT: usubw v0.2d, v1.2d, v0.2s define <4 x i64> @usubw2_2d(<4 x i32> %a, <4 x i64> %b) { +; CODE-LABEL: usubw2_2d: +; CODE: // %bb.0: +; CODE-NEXT: usubw v3.2d, v1.2d, v0.2s +; CODE-NEXT: usubw2 v1.2d, v2.2d, v0.4s +; CODE-NEXT: mov v0.16b, v3.16b +; CODE-NEXT: ret %tmp0 = zext <4 x i32> %a to <4 x i64> %tmp1 = sub <4 x i64> %b, %tmp0 ret <4 x i64> %tmp1 @@ -492,9 +597,11 @@ ; COST-LABEL: ssubw_8h ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <8 x i8> %a to <8 x i16> -; CODE-LABEL: ssubw_8h -; CODE: ssubw v0.8h, v1.8h, v0.8b define <8 x i16> @ssubw_8h(<8 x i8> %a, <8 x i16> %b) { +; CODE-LABEL: ssubw_8h: +; CODE: // %bb.0: +; CODE-NEXT: ssubw v0.8h, v1.8h, v0.8b +; CODE-NEXT: ret %tmp0 = sext <8 x i8> %a to <8 x i16> %tmp1 = sub <8 x i16> %b, %tmp0 ret <8 x i16> %tmp1 @@ -502,9 +609,11 @@ ; COST-LABEL: ssubw_4s ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <4 x i16> %a to <4 x i32> -; CODE-LABEL: ssubw_4s -; CODE: ssubw v0.4s, v1.4s, v0.4h define <4 x i32> @ssubw_4s(<4 x i16> %a, <4 x i32> %b) { +; CODE-LABEL: ssubw_4s: +; CODE: // %bb.0: +; CODE-NEXT: ssubw v0.4s, v1.4s, v0.4h +; CODE-NEXT: ret %tmp0 = sext <4 x i16> %a to <4 x i32> %tmp1 = sub <4 x i32> %b, %tmp0 ret <4 x i32> %tmp1 @@ -512,9 +621,11 @@ ; COST-LABEL: ssubw_2d ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <2 x i32> %a to <2 x i64> -; CODE-LABEL: ssubw_2d -; CODE: ssubw v0.2d, v1.2d, v0.2s define <2 x i64> @ssubw_2d(<2 x i32> %a, <2 x i64> %b) { +; CODE-LABEL: ssubw_2d: +; CODE: // %bb.0: +; CODE-NEXT: ssubw v0.2d, v1.2d, v0.2s +; CODE-NEXT: ret %tmp0 = sext <2 x i32> %a to <2 x i64> %tmp1 = sub <2 x i64> %b, %tmp0 ret <2 x i64> %tmp1 @@ -522,10 +633,13 @@ ; COST-LABEL: ssubw2_8h ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <16 x i8> %a to <16 x i16> -; CODE-LABEL: ssubw2_8h -; CODE: ssubw2 v2.8h, v2.8h, v0.16b -; CODE-NEXT: ssubw v0.8h, v1.8h, v0.8b define <16 x i16> @ssubw2_8h(<16 x i8> %a, <16 x i16> %b) { +; CODE-LABEL: ssubw2_8h: +; CODE: // %bb.0: +; CODE-NEXT: ssubw v3.8h, v1.8h, v0.8b +; CODE-NEXT: ssubw2 v1.8h, v2.8h, v0.16b +; CODE-NEXT: mov v0.16b, v3.16b +; CODE-NEXT: ret %tmp0 = sext <16 x i8> %a to <16 x i16> %tmp1 = sub <16 x i16> %b, %tmp0 ret <16 x i16> %tmp1 @@ -533,10 +647,13 @@ ; COST-LABEL: ssubw2_4s ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <8 x i16> %a to <8 x i32> -; CODE-LABEL: ssubw2_4s -; CODE: ssubw2 v2.4s, v2.4s, v0.8h -; CODE-NEXT: ssubw v0.4s, v1.4s, v0.4h define <8 x i32> @ssubw2_4s(<8 x i16> %a, <8 x i32> %b) { +; CODE-LABEL: ssubw2_4s: +; CODE: // %bb.0: +; CODE-NEXT: ssubw v3.4s, v1.4s, v0.4h +; CODE-NEXT: ssubw2 v1.4s, v2.4s, v0.8h +; CODE-NEXT: mov v0.16b, v3.16b +; CODE-NEXT: ret %tmp0 = sext <8 x i16> %a to <8 x i32> %tmp1 = sub <8 x i32> %b, %tmp0 ret <8 x i32> %tmp1 @@ -544,10 +661,13 @@ ; COST-LABEL: ssubw2_2d ; COST-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %tmp0 = sext <4 x i32> %a to <4 x i64> -; CODE-LABEL: ssubw2_2d -; CODE: ssubw2 v2.2d, v2.2d, v0.4s -; CODE-NEXT: ssubw v0.2d, v1.2d, v0.2s define <4 x i64> @ssubw2_2d(<4 x i32> %a, <4 x i64> %b) { +; CODE-LABEL: ssubw2_2d: +; CODE: // %bb.0: +; CODE-NEXT: ssubw v3.2d, v1.2d, v0.2s +; CODE-NEXT: ssubw2 v1.2d, v2.2d, v0.4s +; CODE-NEXT: mov v0.16b, v3.16b +; CODE-NEXT: ret %tmp0 = sext <4 x i32> %a to <4 x i64> %tmp1 = sub <4 x i64> %b, %tmp0 ret <4 x i64> %tmp1 diff --git a/llvm/test/Analysis/CostModel/AArch64/shuffle-select.ll b/llvm/test/Analysis/CostModel/AArch64/shuffle-select.ll --- a/llvm/test/Analysis/CostModel/AArch64/shuffle-select.ll +++ b/llvm/test/Analysis/CostModel/AArch64/shuffle-select.ll @@ -4,7 +4,7 @@ ; COST-LABEL: sel.v8i8 ; COST: Found an estimated cost of 42 for instruction: %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> ; CODE-LABEL: sel.v8i8 -; CODE: tbl v0.8b, { v0.16b }, v1.8b +; CODE: tbl v0.8b, { v0.16b }, v2.8b define <8 x i8> @sel.v8i8(<8 x i8> %v0, <8 x i8> %v1) { %tmp0 = shufflevector <8 x i8> %v0, <8 x i8> %v1, <8 x i32> ret <8 x i8> %tmp0 diff --git a/llvm/test/Analysis/CostModel/AArch64/vector-select.ll b/llvm/test/Analysis/CostModel/AArch64/vector-select.ll --- a/llvm/test/Analysis/CostModel/AArch64/vector-select.ll +++ b/llvm/test/Analysis/CostModel/AArch64/vector-select.ll @@ -5,13 +5,12 @@ ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = icmp eq <8 x i8> %a, %b ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x i8> %a, <8 x i8> %c -; CODE-LABEL: v8i8_select_eq -; CODE: bb.0 -; CODE-NEXT: cmeq v{{.+}}.8b, v{{.+}}.8b, v{{.+}}.8b -; CODE-NEXT: bif v{{.+}}.8b, v{{.+}}.8b, v{{.+}}.8b -; CODE-NEXT: ret - define <8 x i8> @v8i8_select_eq(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) { +; CODE-LABEL: v8i8_select_eq: +; CODE: // %bb.0: +; CODE-NEXT: cmeq v1.8b, v0.8b, v1.8b +; CODE-NEXT: bif v0.8b, v2.8b, v1.8b +; CODE-NEXT: ret %cmp.1 = icmp eq <8 x i8> %a, %b %s.1 = select <8 x i1> %cmp.1, <8 x i8> %a, <8 x i8> %c ret <8 x i8> %s.1 @@ -21,13 +20,12 @@ ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = icmp sgt <16 x i8> %a, %b ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <16 x i1> %cmp.1, <16 x i8> %a, <16 x i8> %c -; CODE-LABEL: v16i8_select_sgt -; CODE: bb.0 -; CODE-NEXT: cmgt v{{.+}}.16b, v{{.+}}.16b, v{{.+}}.16b -; CODE-NEXT: bif v{{.+}}.16b, v{{.+}}.16b, v{{.+}}.16b -; CODE-NEXT: ret - define <16 x i8> @v16i8_select_sgt(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { +; CODE-LABEL: v16i8_select_sgt: +; CODE: // %bb.0: +; CODE-NEXT: cmgt v1.16b, v0.16b, v1.16b +; CODE-NEXT: bif v0.16b, v2.16b, v1.16b +; CODE-NEXT: ret %cmp.1 = icmp sgt <16 x i8> %a, %b %s.1 = select <16 x i1> %cmp.1, <16 x i8> %a, <16 x i8> %c ret <16 x i8> %s.1 @@ -37,13 +35,12 @@ ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = icmp ne <4 x i16> %a, %b ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x i16> %a, <4 x i16> %c -; CODE-LABEL: v4i16_select_ne -; CODE: bb.0 -; CODE-NEXT: cmeq v{{.+}}.4h, v{{.+}}.4h, v{{.+}}.4h -; CODE-NEXT: bit v{{.+}}.8b, v{{.+}}.8b, v{{.+}}.8b -; CODE-NEXT: ret - define <4 x i16> @v4i16_select_ne(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) { +; CODE-LABEL: v4i16_select_ne: +; CODE: // %bb.0: +; CODE-NEXT: cmeq v1.4h, v0.4h, v1.4h +; CODE-NEXT: bit v0.8b, v2.8b, v1.8b +; CODE-NEXT: ret %cmp.1 = icmp ne <4 x i16> %a, %b %s.1 = select <4 x i1> %cmp.1, <4 x i16> %a, <4 x i16> %c ret <4 x i16> %s.1 @@ -53,13 +50,12 @@ ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = icmp ugt <8 x i16> %a, %b ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <8 x i1> %cmp.1, <8 x i16> %a, <8 x i16> %c -; CODE-LABEL: v8i16_select_ugt -; CODE: bb.0 -; CODE-NEXT: cmhi v{{.+}}.8h, v{{.+}}.8h, v{{.+}}.8h -; CODE-NEXT: bif v{{.+}}.16b, v{{.+}}.16b, v{{.+}}.16b -; CODE-NEXT: ret - define <8 x i16> @v8i16_select_ugt(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) { +; CODE-LABEL: v8i16_select_ugt: +; CODE: // %bb.0: +; CODE-NEXT: cmhi v1.8h, v0.8h, v1.8h +; CODE-NEXT: bif v0.16b, v2.16b, v1.16b +; CODE-NEXT: ret %cmp.1 = icmp ugt <8 x i16> %a, %b %s.1 = select <8 x i1> %cmp.1, <8 x i16> %a, <8 x i16> %c ret <8 x i16> %s.1 @@ -69,13 +65,12 @@ ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = icmp ule <2 x i32> %a, %b ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <2 x i1> %cmp.1, <2 x i32> %a, <2 x i32> %c -; CODE-LABEL: v2i32_select_ule -; CODE: bb.0 -; CODE-NEXT: cmhs v{{.+}}.2s, v{{.+}}.2s, v{{.+}}.2s -; CODE-NEXT: bif v{{.+}}.8b, v{{.+}}.8b, v{{.+}}.8b -; CODE-NEXT: ret - define <2 x i32> @v2i32_select_ule(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) { +; CODE-LABEL: v2i32_select_ule: +; CODE: // %bb.0: +; CODE-NEXT: cmhs v1.2s, v1.2s, v0.2s +; CODE-NEXT: bif v0.8b, v2.8b, v1.8b +; CODE-NEXT: ret %cmp.1 = icmp ule <2 x i32> %a, %b %s.1 = select <2 x i1> %cmp.1, <2 x i32> %a, <2 x i32> %c ret <2 x i32> %s.1 @@ -85,13 +80,12 @@ ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = icmp ult <4 x i32> %a, %b ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <4 x i1> %cmp.1, <4 x i32> %a, <4 x i32> %c -; CODE-LABEL: v4i32_select_ult -; CODE: bb.0 -; CODE-NEXT: cmhi v{{.+}}.4s, v{{.+}}.4s, v{{.+}}.4s -; CODE-NEXT: bif v{{.+}}.16b, v{{.+}}.16b, v{{.+}}.16b -; CODE-NEXT: ret - define <4 x i32> @v4i32_select_ult(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { +; CODE-LABEL: v4i32_select_ult: +; CODE: // %bb.0: +; CODE-NEXT: cmhi v1.4s, v1.4s, v0.4s +; CODE-NEXT: bif v0.16b, v2.16b, v1.16b +; CODE-NEXT: ret %cmp.1 = icmp ult <4 x i32> %a, %b %s.1 = select <4 x i1> %cmp.1, <4 x i32> %a, <4 x i32> %c ret <4 x i32> %s.1 @@ -101,13 +95,12 @@ ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %cmp.1 = icmp sle <2 x i64> %a, %b ; COST-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s.1 = select <2 x i1> %cmp.1, <2 x i64> %a, <2 x i64> %c -; CODE-LABEL: v2i64_select_sle -; CODE: bb.0 -; CODE-NEXT: cmge v{{.+}}.2d, v{{.+}}.2d, v{{.+}}.2d -; CODE-NEXT: bif v{{.+}}.16b, v{{.+}}.16b, v{{.+}}.16b -; CODE-NEXT: ret - define <2 x i64> @v2i64_select_sle(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) { +; CODE-LABEL: v2i64_select_sle: +; CODE: // %bb.0: +; CODE-NEXT: cmge v1.2d, v1.2d, v0.2d +; CODE-NEXT: bif v0.16b, v2.16b, v1.16b +; CODE-NEXT: ret %cmp.1 = icmp sle <2 x i64> %a, %b %s.1 = select <2 x i1> %cmp.1, <2 x i64> %a, <2 x i64> %c ret <2 x i64> %s.1 @@ -117,20 +110,19 @@ ; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %cmp.1 = icmp sle <3 x i64> %a, %b ; COST-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s.1 = select <3 x i1> %cmp.1, <3 x i64> %a, <3 x i64> %c -; CODE-LABEL: v3i64_select_sle -; CODE: bb.0 -; CODE: mov -; CODE: ldr -; CODE: mov -; CODE: mov -; CODE: cmge -; CODE: cmge -; CODE: bif -; CODE: bif -; CODE: ext -; CODE: ret - define <3 x i64> @v3i64_select_sle(<3 x i64> %a, <3 x i64> %b, <3 x i64> %c) { +; CODE-LABEL: v3i64_select_sle: +; CODE: // %bb.0: +; CODE: ldr d16, [sp] +; CODE: mov v0.d[1], v1.d[0] +; CODE-NEXT: mov v3.d[1], v4.d[0] +; CODE-NEXT: mov v6.d[1], v7.d[0] +; CODE-NEXT: cmge v1.2d, v3.2d, v0.2d +; CODE-NEXT: cmge v3.2d, v5.2d, v2.2d +; CODE-NEXT: bif v0.16b, v6.16b, v1.16b +; CODE-NEXT: bif v2.8b, v16.8b, v3.8b +; CODE-NEXT: ext v1.16b, v0.16b, v0.16b, #8 +; CODE: ret %cmp.1 = icmp sle <3 x i64> %a, %b %s.1 = select <3 x i1> %cmp.1, <3 x i64> %a, <3 x i64> %c ret <3 x i64> %s.1 @@ -139,15 +131,14 @@ ; COST-LABEL: v2i64_select_no_cmp ; COST-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %s.1 = select <2 x i1> %cond, <2 x i64> %a, <2 x i64> %b -; CODE-LABEL: v2i64_select_no_cmp -; CODE: bb.0 -; CODE-NEXT: ushll v{{.+}}.2d, v{{.+}}.2s, #0 -; CODE-NEXT: shl v{{.+}}.2d, v{{.+}}.2d, #63 -; CODE-NEXT: sshr v{{.+}}.2d, v{{.+}}.2d, #63 -; CODE-NEXT: bif v{{.+}}.16b, v{{.+}}.16b, v{{.+}}.16b -; CODE-NEXT: ret - define <2 x i64> @v2i64_select_no_cmp(<2 x i64> %a, <2 x i64> %b, <2 x i1> %cond) { +; CODE-LABEL: v2i64_select_no_cmp: +; CODE: // %bb.0: +; CODE-NEXT: ushll v2.2d, v2.2s, #0 +; CODE-NEXT: shl v2.2d, v2.2d, #63 +; CODE-NEXT: sshr v2.2d, v2.2d, #63 +; CODE-NEXT: bif v0.16b, v1.16b, v2.16b +; CODE-NEXT: ret %s.1 = select <2 x i1> %cond, <2 x i64> %a, <2 x i64> %b ret <2 x i64> %s.1 } diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/call-translator-variadic-musttail.ll @@ -56,17 +56,17 @@ ; CHECK-NEXT: Lloh1: ; CHECK-NEXT: add x0, x0, _asdf@PAGEOFF ; CHECK-NEXT: mov x20, x1 +; CHECK-NEXT: stp q1, q0, [sp, #96] ; 32-byte Folded Spill ; CHECK-NEXT: mov x21, x2 ; CHECK-NEXT: mov x22, x3 +; CHECK-NEXT: stp q3, q2, [sp, #64] ; 32-byte Folded Spill ; CHECK-NEXT: mov x23, x4 ; CHECK-NEXT: mov x24, x5 +; CHECK-NEXT: stp q6, q4, [sp, #32] ; 32-byte Folded Spill ; CHECK-NEXT: mov x25, x6 ; CHECK-NEXT: mov x26, x7 -; CHECK-NEXT: stp q1, q0, [sp, #96] ; 32-byte Folded Spill ; CHECK-NEXT: mov x27, x8 -; CHECK-NEXT: stp q3, q2, [sp, #64] ; 32-byte Folded Spill -; CHECK-NEXT: stp q5, q4, [sp, #32] ; 32-byte Folded Spill -; CHECK-NEXT: stp q7, q6, [sp] ; 32-byte Folded Spill +; CHECK-NEXT: stp q5, q7, [sp] ; 32-byte Folded Spill ; CHECK-NEXT: bl _puts ; CHECK-NEXT: ldp q1, q0, [sp, #96] ; 32-byte Folded Reload ; CHECK-NEXT: mov w0, w19 @@ -79,8 +79,8 @@ ; CHECK-NEXT: mov x7, x26 ; CHECK-NEXT: mov x8, x27 ; CHECK-NEXT: ldp q3, q2, [sp, #64] ; 32-byte Folded Reload -; CHECK-NEXT: ldp q5, q4, [sp, #32] ; 32-byte Folded Reload -; CHECK-NEXT: ldp q7, q6, [sp] ; 32-byte Folded Reload +; CHECK-NEXT: ldp q6, q4, [sp, #32] ; 32-byte Folded Reload +; CHECK-NEXT: ldp q5, q7, [sp] ; 32-byte Folded Reload ; CHECK-NEXT: ldp x29, x30, [sp, #208] ; 16-byte Folded Reload ; CHECK-NEXT: ldp x20, x19, [sp, #192] ; 16-byte Folded Reload ; CHECK-NEXT: ldp x22, x21, [sp, #176] ; 16-byte Folded Reload @@ -122,24 +122,24 @@ ; CHECK-NEXT: .cfi_offset w26, -80 ; CHECK-NEXT: .cfi_offset w27, -88 ; CHECK-NEXT: .cfi_offset w28, -96 -; CHECK-NEXT: add x9, sp, #128 -; CHECK-NEXT: add x10, sp, #256 +; CHECK-NEXT: mov x27, x8 +; CHECK-NEXT: add x8, sp, #128 +; CHECK-NEXT: add x9, sp, #256 ; CHECK-NEXT: mov x19, x0 ; CHECK-NEXT: mov x20, x1 ; CHECK-NEXT: mov x21, x2 ; CHECK-NEXT: mov x22, x3 ; CHECK-NEXT: mov x23, x4 ; CHECK-NEXT: mov x24, x5 +; CHECK-NEXT: stp q1, q0, [sp, #64] ; 32-byte Folded Spill ; CHECK-NEXT: mov x25, x6 ; CHECK-NEXT: mov x26, x7 -; CHECK-NEXT: stp q1, q0, [sp, #96] ; 32-byte Folded Spill -; CHECK-NEXT: mov x27, x8 -; CHECK-NEXT: stp q3, q2, [sp, #64] ; 32-byte Folded Spill -; CHECK-NEXT: stp q5, q4, [sp, #32] ; 32-byte Folded Spill -; CHECK-NEXT: stp q7, q6, [sp] ; 32-byte Folded Spill -; CHECK-NEXT: str x10, [x9] +; CHECK-NEXT: stp q3, q2, [sp, #32] ; 32-byte Folded Spill +; CHECK-NEXT: stp q5, q4, [sp] ; 32-byte Folded Spill +; CHECK-NEXT: stp q7, q6, [sp, #96] ; 32-byte Folded Spill +; CHECK-NEXT: str x9, [x8] ; CHECK-NEXT: bl _get_f -; CHECK-NEXT: ldp q1, q0, [sp, #96] ; 32-byte Folded Reload +; CHECK-NEXT: ldp q1, q0, [sp, #64] ; 32-byte Folded Reload ; CHECK-NEXT: mov x9, x0 ; CHECK-NEXT: mov x0, x19 ; CHECK-NEXT: mov x1, x20 @@ -150,9 +150,9 @@ ; CHECK-NEXT: mov x6, x25 ; CHECK-NEXT: mov x7, x26 ; CHECK-NEXT: mov x8, x27 -; CHECK-NEXT: ldp q3, q2, [sp, #64] ; 32-byte Folded Reload -; CHECK-NEXT: ldp q5, q4, [sp, #32] ; 32-byte Folded Reload -; CHECK-NEXT: ldp q7, q6, [sp] ; 32-byte Folded Reload +; CHECK-NEXT: ldp q3, q2, [sp, #32] ; 32-byte Folded Reload +; CHECK-NEXT: ldp q5, q4, [sp] ; 32-byte Folded Reload +; CHECK-NEXT: ldp q7, q6, [sp, #96] ; 32-byte Folded Reload ; CHECK-NEXT: ldp x29, x30, [sp, #240] ; 16-byte Folded Reload ; CHECK-NEXT: ldp x20, x19, [sp, #224] ; 16-byte Folded Reload ; CHECK-NEXT: ldp x22, x21, [sp, #208] ; 16-byte Folded Reload @@ -193,14 +193,14 @@ ; CHECK-NEXT: br x9 ; CHECK-NEXT: LBB5_2: ; %else ; CHECK-NEXT: Lloh2: -; CHECK-NEXT: adrp x10, _g@GOTPAGE -; CHECK-NEXT: ldr x9, [x0, #16] +; CHECK-NEXT: adrp x9, _g@GOTPAGE ; CHECK-NEXT: mov w11, #42 ; CHECK-NEXT: Lloh3: -; CHECK-NEXT: ldr x10, [x10, _g@GOTPAGEOFF] +; CHECK-NEXT: ldr x9, [x9, _g@GOTPAGEOFF] +; CHECK-NEXT: ldr x10, [x0, #16] ; CHECK-NEXT: Lloh4: -; CHECK-NEXT: str w11, [x10] -; CHECK-NEXT: br x9 +; CHECK-NEXT: str w11, [x9] +; CHECK-NEXT: br x10 ; CHECK-NEXT: .loh AdrpLdrGotStr Lloh2, Lloh3, Lloh4 %cond_p = getelementptr %struct.Foo, %struct.Foo* %this, i32 0, i32 0 %cond = load i1, i1* %cond_p diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll @@ -35,11 +35,11 @@ ; SDAG-LABEL: combine_vec_udiv_nonuniform: ; SDAG: // %bb.0: ; SDAG-NEXT: adrp x8, .LCPI1_0 +; SDAG-NEXT: adrp x9, .LCPI1_1 ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI1_0] -; SDAG-NEXT: adrp x8, .LCPI1_1 -; SDAG-NEXT: ushl v1.8h, v0.8h, v1.8h -; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI1_1] ; SDAG-NEXT: adrp x8, .LCPI1_2 +; SDAG-NEXT: ldr q2, [x9, :lo12:.LCPI1_1] +; SDAG-NEXT: ushl v1.8h, v0.8h, v1.8h ; SDAG-NEXT: umull2 v3.4s, v1.8h, v2.8h ; SDAG-NEXT: umull v1.4s, v1.4h, v2.4h ; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI1_2] @@ -48,41 +48,41 @@ ; SDAG-NEXT: sub v0.8h, v0.8h, v1.8h ; SDAG-NEXT: umull2 v3.4s, v0.8h, v2.8h ; SDAG-NEXT: umull v0.4s, v0.4h, v2.4h +; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI1_3] ; SDAG-NEXT: uzp2 v0.8h, v0.8h, v3.8h ; SDAG-NEXT: add v0.8h, v0.8h, v1.8h -; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI1_3] -; SDAG-NEXT: ushl v0.8h, v0.8h, v1.8h +; SDAG-NEXT: ushl v0.8h, v0.8h, v2.8h ; SDAG-NEXT: ret ; ; GISEL-LABEL: combine_vec_udiv_nonuniform: ; GISEL: // %bb.0: ; GISEL-NEXT: adrp x8, .LCPI1_4 -; GISEL-NEXT: adrp x10, .LCPI1_0 -; GISEL-NEXT: adrp x9, .LCPI1_1 +; GISEL-NEXT: adrp x9, .LCPI1_0 +; GISEL-NEXT: adrp x10, .LCPI1_1 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI1_4] ; GISEL-NEXT: adrp x8, .LCPI1_3 -; GISEL-NEXT: ldr q5, [x10, :lo12:.LCPI1_0] -; GISEL-NEXT: ldr q6, [x9, :lo12:.LCPI1_1] +; GISEL-NEXT: ldr q5, [x9, :lo12:.LCPI1_0] +; GISEL-NEXT: ldr q6, [x10, :lo12:.LCPI1_1] ; GISEL-NEXT: neg v1.8h, v1.8h ; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI1_3] ; GISEL-NEXT: adrp x8, .LCPI1_2 ; GISEL-NEXT: ushl v1.8h, v0.8h, v1.8h ; GISEL-NEXT: umull2 v3.4s, v1.8h, v2.8h ; GISEL-NEXT: umull v1.4s, v1.4h, v2.4h -; GISEL-NEXT: uzp2 v1.8h, v1.8h, v3.8h -; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI1_2] +; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI1_2] ; GISEL-NEXT: adrp x8, .LCPI1_5 -; GISEL-NEXT: sub v2.8h, v0.8h, v1.8h -; GISEL-NEXT: umull2 v4.4s, v2.8h, v3.8h -; GISEL-NEXT: umull v2.4s, v2.4h, v3.4h +; GISEL-NEXT: uzp2 v1.8h, v1.8h, v3.8h +; GISEL-NEXT: sub v3.8h, v0.8h, v1.8h +; GISEL-NEXT: umull2 v4.4s, v3.8h, v2.8h +; GISEL-NEXT: umull v2.4s, v3.4h, v2.4h ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI1_5] -; GISEL-NEXT: cmeq v3.8h, v3.8h, v5.8h ; GISEL-NEXT: uzp2 v2.8h, v2.8h, v4.8h -; GISEL-NEXT: neg v4.8h, v6.8h +; GISEL-NEXT: cmeq v3.8h, v3.8h, v5.8h ; GISEL-NEXT: add v1.8h, v2.8h, v1.8h -; GISEL-NEXT: shl v2.8h, v3.8h, #15 -; GISEL-NEXT: ushl v1.8h, v1.8h, v4.8h -; GISEL-NEXT: sshr v2.8h, v2.8h, #15 +; GISEL-NEXT: neg v2.8h, v6.8h +; GISEL-NEXT: shl v3.8h, v3.8h, #15 +; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h +; GISEL-NEXT: sshr v2.8h, v3.8h, #15 ; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b ; GISEL-NEXT: ret %1 = udiv <8 x i16> %x, @@ -93,38 +93,38 @@ ; SDAG-LABEL: combine_vec_udiv_nonuniform2: ; SDAG: // %bb.0: ; SDAG-NEXT: adrp x8, .LCPI2_0 +; SDAG-NEXT: adrp x9, .LCPI2_1 ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI2_0] -; SDAG-NEXT: adrp x8, .LCPI2_1 -; SDAG-NEXT: ushl v0.8h, v0.8h, v1.8h -; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI2_1] ; SDAG-NEXT: adrp x8, .LCPI2_2 -; SDAG-NEXT: umull2 v2.4s, v0.8h, v1.8h -; SDAG-NEXT: umull v0.4s, v0.4h, v1.4h -; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI2_2] -; SDAG-NEXT: uzp2 v0.8h, v0.8h, v2.8h +; SDAG-NEXT: ldr q2, [x9, :lo12:.LCPI2_1] ; SDAG-NEXT: ushl v0.8h, v0.8h, v1.8h +; SDAG-NEXT: umull2 v1.4s, v0.8h, v2.8h +; SDAG-NEXT: umull v0.4s, v0.4h, v2.4h +; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI2_2] +; SDAG-NEXT: uzp2 v0.8h, v0.8h, v1.8h +; SDAG-NEXT: ushl v0.8h, v0.8h, v2.8h ; SDAG-NEXT: ret ; ; GISEL-LABEL: combine_vec_udiv_nonuniform2: ; GISEL: // %bb.0: ; GISEL-NEXT: adrp x8, .LCPI2_3 ; GISEL-NEXT: adrp x9, .LCPI2_4 -; GISEL-NEXT: adrp x10, .LCPI2_0 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI2_3] ; GISEL-NEXT: adrp x8, .LCPI2_2 ; GISEL-NEXT: ldr q3, [x9, :lo12:.LCPI2_4] -; GISEL-NEXT: ldr q4, [x10, :lo12:.LCPI2_0] +; GISEL-NEXT: adrp x9, .LCPI2_1 ; GISEL-NEXT: neg v1.8h, v1.8h ; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI2_2] -; GISEL-NEXT: adrp x8, .LCPI2_1 -; GISEL-NEXT: cmeq v3.8h, v3.8h, v4.8h +; GISEL-NEXT: adrp x8, .LCPI2_0 ; GISEL-NEXT: ushl v1.8h, v0.8h, v1.8h -; GISEL-NEXT: shl v3.8h, v3.8h, #15 -; GISEL-NEXT: umull2 v5.4s, v1.8h, v2.8h +; GISEL-NEXT: ldr q5, [x8, :lo12:.LCPI2_0] +; GISEL-NEXT: umull2 v4.4s, v1.8h, v2.8h ; GISEL-NEXT: umull v1.4s, v1.4h, v2.4h -; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI2_1] +; GISEL-NEXT: ldr q2, [x9, :lo12:.LCPI2_1] +; GISEL-NEXT: cmeq v3.8h, v3.8h, v5.8h +; GISEL-NEXT: uzp2 v1.8h, v1.8h, v4.8h +; GISEL-NEXT: shl v3.8h, v3.8h, #15 ; GISEL-NEXT: neg v2.8h, v2.8h -; GISEL-NEXT: uzp2 v1.8h, v1.8h, v5.8h ; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h ; GISEL-NEXT: sshr v2.8h, v3.8h, #15 ; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b @@ -151,23 +151,23 @@ ; GISEL-LABEL: combine_vec_udiv_nonuniform3: ; GISEL: // %bb.0: ; GISEL-NEXT: adrp x8, .LCPI3_2 -; GISEL-NEXT: adrp x10, .LCPI3_0 -; GISEL-NEXT: adrp x9, .LCPI3_1 +; GISEL-NEXT: adrp x9, .LCPI3_0 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI3_2] ; GISEL-NEXT: adrp x8, .LCPI3_3 -; GISEL-NEXT: ldr q3, [x10, :lo12:.LCPI3_0] -; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI3_1] +; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI3_0] ; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h ; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h +; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI3_3] +; GISEL-NEXT: adrp x8, .LCPI3_1 ; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h -; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI3_3] -; GISEL-NEXT: cmeq v2.8h, v2.8h, v3.8h -; GISEL-NEXT: sub v5.8h, v0.8h, v1.8h -; GISEL-NEXT: neg v3.8h, v4.8h -; GISEL-NEXT: shl v2.8h, v2.8h, #15 -; GISEL-NEXT: usra v1.8h, v5.8h, #1 -; GISEL-NEXT: sshr v2.8h, v2.8h, #15 -; GISEL-NEXT: ushl v1.8h, v1.8h, v3.8h +; GISEL-NEXT: cmeq v3.8h, v3.8h, v4.8h +; GISEL-NEXT: ldr q4, [x8, :lo12:.LCPI3_1] +; GISEL-NEXT: sub v2.8h, v0.8h, v1.8h +; GISEL-NEXT: shl v3.8h, v3.8h, #15 +; GISEL-NEXT: usra v1.8h, v2.8h, #1 +; GISEL-NEXT: neg v2.8h, v4.8h +; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h +; GISEL-NEXT: sshr v2.8h, v3.8h, #15 ; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b ; GISEL-NEXT: ret %1 = udiv <8 x i16> %x, @@ -178,41 +178,41 @@ ; SDAG-LABEL: combine_vec_udiv_nonuniform4: ; SDAG: // %bb.0: ; SDAG-NEXT: adrp x8, .LCPI4_0 -; SDAG-NEXT: adrp x9, .LCPI4_3 +; SDAG-NEXT: adrp x9, .LCPI4_2 ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI4_0] ; SDAG-NEXT: adrp x8, .LCPI4_1 -; SDAG-NEXT: ldr q3, [x9, :lo12:.LCPI4_3] ; SDAG-NEXT: umull2 v2.8h, v0.16b, v1.16b ; SDAG-NEXT: umull v1.8h, v0.8b, v1.8b -; SDAG-NEXT: and v0.16b, v0.16b, v3.16b +; SDAG-NEXT: ldr q3, [x8, :lo12:.LCPI4_1] +; SDAG-NEXT: adrp x8, .LCPI4_3 ; SDAG-NEXT: uzp2 v1.16b, v1.16b, v2.16b -; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI4_1] -; SDAG-NEXT: adrp x8, .LCPI4_2 -; SDAG-NEXT: ushl v1.16b, v1.16b, v2.16b -; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI4_2] +; SDAG-NEXT: ldr q2, [x9, :lo12:.LCPI4_2] +; SDAG-NEXT: ushl v1.16b, v1.16b, v3.16b +; SDAG-NEXT: ldr q3, [x8, :lo12:.LCPI4_3] ; SDAG-NEXT: and v1.16b, v1.16b, v2.16b +; SDAG-NEXT: and v0.16b, v0.16b, v3.16b ; SDAG-NEXT: orr v0.16b, v0.16b, v1.16b ; SDAG-NEXT: ret ; ; GISEL-LABEL: combine_vec_udiv_nonuniform4: ; GISEL: // %bb.0: -; GISEL-NEXT: adrp x8, .LCPI4_3 -; GISEL-NEXT: adrp x9, .LCPI4_2 -; GISEL-NEXT: adrp x10, .LCPI4_1 -; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI4_3] +; GISEL-NEXT: adrp x8, .LCPI4_2 +; GISEL-NEXT: adrp x9, .LCPI4_3 +; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI4_2] ; GISEL-NEXT: adrp x8, .LCPI4_0 -; GISEL-NEXT: ldr q2, [x9, :lo12:.LCPI4_2] -; GISEL-NEXT: ldr q3, [x10, :lo12:.LCPI4_1] -; GISEL-NEXT: ldr q4, [x8, :lo12:.LCPI4_0] -; GISEL-NEXT: umull2 v5.8h, v0.16b, v2.16b -; GISEL-NEXT: umull v2.8h, v0.8b, v2.8b -; GISEL-NEXT: cmeq v1.16b, v1.16b, v4.16b +; GISEL-NEXT: ldr q2, [x9, :lo12:.LCPI4_3] +; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI4_0] +; GISEL-NEXT: adrp x8, .LCPI4_1 +; GISEL-NEXT: umull2 v4.8h, v0.16b, v1.16b +; GISEL-NEXT: umull v1.8h, v0.8b, v1.8b +; GISEL-NEXT: cmeq v2.16b, v2.16b, v3.16b +; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI4_1] +; GISEL-NEXT: shl v2.16b, v2.16b, #7 +; GISEL-NEXT: uzp2 v1.16b, v1.16b, v4.16b ; GISEL-NEXT: neg v3.16b, v3.16b -; GISEL-NEXT: uzp2 v2.16b, v2.16b, v5.16b -; GISEL-NEXT: shl v1.16b, v1.16b, #7 -; GISEL-NEXT: ushl v2.16b, v2.16b, v3.16b -; GISEL-NEXT: sshr v1.16b, v1.16b, #7 -; GISEL-NEXT: bif v0.16b, v2.16b, v1.16b +; GISEL-NEXT: sshr v2.16b, v2.16b, #7 +; GISEL-NEXT: ushl v1.16b, v1.16b, v3.16b +; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b ; GISEL-NEXT: ret %div = udiv <16 x i8> %x, ret <16 x i8> %div @@ -222,54 +222,54 @@ ; SDAG-LABEL: pr38477: ; SDAG: // %bb.0: ; SDAG-NEXT: adrp x8, .LCPI5_0 -; SDAG-NEXT: adrp x9, .LCPI5_4 +; SDAG-NEXT: adrp x9, .LCPI5_3 ; SDAG-NEXT: ldr q1, [x8, :lo12:.LCPI5_0] ; SDAG-NEXT: adrp x8, .LCPI5_1 ; SDAG-NEXT: umull2 v2.4s, v0.8h, v1.8h -; SDAG-NEXT: ldr q3, [x8, :lo12:.LCPI5_1] ; SDAG-NEXT: umull v1.4s, v0.4h, v1.4h -; SDAG-NEXT: adrp x8, .LCPI5_2 ; SDAG-NEXT: uzp2 v1.8h, v1.8h, v2.8h -; SDAG-NEXT: sub v2.8h, v0.8h, v1.8h -; SDAG-NEXT: umull2 v4.4s, v2.8h, v3.8h -; SDAG-NEXT: umull v2.4s, v2.4h, v3.4h -; SDAG-NEXT: ldr q3, [x9, :lo12:.LCPI5_4] -; SDAG-NEXT: and v0.16b, v0.16b, v3.16b +; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI5_1] +; SDAG-NEXT: adrp x8, .LCPI5_2 +; SDAG-NEXT: sub v3.8h, v0.8h, v1.8h +; SDAG-NEXT: umull2 v4.4s, v3.8h, v2.8h +; SDAG-NEXT: umull v2.4s, v3.4h, v2.4h +; SDAG-NEXT: ldr q3, [x8, :lo12:.LCPI5_2] +; SDAG-NEXT: adrp x8, .LCPI5_4 ; SDAG-NEXT: uzp2 v2.8h, v2.8h, v4.8h +; SDAG-NEXT: ldr q4, [x9, :lo12:.LCPI5_3] ; SDAG-NEXT: add v1.8h, v2.8h, v1.8h -; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI5_2] -; SDAG-NEXT: adrp x8, .LCPI5_3 -; SDAG-NEXT: ushl v1.8h, v1.8h, v2.8h -; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI5_3] -; SDAG-NEXT: and v1.16b, v1.16b, v2.16b +; SDAG-NEXT: ldr q2, [x8, :lo12:.LCPI5_4] +; SDAG-NEXT: ushl v1.8h, v1.8h, v3.8h +; SDAG-NEXT: and v0.16b, v0.16b, v2.16b +; SDAG-NEXT: and v1.16b, v1.16b, v4.16b ; SDAG-NEXT: orr v0.16b, v0.16b, v1.16b ; SDAG-NEXT: ret ; ; GISEL-LABEL: pr38477: ; GISEL: // %bb.0: ; GISEL-NEXT: adrp x8, .LCPI5_3 -; GISEL-NEXT: adrp x10, .LCPI5_0 -; GISEL-NEXT: adrp x9, .LCPI5_1 +; GISEL-NEXT: adrp x9, .LCPI5_0 +; GISEL-NEXT: adrp x10, .LCPI5_1 ; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI5_3] ; GISEL-NEXT: adrp x8, .LCPI5_2 -; GISEL-NEXT: ldr q5, [x10, :lo12:.LCPI5_0] -; GISEL-NEXT: ldr q6, [x9, :lo12:.LCPI5_1] +; GISEL-NEXT: ldr q5, [x9, :lo12:.LCPI5_0] +; GISEL-NEXT: ldr q6, [x10, :lo12:.LCPI5_1] ; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h -; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI5_2] ; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h -; GISEL-NEXT: adrp x8, .LCPI5_4 ; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h -; GISEL-NEXT: sub v2.8h, v0.8h, v1.8h -; GISEL-NEXT: umull2 v4.4s, v2.8h, v3.8h -; GISEL-NEXT: umull v2.4s, v2.4h, v3.4h +; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI5_2] +; GISEL-NEXT: adrp x8, .LCPI5_4 +; GISEL-NEXT: sub v3.8h, v0.8h, v1.8h +; GISEL-NEXT: umull2 v4.4s, v3.8h, v2.8h +; GISEL-NEXT: umull v2.4s, v3.4h, v2.4h ; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI5_4] -; GISEL-NEXT: cmeq v3.8h, v3.8h, v5.8h ; GISEL-NEXT: uzp2 v2.8h, v2.8h, v4.8h -; GISEL-NEXT: neg v4.8h, v6.8h +; GISEL-NEXT: cmeq v3.8h, v3.8h, v5.8h ; GISEL-NEXT: add v1.8h, v2.8h, v1.8h -; GISEL-NEXT: shl v2.8h, v3.8h, #15 -; GISEL-NEXT: ushl v1.8h, v1.8h, v4.8h -; GISEL-NEXT: sshr v2.8h, v2.8h, #15 +; GISEL-NEXT: neg v2.8h, v6.8h +; GISEL-NEXT: shl v3.8h, v3.8h, #15 +; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h +; GISEL-NEXT: sshr v2.8h, v3.8h, #15 ; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b ; GISEL-NEXT: ret %1 = udiv <8 x i16> %a0, diff --git a/llvm/test/CodeGen/AArch64/aarch64-be-bv.ll b/llvm/test/CodeGen/AArch64/aarch64-be-bv.ll --- a/llvm/test/CodeGen/AArch64/aarch64-be-bv.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-be-bv.ll @@ -167,8 +167,8 @@ ; CHECK-LABEL: fmov_modimm_t11: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, vec_v8i16 -; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16 ; CHECK-NEXT: fmov v1.4s, #3.00000000 +; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16 ; CHECK-NEXT: ld1 { v0.8h }, [x8] ; CHECK-NEXT: add v0.8h, v0.8h, v1.8h ; CHECK-NEXT: st1 { v0.8h }, [x8] @@ -183,8 +183,8 @@ ; CHECK-LABEL: fmov_modimm_t12: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, vec_v8i16 -; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16 ; CHECK-NEXT: fmov v1.2d, #0.17968750 +; CHECK-NEXT: add x8, x8, :lo12:vec_v8i16 ; CHECK-NEXT: ld1 { v0.8h }, [x8] ; CHECK-NEXT: add v0.8h, v0.8h, v1.8h ; CHECK-NEXT: st1 { v0.8h }, [x8] diff --git a/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll b/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll --- a/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-dup-ext.ll @@ -98,10 +98,10 @@ define <2 x i16> @dupsext_v2i8_v2i16(i8 %src, <2 x i8> %b) { ; CHECK-LABEL: dupsext_v2i8_v2i16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: shl v0.2s, v0.2s, #24 ; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: dup v1.2s, w8 +; CHECK-NEXT: shl v0.2s, v0.2s, #24 ; CHECK-NEXT: sshr v0.2s, v0.2s, #24 +; CHECK-NEXT: dup v1.2s, w8 ; CHECK-NEXT: mul v0.2s, v1.2s, v0.2s ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll b/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll --- a/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-load-ext.ll @@ -106,18 +106,18 @@ ; CHECK-LE-LABEL: fsext_v2i32: ; CHECK-LE: // %bb.0: ; CHECK-LE-NEXT: ldrsb w8, [x0] +; CHECK-LE-NEXT: ldrsb w9, [x0, #1] ; CHECK-LE-NEXT: fmov s0, w8 -; CHECK-LE-NEXT: ldrsb w8, [x0, #1] -; CHECK-LE-NEXT: mov v0.s[1], w8 +; CHECK-LE-NEXT: mov v0.s[1], w9 ; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-LE-NEXT: ret ; ; CHECK-BE-LABEL: fsext_v2i32: ; CHECK-BE: // %bb.0: ; CHECK-BE-NEXT: ldrsb w8, [x0] +; CHECK-BE-NEXT: ldrsb w9, [x0, #1] ; CHECK-BE-NEXT: fmov s0, w8 -; CHECK-BE-NEXT: ldrsb w8, [x0, #1] -; CHECK-BE-NEXT: mov v0.s[1], w8 +; CHECK-BE-NEXT: mov v0.s[1], w9 ; CHECK-BE-NEXT: rev64 v0.2s, v0.2s ; CHECK-BE-NEXT: ret %x = load <2 x i8>, <2 x i8>* %a @@ -178,21 +178,21 @@ ; CHECK-LE-LABEL: fsext_v8i32: ; CHECK-LE: // %bb.0: ; CHECK-LE-NEXT: ldr d0, [x0] -; CHECK-LE-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-LE-NEXT: sshll2 v1.4s, v0.8h, #0 -; CHECK-LE-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-LE-NEXT: sshll v1.8h, v0.8b, #0 +; CHECK-LE-NEXT: sshll v0.4s, v1.4h, #0 +; CHECK-LE-NEXT: sshll2 v1.4s, v1.8h, #0 ; CHECK-LE-NEXT: ret ; ; CHECK-BE-LABEL: fsext_v8i32: ; CHECK-BE: // %bb.0: ; CHECK-BE-NEXT: ld1 { v0.8b }, [x0] ; CHECK-BE-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-BE-NEXT: sshll2 v1.4s, v0.8h, #0 -; CHECK-BE-NEXT: sshll v0.4s, v0.4h, #0 +; CHECK-BE-NEXT: sshll v1.4s, v0.4h, #0 +; CHECK-BE-NEXT: sshll2 v0.4s, v0.8h, #0 ; CHECK-BE-NEXT: rev64 v1.4s, v1.4s -; CHECK-BE-NEXT: rev64 v0.4s, v0.4s -; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 -; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-BE-NEXT: rev64 v2.4s, v0.4s +; CHECK-BE-NEXT: ext v0.16b, v1.16b, v1.16b, #8 +; CHECK-BE-NEXT: ext v1.16b, v2.16b, v2.16b, #8 ; CHECK-BE-NEXT: ret %x = load <8 x i8>, <8 x i8>* %a %y = sext <8 x i8> %x to <8 x i32> @@ -251,18 +251,18 @@ ; CHECK-LE-LABEL: fsext_v2i16: ; CHECK-LE: // %bb.0: ; CHECK-LE-NEXT: ldrsb w8, [x0] +; CHECK-LE-NEXT: ldrsb w9, [x0, #1] ; CHECK-LE-NEXT: fmov s0, w8 -; CHECK-LE-NEXT: ldrsb w8, [x0, #1] -; CHECK-LE-NEXT: mov v0.s[1], w8 +; CHECK-LE-NEXT: mov v0.s[1], w9 ; CHECK-LE-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-LE-NEXT: ret ; ; CHECK-BE-LABEL: fsext_v2i16: ; CHECK-BE: // %bb.0: ; CHECK-BE-NEXT: ldrsb w8, [x0] +; CHECK-BE-NEXT: ldrsb w9, [x0, #1] ; CHECK-BE-NEXT: fmov s0, w8 -; CHECK-BE-NEXT: ldrsb w8, [x0, #1] -; CHECK-BE-NEXT: mov v0.s[1], w8 +; CHECK-BE-NEXT: mov v0.s[1], w9 ; CHECK-BE-NEXT: rev64 v0.2s, v0.2s ; CHECK-BE-NEXT: ret %x = load <2 x i8>, <2 x i8>* %a @@ -336,20 +336,20 @@ define <16 x i16> @fsext_v16i16(<16 x i8>* %a) { ; CHECK-LE-LABEL: fsext_v16i16: ; CHECK-LE: // %bb.0: -; CHECK-LE-NEXT: ldr q0, [x0] -; CHECK-LE-NEXT: sshll2 v1.8h, v0.16b, #0 -; CHECK-LE-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-LE-NEXT: ldr q1, [x0] +; CHECK-LE-NEXT: sshll v0.8h, v1.8b, #0 +; CHECK-LE-NEXT: sshll2 v1.8h, v1.16b, #0 ; CHECK-LE-NEXT: ret ; ; CHECK-BE-LABEL: fsext_v16i16: ; CHECK-BE: // %bb.0: ; CHECK-BE-NEXT: ld1 { v0.16b }, [x0] -; CHECK-BE-NEXT: sshll2 v1.8h, v0.16b, #0 -; CHECK-BE-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-BE-NEXT: sshll v1.8h, v0.8b, #0 +; CHECK-BE-NEXT: sshll2 v0.8h, v0.16b, #0 ; CHECK-BE-NEXT: rev64 v1.8h, v1.8h -; CHECK-BE-NEXT: rev64 v0.8h, v0.8h -; CHECK-BE-NEXT: ext v1.16b, v1.16b, v1.16b, #8 -; CHECK-BE-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-BE-NEXT: rev64 v2.8h, v0.8h +; CHECK-BE-NEXT: ext v0.16b, v1.16b, v1.16b, #8 +; CHECK-BE-NEXT: ext v1.16b, v2.16b, v2.16b, #8 ; CHECK-BE-NEXT: ret %x = load <16 x i8>, <16 x i8>* %a %y = sext <16 x i8> %x to <16 x i16> diff --git a/llvm/test/CodeGen/AArch64/aarch64-smull.ll b/llvm/test/CodeGen/AArch64/aarch64-smull.ll --- a/llvm/test/CodeGen/AArch64/aarch64-smull.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-smull.ll @@ -111,11 +111,11 @@ define <4 x i32> @amull_v4i16_v4i32(<4 x i16>* %A, <4 x i16>* %B) nounwind { ; CHECK-LABEL: amull_v4i16_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: ldr d2, [x1] -; CHECK-NEXT: movi v0.2d, #0x00ffff0000ffff -; CHECK-NEXT: smull v1.4s, v1.4h, v2.4h -; CHECK-NEXT: and v0.16b, v1.16b, v0.16b +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h +; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %tmp1 = load <4 x i16>, <4 x i16>* %A %tmp2 = load <4 x i16>, <4 x i16>* %B @@ -129,11 +129,11 @@ define <2 x i64> @amull_v2i32_v2i64(<2 x i32>* %A, <2 x i32>* %B) nounwind { ; CHECK-LABEL: amull_v2i32_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: ldr d2, [x1] -; CHECK-NEXT: movi v0.2d, #0x000000ffffffff -; CHECK-NEXT: smull v1.2d, v1.2s, v2.2s -; CHECK-NEXT: and v0.16b, v1.16b, v0.16b +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s +; CHECK-NEXT: movi v1.2d, #0x000000ffffffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %tmp1 = load <2 x i32>, <2 x i32>* %A %tmp2 = load <2 x i32>, <2 x i32>* %B @@ -275,12 +275,12 @@ define <4 x i32> @amlal_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { ; CHECK-LABEL: amlal_v4i16_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: ldr q2, [x0] -; CHECK-NEXT: ldr d3, [x2] +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr d2, [x2] +; CHECK-NEXT: smlal v1.4s, v0.4h, v2.4h ; CHECK-NEXT: movi v0.2d, #0x00ffff0000ffff -; CHECK-NEXT: smlal v2.4s, v1.4h, v3.4h -; CHECK-NEXT: and v0.16b, v2.16b, v0.16b +; CHECK-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = load <4 x i16>, <4 x i16>* %B @@ -296,12 +296,12 @@ define <2 x i64> @amlal_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { ; CHECK-LABEL: amlal_v2i32_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: ldr q2, [x0] -; CHECK-NEXT: ldr d3, [x2] +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr d2, [x2] +; CHECK-NEXT: smlal v1.2d, v0.2s, v2.2s ; CHECK-NEXT: movi v0.2d, #0x000000ffffffff -; CHECK-NEXT: smlal v2.2d, v1.2s, v3.2s -; CHECK-NEXT: and v0.16b, v2.16b, v0.16b +; CHECK-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = load <2 x i32>, <2 x i32>* %B @@ -445,12 +445,12 @@ define <4 x i32> @amlsl_v4i16_v4i32(<4 x i32>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind { ; CHECK-LABEL: amlsl_v4i16_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: ldr q2, [x0] -; CHECK-NEXT: ldr d3, [x2] +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr d2, [x2] +; CHECK-NEXT: smlsl v1.4s, v0.4h, v2.4h ; CHECK-NEXT: movi v0.2d, #0x00ffff0000ffff -; CHECK-NEXT: smlsl v2.4s, v1.4h, v3.4h -; CHECK-NEXT: and v0.16b, v2.16b, v0.16b +; CHECK-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %tmp1 = load <4 x i32>, <4 x i32>* %A %tmp2 = load <4 x i16>, <4 x i16>* %B @@ -466,12 +466,12 @@ define <2 x i64> @amlsl_v2i32_v2i64(<2 x i64>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind { ; CHECK-LABEL: amlsl_v2i32_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: ldr q2, [x0] -; CHECK-NEXT: ldr d3, [x2] +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: ldr d2, [x2] +; CHECK-NEXT: smlsl v1.2d, v0.2s, v2.2s ; CHECK-NEXT: movi v0.2d, #0x000000ffffffff -; CHECK-NEXT: smlsl v2.2d, v1.2s, v3.2s -; CHECK-NEXT: and v0.16b, v2.16b, v0.16b +; CHECK-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %tmp1 = load <2 x i64>, <2 x i64>* %A %tmp2 = load <2 x i32>, <2 x i32>* %B @@ -599,9 +599,9 @@ ; CHECK-LABEL: amull_extvec_v4i16_v4i32: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #1234 +; CHECK-NEXT: dup v1.4h, w8 +; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h ; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff -; CHECK-NEXT: dup v2.4h, w8 -; CHECK-NEXT: smull v0.4s, v0.4h, v2.4h ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %tmp3 = zext <4 x i16> %arg to <4 x i32> @@ -614,9 +614,9 @@ ; CHECK-LABEL: amull_extvec_v2i32_v2i64: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #1234 +; CHECK-NEXT: dup v1.2s, w8 +; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s ; CHECK-NEXT: movi v1.2d, #0x000000ffffffff -; CHECK-NEXT: dup v2.2s, w8 -; CHECK-NEXT: smull v0.2d, v0.2s, v2.2s ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %tmp3 = zext <2 x i32> %arg to <2 x i64> @@ -674,9 +674,9 @@ define <16 x i16> @umull2_i8(<16 x i8> %arg1, <16 x i8> %arg2) { ; CHECK-LABEL: umull2_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: umull2 v2.8h, v0.16b, v1.16b -; CHECK-NEXT: umull v0.8h, v0.8b, v1.8b -; CHECK-NEXT: mov v1.16b, v2.16b +; CHECK-NEXT: umull v2.8h, v0.8b, v1.8b +; CHECK-NEXT: umull2 v1.8h, v0.16b, v1.16b +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %arg1_ext = zext <16 x i8> %arg1 to <16 x i16> %arg2_ext = zext <16 x i8> %arg2 to <16 x i16> @@ -687,9 +687,9 @@ define <16 x i16> @smull2_i8(<16 x i8> %arg1, <16 x i8> %arg2) { ; CHECK-LABEL: smull2_i8: ; CHECK: // %bb.0: -; CHECK-NEXT: smull2 v2.8h, v0.16b, v1.16b -; CHECK-NEXT: smull v0.8h, v0.8b, v1.8b -; CHECK-NEXT: mov v1.16b, v2.16b +; CHECK-NEXT: smull v2.8h, v0.8b, v1.8b +; CHECK-NEXT: smull2 v1.8h, v0.16b, v1.16b +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %arg1_ext = sext <16 x i8> %arg1 to <16 x i16> %arg2_ext = sext <16 x i8> %arg2 to <16 x i16> @@ -700,9 +700,9 @@ define <8 x i32> @umull2_i16(<8 x i16> %arg1, <8 x i16> %arg2) { ; CHECK-LABEL: umull2_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: umull2 v2.4s, v0.8h, v1.8h -; CHECK-NEXT: umull v0.4s, v0.4h, v1.4h -; CHECK-NEXT: mov v1.16b, v2.16b +; CHECK-NEXT: umull v2.4s, v0.4h, v1.4h +; CHECK-NEXT: umull2 v1.4s, v0.8h, v1.8h +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %arg1_ext = zext <8 x i16> %arg1 to <8 x i32> %arg2_ext = zext <8 x i16> %arg2 to <8 x i32> @@ -713,9 +713,9 @@ define <8 x i32> @smull2_i16(<8 x i16> %arg1, <8 x i16> %arg2) { ; CHECK-LABEL: smull2_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: smull2 v2.4s, v0.8h, v1.8h -; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h -; CHECK-NEXT: mov v1.16b, v2.16b +; CHECK-NEXT: smull v2.4s, v0.4h, v1.4h +; CHECK-NEXT: smull2 v1.4s, v0.8h, v1.8h +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %arg1_ext = sext <8 x i16> %arg1 to <8 x i32> %arg2_ext = sext <8 x i16> %arg2 to <8 x i32> @@ -726,9 +726,9 @@ define <4 x i64> @umull2_i32(<4 x i32> %arg1, <4 x i32> %arg2) { ; CHECK-LABEL: umull2_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: umull2 v2.2d, v0.4s, v1.4s -; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s -; CHECK-NEXT: mov v1.16b, v2.16b +; CHECK-NEXT: umull v2.2d, v0.2s, v1.2s +; CHECK-NEXT: umull2 v1.2d, v0.4s, v1.4s +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %arg1_ext = zext <4 x i32> %arg1 to <4 x i64> %arg2_ext = zext <4 x i32> %arg2 to <4 x i64> @@ -739,9 +739,9 @@ define <4 x i64> @smull2_i32(<4 x i32> %arg1, <4 x i32> %arg2) { ; CHECK-LABEL: smull2_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: smull2 v2.2d, v0.4s, v1.4s -; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s -; CHECK-NEXT: mov v1.16b, v2.16b +; CHECK-NEXT: smull v2.2d, v0.2s, v1.2s +; CHECK-NEXT: smull2 v1.2d, v0.4s, v1.4s +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %arg1_ext = sext <4 x i32> %arg1 to <4 x i64> %arg2_ext = sext <4 x i32> %arg2 to <4 x i64> @@ -768,11 +768,11 @@ define <8 x i32> @amull2_i16(<8 x i16> %arg1, <8 x i16> %arg2) { ; CHECK-LABEL: amull2_i16: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.2d, #0x00ffff0000ffff -; CHECK-NEXT: smull2 v3.4s, v0.8h, v1.8h +; CHECK-NEXT: smull2 v2.4s, v0.8h, v1.8h ; CHECK-NEXT: smull v0.4s, v0.4h, v1.4h -; CHECK-NEXT: and v1.16b, v3.16b, v2.16b -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v3.2d, #0x00ffff0000ffff +; CHECK-NEXT: and v1.16b, v2.16b, v3.16b +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %arg1_ext = zext <8 x i16> %arg1 to <8 x i32> %arg2_ext = zext <8 x i16> %arg2 to <8 x i32> @@ -784,11 +784,11 @@ define <4 x i64> @amull2_i32(<4 x i32> %arg1, <4 x i32> %arg2) { ; CHECK-LABEL: amull2_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.2d, #0x000000ffffffff -; CHECK-NEXT: smull2 v3.2d, v0.4s, v1.4s +; CHECK-NEXT: smull2 v2.2d, v0.4s, v1.4s ; CHECK-NEXT: smull v0.2d, v0.2s, v1.2s -; CHECK-NEXT: and v1.16b, v3.16b, v2.16b -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v3.2d, #0x000000ffffffff +; CHECK-NEXT: and v1.16b, v2.16b, v3.16b +; CHECK-NEXT: and v0.16b, v0.16b, v3.16b ; CHECK-NEXT: ret %arg1_ext = zext <4 x i32> %arg1 to <4 x i64> %arg2_ext = zext <4 x i32> %arg2 to <4 x i64> diff --git a/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll b/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll --- a/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll +++ b/llvm/test/CodeGen/AArch64/addsub-constant-folding.ll @@ -57,9 +57,9 @@ ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: bl vec_use -; CHECK-NEXT: movi v0.4s, #10 ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: movi v0.4s, #10 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret @@ -134,9 +134,9 @@ ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: bl vec_use -; CHECK-NEXT: movi v0.4s, #6 ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: movi v0.4s, #6 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret @@ -213,9 +213,9 @@ ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: bl vec_use -; CHECK-NEXT: mvni v0.4s, #5 ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: mvni v0.4s, #5 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret @@ -290,9 +290,9 @@ ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: bl vec_use -; CHECK-NEXT: mvni v0.4s, #5 ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: mvni v0.4s, #5 ; CHECK-NEXT: add v0.4s, v1.4s, v0.4s ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret @@ -367,9 +367,9 @@ ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: bl vec_use -; CHECK-NEXT: movi v0.4s, #10 ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: movi v0.4s, #10 ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret @@ -446,9 +446,9 @@ ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: bl vec_use -; CHECK-NEXT: movi v0.4s, #2 ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: movi v0.4s, #2 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret @@ -526,9 +526,9 @@ ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s ; CHECK-NEXT: bl vec_use -; CHECK-NEXT: movi v0.4s, #10 ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: movi v0.4s, #10 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret @@ -606,9 +606,9 @@ ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s ; CHECK-NEXT: bl vec_use -; CHECK-NEXT: movi v0.4s, #6 ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: movi v0.4s, #6 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret @@ -685,9 +685,9 @@ ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: bl vec_use -; CHECK-NEXT: movi v0.4s, #2 ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: movi v0.4s, #2 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: add sp, sp, #32 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll b/llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll --- a/llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll +++ b/llvm/test/CodeGen/AArch64/argument-blocks-array-of-struct.ll @@ -556,8 +556,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, no_block_store ; CHECK-NEXT: add x8, x8, :lo12:no_block_store -; CHECK-NEXT: str w1, [x8, #24] ; CHECK-NEXT: str d1, [x8, #16] +; CHECK-NEXT: str w1, [x8, #24] ; CHECK-NEXT: str w0, [x8, #8] ; CHECK-NEXT: str d0, [x8] ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/arm64-dup.ll b/llvm/test/CodeGen/AArch64/arm64-dup.ll --- a/llvm/test/CodeGen/AArch64/arm64-dup.ll +++ b/llvm/test/CodeGen/AArch64/arm64-dup.ll @@ -446,11 +446,11 @@ define void @disguised_dup(<4 x float> %x, <4 x float>* %p1, <4 x float>* %p2) { ; CHECK-LABEL: disguised_dup: ; CHECK: // %bb.0: -; CHECK-NEXT: ext.16b v1, v0, v0, #12 -; CHECK-NEXT: dup.4s v0, v0[0] -; CHECK-NEXT: ext.16b v1, v1, v0, #8 -; CHECK-NEXT: str q1, [x0] -; CHECK-NEXT: str q0, [x1] +; CHECK-NEXT: dup.4s v1, v0[0] +; CHECK-NEXT: ext.16b v0, v0, v0, #12 +; CHECK-NEXT: ext.16b v0, v0, v1, #8 +; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: str q1, [x1] ; CHECK-NEXT: ret %shuf = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> %dup = shufflevector <4 x float> %shuf, <4 x float> undef, <4 x i32> diff --git a/llvm/test/CodeGen/AArch64/arm64-fcopysign.ll b/llvm/test/CodeGen/AArch64/arm64-fcopysign.ll --- a/llvm/test/CodeGen/AArch64/arm64-fcopysign.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fcopysign.ll @@ -6,8 +6,8 @@ define float @test1(float %x, float %y) nounwind { ; CHECK-LABEL: test1: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: movi.4s v2, #128, lsl #24 ; CHECK-NEXT: ; kill: def $s0 killed $s0 def $q0 +; CHECK-NEXT: movi.4s v2, #128, lsl #24 ; CHECK-NEXT: ; kill: def $s1 killed $s1 def $q1 ; CHECK-NEXT: bit.16b v0, v1, v2 ; CHECK-NEXT: ; kill: def $s0 killed $s0 killed $q0 @@ -36,10 +36,10 @@ define double @test3(double %a, float %b, float %c) nounwind { ; CHECK-LABEL: test3: ; CHECK: ; %bb.0: -; CHECK-NEXT: movi.2d v3, #0000000000000000 -; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: fadd s1, s1, s2 -; CHECK-NEXT: fneg.2d v2, v3 +; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: movi.2d v2, #0000000000000000 +; CHECK-NEXT: fneg.2d v2, v2 ; CHECK-NEXT: fcvt d1, s1 ; CHECK-NEXT: bit.16b v0, v1, v2 ; CHECK-NEXT: ; kill: def $d0 killed $d0 killed $q0 @@ -55,11 +55,11 @@ ; CHECK: ; %bb.0: ; %entry ; CHECK-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill ; CHECK-NEXT: bl _bar -; CHECK-NEXT: movi.4s v1, #128, lsl #24 ; CHECK-NEXT: fcvt s0, d0 -; CHECK-NEXT: fmov s2, #0.50000000 -; CHECK-NEXT: bit.16b v2, v0, v1 -; CHECK-NEXT: fadd s0, s0, s2 +; CHECK-NEXT: fmov s1, #0.50000000 +; CHECK-NEXT: movi.4s v2, #128, lsl #24 +; CHECK-NEXT: bit.16b v1, v0, v2 +; CHECK-NEXT: fadd s0, s0, s1 ; CHECK-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll b/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll --- a/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll +++ b/llvm/test/CodeGen/AArch64/arm64-memset-inline.ll @@ -185,8 +185,8 @@ ; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: stp q0, q0, [sp] ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload @@ -206,9 +206,9 @@ ; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 64 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: str xzr, [sp, #32] +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: stp q0, q0, [sp] ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload @@ -228,8 +228,8 @@ ; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 80 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] ; CHECK-NEXT: bl something @@ -250,9 +250,9 @@ ; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 96 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov x0, sp ; CHECK-NEXT: str xzr, [sp, #64] +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] ; CHECK-NEXT: bl something @@ -273,8 +273,8 @@ ; CHECK-NEXT: str x30, [sp, #128] // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 144 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: stp q0, q0, [sp, #96] ; CHECK-NEXT: stp q0, q0, [sp, #64] ; CHECK-NEXT: stp q0, q0, [sp, #32] @@ -298,8 +298,8 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 272 ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: stp q0, q0, [sp, #224] ; CHECK-NEXT: stp q0, q0, [sp, #192] ; CHECK-NEXT: stp q0, q0, [sp, #160] @@ -451,8 +451,8 @@ ; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 48 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: stp q0, q0, [sp] ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload @@ -472,11 +472,11 @@ ; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 64 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: mov x8, #-6148914691236517206 ; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: str x8, [sp, #32] +; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: stp q0, q0, [sp] +; CHECK-NEXT: str x8, [sp, #32] ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload ; CHECK-NEXT: add sp, sp, #64 @@ -495,8 +495,8 @@ ; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 80 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: stp q0, q0, [sp, #32] ; CHECK-NEXT: stp q0, q0, [sp] ; CHECK-NEXT: bl something @@ -517,11 +517,11 @@ ; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 96 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: mov x8, #-6148914691236517206 ; CHECK-NEXT: mov x0, sp -; CHECK-NEXT: str x8, [sp, #64] +; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: stp q0, q0, [sp, #32] +; CHECK-NEXT: str x8, [sp, #64] ; CHECK-NEXT: stp q0, q0, [sp] ; CHECK-NEXT: bl something ; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload @@ -541,8 +541,8 @@ ; CHECK-NEXT: str x30, [sp, #128] // 8-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 144 ; CHECK-NEXT: .cfi_offset w30, -16 -; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: stp q0, q0, [sp, #96] ; CHECK-NEXT: stp q0, q0, [sp, #64] ; CHECK-NEXT: stp q0, q0, [sp, #32] @@ -566,8 +566,8 @@ ; CHECK-NEXT: .cfi_def_cfa_offset 272 ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset w29, -16 -; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: movi v0.16b, #170 ; CHECK-NEXT: stp q0, q0, [sp, #224] ; CHECK-NEXT: stp q0, q0, [sp, #192] ; CHECK-NEXT: stp q0, q0, [sp, #160] diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll --- a/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-3vdiff.ll @@ -117,9 +117,9 @@ define <4 x i32> @test_vaddl_a16(<4 x i16> %a, <4 x i16> %b) { ; CHECK-LABEL: test_vaddl_a16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x00ffff0000ffff ; CHECK-NEXT: uaddl v0.4s, v0.4h, v1.4h -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %vmovl.i.i = zext <4 x i16> %a to <4 x i32> @@ -132,9 +132,9 @@ define <2 x i64> @test_vaddl_a32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vaddl_a32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x000000ffffffff ; CHECK-NEXT: uaddl v0.2d, v0.2s, v1.2s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x000000ffffffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %vmovl.i.i = zext <2 x i32> %a to <2 x i64> @@ -247,9 +247,9 @@ define <4 x i32> @test_vaddl_high_a16(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: test_vaddl_high_a16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x00ffff0000ffff ; CHECK-NEXT: uaddl2 v0.4s, v0.8h, v1.8h -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> @@ -264,9 +264,9 @@ define <2 x i64> @test_vaddl_high_a32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test_vaddl_high_a32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x000000ffffffff ; CHECK-NEXT: uaddl2 v0.2d, v0.4s, v1.4s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x000000ffffffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> @@ -360,9 +360,9 @@ define <4 x i32> @test_vaddw_a16(<4 x i32> %a, <4 x i16> %b) { ; CHECK-LABEL: test_vaddw_a16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x00ffff0000ffff ; CHECK-NEXT: uaddw v0.4s, v0.4s, v1.4h -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %vmovl.i.i = zext <4 x i16> %b to <4 x i32> @@ -374,9 +374,9 @@ define <2 x i64> @test_vaddw_a32(<2 x i64> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vaddw_a32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x000000ffffffff ; CHECK-NEXT: uaddw v0.2d, v0.2d, v1.2s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x000000ffffffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %vmovl.i.i = zext <2 x i32> %b to <2 x i64> @@ -474,9 +474,9 @@ define <4 x i32> @test_vaddw_high_a16(<4 x i32> %a, <8 x i16> %b) { ; CHECK-LABEL: test_vaddw_high_a16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x00ffff0000ffff ; CHECK-NEXT: uaddw2 v0.4s, v0.4s, v1.8h -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> @@ -489,9 +489,9 @@ define <2 x i64> @test_vaddw_high_a32(<2 x i64> %a, <4 x i32> %b) { ; CHECK-LABEL: test_vaddw_high_a32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x000000ffffffff ; CHECK-NEXT: uaddw2 v0.2d, v0.2d, v1.4s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x000000ffffffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> @@ -590,9 +590,9 @@ define <4 x i32> @test_vsubl_a16(<4 x i16> %a, <4 x i16> %b) { ; CHECK-LABEL: test_vsubl_a16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x00ffff0000ffff ; CHECK-NEXT: usubl v0.4s, v0.4h, v1.4h -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %vmovl.i.i = zext <4 x i16> %a to <4 x i32> @@ -605,9 +605,9 @@ define <2 x i64> @test_vsubl_a32(<2 x i32> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vsubl_a32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x000000ffffffff ; CHECK-NEXT: usubl v0.2d, v0.2s, v1.2s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x000000ffffffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %vmovl.i.i = zext <2 x i32> %a to <2 x i64> @@ -720,9 +720,9 @@ define <4 x i32> @test_vsubl_high_a16(<8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: test_vsubl_high_a16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x00ffff0000ffff ; CHECK-NEXT: usubl2 v0.4s, v0.8h, v1.8h -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %shuffle.i.i.i = shufflevector <8 x i16> %a, <8 x i16> undef, <4 x i32> @@ -737,9 +737,9 @@ define <2 x i64> @test_vsubl_high_a32(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: test_vsubl_high_a32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x000000ffffffff ; CHECK-NEXT: usubl2 v0.2d, v0.4s, v1.4s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x000000ffffffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %shuffle.i.i.i = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> @@ -833,9 +833,9 @@ define <4 x i32> @test_vsubw_a16(<4 x i32> %a, <4 x i16> %b) { ; CHECK-LABEL: test_vsubw_a16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x00ffff0000ffff ; CHECK-NEXT: usubw v0.4s, v0.4s, v1.4h -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %vmovl.i.i = zext <4 x i16> %b to <4 x i32> @@ -847,9 +847,9 @@ define <2 x i64> @test_vsubw_a32(<2 x i64> %a, <2 x i32> %b) { ; CHECK-LABEL: test_vsubw_a32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x000000ffffffff ; CHECK-NEXT: usubw v0.2d, v0.2d, v1.2s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x000000ffffffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %vmovl.i.i = zext <2 x i32> %b to <2 x i64> @@ -947,9 +947,9 @@ define <4 x i32> @test_vsubw_high_a16(<4 x i32> %a, <8 x i16> %b) { ; CHECK-LABEL: test_vsubw_high_a16: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x00ffff0000ffff ; CHECK-NEXT: usubw2 v0.4s, v0.4s, v1.8h -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %shuffle.i.i.i = shufflevector <8 x i16> %b, <8 x i16> undef, <4 x i32> @@ -962,9 +962,9 @@ define <2 x i64> @test_vsubw_high_a32(<2 x i64> %a, <4 x i32> %b) { ; CHECK-LABEL: test_vsubw_high_a32: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v2.2d, #0x000000ffffffff ; CHECK-NEXT: usubw2 v0.2d, v0.2d, v1.4s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.2d, #0x000000ffffffff +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %shuffle.i.i.i = shufflevector <4 x i32> %b, <4 x i32> undef, <2 x i32> @@ -2510,8 +2510,8 @@ ; CHECK-NEXT: fmov d0, x0 ; CHECK-NEXT: fmov d1, x1 ; CHECK-NEXT: pmull v0.1q, v0.1d, v1.1d -; CHECK-NEXT: mov x1, v0.d[1] ; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: mov x1, v0.d[1] ; CHECK-NEXT: ret entry: %vmull2.i = tail call <16 x i8> @llvm.aarch64.neon.pmull64(i64 %a, i64 %b) @@ -2523,8 +2523,8 @@ ; CHECK-LABEL: test_vmull_high_p64: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: pmull2 v0.1q, v0.2d, v1.2d -; CHECK-NEXT: mov x1, v0.d[1] ; CHECK-NEXT: fmov x0, d0 +; CHECK-NEXT: mov x1, v0.d[1] ; CHECK-NEXT: ret entry: %0 = extractelement <2 x i64> %a, i32 1 diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll b/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll --- a/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-aba-abd.ll @@ -200,9 +200,9 @@ ; CHECK-LABEL: test_sabd_v2i32_const: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI19_0 -; CHECK-NEXT: movi d0, #0x00ffffffff0000 -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI19_0] -; CHECK-NEXT: sabd v0.2s, v1.2s, v0.2s +; CHECK-NEXT: movi d1, #0x00ffffffff0000 +; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI19_0] +; CHECK-NEXT: sabd v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %1 = tail call <2 x i32> @llvm.aarch64.neon.sabd.v2i32( <2 x i32> , diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll --- a/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-copy.ll @@ -1493,10 +1493,10 @@ ; CHECK-LABEL: test_concat_diff_v1i32_v1i32: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: fmov s0, w1 -; CHECK-NEXT: fmov s1, w0 -; CHECK-NEXT: sqabs s2, s0 -; CHECK-NEXT: sqabs s0, s1 -; CHECK-NEXT: fmov w8, s2 +; CHECK-NEXT: sqabs s1, s0 +; CHECK-NEXT: fmov s0, w0 +; CHECK-NEXT: sqabs s0, s0 +; CHECK-NEXT: fmov w8, s1 ; CHECK-NEXT: mov v0.s[1], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll b/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll --- a/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll +++ b/llvm/test/CodeGen/AArch64/arm64-neon-mul-div.ll @@ -73,14 +73,14 @@ define <2 x i64> @mul2xi64(<2 x i64> %A, <2 x i64> %B) { ; CHECK-LABEL: mul2xi64: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov x9, d1 -; CHECK-NEXT: fmov x10, d0 -; CHECK-NEXT: mov x8, v1.d[1] +; CHECK-NEXT: fmov x8, d1 +; CHECK-NEXT: fmov x9, d0 +; CHECK-NEXT: mov x10, v1.d[1] ; CHECK-NEXT: mov x11, v0.d[1] -; CHECK-NEXT: mul x9, x10, x9 -; CHECK-NEXT: mul x8, x11, x8 -; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: mov v0.d[1], x8 +; CHECK-NEXT: mul x8, x9, x8 +; CHECK-NEXT: mul x9, x11, x10 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: mov v0.d[1], x9 ; CHECK-NEXT: ret %tmp3 = mul <2 x i64> %A, %B; ret <2 x i64> %tmp3 @@ -162,33 +162,33 @@ ; CHECK-NEXT: smov w9, v0.b[1] ; CHECK-NEXT: smov w10, v0.b[0] ; CHECK-NEXT: smov w11, v0.b[2] -; CHECK-NEXT: smov w12, v0.b[3] -; CHECK-NEXT: smov w13, v0.b[4] ; CHECK-NEXT: sdiv w8, w9, w8 ; CHECK-NEXT: smov w9, v1.b[0] +; CHECK-NEXT: smov w12, v0.b[3] +; CHECK-NEXT: smov w13, v0.b[4] +; CHECK-NEXT: smov w14, v0.b[5] +; CHECK-NEXT: smov w15, v0.b[6] ; CHECK-NEXT: sdiv w9, w10, w9 ; CHECK-NEXT: smov w10, v1.b[2] ; CHECK-NEXT: sdiv w10, w11, w10 ; CHECK-NEXT: smov w11, v1.b[3] ; CHECK-NEXT: fmov s2, w9 -; CHECK-NEXT: smov w9, v1.b[5] +; CHECK-NEXT: smov w9, v0.b[7] ; CHECK-NEXT: mov v2.b[1], w8 +; CHECK-NEXT: smov w8, v1.b[7] ; CHECK-NEXT: sdiv w11, w12, w11 ; CHECK-NEXT: smov w12, v1.b[4] ; CHECK-NEXT: mov v2.b[2], w10 -; CHECK-NEXT: smov w10, v0.b[6] ; CHECK-NEXT: sdiv w12, w13, w12 -; CHECK-NEXT: smov w13, v0.b[5] +; CHECK-NEXT: smov w13, v1.b[5] ; CHECK-NEXT: mov v2.b[3], w11 -; CHECK-NEXT: smov w11, v0.b[7] -; CHECK-NEXT: sdiv w8, w13, w9 -; CHECK-NEXT: smov w9, v1.b[6] +; CHECK-NEXT: sdiv w13, w14, w13 +; CHECK-NEXT: smov w14, v1.b[6] ; CHECK-NEXT: mov v2.b[4], w12 -; CHECK-NEXT: sdiv w9, w10, w9 -; CHECK-NEXT: smov w10, v1.b[7] -; CHECK-NEXT: mov v2.b[5], w8 -; CHECK-NEXT: sdiv w8, w11, w10 -; CHECK-NEXT: mov v2.b[6], w9 +; CHECK-NEXT: sdiv w14, w15, w14 +; CHECK-NEXT: mov v2.b[5], w13 +; CHECK-NEXT: sdiv w8, w9, w8 +; CHECK-NEXT: mov v2.b[6], w14 ; CHECK-NEXT: mov v2.b[7], w8 ; CHECK-NEXT: fmov d0, d2 ; CHECK-NEXT: ret @@ -203,66 +203,66 @@ ; CHECK-NEXT: smov w9, v0.b[1] ; CHECK-NEXT: smov w10, v0.b[0] ; CHECK-NEXT: smov w11, v0.b[2] +; CHECK-NEXT: sdiv w8, w9, w8 +; CHECK-NEXT: smov w9, v1.b[0] ; CHECK-NEXT: smov w12, v0.b[3] ; CHECK-NEXT: smov w13, v0.b[4] ; CHECK-NEXT: smov w14, v0.b[5] ; CHECK-NEXT: smov w15, v0.b[6] -; CHECK-NEXT: sdiv w8, w9, w8 -; CHECK-NEXT: smov w9, v1.b[0] ; CHECK-NEXT: smov w16, v0.b[7] ; CHECK-NEXT: smov w17, v0.b[8] +; CHECK-NEXT: smov w18, v0.b[9] +; CHECK-NEXT: smov w0, v0.b[10] +; CHECK-NEXT: smov w1, v0.b[11] ; CHECK-NEXT: sdiv w9, w10, w9 ; CHECK-NEXT: smov w10, v1.b[2] ; CHECK-NEXT: sdiv w10, w11, w10 ; CHECK-NEXT: smov w11, v1.b[3] ; CHECK-NEXT: fmov s2, w9 -; CHECK-NEXT: smov w9, v1.b[9] +; CHECK-NEXT: smov w9, v0.b[12] ; CHECK-NEXT: mov v2.b[1], w8 +; CHECK-NEXT: smov w8, v1.b[12] ; CHECK-NEXT: sdiv w11, w12, w11 ; CHECK-NEXT: smov w12, v1.b[4] ; CHECK-NEXT: mov v2.b[2], w10 -; CHECK-NEXT: smov w10, v0.b[10] +; CHECK-NEXT: smov w10, v0.b[13] ; CHECK-NEXT: sdiv w12, w13, w12 ; CHECK-NEXT: smov w13, v1.b[5] ; CHECK-NEXT: mov v2.b[3], w11 -; CHECK-NEXT: smov w11, v0.b[11] +; CHECK-NEXT: smov w11, v0.b[14] ; CHECK-NEXT: sdiv w13, w14, w13 ; CHECK-NEXT: smov w14, v1.b[6] ; CHECK-NEXT: mov v2.b[4], w12 -; CHECK-NEXT: smov w12, v0.b[12] +; CHECK-NEXT: smov w12, v0.b[15] ; CHECK-NEXT: sdiv w14, w15, w14 ; CHECK-NEXT: smov w15, v1.b[7] ; CHECK-NEXT: mov v2.b[5], w13 -; CHECK-NEXT: smov w13, v0.b[13] ; CHECK-NEXT: sdiv w15, w16, w15 ; CHECK-NEXT: smov w16, v1.b[8] ; CHECK-NEXT: mov v2.b[6], w14 ; CHECK-NEXT: sdiv w16, w17, w16 -; CHECK-NEXT: smov w17, v0.b[9] +; CHECK-NEXT: smov w17, v1.b[9] ; CHECK-NEXT: mov v2.b[7], w15 -; CHECK-NEXT: sdiv w8, w17, w9 -; CHECK-NEXT: smov w9, v1.b[10] +; CHECK-NEXT: sdiv w17, w18, w17 +; CHECK-NEXT: smov w18, v1.b[10] ; CHECK-NEXT: mov v2.b[8], w16 +; CHECK-NEXT: sdiv w18, w0, w18 +; CHECK-NEXT: smov w0, v1.b[11] +; CHECK-NEXT: mov v2.b[9], w17 +; CHECK-NEXT: sdiv w0, w1, w0 +; CHECK-NEXT: mov v2.b[10], w18 +; CHECK-NEXT: sdiv w8, w9, w8 +; CHECK-NEXT: smov w9, v1.b[13] +; CHECK-NEXT: mov v2.b[11], w0 ; CHECK-NEXT: sdiv w9, w10, w9 -; CHECK-NEXT: smov w10, v1.b[11] -; CHECK-NEXT: mov v2.b[9], w8 +; CHECK-NEXT: smov w10, v1.b[14] +; CHECK-NEXT: mov v2.b[12], w8 ; CHECK-NEXT: sdiv w10, w11, w10 -; CHECK-NEXT: smov w11, v1.b[12] -; CHECK-NEXT: mov v2.b[10], w9 -; CHECK-NEXT: smov w9, v1.b[14] +; CHECK-NEXT: smov w11, v1.b[15] +; CHECK-NEXT: mov v2.b[13], w9 ; CHECK-NEXT: sdiv w11, w12, w11 -; CHECK-NEXT: smov w12, v1.b[13] -; CHECK-NEXT: mov v2.b[11], w10 -; CHECK-NEXT: smov w10, v1.b[15] -; CHECK-NEXT: sdiv w8, w13, w12 -; CHECK-NEXT: smov w12, v0.b[14] -; CHECK-NEXT: mov v2.b[12], w11 -; CHECK-NEXT: smov w11, v0.b[15] -; CHECK-NEXT: sdiv w9, w12, w9 -; CHECK-NEXT: mov v2.b[13], w8 -; CHECK-NEXT: sdiv w8, w11, w10 -; CHECK-NEXT: mov v2.b[14], w9 -; CHECK-NEXT: mov v2.b[15], w8 +; CHECK-NEXT: mov v2.b[14], w10 +; CHECK-NEXT: mov v2.b[15], w11 ; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %tmp3 = sdiv <16 x i8> %A, %B; @@ -292,18 +292,18 @@ ; CHECK-NEXT: smov w9, v0.h[1] ; CHECK-NEXT: smov w10, v0.h[0] ; CHECK-NEXT: smov w11, v0.h[2] -; CHECK-NEXT: smov w12, v0.h[3] ; CHECK-NEXT: sdiv w8, w9, w8 ; CHECK-NEXT: smov w9, v1.h[0] +; CHECK-NEXT: smov w12, v0.h[3] ; CHECK-NEXT: sdiv w9, w10, w9 ; CHECK-NEXT: smov w10, v1.h[2] ; CHECK-NEXT: sdiv w10, w11, w10 ; CHECK-NEXT: smov w11, v1.h[3] ; CHECK-NEXT: fmov s0, w9 ; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: sdiv w8, w12, w11 +; CHECK-NEXT: sdiv w11, w12, w11 ; CHECK-NEXT: mov v0.h[2], w10 -; CHECK-NEXT: mov v0.h[3], w8 +; CHECK-NEXT: mov v0.h[3], w11 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %tmp3 = sdiv <4 x i16> %A, %B; @@ -317,33 +317,33 @@ ; CHECK-NEXT: smov w9, v0.h[1] ; CHECK-NEXT: smov w10, v0.h[0] ; CHECK-NEXT: smov w11, v0.h[2] -; CHECK-NEXT: smov w12, v0.h[3] -; CHECK-NEXT: smov w13, v0.h[4] ; CHECK-NEXT: sdiv w8, w9, w8 ; CHECK-NEXT: smov w9, v1.h[0] +; CHECK-NEXT: smov w12, v0.h[3] +; CHECK-NEXT: smov w13, v0.h[4] +; CHECK-NEXT: smov w14, v0.h[5] +; CHECK-NEXT: smov w15, v0.h[6] ; CHECK-NEXT: sdiv w9, w10, w9 ; CHECK-NEXT: smov w10, v1.h[2] ; CHECK-NEXT: sdiv w10, w11, w10 ; CHECK-NEXT: smov w11, v1.h[3] ; CHECK-NEXT: fmov s2, w9 -; CHECK-NEXT: smov w9, v1.h[5] +; CHECK-NEXT: smov w9, v0.h[7] ; CHECK-NEXT: mov v2.h[1], w8 +; CHECK-NEXT: smov w8, v1.h[7] ; CHECK-NEXT: sdiv w11, w12, w11 ; CHECK-NEXT: smov w12, v1.h[4] ; CHECK-NEXT: mov v2.h[2], w10 -; CHECK-NEXT: smov w10, v0.h[6] ; CHECK-NEXT: sdiv w12, w13, w12 -; CHECK-NEXT: smov w13, v0.h[5] +; CHECK-NEXT: smov w13, v1.h[5] ; CHECK-NEXT: mov v2.h[3], w11 -; CHECK-NEXT: smov w11, v0.h[7] -; CHECK-NEXT: sdiv w8, w13, w9 -; CHECK-NEXT: smov w9, v1.h[6] +; CHECK-NEXT: sdiv w13, w14, w13 +; CHECK-NEXT: smov w14, v1.h[6] ; CHECK-NEXT: mov v2.h[4], w12 -; CHECK-NEXT: sdiv w9, w10, w9 -; CHECK-NEXT: smov w10, v1.h[7] -; CHECK-NEXT: mov v2.h[5], w8 -; CHECK-NEXT: sdiv w8, w11, w10 -; CHECK-NEXT: mov v2.h[6], w9 +; CHECK-NEXT: sdiv w14, w15, w14 +; CHECK-NEXT: mov v2.h[5], w13 +; CHECK-NEXT: sdiv w8, w9, w8 +; CHECK-NEXT: mov v2.h[6], w14 ; CHECK-NEXT: mov v2.h[7], w8 ; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret @@ -387,22 +387,22 @@ define <4 x i32> @sdiv4x32(<4 x i32> %A, <4 x i32> %B) { ; CHECK-LABEL: sdiv4x32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, v1.s[1] -; CHECK-NEXT: mov w9, v0.s[1] -; CHECK-NEXT: fmov w10, s0 +; CHECK-NEXT: mov w9, v1.s[1] +; CHECK-NEXT: mov w10, v0.s[1] +; CHECK-NEXT: fmov w8, s1 ; CHECK-NEXT: mov w11, v0.s[2] -; CHECK-NEXT: mov w12, v0.s[3] -; CHECK-NEXT: sdiv w8, w9, w8 -; CHECK-NEXT: fmov w9, s1 ; CHECK-NEXT: sdiv w9, w10, w9 +; CHECK-NEXT: fmov w10, s0 +; CHECK-NEXT: mov w12, v0.s[3] +; CHECK-NEXT: sdiv w8, w10, w8 ; CHECK-NEXT: mov w10, v1.s[2] ; CHECK-NEXT: sdiv w10, w11, w10 ; CHECK-NEXT: mov w11, v1.s[3] -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: sdiv w8, w12, w11 +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: mov v0.s[1], w9 +; CHECK-NEXT: sdiv w11, w12, w11 ; CHECK-NEXT: mov v0.s[2], w10 -; CHECK-NEXT: mov v0.s[3], w8 +; CHECK-NEXT: mov v0.s[3], w11 ; CHECK-NEXT: ret %tmp3 = sdiv <4 x i32> %A, %B; ret <4 x i32> %tmp3 @@ -461,33 +461,33 @@ ; CHECK-NEXT: umov w9, v0.b[1] ; CHECK-NEXT: umov w10, v0.b[0] ; CHECK-NEXT: umov w11, v0.b[2] -; CHECK-NEXT: umov w12, v0.b[3] -; CHECK-NEXT: umov w13, v0.b[4] ; CHECK-NEXT: udiv w8, w9, w8 ; CHECK-NEXT: umov w9, v1.b[0] +; CHECK-NEXT: umov w12, v0.b[3] +; CHECK-NEXT: umov w13, v0.b[4] +; CHECK-NEXT: umov w14, v0.b[5] +; CHECK-NEXT: umov w15, v0.b[6] ; CHECK-NEXT: udiv w9, w10, w9 ; CHECK-NEXT: umov w10, v1.b[2] ; CHECK-NEXT: udiv w10, w11, w10 ; CHECK-NEXT: umov w11, v1.b[3] ; CHECK-NEXT: fmov s2, w9 -; CHECK-NEXT: umov w9, v1.b[5] +; CHECK-NEXT: umov w9, v0.b[7] ; CHECK-NEXT: mov v2.b[1], w8 +; CHECK-NEXT: umov w8, v1.b[7] ; CHECK-NEXT: udiv w11, w12, w11 ; CHECK-NEXT: umov w12, v1.b[4] ; CHECK-NEXT: mov v2.b[2], w10 -; CHECK-NEXT: umov w10, v0.b[6] ; CHECK-NEXT: udiv w12, w13, w12 -; CHECK-NEXT: umov w13, v0.b[5] +; CHECK-NEXT: umov w13, v1.b[5] ; CHECK-NEXT: mov v2.b[3], w11 -; CHECK-NEXT: umov w11, v0.b[7] -; CHECK-NEXT: udiv w8, w13, w9 -; CHECK-NEXT: umov w9, v1.b[6] +; CHECK-NEXT: udiv w13, w14, w13 +; CHECK-NEXT: umov w14, v1.b[6] ; CHECK-NEXT: mov v2.b[4], w12 -; CHECK-NEXT: udiv w9, w10, w9 -; CHECK-NEXT: umov w10, v1.b[7] -; CHECK-NEXT: mov v2.b[5], w8 -; CHECK-NEXT: udiv w8, w11, w10 -; CHECK-NEXT: mov v2.b[6], w9 +; CHECK-NEXT: udiv w14, w15, w14 +; CHECK-NEXT: mov v2.b[5], w13 +; CHECK-NEXT: udiv w8, w9, w8 +; CHECK-NEXT: mov v2.b[6], w14 ; CHECK-NEXT: mov v2.b[7], w8 ; CHECK-NEXT: fmov d0, d2 ; CHECK-NEXT: ret @@ -502,66 +502,66 @@ ; CHECK-NEXT: umov w9, v0.b[1] ; CHECK-NEXT: umov w10, v0.b[0] ; CHECK-NEXT: umov w11, v0.b[2] +; CHECK-NEXT: udiv w8, w9, w8 +; CHECK-NEXT: umov w9, v1.b[0] ; CHECK-NEXT: umov w12, v0.b[3] ; CHECK-NEXT: umov w13, v0.b[4] ; CHECK-NEXT: umov w14, v0.b[5] ; CHECK-NEXT: umov w15, v0.b[6] -; CHECK-NEXT: udiv w8, w9, w8 -; CHECK-NEXT: umov w9, v1.b[0] ; CHECK-NEXT: umov w16, v0.b[7] ; CHECK-NEXT: umov w17, v0.b[8] +; CHECK-NEXT: umov w18, v0.b[9] +; CHECK-NEXT: umov w0, v0.b[10] +; CHECK-NEXT: umov w1, v0.b[11] ; CHECK-NEXT: udiv w9, w10, w9 ; CHECK-NEXT: umov w10, v1.b[2] ; CHECK-NEXT: udiv w10, w11, w10 ; CHECK-NEXT: umov w11, v1.b[3] ; CHECK-NEXT: fmov s2, w9 -; CHECK-NEXT: umov w9, v1.b[9] +; CHECK-NEXT: umov w9, v0.b[12] ; CHECK-NEXT: mov v2.b[1], w8 +; CHECK-NEXT: umov w8, v1.b[12] ; CHECK-NEXT: udiv w11, w12, w11 ; CHECK-NEXT: umov w12, v1.b[4] ; CHECK-NEXT: mov v2.b[2], w10 -; CHECK-NEXT: umov w10, v0.b[10] +; CHECK-NEXT: umov w10, v0.b[13] ; CHECK-NEXT: udiv w12, w13, w12 ; CHECK-NEXT: umov w13, v1.b[5] ; CHECK-NEXT: mov v2.b[3], w11 -; CHECK-NEXT: umov w11, v0.b[11] +; CHECK-NEXT: umov w11, v0.b[14] ; CHECK-NEXT: udiv w13, w14, w13 ; CHECK-NEXT: umov w14, v1.b[6] ; CHECK-NEXT: mov v2.b[4], w12 -; CHECK-NEXT: umov w12, v0.b[12] +; CHECK-NEXT: umov w12, v0.b[15] ; CHECK-NEXT: udiv w14, w15, w14 ; CHECK-NEXT: umov w15, v1.b[7] ; CHECK-NEXT: mov v2.b[5], w13 -; CHECK-NEXT: umov w13, v0.b[13] ; CHECK-NEXT: udiv w15, w16, w15 ; CHECK-NEXT: umov w16, v1.b[8] ; CHECK-NEXT: mov v2.b[6], w14 ; CHECK-NEXT: udiv w16, w17, w16 -; CHECK-NEXT: umov w17, v0.b[9] +; CHECK-NEXT: umov w17, v1.b[9] ; CHECK-NEXT: mov v2.b[7], w15 -; CHECK-NEXT: udiv w8, w17, w9 -; CHECK-NEXT: umov w9, v1.b[10] +; CHECK-NEXT: udiv w17, w18, w17 +; CHECK-NEXT: umov w18, v1.b[10] ; CHECK-NEXT: mov v2.b[8], w16 +; CHECK-NEXT: udiv w18, w0, w18 +; CHECK-NEXT: umov w0, v1.b[11] +; CHECK-NEXT: mov v2.b[9], w17 +; CHECK-NEXT: udiv w0, w1, w0 +; CHECK-NEXT: mov v2.b[10], w18 +; CHECK-NEXT: udiv w8, w9, w8 +; CHECK-NEXT: umov w9, v1.b[13] +; CHECK-NEXT: mov v2.b[11], w0 ; CHECK-NEXT: udiv w9, w10, w9 -; CHECK-NEXT: umov w10, v1.b[11] -; CHECK-NEXT: mov v2.b[9], w8 +; CHECK-NEXT: umov w10, v1.b[14] +; CHECK-NEXT: mov v2.b[12], w8 ; CHECK-NEXT: udiv w10, w11, w10 -; CHECK-NEXT: umov w11, v1.b[12] -; CHECK-NEXT: mov v2.b[10], w9 -; CHECK-NEXT: umov w9, v1.b[14] +; CHECK-NEXT: umov w11, v1.b[15] +; CHECK-NEXT: mov v2.b[13], w9 ; CHECK-NEXT: udiv w11, w12, w11 -; CHECK-NEXT: umov w12, v1.b[13] -; CHECK-NEXT: mov v2.b[11], w10 -; CHECK-NEXT: umov w10, v1.b[15] -; CHECK-NEXT: udiv w8, w13, w12 -; CHECK-NEXT: umov w12, v0.b[14] -; CHECK-NEXT: mov v2.b[12], w11 -; CHECK-NEXT: umov w11, v0.b[15] -; CHECK-NEXT: udiv w9, w12, w9 -; CHECK-NEXT: mov v2.b[13], w8 -; CHECK-NEXT: udiv w8, w11, w10 -; CHECK-NEXT: mov v2.b[14], w9 -; CHECK-NEXT: mov v2.b[15], w8 +; CHECK-NEXT: mov v2.b[14], w10 +; CHECK-NEXT: mov v2.b[15], w11 ; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %tmp3 = udiv <16 x i8> %A, %B; @@ -591,18 +591,18 @@ ; CHECK-NEXT: umov w9, v0.h[1] ; CHECK-NEXT: umov w10, v0.h[0] ; CHECK-NEXT: umov w11, v0.h[2] -; CHECK-NEXT: umov w12, v0.h[3] ; CHECK-NEXT: udiv w8, w9, w8 ; CHECK-NEXT: umov w9, v1.h[0] +; CHECK-NEXT: umov w12, v0.h[3] ; CHECK-NEXT: udiv w9, w10, w9 ; CHECK-NEXT: umov w10, v1.h[2] ; CHECK-NEXT: udiv w10, w11, w10 ; CHECK-NEXT: umov w11, v1.h[3] ; CHECK-NEXT: fmov s0, w9 ; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: udiv w8, w12, w11 +; CHECK-NEXT: udiv w11, w12, w11 ; CHECK-NEXT: mov v0.h[2], w10 -; CHECK-NEXT: mov v0.h[3], w8 +; CHECK-NEXT: mov v0.h[3], w11 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %tmp3 = udiv <4 x i16> %A, %B; @@ -616,33 +616,33 @@ ; CHECK-NEXT: umov w9, v0.h[1] ; CHECK-NEXT: umov w10, v0.h[0] ; CHECK-NEXT: umov w11, v0.h[2] -; CHECK-NEXT: umov w12, v0.h[3] -; CHECK-NEXT: umov w13, v0.h[4] ; CHECK-NEXT: udiv w8, w9, w8 ; CHECK-NEXT: umov w9, v1.h[0] +; CHECK-NEXT: umov w12, v0.h[3] +; CHECK-NEXT: umov w13, v0.h[4] +; CHECK-NEXT: umov w14, v0.h[5] +; CHECK-NEXT: umov w15, v0.h[6] ; CHECK-NEXT: udiv w9, w10, w9 ; CHECK-NEXT: umov w10, v1.h[2] ; CHECK-NEXT: udiv w10, w11, w10 ; CHECK-NEXT: umov w11, v1.h[3] ; CHECK-NEXT: fmov s2, w9 -; CHECK-NEXT: umov w9, v1.h[5] +; CHECK-NEXT: umov w9, v0.h[7] ; CHECK-NEXT: mov v2.h[1], w8 +; CHECK-NEXT: umov w8, v1.h[7] ; CHECK-NEXT: udiv w11, w12, w11 ; CHECK-NEXT: umov w12, v1.h[4] ; CHECK-NEXT: mov v2.h[2], w10 -; CHECK-NEXT: umov w10, v0.h[6] ; CHECK-NEXT: udiv w12, w13, w12 -; CHECK-NEXT: umov w13, v0.h[5] +; CHECK-NEXT: umov w13, v1.h[5] ; CHECK-NEXT: mov v2.h[3], w11 -; CHECK-NEXT: umov w11, v0.h[7] -; CHECK-NEXT: udiv w8, w13, w9 -; CHECK-NEXT: umov w9, v1.h[6] +; CHECK-NEXT: udiv w13, w14, w13 +; CHECK-NEXT: umov w14, v1.h[6] ; CHECK-NEXT: mov v2.h[4], w12 -; CHECK-NEXT: udiv w9, w10, w9 -; CHECK-NEXT: umov w10, v1.h[7] -; CHECK-NEXT: mov v2.h[5], w8 -; CHECK-NEXT: udiv w8, w11, w10 -; CHECK-NEXT: mov v2.h[6], w9 +; CHECK-NEXT: udiv w14, w15, w14 +; CHECK-NEXT: mov v2.h[5], w13 +; CHECK-NEXT: udiv w8, w9, w8 +; CHECK-NEXT: mov v2.h[6], w14 ; CHECK-NEXT: mov v2.h[7], w8 ; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret @@ -686,22 +686,22 @@ define <4 x i32> @udiv4x32(<4 x i32> %A, <4 x i32> %B) { ; CHECK-LABEL: udiv4x32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, v1.s[1] -; CHECK-NEXT: mov w9, v0.s[1] -; CHECK-NEXT: fmov w10, s0 +; CHECK-NEXT: mov w9, v1.s[1] +; CHECK-NEXT: mov w10, v0.s[1] +; CHECK-NEXT: fmov w8, s1 ; CHECK-NEXT: mov w11, v0.s[2] -; CHECK-NEXT: mov w12, v0.s[3] -; CHECK-NEXT: udiv w8, w9, w8 -; CHECK-NEXT: fmov w9, s1 ; CHECK-NEXT: udiv w9, w10, w9 +; CHECK-NEXT: fmov w10, s0 +; CHECK-NEXT: mov w12, v0.s[3] +; CHECK-NEXT: udiv w8, w10, w8 ; CHECK-NEXT: mov w10, v1.s[2] ; CHECK-NEXT: udiv w10, w11, w10 ; CHECK-NEXT: mov w11, v1.s[3] -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: udiv w8, w12, w11 +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: mov v0.s[1], w9 +; CHECK-NEXT: udiv w11, w12, w11 ; CHECK-NEXT: mov v0.s[2], w10 -; CHECK-NEXT: mov v0.s[3], w8 +; CHECK-NEXT: mov v0.s[3], w11 ; CHECK-NEXT: ret %tmp3 = udiv <4 x i32> %A, %B; ret <4 x i32> %tmp3 @@ -755,49 +755,53 @@ define <8 x i8> @srem8x8(<8 x i8> %A, <8 x i8> %B) { ; CHECK-LABEL: srem8x8: ; CHECK: // %bb.0: +; CHECK-NEXT: str x19, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w19, -16 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: smov w11, v1.b[0] ; CHECK-NEXT: smov w12, v0.b[0] ; CHECK-NEXT: smov w8, v1.b[1] ; CHECK-NEXT: smov w9, v0.b[1] +; CHECK-NEXT: sdiv w13, w12, w11 ; CHECK-NEXT: smov w14, v1.b[2] ; CHECK-NEXT: smov w15, v0.b[2] ; CHECK-NEXT: smov w17, v1.b[3] ; CHECK-NEXT: smov w18, v0.b[3] -; CHECK-NEXT: sdiv w13, w12, w11 ; CHECK-NEXT: smov w1, v1.b[4] ; CHECK-NEXT: smov w2, v0.b[4] +; CHECK-NEXT: smov w4, v1.b[5] +; CHECK-NEXT: smov w5, v0.b[5] +; CHECK-NEXT: smov w7, v1.b[6] +; CHECK-NEXT: smov w19, v0.b[6] ; CHECK-NEXT: msub w11, w13, w11, w12 -; CHECK-NEXT: smov w12, v1.b[5] +; CHECK-NEXT: smov w13, v0.b[7] ; CHECK-NEXT: sdiv w10, w9, w8 -; CHECK-NEXT: smov w13, v0.b[5] ; CHECK-NEXT: fmov s2, w11 -; CHECK-NEXT: smov w11, v0.b[6] +; CHECK-NEXT: smov w11, v1.b[7] ; CHECK-NEXT: msub w8, w10, w8, w9 -; CHECK-NEXT: smov w10, v1.b[6] ; CHECK-NEXT: sdiv w16, w15, w14 ; CHECK-NEXT: mov v2.b[1], w8 -; CHECK-NEXT: msub w8, w16, w14, w15 -; CHECK-NEXT: smov w15, v1.b[7] +; CHECK-NEXT: msub w9, w16, w14, w15 ; CHECK-NEXT: sdiv w0, w18, w17 -; CHECK-NEXT: smov w16, v0.b[7] -; CHECK-NEXT: mov v2.b[2], w8 -; CHECK-NEXT: msub w14, w0, w17, w18 +; CHECK-NEXT: mov v2.b[2], w9 +; CHECK-NEXT: msub w10, w0, w17, w18 ; CHECK-NEXT: sdiv w3, w2, w1 -; CHECK-NEXT: mov v2.b[3], w14 -; CHECK-NEXT: msub w14, w3, w1, w2 -; CHECK-NEXT: sdiv w9, w13, w12 -; CHECK-NEXT: mov v2.b[4], w14 -; CHECK-NEXT: msub w9, w9, w12, w13 -; CHECK-NEXT: sdiv w8, w11, w10 +; CHECK-NEXT: mov v2.b[3], w10 +; CHECK-NEXT: msub w8, w3, w1, w2 +; CHECK-NEXT: sdiv w6, w5, w4 +; CHECK-NEXT: mov v2.b[4], w8 +; CHECK-NEXT: msub w9, w6, w4, w5 +; CHECK-NEXT: sdiv w12, w19, w7 ; CHECK-NEXT: mov v2.b[5], w9 -; CHECK-NEXT: msub w8, w8, w10, w11 -; CHECK-NEXT: sdiv w12, w16, w15 -; CHECK-NEXT: mov v2.b[6], w8 -; CHECK-NEXT: msub w8, w12, w15, w16 +; CHECK-NEXT: msub w10, w12, w7, w19 +; CHECK-NEXT: sdiv w14, w13, w11 +; CHECK-NEXT: mov v2.b[6], w10 +; CHECK-NEXT: msub w8, w14, w11, w13 ; CHECK-NEXT: mov v2.b[7], w8 ; CHECK-NEXT: fmov d0, d2 +; CHECK-NEXT: ldr x19, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %tmp3 = srem <8 x i8> %A, %B; ret <8 x i8> %tmp3 @@ -806,11 +810,14 @@ define <16 x i8> @srem16x8(<16 x i8> %A, <16 x i8> %B) { ; CHECK-LABEL: srem16x8: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x26, x25, [sp, #-64]! // 16-byte Folded Spill -; CHECK-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: sub sp, sp, #128 +; CHECK-NEXT: stp x29, x30, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp x28, x27, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp x26, x25, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: stp x24, x23, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: stp x22, x21, [sp, #96] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #112] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 128 ; CHECK-NEXT: .cfi_offset w19, -8 ; CHECK-NEXT: .cfi_offset w20, -16 ; CHECK-NEXT: .cfi_offset w21, -24 @@ -819,15 +826,19 @@ ; CHECK-NEXT: .cfi_offset w24, -48 ; CHECK-NEXT: .cfi_offset w25, -56 ; CHECK-NEXT: .cfi_offset w26, -64 -; CHECK-NEXT: smov w11, v1.b[0] -; CHECK-NEXT: smov w12, v0.b[0] -; CHECK-NEXT: smov w8, v1.b[1] -; CHECK-NEXT: smov w9, v0.b[1] +; CHECK-NEXT: .cfi_offset w27, -72 +; CHECK-NEXT: .cfi_offset w28, -80 +; CHECK-NEXT: .cfi_offset w30, -88 +; CHECK-NEXT: .cfi_offset w29, -96 +; CHECK-NEXT: smov w3, v1.b[1] +; CHECK-NEXT: smov w0, v0.b[1] +; CHECK-NEXT: smov w16, v1.b[0] +; CHECK-NEXT: smov w13, v0.b[0] +; CHECK-NEXT: sdiv w9, w0, w3 ; CHECK-NEXT: smov w14, v1.b[2] ; CHECK-NEXT: smov w15, v0.b[2] ; CHECK-NEXT: smov w17, v1.b[3] ; CHECK-NEXT: smov w18, v0.b[3] -; CHECK-NEXT: sdiv w13, w12, w11 ; CHECK-NEXT: smov w1, v1.b[4] ; CHECK-NEXT: smov w2, v0.b[4] ; CHECK-NEXT: smov w4, v1.b[5] @@ -838,72 +849,82 @@ ; CHECK-NEXT: smov w22, v0.b[7] ; CHECK-NEXT: smov w24, v1.b[8] ; CHECK-NEXT: smov w25, v0.b[8] -; CHECK-NEXT: msub w11, w13, w11, w12 -; CHECK-NEXT: smov w12, v1.b[9] -; CHECK-NEXT: sdiv w10, w9, w8 -; CHECK-NEXT: smov w13, v0.b[9] -; CHECK-NEXT: fmov s2, w11 -; CHECK-NEXT: smov w11, v0.b[10] -; CHECK-NEXT: msub w8, w10, w8, w9 -; CHECK-NEXT: smov w10, v1.b[10] -; CHECK-NEXT: sdiv w16, w15, w14 -; CHECK-NEXT: mov v2.b[1], w8 -; CHECK-NEXT: msub w8, w16, w14, w15 -; CHECK-NEXT: smov w15, v1.b[11] -; CHECK-NEXT: sdiv w0, w18, w17 -; CHECK-NEXT: smov w16, v0.b[11] -; CHECK-NEXT: mov v2.b[2], w8 -; CHECK-NEXT: msub w14, w0, w17, w18 -; CHECK-NEXT: smov w18, v1.b[12] -; CHECK-NEXT: sdiv w3, w2, w1 -; CHECK-NEXT: smov w0, v0.b[12] -; CHECK-NEXT: mov v2.b[3], w14 -; CHECK-NEXT: msub w14, w3, w1, w2 -; CHECK-NEXT: smov w2, v1.b[13] -; CHECK-NEXT: sdiv w6, w5, w4 -; CHECK-NEXT: smov w3, v0.b[13] -; CHECK-NEXT: mov v2.b[4], w14 -; CHECK-NEXT: msub w17, w6, w4, w5 +; CHECK-NEXT: smov w27, v1.b[9] +; CHECK-NEXT: sdiv w8, w13, w16 +; CHECK-NEXT: smov w28, v0.b[9] +; CHECK-NEXT: smov w30, v1.b[10] +; CHECK-NEXT: smov w10, v0.b[10] +; CHECK-NEXT: stp w8, w9, [sp, #24] // 8-byte Folded Spill +; CHECK-NEXT: sdiv w9, w15, w14 +; CHECK-NEXT: ldr w6, [sp, #28] // 4-byte Folded Reload +; CHECK-NEXT: msub w0, w6, w3, w0 +; CHECK-NEXT: sdiv w8, w18, w17 +; CHECK-NEXT: stp w8, w9, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: sdiv w9, w2, w1 +; CHECK-NEXT: ldp w6, w3, [sp, #20] // 8-byte Folded Reload +; CHECK-NEXT: msub w13, w3, w16, w13 +; CHECK-NEXT: smov w16, v1.b[12] +; CHECK-NEXT: msub w14, w6, w14, w15 +; CHECK-NEXT: smov w3, v0.b[12] +; CHECK-NEXT: sdiv w8, w5, w4 +; CHECK-NEXT: fmov s2, w13 +; CHECK-NEXT: ldr w13, [sp, #16] // 4-byte Folded Reload +; CHECK-NEXT: mov v2.b[1], w0 +; CHECK-NEXT: msub w13, w13, w17, w18 +; CHECK-NEXT: smov w17, v1.b[13] +; CHECK-NEXT: mov v2.b[2], w14 +; CHECK-NEXT: smov w18, v0.b[13] +; CHECK-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: sdiv w20, w19, w7 -; CHECK-NEXT: mov v2.b[5], w17 -; CHECK-NEXT: msub w17, w20, w7, w19 +; CHECK-NEXT: ldp w0, w14, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: mov v2.b[3], w13 +; CHECK-NEXT: smov w9, v1.b[11] +; CHECK-NEXT: smov w8, v0.b[11] +; CHECK-NEXT: msub w14, w14, w1, w2 +; CHECK-NEXT: smov w1, v1.b[14] +; CHECK-NEXT: msub w0, w0, w4, w5 +; CHECK-NEXT: smov w2, v0.b[14] ; CHECK-NEXT: sdiv w23, w22, w21 -; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: mov v2.b[6], w17 -; CHECK-NEXT: msub w1, w23, w21, w22 +; CHECK-NEXT: mov v2.b[4], w14 +; CHECK-NEXT: msub w14, w20, w7, w19 +; CHECK-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload +; CHECK-NEXT: mov v2.b[5], w0 +; CHECK-NEXT: mov v2.b[6], w14 +; CHECK-NEXT: msub w0, w23, w21, w22 ; CHECK-NEXT: sdiv w26, w25, w24 -; CHECK-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: mov v2.b[7], w1 -; CHECK-NEXT: msub w1, w26, w24, w25 -; CHECK-NEXT: sdiv w9, w13, w12 -; CHECK-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: mov v2.b[8], w1 -; CHECK-NEXT: msub w9, w9, w12, w13 -; CHECK-NEXT: smov w13, v1.b[15] -; CHECK-NEXT: sdiv w8, w11, w10 -; CHECK-NEXT: mov v2.b[9], w9 -; CHECK-NEXT: smov w9, v1.b[14] -; CHECK-NEXT: msub w8, w8, w10, w11 -; CHECK-NEXT: smov w10, v0.b[14] -; CHECK-NEXT: sdiv w14, w16, w15 -; CHECK-NEXT: mov v2.b[10], w8 -; CHECK-NEXT: msub w11, w14, w15, w16 -; CHECK-NEXT: smov w14, v0.b[15] -; CHECK-NEXT: sdiv w17, w0, w18 -; CHECK-NEXT: mov v2.b[11], w11 -; CHECK-NEXT: msub w11, w17, w18, w0 -; CHECK-NEXT: sdiv w12, w3, w2 -; CHECK-NEXT: mov v2.b[12], w11 -; CHECK-NEXT: msub w12, w12, w2, w3 -; CHECK-NEXT: sdiv w8, w10, w9 -; CHECK-NEXT: mov v2.b[13], w12 -; CHECK-NEXT: msub w8, w8, w9, w10 -; CHECK-NEXT: sdiv w11, w14, w13 +; CHECK-NEXT: ldp x22, x21, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: mov v2.b[7], w0 +; CHECK-NEXT: msub w4, w26, w24, w25 +; CHECK-NEXT: sdiv w29, w28, w27 +; CHECK-NEXT: ldp x24, x23, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: mov v2.b[8], w4 +; CHECK-NEXT: ldp x26, x25, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: msub w5, w29, w27, w28 +; CHECK-NEXT: sdiv w12, w10, w30 +; CHECK-NEXT: ldp x28, x27, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: mov v2.b[9], w5 +; CHECK-NEXT: msub w10, w12, w30, w10 +; CHECK-NEXT: sdiv w11, w8, w9 +; CHECK-NEXT: ldp x29, x30, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: mov v2.b[10], w10 +; CHECK-NEXT: msub w8, w11, w9, w8 +; CHECK-NEXT: smov w9, v1.b[15] +; CHECK-NEXT: sdiv w15, w3, w16 +; CHECK-NEXT: smov w11, v0.b[15] +; CHECK-NEXT: mov v2.b[11], w8 +; CHECK-NEXT: msub w12, w15, w16, w3 +; CHECK-NEXT: sdiv w13, w18, w17 +; CHECK-NEXT: mov v2.b[12], w12 +; CHECK-NEXT: msub w10, w13, w17, w18 +; CHECK-NEXT: sdiv w14, w2, w1 +; CHECK-NEXT: mov v2.b[13], w10 +; CHECK-NEXT: msub w8, w14, w1, w2 +; CHECK-NEXT: sdiv w15, w11, w9 ; CHECK-NEXT: mov v2.b[14], w8 -; CHECK-NEXT: msub w8, w11, w13, w14 -; CHECK-NEXT: mov v2.b[15], w8 +; CHECK-NEXT: msub w9, w15, w9, w11 +; CHECK-NEXT: mov v2.b[15], w9 ; CHECK-NEXT: mov v0.16b, v2.16b -; CHECK-NEXT: ldp x26, x25, [sp], #64 // 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #128 ; CHECK-NEXT: ret %tmp3 = srem <16 x i8> %A, %B; ret <16 x i8> %tmp3 @@ -933,21 +954,21 @@ ; CHECK-NEXT: smov w12, v0.h[0] ; CHECK-NEXT: smov w8, v1.h[1] ; CHECK-NEXT: smov w9, v0.h[1] +; CHECK-NEXT: sdiv w13, w12, w11 ; CHECK-NEXT: smov w14, v1.h[2] ; CHECK-NEXT: smov w15, v0.h[2] -; CHECK-NEXT: sdiv w13, w12, w11 +; CHECK-NEXT: smov w17, v1.h[3] +; CHECK-NEXT: smov w18, v0.h[3] ; CHECK-NEXT: msub w11, w13, w11, w12 -; CHECK-NEXT: smov w12, v1.h[3] ; CHECK-NEXT: sdiv w10, w9, w8 -; CHECK-NEXT: smov w13, v0.h[3] ; CHECK-NEXT: fmov s0, w11 ; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: sdiv w16, w15, w14 ; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: msub w10, w16, w14, w15 -; CHECK-NEXT: sdiv w9, w13, w12 -; CHECK-NEXT: mov v0.h[2], w10 -; CHECK-NEXT: msub w8, w9, w12, w13 +; CHECK-NEXT: msub w9, w16, w14, w15 +; CHECK-NEXT: sdiv w0, w18, w17 +; CHECK-NEXT: mov v0.h[2], w9 +; CHECK-NEXT: msub w8, w0, w17, w18 ; CHECK-NEXT: mov v0.h[3], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret @@ -958,47 +979,51 @@ define <8 x i16> @srem8x16(<8 x i16> %A, <8 x i16> %B) { ; CHECK-LABEL: srem8x16: ; CHECK: // %bb.0: +; CHECK-NEXT: str x19, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w19, -16 ; CHECK-NEXT: smov w11, v1.h[0] ; CHECK-NEXT: smov w12, v0.h[0] ; CHECK-NEXT: smov w8, v1.h[1] ; CHECK-NEXT: smov w9, v0.h[1] +; CHECK-NEXT: sdiv w13, w12, w11 ; CHECK-NEXT: smov w14, v1.h[2] ; CHECK-NEXT: smov w15, v0.h[2] ; CHECK-NEXT: smov w17, v1.h[3] ; CHECK-NEXT: smov w18, v0.h[3] -; CHECK-NEXT: sdiv w13, w12, w11 ; CHECK-NEXT: smov w1, v1.h[4] ; CHECK-NEXT: smov w2, v0.h[4] +; CHECK-NEXT: smov w4, v1.h[5] +; CHECK-NEXT: smov w5, v0.h[5] +; CHECK-NEXT: smov w7, v1.h[6] +; CHECK-NEXT: smov w19, v0.h[6] ; CHECK-NEXT: msub w11, w13, w11, w12 -; CHECK-NEXT: smov w12, v1.h[5] +; CHECK-NEXT: smov w13, v0.h[7] ; CHECK-NEXT: sdiv w10, w9, w8 -; CHECK-NEXT: smov w13, v0.h[5] ; CHECK-NEXT: fmov s2, w11 -; CHECK-NEXT: smov w11, v0.h[6] +; CHECK-NEXT: smov w11, v1.h[7] ; CHECK-NEXT: msub w8, w10, w8, w9 -; CHECK-NEXT: smov w10, v1.h[6] ; CHECK-NEXT: sdiv w16, w15, w14 ; CHECK-NEXT: mov v2.h[1], w8 -; CHECK-NEXT: msub w8, w16, w14, w15 -; CHECK-NEXT: smov w15, v1.h[7] +; CHECK-NEXT: msub w9, w16, w14, w15 ; CHECK-NEXT: sdiv w0, w18, w17 -; CHECK-NEXT: smov w16, v0.h[7] -; CHECK-NEXT: mov v2.h[2], w8 -; CHECK-NEXT: msub w14, w0, w17, w18 +; CHECK-NEXT: mov v2.h[2], w9 +; CHECK-NEXT: msub w10, w0, w17, w18 ; CHECK-NEXT: sdiv w3, w2, w1 -; CHECK-NEXT: mov v2.h[3], w14 -; CHECK-NEXT: msub w14, w3, w1, w2 -; CHECK-NEXT: sdiv w9, w13, w12 -; CHECK-NEXT: mov v2.h[4], w14 -; CHECK-NEXT: msub w9, w9, w12, w13 -; CHECK-NEXT: sdiv w8, w11, w10 +; CHECK-NEXT: mov v2.h[3], w10 +; CHECK-NEXT: msub w8, w3, w1, w2 +; CHECK-NEXT: sdiv w6, w5, w4 +; CHECK-NEXT: mov v2.h[4], w8 +; CHECK-NEXT: msub w9, w6, w4, w5 +; CHECK-NEXT: sdiv w12, w19, w7 ; CHECK-NEXT: mov v2.h[5], w9 -; CHECK-NEXT: msub w8, w8, w10, w11 -; CHECK-NEXT: sdiv w12, w16, w15 -; CHECK-NEXT: mov v2.h[6], w8 -; CHECK-NEXT: msub w8, w12, w15, w16 +; CHECK-NEXT: msub w10, w12, w7, w19 +; CHECK-NEXT: sdiv w14, w13, w11 +; CHECK-NEXT: mov v2.h[6], w10 +; CHECK-NEXT: msub w8, w14, w11, w13 ; CHECK-NEXT: mov v2.h[7], w8 ; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ldr x19, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %tmp3 = srem <8 x i16> %A, %B; ret <8 x i16> %tmp3 @@ -1043,25 +1068,25 @@ define <4 x i32> @srem4x32(<4 x i32> %A, <4 x i32> %B) { ; CHECK-LABEL: srem4x32: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov w11, s1 +; CHECK-NEXT: fmov w8, s1 ; CHECK-NEXT: fmov w12, s0 -; CHECK-NEXT: mov w8, v1.s[1] -; CHECK-NEXT: mov w9, v0.s[1] +; CHECK-NEXT: mov w9, v1.s[1] +; CHECK-NEXT: mov w10, v0.s[1] ; CHECK-NEXT: mov w14, v1.s[2] ; CHECK-NEXT: mov w15, v0.s[2] -; CHECK-NEXT: sdiv w13, w12, w11 +; CHECK-NEXT: sdiv w13, w12, w8 ; CHECK-NEXT: mov w17, v1.s[3] ; CHECK-NEXT: mov w18, v0.s[3] -; CHECK-NEXT: msub w11, w13, w11, w12 -; CHECK-NEXT: sdiv w10, w9, w8 -; CHECK-NEXT: fmov s0, w11 -; CHECK-NEXT: msub w8, w10, w8, w9 +; CHECK-NEXT: msub w8, w13, w8, w12 +; CHECK-NEXT: sdiv w11, w10, w9 +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: msub w9, w11, w9, w10 ; CHECK-NEXT: sdiv w16, w15, w14 -; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: msub w10, w16, w14, w15 -; CHECK-NEXT: sdiv w9, w18, w17 +; CHECK-NEXT: sdiv w0, w18, w17 ; CHECK-NEXT: mov v0.s[2], w10 -; CHECK-NEXT: msub w8, w9, w17, w18 +; CHECK-NEXT: msub w8, w0, w17, w18 ; CHECK-NEXT: mov v0.s[3], w8 ; CHECK-NEXT: ret %tmp3 = srem <4 x i32> %A, %B; @@ -1119,49 +1144,53 @@ define <8 x i8> @urem8x8(<8 x i8> %A, <8 x i8> %B) { ; CHECK-LABEL: urem8x8: ; CHECK: // %bb.0: +; CHECK-NEXT: str x19, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w19, -16 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: umov w11, v1.b[0] ; CHECK-NEXT: umov w12, v0.b[0] ; CHECK-NEXT: umov w8, v1.b[1] ; CHECK-NEXT: umov w9, v0.b[1] +; CHECK-NEXT: udiv w13, w12, w11 ; CHECK-NEXT: umov w14, v1.b[2] ; CHECK-NEXT: umov w15, v0.b[2] ; CHECK-NEXT: umov w17, v1.b[3] ; CHECK-NEXT: umov w18, v0.b[3] -; CHECK-NEXT: udiv w13, w12, w11 ; CHECK-NEXT: umov w1, v1.b[4] ; CHECK-NEXT: umov w2, v0.b[4] +; CHECK-NEXT: umov w4, v1.b[5] +; CHECK-NEXT: umov w5, v0.b[5] +; CHECK-NEXT: umov w7, v1.b[6] +; CHECK-NEXT: umov w19, v0.b[6] ; CHECK-NEXT: msub w11, w13, w11, w12 -; CHECK-NEXT: umov w12, v1.b[5] +; CHECK-NEXT: umov w13, v0.b[7] ; CHECK-NEXT: udiv w10, w9, w8 -; CHECK-NEXT: umov w13, v0.b[5] ; CHECK-NEXT: fmov s2, w11 -; CHECK-NEXT: umov w11, v0.b[6] +; CHECK-NEXT: umov w11, v1.b[7] ; CHECK-NEXT: msub w8, w10, w8, w9 -; CHECK-NEXT: umov w10, v1.b[6] ; CHECK-NEXT: udiv w16, w15, w14 ; CHECK-NEXT: mov v2.b[1], w8 -; CHECK-NEXT: msub w8, w16, w14, w15 -; CHECK-NEXT: umov w15, v1.b[7] +; CHECK-NEXT: msub w9, w16, w14, w15 ; CHECK-NEXT: udiv w0, w18, w17 -; CHECK-NEXT: umov w16, v0.b[7] -; CHECK-NEXT: mov v2.b[2], w8 -; CHECK-NEXT: msub w14, w0, w17, w18 +; CHECK-NEXT: mov v2.b[2], w9 +; CHECK-NEXT: msub w10, w0, w17, w18 ; CHECK-NEXT: udiv w3, w2, w1 -; CHECK-NEXT: mov v2.b[3], w14 -; CHECK-NEXT: msub w14, w3, w1, w2 -; CHECK-NEXT: udiv w9, w13, w12 -; CHECK-NEXT: mov v2.b[4], w14 -; CHECK-NEXT: msub w9, w9, w12, w13 -; CHECK-NEXT: udiv w8, w11, w10 +; CHECK-NEXT: mov v2.b[3], w10 +; CHECK-NEXT: msub w8, w3, w1, w2 +; CHECK-NEXT: udiv w6, w5, w4 +; CHECK-NEXT: mov v2.b[4], w8 +; CHECK-NEXT: msub w9, w6, w4, w5 +; CHECK-NEXT: udiv w12, w19, w7 ; CHECK-NEXT: mov v2.b[5], w9 -; CHECK-NEXT: msub w8, w8, w10, w11 -; CHECK-NEXT: udiv w12, w16, w15 -; CHECK-NEXT: mov v2.b[6], w8 -; CHECK-NEXT: msub w8, w12, w15, w16 +; CHECK-NEXT: msub w10, w12, w7, w19 +; CHECK-NEXT: udiv w14, w13, w11 +; CHECK-NEXT: mov v2.b[6], w10 +; CHECK-NEXT: msub w8, w14, w11, w13 ; CHECK-NEXT: mov v2.b[7], w8 ; CHECK-NEXT: fmov d0, d2 +; CHECK-NEXT: ldr x19, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %tmp3 = urem <8 x i8> %A, %B; ret <8 x i8> %tmp3 @@ -1170,11 +1199,14 @@ define <16 x i8> @urem16x8(<16 x i8> %A, <16 x i8> %B) { ; CHECK-LABEL: urem16x8: ; CHECK: // %bb.0: -; CHECK-NEXT: stp x26, x25, [sp, #-64]! // 16-byte Folded Spill -; CHECK-NEXT: stp x24, x23, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: stp x22, x21, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 64 +; CHECK-NEXT: sub sp, sp, #128 +; CHECK-NEXT: stp x29, x30, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp x28, x27, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: stp x26, x25, [sp, #64] // 16-byte Folded Spill +; CHECK-NEXT: stp x24, x23, [sp, #80] // 16-byte Folded Spill +; CHECK-NEXT: stp x22, x21, [sp, #96] // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #112] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 128 ; CHECK-NEXT: .cfi_offset w19, -8 ; CHECK-NEXT: .cfi_offset w20, -16 ; CHECK-NEXT: .cfi_offset w21, -24 @@ -1183,15 +1215,19 @@ ; CHECK-NEXT: .cfi_offset w24, -48 ; CHECK-NEXT: .cfi_offset w25, -56 ; CHECK-NEXT: .cfi_offset w26, -64 -; CHECK-NEXT: umov w11, v1.b[0] -; CHECK-NEXT: umov w12, v0.b[0] -; CHECK-NEXT: umov w8, v1.b[1] -; CHECK-NEXT: umov w9, v0.b[1] +; CHECK-NEXT: .cfi_offset w27, -72 +; CHECK-NEXT: .cfi_offset w28, -80 +; CHECK-NEXT: .cfi_offset w30, -88 +; CHECK-NEXT: .cfi_offset w29, -96 +; CHECK-NEXT: umov w3, v1.b[1] +; CHECK-NEXT: umov w0, v0.b[1] +; CHECK-NEXT: umov w16, v1.b[0] +; CHECK-NEXT: umov w13, v0.b[0] +; CHECK-NEXT: udiv w9, w0, w3 ; CHECK-NEXT: umov w14, v1.b[2] ; CHECK-NEXT: umov w15, v0.b[2] ; CHECK-NEXT: umov w17, v1.b[3] ; CHECK-NEXT: umov w18, v0.b[3] -; CHECK-NEXT: udiv w13, w12, w11 ; CHECK-NEXT: umov w1, v1.b[4] ; CHECK-NEXT: umov w2, v0.b[4] ; CHECK-NEXT: umov w4, v1.b[5] @@ -1202,72 +1238,82 @@ ; CHECK-NEXT: umov w22, v0.b[7] ; CHECK-NEXT: umov w24, v1.b[8] ; CHECK-NEXT: umov w25, v0.b[8] -; CHECK-NEXT: msub w11, w13, w11, w12 -; CHECK-NEXT: umov w12, v1.b[9] -; CHECK-NEXT: udiv w10, w9, w8 -; CHECK-NEXT: umov w13, v0.b[9] -; CHECK-NEXT: fmov s2, w11 -; CHECK-NEXT: umov w11, v0.b[10] -; CHECK-NEXT: msub w8, w10, w8, w9 -; CHECK-NEXT: umov w10, v1.b[10] -; CHECK-NEXT: udiv w16, w15, w14 -; CHECK-NEXT: mov v2.b[1], w8 -; CHECK-NEXT: msub w8, w16, w14, w15 -; CHECK-NEXT: umov w15, v1.b[11] -; CHECK-NEXT: udiv w0, w18, w17 -; CHECK-NEXT: umov w16, v0.b[11] -; CHECK-NEXT: mov v2.b[2], w8 -; CHECK-NEXT: msub w14, w0, w17, w18 -; CHECK-NEXT: umov w18, v1.b[12] -; CHECK-NEXT: udiv w3, w2, w1 -; CHECK-NEXT: umov w0, v0.b[12] -; CHECK-NEXT: mov v2.b[3], w14 -; CHECK-NEXT: msub w14, w3, w1, w2 -; CHECK-NEXT: umov w2, v1.b[13] -; CHECK-NEXT: udiv w6, w5, w4 -; CHECK-NEXT: umov w3, v0.b[13] -; CHECK-NEXT: mov v2.b[4], w14 -; CHECK-NEXT: msub w17, w6, w4, w5 +; CHECK-NEXT: umov w27, v1.b[9] +; CHECK-NEXT: udiv w8, w13, w16 +; CHECK-NEXT: umov w28, v0.b[9] +; CHECK-NEXT: umov w30, v1.b[10] +; CHECK-NEXT: umov w10, v0.b[10] +; CHECK-NEXT: stp w8, w9, [sp, #24] // 8-byte Folded Spill +; CHECK-NEXT: udiv w9, w15, w14 +; CHECK-NEXT: ldr w6, [sp, #28] // 4-byte Folded Reload +; CHECK-NEXT: msub w0, w6, w3, w0 +; CHECK-NEXT: udiv w8, w18, w17 +; CHECK-NEXT: stp w8, w9, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: udiv w9, w2, w1 +; CHECK-NEXT: ldp w6, w3, [sp, #20] // 8-byte Folded Reload +; CHECK-NEXT: msub w13, w3, w16, w13 +; CHECK-NEXT: umov w16, v1.b[12] +; CHECK-NEXT: msub w14, w6, w14, w15 +; CHECK-NEXT: umov w3, v0.b[12] +; CHECK-NEXT: udiv w8, w5, w4 +; CHECK-NEXT: fmov s2, w13 +; CHECK-NEXT: ldr w13, [sp, #16] // 4-byte Folded Reload +; CHECK-NEXT: mov v2.b[1], w0 +; CHECK-NEXT: msub w13, w13, w17, w18 +; CHECK-NEXT: umov w17, v1.b[13] +; CHECK-NEXT: mov v2.b[2], w14 +; CHECK-NEXT: umov w18, v0.b[13] +; CHECK-NEXT: stp w8, w9, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: udiv w20, w19, w7 -; CHECK-NEXT: mov v2.b[5], w17 -; CHECK-NEXT: msub w17, w20, w7, w19 +; CHECK-NEXT: ldp w0, w14, [sp, #8] // 8-byte Folded Reload +; CHECK-NEXT: mov v2.b[3], w13 +; CHECK-NEXT: umov w9, v1.b[11] +; CHECK-NEXT: umov w8, v0.b[11] +; CHECK-NEXT: msub w14, w14, w1, w2 +; CHECK-NEXT: umov w1, v1.b[14] +; CHECK-NEXT: msub w0, w0, w4, w5 +; CHECK-NEXT: umov w2, v0.b[14] ; CHECK-NEXT: udiv w23, w22, w21 -; CHECK-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: mov v2.b[6], w17 -; CHECK-NEXT: msub w1, w23, w21, w22 +; CHECK-NEXT: mov v2.b[4], w14 +; CHECK-NEXT: msub w14, w20, w7, w19 +; CHECK-NEXT: ldp x20, x19, [sp, #112] // 16-byte Folded Reload +; CHECK-NEXT: mov v2.b[5], w0 +; CHECK-NEXT: mov v2.b[6], w14 +; CHECK-NEXT: msub w0, w23, w21, w22 ; CHECK-NEXT: udiv w26, w25, w24 -; CHECK-NEXT: ldp x22, x21, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: mov v2.b[7], w1 -; CHECK-NEXT: msub w1, w26, w24, w25 -; CHECK-NEXT: udiv w9, w13, w12 -; CHECK-NEXT: ldp x24, x23, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: mov v2.b[8], w1 -; CHECK-NEXT: msub w9, w9, w12, w13 -; CHECK-NEXT: umov w13, v1.b[15] -; CHECK-NEXT: udiv w8, w11, w10 -; CHECK-NEXT: mov v2.b[9], w9 -; CHECK-NEXT: umov w9, v1.b[14] -; CHECK-NEXT: msub w8, w8, w10, w11 -; CHECK-NEXT: umov w10, v0.b[14] -; CHECK-NEXT: udiv w14, w16, w15 -; CHECK-NEXT: mov v2.b[10], w8 -; CHECK-NEXT: msub w11, w14, w15, w16 -; CHECK-NEXT: umov w14, v0.b[15] -; CHECK-NEXT: udiv w17, w0, w18 -; CHECK-NEXT: mov v2.b[11], w11 -; CHECK-NEXT: msub w11, w17, w18, w0 -; CHECK-NEXT: udiv w12, w3, w2 -; CHECK-NEXT: mov v2.b[12], w11 -; CHECK-NEXT: msub w12, w12, w2, w3 -; CHECK-NEXT: udiv w8, w10, w9 -; CHECK-NEXT: mov v2.b[13], w12 -; CHECK-NEXT: msub w8, w8, w9, w10 -; CHECK-NEXT: udiv w11, w14, w13 +; CHECK-NEXT: ldp x22, x21, [sp, #96] // 16-byte Folded Reload +; CHECK-NEXT: mov v2.b[7], w0 +; CHECK-NEXT: msub w4, w26, w24, w25 +; CHECK-NEXT: udiv w29, w28, w27 +; CHECK-NEXT: ldp x24, x23, [sp, #80] // 16-byte Folded Reload +; CHECK-NEXT: mov v2.b[8], w4 +; CHECK-NEXT: ldp x26, x25, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: msub w5, w29, w27, w28 +; CHECK-NEXT: udiv w12, w10, w30 +; CHECK-NEXT: ldp x28, x27, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: mov v2.b[9], w5 +; CHECK-NEXT: msub w10, w12, w30, w10 +; CHECK-NEXT: udiv w11, w8, w9 +; CHECK-NEXT: ldp x29, x30, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: mov v2.b[10], w10 +; CHECK-NEXT: msub w8, w11, w9, w8 +; CHECK-NEXT: umov w9, v1.b[15] +; CHECK-NEXT: udiv w15, w3, w16 +; CHECK-NEXT: umov w11, v0.b[15] +; CHECK-NEXT: mov v2.b[11], w8 +; CHECK-NEXT: msub w12, w15, w16, w3 +; CHECK-NEXT: udiv w13, w18, w17 +; CHECK-NEXT: mov v2.b[12], w12 +; CHECK-NEXT: msub w10, w13, w17, w18 +; CHECK-NEXT: udiv w14, w2, w1 +; CHECK-NEXT: mov v2.b[13], w10 +; CHECK-NEXT: msub w8, w14, w1, w2 +; CHECK-NEXT: udiv w15, w11, w9 ; CHECK-NEXT: mov v2.b[14], w8 -; CHECK-NEXT: msub w8, w11, w13, w14 -; CHECK-NEXT: mov v2.b[15], w8 +; CHECK-NEXT: msub w9, w15, w9, w11 +; CHECK-NEXT: mov v2.b[15], w9 ; CHECK-NEXT: mov v0.16b, v2.16b -; CHECK-NEXT: ldp x26, x25, [sp], #64 // 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #128 ; CHECK-NEXT: ret %tmp3 = urem <16 x i8> %A, %B; ret <16 x i8> %tmp3 @@ -1297,21 +1343,21 @@ ; CHECK-NEXT: umov w12, v0.h[0] ; CHECK-NEXT: umov w8, v1.h[1] ; CHECK-NEXT: umov w9, v0.h[1] +; CHECK-NEXT: udiv w13, w12, w11 ; CHECK-NEXT: umov w14, v1.h[2] ; CHECK-NEXT: umov w15, v0.h[2] -; CHECK-NEXT: udiv w13, w12, w11 +; CHECK-NEXT: umov w17, v1.h[3] +; CHECK-NEXT: umov w18, v0.h[3] ; CHECK-NEXT: msub w11, w13, w11, w12 -; CHECK-NEXT: umov w12, v1.h[3] ; CHECK-NEXT: udiv w10, w9, w8 -; CHECK-NEXT: umov w13, v0.h[3] ; CHECK-NEXT: fmov s0, w11 ; CHECK-NEXT: msub w8, w10, w8, w9 ; CHECK-NEXT: udiv w16, w15, w14 ; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: msub w10, w16, w14, w15 -; CHECK-NEXT: udiv w9, w13, w12 -; CHECK-NEXT: mov v0.h[2], w10 -; CHECK-NEXT: msub w8, w9, w12, w13 +; CHECK-NEXT: msub w9, w16, w14, w15 +; CHECK-NEXT: udiv w0, w18, w17 +; CHECK-NEXT: mov v0.h[2], w9 +; CHECK-NEXT: msub w8, w0, w17, w18 ; CHECK-NEXT: mov v0.h[3], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret @@ -1322,47 +1368,51 @@ define <8 x i16> @urem8x16(<8 x i16> %A, <8 x i16> %B) { ; CHECK-LABEL: urem8x16: ; CHECK: // %bb.0: +; CHECK-NEXT: str x19, [sp, #-16]! // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: .cfi_offset w19, -16 ; CHECK-NEXT: umov w11, v1.h[0] ; CHECK-NEXT: umov w12, v0.h[0] ; CHECK-NEXT: umov w8, v1.h[1] ; CHECK-NEXT: umov w9, v0.h[1] +; CHECK-NEXT: udiv w13, w12, w11 ; CHECK-NEXT: umov w14, v1.h[2] ; CHECK-NEXT: umov w15, v0.h[2] ; CHECK-NEXT: umov w17, v1.h[3] ; CHECK-NEXT: umov w18, v0.h[3] -; CHECK-NEXT: udiv w13, w12, w11 ; CHECK-NEXT: umov w1, v1.h[4] ; CHECK-NEXT: umov w2, v0.h[4] +; CHECK-NEXT: umov w4, v1.h[5] +; CHECK-NEXT: umov w5, v0.h[5] +; CHECK-NEXT: umov w7, v1.h[6] +; CHECK-NEXT: umov w19, v0.h[6] ; CHECK-NEXT: msub w11, w13, w11, w12 -; CHECK-NEXT: umov w12, v1.h[5] +; CHECK-NEXT: umov w13, v0.h[7] ; CHECK-NEXT: udiv w10, w9, w8 -; CHECK-NEXT: umov w13, v0.h[5] ; CHECK-NEXT: fmov s2, w11 -; CHECK-NEXT: umov w11, v0.h[6] +; CHECK-NEXT: umov w11, v1.h[7] ; CHECK-NEXT: msub w8, w10, w8, w9 -; CHECK-NEXT: umov w10, v1.h[6] ; CHECK-NEXT: udiv w16, w15, w14 ; CHECK-NEXT: mov v2.h[1], w8 -; CHECK-NEXT: msub w8, w16, w14, w15 -; CHECK-NEXT: umov w15, v1.h[7] +; CHECK-NEXT: msub w9, w16, w14, w15 ; CHECK-NEXT: udiv w0, w18, w17 -; CHECK-NEXT: umov w16, v0.h[7] -; CHECK-NEXT: mov v2.h[2], w8 -; CHECK-NEXT: msub w14, w0, w17, w18 +; CHECK-NEXT: mov v2.h[2], w9 +; CHECK-NEXT: msub w10, w0, w17, w18 ; CHECK-NEXT: udiv w3, w2, w1 -; CHECK-NEXT: mov v2.h[3], w14 -; CHECK-NEXT: msub w14, w3, w1, w2 -; CHECK-NEXT: udiv w9, w13, w12 -; CHECK-NEXT: mov v2.h[4], w14 -; CHECK-NEXT: msub w9, w9, w12, w13 -; CHECK-NEXT: udiv w8, w11, w10 +; CHECK-NEXT: mov v2.h[3], w10 +; CHECK-NEXT: msub w8, w3, w1, w2 +; CHECK-NEXT: udiv w6, w5, w4 +; CHECK-NEXT: mov v2.h[4], w8 +; CHECK-NEXT: msub w9, w6, w4, w5 +; CHECK-NEXT: udiv w12, w19, w7 ; CHECK-NEXT: mov v2.h[5], w9 -; CHECK-NEXT: msub w8, w8, w10, w11 -; CHECK-NEXT: udiv w12, w16, w15 -; CHECK-NEXT: mov v2.h[6], w8 -; CHECK-NEXT: msub w8, w12, w15, w16 +; CHECK-NEXT: msub w10, w12, w7, w19 +; CHECK-NEXT: udiv w14, w13, w11 +; CHECK-NEXT: mov v2.h[6], w10 +; CHECK-NEXT: msub w8, w14, w11, w13 ; CHECK-NEXT: mov v2.h[7], w8 ; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ldr x19, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %tmp3 = urem <8 x i16> %A, %B; ret <8 x i16> %tmp3 @@ -1407,25 +1457,25 @@ define <4 x i32> @urem4x32(<4 x i32> %A, <4 x i32> %B) { ; CHECK-LABEL: urem4x32: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov w11, s1 +; CHECK-NEXT: fmov w8, s1 ; CHECK-NEXT: fmov w12, s0 -; CHECK-NEXT: mov w8, v1.s[1] -; CHECK-NEXT: mov w9, v0.s[1] +; CHECK-NEXT: mov w9, v1.s[1] +; CHECK-NEXT: mov w10, v0.s[1] ; CHECK-NEXT: mov w14, v1.s[2] ; CHECK-NEXT: mov w15, v0.s[2] -; CHECK-NEXT: udiv w13, w12, w11 +; CHECK-NEXT: udiv w13, w12, w8 ; CHECK-NEXT: mov w17, v1.s[3] ; CHECK-NEXT: mov w18, v0.s[3] -; CHECK-NEXT: msub w11, w13, w11, w12 -; CHECK-NEXT: udiv w10, w9, w8 -; CHECK-NEXT: fmov s0, w11 -; CHECK-NEXT: msub w8, w10, w8, w9 +; CHECK-NEXT: msub w8, w13, w8, w12 +; CHECK-NEXT: udiv w11, w10, w9 +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: msub w9, w11, w9, w10 ; CHECK-NEXT: udiv w16, w15, w14 -; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: msub w10, w16, w14, w15 -; CHECK-NEXT: udiv w9, w18, w17 +; CHECK-NEXT: udiv w0, w18, w17 ; CHECK-NEXT: mov v0.s[2], w10 -; CHECK-NEXT: msub w8, w9, w17, w18 +; CHECK-NEXT: msub w8, w0, w17, w18 ; CHECK-NEXT: mov v0.s[3], w8 ; CHECK-NEXT: ret %tmp3 = urem <4 x i32> %A, %B; diff --git a/llvm/test/CodeGen/AArch64/arm64-nvcast.ll b/llvm/test/CodeGen/AArch64/arm64-nvcast.ll --- a/llvm/test/CodeGen/AArch64/arm64-nvcast.ll +++ b/llvm/test/CodeGen/AArch64/arm64-nvcast.ll @@ -10,8 +10,8 @@ ; CHECK-NEXT: and x8, x1, #0x3 ; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: fmov.2d v0, #2.00000000 -; CHECK-NEXT: bfi x9, x8, #2, #2 ; CHECK-NEXT: str q0, [sp] +; CHECK-NEXT: bfi x9, x8, #2, #2 ; CHECK-NEXT: ldr s0, [x9] ; CHECK-NEXT: str s0, [x0] ; CHECK-NEXT: add sp, sp, #16 @@ -31,8 +31,8 @@ ; CHECK-NEXT: and x8, x1, #0x3 ; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: movi.16b v0, #63 -; CHECK-NEXT: bfi x9, x8, #2, #2 ; CHECK-NEXT: str q0, [sp] +; CHECK-NEXT: bfi x9, x8, #2, #2 ; CHECK-NEXT: ldr s0, [x9] ; CHECK-NEXT: str s0, [x0] ; CHECK-NEXT: add sp, sp, #16 diff --git a/llvm/test/CodeGen/AArch64/arm64-promote-const-complex-initializers.ll b/llvm/test/CodeGen/AArch64/arm64-promote-const-complex-initializers.ll --- a/llvm/test/CodeGen/AArch64/arm64-promote-const-complex-initializers.ll +++ b/llvm/test/CodeGen/AArch64/arm64-promote-const-complex-initializers.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -o - %s | FileCheck %s ; AsmPrinter cannot lower floating point constant expressions in global @@ -8,54 +9,37 @@ target triple = "arm64-apple-ios14.0.0" define [1 x <4 x float>] @test1() { -; CHECK-LABEL: .p2align 4 ; -- Begin function test1 -; CHECK-NEXT: lCPI0_0: -; CHECK-NEXT: .quad 0 ; 0x0 -; CHECK-NEXT: .quad 4575657221408423936 ; 0x3f80000000000000 -; CHECK-NEXT: .section __TEXT,__text,regular,pure_instructions -; CHECK-NEXT: .globl _test1 -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: _test1: ; @test1 -; CHECK-NEXT: .cfi_startproc -; CHECK-NEXT: ; %bb.0: -; CHECK-NEXT: Lloh0: -; CHECK-NEXT: adrp x8, lCPI0_0@PAGE -; CHECK-NEXT: Lloh1: -; CHECK-NEXT: ldr q0, [x8, lCPI0_0@PAGEOFF] -; CHECK-NEXT: ret +; CHECK-LABEL: test1: +; CHECK: ; %bb.0: +; CHECK-NEXT: Lloh0: +; CHECK-NEXT: adrp x8, lCPI0_0@PAGE +; CHECK-NEXT: Lloh1: +; CHECK-NEXT: ldr q0, [x8, lCPI0_0@PAGEOFF] +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdr Lloh0, Lloh1 ret [1 x <4 x float>] [<4 x float> bitcast (<1 x i128> to <4 x float>)] } define [1 x <4 x float>] @test2() { -; CHECK-LABEL: .p2align 4 ; -- Begin function test2 -; CHECK-NEXT: lCPI1_0: -; CHECK-NEXT: .long 0x00000000 ; float 0 -; CHECK-NEXT: .long 0x00000000 ; float 0 -; CHECK-NEXT: .long 0x00000000 ; float 0 -; CHECK-NEXT: .long 0x3f800000 ; float 1 -; CHECK-NEXT: .section __TEXT,__text,regular,pure_instructions -; CHECK-NEXT: .globl _test2 -; CHECK-NEXT: .p2align 2 -; CHECK-NEXT: _test2: ; @test2 -; CHECK-NEXT: .cfi_startproc -; CHECK-NEXT: ; %bb.0: -; CHECK-NEXT: Lloh2: -; CHECK-NEXT: adrp x8, lCPI1_0@PAGE -; CHECK-NEXT: Lloh3: -; CHECK-NEXT: ldr q1, [x8, lCPI1_0@PAGEOFF] -; CHECK-NEXT: mov s2, v1[1] -; CHECK-NEXT: fneg s0, s1 -; CHECK-NEXT: mov s3, v1[2] -; CHECK-NEXT: mov s1, v1[3] -; CHECK-NEXT: fneg s2, s2 -; CHECK-NEXT: fneg s1, s1 -; CHECK-NEXT: mov.s v0[1], v2[0] -; CHECK-NEXT: fneg s2, s3 -; CHECK-NEXT: mov.s v0[2], v2[0] -; CHECK-NEXT: mov.s v0[3], v1[0] -; CHECK-NEXT: ret -; +; CHECK-LABEL: test2: +; CHECK: ; %bb.0: +; CHECK-NEXT: Lloh2: +; CHECK-NEXT: adrp x8, lCPI1_0@PAGE +; CHECK-NEXT: Lloh3: +; CHECK-NEXT: ldr q1, [x8, lCPI1_0@PAGEOFF] +; CHECK-NEXT: mov s0, v1[1] +; CHECK-NEXT: mov s3, v1[2] +; CHECK-NEXT: fneg s2, s0 +; CHECK-NEXT: fneg s0, s1 +; CHECK-NEXT: fneg s3, s3 +; CHECK-NEXT: mov s1, v1[3] +; CHECK-NEXT: fneg s1, s1 +; CHECK-NEXT: mov.s v0[1], v2[0] +; CHECK-NEXT: mov.s v0[2], v3[0] +; CHECK-NEXT: mov.s v0[3], v1[0] +; CHECK-NEXT: ret +; CHECK-NEXT: .loh AdrpLdr Lloh2, Lloh3 ret [1 x <4 x float>] [<4 x float> bitcast (<1 x i128> to <4 x float>), i32 0)), float fneg (float extractelement (<4 x float> bitcast (<1 x i128> to <4 x float>), i32 1)), diff --git a/llvm/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll b/llvm/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll --- a/llvm/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll +++ b/llvm/test/CodeGen/AArch64/arm64-setcc-int-to-fp-combine.ll @@ -4,9 +4,9 @@ define <4 x float> @foo(<4 x float> %val, <4 x float> %test) nounwind { ; CHECK-LABEL: foo: ; CHECK: ; %bb.0: -; CHECK-NEXT: fmov.4s v2, #1.00000000 ; CHECK-NEXT: fcmeq.4s v0, v0, v1 -; CHECK-NEXT: and.16b v0, v0, v2 +; CHECK-NEXT: fmov.4s v1, #1.00000000 +; CHECK-NEXT: and.16b v0, v0, v1 ; CHECK-NEXT: ret %cmp = fcmp oeq <4 x float> %val, %test %ext = zext <4 x i1> %cmp to <4 x i32> @@ -19,9 +19,9 @@ define void @foo1(<4 x float> %val, <4 x float> %test, <4 x double>* %p) nounwind { ; CHECK-LABEL: foo1: ; CHECK: ; %bb.0: -; CHECK-NEXT: movi.4s v2, #1 ; CHECK-NEXT: fcmeq.4s v0, v0, v1 -; CHECK-NEXT: and.16b v0, v0, v2 +; CHECK-NEXT: movi.4s v1, #1 +; CHECK-NEXT: and.16b v0, v0, v1 ; CHECK-NEXT: ushll2.2d v1, v0, #0 ; CHECK-NEXT: ushll.2d v0, v0, #0 ; CHECK-NEXT: scvtf.2d v1, v1 @@ -44,8 +44,8 @@ ; CHECK-NEXT: adrp x8, lCPI2_0@PAGE ; CHECK-NEXT: fcmeq.4s v0, v0, v1 ; CHECK-NEXT: Lloh1: -; CHECK-NEXT: ldr q1, [x8, lCPI2_0@PAGEOFF] -; CHECK-NEXT: and.16b v0, v0, v1 +; CHECK-NEXT: ldr q2, [x8, lCPI2_0@PAGEOFF] +; CHECK-NEXT: and.16b v0, v0, v2 ; CHECK-NEXT: ret ; CHECK-NEXT: .loh AdrpLdr Lloh0, Lloh1 %cmp = fcmp oeq <4 x float> %val, %test diff --git a/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll b/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll --- a/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll +++ b/llvm/test/CodeGen/AArch64/arm64-sli-sri-opt.ll @@ -374,8 +374,8 @@ ; CHECK-LABEL: testLeftBad2x64: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x8, #10 -; CHECK-NEXT: movk x8, #1, lsl #48 ; CHECK-NEXT: shl.2d v1, v1, #48 +; CHECK-NEXT: movk x8, #1, lsl #48 ; CHECK-NEXT: dup.2d v2, x8 ; CHECK-NEXT: and.16b v0, v0, v2 ; CHECK-NEXT: orr.16b v0, v0, v1 @@ -405,8 +405,8 @@ ; CHECK-LABEL: testRightBad2x64: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x8, #10 -; CHECK-NEXT: movk x8, #1, lsl #48 ; CHECK-NEXT: ushr.2d v1, v1, #48 +; CHECK-NEXT: movk x8, #1, lsl #48 ; CHECK-NEXT: dup.2d v2, x8 ; CHECK-NEXT: and.16b v0, v0, v2 ; CHECK-NEXT: orr.16b v0, v0, v1 diff --git a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll --- a/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll +++ b/llvm/test/CodeGen/AArch64/arm64-subvector-extend.ll @@ -32,8 +32,9 @@ define <16 x i16> @func3(<16 x i8> %v0) nounwind { ; CHECK-LABEL: func3: ; CHECK: // %bb.0: +; CHECK-NEXT: ushll.8h v2, v0, #0 ; CHECK-NEXT: ushll2.8h v1, v0, #0 -; CHECK-NEXT: ushll.8h v0, v0, #0 +; CHECK-NEXT: mov.16b v0, v2 ; CHECK-NEXT: ret %r = zext <16 x i8> %v0 to <16 x i16> ret <16 x i16> %r @@ -42,8 +43,9 @@ define <16 x i16> @func4(<16 x i8> %v0) nounwind { ; CHECK-LABEL: func4: ; CHECK: // %bb.0: +; CHECK-NEXT: sshll.8h v2, v0, #0 ; CHECK-NEXT: sshll2.8h v1, v0, #0 -; CHECK-NEXT: sshll.8h v0, v0, #0 +; CHECK-NEXT: mov.16b v0, v2 ; CHECK-NEXT: ret %r = sext <16 x i8> %v0 to <16 x i16> ret <16 x i16> %r @@ -76,8 +78,9 @@ define <8 x i32> @afunc3(<8 x i16> %v0) nounwind { ; CHECK-LABEL: afunc3: ; CHECK: // %bb.0: +; CHECK-NEXT: ushll.4s v2, v0, #0 ; CHECK-NEXT: ushll2.4s v1, v0, #0 -; CHECK-NEXT: ushll.4s v0, v0, #0 +; CHECK-NEXT: mov.16b v0, v2 ; CHECK-NEXT: ret %r = zext <8 x i16> %v0 to <8 x i32> ret <8 x i32> %r @@ -86,8 +89,9 @@ define <8 x i32> @afunc4(<8 x i16> %v0) nounwind { ; CHECK-LABEL: afunc4: ; CHECK: // %bb.0: +; CHECK-NEXT: sshll.4s v2, v0, #0 ; CHECK-NEXT: sshll2.4s v1, v0, #0 -; CHECK-NEXT: sshll.4s v0, v0, #0 +; CHECK-NEXT: mov.16b v0, v2 ; CHECK-NEXT: ret %r = sext <8 x i16> %v0 to <8 x i32> ret <8 x i32> %r @@ -96,9 +100,9 @@ define <8 x i32> @bfunc1(<8 x i8> %v0) nounwind { ; CHECK-LABEL: bfunc1: ; CHECK: // %bb.0: -; CHECK-NEXT: ushll.8h v0, v0, #0 -; CHECK-NEXT: ushll2.4s v1, v0, #0 -; CHECK-NEXT: ushll.4s v0, v0, #0 +; CHECK-NEXT: ushll.8h v1, v0, #0 +; CHECK-NEXT: ushll.4s v0, v1, #0 +; CHECK-NEXT: ushll2.4s v1, v1, #0 ; CHECK-NEXT: ret %r = zext <8 x i8> %v0 to <8 x i32> ret <8 x i32> %r @@ -107,9 +111,9 @@ define <8 x i32> @bfunc2(<8 x i8> %v0) nounwind { ; CHECK-LABEL: bfunc2: ; CHECK: // %bb.0: -; CHECK-NEXT: sshll.8h v0, v0, #0 -; CHECK-NEXT: sshll2.4s v1, v0, #0 -; CHECK-NEXT: sshll.4s v0, v0, #0 +; CHECK-NEXT: sshll.8h v1, v0, #0 +; CHECK-NEXT: sshll.4s v0, v1, #0 +; CHECK-NEXT: sshll2.4s v1, v1, #0 ; CHECK-NEXT: ret %r = sext <8 x i8> %v0 to <8 x i32> ret <8 x i32> %r @@ -122,8 +126,9 @@ define <4 x i64> @zfunc1(<4 x i32> %v0) nounwind { ; CHECK-LABEL: zfunc1: ; CHECK: // %bb.0: +; CHECK-NEXT: ushll.2d v2, v0, #0 ; CHECK-NEXT: ushll2.2d v1, v0, #0 -; CHECK-NEXT: ushll.2d v0, v0, #0 +; CHECK-NEXT: mov.16b v0, v2 ; CHECK-NEXT: ret %r = zext <4 x i32> %v0 to <4 x i64> ret <4 x i64> %r @@ -132,8 +137,9 @@ define <4 x i64> @zfunc2(<4 x i32> %v0) nounwind { ; CHECK-LABEL: zfunc2: ; CHECK: // %bb.0: +; CHECK-NEXT: sshll.2d v2, v0, #0 ; CHECK-NEXT: sshll2.2d v1, v0, #0 -; CHECK-NEXT: sshll.2d v0, v0, #0 +; CHECK-NEXT: mov.16b v0, v2 ; CHECK-NEXT: ret %r = sext <4 x i32> %v0 to <4 x i64> ret <4 x i64> %r @@ -142,9 +148,9 @@ define <4 x i64> @bfunc3(<4 x i16> %v0) nounwind { ; CHECK-LABEL: bfunc3: ; CHECK: // %bb.0: -; CHECK-NEXT: ushll.4s v0, v0, #0 -; CHECK-NEXT: ushll2.2d v1, v0, #0 -; CHECK-NEXT: ushll.2d v0, v0, #0 +; CHECK-NEXT: ushll.4s v1, v0, #0 +; CHECK-NEXT: ushll.2d v0, v1, #0 +; CHECK-NEXT: ushll2.2d v1, v1, #0 ; CHECK-NEXT: ret %r = zext <4 x i16> %v0 to <4 x i64> ret <4 x i64> %r @@ -153,9 +159,9 @@ define <4 x i64> @cfunc4(<4 x i16> %v0) nounwind { ; CHECK-LABEL: cfunc4: ; CHECK: // %bb.0: -; CHECK-NEXT: sshll.4s v0, v0, #0 -; CHECK-NEXT: sshll2.2d v1, v0, #0 -; CHECK-NEXT: sshll.2d v0, v0, #0 +; CHECK-NEXT: sshll.4s v1, v0, #0 +; CHECK-NEXT: sshll.2d v0, v1, #0 +; CHECK-NEXT: sshll2.2d v1, v1, #0 ; CHECK-NEXT: ret %r = sext <4 x i16> %v0 to <4 x i64> ret <4 x i64> %r @@ -165,9 +171,9 @@ ; CHECK-LABEL: zext_v4i8_to_v4i64: ; CHECK: // %bb.0: ; CHECK-NEXT: bic.4h v0, #255, lsl #8 -; CHECK-NEXT: ushll.4s v0, v0, #0 -; CHECK-NEXT: ushll2.2d v1, v0, #0 -; CHECK-NEXT: ushll.2d v0, v0, #0 +; CHECK-NEXT: ushll.4s v1, v0, #0 +; CHECK-NEXT: ushll.2d v0, v1, #0 +; CHECK-NEXT: ushll2.2d v1, v1, #0 ; CHECK-NEXT: ret %r = zext <4 x i8> %v0 to <4 x i64> ret <4 x i64> %r @@ -177,12 +183,12 @@ ; CHECK-LABEL: sext_v4i8_to_v4i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ushll.4s v0, v0, #0 -; CHECK-NEXT: ushll2.2d v1, v0, #0 -; CHECK-NEXT: ushll.2d v0, v0, #0 +; CHECK-NEXT: ushll.2d v1, v0, #0 +; CHECK-NEXT: ushll2.2d v0, v0, #0 ; CHECK-NEXT: shl.2d v1, v1, #56 -; CHECK-NEXT: shl.2d v0, v0, #56 -; CHECK-NEXT: sshr.2d v1, v1, #56 -; CHECK-NEXT: sshr.2d v0, v0, #56 +; CHECK-NEXT: shl.2d v2, v0, #56 +; CHECK-NEXT: sshr.2d v0, v1, #56 +; CHECK-NEXT: sshr.2d v1, v2, #56 ; CHECK-NEXT: ret %r = sext <4 x i8> %v0 to <4 x i64> ret <4 x i64> %r @@ -192,12 +198,12 @@ ; CHECK-LABEL: zext_v8i8_to_v8i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ushll.8h v0, v0, #0 -; CHECK-NEXT: ushll.4s v2, v0, #0 -; CHECK-NEXT: ushll2.4s v4, v0, #0 -; CHECK-NEXT: ushll2.2d v1, v2, #0 -; CHECK-NEXT: ushll.2d v0, v2, #0 -; CHECK-NEXT: ushll2.2d v3, v4, #0 -; CHECK-NEXT: ushll.2d v2, v4, #0 +; CHECK-NEXT: ushll2.4s v2, v0, #0 +; CHECK-NEXT: ushll.4s v0, v0, #0 +; CHECK-NEXT: ushll2.2d v3, v2, #0 +; CHECK-NEXT: ushll2.2d v1, v0, #0 +; CHECK-NEXT: ushll.2d v0, v0, #0 +; CHECK-NEXT: ushll.2d v2, v2, #0 ; CHECK-NEXT: ret %r = zext <8 x i8> %v0 to <8 x i64> ret <8 x i64> %r @@ -207,12 +213,12 @@ ; CHECK-LABEL: sext_v8i8_to_v8i64: ; CHECK: // %bb.0: ; CHECK-NEXT: sshll.8h v0, v0, #0 -; CHECK-NEXT: sshll.4s v2, v0, #0 -; CHECK-NEXT: sshll2.4s v4, v0, #0 -; CHECK-NEXT: sshll2.2d v1, v2, #0 -; CHECK-NEXT: sshll.2d v0, v2, #0 -; CHECK-NEXT: sshll2.2d v3, v4, #0 -; CHECK-NEXT: sshll.2d v2, v4, #0 +; CHECK-NEXT: sshll2.4s v2, v0, #0 +; CHECK-NEXT: sshll.4s v0, v0, #0 +; CHECK-NEXT: sshll2.2d v3, v2, #0 +; CHECK-NEXT: sshll2.2d v1, v0, #0 +; CHECK-NEXT: sshll.2d v0, v0, #0 +; CHECK-NEXT: sshll.2d v2, v2, #0 ; CHECK-NEXT: ret %r = sext <8 x i8> %v0 to <8 x i64> ret <8 x i64> %r @@ -225,63 +231,63 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldr w8, [sp, #64] ; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: ldr w9, [sp] -; CHECK-NEXT: ldr w10, [sp, #8] +; CHECK-NEXT: ldr w9, [sp, #72] +; CHECK-NEXT: ldr w10, [sp, #96] ; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: ldr w8, [sp, #72] -; CHECK-NEXT: mov.b v0[1], w1 -; CHECK-NEXT: movi.16b v2, #1 -; CHECK-NEXT: mov.b v1[1], w8 ; CHECK-NEXT: ldr w8, [sp, #80] +; CHECK-NEXT: mov.b v0[1], w1 +; CHECK-NEXT: ldr w11, [sp, #112] +; CHECK-NEXT: ldr w12, [sp, #128] +; CHECK-NEXT: mov.b v1[1], w9 +; CHECK-NEXT: ldr w9, [sp, #88] ; CHECK-NEXT: mov.b v0[2], w2 +; CHECK-NEXT: ldr w13, [sp, #144] ; CHECK-NEXT: mov.b v1[2], w8 -; CHECK-NEXT: ldr w8, [sp, #88] +; CHECK-NEXT: ldr w8, [sp, #104] ; CHECK-NEXT: mov.b v0[3], w3 -; CHECK-NEXT: mov.b v1[3], w8 -; CHECK-NEXT: ldr w8, [sp, #96] +; CHECK-NEXT: ldr w14, [sp, #160] +; CHECK-NEXT: mov.b v1[3], w9 +; CHECK-NEXT: ldr w9, [sp, #120] ; CHECK-NEXT: mov.b v0[4], w4 -; CHECK-NEXT: mov.b v1[4], w8 -; CHECK-NEXT: ldr w8, [sp, #104] +; CHECK-NEXT: ldr w15, [sp, #176] +; CHECK-NEXT: mov.b v1[4], w10 +; CHECK-NEXT: ldr w10, [sp, #136] ; CHECK-NEXT: mov.b v0[5], w5 +; CHECK-NEXT: ldr w16, [sp, #184] ; CHECK-NEXT: mov.b v1[5], w8 -; CHECK-NEXT: ldr w8, [sp, #112] +; CHECK-NEXT: ldr w8, [sp, #152] ; CHECK-NEXT: mov.b v0[6], w6 -; CHECK-NEXT: mov.b v1[6], w8 -; CHECK-NEXT: ldr w8, [sp, #120] +; CHECK-NEXT: mov.b v1[6], w11 +; CHECK-NEXT: ldr w11, [sp, #168] ; CHECK-NEXT: mov.b v0[7], w7 -; CHECK-NEXT: mov.b v1[7], w8 -; CHECK-NEXT: ldr w8, [sp, #128] +; CHECK-NEXT: mov.b v1[7], w9 +; CHECK-NEXT: ldr w9, [sp] +; CHECK-NEXT: movi.16b v2, #1 +; CHECK-NEXT: mov.b v1[8], w12 +; CHECK-NEXT: ldr w12, [sp, #8] ; CHECK-NEXT: mov.b v0[8], w9 ; CHECK-NEXT: ldr w9, [sp, #16] -; CHECK-NEXT: mov.b v1[8], w8 -; CHECK-NEXT: ldr w8, [sp, #136] -; CHECK-NEXT: mov.b v0[9], w10 +; CHECK-NEXT: mov.b v1[9], w10 ; CHECK-NEXT: ldr w10, [sp, #24] -; CHECK-NEXT: mov.b v1[9], w8 -; CHECK-NEXT: ldr w8, [sp, #144] +; CHECK-NEXT: mov.b v0[9], w12 +; CHECK-NEXT: mov.b v1[10], w13 ; CHECK-NEXT: mov.b v0[10], w9 -; CHECK-NEXT: ldr w9, [sp, #32] -; CHECK-NEXT: mov.b v1[10], w8 -; CHECK-NEXT: ldr w8, [sp, #152] -; CHECK-NEXT: mov.b v0[11], w10 -; CHECK-NEXT: ldr w10, [sp, #40] +; CHECK-NEXT: ldr w9, [sp, #40] ; CHECK-NEXT: mov.b v1[11], w8 -; CHECK-NEXT: ldr w8, [sp, #160] -; CHECK-NEXT: mov.b v0[12], w9 -; CHECK-NEXT: ldr w9, [sp, #48] -; CHECK-NEXT: mov.b v1[12], w8 -; CHECK-NEXT: ldr w8, [sp, #168] -; CHECK-NEXT: mov.b v0[13], w10 -; CHECK-NEXT: ldr w10, [sp, #56] -; CHECK-NEXT: mov.b v1[13], w8 -; CHECK-NEXT: ldr w8, [sp, #176] -; CHECK-NEXT: mov.b v0[14], w9 -; CHECK-NEXT: mov.b v1[14], w8 -; CHECK-NEXT: ldr w8, [sp, #184] -; CHECK-NEXT: mov.b v0[15], w10 -; CHECK-NEXT: mov.b v1[15], w8 -; CHECK-NEXT: and.16b v0, v0, v2 +; CHECK-NEXT: ldr w8, [sp, #32] +; CHECK-NEXT: mov.b v0[11], w10 +; CHECK-NEXT: mov.b v1[12], w14 +; CHECK-NEXT: mov.b v0[12], w8 +; CHECK-NEXT: ldr w8, [sp, #48] +; CHECK-NEXT: mov.b v1[13], w11 +; CHECK-NEXT: mov.b v0[13], w9 +; CHECK-NEXT: ldr w9, [sp, #56] +; CHECK-NEXT: mov.b v1[14], w15 +; CHECK-NEXT: mov.b v0[14], w8 +; CHECK-NEXT: mov.b v1[15], w16 +; CHECK-NEXT: mov.b v0[15], w9 ; CHECK-NEXT: and.16b v1, v1, v2 +; CHECK-NEXT: and.16b v0, v0, v2 ; CHECK-NEXT: ret %res = zext <32 x i1> %arg to <32 x i8> ret <32 x i8> %res @@ -291,65 +297,65 @@ ; CHECK-LABEL: sext_v32i1: ; CHECK: // %bb.0: ; CHECK-NEXT: ldr w8, [sp, #64] -; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: ldr w9, [sp] -; CHECK-NEXT: ldr w10, [sp, #8] -; CHECK-NEXT: fmov s1, w8 +; CHECK-NEXT: fmov s1, w0 +; CHECK-NEXT: ldr w9, [sp, #80] +; CHECK-NEXT: ldr w10, [sp, #88] +; CHECK-NEXT: fmov s0, w8 ; CHECK-NEXT: ldr w8, [sp, #72] -; CHECK-NEXT: mov.b v0[1], w1 -; CHECK-NEXT: mov.b v1[1], w8 -; CHECK-NEXT: ldr w8, [sp, #80] -; CHECK-NEXT: mov.b v0[2], w2 -; CHECK-NEXT: mov.b v1[2], w8 -; CHECK-NEXT: ldr w8, [sp, #88] -; CHECK-NEXT: mov.b v0[3], w3 -; CHECK-NEXT: mov.b v1[3], w8 +; CHECK-NEXT: mov.b v1[1], w1 +; CHECK-NEXT: ldr w11, [sp, #104] +; CHECK-NEXT: ldr w12, [sp, #120] +; CHECK-NEXT: mov.b v0[1], w8 ; CHECK-NEXT: ldr w8, [sp, #96] -; CHECK-NEXT: mov.b v0[4], w4 -; CHECK-NEXT: mov.b v1[4], w8 -; CHECK-NEXT: ldr w8, [sp, #104] -; CHECK-NEXT: mov.b v0[5], w5 -; CHECK-NEXT: mov.b v1[5], w8 -; CHECK-NEXT: ldr w8, [sp, #112] -; CHECK-NEXT: mov.b v0[6], w6 -; CHECK-NEXT: mov.b v1[6], w8 -; CHECK-NEXT: ldr w8, [sp, #120] -; CHECK-NEXT: mov.b v0[7], w7 -; CHECK-NEXT: mov.b v1[7], w8 -; CHECK-NEXT: ldr w8, [sp, #128] -; CHECK-NEXT: mov.b v0[8], w9 -; CHECK-NEXT: ldr w9, [sp, #16] -; CHECK-NEXT: mov.b v1[8], w8 -; CHECK-NEXT: ldr w8, [sp, #136] -; CHECK-NEXT: mov.b v0[9], w10 -; CHECK-NEXT: ldr w10, [sp, #24] -; CHECK-NEXT: mov.b v1[9], w8 +; CHECK-NEXT: mov.b v1[2], w2 +; CHECK-NEXT: ldr w13, [sp, #136] +; CHECK-NEXT: mov.b v0[2], w9 +; CHECK-NEXT: ldr w9, [sp, #112] +; CHECK-NEXT: mov.b v1[3], w3 +; CHECK-NEXT: ldr w14, [sp, #152] +; CHECK-NEXT: mov.b v0[3], w10 +; CHECK-NEXT: ldr w10, [sp, #128] +; CHECK-NEXT: mov.b v1[4], w4 +; CHECK-NEXT: ldr w15, [sp, #168] +; CHECK-NEXT: mov.b v0[4], w8 ; CHECK-NEXT: ldr w8, [sp, #144] -; CHECK-NEXT: mov.b v0[10], w9 -; CHECK-NEXT: ldr w9, [sp, #32] -; CHECK-NEXT: mov.b v1[10], w8 -; CHECK-NEXT: ldr w8, [sp, #152] -; CHECK-NEXT: mov.b v0[11], w10 +; CHECK-NEXT: mov.b v1[5], w5 +; CHECK-NEXT: ldr w16, [sp, #176] +; CHECK-NEXT: mov.b v0[5], w11 +; CHECK-NEXT: ldr w11, [sp, #160] +; CHECK-NEXT: mov.b v1[6], w6 +; CHECK-NEXT: mov.b v0[6], w9 +; CHECK-NEXT: ldr w9, [sp] +; CHECK-NEXT: mov.b v1[7], w7 +; CHECK-NEXT: mov.b v0[7], w12 +; CHECK-NEXT: ldr w12, [sp, #8] +; CHECK-NEXT: mov.b v1[8], w9 +; CHECK-NEXT: ldr w9, [sp, #184] +; CHECK-NEXT: mov.b v0[8], w10 +; CHECK-NEXT: ldr w10, [sp, #16] +; CHECK-NEXT: mov.b v1[9], w12 +; CHECK-NEXT: ldr w12, [sp, #24] +; CHECK-NEXT: mov.b v0[9], w13 +; CHECK-NEXT: mov.b v1[10], w10 ; CHECK-NEXT: ldr w10, [sp, #40] -; CHECK-NEXT: mov.b v1[11], w8 -; CHECK-NEXT: ldr w8, [sp, #160] -; CHECK-NEXT: mov.b v0[12], w9 -; CHECK-NEXT: ldr w9, [sp, #48] +; CHECK-NEXT: mov.b v0[10], w8 +; CHECK-NEXT: ldr w8, [sp, #32] +; CHECK-NEXT: mov.b v1[11], w12 +; CHECK-NEXT: mov.b v0[11], w14 ; CHECK-NEXT: mov.b v1[12], w8 -; CHECK-NEXT: ldr w8, [sp, #168] -; CHECK-NEXT: mov.b v0[13], w10 +; CHECK-NEXT: ldr w8, [sp, #48] +; CHECK-NEXT: mov.b v0[12], w11 +; CHECK-NEXT: mov.b v1[13], w10 ; CHECK-NEXT: ldr w10, [sp, #56] -; CHECK-NEXT: mov.b v1[13], w8 -; CHECK-NEXT: ldr w8, [sp, #176] -; CHECK-NEXT: mov.b v0[14], w9 +; CHECK-NEXT: mov.b v0[13], w15 ; CHECK-NEXT: mov.b v1[14], w8 -; CHECK-NEXT: ldr w8, [sp, #184] -; CHECK-NEXT: mov.b v0[15], w10 -; CHECK-NEXT: mov.b v1[15], w8 -; CHECK-NEXT: shl.16b v0, v0, #7 +; CHECK-NEXT: mov.b v0[14], w16 +; CHECK-NEXT: mov.b v1[15], w10 +; CHECK-NEXT: mov.b v0[15], w9 ; CHECK-NEXT: shl.16b v1, v1, #7 -; CHECK-NEXT: sshr.16b v0, v0, #7 -; CHECK-NEXT: sshr.16b v1, v1, #7 +; CHECK-NEXT: shl.16b v2, v0, #7 +; CHECK-NEXT: sshr.16b v0, v1, #7 +; CHECK-NEXT: sshr.16b v1, v2, #7 ; CHECK-NEXT: ret %res = sext <32 x i1> %arg to <32 x i8> ret <32 x i8> %res @@ -360,129 +366,129 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldr w8, [sp, #320] ; CHECK-NEXT: fmov s0, w0 -; CHECK-NEXT: ldr w9, [sp, #64] ; CHECK-NEXT: ldr w10, [sp, #192] +; CHECK-NEXT: ldr w9, [sp, #328] ; CHECK-NEXT: fmov s3, w8 -; CHECK-NEXT: ldr w8, [sp, #328] -; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: ldr w9, [sp, #200] +; CHECK-NEXT: ldr w8, [sp, #200] ; CHECK-NEXT: fmov s2, w10 -; CHECK-NEXT: ldr w10, [sp, #336] -; CHECK-NEXT: mov.b v3[1], w8 -; CHECK-NEXT: ldr w8, [sp, #72] +; CHECK-NEXT: ldr w10, [sp, #208] ; CHECK-NEXT: mov.b v0[1], w1 -; CHECK-NEXT: ldr w11, [sp, #352] -; CHECK-NEXT: mov.b v2[1], w9 -; CHECK-NEXT: ldr w9, [sp, #80] -; CHECK-NEXT: mov.b v1[1], w8 +; CHECK-NEXT: ldr w11, [sp, #112] +; CHECK-NEXT: mov.b v3[1], w9 +; CHECK-NEXT: ldr w9, [sp, #336] +; CHECK-NEXT: mov.b v2[1], w8 ; CHECK-NEXT: ldr w8, [sp, #344] -; CHECK-NEXT: mov.b v3[2], w10 -; CHECK-NEXT: ldr w10, [sp, #208] ; CHECK-NEXT: mov.b v0[2], w2 -; CHECK-NEXT: ldr w12, [sp, #368] -; CHECK-NEXT: ldr w13, [sp, #384] -; CHECK-NEXT: mov.b v1[2], w9 -; CHECK-NEXT: ldr w9, [sp, #360] +; CHECK-NEXT: ldr w12, [sp, #128] +; CHECK-NEXT: mov.b v3[2], w9 +; CHECK-NEXT: ldr w9, [sp, #216] ; CHECK-NEXT: mov.b v2[2], w10 -; CHECK-NEXT: ldr w10, [sp, #88] +; CHECK-NEXT: ldr w10, [sp, #352] ; CHECK-NEXT: mov.b v3[3], w8 -; CHECK-NEXT: ldr w8, [sp, #216] -; CHECK-NEXT: mov.b v0[3], w3 -; CHECK-NEXT: ldr w14, [sp, #400] -; CHECK-NEXT: mov.b v1[3], w10 +; CHECK-NEXT: ldr w8, [sp, #224] +; CHECK-NEXT: mov.b v2[3], w9 +; CHECK-NEXT: ldr w9, [sp, #360] +; CHECK-NEXT: mov.b v3[4], w10 +; CHECK-NEXT: ldr w10, [sp, #232] +; CHECK-NEXT: mov.b v2[4], w8 +; CHECK-NEXT: ldr w8, [sp, #368] +; CHECK-NEXT: mov.b v3[5], w9 +; CHECK-NEXT: ldr w9, [sp, #240] +; CHECK-NEXT: mov.b v2[5], w10 ; CHECK-NEXT: ldr w10, [sp, #376] -; CHECK-NEXT: mov.b v2[3], w8 -; CHECK-NEXT: ldr w8, [sp, #96] -; CHECK-NEXT: mov.b v3[4], w11 -; CHECK-NEXT: ldr w11, [sp, #224] -; CHECK-NEXT: mov.b v0[4], w4 -; CHECK-NEXT: ldr w15, [sp, #416] -; CHECK-NEXT: mov.b v1[4], w8 +; CHECK-NEXT: mov.b v3[6], w8 +; CHECK-NEXT: ldr w8, [sp, #248] +; CHECK-NEXT: mov.b v2[6], w9 +; CHECK-NEXT: ldr w9, [sp, #384] +; CHECK-NEXT: mov.b v3[7], w10 +; CHECK-NEXT: ldr w10, [sp, #256] +; CHECK-NEXT: mov.b v2[7], w8 ; CHECK-NEXT: ldr w8, [sp, #392] -; CHECK-NEXT: mov.b v2[4], w11 -; CHECK-NEXT: ldr w11, [sp, #104] -; CHECK-NEXT: mov.b v3[5], w9 -; CHECK-NEXT: ldr w9, [sp, #232] +; CHECK-NEXT: mov.b v3[8], w9 +; CHECK-NEXT: ldr w9, [sp, #264] +; CHECK-NEXT: mov.b v2[8], w10 +; CHECK-NEXT: ldr w10, [sp, #400] +; CHECK-NEXT: mov.b v3[9], w8 +; CHECK-NEXT: ldr w8, [sp, #272] +; CHECK-NEXT: mov.b v2[9], w9 +; CHECK-NEXT: ldr w9, [sp, #408] +; CHECK-NEXT: mov.b v3[10], w10 +; CHECK-NEXT: ldr w10, [sp, #280] +; CHECK-NEXT: mov.b v2[10], w8 +; CHECK-NEXT: ldr w8, [sp, #416] +; CHECK-NEXT: mov.b v3[11], w9 +; CHECK-NEXT: ldr w9, [sp, #288] +; CHECK-NEXT: mov.b v2[11], w10 +; CHECK-NEXT: ldr w10, [sp, #424] +; CHECK-NEXT: mov.b v3[12], w8 +; CHECK-NEXT: ldr w8, [sp, #296] +; CHECK-NEXT: mov.b v2[12], w9 +; CHECK-NEXT: ldr w9, [sp, #432] +; CHECK-NEXT: mov.b v3[13], w10 +; CHECK-NEXT: ldr w10, [sp, #304] +; CHECK-NEXT: mov.b v2[13], w8 +; CHECK-NEXT: ldr w8, [sp, #440] +; CHECK-NEXT: mov.b v3[14], w9 +; CHECK-NEXT: ldr w9, [sp, #312] +; CHECK-NEXT: mov.b v2[14], w10 +; CHECK-NEXT: ldr w10, [sp, #64] +; CHECK-NEXT: mov.b v3[15], w8 +; CHECK-NEXT: ldr w8, [sp, #72] +; CHECK-NEXT: mov.b v2[15], w9 +; CHECK-NEXT: ldr w9, [sp, #88] +; CHECK-NEXT: fmov s1, w10 +; CHECK-NEXT: ldr w10, [sp, #96] +; CHECK-NEXT: mov.b v0[3], w3 +; CHECK-NEXT: ldr w13, [sp, #144] +; CHECK-NEXT: ldr w14, [sp, #160] +; CHECK-NEXT: mov.b v1[1], w8 +; CHECK-NEXT: ldr w8, [sp, #80] +; CHECK-NEXT: mov.b v0[4], w4 +; CHECK-NEXT: ldr w15, [sp, #176] +; CHECK-NEXT: ldr w16, [sp, #184] +; CHECK-NEXT: mov.b v1[2], w8 +; CHECK-NEXT: ldr w8, [sp, #104] ; CHECK-NEXT: mov.b v0[5], w5 -; CHECK-NEXT: ldr w16, [sp, #432] -; CHECK-NEXT: mov.b v1[5], w11 -; CHECK-NEXT: ldr w11, [sp, #408] -; CHECK-NEXT: mov.b v2[5], w9 -; CHECK-NEXT: ldr w9, [sp, #112] -; CHECK-NEXT: mov.b v3[6], w12 -; CHECK-NEXT: ldr w12, [sp, #240] +; CHECK-NEXT: mov.b v1[3], w9 +; CHECK-NEXT: ldr w9, [sp, #120] ; CHECK-NEXT: mov.b v0[6], w6 -; CHECK-NEXT: mov.b v1[6], w9 -; CHECK-NEXT: ldr w9, [sp, #424] -; CHECK-NEXT: mov.b v2[6], w12 -; CHECK-NEXT: ldr w12, [sp, #120] -; CHECK-NEXT: mov.b v3[7], w10 -; CHECK-NEXT: ldr w10, [sp, #248] +; CHECK-NEXT: mov.b v1[4], w10 +; CHECK-NEXT: ldr w10, [sp, #136] ; CHECK-NEXT: mov.b v0[7], w7 -; CHECK-NEXT: mov.b v1[7], w12 -; CHECK-NEXT: ldr w12, [sp] -; CHECK-NEXT: mov.b v2[7], w10 -; CHECK-NEXT: ldr w10, [sp, #128] -; CHECK-NEXT: mov.b v3[8], w13 -; CHECK-NEXT: ldr w13, [sp, #256] -; CHECK-NEXT: mov.b v0[8], w12 -; CHECK-NEXT: ldr w12, [sp, #440] -; CHECK-NEXT: mov.b v1[8], w10 -; CHECK-NEXT: ldr w10, [sp, #8] -; CHECK-NEXT: mov.b v2[8], w13 -; CHECK-NEXT: ldr w13, [sp, #136] -; CHECK-NEXT: mov.b v3[9], w8 -; CHECK-NEXT: ldr w8, [sp, #264] -; CHECK-NEXT: mov.b v0[9], w10 -; CHECK-NEXT: ldr w10, [sp, #272] -; CHECK-NEXT: mov.b v1[9], w13 -; CHECK-NEXT: ldr w13, [sp, #16] -; CHECK-NEXT: mov.b v2[9], w8 -; CHECK-NEXT: ldr w8, [sp, #144] -; CHECK-NEXT: mov.b v3[10], w14 -; CHECK-NEXT: ldr w14, [sp, #280] -; CHECK-NEXT: mov.b v0[10], w13 -; CHECK-NEXT: ldr w13, [sp, #296] -; CHECK-NEXT: mov.b v1[10], w8 -; CHECK-NEXT: ldr w8, [sp, #24] -; CHECK-NEXT: mov.b v2[10], w10 -; CHECK-NEXT: ldr w10, [sp, #152] -; CHECK-NEXT: mov.b v3[11], w11 -; CHECK-NEXT: ldr w11, [sp, #288] -; CHECK-NEXT: mov.b v0[11], w8 +; CHECK-NEXT: mov.b v1[5], w8 +; CHECK-NEXT: ldr w8, [sp, #152] +; CHECK-NEXT: movi.16b v4, #1 +; CHECK-NEXT: mov.b v1[6], w11 +; CHECK-NEXT: ldr w11, [sp, #168] +; CHECK-NEXT: and.16b v2, v2, v4 +; CHECK-NEXT: mov.b v1[7], w9 +; CHECK-NEXT: ldr w9, [sp] +; CHECK-NEXT: and.16b v3, v3, v4 +; CHECK-NEXT: mov.b v1[8], w12 +; CHECK-NEXT: ldr w12, [sp, #8] +; CHECK-NEXT: mov.b v0[8], w9 +; CHECK-NEXT: ldr w9, [sp, #16] +; CHECK-NEXT: mov.b v1[9], w10 +; CHECK-NEXT: ldr w10, [sp, #24] +; CHECK-NEXT: mov.b v0[9], w12 +; CHECK-NEXT: mov.b v1[10], w13 +; CHECK-NEXT: mov.b v0[10], w9 +; CHECK-NEXT: ldr w9, [sp, #40] +; CHECK-NEXT: mov.b v1[11], w8 ; CHECK-NEXT: ldr w8, [sp, #32] -; CHECK-NEXT: mov.b v1[11], w10 -; CHECK-NEXT: ldr w10, [sp, #160] -; CHECK-NEXT: mov.b v2[11], w14 -; CHECK-NEXT: mov.b v3[12], w15 +; CHECK-NEXT: mov.b v0[11], w10 +; CHECK-NEXT: mov.b v1[12], w14 ; CHECK-NEXT: mov.b v0[12], w8 -; CHECK-NEXT: ldr w8, [sp, #40] -; CHECK-NEXT: mov.b v1[12], w10 -; CHECK-NEXT: ldr w10, [sp, #168] -; CHECK-NEXT: mov.b v2[12], w11 -; CHECK-NEXT: ldr w11, [sp, #312] -; CHECK-NEXT: mov.b v3[13], w9 -; CHECK-NEXT: ldr w9, [sp, #304] -; CHECK-NEXT: mov.b v0[13], w8 ; CHECK-NEXT: ldr w8, [sp, #48] -; CHECK-NEXT: mov.b v1[13], w10 -; CHECK-NEXT: ldr w10, [sp, #176] -; CHECK-NEXT: mov.b v2[13], w13 -; CHECK-NEXT: mov.b v3[14], w16 +; CHECK-NEXT: mov.b v1[13], w11 +; CHECK-NEXT: mov.b v0[13], w9 +; CHECK-NEXT: ldr w9, [sp, #56] +; CHECK-NEXT: mov.b v1[14], w15 ; CHECK-NEXT: mov.b v0[14], w8 -; CHECK-NEXT: ldr w8, [sp, #56] -; CHECK-NEXT: mov.b v1[14], w10 -; CHECK-NEXT: mov.b v2[14], w9 -; CHECK-NEXT: ldr w9, [sp, #184] -; CHECK-NEXT: movi.16b v4, #1 -; CHECK-NEXT: mov.b v0[15], w8 -; CHECK-NEXT: mov.b v1[15], w9 -; CHECK-NEXT: mov.b v2[15], w11 -; CHECK-NEXT: mov.b v3[15], w12 -; CHECK-NEXT: and.16b v0, v0, v4 +; CHECK-NEXT: mov.b v1[15], w16 +; CHECK-NEXT: mov.b v0[15], w9 ; CHECK-NEXT: and.16b v1, v1, v4 -; CHECK-NEXT: and.16b v2, v2, v4 -; CHECK-NEXT: and.16b v3, v3, v4 +; CHECK-NEXT: and.16b v0, v0, v4 ; CHECK-NEXT: ret %res = zext <64 x i1> %arg to <64 x i8> ret <64 x i8> %res @@ -493,131 +499,131 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldr w8, [sp, #320] ; CHECK-NEXT: fmov s3, w0 -; CHECK-NEXT: ldr w9, [sp, #64] ; CHECK-NEXT: ldr w10, [sp, #192] +; CHECK-NEXT: ldr w9, [sp, #328] ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: ldr w8, [sp, #72] -; CHECK-NEXT: fmov s2, w9 -; CHECK-NEXT: ldr w9, [sp, #200] +; CHECK-NEXT: ldr w8, [sp, #200] ; CHECK-NEXT: fmov s1, w10 -; CHECK-NEXT: ldr w10, [sp, #328] -; CHECK-NEXT: mov.b v3[1], w1 -; CHECK-NEXT: ldr w11, [sp, #344] -; CHECK-NEXT: mov.b v2[1], w8 -; CHECK-NEXT: ldr w8, [sp, #336] -; CHECK-NEXT: mov.b v1[1], w9 -; CHECK-NEXT: ldr w9, [sp, #80] -; CHECK-NEXT: mov.b v0[1], w10 ; CHECK-NEXT: ldr w10, [sp, #208] +; CHECK-NEXT: mov.b v3[1], w1 +; CHECK-NEXT: ldr w11, [sp, #112] +; CHECK-NEXT: mov.b v0[1], w9 +; CHECK-NEXT: ldr w9, [sp, #336] +; CHECK-NEXT: mov.b v1[1], w8 +; CHECK-NEXT: ldr w8, [sp, #344] ; CHECK-NEXT: mov.b v3[2], w2 -; CHECK-NEXT: ldr w12, [sp, #360] -; CHECK-NEXT: mov.b v2[2], w9 -; CHECK-NEXT: ldr w9, [sp, #352] +; CHECK-NEXT: ldr w12, [sp, #128] +; CHECK-NEXT: mov.b v0[2], w9 +; CHECK-NEXT: ldr w9, [sp, #216] ; CHECK-NEXT: mov.b v1[2], w10 -; CHECK-NEXT: ldr w10, [sp, #88] -; CHECK-NEXT: mov.b v0[2], w8 -; CHECK-NEXT: ldr w8, [sp, #216] +; CHECK-NEXT: ldr w10, [sp, #352] +; CHECK-NEXT: mov.b v0[3], w8 +; CHECK-NEXT: ldr w8, [sp, #224] +; CHECK-NEXT: mov.b v1[3], w9 +; CHECK-NEXT: ldr w9, [sp, #360] +; CHECK-NEXT: mov.b v0[4], w10 +; CHECK-NEXT: ldr w10, [sp, #232] +; CHECK-NEXT: mov.b v1[4], w8 +; CHECK-NEXT: ldr w8, [sp, #368] +; CHECK-NEXT: mov.b v0[5], w9 +; CHECK-NEXT: ldr w9, [sp, #240] +; CHECK-NEXT: mov.b v1[5], w10 +; CHECK-NEXT: ldr w10, [sp, #376] +; CHECK-NEXT: mov.b v0[6], w8 +; CHECK-NEXT: ldr w8, [sp, #248] +; CHECK-NEXT: mov.b v1[6], w9 +; CHECK-NEXT: ldr w9, [sp, #384] +; CHECK-NEXT: mov.b v0[7], w10 +; CHECK-NEXT: ldr w10, [sp, #256] +; CHECK-NEXT: mov.b v1[7], w8 +; CHECK-NEXT: ldr w8, [sp, #392] +; CHECK-NEXT: mov.b v0[8], w9 +; CHECK-NEXT: ldr w9, [sp, #264] +; CHECK-NEXT: mov.b v1[8], w10 +; CHECK-NEXT: ldr w10, [sp, #400] +; CHECK-NEXT: mov.b v0[9], w8 +; CHECK-NEXT: ldr w8, [sp, #272] +; CHECK-NEXT: mov.b v1[9], w9 +; CHECK-NEXT: ldr w9, [sp, #408] +; CHECK-NEXT: mov.b v0[10], w10 +; CHECK-NEXT: ldr w10, [sp, #280] +; CHECK-NEXT: mov.b v1[10], w8 +; CHECK-NEXT: ldr w8, [sp, #416] +; CHECK-NEXT: mov.b v0[11], w9 +; CHECK-NEXT: ldr w9, [sp, #288] +; CHECK-NEXT: mov.b v1[11], w10 +; CHECK-NEXT: ldr w10, [sp, #424] +; CHECK-NEXT: mov.b v0[12], w8 +; CHECK-NEXT: ldr w8, [sp, #296] +; CHECK-NEXT: mov.b v1[12], w9 +; CHECK-NEXT: ldr w9, [sp, #432] +; CHECK-NEXT: mov.b v0[13], w10 +; CHECK-NEXT: ldr w10, [sp, #304] +; CHECK-NEXT: mov.b v1[13], w8 +; CHECK-NEXT: ldr w8, [sp, #440] +; CHECK-NEXT: mov.b v0[14], w9 +; CHECK-NEXT: ldr w9, [sp, #312] +; CHECK-NEXT: mov.b v1[14], w10 +; CHECK-NEXT: ldr w10, [sp, #64] +; CHECK-NEXT: mov.b v0[15], w8 +; CHECK-NEXT: ldr w8, [sp, #72] +; CHECK-NEXT: mov.b v1[15], w9 +; CHECK-NEXT: ldr w9, [sp, #88] +; CHECK-NEXT: fmov s2, w10 +; CHECK-NEXT: ldr w10, [sp, #96] ; CHECK-NEXT: mov.b v3[3], w3 -; CHECK-NEXT: ldr w13, [sp, #376] -; CHECK-NEXT: mov.b v2[3], w10 -; CHECK-NEXT: ldr w10, [sp, #368] -; CHECK-NEXT: mov.b v1[3], w8 -; CHECK-NEXT: ldr w8, [sp, #96] -; CHECK-NEXT: mov.b v0[3], w11 -; CHECK-NEXT: ldr w11, [sp, #224] +; CHECK-NEXT: ldr w13, [sp, #144] +; CHECK-NEXT: ldr w14, [sp, #160] +; CHECK-NEXT: shl.16b v4, v1, #7 +; CHECK-NEXT: mov.b v2[1], w8 +; CHECK-NEXT: ldr w8, [sp, #80] ; CHECK-NEXT: mov.b v3[4], w4 -; CHECK-NEXT: ldr w14, [sp, #392] -; CHECK-NEXT: mov.b v2[4], w8 -; CHECK-NEXT: ldr w8, [sp, #384] -; CHECK-NEXT: mov.b v1[4], w11 -; CHECK-NEXT: ldr w11, [sp, #104] -; CHECK-NEXT: mov.b v0[4], w9 -; CHECK-NEXT: ldr w9, [sp, #232] +; CHECK-NEXT: ldr w15, [sp, #176] +; CHECK-NEXT: ldr w16, [sp, #184] +; CHECK-NEXT: shl.16b v5, v0, #7 +; CHECK-NEXT: mov.b v2[2], w8 +; CHECK-NEXT: ldr w8, [sp, #104] ; CHECK-NEXT: mov.b v3[5], w5 -; CHECK-NEXT: ldr w15, [sp, #408] -; CHECK-NEXT: mov.b v2[5], w11 -; CHECK-NEXT: ldr w11, [sp, #400] -; CHECK-NEXT: mov.b v1[5], w9 -; CHECK-NEXT: ldr w9, [sp, #112] -; CHECK-NEXT: mov.b v0[5], w12 -; CHECK-NEXT: ldr w12, [sp, #240] +; CHECK-NEXT: mov.b v2[3], w9 +; CHECK-NEXT: ldr w9, [sp, #120] ; CHECK-NEXT: mov.b v3[6], w6 -; CHECK-NEXT: ldr w16, [sp, #424] -; CHECK-NEXT: mov.b v2[6], w9 -; CHECK-NEXT: ldr w9, [sp, #416] -; CHECK-NEXT: mov.b v1[6], w12 -; CHECK-NEXT: ldr w12, [sp, #120] -; CHECK-NEXT: mov.b v0[6], w10 -; CHECK-NEXT: ldr w10, [sp, #248] +; CHECK-NEXT: mov.b v2[4], w10 +; CHECK-NEXT: ldr w10, [sp, #136] ; CHECK-NEXT: mov.b v3[7], w7 -; CHECK-NEXT: mov.b v2[7], w12 -; CHECK-NEXT: ldr w12, [sp] -; CHECK-NEXT: mov.b v1[7], w10 -; CHECK-NEXT: ldr w10, [sp, #128] -; CHECK-NEXT: mov.b v0[7], w13 -; CHECK-NEXT: ldr w13, [sp, #256] -; CHECK-NEXT: mov.b v3[8], w12 -; CHECK-NEXT: ldr w12, [sp, #432] -; CHECK-NEXT: mov.b v2[8], w10 -; CHECK-NEXT: ldr w10, [sp, #8] -; CHECK-NEXT: mov.b v1[8], w13 -; CHECK-NEXT: ldr w13, [sp, #136] -; CHECK-NEXT: mov.b v0[8], w8 -; CHECK-NEXT: ldr w8, [sp, #264] -; CHECK-NEXT: mov.b v3[9], w10 -; CHECK-NEXT: ldr w10, [sp, #440] -; CHECK-NEXT: mov.b v2[9], w13 -; CHECK-NEXT: ldr w13, [sp, #16] -; CHECK-NEXT: mov.b v1[9], w8 -; CHECK-NEXT: ldr w8, [sp, #144] -; CHECK-NEXT: mov.b v0[9], w14 -; CHECK-NEXT: ldr w14, [sp, #272] -; CHECK-NEXT: mov.b v3[10], w13 -; CHECK-NEXT: ldr w13, [sp, #280] -; CHECK-NEXT: mov.b v2[10], w8 -; CHECK-NEXT: ldr w8, [sp, #24] -; CHECK-NEXT: mov.b v1[10], w14 -; CHECK-NEXT: ldr w14, [sp, #152] -; CHECK-NEXT: mov.b v0[10], w11 -; CHECK-NEXT: ldr w11, [sp, #288] -; CHECK-NEXT: mov.b v3[11], w8 +; CHECK-NEXT: mov.b v2[5], w8 +; CHECK-NEXT: ldr w8, [sp, #152] +; CHECK-NEXT: mov.b v2[6], w11 +; CHECK-NEXT: ldr w11, [sp, #168] +; CHECK-NEXT: mov.b v2[7], w9 +; CHECK-NEXT: ldr w9, [sp] +; CHECK-NEXT: mov.b v2[8], w12 +; CHECK-NEXT: ldr w12, [sp, #8] +; CHECK-NEXT: mov.b v3[8], w9 +; CHECK-NEXT: ldr w9, [sp, #16] +; CHECK-NEXT: mov.b v2[9], w10 +; CHECK-NEXT: ldr w10, [sp, #24] +; CHECK-NEXT: mov.b v3[9], w12 +; CHECK-NEXT: mov.b v2[10], w13 +; CHECK-NEXT: mov.b v3[10], w9 +; CHECK-NEXT: ldr w9, [sp, #40] +; CHECK-NEXT: mov.b v2[11], w8 ; CHECK-NEXT: ldr w8, [sp, #32] -; CHECK-NEXT: mov.b v2[11], w14 -; CHECK-NEXT: ldr w14, [sp, #296] -; CHECK-NEXT: mov.b v1[11], w13 -; CHECK-NEXT: ldr w13, [sp, #160] -; CHECK-NEXT: mov.b v0[11], w15 +; CHECK-NEXT: mov.b v3[11], w10 +; CHECK-NEXT: mov.b v2[12], w14 ; CHECK-NEXT: mov.b v3[12], w8 -; CHECK-NEXT: ldr w8, [sp, #40] -; CHECK-NEXT: mov.b v2[12], w13 -; CHECK-NEXT: ldr w13, [sp, #312] -; CHECK-NEXT: mov.b v1[12], w11 -; CHECK-NEXT: ldr w11, [sp, #168] -; CHECK-NEXT: mov.b v0[12], w9 -; CHECK-NEXT: ldr w9, [sp, #304] -; CHECK-NEXT: mov.b v3[13], w8 ; CHECK-NEXT: ldr w8, [sp, #48] ; CHECK-NEXT: mov.b v2[13], w11 -; CHECK-NEXT: ldr w11, [sp, #176] -; CHECK-NEXT: mov.b v1[13], w14 -; CHECK-NEXT: mov.b v0[13], w16 +; CHECK-NEXT: mov.b v3[13], w9 +; CHECK-NEXT: ldr w9, [sp, #56] +; CHECK-NEXT: mov.b v2[14], w15 ; CHECK-NEXT: mov.b v3[14], w8 -; CHECK-NEXT: ldr w8, [sp, #56] -; CHECK-NEXT: mov.b v2[14], w11 -; CHECK-NEXT: mov.b v1[14], w9 -; CHECK-NEXT: ldr w9, [sp, #184] -; CHECK-NEXT: mov.b v0[14], w12 -; CHECK-NEXT: mov.b v3[15], w8 -; CHECK-NEXT: mov.b v2[15], w9 -; CHECK-NEXT: mov.b v1[15], w13 -; CHECK-NEXT: mov.b v0[15], w10 -; CHECK-NEXT: shl.16b v3, v3, #7 +; CHECK-NEXT: mov.b v2[15], w16 +; CHECK-NEXT: mov.b v3[15], w9 ; CHECK-NEXT: shl.16b v2, v2, #7 -; CHECK-NEXT: shl.16b v4, v1, #7 -; CHECK-NEXT: shl.16b v5, v0, #7 -; CHECK-NEXT: sshr.16b v0, v3, #7 +; CHECK-NEXT: shl.16b v3, v3, #7 ; CHECK-NEXT: sshr.16b v1, v2, #7 ; CHECK-NEXT: sshr.16b v2, v4, #7 +; CHECK-NEXT: sshr.16b v0, v3, #7 ; CHECK-NEXT: sshr.16b v3, v5, #7 ; CHECK-NEXT: ret %res = sext <64 x i1> %arg to <64 x i8> diff --git a/llvm/test/CodeGen/AArch64/arm64-vabs.ll b/llvm/test/CodeGen/AArch64/arm64-vabs.ll --- a/llvm/test/CodeGen/AArch64/arm64-vabs.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vabs.ll @@ -307,9 +307,9 @@ ; ; GISEL-LABEL: uabdl4s_rdx_i32: ; GISEL: // %bb.0: -; GISEL-NEXT: movi.2d v2, #0000000000000000 ; GISEL-NEXT: usubl.4s v0, v0, v1 -; GISEL-NEXT: cmgt.4s v1, v2, v0 +; GISEL-NEXT: movi.2d v1, #0000000000000000 +; GISEL-NEXT: cmgt.4s v1, v1, v0 ; GISEL-NEXT: neg.4s v2, v0 ; GISEL-NEXT: shl.4s v1, v1, #31 ; GISEL-NEXT: sshr.4s v1, v1, #31 @@ -384,9 +384,9 @@ ; ; GISEL-LABEL: uabdl2d_rdx_i64: ; GISEL: // %bb.0: -; GISEL-NEXT: movi.2d v2, #0000000000000000 ; GISEL-NEXT: usubl.2d v0, v0, v1 -; GISEL-NEXT: cmgt.2d v1, v2, v0 +; GISEL-NEXT: movi.2d v1, #0000000000000000 +; GISEL-NEXT: cmgt.2d v1, v1, v0 ; GISEL-NEXT: neg.2d v2, v0 ; GISEL-NEXT: shl.2d v1, v1, #63 ; GISEL-NEXT: sshr.2d v1, v1, #63 @@ -1650,8 +1650,8 @@ ; GISEL-LABEL: abspattern4: ; GISEL: // %bb.0: ; GISEL-NEXT: movi.2d v1, #0000000000000000 -; GISEL-NEXT: neg.4s v2, v0 ; GISEL-NEXT: cmge.4s v1, v0, v1 +; GISEL-NEXT: neg.4s v2, v0 ; GISEL-NEXT: shl.4s v1, v1, #31 ; GISEL-NEXT: sshr.4s v1, v1, #31 ; GISEL-NEXT: bif.16b v0, v2, v1 @@ -1672,8 +1672,8 @@ ; GISEL-LABEL: abspattern5: ; GISEL: // %bb.0: ; GISEL-NEXT: movi.2d v1, #0000000000000000 -; GISEL-NEXT: neg.8h v2, v0 ; GISEL-NEXT: cmgt.8h v1, v0, v1 +; GISEL-NEXT: neg.8h v2, v0 ; GISEL-NEXT: shl.8h v1, v1, #15 ; GISEL-NEXT: sshr.8h v1, v1, #15 ; GISEL-NEXT: bif.16b v0, v2, v1 @@ -1694,8 +1694,8 @@ ; GISEL-LABEL: abspattern6: ; GISEL: // %bb.0: ; GISEL-NEXT: movi.2d v1, #0000000000000000 -; GISEL-NEXT: neg.16b v2, v0 ; GISEL-NEXT: cmgt.16b v1, v1, v0 +; GISEL-NEXT: neg.16b v2, v0 ; GISEL-NEXT: shl.16b v1, v1, #7 ; GISEL-NEXT: sshr.16b v1, v1, #7 ; GISEL-NEXT: bit.16b v0, v2, v1 @@ -1716,8 +1716,8 @@ ; GISEL-LABEL: abspattern7: ; GISEL: // %bb.0: ; GISEL-NEXT: movi.2d v1, #0000000000000000 -; GISEL-NEXT: neg.2d v2, v0 ; GISEL-NEXT: cmge.2d v1, v1, v0 +; GISEL-NEXT: neg.2d v2, v0 ; GISEL-NEXT: shl.2d v1, v1, #63 ; GISEL-NEXT: sshr.2d v1, v1, #63 ; GISEL-NEXT: bit.16b v0, v2, v1 @@ -1737,9 +1737,9 @@ ; ; GISEL-LABEL: uabd_i32: ; GISEL: // %bb.0: -; GISEL-NEXT: movi.2d v2, #0000000000000000 ; GISEL-NEXT: ssubl.2d v0, v0, v1 -; GISEL-NEXT: cmgt.2d v1, v2, v0 +; GISEL-NEXT: movi.2d v1, #0000000000000000 +; GISEL-NEXT: cmgt.2d v1, v1, v0 ; GISEL-NEXT: neg.2d v2, v0 ; GISEL-NEXT: shl.2d v1, v1, #63 ; GISEL-NEXT: sshr.2d v1, v1, #63 @@ -1758,28 +1758,28 @@ define <2 x i128> @uabd_i64(<2 x i64> %a, <2 x i64> %b) { ; CHECK-LABEL: uabd_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov x9, d0 -; CHECK-NEXT: fmov x11, d1 -; CHECK-NEXT: mov.d x8, v0[1] -; CHECK-NEXT: mov.d x10, v1[1] -; CHECK-NEXT: asr x12, x9, #63 +; CHECK-NEXT: fmov x8, d1 +; CHECK-NEXT: fmov x10, d0 +; CHECK-NEXT: mov.d x9, v0[1] +; CHECK-NEXT: mov.d x11, v1[1] +; CHECK-NEXT: asr x12, x10, #63 +; CHECK-NEXT: asr x13, x8, #63 +; CHECK-NEXT: subs x8, x10, x8 +; CHECK-NEXT: asr x10, x9, #63 +; CHECK-NEXT: sbcs x12, x12, x13 ; CHECK-NEXT: asr x13, x11, #63 ; CHECK-NEXT: subs x9, x9, x11 -; CHECK-NEXT: sbcs x11, x12, x13 -; CHECK-NEXT: asr x12, x8, #63 -; CHECK-NEXT: asr x13, x10, #63 -; CHECK-NEXT: subs x8, x8, x10 -; CHECK-NEXT: sbcs x10, x12, x13 -; CHECK-NEXT: negs x12, x8 +; CHECK-NEXT: sbcs x10, x10, x13 +; CHECK-NEXT: negs x11, x9 ; CHECK-NEXT: ngcs x13, x10 ; CHECK-NEXT: cmp x10, #0 -; CHECK-NEXT: csel x2, x12, x8, lt +; CHECK-NEXT: csel x2, x11, x9, lt ; CHECK-NEXT: csel x3, x13, x10, lt -; CHECK-NEXT: negs x8, x9 -; CHECK-NEXT: ngcs x10, x11 -; CHECK-NEXT: cmp x11, #0 -; CHECK-NEXT: csel x8, x8, x9, lt -; CHECK-NEXT: csel x1, x10, x11, lt +; CHECK-NEXT: negs x9, x8 +; CHECK-NEXT: ngcs x10, x12 +; CHECK-NEXT: cmp x12, #0 +; CHECK-NEXT: csel x8, x9, x8, lt +; CHECK-NEXT: csel x1, x10, x12, lt ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: mov.d v0[1], x1 ; CHECK-NEXT: fmov x0, d0 diff --git a/llvm/test/CodeGen/AArch64/arm64-vhadd.ll b/llvm/test/CodeGen/AArch64/arm64-vhadd.ll --- a/llvm/test/CodeGen/AArch64/arm64-vhadd.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vhadd.ll @@ -759,10 +759,10 @@ define <4 x i64> @hadd32_sext_asr(<4 x i32> %src1, <4 x i32> %src2) nounwind { ; CHECK-LABEL: hadd32_sext_asr: ; CHECK: // %bb.0: -; CHECK-NEXT: saddl2.2d v2, v0, v1 -; CHECK-NEXT: saddl.2d v0, v0, v1 -; CHECK-NEXT: sshr.2d v1, v2, #1 -; CHECK-NEXT: sshr.2d v0, v0, #1 +; CHECK-NEXT: saddl.2d v2, v0, v1 +; CHECK-NEXT: saddl2.2d v1, v0, v1 +; CHECK-NEXT: sshr.2d v0, v2, #1 +; CHECK-NEXT: sshr.2d v1, v1, #1 ; CHECK-NEXT: ret %zextsrc1 = sext <4 x i32> %src1 to <4 x i64> %zextsrc2 = sext <4 x i32> %src2 to <4 x i64> @@ -774,10 +774,10 @@ define <4 x i64> @hadd32_zext_asr(<4 x i32> %src1, <4 x i32> %src2) nounwind { ; CHECK-LABEL: hadd32_zext_asr: ; CHECK: // %bb.0: -; CHECK-NEXT: uaddl2.2d v2, v0, v1 -; CHECK-NEXT: uaddl.2d v0, v0, v1 -; CHECK-NEXT: ushr.2d v1, v2, #1 -; CHECK-NEXT: ushr.2d v0, v0, #1 +; CHECK-NEXT: uaddl.2d v2, v0, v1 +; CHECK-NEXT: uaddl2.2d v1, v0, v1 +; CHECK-NEXT: ushr.2d v0, v2, #1 +; CHECK-NEXT: ushr.2d v1, v1, #1 ; CHECK-NEXT: ret %zextsrc1 = zext <4 x i32> %src1 to <4 x i64> %zextsrc2 = zext <4 x i32> %src2 to <4 x i64> @@ -789,10 +789,10 @@ define <4 x i64> @hadd32_sext_lsr(<4 x i32> %src1, <4 x i32> %src2) nounwind { ; CHECK-LABEL: hadd32_sext_lsr: ; CHECK: // %bb.0: -; CHECK-NEXT: saddl2.2d v2, v0, v1 -; CHECK-NEXT: saddl.2d v0, v0, v1 -; CHECK-NEXT: ushr.2d v1, v2, #1 -; CHECK-NEXT: ushr.2d v0, v0, #1 +; CHECK-NEXT: saddl.2d v2, v0, v1 +; CHECK-NEXT: saddl2.2d v1, v0, v1 +; CHECK-NEXT: ushr.2d v0, v2, #1 +; CHECK-NEXT: ushr.2d v1, v1, #1 ; CHECK-NEXT: ret %zextsrc1 = sext <4 x i32> %src1 to <4 x i64> %zextsrc2 = sext <4 x i32> %src2 to <4 x i64> @@ -804,10 +804,10 @@ define <4 x i64> @hadd32_zext_lsr(<4 x i32> %src1, <4 x i32> %src2) nounwind { ; CHECK-LABEL: hadd32_zext_lsr: ; CHECK: // %bb.0: -; CHECK-NEXT: uaddl2.2d v2, v0, v1 -; CHECK-NEXT: uaddl.2d v0, v0, v1 -; CHECK-NEXT: ushr.2d v1, v2, #1 -; CHECK-NEXT: ushr.2d v0, v0, #1 +; CHECK-NEXT: uaddl.2d v2, v0, v1 +; CHECK-NEXT: uaddl2.2d v1, v0, v1 +; CHECK-NEXT: ushr.2d v0, v2, #1 +; CHECK-NEXT: ushr.2d v1, v1, #1 ; CHECK-NEXT: ret %zextsrc1 = zext <4 x i32> %src1 to <4 x i64> %zextsrc2 = zext <4 x i32> %src2 to <4 x i64> diff --git a/llvm/test/CodeGen/AArch64/arm64-vmul.ll b/llvm/test/CodeGen/AArch64/arm64-vmul.ll --- a/llvm/test/CodeGen/AArch64/arm64-vmul.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vmul.ll @@ -904,14 +904,14 @@ define <2 x i64> @mul_2d(<2 x i64> %A, <2 x i64> %B) nounwind { ; CHECK-LABEL: mul_2d: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov x9, d1 -; CHECK-NEXT: fmov x10, d0 -; CHECK-NEXT: mov.d x8, v1[1] +; CHECK-NEXT: fmov x8, d1 +; CHECK-NEXT: fmov x9, d0 +; CHECK-NEXT: mov.d x10, v1[1] ; CHECK-NEXT: mov.d x11, v0[1] -; CHECK-NEXT: mul x9, x10, x9 -; CHECK-NEXT: mul x8, x11, x8 -; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: mov.d v0[1], x8 +; CHECK-NEXT: mul x8, x9, x8 +; CHECK-NEXT: mul x9, x11, x10 +; CHECK-NEXT: fmov d0, x8 +; CHECK-NEXT: mov.d v0[1], x9 ; CHECK-NEXT: ret %tmp1 = mul <2 x i64> %A, %B ret <2 x i64> %tmp1 diff --git a/llvm/test/CodeGen/AArch64/build-vector-extract.ll b/llvm/test/CodeGen/AArch64/build-vector-extract.ll --- a/llvm/test/CodeGen/AArch64/build-vector-extract.ll +++ b/llvm/test/CodeGen/AArch64/build-vector-extract.ll @@ -16,10 +16,9 @@ define <2 x i64> @extract0_i32_zext_insert0_i64_zero(<4 x i32> %x) { ; CHECK-LABEL: extract0_i32_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <4 x i32> %x, i32 0 %z = zext i32 %e to i64 @@ -30,8 +29,8 @@ define <2 x i64> @extract1_i32_zext_insert0_i64_undef(<4 x i32> %x) { ; CHECK-LABEL: extract1_i32_zext_insert0_i64_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: zip1 v0.4s, v0.4s, v0.4s +; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #12 ; CHECK-NEXT: ret %e = extractelement <4 x i32> %x, i32 1 @@ -43,10 +42,9 @@ define <2 x i64> @extract1_i32_zext_insert0_i64_zero(<4 x i32> %x) { ; CHECK-LABEL: extract1_i32_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <4 x i32> %x, i32 1 %z = zext i32 %e to i64 @@ -57,8 +55,8 @@ define <2 x i64> @extract2_i32_zext_insert0_i64_undef(<4 x i32> %x) { ; CHECK-LABEL: extract2_i32_zext_insert0_i64_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: uzp1 v0.4s, v0.4s, v0.4s +; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #12 ; CHECK-NEXT: ret %e = extractelement <4 x i32> %x, i32 2 @@ -70,10 +68,9 @@ define <2 x i64> @extract2_i32_zext_insert0_i64_zero(<4 x i32> %x) { ; CHECK-LABEL: extract2_i32_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: mov w8, v0.s[2] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <4 x i32> %x, i32 2 %z = zext i32 %e to i64 @@ -96,10 +93,9 @@ define <2 x i64> @extract3_i32_zext_insert0_i64_zero(<4 x i32> %x) { ; CHECK-LABEL: extract3_i32_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: mov w8, v0.s[3] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <4 x i32> %x, i32 3 %z = zext i32 %e to i64 @@ -123,10 +119,9 @@ define <2 x i64> @extract0_i32_zext_insert1_i64_zero(<4 x i32> %x) { ; CHECK-LABEL: extract0_i32_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: fmov w8, s0 -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <4 x i32> %x, i32 0 %z = zext i32 %e to i64 @@ -137,8 +132,8 @@ define <2 x i64> @extract1_i32_zext_insert1_i64_undef(<4 x i32> %x) { ; CHECK-LABEL: extract1_i32_zext_insert1_i64_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: ext v0.16b, v0.16b, v1.16b, #4 ; CHECK-NEXT: ret %e = extractelement <4 x i32> %x, i32 1 @@ -150,10 +145,9 @@ define <2 x i64> @extract1_i32_zext_insert1_i64_zero(<4 x i32> %x) { ; CHECK-LABEL: extract1_i32_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <4 x i32> %x, i32 1 %z = zext i32 %e to i64 @@ -175,10 +169,9 @@ define <2 x i64> @extract2_i32_zext_insert1_i64_zero(<4 x i32> %x) { ; CHECK-LABEL: extract2_i32_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: mov w8, v0.s[2] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <4 x i32> %x, i32 2 %z = zext i32 %e to i64 @@ -201,10 +194,9 @@ define <2 x i64> @extract3_i32_zext_insert1_i64_zero(<4 x i32> %x) { ; CHECK-LABEL: extract3_i32_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: mov w8, v0.s[3] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <4 x i32> %x, i32 3 %z = zext i32 %e to i64 @@ -227,10 +219,9 @@ define <2 x i64> @extract0_i16_zext_insert0_i64_zero(<8 x i16> %x) { ; CHECK-LABEL: extract0_i16_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.h[0] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <8 x i16> %x, i32 0 %z = zext i16 %e to i64 @@ -253,10 +244,9 @@ define <2 x i64> @extract1_i16_zext_insert0_i64_zero(<8 x i16> %x) { ; CHECK-LABEL: extract1_i16_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.h[1] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <8 x i16> %x, i32 1 %z = zext i16 %e to i64 @@ -279,10 +269,9 @@ define <2 x i64> @extract2_i16_zext_insert0_i64_zero(<8 x i16> %x) { ; CHECK-LABEL: extract2_i16_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.h[2] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <8 x i16> %x, i32 2 %z = zext i16 %e to i64 @@ -305,10 +294,9 @@ define <2 x i64> @extract3_i16_zext_insert0_i64_zero(<8 x i16> %x) { ; CHECK-LABEL: extract3_i16_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.h[3] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <8 x i16> %x, i32 3 %z = zext i16 %e to i64 @@ -331,10 +319,9 @@ define <2 x i64> @extract0_i16_zext_insert1_i64_zero(<8 x i16> %x) { ; CHECK-LABEL: extract0_i16_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.h[0] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <8 x i16> %x, i32 0 %z = zext i16 %e to i64 @@ -357,10 +344,9 @@ define <2 x i64> @extract1_i16_zext_insert1_i64_zero(<8 x i16> %x) { ; CHECK-LABEL: extract1_i16_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.h[1] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <8 x i16> %x, i32 1 %z = zext i16 %e to i64 @@ -383,10 +369,9 @@ define <2 x i64> @extract2_i16_zext_insert1_i64_zero(<8 x i16> %x) { ; CHECK-LABEL: extract2_i16_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.h[2] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <8 x i16> %x, i32 2 %z = zext i16 %e to i64 @@ -409,10 +394,9 @@ define <2 x i64> @extract3_i16_zext_insert1_i64_zero(<8 x i16> %x) { ; CHECK-LABEL: extract3_i16_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.h[3] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <8 x i16> %x, i32 3 %z = zext i16 %e to i64 @@ -437,10 +421,9 @@ define <2 x i64> @extract0_i8_zext_insert0_i64_zero(<16 x i8> %x) { ; CHECK-LABEL: extract0_i8_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.b[0] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <16 x i8> %x, i32 0 %z = zext i8 %e to i64 @@ -463,10 +446,9 @@ define <2 x i64> @extract1_i8_zext_insert0_i64_zero(<16 x i8> %x) { ; CHECK-LABEL: extract1_i8_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.b[1] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <16 x i8> %x, i32 1 %z = zext i8 %e to i64 @@ -489,10 +471,9 @@ define <2 x i64> @extract2_i8_zext_insert0_i64_zero(<16 x i8> %x) { ; CHECK-LABEL: extract2_i8_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.b[2] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <16 x i8> %x, i32 2 %z = zext i8 %e to i64 @@ -515,10 +496,9 @@ define <2 x i64> @extract3_i8_zext_insert0_i64_zero(<16 x i8> %x) { ; CHECK-LABEL: extract3_i8_zext_insert0_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.b[3] -; CHECK-NEXT: mov v1.d[0], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[0], x8 ; CHECK-NEXT: ret %e = extractelement <16 x i8> %x, i32 3 %z = zext i8 %e to i64 @@ -541,10 +521,9 @@ define <2 x i64> @extract0_i8_zext_insert1_i64_zero(<16 x i8> %x) { ; CHECK-LABEL: extract0_i8_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.b[0] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <16 x i8> %x, i32 0 %z = zext i8 %e to i64 @@ -567,10 +546,9 @@ define <2 x i64> @extract1_i8_zext_insert1_i64_zero(<16 x i8> %x) { ; CHECK-LABEL: extract1_i8_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.b[1] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <16 x i8> %x, i32 1 %z = zext i8 %e to i64 @@ -593,10 +571,9 @@ define <2 x i64> @extract2_i8_zext_insert1_i64_zero(<16 x i8> %x) { ; CHECK-LABEL: extract2_i8_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.b[2] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <16 x i8> %x, i32 2 %z = zext i8 %e to i64 @@ -619,10 +596,9 @@ define <2 x i64> @extract3_i8_zext_insert1_i64_zero(<16 x i8> %x) { ; CHECK-LABEL: extract3_i8_zext_insert1_i64_zero: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: umov w8, v0.b[3] -; CHECK-NEXT: mov v1.d[1], x8 -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x8 ; CHECK-NEXT: ret %e = extractelement <16 x i8> %x, i32 3 %z = zext i8 %e to i64 diff --git a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll --- a/llvm/test/CodeGen/AArch64/cmp-select-sign.ll +++ b/llvm/test/CodeGen/AArch64/cmp-select-sign.ll @@ -114,8 +114,8 @@ define <7 x i8> @sign_7xi8(<7 x i8> %a) { ; CHECK-LABEL: sign_7xi8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.8b, #1 ; CHECK-NEXT: sshr v0.8b, v0.8b, #7 +; CHECK-NEXT: movi v1.8b, #1 ; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %c = icmp sgt <7 x i8> %a, @@ -126,8 +126,8 @@ define <8 x i8> @sign_8xi8(<8 x i8> %a) { ; CHECK-LABEL: sign_8xi8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.8b, #1 ; CHECK-NEXT: sshr v0.8b, v0.8b, #7 +; CHECK-NEXT: movi v1.8b, #1 ; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %c = icmp sgt <8 x i8> %a, @@ -138,8 +138,8 @@ define <16 x i8> @sign_16xi8(<16 x i8> %a) { ; CHECK-LABEL: sign_16xi8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #1 ; CHECK-NEXT: sshr v0.16b, v0.16b, #7 +; CHECK-NEXT: movi v1.16b, #1 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %c = icmp sgt <16 x i8> %a, @@ -178,9 +178,9 @@ ; CHECK-NEXT: .cfi_offset w30, -16 ; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff ; CHECK-NEXT: sshr v2.4s, v0.4s, #31 -; CHECK-NEXT: cmgt v0.4s, v0.4s, v1.4s +; CHECK-NEXT: cmgt v1.4s, v0.4s, v1.4s ; CHECK-NEXT: orr v2.4s, #1 -; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: xtn v0.4h, v1.4s ; CHECK-NEXT: str q2, [sp] // 16-byte Folded Spill ; CHECK-NEXT: bl use_4xi1 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload @@ -198,9 +198,9 @@ ; CHECK-LABEL: not_sign_4xi32: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI16_0 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] +; CHECK-NEXT: cmgt v0.4s, v0.4s, v1.4s ; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI16_0] -; CHECK-NEXT: cmgt v0.4s, v0.4s, v2.4s ; CHECK-NEXT: and v1.16b, v0.16b, v1.16b ; CHECK-NEXT: orn v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret @@ -227,10 +227,10 @@ define <4 x i32> @not_sign_4xi32_3(<4 x i32> %a) { ; CHECK-LABEL: not_sign_4xi32_3: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff ; CHECK-NEXT: adrp x8, .LCPI18_0 -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI18_0] +; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff ; CHECK-NEXT: cmgt v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI18_0] ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: bsl v0.16b, v1.16b, v2.16b ; CHECK-NEXT: ret @@ -244,18 +244,18 @@ ; CHECK-LABEL: sign_4xi65: ; CHECK: // %bb.0: ; CHECK-NEXT: sbfx x8, x1, #0, #1 -; CHECK-NEXT: sbfx x10, x5, #0, #1 -; CHECK-NEXT: orr x9, x8, #0x1 +; CHECK-NEXT: sbfx x9, x7, #0, #1 +; CHECK-NEXT: orr x12, x8, #0x1 ; CHECK-NEXT: lsr x1, x8, #63 -; CHECK-NEXT: sbfx x8, x7, #0, #1 +; CHECK-NEXT: sbfx x10, x5, #0, #1 +; CHECK-NEXT: sbfx x11, x3, #0, #1 +; CHECK-NEXT: orr x2, x11, #0x1 +; CHECK-NEXT: lsr x3, x11, #63 +; CHECK-NEXT: fmov d0, x12 ; CHECK-NEXT: orr x4, x10, #0x1 ; CHECK-NEXT: lsr x5, x10, #63 -; CHECK-NEXT: orr x6, x8, #0x1 -; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: sbfx x9, x3, #0, #1 -; CHECK-NEXT: orr x2, x9, #0x1 -; CHECK-NEXT: lsr x3, x9, #63 -; CHECK-NEXT: lsr x7, x8, #63 +; CHECK-NEXT: orr x6, x9, #0x1 +; CHECK-NEXT: lsr x7, x9, #63 ; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/dag-combine-trunc-build-vec.ll b/llvm/test/CodeGen/AArch64/dag-combine-trunc-build-vec.ll --- a/llvm/test/CodeGen/AArch64/dag-combine-trunc-build-vec.ll +++ b/llvm/test/CodeGen/AArch64/dag-combine-trunc-build-vec.ll @@ -10,13 +10,13 @@ define void @no_combine(i32 %p) local_unnamed_addr { ; CHECK-LABEL: no_combine: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v0.4h, #4 -; CHECK-NEXT: dup v1.4s, w0 -; CHECK-NEXT: xtn v1.4h, v1.4s -; CHECK-NEXT: mov v1.d[1], v0.d[0] -; CHECK-NEXT: xtn v0.8b, v1.8h -; CHECK-NEXT: xtn2 v0.16b, v1.8h -; CHECK-NEXT: str q0, [x8] +; CHECK-NEXT: dup v0.4s, w0 +; CHECK-NEXT: movi v1.4h, #4 +; CHECK-NEXT: xtn v0.4h, v0.4s +; CHECK-NEXT: mov v0.d[1], v1.d[0] +; CHECK-NEXT: xtn v1.8b, v0.8h +; CHECK-NEXT: xtn2 v1.16b, v0.8h +; CHECK-NEXT: str q1, [x8] ; CHECK-NEXT: ret ; The two shufflevector operations are needed to force the DAGCombine to happen diff --git a/llvm/test/CodeGen/AArch64/dag-numsignbits.ll b/llvm/test/CodeGen/AArch64/dag-numsignbits.ll --- a/llvm/test/CodeGen/AArch64/dag-numsignbits.ll +++ b/llvm/test/CodeGen/AArch64/dag-numsignbits.ll @@ -15,9 +15,9 @@ ; CHECK-NEXT: adrp x8, .LCPI0_1 ; CHECK-NEXT: add v0.4h, v0.4h, v1.4h ; CHECK-NEXT: movi v1.4h, #1 +; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI0_1] ; CHECK-NEXT: cmgt v0.4h, v1.4h, v0.4h -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_1] -; CHECK-NEXT: and v0.8b, v0.8b, v1.8b +; CHECK-NEXT: and v0.8b, v0.8b, v2.8b ; CHECK-NEXT: shl v0.4h, v0.4h, #15 ; CHECK-NEXT: sshr v0.4h, v0.4h, #15 ; CHECK-NEXT: umov w0, v0.h[0] diff --git a/llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-signed.ll b/llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-signed.ll --- a/llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-signed.ll +++ b/llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-signed.ll @@ -74,66 +74,66 @@ ; ALL-NEXT: smov w9, v0.b[1] ; ALL-NEXT: smov w10, v0.b[0] ; ALL-NEXT: smov w11, v0.b[2] +; ALL-NEXT: sdiv w8, w9, w8 +; ALL-NEXT: smov w9, v1.b[0] ; ALL-NEXT: smov w12, v0.b[3] ; ALL-NEXT: smov w13, v0.b[4] ; ALL-NEXT: smov w14, v0.b[5] ; ALL-NEXT: smov w15, v0.b[6] -; ALL-NEXT: sdiv w8, w9, w8 -; ALL-NEXT: smov w9, v1.b[0] ; ALL-NEXT: smov w16, v0.b[7] ; ALL-NEXT: smov w17, v0.b[8] +; ALL-NEXT: smov w18, v0.b[9] +; ALL-NEXT: smov w1, v0.b[10] +; ALL-NEXT: smov w2, v0.b[11] ; ALL-NEXT: sdiv w9, w10, w9 ; ALL-NEXT: smov w10, v1.b[2] ; ALL-NEXT: sdiv w10, w11, w10 ; ALL-NEXT: smov w11, v1.b[3] ; ALL-NEXT: fmov s2, w9 -; ALL-NEXT: smov w9, v1.b[9] +; ALL-NEXT: smov w9, v0.b[12] ; ALL-NEXT: mov v2.b[1], w8 +; ALL-NEXT: smov w8, v1.b[12] ; ALL-NEXT: sdiv w11, w12, w11 ; ALL-NEXT: smov w12, v1.b[4] ; ALL-NEXT: mov v2.b[2], w10 -; ALL-NEXT: smov w10, v0.b[10] +; ALL-NEXT: smov w10, v0.b[13] ; ALL-NEXT: sdiv w12, w13, w12 ; ALL-NEXT: smov w13, v1.b[5] ; ALL-NEXT: mov v2.b[3], w11 -; ALL-NEXT: smov w11, v0.b[11] +; ALL-NEXT: smov w11, v0.b[14] ; ALL-NEXT: sdiv w13, w14, w13 ; ALL-NEXT: smov w14, v1.b[6] ; ALL-NEXT: mov v2.b[4], w12 -; ALL-NEXT: smov w12, v0.b[12] +; ALL-NEXT: smov w12, v0.b[15] ; ALL-NEXT: sdiv w14, w15, w14 ; ALL-NEXT: smov w15, v1.b[7] ; ALL-NEXT: mov v2.b[5], w13 -; ALL-NEXT: smov w13, v0.b[13] ; ALL-NEXT: sdiv w15, w16, w15 ; ALL-NEXT: smov w16, v1.b[8] ; ALL-NEXT: mov v2.b[6], w14 ; ALL-NEXT: sdiv w16, w17, w16 -; ALL-NEXT: smov w17, v0.b[9] +; ALL-NEXT: smov w17, v1.b[9] ; ALL-NEXT: mov v2.b[7], w15 -; ALL-NEXT: sdiv w8, w17, w9 -; ALL-NEXT: smov w9, v1.b[10] +; ALL-NEXT: sdiv w17, w18, w17 +; ALL-NEXT: smov w18, v1.b[10] ; ALL-NEXT: mov v2.b[8], w16 +; ALL-NEXT: sdiv w18, w1, w18 +; ALL-NEXT: smov w1, v1.b[11] +; ALL-NEXT: mov v2.b[9], w17 +; ALL-NEXT: sdiv w1, w2, w1 +; ALL-NEXT: mov v2.b[10], w18 +; ALL-NEXT: sdiv w8, w9, w8 +; ALL-NEXT: smov w9, v1.b[13] +; ALL-NEXT: mov v2.b[11], w1 ; ALL-NEXT: sdiv w9, w10, w9 -; ALL-NEXT: smov w10, v1.b[11] -; ALL-NEXT: mov v2.b[9], w8 +; ALL-NEXT: smov w10, v1.b[14] +; ALL-NEXT: mov v2.b[12], w8 ; ALL-NEXT: sdiv w10, w11, w10 -; ALL-NEXT: smov w11, v1.b[12] -; ALL-NEXT: mov v2.b[10], w9 -; ALL-NEXT: smov w9, v1.b[14] +; ALL-NEXT: smov w11, v1.b[15] +; ALL-NEXT: mov v2.b[13], w9 ; ALL-NEXT: sdiv w11, w12, w11 -; ALL-NEXT: smov w12, v1.b[13] -; ALL-NEXT: mov v2.b[11], w10 -; ALL-NEXT: smov w10, v1.b[15] -; ALL-NEXT: sdiv w8, w13, w12 -; ALL-NEXT: smov w12, v0.b[14] -; ALL-NEXT: mov v2.b[12], w11 -; ALL-NEXT: smov w11, v0.b[15] -; ALL-NEXT: sdiv w9, w12, w9 -; ALL-NEXT: mov v2.b[13], w8 -; ALL-NEXT: sdiv w8, w11, w10 -; ALL-NEXT: mov v2.b[14], w9 -; ALL-NEXT: mov v2.b[15], w8 +; ALL-NEXT: mov v2.b[14], w10 +; ALL-NEXT: mov v2.b[15], w11 ; ALL-NEXT: mls v0.16b, v2.16b, v1.16b ; ALL-NEXT: str q2, [x0] ; ALL-NEXT: ret @@ -151,33 +151,33 @@ ; ALL-NEXT: smov w9, v0.h[1] ; ALL-NEXT: smov w10, v0.h[0] ; ALL-NEXT: smov w11, v0.h[2] -; ALL-NEXT: smov w12, v0.h[3] -; ALL-NEXT: smov w13, v0.h[4] ; ALL-NEXT: sdiv w8, w9, w8 ; ALL-NEXT: smov w9, v1.h[0] +; ALL-NEXT: smov w12, v0.h[3] +; ALL-NEXT: smov w13, v0.h[4] +; ALL-NEXT: smov w14, v0.h[5] +; ALL-NEXT: smov w15, v0.h[6] ; ALL-NEXT: sdiv w9, w10, w9 ; ALL-NEXT: smov w10, v1.h[2] ; ALL-NEXT: sdiv w10, w11, w10 ; ALL-NEXT: smov w11, v1.h[3] ; ALL-NEXT: fmov s2, w9 -; ALL-NEXT: smov w9, v1.h[5] +; ALL-NEXT: smov w9, v0.h[7] ; ALL-NEXT: mov v2.h[1], w8 +; ALL-NEXT: smov w8, v1.h[7] ; ALL-NEXT: sdiv w11, w12, w11 ; ALL-NEXT: smov w12, v1.h[4] ; ALL-NEXT: mov v2.h[2], w10 -; ALL-NEXT: smov w10, v0.h[6] ; ALL-NEXT: sdiv w12, w13, w12 -; ALL-NEXT: smov w13, v0.h[5] +; ALL-NEXT: smov w13, v1.h[5] ; ALL-NEXT: mov v2.h[3], w11 -; ALL-NEXT: smov w11, v0.h[7] -; ALL-NEXT: sdiv w8, w13, w9 -; ALL-NEXT: smov w9, v1.h[6] +; ALL-NEXT: sdiv w13, w14, w13 +; ALL-NEXT: smov w14, v1.h[6] ; ALL-NEXT: mov v2.h[4], w12 -; ALL-NEXT: sdiv w9, w10, w9 -; ALL-NEXT: smov w10, v1.h[7] -; ALL-NEXT: mov v2.h[5], w8 -; ALL-NEXT: sdiv w8, w11, w10 -; ALL-NEXT: mov v2.h[6], w9 +; ALL-NEXT: sdiv w14, w15, w14 +; ALL-NEXT: mov v2.h[5], w13 +; ALL-NEXT: sdiv w8, w9, w8 +; ALL-NEXT: mov v2.h[6], w14 ; ALL-NEXT: mov v2.h[7], w8 ; ALL-NEXT: mls v0.8h, v2.8h, v1.8h ; ALL-NEXT: str q2, [x0] @@ -192,22 +192,22 @@ define <4 x i32> @vector_i128_i32(<4 x i32> %x, <4 x i32> %y, <4 x i32>* %divdst) nounwind { ; ALL-LABEL: vector_i128_i32: ; ALL: // %bb.0: -; ALL-NEXT: mov w8, v1.s[1] -; ALL-NEXT: mov w9, v0.s[1] -; ALL-NEXT: fmov w10, s0 +; ALL-NEXT: mov w9, v1.s[1] +; ALL-NEXT: mov w10, v0.s[1] +; ALL-NEXT: fmov w8, s1 ; ALL-NEXT: mov w11, v0.s[2] -; ALL-NEXT: mov w12, v0.s[3] -; ALL-NEXT: sdiv w8, w9, w8 -; ALL-NEXT: fmov w9, s1 ; ALL-NEXT: sdiv w9, w10, w9 +; ALL-NEXT: fmov w10, s0 +; ALL-NEXT: mov w12, v0.s[3] +; ALL-NEXT: sdiv w8, w10, w8 ; ALL-NEXT: mov w10, v1.s[2] ; ALL-NEXT: sdiv w10, w11, w10 ; ALL-NEXT: mov w11, v1.s[3] -; ALL-NEXT: fmov s2, w9 -; ALL-NEXT: mov v2.s[1], w8 -; ALL-NEXT: sdiv w8, w12, w11 +; ALL-NEXT: fmov s2, w8 +; ALL-NEXT: mov v2.s[1], w9 +; ALL-NEXT: sdiv w11, w12, w11 ; ALL-NEXT: mov v2.s[2], w10 -; ALL-NEXT: mov v2.s[3], w8 +; ALL-NEXT: mov v2.s[3], w11 ; ALL-NEXT: mls v0.4s, v2.4s, v1.4s ; ALL-NEXT: str q2, [x0] ; ALL-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-unsigned.ll b/llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-unsigned.ll --- a/llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-unsigned.ll +++ b/llvm/test/CodeGen/AArch64/div-rem-pair-recomposition-unsigned.ll @@ -74,66 +74,66 @@ ; ALL-NEXT: umov w9, v0.b[1] ; ALL-NEXT: umov w10, v0.b[0] ; ALL-NEXT: umov w11, v0.b[2] +; ALL-NEXT: udiv w8, w9, w8 +; ALL-NEXT: umov w9, v1.b[0] ; ALL-NEXT: umov w12, v0.b[3] ; ALL-NEXT: umov w13, v0.b[4] ; ALL-NEXT: umov w14, v0.b[5] ; ALL-NEXT: umov w15, v0.b[6] -; ALL-NEXT: udiv w8, w9, w8 -; ALL-NEXT: umov w9, v1.b[0] ; ALL-NEXT: umov w16, v0.b[7] ; ALL-NEXT: umov w17, v0.b[8] +; ALL-NEXT: umov w18, v0.b[9] +; ALL-NEXT: umov w1, v0.b[10] +; ALL-NEXT: umov w2, v0.b[11] ; ALL-NEXT: udiv w9, w10, w9 ; ALL-NEXT: umov w10, v1.b[2] ; ALL-NEXT: udiv w10, w11, w10 ; ALL-NEXT: umov w11, v1.b[3] ; ALL-NEXT: fmov s2, w9 -; ALL-NEXT: umov w9, v1.b[9] +; ALL-NEXT: umov w9, v0.b[12] ; ALL-NEXT: mov v2.b[1], w8 +; ALL-NEXT: umov w8, v1.b[12] ; ALL-NEXT: udiv w11, w12, w11 ; ALL-NEXT: umov w12, v1.b[4] ; ALL-NEXT: mov v2.b[2], w10 -; ALL-NEXT: umov w10, v0.b[10] +; ALL-NEXT: umov w10, v0.b[13] ; ALL-NEXT: udiv w12, w13, w12 ; ALL-NEXT: umov w13, v1.b[5] ; ALL-NEXT: mov v2.b[3], w11 -; ALL-NEXT: umov w11, v0.b[11] +; ALL-NEXT: umov w11, v0.b[14] ; ALL-NEXT: udiv w13, w14, w13 ; ALL-NEXT: umov w14, v1.b[6] ; ALL-NEXT: mov v2.b[4], w12 -; ALL-NEXT: umov w12, v0.b[12] +; ALL-NEXT: umov w12, v0.b[15] ; ALL-NEXT: udiv w14, w15, w14 ; ALL-NEXT: umov w15, v1.b[7] ; ALL-NEXT: mov v2.b[5], w13 -; ALL-NEXT: umov w13, v0.b[13] ; ALL-NEXT: udiv w15, w16, w15 ; ALL-NEXT: umov w16, v1.b[8] ; ALL-NEXT: mov v2.b[6], w14 ; ALL-NEXT: udiv w16, w17, w16 -; ALL-NEXT: umov w17, v0.b[9] +; ALL-NEXT: umov w17, v1.b[9] ; ALL-NEXT: mov v2.b[7], w15 -; ALL-NEXT: udiv w8, w17, w9 -; ALL-NEXT: umov w9, v1.b[10] +; ALL-NEXT: udiv w17, w18, w17 +; ALL-NEXT: umov w18, v1.b[10] ; ALL-NEXT: mov v2.b[8], w16 +; ALL-NEXT: udiv w18, w1, w18 +; ALL-NEXT: umov w1, v1.b[11] +; ALL-NEXT: mov v2.b[9], w17 +; ALL-NEXT: udiv w1, w2, w1 +; ALL-NEXT: mov v2.b[10], w18 +; ALL-NEXT: udiv w8, w9, w8 +; ALL-NEXT: umov w9, v1.b[13] +; ALL-NEXT: mov v2.b[11], w1 ; ALL-NEXT: udiv w9, w10, w9 -; ALL-NEXT: umov w10, v1.b[11] -; ALL-NEXT: mov v2.b[9], w8 +; ALL-NEXT: umov w10, v1.b[14] +; ALL-NEXT: mov v2.b[12], w8 ; ALL-NEXT: udiv w10, w11, w10 -; ALL-NEXT: umov w11, v1.b[12] -; ALL-NEXT: mov v2.b[10], w9 -; ALL-NEXT: umov w9, v1.b[14] +; ALL-NEXT: umov w11, v1.b[15] +; ALL-NEXT: mov v2.b[13], w9 ; ALL-NEXT: udiv w11, w12, w11 -; ALL-NEXT: umov w12, v1.b[13] -; ALL-NEXT: mov v2.b[11], w10 -; ALL-NEXT: umov w10, v1.b[15] -; ALL-NEXT: udiv w8, w13, w12 -; ALL-NEXT: umov w12, v0.b[14] -; ALL-NEXT: mov v2.b[12], w11 -; ALL-NEXT: umov w11, v0.b[15] -; ALL-NEXT: udiv w9, w12, w9 -; ALL-NEXT: mov v2.b[13], w8 -; ALL-NEXT: udiv w8, w11, w10 -; ALL-NEXT: mov v2.b[14], w9 -; ALL-NEXT: mov v2.b[15], w8 +; ALL-NEXT: mov v2.b[14], w10 +; ALL-NEXT: mov v2.b[15], w11 ; ALL-NEXT: mls v0.16b, v2.16b, v1.16b ; ALL-NEXT: str q2, [x0] ; ALL-NEXT: ret @@ -151,33 +151,33 @@ ; ALL-NEXT: umov w9, v0.h[1] ; ALL-NEXT: umov w10, v0.h[0] ; ALL-NEXT: umov w11, v0.h[2] -; ALL-NEXT: umov w12, v0.h[3] -; ALL-NEXT: umov w13, v0.h[4] ; ALL-NEXT: udiv w8, w9, w8 ; ALL-NEXT: umov w9, v1.h[0] +; ALL-NEXT: umov w12, v0.h[3] +; ALL-NEXT: umov w13, v0.h[4] +; ALL-NEXT: umov w14, v0.h[5] +; ALL-NEXT: umov w15, v0.h[6] ; ALL-NEXT: udiv w9, w10, w9 ; ALL-NEXT: umov w10, v1.h[2] ; ALL-NEXT: udiv w10, w11, w10 ; ALL-NEXT: umov w11, v1.h[3] ; ALL-NEXT: fmov s2, w9 -; ALL-NEXT: umov w9, v1.h[5] +; ALL-NEXT: umov w9, v0.h[7] ; ALL-NEXT: mov v2.h[1], w8 +; ALL-NEXT: umov w8, v1.h[7] ; ALL-NEXT: udiv w11, w12, w11 ; ALL-NEXT: umov w12, v1.h[4] ; ALL-NEXT: mov v2.h[2], w10 -; ALL-NEXT: umov w10, v0.h[6] ; ALL-NEXT: udiv w12, w13, w12 -; ALL-NEXT: umov w13, v0.h[5] +; ALL-NEXT: umov w13, v1.h[5] ; ALL-NEXT: mov v2.h[3], w11 -; ALL-NEXT: umov w11, v0.h[7] -; ALL-NEXT: udiv w8, w13, w9 -; ALL-NEXT: umov w9, v1.h[6] +; ALL-NEXT: udiv w13, w14, w13 +; ALL-NEXT: umov w14, v1.h[6] ; ALL-NEXT: mov v2.h[4], w12 -; ALL-NEXT: udiv w9, w10, w9 -; ALL-NEXT: umov w10, v1.h[7] -; ALL-NEXT: mov v2.h[5], w8 -; ALL-NEXT: udiv w8, w11, w10 -; ALL-NEXT: mov v2.h[6], w9 +; ALL-NEXT: udiv w14, w15, w14 +; ALL-NEXT: mov v2.h[5], w13 +; ALL-NEXT: udiv w8, w9, w8 +; ALL-NEXT: mov v2.h[6], w14 ; ALL-NEXT: mov v2.h[7], w8 ; ALL-NEXT: mls v0.8h, v2.8h, v1.8h ; ALL-NEXT: str q2, [x0] @@ -192,22 +192,22 @@ define <4 x i32> @vector_i128_i32(<4 x i32> %x, <4 x i32> %y, <4 x i32>* %divdst) nounwind { ; ALL-LABEL: vector_i128_i32: ; ALL: // %bb.0: -; ALL-NEXT: mov w8, v1.s[1] -; ALL-NEXT: mov w9, v0.s[1] -; ALL-NEXT: fmov w10, s0 +; ALL-NEXT: mov w9, v1.s[1] +; ALL-NEXT: mov w10, v0.s[1] +; ALL-NEXT: fmov w8, s1 ; ALL-NEXT: mov w11, v0.s[2] -; ALL-NEXT: mov w12, v0.s[3] -; ALL-NEXT: udiv w8, w9, w8 -; ALL-NEXT: fmov w9, s1 ; ALL-NEXT: udiv w9, w10, w9 +; ALL-NEXT: fmov w10, s0 +; ALL-NEXT: mov w12, v0.s[3] +; ALL-NEXT: udiv w8, w10, w8 ; ALL-NEXT: mov w10, v1.s[2] ; ALL-NEXT: udiv w10, w11, w10 ; ALL-NEXT: mov w11, v1.s[3] -; ALL-NEXT: fmov s2, w9 -; ALL-NEXT: mov v2.s[1], w8 -; ALL-NEXT: udiv w8, w12, w11 +; ALL-NEXT: fmov s2, w8 +; ALL-NEXT: mov v2.s[1], w9 +; ALL-NEXT: udiv w11, w12, w11 ; ALL-NEXT: mov v2.s[2], w10 -; ALL-NEXT: mov v2.s[3], w8 +; ALL-NEXT: mov v2.s[3], w11 ; ALL-NEXT: mls v0.4s, v2.4s, v1.4s ; ALL-NEXT: str q2, [x0] ; ALL-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/expand-vector-rot.ll b/llvm/test/CodeGen/AArch64/expand-vector-rot.ll --- a/llvm/test/CodeGen/AArch64/expand-vector-rot.ll +++ b/llvm/test/CodeGen/AArch64/expand-vector-rot.ll @@ -6,15 +6,15 @@ define <2 x i16> @rotlv2_16(<2 x i16> %vec2_16, <2 x i16> %shift) { ; CHECK-LABEL: rotlv2_16: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.2s, #15 -; CHECK-NEXT: neg v3.2s, v1.2s +; CHECK-NEXT: neg v2.2s, v1.2s +; CHECK-NEXT: movi v3.2s, #15 ; CHECK-NEXT: movi d4, #0x00ffff0000ffff -; CHECK-NEXT: and v3.8b, v3.8b, v2.8b -; CHECK-NEXT: and v1.8b, v1.8b, v2.8b +; CHECK-NEXT: and v1.8b, v1.8b, v3.8b +; CHECK-NEXT: and v2.8b, v2.8b, v3.8b ; CHECK-NEXT: and v4.8b, v0.8b, v4.8b -; CHECK-NEXT: neg v3.2s, v3.2s +; CHECK-NEXT: neg v2.2s, v2.2s ; CHECK-NEXT: ushl v0.2s, v0.2s, v1.2s -; CHECK-NEXT: ushl v2.2s, v4.2s, v3.2s +; CHECK-NEXT: ushl v2.2s, v4.2s, v2.2s ; CHECK-NEXT: orr v0.8b, v0.8b, v2.8b ; CHECK-NEXT: ret %1 = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %vec2_16, <2 x i16> %vec2_16, <2 x i16> %shift) diff --git a/llvm/test/CodeGen/AArch64/f16-instructions.ll b/llvm/test/CodeGen/AArch64/f16-instructions.ll --- a/llvm/test/CodeGen/AArch64/f16-instructions.ll +++ b/llvm/test/CodeGen/AArch64/f16-instructions.ll @@ -1,5 +1,5 @@ -; RUN: llc < %s -mtriple aarch64-unknown-unknown -aarch64-neon-syntax=apple -asm-verbose=false -disable-post-ra -frame-pointer=non-leaf | FileCheck %s --check-prefix=CHECK-CVT --check-prefix=CHECK-COMMON -; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fullfp16 -aarch64-neon-syntax=apple -asm-verbose=false -disable-post-ra -frame-pointer=non-leaf | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-FP16 +; RUN: llc < %s -mtriple aarch64-unknown-unknown -aarch64-neon-syntax=apple -asm-verbose=true -disable-post-ra -frame-pointer=non-leaf | FileCheck %s --check-prefix=CHECK-CVT --check-prefix=CHECK-COMMON +; RUN: llc < %s -mtriple aarch64-unknown-unknown -mattr=+fullfp16 -aarch64-neon-syntax=apple -asm-verbose=true -disable-post-ra -frame-pointer=non-leaf | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-FP16 ; RUN: llc < %s -mtriple aarch64-unknown-unknown -aarch64-neon-syntax=apple \ ; RUN: -asm-verbose=false -disable-post-ra -frame-pointer=non-leaf -global-isel \ @@ -14,476 +14,475 @@ target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +define half @test_fadd(half %a, half %b) #0 { ; CHECK-CVT-LABEL: test_fadd: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fadd s0, s0, s1 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fadd s0, s0, s1 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_fadd: -; CHECK-FP16-NEXT: fadd h0, h0, h1 -; CHECK-FP16-NEXT: ret - -define half @test_fadd(half %a, half %b) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fadd h0, h0, h1 +; CHECK-FP16-NEXT: ret %r = fadd half %a, %b ret half %r } +define half @test_fsub(half %a, half %b) #0 { ; CHECK-CVT-LABEL: test_fsub: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fsub s0, s0, s1 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fsub s0, s0, s1 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_fsub: -; CHECK-FP16-NEXT: fsub h0, h0, h1 -; CHECK-FP16-NEXT: ret - -define half @test_fsub(half %a, half %b) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fsub h0, h0, h1 +; CHECK-FP16-NEXT: ret %r = fsub half %a, %b ret half %r } +define half @test_fmul(half %a, half %b) #0 { ; CHECK-CVT-LABEL: test_fmul: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fmul s0, s0, s1 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fmul s0, s0, s1 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_fmul: -; CHECK-FP16-NEXT: fmul h0, h0, h1 -; CHECK-FP16-NEXT: ret - -define half @test_fmul(half %a, half %b) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fmul h0, h0, h1 +; CHECK-FP16-NEXT: ret %r = fmul half %a, %b ret half %r } +define half @test_fmadd(half %a, half %b, half %c) #0 { ; CHECK-CVT-LABEL: test_fmadd: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fmul s0, s0, s1 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcvt s1, h2 -; CHECK-CVT-NEXT: fadd s0, s0, s1 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fmul s0, s0, s1 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcvt s1, h2 +; CHECK-CVT-NEXT: fadd s0, s0, s1 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_fmadd: -; CHECK-FP16-NEXT: fmadd h0, h0, h1, h2 -; CHECK-FP16-NEXT: ret - -define half @test_fmadd(half %a, half %b, half %c) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fmadd h0, h0, h1, h2 +; CHECK-FP16-NEXT: ret %mul = fmul fast half %a, %b %r = fadd fast half %mul, %c ret half %r } +define half @test_fdiv(half %a, half %b) #0 { ; CHECK-CVT-LABEL: test_fdiv: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fdiv s0, s0, s1 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fdiv s0, s0, s1 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_fdiv: -; CHECK-FP16-NEXT: fdiv h0, h0, h1 -; CHECK-FP16-NEXT: ret - -define half @test_fdiv(half %a, half %b) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fdiv h0, h0, h1 +; CHECK-FP16-NEXT: ret %r = fdiv half %a, %b ret half %r } -; CHECK-COMMON-LABEL: test_frem: -; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! -; CHECK-COMMON-NEXT: mov x29, sp -; CHECK-COMMON-NEXT: fcvt s0, h0 -; CHECK-COMMON-NEXT: fcvt s1, h1 -; CHECK-COMMON-NEXT: bl {{_?}}fmodf -; CHECK-COMMON-NEXT: fcvt h0, s0 -; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 -; CHECK-COMMON-NEXT: ret define half @test_frem(half %a, half %b) #0 { +; CHECK-COMMON-LABEL: test_frem: +; CHECK-COMMON: // %bb.0: +; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-COMMON-NEXT: mov x29, sp +; CHECK-COMMON-NEXT: fcvt s0, h0 +; CHECK-COMMON-NEXT: fcvt s1, h1 +; CHECK-COMMON-NEXT: bl fmodf +; CHECK-COMMON-NEXT: fcvt h0, s0 +; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ret %r = frem half %a, %b ret half %r } - -; CHECK-COMMON-LABEL: test_store: -; CHECK-COMMON-NEXT: str h0, [x0] -; CHECK-COMMON-NEXT: ret define void @test_store(half %a, half* %b) #0 { +; CHECK-COMMON-LABEL: test_store: +; CHECK-COMMON: // %bb.0: +; CHECK-COMMON-NEXT: str h0, [x0] +; CHECK-COMMON-NEXT: ret store half %a, half* %b ret void } - -; CHECK-COMMON-LABEL: test_load: -; CHECK-COMMON-NEXT: ldr h0, [x0] -; CHECK-COMMON-NEXT: ret define half @test_load(half* %a) #0 { +; CHECK-COMMON-LABEL: test_load: +; CHECK-COMMON: // %bb.0: +; CHECK-COMMON-NEXT: ldr h0, [x0] +; CHECK-COMMON-NEXT: ret %r = load half, half* %a ret half %r } - declare half @test_callee(half %a, half %b) #0 -; CHECK-COMMON-LABEL: test_call: -; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! -; CHECK-COMMON-NEXT: mov x29, sp -; CHECK-COMMON-NEXT: bl {{_?}}test_callee -; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 -; CHECK-COMMON-NEXT: ret define half @test_call(half %a, half %b) #0 { +; CHECK-COMMON-LABEL: test_call: +; CHECK-COMMON: // %bb.0: +; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-COMMON-NEXT: mov x29, sp +; CHECK-COMMON-NEXT: bl test_callee +; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ret %r = call half @test_callee(half %a, half %b) ret half %r } - -; CHECK-COMMON-LABEL: test_call_flipped: -; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! -; CHECK-COMMON-NEXT: mov x29, sp -; CHECK-COMMON-NEXT: fmov s2, s0 -; CHECK-COMMON-NEXT: fmov s0, s1 -; CHECK-COMMON-NEXT: fmov s1, s2 -; CHECK-COMMON-NEXT: bl {{_?}}test_callee -; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 -; CHECK-COMMON-NEXT: ret define half @test_call_flipped(half %a, half %b) #0 { +; CHECK-COMMON-LABEL: test_call_flipped: +; CHECK-COMMON: // %bb.0: +; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-COMMON-NEXT: mov x29, sp +; CHECK-COMMON-NEXT: fmov s2, s0 +; CHECK-COMMON-NEXT: fmov s0, s1 +; CHECK-COMMON-NEXT: fmov s1, s2 +; CHECK-COMMON-NEXT: bl test_callee +; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ret %r = call half @test_callee(half %b, half %a) ret half %r } - -; CHECK-COMMON-LABEL: test_tailcall_flipped: -; CHECK-COMMON-NEXT: fmov s2, s0 -; CHECK-COMMON-NEXT: fmov s0, s1 -; CHECK-COMMON-NEXT: fmov s1, s2 -; CHECK-COMMON-NEXT: b {{_?}}test_callee define half @test_tailcall_flipped(half %a, half %b) #0 { +; CHECK-COMMON-LABEL: test_tailcall_flipped: +; CHECK-COMMON: // %bb.0: +; CHECK-COMMON-NEXT: fmov s2, s0 +; CHECK-COMMON-NEXT: fmov s0, s1 +; CHECK-COMMON-NEXT: fmov s1, s2 +; CHECK-COMMON-NEXT: b test_callee %r = tail call half @test_callee(half %b, half %a) ret half %r } - +define half @test_select(half %a, half %b, i1 zeroext %c) #0 { ; CHECK-CVT-LABEL: test_select: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: cmp w0, #0 -; CHECK-CVT-NEXT: fcsel s0, s0, s1, ne -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: cmp w0, #0 +; CHECK-CVT-NEXT: fcsel s0, s0, s1, ne +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_select: -; CHECK-FP16-NEXT: cmp w0, #0 -; CHECK-FP16-NEXT: fcsel h0, h0, h1, ne -; CHECK-FP16-NEXT: ret - -define half @test_select(half %a, half %b, i1 zeroext %c) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: cmp w0, #0 +; CHECK-FP16-NEXT: fcsel h0, h0, h1, ne +; CHECK-FP16-NEXT: ret %r = select i1 %c, half %a, half %b ret half %r } +define half @test_select_cc(half %a, half %b, half %c, half %d) #0 { ; CHECK-CVT-LABEL: test_select_cc: -; CHECK-CVT-DAG: fcvt s3, h3 -; CHECK-CVT-DAG: fcvt s2, h2 -; CHECK-CVT-DAG: fcvt s1, h1 -; CHECK-CVT-DAG: fcvt s0, h0 -; CHECK-CVT-DAG: fcmp s2, s3 -; CHECK-CVT-NEXT: fcsel s0, s0, s1, ne -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcmp s2, s3 +; CHECK-CVT-NEXT: fcsel s0, s0, s1, ne +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_select_cc: -; CHECK-FP16-NEXT: fcmp h2, h3 -; CHECK-FP16-NEXT: fcsel h0, h0, h1, ne -; CHECK-FP16-NEXT: ret - -define half @test_select_cc(half %a, half %b, half %c, half %d) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcmp h2, h3 +; CHECK-FP16-NEXT: fcsel h0, h0, h1, ne +; CHECK-FP16-NEXT: ret %cc = fcmp une half %c, %d %r = select i1 %cc, half %a, half %b ret half %r } +define float @test_select_cc_f32_f16(float %a, float %b, half %c, half %d) #0 { ; CHECK-CVT-LABEL: test_select_cc_f32_f16: -; CHECK-CVT-DAG: fcvt s2, h2 -; CHECK-CVT-DAG: fcvt s3, h3 -; CHECK-CVT-NEXT: fcmp s2, s3 -; CHECK-CVT-NEXT: fcsel s0, s0, s1, ne -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: fcmp s2, s3 +; CHECK-CVT-NEXT: fcsel s0, s0, s1, ne +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_select_cc_f32_f16: -; CHECK-FP16-NEXT: fcmp h2, h3 -; CHECK-FP16-NEXT: fcsel s0, s0, s1, ne -; CHECK-FP16-NEXT: ret - -define float @test_select_cc_f32_f16(float %a, float %b, half %c, half %d) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcmp h2, h3 +; CHECK-FP16-NEXT: fcsel s0, s0, s1, ne +; CHECK-FP16-NEXT: ret %cc = fcmp une half %c, %d %r = select i1 %cc, float %a, float %b ret float %r } +define half @test_select_cc_f16_f32(half %a, half %b, float %c, float %d) #0 { ; CHECK-CVT-LABEL: test_select_cc_f16_f32: -; CHECK-CVT-DAG: fcvt s0, h0 -; CHECK-CVT-DAG: fcvt s1, h1 -; CHECK-CVT-DAG: fcmp s2, s3 -; CHECK-CVT-NEXT: fcsel s0, s0, s1, ne -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcmp s2, s3 +; CHECK-CVT-NEXT: fcsel s0, s0, s1, ne +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_select_cc_f16_f32: -; CHECK-FP16-NEXT: fcmp s2, s3 -; CHECK-FP16-NEXT: fcsel h0, h0, h1, ne -; CHECK-FP16-NEXT: ret - -define half @test_select_cc_f16_f32(half %a, half %b, float %c, float %d) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcmp s2, s3 +; CHECK-FP16-NEXT: fcsel h0, h0, h1, ne +; CHECK-FP16-NEXT: ret %cc = fcmp une float %c, %d %r = select i1 %cc, half %a, half %b ret half %r } +define i1 @test_fcmp_une(half %a, half %b) #0 { ; CHECK-CVT-LABEL: test_fcmp_une: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: cset w0, ne -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcmp s0, s1 +; CHECK-CVT-NEXT: cset w0, ne +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_fcmp_une: -; CHECK-FP16-NEXT: fcmp h0, h1 -; CHECK-FP16-NEXT: cset w0, ne -; CHECK-FP16-NEXT: ret - -define i1 @test_fcmp_une(half %a, half %b) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcmp h0, h1 +; CHECK-FP16-NEXT: cset w0, ne +; CHECK-FP16-NEXT: ret %r = fcmp une half %a, %b ret i1 %r } +define i1 @test_fcmp_ueq(half %a, half %b) #0 { ; CHECK-CVT-LABEL: test_fcmp_ueq: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: cset [[TRUE:w[0-9]+]], eq -; CHECK-CVT-NEXT: csinc w0, [[TRUE]], wzr, vc -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcmp s0, s1 +; CHECK-CVT-NEXT: cset w8, eq +; CHECK-CVT-NEXT: csinc w0, w8, wzr, vc +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_fcmp_ueq: -; CHECK-FP16-NEXT: fcmp h0, h1 -; CHECK-FP16-NEXT: cset [[TRUE:w[0-9]+]], eq -; CHECK-FP16-NEXT: csinc w0, [[TRUE]], wzr, vc -; CHECK-FP16-NEXT: ret - -define i1 @test_fcmp_ueq(half %a, half %b) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcmp h0, h1 +; CHECK-FP16-NEXT: cset w8, eq +; CHECK-FP16-NEXT: csinc w0, w8, wzr, vc +; CHECK-FP16-NEXT: ret %r = fcmp ueq half %a, %b ret i1 %r } +define i1 @test_fcmp_ugt(half %a, half %b) #0 { ; CHECK-CVT-LABEL: test_fcmp_ugt: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: cset w0, hi -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcmp s0, s1 +; CHECK-CVT-NEXT: cset w0, hi +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_fcmp_ugt: -; CHECK-FP16-NEXT: fcmp h0, h1 -; CHECK-FP16-NEXT: cset w0, hi -; CHECK-FP16-NEXT: ret - -define i1 @test_fcmp_ugt(half %a, half %b) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcmp h0, h1 +; CHECK-FP16-NEXT: cset w0, hi +; CHECK-FP16-NEXT: ret %r = fcmp ugt half %a, %b ret i1 %r } +define i1 @test_fcmp_uge(half %a, half %b) #0 { ; CHECK-CVT-LABEL: test_fcmp_uge: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: cset w0, pl -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcmp s0, s1 +; CHECK-CVT-NEXT: cset w0, pl +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_fcmp_uge: -; CHECK-FP16-NEXT: fcmp h0, h1 -; CHECK-FP16-NEXT: cset w0, pl -; CHECK-FP16-NEXT: ret - -define i1 @test_fcmp_uge(half %a, half %b) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcmp h0, h1 +; CHECK-FP16-NEXT: cset w0, pl +; CHECK-FP16-NEXT: ret %r = fcmp uge half %a, %b ret i1 %r } +define i1 @test_fcmp_ult(half %a, half %b) #0 { ; CHECK-CVT-LABEL: test_fcmp_ult: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: cset w0, lt -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcmp s0, s1 +; CHECK-CVT-NEXT: cset w0, lt +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_fcmp_ult: -; CHECK-FP16-NEXT: fcmp h0, h1 -; CHECK-FP16-NEXT: cset w0, lt -; CHECK-FP16-NEXT: ret - -define i1 @test_fcmp_ult(half %a, half %b) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcmp h0, h1 +; CHECK-FP16-NEXT: cset w0, lt +; CHECK-FP16-NEXT: ret %r = fcmp ult half %a, %b ret i1 %r } +define i1 @test_fcmp_ule(half %a, half %b) #0 { ; CHECK-CVT-LABEL: test_fcmp_ule: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: cset w0, le -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcmp s0, s1 +; CHECK-CVT-NEXT: cset w0, le +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_fcmp_ule: -; CHECK-FP16-NEXT: fcmp h0, h1 -; CHECK-FP16-NEXT: cset w0, le -; CHECK-FP16-NEXT: ret - -define i1 @test_fcmp_ule(half %a, half %b) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcmp h0, h1 +; CHECK-FP16-NEXT: cset w0, le +; CHECK-FP16-NEXT: ret %r = fcmp ule half %a, %b ret i1 %r } +define i1 @test_fcmp_uno(half %a, half %b) #0 { ; CHECK-CVT-LABEL: test_fcmp_uno: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: cset w0, vs -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcmp s0, s1 +; CHECK-CVT-NEXT: cset w0, vs +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_fcmp_uno: -; CHECK-FP16-NEXT: fcmp h0, h1 -; CHECK-FP16-NEXT: cset w0, vs -; CHECK-FP16-NEXT: ret - -define i1 @test_fcmp_uno(half %a, half %b) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcmp h0, h1 +; CHECK-FP16-NEXT: cset w0, vs +; CHECK-FP16-NEXT: ret %r = fcmp uno half %a, %b ret i1 %r } +define i1 @test_fcmp_one(half %a, half %b) #0 { ; CHECK-CVT-LABEL: test_fcmp_one: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: cset [[TRUE:w[0-9]+]], mi -; CHECK-CVT-NEXT: csinc w0, [[TRUE]], wzr, le -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcmp s0, s1 +; CHECK-CVT-NEXT: cset w8, mi +; CHECK-CVT-NEXT: csinc w0, w8, wzr, le +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_fcmp_one: -; CHECK-FP16-NEXT: fcmp h0, h1 -; CHECK-FP16-NEXT: cset [[TRUE:w[0-9]+]], mi -; CHECK-FP16-NEXT: csinc w0, [[TRUE]], wzr, le -; CHECK-FP16-NEXT: ret - -define i1 @test_fcmp_one(half %a, half %b) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcmp h0, h1 +; CHECK-FP16-NEXT: cset w8, mi +; CHECK-FP16-NEXT: csinc w0, w8, wzr, le +; CHECK-FP16-NEXT: ret %r = fcmp one half %a, %b ret i1 %r } +define i1 @test_fcmp_oeq(half %a, half %b) #0 { ; CHECK-CVT-LABEL: test_fcmp_oeq: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: cset w0, eq -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcmp s0, s1 +; CHECK-CVT-NEXT: cset w0, eq +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_fcmp_oeq: -; CHECK-FP16-NEXT: fcmp h0, h1 -; CHECK-FP16-NEXT: cset w0, eq -; CHECK-FP16-NEXT: ret - -define i1 @test_fcmp_oeq(half %a, half %b) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcmp h0, h1 +; CHECK-FP16-NEXT: cset w0, eq +; CHECK-FP16-NEXT: ret %r = fcmp oeq half %a, %b ret i1 %r } +define i1 @test_fcmp_ogt(half %a, half %b) #0 { ; CHECK-CVT-LABEL: test_fcmp_ogt: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: cset w0, gt -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcmp s0, s1 +; CHECK-CVT-NEXT: cset w0, gt +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_fcmp_ogt: -; CHECK-FP16-NEXT: fcmp h0, h1 -; CHECK-FP16-NEXT: cset w0, gt -; CHECK-FP16-NEXT: ret - -define i1 @test_fcmp_ogt(half %a, half %b) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcmp h0, h1 +; CHECK-FP16-NEXT: cset w0, gt +; CHECK-FP16-NEXT: ret %r = fcmp ogt half %a, %b ret i1 %r } +define i1 @test_fcmp_oge(half %a, half %b) #0 { ; CHECK-CVT-LABEL: test_fcmp_oge: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: cset w0, ge -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcmp s0, s1 +; CHECK-CVT-NEXT: cset w0, ge +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_fcmp_oge: -; CHECK-FP16-NEXT: fcmp h0, h1 -; CHECK-FP16-NEXT: cset w0, ge -; CHECK-FP16-NEXT: ret - -define i1 @test_fcmp_oge(half %a, half %b) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcmp h0, h1 +; CHECK-FP16-NEXT: cset w0, ge +; CHECK-FP16-NEXT: ret %r = fcmp oge half %a, %b ret i1 %r } +define i1 @test_fcmp_olt(half %a, half %b) #0 { ; CHECK-CVT-LABEL: test_fcmp_olt: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: cset w0, mi -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcmp s0, s1 +; CHECK-CVT-NEXT: cset w0, mi +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_fcmp_olt: -; CHECK-FP16-NEXT: fcmp h0, h1 -; CHECK-FP16-NEXT: cset w0, mi -; CHECK-FP16-NEXT: ret - -define i1 @test_fcmp_olt(half %a, half %b) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcmp h0, h1 +; CHECK-FP16-NEXT: cset w0, mi +; CHECK-FP16-NEXT: ret %r = fcmp olt half %a, %b ret i1 %r } +define i1 @test_fcmp_ole(half %a, half %b) #0 { ; CHECK-CVT-LABEL: test_fcmp_ole: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: cset w0, ls -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcmp s0, s1 +; CHECK-CVT-NEXT: cset w0, ls +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_fcmp_ole: -; CHECK-FP16-NEXT: fcmp h0, h1 -; CHECK-FP16-NEXT: cset w0, ls -; CHECK-FP16-NEXT: ret - -define i1 @test_fcmp_ole(half %a, half %b) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcmp h0, h1 +; CHECK-FP16-NEXT: cset w0, ls +; CHECK-FP16-NEXT: ret %r = fcmp ole half %a, %b ret i1 %r } +define i1 @test_fcmp_ord(half %a, half %b) #0 { ; CHECK-CVT-LABEL: test_fcmp_ord: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: cset w0, vc -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcmp s0, s1 +; CHECK-CVT-NEXT: cset w0, vc +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_fcmp_ord: -; CHECK-FP16-NEXT: fcmp h0, h1 -; CHECK-FP16-NEXT: cset w0, vc -; CHECK-FP16-NEXT: ret - -define i1 @test_fcmp_ord(half %a, half %b) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcmp h0, h1 +; CHECK-FP16-NEXT: cset w0, vc +; CHECK-FP16-NEXT: ret %r = fcmp ord half %a, %b ret i1 %r } -; CHECK-COMMON-LABEL: test_fccmp: ; CHECK-CVT: fcvt s0, h0 ; CHECK-CVT-NEXT: fmov s1, #8.00000000 ; CHECK-CVT-NEXT: fcmp s0, s1 @@ -503,7 +502,6 @@ ; CHECK-FP16-NEXT: fcsel h0, h0, h1, gt ; CHECK-FP16-NEXT: str h0, [x0] ; CHECK-FP16-NEXT: ret - define void @test_fccmp(half %in, half* %out) { %cmp1 = fcmp ogt half %in, 0xH4800 %cmp2 = fcmp olt half %in, 0xH4500 @@ -513,21 +511,21 @@ ret void } +define void @test_br_cc(half %a, half %b, i32* %p1, i32* %p2) #0 { ; CHECK-CVT-LABEL: test_br_cc: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: csel x8, x0, x1, pl -; CHECK-CVT-NEXT: str wzr, [x8] -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: // %common.ret +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcmp s0, s1 +; CHECK-CVT-NEXT: csel x8, x0, x1, pl +; CHECK-CVT-NEXT: str wzr, [x8] +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_br_cc: -; CHECK-FP16-NEXT: fcmp h0, h1 -; CHECK-FP16-NEXT: csel x8, x0, x1, pl -; CHECK-FP16-NEXT: str wzr, [x8] -; CHECK-FP16-NEXT: ret - -define void @test_br_cc(half %a, half %b, i32* %p1, i32* %p2) #0 { +; CHECK-FP16: // %bb.0: // %common.ret +; CHECK-FP16-NEXT: fcmp h0, h1 +; CHECK-FP16-NEXT: csel x8, x0, x1, pl +; CHECK-FP16-NEXT: str wzr, [x8] +; CHECK-FP16-NEXT: ret %c = fcmp uge half %a, %b br i1 %c, label %then, label %else then: @@ -538,6 +536,7 @@ ret void } +define half @test_phi(half* %p1) #0 { ; CHECK-COMMON-LABEL: test_phi: ; CHECK-COMMON: mov x[[PTR:[0-9]+]], x0 ; CHECK-COMMON: ldr h[[AB:[0-9]+]], [x0] @@ -548,7 +547,6 @@ ; CHECK-COMMON: bl {{_?}}test_dummy ; CHECK-COMMON: fmov s0, s[[R]] ; CHECK-COMMON: ret -define half @test_phi(half* %p1) #0 { entry: %a = load half, half* %p1 br label %loop @@ -560,211 +558,212 @@ return: ret half %r } - declare i1 @test_dummy(half* %p1) #0 +define i32 @test_fptosi_i32(half %a) #0 { ; CHECK-CVT-LABEL: test_fptosi_i32: -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcvtzs w0, s0 -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcvtzs w0, s0 +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_fptosi_i32: -; CHECK-FP16-NEXT: fcvtzs w0, h0 -; CHECK-FP16-NEXT: ret - -define i32 @test_fptosi_i32(half %a) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcvtzs w0, h0 +; CHECK-FP16-NEXT: ret %r = fptosi half %a to i32 ret i32 %r } +define i64 @test_fptosi_i64(half %a) #0 { ; CHECK-CVT-LABEL: test_fptosi_i64: -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcvtzs x0, s0 -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcvtzs x0, s0 +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_fptosi_i64: -; CHECK-FP16-NEXT: fcvtzs x0, h0 -; CHECK-FP16-NEXT: ret - -define i64 @test_fptosi_i64(half %a) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcvtzs x0, h0 +; CHECK-FP16-NEXT: ret %r = fptosi half %a to i64 ret i64 %r } +define i32 @test_fptoui_i32(half %a) #0 { ; CHECK-CVT-LABEL: test_fptoui_i32: -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcvtzu w0, s0 -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcvtzu w0, s0 +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_fptoui_i32: -; CHECK-FP16-NEXT: fcvtzu w0, h0 -; CHECK-FP16-NEXT: ret - -define i32 @test_fptoui_i32(half %a) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcvtzu w0, h0 +; CHECK-FP16-NEXT: ret %r = fptoui half %a to i32 ret i32 %r } +define i64 @test_fptoui_i64(half %a) #0 { ; CHECK-CVT-LABEL: test_fptoui_i64: -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcvtzu x0, s0 -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcvtzu x0, s0 +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_fptoui_i64: -; CHECK-FP16-NEXT: fcvtzu x0, h0 -; CHECK-FP16-NEXT: ret - -define i64 @test_fptoui_i64(half %a) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fcvtzu x0, h0 +; CHECK-FP16-NEXT: ret %r = fptoui half %a to i64 ret i64 %r } +define half @test_uitofp_i32(i32 %a) #0 { ; CHECK-CVT-LABEL: test_uitofp_i32: -; CHECK-CVT-NEXT: ucvtf s0, w0 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: ucvtf s0, w0 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_uitofp_i32: -; CHECK-FP16-NEXT: ucvtf h0, w0 -; CHECK-FP16-NEXT: ret - -define half @test_uitofp_i32(i32 %a) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: ucvtf h0, w0 +; CHECK-FP16-NEXT: ret %r = uitofp i32 %a to half ret half %r } +define half @test_uitofp_i64(i64 %a) #0 { ; CHECK-CVT-LABEL: test_uitofp_i64: -; CHECK-CVT-NEXT: ucvtf s0, x0 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: ucvtf s0, x0 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_uitofp_i64: -; CHECK-FP16-NEXT: ucvtf h0, x0 -; CHECK-FP16-NEXT: ret - -define half @test_uitofp_i64(i64 %a) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: ucvtf h0, x0 +; CHECK-FP16-NEXT: ret %r = uitofp i64 %a to half ret half %r } +define half @test_sitofp_i32(i32 %a) #0 { ; CHECK-CVT-LABEL: test_sitofp_i32: -; CHECK-CVT-NEXT: scvtf s0, w0 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: scvtf s0, w0 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_sitofp_i32: -; CHECK-FP16-NEXT: scvtf h0, w0 -; CHECK-FP16-NEXT: ret - -define half @test_sitofp_i32(i32 %a) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: scvtf h0, w0 +; CHECK-FP16-NEXT: ret %r = sitofp i32 %a to half ret half %r } +define half @test_sitofp_i64(i64 %a) #0 { ; CHECK-CVT-LABEL: test_sitofp_i64: -; CHECK-CVT-NEXT: scvtf s0, x0 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: scvtf s0, x0 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_sitofp_i64: -; CHECK-FP16-NEXT: scvtf h0, x0 -; CHECK-FP16-NEXT: ret -define half @test_sitofp_i64(i64 %a) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: scvtf h0, x0 +; CHECK-FP16-NEXT: ret %r = sitofp i64 %a to half ret half %r } - +define half @test_uitofp_i32_fadd(i32 %a, half %b) #0 { ; CHECK-CVT-LABEL: test_uitofp_i32_fadd: -; CHECK-CVT-NEXT: ucvtf s1, w0 -; CHECK-CVT-NEXT: fcvt h1, s1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fadd s0, s0, s1 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: ucvtf s1, w0 +; CHECK-CVT-NEXT: fcvt h1, s1 +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fadd s0, s0, s1 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_uitofp_i32_fadd: -; CHECK-FP16-NEXT: ucvtf h1, w0 -; CHECK-FP16-NEXT: fadd h0, h0, h1 -; CHECK-FP16-NEXT: ret - -define half @test_uitofp_i32_fadd(i32 %a, half %b) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: ucvtf h1, w0 +; CHECK-FP16-NEXT: fadd h0, h0, h1 +; CHECK-FP16-NEXT: ret %c = uitofp i32 %a to half %r = fadd half %b, %c ret half %r } +define half @test_sitofp_i32_fadd(i32 %a, half %b) #0 { ; CHECK-CVT-LABEL: test_sitofp_i32_fadd: -; CHECK-CVT-NEXT: scvtf s1, w0 -; CHECK-CVT-NEXT: fcvt h1, s1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fadd s0, s0, s1 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: scvtf s1, w0 +; CHECK-CVT-NEXT: fcvt h1, s1 +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fadd s0, s0, s1 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_sitofp_i32_fadd: -; CHECK-FP16-NEXT: scvtf h1, w0 -; CHECK-FP16-NEXT: fadd h0, h0, h1 -; CHECK-FP16-NEXT: ret - -define half @test_sitofp_i32_fadd(i32 %a, half %b) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: scvtf h1, w0 +; CHECK-FP16-NEXT: fadd h0, h0, h1 +; CHECK-FP16-NEXT: ret %c = sitofp i32 %a to half %r = fadd half %b, %c ret half %r } -; CHECK-COMMON-LABEL: test_fptrunc_float: -; CHECK-COMMON-NEXT: fcvt h0, s0 -; CHECK-COMMON-NEXT: ret - define half @test_fptrunc_float(float %a) #0 { +; CHECK-COMMON-LABEL: test_fptrunc_float: +; CHECK-COMMON: // %bb.0: +; CHECK-COMMON-NEXT: fcvt h0, s0 +; CHECK-COMMON-NEXT: ret %r = fptrunc float %a to half ret half %r } -; CHECK-COMMON-LABEL: test_fptrunc_double: -; CHECK-COMMON-NEXT: fcvt h0, d0 -; CHECK-COMMON-NEXT: ret define half @test_fptrunc_double(double %a) #0 { +; CHECK-COMMON-LABEL: test_fptrunc_double: +; CHECK-COMMON: // %bb.0: +; CHECK-COMMON-NEXT: fcvt h0, d0 +; CHECK-COMMON-NEXT: ret %r = fptrunc double %a to half ret half %r } - -; CHECK-COMMON-LABEL: test_fpext_float: -; CHECK-COMMON-NEXT: fcvt s0, h0 -; CHECK-COMMON-NEXT: ret define float @test_fpext_float(half %a) #0 { +; CHECK-COMMON-LABEL: test_fpext_float: +; CHECK-COMMON: // %bb.0: +; CHECK-COMMON-NEXT: fcvt s0, h0 +; CHECK-COMMON-NEXT: ret %r = fpext half %a to float ret float %r } - -; CHECK-COMMON-LABEL: test_fpext_double: -; CHECK-COMMON-NEXT: fcvt d0, h0 -; CHECK-COMMON-NEXT: ret define double @test_fpext_double(half %a) #0 { +; CHECK-COMMON-LABEL: test_fpext_double: +; CHECK-COMMON: // %bb.0: +; CHECK-COMMON-NEXT: fcvt d0, h0 +; CHECK-COMMON-NEXT: ret %r = fpext half %a to double ret double %r } - -; CHECK-COMMON-LABEL: test_bitcast_halftoi16: -; CHECK-COMMON-NEXT: fmov w0, s0 -; CHECK-COMMON-NEXT: ret define i16 @test_bitcast_halftoi16(half %a) #0 { +; CHECK-COMMON-LABEL: test_bitcast_halftoi16: +; CHECK-COMMON: // %bb.0: +; CHECK-COMMON-NEXT: // kill: def $h0 killed $h0 def $s0 +; CHECK-COMMON-NEXT: fmov w0, s0 +; CHECK-COMMON-NEXT: ret %r = bitcast half %a to i16 ret i16 %r } - -; CHECK-COMMON-LABEL: test_bitcast_i16tohalf: -; CHECK-COMMON-NEXT: fmov s0, w0 -; CHECK-COMMON-NEXT: ret define half @test_bitcast_i16tohalf(i16 %a) #0 { +; CHECK-COMMON-LABEL: test_bitcast_i16tohalf: +; CHECK-COMMON: // %bb.0: +; CHECK-COMMON-NEXT: fmov s0, w0 +; CHECK-COMMON-NEXT: // kill: def $h0 killed $h0 killed $s0 +; CHECK-COMMON-NEXT: ret %r = bitcast i16 %a to half ret half %r } - declare half @llvm.sqrt.f16(half %a) #0 declare half @llvm.powi.f16.i32(half %a, i32 %b) #0 declare half @llvm.sin.f16(half %a) #0 @@ -795,16 +794,6 @@ ; FALLBACK-NOT: remark:{{.*}}test_sqrt ; FALLBACK-FP16-NOT: remark:{{.*}}test_sqrt -; CHECK-CVT-LABEL: test_sqrt: -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fsqrt s0, s0 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - -; CHECK-FP16-LABEL: test_sqrt: -; CHECK-FP16-NEXT: fsqrt h0, h0 -; CHECK-FP16-NEXT: ret - ; GISEL-CVT-LABEL: test_sqrt: ; GISEL-CVT-NEXT: fcvt s0, h0 ; GISEL-CVT-NEXT: fsqrt s0, s0 @@ -816,35 +805,36 @@ ; GISEL-FP16-NEXT: ret define half @test_sqrt(half %a) #0 { +; CHECK-CVT-LABEL: test_sqrt: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fsqrt s0, s0 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret +; CHECK-FP16-LABEL: test_sqrt: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fsqrt h0, h0 +; CHECK-FP16-NEXT: ret %r = call half @llvm.sqrt.f16(half %a) ret half %r } -; CHECK-COMMON-LABEL: test_powi: -; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! -; CHECK-COMMON-NEXT: mov x29, sp -; CHECK-COMMON-NEXT: fcvt s0, h0 -; CHECK-COMMON-NEXT: bl {{_?}}__powisf2 -; CHECK-COMMON-NEXT: fcvt h0, s0 -; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 -; CHECK-COMMON-NEXT: ret define half @test_powi(half %a, i32 %b) #0 { +; CHECK-COMMON-LABEL: test_powi: +; CHECK-COMMON: // %bb.0: +; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-COMMON-NEXT: mov x29, sp +; CHECK-COMMON-NEXT: fcvt s0, h0 +; CHECK-COMMON-NEXT: bl __powisf2 +; CHECK-COMMON-NEXT: fcvt h0, s0 +; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ret %r = call half @llvm.powi.f16.i32(half %a, i32 %b) ret half %r } - ; FALLBACK-NOT: remark:{{.*}}test_sin ; FALLBACK-FP16-NOT: remark:{{.*}}test_sin -; CHECK-COMMON-LABEL: test_sin: -; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! -; CHECK-COMMON-NEXT: mov x29, sp -; CHECK-COMMON-NEXT: fcvt s0, h0 -; CHECK-COMMON-NEXT: bl {{_?}}sinf -; CHECK-COMMON-NEXT: fcvt h0, s0 -; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 -; CHECK-COMMON-NEXT: ret - ; GISEL-LABEL: test_sin: ; GISEL-NEXT: stp x29, x30, [sp, #-16]! ; GISEL-NEXT: mov x29, sp @@ -854,6 +844,15 @@ ; GISEL-NEXT: ldp x29, x30, [sp], #16 ; GISEL-NEXT: ret define half @test_sin(half %a) #0 { +; CHECK-COMMON-LABEL: test_sin: +; CHECK-COMMON: // %bb.0: +; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-COMMON-NEXT: mov x29, sp +; CHECK-COMMON-NEXT: fcvt s0, h0 +; CHECK-COMMON-NEXT: bl sinf +; CHECK-COMMON-NEXT: fcvt h0, s0 +; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ret %r = call half @llvm.sin.f16(half %a) ret half %r } @@ -861,15 +860,6 @@ ; FALLBACK-NOT: remark:{{.*}}test_cos ; FALLBACK-FP16-NOT: remark:{{.*}}test_cos -; CHECK-COMMON-LABEL: test_cos: -; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! -; CHECK-COMMON-NEXT: mov x29, sp -; CHECK-COMMON-NEXT: fcvt s0, h0 -; CHECK-COMMON-NEXT: bl {{_?}}cosf -; CHECK-COMMON-NEXT: fcvt h0, s0 -; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 -; CHECK-COMMON-NEXT: ret - ; GISEL-LABEL: test_cos: ; GISEL-NEXT: stp x29, x30, [sp, #-16]! ; GISEL-NEXT: mov x29, sp @@ -879,36 +869,36 @@ ; GISEL-NEXT: ldp x29, x30, [sp], #16 ; GISEL-NEXT: ret define half @test_cos(half %a) #0 { +; CHECK-COMMON-LABEL: test_cos: +; CHECK-COMMON: // %bb.0: +; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-COMMON-NEXT: mov x29, sp +; CHECK-COMMON-NEXT: fcvt s0, h0 +; CHECK-COMMON-NEXT: bl cosf +; CHECK-COMMON-NEXT: fcvt h0, s0 +; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ret %r = call half @llvm.cos.f16(half %a) ret half %r } -; CHECK-COMMON-LABEL: test_pow: -; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! -; CHECK-COMMON-NEXT: mov x29, sp -; CHECK-COMMON-NEXT: fcvt s0, h0 -; CHECK-COMMON-NEXT: fcvt s1, h1 -; CHECK-COMMON-NEXT: bl {{_?}}powf -; CHECK-COMMON-NEXT: fcvt h0, s0 -; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 -; CHECK-COMMON-NEXT: ret define half @test_pow(half %a, half %b) #0 { +; CHECK-COMMON-LABEL: test_pow: +; CHECK-COMMON: // %bb.0: +; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-COMMON-NEXT: mov x29, sp +; CHECK-COMMON-NEXT: fcvt s0, h0 +; CHECK-COMMON-NEXT: fcvt s1, h1 +; CHECK-COMMON-NEXT: bl powf +; CHECK-COMMON-NEXT: fcvt h0, s0 +; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ret %r = call half @llvm.pow.f16(half %a, half %b) ret half %r } - ; FALLBACK-NOT: remark:{{.*}}test_exp ; FALLBACK-FP16-NOT: remark:{{.*}}test_exp -; CHECK-COMMON-LABEL: test_exp: -; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! -; CHECK-COMMON-NEXT: mov x29, sp -; CHECK-COMMON-NEXT: fcvt s0, h0 -; CHECK-COMMON-NEXT: bl {{_?}}expf -; CHECK-COMMON-NEXT: fcvt h0, s0 -; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 -; CHECK-COMMON-NEXT: ret - ; GISEL-LABEL: test_exp: ; GISEL-NEXT: stp x29, x30, [sp, #-16]! ; GISEL-NEXT: mov x29, sp @@ -918,19 +908,19 @@ ; GISEL-NEXT: ldp x29, x30, [sp], #16 ; GISEL-NEXT: ret define half @test_exp(half %a) #0 { +; CHECK-COMMON-LABEL: test_exp: +; CHECK-COMMON: // %bb.0: +; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-COMMON-NEXT: mov x29, sp +; CHECK-COMMON-NEXT: fcvt s0, h0 +; CHECK-COMMON-NEXT: bl expf +; CHECK-COMMON-NEXT: fcvt h0, s0 +; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ret %r = call half @llvm.exp.f16(half %a) ret half %r } -; CHECK-COMMON-LABEL: test_exp2: -; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! -; CHECK-COMMON-NEXT: mov x29, sp -; CHECK-COMMON-NEXT: fcvt s0, h0 -; CHECK-COMMON-NEXT: bl {{_?}}exp2f -; CHECK-COMMON-NEXT: fcvt h0, s0 -; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 -; CHECK-COMMON-NEXT: ret - ; GISEL-LABEL: test_exp2: ; GISEL-NEXT: stp x29, x30, [sp, #-16]! ; GISEL-NEXT: mov x29, sp @@ -940,6 +930,15 @@ ; GISEL-NEXT: ldp x29, x30, [sp], #16 ; GISEL-NEXT: ret define half @test_exp2(half %a) #0 { +; CHECK-COMMON-LABEL: test_exp2: +; CHECK-COMMON: // %bb.0: +; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-COMMON-NEXT: mov x29, sp +; CHECK-COMMON-NEXT: fcvt s0, h0 +; CHECK-COMMON-NEXT: bl exp2f +; CHECK-COMMON-NEXT: fcvt h0, s0 +; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ret %r = call half @llvm.exp2.f16(half %a) ret half %r } @@ -947,15 +946,6 @@ ; FALLBACK-NOT: remark:{{.*}}test_log ; FALLBACK-FP16-NOT: remark:{{.*}}test_log -; CHECK-COMMON-LABEL: test_log: -; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! -; CHECK-COMMON-NEXT: mov x29, sp -; CHECK-COMMON-NEXT: fcvt s0, h0 -; CHECK-COMMON-NEXT: bl {{_?}}logf -; CHECK-COMMON-NEXT: fcvt h0, s0 -; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 -; CHECK-COMMON-NEXT: ret - ; GISEL-LABEL: test_log: ; GISEL: stp x29, x30, [sp, #-16]! ; GISEL-NEXT: mov x29, sp @@ -966,6 +956,15 @@ ; GISEL-NEXT: ret define half @test_log(half %a) #0 { +; CHECK-COMMON-LABEL: test_log: +; CHECK-COMMON: // %bb.0: +; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-COMMON-NEXT: mov x29, sp +; CHECK-COMMON-NEXT: fcvt s0, h0 +; CHECK-COMMON-NEXT: bl logf +; CHECK-COMMON-NEXT: fcvt h0, s0 +; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ret %r = call half @llvm.log.f16(half %a) ret half %r } @@ -973,15 +972,6 @@ ; FALLBACK-NOT: remark:{{.*}}test_log10 ; FALLBACK-FP16-NOT: remark:{{.*}}test_log10 -; CHECK-COMMON-LABEL: test_log10: -; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! -; CHECK-COMMON-NEXT: mov x29, sp -; CHECK-COMMON-NEXT: fcvt s0, h0 -; CHECK-COMMON-NEXT: bl {{_?}}log10f -; CHECK-COMMON-NEXT: fcvt h0, s0 -; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 -; CHECK-COMMON-NEXT: ret - ; GISEL-LABEL: test_log10: ; GISEL-NEXT: stp x29, x30, [sp, #-16]! ; GISEL-NEXT: mov x29, sp @@ -992,6 +982,15 @@ ; GISEL-NEXT: ret define half @test_log10(half %a) #0 { +; CHECK-COMMON-LABEL: test_log10: +; CHECK-COMMON: // %bb.0: +; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-COMMON-NEXT: mov x29, sp +; CHECK-COMMON-NEXT: fcvt s0, h0 +; CHECK-COMMON-NEXT: bl log10f +; CHECK-COMMON-NEXT: fcvt h0, s0 +; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ret %r = call half @llvm.log10.f16(half %a) ret half %r } @@ -999,15 +998,6 @@ ; FALLBACK-NOT: remark:{{.*}}test_log2 ; FALLBACK-FP16-NOT: remark:{{.*}}test_log2 -; CHECK-COMMON-LABEL: test_log2: -; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! -; CHECK-COMMON-NEXT: mov x29, sp -; CHECK-COMMON-NEXT: fcvt s0, h0 -; CHECK-COMMON-NEXT: bl {{_?}}log2f -; CHECK-COMMON-NEXT: fcvt h0, s0 -; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 -; CHECK-COMMON-NEXT: ret - ; GISEL-LABEL: test_log2: ; GISEL-NEXT: stp x29, x30, [sp, #-16]! ; GISEL-NEXT: mov x29, sp @@ -1018,37 +1008,36 @@ ; GISEL-NEXT: ret define half @test_log2(half %a) #0 { +; CHECK-COMMON-LABEL: test_log2: +; CHECK-COMMON: // %bb.0: +; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-COMMON-NEXT: mov x29, sp +; CHECK-COMMON-NEXT: fcvt s0, h0 +; CHECK-COMMON-NEXT: bl log2f +; CHECK-COMMON-NEXT: fcvt h0, s0 +; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ret %r = call half @llvm.log2.f16(half %a) ret half %r } +define half @test_fma(half %a, half %b, half %c) #0 { ; CHECK-CVT-LABEL: test_fma: -; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fmadd s0, s0, s1, s2 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fmadd s0, s0, s1, s2 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_fma: -; CHECK-FP16-NEXT: fmadd h0, h0, h1, h2 -; CHECK-FP16-NEXT: ret - -define half @test_fma(half %a, half %b, half %c) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fmadd h0, h0, h1, h2 +; CHECK-FP16-NEXT: ret %r = call half @llvm.fma.f16(half %a, half %b, half %c) ret half %r } -; CHECK-CVT-LABEL: test_fabs: -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fabs s0, s0 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - -; CHECK-FP16-LABEL: test_fabs: -; CHECK-FP16-NEXT: fabs h0, h0 -; CHECK-FP16-NEXT: ret - ; FALLBACK-NOT: remark:{{.*}}test_fabs ; FALLBACK-FP16-NOT: remark:{{.*}}test_fabs @@ -1063,94 +1052,112 @@ ; GISEL-FP16-NEXT: ret define half @test_fabs(half %a) #0 { +; CHECK-CVT-LABEL: test_fabs: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fabs s0, s0 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret +; CHECK-FP16-LABEL: test_fabs: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fabs h0, h0 +; CHECK-FP16-NEXT: ret %r = call half @llvm.fabs.f16(half %a) ret half %r } +define half @test_minnum(half %a, half %b) #0 { ; CHECK-CVT-LABEL: test_minnum: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fminnm s0, s0, s1 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fminnm s0, s0, s1 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_minnum: -; CHECK-FP16-NEXT: fminnm h0, h0, h1 -; CHECK-FP16-NEXT: ret - -define half @test_minnum(half %a, half %b) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fminnm h0, h0, h1 +; CHECK-FP16-NEXT: ret %r = call half @llvm.minnum.f16(half %a, half %b) ret half %r } +define half @test_maxnum(half %a, half %b) #0 { ; CHECK-CVT-LABEL: test_maxnum: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fmaxnm s0, s0, s1 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fmaxnm s0, s0, s1 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_maxnum: -; CHECK-FP16-NEXT: fmaxnm h0, h0, h1 -; CHECK-FP16-NEXT: ret - -define half @test_maxnum(half %a, half %b) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fmaxnm h0, h0, h1 +; CHECK-FP16-NEXT: ret %r = call half @llvm.maxnum.f16(half %a, half %b) ret half %r } +define half @test_copysign(half %a, half %b) #0 { ; CHECK-CVT-LABEL: test_copysign: -; CHECK-CVT-NEXT: movi.4s v2, #128, lsl #24 -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: bit.16b v0, v1, v2 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: movi.4s v2, #128, lsl #24 +; CHECK-CVT-NEXT: bit.16b v0, v1, v2 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_copysign: -; CHECK-FP16-NEXT: movi.8h v2, #128, lsl #8 -; CHECK-FP16-NEXT: bit.16b v0, v1, v2 -; CHECK-FP16-NEXT: ret - -define half @test_copysign(half %a, half %b) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: // kill: def $h1 killed $h1 def $q1 +; CHECK-FP16-NEXT: // kill: def $h0 killed $h0 def $q0 +; CHECK-FP16-NEXT: movi.8h v2, #128, lsl #8 +; CHECK-FP16-NEXT: bit.16b v0, v1, v2 +; CHECK-FP16-NEXT: // kill: def $h0 killed $h0 killed $q0 +; CHECK-FP16-NEXT: ret %r = call half @llvm.copysign.f16(half %a, half %b) ret half %r } +define half @test_copysign_f32(half %a, float %b) #0 { ; CHECK-CVT-LABEL: test_copysign_f32: -; CHECK-CVT-NEXT: movi.4s v2, #128, lsl #24 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: bit.16b v0, v1, v2 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: // kill: def $s1 killed $s1 def $q1 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: movi.4s v2, #128, lsl #24 +; CHECK-CVT-NEXT: bit.16b v0, v1, v2 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_copysign_f32: -; CHECK-FP16-NEXT: movi.8h v2, #128, lsl #8 -; CHECK-FP16-NEXT: fcvt h1, s1 -; CHECK-FP16-NEXT: bit.16b v0, v1, v2 -; CHECK-FP16-NEXT: ret - -define half @test_copysign_f32(half %a, float %b) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: // kill: def $h0 killed $h0 def $q0 +; CHECK-FP16-NEXT: fcvt h1, s1 +; CHECK-FP16-NEXT: movi.8h v2, #128, lsl #8 +; CHECK-FP16-NEXT: bit.16b v0, v1, v2 +; CHECK-FP16-NEXT: // kill: def $h0 killed $h0 killed $q0 +; CHECK-FP16-NEXT: ret %tb = fptrunc float %b to half %r = call half @llvm.copysign.f16(half %a, half %tb) ret half %r } +define half @test_copysign_f64(half %a, double %b) #0 { ; CHECK-CVT-LABEL: test_copysign_f64: -; CHECK-CVT-NEXT: movi.4s v2, #128, lsl #24 -; CHECK-CVT-NEXT: fcvt s1, d1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: bit.16b v0, v1, v2 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s1, d1 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: movi.4s v2, #128, lsl #24 +; CHECK-CVT-NEXT: bit.16b v0, v1, v2 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_copysign_f64: -; CHECK-FP16-NEXT: movi.8h v2, #128, lsl #8 -; CHECK-FP16-NEXT: fcvt h1, d1 -; CHECK-FP16-NEXT: bit.16b v0, v1, v2 -; CHECK-FP16-NEXT: ret - -define half @test_copysign_f64(half %a, double %b) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: // kill: def $h0 killed $h0 def $q0 +; CHECK-FP16-NEXT: fcvt h1, d1 +; CHECK-FP16-NEXT: movi.8h v2, #128, lsl #8 +; CHECK-FP16-NEXT: bit.16b v0, v1, v2 +; CHECK-FP16-NEXT: // kill: def $h0 killed $h0 killed $q0 +; CHECK-FP16-NEXT: ret %tb = fptrunc double %b to half %r = call half @llvm.copysign.f16(half %a, half %tb) ret half %r @@ -1159,35 +1166,28 @@ ; Check that the FP promotion will use a truncating FP_ROUND, so we can fold ; away the (fpext (fp_round )) here. +define float @test_copysign_extended(half %a, half %b) #0 { ; CHECK-CVT-LABEL: test_copysign_extended: -; CHECK-CVT-NEXT: movi.4s v2, #128, lsl #24 -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: bit.16b v0, v1, v2 -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: movi.4s v2, #128, lsl #24 +; CHECK-CVT-NEXT: bit.16b v0, v1, v2 +; CHECK-CVT-NEXT: // kill: def $s0 killed $s0 killed $q0 +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_copysign_extended: -; CHECK-FP16-NEXT: movi.8h v2, #128, lsl #8 -; CHECK-FP16-NEXT: bit.16b v0, v1, v2 -; CHECK-FP16-NEXT: fcvt s0, h0 -; CHECK-FP16-NEXT: ret - -define float @test_copysign_extended(half %a, half %b) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: // kill: def $h1 killed $h1 def $q1 +; CHECK-FP16-NEXT: // kill: def $h0 killed $h0 def $q0 +; CHECK-FP16-NEXT: movi.8h v2, #128, lsl #8 +; CHECK-FP16-NEXT: bit.16b v0, v1, v2 +; CHECK-FP16-NEXT: fcvt s0, h0 +; CHECK-FP16-NEXT: ret %r = call half @llvm.copysign.f16(half %a, half %b) %xr = fpext half %r to float ret float %xr } -; CHECK-CVT-LABEL: test_floor: -; CHECK-CVT-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0 -; CHECK-CVT-NEXT: frintm [[INT32:s[0-9]+]], [[FLOAT32]] -; CHECK-CVT-NEXT: fcvt h0, [[INT32]] -; CHECK-CVT-NEXT: ret - -; CHECK-FP16-LABEL: test_floor: -; CHECK-FP16-NEXT: frintm h0, h0 -; CHECK-FP16-NEXT: ret - ; FALLBACK-NOT: remark:{{.*}}test_floor ; FALLBACK-FP16-NOT: remark:{{.*}}test_floor @@ -1202,20 +1202,20 @@ ; GISEL-FP16-NEXT: ret define half @test_floor(half %a) #0 { +; CHECK-CVT-LABEL: test_floor: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: frintm s0, s0 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret +; CHECK-FP16-LABEL: test_floor: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frintm h0, h0 +; CHECK-FP16-NEXT: ret %r = call half @llvm.floor.f16(half %a) ret half %r } -; CHECK-CVT-LABEL: test_ceil: -; CHECK-CVT-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0 -; CHECK-CVT-NEXT: frintp [[INT32:s[0-9]+]], [[FLOAT32]] -; CHECK-CVT-NEXT: fcvt h0, [[INT32]] -; CHECK-CVT-NEXT: ret - -; CHECK-FP16-LABEL: test_ceil: -; CHECK-FP16-NEXT: frintp h0, h0 -; CHECK-FP16-NEXT: ret - ; FALLBACK-NOT: remark:{{.*}}test_ceil ; FALLBACK-FP16-NOT: remark:{{.*}}test_ceil @@ -1229,61 +1229,65 @@ ; GISEL-FP16-NEXT: frintp h0, h0 ; GISEL-FP16-NEXT: ret define half @test_ceil(half %a) #0 { +; CHECK-CVT-LABEL: test_ceil: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: frintp s0, s0 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret +; CHECK-FP16-LABEL: test_ceil: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frintp h0, h0 +; CHECK-FP16-NEXT: ret %r = call half @llvm.ceil.f16(half %a) ret half %r } +define half @test_trunc(half %a) #0 { ; CHECK-CVT-LABEL: test_trunc: -; CHECK-CVT-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0 -; CHECK-CVT-NEXT: frintz [[INT32:s[0-9]+]], [[FLOAT32]] -; CHECK-CVT-NEXT: fcvt h0, [[INT32]] -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: frintz s0, s0 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_trunc: -; CHECK-FP16-NEXT: frintz h0, h0 -; CHECK-FP16-NEXT: ret - -define half @test_trunc(half %a) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frintz h0, h0 +; CHECK-FP16-NEXT: ret %r = call half @llvm.trunc.f16(half %a) ret half %r } +define half @test_rint(half %a) #0 { ; CHECK-CVT-LABEL: test_rint: -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: frintx s0, s0 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: frintx s0, s0 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_rint: -; CHECK-FP16-NEXT: frintx h0, h0 -; CHECK-FP16-NEXT: ret - -define half @test_rint(half %a) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frintx h0, h0 +; CHECK-FP16-NEXT: ret %r = call half @llvm.rint.f16(half %a) ret half %r } +define half @test_nearbyint(half %a) #0 { ; CHECK-CVT-LABEL: test_nearbyint: -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: frinti s0, s0 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: frinti s0, s0 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_nearbyint: -; CHECK-FP16-NEXT: frinti h0, h0 -; CHECK-FP16-NEXT: ret - -define half @test_nearbyint(half %a) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frinti h0, h0 +; CHECK-FP16-NEXT: ret %r = call half @llvm.nearbyint.f16(half %a) ret half %r } -; CHECK-CVT-LABEL: test_round: -; CHECK-CVT-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0 -; CHECK-CVT-NEXT: frinta [[INT32:s[0-9]+]], [[FLOAT32]] -; CHECK-CVT-NEXT: fcvt h0, [[INT32]] -; CHECK-CVT-NEXT: ret - ; GISEL-CVT-LABEL: test_round: ; GISEL-CVT-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0 ; GISEL-CVT-NEXT: frinta [[INT32:s[0-9]+]], [[FLOAT32]] @@ -1291,25 +1295,25 @@ ; GISEL-CVT-NEXT: ret -; CHECK-FP16-LABEL: test_round: -; CHECK-FP16-NEXT: frinta h0, h0 -; CHECK-FP16-NEXT: ret - ; GISEL-FP16-LABEL: test_round: ; GISEL-FP16-NEXT: frinta h0, h0 ; GISEL-FP16-NEXT: ret define half @test_round(half %a) #0 { +; CHECK-CVT-LABEL: test_round: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: frinta s0, s0 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret +; CHECK-FP16-LABEL: test_round: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frinta h0, h0 +; CHECK-FP16-NEXT: ret %r = call half @llvm.round.f16(half %a) ret half %r } -; CHECK-CVT-LABEL: test_roundeven: -; CHECK-CVT-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0 -; CHECK-CVT-NEXT: frintn [[INT32:s[0-9]+]], [[FLOAT32]] -; CHECK-CVT-NEXT: fcvt h0, [[INT32]] -; CHECK-CVT-NEXT: ret - ; GISEL-CVT-LABEL: test_roundeven: ; GISEL-CVT-NEXT: fcvt [[FLOAT32:s[0-9]+]], h0 ; GISEL-CVT-NEXT: frintn [[INT32:s[0-9]+]], [[FLOAT32]] @@ -1317,62 +1321,68 @@ ; GISEL-CVT-NEXT: ret -; CHECK-FP16-LABEL: test_roundeven: -; CHECK-FP16-NEXT: frintn h0, h0 -; CHECK-FP16-NEXT: ret - ; GISEL-FP16-LABEL: test_roundeven: ; GISEL-FP16-NEXT: frintn h0, h0 ; GISEL-FP16-NEXT: ret define half @test_roundeven(half %a) #0 { +; CHECK-CVT-LABEL: test_roundeven: +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: frintn s0, s0 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret +; CHECK-FP16-LABEL: test_roundeven: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frintn h0, h0 +; CHECK-FP16-NEXT: ret %r = call half @llvm.roundeven.f16(half %a) ret half %r } +define half @test_fmuladd(half %a, half %b, half %c) #0 { ; CHECK-CVT-LABEL: test_fmuladd: -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fmul s0, s0, s1 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: fcvt s1, h2 -; CHECK-CVT-NEXT: fadd s0, s0, s1 -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: ret - +; CHECK-CVT: // %bb.0: +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fmul s0, s0, s1 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcvt s1, h2 +; CHECK-CVT-NEXT: fadd s0, s0, s1 +; CHECK-CVT-NEXT: fcvt h0, s0 +; CHECK-CVT-NEXT: ret ; CHECK-FP16-LABEL: test_fmuladd: -; CHECK-FP16-NEXT: fmadd h0, h0, h1, h2 -; CHECK-FP16-NEXT: ret - -define half @test_fmuladd(half %a, half %b, half %c) #0 { +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: fmadd h0, h0, h1, h2 +; CHECK-FP16-NEXT: ret %r = call half @llvm.fmuladd.f16(half %a, half %b, half %c) ret half %r } -; CHECK-FP16-LABEL: test_vrecpeh_f16: -; CHECK-FP16-NEXT: frecpe h0, h0 -; CHECK-FP16-NEXT: ret - define half @test_vrecpeh_f16(half %a) #0 { +; CHECK-FP16-LABEL: test_vrecpeh_f16: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frecpe h0, h0 +; CHECK-FP16-NEXT: ret %r = call half @llvm.aarch64.neon.frecpe.f16(half %a) ret half %r } -; CHECK-FP16-LABEL: test_vrecpxh_f16: -; CHECK-FP16-NEXT: frecpx h0, h0 -; CHECK-FP16-NEXT: ret - define half @test_vrecpxh_f16(half %a) #0 { +; CHECK-FP16-LABEL: test_vrecpxh_f16: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frecpx h0, h0 +; CHECK-FP16-NEXT: ret %r = call half @llvm.aarch64.neon.frecpx.f16(half %a) ret half %r } -; CHECK-FP16-LABEL: test_vrsqrteh_f16: -; CHECK-FP16-NEXT: frsqrte h0, h0 -; CHECK-FP16-NEXT: ret - define half @test_vrsqrteh_f16(half %a) #0 { +; CHECK-FP16-LABEL: test_vrsqrteh_f16: +; CHECK-FP16: // %bb.0: +; CHECK-FP16-NEXT: frsqrte h0, h0 +; CHECK-FP16-NEXT: ret %r = call half @llvm.aarch64.neon.frsqrte.f16(half %a) ret half %r } diff --git a/llvm/test/CodeGen/AArch64/fabs.ll b/llvm/test/CodeGen/AArch64/fabs.ll --- a/llvm/test/CodeGen/AArch64/fabs.ll +++ b/llvm/test/CodeGen/AArch64/fabs.ll @@ -23,10 +23,10 @@ ; CHECK-LABEL: still_not_fabs: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #-2147483648 -; CHECK-NEXT: fneg s2, s0 -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: fcmp s0, s1 -; CHECK-NEXT: fcsel s0, s0, s2, ge +; CHECK-NEXT: fneg s1, s0 +; CHECK-NEXT: fmov s2, w8 +; CHECK-NEXT: fcmp s0, s2 +; CHECK-NEXT: fcsel s0, s0, s1, ge ; CHECK-NEXT: ret %cmp = fcmp nnan oge float %x, -0.0 %sub = fsub nnan float -0.0, %x diff --git a/llvm/test/CodeGen/AArch64/fcvt_combine.ll b/llvm/test/CodeGen/AArch64/fcvt_combine.ll --- a/llvm/test/CodeGen/AArch64/fcvt_combine.ll +++ b/llvm/test/CodeGen/AArch64/fcvt_combine.ll @@ -180,46 +180,46 @@ define <8 x i16> @test_v8f16(<8 x half> %in) { ; CHECK-NO16-LABEL: test_v8f16: ; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: mov h2, v0.h[1] -; CHECK-NO16-NEXT: mov h3, v0.h[4] -; CHECK-NO16-NEXT: mov h4, v0.h[5] -; CHECK-NO16-NEXT: mov h5, v0.h[2] -; CHECK-NO16-NEXT: fcvt s6, h0 +; CHECK-NO16-NEXT: mov h5, v0.h[1] +; CHECK-NO16-NEXT: mov h4, v0.h[4] +; CHECK-NO16-NEXT: mov h6, v0.h[5] +; CHECK-NO16-NEXT: mov h7, v0.h[2] +; CHECK-NO16-NEXT: fcvt s16, h0 +; CHECK-NO16-NEXT: fcvt s5, h5 ; CHECK-NO16-NEXT: fmov s1, #4.00000000 -; CHECK-NO16-NEXT: mov h7, v0.h[6] -; CHECK-NO16-NEXT: mov h16, v0.h[3] -; CHECK-NO16-NEXT: fcvt s2, h2 -; CHECK-NO16-NEXT: fcvt s3, h3 +; CHECK-NO16-NEXT: mov h2, v0.h[7] +; CHECK-NO16-NEXT: mov h3, v0.h[6] ; CHECK-NO16-NEXT: fcvt s4, h4 -; CHECK-NO16-NEXT: fcvt s5, h5 -; CHECK-NO16-NEXT: fmul s6, s6, s1 -; CHECK-NO16-NEXT: mov h0, v0.h[7] +; CHECK-NO16-NEXT: mov h0, v0.h[3] +; CHECK-NO16-NEXT: fcvt s6, h6 ; CHECK-NO16-NEXT: fcvt s7, h7 -; CHECK-NO16-NEXT: fcvt s16, h16 -; CHECK-NO16-NEXT: fmul s2, s2, s1 -; CHECK-NO16-NEXT: fmul s3, s3, s1 -; CHECK-NO16-NEXT: fmul s4, s4, s1 -; CHECK-NO16-NEXT: fmul s5, s5, s1 -; CHECK-NO16-NEXT: fcvt h6, s6 +; CHECK-NO16-NEXT: fcvt s3, h3 ; CHECK-NO16-NEXT: fcvt s0, h0 -; CHECK-NO16-NEXT: fmul s7, s7, s1 ; CHECK-NO16-NEXT: fmul s16, s16, s1 -; CHECK-NO16-NEXT: fcvt h2, s2 -; CHECK-NO16-NEXT: fcvt h3, s3 -; CHECK-NO16-NEXT: fcvt h4, s4 -; CHECK-NO16-NEXT: fcvt h5, s5 +; CHECK-NO16-NEXT: fmul s5, s5, s1 +; CHECK-NO16-NEXT: fcvt s2, h2 +; CHECK-NO16-NEXT: fmul s4, s4, s1 +; CHECK-NO16-NEXT: fmul s6, s6, s1 +; CHECK-NO16-NEXT: fmul s7, s7, s1 +; CHECK-NO16-NEXT: fmul s3, s3, s1 ; CHECK-NO16-NEXT: fmul s0, s0, s1 -; CHECK-NO16-NEXT: fcvt h1, s7 -; CHECK-NO16-NEXT: mov v6.h[1], v2.h[0] -; CHECK-NO16-NEXT: fcvt h2, s16 -; CHECK-NO16-NEXT: mov v3.h[1], v4.h[0] +; CHECK-NO16-NEXT: fcvt h16, s16 +; CHECK-NO16-NEXT: fcvt h5, s5 +; CHECK-NO16-NEXT: fmul s1, s2, s1 +; CHECK-NO16-NEXT: fcvt h2, s4 +; CHECK-NO16-NEXT: fcvt h4, s6 +; CHECK-NO16-NEXT: fcvt h6, s7 +; CHECK-NO16-NEXT: fcvt h3, s3 ; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: mov v6.h[2], v5.h[0] -; CHECK-NO16-NEXT: mov v3.h[2], v1.h[0] -; CHECK-NO16-NEXT: mov v6.h[3], v2.h[0] -; CHECK-NO16-NEXT: mov v3.h[3], v0.h[0] -; CHECK-NO16-NEXT: fcvtl v0.4s, v6.4h -; CHECK-NO16-NEXT: fcvtl v1.4s, v3.4h +; CHECK-NO16-NEXT: mov v16.h[1], v5.h[0] +; CHECK-NO16-NEXT: fcvt h1, s1 +; CHECK-NO16-NEXT: mov v2.h[1], v4.h[0] +; CHECK-NO16-NEXT: mov v16.h[2], v6.h[0] +; CHECK-NO16-NEXT: mov v2.h[2], v3.h[0] +; CHECK-NO16-NEXT: mov v16.h[3], v0.h[0] +; CHECK-NO16-NEXT: mov v2.h[3], v1.h[0] +; CHECK-NO16-NEXT: fcvtl v0.4s, v16.4h +; CHECK-NO16-NEXT: fcvtl v1.4s, v2.4h ; CHECK-NO16-NEXT: fcvtzs v0.4s, v0.4s ; CHECK-NO16-NEXT: fcvtzs v1.4s, v1.4s ; CHECK-NO16-NEXT: xtn v0.4h, v0.4s @@ -238,8 +238,8 @@ define <4 x i16> @test_v4f16(<4 x half> %in) { ; CHECK-NO16-LABEL: test_v4f16: ; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: fmov v1.4s, #4.00000000 ; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: fmov v1.4s, #4.00000000 ; CHECK-NO16-NEXT: fmul v0.4s, v0.4s, v1.4s ; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s ; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h @@ -259,8 +259,8 @@ define <4 x i32> @test_v4f16_i32(<4 x half> %in) { ; CHECK-NO16-LABEL: test_v4f16_i32: ; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: fmov v1.4s, #4.00000000 ; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: fmov v1.4s, #4.00000000 ; CHECK-NO16-NEXT: fmul v0.4s, v0.4s, v1.4s ; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s ; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h @@ -352,11 +352,11 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: fmov v1.2s, #16.00000000 ; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s -; CHECK-NEXT: mov s1, v0.s[1] ; CHECK-NEXT: fcvtzs x8, s0 +; CHECK-NEXT: mov s1, v0.s[1] +; CHECK-NEXT: fcvtzs x9, s1 ; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fcvtzs x8, s1 -; CHECK-NEXT: mov v0.d[1], x8 +; CHECK-NEXT: mov v0.d[1], x9 ; CHECK-NEXT: ret %mul.i = fmul <2 x float> %f, %vcvt.i = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> %mul.i) @@ -478,100 +478,100 @@ ; CHECK-NO16-NEXT: mov h3, v0.h[5] ; CHECK-NO16-NEXT: mov h4, v0.h[6] ; CHECK-NO16-NEXT: fmov s1, #4.00000000 -; CHECK-NO16-NEXT: mov h5, v0.h[7] -; CHECK-NO16-NEXT: mov h6, v0.h[1] -; CHECK-NO16-NEXT: mov h7, v0.h[2] -; CHECK-NO16-NEXT: fcvt s16, h0 ; CHECK-NO16-NEXT: fcvt s2, h2 ; CHECK-NO16-NEXT: fcvt s3, h3 +; CHECK-NO16-NEXT: mov h5, v0.h[7] ; CHECK-NO16-NEXT: fcvt s4, h4 -; CHECK-NO16-NEXT: mov h0, v0.h[3] +; CHECK-NO16-NEXT: mov h16, v0.h[1] +; CHECK-NO16-NEXT: mov h6, v0.h[3] ; CHECK-NO16-NEXT: fcvt s5, h5 -; CHECK-NO16-NEXT: fcvt s6, h6 -; CHECK-NO16-NEXT: mov w9, #32767 -; CHECK-NO16-NEXT: mov w10, #-32768 +; CHECK-NO16-NEXT: mov h7, v0.h[2] ; CHECK-NO16-NEXT: fmul s2, s2, s1 ; CHECK-NO16-NEXT: fmul s3, s3, s1 ; CHECK-NO16-NEXT: fmul s4, s4, s1 ; CHECK-NO16-NEXT: fcvt s0, h0 +; CHECK-NO16-NEXT: fcvt s16, h16 +; CHECK-NO16-NEXT: fcvt s7, h7 ; CHECK-NO16-NEXT: fmul s5, s5, s1 -; CHECK-NO16-NEXT: fmul s6, s6, s1 +; CHECK-NO16-NEXT: fcvt s6, h6 ; CHECK-NO16-NEXT: fcvt h2, s2 ; CHECK-NO16-NEXT: fcvt h3, s3 +; CHECK-NO16-NEXT: fcvt h4, s4 ; CHECK-NO16-NEXT: fmul s0, s0, s1 +; CHECK-NO16-NEXT: fmul s16, s16, s1 +; CHECK-NO16-NEXT: mov w9, #32767 ; CHECK-NO16-NEXT: fcvt h5, s5 -; CHECK-NO16-NEXT: fcvt h6, s6 +; CHECK-NO16-NEXT: mov w10, #-32768 ; CHECK-NO16-NEXT: mov v2.h[1], v3.h[0] -; CHECK-NO16-NEXT: fcvt h3, s4 -; CHECK-NO16-NEXT: fcvt s4, h7 -; CHECK-NO16-NEXT: fmul s7, s16, s1 +; CHECK-NO16-NEXT: fmul s3, s7, s1 ; CHECK-NO16-NEXT: fcvt h0, s0 -; CHECK-NO16-NEXT: mov v2.h[2], v3.h[0] -; CHECK-NO16-NEXT: fmul s3, s4, s1 -; CHECK-NO16-NEXT: fcvt h4, s7 +; CHECK-NO16-NEXT: fmul s1, s6, s1 +; CHECK-NO16-NEXT: mov v2.h[2], v4.h[0] +; CHECK-NO16-NEXT: fcvt h4, s16 ; CHECK-NO16-NEXT: mov v2.h[3], v5.h[0] -; CHECK-NO16-NEXT: fcvt h1, s3 -; CHECK-NO16-NEXT: mov v4.h[1], v6.h[0] +; CHECK-NO16-NEXT: fcvt h3, s3 +; CHECK-NO16-NEXT: fcvt h1, s1 ; CHECK-NO16-NEXT: fcvtl v2.4s, v2.4h -; CHECK-NO16-NEXT: mov v4.h[2], v1.h[0] -; CHECK-NO16-NEXT: mov s1, v2.s[1] +; CHECK-NO16-NEXT: mov v0.h[1], v4.h[0] +; CHECK-NO16-NEXT: mov v0.h[2], v3.h[0] +; CHECK-NO16-NEXT: mov s3, v2.s[1] ; CHECK-NO16-NEXT: fcvtzs w11, s2 -; CHECK-NO16-NEXT: mov v4.h[3], v0.h[0] -; CHECK-NO16-NEXT: mov s0, v2.s[2] +; CHECK-NO16-NEXT: mov v0.h[3], v1.h[0] +; CHECK-NO16-NEXT: mov s1, v2.s[2] +; CHECK-NO16-NEXT: fcvtzs w8, s3 ; CHECK-NO16-NEXT: mov s2, v2.s[3] -; CHECK-NO16-NEXT: fcvtzs w8, s1 -; CHECK-NO16-NEXT: fcvtl v1.4s, v4.4h -; CHECK-NO16-NEXT: fcvtzs w12, s0 -; CHECK-NO16-NEXT: cmp w8, w9 +; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: fcvtzs w12, s1 ; CHECK-NO16-NEXT: fcvtzs w13, s2 +; CHECK-NO16-NEXT: cmp w8, w9 ; CHECK-NO16-NEXT: csel w8, w8, w9, lt ; CHECK-NO16-NEXT: cmn w8, #8, lsl #12 // =32768 +; CHECK-NO16-NEXT: mov s1, v0.s[1] ; CHECK-NO16-NEXT: csel w8, w8, w10, gt ; CHECK-NO16-NEXT: cmp w11, w9 ; CHECK-NO16-NEXT: csel w11, w11, w9, lt -; CHECK-NO16-NEXT: mov s0, v1.s[1] +; CHECK-NO16-NEXT: fcvtzs w14, s1 ; CHECK-NO16-NEXT: cmn w11, #8, lsl #12 // =32768 -; CHECK-NO16-NEXT: fcvtzs w15, s1 +; CHECK-NO16-NEXT: fcvtzs w15, s0 ; CHECK-NO16-NEXT: csel w11, w11, w10, gt ; CHECK-NO16-NEXT: cmp w12, w9 ; CHECK-NO16-NEXT: csel w12, w12, w9, lt +; CHECK-NO16-NEXT: mov s1, v0.s[2] ; CHECK-NO16-NEXT: cmn w12, #8, lsl #12 // =32768 -; CHECK-NO16-NEXT: fcvtzs w14, s0 +; CHECK-NO16-NEXT: mov s0, v0.s[3] ; CHECK-NO16-NEXT: csel w12, w12, w10, gt ; CHECK-NO16-NEXT: cmp w13, w9 ; CHECK-NO16-NEXT: csel w13, w13, w9, lt -; CHECK-NO16-NEXT: mov s0, v1.s[2] +; CHECK-NO16-NEXT: fcvtzs w16, s1 ; CHECK-NO16-NEXT: cmn w13, #8, lsl #12 // =32768 -; CHECK-NO16-NEXT: fmov s2, w11 +; CHECK-NO16-NEXT: fcvtzs w17, s0 ; CHECK-NO16-NEXT: csel w13, w13, w10, gt ; CHECK-NO16-NEXT: cmp w14, w9 ; CHECK-NO16-NEXT: csel w14, w14, w9, lt +; CHECK-NO16-NEXT: fmov s1, w11 ; CHECK-NO16-NEXT: cmn w14, #8, lsl #12 // =32768 -; CHECK-NO16-NEXT: fcvtzs w16, s0 ; CHECK-NO16-NEXT: csel w14, w14, w10, gt ; CHECK-NO16-NEXT: cmp w15, w9 ; CHECK-NO16-NEXT: csel w15, w15, w9, lt -; CHECK-NO16-NEXT: mov s0, v1.s[3] ; CHECK-NO16-NEXT: cmn w15, #8, lsl #12 // =32768 ; CHECK-NO16-NEXT: csel w15, w15, w10, gt ; CHECK-NO16-NEXT: cmp w16, w9 -; CHECK-NO16-NEXT: csel w11, w16, w9, lt -; CHECK-NO16-NEXT: cmn w11, #8, lsl #12 // =32768 -; CHECK-NO16-NEXT: fmov s1, w15 -; CHECK-NO16-NEXT: fcvtzs w15, s0 -; CHECK-NO16-NEXT: csel w11, w11, w10, gt -; CHECK-NO16-NEXT: mov v2.s[1], w8 -; CHECK-NO16-NEXT: mov v1.s[1], w14 -; CHECK-NO16-NEXT: cmp w15, w9 -; CHECK-NO16-NEXT: csel w8, w15, w9, lt -; CHECK-NO16-NEXT: cmn w8, #8, lsl #12 // =32768 -; CHECK-NO16-NEXT: csel w8, w8, w10, gt -; CHECK-NO16-NEXT: mov v1.s[2], w11 -; CHECK-NO16-NEXT: mov v2.s[2], w12 -; CHECK-NO16-NEXT: mov v1.s[3], w8 -; CHECK-NO16-NEXT: mov v2.s[3], w13 -; CHECK-NO16-NEXT: xtn v0.4h, v1.4s -; CHECK-NO16-NEXT: xtn2 v0.8h, v2.4s +; CHECK-NO16-NEXT: csel w16, w16, w9, lt +; CHECK-NO16-NEXT: cmn w16, #8, lsl #12 // =32768 +; CHECK-NO16-NEXT: fmov s0, w15 +; CHECK-NO16-NEXT: csel w11, w16, w10, gt +; CHECK-NO16-NEXT: cmp w17, w9 +; CHECK-NO16-NEXT: csel w9, w17, w9, lt +; CHECK-NO16-NEXT: mov v0.s[1], w14 +; CHECK-NO16-NEXT: cmn w9, #8, lsl #12 // =32768 +; CHECK-NO16-NEXT: mov v1.s[1], w8 +; CHECK-NO16-NEXT: csel w8, w9, w10, gt +; CHECK-NO16-NEXT: mov v0.s[2], w11 +; CHECK-NO16-NEXT: mov v1.s[2], w12 +; CHECK-NO16-NEXT: mov v0.s[3], w8 +; CHECK-NO16-NEXT: mov v1.s[3], w13 +; CHECK-NO16-NEXT: xtn v0.4h, v0.4s +; CHECK-NO16-NEXT: xtn2 v0.8h, v1.4s ; CHECK-NO16-NEXT: ret ; ; CHECK-FP16-LABEL: test_v8f16_sat: @@ -586,8 +586,8 @@ define <4 x i16> @test_v4f16_sat(<4 x half> %in) { ; CHECK-NO16-LABEL: test_v4f16_sat: ; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: fmov v1.4s, #4.00000000 ; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: fmov v1.4s, #4.00000000 ; CHECK-NO16-NEXT: fmul v0.4s, v0.4s, v1.4s ; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s ; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h @@ -607,8 +607,8 @@ define <4 x i32> @test_v4f16_i32_sat(<4 x half> %in) { ; CHECK-NO16-LABEL: test_v4f16_i32_sat: ; CHECK-NO16: // %bb.0: -; CHECK-NO16-NEXT: fmov v1.4s, #4.00000000 ; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NO16-NEXT: fmov v1.4s, #4.00000000 ; CHECK-NO16-NEXT: fmul v0.4s, v0.4s, v1.4s ; CHECK-NO16-NEXT: fcvtn v0.4h, v0.4s ; CHECK-NO16-NEXT: fcvtl v0.4s, v0.4h @@ -630,8 +630,8 @@ define <4 x i32> @test_extrasat(<4 x float> %f) { ; CHECK-LABEL: test_extrasat: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0xffffff00ffffff ; CHECK-NEXT: fcvtzu v0.4s, v0.4s, #3 +; CHECK-NEXT: movi v1.2d, #0xffffff00ffffff ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: bic v0.4s, #255, lsl #24 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/fdiv_combine.ll b/llvm/test/CodeGen/AArch64/fdiv_combine.ll --- a/llvm/test/CodeGen/AArch64/fdiv_combine.ll +++ b/llvm/test/CodeGen/AArch64/fdiv_combine.ll @@ -29,8 +29,8 @@ define <2 x float> @test3(<2 x i32> %in) { ; CHECK-LABEL: test3: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov.2s v1, #9.00000000 ; CHECK-NEXT: scvtf.2s v0, v0 +; CHECK-NEXT: fmov.2s v1, #9.00000000 ; CHECK-NEXT: fdiv.2s v0, v0, v1 ; CHECK-NEXT: ret entry: @@ -43,8 +43,8 @@ define <2 x float> @test4(<2 x i32> %in) { ; CHECK-LABEL: test4: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi.2s v1, #80, lsl #24 ; CHECK-NEXT: scvtf.2s v0, v0 +; CHECK-NEXT: movi.2s v1, #80, lsl #24 ; CHECK-NEXT: fdiv.2s v0, v0, v1 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll --- a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll +++ b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll @@ -9,52 +9,52 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[2] -; CHECK-CVT-NEXT: mov h7, v0.h[2] -; CHECK-CVT-NEXT: mov h16, v1.h[3] -; CHECK-CVT-NEXT: mov h17, v0.h[3] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov h6, v1.h[2] +; CHECK-CVT-NEXT: mov h7, v0.h[2] +; CHECK-CVT-NEXT: mov h17, v1.h[4] +; CHECK-CVT-NEXT: mov h18, v1.h[5] ; CHECK-CVT-NEXT: fadd s4, s5, s4 -; CHECK-CVT-NEXT: fcvt s5, h6 -; CHECK-CVT-NEXT: fcvt s6, h7 -; CHECK-CVT-NEXT: fcvt s7, h16 -; CHECK-CVT-NEXT: fcvt s16, h17 +; CHECK-CVT-NEXT: fcvt s6, h6 ; CHECK-CVT-NEXT: fadd s3, s3, s2 +; CHECK-CVT-NEXT: mov h5, v1.h[3] +; CHECK-CVT-NEXT: mov h2, v0.h[3] +; CHECK-CVT-NEXT: fcvt s7, h7 +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: fcvt s17, h17 +; CHECK-CVT-NEXT: fcvt s16, h2 ; CHECK-CVT-NEXT: fcvt h2, s4 -; CHECK-CVT-NEXT: fadd s4, s6, s5 -; CHECK-CVT-NEXT: mov h5, v1.h[4] -; CHECK-CVT-NEXT: mov h6, v0.h[4] -; CHECK-CVT-NEXT: fadd s7, s16, s7 +; CHECK-CVT-NEXT: mov h4, v0.h[4] ; CHECK-CVT-NEXT: fcvt h3, s3 -; CHECK-CVT-NEXT: mov h16, v0.h[5] -; CHECK-CVT-NEXT: fcvt h7, s7 +; CHECK-CVT-NEXT: fadd s6, s7, s6 +; CHECK-CVT-NEXT: mov h7, v0.h[5] +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: fadd s5, s16, s5 +; CHECK-CVT-NEXT: fcvt s16, h18 ; CHECK-CVT-NEXT: mov v2.h[1], v3.h[0] -; CHECK-CVT-NEXT: fcvt h3, s4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: fcvt s5, h6 -; CHECK-CVT-NEXT: mov h6, v1.h[5] -; CHECK-CVT-NEXT: mov v2.h[2], v3.h[0] -; CHECK-CVT-NEXT: fadd s3, s5, s4 -; CHECK-CVT-NEXT: fcvt s4, h6 -; CHECK-CVT-NEXT: fcvt s5, h16 -; CHECK-CVT-NEXT: mov h6, v1.h[6] -; CHECK-CVT-NEXT: mov h16, v0.h[6] +; CHECK-CVT-NEXT: fcvt s3, h7 +; CHECK-CVT-NEXT: mov h7, v1.h[6] +; CHECK-CVT-NEXT: mov h18, v0.h[6] +; CHECK-CVT-NEXT: fadd s4, s4, s17 +; CHECK-CVT-NEXT: fcvt h6, s6 +; CHECK-CVT-NEXT: fcvt s7, h7 +; CHECK-CVT-NEXT: fcvt s17, h18 ; CHECK-CVT-NEXT: mov h1, v1.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], v7.h[0] ; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: fcvt h3, s3 -; CHECK-CVT-NEXT: fadd s4, s5, s4 -; CHECK-CVT-NEXT: fcvt s5, h6 -; CHECK-CVT-NEXT: fcvt s6, h16 +; CHECK-CVT-NEXT: fadd s3, s3, s16 +; CHECK-CVT-NEXT: fcvt h5, s5 ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], v3.h[0] -; CHECK-CVT-NEXT: fcvt h3, s4 -; CHECK-CVT-NEXT: fadd s4, s6, s5 +; CHECK-CVT-NEXT: mov v2.h[2], v6.h[0] +; CHECK-CVT-NEXT: fadd s6, s17, s7 +; CHECK-CVT-NEXT: fcvt h4, s4 +; CHECK-CVT-NEXT: mov v2.h[3], v5.h[0] ; CHECK-CVT-NEXT: fadd s0, s0, s1 -; CHECK-CVT-NEXT: mov v2.h[5], v3.h[0] -; CHECK-CVT-NEXT: fcvt h3, s4 +; CHECK-CVT-NEXT: fcvt h1, s3 +; CHECK-CVT-NEXT: fcvt h3, s6 +; CHECK-CVT-NEXT: mov v2.h[4], v4.h[0] +; CHECK-CVT-NEXT: mov v2.h[5], v1.h[0] ; CHECK-CVT-NEXT: fcvt h0, s0 ; CHECK-CVT-NEXT: mov v2.h[6], v3.h[0] ; CHECK-CVT-NEXT: mov v2.h[7], v0.h[0] @@ -78,52 +78,52 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[2] -; CHECK-CVT-NEXT: mov h7, v0.h[2] -; CHECK-CVT-NEXT: mov h16, v1.h[3] -; CHECK-CVT-NEXT: mov h17, v0.h[3] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov h6, v1.h[2] +; CHECK-CVT-NEXT: mov h7, v0.h[2] +; CHECK-CVT-NEXT: mov h17, v1.h[4] +; CHECK-CVT-NEXT: mov h18, v1.h[5] ; CHECK-CVT-NEXT: fsub s4, s5, s4 -; CHECK-CVT-NEXT: fcvt s5, h6 -; CHECK-CVT-NEXT: fcvt s6, h7 -; CHECK-CVT-NEXT: fcvt s7, h16 -; CHECK-CVT-NEXT: fcvt s16, h17 +; CHECK-CVT-NEXT: fcvt s6, h6 ; CHECK-CVT-NEXT: fsub s3, s3, s2 +; CHECK-CVT-NEXT: mov h5, v1.h[3] +; CHECK-CVT-NEXT: mov h2, v0.h[3] +; CHECK-CVT-NEXT: fcvt s7, h7 +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: fcvt s17, h17 +; CHECK-CVT-NEXT: fcvt s16, h2 ; CHECK-CVT-NEXT: fcvt h2, s4 -; CHECK-CVT-NEXT: fsub s4, s6, s5 -; CHECK-CVT-NEXT: mov h5, v1.h[4] -; CHECK-CVT-NEXT: mov h6, v0.h[4] -; CHECK-CVT-NEXT: fsub s7, s16, s7 +; CHECK-CVT-NEXT: mov h4, v0.h[4] ; CHECK-CVT-NEXT: fcvt h3, s3 -; CHECK-CVT-NEXT: mov h16, v0.h[5] -; CHECK-CVT-NEXT: fcvt h7, s7 +; CHECK-CVT-NEXT: fsub s6, s7, s6 +; CHECK-CVT-NEXT: mov h7, v0.h[5] +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: fsub s5, s16, s5 +; CHECK-CVT-NEXT: fcvt s16, h18 ; CHECK-CVT-NEXT: mov v2.h[1], v3.h[0] -; CHECK-CVT-NEXT: fcvt h3, s4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: fcvt s5, h6 -; CHECK-CVT-NEXT: mov h6, v1.h[5] -; CHECK-CVT-NEXT: mov v2.h[2], v3.h[0] -; CHECK-CVT-NEXT: fsub s3, s5, s4 -; CHECK-CVT-NEXT: fcvt s4, h6 -; CHECK-CVT-NEXT: fcvt s5, h16 -; CHECK-CVT-NEXT: mov h6, v1.h[6] -; CHECK-CVT-NEXT: mov h16, v0.h[6] +; CHECK-CVT-NEXT: fcvt s3, h7 +; CHECK-CVT-NEXT: mov h7, v1.h[6] +; CHECK-CVT-NEXT: mov h18, v0.h[6] +; CHECK-CVT-NEXT: fsub s4, s4, s17 +; CHECK-CVT-NEXT: fcvt h6, s6 +; CHECK-CVT-NEXT: fcvt s7, h7 +; CHECK-CVT-NEXT: fcvt s17, h18 ; CHECK-CVT-NEXT: mov h1, v1.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], v7.h[0] ; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: fcvt h3, s3 -; CHECK-CVT-NEXT: fsub s4, s5, s4 -; CHECK-CVT-NEXT: fcvt s5, h6 -; CHECK-CVT-NEXT: fcvt s6, h16 +; CHECK-CVT-NEXT: fsub s3, s3, s16 +; CHECK-CVT-NEXT: fcvt h5, s5 ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], v3.h[0] -; CHECK-CVT-NEXT: fcvt h3, s4 -; CHECK-CVT-NEXT: fsub s4, s6, s5 +; CHECK-CVT-NEXT: mov v2.h[2], v6.h[0] +; CHECK-CVT-NEXT: fsub s6, s17, s7 +; CHECK-CVT-NEXT: fcvt h4, s4 +; CHECK-CVT-NEXT: mov v2.h[3], v5.h[0] ; CHECK-CVT-NEXT: fsub s0, s0, s1 -; CHECK-CVT-NEXT: mov v2.h[5], v3.h[0] -; CHECK-CVT-NEXT: fcvt h3, s4 +; CHECK-CVT-NEXT: fcvt h1, s3 +; CHECK-CVT-NEXT: fcvt h3, s6 +; CHECK-CVT-NEXT: mov v2.h[4], v4.h[0] +; CHECK-CVT-NEXT: mov v2.h[5], v1.h[0] ; CHECK-CVT-NEXT: fcvt h0, s0 ; CHECK-CVT-NEXT: mov v2.h[6], v3.h[0] ; CHECK-CVT-NEXT: mov v2.h[7], v0.h[0] @@ -147,52 +147,52 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[2] -; CHECK-CVT-NEXT: mov h7, v0.h[2] -; CHECK-CVT-NEXT: mov h16, v0.h[3] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: fmul s4, s5, s4 -; CHECK-CVT-NEXT: mov h5, v1.h[3] +; CHECK-CVT-NEXT: mov h6, v1.h[2] +; CHECK-CVT-NEXT: mov h7, v0.h[2] +; CHECK-CVT-NEXT: mov h16, v1.h[3] +; CHECK-CVT-NEXT: mov h17, v1.h[4] ; CHECK-CVT-NEXT: fcvt s6, h6 ; CHECK-CVT-NEXT: fcvt s7, h7 +; CHECK-CVT-NEXT: fmul s4, s5, s4 ; CHECK-CVT-NEXT: fmul s3, s3, s2 -; CHECK-CVT-NEXT: fcvt h2, s4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: fcvt s5, h16 +; CHECK-CVT-NEXT: mov h5, v0.h[3] +; CHECK-CVT-NEXT: fcvt s16, h16 +; CHECK-CVT-NEXT: mov h18, v0.h[5] +; CHECK-CVT-NEXT: fcvt s17, h17 +; CHECK-CVT-NEXT: fcvt s5, h5 ; CHECK-CVT-NEXT: fmul s6, s7, s6 -; CHECK-CVT-NEXT: mov h7, v1.h[4] -; CHECK-CVT-NEXT: mov h16, v0.h[4] +; CHECK-CVT-NEXT: mov h7, v0.h[4] +; CHECK-CVT-NEXT: fcvt h2, s4 ; CHECK-CVT-NEXT: fcvt h3, s3 -; CHECK-CVT-NEXT: fmul s4, s5, s4 -; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: fcvt h6, s6 +; CHECK-CVT-NEXT: mov h4, v1.h[5] ; CHECK-CVT-NEXT: fcvt s7, h7 +; CHECK-CVT-NEXT: fcvt s18, h18 +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: fmul s5, s5, s16 +; CHECK-CVT-NEXT: mov h16, v1.h[6] +; CHECK-CVT-NEXT: fcvt h6, s6 ; CHECK-CVT-NEXT: mov v2.h[1], v3.h[0] -; CHECK-CVT-NEXT: mov h3, v1.h[5] +; CHECK-CVT-NEXT: mov h3, v0.h[6] +; CHECK-CVT-NEXT: fmul s7, s7, s17 ; CHECK-CVT-NEXT: fcvt s16, h16 -; CHECK-CVT-NEXT: fcvt h4, s4 -; CHECK-CVT-NEXT: fcvt s5, h5 ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov v2.h[2], v6.h[0] -; CHECK-CVT-NEXT: fmul s6, s16, s7 -; CHECK-CVT-NEXT: mov h7, v1.h[6] -; CHECK-CVT-NEXT: mov h16, v0.h[6] ; CHECK-CVT-NEXT: mov h1, v1.h[7] -; CHECK-CVT-NEXT: fmul s3, s5, s3 ; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], v4.h[0] -; CHECK-CVT-NEXT: fcvt h4, s6 -; CHECK-CVT-NEXT: fcvt s5, h7 -; CHECK-CVT-NEXT: fcvt s6, h16 +; CHECK-CVT-NEXT: fmul s4, s18, s4 +; CHECK-CVT-NEXT: fcvt h5, s5 ; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt h3, s3 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], v4.h[0] -; CHECK-CVT-NEXT: fmul s4, s6, s5 +; CHECK-CVT-NEXT: mov v2.h[2], v6.h[0] +; CHECK-CVT-NEXT: fmul s3, s3, s16 +; CHECK-CVT-NEXT: fcvt h6, s7 +; CHECK-CVT-NEXT: mov v2.h[3], v5.h[0] ; CHECK-CVT-NEXT: fmul s0, s0, s1 -; CHECK-CVT-NEXT: mov v2.h[5], v3.h[0] -; CHECK-CVT-NEXT: fcvt h3, s4 +; CHECK-CVT-NEXT: fcvt h1, s4 +; CHECK-CVT-NEXT: mov v2.h[4], v6.h[0] +; CHECK-CVT-NEXT: fcvt h3, s3 +; CHECK-CVT-NEXT: mov v2.h[5], v1.h[0] ; CHECK-CVT-NEXT: fcvt h0, s0 ; CHECK-CVT-NEXT: mov v2.h[6], v3.h[0] ; CHECK-CVT-NEXT: mov v2.h[7], v0.h[0] @@ -216,56 +216,55 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h0 ; CHECK-CVT-NEXT: mov h5, v0.h[2] -; CHECK-CVT-NEXT: mov h6, v0.h[3] -; CHECK-CVT-NEXT: mov h7, v0.h[4] -; CHECK-CVT-NEXT: mov h16, v0.h[5] -; CHECK-CVT-NEXT: mov h17, v0.h[6] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: fcvt s5, h5 -; CHECK-CVT-NEXT: mov h0, v0.h[7] +; CHECK-CVT-NEXT: mov h6, v0.h[3] +; CHECK-CVT-NEXT: mov h7, v0.h[4] +; CHECK-CVT-NEXT: mov h16, v0.h[5] ; CHECK-CVT-NEXT: fcvt s6, h6 +; CHECK-CVT-NEXT: mov h17, v0.h[6] +; CHECK-CVT-NEXT: fdiv s2, s3, s2 +; CHECK-CVT-NEXT: fcvt s3, h1 ; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: fcvt s16, h16 ; CHECK-CVT-NEXT: fcvt s17, h17 -; CHECK-CVT-NEXT: fdiv s2, s3, s2 -; CHECK-CVT-NEXT: fcvt s3, h1 +; CHECK-CVT-NEXT: mov h0, v0.h[7] ; CHECK-CVT-NEXT: fcvt s0, h0 ; CHECK-CVT-NEXT: fdiv s3, s4, s3 ; CHECK-CVT-NEXT: mov h4, v1.h[2] -; CHECK-CVT-NEXT: fcvt h18, s2 ; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: fcvt h2, s2 ; CHECK-CVT-NEXT: fdiv s4, s5, s4 ; CHECK-CVT-NEXT: mov h5, v1.h[3] -; CHECK-CVT-NEXT: fcvt h2, s3 ; CHECK-CVT-NEXT: fcvt s5, h5 -; CHECK-CVT-NEXT: mov v2.h[1], v18.h[0] ; CHECK-CVT-NEXT: fdiv s5, s6, s5 ; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: fcvt h4, s4 ; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: mov v2.h[2], v4.h[0] ; CHECK-CVT-NEXT: fdiv s6, s7, s6 ; CHECK-CVT-NEXT: mov h7, v1.h[5] -; CHECK-CVT-NEXT: fcvt h4, s5 ; CHECK-CVT-NEXT: fcvt s7, h7 -; CHECK-CVT-NEXT: mov v2.h[3], v4.h[0] ; CHECK-CVT-NEXT: fdiv s7, s16, s7 ; CHECK-CVT-NEXT: mov h16, v1.h[6] ; CHECK-CVT-NEXT: mov h1, v1.h[7] ; CHECK-CVT-NEXT: fcvt s16, h16 ; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fdiv s3, s17, s16 -; CHECK-CVT-NEXT: fdiv s0, s0, s1 -; CHECK-CVT-NEXT: fcvt h1, s6 -; CHECK-CVT-NEXT: mov v2.h[4], v1.h[0] -; CHECK-CVT-NEXT: fcvt h1, s7 -; CHECK-CVT-NEXT: mov v2.h[5], v1.h[0] -; CHECK-CVT-NEXT: fcvt h1, s3 -; CHECK-CVT-NEXT: mov v2.h[6], v1.h[0] -; CHECK-CVT-NEXT: fcvt h0, s0 -; CHECK-CVT-NEXT: mov v2.h[7], v0.h[0] -; CHECK-CVT-NEXT: mov v0.16b, v2.16b +; CHECK-CVT-NEXT: fdiv s16, s17, s16 +; CHECK-CVT-NEXT: fdiv s1, s0, s1 +; CHECK-CVT-NEXT: fcvt h0, s3 +; CHECK-CVT-NEXT: fcvt h3, s4 +; CHECK-CVT-NEXT: mov v0.h[1], v2.h[0] +; CHECK-CVT-NEXT: fcvt h2, s5 +; CHECK-CVT-NEXT: mov v0.h[2], v3.h[0] +; CHECK-CVT-NEXT: fcvt h3, s6 +; CHECK-CVT-NEXT: mov v0.h[3], v2.h[0] +; CHECK-CVT-NEXT: fcvt h2, s7 +; CHECK-CVT-NEXT: mov v0.h[4], v3.h[0] +; CHECK-CVT-NEXT: fcvt h3, s16 +; CHECK-CVT-NEXT: mov v0.h[5], v2.h[0] +; CHECK-CVT-NEXT: fcvt h1, s1 +; CHECK-CVT-NEXT: mov v0.h[6], v3.h[0] +; CHECK-CVT-NEXT: mov v0.h[7], v1.h[0] ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: div_h: @@ -315,22 +314,22 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov d4, v0.d[1] ; CHECK-NEXT: fcvt h0, d0 -; CHECK-NEXT: mov d5, v1.d[1] -; CHECK-NEXT: fcvt h1, d1 +; CHECK-NEXT: fcvt h5, d1 +; CHECK-NEXT: mov d1, v1.d[1] ; CHECK-NEXT: fcvt h4, d4 +; CHECK-NEXT: fcvt h1, d1 ; CHECK-NEXT: mov v0.h[1], v4.h[0] -; CHECK-NEXT: fcvt h4, d5 -; CHECK-NEXT: mov v0.h[2], v1.h[0] -; CHECK-NEXT: fcvt h1, d2 +; CHECK-NEXT: fcvt h4, d2 ; CHECK-NEXT: mov d2, v2.d[1] -; CHECK-NEXT: mov v0.h[3], v4.h[0] -; CHECK-NEXT: fcvt h2, d2 -; CHECK-NEXT: mov v0.h[4], v1.h[0] -; CHECK-NEXT: fcvt h1, d3 -; CHECK-NEXT: mov v0.h[5], v2.h[0] -; CHECK-NEXT: mov d2, v3.d[1] -; CHECK-NEXT: mov v0.h[6], v1.h[0] +; CHECK-NEXT: mov v0.h[2], v5.h[0] +; CHECK-NEXT: mov v0.h[3], v1.h[0] ; CHECK-NEXT: fcvt h1, d2 +; CHECK-NEXT: fcvt h2, d3 +; CHECK-NEXT: mov d3, v3.d[1] +; CHECK-NEXT: mov v0.h[4], v4.h[0] +; CHECK-NEXT: mov v0.h[5], v1.h[0] +; CHECK-NEXT: fcvt h1, d3 +; CHECK-NEXT: mov v0.h[6], v2.h[0] ; CHECK-NEXT: mov v0.h[7], v1.h[0] ; CHECK-NEXT: ret %1 = fptrunc <8 x double> %a to <8 x half> @@ -340,8 +339,9 @@ define <8 x float> @h_to_s(<8 x half> %a) { ; CHECK-LABEL: h_to_s: ; CHECK: // %bb.0: +; CHECK-NEXT: fcvtl v2.4s, v0.4h ; CHECK-NEXT: fcvtl2 v1.4s, v0.8h -; CHECK-NEXT: fcvtl v0.4s, v0.4h +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %1 = fpext <8 x half> %a to <8 x float> ret <8 x float> %1 @@ -350,25 +350,25 @@ define <8 x double> @h_to_d(<8 x half> %a) { ; CHECK-LABEL: h_to_d: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: mov h1, v0.h[1] ; CHECK-NEXT: mov h3, v0.h[3] +; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: mov h4, v0.h[2] -; CHECK-NEXT: fcvt d0, h0 -; CHECK-NEXT: mov h5, v2.h[1] -; CHECK-NEXT: mov h6, v2.h[3] -; CHECK-NEXT: mov h7, v2.h[2] -; CHECK-NEXT: fcvt d16, h1 -; CHECK-NEXT: fcvt d17, h3 +; CHECK-NEXT: fcvt d5, h1 +; CHECK-NEXT: fcvt d6, h3 ; CHECK-NEXT: fcvt d1, h4 +; CHECK-NEXT: mov h3, v2.h[1] +; CHECK-NEXT: mov h4, v2.h[3] +; CHECK-NEXT: mov h7, v2.h[2] +; CHECK-NEXT: fcvt d0, h0 +; CHECK-NEXT: fcvt d16, h3 ; CHECK-NEXT: fcvt d2, h2 -; CHECK-NEXT: fcvt d4, h5 -; CHECK-NEXT: fcvt d5, h6 +; CHECK-NEXT: fcvt d4, h4 ; CHECK-NEXT: fcvt d3, h7 -; CHECK-NEXT: mov v0.d[1], v16.d[0] -; CHECK-NEXT: mov v1.d[1], v17.d[0] -; CHECK-NEXT: mov v2.d[1], v4.d[0] -; CHECK-NEXT: mov v3.d[1], v5.d[0] +; CHECK-NEXT: mov v1.d[1], v6.d[0] +; CHECK-NEXT: mov v0.d[1], v5.d[0] +; CHECK-NEXT: mov v2.d[1], v16.d[0] +; CHECK-NEXT: mov v3.d[1], v4.d[0] ; CHECK-NEXT: ret %1 = fpext <8 x half> %a to <8 x double> ret <8 x double> %1 @@ -444,24 +444,24 @@ ; CHECK-CVT-NEXT: sshll v1.4s, v1.4h, #0 ; CHECK-CVT-NEXT: sshll2 v3.4s, v0.8h, #0 ; CHECK-CVT-NEXT: sshll v0.4s, v0.4h, #0 -; CHECK-CVT-NEXT: scvtf v2.4s, v2.4s -; CHECK-CVT-NEXT: scvtf v1.4s, v1.4s ; CHECK-CVT-NEXT: scvtf v3.4s, v3.4s ; CHECK-CVT-NEXT: scvtf v0.4s, v0.4s -; CHECK-CVT-NEXT: fcvtn v2.4h, v2.4s -; CHECK-CVT-NEXT: fcvtn v1.4h, v1.4s +; CHECK-CVT-NEXT: scvtf v2.4s, v2.4s +; CHECK-CVT-NEXT: scvtf v1.4s, v1.4s ; CHECK-CVT-NEXT: fcvtn v3.4h, v3.4s ; CHECK-CVT-NEXT: fcvtn v0.4h, v0.4s -; CHECK-CVT-NEXT: mov v1.d[1], v2.d[0] +; CHECK-CVT-NEXT: fcvtn v2.4h, v2.4s +; CHECK-CVT-NEXT: fcvtn v1.4h, v1.4s ; CHECK-CVT-NEXT: mov v0.d[1], v3.d[0] +; CHECK-CVT-NEXT: mov v1.d[1], v2.d[0] ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: sitofp_v16i8: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: sshll2 v1.8h, v0.16b, #0 -; CHECK-FP16-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-FP16-NEXT: scvtf v1.8h, v1.8h -; CHECK-FP16-NEXT: scvtf v0.8h, v0.8h +; CHECK-FP16-NEXT: sshll v1.8h, v0.8b, #0 +; CHECK-FP16-NEXT: sshll2 v2.8h, v0.16b, #0 +; CHECK-FP16-NEXT: scvtf v0.8h, v1.8h +; CHECK-FP16-NEXT: scvtf v1.8h, v2.8h ; CHECK-FP16-NEXT: ret %1 = sitofp <16 x i8> %a to <16 x half> ret <16 x half> %1 @@ -569,24 +569,24 @@ ; CHECK-CVT-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-CVT-NEXT: ushll2 v3.4s, v0.8h, #0 ; CHECK-CVT-NEXT: ushll v0.4s, v0.4h, #0 -; CHECK-CVT-NEXT: ucvtf v2.4s, v2.4s -; CHECK-CVT-NEXT: ucvtf v1.4s, v1.4s ; CHECK-CVT-NEXT: ucvtf v3.4s, v3.4s ; CHECK-CVT-NEXT: ucvtf v0.4s, v0.4s -; CHECK-CVT-NEXT: fcvtn v2.4h, v2.4s -; CHECK-CVT-NEXT: fcvtn v1.4h, v1.4s +; CHECK-CVT-NEXT: ucvtf v2.4s, v2.4s +; CHECK-CVT-NEXT: ucvtf v1.4s, v1.4s ; CHECK-CVT-NEXT: fcvtn v3.4h, v3.4s ; CHECK-CVT-NEXT: fcvtn v0.4h, v0.4s -; CHECK-CVT-NEXT: mov v1.d[1], v2.d[0] +; CHECK-CVT-NEXT: fcvtn v2.4h, v2.4s +; CHECK-CVT-NEXT: fcvtn v1.4h, v1.4s ; CHECK-CVT-NEXT: mov v0.d[1], v3.d[0] +; CHECK-CVT-NEXT: mov v1.d[1], v2.d[0] ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: uitofp_v16i8: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: ushll2 v1.8h, v0.16b, #0 -; CHECK-FP16-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-FP16-NEXT: ucvtf v1.8h, v1.8h -; CHECK-FP16-NEXT: ucvtf v0.8h, v0.8h +; CHECK-FP16-NEXT: ushll v1.8h, v0.8b, #0 +; CHECK-FP16-NEXT: ushll2 v2.8h, v0.16b, #0 +; CHECK-FP16-NEXT: ucvtf v0.8h, v1.8h +; CHECK-FP16-NEXT: ucvtf v1.8h, v2.8h ; CHECK-FP16-NEXT: ret %1 = uitofp <16 x i8> %a to <16 x half> ret <16 x half> %1 @@ -752,57 +752,57 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] -; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov h6, v1.h[2] ; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: mov h2, v1.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h2, v0.h[2] +; CHECK-CVT-NEXT: mov h3, v1.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w8, ne ; CHECK-CVT-NEXT: fcmp s5, s4 -; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: mov h4, v1.h[3] +; CHECK-CVT-NEXT: mov h4, v0.h[3] ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov h5, v0.h[3] +; CHECK-CVT-NEXT: mov h5, v0.h[4] +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: csetm w9, ne +; CHECK-CVT-NEXT: fcmp s2, s6 +; CHECK-CVT-NEXT: mov h2, v1.h[4] +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: fmov s6, w9 +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w9, ne -; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 -; CHECK-CVT-NEXT: csetm w8, ne ; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 -; CHECK-CVT-NEXT: mov h1, v1.h[7] +; CHECK-CVT-NEXT: mov h3, v1.h[5] +; CHECK-CVT-NEXT: mov h4, v0.h[5] +; CHECK-CVT-NEXT: mov v6.h[1], w8 +; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: csetm w8, ne -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] -; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: fcmp s5, s2 +; CHECK-CVT-NEXT: mov v6.h[2], w9 +; CHECK-CVT-NEXT: mov h2, v1.h[6] +; CHECK-CVT-NEXT: mov h5, v0.h[6] +; CHECK-CVT-NEXT: mov v6.h[3], w8 ; CHECK-CVT-NEXT: csetm w8, ne ; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: mov h1, v1.h[7] +; CHECK-CVT-NEXT: mov h0, v0.h[7] +; CHECK-CVT-NEXT: csetm w9, ne ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 -; CHECK-CVT-NEXT: csetm w8, ne -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: mov v6.h[4], w8 ; CHECK-CVT-NEXT: csetm w8, ne ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 -; CHECK-CVT-NEXT: csetm w8, ne -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v6.h[5], w9 +; CHECK-CVT-NEXT: csetm w9, ne +; CHECK-CVT-NEXT: mov v6.h[6], w8 +; CHECK-CVT-NEXT: mov v6.h[7], w9 +; CHECK-CVT-NEXT: xtn v0.8b, v6.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_une: @@ -822,65 +822,65 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v0.h[4] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov h6, v1.h[2] +; CHECK-CVT-NEXT: fcvt s6, h6 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: mov h2, v1.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h2, v0.h[2] +; CHECK-CVT-NEXT: mov h3, v1.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w8, eq +; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: csinv w8, w8, wzr, vc ; CHECK-CVT-NEXT: fcmp s5, s4 -; CHECK-CVT-NEXT: mov h4, v1.h[3] -; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov h5, v0.h[3] -; CHECK-CVT-NEXT: csetm w9, eq -; CHECK-CVT-NEXT: csinv w9, w9, wzr, vc -; CHECK-CVT-NEXT: fcvt s4, h4 -; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fcvt s2, h5 -; CHECK-CVT-NEXT: fmov s3, w9 +; CHECK-CVT-NEXT: mov h4, v0.h[3] ; CHECK-CVT-NEXT: mov h5, v1.h[4] ; CHECK-CVT-NEXT: csetm w9, eq -; CHECK-CVT-NEXT: mov v3.h[1], w8 -; CHECK-CVT-NEXT: csinv w8, w9, wzr, vc -; CHECK-CVT-NEXT: fcmp s2, s4 -; CHECK-CVT-NEXT: fcvt s2, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 -; CHECK-CVT-NEXT: mov h5, v1.h[5] -; CHECK-CVT-NEXT: mov h6, v0.h[5] -; CHECK-CVT-NEXT: csetm w9, eq -; CHECK-CVT-NEXT: mov v3.h[2], w8 -; CHECK-CVT-NEXT: csinv w8, w9, wzr, vc -; CHECK-CVT-NEXT: fcmp s4, s2 -; CHECK-CVT-NEXT: fcvt s2, h5 +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: csinv w9, w9, wzr, vc +; CHECK-CVT-NEXT: fcmp s2, s6 +; CHECK-CVT-NEXT: mov h2, v0.h[4] +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: mov h6, v1.h[5] +; CHECK-CVT-NEXT: csetm w10, eq +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: csinv w10, w10, wzr, vc +; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: mov h3, v0.h[5] ; CHECK-CVT-NEXT: fcvt s4, h6 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov h6, v0.h[6] -; CHECK-CVT-NEXT: csetm w9, eq +; CHECK-CVT-NEXT: fmov s6, w9 +; CHECK-CVT-NEXT: csetm w11, eq +; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: csinv w11, w11, wzr, vc +; CHECK-CVT-NEXT: fcmp s2, s5 +; CHECK-CVT-NEXT: mov h2, v1.h[6] +; CHECK-CVT-NEXT: mov h5, v0.h[6] ; CHECK-CVT-NEXT: mov h1, v1.h[7] -; CHECK-CVT-NEXT: mov v3.h[3], w8 -; CHECK-CVT-NEXT: csinv w8, w9, wzr, vc -; CHECK-CVT-NEXT: fcmp s4, s2 -; CHECK-CVT-NEXT: fcvt s2, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 ; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: csetm w9, eq -; CHECK-CVT-NEXT: mov v3.h[4], w8 -; CHECK-CVT-NEXT: csinv w8, w9, wzr, vc -; CHECK-CVT-NEXT: fcmp s4, s2 +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: csinv w9, w9, wzr, vc +; CHECK-CVT-NEXT: fcmp s3, s4 +; CHECK-CVT-NEXT: fcvt s3, h5 +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: mov v6.h[1], w8 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v3.h[5], w8 ; CHECK-CVT-NEXT: csetm w8, eq ; CHECK-CVT-NEXT: csinv w8, w8, wzr, vc +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: mov v6.h[2], w10 +; CHECK-CVT-NEXT: mov v6.h[3], w11 +; CHECK-CVT-NEXT: csetm w10, eq +; CHECK-CVT-NEXT: csinv w10, w10, wzr, vc ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v3.h[6], w8 +; CHECK-CVT-NEXT: mov v6.h[4], w9 +; CHECK-CVT-NEXT: mov v6.h[5], w8 ; CHECK-CVT-NEXT: csetm w8, eq ; CHECK-CVT-NEXT: csinv w8, w8, wzr, vc -; CHECK-CVT-NEXT: mov v3.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v3.8h +; CHECK-CVT-NEXT: mov v6.h[6], w10 +; CHECK-CVT-NEXT: mov v6.h[7], w8 +; CHECK-CVT-NEXT: xtn v0.8b, v6.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_ueq: @@ -902,57 +902,57 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] -; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov h6, v1.h[2] ; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: mov h2, v1.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h2, v0.h[2] +; CHECK-CVT-NEXT: mov h3, v1.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w8, hi ; CHECK-CVT-NEXT: fcmp s5, s4 -; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: mov h4, v1.h[3] +; CHECK-CVT-NEXT: mov h4, v0.h[3] ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov h5, v0.h[3] +; CHECK-CVT-NEXT: mov h5, v0.h[4] +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: csetm w9, hi +; CHECK-CVT-NEXT: fcmp s2, s6 +; CHECK-CVT-NEXT: mov h2, v1.h[4] +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: fmov s6, w9 +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w9, hi -; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 -; CHECK-CVT-NEXT: csetm w8, hi ; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 -; CHECK-CVT-NEXT: mov h1, v1.h[7] +; CHECK-CVT-NEXT: mov h3, v1.h[5] +; CHECK-CVT-NEXT: mov h4, v0.h[5] +; CHECK-CVT-NEXT: mov v6.h[1], w8 +; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: csetm w8, hi -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] -; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: fcmp s5, s2 +; CHECK-CVT-NEXT: mov v6.h[2], w9 +; CHECK-CVT-NEXT: mov h2, v1.h[6] +; CHECK-CVT-NEXT: mov h5, v0.h[6] +; CHECK-CVT-NEXT: mov v6.h[3], w8 ; CHECK-CVT-NEXT: csetm w8, hi ; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: mov h1, v1.h[7] +; CHECK-CVT-NEXT: mov h0, v0.h[7] +; CHECK-CVT-NEXT: csetm w9, hi ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 -; CHECK-CVT-NEXT: csetm w8, hi -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: mov v6.h[4], w8 ; CHECK-CVT-NEXT: csetm w8, hi ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 -; CHECK-CVT-NEXT: csetm w8, hi -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v6.h[5], w9 +; CHECK-CVT-NEXT: csetm w9, hi +; CHECK-CVT-NEXT: mov v6.h[6], w8 +; CHECK-CVT-NEXT: mov v6.h[7], w9 +; CHECK-CVT-NEXT: xtn v0.8b, v6.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_ugt: @@ -972,57 +972,57 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] -; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov h6, v1.h[2] ; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: mov h2, v1.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h2, v0.h[2] +; CHECK-CVT-NEXT: mov h3, v1.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w8, pl ; CHECK-CVT-NEXT: fcmp s5, s4 -; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: mov h4, v1.h[3] +; CHECK-CVT-NEXT: mov h4, v0.h[3] ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov h5, v0.h[3] +; CHECK-CVT-NEXT: mov h5, v0.h[4] +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: csetm w9, pl +; CHECK-CVT-NEXT: fcmp s2, s6 +; CHECK-CVT-NEXT: mov h2, v1.h[4] +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: fmov s6, w9 +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w9, pl -; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 -; CHECK-CVT-NEXT: csetm w8, pl ; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 -; CHECK-CVT-NEXT: mov h1, v1.h[7] +; CHECK-CVT-NEXT: mov h3, v1.h[5] +; CHECK-CVT-NEXT: mov h4, v0.h[5] +; CHECK-CVT-NEXT: mov v6.h[1], w8 +; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: csetm w8, pl -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] -; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: fcmp s5, s2 +; CHECK-CVT-NEXT: mov v6.h[2], w9 +; CHECK-CVT-NEXT: mov h2, v1.h[6] +; CHECK-CVT-NEXT: mov h5, v0.h[6] +; CHECK-CVT-NEXT: mov v6.h[3], w8 ; CHECK-CVT-NEXT: csetm w8, pl ; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: mov h1, v1.h[7] +; CHECK-CVT-NEXT: mov h0, v0.h[7] +; CHECK-CVT-NEXT: csetm w9, pl ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 -; CHECK-CVT-NEXT: csetm w8, pl -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: mov v6.h[4], w8 ; CHECK-CVT-NEXT: csetm w8, pl ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 -; CHECK-CVT-NEXT: csetm w8, pl -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v6.h[5], w9 +; CHECK-CVT-NEXT: csetm w9, pl +; CHECK-CVT-NEXT: mov v6.h[6], w8 +; CHECK-CVT-NEXT: mov v6.h[7], w9 +; CHECK-CVT-NEXT: xtn v0.8b, v6.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_uge: @@ -1042,57 +1042,57 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] -; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov h6, v1.h[2] ; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: mov h2, v1.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h2, v0.h[2] +; CHECK-CVT-NEXT: mov h3, v1.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w8, lt ; CHECK-CVT-NEXT: fcmp s5, s4 -; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: mov h4, v1.h[3] +; CHECK-CVT-NEXT: mov h4, v0.h[3] ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov h5, v0.h[3] +; CHECK-CVT-NEXT: mov h5, v0.h[4] +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: csetm w9, lt +; CHECK-CVT-NEXT: fcmp s2, s6 +; CHECK-CVT-NEXT: mov h2, v1.h[4] +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: fmov s6, w9 +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w9, lt -; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 -; CHECK-CVT-NEXT: csetm w8, lt ; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 -; CHECK-CVT-NEXT: mov h1, v1.h[7] +; CHECK-CVT-NEXT: mov h3, v1.h[5] +; CHECK-CVT-NEXT: mov h4, v0.h[5] +; CHECK-CVT-NEXT: mov v6.h[1], w8 +; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: csetm w8, lt -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] -; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: fcmp s5, s2 +; CHECK-CVT-NEXT: mov v6.h[2], w9 +; CHECK-CVT-NEXT: mov h2, v1.h[6] +; CHECK-CVT-NEXT: mov h5, v0.h[6] +; CHECK-CVT-NEXT: mov v6.h[3], w8 ; CHECK-CVT-NEXT: csetm w8, lt ; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: mov h1, v1.h[7] +; CHECK-CVT-NEXT: mov h0, v0.h[7] +; CHECK-CVT-NEXT: csetm w9, lt ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 -; CHECK-CVT-NEXT: csetm w8, lt -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: mov v6.h[4], w8 ; CHECK-CVT-NEXT: csetm w8, lt ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 -; CHECK-CVT-NEXT: csetm w8, lt -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v6.h[5], w9 +; CHECK-CVT-NEXT: csetm w9, lt +; CHECK-CVT-NEXT: mov v6.h[6], w8 +; CHECK-CVT-NEXT: mov v6.h[7], w9 +; CHECK-CVT-NEXT: xtn v0.8b, v6.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_ult: @@ -1112,57 +1112,57 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] -; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov h6, v1.h[2] ; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: mov h2, v1.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h2, v0.h[2] +; CHECK-CVT-NEXT: mov h3, v1.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w8, le ; CHECK-CVT-NEXT: fcmp s5, s4 -; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: mov h4, v1.h[3] +; CHECK-CVT-NEXT: mov h4, v0.h[3] ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov h5, v0.h[3] +; CHECK-CVT-NEXT: mov h5, v0.h[4] +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: csetm w9, le +; CHECK-CVT-NEXT: fcmp s2, s6 +; CHECK-CVT-NEXT: mov h2, v1.h[4] +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: fmov s6, w9 +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w9, le -; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 -; CHECK-CVT-NEXT: csetm w8, le ; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 -; CHECK-CVT-NEXT: mov h1, v1.h[7] +; CHECK-CVT-NEXT: mov h3, v1.h[5] +; CHECK-CVT-NEXT: mov h4, v0.h[5] +; CHECK-CVT-NEXT: mov v6.h[1], w8 +; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: csetm w8, le -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] -; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: fcmp s5, s2 +; CHECK-CVT-NEXT: mov v6.h[2], w9 +; CHECK-CVT-NEXT: mov h2, v1.h[6] +; CHECK-CVT-NEXT: mov h5, v0.h[6] +; CHECK-CVT-NEXT: mov v6.h[3], w8 ; CHECK-CVT-NEXT: csetm w8, le ; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: mov h1, v1.h[7] +; CHECK-CVT-NEXT: mov h0, v0.h[7] +; CHECK-CVT-NEXT: csetm w9, le ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 -; CHECK-CVT-NEXT: csetm w8, le -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: mov v6.h[4], w8 ; CHECK-CVT-NEXT: csetm w8, le ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 -; CHECK-CVT-NEXT: csetm w8, le -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v6.h[5], w9 +; CHECK-CVT-NEXT: csetm w9, le +; CHECK-CVT-NEXT: mov v6.h[6], w8 +; CHECK-CVT-NEXT: mov v6.h[7], w9 +; CHECK-CVT-NEXT: xtn v0.8b, v6.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_ule: @@ -1182,57 +1182,57 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] -; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov h6, v1.h[2] ; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: mov h2, v1.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h2, v0.h[2] +; CHECK-CVT-NEXT: mov h3, v1.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w8, vs ; CHECK-CVT-NEXT: fcmp s5, s4 -; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: mov h4, v1.h[3] +; CHECK-CVT-NEXT: mov h4, v0.h[3] ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov h5, v0.h[3] +; CHECK-CVT-NEXT: mov h5, v0.h[4] +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: csetm w9, vs +; CHECK-CVT-NEXT: fcmp s2, s6 +; CHECK-CVT-NEXT: mov h2, v1.h[4] +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: fmov s6, w9 +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w9, vs -; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 -; CHECK-CVT-NEXT: csetm w8, vs ; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 -; CHECK-CVT-NEXT: mov h1, v1.h[7] +; CHECK-CVT-NEXT: mov h3, v1.h[5] +; CHECK-CVT-NEXT: mov h4, v0.h[5] +; CHECK-CVT-NEXT: mov v6.h[1], w8 +; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: csetm w8, vs -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] -; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: fcmp s5, s2 +; CHECK-CVT-NEXT: mov v6.h[2], w9 +; CHECK-CVT-NEXT: mov h2, v1.h[6] +; CHECK-CVT-NEXT: mov h5, v0.h[6] +; CHECK-CVT-NEXT: mov v6.h[3], w8 ; CHECK-CVT-NEXT: csetm w8, vs ; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: mov h1, v1.h[7] +; CHECK-CVT-NEXT: mov h0, v0.h[7] +; CHECK-CVT-NEXT: csetm w9, vs ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 -; CHECK-CVT-NEXT: csetm w8, vs -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: mov v6.h[4], w8 ; CHECK-CVT-NEXT: csetm w8, vs ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 -; CHECK-CVT-NEXT: csetm w8, vs -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v6.h[5], w9 +; CHECK-CVT-NEXT: csetm w9, vs +; CHECK-CVT-NEXT: mov v6.h[6], w8 +; CHECK-CVT-NEXT: mov v6.h[7], w9 +; CHECK-CVT-NEXT: xtn v0.8b, v6.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_uno: @@ -1254,65 +1254,65 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v0.h[4] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov h6, v1.h[2] +; CHECK-CVT-NEXT: fcvt s6, h6 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: mov h2, v1.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h2, v0.h[2] +; CHECK-CVT-NEXT: mov h3, v1.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w8, mi +; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: csinv w8, w8, wzr, le ; CHECK-CVT-NEXT: fcmp s5, s4 -; CHECK-CVT-NEXT: mov h4, v1.h[3] -; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov h5, v0.h[3] -; CHECK-CVT-NEXT: csetm w9, mi -; CHECK-CVT-NEXT: csinv w9, w9, wzr, le -; CHECK-CVT-NEXT: fcvt s4, h4 -; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fcvt s2, h5 -; CHECK-CVT-NEXT: fmov s3, w9 +; CHECK-CVT-NEXT: mov h4, v0.h[3] ; CHECK-CVT-NEXT: mov h5, v1.h[4] ; CHECK-CVT-NEXT: csetm w9, mi -; CHECK-CVT-NEXT: mov v3.h[1], w8 -; CHECK-CVT-NEXT: csinv w8, w9, wzr, le -; CHECK-CVT-NEXT: fcmp s2, s4 -; CHECK-CVT-NEXT: fcvt s2, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 -; CHECK-CVT-NEXT: mov h5, v1.h[5] -; CHECK-CVT-NEXT: mov h6, v0.h[5] -; CHECK-CVT-NEXT: csetm w9, mi -; CHECK-CVT-NEXT: mov v3.h[2], w8 -; CHECK-CVT-NEXT: csinv w8, w9, wzr, le -; CHECK-CVT-NEXT: fcmp s4, s2 -; CHECK-CVT-NEXT: fcvt s2, h5 +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: csinv w9, w9, wzr, le +; CHECK-CVT-NEXT: fcmp s2, s6 +; CHECK-CVT-NEXT: mov h2, v0.h[4] +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: mov h6, v1.h[5] +; CHECK-CVT-NEXT: csetm w10, mi +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: csinv w10, w10, wzr, le +; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: mov h3, v0.h[5] ; CHECK-CVT-NEXT: fcvt s4, h6 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov h6, v0.h[6] -; CHECK-CVT-NEXT: csetm w9, mi +; CHECK-CVT-NEXT: fmov s6, w9 +; CHECK-CVT-NEXT: csetm w11, mi +; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: csinv w11, w11, wzr, le +; CHECK-CVT-NEXT: fcmp s2, s5 +; CHECK-CVT-NEXT: mov h2, v1.h[6] +; CHECK-CVT-NEXT: mov h5, v0.h[6] ; CHECK-CVT-NEXT: mov h1, v1.h[7] -; CHECK-CVT-NEXT: mov v3.h[3], w8 -; CHECK-CVT-NEXT: csinv w8, w9, wzr, le -; CHECK-CVT-NEXT: fcmp s4, s2 -; CHECK-CVT-NEXT: fcvt s2, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 ; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: csetm w9, mi -; CHECK-CVT-NEXT: mov v3.h[4], w8 -; CHECK-CVT-NEXT: csinv w8, w9, wzr, le -; CHECK-CVT-NEXT: fcmp s4, s2 +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: csinv w9, w9, wzr, le +; CHECK-CVT-NEXT: fcmp s3, s4 +; CHECK-CVT-NEXT: fcvt s3, h5 +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: mov v6.h[1], w8 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v3.h[5], w8 ; CHECK-CVT-NEXT: csetm w8, mi ; CHECK-CVT-NEXT: csinv w8, w8, wzr, le +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: mov v6.h[2], w10 +; CHECK-CVT-NEXT: mov v6.h[3], w11 +; CHECK-CVT-NEXT: csetm w10, mi +; CHECK-CVT-NEXT: csinv w10, w10, wzr, le ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v3.h[6], w8 +; CHECK-CVT-NEXT: mov v6.h[4], w9 +; CHECK-CVT-NEXT: mov v6.h[5], w8 ; CHECK-CVT-NEXT: csetm w8, mi ; CHECK-CVT-NEXT: csinv w8, w8, wzr, le -; CHECK-CVT-NEXT: mov v3.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v3.8h +; CHECK-CVT-NEXT: mov v6.h[6], w10 +; CHECK-CVT-NEXT: mov v6.h[7], w8 +; CHECK-CVT-NEXT: xtn v0.8b, v6.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_one: @@ -1333,57 +1333,57 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] -; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov h6, v1.h[2] ; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: mov h2, v1.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h2, v0.h[2] +; CHECK-CVT-NEXT: mov h3, v1.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w8, eq ; CHECK-CVT-NEXT: fcmp s5, s4 -; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: mov h4, v1.h[3] +; CHECK-CVT-NEXT: mov h4, v0.h[3] ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov h5, v0.h[3] +; CHECK-CVT-NEXT: mov h5, v0.h[4] +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: csetm w9, eq +; CHECK-CVT-NEXT: fcmp s2, s6 +; CHECK-CVT-NEXT: mov h2, v1.h[4] +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: fmov s6, w9 +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w9, eq -; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 -; CHECK-CVT-NEXT: csetm w8, eq ; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 -; CHECK-CVT-NEXT: mov h1, v1.h[7] +; CHECK-CVT-NEXT: mov h3, v1.h[5] +; CHECK-CVT-NEXT: mov h4, v0.h[5] +; CHECK-CVT-NEXT: mov v6.h[1], w8 +; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: csetm w8, eq -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] -; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: fcmp s5, s2 +; CHECK-CVT-NEXT: mov v6.h[2], w9 +; CHECK-CVT-NEXT: mov h2, v1.h[6] +; CHECK-CVT-NEXT: mov h5, v0.h[6] +; CHECK-CVT-NEXT: mov v6.h[3], w8 ; CHECK-CVT-NEXT: csetm w8, eq ; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: mov h1, v1.h[7] +; CHECK-CVT-NEXT: mov h0, v0.h[7] +; CHECK-CVT-NEXT: csetm w9, eq ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 -; CHECK-CVT-NEXT: csetm w8, eq -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: mov v6.h[4], w8 ; CHECK-CVT-NEXT: csetm w8, eq ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 -; CHECK-CVT-NEXT: csetm w8, eq -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v6.h[5], w9 +; CHECK-CVT-NEXT: csetm w9, eq +; CHECK-CVT-NEXT: mov v6.h[6], w8 +; CHECK-CVT-NEXT: mov v6.h[7], w9 +; CHECK-CVT-NEXT: xtn v0.8b, v6.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_oeq: @@ -1402,57 +1402,57 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] -; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov h6, v1.h[2] ; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: mov h2, v1.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h2, v0.h[2] +; CHECK-CVT-NEXT: mov h3, v1.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w8, gt ; CHECK-CVT-NEXT: fcmp s5, s4 -; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: mov h4, v1.h[3] +; CHECK-CVT-NEXT: mov h4, v0.h[3] ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov h5, v0.h[3] +; CHECK-CVT-NEXT: mov h5, v0.h[4] +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: csetm w9, gt +; CHECK-CVT-NEXT: fcmp s2, s6 +; CHECK-CVT-NEXT: mov h2, v1.h[4] +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: fmov s6, w9 +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w9, gt -; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 -; CHECK-CVT-NEXT: csetm w8, gt ; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 -; CHECK-CVT-NEXT: mov h1, v1.h[7] +; CHECK-CVT-NEXT: mov h3, v1.h[5] +; CHECK-CVT-NEXT: mov h4, v0.h[5] +; CHECK-CVT-NEXT: mov v6.h[1], w8 +; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: csetm w8, gt -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] -; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: fcmp s5, s2 +; CHECK-CVT-NEXT: mov v6.h[2], w9 +; CHECK-CVT-NEXT: mov h2, v1.h[6] +; CHECK-CVT-NEXT: mov h5, v0.h[6] +; CHECK-CVT-NEXT: mov v6.h[3], w8 ; CHECK-CVT-NEXT: csetm w8, gt ; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: mov h1, v1.h[7] +; CHECK-CVT-NEXT: mov h0, v0.h[7] +; CHECK-CVT-NEXT: csetm w9, gt ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 -; CHECK-CVT-NEXT: csetm w8, gt -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: mov v6.h[4], w8 ; CHECK-CVT-NEXT: csetm w8, gt ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 -; CHECK-CVT-NEXT: csetm w8, gt -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v6.h[5], w9 +; CHECK-CVT-NEXT: csetm w9, gt +; CHECK-CVT-NEXT: mov v6.h[6], w8 +; CHECK-CVT-NEXT: mov v6.h[7], w9 +; CHECK-CVT-NEXT: xtn v0.8b, v6.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_ogt: @@ -1471,57 +1471,57 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] -; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov h6, v1.h[2] ; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: mov h2, v1.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h2, v0.h[2] +; CHECK-CVT-NEXT: mov h3, v1.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w8, ge ; CHECK-CVT-NEXT: fcmp s5, s4 -; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: mov h4, v1.h[3] +; CHECK-CVT-NEXT: mov h4, v0.h[3] ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov h5, v0.h[3] +; CHECK-CVT-NEXT: mov h5, v0.h[4] +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: csetm w9, ge +; CHECK-CVT-NEXT: fcmp s2, s6 +; CHECK-CVT-NEXT: mov h2, v1.h[4] +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: fmov s6, w9 +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w9, ge -; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 -; CHECK-CVT-NEXT: csetm w8, ge ; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 -; CHECK-CVT-NEXT: mov h1, v1.h[7] +; CHECK-CVT-NEXT: mov h3, v1.h[5] +; CHECK-CVT-NEXT: mov h4, v0.h[5] +; CHECK-CVT-NEXT: mov v6.h[1], w8 +; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: csetm w8, ge -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] -; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: fcmp s5, s2 +; CHECK-CVT-NEXT: mov v6.h[2], w9 +; CHECK-CVT-NEXT: mov h2, v1.h[6] +; CHECK-CVT-NEXT: mov h5, v0.h[6] +; CHECK-CVT-NEXT: mov v6.h[3], w8 ; CHECK-CVT-NEXT: csetm w8, ge ; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: mov h1, v1.h[7] +; CHECK-CVT-NEXT: mov h0, v0.h[7] +; CHECK-CVT-NEXT: csetm w9, ge ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 -; CHECK-CVT-NEXT: csetm w8, ge -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: mov v6.h[4], w8 ; CHECK-CVT-NEXT: csetm w8, ge ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 -; CHECK-CVT-NEXT: csetm w8, ge -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v6.h[5], w9 +; CHECK-CVT-NEXT: csetm w9, ge +; CHECK-CVT-NEXT: mov v6.h[6], w8 +; CHECK-CVT-NEXT: mov v6.h[7], w9 +; CHECK-CVT-NEXT: xtn v0.8b, v6.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_oge: @@ -1540,57 +1540,57 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] -; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov h6, v1.h[2] ; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: mov h2, v1.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h2, v0.h[2] +; CHECK-CVT-NEXT: mov h3, v1.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w8, mi ; CHECK-CVT-NEXT: fcmp s5, s4 -; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: mov h4, v1.h[3] +; CHECK-CVT-NEXT: mov h4, v0.h[3] ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov h5, v0.h[3] +; CHECK-CVT-NEXT: mov h5, v0.h[4] +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: csetm w9, mi +; CHECK-CVT-NEXT: fcmp s2, s6 +; CHECK-CVT-NEXT: mov h2, v1.h[4] +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: fmov s6, w9 +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w9, mi -; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 -; CHECK-CVT-NEXT: csetm w8, mi ; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 -; CHECK-CVT-NEXT: mov h1, v1.h[7] +; CHECK-CVT-NEXT: mov h3, v1.h[5] +; CHECK-CVT-NEXT: mov h4, v0.h[5] +; CHECK-CVT-NEXT: mov v6.h[1], w8 +; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: csetm w8, mi -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] -; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: fcmp s5, s2 +; CHECK-CVT-NEXT: mov v6.h[2], w9 +; CHECK-CVT-NEXT: mov h2, v1.h[6] +; CHECK-CVT-NEXT: mov h5, v0.h[6] +; CHECK-CVT-NEXT: mov v6.h[3], w8 ; CHECK-CVT-NEXT: csetm w8, mi ; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: mov h1, v1.h[7] +; CHECK-CVT-NEXT: mov h0, v0.h[7] +; CHECK-CVT-NEXT: csetm w9, mi ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 -; CHECK-CVT-NEXT: csetm w8, mi -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: mov v6.h[4], w8 ; CHECK-CVT-NEXT: csetm w8, mi ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 -; CHECK-CVT-NEXT: csetm w8, mi -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v6.h[5], w9 +; CHECK-CVT-NEXT: csetm w9, mi +; CHECK-CVT-NEXT: mov v6.h[6], w8 +; CHECK-CVT-NEXT: mov v6.h[7], w9 +; CHECK-CVT-NEXT: xtn v0.8b, v6.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_olt: @@ -1609,57 +1609,57 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] -; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov h6, v1.h[2] ; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: mov h2, v1.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h2, v0.h[2] +; CHECK-CVT-NEXT: mov h3, v1.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w8, ls ; CHECK-CVT-NEXT: fcmp s5, s4 -; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: mov h4, v1.h[3] +; CHECK-CVT-NEXT: mov h4, v0.h[3] ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov h5, v0.h[3] +; CHECK-CVT-NEXT: mov h5, v0.h[4] +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: csetm w9, ls +; CHECK-CVT-NEXT: fcmp s2, s6 +; CHECK-CVT-NEXT: mov h2, v1.h[4] +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: fmov s6, w9 +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w9, ls -; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 -; CHECK-CVT-NEXT: csetm w8, ls ; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 -; CHECK-CVT-NEXT: mov h1, v1.h[7] +; CHECK-CVT-NEXT: mov h3, v1.h[5] +; CHECK-CVT-NEXT: mov h4, v0.h[5] +; CHECK-CVT-NEXT: mov v6.h[1], w8 +; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: csetm w8, ls -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] -; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: fcmp s5, s2 +; CHECK-CVT-NEXT: mov v6.h[2], w9 +; CHECK-CVT-NEXT: mov h2, v1.h[6] +; CHECK-CVT-NEXT: mov h5, v0.h[6] +; CHECK-CVT-NEXT: mov v6.h[3], w8 ; CHECK-CVT-NEXT: csetm w8, ls ; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: mov h1, v1.h[7] +; CHECK-CVT-NEXT: mov h0, v0.h[7] +; CHECK-CVT-NEXT: csetm w9, ls ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 -; CHECK-CVT-NEXT: csetm w8, ls -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: mov v6.h[4], w8 ; CHECK-CVT-NEXT: csetm w8, ls ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 -; CHECK-CVT-NEXT: csetm w8, ls -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v6.h[5], w9 +; CHECK-CVT-NEXT: csetm w9, ls +; CHECK-CVT-NEXT: mov v6.h[6], w8 +; CHECK-CVT-NEXT: mov v6.h[7], w9 +; CHECK-CVT-NEXT: xtn v0.8b, v6.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_ole: @@ -1678,57 +1678,57 @@ ; CHECK-CVT-NEXT: mov h3, v0.h[1] ; CHECK-CVT-NEXT: fcvt s4, h1 ; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h6, v1.h[4] -; CHECK-CVT-NEXT: mov h7, v0.h[4] -; CHECK-CVT-NEXT: mov h16, v1.h[5] ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov h6, v1.h[2] ; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s7, h7 ; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: mov h2, v1.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h2, v0.h[2] +; CHECK-CVT-NEXT: mov h3, v1.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w8, vc ; CHECK-CVT-NEXT: fcmp s5, s4 -; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: mov h4, v1.h[3] +; CHECK-CVT-NEXT: mov h4, v0.h[3] ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: mov h5, v0.h[3] +; CHECK-CVT-NEXT: mov h5, v0.h[4] +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: csetm w9, vc +; CHECK-CVT-NEXT: fcmp s2, s6 +; CHECK-CVT-NEXT: mov h2, v1.h[4] +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: fmov s6, w9 +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csetm w9, vc -; CHECK-CVT-NEXT: fcmp s3, s2 -; CHECK-CVT-NEXT: fmov s2, w9 -; CHECK-CVT-NEXT: fcvt s3, h4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v0.h[5] -; CHECK-CVT-NEXT: mov v2.h[1], w8 -; CHECK-CVT-NEXT: csetm w8, vc ; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: fcvt s3, h16 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: mov h5, v1.h[6] -; CHECK-CVT-NEXT: mov v2.h[2], w8 -; CHECK-CVT-NEXT: mov h1, v1.h[7] +; CHECK-CVT-NEXT: mov h3, v1.h[5] +; CHECK-CVT-NEXT: mov h4, v0.h[5] +; CHECK-CVT-NEXT: mov v6.h[1], w8 +; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: csetm w8, vc -; CHECK-CVT-NEXT: fcmp s7, s6 -; CHECK-CVT-NEXT: mov h6, v0.h[6] -; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], w8 +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: fcmp s5, s2 +; CHECK-CVT-NEXT: mov v6.h[2], w9 +; CHECK-CVT-NEXT: mov h2, v1.h[6] +; CHECK-CVT-NEXT: mov h5, v0.h[6] +; CHECK-CVT-NEXT: mov v6.h[3], w8 ; CHECK-CVT-NEXT: csetm w8, vc ; CHECK-CVT-NEXT: fcmp s4, s3 +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h5 -; CHECK-CVT-NEXT: fcvt s4, h6 +; CHECK-CVT-NEXT: mov h1, v1.h[7] +; CHECK-CVT-NEXT: mov h0, v0.h[7] +; CHECK-CVT-NEXT: csetm w9, vc ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], w8 -; CHECK-CVT-NEXT: csetm w8, vc -; CHECK-CVT-NEXT: fcmp s4, s3 -; CHECK-CVT-NEXT: mov v2.h[5], w8 +; CHECK-CVT-NEXT: fcmp s3, s2 +; CHECK-CVT-NEXT: mov v6.h[4], w8 ; CHECK-CVT-NEXT: csetm w8, vc ; CHECK-CVT-NEXT: fcmp s0, s1 -; CHECK-CVT-NEXT: mov v2.h[6], w8 -; CHECK-CVT-NEXT: csetm w8, vc -; CHECK-CVT-NEXT: mov v2.h[7], w8 -; CHECK-CVT-NEXT: xtn v0.8b, v2.8h +; CHECK-CVT-NEXT: mov v6.h[5], w9 +; CHECK-CVT-NEXT: csetm w9, vc +; CHECK-CVT-NEXT: mov v6.h[6], w8 +; CHECK-CVT-NEXT: mov v6.h[7], w9 +; CHECK-CVT-NEXT: xtn v0.8b, v6.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_fcmp_ord: diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll --- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll @@ -60,15 +60,15 @@ ; CHECK-NEXT: // kill: def $s3 killed $s3 def $q3 ; CHECK-NEXT: // kill: def $s4 killed $s4 def $q4 ; CHECK-NEXT: mov v0.s[1], v1.s[0] -; CHECK-NEXT: fcvtzs v4.4s, v4.4s +; CHECK-NEXT: fcvtzs v1.4s, v4.4s ; CHECK-NEXT: mov v0.s[2], v2.s[0] -; CHECK-NEXT: fmov w4, s4 ; CHECK-NEXT: mov v0.s[3], v3.s[0] +; CHECK-NEXT: fmov w4, s1 ; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: mov w1, v0.s[1] ; CHECK-NEXT: mov w2, v0.s[2] ; CHECK-NEXT: mov w3, v0.s[3] -; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %x = call <5 x i32> @llvm.fptosi.sat.v5f32.v5i32(<5 x float> %f) ret <5 x i32> %x @@ -88,13 +88,13 @@ ; CHECK-NEXT: mov v0.s[2], v2.s[0] ; CHECK-NEXT: fcvtzs v1.4s, v4.4s ; CHECK-NEXT: mov v0.s[3], v3.s[0] -; CHECK-NEXT: mov w5, v1.s[1] -; CHECK-NEXT: fmov w4, s1 ; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: fmov w4, s1 +; CHECK-NEXT: mov w5, v1.s[1] +; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: mov w1, v0.s[1] ; CHECK-NEXT: mov w2, v0.s[2] ; CHECK-NEXT: mov w3, v0.s[3] -; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %x = call <6 x i32> @llvm.fptosi.sat.v6f32.v6i32(<6 x float> %f) ret <6 x i32> %x @@ -117,13 +117,13 @@ ; CHECK-NEXT: mov v0.s[3], v3.s[0] ; CHECK-NEXT: fcvtzs v1.4s, v4.4s ; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: fmov w4, s1 ; CHECK-NEXT: mov w5, v1.s[1] ; CHECK-NEXT: mov w6, v1.s[2] -; CHECK-NEXT: fmov w4, s1 +; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: mov w1, v0.s[1] ; CHECK-NEXT: mov w2, v0.s[2] ; CHECK-NEXT: mov w3, v0.s[3] -; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %x = call <7 x i32> @llvm.fptosi.sat.v7f32.v7i32(<7 x float> %f) ret <7 x i32> %x @@ -163,11 +163,11 @@ define <2 x i32> @test_signed_v2f64_v2i32(<2 x double> %f) { ; CHECK-LABEL: test_signed_v2f64_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d1, v0.d[1] ; CHECK-NEXT: fcvtzs w8, d0 +; CHECK-NEXT: mov d1, v0.d[1] +; CHECK-NEXT: fcvtzs w9, d1 ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fcvtzs w8, d1 -; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %x = call <2 x i32> @llvm.fptosi.sat.v2f64.v2i32(<2 x double> %f) @@ -178,13 +178,13 @@ ; CHECK-LABEL: test_signed_v3f64_v3i32: ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtzs w8, d0 +; CHECK-NEXT: fcvtzs w9, d1 ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fcvtzs w8, d1 -; CHECK-NEXT: mov v0.s[1], w8 ; CHECK-NEXT: fcvtzs w8, d2 +; CHECK-NEXT: mov v0.s[1], w9 +; CHECK-NEXT: fcvtzs w9, d0 ; CHECK-NEXT: mov v0.s[2], w8 -; CHECK-NEXT: fcvtzs w8, d0 -; CHECK-NEXT: mov v0.s[3], w8 +; CHECK-NEXT: mov v0.s[3], w9 ; CHECK-NEXT: ret %x = call <3 x i32> @llvm.fptosi.sat.v3f64.v3i32(<3 x double> %f) ret <3 x i32> %x @@ -193,16 +193,16 @@ define <4 x i32> @test_signed_v4f64_v4i32(<4 x double> %f) { ; CHECK-LABEL: test_signed_v4f64_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d2, v0.d[1] ; CHECK-NEXT: fcvtzs w8, d0 +; CHECK-NEXT: mov d0, v0.d[1] +; CHECK-NEXT: fcvtzs w9, d0 ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fcvtzs w8, d2 -; CHECK-NEXT: mov v0.s[1], w8 ; CHECK-NEXT: fcvtzs w8, d1 ; CHECK-NEXT: mov d1, v1.d[1] +; CHECK-NEXT: mov v0.s[1], w9 +; CHECK-NEXT: fcvtzs w9, d1 ; CHECK-NEXT: mov v0.s[2], w8 -; CHECK-NEXT: fcvtzs w8, d1 -; CHECK-NEXT: mov v0.s[3], w8 +; CHECK-NEXT: mov v0.s[3], w9 ; CHECK-NEXT: ret %x = call <4 x i32> @llvm.fptosi.sat.v4f64.v4i32(<4 x double> %f) ret <4 x i32> %x @@ -295,11 +295,11 @@ ; CHECK-NEXT: .cfi_offset w21, -24 ; CHECK-NEXT: .cfi_offset w22, -32 ; CHECK-NEXT: .cfi_offset w30, -48 -; CHECK-NEXT: mov v2.16b, v1.16b ; CHECK-NEXT: adrp x8, .LCPI15_0 ; CHECK-NEXT: stp q1, q0, [sp, #32] // 32-byte Folded Spill -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_0] +; CHECK-NEXT: mov v2.16b, v1.16b ; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_0] ; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: bl __getf2 ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload @@ -364,12 +364,12 @@ ; CHECK-NEXT: .cfi_offset w21, -24 ; CHECK-NEXT: .cfi_offset w22, -32 ; CHECK-NEXT: .cfi_offset w30, -48 -; CHECK-NEXT: stp q0, q2, [sp, #48] // 32-byte Folded Spill ; CHECK-NEXT: adrp x8, .LCPI16_0 +; CHECK-NEXT: stp q0, q2, [sp, #48] // 32-byte Folded Spill ; CHECK-NEXT: mov v2.16b, v1.16b ; CHECK-NEXT: str q1, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] ; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] ; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: bl __getf2 ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload @@ -454,11 +454,11 @@ ; CHECK-NEXT: .cfi_offset w21, -24 ; CHECK-NEXT: .cfi_offset w22, -32 ; CHECK-NEXT: .cfi_offset w30, -48 -; CHECK-NEXT: stp q2, q3, [sp, #64] // 32-byte Folded Spill ; CHECK-NEXT: adrp x8, .LCPI17_0 +; CHECK-NEXT: stp q2, q3, [sp, #64] // 32-byte Folded Spill ; CHECK-NEXT: mov v2.16b, v1.16b -; CHECK-NEXT: str q0, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str q0, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_0] ; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: str q1, [sp, #32] // 16-byte Folded Spill @@ -619,11 +619,11 @@ ; CHECK-NEXT: fcvtl2 v0.4s, v0.8h ; CHECK-NEXT: fcvtzs v1.4s, v1.4s ; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: fmov w0, s1 +; CHECK-NEXT: fmov w4, s0 ; CHECK-NEXT: mov w1, v1.s[1] ; CHECK-NEXT: mov w2, v1.s[2] ; CHECK-NEXT: mov w3, v1.s[3] -; CHECK-NEXT: fmov w0, s1 -; CHECK-NEXT: fmov w4, s0 ; CHECK-NEXT: ret %x = call <5 x i32> @llvm.fptosi.sat.v5f16.v5i32(<5 x half> %f) ret <5 x i32> %x @@ -636,12 +636,12 @@ ; CHECK-NEXT: fcvtl2 v0.4s, v0.8h ; CHECK-NEXT: fcvtzs v1.4s, v1.4s ; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: fmov w0, s1 +; CHECK-NEXT: fmov w4, s0 ; CHECK-NEXT: mov w1, v1.s[1] ; CHECK-NEXT: mov w2, v1.s[2] ; CHECK-NEXT: mov w3, v1.s[3] ; CHECK-NEXT: mov w5, v0.s[1] -; CHECK-NEXT: fmov w0, s1 -; CHECK-NEXT: fmov w4, s0 ; CHECK-NEXT: ret %x = call <6 x i32> @llvm.fptosi.sat.v6f16.v6i32(<6 x half> %f) ret <6 x i32> %x @@ -650,17 +650,17 @@ define <7 x i32> @test_signed_v7f16_v7i32(<7 x half> %f) { ; CHECK-LABEL: test_signed_v7f16_v7i32: ; CHECK: // %bb.0: -; CHECK-NEXT: fcvtl v1.4s, v0.4h -; CHECK-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-NEXT: fcvtl v0.4s, v0.4h ; CHECK-NEXT: fcvtzs v1.4s, v1.4s ; CHECK-NEXT: fcvtzs v0.4s, v0.4s -; CHECK-NEXT: mov w1, v1.s[1] -; CHECK-NEXT: mov w2, v1.s[2] -; CHECK-NEXT: mov w3, v1.s[3] -; CHECK-NEXT: mov w5, v0.s[1] -; CHECK-NEXT: mov w6, v0.s[2] -; CHECK-NEXT: fmov w0, s1 -; CHECK-NEXT: fmov w4, s0 +; CHECK-NEXT: fmov w4, s1 +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: mov w5, v1.s[1] +; CHECK-NEXT: mov w1, v0.s[1] +; CHECK-NEXT: mov w2, v0.s[2] +; CHECK-NEXT: mov w3, v0.s[3] +; CHECK-NEXT: mov w6, v1.s[2] ; CHECK-NEXT: ret %x = call <7 x i32> @llvm.fptosi.sat.v7f16.v7i32(<7 x half> %f) ret <7 x i32> %x @@ -669,10 +669,10 @@ define <8 x i32> @test_signed_v8f16_v8i32(<8 x half> %f) { ; CHECK-LABEL: test_signed_v8f16_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: fcvtl2 v1.4s, v0.8h -; CHECK-NEXT: fcvtl v0.4s, v0.4h -; CHECK-NEXT: fcvtzs v1.4s, v1.4s -; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: fcvtl v1.4s, v0.4h +; CHECK-NEXT: fcvtl2 v2.4s, v0.8h +; CHECK-NEXT: fcvtzs v0.4s, v1.4s +; CHECK-NEXT: fcvtzs v1.4s, v2.4s ; CHECK-NEXT: ret %x = call <8 x i32> @llvm.fptosi.sat.v8f16.v8i32(<8 x half> %f) ret <8 x i32> %x @@ -695,8 +695,8 @@ define <2 x i1> @test_signed_v2f32_v2i1(<2 x float> %f) { ; CHECK-LABEL: test_signed_v2f32_v2i1: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s ; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff ; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s @@ -708,8 +708,8 @@ define <2 x i8> @test_signed_v2f32_v2i8(<2 x float> %f) { ; CHECK-LABEL: test_signed_v2f32_v2i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2s, #127 ; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: movi v1.2s, #127 ; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s ; CHECK-NEXT: mvni v1.2s, #127 ; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s @@ -721,8 +721,8 @@ define <2 x i13> @test_signed_v2f32_v2i13(<2 x float> %f) { ; CHECK-LABEL: test_signed_v2f32_v2i13: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2s, #15, msl #8 ; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: movi v1.2s, #15, msl #8 ; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s ; CHECK-NEXT: mvni v1.2s, #15, msl #8 ; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s @@ -734,8 +734,8 @@ define <2 x i16> @test_signed_v2f32_v2i16(<2 x float> %f) { ; CHECK-LABEL: test_signed_v2f32_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2s, #127, msl #8 ; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: movi v1.2s, #127, msl #8 ; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s ; CHECK-NEXT: mvni v1.2s, #127, msl #8 ; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s @@ -747,8 +747,8 @@ define <2 x i19> @test_signed_v2f32_v2i19(<2 x float> %f) { ; CHECK-LABEL: test_signed_v2f32_v2i19: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2s, #3, msl #16 ; CHECK-NEXT: fcvtzs v0.2s, v0.2s +; CHECK-NEXT: movi v1.2s, #3, msl #16 ; CHECK-NEXT: smin v0.2s, v0.2s, v1.2s ; CHECK-NEXT: mvni v1.2s, #3, msl #16 ; CHECK-NEXT: smax v0.2s, v0.2s, v1.2s @@ -794,11 +794,11 @@ ; CHECK-LABEL: test_signed_v2f32_v2i64: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov s1, v0.s[1] ; CHECK-NEXT: fcvtzs x8, s0 +; CHECK-NEXT: mov s1, v0.s[1] +; CHECK-NEXT: fcvtzs x9, s1 ; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fcvtzs x8, s1 -; CHECK-NEXT: mov v0.d[1], x8 +; CHECK-NEXT: mov v0.d[1], x9 ; CHECK-NEXT: ret %x = call <2 x i64> @llvm.fptosi.sat.v2f32.v2i64(<2 x float> %f) ret <2 x i64> %x @@ -955,8 +955,8 @@ define <4 x i1> @test_signed_v4f32_v4i1(<4 x float> %f) { ; CHECK-LABEL: test_signed_v4f32_v4i1: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: movi v1.2d, #0000000000000000 ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s @@ -969,8 +969,8 @@ define <4 x i8> @test_signed_v4f32_v4i8(<4 x float> %f) { ; CHECK-LABEL: test_signed_v4f32_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.4s, #127 ; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: movi v1.4s, #127 ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: mvni v1.4s, #127 ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s @@ -983,8 +983,8 @@ define <4 x i13> @test_signed_v4f32_v4i13(<4 x float> %f) { ; CHECK-LABEL: test_signed_v4f32_v4i13: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.4s, #15, msl #8 ; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: movi v1.4s, #15, msl #8 ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: mvni v1.4s, #15, msl #8 ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s @@ -1007,8 +1007,8 @@ define <4 x i19> @test_signed_v4f32_v4i19(<4 x float> %f) { ; CHECK-LABEL: test_signed_v4f32_v4i19: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.4s, #3, msl #16 ; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: movi v1.4s, #3, msl #16 ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: mvni v1.4s, #3, msl #16 ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s @@ -1033,17 +1033,17 @@ ; CHECK-NEXT: mov x9, #562949953421311 ; CHECK-NEXT: mov x10, #-562949953421312 ; CHECK-NEXT: fcvtzs x12, s0 -; CHECK-NEXT: mov s2, v1.s[1] ; CHECK-NEXT: fcvtzs x8, s1 -; CHECK-NEXT: mov s1, v0.s[1] +; CHECK-NEXT: mov s1, v1.s[1] +; CHECK-NEXT: mov s0, v0.s[1] +; CHECK-NEXT: fcvtzs x11, s1 ; CHECK-NEXT: cmp x8, x9 -; CHECK-NEXT: fcvtzs x11, s2 ; CHECK-NEXT: csel x8, x8, x9, lt ; CHECK-NEXT: cmp x8, x10 ; CHECK-NEXT: csel x2, x8, x10, gt ; CHECK-NEXT: cmp x11, x9 ; CHECK-NEXT: csel x8, x11, x9, lt -; CHECK-NEXT: fcvtzs x11, s1 +; CHECK-NEXT: fcvtzs x11, s0 ; CHECK-NEXT: cmp x8, x10 ; CHECK-NEXT: csel x3, x8, x10, gt ; CHECK-NEXT: cmp x12, x9 @@ -1063,16 +1063,16 @@ ; CHECK-LABEL: test_signed_v4f32_v4i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: mov s3, v0.s[1] ; CHECK-NEXT: fcvtzs x9, s0 -; CHECK-NEXT: mov s2, v1.s[1] +; CHECK-NEXT: mov s2, v0.s[1] ; CHECK-NEXT: fcvtzs x8, s1 +; CHECK-NEXT: mov s1, v1.s[1] +; CHECK-NEXT: fcvtzs x10, s2 ; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: fcvtzs x9, s3 +; CHECK-NEXT: fcvtzs x11, s1 ; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: fcvtzs x8, s2 -; CHECK-NEXT: mov v0.d[1], x9 -; CHECK-NEXT: mov v1.d[1], x8 +; CHECK-NEXT: mov v0.d[1], x10 +; CHECK-NEXT: mov v1.d[1], x11 ; CHECK-NEXT: ret %x = call <4 x i64> @llvm.fptosi.sat.v4f32.v4i64(<4 x float> %f) ret <4 x i64> %x @@ -1423,11 +1423,11 @@ define <2 x i32> @test_signed_v2f64_v2i32_duplicate(<2 x double> %f) { ; CHECK-LABEL: test_signed_v2f64_v2i32_duplicate: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d1, v0.d[1] ; CHECK-NEXT: fcvtzs w8, d0 +; CHECK-NEXT: mov d1, v0.d[1] +; CHECK-NEXT: fcvtzs w9, d1 ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fcvtzs w8, d1 -; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %x = call <2 x i32> @llvm.fptosi.sat.v2f64.v2i32(<2 x double> %f) @@ -1626,8 +1626,8 @@ ; ; CHECK-FP16-LABEL: test_signed_v4f16_v4i1: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: movi v1.2d, #0000000000000000 ; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-FP16-NEXT: movi v1.2d, #0000000000000000 ; CHECK-FP16-NEXT: smin v0.4h, v0.4h, v1.4h ; CHECK-FP16-NEXT: movi v1.2d, #0xffffffffffffffff ; CHECK-FP16-NEXT: smax v0.4h, v0.4h, v1.4h @@ -1650,8 +1650,8 @@ ; ; CHECK-FP16-LABEL: test_signed_v4f16_v4i8: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: movi v1.4h, #127 ; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-FP16-NEXT: movi v1.4h, #127 ; CHECK-FP16-NEXT: smin v0.4h, v0.4h, v1.4h ; CHECK-FP16-NEXT: mvni v1.4h, #127 ; CHECK-FP16-NEXT: smax v0.4h, v0.4h, v1.4h @@ -1674,8 +1674,8 @@ ; ; CHECK-FP16-LABEL: test_signed_v4f16_v4i13: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: mvni v1.4h, #240, lsl #8 ; CHECK-FP16-NEXT: fcvtzs v0.4h, v0.4h +; CHECK-FP16-NEXT: mvni v1.4h, #240, lsl #8 ; CHECK-FP16-NEXT: smin v0.4h, v0.4h, v1.4h ; CHECK-FP16-NEXT: movi v1.4h, #240, lsl #8 ; CHECK-FP16-NEXT: smax v0.4h, v0.4h, v1.4h @@ -1728,18 +1728,18 @@ ; CHECK-CVT-LABEL: test_signed_v4f16_v4i50: ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-CVT-NEXT: mov h1, v0.h[1] -; CHECK-CVT-NEXT: fcvt s2, h0 -; CHECK-CVT-NEXT: mov h3, v0.h[2] -; CHECK-CVT-NEXT: mov h0, v0.h[3] +; CHECK-CVT-NEXT: fcvt s1, h0 +; CHECK-CVT-NEXT: mov h2, v0.h[1] ; CHECK-CVT-NEXT: mov x8, #562949953421311 ; CHECK-CVT-NEXT: mov x11, #-562949953421312 +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: fcvtzs x9, s1 +; CHECK-CVT-NEXT: mov h1, v0.h[2] +; CHECK-CVT-NEXT: mov h0, v0.h[3] ; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvtzs x9, s2 -; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcvtzs x10, s2 ; CHECK-CVT-NEXT: cmp x9, x8 -; CHECK-CVT-NEXT: fcvtzs x10, s1 -; CHECK-CVT-NEXT: fcvt s1, h3 +; CHECK-CVT-NEXT: fcvt s0, h0 ; CHECK-CVT-NEXT: csel x9, x9, x8, lt ; CHECK-CVT-NEXT: cmp x9, x11 ; CHECK-CVT-NEXT: csel x0, x9, x11, gt @@ -1762,15 +1762,15 @@ ; CHECK-FP16-LABEL: test_signed_v4f16_v4i50: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-NEXT: mov h1, v0.h[1] ; CHECK-FP16-NEXT: fcvtzs x9, h0 +; CHECK-FP16-NEXT: mov h1, v0.h[1] ; CHECK-FP16-NEXT: mov x8, #562949953421311 ; CHECK-FP16-NEXT: mov x11, #-562949953421312 -; CHECK-FP16-NEXT: cmp x9, x8 ; CHECK-FP16-NEXT: fcvtzs x10, h1 ; CHECK-FP16-NEXT: mov h1, v0.h[2] -; CHECK-FP16-NEXT: csel x9, x9, x8, lt +; CHECK-FP16-NEXT: cmp x9, x8 ; CHECK-FP16-NEXT: mov h0, v0.h[3] +; CHECK-FP16-NEXT: csel x9, x9, x8, lt ; CHECK-FP16-NEXT: cmp x9, x11 ; CHECK-FP16-NEXT: csel x0, x9, x11, gt ; CHECK-FP16-NEXT: cmp x10, x8 @@ -1796,37 +1796,37 @@ ; CHECK-CVT-LABEL: test_signed_v4f16_v4i64: ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-CVT-NEXT: mov h1, v0.h[2] +; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: fcvt s1, h0 ; CHECK-CVT-NEXT: mov h2, v0.h[1] -; CHECK-CVT-NEXT: fcvt s3, h0 ; CHECK-CVT-NEXT: mov h0, v0.h[3] -; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: fcvtzs x8, s3 -; CHECK-CVT-NEXT: fcvt s3, h0 -; CHECK-CVT-NEXT: fcvtzs x9, s1 +; CHECK-CVT-NEXT: fcvtzs x8, s1 +; CHECK-CVT-NEXT: fcvt s1, h0 +; CHECK-CVT-NEXT: fcvtzs x10, s3 +; CHECK-CVT-NEXT: fcvtzs x9, s2 ; CHECK-CVT-NEXT: fmov d0, x8 -; CHECK-CVT-NEXT: fcvtzs x8, s2 -; CHECK-CVT-NEXT: fmov d1, x9 -; CHECK-CVT-NEXT: fcvtzs x9, s3 -; CHECK-CVT-NEXT: mov v0.d[1], x8 -; CHECK-CVT-NEXT: mov v1.d[1], x9 +; CHECK-CVT-NEXT: fcvtzs x8, s1 +; CHECK-CVT-NEXT: fmov d1, x10 +; CHECK-CVT-NEXT: mov v0.d[1], x9 +; CHECK-CVT-NEXT: mov v1.d[1], x8 ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_signed_v4f16_v4i64: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-NEXT: mov h1, v0.h[2] -; CHECK-FP16-NEXT: mov h2, v0.h[1] -; CHECK-FP16-NEXT: mov h3, v0.h[3] +; CHECK-FP16-NEXT: mov h2, v0.h[2] ; CHECK-FP16-NEXT: fcvtzs x8, h0 +; CHECK-FP16-NEXT: mov h1, v0.h[1] +; CHECK-FP16-NEXT: mov h0, v0.h[3] +; CHECK-FP16-NEXT: fcvtzs x10, h2 ; CHECK-FP16-NEXT: fcvtzs x9, h1 +; CHECK-FP16-NEXT: fcvtzs x11, h0 ; CHECK-FP16-NEXT: fmov d0, x8 -; CHECK-FP16-NEXT: fcvtzs x8, h2 -; CHECK-FP16-NEXT: fmov d1, x9 -; CHECK-FP16-NEXT: fcvtzs x9, h3 -; CHECK-FP16-NEXT: mov v0.d[1], x8 -; CHECK-FP16-NEXT: mov v1.d[1], x9 +; CHECK-FP16-NEXT: fmov d1, x10 +; CHECK-FP16-NEXT: mov v0.d[1], x9 +; CHECK-FP16-NEXT: mov v1.d[1], x11 ; CHECK-FP16-NEXT: ret %x = call <4 x i64> @llvm.fptosi.sat.v4f16.v4i64(<4 x half> %f) ret <4 x i64> %x @@ -1882,12 +1882,12 @@ ; CHECK-NEXT: csel x20, xzr, x9, vs ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: mov h0, v0.h[3] +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, x25, x1, lt ; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: mov h0, v0.h[3] ; CHECK-NEXT: csel x9, x26, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 @@ -1931,8 +1931,8 @@ ; CHECK-NEXT: fmov d0, x9 ; CHECK-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: add sp, sp, #112 ; CHECK-NEXT: ret @@ -1990,12 +1990,12 @@ ; CHECK-NEXT: csel x20, xzr, x9, vs ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: mov h0, v0.h[3] +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, x25, x1, lt ; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: mov h0, v0.h[3] ; CHECK-NEXT: csel x9, x26, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 @@ -2039,8 +2039,8 @@ ; CHECK-NEXT: fmov d0, x9 ; CHECK-NEXT: ldr d10, [sp, #16] // 8-byte Folded Reload ; CHECK-NEXT: ldp x26, x25, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: ldp d9, d8, [sp, #24] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: add sp, sp, #112 ; CHECK-NEXT: ret @@ -2073,29 +2073,30 @@ ; CHECK-CVT-NEXT: fcvtzs w8, s2 ; CHECK-CVT-NEXT: mov s2, v1.s[2] ; CHECK-CVT-NEXT: mov s1, v1.s[3] +; CHECK-CVT-NEXT: fcvtzs w10, s2 +; CHECK-CVT-NEXT: mov s2, v0.s[1] ; CHECK-CVT-NEXT: cmp w8, #0 +; CHECK-CVT-NEXT: fcvtzs w11, s1 ; CHECK-CVT-NEXT: csel w8, w8, wzr, lt -; CHECK-CVT-NEXT: fcvtzs w10, s2 +; CHECK-CVT-NEXT: fcvtzs w12, s2 ; CHECK-CVT-NEXT: cmp w8, #0 -; CHECK-CVT-NEXT: mov s2, v0.s[1] +; CHECK-CVT-NEXT: mov s1, v0.s[2] ; CHECK-CVT-NEXT: csinv w8, w8, wzr, ge ; CHECK-CVT-NEXT: cmp w9, #0 ; CHECK-CVT-NEXT: csel w9, w9, wzr, lt -; CHECK-CVT-NEXT: fcvtzs w11, s1 +; CHECK-CVT-NEXT: fcvtzs w14, s1 ; CHECK-CVT-NEXT: cmp w9, #0 -; CHECK-CVT-NEXT: mov s1, v0.s[2] +; CHECK-CVT-NEXT: mov s0, v0.s[3] ; CHECK-CVT-NEXT: csinv w9, w9, wzr, ge ; CHECK-CVT-NEXT: cmp w10, #0 ; CHECK-CVT-NEXT: csel w10, w10, wzr, lt -; CHECK-CVT-NEXT: fcvtzs w12, s2 +; CHECK-CVT-NEXT: fcvtzs w15, s0 ; CHECK-CVT-NEXT: cmp w10, #0 -; CHECK-CVT-NEXT: mov s0, v0.s[3] ; CHECK-CVT-NEXT: csinv w10, w10, wzr, ge ; CHECK-CVT-NEXT: cmp w11, #0 ; CHECK-CVT-NEXT: csel w11, w11, wzr, lt -; CHECK-CVT-NEXT: fcvtzs w14, s1 +; CHECK-CVT-NEXT: fmov s0, w9 ; CHECK-CVT-NEXT: cmp w11, #0 -; CHECK-CVT-NEXT: fmov s2, w9 ; CHECK-CVT-NEXT: csinv w11, w11, wzr, ge ; CHECK-CVT-NEXT: cmp w12, #0 ; CHECK-CVT-NEXT: csel w12, w12, wzr, lt @@ -2106,30 +2107,29 @@ ; CHECK-CVT-NEXT: cmp w13, #0 ; CHECK-CVT-NEXT: csinv w13, w13, wzr, ge ; CHECK-CVT-NEXT: cmp w14, #0 -; CHECK-CVT-NEXT: csel w9, w14, wzr, lt -; CHECK-CVT-NEXT: cmp w9, #0 +; CHECK-CVT-NEXT: csel w14, w14, wzr, lt +; CHECK-CVT-NEXT: cmp w14, #0 ; CHECK-CVT-NEXT: fmov s1, w13 -; CHECK-CVT-NEXT: fcvtzs w13, s0 -; CHECK-CVT-NEXT: csinv w9, w9, wzr, ge -; CHECK-CVT-NEXT: mov v2.s[1], w8 +; CHECK-CVT-NEXT: csinv w9, w14, wzr, ge +; CHECK-CVT-NEXT: cmp w15, #0 +; CHECK-CVT-NEXT: csel w13, w15, wzr, lt ; CHECK-CVT-NEXT: mov v1.s[1], w12 ; CHECK-CVT-NEXT: cmp w13, #0 -; CHECK-CVT-NEXT: csel w8, w13, wzr, lt -; CHECK-CVT-NEXT: cmp w8, #0 -; CHECK-CVT-NEXT: csinv w8, w8, wzr, ge +; CHECK-CVT-NEXT: mov v0.s[1], w8 +; CHECK-CVT-NEXT: csinv w8, w13, wzr, ge ; CHECK-CVT-NEXT: mov v1.s[2], w9 -; CHECK-CVT-NEXT: mov v2.s[2], w10 +; CHECK-CVT-NEXT: mov v0.s[2], w10 ; CHECK-CVT-NEXT: mov v1.s[3], w8 -; CHECK-CVT-NEXT: mov v2.s[3], w11 -; CHECK-CVT-NEXT: xtn v0.4h, v1.4s -; CHECK-CVT-NEXT: xtn2 v0.8h, v2.4s -; CHECK-CVT-NEXT: xtn v0.8b, v0.8h +; CHECK-CVT-NEXT: mov v0.s[3], w11 +; CHECK-CVT-NEXT: xtn v1.4h, v1.4s +; CHECK-CVT-NEXT: xtn2 v1.8h, v0.4s +; CHECK-CVT-NEXT: xtn v0.8b, v1.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_signed_v8f16_v8i1: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: movi v1.2d, #0000000000000000 ; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-FP16-NEXT: movi v1.2d, #0000000000000000 ; CHECK-FP16-NEXT: smin v0.8h, v0.8h, v1.8h ; CHECK-FP16-NEXT: movi v1.2d, #0xffffffffffffffff ; CHECK-FP16-NEXT: smax v0.8h, v0.8h, v1.8h @@ -2143,66 +2143,66 @@ ; CHECK-CVT-LABEL: test_signed_v8f16_v8i8: ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h -; CHECK-CVT-NEXT: mov w8, #127 -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-NEXT: mov w9, #127 ; CHECK-CVT-NEXT: mov w10, #-128 +; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h ; CHECK-CVT-NEXT: mov s2, v1.s[1] ; CHECK-CVT-NEXT: fcvtzs w11, s1 ; CHECK-CVT-NEXT: fcvtzs w15, s0 -; CHECK-CVT-NEXT: fcvtzs w9, s2 +; CHECK-CVT-NEXT: fcvtzs w8, s2 ; CHECK-CVT-NEXT: mov s2, v1.s[2] ; CHECK-CVT-NEXT: mov s1, v1.s[3] -; CHECK-CVT-NEXT: cmp w9, #127 -; CHECK-CVT-NEXT: csel w9, w9, w8, lt ; CHECK-CVT-NEXT: fcvtzs w12, s2 -; CHECK-CVT-NEXT: cmn w9, #128 ; CHECK-CVT-NEXT: mov s2, v0.s[1] -; CHECK-CVT-NEXT: csel w9, w9, w10, gt -; CHECK-CVT-NEXT: cmp w11, #127 -; CHECK-CVT-NEXT: csel w11, w11, w8, lt +; CHECK-CVT-NEXT: cmp w8, #127 ; CHECK-CVT-NEXT: fcvtzs w13, s1 -; CHECK-CVT-NEXT: cmn w11, #128 +; CHECK-CVT-NEXT: csel w8, w8, w9, lt +; CHECK-CVT-NEXT: fcvtzs w14, s2 +; CHECK-CVT-NEXT: cmn w8, #128 ; CHECK-CVT-NEXT: mov s1, v0.s[2] +; CHECK-CVT-NEXT: csel w8, w8, w10, gt +; CHECK-CVT-NEXT: cmp w11, #127 +; CHECK-CVT-NEXT: csel w11, w11, w9, lt +; CHECK-CVT-NEXT: fcvtzs w16, s1 +; CHECK-CVT-NEXT: cmn w11, #128 +; CHECK-CVT-NEXT: mov s0, v0.s[3] ; CHECK-CVT-NEXT: csel w11, w11, w10, gt ; CHECK-CVT-NEXT: cmp w12, #127 -; CHECK-CVT-NEXT: csel w12, w12, w8, lt -; CHECK-CVT-NEXT: fcvtzs w14, s2 +; CHECK-CVT-NEXT: csel w12, w12, w9, lt +; CHECK-CVT-NEXT: fcvtzs w17, s0 ; CHECK-CVT-NEXT: cmn w12, #128 -; CHECK-CVT-NEXT: mov s0, v0.s[3] ; CHECK-CVT-NEXT: csel w12, w12, w10, gt ; CHECK-CVT-NEXT: cmp w13, #127 -; CHECK-CVT-NEXT: csel w13, w13, w8, lt -; CHECK-CVT-NEXT: fcvtzs w16, s1 +; CHECK-CVT-NEXT: csel w13, w13, w9, lt +; CHECK-CVT-NEXT: fmov s0, w11 ; CHECK-CVT-NEXT: cmn w13, #128 -; CHECK-CVT-NEXT: fmov s2, w11 ; CHECK-CVT-NEXT: csel w13, w13, w10, gt ; CHECK-CVT-NEXT: cmp w14, #127 -; CHECK-CVT-NEXT: csel w14, w14, w8, lt +; CHECK-CVT-NEXT: csel w14, w14, w9, lt ; CHECK-CVT-NEXT: cmn w14, #128 ; CHECK-CVT-NEXT: csel w14, w14, w10, gt ; CHECK-CVT-NEXT: cmp w15, #127 -; CHECK-CVT-NEXT: csel w15, w15, w8, lt +; CHECK-CVT-NEXT: csel w15, w15, w9, lt ; CHECK-CVT-NEXT: cmn w15, #128 ; CHECK-CVT-NEXT: csel w15, w15, w10, gt ; CHECK-CVT-NEXT: cmp w16, #127 -; CHECK-CVT-NEXT: csel w11, w16, w8, lt -; CHECK-CVT-NEXT: cmn w11, #128 +; CHECK-CVT-NEXT: csel w16, w16, w9, lt +; CHECK-CVT-NEXT: cmn w16, #128 ; CHECK-CVT-NEXT: fmov s1, w15 -; CHECK-CVT-NEXT: fcvtzs w15, s0 -; CHECK-CVT-NEXT: csel w11, w11, w10, gt -; CHECK-CVT-NEXT: mov v2.s[1], w9 +; CHECK-CVT-NEXT: csel w11, w16, w10, gt +; CHECK-CVT-NEXT: cmp w17, #127 +; CHECK-CVT-NEXT: csel w9, w17, w9, lt ; CHECK-CVT-NEXT: mov v1.s[1], w14 -; CHECK-CVT-NEXT: cmp w15, #127 -; CHECK-CVT-NEXT: csel w8, w15, w8, lt -; CHECK-CVT-NEXT: cmn w8, #128 -; CHECK-CVT-NEXT: csel w8, w8, w10, gt +; CHECK-CVT-NEXT: cmn w9, #128 +; CHECK-CVT-NEXT: mov v0.s[1], w8 +; CHECK-CVT-NEXT: csel w8, w9, w10, gt ; CHECK-CVT-NEXT: mov v1.s[2], w11 -; CHECK-CVT-NEXT: mov v2.s[2], w12 +; CHECK-CVT-NEXT: mov v0.s[2], w12 ; CHECK-CVT-NEXT: mov v1.s[3], w8 -; CHECK-CVT-NEXT: mov v2.s[3], w13 -; CHECK-CVT-NEXT: xtn v0.4h, v1.4s -; CHECK-CVT-NEXT: xtn2 v0.8h, v2.4s -; CHECK-CVT-NEXT: xtn v0.8b, v0.8h +; CHECK-CVT-NEXT: mov v0.s[3], w13 +; CHECK-CVT-NEXT: xtn v1.4h, v1.4s +; CHECK-CVT-NEXT: xtn2 v1.8h, v0.4s +; CHECK-CVT-NEXT: xtn v0.8b, v1.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_signed_v8f16_v8i8: @@ -2218,71 +2218,71 @@ ; CHECK-CVT-LABEL: test_signed_v8f16_v8i13: ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h -; CHECK-CVT-NEXT: mov w8, #4095 -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h +; CHECK-CVT-NEXT: mov w9, #4095 ; CHECK-CVT-NEXT: mov w10, #-4096 +; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h ; CHECK-CVT-NEXT: mov s2, v1.s[1] ; CHECK-CVT-NEXT: fcvtzs w11, s1 ; CHECK-CVT-NEXT: fcvtzs w15, s0 -; CHECK-CVT-NEXT: fcvtzs w9, s2 +; CHECK-CVT-NEXT: fcvtzs w8, s2 ; CHECK-CVT-NEXT: mov s2, v1.s[2] ; CHECK-CVT-NEXT: mov s1, v1.s[3] -; CHECK-CVT-NEXT: cmp w9, #4095 -; CHECK-CVT-NEXT: csel w9, w9, w8, lt ; CHECK-CVT-NEXT: fcvtzs w12, s2 -; CHECK-CVT-NEXT: cmn w9, #1, lsl #12 // =4096 ; CHECK-CVT-NEXT: mov s2, v0.s[1] -; CHECK-CVT-NEXT: csel w9, w9, w10, gt -; CHECK-CVT-NEXT: cmp w11, #4095 -; CHECK-CVT-NEXT: csel w11, w11, w8, lt +; CHECK-CVT-NEXT: cmp w8, #4095 ; CHECK-CVT-NEXT: fcvtzs w13, s1 -; CHECK-CVT-NEXT: cmn w11, #1, lsl #12 // =4096 +; CHECK-CVT-NEXT: csel w8, w8, w9, lt +; CHECK-CVT-NEXT: fcvtzs w14, s2 +; CHECK-CVT-NEXT: cmn w8, #1, lsl #12 // =4096 ; CHECK-CVT-NEXT: mov s1, v0.s[2] +; CHECK-CVT-NEXT: csel w8, w8, w10, gt +; CHECK-CVT-NEXT: cmp w11, #4095 +; CHECK-CVT-NEXT: csel w11, w11, w9, lt +; CHECK-CVT-NEXT: fcvtzs w16, s1 +; CHECK-CVT-NEXT: cmn w11, #1, lsl #12 // =4096 +; CHECK-CVT-NEXT: mov s0, v0.s[3] ; CHECK-CVT-NEXT: csel w11, w11, w10, gt ; CHECK-CVT-NEXT: cmp w12, #4095 -; CHECK-CVT-NEXT: csel w12, w12, w8, lt -; CHECK-CVT-NEXT: fcvtzs w14, s2 +; CHECK-CVT-NEXT: csel w12, w12, w9, lt +; CHECK-CVT-NEXT: fcvtzs w17, s0 ; CHECK-CVT-NEXT: cmn w12, #1, lsl #12 // =4096 -; CHECK-CVT-NEXT: mov s0, v0.s[3] ; CHECK-CVT-NEXT: csel w12, w12, w10, gt ; CHECK-CVT-NEXT: cmp w13, #4095 -; CHECK-CVT-NEXT: csel w13, w13, w8, lt -; CHECK-CVT-NEXT: fcvtzs w16, s1 +; CHECK-CVT-NEXT: csel w13, w13, w9, lt +; CHECK-CVT-NEXT: fmov s1, w11 ; CHECK-CVT-NEXT: cmn w13, #1, lsl #12 // =4096 -; CHECK-CVT-NEXT: fmov s2, w11 ; CHECK-CVT-NEXT: csel w13, w13, w10, gt ; CHECK-CVT-NEXT: cmp w14, #4095 -; CHECK-CVT-NEXT: csel w14, w14, w8, lt +; CHECK-CVT-NEXT: csel w14, w14, w9, lt ; CHECK-CVT-NEXT: cmn w14, #1, lsl #12 // =4096 ; CHECK-CVT-NEXT: csel w14, w14, w10, gt ; CHECK-CVT-NEXT: cmp w15, #4095 -; CHECK-CVT-NEXT: csel w15, w15, w8, lt +; CHECK-CVT-NEXT: csel w15, w15, w9, lt ; CHECK-CVT-NEXT: cmn w15, #1, lsl #12 // =4096 ; CHECK-CVT-NEXT: csel w15, w15, w10, gt ; CHECK-CVT-NEXT: cmp w16, #4095 -; CHECK-CVT-NEXT: csel w11, w16, w8, lt -; CHECK-CVT-NEXT: cmn w11, #1, lsl #12 // =4096 -; CHECK-CVT-NEXT: fmov s1, w15 -; CHECK-CVT-NEXT: fcvtzs w15, s0 -; CHECK-CVT-NEXT: csel w11, w11, w10, gt -; CHECK-CVT-NEXT: mov v2.s[1], w9 -; CHECK-CVT-NEXT: mov v1.s[1], w14 -; CHECK-CVT-NEXT: cmp w15, #4095 -; CHECK-CVT-NEXT: csel w8, w15, w8, lt -; CHECK-CVT-NEXT: cmn w8, #1, lsl #12 // =4096 -; CHECK-CVT-NEXT: csel w8, w8, w10, gt -; CHECK-CVT-NEXT: mov v1.s[2], w11 -; CHECK-CVT-NEXT: mov v2.s[2], w12 -; CHECK-CVT-NEXT: mov v1.s[3], w8 -; CHECK-CVT-NEXT: mov v2.s[3], w13 -; CHECK-CVT-NEXT: xtn v0.4h, v1.4s -; CHECK-CVT-NEXT: xtn2 v0.8h, v2.4s +; CHECK-CVT-NEXT: csel w16, w16, w9, lt +; CHECK-CVT-NEXT: cmn w16, #1, lsl #12 // =4096 +; CHECK-CVT-NEXT: fmov s0, w15 +; CHECK-CVT-NEXT: csel w11, w16, w10, gt +; CHECK-CVT-NEXT: cmp w17, #4095 +; CHECK-CVT-NEXT: csel w9, w17, w9, lt +; CHECK-CVT-NEXT: mov v0.s[1], w14 +; CHECK-CVT-NEXT: cmn w9, #1, lsl #12 // =4096 +; CHECK-CVT-NEXT: mov v1.s[1], w8 +; CHECK-CVT-NEXT: csel w8, w9, w10, gt +; CHECK-CVT-NEXT: mov v0.s[2], w11 +; CHECK-CVT-NEXT: mov v1.s[2], w12 +; CHECK-CVT-NEXT: mov v0.s[3], w8 +; CHECK-CVT-NEXT: mov v1.s[3], w13 +; CHECK-CVT-NEXT: xtn v0.4h, v0.4s +; CHECK-CVT-NEXT: xtn2 v0.8h, v1.4s ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_signed_v8f16_v8i13: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: mvni v1.8h, #240, lsl #8 ; CHECK-FP16-NEXT: fcvtzs v0.8h, v0.8h +; CHECK-FP16-NEXT: mvni v1.8h, #240, lsl #8 ; CHECK-FP16-NEXT: smin v0.8h, v0.8h, v1.8h ; CHECK-FP16-NEXT: movi v1.8h, #240, lsl #8 ; CHECK-FP16-NEXT: smax v0.8h, v0.8h, v1.8h @@ -2296,37 +2296,38 @@ ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h ; CHECK-CVT-NEXT: mov w8, #32767 -; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h ; CHECK-CVT-NEXT: mov w10, #-32768 +; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h ; CHECK-CVT-NEXT: mov s2, v1.s[1] ; CHECK-CVT-NEXT: fcvtzs w11, s1 ; CHECK-CVT-NEXT: fcvtzs w15, s0 ; CHECK-CVT-NEXT: fcvtzs w9, s2 ; CHECK-CVT-NEXT: mov s2, v1.s[2] ; CHECK-CVT-NEXT: mov s1, v1.s[3] +; CHECK-CVT-NEXT: fcvtzs w12, s2 +; CHECK-CVT-NEXT: mov s2, v0.s[1] ; CHECK-CVT-NEXT: cmp w9, w8 +; CHECK-CVT-NEXT: fcvtzs w13, s1 ; CHECK-CVT-NEXT: csel w9, w9, w8, lt -; CHECK-CVT-NEXT: fcvtzs w12, s2 +; CHECK-CVT-NEXT: fcvtzs w14, s2 ; CHECK-CVT-NEXT: cmn w9, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: mov s2, v0.s[1] +; CHECK-CVT-NEXT: mov s1, v0.s[2] ; CHECK-CVT-NEXT: csel w9, w9, w10, gt ; CHECK-CVT-NEXT: cmp w11, w8 ; CHECK-CVT-NEXT: csel w11, w11, w8, lt -; CHECK-CVT-NEXT: fcvtzs w13, s1 +; CHECK-CVT-NEXT: fcvtzs w16, s1 ; CHECK-CVT-NEXT: cmn w11, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: mov s1, v0.s[2] +; CHECK-CVT-NEXT: mov s0, v0.s[3] ; CHECK-CVT-NEXT: csel w11, w11, w10, gt ; CHECK-CVT-NEXT: cmp w12, w8 ; CHECK-CVT-NEXT: csel w12, w12, w8, lt -; CHECK-CVT-NEXT: fcvtzs w14, s2 +; CHECK-CVT-NEXT: fcvtzs w17, s0 ; CHECK-CVT-NEXT: cmn w12, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: mov s0, v0.s[3] ; CHECK-CVT-NEXT: csel w12, w12, w10, gt ; CHECK-CVT-NEXT: cmp w13, w8 ; CHECK-CVT-NEXT: csel w13, w13, w8, lt -; CHECK-CVT-NEXT: fcvtzs w16, s1 +; CHECK-CVT-NEXT: fmov s1, w11 ; CHECK-CVT-NEXT: cmn w13, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: fmov s2, w11 ; CHECK-CVT-NEXT: csel w13, w13, w10, gt ; CHECK-CVT-NEXT: cmp w14, w8 ; CHECK-CVT-NEXT: csel w14, w14, w8, lt @@ -2337,23 +2338,22 @@ ; CHECK-CVT-NEXT: cmn w15, #8, lsl #12 // =32768 ; CHECK-CVT-NEXT: csel w15, w15, w10, gt ; CHECK-CVT-NEXT: cmp w16, w8 -; CHECK-CVT-NEXT: csel w11, w16, w8, lt -; CHECK-CVT-NEXT: cmn w11, #8, lsl #12 // =32768 -; CHECK-CVT-NEXT: fmov s1, w15 -; CHECK-CVT-NEXT: fcvtzs w15, s0 -; CHECK-CVT-NEXT: csel w11, w11, w10, gt -; CHECK-CVT-NEXT: mov v2.s[1], w9 -; CHECK-CVT-NEXT: mov v1.s[1], w14 -; CHECK-CVT-NEXT: cmp w15, w8 -; CHECK-CVT-NEXT: csel w8, w15, w8, lt +; CHECK-CVT-NEXT: csel w16, w16, w8, lt +; CHECK-CVT-NEXT: cmn w16, #8, lsl #12 // =32768 +; CHECK-CVT-NEXT: fmov s0, w15 +; CHECK-CVT-NEXT: csel w11, w16, w10, gt +; CHECK-CVT-NEXT: cmp w17, w8 +; CHECK-CVT-NEXT: csel w8, w17, w8, lt +; CHECK-CVT-NEXT: mov v0.s[1], w14 ; CHECK-CVT-NEXT: cmn w8, #8, lsl #12 // =32768 ; CHECK-CVT-NEXT: csel w8, w8, w10, gt -; CHECK-CVT-NEXT: mov v1.s[2], w11 -; CHECK-CVT-NEXT: mov v2.s[2], w12 -; CHECK-CVT-NEXT: mov v1.s[3], w8 -; CHECK-CVT-NEXT: mov v2.s[3], w13 -; CHECK-CVT-NEXT: xtn v0.4h, v1.4s -; CHECK-CVT-NEXT: xtn2 v0.8h, v2.4s +; CHECK-CVT-NEXT: mov v1.s[1], w9 +; CHECK-CVT-NEXT: mov v0.s[2], w11 +; CHECK-CVT-NEXT: mov v1.s[2], w12 +; CHECK-CVT-NEXT: mov v0.s[3], w8 +; CHECK-CVT-NEXT: mov v1.s[3], w13 +; CHECK-CVT-NEXT: xtn v0.4h, v0.4s +; CHECK-CVT-NEXT: xtn2 v0.8h, v1.4s ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_signed_v8f16_v8i16: @@ -2367,24 +2367,24 @@ define <8 x i19> @test_signed_v8f16_v8i19(<8 x half> %f) { ; CHECK-LABEL: test_signed_v8f16_v8i19: ; CHECK: // %bb.0: -; CHECK-NEXT: fcvtl v2.4s, v0.4h +; CHECK-NEXT: fcvtl v1.4s, v0.4h ; CHECK-NEXT: fcvtl2 v0.4s, v0.8h -; CHECK-NEXT: movi v1.4s, #3, msl #16 +; CHECK-NEXT: movi v2.4s, #3, msl #16 ; CHECK-NEXT: mvni v3.4s, #3, msl #16 -; CHECK-NEXT: fcvtzs v2.4s, v2.4s +; CHECK-NEXT: fcvtzs v1.4s, v1.4s ; CHECK-NEXT: fcvtzs v0.4s, v0.4s -; CHECK-NEXT: smin v2.4s, v2.4s, v1.4s -; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: smax v1.4s, v2.4s, v3.4s +; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v2.4s +; CHECK-NEXT: smax v1.4s, v1.4s, v3.4s ; CHECK-NEXT: smax v0.4s, v0.4s, v3.4s -; CHECK-NEXT: mov w1, v1.s[1] -; CHECK-NEXT: mov w2, v1.s[2] +; CHECK-NEXT: fmov w0, s1 +; CHECK-NEXT: fmov w4, s0 ; CHECK-NEXT: mov w5, v0.s[1] -; CHECK-NEXT: mov w3, v1.s[3] ; CHECK-NEXT: mov w6, v0.s[2] ; CHECK-NEXT: mov w7, v0.s[3] -; CHECK-NEXT: fmov w4, s0 -; CHECK-NEXT: fmov w0, s1 +; CHECK-NEXT: mov w1, v1.s[1] +; CHECK-NEXT: mov w2, v1.s[2] +; CHECK-NEXT: mov w3, v1.s[3] ; CHECK-NEXT: ret %x = call <8 x i19> @llvm.fptosi.sat.v8f16.v8i19(<8 x half> %f) ret <8 x i19> %x @@ -2393,10 +2393,10 @@ define <8 x i32> @test_signed_v8f16_v8i32_duplicate(<8 x half> %f) { ; CHECK-LABEL: test_signed_v8f16_v8i32_duplicate: ; CHECK: // %bb.0: -; CHECK-NEXT: fcvtl2 v1.4s, v0.8h -; CHECK-NEXT: fcvtl v0.4s, v0.4h -; CHECK-NEXT: fcvtzs v1.4s, v1.4s -; CHECK-NEXT: fcvtzs v0.4s, v0.4s +; CHECK-NEXT: fcvtl v1.4s, v0.4h +; CHECK-NEXT: fcvtl2 v2.4s, v0.8h +; CHECK-NEXT: fcvtzs v0.4s, v1.4s +; CHECK-NEXT: fcvtzs v1.4s, v2.4s ; CHECK-NEXT: ret %x = call <8 x i32> @llvm.fptosi.sat.v8f16.v8i32(<8 x half> %f) ret <8 x i32> %x @@ -2408,51 +2408,51 @@ ; CHECK-CVT-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-CVT-NEXT: mov x8, #562949953421311 ; CHECK-CVT-NEXT: mov x12, #-562949953421312 -; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: mov h2, v1.h[1] -; CHECK-CVT-NEXT: fcvt s3, h1 -; CHECK-CVT-NEXT: mov h4, v1.h[2] +; CHECK-CVT-NEXT: fcvt s4, h0 +; CHECK-CVT-NEXT: fcvt s2, h1 +; CHECK-CVT-NEXT: mov h3, v1.h[1] +; CHECK-CVT-NEXT: mov h5, v1.h[2] ; CHECK-CVT-NEXT: mov h1, v1.h[3] -; CHECK-CVT-NEXT: fcvtzs x10, s5 -; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: fcvtzs x9, s3 -; CHECK-CVT-NEXT: fcvt s3, h4 +; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvtzs x10, s4 +; CHECK-CVT-NEXT: fcvtzs x9, s2 +; CHECK-CVT-NEXT: fcvt s2, h5 +; CHECK-CVT-NEXT: fcvtzs x11, s3 ; CHECK-CVT-NEXT: cmp x9, x8 -; CHECK-CVT-NEXT: fcvtzs x11, s2 ; CHECK-CVT-NEXT: csel x9, x9, x8, lt +; CHECK-CVT-NEXT: fcvtzs x13, s2 ; CHECK-CVT-NEXT: cmp x9, x12 -; CHECK-CVT-NEXT: fcvtzs x13, s3 -; CHECK-CVT-NEXT: csel x4, x9, x12, gt ; CHECK-CVT-NEXT: mov h2, v0.h[1] +; CHECK-CVT-NEXT: csel x4, x9, x12, gt ; CHECK-CVT-NEXT: cmp x11, x8 -; CHECK-CVT-NEXT: fcvtzs x9, s1 ; CHECK-CVT-NEXT: csel x11, x11, x8, lt -; CHECK-CVT-NEXT: mov h1, v0.h[2] +; CHECK-CVT-NEXT: fcvtzs x9, s1 ; CHECK-CVT-NEXT: cmp x11, x12 -; CHECK-CVT-NEXT: mov h0, v0.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: csel x5, x11, x12, gt ; CHECK-CVT-NEXT: cmp x13, x8 ; CHECK-CVT-NEXT: csel x11, x13, x8, lt -; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: mov h1, v0.h[2] ; CHECK-CVT-NEXT: cmp x11, x12 -; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: mov h0, v0.h[3] ; CHECK-CVT-NEXT: csel x6, x11, x12, gt ; CHECK-CVT-NEXT: cmp x9, x8 ; CHECK-CVT-NEXT: csel x9, x9, x8, lt -; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: cmp x9, x12 ; CHECK-CVT-NEXT: fcvtzs x11, s2 ; CHECK-CVT-NEXT: csel x7, x9, x12, gt ; CHECK-CVT-NEXT: cmp x10, x8 ; CHECK-CVT-NEXT: csel x9, x10, x8, lt -; CHECK-CVT-NEXT: fcvtzs x10, s1 +; CHECK-CVT-NEXT: fcvt s0, h0 ; CHECK-CVT-NEXT: cmp x9, x12 +; CHECK-CVT-NEXT: fcvtzs x10, s1 ; CHECK-CVT-NEXT: csel x0, x9, x12, gt ; CHECK-CVT-NEXT: cmp x11, x8 ; CHECK-CVT-NEXT: csel x9, x11, x8, lt -; CHECK-CVT-NEXT: fcvtzs x11, s0 ; CHECK-CVT-NEXT: cmp x9, x12 +; CHECK-CVT-NEXT: fcvtzs x11, s0 ; CHECK-CVT-NEXT: csel x1, x9, x12, gt ; CHECK-CVT-NEXT: cmp x10, x8 ; CHECK-CVT-NEXT: csel x9, x10, x8, lt @@ -2467,54 +2467,54 @@ ; CHECK-FP16-LABEL: test_signed_v8f16_v8i50: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-FP16-NEXT: mov x8, #562949953421311 -; CHECK-FP16-NEXT: mov x11, #-562949953421312 +; CHECK-FP16-NEXT: mov x9, #562949953421311 +; CHECK-FP16-NEXT: mov x10, #-562949953421312 +; CHECK-FP16-NEXT: fcvtzs x8, h1 ; CHECK-FP16-NEXT: mov h2, v1.h[1] -; CHECK-FP16-NEXT: fcvtzs x9, h1 -; CHECK-FP16-NEXT: mov h3, v1.h[2] +; CHECK-FP16-NEXT: fcvtzs x11, h2 +; CHECK-FP16-NEXT: mov h2, v1.h[2] +; CHECK-FP16-NEXT: cmp x8, x9 ; CHECK-FP16-NEXT: mov h1, v1.h[3] -; CHECK-FP16-NEXT: cmp x9, x8 -; CHECK-FP16-NEXT: fcvtzs x10, h2 -; CHECK-FP16-NEXT: csel x9, x9, x8, lt -; CHECK-FP16-NEXT: cmp x9, x11 -; CHECK-FP16-NEXT: fcvtzs x12, h3 -; CHECK-FP16-NEXT: csel x4, x9, x11, gt +; CHECK-FP16-NEXT: csel x8, x8, x9, lt +; CHECK-FP16-NEXT: fcvtzs x12, h2 +; CHECK-FP16-NEXT: cmp x8, x10 ; CHECK-FP16-NEXT: mov h2, v0.h[2] -; CHECK-FP16-NEXT: cmp x10, x8 -; CHECK-FP16-NEXT: csel x9, x10, x8, lt -; CHECK-FP16-NEXT: fcvtzs x10, h1 -; CHECK-FP16-NEXT: cmp x9, x11 +; CHECK-FP16-NEXT: csel x4, x8, x10, gt +; CHECK-FP16-NEXT: cmp x11, x9 +; CHECK-FP16-NEXT: csel x8, x11, x9, lt +; CHECK-FP16-NEXT: fcvtzs x11, h1 +; CHECK-FP16-NEXT: cmp x8, x10 ; CHECK-FP16-NEXT: mov h1, v0.h[1] -; CHECK-FP16-NEXT: csel x5, x9, x11, gt -; CHECK-FP16-NEXT: cmp x12, x8 -; CHECK-FP16-NEXT: csel x9, x12, x8, lt -; CHECK-FP16-NEXT: fcvtzs x12, h0 -; CHECK-FP16-NEXT: cmp x9, x11 +; CHECK-FP16-NEXT: csel x5, x8, x10, gt +; CHECK-FP16-NEXT: cmp x12, x9 +; CHECK-FP16-NEXT: csel x12, x12, x9, lt +; CHECK-FP16-NEXT: fcvtzs x8, h0 +; CHECK-FP16-NEXT: cmp x12, x10 ; CHECK-FP16-NEXT: mov h0, v0.h[3] -; CHECK-FP16-NEXT: csel x6, x9, x11, gt -; CHECK-FP16-NEXT: cmp x10, x8 -; CHECK-FP16-NEXT: csel x9, x10, x8, lt -; CHECK-FP16-NEXT: fcvtzs x10, h1 -; CHECK-FP16-NEXT: cmp x9, x11 -; CHECK-FP16-NEXT: csel x7, x9, x11, gt -; CHECK-FP16-NEXT: cmp x12, x8 -; CHECK-FP16-NEXT: csel x9, x12, x8, lt -; CHECK-FP16-NEXT: fcvtzs x12, h2 -; CHECK-FP16-NEXT: cmp x9, x11 -; CHECK-FP16-NEXT: csel x0, x9, x11, gt -; CHECK-FP16-NEXT: cmp x10, x8 -; CHECK-FP16-NEXT: csel x9, x10, x8, lt -; CHECK-FP16-NEXT: fcvtzs x10, h0 -; CHECK-FP16-NEXT: cmp x9, x11 -; CHECK-FP16-NEXT: csel x1, x9, x11, gt -; CHECK-FP16-NEXT: cmp x12, x8 -; CHECK-FP16-NEXT: csel x9, x12, x8, lt -; CHECK-FP16-NEXT: cmp x9, x11 -; CHECK-FP16-NEXT: csel x2, x9, x11, gt -; CHECK-FP16-NEXT: cmp x10, x8 -; CHECK-FP16-NEXT: csel x8, x10, x8, lt -; CHECK-FP16-NEXT: cmp x8, x11 -; CHECK-FP16-NEXT: csel x3, x8, x11, gt +; CHECK-FP16-NEXT: csel x6, x12, x10, gt +; CHECK-FP16-NEXT: cmp x11, x9 +; CHECK-FP16-NEXT: csel x11, x11, x9, lt +; CHECK-FP16-NEXT: fcvtzs x12, h1 +; CHECK-FP16-NEXT: cmp x11, x10 +; CHECK-FP16-NEXT: csel x7, x11, x10, gt +; CHECK-FP16-NEXT: cmp x8, x9 +; CHECK-FP16-NEXT: csel x8, x8, x9, lt +; CHECK-FP16-NEXT: fcvtzs x11, h2 +; CHECK-FP16-NEXT: cmp x8, x10 +; CHECK-FP16-NEXT: csel x0, x8, x10, gt +; CHECK-FP16-NEXT: cmp x12, x9 +; CHECK-FP16-NEXT: csel x8, x12, x9, lt +; CHECK-FP16-NEXT: fcvtzs x12, h0 +; CHECK-FP16-NEXT: cmp x8, x10 +; CHECK-FP16-NEXT: csel x1, x8, x10, gt +; CHECK-FP16-NEXT: cmp x11, x9 +; CHECK-FP16-NEXT: csel x8, x11, x9, lt +; CHECK-FP16-NEXT: cmp x8, x10 +; CHECK-FP16-NEXT: csel x2, x8, x10, gt +; CHECK-FP16-NEXT: cmp x12, x9 +; CHECK-FP16-NEXT: csel x8, x12, x9, lt +; CHECK-FP16-NEXT: cmp x8, x10 +; CHECK-FP16-NEXT: csel x3, x8, x10, gt ; CHECK-FP16-NEXT: ret %x = call <8 x i50> @llvm.fptosi.sat.v8f16.v8i50(<8 x half> %f) ret <8 x i50> %x @@ -2524,63 +2524,64 @@ ; CHECK-CVT-LABEL: test_signed_v8f16_v8i64: ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-CVT-NEXT: mov h4, v0.h[2] ; CHECK-CVT-NEXT: fcvt s5, h0 ; CHECK-CVT-NEXT: fcvt s2, h1 ; CHECK-CVT-NEXT: mov h3, v1.h[1] -; CHECK-CVT-NEXT: mov h6, v1.h[2] -; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: mov h4, v1.h[2] ; CHECK-CVT-NEXT: mov h1, v1.h[3] -; CHECK-CVT-NEXT: fcvtzs x9, s5 +; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvtzs x8, s2 -; CHECK-CVT-NEXT: fcvt s2, h3 +; CHECK-CVT-NEXT: fmov d2, x8 +; CHECK-CVT-NEXT: fcvtzs x8, s3 +; CHECK-CVT-NEXT: fcvtzs x9, s4 +; CHECK-CVT-NEXT: mov h4, v0.h[2] +; CHECK-CVT-NEXT: fcvtzs x10, s1 ; CHECK-CVT-NEXT: mov h3, v0.h[1] +; CHECK-CVT-NEXT: fcvt s1, h4 ; CHECK-CVT-NEXT: mov h0, v0.h[3] -; CHECK-CVT-NEXT: fcvt s5, h6 -; CHECK-CVT-NEXT: fcvt s6, h1 -; CHECK-CVT-NEXT: fcvtzs x10, s2 -; CHECK-CVT-NEXT: fmov d2, x8 -; CHECK-CVT-NEXT: fcvtzs x8, s4 -; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov v2.d[1], x8 +; CHECK-CVT-NEXT: fcvtzs x8, s5 +; CHECK-CVT-NEXT: fcvt s6, h3 ; CHECK-CVT-NEXT: fcvt s4, h0 -; CHECK-CVT-NEXT: fmov d0, x9 -; CHECK-CVT-NEXT: mov v2.d[1], x10 -; CHECK-CVT-NEXT: fcvtzs x10, s5 +; CHECK-CVT-NEXT: fmov d3, x9 +; CHECK-CVT-NEXT: fmov d0, x8 +; CHECK-CVT-NEXT: fcvtzs x8, s1 +; CHECK-CVT-NEXT: mov v3.d[1], x10 +; CHECK-CVT-NEXT: fcvtzs x9, s6 +; CHECK-CVT-NEXT: fcvtzs x10, s4 ; CHECK-CVT-NEXT: fmov d1, x8 -; CHECK-CVT-NEXT: fcvtzs x9, s3 -; CHECK-CVT-NEXT: fcvtzs x8, s4 -; CHECK-CVT-NEXT: fmov d3, x10 -; CHECK-CVT-NEXT: fcvtzs x10, s6 ; CHECK-CVT-NEXT: mov v0.d[1], x9 -; CHECK-CVT-NEXT: mov v1.d[1], x8 -; CHECK-CVT-NEXT: mov v3.d[1], x10 +; CHECK-CVT-NEXT: mov v1.d[1], x10 ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_signed_v8f16_v8i64: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-FP16-NEXT: mov h3, v0.h[2] -; CHECK-FP16-NEXT: mov h5, v0.h[3] -; CHECK-FP16-NEXT: fcvtzs x9, h0 -; CHECK-FP16-NEXT: mov h2, v1.h[1] +; CHECK-FP16-NEXT: mov h5, v0.h[2] ; CHECK-FP16-NEXT: fcvtzs x8, h1 -; CHECK-FP16-NEXT: mov h4, v1.h[2] -; CHECK-FP16-NEXT: mov h6, v1.h[3] -; CHECK-FP16-NEXT: fcvtzs x10, h2 +; CHECK-FP16-NEXT: mov h2, v1.h[1] +; CHECK-FP16-NEXT: mov h3, v1.h[2] +; CHECK-FP16-NEXT: mov h1, v1.h[3] +; CHECK-FP16-NEXT: fcvtzs x9, h2 ; CHECK-FP16-NEXT: fmov d2, x8 ; CHECK-FP16-NEXT: fcvtzs x8, h3 -; CHECK-FP16-NEXT: mov h3, v0.h[1] -; CHECK-FP16-NEXT: fmov d0, x9 -; CHECK-FP16-NEXT: mov v2.d[1], x10 -; CHECK-FP16-NEXT: fcvtzs x10, h4 -; CHECK-FP16-NEXT: fmov d1, x8 -; CHECK-FP16-NEXT: fcvtzs x9, h3 -; CHECK-FP16-NEXT: fcvtzs x8, h5 -; CHECK-FP16-NEXT: fmov d3, x10 -; CHECK-FP16-NEXT: fcvtzs x10, h6 -; CHECK-FP16-NEXT: mov v0.d[1], x9 -; CHECK-FP16-NEXT: mov v1.d[1], x8 +; CHECK-FP16-NEXT: fcvtzs x10, h1 +; CHECK-FP16-NEXT: mov h1, v0.h[1] +; CHECK-FP16-NEXT: mov v2.d[1], x9 +; CHECK-FP16-NEXT: fcvtzs x9, h0 +; CHECK-FP16-NEXT: fmov d3, x8 +; CHECK-FP16-NEXT: mov h0, v0.h[3] +; CHECK-FP16-NEXT: fcvtzs x8, h1 +; CHECK-FP16-NEXT: fmov d4, x9 +; CHECK-FP16-NEXT: fcvtzs x9, h5 ; CHECK-FP16-NEXT: mov v3.d[1], x10 +; CHECK-FP16-NEXT: fcvtzs x10, h0 +; CHECK-FP16-NEXT: fmov d1, x9 +; CHECK-FP16-NEXT: mov v4.d[1], x8 +; CHECK-FP16-NEXT: mov v1.d[1], x10 +; CHECK-FP16-NEXT: mov v0.16b, v4.16b ; CHECK-FP16-NEXT: ret %x = call <8 x i64> @llvm.fptosi.sat.v8f16.v8i64(<8 x half> %f) ret <8 x i64> %x @@ -2623,18 +2624,18 @@ ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: mov w8, #-251658240 +; CHECK-NEXT: mov x27, #-34359738368 ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: mov x25, #-34359738368 -; CHECK-NEXT: mov x23, #34359738367 -; CHECK-NEXT: fmov s10, w8 +; CHECK-NEXT: mov x24, #34359738367 +; CHECK-NEXT: fmov s9, w8 ; CHECK-NEXT: mov w8, #1895825407 ; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: fcmp s8, s10 -; CHECK-NEXT: fmov s9, w8 -; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, x25, x1, lt ; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: csel x9, x23, x9, gt +; CHECK-NEXT: fmov s10, w8 +; CHECK-NEXT: csel x8, xzr, x0, lt +; CHECK-NEXT: csel x9, x27, x1, lt +; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: csel x9, x24, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 @@ -2644,12 +2645,12 @@ ; CHECK-NEXT: csel x8, xzr, x9, vs ; CHECK-NEXT: str x8, [sp, #72] // 8-byte Folded Spill ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, x25, x1, lt -; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: csel x9, x23, x9, gt +; CHECK-NEXT: csel x9, x27, x1, lt +; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: csel x9, x24, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 @@ -2658,43 +2659,43 @@ ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: str x8, [sp, #16] // 8-byte Folded Spill ; CHECK-NEXT: bl __fixsfti +; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: csel x8, x27, x1, lt +; CHECK-NEXT: csel x9, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s10 ; CHECK-NEXT: mov h0, v0.h[2] -; CHECK-NEXT: csel x8, x25, x1, lt -; CHECK-NEXT: csel x9, xzr, x0, lt -; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: csinv x9, x9, xzr, le -; CHECK-NEXT: csel x8, x23, x8, gt +; CHECK-NEXT: csel x8, x24, x8, gt ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csel x24, xzr, x8, vs +; CHECK-NEXT: csel x23, xzr, x8, vs ; CHECK-NEXT: csel x8, xzr, x9, vs ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: str x8, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: bl __fixsfti +; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: csel x8, x27, x1, lt +; CHECK-NEXT: csel x9, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s10 ; CHECK-NEXT: mov h0, v0.h[1] -; CHECK-NEXT: csel x8, x25, x1, lt -; CHECK-NEXT: csel x9, xzr, x0, lt -; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: csinv x9, x9, xzr, le -; CHECK-NEXT: csel x8, x23, x8, gt +; CHECK-NEXT: csel x8, x24, x8, gt ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csel x27, xzr, x8, vs +; CHECK-NEXT: csel x26, xzr, x8, vs ; CHECK-NEXT: csel x8, xzr, x9, vs ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: str x8, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: bl __fixsfti +; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: csel x8, xzr, x0, lt +; CHECK-NEXT: csel x9, x27, x1, lt ; CHECK-NEXT: fcmp s8, s10 ; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, x25, x1, lt -; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: csel x9, x23, x9, gt +; CHECK-NEXT: csel x9, x24, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 @@ -2703,85 +2704,85 @@ ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: str x8, [sp] // 8-byte Folded Spill ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, x25, x1, lt -; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: csel x9, x23, x9, gt +; CHECK-NEXT: csel x9, x27, x1, lt +; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: csel x9, x24, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csel x20, xzr, x8, vs -; CHECK-NEXT: csel x28, xzr, x9, vs +; CHECK-NEXT: csel x25, xzr, x8, vs +; CHECK-NEXT: csel x20, xzr, x9, vs ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti +; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: csel x8, x27, x1, lt +; CHECK-NEXT: csel x9, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s10 ; CHECK-NEXT: mov h0, v0.h[2] -; CHECK-NEXT: csel x8, x25, x1, lt -; CHECK-NEXT: csel x9, xzr, x0, lt -; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: csinv x9, x9, xzr, le -; CHECK-NEXT: csel x8, x23, x8, gt +; CHECK-NEXT: csel x8, x24, x8, gt ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csel x21, xzr, x8, vs -; CHECK-NEXT: csel x26, xzr, x9, vs +; CHECK-NEXT: csel x28, xzr, x8, vs +; CHECK-NEXT: csel x21, xzr, x9, vs ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: fmov d0, x20 -; CHECK-NEXT: fcmp s8, s10 ; CHECK-NEXT: ldr x11, [sp, #8] // 8-byte Folded Reload -; CHECK-NEXT: lsr x10, x28, #28 -; CHECK-NEXT: ldr d1, [sp] // 8-byte Folded Reload -; CHECK-NEXT: lsr x12, x29, #28 -; CHECK-NEXT: mov v0.d[1], x28 -; CHECK-NEXT: csel x8, x25, x1, lt -; CHECK-NEXT: csel x9, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s9 +; CHECK-NEXT: fmov d0, x25 +; CHECK-NEXT: lsr x10, x20, #28 ; CHECK-NEXT: stur x11, [x19, #75] -; CHECK-NEXT: ldr x13, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: csel x8, x27, x1, lt +; CHECK-NEXT: csel x9, xzr, x0, lt +; CHECK-NEXT: fcmp s8, s10 +; CHECK-NEXT: ldr x12, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: lsr x11, x29, #28 +; CHECK-NEXT: mov v0.d[1], x20 ; CHECK-NEXT: csinv x9, x9, xzr, le -; CHECK-NEXT: csel x8, x23, x8, gt +; CHECK-NEXT: csel x8, x24, x8, gt +; CHECK-NEXT: stur x12, [x19, #50] ; CHECK-NEXT: fcmp s8, s8 -; CHECK-NEXT: fmov x11, d0 -; CHECK-NEXT: stur x13, [x19, #50] -; CHECK-NEXT: mov v1.d[1], x29 -; CHECK-NEXT: ldr d0, [sp, #16] // 8-byte Folded Reload -; CHECK-NEXT: csel x9, xzr, x9, vs +; CHECK-NEXT: ldr d1, [sp] // 8-byte Folded Reload ; CHECK-NEXT: strb w10, [x19, #49] -; CHECK-NEXT: extr x10, x28, x11, #28 +; CHECK-NEXT: fmov x10, d0 +; CHECK-NEXT: strb w11, [x19, #24] ; CHECK-NEXT: csel x8, xzr, x8, vs -; CHECK-NEXT: bfi x8, x11, #36, #28 -; CHECK-NEXT: strb w12, [x19, #24] +; CHECK-NEXT: csel x9, xzr, x9, vs +; CHECK-NEXT: mov v1.d[1], x29 +; CHECK-NEXT: extr x11, x20, x10, #28 +; CHECK-NEXT: bfi x8, x10, #36, #28 +; CHECK-NEXT: fmov x10, d1 ; CHECK-NEXT: stur x9, [x19, #25] -; CHECK-NEXT: fmov x12, d1 -; CHECK-NEXT: stur x10, [x19, #41] ; CHECK-NEXT: lsr x9, x22, #28 -; CHECK-NEXT: ldr d1, [sp, #24] // 8-byte Folded Reload +; CHECK-NEXT: stur x11, [x19, #41] ; CHECK-NEXT: stur x8, [x19, #33] -; CHECK-NEXT: ldr x11, [sp, #72] // 8-byte Folded Reload -; CHECK-NEXT: extr x18, x29, x12, #28 +; CHECK-NEXT: extr x8, x29, x10, #28 +; CHECK-NEXT: ldr d0, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: bfi x28, x10, #36, #28 +; CHECK-NEXT: str x8, [x19, #16] ; CHECK-NEXT: mov v0.d[1], x22 -; CHECK-NEXT: bfi x21, x12, #36, #28 -; CHECK-NEXT: str x26, [x19] -; CHECK-NEXT: mov v1.d[1], x11 +; CHECK-NEXT: ldr x11, [sp, #72] // 8-byte Folded Reload +; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: ldr d0, [sp, #24] // 8-byte Folded Reload ; CHECK-NEXT: lsr x10, x11, #28 ; CHECK-NEXT: mov x13, x11 -; CHECK-NEXT: stp x21, x18, [x19, #8] -; CHECK-NEXT: fmov x8, d0 +; CHECK-NEXT: stp x21, x28, [x19] +; CHECK-NEXT: mov v0.d[1], x11 +; CHECK-NEXT: extr x12, x22, x8, #28 +; CHECK-NEXT: bfi x26, x8, #36, #28 ; CHECK-NEXT: strb w9, [x19, #99] +; CHECK-NEXT: fmov x11, d0 ; CHECK-NEXT: strb w10, [x19, #74] -; CHECK-NEXT: fmov x11, d1 -; CHECK-NEXT: extr x12, x22, x8, #28 -; CHECK-NEXT: bfi x27, x8, #36, #28 -; CHECK-NEXT: extr x8, x13, x11, #28 -; CHECK-NEXT: bfi x24, x11, #36, #28 ; CHECK-NEXT: stur x12, [x19, #91] -; CHECK-NEXT: stur x27, [x19, #83] +; CHECK-NEXT: stur x26, [x19, #83] +; CHECK-NEXT: extr x8, x13, x11, #28 +; CHECK-NEXT: bfi x23, x11, #36, #28 ; CHECK-NEXT: stur x8, [x19, #66] -; CHECK-NEXT: stur x24, [x19, #58] +; CHECK-NEXT: stur x23, [x19, #58] ; CHECK-NEXT: ldp x20, x19, [sp, #176] // 16-byte Folded Reload ; CHECK-NEXT: ldp x22, x21, [sp, #160] // 16-byte Folded Reload ; CHECK-NEXT: ldp x24, x23, [sp, #144] // 16-byte Folded Reload @@ -2832,9 +2833,9 @@ ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: mov w8, #-16777216 -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: mov x21, #-9223372036854775808 -; CHECK-NEXT: mov x22, #9223372036854775807 +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: mov x23, #9223372036854775807 ; CHECK-NEXT: fmov s10, w8 ; CHECK-NEXT: mov w8, #2130706431 ; CHECK-NEXT: mov h0, v0.h[1] @@ -2843,7 +2844,7 @@ ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, x21, x1, lt ; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: csel x9, x22, x9, gt +; CHECK-NEXT: csel x9, x23, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 @@ -2853,13 +2854,13 @@ ; CHECK-NEXT: csel x8, xzr, x9, vs ; CHECK-NEXT: str x8, [sp, #24] // 8-byte Folded Spill ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, s10 -; CHECK-NEXT: mov h0, v0.h[2] +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, x21, x1, lt ; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: csel x9, x22, x9, gt +; CHECK-NEXT: mov h0, v0.h[2] +; CHECK-NEXT: csel x9, x23, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 @@ -2874,7 +2875,7 @@ ; CHECK-NEXT: csel x9, x21, x1, lt ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: mov h0, v0.h[3] -; CHECK-NEXT: csel x9, x22, x9, gt +; CHECK-NEXT: csel x9, x23, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 @@ -2889,7 +2890,7 @@ ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, x21, x1, lt ; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: csel x9, x22, x9, gt +; CHECK-NEXT: csel x9, x23, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 @@ -2897,27 +2898,27 @@ ; CHECK-NEXT: csel x29, xzr, x9, vs ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, s10 -; CHECK-NEXT: mov h0, v0.h[1] +; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, x21, x1, lt ; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: csel x9, x22, x9, gt +; CHECK-NEXT: mov h0, v0.h[1] +; CHECK-NEXT: csel x9, x23, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: csel x20, xzr, x8, vs -; CHECK-NEXT: csel x23, xzr, x9, vs +; CHECK-NEXT: csel x22, xzr, x9, vs ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, s10 -; CHECK-NEXT: mov h0, v0.h[2] +; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, x21, x1, lt ; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: csel x9, x22, x9, gt +; CHECK-NEXT: mov h0, v0.h[2] +; CHECK-NEXT: csel x9, x23, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 @@ -2925,13 +2926,13 @@ ; CHECK-NEXT: csel x25, xzr, x9, vs ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixsfti -; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, s10 -; CHECK-NEXT: mov h0, v0.h[3] +; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, x21, x1, lt ; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: csel x9, x22, x9, gt +; CHECK-NEXT: mov h0, v0.h[3] +; CHECK-NEXT: csel x9, x23, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: fcvt s8, h0 @@ -2942,13 +2943,13 @@ ; CHECK-NEXT: fcmp s8, s10 ; CHECK-NEXT: stp x26, x27, [x19, #32] ; CHECK-NEXT: stp x24, x25, [x19, #16] -; CHECK-NEXT: stp x20, x23, [x19] +; CHECK-NEXT: stp x20, x22, [x19] ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, x21, x1, lt ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: stp x28, x29, [x19, #112] ; CHECK-NEXT: ldr x10, [sp] // 8-byte Folded Reload -; CHECK-NEXT: csel x9, x22, x9, gt +; CHECK-NEXT: csel x9, x23, x9, gt ; CHECK-NEXT: csinv x8, x8, xzr, le ; CHECK-NEXT: fcmp s8, s8 ; CHECK-NEXT: str x10, [x19, #104] diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll --- a/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll +++ b/llvm/test/CodeGen/AArch64/fptoui-sat-scalar.ll @@ -119,11 +119,11 @@ ; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: mov x10, #68719476735 ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, xzr, x1, lt +; CHECK-NEXT: csel x9, xzr, x0, lt +; CHECK-NEXT: csel x8, xzr, x1, lt ; CHECK-NEXT: fcmp s8, s0 -; CHECK-NEXT: csel x1, x10, x9, gt -; CHECK-NEXT: csinv x0, x8, xzr, le +; CHECK-NEXT: csel x1, x10, x8, gt +; CHECK-NEXT: csinv x0, x9, xzr, le ; CHECK-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %x = call i100 @llvm.fptoui.sat.i100.f32(float %f) @@ -141,11 +141,11 @@ ; CHECK-NEXT: fcmp s8, #0.0 ; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: csel x8, xzr, x1, lt -; CHECK-NEXT: csel x9, xzr, x0, lt +; CHECK-NEXT: csel x9, xzr, x1, lt +; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s0 -; CHECK-NEXT: csinv x0, x9, xzr, le -; CHECK-NEXT: csinv x1, x8, xzr, le +; CHECK-NEXT: csinv x0, x8, xzr, le +; CHECK-NEXT: csinv x1, x9, xzr, le ; CHECK-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %x = call i128 @llvm.fptoui.sat.i128.f32(float %f) @@ -269,11 +269,11 @@ ; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: mov x10, #68719476735 ; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, xzr, x1, lt +; CHECK-NEXT: csel x9, xzr, x0, lt +; CHECK-NEXT: csel x8, xzr, x1, lt ; CHECK-NEXT: fcmp d8, d0 -; CHECK-NEXT: csel x1, x10, x9, gt -; CHECK-NEXT: csinv x0, x8, xzr, le +; CHECK-NEXT: csel x1, x10, x8, gt +; CHECK-NEXT: csinv x0, x9, xzr, le ; CHECK-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %x = call i100 @llvm.fptoui.sat.i100.f64(double %f) @@ -291,11 +291,11 @@ ; CHECK-NEXT: fcmp d8, #0.0 ; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: csel x8, xzr, x1, lt -; CHECK-NEXT: csel x9, xzr, x0, lt +; CHECK-NEXT: csel x9, xzr, x1, lt +; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: fcmp d8, d0 -; CHECK-NEXT: csinv x0, x9, xzr, le -; CHECK-NEXT: csinv x1, x8, xzr, le +; CHECK-NEXT: csinv x0, x8, xzr, le +; CHECK-NEXT: csinv x1, x9, xzr, le ; CHECK-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %x = call i128 @llvm.fptoui.sat.i128.f64(double %f) @@ -486,11 +486,11 @@ ; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: mov x10, #68719476735 ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, xzr, x1, lt +; CHECK-NEXT: csel x9, xzr, x0, lt +; CHECK-NEXT: csel x8, xzr, x1, lt ; CHECK-NEXT: fcmp s8, s0 -; CHECK-NEXT: csel x1, x10, x9, gt -; CHECK-NEXT: csinv x0, x8, xzr, le +; CHECK-NEXT: csel x1, x10, x8, gt +; CHECK-NEXT: csinv x0, x9, xzr, le ; CHECK-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %x = call i100 @llvm.fptoui.sat.i100.f16(half %f) @@ -509,11 +509,11 @@ ; CHECK-NEXT: fcmp s8, #0.0 ; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: csel x8, xzr, x1, lt -; CHECK-NEXT: csel x9, xzr, x0, lt +; CHECK-NEXT: csel x9, xzr, x1, lt +; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s0 -; CHECK-NEXT: csinv x0, x9, xzr, le -; CHECK-NEXT: csinv x1, x8, xzr, le +; CHECK-NEXT: csinv x0, x8, xzr, le +; CHECK-NEXT: csinv x1, x9, xzr, le ; CHECK-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %x = call i128 @llvm.fptoui.sat.i128.f16(half %f) diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll --- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll @@ -60,15 +60,15 @@ ; CHECK-NEXT: // kill: def $s3 killed $s3 def $q3 ; CHECK-NEXT: // kill: def $s4 killed $s4 def $q4 ; CHECK-NEXT: mov v0.s[1], v1.s[0] -; CHECK-NEXT: fcvtzu v4.4s, v4.4s +; CHECK-NEXT: fcvtzu v1.4s, v4.4s ; CHECK-NEXT: mov v0.s[2], v2.s[0] -; CHECK-NEXT: fmov w4, s4 ; CHECK-NEXT: mov v0.s[3], v3.s[0] +; CHECK-NEXT: fmov w4, s1 ; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: mov w1, v0.s[1] ; CHECK-NEXT: mov w2, v0.s[2] ; CHECK-NEXT: mov w3, v0.s[3] -; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %x = call <5 x i32> @llvm.fptoui.sat.v5f32.v5i32(<5 x float> %f) ret <5 x i32> %x @@ -88,13 +88,13 @@ ; CHECK-NEXT: mov v0.s[2], v2.s[0] ; CHECK-NEXT: fcvtzu v1.4s, v4.4s ; CHECK-NEXT: mov v0.s[3], v3.s[0] -; CHECK-NEXT: mov w5, v1.s[1] -; CHECK-NEXT: fmov w4, s1 ; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: fmov w4, s1 +; CHECK-NEXT: mov w5, v1.s[1] +; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: mov w1, v0.s[1] ; CHECK-NEXT: mov w2, v0.s[2] ; CHECK-NEXT: mov w3, v0.s[3] -; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %x = call <6 x i32> @llvm.fptoui.sat.v6f32.v6i32(<6 x float> %f) ret <6 x i32> %x @@ -117,13 +117,13 @@ ; CHECK-NEXT: mov v0.s[3], v3.s[0] ; CHECK-NEXT: fcvtzu v1.4s, v4.4s ; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: fmov w4, s1 ; CHECK-NEXT: mov w5, v1.s[1] ; CHECK-NEXT: mov w6, v1.s[2] -; CHECK-NEXT: fmov w4, s1 +; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: mov w1, v0.s[1] ; CHECK-NEXT: mov w2, v0.s[2] ; CHECK-NEXT: mov w3, v0.s[3] -; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret %x = call <7 x i32> @llvm.fptoui.sat.v7f32.v7i32(<7 x float> %f) ret <7 x i32> %x @@ -163,11 +163,11 @@ define <2 x i32> @test_unsigned_v2f64_v2i32(<2 x double> %f) { ; CHECK-LABEL: test_unsigned_v2f64_v2i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d1, v0.d[1] ; CHECK-NEXT: fcvtzu w8, d0 +; CHECK-NEXT: mov d1, v0.d[1] +; CHECK-NEXT: fcvtzu w9, d1 ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fcvtzu w8, d1 -; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %x = call <2 x i32> @llvm.fptoui.sat.v2f64.v2i32(<2 x double> %f) @@ -178,13 +178,13 @@ ; CHECK-LABEL: test_unsigned_v3f64_v3i32: ; CHECK: // %bb.0: ; CHECK-NEXT: fcvtzu w8, d0 +; CHECK-NEXT: fcvtzu w9, d1 ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fcvtzu w8, d1 -; CHECK-NEXT: mov v0.s[1], w8 ; CHECK-NEXT: fcvtzu w8, d2 +; CHECK-NEXT: mov v0.s[1], w9 +; CHECK-NEXT: fcvtzu w9, d0 ; CHECK-NEXT: mov v0.s[2], w8 -; CHECK-NEXT: fcvtzu w8, d0 -; CHECK-NEXT: mov v0.s[3], w8 +; CHECK-NEXT: mov v0.s[3], w9 ; CHECK-NEXT: ret %x = call <3 x i32> @llvm.fptoui.sat.v3f64.v3i32(<3 x double> %f) ret <3 x i32> %x @@ -193,16 +193,16 @@ define <4 x i32> @test_unsigned_v4f64_v4i32(<4 x double> %f) { ; CHECK-LABEL: test_unsigned_v4f64_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d2, v0.d[1] ; CHECK-NEXT: fcvtzu w8, d0 +; CHECK-NEXT: mov d0, v0.d[1] +; CHECK-NEXT: fcvtzu w9, d0 ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fcvtzu w8, d2 -; CHECK-NEXT: mov v0.s[1], w8 ; CHECK-NEXT: fcvtzu w8, d1 ; CHECK-NEXT: mov d1, v1.d[1] +; CHECK-NEXT: mov v0.s[1], w9 +; CHECK-NEXT: fcvtzu w9, d1 ; CHECK-NEXT: mov v0.s[2], w8 -; CHECK-NEXT: fcvtzu w8, d1 -; CHECK-NEXT: mov v0.s[3], w8 +; CHECK-NEXT: mov v0.s[3], w9 ; CHECK-NEXT: ret %x = call <4 x i32> @llvm.fptoui.sat.v4f64.v4i32(<4 x double> %f) ret <4 x i32> %x @@ -285,11 +285,11 @@ ; CHECK-NEXT: .cfi_offset w19, -8 ; CHECK-NEXT: .cfi_offset w20, -16 ; CHECK-NEXT: .cfi_offset w30, -32 -; CHECK-NEXT: mov v2.16b, v1.16b ; CHECK-NEXT: adrp x8, .LCPI15_0 ; CHECK-NEXT: stp q1, q0, [sp, #32] // 32-byte Folded Spill -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_0] +; CHECK-NEXT: mov v2.16b, v1.16b ; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_0] ; CHECK-NEXT: str q1, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: bl __getf2 ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload @@ -338,12 +338,12 @@ ; CHECK-NEXT: .cfi_offset w19, -8 ; CHECK-NEXT: .cfi_offset w20, -16 ; CHECK-NEXT: .cfi_offset w30, -32 -; CHECK-NEXT: stp q0, q2, [sp, #48] // 32-byte Folded Spill ; CHECK-NEXT: adrp x8, .LCPI16_0 +; CHECK-NEXT: stp q0, q2, [sp, #48] // 32-byte Folded Spill ; CHECK-NEXT: mov v2.16b, v1.16b ; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] ; CHECK-NEXT: mov v0.16b, v2.16b +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] ; CHECK-NEXT: str q1, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: bl __getf2 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload @@ -406,13 +406,13 @@ ; CHECK-NEXT: .cfi_offset w19, -8 ; CHECK-NEXT: .cfi_offset w20, -16 ; CHECK-NEXT: .cfi_offset w30, -32 -; CHECK-NEXT: stp q0, q2, [sp, #16] // 32-byte Folded Spill ; CHECK-NEXT: adrp x8, .LCPI17_0 +; CHECK-NEXT: stp q0, q2, [sp, #16] // 32-byte Folded Spill ; CHECK-NEXT: mov v2.16b, v1.16b ; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: str q3, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_0] -; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: str q1, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: bl __getf2 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload @@ -548,11 +548,11 @@ ; CHECK-NEXT: fcvtl2 v0.4s, v0.8h ; CHECK-NEXT: fcvtzu v1.4s, v1.4s ; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: fmov w0, s1 +; CHECK-NEXT: fmov w4, s0 ; CHECK-NEXT: mov w1, v1.s[1] ; CHECK-NEXT: mov w2, v1.s[2] ; CHECK-NEXT: mov w3, v1.s[3] -; CHECK-NEXT: fmov w0, s1 -; CHECK-NEXT: fmov w4, s0 ; CHECK-NEXT: ret %x = call <5 x i32> @llvm.fptoui.sat.v5f16.v5i32(<5 x half> %f) ret <5 x i32> %x @@ -565,12 +565,12 @@ ; CHECK-NEXT: fcvtl2 v0.4s, v0.8h ; CHECK-NEXT: fcvtzu v1.4s, v1.4s ; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: fmov w0, s1 +; CHECK-NEXT: fmov w4, s0 ; CHECK-NEXT: mov w1, v1.s[1] ; CHECK-NEXT: mov w2, v1.s[2] ; CHECK-NEXT: mov w3, v1.s[3] ; CHECK-NEXT: mov w5, v0.s[1] -; CHECK-NEXT: fmov w0, s1 -; CHECK-NEXT: fmov w4, s0 ; CHECK-NEXT: ret %x = call <6 x i32> @llvm.fptoui.sat.v6f16.v6i32(<6 x half> %f) ret <6 x i32> %x @@ -579,17 +579,17 @@ define <7 x i32> @test_unsigned_v7f16_v7i32(<7 x half> %f) { ; CHECK-LABEL: test_unsigned_v7f16_v7i32: ; CHECK: // %bb.0: -; CHECK-NEXT: fcvtl v1.4s, v0.4h -; CHECK-NEXT: fcvtl2 v0.4s, v0.8h +; CHECK-NEXT: fcvtl2 v1.4s, v0.8h +; CHECK-NEXT: fcvtl v0.4s, v0.4h ; CHECK-NEXT: fcvtzu v1.4s, v1.4s ; CHECK-NEXT: fcvtzu v0.4s, v0.4s -; CHECK-NEXT: mov w1, v1.s[1] -; CHECK-NEXT: mov w2, v1.s[2] -; CHECK-NEXT: mov w3, v1.s[3] -; CHECK-NEXT: mov w5, v0.s[1] -; CHECK-NEXT: mov w6, v0.s[2] -; CHECK-NEXT: fmov w0, s1 -; CHECK-NEXT: fmov w4, s0 +; CHECK-NEXT: fmov w4, s1 +; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: mov w5, v1.s[1] +; CHECK-NEXT: mov w1, v0.s[1] +; CHECK-NEXT: mov w2, v0.s[2] +; CHECK-NEXT: mov w3, v0.s[3] +; CHECK-NEXT: mov w6, v1.s[2] ; CHECK-NEXT: ret %x = call <7 x i32> @llvm.fptoui.sat.v7f16.v7i32(<7 x half> %f) ret <7 x i32> %x @@ -598,10 +598,10 @@ define <8 x i32> @test_unsigned_v8f16_v8i32(<8 x half> %f) { ; CHECK-LABEL: test_unsigned_v8f16_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: fcvtl2 v1.4s, v0.8h -; CHECK-NEXT: fcvtl v0.4s, v0.4h -; CHECK-NEXT: fcvtzu v1.4s, v1.4s -; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: fcvtl v1.4s, v0.4h +; CHECK-NEXT: fcvtl2 v2.4s, v0.8h +; CHECK-NEXT: fcvtzu v0.4s, v1.4s +; CHECK-NEXT: fcvtzu v1.4s, v2.4s ; CHECK-NEXT: ret %x = call <8 x i32> @llvm.fptoui.sat.v8f16.v8i32(<8 x half> %f) ret <8 x i32> %x @@ -624,8 +624,8 @@ define <2 x i1> @test_unsigned_v2f32_v2i1(<2 x float> %f) { ; CHECK-LABEL: test_unsigned_v2f32_v2i1: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2s, #1 ; CHECK-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-NEXT: movi v1.2s, #1 ; CHECK-NEXT: umin v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %x = call <2 x i1> @llvm.fptoui.sat.v2f32.v2i1(<2 x float> %f) @@ -635,8 +635,8 @@ define <2 x i8> @test_unsigned_v2f32_v2i8(<2 x float> %f) { ; CHECK-LABEL: test_unsigned_v2f32_v2i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0x0000ff000000ff ; CHECK-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-NEXT: movi d1, #0x0000ff000000ff ; CHECK-NEXT: umin v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %x = call <2 x i8> @llvm.fptoui.sat.v2f32.v2i8(<2 x float> %f) @@ -646,8 +646,8 @@ define <2 x i13> @test_unsigned_v2f32_v2i13(<2 x float> %f) { ; CHECK-LABEL: test_unsigned_v2f32_v2i13: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2s, #31, msl #8 ; CHECK-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-NEXT: movi v1.2s, #31, msl #8 ; CHECK-NEXT: umin v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %x = call <2 x i13> @llvm.fptoui.sat.v2f32.v2i13(<2 x float> %f) @@ -657,8 +657,8 @@ define <2 x i16> @test_unsigned_v2f32_v2i16(<2 x float> %f) { ; CHECK-LABEL: test_unsigned_v2f32_v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: movi d1, #0x00ffff0000ffff ; CHECK-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-NEXT: movi d1, #0x00ffff0000ffff ; CHECK-NEXT: umin v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %x = call <2 x i16> @llvm.fptoui.sat.v2f32.v2i16(<2 x float> %f) @@ -668,8 +668,8 @@ define <2 x i19> @test_unsigned_v2f32_v2i19(<2 x float> %f) { ; CHECK-LABEL: test_unsigned_v2f32_v2i19: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2s, #7, msl #16 ; CHECK-NEXT: fcvtzu v0.2s, v0.2s +; CHECK-NEXT: movi v1.2s, #7, msl #16 ; CHECK-NEXT: umin v0.2s, v0.2s, v1.2s ; CHECK-NEXT: ret %x = call <2 x i19> @llvm.fptoui.sat.v2f32.v2i19(<2 x float> %f) @@ -708,11 +708,11 @@ ; CHECK-LABEL: test_unsigned_v2f32_v2i64: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov s1, v0.s[1] ; CHECK-NEXT: fcvtzu x8, s0 +; CHECK-NEXT: mov s1, v0.s[1] +; CHECK-NEXT: fcvtzu x9, s1 ; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fcvtzu x8, s1 -; CHECK-NEXT: mov v0.d[1], x8 +; CHECK-NEXT: mov v0.d[1], x9 ; CHECK-NEXT: ret %x = call <2 x i64> @llvm.fptoui.sat.v2f32.v2i64(<2 x float> %f) ret <2 x i64> %x @@ -743,11 +743,11 @@ ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: fmov s9, w8 -; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, xzr, x1, lt +; CHECK-NEXT: csel x9, xzr, x0, lt +; CHECK-NEXT: csel x8, xzr, x1, lt ; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: csel x19, x21, x9, gt -; CHECK-NEXT: csinv x20, x8, xzr, le +; CHECK-NEXT: csel x19, x21, x8, gt +; CHECK-NEXT: csinv x20, x9, xzr, le ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov x2, x20 @@ -793,11 +793,11 @@ ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: fmov s9, w8 -; CHECK-NEXT: csel x8, xzr, x1, lt -; CHECK-NEXT: csel x9, xzr, x0, lt +; CHECK-NEXT: csel x9, xzr, x1, lt +; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: csinv x19, x9, xzr, le -; CHECK-NEXT: csinv x20, x8, xzr, le +; CHECK-NEXT: csinv x19, x8, xzr, le +; CHECK-NEXT: csinv x20, x9, xzr, le ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov x2, x19 @@ -837,8 +837,8 @@ define <4 x i1> @test_unsigned_v4f32_v4i1(<4 x float> %f) { ; CHECK-LABEL: test_unsigned_v4f32_v4i1: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret @@ -849,8 +849,8 @@ define <4 x i8> @test_unsigned_v4f32_v4i8(<4 x float> %f) { ; CHECK-LABEL: test_unsigned_v4f32_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.2d, #0x0000ff000000ff ; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: movi v1.2d, #0x0000ff000000ff ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret @@ -861,8 +861,8 @@ define <4 x i13> @test_unsigned_v4f32_v4i13(<4 x float> %f) { ; CHECK-LABEL: test_unsigned_v4f32_v4i13: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.4s, #31, msl #8 ; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: movi v1.4s, #31, msl #8 ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret @@ -883,8 +883,8 @@ define <4 x i19> @test_unsigned_v4f32_v4i19(<4 x float> %f) { ; CHECK-LABEL: test_unsigned_v4f32_v4i19: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.4s, #7, msl #16 ; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: movi v1.4s, #7, msl #16 ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %x = call <4 x i19> @llvm.fptoui.sat.v4f32.v4i19(<4 x float> %f) @@ -905,13 +905,13 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: mov x8, #1125899906842623 -; CHECK-NEXT: mov s3, v0.s[1] ; CHECK-NEXT: fcvtzu x11, s0 ; CHECK-NEXT: mov s2, v1.s[1] ; CHECK-NEXT: fcvtzu x9, s1 -; CHECK-NEXT: fcvtzu x12, s3 -; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: mov s1, v0.s[1] ; CHECK-NEXT: fcvtzu x10, s2 +; CHECK-NEXT: fcvtzu x12, s1 +; CHECK-NEXT: cmp x9, x8 ; CHECK-NEXT: csel x2, x9, x8, lo ; CHECK-NEXT: cmp x10, x8 ; CHECK-NEXT: csel x3, x10, x8, lo @@ -928,16 +928,16 @@ ; CHECK-LABEL: test_unsigned_v4f32_v4i64: ; CHECK: // %bb.0: ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: mov s3, v0.s[1] ; CHECK-NEXT: fcvtzu x9, s0 -; CHECK-NEXT: mov s2, v1.s[1] +; CHECK-NEXT: mov s2, v0.s[1] ; CHECK-NEXT: fcvtzu x8, s1 +; CHECK-NEXT: mov s1, v1.s[1] +; CHECK-NEXT: fcvtzu x10, s2 ; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: fcvtzu x9, s3 +; CHECK-NEXT: fcvtzu x11, s1 ; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: fcvtzu x8, s2 -; CHECK-NEXT: mov v0.d[1], x9 -; CHECK-NEXT: mov v1.d[1], x8 +; CHECK-NEXT: mov v0.d[1], x10 +; CHECK-NEXT: mov v1.d[1], x11 ; CHECK-NEXT: ret %x = call <4 x i64> @llvm.fptoui.sat.v4f32.v4i64(<4 x float> %f) ret <4 x i64> %x @@ -968,22 +968,22 @@ ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: mov w8, #1904214015 -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, #0.0 +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: mov x25, #68719476735 ; CHECK-NEXT: fmov s9, w8 +; CHECK-NEXT: csel x9, xzr, x0, lt +; CHECK-NEXT: csel x8, xzr, x1, lt ; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, xzr, x1, lt ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 -; CHECK-NEXT: csel x19, x25, x9, gt -; CHECK-NEXT: csinv x20, x8, xzr, le +; CHECK-NEXT: csel x19, x25, x8, gt +; CHECK-NEXT: csinv x20, x9, xzr, le ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov s8, v0.s[1] ; CHECK-NEXT: fcmp s0, #0.0 +; CHECK-NEXT: mov s8, v0.s[1] ; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: csel x9, xzr, x1, lt ; CHECK-NEXT: fcmp s0, s9 @@ -1018,8 +1018,8 @@ ; CHECK-NEXT: ldp x24, x23, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: ldp x30, x25, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: ldp d9, d8, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: add sp, sp, #112 ; CHECK-NEXT: ret @@ -1051,21 +1051,21 @@ ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: mov w8, #2139095039 -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, #0.0 +; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: fmov s9, w8 +; CHECK-NEXT: csel x9, xzr, x1, lt +; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: csel x8, xzr, x1, lt -; CHECK-NEXT: csel x9, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 -; CHECK-NEXT: csinv x19, x9, xzr, le -; CHECK-NEXT: csinv x20, x8, xzr, le +; CHECK-NEXT: csinv x19, x8, xzr, le +; CHECK-NEXT: csinv x20, x9, xzr, le ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov s8, v0.s[1] ; CHECK-NEXT: fcmp s0, #0.0 +; CHECK-NEXT: mov s8, v0.s[1] ; CHECK-NEXT: csel x8, xzr, x1, lt ; CHECK-NEXT: csel x9, xzr, x0, lt ; CHECK-NEXT: fcmp s0, s9 @@ -1220,11 +1220,11 @@ define <2 x i32> @test_unsigned_v2f64_v2i32_duplicate(<2 x double> %f) { ; CHECK-LABEL: test_unsigned_v2f64_v2i32_duplicate: ; CHECK: // %bb.0: -; CHECK-NEXT: mov d1, v0.d[1] ; CHECK-NEXT: fcvtzu w8, d0 +; CHECK-NEXT: mov d1, v0.d[1] +; CHECK-NEXT: fcvtzu w9, d1 ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fcvtzu w8, d1 -; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %x = call <2 x i32> @llvm.fptoui.sat.v2f64.v2i32(<2 x double> %f) @@ -1282,11 +1282,11 @@ ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: fmov d9, x8 -; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, xzr, x1, lt +; CHECK-NEXT: csel x9, xzr, x0, lt +; CHECK-NEXT: csel x8, xzr, x1, lt ; CHECK-NEXT: fcmp d8, d9 -; CHECK-NEXT: csel x19, x21, x9, gt -; CHECK-NEXT: csinv x20, x8, xzr, le +; CHECK-NEXT: csel x19, x21, x8, gt +; CHECK-NEXT: csinv x20, x9, xzr, le ; CHECK-NEXT: bl __fixunsdfti ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov x2, x20 @@ -1331,11 +1331,11 @@ ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: fmov d9, x8 -; CHECK-NEXT: csel x8, xzr, x1, lt -; CHECK-NEXT: csel x9, xzr, x0, lt +; CHECK-NEXT: csel x9, xzr, x1, lt +; CHECK-NEXT: csel x8, xzr, x0, lt ; CHECK-NEXT: fcmp d8, d9 -; CHECK-NEXT: csinv x19, x9, xzr, le -; CHECK-NEXT: csinv x20, x8, xzr, le +; CHECK-NEXT: csinv x19, x8, xzr, le +; CHECK-NEXT: csinv x20, x9, xzr, le ; CHECK-NEXT: bl __fixunsdfti ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov x2, x19 @@ -1384,8 +1384,8 @@ ; ; CHECK-FP16-LABEL: test_unsigned_v4f16_v4i1: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: movi v1.4h, #1 ; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h +; CHECK-FP16-NEXT: movi v1.4h, #1 ; CHECK-FP16-NEXT: umin v0.4h, v0.4h, v1.4h ; CHECK-FP16-NEXT: ret %x = call <4 x i1> @llvm.fptoui.sat.v4f16.v4i1(<4 x half> %f) @@ -1404,8 +1404,8 @@ ; ; CHECK-FP16-LABEL: test_unsigned_v4f16_v4i8: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: movi d1, #0xff00ff00ff00ff ; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h +; CHECK-FP16-NEXT: movi d1, #0xff00ff00ff00ff ; CHECK-FP16-NEXT: umin v0.4h, v0.4h, v1.4h ; CHECK-FP16-NEXT: ret %x = call <4 x i8> @llvm.fptoui.sat.v4f16.v4i8(<4 x half> %f) @@ -1424,8 +1424,8 @@ ; ; CHECK-FP16-LABEL: test_unsigned_v4f16_v4i13: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: mvni v1.4h, #224, lsl #8 ; CHECK-FP16-NEXT: fcvtzu v0.4h, v0.4h +; CHECK-FP16-NEXT: mvni v1.4h, #224, lsl #8 ; CHECK-FP16-NEXT: umin v0.4h, v0.4h, v1.4h ; CHECK-FP16-NEXT: ret %x = call <4 x i13> @llvm.fptoui.sat.v4f16.v4i13(<4 x half> %f) @@ -1475,18 +1475,18 @@ ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-CVT-NEXT: mov h1, v0.h[1] -; CHECK-CVT-NEXT: mov h2, v0.h[2] -; CHECK-CVT-NEXT: mov h3, v0.h[3] -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov x8, #1125899906842623 +; CHECK-CVT-NEXT: fcvt s2, h0 +; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: mov h0, v0.h[3] ; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: mov x8, #1125899906842623 ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: fcvtzu x9, s0 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcvtzu x9, s2 ; CHECK-CVT-NEXT: fcvtzu x10, s1 -; CHECK-CVT-NEXT: fcvtzu x11, s2 +; CHECK-CVT-NEXT: fcvtzu x11, s3 ; CHECK-CVT-NEXT: cmp x9, x8 -; CHECK-CVT-NEXT: fcvtzu x12, s3 +; CHECK-CVT-NEXT: fcvtzu x12, s0 ; CHECK-CVT-NEXT: csel x0, x9, x8, lo ; CHECK-CVT-NEXT: cmp x10, x8 ; CHECK-CVT-NEXT: csel x1, x10, x8, lo @@ -1500,14 +1500,14 @@ ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-FP16-NEXT: mov h1, v0.h[1] -; CHECK-FP16-NEXT: mov h2, v0.h[2] -; CHECK-FP16-NEXT: mov h3, v0.h[3] ; CHECK-FP16-NEXT: fcvtzu x9, h0 -; CHECK-FP16-NEXT: mov x8, #1125899906842623 +; CHECK-FP16-NEXT: mov h2, v0.h[2] +; CHECK-FP16-NEXT: mov h0, v0.h[3] ; CHECK-FP16-NEXT: fcvtzu x10, h1 +; CHECK-FP16-NEXT: mov x8, #1125899906842623 ; CHECK-FP16-NEXT: fcvtzu x11, h2 ; CHECK-FP16-NEXT: cmp x9, x8 -; CHECK-FP16-NEXT: fcvtzu x12, h3 +; CHECK-FP16-NEXT: fcvtzu x12, h0 ; CHECK-FP16-NEXT: csel x0, x9, x8, lo ; CHECK-FP16-NEXT: cmp x10, x8 ; CHECK-FP16-NEXT: csel x1, x10, x8, lo @@ -1524,37 +1524,37 @@ ; CHECK-CVT-LABEL: test_unsigned_v4f16_v4i64: ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-CVT-NEXT: mov h1, v0.h[2] +; CHECK-CVT-NEXT: mov h3, v0.h[2] +; CHECK-CVT-NEXT: fcvt s1, h0 ; CHECK-CVT-NEXT: mov h2, v0.h[1] -; CHECK-CVT-NEXT: fcvt s3, h0 ; CHECK-CVT-NEXT: mov h0, v0.h[3] -; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvt s3, h3 ; CHECK-CVT-NEXT: fcvt s2, h2 -; CHECK-CVT-NEXT: fcvtzu x8, s3 -; CHECK-CVT-NEXT: fcvt s3, h0 -; CHECK-CVT-NEXT: fcvtzu x9, s1 +; CHECK-CVT-NEXT: fcvtzu x8, s1 +; CHECK-CVT-NEXT: fcvt s1, h0 +; CHECK-CVT-NEXT: fcvtzu x10, s3 +; CHECK-CVT-NEXT: fcvtzu x9, s2 ; CHECK-CVT-NEXT: fmov d0, x8 -; CHECK-CVT-NEXT: fcvtzu x8, s2 -; CHECK-CVT-NEXT: fmov d1, x9 -; CHECK-CVT-NEXT: fcvtzu x9, s3 -; CHECK-CVT-NEXT: mov v0.d[1], x8 -; CHECK-CVT-NEXT: mov v1.d[1], x9 +; CHECK-CVT-NEXT: fcvtzu x8, s1 +; CHECK-CVT-NEXT: fmov d1, x10 +; CHECK-CVT-NEXT: mov v0.d[1], x9 +; CHECK-CVT-NEXT: mov v1.d[1], x8 ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_unsigned_v4f16_v4i64: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-FP16-NEXT: mov h1, v0.h[2] -; CHECK-FP16-NEXT: mov h2, v0.h[1] -; CHECK-FP16-NEXT: mov h3, v0.h[3] +; CHECK-FP16-NEXT: mov h2, v0.h[2] ; CHECK-FP16-NEXT: fcvtzu x8, h0 +; CHECK-FP16-NEXT: mov h1, v0.h[1] +; CHECK-FP16-NEXT: mov h0, v0.h[3] +; CHECK-FP16-NEXT: fcvtzu x10, h2 ; CHECK-FP16-NEXT: fcvtzu x9, h1 +; CHECK-FP16-NEXT: fcvtzu x11, h0 ; CHECK-FP16-NEXT: fmov d0, x8 -; CHECK-FP16-NEXT: fcvtzu x8, h2 -; CHECK-FP16-NEXT: fmov d1, x9 -; CHECK-FP16-NEXT: fcvtzu x9, h3 -; CHECK-FP16-NEXT: mov v0.d[1], x8 -; CHECK-FP16-NEXT: mov v1.d[1], x9 +; CHECK-FP16-NEXT: fmov d1, x10 +; CHECK-FP16-NEXT: mov v0.d[1], x9 +; CHECK-FP16-NEXT: mov v1.d[1], x11 ; CHECK-FP16-NEXT: ret %x = call <4 x i64> @llvm.fptoui.sat.v4f16.v4i64(<4 x half> %f) ret <4 x i64> %x @@ -1586,18 +1586,18 @@ ; CHECK-NEXT: fcvt s8, h1 ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov w8, #1904214015 +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, #0.0 ; CHECK-NEXT: mov x25, #68719476735 -; CHECK-NEXT: mov h0, v0.h[1] ; CHECK-NEXT: fmov s9, w8 -; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, xzr, x1, lt +; CHECK-NEXT: mov h0, v0.h[1] +; CHECK-NEXT: csel x9, xzr, x0, lt +; CHECK-NEXT: csel x10, xzr, x1, lt ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csel x19, x25, x9, gt -; CHECK-NEXT: csinv x20, x8, xzr, le +; CHECK-NEXT: csel x19, x25, x10, gt +; CHECK-NEXT: csinv x20, x9, xzr, le ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload @@ -1637,8 +1637,8 @@ ; CHECK-NEXT: ldp x22, x21, [sp, #64] // 16-byte Folded Reload ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: ldp x30, x25, [sp, #32] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: add sp, sp, #96 @@ -1672,17 +1672,17 @@ ; CHECK-NEXT: fcvt s8, h1 ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: mov w8, #2139095039 +; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, #0.0 -; CHECK-NEXT: mov h0, v0.h[2] ; CHECK-NEXT: fmov s9, w8 -; CHECK-NEXT: csel x8, xzr, x1, lt -; CHECK-NEXT: csel x9, xzr, x0, lt +; CHECK-NEXT: mov h0, v0.h[2] +; CHECK-NEXT: csel x9, xzr, x1, lt +; CHECK-NEXT: csel x10, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csinv x19, x9, xzr, le -; CHECK-NEXT: csinv x20, x8, xzr, le +; CHECK-NEXT: csinv x19, x10, xzr, le +; CHECK-NEXT: csinv x20, x9, xzr, le ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload @@ -1723,8 +1723,8 @@ ; CHECK-NEXT: fmov d0, x8 ; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload ; CHECK-NEXT: ldp x24, x23, [sp, #48] // 16-byte Folded Reload -; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: ldp d9, d8, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: mov v0.d[1], x1 ; CHECK-NEXT: fmov x0, d0 ; CHECK-NEXT: add sp, sp, #96 ; CHECK-NEXT: ret @@ -1753,20 +1753,19 @@ ; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h ; CHECK-CVT-NEXT: mov s2, v1.s[1] ; CHECK-CVT-NEXT: mov s3, v1.s[2] -; CHECK-CVT-NEXT: mov s4, v1.s[3] -; CHECK-CVT-NEXT: mov s5, v0.s[1] -; CHECK-CVT-NEXT: fcvtzu w9, s1 -; CHECK-CVT-NEXT: fcvtzu w10, s0 -; CHECK-CVT-NEXT: mov s1, v0.s[2] -; CHECK-CVT-NEXT: mov s0, v0.s[3] -; CHECK-CVT-NEXT: fcvtzu w8, s2 +; CHECK-CVT-NEXT: fcvtzu w8, s1 +; CHECK-CVT-NEXT: mov s1, v1.s[3] +; CHECK-CVT-NEXT: fcvtzu w9, s2 +; CHECK-CVT-NEXT: mov s2, v0.s[1] ; CHECK-CVT-NEXT: fcvtzu w11, s3 -; CHECK-CVT-NEXT: fcvtzu w12, s4 -; CHECK-CVT-NEXT: fcvtzu w13, s5 -; CHECK-CVT-NEXT: cmp w8, #1 -; CHECK-CVT-NEXT: csinc w8, w8, wzr, lo +; CHECK-CVT-NEXT: fcvtzu w12, s1 +; CHECK-CVT-NEXT: fcvtzu w13, s2 +; CHECK-CVT-NEXT: fcvtzu w10, s0 ; CHECK-CVT-NEXT: cmp w9, #1 +; CHECK-CVT-NEXT: mov s3, v0.s[2] ; CHECK-CVT-NEXT: csinc w9, w9, wzr, lo +; CHECK-CVT-NEXT: cmp w8, #1 +; CHECK-CVT-NEXT: csinc w8, w8, wzr, lo ; CHECK-CVT-NEXT: cmp w11, #1 ; CHECK-CVT-NEXT: csinc w11, w11, wzr, lo ; CHECK-CVT-NEXT: cmp w12, #1 @@ -1775,29 +1774,30 @@ ; CHECK-CVT-NEXT: csinc w13, w13, wzr, lo ; CHECK-CVT-NEXT: cmp w10, #1 ; CHECK-CVT-NEXT: csinc w10, w10, wzr, lo -; CHECK-CVT-NEXT: fmov s2, w10 -; CHECK-CVT-NEXT: fcvtzu w10, s1 -; CHECK-CVT-NEXT: fmov s1, w9 -; CHECK-CVT-NEXT: mov v2.s[1], w13 -; CHECK-CVT-NEXT: cmp w10, #1 -; CHECK-CVT-NEXT: csinc w9, w10, wzr, lo -; CHECK-CVT-NEXT: fcvtzu w10, s0 -; CHECK-CVT-NEXT: mov v1.s[1], w8 -; CHECK-CVT-NEXT: mov v2.s[2], w9 -; CHECK-CVT-NEXT: cmp w10, #1 -; CHECK-CVT-NEXT: csinc w8, w10, wzr, lo -; CHECK-CVT-NEXT: mov v1.s[2], w11 -; CHECK-CVT-NEXT: mov v2.s[3], w8 -; CHECK-CVT-NEXT: mov v1.s[3], w12 -; CHECK-CVT-NEXT: xtn v0.4h, v2.4s -; CHECK-CVT-NEXT: xtn2 v0.8h, v1.4s -; CHECK-CVT-NEXT: xtn v0.8b, v0.8h +; CHECK-CVT-NEXT: fcvtzu w14, s3 +; CHECK-CVT-NEXT: mov s0, v0.s[3] +; CHECK-CVT-NEXT: fcvtzu w15, s0 +; CHECK-CVT-NEXT: fmov s1, w10 +; CHECK-CVT-NEXT: fmov s0, w8 +; CHECK-CVT-NEXT: cmp w14, #1 +; CHECK-CVT-NEXT: csinc w8, w14, wzr, lo +; CHECK-CVT-NEXT: mov v1.s[1], w13 +; CHECK-CVT-NEXT: cmp w15, #1 +; CHECK-CVT-NEXT: mov v0.s[1], w9 +; CHECK-CVT-NEXT: csinc w9, w15, wzr, lo +; CHECK-CVT-NEXT: mov v1.s[2], w8 +; CHECK-CVT-NEXT: mov v0.s[2], w11 +; CHECK-CVT-NEXT: mov v1.s[3], w9 +; CHECK-CVT-NEXT: mov v0.s[3], w12 +; CHECK-CVT-NEXT: xtn v1.4h, v1.4s +; CHECK-CVT-NEXT: xtn2 v1.8h, v0.4s +; CHECK-CVT-NEXT: xtn v0.8b, v1.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i1: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: movi v1.8h, #1 ; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-FP16-NEXT: movi v1.8h, #1 ; CHECK-FP16-NEXT: umin v0.8h, v0.8h, v1.8h ; CHECK-FP16-NEXT: xtn v0.8b, v0.8h ; CHECK-FP16-NEXT: ret @@ -1813,20 +1813,19 @@ ; CHECK-CVT-NEXT: mov w8, #255 ; CHECK-CVT-NEXT: mov s2, v1.s[1] ; CHECK-CVT-NEXT: mov s3, v1.s[2] -; CHECK-CVT-NEXT: mov s4, v1.s[3] -; CHECK-CVT-NEXT: mov s5, v0.s[1] -; CHECK-CVT-NEXT: fcvtzu w10, s1 -; CHECK-CVT-NEXT: fcvtzu w11, s0 -; CHECK-CVT-NEXT: mov s1, v0.s[2] -; CHECK-CVT-NEXT: mov s0, v0.s[3] -; CHECK-CVT-NEXT: fcvtzu w9, s2 +; CHECK-CVT-NEXT: fcvtzu w9, s1 +; CHECK-CVT-NEXT: mov s1, v1.s[3] +; CHECK-CVT-NEXT: fcvtzu w10, s2 +; CHECK-CVT-NEXT: mov s2, v0.s[1] ; CHECK-CVT-NEXT: fcvtzu w12, s3 -; CHECK-CVT-NEXT: fcvtzu w13, s4 -; CHECK-CVT-NEXT: fcvtzu w14, s5 -; CHECK-CVT-NEXT: cmp w9, #255 -; CHECK-CVT-NEXT: csel w9, w9, w8, lo +; CHECK-CVT-NEXT: fcvtzu w13, s1 +; CHECK-CVT-NEXT: fcvtzu w14, s2 +; CHECK-CVT-NEXT: fcvtzu w11, s0 ; CHECK-CVT-NEXT: cmp w10, #255 +; CHECK-CVT-NEXT: mov s3, v0.s[2] ; CHECK-CVT-NEXT: csel w10, w10, w8, lo +; CHECK-CVT-NEXT: cmp w9, #255 +; CHECK-CVT-NEXT: csel w9, w9, w8, lo ; CHECK-CVT-NEXT: cmp w12, #255 ; CHECK-CVT-NEXT: csel w12, w12, w8, lo ; CHECK-CVT-NEXT: cmp w13, #255 @@ -1835,23 +1834,24 @@ ; CHECK-CVT-NEXT: csel w14, w14, w8, lo ; CHECK-CVT-NEXT: cmp w11, #255 ; CHECK-CVT-NEXT: csel w11, w11, w8, lo -; CHECK-CVT-NEXT: fmov s2, w11 -; CHECK-CVT-NEXT: fcvtzu w11, s1 -; CHECK-CVT-NEXT: fmov s1, w10 -; CHECK-CVT-NEXT: mov v2.s[1], w14 -; CHECK-CVT-NEXT: cmp w11, #255 -; CHECK-CVT-NEXT: csel w10, w11, w8, lo -; CHECK-CVT-NEXT: fcvtzu w11, s0 -; CHECK-CVT-NEXT: mov v1.s[1], w9 -; CHECK-CVT-NEXT: mov v2.s[2], w10 -; CHECK-CVT-NEXT: cmp w11, #255 -; CHECK-CVT-NEXT: csel w8, w11, w8, lo -; CHECK-CVT-NEXT: mov v1.s[2], w12 -; CHECK-CVT-NEXT: mov v2.s[3], w8 -; CHECK-CVT-NEXT: mov v1.s[3], w13 -; CHECK-CVT-NEXT: xtn v0.4h, v2.4s -; CHECK-CVT-NEXT: xtn2 v0.8h, v1.4s -; CHECK-CVT-NEXT: xtn v0.8b, v0.8h +; CHECK-CVT-NEXT: fcvtzu w15, s3 +; CHECK-CVT-NEXT: mov s0, v0.s[3] +; CHECK-CVT-NEXT: fcvtzu w16, s0 +; CHECK-CVT-NEXT: fmov s1, w11 +; CHECK-CVT-NEXT: cmp w15, #255 +; CHECK-CVT-NEXT: fmov s0, w9 +; CHECK-CVT-NEXT: csel w9, w15, w8, lo +; CHECK-CVT-NEXT: mov v1.s[1], w14 +; CHECK-CVT-NEXT: cmp w16, #255 +; CHECK-CVT-NEXT: csel w8, w16, w8, lo +; CHECK-CVT-NEXT: mov v0.s[1], w10 +; CHECK-CVT-NEXT: mov v1.s[2], w9 +; CHECK-CVT-NEXT: mov v0.s[2], w12 +; CHECK-CVT-NEXT: mov v1.s[3], w8 +; CHECK-CVT-NEXT: mov v0.s[3], w13 +; CHECK-CVT-NEXT: xtn v1.4h, v1.4s +; CHECK-CVT-NEXT: xtn2 v1.8h, v0.4s +; CHECK-CVT-NEXT: xtn v0.8b, v1.8h ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i8: @@ -1871,20 +1871,19 @@ ; CHECK-CVT-NEXT: mov w8, #8191 ; CHECK-CVT-NEXT: mov s2, v1.s[1] ; CHECK-CVT-NEXT: mov s3, v1.s[2] -; CHECK-CVT-NEXT: mov s4, v1.s[3] -; CHECK-CVT-NEXT: mov s5, v0.s[1] -; CHECK-CVT-NEXT: fcvtzu w10, s1 -; CHECK-CVT-NEXT: fcvtzu w11, s0 -; CHECK-CVT-NEXT: mov s1, v0.s[2] -; CHECK-CVT-NEXT: mov s0, v0.s[3] -; CHECK-CVT-NEXT: fcvtzu w9, s2 +; CHECK-CVT-NEXT: fcvtzu w9, s1 +; CHECK-CVT-NEXT: mov s1, v1.s[3] +; CHECK-CVT-NEXT: fcvtzu w10, s2 +; CHECK-CVT-NEXT: mov s2, v0.s[1] ; CHECK-CVT-NEXT: fcvtzu w12, s3 -; CHECK-CVT-NEXT: fcvtzu w13, s4 -; CHECK-CVT-NEXT: fcvtzu w14, s5 -; CHECK-CVT-NEXT: cmp w9, w8 -; CHECK-CVT-NEXT: csel w9, w9, w8, lo +; CHECK-CVT-NEXT: fcvtzu w13, s1 +; CHECK-CVT-NEXT: fcvtzu w14, s2 +; CHECK-CVT-NEXT: fcvtzu w11, s0 ; CHECK-CVT-NEXT: cmp w10, w8 +; CHECK-CVT-NEXT: mov s3, v0.s[2] ; CHECK-CVT-NEXT: csel w10, w10, w8, lo +; CHECK-CVT-NEXT: cmp w9, w8 +; CHECK-CVT-NEXT: csel w9, w9, w8, lo ; CHECK-CVT-NEXT: cmp w12, w8 ; CHECK-CVT-NEXT: csel w12, w12, w8, lo ; CHECK-CVT-NEXT: cmp w13, w8 @@ -1893,28 +1892,29 @@ ; CHECK-CVT-NEXT: csel w14, w14, w8, lo ; CHECK-CVT-NEXT: cmp w11, w8 ; CHECK-CVT-NEXT: csel w11, w11, w8, lo -; CHECK-CVT-NEXT: fmov s2, w11 -; CHECK-CVT-NEXT: fcvtzu w11, s1 -; CHECK-CVT-NEXT: fmov s1, w10 -; CHECK-CVT-NEXT: mov v2.s[1], w14 -; CHECK-CVT-NEXT: cmp w11, w8 -; CHECK-CVT-NEXT: csel w10, w11, w8, lo -; CHECK-CVT-NEXT: fcvtzu w11, s0 -; CHECK-CVT-NEXT: mov v1.s[1], w9 -; CHECK-CVT-NEXT: mov v2.s[2], w10 -; CHECK-CVT-NEXT: cmp w11, w8 -; CHECK-CVT-NEXT: csel w8, w11, w8, lo +; CHECK-CVT-NEXT: fcvtzu w15, s3 +; CHECK-CVT-NEXT: mov s0, v0.s[3] +; CHECK-CVT-NEXT: fmov s1, w9 +; CHECK-CVT-NEXT: fcvtzu w16, s0 +; CHECK-CVT-NEXT: fmov s0, w11 +; CHECK-CVT-NEXT: cmp w15, w8 +; CHECK-CVT-NEXT: csel w9, w15, w8, lo +; CHECK-CVT-NEXT: mov v0.s[1], w14 +; CHECK-CVT-NEXT: cmp w16, w8 +; CHECK-CVT-NEXT: csel w8, w16, w8, lo +; CHECK-CVT-NEXT: mov v1.s[1], w10 +; CHECK-CVT-NEXT: mov v0.s[2], w9 ; CHECK-CVT-NEXT: mov v1.s[2], w12 -; CHECK-CVT-NEXT: mov v2.s[3], w8 +; CHECK-CVT-NEXT: mov v0.s[3], w8 ; CHECK-CVT-NEXT: mov v1.s[3], w13 -; CHECK-CVT-NEXT: xtn v0.4h, v2.4s +; CHECK-CVT-NEXT: xtn v0.4h, v0.4s ; CHECK-CVT-NEXT: xtn2 v0.8h, v1.4s ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i13: ; CHECK-FP16: // %bb.0: -; CHECK-FP16-NEXT: mvni v1.8h, #224, lsl #8 ; CHECK-FP16-NEXT: fcvtzu v0.8h, v0.8h +; CHECK-FP16-NEXT: mvni v1.8h, #224, lsl #8 ; CHECK-FP16-NEXT: umin v0.8h, v0.8h, v1.8h ; CHECK-FP16-NEXT: ret %x = call <8 x i13> @llvm.fptoui.sat.v8f16.v8i13(<8 x half> %f) @@ -1929,20 +1929,19 @@ ; CHECK-CVT-NEXT: mov w8, #65535 ; CHECK-CVT-NEXT: mov s2, v1.s[1] ; CHECK-CVT-NEXT: mov s3, v1.s[2] -; CHECK-CVT-NEXT: mov s4, v1.s[3] -; CHECK-CVT-NEXT: mov s5, v0.s[1] -; CHECK-CVT-NEXT: fcvtzu w10, s1 -; CHECK-CVT-NEXT: fcvtzu w11, s0 -; CHECK-CVT-NEXT: mov s1, v0.s[2] -; CHECK-CVT-NEXT: mov s0, v0.s[3] -; CHECK-CVT-NEXT: fcvtzu w9, s2 +; CHECK-CVT-NEXT: fcvtzu w9, s1 +; CHECK-CVT-NEXT: mov s1, v1.s[3] +; CHECK-CVT-NEXT: fcvtzu w10, s2 +; CHECK-CVT-NEXT: mov s2, v0.s[1] ; CHECK-CVT-NEXT: fcvtzu w12, s3 -; CHECK-CVT-NEXT: fcvtzu w13, s4 -; CHECK-CVT-NEXT: fcvtzu w14, s5 -; CHECK-CVT-NEXT: cmp w9, w8 -; CHECK-CVT-NEXT: csel w9, w9, w8, lo +; CHECK-CVT-NEXT: fcvtzu w13, s1 +; CHECK-CVT-NEXT: fcvtzu w14, s2 +; CHECK-CVT-NEXT: fcvtzu w11, s0 ; CHECK-CVT-NEXT: cmp w10, w8 +; CHECK-CVT-NEXT: mov s3, v0.s[2] ; CHECK-CVT-NEXT: csel w10, w10, w8, lo +; CHECK-CVT-NEXT: cmp w9, w8 +; CHECK-CVT-NEXT: csel w9, w9, w8, lo ; CHECK-CVT-NEXT: cmp w12, w8 ; CHECK-CVT-NEXT: csel w12, w12, w8, lo ; CHECK-CVT-NEXT: cmp w13, w8 @@ -1951,21 +1950,22 @@ ; CHECK-CVT-NEXT: csel w14, w14, w8, lo ; CHECK-CVT-NEXT: cmp w11, w8 ; CHECK-CVT-NEXT: csel w11, w11, w8, lo -; CHECK-CVT-NEXT: fmov s2, w11 -; CHECK-CVT-NEXT: fcvtzu w11, s1 -; CHECK-CVT-NEXT: fmov s1, w10 -; CHECK-CVT-NEXT: mov v2.s[1], w14 -; CHECK-CVT-NEXT: cmp w11, w8 -; CHECK-CVT-NEXT: csel w10, w11, w8, lo -; CHECK-CVT-NEXT: fcvtzu w11, s0 -; CHECK-CVT-NEXT: mov v1.s[1], w9 -; CHECK-CVT-NEXT: mov v2.s[2], w10 -; CHECK-CVT-NEXT: cmp w11, w8 -; CHECK-CVT-NEXT: csel w8, w11, w8, lo +; CHECK-CVT-NEXT: fcvtzu w15, s3 +; CHECK-CVT-NEXT: mov s0, v0.s[3] +; CHECK-CVT-NEXT: fmov s1, w9 +; CHECK-CVT-NEXT: fcvtzu w16, s0 +; CHECK-CVT-NEXT: fmov s0, w11 +; CHECK-CVT-NEXT: cmp w15, w8 +; CHECK-CVT-NEXT: csel w9, w15, w8, lo +; CHECK-CVT-NEXT: mov v0.s[1], w14 +; CHECK-CVT-NEXT: cmp w16, w8 +; CHECK-CVT-NEXT: csel w8, w16, w8, lo +; CHECK-CVT-NEXT: mov v1.s[1], w10 +; CHECK-CVT-NEXT: mov v0.s[2], w9 ; CHECK-CVT-NEXT: mov v1.s[2], w12 -; CHECK-CVT-NEXT: mov v2.s[3], w8 +; CHECK-CVT-NEXT: mov v0.s[3], w8 ; CHECK-CVT-NEXT: mov v1.s[3], w13 -; CHECK-CVT-NEXT: xtn v0.4h, v2.4s +; CHECK-CVT-NEXT: xtn v0.4h, v0.4s ; CHECK-CVT-NEXT: xtn2 v0.8h, v1.4s ; CHECK-CVT-NEXT: ret ; @@ -1980,21 +1980,21 @@ define <8 x i19> @test_unsigned_v8f16_v8i19(<8 x half> %f) { ; CHECK-LABEL: test_unsigned_v8f16_v8i19: ; CHECK: // %bb.0: -; CHECK-NEXT: fcvtl v2.4s, v0.4h +; CHECK-NEXT: fcvtl v1.4s, v0.4h ; CHECK-NEXT: fcvtl2 v0.4s, v0.8h -; CHECK-NEXT: movi v1.4s, #7, msl #16 -; CHECK-NEXT: fcvtzu v2.4s, v2.4s +; CHECK-NEXT: movi v2.4s, #7, msl #16 +; CHECK-NEXT: fcvtzu v1.4s, v1.4s ; CHECK-NEXT: fcvtzu v0.4s, v0.4s -; CHECK-NEXT: umin v2.4s, v2.4s, v1.4s -; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: mov w1, v2.s[1] -; CHECK-NEXT: mov w2, v2.s[2] +; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v2.4s +; CHECK-NEXT: fmov w0, s1 +; CHECK-NEXT: fmov w4, s0 ; CHECK-NEXT: mov w5, v0.s[1] -; CHECK-NEXT: mov w3, v2.s[3] ; CHECK-NEXT: mov w6, v0.s[2] ; CHECK-NEXT: mov w7, v0.s[3] -; CHECK-NEXT: fmov w4, s0 -; CHECK-NEXT: fmov w0, s2 +; CHECK-NEXT: mov w1, v1.s[1] +; CHECK-NEXT: mov w2, v1.s[2] +; CHECK-NEXT: mov w3, v1.s[3] ; CHECK-NEXT: ret %x = call <8 x i19> @llvm.fptoui.sat.v8f16.v8i19(<8 x half> %f) ret <8 x i19> %x @@ -2003,10 +2003,10 @@ define <8 x i32> @test_unsigned_v8f16_v8i32_duplicate(<8 x half> %f) { ; CHECK-LABEL: test_unsigned_v8f16_v8i32_duplicate: ; CHECK: // %bb.0: -; CHECK-NEXT: fcvtl2 v1.4s, v0.8h -; CHECK-NEXT: fcvtl v0.4s, v0.4h -; CHECK-NEXT: fcvtzu v1.4s, v1.4s -; CHECK-NEXT: fcvtzu v0.4s, v0.4s +; CHECK-NEXT: fcvtl v1.4s, v0.4h +; CHECK-NEXT: fcvtl2 v2.4s, v0.8h +; CHECK-NEXT: fcvtzu v0.4s, v1.4s +; CHECK-NEXT: fcvtzu v1.4s, v2.4s ; CHECK-NEXT: ret %x = call <8 x i32> @llvm.fptoui.sat.v8f16.v8i32(<8 x half> %f) ret <8 x i32> %x @@ -2017,80 +2017,80 @@ ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-CVT-NEXT: mov x8, #1125899906842623 -; CHECK-CVT-NEXT: mov h2, v0.h[1] -; CHECK-CVT-NEXT: mov h3, v0.h[2] -; CHECK-CVT-NEXT: mov h5, v0.h[3] -; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov h4, v1.h[1] +; CHECK-CVT-NEXT: fcvt s2, h0 +; CHECK-CVT-NEXT: mov h3, v1.h[1] +; CHECK-CVT-NEXT: fcvt s5, h1 ; CHECK-CVT-NEXT: mov h6, v1.h[2] -; CHECK-CVT-NEXT: mov h7, v1.h[3] -; CHECK-CVT-NEXT: fcvt s1, h1 -; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: mov h1, v1.h[3] ; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: fcvtzu x9, s0 -; CHECK-CVT-NEXT: fcvt s5, h5 -; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: mov h4, v0.h[1] ; CHECK-CVT-NEXT: fcvt s6, h6 -; CHECK-CVT-NEXT: fcvt s0, h7 -; CHECK-CVT-NEXT: fcvtzu x10, s1 -; CHECK-CVT-NEXT: fcvtzu x11, s2 -; CHECK-CVT-NEXT: fcvtzu x12, s3 -; CHECK-CVT-NEXT: fcvtzu x14, s5 -; CHECK-CVT-NEXT: fcvtzu x13, s4 -; CHECK-CVT-NEXT: fcvtzu x15, s6 +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fcvtzu x9, s2 +; CHECK-CVT-NEXT: fcvtzu x10, s5 +; CHECK-CVT-NEXT: mov h2, v0.h[2] +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: fcvtzu x11, s3 +; CHECK-CVT-NEXT: mov h0, v0.h[3] +; CHECK-CVT-NEXT: fcvtzu x12, s6 +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: fcvt s0, h0 +; CHECK-CVT-NEXT: fcvtzu x14, s1 ; CHECK-CVT-NEXT: cmp x10, x8 -; CHECK-CVT-NEXT: fcvtzu x16, s0 +; CHECK-CVT-NEXT: fcvtzu x13, s4 ; CHECK-CVT-NEXT: csel x4, x10, x8, lo -; CHECK-CVT-NEXT: cmp x13, x8 -; CHECK-CVT-NEXT: csel x5, x13, x8, lo -; CHECK-CVT-NEXT: cmp x15, x8 -; CHECK-CVT-NEXT: csel x6, x15, x8, lo -; CHECK-CVT-NEXT: cmp x16, x8 -; CHECK-CVT-NEXT: csel x7, x16, x8, lo -; CHECK-CVT-NEXT: cmp x9, x8 -; CHECK-CVT-NEXT: csel x0, x9, x8, lo ; CHECK-CVT-NEXT: cmp x11, x8 -; CHECK-CVT-NEXT: csel x1, x11, x8, lo +; CHECK-CVT-NEXT: csel x5, x11, x8, lo ; CHECK-CVT-NEXT: cmp x12, x8 -; CHECK-CVT-NEXT: csel x2, x12, x8, lo +; CHECK-CVT-NEXT: csel x6, x12, x8, lo ; CHECK-CVT-NEXT: cmp x14, x8 -; CHECK-CVT-NEXT: csel x3, x14, x8, lo +; CHECK-CVT-NEXT: fcvtzu x10, s2 +; CHECK-CVT-NEXT: csel x7, x14, x8, lo +; CHECK-CVT-NEXT: cmp x9, x8 +; CHECK-CVT-NEXT: fcvtzu x11, s0 +; CHECK-CVT-NEXT: csel x0, x9, x8, lo +; CHECK-CVT-NEXT: cmp x13, x8 +; CHECK-CVT-NEXT: csel x1, x13, x8, lo +; CHECK-CVT-NEXT: cmp x10, x8 +; CHECK-CVT-NEXT: csel x2, x10, x8, lo +; CHECK-CVT-NEXT: cmp x11, x8 +; CHECK-CVT-NEXT: csel x3, x11, x8, lo ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i50: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-FP16-NEXT: mov x8, #1125899906842623 +; CHECK-FP16-NEXT: fcvtzu x11, h0 +; CHECK-FP16-NEXT: mov h2, v1.h[1] +; CHECK-FP16-NEXT: mov h3, v1.h[2] +; CHECK-FP16-NEXT: fcvtzu x12, h1 +; CHECK-FP16-NEXT: mov h1, v1.h[3] +; CHECK-FP16-NEXT: fcvtzu x9, h2 +; CHECK-FP16-NEXT: fcvtzu x10, h3 ; CHECK-FP16-NEXT: mov h2, v0.h[1] -; CHECK-FP16-NEXT: mov h3, v0.h[2] -; CHECK-FP16-NEXT: mov h5, v0.h[3] -; CHECK-FP16-NEXT: fcvtzu x9, h0 -; CHECK-FP16-NEXT: mov h4, v1.h[1] -; CHECK-FP16-NEXT: mov h6, v1.h[2] -; CHECK-FP16-NEXT: mov h0, v1.h[3] -; CHECK-FP16-NEXT: fcvtzu x10, h1 -; CHECK-FP16-NEXT: fcvtzu x11, h2 -; CHECK-FP16-NEXT: fcvtzu x12, h3 -; CHECK-FP16-NEXT: fcvtzu x14, h5 -; CHECK-FP16-NEXT: fcvtzu x13, h4 -; CHECK-FP16-NEXT: fcvtzu x15, h6 +; CHECK-FP16-NEXT: fcvtzu x14, h1 +; CHECK-FP16-NEXT: cmp x12, x8 +; CHECK-FP16-NEXT: fcvtzu x13, h2 +; CHECK-FP16-NEXT: mov h2, v0.h[2] +; CHECK-FP16-NEXT: csel x4, x12, x8, lo +; CHECK-FP16-NEXT: cmp x9, x8 +; CHECK-FP16-NEXT: mov h0, v0.h[3] +; CHECK-FP16-NEXT: csel x5, x9, x8, lo ; CHECK-FP16-NEXT: cmp x10, x8 -; CHECK-FP16-NEXT: fcvtzu x16, h0 -; CHECK-FP16-NEXT: csel x4, x10, x8, lo +; CHECK-FP16-NEXT: fcvtzu x9, h2 +; CHECK-FP16-NEXT: csel x6, x10, x8, lo +; CHECK-FP16-NEXT: cmp x14, x8 +; CHECK-FP16-NEXT: csel x7, x14, x8, lo +; CHECK-FP16-NEXT: cmp x11, x8 +; CHECK-FP16-NEXT: fcvtzu x10, h0 +; CHECK-FP16-NEXT: csel x0, x11, x8, lo ; CHECK-FP16-NEXT: cmp x13, x8 -; CHECK-FP16-NEXT: csel x5, x13, x8, lo -; CHECK-FP16-NEXT: cmp x15, x8 -; CHECK-FP16-NEXT: csel x6, x15, x8, lo -; CHECK-FP16-NEXT: cmp x16, x8 -; CHECK-FP16-NEXT: csel x7, x16, x8, lo +; CHECK-FP16-NEXT: csel x1, x13, x8, lo ; CHECK-FP16-NEXT: cmp x9, x8 -; CHECK-FP16-NEXT: csel x0, x9, x8, lo -; CHECK-FP16-NEXT: cmp x11, x8 -; CHECK-FP16-NEXT: csel x1, x11, x8, lo -; CHECK-FP16-NEXT: cmp x12, x8 -; CHECK-FP16-NEXT: csel x2, x12, x8, lo -; CHECK-FP16-NEXT: cmp x14, x8 -; CHECK-FP16-NEXT: csel x3, x14, x8, lo +; CHECK-FP16-NEXT: csel x2, x9, x8, lo +; CHECK-FP16-NEXT: cmp x10, x8 +; CHECK-FP16-NEXT: csel x3, x10, x8, lo ; CHECK-FP16-NEXT: ret %x = call <8 x i50> @llvm.fptoui.sat.v8f16.v8i50(<8 x half> %f) ret <8 x i50> %x @@ -2100,63 +2100,64 @@ ; CHECK-CVT-LABEL: test_unsigned_v8f16_v8i64: ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-CVT-NEXT: mov h4, v0.h[2] ; CHECK-CVT-NEXT: fcvt s5, h0 ; CHECK-CVT-NEXT: fcvt s2, h1 ; CHECK-CVT-NEXT: mov h3, v1.h[1] -; CHECK-CVT-NEXT: mov h6, v1.h[2] -; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: mov h4, v1.h[2] ; CHECK-CVT-NEXT: mov h1, v1.h[3] -; CHECK-CVT-NEXT: fcvtzu x9, s5 +; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvtzu x8, s2 -; CHECK-CVT-NEXT: fcvt s2, h3 +; CHECK-CVT-NEXT: fmov d2, x8 +; CHECK-CVT-NEXT: fcvtzu x8, s3 +; CHECK-CVT-NEXT: fcvtzu x9, s4 +; CHECK-CVT-NEXT: mov h4, v0.h[2] +; CHECK-CVT-NEXT: fcvtzu x10, s1 ; CHECK-CVT-NEXT: mov h3, v0.h[1] +; CHECK-CVT-NEXT: fcvt s1, h4 ; CHECK-CVT-NEXT: mov h0, v0.h[3] -; CHECK-CVT-NEXT: fcvt s5, h6 -; CHECK-CVT-NEXT: fcvt s6, h1 -; CHECK-CVT-NEXT: fcvtzu x10, s2 -; CHECK-CVT-NEXT: fmov d2, x8 -; CHECK-CVT-NEXT: fcvtzu x8, s4 -; CHECK-CVT-NEXT: fcvt s3, h3 +; CHECK-CVT-NEXT: mov v2.d[1], x8 +; CHECK-CVT-NEXT: fcvtzu x8, s5 +; CHECK-CVT-NEXT: fcvt s6, h3 ; CHECK-CVT-NEXT: fcvt s4, h0 -; CHECK-CVT-NEXT: fmov d0, x9 -; CHECK-CVT-NEXT: mov v2.d[1], x10 -; CHECK-CVT-NEXT: fcvtzu x10, s5 +; CHECK-CVT-NEXT: fmov d3, x9 +; CHECK-CVT-NEXT: fmov d0, x8 +; CHECK-CVT-NEXT: fcvtzu x8, s1 +; CHECK-CVT-NEXT: mov v3.d[1], x10 +; CHECK-CVT-NEXT: fcvtzu x9, s6 +; CHECK-CVT-NEXT: fcvtzu x10, s4 ; CHECK-CVT-NEXT: fmov d1, x8 -; CHECK-CVT-NEXT: fcvtzu x9, s3 -; CHECK-CVT-NEXT: fcvtzu x8, s4 -; CHECK-CVT-NEXT: fmov d3, x10 -; CHECK-CVT-NEXT: fcvtzu x10, s6 ; CHECK-CVT-NEXT: mov v0.d[1], x9 -; CHECK-CVT-NEXT: mov v1.d[1], x8 -; CHECK-CVT-NEXT: mov v3.d[1], x10 +; CHECK-CVT-NEXT: mov v1.d[1], x10 ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i64: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-FP16-NEXT: mov h3, v0.h[2] -; CHECK-FP16-NEXT: mov h5, v0.h[3] -; CHECK-FP16-NEXT: fcvtzu x9, h0 -; CHECK-FP16-NEXT: mov h2, v1.h[1] +; CHECK-FP16-NEXT: mov h5, v0.h[2] ; CHECK-FP16-NEXT: fcvtzu x8, h1 -; CHECK-FP16-NEXT: mov h4, v1.h[2] -; CHECK-FP16-NEXT: mov h6, v1.h[3] -; CHECK-FP16-NEXT: fcvtzu x10, h2 +; CHECK-FP16-NEXT: mov h2, v1.h[1] +; CHECK-FP16-NEXT: mov h3, v1.h[2] +; CHECK-FP16-NEXT: mov h1, v1.h[3] +; CHECK-FP16-NEXT: fcvtzu x9, h2 ; CHECK-FP16-NEXT: fmov d2, x8 ; CHECK-FP16-NEXT: fcvtzu x8, h3 -; CHECK-FP16-NEXT: mov h3, v0.h[1] -; CHECK-FP16-NEXT: fmov d0, x9 -; CHECK-FP16-NEXT: mov v2.d[1], x10 -; CHECK-FP16-NEXT: fcvtzu x10, h4 -; CHECK-FP16-NEXT: fmov d1, x8 -; CHECK-FP16-NEXT: fcvtzu x9, h3 -; CHECK-FP16-NEXT: fcvtzu x8, h5 -; CHECK-FP16-NEXT: fmov d3, x10 -; CHECK-FP16-NEXT: fcvtzu x10, h6 -; CHECK-FP16-NEXT: mov v0.d[1], x9 -; CHECK-FP16-NEXT: mov v1.d[1], x8 +; CHECK-FP16-NEXT: fcvtzu x10, h1 +; CHECK-FP16-NEXT: mov h1, v0.h[1] +; CHECK-FP16-NEXT: mov v2.d[1], x9 +; CHECK-FP16-NEXT: fcvtzu x9, h0 +; CHECK-FP16-NEXT: fmov d3, x8 +; CHECK-FP16-NEXT: mov h0, v0.h[3] +; CHECK-FP16-NEXT: fcvtzu x8, h1 +; CHECK-FP16-NEXT: fmov d4, x9 +; CHECK-FP16-NEXT: fcvtzu x9, h5 ; CHECK-FP16-NEXT: mov v3.d[1], x10 +; CHECK-FP16-NEXT: fcvtzu x10, h0 +; CHECK-FP16-NEXT: fmov d1, x9 +; CHECK-FP16-NEXT: mov v4.d[1], x8 +; CHECK-FP16-NEXT: mov v1.d[1], x10 +; CHECK-FP16-NEXT: mov v0.16b, v4.16b ; CHECK-FP16-NEXT: ret %x = call <8 x i64> @llvm.fptoui.sat.v8f16.v8i64(<8 x half> %f) ret <8 x i64> %x @@ -2196,20 +2197,20 @@ ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: mov w8, #1904214015 +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, #0.0 -; CHECK-NEXT: mov x21, #68719476735 -; CHECK-NEXT: mov h0, v0.h[3] +; CHECK-NEXT: mov x24, #68719476735 ; CHECK-NEXT: fmov s9, w8 -; CHECK-NEXT: csel x8, xzr, x1, lt -; CHECK-NEXT: csel x9, xzr, x0, lt +; CHECK-NEXT: mov h0, v0.h[3] +; CHECK-NEXT: csel x9, xzr, x1, lt +; CHECK-NEXT: csel x10, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csinv x9, x9, xzr, le -; CHECK-NEXT: csel x20, x21, x8, gt +; CHECK-NEXT: csinv x8, x10, xzr, le +; CHECK-NEXT: csel x20, x24, x9, gt ; CHECK-NEXT: fmov s0, s8 -; CHECK-NEXT: str x9, [sp, #24] // 8-byte Folded Spill +; CHECK-NEXT: str x8, [sp, #24] // 8-byte Folded Spill ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: fcmp s8, #0.0 ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload @@ -2218,7 +2219,7 @@ ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: csinv x9, x9, xzr, le -; CHECK-NEXT: csel x23, x21, x8, gt +; CHECK-NEXT: csel x22, x24, x8, gt ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: str x9, [sp, #16] // 8-byte Folded Spill ; CHECK-NEXT: bl __fixunssfti @@ -2230,9 +2231,9 @@ ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: csinv x8, x8, xzr, le -; CHECK-NEXT: csel x24, x21, x9, gt -; CHECK-NEXT: str x8, [sp, #32] // 8-byte Folded Spill +; CHECK-NEXT: csel x23, x24, x9, gt ; CHECK-NEXT: fmov s0, s8 +; CHECK-NEXT: str x8, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, #0.0 @@ -2242,9 +2243,9 @@ ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 ; CHECK-NEXT: csinv x8, x8, xzr, le -; CHECK-NEXT: csel x26, x21, x9, gt -; CHECK-NEXT: str x8, [sp, #8] // 8-byte Folded Spill +; CHECK-NEXT: csel x26, x24, x9, gt ; CHECK-NEXT: fmov s0, s8 +; CHECK-NEXT: str x8, [sp, #8] // 8-byte Folded Spill ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, #0.0 @@ -2253,9 +2254,10 @@ ; CHECK-NEXT: csel x9, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csinv x29, x9, xzr, le -; CHECK-NEXT: csel x28, x21, x8, gt +; CHECK-NEXT: csinv x9, x9, xzr, le +; CHECK-NEXT: csel x28, x24, x8, gt ; CHECK-NEXT: fmov s0, s8 +; CHECK-NEXT: str x9, [sp] // 8-byte Folded Spill ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: fcmp s8, #0.0 ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload @@ -2263,8 +2265,8 @@ ; CHECK-NEXT: csel x9, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csinv x27, x9, xzr, le -; CHECK-NEXT: csel x22, x21, x8, gt +; CHECK-NEXT: csinv x25, x9, xzr, le +; CHECK-NEXT: csel x27, x24, x8, gt ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: ldr q0, [sp, #48] // 16-byte Folded Reload @@ -2274,58 +2276,57 @@ ; CHECK-NEXT: csel x9, xzr, x1, lt ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csinv x8, x8, xzr, le -; CHECK-NEXT: csel x25, x21, x9, gt -; CHECK-NEXT: str x8, [sp] // 8-byte Folded Spill +; CHECK-NEXT: csel x29, x24, x9, gt +; CHECK-NEXT: csinv x21, x8, xzr, le ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: ldr x11, [sp, #8] // 8-byte Folded Reload -; CHECK-NEXT: fmov d0, x27 -; CHECK-NEXT: fmov d1, x29 ; CHECK-NEXT: fcmp s8, #0.0 -; CHECK-NEXT: lsr x10, x22, #28 -; CHECK-NEXT: stur x11, [x19, #75] +; CHECK-NEXT: fmov d0, x25 +; CHECK-NEXT: ldr x8, [sp, #8] // 8-byte Folded Reload ; CHECK-NEXT: lsr x11, x28, #28 -; CHECK-NEXT: mov v0.d[1], x22 -; CHECK-NEXT: ldr x12, [sp, #32] // 8-byte Folded Reload -; CHECK-NEXT: mov v1.d[1], x28 -; CHECK-NEXT: csel x8, xzr, x0, lt -; CHECK-NEXT: csel x9, xzr, x1, lt +; CHECK-NEXT: csel x9, xzr, x0, lt +; CHECK-NEXT: csel x10, xzr, x1, lt ; CHECK-NEXT: fcmp s8, s9 -; CHECK-NEXT: stur x12, [x19, #50] -; CHECK-NEXT: fmov x12, d0 -; CHECK-NEXT: fmov x13, d1 -; CHECK-NEXT: csinv x8, x8, xzr, le -; CHECK-NEXT: ldp d0, d1, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: csel x9, x21, x9, gt -; CHECK-NEXT: strb w10, [x19, #49] -; CHECK-NEXT: extr x10, x22, x12, #28 -; CHECK-NEXT: bfi x9, x12, #36, #28 -; CHECK-NEXT: stur x8, [x19, #25] -; CHECK-NEXT: extr x8, x28, x13, #28 -; CHECK-NEXT: mov v0.d[1], x23 +; CHECK-NEXT: stur x8, [x19, #75] +; CHECK-NEXT: mov v0.d[1], x27 +; CHECK-NEXT: lsr x8, x27, #28 +; CHECK-NEXT: ldr d1, [sp] // 8-byte Folded Reload ; CHECK-NEXT: strb w11, [x19, #24] -; CHECK-NEXT: mov v1.d[1], x20 +; CHECK-NEXT: csinv x9, x9, xzr, le +; CHECK-NEXT: fmov x12, d0 +; CHECK-NEXT: strb w8, [x19, #49] +; CHECK-NEXT: csel x8, x24, x10, gt +; CHECK-NEXT: mov v1.d[1], x28 +; CHECK-NEXT: stur x9, [x19, #25] +; CHECK-NEXT: extr x10, x27, x12, #28 +; CHECK-NEXT: bfi x8, x12, #36, #28 +; CHECK-NEXT: ldr x11, [sp, #32] // 8-byte Folded Reload +; CHECK-NEXT: fmov x9, d1 ; CHECK-NEXT: stur x10, [x19, #41] -; CHECK-NEXT: stur x9, [x19, #33] -; CHECK-NEXT: bfi x25, x13, #36, #28 +; CHECK-NEXT: lsr x10, x20, #28 +; CHECK-NEXT: stur x11, [x19, #50] +; CHECK-NEXT: stur x8, [x19, #33] +; CHECK-NEXT: extr x8, x28, x9, #28 +; CHECK-NEXT: ldr d0, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: bfi x29, x9, #36, #28 +; CHECK-NEXT: lsr x9, x22, #28 ; CHECK-NEXT: str x8, [x19, #16] -; CHECK-NEXT: lsr x9, x23, #28 +; CHECK-NEXT: mov v0.d[1], x22 ; CHECK-NEXT: fmov x8, d0 -; CHECK-NEXT: ldr x12, [sp] // 8-byte Folded Reload -; CHECK-NEXT: fmov x11, d1 -; CHECK-NEXT: lsr x10, x20, #28 +; CHECK-NEXT: ldr d0, [sp, #24] // 8-byte Folded Reload +; CHECK-NEXT: stp x21, x29, [x19] ; CHECK-NEXT: strb w9, [x19, #99] -; CHECK-NEXT: stp x12, x25, [x19] -; CHECK-NEXT: extr x12, x23, x8, #28 +; CHECK-NEXT: mov v0.d[1], x20 +; CHECK-NEXT: extr x12, x22, x8, #28 ; CHECK-NEXT: bfi x26, x8, #36, #28 -; CHECK-NEXT: extr x8, x20, x11, #28 -; CHECK-NEXT: bfi x24, x11, #36, #28 ; CHECK-NEXT: strb w10, [x19, #74] +; CHECK-NEXT: fmov x11, d0 ; CHECK-NEXT: stur x12, [x19, #91] ; CHECK-NEXT: stur x26, [x19, #83] +; CHECK-NEXT: extr x8, x20, x11, #28 +; CHECK-NEXT: bfi x23, x11, #36, #28 ; CHECK-NEXT: stur x8, [x19, #66] -; CHECK-NEXT: stur x24, [x19, #58] +; CHECK-NEXT: stur x23, [x19, #58] ; CHECK-NEXT: ldp x20, x19, [sp, #160] // 16-byte Folded Reload ; CHECK-NEXT: ldp x22, x21, [sp, #144] // 16-byte Folded Reload ; CHECK-NEXT: ldp x24, x23, [sp, #128] // 16-byte Folded Reload @@ -2372,19 +2373,19 @@ ; CHECK-NEXT: str q0, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: mov w8, #2139095039 +; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, #0.0 -; CHECK-NEXT: mov h0, v0.h[1] ; CHECK-NEXT: fmov s9, w8 -; CHECK-NEXT: csel x8, xzr, x1, lt -; CHECK-NEXT: csel x9, xzr, x0, lt +; CHECK-NEXT: mov h0, v0.h[1] +; CHECK-NEXT: csel x9, xzr, x1, lt +; CHECK-NEXT: csel x10, xzr, x0, lt ; CHECK-NEXT: fcmp s8, s9 ; CHECK-NEXT: fcvt s8, h0 -; CHECK-NEXT: csinv x9, x9, xzr, le -; CHECK-NEXT: csinv x8, x8, xzr, le +; CHECK-NEXT: csinv x11, x10, xzr, le +; CHECK-NEXT: csinv x8, x9, xzr, le ; CHECK-NEXT: fmov s0, s8 -; CHECK-NEXT: stp x8, x9, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp x8, x11, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, #0.0 diff --git a/llvm/test/CodeGen/AArch64/funnel-shift-rot.ll b/llvm/test/CodeGen/AArch64/funnel-shift-rot.ll --- a/llvm/test/CodeGen/AArch64/funnel-shift-rot.ll +++ b/llvm/test/CodeGen/AArch64/funnel-shift-rot.ll @@ -77,12 +77,12 @@ define <4 x i32> @rotl_v4i32(<4 x i32> %x, <4 x i32> %z) { ; CHECK-LABEL: rotl_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.4s, #31 ; CHECK-NEXT: neg v3.4s, v1.4s +; CHECK-NEXT: movi v2.4s, #31 ; CHECK-NEXT: and v1.16b, v1.16b, v2.16b ; CHECK-NEXT: and v2.16b, v3.16b, v2.16b -; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: neg v2.4s, v2.4s +; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret @@ -167,11 +167,11 @@ define <4 x i32> @rotr_v4i32(<4 x i32> %x, <4 x i32> %z) { ; CHECK-LABEL: rotr_v4i32: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.4s, #31 -; CHECK-NEXT: neg v3.4s, v1.4s -; CHECK-NEXT: and v1.16b, v1.16b, v2.16b -; CHECK-NEXT: and v2.16b, v3.16b, v2.16b +; CHECK-NEXT: movi v3.4s, #31 +; CHECK-NEXT: neg v2.4s, v1.4s +; CHECK-NEXT: and v1.16b, v1.16b, v3.16b ; CHECK-NEXT: neg v1.4s, v1.4s +; CHECK-NEXT: and v2.16b, v2.16b, v3.16b ; CHECK-NEXT: ushl v2.4s, v0.4s, v2.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v1.4s ; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b diff --git a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll --- a/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll +++ b/llvm/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll @@ -187,9 +187,9 @@ define <4 x i1> @vec_4xi32_splat_eq(<4 x i32> %x, <4 x i32> %y) nounwind { ; CHECK-LABEL: vec_4xi32_splat_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: ushl v0.4s, v0.4s, v1.4s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret @@ -219,9 +219,9 @@ define <4 x i1> @vec_4xi32_nonsplat_undef0_eq(<4 x i32> %x, <4 x i32> %y) nounwind { ; CHECK-LABEL: vec_4xi32_nonsplat_undef0_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: ushl v0.4s, v0.4s, v1.4s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret @@ -233,8 +233,8 @@ define <4 x i1> @vec_4xi32_nonsplat_undef1_eq(<4 x i32> %x, <4 x i32> %y) nounwind { ; CHECK-LABEL: vec_4xi32_nonsplat_undef1_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: neg v1.4s, v1.4s +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s ; CHECK-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 @@ -248,8 +248,8 @@ define <4 x i1> @vec_4xi32_nonsplat_undef2_eq(<4 x i32> %x, <4 x i32> %y) nounwind { ; CHECK-LABEL: vec_4xi32_nonsplat_undef2_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: neg v1.4s, v1.4s +; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s ; CHECK-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 diff --git a/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll b/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll --- a/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll +++ b/llvm/test/CodeGen/AArch64/insert-subvector-res-legalization.ll @@ -97,17 +97,17 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: cnth x8 -; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0] -; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: ptrue p0.h ; CHECK-NEXT: sub x8, x8, #8 ; CHECK-NEXT: mov w9, #8 ; CHECK-NEXT: cmp x8, #8 +; CHECK-NEXT: ld1b { z0.h }, p0/z, [x0] +; CHECK-NEXT: ldr d1, [x1] ; CHECK-NEXT: csel x8, x8, x9, lo -; CHECK-NEXT: ushll v1.8h, v1.8b, #0 ; CHECK-NEXT: lsl x8, x8, #1 ; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: ushll v1.8h, v1.8b, #0 ; CHECK-NEXT: st1h { z0.h }, p0, [sp] ; CHECK-NEXT: str q1, [x9, x8] ; CHECK-NEXT: ld1h { z0.h }, p0/z, [sp] @@ -146,17 +146,17 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: cntw x8 -; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0] -; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: ptrue p0.s ; CHECK-NEXT: sub x8, x8, #4 ; CHECK-NEXT: mov w9, #4 ; CHECK-NEXT: cmp x8, #4 +; CHECK-NEXT: ld1h { z0.s }, p0/z, [x0] +; CHECK-NEXT: ldr d1, [x1] ; CHECK-NEXT: csel x8, x8, x9, lo -; CHECK-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-NEXT: lsl x8, x8, #2 ; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: ushll v1.4s, v1.4h, #0 ; CHECK-NEXT: st1w { z0.s }, p0, [sp] ; CHECK-NEXT: str q1, [x9, x8] ; CHECK-NEXT: ld1w { z0.s }, p0/z, [sp] @@ -195,17 +195,17 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: cntd x8 -; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0] -; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: sub x8, x8, #2 ; CHECK-NEXT: mov w9, #2 ; CHECK-NEXT: cmp x8, #2 +; CHECK-NEXT: ld1w { z0.d }, p0/z, [x0] +; CHECK-NEXT: ldr d1, [x1] ; CHECK-NEXT: csel x8, x8, x9, lo -; CHECK-NEXT: ushll v1.2d, v1.2s, #0 ; CHECK-NEXT: lsl x8, x8, #3 ; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: ushll v1.2d, v1.2s, #0 ; CHECK-NEXT: st1d { z0.d }, p0, [sp] ; CHECK-NEXT: str q1, [x9, x8] ; CHECK-NEXT: ld1d { z0.d }, p0/z, [sp] diff --git a/llvm/test/CodeGen/AArch64/known-never-nan.ll b/llvm/test/CodeGen/AArch64/known-never-nan.ll --- a/llvm/test/CodeGen/AArch64/known-never-nan.ll +++ b/llvm/test/CodeGen/AArch64/known-never-nan.ll @@ -31,10 +31,10 @@ ; CHECK-NEXT: mov w8, #-8388608 ; CHECK-NEXT: ucvtf s0, w0 ; CHECK-NEXT: ucvtf s1, w1 -; CHECK-NEXT: fmov s3, #17.00000000 -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: fadd s1, s1, s3 -; CHECK-NEXT: fmul s0, s0, s2 +; CHECK-NEXT: fmov s2, #17.00000000 +; CHECK-NEXT: fmov s3, w8 +; CHECK-NEXT: fadd s1, s1, s2 +; CHECK-NEXT: fmul s0, s0, s3 ; CHECK-NEXT: fcmp s0, s1 ; CHECK-NEXT: fcsel s0, s0, s1, pl ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/lowerMUL-newload.ll b/llvm/test/CodeGen/AArch64/lowerMUL-newload.ll --- a/llvm/test/CodeGen/AArch64/lowerMUL-newload.ll +++ b/llvm/test/CodeGen/AArch64/lowerMUL-newload.ll @@ -22,9 +22,9 @@ ; CHECK-LABEL: mlai16_and: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: smull v0.4s, v1.4h, v0.4h -; CHECK-NEXT: movi v3.2d, #0x00ffff0000ffff +; CHECK-NEXT: movi v1.2d, #0x00ffff0000ffff ; CHECK-NEXT: uaddw v0.4s, v0.4s, v2.4h -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %v0 = sext <4 x i16> %vec0 to <4 x i32> @@ -90,9 +90,9 @@ ; CHECK-LABEL: addmuli16_and: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: smull v1.4s, v1.4h, v2.4h -; CHECK-NEXT: movi v3.2d, #0x00ffff0000ffff ; CHECK-NEXT: smlal v1.4s, v0.4h, v2.4h -; CHECK-NEXT: and v0.16b, v1.16b, v3.16b +; CHECK-NEXT: movi v0.2d, #0x00ffff0000ffff +; CHECK-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret entry: %v0 = sext <4 x i16> %vec0 to <4 x i32> @@ -158,9 +158,9 @@ ; CHECK-LABEL: mlai32_and: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: smull v0.2d, v1.2s, v0.2s -; CHECK-NEXT: movi v3.2d, #0x000000ffffffff +; CHECK-NEXT: movi v1.2d, #0x000000ffffffff ; CHECK-NEXT: uaddw v0.2d, v0.2d, v2.2s -; CHECK-NEXT: and v0.16b, v0.16b, v3.16b +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret entry: %v0 = sext <2 x i32> %vec0 to <2 x i64> @@ -226,9 +226,9 @@ ; CHECK-LABEL: addmuli32_and: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: smull v1.2d, v1.2s, v2.2s -; CHECK-NEXT: movi v3.2d, #0x000000ffffffff ; CHECK-NEXT: smlal v1.2d, v0.2s, v2.2s -; CHECK-NEXT: and v0.16b, v1.16b, v3.16b +; CHECK-NEXT: movi v0.2d, #0x000000ffffffff +; CHECK-NEXT: and v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret entry: %v0 = sext <2 x i32> %vec0 to <2 x i64> diff --git a/llvm/test/CodeGen/AArch64/minmax-of-minmax.ll b/llvm/test/CodeGen/AArch64/minmax-of-minmax.ll --- a/llvm/test/CodeGen/AArch64/minmax-of-minmax.ll +++ b/llvm/test/CodeGen/AArch64/minmax-of-minmax.ll @@ -10,9 +10,9 @@ define <4 x i32> @smin_ab_bc(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: smin_ab_bc: ; CHECK: // %bb.0: -; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: smin v0.4s, v0.4s, v2.4s +; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp slt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -26,9 +26,9 @@ define <4 x i32> @smin_ab_cb(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: smin_ab_cb: ; CHECK: // %bb.0: -; CHECK-NEXT: smin v2.4s, v2.4s, v1.4s ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: smin v0.4s, v0.4s, v2.4s +; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp slt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -74,9 +74,9 @@ define <4 x i32> @smin_ab_bc_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: smin_ab_bc_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: smin v0.4s, v0.4s, v2.4s +; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp slt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -90,9 +90,9 @@ define <4 x i32> @smin_ab_cb_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: smin_ab_cb_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: smin v2.4s, v2.4s, v1.4s ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: smin v0.4s, v0.4s, v2.4s +; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp slt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -138,9 +138,9 @@ define <4 x i32> @smin_ab_bc_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: smin_ab_bc_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: smin v0.4s, v0.4s, v2.4s +; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp slt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -154,9 +154,9 @@ define <4 x i32> @smin_ab_cb_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: smin_ab_cb_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: smin v2.4s, v2.4s, v1.4s ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: smin v0.4s, v0.4s, v2.4s +; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp slt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -202,9 +202,9 @@ define <4 x i32> @smin_ab_bc_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: smin_ab_bc_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: smin v0.4s, v0.4s, v2.4s +; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp slt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -218,9 +218,9 @@ define <4 x i32> @smin_ab_cb_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: smin_ab_cb_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: smin v2.4s, v2.4s, v1.4s ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: smin v0.4s, v0.4s, v2.4s +; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp slt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -266,9 +266,9 @@ define <4 x i32> @smax_ab_bc(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: smax_ab_bc: ; CHECK: // %bb.0: -; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: smax v0.4s, v0.4s, v2.4s +; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp sgt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -282,9 +282,9 @@ define <4 x i32> @smax_ab_cb(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: smax_ab_cb: ; CHECK: // %bb.0: -; CHECK-NEXT: smax v2.4s, v2.4s, v1.4s ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: smax v0.4s, v0.4s, v2.4s +; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp sgt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -330,9 +330,9 @@ define <4 x i32> @smax_ab_bc_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: smax_ab_bc_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: smax v0.4s, v0.4s, v2.4s +; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp sgt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -346,9 +346,9 @@ define <4 x i32> @smax_ab_cb_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: smax_ab_cb_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: smax v2.4s, v2.4s, v1.4s ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: smax v0.4s, v0.4s, v2.4s +; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp sgt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -394,9 +394,9 @@ define <4 x i32> @smax_ab_bc_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: smax_ab_bc_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: smax v0.4s, v0.4s, v2.4s +; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp sgt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -410,9 +410,9 @@ define <4 x i32> @smax_ab_cb_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: smax_ab_cb_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: smax v2.4s, v2.4s, v1.4s ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: smax v0.4s, v0.4s, v2.4s +; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp sgt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -458,9 +458,9 @@ define <4 x i32> @smax_ab_bc_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: smax_ab_bc_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: smax v0.4s, v0.4s, v2.4s +; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp sgt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -474,9 +474,9 @@ define <4 x i32> @smax_ab_cb_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: smax_ab_cb_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: smax v2.4s, v2.4s, v1.4s ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: smax v0.4s, v0.4s, v2.4s +; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp sgt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -522,9 +522,9 @@ define <4 x i32> @umin_ab_bc(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: umin_ab_bc: ; CHECK: // %bb.0: -; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: umin v0.4s, v0.4s, v2.4s +; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp ult <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -538,9 +538,9 @@ define <4 x i32> @umin_ab_cb(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: umin_ab_cb: ; CHECK: // %bb.0: -; CHECK-NEXT: umin v2.4s, v2.4s, v1.4s ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: umin v0.4s, v0.4s, v2.4s +; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp ult <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -586,9 +586,9 @@ define <4 x i32> @umin_ab_bc_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: umin_ab_bc_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: umin v0.4s, v0.4s, v2.4s +; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp ult <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -602,9 +602,9 @@ define <4 x i32> @umin_ab_cb_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: umin_ab_cb_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: umin v2.4s, v2.4s, v1.4s ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: umin v0.4s, v0.4s, v2.4s +; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp ult <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -650,9 +650,9 @@ define <4 x i32> @umin_ab_bc_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: umin_ab_bc_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: umin v0.4s, v0.4s, v2.4s +; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp ult <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -666,9 +666,9 @@ define <4 x i32> @umin_ab_cb_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: umin_ab_cb_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: umin v2.4s, v2.4s, v1.4s ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: umin v0.4s, v0.4s, v2.4s +; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp ult <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -714,9 +714,9 @@ define <4 x i32> @umin_ab_bc_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: umin_ab_bc_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: umin v0.4s, v0.4s, v2.4s +; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp ult <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -730,9 +730,9 @@ define <4 x i32> @umin_ab_cb_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: umin_ab_cb_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: umin v2.4s, v2.4s, v1.4s ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s -; CHECK-NEXT: umin v0.4s, v0.4s, v2.4s +; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp ult <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -778,9 +778,9 @@ define <4 x i32> @umax_ab_bc(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: umax_ab_bc: ; CHECK: // %bb.0: -; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: umax v0.4s, v0.4s, v2.4s +; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp ugt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -794,9 +794,9 @@ define <4 x i32> @umax_ab_cb(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: umax_ab_cb: ; CHECK: // %bb.0: -; CHECK-NEXT: umax v2.4s, v2.4s, v1.4s ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: umax v0.4s, v0.4s, v2.4s +; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp ugt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -842,9 +842,9 @@ define <4 x i32> @umax_ab_bc_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: umax_ab_bc_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: umax v0.4s, v0.4s, v2.4s +; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp ugt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -858,9 +858,9 @@ define <4 x i32> @umax_ab_cb_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: umax_ab_cb_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: umax v2.4s, v2.4s, v1.4s ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: umax v0.4s, v0.4s, v2.4s +; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp ugt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -906,9 +906,9 @@ define <4 x i32> @umax_ab_bc_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: umax_ab_bc_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: umax v0.4s, v0.4s, v2.4s +; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp ugt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -922,9 +922,9 @@ define <4 x i32> @umax_ab_cb_eq_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: umax_ab_cb_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: umax v2.4s, v2.4s, v1.4s ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: umax v0.4s, v0.4s, v2.4s +; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp ugt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -970,9 +970,9 @@ define <4 x i32> @umax_ab_bc_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: umax_ab_bc_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: umax v0.4s, v0.4s, v2.4s +; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp ugt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -986,9 +986,9 @@ define <4 x i32> @umax_ab_cb_eq_swap_pred(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ; CHECK-LABEL: umax_ab_cb_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: umax v2.4s, v2.4s, v1.4s ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s -; CHECK-NEXT: umax v0.4s, v0.4s, v2.4s +; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s +; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %cmp_ab = icmp ugt <4 x i32> %a, %b %min_ab = select <4 x i1> %cmp_ab, <4 x i32> %a, <4 x i32> %b @@ -1034,8 +1034,8 @@ define <4 x i32> @notted_smin_ab_bc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_ab_bc: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s @@ -1056,8 +1056,8 @@ define <4 x i32> @notted_smin_ab_cb(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_ab_cb: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s @@ -1078,9 +1078,9 @@ define <4 x i32> @notted_smin_bc_ab(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_bc_ab: ; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b -; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s @@ -1100,9 +1100,9 @@ define <4 x i32> @notted_smin_bc_ba(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_bc_ba: ; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b -; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s ; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s @@ -1122,8 +1122,8 @@ define <4 x i32> @notted_smin_ab_bc_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_ab_bc_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s @@ -1144,8 +1144,8 @@ define <4 x i32> @notted_smin_ab_cb_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_ab_cb_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s @@ -1166,9 +1166,9 @@ define <4 x i32> @notted_smin_bc_ab_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_bc_ab_swap_pred: ; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b -; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s @@ -1188,9 +1188,9 @@ define <4 x i32> @notted_smin_bc_ba_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_bc_ba_swap_pred: ; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b -; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s ; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s @@ -1210,8 +1210,8 @@ define <4 x i32> @notted_smin_ab_bc_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_ab_bc_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s @@ -1232,8 +1232,8 @@ define <4 x i32> @notted_smin_ab_cb_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_ab_cb_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s @@ -1254,9 +1254,9 @@ define <4 x i32> @notted_smin_bc_ab_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_bc_ab_eq_pred: ; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b -; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s @@ -1276,9 +1276,9 @@ define <4 x i32> @notted_smin_bc_ba_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_bc_ba_eq_pred: ; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b -; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s ; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s @@ -1298,8 +1298,8 @@ define <4 x i32> @notted_smin_ab_bc_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_ab_bc_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smin v1.4s, v1.4s, v2.4s @@ -1320,8 +1320,8 @@ define <4 x i32> @notted_smin_ab_cb_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_ab_cb_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smin v1.4s, v2.4s, v1.4s @@ -1342,9 +1342,9 @@ define <4 x i32> @notted_smin_bc_ab_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_bc_ab_eq_swap_pred: ; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b -; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s @@ -1364,9 +1364,9 @@ define <4 x i32> @notted_smin_bc_ba_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smin_bc_ba_eq_swap_pred: ; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b -; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: smin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smin v0.4s, v1.4s, v0.4s ; CHECK-NEXT: smin v0.4s, v2.4s, v0.4s @@ -1386,8 +1386,8 @@ define <4 x i32> @notted_smax_ab_bc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_ab_bc: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s @@ -1408,8 +1408,8 @@ define <4 x i32> @notted_smax_ab_cb(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_ab_cb: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s @@ -1430,9 +1430,9 @@ define <4 x i32> @notted_smax_bc_ab(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_bc_ab: ; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b -; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s @@ -1452,9 +1452,9 @@ define <4 x i32> @notted_smax_bc_ba(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_bc_ba: ; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b -; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s ; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s @@ -1474,8 +1474,8 @@ define <4 x i32> @notted_smax_ab_bc_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_ab_bc_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s @@ -1496,8 +1496,8 @@ define <4 x i32> @notted_smax_ab_cb_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_ab_cb_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s @@ -1518,9 +1518,9 @@ define <4 x i32> @notted_smax_bc_ab_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_bc_ab_swap_pred: ; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b -; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s @@ -1540,9 +1540,9 @@ define <4 x i32> @notted_smax_bc_ba_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_bc_ba_swap_pred: ; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b -; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s ; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s @@ -1562,8 +1562,8 @@ define <4 x i32> @notted_smax_ab_bc_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_ab_bc_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s @@ -1584,8 +1584,8 @@ define <4 x i32> @notted_smax_ab_cb_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_ab_cb_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s @@ -1606,9 +1606,9 @@ define <4 x i32> @notted_smax_bc_ab_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_bc_ab_eq_pred: ; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b -; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s @@ -1628,9 +1628,9 @@ define <4 x i32> @notted_smax_bc_ba_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_bc_ba_eq_pred: ; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b -; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s ; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s @@ -1650,8 +1650,8 @@ define <4 x i32> @notted_smax_ab_bc_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_ab_bc_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smax v1.4s, v1.4s, v2.4s @@ -1672,8 +1672,8 @@ define <4 x i32> @notted_smax_ab_cb_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_ab_cb_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smax v1.4s, v2.4s, v1.4s @@ -1694,9 +1694,9 @@ define <4 x i32> @notted_smax_bc_ab_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_bc_ab_eq_swap_pred: ; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b -; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s @@ -1716,9 +1716,9 @@ define <4 x i32> @notted_smax_bc_ba_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_smax_bc_ba_eq_swap_pred: ; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b -; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: smax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: smax v0.4s, v1.4s, v0.4s ; CHECK-NEXT: smax v0.4s, v2.4s, v0.4s @@ -1738,8 +1738,8 @@ define <4 x i32> @notted_umin_ab_bc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_ab_bc: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s @@ -1760,8 +1760,8 @@ define <4 x i32> @notted_umin_ab_cb(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_ab_cb: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s @@ -1782,9 +1782,9 @@ define <4 x i32> @notted_umin_bc_ab(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_bc_ab: ; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b -; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s @@ -1804,9 +1804,9 @@ define <4 x i32> @notted_umin_bc_ba(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_bc_ba: ; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b -; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s ; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s @@ -1826,8 +1826,8 @@ define <4 x i32> @notted_umin_ab_bc_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_ab_bc_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s @@ -1848,8 +1848,8 @@ define <4 x i32> @notted_umin_ab_cb_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_ab_cb_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s @@ -1870,9 +1870,9 @@ define <4 x i32> @notted_umin_bc_ab_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_bc_ab_swap_pred: ; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b -; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s @@ -1892,9 +1892,9 @@ define <4 x i32> @notted_umin_bc_ba_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_bc_ba_swap_pred: ; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b -; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s ; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s @@ -1914,8 +1914,8 @@ define <4 x i32> @notted_umin_ab_bc_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_ab_bc_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s @@ -1936,8 +1936,8 @@ define <4 x i32> @notted_umin_ab_cb_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_ab_cb_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s @@ -1958,9 +1958,9 @@ define <4 x i32> @notted_umin_bc_ab_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_bc_ab_eq_pred: ; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b -; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s @@ -1980,9 +1980,9 @@ define <4 x i32> @notted_umin_bc_ba_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_bc_ba_eq_pred: ; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b -; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s ; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s @@ -2002,8 +2002,8 @@ define <4 x i32> @notted_umin_ab_bc_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_ab_bc_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umin v1.4s, v1.4s, v2.4s @@ -2024,8 +2024,8 @@ define <4 x i32> @notted_umin_ab_cb_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_ab_cb_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umin v1.4s, v2.4s, v1.4s @@ -2046,9 +2046,9 @@ define <4 x i32> @notted_umin_bc_ab_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_bc_ab_eq_swap_pred: ; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b -; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umin v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s @@ -2068,9 +2068,9 @@ define <4 x i32> @notted_umin_bc_ba_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umin_bc_ba_eq_swap_pred: ; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b -; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: umin v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umin v0.4s, v1.4s, v0.4s ; CHECK-NEXT: umin v0.4s, v2.4s, v0.4s @@ -2090,8 +2090,8 @@ define <4 x i32> @notted_umax_ab_bc(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_ab_bc: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s @@ -2112,8 +2112,8 @@ define <4 x i32> @notted_umax_ab_cb(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_ab_cb: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s @@ -2134,9 +2134,9 @@ define <4 x i32> @notted_umax_bc_ab(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_bc_ab: ; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b -; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s @@ -2156,9 +2156,9 @@ define <4 x i32> @notted_umax_bc_ba(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_bc_ba: ; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b -; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s ; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s @@ -2178,8 +2178,8 @@ define <4 x i32> @notted_umax_ab_bc_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_ab_bc_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s @@ -2200,8 +2200,8 @@ define <4 x i32> @notted_umax_ab_cb_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_ab_cb_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s @@ -2222,9 +2222,9 @@ define <4 x i32> @notted_umax_bc_ab_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_bc_ab_swap_pred: ; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b -; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s @@ -2244,9 +2244,9 @@ define <4 x i32> @notted_umax_bc_ba_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_bc_ba_swap_pred: ; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b -; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s ; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s @@ -2266,8 +2266,8 @@ define <4 x i32> @notted_umax_ab_bc_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_ab_bc_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s @@ -2288,8 +2288,8 @@ define <4 x i32> @notted_umax_ab_cb_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_ab_cb_eq_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s @@ -2310,9 +2310,9 @@ define <4 x i32> @notted_umax_bc_ab_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_bc_ab_eq_pred: ; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b -; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s @@ -2332,9 +2332,9 @@ define <4 x i32> @notted_umax_bc_ba_eq_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_bc_ba_eq_pred: ; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b -; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s ; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s @@ -2354,8 +2354,8 @@ define <4 x i32> @notted_umax_ab_bc_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_ab_bc_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umax v1.4s, v1.4s, v2.4s @@ -2376,8 +2376,8 @@ define <4 x i32> @notted_umax_ab_cb_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_ab_cb_eq_swap_pred: ; CHECK: // %bb.0: -; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umax v1.4s, v2.4s, v1.4s @@ -2398,9 +2398,9 @@ define <4 x i32> @notted_umax_bc_ab_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_bc_ab_eq_swap_pred: ; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b -; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umax v0.4s, v0.4s, v1.4s ; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s @@ -2420,9 +2420,9 @@ define <4 x i32> @notted_umax_bc_ba_eq_swap_pred(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { ; CHECK-LABEL: notted_umax_bc_ba_eq_swap_pred: ; CHECK: // %bb.0: +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: mvn v1.16b, v1.16b ; CHECK-NEXT: mvn v2.16b, v2.16b -; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: umax v2.4s, v1.4s, v2.4s ; CHECK-NEXT: umax v0.4s, v1.4s, v0.4s ; CHECK-NEXT: umax v0.4s, v2.4s, v0.4s diff --git a/llvm/test/CodeGen/AArch64/minmax.ll b/llvm/test/CodeGen/AArch64/minmax.ll --- a/llvm/test/CodeGen/AArch64/minmax.ll +++ b/llvm/test/CodeGen/AArch64/minmax.ll @@ -108,9 +108,9 @@ define <16 x i32> @t11(<16 x i32> %a, <16 x i32> %b) { ; CHECK-LABEL: t11: ; CHECK: // %bb.0: -; CHECK-NEXT: smin v2.4s, v2.4s, v6.4s ; CHECK-NEXT: smin v0.4s, v0.4s, v4.4s ; CHECK-NEXT: smin v1.4s, v1.4s, v5.4s +; CHECK-NEXT: smin v2.4s, v2.4s, v6.4s ; CHECK-NEXT: smin v3.4s, v3.4s, v7.4s ; CHECK-NEXT: ret %t1 = icmp sle <16 x i32> %a, %b @@ -122,10 +122,10 @@ define <16 x i8> @t12(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: t12: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.16b, #1 -; CHECK-NEXT: cmhi v3.16b, v1.16b, v0.16b -; CHECK-NEXT: bif v0.16b, v1.16b, v3.16b -; CHECK-NEXT: and v1.16b, v3.16b, v2.16b +; CHECK-NEXT: cmhi v2.16b, v1.16b, v0.16b +; CHECK-NEXT: bif v0.16b, v1.16b, v2.16b +; CHECK-NEXT: movi v1.16b, #1 +; CHECK-NEXT: and v1.16b, v2.16b, v1.16b ; CHECK-NEXT: add v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %t1 = icmp ugt <16 x i8> %b, %a diff --git a/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-neon.ll b/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-neon.ll --- a/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-neon.ll +++ b/llvm/test/CodeGen/AArch64/named-vector-shuffle-reverse-neon.ll @@ -127,12 +127,14 @@ ; CHECK-SELDAG: // %bb.0: ; CHECK-SELDAG-NEXT: rev64 v3.4s, v3.4s ; CHECK-SELDAG-NEXT: rev64 v2.4s, v2.4s -; CHECK-SELDAG-NEXT: rev64 v4.4s, v1.4s -; CHECK-SELDAG-NEXT: rev64 v5.4s, v0.4s -; CHECK-SELDAG-NEXT: ext v0.16b, v3.16b, v3.16b, #8 -; CHECK-SELDAG-NEXT: ext v1.16b, v2.16b, v2.16b, #8 -; CHECK-SELDAG-NEXT: ext v2.16b, v4.16b, v4.16b, #8 -; CHECK-SELDAG-NEXT: ext v3.16b, v5.16b, v5.16b, #8 +; CHECK-SELDAG-NEXT: rev64 v1.4s, v1.4s +; CHECK-SELDAG-NEXT: rev64 v0.4s, v0.4s +; CHECK-SELDAG-NEXT: ext v4.16b, v3.16b, v3.16b, #8 +; CHECK-SELDAG-NEXT: ext v5.16b, v2.16b, v2.16b, #8 +; CHECK-SELDAG-NEXT: ext v2.16b, v1.16b, v1.16b, #8 +; CHECK-SELDAG-NEXT: ext v3.16b, v0.16b, v0.16b, #8 +; CHECK-SELDAG-NEXT: mov v0.16b, v4.16b +; CHECK-SELDAG-NEXT: mov v1.16b, v5.16b ; CHECK-SELDAG-NEXT: ret ; ; CHECK-FASTISEL-LABEL: reverse_v16f32: diff --git a/llvm/test/CodeGen/AArch64/named-vector-shuffles-neon.ll b/llvm/test/CodeGen/AArch64/named-vector-shuffles-neon.ll --- a/llvm/test/CodeGen/AArch64/named-vector-shuffles-neon.ll +++ b/llvm/test/CodeGen/AArch64/named-vector-shuffles-neon.ll @@ -39,9 +39,8 @@ define <8 x i32> @splice_v8i32_idx(<8 x i32> %a, <8 x i32> %b) #0 { ; CHECK-LABEL: splice_v8i32_idx: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v3.16b, v2.16b, v3.16b, #4 ; CHECK-NEXT: ext v0.16b, v1.16b, v2.16b, #4 -; CHECK-NEXT: mov v1.16b, v3.16b +; CHECK-NEXT: ext v1.16b, v2.16b, v3.16b, #4 ; CHECK-NEXT: ret %res = call <8 x i32> @llvm.experimental.vector.splice.v8i32(<8 x i32> %a, <8 x i32> %b, i32 5) ret <8 x i32> %res @@ -51,12 +50,10 @@ define <16 x float> @splice_v16f32_idx(<16 x float> %a, <16 x float> %b) #0 { ; CHECK-LABEL: splice_v16f32_idx: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v5.16b, v4.16b, v5.16b, #12 -; CHECK-NEXT: ext v6.16b, v2.16b, v3.16b, #12 ; CHECK-NEXT: ext v0.16b, v1.16b, v2.16b, #12 +; CHECK-NEXT: ext v1.16b, v2.16b, v3.16b, #12 ; CHECK-NEXT: ext v2.16b, v3.16b, v4.16b, #12 -; CHECK-NEXT: mov v3.16b, v5.16b -; CHECK-NEXT: mov v1.16b, v6.16b +; CHECK-NEXT: ext v3.16b, v4.16b, v5.16b, #12 ; CHECK-NEXT: ret %res = call <16 x float> @llvm.experimental.vector.splice.v16f32(<16 x float> %a, <16 x float> %b, i32 7) ret <16 x float> %res @@ -107,9 +104,8 @@ define <8 x i32> @splice_v8i32(<8 x i32> %a, <8 x i32> %b) #0 { ; CHECK-LABEL: splice_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v3.16b, v2.16b, v3.16b, #4 ; CHECK-NEXT: ext v0.16b, v1.16b, v2.16b, #4 -; CHECK-NEXT: mov v1.16b, v3.16b +; CHECK-NEXT: ext v1.16b, v2.16b, v3.16b, #4 ; CHECK-NEXT: ret %res = call <8 x i32> @llvm.experimental.vector.splice.v8i32(<8 x i32> %a, <8 x i32> %b, i32 -3) ret <8 x i32> %res @@ -119,12 +115,10 @@ define <16 x float> @splice_v16f32(<16 x float> %a, <16 x float> %b) #0 { ; CHECK-LABEL: splice_v16f32: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v5.16b, v4.16b, v5.16b, #12 -; CHECK-NEXT: ext v6.16b, v2.16b, v3.16b, #12 ; CHECK-NEXT: ext v0.16b, v1.16b, v2.16b, #12 +; CHECK-NEXT: ext v1.16b, v2.16b, v3.16b, #12 ; CHECK-NEXT: ext v2.16b, v3.16b, v4.16b, #12 -; CHECK-NEXT: mov v3.16b, v5.16b -; CHECK-NEXT: mov v1.16b, v6.16b +; CHECK-NEXT: ext v3.16b, v4.16b, v5.16b, #12 ; CHECK-NEXT: ret %res = call <16 x float> @llvm.experimental.vector.splice.v16f32(<16 x float> %a, <16 x float> %b, i32 -9) ret <16 x float> %res diff --git a/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll b/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll --- a/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll +++ b/llvm/test/CodeGen/AArch64/named-vector-shuffles-sve.ll @@ -530,8 +530,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov z0.d, p1/z, #1 // =0x1 ; CHECK-NEXT: mov z1.d, p0/z, #1 // =0x1 -; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8 ; CHECK-NEXT: ptrue p2.d +; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8 ; CHECK-NEXT: and z1.d, z1.d, #0x1 ; CHECK-NEXT: cmpne p0.d, p2/z, z1.d, #0 ; CHECK-NEXT: ret @@ -545,8 +545,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov z0.s, p1/z, #1 // =0x1 ; CHECK-NEXT: mov z1.s, p0/z, #1 // =0x1 -; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8 ; CHECK-NEXT: ptrue p2.s +; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8 ; CHECK-NEXT: and z1.s, z1.s, #0x1 ; CHECK-NEXT: cmpne p0.s, p2/z, z1.s, #0 ; CHECK-NEXT: ret @@ -560,8 +560,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov z0.h, p1/z, #1 // =0x1 ; CHECK-NEXT: mov z1.h, p0/z, #1 // =0x1 -; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8 ; CHECK-NEXT: ptrue p2.h +; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8 ; CHECK-NEXT: and z1.h, z1.h, #0x1 ; CHECK-NEXT: cmpne p0.h, p2/z, z1.h, #0 ; CHECK-NEXT: ret @@ -575,8 +575,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov z0.b, p1/z, #1 // =0x1 ; CHECK-NEXT: mov z1.b, p0/z, #1 // =0x1 -; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8 ; CHECK-NEXT: ptrue p2.b +; CHECK-NEXT: ext z1.b, z1.b, z0.b, #8 ; CHECK-NEXT: and z1.b, z1.b, #0x1 ; CHECK-NEXT: cmpne p0.b, p2/z, z1.b, #0 ; CHECK-NEXT: ret @@ -1042,10 +1042,10 @@ define @splice_nxv2i1( %a, %b) #0 { ; CHECK-LABEL: splice_nxv2i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p2.d ; CHECK-NEXT: mov z0.d, p0/z, #1 // =0x1 -; CHECK-NEXT: lastb d0, p2, z0.d +; CHECK-NEXT: ptrue p2.d ; CHECK-NEXT: mov z1.d, p1/z, #1 // =0x1 +; CHECK-NEXT: lastb d0, p2, z0.d ; CHECK-NEXT: insr z1.d, d0 ; CHECK-NEXT: and z1.d, z1.d, #0x1 ; CHECK-NEXT: cmpne p0.d, p2/z, z1.d, #0 @@ -1058,10 +1058,10 @@ define @splice_nxv4i1( %a, %b) #0 { ; CHECK-LABEL: splice_nxv4i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p2.s ; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1 -; CHECK-NEXT: lastb s0, p2, z0.s +; CHECK-NEXT: ptrue p2.s ; CHECK-NEXT: mov z1.s, p1/z, #1 // =0x1 +; CHECK-NEXT: lastb s0, p2, z0.s ; CHECK-NEXT: insr z1.s, s0 ; CHECK-NEXT: and z1.s, z1.s, #0x1 ; CHECK-NEXT: cmpne p0.s, p2/z, z1.s, #0 @@ -1074,10 +1074,10 @@ define @splice_nxv8i1( %a, %b) #0 { ; CHECK-LABEL: splice_nxv8i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p2.h ; CHECK-NEXT: mov z0.h, p0/z, #1 // =0x1 -; CHECK-NEXT: lastb h0, p2, z0.h +; CHECK-NEXT: ptrue p2.h ; CHECK-NEXT: mov z1.h, p1/z, #1 // =0x1 +; CHECK-NEXT: lastb h0, p2, z0.h ; CHECK-NEXT: insr z1.h, h0 ; CHECK-NEXT: and z1.h, z1.h, #0x1 ; CHECK-NEXT: cmpne p0.h, p2/z, z1.h, #0 @@ -1090,10 +1090,10 @@ define @splice_nxv16i1( %a, %b) #0 { ; CHECK-LABEL: splice_nxv16i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p2.b ; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1 -; CHECK-NEXT: lastb b0, p2, z0.b +; CHECK-NEXT: ptrue p2.b ; CHECK-NEXT: mov z1.b, p1/z, #1 // =0x1 +; CHECK-NEXT: lastb b0, p2, z0.b ; CHECK-NEXT: insr z1.b, b0 ; CHECK-NEXT: and z1.b, z1.b, #0x1 ; CHECK-NEXT: cmpne p0.b, p2/z, z1.b, #0 diff --git a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll --- a/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll +++ b/llvm/test/CodeGen/AArch64/neon-bitwise-instructions.ll @@ -895,8 +895,8 @@ ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1 ; CHECK-NEXT: mov v0.d[1], v1.d[0] -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI89_0] -; CHECK-NEXT: tbl v0.8b, { v0.16b }, v1.8b +; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI89_0] +; CHECK-NEXT: tbl v0.8b, { v0.16b }, v2.8b ; CHECK-NEXT: ret %c = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> ret <8 x i8> %c diff --git a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll --- a/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll +++ b/llvm/test/CodeGen/AArch64/neon-compare-instructions.ll @@ -1242,9 +1242,9 @@ ; ; GISEL-LABEL: cmtst8xi8: ; GISEL: // %bb.0: -; GISEL-NEXT: movi v2.2d, #0000000000000000 ; GISEL-NEXT: and v0.8b, v0.8b, v1.8b -; GISEL-NEXT: cmeq v0.8b, v0.8b, v2.8b +; GISEL-NEXT: movi v1.2d, #0000000000000000 +; GISEL-NEXT: cmeq v0.8b, v0.8b, v1.8b ; GISEL-NEXT: mvn v0.8b, v0.8b ; GISEL-NEXT: shl v0.8b, v0.8b, #7 ; GISEL-NEXT: sshr v0.8b, v0.8b, #7 @@ -1284,9 +1284,9 @@ ; ; GISEL-LABEL: cmtst4xi16: ; GISEL: // %bb.0: -; GISEL-NEXT: movi v2.2d, #0000000000000000 ; GISEL-NEXT: and v0.8b, v0.8b, v1.8b -; GISEL-NEXT: cmeq v0.4h, v0.4h, v2.4h +; GISEL-NEXT: movi v1.2d, #0000000000000000 +; GISEL-NEXT: cmeq v0.4h, v0.4h, v1.4h ; GISEL-NEXT: mvn v0.8b, v0.8b ; GISEL-NEXT: shl v0.4h, v0.4h, #15 ; GISEL-NEXT: sshr v0.4h, v0.4h, #15 @@ -1326,9 +1326,9 @@ ; ; GISEL-LABEL: cmtst2xi32: ; GISEL: // %bb.0: -; GISEL-NEXT: movi v2.2d, #0000000000000000 ; GISEL-NEXT: and v0.8b, v0.8b, v1.8b -; GISEL-NEXT: cmeq v0.2s, v0.2s, v2.2s +; GISEL-NEXT: movi v1.2d, #0000000000000000 +; GISEL-NEXT: cmeq v0.2s, v0.2s, v1.2s ; GISEL-NEXT: mvn v0.8b, v0.8b ; GISEL-NEXT: shl v0.2s, v0.2s, #31 ; GISEL-NEXT: sshr v0.2s, v0.2s, #31 diff --git a/llvm/test/CodeGen/AArch64/neon-dotreduce.ll b/llvm/test/CodeGen/AArch64/neon-dotreduce.ll --- a/llvm/test/CodeGen/AArch64/neon-dotreduce.ll +++ b/llvm/test/CodeGen/AArch64/neon-dotreduce.ll @@ -7,11 +7,11 @@ define i32 @test_udot_v8i8(i8* nocapture readonly %a, i8* nocapture readonly %b) { ; CHECK-LABEL: test_udot_v8i8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: ldr d2, [x1] -; CHECK-NEXT: udot v0.2s, v2.8b, v1.8b -; CHECK-NEXT: addp v0.2s, v0.2s, v0.2s +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: movi v2.2d, #0000000000000000 +; CHECK-NEXT: udot v2.2s, v1.8b, v0.8b +; CHECK-NEXT: addp v0.2s, v2.2s, v2.2s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret entry: @@ -29,11 +29,11 @@ define i32 @test_udot_v8i8_nomla(i8* nocapture readonly %a1) { ; CHECK-LABEL: test_udot_v8i8_nomla: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr d1, [x0] ; CHECK-NEXT: movi v0.8b, #1 -; CHECK-NEXT: ldr d2, [x0] -; CHECK-NEXT: movi v1.2d, #0000000000000000 -; CHECK-NEXT: udot v1.2s, v2.8b, v0.8b -; CHECK-NEXT: addp v0.2s, v1.2s, v1.2s +; CHECK-NEXT: movi v2.2d, #0000000000000000 +; CHECK-NEXT: udot v2.2s, v1.8b, v0.8b +; CHECK-NEXT: addp v0.2s, v2.2s, v2.2s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret entry: @@ -47,11 +47,11 @@ define i32 @test_sdot_v8i8(i8* nocapture readonly %a, i8* nocapture readonly %b) { ; CHECK-LABEL: test_sdot_v8i8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: ldr d1, [x0] -; CHECK-NEXT: ldr d2, [x1] -; CHECK-NEXT: sdot v0.2s, v2.8b, v1.8b -; CHECK-NEXT: addp v0.2s, v0.2s, v0.2s +; CHECK-NEXT: ldr d0, [x0] +; CHECK-NEXT: ldr d1, [x1] +; CHECK-NEXT: movi v2.2d, #0000000000000000 +; CHECK-NEXT: sdot v2.2s, v1.8b, v0.8b +; CHECK-NEXT: addp v0.2s, v2.2s, v2.2s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret entry: @@ -69,11 +69,11 @@ define i32 @test_sdot_v8i8_nomla(i8* nocapture readonly %a1) { ; CHECK-LABEL: test_sdot_v8i8_nomla: ; CHECK: // %bb.0: // %entry +; CHECK-NEXT: ldr d1, [x0] ; CHECK-NEXT: movi v0.8b, #1 -; CHECK-NEXT: ldr d2, [x0] -; CHECK-NEXT: movi v1.2d, #0000000000000000 -; CHECK-NEXT: sdot v1.2s, v2.8b, v0.8b -; CHECK-NEXT: addp v0.2s, v1.2s, v1.2s +; CHECK-NEXT: movi v2.2d, #0000000000000000 +; CHECK-NEXT: sdot v2.2s, v1.8b, v0.8b +; CHECK-NEXT: addp v0.2s, v2.2s, v2.2s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret entry: @@ -88,11 +88,11 @@ define i32 @test_udot_v16i8(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %sum) { ; CHECK-LABEL: test_udot_v16i8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: ldr q2, [x0] -; CHECK-NEXT: udot v0.4s, v1.16b, v2.16b -; CHECK-NEXT: addv s0, v0.4s +; CHECK-NEXT: ldr q0, [x1] +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: movi v2.2d, #0000000000000000 +; CHECK-NEXT: udot v2.4s, v0.16b, v1.16b +; CHECK-NEXT: addv s0, v2.4s ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: add w0, w8, w2 ; CHECK-NEXT: ret @@ -112,11 +112,11 @@ define i32 @test_udot_v16i8_nomla(i8* nocapture readonly %a1) { ; CHECK-LABEL: test_udot_v16i8_nomla: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v0.16b, #1 -; CHECK-NEXT: ldr q2, [x0] -; CHECK-NEXT: movi v1.2d, #0000000000000000 -; CHECK-NEXT: udot v1.4s, v2.16b, v0.16b -; CHECK-NEXT: addv s0, v1.4s +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: movi v1.16b, #1 +; CHECK-NEXT: movi v2.2d, #0000000000000000 +; CHECK-NEXT: udot v2.4s, v0.16b, v1.16b +; CHECK-NEXT: addv s0, v2.4s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret entry: @@ -130,11 +130,11 @@ define i32 @test_sdot_v16i8(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %sum) { ; CHECK-LABEL: test_sdot_v16i8: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: ldr q1, [x1] -; CHECK-NEXT: ldr q2, [x0] -; CHECK-NEXT: sdot v0.4s, v1.16b, v2.16b -; CHECK-NEXT: addv s0, v0.4s +; CHECK-NEXT: ldr q0, [x1] +; CHECK-NEXT: ldr q1, [x0] +; CHECK-NEXT: movi v2.2d, #0000000000000000 +; CHECK-NEXT: sdot v2.4s, v0.16b, v1.16b +; CHECK-NEXT: addv s0, v2.4s ; CHECK-NEXT: fmov w8, s0 ; CHECK-NEXT: add w0, w8, w2 ; CHECK-NEXT: ret @@ -154,11 +154,11 @@ define i32 @test_sdot_v16i8_nomla(i8* nocapture readonly %a1) { ; CHECK-LABEL: test_sdot_v16i8_nomla: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: movi v0.16b, #1 -; CHECK-NEXT: ldr q2, [x0] -; CHECK-NEXT: movi v1.2d, #0000000000000000 -; CHECK-NEXT: sdot v1.4s, v2.16b, v0.16b -; CHECK-NEXT: addv s0, v1.4s +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: movi v1.16b, #1 +; CHECK-NEXT: movi v2.2d, #0000000000000000 +; CHECK-NEXT: sdot v2.4s, v0.16b, v1.16b +; CHECK-NEXT: addv s0, v2.4s ; CHECK-NEXT: fmov w0, s0 ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/neon-mla-mls.ll b/llvm/test/CodeGen/AArch64/neon-mla-mls.ll --- a/llvm/test/CodeGen/AArch64/neon-mla-mls.ll +++ b/llvm/test/CodeGen/AArch64/neon-mla-mls.ll @@ -138,8 +138,9 @@ define <8 x i8> @mls2v8xi8(<8 x i8> %A, <8 x i8> %B, <8 x i8> %C) { ; CHECK-LABEL: mls2v8xi8: ; CHECK: // %bb.0: -; CHECK-NEXT: mul v0.8b, v0.8b, v1.8b -; CHECK-NEXT: sub v0.8b, v0.8b, v2.8b +; CHECK-NEXT: neg v2.8b, v2.8b +; CHECK-NEXT: mla v2.8b, v0.8b, v1.8b +; CHECK-NEXT: fmov d0, d2 ; CHECK-NEXT: ret %tmp1 = mul <8 x i8> %A, %B; %tmp2 = sub <8 x i8> %tmp1, %C; @@ -149,8 +150,9 @@ define <16 x i8> @mls2v16xi8(<16 x i8> %A, <16 x i8> %B, <16 x i8> %C) { ; CHECK-LABEL: mls2v16xi8: ; CHECK: // %bb.0: -; CHECK-NEXT: mul v0.16b, v0.16b, v1.16b -; CHECK-NEXT: sub v0.16b, v0.16b, v2.16b +; CHECK-NEXT: neg v2.16b, v2.16b +; CHECK-NEXT: mla v2.16b, v0.16b, v1.16b +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %tmp1 = mul <16 x i8> %A, %B; %tmp2 = sub <16 x i8> %tmp1, %C; @@ -160,8 +162,9 @@ define <4 x i16> @mls2v4xi16(<4 x i16> %A, <4 x i16> %B, <4 x i16> %C) { ; CHECK-LABEL: mls2v4xi16: ; CHECK: // %bb.0: -; CHECK-NEXT: mul v0.4h, v0.4h, v1.4h -; CHECK-NEXT: sub v0.4h, v0.4h, v2.4h +; CHECK-NEXT: neg v2.4h, v2.4h +; CHECK-NEXT: mla v2.4h, v0.4h, v1.4h +; CHECK-NEXT: fmov d0, d2 ; CHECK-NEXT: ret %tmp1 = mul <4 x i16> %A, %B; %tmp2 = sub <4 x i16> %tmp1, %C; @@ -171,8 +174,9 @@ define <8 x i16> @mls2v8xi16(<8 x i16> %A, <8 x i16> %B, <8 x i16> %C) { ; CHECK-LABEL: mls2v8xi16: ; CHECK: // %bb.0: -; CHECK-NEXT: mul v0.8h, v0.8h, v1.8h -; CHECK-NEXT: sub v0.8h, v0.8h, v2.8h +; CHECK-NEXT: neg v2.8h, v2.8h +; CHECK-NEXT: mla v2.8h, v0.8h, v1.8h +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %tmp1 = mul <8 x i16> %A, %B; %tmp2 = sub <8 x i16> %tmp1, %C; @@ -182,8 +186,9 @@ define <2 x i32> @mls2v2xi32(<2 x i32> %A, <2 x i32> %B, <2 x i32> %C) { ; CHECK-LABEL: mls2v2xi32: ; CHECK: // %bb.0: -; CHECK-NEXT: mul v0.2s, v0.2s, v1.2s -; CHECK-NEXT: sub v0.2s, v0.2s, v2.2s +; CHECK-NEXT: neg v2.2s, v2.2s +; CHECK-NEXT: mla v2.2s, v0.2s, v1.2s +; CHECK-NEXT: fmov d0, d2 ; CHECK-NEXT: ret %tmp1 = mul <2 x i32> %A, %B; %tmp2 = sub <2 x i32> %tmp1, %C; @@ -193,8 +198,9 @@ define <4 x i32> @mls2v4xi32(<4 x i32> %A, <4 x i32> %B, <4 x i32> %C) { ; CHECK-LABEL: mls2v4xi32: ; CHECK: // %bb.0: -; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s -; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s +; CHECK-NEXT: neg v2.4s, v2.4s +; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s +; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %tmp1 = mul <4 x i32> %A, %B; %tmp2 = sub <4 x i32> %tmp1, %C; diff --git a/llvm/test/CodeGen/AArch64/neon-truncstore.ll b/llvm/test/CodeGen/AArch64/neon-truncstore.ll --- a/llvm/test/CodeGen/AArch64/neon-truncstore.ll +++ b/llvm/test/CodeGen/AArch64/neon-truncstore.ll @@ -45,10 +45,10 @@ ; CHECK-LABEL: v2i32_v2i16: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strh w9, [x0] +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: strh w8, [x0, #2] +; CHECK-NEXT: strh w9, [x0] ; CHECK-NEXT: ret %b = trunc <2 x i32> %a to <2 x i16> store <2 x i16> %b, <2 x i16>* %result @@ -96,10 +96,10 @@ ; CHECK-LABEL: v2i32_v2i8: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strb w9, [x0] +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: strb w8, [x0, #1] +; CHECK-NEXT: strb w9, [x0] ; CHECK-NEXT: ret %b = trunc <2 x i32> %a to <2 x i8> store <2 x i8> %b, <2 x i8>* %result @@ -173,10 +173,10 @@ ; CHECK-LABEL: v2i16_v2i8: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strb w9, [x0] +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: strb w8, [x0, #1] +; CHECK-NEXT: strb w9, [x0] ; CHECK-NEXT: ret %b = trunc <2 x i16> %a to <2 x i8> store <2 x i8> %b, <2 x i8>* %result diff --git a/llvm/test/CodeGen/AArch64/nontemporal.ll b/llvm/test/CodeGen/AArch64/nontemporal.ll --- a/llvm/test/CodeGen/AArch64/nontemporal.ll +++ b/llvm/test/CodeGen/AArch64/nontemporal.ll @@ -208,8 +208,8 @@ define void @test_stnp_v4f32_invalid_offset_4(i8* %p, <4 x float> %v) #0 { ; CHECK-LABEL: test_stnp_v4f32_invalid_offset_4: ; CHECK: ; %bb.0: -; CHECK-NEXT: mov d1, v0[1] ; CHECK-NEXT: add x8, x0, #4 +; CHECK-NEXT: mov d1, v0[1] ; CHECK-NEXT: stnp d0, d1, [x8] ; CHECK-NEXT: ret %tmp0 = getelementptr i8, i8* %p, i32 4 @@ -221,8 +221,8 @@ define void @test_stnp_v4f32_invalid_offset_neg_4(i8* %p, <4 x float> %v) #0 { ; CHECK-LABEL: test_stnp_v4f32_invalid_offset_neg_4: ; CHECK: ; %bb.0: -; CHECK-NEXT: mov d1, v0[1] ; CHECK-NEXT: sub x8, x0, #4 +; CHECK-NEXT: mov d1, v0[1] ; CHECK-NEXT: stnp d0, d1, [x8] ; CHECK-NEXT: ret %tmp0 = getelementptr i8, i8* %p, i32 -4 @@ -234,8 +234,8 @@ define void @test_stnp_v4f32_invalid_offset_512(i8* %p, <4 x float> %v) #0 { ; CHECK-LABEL: test_stnp_v4f32_invalid_offset_512: ; CHECK: ; %bb.0: -; CHECK-NEXT: mov d1, v0[1] ; CHECK-NEXT: add x8, x0, #512 +; CHECK-NEXT: mov d1, v0[1] ; CHECK-NEXT: stnp d0, d1, [x8] ; CHECK-NEXT: ret %tmp0 = getelementptr i8, i8* %p, i32 512 @@ -259,8 +259,8 @@ define void @test_stnp_v4f32_invalid_offset_508(i8* %p, <4 x float> %v) #0 { ; CHECK-LABEL: test_stnp_v4f32_invalid_offset_508: ; CHECK: ; %bb.0: -; CHECK-NEXT: mov d1, v0[1] ; CHECK-NEXT: add x8, x0, #508 +; CHECK-NEXT: mov d1, v0[1] ; CHECK-NEXT: stnp d0, d1, [x8] ; CHECK-NEXT: ret %tmp0 = getelementptr i8, i8* %p, i32 508 @@ -272,8 +272,8 @@ define void @test_stnp_v4f32_invalid_offset_neg_520(i8* %p, <4 x float> %v) #0 { ; CHECK-LABEL: test_stnp_v4f32_invalid_offset_neg_520: ; CHECK: ; %bb.0: -; CHECK-NEXT: mov d1, v0[1] ; CHECK-NEXT: sub x8, x0, #520 +; CHECK-NEXT: mov d1, v0[1] ; CHECK-NEXT: stnp d0, d1, [x8] ; CHECK-NEXT: ret %tmp0 = getelementptr i8, i8* %p, i32 -520 @@ -298,9 +298,9 @@ define void @test_stnp_v2f32_invalid_offset_256(i8* %p, <2 x float> %v) #0 { ; CHECK-LABEL: test_stnp_v2f32_invalid_offset_256: ; CHECK: ; %bb.0: +; CHECK-NEXT: add x8, x0, #256 ; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: mov s1, v0[1] -; CHECK-NEXT: add x8, x0, #256 ; CHECK-NEXT: stnp s0, s1, [x8] ; CHECK-NEXT: ret %tmp0 = getelementptr i8, i8* %p, i32 256 @@ -325,9 +325,9 @@ define void @test_stnp_v2f32_invalid_offset_neg_260(i8* %p, <2 x float> %v) #0 { ; CHECK-LABEL: test_stnp_v2f32_invalid_offset_neg_260: ; CHECK: ; %bb.0: +; CHECK-NEXT: sub x8, x0, #260 ; CHECK-NEXT: ; kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: mov s1, v0[1] -; CHECK-NEXT: sub x8, x0, #260 ; CHECK-NEXT: stnp s0, s1, [x8] ; CHECK-NEXT: ret %tmp0 = getelementptr i8, i8* %p, i32 -260 @@ -355,8 +355,8 @@ ; CHECK-LABEL: test_stnp_v4f32_offset_alloca: ; CHECK: ; %bb.0: ; CHECK-NEXT: sub sp, sp, #32 -; CHECK-NEXT: mov d1, v0[1] ; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: mov d1, v0[1] ; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill ; CHECK-NEXT: stnp d0, d1, [sp] ; CHECK-NEXT: bl _dummy @@ -373,8 +373,8 @@ ; CHECK-LABEL: test_stnp_v4f32_offset_alloca_2: ; CHECK: ; %bb.0: ; CHECK-NEXT: sub sp, sp, #48 -; CHECK-NEXT: mov d1, v0[1] ; CHECK-NEXT: mov x0, sp +; CHECK-NEXT: mov d1, v0[1] ; CHECK-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill ; CHECK-NEXT: stnp d0, d1, [sp, #16] ; CHECK-NEXT: bl _dummy @@ -450,44 +450,44 @@ define void @test_stnp_v17f32(<17 x float> %v, <17 x float>* %ptr) { ; CHECK-LABEL: test_stnp_v17f32: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: ldr s16, [sp, #16] -; CHECK-NEXT: add x8, sp, #20 -; CHECK-NEXT: ldr s17, [sp] -; CHECK-NEXT: add x9, sp, #4 +; CHECK-NEXT: add x9, sp, #20 +; CHECK-NEXT: ldr s17, [sp, #16] +; CHECK-NEXT: add x8, sp, #4 +; CHECK-NEXT: ldr s16, [sp] +; CHECK-NEXT: add x11, sp, #24 +; CHECK-NEXT: add x10, sp, #8 +; CHECK-NEXT: ld1.s { v17 }[1], [x9] +; CHECK-NEXT: add x13, sp, #28 +; CHECK-NEXT: ld1.s { v16 }[1], [x8] ; CHECK-NEXT: ; kill: def $s4 killed $s4 def $q4 +; CHECK-NEXT: add x12, sp, #12 ; CHECK-NEXT: ; kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: ; kill: def $s5 killed $s5 def $q5 ; CHECK-NEXT: ; kill: def $s1 killed $s1 def $q1 +; CHECK-NEXT: mov.s v4[1], v5[0] ; CHECK-NEXT: ; kill: def $s6 killed $s6 def $q6 ; CHECK-NEXT: ; kill: def $s2 killed $s2 def $q2 ; CHECK-NEXT: ; kill: def $s7 killed $s7 def $q7 ; CHECK-NEXT: ; kill: def $s3 killed $s3 def $q3 -; CHECK-NEXT: ld1.s { v16 }[1], [x8] -; CHECK-NEXT: add x8, sp, #24 -; CHECK-NEXT: ld1.s { v17 }[1], [x9] -; CHECK-NEXT: add x9, sp, #8 -; CHECK-NEXT: mov.s v4[1], v5[0] ; CHECK-NEXT: mov.s v0[1], v1[0] -; CHECK-NEXT: ld1.s { v16 }[2], [x8] -; CHECK-NEXT: add x8, sp, #28 -; CHECK-NEXT: ld1.s { v17 }[2], [x9] -; CHECK-NEXT: add x9, sp, #12 +; CHECK-NEXT: ldr s1, [sp, #32] +; CHECK-NEXT: ld1.s { v17 }[2], [x11] ; CHECK-NEXT: mov.s v4[2], v6[0] +; CHECK-NEXT: ld1.s { v16 }[2], [x10] ; CHECK-NEXT: mov.s v0[2], v2[0] -; CHECK-NEXT: ld1.s { v16 }[3], [x8] -; CHECK-NEXT: ld1.s { v17 }[3], [x9] ; CHECK-NEXT: mov.s v4[3], v7[0] +; CHECK-NEXT: str s1, [x0, #64] ; CHECK-NEXT: mov.s v0[3], v3[0] -; CHECK-NEXT: mov d1, v16[1] +; CHECK-NEXT: ld1.s { v17 }[3], [x13] +; CHECK-NEXT: mov d5, v4[1] +; CHECK-NEXT: ld1.s { v16 }[3], [x12] +; CHECK-NEXT: stnp d4, d5, [x0, #16] ; CHECK-NEXT: mov d2, v17[1] -; CHECK-NEXT: mov d3, v4[1] -; CHECK-NEXT: mov d5, v0[1] -; CHECK-NEXT: stnp d16, d1, [x0, #48] -; CHECK-NEXT: ldr s1, [sp, #32] -; CHECK-NEXT: stnp d17, d2, [x0, #32] -; CHECK-NEXT: stnp d4, d3, [x0, #16] -; CHECK-NEXT: stnp d0, d5, [x0] -; CHECK-NEXT: str s1, [x0, #64] +; CHECK-NEXT: mov d3, v16[1] +; CHECK-NEXT: stnp d17, d2, [x0, #48] +; CHECK-NEXT: mov d2, v0[1] +; CHECK-NEXT: stnp d16, d3, [x0, #32] +; CHECK-NEXT: stnp d0, d2, [x0] ; CHECK-NEXT: ret entry: @@ -497,12 +497,12 @@ define void @test_stnp_v16i32_invalid_offset(<16 x i32> %v, <16 x i32>* %ptr) { ; CHECK-LABEL: test_stnp_v16i32_invalid_offset: ; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: mov w8, #32032 -; CHECK-NEXT: mov w9, #32000 +; CHECK-NEXT: mov w8, #32000 +; CHECK-NEXT: mov w9, #32032 ; CHECK-NEXT: add x8, x0, x8 ; CHECK-NEXT: add x9, x0, x9 -; CHECK-NEXT: stnp q2, q3, [x8] -; CHECK-NEXT: stnp q0, q1, [x9] +; CHECK-NEXT: stnp q2, q3, [x9] +; CHECK-NEXT: stnp q0, q1, [x8] ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/overeager_mla_fusing.ll b/llvm/test/CodeGen/AArch64/overeager_mla_fusing.ll --- a/llvm/test/CodeGen/AArch64/overeager_mla_fusing.ll +++ b/llvm/test/CodeGen/AArch64/overeager_mla_fusing.ll @@ -13,9 +13,9 @@ ; CHECK-NEXT: mul v0.8h, v2.8h, v0.8h ; CHECK-NEXT: mul v1.8h, v3.8h, v1.8h ; CHECK-NEXT: add v2.8h, v0.8h, v1.8h -; CHECK-NEXT: sub v0.8h, v0.8h, v1.8h ; CHECK-NEXT: str q2, [x9, x8] ; CHECK-NEXT: ldr x9, [x2, #56] +; CHECK-NEXT: sub v0.8h, v0.8h, v1.8h ; CHECK-NEXT: str q0, [x9, x8] ; CHECK-NEXT: ret entry: diff --git a/llvm/test/CodeGen/AArch64/pow.ll b/llvm/test/CodeGen/AArch64/pow.ll --- a/llvm/test/CodeGen/AArch64/pow.ll +++ b/llvm/test/CodeGen/AArch64/pow.ll @@ -69,9 +69,9 @@ ; CHECK-LABEL: pow_v4f32_one_fourth_not_enough_fmf: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: fmov s1, #0.25000000 ; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: mov s0, v0.s[1] -; CHECK-NEXT: fmov s1, #0.25000000 ; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: bl powf ; CHECK-NEXT: fmov s1, #0.25000000 @@ -91,9 +91,9 @@ ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: mov v1.s[2], v0.s[0] ; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: mov s0, v0.s[3] ; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill ; CHECK-NEXT: fmov s1, #0.25000000 +; CHECK-NEXT: mov s0, v0.s[3] ; CHECK-NEXT: bl powf ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 @@ -110,9 +110,9 @@ ; CHECK-LABEL: pow_v2f64_one_fourth_not_enough_fmf: ; CHECK: // %bb.0: ; CHECK-NEXT: sub sp, sp, #48 +; CHECK-NEXT: fmov d1, #0.25000000 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: mov d0, v0.d[1] -; CHECK-NEXT: fmov d1, #0.25000000 ; CHECK-NEXT: str x30, [sp, #32] // 8-byte Folded Spill ; CHECK-NEXT: bl pow ; CHECK-NEXT: fmov d1, #0.25000000 diff --git a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll --- a/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll +++ b/llvm/test/CodeGen/AArch64/ragreedy-local-interval-cost.ll @@ -8,12 +8,11 @@ define dso_local void @run_test() local_unnamed_addr #0 { ; CHECK-LABEL: run_test: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: sub sp, sp, #96 -; CHECK-NEXT: stp d15, d14, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 96 +; CHECK-NEXT: stp d15, d14, [sp, #-64]! // 16-byte Folded Spill +; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 64 ; CHECK-NEXT: .cfi_offset b8, -8 ; CHECK-NEXT: .cfi_offset b9, -16 ; CHECK-NEXT: .cfi_offset b10, -24 @@ -22,13 +21,14 @@ ; CHECK-NEXT: .cfi_offset b13, -48 ; CHECK-NEXT: .cfi_offset b14, -56 ; CHECK-NEXT: .cfi_offset b15, -64 -; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: adrp x10, B+48 ; CHECK-NEXT: adrp x11, A ; CHECK-NEXT: mov x8, xzr ; CHECK-NEXT: mov x9, xzr ; CHECK-NEXT: add x10, x10, :lo12:B+48 ; CHECK-NEXT: add x11, x11, :lo12:A +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: // implicit-def: $q1 ; CHECK-NEXT: // implicit-def: $q2 ; CHECK-NEXT: // implicit-def: $q3 ; CHECK-NEXT: // implicit-def: $q4 @@ -57,113 +57,102 @@ ; CHECK-NEXT: // implicit-def: $q11 ; CHECK-NEXT: // implicit-def: $q12 ; CHECK-NEXT: // implicit-def: $q13 -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: // implicit-def: $q0 -; CHECK-NEXT: // kill: killed $q0 ; CHECK-NEXT: .LBB0_1: // %for.cond1.preheader ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: mov x12, xzr ; CHECK-NEXT: ldr q14, [x8] ; CHECK-NEXT: ldr q15, [x10], #64 -; CHECK-NEXT: add x15, x11, x8 +; CHECK-NEXT: add x16, x11, x8 ; CHECK-NEXT: add x9, x9, #1 -; CHECK-NEXT: ldr q0, [x12] -; CHECK-NEXT: fmov x13, d14 -; CHECK-NEXT: ldr x12, [x12] -; CHECK-NEXT: fmov x0, d15 +; CHECK-NEXT: ldr x13, [x12] +; CHECK-NEXT: fmov x15, d14 ; CHECK-NEXT: mov x14, v14.d[1] -; CHECK-NEXT: ldr x15, [x15, #128] -; CHECK-NEXT: fmov x16, d0 -; CHECK-NEXT: mul x17, x13, x12 -; CHECK-NEXT: mov x18, v0.d[1] -; CHECK-NEXT: mul x4, x0, x12 -; CHECK-NEXT: mul x1, x16, x12 -; CHECK-NEXT: mul x3, x14, x12 -; CHECK-NEXT: fmov d0, x17 -; CHECK-NEXT: mul x5, x13, x15 -; CHECK-NEXT: mov x17, v15.d[1] -; CHECK-NEXT: fmov d15, x4 -; CHECK-NEXT: fmov d14, x1 -; CHECK-NEXT: mul x1, x18, x12 +; CHECK-NEXT: ldr q14, [x12] +; CHECK-NEXT: fmov x12, d15 +; CHECK-NEXT: mov x18, v15.d[1] +; CHECK-NEXT: mul x17, x15, x13 +; CHECK-NEXT: ldr x16, [x16, #128] +; CHECK-NEXT: fmov x3, d14 +; CHECK-NEXT: mul x2, x14, x13 +; CHECK-NEXT: mul x0, x12, x13 +; CHECK-NEXT: mov x1, v14.d[1] +; CHECK-NEXT: fmov d14, x17 +; CHECK-NEXT: mul x17, x1, x13 +; CHECK-NEXT: fmov d15, x0 +; CHECK-NEXT: mul x0, x3, x13 +; CHECK-NEXT: mov v14.d[1], x2 +; CHECK-NEXT: mul x13, x18, x13 ; CHECK-NEXT: ldr x2, [x8], #8 -; CHECK-NEXT: mov v0.d[1], x3 -; CHECK-NEXT: mul x3, x16, x15 -; CHECK-NEXT: mul x12, x17, x12 -; CHECK-NEXT: fmov d1, x5 -; CHECK-NEXT: mul x13, x13, x2 +; CHECK-NEXT: add v12.2d, v12.2d, v14.2d ; CHECK-NEXT: cmp x8, #64 -; CHECK-NEXT: mov v14.d[1], x1 -; CHECK-NEXT: mul x1, x14, x15 -; CHECK-NEXT: add v12.2d, v12.2d, v0.2d -; CHECK-NEXT: mul x14, x14, x2 -; CHECK-NEXT: mov v15.d[1], x12 -; CHECK-NEXT: mul x12, x18, x2 -; CHECK-NEXT: mul x18, x18, x15 -; CHECK-NEXT: fmov d0, x3 -; CHECK-NEXT: mov v1.d[1], x1 -; CHECK-NEXT: mul x16, x16, x2 -; CHECK-NEXT: mul x3, x0, x15 +; CHECK-NEXT: fmov d14, x0 +; CHECK-NEXT: mul x0, x14, x16 +; CHECK-NEXT: mov v15.d[1], x13 +; CHECK-NEXT: mul x13, x3, x16 +; CHECK-NEXT: mov v14.d[1], x17 +; CHECK-NEXT: mul x17, x15, x16 ; CHECK-NEXT: add v10.2d, v10.2d, v15.2d -; CHECK-NEXT: fmov d15, x13 -; CHECK-NEXT: mov v0.d[1], x18 -; CHECK-NEXT: mul x13, x0, x2 -; CHECK-NEXT: add v29.2d, v29.2d, v1.2d -; CHECK-NEXT: mul x15, x17, x15 -; CHECK-NEXT: mov v15.d[1], x14 -; CHECK-NEXT: fmov d1, x16 -; CHECK-NEXT: add v28.2d, v28.2d, v0.2d -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: mul x15, x15, x2 ; CHECK-NEXT: add v13.2d, v13.2d, v14.2d -; CHECK-NEXT: mov v1.d[1], x12 -; CHECK-NEXT: mul x12, x17, x2 -; CHECK-NEXT: add v0.2d, v0.2d, v15.2d ; CHECK-NEXT: add v11.2d, v11.2d, v14.2d -; CHECK-NEXT: fmov d14, x3 -; CHECK-NEXT: add v9.2d, v9.2d, v1.2d -; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: fmov d0, x13 -; CHECK-NEXT: mov v14.d[1], x15 -; CHECK-NEXT: add v31.2d, v31.2d, v1.2d -; CHECK-NEXT: mov v0.d[1], x12 -; CHECK-NEXT: add v26.2d, v26.2d, v1.2d -; CHECK-NEXT: add v23.2d, v23.2d, v1.2d -; CHECK-NEXT: add v21.2d, v21.2d, v1.2d -; CHECK-NEXT: add v19.2d, v19.2d, v1.2d -; CHECK-NEXT: add v17.2d, v17.2d, v1.2d -; CHECK-NEXT: add v7.2d, v7.2d, v1.2d -; CHECK-NEXT: add v5.2d, v5.2d, v1.2d -; CHECK-NEXT: add v3.2d, v3.2d, v1.2d -; CHECK-NEXT: add v2.2d, v2.2d, v1.2d -; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload -; CHECK-NEXT: add v27.2d, v27.2d, v14.2d +; CHECK-NEXT: fmov d14, x17 +; CHECK-NEXT: mul x17, x1, x16 +; CHECK-NEXT: fmov d15, x13 +; CHECK-NEXT: mul x13, x14, x2 +; CHECK-NEXT: mul x14, x12, x16 +; CHECK-NEXT: mul x16, x18, x16 +; CHECK-NEXT: mov v15.d[1], x17 +; CHECK-NEXT: mul x12, x12, x2 +; CHECK-NEXT: mov v14.d[1], x0 +; CHECK-NEXT: mul x0, x1, x2 +; CHECK-NEXT: add v28.2d, v28.2d, v15.2d +; CHECK-NEXT: mul x1, x3, x2 +; CHECK-NEXT: fmov d15, x14 +; CHECK-NEXT: add v29.2d, v29.2d, v14.2d +; CHECK-NEXT: mov v15.d[1], x16 +; CHECK-NEXT: fmov d14, x1 +; CHECK-NEXT: add v27.2d, v27.2d, v15.2d +; CHECK-NEXT: fmov d15, x15 +; CHECK-NEXT: mov v14.d[1], x0 +; CHECK-NEXT: mov v15.d[1], x13 +; CHECK-NEXT: mul x13, x18, x2 +; CHECK-NEXT: add v9.2d, v9.2d, v14.2d ; CHECK-NEXT: add v8.2d, v8.2d, v15.2d ; CHECK-NEXT: add v25.2d, v25.2d, v15.2d ; CHECK-NEXT: add v22.2d, v22.2d, v15.2d ; CHECK-NEXT: add v18.2d, v18.2d, v15.2d ; CHECK-NEXT: add v6.2d, v6.2d, v15.2d -; CHECK-NEXT: add v30.2d, v30.2d, v0.2d -; CHECK-NEXT: add v24.2d, v24.2d, v0.2d -; CHECK-NEXT: add v20.2d, v20.2d, v0.2d -; CHECK-NEXT: add v16.2d, v16.2d, v0.2d -; CHECK-NEXT: add v4.2d, v4.2d, v0.2d -; CHECK-NEXT: add v1.2d, v1.2d, v0.2d -; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-NEXT: add v0.2d, v0.2d, v15.2d +; CHECK-NEXT: fmov d15, x12 +; CHECK-NEXT: add v31.2d, v31.2d, v14.2d +; CHECK-NEXT: add v26.2d, v26.2d, v14.2d +; CHECK-NEXT: mov v15.d[1], x13 +; CHECK-NEXT: add v23.2d, v23.2d, v14.2d +; CHECK-NEXT: add v21.2d, v21.2d, v14.2d +; CHECK-NEXT: add v19.2d, v19.2d, v14.2d +; CHECK-NEXT: add v17.2d, v17.2d, v14.2d +; CHECK-NEXT: add v7.2d, v7.2d, v14.2d +; CHECK-NEXT: add v5.2d, v5.2d, v14.2d +; CHECK-NEXT: add v3.2d, v3.2d, v14.2d +; CHECK-NEXT: add v2.2d, v2.2d, v14.2d +; CHECK-NEXT: add v30.2d, v30.2d, v15.2d +; CHECK-NEXT: add v24.2d, v24.2d, v15.2d +; CHECK-NEXT: add v20.2d, v20.2d, v15.2d +; CHECK-NEXT: add v16.2d, v16.2d, v15.2d +; CHECK-NEXT: add v4.2d, v4.2d, v15.2d +; CHECK-NEXT: add v1.2d, v1.2d, v15.2d ; CHECK-NEXT: b.ne .LBB0_1 ; CHECK-NEXT: // %bb.2: // %for.cond.cleanup ; CHECK-NEXT: adrp x8, C -; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: add x8, x8, :lo12:C -; CHECK-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: stp q13, q12, [x8] ; CHECK-NEXT: stp q11, q10, [x8, #32] ; CHECK-NEXT: stp q9, q8, [x8, #64] -; CHECK-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload -; CHECK-NEXT: stp q0, q2, [x8, #464] -; CHECK-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload +; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: stp q31, q30, [x8, #96] -; CHECK-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload +; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: stp q29, q28, [x8, #144] -; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: stp q27, q26, [x8, #176] ; CHECK-NEXT: str q25, [x8, #208] ; CHECK-NEXT: stp q24, q23, [x8, #240] @@ -173,8 +162,9 @@ ; CHECK-NEXT: stp q16, q7, [x8, #368] ; CHECK-NEXT: stp q6, q5, [x8, #400] ; CHECK-NEXT: stp q4, q3, [x8, #432] -; CHECK-NEXT: str q0, [x8, #496] -; CHECK-NEXT: add sp, sp, #96 +; CHECK-NEXT: stp q0, q2, [x8, #464] +; CHECK-NEXT: str q1, [x8, #496] +; CHECK-NEXT: ldp d15, d14, [sp], #64 // 16-byte Folded Reload ; CHECK-NEXT: ret entry: br label %for.cond1.preheader diff --git a/llvm/test/CodeGen/AArch64/reduce-and.ll b/llvm/test/CodeGen/AArch64/reduce-and.ll --- a/llvm/test/CodeGen/AArch64/reduce-and.ll +++ b/llvm/test/CodeGen/AArch64/reduce-and.ll @@ -21,8 +21,8 @@ ; CHECK-LABEL: test_redand_v2i1: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: and w8, w9, w8 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret @@ -81,17 +81,17 @@ ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] ; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] -; CHECK-NEXT: umov w13, v0.b[5] ; CHECK-NEXT: and w8, w9, w8 -; CHECK-NEXT: umov w9, v0.b[6] +; CHECK-NEXT: umov w9, v0.b[4] ; CHECK-NEXT: and w8, w8, w10 -; CHECK-NEXT: umov w10, v0.b[7] +; CHECK-NEXT: umov w10, v0.b[5] ; CHECK-NEXT: and w8, w8, w11 -; CHECK-NEXT: and w8, w8, w12 -; CHECK-NEXT: and w8, w8, w13 +; CHECK-NEXT: umov w11, v0.b[6] ; CHECK-NEXT: and w8, w8, w9 +; CHECK-NEXT: umov w9, v0.b[7] ; CHECK-NEXT: and w8, w8, w10 +; CHECK-NEXT: and w8, w8, w11 +; CHECK-NEXT: and w8, w8, w9 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret ; @@ -101,24 +101,24 @@ ; GISEL-NEXT: mov b1, v0.b[1] ; GISEL-NEXT: mov b2, v0.b[2] ; GISEL-NEXT: mov b3, v0.b[3] -; GISEL-NEXT: mov b4, v0.b[4] -; GISEL-NEXT: mov b5, v0.b[5] -; GISEL-NEXT: mov b6, v0.b[6] -; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: fmov w8, s0 ; GISEL-NEXT: fmov w9, s1 ; GISEL-NEXT: fmov w10, s2 ; GISEL-NEXT: fmov w11, s3 -; GISEL-NEXT: fmov w12, s4 -; GISEL-NEXT: fmov w13, s5 +; GISEL-NEXT: mov b4, v0.b[4] +; GISEL-NEXT: mov b5, v0.b[5] +; GISEL-NEXT: mov b6, v0.b[6] +; GISEL-NEXT: mov b7, v0.b[7] +; GISEL-NEXT: and w8, w8, w9 +; GISEL-NEXT: and w9, w10, w11 +; GISEL-NEXT: fmov w10, s4 +; GISEL-NEXT: fmov w11, s5 +; GISEL-NEXT: fmov w12, s6 +; GISEL-NEXT: fmov w13, s7 ; GISEL-NEXT: and w8, w8, w9 -; GISEL-NEXT: fmov w9, s6 ; GISEL-NEXT: and w10, w10, w11 -; GISEL-NEXT: fmov w11, s7 -; GISEL-NEXT: and w12, w12, w13 -; GISEL-NEXT: and w8, w8, w10 -; GISEL-NEXT: and w9, w9, w11 -; GISEL-NEXT: and w9, w12, w9 +; GISEL-NEXT: and w11, w12, w13 +; GISEL-NEXT: and w9, w10, w11 ; GISEL-NEXT: and w8, w8, w9 ; GISEL-NEXT: and w0, w8, #0x1 ; GISEL-NEXT: ret @@ -134,18 +134,18 @@ ; CHECK-NEXT: umov w8, v0.b[1] ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] -; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] ; CHECK-NEXT: and w8, w9, w8 +; CHECK-NEXT: umov w9, v0.b[3] +; CHECK-NEXT: and w8, w8, w10 +; CHECK-NEXT: umov w10, v0.b[4] +; CHECK-NEXT: and w8, w8, w9 ; CHECK-NEXT: umov w9, v0.b[5] ; CHECK-NEXT: and w8, w8, w10 ; CHECK-NEXT: umov w10, v0.b[6] -; CHECK-NEXT: and w8, w8, w11 -; CHECK-NEXT: umov w11, v0.b[7] -; CHECK-NEXT: and w8, w8, w12 ; CHECK-NEXT: and w8, w8, w9 +; CHECK-NEXT: umov w9, v0.b[7] ; CHECK-NEXT: and w8, w8, w10 -; CHECK-NEXT: and w8, w8, w11 +; CHECK-NEXT: and w8, w8, w9 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret ; @@ -154,45 +154,45 @@ ; GISEL-NEXT: mov b1, v0.b[1] ; GISEL-NEXT: mov b2, v0.b[2] ; GISEL-NEXT: mov b3, v0.b[3] -; GISEL-NEXT: mov b4, v0.b[4] -; GISEL-NEXT: mov b5, v0.b[5] -; GISEL-NEXT: mov b6, v0.b[6] -; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: fmov w8, s0 ; GISEL-NEXT: fmov w9, s1 ; GISEL-NEXT: fmov w10, s2 ; GISEL-NEXT: fmov w11, s3 -; GISEL-NEXT: mov b16, v0.b[8] -; GISEL-NEXT: mov b17, v0.b[9] -; GISEL-NEXT: mov b18, v0.b[10] -; GISEL-NEXT: mov b19, v0.b[11] +; GISEL-NEXT: mov b4, v0.b[4] +; GISEL-NEXT: mov b5, v0.b[5] +; GISEL-NEXT: mov b6, v0.b[6] +; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: and w8, w8, w9 ; GISEL-NEXT: and w9, w10, w11 ; GISEL-NEXT: fmov w10, s4 ; GISEL-NEXT: fmov w11, s5 ; GISEL-NEXT: fmov w12, s6 ; GISEL-NEXT: fmov w13, s7 -; GISEL-NEXT: mov b20, v0.b[12] -; GISEL-NEXT: mov b21, v0.b[13] -; GISEL-NEXT: mov b22, v0.b[14] -; GISEL-NEXT: mov b23, v0.b[15] +; GISEL-NEXT: mov b16, v0.b[8] +; GISEL-NEXT: mov b17, v0.b[9] +; GISEL-NEXT: mov b18, v0.b[10] +; GISEL-NEXT: mov b19, v0.b[11] ; GISEL-NEXT: and w10, w10, w11 ; GISEL-NEXT: and w11, w12, w13 ; GISEL-NEXT: fmov w12, s16 ; GISEL-NEXT: fmov w13, s17 ; GISEL-NEXT: fmov w14, s18 ; GISEL-NEXT: fmov w15, s19 -; GISEL-NEXT: fmov w16, s22 -; GISEL-NEXT: fmov w17, s23 -; GISEL-NEXT: and w8, w8, w9 +; GISEL-NEXT: mov b20, v0.b[12] +; GISEL-NEXT: mov b21, v0.b[13] +; GISEL-NEXT: mov b22, v0.b[14] +; GISEL-NEXT: mov b23, v0.b[15] ; GISEL-NEXT: and w12, w12, w13 -; GISEL-NEXT: and w9, w10, w11 ; GISEL-NEXT: and w13, w14, w15 ; GISEL-NEXT: fmov w14, s20 ; GISEL-NEXT: fmov w15, s21 -; GISEL-NEXT: and w10, w12, w13 +; GISEL-NEXT: fmov w16, s22 +; GISEL-NEXT: fmov w17, s23 ; GISEL-NEXT: and w8, w8, w9 +; GISEL-NEXT: and w9, w10, w11 +; GISEL-NEXT: and w10, w12, w13 ; GISEL-NEXT: and w14, w14, w15 +; GISEL-NEXT: and w8, w8, w9 ; GISEL-NEXT: and w15, w16, w17 ; GISEL-NEXT: and w11, w14, w15 ; GISEL-NEXT: and w9, w10, w11 @@ -275,17 +275,17 @@ ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] ; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] -; CHECK-NEXT: umov w13, v0.b[5] ; CHECK-NEXT: and w8, w9, w8 -; CHECK-NEXT: umov w9, v0.b[6] +; CHECK-NEXT: umov w9, v0.b[4] ; CHECK-NEXT: and w8, w8, w10 -; CHECK-NEXT: umov w10, v0.b[7] +; CHECK-NEXT: umov w10, v0.b[5] ; CHECK-NEXT: and w8, w8, w11 -; CHECK-NEXT: and w8, w8, w12 -; CHECK-NEXT: and w8, w8, w13 +; CHECK-NEXT: umov w11, v0.b[6] ; CHECK-NEXT: and w8, w8, w9 -; CHECK-NEXT: and w0, w8, w10 +; CHECK-NEXT: umov w9, v0.b[7] +; CHECK-NEXT: and w8, w8, w10 +; CHECK-NEXT: and w8, w8, w11 +; CHECK-NEXT: and w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redand_v8i8: @@ -294,24 +294,24 @@ ; GISEL-NEXT: mov b1, v0.b[1] ; GISEL-NEXT: mov b2, v0.b[2] ; GISEL-NEXT: mov b3, v0.b[3] -; GISEL-NEXT: mov b4, v0.b[4] -; GISEL-NEXT: mov b5, v0.b[5] -; GISEL-NEXT: mov b6, v0.b[6] -; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: fmov w8, s0 ; GISEL-NEXT: fmov w9, s1 ; GISEL-NEXT: fmov w10, s2 ; GISEL-NEXT: fmov w11, s3 -; GISEL-NEXT: fmov w12, s4 -; GISEL-NEXT: fmov w13, s5 +; GISEL-NEXT: mov b4, v0.b[4] +; GISEL-NEXT: mov b5, v0.b[5] +; GISEL-NEXT: mov b6, v0.b[6] +; GISEL-NEXT: mov b7, v0.b[7] +; GISEL-NEXT: and w8, w8, w9 +; GISEL-NEXT: and w9, w10, w11 +; GISEL-NEXT: fmov w10, s4 +; GISEL-NEXT: fmov w11, s5 +; GISEL-NEXT: fmov w12, s6 +; GISEL-NEXT: fmov w13, s7 ; GISEL-NEXT: and w8, w8, w9 -; GISEL-NEXT: fmov w9, s6 ; GISEL-NEXT: and w10, w10, w11 -; GISEL-NEXT: fmov w11, s7 -; GISEL-NEXT: and w12, w12, w13 -; GISEL-NEXT: and w8, w8, w10 -; GISEL-NEXT: and w9, w9, w11 -; GISEL-NEXT: and w9, w12, w9 +; GISEL-NEXT: and w11, w12, w13 +; GISEL-NEXT: and w9, w10, w11 ; GISEL-NEXT: and w0, w8, w9 ; GISEL-NEXT: ret %and_result = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> %a) @@ -326,18 +326,18 @@ ; CHECK-NEXT: umov w8, v0.b[1] ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] -; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] ; CHECK-NEXT: and w8, w9, w8 +; CHECK-NEXT: umov w9, v0.b[3] +; CHECK-NEXT: and w8, w8, w10 +; CHECK-NEXT: umov w10, v0.b[4] +; CHECK-NEXT: and w8, w8, w9 ; CHECK-NEXT: umov w9, v0.b[5] ; CHECK-NEXT: and w8, w8, w10 ; CHECK-NEXT: umov w10, v0.b[6] -; CHECK-NEXT: and w8, w8, w11 -; CHECK-NEXT: umov w11, v0.b[7] -; CHECK-NEXT: and w8, w8, w12 ; CHECK-NEXT: and w8, w8, w9 +; CHECK-NEXT: umov w9, v0.b[7] ; CHECK-NEXT: and w8, w8, w10 -; CHECK-NEXT: and w0, w8, w11 +; CHECK-NEXT: and w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redand_v16i8: @@ -347,24 +347,24 @@ ; GISEL-NEXT: mov b1, v0.b[1] ; GISEL-NEXT: mov b2, v0.b[2] ; GISEL-NEXT: mov b3, v0.b[3] -; GISEL-NEXT: mov b4, v0.b[4] -; GISEL-NEXT: mov b5, v0.b[5] -; GISEL-NEXT: mov b6, v0.b[6] -; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: fmov w8, s0 ; GISEL-NEXT: fmov w9, s1 ; GISEL-NEXT: fmov w10, s2 ; GISEL-NEXT: fmov w11, s3 -; GISEL-NEXT: fmov w12, s4 -; GISEL-NEXT: fmov w13, s5 +; GISEL-NEXT: mov b4, v0.b[4] +; GISEL-NEXT: mov b5, v0.b[5] +; GISEL-NEXT: mov b6, v0.b[6] +; GISEL-NEXT: mov b7, v0.b[7] +; GISEL-NEXT: and w8, w8, w9 +; GISEL-NEXT: and w9, w10, w11 +; GISEL-NEXT: fmov w10, s4 +; GISEL-NEXT: fmov w11, s5 +; GISEL-NEXT: fmov w12, s6 +; GISEL-NEXT: fmov w13, s7 ; GISEL-NEXT: and w8, w8, w9 -; GISEL-NEXT: fmov w9, s6 ; GISEL-NEXT: and w10, w10, w11 -; GISEL-NEXT: fmov w11, s7 -; GISEL-NEXT: and w12, w12, w13 -; GISEL-NEXT: and w8, w8, w10 -; GISEL-NEXT: and w9, w9, w11 -; GISEL-NEXT: and w9, w12, w9 +; GISEL-NEXT: and w11, w12, w13 +; GISEL-NEXT: and w9, w10, w11 ; GISEL-NEXT: and w0, w8, w9 ; GISEL-NEXT: ret %and_result = call i8 @llvm.vector.reduce.and.v16i8(<16 x i8> %a) @@ -380,18 +380,18 @@ ; CHECK-NEXT: umov w8, v0.b[1] ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] -; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] ; CHECK-NEXT: and w8, w9, w8 +; CHECK-NEXT: umov w9, v0.b[3] +; CHECK-NEXT: and w8, w8, w10 +; CHECK-NEXT: umov w10, v0.b[4] +; CHECK-NEXT: and w8, w8, w9 ; CHECK-NEXT: umov w9, v0.b[5] ; CHECK-NEXT: and w8, w8, w10 ; CHECK-NEXT: umov w10, v0.b[6] -; CHECK-NEXT: and w8, w8, w11 -; CHECK-NEXT: umov w11, v0.b[7] -; CHECK-NEXT: and w8, w8, w12 ; CHECK-NEXT: and w8, w8, w9 +; CHECK-NEXT: umov w9, v0.b[7] ; CHECK-NEXT: and w8, w8, w10 -; CHECK-NEXT: and w0, w8, w11 +; CHECK-NEXT: and w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redand_v32i8: @@ -402,24 +402,24 @@ ; GISEL-NEXT: mov b1, v0.b[1] ; GISEL-NEXT: mov b2, v0.b[2] ; GISEL-NEXT: mov b3, v0.b[3] -; GISEL-NEXT: mov b4, v0.b[4] -; GISEL-NEXT: mov b5, v0.b[5] -; GISEL-NEXT: mov b6, v0.b[6] -; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: fmov w8, s0 ; GISEL-NEXT: fmov w9, s1 ; GISEL-NEXT: fmov w10, s2 ; GISEL-NEXT: fmov w11, s3 -; GISEL-NEXT: fmov w12, s4 -; GISEL-NEXT: fmov w13, s5 +; GISEL-NEXT: mov b4, v0.b[4] +; GISEL-NEXT: mov b5, v0.b[5] +; GISEL-NEXT: mov b6, v0.b[6] +; GISEL-NEXT: mov b7, v0.b[7] +; GISEL-NEXT: and w8, w8, w9 +; GISEL-NEXT: and w9, w10, w11 +; GISEL-NEXT: fmov w10, s4 +; GISEL-NEXT: fmov w11, s5 +; GISEL-NEXT: fmov w12, s6 +; GISEL-NEXT: fmov w13, s7 ; GISEL-NEXT: and w8, w8, w9 -; GISEL-NEXT: fmov w9, s6 ; GISEL-NEXT: and w10, w10, w11 -; GISEL-NEXT: fmov w11, s7 -; GISEL-NEXT: and w12, w12, w13 -; GISEL-NEXT: and w8, w8, w10 -; GISEL-NEXT: and w9, w9, w11 -; GISEL-NEXT: and w9, w12, w9 +; GISEL-NEXT: and w11, w12, w13 +; GISEL-NEXT: and w9, w10, w11 ; GISEL-NEXT: and w0, w8, w9 ; GISEL-NEXT: ret %and_result = call i8 @llvm.vector.reduce.and.v32i8(<32 x i8> %a) @@ -465,10 +465,10 @@ ; CHECK-NEXT: umov w8, v0.h[1] ; CHECK-NEXT: umov w9, v0.h[0] ; CHECK-NEXT: umov w10, v0.h[2] -; CHECK-NEXT: umov w11, v0.h[3] ; CHECK-NEXT: and w8, w9, w8 +; CHECK-NEXT: umov w9, v0.h[3] ; CHECK-NEXT: and w8, w8, w10 -; CHECK-NEXT: and w0, w8, w11 +; CHECK-NEXT: and w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redand_v8i16: @@ -499,10 +499,10 @@ ; CHECK-NEXT: umov w8, v0.h[1] ; CHECK-NEXT: umov w9, v0.h[0] ; CHECK-NEXT: umov w10, v0.h[2] -; CHECK-NEXT: umov w11, v0.h[3] ; CHECK-NEXT: and w8, w9, w8 +; CHECK-NEXT: umov w9, v0.h[3] ; CHECK-NEXT: and w8, w8, w10 -; CHECK-NEXT: and w0, w8, w11 +; CHECK-NEXT: and w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redand_v16i16: @@ -529,8 +529,8 @@ ; CHECK-LABEL: test_redand_v2i32: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: and w0, w9, w8 ; CHECK-NEXT: ret ; @@ -551,8 +551,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: and w0, w9, w8 ; CHECK-NEXT: ret ; @@ -575,8 +575,8 @@ ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: and w0, w9, w8 ; CHECK-NEXT: ret ; diff --git a/llvm/test/CodeGen/AArch64/reduce-or.ll b/llvm/test/CodeGen/AArch64/reduce-or.ll --- a/llvm/test/CodeGen/AArch64/reduce-or.ll +++ b/llvm/test/CodeGen/AArch64/reduce-or.ll @@ -21,8 +21,8 @@ ; CHECK-LABEL: test_redor_v2i1: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: orr w8, w9, w8 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret @@ -81,17 +81,17 @@ ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] ; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] -; CHECK-NEXT: umov w13, v0.b[5] ; CHECK-NEXT: orr w8, w9, w8 -; CHECK-NEXT: umov w9, v0.b[6] +; CHECK-NEXT: umov w9, v0.b[4] ; CHECK-NEXT: orr w8, w8, w10 -; CHECK-NEXT: umov w10, v0.b[7] +; CHECK-NEXT: umov w10, v0.b[5] ; CHECK-NEXT: orr w8, w8, w11 -; CHECK-NEXT: orr w8, w8, w12 -; CHECK-NEXT: orr w8, w8, w13 +; CHECK-NEXT: umov w11, v0.b[6] ; CHECK-NEXT: orr w8, w8, w9 +; CHECK-NEXT: umov w9, v0.b[7] ; CHECK-NEXT: orr w8, w8, w10 +; CHECK-NEXT: orr w8, w8, w11 +; CHECK-NEXT: orr w8, w8, w9 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret ; @@ -101,24 +101,24 @@ ; GISEL-NEXT: mov b1, v0.b[1] ; GISEL-NEXT: mov b2, v0.b[2] ; GISEL-NEXT: mov b3, v0.b[3] -; GISEL-NEXT: mov b4, v0.b[4] -; GISEL-NEXT: mov b5, v0.b[5] -; GISEL-NEXT: mov b6, v0.b[6] -; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: fmov w8, s0 ; GISEL-NEXT: fmov w9, s1 ; GISEL-NEXT: fmov w10, s2 ; GISEL-NEXT: fmov w11, s3 -; GISEL-NEXT: fmov w12, s4 -; GISEL-NEXT: fmov w13, s5 +; GISEL-NEXT: mov b4, v0.b[4] +; GISEL-NEXT: mov b5, v0.b[5] +; GISEL-NEXT: mov b6, v0.b[6] +; GISEL-NEXT: mov b7, v0.b[7] +; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: orr w9, w10, w11 +; GISEL-NEXT: fmov w10, s4 +; GISEL-NEXT: fmov w11, s5 +; GISEL-NEXT: fmov w12, s6 +; GISEL-NEXT: fmov w13, s7 ; GISEL-NEXT: orr w8, w8, w9 -; GISEL-NEXT: fmov w9, s6 ; GISEL-NEXT: orr w10, w10, w11 -; GISEL-NEXT: fmov w11, s7 -; GISEL-NEXT: orr w12, w12, w13 -; GISEL-NEXT: orr w8, w8, w10 -; GISEL-NEXT: orr w9, w9, w11 -; GISEL-NEXT: orr w9, w12, w9 +; GISEL-NEXT: orr w11, w12, w13 +; GISEL-NEXT: orr w9, w10, w11 ; GISEL-NEXT: orr w8, w8, w9 ; GISEL-NEXT: and w0, w8, #0x1 ; GISEL-NEXT: ret @@ -134,18 +134,18 @@ ; CHECK-NEXT: umov w8, v0.b[1] ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] -; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] ; CHECK-NEXT: orr w8, w9, w8 +; CHECK-NEXT: umov w9, v0.b[3] +; CHECK-NEXT: orr w8, w8, w10 +; CHECK-NEXT: umov w10, v0.b[4] +; CHECK-NEXT: orr w8, w8, w9 ; CHECK-NEXT: umov w9, v0.b[5] ; CHECK-NEXT: orr w8, w8, w10 ; CHECK-NEXT: umov w10, v0.b[6] -; CHECK-NEXT: orr w8, w8, w11 -; CHECK-NEXT: umov w11, v0.b[7] -; CHECK-NEXT: orr w8, w8, w12 ; CHECK-NEXT: orr w8, w8, w9 +; CHECK-NEXT: umov w9, v0.b[7] ; CHECK-NEXT: orr w8, w8, w10 -; CHECK-NEXT: orr w8, w8, w11 +; CHECK-NEXT: orr w8, w8, w9 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret ; @@ -154,45 +154,45 @@ ; GISEL-NEXT: mov b1, v0.b[1] ; GISEL-NEXT: mov b2, v0.b[2] ; GISEL-NEXT: mov b3, v0.b[3] -; GISEL-NEXT: mov b4, v0.b[4] -; GISEL-NEXT: mov b5, v0.b[5] -; GISEL-NEXT: mov b6, v0.b[6] -; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: fmov w8, s0 ; GISEL-NEXT: fmov w9, s1 ; GISEL-NEXT: fmov w10, s2 ; GISEL-NEXT: fmov w11, s3 -; GISEL-NEXT: mov b16, v0.b[8] -; GISEL-NEXT: mov b17, v0.b[9] -; GISEL-NEXT: mov b18, v0.b[10] -; GISEL-NEXT: mov b19, v0.b[11] +; GISEL-NEXT: mov b4, v0.b[4] +; GISEL-NEXT: mov b5, v0.b[5] +; GISEL-NEXT: mov b6, v0.b[6] +; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: orr w8, w8, w9 ; GISEL-NEXT: orr w9, w10, w11 ; GISEL-NEXT: fmov w10, s4 ; GISEL-NEXT: fmov w11, s5 ; GISEL-NEXT: fmov w12, s6 ; GISEL-NEXT: fmov w13, s7 -; GISEL-NEXT: mov b20, v0.b[12] -; GISEL-NEXT: mov b21, v0.b[13] -; GISEL-NEXT: mov b22, v0.b[14] -; GISEL-NEXT: mov b23, v0.b[15] +; GISEL-NEXT: mov b16, v0.b[8] +; GISEL-NEXT: mov b17, v0.b[9] +; GISEL-NEXT: mov b18, v0.b[10] +; GISEL-NEXT: mov b19, v0.b[11] ; GISEL-NEXT: orr w10, w10, w11 ; GISEL-NEXT: orr w11, w12, w13 ; GISEL-NEXT: fmov w12, s16 ; GISEL-NEXT: fmov w13, s17 ; GISEL-NEXT: fmov w14, s18 ; GISEL-NEXT: fmov w15, s19 -; GISEL-NEXT: fmov w16, s22 -; GISEL-NEXT: fmov w17, s23 -; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: mov b20, v0.b[12] +; GISEL-NEXT: mov b21, v0.b[13] +; GISEL-NEXT: mov b22, v0.b[14] +; GISEL-NEXT: mov b23, v0.b[15] ; GISEL-NEXT: orr w12, w12, w13 -; GISEL-NEXT: orr w9, w10, w11 ; GISEL-NEXT: orr w13, w14, w15 ; GISEL-NEXT: fmov w14, s20 ; GISEL-NEXT: fmov w15, s21 -; GISEL-NEXT: orr w10, w12, w13 +; GISEL-NEXT: fmov w16, s22 +; GISEL-NEXT: fmov w17, s23 ; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: orr w9, w10, w11 +; GISEL-NEXT: orr w10, w12, w13 ; GISEL-NEXT: orr w14, w14, w15 +; GISEL-NEXT: orr w8, w8, w9 ; GISEL-NEXT: orr w15, w16, w17 ; GISEL-NEXT: orr w11, w14, w15 ; GISEL-NEXT: orr w9, w10, w11 @@ -274,17 +274,17 @@ ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] ; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] -; CHECK-NEXT: umov w13, v0.b[5] ; CHECK-NEXT: orr w8, w9, w8 -; CHECK-NEXT: umov w9, v0.b[6] +; CHECK-NEXT: umov w9, v0.b[4] ; CHECK-NEXT: orr w8, w8, w10 -; CHECK-NEXT: umov w10, v0.b[7] +; CHECK-NEXT: umov w10, v0.b[5] ; CHECK-NEXT: orr w8, w8, w11 -; CHECK-NEXT: orr w8, w8, w12 -; CHECK-NEXT: orr w8, w8, w13 +; CHECK-NEXT: umov w11, v0.b[6] ; CHECK-NEXT: orr w8, w8, w9 -; CHECK-NEXT: orr w0, w8, w10 +; CHECK-NEXT: umov w9, v0.b[7] +; CHECK-NEXT: orr w8, w8, w10 +; CHECK-NEXT: orr w8, w8, w11 +; CHECK-NEXT: orr w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redor_v8i8: @@ -293,24 +293,24 @@ ; GISEL-NEXT: mov b1, v0.b[1] ; GISEL-NEXT: mov b2, v0.b[2] ; GISEL-NEXT: mov b3, v0.b[3] -; GISEL-NEXT: mov b4, v0.b[4] -; GISEL-NEXT: mov b5, v0.b[5] -; GISEL-NEXT: mov b6, v0.b[6] -; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: fmov w8, s0 ; GISEL-NEXT: fmov w9, s1 ; GISEL-NEXT: fmov w10, s2 ; GISEL-NEXT: fmov w11, s3 -; GISEL-NEXT: fmov w12, s4 -; GISEL-NEXT: fmov w13, s5 +; GISEL-NEXT: mov b4, v0.b[4] +; GISEL-NEXT: mov b5, v0.b[5] +; GISEL-NEXT: mov b6, v0.b[6] +; GISEL-NEXT: mov b7, v0.b[7] +; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: orr w9, w10, w11 +; GISEL-NEXT: fmov w10, s4 +; GISEL-NEXT: fmov w11, s5 +; GISEL-NEXT: fmov w12, s6 +; GISEL-NEXT: fmov w13, s7 ; GISEL-NEXT: orr w8, w8, w9 -; GISEL-NEXT: fmov w9, s6 ; GISEL-NEXT: orr w10, w10, w11 -; GISEL-NEXT: fmov w11, s7 -; GISEL-NEXT: orr w12, w12, w13 -; GISEL-NEXT: orr w8, w8, w10 -; GISEL-NEXT: orr w9, w9, w11 -; GISEL-NEXT: orr w9, w12, w9 +; GISEL-NEXT: orr w11, w12, w13 +; GISEL-NEXT: orr w9, w10, w11 ; GISEL-NEXT: orr w0, w8, w9 ; GISEL-NEXT: ret %or_result = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> %a) @@ -325,18 +325,18 @@ ; CHECK-NEXT: umov w8, v0.b[1] ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] -; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] ; CHECK-NEXT: orr w8, w9, w8 +; CHECK-NEXT: umov w9, v0.b[3] +; CHECK-NEXT: orr w8, w8, w10 +; CHECK-NEXT: umov w10, v0.b[4] +; CHECK-NEXT: orr w8, w8, w9 ; CHECK-NEXT: umov w9, v0.b[5] ; CHECK-NEXT: orr w8, w8, w10 ; CHECK-NEXT: umov w10, v0.b[6] -; CHECK-NEXT: orr w8, w8, w11 -; CHECK-NEXT: umov w11, v0.b[7] -; CHECK-NEXT: orr w8, w8, w12 ; CHECK-NEXT: orr w8, w8, w9 +; CHECK-NEXT: umov w9, v0.b[7] ; CHECK-NEXT: orr w8, w8, w10 -; CHECK-NEXT: orr w0, w8, w11 +; CHECK-NEXT: orr w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redor_v16i8: @@ -346,24 +346,24 @@ ; GISEL-NEXT: mov b1, v0.b[1] ; GISEL-NEXT: mov b2, v0.b[2] ; GISEL-NEXT: mov b3, v0.b[3] -; GISEL-NEXT: mov b4, v0.b[4] -; GISEL-NEXT: mov b5, v0.b[5] -; GISEL-NEXT: mov b6, v0.b[6] -; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: fmov w8, s0 ; GISEL-NEXT: fmov w9, s1 ; GISEL-NEXT: fmov w10, s2 ; GISEL-NEXT: fmov w11, s3 -; GISEL-NEXT: fmov w12, s4 -; GISEL-NEXT: fmov w13, s5 +; GISEL-NEXT: mov b4, v0.b[4] +; GISEL-NEXT: mov b5, v0.b[5] +; GISEL-NEXT: mov b6, v0.b[6] +; GISEL-NEXT: mov b7, v0.b[7] +; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: orr w9, w10, w11 +; GISEL-NEXT: fmov w10, s4 +; GISEL-NEXT: fmov w11, s5 +; GISEL-NEXT: fmov w12, s6 +; GISEL-NEXT: fmov w13, s7 ; GISEL-NEXT: orr w8, w8, w9 -; GISEL-NEXT: fmov w9, s6 ; GISEL-NEXT: orr w10, w10, w11 -; GISEL-NEXT: fmov w11, s7 -; GISEL-NEXT: orr w12, w12, w13 -; GISEL-NEXT: orr w8, w8, w10 -; GISEL-NEXT: orr w9, w9, w11 -; GISEL-NEXT: orr w9, w12, w9 +; GISEL-NEXT: orr w11, w12, w13 +; GISEL-NEXT: orr w9, w10, w11 ; GISEL-NEXT: orr w0, w8, w9 ; GISEL-NEXT: ret %or_result = call i8 @llvm.vector.reduce.or.v16i8(<16 x i8> %a) @@ -379,18 +379,18 @@ ; CHECK-NEXT: umov w8, v0.b[1] ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] -; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] ; CHECK-NEXT: orr w8, w9, w8 +; CHECK-NEXT: umov w9, v0.b[3] +; CHECK-NEXT: orr w8, w8, w10 +; CHECK-NEXT: umov w10, v0.b[4] +; CHECK-NEXT: orr w8, w8, w9 ; CHECK-NEXT: umov w9, v0.b[5] ; CHECK-NEXT: orr w8, w8, w10 ; CHECK-NEXT: umov w10, v0.b[6] -; CHECK-NEXT: orr w8, w8, w11 -; CHECK-NEXT: umov w11, v0.b[7] -; CHECK-NEXT: orr w8, w8, w12 ; CHECK-NEXT: orr w8, w8, w9 +; CHECK-NEXT: umov w9, v0.b[7] ; CHECK-NEXT: orr w8, w8, w10 -; CHECK-NEXT: orr w0, w8, w11 +; CHECK-NEXT: orr w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redor_v32i8: @@ -401,24 +401,24 @@ ; GISEL-NEXT: mov b1, v0.b[1] ; GISEL-NEXT: mov b2, v0.b[2] ; GISEL-NEXT: mov b3, v0.b[3] -; GISEL-NEXT: mov b4, v0.b[4] -; GISEL-NEXT: mov b5, v0.b[5] -; GISEL-NEXT: mov b6, v0.b[6] -; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: fmov w8, s0 ; GISEL-NEXT: fmov w9, s1 ; GISEL-NEXT: fmov w10, s2 ; GISEL-NEXT: fmov w11, s3 -; GISEL-NEXT: fmov w12, s4 -; GISEL-NEXT: fmov w13, s5 +; GISEL-NEXT: mov b4, v0.b[4] +; GISEL-NEXT: mov b5, v0.b[5] +; GISEL-NEXT: mov b6, v0.b[6] +; GISEL-NEXT: mov b7, v0.b[7] +; GISEL-NEXT: orr w8, w8, w9 +; GISEL-NEXT: orr w9, w10, w11 +; GISEL-NEXT: fmov w10, s4 +; GISEL-NEXT: fmov w11, s5 +; GISEL-NEXT: fmov w12, s6 +; GISEL-NEXT: fmov w13, s7 ; GISEL-NEXT: orr w8, w8, w9 -; GISEL-NEXT: fmov w9, s6 ; GISEL-NEXT: orr w10, w10, w11 -; GISEL-NEXT: fmov w11, s7 -; GISEL-NEXT: orr w12, w12, w13 -; GISEL-NEXT: orr w8, w8, w10 -; GISEL-NEXT: orr w9, w9, w11 -; GISEL-NEXT: orr w9, w12, w9 +; GISEL-NEXT: orr w11, w12, w13 +; GISEL-NEXT: orr w9, w10, w11 ; GISEL-NEXT: orr w0, w8, w9 ; GISEL-NEXT: ret %or_result = call i8 @llvm.vector.reduce.or.v32i8(<32 x i8> %a) @@ -464,10 +464,10 @@ ; CHECK-NEXT: umov w8, v0.h[1] ; CHECK-NEXT: umov w9, v0.h[0] ; CHECK-NEXT: umov w10, v0.h[2] -; CHECK-NEXT: umov w11, v0.h[3] ; CHECK-NEXT: orr w8, w9, w8 +; CHECK-NEXT: umov w9, v0.h[3] ; CHECK-NEXT: orr w8, w8, w10 -; CHECK-NEXT: orr w0, w8, w11 +; CHECK-NEXT: orr w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redor_v8i16: @@ -498,10 +498,10 @@ ; CHECK-NEXT: umov w8, v0.h[1] ; CHECK-NEXT: umov w9, v0.h[0] ; CHECK-NEXT: umov w10, v0.h[2] -; CHECK-NEXT: umov w11, v0.h[3] ; CHECK-NEXT: orr w8, w9, w8 +; CHECK-NEXT: umov w9, v0.h[3] ; CHECK-NEXT: orr w8, w8, w10 -; CHECK-NEXT: orr w0, w8, w11 +; CHECK-NEXT: orr w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redor_v16i16: @@ -528,8 +528,8 @@ ; CHECK-LABEL: test_redor_v2i32: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: orr w0, w9, w8 ; CHECK-NEXT: ret ; @@ -550,8 +550,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: orr w0, w9, w8 ; CHECK-NEXT: ret ; @@ -574,8 +574,8 @@ ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: orr w0, w9, w8 ; CHECK-NEXT: ret ; diff --git a/llvm/test/CodeGen/AArch64/reduce-xor.ll b/llvm/test/CodeGen/AArch64/reduce-xor.ll --- a/llvm/test/CodeGen/AArch64/reduce-xor.ll +++ b/llvm/test/CodeGen/AArch64/reduce-xor.ll @@ -20,8 +20,8 @@ ; CHECK-LABEL: test_redxor_v2i1: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: eor w8, w9, w8 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret @@ -80,17 +80,17 @@ ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] ; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] -; CHECK-NEXT: umov w13, v0.b[5] ; CHECK-NEXT: eor w8, w9, w8 -; CHECK-NEXT: umov w9, v0.b[6] +; CHECK-NEXT: umov w9, v0.b[4] ; CHECK-NEXT: eor w8, w8, w10 -; CHECK-NEXT: umov w10, v0.b[7] +; CHECK-NEXT: umov w10, v0.b[5] ; CHECK-NEXT: eor w8, w8, w11 -; CHECK-NEXT: eor w8, w8, w12 -; CHECK-NEXT: eor w8, w8, w13 +; CHECK-NEXT: umov w11, v0.b[6] ; CHECK-NEXT: eor w8, w8, w9 +; CHECK-NEXT: umov w9, v0.b[7] ; CHECK-NEXT: eor w8, w8, w10 +; CHECK-NEXT: eor w8, w8, w11 +; CHECK-NEXT: eor w8, w8, w9 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret ; @@ -100,24 +100,24 @@ ; GISEL-NEXT: mov b1, v0.b[1] ; GISEL-NEXT: mov b2, v0.b[2] ; GISEL-NEXT: mov b3, v0.b[3] -; GISEL-NEXT: mov b4, v0.b[4] -; GISEL-NEXT: mov b5, v0.b[5] -; GISEL-NEXT: mov b6, v0.b[6] -; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: fmov w8, s0 ; GISEL-NEXT: fmov w9, s1 ; GISEL-NEXT: fmov w10, s2 ; GISEL-NEXT: fmov w11, s3 -; GISEL-NEXT: fmov w12, s4 -; GISEL-NEXT: fmov w13, s5 +; GISEL-NEXT: mov b4, v0.b[4] +; GISEL-NEXT: mov b5, v0.b[5] +; GISEL-NEXT: mov b6, v0.b[6] +; GISEL-NEXT: mov b7, v0.b[7] +; GISEL-NEXT: eor w8, w8, w9 +; GISEL-NEXT: eor w9, w10, w11 +; GISEL-NEXT: fmov w10, s4 +; GISEL-NEXT: fmov w11, s5 +; GISEL-NEXT: fmov w12, s6 +; GISEL-NEXT: fmov w13, s7 ; GISEL-NEXT: eor w8, w8, w9 -; GISEL-NEXT: fmov w9, s6 ; GISEL-NEXT: eor w10, w10, w11 -; GISEL-NEXT: fmov w11, s7 -; GISEL-NEXT: eor w12, w12, w13 -; GISEL-NEXT: eor w8, w8, w10 -; GISEL-NEXT: eor w9, w9, w11 -; GISEL-NEXT: eor w9, w12, w9 +; GISEL-NEXT: eor w11, w12, w13 +; GISEL-NEXT: eor w9, w10, w11 ; GISEL-NEXT: eor w8, w8, w9 ; GISEL-NEXT: and w0, w8, #0x1 ; GISEL-NEXT: ret @@ -133,18 +133,18 @@ ; CHECK-NEXT: umov w8, v0.b[1] ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] -; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] ; CHECK-NEXT: eor w8, w9, w8 +; CHECK-NEXT: umov w9, v0.b[3] +; CHECK-NEXT: eor w8, w8, w10 +; CHECK-NEXT: umov w10, v0.b[4] +; CHECK-NEXT: eor w8, w8, w9 ; CHECK-NEXT: umov w9, v0.b[5] ; CHECK-NEXT: eor w8, w8, w10 ; CHECK-NEXT: umov w10, v0.b[6] -; CHECK-NEXT: eor w8, w8, w11 -; CHECK-NEXT: umov w11, v0.b[7] -; CHECK-NEXT: eor w8, w8, w12 ; CHECK-NEXT: eor w8, w8, w9 +; CHECK-NEXT: umov w9, v0.b[7] ; CHECK-NEXT: eor w8, w8, w10 -; CHECK-NEXT: eor w8, w8, w11 +; CHECK-NEXT: eor w8, w8, w9 ; CHECK-NEXT: and w0, w8, #0x1 ; CHECK-NEXT: ret ; @@ -153,45 +153,45 @@ ; GISEL-NEXT: mov b1, v0.b[1] ; GISEL-NEXT: mov b2, v0.b[2] ; GISEL-NEXT: mov b3, v0.b[3] -; GISEL-NEXT: mov b4, v0.b[4] -; GISEL-NEXT: mov b5, v0.b[5] -; GISEL-NEXT: mov b6, v0.b[6] -; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: fmov w8, s0 ; GISEL-NEXT: fmov w9, s1 ; GISEL-NEXT: fmov w10, s2 ; GISEL-NEXT: fmov w11, s3 -; GISEL-NEXT: mov b16, v0.b[8] -; GISEL-NEXT: mov b17, v0.b[9] -; GISEL-NEXT: mov b18, v0.b[10] -; GISEL-NEXT: mov b19, v0.b[11] +; GISEL-NEXT: mov b4, v0.b[4] +; GISEL-NEXT: mov b5, v0.b[5] +; GISEL-NEXT: mov b6, v0.b[6] +; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: eor w8, w8, w9 ; GISEL-NEXT: eor w9, w10, w11 ; GISEL-NEXT: fmov w10, s4 ; GISEL-NEXT: fmov w11, s5 ; GISEL-NEXT: fmov w12, s6 ; GISEL-NEXT: fmov w13, s7 -; GISEL-NEXT: mov b20, v0.b[12] -; GISEL-NEXT: mov b21, v0.b[13] -; GISEL-NEXT: mov b22, v0.b[14] -; GISEL-NEXT: mov b23, v0.b[15] +; GISEL-NEXT: mov b16, v0.b[8] +; GISEL-NEXT: mov b17, v0.b[9] +; GISEL-NEXT: mov b18, v0.b[10] +; GISEL-NEXT: mov b19, v0.b[11] ; GISEL-NEXT: eor w10, w10, w11 ; GISEL-NEXT: eor w11, w12, w13 ; GISEL-NEXT: fmov w12, s16 ; GISEL-NEXT: fmov w13, s17 ; GISEL-NEXT: fmov w14, s18 ; GISEL-NEXT: fmov w15, s19 -; GISEL-NEXT: fmov w16, s22 -; GISEL-NEXT: fmov w17, s23 -; GISEL-NEXT: eor w8, w8, w9 +; GISEL-NEXT: mov b20, v0.b[12] +; GISEL-NEXT: mov b21, v0.b[13] +; GISEL-NEXT: mov b22, v0.b[14] +; GISEL-NEXT: mov b23, v0.b[15] ; GISEL-NEXT: eor w12, w12, w13 -; GISEL-NEXT: eor w9, w10, w11 ; GISEL-NEXT: eor w13, w14, w15 ; GISEL-NEXT: fmov w14, s20 ; GISEL-NEXT: fmov w15, s21 -; GISEL-NEXT: eor w10, w12, w13 +; GISEL-NEXT: fmov w16, s22 +; GISEL-NEXT: fmov w17, s23 ; GISEL-NEXT: eor w8, w8, w9 +; GISEL-NEXT: eor w9, w10, w11 +; GISEL-NEXT: eor w10, w12, w13 ; GISEL-NEXT: eor w14, w14, w15 +; GISEL-NEXT: eor w8, w8, w9 ; GISEL-NEXT: eor w15, w16, w17 ; GISEL-NEXT: eor w11, w14, w15 ; GISEL-NEXT: eor w9, w10, w11 @@ -273,17 +273,17 @@ ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] ; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] -; CHECK-NEXT: umov w13, v0.b[5] ; CHECK-NEXT: eor w8, w9, w8 -; CHECK-NEXT: umov w9, v0.b[6] +; CHECK-NEXT: umov w9, v0.b[4] ; CHECK-NEXT: eor w8, w8, w10 -; CHECK-NEXT: umov w10, v0.b[7] +; CHECK-NEXT: umov w10, v0.b[5] ; CHECK-NEXT: eor w8, w8, w11 -; CHECK-NEXT: eor w8, w8, w12 -; CHECK-NEXT: eor w8, w8, w13 +; CHECK-NEXT: umov w11, v0.b[6] ; CHECK-NEXT: eor w8, w8, w9 -; CHECK-NEXT: eor w0, w8, w10 +; CHECK-NEXT: umov w9, v0.b[7] +; CHECK-NEXT: eor w8, w8, w10 +; CHECK-NEXT: eor w8, w8, w11 +; CHECK-NEXT: eor w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redxor_v8i8: @@ -292,24 +292,24 @@ ; GISEL-NEXT: mov b1, v0.b[1] ; GISEL-NEXT: mov b2, v0.b[2] ; GISEL-NEXT: mov b3, v0.b[3] -; GISEL-NEXT: mov b4, v0.b[4] -; GISEL-NEXT: mov b5, v0.b[5] -; GISEL-NEXT: mov b6, v0.b[6] -; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: fmov w8, s0 ; GISEL-NEXT: fmov w9, s1 ; GISEL-NEXT: fmov w10, s2 ; GISEL-NEXT: fmov w11, s3 -; GISEL-NEXT: fmov w12, s4 -; GISEL-NEXT: fmov w13, s5 +; GISEL-NEXT: mov b4, v0.b[4] +; GISEL-NEXT: mov b5, v0.b[5] +; GISEL-NEXT: mov b6, v0.b[6] +; GISEL-NEXT: mov b7, v0.b[7] +; GISEL-NEXT: eor w8, w8, w9 +; GISEL-NEXT: eor w9, w10, w11 +; GISEL-NEXT: fmov w10, s4 +; GISEL-NEXT: fmov w11, s5 +; GISEL-NEXT: fmov w12, s6 +; GISEL-NEXT: fmov w13, s7 ; GISEL-NEXT: eor w8, w8, w9 -; GISEL-NEXT: fmov w9, s6 ; GISEL-NEXT: eor w10, w10, w11 -; GISEL-NEXT: fmov w11, s7 -; GISEL-NEXT: eor w12, w12, w13 -; GISEL-NEXT: eor w8, w8, w10 -; GISEL-NEXT: eor w9, w9, w11 -; GISEL-NEXT: eor w9, w12, w9 +; GISEL-NEXT: eor w11, w12, w13 +; GISEL-NEXT: eor w9, w10, w11 ; GISEL-NEXT: eor w0, w8, w9 ; GISEL-NEXT: ret %xor_result = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> %a) @@ -324,18 +324,18 @@ ; CHECK-NEXT: umov w8, v0.b[1] ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] -; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] ; CHECK-NEXT: eor w8, w9, w8 +; CHECK-NEXT: umov w9, v0.b[3] +; CHECK-NEXT: eor w8, w8, w10 +; CHECK-NEXT: umov w10, v0.b[4] +; CHECK-NEXT: eor w8, w8, w9 ; CHECK-NEXT: umov w9, v0.b[5] ; CHECK-NEXT: eor w8, w8, w10 ; CHECK-NEXT: umov w10, v0.b[6] -; CHECK-NEXT: eor w8, w8, w11 -; CHECK-NEXT: umov w11, v0.b[7] -; CHECK-NEXT: eor w8, w8, w12 ; CHECK-NEXT: eor w8, w8, w9 +; CHECK-NEXT: umov w9, v0.b[7] ; CHECK-NEXT: eor w8, w8, w10 -; CHECK-NEXT: eor w0, w8, w11 +; CHECK-NEXT: eor w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redxor_v16i8: @@ -345,24 +345,24 @@ ; GISEL-NEXT: mov b1, v0.b[1] ; GISEL-NEXT: mov b2, v0.b[2] ; GISEL-NEXT: mov b3, v0.b[3] -; GISEL-NEXT: mov b4, v0.b[4] -; GISEL-NEXT: mov b5, v0.b[5] -; GISEL-NEXT: mov b6, v0.b[6] -; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: fmov w8, s0 ; GISEL-NEXT: fmov w9, s1 ; GISEL-NEXT: fmov w10, s2 ; GISEL-NEXT: fmov w11, s3 -; GISEL-NEXT: fmov w12, s4 -; GISEL-NEXT: fmov w13, s5 +; GISEL-NEXT: mov b4, v0.b[4] +; GISEL-NEXT: mov b5, v0.b[5] +; GISEL-NEXT: mov b6, v0.b[6] +; GISEL-NEXT: mov b7, v0.b[7] +; GISEL-NEXT: eor w8, w8, w9 +; GISEL-NEXT: eor w9, w10, w11 +; GISEL-NEXT: fmov w10, s4 +; GISEL-NEXT: fmov w11, s5 +; GISEL-NEXT: fmov w12, s6 +; GISEL-NEXT: fmov w13, s7 ; GISEL-NEXT: eor w8, w8, w9 -; GISEL-NEXT: fmov w9, s6 ; GISEL-NEXT: eor w10, w10, w11 -; GISEL-NEXT: fmov w11, s7 -; GISEL-NEXT: eor w12, w12, w13 -; GISEL-NEXT: eor w8, w8, w10 -; GISEL-NEXT: eor w9, w9, w11 -; GISEL-NEXT: eor w9, w12, w9 +; GISEL-NEXT: eor w11, w12, w13 +; GISEL-NEXT: eor w9, w10, w11 ; GISEL-NEXT: eor w0, w8, w9 ; GISEL-NEXT: ret %xor_result = call i8 @llvm.vector.reduce.xor.v16i8(<16 x i8> %a) @@ -378,18 +378,18 @@ ; CHECK-NEXT: umov w8, v0.b[1] ; CHECK-NEXT: umov w9, v0.b[0] ; CHECK-NEXT: umov w10, v0.b[2] -; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: umov w12, v0.b[4] ; CHECK-NEXT: eor w8, w9, w8 +; CHECK-NEXT: umov w9, v0.b[3] +; CHECK-NEXT: eor w8, w8, w10 +; CHECK-NEXT: umov w10, v0.b[4] +; CHECK-NEXT: eor w8, w8, w9 ; CHECK-NEXT: umov w9, v0.b[5] ; CHECK-NEXT: eor w8, w8, w10 ; CHECK-NEXT: umov w10, v0.b[6] -; CHECK-NEXT: eor w8, w8, w11 -; CHECK-NEXT: umov w11, v0.b[7] -; CHECK-NEXT: eor w8, w8, w12 ; CHECK-NEXT: eor w8, w8, w9 +; CHECK-NEXT: umov w9, v0.b[7] ; CHECK-NEXT: eor w8, w8, w10 -; CHECK-NEXT: eor w0, w8, w11 +; CHECK-NEXT: eor w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redxor_v32i8: @@ -400,24 +400,24 @@ ; GISEL-NEXT: mov b1, v0.b[1] ; GISEL-NEXT: mov b2, v0.b[2] ; GISEL-NEXT: mov b3, v0.b[3] -; GISEL-NEXT: mov b4, v0.b[4] -; GISEL-NEXT: mov b5, v0.b[5] -; GISEL-NEXT: mov b6, v0.b[6] -; GISEL-NEXT: mov b7, v0.b[7] ; GISEL-NEXT: fmov w8, s0 ; GISEL-NEXT: fmov w9, s1 ; GISEL-NEXT: fmov w10, s2 ; GISEL-NEXT: fmov w11, s3 -; GISEL-NEXT: fmov w12, s4 -; GISEL-NEXT: fmov w13, s5 +; GISEL-NEXT: mov b4, v0.b[4] +; GISEL-NEXT: mov b5, v0.b[5] +; GISEL-NEXT: mov b6, v0.b[6] +; GISEL-NEXT: mov b7, v0.b[7] +; GISEL-NEXT: eor w8, w8, w9 +; GISEL-NEXT: eor w9, w10, w11 +; GISEL-NEXT: fmov w10, s4 +; GISEL-NEXT: fmov w11, s5 +; GISEL-NEXT: fmov w12, s6 +; GISEL-NEXT: fmov w13, s7 ; GISEL-NEXT: eor w8, w8, w9 -; GISEL-NEXT: fmov w9, s6 ; GISEL-NEXT: eor w10, w10, w11 -; GISEL-NEXT: fmov w11, s7 -; GISEL-NEXT: eor w12, w12, w13 -; GISEL-NEXT: eor w8, w8, w10 -; GISEL-NEXT: eor w9, w9, w11 -; GISEL-NEXT: eor w9, w12, w9 +; GISEL-NEXT: eor w11, w12, w13 +; GISEL-NEXT: eor w9, w10, w11 ; GISEL-NEXT: eor w0, w8, w9 ; GISEL-NEXT: ret %xor_result = call i8 @llvm.vector.reduce.xor.v32i8(<32 x i8> %a) @@ -463,10 +463,10 @@ ; CHECK-NEXT: umov w8, v0.h[1] ; CHECK-NEXT: umov w9, v0.h[0] ; CHECK-NEXT: umov w10, v0.h[2] -; CHECK-NEXT: umov w11, v0.h[3] ; CHECK-NEXT: eor w8, w9, w8 +; CHECK-NEXT: umov w9, v0.h[3] ; CHECK-NEXT: eor w8, w8, w10 -; CHECK-NEXT: eor w0, w8, w11 +; CHECK-NEXT: eor w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redxor_v8i16: @@ -497,10 +497,10 @@ ; CHECK-NEXT: umov w8, v0.h[1] ; CHECK-NEXT: umov w9, v0.h[0] ; CHECK-NEXT: umov w10, v0.h[2] -; CHECK-NEXT: umov w11, v0.h[3] ; CHECK-NEXT: eor w8, w9, w8 +; CHECK-NEXT: umov w9, v0.h[3] ; CHECK-NEXT: eor w8, w8, w10 -; CHECK-NEXT: eor w0, w8, w11 +; CHECK-NEXT: eor w0, w8, w9 ; CHECK-NEXT: ret ; ; GISEL-LABEL: test_redxor_v16i16: @@ -527,8 +527,8 @@ ; CHECK-LABEL: test_redxor_v2i32: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: eor w0, w9, w8 ; CHECK-NEXT: ret ; @@ -549,8 +549,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: eor w0, w9, w8 ; CHECK-NEXT: ret ; @@ -573,8 +573,8 @@ ; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: eor w0, w9, w8 ; CHECK-NEXT: ret ; diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll --- a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll @@ -54,9 +54,9 @@ define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind { ; CHECK-LABEL: v64i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sqadd v2.16b, v2.16b, v6.16b ; CHECK-NEXT: sqadd v0.16b, v0.16b, v4.16b ; CHECK-NEXT: sqadd v1.16b, v1.16b, v5.16b +; CHECK-NEXT: sqadd v2.16b, v2.16b, v6.16b ; CHECK-NEXT: sqadd v3.16b, v3.16b, v7.16b ; CHECK-NEXT: ret %z = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> %x, <64 x i8> %y) @@ -85,9 +85,9 @@ define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind { ; CHECK-LABEL: v32i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sqadd v2.8h, v2.8h, v6.8h ; CHECK-NEXT: sqadd v0.8h, v0.8h, v4.8h ; CHECK-NEXT: sqadd v1.8h, v1.8h, v5.8h +; CHECK-NEXT: sqadd v2.8h, v2.8h, v6.8h ; CHECK-NEXT: sqadd v3.8h, v3.8h, v7.8h ; CHECK-NEXT: ret %z = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> %x, <32 x i16> %y) @@ -97,9 +97,9 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind { ; CHECK-LABEL: v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: sqadd v0.8b, v0.8b, v1.8b +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: sqadd v0.8b, v1.8b, v0.8b ; CHECK-NEXT: str d0, [x2] ; CHECK-NEXT: ret %x = load <8 x i8>, <8 x i8>* %px @@ -143,10 +143,10 @@ ; CHECK-NEXT: shl v1.2s, v1.2s, #24 ; CHECK-NEXT: sqadd v0.2s, v1.2s, v0.2s ; CHECK-NEXT: ushr v0.2s, v0.2s, #24 -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strb w9, [x2] -; CHECK-NEXT: strb w8, [x2, #1] +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: mov w9, v0.s[1] +; CHECK-NEXT: strb w9, [x2, #1] +; CHECK-NEXT: strb w8, [x2] ; CHECK-NEXT: ret %x = load <2 x i8>, <2 x i8>* %px %y = load <2 x i8>, <2 x i8>* %py @@ -158,9 +158,9 @@ define void @v4i16(<4 x i16>* %px, <4 x i16>* %py, <4 x i16>* %pz) nounwind { ; CHECK-LABEL: v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: sqadd v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: sqadd v0.4h, v1.4h, v0.4h ; CHECK-NEXT: str d0, [x2] ; CHECK-NEXT: ret %x = load <4 x i16>, <4 x i16>* %px @@ -183,10 +183,10 @@ ; CHECK-NEXT: shl v1.2s, v1.2s, #16 ; CHECK-NEXT: sqadd v0.2s, v1.2s, v0.2s ; CHECK-NEXT: ushr v0.2s, v0.2s, #16 -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strh w9, [x2] -; CHECK-NEXT: strh w8, [x2, #2] +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: mov w9, v0.s[1] +; CHECK-NEXT: strh w9, [x2, #2] +; CHECK-NEXT: strh w8, [x2] ; CHECK-NEXT: ret %x = load <2 x i16>, <2 x i16>* %px %y = load <2 x i16>, <2 x i16>* %py @@ -224,9 +224,9 @@ define void @v1i8(<1 x i8>* %px, <1 x i8>* %py, <1 x i8>* %pz) nounwind { ; CHECK-LABEL: v1i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr b0, [x0] -; CHECK-NEXT: ldr b1, [x1] -; CHECK-NEXT: sqadd v0.8b, v0.8b, v1.8b +; CHECK-NEXT: ldr b0, [x1] +; CHECK-NEXT: ldr b1, [x0] +; CHECK-NEXT: sqadd v0.8b, v1.8b, v0.8b ; CHECK-NEXT: st1 { v0.b }[0], [x2] ; CHECK-NEXT: ret %x = load <1 x i8>, <1 x i8>* %px @@ -239,9 +239,9 @@ define void @v1i16(<1 x i16>* %px, <1 x i16>* %py, <1 x i16>* %pz) nounwind { ; CHECK-LABEL: v1i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr h0, [x0] -; CHECK-NEXT: ldr h1, [x1] -; CHECK-NEXT: sqadd v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ldr h0, [x1] +; CHECK-NEXT: ldr h1, [x0] +; CHECK-NEXT: sqadd v0.4h, v1.4h, v0.4h ; CHECK-NEXT: str h0, [x2] ; CHECK-NEXT: ret %x = load <1 x i16>, <1 x i16>* %px @@ -254,10 +254,10 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind { ; CHECK-LABEL: v16i4: ; CHECK: // %bb.0: -; CHECK-NEXT: shl v1.16b, v1.16b, #4 ; CHECK-NEXT: shl v0.16b, v0.16b, #4 -; CHECK-NEXT: sshr v1.16b, v1.16b, #4 +; CHECK-NEXT: shl v1.16b, v1.16b, #4 ; CHECK-NEXT: sshr v0.16b, v0.16b, #4 +; CHECK-NEXT: sshr v1.16b, v1.16b, #4 ; CHECK-NEXT: shl v1.16b, v1.16b, #4 ; CHECK-NEXT: shl v0.16b, v0.16b, #4 ; CHECK-NEXT: sqadd v0.16b, v0.16b, v1.16b @@ -307,9 +307,9 @@ define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind { ; CHECK-LABEL: v16i32: ; CHECK: // %bb.0: -; CHECK-NEXT: sqadd v2.4s, v2.4s, v6.4s ; CHECK-NEXT: sqadd v0.4s, v0.4s, v4.4s ; CHECK-NEXT: sqadd v1.4s, v1.4s, v5.4s +; CHECK-NEXT: sqadd v2.4s, v2.4s, v6.4s ; CHECK-NEXT: sqadd v3.4s, v3.4s, v7.4s ; CHECK-NEXT: ret %z = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> %x, <16 x i32> %y) @@ -338,9 +338,9 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind { ; CHECK-LABEL: v8i64: ; CHECK: // %bb.0: -; CHECK-NEXT: sqadd v2.2d, v2.2d, v6.2d ; CHECK-NEXT: sqadd v0.2d, v0.2d, v4.2d ; CHECK-NEXT: sqadd v1.2d, v1.2d, v5.2d +; CHECK-NEXT: sqadd v2.2d, v2.2d, v6.2d ; CHECK-NEXT: sqadd v3.2d, v3.2d, v7.2d ; CHECK-NEXT: ret %z = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> %x, <8 x i64> %y) diff --git a/llvm/test/CodeGen/AArch64/select-with-and-or.ll b/llvm/test/CodeGen/AArch64/select-with-and-or.ll --- a/llvm/test/CodeGen/AArch64/select-with-and-or.ll +++ b/llvm/test/CodeGen/AArch64/select-with-and-or.ll @@ -64,9 +64,9 @@ define <4 x i1> @and_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w) { ; CHECK-LABEL: and_vec: ; CHECK: // %bb.0: -; CHECK-NEXT: cmgt v2.4s, v2.4s, v3.4s ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret %a = icmp eq <4 x i32> %x, %y @@ -78,9 +78,9 @@ define <4 x i1> @or_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w) { ; CHECK-LABEL: or_vec: ; CHECK: // %bb.0: -; CHECK-NEXT: cmgt v2.4s, v2.4s, v3.4s ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s -; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b +; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s +; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret %a = icmp eq <4 x i32> %x, %y @@ -92,9 +92,9 @@ define <4 x i1> @and_not_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w) { ; CHECK-LABEL: and_not_vec: ; CHECK: // %bb.0: -; CHECK-NEXT: cmgt v2.4s, v2.4s, v3.4s ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s -; CHECK-NEXT: bic v0.16b, v2.16b, v0.16b +; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s +; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret %a = icmp eq <4 x i32> %x, %y @@ -106,9 +106,9 @@ define <4 x i1> @or_not_vec(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w) { ; CHECK-LABEL: or_not_vec: ; CHECK: // %bb.0: -; CHECK-NEXT: cmgt v2.4s, v2.4s, v3.4s ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s -; CHECK-NEXT: orn v0.16b, v2.16b, v0.16b +; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s +; CHECK-NEXT: orn v0.16b, v1.16b, v0.16b ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret %a = icmp eq <4 x i32> %x, %y @@ -120,9 +120,9 @@ define <4 x i1> @and_vec_undef(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w) { ; CHECK-LABEL: and_vec_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: cmgt v2.4s, v2.4s, v3.4s ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret %a = icmp eq <4 x i32> %x, %y @@ -134,9 +134,9 @@ define <4 x i1> @or_vec_undef(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w) { ; CHECK-LABEL: or_vec_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: cmgt v2.4s, v2.4s, v3.4s ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s -; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b +; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s +; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret %a = icmp eq <4 x i32> %x, %y @@ -148,9 +148,9 @@ define <4 x i1> @and_not_vec_undef(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w) { ; CHECK-LABEL: and_not_vec_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: cmgt v2.4s, v2.4s, v3.4s ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s -; CHECK-NEXT: bic v0.16b, v2.16b, v0.16b +; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s +; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret %a = icmp eq <4 x i32> %x, %y @@ -162,9 +162,9 @@ define <4 x i1> @or_not_vec_undef(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z, <4 x i32> %w) { ; CHECK-LABEL: or_not_vec_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: cmgt v2.4s, v2.4s, v3.4s ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s -; CHECK-NEXT: orn v0.16b, v2.16b, v0.16b +; CHECK-NEXT: cmgt v1.4s, v2.4s, v3.4s +; CHECK-NEXT: orn v0.16b, v1.16b, v0.16b ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret %a = icmp eq <4 x i32> %x, %y diff --git a/llvm/test/CodeGen/AArch64/select_const.ll b/llvm/test/CodeGen/AArch64/select_const.ll --- a/llvm/test/CodeGen/AArch64/select_const.ll +++ b/llvm/test/CodeGen/AArch64/select_const.ll @@ -498,13 +498,13 @@ define double @sel_constants_fadd_constant(i1 %cond) { ; CHECK-LABEL: sel_constants_fadd_constant: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #7378697629483820646 -; CHECK-NEXT: adrp x9, .LCPI42_0 -; CHECK-NEXT: movk x8, #16444, lsl #48 +; CHECK-NEXT: mov x9, #7378697629483820646 +; CHECK-NEXT: adrp x8, .LCPI42_0 +; CHECK-NEXT: movk x9, #16444, lsl #48 ; CHECK-NEXT: tst w0, #0x1 -; CHECK-NEXT: ldr d1, [x9, :lo12:.LCPI42_0] -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fcsel d0, d1, d0, ne +; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI42_0] +; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: fcsel d0, d0, d1, ne ; CHECK-NEXT: ret %sel = select i1 %cond, double -4.0, double 23.3 %bo = fadd double %sel, 5.1 @@ -607,13 +607,13 @@ define double @frem_constant_sel_constants(i1 %cond) { ; CHECK-LABEL: frem_constant_sel_constants: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #7378697629483820646 -; CHECK-NEXT: adrp x9, .LCPI49_0 -; CHECK-NEXT: movk x8, #16404, lsl #48 +; CHECK-NEXT: mov x9, #7378697629483820646 +; CHECK-NEXT: adrp x8, .LCPI49_0 +; CHECK-NEXT: movk x9, #16404, lsl #48 ; CHECK-NEXT: tst w0, #0x1 -; CHECK-NEXT: ldr d1, [x9, :lo12:.LCPI49_0] -; CHECK-NEXT: fmov d0, x8 -; CHECK-NEXT: fcsel d0, d1, d0, ne +; CHECK-NEXT: ldr d0, [x8, :lo12:.LCPI49_0] +; CHECK-NEXT: fmov d1, x9 +; CHECK-NEXT: fcsel d0, d0, d1, ne ; CHECK-NEXT: ret %sel = select i1 %cond, double -4.0, double 23.3 %bo = frem double 5.1, %sel diff --git a/llvm/test/CodeGen/AArch64/select_fmf.ll b/llvm/test/CodeGen/AArch64/select_fmf.ll --- a/llvm/test/CodeGen/AArch64/select_fmf.ll +++ b/llvm/test/CodeGen/AArch64/select_fmf.ll @@ -7,11 +7,11 @@ define float @select_select_fold_select_and(float %w, float %x, float %y, float %z) { ; CHECK-LABEL: select_select_fold_select_and: ; CHECK: // %bb.0: -; CHECK-NEXT: fminnm s5, s1, s2 +; CHECK-NEXT: fminnm s4, s1, s2 ; CHECK-NEXT: fcmp s1, s2 ; CHECK-NEXT: fmaxnm s1, s0, s3 +; CHECK-NEXT: fccmp s4, s0, #4, lt ; CHECK-NEXT: fmov s4, #0.50000000 -; CHECK-NEXT: fccmp s5, s0, #4, lt ; CHECK-NEXT: fcsel s2, s1, s0, gt ; CHECK-NEXT: fadd s1, s0, s4 ; CHECK-NEXT: fadd s4, s1, s2 @@ -65,11 +65,11 @@ define float @select_select_fold_select_or(float %w, float %x, float %y, float %z) { ; CHECK-LABEL: select_select_fold_select_or: ; CHECK: // %bb.0: -; CHECK-NEXT: fminnm s5, s1, s2 +; CHECK-NEXT: fminnm s4, s1, s2 ; CHECK-NEXT: fcmp s1, s2 ; CHECK-NEXT: fmaxnm s1, s0, s3 +; CHECK-NEXT: fccmp s4, s0, #0, ge ; CHECK-NEXT: fmov s4, #0.50000000 -; CHECK-NEXT: fccmp s5, s0, #0, ge ; CHECK-NEXT: fcsel s2, s0, s1, gt ; CHECK-NEXT: fadd s1, s0, s4 ; CHECK-NEXT: fadd s4, s1, s2 diff --git a/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll b/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll --- a/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll +++ b/llvm/test/CodeGen/AArch64/selectcc-to-shiftand.ll @@ -205,9 +205,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ushll v0.2d, v0.2s, #0 ; CHECK-NEXT: mov w8, #65536 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: shl v0.2d, v0.2d, #63 ; CHECK-NEXT: sshr v0.2d, v0.2d, #63 +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %shl = select <2 x i1> %t, <2 x i64> , <2 x i64> zeroinitializer diff --git a/llvm/test/CodeGen/AArch64/shift-mod.ll b/llvm/test/CodeGen/AArch64/shift-mod.ll --- a/llvm/test/CodeGen/AArch64/shift-mod.ll +++ b/llvm/test/CodeGen/AArch64/shift-mod.ll @@ -102,8 +102,8 @@ define <4 x i32> @ashr_add_shl_v4i8(<4 x i32> %r) { ; CHECK-LABEL: ashr_add_shl_v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.4s, #1, lsl #24 ; CHECK-NEXT: shl v0.4s, v0.4s, #24 +; CHECK-NEXT: movi v1.4s, #1, lsl #24 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: sshr v0.4s, v0.4s, #24 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll b/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll --- a/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll +++ b/llvm/test/CodeGen/AArch64/sink-addsub-of-const.ll @@ -160,8 +160,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI12_0 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI12_0] -; CHECK-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI12_0] +; CHECK-NEXT: add v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %t0 = add <4 x i32> %a, ; constant always on RHS %r = add <4 x i32> %t0, %b @@ -172,8 +172,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI13_0 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI13_0] -; CHECK-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI13_0] +; CHECK-NEXT: add v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %t0 = add <4 x i32> %a, ; constant always on RHS %r = add <4 x i32> %b, %t0 @@ -188,8 +188,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI14_0 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI14_0] -; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI14_0] +; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %t0 = sub <4 x i32> %a, %r = add <4 x i32> %t0, %b @@ -200,8 +200,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI15_0 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_0] -; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI15_0] +; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %t0 = sub <4 x i32> %a, %r = add <4 x i32> %b, %t0 @@ -216,8 +216,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI16_0 ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_0] -; CHECK-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI16_0] +; CHECK-NEXT: add v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %t0 = sub <4 x i32> , %a %r = add <4 x i32> %t0, %b @@ -228,8 +228,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI17_0 ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_0] -; CHECK-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI17_0] +; CHECK-NEXT: add v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %t0 = sub <4 x i32> , %a %r = add <4 x i32> %b, %t0 @@ -244,8 +244,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI18_0 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI18_0] -; CHECK-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI18_0] +; CHECK-NEXT: add v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %t0 = add <4 x i32> %a, ; constant always on RHS %r = sub <4 x i32> %t0, %b @@ -256,8 +256,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI19_0 ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI19_0] -; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI19_0] +; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %t0 = add <4 x i32> %a, ; constant always on RHS %r = sub <4 x i32> %b, %t0 @@ -272,8 +272,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI20_0 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI20_0] -; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI20_0] +; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %t0 = sub <4 x i32> %a, %r = sub <4 x i32> %t0, %b @@ -284,8 +284,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI21_0 ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI21_0] -; CHECK-NEXT: add v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI21_0] +; CHECK-NEXT: add v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %t0 = sub <4 x i32> %a, %r = sub <4 x i32> %b, %t0 @@ -300,8 +300,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI22_0 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI22_0] -; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI22_0] +; CHECK-NEXT: sub v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ret %t0 = sub <4 x i32> , %a %r = sub <4 x i32> %t0, %b @@ -312,8 +312,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI23_0 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI23_0] -; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI23_0] +; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s ; CHECK-NEXT: ret %t0 = sub <4 x i32> , %a %r = sub <4 x i32> %b, %t0 diff --git a/llvm/test/CodeGen/AArch64/sinksplat.ll b/llvm/test/CodeGen/AArch64/sinksplat.ll --- a/llvm/test/CodeGen/AArch64/sinksplat.ll +++ b/llvm/test/CodeGen/AArch64/sinksplat.ll @@ -68,8 +68,8 @@ define <4 x i32> @sqadd(<4 x i32> %x, <4 x i32> *%y) { ; CHECK-LABEL: sqadd: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: .LBB2_1: // %l1 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 @@ -101,8 +101,8 @@ define <4 x i32> @sqsub(<4 x i32> %x, <4 x i32> *%y) { ; CHECK-LABEL: sqsub: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: .LBB3_1: // %l1 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 @@ -134,8 +134,8 @@ define <4 x i32> @sqdmulh(<4 x i32> %x, <4 x i32> *%y) { ; CHECK-LABEL: sqdmulh: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: .LBB4_1: // %l1 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 @@ -200,10 +200,9 @@ define <4 x i32> @mlal(<4 x i32> %x, <4 x i32> *%y) { ; CHECK-LABEL: mlal: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: dup v1.4s, v0.s[3] ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: dup v1.4s, v1.s[3] ; CHECK-NEXT: .LBB6_1: // %l1 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr q2, [x0] @@ -233,8 +232,8 @@ define <4 x float> @fmul(<4 x float> %x, <4 x float> *%y) { ; CHECK-LABEL: fmul: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: .LBB7_1: // %l1 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 @@ -266,10 +265,9 @@ define <4 x float> @fmuladd(<4 x float> %x, <4 x float> *%y) { ; CHECK-LABEL: fmuladd: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: dup v1.4s, v0.s[3] ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: dup v1.4s, v1.s[3] ; CHECK-NEXT: .LBB8_1: // %l1 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr q2, [x0] @@ -299,17 +297,16 @@ define <4 x float> @fma(<4 x float> %x, <4 x float> *%y) { ; CHECK-LABEL: fma: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: dup v1.4s, v0.s[3] ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: dup v1.4s, v1.s[3] ; CHECK-NEXT: .LBB9_1: // %l1 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: mov v2.16b, v0.16b -; CHECK-NEXT: ldr q3, [x0] -; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: ldr q2, [x0] ; CHECK-NEXT: subs w8, w8, #1 -; CHECK-NEXT: fmla v0.4s, v2.4s, v3.4s +; CHECK-NEXT: mov v3.16b, v0.16b +; CHECK-NEXT: mov v0.16b, v1.16b +; CHECK-NEXT: fmla v0.4s, v3.4s, v2.4s ; CHECK-NEXT: b.eq .LBB9_1 ; CHECK-NEXT: // %bb.2: // %l2 ; CHECK-NEXT: ret @@ -333,12 +330,12 @@ define <4 x i32> @smull_nonsplat(<4 x i16> %x, <4 x i16> *%y) { ; CHECK-LABEL: smull_nonsplat: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: fmov d1, d0 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: dup v1.4h, v0.h[3] ; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: ext v1.8b, v0.8b, v1.8b, #4 +; CHECK-NEXT: ext v1.8b, v0.8b, v1.8b, #6 ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: dup v2.4h, v1.h[3] -; CHECK-NEXT: ext v2.8b, v1.8b, v2.8b, #4 -; CHECK-NEXT: ext v1.8b, v1.8b, v2.8b, #6 ; CHECK-NEXT: .LBB10_1: // %l1 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: ldr d2, [x0] diff --git a/llvm/test/CodeGen/AArch64/sitofp-fixed-legal.ll b/llvm/test/CodeGen/AArch64/sitofp-fixed-legal.ll --- a/llvm/test/CodeGen/AArch64/sitofp-fixed-legal.ll +++ b/llvm/test/CodeGen/AArch64/sitofp-fixed-legal.ll @@ -4,20 +4,20 @@ define <16 x double> @test_sitofp_fixed(<16 x i32> %in) { ; CHECK-LABEL: test_sitofp_fixed: ; CHECK: ; %bb.0: -; CHECK-NEXT: sshll2.2d v4, v2, #0 -; CHECK-NEXT: sshll.2d v16, v1, #0 -; CHECK-NEXT: sshll2.2d v5, v0, #0 -; CHECK-NEXT: sshll2.2d v6, v1, #0 +; CHECK-NEXT: sshll2.2d v4, v0, #0 +; CHECK-NEXT: sshll2.2d v5, v1, #0 +; CHECK-NEXT: sshll2.2d v6, v2, #0 ; CHECK-NEXT: sshll2.2d v7, v3, #0 ; CHECK-NEXT: sshll.2d v0, v0, #0 +; CHECK-NEXT: sshll.2d v16, v1, #0 ; CHECK-NEXT: sshll.2d v17, v2, #0 ; CHECK-NEXT: sshll.2d v18, v3, #0 -; CHECK-NEXT: scvtf.2d v1, v5, #6 -; CHECK-NEXT: scvtf.2d v3, v6, #6 -; CHECK-NEXT: scvtf.2d v2, v16, #6 -; CHECK-NEXT: scvtf.2d v5, v4, #6 +; CHECK-NEXT: scvtf.2d v1, v4, #6 ; CHECK-NEXT: scvtf.2d v0, v0, #6 +; CHECK-NEXT: scvtf.2d v3, v5, #6 +; CHECK-NEXT: scvtf.2d v5, v6, #6 ; CHECK-NEXT: scvtf.2d v7, v7, #6 +; CHECK-NEXT: scvtf.2d v2, v16, #6 ; CHECK-NEXT: scvtf.2d v4, v17, #6 ; CHECK-NEXT: scvtf.2d v6, v18, #6 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sqrt-fastmath.ll b/llvm/test/CodeGen/AArch64/sqrt-fastmath.ll --- a/llvm/test/CodeGen/AArch64/sqrt-fastmath.ll +++ b/llvm/test/CodeGen/AArch64/sqrt-fastmath.ll @@ -543,16 +543,16 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: frsqrte d1, d0 ; CHECK-NEXT: mov x8, #4631107791820423168 +; CHECK-NEXT: fmul d2, d1, d1 +; CHECK-NEXT: frsqrts d2, d0, d2 +; CHECK-NEXT: fmul d1, d1, d2 +; CHECK-NEXT: fmul d2, d1, d1 +; CHECK-NEXT: frsqrts d2, d0, d2 +; CHECK-NEXT: fmul d1, d1, d2 +; CHECK-NEXT: fmul d2, d1, d1 +; CHECK-NEXT: frsqrts d2, d0, d2 +; CHECK-NEXT: fmul d1, d1, d2 ; CHECK-NEXT: fmov d2, x8 -; CHECK-NEXT: fmul d3, d1, d1 -; CHECK-NEXT: frsqrts d3, d0, d3 -; CHECK-NEXT: fmul d1, d1, d3 -; CHECK-NEXT: fmul d3, d1, d1 -; CHECK-NEXT: frsqrts d3, d0, d3 -; CHECK-NEXT: fmul d1, d1, d3 -; CHECK-NEXT: fmul d3, d1, d1 -; CHECK-NEXT: frsqrts d3, d0, d3 -; CHECK-NEXT: fmul d1, d1, d3 ; CHECK-NEXT: fmul d0, d0, d1 ; CHECK-NEXT: fmul d2, d1, d2 ; CHECK-NEXT: str d1, [x0] diff --git a/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll --- a/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/AArch64/srem-seteq-illegal-types.ll @@ -60,49 +60,49 @@ ; CHECK-LABEL: test_srem_vec: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x11, #7282 -; CHECK-NEXT: sbfx x10, x0, #0, #33 -; CHECK-NEXT: movk x11, #29127, lsl #16 ; CHECK-NEXT: mov x9, #7281 -; CHECK-NEXT: movk x11, #50972, lsl #32 +; CHECK-NEXT: movk x11, #29127, lsl #16 ; CHECK-NEXT: movk x9, #29127, lsl #16 -; CHECK-NEXT: movk x11, #7281, lsl #48 +; CHECK-NEXT: movk x11, #50972, lsl #32 ; CHECK-NEXT: movk x9, #50972, lsl #32 -; CHECK-NEXT: sbfx x13, x1, #0, #33 +; CHECK-NEXT: sbfx x10, x0, #0, #33 +; CHECK-NEXT: movk x11, #7281, lsl #48 ; CHECK-NEXT: sbfx x8, x2, #0, #33 -; CHECK-NEXT: smulh x12, x10, x11 ; CHECK-NEXT: movk x9, #7281, lsl #48 -; CHECK-NEXT: smulh x11, x13, x11 +; CHECK-NEXT: smulh x13, x10, x11 +; CHECK-NEXT: sbfx x12, x1, #0, #33 ; CHECK-NEXT: smulh x9, x8, x9 -; CHECK-NEXT: add x12, x12, x12, lsr #63 +; CHECK-NEXT: mov x14, #8589934591 +; CHECK-NEXT: smulh x11, x12, x11 ; CHECK-NEXT: sub x9, x9, x8 +; CHECK-NEXT: add x13, x13, x13, lsr #63 +; CHECK-NEXT: asr x15, x9, #3 +; CHECK-NEXT: dup v0.2d, x14 +; CHECK-NEXT: add x9, x15, x9, lsr #63 +; CHECK-NEXT: add x13, x13, x13, lsl #3 ; CHECK-NEXT: add x11, x11, x11, lsr #63 -; CHECK-NEXT: add x12, x12, x12, lsl #3 -; CHECK-NEXT: asr x14, x9, #3 -; CHECK-NEXT: sub x10, x10, x12 -; CHECK-NEXT: add x9, x14, x9, lsr #63 -; CHECK-NEXT: add x11, x11, x11, lsl #3 -; CHECK-NEXT: sub x11, x13, x11 +; CHECK-NEXT: sub x10, x10, x13 ; CHECK-NEXT: add x9, x9, x9, lsl #3 -; CHECK-NEXT: fmov d0, x10 ; CHECK-NEXT: add x8, x8, x9 -; CHECK-NEXT: mov x9, #8589934591 -; CHECK-NEXT: mov v0.d[1], x11 -; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: dup v2.2d, x9 -; CHECK-NEXT: adrp x8, .LCPI3_0 -; CHECK-NEXT: adrp x9, .LCPI3_1 -; CHECK-NEXT: and v1.16b, v1.16b, v2.16b -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_0] -; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI3_1] -; CHECK-NEXT: cmeq v0.2d, v0.2d, v2.2d -; CHECK-NEXT: cmeq v1.2d, v1.2d, v3.2d -; CHECK-NEXT: mvn v0.16b, v0.16b +; CHECK-NEXT: add x9, x11, x11, lsl #3 +; CHECK-NEXT: sub x9, x12, x9 +; CHECK-NEXT: adrp x11, .LCPI3_1 +; CHECK-NEXT: fmov d1, x10 +; CHECK-NEXT: adrp x10, .LCPI3_0 +; CHECK-NEXT: fmov d2, x8 +; CHECK-NEXT: ldr q3, [x11, :lo12:.LCPI3_1] +; CHECK-NEXT: mov v1.d[1], x9 +; CHECK-NEXT: ldr q4, [x10, :lo12:.LCPI3_0] +; CHECK-NEXT: and v2.16b, v2.16b, v0.16b +; CHECK-NEXT: and v0.16b, v1.16b, v0.16b +; CHECK-NEXT: cmeq v1.2d, v2.2d, v3.2d +; CHECK-NEXT: cmeq v0.2d, v0.2d, v4.2d ; CHECK-NEXT: mvn v1.16b, v1.16b +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: xtn v0.2s, v0.2d ; CHECK-NEXT: xtn v1.2s, v1.2d -; CHECK-NEXT: mov w1, v0.s[1] ; CHECK-NEXT: fmov w0, s0 +; CHECK-NEXT: mov w1, v0.s[1] ; CHECK-NEXT: fmov w2, s1 ; CHECK-NEXT: ret %srem = srem <3 x i33> %X, diff --git a/llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll b/llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll --- a/llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll +++ b/llvm/test/CodeGen/AArch64/srem-seteq-vec-nonsplat.ll @@ -17,9 +17,9 @@ ; CHECK-NEXT: adrp x8, .LCPI0_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI0_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -39,12 +39,12 @@ ; CHECK-NEXT: mov w9, #39321 ; CHECK-NEXT: movk w8, #52428, lsl #16 ; CHECK-NEXT: movk w9, #6553, lsl #16 -; CHECK-NEXT: adrp x10, .LCPI1_0 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 +; CHECK-NEXT: adrp x8, .LCPI1_0 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q0, [x10, :lo12:.LCPI1_0] ; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI1_0] ; CHECK-NEXT: cmhs v0.4s, v0.4s, v2.4s ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -60,12 +60,12 @@ ; CHECK-NEXT: mov w9, #39321 ; CHECK-NEXT: movk w8, #52428, lsl #16 ; CHECK-NEXT: movk w9, #6553, lsl #16 -; CHECK-NEXT: adrp x10, .LCPI2_0 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 +; CHECK-NEXT: adrp x8, .LCPI2_0 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q0, [x10, :lo12:.LCPI2_0] ; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI2_0] ; CHECK-NEXT: cmhi v0.4s, v2.4s, v0.4s ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -89,10 +89,10 @@ ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: shl v0.4s, v2.4s, #31 ; CHECK-NEXT: ushr v1.4s, v2.4s, #1 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_0] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, @@ -113,10 +113,10 @@ ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: shl v0.4s, v2.4s, #31 ; CHECK-NEXT: ushr v1.4s, v2.4s, #1 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI4_0] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_0] -; CHECK-NEXT: cmhi v0.4s, v0.4s, v1.4s ; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: cmhi v0.4s, v0.4s, v2.4s ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, @@ -141,9 +141,9 @@ ; CHECK-NEXT: adrp x8, .LCPI5_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI5_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI5_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -167,9 +167,9 @@ ; CHECK-NEXT: adrp x8, .LCPI6_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI6_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI6_4] -; CHECK-NEXT: cmhi v0.4s, v0.4s, v1.4s +; CHECK-NEXT: cmhi v0.4s, v0.4s, v2.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -197,9 +197,9 @@ ; CHECK-NEXT: adrp x8, .LCPI7_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI7_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI7_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -225,9 +225,9 @@ ; CHECK-NEXT: adrp x8, .LCPI8_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI8_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI8_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -253,9 +253,9 @@ ; CHECK-NEXT: adrp x8, .LCPI9_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI9_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI9_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -275,12 +275,12 @@ ; CHECK-NEXT: mov w9, #39321 ; CHECK-NEXT: movk w8, #52428, lsl #16 ; CHECK-NEXT: movk w9, #6553, lsl #16 -; CHECK-NEXT: adrp x10, .LCPI10_0 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 +; CHECK-NEXT: adrp x8, .LCPI10_0 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q0, [x10, :lo12:.LCPI10_0] ; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI10_0] ; CHECK-NEXT: cmhs v0.4s, v0.4s, v2.4s ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -304,10 +304,10 @@ ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: shl v0.4s, v2.4s, #31 ; CHECK-NEXT: ushr v1.4s, v2.4s, #1 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI11_0] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI11_0] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, @@ -332,9 +332,9 @@ ; CHECK-NEXT: adrp x8, .LCPI12_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI12_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI12_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -355,12 +355,12 @@ ; CHECK-NEXT: adrp x8, .LCPI13_1 ; CHECK-NEXT: smull2 v2.2d, v0.4s, v1.4s ; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s -; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI13_1] +; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI13_1] ; CHECK-NEXT: adrp x8, .LCPI13_2 -; CHECK-NEXT: mla v1.4s, v0.4s, v2.4s +; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI13_2] ; CHECK-NEXT: adrp x8, .LCPI13_3 +; CHECK-NEXT: mla v1.4s, v0.4s, v3.4s ; CHECK-NEXT: sshl v2.4s, v1.4s, v2.4s ; CHECK-NEXT: usra v2.4s, v1.4s, #31 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI13_3] @@ -384,12 +384,12 @@ ; CHECK-NEXT: adrp x8, .LCPI14_1 ; CHECK-NEXT: smull2 v2.2d, v0.4s, v1.4s ; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s -; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI14_1] +; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI14_1] ; CHECK-NEXT: adrp x8, .LCPI14_2 -; CHECK-NEXT: mla v1.4s, v0.4s, v2.4s +; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI14_2] ; CHECK-NEXT: adrp x8, .LCPI14_3 +; CHECK-NEXT: mla v1.4s, v0.4s, v3.4s ; CHECK-NEXT: sshl v2.4s, v1.4s, v2.4s ; CHECK-NEXT: usra v2.4s, v1.4s, #31 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI14_3] @@ -413,12 +413,12 @@ ; CHECK-NEXT: adrp x8, .LCPI15_1 ; CHECK-NEXT: smull2 v2.2d, v0.4s, v1.4s ; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s -; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s -; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI15_1] +; CHECK-NEXT: ldr q3, [x8, :lo12:.LCPI15_1] ; CHECK-NEXT: adrp x8, .LCPI15_2 -; CHECK-NEXT: mla v1.4s, v0.4s, v2.4s +; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s ; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI15_2] ; CHECK-NEXT: adrp x8, .LCPI15_3 +; CHECK-NEXT: mla v1.4s, v0.4s, v3.4s ; CHECK-NEXT: sshl v2.4s, v1.4s, v2.4s ; CHECK-NEXT: usra v2.4s, v1.4s, #31 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_3] @@ -451,9 +451,9 @@ ; CHECK-NEXT: adrp x8, .LCPI16_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI16_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -479,9 +479,9 @@ ; CHECK-NEXT: adrp x8, .LCPI17_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI17_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -507,9 +507,9 @@ ; CHECK-NEXT: adrp x8, .LCPI18_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI18_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI18_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -529,12 +529,12 @@ ; CHECK-NEXT: mov w9, #39321 ; CHECK-NEXT: movk w8, #52428, lsl #16 ; CHECK-NEXT: movk w9, #6553, lsl #16 -; CHECK-NEXT: adrp x10, .LCPI19_0 ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: dup v2.4s, w9 +; CHECK-NEXT: adrp x8, .LCPI19_0 ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q0, [x10, :lo12:.LCPI19_0] ; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: ldr q0, [x8, :lo12:.LCPI19_0] ; CHECK-NEXT: cmhs v0.4s, v0.4s, v2.4s ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -558,10 +558,10 @@ ; CHECK-NEXT: mla v2.4s, v0.4s, v1.4s ; CHECK-NEXT: shl v0.4s, v2.4s, #31 ; CHECK-NEXT: ushr v1.4s, v2.4s, #1 +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI20_0] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI20_0] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %srem = srem <4 x i32> %X, @@ -586,9 +586,9 @@ ; CHECK-NEXT: adrp x8, .LCPI21_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI21_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI21_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -616,9 +616,9 @@ ; CHECK-NEXT: adrp x8, .LCPI22_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI22_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI22_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -644,9 +644,9 @@ ; CHECK-NEXT: adrp x8, .LCPI23_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI23_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI23_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -672,9 +672,9 @@ ; CHECK-NEXT: adrp x8, .LCPI24_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI24_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI24_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -701,9 +701,9 @@ ; CHECK-NEXT: adrp x8, .LCPI25_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI25_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI25_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -728,9 +728,9 @@ ; CHECK-NEXT: adrp x8, .LCPI26_4 ; CHECK-NEXT: ushl v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI26_4] ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI26_4] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll b/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll --- a/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll +++ b/llvm/test/CodeGen/AArch64/srem-seteq-vec-splat.ll @@ -40,9 +40,9 @@ ; CHECK-NEXT: movk w8, #655, lsl #16 ; CHECK-NEXT: shl v0.4s, v2.4s, #30 ; CHECK-NEXT: ushr v1.4s, v2.4s, #2 +; CHECK-NEXT: dup v2.4s, w8 ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: dup v1.4s, w8 -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -93,9 +93,9 @@ ; CHECK-NEXT: movk w8, #655, lsl #16 ; CHECK-NEXT: shl v0.4s, v2.4s, #30 ; CHECK-NEXT: ushr v1.4s, v2.4s, #2 +; CHECK-NEXT: dup v2.4s, w8 ; CHECK-NEXT: orr v0.16b, v1.16b, v0.16b -; CHECK-NEXT: dup v1.4s, w8 -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -114,14 +114,14 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #34079 ; CHECK-NEXT: movk w8, #20971, lsl #16 +; CHECK-NEXT: dup v1.4s, w8 +; CHECK-NEXT: smull2 v2.2d, v0.4s, v1.4s +; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s +; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s +; CHECK-NEXT: sshr v2.4s, v1.4s, #3 +; CHECK-NEXT: usra v2.4s, v1.4s, #31 ; CHECK-NEXT: movi v1.4s, #25 -; CHECK-NEXT: dup v2.4s, w8 -; CHECK-NEXT: smull2 v3.2d, v0.4s, v2.4s -; CHECK-NEXT: smull v2.2d, v0.2s, v2.2s -; CHECK-NEXT: uzp2 v2.4s, v2.4s, v3.4s -; CHECK-NEXT: sshr v3.4s, v2.4s, #3 -; CHECK-NEXT: usra v3.4s, v2.4s, #31 -; CHECK-NEXT: mls v0.4s, v3.4s, v1.4s +; CHECK-NEXT: mls v0.4s, v2.4s, v1.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b @@ -137,14 +137,14 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #34079 ; CHECK-NEXT: movk w8, #20971, lsl #16 +; CHECK-NEXT: dup v1.4s, w8 +; CHECK-NEXT: smull2 v2.2d, v0.4s, v1.4s +; CHECK-NEXT: smull v1.2d, v0.2s, v1.2s +; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s +; CHECK-NEXT: sshr v2.4s, v1.4s, #5 +; CHECK-NEXT: usra v2.4s, v1.4s, #31 ; CHECK-NEXT: movi v1.4s, #100 -; CHECK-NEXT: dup v2.4s, w8 -; CHECK-NEXT: smull2 v3.2d, v0.4s, v2.4s -; CHECK-NEXT: smull v2.2d, v0.2s, v2.2s -; CHECK-NEXT: uzp2 v2.4s, v2.4s, v3.4s -; CHECK-NEXT: sshr v3.4s, v2.4s, #5 -; CHECK-NEXT: usra v3.4s, v2.4s, #31 -; CHECK-NEXT: mls v0.4s, v3.4s, v1.4s +; CHECK-NEXT: mls v0.4s, v2.4s, v1.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b @@ -184,12 +184,12 @@ define <4 x i32> @test_srem_pow2(<4 x i32> %X) nounwind { ; CHECK-LABEL: test_srem_pow2: ; CHECK: // %bb.0: -; CHECK-NEXT: sshr v2.4s, v0.4s, #31 -; CHECK-NEXT: mov v3.16b, v0.16b +; CHECK-NEXT: sshr v1.4s, v0.4s, #31 +; CHECK-NEXT: mov v2.16b, v0.16b +; CHECK-NEXT: usra v2.4s, v1.4s, #28 ; CHECK-NEXT: movi v1.4s, #1 -; CHECK-NEXT: usra v3.4s, v2.4s, #28 -; CHECK-NEXT: bic v3.4s, #15 -; CHECK-NEXT: sub v0.4s, v0.4s, v3.4s +; CHECK-NEXT: bic v2.4s, #15 +; CHECK-NEXT: sub v0.4s, v0.4s, v2.4s ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -203,11 +203,11 @@ define <4 x i32> @test_srem_int_min(<4 x i32> %X) nounwind { ; CHECK-LABEL: test_srem_int_min: ; CHECK: // %bb.0: -; CHECK-NEXT: sshr v2.4s, v0.4s, #31 -; CHECK-NEXT: mov v3.16b, v0.16b +; CHECK-NEXT: sshr v1.4s, v0.4s, #31 +; CHECK-NEXT: mov v2.16b, v0.16b +; CHECK-NEXT: usra v2.4s, v1.4s, #1 ; CHECK-NEXT: movi v1.4s, #128, lsl #24 -; CHECK-NEXT: usra v3.4s, v2.4s, #1 -; CHECK-NEXT: and v1.16b, v3.16b, v1.16b +; CHECK-NEXT: and v1.16b, v2.16b, v1.16b ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 diff --git a/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll b/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll --- a/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll +++ b/llvm/test/CodeGen/AArch64/srem-vector-lkk.ll @@ -4,48 +4,48 @@ define <4 x i16> @fold_srem_vec_1(<4 x i16> %x) { ; CHECK-LABEL: fold_srem_vec_1: ; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #63421 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: smov w8, v0.h[1] -; CHECK-NEXT: smov w9, v0.h[0] -; CHECK-NEXT: mov w10, #63421 -; CHECK-NEXT: mov w11, #37253 -; CHECK-NEXT: movk w10, #31710, lsl #16 -; CHECK-NEXT: movk w11, #44150, lsl #16 -; CHECK-NEXT: smov w13, v0.h[2] -; CHECK-NEXT: mov w12, #33437 -; CHECK-NEXT: smull x10, w8, w10 -; CHECK-NEXT: movk w12, #21399, lsl #16 -; CHECK-NEXT: smull x11, w9, w11 +; CHECK-NEXT: smov w9, v0.h[1] +; CHECK-NEXT: movk w8, #31710, lsl #16 +; CHECK-NEXT: mov w10, #37253 +; CHECK-NEXT: movk w10, #44150, lsl #16 +; CHECK-NEXT: smov w11, v0.h[0] +; CHECK-NEXT: smull x8, w9, w8 +; CHECK-NEXT: mov w12, #-124 +; CHECK-NEXT: smull x10, w11, w10 +; CHECK-NEXT: mov w13, #33437 +; CHECK-NEXT: lsr x8, x8, #32 +; CHECK-NEXT: movk w13, #21399, lsl #16 +; CHECK-NEXT: sub w8, w8, w9 ; CHECK-NEXT: lsr x10, x10, #32 -; CHECK-NEXT: lsr x11, x11, #32 -; CHECK-NEXT: sub w10, w10, w8 -; CHECK-NEXT: add w11, w11, w9 -; CHECK-NEXT: asr w14, w10, #6 -; CHECK-NEXT: asr w15, w11, #6 -; CHECK-NEXT: add w10, w14, w10, lsr #31 -; CHECK-NEXT: add w11, w15, w11, lsr #31 -; CHECK-NEXT: mov w14, #95 -; CHECK-NEXT: mov w15, #-124 -; CHECK-NEXT: smull x12, w13, w12 -; CHECK-NEXT: msub w9, w11, w14, w9 -; CHECK-NEXT: msub w8, w10, w15, w8 -; CHECK-NEXT: lsr x10, x12, #63 -; CHECK-NEXT: asr x11, x12, #37 +; CHECK-NEXT: asr w14, w8, #6 +; CHECK-NEXT: add w10, w10, w11 +; CHECK-NEXT: add w8, w14, w8, lsr #31 +; CHECK-NEXT: smov w14, v0.h[2] +; CHECK-NEXT: msub w8, w8, w12, w9 +; CHECK-NEXT: asr w9, w10, #6 +; CHECK-NEXT: mov w12, #95 +; CHECK-NEXT: add w9, w9, w10, lsr #31 +; CHECK-NEXT: smull x10, w14, w13 +; CHECK-NEXT: mov w13, #63249 +; CHECK-NEXT: msub w9, w9, w12, w11 ; CHECK-NEXT: smov w12, v0.h[3] -; CHECK-NEXT: add w10, w11, w10 +; CHECK-NEXT: movk w13, #48808, lsl #16 +; CHECK-NEXT: lsr x11, x10, #63 +; CHECK-NEXT: asr x10, x10, #37 +; CHECK-NEXT: add w10, w10, w11 ; CHECK-NEXT: mov w11, #98 +; CHECK-NEXT: smull x13, w12, w13 ; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: mov w9, #63249 -; CHECK-NEXT: movk w9, #48808, lsl #16 -; CHECK-NEXT: msub w10, w10, w11, w13 -; CHECK-NEXT: smull x9, w12, w9 +; CHECK-NEXT: msub w10, w10, w11, w14 +; CHECK-NEXT: lsr x11, x13, #63 +; CHECK-NEXT: asr x13, x13, #40 +; CHECK-NEXT: add w11, w13, w11 +; CHECK-NEXT: mov w13, #-1003 ; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: lsr x8, x9, #63 -; CHECK-NEXT: asr x9, x9, #40 -; CHECK-NEXT: add w8, w9, w8 -; CHECK-NEXT: mov w9, #-1003 +; CHECK-NEXT: msub w8, w11, w13, w12 ; CHECK-NEXT: mov v0.h[2], w10 -; CHECK-NEXT: msub w8, w8, w9, w12 ; CHECK-NEXT: mov v0.h[3], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret @@ -56,41 +56,41 @@ define <4 x i16> @fold_srem_vec_2(<4 x i16> %x) { ; CHECK-LABEL: fold_srem_vec_2: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: smov w9, v0.h[0] ; CHECK-NEXT: mov w8, #37253 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: smov w9, v0.h[1] ; CHECK-NEXT: movk w8, #44150, lsl #16 -; CHECK-NEXT: smov w10, v0.h[1] -; CHECK-NEXT: smov w14, v0.h[2] +; CHECK-NEXT: smov w10, v0.h[0] +; CHECK-NEXT: smov w13, v0.h[2] ; CHECK-NEXT: mov w12, #95 ; CHECK-NEXT: smull x11, w9, w8 -; CHECK-NEXT: smull x13, w10, w8 +; CHECK-NEXT: smov w15, v0.h[3] +; CHECK-NEXT: smull x14, w10, w8 ; CHECK-NEXT: lsr x11, x11, #32 +; CHECK-NEXT: lsr x14, x14, #32 ; CHECK-NEXT: add w11, w11, w9 -; CHECK-NEXT: lsr x13, x13, #32 -; CHECK-NEXT: asr w15, w11, #6 -; CHECK-NEXT: add w13, w13, w10 -; CHECK-NEXT: add w11, w15, w11, lsr #31 -; CHECK-NEXT: smov w15, v0.h[3] -; CHECK-NEXT: asr w16, w13, #6 +; CHECK-NEXT: add w14, w14, w10 +; CHECK-NEXT: asr w16, w11, #6 +; CHECK-NEXT: asr w17, w14, #6 +; CHECK-NEXT: add w11, w16, w11, lsr #31 +; CHECK-NEXT: smull x16, w13, w8 +; CHECK-NEXT: add w14, w17, w14, lsr #31 ; CHECK-NEXT: msub w9, w11, w12, w9 -; CHECK-NEXT: add w13, w16, w13, lsr #31 -; CHECK-NEXT: smull x11, w14, w8 -; CHECK-NEXT: msub w10, w13, w12, w10 -; CHECK-NEXT: lsr x11, x11, #32 ; CHECK-NEXT: smull x8, w15, w8 -; CHECK-NEXT: add w11, w11, w14 -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: asr w9, w11, #6 +; CHECK-NEXT: lsr x11, x16, #32 +; CHECK-NEXT: msub w10, w14, w12, w10 +; CHECK-NEXT: add w11, w11, w13 +; CHECK-NEXT: asr w14, w11, #6 ; CHECK-NEXT: lsr x8, x8, #32 -; CHECK-NEXT: add w9, w9, w11, lsr #31 +; CHECK-NEXT: add w11, w14, w11, lsr #31 ; CHECK-NEXT: add w8, w8, w15 -; CHECK-NEXT: mov v0.h[1], w10 -; CHECK-NEXT: asr w10, w8, #6 -; CHECK-NEXT: msub w9, w9, w12, w14 -; CHECK-NEXT: add w8, w10, w8, lsr #31 +; CHECK-NEXT: asr w14, w8, #6 +; CHECK-NEXT: fmov s0, w10 +; CHECK-NEXT: msub w11, w11, w12, w13 +; CHECK-NEXT: add w8, w14, w8, lsr #31 +; CHECK-NEXT: mov v0.h[1], w9 ; CHECK-NEXT: msub w8, w8, w12, w15 -; CHECK-NEXT: mov v0.h[2], w9 +; CHECK-NEXT: mov v0.h[2], w11 ; CHECK-NEXT: mov v0.h[3], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret @@ -103,46 +103,46 @@ define <4 x i16> @combine_srem_sdiv(<4 x i16> %x) { ; CHECK-LABEL: combine_srem_sdiv: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: smov w9, v0.h[0] ; CHECK-NEXT: mov w8, #37253 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: smov w9, v0.h[1] ; CHECK-NEXT: movk w8, #44150, lsl #16 -; CHECK-NEXT: smov w10, v0.h[1] -; CHECK-NEXT: smov w11, v0.h[2] -; CHECK-NEXT: smov w12, v0.h[3] -; CHECK-NEXT: mov w14, #95 +; CHECK-NEXT: smov w11, v0.h[0] +; CHECK-NEXT: smov w12, v0.h[2] +; CHECK-NEXT: mov w10, #95 ; CHECK-NEXT: smull x13, w9, w8 -; CHECK-NEXT: smull x15, w10, w8 +; CHECK-NEXT: smov w14, v0.h[3] +; CHECK-NEXT: smull x15, w11, w8 ; CHECK-NEXT: lsr x13, x13, #32 -; CHECK-NEXT: smull x16, w11, w8 -; CHECK-NEXT: add w13, w13, w9 +; CHECK-NEXT: smull x16, w12, w8 ; CHECK-NEXT: lsr x15, x15, #32 +; CHECK-NEXT: add w13, w13, w9 +; CHECK-NEXT: add w15, w15, w11 ; CHECK-NEXT: asr w17, w13, #6 -; CHECK-NEXT: add w15, w15, w10 ; CHECK-NEXT: add w13, w17, w13, lsr #31 ; CHECK-NEXT: asr w17, w15, #6 ; CHECK-NEXT: add w15, w17, w15, lsr #31 -; CHECK-NEXT: smull x8, w12, w8 -; CHECK-NEXT: msub w9, w13, w14, w9 +; CHECK-NEXT: smull x8, w14, w8 ; CHECK-NEXT: lsr x16, x16, #32 -; CHECK-NEXT: add w16, w16, w11 -; CHECK-NEXT: msub w10, w15, w14, w10 -; CHECK-NEXT: asr w17, w16, #6 +; CHECK-NEXT: msub w9, w13, w10, w9 +; CHECK-NEXT: msub w11, w15, w10, w11 +; CHECK-NEXT: add w16, w16, w12 ; CHECK-NEXT: lsr x8, x8, #32 -; CHECK-NEXT: fmov s1, w13 +; CHECK-NEXT: asr w17, w16, #6 +; CHECK-NEXT: add w8, w8, w14 ; CHECK-NEXT: add w16, w17, w16, lsr #31 -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: add w8, w8, w12 -; CHECK-NEXT: asr w9, w8, #6 -; CHECK-NEXT: add w8, w9, w8, lsr #31 -; CHECK-NEXT: msub w9, w16, w14, w11 -; CHECK-NEXT: mov v0.h[1], w10 -; CHECK-NEXT: mov v1.h[1], w15 -; CHECK-NEXT: msub w10, w8, w14, w12 -; CHECK-NEXT: mov v0.h[2], w9 +; CHECK-NEXT: asr w17, w8, #6 +; CHECK-NEXT: fmov s1, w15 +; CHECK-NEXT: fmov s0, w11 +; CHECK-NEXT: add w8, w17, w8, lsr #31 +; CHECK-NEXT: msub w11, w16, w10, w12 +; CHECK-NEXT: mov v1.h[1], w13 +; CHECK-NEXT: mov v0.h[1], w9 +; CHECK-NEXT: msub w9, w8, w10, w14 ; CHECK-NEXT: mov v1.h[2], w16 -; CHECK-NEXT: mov v0.h[3], w10 +; CHECK-NEXT: mov v0.h[2], w11 ; CHECK-NEXT: mov v1.h[3], w8 +; CHECK-NEXT: mov v0.h[3], w9 ; CHECK-NEXT: add v0.4h, v0.4h, v1.4h ; CHECK-NEXT: ret %1 = srem <4 x i16> %x, @@ -156,39 +156,39 @@ ; CHECK-LABEL: dont_fold_srem_power_of_two: ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: smov w9, v0.h[1] -; CHECK-NEXT: smov w10, v0.h[0] ; CHECK-NEXT: mov w8, #37253 +; CHECK-NEXT: smov w9, v0.h[1] ; CHECK-NEXT: movk w8, #44150, lsl #16 -; CHECK-NEXT: add w11, w9, #31 +; CHECK-NEXT: smov w10, v0.h[0] +; CHECK-NEXT: smov w11, v0.h[3] +; CHECK-NEXT: add w12, w9, #31 ; CHECK-NEXT: cmp w9, #0 -; CHECK-NEXT: add w12, w10, #63 -; CHECK-NEXT: csel w11, w11, w9, lt +; CHECK-NEXT: add w13, w10, #63 +; CHECK-NEXT: csel w12, w12, w9, lt ; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: and w11, w11, #0xffffffe0 -; CHECK-NEXT: csel w12, w12, w10, lt -; CHECK-NEXT: sub w9, w9, w11 -; CHECK-NEXT: and w12, w12, #0xffffffc0 -; CHECK-NEXT: sub w10, w10, w12 -; CHECK-NEXT: smov w12, v0.h[3] -; CHECK-NEXT: fmov s1, w10 -; CHECK-NEXT: smov w10, v0.h[2] -; CHECK-NEXT: smull x8, w12, w8 -; CHECK-NEXT: mov v1.h[1], w9 +; CHECK-NEXT: smull x8, w11, w8 +; CHECK-NEXT: csel w13, w13, w10, lt +; CHECK-NEXT: and w13, w13, #0xffffffc0 +; CHECK-NEXT: smov w14, v0.h[2] ; CHECK-NEXT: lsr x8, x8, #32 -; CHECK-NEXT: add w9, w10, #7 -; CHECK-NEXT: cmp w10, #0 -; CHECK-NEXT: csel w9, w9, w10, lt -; CHECK-NEXT: add w8, w8, w12 -; CHECK-NEXT: and w9, w9, #0xfffffff8 -; CHECK-NEXT: sub w9, w10, w9 -; CHECK-NEXT: asr w10, w8, #6 -; CHECK-NEXT: add w8, w10, w8, lsr #31 -; CHECK-NEXT: mov w10, #95 -; CHECK-NEXT: mov v1.h[2], w9 -; CHECK-NEXT: msub w8, w8, w10, w12 -; CHECK-NEXT: mov v1.h[3], w8 -; CHECK-NEXT: fmov d0, d1 +; CHECK-NEXT: sub w10, w10, w13 +; CHECK-NEXT: add w8, w8, w11 +; CHECK-NEXT: and w12, w12, #0xffffffe0 +; CHECK-NEXT: asr w15, w8, #6 +; CHECK-NEXT: add w13, w14, #7 +; CHECK-NEXT: cmp w14, #0 +; CHECK-NEXT: add w8, w15, w8, lsr #31 +; CHECK-NEXT: mov w15, #95 +; CHECK-NEXT: sub w9, w9, w12 +; CHECK-NEXT: csel w12, w13, w14, lt +; CHECK-NEXT: fmov s0, w10 +; CHECK-NEXT: msub w8, w8, w15, w11 +; CHECK-NEXT: and w11, w12, #0xfffffff8 +; CHECK-NEXT: sub w10, w14, w11 +; CHECK-NEXT: mov v0.h[1], w9 +; CHECK-NEXT: mov v0.h[2], w10 +; CHECK-NEXT: mov v0.h[3], w8 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %1 = srem <4 x i16> %x, ret <4 x i16> %1 @@ -198,40 +198,40 @@ define <4 x i16> @dont_fold_srem_one(<4 x i16> %x) { ; CHECK-LABEL: dont_fold_srem_one: ; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #17097 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: smov w8, v0.h[1] ; CHECK-NEXT: smov w9, v0.h[2] +; CHECK-NEXT: movk w8, #45590, lsl #16 ; CHECK-NEXT: mov w10, #30865 -; CHECK-NEXT: mov w11, #17097 ; CHECK-NEXT: movk w10, #51306, lsl #16 -; CHECK-NEXT: movk w11, #45590, lsl #16 -; CHECK-NEXT: mov w12, #654 -; CHECK-NEXT: smull x10, w8, w10 -; CHECK-NEXT: smull x11, w9, w11 +; CHECK-NEXT: smov w11, v0.h[1] +; CHECK-NEXT: smull x8, w9, w8 +; CHECK-NEXT: mov w13, #23 +; CHECK-NEXT: smull x10, w11, w10 +; CHECK-NEXT: lsr x8, x8, #32 +; CHECK-NEXT: add w8, w8, w9 ; CHECK-NEXT: lsr x10, x10, #32 -; CHECK-NEXT: lsr x11, x11, #32 -; CHECK-NEXT: add w10, w10, w8 -; CHECK-NEXT: add w11, w11, w9 -; CHECK-NEXT: asr w13, w10, #9 -; CHECK-NEXT: add w10, w13, w10, lsr #31 -; CHECK-NEXT: asr w13, w11, #4 -; CHECK-NEXT: add w11, w13, w11, lsr #31 -; CHECK-NEXT: smov w13, v0.h[3] -; CHECK-NEXT: msub w8, w10, w12, w8 -; CHECK-NEXT: movi d0, #0000000000000000 +; CHECK-NEXT: asr w12, w8, #4 +; CHECK-NEXT: add w10, w10, w11 +; CHECK-NEXT: add w8, w12, w8, lsr #31 ; CHECK-NEXT: mov w12, #47143 -; CHECK-NEXT: mov w10, #23 ; CHECK-NEXT: movk w12, #24749, lsl #16 -; CHECK-NEXT: msub w9, w11, w10, w9 -; CHECK-NEXT: smull x10, w13, w12 -; CHECK-NEXT: mov v0.h[1], w8 -; CHECK-NEXT: lsr x8, x10, #63 -; CHECK-NEXT: asr x10, x10, #43 -; CHECK-NEXT: add w8, w10, w8 -; CHECK-NEXT: mov w10, #5423 -; CHECK-NEXT: mov v0.h[2], w9 -; CHECK-NEXT: msub w8, w8, w10, w13 -; CHECK-NEXT: mov v0.h[3], w8 +; CHECK-NEXT: msub w8, w8, w13, w9 +; CHECK-NEXT: smov w9, v0.h[3] +; CHECK-NEXT: asr w13, w10, #9 +; CHECK-NEXT: movi d0, #0000000000000000 +; CHECK-NEXT: add w10, w13, w10, lsr #31 +; CHECK-NEXT: mov w13, #654 +; CHECK-NEXT: smull x12, w9, w12 +; CHECK-NEXT: msub w10, w10, w13, w11 +; CHECK-NEXT: lsr x11, x12, #63 +; CHECK-NEXT: asr x12, x12, #43 +; CHECK-NEXT: add w11, w12, w11 +; CHECK-NEXT: mov w12, #5423 +; CHECK-NEXT: mov v0.h[1], w10 +; CHECK-NEXT: msub w9, w11, w12, w9 +; CHECK-NEXT: mov v0.h[2], w8 +; CHECK-NEXT: mov v0.h[3], w9 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %1 = srem <4 x i16> %x, @@ -242,38 +242,38 @@ define <4 x i16> @dont_fold_srem_i16_smax(<4 x i16> %x) { ; CHECK-LABEL: dont_fold_srem_i16_smax: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: smov w8, v0.h[2] ; CHECK-NEXT: mov w9, #17097 -; CHECK-NEXT: smov w10, v0.h[1] +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: smov w10, v0.h[2] ; CHECK-NEXT: movk w9, #45590, lsl #16 -; CHECK-NEXT: mov w11, #32767 -; CHECK-NEXT: smov w12, v0.h[3] -; CHECK-NEXT: movi d1, #0000000000000000 -; CHECK-NEXT: smull x9, w8, w9 -; CHECK-NEXT: add w11, w10, w11 -; CHECK-NEXT: cmp w10, #0 +; CHECK-NEXT: smov w12, v0.h[1] +; CHECK-NEXT: mov w13, #47143 +; CHECK-NEXT: mov w8, #32767 +; CHECK-NEXT: smull x9, w10, w9 +; CHECK-NEXT: movk w13, #24749, lsl #16 +; CHECK-NEXT: smov w14, v0.h[3] +; CHECK-NEXT: add w8, w12, w8 ; CHECK-NEXT: lsr x9, x9, #32 -; CHECK-NEXT: csel w11, w11, w10, lt -; CHECK-NEXT: add w9, w9, w8 -; CHECK-NEXT: and w11, w11, #0xffff8000 -; CHECK-NEXT: asr w13, w9, #4 -; CHECK-NEXT: sub w10, w10, w11 -; CHECK-NEXT: mov w11, #47143 -; CHECK-NEXT: add w9, w13, w9, lsr #31 -; CHECK-NEXT: mov w13, #23 -; CHECK-NEXT: movk w11, #24749, lsl #16 -; CHECK-NEXT: mov v1.h[1], w10 -; CHECK-NEXT: msub w8, w9, w13, w8 -; CHECK-NEXT: smull x9, w12, w11 -; CHECK-NEXT: lsr x10, x9, #63 -; CHECK-NEXT: asr x9, x9, #43 +; CHECK-NEXT: cmp w12, #0 ; CHECK-NEXT: add w9, w9, w10 -; CHECK-NEXT: mov w10, #5423 -; CHECK-NEXT: mov v1.h[2], w8 -; CHECK-NEXT: msub w8, w9, w10, w12 -; CHECK-NEXT: mov v1.h[3], w8 -; CHECK-NEXT: fmov d0, d1 +; CHECK-NEXT: smull x13, w14, w13 +; CHECK-NEXT: csel w8, w8, w12, lt +; CHECK-NEXT: asr w15, w9, #4 +; CHECK-NEXT: mov w11, #23 +; CHECK-NEXT: and w8, w8, #0xffff8000 +; CHECK-NEXT: add w9, w15, w9, lsr #31 +; CHECK-NEXT: lsr x15, x13, #63 +; CHECK-NEXT: asr x13, x13, #43 +; CHECK-NEXT: sub w8, w12, w8 +; CHECK-NEXT: add w13, w13, w15 +; CHECK-NEXT: mov w15, #5423 +; CHECK-NEXT: msub w9, w9, w11, w10 +; CHECK-NEXT: movi d0, #0000000000000000 +; CHECK-NEXT: msub w10, w13, w15, w14 +; CHECK-NEXT: mov v0.h[1], w8 +; CHECK-NEXT: mov v0.h[2], w9 +; CHECK-NEXT: mov v0.h[3], w10 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %1 = srem <4 x i16> %x, ret <4 x i16> %1 @@ -286,38 +286,38 @@ ; CHECK-NEXT: mov x8, #8549 ; CHECK-NEXT: fmov x9, d1 ; CHECK-NEXT: movk x8, #22795, lsl #16 -; CHECK-NEXT: mov x12, #6055 +; CHECK-NEXT: mov x12, #21445 ; CHECK-NEXT: movk x8, #17096, lsl #32 -; CHECK-NEXT: movk x12, #58853, lsl #16 +; CHECK-NEXT: mov x11, #6055 ; CHECK-NEXT: movk x8, #45590, lsl #48 -; CHECK-NEXT: mov x14, #21445 -; CHECK-NEXT: mov x10, v1.d[1] -; CHECK-NEXT: movk x12, #47142, lsl #32 +; CHECK-NEXT: movk x12, #1603, lsl #16 +; CHECK-NEXT: movk x11, #58853, lsl #16 +; CHECK-NEXT: movk x12, #15432, lsl #32 ; CHECK-NEXT: smulh x8, x9, x8 -; CHECK-NEXT: movk x14, #1603, lsl #16 -; CHECK-NEXT: mov x11, v0.d[1] -; CHECK-NEXT: movk x12, #24749, lsl #48 +; CHECK-NEXT: movk x11, #47142, lsl #32 +; CHECK-NEXT: movk x12, #25653, lsl #48 +; CHECK-NEXT: mov x14, v0.d[1] ; CHECK-NEXT: add x8, x8, x9 -; CHECK-NEXT: movk x14, #15432, lsl #32 +; CHECK-NEXT: mov x10, v1.d[1] +; CHECK-NEXT: movk x11, #24749, lsl #48 ; CHECK-NEXT: asr x13, x8, #4 -; CHECK-NEXT: movk x14, #25653, lsl #48 ; CHECK-NEXT: add x8, x13, x8, lsr #63 ; CHECK-NEXT: mov w13, #23 -; CHECK-NEXT: smulh x12, x10, x12 -; CHECK-NEXT: smulh x14, x11, x14 +; CHECK-NEXT: smulh x12, x14, x12 +; CHECK-NEXT: smulh x11, x10, x11 ; CHECK-NEXT: msub x8, x8, x13, x9 -; CHECK-NEXT: asr x13, x12, #11 +; CHECK-NEXT: asr x13, x12, #8 ; CHECK-NEXT: add x12, x13, x12, lsr #63 -; CHECK-NEXT: asr x13, x14, #8 -; CHECK-NEXT: mov w9, #5423 -; CHECK-NEXT: add x13, x13, x14, lsr #63 -; CHECK-NEXT: mov w14, #654 -; CHECK-NEXT: msub x9, x12, x9, x10 +; CHECK-NEXT: asr x13, x11, #11 +; CHECK-NEXT: mov w9, #654 +; CHECK-NEXT: add x11, x13, x11, lsr #63 +; CHECK-NEXT: mov w13, #5423 +; CHECK-NEXT: msub x9, x12, x9, x14 ; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: msub x10, x13, x14, x11 +; CHECK-NEXT: msub x10, x11, x13, x10 ; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: mov v1.d[1], x9 -; CHECK-NEXT: mov v0.d[1], x10 +; CHECK-NEXT: mov v0.d[1], x9 +; CHECK-NEXT: mov v1.d[1], x10 ; CHECK-NEXT: ret %1 = srem <4 x i64> %x, ret <4 x i64> %1 diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll --- a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll @@ -55,9 +55,9 @@ define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind { ; CHECK-LABEL: v64i8: ; CHECK: // %bb.0: -; CHECK-NEXT: sqsub v2.16b, v2.16b, v6.16b ; CHECK-NEXT: sqsub v0.16b, v0.16b, v4.16b ; CHECK-NEXT: sqsub v1.16b, v1.16b, v5.16b +; CHECK-NEXT: sqsub v2.16b, v2.16b, v6.16b ; CHECK-NEXT: sqsub v3.16b, v3.16b, v7.16b ; CHECK-NEXT: ret %z = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> %x, <64 x i8> %y) @@ -86,9 +86,9 @@ define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind { ; CHECK-LABEL: v32i16: ; CHECK: // %bb.0: -; CHECK-NEXT: sqsub v2.8h, v2.8h, v6.8h ; CHECK-NEXT: sqsub v0.8h, v0.8h, v4.8h ; CHECK-NEXT: sqsub v1.8h, v1.8h, v5.8h +; CHECK-NEXT: sqsub v2.8h, v2.8h, v6.8h ; CHECK-NEXT: sqsub v3.8h, v3.8h, v7.8h ; CHECK-NEXT: ret %z = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> %x, <32 x i16> %y) @@ -98,9 +98,9 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind { ; CHECK-LABEL: v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: sqsub v0.8b, v0.8b, v1.8b +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: sqsub v0.8b, v1.8b, v0.8b ; CHECK-NEXT: str d0, [x2] ; CHECK-NEXT: ret %x = load <8 x i8>, <8 x i8>* %px @@ -144,10 +144,10 @@ ; CHECK-NEXT: shl v1.2s, v1.2s, #24 ; CHECK-NEXT: sqsub v0.2s, v1.2s, v0.2s ; CHECK-NEXT: ushr v0.2s, v0.2s, #24 -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strb w9, [x2] -; CHECK-NEXT: strb w8, [x2, #1] +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: mov w9, v0.s[1] +; CHECK-NEXT: strb w9, [x2, #1] +; CHECK-NEXT: strb w8, [x2] ; CHECK-NEXT: ret %x = load <2 x i8>, <2 x i8>* %px %y = load <2 x i8>, <2 x i8>* %py @@ -159,9 +159,9 @@ define void @v4i16(<4 x i16>* %px, <4 x i16>* %py, <4 x i16>* %pz) nounwind { ; CHECK-LABEL: v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: sqsub v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: sqsub v0.4h, v1.4h, v0.4h ; CHECK-NEXT: str d0, [x2] ; CHECK-NEXT: ret %x = load <4 x i16>, <4 x i16>* %px @@ -184,10 +184,10 @@ ; CHECK-NEXT: shl v1.2s, v1.2s, #16 ; CHECK-NEXT: sqsub v0.2s, v1.2s, v0.2s ; CHECK-NEXT: ushr v0.2s, v0.2s, #16 -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strh w9, [x2] -; CHECK-NEXT: strh w8, [x2, #2] +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: mov w9, v0.s[1] +; CHECK-NEXT: strh w9, [x2, #2] +; CHECK-NEXT: strh w8, [x2] ; CHECK-NEXT: ret %x = load <2 x i16>, <2 x i16>* %px %y = load <2 x i16>, <2 x i16>* %py @@ -225,9 +225,9 @@ define void @v1i8(<1 x i8>* %px, <1 x i8>* %py, <1 x i8>* %pz) nounwind { ; CHECK-LABEL: v1i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr b0, [x0] -; CHECK-NEXT: ldr b1, [x1] -; CHECK-NEXT: sqsub v0.8b, v0.8b, v1.8b +; CHECK-NEXT: ldr b0, [x1] +; CHECK-NEXT: ldr b1, [x0] +; CHECK-NEXT: sqsub v0.8b, v1.8b, v0.8b ; CHECK-NEXT: st1 { v0.b }[0], [x2] ; CHECK-NEXT: ret %x = load <1 x i8>, <1 x i8>* %px @@ -240,9 +240,9 @@ define void @v1i16(<1 x i16>* %px, <1 x i16>* %py, <1 x i16>* %pz) nounwind { ; CHECK-LABEL: v1i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr h0, [x0] -; CHECK-NEXT: ldr h1, [x1] -; CHECK-NEXT: sqsub v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ldr h0, [x1] +; CHECK-NEXT: ldr h1, [x0] +; CHECK-NEXT: sqsub v0.4h, v1.4h, v0.4h ; CHECK-NEXT: str h0, [x2] ; CHECK-NEXT: ret %x = load <1 x i16>, <1 x i16>* %px @@ -255,10 +255,10 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind { ; CHECK-LABEL: v16i4: ; CHECK: // %bb.0: -; CHECK-NEXT: shl v1.16b, v1.16b, #4 ; CHECK-NEXT: shl v0.16b, v0.16b, #4 -; CHECK-NEXT: sshr v1.16b, v1.16b, #4 +; CHECK-NEXT: shl v1.16b, v1.16b, #4 ; CHECK-NEXT: sshr v0.16b, v0.16b, #4 +; CHECK-NEXT: sshr v1.16b, v1.16b, #4 ; CHECK-NEXT: shl v1.16b, v1.16b, #4 ; CHECK-NEXT: shl v0.16b, v0.16b, #4 ; CHECK-NEXT: sqsub v0.16b, v0.16b, v1.16b @@ -310,9 +310,9 @@ define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind { ; CHECK-LABEL: v16i32: ; CHECK: // %bb.0: -; CHECK-NEXT: sqsub v2.4s, v2.4s, v6.4s ; CHECK-NEXT: sqsub v0.4s, v0.4s, v4.4s ; CHECK-NEXT: sqsub v1.4s, v1.4s, v5.4s +; CHECK-NEXT: sqsub v2.4s, v2.4s, v6.4s ; CHECK-NEXT: sqsub v3.4s, v3.4s, v7.4s ; CHECK-NEXT: ret %z = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> %x, <16 x i32> %y) @@ -341,9 +341,9 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind { ; CHECK-LABEL: v8i64: ; CHECK: // %bb.0: -; CHECK-NEXT: sqsub v2.2d, v2.2d, v6.2d ; CHECK-NEXT: sqsub v0.2d, v0.2d, v4.2d ; CHECK-NEXT: sqsub v1.2d, v1.2d, v5.2d +; CHECK-NEXT: sqsub v2.2d, v2.2d, v6.2d ; CHECK-NEXT: sqsub v3.2d, v3.2d, v7.2d ; CHECK-NEXT: ret %z = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> %x, <8 x i64> %y) diff --git a/llvm/test/CodeGen/AArch64/sve-fcvt.ll b/llvm/test/CodeGen/AArch64/sve-fcvt.ll --- a/llvm/test/CodeGen/AArch64/sve-fcvt.ll +++ b/llvm/test/CodeGen/AArch64/sve-fcvt.ll @@ -454,9 +454,9 @@ define @scvtf_h_nxv2i1( %a) { ; CHECK-LABEL: scvtf_h_nxv2i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: scvtf z0.h, p1/m, z0.d +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: scvtf z0.h, p0/m, z0.d ; CHECK-NEXT: ret %res = sitofp %a to ret %res @@ -495,9 +495,9 @@ define @scvtf_h_nxv3i1( %a) { ; CHECK-LABEL: scvtf_h_nxv3i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: scvtf z0.h, p1/m, z0.s +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: scvtf z0.h, p0/m, z0.s ; CHECK-NEXT: ret %res = sitofp %a to ret %res @@ -516,9 +516,9 @@ define @scvtf_h_nxv4i1( %a) { ; CHECK-LABEL: scvtf_h_nxv4i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: scvtf z0.h, p1/m, z0.s +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: scvtf z0.h, p0/m, z0.s ; CHECK-NEXT: ret %res = sitofp %a to ret %res @@ -547,9 +547,9 @@ define @scvtf_h_nxv7i1( %a) { ; CHECK-LABEL: scvtf_h_nxv7i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.h ; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: scvtf z0.h, p1/m, z0.h +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: scvtf z0.h, p0/m, z0.h ; CHECK-NEXT: ret %res = sitofp %a to ret %res @@ -568,9 +568,9 @@ define @scvtf_h_nxv8i1( %a) { ; CHECK-LABEL: scvtf_h_nxv8i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.h ; CHECK-NEXT: mov z0.h, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: scvtf z0.h, p1/m, z0.h +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: scvtf z0.h, p0/m, z0.h ; CHECK-NEXT: ret %res = sitofp %a to ret %res @@ -589,9 +589,9 @@ define @scvtf_s_nxv2i1( %a) { ; CHECK-LABEL: scvtf_s_nxv2i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: scvtf z0.s, p1/m, z0.d +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: scvtf z0.s, p0/m, z0.d ; CHECK-NEXT: ret %res = sitofp %a to ret %res @@ -620,9 +620,9 @@ define @scvtf_s_nxv3i1( %a) { ; CHECK-LABEL: scvtf_s_nxv3i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: scvtf z0.s, p1/m, z0.s +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: scvtf z0.s, p0/m, z0.s ; CHECK-NEXT: ret %res = sitofp %a to ret %res @@ -641,9 +641,9 @@ define @scvtf_s_nxv4i1( %a) { ; CHECK-LABEL: scvtf_s_nxv4i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: mov z0.s, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: scvtf z0.s, p1/m, z0.s +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: scvtf z0.s, p0/m, z0.s ; CHECK-NEXT: ret %res = sitofp %a to ret %res @@ -662,9 +662,9 @@ define @scvtf_d_nxv2i1( %a) { ; CHECK-LABEL: scvtf_d_nxv2i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: mov z0.d, p0/z, #-1 // =0xffffffffffffffff -; CHECK-NEXT: scvtf z0.d, p1/m, z0.d +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: scvtf z0.d, p0/m, z0.d ; CHECK-NEXT: ret %res = sitofp %a to ret %res @@ -695,9 +695,9 @@ define @ucvtf_h_nxv2i1( %a) { ; CHECK-LABEL: ucvtf_h_nxv2i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: mov z0.d, p0/z, #1 // =0x1 -; CHECK-NEXT: ucvtf z0.h, p1/m, z0.d +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ucvtf z0.h, p0/m, z0.d ; CHECK-NEXT: ret %res = uitofp %a to ret %res @@ -736,9 +736,9 @@ define @ucvtf_h_nxv3i1( %a) { ; CHECK-LABEL: ucvtf_h_nxv3i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1 -; CHECK-NEXT: ucvtf z0.h, p1/m, z0.s +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s ; CHECK-NEXT: ret %res = uitofp %a to ret %res @@ -767,9 +767,9 @@ define @ucvtf_h_nxv4i1( %a) { ; CHECK-LABEL: ucvtf_h_nxv4i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1 -; CHECK-NEXT: ucvtf z0.h, p1/m, z0.s +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ucvtf z0.h, p0/m, z0.s ; CHECK-NEXT: ret %res = uitofp %a to ret %res @@ -798,9 +798,9 @@ define @ucvtf_h_nxv8i1( %a) { ; CHECK-LABEL: ucvtf_h_nxv8i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.h ; CHECK-NEXT: mov z0.h, p0/z, #1 // =0x1 -; CHECK-NEXT: ucvtf z0.h, p1/m, z0.h +; CHECK-NEXT: ptrue p0.h +; CHECK-NEXT: ucvtf z0.h, p0/m, z0.h ; CHECK-NEXT: ret %res = uitofp %a to ret %res @@ -819,9 +819,9 @@ define @ucvtf_s_nxv2i1( %a) { ; CHECK-LABEL: ucvtf_s_nxv2i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: mov z0.d, p0/z, #1 // =0x1 -; CHECK-NEXT: ucvtf z0.s, p1/m, z0.d +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ucvtf z0.s, p0/m, z0.d ; CHECK-NEXT: ret %res = uitofp %a to ret %res @@ -850,9 +850,9 @@ define @ucvtf_s_nxv4i1( %a) { ; CHECK-LABEL: ucvtf_s_nxv4i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.s ; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1 -; CHECK-NEXT: ucvtf z0.s, p1/m, z0.s +; CHECK-NEXT: ptrue p0.s +; CHECK-NEXT: ucvtf z0.s, p0/m, z0.s ; CHECK-NEXT: ret %res = uitofp %a to ret %res @@ -871,9 +871,9 @@ define @ucvtf_d_nxv2i1( %a) { ; CHECK-LABEL: ucvtf_d_nxv2i1: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: mov z0.d, p0/z, #1 // =0x1 -; CHECK-NEXT: ucvtf z0.d, p1/m, z0.d +; CHECK-NEXT: ptrue p0.d +; CHECK-NEXT: ucvtf z0.d, p0/m, z0.d ; CHECK-NEXT: ret %res = uitofp %a to ret %res diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-extend-trunc.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-extend-trunc.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-extend-trunc.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-extend-trunc.ll @@ -179,10 +179,10 @@ ; VBITS_EQ_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; VBITS_EQ_256-NEXT: uunpklo z0.s, z0.h ; VBITS_EQ_256-NEXT: uunpklo z0.d, z0.s -; VBITS_EQ_256-NEXT: fcvt z0.d, p0/m, z0.h -; VBITS_EQ_256-NEXT: st1d { z0.d }, p0, [x1] ; VBITS_EQ_256-NEXT: uunpklo z1.s, z1.h +; VBITS_EQ_256-NEXT: fcvt z0.d, p0/m, z0.h ; VBITS_EQ_256-NEXT: uunpklo z1.d, z1.s +; VBITS_EQ_256-NEXT: st1d { z0.d }, p0, [x1] ; VBITS_EQ_256-NEXT: fcvt z1.d, p0/m, z1.h ; VBITS_EQ_256-NEXT: st1d { z1.d }, p0, [x1, x8, lsl #3] ; VBITS_EQ_256-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-div.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-div.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-div.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-div.ll @@ -35,10 +35,10 @@ ; CHECK-NEXT: sdiv [[DIV:z[0-9]+]].s, [[PG0]]/m, [[OP1_LO_LO]].s, [[OP2_LO_LO]].s ; CHECK-NEXT: uzp1 [[RES:z[0-9]+]].h, [[DIV]].h, [[DIV]].h ; CHECK-NEXT: umov [[SCALAR0:w[0-9]+]], [[VEC:v[0-9]+]].h[0] -; CHECK-NEXT: umov [[SCALAR1:w[0-9]+]], [[VEC]].h[1] ; CHECK-NEXT: fmov s0, [[SCALAR0]] -; CHECK-NEXT: umov [[SCALAR2:w[0-9]+]], [[VEC]].h[2] +; CHECK-NEXT: umov [[SCALAR1:w[0-9]+]], [[VEC]].h[1] ; CHECK-NEXT: mov [[FINAL:v[0-9]+]].b[1], [[SCALAR1]] +; CHECK-NEXT: umov [[SCALAR2:w[0-9]+]], [[VEC]].h[2] ; CHECK-NEXT: mov [[FINAL]].b[2], [[SCALAR2]] ; CHECK-NEXT: umov [[SCALAR3:w[0-9]+]], [[VEC]].h[3] ; CHECK-NEXT: mov [[FINAL]].b[3], [[SCALAR3]] @@ -641,10 +641,10 @@ ; CHECK-NEXT: udiv [[DIV:z[0-9]+]].s, [[PG0]]/m, [[OP1_LO_LO]].s, [[OP2_LO_LO]].s ; CHECK-NEXT: uzp1 [[RES:z[0-9]+]].h, [[DIV]].h, [[DIV]].h ; CHECK-NEXT: umov [[SCALAR0:w[0-9]+]], [[VEC:v[0-9]+]].h[0] -; CHECK-NEXT: umov [[SCALAR1:w[0-9]+]], [[VEC]].h[1] ; CHECK-NEXT: fmov s0, [[SCALAR0]] -; CHECK-NEXT: umov [[SCALAR2:w[0-9]+]], [[VEC]].h[2] +; CHECK-NEXT: umov [[SCALAR1:w[0-9]+]], [[VEC]].h[1] ; CHECK-NEXT: mov [[FINAL:v[0-9]+]].b[1], [[SCALAR1]] +; CHECK-NEXT: umov [[SCALAR2:w[0-9]+]], [[VEC]].h[2] ; CHECK-NEXT: mov [[FINAL]].b[2], [[SCALAR2]] ; CHECK-NEXT: umov [[SCALAR3:w[0-9]+]], [[VEC]].h[3] ; CHECK-NEXT: mov [[FINAL]].b[3], [[SCALAR3]] diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-mulh.ll @@ -39,10 +39,10 @@ ; CHECK-NEXT: smull v0.8h, v0.8b, v1.8b ; CHECK-NEXT: ushr v1.8h, v0.8h, #8 ; CHECK-NEXT: umov w8, v1.h[0] -; CHECK-NEXT: umov w9, v1.h[1] ; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: umov w8, v1.h[1] +; CHECK-NEXT: mov v0.b[1], w8 ; CHECK-NEXT: umov w8, v1.h[2] -; CHECK-NEXT: mov v0.b[1], w9 ; CHECK-NEXT: mov v0.b[2], w8 ; CHECK-NEXT: umov w8, v1.h[3] ; CHECK-NEXT: mov v0.b[3], w8 @@ -667,10 +667,10 @@ ; CHECK-NEXT: umull v0.8h, v0.8b, v1.8b ; CHECK-NEXT: ushr v1.8h, v0.8h, #8 ; CHECK-NEXT: umov w8, v1.h[0] -; CHECK-NEXT: umov w9, v1.h[1] ; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: umov w8, v1.h[1] +; CHECK-NEXT: mov v0.b[1], w8 ; CHECK-NEXT: umov w8, v1.h[2] -; CHECK-NEXT: mov v0.b[1], w9 ; CHECK-NEXT: mov v0.b[2], w8 ; CHECK-NEXT: umov w8, v1.h[3] ; CHECK-NEXT: mov v0.b[3], w8 diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-rem.ll @@ -35,10 +35,10 @@ ; CHECK-NEXT: sdivr [[DIV1:z[0-9]+]].s, [[PG1]]/m, [[OP2_LO_LO]].s, [[OP1_LO_LO]].s ; CHECK-NEXT: uzp1 [[UZP1:z[0-9]+]].h, [[DIV1]].h, [[DIV1]].h ; CHECK-NEXT: umov [[SCALAR1:w[0-9]+]], [[VEC:v[0-9]+]].h[0] -; CHECK-NEXT: umov [[SCALAR2:w[0-9]+]], [[VEC]].h[1] ; CHECK-NEXT: fmov s3, [[SCALAR1]] -; CHECK-NEXT: umov [[SCALAR3:w[0-9]+]], [[VEC]].h[2] +; CHECK-NEXT: umov [[SCALAR2:w[0-9]+]], [[VEC]].h[1] ; CHECK-NEXT: mov [[FINAL:v[0-9]+]].b[1], [[SCALAR2]] +; CHECK-NEXT: umov [[SCALAR3:w[0-9]+]], [[VEC]].h[2] ; CHECK-NEXT: mov [[FINAL]].b[2], [[SCALAR3]] ; CHECK-NEXT: umov [[SCALAR4:w[0-9]+]], [[VEC]].h[3] ; CHECK-NEXT: mov [[FINAL]].b[3], [[SCALAR4]] @@ -714,10 +714,10 @@ ; CHECK-NEXT: udivr [[DIV1:z[0-9]+]].s, [[PG1]]/m, [[OP2_LO_LO]].s, [[OP1_LO_LO]].s ; CHECK-NEXT: uzp1 [[UZP1:z[0-9]+]].h, [[DIV1]].h, [[DIV1]].h ; CHECK-NEXT: umov [[SCALAR0:w[0-9]+]], [[VEC:v[0-9]+]].h[0] -; CHECK-NEXT: umov [[SCALAR1:w[0-9]+]], [[VEC]].h[1] ; CHECK-NEXT: fmov s3, [[SCALAR0]] -; CHECK-NEXT: umov [[SCALAR2:w[0-9]+]], [[VEC]].h[2] +; CHECK-NEXT: umov [[SCALAR1:w[0-9]+]], [[VEC]].h[1] ; CHECK-NEXT: mov [[FINAL:v[0-9]+]].b[1], [[SCALAR1]] +; CHECK-NEXT: umov [[SCALAR2:w[0-9]+]], [[VEC]].h[2] ; CHECK-NEXT: mov [[FINAL]].b[2], [[SCALAR2]] ; CHECK-NEXT: umov [[SCALAR3:w[0-9]+]], [[VEC]].h[3] ; CHECK-NEXT: mov [[FINAL]].b[3], [[SCALAR3]] diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-int-to-fp.ll @@ -278,10 +278,10 @@ ; VBITS_EQ_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; VBITS_EQ_256-NEXT: uunpklo z0.s, z0.h ; VBITS_EQ_256-NEXT: uunpklo z0.d, z0.s -; VBITS_EQ_256-NEXT: ucvtf z0.d, p0/m, z0.d -; VBITS_EQ_256-NEXT: st1d { z0.d }, p0, [x1] ; VBITS_EQ_256-NEXT: uunpklo z1.s, z1.h +; VBITS_EQ_256-NEXT: ucvtf z0.d, p0/m, z0.d ; VBITS_EQ_256-NEXT: uunpklo z1.d, z1.s +; VBITS_EQ_256-NEXT: st1d { z0.d }, p0, [x1] ; VBITS_EQ_256-NEXT: ucvtf z1.d, p0/m, z1.d ; VBITS_EQ_256-NEXT: st1d { z1.d }, p0, [x1, x8, lsl #3] ; VBITS_EQ_256-NEXT: ret @@ -1221,10 +1221,10 @@ ; VBITS_EQ_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; VBITS_EQ_256-NEXT: sunpklo z0.s, z0.h ; VBITS_EQ_256-NEXT: sunpklo z0.d, z0.s -; VBITS_EQ_256-NEXT: scvtf z0.d, p0/m, z0.d -; VBITS_EQ_256-NEXT: st1d { z0.d }, p0, [x1] ; VBITS_EQ_256-NEXT: sunpklo z1.s, z1.h +; VBITS_EQ_256-NEXT: scvtf z0.d, p0/m, z0.d ; VBITS_EQ_256-NEXT: sunpklo z1.d, z1.s +; VBITS_EQ_256-NEXT: st1d { z0.d }, p0, [x1] ; VBITS_EQ_256-NEXT: scvtf z1.d, p0/m, z1.d ; VBITS_EQ_256-NEXT: st1d { z1.d }, p0, [x1, x8, lsl #3] ; VBITS_EQ_256-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-gather.ll @@ -29,10 +29,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldrb w8, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldrb w9, [x0, #1] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: ldrb w8, [x0, #1] -; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: cmeq v0.2s, v0.2s, #0 ; CHECK-NEXT: ushll v0.2d, v0.2s, #0 ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 @@ -105,7 +105,6 @@ ; VBITS_EQ_256-NEXT: uzp1 v0.8b, v1.8b, v0.8b ; VBITS_EQ_256-NEXT: str d0, [x0] ; VBITS_EQ_256-NEXT: ret -; ; VBITS_GE_512-LABEL: masked_gather_v8i8: ; VBITS_GE_512: // %bb.0: ; VBITS_GE_512-NEXT: ldr d0, [x0] @@ -159,15 +158,15 @@ ; VBITS_GE_2048-LABEL: masked_gather_v32i8: ; VBITS_GE_2048: // %bb.0: ; VBITS_GE_2048-NEXT: ptrue p0.b, vl32 -; VBITS_GE_2048-NEXT: ptrue p2.d, vl32 ; VBITS_GE_2048-NEXT: ld1b { z0.b }, p0/z, [x0] -; VBITS_GE_2048-NEXT: ld1d { z1.d }, p2/z, [x1] ; VBITS_GE_2048-NEXT: cmpeq p1.b, p0/z, z0.b, #0 ; VBITS_GE_2048-NEXT: mov z0.b, p1/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 +; VBITS_GE_2048-NEXT: ld1d { z1.d }, p1/z, [x1] ; VBITS_GE_2048-NEXT: uunpklo z0.h, z0.b ; VBITS_GE_2048-NEXT: uunpklo z0.s, z0.h ; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_2048-NEXT: cmpne p1.d, p2/z, z0.d, #0 +; VBITS_GE_2048-NEXT: cmpne p1.d, p1/z, z0.d, #0 ; VBITS_GE_2048-NEXT: ld1b { z0.d }, p1/z, [z1.d] ; VBITS_GE_2048-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_2048-NEXT: uzp1 z0.h, z0.h, z0.h @@ -191,10 +190,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldrh w8, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldrh w9, [x0, #2] ; CHECK-NEXT: ldr q1, [x1] ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: ldrh w8, [x0, #2] -; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: cmeq v0.2s, v0.2s, #0 ; CHECK-NEXT: ushll v0.2d, v0.2s, #0 ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 @@ -247,10 +246,10 @@ ; VBITS_EQ_256-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; VBITS_EQ_256-NEXT: uunpklo z0.s, z0.h ; VBITS_EQ_256-NEXT: uunpklo z0.d, z0.s -; VBITS_EQ_256-NEXT: cmpne p1.d, p0/z, z0.d, #0 -; VBITS_EQ_256-NEXT: ld1h { z0.d }, p1/z, [z3.d] ; VBITS_EQ_256-NEXT: uunpklo z1.s, z1.h +; VBITS_EQ_256-NEXT: cmpne p1.d, p0/z, z0.d, #0 ; VBITS_EQ_256-NEXT: uunpklo z1.d, z1.s +; VBITS_EQ_256-NEXT: ld1h { z0.d }, p1/z, [z3.d] ; VBITS_EQ_256-NEXT: cmpne p0.d, p0/z, z1.d, #0 ; VBITS_EQ_256-NEXT: ld1h { z1.d }, p0/z, [z2.d] ; VBITS_EQ_256-NEXT: uzp1 z0.s, z0.s, z0.s @@ -260,7 +259,6 @@ ; VBITS_EQ_256-NEXT: mov v0.d[1], v1.d[0] ; VBITS_EQ_256-NEXT: str q0, [x0] ; VBITS_EQ_256-NEXT: ret -; ; VBITS_GE_512-LABEL: masked_gather_v8i16: ; VBITS_GE_512: // %bb.0: ; VBITS_GE_512-NEXT: ldr q0, [x0] @@ -287,15 +285,15 @@ ; VBITS_GE_1024-LABEL: masked_gather_v16i16: ; VBITS_GE_1024: // %bb.0: ; VBITS_GE_1024-NEXT: ptrue p0.h, vl16 -; VBITS_GE_1024-NEXT: ptrue p1.d, vl16 +; VBITS_GE_1024-NEXT: ptrue p2.d, vl16 ; VBITS_GE_1024-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_GE_1024-NEXT: cmpeq p2.h, p0/z, z0.h, #0 -; VBITS_GE_1024-NEXT: ld1d { z0.d }, p1/z, [x1] -; VBITS_GE_1024-NEXT: mov z1.h, p2/z, #-1 // =0xffffffffffffffff -; VBITS_GE_1024-NEXT: uunpklo z1.s, z1.h -; VBITS_GE_1024-NEXT: uunpklo z1.d, z1.s -; VBITS_GE_1024-NEXT: cmpne p1.d, p1/z, z1.d, #0 -; VBITS_GE_1024-NEXT: ld1h { z0.d }, p1/z, [z0.d] +; VBITS_GE_1024-NEXT: ld1d { z1.d }, p2/z, [x1] +; VBITS_GE_1024-NEXT: cmpeq p1.h, p0/z, z0.h, #0 +; VBITS_GE_1024-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff +; VBITS_GE_1024-NEXT: uunpklo z0.s, z0.h +; VBITS_GE_1024-NEXT: uunpklo z0.d, z0.s +; VBITS_GE_1024-NEXT: cmpne p1.d, p2/z, z0.d, #0 +; VBITS_GE_1024-NEXT: ld1h { z0.d }, p1/z, [z1.d] ; VBITS_GE_1024-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_1024-NEXT: uzp1 z0.h, z0.h, z0.h ; VBITS_GE_1024-NEXT: st1h { z0.h }, p0, [x0] @@ -312,15 +310,15 @@ ; VBITS_GE_2048-LABEL: masked_gather_v32i16: ; VBITS_GE_2048: // %bb.0: ; VBITS_GE_2048-NEXT: ptrue p0.h, vl32 -; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 +; VBITS_GE_2048-NEXT: ptrue p2.d, vl32 ; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_GE_2048-NEXT: cmpeq p2.h, p0/z, z0.h, #0 -; VBITS_GE_2048-NEXT: ld1d { z0.d }, p1/z, [x1] -; VBITS_GE_2048-NEXT: mov z1.h, p2/z, #-1 // =0xffffffffffffffff -; VBITS_GE_2048-NEXT: uunpklo z1.s, z1.h -; VBITS_GE_2048-NEXT: uunpklo z1.d, z1.s -; VBITS_GE_2048-NEXT: cmpne p1.d, p1/z, z1.d, #0 -; VBITS_GE_2048-NEXT: ld1h { z0.d }, p1/z, [z0.d] +; VBITS_GE_2048-NEXT: ld1d { z1.d }, p2/z, [x1] +; VBITS_GE_2048-NEXT: cmpeq p1.h, p0/z, z0.h, #0 +; VBITS_GE_2048-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: uunpklo z0.s, z0.h +; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s +; VBITS_GE_2048-NEXT: cmpne p1.d, p2/z, z0.d, #0 +; VBITS_GE_2048-NEXT: ld1h { z0.d }, p1/z, [z1.d] ; VBITS_GE_2048-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_2048-NEXT: uzp1 z0.h, z0.h, z0.h ; VBITS_GE_2048-NEXT: st1h { z0.h }, p0, [x0] @@ -388,34 +386,33 @@ ; VBITS_EQ_256-NEXT: ld1w { z0.s }, p0/z, [x0] ; VBITS_EQ_256-NEXT: ptrue p1.d, vl4 ; VBITS_EQ_256-NEXT: ld1d { z1.d }, p1/z, [x1, x8, lsl #3] -; VBITS_EQ_256-NEXT: ld1d { z2.d }, p1/z, [x1] ; VBITS_EQ_256-NEXT: cmpeq p2.s, p0/z, z0.s, #0 -; VBITS_EQ_256-NEXT: mov z0.s, p2/z, #-1 // =0xffffffffffffffff -; VBITS_EQ_256-NEXT: uunpklo z3.d, z0.s -; VBITS_EQ_256-NEXT: ext z0.b, z0.b, z0.b, #16 -; VBITS_EQ_256-NEXT: uunpklo z0.d, z0.s +; VBITS_EQ_256-NEXT: ld1d { z0.d }, p1/z, [x1] +; VBITS_EQ_256-NEXT: mov z2.s, p2/z, #-1 // =0xffffffffffffffff +; VBITS_EQ_256-NEXT: uunpklo z3.d, z2.s +; VBITS_EQ_256-NEXT: ext z2.b, z2.b, z2.b, #16 +; VBITS_EQ_256-NEXT: uunpklo z2.d, z2.s ; VBITS_EQ_256-NEXT: cmpne p2.d, p1/z, z3.d, #0 -; VBITS_EQ_256-NEXT: cmpne p1.d, p1/z, z0.d, #0 -; VBITS_EQ_256-NEXT: ld1w { z2.d }, p2/z, [z2.d] -; VBITS_EQ_256-NEXT: ld1w { z0.d }, p1/z, [z1.d] +; VBITS_EQ_256-NEXT: cmpne p1.d, p1/z, z2.d, #0 +; VBITS_EQ_256-NEXT: ld1w { z0.d }, p2/z, [z0.d] +; VBITS_EQ_256-NEXT: ld1w { z1.d }, p1/z, [z1.d] ; VBITS_EQ_256-NEXT: ptrue p1.s, vl4 -; VBITS_EQ_256-NEXT: uzp1 z1.s, z2.s, z2.s ; VBITS_EQ_256-NEXT: uzp1 z0.s, z0.s, z0.s -; VBITS_EQ_256-NEXT: splice z1.s, p1, z1.s, z0.s -; VBITS_EQ_256-NEXT: st1w { z1.s }, p0, [x0] +; VBITS_EQ_256-NEXT: uzp1 z1.s, z1.s, z1.s +; VBITS_EQ_256-NEXT: splice z0.s, p1, z0.s, z1.s +; VBITS_EQ_256-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_EQ_256-NEXT: ret -; ; VBITS_GE_512-LABEL: masked_gather_v8i32: ; VBITS_GE_512: // %bb.0: ; VBITS_GE_512-NEXT: ptrue p0.s, vl8 ; VBITS_GE_512-NEXT: ptrue p1.d, vl8 ; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0] -; VBITS_GE_512-NEXT: ld1d { z1.d }, p1/z, [x1] ; VBITS_GE_512-NEXT: cmpeq p2.s, p0/z, z0.s, #0 -; VBITS_GE_512-NEXT: mov z0.s, p2/z, #-1 // =0xffffffffffffffff -; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_512-NEXT: cmpne p1.d, p1/z, z0.d, #0 -; VBITS_GE_512-NEXT: ld1w { z0.d }, p1/z, [z1.d] +; VBITS_GE_512-NEXT: ld1d { z0.d }, p1/z, [x1] +; VBITS_GE_512-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff +; VBITS_GE_512-NEXT: uunpklo z1.d, z1.s +; VBITS_GE_512-NEXT: cmpne p1.d, p1/z, z1.d, #0 +; VBITS_GE_512-NEXT: ld1w { z0.d }, p1/z, [z0.d] ; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_512-NEXT: ret @@ -433,12 +430,12 @@ ; VBITS_GE_1024-NEXT: ptrue p0.s, vl16 ; VBITS_GE_1024-NEXT: ptrue p1.d, vl16 ; VBITS_GE_1024-NEXT: ld1w { z0.s }, p0/z, [x0] -; VBITS_GE_1024-NEXT: ld1d { z1.d }, p1/z, [x1] ; VBITS_GE_1024-NEXT: cmpeq p2.s, p0/z, z0.s, #0 -; VBITS_GE_1024-NEXT: mov z0.s, p2/z, #-1 // =0xffffffffffffffff -; VBITS_GE_1024-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_1024-NEXT: cmpne p1.d, p1/z, z0.d, #0 -; VBITS_GE_1024-NEXT: ld1w { z0.d }, p1/z, [z1.d] +; VBITS_GE_1024-NEXT: ld1d { z0.d }, p1/z, [x1] +; VBITS_GE_1024-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff +; VBITS_GE_1024-NEXT: uunpklo z1.d, z1.s +; VBITS_GE_1024-NEXT: cmpne p1.d, p1/z, z1.d, #0 +; VBITS_GE_1024-NEXT: ld1w { z0.d }, p1/z, [z0.d] ; VBITS_GE_1024-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_1024-NEXT: ret @@ -456,12 +453,12 @@ ; VBITS_GE_2048-NEXT: ptrue p0.s, vl32 ; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 ; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0] -; VBITS_GE_2048-NEXT: ld1d { z1.d }, p1/z, [x1] ; VBITS_GE_2048-NEXT: cmpeq p2.s, p0/z, z0.s, #0 -; VBITS_GE_2048-NEXT: mov z0.s, p2/z, #-1 // =0xffffffffffffffff -; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_2048-NEXT: cmpne p1.d, p1/z, z0.d, #0 -; VBITS_GE_2048-NEXT: ld1w { z0.d }, p1/z, [z1.d] +; VBITS_GE_2048-NEXT: ld1d { z0.d }, p1/z, [x1] +; VBITS_GE_2048-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: uunpklo z1.d, z1.s +; VBITS_GE_2048-NEXT: cmpne p1.d, p1/z, z1.d, #0 +; VBITS_GE_2048-NEXT: ld1w { z0.d }, p1/z, [z0.d] ; VBITS_GE_2048-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_2048-NEXT: ret @@ -554,7 +551,6 @@ ; VBITS_EQ_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3] ; VBITS_EQ_256-NEXT: st1d { z1.d }, p0, [x0] ; VBITS_EQ_256-NEXT: ret -; ; VBITS_GE_512-LABEL: masked_gather_v8i64: ; VBITS_GE_512: // %bb.0: ; VBITS_GE_512-NEXT: ptrue p0.d, vl8 @@ -699,15 +695,15 @@ ; VBITS_GE_1024-LABEL: masked_gather_v16f16: ; VBITS_GE_1024: // %bb.0: ; VBITS_GE_1024-NEXT: ptrue p0.h, vl16 -; VBITS_GE_1024-NEXT: ptrue p1.d, vl16 +; VBITS_GE_1024-NEXT: ptrue p2.d, vl16 ; VBITS_GE_1024-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_GE_1024-NEXT: fcmeq p2.h, p0/z, z0.h, #0.0 -; VBITS_GE_1024-NEXT: ld1d { z0.d }, p1/z, [x1] -; VBITS_GE_1024-NEXT: mov z1.h, p2/z, #-1 // =0xffffffffffffffff -; VBITS_GE_1024-NEXT: uunpklo z1.s, z1.h -; VBITS_GE_1024-NEXT: uunpklo z1.d, z1.s -; VBITS_GE_1024-NEXT: cmpne p1.d, p1/z, z1.d, #0 -; VBITS_GE_1024-NEXT: ld1h { z0.d }, p1/z, [z0.d] +; VBITS_GE_1024-NEXT: ld1d { z1.d }, p2/z, [x1] +; VBITS_GE_1024-NEXT: fcmeq p1.h, p0/z, z0.h, #0.0 +; VBITS_GE_1024-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff +; VBITS_GE_1024-NEXT: uunpklo z0.s, z0.h +; VBITS_GE_1024-NEXT: uunpklo z0.d, z0.s +; VBITS_GE_1024-NEXT: cmpne p1.d, p2/z, z0.d, #0 +; VBITS_GE_1024-NEXT: ld1h { z0.d }, p1/z, [z1.d] ; VBITS_GE_1024-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_1024-NEXT: uzp1 z0.h, z0.h, z0.h ; VBITS_GE_1024-NEXT: st1h { z0.h }, p0, [x0] @@ -724,15 +720,15 @@ ; VBITS_GE_2048-LABEL: masked_gather_v32f16: ; VBITS_GE_2048: // %bb.0: ; VBITS_GE_2048-NEXT: ptrue p0.h, vl32 -; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 +; VBITS_GE_2048-NEXT: ptrue p2.d, vl32 ; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_GE_2048-NEXT: fcmeq p2.h, p0/z, z0.h, #0.0 -; VBITS_GE_2048-NEXT: ld1d { z0.d }, p1/z, [x1] -; VBITS_GE_2048-NEXT: mov z1.h, p2/z, #-1 // =0xffffffffffffffff -; VBITS_GE_2048-NEXT: uunpklo z1.s, z1.h -; VBITS_GE_2048-NEXT: uunpklo z1.d, z1.s -; VBITS_GE_2048-NEXT: cmpne p1.d, p1/z, z1.d, #0 -; VBITS_GE_2048-NEXT: ld1h { z0.d }, p1/z, [z0.d] +; VBITS_GE_2048-NEXT: ld1d { z1.d }, p2/z, [x1] +; VBITS_GE_2048-NEXT: fcmeq p1.h, p0/z, z0.h, #0.0 +; VBITS_GE_2048-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: uunpklo z0.s, z0.h +; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s +; VBITS_GE_2048-NEXT: cmpne p1.d, p2/z, z0.d, #0 +; VBITS_GE_2048-NEXT: ld1h { z0.d }, p1/z, [z1.d] ; VBITS_GE_2048-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_2048-NEXT: uzp1 z0.h, z0.h, z0.h ; VBITS_GE_2048-NEXT: st1h { z0.h }, p0, [x0] @@ -797,12 +793,12 @@ ; VBITS_GE_512-NEXT: ptrue p0.s, vl8 ; VBITS_GE_512-NEXT: ptrue p1.d, vl8 ; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0] -; VBITS_GE_512-NEXT: ld1d { z1.d }, p1/z, [x1] ; VBITS_GE_512-NEXT: fcmeq p2.s, p0/z, z0.s, #0.0 -; VBITS_GE_512-NEXT: mov z0.s, p2/z, #-1 // =0xffffffffffffffff -; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_512-NEXT: cmpne p1.d, p1/z, z0.d, #0 -; VBITS_GE_512-NEXT: ld1w { z0.d }, p1/z, [z1.d] +; VBITS_GE_512-NEXT: ld1d { z0.d }, p1/z, [x1] +; VBITS_GE_512-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff +; VBITS_GE_512-NEXT: uunpklo z1.d, z1.s +; VBITS_GE_512-NEXT: cmpne p1.d, p1/z, z1.d, #0 +; VBITS_GE_512-NEXT: ld1w { z0.d }, p1/z, [z0.d] ; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_512-NEXT: ret @@ -820,12 +816,12 @@ ; VBITS_GE_1024-NEXT: ptrue p0.s, vl16 ; VBITS_GE_1024-NEXT: ptrue p1.d, vl16 ; VBITS_GE_1024-NEXT: ld1w { z0.s }, p0/z, [x0] -; VBITS_GE_1024-NEXT: ld1d { z1.d }, p1/z, [x1] ; VBITS_GE_1024-NEXT: fcmeq p2.s, p0/z, z0.s, #0.0 -; VBITS_GE_1024-NEXT: mov z0.s, p2/z, #-1 // =0xffffffffffffffff -; VBITS_GE_1024-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_1024-NEXT: cmpne p1.d, p1/z, z0.d, #0 -; VBITS_GE_1024-NEXT: ld1w { z0.d }, p1/z, [z1.d] +; VBITS_GE_1024-NEXT: ld1d { z0.d }, p1/z, [x1] +; VBITS_GE_1024-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff +; VBITS_GE_1024-NEXT: uunpklo z1.d, z1.s +; VBITS_GE_1024-NEXT: cmpne p1.d, p1/z, z1.d, #0 +; VBITS_GE_1024-NEXT: ld1w { z0.d }, p1/z, [z0.d] ; VBITS_GE_1024-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_1024-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_1024-NEXT: ret @@ -843,12 +839,12 @@ ; VBITS_GE_2048-NEXT: ptrue p0.s, vl32 ; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 ; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0] -; VBITS_GE_2048-NEXT: ld1d { z1.d }, p1/z, [x1] ; VBITS_GE_2048-NEXT: fcmeq p2.s, p0/z, z0.s, #0.0 -; VBITS_GE_2048-NEXT: mov z0.s, p2/z, #-1 // =0xffffffffffffffff -; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_2048-NEXT: cmpne p1.d, p1/z, z0.d, #0 -; VBITS_GE_2048-NEXT: ld1w { z0.d }, p1/z, [z1.d] +; VBITS_GE_2048-NEXT: ld1d { z0.d }, p1/z, [x1] +; VBITS_GE_2048-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: uunpklo z1.d, z1.s +; VBITS_GE_2048-NEXT: cmpne p1.d, p1/z, z1.d, #0 +; VBITS_GE_2048-NEXT: ld1w { z0.d }, p1/z, [z0.d] ; VBITS_GE_2048-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_2048-NEXT: ret @@ -986,15 +982,15 @@ ; VBITS_GE_2048-LABEL: masked_gather_32b_scaled_sext_f16: ; VBITS_GE_2048: // %bb.0: ; VBITS_GE_2048-NEXT: ptrue p0.h, vl32 -; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 +; VBITS_GE_2048-NEXT: ptrue p2.d, vl32 ; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_GE_2048-NEXT: fcmeq p2.h, p0/z, z0.h, #0.0 -; VBITS_GE_2048-NEXT: ld1sw { z0.d }, p1/z, [x1] -; VBITS_GE_2048-NEXT: mov z1.h, p2/z, #-1 // =0xffffffffffffffff -; VBITS_GE_2048-NEXT: uunpklo z1.s, z1.h -; VBITS_GE_2048-NEXT: uunpklo z1.d, z1.s -; VBITS_GE_2048-NEXT: cmpne p1.d, p1/z, z1.d, #0 -; VBITS_GE_2048-NEXT: ld1h { z0.d }, p1/z, [x2, z0.d, lsl #1] +; VBITS_GE_2048-NEXT: ld1sw { z1.d }, p2/z, [x1] +; VBITS_GE_2048-NEXT: fcmeq p1.h, p0/z, z0.h, #0.0 +; VBITS_GE_2048-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: uunpklo z0.s, z0.h +; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s +; VBITS_GE_2048-NEXT: cmpne p1.d, p2/z, z0.d, #0 +; VBITS_GE_2048-NEXT: ld1h { z0.d }, p1/z, [x2, z1.d, lsl #1] ; VBITS_GE_2048-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_2048-NEXT: uzp1 z0.h, z0.h, z0.h ; VBITS_GE_2048-NEXT: st1h { z0.h }, p0, [x0] @@ -1016,12 +1012,12 @@ ; VBITS_GE_2048-NEXT: ptrue p0.s, vl32 ; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 ; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0] -; VBITS_GE_2048-NEXT: ld1sw { z1.d }, p1/z, [x1] ; VBITS_GE_2048-NEXT: fcmeq p2.s, p0/z, z0.s, #0.0 -; VBITS_GE_2048-NEXT: mov z0.s, p2/z, #-1 // =0xffffffffffffffff -; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_2048-NEXT: cmpne p1.d, p1/z, z0.d, #0 -; VBITS_GE_2048-NEXT: ld1w { z0.d }, p1/z, [x2, z1.d, lsl #2] +; VBITS_GE_2048-NEXT: ld1sw { z0.d }, p1/z, [x1] +; VBITS_GE_2048-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: uunpklo z1.d, z1.s +; VBITS_GE_2048-NEXT: cmpne p1.d, p1/z, z1.d, #0 +; VBITS_GE_2048-NEXT: ld1w { z0.d }, p1/z, [x2, z0.d, lsl #2] ; VBITS_GE_2048-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_2048-NEXT: ret @@ -1061,15 +1057,15 @@ ; VBITS_GE_2048-LABEL: masked_gather_32b_scaled_zext: ; VBITS_GE_2048: // %bb.0: ; VBITS_GE_2048-NEXT: ptrue p0.h, vl32 -; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 +; VBITS_GE_2048-NEXT: ptrue p2.d, vl32 ; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_GE_2048-NEXT: fcmeq p2.h, p0/z, z0.h, #0.0 -; VBITS_GE_2048-NEXT: ld1w { z0.d }, p1/z, [x1] -; VBITS_GE_2048-NEXT: mov z1.h, p2/z, #-1 // =0xffffffffffffffff -; VBITS_GE_2048-NEXT: uunpklo z1.s, z1.h -; VBITS_GE_2048-NEXT: uunpklo z1.d, z1.s -; VBITS_GE_2048-NEXT: cmpne p1.d, p1/z, z1.d, #0 -; VBITS_GE_2048-NEXT: ld1h { z0.d }, p1/z, [x2, z0.d, lsl #1] +; VBITS_GE_2048-NEXT: ld1w { z1.d }, p2/z, [x1] +; VBITS_GE_2048-NEXT: fcmeq p1.h, p0/z, z0.h, #0.0 +; VBITS_GE_2048-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: uunpklo z0.s, z0.h +; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s +; VBITS_GE_2048-NEXT: cmpne p1.d, p2/z, z0.d, #0 +; VBITS_GE_2048-NEXT: ld1h { z0.d }, p1/z, [x2, z1.d, lsl #1] ; VBITS_GE_2048-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_2048-NEXT: uzp1 z0.h, z0.h, z0.h ; VBITS_GE_2048-NEXT: st1h { z0.h }, p0, [x0] @@ -1089,15 +1085,15 @@ ; VBITS_GE_2048-LABEL: masked_gather_32b_unscaled_sext: ; VBITS_GE_2048: // %bb.0: ; VBITS_GE_2048-NEXT: ptrue p0.h, vl32 -; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 +; VBITS_GE_2048-NEXT: ptrue p2.d, vl32 ; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_GE_2048-NEXT: fcmeq p2.h, p0/z, z0.h, #0.0 -; VBITS_GE_2048-NEXT: ld1sw { z0.d }, p1/z, [x1] -; VBITS_GE_2048-NEXT: mov z1.h, p2/z, #-1 // =0xffffffffffffffff -; VBITS_GE_2048-NEXT: uunpklo z1.s, z1.h -; VBITS_GE_2048-NEXT: uunpklo z1.d, z1.s -; VBITS_GE_2048-NEXT: cmpne p1.d, p1/z, z1.d, #0 -; VBITS_GE_2048-NEXT: ld1h { z0.d }, p1/z, [x2, z0.d] +; VBITS_GE_2048-NEXT: ld1sw { z1.d }, p2/z, [x1] +; VBITS_GE_2048-NEXT: fcmeq p1.h, p0/z, z0.h, #0.0 +; VBITS_GE_2048-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: uunpklo z0.s, z0.h +; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s +; VBITS_GE_2048-NEXT: cmpne p1.d, p2/z, z0.d, #0 +; VBITS_GE_2048-NEXT: ld1h { z0.d }, p1/z, [x2, z1.d] ; VBITS_GE_2048-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_2048-NEXT: uzp1 z0.h, z0.h, z0.h ; VBITS_GE_2048-NEXT: st1h { z0.h }, p0, [x0] @@ -1118,15 +1114,15 @@ ; VBITS_GE_2048-LABEL: masked_gather_32b_unscaled_zext: ; VBITS_GE_2048: // %bb.0: ; VBITS_GE_2048-NEXT: ptrue p0.h, vl32 -; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 +; VBITS_GE_2048-NEXT: ptrue p2.d, vl32 ; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_GE_2048-NEXT: fcmeq p2.h, p0/z, z0.h, #0.0 -; VBITS_GE_2048-NEXT: ld1w { z0.d }, p1/z, [x1] -; VBITS_GE_2048-NEXT: mov z1.h, p2/z, #-1 // =0xffffffffffffffff -; VBITS_GE_2048-NEXT: uunpklo z1.s, z1.h -; VBITS_GE_2048-NEXT: uunpklo z1.d, z1.s -; VBITS_GE_2048-NEXT: cmpne p1.d, p1/z, z1.d, #0 -; VBITS_GE_2048-NEXT: ld1h { z0.d }, p1/z, [x2, z0.d] +; VBITS_GE_2048-NEXT: ld1w { z1.d }, p2/z, [x1] +; VBITS_GE_2048-NEXT: fcmeq p1.h, p0/z, z0.h, #0.0 +; VBITS_GE_2048-NEXT: mov z0.h, p1/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: uunpklo z0.s, z0.h +; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s +; VBITS_GE_2048-NEXT: cmpne p1.d, p2/z, z0.d, #0 +; VBITS_GE_2048-NEXT: ld1h { z0.d }, p1/z, [x2, z1.d] ; VBITS_GE_2048-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_2048-NEXT: uzp1 z0.h, z0.h, z0.h ; VBITS_GE_2048-NEXT: st1h { z0.h }, p0, [x0] @@ -1148,12 +1144,12 @@ ; VBITS_GE_2048-NEXT: ptrue p0.s, vl32 ; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 ; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0] -; VBITS_GE_2048-NEXT: ld1d { z1.d }, p1/z, [x1] ; VBITS_GE_2048-NEXT: fcmeq p2.s, p0/z, z0.s, #0.0 -; VBITS_GE_2048-NEXT: mov z0.s, p2/z, #-1 // =0xffffffffffffffff -; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_2048-NEXT: cmpne p1.d, p1/z, z0.d, #0 -; VBITS_GE_2048-NEXT: ld1w { z0.d }, p1/z, [x2, z1.d, lsl #2] +; VBITS_GE_2048-NEXT: ld1d { z0.d }, p1/z, [x1] +; VBITS_GE_2048-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: uunpklo z1.d, z1.s +; VBITS_GE_2048-NEXT: cmpne p1.d, p1/z, z1.d, #0 +; VBITS_GE_2048-NEXT: ld1w { z0.d }, p1/z, [x2, z0.d, lsl #2] ; VBITS_GE_2048-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_2048-NEXT: ret @@ -1172,12 +1168,12 @@ ; VBITS_GE_2048-NEXT: ptrue p0.s, vl32 ; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 ; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0] -; VBITS_GE_2048-NEXT: ld1d { z1.d }, p1/z, [x1] ; VBITS_GE_2048-NEXT: fcmeq p2.s, p0/z, z0.s, #0.0 -; VBITS_GE_2048-NEXT: mov z0.s, p2/z, #-1 // =0xffffffffffffffff -; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_2048-NEXT: cmpne p1.d, p1/z, z0.d, #0 -; VBITS_GE_2048-NEXT: ld1w { z0.d }, p1/z, [x2, z1.d] +; VBITS_GE_2048-NEXT: ld1d { z0.d }, p1/z, [x1] +; VBITS_GE_2048-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: uunpklo z1.d, z1.s +; VBITS_GE_2048-NEXT: cmpne p1.d, p1/z, z1.d, #0 +; VBITS_GE_2048-NEXT: ld1w { z0.d }, p1/z, [x2, z0.d] ; VBITS_GE_2048-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_2048-NEXT: ret @@ -1253,15 +1249,15 @@ ; VBITS_GE_2048-NEXT: ptrue p0.s, vl32 ; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 ; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0] -; VBITS_GE_2048-NEXT: ld1d { z1.d }, p1/z, [x1] ; VBITS_GE_2048-NEXT: fcmeq p2.s, p0/z, z0.s, #0.0 -; VBITS_GE_2048-NEXT: mov z0.s, p2/z, #-1 // =0xffffffffffffffff -; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_2048-NEXT: cmpne p1.d, p1/z, z0.d, #0 -; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x2] -; VBITS_GE_2048-NEXT: ld1w { z1.d }, p1/z, [z1.d] -; VBITS_GE_2048-NEXT: uzp1 z1.s, z1.s, z1.s -; VBITS_GE_2048-NEXT: mov z0.s, p2/m, z1.s +; VBITS_GE_2048-NEXT: ld1d { z0.d }, p1/z, [x1] +; VBITS_GE_2048-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: uunpklo z1.d, z1.s +; VBITS_GE_2048-NEXT: cmpne p1.d, p1/z, z1.d, #0 +; VBITS_GE_2048-NEXT: ld1w { z1.s }, p0/z, [x2] +; VBITS_GE_2048-NEXT: ld1w { z0.d }, p1/z, [z0.d] +; VBITS_GE_2048-NEXT: uzp1 z0.s, z0.s, z0.s +; VBITS_GE_2048-NEXT: sel z0.s, p2, z0.s, z1.s ; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_2048-NEXT: ret %cvals = load <32 x float>, <32 x float>* %a @@ -1279,12 +1275,12 @@ ; VBITS_GE_2048-NEXT: ptrue p0.s, vl32 ; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 ; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0] -; VBITS_GE_2048-NEXT: ld1d { z1.d }, p1/z, [x1] ; VBITS_GE_2048-NEXT: fcmeq p2.s, p0/z, z0.s, #0.0 -; VBITS_GE_2048-NEXT: mov z0.s, p2/z, #-1 // =0xffffffffffffffff -; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_2048-NEXT: cmpne p1.d, p1/z, z0.d, #0 -; VBITS_GE_2048-NEXT: ld1w { z0.d }, p1/z, [z1.d] +; VBITS_GE_2048-NEXT: ld1d { z0.d }, p1/z, [x1] +; VBITS_GE_2048-NEXT: mov z1.s, p2/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: uunpklo z1.d, z1.s +; VBITS_GE_2048-NEXT: cmpne p1.d, p1/z, z1.d, #0 +; VBITS_GE_2048-NEXT: ld1w { z0.d }, p1/z, [z0.d] ; VBITS_GE_2048-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_2048-NEXT: st1w { z0.s }, p0, [x0] ; VBITS_GE_2048-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-scatter.ll @@ -29,10 +29,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldrb w8, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldrb w9, [x0, #1] ; CHECK-NEXT: ldr q2, [x1] ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: ldrb w8, [x0, #1] -; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: cmeq v1.2s, v0.2s, #0 ; CHECK-NEXT: ushll v0.2d, v0.2s, #0 ; CHECK-NEXT: ushll v1.2d, v1.2s, #0 @@ -76,18 +76,18 @@ ; VBITS_EQ_256-NEXT: mov x8, #4 ; VBITS_EQ_256-NEXT: ptrue p0.d, vl4 ; VBITS_EQ_256-NEXT: cmeq v1.8b, v0.8b, #0 -; VBITS_EQ_256-NEXT: zip1 v5.8b, v0.8b, v0.8b ; VBITS_EQ_256-NEXT: ld1d { z3.d }, p0/z, [x1, x8, lsl #3] ; VBITS_EQ_256-NEXT: ld1d { z4.d }, p0/z, [x1] +; VBITS_EQ_256-NEXT: zip1 v5.8b, v0.8b, v0.8b ; VBITS_EQ_256-NEXT: zip1 v2.8b, v1.8b, v0.8b ; VBITS_EQ_256-NEXT: zip2 v1.8b, v1.8b, v0.8b ; VBITS_EQ_256-NEXT: zip2 v0.8b, v0.8b, v0.8b ; VBITS_EQ_256-NEXT: shl v2.4h, v2.4h, #8 ; VBITS_EQ_256-NEXT: shl v1.4h, v1.4h, #8 ; VBITS_EQ_256-NEXT: uunpklo z0.s, z0.h -; VBITS_EQ_256-NEXT: uunpklo z0.d, z0.s ; VBITS_EQ_256-NEXT: sshr v2.4h, v2.4h, #8 ; VBITS_EQ_256-NEXT: sshr v1.4h, v1.4h, #8 +; VBITS_EQ_256-NEXT: uunpklo z0.d, z0.s ; VBITS_EQ_256-NEXT: uunpklo z2.s, z2.h ; VBITS_EQ_256-NEXT: uunpklo z1.s, z1.h ; VBITS_EQ_256-NEXT: uunpklo z2.d, z2.s @@ -99,7 +99,6 @@ ; VBITS_EQ_256-NEXT: st1b { z1.d }, p1, [z4.d] ; VBITS_EQ_256-NEXT: st1b { z0.d }, p0, [z3.d] ; VBITS_EQ_256-NEXT: ret -; ; VBITS_GE_512-LABEL: masked_scatter_v8i8: ; VBITS_GE_512: // %bb.0: ; VBITS_GE_512-NEXT: ldr d0, [x0] @@ -108,8 +107,8 @@ ; VBITS_GE_512-NEXT: cmeq v2.8b, v0.8b, #0 ; VBITS_GE_512-NEXT: uunpklo z0.h, z0.b ; VBITS_GE_512-NEXT: uunpklo z0.s, z0.h -; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s ; VBITS_GE_512-NEXT: uunpklo z2.h, z2.b +; VBITS_GE_512-NEXT: uunpklo z0.d, z0.s ; VBITS_GE_512-NEXT: uunpklo z2.s, z2.h ; VBITS_GE_512-NEXT: uunpklo z2.d, z2.s ; VBITS_GE_512-NEXT: cmpne p0.d, p0/z, z2.d, #0 @@ -131,8 +130,8 @@ ; VBITS_GE_1024-NEXT: cmeq v2.16b, v0.16b, #0 ; VBITS_GE_1024-NEXT: uunpklo z0.h, z0.b ; VBITS_GE_1024-NEXT: uunpklo z0.s, z0.h -; VBITS_GE_1024-NEXT: uunpklo z0.d, z0.s ; VBITS_GE_1024-NEXT: uunpklo z2.h, z2.b +; VBITS_GE_1024-NEXT: uunpklo z0.d, z0.s ; VBITS_GE_1024-NEXT: uunpklo z2.s, z2.h ; VBITS_GE_1024-NEXT: uunpklo z2.d, z2.s ; VBITS_GE_1024-NEXT: cmpne p0.d, p0/z, z2.d, #0 @@ -149,18 +148,18 @@ ; VBITS_GE_2048-LABEL: masked_scatter_v32i8: ; VBITS_GE_2048: // %bb.0: ; VBITS_GE_2048-NEXT: ptrue p0.b, vl32 -; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 ; VBITS_GE_2048-NEXT: ld1b { z0.b }, p0/z, [x0] -; VBITS_GE_2048-NEXT: ld1d { z2.d }, p1/z, [x1] ; VBITS_GE_2048-NEXT: cmpeq p0.b, p0/z, z0.b, #0 ; VBITS_GE_2048-NEXT: uunpklo z0.h, z0.b ; VBITS_GE_2048-NEXT: mov z1.b, p0/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: ptrue p0.d, vl32 +; VBITS_GE_2048-NEXT: ld1d { z2.d }, p0/z, [x1] ; VBITS_GE_2048-NEXT: uunpklo z0.s, z0.h ; VBITS_GE_2048-NEXT: uunpklo z1.h, z1.b ; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s ; VBITS_GE_2048-NEXT: uunpklo z1.s, z1.h ; VBITS_GE_2048-NEXT: uunpklo z1.d, z1.s -; VBITS_GE_2048-NEXT: cmpne p0.d, p1/z, z1.d, #0 +; VBITS_GE_2048-NEXT: cmpne p0.d, p0/z, z1.d, #0 ; VBITS_GE_2048-NEXT: st1b { z0.d }, p0, [z2.d] ; VBITS_GE_2048-NEXT: ret %vals = load <32 x i8>, <32 x i8>* %a @@ -179,10 +178,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldrh w8, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 +; CHECK-NEXT: ldrh w9, [x0, #2] ; CHECK-NEXT: ldr q2, [x1] ; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: ldrh w8, [x0, #2] -; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: mov v0.s[1], w9 ; CHECK-NEXT: cmeq v1.2s, v0.2s, #0 ; CHECK-NEXT: ushll v0.2d, v0.2s, #0 ; CHECK-NEXT: ushll v1.2d, v1.2s, #0 @@ -225,24 +224,23 @@ ; VBITS_EQ_256-NEXT: mov x8, #4 ; VBITS_EQ_256-NEXT: ptrue p0.d, vl4 ; VBITS_EQ_256-NEXT: cmeq v1.8h, v0.8h, #0 -; VBITS_EQ_256-NEXT: ld1d { z4.d }, p0/z, [x1, x8, lsl #3] -; VBITS_EQ_256-NEXT: ext v3.16b, v0.16b, v0.16b, #8 -; VBITS_EQ_256-NEXT: uunpklo z0.s, z0.h -; VBITS_EQ_256-NEXT: uunpklo z0.d, z0.s -; VBITS_EQ_256-NEXT: uunpklo z2.s, z1.h +; VBITS_EQ_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] +; VBITS_EQ_256-NEXT: ld1d { z4.d }, p0/z, [x1] +; VBITS_EQ_256-NEXT: uunpklo z3.s, z1.h +; VBITS_EQ_256-NEXT: uunpklo z3.d, z3.s ; VBITS_EQ_256-NEXT: ext v1.16b, v1.16b, v1.16b, #8 -; VBITS_EQ_256-NEXT: uunpklo z2.d, z2.s -; VBITS_EQ_256-NEXT: cmpne p1.d, p0/z, z2.d, #0 -; VBITS_EQ_256-NEXT: ld1d { z2.d }, p0/z, [x1] -; VBITS_EQ_256-NEXT: uunpklo z3.s, z3.h +; VBITS_EQ_256-NEXT: cmpne p1.d, p0/z, z3.d, #0 +; VBITS_EQ_256-NEXT: uunpklo z3.s, z0.h +; VBITS_EQ_256-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; VBITS_EQ_256-NEXT: uunpklo z1.s, z1.h ; VBITS_EQ_256-NEXT: uunpklo z1.d, z1.s -; VBITS_EQ_256-NEXT: st1h { z0.d }, p1, [z2.d] +; VBITS_EQ_256-NEXT: uunpklo z3.d, z3.s +; VBITS_EQ_256-NEXT: uunpklo z0.s, z0.h ; VBITS_EQ_256-NEXT: cmpne p0.d, p0/z, z1.d, #0 -; VBITS_EQ_256-NEXT: uunpklo z1.d, z3.s -; VBITS_EQ_256-NEXT: st1h { z1.d }, p0, [z4.d] +; VBITS_EQ_256-NEXT: uunpklo z0.d, z0.s +; VBITS_EQ_256-NEXT: st1h { z3.d }, p1, [z4.d] +; VBITS_EQ_256-NEXT: st1h { z0.d }, p0, [z2.d] ; VBITS_EQ_256-NEXT: ret -; ; VBITS_GE_512-LABEL: masked_scatter_v8i16: ; VBITS_GE_512: // %bb.0: ; VBITS_GE_512-NEXT: ldr q0, [x0] @@ -269,15 +267,15 @@ ; VBITS_GE_1024-NEXT: ptrue p0.h, vl16 ; VBITS_GE_1024-NEXT: ptrue p1.d, vl16 ; VBITS_GE_1024-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_GE_1024-NEXT: ld1d { z1.d }, p1/z, [x1] +; VBITS_GE_1024-NEXT: ld1d { z2.d }, p1/z, [x1] ; VBITS_GE_1024-NEXT: cmpeq p0.h, p0/z, z0.h, #0 ; VBITS_GE_1024-NEXT: uunpklo z0.s, z0.h -; VBITS_GE_1024-NEXT: mov z2.h, p0/z, #-1 // =0xffffffffffffffff +; VBITS_GE_1024-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; VBITS_GE_1024-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_1024-NEXT: uunpklo z2.s, z2.h -; VBITS_GE_1024-NEXT: uunpklo z2.d, z2.s -; VBITS_GE_1024-NEXT: cmpne p0.d, p1/z, z2.d, #0 -; VBITS_GE_1024-NEXT: st1h { z0.d }, p0, [z1.d] +; VBITS_GE_1024-NEXT: uunpklo z1.s, z1.h +; VBITS_GE_1024-NEXT: uunpklo z1.d, z1.s +; VBITS_GE_1024-NEXT: cmpne p0.d, p1/z, z1.d, #0 +; VBITS_GE_1024-NEXT: st1h { z0.d }, p0, [z2.d] ; VBITS_GE_1024-NEXT: ret %vals = load <16 x i16>, <16 x i16>* %a %ptrs = load <16 x i16*>, <16 x i16*>* %b @@ -292,15 +290,15 @@ ; VBITS_GE_2048-NEXT: ptrue p0.h, vl32 ; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 ; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_GE_2048-NEXT: ld1d { z1.d }, p1/z, [x1] +; VBITS_GE_2048-NEXT: ld1d { z2.d }, p1/z, [x1] ; VBITS_GE_2048-NEXT: cmpeq p0.h, p0/z, z0.h, #0 ; VBITS_GE_2048-NEXT: uunpklo z0.s, z0.h -; VBITS_GE_2048-NEXT: mov z2.h, p0/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_2048-NEXT: uunpklo z2.s, z2.h -; VBITS_GE_2048-NEXT: uunpklo z2.d, z2.s -; VBITS_GE_2048-NEXT: cmpne p0.d, p1/z, z2.d, #0 -; VBITS_GE_2048-NEXT: st1h { z0.d }, p0, [z1.d] +; VBITS_GE_2048-NEXT: uunpklo z1.s, z1.h +; VBITS_GE_2048-NEXT: uunpklo z1.d, z1.s +; VBITS_GE_2048-NEXT: cmpne p0.d, p1/z, z1.d, #0 +; VBITS_GE_2048-NEXT: st1h { z0.d }, p0, [z2.d] ; VBITS_GE_2048-NEXT: ret %vals = load <32 x i16>, <32 x i16>* %a %ptrs = load <32 x i16*>, <32 x i16*>* %b @@ -358,23 +356,22 @@ ; VBITS_EQ_256-NEXT: ptrue p0.s, vl8 ; VBITS_EQ_256-NEXT: mov x8, #4 ; VBITS_EQ_256-NEXT: ld1w { z0.s }, p0/z, [x0] -; VBITS_EQ_256-NEXT: ptrue p1.d, vl4 -; VBITS_EQ_256-NEXT: ld1d { z2.d }, p1/z, [x1, x8, lsl #3] -; VBITS_EQ_256-NEXT: ld1d { z4.d }, p1/z, [x1] ; VBITS_EQ_256-NEXT: cmpeq p0.s, p0/z, z0.s, #0 ; VBITS_EQ_256-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff +; VBITS_EQ_256-NEXT: ptrue p0.d, vl4 +; VBITS_EQ_256-NEXT: ld1d { z2.d }, p0/z, [x1, x8, lsl #3] +; VBITS_EQ_256-NEXT: ld1d { z4.d }, p0/z, [x1] ; VBITS_EQ_256-NEXT: uunpklo z3.d, z1.s ; VBITS_EQ_256-NEXT: ext z1.b, z1.b, z1.b, #16 -; VBITS_EQ_256-NEXT: cmpne p0.d, p1/z, z3.d, #0 +; VBITS_EQ_256-NEXT: cmpne p1.d, p0/z, z3.d, #0 ; VBITS_EQ_256-NEXT: uunpklo z3.d, z0.s ; VBITS_EQ_256-NEXT: uunpklo z1.d, z1.s ; VBITS_EQ_256-NEXT: ext z0.b, z0.b, z0.b, #16 -; VBITS_EQ_256-NEXT: cmpne p1.d, p1/z, z1.d, #0 +; VBITS_EQ_256-NEXT: cmpne p0.d, p0/z, z1.d, #0 ; VBITS_EQ_256-NEXT: uunpklo z0.d, z0.s -; VBITS_EQ_256-NEXT: st1w { z3.d }, p0, [z4.d] -; VBITS_EQ_256-NEXT: st1w { z0.d }, p1, [z2.d] +; VBITS_EQ_256-NEXT: st1w { z3.d }, p1, [z4.d] +; VBITS_EQ_256-NEXT: st1w { z0.d }, p0, [z2.d] ; VBITS_EQ_256-NEXT: ret -; ; VBITS_GE_512-LABEL: masked_scatter_v8i32: ; VBITS_GE_512: // %bb.0: ; VBITS_GE_512-NEXT: ptrue p0.s, vl8 @@ -466,10 +463,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldr q2, [x1] -; CHECK-NEXT: cmeq v1.2d, v0.2d, #0 -; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0 -; CHECK-NEXT: st1d { z0.d }, p0, [z2.d] +; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: cmeq v2.2d, v0.2d, #0 +; CHECK-NEXT: cmpne p0.d, p0/z, z2.d, #0 +; CHECK-NEXT: st1d { z0.d }, p0, [z1.d] ; CHECK-NEXT: ret %vals = load <2 x i64>, <2 x i64>* %a %ptrs = load <2 x i64*>, <2 x i64*>* %b @@ -509,7 +506,6 @@ ; VBITS_EQ_256-NEXT: st1d { z1.d }, p0, [z3.d] ; VBITS_EQ_256-NEXT: st1d { z0.d }, p1, [z2.d] ; VBITS_EQ_256-NEXT: ret -; ; VBITS_GE_512-LABEL: masked_scatter_v8i64: ; VBITS_GE_512: // %bb.0: ; VBITS_GE_512-NEXT: ptrue p0.d, vl8 @@ -643,15 +639,15 @@ ; VBITS_GE_1024-NEXT: ptrue p0.h, vl16 ; VBITS_GE_1024-NEXT: ptrue p1.d, vl16 ; VBITS_GE_1024-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_GE_1024-NEXT: ld1d { z1.d }, p1/z, [x1] +; VBITS_GE_1024-NEXT: ld1d { z2.d }, p1/z, [x1] ; VBITS_GE_1024-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0 ; VBITS_GE_1024-NEXT: uunpklo z0.s, z0.h -; VBITS_GE_1024-NEXT: mov z2.h, p0/z, #-1 // =0xffffffffffffffff +; VBITS_GE_1024-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; VBITS_GE_1024-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_1024-NEXT: uunpklo z2.s, z2.h -; VBITS_GE_1024-NEXT: uunpklo z2.d, z2.s -; VBITS_GE_1024-NEXT: cmpne p0.d, p1/z, z2.d, #0 -; VBITS_GE_1024-NEXT: st1h { z0.d }, p0, [z1.d] +; VBITS_GE_1024-NEXT: uunpklo z1.s, z1.h +; VBITS_GE_1024-NEXT: uunpklo z1.d, z1.s +; VBITS_GE_1024-NEXT: cmpne p0.d, p1/z, z1.d, #0 +; VBITS_GE_1024-NEXT: st1h { z0.d }, p0, [z2.d] ; VBITS_GE_1024-NEXT: ret %vals = load <16 x half>, <16 x half>* %a %ptrs = load <16 x half*>, <16 x half*>* %b @@ -666,15 +662,15 @@ ; VBITS_GE_2048-NEXT: ptrue p0.h, vl32 ; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 ; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_GE_2048-NEXT: ld1d { z1.d }, p1/z, [x1] +; VBITS_GE_2048-NEXT: ld1d { z2.d }, p1/z, [x1] ; VBITS_GE_2048-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0 ; VBITS_GE_2048-NEXT: uunpklo z0.s, z0.h -; VBITS_GE_2048-NEXT: mov z2.h, p0/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_2048-NEXT: uunpklo z2.s, z2.h -; VBITS_GE_2048-NEXT: uunpklo z2.d, z2.s -; VBITS_GE_2048-NEXT: cmpne p0.d, p1/z, z2.d, #0 -; VBITS_GE_2048-NEXT: st1h { z0.d }, p0, [z1.d] +; VBITS_GE_2048-NEXT: uunpklo z1.s, z1.h +; VBITS_GE_2048-NEXT: uunpklo z1.d, z1.s +; VBITS_GE_2048-NEXT: cmpne p0.d, p1/z, z1.d, #0 +; VBITS_GE_2048-NEXT: st1h { z0.d }, p0, [z2.d] ; VBITS_GE_2048-NEXT: ret %vals = load <32 x half>, <32 x half>* %a %ptrs = load <32 x half*>, <32 x half*>* %b @@ -817,10 +813,10 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ldr q0, [x0] ; CHECK-NEXT: ptrue p0.d, vl2 -; CHECK-NEXT: ldr q2, [x1] -; CHECK-NEXT: fcmeq v1.2d, v0.2d, #0.0 -; CHECK-NEXT: cmpne p0.d, p0/z, z1.d, #0 -; CHECK-NEXT: st1d { z0.d }, p0, [z2.d] +; CHECK-NEXT: ldr q1, [x1] +; CHECK-NEXT: fcmeq v2.2d, v0.2d, #0.0 +; CHECK-NEXT: cmpne p0.d, p0/z, z2.d, #0 +; CHECK-NEXT: st1d { z0.d }, p0, [z1.d] ; CHECK-NEXT: ret %vals = load <2 x double>, <2 x double>* %a %ptrs = load <2 x double*>, <2 x double*>* %b @@ -903,15 +899,15 @@ ; VBITS_GE_2048-NEXT: ptrue p0.h, vl32 ; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 ; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_GE_2048-NEXT: ld1sw { z1.d }, p1/z, [x1] +; VBITS_GE_2048-NEXT: ld1sw { z2.d }, p1/z, [x1] ; VBITS_GE_2048-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0 ; VBITS_GE_2048-NEXT: uunpklo z0.s, z0.h -; VBITS_GE_2048-NEXT: mov z2.h, p0/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_2048-NEXT: uunpklo z2.s, z2.h -; VBITS_GE_2048-NEXT: uunpklo z2.d, z2.s -; VBITS_GE_2048-NEXT: cmpne p0.d, p1/z, z2.d, #0 -; VBITS_GE_2048-NEXT: st1h { z0.d }, p0, [x2, z1.d, lsl #1] +; VBITS_GE_2048-NEXT: uunpklo z1.s, z1.h +; VBITS_GE_2048-NEXT: uunpklo z1.d, z1.s +; VBITS_GE_2048-NEXT: cmpne p0.d, p1/z, z1.d, #0 +; VBITS_GE_2048-NEXT: st1h { z0.d }, p0, [x2, z2.d, lsl #1] ; VBITS_GE_2048-NEXT: ret %vals = load <32 x half>, <32 x half>* %a %idxs = load <32 x i32>, <32 x i32>* %b @@ -972,15 +968,15 @@ ; VBITS_GE_2048-NEXT: ptrue p0.h, vl32 ; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 ; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_GE_2048-NEXT: ld1w { z1.d }, p1/z, [x1] +; VBITS_GE_2048-NEXT: ld1w { z2.d }, p1/z, [x1] ; VBITS_GE_2048-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0 ; VBITS_GE_2048-NEXT: uunpklo z0.s, z0.h -; VBITS_GE_2048-NEXT: mov z2.h, p0/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_2048-NEXT: uunpklo z2.s, z2.h -; VBITS_GE_2048-NEXT: uunpklo z2.d, z2.s -; VBITS_GE_2048-NEXT: cmpne p0.d, p1/z, z2.d, #0 -; VBITS_GE_2048-NEXT: st1h { z0.d }, p0, [x2, z1.d, lsl #1] +; VBITS_GE_2048-NEXT: uunpklo z1.s, z1.h +; VBITS_GE_2048-NEXT: uunpklo z1.d, z1.s +; VBITS_GE_2048-NEXT: cmpne p0.d, p1/z, z1.d, #0 +; VBITS_GE_2048-NEXT: st1h { z0.d }, p0, [x2, z2.d, lsl #1] ; VBITS_GE_2048-NEXT: ret %vals = load <32 x half>, <32 x half>* %a %idxs = load <32 x i32>, <32 x i32>* %b @@ -998,15 +994,15 @@ ; VBITS_GE_2048-NEXT: ptrue p0.h, vl32 ; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 ; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_GE_2048-NEXT: ld1sw { z1.d }, p1/z, [x1] +; VBITS_GE_2048-NEXT: ld1sw { z2.d }, p1/z, [x1] ; VBITS_GE_2048-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0 ; VBITS_GE_2048-NEXT: uunpklo z0.s, z0.h -; VBITS_GE_2048-NEXT: mov z2.h, p0/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_2048-NEXT: uunpklo z2.s, z2.h -; VBITS_GE_2048-NEXT: uunpklo z2.d, z2.s -; VBITS_GE_2048-NEXT: cmpne p0.d, p1/z, z2.d, #0 -; VBITS_GE_2048-NEXT: st1h { z0.d }, p0, [x2, z1.d] +; VBITS_GE_2048-NEXT: uunpklo z1.s, z1.h +; VBITS_GE_2048-NEXT: uunpklo z1.d, z1.s +; VBITS_GE_2048-NEXT: cmpne p0.d, p1/z, z1.d, #0 +; VBITS_GE_2048-NEXT: st1h { z0.d }, p0, [x2, z2.d] ; VBITS_GE_2048-NEXT: ret %vals = load <32 x half>, <32 x half>* %a %idxs = load <32 x i32>, <32 x i32>* %b @@ -1025,15 +1021,15 @@ ; VBITS_GE_2048-NEXT: ptrue p0.h, vl32 ; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 ; VBITS_GE_2048-NEXT: ld1h { z0.h }, p0/z, [x0] -; VBITS_GE_2048-NEXT: ld1w { z1.d }, p1/z, [x1] +; VBITS_GE_2048-NEXT: ld1w { z2.d }, p1/z, [x1] ; VBITS_GE_2048-NEXT: fcmeq p0.h, p0/z, z0.h, #0.0 ; VBITS_GE_2048-NEXT: uunpklo z0.s, z0.h -; VBITS_GE_2048-NEXT: mov z2.h, p0/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff ; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_2048-NEXT: uunpklo z2.s, z2.h -; VBITS_GE_2048-NEXT: uunpklo z2.d, z2.s -; VBITS_GE_2048-NEXT: cmpne p0.d, p1/z, z2.d, #0 -; VBITS_GE_2048-NEXT: st1h { z0.d }, p0, [x2, z1.d] +; VBITS_GE_2048-NEXT: uunpklo z1.s, z1.h +; VBITS_GE_2048-NEXT: uunpklo z1.d, z1.s +; VBITS_GE_2048-NEXT: cmpne p0.d, p1/z, z1.d, #0 +; VBITS_GE_2048-NEXT: st1h { z0.d }, p0, [x2, z2.d] ; VBITS_GE_2048-NEXT: ret %vals = load <32 x half>, <32 x half>* %a %idxs = load <32 x i32>, <32 x i32>* %b @@ -1098,12 +1094,12 @@ ; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 ; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0] ; VBITS_GE_2048-NEXT: ld1d { z1.d }, p1/z, [x1] -; VBITS_GE_2048-NEXT: mov z2.d, x2 +; VBITS_GE_2048-NEXT: mov z3.d, x2 ; VBITS_GE_2048-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0 -; VBITS_GE_2048-NEXT: add z1.d, p1/m, z1.d, z2.d -; VBITS_GE_2048-NEXT: mov z3.s, p0/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: add z1.d, p1/m, z1.d, z3.d +; VBITS_GE_2048-NEXT: mov z2.s, p0/z, #-1 // =0xffffffffffffffff ; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_2048-NEXT: uunpklo z2.d, z3.s +; VBITS_GE_2048-NEXT: uunpklo z2.d, z2.s ; VBITS_GE_2048-NEXT: cmpne p0.d, p1/z, z2.d, #0 ; VBITS_GE_2048-NEXT: st1w { z0.d }, p0, [z1.d] ; VBITS_GE_2048-NEXT: ret @@ -1124,12 +1120,12 @@ ; VBITS_GE_2048-NEXT: ptrue p1.d, vl32 ; VBITS_GE_2048-NEXT: ld1w { z0.s }, p0/z, [x0] ; VBITS_GE_2048-NEXT: ld1d { z1.d }, p1/z, [x1] -; VBITS_GE_2048-NEXT: mov z2.d, #4 // =0x4 +; VBITS_GE_2048-NEXT: mov z3.d, #4 // =0x4 ; VBITS_GE_2048-NEXT: fcmeq p0.s, p0/z, z0.s, #0.0 -; VBITS_GE_2048-NEXT: add z1.d, p1/m, z1.d, z2.d -; VBITS_GE_2048-NEXT: mov z3.s, p0/z, #-1 // =0xffffffffffffffff +; VBITS_GE_2048-NEXT: add z1.d, p1/m, z1.d, z3.d +; VBITS_GE_2048-NEXT: mov z2.s, p0/z, #-1 // =0xffffffffffffffff ; VBITS_GE_2048-NEXT: uunpklo z0.d, z0.s -; VBITS_GE_2048-NEXT: uunpklo z2.d, z3.s +; VBITS_GE_2048-NEXT: uunpklo z2.d, z2.s ; VBITS_GE_2048-NEXT: cmpne p0.d, p1/z, z2.d, #0 ; VBITS_GE_2048-NEXT: st1w { z0.d }, p0, [z1.d] ; VBITS_GE_2048-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll --- a/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll +++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-masked-stores.ll @@ -162,10 +162,10 @@ ; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_512-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff ; VBITS_GE_512-NEXT: ptrue p0.b, vl8 -; VBITS_GE_512-NEXT: uzp1 z1.s, z1.s, z1.s ; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h -; VBITS_GE_512-NEXT: uzp1 z1.h, z1.h, z1.h +; VBITS_GE_512-NEXT: uzp1 z1.s, z1.s, z1.s ; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b +; VBITS_GE_512-NEXT: uzp1 z1.h, z1.h, z1.h ; VBITS_GE_512-NEXT: uzp1 z1.b, z1.b, z1.b ; VBITS_GE_512-NEXT: cmpne p0.b, p0/z, z1.b, #0 ; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x2] @@ -188,8 +188,8 @@ ; VBITS_GE_512-NEXT: uzp1 z0.s, z0.s, z0.s ; VBITS_GE_512-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff ; VBITS_GE_512-NEXT: ptrue p0.h, vl8 -; VBITS_GE_512-NEXT: uzp1 z1.s, z1.s, z1.s ; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h +; VBITS_GE_512-NEXT: uzp1 z1.s, z1.s, z1.s ; VBITS_GE_512-NEXT: uzp1 z1.h, z1.h, z1.h ; VBITS_GE_512-NEXT: cmpne p0.h, p0/z, z1.h, #0 ; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x2] @@ -234,8 +234,8 @@ ; VBITS_GE_512-NEXT: uzp1 z0.h, z0.h, z0.h ; VBITS_GE_512-NEXT: mov z1.s, p0/z, #-1 // =0xffffffffffffffff ; VBITS_GE_512-NEXT: ptrue p0.b, vl16 -; VBITS_GE_512-NEXT: uzp1 z1.h, z1.h, z1.h ; VBITS_GE_512-NEXT: uzp1 z0.b, z0.b, z0.b +; VBITS_GE_512-NEXT: uzp1 z1.h, z1.h, z1.h ; VBITS_GE_512-NEXT: uzp1 z1.b, z1.b, z1.b ; VBITS_GE_512-NEXT: cmpne p0.b, p0/z, z1.b, #0 ; VBITS_GE_512-NEXT: st1b { z0.b }, p0, [x2] diff --git a/llvm/test/CodeGen/AArch64/sve-insert-element.ll b/llvm/test/CodeGen/AArch64/sve-insert-element.ll --- a/llvm/test/CodeGen/AArch64/sve-insert-element.ll +++ b/llvm/test/CodeGen/AArch64/sve-insert-element.ll @@ -357,10 +357,10 @@ define @test_predicate_insert_2xi1_immediate ( %val, i1 %elt) { ; CHECK-LABEL: test_predicate_insert_2xi1_immediate: ; CHECK: // %bb.0: -; CHECK-NEXT: ptrue p1.d, vl1 ; CHECK-NEXT: mov z0.d, p0/z, #1 // =0x1 +; CHECK-NEXT: ptrue p0.d, vl1 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: mov z0.d, p1/m, x0 +; CHECK-NEXT: mov z0.d, p0/m, x0 ; CHECK-NEXT: ptrue p0.d ; CHECK-NEXT: and z0.d, z0.d, #0x1 ; CHECK-NEXT: cmpne p0.d, p0/z, z0.d, #0 @@ -373,14 +373,14 @@ ; CHECK-LABEL: test_predicate_insert_4xi1_immediate: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #2 -; CHECK-NEXT: index z1.s, #0, #1 +; CHECK-NEXT: index z0.s, #0, #1 ; CHECK-NEXT: ptrue p1.s -; CHECK-NEXT: mov z0.s, w8 -; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z0.s -; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1 -; CHECK-NEXT: mov z0.s, p2/m, w0 -; CHECK-NEXT: and z0.s, z0.s, #0x1 -; CHECK-NEXT: cmpne p0.s, p1/z, z0.s, #0 +; CHECK-NEXT: mov z2.s, p0/z, #1 // =0x1 +; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: cmpeq p0.s, p1/z, z0.s, z1.s +; CHECK-NEXT: mov z2.s, p0/m, w0 +; CHECK-NEXT: and z2.s, z2.s, #0x1 +; CHECK-NEXT: cmpne p0.s, p1/z, z2.s, #0 ; CHECK-NEXT: ret %res = insertelement %val, i1 %elt, i32 2 ret %res @@ -392,14 +392,14 @@ ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK-NEXT: sxtw x8, w0 ; CHECK-NEXT: mov w9, #1 -; CHECK-NEXT: index z1.h, #0, #1 +; CHECK-NEXT: index z0.h, #0, #1 ; CHECK-NEXT: ptrue p1.h -; CHECK-NEXT: mov z0.h, w8 -; CHECK-NEXT: cmpeq p2.h, p1/z, z1.h, z0.h -; CHECK-NEXT: mov z0.h, p0/z, #1 // =0x1 -; CHECK-NEXT: mov z0.h, p2/m, w9 -; CHECK-NEXT: and z0.h, z0.h, #0x1 -; CHECK-NEXT: cmpne p0.h, p1/z, z0.h, #0 +; CHECK-NEXT: mov z2.h, p0/z, #1 // =0x1 +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: cmpeq p0.h, p1/z, z0.h, z1.h +; CHECK-NEXT: mov z2.h, p0/m, w9 +; CHECK-NEXT: and z2.h, z2.h, #0x1 +; CHECK-NEXT: cmpne p0.h, p1/z, z2.h, #0 ; CHECK-NEXT: ret %res = insertelement %val, i1 1, i32 %idx ret %res @@ -410,14 +410,14 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w9, #4 ; CHECK-NEXT: mov w8, wzr -; CHECK-NEXT: index z1.b, #0, #1 +; CHECK-NEXT: index z0.b, #0, #1 ; CHECK-NEXT: ptrue p1.b -; CHECK-NEXT: mov z0.b, w9 -; CHECK-NEXT: cmpeq p2.b, p1/z, z1.b, z0.b -; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1 -; CHECK-NEXT: mov z0.b, p2/m, w8 -; CHECK-NEXT: and z0.b, z0.b, #0x1 -; CHECK-NEXT: cmpne p0.b, p1/z, z0.b, #0 +; CHECK-NEXT: mov z2.b, p0/z, #1 // =0x1 +; CHECK-NEXT: mov z1.b, w9 +; CHECK-NEXT: cmpeq p0.b, p1/z, z0.b, z1.b +; CHECK-NEXT: mov z2.b, p0/m, w8 +; CHECK-NEXT: and z2.b, z2.b, #0x1 +; CHECK-NEXT: cmpne p0.b, p1/z, z2.b, #0 ; CHECK-NEXT: ret %res = insertelement %val, i1 0, i32 4 ret %res @@ -429,15 +429,15 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: sxtw x8, w1 -; CHECK-NEXT: index z1.d, #0, #1 +; CHECK-NEXT: index z0.d, #0, #1 ; CHECK-NEXT: ptrue p1.d +; CHECK-NEXT: mov z2.d, p0/z, #1 // =0x1 ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK-NEXT: mov z0.d, x8 -; CHECK-NEXT: cmpeq p2.d, p1/z, z1.d, z0.d -; CHECK-NEXT: mov z0.d, p0/z, #1 // =0x1 -; CHECK-NEXT: mov z0.d, p2/m, x0 -; CHECK-NEXT: and z0.d, z0.d, #0x1 -; CHECK-NEXT: cmpne p0.d, p1/z, z0.d, #0 +; CHECK-NEXT: mov z1.d, x8 +; CHECK-NEXT: cmpeq p0.d, p1/z, z0.d, z1.d +; CHECK-NEXT: mov z2.d, p0/m, x0 +; CHECK-NEXT: and z2.d, z2.d, #0x1 +; CHECK-NEXT: cmpne p0.d, p1/z, z2.d, #0 ; CHECK-NEXT: ret %res = insertelement %val, i1 %elt, i32 %idx ret %res @@ -448,14 +448,14 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: sxtw x8, w1 -; CHECK-NEXT: index z1.s, #0, #1 +; CHECK-NEXT: index z0.s, #0, #1 ; CHECK-NEXT: ptrue p1.s -; CHECK-NEXT: mov z0.s, w8 -; CHECK-NEXT: cmpeq p2.s, p1/z, z1.s, z0.s -; CHECK-NEXT: mov z0.s, p0/z, #1 // =0x1 -; CHECK-NEXT: mov z0.s, p2/m, w0 -; CHECK-NEXT: and z0.s, z0.s, #0x1 -; CHECK-NEXT: cmpne p0.s, p1/z, z0.s, #0 +; CHECK-NEXT: mov z2.s, p0/z, #1 // =0x1 +; CHECK-NEXT: mov z1.s, w8 +; CHECK-NEXT: cmpeq p0.s, p1/z, z0.s, z1.s +; CHECK-NEXT: mov z2.s, p0/m, w0 +; CHECK-NEXT: and z2.s, z2.s, #0x1 +; CHECK-NEXT: cmpne p0.s, p1/z, z2.s, #0 ; CHECK-NEXT: ret %res = insertelement %val, i1 %elt, i32 %idx ret %res @@ -465,14 +465,14 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: sxtw x8, w1 -; CHECK-NEXT: index z1.h, #0, #1 +; CHECK-NEXT: index z0.h, #0, #1 ; CHECK-NEXT: ptrue p1.h -; CHECK-NEXT: mov z0.h, w8 -; CHECK-NEXT: cmpeq p2.h, p1/z, z1.h, z0.h -; CHECK-NEXT: mov z0.h, p0/z, #1 // =0x1 -; CHECK-NEXT: mov z0.h, p2/m, w0 -; CHECK-NEXT: and z0.h, z0.h, #0x1 -; CHECK-NEXT: cmpne p0.h, p1/z, z0.h, #0 +; CHECK-NEXT: mov z2.h, p0/z, #1 // =0x1 +; CHECK-NEXT: mov z1.h, w8 +; CHECK-NEXT: cmpeq p0.h, p1/z, z0.h, z1.h +; CHECK-NEXT: mov z2.h, p0/m, w0 +; CHECK-NEXT: and z2.h, z2.h, #0x1 +; CHECK-NEXT: cmpne p0.h, p1/z, z2.h, #0 ; CHECK-NEXT: ret %res = insertelement %val, i1 %elt, i32 %idx ret %res @@ -483,14 +483,14 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: sxtw x8, w1 -; CHECK-NEXT: index z1.b, #0, #1 +; CHECK-NEXT: index z0.b, #0, #1 ; CHECK-NEXT: ptrue p1.b -; CHECK-NEXT: mov z0.b, w8 -; CHECK-NEXT: cmpeq p2.b, p1/z, z1.b, z0.b -; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1 -; CHECK-NEXT: mov z0.b, p2/m, w0 -; CHECK-NEXT: and z0.b, z0.b, #0x1 -; CHECK-NEXT: cmpne p0.b, p1/z, z0.b, #0 +; CHECK-NEXT: mov z2.b, p0/z, #1 // =0x1 +; CHECK-NEXT: mov z1.b, w8 +; CHECK-NEXT: cmpeq p0.b, p1/z, z0.b, z1.b +; CHECK-NEXT: mov z2.b, p0/m, w0 +; CHECK-NEXT: and z2.b, z2.b, #0x1 +; CHECK-NEXT: cmpne p0.b, p1/z, z2.b, #0 ; CHECK-NEXT: ret %res = insertelement %val, i1 %elt, i32 %idx ret %res @@ -507,12 +507,12 @@ ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 ; CHECK-NEXT: sxtw x9, w1 ; CHECK-NEXT: mov z0.b, p1/z, #1 // =0x1 +; CHECK-NEXT: mov z1.b, p0/z, #1 // =0x1 ; CHECK-NEXT: ptrue p1.b -; CHECK-NEXT: st1b { z0.b }, p1, [sp, #1, mul vl] -; CHECK-NEXT: mov z0.b, p0/z, #1 // =0x1 ; CHECK-NEXT: addvl x8, x8, #2 -; CHECK-NEXT: st1b { z0.b }, p1, [sp] +; CHECK-NEXT: st1b { z0.b }, p1, [sp, #1, mul vl] ; CHECK-NEXT: cmp x9, x8 +; CHECK-NEXT: st1b { z1.b }, p1, [sp] ; CHECK-NEXT: csel x8, x9, x8, lo ; CHECK-NEXT: mov x9, sp ; CHECK-NEXT: strb w0, [x9, x8] diff --git a/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll b/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll --- a/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll +++ b/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll @@ -307,9 +307,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: punpklo p2.h, p0.b ; CHECK-NEXT: punpkhi p0.h, p0.b -; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: mov z0.d, p2/z, #-1 // =0xffffffffffffffff ; CHECK-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff +; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: scvtf z0.d, p1/m, z0.d ; CHECK-NEXT: scvtf z1.d, p1/m, z1.d ; CHECK-NEXT: ret @@ -367,9 +367,9 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: punpklo p2.h, p0.b ; CHECK-NEXT: punpkhi p0.h, p0.b -; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: mov z0.d, p2/z, #1 // =0x1 ; CHECK-NEXT: mov z1.d, p0/z, #1 // =0x1 +; CHECK-NEXT: ptrue p1.d ; CHECK-NEXT: ucvtf z0.d, p1/m, z0.d ; CHECK-NEXT: ucvtf z1.d, p1/m, z1.d ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/sve-vscale-attr.ll b/llvm/test/CodeGen/AArch64/sve-vscale-attr.ll --- a/llvm/test/CodeGen/AArch64/sve-vscale-attr.ll +++ b/llvm/test/CodeGen/AArch64/sve-vscale-attr.ll @@ -15,8 +15,8 @@ ; CHECK-NOARG-NEXT: ldp q6, q4, [x1] ; CHECK-NOARG-NEXT: stp q0, q1, [x0, #32] ; CHECK-NOARG-NEXT: add v2.4s, v2.4s, v6.4s -; CHECK-NOARG-NEXT: add v3.4s, v3.4s, v4.4s -; CHECK-NOARG-NEXT: stp q2, q3, [x0] +; CHECK-NOARG-NEXT: add v0.4s, v3.4s, v4.4s +; CHECK-NOARG-NEXT: stp q2, q0, [x0] ; CHECK-NOARG-NEXT: ret ; ; CHECK-ARG-LABEL: func_vscale_none: @@ -47,8 +47,8 @@ ; CHECK-NEXT: ldp q6, q4, [x1] ; CHECK-NEXT: stp q0, q1, [x0, #32] ; CHECK-NEXT: add v2.4s, v2.4s, v6.4s -; CHECK-NEXT: add v3.4s, v3.4s, v4.4s -; CHECK-NEXT: stp q2, q3, [x0] +; CHECK-NEXT: add v0.4s, v3.4s, v4.4s +; CHECK-NEXT: stp q2, q0, [x0] ; CHECK-NEXT: ret %op1 = load <16 x i32>, <16 x i32>* %a %op2 = load <16 x i32>, <16 x i32>* %b diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll --- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll @@ -54,9 +54,9 @@ define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind { ; CHECK-LABEL: v64i8: ; CHECK: // %bb.0: -; CHECK-NEXT: uqadd v2.16b, v2.16b, v6.16b ; CHECK-NEXT: uqadd v0.16b, v0.16b, v4.16b ; CHECK-NEXT: uqadd v1.16b, v1.16b, v5.16b +; CHECK-NEXT: uqadd v2.16b, v2.16b, v6.16b ; CHECK-NEXT: uqadd v3.16b, v3.16b, v7.16b ; CHECK-NEXT: ret %z = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> %x, <64 x i8> %y) @@ -85,9 +85,9 @@ define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind { ; CHECK-LABEL: v32i16: ; CHECK: // %bb.0: -; CHECK-NEXT: uqadd v2.8h, v2.8h, v6.8h ; CHECK-NEXT: uqadd v0.8h, v0.8h, v4.8h ; CHECK-NEXT: uqadd v1.8h, v1.8h, v5.8h +; CHECK-NEXT: uqadd v2.8h, v2.8h, v6.8h ; CHECK-NEXT: uqadd v3.8h, v3.8h, v7.8h ; CHECK-NEXT: ret %z = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> %x, <32 x i16> %y) @@ -97,9 +97,9 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind { ; CHECK-LABEL: v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: uqadd v0.8b, v0.8b, v1.8b +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: uqadd v0.8b, v1.8b, v0.8b ; CHECK-NEXT: str d0, [x2] ; CHECK-NEXT: ret %x = load <8 x i8>, <8 x i8>* %px @@ -112,13 +112,13 @@ define void @v4i8(<4 x i8>* %px, <4 x i8>* %py, <4 x i8>* %pz) nounwind { ; CHECK-LABEL: v4i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr s1, [x0] -; CHECK-NEXT: movi d0, #0xff00ff00ff00ff -; CHECK-NEXT: ldr s2, [x1] +; CHECK-NEXT: ldr s0, [x0] +; CHECK-NEXT: ldr s1, [x1] +; CHECK-NEXT: ushll v0.8h, v0.8b, #0 ; CHECK-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-NEXT: ushll v2.8h, v2.8b, #0 -; CHECK-NEXT: add v1.4h, v1.4h, v2.4h -; CHECK-NEXT: umin v0.4h, v1.4h, v0.4h +; CHECK-NEXT: add v0.4h, v0.4h, v1.4h +; CHECK-NEXT: movi d1, #0xff00ff00ff00ff +; CHECK-NEXT: umin v0.4h, v0.4h, v1.4h ; CHECK-NEXT: xtn v0.8b, v0.8h ; CHECK-NEXT: str s0, [x2] ; CHECK-NEXT: ret @@ -132,21 +132,21 @@ define void @v2i8(<2 x i8>* %px, <2 x i8>* %py, <2 x i8>* %pz) nounwind { ; CHECK-LABEL: v2i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x1] +; CHECK-NEXT: ldrb w8, [x0] ; CHECK-NEXT: movi d0, #0x0000ff000000ff -; CHECK-NEXT: ldrb w9, [x0] -; CHECK-NEXT: ldrb w10, [x1, #1] -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: ldrb w9, [x0, #1] -; CHECK-NEXT: mov v2.s[1], w10 -; CHECK-NEXT: mov v1.s[1], w9 +; CHECK-NEXT: ldrb w9, [x1] +; CHECK-NEXT: ldrb w10, [x0, #1] +; CHECK-NEXT: fmov s1, w8 +; CHECK-NEXT: ldrb w8, [x1, #1] +; CHECK-NEXT: fmov s2, w9 +; CHECK-NEXT: mov v1.s[1], w10 +; CHECK-NEXT: mov v2.s[1], w8 ; CHECK-NEXT: add v1.2s, v1.2s, v2.2s ; CHECK-NEXT: umin v0.2s, v1.2s, v0.2s -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strb w9, [x2] -; CHECK-NEXT: strb w8, [x2, #1] +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: mov w9, v0.s[1] +; CHECK-NEXT: strb w9, [x2, #1] +; CHECK-NEXT: strb w8, [x2] ; CHECK-NEXT: ret %x = load <2 x i8>, <2 x i8>* %px %y = load <2 x i8>, <2 x i8>* %py @@ -158,9 +158,9 @@ define void @v4i16(<4 x i16>* %px, <4 x i16>* %py, <4 x i16>* %pz) nounwind { ; CHECK-LABEL: v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: uqadd v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: uqadd v0.4h, v1.4h, v0.4h ; CHECK-NEXT: str d0, [x2] ; CHECK-NEXT: ret %x = load <4 x i16>, <4 x i16>* %px @@ -173,21 +173,21 @@ define void @v2i16(<2 x i16>* %px, <2 x i16>* %py, <2 x i16>* %pz) nounwind { ; CHECK-LABEL: v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x1] +; CHECK-NEXT: ldrh w8, [x0] ; CHECK-NEXT: movi d0, #0x00ffff0000ffff -; CHECK-NEXT: ldrh w9, [x0] -; CHECK-NEXT: ldrh w10, [x1, #2] -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: ldrh w9, [x0, #2] -; CHECK-NEXT: mov v2.s[1], w10 -; CHECK-NEXT: mov v1.s[1], w9 +; CHECK-NEXT: ldrh w9, [x1] +; CHECK-NEXT: ldrh w10, [x0, #2] +; CHECK-NEXT: fmov s1, w8 +; CHECK-NEXT: ldrh w8, [x1, #2] +; CHECK-NEXT: fmov s2, w9 +; CHECK-NEXT: mov v1.s[1], w10 +; CHECK-NEXT: mov v2.s[1], w8 ; CHECK-NEXT: add v1.2s, v1.2s, v2.2s ; CHECK-NEXT: umin v0.2s, v1.2s, v0.2s -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strh w9, [x2] -; CHECK-NEXT: strh w8, [x2, #2] +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: mov w9, v0.s[1] +; CHECK-NEXT: strh w9, [x2, #2] +; CHECK-NEXT: strh w8, [x2] ; CHECK-NEXT: ret %x = load <2 x i16>, <2 x i16>* %px %y = load <2 x i16>, <2 x i16>* %py @@ -225,9 +225,9 @@ define void @v1i8(<1 x i8>* %px, <1 x i8>* %py, <1 x i8>* %pz) nounwind { ; CHECK-LABEL: v1i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr b0, [x0] -; CHECK-NEXT: ldr b1, [x1] -; CHECK-NEXT: uqadd v0.8b, v0.8b, v1.8b +; CHECK-NEXT: ldr b0, [x1] +; CHECK-NEXT: ldr b1, [x0] +; CHECK-NEXT: uqadd v0.8b, v1.8b, v0.8b ; CHECK-NEXT: st1 { v0.b }[0], [x2] ; CHECK-NEXT: ret %x = load <1 x i8>, <1 x i8>* %px @@ -240,9 +240,9 @@ define void @v1i16(<1 x i16>* %px, <1 x i16>* %py, <1 x i16>* %pz) nounwind { ; CHECK-LABEL: v1i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr h0, [x0] -; CHECK-NEXT: ldr h1, [x1] -; CHECK-NEXT: uqadd v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ldr h0, [x1] +; CHECK-NEXT: ldr h1, [x0] +; CHECK-NEXT: uqadd v0.4h, v1.4h, v0.4h ; CHECK-NEXT: str h0, [x2] ; CHECK-NEXT: ret %x = load <1 x i16>, <1 x i16>* %px @@ -305,9 +305,9 @@ define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind { ; CHECK-LABEL: v16i32: ; CHECK: // %bb.0: -; CHECK-NEXT: uqadd v2.4s, v2.4s, v6.4s ; CHECK-NEXT: uqadd v0.4s, v0.4s, v4.4s ; CHECK-NEXT: uqadd v1.4s, v1.4s, v5.4s +; CHECK-NEXT: uqadd v2.4s, v2.4s, v6.4s ; CHECK-NEXT: uqadd v3.4s, v3.4s, v7.4s ; CHECK-NEXT: ret %z = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> %x, <16 x i32> %y) @@ -336,9 +336,9 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind { ; CHECK-LABEL: v8i64: ; CHECK: // %bb.0: -; CHECK-NEXT: uqadd v2.2d, v2.2d, v6.2d ; CHECK-NEXT: uqadd v0.2d, v0.2d, v4.2d ; CHECK-NEXT: uqadd v1.2d, v1.2d, v5.2d +; CHECK-NEXT: uqadd v2.2d, v2.2d, v6.2d ; CHECK-NEXT: uqadd v3.2d, v3.2d, v7.2d ; CHECK-NEXT: ret %z = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> %x, <8 x i64> %y) diff --git a/llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask.ll b/llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask.ll --- a/llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask.ll +++ b/llvm/test/CodeGen/AArch64/unfold-masked-merge-vector-variablemask.ll @@ -31,8 +31,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: movi d3, #0x0000ff000000ff ; CHECK-NEXT: and v0.8b, v0.8b, v2.8b -; CHECK-NEXT: eor v2.8b, v2.8b, v3.8b -; CHECK-NEXT: and v1.8b, v1.8b, v2.8b +; CHECK-NEXT: eor v3.8b, v2.8b, v3.8b +; CHECK-NEXT: and v1.8b, v1.8b, v3.8b ; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %mx = and <2 x i8> %x, %mask @@ -63,8 +63,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: movi d3, #0xff00ff00ff00ff ; CHECK-NEXT: and v0.8b, v0.8b, v2.8b -; CHECK-NEXT: eor v2.8b, v2.8b, v3.8b -; CHECK-NEXT: and v1.8b, v1.8b, v2.8b +; CHECK-NEXT: eor v3.8b, v2.8b, v3.8b +; CHECK-NEXT: and v1.8b, v1.8b, v3.8b ; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %mx = and <4 x i8> %x, %mask @@ -79,8 +79,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: movi d3, #0xff00ff00ff00ff ; CHECK-NEXT: and v0.8b, v0.8b, v2.8b -; CHECK-NEXT: eor v2.8b, v2.8b, v3.8b -; CHECK-NEXT: and v1.8b, v1.8b, v2.8b +; CHECK-NEXT: eor v3.8b, v2.8b, v3.8b +; CHECK-NEXT: and v1.8b, v1.8b, v3.8b ; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %mx = and <4 x i8> %x, %mask @@ -95,8 +95,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: movi d3, #0x00ffff0000ffff ; CHECK-NEXT: and v0.8b, v0.8b, v2.8b -; CHECK-NEXT: eor v2.8b, v2.8b, v3.8b -; CHECK-NEXT: and v1.8b, v1.8b, v2.8b +; CHECK-NEXT: eor v3.8b, v2.8b, v3.8b +; CHECK-NEXT: and v1.8b, v1.8b, v3.8b ; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b ; CHECK-NEXT: ret %mx = and <2 x i16> %x, %mask diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/AArch64/urem-seteq-illegal-types.ll --- a/llvm/test/CodeGen/AArch64/urem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/AArch64/urem-seteq-illegal-types.ll @@ -67,25 +67,25 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: fmov s0, w0 ; CHECK-NEXT: adrp x8, .LCPI4_0 +; CHECK-NEXT: adrp x9, .LCPI4_1 +; CHECK-NEXT: movi d3, #0x0000000000ffff ; CHECK-NEXT: mov v0.h[1], w1 ; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_0] -; CHECK-NEXT: adrp x8, .LCPI4_1 +; CHECK-NEXT: ldr d2, [x9, :lo12:.LCPI4_1] +; CHECK-NEXT: adrp x8, .LCPI4_2 ; CHECK-NEXT: mov v0.h[2], w2 ; CHECK-NEXT: sub v0.4h, v0.4h, v1.4h -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_1] -; CHECK-NEXT: adrp x8, .LCPI4_2 -; CHECK-NEXT: mul v0.4h, v0.4h, v1.4h -; CHECK-NEXT: movi d1, #0x0000000000ffff -; CHECK-NEXT: ldr d3, [x8, :lo12:.LCPI4_2] +; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_2] ; CHECK-NEXT: adrp x8, .LCPI4_3 +; CHECK-NEXT: mul v0.4h, v0.4h, v2.4h ; CHECK-NEXT: shl v2.4h, v0.4h, #1 ; CHECK-NEXT: bic v0.4h, #248, lsl #8 -; CHECK-NEXT: ushl v2.4h, v2.4h, v3.4h -; CHECK-NEXT: ushl v0.4h, v0.4h, v1.4h -; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_3] -; CHECK-NEXT: orr v0.8b, v0.8b, v2.8b +; CHECK-NEXT: ushl v0.4h, v0.4h, v3.4h +; CHECK-NEXT: ushl v1.4h, v2.4h, v1.4h +; CHECK-NEXT: ldr d2, [x8, :lo12:.LCPI4_3] +; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b ; CHECK-NEXT: bic v0.4h, #248, lsl #8 -; CHECK-NEXT: cmhi v0.4h, v0.4h, v1.4h +; CHECK-NEXT: cmhi v0.4h, v0.4h, v2.4h ; CHECK-NEXT: umov w0, v0.h[0] ; CHECK-NEXT: umov w1, v0.h[1] ; CHECK-NEXT: umov w2, v0.h[2] diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll b/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll --- a/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll +++ b/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonsplat.ll @@ -15,9 +15,9 @@ ; CHECK-NEXT: adrp x8, .LCPI0_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI0_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -79,9 +79,9 @@ ; CHECK-NEXT: adrp x8, .LCPI3_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -103,9 +103,9 @@ ; CHECK-NEXT: adrp x8, .LCPI4_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI4_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_3] -; CHECK-NEXT: cmhi v0.4s, v0.4s, v1.4s +; CHECK-NEXT: cmhi v0.4s, v0.4s, v2.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -129,9 +129,9 @@ ; CHECK-NEXT: adrp x8, .LCPI5_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI5_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI5_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -153,9 +153,9 @@ ; CHECK-NEXT: adrp x8, .LCPI6_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI6_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI6_3] -; CHECK-NEXT: cmhi v0.4s, v0.4s, v1.4s +; CHECK-NEXT: cmhi v0.4s, v0.4s, v2.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -181,9 +181,9 @@ ; CHECK-NEXT: adrp x8, .LCPI7_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI7_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI7_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -207,9 +207,9 @@ ; CHECK-NEXT: adrp x8, .LCPI8_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI8_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI8_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -233,9 +233,9 @@ ; CHECK-NEXT: adrp x8, .LCPI9_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI9_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI9_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -252,11 +252,11 @@ ; CHECK-LABEL: test_urem_odd_one: ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #52429 -; CHECK-NEXT: adrp x9, .LCPI10_0 ; CHECK-NEXT: movk w8, #52428, lsl #16 ; CHECK-NEXT: dup v1.4s, w8 +; CHECK-NEXT: adrp x8, .LCPI10_0 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI10_0] +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI10_0] ; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b @@ -276,11 +276,11 @@ ; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: adrp x8, .LCPI11_0 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI11_0] ; CHECK-NEXT: shl v1.4s, v0.4s, #31 ; CHECK-NEXT: ushr v0.4s, v0.4s, #1 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI11_0] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -304,9 +304,9 @@ ; CHECK-NEXT: adrp x8, .LCPI12_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI12_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI12_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -332,9 +332,9 @@ ; CHECK-NEXT: adrp x8, .LCPI13_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI13_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI13_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -358,9 +358,9 @@ ; CHECK-NEXT: adrp x8, .LCPI14_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI14_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI14_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -384,9 +384,9 @@ ; CHECK-NEXT: adrp x8, .LCPI15_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI15_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI15_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -412,9 +412,9 @@ ; CHECK-NEXT: adrp x8, .LCPI16_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI16_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI16_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -438,9 +438,9 @@ ; CHECK-NEXT: adrp x8, .LCPI17_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI17_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI17_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -464,9 +464,9 @@ ; CHECK-NEXT: adrp x8, .LCPI18_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI18_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI18_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -511,9 +511,9 @@ ; CHECK-NEXT: adrp x8, .LCPI20_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI20_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI20_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -537,9 +537,9 @@ ; CHECK-NEXT: adrp x8, .LCPI21_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI21_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI21_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -565,9 +565,9 @@ ; CHECK-NEXT: adrp x8, .LCPI22_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI22_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI22_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -591,9 +591,9 @@ ; CHECK-NEXT: adrp x8, .LCPI23_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI23_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI23_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -617,9 +617,9 @@ ; CHECK-NEXT: adrp x8, .LCPI24_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI24_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI24_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -644,9 +644,9 @@ ; CHECK-NEXT: adrp x8, .LCPI25_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI25_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI25_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -669,9 +669,9 @@ ; CHECK-NEXT: adrp x8, .LCPI26_3 ; CHECK-NEXT: ushl v1.4s, v0.4s, v1.4s ; CHECK-NEXT: ushl v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI26_3] ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI26_3] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll b/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll --- a/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll +++ b/llvm/test/CodeGen/AArch64/urem-seteq-vec-nonzero.ll @@ -54,11 +54,11 @@ ; CHECK-NEXT: mov w8, #43690 ; CHECK-NEXT: movk w8, #10922, lsl #16 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s +; CHECK-NEXT: dup v2.4s, w8 ; CHECK-NEXT: shl v1.4s, v0.4s, #31 ; CHECK-NEXT: ushr v0.4s, v0.4s, #1 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: dup v1.4s, w8 -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, @@ -70,18 +70,18 @@ ; CHECK-LABEL: t32_6_part1: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI3_0 +; CHECK-NEXT: mov w9, #43691 +; CHECK-NEXT: movk w9, #43690, lsl #16 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0] -; CHECK-NEXT: mov w8, #43691 -; CHECK-NEXT: movk w8, #43690, lsl #16 -; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s -; CHECK-NEXT: dup v1.4s, w8 ; CHECK-NEXT: adrp x8, .LCPI3_1 -; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s +; CHECK-NEXT: dup v2.4s, w9 +; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s +; CHECK-NEXT: mul v0.4s, v0.4s, v2.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_1] ; CHECK-NEXT: shl v1.4s, v0.4s, #31 ; CHECK-NEXT: ushr v0.4s, v0.4s, #1 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_1] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll b/llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll --- a/llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll +++ b/llvm/test/CodeGen/AArch64/urem-seteq-vec-splat.ll @@ -32,11 +32,11 @@ ; CHECK-NEXT: mov w8, #23592 ; CHECK-NEXT: movk w8, #655, lsl #16 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s +; CHECK-NEXT: dup v2.4s, w8 ; CHECK-NEXT: shl v1.4s, v0.4s, #30 ; CHECK-NEXT: ushr v0.4s, v0.4s, #2 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: dup v1.4s, w8 -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -75,11 +75,11 @@ ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0] ; CHECK-NEXT: adrp x8, .LCPI3_1 ; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_1] ; CHECK-NEXT: shl v1.4s, v0.4s, #30 ; CHECK-NEXT: ushr v0.4s, v0.4s, #2 ; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_1] -; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s +; CHECK-NEXT: cmhs v0.4s, v2.4s, v0.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret @@ -98,13 +98,13 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #34079 ; CHECK-NEXT: movk w8, #20971, lsl #16 -; CHECK-NEXT: movi v1.4s, #25 -; CHECK-NEXT: dup v2.4s, w8 -; CHECK-NEXT: umull2 v3.2d, v0.4s, v2.4s -; CHECK-NEXT: umull v2.2d, v0.2s, v2.2s -; CHECK-NEXT: uzp2 v2.4s, v2.4s, v3.4s -; CHECK-NEXT: ushr v2.4s, v2.4s, #3 -; CHECK-NEXT: mls v0.4s, v2.4s, v1.4s +; CHECK-NEXT: dup v1.4s, w8 +; CHECK-NEXT: umull2 v2.2d, v0.4s, v1.4s +; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s +; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s +; CHECK-NEXT: movi v2.4s, #25 +; CHECK-NEXT: ushr v1.4s, v1.4s, #3 +; CHECK-NEXT: mls v0.4s, v1.4s, v2.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b @@ -120,13 +120,13 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov w8, #34079 ; CHECK-NEXT: movk w8, #20971, lsl #16 -; CHECK-NEXT: movi v1.4s, #100 -; CHECK-NEXT: dup v2.4s, w8 -; CHECK-NEXT: umull2 v3.2d, v0.4s, v2.4s -; CHECK-NEXT: umull v2.2d, v0.2s, v2.2s -; CHECK-NEXT: uzp2 v2.4s, v2.4s, v3.4s -; CHECK-NEXT: ushr v2.4s, v2.4s, #5 -; CHECK-NEXT: mls v0.4s, v2.4s, v1.4s +; CHECK-NEXT: dup v1.4s, w8 +; CHECK-NEXT: umull2 v2.2d, v0.4s, v1.4s +; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s +; CHECK-NEXT: uzp2 v1.4s, v1.4s, v2.4s +; CHECK-NEXT: movi v2.4s, #100 +; CHECK-NEXT: ushr v1.4s, v1.4s, #5 +; CHECK-NEXT: mls v0.4s, v1.4s, v2.4s ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b @@ -168,8 +168,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: movi v1.4s, #15 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b -; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 +; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, @@ -183,8 +183,8 @@ ; CHECK-LABEL: test_urem_int_min: ; CHECK: // %bb.0: ; CHECK-NEXT: bic v0.4s, #128, lsl #24 -; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 +; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, @@ -197,8 +197,8 @@ define <4 x i32> @test_urem_allones(<4 x i32> %X) nounwind { ; CHECK-LABEL: test_urem_allones: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: neg v0.4s, v0.4s +; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: cmhs v0.4s, v1.4s, v0.4s ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/urem-seteq-vec-tautological.ll b/llvm/test/CodeGen/AArch64/urem-seteq-vec-tautological.ll --- a/llvm/test/CodeGen/AArch64/urem-seteq-vec-tautological.ll +++ b/llvm/test/CodeGen/AArch64/urem-seteq-vec-tautological.ll @@ -5,11 +5,11 @@ ; CHECK-LABEL: t0_all_tautological: ; CHECK: // %bb.0: ; CHECK-NEXT: adrp x8, .LCPI0_0 +; CHECK-NEXT: adrp x9, .LCPI0_1 ; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0] -; CHECK-NEXT: adrp x8, .LCPI0_1 +; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI0_1] ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_1] -; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s +; CHECK-NEXT: cmeq v0.4s, v0.4s, v2.4s ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret %urem = urem <4 x i32> %X, @@ -82,10 +82,10 @@ ; CHECK-NEXT: mov x10, v0.d[1] ; CHECK-NEXT: mul x9, x9, x8 ; CHECK-NEXT: mul x8, x10, x8 +; CHECK-NEXT: adrp x10, .LCPI4_0 ; CHECK-NEXT: fmov d0, x9 -; CHECK-NEXT: adrp x9, .LCPI4_0 +; CHECK-NEXT: ldr q1, [x10, :lo12:.LCPI4_0] ; CHECK-NEXT: mov v0.d[1], x8 -; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI4_0] ; CHECK-NEXT: cmhs v0.2d, v1.2d, v0.2d ; CHECK-NEXT: movi d1, #0xffffffff00000000 ; CHECK-NEXT: xtn v0.2s, v0.2d diff --git a/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll b/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll --- a/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll +++ b/llvm/test/CodeGen/AArch64/urem-vector-lkk.ll @@ -4,42 +4,42 @@ define <4 x i16> @fold_urem_vec_1(<4 x i16> %x) { ; CHECK-LABEL: fold_urem_vec_1: ; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #8969 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: umov w8, v0.h[0] -; CHECK-NEXT: mov w9, #8969 -; CHECK-NEXT: movk w9, #22765, lsl #16 -; CHECK-NEXT: umov w10, v0.h[1] -; CHECK-NEXT: mov w12, #16913 -; CHECK-NEXT: mov w13, #95 -; CHECK-NEXT: movk w12, #8456, lsl #16 -; CHECK-NEXT: umull x9, w8, w9 -; CHECK-NEXT: ubfx w14, w10, #2, #14 -; CHECK-NEXT: lsr x9, x9, #32 -; CHECK-NEXT: sub w11, w8, w9 -; CHECK-NEXT: umull x12, w14, w12 -; CHECK-NEXT: add w9, w9, w11, lsr #1 -; CHECK-NEXT: umov w11, v0.h[2] -; CHECK-NEXT: lsr w9, w9, #6 -; CHECK-NEXT: lsr x12, x12, #34 -; CHECK-NEXT: msub w8, w9, w13, w8 -; CHECK-NEXT: mov w9, #33437 -; CHECK-NEXT: movk w9, #21399, lsl #16 +; CHECK-NEXT: umov w9, v0.h[1] +; CHECK-NEXT: movk w8, #22765, lsl #16 +; CHECK-NEXT: umov w10, v0.h[0] +; CHECK-NEXT: mov w11, #16913 +; CHECK-NEXT: ubfx w12, w9, #2, #14 +; CHECK-NEXT: movk w11, #8456, lsl #16 +; CHECK-NEXT: umull x8, w10, w8 ; CHECK-NEXT: mov w13, #124 -; CHECK-NEXT: umull x9, w11, w9 -; CHECK-NEXT: msub w10, w12, w13, w10 -; CHECK-NEXT: umov w12, v0.h[3] -; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: mov w14, #33437 +; CHECK-NEXT: umull x11, w12, w11 +; CHECK-NEXT: lsr x8, x8, #32 +; CHECK-NEXT: sub w12, w10, w8 +; CHECK-NEXT: movk w14, #21399, lsl #16 +; CHECK-NEXT: lsr x11, x11, #34 +; CHECK-NEXT: add w8, w8, w12, lsr #1 +; CHECK-NEXT: umov w12, v0.h[2] +; CHECK-NEXT: msub w9, w11, w13, w9 +; CHECK-NEXT: mov w11, #95 +; CHECK-NEXT: lsr w8, w8, #6 ; CHECK-NEXT: mov w13, #2287 -; CHECK-NEXT: lsr x8, x9, #37 -; CHECK-NEXT: mov w9, #98 ; CHECK-NEXT: movk w13, #16727, lsl #16 -; CHECK-NEXT: msub w8, w8, w9, w11 -; CHECK-NEXT: mov v0.h[1], w10 -; CHECK-NEXT: umull x9, w12, w13 -; CHECK-NEXT: mov w10, #1003 -; CHECK-NEXT: lsr x9, x9, #40 -; CHECK-NEXT: mov v0.h[2], w8 -; CHECK-NEXT: msub w8, w9, w10, w12 +; CHECK-NEXT: umull x14, w12, w14 +; CHECK-NEXT: msub w8, w8, w11, w10 +; CHECK-NEXT: umov w10, v0.h[3] +; CHECK-NEXT: lsr x11, x14, #37 +; CHECK-NEXT: mov w14, #98 +; CHECK-NEXT: umull x13, w10, w13 +; CHECK-NEXT: msub w11, w11, w14, w12 +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: lsr x12, x13, #40 +; CHECK-NEXT: mov w13, #1003 +; CHECK-NEXT: mov v0.h[1], w9 +; CHECK-NEXT: msub w8, w12, w13, w10 +; CHECK-NEXT: mov v0.h[2], w11 ; CHECK-NEXT: mov v0.h[3], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret @@ -50,41 +50,41 @@ define <4 x i16> @fold_urem_vec_2(<4 x i16> %x) { ; CHECK-LABEL: fold_urem_vec_2: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: umov w10, v0.h[0] ; CHECK-NEXT: mov w8, #8969 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: umov w9, v0.h[1] ; CHECK-NEXT: movk w8, #22765, lsl #16 -; CHECK-NEXT: umov w15, v0.h[2] +; CHECK-NEXT: umov w10, v0.h[0] +; CHECK-NEXT: umov w14, v0.h[2] ; CHECK-NEXT: umov w16, v0.h[3] -; CHECK-NEXT: umull x12, w10, w8 ; CHECK-NEXT: umull x11, w9, w8 -; CHECK-NEXT: lsr x12, x12, #32 +; CHECK-NEXT: umull x12, w10, w8 ; CHECK-NEXT: lsr x11, x11, #32 -; CHECK-NEXT: sub w14, w10, w12 +; CHECK-NEXT: lsr x12, x12, #32 ; CHECK-NEXT: sub w13, w9, w11 -; CHECK-NEXT: add w12, w12, w14, lsr #1 -; CHECK-NEXT: umull x14, w15, w8 +; CHECK-NEXT: sub w15, w10, w12 ; CHECK-NEXT: add w11, w11, w13, lsr #1 -; CHECK-NEXT: mov w13, #95 +; CHECK-NEXT: umull x13, w14, w8 +; CHECK-NEXT: add w12, w12, w15, lsr #1 +; CHECK-NEXT: mov w15, #95 ; CHECK-NEXT: lsr w12, w12, #6 -; CHECK-NEXT: lsr w11, w11, #6 ; CHECK-NEXT: umull x8, w16, w8 -; CHECK-NEXT: msub w10, w12, w13, w10 -; CHECK-NEXT: lsr x12, x14, #32 -; CHECK-NEXT: msub w9, w11, w13, w9 -; CHECK-NEXT: sub w11, w15, w12 +; CHECK-NEXT: lsr x13, x13, #32 +; CHECK-NEXT: lsr w11, w11, #6 +; CHECK-NEXT: msub w10, w12, w15, w10 +; CHECK-NEXT: sub w12, w14, w13 ; CHECK-NEXT: lsr x8, x8, #32 +; CHECK-NEXT: msub w9, w11, w15, w9 +; CHECK-NEXT: add w12, w13, w12, lsr #1 +; CHECK-NEXT: sub w13, w16, w8 +; CHECK-NEXT: lsr w11, w12, #6 +; CHECK-NEXT: add w8, w8, w13, lsr #1 ; CHECK-NEXT: fmov s0, w10 -; CHECK-NEXT: add w10, w12, w11, lsr #1 -; CHECK-NEXT: lsr w10, w10, #6 -; CHECK-NEXT: sub w11, w16, w8 -; CHECK-NEXT: mov v0.h[1], w9 -; CHECK-NEXT: msub w9, w10, w13, w15 -; CHECK-NEXT: add w8, w8, w11, lsr #1 +; CHECK-NEXT: msub w11, w11, w15, w14 ; CHECK-NEXT: lsr w8, w8, #6 -; CHECK-NEXT: mov v0.h[2], w9 -; CHECK-NEXT: msub w8, w8, w13, w16 +; CHECK-NEXT: mov v0.h[1], w9 +; CHECK-NEXT: msub w8, w8, w15, w16 +; CHECK-NEXT: mov v0.h[2], w11 ; CHECK-NEXT: mov v0.h[3], w8 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret @@ -97,46 +97,46 @@ define <4 x i16> @combine_urem_udiv(<4 x i16> %x) { ; CHECK-LABEL: combine_urem_udiv: ; CHECK: // %bb.0: -; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: umov w9, v0.h[0] ; CHECK-NEXT: mov w8, #8969 +; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: umov w10, v0.h[0] ; CHECK-NEXT: movk w8, #22765, lsl #16 -; CHECK-NEXT: umov w10, v0.h[1] +; CHECK-NEXT: umov w9, v0.h[1] ; CHECK-NEXT: umov w11, v0.h[2] -; CHECK-NEXT: mov w15, #95 ; CHECK-NEXT: umov w13, v0.h[3] -; CHECK-NEXT: umull x12, w9, w8 ; CHECK-NEXT: umull x14, w10, w8 +; CHECK-NEXT: umull x12, w9, w8 +; CHECK-NEXT: lsr x14, x14, #32 +; CHECK-NEXT: umull x15, w11, w8 +; CHECK-NEXT: sub w17, w10, w14 ; CHECK-NEXT: lsr x12, x12, #32 -; CHECK-NEXT: umull x17, w11, w8 ; CHECK-NEXT: sub w16, w9, w12 -; CHECK-NEXT: lsr x14, x14, #32 -; CHECK-NEXT: lsr x17, x17, #32 ; CHECK-NEXT: umull x8, w13, w8 +; CHECK-NEXT: add w14, w14, w17, lsr #1 +; CHECK-NEXT: mov w17, #95 +; CHECK-NEXT: lsr w14, w14, #6 +; CHECK-NEXT: lsr x15, x15, #32 ; CHECK-NEXT: add w12, w12, w16, lsr #1 -; CHECK-NEXT: sub w16, w10, w14 -; CHECK-NEXT: lsr w12, w12, #6 +; CHECK-NEXT: sub w16, w11, w15 +; CHECK-NEXT: msub w10, w14, w17, w10 ; CHECK-NEXT: lsr x8, x8, #32 -; CHECK-NEXT: add w14, w14, w16, lsr #1 -; CHECK-NEXT: sub w16, w11, w17 -; CHECK-NEXT: msub w9, w12, w15, w9 -; CHECK-NEXT: lsr w14, w14, #6 -; CHECK-NEXT: add w16, w17, w16, lsr #1 -; CHECK-NEXT: fmov s1, w12 -; CHECK-NEXT: msub w10, w14, w15, w10 -; CHECK-NEXT: sub w17, w13, w8 -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: lsr w9, w16, #6 -; CHECK-NEXT: mov v1.h[1], w14 -; CHECK-NEXT: add w8, w8, w17, lsr #1 -; CHECK-NEXT: msub w11, w9, w15, w11 +; CHECK-NEXT: lsr w12, w12, #6 +; CHECK-NEXT: add w15, w15, w16, lsr #1 +; CHECK-NEXT: sub w16, w13, w8 +; CHECK-NEXT: lsr w15, w15, #6 +; CHECK-NEXT: msub w9, w12, w17, w9 +; CHECK-NEXT: fmov s1, w14 +; CHECK-NEXT: add w8, w8, w16, lsr #1 +; CHECK-NEXT: fmov s0, w10 ; CHECK-NEXT: lsr w8, w8, #6 -; CHECK-NEXT: mov v0.h[1], w10 -; CHECK-NEXT: msub w10, w8, w15, w13 -; CHECK-NEXT: mov v1.h[2], w9 +; CHECK-NEXT: msub w11, w15, w17, w11 +; CHECK-NEXT: mov v1.h[1], w12 +; CHECK-NEXT: msub w13, w8, w17, w13 +; CHECK-NEXT: mov v0.h[1], w9 +; CHECK-NEXT: mov v1.h[2], w15 ; CHECK-NEXT: mov v0.h[2], w11 ; CHECK-NEXT: mov v1.h[3], w8 -; CHECK-NEXT: mov v0.h[3], w10 +; CHECK-NEXT: mov v0.h[3], w13 ; CHECK-NEXT: add v0.4h, v0.4h, v1.4h ; CHECK-NEXT: ret %1 = urem <4 x i16> %x, @@ -150,28 +150,28 @@ define <4 x i16> @dont_fold_urem_power_of_two(<4 x i16> %x) { ; CHECK-LABEL: dont_fold_urem_power_of_two: ; CHECK: // %bb.0: +; CHECK-NEXT: mov w8, #8969 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 -; CHECK-NEXT: umov w10, v0.h[0] ; CHECK-NEXT: umov w9, v0.h[3] -; CHECK-NEXT: mov w8, #8969 -; CHECK-NEXT: umov w11, v0.h[1] ; CHECK-NEXT: movk w8, #22765, lsl #16 -; CHECK-NEXT: and w10, w10, #0x3f +; CHECK-NEXT: umov w12, v0.h[0] +; CHECK-NEXT: umov w13, v0.h[1] +; CHECK-NEXT: mov w11, #95 ; CHECK-NEXT: umull x8, w9, w8 -; CHECK-NEXT: and w11, w11, #0x1f +; CHECK-NEXT: and w13, w13, #0x1f ; CHECK-NEXT: lsr x8, x8, #32 -; CHECK-NEXT: fmov s1, w10 -; CHECK-NEXT: umov w10, v0.h[2] -; CHECK-NEXT: sub w12, w9, w8 -; CHECK-NEXT: mov v1.h[1], w11 -; CHECK-NEXT: add w8, w8, w12, lsr #1 -; CHECK-NEXT: and w10, w10, #0x7 +; CHECK-NEXT: sub w10, w9, w8 +; CHECK-NEXT: add w8, w8, w10, lsr #1 +; CHECK-NEXT: and w10, w12, #0x3f ; CHECK-NEXT: lsr w8, w8, #6 -; CHECK-NEXT: mov w11, #95 +; CHECK-NEXT: umov w12, v0.h[2] +; CHECK-NEXT: fmov s0, w10 ; CHECK-NEXT: msub w8, w8, w11, w9 -; CHECK-NEXT: mov v1.h[2], w10 -; CHECK-NEXT: mov v1.h[3], w8 -; CHECK-NEXT: fmov d0, d1 +; CHECK-NEXT: and w9, w12, #0x7 +; CHECK-NEXT: mov v0.h[1], w13 +; CHECK-NEXT: mov v0.h[2], w9 +; CHECK-NEXT: mov v0.h[3], w8 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %1 = urem <4 x i16> %x, ret <4 x i16> %1 @@ -186,29 +186,29 @@ ; CHECK-NEXT: mov w8, #30865 ; CHECK-NEXT: movk w8, #51306, lsl #16 ; CHECK-NEXT: umov w11, v0.h[2] +; CHECK-NEXT: ubfx w10, w9, #1, #15 ; CHECK-NEXT: mov w12, #654 -; CHECK-NEXT: movi d1, #0000000000000000 ; CHECK-NEXT: mov w13, #47143 -; CHECK-NEXT: ubfx w10, w9, #1, #15 -; CHECK-NEXT: movk w13, #24749, lsl #16 ; CHECK-NEXT: umull x8, w10, w8 ; CHECK-NEXT: mov w10, #17097 ; CHECK-NEXT: movk w10, #45590, lsl #16 +; CHECK-NEXT: movk w13, #24749, lsl #16 ; CHECK-NEXT: lsr x8, x8, #40 ; CHECK-NEXT: umull x10, w11, w10 ; CHECK-NEXT: msub w8, w8, w12, w9 ; CHECK-NEXT: umov w9, v0.h[3] -; CHECK-NEXT: lsr x10, x10, #36 ; CHECK-NEXT: mov w12, #23 +; CHECK-NEXT: lsr x10, x10, #36 +; CHECK-NEXT: umull x13, w9, w13 +; CHECK-NEXT: movi d0, #0000000000000000 ; CHECK-NEXT: msub w10, w10, w12, w11 -; CHECK-NEXT: mov w11, #5423 -; CHECK-NEXT: mov v1.h[1], w8 -; CHECK-NEXT: umull x8, w9, w13 -; CHECK-NEXT: lsr x8, x8, #43 -; CHECK-NEXT: mov v1.h[2], w10 -; CHECK-NEXT: msub w8, w8, w11, w9 -; CHECK-NEXT: mov v1.h[3], w8 -; CHECK-NEXT: fmov d0, d1 +; CHECK-NEXT: mov w12, #5423 +; CHECK-NEXT: lsr x11, x13, #43 +; CHECK-NEXT: mov v0.h[1], w8 +; CHECK-NEXT: msub w8, w11, w12, w9 +; CHECK-NEXT: mov v0.h[2], w10 +; CHECK-NEXT: mov v0.h[3], w8 +; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: ret %1 = urem <4 x i16> %x, ret <4 x i16> %1 @@ -228,39 +228,39 @@ ; CHECK-LABEL: dont_fold_urem_i64: ; CHECK: // %bb.0: ; CHECK-NEXT: mov x8, #17097 -; CHECK-NEXT: fmov x9, d1 +; CHECK-NEXT: mov x11, #21445 ; CHECK-NEXT: movk x8, #45590, lsl #16 -; CHECK-NEXT: mov x13, #21445 +; CHECK-NEXT: fmov x9, d1 ; CHECK-NEXT: movk x8, #34192, lsl #32 -; CHECK-NEXT: movk x13, #1603, lsl #16 -; CHECK-NEXT: movk x8, #25644, lsl #48 -; CHECK-NEXT: movk x13, #15432, lsl #32 ; CHECK-NEXT: mov x10, v0.d[1] -; CHECK-NEXT: movk x13, #25653, lsl #48 +; CHECK-NEXT: movk x11, #1603, lsl #16 +; CHECK-NEXT: movk x8, #25644, lsl #48 +; CHECK-NEXT: movk x11, #15432, lsl #32 +; CHECK-NEXT: lsr x12, x10, #1 +; CHECK-NEXT: movk x11, #25653, lsl #48 ; CHECK-NEXT: umulh x8, x9, x8 -; CHECK-NEXT: mov x11, v1.d[1] -; CHECK-NEXT: sub x12, x9, x8 -; CHECK-NEXT: lsr x14, x10, #1 -; CHECK-NEXT: add x8, x8, x12, lsr #1 +; CHECK-NEXT: mov x14, v1.d[1] +; CHECK-NEXT: umulh x11, x12, x11 ; CHECK-NEXT: mov x12, #12109 +; CHECK-NEXT: sub x13, x9, x8 ; CHECK-NEXT: movk x12, #52170, lsl #16 -; CHECK-NEXT: umulh x13, x14, x13 ; CHECK-NEXT: movk x12, #28749, lsl #32 -; CHECK-NEXT: mov w14, #23 +; CHECK-NEXT: lsr x11, x11, #7 ; CHECK-NEXT: movk x12, #49499, lsl #48 +; CHECK-NEXT: add x8, x8, x13, lsr #1 +; CHECK-NEXT: mov w13, #23 ; CHECK-NEXT: lsr x8, x8, #4 -; CHECK-NEXT: lsr x13, x13, #7 -; CHECK-NEXT: umulh x12, x11, x12 -; CHECK-NEXT: msub x8, x8, x14, x9 -; CHECK-NEXT: mov w9, #5423 +; CHECK-NEXT: umulh x12, x14, x12 +; CHECK-NEXT: msub x8, x8, x13, x9 +; CHECK-NEXT: mov w9, #654 +; CHECK-NEXT: mov w13, #5423 ; CHECK-NEXT: lsr x12, x12, #12 -; CHECK-NEXT: mov w14, #654 -; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: msub x9, x12, x9, x11 -; CHECK-NEXT: msub x10, x13, x14, x10 +; CHECK-NEXT: msub x9, x11, x9, x10 +; CHECK-NEXT: msub x10, x12, x13, x14 ; CHECK-NEXT: fmov d1, x8 -; CHECK-NEXT: mov v1.d[1], x9 -; CHECK-NEXT: mov v0.d[1], x10 +; CHECK-NEXT: movi v0.2d, #0000000000000000 +; CHECK-NEXT: mov v0.d[1], x9 +; CHECK-NEXT: mov v1.d[1], x10 ; CHECK-NEXT: ret %1 = urem <4 x i64> %x, ret <4 x i64> %1 diff --git a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll --- a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll @@ -55,9 +55,9 @@ define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind { ; CHECK-LABEL: v64i8: ; CHECK: // %bb.0: -; CHECK-NEXT: uqsub v2.16b, v2.16b, v6.16b ; CHECK-NEXT: uqsub v0.16b, v0.16b, v4.16b ; CHECK-NEXT: uqsub v1.16b, v1.16b, v5.16b +; CHECK-NEXT: uqsub v2.16b, v2.16b, v6.16b ; CHECK-NEXT: uqsub v3.16b, v3.16b, v7.16b ; CHECK-NEXT: ret %z = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> %x, <64 x i8> %y) @@ -86,9 +86,9 @@ define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind { ; CHECK-LABEL: v32i16: ; CHECK: // %bb.0: -; CHECK-NEXT: uqsub v2.8h, v2.8h, v6.8h ; CHECK-NEXT: uqsub v0.8h, v0.8h, v4.8h ; CHECK-NEXT: uqsub v1.8h, v1.8h, v5.8h +; CHECK-NEXT: uqsub v2.8h, v2.8h, v6.8h ; CHECK-NEXT: uqsub v3.8h, v3.8h, v7.8h ; CHECK-NEXT: ret %z = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> %x, <32 x i16> %y) @@ -98,9 +98,9 @@ define void @v8i8(<8 x i8>* %px, <8 x i8>* %py, <8 x i8>* %pz) nounwind { ; CHECK-LABEL: v8i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: uqsub v0.8b, v0.8b, v1.8b +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: uqsub v0.8b, v1.8b, v0.8b ; CHECK-NEXT: str d0, [x2] ; CHECK-NEXT: ret %x = load <8 x i8>, <8 x i8>* %px @@ -131,19 +131,19 @@ define void @v2i8(<2 x i8>* %px, <2 x i8>* %py, <2 x i8>* %pz) nounwind { ; CHECK-LABEL: v2i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrb w8, [x1] -; CHECK-NEXT: ldrb w9, [x0] -; CHECK-NEXT: ldrb w10, [x1, #1] -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: ldrb w9, [x0, #1] -; CHECK-NEXT: mov v1.s[1], w10 -; CHECK-NEXT: mov v0.s[1], w9 +; CHECK-NEXT: ldrb w8, [x0] +; CHECK-NEXT: ldrb w9, [x1] +; CHECK-NEXT: ldrb w10, [x0, #1] +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: ldrb w8, [x1, #1] +; CHECK-NEXT: fmov s1, w9 +; CHECK-NEXT: mov v0.s[1], w10 +; CHECK-NEXT: mov v1.s[1], w8 ; CHECK-NEXT: uqsub v0.2s, v0.2s, v1.2s -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strb w9, [x2] -; CHECK-NEXT: strb w8, [x2, #1] +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: mov w9, v0.s[1] +; CHECK-NEXT: strb w9, [x2, #1] +; CHECK-NEXT: strb w8, [x2] ; CHECK-NEXT: ret %x = load <2 x i8>, <2 x i8>* %px %y = load <2 x i8>, <2 x i8>* %py @@ -155,9 +155,9 @@ define void @v4i16(<4 x i16>* %px, <4 x i16>* %py, <4 x i16>* %pz) nounwind { ; CHECK-LABEL: v4i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr d0, [x0] -; CHECK-NEXT: ldr d1, [x1] -; CHECK-NEXT: uqsub v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ldr d0, [x1] +; CHECK-NEXT: ldr d1, [x0] +; CHECK-NEXT: uqsub v0.4h, v1.4h, v0.4h ; CHECK-NEXT: str d0, [x2] ; CHECK-NEXT: ret %x = load <4 x i16>, <4 x i16>* %px @@ -170,19 +170,19 @@ define void @v2i16(<2 x i16>* %px, <2 x i16>* %py, <2 x i16>* %pz) nounwind { ; CHECK-LABEL: v2i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x1] -; CHECK-NEXT: ldrh w9, [x0] -; CHECK-NEXT: ldrh w10, [x1, #2] -; CHECK-NEXT: fmov s1, w8 -; CHECK-NEXT: fmov s0, w9 -; CHECK-NEXT: ldrh w9, [x0, #2] -; CHECK-NEXT: mov v1.s[1], w10 -; CHECK-NEXT: mov v0.s[1], w9 +; CHECK-NEXT: ldrh w8, [x0] +; CHECK-NEXT: ldrh w9, [x1] +; CHECK-NEXT: ldrh w10, [x0, #2] +; CHECK-NEXT: fmov s0, w8 +; CHECK-NEXT: ldrh w8, [x1, #2] +; CHECK-NEXT: fmov s1, w9 +; CHECK-NEXT: mov v0.s[1], w10 +; CHECK-NEXT: mov v1.s[1], w8 ; CHECK-NEXT: uqsub v0.2s, v0.2s, v1.2s -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strh w9, [x2] -; CHECK-NEXT: strh w8, [x2, #2] +; CHECK-NEXT: fmov w8, s0 +; CHECK-NEXT: mov w9, v0.s[1] +; CHECK-NEXT: strh w9, [x2, #2] +; CHECK-NEXT: strh w8, [x2] ; CHECK-NEXT: ret %x = load <2 x i16>, <2 x i16>* %px %y = load <2 x i16>, <2 x i16>* %py @@ -220,9 +220,9 @@ define void @v1i8(<1 x i8>* %px, <1 x i8>* %py, <1 x i8>* %pz) nounwind { ; CHECK-LABEL: v1i8: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr b0, [x0] -; CHECK-NEXT: ldr b1, [x1] -; CHECK-NEXT: uqsub v0.8b, v0.8b, v1.8b +; CHECK-NEXT: ldr b0, [x1] +; CHECK-NEXT: ldr b1, [x0] +; CHECK-NEXT: uqsub v0.8b, v1.8b, v0.8b ; CHECK-NEXT: st1 { v0.b }[0], [x2] ; CHECK-NEXT: ret %x = load <1 x i8>, <1 x i8>* %px @@ -235,9 +235,9 @@ define void @v1i16(<1 x i16>* %px, <1 x i16>* %py, <1 x i16>* %pz) nounwind { ; CHECK-LABEL: v1i16: ; CHECK: // %bb.0: -; CHECK-NEXT: ldr h0, [x0] -; CHECK-NEXT: ldr h1, [x1] -; CHECK-NEXT: uqsub v0.4h, v0.4h, v1.4h +; CHECK-NEXT: ldr h0, [x1] +; CHECK-NEXT: ldr h1, [x0] +; CHECK-NEXT: uqsub v0.4h, v1.4h, v0.4h ; CHECK-NEXT: str h0, [x2] ; CHECK-NEXT: ret %x = load <1 x i16>, <1 x i16>* %px @@ -301,9 +301,9 @@ define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind { ; CHECK-LABEL: v16i32: ; CHECK: // %bb.0: -; CHECK-NEXT: uqsub v2.4s, v2.4s, v6.4s ; CHECK-NEXT: uqsub v0.4s, v0.4s, v4.4s ; CHECK-NEXT: uqsub v1.4s, v1.4s, v5.4s +; CHECK-NEXT: uqsub v2.4s, v2.4s, v6.4s ; CHECK-NEXT: uqsub v3.4s, v3.4s, v7.4s ; CHECK-NEXT: ret %z = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> %x, <16 x i32> %y) @@ -332,9 +332,9 @@ define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind { ; CHECK-LABEL: v8i64: ; CHECK: // %bb.0: -; CHECK-NEXT: uqsub v2.2d, v2.2d, v6.2d ; CHECK-NEXT: uqsub v0.2d, v0.2d, v4.2d ; CHECK-NEXT: uqsub v1.2d, v1.2d, v5.2d +; CHECK-NEXT: uqsub v2.2d, v2.2d, v6.2d ; CHECK-NEXT: uqsub v3.2d, v3.2d, v7.2d ; CHECK-NEXT: ret %z = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> %x, <8 x i64> %y) diff --git a/llvm/test/CodeGen/AArch64/vec-extract-branch.ll b/llvm/test/CodeGen/AArch64/vec-extract-branch.ll --- a/llvm/test/CodeGen/AArch64/vec-extract-branch.ll +++ b/llvm/test/CodeGen/AArch64/vec-extract-branch.ll @@ -6,8 +6,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: fcmgt v0.2d, v0.2d, #0.0 ; CHECK-NEXT: xtn v0.2s, v0.2d -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: and w8, w9, w8 ; CHECK-NEXT: tbz w8, #0, .LBB0_2 ; CHECK-NEXT: // %bb.1: // %true diff --git a/llvm/test/CodeGen/AArch64/vec-libcalls.ll b/llvm/test/CodeGen/AArch64/vec-libcalls.ll --- a/llvm/test/CodeGen/AArch64/vec-libcalls.ll +++ b/llvm/test/CodeGen/AArch64/vec-libcalls.ll @@ -124,8 +124,8 @@ ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: mov v1.s[2], v0.s[0] ; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: mov s0, v0.s[3] ; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill +; CHECK-NEXT: mov s0, v0.s[3] ; CHECK-NEXT: bl sinf ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 diff --git a/llvm/test/CodeGen/AArch64/vec_cttz.ll b/llvm/test/CodeGen/AArch64/vec_cttz.ll --- a/llvm/test/CodeGen/AArch64/vec_cttz.ll +++ b/llvm/test/CodeGen/AArch64/vec_cttz.ll @@ -85,8 +85,8 @@ ; CHECK-NEXT: movi v1.8h, #1 ; CHECK-NEXT: sub v1.8h, v0.8h, v1.8h ; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b -; CHECK-NEXT: movi v1.8h, #16 ; CHECK-NEXT: clz v0.8h, v0.8h +; CHECK-NEXT: movi v1.8h, #16 ; CHECK-NEXT: sub v0.8h, v1.8h, v0.8h ; CHECK-NEXT: ret %b = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true) @@ -99,8 +99,8 @@ ; CHECK-NEXT: movi v1.4s, #1 ; CHECK-NEXT: sub v1.4s, v0.4s, v1.4s ; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b -; CHECK-NEXT: movi v1.4s, #32 ; CHECK-NEXT: clz v0.4s, v0.4s +; CHECK-NEXT: movi v1.4s, #32 ; CHECK-NEXT: sub v0.4s, v1.4s, v0.4s ; CHECK-NEXT: ret %b = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) diff --git a/llvm/test/CodeGen/AArch64/vec_uaddo.ll b/llvm/test/CodeGen/AArch64/vec_uaddo.ll --- a/llvm/test/CodeGen/AArch64/vec_uaddo.ll +++ b/llvm/test/CodeGen/AArch64/vec_uaddo.ll @@ -52,8 +52,8 @@ ; CHECK-NEXT: add v1.4s, v0.4s, v1.4s ; CHECK-NEXT: add x8, x0, #8 ; CHECK-NEXT: cmhi v0.4s, v0.4s, v1.4s -; CHECK-NEXT: st1 { v1.s }[2], [x8] ; CHECK-NEXT: str d1, [x0] +; CHECK-NEXT: st1 { v1.s }[2], [x8] ; CHECK-NEXT: ret %t = call {<3 x i32>, <3 x i1>} @llvm.uadd.with.overflow.v3i32(<3 x i32> %a0, <3 x i32> %a1) %val = extractvalue {<3 x i32>, <3 x i1>} %t, 0 @@ -81,34 +81,34 @@ define <6 x i32> @uaddo_v6i32(<6 x i32> %a0, <6 x i32> %a1, <6 x i32>* %p2) nounwind { ; CHECK-LABEL: uaddo_v6i32: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov s0, w6 -; CHECK-NEXT: fmov s1, w0 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: add x9, sp, #8 -; CHECK-NEXT: ldr s2, [sp, #16] +; CHECK-NEXT: fmov s1, w6 +; CHECK-NEXT: mov x9, sp +; CHECK-NEXT: ldr s0, [sp, #16] +; CHECK-NEXT: add x8, sp, #24 +; CHECK-NEXT: fmov s2, w0 ; CHECK-NEXT: fmov s3, w4 -; CHECK-NEXT: mov v0.s[1], w7 -; CHECK-NEXT: mov v1.s[1], w1 +; CHECK-NEXT: mov v1.s[1], w7 +; CHECK-NEXT: ld1 { v0.s }[1], [x8] +; CHECK-NEXT: add x8, sp, #8 +; CHECK-NEXT: ld1 { v1.s }[2], [x9] +; CHECK-NEXT: mov v2.s[1], w1 ; CHECK-NEXT: mov v3.s[1], w5 -; CHECK-NEXT: ld1 { v0.s }[2], [x8] -; CHECK-NEXT: add x8, sp, #24 -; CHECK-NEXT: mov v1.s[2], w2 -; CHECK-NEXT: ld1 { v2.s }[1], [x8] -; CHECK-NEXT: ld1 { v0.s }[3], [x9] -; CHECK-NEXT: mov v1.s[3], w3 +; CHECK-NEXT: mov v2.s[2], w2 +; CHECK-NEXT: ld1 { v1.s }[3], [x8] +; CHECK-NEXT: mov v2.s[3], w3 ; CHECK-NEXT: ldr x8, [sp, #32] -; CHECK-NEXT: add v2.4s, v3.4s, v2.4s -; CHECK-NEXT: add v0.4s, v1.4s, v0.4s -; CHECK-NEXT: cmhi v3.4s, v3.4s, v2.4s -; CHECK-NEXT: str d2, [x8, #16] -; CHECK-NEXT: cmhi v1.4s, v1.4s, v0.4s -; CHECK-NEXT: str q0, [x8] -; CHECK-NEXT: mov w5, v3.s[1] +; CHECK-NEXT: add v0.4s, v3.4s, v0.4s +; CHECK-NEXT: add v1.4s, v2.4s, v1.4s +; CHECK-NEXT: str d0, [x8, #16] +; CHECK-NEXT: cmhi v3.4s, v3.4s, v0.4s +; CHECK-NEXT: cmhi v2.4s, v2.4s, v1.4s +; CHECK-NEXT: str q1, [x8] ; CHECK-NEXT: fmov w4, s3 -; CHECK-NEXT: mov w1, v1.s[1] -; CHECK-NEXT: mov w2, v1.s[2] -; CHECK-NEXT: mov w3, v1.s[3] -; CHECK-NEXT: fmov w0, s1 +; CHECK-NEXT: fmov w0, s2 +; CHECK-NEXT: mov w1, v2.s[1] +; CHECK-NEXT: mov w2, v2.s[2] +; CHECK-NEXT: mov w3, v2.s[3] +; CHECK-NEXT: mov w5, v3.s[1] ; CHECK-NEXT: ret %t = call {<6 x i32>, <6 x i1>} @llvm.uadd.with.overflow.v6i32(<6 x i32> %a0, <6 x i32> %a1) %val = extractvalue {<6 x i32>, <6 x i1>} %t, 0 @@ -121,10 +121,10 @@ define <8 x i32> @uaddo_v8i32(<8 x i32> %a0, <8 x i32> %a1, <8 x i32>* %p2) nounwind { ; CHECK-LABEL: uaddo_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: add v3.4s, v1.4s, v3.4s ; CHECK-NEXT: add v2.4s, v0.4s, v2.4s -; CHECK-NEXT: cmhi v1.4s, v1.4s, v3.4s +; CHECK-NEXT: add v3.4s, v1.4s, v3.4s ; CHECK-NEXT: cmhi v0.4s, v0.4s, v2.4s +; CHECK-NEXT: cmhi v1.4s, v1.4s, v3.4s ; CHECK-NEXT: stp q2, q3, [x0] ; CHECK-NEXT: ret %t = call {<8 x i32>, <8 x i1>} @llvm.uadd.with.overflow.v8i32(<8 x i32> %a0, <8 x i32> %a1) @@ -141,23 +141,23 @@ ; CHECK-NEXT: add v4.16b, v0.16b, v1.16b ; CHECK-NEXT: cmhi v0.16b, v0.16b, v4.16b ; CHECK-NEXT: str q4, [x0] -; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: zip1 v2.8b, v0.8b, v0.8b +; CHECK-NEXT: zip1 v1.8b, v0.8b, v0.8b +; CHECK-NEXT: zip2 v2.8b, v0.8b, v0.8b +; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-NEXT: zip1 v3.8b, v0.8b, v0.8b ; CHECK-NEXT: zip2 v0.8b, v0.8b, v0.8b ; CHECK-NEXT: ushll v2.4s, v2.4h, #0 -; CHECK-NEXT: zip1 v3.8b, v1.8b, v0.8b -; CHECK-NEXT: zip2 v1.8b, v1.8b, v0.8b +; CHECK-NEXT: ushll v3.4s, v3.4h, #0 ; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: shl v1.4s, v1.4s, #31 ; CHECK-NEXT: shl v2.4s, v2.4s, #31 -; CHECK-NEXT: ushll v3.4s, v3.4h, #0 -; CHECK-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-NEXT: shl v5.4s, v0.4s, #31 -; CHECK-NEXT: sshr v0.4s, v2.4s, #31 ; CHECK-NEXT: shl v3.4s, v3.4s, #31 -; CHECK-NEXT: shl v6.4s, v1.4s, #31 -; CHECK-NEXT: sshr v1.4s, v5.4s, #31 +; CHECK-NEXT: shl v5.4s, v0.4s, #31 +; CHECK-NEXT: sshr v0.4s, v1.4s, #31 +; CHECK-NEXT: sshr v1.4s, v2.4s, #31 ; CHECK-NEXT: sshr v2.4s, v3.4s, #31 -; CHECK-NEXT: sshr v3.4s, v6.4s, #31 +; CHECK-NEXT: sshr v3.4s, v5.4s, #31 ; CHECK-NEXT: ret %t = call {<16 x i8>, <16 x i1>} @llvm.uadd.with.overflow.v16i8(<16 x i8> %a0, <16 x i8> %a1) %val = extractvalue {<16 x i8>, <16 x i1>} %t, 0 @@ -213,26 +213,26 @@ ; CHECK-NEXT: bic v1.4s, #255, lsl #24 ; CHECK-NEXT: bic v0.4s, #255, lsl #24 ; CHECK-NEXT: add v0.4s, v0.4s, v1.4s -; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: mov w8, v0.s[3] +; CHECK-NEXT: fmov w11, s0 ; CHECK-NEXT: mov w9, v0.s[2] ; CHECK-NEXT: mov w10, v0.s[1] -; CHECK-NEXT: fmov w11, s0 -; CHECK-NEXT: bic v1.4s, #255, lsl #24 ; CHECK-NEXT: sturh w8, [x0, #9] ; CHECK-NEXT: lsr w8, w8, #16 +; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: strh w9, [x0, #6] +; CHECK-NEXT: bic v1.4s, #255, lsl #24 ; CHECK-NEXT: sturh w10, [x0, #3] ; CHECK-NEXT: lsr w9, w9, #16 -; CHECK-NEXT: strh w11, [x0] -; CHECK-NEXT: cmeq v1.4s, v1.4s, v0.4s +; CHECK-NEXT: lsr w10, w10, #16 +; CHECK-NEXT: cmeq v0.4s, v1.4s, v0.4s ; CHECK-NEXT: strb w8, [x0, #11] -; CHECK-NEXT: lsr w8, w10, #16 -; CHECK-NEXT: lsr w10, w11, #16 +; CHECK-NEXT: lsr w8, w11, #16 +; CHECK-NEXT: strh w11, [x0] +; CHECK-NEXT: mvn v0.16b, v0.16b ; CHECK-NEXT: strb w9, [x0, #8] -; CHECK-NEXT: mvn v0.16b, v1.16b -; CHECK-NEXT: strb w8, [x0, #5] -; CHECK-NEXT: strb w10, [x0, #2] +; CHECK-NEXT: strb w10, [x0, #5] +; CHECK-NEXT: strb w8, [x0, #2] ; CHECK-NEXT: ret %t = call {<4 x i24>, <4 x i1>} @llvm.uadd.with.overflow.v4i24(<4 x i24> %a0, <4 x i24> %a1) %val = extractvalue {<4 x i24>, <4 x i1>} %t, 0 @@ -249,20 +249,20 @@ ; CHECK-NEXT: and v1.8b, v1.8b, v2.8b ; CHECK-NEXT: and v0.8b, v0.8b, v2.8b ; CHECK-NEXT: add v0.4h, v0.4h, v1.4h -; CHECK-NEXT: and v1.8b, v0.8b, v2.8b ; CHECK-NEXT: umov w8, v0.h[1] ; CHECK-NEXT: umov w9, v0.h[2] ; CHECK-NEXT: umov w10, v0.h[0] -; CHECK-NEXT: umov w11, v0.h[3] -; CHECK-NEXT: cmeq v1.4h, v1.4h, v0.4h +; CHECK-NEXT: and v1.8b, v0.8b, v2.8b ; CHECK-NEXT: and w8, w8, #0x1 ; CHECK-NEXT: and w9, w9, #0x1 +; CHECK-NEXT: cmeq v1.4h, v1.4h, v0.4h ; CHECK-NEXT: bfi w10, w8, #1, #1 -; CHECK-NEXT: mvn v1.8b, v1.8b +; CHECK-NEXT: umov w8, v0.h[3] ; CHECK-NEXT: bfi w10, w9, #2, #1 -; CHECK-NEXT: bfi w10, w11, #3, #29 -; CHECK-NEXT: and w8, w10, #0xf +; CHECK-NEXT: mvn v1.8b, v1.8b +; CHECK-NEXT: bfi w10, w8, #3, #29 ; CHECK-NEXT: sshll v0.4s, v1.4h, #0 +; CHECK-NEXT: and w8, w10, #0xf ; CHECK-NEXT: strb w8, [x0] ; CHECK-NEXT: ret %t = call {<4 x i1>, <4 x i1>} @llvm.uadd.with.overflow.v4i1(<4 x i1> %a0, <4 x i1> %a1) @@ -293,10 +293,10 @@ ; CHECK-NEXT: fmov s0, w13 ; CHECK-NEXT: mov v0.s[1], w10 ; CHECK-NEXT: ldr x10, [sp] -; CHECK-NEXT: stp x8, x9, [x10, #16] ; CHECK-NEXT: shl v0.2s, v0.2s, #31 -; CHECK-NEXT: stp x11, x12, [x10] +; CHECK-NEXT: stp x8, x9, [x10, #16] ; CHECK-NEXT: sshr v0.2s, v0.2s, #31 +; CHECK-NEXT: stp x11, x12, [x10] ; CHECK-NEXT: ret %t = call {<2 x i128>, <2 x i1>} @llvm.uadd.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1) %val = extractvalue {<2 x i128>, <2 x i1>} %t, 0 diff --git a/llvm/test/CodeGen/AArch64/vec_umulo.ll b/llvm/test/CodeGen/AArch64/vec_umulo.ll --- a/llvm/test/CodeGen/AArch64/vec_umulo.ll +++ b/llvm/test/CodeGen/AArch64/vec_umulo.ll @@ -22,8 +22,8 @@ ; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s ; CHECK-NEXT: shrn v0.2s, v1.2d, #32 ; CHECK-NEXT: xtn v1.2s, v1.2d -; CHECK-NEXT: str s1, [x0] ; CHECK-NEXT: cmtst v0.2s, v0.2s, v0.2s +; CHECK-NEXT: str s1, [x0] ; CHECK-NEXT: ret %t = call {<1 x i32>, <1 x i1>} @llvm.umul.with.overflow.v1i32(<1 x i32> %a0, <1 x i32> %a1) %val = extractvalue {<1 x i32>, <1 x i1>} %t, 0 @@ -39,8 +39,8 @@ ; CHECK-NEXT: umull v1.2d, v0.2s, v1.2s ; CHECK-NEXT: shrn v0.2s, v1.2d, #32 ; CHECK-NEXT: xtn v1.2s, v1.2d -; CHECK-NEXT: str d1, [x0] ; CHECK-NEXT: cmtst v0.2s, v0.2s, v0.2s +; CHECK-NEXT: str d1, [x0] ; CHECK-NEXT: ret %t = call {<2 x i32>, <2 x i1>} @llvm.umul.with.overflow.v2i32(<2 x i32> %a0, <2 x i32> %a1) %val = extractvalue {<2 x i32>, <2 x i1>} %t, 0 @@ -54,14 +54,13 @@ ; CHECK-LABEL: umulo_v3i32: ; CHECK: // %bb.0: ; CHECK-NEXT: umull2 v2.2d, v0.4s, v1.4s -; CHECK-NEXT: add x8, x0, #8 ; CHECK-NEXT: umull v3.2d, v0.2s, v1.2s ; CHECK-NEXT: mul v1.4s, v0.4s, v1.4s +; CHECK-NEXT: add x8, x0, #8 ; CHECK-NEXT: uzp2 v2.4s, v3.4s, v2.4s ; CHECK-NEXT: st1 { v1.s }[2], [x8] +; CHECK-NEXT: cmtst v0.4s, v2.4s, v2.4s ; CHECK-NEXT: str d1, [x0] -; CHECK-NEXT: cmtst v2.4s, v2.4s, v2.4s -; CHECK-NEXT: mov v0.16b, v2.16b ; CHECK-NEXT: ret %t = call {<3 x i32>, <3 x i1>} @llvm.umul.with.overflow.v3i32(<3 x i32> %a0, <3 x i32> %a1) %val = extractvalue {<3 x i32>, <3 x i1>} %t, 0 @@ -93,40 +92,40 @@ define <6 x i32> @umulo_v6i32(<6 x i32> %a0, <6 x i32> %a1, <6 x i32>* %p2) nounwind { ; CHECK-LABEL: umulo_v6i32: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov s0, w6 -; CHECK-NEXT: fmov s1, w0 -; CHECK-NEXT: mov x8, sp -; CHECK-NEXT: add x9, sp, #8 -; CHECK-NEXT: ldr s2, [sp, #16] -; CHECK-NEXT: fmov s3, w4 -; CHECK-NEXT: mov v0.s[1], w7 -; CHECK-NEXT: mov v1.s[1], w1 -; CHECK-NEXT: mov v3.s[1], w5 -; CHECK-NEXT: ld1 { v0.s }[2], [x8] +; CHECK-NEXT: ldr s0, [sp, #16] ; CHECK-NEXT: add x8, sp, #24 -; CHECK-NEXT: mov v1.s[2], w2 -; CHECK-NEXT: ld1 { v2.s }[1], [x8] -; CHECK-NEXT: ld1 { v0.s }[3], [x9] -; CHECK-NEXT: mov v1.s[3], w3 +; CHECK-NEXT: fmov s1, w6 +; CHECK-NEXT: fmov s3, w0 +; CHECK-NEXT: fmov s2, w4 +; CHECK-NEXT: ld1 { v0.s }[1], [x8] +; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: mov v1.s[1], w7 +; CHECK-NEXT: mov v3.s[1], w1 +; CHECK-NEXT: ld1 { v1.s }[2], [x8] +; CHECK-NEXT: add x8, sp, #8 +; CHECK-NEXT: mov v3.s[2], w2 +; CHECK-NEXT: mov v2.s[1], w5 +; CHECK-NEXT: mov v3.s[3], w3 +; CHECK-NEXT: ld1 { v1.s }[3], [x8] +; CHECK-NEXT: umull2 v4.2d, v2.4s, v0.4s +; CHECK-NEXT: umull v5.2d, v2.2s, v0.2s +; CHECK-NEXT: mul v0.4s, v2.4s, v0.4s ; CHECK-NEXT: ldr x8, [sp, #32] -; CHECK-NEXT: umull2 v6.2d, v3.4s, v2.4s -; CHECK-NEXT: umull v7.2d, v3.2s, v2.2s -; CHECK-NEXT: umull2 v4.2d, v1.4s, v0.4s -; CHECK-NEXT: umull v5.2d, v1.2s, v0.2s -; CHECK-NEXT: mul v2.4s, v3.4s, v2.4s -; CHECK-NEXT: mul v0.4s, v1.4s, v0.4s -; CHECK-NEXT: uzp2 v4.4s, v5.4s, v4.4s -; CHECK-NEXT: uzp2 v5.4s, v7.4s, v6.4s -; CHECK-NEXT: str d2, [x8, #16] -; CHECK-NEXT: str q0, [x8] +; CHECK-NEXT: umull2 v6.2d, v3.4s, v1.4s +; CHECK-NEXT: umull v7.2d, v3.2s, v1.2s +; CHECK-NEXT: uzp2 v2.4s, v5.4s, v4.4s +; CHECK-NEXT: str d0, [x8, #16] +; CHECK-NEXT: mul v1.4s, v3.4s, v1.4s +; CHECK-NEXT: cmtst v2.4s, v2.4s, v2.4s +; CHECK-NEXT: uzp2 v4.4s, v7.4s, v6.4s ; CHECK-NEXT: cmtst v4.4s, v4.4s, v4.4s -; CHECK-NEXT: cmtst v3.4s, v5.4s, v5.4s +; CHECK-NEXT: fmov w4, s2 +; CHECK-NEXT: mov w5, v2.s[1] +; CHECK-NEXT: str q1, [x8] +; CHECK-NEXT: fmov w0, s4 ; CHECK-NEXT: mov w1, v4.s[1] ; CHECK-NEXT: mov w2, v4.s[2] ; CHECK-NEXT: mov w3, v4.s[3] -; CHECK-NEXT: mov w5, v3.s[1] -; CHECK-NEXT: fmov w0, s4 -; CHECK-NEXT: fmov w4, s3 ; CHECK-NEXT: ret %t = call {<6 x i32>, <6 x i1>} @llvm.umul.with.overflow.v6i32(<6 x i32> %a0, <6 x i32> %a1) %val = extractvalue {<6 x i32>, <6 x i1>} %t, 0 @@ -139,19 +138,18 @@ define <8 x i32> @umulo_v8i32(<8 x i32> %a0, <8 x i32> %a1, <8 x i32>* %p2) nounwind { ; CHECK-LABEL: umulo_v8i32: ; CHECK: // %bb.0: -; CHECK-NEXT: umull2 v4.2d, v1.4s, v3.4s -; CHECK-NEXT: umull2 v5.2d, v0.4s, v2.4s -; CHECK-NEXT: umull v6.2d, v0.2s, v2.2s +; CHECK-NEXT: umull2 v4.2d, v0.4s, v2.4s +; CHECK-NEXT: umull v5.2d, v0.2s, v2.2s +; CHECK-NEXT: umull2 v6.2d, v1.4s, v3.4s ; CHECK-NEXT: umull v7.2d, v1.2s, v3.2s -; CHECK-NEXT: mul v3.4s, v1.4s, v3.4s ; CHECK-NEXT: mul v2.4s, v0.4s, v2.4s -; CHECK-NEXT: uzp2 v5.4s, v6.4s, v5.4s -; CHECK-NEXT: uzp2 v6.4s, v7.4s, v4.4s -; CHECK-NEXT: stp q2, q3, [x0] -; CHECK-NEXT: cmtst v4.4s, v5.4s, v5.4s -; CHECK-NEXT: cmtst v5.4s, v6.4s, v6.4s -; CHECK-NEXT: mov v0.16b, v4.16b -; CHECK-NEXT: mov v1.16b, v5.16b +; CHECK-NEXT: uzp2 v4.4s, v5.4s, v4.4s +; CHECK-NEXT: uzp2 v5.4s, v7.4s, v6.4s +; CHECK-NEXT: mul v6.4s, v1.4s, v3.4s +; CHECK-NEXT: cmtst v3.4s, v4.4s, v4.4s +; CHECK-NEXT: cmtst v1.4s, v5.4s, v5.4s +; CHECK-NEXT: mov v0.16b, v3.16b +; CHECK-NEXT: stp q2, q6, [x0] ; CHECK-NEXT: ret %t = call {<8 x i32>, <8 x i1>} @llvm.umul.with.overflow.v8i32(<8 x i32> %a0, <8 x i32> %a1) %val = extractvalue {<8 x i32>, <8 x i1>} %t, 0 @@ -166,29 +164,27 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: umull2 v2.8h, v0.16b, v1.16b ; CHECK-NEXT: umull v3.8h, v0.8b, v1.8b +; CHECK-NEXT: mul v6.16b, v0.16b, v1.16b ; CHECK-NEXT: uzp2 v2.16b, v3.16b, v2.16b +; CHECK-NEXT: str q6, [x0] ; CHECK-NEXT: cmtst v2.16b, v2.16b, v2.16b -; CHECK-NEXT: ext v3.16b, v2.16b, v2.16b, #8 -; CHECK-NEXT: zip1 v4.8b, v2.8b, v0.8b +; CHECK-NEXT: zip1 v3.8b, v2.8b, v0.8b +; CHECK-NEXT: zip2 v4.8b, v2.8b, v0.8b +; CHECK-NEXT: ext v2.16b, v2.16b, v2.16b, #8 +; CHECK-NEXT: ushll v3.4s, v3.4h, #0 +; CHECK-NEXT: zip1 v5.8b, v2.8b, v0.8b ; CHECK-NEXT: zip2 v2.8b, v2.8b, v0.8b -; CHECK-NEXT: zip1 v5.8b, v3.8b, v0.8b -; CHECK-NEXT: zip2 v3.8b, v3.8b, v0.8b ; CHECK-NEXT: ushll v4.4s, v4.4h, #0 -; CHECK-NEXT: ushll v2.4s, v2.4h, #0 ; CHECK-NEXT: ushll v5.4s, v5.4h, #0 -; CHECK-NEXT: ushll v3.4s, v3.4h, #0 -; CHECK-NEXT: shl v4.4s, v4.4s, #31 -; CHECK-NEXT: shl v2.4s, v2.4s, #31 -; CHECK-NEXT: shl v6.4s, v5.4s, #31 -; CHECK-NEXT: shl v3.4s, v3.4s, #31 -; CHECK-NEXT: sshr v4.4s, v4.4s, #31 -; CHECK-NEXT: sshr v5.4s, v2.4s, #31 -; CHECK-NEXT: sshr v2.4s, v6.4s, #31 -; CHECK-NEXT: sshr v3.4s, v3.4s, #31 -; CHECK-NEXT: mul v6.16b, v0.16b, v1.16b -; CHECK-NEXT: mov v0.16b, v4.16b -; CHECK-NEXT: mov v1.16b, v5.16b -; CHECK-NEXT: str q6, [x0] +; CHECK-NEXT: ushll v0.4s, v2.4h, #0 +; CHECK-NEXT: shl v1.4s, v3.4s, #31 +; CHECK-NEXT: shl v2.4s, v4.4s, #31 +; CHECK-NEXT: shl v3.4s, v5.4s, #31 +; CHECK-NEXT: shl v4.4s, v0.4s, #31 +; CHECK-NEXT: sshr v0.4s, v1.4s, #31 +; CHECK-NEXT: sshr v1.4s, v2.4s, #31 +; CHECK-NEXT: sshr v2.4s, v3.4s, #31 +; CHECK-NEXT: sshr v3.4s, v4.4s, #31 ; CHECK-NEXT: ret %t = call {<16 x i8>, <16 x i1>} @llvm.umul.with.overflow.v16i8(<16 x i8> %a0, <16 x i8> %a1) %val = extractvalue {<16 x i8>, <16 x i1>} %t, 0 @@ -204,20 +200,18 @@ ; CHECK-NEXT: umull2 v2.4s, v0.8h, v1.8h ; CHECK-NEXT: umull v3.4s, v0.4h, v1.4h ; CHECK-NEXT: uzp2 v2.8h, v3.8h, v2.8h +; CHECK-NEXT: mul v3.8h, v0.8h, v1.8h ; CHECK-NEXT: cmtst v2.8h, v2.8h, v2.8h ; CHECK-NEXT: xtn v2.8b, v2.8h -; CHECK-NEXT: zip1 v3.8b, v2.8b, v0.8b -; CHECK-NEXT: zip2 v2.8b, v2.8b, v0.8b -; CHECK-NEXT: ushll v3.4s, v3.4h, #0 -; CHECK-NEXT: ushll v2.4s, v2.4h, #0 -; CHECK-NEXT: shl v3.4s, v3.4s, #31 -; CHECK-NEXT: shl v4.4s, v2.4s, #31 -; CHECK-NEXT: sshr v2.4s, v3.4s, #31 -; CHECK-NEXT: sshr v3.4s, v4.4s, #31 -; CHECK-NEXT: mul v4.8h, v0.8h, v1.8h -; CHECK-NEXT: mov v0.16b, v2.16b -; CHECK-NEXT: mov v1.16b, v3.16b -; CHECK-NEXT: str q4, [x0] +; CHECK-NEXT: str q3, [x0] +; CHECK-NEXT: zip1 v0.8b, v2.8b, v0.8b +; CHECK-NEXT: zip2 v1.8b, v2.8b, v0.8b +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-NEXT: shl v0.4s, v0.4s, #31 +; CHECK-NEXT: shl v1.4s, v1.4s, #31 +; CHECK-NEXT: sshr v0.4s, v0.4s, #31 +; CHECK-NEXT: sshr v1.4s, v1.4s, #31 ; CHECK-NEXT: ret %t = call {<8 x i16>, <8 x i1>} @llvm.umul.with.overflow.v8i16(<8 x i16> %a0, <8 x i16> %a1) %val = extractvalue {<8 x i16>, <8 x i1>} %t, 0 @@ -230,21 +224,21 @@ define <2 x i32> @umulo_v2i64(<2 x i64> %a0, <2 x i64> %a1, <2 x i64>* %p2) nounwind { ; CHECK-LABEL: umulo_v2i64: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, v1.d[1] -; CHECK-NEXT: fmov x10, d1 -; CHECK-NEXT: mov x9, v0.d[1] +; CHECK-NEXT: fmov x8, d1 +; CHECK-NEXT: mov x9, v1.d[1] +; CHECK-NEXT: mov x10, v0.d[1] ; CHECK-NEXT: fmov x11, d0 -; CHECK-NEXT: umulh x12, x9, x8 -; CHECK-NEXT: umulh x13, x11, x10 +; CHECK-NEXT: umulh x12, x10, x9 +; CHECK-NEXT: umulh x13, x11, x8 ; CHECK-NEXT: cmp xzr, x12 -; CHECK-NEXT: mul x10, x11, x10 +; CHECK-NEXT: mul x8, x11, x8 ; CHECK-NEXT: csetm x12, ne ; CHECK-NEXT: cmp xzr, x13 ; CHECK-NEXT: csetm x13, ne -; CHECK-NEXT: mul x8, x9, x8 -; CHECK-NEXT: fmov d1, x10 +; CHECK-NEXT: mul x9, x10, x9 +; CHECK-NEXT: fmov d1, x8 ; CHECK-NEXT: fmov d0, x13 -; CHECK-NEXT: mov v1.d[1], x8 +; CHECK-NEXT: mov v1.d[1], x9 ; CHECK-NEXT: mov v0.d[1], x12 ; CHECK-NEXT: str q1, [x0] ; CHECK-NEXT: xtn v0.2s, v0.2d @@ -262,30 +256,30 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: bic v1.4s, #255, lsl #24 ; CHECK-NEXT: bic v0.4s, #255, lsl #24 -; CHECK-NEXT: umull2 v2.2d, v0.4s, v1.4s -; CHECK-NEXT: umull v3.2d, v0.2s, v1.2s -; CHECK-NEXT: mul v0.4s, v0.4s, v1.4s -; CHECK-NEXT: uzp2 v1.4s, v3.4s, v2.4s -; CHECK-NEXT: ushr v2.4s, v0.4s, #24 -; CHECK-NEXT: mov w8, v0.s[3] -; CHECK-NEXT: mov w9, v0.s[2] -; CHECK-NEXT: mov w10, v0.s[1] -; CHECK-NEXT: cmeq v1.4s, v1.4s, #0 -; CHECK-NEXT: fmov w11, s0 -; CHECK-NEXT: cmtst v2.4s, v2.4s, v2.4s +; CHECK-NEXT: mul v2.4s, v0.4s, v1.4s +; CHECK-NEXT: umull2 v3.2d, v0.4s, v1.4s +; CHECK-NEXT: umull v0.2d, v0.2s, v1.2s +; CHECK-NEXT: mov w8, v2.s[3] +; CHECK-NEXT: ushr v1.4s, v2.4s, #24 +; CHECK-NEXT: fmov w11, s2 +; CHECK-NEXT: mov w9, v2.s[2] ; CHECK-NEXT: sturh w8, [x0, #9] ; CHECK-NEXT: lsr w8, w8, #16 +; CHECK-NEXT: mov w10, v2.s[1] ; CHECK-NEXT: strh w9, [x0, #6] +; CHECK-NEXT: uzp2 v0.4s, v0.4s, v3.4s ; CHECK-NEXT: lsr w9, w9, #16 +; CHECK-NEXT: cmtst v1.4s, v1.4s, v1.4s ; CHECK-NEXT: sturh w10, [x0, #3] -; CHECK-NEXT: orn v0.16b, v2.16b, v1.16b +; CHECK-NEXT: lsr w10, w10, #16 ; CHECK-NEXT: strb w8, [x0, #11] -; CHECK-NEXT: lsr w8, w10, #16 -; CHECK-NEXT: lsr w10, w11, #16 +; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 +; CHECK-NEXT: lsr w8, w11, #16 ; CHECK-NEXT: strh w11, [x0] +; CHECK-NEXT: orn v0.16b, v1.16b, v0.16b ; CHECK-NEXT: strb w9, [x0, #8] -; CHECK-NEXT: strb w8, [x0, #5] -; CHECK-NEXT: strb w10, [x0, #2] +; CHECK-NEXT: strb w10, [x0, #5] +; CHECK-NEXT: strb w8, [x0, #2] ; CHECK-NEXT: ret %t = call {<4 x i24>, <4 x i1>} @llvm.umul.with.overflow.v4i24(<4 x i24> %a0, <4 x i24> %a1) %val = extractvalue {<4 x i24>, <4 x i1>} %t, 0 @@ -298,19 +292,18 @@ define <4 x i32> @umulo_v4i1(<4 x i1> %a0, <4 x i1> %a1, <4 x i1>* %p2) nounwind { ; CHECK-LABEL: umulo_v4i1: ; CHECK: // %bb.0: -; CHECK-NEXT: fmov d2, d0 -; CHECK-NEXT: movi v0.2d, #0000000000000000 -; CHECK-NEXT: and v1.8b, v2.8b, v1.8b -; CHECK-NEXT: umov w8, v1.h[1] -; CHECK-NEXT: umov w9, v1.h[2] -; CHECK-NEXT: umov w10, v1.h[0] -; CHECK-NEXT: umov w11, v1.h[3] +; CHECK-NEXT: and v0.8b, v0.8b, v1.8b +; CHECK-NEXT: umov w8, v0.h[1] +; CHECK-NEXT: umov w9, v0.h[2] +; CHECK-NEXT: umov w10, v0.h[0] ; CHECK-NEXT: and w8, w8, #0x1 ; CHECK-NEXT: and w9, w9, #0x1 ; CHECK-NEXT: bfi w10, w8, #1, #1 +; CHECK-NEXT: umov w8, v0.h[3] ; CHECK-NEXT: bfi w10, w9, #2, #1 -; CHECK-NEXT: bfi w10, w11, #3, #29 +; CHECK-NEXT: bfi w10, w8, #3, #29 ; CHECK-NEXT: and w8, w10, #0xf +; CHECK-NEXT: movi v0.2d, #0000000000000000 ; CHECK-NEXT: strb w8, [x0] ; CHECK-NEXT: ret %t = call {<4 x i1>, <4 x i1>} @llvm.umul.with.overflow.v4i1(<4 x i1> %a0, <4 x i1> %a1) @@ -325,52 +318,52 @@ ; CHECK-LABEL: umulo_v2i128: ; CHECK: // %bb.0: ; CHECK-NEXT: cmp x7, #0 -; CHECK-NEXT: umulh x8, x3, x6 +; CHECK-NEXT: umulh x9, x3, x6 ; CHECK-NEXT: mul x10, x7, x2 -; CHECK-NEXT: cset w9, ne +; CHECK-NEXT: cset w8, ne ; CHECK-NEXT: cmp x3, #0 -; CHECK-NEXT: umulh x11, x7, x2 -; CHECK-NEXT: cset w12, ne +; CHECK-NEXT: umulh x12, x7, x2 +; CHECK-NEXT: cset w11, ne ; CHECK-NEXT: madd x10, x3, x6, x10 -; CHECK-NEXT: cmp xzr, x8 -; CHECK-NEXT: umulh x8, x2, x6 -; CHECK-NEXT: cset w13, ne -; CHECK-NEXT: cmp xzr, x11 +; CHECK-NEXT: cmp xzr, x9 +; CHECK-NEXT: umulh x9, x2, x6 +; CHECK-NEXT: and w8, w11, w8 ; CHECK-NEXT: cset w11, ne -; CHECK-NEXT: adds x8, x8, x10 +; CHECK-NEXT: cmp xzr, x12 +; CHECK-NEXT: orr w8, w8, w11 +; CHECK-NEXT: cset w12, ne +; CHECK-NEXT: adds x9, x9, x10 ; CHECK-NEXT: cset w10, hs ; CHECK-NEXT: cmp x5, #0 -; CHECK-NEXT: cset w14, ne +; CHECK-NEXT: cset w11, ne ; CHECK-NEXT: cmp x1, #0 -; CHECK-NEXT: umulh x15, x1, x4 -; CHECK-NEXT: cset w16, ne -; CHECK-NEXT: mul x17, x5, x0 -; CHECK-NEXT: and w14, w16, w14 -; CHECK-NEXT: umulh x16, x5, x0 -; CHECK-NEXT: cmp xzr, x15 -; CHECK-NEXT: madd x15, x1, x4, x17 -; CHECK-NEXT: cset w17, ne -; CHECK-NEXT: umulh x18, x0, x4 -; CHECK-NEXT: cmp xzr, x16 -; CHECK-NEXT: orr w14, w14, w17 -; CHECK-NEXT: cset w16, ne -; CHECK-NEXT: adds x15, x18, x15 -; CHECK-NEXT: orr w14, w14, w16 -; CHECK-NEXT: cset w16, hs -; CHECK-NEXT: and w9, w12, w9 -; CHECK-NEXT: orr w12, w14, w16 -; CHECK-NEXT: orr w9, w9, w13 -; CHECK-NEXT: orr w9, w9, w11 -; CHECK-NEXT: mul x11, x0, x4 -; CHECK-NEXT: orr w9, w9, w10 +; CHECK-NEXT: umulh x13, x1, x4 +; CHECK-NEXT: cset w14, ne +; CHECK-NEXT: mul x15, x5, x0 +; CHECK-NEXT: and w11, w14, w11 +; CHECK-NEXT: umulh x14, x5, x0 +; CHECK-NEXT: cmp xzr, x13 +; CHECK-NEXT: madd x13, x1, x4, x15 +; CHECK-NEXT: cset w15, ne +; CHECK-NEXT: umulh x16, x0, x4 +; CHECK-NEXT: cmp xzr, x14 +; CHECK-NEXT: orr w11, w11, w15 +; CHECK-NEXT: cset w14, ne +; CHECK-NEXT: adds x13, x16, x13 +; CHECK-NEXT: orr w11, w11, w14 +; CHECK-NEXT: cset w14, hs +; CHECK-NEXT: orr w8, w8, w12 +; CHECK-NEXT: orr w11, w11, w14 +; CHECK-NEXT: orr w8, w8, w10 ; CHECK-NEXT: ldr x10, [sp] -; CHECK-NEXT: fmov s0, w12 -; CHECK-NEXT: stp x11, x15, [x10] -; CHECK-NEXT: mov v0.s[1], w9 -; CHECK-NEXT: mul x9, x2, x6 +; CHECK-NEXT: fmov s0, w11 +; CHECK-NEXT: mul x11, x0, x4 +; CHECK-NEXT: mov v0.s[1], w8 +; CHECK-NEXT: mul x8, x2, x6 +; CHECK-NEXT: stp x11, x13, [x10] ; CHECK-NEXT: shl v0.2s, v0.2s, #31 -; CHECK-NEXT: stp x9, x8, [x10, #16] ; CHECK-NEXT: sshr v0.2s, v0.2s, #31 +; CHECK-NEXT: stp x8, x9, [x10, #16] ; CHECK-NEXT: ret %t = call {<2 x i128>, <2 x i1>} @llvm.umul.with.overflow.v2i128(<2 x i128> %a0, <2 x i128> %a1) %val = extractvalue {<2 x i128>, <2 x i1>} %t, 0 diff --git a/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll --- a/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll @@ -96,9 +96,8 @@ define i8 @test_v9i8(<9 x i8> %a) nounwind { ; CHECK-LABEL: test_v9i8: ; CHECK: // %bb.0: -; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: mov w8, #-1 -; CHECK-NEXT: umov w12, v0.b[4] +; CHECK-NEXT: mov v1.16b, v0.16b ; CHECK-NEXT: mov v1.b[9], w8 ; CHECK-NEXT: mov v1.b[10], w8 ; CHECK-NEXT: mov v1.b[11], w8 @@ -108,17 +107,18 @@ ; CHECK-NEXT: umov w8, v1.b[1] ; CHECK-NEXT: umov w9, v1.b[0] ; CHECK-NEXT: umov w10, v1.b[2] -; CHECK-NEXT: umov w11, v1.b[3] ; CHECK-NEXT: and w8, w9, w8 +; CHECK-NEXT: umov w9, v1.b[3] +; CHECK-NEXT: and w8, w8, w10 +; CHECK-NEXT: umov w10, v0.b[4] +; CHECK-NEXT: and w8, w8, w9 ; CHECK-NEXT: umov w9, v0.b[5] ; CHECK-NEXT: and w8, w8, w10 ; CHECK-NEXT: umov w10, v0.b[6] -; CHECK-NEXT: and w8, w8, w11 -; CHECK-NEXT: umov w11, v0.b[7] -; CHECK-NEXT: and w8, w8, w12 ; CHECK-NEXT: and w8, w8, w9 +; CHECK-NEXT: umov w9, v0.b[7] ; CHECK-NEXT: and w8, w8, w10 -; CHECK-NEXT: and w0, w8, w11 +; CHECK-NEXT: and w0, w8, w9 ; CHECK-NEXT: ret %b = call i8 @llvm.vector.reduce.and.v9i8(<9 x i8> %a) ret i8 %b @@ -159,8 +159,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: and w0, w9, w8 ; CHECK-NEXT: ret %b = call i24 @llvm.vector.reduce.and.v4i24(<4 x i24> %a) @@ -185,8 +185,8 @@ ; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b -; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: fmov w9, s0 +; CHECK-NEXT: mov w8, v0.s[1] ; CHECK-NEXT: and w0, w9, w8 ; CHECK-NEXT: ret %b = call i32 @llvm.vector.reduce.and.v16i32(<16 x i32> %a) diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll --- a/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization-strict.ll @@ -105,9 +105,9 @@ define float @test_v3f32_neutral(<3 x float> %a) nounwind { ; CHECK-LABEL: test_v3f32_neutral: ; CHECK: // %bb.0: -; CHECK-NEXT: mov s1, v0.s[2] -; CHECK-NEXT: faddp s0, v0.2s -; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: faddp s1, v0.2s +; CHECK-NEXT: mov s0, v0.s[2] +; CHECK-NEXT: fadd s0, s1, s0 ; CHECK-NEXT: ret %b = call float @llvm.vector.reduce.fadd.f32.v3f32(float -0.0, <3 x float> %a) ret float %b @@ -166,34 +166,34 @@ define float @test_v16f32(<16 x float> %a, float %s) nounwind { ; CHECK-LABEL: test_v16f32: ; CHECK: // %bb.0: -; CHECK-NEXT: mov s6, v0.s[1] ; CHECK-NEXT: fadd s4, s4, s0 -; CHECK-NEXT: mov s7, v0.s[2] +; CHECK-NEXT: mov s5, v0.s[1] +; CHECK-NEXT: fadd s4, s4, s5 +; CHECK-NEXT: mov s5, v0.s[2] ; CHECK-NEXT: mov s0, v0.s[3] -; CHECK-NEXT: mov s5, v3.s[1] -; CHECK-NEXT: fadd s4, s4, s6 -; CHECK-NEXT: mov s6, v1.s[2] -; CHECK-NEXT: fadd s4, s4, s7 +; CHECK-NEXT: fadd s4, s4, s5 ; CHECK-NEXT: fadd s0, s4, s0 ; CHECK-NEXT: mov s4, v1.s[1] ; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: fadd s0, s0, s4 +; CHECK-NEXT: mov s4, v1.s[2] ; CHECK-NEXT: mov s1, v1.s[3] ; CHECK-NEXT: fadd s0, s0, s4 -; CHECK-NEXT: mov s4, v2.s[2] -; CHECK-NEXT: fadd s0, s0, s6 ; CHECK-NEXT: fadd s0, s0, s1 ; CHECK-NEXT: mov s1, v2.s[1] ; CHECK-NEXT: fadd s0, s0, s2 ; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: mov s1, v2.s[2] +; CHECK-NEXT: fadd s0, s0, s1 ; CHECK-NEXT: mov s1, v2.s[3] -; CHECK-NEXT: mov s2, v3.s[3] -; CHECK-NEXT: fadd s0, s0, s4 ; CHECK-NEXT: fadd s0, s0, s1 -; CHECK-NEXT: mov s1, v3.s[2] +; CHECK-NEXT: mov s1, v3.s[1] ; CHECK-NEXT: fadd s0, s0, s3 -; CHECK-NEXT: fadd s0, s0, s5 ; CHECK-NEXT: fadd s0, s0, s1 -; CHECK-NEXT: fadd s0, s0, s2 +; CHECK-NEXT: mov s1, v3.s[2] +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: mov s1, v3.s[3] +; CHECK-NEXT: fadd s0, s0, s1 ; CHECK-NEXT: ret %b = call float @llvm.vector.reduce.fadd.f32.v16f32(float %s, <16 x float> %a) ret float %b @@ -202,31 +202,31 @@ define float @test_v16f32_neutral(<16 x float> %a) nounwind { ; CHECK-LABEL: test_v16f32_neutral: ; CHECK: // %bb.0: +; CHECK-NEXT: faddp s4, v0.2s ; CHECK-NEXT: mov s5, v0.s[2] -; CHECK-NEXT: faddp s6, v0.2s ; CHECK-NEXT: mov s0, v0.s[3] -; CHECK-NEXT: mov s4, v2.s[1] -; CHECK-NEXT: fadd s5, s6, s5 -; CHECK-NEXT: mov s6, v1.s[2] -; CHECK-NEXT: fadd s0, s5, s0 -; CHECK-NEXT: mov s5, v1.s[1] +; CHECK-NEXT: fadd s4, s4, s5 +; CHECK-NEXT: fadd s0, s4, s0 +; CHECK-NEXT: mov s4, v1.s[1] ; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: fadd s0, s0, s4 +; CHECK-NEXT: mov s4, v1.s[2] ; CHECK-NEXT: mov s1, v1.s[3] -; CHECK-NEXT: fadd s0, s0, s5 -; CHECK-NEXT: fadd s0, s0, s6 +; CHECK-NEXT: fadd s0, s0, s4 ; CHECK-NEXT: fadd s0, s0, s1 -; CHECK-NEXT: mov s1, v2.s[2] +; CHECK-NEXT: mov s1, v2.s[1] ; CHECK-NEXT: fadd s0, s0, s2 -; CHECK-NEXT: mov s2, v2.s[3] -; CHECK-NEXT: fadd s0, s0, s4 +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: mov s1, v2.s[2] +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: mov s1, v2.s[3] ; CHECK-NEXT: fadd s0, s0, s1 ; CHECK-NEXT: mov s1, v3.s[1] -; CHECK-NEXT: fadd s0, s0, s2 -; CHECK-NEXT: mov s2, v3.s[2] ; CHECK-NEXT: fadd s0, s0, s3 ; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: mov s1, v3.s[2] +; CHECK-NEXT: fadd s0, s0, s1 ; CHECK-NEXT: mov s1, v3.s[3] -; CHECK-NEXT: fadd s0, s0, s2 ; CHECK-NEXT: fadd s0, s0, s1 ; CHECK-NEXT: ret %b = call float @llvm.vector.reduce.fadd.f32.v16f32(float -0.0, <16 x float> %a) diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization.ll --- a/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd-legalization.ll @@ -64,14 +64,14 @@ ; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0 ; CHECK-NEXT: // kill: def $s1 killed $s1 def $q1 ; CHECK-NEXT: // kill: def $s2 killed $s2 def $q2 -; CHECK-NEXT: movi v5.4s, #128, lsl #24 ; CHECK-NEXT: // kill: def $s4 killed $s4 def $q4 ; CHECK-NEXT: // kill: def $s3 killed $s3 def $q3 ; CHECK-NEXT: mov v0.s[1], v1.s[0] -; CHECK-NEXT: mov v5.s[0], v4.s[0] +; CHECK-NEXT: movi v1.4s, #128, lsl #24 ; CHECK-NEXT: mov v0.s[2], v2.s[0] +; CHECK-NEXT: mov v1.s[0], v4.s[0] ; CHECK-NEXT: mov v0.s[3], v3.s[0] -; CHECK-NEXT: fadd v0.4s, v0.4s, v5.4s +; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s ; CHECK-NEXT: faddp v0.4s, v0.4s, v0.4s ; CHECK-NEXT: faddp s0, v0.2s ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll --- a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll @@ -32,9 +32,9 @@ ; CHECKNOFP16-NEXT: fadd s1, s2, s1 ; CHECKNOFP16-NEXT: mov h2, v0.h[2] ; CHECKNOFP16-NEXT: mov h0, v0.h[3] -; CHECKNOFP16-NEXT: fcvt h1, s1 ; CHECKNOFP16-NEXT: fcvt s2, h2 ; CHECKNOFP16-NEXT: fcvt s0, h0 +; CHECKNOFP16-NEXT: fcvt h1, s1 ; CHECKNOFP16-NEXT: fcvt s1, h1 ; CHECKNOFP16-NEXT: fadd s1, s1, s2 ; CHECKNOFP16-NEXT: fcvt h1, s1 @@ -62,30 +62,30 @@ ; CHECKNOFP16-NEXT: fcvt s1, h1 ; CHECKNOFP16-NEXT: fadd s1, s2, s1 ; CHECKNOFP16-NEXT: mov h2, v0.h[2] -; CHECKNOFP16-NEXT: fcvt h1, s1 ; CHECKNOFP16-NEXT: fcvt s2, h2 +; CHECKNOFP16-NEXT: fcvt h1, s1 ; CHECKNOFP16-NEXT: fcvt s1, h1 ; CHECKNOFP16-NEXT: fadd s1, s1, s2 ; CHECKNOFP16-NEXT: mov h2, v0.h[3] -; CHECKNOFP16-NEXT: fcvt h1, s1 ; CHECKNOFP16-NEXT: fcvt s2, h2 +; CHECKNOFP16-NEXT: fcvt h1, s1 ; CHECKNOFP16-NEXT: fcvt s1, h1 ; CHECKNOFP16-NEXT: fadd s1, s1, s2 ; CHECKNOFP16-NEXT: mov h2, v0.h[4] -; CHECKNOFP16-NEXT: fcvt h1, s1 ; CHECKNOFP16-NEXT: fcvt s2, h2 +; CHECKNOFP16-NEXT: fcvt h1, s1 ; CHECKNOFP16-NEXT: fcvt s1, h1 ; CHECKNOFP16-NEXT: fadd s1, s1, s2 ; CHECKNOFP16-NEXT: mov h2, v0.h[5] -; CHECKNOFP16-NEXT: fcvt h1, s1 ; CHECKNOFP16-NEXT: fcvt s2, h2 +; CHECKNOFP16-NEXT: fcvt h1, s1 ; CHECKNOFP16-NEXT: fcvt s1, h1 ; CHECKNOFP16-NEXT: fadd s1, s1, s2 ; CHECKNOFP16-NEXT: mov h2, v0.h[6] ; CHECKNOFP16-NEXT: mov h0, v0.h[7] -; CHECKNOFP16-NEXT: fcvt h1, s1 ; CHECKNOFP16-NEXT: fcvt s2, h2 ; CHECKNOFP16-NEXT: fcvt s0, h0 +; CHECKNOFP16-NEXT: fcvt h1, s1 ; CHECKNOFP16-NEXT: fcvt s1, h1 ; CHECKNOFP16-NEXT: fadd s1, s1, s2 ; CHECKNOFP16-NEXT: fcvt h1, s1 @@ -147,65 +147,65 @@ ; CHECKNOFP16-NEXT: fadd s4, s5, s4 ; CHECKNOFP16-NEXT: mov h5, v0.h[2] ; CHECKNOFP16-NEXT: fadd s2, s3, s2 -; CHECKNOFP16-NEXT: mov h3, v1.h[2] -; CHECKNOFP16-NEXT: fcvt h4, s4 ; CHECKNOFP16-NEXT: fcvt s5, h5 +; CHECKNOFP16-NEXT: fcvt h3, s4 +; CHECKNOFP16-NEXT: mov h4, v1.h[2] ; CHECKNOFP16-NEXT: fcvt h2, s2 -; CHECKNOFP16-NEXT: fcvt s3, h3 ; CHECKNOFP16-NEXT: fcvt s4, h4 +; CHECKNOFP16-NEXT: fcvt s3, h3 ; CHECKNOFP16-NEXT: fcvt s2, h2 -; CHECKNOFP16-NEXT: fadd s3, s5, s3 +; CHECKNOFP16-NEXT: fadd s4, s5, s4 ; CHECKNOFP16-NEXT: mov h5, v0.h[3] -; CHECKNOFP16-NEXT: fadd s2, s4, s2 -; CHECKNOFP16-NEXT: mov h4, v1.h[3] -; CHECKNOFP16-NEXT: fcvt h3, s3 ; CHECKNOFP16-NEXT: fcvt s5, h5 -; CHECKNOFP16-NEXT: fcvt h2, s2 +; CHECKNOFP16-NEXT: fadd s2, s3, s2 +; CHECKNOFP16-NEXT: fcvt h3, s4 +; CHECKNOFP16-NEXT: mov h4, v1.h[3] ; CHECKNOFP16-NEXT: fcvt s4, h4 +; CHECKNOFP16-NEXT: fcvt h2, s2 ; CHECKNOFP16-NEXT: fcvt s3, h3 -; CHECKNOFP16-NEXT: fcvt s2, h2 ; CHECKNOFP16-NEXT: fadd s4, s5, s4 ; CHECKNOFP16-NEXT: mov h5, v0.h[4] -; CHECKNOFP16-NEXT: fadd s2, s2, s3 -; CHECKNOFP16-NEXT: mov h3, v1.h[4] -; CHECKNOFP16-NEXT: fcvt h4, s4 +; CHECKNOFP16-NEXT: fcvt s2, h2 ; CHECKNOFP16-NEXT: fcvt s5, h5 +; CHECKNOFP16-NEXT: fadd s2, s2, s3 +; CHECKNOFP16-NEXT: fcvt h3, s4 +; CHECKNOFP16-NEXT: mov h4, v1.h[4] +; CHECKNOFP16-NEXT: fcvt s4, h4 ; CHECKNOFP16-NEXT: fcvt h2, s2 ; CHECKNOFP16-NEXT: fcvt s3, h3 -; CHECKNOFP16-NEXT: fcvt s4, h4 -; CHECKNOFP16-NEXT: fcvt s2, h2 -; CHECKNOFP16-NEXT: fadd s3, s5, s3 +; CHECKNOFP16-NEXT: fadd s4, s5, s4 ; CHECKNOFP16-NEXT: mov h5, v0.h[5] -; CHECKNOFP16-NEXT: fadd s2, s2, s4 -; CHECKNOFP16-NEXT: mov h4, v1.h[5] -; CHECKNOFP16-NEXT: fcvt h3, s3 +; CHECKNOFP16-NEXT: fcvt s2, h2 ; CHECKNOFP16-NEXT: fcvt s5, h5 -; CHECKNOFP16-NEXT: fcvt h2, s2 +; CHECKNOFP16-NEXT: fadd s2, s2, s3 +; CHECKNOFP16-NEXT: fcvt h3, s4 +; CHECKNOFP16-NEXT: mov h4, v1.h[5] ; CHECKNOFP16-NEXT: fcvt s4, h4 +; CHECKNOFP16-NEXT: fcvt h2, s2 ; CHECKNOFP16-NEXT: fcvt s3, h3 -; CHECKNOFP16-NEXT: fcvt s2, h2 ; CHECKNOFP16-NEXT: fadd s4, s5, s4 ; CHECKNOFP16-NEXT: mov h5, v0.h[6] +; CHECKNOFP16-NEXT: fcvt s2, h2 ; CHECKNOFP16-NEXT: mov h0, v0.h[7] -; CHECKNOFP16-NEXT: fadd s2, s2, s3 -; CHECKNOFP16-NEXT: mov h3, v1.h[6] -; CHECKNOFP16-NEXT: fcvt h4, s4 ; CHECKNOFP16-NEXT: fcvt s5, h5 -; CHECKNOFP16-NEXT: mov h1, v1.h[7] ; CHECKNOFP16-NEXT: fcvt s0, h0 -; CHECKNOFP16-NEXT: fcvt h2, s2 -; CHECKNOFP16-NEXT: fcvt s3, h3 +; CHECKNOFP16-NEXT: fadd s2, s2, s3 +; CHECKNOFP16-NEXT: fcvt h3, s4 +; CHECKNOFP16-NEXT: mov h4, v1.h[6] +; CHECKNOFP16-NEXT: mov h1, v1.h[7] ; CHECKNOFP16-NEXT: fcvt s4, h4 ; CHECKNOFP16-NEXT: fcvt s1, h1 -; CHECKNOFP16-NEXT: fcvt s2, h2 -; CHECKNOFP16-NEXT: fadd s3, s5, s3 +; CHECKNOFP16-NEXT: fcvt h2, s2 +; CHECKNOFP16-NEXT: fcvt s3, h3 +; CHECKNOFP16-NEXT: fadd s4, s5, s4 ; CHECKNOFP16-NEXT: fadd s0, s0, s1 -; CHECKNOFP16-NEXT: fadd s2, s2, s4 -; CHECKNOFP16-NEXT: fcvt h3, s3 +; CHECKNOFP16-NEXT: fcvt s2, h2 ; CHECKNOFP16-NEXT: fcvt h0, s0 +; CHECKNOFP16-NEXT: fadd s2, s2, s3 +; CHECKNOFP16-NEXT: fcvt h3, s4 +; CHECKNOFP16-NEXT: fcvt s0, h0 ; CHECKNOFP16-NEXT: fcvt h2, s2 ; CHECKNOFP16-NEXT: fcvt s3, h3 -; CHECKNOFP16-NEXT: fcvt s0, h0 ; CHECKNOFP16-NEXT: fcvt s2, h2 ; CHECKNOFP16-NEXT: fadd s2, s2, s3 ; CHECKNOFP16-NEXT: fcvt h1, s2 diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll --- a/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fmax-legalization.ll @@ -57,9 +57,9 @@ ; CHECK-NOFP-NEXT: fmaxnm s1, s2, s1 ; CHECK-NOFP-NEXT: mov h2, v0.h[2] ; CHECK-NOFP-NEXT: mov h0, v0.h[3] -; CHECK-NOFP-NEXT: fcvt h1, s1 ; CHECK-NOFP-NEXT: fcvt s2, h2 ; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcvt h1, s1 ; CHECK-NOFP-NEXT: fcvt s1, h1 ; CHECK-NOFP-NEXT: fmaxnm s1, s1, s2 ; CHECK-NOFP-NEXT: fcvt h1, s1 @@ -86,9 +86,9 @@ ; CHECK-NOFP-NEXT: fmaxnm s1, s2, s1 ; CHECK-NOFP-NEXT: mov h2, v0.h[2] ; CHECK-NOFP-NEXT: mov h0, v0.h[3] -; CHECK-NOFP-NEXT: fcvt h1, s1 ; CHECK-NOFP-NEXT: fcvt s2, h2 ; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcvt h1, s1 ; CHECK-NOFP-NEXT: fcvt s1, h1 ; CHECK-NOFP-NEXT: fmaxnm s1, s1, s2 ; CHECK-NOFP-NEXT: fcvt h1, s1 @@ -183,12 +183,12 @@ ; ; CHECK-FP-LABEL: test_v11f16: ; CHECK-FP: // %bb.0: +; CHECK-FP-NEXT: mov x8, sp ; CHECK-FP-NEXT: // kill: def $h0 killed $h0 def $q0 ; CHECK-FP-NEXT: // kill: def $h1 killed $h1 def $q1 ; CHECK-FP-NEXT: // kill: def $h2 killed $h2 def $q2 ; CHECK-FP-NEXT: // kill: def $h3 killed $h3 def $q3 ; CHECK-FP-NEXT: // kill: def $h4 killed $h4 def $q4 -; CHECK-FP-NEXT: mov x8, sp ; CHECK-FP-NEXT: // kill: def $h5 killed $h5 def $q5 ; CHECK-FP-NEXT: // kill: def $h6 killed $h6 def $q6 ; CHECK-FP-NEXT: // kill: def $h7 killed $h7 def $q7 @@ -198,12 +198,12 @@ ; CHECK-FP-NEXT: ld1 { v1.h }[0], [x8] ; CHECK-FP-NEXT: add x8, sp, #8 ; CHECK-FP-NEXT: mov v0.h[3], v3.h[0] +; CHECK-FP-NEXT: mov v0.h[4], v4.h[0] ; CHECK-FP-NEXT: ld1 { v1.h }[1], [x8] ; CHECK-FP-NEXT: add x8, sp, #16 -; CHECK-FP-NEXT: mov v0.h[4], v4.h[0] -; CHECK-FP-NEXT: ld1 { v1.h }[2], [x8] ; CHECK-FP-NEXT: mov v0.h[5], v5.h[0] ; CHECK-FP-NEXT: mov v0.h[6], v6.h[0] +; CHECK-FP-NEXT: ld1 { v1.h }[2], [x8] ; CHECK-FP-NEXT: mov v0.h[7], v7.h[0] ; CHECK-FP-NEXT: fmaxnm v0.8h, v0.8h, v1.8h ; CHECK-FP-NEXT: fmaxnmv h0, v0.8h @@ -291,12 +291,12 @@ ; ; CHECK-FP-LABEL: test_v11f16_ninf: ; CHECK-FP: // %bb.0: +; CHECK-FP-NEXT: mov x8, sp ; CHECK-FP-NEXT: // kill: def $h0 killed $h0 def $q0 ; CHECK-FP-NEXT: // kill: def $h1 killed $h1 def $q1 ; CHECK-FP-NEXT: // kill: def $h2 killed $h2 def $q2 ; CHECK-FP-NEXT: // kill: def $h3 killed $h3 def $q3 ; CHECK-FP-NEXT: // kill: def $h4 killed $h4 def $q4 -; CHECK-FP-NEXT: mov x8, sp ; CHECK-FP-NEXT: // kill: def $h5 killed $h5 def $q5 ; CHECK-FP-NEXT: // kill: def $h6 killed $h6 def $q6 ; CHECK-FP-NEXT: // kill: def $h7 killed $h7 def $q7 @@ -306,12 +306,12 @@ ; CHECK-FP-NEXT: ld1 { v1.h }[0], [x8] ; CHECK-FP-NEXT: add x8, sp, #8 ; CHECK-FP-NEXT: mov v0.h[3], v3.h[0] +; CHECK-FP-NEXT: mov v0.h[4], v4.h[0] ; CHECK-FP-NEXT: ld1 { v1.h }[1], [x8] ; CHECK-FP-NEXT: add x8, sp, #16 -; CHECK-FP-NEXT: mov v0.h[4], v4.h[0] -; CHECK-FP-NEXT: ld1 { v1.h }[2], [x8] ; CHECK-FP-NEXT: mov v0.h[5], v5.h[0] ; CHECK-FP-NEXT: mov v0.h[6], v6.h[0] +; CHECK-FP-NEXT: ld1 { v1.h }[2], [x8] ; CHECK-FP-NEXT: mov v0.h[7], v7.h[0] ; CHECK-FP-NEXT: fmaxnm v0.8h, v0.8h, v1.8h ; CHECK-FP-NEXT: fmaxnmv h0, v0.8h diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll --- a/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll +++ b/llvm/test/CodeGen/AArch64/vecreduce-fmin-legalization.ll @@ -57,9 +57,9 @@ ; CHECK-NOFP-NEXT: fminnm s1, s2, s1 ; CHECK-NOFP-NEXT: mov h2, v0.h[2] ; CHECK-NOFP-NEXT: mov h0, v0.h[3] -; CHECK-NOFP-NEXT: fcvt h1, s1 ; CHECK-NOFP-NEXT: fcvt s2, h2 ; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcvt h1, s1 ; CHECK-NOFP-NEXT: fcvt s1, h1 ; CHECK-NOFP-NEXT: fminnm s1, s1, s2 ; CHECK-NOFP-NEXT: fcvt h1, s1 @@ -86,9 +86,9 @@ ; CHECK-NOFP-NEXT: fminnm s1, s2, s1 ; CHECK-NOFP-NEXT: mov h2, v0.h[2] ; CHECK-NOFP-NEXT: mov h0, v0.h[3] -; CHECK-NOFP-NEXT: fcvt h1, s1 ; CHECK-NOFP-NEXT: fcvt s2, h2 ; CHECK-NOFP-NEXT: fcvt s0, h0 +; CHECK-NOFP-NEXT: fcvt h1, s1 ; CHECK-NOFP-NEXT: fcvt s1, h1 ; CHECK-NOFP-NEXT: fminnm s1, s1, s2 ; CHECK-NOFP-NEXT: fcvt h1, s1 @@ -183,12 +183,12 @@ ; ; CHECK-FP-LABEL: test_v11f16: ; CHECK-FP: // %bb.0: +; CHECK-FP-NEXT: mov x8, sp ; CHECK-FP-NEXT: // kill: def $h0 killed $h0 def $q0 ; CHECK-FP-NEXT: // kill: def $h1 killed $h1 def $q1 ; CHECK-FP-NEXT: // kill: def $h2 killed $h2 def $q2 ; CHECK-FP-NEXT: // kill: def $h3 killed $h3 def $q3 ; CHECK-FP-NEXT: // kill: def $h4 killed $h4 def $q4 -; CHECK-FP-NEXT: mov x8, sp ; CHECK-FP-NEXT: // kill: def $h5 killed $h5 def $q5 ; CHECK-FP-NEXT: // kill: def $h6 killed $h6 def $q6 ; CHECK-FP-NEXT: // kill: def $h7 killed $h7 def $q7 @@ -198,12 +198,12 @@ ; CHECK-FP-NEXT: ld1 { v1.h }[0], [x8] ; CHECK-FP-NEXT: add x8, sp, #8 ; CHECK-FP-NEXT: mov v0.h[3], v3.h[0] +; CHECK-FP-NEXT: mov v0.h[4], v4.h[0] ; CHECK-FP-NEXT: ld1 { v1.h }[1], [x8] ; CHECK-FP-NEXT: add x8, sp, #16 -; CHECK-FP-NEXT: mov v0.h[4], v4.h[0] -; CHECK-FP-NEXT: ld1 { v1.h }[2], [x8] ; CHECK-FP-NEXT: mov v0.h[5], v5.h[0] ; CHECK-FP-NEXT: mov v0.h[6], v6.h[0] +; CHECK-FP-NEXT: ld1 { v1.h }[2], [x8] ; CHECK-FP-NEXT: mov v0.h[7], v7.h[0] ; CHECK-FP-NEXT: fminnm v0.8h, v0.8h, v1.8h ; CHECK-FP-NEXT: fminnmv h0, v0.8h @@ -291,12 +291,12 @@ ; ; CHECK-FP-LABEL: test_v11f16_ninf: ; CHECK-FP: // %bb.0: +; CHECK-FP-NEXT: mov x8, sp ; CHECK-FP-NEXT: // kill: def $h0 killed $h0 def $q0 ; CHECK-FP-NEXT: // kill: def $h1 killed $h1 def $q1 ; CHECK-FP-NEXT: // kill: def $h2 killed $h2 def $q2 ; CHECK-FP-NEXT: // kill: def $h3 killed $h3 def $q3 ; CHECK-FP-NEXT: // kill: def $h4 killed $h4 def $q4 -; CHECK-FP-NEXT: mov x8, sp ; CHECK-FP-NEXT: // kill: def $h5 killed $h5 def $q5 ; CHECK-FP-NEXT: // kill: def $h6 killed $h6 def $q6 ; CHECK-FP-NEXT: // kill: def $h7 killed $h7 def $q7 @@ -306,12 +306,12 @@ ; CHECK-FP-NEXT: ld1 { v1.h }[0], [x8] ; CHECK-FP-NEXT: add x8, sp, #8 ; CHECK-FP-NEXT: mov v0.h[3], v3.h[0] +; CHECK-FP-NEXT: mov v0.h[4], v4.h[0] ; CHECK-FP-NEXT: ld1 { v1.h }[1], [x8] ; CHECK-FP-NEXT: add x8, sp, #16 -; CHECK-FP-NEXT: mov v0.h[4], v4.h[0] -; CHECK-FP-NEXT: ld1 { v1.h }[2], [x8] ; CHECK-FP-NEXT: mov v0.h[5], v5.h[0] ; CHECK-FP-NEXT: mov v0.h[6], v6.h[0] +; CHECK-FP-NEXT: ld1 { v1.h }[2], [x8] ; CHECK-FP-NEXT: mov v0.h[7], v7.h[0] ; CHECK-FP-NEXT: fminnm v0.8h, v0.8h, v1.8h ; CHECK-FP-NEXT: fminnmv h0, v0.8h diff --git a/llvm/test/CodeGen/AArch64/vector-fcopysign.ll b/llvm/test/CodeGen/AArch64/vector-fcopysign.ll --- a/llvm/test/CodeGen/AArch64/vector-fcopysign.ll +++ b/llvm/test/CodeGen/AArch64/vector-fcopysign.ll @@ -82,8 +82,8 @@ define <2 x float> @test_copysign_v2f32_v2f64(<2 x float> %a, <2 x double> %b) #0 { ; CHECK-LABEL: test_copysign_v2f32_v2f64: ; CHECK: ; %bb.0: -; CHECK-NEXT: movi.2s v2, #128, lsl #24 ; CHECK-NEXT: fcvtn v1.2s, v1.2d +; CHECK-NEXT: movi.2s v2, #128, lsl #24 ; CHECK-NEXT: bit.8b v0, v1, v2 ; CHECK-NEXT: ret %tmp0 = fptrunc <2 x double> %b to <2 x float> @@ -110,9 +110,9 @@ ; CHECK-LABEL: test_copysign_v4f32_v4f64: ; CHECK: ; %bb.0: ; CHECK-NEXT: fcvtn v1.2s, v1.2d -; CHECK-NEXT: movi.4s v3, #128, lsl #24 ; CHECK-NEXT: fcvtn2 v1.4s, v2.2d -; CHECK-NEXT: bit.16b v0, v1, v3 +; CHECK-NEXT: movi.4s v2, #128, lsl #24 +; CHECK-NEXT: bit.16b v0, v1, v2 ; CHECK-NEXT: ret %tmp0 = fptrunc <4 x double> %b to <4 x float> %r = call <4 x float> @llvm.copysign.v4f32(<4 x float> %a, <4 x float> %tmp0) @@ -155,12 +155,12 @@ define <4 x double> @test_copysign_v4f64_v4f32(<4 x double> %a, <4 x float> %b) #0 { ; CHECK-LABEL: test_copysign_v4f64_v4f32: ; CHECK: ; %bb.0: -; CHECK-NEXT: movi.2d v3, #0000000000000000 -; CHECK-NEXT: fcvtl2 v4.2d, v2.4s -; CHECK-NEXT: fcvtl v2.2d, v2.2s -; CHECK-NEXT: fneg.2d v3, v3 -; CHECK-NEXT: bit.16b v1, v4, v3 -; CHECK-NEXT: bit.16b v0, v2, v3 +; CHECK-NEXT: movi.2d v4, #0000000000000000 +; CHECK-NEXT: fcvtl v3.2d, v2.2s +; CHECK-NEXT: fneg.2d v4, v4 +; CHECK-NEXT: fcvtl2 v2.2d, v2.4s +; CHECK-NEXT: bit.16b v0, v3, v4 +; CHECK-NEXT: bit.16b v1, v2, v4 ; CHECK-NEXT: ret %tmp0 = fpext <4 x float> %b to <4 x double> %r = call <4 x double> @llvm.copysign.v4f64(<4 x double> %a, <4 x double> %tmp0) @@ -189,31 +189,31 @@ ; NOFP16: ; %bb.0: ; NOFP16-NEXT: ; kill: def $d1 killed $d1 def $q1 ; NOFP16-NEXT: ; kill: def $d0 killed $d0 def $q0 -; NOFP16-NEXT: mov h3, v1[1] -; NOFP16-NEXT: mov h4, v0[1] -; NOFP16-NEXT: movi.4s v2, #128, lsl #24 -; NOFP16-NEXT: fcvt s5, h1 -; NOFP16-NEXT: fcvt s6, h0 -; NOFP16-NEXT: mov h7, v1[2] -; NOFP16-NEXT: mov h16, v0[2] +; NOFP16-NEXT: mov h2, v1[1] +; NOFP16-NEXT: mov h3, v0[1] +; NOFP16-NEXT: mov h4, v1[2] +; NOFP16-NEXT: mov h5, v0[2] +; NOFP16-NEXT: fcvt s2, h2 ; NOFP16-NEXT: fcvt s3, h3 +; NOFP16-NEXT: fcvt s6, h1 +; NOFP16-NEXT: fcvt s7, h0 ; NOFP16-NEXT: fcvt s4, h4 +; NOFP16-NEXT: fcvt s5, h5 ; NOFP16-NEXT: mov h1, v1[3] -; NOFP16-NEXT: bit.16b v6, v5, v2 -; NOFP16-NEXT: fcvt s5, h7 -; NOFP16-NEXT: fcvt s7, h16 -; NOFP16-NEXT: bit.16b v4, v3, v2 -; NOFP16-NEXT: mov h3, v0[3] -; NOFP16-NEXT: fcvt h0, s6 +; NOFP16-NEXT: mov h0, v0[3] +; NOFP16-NEXT: movi.4s v16, #128, lsl #24 +; NOFP16-NEXT: bit.16b v3, v2, v16 +; NOFP16-NEXT: bit.16b v7, v6, v16 ; NOFP16-NEXT: fcvt s1, h1 -; NOFP16-NEXT: bit.16b v7, v5, v2 -; NOFP16-NEXT: fcvt h4, s4 -; NOFP16-NEXT: fcvt s3, h3 -; NOFP16-NEXT: fcvt h5, s7 -; NOFP16-NEXT: mov.h v0[1], v4[0] -; NOFP16-NEXT: bit.16b v3, v1, v2 -; NOFP16-NEXT: mov.h v0[2], v5[0] -; NOFP16-NEXT: fcvt h1, s3 +; NOFP16-NEXT: fcvt s2, h0 +; NOFP16-NEXT: fcvt h3, s3 +; NOFP16-NEXT: bit.16b v5, v4, v16 +; NOFP16-NEXT: fcvt h0, s7 +; NOFP16-NEXT: fcvt h4, s5 +; NOFP16-NEXT: bit.16b v2, v1, v16 +; NOFP16-NEXT: mov.h v0[1], v3[0] +; NOFP16-NEXT: fcvt h1, s2 +; NOFP16-NEXT: mov.h v0[2], v4[0] ; NOFP16-NEXT: mov.h v0[3], v1[0] ; NOFP16-NEXT: ; kill: def $d0 killed $d0 killed $q0 ; NOFP16-NEXT: ret @@ -232,39 +232,39 @@ ; NOFP16: ; %bb.0: ; NOFP16-NEXT: fcvtn v1.4h, v1.4s ; NOFP16-NEXT: ; kill: def $d0 killed $d0 def $q0 -; NOFP16-NEXT: mov h3, v0[1] -; NOFP16-NEXT: movi.4s v2, #128, lsl #24 -; NOFP16-NEXT: fcvt s5, h0 -; NOFP16-NEXT: mov h7, v0[2] -; NOFP16-NEXT: mov h4, v1[1] +; NOFP16-NEXT: mov h2, v0[1] +; NOFP16-NEXT: fcvt s3, h0 +; NOFP16-NEXT: mov h4, v0[2] +; NOFP16-NEXT: fcvt s2, h2 +; NOFP16-NEXT: mov h0, v0[3] +; NOFP16-NEXT: fcvt s4, h4 +; NOFP16-NEXT: mov h5, v1[1] ; NOFP16-NEXT: fcvt s6, h1 -; NOFP16-NEXT: mov h16, v1[2] -; NOFP16-NEXT: fcvt s3, h3 +; NOFP16-NEXT: mov h7, v1[2] ; NOFP16-NEXT: mov h1, v1[3] -; NOFP16-NEXT: fcvt s4, h4 -; NOFP16-NEXT: bit.16b v5, v6, v2 -; NOFP16-NEXT: fcvt s6, h7 -; NOFP16-NEXT: fcvt s7, h16 +; NOFP16-NEXT: fcvt s5, h5 +; NOFP16-NEXT: fcvt s7, h7 ; NOFP16-NEXT: fcvt s1, h1 -; NOFP16-NEXT: bit.16b v3, v4, v2 -; NOFP16-NEXT: mov h4, v0[3] -; NOFP16-NEXT: fcvt h0, s5 -; NOFP16-NEXT: bit.16b v6, v7, v2 -; NOFP16-NEXT: fcvt h3, s3 -; NOFP16-NEXT: fcvt s4, h4 -; NOFP16-NEXT: fcvt h5, s6 -; NOFP16-NEXT: mov.h v0[1], v3[0] -; NOFP16-NEXT: bit.16b v4, v1, v2 -; NOFP16-NEXT: mov.h v0[2], v5[0] -; NOFP16-NEXT: fcvt h1, s4 +; NOFP16-NEXT: movi.4s v16, #128, lsl #24 +; NOFP16-NEXT: bit.16b v3, v6, v16 +; NOFP16-NEXT: bit.16b v2, v5, v16 +; NOFP16-NEXT: fcvt s5, h0 +; NOFP16-NEXT: bit.16b v4, v7, v16 +; NOFP16-NEXT: fcvt h2, s2 +; NOFP16-NEXT: fcvt h0, s3 +; NOFP16-NEXT: fcvt h3, s4 +; NOFP16-NEXT: bit.16b v5, v1, v16 +; NOFP16-NEXT: mov.h v0[1], v2[0] +; NOFP16-NEXT: fcvt h1, s5 +; NOFP16-NEXT: mov.h v0[2], v3[0] ; NOFP16-NEXT: mov.h v0[3], v1[0] ; NOFP16-NEXT: ; kill: def $d0 killed $d0 killed $q0 ; NOFP16-NEXT: ret ; ; FP16-LABEL: test_copysign_v4f16_v4f32: ; FP16: ; %bb.0: -; FP16-NEXT: movi.4h v2, #128, lsl #8 ; FP16-NEXT: fcvtn v1.4h, v1.4s +; FP16-NEXT: movi.4h v2, #128, lsl #8 ; FP16-NEXT: bit.8b v0, v1, v2 ; FP16-NEXT: ret %tmp0 = fptrunc <4 x float> %b to <4 x half> @@ -276,47 +276,47 @@ ; NOFP16-LABEL: test_copysign_v4f16_v4f64: ; NOFP16: ; %bb.0: ; NOFP16-NEXT: ; kill: def $d0 killed $d0 def $q0 -; NOFP16-NEXT: mov d4, v1[1] -; NOFP16-NEXT: mov h5, v0[1] -; NOFP16-NEXT: movi.4s v3, #128, lsl #24 +; NOFP16-NEXT: mov d3, v1[1] +; NOFP16-NEXT: mov h4, v0[1] ; NOFP16-NEXT: fcvt s1, d1 ; NOFP16-NEXT: fcvt s6, h0 +; NOFP16-NEXT: fcvt s3, d3 +; NOFP16-NEXT: fcvt s4, h4 ; NOFP16-NEXT: mov h7, v0[2] -; NOFP16-NEXT: fcvt s4, d4 -; NOFP16-NEXT: fcvt s5, h5 -; NOFP16-NEXT: bit.16b v6, v1, v3 -; NOFP16-NEXT: fcvt s1, d2 +; NOFP16-NEXT: fcvt s16, d2 +; NOFP16-NEXT: movi.4s v5, #128, lsl #24 +; NOFP16-NEXT: mov h0, v0[3] ; NOFP16-NEXT: fcvt s7, h7 -; NOFP16-NEXT: bit.16b v5, v4, v3 -; NOFP16-NEXT: mov d2, v2[1] -; NOFP16-NEXT: mov h4, v0[3] +; NOFP16-NEXT: bit.16b v6, v1, v5 +; NOFP16-NEXT: mov d1, v2[1] +; NOFP16-NEXT: bit.16b v4, v3, v5 +; NOFP16-NEXT: fcvt s1, d1 +; NOFP16-NEXT: fcvt s2, h0 +; NOFP16-NEXT: fcvt h3, s4 +; NOFP16-NEXT: bit.16b v7, v16, v5 ; NOFP16-NEXT: fcvt h0, s6 -; NOFP16-NEXT: bit.16b v7, v1, v3 -; NOFP16-NEXT: fcvt h1, s5 -; NOFP16-NEXT: fcvt s2, d2 -; NOFP16-NEXT: fcvt s4, h4 -; NOFP16-NEXT: fcvt h5, s7 -; NOFP16-NEXT: mov.h v0[1], v1[0] -; NOFP16-NEXT: bit.16b v4, v2, v3 -; NOFP16-NEXT: mov.h v0[2], v5[0] -; NOFP16-NEXT: fcvt h1, s4 +; NOFP16-NEXT: fcvt h4, s7 +; NOFP16-NEXT: bit.16b v2, v1, v5 +; NOFP16-NEXT: mov.h v0[1], v3[0] +; NOFP16-NEXT: fcvt h1, s2 +; NOFP16-NEXT: mov.h v0[2], v4[0] ; NOFP16-NEXT: mov.h v0[3], v1[0] ; NOFP16-NEXT: ; kill: def $d0 killed $d0 killed $q0 ; NOFP16-NEXT: ret ; ; FP16-LABEL: test_copysign_v4f16_v4f64: ; FP16: ; %bb.0: -; FP16-NEXT: mov d4, v1[1] +; FP16-NEXT: mov d3, v1[1] ; FP16-NEXT: fcvt h1, d1 -; FP16-NEXT: movi.4h v3, #128, lsl #8 -; FP16-NEXT: fcvt h4, d4 -; FP16-NEXT: mov.h v1[1], v4[0] ; FP16-NEXT: fcvt h4, d2 ; FP16-NEXT: mov d2, v2[1] -; FP16-NEXT: mov.h v1[2], v4[0] +; FP16-NEXT: fcvt h3, d3 ; FP16-NEXT: fcvt h2, d2 +; FP16-NEXT: mov.h v1[1], v3[0] +; FP16-NEXT: mov.h v1[2], v4[0] ; FP16-NEXT: mov.h v1[3], v2[0] -; FP16-NEXT: bit.8b v0, v1, v3 +; FP16-NEXT: movi.4h v2, #128, lsl #8 +; FP16-NEXT: bit.8b v0, v1, v2 ; FP16-NEXT: ret %tmp0 = fptrunc <4 x double> %b to <4 x half> %r = call <4 x half> @llvm.copysign.v4f16(<4 x half> %a, <4 x half> %tmp0) @@ -330,59 +330,59 @@ define <8 x half> @test_copysign_v8f16_v8f16(<8 x half> %a, <8 x half> %b) #0 { ; NOFP16-LABEL: test_copysign_v8f16_v8f16: ; NOFP16: ; %bb.0: -; NOFP16-NEXT: mov h5, v1[1] -; NOFP16-NEXT: mov h6, v0[1] +; NOFP16-NEXT: mov h2, v1[1] +; NOFP16-NEXT: mov h3, v0[1] +; NOFP16-NEXT: mov h5, v0[2] +; NOFP16-NEXT: fcvt s6, h1 +; NOFP16-NEXT: fcvt s2, h2 +; NOFP16-NEXT: fcvt s4, h3 ; NOFP16-NEXT: movi.4s v3, #128, lsl #24 -; NOFP16-NEXT: fcvt s2, h1 -; NOFP16-NEXT: fcvt s4, h0 -; NOFP16-NEXT: mov h7, v1[2] -; NOFP16-NEXT: mov h16, v0[2] +; NOFP16-NEXT: fcvt s7, h0 ; NOFP16-NEXT: fcvt s5, h5 -; NOFP16-NEXT: fcvt s6, h6 +; NOFP16-NEXT: mov h16, v1[3] ; NOFP16-NEXT: mov h17, v0[3] ; NOFP16-NEXT: bit.16b v4, v2, v3 -; NOFP16-NEXT: mov h2, v1[3] -; NOFP16-NEXT: fcvt s7, h7 +; NOFP16-NEXT: mov h2, v1[2] ; NOFP16-NEXT: fcvt s16, h16 -; NOFP16-NEXT: bit.16b v6, v5, v3 +; NOFP16-NEXT: fcvt s2, h2 ; NOFP16-NEXT: fcvt s17, h17 -; NOFP16-NEXT: fcvt s18, h2 -; NOFP16-NEXT: mov h5, v1[4] -; NOFP16-NEXT: fcvt h2, s4 -; NOFP16-NEXT: bit.16b v16, v7, v3 -; NOFP16-NEXT: mov h7, v0[4] -; NOFP16-NEXT: fcvt h4, s6 -; NOFP16-NEXT: bit.16b v17, v18, v3 -; NOFP16-NEXT: mov h6, v1[5] -; NOFP16-NEXT: mov h18, v0[5] -; NOFP16-NEXT: fcvt s5, h5 -; NOFP16-NEXT: fcvt s7, h7 -; NOFP16-NEXT: mov.h v2[1], v4[0] -; NOFP16-NEXT: fcvt h4, s16 +; NOFP16-NEXT: bit.16b v7, v6, v3 +; NOFP16-NEXT: mov h6, v0[5] +; NOFP16-NEXT: mov h18, v1[4] +; NOFP16-NEXT: mov h19, v0[4] ; NOFP16-NEXT: fcvt s6, h6 -; NOFP16-NEXT: fcvt s16, h18 -; NOFP16-NEXT: fcvt h17, s17 -; NOFP16-NEXT: bit.16b v7, v5, v3 -; NOFP16-NEXT: mov h5, v0[6] -; NOFP16-NEXT: mov.h v2[2], v4[0] -; NOFP16-NEXT: mov h4, v1[6] -; NOFP16-NEXT: bit.16b v16, v6, v3 +; NOFP16-NEXT: bit.16b v5, v2, v3 +; NOFP16-NEXT: mov h2, v1[5] +; NOFP16-NEXT: bit.16b v17, v16, v3 +; NOFP16-NEXT: fcvt s16, h2 +; NOFP16-NEXT: fcvt h4, s4 +; NOFP16-NEXT: fcvt h2, s7 +; NOFP16-NEXT: fcvt s18, h18 +; NOFP16-NEXT: fcvt s19, h19 +; NOFP16-NEXT: fcvt h5, s5 +; NOFP16-NEXT: mov h7, v1[6] +; NOFP16-NEXT: bit.16b v6, v16, v3 +; NOFP16-NEXT: mov h16, v0[6] +; NOFP16-NEXT: mov.h v2[1], v4[0] +; NOFP16-NEXT: fcvt h4, s17 ; NOFP16-NEXT: mov h1, v1[7] -; NOFP16-NEXT: fcvt s5, h5 -; NOFP16-NEXT: mov.h v2[3], v17[0] -; NOFP16-NEXT: fcvt h6, s7 -; NOFP16-NEXT: fcvt s4, h4 +; NOFP16-NEXT: bit.16b v19, v18, v3 +; NOFP16-NEXT: mov.h v2[2], v5[0] +; NOFP16-NEXT: fcvt s5, h7 +; NOFP16-NEXT: fcvt s7, h16 ; NOFP16-NEXT: mov h0, v0[7] +; NOFP16-NEXT: mov.h v2[3], v4[0] +; NOFP16-NEXT: fcvt h4, s19 ; NOFP16-NEXT: fcvt s1, h1 -; NOFP16-NEXT: mov.h v2[4], v6[0] -; NOFP16-NEXT: bit.16b v5, v4, v3 -; NOFP16-NEXT: fcvt h4, s16 ; NOFP16-NEXT: fcvt s0, h0 -; NOFP16-NEXT: fcvt h5, s5 -; NOFP16-NEXT: mov.h v2[5], v4[0] +; NOFP16-NEXT: bit.16b v7, v5, v3 +; NOFP16-NEXT: fcvt h5, s6 +; NOFP16-NEXT: mov.h v2[4], v4[0] +; NOFP16-NEXT: fcvt h4, s7 ; NOFP16-NEXT: bit.16b v0, v1, v3 -; NOFP16-NEXT: mov.h v2[6], v5[0] +; NOFP16-NEXT: mov.h v2[5], v5[0] ; NOFP16-NEXT: fcvt h0, s0 +; NOFP16-NEXT: mov.h v2[6], v4[0] ; NOFP16-NEXT: mov.h v2[7], v0[0] ; NOFP16-NEXT: mov.16b v0, v2 ; NOFP16-NEXT: ret @@ -399,60 +399,60 @@ define <8 x half> @test_copysign_v8f16_v8f32(<8 x half> %a, <8 x float> %b) #0 { ; NOFP16-LABEL: test_copysign_v8f16_v8f32: ; NOFP16: ; %bb.0: -; NOFP16-NEXT: fcvtn v1.4h, v1.4s +; NOFP16-NEXT: fcvtn v4.4h, v1.4s +; NOFP16-NEXT: mov h1, v0[1] +; NOFP16-NEXT: mov h7, v0[3] ; NOFP16-NEXT: fcvtn v2.4h, v2.4s +; NOFP16-NEXT: fcvt s1, h1 +; NOFP16-NEXT: fcvt s6, h0 +; NOFP16-NEXT: fcvt s7, h7 +; NOFP16-NEXT: mov h18, v0[4] +; NOFP16-NEXT: mov h3, v4[1] +; NOFP16-NEXT: mov h16, v4[2] +; NOFP16-NEXT: fcvt s17, h4 +; NOFP16-NEXT: mov h4, v4[3] +; NOFP16-NEXT: fcvt s5, h3 +; NOFP16-NEXT: fcvt s16, h16 ; NOFP16-NEXT: movi.4s v3, #128, lsl #24 -; NOFP16-NEXT: mov h4, v0[1] -; NOFP16-NEXT: mov h5, v0[4] -; NOFP16-NEXT: fcvt s7, h0 -; NOFP16-NEXT: mov h17, v0[2] -; NOFP16-NEXT: mov h6, v1[1] -; NOFP16-NEXT: fcvt s16, h1 ; NOFP16-NEXT: fcvt s4, h4 -; NOFP16-NEXT: mov h18, v1[2] -; NOFP16-NEXT: fcvt s5, h5 -; NOFP16-NEXT: fcvt s17, h17 -; NOFP16-NEXT: fcvt s6, h6 -; NOFP16-NEXT: bit.16b v7, v16, v3 -; NOFP16-NEXT: fcvt s16, h2 ; NOFP16-NEXT: fcvt s18, h18 -; NOFP16-NEXT: bit.16b v4, v6, v3 -; NOFP16-NEXT: mov h6, v0[3] +; NOFP16-NEXT: bit.16b v6, v17, v3 +; NOFP16-NEXT: fcvt s19, h2 +; NOFP16-NEXT: bit.16b v1, v5, v3 +; NOFP16-NEXT: mov h5, v0[2] +; NOFP16-NEXT: bit.16b v7, v4, v3 +; NOFP16-NEXT: mov h4, v0[5] +; NOFP16-NEXT: fcvt s5, h5 +; NOFP16-NEXT: fcvt s4, h4 +; NOFP16-NEXT: fcvt h17, s1 +; NOFP16-NEXT: fcvt h1, s6 +; NOFP16-NEXT: fcvt h6, s7 +; NOFP16-NEXT: mov h7, v0[6] +; NOFP16-NEXT: mov h0, v0[7] ; NOFP16-NEXT: bit.16b v5, v16, v3 -; NOFP16-NEXT: mov h16, v1[3] -; NOFP16-NEXT: fcvt h1, s7 -; NOFP16-NEXT: mov h7, v0[5] -; NOFP16-NEXT: bit.16b v17, v18, v3 -; NOFP16-NEXT: fcvt h4, s4 -; NOFP16-NEXT: fcvt s6, h6 +; NOFP16-NEXT: mov h16, v2[1] +; NOFP16-NEXT: bit.16b v18, v19, v3 ; NOFP16-NEXT: fcvt s16, h16 -; NOFP16-NEXT: mov h18, v2[1] -; NOFP16-NEXT: fcvt s7, h7 ; NOFP16-NEXT: fcvt h5, s5 -; NOFP16-NEXT: mov.h v1[1], v4[0] -; NOFP16-NEXT: fcvt h4, s17 -; NOFP16-NEXT: bit.16b v6, v16, v3 -; NOFP16-NEXT: fcvt s17, h18 +; NOFP16-NEXT: mov.h v1[1], v17[0] +; NOFP16-NEXT: fcvt s0, h0 +; NOFP16-NEXT: bit.16b v4, v16, v3 ; NOFP16-NEXT: mov h16, v2[2] -; NOFP16-NEXT: mov.h v1[2], v4[0] -; NOFP16-NEXT: mov h4, v0[6] -; NOFP16-NEXT: mov h0, v0[7] -; NOFP16-NEXT: fcvt h6, s6 +; NOFP16-NEXT: mov.h v1[2], v5[0] +; NOFP16-NEXT: fcvt s5, h7 +; NOFP16-NEXT: fcvt s7, h16 ; NOFP16-NEXT: mov h2, v2[3] -; NOFP16-NEXT: bit.16b v7, v17, v3 -; NOFP16-NEXT: fcvt s16, h16 -; NOFP16-NEXT: fcvt s4, h4 -; NOFP16-NEXT: fcvt s0, h0 ; NOFP16-NEXT: mov.h v1[3], v6[0] +; NOFP16-NEXT: fcvt h6, s18 +; NOFP16-NEXT: fcvt h4, s4 ; NOFP16-NEXT: fcvt s2, h2 -; NOFP16-NEXT: bit.16b v4, v16, v3 -; NOFP16-NEXT: mov.h v1[4], v5[0] -; NOFP16-NEXT: fcvt h5, s7 +; NOFP16-NEXT: bit.16b v5, v7, v3 +; NOFP16-NEXT: mov.h v1[4], v6[0] +; NOFP16-NEXT: fcvt h5, s5 ; NOFP16-NEXT: bit.16b v0, v2, v3 -; NOFP16-NEXT: fcvt h4, s4 -; NOFP16-NEXT: mov.h v1[5], v5[0] +; NOFP16-NEXT: mov.h v1[5], v4[0] ; NOFP16-NEXT: fcvt h0, s0 -; NOFP16-NEXT: mov.h v1[6], v4[0] +; NOFP16-NEXT: mov.h v1[6], v5[0] ; NOFP16-NEXT: mov.h v1[7], v0[0] ; NOFP16-NEXT: mov.16b v0, v1 ; NOFP16-NEXT: ret @@ -461,9 +461,9 @@ ; FP16: ; %bb.0: ; FP16-NEXT: fcvtn v2.4h, v2.4s ; FP16-NEXT: fcvtn v1.4h, v1.4s -; FP16-NEXT: movi.8h v3, #128, lsl #8 ; FP16-NEXT: mov.d v1[1], v2[0] -; FP16-NEXT: bit.16b v0, v1, v3 +; FP16-NEXT: movi.8h v2, #128, lsl #8 +; FP16-NEXT: bit.16b v0, v1, v2 ; FP16-NEXT: ret %tmp0 = fptrunc <8 x float> %b to <8 x half> %r = call <8 x half> @llvm.copysign.v8f16(<8 x half> %a, <8 x half> %tmp0) diff --git a/llvm/test/CodeGen/AArch64/vector-gep.ll b/llvm/test/CodeGen/AArch64/vector-gep.ll --- a/llvm/test/CodeGen/AArch64/vector-gep.ll +++ b/llvm/test/CodeGen/AArch64/vector-gep.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=arm64_32-apple-watchos2.0.0 --aarch64-neon-syntax=generic | FileCheck %s target datalayout = "e-m:o-p:32:32-i64:64-i128:128-n32:64-S128" @@ -8,13 +9,6 @@ ; CHECK-NEXT: .quad 4804 define <2 x i8*> @vector_gep(<2 x i8*> %0) { -; CHECK-LABEL: vector_gep: -; CHECK: adrp x[[REG8:[123]?[0-9]]], lCPI0_0@PAGE -; CHECK: movi v[[REG1:[0-9]+]].2d, #0x000000ffffffff -; CHECK: ldr q[[REG2:[0-9]+]], [x[[REG8]], lCPI0_0@PAGEOFF] -; CHECK: add v[[REG0:[0-9]+]].2d, v[[REG0]].2d, v[[REG2]].2d -; CHECK: and v[[REG0]].16b, v[[REG0]].16b, v[[REG1]].16b -; CHECK: ret entry: %1 = getelementptr i8, <2 x i8*> %0, <2 x i32> ret <2 x i8*> %1 diff --git a/llvm/test/CodeGen/AArch64/vector-popcnt-128-ult-ugt.ll b/llvm/test/CodeGen/AArch64/vector-popcnt-128-ult-ugt.ll --- a/llvm/test/CodeGen/AArch64/vector-popcnt-128-ult-ugt.ll +++ b/llvm/test/CodeGen/AArch64/vector-popcnt-128-ult-ugt.ll @@ -4,8 +4,8 @@ define <16 x i8> @ugt_1_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ugt_1_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #1 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #1 ; CHECK-NEXT: cmhi v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -17,8 +17,8 @@ define <16 x i8> @ult_2_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ult_2_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #2 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #2 ; CHECK-NEXT: cmhi v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -30,8 +30,8 @@ define <16 x i8> @ugt_2_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ugt_2_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #2 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #2 ; CHECK-NEXT: cmhi v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -43,8 +43,8 @@ define <16 x i8> @ult_3_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ult_3_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #3 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #3 ; CHECK-NEXT: cmhi v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -56,8 +56,8 @@ define <16 x i8> @ugt_3_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ugt_3_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #3 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #3 ; CHECK-NEXT: cmhi v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -69,8 +69,8 @@ define <16 x i8> @ult_4_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ult_4_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #4 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #4 ; CHECK-NEXT: cmhi v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -82,8 +82,8 @@ define <16 x i8> @ugt_4_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ugt_4_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #4 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #4 ; CHECK-NEXT: cmhi v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -95,8 +95,8 @@ define <16 x i8> @ult_5_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ult_5_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #5 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #5 ; CHECK-NEXT: cmhi v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -108,8 +108,8 @@ define <16 x i8> @ugt_5_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ugt_5_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #5 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #5 ; CHECK-NEXT: cmhi v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -121,8 +121,8 @@ define <16 x i8> @ult_6_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ult_6_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #6 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #6 ; CHECK-NEXT: cmhi v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -134,8 +134,8 @@ define <16 x i8> @ugt_6_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ugt_6_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #6 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #6 ; CHECK-NEXT: cmhi v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -147,8 +147,8 @@ define <16 x i8> @ult_7_v16i8(<16 x i8> %0) { ; CHECK-LABEL: ult_7_v16i8: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v1.16b, #7 ; CHECK-NEXT: cnt v0.16b, v0.16b +; CHECK-NEXT: movi v1.16b, #7 ; CHECK-NEXT: cmhi v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %2 = tail call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %0) @@ -1477,8 +1477,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #2 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1494,8 +1494,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #3 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1511,8 +1511,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #3 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1528,8 +1528,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #4 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1545,8 +1545,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #4 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1562,8 +1562,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #5 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1579,8 +1579,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #5 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1596,8 +1596,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #6 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1613,8 +1613,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #6 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1630,8 +1630,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #7 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1647,8 +1647,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #7 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1664,8 +1664,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #8 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1681,8 +1681,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #8 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1698,8 +1698,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #9 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1715,8 +1715,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #9 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1732,8 +1732,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #10 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1749,8 +1749,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #10 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1766,8 +1766,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #11 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1783,8 +1783,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #11 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1800,8 +1800,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #12 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1817,8 +1817,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #12 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1834,8 +1834,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #13 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1851,8 +1851,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #13 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1868,8 +1868,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #14 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1885,8 +1885,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #14 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1902,8 +1902,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #15 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1919,8 +1919,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #15 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1936,8 +1936,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #16 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1953,8 +1953,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #16 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -1970,8 +1970,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #17 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -1987,8 +1987,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #17 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2004,8 +2004,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #18 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2021,8 +2021,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #18 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2038,8 +2038,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #19 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2055,8 +2055,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #19 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2072,8 +2072,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #20 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2089,8 +2089,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #20 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2106,8 +2106,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #21 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2123,8 +2123,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #21 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2140,8 +2140,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #22 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2157,8 +2157,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #22 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2174,8 +2174,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #23 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2191,8 +2191,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #23 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2208,8 +2208,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #24 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2225,8 +2225,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #24 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2242,8 +2242,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #25 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2259,8 +2259,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #25 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2276,8 +2276,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #26 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2293,8 +2293,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #26 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2310,8 +2310,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #27 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2327,8 +2327,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #27 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2344,8 +2344,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #28 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2361,8 +2361,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #28 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2378,8 +2378,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #29 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2395,8 +2395,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #29 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2412,8 +2412,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #30 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2429,8 +2429,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #30 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2446,8 +2446,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #31 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2463,8 +2463,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #31 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2480,8 +2480,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #32 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2497,8 +2497,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #32 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2514,8 +2514,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #33 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2531,8 +2531,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #33 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2548,8 +2548,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #34 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2565,8 +2565,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #34 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2582,8 +2582,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #35 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2599,8 +2599,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #35 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2616,8 +2616,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #36 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2633,8 +2633,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #36 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2650,8 +2650,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #37 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2667,8 +2667,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #37 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2684,8 +2684,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #38 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2701,8 +2701,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #38 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2718,8 +2718,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #39 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2735,8 +2735,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #39 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2752,8 +2752,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #40 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2769,8 +2769,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #40 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2786,8 +2786,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #41 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2803,8 +2803,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #41 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2820,8 +2820,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #42 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2837,8 +2837,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #42 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2854,8 +2854,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #43 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2871,8 +2871,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #43 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2888,8 +2888,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #44 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2905,8 +2905,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #44 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2922,8 +2922,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #45 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2939,8 +2939,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #45 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2956,8 +2956,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #46 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -2973,8 +2973,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #46 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -2990,8 +2990,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #47 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3007,8 +3007,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #47 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3024,8 +3024,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #48 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3041,8 +3041,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #48 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3058,8 +3058,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #49 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3075,8 +3075,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #49 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3092,8 +3092,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #50 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3109,8 +3109,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #50 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3126,8 +3126,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #51 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3143,8 +3143,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #51 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3160,8 +3160,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #52 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3177,8 +3177,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #52 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3194,8 +3194,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #53 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3211,8 +3211,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #53 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3228,8 +3228,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #54 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3245,8 +3245,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #54 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3262,8 +3262,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #55 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3279,8 +3279,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #55 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3296,8 +3296,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #56 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3313,8 +3313,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #56 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3330,8 +3330,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #57 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3347,8 +3347,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #57 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3364,8 +3364,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #58 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3381,8 +3381,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #58 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3398,8 +3398,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #59 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3415,8 +3415,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #59 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3432,8 +3432,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #60 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3449,8 +3449,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #60 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3466,8 +3466,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #61 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3483,8 +3483,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #61 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3500,8 +3500,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #62 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d @@ -3517,8 +3517,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #62 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v0.2d, v1.2d @@ -3534,8 +3534,8 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: cnt v0.16b, v0.16b ; CHECK-NEXT: mov w8, #63 -; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.8h, v0.16b +; CHECK-NEXT: dup v1.2d, x8 ; CHECK-NEXT: uaddlp v0.4s, v0.8h ; CHECK-NEXT: uaddlp v0.2d, v0.4s ; CHECK-NEXT: cmhi v0.2d, v1.2d, v0.2d diff --git a/llvm/test/CodeGen/AArch64/vselect-constants.ll b/llvm/test/CodeGen/AArch64/vselect-constants.ll --- a/llvm/test/CodeGen/AArch64/vselect-constants.ll +++ b/llvm/test/CodeGen/AArch64/vselect-constants.ll @@ -10,11 +10,11 @@ define <4 x i32> @sel_C1_or_C2_vec(<4 x i1> %cond) { ; CHECK-LABEL: sel_C1_or_C2_vec: ; CHECK: // %bb.0: -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-NEXT: adrp x8, .LCPI0_0 +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-NEXT: adrp x9, .LCPI0_1 -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0] ; CHECK-NEXT: shl v0.4s, v0.4s, #31 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI0_0] ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI0_1] ; CHECK-NEXT: sshr v0.4s, v0.4s, #31 ; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b @@ -29,9 +29,9 @@ ; CHECK-NEXT: adrp x8, .LCPI1_0 ; CHECK-NEXT: adrp x9, .LCPI1_1 ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_0] -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI1_1] -; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI1_0] +; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI1_1] +; CHECK-NEXT: bsl v0.16b, v3.16b, v2.16b ; CHECK-NEXT: ret %cond = icmp eq <4 x i32> %x, %y %add = select <4 x i1> %cond, <4 x i32> , <4 x i32> @@ -41,11 +41,11 @@ define <4 x i32> @sel_Cplus1_or_C_vec(<4 x i1> %cond) { ; CHECK-LABEL: sel_Cplus1_or_C_vec: ; CHECK: // %bb.0: -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-NEXT: adrp x8, .LCPI2_0 +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-NEXT: adrp x9, .LCPI2_1 -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0] ; CHECK-NEXT: shl v0.4s, v0.4s, #31 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI2_0] ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI2_1] ; CHECK-NEXT: sshr v0.4s, v0.4s, #31 ; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b @@ -60,9 +60,9 @@ ; CHECK-NEXT: adrp x8, .LCPI3_0 ; CHECK-NEXT: adrp x9, .LCPI3_1 ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI3_0] -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI3_1] -; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI3_0] +; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI3_1] +; CHECK-NEXT: bsl v0.16b, v3.16b, v2.16b ; CHECK-NEXT: ret %cond = icmp eq <4 x i32> %x, %y %add = select <4 x i1> %cond, <4 x i32> , <4 x i32> @@ -72,11 +72,11 @@ define <4 x i32> @sel_Cminus1_or_C_vec(<4 x i1> %cond) { ; CHECK-LABEL: sel_Cminus1_or_C_vec: ; CHECK: // %bb.0: -; CHECK-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-NEXT: adrp x8, .LCPI4_0 +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 ; CHECK-NEXT: adrp x9, .LCPI4_1 -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_0] ; CHECK-NEXT: shl v0.4s, v0.4s, #31 +; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI4_0] ; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI4_1] ; CHECK-NEXT: sshr v0.4s, v0.4s, #31 ; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b @@ -91,9 +91,9 @@ ; CHECK-NEXT: adrp x8, .LCPI5_0 ; CHECK-NEXT: adrp x9, .LCPI5_1 ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s -; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI5_0] -; CHECK-NEXT: ldr q2, [x9, :lo12:.LCPI5_1] -; CHECK-NEXT: bsl v0.16b, v2.16b, v1.16b +; CHECK-NEXT: ldr q2, [x8, :lo12:.LCPI5_0] +; CHECK-NEXT: ldr q3, [x9, :lo12:.LCPI5_1] +; CHECK-NEXT: bsl v0.16b, v3.16b, v2.16b ; CHECK-NEXT: ret %cond = icmp eq <4 x i32> %x, %y %add = select <4 x i1> %cond, <4 x i32> , <4 x i32> @@ -159,9 +159,9 @@ define <4 x i32> @cmp_sel_1_or_0_vec(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: cmp_sel_1_or_0_vec: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s -; CHECK-NEXT: and v0.16b, v0.16b, v2.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: and v0.16b, v0.16b, v1.16b ; CHECK-NEXT: ret %cond = icmp eq <4 x i32> %x, %y %add = select <4 x i1> %cond, <4 x i32> , <4 x i32> @@ -184,9 +184,9 @@ define <4 x i32> @cmp_sel_0_or_1_vec(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: cmp_sel_0_or_1_vec: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: cmeq v0.4s, v0.4s, v1.4s -; CHECK-NEXT: bic v0.16b, v2.16b, v0.16b +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: bic v0.16b, v1.16b, v0.16b ; CHECK-NEXT: ret %cond = icmp eq <4 x i32> %x, %y %add = select <4 x i1> %cond, <4 x i32> , <4 x i32> diff --git a/llvm/test/CodeGen/AArch64/xor.ll b/llvm/test/CodeGen/AArch64/xor.ll --- a/llvm/test/CodeGen/AArch64/xor.ll +++ b/llvm/test/CodeGen/AArch64/xor.ll @@ -62,9 +62,9 @@ define <4 x i32> @vec_add_of_not_with_undef(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: vec_add_of_not_with_undef: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.2d, #0xffffffffffffffff ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s -; CHECK-NEXT: add v0.4s, v0.4s, v2.4s +; CHECK-NEXT: movi v1.2d, #0xffffffffffffffff +; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %t0 = sub <4 x i32> %x, %y %r = add <4 x i32> %t0, @@ -74,9 +74,9 @@ define <4 x i32> @vec_add_of_not_with_undef_decrement(<4 x i32> %x, <4 x i32> %y) { ; CHECK-LABEL: vec_add_of_not_with_undef_decrement: ; CHECK: // %bb.0: -; CHECK-NEXT: movi v2.4s, #1 ; CHECK-NEXT: sub v0.4s, v0.4s, v1.4s -; CHECK-NEXT: add v0.4s, v0.4s, v2.4s +; CHECK-NEXT: movi v1.4s, #1 +; CHECK-NEXT: add v0.4s, v0.4s, v1.4s ; CHECK-NEXT: ret %t0 = sub <4 x i32> %x, %y %r = add <4 x i32> %t0, diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-basic-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-basic-instructions.s --- a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-basic-instructions.s +++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-basic-instructions.s @@ -2428,44 +2428,44 @@ # CHECK-NEXT: 1 1 1.00 * stp q3, q5, [sp] # CHECK-NEXT: 1 1 1.00 * stp q17, q19, [sp, #1008] # CHECK-NEXT: 2 6 6.00 * ldp q23, q29, [x1, #-1024] -# CHECK-NEXT: 2 4 1.00 * ldp w3, w5, [sp], #0 +# CHECK-NEXT: 3 4 1.00 * ldp w3, w5, [sp], #0 # CHECK-NEXT: 2 1 1.00 * stp wzr, w9, [sp], #252 -# CHECK-NEXT: 2 4 1.00 * ldp w2, wzr, [sp], #-256 -# CHECK-NEXT: 2 4 1.00 * ldp w9, w10, [sp], #4 -# CHECK-NEXT: 2 4 1.00 * ldpsw x9, x10, [sp], #4 -# CHECK-NEXT: 2 4 1.00 * ldpsw x9, x10, [x2], #-256 -# CHECK-NEXT: 2 4 1.00 * ldpsw x20, x30, [sp], #252 -# CHECK-NEXT: 2 5 2.00 * ldp x21, x29, [x2], #504 -# CHECK-NEXT: 2 5 2.00 * ldp x22, x23, [x3], #-512 -# CHECK-NEXT: 2 5 2.00 * ldp x24, x25, [x4], #8 -# CHECK-NEXT: 2 4 1.00 * ldp s29, s28, [sp], #252 +# CHECK-NEXT: 3 4 1.00 * ldp w2, wzr, [sp], #-256 +# CHECK-NEXT: 3 4 1.00 * ldp w9, w10, [sp], #4 +# CHECK-NEXT: 3 4 1.00 * ldpsw x9, x10, [sp], #4 +# CHECK-NEXT: 3 4 1.00 * ldpsw x9, x10, [x2], #-256 +# CHECK-NEXT: 3 4 1.00 * ldpsw x20, x30, [sp], #252 +# CHECK-NEXT: 3 5 2.00 * ldp x21, x29, [x2], #504 +# CHECK-NEXT: 3 5 2.00 * ldp x22, x23, [x3], #-512 +# CHECK-NEXT: 3 5 2.00 * ldp x24, x25, [x4], #8 +# CHECK-NEXT: 3 4 1.00 * ldp s29, s28, [sp], #252 # CHECK-NEXT: 2 1 1.00 * stp s27, s26, [sp], #-256 -# CHECK-NEXT: 2 4 1.00 * ldp s1, s2, [x3], #44 +# CHECK-NEXT: 3 4 1.00 * ldp s1, s2, [x3], #44 # CHECK-NEXT: 2 1 1.00 * stp d3, d5, [x9], #504 # CHECK-NEXT: 2 1 1.00 * stp d7, d11, [x10], #-512 -# CHECK-NEXT: 2 5 2.00 * ldp d2, d3, [x30], #-8 +# CHECK-NEXT: 3 5 2.00 * ldp d2, d3, [x30], #-8 # CHECK-NEXT: 2 1 1.00 * stp q3, q5, [sp], #0 # CHECK-NEXT: 2 1 1.00 * stp q17, q19, [sp], #1008 -# CHECK-NEXT: 2 6 6.00 * ldp q23, q29, [x1], #-1024 -# CHECK-NEXT: 2 4 1.00 * ldp w3, w5, [sp, #0]! +# CHECK-NEXT: 3 6 6.00 * ldp q23, q29, [x1], #-1024 +# CHECK-NEXT: 3 4 1.00 * ldp w3, w5, [sp, #0]! # CHECK-NEXT: 2 1 1.00 * stp wzr, w9, [sp, #252]! -# CHECK-NEXT: 2 4 1.00 * ldp w2, wzr, [sp, #-256]! -# CHECK-NEXT: 2 4 1.00 * ldp w9, w10, [sp, #4]! -# CHECK-NEXT: 2 4 1.00 * ldpsw x9, x10, [sp, #4]! -# CHECK-NEXT: 2 4 1.00 * ldpsw x9, x10, [x2, #-256]! -# CHECK-NEXT: 2 4 1.00 * ldpsw x20, x30, [sp, #252]! -# CHECK-NEXT: 2 5 2.00 * ldp x21, x29, [x2, #504]! -# CHECK-NEXT: 2 5 2.00 * ldp x22, x23, [x3, #-512]! -# CHECK-NEXT: 2 5 2.00 * ldp x24, x25, [x4, #8]! -# CHECK-NEXT: 2 4 1.00 * ldp s29, s28, [sp, #252]! +# CHECK-NEXT: 3 4 1.00 * ldp w2, wzr, [sp, #-256]! +# CHECK-NEXT: 3 4 1.00 * ldp w9, w10, [sp, #4]! +# CHECK-NEXT: 3 4 1.00 * ldpsw x9, x10, [sp, #4]! +# CHECK-NEXT: 3 4 1.00 * ldpsw x9, x10, [x2, #-256]! +# CHECK-NEXT: 3 4 1.00 * ldpsw x20, x30, [sp, #252]! +# CHECK-NEXT: 3 5 2.00 * ldp x21, x29, [x2, #504]! +# CHECK-NEXT: 3 5 2.00 * ldp x22, x23, [x3, #-512]! +# CHECK-NEXT: 3 5 2.00 * ldp x24, x25, [x4, #8]! +# CHECK-NEXT: 3 4 1.00 * ldp s29, s28, [sp, #252]! # CHECK-NEXT: 2 1 1.00 * stp s27, s26, [sp, #-256]! -# CHECK-NEXT: 2 4 1.00 * ldp s1, s2, [x3, #44]! +# CHECK-NEXT: 3 4 1.00 * ldp s1, s2, [x3, #44]! # CHECK-NEXT: 2 1 1.00 * stp d3, d5, [x9, #504]! # CHECK-NEXT: 2 1 1.00 * stp d7, d11, [x10, #-512]! -# CHECK-NEXT: 2 5 2.00 * ldp d2, d3, [x30, #-8]! +# CHECK-NEXT: 3 5 2.00 * ldp d2, d3, [x30, #-8]! # CHECK-NEXT: 2 1 1.00 * stp q3, q5, [sp, #0]! # CHECK-NEXT: 2 1 1.00 * stp q17, q19, [sp, #1008]! -# CHECK-NEXT: 2 6 6.00 * ldp q23, q29, [x1, #-1024]! +# CHECK-NEXT: 3 6 6.00 * ldp q23, q29, [x1, #-1024]! # CHECK-NEXT: 2 5 2.00 * ldnp w3, w5, [sp] # CHECK-NEXT: 1 1 1.00 * stnp wzr, w9, [sp, #252] # CHECK-NEXT: 2 5 2.00 * ldnp w2, wzr, [sp, #-256] diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-load-readadv.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-load-readadv.s --- a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-load-readadv.s +++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-load-readadv.s @@ -161,13 +161,13 @@ # CHECK: Iterations: 100 # CHECK-NEXT: Instructions: 15400 -# CHECK-NEXT: Total Cycles: 30104 -# CHECK-NEXT: Total uOps: 19900 +# CHECK-NEXT: Total Cycles: 30303 +# CHECK-NEXT: Total uOps: 20900 # CHECK: Dispatch Width: 2 -# CHECK-NEXT: uOps Per Cycle: 0.66 +# CHECK-NEXT: uOps Per Cycle: 0.69 # CHECK-NEXT: IPC: 0.51 -# CHECK-NEXT: Block RThroughput: 104.0 +# CHECK-NEXT: Block RThroughput: 104.5 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -305,33 +305,33 @@ # CHECK-NEXT: 1 3 0.50 add x2, x3, #1 # CHECK-NEXT: 2 5 2.00 * ldp d0, d1, [x2, #16] # CHECK-NEXT: 1 3 0.50 add x2, x3, #1 -# CHECK-NEXT: 2 5 2.00 * ldp d0, d1, [x2, #16]! +# CHECK-NEXT: 3 5 2.00 * ldp d0, d1, [x2, #16]! # CHECK-NEXT: 1 3 0.50 add x2, x3, #1 -# CHECK-NEXT: 2 5 2.00 * ldp d0, d1, [x2], #16 +# CHECK-NEXT: 3 5 2.00 * ldp d0, d1, [x2], #16 # CHECK-NEXT: 1 3 0.50 add x2, x3, #1 # CHECK-NEXT: 2 6 6.00 * ldp q0, q1, [x2, #16] # CHECK-NEXT: 1 3 0.50 add x2, x3, #1 -# CHECK-NEXT: 2 6 6.00 * ldp q0, q1, [x2, #16]! +# CHECK-NEXT: 3 6 6.00 * ldp q0, q1, [x2, #16]! # CHECK-NEXT: 1 3 0.50 add x2, x3, #1 -# CHECK-NEXT: 2 6 6.00 * ldp q0, q1, [x2], #16 +# CHECK-NEXT: 3 6 6.00 * ldp q0, q1, [x2], #16 # CHECK-NEXT: 1 3 0.50 add x2, x3, #1 # CHECK-NEXT: 2 4 1.00 * ldp s0, s1, [x2, #16] # CHECK-NEXT: 1 3 0.50 add x2, x3, #1 -# CHECK-NEXT: 2 4 1.00 * ldp s0, s1, [x2, #16]! +# CHECK-NEXT: 3 4 1.00 * ldp s0, s1, [x2, #16]! # CHECK-NEXT: 1 3 0.50 add x2, x3, #1 -# CHECK-NEXT: 2 4 1.00 * ldp s0, s1, [x2], #16 +# CHECK-NEXT: 3 4 1.00 * ldp s0, s1, [x2], #16 # CHECK-NEXT: 1 3 0.50 add x2, x3, #1 # CHECK-NEXT: 2 5 2.00 * ldp x0, x1, [x2, #16] # CHECK-NEXT: 1 3 0.50 add x2, x3, #1 -# CHECK-NEXT: 2 5 2.00 * ldp x0, x1, [x2, #16]! +# CHECK-NEXT: 3 5 2.00 * ldp x0, x1, [x2, #16]! # CHECK-NEXT: 1 3 0.50 add x2, x3, #1 -# CHECK-NEXT: 2 5 2.00 * ldp x0, x1, [x2], #16 +# CHECK-NEXT: 3 5 2.00 * ldp x0, x1, [x2], #16 # CHECK-NEXT: 1 3 0.50 add x2, x3, #1 # CHECK-NEXT: 2 4 1.00 * ldpsw x0, x1, [x2, #16] # CHECK-NEXT: 1 3 0.50 add x2, x3, #1 -# CHECK-NEXT: 2 4 1.00 * ldpsw x0, x1, [x2, #16]! +# CHECK-NEXT: 3 4 1.00 * ldpsw x0, x1, [x2, #16]! # CHECK-NEXT: 1 3 0.50 add x2, x3, #1 -# CHECK-NEXT: 2 4 1.00 * ldpsw x0, x1, [x2], #16 +# CHECK-NEXT: 3 4 1.00 * ldpsw x0, x1, [x2], #16 # CHECK: Resources: # CHECK-NEXT: [0.0] - CortexA55UnitALU @@ -510,162 +510,162 @@ # CHECK: Timeline view: # CHECK-NEXT: 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 -# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 01234 +# CHECK-NEXT: Index 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 0123456789 012345 -# CHECK: [0,0] DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,1] . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr b0, [x2, #16] -# CHECK-NEXT: [0,2] . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,3] . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr b0, [x2, #16]! -# CHECK-NEXT: [0,4] . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,5] . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr b0, [x2], #16 -# CHECK-NEXT: [0,6] . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,7] . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr d0, [x2], #16 -# CHECK-NEXT: [0,8] . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,9] . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr d0, [x2, #16]! -# CHECK-NEXT: [0,10] . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,11] . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr d0, [x2, #16] -# CHECK-NEXT: [0,12] . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,13] . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr h0, [x2], #16 -# CHECK-NEXT: [0,14] . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,15] . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr h0, [x2, #16]! -# CHECK-NEXT: [0,16] . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,17] . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr h0, [x2, #16] -# CHECK-NEXT: [0,18] . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,19] . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr q0, [x2], #16 -# CHECK-NEXT: [0,20] . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,21] . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr q0, [x2, #16]! -# CHECK-NEXT: [0,22] . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,23] . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr q0, [x2, #16] -# CHECK-NEXT: [0,24] . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,25] . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr s0, [x2], #16 -# CHECK-NEXT: [0,26] . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,27] . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr s0, [x2, #16]! -# CHECK-NEXT: [0,28] . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,29] . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr s0, [x2, #16] -# CHECK-NEXT: [0,30] . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,31] . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr w0, [x2], #16 -# CHECK-NEXT: [0,32] . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,33] . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr w0, [x2, #16]! -# CHECK-NEXT: [0,34] . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,35] . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr w0, [x2, #16] -# CHECK-NEXT: [0,36] . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,37] . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr x0, [x2], #16 -# CHECK-NEXT: [0,38] . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,39] . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr x0, [x2, #16]! -# CHECK-NEXT: [0,40] . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,41] . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr x0, [x2, #16] -# CHECK-NEXT: [0,42] . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,43] . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrb w0, [x2], #16 -# CHECK-NEXT: [0,44] . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,45] . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrb w0, [x2, #16]! -# CHECK-NEXT: [0,46] . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,47] . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrb w0, [x2, #16] -# CHECK-NEXT: [0,48] . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,49] . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsb w0, [x2], #16 -# CHECK-NEXT: [0,50] . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,51] . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsb w0, [x2, #16]! -# CHECK-NEXT: [0,52] . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,53] . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsb w0, [x2, #16] -# CHECK-NEXT: [0,54] . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,55] . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrh w0, [x2], #16 -# CHECK-NEXT: [0,56] . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,57] . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrh w0, [x2, #16]! -# CHECK-NEXT: [0,58] . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,59] . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrh w0, [x2, #16] -# CHECK-NEXT: [0,60] . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,61] . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsh w0, [x2], #16 -# CHECK-NEXT: [0,62] . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,63] . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsh w0, [x2, #16]! -# CHECK-NEXT: [0,64] . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,65] . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsh w0, [x2, #16] -# CHECK-NEXT: [0,66] . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,67] . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsw x0, [x2], #16 -# CHECK-NEXT: [0,68] . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,69] . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsw x0, [x2, #16]! -# CHECK-NEXT: [0,70] . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,71] . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsw x0, [x2, #16] -# CHECK-NEXT: [0,72] . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,73] . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr d0, [x2, x2, lsl #3] -# CHECK-NEXT: [0,74] . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,75] . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr q0, [x2, w0, sxtw] -# CHECK-NEXT: [0,76] . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,77] . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr w0, [x2, w0, sxtw] -# CHECK-NEXT: [0,78] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,79] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr x0, [x2, w0, sxtw] -# CHECK-NEXT: [0,80] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,81] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrb w0, [x2, w0, sxtw] -# CHECK-NEXT: [0,82] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,83] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsb w0, [x2, w0, sxtw] -# CHECK-NEXT: [0,84] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,85] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrh w0, [x2, w0, sxtw] -# CHECK-NEXT: [0,86] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,87] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsh w0, [x2, w0, sxtw] -# CHECK-NEXT: [0,88] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,89] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsw x0, [x2, w0, sxtw] -# CHECK-NEXT: [0,90] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,91] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . ldur b0, [x2, #255] -# CHECK-NEXT: [0,92] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,93] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . ldur d0, [x2, #255] -# CHECK-NEXT: [0,94] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,95] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . ldur h0, [x2, #255] -# CHECK-NEXT: [0,96] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,97] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . ldur q0, [x2, #255] -# CHECK-NEXT: [0,98] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,99] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . ldur s0, [x2, #255] -# CHECK-NEXT: [0,100] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,101] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . ldur w0, [x2, #255] -# CHECK-NEXT: [0,102] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,103] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . ldurb w0, [x2, #255] -# CHECK-NEXT: [0,104] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,105] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . ldurh w0, [x2, #255] -# CHECK-NEXT: [0,106] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,107] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . ldursb w0, [x2, #255] -# CHECK-NEXT: [0,108] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,109] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . ldursh w0, [x2, #255] -# CHECK-NEXT: [0,110] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,111] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . ldursw x0, [x2, #255] -# CHECK-NEXT: [0,112] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,113] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . . . . . . . . ldnp d0, d1, [x2, #16] -# CHECK-NEXT: [0,114] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,115] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . . . . . . . ldnp q0, q1, [x2, #16] -# CHECK-NEXT: [0,116] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,117] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . . . . . . ldnp s0, s1, [x2, #16] -# CHECK-NEXT: [0,118] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,119] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . . . . . ldnp s0, s1, [x2, #16] -# CHECK-NEXT: [0,120] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,121] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . . . . ldnp w0, w1, [x2, #16] -# CHECK-NEXT: [0,122] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,123] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . . . ldnp x0, x1, [x2, #16] -# CHECK-NEXT: [0,124] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,125] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . . ldp d0, d1, [x2, #16] -# CHECK-NEXT: [0,126] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,127] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . ldp d0, d1, [x2, #16]! -# CHECK-NEXT: [0,128] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,129] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . ldp d0, d1, [x2], #16 -# CHECK-NEXT: [0,130] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,131] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeE . . . . . . . . . . . ldp q0, q1, [x2, #16] -# CHECK-NEXT: [0,132] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,133] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeE. . . . . . . . . . ldp q0, q1, [x2, #16]! -# CHECK-NEXT: [0,134] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,135] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeE . . . . . . . . ldp q0, q1, [x2], #16 -# CHECK-NEXT: [0,136] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,137] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE. . . . . . . . ldp s0, s1, [x2, #16] -# CHECK-NEXT: [0,138] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,139] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE . . . . . . . ldp s0, s1, [x2, #16]! -# CHECK-NEXT: [0,140] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,141] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE . . . . . . ldp s0, s1, [x2], #16 -# CHECK-NEXT: [0,142] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,143] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . ldp x0, x1, [x2, #16] -# CHECK-NEXT: [0,144] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . add x2, x3, #1 -# CHECK-NEXT: [0,145] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . ldp x0, x1, [x2, #16]! -# CHECK-NEXT: [0,146] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . add x2, x3, #1 -# CHECK-NEXT: [0,147] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . ldp x0, x1, [x2], #16 -# CHECK-NEXT: [0,148] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . add x2, x3, #1 -# CHECK-NEXT: [0,149] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE . . ldpsw x0, x1, [x2, #16] -# CHECK-NEXT: [0,150] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . add x2, x3, #1 -# CHECK-NEXT: [0,151] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeeE . ldpsw x0, x1, [x2, #16]! -# CHECK-NEXT: [0,152] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . add x2, x3, #1 -# CHECK-NEXT: [0,153] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE ldpsw x0, x1, [x2], #16 +# CHECK: [0,0] DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,1] . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr b0, [x2, #16] +# CHECK-NEXT: [0,2] . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,3] . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr b0, [x2, #16]! +# CHECK-NEXT: [0,4] . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,5] . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr b0, [x2], #16 +# CHECK-NEXT: [0,6] . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,7] . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr d0, [x2], #16 +# CHECK-NEXT: [0,8] . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,9] . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr d0, [x2, #16]! +# CHECK-NEXT: [0,10] . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,11] . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr d0, [x2, #16] +# CHECK-NEXT: [0,12] . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,13] . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr h0, [x2], #16 +# CHECK-NEXT: [0,14] . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,15] . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr h0, [x2, #16]! +# CHECK-NEXT: [0,16] . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,17] . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr h0, [x2, #16] +# CHECK-NEXT: [0,18] . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,19] . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr q0, [x2], #16 +# CHECK-NEXT: [0,20] . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,21] . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr q0, [x2, #16]! +# CHECK-NEXT: [0,22] . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,23] . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr q0, [x2, #16] +# CHECK-NEXT: [0,24] . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,25] . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr s0, [x2], #16 +# CHECK-NEXT: [0,26] . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,27] . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr s0, [x2, #16]! +# CHECK-NEXT: [0,28] . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,29] . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr s0, [x2, #16] +# CHECK-NEXT: [0,30] . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,31] . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr w0, [x2], #16 +# CHECK-NEXT: [0,32] . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,33] . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr w0, [x2, #16]! +# CHECK-NEXT: [0,34] . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,35] . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr w0, [x2, #16] +# CHECK-NEXT: [0,36] . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,37] . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr x0, [x2], #16 +# CHECK-NEXT: [0,38] . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,39] . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr x0, [x2, #16]! +# CHECK-NEXT: [0,40] . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,41] . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr x0, [x2, #16] +# CHECK-NEXT: [0,42] . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,43] . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrb w0, [x2], #16 +# CHECK-NEXT: [0,44] . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,45] . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrb w0, [x2, #16]! +# CHECK-NEXT: [0,46] . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,47] . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrb w0, [x2, #16] +# CHECK-NEXT: [0,48] . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,49] . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsb w0, [x2], #16 +# CHECK-NEXT: [0,50] . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,51] . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsb w0, [x2, #16]! +# CHECK-NEXT: [0,52] . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,53] . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsb w0, [x2, #16] +# CHECK-NEXT: [0,54] . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,55] . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrh w0, [x2], #16 +# CHECK-NEXT: [0,56] . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,57] . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrh w0, [x2, #16]! +# CHECK-NEXT: [0,58] . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,59] . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrh w0, [x2, #16] +# CHECK-NEXT: [0,60] . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,61] . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsh w0, [x2], #16 +# CHECK-NEXT: [0,62] . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,63] . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsh w0, [x2, #16]! +# CHECK-NEXT: [0,64] . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,65] . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsh w0, [x2, #16] +# CHECK-NEXT: [0,66] . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,67] . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsw x0, [x2], #16 +# CHECK-NEXT: [0,68] . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,69] . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsw x0, [x2, #16]! +# CHECK-NEXT: [0,70] . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,71] . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsw x0, [x2, #16] +# CHECK-NEXT: [0,72] . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,73] . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr d0, [x2, x2, lsl #3] +# CHECK-NEXT: [0,74] . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,75] . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr q0, [x2, w0, sxtw] +# CHECK-NEXT: [0,76] . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,77] . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr w0, [x2, w0, sxtw] +# CHECK-NEXT: [0,78] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,79] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldr x0, [x2, w0, sxtw] +# CHECK-NEXT: [0,80] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,81] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrb w0, [x2, w0, sxtw] +# CHECK-NEXT: [0,82] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,83] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsb w0, [x2, w0, sxtw] +# CHECK-NEXT: [0,84] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,85] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrh w0, [x2, w0, sxtw] +# CHECK-NEXT: [0,86] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,87] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsh w0, [x2, w0, sxtw] +# CHECK-NEXT: [0,88] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,89] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . ldrsw x0, [x2, w0, sxtw] +# CHECK-NEXT: [0,90] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,91] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . ldur b0, [x2, #255] +# CHECK-NEXT: [0,92] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,93] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . ldur d0, [x2, #255] +# CHECK-NEXT: [0,94] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,95] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . ldur h0, [x2, #255] +# CHECK-NEXT: [0,96] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,97] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . ldur q0, [x2, #255] +# CHECK-NEXT: [0,98] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,99] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . ldur s0, [x2, #255] +# CHECK-NEXT: [0,100] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,101] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . ldur w0, [x2, #255] +# CHECK-NEXT: [0,102] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,103] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . ldurb w0, [x2, #255] +# CHECK-NEXT: [0,104] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,105] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . ldurh w0, [x2, #255] +# CHECK-NEXT: [0,106] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,107] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . ldursb w0, [x2, #255] +# CHECK-NEXT: [0,108] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,109] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . ldursh w0, [x2, #255] +# CHECK-NEXT: [0,110] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,111] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . ldursw x0, [x2, #255] +# CHECK-NEXT: [0,112] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,113] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . . . . . . . . ldnp d0, d1, [x2, #16] +# CHECK-NEXT: [0,114] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,115] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . . . . . . . ldnp q0, q1, [x2, #16] +# CHECK-NEXT: [0,116] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,117] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . . . . . . ldnp s0, s1, [x2, #16] +# CHECK-NEXT: [0,118] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,119] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . . . . . ldnp s0, s1, [x2, #16] +# CHECK-NEXT: [0,120] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,121] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . . . . ldnp w0, w1, [x2, #16] +# CHECK-NEXT: [0,122] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,123] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . . . ldnp x0, x1, [x2, #16] +# CHECK-NEXT: [0,124] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,125] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . . ldp d0, d1, [x2, #16] +# CHECK-NEXT: [0,126] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,127] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . . ldp d0, d1, [x2, #16]! +# CHECK-NEXT: [0,128] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,129] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . . . . . . . . ldp d0, d1, [x2], #16 +# CHECK-NEXT: [0,130] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,131] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeE . . . . . . . . . . . ldp q0, q1, [x2, #16] +# CHECK-NEXT: [0,132] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,133] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeE. . . . . . . . . . ldp q0, q1, [x2, #16]! +# CHECK-NEXT: [0,134] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,135] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeeE . . . . . . . . ldp q0, q1, [x2], #16 +# CHECK-NEXT: [0,136] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,137] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE. . . . . . . . ldp s0, s1, [x2, #16] +# CHECK-NEXT: [0,138] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeE. . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,139] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE . . . . . . . ldp s0, s1, [x2, #16]! +# CHECK-NEXT: [0,140] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,141] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE . . . . . . ldp s0, s1, [x2], #16 +# CHECK-NEXT: [0,142] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,143] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . . ldp x0, x1, [x2, #16] +# CHECK-NEXT: [0,144] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . . add x2, x3, #1 +# CHECK-NEXT: [0,145] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . . ldp x0, x1, [x2, #16]! +# CHECK-NEXT: [0,146] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . . add x2, x3, #1 +# CHECK-NEXT: [0,147] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeeE . . . ldp x0, x1, [x2], #16 +# CHECK-NEXT: [0,148] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . . add x2, x3, #1 +# CHECK-NEXT: [0,149] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeeE . . ldpsw x0, x1, [x2, #16] +# CHECK-NEXT: [0,150] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . . add x2, x3, #1 +# CHECK-NEXT: [0,151] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeeE . ldpsw x0, x1, [x2, #16]! +# CHECK-NEXT: [0,152] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . DeeE . add x2, x3, #1 +# CHECK-NEXT: [0,153] . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . .DeeeE ldpsw x0, x1, [x2], #16 # CHECK: Average Wait times (based on the timeline view): # CHECK-NEXT: [0]: Executions diff --git a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-neon-instructions.s b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-neon-instructions.s --- a/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-neon-instructions.s +++ b/llvm/test/tools/llvm-mca/AArch64/Cortex/A55-neon-instructions.s @@ -1,1065 +1,861 @@ # NOTE: Assertions have been autogenerated by utils/update_mca_test_checks.py # RUN: llvm-mca -mtriple=aarch64 -mcpu=cortex-a55 -instruction-tables < %s | FileCheck %s -abs d29, d24 -abs v0.16b, v0.16b -abs v0.2d, v0.2d -abs v0.2s, v0.2s -abs v0.4h, v0.4h -abs v0.4s, v0.4s -abs v0.8b, v0.8b -abs v0.8h, v0.8h -add d17, d31, d29 -add v0.8b, v0.8b, v0.8b -addhn v0.2s, v0.2d, v0.2d -addhn v0.4h, v0.4s, v0.4s -addhn v0.8b, v0.8h, v0.8h -addhn2 v0.16b, v0.8h, v0.8h -addhn2 v0.4s, v0.2d, v0.2d -addhn2 v0.8h, v0.4s, v0.4s -addp v0.2d, v0.2d, v0.2d -addp v0.8b, v0.8b, v0.8b -and v0.8b, v0.8b, v0.8b -bic v0.4h, #15, lsl #8 -bic v0.8b, v0.8b, v0.8b -bif v0.16b, v0.16b, v0.16b -bit v0.16b, v0.16b, v0.16b -bsl v0.8b, v0.8b, v0.8b -cls v0.16b, v0.16b -cls v0.2s, v0.2s -cls v0.4h, v0.4h -cls v0.4s, v0.4s -cls v0.8b, v0.8b -cls v0.8h, v0.8h -clz v0.16b, v0.16b -clz v0.2s, v0.2s -clz v0.4h, v0.4h -clz v0.4s, v0.4s -clz v0.8b, v0.8b -clz v0.8h, v0.8h -cmeq d20, d21, 0 -cmeq d20, d21, d22 -cmeq v0.16b, v0.16b, 0 -cmeq v0.16b, v0.16b, v0.16b -cmge d20, d21, 0 -cmge d20, d21, d22 -cmge v0.4h, v0.4h, v0.4h -cmge v0.8b, v0.8b, 0 -cmgt d20, d21, 0 -cmgt d20, d21, d22 -cmgt v0.2s, v0.2s, 0 -cmgt v0.4s, v0.4s, v0.4s -cmhi d20, d21, d22 -cmhi v0.8h, v0.8h, v0.8h -cmhs d20, d21, d22 -cmhs v0.8b, v0.8b, v0.8b -cmle d20, d21, 0 -cmle v0.2d, v0.2d, 0 -cmlt d20, d21, 0 -cmlt v0.8h, v0.8h, 0 -cmtst d20, d21, d22 -cmtst v0.2s, v0.2s, v0.2s -cnt v0.16b, v0.16b -cnt v0.8b, v0.8b -dup v0.16b,w28 -dup v0.2d,x28 -dup v0.2s,w28 -dup v0.4h,w28 -dup v0.4s,w28 -dup v0.8b,w28 -dup v0.8h,w28 -eor v0.16b, v0.16b, v0.16b -ext v0.16b, v0.16b, v0.16b, #3 -ext v0.8b, v0.8b, v0.8b, #3 -fabd d29, d24, d20 -fabd s29, s24, s20 -fabd v0.4s, v0.4s, v0.4s -fabs v0.2d, v0.2d -fabs v0.2s, v0.2s -fabs v0.4h, v0.4h -fabs v0.4s, v0.4s -fabs v0.8h, v0.8h -facge d20, d21, d22 -facge s10, s11, s12 -facge v0.4s, v0.4s, v0.4s -facgt d20, d21, d22 -facgt s10, s11, s12 -facgt v0.2d, v0.2d, v0.2d -fadd v0.4s, v0.4s, v0.4s -faddp v0.2s, v0.2s, v0.2s -faddp v0.4s, v0.4s, v0.4s -fcmeq d20, d21, #0.0 -fcmeq d20, d21, d22 -fcmeq s10, s11, #0.0 -fcmeq s10, s11, s12 -fcmeq v0.2s, v0.2s, #0.0 -fcmeq v0.2s, v0.2s, v0.2s -fcmge d20, d21, #0.0 -fcmge d20, d21, d22 -fcmge s10, s11, #0.0 -fcmge s10, s11, s12 -fcmge v0.2d, v0.2d, #0.0 -fcmge v0.4s, v0.4s, v0.4s -fcmgt d20, d21, #0.0 -fcmgt d20, d21, d22 -fcmgt s10, s11, #0.0 -fcmgt s10, s11, s12 -fcmgt v0.4s, v0.4s, #0.0 -fcmgt v0.4s, v0.4s, v0.4s -fcmle d20, d21, #0.0 -fcmle s10, s11, #0.0 -fcmle v0.2d, v0.2d, #0.0 -fcmlt d20, d21, #0.0 -fcmlt s10, s11, #0.0 -fcmlt v0.4s, v0.4s, #0.0 -fcvtas d21, d14 -fcvtas s12, s13 -fcvtas v0.2d, v0.2d -fcvtas v0.2s, v0.2s -fcvtas v0.4h, v0.4h -fcvtas v0.4s, v0.4s -fcvtas v0.8h, v0.8h -fcvtau d21, d14 -fcvtau s12, s13 -fcvtau v0.2d, v0.2d -fcvtau v0.2s, v0.2s -fcvtau v0.4h, v0.4h -fcvtau v0.4s, v0.4s -fcvtau v0.8h, v0.8h -fcvtl v0.2d, v0.2s -fcvtl v0.4s, v0.4h -fcvtl2 v0.2d, v0.4s -fcvtl2 v0.4s, v0.8h -fcvtms d21, d14 -fcvtms s22, s13 -fcvtms v0.2d, v0.2d -fcvtms v0.2s, v0.2s -fcvtms v0.4h, v0.4h -fcvtms v0.4s, v0.4s -fcvtms v0.8h, v0.8h -fcvtmu d21, d14 -fcvtmu s12, s13 -fcvtmu v0.2d, v0.2d -fcvtmu v0.2s, v0.2s -fcvtmu v0.4h, v0.4h -fcvtmu v0.4s, v0.4s -fcvtmu v0.8h, v0.8h -fcvtn v0.2s, v0.2d -fcvtn v0.4h, v0.4s -fcvtn2 v0.4s, v0.2d -fcvtn2 v0.8h, v0.4s -fcvtns d21, d14 -fcvtns s22, s13 -fcvtns v0.2d, v0.2d -fcvtns v0.2s, v0.2s -fcvtns v0.4h, v0.4h -fcvtns v0.4s, v0.4s -fcvtns v0.8h, v0.8h -fcvtnu d21, d14 -fcvtnu s12, s13 -fcvtnu v0.2d, v0.2d -fcvtnu v0.2s, v0.2s -fcvtnu v0.4h, v0.4h -fcvtnu v0.4s, v0.4s -fcvtnu v0.8h, v0.8h -fcvtps d21, d14 -fcvtps s22, s13 -fcvtps v0.2d, v0.2d -fcvtps v0.2s, v0.2s -fcvtps v0.4h, v0.4h -fcvtps v0.4s, v0.4s -fcvtps v0.8h, v0.8h -fcvtpu d21, d14 -fcvtpu s12, s13 -fcvtpu v0.2d, v0.2d -fcvtpu v0.2s, v0.2s -fcvtpu v0.4h, v0.4h -fcvtpu v0.4s, v0.4s -fcvtpu v0.8h, v0.8h -fcvtxn s22, d13 -fcvtxn v0.2s, v0.2d -fcvtxn2 v0.4s, v0.2d -fcvtzs d21, d12, #1 -fcvtzs d21, d14 -fcvtzs s12, s13 -fcvtzs s21, s12, #1 -fcvtzs v0.2d, v0.2d -fcvtzs v0.2d, v0.2d, #3 -fcvtzs v0.2s, v0.2s -fcvtzs v0.2s, v0.2s, #3 -fcvtzs v0.4h, v0.4h -fcvtzs v0.4s, v0.4s -fcvtzs v0.4s, v0.4s, #3 -fcvtzs v0.8h, v0.8h -fcvtzu d21, d12, #1 -fcvtzu d21, d14 -fcvtzu s12, s13 -fcvtzu s21, s12, #1 -fcvtzu v0.2d, v0.2d -fcvtzu v0.2d, v0.2d, #3 -fcvtzu v0.2s, v0.2s -fcvtzu v0.2s, v0.2s, #3 -fcvtzu v0.4h, v0.4h -fcvtzu v0.4s, v0.4s -fcvtzu v0.4s, v0.4s, #3 -fcvtzu v0.8h, v0.8h -fdiv v0.2s, v0.2s, v0.2s -fmax v0.2d, v0.2d, v0.2d -fmax v0.2s, v0.2s, v0.2s -fmax v0.4s, v0.4s, v0.4s -fmaxnm v0.2d, v0.2d, v0.2d -fmaxnm v0.2s, v0.2s, v0.2s -fmaxnm v0.4s, v0.4s, v0.4s -fmaxnmp v0.2d, v0.2d, v0.2d -fmaxnmp v0.2s, v0.2s, v0.2s -fmaxnmp v0.4s, v0.4s, v0.4s -fmaxp v0.2d, v0.2d, v0.2d -fmaxp v0.2s, v0.2s, v0.2s -fmaxp v0.4s, v0.4s, v0.4s -fmin v0.2d, v0.2d, v0.2d -fmin v0.2s, v0.2s, v0.2s -fmin v0.4s, v0.4s, v0.4s -fminnm v0.2d, v0.2d, v0.2d -fminnm v0.2s, v0.2s, v0.2s -fminnm v0.4s, v0.4s, v0.4s -fminnmp v0.2d, v0.2d, v0.2d -fminnmp v0.2s, v0.2s, v0.2s -fminnmp v0.4s, v0.4s, v0.4s -fminp v0.2d, v0.2d, v0.2d -fminp v0.2s, v0.2s, v0.2s -fminp v0.4s, v0.4s, v0.4s -fmla d0, d1, v0.d[1] -fmla s0, s1, v0.s[3] -fmla v0.2s, v0.2s, v0.2s -fmls d0, d4, v0.d[1] -fmls s3, s5, v0.s[3] -fmls v0.2s, v0.2s, v0.2s -fmov v0.2d, #-1.25 -fmov v0.2s, #13.0 -fmov v0.4s, #1.0 -fmul d0, d1, v0.d[1] -fmul s0, s1, v0.s[3] -fmul v0.2s, v0.2s, v0.2s -fmulx d0, d4, v0.d[1] -fmulx d23, d11, d1 -fmulx s20, s22, s15 -fmulx s3, s5, v0.s[3] -fmulx v0.2d, v0.2d, v0.2d -fmulx v0.2s, v0.2s, v0.2s -fmulx v0.4s, v0.4s, v0.4s -fneg v0.2d, v0.2d -fneg v0.2s, v0.2s -fneg v0.4h, v0.4h -fneg v0.4s, v0.4s -fneg v0.8h, v0.8h -frecpe d13, d13 -frecpe s19, s14 -frecpe v0.2d, v0.2d -frecpe v0.2s, v0.2s -frecpe v0.4h, v0.4h -frecpe v0.4s, v0.4s -frecpe v0.8h, v0.8h -frecps v0.4s, v0.4s, v0.4s -frecps d22, d30, d21 -frecps s21, s16, s13 -frecpx d16, d19 -frecpx s18, s10 -frinta v0.2d, v0.2d -frinta v0.2s, v0.2s -frinta v0.4h, v0.4h -frinta v0.4s, v0.4s -frinta v0.8h, v0.8h -frinti v0.2d, v0.2d -frinti v0.2s, v0.2s -frinti v0.4h, v0.4h -frinti v0.4s, v0.4s -frinti v0.8h, v0.8h -frintm v0.2d, v0.2d -frintm v0.2s, v0.2s -frintm v0.4h, v0.4h -frintm v0.4s, v0.4s -frintm v0.8h, v0.8h -frintn v0.2d, v0.2d -frintn v0.2s, v0.2s -frintn v0.4h, v0.4h -frintn v0.4s, v0.4s -frintn v0.8h, v0.8h -frintp v0.2d, v0.2d -frintp v0.2s, v0.2s -frintp v0.4h, v0.4h -frintp v0.4s, v0.4s -frintp v0.8h, v0.8h -frintx v0.2d, v0.2d -frintx v0.2s, v0.2s -frintx v0.4h, v0.4h -frintx v0.4s, v0.4s -frintx v0.8h, v0.8h -frintz v0.2d, v0.2d -frintz v0.2s, v0.2s -frintz v0.4h, v0.4h -frintz v0.4s, v0.4s -frintz v0.8h, v0.8h -frsqrte d21, d12 -frsqrte s22, s13 -frsqrte v0.2d, v0.2d -frsqrte v0.2s, v0.2s -frsqrte v0.4h, v0.4h -frsqrte v0.4s, v0.4s -frsqrte v0.8h, v0.8h -frsqrts d8, d22, d18 -frsqrts s21, s5, s12 -frsqrts v0.2d, v0.2d, v0.2d -fsqrt v0.2d, v0.2d -fsqrt v0.2s, v0.2s -fsqrt v0.4h, v0.4h -fsqrt v0.4s, v0.4s -fsqrt v0.8h, v0.8h -fsub v0.2s, v0.2s, v0.2s -ld1 { v0.16b }, [x0] -ld1 { v0.2d, v1.2d, v2.2d }, [x0], #48 -ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] -ld1 { v0.4s, v1.4s }, [sp], #32 -ld1 { v0.4s, v1.4s, v2.4s }, [sp] -ld1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3 -ld1 { v0.8h }, [x15], x2 -ld1 { v0.8h, v1.8h }, [x15] -ld1 { v0.b }[9], [x0] -ld1 { v0.b }[9], [x0], #1 -ld1r { v0.16b }, [x0] -ld1r { v0.16b }, [x0], #1 -ld1r { v0.8h }, [x15] -ld1r { v0.8h }, [x15], #2 -ld2 { v0.16b, v1.16b }, [x0], x1 -ld2 { v0.8b, v1.8b }, [x0] -ld2 { v0.h, v1.h }[7], [x15] -ld2 { v0.h, v1.h }[7], [x15], #4 -ld2r { v0.2d, v1.2d }, [x0] -ld2r { v0.2d, v1.2d }, [x0], #16 -ld2r { v0.4s, v1.4s }, [sp] -ld2r { v0.4s, v1.4s }, [sp], #8 -ld3 { v0.4h, v1.4h, v2.4h }, [x15] -ld3 { v0.8h, v1.8h, v2.8h }, [x15], x2 -ld3 { v0.s, v1.s, v2.s }[3], [sp] -ld3 { v0.s, v1.s, v2.s }[3], [sp], x3 -ld3r { v0.4h, v1.4h, v2.4h }, [x15] -ld3r { v0.4h, v1.4h, v2.4h }, [x15], #6 -ld3r { v0.8b, v1.8b, v2.8b }, [x0] -ld3r { v0.8b, v1.8b, v2.8b }, [x0], #3 -ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] -ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64 -ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0] -ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0], #32 -ld4 { v0.h, v1.h, v2.h, v3.h }[7], [x0], x0 -ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [sp] -ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [sp], x7 -ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] -ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x30 -mla v0.8b, v0.8b, v0.8b -mls v0.4h, v0.4h, v0.4h -mov b0, v0.b[15] -mov d6, v0.d[1] -mov h2, v0.h[5] -mov s17, v0.s[2] -mov v0.16b, v0.16b -mov v0.8b, v0.8b -movi d15, #0xff00ff00ff00ff -movi v0.16b, #31 -movi v0.2d, #0xff0000ff0000ffff -movi v0.2s, #8, msl #8 -movi v0.4s, #255, lsl #24 -movi v0.8b, #255 -mul v0.8b, v0.8b, v0.8b -mvni v0.2s, 0 -mvni v0.4s, #16, msl #16 -neg d29, d24 -neg v0.16b, v0.16b -neg v0.2d, v0.2d -neg v0.2s, v0.2s -neg v0.4h, v0.4h -neg v0.4s, v0.4s -neg v0.8b, v0.8b -neg v0.8h, v0.8h -not v0.16b, v0.16b -not v0.8b, v0.8b -orn v0.16b, v0.16b, v0.16b -orr v0.16b, v0.16b, v0.16b -orr v0.8h, #31 -pmul v0.16b, v0.16b, v0.16b -pmul v0.8b, v0.8b, v0.8b -pmull v0.8h, v0.8b, v0.8b -pmull2 v0.8h, v0.16b, v0.16b -raddhn v0.2s, v0.2d, v0.2d -raddhn v0.4h, v0.4s, v0.4s -raddhn v0.8b, v0.8h, v0.8h -raddhn2 v0.16b, v0.8h, v0.8h -raddhn2 v0.4s, v0.2d, v0.2d -raddhn2 v0.8h, v0.4s, v0.4s -rbit v0.16b, v0.16b -rbit v0.8b, v0.8b -rev16 v21.8b, v1.8b -rev16 v30.16b, v31.16b -rev32 v0.4h, v9.4h -rev32 v21.8b, v1.8b -rev32 v30.16b, v31.16b -rev32 v4.8h, v7.8h -rev64 v0.16b, v31.16b -rev64 v1.8b, v9.8b -rev64 v13.4h, v21.4h -rev64 v2.8h, v4.8h -rev64 v4.2s, v0.2s -rev64 v6.4s, v8.4s -rshrn v0.2s, v0.2d, #3 -rshrn v0.4h, v0.4s, #3 -rshrn v0.8b, v0.8h, #3 -rshrn2 v0.16b, v0.8h, #3 -rshrn2 v0.4s, v0.2d, #3 -rshrn2 v0.8h, v0.4s, #3 -rsubhn v0.2s, v0.2d, v0.2d -rsubhn v0.4h, v0.4s, v0.4s -rsubhn v0.8b, v0.8h, v0.8h -rsubhn2 v0.16b, v0.8h, v0.8h -rsubhn2 v0.4s, v0.2d, v0.2d -rsubhn2 v0.8h, v0.4s, v0.4s -saba v0.16b, v0.16b, v0.16b -sabal v0.2d, v0.2s, v0.2s -sabal v0.4s, v0.4h, v0.4h -sabal v0.8h, v0.8b, v0.8b -sabal2 v0.2d, v0.4s, v0.4s -sabal2 v0.4s, v0.8h, v0.8h -sabal2 v0.8h, v0.16b, v0.16b -sabd v0.4h, v0.4h, v0.4h -sabdl v0.2d, v0.2s, v0.2s -sabdl v0.4s, v0.4h, v0.4h -sabdl v0.8h, v0.8b, v0.8b -sabdl2 v0.2d, v0.4s, v0.4s -sabdl2 v0.4s, v0.8h, v0.8h -sabdl2 v0.8h, v0.16b, v0.16b -sadalp v0.1d, v0.2s -sadalp v0.2d, v0.4s -sadalp v0.2s, v0.4h -sadalp v0.4h, v0.8b -sadalp v0.4s, v0.8h -sadalp v0.8h, v0.16b -saddl v0.2d, v0.2s, v0.2s -saddl v0.4s, v0.4h, v0.4h -saddl v0.8h, v0.8b, v0.8b -saddl2 v0.2d, v0.4s, v0.4s -saddl2 v0.4s, v0.8h, v0.8h -saddl2 v0.8h, v0.16b, v0.16b -saddlp v0.1d, v0.2s -saddlp v0.2d, v0.4s -saddlp v0.2s, v0.4h -saddlp v0.4h, v0.8b -saddlp v0.4s, v0.8h -saddlp v0.8h, v0.16b -saddw v0.2d, v0.2d, v0.2s -saddw v0.4s, v0.4s, v0.4h -saddw v0.8h, v0.8h, v0.8b -saddw2 v0.2d, v0.2d, v0.4s -saddw2 v0.4s, v0.4s, v0.8h -saddw2 v0.8h, v0.8h, v0.16b -scvtf d21, d12 -scvtf d21, d12, #64 -scvtf s22, s13 -scvtf s22, s13, #32 -scvtf v0.2d, v0.2d -scvtf v0.2d, v0.2d, #3 -scvtf v0.2s, v0.2s -scvtf v0.2s, v0.2s, #3 -scvtf v0.4h, v0.4h -scvtf v0.4s, v0.4s -scvtf v0.4s, v0.4s, #3 -scvtf v0.8h, v0.8h -shadd v0.8b, v0.8b, v0.8b -shl d7, d10, #12 -shl v0.16b, v0.16b, #3 -shl v0.2d, v0.2d, #3 -shl v0.4h, v0.4h, #3 -shl v0.4s, v0.4s, #3 -shll v0.2d, v0.2s, #32 -shll v0.4s, v0.4h, #16 -shll v0.8h, v0.8b, #8 -shll v0.2d, v0.2s, #32 -shll v0.4s, v0.4h, #16 -shll v0.8h, v0.8b, #8 -shll2 v0.2d, v0.4s, #32 -shll2 v0.4s, v0.8h, #16 -shll2 v0.8h, v0.16b, #8 -shll2 v0.2d, v0.4s, #32 -shll2 v0.4s, v0.8h, #16 -shll2 v0.8h, v0.16b, #8 -shrn v0.2s, v0.2d, #3 -shrn v0.4h, v0.4s, #3 -shrn v0.8b, v0.8h, #3 -shrn2 v0.16b, v0.8h, #3 -shrn2 v0.4s, v0.2d, #3 -shrn2 v0.8h, v0.4s, #3 -shsub v0.2s, v0.2s, v0.2s -shsub v0.4h, v0.4h, v0.4h -sli d10, d14, #12 -sli v0.16b, v0.16b, #3 -sli v0.2d, v0.2d, #3 -sli v0.2s, v0.2s, #3 -sli v0.4h, v0.4h, #3 -sli v0.4s, v0.4s, #3 -sli v0.8b, v0.8b, #3 -sli v0.8h, v0.8h, #3 -smax v0.2s, v0.2s, v0.2s -smax v0.4h, v0.4h, v0.4h -smax v0.8b, v0.8b, v0.8b -smaxp v0.2s, v0.2s, v0.2s -smaxp v0.4h, v0.4h, v0.4h -smaxp v0.8b, v0.8b, v0.8b -smin v0.16b, v0.16b, v0.16b -smin v0.4s, v0.4s, v0.4s -smin v0.8h, v0.8h, v0.8h -sminp v0.16b, v0.16b, v0.16b -sminp v0.4s, v0.4s, v0.4s -sminp v0.8h, v0.8h, v0.8h -smlal v0.2d, v0.2s, v0.2s -smlal v0.4s, v0.4h, v0.4h -smlal v0.8h, v0.8b, v0.8b -smlal2 v0.2d, v0.4s, v0.4s -smlal2 v0.4s, v0.8h, v0.8h -smlal2 v0.8h, v0.16b, v0.16b -smlsl v0.2d, v0.2s, v0.2s -smlsl v0.4s, v0.4h, v0.4h -smlsl v0.8h, v0.8b, v0.8b -smlsl2 v0.2d, v0.4s, v0.4s -smlsl2 v0.4s, v0.8h, v0.8h -smlsl2 v0.8h, v0.16b, v0.16b -smull v0.2d, v0.2s, v0.2s -smull v0.4s, v0.4h, v0.4h -smull v0.8h, v0.8b, v0.8b -smull2 v0.2d, v0.4s, v0.4s -smull2 v0.4s, v0.8h, v0.8h -smull2 v0.8h, v0.16b, v0.16b -sqabs b19, b14 -sqabs d18, d12 -sqabs h21, h15 -sqabs s20, s12 -sqabs v0.16b, v0.16b -sqabs v0.2d, v0.2d -sqabs v0.2s, v0.2s -sqabs v0.4h, v0.4h -sqabs v0.4s, v0.4s -sqabs v0.8b, v0.8b -sqabs v0.8h, v0.8h -sqadd b20, b11, b15 -sqadd v0.16b, v0.16b, v0.16b -sqadd v0.2s, v0.2s, v0.2s -sqdmlal d19, s24, s12 -sqdmlal d8, s9, v0.s[1] -sqdmlal s0, h0, v0.h[3] -sqdmlal s17, h27, h12 -sqdmlal v0.2d, v0.2s, v0.2s -sqdmlal v0.4s, v0.4h, v0.4h -sqdmlal2 v0.2d, v0.4s, v0.4s -sqdmlal2 v0.4s, v0.8h, v0.8h -sqdmlsl d12, s23, s13 -sqdmlsl d8, s9, v0.s[1] -sqdmlsl s0, h0, v0.h[3] -sqdmlsl s14, h12, h25 -sqdmlsl v0.2d, v0.2s, v0.2s -sqdmlsl v0.4s, v0.4h, v0.4h -sqdmlsl2 v0.2d, v0.4s, v0.4s -sqdmlsl2 v0.4s, v0.8h, v0.8h -sqdmulh h10, h11, h12 -sqdmulh h7, h15, v0.h[3] -sqdmulh s15, s14, v0.s[1] -sqdmulh s20, s21, s2 -sqdmulh v0.2s, v0.2s, v0.2s -sqdmulh v0.4s, v0.4s, v0.4s -sqdmull d1, s1, v0.s[1] -sqdmull d15, s22, s12 -sqdmull s1, h1, v0.h[3] -sqdmull s12, h22, h12 -sqdmull v0.2d, v0.2s, v0.2s -sqdmull v0.4s, v0.4h, v0.4h -sqdmull2 v0.2d, v0.4s, v0.4s -sqdmull2 v0.4s, v0.8h, v0.8h -sqneg b19, b14 -sqneg d18, d12 -sqneg h21, h15 -sqneg s20, s12 -sqneg v0.16b, v0.16b -sqneg v0.2d, v0.2d -sqneg v0.2s, v0.2s -sqneg v0.4h, v0.4h -sqneg v0.4s, v0.4s -sqneg v0.8b, v0.8b -sqneg v0.8h, v0.8h -sqrdmulh h10, h11, h12 -sqrdmulh h7, h15, v0.h[3] -sqrdmulh s15, s14, v0.s[1] -sqrdmulh s20, s21, s2 -sqrdmulh v0.4h, v0.4h, v0.4h -sqrdmulh v0.8h, v0.8h, v0.8h -sqrshl d31, d31, d31 -sqrshl h3, h4, h15 -sqrshl v0.2s, v0.2s, v0.2s -sqrshl v0.4h, v0.4h, v0.4h -sqrshl v0.8b, v0.8b, v0.8b -sqrshrn b10, h13, #2 -sqrshrn h15, s10, #6 -sqrshrn s15, d12, #9 -sqrshrn v0.2s, v0.2d, #3 -sqrshrn v0.4h, v0.4s, #3 -sqrshrn v0.8b, v0.8h, #3 -sqrshrn2 v0.16b, v0.8h, #3 -sqrshrn2 v0.4s, v0.2d, #3 -sqrshrn2 v0.8h, v0.4s, #3 -sqrshrun b17, h10, #6 -sqrshrun h10, s13, #15 -sqrshrun s22, d16, #31 -sqrshrun v0.2s, v0.2d, #3 -sqrshrun v0.4h, v0.4s, #3 -sqrshrun v0.8b, v0.8h, #3 -sqrshrun2 v0.16b, v0.8h, #3 -sqrshrun2 v0.4s, v0.2d, #3 -sqrshrun2 v0.8h, v0.4s, #3 -sqshl b11, b19, #7 -sqshl d15, d16, #51 -sqshl d31, d31, d31 -sqshl h13, h18, #11 -sqshl h3, h4, h15 -sqshl s14, s17, #22 -sqshl v0.16b, v0.16b, #3 -sqshl v0.2d, v0.2d, #3 -sqshl v0.2s, v0.2s, #3 -sqshl v0.2s, v0.2s, v0.2s -sqshl v0.4h, v0.4h, #3 -sqshl v0.4h, v0.4h, v0.4h -sqshl v0.4s, v0.4s, #3 -sqshl v0.8b, v0.8b, #3 -sqshl v0.8b, v0.8b, v0.8b -sqshl v0.8h, v0.8h, #3 -sqshlu b15, b18, #6 -sqshlu d11, d13, #32 -sqshlu h19, h17, #6 -sqshlu s16, s14, #25 -sqshlu v0.16b, v0.16b, #3 -sqshlu v0.2d, v0.2d, #3 -sqshlu v0.2s, v0.2s, #3 -sqshlu v0.4h, v0.4h, #3 -sqshlu v0.4s, v0.4s, #3 -sqshlu v0.8b, v0.8b, #3 -sqshlu v0.8h, v0.8h, #3 -sqshrn b10, h15, #5 -sqshrn h17, s10, #4 -sqshrn s18, d10, #31 -sqshrn v0.2s, v0.2d, #3 -sqshrn v0.4h, v0.4s, #3 -sqshrn v0.8b, v0.8h, #3 -sqshrn2 v0.16b, v0.8h, #3 -sqshrn2 v0.4s, v0.2d, #3 -sqshrn2 v0.8h, v0.4s, #3 -sqshrun b15, h10, #7 -sqshrun h20, s14, #3 -sqshrun s10, d15, #15 -sqshrun v0.2s, v0.2d, #3 -sqshrun v0.4h, v0.4s, #3 -sqshrun v0.8b, v0.8h, #3 -sqshrun2 v0.16b, v0.8h, #3 -sqshrun2 v0.4s, v0.2d, #3 -sqshrun2 v0.8h, v0.4s, #3 -sqsub s20, s10, s7 -sqsub v0.2d, v0.2d, v0.2d -sqsub v0.4s, v0.4s, v0.4s -sqsub v0.8b, v0.8b, v0.8b -sqxtn b18, h18 -sqxtn h20, s17 -sqxtn s19, d14 -sqxtn v0.2s, v0.2d -sqxtn v0.4h, v0.4s -sqxtn v0.8b, v0.8h -sqxtn2 v0.16b, v0.8h -sqxtn2 v0.4s, v0.2d -sqxtn2 v0.8h, v0.4s -sqxtun b19, h14 -sqxtun h21, s15 -sqxtun s20, d12 -sqxtun v0.2s, v0.2d -sqxtun v0.4h, v0.4s -sqxtun v0.8b, v0.8h -sqxtun2 v0.16b, v0.8h -sqxtun2 v0.4s, v0.2d -sqxtun2 v0.8h, v0.4s -srhadd v0.2s, v0.2s, v0.2s -srhadd v0.4h, v0.4h, v0.4h -srhadd v0.8b, v0.8b, v0.8b -sri d10, d12, #14 -sri v0.16b, v0.16b, #3 -sri v0.2d, v0.2d, #3 -sri v0.2s, v0.2s, #3 -sri v0.4h, v0.4h, #3 -sri v0.4s, v0.4s, #3 -sri v0.8b, v0.8b, #3 -sri v0.8h, v0.8h, #3 -srshl d16, d16, d16 -srshl v0.2s, v0.2s, v0.2s -srshl v0.4h, v0.4h, v0.4h -srshl v0.8b, v0.8b, v0.8b -srshr d19, d18, #7 -srshr v0.16b, v0.16b, #3 -srshr v0.2d, v0.2d, #3 -srshr v0.2s, v0.2s, #3 -srshr v0.4h, v0.4h, #3 -srshr v0.4s, v0.4s, #3 -srshr v0.8b, v0.8b, #3 -srshr v0.8h, v0.8h, #3 -srsra d15, d11, #19 -srsra v0.16b, v0.16b, #3 -srsra v0.2d, v0.2d, #3 -srsra v0.2s, v0.2s, #3 -srsra v0.4h, v0.4h, #3 -srsra v0.4s, v0.4s, #3 -srsra v0.8b, v0.8b, #3 -srsra v0.8h, v0.8h, #3 -sshl d31, d31, d31 -sshl v0.2d, v0.2d, v0.2d -sshl v0.2s, v0.2s, v0.2s -sshl v0.4h, v0.4h, v0.4h -sshl v0.8b, v0.8b, v0.8b -sshll v0.2d, v0.2s, #3 -sshll2 v0.4s, v0.8h, #3 -sshr d15, d16, #12 -sshr v0.16b, v0.16b, #3 -sshr v0.2d, v0.2d, #3 -sshr v0.2s, v0.2s, #3 -sshr v0.4h, v0.4h, #3 -sshr v0.4s, v0.4s, #3 -sshr v0.8b, v0.8b, #3 -sshr v0.8h, v0.8h, #3 -ssra d18, d12, #21 -ssra v0.16b, v0.16b, #3 -ssra v0.2d, v0.2d, #3 -ssra v0.2s, v0.2s, #3 -ssra v0.4h, v0.4h, #3 -ssra v0.4s, v0.4s, #3 -ssra v0.8b, v0.8b, #3 -ssra v0.8h, v0.8h, #3 -ssubl v0.2d, v0.2s, v0.2s -ssubl v0.4s, v0.4h, v0.4h -ssubl v0.8h, v0.8b, v0.8b -ssubl2 v0.2d, v0.4s, v0.4s -ssubl2 v0.4s, v0.8h, v0.8h -ssubl2 v0.8h, v0.16b, v0.16b -ssubw v0.2d, v0.2d, v0.2s -ssubw v0.4s, v0.4s, v0.4h -ssubw v0.8h, v0.8h, v0.8b -ssubw2 v0.2d, v0.2d, v0.4s -ssubw2 v0.4s, v0.4s, v0.8h -ssubw2 v0.8h, v0.8h, v0.16b -st1 { v0.16b }, [x0] -st1 { v0.2d, v1.2d, v2.2d }, [x0], #48 -st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] -st1 { v0.4s, v1.4s }, [sp], #32 -st1 { v0.4s, v1.4s, v2.4s }, [sp] -st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3 -st1 { v0.8h }, [x15], x2 -st1 { v0.8h, v1.8h }, [x15] -st1 { v0.d }[1], [x0] -st1 { v0.d }[1], [x0], #8 -st2 { v0.16b, v1.16b }, [x0], x1 -st2 { v0.8b, v1.8b }, [x0] -st2 { v0.s, v1.s }[3], [sp] -st2 { v0.s, v1.s }[3], [sp], #8 -st3 { v0.4h, v1.4h, v2.4h }, [x15] -st3 { v0.8h, v1.8h, v2.8h }, [x15], x2 -st3 { v0.h, v1.h, v2.h }[7], [x15] -st3 { v0.h, v1.h, v2.h }[7], [x15], #6 -st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] -st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64 -st4 { v0.b, v1.b, v2.b, v3.b }[9], [x0] -st4 { v0.b, v1.b, v2.b, v3.b }[9], [x0], x5 -sub d15, d5, d16 -sub v0.2d, v0.2d, v0.2d -suqadd b19, b14 -suqadd d18, d22 -suqadd h20, h15 -suqadd s21, s12 -suqadd v0.16b, v0.16b -suqadd v0.2d, v0.2d -suqadd v0.2s, v0.2s -suqadd v0.4h, v0.4h -suqadd v0.4s, v0.4s -suqadd v0.8b, v0.8b -suqadd v0.8h, v0.8h -tbl v0.16b, { v0.16b }, v0.16b -tbl v0.16b, { v0.16b, v1.16b }, v0.16b -tbl v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b -tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b -tbl v0.8b, { v0.16b }, v0.8b -tbl v0.8b, { v0.16b, v1.16b }, v0.8b -tbl v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b -tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b -tbx v0.16b, { v0.16b }, v0.16b -tbx v0.16b, { v0.16b, v1.16b }, v0.16b -tbx v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b -tbx v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b -tbx v0.8b, { v0.16b }, v0.8b -tbx v0.8b, { v0.16b, v1.16b }, v0.8b -tbx v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b -tbx v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b -trn1 v0.16b, v0.16b, v0.16b -trn1 v0.2d, v0.2d, v0.2d -trn1 v0.2s, v0.2s, v0.2s -trn1 v0.4h, v0.4h, v0.4h -trn1 v0.4s, v0.4s, v0.4s -trn1 v0.8b, v0.8b, v0.8b -trn1 v0.8h, v0.8h, v0.8h -trn2 v0.16b, v0.16b, v0.16b -trn2 v0.2d, v0.2d, v0.2d -trn2 v0.2s, v0.2s, v0.2s -trn2 v0.4h, v0.4h, v0.4h -trn2 v0.4s, v0.4s, v0.4s -trn2 v0.8b, v0.8b, v0.8b -trn2 v0.8h, v0.8h, v0.8h -uaba v0.8b, v0.8b, v0.8b -uabal v0.2d, v0.2s, v0.2s -uabal v0.4s, v0.4h, v0.4h -uabal v0.8h, v0.8b, v0.8b -uabal2 v0.2d, v0.4s, v0.4s -uabal2 v0.4s, v0.8h, v0.8h -uabal2 v0.8h, v0.16b, v0.16b -uabd v0.4h, v0.4h, v0.4h -uabdl v0.2d, v0.2s, v0.2s -uabdl v0.4s, v0.4h, v0.4h -uabdl v0.8h, v0.8b, v0.8b -uabdl2 v0.2d, v0.4s, v0.4s -uabdl2 v0.4s, v0.8h, v0.8h -uabdl2 v0.8h, v0.16b, v0.16b -uadalp v0.1d, v0.2s -uadalp v0.2d, v0.4s -uadalp v0.2s, v0.4h -uadalp v0.4h, v0.8b -uadalp v0.4s, v0.8h -uadalp v0.8h, v0.16b -uaddl v0.2d, v0.2s, v0.2s -uaddl v0.4s, v0.4h, v0.4h -uaddl v0.8h, v0.8b, v0.8b -uaddl2 v0.2d, v0.4s, v0.4s -uaddl2 v0.4s, v0.8h, v0.8h -uaddl2 v0.8h, v0.16b, v0.16b -uaddlp v0.1d, v0.2s -uaddlp v0.2d, v0.4s -uaddlp v0.2s, v0.4h -uaddlp v0.4h, v0.8b -uaddlp v0.4s, v0.8h -uaddlp v0.8h, v0.16b -uaddw v0.2d, v0.2d, v0.2s -uaddw v0.4s, v0.4s, v0.4h -uaddw v0.8h, v0.8h, v0.8b -uaddw2 v0.2d, v0.2d, v0.4s -uaddw2 v0.4s, v0.4s, v0.8h -uaddw2 v0.8h, v0.8h, v0.16b -ucvtf d21, d14 -ucvtf d21, d14, #64 -ucvtf s22, s13 -ucvtf s22, s13, #32 -ucvtf v0.2d, v0.2d -ucvtf v0.2d, v0.2d, #3 -ucvtf v0.2s, v0.2s -ucvtf v0.2s, v0.2s, #3 -ucvtf v0.4h, v0.4h -ucvtf v0.4s, v0.4s -ucvtf v0.4s, v0.4s, #3 -ucvtf v0.8h, v0.8h -uhadd v0.16b, v0.16b, v0.16b -uhadd v0.8h, v0.8h, v0.8h -uhsub v0.4s, v0.4s, v0.4s -umax v0.16b, v0.16b, v0.16b -umax v0.4s, v0.4s, v0.4s -umax v0.8h, v0.8h, v0.8h -umaxp v0.16b, v0.16b, v0.16b -umaxp v0.4s, v0.4s, v0.4s -umaxp v0.8h, v0.8h, v0.8h -umin v0.2s, v0.2s, v0.2s -umin v0.4h, v0.4h, v0.4h -umin v0.8b, v0.8b, v0.8b -uminp v0.2s, v0.2s, v0.2s -uminp v0.4h, v0.4h, v0.4h -uminp v0.8b, v0.8b, v0.8b -umlal v0.2d, v0.2s, v0.2s -umlal v0.4s, v0.4h, v0.4h -umlal v0.8h, v0.8b, v0.8b -umlal2 v0.2d, v0.4s, v0.4s -umlal2 v0.4s, v0.8h, v0.8h -umlal2 v0.8h, v0.16b, v0.16b -umlsl v0.2d, v0.2s, v0.2s -umlsl v0.4s, v0.4h, v0.4h -umlsl v0.8h, v0.8b, v0.8b -umlsl2 v0.2d, v0.4s, v0.4s -umlsl2 v0.4s, v0.8h, v0.8h -umlsl2 v0.8h, v0.16b, v0.16b -umull v0.2d, v0.2s, v0.2s -umull v0.4s, v0.4h, v0.4h -umull v0.8h, v0.8b, v0.8b -umull2 v0.2d, v0.4s, v0.4s -umull2 v0.4s, v0.8h, v0.8h -umull2 v0.8h, v0.16b, v0.16b -uqadd h0, h1, h5 -uqadd v0.8h, v0.8h, v0.8h -uqrshl b11, b20, b30 -uqrshl s23, s20, s16 -uqrshl v0.16b, v0.16b, v0.16b -uqrshl v0.4s, v0.4s, v0.4s -uqrshl v0.4s, v0.4s, v0.4s -uqrshl v0.8h, v0.8h, v0.8h -uqrshrn b10, h12, #5 -uqrshrn h12, s10, #14 -uqrshrn s10, d10, #25 -uqrshrn v0.2s, v0.2d, #3 -uqrshrn v0.4h, v0.4s, #3 -uqrshrn v0.8b, v0.8h, #3 -uqrshrn2 v0.16b, v0.8h, #3 -uqrshrn2 v0.4s, v0.2d, #3 -uqrshrn2 v0.8h, v0.4s, #3 -uqshl b11, b20, b30 -uqshl b18, b15, #6 -uqshl d15, d12, #19 -uqshl h11, h18, #7 -uqshl s14, s19, #18 -uqshl s23, s20, s16 -uqshl v0.16b, v0.16b, #3 -uqshl v0.16b, v0.16b, v0.16b -uqshl v0.2d, v0.2d, #3 -uqshl v0.2d, v0.2d, v0.2d -uqshl v0.2s, v0.2s, #3 -uqshl v0.4h, v0.4h, #3 -uqshl v0.4s, v0.4s, #3 -uqshl v0.4s, v0.4s, v0.4s -uqshl v0.8b, v0.8b, #3 -uqshl v0.8h, v0.8h, #3 -uqshl v0.8h, v0.8h, v0.8h -uqshrn b12, h10, #7 -uqshrn h10, s14, #5 -uqshrn s10, d12, #13 -uqshrn v0.2s, v0.2d, #3 -uqshrn v0.4h, v0.4s, #3 -uqshrn v0.8b, v0.8h, #3 -uqshrn2 v0.16b, v0.8h, #3 -uqshrn2 v0.4s, v0.2d, #3 -uqshrn2 v0.8h, v0.4s, #3 -uqsub d16, d16, d16 -uqsub v0.4h, v0.4h, v0.4h -uqxtn b18, h18 -uqxtn h20, s17 -uqxtn s19, d14 -uqxtn v0.2s, v0.2d -uqxtn v0.4h, v0.4s -uqxtn v0.8b, v0.8h -uqxtn2 v0.16b, v0.8h -uqxtn2 v0.4s, v0.2d -uqxtn2 v0.8h, v0.4s -urecpe v0.2s, v0.2s -urecpe v0.4s, v0.4s -urhadd v0.16b, v0.16b, v0.16b -urhadd v0.4s, v0.4s, v0.4s -urhadd v0.8h, v0.8h, v0.8h -urshl d8, d7, d4 -urshl v0.16b, v0.16b, v0.16b -urshl v0.2d, v0.2d, v0.2d -urshl v0.4s, v0.4s, v0.4s -urshl v0.8h, v0.8h, v0.8h -urshr d20, d23, #31 -urshr v0.16b, v0.16b, #3 -urshr v0.2d, v0.2d, #3 -urshr v0.2s, v0.2s, #3 -urshr v0.4h, v0.4h, #3 -urshr v0.4s, v0.4s, #3 -urshr v0.8b, v0.8b, #3 -urshr v0.8h, v0.8h, #3 -ursqrte v0.2s, v0.2s -ursqrte v0.4s, v0.4s -ursra d18, d10, #13 -ursra v0.16b, v0.16b, #3 -ursra v0.2d, v0.2d, #3 -ursra v0.2s, v0.2s, #3 -ursra v0.4h, v0.4h, #3 -ursra v0.4s, v0.4s, #3 -ursra v0.8b, v0.8b, #3 -ursra v0.8h, v0.8h, #3 -ushl d0, d0, d0 -ushl v0.16b, v0.16b, v0.16b -ushl v0.4s, v0.4s, v0.4s -ushl v0.8h, v0.8h, v0.8h -ushll v0.4s, v0.4h, #3 -ushll2 v0.8h, v0.16b, #3 -ushr d10, d17, #18 -ushr v0.16b, v0.16b, #3 -ushr v0.2d, v0.2d, #3 -ushr v0.2s, v0.2s, #3 -ushr v0.4h, v0.4h, #3 -ushr v0.4s, v0.4s, #3 -ushr v0.8b, v0.8b, #3 -ushr v0.8h, v0.8h, #3 -usqadd b19, b14 -usqadd d18, d22 -usqadd h20, h15 -usqadd s21, s12 -usqadd v0.16b, v0.16b -usqadd v0.2d, v0.2d -usqadd v0.2s, v0.2s -usqadd v0.4h, v0.4h -usqadd v0.4s, v0.4s -usqadd v0.8b, v0.8b -usqadd v0.8h, v0.8h -usra d20, d13, #61 -usra v0.16b, v0.16b, #3 -usra v0.2d, v0.2d, #3 -usra v0.2s, v0.2s, #3 -usra v0.4h, v0.4h, #3 -usra v0.4s, v0.4s, #3 -usra v0.8b, v0.8b, #3 -usra v0.8h, v0.8h, #3 -usubl v0.2d, v0.2s, v0.2s -usubl v0.4s, v0.4h, v0.4h -usubl v0.8h, v0.8b, v0.8b -usubl2 v0.2d, v0.4s, v0.4s -usubl2 v0.4s, v0.8h, v0.8h -usubl2 v0.8h, v0.16b, v0.16b -usubw v0.2d, v0.2d, v0.2s -usubw v0.4s, v0.4s, v0.4h -usubw v0.8h, v0.8h, v0.8b -usubw2 v0.2d, v0.2d, v0.4s -usubw2 v0.4s, v0.4s, v0.8h -usubw2 v0.8h, v0.8h, v0.16b -uzp1 v0.16b, v0.16b, v0.16b -uzp1 v0.2d, v0.2d, v0.2d -uzp1 v0.2s, v0.2s, v0.2s -uzp1 v0.4h, v0.4h, v0.4h -uzp1 v0.4s, v0.4s, v0.4s -uzp1 v0.8b, v0.8b, v0.8b -uzp1 v0.8h, v0.8h, v0.8h -uzp2 v0.16b, v0.16b, v0.16b -uzp2 v0.2d, v0.2d, v0.2d -uzp2 v0.2s, v0.2s, v0.2s -uzp2 v0.4h, v0.4h, v0.4h -uzp2 v0.4s, v0.4s, v0.4s -uzp2 v0.8b, v0.8b, v0.8b -uzp2 v0.8h, v0.8h, v0.8h -xtn v0.2s, v0.2d -xtn v0.4h, v0.4s -xtn v0.8b, v0.8h -xtn2 v0.16b, v0.8h -xtn2 v0.4s, v0.2d -xtn2 v0.8h, v0.4s -zip1 v0.16b, v0.16b, v0.16b -zip1 v0.2d, v0.2d, v0.2d -zip1 v0.2s, v0.2s, v0.2s -zip1 v0.4h, v0.4h, v0.4h -zip1 v0.4s, v0.4s, v0.4s -zip1 v0.8b, v0.8b, v0.8b -zip1 v0.8h, v0.8h, v0.8h -zip2 v0.16b, v0.16b, v0.16b -zip2 v0.2d, v0.2d, v0.2d -zip2 v0.2s, v0.2s, v0.2s -zip2 v0.4h, v0.4h, v0.4h -zip2 v0.4s, v0.4s, v0.4s -zip2 v0.8b, v0.8b, v0.8b -zip2 v0.8h, v0.8h, v0.8h + .text + add v31.8b, v31.8b, v31.8b + sub v0.2d, v0.2d, v0.2d + fadd v0.4s, v0.4s, v0.4s + fsub v31.2s, v31.2s, v31.2s + mul v0.8b, v1.8b, v2.8b + fmul v0.2s, v1.2s, v2.2s + fdiv v31.2s, v31.2s, v31.2s + pmul v0.8b, v15.8b, v16.8b + pmul v31.16b, v7.16b, v8.16b + and v2.8b, v2.8b, v2.8b + orr v31.16b, v31.16b, v30.16b + eor v0.16b, v1.16b, v2.16b + orn v9.16b, v10.16b, v11.16b + bic v31.8b, v30.8b, v29.8b + bsl v0.8b, v1.8b, v2.8b + bit v31.16b, v31.16b, v31.16b + bif v0.16b, v1.16b, v2.16b + mla v0.8b, v1.8b, v2.8b + mls v31.4h, v31.4h, v31.4h + fmla v0.2s, v1.2s, v2.2s + fmls v31.2s, v31.2s, v31.2s + movi v31.4s, #255, lsl #24 + mvni v0.2s, #0 + bic v15.4h, #15, lsl #8 + orr v16.8h, #31 + movi v8.2s, #8, msl #8 + mvni v16.4s, #16, msl #16 + movi v16.8b, #255 + movi v31.16b, #31 + movi d15, #0xff00ff00ff00ff + movi v31.2d, #0xff0000ff0000ffff + fmov v0.2s, #13.00000000 + fmov v15.4s, #1.00000000 + fmov v31.2d, #-1.25000000 + mov v1.16b, v15.16b + mov v25.8b, v4.8b + uaba v0.8b, v1.8b, v2.8b + saba v31.16b, v30.16b, v29.16b + uabd v15.4h, v16.4h, v17.4h + sabd v5.4h, v4.4h, v6.4h + fabd v1.4s, v31.4s, v16.4s + add d17, d31, d29 + sub d15, d5, d16 + frsqrts v31.2d, v15.2d, v8.2d + frecps v5.4s, v7.4s, v16.4s + facge v0.4s, v31.4s, v16.4s + facgt v31.2d, v29.2d, v28.2d + cmeq v5.16b, v15.16b, v31.16b + cmhs v1.8b, v16.8b, v30.8b + cmge v20.4h, v11.4h, v23.4h + cmhi v13.8h, v3.8h, v27.8h + cmgt v9.4s, v4.4s, v28.4s + cmtst v21.2s, v19.2s, v18.2s + fcmeq v0.2s, v15.2s, v16.2s + fcmge v31.4s, v7.4s, v29.4s + fcmgt v17.4s, v8.4s, v25.4s + cmeq v31.16b, v15.16b, #0 + cmge v3.8b, v15.8b, #0 + cmgt v22.2s, v9.2s, #0 + cmle v5.2d, v14.2d, #0 + cmlt v13.8h, v11.8h, #0 + fcmeq v15.2s, v21.2s, #0.0 + fcmge v14.2d, v13.2d, #0.0 + fcmgt v9.4s, v23.4s, #0.0 + fcmle v11.2d, v6.2d, #0.0 + fcmlt v12.4s, v25.4s, #0.0 + shadd v0.8b, v31.8b, v29.8b + uhadd v15.16b, v16.16b, v17.16b + shsub v0.4h, v1.4h, v2.4h + uhadd v5.8h, v7.8h, v8.8h + shsub v9.2s, v11.2s, v21.2s + uhsub v22.4s, v30.4s, v19.4s + srhadd v3.8b, v5.8b, v7.8b + urhadd v7.16b, v17.16b, v27.16b + srhadd v10.4h, v11.4h, v13.4h + urhadd v1.8h, v2.8h, v3.8h + srhadd v4.2s, v5.2s, v6.2s + urhadd v7.4s, v7.4s, v7.4s + sqsub v0.8b, v1.8b, v2.8b + sqadd v0.16b, v1.16b, v2.16b + uqsub v0.4h, v1.4h, v2.4h + uqadd v0.8h, v1.8h, v2.8h + sqadd v0.2s, v1.2s, v2.2s + sqsub v0.4s, v1.4s, v2.4s + sqsub v0.2d, v1.2d, v2.2d + sqadd b20, b11, b15 + uqadd h0, h1, h5 + sqsub s20, s10, s7 + uqsub d16, d16, d16 + sshl v10.8b, v15.8b, v22.8b + ushl v10.16b, v5.16b, v2.16b + sshl v10.4h, v15.4h, v22.4h + ushl v10.8h, v5.8h, v2.8h + sshl v10.2s, v15.2s, v22.2s + ushl v10.4s, v5.4s, v2.4s + sshl v0.2d, v1.2d, v2.2d + sqshl v1.8b, v15.8b, v22.8b + uqshl v2.16b, v14.16b, v23.16b + sqshl v3.4h, v13.4h, v24.4h + uqshl v4.8h, v12.8h, v25.8h + sqshl v5.2s, v11.2s, v26.2s + uqshl v6.4s, v10.4s, v27.4s + uqshl v0.2d, v1.2d, v2.2d + srshl v10.8b, v5.8b, v22.8b + urshl v10.16b, v5.16b, v2.16b + srshl v1.4h, v5.4h, v31.4h + urshl v1.8h, v5.8h, v2.8h + srshl v10.2s, v15.2s, v2.2s + urshl v1.4s, v5.4s, v2.4s + urshl v0.2d, v1.2d, v2.2d + sqrshl v1.8b, v15.8b, v22.8b + uqrshl v2.16b, v14.16b, v23.16b + sqrshl v3.4h, v13.4h, v24.4h + uqrshl v4.8h, v12.8h, v25.8h + sqrshl v5.2s, v11.2s, v26.2s + uqrshl v6.4s, v10.4s, v27.4s + uqrshl v6.4s, v10.4s, v27.4s + sshl d31, d31, d31 + ushl d0, d0, d0 + sqshl d31, d31, d31 + uqshl s23, s20, s16 + sqshl h3, h4, h15 + uqshl b11, b20, b30 + srshl d16, d16, d16 + urshl d8, d7, d4 + sqrshl d31, d31, d31 + uqrshl s23, s20, s16 + sqrshl h3, h4, h15 + uqrshl b11, b20, b30 + smax v1.8b, v15.8b, v22.8b + umax v2.16b, v14.16b, v23.16b + smax v3.4h, v13.4h, v24.4h + umax v4.8h, v12.8h, v25.8h + smax v5.2s, v11.2s, v26.2s + umax v6.4s, v10.4s, v27.4s + umin v1.8b, v15.8b, v22.8b + smin v2.16b, v14.16b, v23.16b + umin v3.4h, v13.4h, v24.4h + smin v4.8h, v12.8h, v25.8h + umin v5.2s, v11.2s, v26.2s + smin v6.4s, v10.4s, v27.4s + fmax v29.2s, v28.2s, v25.2s + fmax v9.4s, v8.4s, v5.4s + fmax v11.2d, v10.2d, v7.2d + fmin v29.2s, v28.2s, v25.2s + fmin v9.4s, v8.4s, v5.4s + fmin v11.2d, v10.2d, v7.2d + fmaxnm v9.2s, v8.2s, v5.2s + fmaxnm v9.4s, v8.4s, v5.4s + fmaxnm v11.2d, v10.2d, v7.2d + fminnm v2.2s, v8.2s, v25.2s + fminnm v9.4s, v8.4s, v5.4s + fminnm v11.2d, v10.2d, v7.2d + smaxp v1.8b, v15.8b, v22.8b + umaxp v2.16b, v14.16b, v23.16b + smaxp v3.4h, v13.4h, v24.4h + umaxp v4.8h, v12.8h, v25.8h + smaxp v5.2s, v11.2s, v26.2s + umaxp v6.4s, v10.4s, v27.4s + uminp v1.8b, v15.8b, v22.8b + sminp v2.16b, v14.16b, v23.16b + uminp v3.4h, v13.4h, v24.4h + sminp v4.8h, v12.8h, v25.8h + uminp v5.2s, v11.2s, v26.2s + sminp v6.4s, v10.4s, v27.4s + fmaxp v29.2s, v28.2s, v25.2s + fmaxp v9.4s, v8.4s, v5.4s + fmaxp v11.2d, v10.2d, v7.2d + fminp v29.2s, v28.2s, v25.2s + fminp v9.4s, v8.4s, v5.4s + fminp v11.2d, v10.2d, v7.2d + fmaxnmp v9.2s, v8.2s, v5.2s + fmaxnmp v9.4s, v8.4s, v5.4s + fmaxnmp v11.2d, v10.2d, v7.2d + fminnmp v2.2s, v8.2s, v25.2s + fminnmp v9.4s, v8.4s, v5.4s + fminnmp v11.2d, v10.2d, v7.2d + addp v31.8b, v31.8b, v31.8b + addp v0.2d, v0.2d, v0.2d + faddp v0.4s, v0.4s, v0.4s + faddp v31.2s, v31.2s, v31.2s + sqdmulh v31.2s, v31.2s, v31.2s + sqdmulh v5.4s, v7.4s, v9.4s + sqrdmulh v31.4h, v3.4h, v13.4h + sqrdmulh v0.8h, v10.8h, v20.8h + fmulx v1.2s, v22.2s, v2.2s + fmulx v21.4s, v15.4s, v3.4s + fmulx v11.2d, v5.2d, v23.2d + shll2 v2.8h, v4.16b, #8 + shll2 v6.4s, v8.8h, #16 + shll2 v6.2d, v8.4s, #32 + shll v2.8h, v4.8b, #8 + shll v6.4s, v8.4h, #16 + shll v6.2d, v8.2s, #32 + shl v0.4h, v1.4h, #3 + shl v0.16b, v1.16b, #3 + shl v0.4s, v1.4s, #3 + shl v0.2d, v1.2d, #3 + sshll v0.2d, v1.2s, #3 + sshll2 v0.4s, v1.8h, #3 + ushll v0.4s, v1.4h, #3 + ushll2 v0.8h, v1.16b, #3 + sshr v0.8b, v1.8b, #3 + sshr v0.4h, v1.4h, #3 + sshr v0.2s, v1.2s, #3 + sshr v0.16b, v1.16b, #3 + sshr v0.8h, v1.8h, #3 + sshr v0.4s, v1.4s, #3 + sshr v0.2d, v1.2d, #3 + ushr v0.8b, v1.8b, #3 + ushr v0.4h, v1.4h, #3 + ushr v0.2s, v1.2s, #3 + ushr v0.16b, v1.16b, #3 + ushr v0.8h, v1.8h, #3 + ushr v0.4s, v1.4s, #3 + ushr v0.2d, v1.2d, #3 + ssra v0.8b, v1.8b, #3 + ssra v0.4h, v1.4h, #3 + ssra v0.2s, v1.2s, #3 + ssra v0.16b, v1.16b, #3 + ssra v0.8h, v1.8h, #3 + ssra v0.4s, v1.4s, #3 + ssra v0.2d, v1.2d, #3 + usra v0.8b, v1.8b, #3 + usra v0.4h, v1.4h, #3 + usra v0.2s, v1.2s, #3 + usra v0.16b, v1.16b, #3 + usra v0.8h, v1.8h, #3 + usra v0.4s, v1.4s, #3 + usra v0.2d, v1.2d, #3 + srshr v0.8b, v1.8b, #3 + srshr v0.4h, v1.4h, #3 + srshr v0.2s, v1.2s, #3 + srshr v0.16b, v1.16b, #3 + srshr v0.8h, v1.8h, #3 + srshr v0.4s, v1.4s, #3 + srshr v0.2d, v1.2d, #3 + urshr v0.8b, v1.8b, #3 + urshr v0.4h, v1.4h, #3 + urshr v0.2s, v1.2s, #3 + urshr v0.16b, v1.16b, #3 + urshr v0.8h, v1.8h, #3 + urshr v0.4s, v1.4s, #3 + urshr v0.2d, v1.2d, #3 + srsra v0.8b, v1.8b, #3 + srsra v0.4h, v1.4h, #3 + srsra v0.2s, v1.2s, #3 + srsra v0.16b, v1.16b, #3 + srsra v0.8h, v1.8h, #3 + srsra v0.4s, v1.4s, #3 + srsra v0.2d, v1.2d, #3 + ursra v0.8b, v1.8b, #3 + ursra v0.4h, v1.4h, #3 + ursra v0.2s, v1.2s, #3 + ursra v0.16b, v1.16b, #3 + ursra v0.8h, v1.8h, #3 + ursra v0.4s, v1.4s, #3 + ursra v0.2d, v1.2d, #3 + sri v0.8b, v1.8b, #3 + sri v0.4h, v1.4h, #3 + sri v0.2s, v1.2s, #3 + sri v0.16b, v1.16b, #3 + sri v0.8h, v1.8h, #3 + sri v0.4s, v1.4s, #3 + sri v0.2d, v1.2d, #3 + sli v0.8b, v1.8b, #3 + sli v0.4h, v1.4h, #3 + sli v0.2s, v1.2s, #3 + sli v0.16b, v1.16b, #3 + sli v0.8h, v1.8h, #3 + sli v0.4s, v1.4s, #3 + sli v0.2d, v1.2d, #3 + sqshlu v0.8b, v1.8b, #3 + sqshlu v0.4h, v1.4h, #3 + sqshlu v0.2s, v1.2s, #3 + sqshlu v0.16b, v1.16b, #3 + sqshlu v0.8h, v1.8h, #3 + sqshlu v0.4s, v1.4s, #3 + sqshlu v0.2d, v1.2d, #3 + sqshl v0.8b, v1.8b, #3 + sqshl v0.4h, v1.4h, #3 + sqshl v0.2s, v1.2s, #3 + sqshl v0.16b, v1.16b, #3 + sqshl v0.8h, v1.8h, #3 + sqshl v0.4s, v1.4s, #3 + sqshl v0.2d, v1.2d, #3 + uqshl v0.8b, v1.8b, #3 + uqshl v0.4h, v1.4h, #3 + uqshl v0.2s, v1.2s, #3 + uqshl v0.16b, v1.16b, #3 + uqshl v0.8h, v1.8h, #3 + uqshl v0.4s, v1.4s, #3 + uqshl v0.2d, v1.2d, #3 + shrn v0.8b, v1.8h, #3 + shrn v0.4h, v1.4s, #3 + shrn v0.2s, v1.2d, #3 + shrn2 v0.16b, v1.8h, #3 + shrn2 v0.8h, v1.4s, #3 + shrn2 v0.4s, v1.2d, #3 + sqshrun v0.8b, v1.8h, #3 + sqshrun v0.4h, v1.4s, #3 + sqshrun v0.2s, v1.2d, #3 + sqshrun2 v0.16b, v1.8h, #3 + sqshrun2 v0.8h, v1.4s, #3 + sqshrun2 v0.4s, v1.2d, #3 + rshrn v0.8b, v1.8h, #3 + rshrn v0.4h, v1.4s, #3 + rshrn v0.2s, v1.2d, #3 + rshrn2 v0.16b, v1.8h, #3 + rshrn2 v0.8h, v1.4s, #3 + rshrn2 v0.4s, v1.2d, #3 + sqrshrun v0.8b, v1.8h, #3 + sqrshrun v0.4h, v1.4s, #3 + sqrshrun v0.2s, v1.2d, #3 + sqrshrun2 v0.16b, v1.8h, #3 + sqrshrun2 v0.8h, v1.4s, #3 + sqrshrun2 v0.4s, v1.2d, #3 + sqshrn v0.8b, v1.8h, #3 + sqshrn v0.4h, v1.4s, #3 + sqshrn v0.2s, v1.2d, #3 + sqshrn2 v0.16b, v1.8h, #3 + sqshrn2 v0.8h, v1.4s, #3 + sqshrn2 v0.4s, v1.2d, #3 + uqshrn v0.8b, v1.8h, #3 + uqshrn v0.4h, v1.4s, #3 + uqshrn v0.2s, v1.2d, #3 + uqshrn2 v0.16b, v1.8h, #3 + uqshrn2 v0.8h, v1.4s, #3 + uqshrn2 v0.4s, v1.2d, #3 + sqrshrn v0.8b, v1.8h, #3 + sqrshrn v0.4h, v1.4s, #3 + sqrshrn v0.2s, v1.2d, #3 + sqrshrn2 v0.16b, v1.8h, #3 + sqrshrn2 v0.8h, v1.4s, #3 + sqrshrn2 v0.4s, v1.2d, #3 + uqrshrn v0.8b, v1.8h, #3 + uqrshrn v0.4h, v1.4s, #3 + uqrshrn v0.2s, v1.2d, #3 + uqrshrn2 v0.16b, v1.8h, #3 + uqrshrn2 v0.8h, v1.4s, #3 + uqrshrn2 v0.4s, v1.2d, #3 + scvtf v0.2s, v1.2s, #3 + scvtf v0.4s, v1.4s, #3 + scvtf v0.2d, v1.2d, #3 + ucvtf v0.2s, v1.2s, #3 + ucvtf v0.4s, v1.4s, #3 + ucvtf v0.2d, v1.2d, #3 + fcvtzs v0.2s, v1.2s, #3 + fcvtzs v0.4s, v1.4s, #3 + fcvtzs v0.2d, v1.2d, #3 + fcvtzu v0.2s, v1.2s, #3 + fcvtzu v0.4s, v1.4s, #3 + fcvtzu v0.2d, v1.2d, #3 + saddl v0.8h, v1.8b, v2.8b + saddl v0.4s, v1.4h, v2.4h + saddl v0.2d, v1.2s, v2.2s + saddl2 v0.4s, v1.8h, v2.8h + saddl2 v0.8h, v1.16b, v2.16b + saddl2 v0.2d, v1.4s, v2.4s + uaddl v0.8h, v1.8b, v2.8b + uaddl v0.4s, v1.4h, v2.4h + uaddl v0.2d, v1.2s, v2.2s + uaddl2 v0.8h, v1.16b, v2.16b + uaddl2 v0.4s, v1.8h, v2.8h + uaddl2 v0.2d, v1.4s, v2.4s + ssubl v0.8h, v1.8b, v2.8b + ssubl v0.4s, v1.4h, v2.4h + ssubl v0.2d, v1.2s, v2.2s + ssubl2 v0.8h, v1.16b, v2.16b + ssubl2 v0.4s, v1.8h, v2.8h + ssubl2 v0.2d, v1.4s, v2.4s + usubl v0.8h, v1.8b, v2.8b + usubl v0.4s, v1.4h, v2.4h + usubl v0.2d, v1.2s, v2.2s + usubl2 v0.8h, v1.16b, v2.16b + usubl2 v0.4s, v1.8h, v2.8h + usubl2 v0.2d, v1.4s, v2.4s + sabal v0.8h, v1.8b, v2.8b + sabal v0.4s, v1.4h, v2.4h + sabal v0.2d, v1.2s, v2.2s + sabal2 v0.8h, v1.16b, v2.16b + sabal2 v0.4s, v1.8h, v2.8h + sabal2 v0.2d, v1.4s, v2.4s + uabal v0.8h, v1.8b, v2.8b + uabal v0.4s, v1.4h, v2.4h + uabal v0.2d, v1.2s, v2.2s + uabal2 v0.8h, v1.16b, v2.16b + uabal2 v0.4s, v1.8h, v2.8h + uabal2 v0.2d, v1.4s, v2.4s + sabdl v0.8h, v1.8b, v2.8b + sabdl v0.4s, v1.4h, v2.4h + sabdl v0.2d, v1.2s, v2.2s + sabdl2 v0.8h, v1.16b, v2.16b + sabdl2 v0.4s, v1.8h, v2.8h + sabdl2 v0.2d, v1.4s, v2.4s + uabdl v0.8h, v1.8b, v2.8b + uabdl v0.4s, v1.4h, v2.4h + uabdl v0.2d, v1.2s, v2.2s + uabdl2 v0.8h, v1.16b, v2.16b + uabdl2 v0.4s, v1.8h, v2.8h + uabdl2 v0.2d, v1.4s, v2.4s + smlal v0.8h, v1.8b, v2.8b + smlal v0.4s, v1.4h, v2.4h + smlal v0.2d, v1.2s, v2.2s + smlal2 v0.8h, v1.16b, v2.16b + smlal2 v0.4s, v1.8h, v2.8h + smlal2 v0.2d, v1.4s, v2.4s + umlal v0.8h, v1.8b, v2.8b + umlal v0.4s, v1.4h, v2.4h + umlal v0.2d, v1.2s, v2.2s + umlal2 v0.8h, v1.16b, v2.16b + umlal2 v0.4s, v1.8h, v2.8h + umlal2 v0.2d, v1.4s, v2.4s + smlsl v0.8h, v1.8b, v2.8b + smlsl v0.4s, v1.4h, v2.4h + smlsl v0.2d, v1.2s, v2.2s + smlsl2 v0.8h, v1.16b, v2.16b + smlsl2 v0.4s, v1.8h, v2.8h + smlsl2 v0.2d, v1.4s, v2.4s + umlsl v0.8h, v1.8b, v2.8b + umlsl v0.4s, v1.4h, v2.4h + umlsl v0.2d, v1.2s, v2.2s + umlsl2 v0.8h, v1.16b, v2.16b + umlsl2 v0.4s, v1.8h, v2.8h + umlsl2 v0.2d, v1.4s, v2.4s + smull v0.8h, v1.8b, v2.8b + smull v0.4s, v1.4h, v2.4h + smull v0.2d, v1.2s, v2.2s + smull2 v0.8h, v1.16b, v2.16b + smull2 v0.4s, v1.8h, v2.8h + smull2 v0.2d, v1.4s, v2.4s + umull v0.8h, v1.8b, v2.8b + umull v0.4s, v1.4h, v2.4h + umull v0.2d, v1.2s, v2.2s + umull2 v0.8h, v1.16b, v2.16b + umull2 v0.4s, v1.8h, v2.8h + umull2 v0.2d, v1.4s, v2.4s + sqdmlal v0.4s, v1.4h, v2.4h + sqdmlal v0.2d, v1.2s, v2.2s + sqdmlal2 v0.4s, v1.8h, v2.8h + sqdmlal2 v0.2d, v1.4s, v2.4s + sqdmlsl v0.4s, v1.4h, v2.4h + sqdmlsl v0.2d, v1.2s, v2.2s + sqdmlsl2 v0.4s, v1.8h, v2.8h + sqdmlsl2 v0.2d, v1.4s, v2.4s + sqdmull v0.4s, v1.4h, v2.4h + sqdmull v0.2d, v1.2s, v2.2s + sqdmull2 v0.4s, v1.8h, v2.8h + sqdmull2 v0.2d, v1.4s, v2.4s + pmull v0.8h, v1.8b, v2.8b + pmull2 v0.8h, v1.16b, v2.16b + saddw v0.8h, v1.8h, v2.8b + saddw v0.4s, v1.4s, v2.4h + saddw v0.2d, v1.2d, v2.2s + saddw2 v0.8h, v1.8h, v2.16b + saddw2 v0.4s, v1.4s, v2.8h + saddw2 v0.2d, v1.2d, v2.4s + uaddw v0.8h, v1.8h, v2.8b + uaddw v0.4s, v1.4s, v2.4h + uaddw v0.2d, v1.2d, v2.2s + uaddw2 v0.8h, v1.8h, v2.16b + uaddw2 v0.4s, v1.4s, v2.8h + uaddw2 v0.2d, v1.2d, v2.4s + ssubw v0.8h, v1.8h, v2.8b + ssubw v0.4s, v1.4s, v2.4h + ssubw v0.2d, v1.2d, v2.2s + ssubw2 v0.8h, v1.8h, v2.16b + ssubw2 v0.4s, v1.4s, v2.8h + ssubw2 v0.2d, v1.2d, v2.4s + usubw v0.8h, v1.8h, v2.8b + usubw v0.4s, v1.4s, v2.4h + usubw v0.2d, v1.2d, v2.2s + usubw2 v0.8h, v1.8h, v2.16b + usubw2 v0.4s, v1.4s, v2.8h + usubw2 v0.2d, v1.2d, v2.4s + addhn v0.8b, v1.8h, v2.8h + addhn v0.4h, v1.4s, v2.4s + addhn v0.2s, v1.2d, v2.2d + addhn2 v0.16b, v1.8h, v2.8h + addhn2 v0.8h, v1.4s, v2.4s + addhn2 v0.4s, v1.2d, v2.2d + raddhn v0.8b, v1.8h, v2.8h + raddhn v0.4h, v1.4s, v2.4s + raddhn v0.2s, v1.2d, v2.2d + raddhn2 v0.16b, v1.8h, v2.8h + raddhn2 v0.8h, v1.4s, v2.4s + raddhn2 v0.4s, v1.2d, v2.2d + rsubhn v0.8b, v1.8h, v2.8h + rsubhn v0.4h, v1.4s, v2.4s + rsubhn v0.2s, v1.2d, v2.2d + rsubhn2 v0.16b, v1.8h, v2.8h + rsubhn2 v0.8h, v1.4s, v2.4s + rsubhn2 v0.4s, v1.2d, v2.2d + sqdmulh h10, h11, h12 + sqdmulh s20, s21, s2 + sqrdmulh h10, h11, h12 + sqrdmulh s20, s21, s2 + fmulx s20, s22, s15 + fmulx d23, d11, d1 + frecps s21, s16, s13 + frecps d22, d30, d21 + frsqrts s21, s5, s12 + frsqrts d8, d22, d18 + scvtf s22, s13 + scvtf d21, d12 + ucvtf s22, s13 + ucvtf d21, d14 + frecpe s19, s14 + frecpe d13, d13 + frecpx s18, s10 + frecpx d16, d19 + frsqrte s22, s13 + frsqrte d21, d12 + cmeq d20, d21, d22 + cmeq d20, d21, #0 + cmhs d20, d21, d22 + cmge d20, d21, d22 + cmge d20, d21, #0 + cmhi d20, d21, d22 + cmgt d20, d21, d22 + cmgt d20, d21, #0 + cmle d20, d21, #0 + cmlt d20, d21, #0 + cmtst d20, d21, d22 + fcmeq s10, s11, s12 + fcmeq d20, d21, d22 + fcmeq s10, s11, #0.0 + fcmeq d20, d21, #0.0 + fcmge s10, s11, s12 + fcmge d20, d21, d22 + fcmge s10, s11, #0.0 + fcmge d20, d21, #0.0 + fcmgt s10, s11, s12 + fcmgt d20, d21, d22 + fcmgt s10, s11, #0.0 + fcmgt d20, d21, #0.0 + fcmle s10, s11, #0.0 + fcmle d20, d21, #0.0 + fcmlt s10, s11, #0.0 + fcmlt d20, d21, #0.0 + facge s10, s11, s12 + facge d20, d21, d22 + facgt s10, s11, s12 + facgt d20, d21, d22 + abs d29, d24 + sqabs b19, b14 + sqabs h21, h15 + sqabs s20, s12 + sqabs d18, d12 + neg d29, d24 + sqneg b19, b14 + sqneg h21, h15 + sqneg s20, s12 + sqneg d18, d12 + suqadd b19, b14 + suqadd h20, h15 + suqadd s21, s12 + suqadd d18, d22 + usqadd b19, b14 + usqadd h20, h15 + usqadd s21, s12 + usqadd d18, d22 + sqdmlal s17, h27, h12 + sqdmlal d19, s24, s12 + sqdmlsl s14, h12, h25 + sqdmlsl d12, s23, s13 + sqdmull s12, h22, h12 + sqdmull d15, s22, s12 + sqxtun b19, h14 + sqxtun h21, s15 + sqxtun s20, d12 + sqxtn b18, h18 + sqxtn h20, s17 + sqxtn s19, d14 + uqxtn b18, h18 + uqxtn h20, s17 + uqxtn s19, d14 + sshr d15, d16, #12 + ushr d10, d17, #18 + srshr d19, d18, #7 + urshr d20, d23, #31 + ssra d18, d12, #21 + usra d20, d13, #61 + srsra d15, d11, #19 + ursra d18, d10, #13 + shl d7, d10, #12 + sqshl b11, b19, #7 + sqshl h13, h18, #11 + sqshl s14, s17, #22 + sqshl d15, d16, #51 + uqshl b18, b15, #6 + uqshl h11, h18, #7 + uqshl s14, s19, #18 + uqshl d15, d12, #19 + sqshlu b15, b18, #6 + sqshlu h19, h17, #6 + sqshlu s16, s14, #25 + sqshlu d11, d13, #32 + sri d10, d12, #14 + sli d10, d14, #12 + sqshrn b10, h15, #5 + sqshrn h17, s10, #4 + sqshrn s18, d10, #31 + uqshrn b12, h10, #7 + uqshrn h10, s14, #5 + uqshrn s10, d12, #13 + sqrshrn b10, h13, #2 + sqrshrn h15, s10, #6 + sqrshrn s15, d12, #9 + uqrshrn b10, h12, #5 + uqrshrn h12, s10, #14 + uqrshrn s10, d10, #25 + sqshrun b15, h10, #7 + sqshrun h20, s14, #3 + sqshrun s10, d15, #15 + sqrshrun b17, h10, #6 + sqrshrun h10, s13, #15 + sqrshrun s22, d16, #31 + scvtf s22, s13, #32 + scvtf d21, d12, #64 + ucvtf s22, s13, #32 + ucvtf d21, d14, #64 + fcvtzs s21, s12, #1 + fcvtzs d21, d12, #1 + fcvtzu s21, s12, #1 + fcvtzu d21, d12, #1 + ld1 { v0.16b }, [x0] + ld1 { v15.8h, v16.8h }, [x15] + ld1 { v31.4s, v0.4s, v1.4s }, [sp] + ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] + ld2 { v0.8b, v1.8b }, [x0] + ld3 { v15.4h, v16.4h, v17.4h }, [x15] + ld4 { v31.2s, v0.2s, v1.2s, v2.2s }, [sp] + st1 { v0.16b }, [x0] + st1 { v15.8h, v16.8h }, [x15] + st1 { v31.4s, v0.4s, v1.4s }, [sp] + st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] + st2 { v0.8b, v1.8b }, [x0] + st3 { v15.4h, v16.4h, v17.4h }, [x15] + st4 { v31.2s, v0.2s, v1.2s, v2.2s }, [sp] + ld1 { v15.8h }, [x15], x2 + ld1 { v31.4s, v0.4s }, [sp], #32 + ld1 { v0.2d, v1.2d, v2.2d }, [x0], #48 + ld1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3 + ld2 { v0.16b, v1.16b }, [x0], x1 + ld3 { v15.8h, v16.8h, v17.8h }, [x15], x2 + ld4 { v31.4s, v0.4s, v1.4s, v2.4s }, [sp], #64 + st1 { v15.8h }, [x15], x2 + st1 { v31.4s, v0.4s }, [sp], #32 + st1 { v0.2d, v1.2d, v2.2d }, [x0], #48 + st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3 + st2 { v0.16b, v1.16b }, [x0], x1 + st3 { v15.8h, v16.8h, v17.8h }, [x15], x2 + st4 { v31.4s, v0.4s, v1.4s, v2.4s }, [sp], #64 + ld1r { v0.16b }, [x0] + ld1r { v15.8h }, [x15] + ld2r { v31.4s, v0.4s }, [sp] + ld2r { v0.2d, v1.2d }, [x0] + ld3r { v0.8b, v1.8b, v2.8b }, [x0] + ld3r { v15.4h, v16.4h, v17.4h }, [x15] + ld4r { v31.2s, v0.2s, v1.2s, v2.2s }, [sp] + ld4r { v31.1d, v0.1d, v1.1d, v2.1d }, [sp] + ld1 { v0.b }[9], [x0] + ld2 { v15.h, v16.h }[7], [x15] + ld3 { v31.s, v0.s, v1.s }[3], [sp] + ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0] + st1 { v0.d }[1], [x0] + st2 { v31.s, v0.s }[3], [sp] + st3 { v15.h, v16.h, v17.h }[7], [x15] + st4 { v0.b, v1.b, v2.b, v3.b }[9], [x0] + ld1r { v0.16b }, [x0], #1 + ld1r { v15.8h }, [x15], #2 + ld2r { v31.4s, v0.4s }, [sp], #8 + ld2r { v0.2d, v1.2d }, [x0], #16 + ld3r { v0.8b, v1.8b, v2.8b }, [x0], #3 + ld3r { v15.4h, v16.4h, v17.4h }, [x15], #6 + ld4r { v31.2s, v0.2s, v1.2s, v2.2s }, [sp], x30 + ld4r { v31.1d, v0.1d, v1.1d, v2.1d }, [sp], x7 + ld1 { v0.b }[9], [x0], #1 + ld2 { v15.h, v16.h }[7], [x15], #4 + ld3 { v31.s, v0.s, v1.s }[3], [sp], x3 + ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0], #32 + ld4 { v0.h, v1.h, v2.h, v3.h }[7], [x0], x0 + st1 { v0.d }[1], [x0], #8 + st2 { v31.s, v0.s }[3], [sp], #8 + st3 { v15.h, v16.h, v17.h }[7], [x15], #6 + st4 { v0.b, v1.b, v2.b, v3.b }[9], [x0], x5 + ext v0.8b, v1.8b, v2.8b, #3 + ext v0.16b, v1.16b, v2.16b, #3 + uzp1 v1.8b, v1.8b, v2.8b + uzp1 v2.16b, v1.16b, v2.16b + uzp1 v3.4h, v1.4h, v2.4h + uzp1 v4.8h, v1.8h, v2.8h + uzp1 v5.2s, v1.2s, v2.2s + uzp1 v6.4s, v1.4s, v2.4s + uzp1 v7.2d, v1.2d, v2.2d + trn1 v8.8b, v1.8b, v2.8b + trn1 v9.16b, v1.16b, v2.16b + trn1 v10.4h, v1.4h, v2.4h + trn1 v27.8h, v7.8h, v2.8h + trn1 v12.2s, v7.2s, v2.2s + trn1 v29.4s, v6.4s, v2.4s + trn1 v14.2d, v6.2d, v2.2d + zip1 v31.8b, v5.8b, v2.8b + zip1 v0.16b, v5.16b, v2.16b + zip1 v17.4h, v4.4h, v2.4h + zip1 v2.8h, v4.8h, v2.8h + zip1 v19.2s, v3.2s, v2.2s + zip1 v4.4s, v3.4s, v2.4s + zip1 v21.2d, v2.2d, v2.2d + uzp2 v6.8b, v2.8b, v2.8b + uzp2 v23.16b, v1.16b, v2.16b + uzp2 v8.4h, v1.4h, v2.4h + uzp2 v25.8h, v0.8h, v2.8h + uzp2 v10.2s, v0.2s, v2.2s + uzp2 v27.4s, v7.4s, v2.4s + uzp2 v12.2d, v7.2d, v2.2d + trn2 v29.8b, v6.8b, v2.8b + trn2 v14.16b, v6.16b, v2.16b + trn2 v31.4h, v5.4h, v2.4h + trn2 v0.8h, v5.8h, v2.8h + trn2 v17.2s, v4.2s, v2.2s + trn2 v2.4s, v4.4s, v2.4s + trn2 v19.2d, v3.2d, v2.2d + zip2 v4.8b, v3.8b, v2.8b + zip2 v21.16b, v2.16b, v2.16b + zip2 v6.4h, v2.4h, v2.4h + zip2 v23.8h, v1.8h, v2.8h + zip2 v8.2s, v1.2s, v2.2s + zip2 v25.4s, v0.4s, v2.4s + zip2 v10.2d, v0.2d, v2.2d + fmul s0, s1, v1.s[0] + fmul s0, s1, v1.s[3] + fmul d0, d1, v1.d[0] + fmul d0, d1, v1.d[1] + fmul d15, d15, v15.d[1] + fmulx s3, s5, v7.s[0] + fmulx s3, s5, v7.s[3] + fmulx s3, s5, v15.s[3] + fmulx d0, d4, v8.d[0] + fmulx d0, d4, v8.d[1] + fmla s0, s1, v1.s[0] + fmla s0, s1, v1.s[3] + fmla d0, d1, v1.d[0] + fmla d0, d1, v1.d[1] + fmla d15, d15, v15.d[1] + fmls s3, s5, v7.s[0] + fmls s3, s5, v7.s[3] + fmls s3, s5, v15.s[3] + fmls d0, d4, v8.d[0] + fmls d0, d4, v8.d[1] + sqdmlal s0, h0, v0.h[0] + sqdmlal s0, h0, v0.h[1] + sqdmlal s0, h0, v0.h[2] + sqdmlal s0, h0, v0.h[3] + sqdmlal s0, h0, v0.h[4] + sqdmlal s0, h0, v0.h[5] + sqdmlal s0, h0, v0.h[6] + sqdmlal s0, h0, v0.h[7] + sqdmlal d8, s9, v15.s[0] + sqdmlal d8, s9, v15.s[1] + sqdmlal d8, s9, v15.s[2] + sqdmlal d8, s9, v15.s[3] + sqdmlsl s0, h0, v0.h[0] + sqdmlsl s0, h0, v0.h[1] + sqdmlsl s0, h0, v0.h[2] + sqdmlsl s0, h0, v0.h[3] + sqdmlsl s0, h0, v0.h[4] + sqdmlsl s0, h0, v0.h[5] + sqdmlsl s0, h0, v0.h[6] + sqdmlsl s0, h0, v0.h[7] + sqdmlsl d8, s9, v15.s[0] + sqdmlsl d8, s9, v15.s[1] + sqdmlsl d8, s9, v15.s[2] + sqdmlsl d8, s9, v15.s[3] + sqdmull s1, h1, v1.h[0] + sqdmull s1, h1, v1.h[1] + sqdmull s1, h1, v1.h[2] + sqdmull s1, h1, v1.h[3] + sqdmull s1, h1, v1.h[4] + sqdmull s1, h1, v1.h[5] + sqdmull s1, h1, v1.h[6] + sqdmull s1, h1, v1.h[7] + sqdmull d1, s1, v4.s[0] + sqdmull d1, s1, v4.s[1] + sqdmull d1, s1, v4.s[2] + sqdmull d1, s1, v4.s[3] + sqdmulh h7, h1, v14.h[0] + sqdmulh h7, h15, v8.h[1] + sqdmulh h7, h15, v8.h[2] + sqdmulh h7, h15, v8.h[3] + sqdmulh h7, h15, v8.h[4] + sqdmulh h7, h15, v8.h[5] + sqdmulh h7, h15, v8.h[6] + sqdmulh h7, h15, v8.h[7] + sqdmulh s15, s3, v4.s[0] + sqdmulh s15, s14, v16.s[1] + sqdmulh s15, s15, v16.s[2] + sqdmulh s15, s16, v17.s[3] + sqrdmulh h7, h1, v14.h[0] + sqrdmulh h7, h15, v8.h[1] + sqrdmulh h7, h15, v8.h[2] + sqrdmulh h7, h15, v8.h[3] + sqrdmulh h7, h15, v8.h[4] + sqrdmulh h7, h15, v8.h[5] + sqrdmulh h7, h15, v8.h[6] + sqrdmulh h7, h15, v8.h[7] + sqrdmulh s15, s3, v4.s[0] + sqrdmulh s15, s14, v16.s[1] + sqrdmulh s15, s15, v16.s[2] + sqrdmulh s15, s16, v17.s[3] + mov b0, v0.b[15] + mov h2, v31.h[5] + mov s17, v2.s[2] + mov d6, v12.d[1] + tbl v0.8b, { v1.16b }, v2.8b + tbl v16.8b, { v31.16b, v0.16b }, v2.8b + tbl v0.8b, { v1.16b, v2.16b, v3.16b }, v2.8b + tbl v16.8b, { v23.16b, v24.16b, v25.16b, v26.16b }, v2.8b + tbl v0.16b, { v1.16b }, v2.16b + tbl v16.16b, { v31.16b, v0.16b }, v2.16b + tbl v0.16b, { v1.16b, v2.16b, v3.16b }, v2.16b + tbl v0.16b, { v31.16b, v0.16b, v1.16b, v2.16b }, v2.16b + tbx v0.8b, { v1.16b }, v2.8b + tbx v16.8b, { v31.16b, v0.16b }, v2.8b + tbx v0.8b, { v1.16b, v2.16b, v3.16b }, v2.8b + tbx v16.8b, { v23.16b, v24.16b, v25.16b, v26.16b }, v2.8b + tbx v0.16b, { v1.16b }, v2.16b + tbx v16.16b, { v31.16b, v0.16b }, v2.16b + tbx v0.16b, { v1.16b, v2.16b, v3.16b }, v2.16b + tbx v16.16b, { v31.16b, v0.16b, v1.16b, v2.16b }, v2.16b + fcvtxn s22, d13 + fcvtas s12, s13 + fcvtas d21, d14 + fcvtau s12, s13 + fcvtau d21, d14 + fcvtms s22, s13 + fcvtms d21, d14 + fcvtmu s12, s13 + fcvtmu d21, d14 + fcvtns s22, s13 + fcvtns d21, d14 + fcvtnu s12, s13 + fcvtnu d21, d14 + fcvtps s22, s13 + fcvtps d21, d14 + fcvtpu s12, s13 + fcvtpu d21, d14 + fcvtzs s12, s13 + fcvtzs d21, d14 + fcvtzu s12, s13 + fcvtzu d21, d14 + fabd s29, s24, s20 + fabd d29, d24, d20 # CHECK: Instruction Info: # CHECK-NEXT: [1]: #uOps @@ -1070,1065 +866,860 @@ # CHECK-NEXT: [6]: HasSideEffects (U) # CHECK: [1] [2] [3] [4] [5] [6] Instructions: -# CHECK-NEXT: 1 4 0.50 abs d29, d24 -# CHECK-NEXT: 1 4 1.00 abs v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 abs v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 abs v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 abs v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 abs v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 abs v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 abs v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 add d17, d31, d29 -# CHECK-NEXT: 1 4 0.50 add v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 addhn v0.2s, v0.2d, v0.2d -# CHECK-NEXT: 1 4 1.00 addhn v0.4h, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 addhn v0.8b, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 addhn2 v0.16b, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 addhn2 v0.4s, v0.2d, v0.2d -# CHECK-NEXT: 1 4 1.00 addhn2 v0.8h, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 addp v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 addp v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 0.50 and v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 0.50 bic v0.4h, #15, lsl #8 -# CHECK-NEXT: 1 4 0.50 bic v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 bif v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 bit v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 bsl v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 cls v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 cls v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 cls v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 cls v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 cls v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 cls v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 clz v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 clz v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 clz v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 clz v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 clz v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 clz v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 cmeq d20, d21, #0 -# CHECK-NEXT: 1 4 0.50 cmeq d20, d21, d22 -# CHECK-NEXT: 1 4 1.00 cmeq v0.16b, v0.16b, #0 -# CHECK-NEXT: 1 4 1.00 cmeq v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 cmge d20, d21, #0 -# CHECK-NEXT: 1 4 0.50 cmge d20, d21, d22 -# CHECK-NEXT: 1 4 0.50 cmge v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 cmge v0.8b, v0.8b, #0 -# CHECK-NEXT: 1 4 0.50 cmgt d20, d21, #0 -# CHECK-NEXT: 1 4 0.50 cmgt d20, d21, d22 -# CHECK-NEXT: 1 4 0.50 cmgt v0.2s, v0.2s, #0 -# CHECK-NEXT: 1 4 1.00 cmgt v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 cmhi d20, d21, d22 -# CHECK-NEXT: 1 4 1.00 cmhi v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 cmhs d20, d21, d22 -# CHECK-NEXT: 1 4 0.50 cmhs v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 0.50 cmle d20, d21, #0 -# CHECK-NEXT: 1 4 1.00 cmle v0.2d, v0.2d, #0 -# CHECK-NEXT: 1 4 0.50 cmlt d20, d21, #0 -# CHECK-NEXT: 1 4 1.00 cmlt v0.8h, v0.8h, #0 -# CHECK-NEXT: 1 4 0.50 cmtst d20, d21, d22 -# CHECK-NEXT: 1 4 0.50 cmtst v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 cnt v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 cnt v0.8b, v0.8b -# CHECK-NEXT: 1 2 0.50 dup v0.16b, w28 -# CHECK-NEXT: 1 2 0.50 dup v0.2d, x28 -# CHECK-NEXT: 1 4 0.50 dup v0.2s, w28 -# CHECK-NEXT: 1 4 0.50 dup v0.4h, w28 -# CHECK-NEXT: 1 2 0.50 dup v0.4s, w28 -# CHECK-NEXT: 1 4 0.50 dup v0.8b, w28 -# CHECK-NEXT: 1 2 0.50 dup v0.8h, w28 -# CHECK-NEXT: 1 4 1.00 eor v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 ext v0.16b, v0.16b, v0.16b, #3 -# CHECK-NEXT: 1 4 0.50 ext v0.8b, v0.8b, v0.8b, #3 -# CHECK-NEXT: 1 4 0.50 fabd d29, d24, d20 -# CHECK-NEXT: 1 4 0.50 fabd s29, s24, s20 -# CHECK-NEXT: 1 4 1.00 fabd v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 fabs v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 fabs v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 fabs v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 fabs v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 fabs v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 facge d20, d21, d22 -# CHECK-NEXT: 1 4 0.50 facge s10, s11, s12 -# CHECK-NEXT: 1 4 1.00 facge v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 facgt d20, d21, d22 -# CHECK-NEXT: 1 4 0.50 facgt s10, s11, s12 -# CHECK-NEXT: 1 4 1.00 facgt v0.2d, v0.2d, v0.2d +# CHECK-NEXT: 1 2 0.50 add v31.8b, v31.8b, v31.8b +# CHECK-NEXT: 1 2 1.00 sub v0.2d, v0.2d, v0.2d # CHECK-NEXT: 1 4 1.00 fadd v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 faddp v0.2s, v0.2s, v0.2s +# CHECK-NEXT: 1 4 0.50 fsub v31.2s, v31.2s, v31.2s +# CHECK-NEXT: 1 4 0.50 mul v0.8b, v1.8b, v2.8b +# CHECK-NEXT: 1 4 0.50 fmul v0.2s, v1.2s, v2.2s +# CHECK-NEXT: 1 13 10.00 fdiv v31.2s, v31.2s, v31.2s +# CHECK-NEXT: 1 3 0.50 pmul v0.8b, v15.8b, v16.8b +# CHECK-NEXT: 1 3 1.00 pmul v31.16b, v7.16b, v8.16b +# CHECK-NEXT: 1 1 0.50 and v2.8b, v2.8b, v2.8b +# CHECK-NEXT: 1 1 1.00 orr v31.16b, v31.16b, v30.16b +# CHECK-NEXT: 1 1 1.00 eor v0.16b, v1.16b, v2.16b +# CHECK-NEXT: 1 1 1.00 orn v9.16b, v10.16b, v11.16b +# CHECK-NEXT: 1 1 0.50 bic v31.8b, v30.8b, v29.8b +# CHECK-NEXT: 1 2 0.50 bsl v0.8b, v1.8b, v2.8b +# CHECK-NEXT: 1 2 1.00 bit v31.16b, v31.16b, v31.16b +# CHECK-NEXT: 1 2 1.00 bif v0.16b, v1.16b, v2.16b +# CHECK-NEXT: 1 4 0.50 mla v0.8b, v1.8b, v2.8b +# CHECK-NEXT: 1 4 0.50 mls v31.4h, v31.4h, v31.4h +# CHECK-NEXT: 1 4 0.50 fmla v0.2s, v1.2s, v2.2s +# CHECK-NEXT: 1 4 0.50 fmls v31.2s, v31.2s, v31.2s +# CHECK-NEXT: 1 1 1.00 movi v31.4s, #255, lsl #24 +# CHECK-NEXT: 1 1 0.50 mvni v0.2s, #0 +# CHECK-NEXT: 1 1 0.50 bic v15.4h, #15, lsl #8 +# CHECK-NEXT: 1 1 1.00 orr v16.8h, #31 +# CHECK-NEXT: 1 1 0.50 movi v8.2s, #8, msl #8 +# CHECK-NEXT: 1 1 1.00 mvni v16.4s, #16, msl #16 +# CHECK-NEXT: 1 1 0.50 movi v16.8b, #255 +# CHECK-NEXT: 1 1 1.00 movi v31.16b, #31 +# CHECK-NEXT: 1 1 0.50 movi d15, #0xff00ff00ff00ff +# CHECK-NEXT: 1 1 1.00 movi v31.2d, #0xff0000ff0000ffff +# CHECK-NEXT: 1 1 0.50 fmov v0.2s, #13.00000000 +# CHECK-NEXT: 1 1 0.50 fmov v15.4s, #1.00000000 +# CHECK-NEXT: 1 1 0.50 fmov v31.2d, #-1.25000000 +# CHECK-NEXT: 1 1 1.00 mov v1.16b, v15.16b +# CHECK-NEXT: 1 1 0.50 mov v25.8b, v4.8b +# CHECK-NEXT: 1 4 2.00 uaba v0.8b, v1.8b, v2.8b +# CHECK-NEXT: 1 4 2.00 saba v31.16b, v30.16b, v29.16b +# CHECK-NEXT: 1 3 0.50 uabd v15.4h, v16.4h, v17.4h +# CHECK-NEXT: 1 3 0.50 sabd v5.4h, v4.4h, v6.4h +# CHECK-NEXT: 1 4 1.00 fabd v1.4s, v31.4s, v16.4s +# CHECK-NEXT: 1 2 0.50 add d17, d31, d29 +# CHECK-NEXT: 1 2 0.50 sub d15, d5, d16 +# CHECK-NEXT: 1 22 19.00 frsqrts v31.2d, v15.2d, v8.2d +# CHECK-NEXT: 1 4 1.00 frecps v5.4s, v7.4s, v16.4s +# CHECK-NEXT: 1 2 1.00 facge v0.4s, v31.4s, v16.4s +# CHECK-NEXT: 1 2 1.00 facgt v31.2d, v29.2d, v28.2d +# CHECK-NEXT: 1 2 1.00 cmeq v5.16b, v15.16b, v31.16b +# CHECK-NEXT: 1 2 0.50 cmhs v1.8b, v16.8b, v30.8b +# CHECK-NEXT: 1 2 0.50 cmge v20.4h, v11.4h, v23.4h +# CHECK-NEXT: 1 2 1.00 cmhi v13.8h, v3.8h, v27.8h +# CHECK-NEXT: 1 2 1.00 cmgt v9.4s, v4.4s, v28.4s +# CHECK-NEXT: 1 3 0.50 cmtst v21.2s, v19.2s, v18.2s +# CHECK-NEXT: 1 2 0.50 fcmeq v0.2s, v15.2s, v16.2s +# CHECK-NEXT: 1 2 1.00 fcmge v31.4s, v7.4s, v29.4s +# CHECK-NEXT: 1 2 1.00 fcmgt v17.4s, v8.4s, v25.4s +# CHECK-NEXT: 1 2 1.00 cmeq v31.16b, v15.16b, #0 +# CHECK-NEXT: 1 2 0.50 cmge v3.8b, v15.8b, #0 +# CHECK-NEXT: 1 2 0.50 cmgt v22.2s, v9.2s, #0 +# CHECK-NEXT: 1 2 1.00 cmle v5.2d, v14.2d, #0 +# CHECK-NEXT: 1 2 1.00 cmlt v13.8h, v11.8h, #0 +# CHECK-NEXT: 1 2 0.50 fcmeq v15.2s, v21.2s, #0.0 +# CHECK-NEXT: 1 2 1.00 fcmge v14.2d, v13.2d, #0.0 +# CHECK-NEXT: 1 2 1.00 fcmgt v9.4s, v23.4s, #0.0 +# CHECK-NEXT: 1 2 1.00 fcmle v11.2d, v6.2d, #0.0 +# CHECK-NEXT: 1 2 1.00 fcmlt v12.4s, v25.4s, #0.0 +# CHECK-NEXT: 1 2 0.50 shadd v0.8b, v31.8b, v29.8b +# CHECK-NEXT: 1 2 1.00 uhadd v15.16b, v16.16b, v17.16b +# CHECK-NEXT: 1 2 0.50 shsub v0.4h, v1.4h, v2.4h +# CHECK-NEXT: 1 2 1.00 uhadd v5.8h, v7.8h, v8.8h +# CHECK-NEXT: 1 2 0.50 shsub v9.2s, v11.2s, v21.2s +# CHECK-NEXT: 1 2 1.00 uhsub v22.4s, v30.4s, v19.4s +# CHECK-NEXT: 1 2 0.50 srhadd v3.8b, v5.8b, v7.8b +# CHECK-NEXT: 1 2 1.00 urhadd v7.16b, v17.16b, v27.16b +# CHECK-NEXT: 1 2 0.50 srhadd v10.4h, v11.4h, v13.4h +# CHECK-NEXT: 1 2 1.00 urhadd v1.8h, v2.8h, v3.8h +# CHECK-NEXT: 1 2 0.50 srhadd v4.2s, v5.2s, v6.2s +# CHECK-NEXT: 1 2 1.00 urhadd v7.4s, v7.4s, v7.4s +# CHECK-NEXT: 1 3 0.50 sqsub v0.8b, v1.8b, v2.8b +# CHECK-NEXT: 1 3 1.00 sqadd v0.16b, v1.16b, v2.16b +# CHECK-NEXT: 1 3 0.50 uqsub v0.4h, v1.4h, v2.4h +# CHECK-NEXT: 1 3 1.00 uqadd v0.8h, v1.8h, v2.8h +# CHECK-NEXT: 1 3 0.50 sqadd v0.2s, v1.2s, v2.2s +# CHECK-NEXT: 1 3 1.00 sqsub v0.4s, v1.4s, v2.4s +# CHECK-NEXT: 1 3 1.00 sqsub v0.2d, v1.2d, v2.2d +# CHECK-NEXT: 1 3 0.50 sqadd b20, b11, b15 +# CHECK-NEXT: 1 3 0.50 uqadd h0, h1, h5 +# CHECK-NEXT: 1 3 0.50 sqsub s20, s10, s7 +# CHECK-NEXT: 1 3 0.50 uqsub d16, d16, d16 +# CHECK-NEXT: 1 2 0.50 sshl v10.8b, v15.8b, v22.8b +# CHECK-NEXT: 1 2 1.00 ushl v10.16b, v5.16b, v2.16b +# CHECK-NEXT: 1 2 0.50 sshl v10.4h, v15.4h, v22.4h +# CHECK-NEXT: 1 2 1.00 ushl v10.8h, v5.8h, v2.8h +# CHECK-NEXT: 1 2 0.50 sshl v10.2s, v15.2s, v22.2s +# CHECK-NEXT: 1 2 1.00 ushl v10.4s, v5.4s, v2.4s +# CHECK-NEXT: 1 2 1.00 sshl v0.2d, v1.2d, v2.2d +# CHECK-NEXT: 1 4 0.50 sqshl v1.8b, v15.8b, v22.8b +# CHECK-NEXT: 1 4 1.00 uqshl v2.16b, v14.16b, v23.16b +# CHECK-NEXT: 1 4 0.50 sqshl v3.4h, v13.4h, v24.4h +# CHECK-NEXT: 1 4 1.00 uqshl v4.8h, v12.8h, v25.8h +# CHECK-NEXT: 1 4 0.50 sqshl v5.2s, v11.2s, v26.2s +# CHECK-NEXT: 1 4 1.00 uqshl v6.4s, v10.4s, v27.4s +# CHECK-NEXT: 1 4 1.00 uqshl v0.2d, v1.2d, v2.2d +# CHECK-NEXT: 1 3 0.50 srshl v10.8b, v5.8b, v22.8b +# CHECK-NEXT: 1 3 1.00 urshl v10.16b, v5.16b, v2.16b +# CHECK-NEXT: 1 3 0.50 srshl v1.4h, v5.4h, v31.4h +# CHECK-NEXT: 1 3 1.00 urshl v1.8h, v5.8h, v2.8h +# CHECK-NEXT: 1 3 0.50 srshl v10.2s, v15.2s, v2.2s +# CHECK-NEXT: 1 3 1.00 urshl v1.4s, v5.4s, v2.4s +# CHECK-NEXT: 1 3 1.00 urshl v0.2d, v1.2d, v2.2d +# CHECK-NEXT: 1 4 0.50 sqrshl v1.8b, v15.8b, v22.8b +# CHECK-NEXT: 1 4 1.00 uqrshl v2.16b, v14.16b, v23.16b +# CHECK-NEXT: 1 4 0.50 sqrshl v3.4h, v13.4h, v24.4h +# CHECK-NEXT: 1 4 1.00 uqrshl v4.8h, v12.8h, v25.8h +# CHECK-NEXT: 1 4 0.50 sqrshl v5.2s, v11.2s, v26.2s +# CHECK-NEXT: 1 4 1.00 uqrshl v6.4s, v10.4s, v27.4s +# CHECK-NEXT: 1 4 1.00 uqrshl v6.4s, v10.4s, v27.4s +# CHECK-NEXT: 1 2 0.50 sshl d31, d31, d31 +# CHECK-NEXT: 1 2 0.50 ushl d0, d0, d0 +# CHECK-NEXT: 1 4 0.50 sqshl d31, d31, d31 +# CHECK-NEXT: 1 4 0.50 uqshl s23, s20, s16 +# CHECK-NEXT: 1 4 0.50 sqshl h3, h4, h15 +# CHECK-NEXT: 1 4 0.50 uqshl b11, b20, b30 +# CHECK-NEXT: 1 3 0.50 srshl d16, d16, d16 +# CHECK-NEXT: 1 3 0.50 urshl d8, d7, d4 +# CHECK-NEXT: 1 4 0.50 sqrshl d31, d31, d31 +# CHECK-NEXT: 1 4 0.50 uqrshl s23, s20, s16 +# CHECK-NEXT: 1 4 0.50 sqrshl h3, h4, h15 +# CHECK-NEXT: 1 4 0.50 uqrshl b11, b20, b30 +# CHECK-NEXT: 1 2 0.50 smax v1.8b, v15.8b, v22.8b +# CHECK-NEXT: 1 2 1.00 umax v2.16b, v14.16b, v23.16b +# CHECK-NEXT: 1 2 0.50 smax v3.4h, v13.4h, v24.4h +# CHECK-NEXT: 1 2 1.00 umax v4.8h, v12.8h, v25.8h +# CHECK-NEXT: 1 2 0.50 smax v5.2s, v11.2s, v26.2s +# CHECK-NEXT: 1 2 1.00 umax v6.4s, v10.4s, v27.4s +# CHECK-NEXT: 1 2 0.50 umin v1.8b, v15.8b, v22.8b +# CHECK-NEXT: 1 2 1.00 smin v2.16b, v14.16b, v23.16b +# CHECK-NEXT: 1 2 0.50 umin v3.4h, v13.4h, v24.4h +# CHECK-NEXT: 1 2 1.00 smin v4.8h, v12.8h, v25.8h +# CHECK-NEXT: 1 2 0.50 umin v5.2s, v11.2s, v26.2s +# CHECK-NEXT: 1 2 1.00 smin v6.4s, v10.4s, v27.4s +# CHECK-NEXT: 1 4 0.50 fmax v29.2s, v28.2s, v25.2s +# CHECK-NEXT: 1 4 1.00 fmax v9.4s, v8.4s, v5.4s +# CHECK-NEXT: 1 4 1.00 fmax v11.2d, v10.2d, v7.2d +# CHECK-NEXT: 1 4 0.50 fmin v29.2s, v28.2s, v25.2s +# CHECK-NEXT: 1 4 1.00 fmin v9.4s, v8.4s, v5.4s +# CHECK-NEXT: 1 4 1.00 fmin v11.2d, v10.2d, v7.2d +# CHECK-NEXT: 1 4 0.50 fmaxnm v9.2s, v8.2s, v5.2s +# CHECK-NEXT: 1 4 1.00 fmaxnm v9.4s, v8.4s, v5.4s +# CHECK-NEXT: 1 4 1.00 fmaxnm v11.2d, v10.2d, v7.2d +# CHECK-NEXT: 1 4 0.50 fminnm v2.2s, v8.2s, v25.2s +# CHECK-NEXT: 1 4 1.00 fminnm v9.4s, v8.4s, v5.4s +# CHECK-NEXT: 1 4 1.00 fminnm v11.2d, v10.2d, v7.2d +# CHECK-NEXT: 1 2 0.50 smaxp v1.8b, v15.8b, v22.8b +# CHECK-NEXT: 1 2 1.00 umaxp v2.16b, v14.16b, v23.16b +# CHECK-NEXT: 1 2 0.50 smaxp v3.4h, v13.4h, v24.4h +# CHECK-NEXT: 1 2 1.00 umaxp v4.8h, v12.8h, v25.8h +# CHECK-NEXT: 1 2 0.50 smaxp v5.2s, v11.2s, v26.2s +# CHECK-NEXT: 1 2 1.00 umaxp v6.4s, v10.4s, v27.4s +# CHECK-NEXT: 1 2 0.50 uminp v1.8b, v15.8b, v22.8b +# CHECK-NEXT: 1 2 1.00 sminp v2.16b, v14.16b, v23.16b +# CHECK-NEXT: 1 2 0.50 uminp v3.4h, v13.4h, v24.4h +# CHECK-NEXT: 1 2 1.00 sminp v4.8h, v12.8h, v25.8h +# CHECK-NEXT: 1 2 0.50 uminp v5.2s, v11.2s, v26.2s +# CHECK-NEXT: 1 2 1.00 sminp v6.4s, v10.4s, v27.4s +# CHECK-NEXT: 1 4 0.50 fmaxp v29.2s, v28.2s, v25.2s +# CHECK-NEXT: 1 4 1.00 fmaxp v9.4s, v8.4s, v5.4s +# CHECK-NEXT: 1 4 1.00 fmaxp v11.2d, v10.2d, v7.2d +# CHECK-NEXT: 1 4 0.50 fminp v29.2s, v28.2s, v25.2s +# CHECK-NEXT: 1 4 1.00 fminp v9.4s, v8.4s, v5.4s +# CHECK-NEXT: 1 4 1.00 fminp v11.2d, v10.2d, v7.2d +# CHECK-NEXT: 1 4 0.50 fmaxnmp v9.2s, v8.2s, v5.2s +# CHECK-NEXT: 1 4 1.00 fmaxnmp v9.4s, v8.4s, v5.4s +# CHECK-NEXT: 1 4 1.00 fmaxnmp v11.2d, v10.2d, v7.2d +# CHECK-NEXT: 1 4 0.50 fminnmp v2.2s, v8.2s, v25.2s +# CHECK-NEXT: 1 4 1.00 fminnmp v9.4s, v8.4s, v5.4s +# CHECK-NEXT: 1 4 1.00 fminnmp v11.2d, v10.2d, v7.2d +# CHECK-NEXT: 1 3 0.50 addp v31.8b, v31.8b, v31.8b +# CHECK-NEXT: 1 3 1.00 addp v0.2d, v0.2d, v0.2d # CHECK-NEXT: 1 4 1.00 faddp v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 fcmeq d20, d21, #0.0 -# CHECK-NEXT: 1 4 0.50 fcmeq d20, d21, d22 -# CHECK-NEXT: 1 4 0.50 fcmeq s10, s11, #0.0 -# CHECK-NEXT: 1 4 0.50 fcmeq s10, s11, s12 -# CHECK-NEXT: 1 4 0.50 fcmeq v0.2s, v0.2s, #0.0 -# CHECK-NEXT: 1 4 0.50 fcmeq v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 fcmge d20, d21, #0.0 -# CHECK-NEXT: 1 4 0.50 fcmge d20, d21, d22 -# CHECK-NEXT: 1 4 0.50 fcmge s10, s11, #0.0 -# CHECK-NEXT: 1 4 0.50 fcmge s10, s11, s12 -# CHECK-NEXT: 1 4 1.00 fcmge v0.2d, v0.2d, #0.0 -# CHECK-NEXT: 1 4 1.00 fcmge v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 fcmgt d20, d21, #0.0 -# CHECK-NEXT: 1 4 0.50 fcmgt d20, d21, d22 -# CHECK-NEXT: 1 4 0.50 fcmgt s10, s11, #0.0 -# CHECK-NEXT: 1 4 0.50 fcmgt s10, s11, s12 -# CHECK-NEXT: 1 4 1.00 fcmgt v0.4s, v0.4s, #0.0 -# CHECK-NEXT: 1 4 1.00 fcmgt v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 fcmle d20, d21, #0.0 -# CHECK-NEXT: 1 4 0.50 fcmle s10, s11, #0.0 -# CHECK-NEXT: 1 4 1.00 fcmle v0.2d, v0.2d, #0.0 -# CHECK-NEXT: 1 4 0.50 fcmlt d20, d21, #0.0 -# CHECK-NEXT: 1 4 0.50 fcmlt s10, s11, #0.0 -# CHECK-NEXT: 1 4 1.00 fcmlt v0.4s, v0.4s, #0.0 -# CHECK-NEXT: 1 4 0.50 fcvtas d21, d14 -# CHECK-NEXT: 1 4 0.50 fcvtas s12, s13 -# CHECK-NEXT: 1 4 0.50 fcvtas v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 fcvtas v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 fcvtas v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 fcvtas v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 fcvtas v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 fcvtau d21, d14 -# CHECK-NEXT: 1 4 0.50 fcvtau s12, s13 -# CHECK-NEXT: 1 4 0.50 fcvtau v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 fcvtau v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 fcvtau v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 fcvtau v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 fcvtau v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 fcvtl v0.2d, v0.2s -# CHECK-NEXT: 1 4 0.50 fcvtl v0.4s, v0.4h -# CHECK-NEXT: 1 4 0.50 fcvtl2 v0.2d, v0.4s -# CHECK-NEXT: 1 4 0.50 fcvtl2 v0.4s, v0.8h -# CHECK-NEXT: 1 4 0.50 fcvtms d21, d14 -# CHECK-NEXT: 1 4 0.50 fcvtms s22, s13 -# CHECK-NEXT: 1 4 0.50 fcvtms v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 fcvtms v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 fcvtms v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 fcvtms v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 fcvtms v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 fcvtmu d21, d14 -# CHECK-NEXT: 1 4 0.50 fcvtmu s12, s13 -# CHECK-NEXT: 1 4 0.50 fcvtmu v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 fcvtmu v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 fcvtmu v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 fcvtmu v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 fcvtmu v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 fcvtn v0.2s, v0.2d -# CHECK-NEXT: 1 4 0.50 fcvtn v0.4h, v0.4s -# CHECK-NEXT: 1 4 0.50 fcvtn2 v0.4s, v0.2d -# CHECK-NEXT: 1 4 0.50 fcvtn2 v0.8h, v0.4s -# CHECK-NEXT: 1 4 0.50 fcvtns d21, d14 -# CHECK-NEXT: 1 4 0.50 fcvtns s22, s13 -# CHECK-NEXT: 1 4 0.50 fcvtns v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 fcvtns v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 fcvtns v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 fcvtns v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 fcvtns v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 fcvtnu d21, d14 -# CHECK-NEXT: 1 4 0.50 fcvtnu s12, s13 -# CHECK-NEXT: 1 4 0.50 fcvtnu v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 fcvtnu v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 fcvtnu v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 fcvtnu v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 fcvtnu v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 fcvtps d21, d14 -# CHECK-NEXT: 1 4 0.50 fcvtps s22, s13 -# CHECK-NEXT: 1 4 0.50 fcvtps v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 fcvtps v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 fcvtps v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 fcvtps v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 fcvtps v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 fcvtpu d21, d14 -# CHECK-NEXT: 1 4 0.50 fcvtpu s12, s13 -# CHECK-NEXT: 1 4 0.50 fcvtpu v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 fcvtpu v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 fcvtpu v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 fcvtpu v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 fcvtpu v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 fcvtxn s22, d13 -# CHECK-NEXT: 1 4 0.50 fcvtxn v0.2s, v0.2d -# CHECK-NEXT: 1 4 0.50 fcvtxn2 v0.4s, v0.2d -# CHECK-NEXT: 1 4 0.50 fcvtzs d21, d12, #1 -# CHECK-NEXT: 1 4 0.50 fcvtzs d21, d14 -# CHECK-NEXT: 1 4 0.50 fcvtzs s12, s13 -# CHECK-NEXT: 1 4 0.50 fcvtzs s21, s12, #1 -# CHECK-NEXT: 1 4 0.50 fcvtzs v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 fcvtzs v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 fcvtzs v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 fcvtzs v0.2s, v0.2s, #3 -# CHECK-NEXT: 1 4 0.50 fcvtzs v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 fcvtzs v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 fcvtzs v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 fcvtzs v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 fcvtzu d21, d12, #1 -# CHECK-NEXT: 1 4 0.50 fcvtzu d21, d14 -# CHECK-NEXT: 1 4 0.50 fcvtzu s12, s13 -# CHECK-NEXT: 1 4 0.50 fcvtzu s21, s12, #1 -# CHECK-NEXT: 1 4 0.50 fcvtzu v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 fcvtzu v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 fcvtzu v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 fcvtzu v0.2s, v0.2s, #3 -# CHECK-NEXT: 1 4 0.50 fcvtzu v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 fcvtzu v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 fcvtzu v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 fcvtzu v0.8h, v0.8h -# CHECK-NEXT: 1 13 10.00 fdiv v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 fmax v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 fmax v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 fmax v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 fmaxnm v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 fmaxnm v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 fmaxnm v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 fmaxnmp v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 fmaxnmp v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 fmaxnmp v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 fmaxp v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 fmaxp v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 fmaxp v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 fmin v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 fmin v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 fmin v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 fminnm v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 fminnm v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 fminnm v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 fminnmp v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 fminnmp v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 fminnmp v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 fminp v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 fminp v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 fminp v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 fmla d0, d1, v0.d[1] -# CHECK-NEXT: 1 4 0.50 fmla s0, s1, v0.s[3] -# CHECK-NEXT: 1 4 0.50 fmla v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 fmls d0, d4, v0.d[1] -# CHECK-NEXT: 1 4 0.50 fmls s3, s5, v0.s[3] -# CHECK-NEXT: 1 4 0.50 fmls v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 fmov v0.2d, #-1.25000000 -# CHECK-NEXT: 1 4 0.50 fmov v0.2s, #13.00000000 -# CHECK-NEXT: 1 4 1.00 fmov v0.4s, #1.00000000 -# CHECK-NEXT: 1 4 0.50 fmul d0, d1, v0.d[1] -# CHECK-NEXT: 1 4 0.50 fmul s0, s1, v0.s[3] -# CHECK-NEXT: 1 4 0.50 fmul v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 fmulx d0, d4, v0.d[1] -# CHECK-NEXT: 1 4 0.50 fmulx d23, d11, d1 +# CHECK-NEXT: 1 4 0.50 faddp v31.2s, v31.2s, v31.2s +# CHECK-NEXT: 1 4 0.50 sqdmulh v31.2s, v31.2s, v31.2s +# CHECK-NEXT: 1 4 1.00 sqdmulh v5.4s, v7.4s, v9.4s +# CHECK-NEXT: 1 4 0.50 sqrdmulh v31.4h, v3.4h, v13.4h +# CHECK-NEXT: 1 4 1.00 sqrdmulh v0.8h, v10.8h, v20.8h +# CHECK-NEXT: 1 4 0.50 fmulx v1.2s, v22.2s, v2.2s +# CHECK-NEXT: 1 4 1.00 fmulx v21.4s, v15.4s, v3.4s +# CHECK-NEXT: 1 4 1.00 fmulx v11.2d, v5.2d, v23.2d +# CHECK-NEXT: 1 2 1.00 shll2 v2.8h, v4.16b, #8 +# CHECK-NEXT: 1 2 1.00 shll2 v6.4s, v8.8h, #16 +# CHECK-NEXT: 1 2 1.00 shll2 v6.2d, v8.4s, #32 +# CHECK-NEXT: 1 2 1.00 shll v2.8h, v4.8b, #8 +# CHECK-NEXT: 1 2 1.00 shll v6.4s, v8.4h, #16 +# CHECK-NEXT: 1 2 1.00 shll v6.2d, v8.2s, #32 +# CHECK-NEXT: 1 2 0.50 shl v0.4h, v1.4h, #3 +# CHECK-NEXT: 1 2 0.50 shl v0.16b, v1.16b, #3 +# CHECK-NEXT: 1 2 0.50 shl v0.4s, v1.4s, #3 +# CHECK-NEXT: 1 2 0.50 shl v0.2d, v1.2d, #3 +# CHECK-NEXT: 1 2 1.00 sshll v0.2d, v1.2s, #3 +# CHECK-NEXT: 1 2 1.00 sshll2 v0.4s, v1.8h, #3 +# CHECK-NEXT: 1 2 1.00 ushll v0.4s, v1.4h, #3 +# CHECK-NEXT: 1 2 1.00 ushll2 v0.8h, v1.16b, #3 +# CHECK-NEXT: 1 2 0.50 sshr v0.8b, v1.8b, #3 +# CHECK-NEXT: 1 2 0.50 sshr v0.4h, v1.4h, #3 +# CHECK-NEXT: 1 2 0.50 sshr v0.2s, v1.2s, #3 +# CHECK-NEXT: 1 2 0.50 sshr v0.16b, v1.16b, #3 +# CHECK-NEXT: 1 2 0.50 sshr v0.8h, v1.8h, #3 +# CHECK-NEXT: 1 2 0.50 sshr v0.4s, v1.4s, #3 +# CHECK-NEXT: 1 2 0.50 sshr v0.2d, v1.2d, #3 +# CHECK-NEXT: 1 2 0.50 ushr v0.8b, v1.8b, #3 +# CHECK-NEXT: 1 2 0.50 ushr v0.4h, v1.4h, #3 +# CHECK-NEXT: 1 2 0.50 ushr v0.2s, v1.2s, #3 +# CHECK-NEXT: 1 2 0.50 ushr v0.16b, v1.16b, #3 +# CHECK-NEXT: 1 2 0.50 ushr v0.8h, v1.8h, #3 +# CHECK-NEXT: 1 2 0.50 ushr v0.4s, v1.4s, #3 +# CHECK-NEXT: 1 2 0.50 ushr v0.2d, v1.2d, #3 +# CHECK-NEXT: 1 3 0.50 ssra v0.8b, v1.8b, #3 +# CHECK-NEXT: 1 3 0.50 ssra v0.4h, v1.4h, #3 +# CHECK-NEXT: 1 3 0.50 ssra v0.2s, v1.2s, #3 +# CHECK-NEXT: 1 3 1.00 ssra v0.16b, v1.16b, #3 +# CHECK-NEXT: 1 3 1.00 ssra v0.8h, v1.8h, #3 +# CHECK-NEXT: 1 3 1.00 ssra v0.4s, v1.4s, #3 +# CHECK-NEXT: 1 3 1.00 ssra v0.2d, v1.2d, #3 +# CHECK-NEXT: 1 3 0.50 usra v0.8b, v1.8b, #3 +# CHECK-NEXT: 1 3 0.50 usra v0.4h, v1.4h, #3 +# CHECK-NEXT: 1 3 0.50 usra v0.2s, v1.2s, #3 +# CHECK-NEXT: 1 3 1.00 usra v0.16b, v1.16b, #3 +# CHECK-NEXT: 1 3 1.00 usra v0.8h, v1.8h, #3 +# CHECK-NEXT: 1 3 1.00 usra v0.4s, v1.4s, #3 +# CHECK-NEXT: 1 3 1.00 usra v0.2d, v1.2d, #3 +# CHECK-NEXT: 1 3 0.50 srshr v0.8b, v1.8b, #3 +# CHECK-NEXT: 1 3 0.50 srshr v0.4h, v1.4h, #3 +# CHECK-NEXT: 1 3 0.50 srshr v0.2s, v1.2s, #3 +# CHECK-NEXT: 1 3 1.00 srshr v0.16b, v1.16b, #3 +# CHECK-NEXT: 1 3 1.00 srshr v0.8h, v1.8h, #3 +# CHECK-NEXT: 1 3 1.00 srshr v0.4s, v1.4s, #3 +# CHECK-NEXT: 1 3 1.00 srshr v0.2d, v1.2d, #3 +# CHECK-NEXT: 1 3 0.50 urshr v0.8b, v1.8b, #3 +# CHECK-NEXT: 1 3 0.50 urshr v0.4h, v1.4h, #3 +# CHECK-NEXT: 1 3 0.50 urshr v0.2s, v1.2s, #3 +# CHECK-NEXT: 1 3 1.00 urshr v0.16b, v1.16b, #3 +# CHECK-NEXT: 1 3 1.00 urshr v0.8h, v1.8h, #3 +# CHECK-NEXT: 1 3 1.00 urshr v0.4s, v1.4s, #3 +# CHECK-NEXT: 1 3 1.00 urshr v0.2d, v1.2d, #3 +# CHECK-NEXT: 1 4 2.00 srsra v0.8b, v1.8b, #3 +# CHECK-NEXT: 1 4 2.00 srsra v0.4h, v1.4h, #3 +# CHECK-NEXT: 1 4 2.00 srsra v0.2s, v1.2s, #3 +# CHECK-NEXT: 1 4 2.00 srsra v0.16b, v1.16b, #3 +# CHECK-NEXT: 1 4 2.00 srsra v0.8h, v1.8h, #3 +# CHECK-NEXT: 1 4 2.00 srsra v0.4s, v1.4s, #3 +# CHECK-NEXT: 1 4 2.00 srsra v0.2d, v1.2d, #3 +# CHECK-NEXT: 1 4 2.00 ursra v0.8b, v1.8b, #3 +# CHECK-NEXT: 1 4 2.00 ursra v0.4h, v1.4h, #3 +# CHECK-NEXT: 1 4 2.00 ursra v0.2s, v1.2s, #3 +# CHECK-NEXT: 1 4 2.00 ursra v0.16b, v1.16b, #3 +# CHECK-NEXT: 1 4 2.00 ursra v0.8h, v1.8h, #3 +# CHECK-NEXT: 1 4 2.00 ursra v0.4s, v1.4s, #3 +# CHECK-NEXT: 1 4 2.00 ursra v0.2d, v1.2d, #3 +# CHECK-NEXT: 1 2 0.50 sri v0.8b, v1.8b, #3 +# CHECK-NEXT: 1 2 0.50 sri v0.4h, v1.4h, #3 +# CHECK-NEXT: 1 2 0.50 sri v0.2s, v1.2s, #3 +# CHECK-NEXT: 1 2 1.00 sri v0.16b, v1.16b, #3 +# CHECK-NEXT: 1 2 1.00 sri v0.8h, v1.8h, #3 +# CHECK-NEXT: 1 2 1.00 sri v0.4s, v1.4s, #3 +# CHECK-NEXT: 1 2 1.00 sri v0.2d, v1.2d, #3 +# CHECK-NEXT: 1 2 0.50 sli v0.8b, v1.8b, #3 +# CHECK-NEXT: 1 2 0.50 sli v0.4h, v1.4h, #3 +# CHECK-NEXT: 1 2 0.50 sli v0.2s, v1.2s, #3 +# CHECK-NEXT: 1 2 1.00 sli v0.16b, v1.16b, #3 +# CHECK-NEXT: 1 2 1.00 sli v0.8h, v1.8h, #3 +# CHECK-NEXT: 1 2 1.00 sli v0.4s, v1.4s, #3 +# CHECK-NEXT: 1 2 1.00 sli v0.2d, v1.2d, #3 +# CHECK-NEXT: 1 4 0.50 sqshlu v0.8b, v1.8b, #3 +# CHECK-NEXT: 1 4 0.50 sqshlu v0.4h, v1.4h, #3 +# CHECK-NEXT: 1 4 0.50 sqshlu v0.2s, v1.2s, #3 +# CHECK-NEXT: 1 4 1.00 sqshlu v0.16b, v1.16b, #3 +# CHECK-NEXT: 1 4 1.00 sqshlu v0.8h, v1.8h, #3 +# CHECK-NEXT: 1 4 1.00 sqshlu v0.4s, v1.4s, #3 +# CHECK-NEXT: 1 4 1.00 sqshlu v0.2d, v1.2d, #3 +# CHECK-NEXT: 1 4 0.50 sqshl v0.8b, v1.8b, #3 +# CHECK-NEXT: 1 4 0.50 sqshl v0.4h, v1.4h, #3 +# CHECK-NEXT: 1 4 0.50 sqshl v0.2s, v1.2s, #3 +# CHECK-NEXT: 1 4 1.00 sqshl v0.16b, v1.16b, #3 +# CHECK-NEXT: 1 4 1.00 sqshl v0.8h, v1.8h, #3 +# CHECK-NEXT: 1 4 1.00 sqshl v0.4s, v1.4s, #3 +# CHECK-NEXT: 1 4 1.00 sqshl v0.2d, v1.2d, #3 +# CHECK-NEXT: 1 4 0.50 uqshl v0.8b, v1.8b, #3 +# CHECK-NEXT: 1 4 0.50 uqshl v0.4h, v1.4h, #3 +# CHECK-NEXT: 1 4 0.50 uqshl v0.2s, v1.2s, #3 +# CHECK-NEXT: 1 4 1.00 uqshl v0.16b, v1.16b, #3 +# CHECK-NEXT: 1 4 1.00 uqshl v0.8h, v1.8h, #3 +# CHECK-NEXT: 1 4 1.00 uqshl v0.4s, v1.4s, #3 +# CHECK-NEXT: 1 4 1.00 uqshl v0.2d, v1.2d, #3 +# CHECK-NEXT: 1 2 0.50 shrn v0.8b, v1.8h, #3 +# CHECK-NEXT: 1 2 0.50 shrn v0.4h, v1.4s, #3 +# CHECK-NEXT: 1 2 0.50 shrn v0.2s, v1.2d, #3 +# CHECK-NEXT: 1 2 0.50 shrn2 v0.16b, v1.8h, #3 +# CHECK-NEXT: 1 2 0.50 shrn2 v0.8h, v1.4s, #3 +# CHECK-NEXT: 1 2 0.50 shrn2 v0.4s, v1.2d, #3 +# CHECK-NEXT: 1 4 0.50 sqshrun v0.8b, v1.8h, #3 +# CHECK-NEXT: 1 4 0.50 sqshrun v0.4h, v1.4s, #3 +# CHECK-NEXT: 1 4 0.50 sqshrun v0.2s, v1.2d, #3 +# CHECK-NEXT: 1 4 1.00 sqshrun2 v0.16b, v1.8h, #3 +# CHECK-NEXT: 1 4 1.00 sqshrun2 v0.8h, v1.4s, #3 +# CHECK-NEXT: 1 4 1.00 sqshrun2 v0.4s, v1.2d, #3 +# CHECK-NEXT: 1 3 0.50 rshrn v0.8b, v1.8h, #3 +# CHECK-NEXT: 1 3 0.50 rshrn v0.4h, v1.4s, #3 +# CHECK-NEXT: 1 3 0.50 rshrn v0.2s, v1.2d, #3 +# CHECK-NEXT: 1 3 1.00 rshrn2 v0.16b, v1.8h, #3 +# CHECK-NEXT: 1 3 1.00 rshrn2 v0.8h, v1.4s, #3 +# CHECK-NEXT: 1 3 1.00 rshrn2 v0.4s, v1.2d, #3 +# CHECK-NEXT: 1 4 0.50 sqrshrun v0.8b, v1.8h, #3 +# CHECK-NEXT: 1 4 0.50 sqrshrun v0.4h, v1.4s, #3 +# CHECK-NEXT: 1 4 0.50 sqrshrun v0.2s, v1.2d, #3 +# CHECK-NEXT: 1 4 1.00 sqrshrun2 v0.16b, v1.8h, #3 +# CHECK-NEXT: 1 4 1.00 sqrshrun2 v0.8h, v1.4s, #3 +# CHECK-NEXT: 1 4 1.00 sqrshrun2 v0.4s, v1.2d, #3 +# CHECK-NEXT: 1 4 0.50 sqshrn v0.8b, v1.8h, #3 +# CHECK-NEXT: 1 4 0.50 sqshrn v0.4h, v1.4s, #3 +# CHECK-NEXT: 1 4 0.50 sqshrn v0.2s, v1.2d, #3 +# CHECK-NEXT: 1 4 1.00 sqshrn2 v0.16b, v1.8h, #3 +# CHECK-NEXT: 1 4 1.00 sqshrn2 v0.8h, v1.4s, #3 +# CHECK-NEXT: 1 4 1.00 sqshrn2 v0.4s, v1.2d, #3 +# CHECK-NEXT: 1 4 0.50 uqshrn v0.8b, v1.8h, #3 +# CHECK-NEXT: 1 4 0.50 uqshrn v0.4h, v1.4s, #3 +# CHECK-NEXT: 1 4 0.50 uqshrn v0.2s, v1.2d, #3 +# CHECK-NEXT: 1 4 1.00 uqshrn2 v0.16b, v1.8h, #3 +# CHECK-NEXT: 1 4 1.00 uqshrn2 v0.8h, v1.4s, #3 +# CHECK-NEXT: 1 4 1.00 uqshrn2 v0.4s, v1.2d, #3 +# CHECK-NEXT: 1 4 0.50 sqrshrn v0.8b, v1.8h, #3 +# CHECK-NEXT: 1 4 0.50 sqrshrn v0.4h, v1.4s, #3 +# CHECK-NEXT: 1 4 0.50 sqrshrn v0.2s, v1.2d, #3 +# CHECK-NEXT: 1 4 1.00 sqrshrn2 v0.16b, v1.8h, #3 +# CHECK-NEXT: 1 4 1.00 sqrshrn2 v0.8h, v1.4s, #3 +# CHECK-NEXT: 1 4 1.00 sqrshrn2 v0.4s, v1.2d, #3 +# CHECK-NEXT: 1 4 0.50 uqrshrn v0.8b, v1.8h, #3 +# CHECK-NEXT: 1 4 0.50 uqrshrn v0.4h, v1.4s, #3 +# CHECK-NEXT: 1 4 0.50 uqrshrn v0.2s, v1.2d, #3 +# CHECK-NEXT: 1 4 1.00 uqrshrn2 v0.16b, v1.8h, #3 +# CHECK-NEXT: 1 4 1.00 uqrshrn2 v0.8h, v1.4s, #3 +# CHECK-NEXT: 1 4 1.00 uqrshrn2 v0.4s, v1.2d, #3 +# CHECK-NEXT: 1 4 0.50 scvtf v0.2s, v1.2s, #3 +# CHECK-NEXT: 1 4 0.50 scvtf v0.4s, v1.4s, #3 +# CHECK-NEXT: 1 4 0.50 scvtf v0.2d, v1.2d, #3 +# CHECK-NEXT: 1 4 0.50 ucvtf v0.2s, v1.2s, #3 +# CHECK-NEXT: 1 4 0.50 ucvtf v0.4s, v1.4s, #3 +# CHECK-NEXT: 1 4 0.50 ucvtf v0.2d, v1.2d, #3 +# CHECK-NEXT: 1 4 0.50 fcvtzs v0.2s, v1.2s, #3 +# CHECK-NEXT: 1 4 0.50 fcvtzs v0.4s, v1.4s, #3 +# CHECK-NEXT: 1 4 0.50 fcvtzs v0.2d, v1.2d, #3 +# CHECK-NEXT: 1 4 0.50 fcvtzu v0.2s, v1.2s, #3 +# CHECK-NEXT: 1 4 0.50 fcvtzu v0.4s, v1.4s, #3 +# CHECK-NEXT: 1 4 0.50 fcvtzu v0.2d, v1.2d, #3 +# CHECK-NEXT: 1 3 1.00 saddl v0.8h, v1.8b, v2.8b +# CHECK-NEXT: 1 3 1.00 saddl v0.4s, v1.4h, v2.4h +# CHECK-NEXT: 1 3 1.00 saddl v0.2d, v1.2s, v2.2s +# CHECK-NEXT: 1 3 1.00 saddl2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: 1 3 1.00 saddl2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: 1 3 1.00 saddl2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: 1 3 1.00 uaddl v0.8h, v1.8b, v2.8b +# CHECK-NEXT: 1 3 1.00 uaddl v0.4s, v1.4h, v2.4h +# CHECK-NEXT: 1 3 1.00 uaddl v0.2d, v1.2s, v2.2s +# CHECK-NEXT: 1 3 1.00 uaddl2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: 1 3 1.00 uaddl2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: 1 3 1.00 uaddl2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: 1 3 1.00 ssubl v0.8h, v1.8b, v2.8b +# CHECK-NEXT: 1 3 1.00 ssubl v0.4s, v1.4h, v2.4h +# CHECK-NEXT: 1 3 1.00 ssubl v0.2d, v1.2s, v2.2s +# CHECK-NEXT: 1 3 1.00 ssubl2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: 1 3 1.00 ssubl2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: 1 3 1.00 ssubl2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: 1 3 1.00 usubl v0.8h, v1.8b, v2.8b +# CHECK-NEXT: 1 3 1.00 usubl v0.4s, v1.4h, v2.4h +# CHECK-NEXT: 1 3 1.00 usubl v0.2d, v1.2s, v2.2s +# CHECK-NEXT: 1 3 1.00 usubl2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: 1 3 1.00 usubl2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: 1 3 1.00 usubl2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: 1 4 2.00 sabal v0.8h, v1.8b, v2.8b +# CHECK-NEXT: 1 4 2.00 sabal v0.4s, v1.4h, v2.4h +# CHECK-NEXT: 1 4 2.00 sabal v0.2d, v1.2s, v2.2s +# CHECK-NEXT: 1 4 2.00 sabal2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: 1 4 2.00 sabal2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: 1 4 2.00 sabal2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: 1 4 2.00 uabal v0.8h, v1.8b, v2.8b +# CHECK-NEXT: 1 4 2.00 uabal v0.4s, v1.4h, v2.4h +# CHECK-NEXT: 1 4 2.00 uabal v0.2d, v1.2s, v2.2s +# CHECK-NEXT: 1 4 2.00 uabal2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: 1 4 2.00 uabal2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: 1 4 2.00 uabal2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: 1 3 1.00 sabdl v0.8h, v1.8b, v2.8b +# CHECK-NEXT: 1 3 1.00 sabdl v0.4s, v1.4h, v2.4h +# CHECK-NEXT: 1 3 1.00 sabdl v0.2d, v1.2s, v2.2s +# CHECK-NEXT: 1 3 1.00 sabdl2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: 1 3 1.00 sabdl2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: 1 3 1.00 sabdl2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: 1 3 1.00 uabdl v0.8h, v1.8b, v2.8b +# CHECK-NEXT: 1 3 1.00 uabdl v0.4s, v1.4h, v2.4h +# CHECK-NEXT: 1 3 1.00 uabdl v0.2d, v1.2s, v2.2s +# CHECK-NEXT: 1 3 1.00 uabdl2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: 1 3 1.00 uabdl2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: 1 3 1.00 uabdl2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: 1 4 1.00 smlal v0.8h, v1.8b, v2.8b +# CHECK-NEXT: 1 4 1.00 smlal v0.4s, v1.4h, v2.4h +# CHECK-NEXT: 1 4 1.00 smlal v0.2d, v1.2s, v2.2s +# CHECK-NEXT: 1 4 1.00 smlal2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: 1 4 1.00 smlal2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: 1 4 1.00 smlal2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: 1 4 1.00 umlal v0.8h, v1.8b, v2.8b +# CHECK-NEXT: 1 4 1.00 umlal v0.4s, v1.4h, v2.4h +# CHECK-NEXT: 1 4 1.00 umlal v0.2d, v1.2s, v2.2s +# CHECK-NEXT: 1 4 1.00 umlal2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: 1 4 1.00 umlal2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: 1 4 1.00 umlal2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: 1 4 1.00 smlsl v0.8h, v1.8b, v2.8b +# CHECK-NEXT: 1 4 1.00 smlsl v0.4s, v1.4h, v2.4h +# CHECK-NEXT: 1 4 1.00 smlsl v0.2d, v1.2s, v2.2s +# CHECK-NEXT: 1 4 1.00 smlsl2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: 1 4 1.00 smlsl2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: 1 4 1.00 smlsl2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: 1 4 1.00 umlsl v0.8h, v1.8b, v2.8b +# CHECK-NEXT: 1 4 1.00 umlsl v0.4s, v1.4h, v2.4h +# CHECK-NEXT: 1 4 1.00 umlsl v0.2d, v1.2s, v2.2s +# CHECK-NEXT: 1 4 1.00 umlsl2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: 1 4 1.00 umlsl2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: 1 4 1.00 umlsl2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: 1 4 1.00 smull v0.8h, v1.8b, v2.8b +# CHECK-NEXT: 1 4 1.00 smull v0.4s, v1.4h, v2.4h +# CHECK-NEXT: 1 4 1.00 smull v0.2d, v1.2s, v2.2s +# CHECK-NEXT: 1 4 1.00 smull2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: 1 4 1.00 smull2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: 1 4 1.00 smull2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: 1 4 1.00 umull v0.8h, v1.8b, v2.8b +# CHECK-NEXT: 1 4 1.00 umull v0.4s, v1.4h, v2.4h +# CHECK-NEXT: 1 4 1.00 umull v0.2d, v1.2s, v2.2s +# CHECK-NEXT: 1 4 1.00 umull2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: 1 4 1.00 umull2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: 1 4 1.00 umull2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: 1 4 1.00 sqdmlal v0.4s, v1.4h, v2.4h +# CHECK-NEXT: 1 4 1.00 sqdmlal v0.2d, v1.2s, v2.2s +# CHECK-NEXT: 1 4 1.00 sqdmlal2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: 1 4 1.00 sqdmlal2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: 1 4 1.00 sqdmlsl v0.4s, v1.4h, v2.4h +# CHECK-NEXT: 1 4 1.00 sqdmlsl v0.2d, v1.2s, v2.2s +# CHECK-NEXT: 1 4 1.00 sqdmlsl2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: 1 4 1.00 sqdmlsl2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: 1 4 1.00 sqdmull v0.4s, v1.4h, v2.4h +# CHECK-NEXT: 1 4 1.00 sqdmull v0.2d, v1.2s, v2.2s +# CHECK-NEXT: 1 4 1.00 sqdmull2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: 1 4 1.00 sqdmull2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: 1 3 1.00 pmull v0.8h, v1.8b, v2.8b +# CHECK-NEXT: 1 3 1.00 pmull2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: 1 3 1.00 saddw v0.8h, v1.8h, v2.8b +# CHECK-NEXT: 1 3 1.00 saddw v0.4s, v1.4s, v2.4h +# CHECK-NEXT: 1 3 1.00 saddw v0.2d, v1.2d, v2.2s +# CHECK-NEXT: 1 3 1.00 saddw2 v0.8h, v1.8h, v2.16b +# CHECK-NEXT: 1 3 1.00 saddw2 v0.4s, v1.4s, v2.8h +# CHECK-NEXT: 1 3 1.00 saddw2 v0.2d, v1.2d, v2.4s +# CHECK-NEXT: 1 3 1.00 uaddw v0.8h, v1.8h, v2.8b +# CHECK-NEXT: 1 3 1.00 uaddw v0.4s, v1.4s, v2.4h +# CHECK-NEXT: 1 3 1.00 uaddw v0.2d, v1.2d, v2.2s +# CHECK-NEXT: 1 3 1.00 uaddw2 v0.8h, v1.8h, v2.16b +# CHECK-NEXT: 1 3 1.00 uaddw2 v0.4s, v1.4s, v2.8h +# CHECK-NEXT: 1 3 1.00 uaddw2 v0.2d, v1.2d, v2.4s +# CHECK-NEXT: 1 3 1.00 ssubw v0.8h, v1.8h, v2.8b +# CHECK-NEXT: 1 3 1.00 ssubw v0.4s, v1.4s, v2.4h +# CHECK-NEXT: 1 3 1.00 ssubw v0.2d, v1.2d, v2.2s +# CHECK-NEXT: 1 3 1.00 ssubw2 v0.8h, v1.8h, v2.16b +# CHECK-NEXT: 1 3 1.00 ssubw2 v0.4s, v1.4s, v2.8h +# CHECK-NEXT: 1 3 1.00 ssubw2 v0.2d, v1.2d, v2.4s +# CHECK-NEXT: 1 3 1.00 usubw v0.8h, v1.8h, v2.8b +# CHECK-NEXT: 1 3 1.00 usubw v0.4s, v1.4s, v2.4h +# CHECK-NEXT: 1 3 1.00 usubw v0.2d, v1.2d, v2.2s +# CHECK-NEXT: 1 3 1.00 usubw2 v0.8h, v1.8h, v2.16b +# CHECK-NEXT: 1 3 1.00 usubw2 v0.4s, v1.4s, v2.8h +# CHECK-NEXT: 1 3 1.00 usubw2 v0.2d, v1.2d, v2.4s +# CHECK-NEXT: 1 3 1.00 addhn v0.8b, v1.8h, v2.8h +# CHECK-NEXT: 1 3 1.00 addhn v0.4h, v1.4s, v2.4s +# CHECK-NEXT: 1 3 1.00 addhn v0.2s, v1.2d, v2.2d +# CHECK-NEXT: 1 3 1.00 addhn2 v0.16b, v1.8h, v2.8h +# CHECK-NEXT: 1 3 1.00 addhn2 v0.8h, v1.4s, v2.4s +# CHECK-NEXT: 1 3 1.00 addhn2 v0.4s, v1.2d, v2.2d +# CHECK-NEXT: 1 4 2.00 raddhn v0.8b, v1.8h, v2.8h +# CHECK-NEXT: 1 4 2.00 raddhn v0.4h, v1.4s, v2.4s +# CHECK-NEXT: 1 4 2.00 raddhn v0.2s, v1.2d, v2.2d +# CHECK-NEXT: 1 4 2.00 raddhn2 v0.16b, v1.8h, v2.8h +# CHECK-NEXT: 1 4 2.00 raddhn2 v0.8h, v1.4s, v2.4s +# CHECK-NEXT: 1 4 2.00 raddhn2 v0.4s, v1.2d, v2.2d +# CHECK-NEXT: 1 4 2.00 rsubhn v0.8b, v1.8h, v2.8h +# CHECK-NEXT: 1 4 2.00 rsubhn v0.4h, v1.4s, v2.4s +# CHECK-NEXT: 1 4 2.00 rsubhn v0.2s, v1.2d, v2.2d +# CHECK-NEXT: 1 4 2.00 rsubhn2 v0.16b, v1.8h, v2.8h +# CHECK-NEXT: 1 4 2.00 rsubhn2 v0.8h, v1.4s, v2.4s +# CHECK-NEXT: 1 4 2.00 rsubhn2 v0.4s, v1.2d, v2.2d +# CHECK-NEXT: 1 4 0.50 sqdmulh h10, h11, h12 +# CHECK-NEXT: 1 4 0.50 sqdmulh s20, s21, s2 +# CHECK-NEXT: 1 4 0.50 sqrdmulh h10, h11, h12 +# CHECK-NEXT: 1 4 0.50 sqrdmulh s20, s21, s2 # CHECK-NEXT: 1 4 0.50 fmulx s20, s22, s15 -# CHECK-NEXT: 1 4 0.50 fmulx s3, s5, v0.s[3] -# CHECK-NEXT: 1 4 1.00 fmulx v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 fmulx v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 fmulx v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 fneg v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 fneg v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 fneg v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 fneg v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 fneg v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 frecpe d13, d13 -# CHECK-NEXT: 1 4 0.50 frecpe s19, s14 -# CHECK-NEXT: 1 4 1.00 frecpe v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 frecpe v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 frecpe v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 frecpe v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 frecpe v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 frecps v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 frecps d22, d30, d21 +# CHECK-NEXT: 1 4 0.50 fmulx d23, d11, d1 # CHECK-NEXT: 1 4 0.50 frecps s21, s16, s13 -# CHECK-NEXT: 1 4 0.50 frecpx d16, d19 -# CHECK-NEXT: 1 4 0.50 frecpx s18, s10 -# CHECK-NEXT: 1 4 1.00 frinta v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 frinta v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 frinta v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 frinta v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 frinta v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 frinti v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 frinti v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 frinti v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 frinti v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 frinti v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 frintm v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 frintm v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 frintm v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 frintm v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 frintm v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 frintn v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 frintn v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 frintn v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 frintn v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 frintn v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 frintp v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 frintp v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 frintp v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 frintp v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 frintp v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 frintx v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 frintx v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 frintx v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 frintx v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 frintx v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 frintz v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 frintz v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 frintz v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 frintz v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 frintz v0.8h, v0.8h -# CHECK-NEXT: 1 22 19.00 frsqrte d21, d12 -# CHECK-NEXT: 1 12 9.00 frsqrte s22, s13 -# CHECK-NEXT: 1 22 19.00 frsqrte v0.2d, v0.2d -# CHECK-NEXT: 1 12 9.00 frsqrte v0.2s, v0.2s -# CHECK-NEXT: 1 8 5.00 frsqrte v0.4h, v0.4h -# CHECK-NEXT: 1 12 9.00 frsqrte v0.4s, v0.4s -# CHECK-NEXT: 1 8 5.00 frsqrte v0.8h, v0.8h -# CHECK-NEXT: 1 22 19.00 frsqrts d8, d22, d18 +# CHECK-NEXT: 1 4 0.50 frecps d22, d30, d21 # CHECK-NEXT: 1 12 9.00 frsqrts s21, s5, s12 -# CHECK-NEXT: 1 22 19.00 frsqrts v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 22 19.00 fsqrt v0.2d, v0.2d -# CHECK-NEXT: 1 12 9.00 fsqrt v0.2s, v0.2s -# CHECK-NEXT: 1 8 5.00 fsqrt v0.4h, v0.4h -# CHECK-NEXT: 1 12 9.00 fsqrt v0.4s, v0.4s -# CHECK-NEXT: 1 8 5.00 fsqrt v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 fsub v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 5 2.00 * ld1 { v0.16b }, [x0] -# CHECK-NEXT: 2 9 6.00 * ld1 { v0.2d, v1.2d, v2.2d }, [x0], #48 -# CHECK-NEXT: 1 11 8.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] -# CHECK-NEXT: 2 7 4.00 * ld1 { v0.4s, v1.4s }, [sp], #32 -# CHECK-NEXT: 1 9 6.00 * ld1 { v0.4s, v1.4s, v2.4s }, [sp] -# CHECK-NEXT: 2 7 4.00 * ld1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3 -# CHECK-NEXT: 2 5 2.00 * ld1 { v0.8h }, [x15], x2 -# CHECK-NEXT: 1 7 4.00 * ld1 { v0.8h, v1.8h }, [x15] -# CHECK-NEXT: 1 4 1.00 * ld1 { v0.b }[9], [x0] -# CHECK-NEXT: 2 4 1.00 * ld1 { v0.b }[9], [x0], #1 -# CHECK-NEXT: 1 4 1.00 * ld1r { v0.16b }, [x0] -# CHECK-NEXT: 2 4 1.00 * ld1r { v0.16b }, [x0], #1 -# CHECK-NEXT: 1 4 1.00 * ld1r { v0.8h }, [x15] -# CHECK-NEXT: 2 4 1.00 * ld1r { v0.8h }, [x15], #2 -# CHECK-NEXT: 2 7 4.00 * ld2 { v0.16b, v1.16b }, [x0], x1 -# CHECK-NEXT: 1 5 2.00 * ld2 { v0.8b, v1.8b }, [x0] -# CHECK-NEXT: 1 5 2.00 * ld2 { v0.h, v1.h }[7], [x15] -# CHECK-NEXT: 2 5 2.00 * ld2 { v0.h, v1.h }[7], [x15], #4 -# CHECK-NEXT: 1 5 2.00 * ld2r { v0.2d, v1.2d }, [x0] -# CHECK-NEXT: 2 5 2.00 * ld2r { v0.2d, v1.2d }, [x0], #16 -# CHECK-NEXT: 1 5 2.00 * ld2r { v0.4s, v1.4s }, [sp] -# CHECK-NEXT: 2 5 2.00 * ld2r { v0.4s, v1.4s }, [sp], #8 -# CHECK-NEXT: 1 6 3.00 * ld3 { v0.4h, v1.4h, v2.4h }, [x15] -# CHECK-NEXT: 2 9 6.00 * ld3 { v0.8h, v1.8h, v2.8h }, [x15], x2 -# CHECK-NEXT: 1 5 2.00 * ld3 { v0.s, v1.s, v2.s }[3], [sp] -# CHECK-NEXT: 2 5 2.00 * ld3 { v0.s, v1.s, v2.s }[3], [sp], x3 -# CHECK-NEXT: 1 5 2.00 * ld3r { v0.4h, v1.4h, v2.4h }, [x15] -# CHECK-NEXT: 2 5 2.00 * ld3r { v0.4h, v1.4h, v2.4h }, [x15], #6 -# CHECK-NEXT: 1 5 2.00 * ld3r { v0.8b, v1.8b, v2.8b }, [x0] -# CHECK-NEXT: 2 5 2.00 * ld3r { v0.8b, v1.8b, v2.8b }, [x0], #3 -# CHECK-NEXT: 1 7 4.00 * ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] -# CHECK-NEXT: 2 11 8.00 * ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64 -# CHECK-NEXT: 1 5 2.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0] -# CHECK-NEXT: 2 5 2.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0], #32 -# CHECK-NEXT: 2 5 2.00 * ld4 { v0.h, v1.h, v2.h, v3.h }[7], [x0], x0 -# CHECK-NEXT: 1 5 2.00 * ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [sp] -# CHECK-NEXT: 2 5 2.00 * ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [sp], x7 -# CHECK-NEXT: 1 5 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] -# CHECK-NEXT: 2 5 2.00 * ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x30 -# CHECK-NEXT: 1 4 0.50 mla v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 0.50 mls v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 mov b0, v0.b[15] -# CHECK-NEXT: 1 4 0.50 mov d6, v0.d[1] -# CHECK-NEXT: 1 4 0.50 mov h2, v0.h[5] -# CHECK-NEXT: 1 4 0.50 mov s17, v0.s[2] -# CHECK-NEXT: 1 4 1.00 mov v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 mov v0.8b, v0.8b -# CHECK-NEXT: 1 4 0.50 movi d15, #0xff00ff00ff00ff -# CHECK-NEXT: 1 4 1.00 movi v0.16b, #31 -# CHECK-NEXT: 1 4 1.00 movi v0.2d, #0xff0000ff0000ffff -# CHECK-NEXT: 1 4 0.50 movi v0.2s, #8, msl #8 -# CHECK-NEXT: 1 4 1.00 movi v0.4s, #255, lsl #24 -# CHECK-NEXT: 1 4 0.50 movi v0.8b, #255 -# CHECK-NEXT: 1 4 0.50 mul v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 0.50 mvni v0.2s, #0 -# CHECK-NEXT: 1 4 1.00 mvni v0.4s, #16, msl #16 -# CHECK-NEXT: 1 4 0.50 neg d29, d24 -# CHECK-NEXT: 1 4 1.00 neg v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 neg v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 neg v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 neg v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 neg v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 neg v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 neg v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 mvn v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 mvn v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 orn v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 mov v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 orr v0.8h, #31 -# CHECK-NEXT: 1 4 1.00 pmul v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 pmul v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 pmull v0.8h, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 pmull2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 raddhn v0.2s, v0.2d, v0.2d -# CHECK-NEXT: 1 4 1.00 raddhn v0.4h, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 raddhn v0.8b, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 raddhn2 v0.16b, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 raddhn2 v0.4s, v0.2d, v0.2d -# CHECK-NEXT: 1 4 1.00 raddhn2 v0.8h, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 rbit v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 rbit v0.8b, v0.8b -# CHECK-NEXT: 1 4 0.50 rev16 v21.8b, v1.8b -# CHECK-NEXT: 1 4 1.00 rev16 v30.16b, v31.16b -# CHECK-NEXT: 1 4 0.50 rev32 v0.4h, v9.4h -# CHECK-NEXT: 1 4 0.50 rev32 v21.8b, v1.8b -# CHECK-NEXT: 1 4 1.00 rev32 v30.16b, v31.16b -# CHECK-NEXT: 1 4 1.00 rev32 v4.8h, v7.8h -# CHECK-NEXT: 1 4 1.00 rev64 v0.16b, v31.16b -# CHECK-NEXT: 1 4 0.50 rev64 v1.8b, v9.8b -# CHECK-NEXT: 1 4 0.50 rev64 v13.4h, v21.4h -# CHECK-NEXT: 1 4 1.00 rev64 v2.8h, v4.8h -# CHECK-NEXT: 1 4 0.50 rev64 v4.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 rev64 v6.4s, v8.4s -# CHECK-NEXT: 1 4 0.50 rshrn v0.2s, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 rshrn v0.4h, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 rshrn v0.8b, v0.8h, #3 -# CHECK-NEXT: 1 4 1.00 rshrn2 v0.16b, v0.8h, #3 -# CHECK-NEXT: 1 4 1.00 rshrn2 v0.4s, v0.2d, #3 -# CHECK-NEXT: 1 4 1.00 rshrn2 v0.8h, v0.4s, #3 -# CHECK-NEXT: 1 4 1.00 rsubhn v0.2s, v0.2d, v0.2d -# CHECK-NEXT: 1 4 1.00 rsubhn v0.4h, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 rsubhn v0.8b, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 rsubhn2 v0.16b, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 rsubhn2 v0.4s, v0.2d, v0.2d -# CHECK-NEXT: 1 4 1.00 rsubhn2 v0.8h, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 saba v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 sabal v0.2d, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 sabal v0.4s, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 sabal v0.8h, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 sabal2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 sabal2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 sabal2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 sabd v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 sabdl v0.2d, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 sabdl v0.4s, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 sabdl v0.8h, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 sabdl2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 sabdl2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 sabdl2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 sadalp v0.1d, v0.2s -# CHECK-NEXT: 1 4 1.00 sadalp v0.2d, v0.4s -# CHECK-NEXT: 1 4 0.50 sadalp v0.2s, v0.4h -# CHECK-NEXT: 1 4 0.50 sadalp v0.4h, v0.8b -# CHECK-NEXT: 1 4 1.00 sadalp v0.4s, v0.8h -# CHECK-NEXT: 1 4 1.00 sadalp v0.8h, v0.16b -# CHECK-NEXT: 1 4 1.00 saddl v0.2d, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 saddl v0.4s, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 saddl v0.8h, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 saddl2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 saddl2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 saddl2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 saddlp v0.1d, v0.2s -# CHECK-NEXT: 1 4 1.00 saddlp v0.2d, v0.4s -# CHECK-NEXT: 1 4 0.50 saddlp v0.2s, v0.4h -# CHECK-NEXT: 1 4 0.50 saddlp v0.4h, v0.8b -# CHECK-NEXT: 1 4 1.00 saddlp v0.4s, v0.8h -# CHECK-NEXT: 1 4 1.00 saddlp v0.8h, v0.16b -# CHECK-NEXT: 1 4 1.00 saddw v0.2d, v0.2d, v0.2s -# CHECK-NEXT: 1 4 1.00 saddw v0.4s, v0.4s, v0.4h -# CHECK-NEXT: 1 4 1.00 saddw v0.8h, v0.8h, v0.8b -# CHECK-NEXT: 1 4 1.00 saddw2 v0.2d, v0.2d, v0.4s -# CHECK-NEXT: 1 4 1.00 saddw2 v0.4s, v0.4s, v0.8h -# CHECK-NEXT: 1 4 1.00 saddw2 v0.8h, v0.8h, v0.16b -# CHECK-NEXT: 1 4 0.50 scvtf d21, d12 -# CHECK-NEXT: 1 4 0.50 scvtf d21, d12, #64 +# CHECK-NEXT: 1 22 19.00 frsqrts d8, d22, d18 # CHECK-NEXT: 1 4 0.50 scvtf s22, s13 -# CHECK-NEXT: 1 4 0.50 scvtf s22, s13, #32 -# CHECK-NEXT: 1 4 0.50 scvtf v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 scvtf v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 scvtf v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 scvtf v0.2s, v0.2s, #3 -# CHECK-NEXT: 1 4 0.50 scvtf v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 scvtf v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 scvtf v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 scvtf v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 shadd v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 0.50 shl d7, d10, #12 -# CHECK-NEXT: 1 4 1.00 shl v0.16b, v0.16b, #3 -# CHECK-NEXT: 1 4 1.00 shl v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 shl v0.4h, v0.4h, #3 -# CHECK-NEXT: 1 4 1.00 shl v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 1.00 shll v0.2d, v0.2s, #32 -# CHECK-NEXT: 1 4 1.00 shll v0.4s, v0.4h, #16 -# CHECK-NEXT: 1 4 1.00 shll v0.8h, v0.8b, #8 -# CHECK-NEXT: 1 4 1.00 shll v0.2d, v0.2s, #32 -# CHECK-NEXT: 1 4 1.00 shll v0.4s, v0.4h, #16 -# CHECK-NEXT: 1 4 1.00 shll v0.8h, v0.8b, #8 -# CHECK-NEXT: 1 4 1.00 shll2 v0.2d, v0.4s, #32 -# CHECK-NEXT: 1 4 1.00 shll2 v0.4s, v0.8h, #16 -# CHECK-NEXT: 1 4 1.00 shll2 v0.8h, v0.16b, #8 -# CHECK-NEXT: 1 4 1.00 shll2 v0.2d, v0.4s, #32 -# CHECK-NEXT: 1 4 1.00 shll2 v0.4s, v0.8h, #16 -# CHECK-NEXT: 1 4 1.00 shll2 v0.8h, v0.16b, #8 -# CHECK-NEXT: 1 4 0.50 shrn v0.2s, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 shrn v0.4h, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 shrn v0.8b, v0.8h, #3 -# CHECK-NEXT: 1 4 1.00 shrn2 v0.16b, v0.8h, #3 -# CHECK-NEXT: 1 4 1.00 shrn2 v0.4s, v0.2d, #3 -# CHECK-NEXT: 1 4 1.00 shrn2 v0.8h, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 shsub v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 shsub v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 sli d10, d14, #12 -# CHECK-NEXT: 1 4 1.00 sli v0.16b, v0.16b, #3 -# CHECK-NEXT: 1 4 1.00 sli v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 sli v0.2s, v0.2s, #3 -# CHECK-NEXT: 1 4 0.50 sli v0.4h, v0.4h, #3 -# CHECK-NEXT: 1 4 1.00 sli v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 sli v0.8b, v0.8b, #3 -# CHECK-NEXT: 1 4 1.00 sli v0.8h, v0.8h, #3 -# CHECK-NEXT: 1 4 0.50 smax v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 smax v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 smax v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 0.50 smaxp v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 smaxp v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 smaxp v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 smin v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 smin v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 smin v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 sminp v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 sminp v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 sminp v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 smlal v0.2d, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 smlal v0.4s, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 smlal v0.8h, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 smlal2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 smlal2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 smlal2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 smlsl v0.2d, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 smlsl v0.4s, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 smlsl v0.8h, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 smlsl2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 smlsl2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 smlsl2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 smull v0.2d, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 smull v0.4s, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 smull v0.8h, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 smull2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 smull2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 smull2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: 1 4 0.50 scvtf d21, d12 +# CHECK-NEXT: 1 4 0.50 ucvtf s22, s13 +# CHECK-NEXT: 1 4 0.50 ucvtf d21, d14 +# CHECK-NEXT: 1 4 0.50 frecpe s19, s14 +# CHECK-NEXT: 1 4 0.50 frecpe d13, d13 +# CHECK-NEXT: 1 4 0.50 frecpx s18, s10 +# CHECK-NEXT: 1 4 0.50 frecpx d16, d19 +# CHECK-NEXT: 1 12 9.00 frsqrte s22, s13 +# CHECK-NEXT: 1 22 19.00 frsqrte d21, d12 +# CHECK-NEXT: 1 2 0.50 cmeq d20, d21, d22 +# CHECK-NEXT: 1 2 0.50 cmeq d20, d21, #0 +# CHECK-NEXT: 1 2 0.50 cmhs d20, d21, d22 +# CHECK-NEXT: 1 2 0.50 cmge d20, d21, d22 +# CHECK-NEXT: 1 2 0.50 cmge d20, d21, #0 +# CHECK-NEXT: 1 2 0.50 cmhi d20, d21, d22 +# CHECK-NEXT: 1 2 0.50 cmgt d20, d21, d22 +# CHECK-NEXT: 1 2 0.50 cmgt d20, d21, #0 +# CHECK-NEXT: 1 2 0.50 cmle d20, d21, #0 +# CHECK-NEXT: 1 2 0.50 cmlt d20, d21, #0 +# CHECK-NEXT: 1 3 0.50 cmtst d20, d21, d22 +# CHECK-NEXT: 1 2 0.50 fcmeq s10, s11, s12 +# CHECK-NEXT: 1 2 0.50 fcmeq d20, d21, d22 +# CHECK-NEXT: 1 2 0.50 fcmeq s10, s11, #0.0 +# CHECK-NEXT: 1 2 0.50 fcmeq d20, d21, #0.0 +# CHECK-NEXT: 1 2 0.50 fcmge s10, s11, s12 +# CHECK-NEXT: 1 2 0.50 fcmge d20, d21, d22 +# CHECK-NEXT: 1 2 0.50 fcmge s10, s11, #0.0 +# CHECK-NEXT: 1 2 0.50 fcmge d20, d21, #0.0 +# CHECK-NEXT: 1 2 0.50 fcmgt s10, s11, s12 +# CHECK-NEXT: 1 2 0.50 fcmgt d20, d21, d22 +# CHECK-NEXT: 1 2 0.50 fcmgt s10, s11, #0.0 +# CHECK-NEXT: 1 2 0.50 fcmgt d20, d21, #0.0 +# CHECK-NEXT: 1 2 0.50 fcmle s10, s11, #0.0 +# CHECK-NEXT: 1 2 0.50 fcmle d20, d21, #0.0 +# CHECK-NEXT: 1 2 0.50 fcmlt s10, s11, #0.0 +# CHECK-NEXT: 1 2 0.50 fcmlt d20, d21, #0.0 +# CHECK-NEXT: 1 2 0.50 facge s10, s11, s12 +# CHECK-NEXT: 1 2 0.50 facge d20, d21, d22 +# CHECK-NEXT: 1 2 0.50 facgt s10, s11, s12 +# CHECK-NEXT: 1 2 0.50 facgt d20, d21, d22 +# CHECK-NEXT: 1 3 0.50 abs d29, d24 # CHECK-NEXT: 1 4 0.50 sqabs b19, b14 -# CHECK-NEXT: 1 4 0.50 sqabs d18, d12 # CHECK-NEXT: 1 4 0.50 sqabs h21, h15 # CHECK-NEXT: 1 4 0.50 sqabs s20, s12 -# CHECK-NEXT: 1 4 1.00 sqabs v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 sqabs v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 sqabs v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 sqabs v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 sqabs v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 sqabs v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 sqabs v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 sqadd b20, b11, b15 -# CHECK-NEXT: 1 4 1.00 sqadd v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 sqadd v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 sqdmlal d19, s24, s12 -# CHECK-NEXT: 1 4 0.50 sqdmlal d8, s9, v0.s[1] -# CHECK-NEXT: 1 4 0.50 sqdmlal s0, h0, v0.h[3] -# CHECK-NEXT: 1 4 0.50 sqdmlal s17, h27, h12 -# CHECK-NEXT: 1 4 1.00 sqdmlal v0.2d, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 sqdmlal v0.4s, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 sqdmlal2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 sqdmlal2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 sqdmlsl d12, s23, s13 -# CHECK-NEXT: 1 4 0.50 sqdmlsl d8, s9, v0.s[1] -# CHECK-NEXT: 1 4 0.50 sqdmlsl s0, h0, v0.h[3] -# CHECK-NEXT: 1 4 0.50 sqdmlsl s14, h12, h25 -# CHECK-NEXT: 1 4 1.00 sqdmlsl v0.2d, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 sqdmlsl v0.4s, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 sqdmlsl2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 sqdmlsl2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 sqdmulh h10, h11, h12 -# CHECK-NEXT: 1 4 0.50 sqdmulh h7, h15, v0.h[3] -# CHECK-NEXT: 1 4 0.50 sqdmulh s15, s14, v0.s[1] -# CHECK-NEXT: 1 4 0.50 sqdmulh s20, s21, s2 -# CHECK-NEXT: 1 4 0.50 sqdmulh v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 sqdmulh v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 sqdmull d1, s1, v0.s[1] -# CHECK-NEXT: 1 4 0.50 sqdmull d15, s22, s12 -# CHECK-NEXT: 1 4 0.50 sqdmull s1, h1, v0.h[3] -# CHECK-NEXT: 1 4 0.50 sqdmull s12, h22, h12 -# CHECK-NEXT: 1 4 1.00 sqdmull v0.2d, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 sqdmull v0.4s, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 sqdmull2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 sqdmull2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 sqneg b19, b14 -# CHECK-NEXT: 1 4 0.50 sqneg d18, d12 -# CHECK-NEXT: 1 4 0.50 sqneg h21, h15 -# CHECK-NEXT: 1 4 0.50 sqneg s20, s12 -# CHECK-NEXT: 1 4 1.00 sqneg v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 sqneg v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 sqneg v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 sqneg v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 sqneg v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 sqneg v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 sqneg v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 sqrdmulh h10, h11, h12 -# CHECK-NEXT: 1 4 0.50 sqrdmulh h7, h15, v0.h[3] -# CHECK-NEXT: 1 4 0.50 sqrdmulh s15, s14, v0.s[1] -# CHECK-NEXT: 1 4 0.50 sqrdmulh s20, s21, s2 -# CHECK-NEXT: 1 4 0.50 sqrdmulh v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 sqrdmulh v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 sqrshl d31, d31, d31 -# CHECK-NEXT: 1 4 0.50 sqrshl h3, h4, h15 -# CHECK-NEXT: 1 4 0.50 sqrshl v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 sqrshl v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 sqrshl v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 0.50 sqrshrn b10, h13, #2 -# CHECK-NEXT: 1 4 0.50 sqrshrn h15, s10, #6 -# CHECK-NEXT: 1 4 0.50 sqrshrn s15, d12, #9 -# CHECK-NEXT: 1 4 0.50 sqrshrn v0.2s, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 sqrshrn v0.4h, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 sqrshrn v0.8b, v0.8h, #3 -# CHECK-NEXT: 1 4 1.00 sqrshrn2 v0.16b, v0.8h, #3 -# CHECK-NEXT: 1 4 1.00 sqrshrn2 v0.4s, v0.2d, #3 -# CHECK-NEXT: 1 4 1.00 sqrshrn2 v0.8h, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 sqrshrun b17, h10, #6 -# CHECK-NEXT: 1 4 0.50 sqrshrun h10, s13, #15 -# CHECK-NEXT: 1 4 0.50 sqrshrun s22, d16, #31 -# CHECK-NEXT: 1 4 0.50 sqrshrun v0.2s, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 sqrshrun v0.4h, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 sqrshrun v0.8b, v0.8h, #3 -# CHECK-NEXT: 1 4 1.00 sqrshrun2 v0.16b, v0.8h, #3 -# CHECK-NEXT: 1 4 1.00 sqrshrun2 v0.4s, v0.2d, #3 -# CHECK-NEXT: 1 4 1.00 sqrshrun2 v0.8h, v0.4s, #3 +# CHECK-NEXT: 1 4 0.50 sqabs d18, d12 +# CHECK-NEXT: 1 2 0.50 neg d29, d24 +# CHECK-NEXT: 1 3 0.50 sqneg b19, b14 +# CHECK-NEXT: 1 3 0.50 sqneg h21, h15 +# CHECK-NEXT: 1 3 0.50 sqneg s20, s12 +# CHECK-NEXT: 1 3 0.50 sqneg d18, d12 +# CHECK-NEXT: 1 3 0.50 suqadd b19, b14 +# CHECK-NEXT: 1 3 0.50 suqadd h20, h15 +# CHECK-NEXT: 1 3 0.50 suqadd s21, s12 +# CHECK-NEXT: 1 3 0.50 suqadd d18, d22 +# CHECK-NEXT: 1 3 0.50 usqadd b19, b14 +# CHECK-NEXT: 1 3 0.50 usqadd h20, h15 +# CHECK-NEXT: 1 3 0.50 usqadd s21, s12 +# CHECK-NEXT: 1 3 0.50 usqadd d18, d22 +# CHECK-NEXT: 1 4 1.00 sqdmlal s17, h27, h12 +# CHECK-NEXT: 1 4 1.00 sqdmlal d19, s24, s12 +# CHECK-NEXT: 1 4 1.00 sqdmlsl s14, h12, h25 +# CHECK-NEXT: 1 4 1.00 sqdmlsl d12, s23, s13 +# CHECK-NEXT: 1 4 1.00 sqdmull s12, h22, h12 +# CHECK-NEXT: 1 4 1.00 sqdmull d15, s22, s12 +# CHECK-NEXT: 1 4 0.50 sqxtun b19, h14 +# CHECK-NEXT: 1 4 0.50 sqxtun h21, s15 +# CHECK-NEXT: 1 4 0.50 sqxtun s20, d12 +# CHECK-NEXT: 1 4 0.50 sqxtn b18, h18 +# CHECK-NEXT: 1 4 0.50 sqxtn h20, s17 +# CHECK-NEXT: 1 4 0.50 sqxtn s19, d14 +# CHECK-NEXT: 1 4 0.50 uqxtn b18, h18 +# CHECK-NEXT: 1 4 0.50 uqxtn h20, s17 +# CHECK-NEXT: 1 4 0.50 uqxtn s19, d14 +# CHECK-NEXT: 1 2 0.50 sshr d15, d16, #12 +# CHECK-NEXT: 1 2 0.50 ushr d10, d17, #18 +# CHECK-NEXT: 1 3 0.50 srshr d19, d18, #7 +# CHECK-NEXT: 1 3 0.50 urshr d20, d23, #31 +# CHECK-NEXT: 1 3 0.50 ssra d18, d12, #21 +# CHECK-NEXT: 1 3 0.50 usra d20, d13, #61 +# CHECK-NEXT: 1 4 2.00 srsra d15, d11, #19 +# CHECK-NEXT: 1 4 2.00 ursra d18, d10, #13 +# CHECK-NEXT: 1 2 0.50 shl d7, d10, #12 # CHECK-NEXT: 1 4 0.50 sqshl b11, b19, #7 -# CHECK-NEXT: 1 4 0.50 sqshl d15, d16, #51 -# CHECK-NEXT: 1 4 0.50 sqshl d31, d31, d31 # CHECK-NEXT: 1 4 0.50 sqshl h13, h18, #11 -# CHECK-NEXT: 1 4 0.50 sqshl h3, h4, h15 # CHECK-NEXT: 1 4 0.50 sqshl s14, s17, #22 -# CHECK-NEXT: 1 4 1.00 sqshl v0.16b, v0.16b, #3 -# CHECK-NEXT: 1 4 1.00 sqshl v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 sqshl v0.2s, v0.2s, #3 -# CHECK-NEXT: 1 4 0.50 sqshl v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 sqshl v0.4h, v0.4h, #3 -# CHECK-NEXT: 1 4 0.50 sqshl v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 sqshl v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 sqshl v0.8b, v0.8b, #3 -# CHECK-NEXT: 1 4 0.50 sqshl v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 sqshl v0.8h, v0.8h, #3 +# CHECK-NEXT: 1 4 0.50 sqshl d15, d16, #51 +# CHECK-NEXT: 1 4 0.50 uqshl b18, b15, #6 +# CHECK-NEXT: 1 4 0.50 uqshl h11, h18, #7 +# CHECK-NEXT: 1 4 0.50 uqshl s14, s19, #18 +# CHECK-NEXT: 1 4 0.50 uqshl d15, d12, #19 # CHECK-NEXT: 1 4 0.50 sqshlu b15, b18, #6 -# CHECK-NEXT: 1 4 0.50 sqshlu d11, d13, #32 # CHECK-NEXT: 1 4 0.50 sqshlu h19, h17, #6 # CHECK-NEXT: 1 4 0.50 sqshlu s16, s14, #25 -# CHECK-NEXT: 1 4 1.00 sqshlu v0.16b, v0.16b, #3 -# CHECK-NEXT: 1 4 1.00 sqshlu v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 sqshlu v0.2s, v0.2s, #3 -# CHECK-NEXT: 1 4 0.50 sqshlu v0.4h, v0.4h, #3 -# CHECK-NEXT: 1 4 1.00 sqshlu v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 sqshlu v0.8b, v0.8b, #3 -# CHECK-NEXT: 1 4 1.00 sqshlu v0.8h, v0.8h, #3 +# CHECK-NEXT: 1 4 0.50 sqshlu d11, d13, #32 +# CHECK-NEXT: 1 2 0.50 sri d10, d12, #14 +# CHECK-NEXT: 1 2 0.50 sli d10, d14, #12 # CHECK-NEXT: 1 4 0.50 sqshrn b10, h15, #5 # CHECK-NEXT: 1 4 0.50 sqshrn h17, s10, #4 # CHECK-NEXT: 1 4 0.50 sqshrn s18, d10, #31 -# CHECK-NEXT: 1 4 0.50 sqshrn v0.2s, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 sqshrn v0.4h, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 sqshrn v0.8b, v0.8h, #3 -# CHECK-NEXT: 1 4 1.00 sqshrn2 v0.16b, v0.8h, #3 -# CHECK-NEXT: 1 4 1.00 sqshrn2 v0.4s, v0.2d, #3 -# CHECK-NEXT: 1 4 1.00 sqshrn2 v0.8h, v0.4s, #3 +# CHECK-NEXT: 1 4 0.50 uqshrn b12, h10, #7 +# CHECK-NEXT: 1 4 0.50 uqshrn h10, s14, #5 +# CHECK-NEXT: 1 4 0.50 uqshrn s10, d12, #13 +# CHECK-NEXT: 1 4 0.50 sqrshrn b10, h13, #2 +# CHECK-NEXT: 1 4 0.50 sqrshrn h15, s10, #6 +# CHECK-NEXT: 1 4 0.50 sqrshrn s15, d12, #9 +# CHECK-NEXT: 1 4 0.50 uqrshrn b10, h12, #5 +# CHECK-NEXT: 1 4 0.50 uqrshrn h12, s10, #14 +# CHECK-NEXT: 1 4 0.50 uqrshrn s10, d10, #25 # CHECK-NEXT: 1 4 0.50 sqshrun b15, h10, #7 # CHECK-NEXT: 1 4 0.50 sqshrun h20, s14, #3 # CHECK-NEXT: 1 4 0.50 sqshrun s10, d15, #15 -# CHECK-NEXT: 1 4 0.50 sqshrun v0.2s, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 sqshrun v0.4h, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 sqshrun v0.8b, v0.8h, #3 -# CHECK-NEXT: 1 4 1.00 sqshrun2 v0.16b, v0.8h, #3 -# CHECK-NEXT: 1 4 1.00 sqshrun2 v0.4s, v0.2d, #3 -# CHECK-NEXT: 1 4 1.00 sqshrun2 v0.8h, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 sqsub s20, s10, s7 -# CHECK-NEXT: 1 4 1.00 sqsub v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 1.00 sqsub v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 sqsub v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 0.50 sqxtn b18, h18 -# CHECK-NEXT: 1 4 0.50 sqxtn h20, s17 -# CHECK-NEXT: 1 4 0.50 sqxtn s19, d14 -# CHECK-NEXT: 1 4 1.00 sqxtn v0.2s, v0.2d -# CHECK-NEXT: 1 4 1.00 sqxtn v0.4h, v0.4s -# CHECK-NEXT: 1 4 1.00 sqxtn v0.8b, v0.8h -# CHECK-NEXT: 1 4 1.00 sqxtn2 v0.16b, v0.8h -# CHECK-NEXT: 1 4 1.00 sqxtn2 v0.4s, v0.2d -# CHECK-NEXT: 1 4 1.00 sqxtn2 v0.8h, v0.4s -# CHECK-NEXT: 1 4 0.50 sqxtun b19, h14 -# CHECK-NEXT: 1 4 0.50 sqxtun h21, s15 -# CHECK-NEXT: 1 4 0.50 sqxtun s20, d12 -# CHECK-NEXT: 1 4 1.00 sqxtun v0.2s, v0.2d -# CHECK-NEXT: 1 4 1.00 sqxtun v0.4h, v0.4s -# CHECK-NEXT: 1 4 1.00 sqxtun v0.8b, v0.8h -# CHECK-NEXT: 1 4 1.00 sqxtun2 v0.16b, v0.8h -# CHECK-NEXT: 1 4 1.00 sqxtun2 v0.4s, v0.2d -# CHECK-NEXT: 1 4 1.00 sqxtun2 v0.8h, v0.4s -# CHECK-NEXT: 1 4 0.50 srhadd v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 srhadd v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 srhadd v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 0.50 sri d10, d12, #14 -# CHECK-NEXT: 1 4 1.00 sri v0.16b, v0.16b, #3 -# CHECK-NEXT: 1 4 1.00 sri v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 sri v0.2s, v0.2s, #3 -# CHECK-NEXT: 1 4 0.50 sri v0.4h, v0.4h, #3 -# CHECK-NEXT: 1 4 1.00 sri v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 sri v0.8b, v0.8b, #3 -# CHECK-NEXT: 1 4 1.00 sri v0.8h, v0.8h, #3 -# CHECK-NEXT: 1 4 0.50 srshl d16, d16, d16 -# CHECK-NEXT: 1 4 0.50 srshl v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 srshl v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 srshl v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 0.50 srshr d19, d18, #7 -# CHECK-NEXT: 1 4 1.00 srshr v0.16b, v0.16b, #3 -# CHECK-NEXT: 1 4 1.00 srshr v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 srshr v0.2s, v0.2s, #3 -# CHECK-NEXT: 1 4 0.50 srshr v0.4h, v0.4h, #3 -# CHECK-NEXT: 1 4 1.00 srshr v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 srshr v0.8b, v0.8b, #3 -# CHECK-NEXT: 1 4 1.00 srshr v0.8h, v0.8h, #3 -# CHECK-NEXT: 1 4 0.50 srsra d15, d11, #19 -# CHECK-NEXT: 1 4 1.00 srsra v0.16b, v0.16b, #3 -# CHECK-NEXT: 1 4 1.00 srsra v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 srsra v0.2s, v0.2s, #3 -# CHECK-NEXT: 1 4 0.50 srsra v0.4h, v0.4h, #3 -# CHECK-NEXT: 1 4 1.00 srsra v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 srsra v0.8b, v0.8b, #3 -# CHECK-NEXT: 1 4 1.00 srsra v0.8h, v0.8h, #3 -# CHECK-NEXT: 1 4 0.50 sshl d31, d31, d31 -# CHECK-NEXT: 1 4 1.00 sshl v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 sshl v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 sshl v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 sshl v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 0.50 sshll v0.2d, v0.2s, #3 -# CHECK-NEXT: 1 4 1.00 sshll2 v0.4s, v0.8h, #3 -# CHECK-NEXT: 1 4 0.50 sshr d15, d16, #12 -# CHECK-NEXT: 1 4 1.00 sshr v0.16b, v0.16b, #3 -# CHECK-NEXT: 1 4 1.00 sshr v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 sshr v0.2s, v0.2s, #3 -# CHECK-NEXT: 1 4 0.50 sshr v0.4h, v0.4h, #3 -# CHECK-NEXT: 1 4 1.00 sshr v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 sshr v0.8b, v0.8b, #3 -# CHECK-NEXT: 1 4 1.00 sshr v0.8h, v0.8h, #3 -# CHECK-NEXT: 1 4 0.50 ssra d18, d12, #21 -# CHECK-NEXT: 1 4 1.00 ssra v0.16b, v0.16b, #3 -# CHECK-NEXT: 1 4 1.00 ssra v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 ssra v0.2s, v0.2s, #3 -# CHECK-NEXT: 1 4 0.50 ssra v0.4h, v0.4h, #3 -# CHECK-NEXT: 1 4 1.00 ssra v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 ssra v0.8b, v0.8b, #3 -# CHECK-NEXT: 1 4 1.00 ssra v0.8h, v0.8h, #3 -# CHECK-NEXT: 1 4 1.00 ssubl v0.2d, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 ssubl v0.4s, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 ssubl v0.8h, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 ssubl2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 ssubl2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 ssubl2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 ssubw v0.2d, v0.2d, v0.2s -# CHECK-NEXT: 1 4 1.00 ssubw v0.4s, v0.4s, v0.4h -# CHECK-NEXT: 1 4 1.00 ssubw v0.8h, v0.8h, v0.8b -# CHECK-NEXT: 1 4 1.00 ssubw2 v0.2d, v0.2d, v0.4s -# CHECK-NEXT: 1 4 1.00 ssubw2 v0.4s, v0.4s, v0.8h -# CHECK-NEXT: 1 4 1.00 ssubw2 v0.8h, v0.8h, v0.16b +# CHECK-NEXT: 1 4 0.50 sqrshrun b17, h10, #6 +# CHECK-NEXT: 1 4 0.50 sqrshrun h10, s13, #15 +# CHECK-NEXT: 1 4 0.50 sqrshrun s22, d16, #31 +# CHECK-NEXT: 1 4 0.50 scvtf s22, s13, #32 +# CHECK-NEXT: 1 4 0.50 scvtf d21, d12, #64 +# CHECK-NEXT: 1 4 0.50 ucvtf s22, s13, #32 +# CHECK-NEXT: 1 4 0.50 ucvtf d21, d14, #64 +# CHECK-NEXT: 1 4 0.50 fcvtzs s21, s12, #1 +# CHECK-NEXT: 1 4 0.50 fcvtzs d21, d12, #1 +# CHECK-NEXT: 1 4 0.50 fcvtzu s21, s12, #1 +# CHECK-NEXT: 1 4 0.50 fcvtzu d21, d12, #1 +# CHECK-NEXT: 1 5 2.00 * ld1 { v0.16b }, [x0] +# CHECK-NEXT: 1 7 4.00 * ld1 { v15.8h, v16.8h }, [x15] +# CHECK-NEXT: 1 9 6.00 * ld1 { v31.4s, v0.4s, v1.4s }, [sp] +# CHECK-NEXT: 1 11 8.00 * ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] +# CHECK-NEXT: 1 5 2.00 * ld2 { v0.8b, v1.8b }, [x0] +# CHECK-NEXT: 1 6 3.00 * ld3 { v15.4h, v16.4h, v17.4h }, [x15] +# CHECK-NEXT: 1 7 4.00 * ld4 { v31.2s, v0.2s, v1.2s, v2.2s }, [sp] # CHECK-NEXT: 1 4 1.00 * st1 { v0.16b }, [x0] -# CHECK-NEXT: 2 5 2.00 * st1 { v0.2d, v1.2d, v2.2d }, [x0], #48 +# CHECK-NEXT: 1 4 1.00 * st1 { v15.8h, v16.8h }, [x15] +# CHECK-NEXT: 1 5 2.00 * st1 { v31.4s, v0.4s, v1.4s }, [sp] # CHECK-NEXT: 1 5 4.00 * st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] -# CHECK-NEXT: 2 4 1.00 * st1 { v0.4s, v1.4s }, [sp], #32 -# CHECK-NEXT: 1 5 2.00 * st1 { v0.4s, v1.4s, v2.4s }, [sp] +# CHECK-NEXT: 1 5 2.00 * st2 { v0.8b, v1.8b }, [x0] +# CHECK-NEXT: 1 5 4.00 * st3 { v15.4h, v16.4h, v17.4h }, [x15] +# CHECK-NEXT: 1 5 4.00 * st4 { v31.2s, v0.2s, v1.2s, v2.2s }, [sp] +# CHECK-NEXT: 2 5 2.00 * ld1 { v15.8h }, [x15], x2 +# CHECK-NEXT: 2 7 4.00 * ld1 { v31.4s, v0.4s }, [sp], #32 +# CHECK-NEXT: 2 9 6.00 * ld1 { v0.2d, v1.2d, v2.2d }, [x0], #48 +# CHECK-NEXT: 2 7 4.00 * ld1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3 +# CHECK-NEXT: 2 7 4.00 * ld2 { v0.16b, v1.16b }, [x0], x1 +# CHECK-NEXT: 2 9 6.00 * ld3 { v15.8h, v16.8h, v17.8h }, [x15], x2 +# CHECK-NEXT: 2 11 8.00 * ld4 { v31.4s, v0.4s, v1.4s, v2.4s }, [sp], #64 +# CHECK-NEXT: 2 4 1.00 * st1 { v15.8h }, [x15], x2 +# CHECK-NEXT: 2 4 1.00 * st1 { v31.4s, v0.4s }, [sp], #32 +# CHECK-NEXT: 2 5 2.00 * st1 { v0.2d, v1.2d, v2.2d }, [x0], #48 # CHECK-NEXT: 2 5 4.00 * st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3 -# CHECK-NEXT: 2 4 1.00 * st1 { v0.8h }, [x15], x2 -# CHECK-NEXT: 1 4 1.00 * st1 { v0.8h, v1.8h }, [x15] -# CHECK-NEXT: 1 4 1.00 * st1 { v0.d }[1], [x0] -# CHECK-NEXT: 2 4 1.00 * st1 { v0.d }[1], [x0], #8 # CHECK-NEXT: 2 5 4.00 * st2 { v0.16b, v1.16b }, [x0], x1 -# CHECK-NEXT: 1 5 2.00 * st2 { v0.8b, v1.8b }, [x0] -# CHECK-NEXT: 1 5 2.00 * st2 { v0.s, v1.s }[3], [sp] -# CHECK-NEXT: 2 5 2.00 * st2 { v0.s, v1.s }[3], [sp], #8 -# CHECK-NEXT: 1 5 4.00 * st3 { v0.4h, v1.4h, v2.4h }, [x15] -# CHECK-NEXT: 2 5 4.00 * st3 { v0.8h, v1.8h, v2.8h }, [x15], x2 -# CHECK-NEXT: 1 5 2.00 * st3 { v0.h, v1.h, v2.h }[7], [x15] -# CHECK-NEXT: 2 5 2.00 * st3 { v0.h, v1.h, v2.h }[7], [x15], #6 -# CHECK-NEXT: 1 5 4.00 * st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] -# CHECK-NEXT: 2 5 4.00 * st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64 +# CHECK-NEXT: 2 5 4.00 * st3 { v15.8h, v16.8h, v17.8h }, [x15], x2 +# CHECK-NEXT: 2 5 4.00 * st4 { v31.4s, v0.4s, v1.4s, v2.4s }, [sp], #64 +# CHECK-NEXT: 1 4 1.00 * ld1r { v0.16b }, [x0] +# CHECK-NEXT: 1 4 1.00 * ld1r { v15.8h }, [x15] +# CHECK-NEXT: 1 5 2.00 * ld2r { v31.4s, v0.4s }, [sp] +# CHECK-NEXT: 1 5 2.00 * ld2r { v0.2d, v1.2d }, [x0] +# CHECK-NEXT: 1 5 2.00 * ld3r { v0.8b, v1.8b, v2.8b }, [x0] +# CHECK-NEXT: 1 5 2.00 * ld3r { v15.4h, v16.4h, v17.4h }, [x15] +# CHECK-NEXT: 1 5 2.00 * ld4r { v31.2s, v0.2s, v1.2s, v2.2s }, [sp] +# CHECK-NEXT: 1 5 2.00 * ld4r { v31.1d, v0.1d, v1.1d, v2.1d }, [sp] +# CHECK-NEXT: 1 4 1.00 * ld1 { v0.b }[9], [x0] +# CHECK-NEXT: 1 5 2.00 * ld2 { v15.h, v16.h }[7], [x15] +# CHECK-NEXT: 1 5 2.00 * ld3 { v31.s, v0.s, v1.s }[3], [sp] +# CHECK-NEXT: 1 5 2.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0] +# CHECK-NEXT: 1 4 1.00 * st1 { v0.d }[1], [x0] +# CHECK-NEXT: 1 5 2.00 * st2 { v31.s, v0.s }[3], [sp] +# CHECK-NEXT: 1 5 2.00 * st3 { v15.h, v16.h, v17.h }[7], [x15] # CHECK-NEXT: 1 5 2.00 * st4 { v0.b, v1.b, v2.b, v3.b }[9], [x0] +# CHECK-NEXT: 2 4 1.00 * ld1r { v0.16b }, [x0], #1 +# CHECK-NEXT: 2 4 1.00 * ld1r { v15.8h }, [x15], #2 +# CHECK-NEXT: 2 5 2.00 * ld2r { v31.4s, v0.4s }, [sp], #8 +# CHECK-NEXT: 2 5 2.00 * ld2r { v0.2d, v1.2d }, [x0], #16 +# CHECK-NEXT: 2 5 2.00 * ld3r { v0.8b, v1.8b, v2.8b }, [x0], #3 +# CHECK-NEXT: 2 5 2.00 * ld3r { v15.4h, v16.4h, v17.4h }, [x15], #6 +# CHECK-NEXT: 2 5 2.00 * ld4r { v31.2s, v0.2s, v1.2s, v2.2s }, [sp], x30 +# CHECK-NEXT: 2 5 2.00 * ld4r { v31.1d, v0.1d, v1.1d, v2.1d }, [sp], x7 +# CHECK-NEXT: 2 4 1.00 * ld1 { v0.b }[9], [x0], #1 +# CHECK-NEXT: 2 5 2.00 * ld2 { v15.h, v16.h }[7], [x15], #4 +# CHECK-NEXT: 2 5 2.00 * ld3 { v31.s, v0.s, v1.s }[3], [sp], x3 +# CHECK-NEXT: 2 5 2.00 * ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0], #32 +# CHECK-NEXT: 2 5 2.00 * ld4 { v0.h, v1.h, v2.h, v3.h }[7], [x0], x0 +# CHECK-NEXT: 2 4 1.00 * st1 { v0.d }[1], [x0], #8 +# CHECK-NEXT: 2 5 2.00 * st2 { v31.s, v0.s }[3], [sp], #8 +# CHECK-NEXT: 2 5 2.00 * st3 { v15.h, v16.h, v17.h }[7], [x15], #6 # CHECK-NEXT: 2 5 2.00 * st4 { v0.b, v1.b, v2.b, v3.b }[9], [x0], x5 -# CHECK-NEXT: 1 4 0.50 sub d15, d5, d16 -# CHECK-NEXT: 1 4 1.00 sub v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 suqadd b19, b14 -# CHECK-NEXT: 1 4 0.50 suqadd d18, d22 -# CHECK-NEXT: 1 4 0.50 suqadd h20, h15 -# CHECK-NEXT: 1 4 0.50 suqadd s21, s12 -# CHECK-NEXT: 1 4 1.00 suqadd v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 suqadd v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 suqadd v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 suqadd v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 suqadd v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 suqadd v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 suqadd v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 tbl v0.16b, { v0.16b }, v0.16b -# CHECK-NEXT: 1 4 1.00 tbl v0.16b, { v0.16b, v1.16b }, v0.16b -# CHECK-NEXT: 1 4 1.00 tbl v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b -# CHECK-NEXT: 1 4 1.00 tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b -# CHECK-NEXT: 1 4 0.50 tbl v0.8b, { v0.16b }, v0.8b -# CHECK-NEXT: 1 4 0.50 tbl v0.8b, { v0.16b, v1.16b }, v0.8b -# CHECK-NEXT: 1 4 0.50 tbl v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b -# CHECK-NEXT: 1 4 0.50 tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b -# CHECK-NEXT: 1 4 1.00 tbx v0.16b, { v0.16b }, v0.16b -# CHECK-NEXT: 1 4 1.00 tbx v0.16b, { v0.16b, v1.16b }, v0.16b -# CHECK-NEXT: 1 4 1.00 tbx v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b -# CHECK-NEXT: 1 4 1.00 tbx v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b -# CHECK-NEXT: 1 4 0.50 tbx v0.8b, { v0.16b }, v0.8b -# CHECK-NEXT: 1 4 0.50 tbx v0.8b, { v0.16b, v1.16b }, v0.8b -# CHECK-NEXT: 1 4 0.50 tbx v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b -# CHECK-NEXT: 1 4 0.50 tbx v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b -# CHECK-NEXT: 1 4 1.00 trn1 v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 trn1 v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 trn1 v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 trn1 v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 trn1 v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 trn1 v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 trn1 v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 trn2 v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 trn2 v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 trn2 v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 trn2 v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 trn2 v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 trn2 v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 trn2 v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 uaba v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 uabal v0.2d, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 uabal v0.4s, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 uabal v0.8h, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 uabal2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 uabal2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 uabal2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 uabd v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 uabdl v0.2d, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 uabdl v0.4s, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 uabdl v0.8h, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 uabdl2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 uabdl2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 uabdl2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 uadalp v0.1d, v0.2s -# CHECK-NEXT: 1 4 1.00 uadalp v0.2d, v0.4s -# CHECK-NEXT: 1 4 0.50 uadalp v0.2s, v0.4h -# CHECK-NEXT: 1 4 0.50 uadalp v0.4h, v0.8b -# CHECK-NEXT: 1 4 1.00 uadalp v0.4s, v0.8h -# CHECK-NEXT: 1 4 1.00 uadalp v0.8h, v0.16b -# CHECK-NEXT: 1 4 1.00 uaddl v0.2d, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 uaddl v0.4s, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 uaddl v0.8h, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 uaddl2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 uaddl2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 uaddl2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 uaddlp v0.1d, v0.2s -# CHECK-NEXT: 1 4 1.00 uaddlp v0.2d, v0.4s -# CHECK-NEXT: 1 4 0.50 uaddlp v0.2s, v0.4h -# CHECK-NEXT: 1 4 0.50 uaddlp v0.4h, v0.8b -# CHECK-NEXT: 1 4 1.00 uaddlp v0.4s, v0.8h -# CHECK-NEXT: 1 4 1.00 uaddlp v0.8h, v0.16b -# CHECK-NEXT: 1 4 1.00 uaddw v0.2d, v0.2d, v0.2s -# CHECK-NEXT: 1 4 1.00 uaddw v0.4s, v0.4s, v0.4h -# CHECK-NEXT: 1 4 1.00 uaddw v0.8h, v0.8h, v0.8b -# CHECK-NEXT: 1 4 1.00 uaddw2 v0.2d, v0.2d, v0.4s -# CHECK-NEXT: 1 4 1.00 uaddw2 v0.4s, v0.4s, v0.8h -# CHECK-NEXT: 1 4 1.00 uaddw2 v0.8h, v0.8h, v0.16b -# CHECK-NEXT: 1 4 0.50 ucvtf d21, d14 -# CHECK-NEXT: 1 4 0.50 ucvtf d21, d14, #64 -# CHECK-NEXT: 1 4 0.50 ucvtf s22, s13 -# CHECK-NEXT: 1 4 0.50 ucvtf s22, s13, #32 -# CHECK-NEXT: 1 4 0.50 ucvtf v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 ucvtf v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 ucvtf v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 ucvtf v0.2s, v0.2s, #3 -# CHECK-NEXT: 1 4 0.50 ucvtf v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 ucvtf v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 ucvtf v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 ucvtf v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 uhadd v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 uhadd v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 uhsub v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 umax v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 umax v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 umax v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 umaxp v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 umaxp v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 umaxp v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 umin v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 umin v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 umin v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 0.50 uminp v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 uminp v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 uminp v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 umlal v0.2d, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 umlal v0.4s, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 umlal v0.8h, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 umlal2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 umlal2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 umlal2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 umlsl v0.2d, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 umlsl v0.4s, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 umlsl v0.8h, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 umlsl2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 umlsl2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 umlsl2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 umull v0.2d, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 umull v0.4s, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 umull v0.8h, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 umull2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 umull2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 umull2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: 1 4 0.50 uqadd h0, h1, h5 -# CHECK-NEXT: 1 4 1.00 uqadd v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 uqrshl b11, b20, b30 -# CHECK-NEXT: 1 4 0.50 uqrshl s23, s20, s16 -# CHECK-NEXT: 1 4 1.00 uqrshl v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 uqrshl v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 uqrshl v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 uqrshl v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 uqrshrn b10, h12, #5 -# CHECK-NEXT: 1 4 0.50 uqrshrn h12, s10, #14 -# CHECK-NEXT: 1 4 0.50 uqrshrn s10, d10, #25 -# CHECK-NEXT: 1 4 0.50 uqrshrn v0.2s, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 uqrshrn v0.4h, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 uqrshrn v0.8b, v0.8h, #3 -# CHECK-NEXT: 1 4 1.00 uqrshrn2 v0.16b, v0.8h, #3 -# CHECK-NEXT: 1 4 1.00 uqrshrn2 v0.4s, v0.2d, #3 -# CHECK-NEXT: 1 4 1.00 uqrshrn2 v0.8h, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 uqshl b11, b20, b30 -# CHECK-NEXT: 1 4 0.50 uqshl b18, b15, #6 -# CHECK-NEXT: 1 4 0.50 uqshl d15, d12, #19 -# CHECK-NEXT: 1 4 0.50 uqshl h11, h18, #7 -# CHECK-NEXT: 1 4 0.50 uqshl s14, s19, #18 -# CHECK-NEXT: 1 4 0.50 uqshl s23, s20, s16 -# CHECK-NEXT: 1 4 1.00 uqshl v0.16b, v0.16b, #3 -# CHECK-NEXT: 1 4 1.00 uqshl v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 uqshl v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 1.00 uqshl v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 uqshl v0.2s, v0.2s, #3 -# CHECK-NEXT: 1 4 0.50 uqshl v0.4h, v0.4h, #3 -# CHECK-NEXT: 1 4 1.00 uqshl v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 1.00 uqshl v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 uqshl v0.8b, v0.8b, #3 -# CHECK-NEXT: 1 4 1.00 uqshl v0.8h, v0.8h, #3 -# CHECK-NEXT: 1 4 1.00 uqshl v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 uqshrn b12, h10, #7 -# CHECK-NEXT: 1 4 0.50 uqshrn h10, s14, #5 -# CHECK-NEXT: 1 4 0.50 uqshrn s10, d12, #13 -# CHECK-NEXT: 1 4 0.50 uqshrn v0.2s, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 uqshrn v0.4h, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 uqshrn v0.8b, v0.8h, #3 -# CHECK-NEXT: 1 4 1.00 uqshrn2 v0.16b, v0.8h, #3 -# CHECK-NEXT: 1 4 1.00 uqshrn2 v0.4s, v0.2d, #3 -# CHECK-NEXT: 1 4 1.00 uqshrn2 v0.8h, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 uqsub d16, d16, d16 -# CHECK-NEXT: 1 4 0.50 uqsub v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 0.50 uqxtn b18, h18 -# CHECK-NEXT: 1 4 0.50 uqxtn h20, s17 -# CHECK-NEXT: 1 4 0.50 uqxtn s19, d14 -# CHECK-NEXT: 1 4 1.00 uqxtn v0.2s, v0.2d -# CHECK-NEXT: 1 4 1.00 uqxtn v0.4h, v0.4s -# CHECK-NEXT: 1 4 1.00 uqxtn v0.8b, v0.8h -# CHECK-NEXT: 1 4 1.00 uqxtn2 v0.16b, v0.8h -# CHECK-NEXT: 1 4 1.00 uqxtn2 v0.4s, v0.2d -# CHECK-NEXT: 1 4 1.00 uqxtn2 v0.8h, v0.4s -# CHECK-NEXT: 1 4 0.50 urecpe v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 urecpe v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 urhadd v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 urhadd v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 urhadd v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 urshl d8, d7, d4 -# CHECK-NEXT: 1 4 1.00 urshl v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 urshl v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 1.00 urshl v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 urshl v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 urshr d20, d23, #31 -# CHECK-NEXT: 1 4 1.00 urshr v0.16b, v0.16b, #3 -# CHECK-NEXT: 1 4 1.00 urshr v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 urshr v0.2s, v0.2s, #3 -# CHECK-NEXT: 1 4 0.50 urshr v0.4h, v0.4h, #3 -# CHECK-NEXT: 1 4 1.00 urshr v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 urshr v0.8b, v0.8b, #3 -# CHECK-NEXT: 1 4 1.00 urshr v0.8h, v0.8h, #3 -# CHECK-NEXT: 1 12 9.00 ursqrte v0.2s, v0.2s -# CHECK-NEXT: 1 12 9.00 ursqrte v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 ursra d18, d10, #13 -# CHECK-NEXT: 1 4 1.00 ursra v0.16b, v0.16b, #3 -# CHECK-NEXT: 1 4 1.00 ursra v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 ursra v0.2s, v0.2s, #3 -# CHECK-NEXT: 1 4 0.50 ursra v0.4h, v0.4h, #3 -# CHECK-NEXT: 1 4 1.00 ursra v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 ursra v0.8b, v0.8b, #3 -# CHECK-NEXT: 1 4 1.00 ursra v0.8h, v0.8h, #3 -# CHECK-NEXT: 1 4 0.50 ushl d0, d0, d0 -# CHECK-NEXT: 1 4 1.00 ushl v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 ushl v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 ushl v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 ushll v0.4s, v0.4h, #3 -# CHECK-NEXT: 1 4 1.00 ushll2 v0.8h, v0.16b, #3 -# CHECK-NEXT: 1 4 0.50 ushr d10, d17, #18 -# CHECK-NEXT: 1 4 1.00 ushr v0.16b, v0.16b, #3 -# CHECK-NEXT: 1 4 1.00 ushr v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 ushr v0.2s, v0.2s, #3 -# CHECK-NEXT: 1 4 0.50 ushr v0.4h, v0.4h, #3 -# CHECK-NEXT: 1 4 1.00 ushr v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 ushr v0.8b, v0.8b, #3 -# CHECK-NEXT: 1 4 1.00 ushr v0.8h, v0.8h, #3 -# CHECK-NEXT: 1 4 0.50 usqadd b19, b14 -# CHECK-NEXT: 1 4 0.50 usqadd d18, d22 -# CHECK-NEXT: 1 4 0.50 usqadd h20, h15 -# CHECK-NEXT: 1 4 0.50 usqadd s21, s12 -# CHECK-NEXT: 1 4 1.00 usqadd v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 usqadd v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 usqadd v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 usqadd v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 usqadd v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 usqadd v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 usqadd v0.8h, v0.8h -# CHECK-NEXT: 1 4 0.50 usra d20, d13, #61 -# CHECK-NEXT: 1 4 1.00 usra v0.16b, v0.16b, #3 -# CHECK-NEXT: 1 4 1.00 usra v0.2d, v0.2d, #3 -# CHECK-NEXT: 1 4 0.50 usra v0.2s, v0.2s, #3 -# CHECK-NEXT: 1 4 0.50 usra v0.4h, v0.4h, #3 -# CHECK-NEXT: 1 4 1.00 usra v0.4s, v0.4s, #3 -# CHECK-NEXT: 1 4 0.50 usra v0.8b, v0.8b, #3 -# CHECK-NEXT: 1 4 1.00 usra v0.8h, v0.8h, #3 -# CHECK-NEXT: 1 4 1.00 usubl v0.2d, v0.2s, v0.2s -# CHECK-NEXT: 1 4 1.00 usubl v0.4s, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 usubl v0.8h, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 usubl2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: 1 4 1.00 usubl2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 usubl2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 usubw v0.2d, v0.2d, v0.2s -# CHECK-NEXT: 1 4 1.00 usubw v0.4s, v0.4s, v0.4h -# CHECK-NEXT: 1 4 1.00 usubw v0.8h, v0.8h, v0.8b -# CHECK-NEXT: 1 4 1.00 usubw2 v0.2d, v0.2d, v0.4s -# CHECK-NEXT: 1 4 1.00 usubw2 v0.4s, v0.4s, v0.8h -# CHECK-NEXT: 1 4 1.00 usubw2 v0.8h, v0.8h, v0.16b -# CHECK-NEXT: 1 4 1.00 uzp1 v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 uzp1 v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 uzp1 v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 uzp1 v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 uzp1 v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 uzp1 v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 uzp1 v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 uzp2 v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 uzp2 v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 uzp2 v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 uzp2 v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 uzp2 v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 uzp2 v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 uzp2 v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 2 0.50 xtn v0.2s, v0.2d -# CHECK-NEXT: 1 2 0.50 xtn v0.4h, v0.4s -# CHECK-NEXT: 1 2 0.50 xtn v0.8b, v0.8h -# CHECK-NEXT: 1 2 0.50 xtn2 v0.16b, v0.8h -# CHECK-NEXT: 1 2 0.50 xtn2 v0.4s, v0.2d -# CHECK-NEXT: 1 2 0.50 xtn2 v0.8h, v0.4s -# CHECK-NEXT: 1 4 1.00 zip1 v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 zip1 v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 zip1 v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 zip1 v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 zip1 v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 zip1 v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 zip1 v0.8h, v0.8h, v0.8h -# CHECK-NEXT: 1 4 1.00 zip2 v0.16b, v0.16b, v0.16b -# CHECK-NEXT: 1 4 1.00 zip2 v0.2d, v0.2d, v0.2d -# CHECK-NEXT: 1 4 0.50 zip2 v0.2s, v0.2s, v0.2s -# CHECK-NEXT: 1 4 0.50 zip2 v0.4h, v0.4h, v0.4h -# CHECK-NEXT: 1 4 1.00 zip2 v0.4s, v0.4s, v0.4s -# CHECK-NEXT: 1 4 0.50 zip2 v0.8b, v0.8b, v0.8b -# CHECK-NEXT: 1 4 1.00 zip2 v0.8h, v0.8h, v0.8h +# CHECK-NEXT: 1 2 0.50 ext v0.8b, v1.8b, v2.8b, #3 +# CHECK-NEXT: 1 2 1.00 ext v0.16b, v1.16b, v2.16b, #3 +# CHECK-NEXT: 1 2 0.50 uzp1 v1.8b, v1.8b, v2.8b +# CHECK-NEXT: 1 2 1.00 uzp1 v2.16b, v1.16b, v2.16b +# CHECK-NEXT: 1 2 0.50 uzp1 v3.4h, v1.4h, v2.4h +# CHECK-NEXT: 1 2 1.00 uzp1 v4.8h, v1.8h, v2.8h +# CHECK-NEXT: 1 2 0.50 uzp1 v5.2s, v1.2s, v2.2s +# CHECK-NEXT: 1 2 1.00 uzp1 v6.4s, v1.4s, v2.4s +# CHECK-NEXT: 1 2 1.00 uzp1 v7.2d, v1.2d, v2.2d +# CHECK-NEXT: 1 2 0.50 trn1 v8.8b, v1.8b, v2.8b +# CHECK-NEXT: 1 2 1.00 trn1 v9.16b, v1.16b, v2.16b +# CHECK-NEXT: 1 2 0.50 trn1 v10.4h, v1.4h, v2.4h +# CHECK-NEXT: 1 2 1.00 trn1 v27.8h, v7.8h, v2.8h +# CHECK-NEXT: 1 2 0.50 trn1 v12.2s, v7.2s, v2.2s +# CHECK-NEXT: 1 2 1.00 trn1 v29.4s, v6.4s, v2.4s +# CHECK-NEXT: 1 2 1.00 trn1 v14.2d, v6.2d, v2.2d +# CHECK-NEXT: 1 2 0.50 zip1 v31.8b, v5.8b, v2.8b +# CHECK-NEXT: 1 2 1.00 zip1 v0.16b, v5.16b, v2.16b +# CHECK-NEXT: 1 2 0.50 zip1 v17.4h, v4.4h, v2.4h +# CHECK-NEXT: 1 2 1.00 zip1 v2.8h, v4.8h, v2.8h +# CHECK-NEXT: 1 2 0.50 zip1 v19.2s, v3.2s, v2.2s +# CHECK-NEXT: 1 2 1.00 zip1 v4.4s, v3.4s, v2.4s +# CHECK-NEXT: 1 2 1.00 zip1 v21.2d, v2.2d, v2.2d +# CHECK-NEXT: 1 2 0.50 uzp2 v6.8b, v2.8b, v2.8b +# CHECK-NEXT: 1 2 1.00 uzp2 v23.16b, v1.16b, v2.16b +# CHECK-NEXT: 1 2 0.50 uzp2 v8.4h, v1.4h, v2.4h +# CHECK-NEXT: 1 2 1.00 uzp2 v25.8h, v0.8h, v2.8h +# CHECK-NEXT: 1 2 0.50 uzp2 v10.2s, v0.2s, v2.2s +# CHECK-NEXT: 1 2 1.00 uzp2 v27.4s, v7.4s, v2.4s +# CHECK-NEXT: 1 2 1.00 uzp2 v12.2d, v7.2d, v2.2d +# CHECK-NEXT: 1 2 0.50 trn2 v29.8b, v6.8b, v2.8b +# CHECK-NEXT: 1 2 1.00 trn2 v14.16b, v6.16b, v2.16b +# CHECK-NEXT: 1 2 0.50 trn2 v31.4h, v5.4h, v2.4h +# CHECK-NEXT: 1 2 1.00 trn2 v0.8h, v5.8h, v2.8h +# CHECK-NEXT: 1 2 0.50 trn2 v17.2s, v4.2s, v2.2s +# CHECK-NEXT: 1 2 1.00 trn2 v2.4s, v4.4s, v2.4s +# CHECK-NEXT: 1 2 1.00 trn2 v19.2d, v3.2d, v2.2d +# CHECK-NEXT: 1 2 0.50 zip2 v4.8b, v3.8b, v2.8b +# CHECK-NEXT: 1 2 1.00 zip2 v21.16b, v2.16b, v2.16b +# CHECK-NEXT: 1 2 0.50 zip2 v6.4h, v2.4h, v2.4h +# CHECK-NEXT: 1 2 1.00 zip2 v23.8h, v1.8h, v2.8h +# CHECK-NEXT: 1 2 0.50 zip2 v8.2s, v1.2s, v2.2s +# CHECK-NEXT: 1 2 1.00 zip2 v25.4s, v0.4s, v2.4s +# CHECK-NEXT: 1 2 1.00 zip2 v10.2d, v0.2d, v2.2d +# CHECK-NEXT: 1 4 1.00 fmul s0, s1, v1.s[0] +# CHECK-NEXT: 1 4 1.00 fmul s0, s1, v1.s[3] +# CHECK-NEXT: 1 4 1.00 fmul d0, d1, v1.d[0] +# CHECK-NEXT: 1 4 1.00 fmul d0, d1, v1.d[1] +# CHECK-NEXT: 1 4 1.00 fmul d15, d15, v15.d[1] +# CHECK-NEXT: 1 4 1.00 fmulx s3, s5, v7.s[0] +# CHECK-NEXT: 1 4 1.00 fmulx s3, s5, v7.s[3] +# CHECK-NEXT: 1 4 1.00 fmulx s3, s5, v15.s[3] +# CHECK-NEXT: 1 4 1.00 fmulx d0, d4, v8.d[0] +# CHECK-NEXT: 1 4 1.00 fmulx d0, d4, v8.d[1] +# CHECK-NEXT: 1 4 0.50 fmla s0, s1, v1.s[0] +# CHECK-NEXT: 1 4 0.50 fmla s0, s1, v1.s[3] +# CHECK-NEXT: 1 4 0.50 fmla d0, d1, v1.d[0] +# CHECK-NEXT: 1 4 0.50 fmla d0, d1, v1.d[1] +# CHECK-NEXT: 1 4 0.50 fmla d15, d15, v15.d[1] +# CHECK-NEXT: 1 4 0.50 fmls s3, s5, v7.s[0] +# CHECK-NEXT: 1 4 0.50 fmls s3, s5, v7.s[3] +# CHECK-NEXT: 1 4 0.50 fmls s3, s5, v15.s[3] +# CHECK-NEXT: 1 4 0.50 fmls d0, d4, v8.d[0] +# CHECK-NEXT: 1 4 0.50 fmls d0, d4, v8.d[1] +# CHECK-NEXT: 1 4 1.00 sqdmlal s0, h0, v0.h[0] +# CHECK-NEXT: 1 4 1.00 sqdmlal s0, h0, v0.h[1] +# CHECK-NEXT: 1 4 1.00 sqdmlal s0, h0, v0.h[2] +# CHECK-NEXT: 1 4 1.00 sqdmlal s0, h0, v0.h[3] +# CHECK-NEXT: 1 4 1.00 sqdmlal s0, h0, v0.h[4] +# CHECK-NEXT: 1 4 1.00 sqdmlal s0, h0, v0.h[5] +# CHECK-NEXT: 1 4 1.00 sqdmlal s0, h0, v0.h[6] +# CHECK-NEXT: 1 4 1.00 sqdmlal s0, h0, v0.h[7] +# CHECK-NEXT: 1 4 1.00 sqdmlal d8, s9, v15.s[0] +# CHECK-NEXT: 1 4 1.00 sqdmlal d8, s9, v15.s[1] +# CHECK-NEXT: 1 4 1.00 sqdmlal d8, s9, v15.s[2] +# CHECK-NEXT: 1 4 1.00 sqdmlal d8, s9, v15.s[3] +# CHECK-NEXT: 1 4 1.00 sqdmlsl s0, h0, v0.h[0] +# CHECK-NEXT: 1 4 1.00 sqdmlsl s0, h0, v0.h[1] +# CHECK-NEXT: 1 4 1.00 sqdmlsl s0, h0, v0.h[2] +# CHECK-NEXT: 1 4 1.00 sqdmlsl s0, h0, v0.h[3] +# CHECK-NEXT: 1 4 1.00 sqdmlsl s0, h0, v0.h[4] +# CHECK-NEXT: 1 4 1.00 sqdmlsl s0, h0, v0.h[5] +# CHECK-NEXT: 1 4 1.00 sqdmlsl s0, h0, v0.h[6] +# CHECK-NEXT: 1 4 1.00 sqdmlsl s0, h0, v0.h[7] +# CHECK-NEXT: 1 4 1.00 sqdmlsl d8, s9, v15.s[0] +# CHECK-NEXT: 1 4 1.00 sqdmlsl d8, s9, v15.s[1] +# CHECK-NEXT: 1 4 1.00 sqdmlsl d8, s9, v15.s[2] +# CHECK-NEXT: 1 4 1.00 sqdmlsl d8, s9, v15.s[3] +# CHECK-NEXT: 1 4 1.00 sqdmull s1, h1, v1.h[0] +# CHECK-NEXT: 1 4 1.00 sqdmull s1, h1, v1.h[1] +# CHECK-NEXT: 1 4 1.00 sqdmull s1, h1, v1.h[2] +# CHECK-NEXT: 1 4 1.00 sqdmull s1, h1, v1.h[3] +# CHECK-NEXT: 1 4 1.00 sqdmull s1, h1, v1.h[4] +# CHECK-NEXT: 1 4 1.00 sqdmull s1, h1, v1.h[5] +# CHECK-NEXT: 1 4 1.00 sqdmull s1, h1, v1.h[6] +# CHECK-NEXT: 1 4 1.00 sqdmull s1, h1, v1.h[7] +# CHECK-NEXT: 1 4 1.00 sqdmull d1, s1, v4.s[0] +# CHECK-NEXT: 1 4 1.00 sqdmull d1, s1, v4.s[1] +# CHECK-NEXT: 1 4 1.00 sqdmull d1, s1, v4.s[2] +# CHECK-NEXT: 1 4 1.00 sqdmull d1, s1, v4.s[3] +# CHECK-NEXT: 1 4 1.00 sqdmulh h7, h1, v14.h[0] +# CHECK-NEXT: 1 4 1.00 sqdmulh h7, h15, v8.h[1] +# CHECK-NEXT: 1 4 1.00 sqdmulh h7, h15, v8.h[2] +# CHECK-NEXT: 1 4 1.00 sqdmulh h7, h15, v8.h[3] +# CHECK-NEXT: 1 4 1.00 sqdmulh h7, h15, v8.h[4] +# CHECK-NEXT: 1 4 1.00 sqdmulh h7, h15, v8.h[5] +# CHECK-NEXT: 1 4 1.00 sqdmulh h7, h15, v8.h[6] +# CHECK-NEXT: 1 4 1.00 sqdmulh h7, h15, v8.h[7] +# CHECK-NEXT: 1 4 1.00 sqdmulh s15, s3, v4.s[0] +# CHECK-NEXT: 1 4 1.00 sqdmulh s15, s14, v16.s[1] +# CHECK-NEXT: 1 4 1.00 sqdmulh s15, s15, v16.s[2] +# CHECK-NEXT: 1 4 1.00 sqdmulh s15, s16, v17.s[3] +# CHECK-NEXT: 1 4 1.00 sqrdmulh h7, h1, v14.h[0] +# CHECK-NEXT: 1 4 1.00 sqrdmulh h7, h15, v8.h[1] +# CHECK-NEXT: 1 4 1.00 sqrdmulh h7, h15, v8.h[2] +# CHECK-NEXT: 1 4 1.00 sqrdmulh h7, h15, v8.h[3] +# CHECK-NEXT: 1 4 1.00 sqrdmulh h7, h15, v8.h[4] +# CHECK-NEXT: 1 4 1.00 sqrdmulh h7, h15, v8.h[5] +# CHECK-NEXT: 1 4 1.00 sqrdmulh h7, h15, v8.h[6] +# CHECK-NEXT: 1 4 1.00 sqrdmulh h7, h15, v8.h[7] +# CHECK-NEXT: 1 4 1.00 sqrdmulh s15, s3, v4.s[0] +# CHECK-NEXT: 1 4 1.00 sqrdmulh s15, s14, v16.s[1] +# CHECK-NEXT: 1 4 1.00 sqrdmulh s15, s15, v16.s[2] +# CHECK-NEXT: 1 4 1.00 sqrdmulh s15, s16, v17.s[3] +# CHECK-NEXT: 1 2 0.50 mov b0, v0.b[15] +# CHECK-NEXT: 1 2 0.50 mov h2, v31.h[5] +# CHECK-NEXT: 1 2 0.50 mov s17, v2.s[2] +# CHECK-NEXT: 1 2 0.50 mov d6, v12.d[1] +# CHECK-NEXT: 1 2 1.00 tbl v0.8b, { v1.16b }, v2.8b +# CHECK-NEXT: 1 3 2.00 tbl v16.8b, { v31.16b, v0.16b }, v2.8b +# CHECK-NEXT: 1 4 3.00 tbl v0.8b, { v1.16b, v2.16b, v3.16b }, v2.8b +# CHECK-NEXT: 1 5 4.00 tbl v16.8b, { v23.16b, v24.16b, v25.16b, v26.16b }, v2.8b +# CHECK-NEXT: 1 2 1.00 tbl v0.16b, { v1.16b }, v2.16b +# CHECK-NEXT: 1 3 2.00 tbl v16.16b, { v31.16b, v0.16b }, v2.16b +# CHECK-NEXT: 1 4 3.00 tbl v0.16b, { v1.16b, v2.16b, v3.16b }, v2.16b +# CHECK-NEXT: 1 5 4.00 tbl v0.16b, { v31.16b, v0.16b, v1.16b, v2.16b }, v2.16b +# CHECK-NEXT: 1 3 2.00 tbx v0.8b, { v1.16b }, v2.8b +# CHECK-NEXT: 1 4 3.00 tbx v16.8b, { v31.16b, v0.16b }, v2.8b +# CHECK-NEXT: 1 5 4.00 tbx v0.8b, { v1.16b, v2.16b, v3.16b }, v2.8b +# CHECK-NEXT: 1 6 5.00 tbx v16.8b, { v23.16b, v24.16b, v25.16b, v26.16b }, v2.8b +# CHECK-NEXT: 1 3 2.00 tbx v0.16b, { v1.16b }, v2.16b +# CHECK-NEXT: 1 4 3.00 tbx v16.16b, { v31.16b, v0.16b }, v2.16b +# CHECK-NEXT: 1 5 4.00 tbx v0.16b, { v1.16b, v2.16b, v3.16b }, v2.16b +# CHECK-NEXT: 1 6 5.00 tbx v16.16b, { v31.16b, v0.16b, v1.16b, v2.16b }, v2.16b +# CHECK-NEXT: 1 4 0.50 fcvtxn s22, d13 +# CHECK-NEXT: 1 4 0.50 fcvtas s12, s13 +# CHECK-NEXT: 1 4 0.50 fcvtas d21, d14 +# CHECK-NEXT: 1 4 0.50 fcvtau s12, s13 +# CHECK-NEXT: 1 4 0.50 fcvtau d21, d14 +# CHECK-NEXT: 1 4 0.50 fcvtms s22, s13 +# CHECK-NEXT: 1 4 0.50 fcvtms d21, d14 +# CHECK-NEXT: 1 4 0.50 fcvtmu s12, s13 +# CHECK-NEXT: 1 4 0.50 fcvtmu d21, d14 +# CHECK-NEXT: 1 4 0.50 fcvtns s22, s13 +# CHECK-NEXT: 1 4 0.50 fcvtns d21, d14 +# CHECK-NEXT: 1 4 0.50 fcvtnu s12, s13 +# CHECK-NEXT: 1 4 0.50 fcvtnu d21, d14 +# CHECK-NEXT: 1 4 0.50 fcvtps s22, s13 +# CHECK-NEXT: 1 4 0.50 fcvtps d21, d14 +# CHECK-NEXT: 1 4 0.50 fcvtpu s12, s13 +# CHECK-NEXT: 1 4 0.50 fcvtpu d21, d14 +# CHECK-NEXT: 1 4 0.50 fcvtzs s12, s13 +# CHECK-NEXT: 1 4 0.50 fcvtzs d21, d14 +# CHECK-NEXT: 1 4 0.50 fcvtzu s12, s13 +# CHECK-NEXT: 1 4 0.50 fcvtzu d21, d14 +# CHECK-NEXT: 1 4 0.50 fabd s29, s24, s20 +# CHECK-NEXT: 1 4 0.50 fabd d29, d24, d20 # CHECK: Resources: # CHECK-NEXT: [0.0] - CortexA55UnitALU @@ -2146,1066 +1737,861 @@ # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0.0] [0.1] [1] [2] [3.0] [3.1] [4] [5.0] [5.1] [6] [7] [8] -# CHECK-NEXT: - - - - 716.50 716.50 197.00 3.00 3.00 107.00 - 52.00 +# CHECK-NEXT: - - - - 652.50 652.50 85.00 31.00 31.00 107.00 - 52.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0.0] [0.1] [1] [2] [3.0] [3.1] [4] [5.0] [5.1] [6] [7] [8] Instructions: -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - abs d29, d24 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - abs v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - abs v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - abs v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - abs v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - abs v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - abs v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - abs v0.8h, v0.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - add v31.8b, v31.8b, v31.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sub v0.2d, v0.2d, v0.2d +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fadd v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fsub v31.2s, v31.2s, v31.2s +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - mul v0.8b, v1.8b, v2.8b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmul v0.2s, v1.2s, v2.2s +# CHECK-NEXT: - - - - - - 10.00 - - - - - fdiv v31.2s, v31.2s, v31.2s +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - pmul v0.8b, v15.8b, v16.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - pmul v31.16b, v7.16b, v8.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - and v2.8b, v2.8b, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - orr v31.16b, v31.16b, v30.16b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - eor v0.16b, v1.16b, v2.16b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - orn v9.16b, v10.16b, v11.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - bic v31.8b, v30.8b, v29.8b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - bsl v0.8b, v1.8b, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - bit v31.16b, v31.16b, v31.16b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - bif v0.16b, v1.16b, v2.16b +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - mla v0.8b, v1.8b, v2.8b +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - mls v31.4h, v31.4h, v31.4h +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmla v0.2s, v1.2s, v2.2s +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmls v31.2s, v31.2s, v31.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - movi v31.4s, #255, lsl #24 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - mvni v0.2s, #0 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - bic v15.4h, #15, lsl #8 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - orr v16.8h, #31 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - movi v8.2s, #8, msl #8 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - mvni v16.4s, #16, msl #16 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - movi v16.8b, #255 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - movi v31.16b, #31 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - movi d15, #0xff00ff00ff00ff +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - movi v31.2d, #0xff0000ff0000ffff +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmov v0.2s, #13.00000000 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmov v15.4s, #1.00000000 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmov v31.2d, #-1.25000000 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - mov v1.16b, v15.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - mov v25.8b, v4.8b +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - uaba v0.8b, v1.8b, v2.8b +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - saba v31.16b, v30.16b, v29.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uabd v15.4h, v16.4h, v17.4h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sabd v5.4h, v4.4h, v6.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fabd v1.4s, v31.4s, v16.4s # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - add d17, d31, d29 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - add v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - addhn v0.2s, v0.2d, v0.2d -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - addhn v0.4h, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - addhn v0.8b, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - addhn2 v0.16b, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - addhn2 v0.4s, v0.2d, v0.2d -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - addhn2 v0.8h, v0.4s, v0.4s +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sub d15, d5, d16 +# CHECK-NEXT: - - - - - - 19.00 - - - - - frsqrts v31.2d, v15.2d, v8.2d +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frecps v5.4s, v7.4s, v16.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - facge v0.4s, v31.4s, v16.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - facgt v31.2d, v29.2d, v28.2d +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - cmeq v5.16b, v15.16b, v31.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmhs v1.8b, v16.8b, v30.8b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmge v20.4h, v11.4h, v23.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - cmhi v13.8h, v3.8h, v27.8h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - cmgt v9.4s, v4.4s, v28.4s +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmtst v21.2s, v19.2s, v18.2s +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmeq v0.2s, v15.2s, v16.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fcmge v31.4s, v7.4s, v29.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fcmgt v17.4s, v8.4s, v25.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - cmeq v31.16b, v15.16b, #0 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmge v3.8b, v15.8b, #0 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmgt v22.2s, v9.2s, #0 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - cmle v5.2d, v14.2d, #0 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - cmlt v13.8h, v11.8h, #0 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmeq v15.2s, v21.2s, #0.0 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fcmge v14.2d, v13.2d, #0.0 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fcmgt v9.4s, v23.4s, #0.0 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fcmle v11.2d, v6.2d, #0.0 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fcmlt v12.4s, v25.4s, #0.0 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shadd v0.8b, v31.8b, v29.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uhadd v15.16b, v16.16b, v17.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shsub v0.4h, v1.4h, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uhadd v5.8h, v7.8h, v8.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shsub v9.2s, v11.2s, v21.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uhsub v22.4s, v30.4s, v19.4s +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srhadd v3.8b, v5.8b, v7.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urhadd v7.16b, v17.16b, v27.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srhadd v10.4h, v11.4h, v13.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urhadd v1.8h, v2.8h, v3.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srhadd v4.2s, v5.2s, v6.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urhadd v7.4s, v7.4s, v7.4s +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqsub v0.8b, v1.8b, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqadd v0.16b, v1.16b, v2.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqsub v0.4h, v1.4h, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqadd v0.8h, v1.8h, v2.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqadd v0.2s, v1.2s, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqsub v0.4s, v1.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqsub v0.2d, v1.2d, v2.2d +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqadd b20, b11, b15 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqadd h0, h1, h5 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqsub s20, s10, s7 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqsub d16, d16, d16 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshl v10.8b, v15.8b, v22.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ushl v10.16b, v5.16b, v2.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshl v10.4h, v15.4h, v22.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ushl v10.8h, v5.8h, v2.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshl v10.2s, v15.2s, v22.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ushl v10.4s, v5.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sshl v0.2d, v1.2d, v2.2d +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshl v1.8b, v15.8b, v22.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshl v2.16b, v14.16b, v23.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshl v3.4h, v13.4h, v24.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshl v4.8h, v12.8h, v25.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshl v5.2s, v11.2s, v26.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshl v6.4s, v10.4s, v27.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshl v0.2d, v1.2d, v2.2d +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srshl v10.8b, v5.8b, v22.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urshl v10.16b, v5.16b, v2.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srshl v1.4h, v5.4h, v31.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urshl v1.8h, v5.8h, v2.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srshl v10.2s, v15.2s, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urshl v1.4s, v5.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urshl v0.2d, v1.2d, v2.2d +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshl v1.8b, v15.8b, v22.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqrshl v2.16b, v14.16b, v23.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshl v3.4h, v13.4h, v24.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqrshl v4.8h, v12.8h, v25.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshl v5.2s, v11.2s, v26.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqrshl v6.4s, v10.4s, v27.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqrshl v6.4s, v10.4s, v27.4s +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshl d31, d31, d31 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ushl d0, d0, d0 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshl d31, d31, d31 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshl s23, s20, s16 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshl h3, h4, h15 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshl b11, b20, b30 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srshl d16, d16, d16 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - urshl d8, d7, d4 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshl d31, d31, d31 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqrshl s23, s20, s16 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshl h3, h4, h15 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqrshl b11, b20, b30 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - smax v1.8b, v15.8b, v22.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umax v2.16b, v14.16b, v23.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - smax v3.4h, v13.4h, v24.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umax v4.8h, v12.8h, v25.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - smax v5.2s, v11.2s, v26.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umax v6.4s, v10.4s, v27.4s +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - umin v1.8b, v15.8b, v22.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smin v2.16b, v14.16b, v23.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - umin v3.4h, v13.4h, v24.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smin v4.8h, v12.8h, v25.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - umin v5.2s, v11.2s, v26.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smin v6.4s, v10.4s, v27.4s +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmax v29.2s, v28.2s, v25.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmax v9.4s, v8.4s, v5.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmax v11.2d, v10.2d, v7.2d +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmin v29.2s, v28.2s, v25.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmin v9.4s, v8.4s, v5.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmin v11.2d, v10.2d, v7.2d +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmaxnm v9.2s, v8.2s, v5.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmaxnm v9.4s, v8.4s, v5.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmaxnm v11.2d, v10.2d, v7.2d +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fminnm v2.2s, v8.2s, v25.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fminnm v9.4s, v8.4s, v5.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fminnm v11.2d, v10.2d, v7.2d +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - smaxp v1.8b, v15.8b, v22.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umaxp v2.16b, v14.16b, v23.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - smaxp v3.4h, v13.4h, v24.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umaxp v4.8h, v12.8h, v25.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - smaxp v5.2s, v11.2s, v26.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umaxp v6.4s, v10.4s, v27.4s +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uminp v1.8b, v15.8b, v22.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sminp v2.16b, v14.16b, v23.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uminp v3.4h, v13.4h, v24.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sminp v4.8h, v12.8h, v25.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uminp v5.2s, v11.2s, v26.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sminp v6.4s, v10.4s, v27.4s +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmaxp v29.2s, v28.2s, v25.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmaxp v9.4s, v8.4s, v5.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmaxp v11.2d, v10.2d, v7.2d +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fminp v29.2s, v28.2s, v25.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fminp v9.4s, v8.4s, v5.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fminp v11.2d, v10.2d, v7.2d +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmaxnmp v9.2s, v8.2s, v5.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmaxnmp v9.4s, v8.4s, v5.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmaxnmp v11.2d, v10.2d, v7.2d +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fminnmp v2.2s, v8.2s, v25.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fminnmp v9.4s, v8.4s, v5.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fminnmp v11.2d, v10.2d, v7.2d +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - addp v31.8b, v31.8b, v31.8b # CHECK-NEXT: - - - - 1.00 1.00 - - - - - - addp v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - addp v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - and v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - bic v0.4h, #15, lsl #8 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - bic v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - bif v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - bit v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - bsl v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - cls v0.16b, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cls v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cls v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - cls v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cls v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - cls v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - clz v0.16b, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - clz v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - clz v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - clz v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - clz v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - clz v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmeq d20, d21, #0 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - faddp v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - faddp v31.2s, v31.2s, v31.2s +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmulh v31.2s, v31.2s, v31.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmulh v5.4s, v7.4s, v9.4s +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrdmulh v31.4h, v3.4h, v13.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrdmulh v0.8h, v10.8h, v20.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmulx v1.2s, v22.2s, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmulx v21.4s, v15.4s, v3.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmulx v11.2d, v5.2d, v23.2d +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shll2 v2.8h, v4.16b, #8 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shll2 v6.4s, v8.8h, #16 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shll2 v6.2d, v8.4s, #32 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shll v2.8h, v4.8b, #8 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shll v6.4s, v8.4h, #16 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shll v6.2d, v8.2s, #32 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shl v0.4h, v1.4h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shl v0.16b, v1.16b, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shl v0.4s, v1.4s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shl v0.2d, v1.2d, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sshll v0.2d, v1.2s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sshll2 v0.4s, v1.8h, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ushll v0.4s, v1.4h, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ushll2 v0.8h, v1.16b, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshr v0.8b, v1.8b, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshr v0.4h, v1.4h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshr v0.2s, v1.2s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshr v0.16b, v1.16b, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshr v0.8h, v1.8h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshr v0.4s, v1.4s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshr v0.2d, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ushr v0.8b, v1.8b, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ushr v0.4h, v1.4h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ushr v0.2s, v1.2s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ushr v0.16b, v1.16b, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ushr v0.8h, v1.8h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ushr v0.4s, v1.4s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ushr v0.2d, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ssra v0.8b, v1.8b, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ssra v0.4h, v1.4h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ssra v0.2s, v1.2s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssra v0.16b, v1.16b, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssra v0.8h, v1.8h, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssra v0.4s, v1.4s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssra v0.2d, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - usra v0.8b, v1.8b, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - usra v0.4h, v1.4h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - usra v0.2s, v1.2s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usra v0.16b, v1.16b, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usra v0.8h, v1.8h, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usra v0.4s, v1.4s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usra v0.2d, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srshr v0.8b, v1.8b, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srshr v0.4h, v1.4h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srshr v0.2s, v1.2s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - srshr v0.16b, v1.16b, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - srshr v0.8h, v1.8h, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - srshr v0.4s, v1.4s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - srshr v0.2d, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - urshr v0.8b, v1.8b, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - urshr v0.4h, v1.4h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - urshr v0.2s, v1.2s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urshr v0.16b, v1.16b, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urshr v0.8h, v1.8h, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urshr v0.4s, v1.4s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urshr v0.2d, v1.2d, #3 +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - srsra v0.8b, v1.8b, #3 +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - srsra v0.4h, v1.4h, #3 +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - srsra v0.2s, v1.2s, #3 +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - srsra v0.16b, v1.16b, #3 +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - srsra v0.8h, v1.8h, #3 +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - srsra v0.4s, v1.4s, #3 +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - srsra v0.2d, v1.2d, #3 +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - ursra v0.8b, v1.8b, #3 +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - ursra v0.4h, v1.4h, #3 +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - ursra v0.2s, v1.2s, #3 +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - ursra v0.16b, v1.16b, #3 +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - ursra v0.8h, v1.8h, #3 +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - ursra v0.4s, v1.4s, #3 +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - ursra v0.2d, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sri v0.8b, v1.8b, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sri v0.4h, v1.4h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sri v0.2s, v1.2s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sri v0.16b, v1.16b, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sri v0.8h, v1.8h, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sri v0.4s, v1.4s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sri v0.2d, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sli v0.8b, v1.8b, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sli v0.4h, v1.4h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sli v0.2s, v1.2s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sli v0.16b, v1.16b, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sli v0.8h, v1.8h, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sli v0.4s, v1.4s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sli v0.2d, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshlu v0.8b, v1.8b, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshlu v0.4h, v1.4h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshlu v0.2s, v1.2s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshlu v0.16b, v1.16b, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshlu v0.8h, v1.8h, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshlu v0.4s, v1.4s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshlu v0.2d, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshl v0.8b, v1.8b, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshl v0.4h, v1.4h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshl v0.2s, v1.2s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshl v0.16b, v1.16b, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshl v0.8h, v1.8h, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshl v0.4s, v1.4s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshl v0.2d, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshl v0.8b, v1.8b, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshl v0.4h, v1.4h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshl v0.2s, v1.2s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshl v0.16b, v1.16b, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshl v0.8h, v1.8h, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshl v0.4s, v1.4s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshl v0.2d, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shrn v0.8b, v1.8h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shrn v0.4h, v1.4s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shrn v0.2s, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shrn2 v0.16b, v1.8h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shrn2 v0.8h, v1.4s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shrn2 v0.4s, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshrun v0.8b, v1.8h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshrun v0.4h, v1.4s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshrun v0.2s, v1.2d, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshrun2 v0.16b, v1.8h, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshrun2 v0.8h, v1.4s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshrun2 v0.4s, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - rshrn v0.8b, v1.8h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - rshrn v0.4h, v1.4s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - rshrn v0.2s, v1.2d, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rshrn2 v0.16b, v1.8h, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rshrn2 v0.8h, v1.4s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rshrn2 v0.4s, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrun v0.8b, v1.8h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrun v0.4h, v1.4s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrun v0.2s, v1.2d, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrshrun2 v0.16b, v1.8h, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrshrun2 v0.8h, v1.4s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrshrun2 v0.4s, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshrn v0.8b, v1.8h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshrn v0.4h, v1.4s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshrn v0.2s, v1.2d, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshrn2 v0.16b, v1.8h, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshrn2 v0.8h, v1.4s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshrn2 v0.4s, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshrn v0.8b, v1.8h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshrn v0.4h, v1.4s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshrn v0.2s, v1.2d, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshrn2 v0.16b, v1.8h, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshrn2 v0.8h, v1.4s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshrn2 v0.4s, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrn v0.8b, v1.8h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrn v0.4h, v1.4s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrn v0.2s, v1.2d, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrshrn2 v0.16b, v1.8h, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrshrn2 v0.8h, v1.4s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrshrn2 v0.4s, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqrshrn v0.8b, v1.8h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqrshrn v0.4h, v1.4s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqrshrn v0.2s, v1.2d, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqrshrn2 v0.16b, v1.8h, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqrshrn2 v0.8h, v1.4s, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqrshrn2 v0.4s, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - scvtf v0.2s, v1.2s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - scvtf v0.4s, v1.4s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - scvtf v0.2d, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ucvtf v0.2s, v1.2s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ucvtf v0.4s, v1.4s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ucvtf v0.2d, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzs v0.2s, v1.2s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzs v0.4s, v1.4s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzs v0.2d, v1.2d, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzu v0.2s, v1.2s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzu v0.4s, v1.4s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzu v0.2d, v1.2d, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddl v0.8h, v1.8b, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddl v0.4s, v1.4h, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddl v0.2d, v1.2s, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddl2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddl2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddl2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddl v0.8h, v1.8b, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddl v0.4s, v1.4h, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddl v0.2d, v1.2s, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddl2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddl2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddl2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubl v0.8h, v1.8b, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubl v0.4s, v1.4h, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubl v0.2d, v1.2s, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubl2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubl2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubl2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubl v0.8h, v1.8b, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubl v0.4s, v1.4h, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubl v0.2d, v1.2s, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubl2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubl2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubl2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - sabal v0.8h, v1.8b, v2.8b +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - sabal v0.4s, v1.4h, v2.4h +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - sabal v0.2d, v1.2s, v2.2s +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - sabal2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - sabal2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - sabal2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - uabal v0.8h, v1.8b, v2.8b +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - uabal v0.4s, v1.4h, v2.4h +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - uabal v0.2d, v1.2s, v2.2s +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - uabal2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - uabal2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - uabal2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabdl v0.8h, v1.8b, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabdl v0.4s, v1.4h, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabdl v0.2d, v1.2s, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabdl2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabdl2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabdl2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabdl v0.8h, v1.8b, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabdl v0.4s, v1.4h, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabdl v0.2d, v1.2s, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabdl2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabdl2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabdl2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - smlal v0.8h, v1.8b, v2.8b +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - smlal v0.4s, v1.4h, v2.4h +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - smlal v0.2d, v1.2s, v2.2s +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - smlal2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - smlal2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - smlal2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - umlal v0.8h, v1.8b, v2.8b +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - umlal v0.4s, v1.4h, v2.4h +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - umlal v0.2d, v1.2s, v2.2s +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - umlal2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - umlal2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - umlal2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - smlsl v0.8h, v1.8b, v2.8b +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - smlsl v0.4s, v1.4h, v2.4h +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - smlsl v0.2d, v1.2s, v2.2s +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - smlsl2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - smlsl2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - smlsl2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - umlsl v0.8h, v1.8b, v2.8b +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - umlsl v0.4s, v1.4h, v2.4h +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - umlsl v0.2d, v1.2s, v2.2s +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - umlsl2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - umlsl2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: - - - - - - - 1.00 1.00 - - - umlsl2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smull v0.8h, v1.8b, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smull v0.4s, v1.4h, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smull v0.2d, v1.2s, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smull2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smull2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smull2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umull v0.8h, v1.8b, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umull v0.4s, v1.4h, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umull v0.2d, v1.2s, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umull2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umull2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umull2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal v0.4s, v1.4h, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal v0.2d, v1.2s, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl v0.4s, v1.4h, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl v0.2d, v1.2s, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull v0.4s, v1.4h, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull v0.2d, v1.2s, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull2 v0.4s, v1.8h, v2.8h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull2 v0.2d, v1.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - pmull v0.8h, v1.8b, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - pmull2 v0.8h, v1.16b, v2.16b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddw v0.8h, v1.8h, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddw v0.4s, v1.4s, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddw v0.2d, v1.2d, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddw2 v0.8h, v1.8h, v2.16b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddw2 v0.4s, v1.4s, v2.8h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddw2 v0.2d, v1.2d, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddw v0.8h, v1.8h, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddw v0.4s, v1.4s, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddw v0.2d, v1.2d, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddw2 v0.8h, v1.8h, v2.16b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddw2 v0.4s, v1.4s, v2.8h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddw2 v0.2d, v1.2d, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubw v0.8h, v1.8h, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubw v0.4s, v1.4s, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubw v0.2d, v1.2d, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubw2 v0.8h, v1.8h, v2.16b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubw2 v0.4s, v1.4s, v2.8h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubw2 v0.2d, v1.2d, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubw v0.8h, v1.8h, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubw v0.4s, v1.4s, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubw v0.2d, v1.2d, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubw2 v0.8h, v1.8h, v2.16b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubw2 v0.4s, v1.4s, v2.8h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubw2 v0.2d, v1.2d, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - addhn v0.8b, v1.8h, v2.8h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - addhn v0.4h, v1.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - addhn v0.2s, v1.2d, v2.2d +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - addhn2 v0.16b, v1.8h, v2.8h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - addhn2 v0.8h, v1.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - addhn2 v0.4s, v1.2d, v2.2d +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - raddhn v0.8b, v1.8h, v2.8h +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - raddhn v0.4h, v1.4s, v2.4s +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - raddhn v0.2s, v1.2d, v2.2d +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - raddhn2 v0.16b, v1.8h, v2.8h +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - raddhn2 v0.8h, v1.4s, v2.4s +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - raddhn2 v0.4s, v1.2d, v2.2d +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - rsubhn v0.8b, v1.8h, v2.8h +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - rsubhn v0.4h, v1.4s, v2.4s +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - rsubhn v0.2s, v1.2d, v2.2d +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - rsubhn2 v0.16b, v1.8h, v2.8h +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - rsubhn2 v0.8h, v1.4s, v2.4s +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - rsubhn2 v0.4s, v1.2d, v2.2d +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmulh h10, h11, h12 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmulh s20, s21, s2 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrdmulh h10, h11, h12 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrdmulh s20, s21, s2 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmulx s20, s22, s15 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmulx d23, d11, d1 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frecps s21, s16, s13 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frecps d22, d30, d21 +# CHECK-NEXT: - - - - - - 9.00 - - - - - frsqrts s21, s5, s12 +# CHECK-NEXT: - - - - - - 19.00 - - - - - frsqrts d8, d22, d18 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - scvtf s22, s13 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - scvtf d21, d12 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ucvtf s22, s13 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ucvtf d21, d14 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frecpe s19, s14 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frecpe d13, d13 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frecpx s18, s10 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frecpx d16, d19 +# CHECK-NEXT: - - - - - - 9.00 - - - - - frsqrte s22, s13 +# CHECK-NEXT: - - - - - - 19.00 - - - - - frsqrte d21, d12 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmeq d20, d21, d22 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - cmeq v0.16b, v0.16b, #0 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - cmeq v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmge d20, d21, #0 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmeq d20, d21, #0 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmhs d20, d21, d22 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmge d20, d21, d22 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmge v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmge v0.8b, v0.8b, #0 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmgt d20, d21, #0 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmgt d20, d21, d22 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmgt v0.2s, v0.2s, #0 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - cmgt v0.4s, v0.4s, v0.4s +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmge d20, d21, #0 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmhi d20, d21, d22 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - cmhi v0.8h, v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmhs d20, d21, d22 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmhs v0.8b, v0.8b, v0.8b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmgt d20, d21, d22 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmgt d20, d21, #0 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmle d20, d21, #0 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - cmle v0.2d, v0.2d, #0 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmlt d20, d21, #0 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - cmlt v0.8h, v0.8h, #0 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmtst d20, d21, d22 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cmtst v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - cnt v0.16b, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - cnt v0.8b, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - dup v0.16b, w28 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - dup v0.2d, x28 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - dup v0.2s, w28 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - dup v0.4h, w28 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - dup v0.4s, w28 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - dup v0.8b, w28 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - dup v0.8h, w28 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - eor v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ext v0.16b, v0.16b, v0.16b, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ext v0.8b, v0.8b, v0.8b, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fabd d29, d24, d20 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fabd s29, s24, s20 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fabd v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fabs v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fabs v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fabs v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fabs v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fabs v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - facge d20, d21, d22 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - facge s10, s11, s12 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - facge v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - facgt d20, d21, d22 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - facgt s10, s11, s12 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - facgt v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fadd v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - faddp v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - faddp v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmeq d20, d21, #0.0 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmeq s10, s11, s12 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmeq d20, d21, d22 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmeq s10, s11, #0.0 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmeq s10, s11, s12 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmeq v0.2s, v0.2s, #0.0 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmeq v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmge d20, d21, #0.0 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmeq d20, d21, #0.0 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmge s10, s11, s12 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmge d20, d21, d22 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmge s10, s11, #0.0 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmge s10, s11, s12 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fcmge v0.2d, v0.2d, #0.0 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fcmge v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmgt d20, d21, #0.0 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmge d20, d21, #0.0 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmgt s10, s11, s12 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmgt d20, d21, d22 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmgt s10, s11, #0.0 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmgt s10, s11, s12 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fcmgt v0.4s, v0.4s, #0.0 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fcmgt v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmle d20, d21, #0.0 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmgt d20, d21, #0.0 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmle s10, s11, #0.0 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fcmle v0.2d, v0.2d, #0.0 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmlt d20, d21, #0.0 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmle d20, d21, #0.0 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmlt s10, s11, #0.0 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fcmlt v0.4s, v0.4s, #0.0 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtas d21, d14 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtas s12, s13 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtas v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtas v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtas v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtas v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtas v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtau d21, d14 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtau s12, s13 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtau v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtau v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtau v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtau v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtau v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtl v0.2d, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtl v0.4s, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtl2 v0.2d, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtl2 v0.4s, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtms d21, d14 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtms s22, s13 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtms v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtms v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtms v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtms v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtms v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtmu d21, d14 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtmu s12, s13 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtmu v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtmu v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtmu v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtmu v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtmu v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtn v0.2s, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtn v0.4h, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtn2 v0.4s, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtn2 v0.8h, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtns d21, d14 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtns s22, s13 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtns v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtns v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtns v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtns v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtns v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtnu d21, d14 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtnu s12, s13 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtnu v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtnu v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtnu v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtnu v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtnu v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtps d21, d14 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtps s22, s13 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtps v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtps v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtps v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtps v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtps v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtpu d21, d14 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtpu s12, s13 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtpu v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtpu v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtpu v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtpu v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtpu v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtxn s22, d13 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtxn v0.2s, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtxn2 v0.4s, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzs d21, d12, #1 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzs d21, d14 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzs s12, s13 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzs s21, s12, #1 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzs v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzs v0.2d, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzs v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzs v0.2s, v0.2s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzs v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzs v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzs v0.4s, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzs v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzu d21, d12, #1 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzu d21, d14 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzu s12, s13 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzu s21, s12, #1 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzu v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzu v0.2d, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzu v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzu v0.2s, v0.2s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzu v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzu v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzu v0.4s, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzu v0.8h, v0.8h -# CHECK-NEXT: - - - - - - 10.00 - - - - - fdiv v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmax v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmax v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmax v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmaxnm v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmaxnm v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmaxnm v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmaxnmp v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmaxnmp v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmaxnmp v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmaxp v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmaxp v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmaxp v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmin v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmin v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmin v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fminnm v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fminnm v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fminnm v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fminnmp v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fminnmp v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fminnmp v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fminp v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fminp v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fminp v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmla d0, d1, v0.d[1] -# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmla s0, s1, v0.s[3] -# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmla v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmls d0, d4, v0.d[1] -# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmls s3, s5, v0.s[3] -# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmls v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmov v0.2d, #-1.25000000 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmov v0.2s, #13.00000000 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmov v0.4s, #1.00000000 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmul d0, d1, v0.d[1] -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmul s0, s1, v0.s[3] -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmul v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmulx d0, d4, v0.d[1] -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmulx d23, d11, d1 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmulx s20, s22, s15 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmulx s3, s5, v0.s[3] -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmulx v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fmulx v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmulx v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fneg v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fneg v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fneg v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fneg v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fneg v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frecpe d13, d13 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frecpe s19, s14 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frecpe v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frecpe v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frecpe v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frecpe v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frecpe v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frecps v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frecps d22, d30, d21 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frecps s21, s16, s13 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frecpx d16, d19 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frecpx s18, s10 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frinta v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frinta v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frinta v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frinta v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frinta v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frinti v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frinti v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frinti v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frinti v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frinti v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frintm v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frintm v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frintm v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frintm v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frintm v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frintn v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frintn v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frintn v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frintn v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frintn v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frintp v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frintp v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frintp v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frintp v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frintp v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frintx v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frintx v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frintx v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frintx v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frintx v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frintz v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frintz v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - frintz v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frintz v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - frintz v0.8h, v0.8h -# CHECK-NEXT: - - - - - - 19.00 - - - - - frsqrte d21, d12 -# CHECK-NEXT: - - - - - - 9.00 - - - - - frsqrte s22, s13 -# CHECK-NEXT: - - - - - - 19.00 - - - - - frsqrte v0.2d, v0.2d -# CHECK-NEXT: - - - - - - 9.00 - - - - - frsqrte v0.2s, v0.2s -# CHECK-NEXT: - - - - - - 5.00 - - - - - frsqrte v0.4h, v0.4h -# CHECK-NEXT: - - - - - - 9.00 - - - - - frsqrte v0.4s, v0.4s -# CHECK-NEXT: - - - - - - 5.00 - - - - - frsqrte v0.8h, v0.8h -# CHECK-NEXT: - - - - - - 19.00 - - - - - frsqrts d8, d22, d18 -# CHECK-NEXT: - - - - - - 9.00 - - - - - frsqrts s21, s5, s12 -# CHECK-NEXT: - - - - - - 19.00 - - - - - frsqrts v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - - - 19.00 - - - - - fsqrt v0.2d, v0.2d -# CHECK-NEXT: - - - - - - 9.00 - - - - - fsqrt v0.2s, v0.2s -# CHECK-NEXT: - - - - - - 5.00 - - - - - fsqrt v0.4h, v0.4h -# CHECK-NEXT: - - - - - - 9.00 - - - - - fsqrt v0.4s, v0.4s -# CHECK-NEXT: - - - - - - 5.00 - - - - - fsqrt v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fsub v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld1 { v0.16b }, [x0] -# CHECK-NEXT: - - - - - - - - - 6.00 - - ld1 { v0.2d, v1.2d, v2.2d }, [x0], #48 -# CHECK-NEXT: - - - - - - - - - 8.00 - - ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] -# CHECK-NEXT: - - - - - - - - - 4.00 - - ld1 { v0.4s, v1.4s }, [sp], #32 -# CHECK-NEXT: - - - - - - - - - 6.00 - - ld1 { v0.4s, v1.4s, v2.4s }, [sp] -# CHECK-NEXT: - - - - - - - - - 4.00 - - ld1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3 -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld1 { v0.8h }, [x15], x2 -# CHECK-NEXT: - - - - - - - - - 4.00 - - ld1 { v0.8h, v1.8h }, [x15] -# CHECK-NEXT: - - - - - - - - - 1.00 - - ld1 { v0.b }[9], [x0] -# CHECK-NEXT: - - - - - - - - - 1.00 - - ld1 { v0.b }[9], [x0], #1 -# CHECK-NEXT: - - - - - - - - - 1.00 - - ld1r { v0.16b }, [x0] -# CHECK-NEXT: - - - - - - - - - 1.00 - - ld1r { v0.16b }, [x0], #1 -# CHECK-NEXT: - - - - - - - - - 1.00 - - ld1r { v0.8h }, [x15] -# CHECK-NEXT: - - - - - - - - - 1.00 - - ld1r { v0.8h }, [x15], #2 -# CHECK-NEXT: - - - - - - - - - 4.00 - - ld2 { v0.16b, v1.16b }, [x0], x1 -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld2 { v0.8b, v1.8b }, [x0] -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld2 { v0.h, v1.h }[7], [x15] -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld2 { v0.h, v1.h }[7], [x15], #4 -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld2r { v0.2d, v1.2d }, [x0] -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld2r { v0.2d, v1.2d }, [x0], #16 -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld2r { v0.4s, v1.4s }, [sp] -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld2r { v0.4s, v1.4s }, [sp], #8 -# CHECK-NEXT: - - - - - - - - - 3.00 - - ld3 { v0.4h, v1.4h, v2.4h }, [x15] -# CHECK-NEXT: - - - - - - - - - 6.00 - - ld3 { v0.8h, v1.8h, v2.8h }, [x15], x2 -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld3 { v0.s, v1.s, v2.s }[3], [sp] -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld3 { v0.s, v1.s, v2.s }[3], [sp], x3 -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld3r { v0.4h, v1.4h, v2.4h }, [x15] -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld3r { v0.4h, v1.4h, v2.4h }, [x15], #6 -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld3r { v0.8b, v1.8b, v2.8b }, [x0] -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld3r { v0.8b, v1.8b, v2.8b }, [x0], #3 -# CHECK-NEXT: - - - - - - - - - 4.00 - - ld4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] -# CHECK-NEXT: - - - - - - - - - 8.00 - - ld4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64 -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0] -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0], #32 -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld4 { v0.h, v1.h, v2.h, v3.h }[7], [x0], x0 -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [sp] -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld4r { v0.1d, v1.1d, v2.1d, v3.1d }, [sp], x7 -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] -# CHECK-NEXT: - - - - - - - - - 2.00 - - ld4r { v0.2s, v1.2s, v2.2s, v3.2s }, [sp], x30 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - mla v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - mls v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - mov b0, v0.b[15] -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - mov d6, v0.d[1] -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - mov h2, v0.h[5] -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - mov s17, v0.s[2] -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - mov v0.16b, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - mov v0.8b, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - movi d15, #0xff00ff00ff00ff -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - movi v0.16b, #31 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - movi v0.2d, #0xff0000ff0000ffff -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - movi v0.2s, #8, msl #8 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - movi v0.4s, #255, lsl #24 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - movi v0.8b, #255 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - mul v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - mvni v0.2s, #0 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - mvni v0.4s, #16, msl #16 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - neg d29, d24 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - neg v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - neg v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - neg v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - neg v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - neg v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - neg v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - neg v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - mvn v0.16b, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - mvn v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - orn v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - mov v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - orr v0.8h, #31 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - pmul v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - pmul v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - pmull v0.8h, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - pmull2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - raddhn v0.2s, v0.2d, v0.2d -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - raddhn v0.4h, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - raddhn v0.8b, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - raddhn2 v0.16b, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - raddhn2 v0.4s, v0.2d, v0.2d -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - raddhn2 v0.8h, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rbit v0.16b, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - rbit v0.8b, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - rev16 v21.8b, v1.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rev16 v30.16b, v31.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - rev32 v0.4h, v9.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - rev32 v21.8b, v1.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rev32 v30.16b, v31.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rev32 v4.8h, v7.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rev64 v0.16b, v31.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - rev64 v1.8b, v9.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - rev64 v13.4h, v21.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rev64 v2.8h, v4.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - rev64 v4.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rev64 v6.4s, v8.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - rshrn v0.2s, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - rshrn v0.4h, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - rshrn v0.8b, v0.8h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rshrn2 v0.16b, v0.8h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rshrn2 v0.4s, v0.2d, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rshrn2 v0.8h, v0.4s, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rsubhn v0.2s, v0.2d, v0.2d -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rsubhn v0.4h, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rsubhn v0.8b, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rsubhn2 v0.16b, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rsubhn2 v0.4s, v0.2d, v0.2d -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - rsubhn2 v0.8h, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saba v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabal v0.2d, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabal v0.4s, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabal v0.8h, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabal2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabal2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabal2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sabd v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabdl v0.2d, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabdl v0.4s, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabdl v0.8h, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabdl2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabdl2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sabdl2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sadalp v0.1d, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sadalp v0.2d, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sadalp v0.2s, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sadalp v0.4h, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sadalp v0.4s, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sadalp v0.8h, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddl v0.2d, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddl v0.4s, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddl v0.8h, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddl2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddl2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddl2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - saddlp v0.1d, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddlp v0.2d, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - saddlp v0.2s, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - saddlp v0.4h, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddlp v0.4s, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddlp v0.8h, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddw v0.2d, v0.2d, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddw v0.4s, v0.4s, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddw v0.8h, v0.8h, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddw2 v0.2d, v0.2d, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddw2 v0.4s, v0.4s, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - saddw2 v0.8h, v0.8h, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - scvtf d21, d12 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - scvtf d21, d12, #64 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - scvtf s22, s13 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - scvtf s22, s13, #32 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - scvtf v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - scvtf v0.2d, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - scvtf v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - scvtf v0.2s, v0.2s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - scvtf v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - scvtf v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - scvtf v0.4s, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - scvtf v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shadd v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shl d7, d10, #12 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shl v0.16b, v0.16b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shl v0.2d, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shl v0.4h, v0.4h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shl v0.4s, v0.4s, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shll v0.2d, v0.2s, #32 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shll v0.4s, v0.4h, #16 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shll v0.8h, v0.8b, #8 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shll v0.2d, v0.2s, #32 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shll v0.4s, v0.4h, #16 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shll v0.8h, v0.8b, #8 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shll2 v0.2d, v0.4s, #32 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shll2 v0.4s, v0.8h, #16 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shll2 v0.8h, v0.16b, #8 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shll2 v0.2d, v0.4s, #32 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shll2 v0.4s, v0.8h, #16 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shll2 v0.8h, v0.16b, #8 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shrn v0.2s, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shrn v0.4h, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shrn v0.8b, v0.8h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shrn2 v0.16b, v0.8h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shrn2 v0.4s, v0.2d, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - shrn2 v0.8h, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shsub v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shsub v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sli d10, d14, #12 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sli v0.16b, v0.16b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sli v0.2d, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sli v0.2s, v0.2s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sli v0.4h, v0.4h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sli v0.4s, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sli v0.8b, v0.8b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sli v0.8h, v0.8h, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - smax v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - smax v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - smax v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - smaxp v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - smaxp v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - smaxp v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smin v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smin v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smin v0.8h, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sminp v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sminp v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sminp v0.8h, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smlal v0.2d, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smlal v0.4s, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smlal v0.8h, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smlal2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smlal2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smlal2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smlsl v0.2d, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smlsl v0.4s, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smlsl v0.8h, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smlsl2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smlsl2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smlsl2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smull v0.2d, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smull v0.4s, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smull v0.8h, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smull2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smull2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - smull2 v0.8h, v0.16b, v0.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcmlt d20, d21, #0.0 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - facge s10, s11, s12 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - facge d20, d21, d22 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - facgt s10, s11, s12 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - facgt d20, d21, d22 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - abs d29, d24 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqabs b19, b14 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqabs d18, d12 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqabs h21, h15 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqabs s20, s12 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqabs v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqabs v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqabs v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqabs v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqabs v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqabs v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqabs v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqadd b20, b11, b15 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqadd v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqadd v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmlal d19, s24, s12 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmlal d8, s9, v0.s[1] -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmlal s0, h0, v0.h[3] -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmlal s17, h27, h12 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal v0.2d, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal v0.4s, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmlsl d12, s23, s13 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmlsl d8, s9, v0.s[1] -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmlsl s0, h0, v0.h[3] -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmlsl s14, h12, h25 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl v0.2d, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl v0.4s, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmulh h10, h11, h12 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmulh h7, h15, v0.h[3] -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmulh s15, s14, v0.s[1] -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmulh s20, s21, s2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmulh v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmulh v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmull d1, s1, v0.s[1] -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmull d15, s22, s12 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmull s1, h1, v0.h[3] -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqdmull s12, h22, h12 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull v0.2d, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull v0.4s, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull2 v0.4s, v0.8h, v0.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqabs d18, d12 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - neg d29, d24 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqneg b19, b14 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqneg d18, d12 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqneg h21, h15 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqneg s20, s12 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqneg v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqneg v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqneg v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqneg v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqneg v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqneg v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqneg v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrdmulh h10, h11, h12 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrdmulh h7, h15, v0.h[3] -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrdmulh s15, s14, v0.s[1] -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrdmulh s20, s21, s2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrdmulh v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrdmulh v0.8h, v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshl d31, d31, d31 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshl h3, h4, h15 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshl v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshl v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshl v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrn b10, h13, #2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrn h15, s10, #6 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrn s15, d12, #9 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrn v0.2s, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrn v0.4h, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrn v0.8b, v0.8h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrshrn2 v0.16b, v0.8h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrshrn2 v0.4s, v0.2d, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrshrn2 v0.8h, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrun b17, h10, #6 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrun h10, s13, #15 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrun s22, d16, #31 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrun v0.2s, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrun v0.4h, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrun v0.8b, v0.8h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrshrun2 v0.16b, v0.8h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrshrun2 v0.4s, v0.2d, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrshrun2 v0.8h, v0.4s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqneg d18, d12 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - suqadd b19, b14 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - suqadd h20, h15 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - suqadd s21, s12 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - suqadd d18, d22 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - usqadd b19, b14 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - usqadd h20, h15 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - usqadd s21, s12 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - usqadd d18, d22 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal s17, h27, h12 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal d19, s24, s12 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl s14, h12, h25 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl d12, s23, s13 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull s12, h22, h12 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull d15, s22, s12 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqxtun b19, h14 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqxtun h21, s15 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqxtun s20, d12 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqxtn b18, h18 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqxtn h20, s17 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqxtn s19, d14 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqxtn b18, h18 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqxtn h20, s17 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqxtn s19, d14 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshr d15, d16, #12 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ushr d10, d17, #18 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srshr d19, d18, #7 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - urshr d20, d23, #31 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ssra d18, d12, #21 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - usra d20, d13, #61 +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - srsra d15, d11, #19 +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - ursra d18, d10, #13 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - shl d7, d10, #12 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshl b11, b19, #7 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshl d15, d16, #51 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshl d31, d31, d31 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshl h13, h18, #11 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshl h3, h4, h15 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshl s14, s17, #22 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshl v0.16b, v0.16b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshl v0.2d, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshl v0.2s, v0.2s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshl v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshl v0.4h, v0.4h, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshl v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshl v0.4s, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshl v0.8b, v0.8b, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshl v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshl v0.8h, v0.8h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshl d15, d16, #51 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshl b18, b15, #6 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshl h11, h18, #7 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshl s14, s19, #18 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshl d15, d12, #19 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshlu b15, b18, #6 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshlu d11, d13, #32 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshlu h19, h17, #6 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshlu s16, s14, #25 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshlu v0.16b, v0.16b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshlu v0.2d, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshlu v0.2s, v0.2s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshlu v0.4h, v0.4h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshlu v0.4s, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshlu v0.8b, v0.8b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshlu v0.8h, v0.8h, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshlu d11, d13, #32 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sri d10, d12, #14 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sli d10, d14, #12 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshrn b10, h15, #5 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshrn h17, s10, #4 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshrn s18, d10, #31 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshrn v0.2s, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshrn v0.4h, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshrn v0.8b, v0.8h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshrn2 v0.16b, v0.8h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshrn2 v0.4s, v0.2d, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshrn2 v0.8h, v0.4s, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshrn b12, h10, #7 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshrn h10, s14, #5 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshrn s10, d12, #13 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrn b10, h13, #2 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrn h15, s10, #6 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrn s15, d12, #9 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqrshrn b10, h12, #5 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqrshrn h12, s10, #14 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqrshrn s10, d10, #25 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshrun b15, h10, #7 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshrun h20, s14, #3 # CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshrun s10, d15, #15 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshrun v0.2s, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshrun v0.4h, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqshrun v0.8b, v0.8h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshrun2 v0.16b, v0.8h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshrun2 v0.4s, v0.2d, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqshrun2 v0.8h, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqsub s20, s10, s7 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqsub v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqsub v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqsub v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqxtn b18, h18 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqxtn h20, s17 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqxtn s19, d14 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqxtn v0.2s, v0.2d -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqxtn v0.4h, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqxtn v0.8b, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqxtn2 v0.16b, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqxtn2 v0.4s, v0.2d -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqxtn2 v0.8h, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqxtun b19, h14 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqxtun h21, s15 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqxtun s20, d12 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqxtun v0.2s, v0.2d -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqxtun v0.4h, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqxtun v0.8b, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqxtun2 v0.16b, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqxtun2 v0.4s, v0.2d -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqxtun2 v0.8h, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srhadd v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srhadd v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srhadd v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sri d10, d12, #14 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sri v0.16b, v0.16b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sri v0.2d, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sri v0.2s, v0.2s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sri v0.4h, v0.4h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sri v0.4s, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sri v0.8b, v0.8b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sri v0.8h, v0.8h, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srshl d16, d16, d16 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srshl v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srshl v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srshl v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srshr d19, d18, #7 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - srshr v0.16b, v0.16b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - srshr v0.2d, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srshr v0.2s, v0.2s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srshr v0.4h, v0.4h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - srshr v0.4s, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srshr v0.8b, v0.8b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - srshr v0.8h, v0.8h, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srsra d15, d11, #19 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - srsra v0.16b, v0.16b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - srsra v0.2d, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srsra v0.2s, v0.2s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srsra v0.4h, v0.4h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - srsra v0.4s, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - srsra v0.8b, v0.8b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - srsra v0.8h, v0.8h, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshl d31, d31, d31 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sshl v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshl v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshl v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshl v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshll v0.2d, v0.2s, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sshll2 v0.4s, v0.8h, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshr d15, d16, #12 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sshr v0.16b, v0.16b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sshr v0.2d, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshr v0.2s, v0.2s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshr v0.4h, v0.4h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sshr v0.4s, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sshr v0.8b, v0.8b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sshr v0.8h, v0.8h, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ssra d18, d12, #21 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssra v0.16b, v0.16b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssra v0.2d, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ssra v0.2s, v0.2s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ssra v0.4h, v0.4h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssra v0.4s, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ssra v0.8b, v0.8b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssra v0.8h, v0.8h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubl v0.2d, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubl v0.4s, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubl v0.8h, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubl2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubl2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubl2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubw v0.2d, v0.2d, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubw v0.4s, v0.4s, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubw v0.8h, v0.8h, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubw2 v0.2d, v0.2d, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubw2 v0.4s, v0.4s, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ssubw2 v0.8h, v0.8h, v0.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrun b17, h10, #6 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrun h10, s13, #15 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sqrshrun s22, d16, #31 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - scvtf s22, s13, #32 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - scvtf d21, d12, #64 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ucvtf s22, s13, #32 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ucvtf d21, d14, #64 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzs s21, s12, #1 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzs d21, d12, #1 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzu s21, s12, #1 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzu d21, d12, #1 +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld1 { v0.16b }, [x0] +# CHECK-NEXT: - - - - - - - - - 4.00 - - ld1 { v15.8h, v16.8h }, [x15] +# CHECK-NEXT: - - - - - - - - - 6.00 - - ld1 { v31.4s, v0.4s, v1.4s }, [sp] +# CHECK-NEXT: - - - - - - - - - 8.00 - - ld1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld2 { v0.8b, v1.8b }, [x0] +# CHECK-NEXT: - - - - - - - - - 3.00 - - ld3 { v15.4h, v16.4h, v17.4h }, [x15] +# CHECK-NEXT: - - - - - - - - - 4.00 - - ld4 { v31.2s, v0.2s, v1.2s, v2.2s }, [sp] # CHECK-NEXT: - - - - - - - - - - - 1.00 st1 { v0.16b }, [x0] -# CHECK-NEXT: - - - - - - - - - - - 2.00 st1 { v0.2d, v1.2d, v2.2d }, [x0], #48 +# CHECK-NEXT: - - - - - - - - - - - 1.00 st1 { v15.8h, v16.8h }, [x15] +# CHECK-NEXT: - - - - - - - - - - - 2.00 st1 { v31.4s, v0.4s, v1.4s }, [sp] # CHECK-NEXT: - - - - - - - - - - - 4.00 st1 { v0.2d, v1.2d, v2.2d, v3.2d }, [x0] -# CHECK-NEXT: - - - - - - - - - - - 1.00 st1 { v0.4s, v1.4s }, [sp], #32 -# CHECK-NEXT: - - - - - - - - - - - 2.00 st1 { v0.4s, v1.4s, v2.4s }, [sp] +# CHECK-NEXT: - - - - - - - - - - - 2.00 st2 { v0.8b, v1.8b }, [x0] +# CHECK-NEXT: - - - - - - - - - - - 4.00 st3 { v15.4h, v16.4h, v17.4h }, [x15] +# CHECK-NEXT: - - - - - - - - - - - 4.00 st4 { v31.2s, v0.2s, v1.2s, v2.2s }, [sp] +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld1 { v15.8h }, [x15], x2 +# CHECK-NEXT: - - - - - - - - - 4.00 - - ld1 { v31.4s, v0.4s }, [sp], #32 +# CHECK-NEXT: - - - - - - - - - 6.00 - - ld1 { v0.2d, v1.2d, v2.2d }, [x0], #48 +# CHECK-NEXT: - - - - - - - - - 4.00 - - ld1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3 +# CHECK-NEXT: - - - - - - - - - 4.00 - - ld2 { v0.16b, v1.16b }, [x0], x1 +# CHECK-NEXT: - - - - - - - - - 6.00 - - ld3 { v15.8h, v16.8h, v17.8h }, [x15], x2 +# CHECK-NEXT: - - - - - - - - - 8.00 - - ld4 { v31.4s, v0.4s, v1.4s, v2.4s }, [sp], #64 +# CHECK-NEXT: - - - - - - - - - - - 1.00 st1 { v15.8h }, [x15], x2 +# CHECK-NEXT: - - - - - - - - - - - 1.00 st1 { v31.4s, v0.4s }, [sp], #32 +# CHECK-NEXT: - - - - - - - - - - - 2.00 st1 { v0.2d, v1.2d, v2.2d }, [x0], #48 # CHECK-NEXT: - - - - - - - - - - - 4.00 st1 { v0.8b, v1.8b, v2.8b, v3.8b }, [x0], x3 -# CHECK-NEXT: - - - - - - - - - - - 1.00 st1 { v0.8h }, [x15], x2 -# CHECK-NEXT: - - - - - - - - - - - 1.00 st1 { v0.8h, v1.8h }, [x15] -# CHECK-NEXT: - - - - - - - - - - - 1.00 st1 { v0.d }[1], [x0] -# CHECK-NEXT: - - - - - - - - - - - 1.00 st1 { v0.d }[1], [x0], #8 # CHECK-NEXT: - - - - - - - - - - - 4.00 st2 { v0.16b, v1.16b }, [x0], x1 -# CHECK-NEXT: - - - - - - - - - - - 2.00 st2 { v0.8b, v1.8b }, [x0] -# CHECK-NEXT: - - - - - - - - - - - 2.00 st2 { v0.s, v1.s }[3], [sp] -# CHECK-NEXT: - - - - - - - - - - - 2.00 st2 { v0.s, v1.s }[3], [sp], #8 -# CHECK-NEXT: - - - - - - - - - - - 4.00 st3 { v0.4h, v1.4h, v2.4h }, [x15] -# CHECK-NEXT: - - - - - - - - - - - 4.00 st3 { v0.8h, v1.8h, v2.8h }, [x15], x2 -# CHECK-NEXT: - - - - - - - - - - - 2.00 st3 { v0.h, v1.h, v2.h }[7], [x15] -# CHECK-NEXT: - - - - - - - - - - - 2.00 st3 { v0.h, v1.h, v2.h }[7], [x15], #6 -# CHECK-NEXT: - - - - - - - - - - - 4.00 st4 { v0.2s, v1.2s, v2.2s, v3.2s }, [sp] -# CHECK-NEXT: - - - - - - - - - - - 4.00 st4 { v0.4s, v1.4s, v2.4s, v3.4s }, [sp], #64 +# CHECK-NEXT: - - - - - - - - - - - 4.00 st3 { v15.8h, v16.8h, v17.8h }, [x15], x2 +# CHECK-NEXT: - - - - - - - - - - - 4.00 st4 { v31.4s, v0.4s, v1.4s, v2.4s }, [sp], #64 +# CHECK-NEXT: - - - - - - - - - 1.00 - - ld1r { v0.16b }, [x0] +# CHECK-NEXT: - - - - - - - - - 1.00 - - ld1r { v15.8h }, [x15] +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld2r { v31.4s, v0.4s }, [sp] +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld2r { v0.2d, v1.2d }, [x0] +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld3r { v0.8b, v1.8b, v2.8b }, [x0] +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld3r { v15.4h, v16.4h, v17.4h }, [x15] +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld4r { v31.2s, v0.2s, v1.2s, v2.2s }, [sp] +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld4r { v31.1d, v0.1d, v1.1d, v2.1d }, [sp] +# CHECK-NEXT: - - - - - - - - - 1.00 - - ld1 { v0.b }[9], [x0] +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld2 { v15.h, v16.h }[7], [x15] +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld3 { v31.s, v0.s, v1.s }[3], [sp] +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0] +# CHECK-NEXT: - - - - - - - - - - - 1.00 st1 { v0.d }[1], [x0] +# CHECK-NEXT: - - - - - - - - - - - 2.00 st2 { v31.s, v0.s }[3], [sp] +# CHECK-NEXT: - - - - - - - - - - - 2.00 st3 { v15.h, v16.h, v17.h }[7], [x15] # CHECK-NEXT: - - - - - - - - - - - 2.00 st4 { v0.b, v1.b, v2.b, v3.b }[9], [x0] +# CHECK-NEXT: - - - - - - - - - 1.00 - - ld1r { v0.16b }, [x0], #1 +# CHECK-NEXT: - - - - - - - - - 1.00 - - ld1r { v15.8h }, [x15], #2 +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld2r { v31.4s, v0.4s }, [sp], #8 +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld2r { v0.2d, v1.2d }, [x0], #16 +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld3r { v0.8b, v1.8b, v2.8b }, [x0], #3 +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld3r { v15.4h, v16.4h, v17.4h }, [x15], #6 +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld4r { v31.2s, v0.2s, v1.2s, v2.2s }, [sp], x30 +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld4r { v31.1d, v0.1d, v1.1d, v2.1d }, [sp], x7 +# CHECK-NEXT: - - - - - - - - - 1.00 - - ld1 { v0.b }[9], [x0], #1 +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld2 { v15.h, v16.h }[7], [x15], #4 +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld3 { v31.s, v0.s, v1.s }[3], [sp], x3 +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld4 { v0.d, v1.d, v2.d, v3.d }[1], [x0], #32 +# CHECK-NEXT: - - - - - - - - - 2.00 - - ld4 { v0.h, v1.h, v2.h, v3.h }[7], [x0], x0 +# CHECK-NEXT: - - - - - - - - - - - 1.00 st1 { v0.d }[1], [x0], #8 +# CHECK-NEXT: - - - - - - - - - - - 2.00 st2 { v31.s, v0.s }[3], [sp], #8 +# CHECK-NEXT: - - - - - - - - - - - 2.00 st3 { v15.h, v16.h, v17.h }[7], [x15], #6 # CHECK-NEXT: - - - - - - - - - - - 2.00 st4 { v0.b, v1.b, v2.b, v3.b }[9], [x0], x5 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - sub d15, d5, d16 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sub v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - suqadd b19, b14 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - suqadd d18, d22 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - suqadd h20, h15 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - suqadd s21, s12 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - suqadd v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - suqadd v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - suqadd v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - suqadd v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - suqadd v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - suqadd v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - suqadd v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - tbl v0.16b, { v0.16b }, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - tbl v0.16b, { v0.16b, v1.16b }, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - tbl v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - tbl v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - tbl v0.8b, { v0.16b }, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - tbl v0.8b, { v0.16b, v1.16b }, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - tbl v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - tbl v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - tbx v0.16b, { v0.16b }, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - tbx v0.16b, { v0.16b, v1.16b }, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - tbx v0.16b, { v0.16b, v1.16b, v2.16b }, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - tbx v0.16b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - tbx v0.8b, { v0.16b }, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - tbx v0.8b, { v0.16b, v1.16b }, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - tbx v0.8b, { v0.16b, v1.16b, v2.16b }, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - tbx v0.8b, { v0.16b, v1.16b, v2.16b, v3.16b }, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn1 v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn1 v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - trn1 v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - trn1 v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn1 v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - trn1 v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn1 v0.8h, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn2 v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn2 v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - trn2 v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - trn2 v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn2 v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - trn2 v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn2 v0.8h, v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uaba v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabal v0.2d, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabal v0.4s, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabal v0.8h, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabal2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabal2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabal2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uabd v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabdl v0.2d, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabdl v0.4s, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabdl v0.8h, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabdl2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabdl2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uabdl2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uadalp v0.1d, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uadalp v0.2d, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uadalp v0.2s, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uadalp v0.4h, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uadalp v0.4s, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uadalp v0.8h, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddl v0.2d, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddl v0.4s, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddl v0.8h, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddl2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddl2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddl2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uaddlp v0.1d, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddlp v0.2d, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uaddlp v0.2s, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uaddlp v0.4h, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddlp v0.4s, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddlp v0.8h, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddw v0.2d, v0.2d, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddw v0.4s, v0.4s, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddw v0.8h, v0.8h, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddw2 v0.2d, v0.2d, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddw2 v0.4s, v0.4s, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uaddw2 v0.8h, v0.8h, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ucvtf d21, d14 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ucvtf d21, d14, #64 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ucvtf s22, s13 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ucvtf s22, s13, #32 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ucvtf v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ucvtf v0.2d, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ucvtf v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ucvtf v0.2s, v0.2s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ucvtf v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ucvtf v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ucvtf v0.4s, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ucvtf v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uhadd v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uhadd v0.8h, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uhsub v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umax v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umax v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umax v0.8h, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umaxp v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umaxp v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umaxp v0.8h, v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - umin v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - umin v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - umin v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uminp v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uminp v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uminp v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umlal v0.2d, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umlal v0.4s, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umlal v0.8h, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umlal2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umlal2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umlal2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umlsl v0.2d, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umlsl v0.4s, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umlsl v0.8h, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umlsl2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umlsl2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umlsl2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umull v0.2d, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umull v0.4s, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umull v0.8h, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umull2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umull2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - umull2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqadd h0, h1, h5 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqadd v0.8h, v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqrshl b11, b20, b30 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqrshl s23, s20, s16 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqrshl v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqrshl v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqrshl v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqrshl v0.8h, v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqrshrn b10, h12, #5 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqrshrn h12, s10, #14 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqrshrn s10, d10, #25 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqrshrn v0.2s, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqrshrn v0.4h, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqrshrn v0.8b, v0.8h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqrshrn2 v0.16b, v0.8h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqrshrn2 v0.4s, v0.2d, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqrshrn2 v0.8h, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshl b11, b20, b30 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshl b18, b15, #6 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshl d15, d12, #19 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshl h11, h18, #7 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshl s14, s19, #18 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshl s23, s20, s16 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshl v0.16b, v0.16b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshl v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshl v0.2d, v0.2d, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshl v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshl v0.2s, v0.2s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshl v0.4h, v0.4h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshl v0.4s, v0.4s, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshl v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshl v0.8b, v0.8b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshl v0.8h, v0.8h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshl v0.8h, v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshrn b12, h10, #7 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshrn h10, s14, #5 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshrn s10, d12, #13 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshrn v0.2s, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshrn v0.4h, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqshrn v0.8b, v0.8h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshrn2 v0.16b, v0.8h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshrn2 v0.4s, v0.2d, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqshrn2 v0.8h, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqsub d16, d16, d16 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqsub v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqxtn b18, h18 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqxtn h20, s17 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uqxtn s19, d14 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqxtn v0.2s, v0.2d -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqxtn v0.4h, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqxtn v0.8b, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqxtn2 v0.16b, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqxtn2 v0.4s, v0.2d -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uqxtn2 v0.8h, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - urecpe v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urecpe v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urhadd v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urhadd v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urhadd v0.8h, v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - urshl d8, d7, d4 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urshl v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urshl v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urshl v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urshl v0.8h, v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - urshr d20, d23, #31 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urshr v0.16b, v0.16b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urshr v0.2d, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - urshr v0.2s, v0.2s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - urshr v0.4h, v0.4h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urshr v0.4s, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - urshr v0.8b, v0.8b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - urshr v0.8h, v0.8h, #3 -# CHECK-NEXT: - - - - - - 9.00 - - - - - ursqrte v0.2s, v0.2s -# CHECK-NEXT: - - - - - - 9.00 - - - - - ursqrte v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ursra d18, d10, #13 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ursra v0.16b, v0.16b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ursra v0.2d, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ursra v0.2s, v0.2s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ursra v0.4h, v0.4h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ursra v0.4s, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ursra v0.8b, v0.8b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ursra v0.8h, v0.8h, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ushl d0, d0, d0 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ushl v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ushl v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ushl v0.8h, v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ushll v0.4s, v0.4h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ushll2 v0.8h, v0.16b, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ushr d10, d17, #18 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ushr v0.16b, v0.16b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ushr v0.2d, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ushr v0.2s, v0.2s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ushr v0.4h, v0.4h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ushr v0.4s, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ushr v0.8b, v0.8b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ushr v0.8h, v0.8h, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - usqadd b19, b14 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - usqadd d18, d22 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - usqadd h20, h15 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - usqadd s21, s12 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usqadd v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usqadd v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - usqadd v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - usqadd v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usqadd v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - usqadd v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usqadd v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - usra d20, d13, #61 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usra v0.16b, v0.16b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usra v0.2d, v0.2d, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - usra v0.2s, v0.2s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - usra v0.4h, v0.4h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usra v0.4s, v0.4s, #3 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - usra v0.8b, v0.8b, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usra v0.8h, v0.8h, #3 -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubl v0.2d, v0.2s, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubl v0.4s, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubl v0.8h, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubl2 v0.2d, v0.4s, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubl2 v0.4s, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubl2 v0.8h, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubw v0.2d, v0.2d, v0.2s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubw v0.4s, v0.4s, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubw v0.8h, v0.8h, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubw2 v0.2d, v0.2d, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubw2 v0.4s, v0.4s, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - usubw2 v0.8h, v0.8h, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uzp1 v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uzp1 v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uzp1 v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uzp1 v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uzp1 v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uzp1 v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uzp1 v0.8h, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uzp2 v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uzp2 v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uzp2 v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uzp2 v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uzp2 v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uzp2 v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uzp2 v0.8h, v0.8h, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - xtn v0.2s, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - xtn v0.4h, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - xtn v0.8b, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - xtn2 v0.16b, v0.8h -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - xtn2 v0.4s, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - xtn2 v0.8h, v0.4s -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - zip1 v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - zip1 v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - zip1 v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - zip1 v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - zip1 v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - zip1 v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - zip1 v0.8h, v0.8h, v0.8h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - zip2 v0.16b, v0.16b, v0.16b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - zip2 v0.2d, v0.2d, v0.2d -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - zip2 v0.2s, v0.2s, v0.2s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - zip2 v0.4h, v0.4h, v0.4h -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - zip2 v0.4s, v0.4s, v0.4s -# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - zip2 v0.8b, v0.8b, v0.8b -# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - zip2 v0.8h, v0.8h, v0.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - ext v0.8b, v1.8b, v2.8b, #3 +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - ext v0.16b, v1.16b, v2.16b, #3 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uzp1 v1.8b, v1.8b, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uzp1 v2.16b, v1.16b, v2.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uzp1 v3.4h, v1.4h, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uzp1 v4.8h, v1.8h, v2.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uzp1 v5.2s, v1.2s, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uzp1 v6.4s, v1.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uzp1 v7.2d, v1.2d, v2.2d +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - trn1 v8.8b, v1.8b, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn1 v9.16b, v1.16b, v2.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - trn1 v10.4h, v1.4h, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn1 v27.8h, v7.8h, v2.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - trn1 v12.2s, v7.2s, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn1 v29.4s, v6.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn1 v14.2d, v6.2d, v2.2d +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - zip1 v31.8b, v5.8b, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - zip1 v0.16b, v5.16b, v2.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - zip1 v17.4h, v4.4h, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - zip1 v2.8h, v4.8h, v2.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - zip1 v19.2s, v3.2s, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - zip1 v4.4s, v3.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - zip1 v21.2d, v2.2d, v2.2d +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uzp2 v6.8b, v2.8b, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uzp2 v23.16b, v1.16b, v2.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uzp2 v8.4h, v1.4h, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uzp2 v25.8h, v0.8h, v2.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - uzp2 v10.2s, v0.2s, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uzp2 v27.4s, v7.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - uzp2 v12.2d, v7.2d, v2.2d +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - trn2 v29.8b, v6.8b, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn2 v14.16b, v6.16b, v2.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - trn2 v31.4h, v5.4h, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn2 v0.8h, v5.8h, v2.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - trn2 v17.2s, v4.2s, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn2 v2.4s, v4.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - trn2 v19.2d, v3.2d, v2.2d +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - zip2 v4.8b, v3.8b, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - zip2 v21.16b, v2.16b, v2.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - zip2 v6.4h, v2.4h, v2.4h +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - zip2 v23.8h, v1.8h, v2.8h +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - zip2 v8.2s, v1.2s, v2.2s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - zip2 v25.4s, v0.4s, v2.4s +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - zip2 v10.2d, v0.2d, v2.2d +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmul s0, s1, v1.s[0] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmul s0, s1, v1.s[3] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmul d0, d1, v1.d[0] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmul d0, d1, v1.d[1] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmul d15, d15, v15.d[1] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmulx s3, s5, v7.s[0] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmulx s3, s5, v7.s[3] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmulx s3, s5, v15.s[3] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmulx d0, d4, v8.d[0] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - fmulx d0, d4, v8.d[1] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmla s0, s1, v1.s[0] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmla s0, s1, v1.s[3] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmla d0, d1, v1.d[0] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmla d0, d1, v1.d[1] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmla d15, d15, v15.d[1] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmls s3, s5, v7.s[0] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmls s3, s5, v7.s[3] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmls s3, s5, v15.s[3] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmls d0, d4, v8.d[0] +# CHECK-NEXT: - - - - - - - 0.50 0.50 - - - fmls d0, d4, v8.d[1] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal s0, h0, v0.h[0] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal s0, h0, v0.h[1] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal s0, h0, v0.h[2] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal s0, h0, v0.h[3] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal s0, h0, v0.h[4] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal s0, h0, v0.h[5] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal s0, h0, v0.h[6] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal s0, h0, v0.h[7] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal d8, s9, v15.s[0] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal d8, s9, v15.s[1] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal d8, s9, v15.s[2] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlal d8, s9, v15.s[3] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl s0, h0, v0.h[0] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl s0, h0, v0.h[1] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl s0, h0, v0.h[2] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl s0, h0, v0.h[3] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl s0, h0, v0.h[4] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl s0, h0, v0.h[5] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl s0, h0, v0.h[6] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl s0, h0, v0.h[7] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl d8, s9, v15.s[0] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl d8, s9, v15.s[1] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl d8, s9, v15.s[2] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmlsl d8, s9, v15.s[3] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull s1, h1, v1.h[0] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull s1, h1, v1.h[1] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull s1, h1, v1.h[2] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull s1, h1, v1.h[3] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull s1, h1, v1.h[4] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull s1, h1, v1.h[5] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull s1, h1, v1.h[6] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull s1, h1, v1.h[7] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull d1, s1, v4.s[0] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull d1, s1, v4.s[1] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull d1, s1, v4.s[2] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmull d1, s1, v4.s[3] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmulh h7, h1, v14.h[0] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmulh h7, h15, v8.h[1] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmulh h7, h15, v8.h[2] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmulh h7, h15, v8.h[3] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmulh h7, h15, v8.h[4] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmulh h7, h15, v8.h[5] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmulh h7, h15, v8.h[6] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmulh h7, h15, v8.h[7] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmulh s15, s3, v4.s[0] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmulh s15, s14, v16.s[1] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmulh s15, s15, v16.s[2] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqdmulh s15, s16, v17.s[3] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrdmulh h7, h1, v14.h[0] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrdmulh h7, h15, v8.h[1] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrdmulh h7, h15, v8.h[2] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrdmulh h7, h15, v8.h[3] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrdmulh h7, h15, v8.h[4] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrdmulh h7, h15, v8.h[5] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrdmulh h7, h15, v8.h[6] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrdmulh h7, h15, v8.h[7] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrdmulh s15, s3, v4.s[0] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrdmulh s15, s14, v16.s[1] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrdmulh s15, s15, v16.s[2] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - sqrdmulh s15, s16, v17.s[3] +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - mov b0, v0.b[15] +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - mov h2, v31.h[5] +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - mov s17, v2.s[2] +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - mov d6, v12.d[1] +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - tbl v0.8b, { v1.16b }, v2.8b +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - tbl v16.8b, { v31.16b, v0.16b }, v2.8b +# CHECK-NEXT: - - - - 3.00 3.00 - - - - - - tbl v0.8b, { v1.16b, v2.16b, v3.16b }, v2.8b +# CHECK-NEXT: - - - - 4.00 4.00 - - - - - - tbl v16.8b, { v23.16b, v24.16b, v25.16b, v26.16b }, v2.8b +# CHECK-NEXT: - - - - 1.00 1.00 - - - - - - tbl v0.16b, { v1.16b }, v2.16b +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - tbl v16.16b, { v31.16b, v0.16b }, v2.16b +# CHECK-NEXT: - - - - 3.00 3.00 - - - - - - tbl v0.16b, { v1.16b, v2.16b, v3.16b }, v2.16b +# CHECK-NEXT: - - - - 4.00 4.00 - - - - - - tbl v0.16b, { v31.16b, v0.16b, v1.16b, v2.16b }, v2.16b +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - tbx v0.8b, { v1.16b }, v2.8b +# CHECK-NEXT: - - - - 3.00 3.00 - - - - - - tbx v16.8b, { v31.16b, v0.16b }, v2.8b +# CHECK-NEXT: - - - - 4.00 4.00 - - - - - - tbx v0.8b, { v1.16b, v2.16b, v3.16b }, v2.8b +# CHECK-NEXT: - - - - 5.00 5.00 - - - - - - tbx v16.8b, { v23.16b, v24.16b, v25.16b, v26.16b }, v2.8b +# CHECK-NEXT: - - - - 2.00 2.00 - - - - - - tbx v0.16b, { v1.16b }, v2.16b +# CHECK-NEXT: - - - - 3.00 3.00 - - - - - - tbx v16.16b, { v31.16b, v0.16b }, v2.16b +# CHECK-NEXT: - - - - 4.00 4.00 - - - - - - tbx v0.16b, { v1.16b, v2.16b, v3.16b }, v2.16b +# CHECK-NEXT: - - - - 5.00 5.00 - - - - - - tbx v16.16b, { v31.16b, v0.16b, v1.16b, v2.16b }, v2.16b +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtxn s22, d13 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtas s12, s13 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtas d21, d14 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtau s12, s13 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtau d21, d14 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtms s22, s13 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtms d21, d14 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtmu s12, s13 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtmu d21, d14 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtns s22, s13 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtns d21, d14 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtnu s12, s13 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtnu d21, d14 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtps s22, s13 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtps d21, d14 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtpu s12, s13 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtpu d21, d14 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzs s12, s13 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzs d21, d14 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzu s12, s13 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fcvtzu d21, d14 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fabd s29, s24, s20 +# CHECK-NEXT: - - - - 0.50 0.50 - - - - - - fabd d29, d24, d20