diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -2305,10 +2305,10 @@ unsigned NextUnreserved = ReservedCycles[InstanceIdx]; // If this resource has never been used, always return cycle zero. if (NextUnreserved == InvalidCycle) - return 0; + return CurrCycle; // For bottom-up scheduling add the cycles needed for the current operation. if (!isTop()) - NextUnreserved += Cycles; + NextUnreserved = std::max(CurrCycle, NextUnreserved + Cycles); return NextUnreserved; } @@ -2712,7 +2712,7 @@ unsigned ReservedUntil, InstanceIdx; std::tie(ReservedUntil, InstanceIdx) = - getNextResourceCycle(SC, PIdx, 0, PI->StartAtCycle); + getNextResourceCycle(SC, PIdx, PI->Cycles, PI->StartAtCycle); if (isTop()) { ReservedCycles[InstanceIdx] = std::max(ReservedUntil, NextCycle + PI->Cycles); diff --git a/llvm/test/CodeGen/AArch64/dump-reserved-cycles.mir b/llvm/test/CodeGen/AArch64/dump-reserved-cycles.mir --- a/llvm/test/CodeGen/AArch64/dump-reserved-cycles.mir +++ b/llvm/test/CodeGen/AArch64/dump-reserved-cycles.mir @@ -27,7 +27,7 @@ # CHECK-NEXT: ExpectedLatency: 0c # CHECK-NEXT: - Latency limited. # CHECK-NEXT: CortexA55UnitALU(0) = 1 -# CHECK-NEXT: CortexA55UnitALU(1) = 4294967295 +# CHECK-NEXT: CortexA55UnitALU(1) = 0 # CHECK-NEXT: CortexA55UnitB(0) = 4294967295 # CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPALU(0) = 4294967295 diff --git a/llvm/test/CodeGen/AArch64/faddp-half.ll b/llvm/test/CodeGen/AArch64/faddp-half.ll --- a/llvm/test/CodeGen/AArch64/faddp-half.ll +++ b/llvm/test/CodeGen/AArch64/faddp-half.ll @@ -173,29 +173,29 @@ ; CHECKNOFP16-NEXT: fadd s7, s16, s7 ; CHECKNOFP16-NEXT: fcvt h3, s3 ; CHECKNOFP16-NEXT: mov h16, v2.h[5] +; CHECKNOFP16-NEXT: fcvt s5, h5 +; CHECKNOFP16-NEXT: fcvt s6, h6 ; CHECKNOFP16-NEXT: fcvt h7, s7 ; CHECKNOFP16-NEXT: mov v1.h[1], v3.h[0] ; CHECKNOFP16-NEXT: fcvt h3, s4 -; CHECKNOFP16-NEXT: fcvt s4, h5 -; CHECKNOFP16-NEXT: fcvt s5, h6 -; CHECKNOFP16-NEXT: mov h6, v0.h[5] -; CHECKNOFP16-NEXT: mov v1.h[2], v3.h[0] -; CHECKNOFP16-NEXT: fadd s3, s5, s4 -; CHECKNOFP16-NEXT: fcvt s4, h6 -; CHECKNOFP16-NEXT: fcvt s5, h16 +; CHECKNOFP16-NEXT: mov h4, v0.h[5] +; CHECKNOFP16-NEXT: fadd s5, s6, s5 ; CHECKNOFP16-NEXT: mov h6, v0.h[6] -; CHECKNOFP16-NEXT: mov h16, v2.h[6] +; CHECKNOFP16-NEXT: mov v1.h[2], v3.h[0] ; CHECKNOFP16-NEXT: mov h0, v0.h[7] -; CHECKNOFP16-NEXT: mov v1.h[3], v7.h[0] +; CHECKNOFP16-NEXT: fcvt s3, h4 +; CHECKNOFP16-NEXT: fcvt s4, h16 +; CHECKNOFP16-NEXT: mov h16, v2.h[6] ; CHECKNOFP16-NEXT: mov h2, v2.h[7] -; CHECKNOFP16-NEXT: fcvt h3, s3 -; CHECKNOFP16-NEXT: fadd s4, s5, s4 +; CHECKNOFP16-NEXT: mov v1.h[3], v7.h[0] +; CHECKNOFP16-NEXT: fcvt s0, h0 +; CHECKNOFP16-NEXT: fadd s3, s4, s3 +; CHECKNOFP16-NEXT: fcvt h4, s5 ; CHECKNOFP16-NEXT: fcvt s5, h6 ; CHECKNOFP16-NEXT: fcvt s6, h16 -; CHECKNOFP16-NEXT: fcvt s0, h0 ; CHECKNOFP16-NEXT: fcvt s2, h2 -; CHECKNOFP16-NEXT: mov v1.h[4], v3.h[0] -; CHECKNOFP16-NEXT: fcvt h3, s4 +; CHECKNOFP16-NEXT: mov v1.h[4], v4.h[0] +; CHECKNOFP16-NEXT: fcvt h3, s3 ; CHECKNOFP16-NEXT: fadd s4, s6, s5 ; CHECKNOFP16-NEXT: fadd s0, s2, s0 ; CHECKNOFP16-NEXT: mov v1.h[5], v3.h[0] @@ -221,110 +221,110 @@ ; ; CHECKNOFP16-LABEL: addp_v16f16: ; CHECKNOFP16: // %bb.0: // %entry -; CHECKNOFP16-NEXT: rev32 v5.8h, v0.8h -; CHECKNOFP16-NEXT: mov h6, v0.h[1] -; CHECKNOFP16-NEXT: rev32 v4.8h, v1.8h -; CHECKNOFP16-NEXT: fcvt s2, h0 -; CHECKNOFP16-NEXT: mov h7, v0.h[2] -; CHECKNOFP16-NEXT: mov h16, v1.h[1] -; CHECKNOFP16-NEXT: mov h3, v5.h[1] -; CHECKNOFP16-NEXT: fcvt s6, h6 -; CHECKNOFP16-NEXT: mov h17, v5.h[2] -; CHECKNOFP16-NEXT: mov h18, v4.h[1] +; CHECKNOFP16-NEXT: rev32 v4.8h, v0.8h +; CHECKNOFP16-NEXT: mov h2, v0.h[1] +; CHECKNOFP16-NEXT: fcvt s6, h0 +; CHECKNOFP16-NEXT: rev32 v5.8h, v1.8h +; CHECKNOFP16-NEXT: mov h17, v0.h[2] +; CHECKNOFP16-NEXT: mov h18, v0.h[3] +; CHECKNOFP16-NEXT: mov h3, v4.h[1] +; CHECKNOFP16-NEXT: fcvt s2, h2 +; CHECKNOFP16-NEXT: fcvt s7, h4 +; CHECKNOFP16-NEXT: mov h20, v4.h[2] +; CHECKNOFP16-NEXT: mov h16, v5.h[1] ; CHECKNOFP16-NEXT: fcvt s19, h5 -; CHECKNOFP16-NEXT: fcvt s20, h1 -; CHECKNOFP16-NEXT: fcvt s7, h7 -; CHECKNOFP16-NEXT: fcvt s16, h16 +; CHECKNOFP16-NEXT: mov h21, v4.h[3] +; CHECKNOFP16-NEXT: mov h22, v0.h[4] ; CHECKNOFP16-NEXT: fcvt s3, h3 -; CHECKNOFP16-NEXT: fcvt s21, h4 -; CHECKNOFP16-NEXT: fcvt s17, h17 -; CHECKNOFP16-NEXT: fcvt s18, h18 -; CHECKNOFP16-NEXT: fadd s2, s19, s2 -; CHECKNOFP16-NEXT: mov h19, v5.h[3] -; CHECKNOFP16-NEXT: fadd s3, s3, s6 -; CHECKNOFP16-NEXT: mov h6, v0.h[3] -; CHECKNOFP16-NEXT: fadd s20, s21, s20 -; CHECKNOFP16-NEXT: fadd s7, s17, s7 -; CHECKNOFP16-NEXT: fadd s16, s18, s16 -; CHECKNOFP16-NEXT: fcvt h2, s2 -; CHECKNOFP16-NEXT: fcvt s18, h19 -; CHECKNOFP16-NEXT: mov h19, v0.h[5] -; CHECKNOFP16-NEXT: fcvt h17, s3 -; CHECKNOFP16-NEXT: fcvt s6, h6 -; CHECKNOFP16-NEXT: fcvt h3, s20 -; CHECKNOFP16-NEXT: fcvt h7, s7 -; CHECKNOFP16-NEXT: fcvt h16, s16 -; CHECKNOFP16-NEXT: mov h20, v5.h[5] -; CHECKNOFP16-NEXT: fcvt s19, h19 -; CHECKNOFP16-NEXT: mov v2.h[1], v17.h[0] -; CHECKNOFP16-NEXT: fadd s6, s18, s6 -; CHECKNOFP16-NEXT: mov h17, v0.h[4] -; CHECKNOFP16-NEXT: mov h18, v5.h[4] -; CHECKNOFP16-NEXT: fcvt s20, h20 -; CHECKNOFP16-NEXT: mov v3.h[1], v16.h[0] -; CHECKNOFP16-NEXT: mov v2.h[2], v7.h[0] -; CHECKNOFP16-NEXT: fcvt h6, s6 -; CHECKNOFP16-NEXT: fcvt s7, h17 -; CHECKNOFP16-NEXT: fcvt s16, h18 -; CHECKNOFP16-NEXT: mov h17, v1.h[2] -; CHECKNOFP16-NEXT: mov h18, v4.h[2] -; CHECKNOFP16-NEXT: fadd s19, s20, s19 -; CHECKNOFP16-NEXT: mov h20, v4.h[4] -; CHECKNOFP16-NEXT: mov v2.h[3], v6.h[0] +; CHECKNOFP16-NEXT: fadd s6, s7, s6 +; CHECKNOFP16-NEXT: mov h7, v1.h[1] +; CHECKNOFP16-NEXT: fcvt s16, h16 +; CHECKNOFP16-NEXT: fadd s3, s3, s2 +; CHECKNOFP16-NEXT: fcvt h2, s6 +; CHECKNOFP16-NEXT: fcvt s6, h1 +; CHECKNOFP16-NEXT: fcvt s7, h7 +; CHECKNOFP16-NEXT: fcvt h3, s3 +; CHECKNOFP16-NEXT: mov v2.h[1], v3.h[0] +; CHECKNOFP16-NEXT: fadd s3, s19, s6 ; CHECKNOFP16-NEXT: fadd s6, s16, s7 ; CHECKNOFP16-NEXT: fcvt s7, h17 -; CHECKNOFP16-NEXT: fcvt s16, h18 -; CHECKNOFP16-NEXT: mov h17, v1.h[3] -; CHECKNOFP16-NEXT: mov h18, v4.h[3] +; CHECKNOFP16-NEXT: fcvt s16, h20 +; CHECKNOFP16-NEXT: fcvt s17, h18 +; CHECKNOFP16-NEXT: fcvt s18, h21 +; CHECKNOFP16-NEXT: mov h19, v1.h[2] +; CHECKNOFP16-NEXT: mov h20, v5.h[2] +; CHECKNOFP16-NEXT: fcvt h3, s3 ; CHECKNOFP16-NEXT: fcvt h6, s6 ; CHECKNOFP16-NEXT: fadd s7, s16, s7 -; CHECKNOFP16-NEXT: mov h16, v1.h[4] -; CHECKNOFP16-NEXT: fcvt s17, h17 +; CHECKNOFP16-NEXT: mov h16, v1.h[3] +; CHECKNOFP16-NEXT: fadd s17, s18, s17 +; CHECKNOFP16-NEXT: mov h18, v4.h[4] +; CHECKNOFP16-NEXT: fcvt s19, h19 +; CHECKNOFP16-NEXT: fcvt s20, h20 +; CHECKNOFP16-NEXT: mov v3.h[1], v6.h[0] +; CHECKNOFP16-NEXT: mov h6, v5.h[3] +; CHECKNOFP16-NEXT: fcvt h7, s7 +; CHECKNOFP16-NEXT: fcvt s21, h22 ; CHECKNOFP16-NEXT: fcvt s18, h18 -; CHECKNOFP16-NEXT: mov v2.h[4], v6.h[0] -; CHECKNOFP16-NEXT: fcvt h6, s7 -; CHECKNOFP16-NEXT: fadd s7, s18, s17 +; CHECKNOFP16-NEXT: fadd s19, s20, s19 ; CHECKNOFP16-NEXT: fcvt s16, h16 -; CHECKNOFP16-NEXT: fcvt s17, h20 -; CHECKNOFP16-NEXT: fcvt h18, s19 -; CHECKNOFP16-NEXT: mov v3.h[2], v6.h[0] -; CHECKNOFP16-NEXT: fcvt h6, s7 -; CHECKNOFP16-NEXT: fadd s7, s17, s16 -; CHECKNOFP16-NEXT: mov h16, v1.h[5] +; CHECKNOFP16-NEXT: fcvt s6, h6 +; CHECKNOFP16-NEXT: fcvt h17, s17 +; CHECKNOFP16-NEXT: mov v2.h[2], v7.h[0] +; CHECKNOFP16-NEXT: mov h20, v5.h[4] +; CHECKNOFP16-NEXT: fadd s7, s18, s21 +; CHECKNOFP16-NEXT: mov h18, v1.h[4] +; CHECKNOFP16-NEXT: fadd s6, s6, s16 +; CHECKNOFP16-NEXT: fcvt h16, s19 +; CHECKNOFP16-NEXT: mov v2.h[3], v17.h[0] +; CHECKNOFP16-NEXT: mov h19, v5.h[5] +; CHECKNOFP16-NEXT: fcvt h7, s7 +; CHECKNOFP16-NEXT: fcvt s17, h18 +; CHECKNOFP16-NEXT: fcvt s18, h20 +; CHECKNOFP16-NEXT: fcvt h6, s6 +; CHECKNOFP16-NEXT: mov v3.h[2], v16.h[0] +; CHECKNOFP16-NEXT: mov h16, v0.h[5] +; CHECKNOFP16-NEXT: mov v2.h[4], v7.h[0] +; CHECKNOFP16-NEXT: fadd s7, s18, s17 ; CHECKNOFP16-NEXT: mov h17, v4.h[5] -; CHECKNOFP16-NEXT: mov v2.h[5], v18.h[0] -; CHECKNOFP16-NEXT: mov h18, v5.h[6] +; CHECKNOFP16-NEXT: mov h18, v1.h[5] ; CHECKNOFP16-NEXT: mov v3.h[3], v6.h[0] ; CHECKNOFP16-NEXT: fcvt h6, s7 ; CHECKNOFP16-NEXT: fcvt s7, h16 ; CHECKNOFP16-NEXT: fcvt s16, h17 -; CHECKNOFP16-NEXT: mov h17, v0.h[6] -; CHECKNOFP16-NEXT: fcvt s18, h18 +; CHECKNOFP16-NEXT: fcvt s17, h18 +; CHECKNOFP16-NEXT: fcvt s18, h19 +; CHECKNOFP16-NEXT: mov h19, v0.h[6] ; CHECKNOFP16-NEXT: mov h0, v0.h[7] -; CHECKNOFP16-NEXT: mov h5, v5.h[7] ; CHECKNOFP16-NEXT: mov v3.h[4], v6.h[0] -; CHECKNOFP16-NEXT: fadd s6, s16, s7 -; CHECKNOFP16-NEXT: mov h7, v1.h[6] -; CHECKNOFP16-NEXT: mov h16, v4.h[6] -; CHECKNOFP16-NEXT: fcvt s17, h17 -; CHECKNOFP16-NEXT: mov h1, v1.h[7] +; CHECKNOFP16-NEXT: mov h6, v4.h[6] +; CHECKNOFP16-NEXT: fadd s7, s16, s7 +; CHECKNOFP16-NEXT: fadd s16, s18, s17 +; CHECKNOFP16-NEXT: mov h17, v1.h[6] +; CHECKNOFP16-NEXT: mov h18, v5.h[6] +; CHECKNOFP16-NEXT: fcvt s19, h19 +; CHECKNOFP16-NEXT: fcvt s6, h6 ; CHECKNOFP16-NEXT: mov h4, v4.h[7] +; CHECKNOFP16-NEXT: mov h1, v1.h[7] +; CHECKNOFP16-NEXT: mov h5, v5.h[7] +; CHECKNOFP16-NEXT: fcvt s17, h17 +; CHECKNOFP16-NEXT: fcvt h7, s7 +; CHECKNOFP16-NEXT: fcvt s18, h18 ; CHECKNOFP16-NEXT: fcvt s0, h0 -; CHECKNOFP16-NEXT: fcvt h6, s6 +; CHECKNOFP16-NEXT: fadd s6, s6, s19 +; CHECKNOFP16-NEXT: fcvt s4, h4 +; CHECKNOFP16-NEXT: fcvt s1, h1 ; CHECKNOFP16-NEXT: fcvt s5, h5 -; CHECKNOFP16-NEXT: fcvt s7, h7 -; CHECKNOFP16-NEXT: fcvt s16, h16 +; CHECKNOFP16-NEXT: fcvt h16, s16 ; CHECKNOFP16-NEXT: fadd s17, s18, s17 -; CHECKNOFP16-NEXT: fcvt s1, h1 -; CHECKNOFP16-NEXT: fcvt s4, h4 -; CHECKNOFP16-NEXT: mov v3.h[5], v6.h[0] -; CHECKNOFP16-NEXT: fadd s0, s5, s0 -; CHECKNOFP16-NEXT: fadd s7, s16, s7 -; CHECKNOFP16-NEXT: fcvt h6, s17 -; CHECKNOFP16-NEXT: fadd s1, s4, s1 -; CHECKNOFP16-NEXT: fcvt h0, s0 -; CHECKNOFP16-NEXT: fcvt h4, s7 +; CHECKNOFP16-NEXT: mov v2.h[5], v7.h[0] +; CHECKNOFP16-NEXT: fcvt h6, s6 +; CHECKNOFP16-NEXT: fadd s0, s4, s0 +; CHECKNOFP16-NEXT: fadd s1, s5, s1 +; CHECKNOFP16-NEXT: mov v3.h[5], v16.h[0] +; CHECKNOFP16-NEXT: fcvt h4, s17 ; CHECKNOFP16-NEXT: mov v2.h[6], v6.h[0] +; CHECKNOFP16-NEXT: fcvt h0, s0 ; CHECKNOFP16-NEXT: fcvt h1, s1 ; CHECKNOFP16-NEXT: mov v3.h[6], v4.h[0] ; CHECKNOFP16-NEXT: mov v2.h[7], v0.h[0] diff --git a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll --- a/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll +++ b/llvm/test/CodeGen/AArch64/fp16-v8-instructions.ll @@ -28,29 +28,29 @@ ; CHECK-CVT-NEXT: fadd s7, s16, s7 ; CHECK-CVT-NEXT: fcvt h3, s3 ; CHECK-CVT-NEXT: mov h16, v0.h[5] +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: fcvt s6, h6 ; CHECK-CVT-NEXT: fcvt h7, s7 ; CHECK-CVT-NEXT: mov v2.h[1], v3.h[0] ; CHECK-CVT-NEXT: fcvt h3, s4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: fcvt s5, h6 -; CHECK-CVT-NEXT: mov h6, v1.h[5] -; CHECK-CVT-NEXT: mov v2.h[2], v3.h[0] -; CHECK-CVT-NEXT: fadd s3, s5, s4 -; CHECK-CVT-NEXT: fcvt s4, h6 -; CHECK-CVT-NEXT: fcvt s5, h16 +; CHECK-CVT-NEXT: mov h4, v1.h[5] +; CHECK-CVT-NEXT: fadd s5, s6, s5 ; CHECK-CVT-NEXT: mov h6, v1.h[6] -; CHECK-CVT-NEXT: mov h16, v0.h[6] +; CHECK-CVT-NEXT: mov v2.h[2], v3.h[0] ; CHECK-CVT-NEXT: mov h1, v1.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], v7.h[0] +; CHECK-CVT-NEXT: fcvt s3, h4 +; CHECK-CVT-NEXT: fcvt s4, h16 +; CHECK-CVT-NEXT: mov h16, v0.h[6] ; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: fcvt h3, s3 -; CHECK-CVT-NEXT: fadd s4, s5, s4 +; CHECK-CVT-NEXT: mov v2.h[3], v7.h[0] +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fadd s3, s4, s3 +; CHECK-CVT-NEXT: fcvt h4, s5 ; CHECK-CVT-NEXT: fcvt s5, h6 ; CHECK-CVT-NEXT: fcvt s6, h16 -; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], v3.h[0] -; CHECK-CVT-NEXT: fcvt h3, s4 +; CHECK-CVT-NEXT: mov v2.h[4], v4.h[0] +; CHECK-CVT-NEXT: fcvt h3, s3 ; CHECK-CVT-NEXT: fadd s4, s6, s5 ; CHECK-CVT-NEXT: fadd s0, s0, s1 ; CHECK-CVT-NEXT: mov v2.h[5], v3.h[0] @@ -97,29 +97,29 @@ ; CHECK-CVT-NEXT: fsub s7, s16, s7 ; CHECK-CVT-NEXT: fcvt h3, s3 ; CHECK-CVT-NEXT: mov h16, v0.h[5] +; CHECK-CVT-NEXT: fcvt s5, h5 +; CHECK-CVT-NEXT: fcvt s6, h6 ; CHECK-CVT-NEXT: fcvt h7, s7 ; CHECK-CVT-NEXT: mov v2.h[1], v3.h[0] ; CHECK-CVT-NEXT: fcvt h3, s4 -; CHECK-CVT-NEXT: fcvt s4, h5 -; CHECK-CVT-NEXT: fcvt s5, h6 -; CHECK-CVT-NEXT: mov h6, v1.h[5] -; CHECK-CVT-NEXT: mov v2.h[2], v3.h[0] -; CHECK-CVT-NEXT: fsub s3, s5, s4 -; CHECK-CVT-NEXT: fcvt s4, h6 -; CHECK-CVT-NEXT: fcvt s5, h16 +; CHECK-CVT-NEXT: mov h4, v1.h[5] +; CHECK-CVT-NEXT: fsub s5, s6, s5 ; CHECK-CVT-NEXT: mov h6, v1.h[6] -; CHECK-CVT-NEXT: mov h16, v0.h[6] +; CHECK-CVT-NEXT: mov v2.h[2], v3.h[0] ; CHECK-CVT-NEXT: mov h1, v1.h[7] -; CHECK-CVT-NEXT: mov v2.h[3], v7.h[0] +; CHECK-CVT-NEXT: fcvt s3, h4 +; CHECK-CVT-NEXT: fcvt s4, h16 +; CHECK-CVT-NEXT: mov h16, v0.h[6] ; CHECK-CVT-NEXT: mov h0, v0.h[7] -; CHECK-CVT-NEXT: fcvt h3, s3 -; CHECK-CVT-NEXT: fsub s4, s5, s4 +; CHECK-CVT-NEXT: mov v2.h[3], v7.h[0] +; CHECK-CVT-NEXT: fcvt s1, h1 +; CHECK-CVT-NEXT: fsub s3, s4, s3 +; CHECK-CVT-NEXT: fcvt h4, s5 ; CHECK-CVT-NEXT: fcvt s5, h6 ; CHECK-CVT-NEXT: fcvt s6, h16 -; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov v2.h[4], v3.h[0] -; CHECK-CVT-NEXT: fcvt h3, s4 +; CHECK-CVT-NEXT: mov v2.h[4], v4.h[0] +; CHECK-CVT-NEXT: fcvt h3, s3 ; CHECK-CVT-NEXT: fsub s4, s6, s5 ; CHECK-CVT-NEXT: fsub s0, s0, s1 ; CHECK-CVT-NEXT: mov v2.h[5], v3.h[0] diff --git a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll --- a/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptosi-sat-vector.ll @@ -2494,63 +2494,63 @@ ; CHECK-CVT-LABEL: test_signed_v8f16_v8i64: ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-CVT-NEXT: mov h4, v0.h[2] -; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: fcvt s2, h1 -; CHECK-CVT-NEXT: mov h3, v1.h[1] +; CHECK-CVT-NEXT: mov h2, v0.h[2] +; CHECK-CVT-NEXT: fcvt s3, h0 +; CHECK-CVT-NEXT: mov h7, v0.h[1] +; CHECK-CVT-NEXT: mov h0, v0.h[3] +; CHECK-CVT-NEXT: mov h4, v1.h[1] ; CHECK-CVT-NEXT: mov h6, v1.h[2] -; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: fcvt s5, h1 ; CHECK-CVT-NEXT: mov h1, v1.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: fcvtzs x8, s3 +; CHECK-CVT-NEXT: fcvt s3, h4 +; CHECK-CVT-NEXT: fcvt s4, h6 ; CHECK-CVT-NEXT: fcvtzs x9, s5 -; CHECK-CVT-NEXT: fcvtzs x8, s2 -; CHECK-CVT-NEXT: fcvt s2, h3 -; CHECK-CVT-NEXT: mov h3, v0.h[1] -; CHECK-CVT-NEXT: mov h0, v0.h[3] -; CHECK-CVT-NEXT: fcvt s5, h6 -; CHECK-CVT-NEXT: fcvt s6, h1 +; CHECK-CVT-NEXT: fcvt s5, h7 +; CHECK-CVT-NEXT: fcvt s6, h0 +; CHECK-CVT-NEXT: fcvt s7, h1 ; CHECK-CVT-NEXT: fcvtzs x10, s2 -; CHECK-CVT-NEXT: fmov d2, x8 -; CHECK-CVT-NEXT: fcvtzs x8, s4 -; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: fcvt s4, h0 -; CHECK-CVT-NEXT: fmov d0, x9 -; CHECK-CVT-NEXT: mov v2.d[1], x10 -; CHECK-CVT-NEXT: fcvtzs x10, s5 -; CHECK-CVT-NEXT: fmov d1, x8 -; CHECK-CVT-NEXT: fcvtzs x9, s3 -; CHECK-CVT-NEXT: fcvtzs x8, s4 -; CHECK-CVT-NEXT: fmov d3, x10 +; CHECK-CVT-NEXT: fmov d0, x8 +; CHECK-CVT-NEXT: fmov d2, x9 +; CHECK-CVT-NEXT: fcvtzs x9, s4 +; CHECK-CVT-NEXT: fcvtzs x11, s3 +; CHECK-CVT-NEXT: fcvtzs x8, s5 +; CHECK-CVT-NEXT: fmov d1, x10 ; CHECK-CVT-NEXT: fcvtzs x10, s6 -; CHECK-CVT-NEXT: mov v0.d[1], x9 -; CHECK-CVT-NEXT: mov v1.d[1], x8 -; CHECK-CVT-NEXT: mov v3.d[1], x10 +; CHECK-CVT-NEXT: fmov d3, x9 +; CHECK-CVT-NEXT: fcvtzs x9, s7 +; CHECK-CVT-NEXT: mov v2.d[1], x11 +; CHECK-CVT-NEXT: mov v0.d[1], x8 +; CHECK-CVT-NEXT: mov v1.d[1], x10 +; CHECK-CVT-NEXT: mov v3.d[1], x9 ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_signed_v8f16_v8i64: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-FP16-NEXT: mov h3, v0.h[2] -; CHECK-FP16-NEXT: mov h5, v0.h[3] -; CHECK-FP16-NEXT: fcvtzs x9, h0 -; CHECK-FP16-NEXT: mov h2, v1.h[1] -; CHECK-FP16-NEXT: fcvtzs x8, h1 +; CHECK-FP16-NEXT: mov h2, v0.h[2] +; CHECK-FP16-NEXT: mov h5, v0.h[1] +; CHECK-FP16-NEXT: mov h6, v0.h[3] +; CHECK-FP16-NEXT: fcvtzs x8, h0 ; CHECK-FP16-NEXT: mov h4, v1.h[2] -; CHECK-FP16-NEXT: mov h6, v1.h[3] +; CHECK-FP16-NEXT: fcvtzs x9, h1 +; CHECK-FP16-NEXT: mov h3, v1.h[1] +; CHECK-FP16-NEXT: mov h7, v1.h[3] ; CHECK-FP16-NEXT: fcvtzs x10, h2 -; CHECK-FP16-NEXT: fmov d2, x8 -; CHECK-FP16-NEXT: fcvtzs x8, h3 -; CHECK-FP16-NEXT: mov h3, v0.h[1] -; CHECK-FP16-NEXT: fmov d0, x9 -; CHECK-FP16-NEXT: mov v2.d[1], x10 -; CHECK-FP16-NEXT: fcvtzs x10, h4 -; CHECK-FP16-NEXT: fmov d1, x8 -; CHECK-FP16-NEXT: fcvtzs x9, h3 +; CHECK-FP16-NEXT: fmov d0, x8 +; CHECK-FP16-NEXT: fmov d2, x9 ; CHECK-FP16-NEXT: fcvtzs x8, h5 -; CHECK-FP16-NEXT: fmov d3, x10 +; CHECK-FP16-NEXT: fcvtzs x9, h4 +; CHECK-FP16-NEXT: fcvtzs x11, h3 +; CHECK-FP16-NEXT: fmov d1, x10 ; CHECK-FP16-NEXT: fcvtzs x10, h6 -; CHECK-FP16-NEXT: mov v0.d[1], x9 -; CHECK-FP16-NEXT: mov v1.d[1], x8 -; CHECK-FP16-NEXT: mov v3.d[1], x10 +; CHECK-FP16-NEXT: fmov d3, x9 +; CHECK-FP16-NEXT: fcvtzs x9, h7 +; CHECK-FP16-NEXT: mov v2.d[1], x11 +; CHECK-FP16-NEXT: mov v0.d[1], x8 +; CHECK-FP16-NEXT: mov v1.d[1], x10 +; CHECK-FP16-NEXT: mov v3.d[1], x9 ; CHECK-FP16-NEXT: ret %x = call <8 x i64> @llvm.fptosi.sat.v8f16.v8i64(<8 x half> %f) ret <8 x i64> %x diff --git a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll --- a/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll +++ b/llvm/test/CodeGen/AArch64/fptoui-sat-vector.ll @@ -691,7 +691,7 @@ ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0 ; CHECK-NEXT: mov s1, v0.s[1] ; CHECK-NEXT: fcvtzu x9, s0 -; CHECK-NEXT: mov x10, #1125899906842623 +; CHECK-NEXT: mov x10, #1125899906842623 // =0x3ffffffffffff ; CHECK-NEXT: fcvtzu x8, s1 ; CHECK-NEXT: cmp x8, x10 ; CHECK-NEXT: csel x8, x8, x10, lo @@ -737,9 +737,9 @@ ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: mov w8, #1904214015 +; CHECK-NEXT: mov w8, #1904214015 // =0x717fffff ; CHECK-NEXT: fcmp s8, #0.0 -; CHECK-NEXT: mov x21, #68719476735 +; CHECK-NEXT: mov x21, #68719476735 // =0xfffffffff ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 ; CHECK-NEXT: fmov s9, w8 @@ -788,7 +788,7 @@ ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: mov w8, #2139095039 +; CHECK-NEXT: mov w8, #2139095039 // =0x7f7fffff ; CHECK-NEXT: fcmp s8, #0.0 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0 @@ -904,7 +904,7 @@ ; CHECK-LABEL: test_unsigned_v4f32_v4i50: ; CHECK: // %bb.0: ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: mov x8, #1125899906842623 +; CHECK-NEXT: mov x8, #1125899906842623 // =0x3ffffffffffff ; CHECK-NEXT: mov s3, v0.s[1] ; CHECK-NEXT: fcvtzu x11, s0 ; CHECK-NEXT: mov s2, v1.s[1] @@ -967,10 +967,10 @@ ; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: mov w8, #1904214015 +; CHECK-NEXT: mov w8, #1904214015 // =0x717fffff ; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, #0.0 -; CHECK-NEXT: mov x25, #68719476735 +; CHECK-NEXT: mov x25, #68719476735 // =0xfffffffff ; CHECK-NEXT: fmov s9, w8 ; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: csel x8, xzr, x0, lt @@ -1050,7 +1050,7 @@ ; CHECK-NEXT: str q0, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti -; CHECK-NEXT: mov w8, #2139095039 +; CHECK-NEXT: mov w8, #2139095039 // =0x7f7fffff ; CHECK-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload ; CHECK-NEXT: fcmp s8, #0.0 ; CHECK-NEXT: fmov s9, w8 @@ -1146,7 +1146,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov d1, v0.d[1] ; CHECK-NEXT: fcvtzu w10, d0 -; CHECK-NEXT: mov w8, #255 +; CHECK-NEXT: mov w8, #255 // =0xff ; CHECK-NEXT: fcvtzu w9, d1 ; CHECK-NEXT: cmp w9, #255 ; CHECK-NEXT: csel w9, w9, w8, lo @@ -1165,7 +1165,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov d1, v0.d[1] ; CHECK-NEXT: fcvtzu w9, d0 -; CHECK-NEXT: mov w10, #8191 +; CHECK-NEXT: mov w10, #8191 // =0x1fff ; CHECK-NEXT: fcvtzu w8, d1 ; CHECK-NEXT: cmp w8, w10 ; CHECK-NEXT: csel w8, w8, w10, lo @@ -1184,7 +1184,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov d1, v0.d[1] ; CHECK-NEXT: fcvtzu w9, d0 -; CHECK-NEXT: mov w10, #65535 +; CHECK-NEXT: mov w10, #65535 // =0xffff ; CHECK-NEXT: fcvtzu w8, d1 ; CHECK-NEXT: cmp w8, w10 ; CHECK-NEXT: csel w8, w8, w10, lo @@ -1203,7 +1203,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov d1, v0.d[1] ; CHECK-NEXT: fcvtzu w9, d0 -; CHECK-NEXT: mov w10, #524287 +; CHECK-NEXT: mov w10, #524287 // =0x7ffff ; CHECK-NEXT: fcvtzu w8, d1 ; CHECK-NEXT: cmp w8, w10 ; CHECK-NEXT: csel w8, w8, w10, lo @@ -1236,7 +1236,7 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: mov d1, v0.d[1] ; CHECK-NEXT: fcvtzu x9, d0 -; CHECK-NEXT: mov x10, #1125899906842623 +; CHECK-NEXT: mov x10, #1125899906842623 // =0x3ffffffffffff ; CHECK-NEXT: fcvtzu x8, d1 ; CHECK-NEXT: cmp x8, x10 ; CHECK-NEXT: csel x8, x8, x10, lo @@ -1276,9 +1276,9 @@ ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: fmov d0, d8 ; CHECK-NEXT: bl __fixunsdfti -; CHECK-NEXT: mov x8, #5057542381537067007 +; CHECK-NEXT: mov x8, #5057542381537067007 // =0x462fffffffffffff ; CHECK-NEXT: fcmp d8, #0.0 -; CHECK-NEXT: mov x21, #68719476735 +; CHECK-NEXT: mov x21, #68719476735 // =0xfffffffff ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 ; CHECK-NEXT: fmov d9, x8 @@ -1326,7 +1326,7 @@ ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: fmov d0, d8 ; CHECK-NEXT: bl __fixunsdfti -; CHECK-NEXT: mov x8, #5183643171103440895 +; CHECK-NEXT: mov x8, #5183643171103440895 // =0x47efffffffffffff ; CHECK-NEXT: fcmp d8, #0.0 ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0 @@ -1478,7 +1478,7 @@ ; CHECK-CVT-NEXT: mov h2, v0.h[2] ; CHECK-CVT-NEXT: mov h3, v0.h[3] ; CHECK-CVT-NEXT: fcvt s0, h0 -; CHECK-CVT-NEXT: mov x8, #1125899906842623 +; CHECK-CVT-NEXT: mov x8, #1125899906842623 // =0x3ffffffffffff ; CHECK-CVT-NEXT: fcvt s1, h1 ; CHECK-CVT-NEXT: fcvt s2, h2 ; CHECK-CVT-NEXT: fcvt s3, h3 @@ -1503,7 +1503,7 @@ ; CHECK-FP16-NEXT: mov h2, v0.h[2] ; CHECK-FP16-NEXT: mov h3, v0.h[3] ; CHECK-FP16-NEXT: fcvtzu x9, h0 -; CHECK-FP16-NEXT: mov x8, #1125899906842623 +; CHECK-FP16-NEXT: mov x8, #1125899906842623 // =0x3ffffffffffff ; CHECK-FP16-NEXT: fcvtzu x10, h1 ; CHECK-FP16-NEXT: fcvtzu x11, h2 ; CHECK-FP16-NEXT: cmp x9, x8 @@ -1587,9 +1587,9 @@ ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov w8, #1904214015 +; CHECK-NEXT: mov w8, #1904214015 // =0x717fffff ; CHECK-NEXT: fcmp s8, #0.0 -; CHECK-NEXT: mov x25, #68719476735 +; CHECK-NEXT: mov x25, #68719476735 // =0xfffffffff ; CHECK-NEXT: mov h0, v0.h[1] ; CHECK-NEXT: fmov s9, w8 ; CHECK-NEXT: csel x8, xzr, x0, lt @@ -1673,7 +1673,7 @@ ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: ldr q0, [sp] // 16-byte Folded Reload -; CHECK-NEXT: mov w8, #2139095039 +; CHECK-NEXT: mov w8, #2139095039 // =0x7f7fffff ; CHECK-NEXT: fcmp s8, #0.0 ; CHECK-NEXT: mov h0, v0.h[2] ; CHECK-NEXT: fmov s9, w8 @@ -1809,7 +1809,7 @@ ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h ; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: mov w8, #255 +; CHECK-CVT-NEXT: mov w8, #255 // =0xff ; CHECK-CVT-NEXT: mov s2, v1.s[1] ; CHECK-CVT-NEXT: mov s3, v1.s[2] ; CHECK-CVT-NEXT: mov s4, v1.s[3] @@ -1866,7 +1866,7 @@ ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h ; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: mov w8, #8191 +; CHECK-CVT-NEXT: mov w8, #8191 // =0x1fff ; CHECK-CVT-NEXT: mov s2, v1.s[1] ; CHECK-CVT-NEXT: mov s3, v1.s[2] ; CHECK-CVT-NEXT: mov s4, v1.s[3] @@ -1923,7 +1923,7 @@ ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: fcvtl2 v1.4s, v0.8h ; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h -; CHECK-CVT-NEXT: mov w8, #65535 +; CHECK-CVT-NEXT: mov w8, #65535 // =0xffff ; CHECK-CVT-NEXT: mov s2, v1.s[1] ; CHECK-CVT-NEXT: mov s3, v1.s[2] ; CHECK-CVT-NEXT: mov s4, v1.s[3] @@ -2012,7 +2012,7 @@ ; CHECK-CVT-LABEL: test_unsigned_v8f16_v8i50: ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-CVT-NEXT: mov x8, #1125899906842623 +; CHECK-CVT-NEXT: mov x8, #1125899906842623 // =0x3ffffffffffff ; CHECK-CVT-NEXT: mov h2, v0.h[1] ; CHECK-CVT-NEXT: mov h3, v0.h[2] ; CHECK-CVT-NEXT: mov h5, v0.h[3] @@ -2056,7 +2056,7 @@ ; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i50: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-FP16-NEXT: mov x8, #1125899906842623 +; CHECK-FP16-NEXT: mov x8, #1125899906842623 // =0x3ffffffffffff ; CHECK-FP16-NEXT: mov h2, v0.h[1] ; CHECK-FP16-NEXT: mov h3, v0.h[2] ; CHECK-FP16-NEXT: mov h5, v0.h[3] @@ -2096,63 +2096,63 @@ ; CHECK-CVT-LABEL: test_unsigned_v8f16_v8i64: ; CHECK-CVT: // %bb.0: ; CHECK-CVT-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-CVT-NEXT: mov h4, v0.h[2] -; CHECK-CVT-NEXT: fcvt s5, h0 -; CHECK-CVT-NEXT: fcvt s2, h1 -; CHECK-CVT-NEXT: mov h3, v1.h[1] +; CHECK-CVT-NEXT: mov h2, v0.h[2] +; CHECK-CVT-NEXT: fcvt s3, h0 +; CHECK-CVT-NEXT: mov h7, v0.h[1] +; CHECK-CVT-NEXT: mov h0, v0.h[3] +; CHECK-CVT-NEXT: mov h4, v1.h[1] ; CHECK-CVT-NEXT: mov h6, v1.h[2] -; CHECK-CVT-NEXT: fcvt s4, h4 +; CHECK-CVT-NEXT: fcvt s5, h1 ; CHECK-CVT-NEXT: mov h1, v1.h[3] +; CHECK-CVT-NEXT: fcvt s2, h2 +; CHECK-CVT-NEXT: fcvtzu x8, s3 +; CHECK-CVT-NEXT: fcvt s3, h4 +; CHECK-CVT-NEXT: fcvt s4, h6 ; CHECK-CVT-NEXT: fcvtzu x9, s5 -; CHECK-CVT-NEXT: fcvtzu x8, s2 -; CHECK-CVT-NEXT: fcvt s2, h3 -; CHECK-CVT-NEXT: mov h3, v0.h[1] -; CHECK-CVT-NEXT: mov h0, v0.h[3] -; CHECK-CVT-NEXT: fcvt s5, h6 -; CHECK-CVT-NEXT: fcvt s6, h1 +; CHECK-CVT-NEXT: fcvt s5, h7 +; CHECK-CVT-NEXT: fcvt s6, h0 +; CHECK-CVT-NEXT: fcvt s7, h1 ; CHECK-CVT-NEXT: fcvtzu x10, s2 -; CHECK-CVT-NEXT: fmov d2, x8 -; CHECK-CVT-NEXT: fcvtzu x8, s4 -; CHECK-CVT-NEXT: fcvt s3, h3 -; CHECK-CVT-NEXT: fcvt s4, h0 -; CHECK-CVT-NEXT: fmov d0, x9 -; CHECK-CVT-NEXT: mov v2.d[1], x10 -; CHECK-CVT-NEXT: fcvtzu x10, s5 -; CHECK-CVT-NEXT: fmov d1, x8 -; CHECK-CVT-NEXT: fcvtzu x9, s3 -; CHECK-CVT-NEXT: fcvtzu x8, s4 -; CHECK-CVT-NEXT: fmov d3, x10 +; CHECK-CVT-NEXT: fmov d0, x8 +; CHECK-CVT-NEXT: fmov d2, x9 +; CHECK-CVT-NEXT: fcvtzu x9, s4 +; CHECK-CVT-NEXT: fcvtzu x11, s3 +; CHECK-CVT-NEXT: fcvtzu x8, s5 +; CHECK-CVT-NEXT: fmov d1, x10 ; CHECK-CVT-NEXT: fcvtzu x10, s6 -; CHECK-CVT-NEXT: mov v0.d[1], x9 -; CHECK-CVT-NEXT: mov v1.d[1], x8 -; CHECK-CVT-NEXT: mov v3.d[1], x10 +; CHECK-CVT-NEXT: fmov d3, x9 +; CHECK-CVT-NEXT: fcvtzu x9, s7 +; CHECK-CVT-NEXT: mov v2.d[1], x11 +; CHECK-CVT-NEXT: mov v0.d[1], x8 +; CHECK-CVT-NEXT: mov v1.d[1], x10 +; CHECK-CVT-NEXT: mov v3.d[1], x9 ; CHECK-CVT-NEXT: ret ; ; CHECK-FP16-LABEL: test_unsigned_v8f16_v8i64: ; CHECK-FP16: // %bb.0: ; CHECK-FP16-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-FP16-NEXT: mov h3, v0.h[2] -; CHECK-FP16-NEXT: mov h5, v0.h[3] -; CHECK-FP16-NEXT: fcvtzu x9, h0 -; CHECK-FP16-NEXT: mov h2, v1.h[1] -; CHECK-FP16-NEXT: fcvtzu x8, h1 +; CHECK-FP16-NEXT: mov h2, v0.h[2] +; CHECK-FP16-NEXT: mov h5, v0.h[1] +; CHECK-FP16-NEXT: mov h6, v0.h[3] +; CHECK-FP16-NEXT: fcvtzu x8, h0 ; CHECK-FP16-NEXT: mov h4, v1.h[2] -; CHECK-FP16-NEXT: mov h6, v1.h[3] +; CHECK-FP16-NEXT: fcvtzu x9, h1 +; CHECK-FP16-NEXT: mov h3, v1.h[1] +; CHECK-FP16-NEXT: mov h7, v1.h[3] ; CHECK-FP16-NEXT: fcvtzu x10, h2 -; CHECK-FP16-NEXT: fmov d2, x8 -; CHECK-FP16-NEXT: fcvtzu x8, h3 -; CHECK-FP16-NEXT: mov h3, v0.h[1] -; CHECK-FP16-NEXT: fmov d0, x9 -; CHECK-FP16-NEXT: mov v2.d[1], x10 -; CHECK-FP16-NEXT: fcvtzu x10, h4 -; CHECK-FP16-NEXT: fmov d1, x8 -; CHECK-FP16-NEXT: fcvtzu x9, h3 +; CHECK-FP16-NEXT: fmov d0, x8 +; CHECK-FP16-NEXT: fmov d2, x9 ; CHECK-FP16-NEXT: fcvtzu x8, h5 -; CHECK-FP16-NEXT: fmov d3, x10 +; CHECK-FP16-NEXT: fcvtzu x9, h4 +; CHECK-FP16-NEXT: fcvtzu x11, h3 +; CHECK-FP16-NEXT: fmov d1, x10 ; CHECK-FP16-NEXT: fcvtzu x10, h6 -; CHECK-FP16-NEXT: mov v0.d[1], x9 -; CHECK-FP16-NEXT: mov v1.d[1], x8 -; CHECK-FP16-NEXT: mov v3.d[1], x10 +; CHECK-FP16-NEXT: fmov d3, x9 +; CHECK-FP16-NEXT: fcvtzu x9, h7 +; CHECK-FP16-NEXT: mov v2.d[1], x11 +; CHECK-FP16-NEXT: mov v0.d[1], x8 +; CHECK-FP16-NEXT: mov v1.d[1], x10 +; CHECK-FP16-NEXT: mov v3.d[1], x9 ; CHECK-FP16-NEXT: ret %x = call <8 x i64> @llvm.fptoui.sat.v8f16.v8i64(<8 x half> %f) ret <8 x i64> %x @@ -2193,9 +2193,9 @@ ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: mov w8, #1904214015 +; CHECK-NEXT: mov w8, #1904214015 // =0x717fffff ; CHECK-NEXT: fcmp s8, #0.0 -; CHECK-NEXT: mov x23, #68719476735 +; CHECK-NEXT: mov x23, #68719476735 // =0xfffffffff ; CHECK-NEXT: mov h0, v0.h[3] ; CHECK-NEXT: fmov s9, w8 ; CHECK-NEXT: csel x8, xzr, x0, lt @@ -2357,7 +2357,7 @@ ; CHECK-NEXT: fmov s0, s8 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: ldr q0, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: mov w8, #2139095039 +; CHECK-NEXT: mov w8, #2139095039 // =0x7f7fffff ; CHECK-NEXT: fcmp s8, #0.0 ; CHECK-NEXT: mov h0, v0.h[1] ; CHECK-NEXT: fmov s9, w8 @@ -2559,7 +2559,7 @@ ; CHECK-CVT-NEXT: fcvtl2 v2.4s, v1.8h ; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h ; CHECK-CVT-NEXT: fcvtl2 v5.4s, v0.8h -; CHECK-CVT-NEXT: mov w8, #255 +; CHECK-CVT-NEXT: mov w8, #255 // =0xff ; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h ; CHECK-CVT-NEXT: mov s3, v2.s[1] ; CHECK-CVT-NEXT: mov s4, v2.s[2] @@ -2661,7 +2661,7 @@ ; CHECK-CVT-NEXT: fcvtl2 v2.4s, v0.8h ; CHECK-CVT-NEXT: fcvtl v0.4s, v0.4h ; CHECK-CVT-NEXT: fcvtl2 v5.4s, v1.8h -; CHECK-CVT-NEXT: mov w8, #65535 +; CHECK-CVT-NEXT: mov w8, #65535 // =0xffff ; CHECK-CVT-NEXT: fcvtl v1.4s, v1.4h ; CHECK-CVT-NEXT: mov s3, v2.s[1] ; CHECK-CVT-NEXT: mov s4, v2.s[2] @@ -2758,7 +2758,7 @@ ; CHECK-NEXT: mov d4, v3.d[1] ; CHECK-NEXT: fcvtzu w10, d3 ; CHECK-NEXT: mov d3, v2.d[1] -; CHECK-NEXT: mov w8, #255 +; CHECK-NEXT: mov w8, #255 // =0xff ; CHECK-NEXT: fcvtzu w12, d2 ; CHECK-NEXT: fcvtzu w13, d1 ; CHECK-NEXT: fcvtzu w9, d4 @@ -2806,7 +2806,7 @@ ; CHECK-NEXT: mov d16, v0.d[1] ; CHECK-NEXT: fcvtzu w10, d0 ; CHECK-NEXT: mov d0, v1.d[1] -; CHECK-NEXT: mov w8, #255 +; CHECK-NEXT: mov w8, #255 // =0xff ; CHECK-NEXT: fcvtzu w12, d1 ; CHECK-NEXT: mov d1, v2.d[1] ; CHECK-NEXT: fcvtzu w9, d16 @@ -2910,7 +2910,7 @@ ; CHECK-NEXT: mov d4, v3.d[1] ; CHECK-NEXT: fcvtzu w10, d3 ; CHECK-NEXT: mov d3, v2.d[1] -; CHECK-NEXT: mov w8, #65535 +; CHECK-NEXT: mov w8, #65535 // =0xffff ; CHECK-NEXT: fcvtzu w12, d2 ; CHECK-NEXT: fcvtzu w13, d1 ; CHECK-NEXT: fcvtzu w9, d4 @@ -2956,80 +2956,80 @@ ; CHECK-LABEL: test_unsigned_v16f64_v16i16: ; CHECK: // %bb.0: ; CHECK-NEXT: mov d16, v3.d[1] +; CHECK-NEXT: mov d17, v2.d[1] ; CHECK-NEXT: fcvtzu w9, d3 -; CHECK-NEXT: mov d3, v2.d[1] -; CHECK-NEXT: mov w8, #65535 +; CHECK-NEXT: mov d3, v1.d[1] +; CHECK-NEXT: mov w8, #65535 // =0xffff ; CHECK-NEXT: fcvtzu w10, d2 -; CHECK-NEXT: mov d2, v1.d[1] -; CHECK-NEXT: fcvtzu w11, d1 +; CHECK-NEXT: fcvtzu w12, d1 ; CHECK-NEXT: mov d1, v0.d[1] -; CHECK-NEXT: fcvtzu w12, d16 -; CHECK-NEXT: fcvtzu w13, d0 +; CHECK-NEXT: fcvtzu w11, d16 +; CHECK-NEXT: fcvtzu w13, d17 ; CHECK-NEXT: fcvtzu w14, d3 -; CHECK-NEXT: mov d0, v7.d[1] -; CHECK-NEXT: fcvtzu w15, d2 +; CHECK-NEXT: mov d2, v7.d[1] ; CHECK-NEXT: fcvtzu w17, d6 -; CHECK-NEXT: cmp w12, w8 -; CHECK-NEXT: fcvtzu w16, d1 -; CHECK-NEXT: csel w12, w12, w8, lo +; CHECK-NEXT: cmp w11, w8 +; CHECK-NEXT: fcvtzu w15, d1 +; CHECK-NEXT: csel w11, w11, w8, lo ; CHECK-NEXT: cmp w9, w8 ; CHECK-NEXT: csel w9, w9, w8, lo -; CHECK-NEXT: cmp w14, w8 -; CHECK-NEXT: csel w14, w14, w8, lo +; CHECK-NEXT: cmp w13, w8 +; CHECK-NEXT: csel w13, w13, w8, lo ; CHECK-NEXT: cmp w10, w8 ; CHECK-NEXT: csel w10, w10, w8, lo -; CHECK-NEXT: cmp w15, w8 +; CHECK-NEXT: cmp w14, w8 ; CHECK-NEXT: fmov s19, w9 -; CHECK-NEXT: csel w9, w15, w8, lo -; CHECK-NEXT: cmp w11, w8 -; CHECK-NEXT: fcvtzu w15, d0 +; CHECK-NEXT: csel w9, w14, w8, lo +; CHECK-NEXT: fcvtzu w14, d0 +; CHECK-NEXT: cmp w12, w8 +; CHECK-NEXT: fcvtzu w16, d2 ; CHECK-NEXT: mov d0, v6.d[1] -; CHECK-NEXT: csel w11, w11, w8, lo -; CHECK-NEXT: mov v19.s[1], w12 -; CHECK-NEXT: cmp w16, w8 -; CHECK-NEXT: fcvtzu w12, d7 +; CHECK-NEXT: csel w12, w12, w8, lo +; CHECK-NEXT: cmp w15, w8 +; CHECK-NEXT: mov v19.s[1], w11 +; CHECK-NEXT: fcvtzu w11, d7 ; CHECK-NEXT: fmov s18, w10 -; CHECK-NEXT: csel w10, w16, w8, lo -; CHECK-NEXT: cmp w13, w8 +; CHECK-NEXT: csel w10, w15, w8, lo +; CHECK-NEXT: cmp w14, w8 +; CHECK-NEXT: csel w14, w14, w8, lo +; CHECK-NEXT: cmp w16, w8 +; CHECK-NEXT: csel w15, w16, w8, lo +; CHECK-NEXT: cmp w11, w8 ; CHECK-NEXT: fcvtzu w16, d0 -; CHECK-NEXT: csel w13, w13, w8, lo -; CHECK-NEXT: cmp w15, w8 -; CHECK-NEXT: csel w15, w15, w8, lo -; CHECK-NEXT: cmp w12, w8 ; CHECK-NEXT: mov d0, v5.d[1] -; CHECK-NEXT: csel w12, w12, w8, lo +; CHECK-NEXT: csel w11, w11, w8, lo +; CHECK-NEXT: mov v18.s[1], w13 ; CHECK-NEXT: cmp w16, w8 -; CHECK-NEXT: mov v18.s[1], w14 -; CHECK-NEXT: fmov s23, w12 -; CHECK-NEXT: csel w12, w16, w8, lo +; CHECK-NEXT: fmov s17, w12 +; CHECK-NEXT: fmov s23, w11 +; CHECK-NEXT: csel w11, w16, w8, lo ; CHECK-NEXT: cmp w17, w8 ; CHECK-NEXT: fcvtzu w16, d0 ; CHECK-NEXT: mov d0, v4.d[1] -; CHECK-NEXT: csel w14, w17, w8, lo +; CHECK-NEXT: csel w13, w17, w8, lo ; CHECK-NEXT: fcvtzu w17, d5 -; CHECK-NEXT: fmov s17, w11 +; CHECK-NEXT: fcvtzu w12, d4 ; CHECK-NEXT: mov v23.s[1], w15 ; CHECK-NEXT: cmp w16, w8 -; CHECK-NEXT: fmov s22, w14 -; CHECK-NEXT: csel w14, w16, w8, lo +; CHECK-NEXT: fmov s22, w13 +; CHECK-NEXT: csel w13, w16, w8, lo ; CHECK-NEXT: cmp w17, w8 ; CHECK-NEXT: fcvtzu w16, d0 ; CHECK-NEXT: csel w15, w17, w8, lo -; CHECK-NEXT: fcvtzu w11, d4 -; CHECK-NEXT: mov v22.s[1], w12 +; CHECK-NEXT: mov v22.s[1], w11 ; CHECK-NEXT: cmp w16, w8 ; CHECK-NEXT: fmov s21, w15 -; CHECK-NEXT: csel w12, w16, w8, lo -; CHECK-NEXT: cmp w11, w8 -; CHECK-NEXT: csel w8, w11, w8, lo +; CHECK-NEXT: csel w11, w16, w8, lo +; CHECK-NEXT: cmp w12, w8 +; CHECK-NEXT: csel w8, w12, w8, lo ; CHECK-NEXT: mov v17.s[1], w9 ; CHECK-NEXT: adrp x9, .LCPI85_0 -; CHECK-NEXT: mov v21.s[1], w14 -; CHECK-NEXT: fmov s16, w13 +; CHECK-NEXT: mov v21.s[1], w13 +; CHECK-NEXT: fmov s16, w14 ; CHECK-NEXT: fmov s20, w8 ; CHECK-NEXT: ldr q1, [x9, :lo12:.LCPI85_0] ; CHECK-NEXT: mov v16.s[1], w10 -; CHECK-NEXT: mov v20.s[1], w12 +; CHECK-NEXT: mov v20.s[1], w11 ; CHECK-NEXT: tbl v0.16b, { v16.16b, v17.16b, v18.16b, v19.16b }, v1.16b ; CHECK-NEXT: tbl v1.16b, { v20.16b, v21.16b, v22.16b, v23.16b }, v1.16b ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/misched-detail-resource-booking-01.mir b/llvm/test/CodeGen/AArch64/misched-detail-resource-booking-01.mir --- a/llvm/test/CodeGen/AArch64/misched-detail-resource-booking-01.mir +++ b/llvm/test/CodeGen/AArch64/misched-detail-resource-booking-01.mir @@ -357,9 +357,9 @@ # CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@3c): -# CHECK-NEXT: Instance 0 available @0c -# CHECK-NEXT: Instance 1 available @0c -# CHECK-NEXT: selecting CortexA55UnitALU[0] available @0c +# CHECK-NEXT: Instance 0 available @3c +# CHECK-NEXT: Instance 1 available @3c +# CHECK-NEXT: selecting CortexA55UnitALU[0] available @3c # CHECK-NEXT: Resource booking (@3c): # CHECK-NEXT: CortexA55UnitALU(0) = 4294967295 # CHECK-NEXT: CortexA55UnitALU(1) = 4294967295 @@ -374,9 +374,9 @@ # CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@3c): -# CHECK-NEXT: Instance 0 available @0c -# CHECK-NEXT: Instance 1 available @0c -# CHECK-NEXT: selecting CortexA55UnitALU[0] available @0c +# CHECK-NEXT: Instance 0 available @3c +# CHECK-NEXT: Instance 1 available @3c +# CHECK-NEXT: selecting CortexA55UnitALU[0] available @3c # CHECK-NEXT: Queue BotQ.P: # CHECK-NEXT: Queue BotQ.A: 12 11 # CHECK-NEXT: Cand SU(12) ORDER @@ -398,9 +398,9 @@ # CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@3c): -# CHECK-NEXT: Instance 0 available @0c -# CHECK-NEXT: Instance 1 available @0c -# CHECK-NEXT: selecting CortexA55UnitALU[0] available @0c +# CHECK-NEXT: Instance 0 available @3c +# CHECK-NEXT: Instance 1 available @3c +# CHECK-NEXT: selecting CortexA55UnitALU[0] available @3c # CHECK-NEXT: Resource booking (@3c): # CHECK-NEXT: CortexA55UnitALU(0) = 4294967295 # CHECK-NEXT: CortexA55UnitALU(1) = 4294967295 @@ -415,9 +415,9 @@ # CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@3c): -# CHECK-NEXT: Instance 0 available @0c -# CHECK-NEXT: Instance 1 available @0c -# CHECK-NEXT: selecting CortexA55UnitALU[0] available @0c +# CHECK-NEXT: Instance 0 available @3c +# CHECK-NEXT: Instance 1 available @3c +# CHECK-NEXT: selecting CortexA55UnitALU[0] available @3c # CHECK-NEXT: BotQ.A TopLatency SU(12) 11c # CHECK-NEXT: BotQ.A BotLatency SU(12) 3c # CHECK-NEXT: BotQ.A @3c @@ -454,8 +454,8 @@ # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@3c): # CHECK-NEXT: Instance 0 available @4c -# CHECK-NEXT: Instance 1 available @0c -# CHECK-NEXT: selecting CortexA55UnitALU[1] available @0c +# CHECK-NEXT: Instance 1 available @3c +# CHECK-NEXT: selecting CortexA55UnitALU[1] available @3c # CHECK-NEXT: Queue BotQ.P: 10 # CHECK-NEXT: Queue BotQ.A: 11 # CHECK-NEXT: Scheduling SU(11) $q0 = COPY %10:fpr128 @@ -476,8 +476,8 @@ # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@3c): # CHECK-NEXT: Instance 0 available @4c -# CHECK-NEXT: Instance 1 available @0c -# CHECK-NEXT: selecting CortexA55UnitALU[1] available @0c +# CHECK-NEXT: Instance 1 available @3c +# CHECK-NEXT: selecting CortexA55UnitALU[1] available @3c # CHECK-NEXT: Resource booking (@3c): # CHECK-NEXT: CortexA55UnitALU(0) = 3 # CHECK-NEXT: CortexA55UnitALU(1) = 4294967295 @@ -492,9 +492,9 @@ # CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@3c): -# CHECK-NEXT: Instance 0 available @3c -# CHECK-NEXT: Instance 1 available @0c -# CHECK-NEXT: selecting CortexA55UnitALU[1] available @0c +# CHECK-NEXT: Instance 0 available @4c +# CHECK-NEXT: Instance 1 available @3c +# CHECK-NEXT: selecting CortexA55UnitALU[1] available @3c # CHECK-NEXT: *** Max MOps 2 at cycle 3 # CHECK-NEXT: Cycle: 4 BotQ.A # CHECK-NEXT: BotQ.A @4c @@ -531,9 +531,9 @@ # CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@7c): -# CHECK-NEXT: Instance 0 available @0c -# CHECK-NEXT: Instance 1 available @0c -# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @0c +# CHECK-NEXT: Instance 0 available @7c +# CHECK-NEXT: Instance 1 available @7c +# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @7c # CHECK-NEXT: Resource booking (@7c): # CHECK-NEXT: CortexA55UnitALU(0) = 3 # CHECK-NEXT: CortexA55UnitALU(1) = 3 @@ -548,9 +548,9 @@ # CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@7c): -# CHECK-NEXT: Instance 0 available @0c -# CHECK-NEXT: Instance 1 available @0c -# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @0c +# CHECK-NEXT: Instance 0 available @7c +# CHECK-NEXT: Instance 1 available @7c +# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @7c # CHECK-NEXT: Queue BotQ.P: # CHECK-NEXT: Queue BotQ.A: 10 8 # CHECK-NEXT: Cand SU(10) ORDER @@ -572,9 +572,9 @@ # CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@7c): -# CHECK-NEXT: Instance 0 available @0c -# CHECK-NEXT: Instance 1 available @0c -# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @0c +# CHECK-NEXT: Instance 0 available @7c +# CHECK-NEXT: Instance 1 available @7c +# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @7c # CHECK-NEXT: Resource booking (@7c): # CHECK-NEXT: CortexA55UnitALU(0) = 3 # CHECK-NEXT: CortexA55UnitALU(1) = 3 @@ -589,9 +589,9 @@ # CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@7c): -# CHECK-NEXT: Instance 0 available @0c -# CHECK-NEXT: Instance 1 available @0c -# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @0c +# CHECK-NEXT: Instance 0 available @7c +# CHECK-NEXT: Instance 1 available @7c +# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @7c # CHECK-NEXT: BotQ.A BotLatency SU(10) 7c # CHECK-NEXT: Bump cycle to begin group # CHECK-NEXT: Cycle: 8 BotQ.A @@ -629,8 +629,8 @@ # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@8c): # CHECK-NEXT: Instance 0 available @9c -# CHECK-NEXT: Instance 1 available @0c -# CHECK-NEXT: selecting CortexA55UnitFPALU[1] available @0c +# CHECK-NEXT: Instance 1 available @8c +# CHECK-NEXT: selecting CortexA55UnitFPALU[1] available @8c # CHECK-NEXT: Queue BotQ.P: 9 3 # CHECK-NEXT: Queue BotQ.A: 8 # CHECK-NEXT: Scheduling SU(8) %10:fpr128 = UMULLv4i16_v4i32 %0.dsub:fpr128, %9:fpr64 @@ -651,8 +651,8 @@ # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@8c): # CHECK-NEXT: Instance 0 available @9c -# CHECK-NEXT: Instance 1 available @0c -# CHECK-NEXT: selecting CortexA55UnitFPALU[1] available @0c +# CHECK-NEXT: Instance 1 available @8c +# CHECK-NEXT: selecting CortexA55UnitFPALU[1] available @8c # CHECK-NEXT: Resource booking (@8c): # CHECK-NEXT: CortexA55UnitALU(0) = 3 # CHECK-NEXT: CortexA55UnitALU(1) = 3 @@ -667,9 +667,9 @@ # CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@8c): -# CHECK-NEXT: Instance 0 available @7c -# CHECK-NEXT: Instance 1 available @0c -# CHECK-NEXT: selecting CortexA55UnitFPALU[1] available @0c +# CHECK-NEXT: Instance 0 available @9c +# CHECK-NEXT: Instance 1 available @8c +# CHECK-NEXT: selecting CortexA55UnitFPALU[1] available @8c # CHECK-NEXT: BotQ.A TopLatency SU(8) 7c # CHECK-NEXT: Bump cycle to begin group # CHECK-NEXT: Cycle: 9 BotQ.A @@ -706,9 +706,9 @@ # CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@9c): -# CHECK-NEXT: Instance 0 available @8c +# CHECK-NEXT: Instance 0 available @9c # CHECK-NEXT: Instance 1 available @9c -# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @8c +# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @9c # CHECK-NEXT: Resource booking (@9c): # CHECK-NEXT: CortexA55UnitALU(0) = 3 # CHECK-NEXT: CortexA55UnitALU(1) = 3 @@ -723,9 +723,9 @@ # CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@9c): -# CHECK-NEXT: Instance 0 available @8c +# CHECK-NEXT: Instance 0 available @9c # CHECK-NEXT: Instance 1 available @9c -# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @8c +# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @9c # CHECK-NEXT: Queue BotQ.P: 7 3 # CHECK-NEXT: Queue BotQ.A: 9 # CHECK-NEXT: Scheduling SU(9) %11:fpr64 = XTNv4i16 %7:fpr128 @@ -745,9 +745,9 @@ # CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@9c): -# CHECK-NEXT: Instance 0 available @8c +# CHECK-NEXT: Instance 0 available @9c # CHECK-NEXT: Instance 1 available @9c -# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @8c +# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @9c # CHECK-NEXT: Resource booking (@9c): # CHECK-NEXT: CortexA55UnitALU(0) = 3 # CHECK-NEXT: CortexA55UnitALU(1) = 3 @@ -762,9 +762,9 @@ # CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@9c): -# CHECK-NEXT: Instance 0 available @7c -# CHECK-NEXT: Instance 1 available @8c -# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @7c +# CHECK-NEXT: Instance 0 available @9c +# CHECK-NEXT: Instance 1 available @9c +# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @9c # CHECK-NEXT: BotQ.A BotLatency SU(9) 9c # CHECK-NEXT: BotQ.A @9c # CHECK-NEXT: Retired: 5 @@ -801,8 +801,8 @@ # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@10c): # CHECK-NEXT: Instance 0 available @10c -# CHECK-NEXT: Instance 1 available @9c -# CHECK-NEXT: selecting CortexA55UnitFPALU[1] available @9c +# CHECK-NEXT: Instance 1 available @10c +# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @10c # CHECK-NEXT: Resource booking (@10c): # CHECK-NEXT: CortexA55UnitALU(0) = 3 # CHECK-NEXT: CortexA55UnitALU(1) = 3 @@ -842,8 +842,8 @@ # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@10c): # CHECK-NEXT: Instance 0 available @10c -# CHECK-NEXT: Instance 1 available @9c -# CHECK-NEXT: selecting CortexA55UnitFPALU[1] available @9c +# CHECK-NEXT: Instance 1 available @10c +# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @10c # CHECK-NEXT: Resource booking (@10c): # CHECK-NEXT: CortexA55UnitALU(0) = 3 # CHECK-NEXT: CortexA55UnitALU(1) = 3 @@ -858,9 +858,9 @@ # CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@10c): -# CHECK-NEXT: Instance 0 available @9c -# CHECK-NEXT: Instance 1 available @8c -# CHECK-NEXT: selecting CortexA55UnitFPALU[1] available @8c +# CHECK-NEXT: Instance 0 available @10c +# CHECK-NEXT: Instance 1 available @10c +# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @10c # CHECK-NEXT: BotQ.A @10c # CHECK-NEXT: Retired: 6 # CHECK-NEXT: Executed: 10c @@ -871,8 +871,8 @@ # CHECK-NEXT: CortexA55UnitALU(1) = 3 # CHECK-NEXT: CortexA55UnitB(0) = 4294967295 # CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 9 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 10 +# CHECK-NEXT: CortexA55UnitFPALU(0) = 10 +# CHECK-NEXT: CortexA55UnitFPALU(1) = 8 # CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 @@ -885,8 +885,8 @@ # CHECK-NEXT: CortexA55UnitALU(1) = 3 # CHECK-NEXT: CortexA55UnitB(0) = 4294967295 # CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 9 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 10 +# CHECK-NEXT: CortexA55UnitFPALU(0) = 10 +# CHECK-NEXT: CortexA55UnitFPALU(1) = 8 # CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 @@ -894,51 +894,63 @@ # CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@10c): -# CHECK-NEXT: Instance 0 available @11c -# CHECK-NEXT: Instance 1 available @12c -# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @11c -# CHECK-NEXT: SU(5) CortexA55UnitFPALU[0]=11c -# CHECK-NEXT: Cycle: 11 BotQ.A -# CHECK-NEXT: Resource booking (@11c): +# CHECK-NEXT: Instance 0 available @12c +# CHECK-NEXT: Instance 1 available @10c +# CHECK-NEXT: selecting CortexA55UnitFPALU[1] available @10c +# CHECK-NEXT: Queue BotQ.P: 3 6 +# CHECK-NEXT: Queue BotQ.A: 5 +# CHECK-NEXT: Scheduling SU(5) %7:fpr128 = ANDv16i8 %2:fpr128, %6:fpr128 +# CHECK-NEXT: Ready @10c +# CHECK-NEXT: CortexA55UnitFPALU +2x1u +# CHECK-NEXT: *** Critical resource CortexA55UnitFPALU: 4c +# CHECK-NEXT: Resource booking (@10c): # CHECK-NEXT: CortexA55UnitALU(0) = 3 # CHECK-NEXT: CortexA55UnitALU(1) = 3 # CHECK-NEXT: CortexA55UnitB(0) = 4294967295 # CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 9 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 10 +# CHECK-NEXT: CortexA55UnitFPALU(0) = 10 +# CHECK-NEXT: CortexA55UnitFPALU(1) = 8 # CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 # CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 # CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@11c): -# CHECK-NEXT: Instance 0 available @11c -# CHECK-NEXT: Instance 1 available @12c -# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @11c -# CHECK-NEXT: Resource booking (@11c): +# CHECK-NEXT: getNextResourceCycle (@10c): +# CHECK-NEXT: Instance 0 available @12c +# CHECK-NEXT: Instance 1 available @10c +# CHECK-NEXT: selecting CortexA55UnitFPALU[1] available @10c +# CHECK-NEXT: Resource booking (@10c): # CHECK-NEXT: CortexA55UnitALU(0) = 3 # CHECK-NEXT: CortexA55UnitALU(1) = 3 # CHECK-NEXT: CortexA55UnitB(0) = 4294967295 # CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 9 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 10 +# CHECK-NEXT: CortexA55UnitFPALU(0) = 10 +# CHECK-NEXT: CortexA55UnitFPALU(1) = 8 # CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 # CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 # CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@11c): -# CHECK-NEXT: Instance 0 available @11c -# CHECK-NEXT: Instance 1 available @12c -# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @11c -# CHECK-NEXT: Resource booking (@11c): +# CHECK-NEXT: getNextResourceCycle (@10c): +# CHECK-NEXT: Instance 0 available @12c +# CHECK-NEXT: Instance 1 available @10c +# CHECK-NEXT: selecting CortexA55UnitFPALU[1] available @10c +# CHECK-NEXT: BotQ.A BotLatency SU(5) 10c +# CHECK-NEXT: Bump cycle to begin group +# CHECK-NEXT: Cycle: 11 BotQ.A +# CHECK-NEXT: BotQ.A @11c +# CHECK-NEXT: Retired: 7 +# CHECK-NEXT: Executed: 11c +# CHECK-NEXT: Critical: 4c, 8 CortexA55UnitFPALU +# CHECK-NEXT: ExpectedLatency: 10c +# CHECK-NEXT: - Latency limited. # CHECK-NEXT: CortexA55UnitALU(0) = 3 # CHECK-NEXT: CortexA55UnitALU(1) = 3 # CHECK-NEXT: CortexA55UnitB(0) = 4294967295 # CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 9 +# CHECK-NEXT: CortexA55UnitFPALU(0) = 10 # CHECK-NEXT: CortexA55UnitFPALU(1) = 10 # CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 @@ -946,26 +958,13 @@ # CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 # CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@11c): -# CHECK-NEXT: Instance 0 available @11c -# CHECK-NEXT: Instance 1 available @12c -# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @11c -# CHECK-NEXT: Queue BotQ.P: -# CHECK-NEXT: Queue BotQ.A: 3 5 6 -# CHECK-NEXT: Cand SU(3) ORDER -# CHECK-NEXT: Cand SU(5) ORDER -# CHECK-NEXT: Cand SU(6) ORDER -# CHECK-NEXT: Pick Bot ORDER -# CHECK-NEXT: Scheduling SU(6) %8:fpr128 = ANDv16i8 %1:fpr128, %6:fpr128 -# CHECK-NEXT: Ready @11c -# CHECK-NEXT: CortexA55UnitFPALU +2x1u -# CHECK-NEXT: *** Critical resource CortexA55UnitFPALU: 4c +# CHECK-NEXT: ** ScheduleDAGMILive::schedule picking next node # CHECK-NEXT: Resource booking (@11c): # CHECK-NEXT: CortexA55UnitALU(0) = 3 # CHECK-NEXT: CortexA55UnitALU(1) = 3 # CHECK-NEXT: CortexA55UnitB(0) = 4294967295 # CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 9 +# CHECK-NEXT: CortexA55UnitFPALU(0) = 10 # CHECK-NEXT: CortexA55UnitFPALU(1) = 10 # CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 @@ -974,15 +973,16 @@ # CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@11c): -# CHECK-NEXT: Instance 0 available @11c +# CHECK-NEXT: Instance 0 available @12c # CHECK-NEXT: Instance 1 available @12c -# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @11c +# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @12c +# CHECK-NEXT: SU(3) CortexA55UnitFPALU[0]=12c # CHECK-NEXT: Resource booking (@11c): # CHECK-NEXT: CortexA55UnitALU(0) = 3 # CHECK-NEXT: CortexA55UnitALU(1) = 3 # CHECK-NEXT: CortexA55UnitB(0) = 4294967295 # CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 9 +# CHECK-NEXT: CortexA55UnitFPALU(0) = 10 # CHECK-NEXT: CortexA55UnitFPALU(1) = 10 # CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 @@ -991,37 +991,17 @@ # CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@11c): -# CHECK-NEXT: Instance 0 available @9c -# CHECK-NEXT: Instance 1 available @10c -# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @9c -# CHECK-NEXT: BotQ.A BotLatency SU(6) 10c -# CHECK-NEXT: Bump cycle to begin group +# CHECK-NEXT: Instance 0 available @12c +# CHECK-NEXT: Instance 1 available @12c +# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @12c +# CHECK-NEXT: SU(6) CortexA55UnitFPALU[0]=12c # CHECK-NEXT: Cycle: 12 BotQ.A -# CHECK-NEXT: BotQ.A @12c -# CHECK-NEXT: Retired: 7 -# CHECK-NEXT: Executed: 12c -# CHECK-NEXT: Critical: 4c, 8 CortexA55UnitFPALU -# CHECK-NEXT: ExpectedLatency: 10c -# CHECK-NEXT: - Latency limited. -# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 11 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 10 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: ** ScheduleDAGMILive::schedule picking next node # CHECK-NEXT: Resource booking (@12c): # CHECK-NEXT: CortexA55UnitALU(0) = 3 # CHECK-NEXT: CortexA55UnitALU(1) = 3 # CHECK-NEXT: CortexA55UnitB(0) = 4294967295 # CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 11 +# CHECK-NEXT: CortexA55UnitFPALU(0) = 10 # CHECK-NEXT: CortexA55UnitFPALU(1) = 10 # CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 @@ -1030,15 +1010,15 @@ # CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@12c): -# CHECK-NEXT: Instance 0 available @13c +# CHECK-NEXT: Instance 0 available @12c # CHECK-NEXT: Instance 1 available @12c -# CHECK-NEXT: selecting CortexA55UnitFPALU[1] available @12c +# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @12c # CHECK-NEXT: Resource booking (@12c): # CHECK-NEXT: CortexA55UnitALU(0) = 3 # CHECK-NEXT: CortexA55UnitALU(1) = 3 # CHECK-NEXT: CortexA55UnitB(0) = 4294967295 # CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 11 +# CHECK-NEXT: CortexA55UnitFPALU(0) = 10 # CHECK-NEXT: CortexA55UnitFPALU(1) = 10 # CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 @@ -1047,15 +1027,15 @@ # CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@12c): -# CHECK-NEXT: Instance 0 available @13c +# CHECK-NEXT: Instance 0 available @12c # CHECK-NEXT: Instance 1 available @12c -# CHECK-NEXT: selecting CortexA55UnitFPALU[1] available @12c -# CHECK-NEXT: Queue BotQ.P: 1 -# CHECK-NEXT: Queue BotQ.A: 3 5 +# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @12c +# CHECK-NEXT: Queue BotQ.P: 0 +# CHECK-NEXT: Queue BotQ.A: 3 6 # CHECK-NEXT: Cand SU(3) ORDER -# CHECK-NEXT: Cand SU(5) ORDER +# CHECK-NEXT: Cand SU(6) ORDER # CHECK-NEXT: Pick Bot ORDER -# CHECK-NEXT: Scheduling SU(5) %7:fpr128 = ANDv16i8 %2:fpr128, %6:fpr128 +# CHECK-NEXT: Scheduling SU(6) %8:fpr128 = ANDv16i8 %1:fpr128, %6:fpr128 # CHECK-NEXT: Ready @12c # CHECK-NEXT: CortexA55UnitFPALU +2x1u # CHECK-NEXT: Resource booking (@12c): @@ -1063,7 +1043,7 @@ # CHECK-NEXT: CortexA55UnitALU(1) = 3 # CHECK-NEXT: CortexA55UnitB(0) = 4294967295 # CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 11 +# CHECK-NEXT: CortexA55UnitFPALU(0) = 10 # CHECK-NEXT: CortexA55UnitFPALU(1) = 10 # CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 @@ -1072,15 +1052,15 @@ # CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@12c): -# CHECK-NEXT: Instance 0 available @13c +# CHECK-NEXT: Instance 0 available @12c # CHECK-NEXT: Instance 1 available @12c -# CHECK-NEXT: selecting CortexA55UnitFPALU[1] available @12c +# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @12c # CHECK-NEXT: Resource booking (@12c): # CHECK-NEXT: CortexA55UnitALU(0) = 3 # CHECK-NEXT: CortexA55UnitALU(1) = 3 # CHECK-NEXT: CortexA55UnitB(0) = 4294967295 # CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 11 +# CHECK-NEXT: CortexA55UnitFPALU(0) = 10 # CHECK-NEXT: CortexA55UnitFPALU(1) = 10 # CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 @@ -1089,10 +1069,10 @@ # CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@12c): -# CHECK-NEXT: Instance 0 available @11c -# CHECK-NEXT: Instance 1 available @10c -# CHECK-NEXT: selecting CortexA55UnitFPALU[1] available @10c -# CHECK-NEXT: BotQ.A TopLatency SU(5) 4c +# CHECK-NEXT: Instance 0 available @12c +# CHECK-NEXT: Instance 1 available @12c +# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @12c +# CHECK-NEXT: BotQ.A TopLatency SU(6) 4c # CHECK-NEXT: Bump cycle to begin group # CHECK-NEXT: Cycle: 13 BotQ.A # CHECK-NEXT: BotQ.A @13c @@ -1105,8 +1085,8 @@ # CHECK-NEXT: CortexA55UnitALU(1) = 3 # CHECK-NEXT: CortexA55UnitB(0) = 4294967295 # CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 11 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 12 +# CHECK-NEXT: CortexA55UnitFPALU(0) = 12 +# CHECK-NEXT: CortexA55UnitFPALU(1) = 10 # CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 @@ -1119,8 +1099,8 @@ # CHECK-NEXT: CortexA55UnitALU(1) = 3 # CHECK-NEXT: CortexA55UnitB(0) = 4294967295 # CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 11 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 12 +# CHECK-NEXT: CortexA55UnitFPALU(0) = 12 +# CHECK-NEXT: CortexA55UnitFPALU(1) = 10 # CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 @@ -1129,10 +1109,46 @@ # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@13c): # CHECK-NEXT: Instance 0 available @13c -# CHECK-NEXT: Instance 1 available @14c -# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @13c -# CHECK-NEXT: Queue BotQ.P: 1 4 0 -# CHECK-NEXT: Queue BotQ.A: 3 +# CHECK-NEXT: Instance 1 available @13c +# CHECK-NEXT: selecting CortexA55UnitALU[0] available @13c +# CHECK-NEXT: Resource booking (@13c): +# CHECK-NEXT: CortexA55UnitALU(0) = 3 +# CHECK-NEXT: CortexA55UnitALU(1) = 3 +# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 +# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 +# CHECK-NEXT: CortexA55UnitFPALU(0) = 12 +# CHECK-NEXT: CortexA55UnitFPALU(1) = 10 +# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 +# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 +# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 +# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 +# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 +# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 +# CHECK-NEXT: getNextResourceCycle (@13c): +# CHECK-NEXT: Instance 0 available @14c +# CHECK-NEXT: Instance 1 available @13c +# CHECK-NEXT: selecting CortexA55UnitFPALU[1] available @13c +# CHECK-NEXT: Resource booking (@13c): +# CHECK-NEXT: CortexA55UnitALU(0) = 3 +# CHECK-NEXT: CortexA55UnitALU(1) = 3 +# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 +# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 +# CHECK-NEXT: CortexA55UnitFPALU(0) = 12 +# CHECK-NEXT: CortexA55UnitFPALU(1) = 10 +# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 +# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 +# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 +# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 +# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 +# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 +# CHECK-NEXT: getNextResourceCycle (@13c): +# CHECK-NEXT: Instance 0 available @13c +# CHECK-NEXT: Instance 1 available @13c +# CHECK-NEXT: selecting CortexA55UnitALU[0] available @13c +# CHECK-NEXT: Queue BotQ.P: 1 4 +# CHECK-NEXT: Queue BotQ.A: 3 0 +# CHECK-NEXT: Cand SU(3) ORDER +# CHECK-NEXT: Pick Bot PHYS-REG # CHECK-NEXT: Scheduling SU(3) %3:fpr128 = EXTv16i8 %0:fpr128, %0:fpr128, 8 # CHECK-NEXT: Ready @13c # CHECK-NEXT: CortexA55UnitFPALU +2x1u @@ -1141,8 +1157,8 @@ # CHECK-NEXT: CortexA55UnitALU(1) = 3 # CHECK-NEXT: CortexA55UnitB(0) = 4294967295 # CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 11 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 12 +# CHECK-NEXT: CortexA55UnitFPALU(0) = 12 +# CHECK-NEXT: CortexA55UnitFPALU(1) = 10 # CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 @@ -1150,16 +1166,16 @@ # CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@13c): -# CHECK-NEXT: Instance 0 available @13c -# CHECK-NEXT: Instance 1 available @14c -# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @13c +# CHECK-NEXT: Instance 0 available @14c +# CHECK-NEXT: Instance 1 available @13c +# CHECK-NEXT: selecting CortexA55UnitFPALU[1] available @13c # CHECK-NEXT: Resource booking (@13c): # CHECK-NEXT: CortexA55UnitALU(0) = 3 # CHECK-NEXT: CortexA55UnitALU(1) = 3 # CHECK-NEXT: CortexA55UnitB(0) = 4294967295 # CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 11 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 12 +# CHECK-NEXT: CortexA55UnitFPALU(0) = 12 +# CHECK-NEXT: CortexA55UnitFPALU(1) = 10 # CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 @@ -1167,9 +1183,9 @@ # CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@13c): -# CHECK-NEXT: Instance 0 available @11c -# CHECK-NEXT: Instance 1 available @12c -# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @11c +# CHECK-NEXT: Instance 0 available @14c +# CHECK-NEXT: Instance 1 available @13c +# CHECK-NEXT: selecting CortexA55UnitFPALU[1] available @13c # CHECK-NEXT: BotQ.A BotLatency SU(3) 11c # CHECK-NEXT: Bump cycle to begin group # CHECK-NEXT: Cycle: 14 BotQ.A @@ -1183,8 +1199,8 @@ # CHECK-NEXT: CortexA55UnitALU(1) = 3 # CHECK-NEXT: CortexA55UnitB(0) = 4294967295 # CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 13 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 12 +# CHECK-NEXT: CortexA55UnitFPALU(0) = 12 +# CHECK-NEXT: CortexA55UnitFPALU(1) = 13 # CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 @@ -1197,8 +1213,8 @@ # CHECK-NEXT: CortexA55UnitALU(1) = 3 # CHECK-NEXT: CortexA55UnitB(0) = 4294967295 # CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 13 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 12 +# CHECK-NEXT: CortexA55UnitFPALU(0) = 12 +# CHECK-NEXT: CortexA55UnitFPALU(1) = 13 # CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 @@ -1206,29 +1222,12 @@ # CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@14c): -# CHECK-NEXT: Instance 0 available @4c -# CHECK-NEXT: Instance 1 available @4c -# CHECK-NEXT: selecting CortexA55UnitALU[0] available @4c -# CHECK-NEXT: Resource booking (@14c): -# CHECK-NEXT: CortexA55UnitALU(0) = 3 -# CHECK-NEXT: CortexA55UnitALU(1) = 3 -# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 13 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 12 -# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 -# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 -# CHECK-NEXT: getNextResourceCycle (@14c): -# CHECK-NEXT: Instance 0 available @4c -# CHECK-NEXT: Instance 1 available @4c -# CHECK-NEXT: selecting CortexA55UnitALU[0] available @4c -# CHECK-NEXT: Queue BotQ.P: 2 4 0 -# CHECK-NEXT: Queue BotQ.A: 1 -# CHECK-NEXT: Scheduling SU(1) %1:fpr128 = COPY $q1 +# CHECK-NEXT: Instance 0 available @14c +# CHECK-NEXT: Instance 1 available @14c +# CHECK-NEXT: selecting CortexA55UnitALU[0] available @14c +# CHECK-NEXT: Queue BotQ.P: 1 4 2 +# CHECK-NEXT: Queue BotQ.A: 0 +# CHECK-NEXT: Scheduling SU(0) %2:fpr128 = COPY $q2 # CHECK-NEXT: Ready @14c # CHECK-NEXT: CortexA55UnitALU +1x1u # CHECK-NEXT: Resource booking (@14c): @@ -1236,8 +1235,8 @@ # CHECK-NEXT: CortexA55UnitALU(1) = 3 # CHECK-NEXT: CortexA55UnitB(0) = 4294967295 # CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 13 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 12 +# CHECK-NEXT: CortexA55UnitFPALU(0) = 12 +# CHECK-NEXT: CortexA55UnitFPALU(1) = 13 # CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 @@ -1245,16 +1244,16 @@ # CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@14c): -# CHECK-NEXT: Instance 0 available @4c -# CHECK-NEXT: Instance 1 available @4c -# CHECK-NEXT: selecting CortexA55UnitALU[0] available @4c +# CHECK-NEXT: Instance 0 available @14c +# CHECK-NEXT: Instance 1 available @14c +# CHECK-NEXT: selecting CortexA55UnitALU[0] available @14c # CHECK-NEXT: Resource booking (@14c): # CHECK-NEXT: CortexA55UnitALU(0) = 3 # CHECK-NEXT: CortexA55UnitALU(1) = 3 # CHECK-NEXT: CortexA55UnitB(0) = 4294967295 # CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 13 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 12 +# CHECK-NEXT: CortexA55UnitFPALU(0) = 12 +# CHECK-NEXT: CortexA55UnitFPALU(1) = 13 # CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 @@ -1262,10 +1261,10 @@ # CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@14c): -# CHECK-NEXT: Instance 0 available @3c -# CHECK-NEXT: Instance 1 available @3c -# CHECK-NEXT: selecting CortexA55UnitALU[0] available @3c -# CHECK-NEXT: BotQ.A BotLatency SU(1) 13c +# CHECK-NEXT: Instance 0 available @14c +# CHECK-NEXT: Instance 1 available @14c +# CHECK-NEXT: selecting CortexA55UnitALU[0] available @14c +# CHECK-NEXT: BotQ.A BotLatency SU(0) 13c # CHECK-NEXT: BotQ.A @14c # CHECK-NEXT: Retired: 10 # CHECK-NEXT: Executed: 14c @@ -1276,8 +1275,8 @@ # CHECK-NEXT: CortexA55UnitALU(1) = 3 # CHECK-NEXT: CortexA55UnitB(0) = 4294967295 # CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 13 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 12 +# CHECK-NEXT: CortexA55UnitFPALU(0) = 12 +# CHECK-NEXT: CortexA55UnitFPALU(1) = 13 # CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 @@ -1291,8 +1290,8 @@ # CHECK-NEXT: CortexA55UnitALU(1) = 3 # CHECK-NEXT: CortexA55UnitB(0) = 4294967295 # CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 13 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 12 +# CHECK-NEXT: CortexA55UnitFPALU(0) = 12 +# CHECK-NEXT: CortexA55UnitFPALU(1) = 13 # CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 @@ -1301,11 +1300,11 @@ # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@15c): # CHECK-NEXT: Instance 0 available @15c -# CHECK-NEXT: Instance 1 available @4c -# CHECK-NEXT: selecting CortexA55UnitALU[1] available @4c +# CHECK-NEXT: Instance 1 available @15c +# CHECK-NEXT: selecting CortexA55UnitALU[0] available @15c # CHECK-NEXT: Queue BotQ.P: 2 4 -# CHECK-NEXT: Queue BotQ.A: 0 -# CHECK-NEXT: Scheduling SU(0) %2:fpr128 = COPY $q2 +# CHECK-NEXT: Queue BotQ.A: 1 +# CHECK-NEXT: Scheduling SU(1) %1:fpr128 = COPY $q1 # CHECK-NEXT: Ready @15c # CHECK-NEXT: CortexA55UnitALU +1x1u # CHECK-NEXT: Resource booking (@15c): @@ -1313,8 +1312,8 @@ # CHECK-NEXT: CortexA55UnitALU(1) = 3 # CHECK-NEXT: CortexA55UnitB(0) = 4294967295 # CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 13 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 12 +# CHECK-NEXT: CortexA55UnitFPALU(0) = 12 +# CHECK-NEXT: CortexA55UnitFPALU(1) = 13 # CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 @@ -1323,15 +1322,15 @@ # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@15c): # CHECK-NEXT: Instance 0 available @15c -# CHECK-NEXT: Instance 1 available @4c -# CHECK-NEXT: selecting CortexA55UnitALU[1] available @4c +# CHECK-NEXT: Instance 1 available @15c +# CHECK-NEXT: selecting CortexA55UnitALU[0] available @15c # CHECK-NEXT: Resource booking (@15c): # CHECK-NEXT: CortexA55UnitALU(0) = 14 # CHECK-NEXT: CortexA55UnitALU(1) = 3 # CHECK-NEXT: CortexA55UnitB(0) = 4294967295 # CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 13 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 12 +# CHECK-NEXT: CortexA55UnitFPALU(0) = 12 +# CHECK-NEXT: CortexA55UnitFPALU(1) = 13 # CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 @@ -1339,21 +1338,21 @@ # CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@15c): -# CHECK-NEXT: Instance 0 available @14c -# CHECK-NEXT: Instance 1 available @3c -# CHECK-NEXT: selecting CortexA55UnitALU[1] available @3c +# CHECK-NEXT: Instance 0 available @15c +# CHECK-NEXT: Instance 1 available @15c +# CHECK-NEXT: selecting CortexA55UnitALU[0] available @15c # CHECK-NEXT: BotQ.A @15c # CHECK-NEXT: Retired: 11 # CHECK-NEXT: Executed: 15c # CHECK-NEXT: Critical: 6c, 12 CortexA55UnitFPALU # CHECK-NEXT: ExpectedLatency: 13c # CHECK-NEXT: - Latency limited. -# CHECK-NEXT: CortexA55UnitALU(0) = 14 -# CHECK-NEXT: CortexA55UnitALU(1) = 15 +# CHECK-NEXT: CortexA55UnitALU(0) = 15 +# CHECK-NEXT: CortexA55UnitALU(1) = 3 # CHECK-NEXT: CortexA55UnitB(0) = 4294967295 # CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 13 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 12 +# CHECK-NEXT: CortexA55UnitFPALU(0) = 12 +# CHECK-NEXT: CortexA55UnitFPALU(1) = 13 # CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 @@ -1363,12 +1362,12 @@ # CHECK-NEXT: ** ScheduleDAGMILive::schedule picking next node # CHECK-NEXT: Cycle: 16 BotQ.A # CHECK-NEXT: Resource booking (@16c): -# CHECK-NEXT: CortexA55UnitALU(0) = 14 -# CHECK-NEXT: CortexA55UnitALU(1) = 15 +# CHECK-NEXT: CortexA55UnitALU(0) = 15 +# CHECK-NEXT: CortexA55UnitALU(1) = 3 # CHECK-NEXT: CortexA55UnitB(0) = 4294967295 # CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 13 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 12 +# CHECK-NEXT: CortexA55UnitFPALU(0) = 12 +# CHECK-NEXT: CortexA55UnitFPALU(1) = 13 # CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 @@ -1376,16 +1375,16 @@ # CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@16c): -# CHECK-NEXT: Instance 0 available @15c +# CHECK-NEXT: Instance 0 available @16c # CHECK-NEXT: Instance 1 available @16c -# CHECK-NEXT: selecting CortexA55UnitALU[0] available @15c +# CHECK-NEXT: selecting CortexA55UnitALU[0] available @16c # CHECK-NEXT: Resource booking (@16c): -# CHECK-NEXT: CortexA55UnitALU(0) = 14 -# CHECK-NEXT: CortexA55UnitALU(1) = 15 +# CHECK-NEXT: CortexA55UnitALU(0) = 15 +# CHECK-NEXT: CortexA55UnitALU(1) = 3 # CHECK-NEXT: CortexA55UnitB(0) = 4294967295 # CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 13 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 12 +# CHECK-NEXT: CortexA55UnitFPALU(0) = 12 +# CHECK-NEXT: CortexA55UnitFPALU(1) = 13 # CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 @@ -1393,9 +1392,9 @@ # CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@16c): -# CHECK-NEXT: Instance 0 available @15c -# CHECK-NEXT: Instance 1 available @14c -# CHECK-NEXT: selecting CortexA55UnitFPALU[1] available @14c +# CHECK-NEXT: Instance 0 available @16c +# CHECK-NEXT: Instance 1 available @16c +# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @16c # CHECK-NEXT: Queue BotQ.P: # CHECK-NEXT: Queue BotQ.A: 2 4 # CHECK-NEXT: Cand SU(2) ORDER @@ -1405,12 +1404,12 @@ # CHECK-NEXT: Ready @16c # CHECK-NEXT: CortexA55UnitFPALU +2x1u # CHECK-NEXT: Resource booking (@16c): -# CHECK-NEXT: CortexA55UnitALU(0) = 14 -# CHECK-NEXT: CortexA55UnitALU(1) = 15 +# CHECK-NEXT: CortexA55UnitALU(0) = 15 +# CHECK-NEXT: CortexA55UnitALU(1) = 3 # CHECK-NEXT: CortexA55UnitB(0) = 4294967295 # CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 13 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 12 +# CHECK-NEXT: CortexA55UnitFPALU(0) = 12 +# CHECK-NEXT: CortexA55UnitFPALU(1) = 13 # CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 @@ -1418,16 +1417,16 @@ # CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@16c): -# CHECK-NEXT: Instance 0 available @15c -# CHECK-NEXT: Instance 1 available @14c -# CHECK-NEXT: selecting CortexA55UnitFPALU[1] available @14c +# CHECK-NEXT: Instance 0 available @16c +# CHECK-NEXT: Instance 1 available @16c +# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @16c # CHECK-NEXT: Resource booking (@16c): -# CHECK-NEXT: CortexA55UnitALU(0) = 14 -# CHECK-NEXT: CortexA55UnitALU(1) = 15 +# CHECK-NEXT: CortexA55UnitALU(0) = 15 +# CHECK-NEXT: CortexA55UnitALU(1) = 3 # CHECK-NEXT: CortexA55UnitB(0) = 4294967295 # CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 13 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 12 +# CHECK-NEXT: CortexA55UnitFPALU(0) = 12 +# CHECK-NEXT: CortexA55UnitFPALU(1) = 13 # CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 @@ -1435,9 +1434,9 @@ # CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@16c): -# CHECK-NEXT: Instance 0 available @13c -# CHECK-NEXT: Instance 1 available @12c -# CHECK-NEXT: selecting CortexA55UnitFPALU[1] available @12c +# CHECK-NEXT: Instance 0 available @16c +# CHECK-NEXT: Instance 1 available @16c +# CHECK-NEXT: selecting CortexA55UnitFPALU[0] available @16c # CHECK-NEXT: BotQ.A BotLatency SU(4) 14c # CHECK-NEXT: Bump cycle to begin group # CHECK-NEXT: Cycle: 17 BotQ.A @@ -1447,12 +1446,12 @@ # CHECK-NEXT: Critical: 7c, 14 CortexA55UnitFPALU # CHECK-NEXT: ExpectedLatency: 14c # CHECK-NEXT: - Latency limited. -# CHECK-NEXT: CortexA55UnitALU(0) = 14 -# CHECK-NEXT: CortexA55UnitALU(1) = 15 +# CHECK-NEXT: CortexA55UnitALU(0) = 15 +# CHECK-NEXT: CortexA55UnitALU(1) = 3 # CHECK-NEXT: CortexA55UnitB(0) = 4294967295 # CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 13 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 16 +# CHECK-NEXT: CortexA55UnitFPALU(0) = 16 +# CHECK-NEXT: CortexA55UnitFPALU(1) = 13 # CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 @@ -1461,12 +1460,12 @@ # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: ** ScheduleDAGMILive::schedule picking next node # CHECK-NEXT: Resource booking (@17c): -# CHECK-NEXT: CortexA55UnitALU(0) = 14 -# CHECK-NEXT: CortexA55UnitALU(1) = 15 +# CHECK-NEXT: CortexA55UnitALU(0) = 15 +# CHECK-NEXT: CortexA55UnitALU(1) = 3 # CHECK-NEXT: CortexA55UnitB(0) = 4294967295 # CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 13 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 16 +# CHECK-NEXT: CortexA55UnitFPALU(0) = 16 +# CHECK-NEXT: CortexA55UnitFPALU(1) = 13 # CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 @@ -1474,21 +1473,21 @@ # CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@17c): -# CHECK-NEXT: Instance 0 available @15c -# CHECK-NEXT: Instance 1 available @16c -# CHECK-NEXT: selecting CortexA55UnitALU[0] available @15c +# CHECK-NEXT: Instance 0 available @17c +# CHECK-NEXT: Instance 1 available @17c +# CHECK-NEXT: selecting CortexA55UnitALU[0] available @17c # CHECK-NEXT: Queue BotQ.P: # CHECK-NEXT: Queue BotQ.A: 2 # CHECK-NEXT: Scheduling SU(2) %0:fpr128 = COPY $q0 # CHECK-NEXT: Ready @17c # CHECK-NEXT: CortexA55UnitALU +1x1u # CHECK-NEXT: Resource booking (@17c): -# CHECK-NEXT: CortexA55UnitALU(0) = 14 -# CHECK-NEXT: CortexA55UnitALU(1) = 15 +# CHECK-NEXT: CortexA55UnitALU(0) = 15 +# CHECK-NEXT: CortexA55UnitALU(1) = 3 # CHECK-NEXT: CortexA55UnitB(0) = 4294967295 # CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 13 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 16 +# CHECK-NEXT: CortexA55UnitFPALU(0) = 16 +# CHECK-NEXT: CortexA55UnitFPALU(1) = 13 # CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 @@ -1496,16 +1495,16 @@ # CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@17c): -# CHECK-NEXT: Instance 0 available @15c -# CHECK-NEXT: Instance 1 available @16c -# CHECK-NEXT: selecting CortexA55UnitALU[0] available @15c +# CHECK-NEXT: Instance 0 available @17c +# CHECK-NEXT: Instance 1 available @17c +# CHECK-NEXT: selecting CortexA55UnitALU[0] available @17c # CHECK-NEXT: Resource booking (@17c): -# CHECK-NEXT: CortexA55UnitALU(0) = 14 -# CHECK-NEXT: CortexA55UnitALU(1) = 15 +# CHECK-NEXT: CortexA55UnitALU(0) = 15 +# CHECK-NEXT: CortexA55UnitALU(1) = 3 # CHECK-NEXT: CortexA55UnitB(0) = 4294967295 # CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 13 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 16 +# CHECK-NEXT: CortexA55UnitFPALU(0) = 16 +# CHECK-NEXT: CortexA55UnitFPALU(1) = 13 # CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 @@ -1513,9 +1512,9 @@ # CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@17c): -# CHECK-NEXT: Instance 0 available @14c -# CHECK-NEXT: Instance 1 available @15c -# CHECK-NEXT: selecting CortexA55UnitALU[0] available @14c +# CHECK-NEXT: Instance 0 available @17c +# CHECK-NEXT: Instance 1 available @17c +# CHECK-NEXT: selecting CortexA55UnitALU[0] available @17c # CHECK-NEXT: BotQ.A @17c # CHECK-NEXT: Retired: 13 # CHECK-NEXT: Executed: 17c @@ -1523,11 +1522,11 @@ # CHECK-NEXT: ExpectedLatency: 14c # CHECK-NEXT: - Latency limited. # CHECK-NEXT: CortexA55UnitALU(0) = 17 -# CHECK-NEXT: CortexA55UnitALU(1) = 15 +# CHECK-NEXT: CortexA55UnitALU(1) = 3 # CHECK-NEXT: CortexA55UnitB(0) = 4294967295 # CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 -# CHECK-NEXT: CortexA55UnitFPALU(0) = 13 -# CHECK-NEXT: CortexA55UnitFPALU(1) = 16 +# CHECK-NEXT: CortexA55UnitFPALU(0) = 16 +# CHECK-NEXT: CortexA55UnitFPALU(1) = 13 # CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 @@ -1544,16 +1543,16 @@ # CHECK-NEXT: CortexA55UnitALU | x | | | | | | | | | | | | | | | # CHECK-NEXT: SU(4) | | i | | | | | | | | | | | | | | # CHECK-NEXT: CortexA55UnitFPALU | | x | x | | | | | | | | | | | | | -# CHECK-NEXT: SU(0) | | | i | | | | | | | | | | | | | +# CHECK-NEXT: SU(1) | | | i | | | | | | | | | | | | | # CHECK-NEXT: CortexA55UnitALU | | | x | | | | | | | | | | | | | -# CHECK-NEXT: SU(1) | | | | i | | | | | | | | | | | | +# CHECK-NEXT: SU(0) | | | | i | | | | | | | | | | | | # CHECK-NEXT: CortexA55UnitALU | | | | x | | | | | | | | | | | | # CHECK-NEXT: SU(3) | | | | | i | | | | | | | | | | | # CHECK-NEXT: CortexA55UnitFPALU | | | | | x | x | | | | | | | | | | -# CHECK-NEXT: SU(5) | | | | | | i | | | | | | | | | | +# CHECK-NEXT: SU(6) | | | | | | i | | | | | | | | | | # CHECK-NEXT: CortexA55UnitFPALU | | | | | | x | x | | | | | | | | | -# CHECK-NEXT: SU(6) | | | | | | | i | | | | | | | | | -# CHECK-NEXT: CortexA55UnitFPALU | | | | | | | x | x | | | | | | | | +# CHECK-NEXT: SU(5) | | | | | | | | i | | | | | | | | +# CHECK-NEXT: CortexA55UnitFPALU | | | | | | | | x | x | | | | | | | # CHECK-NEXT: SU(7) | | | | | | | | i | | | | | | | | # CHECK-NEXT: CortexA55UnitFPALU | | | | | | | | x | | | | | | | | # CHECK-NEXT: SU(9) | | | | | | | | | i | | | | | | | @@ -1568,11 +1567,11 @@ # CHECK-NEXT: CortexA55UnitALU | | | | | | | | | | | | | | | x | # CHECK-NEXT: SU(2) [TopReadyCycle = 0, BottomReadyCycle = 17]: %0:fpr128 = COPY $q0 # CHECK-NEXT: SU(4) [TopReadyCycle = 0, BottomReadyCycle = 16]: %6:fpr128 = MOVIv2d_ns 17 -# CHECK-NEXT: SU(0) [TopReadyCycle = 0, BottomReadyCycle = 15]: %2:fpr128 = COPY $q2 -# CHECK-NEXT: SU(1) [TopReadyCycle = 0, BottomReadyCycle = 14]: %1:fpr128 = COPY $q1 +# CHECK-NEXT: SU(1) [TopReadyCycle = 0, BottomReadyCycle = 15]: %1:fpr128 = COPY $q1 +# CHECK-NEXT: SU(0) [TopReadyCycle = 0, BottomReadyCycle = 14]: %2:fpr128 = COPY $q2 # CHECK-NEXT: SU(3) [TopReadyCycle = 0, BottomReadyCycle = 13]: %3:fpr128 = EXTv16i8 %0:fpr128, %0:fpr128, 8 -# CHECK-NEXT: SU(5) [TopReadyCycle = 0, BottomReadyCycle = 12]: %7:fpr128 = ANDv16i8 %2:fpr128, %6:fpr128 -# CHECK-NEXT: SU(6) [TopReadyCycle = 0, BottomReadyCycle = 11]: %8:fpr128 = ANDv16i8 %1:fpr128, %6:fpr128 +# CHECK-NEXT: SU(6) [TopReadyCycle = 0, BottomReadyCycle = 12]: %8:fpr128 = ANDv16i8 %1:fpr128, %6:fpr128 +# CHECK-NEXT: SU(5) [TopReadyCycle = 0, BottomReadyCycle = 10]: %7:fpr128 = ANDv16i8 %2:fpr128, %6:fpr128 # CHECK-NEXT: SU(7) [TopReadyCycle = 0, BottomReadyCycle = 10]: %9:fpr64 = XTNv4i16 %8:fpr128 # CHECK-NEXT: SU(9) [TopReadyCycle = 0, BottomReadyCycle = 9]: %11:fpr64 = XTNv4i16 %7:fpr128 # CHECK-NEXT: SU(8) [TopReadyCycle = 0, BottomReadyCycle = 8]: %10:fpr128 = UMULLv4i16_v4i32 %0.dsub:fpr128, %9:fpr64 @@ -1582,16 +1581,16 @@ # CHECK-EMPTY: # CHECK-NEXT: ********** INTERVALS ********** # CHECK-NEXT: B0 [0B,48r:0)[192r,224r:1) 0@0B-phi 1@192r -# CHECK-NEXT: B1 [0B,96r:0)[208r,224r:1) 0@0B-phi 1@208r -# CHECK-NEXT: B2 [0B,88r:0) 0@0B-phi +# CHECK-NEXT: B1 [0B,88r:0)[208r,224r:1) 0@0B-phi 1@208r +# CHECK-NEXT: B2 [0B,96r:0) 0@0B-phi # CHECK-NEXT: %0 [48r,168r:0) 0@48r weight:0.000000e+00 -# CHECK-NEXT: %1 [96r,120r:0) 0@96r weight:0.000000e+00 -# CHECK-NEXT: %2 [88r,112r:0) 0@88r weight:0.000000e+00 +# CHECK-NEXT: %1 [88r,120r:0) 0@88r weight:0.000000e+00 +# CHECK-NEXT: %2 [96r,128r:0) 0@96r weight:0.000000e+00 # CHECK-NEXT: %3 [104r,176r:0) 0@104r weight:0.000000e+00 -# CHECK-NEXT: %6 [80r,120r:0) 0@80r weight:0.000000e+00 -# CHECK-NEXT: %7 [112r,160r:0) 0@112r weight:0.000000e+00 -# CHECK-NEXT: %8 [120r,128r:0) 0@120r weight:0.000000e+00 -# CHECK-NEXT: %9 [128r,168r:0) 0@128r weight:0.000000e+00 +# CHECK-NEXT: %6 [80r,128r:0) 0@80r weight:0.000000e+00 +# CHECK-NEXT: %7 [128r,160r:0) 0@128r weight:0.000000e+00 +# CHECK-NEXT: %8 [120r,136r:0) 0@120r weight:0.000000e+00 +# CHECK-NEXT: %9 [136r,168r:0) 0@136r weight:0.000000e+00 # CHECK-NEXT: %10 [168r,192r:0) 0@168r weight:0.000000e+00 # CHECK-NEXT: %11 [160r,176r:0) 0@160r weight:0.000000e+00 # CHECK-NEXT: %12 [176r,208r:0) 0@176r weight:0.000000e+00 @@ -1604,12 +1603,12 @@ # CHECK-NEXT: liveins: $q0, $q1, $q2 # CHECK-NEXT: 48B %0:fpr128 = COPY $q0 # CHECK-NEXT: 80B %6:fpr128 = MOVIv2d_ns 17 -# CHECK-NEXT: 88B %2:fpr128 = COPY $q2 -# CHECK-NEXT: 96B %1:fpr128 = COPY $q1 +# CHECK-NEXT: 88B %1:fpr128 = COPY $q1 +# CHECK-NEXT: 96B %2:fpr128 = COPY $q2 # CHECK-NEXT: 104B %3:fpr128 = EXTv16i8 %0:fpr128, %0:fpr128, 8 -# CHECK-NEXT: 112B %7:fpr128 = ANDv16i8 %2:fpr128, %6:fpr128 # CHECK-NEXT: 120B %8:fpr128 = ANDv16i8 %1:fpr128, %6:fpr128 -# CHECK-NEXT: 128B %9:fpr64 = XTNv4i16 %8:fpr128 +# CHECK-NEXT: 128B %7:fpr128 = ANDv16i8 %2:fpr128, %6:fpr128 +# CHECK-NEXT: 136B %9:fpr64 = XTNv4i16 %8:fpr128 # CHECK-NEXT: 160B %11:fpr64 = XTNv4i16 %7:fpr128 # CHECK-NEXT: 168B %10:fpr128 = UMULLv4i16_v4i32 %0.dsub:fpr128, %9:fpr64 # CHECK-NEXT: 176B %12:fpr128 = UMULLv4i16_v4i32 %3.dsub:fpr128, %11:fpr64 diff --git a/llvm/test/CodeGen/AArch64/misched-detail-resource-booking-02.mir b/llvm/test/CodeGen/AArch64/misched-detail-resource-booking-02.mir --- a/llvm/test/CodeGen/AArch64/misched-detail-resource-booking-02.mir +++ b/llvm/test/CodeGen/AArch64/misched-detail-resource-booking-02.mir @@ -348,9 +348,9 @@ # CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@0c): -# CHECK-NEXT: Instance 0 available @0c +# CHECK-NEXT: Instance 0 available @1c # CHECK-NEXT: Instance 1 available @0c -# CHECK-NEXT: selecting CortexA55UnitALU[0] available @0c +# CHECK-NEXT: selecting CortexA55UnitALU[1] available @0c # CHECK-NEXT: *** Max MOps 2 at cycle 0 # CHECK-NEXT: Cycle: 1 BotQ.A # CHECK-NEXT: BotQ.A @1c @@ -360,7 +360,7 @@ # CHECK-NEXT: ExpectedLatency: 0c # CHECK-NEXT: - Latency limited. # CHECK-NEXT: CortexA55UnitALU(0) = 0 -# CHECK-NEXT: CortexA55UnitALU(1) = 4294967295 +# CHECK-NEXT: CortexA55UnitALU(1) = 0 # CHECK-NEXT: CortexA55UnitB(0) = 4294967295 # CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPALU(0) = 4294967295 @@ -374,7 +374,7 @@ # CHECK-NEXT: ** ScheduleDAGMILive::schedule picking next node # CHECK-NEXT: Resource booking (@1c): # CHECK-NEXT: CortexA55UnitALU(0) = 0 -# CHECK-NEXT: CortexA55UnitALU(1) = 4294967295 +# CHECK-NEXT: CortexA55UnitALU(1) = 0 # CHECK-NEXT: CortexA55UnitB(0) = 4294967295 # CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPALU(0) = 4294967295 @@ -387,8 +387,8 @@ # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@1c): # CHECK-NEXT: Instance 0 available @1c -# CHECK-NEXT: Instance 1 available @0c -# CHECK-NEXT: selecting CortexA55UnitALU[1] available @0c +# CHECK-NEXT: Instance 1 available @1c +# CHECK-NEXT: selecting CortexA55UnitALU[0] available @1c # CHECK-NEXT: Queue BotQ.P: # CHECK-NEXT: Queue BotQ.A: 0 # CHECK-NEXT: Scheduling SU(0) $x3 = ADDXrr $x0, $x0 @@ -396,7 +396,7 @@ # CHECK-NEXT: CortexA55UnitALU +1x1u # CHECK-NEXT: Resource booking (@1c): # CHECK-NEXT: CortexA55UnitALU(0) = 0 -# CHECK-NEXT: CortexA55UnitALU(1) = 4294967295 +# CHECK-NEXT: CortexA55UnitALU(1) = 0 # CHECK-NEXT: CortexA55UnitB(0) = 4294967295 # CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPALU(0) = 4294967295 @@ -409,11 +409,11 @@ # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@1c): # CHECK-NEXT: Instance 0 available @1c -# CHECK-NEXT: Instance 1 available @0c -# CHECK-NEXT: selecting CortexA55UnitALU[1] available @0c +# CHECK-NEXT: Instance 1 available @1c +# CHECK-NEXT: selecting CortexA55UnitALU[0] available @1c # CHECK-NEXT: Resource booking (@1c): # CHECK-NEXT: CortexA55UnitALU(0) = 0 -# CHECK-NEXT: CortexA55UnitALU(1) = 4294967295 +# CHECK-NEXT: CortexA55UnitALU(1) = 0 # CHECK-NEXT: CortexA55UnitB(0) = 4294967295 # CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPALU(0) = 4294967295 @@ -425,9 +425,9 @@ # CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 # CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 # CHECK-NEXT: getNextResourceCycle (@1c): -# CHECK-NEXT: Instance 0 available @0c -# CHECK-NEXT: Instance 1 available @0c -# CHECK-NEXT: selecting CortexA55UnitALU[0] available @0c +# CHECK-NEXT: Instance 0 available @1c +# CHECK-NEXT: Instance 1 available @1c +# CHECK-NEXT: selecting CortexA55UnitALU[0] available @1c # CHECK-NEXT: BotQ.A @1c # CHECK-NEXT: Retired: 3 # CHECK-NEXT: Executed: 1c @@ -435,7 +435,7 @@ # CHECK-NEXT: ExpectedLatency: 0c # CHECK-NEXT: - Latency limited. # CHECK-NEXT: CortexA55UnitALU(0) = 1 -# CHECK-NEXT: CortexA55UnitALU(1) = 4294967295 +# CHECK-NEXT: CortexA55UnitALU(1) = 0 # CHECK-NEXT: CortexA55UnitB(0) = 4294967295 # CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 # CHECK-NEXT: CortexA55UnitFPALU(0) = 4294967295 diff --git a/llvm/test/CodeGen/AArch64/vec_uaddo.ll b/llvm/test/CodeGen/AArch64/vec_uaddo.ll --- a/llvm/test/CodeGen/AArch64/vec_uaddo.ll +++ b/llvm/test/CodeGen/AArch64/vec_uaddo.ll @@ -145,17 +145,17 @@ ; CHECK-NEXT: zip2 v2.8b, v0.8b, v0.8b ; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: ushll v1.4s, v1.4h, #0 -; CHECK-NEXT: ushll v2.4s, v2.4h, #0 ; CHECK-NEXT: zip1 v3.8b, v0.8b, v0.8b ; CHECK-NEXT: zip2 v5.8b, v0.8b, v0.8b -; CHECK-NEXT: shl v1.4s, v1.4s, #31 -; CHECK-NEXT: shl v2.4s, v2.4s, #31 -; CHECK-NEXT: cmlt v0.4s, v1.4s, #0 -; CHECK-NEXT: cmlt v1.4s, v2.4s, #0 +; CHECK-NEXT: shl v0.4s, v1.4s, #31 +; CHECK-NEXT: ushll v2.4s, v2.4h, #0 +; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 +; CHECK-NEXT: shl v1.4s, v2.4s, #31 ; CHECK-NEXT: ushll v2.4s, v3.4h, #0 ; CHECK-NEXT: ushll v3.4s, v5.4h, #0 ; CHECK-NEXT: shl v2.4s, v2.4s, #31 ; CHECK-NEXT: shl v3.4s, v3.4s, #31 +; CHECK-NEXT: cmlt v1.4s, v1.4s, #0 ; CHECK-NEXT: cmlt v2.4s, v2.4s, #0 ; CHECK-NEXT: cmlt v3.4s, v3.4s, #0 ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/vec_umulo.ll b/llvm/test/CodeGen/AArch64/vec_umulo.ll --- a/llvm/test/CodeGen/AArch64/vec_umulo.ll +++ b/llvm/test/CodeGen/AArch64/vec_umulo.ll @@ -120,13 +120,13 @@ ; CHECK-NEXT: uzp2 v5.4s, v6.4s, v5.4s ; CHECK-NEXT: cmtst v4.4s, v4.4s, v4.4s ; CHECK-NEXT: str q0, [x8] -; CHECK-NEXT: cmtst v3.4s, v5.4s, v5.4s +; CHECK-NEXT: cmtst v5.4s, v5.4s, v5.4s ; CHECK-NEXT: mov w5, v4.s[1] ; CHECK-NEXT: fmov w4, s4 -; CHECK-NEXT: mov w1, v3.s[1] -; CHECK-NEXT: mov w2, v3.s[2] -; CHECK-NEXT: mov w3, v3.s[3] -; CHECK-NEXT: fmov w0, s3 +; CHECK-NEXT: mov w1, v5.s[1] +; CHECK-NEXT: mov w2, v5.s[2] +; CHECK-NEXT: mov w3, v5.s[3] +; CHECK-NEXT: fmov w0, s5 ; CHECK-NEXT: ret %t = call {<6 x i32>, <6 x i1>} @llvm.umul.with.overflow.v6i32(<6 x i32> %a0, <6 x i32> %a1) %val = extractvalue {<6 x i32>, <6 x i1>} %t, 0 @@ -166,27 +166,27 @@ ; CHECK: // %bb.0: ; CHECK-NEXT: umull2 v2.8h, v0.16b, v1.16b ; CHECK-NEXT: umull v3.8h, v0.8b, v1.8b -; CHECK-NEXT: mul v5.16b, v0.16b, v1.16b +; CHECK-NEXT: mul v6.16b, v0.16b, v1.16b ; CHECK-NEXT: uzp2 v2.16b, v3.16b, v2.16b -; CHECK-NEXT: str q5, [x0] +; CHECK-NEXT: str q6, [x0] ; CHECK-NEXT: cmtst v2.16b, v2.16b, v2.16b ; CHECK-NEXT: zip1 v3.8b, v2.8b, v0.8b ; CHECK-NEXT: zip2 v4.8b, v2.8b, v0.8b -; CHECK-NEXT: ext v0.16b, v2.16b, v2.16b, #8 -; CHECK-NEXT: ushll v1.4s, v3.4h, #0 -; CHECK-NEXT: ushll v2.4s, v4.4h, #0 -; CHECK-NEXT: zip1 v3.8b, v0.8b, v0.8b -; CHECK-NEXT: zip2 v4.8b, v0.8b, v0.8b -; CHECK-NEXT: shl v1.4s, v1.4s, #31 -; CHECK-NEXT: shl v2.4s, v2.4s, #31 -; CHECK-NEXT: cmlt v0.4s, v1.4s, #0 -; CHECK-NEXT: cmlt v1.4s, v2.4s, #0 -; CHECK-NEXT: ushll v2.4s, v3.4h, #0 -; CHECK-NEXT: ushll v3.4s, v4.4h, #0 -; CHECK-NEXT: shl v2.4s, v2.4s, #31 +; CHECK-NEXT: ext v2.16b, v2.16b, v2.16b, #8 +; CHECK-NEXT: ushll v3.4s, v3.4h, #0 +; CHECK-NEXT: zip1 v5.8b, v2.8b, v0.8b +; CHECK-NEXT: zip2 v2.8b, v2.8b, v0.8b +; CHECK-NEXT: shl v3.4s, v3.4s, #31 +; CHECK-NEXT: ushll v4.4s, v4.4h, #0 +; CHECK-NEXT: cmlt v0.4s, v3.4s, #0 +; CHECK-NEXT: ushll v3.4s, v5.4h, #0 +; CHECK-NEXT: ushll v2.4s, v2.4h, #0 +; CHECK-NEXT: shl v1.4s, v4.4s, #31 ; CHECK-NEXT: shl v3.4s, v3.4s, #31 -; CHECK-NEXT: cmlt v2.4s, v2.4s, #0 -; CHECK-NEXT: cmlt v3.4s, v3.4s, #0 +; CHECK-NEXT: shl v4.4s, v2.4s, #31 +; CHECK-NEXT: cmlt v1.4s, v1.4s, #0 +; CHECK-NEXT: cmlt v2.4s, v3.4s, #0 +; CHECK-NEXT: cmlt v3.4s, v4.4s, #0 ; CHECK-NEXT: ret %t = call {<16 x i8>, <16 x i1>} @llvm.umul.with.overflow.v16i8(<16 x i8> %a0, <16 x i8> %a1) %val = extractvalue {<16 x i8>, <16 x i1>} %t, 0 diff --git a/llvm/test/CodeGen/AArch64/vector-fcopysign.ll b/llvm/test/CodeGen/AArch64/vector-fcopysign.ll --- a/llvm/test/CodeGen/AArch64/vector-fcopysign.ll +++ b/llvm/test/CodeGen/AArch64/vector-fcopysign.ll @@ -341,13 +341,14 @@ ; NOFP16-NEXT: mov h17, v0[3] ; NOFP16-NEXT: fcvt s5, h5 ; NOFP16-NEXT: fcvt s6, h6 -; NOFP16-NEXT: mov h18, v0[5] +; NOFP16-NEXT: mov h18, v1[5] ; NOFP16-NEXT: bit.16b v2, v4, v3 ; NOFP16-NEXT: mov h4, v1[3] ; NOFP16-NEXT: fcvt s7, h7 ; NOFP16-NEXT: fcvt s16, h16 ; NOFP16-NEXT: fcvt s17, h17 ; NOFP16-NEXT: bit.16b v5, v6, v3 +; NOFP16-NEXT: fcvt s18, h18 ; NOFP16-NEXT: mov.16b v6, v3 ; NOFP16-NEXT: fcvt s4, h4 ; NOFP16-NEXT: bsl.16b v6, v16, v7 @@ -356,32 +357,34 @@ ; NOFP16-NEXT: fcvt h2, s2 ; NOFP16-NEXT: fcvt h5, s5 ; NOFP16-NEXT: bit.16b v4, v17, v3 -; NOFP16-NEXT: mov h17, v1[5] +; NOFP16-NEXT: mov h17, v0[5] ; NOFP16-NEXT: fcvt s7, h7 ; NOFP16-NEXT: fcvt s16, h16 +; NOFP16-NEXT: fcvt h6, s6 ; NOFP16-NEXT: mov.h v2[1], v5[0] -; NOFP16-NEXT: fcvt h5, s6 -; NOFP16-NEXT: fcvt s6, h17 -; NOFP16-NEXT: fcvt s17, h18 -; NOFP16-NEXT: fcvt h4, s4 -; NOFP16-NEXT: bit.16b v7, v16, v3 +; NOFP16-NEXT: mov.16b v5, v3 +; NOFP16-NEXT: fcvt s17, h17 +; NOFP16-NEXT: bsl.16b v5, v16, v7 +; NOFP16-NEXT: mov h7, v1[6] ; NOFP16-NEXT: mov h16, v0[6] -; NOFP16-NEXT: mov.h v2[2], v5[0] -; NOFP16-NEXT: mov h5, v1[6] -; NOFP16-NEXT: bit.16b v6, v17, v3 -; NOFP16-NEXT: mov h1, v1[7] +; NOFP16-NEXT: mov.h v2[2], v6[0] +; NOFP16-NEXT: fcvt h4, s4 +; NOFP16-NEXT: mov.16b v6, v3 +; NOFP16-NEXT: bsl.16b v6, v17, v18 +; NOFP16-NEXT: fcvt s7, h7 ; NOFP16-NEXT: fcvt s16, h16 +; NOFP16-NEXT: mov h1, v1[7] ; NOFP16-NEXT: mov.h v2[3], v4[0] -; NOFP16-NEXT: fcvt h4, s7 -; NOFP16-NEXT: fcvt s5, h5 +; NOFP16-NEXT: fcvt h4, s5 ; NOFP16-NEXT: mov h0, v0[7] -; NOFP16-NEXT: fcvt s1, h1 +; NOFP16-NEXT: mov.16b v5, v3 +; NOFP16-NEXT: bsl.16b v5, v16, v7 ; NOFP16-NEXT: mov.h v2[4], v4[0] ; NOFP16-NEXT: fcvt h4, s6 -; NOFP16-NEXT: bit.16b v5, v16, v3 +; NOFP16-NEXT: fcvt s1, h1 ; NOFP16-NEXT: fcvt s0, h0 -; NOFP16-NEXT: mov.h v2[5], v4[0] ; NOFP16-NEXT: fcvt h5, s5 +; NOFP16-NEXT: mov.h v2[5], v4[0] ; NOFP16-NEXT: bif.16b v0, v1, v3 ; NOFP16-NEXT: mov.h v2[6], v5[0] ; NOFP16-NEXT: fcvt h0, s0 @@ -402,60 +405,61 @@ ; NOFP16-LABEL: test_copysign_v8f16_v8f32: ; NOFP16: ; %bb.0: ; NOFP16-NEXT: fcvtn v1.4h, v1.4s -; NOFP16-NEXT: fcvtn v2.4h, v2.4s ; NOFP16-NEXT: mov h4, v0[1] -; NOFP16-NEXT: mov h5, v0[4] -; NOFP16-NEXT: fcvt s7, h0 -; NOFP16-NEXT: mov h17, v0[2] +; NOFP16-NEXT: fcvt s6, h0 +; NOFP16-NEXT: mov h16, v0[2] ; NOFP16-NEXT: mvni.4s v3, #128, lsl #24 -; NOFP16-NEXT: mov h6, v1[1] -; NOFP16-NEXT: fcvt s16, h1 +; NOFP16-NEXT: fcvtn v2.4h, v2.4s +; NOFP16-NEXT: mov h5, v1[1] +; NOFP16-NEXT: fcvt s7, h1 ; NOFP16-NEXT: fcvt s4, h4 -; NOFP16-NEXT: mov h18, v1[2] +; NOFP16-NEXT: mov h17, v1[2] +; NOFP16-NEXT: mov h1, v1[3] +; NOFP16-NEXT: fcvt s16, h16 ; NOFP16-NEXT: fcvt s5, h5 +; NOFP16-NEXT: bif.16b v6, v7, v3 +; NOFP16-NEXT: mov h7, v0[3] ; NOFP16-NEXT: fcvt s17, h17 -; NOFP16-NEXT: fcvt s6, h6 -; NOFP16-NEXT: bif.16b v7, v16, v3 -; NOFP16-NEXT: fcvt s16, h2 -; NOFP16-NEXT: fcvt s18, h18 -; NOFP16-NEXT: bif.16b v4, v6, v3 -; NOFP16-NEXT: mov h6, v0[3] -; NOFP16-NEXT: bif.16b v5, v16, v3 -; NOFP16-NEXT: mov h16, v1[3] -; NOFP16-NEXT: fcvt h1, s7 -; NOFP16-NEXT: mov.16b v7, v3 +; NOFP16-NEXT: fcvt s18, h1 +; NOFP16-NEXT: bif.16b v4, v5, v3 +; NOFP16-NEXT: mov h5, v0[4] +; NOFP16-NEXT: fcvt h1, s6 +; NOFP16-NEXT: mov.16b v6, v3 +; NOFP16-NEXT: fcvt s7, h7 +; NOFP16-NEXT: bsl.16b v6, v16, v17 ; NOFP16-NEXT: fcvt h4, s4 -; NOFP16-NEXT: bsl.16b v7, v17, v18 -; NOFP16-NEXT: fcvt s6, h6 -; NOFP16-NEXT: fcvt s16, h16 -; NOFP16-NEXT: mov h17, v0[5] +; NOFP16-NEXT: mov h16, v0[5] +; NOFP16-NEXT: fcvt s5, h5 +; NOFP16-NEXT: fcvt s17, h2 +; NOFP16-NEXT: bif.16b v7, v18, v3 ; NOFP16-NEXT: mov h18, v2[1] -; NOFP16-NEXT: fcvt h5, s5 ; NOFP16-NEXT: mov.h v1[1], v4[0] -; NOFP16-NEXT: fcvt h4, s7 -; NOFP16-NEXT: bif.16b v6, v16, v3 -; NOFP16-NEXT: fcvt s7, h17 -; NOFP16-NEXT: fcvt s17, h18 -; NOFP16-NEXT: mov.h v1[2], v4[0] -; NOFP16-NEXT: mov h4, v0[6] -; NOFP16-NEXT: mov h16, v2[2] ; NOFP16-NEXT: fcvt h6, s6 +; NOFP16-NEXT: fcvt s4, h16 +; NOFP16-NEXT: bif.16b v5, v17, v3 +; NOFP16-NEXT: fcvt s16, h18 +; NOFP16-NEXT: mov h17, v0[6] +; NOFP16-NEXT: mov h18, v2[2] ; NOFP16-NEXT: mov h0, v0[7] -; NOFP16-NEXT: bif.16b v7, v17, v3 -; NOFP16-NEXT: mov h2, v2[3] -; NOFP16-NEXT: fcvt s4, h4 -; NOFP16-NEXT: fcvt s16, h16 +; NOFP16-NEXT: mov.h v1[2], v6[0] +; NOFP16-NEXT: fcvt h6, s7 +; NOFP16-NEXT: bif.16b v4, v16, v3 +; NOFP16-NEXT: fcvt s7, h17 +; NOFP16-NEXT: fcvt s16, h18 +; NOFP16-NEXT: fcvt h5, s5 ; NOFP16-NEXT: mov.h v1[3], v6[0] +; NOFP16-NEXT: mov h2, v2[3] +; NOFP16-NEXT: mov.16b v6, v3 +; NOFP16-NEXT: fcvt h4, s4 +; NOFP16-NEXT: bsl.16b v6, v7, v16 ; NOFP16-NEXT: fcvt s0, h0 -; NOFP16-NEXT: fcvt s2, h2 -; NOFP16-NEXT: bif.16b v4, v16, v3 ; NOFP16-NEXT: mov.h v1[4], v5[0] -; NOFP16-NEXT: fcvt h5, s7 +; NOFP16-NEXT: fcvt s2, h2 +; NOFP16-NEXT: fcvt h5, s6 +; NOFP16-NEXT: mov.h v1[5], v4[0] ; NOFP16-NEXT: bif.16b v0, v2, v3 -; NOFP16-NEXT: fcvt h4, s4 -; NOFP16-NEXT: mov.h v1[5], v5[0] +; NOFP16-NEXT: mov.h v1[6], v5[0] ; NOFP16-NEXT: fcvt h0, s0 -; NOFP16-NEXT: mov.h v1[6], v4[0] ; NOFP16-NEXT: mov.h v1[7], v0[0] ; NOFP16-NEXT: mov.16b v0, v1 ; NOFP16-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/vector-fcvt.ll b/llvm/test/CodeGen/AArch64/vector-fcvt.ll --- a/llvm/test/CodeGen/AArch64/vector-fcvt.ll +++ b/llvm/test/CodeGen/AArch64/vector-fcvt.ll @@ -37,26 +37,26 @@ ; CHECK-LABEL: sitofp_v16i8_float: ; CHECK: // %bb.0: ; CHECK-NEXT: zip1 v1.8b, v0.8b, v0.8b -; CHECK-NEXT: zip2 v2.8b, v0.8b, v0.8b -; CHECK-NEXT: ext v0.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: shl v1.4h, v1.4h, #8 -; CHECK-NEXT: shl v2.4h, v2.4h, #8 -; CHECK-NEXT: zip1 v3.8b, v0.8b, v0.8b +; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8 ; CHECK-NEXT: zip2 v0.8b, v0.8b, v0.8b +; CHECK-NEXT: shl v1.4h, v1.4h, #8 +; CHECK-NEXT: zip1 v3.8b, v2.8b, v0.8b +; CHECK-NEXT: shl v0.4h, v0.4h, #8 ; CHECK-NEXT: sshr v1.4h, v1.4h, #8 -; CHECK-NEXT: sshr v2.4h, v2.4h, #8 +; CHECK-NEXT: zip2 v2.8b, v2.8b, v0.8b +; CHECK-NEXT: sshr v0.4h, v0.4h, #8 ; CHECK-NEXT: sshll v1.4s, v1.4h, #0 ; CHECK-NEXT: shl v3.4h, v3.4h, #8 -; CHECK-NEXT: shl v0.4h, v0.4h, #8 -; CHECK-NEXT: sshr v3.4h, v3.4h, #8 -; CHECK-NEXT: sshr v0.4h, v0.4h, #8 -; CHECK-NEXT: sshll v2.4s, v2.4h, #0 -; CHECK-NEXT: sshll v3.4s, v3.4h, #0 ; CHECK-NEXT: sshll v4.4s, v0.4h, #0 +; CHECK-NEXT: shl v2.4h, v2.4h, #8 +; CHECK-NEXT: sshr v0.4h, v3.4h, #8 +; CHECK-NEXT: sshr v2.4h, v2.4h, #8 +; CHECK-NEXT: sshll v3.4s, v0.4h, #0 ; CHECK-NEXT: scvtf v0.4s, v1.4s -; CHECK-NEXT: scvtf v1.4s, v2.4s +; CHECK-NEXT: sshll v5.4s, v2.4h, #0 +; CHECK-NEXT: scvtf v1.4s, v4.4s ; CHECK-NEXT: scvtf v2.4s, v3.4s -; CHECK-NEXT: scvtf v3.4s, v4.4s +; CHECK-NEXT: scvtf v3.4s, v5.4s ; CHECK-NEXT: ret %1 = sitofp <16 x i8> %a to <16 x float> ret <16 x float> %1 @@ -253,71 +253,71 @@ define <16 x double> @sitofp_v16i8_double(<16 x i8> %a) { ; CHECK-LABEL: sitofp_v16i8_double: ; CHECK: // %bb.0: -; CHECK-NEXT: umov w8, v0.b[0] -; CHECK-NEXT: umov w9, v0.b[1] ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: umov w11, v0.b[2] +; CHECK-NEXT: umov w8, v0.b[0] +; CHECK-NEXT: umov w9, v0.b[2] +; CHECK-NEXT: umov w11, v0.b[1] +; CHECK-NEXT: umov w12, v0.b[4] +; CHECK-NEXT: umov w10, v1.b[0] ; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: umov w8, v1.b[0] -; CHECK-NEXT: umov w10, v1.b[2] -; CHECK-NEXT: umov w12, v1.b[1] -; CHECK-NEXT: fmov s6, w11 -; CHECK-NEXT: mov v2.s[1], w9 -; CHECK-NEXT: umov w9, v1.b[4] -; CHECK-NEXT: umov w11, v0.b[5] -; CHECK-NEXT: fmov s3, w8 -; CHECK-NEXT: umov w8, v1.b[3] +; CHECK-NEXT: umov w8, v1.b[2] +; CHECK-NEXT: fmov s3, w9 +; CHECK-NEXT: umov w9, v1.b[1] +; CHECK-NEXT: fmov s7, w12 +; CHECK-NEXT: mov v2.s[1], w11 +; CHECK-NEXT: umov w11, v1.b[3] ; CHECK-NEXT: fmov s4, w10 -; CHECK-NEXT: umov w10, v1.b[5] -; CHECK-NEXT: fmov s5, w9 -; CHECK-NEXT: umov w9, v1.b[6] -; CHECK-NEXT: mov v3.s[1], w12 -; CHECK-NEXT: umov w12, v1.b[7] -; CHECK-NEXT: mov v4.s[1], w8 -; CHECK-NEXT: umov w8, v0.b[4] -; CHECK-NEXT: mov v5.s[1], w10 +; CHECK-NEXT: umov w10, v1.b[4] +; CHECK-NEXT: fmov s5, w8 +; CHECK-NEXT: umov w8, v1.b[6] +; CHECK-NEXT: umov w12, v0.b[7] +; CHECK-NEXT: mov v4.s[1], w9 +; CHECK-NEXT: umov w9, v1.b[5] +; CHECK-NEXT: mov v5.s[1], w11 +; CHECK-NEXT: fmov s6, w10 ; CHECK-NEXT: umov w10, v0.b[6] -; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: umov w9, v0.b[3] -; CHECK-NEXT: shl v3.2s, v3.2s, #24 -; CHECK-NEXT: fmov s7, w8 -; CHECK-NEXT: umov w8, v0.b[7] -; CHECK-NEXT: fmov s0, w10 +; CHECK-NEXT: umov w11, v1.b[7] +; CHECK-NEXT: fmov s1, w8 +; CHECK-NEXT: umov w8, v0.b[3] +; CHECK-NEXT: mov v6.s[1], w9 +; CHECK-NEXT: umov w9, v0.b[5] ; CHECK-NEXT: shl v4.2s, v4.2s, #24 -; CHECK-NEXT: mov v1.s[1], w12 +; CHECK-NEXT: fmov s0, w10 ; CHECK-NEXT: shl v5.2s, v5.2s, #24 -; CHECK-NEXT: mov v6.s[1], w9 -; CHECK-NEXT: mov v7.s[1], w11 -; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: shl v1.2s, v1.2s, #24 +; CHECK-NEXT: mov v1.s[1], w11 ; CHECK-NEXT: shl v2.2s, v2.2s, #24 +; CHECK-NEXT: mov v3.s[1], w8 +; CHECK-NEXT: mov v7.s[1], w9 +; CHECK-NEXT: mov v0.s[1], w12 ; CHECK-NEXT: shl v6.2s, v6.2s, #24 +; CHECK-NEXT: shl v1.2s, v1.2s, #24 +; CHECK-NEXT: shl v3.2s, v3.2s, #24 ; CHECK-NEXT: shl v7.2s, v7.2s, #24 -; CHECK-NEXT: sshr v3.2s, v3.2s, #24 -; CHECK-NEXT: shl v0.2s, v0.2s, #24 ; CHECK-NEXT: sshr v4.2s, v4.2s, #24 +; CHECK-NEXT: shl v0.2s, v0.2s, #24 ; CHECK-NEXT: sshr v5.2s, v5.2s, #24 +; CHECK-NEXT: sshr v6.2s, v6.2s, #24 ; CHECK-NEXT: sshr v1.2s, v1.2s, #24 ; CHECK-NEXT: sshr v2.2s, v2.2s, #24 -; CHECK-NEXT: sshr v6.2s, v6.2s, #24 +; CHECK-NEXT: sshr v3.2s, v3.2s, #24 ; CHECK-NEXT: sshr v7.2s, v7.2s, #24 ; CHECK-NEXT: sshr v0.2s, v0.2s, #24 -; CHECK-NEXT: sshll v16.2d, v3.2s, #0 -; CHECK-NEXT: sshll v17.2d, v4.2s, #0 -; CHECK-NEXT: sshll v18.2d, v5.2s, #0 -; CHECK-NEXT: sshll v19.2d, v1.2s, #0 +; CHECK-NEXT: sshll v4.2d, v4.2s, #0 +; CHECK-NEXT: sshll v5.2d, v5.2s, #0 +; CHECK-NEXT: sshll v6.2d, v6.2s, #0 +; CHECK-NEXT: sshll v16.2d, v1.2s, #0 ; CHECK-NEXT: sshll v1.2d, v2.2s, #0 -; CHECK-NEXT: sshll v2.2d, v6.2s, #0 +; CHECK-NEXT: sshll v2.2d, v3.2s, #0 ; CHECK-NEXT: sshll v3.2d, v7.2s, #0 -; CHECK-NEXT: sshll v4.2d, v0.2s, #0 +; CHECK-NEXT: sshll v7.2d, v0.2s, #0 ; CHECK-NEXT: scvtf v0.2d, v1.2d ; CHECK-NEXT: scvtf v1.2d, v2.2d ; CHECK-NEXT: scvtf v2.2d, v3.2d -; CHECK-NEXT: scvtf v3.2d, v4.2d -; CHECK-NEXT: scvtf v4.2d, v16.2d -; CHECK-NEXT: scvtf v5.2d, v17.2d -; CHECK-NEXT: scvtf v6.2d, v18.2d -; CHECK-NEXT: scvtf v7.2d, v19.2d +; CHECK-NEXT: scvtf v3.2d, v7.2d +; CHECK-NEXT: scvtf v4.2d, v4.2d +; CHECK-NEXT: scvtf v5.2d, v5.2d +; CHECK-NEXT: scvtf v6.2d, v6.2d +; CHECK-NEXT: scvtf v7.2d, v16.2d ; CHECK-NEXT: ret %1 = sitofp <16 x i8> %a to <16 x double> ret <16 x double> %1 @@ -427,53 +427,53 @@ define <16 x double> @uitofp_v16i8_double(<16 x i8> %a) { ; CHECK-LABEL: uitofp_v16i8_double: ; CHECK: // %bb.0: -; CHECK-NEXT: ext v3.16b, v0.16b, v0.16b, #8 -; CHECK-NEXT: umov w11, v0.b[0] +; CHECK-NEXT: ext v2.16b, v0.16b, v0.16b, #8 +; CHECK-NEXT: umov w9, v0.b[0] +; CHECK-NEXT: umov w11, v0.b[1] ; CHECK-NEXT: movi d1, #0x0000ff000000ff -; CHECK-NEXT: umov w8, v3.b[0] -; CHECK-NEXT: umov w9, v3.b[1] -; CHECK-NEXT: umov w10, v3.b[4] -; CHECK-NEXT: umov w12, v3.b[3] -; CHECK-NEXT: fmov s6, w11 -; CHECK-NEXT: umov w11, v0.b[3] -; CHECK-NEXT: fmov s2, w8 -; CHECK-NEXT: umov w8, v3.b[2] +; CHECK-NEXT: umov w8, v2.b[0] +; CHECK-NEXT: umov w10, v2.b[2] +; CHECK-NEXT: umov w12, v2.b[1] +; CHECK-NEXT: fmov s4, w9 +; CHECK-NEXT: umov w9, v2.b[3] +; CHECK-NEXT: fmov s3, w8 +; CHECK-NEXT: umov w8, v2.b[4] ; CHECK-NEXT: fmov s5, w10 -; CHECK-NEXT: umov w10, v3.b[7] -; CHECK-NEXT: mov v2.s[1], w9 -; CHECK-NEXT: umov w9, v3.b[6] -; CHECK-NEXT: fmov s4, w8 -; CHECK-NEXT: umov w8, v3.b[5] -; CHECK-NEXT: fmov s3, w9 -; CHECK-NEXT: umov w9, v0.b[2] -; CHECK-NEXT: mov v4.s[1], w12 +; CHECK-NEXT: umov w10, v2.b[6] +; CHECK-NEXT: mov v4.s[1], w11 +; CHECK-NEXT: mov v3.s[1], w12 +; CHECK-NEXT: umov w12, v2.b[5] +; CHECK-NEXT: fmov s6, w8 +; CHECK-NEXT: umov w8, v0.b[2] +; CHECK-NEXT: mov v5.s[1], w9 +; CHECK-NEXT: umov w9, v2.b[7] +; CHECK-NEXT: fmov s2, w10 +; CHECK-NEXT: umov w10, v0.b[4] +; CHECK-NEXT: and v3.8b, v3.8b, v1.8b +; CHECK-NEXT: mov v6.s[1], w12 ; CHECK-NEXT: umov w12, v0.b[6] -; CHECK-NEXT: mov v5.s[1], w8 -; CHECK-NEXT: umov w8, v0.b[4] -; CHECK-NEXT: mov v3.s[1], w10 -; CHECK-NEXT: umov w10, v0.b[1] -; CHECK-NEXT: fmov s7, w9 +; CHECK-NEXT: fmov s7, w8 +; CHECK-NEXT: umov w8, v0.b[3] +; CHECK-NEXT: and v5.8b, v5.8b, v1.8b +; CHECK-NEXT: mov v2.s[1], w9 ; CHECK-NEXT: umov w9, v0.b[5] -; CHECK-NEXT: and v2.8b, v2.8b, v1.8b -; CHECK-NEXT: fmov s16, w8 -; CHECK-NEXT: umov w8, v0.b[7] +; CHECK-NEXT: fmov s16, w10 +; CHECK-NEXT: umov w10, v0.b[7] ; CHECK-NEXT: fmov s0, w12 -; CHECK-NEXT: and v4.8b, v4.8b, v1.8b -; CHECK-NEXT: mov v6.s[1], w10 -; CHECK-NEXT: and v5.8b, v5.8b, v1.8b -; CHECK-NEXT: mov v7.s[1], w11 -; CHECK-NEXT: mov v16.s[1], w9 -; CHECK-NEXT: mov v0.s[1], w8 -; CHECK-NEXT: and v3.8b, v3.8b, v1.8b +; CHECK-NEXT: mov v7.s[1], w8 ; CHECK-NEXT: and v6.8b, v6.8b, v1.8b +; CHECK-NEXT: mov v16.s[1], w9 +; CHECK-NEXT: and v2.8b, v2.8b, v1.8b +; CHECK-NEXT: mov v0.s[1], w10 +; CHECK-NEXT: and v4.8b, v4.8b, v1.8b ; CHECK-NEXT: and v7.8b, v7.8b, v1.8b ; CHECK-NEXT: and v16.8b, v16.8b, v1.8b +; CHECK-NEXT: ushll v17.2d, v3.2s, #0 ; CHECK-NEXT: and v0.8b, v0.8b, v1.8b -; CHECK-NEXT: ushll v17.2d, v2.2s, #0 -; CHECK-NEXT: ushll v18.2d, v4.2s, #0 -; CHECK-NEXT: ushll v19.2d, v5.2s, #0 -; CHECK-NEXT: ushll v20.2d, v3.2s, #0 -; CHECK-NEXT: ushll v1.2d, v6.2s, #0 +; CHECK-NEXT: ushll v5.2d, v5.2s, #0 +; CHECK-NEXT: ushll v6.2d, v6.2s, #0 +; CHECK-NEXT: ushll v18.2d, v2.2s, #0 +; CHECK-NEXT: ushll v1.2d, v4.2s, #0 ; CHECK-NEXT: ushll v2.2d, v7.2s, #0 ; CHECK-NEXT: ushll v3.2d, v16.2s, #0 ; CHECK-NEXT: ushll v4.2d, v0.2s, #0 @@ -482,9 +482,9 @@ ; CHECK-NEXT: ucvtf v2.2d, v3.2d ; CHECK-NEXT: ucvtf v3.2d, v4.2d ; CHECK-NEXT: ucvtf v4.2d, v17.2d -; CHECK-NEXT: ucvtf v5.2d, v18.2d -; CHECK-NEXT: ucvtf v6.2d, v19.2d -; CHECK-NEXT: ucvtf v7.2d, v20.2d +; CHECK-NEXT: ucvtf v5.2d, v5.2d +; CHECK-NEXT: ucvtf v6.2d, v6.2d +; CHECK-NEXT: ucvtf v7.2d, v18.2d ; CHECK-NEXT: ret %1 = uitofp <16 x i8> %a to <16 x double> ret <16 x double> %1 diff --git a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll --- a/llvm/test/CodeGen/AArch64/zext-to-tbl.ll +++ b/llvm/test/CodeGen/AArch64/zext-to-tbl.ll @@ -2969,48 +2969,48 @@ ; CHECK-BE-NEXT: ld1 { v6.4s }, [x8] ; CHECK-BE-NEXT: add x8, x0, #96 ; CHECK-BE-NEXT: tbl v7.16b, { v4.16b }, v3.16b -; CHECK-BE-NEXT: tbl v18.16b, { v4.16b }, v1.16b -; CHECK-BE-NEXT: ld1 { v17.4s }, [x1] +; CHECK-BE-NEXT: tbl v17.16b, { v4.16b }, v1.16b +; CHECK-BE-NEXT: ld1 { v20.4s }, [x1] ; CHECK-BE-NEXT: tbl v16.16b, { v4.16b }, v0.16b ; CHECK-BE-NEXT: tbl v4.16b, { v4.16b }, v2.16b -; CHECK-BE-NEXT: ld1 { v19.4s }, [x9] -; CHECK-BE-NEXT: rev32 v20.8b, v7.8b +; CHECK-BE-NEXT: ld1 { v18.4s }, [x9] +; CHECK-BE-NEXT: rev32 v19.8b, v7.8b ; CHECK-BE-NEXT: add x9, x0, #32 ; CHECK-BE-NEXT: ext v23.16b, v6.16b, v6.16b, #8 -; CHECK-BE-NEXT: rev32 v22.8b, v18.8b +; CHECK-BE-NEXT: rev32 v22.8b, v17.8b ; CHECK-BE-NEXT: ext v7.16b, v7.16b, v7.16b, #8 ; CHECK-BE-NEXT: ext v24.16b, v4.16b, v4.16b, #8 -; CHECK-BE-NEXT: umull v6.2d, v20.2s, v6.2s -; CHECK-BE-NEXT: umull v20.2d, v22.2s, v17.2s -; CHECK-BE-NEXT: ext v22.16b, v19.16b, v19.16b, #8 +; CHECK-BE-NEXT: umull v6.2d, v19.2s, v6.2s +; CHECK-BE-NEXT: umull v19.2d, v22.2s, v20.2s +; CHECK-BE-NEXT: ext v22.16b, v18.16b, v18.16b, #8 ; CHECK-BE-NEXT: ext v21.16b, v5.16b, v5.16b, #8 ; CHECK-BE-NEXT: st1 { v6.2d }, [x8] ; CHECK-BE-NEXT: rev32 v6.8b, v7.8b -; CHECK-BE-NEXT: ext v7.16b, v18.16b, v18.16b, #8 -; CHECK-BE-NEXT: rev32 v18.8b, v16.8b +; CHECK-BE-NEXT: ext v7.16b, v17.16b, v17.16b, #8 +; CHECK-BE-NEXT: rev32 v17.8b, v16.8b ; CHECK-BE-NEXT: ext v16.16b, v16.16b, v16.16b, #8 ; CHECK-BE-NEXT: add x8, x0, #112 -; CHECK-BE-NEXT: st1 { v20.2d }, [x9] -; CHECK-BE-NEXT: rev32 v20.8b, v24.8b +; CHECK-BE-NEXT: st1 { v19.2d }, [x9] +; CHECK-BE-NEXT: rev32 v19.8b, v24.8b ; CHECK-BE-NEXT: umull v6.2d, v6.2s, v23.2s ; CHECK-BE-NEXT: rev32 v4.8b, v4.8b -; CHECK-BE-NEXT: umull v5.2d, v18.2s, v5.2s +; CHECK-BE-NEXT: umull v5.2d, v17.2s, v5.2s ; CHECK-BE-NEXT: add x9, x0, #80 -; CHECK-BE-NEXT: ext v17.16b, v17.16b, v17.16b, #8 -; CHECK-BE-NEXT: umull v18.2d, v20.2s, v22.2s +; CHECK-BE-NEXT: rev32 v7.8b, v7.8b +; CHECK-BE-NEXT: rev32 v16.8b, v16.8b ; CHECK-BE-NEXT: st1 { v6.2d }, [x8] -; CHECK-BE-NEXT: rev32 v6.8b, v7.8b -; CHECK-BE-NEXT: rev32 v7.8b, v16.8b -; CHECK-BE-NEXT: st1 { v5.2d }, [x0] -; CHECK-BE-NEXT: umull v4.2d, v4.2s, v19.2s ; CHECK-BE-NEXT: add x8, x0, #48 -; CHECK-BE-NEXT: st1 { v18.2d }, [x9] +; CHECK-BE-NEXT: ext v6.16b, v20.16b, v20.16b, #8 +; CHECK-BE-NEXT: st1 { v5.2d }, [x0] +; CHECK-BE-NEXT: umull v17.2d, v19.2s, v22.2s +; CHECK-BE-NEXT: umull v4.2d, v4.2s, v18.2s +; CHECK-BE-NEXT: umull v5.2d, v7.2s, v6.2s +; CHECK-BE-NEXT: umull v6.2d, v16.2s, v21.2s +; CHECK-BE-NEXT: st1 { v17.2d }, [x9] ; CHECK-BE-NEXT: add x9, x0, #16 -; CHECK-BE-NEXT: umull v5.2d, v6.2s, v17.2s ; CHECK-BE-NEXT: add x0, x0, #64 -; CHECK-BE-NEXT: umull v6.2d, v7.2s, v21.2s -; CHECK-BE-NEXT: st1 { v4.2d }, [x0] ; CHECK-BE-NEXT: st1 { v5.2d }, [x8] +; CHECK-BE-NEXT: st1 { v4.2d }, [x0] ; CHECK-BE-NEXT: st1 { v6.2d }, [x9] ; CHECK-BE-NEXT: b.ne .LBB25_1 ; CHECK-BE-NEXT: // %bb.2: // %exit