diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp --- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -376,8 +376,9 @@ bool eraseIfRedundant(MachineInstr &Copy, MCRegister Src, MCRegister Def); void forwardUses(MachineInstr &MI); void propagateDefs(MachineInstr &MI); - bool isForwardableRegClassCopy(const MachineInstr &Copy, - const MachineInstr &UseI, unsigned UseIdx); + bool isForwardableRegClassCopy(const DestSourcePair &CopyOperands, + const MachineInstr &UseI, unsigned UseIdx, + Register ForwardedReg); bool isBackwardPropagatableRegClassCopy(const MachineInstr &Copy, const MachineInstr &UseI, unsigned UseIdx); @@ -502,21 +503,20 @@ return false; } -/// Decide whether we should forward the source of \param Copy to its use in -/// \param UseI based on the physical register class constraints of the opcode -/// and avoiding introducing more cross-class COPYs. -bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy, - const MachineInstr &UseI, - unsigned UseIdx) { - std::optional<DestSourcePair> CopyOperands = - isCopyInstr(Copy, *TII, UseCopyInstr); - Register CopySrcReg = CopyOperands->Source->getReg(); - +/// Decide whether we should forward the source of a COPY-like instruction +/// described by \param CopyOperands to its use in \param UseI based on the +/// physical register class constraints of the opcode and avoiding introducing +/// more cross-class COPYs. +/// \param ForwardedReg is the register that is going to be used as the new +/// source of \param UseI. It may be a sub-register of the original copy source. +bool MachineCopyPropagation::isForwardableRegClassCopy( + const DestSourcePair &CopyOperands, const MachineInstr &UseI, + unsigned UseIdx, Register ForwardedReg) { // If the new register meets the opcode register constraints, then allow // forwarding. 
if (const TargetRegisterClass *URC = UseI.getRegClassConstraint(UseIdx, TII, TRI)) - return URC->contains(CopySrcReg); + return URC->contains(ForwardedReg); auto UseICopyOperands = isCopyInstr(UseI, *TII, UseCopyInstr); if (!UseICopyOperands) @@ -546,7 +546,7 @@ bool Found = false; bool IsCrossClass = false; for (const TargetRegisterClass *RC : TRI->regclasses()) { - if (RC->contains(CopySrcReg) && RC->contains(UseDstReg)) { + if (RC->contains(ForwardedReg) && RC->contains(UseDstReg)) { Found = true; if (TRI->getCrossCopyRegClass(RC) != RC) { IsCrossClass = true; @@ -560,7 +560,8 @@ return true; // The forwarded copy would be cross-class. Only do this if the original copy // was also cross-class. - Register CopyDstReg = CopyOperands->Destination->getReg(); + Register CopyDstReg = CopyOperands.Destination->getReg(); + Register CopySrcReg = CopyOperands.Source->getReg(); for (const TargetRegisterClass *RC : TRI->regclasses()) { if (RC->contains(CopySrcReg) && RC->contains(CopyDstReg) && TRI->getCrossCopyRegClass(RC) != RC) @@ -643,22 +644,32 @@ const MachineOperand &CopySrc = *CopyOperands->Source; Register CopySrcReg = CopySrc.getReg(); - // When the use is a subregister of the COPY destination, - // record the subreg index. - unsigned SubregIdx = 0; - - // This can only occur when we are dealing with physical registers. + Register ForwardedReg = CopySrcReg; + // MI might use a sub-register of the Copy destination, in which case the + // forwarded register is the matching sub-register of the Copy source. 
if (MOUse.getReg() != CopyDstReg) { - SubregIdx = TRI->getSubRegIndex(CopyDstReg, MOUse.getReg()); - if (!SubregIdx) + unsigned SubRegIdx = TRI->getSubRegIndex(CopyDstReg, MOUse.getReg()); + assert(SubRegIdx && "findAvailCopy returned unrelated COPY"); + ForwardedReg = TRI->getSubReg(CopySrcReg, SubRegIdx); + if (!ForwardedReg) { + LLVM_DEBUG(dbgs() << "MCP: Copy source does not have sub-register " + << TRI->getSubRegIndexName(SubRegIdx) << '\n'); continue; + } + // Bail if the sub-register is reserved (e.g. X86's %bpl in 32-bit mode). + // Forwarding it may create an invalid instruction. + if (MRI->isReserved(ForwardedReg)) { + LLVM_DEBUG(dbgs() << "MCP: Forwarded sub-register " + << printReg(ForwardedReg, TRI) << " is reserved\n"); + continue; + } } // Don't forward COPYs of reserved regs unless they are constant. if (MRI->isReserved(CopySrcReg) && !MRI->isConstantPhysReg(CopySrcReg)) continue; - if (!isForwardableRegClassCopy(*Copy, MI, OpIdx)) + if (!isForwardableRegClassCopy(*CopyOperands, MI, OpIdx, ForwardedReg)) continue; if (hasImplicitOverlap(MI, MOUse)) @@ -667,6 +678,10 @@ // Check that the instruction is not a copy that partially overwrites the // original copy source that we are about to use. The tracker mechanism // cannot cope with that. + // TODO: It might be too conservative in case of sub-register forwarding. + // If it is not, need to explain why and add a test. At the moment, + // changing CopySrcReg to ForwardedReg only results in (presumably legit) + // changes in one test. 
if (isCopyInstr(MI, *TII, UseCopyInstr) && MI.modifiesRegister(CopySrcReg, TRI) && !MI.definesRegister(CopySrcReg)) { @@ -681,13 +696,10 @@ } LLVM_DEBUG(dbgs() << "MCP: Replacing " << printReg(MOUse.getReg(), TRI) - << "\n with " << printReg(CopySrcReg, TRI) + << "\n with " << printReg(ForwardedReg, TRI) << "\n in " << MI << " from " << *Copy); - if (SubregIdx) - MOUse.setReg(TRI->getSubReg(CopySrcReg, SubregIdx)); - else - MOUse.setReg(CopySrcReg); + MOUse.setReg(ForwardedReg); if (!CopySrc.isRenamable()) MOUse.setIsRenamable(false); diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll --- a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll +++ b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll @@ -579,7 +579,7 @@ ; CHECK32-LABEL: postidx_clobber: ; CHECK32: ; %bb.0: ; CHECK32-NEXT: mov x8, x0 -; CHECK32-NEXT: add w0, w8, #8 +; CHECK32-NEXT: add w0, w0, #8 ; CHECK32-NEXT: str w8, [x8] ; CHECK32-NEXT: ret ; ret diff --git a/llvm/test/CodeGen/AArch64/swifterror.ll b/llvm/test/CodeGen/AArch64/swifterror.ll --- a/llvm/test/CodeGen/AArch64/swifterror.ll +++ b/llvm/test/CodeGen/AArch64/swifterror.ll @@ -154,7 +154,7 @@ ; CHECK-APPLE-ARM64_32-NEXT: mov x21, xzr ; CHECK-APPLE-ARM64_32-NEXT: bl _foo ; CHECK-APPLE-ARM64_32-NEXT: mov x0, x21 -; CHECK-APPLE-ARM64_32-NEXT: cbnz w0, LBB1_2 +; CHECK-APPLE-ARM64_32-NEXT: cbnz w21, LBB1_2 ; CHECK-APPLE-ARM64_32-NEXT: ; %bb.1: ; %cont ; CHECK-APPLE-ARM64_32-NEXT: ldrb w8, [x0, #8] ; CHECK-APPLE-ARM64_32-NEXT: strb w8, [x19] @@ -844,7 +844,7 @@ ; CHECK-APPLE-ARM64_32-NEXT: mov x21, xzr ; CHECK-APPLE-ARM64_32-NEXT: bl _foo_sret ; CHECK-APPLE-ARM64_32-NEXT: mov x0, x21 -; CHECK-APPLE-ARM64_32-NEXT: cbnz w0, LBB6_2 +; CHECK-APPLE-ARM64_32-NEXT: cbnz w21, LBB6_2 ; CHECK-APPLE-ARM64_32-NEXT: ; %bb.1: ; %cont ; CHECK-APPLE-ARM64_32-NEXT: ldrb w8, [x0, #8] ; CHECK-APPLE-ARM64_32-NEXT: strb w8, [x19] @@ -1199,7 +1199,7 @@ ; CHECK-APPLE-ARM64_32-NEXT: str w8, [sp] ; 
CHECK-APPLE-ARM64_32-NEXT: bl _foo_vararg ; CHECK-APPLE-ARM64_32-NEXT: mov x0, x21 -; CHECK-APPLE-ARM64_32-NEXT: cbnz w0, LBB8_2 +; CHECK-APPLE-ARM64_32-NEXT: cbnz w21, LBB8_2 ; CHECK-APPLE-ARM64_32-NEXT: ; %bb.1: ; %cont ; CHECK-APPLE-ARM64_32-NEXT: ldrb w8, [x0, #8] ; CHECK-APPLE-ARM64_32-NEXT: strb w8, [x19] diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/extractelement-stack-lower.ll @@ -138,14 +138,10 @@ ; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0 ; GCN-NEXT: v_add_u32_e32 v0, v1, v0 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v16, v20 -; GCN-NEXT: v_mov_b32_e32 v17, v21 -; GCN-NEXT: v_mov_b32_e32 v18, v22 -; GCN-NEXT: v_mov_b32_e32 v19, v23 -; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:464 -; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:468 -; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:472 -; GCN-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:476 +; GCN-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:464 +; GCN-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:468 +; GCN-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:472 +; GCN-NEXT: buffer_store_dword v23, off, s[0:3], s33 offset:476 ; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:480 ; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:484 ; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:488 @@ -315,14 +311,10 @@ ; GCN-NEXT: v_and_b32_e32 v1, 1, v6 ; GCN-NEXT: v_lshlrev_b32_e32 v1, 4, v1 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v16, v20 -; GCN-NEXT: v_mov_b32_e32 v17, v21 -; GCN-NEXT: v_mov_b32_e32 v18, v22 -; GCN-NEXT: v_mov_b32_e32 v19, v23 -; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:464 -; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:468 -; 
GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:472 -; GCN-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:476 +; GCN-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:464 +; GCN-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:468 +; GCN-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:472 +; GCN-NEXT: buffer_store_dword v23, off, s[0:3], s33 offset:476 ; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:480 ; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:484 ; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:488 @@ -492,14 +484,10 @@ ; GCN-NEXT: v_add_u32_e32 v2, 0x100, v2 ; GCN-NEXT: v_add_u32_e32 v1, v2, v0 ; GCN-NEXT: s_waitcnt vmcnt(0) -; GCN-NEXT: v_mov_b32_e32 v16, v20 -; GCN-NEXT: v_mov_b32_e32 v17, v21 -; GCN-NEXT: v_mov_b32_e32 v18, v22 -; GCN-NEXT: v_mov_b32_e32 v19, v23 -; GCN-NEXT: buffer_store_dword v16, off, s[0:3], s33 offset:464 -; GCN-NEXT: buffer_store_dword v17, off, s[0:3], s33 offset:468 -; GCN-NEXT: buffer_store_dword v18, off, s[0:3], s33 offset:472 -; GCN-NEXT: buffer_store_dword v19, off, s[0:3], s33 offset:476 +; GCN-NEXT: buffer_store_dword v20, off, s[0:3], s33 offset:464 +; GCN-NEXT: buffer_store_dword v21, off, s[0:3], s33 offset:468 +; GCN-NEXT: buffer_store_dword v22, off, s[0:3], s33 offset:472 +; GCN-NEXT: buffer_store_dword v23, off, s[0:3], s33 offset:476 ; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s33 offset:480 ; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s33 offset:484 ; GCN-NEXT: buffer_store_dword v10, off, s[0:3], s33 offset:488 diff --git a/llvm/test/CodeGen/AMDGPU/mcp-overlap-after-propagation.mir b/llvm/test/CodeGen/AMDGPU/mcp-overlap-after-propagation.mir --- a/llvm/test/CodeGen/AMDGPU/mcp-overlap-after-propagation.mir +++ b/llvm/test/CodeGen/AMDGPU/mcp-overlap-after-propagation.mir @@ -14,13 +14,9 @@ body: | bb.0: successors: - liveins: $sgpr2, $sgpr3, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr5, $vgpr70, $vgpr71 + liveins: 
$sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr5, $vgpr70, $vgpr71 - renamable $sgpr8_sgpr9 = S_GETPC_B64 - renamable $sgpr8 = COPY killed renamable $sgpr2 - renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 = S_LOAD_DWORDX8_IMM renamable $sgpr8_sgpr9, 144, 0 :: (invariant load (s256), align 16, addrspace 4) renamable $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = COPY killed renamable $sgpr60_sgpr61_sgpr62_sgpr63_sgpr64_sgpr65_sgpr66_sgpr67 - renamable $vgpr4 = IMAGE_GET_LOD_V1_V2_gfx10 renamable $vgpr70_vgpr71, renamable $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, renamable $sgpr96_sgpr97_sgpr98_sgpr99, 2, 1, 0, 0, 0, 0, 0, 0, implicit $exec renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63 = COPY killed renamable $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 renamable $vgpr12_vgpr13_vgpr14 = IMAGE_SAMPLE_V3_V2_gfx10 renamable $vgpr70_vgpr71, renamable $sgpr56_sgpr57_sgpr58_sgpr59_sgpr60_sgpr61_sgpr62_sgpr63, renamable $sgpr96_sgpr97_sgpr98_sgpr99, 7, 1, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load (s96), align 16) S_ENDPGM 0 diff --git a/llvm/test/CodeGen/ARM/bf16-shuffle.ll b/llvm/test/CodeGen/ARM/bf16-shuffle.ll --- a/llvm/test/CodeGen/ARM/bf16-shuffle.ll +++ b/llvm/test/CodeGen/ARM/bf16-shuffle.ll @@ -326,7 +326,7 @@ ; CHECK-NOFP16-LABEL: shuffle3step1_bf16: ; CHECK-NOFP16: @ %bb.0: @ %entry ; CHECK-NOFP16-NEXT: vorr q3, q0, q0 -; CHECK-NOFP16-NEXT: vmov.u16 r1, d6[1] +; CHECK-NOFP16-NEXT: vmov.u16 r1, d0[1] ; CHECK-NOFP16-NEXT: vmov r0, s14 ; CHECK-NOFP16-NEXT: vmov.16 d0[0], r1 ; CHECK-NOFP16-NEXT: vmov.16 d0[1], r0 @@ -348,9 +348,9 @@ ; CHECK-FP16-LABEL: shuffle3step1_bf16: ; CHECK-FP16: @ %bb.0: @ %entry ; CHECK-FP16-NEXT: vorr q3, q0, q0 -; CHECK-FP16-NEXT: vmovx.f16 s0, s12 -; CHECK-FP16-NEXT: vmovx.f16 s12, s15 +; CHECK-FP16-NEXT: vmovx.f16 s0, s0 ; CHECK-FP16-NEXT: vmov 
r1, s0 +; CHECK-FP16-NEXT: vmovx.f16 s12, s15 ; CHECK-FP16-NEXT: vmov r0, s14 ; CHECK-FP16-NEXT: vmov.16 d0[0], r1 ; CHECK-FP16-NEXT: vmov.16 d0[1], r0 diff --git a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll --- a/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/ARM/fpclamptosat_vec.ll @@ -11,8 +11,8 @@ ; CHECK-NEXT: push {r4, r5, r11, lr} ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: vmov r0, r1, d0 ; CHECK-NEXT: vorr q4, q0, q0 -; CHECK-NEXT: vmov r0, r1, d8 ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: mov r5, r1 @@ -86,8 +86,8 @@ ; CHECK-NEXT: push {r4, r5, r11, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: vorr q4, q0, q0 -; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl __aeabi_d2ulz ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: mov r5, r1 @@ -130,8 +130,8 @@ ; CHECK-NEXT: push {r4, r5, r11, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r1, d0 ; CHECK-NEXT: vorr q4, q0, q0 -; CHECK-NEXT: vmov r0, r1, d8 ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: mov r5, r1 @@ -197,8 +197,8 @@ ; CHECK-NEXT: sub sp, sp, #4 ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: vorr q4, q0, q0 -; CHECK-NEXT: vmov r0, s16 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: mov r7, r0 ; CHECK-NEXT: vmov r0, s18 @@ -326,8 +326,8 @@ ; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: vmov r0, s1 ; CHECK-NEXT: vorr q4, q0, q0 -; CHECK-NEXT: vmov r0, s17 ; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: mov r9, r0 ; CHECK-NEXT: vmov r0, s16 @@ -399,8 +399,8 @@ ; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr} ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: vpush {d8, d9, d10, 
d11, d12, d13} +; CHECK-NEXT: vmov r0, s1 ; CHECK-NEXT: vorr q4, q0, q0 -; CHECK-NEXT: vmov r0, s17 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: mov r5, r0 ; CHECK-NEXT: vmov r0, s16 @@ -2376,8 +2376,8 @@ ; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r1, d0 ; CHECK-NEXT: vorr q4, q0, q0 -; CHECK-NEXT: vmov r0, r1, d8 ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: vmov r0, r2, d9 @@ -2433,8 +2433,8 @@ ; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: vorr q4, q0, q0 -; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl __aeabi_d2ulz ; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: vmov r2, r1, d8 @@ -2464,8 +2464,8 @@ ; CHECK-NEXT: push {r4, r5, r6, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r1, d0 ; CHECK-NEXT: vorr q4, q0, q0 -; CHECK-NEXT: vmov r0, r1, d8 ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: vmov r2, r12, d9 ; CHECK-NEXT: cmp r1, #0 @@ -2514,11 +2514,11 @@ ; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vorr q4, q0, q0 +; CHECK-NEXT: vmov r0, s3 ; CHECK-NEXT: mov r8, #-2147483648 +; CHECK-NEXT: vorr q4, q0, q0 ; CHECK-NEXT: mvn r7, #-2147483648 -; CHECK-NEXT: vmov r0, s19 -; CHECK-NEXT: vmov r5, s16 +; CHECK-NEXT: vmov r5, s0 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: cmn r0, #-2147483647 @@ -2607,8 +2607,8 @@ ; CHECK-NEXT: push {r4, r5, r6, r7, r11, lr} ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: vmov r0, s1 ; CHECK-NEXT: vorr q4, q0, q0 -; CHECK-NEXT: vmov r0, s17 ; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: vmov r1, s18 @@ -2653,11 +2653,11 @@ ; CHECK-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: 
vorr q4, q0, q0 +; CHECK-NEXT: vmov r0, s3 ; CHECK-NEXT: mvn r9, #0 -; CHECK-NEXT: vmov r0, s19 -; CHECK-NEXT: vmov r5, s16 -; CHECK-NEXT: vmov r8, s18 +; CHECK-NEXT: vorr q4, q0, q0 +; CHECK-NEXT: vmov r5, s0 +; CHECK-NEXT: vmov r8, s2 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: cmp r1, #0 ; CHECK-NEXT: mvn r2, #0 diff --git a/llvm/test/CodeGen/ARM/pr60908.mir b/llvm/test/CodeGen/ARM/pr60908.mir new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/ARM/pr60908.mir @@ -0,0 +1,45 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2 +# RUN: llc -mtriple=armv7 -run-pass=machine-cp %s -o - | FileCheck %s + +# Positive test: $d15 has sub-register $s30, which should be propagated. +--- +name: test_d15 +tracksRegLiveness: true +liveins: + - { reg: '$d15' } +body: | + bb.0.entry: + liveins: $d15 + + ; CHECK-LABEL: name: test_d15 + ; CHECK: liveins: $d15 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $s0 = COPY $s30 + ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0 + renamable $d14 = COPY killed renamable $d15 + renamable $s0 = COPY killed renamable $s28 + BX_RET 14 /* CC::al */, $noreg, implicit $s0 + +... + +# Negative test: $d18 does not have sub-registers. +--- +name: test_d18 +tracksRegLiveness: true +liveins: + - { reg: '$d18' } +body: | + bb.0.entry: + liveins: $d18 + + ; CHECK-LABEL: name: test_d18 + ; CHECK: liveins: $d18 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: renamable $d14 = COPY killed renamable $d18 + ; CHECK-NEXT: renamable $s0 = COPY killed renamable $s28 + ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0 + renamable $d14 = COPY killed renamable $d18 + renamable $s0 = COPY killed renamable $s28 + BX_RET 14 /* CC::al */, $noreg, implicit $s0 + +... 
diff --git a/llvm/test/CodeGen/Hexagon/autohvx/reg-sequence.ll b/llvm/test/CodeGen/Hexagon/autohvx/reg-sequence.ll --- a/llvm/test/CodeGen/Hexagon/autohvx/reg-sequence.ll +++ b/llvm/test/CodeGen/Hexagon/autohvx/reg-sequence.ll @@ -100,7 +100,7 @@ ; CHECK-LABEL: test_22: ; CHECK: vcombine(v3,v2) -; CHECK: v1 = v0 +; CHECK: v1 = v2 ; Result: v1:0 = vcombine(v2,v2) define <128 x i8> @test_22(<128 x i8> %a0, <128 x i8> %a1) #0 { b2: @@ -146,7 +146,7 @@ ; CHECK-LABEL: test_33: ; CHECK: vcombine(v3,v2) -; CHECK: v0 = v1 +; CHECK: v0 = v3 ; Result: v1:0 = vcombine(v3,v3) define <128 x i8> @test_33(<128 x i8> %a0, <128 x i8> %a1) #0 { b2: diff --git a/llvm/test/CodeGen/Hexagon/swp-reuse-phi-6.ll b/llvm/test/CodeGen/Hexagon/swp-reuse-phi-6.ll --- a/llvm/test/CodeGen/Hexagon/swp-reuse-phi-6.ll +++ b/llvm/test/CodeGen/Hexagon/swp-reuse-phi-6.ll @@ -9,7 +9,8 @@ ; CHECK: loop0(.LBB0_[[LOOP:.]], ; CHECK: .LBB0_[[LOOP]]: ; CHECK: vlalign([[VREG1:v([0-9]+)]],[[VREG2:v([0-9]+)]],#2) -; CHECK: [[VREG2]]:{{[0-9]+}} = vcombine([[VREG1]],v{{[0-9]+}}) +; CHECK: [[VREG3:v([0-9]+)]]:{{[0-9]+}} = vcombine([[VREG1]],v{{[0-9]+}}) +; CHECK: [[VREG2]]:{{[0-9]+}} = vcombine([[VREG3]],v{{[0-9]+}}) ; CHECK: }{{[ \t]*}}:endloop0 ; Function Attrs: nounwind diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-round.ll b/llvm/test/CodeGen/PowerPC/fp-strict-round.ll --- a/llvm/test/CodeGen/PowerPC/fp-strict-round.ll +++ b/llvm/test/CodeGen/PowerPC/fp-strict-round.ll @@ -318,11 +318,11 @@ ; P8-NEXT: .cfi_offset v30, -32 ; P8-NEXT: .cfi_offset v31, -16 ; P8-NEXT: li r3, 128 +; P8-NEXT: xxlor f1, v2, v2 ; P8-NEXT: stxvd2x v30, r1, r3 # 16-byte Folded Spill ; P8-NEXT: li r3, 144 ; P8-NEXT: stxvd2x v31, r1, r3 # 16-byte Folded Spill ; P8-NEXT: vmr v31, v2 -; P8-NEXT: xxlor f1, v31, v31 ; P8-NEXT: bl nearbyint ; P8-NEXT: nop ; P8-NEXT: xxlor v30, f1, f1 @@ -350,10 +350,10 @@ ; P9-NEXT: .cfi_offset lr, 16 ; P9-NEXT: .cfi_offset v30, -32 ; P9-NEXT: .cfi_offset v31, -16 +; P9-NEXT: xscpsgndp f1, v2, v2 +; P9-NEXT: 
stxv v30, 32(r1) # 16-byte Folded Spill ; P9-NEXT: stxv v31, 48(r1) # 16-byte Folded Spill ; P9-NEXT: vmr v31, v2 -; P9-NEXT: xscpsgndp f1, v31, v31 -; P9-NEXT: stxv v30, 32(r1) # 16-byte Folded Spill ; P9-NEXT: bl nearbyint ; P9-NEXT: nop ; P9-NEXT: xscpsgndp v30, f1, f1 diff --git a/llvm/test/CodeGen/PowerPC/frem.ll b/llvm/test/CodeGen/PowerPC/frem.ll --- a/llvm/test/CodeGen/PowerPC/frem.ll +++ b/llvm/test/CodeGen/PowerPC/frem.ll @@ -112,13 +112,13 @@ ; CHECK-NEXT: .cfi_offset v29, -48 ; CHECK-NEXT: .cfi_offset v30, -32 ; CHECK-NEXT: .cfi_offset v31, -16 +; CHECK-NEXT: xscpsgndp 1, 34, 34 +; CHECK-NEXT: xscpsgndp 2, 35, 35 +; CHECK-NEXT: stxv 61, 32(1) # 16-byte Folded Spill ; CHECK-NEXT: stxv 62, 48(1) # 16-byte Folded Spill ; CHECK-NEXT: stxv 63, 64(1) # 16-byte Folded Spill ; CHECK-NEXT: vmr 31, 3 -; CHECK-NEXT: xscpsgndp 2, 63, 63 ; CHECK-NEXT: vmr 30, 2 -; CHECK-NEXT: xscpsgndp 1, 62, 62 -; CHECK-NEXT: stxv 61, 32(1) # 16-byte Folded Spill ; CHECK-NEXT: bl fmod ; CHECK-NEXT: nop ; CHECK-NEXT: xscpsgndp 61, 1, 1 diff --git a/llvm/test/CodeGen/PowerPC/inlineasm-i64-reg.ll b/llvm/test/CodeGen/PowerPC/inlineasm-i64-reg.ll --- a/llvm/test/CodeGen/PowerPC/inlineasm-i64-reg.ll +++ b/llvm/test/CodeGen/PowerPC/inlineasm-i64-reg.ll @@ -73,9 +73,9 @@ ; CHECK-LABEL: @main -; CHECK: mr [[REG:[0-9]+]], 3 ; CHECK: std 0, -; CHECK: stw [[REG]], +; CHECK: stw 3, +; CHECK: mr [[REG:[0-9]+]], 3 ; CHECK: #APP ; CHECK: sc diff --git a/llvm/test/CodeGen/PowerPC/ldst-16-byte.mir b/llvm/test/CodeGen/PowerPC/ldst-16-byte.mir --- a/llvm/test/CodeGen/PowerPC/ldst-16-byte.mir +++ b/llvm/test/CodeGen/PowerPC/ldst-16-byte.mir @@ -211,7 +211,7 @@ ; CHECK: liveins: $g8p8 ; CHECK: $x4 = OR8 $x16, $x16 ; CHECK: $x5 = OR8 $x17, $x17 - ; CHECK: $x3 = OR8 $x5, $x5 + ; CHECK: $x3 = OR8 $x17, $x17 ; CHECK: BLR8 implicit $lr8, implicit undef $rm, implicit killed $x3, implicit $x4 %0:g8prc = COPY $g8p8 $x3 = COPY %0.sub_gp8_x1:g8prc diff --git a/llvm/test/CodeGen/PowerPC/machine-pre.ll 
b/llvm/test/CodeGen/PowerPC/machine-pre.ll --- a/llvm/test/CodeGen/PowerPC/machine-pre.ll +++ b/llvm/test/CodeGen/PowerPC/machine-pre.ll @@ -63,11 +63,11 @@ ; CHECK-P9-NEXT: std r29, -24(r1) # 8-byte Folded Spill ; CHECK-P9-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; CHECK-P9-NEXT: stdu r1, -80(r1) -; CHECK-P9-NEXT: mr r30, r4 ; CHECK-P9-NEXT: mr r29, r3 +; CHECK-P9-NEXT: add r28, r4, r3 ; CHECK-P9-NEXT: lis r3, 21845 +; CHECK-P9-NEXT: mr r30, r4 ; CHECK-P9-NEXT: std r0, 96(r1) -; CHECK-P9-NEXT: add r28, r30, r29 ; CHECK-P9-NEXT: ori r27, r3, 21846 ; CHECK-P9-NEXT: b .LBB1_4 ; CHECK-P9-NEXT: .p2align 4 diff --git a/llvm/test/CodeGen/PowerPC/ppc-ctr-dead-code.ll b/llvm/test/CodeGen/PowerPC/ppc-ctr-dead-code.ll --- a/llvm/test/CodeGen/PowerPC/ppc-ctr-dead-code.ll +++ b/llvm/test/CodeGen/PowerPC/ppc-ctr-dead-code.ll @@ -32,8 +32,8 @@ ; V23-LABEL: limit_loop: ; V23: # %bb.0: # %entry ; V23-NEXT: mr 6, 3 +; V23-NEXT: cmpwi 3, 0 ; V23-NEXT: li 3, 0 -; V23-NEXT: cmpwi 6, 0 ; V23-NEXT: blelr 0 ; V23-NEXT: # %bb.1: # %for.body.preheader ; V23-NEXT: addi 4, 4, -4 diff --git a/llvm/test/CodeGen/PowerPC/tail-dup-break-cfg.ll b/llvm/test/CodeGen/PowerPC/tail-dup-break-cfg.ll --- a/llvm/test/CodeGen/PowerPC/tail-dup-break-cfg.ll +++ b/llvm/test/CodeGen/PowerPC/tail-dup-break-cfg.ll @@ -12,8 +12,8 @@ ;CHECK-LABEL: tail_dup_break_cfg: ;CHECK: mr [[TAGREG:[0-9]+]], 3 -;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1 -;CHECK-NEXT: bc 12, 1, [[BODY1LABEL:[._0-9A-Za-z]+]] +;CHECK: andi. {{[0-9]+}}, 3, 1 +;CHECK: bc 12, 1, [[BODY1LABEL:[._0-9A-Za-z]+]] ;CHECK-NEXT: # %test2 ;CHECK-NEXT: andi. {{[0-9]+}}, [[TAGREG]], 2 ;CHECK-NEXT: bne 0, [[BODY2LABEL:[._0-9A-Za-z]+]] @@ -54,8 +54,8 @@ ; The branch weights here hint that we shouldn't tail duplicate in this case. ;CHECK-LABEL: tail_dup_dont_break_cfg: ;CHECK: mr [[TAGREG:[0-9]+]], 3 -;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1 -;CHECK-NEXT: bc 4, 1, [[TEST2LABEL:[._0-9A-Za-z]+]] +;CHECK: andi. 
{{[0-9]+}}, 3, 1 +;CHECK: bc 4, 1, [[TEST2LABEL:[._0-9A-Za-z]+]] ;CHECK-NEXT: # %body1 ;CHECK: [[TEST2LABEL]]: # %test2 ;CHECK-NEXT: andi. {{[0-9]+}}, [[TAGREG]], 2 diff --git a/llvm/test/CodeGen/PowerPC/tail-dup-layout.ll b/llvm/test/CodeGen/PowerPC/tail-dup-layout.ll --- a/llvm/test/CodeGen/PowerPC/tail-dup-layout.ll +++ b/llvm/test/CodeGen/PowerPC/tail-dup-layout.ll @@ -25,8 +25,8 @@ ;CHECK-LABEL: straight_test: ; test1 may have been merged with entry ;CHECK: mr [[TAGREG:[0-9]+]], 3 -;CHECK: andi. {{[0-9]+}}, [[TAGREG:[0-9]+]], 1 -;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[_0-9A-Za-z]+]] +;CHECK: andi. {{[0-9]+}}, 3, 1 +;CHECK: bc 12, 1, .[[OPT1LABEL:[_0-9A-Za-z]+]] ;CHECK-NEXT: # %test2 ;CHECK-NEXT: andi. {{[0-9]+}}, [[TAGREG]], 2 ;CHECK-NEXT: bne 0, .[[OPT2LABEL:[_0-9A-Za-z]+]] @@ -116,8 +116,8 @@ ;CHECK-LABEL: straight_test_50: ; test1 may have been merged with entry ;CHECK: mr [[TAGREG:[0-9]+]], 3 -;CHECK: andi. {{[0-9]+}}, [[TAGREG]], 1 -;CHECK-NEXT: bc 12, 1, .[[OPT1LABEL:[_0-9A-Za-z]+]] +;CHECK: andi. {{[0-9]+}}, 3, 1 +;CHECK: bc 12, 1, .[[OPT1LABEL:[_0-9A-Za-z]+]] ;CHECK-NEXT: # %test2 ;CHECK-NEXT: andi. 
{{[0-9]+}}, [[TAGREG]], 2 ;CHECK-NEXT: bne 0, .[[OPT2LABEL:[_0-9A-Za-z]+]] @@ -186,8 +186,8 @@ ;CHECK-LABEL: straight_test_3_instr_test: ; test1 may have been merged with entry ;CHECK: mr [[TAGREG:[0-9]+]], 3 -;CHECK: clrlwi {{[0-9]+}}, [[TAGREG]], 30 -;CHECK-NEXT: cmplwi {{[0-9]+}}, 2 +;CHECK: clrlwi {{[0-9]+}}, 3, 30 +;CHECK: cmplwi {{[0-9]+}}, 2 ;CHECK-O3-NEXT: bne 0, .[[OPT1LABEL:[_0-9A-Za-z]+]] ;CHECK-O3-NEXT: # %test2 diff --git a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll --- a/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/vector-constrained-fp-intrinsics.ll @@ -204,15 +204,15 @@ ; PC64LE-NEXT: stdu 1, -96(1) ; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: std 0, 112(1) +; PC64LE-NEXT: xxlor 1, 34, 34 ; PC64LE-NEXT: stxvd2x 61, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: xxlor 2, 35, 35 ; PC64LE-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: li 3, 80 ; PC64LE-NEXT: vmr 30, 2 ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: vmr 31, 3 -; PC64LE-NEXT: xxlor 1, 62, 62 -; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: bl fmod ; PC64LE-NEXT: nop ; PC64LE-NEXT: xxlor 61, 1, 1 @@ -239,14 +239,14 @@ ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 ; PC64LE9-NEXT: stdu 1, -80(1) +; PC64LE9-NEXT: xscpsgndp 1, 34, 34 +; PC64LE9-NEXT: xscpsgndp 2, 35, 35 ; PC64LE9-NEXT: std 0, 96(1) +; PC64LE9-NEXT: stxv 61, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: stxv 62, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: stxv 63, 64(1) # 16-byte Folded Spill ; PC64LE9-NEXT: vmr 31, 3 -; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: vmr 30, 2 -; PC64LE9-NEXT: xscpsgndp 1, 62, 62 -; PC64LE9-NEXT: stxv 61, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: bl fmod ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscpsgndp 61, 1, 1 @@ -486,8 +486,10 @@ ; PC64LE-NEXT: stdu 1, -128(1) ; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: std 0, 
144(1) +; PC64LE-NEXT: xxlor 1, 34, 34 ; PC64LE-NEXT: stxvd2x 59, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: xxlor 2, 36, 36 ; PC64LE-NEXT: stxvd2x 60, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: li 3, 80 ; PC64LE-NEXT: vmr 28, 2 @@ -495,12 +497,10 @@ ; PC64LE-NEXT: li 3, 96 ; PC64LE-NEXT: vmr 29, 3 ; PC64LE-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill -; PC64LE-NEXT: vmr 30, 4 ; PC64LE-NEXT: li 3, 112 +; PC64LE-NEXT: vmr 30, 4 ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: vmr 31, 5 -; PC64LE-NEXT: xxlor 1, 60, 60 -; PC64LE-NEXT: xxlor 2, 62, 62 ; PC64LE-NEXT: bl fmod ; PC64LE-NEXT: nop ; PC64LE-NEXT: xxlor 59, 1, 1 @@ -545,18 +545,18 @@ ; PC64LE9: # %bb.0: ; PC64LE9-NEXT: mflr 0 ; PC64LE9-NEXT: stdu 1, -112(1) +; PC64LE9-NEXT: xscpsgndp 1, 34, 34 +; PC64LE9-NEXT: xscpsgndp 2, 36, 36 ; PC64LE9-NEXT: std 0, 128(1) -; PC64LE9-NEXT: stxv 60, 48(1) # 16-byte Folded Spill -; PC64LE9-NEXT: stxv 62, 80(1) # 16-byte Folded Spill -; PC64LE9-NEXT: vmr 30, 4 -; PC64LE9-NEXT: xscpsgndp 2, 62, 62 -; PC64LE9-NEXT: vmr 28, 2 -; PC64LE9-NEXT: xscpsgndp 1, 60, 60 ; PC64LE9-NEXT: stxv 59, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: stxv 60, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: stxv 61, 64(1) # 16-byte Folded Spill +; PC64LE9-NEXT: stxv 62, 80(1) # 16-byte Folded Spill ; PC64LE9-NEXT: stxv 63, 96(1) # 16-byte Folded Spill ; PC64LE9-NEXT: vmr 31, 5 +; PC64LE9-NEXT: vmr 30, 4 ; PC64LE9-NEXT: vmr 29, 3 +; PC64LE9-NEXT: vmr 28, 2 ; PC64LE9-NEXT: bl fmod ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscpsgndp 59, 1, 1 @@ -1264,15 +1264,15 @@ ; PC64LE-NEXT: stdu 1, -96(1) ; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: std 0, 112(1) +; PC64LE-NEXT: xxlor 1, 34, 34 ; PC64LE-NEXT: stxvd2x 61, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: xxlor 2, 35, 35 ; PC64LE-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: li 3, 80 ; PC64LE-NEXT: vmr 30, 2 ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: vmr 
31, 3 -; PC64LE-NEXT: xxlor 1, 62, 62 -; PC64LE-NEXT: xxlor 2, 63, 63 ; PC64LE-NEXT: bl pow ; PC64LE-NEXT: nop ; PC64LE-NEXT: xxlor 61, 1, 1 @@ -1299,14 +1299,14 @@ ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 ; PC64LE9-NEXT: stdu 1, -80(1) +; PC64LE9-NEXT: xscpsgndp 1, 34, 34 +; PC64LE9-NEXT: xscpsgndp 2, 35, 35 ; PC64LE9-NEXT: std 0, 96(1) +; PC64LE9-NEXT: stxv 61, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: stxv 62, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: stxv 63, 64(1) # 16-byte Folded Spill ; PC64LE9-NEXT: vmr 31, 3 -; PC64LE9-NEXT: xscpsgndp 2, 63, 63 ; PC64LE9-NEXT: vmr 30, 2 -; PC64LE9-NEXT: xscpsgndp 1, 62, 62 -; PC64LE9-NEXT: stxv 61, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: bl pow ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscpsgndp 61, 1, 1 @@ -1546,8 +1546,10 @@ ; PC64LE-NEXT: stdu 1, -128(1) ; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: std 0, 144(1) +; PC64LE-NEXT: xxlor 1, 34, 34 ; PC64LE-NEXT: stxvd2x 59, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: li 3, 64 +; PC64LE-NEXT: xxlor 2, 36, 36 ; PC64LE-NEXT: stxvd2x 60, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: li 3, 80 ; PC64LE-NEXT: vmr 28, 2 @@ -1555,12 +1557,10 @@ ; PC64LE-NEXT: li 3, 96 ; PC64LE-NEXT: vmr 29, 3 ; PC64LE-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill -; PC64LE-NEXT: vmr 30, 4 ; PC64LE-NEXT: li 3, 112 +; PC64LE-NEXT: vmr 30, 4 ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: vmr 31, 5 -; PC64LE-NEXT: xxlor 1, 60, 60 -; PC64LE-NEXT: xxlor 2, 62, 62 ; PC64LE-NEXT: bl pow ; PC64LE-NEXT: nop ; PC64LE-NEXT: xxlor 59, 1, 1 @@ -1605,18 +1605,18 @@ ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 ; PC64LE9-NEXT: stdu 1, -112(1) +; PC64LE9-NEXT: xscpsgndp 1, 34, 34 +; PC64LE9-NEXT: xscpsgndp 2, 36, 36 ; PC64LE9-NEXT: std 0, 128(1) -; PC64LE9-NEXT: stxv 60, 48(1) # 16-byte Folded Spill -; PC64LE9-NEXT: stxv 62, 80(1) # 16-byte Folded Spill -; PC64LE9-NEXT: vmr 30, 4 -; PC64LE9-NEXT: xscpsgndp 2, 62, 62 -; PC64LE9-NEXT: vmr 28, 2 -; PC64LE9-NEXT: xscpsgndp 1, 60, 60 ; 
PC64LE9-NEXT: stxv 59, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: stxv 60, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: stxv 61, 64(1) # 16-byte Folded Spill +; PC64LE9-NEXT: stxv 62, 80(1) # 16-byte Folded Spill ; PC64LE9-NEXT: stxv 63, 96(1) # 16-byte Folded Spill ; PC64LE9-NEXT: vmr 31, 5 +; PC64LE9-NEXT: vmr 30, 4 ; PC64LE9-NEXT: vmr 29, 3 +; PC64LE9-NEXT: vmr 28, 2 ; PC64LE9-NEXT: bl pow ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscpsgndp 59, 1, 1 @@ -1700,16 +1700,16 @@ ; PC64LE: # %bb.0: # %entry ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: stdu 1, -96(1) -; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: std 0, 112(1) ; PC64LE-NEXT: std 30, 80(1) # 8-byte Folded Spill +; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: clrldi 30, 5, 32 +; PC64LE-NEXT: xxlor 1, 34, 34 ; PC64LE-NEXT: mr 4, 30 ; PC64LE-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: vmr 31, 2 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: bl __powidf2 ; PC64LE-NEXT: nop ; PC64LE-NEXT: xxlor 62, 1, 1 @@ -1736,11 +1736,11 @@ ; PC64LE9-NEXT: stdu 1, -80(1) ; PC64LE9-NEXT: std 0, 96(1) ; PC64LE9-NEXT: std 30, 64(1) # 8-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 1, 34, 34 +; PC64LE9-NEXT: stxv 62, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill -; PC64LE9-NEXT: vmr 31, 2 ; PC64LE9-NEXT: clrldi 30, 5, 32 -; PC64LE9-NEXT: stxv 62, 32(1) # 16-byte Folded Spill -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 +; PC64LE9-NEXT: vmr 31, 2 ; PC64LE9-NEXT: mr 4, 30 ; PC64LE9-NEXT: bl __powidf2 ; PC64LE9-NEXT: nop @@ -1970,15 +1970,15 @@ ; PC64LE-NEXT: std 0, 128(1) ; PC64LE-NEXT: std 30, 96(1) # 8-byte Folded Spill ; PC64LE-NEXT: clrldi 30, 7, 32 +; PC64LE-NEXT: xxlor 1, 34, 34 ; PC64LE-NEXT: mr 4, 30 ; PC64LE-NEXT: stxvd2x 61, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill -; PC64LE-NEXT: vmr 30, 2 ; PC64LE-NEXT: li 3, 80 +; PC64LE-NEXT: vmr 30, 2 ; PC64LE-NEXT: 
stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: vmr 31, 3 -; PC64LE-NEXT: xxlor 1, 62, 62 ; PC64LE-NEXT: bl __powidf2 ; PC64LE-NEXT: nop ; PC64LE-NEXT: xxlor 61, 1, 1 @@ -2020,14 +2020,14 @@ ; PC64LE9-NEXT: stdu 1, -96(1) ; PC64LE9-NEXT: std 0, 112(1) ; PC64LE9-NEXT: std 30, 80(1) # 8-byte Folded Spill +; PC64LE9-NEXT: xscpsgndp 1, 34, 34 +; PC64LE9-NEXT: stxv 61, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: stxv 62, 48(1) # 16-byte Folded Spill -; PC64LE9-NEXT: vmr 30, 2 ; PC64LE9-NEXT: clrldi 30, 7, 32 -; PC64LE9-NEXT: stxv 61, 32(1) # 16-byte Folded Spill -; PC64LE9-NEXT: xscpsgndp 1, 62, 62 -; PC64LE9-NEXT: mr 4, 30 ; PC64LE9-NEXT: stxv 63, 64(1) # 16-byte Folded Spill +; PC64LE9-NEXT: mr 4, 30 ; PC64LE9-NEXT: vmr 31, 3 +; PC64LE9-NEXT: vmr 30, 2 ; PC64LE9-NEXT: bl __powidf2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: mr 4, 30 @@ -2106,12 +2106,12 @@ ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: stdu 1, -80(1) ; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: xxlor 1, 34, 34 ; PC64LE-NEXT: std 0, 96(1) ; PC64LE-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: vmr 31, 2 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: bl sin ; PC64LE-NEXT: nop ; PC64LE-NEXT: xxlor 62, 1, 1 @@ -2134,11 +2134,11 @@ ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 ; PC64LE9-NEXT: stdu 1, -64(1) +; PC64LE9-NEXT: xscpsgndp 1, 34, 34 ; PC64LE9-NEXT: std 0, 80(1) +; PC64LE9-NEXT: stxv 62, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: vmr 31, 2 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 -; PC64LE9-NEXT: stxv 62, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: bl sin ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscpsgndp 62, 1, 1 @@ -2334,14 +2334,14 @@ ; PC64LE-NEXT: stdu 1, -96(1) ; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: std 0, 112(1) +; PC64LE-NEXT: xxlor 1, 34, 34 ; PC64LE-NEXT: stxvd2x 61, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: stxvd2x 62, 1, 3 # 
16-byte Folded Spill -; PC64LE-NEXT: vmr 30, 2 ; PC64LE-NEXT: li 3, 80 +; PC64LE-NEXT: vmr 30, 2 ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: vmr 31, 3 -; PC64LE-NEXT: xxlor 1, 62, 62 ; PC64LE-NEXT: bl sin ; PC64LE-NEXT: nop ; PC64LE-NEXT: xxlor 61, 1, 1 @@ -2377,13 +2377,13 @@ ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 ; PC64LE9-NEXT: stdu 1, -80(1) +; PC64LE9-NEXT: xscpsgndp 1, 34, 34 ; PC64LE9-NEXT: std 0, 96(1) -; PC64LE9-NEXT: stxv 62, 48(1) # 16-byte Folded Spill -; PC64LE9-NEXT: vmr 30, 2 -; PC64LE9-NEXT: xscpsgndp 1, 62, 62 ; PC64LE9-NEXT: stxv 61, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: stxv 62, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: stxv 63, 64(1) # 16-byte Folded Spill ; PC64LE9-NEXT: vmr 31, 3 +; PC64LE9-NEXT: vmr 30, 2 ; PC64LE9-NEXT: bl sin ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscpsgndp 61, 1, 1 @@ -2457,12 +2457,12 @@ ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: stdu 1, -80(1) ; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: xxlor 1, 34, 34 ; PC64LE-NEXT: std 0, 96(1) ; PC64LE-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: vmr 31, 2 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: bl cos ; PC64LE-NEXT: nop ; PC64LE-NEXT: xxlor 62, 1, 1 @@ -2485,11 +2485,11 @@ ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 ; PC64LE9-NEXT: stdu 1, -64(1) +; PC64LE9-NEXT: xscpsgndp 1, 34, 34 ; PC64LE9-NEXT: std 0, 80(1) +; PC64LE9-NEXT: stxv 62, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: vmr 31, 2 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 -; PC64LE9-NEXT: stxv 62, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: bl cos ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscpsgndp 62, 1, 1 @@ -2685,14 +2685,14 @@ ; PC64LE-NEXT: stdu 1, -96(1) ; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: std 0, 112(1) +; PC64LE-NEXT: xxlor 1, 34, 34 ; PC64LE-NEXT: stxvd2x 61, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: 
stxvd2x 62, 1, 3 # 16-byte Folded Spill -; PC64LE-NEXT: vmr 30, 2 ; PC64LE-NEXT: li 3, 80 +; PC64LE-NEXT: vmr 30, 2 ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: vmr 31, 3 -; PC64LE-NEXT: xxlor 1, 62, 62 ; PC64LE-NEXT: bl cos ; PC64LE-NEXT: nop ; PC64LE-NEXT: xxlor 61, 1, 1 @@ -2728,13 +2728,13 @@ ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 ; PC64LE9-NEXT: stdu 1, -80(1) +; PC64LE9-NEXT: xscpsgndp 1, 34, 34 ; PC64LE9-NEXT: std 0, 96(1) -; PC64LE9-NEXT: stxv 62, 48(1) # 16-byte Folded Spill -; PC64LE9-NEXT: vmr 30, 2 -; PC64LE9-NEXT: xscpsgndp 1, 62, 62 ; PC64LE9-NEXT: stxv 61, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: stxv 62, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: stxv 63, 64(1) # 16-byte Folded Spill ; PC64LE9-NEXT: vmr 31, 3 +; PC64LE9-NEXT: vmr 30, 2 ; PC64LE9-NEXT: bl cos ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscpsgndp 61, 1, 1 @@ -2808,12 +2808,12 @@ ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: stdu 1, -80(1) ; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: xxlor 1, 34, 34 ; PC64LE-NEXT: std 0, 96(1) ; PC64LE-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: vmr 31, 2 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: bl exp ; PC64LE-NEXT: nop ; PC64LE-NEXT: xxlor 62, 1, 1 @@ -2836,11 +2836,11 @@ ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 ; PC64LE9-NEXT: stdu 1, -64(1) +; PC64LE9-NEXT: xscpsgndp 1, 34, 34 ; PC64LE9-NEXT: std 0, 80(1) +; PC64LE9-NEXT: stxv 62, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: vmr 31, 2 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 -; PC64LE9-NEXT: stxv 62, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: bl exp ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscpsgndp 62, 1, 1 @@ -3036,14 +3036,14 @@ ; PC64LE-NEXT: stdu 1, -96(1) ; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: std 0, 112(1) +; PC64LE-NEXT: xxlor 1, 34, 34 ; PC64LE-NEXT: stxvd2x 61, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: li 3, 
64 ; PC64LE-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill -; PC64LE-NEXT: vmr 30, 2 ; PC64LE-NEXT: li 3, 80 +; PC64LE-NEXT: vmr 30, 2 ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: vmr 31, 3 -; PC64LE-NEXT: xxlor 1, 62, 62 ; PC64LE-NEXT: bl exp ; PC64LE-NEXT: nop ; PC64LE-NEXT: xxlor 61, 1, 1 @@ -3079,13 +3079,13 @@ ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 ; PC64LE9-NEXT: stdu 1, -80(1) +; PC64LE9-NEXT: xscpsgndp 1, 34, 34 ; PC64LE9-NEXT: std 0, 96(1) -; PC64LE9-NEXT: stxv 62, 48(1) # 16-byte Folded Spill -; PC64LE9-NEXT: vmr 30, 2 -; PC64LE9-NEXT: xscpsgndp 1, 62, 62 ; PC64LE9-NEXT: stxv 61, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: stxv 62, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: stxv 63, 64(1) # 16-byte Folded Spill ; PC64LE9-NEXT: vmr 31, 3 +; PC64LE9-NEXT: vmr 30, 2 ; PC64LE9-NEXT: bl exp ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscpsgndp 61, 1, 1 @@ -3159,12 +3159,12 @@ ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: stdu 1, -80(1) ; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: xxlor 1, 34, 34 ; PC64LE-NEXT: std 0, 96(1) ; PC64LE-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: vmr 31, 2 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: bl exp2 ; PC64LE-NEXT: nop ; PC64LE-NEXT: xxlor 62, 1, 1 @@ -3187,11 +3187,11 @@ ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 ; PC64LE9-NEXT: stdu 1, -64(1) +; PC64LE9-NEXT: xscpsgndp 1, 34, 34 ; PC64LE9-NEXT: std 0, 80(1) +; PC64LE9-NEXT: stxv 62, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: vmr 31, 2 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 -; PC64LE9-NEXT: stxv 62, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: bl exp2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscpsgndp 62, 1, 1 @@ -3387,14 +3387,14 @@ ; PC64LE-NEXT: stdu 1, -96(1) ; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: std 0, 112(1) +; PC64LE-NEXT: xxlor 1, 34, 34 ; PC64LE-NEXT: stxvd2x 61, 1, 3 # 16-byte Folded Spill 
; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill -; PC64LE-NEXT: vmr 30, 2 ; PC64LE-NEXT: li 3, 80 +; PC64LE-NEXT: vmr 30, 2 ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: vmr 31, 3 -; PC64LE-NEXT: xxlor 1, 62, 62 ; PC64LE-NEXT: bl exp2 ; PC64LE-NEXT: nop ; PC64LE-NEXT: xxlor 61, 1, 1 @@ -3430,13 +3430,13 @@ ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 ; PC64LE9-NEXT: stdu 1, -80(1) +; PC64LE9-NEXT: xscpsgndp 1, 34, 34 ; PC64LE9-NEXT: std 0, 96(1) -; PC64LE9-NEXT: stxv 62, 48(1) # 16-byte Folded Spill -; PC64LE9-NEXT: vmr 30, 2 -; PC64LE9-NEXT: xscpsgndp 1, 62, 62 ; PC64LE9-NEXT: stxv 61, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: stxv 62, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: stxv 63, 64(1) # 16-byte Folded Spill ; PC64LE9-NEXT: vmr 31, 3 +; PC64LE9-NEXT: vmr 30, 2 ; PC64LE9-NEXT: bl exp2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscpsgndp 61, 1, 1 @@ -3510,12 +3510,12 @@ ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: stdu 1, -80(1) ; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: xxlor 1, 34, 34 ; PC64LE-NEXT: std 0, 96(1) ; PC64LE-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: vmr 31, 2 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: bl log ; PC64LE-NEXT: nop ; PC64LE-NEXT: xxlor 62, 1, 1 @@ -3538,11 +3538,11 @@ ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 ; PC64LE9-NEXT: stdu 1, -64(1) +; PC64LE9-NEXT: xscpsgndp 1, 34, 34 ; PC64LE9-NEXT: std 0, 80(1) +; PC64LE9-NEXT: stxv 62, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: vmr 31, 2 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 -; PC64LE9-NEXT: stxv 62, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: bl log ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscpsgndp 62, 1, 1 @@ -3738,14 +3738,14 @@ ; PC64LE-NEXT: stdu 1, -96(1) ; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: std 0, 112(1) +; PC64LE-NEXT: xxlor 1, 34, 34 ; PC64LE-NEXT: stxvd2x 61, 1, 3 # 
16-byte Folded Spill ; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill -; PC64LE-NEXT: vmr 30, 2 ; PC64LE-NEXT: li 3, 80 +; PC64LE-NEXT: vmr 30, 2 ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: vmr 31, 3 -; PC64LE-NEXT: xxlor 1, 62, 62 ; PC64LE-NEXT: bl log ; PC64LE-NEXT: nop ; PC64LE-NEXT: xxlor 61, 1, 1 @@ -3781,13 +3781,13 @@ ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 ; PC64LE9-NEXT: stdu 1, -80(1) +; PC64LE9-NEXT: xscpsgndp 1, 34, 34 ; PC64LE9-NEXT: std 0, 96(1) -; PC64LE9-NEXT: stxv 62, 48(1) # 16-byte Folded Spill -; PC64LE9-NEXT: vmr 30, 2 -; PC64LE9-NEXT: xscpsgndp 1, 62, 62 ; PC64LE9-NEXT: stxv 61, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: stxv 62, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: stxv 63, 64(1) # 16-byte Folded Spill ; PC64LE9-NEXT: vmr 31, 3 +; PC64LE9-NEXT: vmr 30, 2 ; PC64LE9-NEXT: bl log ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscpsgndp 61, 1, 1 @@ -3861,12 +3861,12 @@ ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: stdu 1, -80(1) ; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: xxlor 1, 34, 34 ; PC64LE-NEXT: std 0, 96(1) ; PC64LE-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: vmr 31, 2 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: bl log10 ; PC64LE-NEXT: nop ; PC64LE-NEXT: xxlor 62, 1, 1 @@ -3889,11 +3889,11 @@ ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 ; PC64LE9-NEXT: stdu 1, -64(1) +; PC64LE9-NEXT: xscpsgndp 1, 34, 34 ; PC64LE9-NEXT: std 0, 80(1) +; PC64LE9-NEXT: stxv 62, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: vmr 31, 2 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 -; PC64LE9-NEXT: stxv 62, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: bl log10 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscpsgndp 62, 1, 1 @@ -4089,14 +4089,14 @@ ; PC64LE-NEXT: stdu 1, -96(1) ; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: std 0, 112(1) +; PC64LE-NEXT: xxlor 1, 34, 34 ; 
PC64LE-NEXT: stxvd2x 61, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill -; PC64LE-NEXT: vmr 30, 2 ; PC64LE-NEXT: li 3, 80 +; PC64LE-NEXT: vmr 30, 2 ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: vmr 31, 3 -; PC64LE-NEXT: xxlor 1, 62, 62 ; PC64LE-NEXT: bl log10 ; PC64LE-NEXT: nop ; PC64LE-NEXT: xxlor 61, 1, 1 @@ -4132,13 +4132,13 @@ ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 ; PC64LE9-NEXT: stdu 1, -80(1) +; PC64LE9-NEXT: xscpsgndp 1, 34, 34 ; PC64LE9-NEXT: std 0, 96(1) -; PC64LE9-NEXT: stxv 62, 48(1) # 16-byte Folded Spill -; PC64LE9-NEXT: vmr 30, 2 -; PC64LE9-NEXT: xscpsgndp 1, 62, 62 ; PC64LE9-NEXT: stxv 61, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: stxv 62, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: stxv 63, 64(1) # 16-byte Folded Spill ; PC64LE9-NEXT: vmr 31, 3 +; PC64LE9-NEXT: vmr 30, 2 ; PC64LE9-NEXT: bl log10 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscpsgndp 61, 1, 1 @@ -4212,12 +4212,12 @@ ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: stdu 1, -80(1) ; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: xxlor 1, 34, 34 ; PC64LE-NEXT: std 0, 96(1) ; PC64LE-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: vmr 31, 2 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: bl log2 ; PC64LE-NEXT: nop ; PC64LE-NEXT: xxlor 62, 1, 1 @@ -4240,11 +4240,11 @@ ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 ; PC64LE9-NEXT: stdu 1, -64(1) +; PC64LE9-NEXT: xscpsgndp 1, 34, 34 ; PC64LE9-NEXT: std 0, 80(1) +; PC64LE9-NEXT: stxv 62, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: vmr 31, 2 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 -; PC64LE9-NEXT: stxv 62, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: bl log2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscpsgndp 62, 1, 1 @@ -4440,14 +4440,14 @@ ; PC64LE-NEXT: stdu 1, -96(1) ; PC64LE-NEXT: li 3, 48 ; PC64LE-NEXT: std 0, 112(1) +; 
PC64LE-NEXT: xxlor 1, 34, 34 ; PC64LE-NEXT: stxvd2x 61, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill -; PC64LE-NEXT: vmr 30, 2 ; PC64LE-NEXT: li 3, 80 +; PC64LE-NEXT: vmr 30, 2 ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: vmr 31, 3 -; PC64LE-NEXT: xxlor 1, 62, 62 ; PC64LE-NEXT: bl log2 ; PC64LE-NEXT: nop ; PC64LE-NEXT: xxlor 61, 1, 1 @@ -4483,13 +4483,13 @@ ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 ; PC64LE9-NEXT: stdu 1, -80(1) +; PC64LE9-NEXT: xscpsgndp 1, 34, 34 ; PC64LE9-NEXT: std 0, 96(1) -; PC64LE9-NEXT: stxv 62, 48(1) # 16-byte Folded Spill -; PC64LE9-NEXT: vmr 30, 2 -; PC64LE9-NEXT: xscpsgndp 1, 62, 62 ; PC64LE9-NEXT: stxv 61, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: stxv 62, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: stxv 63, 64(1) # 16-byte Folded Spill ; PC64LE9-NEXT: vmr 31, 3 +; PC64LE9-NEXT: vmr 30, 2 ; PC64LE9-NEXT: bl log2 ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscpsgndp 61, 1, 1 @@ -4702,12 +4702,12 @@ ; PC64LE-NEXT: mflr 0 ; PC64LE-NEXT: stdu 1, -80(1) ; PC64LE-NEXT: li 3, 48 +; PC64LE-NEXT: xxlor 1, 34, 34 ; PC64LE-NEXT: std 0, 96(1) ; PC64LE-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: vmr 31, 2 -; PC64LE-NEXT: xxlor 1, 63, 63 ; PC64LE-NEXT: bl nearbyint ; PC64LE-NEXT: nop ; PC64LE-NEXT: xxlor 62, 1, 1 @@ -4730,11 +4730,11 @@ ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 ; PC64LE9-NEXT: stdu 1, -64(1) +; PC64LE9-NEXT: xscpsgndp 1, 34, 34 ; PC64LE9-NEXT: std 0, 80(1) +; PC64LE9-NEXT: stxv 62, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: stxv 63, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: vmr 31, 2 -; PC64LE9-NEXT: xscpsgndp 1, 63, 63 -; PC64LE9-NEXT: stxv 62, 32(1) # 16-byte Folded Spill ; PC64LE9-NEXT: bl nearbyint ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscpsgndp 62, 1, 1 @@ -4930,14 +4930,14 @@ ; PC64LE-NEXT: stdu 1, -96(1) ; PC64LE-NEXT: li 3, 48 
; PC64LE-NEXT: std 0, 112(1) +; PC64LE-NEXT: xxlor 1, 34, 34 ; PC64LE-NEXT: stxvd2x 61, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: li 3, 64 ; PC64LE-NEXT: stxvd2x 62, 1, 3 # 16-byte Folded Spill -; PC64LE-NEXT: vmr 30, 2 ; PC64LE-NEXT: li 3, 80 +; PC64LE-NEXT: vmr 30, 2 ; PC64LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill ; PC64LE-NEXT: vmr 31, 3 -; PC64LE-NEXT: xxlor 1, 62, 62 ; PC64LE-NEXT: bl nearbyint ; PC64LE-NEXT: nop ; PC64LE-NEXT: xxlor 61, 1, 1 @@ -4973,13 +4973,13 @@ ; PC64LE9: # %bb.0: # %entry ; PC64LE9-NEXT: mflr 0 ; PC64LE9-NEXT: stdu 1, -80(1) +; PC64LE9-NEXT: xscpsgndp 1, 34, 34 ; PC64LE9-NEXT: std 0, 96(1) -; PC64LE9-NEXT: stxv 62, 48(1) # 16-byte Folded Spill -; PC64LE9-NEXT: vmr 30, 2 -; PC64LE9-NEXT: xscpsgndp 1, 62, 62 ; PC64LE9-NEXT: stxv 61, 32(1) # 16-byte Folded Spill +; PC64LE9-NEXT: stxv 62, 48(1) # 16-byte Folded Spill ; PC64LE9-NEXT: stxv 63, 64(1) # 16-byte Folded Spill ; PC64LE9-NEXT: vmr 31, 3 +; PC64LE9-NEXT: vmr 30, 2 ; PC64LE9-NEXT: bl nearbyint ; PC64LE9-NEXT: nop ; PC64LE9-NEXT: xscpsgndp 61, 1, 1 diff --git a/llvm/test/CodeGen/Thumb2/mve-complex-deinterleaving-f64-mul.ll b/llvm/test/CodeGen/Thumb2/mve-complex-deinterleaving-f64-mul.ll --- a/llvm/test/CodeGen/Thumb2/mve-complex-deinterleaving-f64-mul.ll +++ b/llvm/test/CodeGen/Thumb2/mve-complex-deinterleaving-f64-mul.ll @@ -39,7 +39,7 @@ ; CHECK-NEXT: vmul.f64 d8, d3, d7 ; CHECK-NEXT: vfma.f64 d9, d6, d3 ; CHECK-NEXT: vfnms.f64 d8, d6, d2 -; CHECK-NEXT: vmul.f64 d1, d5, d10 +; CHECK-NEXT: vmul.f64 d1, d5, d0 ; CHECK-NEXT: vmov q1, q4 ; CHECK-NEXT: vmul.f64 d0, d11, d5 ; CHECK-NEXT: vfma.f64 d1, d4, d11 diff --git a/llvm/test/CodeGen/Thumb2/mve-complex-deinterleaving-i64-add.ll b/llvm/test/CodeGen/Thumb2/mve-complex-deinterleaving-i64-add.ll --- a/llvm/test/CodeGen/Thumb2/mve-complex-deinterleaving-i64-add.ll +++ b/llvm/test/CodeGen/Thumb2/mve-complex-deinterleaving-i64-add.ll @@ -40,9 +40,9 @@ ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, 
d9} +; CHECK-NEXT: vmov r0, r1, d2 ; CHECK-NEXT: vmov q4, q1 ; CHECK-NEXT: vmov r2, r3, d7 -; CHECK-NEXT: vmov r0, r1, d8 ; CHECK-NEXT: adds.w lr, r2, r0 ; CHECK-NEXT: adc.w r12, r3, r1 ; CHECK-NEXT: vmov r2, r3, d0 @@ -89,9 +89,9 @@ ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: add r2, sp, #72 -; CHECK-NEXT: vmov q4, q1 +; CHECK-NEXT: vmov r0, r1, d2 ; CHECK-NEXT: vldrw.u32 q5, [r2] -; CHECK-NEXT: vmov r0, r1, d8 +; CHECK-NEXT: vmov q4, q1 ; CHECK-NEXT: vmov r2, r3, d11 ; CHECK-NEXT: adds.w lr, r2, r0 ; CHECK-NEXT: adc.w r12, r3, r1 diff --git a/llvm/test/CodeGen/Thumb2/mve-div-expand.ll b/llvm/test/CodeGen/Thumb2/mve-div-expand.ll --- a/llvm/test/CodeGen/Thumb2/mve-div-expand.ll +++ b/llvm/test/CodeGen/Thumb2/mve-div-expand.ll @@ -621,10 +621,10 @@ ; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: vmov q4, q1 +; CHECK-NEXT: vmov r2, r3, d3 ; CHECK-NEXT: vmov q5, q0 -; CHECK-NEXT: vmov r0, r1, d11 -; CHECK-NEXT: vmov r2, r3, d9 ; CHECK-NEXT: bl __aeabi_uldivmod ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: mov r5, r1 @@ -647,10 +647,10 @@ ; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: vmov q4, q1 +; CHECK-NEXT: vmov r2, r3, d3 ; CHECK-NEXT: vmov q5, q0 -; CHECK-NEXT: vmov r0, r1, d11 -; CHECK-NEXT: vmov r2, r3, d9 ; CHECK-NEXT: bl __aeabi_ldivmod ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: mov r5, r1 @@ -673,10 +673,10 @@ ; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: vmov q4, q1 +; CHECK-NEXT: vmov r2, r3, d3 ; CHECK-NEXT: vmov q5, q0 -; CHECK-NEXT: vmov r0, r1, d11 -; CHECK-NEXT: vmov r2, r3, d9 ; CHECK-NEXT: bl __aeabi_uldivmod ; CHECK-NEXT: mov r4, r2 ; CHECK-NEXT: mov r5, r3 
@@ -699,10 +699,10 @@ ; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: vmov q4, q1 +; CHECK-NEXT: vmov r2, r3, d3 ; CHECK-NEXT: vmov q5, q0 -; CHECK-NEXT: vmov r0, r1, d11 -; CHECK-NEXT: vmov r2, r3, d9 ; CHECK-NEXT: bl __aeabi_ldivmod ; CHECK-NEXT: mov r4, r2 ; CHECK-NEXT: mov r5, r3 @@ -741,10 +741,10 @@ ; CHECK-NEXT: push {r4, r5, r6, lr} ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: vmov r0, r4, d1 ; CHECK-NEXT: vmov q4, q1 +; CHECK-NEXT: vmov r1, r5, d3 ; CHECK-NEXT: vmov q5, q0 -; CHECK-NEXT: vmov r0, r4, d11 -; CHECK-NEXT: vmov r1, r5, d9 ; CHECK-NEXT: bl fmodf ; CHECK-NEXT: mov r6, r0 ; CHECK-NEXT: mov r0, r4 @@ -807,11 +807,11 @@ ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: vmov q5, q0 -; CHECK-NEXT: vmov q4, q1 -; CHECK-NEXT: vcvtb.f32.f16 s0, s20 +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 ; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vcvtb.f32.f16 s0, s16 +; CHECK-NEXT: vcvtb.f32.f16 s0, s4 ; CHECK-NEXT: vmov r1, s0 +; CHECK-NEXT: vmov q4, q1 ; CHECK-NEXT: bl fmodf ; CHECK-NEXT: vcvtt.f32.f16 s0, s20 ; CHECK-NEXT: vmov r2, s0 @@ -880,10 +880,10 @@ ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: vmov q4, q1 +; CHECK-NEXT: vmov r2, r3, d3 ; CHECK-NEXT: vmov q5, q0 -; CHECK-NEXT: vmov r0, r1, d11 -; CHECK-NEXT: vmov r2, r3, d9 ; CHECK-NEXT: bl __aeabi_ddiv ; CHECK-NEXT: vmov lr, r12, d10 ; CHECK-NEXT: vmov r2, r3, d8 @@ -907,10 +907,10 @@ ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: vmov q4, q1 +; CHECK-NEXT: vmov r2, r3, d3 ; CHECK-NEXT: vmov q5, q0 -; CHECK-NEXT: vmov r0, r1, d11 -; CHECK-NEXT: vmov r2, r3, d9 ; CHECK-NEXT: bl fmod ; 
CHECK-NEXT: vmov lr, r12, d10 ; CHECK-NEXT: vmov r2, r3, d8 diff --git a/llvm/test/CodeGen/Thumb2/mve-fmath.ll b/llvm/test/CodeGen/Thumb2/mve-fmath.ll --- a/llvm/test/CodeGen/Thumb2/mve-fmath.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fmath.ll @@ -47,8 +47,8 @@ ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl sqrt ; CHECK-NEXT: vmov r2, r3, d8 ; CHECK-NEXT: vmov d9, r0, r1 @@ -71,8 +71,8 @@ ; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r4, d1 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r4, d9 ; CHECK-NEXT: bl cosf ; CHECK-NEXT: mov r5, r0 ; CHECK-NEXT: mov r0, r4 @@ -102,7 +102,7 @@ ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vcvtb.f32.f16 s0, s16 +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl cosf ; CHECK-NEXT: vcvtt.f32.f16 s0, s16 @@ -158,8 +158,8 @@ ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl cos ; CHECK-NEXT: vmov r2, r3, d8 ; CHECK-NEXT: vmov d9, r0, r1 @@ -182,8 +182,8 @@ ; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r4, d1 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r4, d9 ; CHECK-NEXT: bl sinf ; CHECK-NEXT: mov r5, r0 ; CHECK-NEXT: mov r0, r4 @@ -213,7 +213,7 @@ ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vcvtb.f32.f16 s0, s16 +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl sinf ; CHECK-NEXT: vcvtt.f32.f16 s0, s16 @@ -269,8 +269,8 @@ ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush 
{d8, d9} +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl sin ; CHECK-NEXT: vmov r2, r3, d8 ; CHECK-NEXT: vmov d9, r0, r1 @@ -293,8 +293,8 @@ ; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r4, d1 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r4, d9 ; CHECK-NEXT: bl expf ; CHECK-NEXT: mov r5, r0 ; CHECK-NEXT: mov r0, r4 @@ -324,7 +324,7 @@ ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vcvtb.f32.f16 s0, s16 +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl expf ; CHECK-NEXT: vcvtt.f32.f16 s0, s16 @@ -380,8 +380,8 @@ ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl exp ; CHECK-NEXT: vmov r2, r3, d8 ; CHECK-NEXT: vmov d9, r0, r1 @@ -404,8 +404,8 @@ ; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r4, d1 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r4, d9 ; CHECK-NEXT: bl exp2f ; CHECK-NEXT: mov r5, r0 ; CHECK-NEXT: mov r0, r4 @@ -435,7 +435,7 @@ ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vcvtb.f32.f16 s0, s16 +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl exp2f ; CHECK-NEXT: vcvtt.f32.f16 s0, s16 @@ -491,8 +491,8 @@ ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl exp2 ; CHECK-NEXT: vmov r2, r3, d8 ; CHECK-NEXT: vmov d9, r0, r1 @@ -515,8 +515,8 @@ ; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r4, d1 ; 
CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r4, d9 ; CHECK-NEXT: bl logf ; CHECK-NEXT: mov r5, r0 ; CHECK-NEXT: mov r0, r4 @@ -546,7 +546,7 @@ ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vcvtb.f32.f16 s0, s16 +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl logf ; CHECK-NEXT: vcvtt.f32.f16 s0, s16 @@ -602,8 +602,8 @@ ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl log ; CHECK-NEXT: vmov r2, r3, d8 ; CHECK-NEXT: vmov d9, r0, r1 @@ -626,8 +626,8 @@ ; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r4, d1 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r4, d9 ; CHECK-NEXT: bl log2f ; CHECK-NEXT: mov r5, r0 ; CHECK-NEXT: mov r0, r4 @@ -657,7 +657,7 @@ ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vcvtb.f32.f16 s0, s16 +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 ; CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl log2f ; CHECK-NEXT: vcvtt.f32.f16 s0, s16 @@ -713,8 +713,8 @@ ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl log2 ; CHECK-NEXT: vmov r2, r3, d8 ; CHECK-NEXT: vmov d9, r0, r1 @@ -737,8 +737,8 @@ ; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r4, d1 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r4, d9 ; CHECK-NEXT: bl log10f ; CHECK-NEXT: mov r5, r0 ; CHECK-NEXT: mov r0, r4 @@ -768,7 +768,7 @@ ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vcvtb.f32.f16 s0, s16 +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 ; 
CHECK-NEXT: vmov r0, s0 ; CHECK-NEXT: bl log10f ; CHECK-NEXT: vcvtt.f32.f16 s0, s16 @@ -824,8 +824,8 @@ ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl log10 ; CHECK-NEXT: vmov r2, r3, d8 ; CHECK-NEXT: vmov d9, r0, r1 @@ -848,10 +848,10 @@ ; CHECK-NEXT: push {r4, r5, r6, lr} ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: vmov r0, r4, d1 ; CHECK-NEXT: vmov q4, q1 +; CHECK-NEXT: vmov r1, r5, d3 ; CHECK-NEXT: vmov q5, q0 -; CHECK-NEXT: vmov r0, r4, d11 -; CHECK-NEXT: vmov r1, r5, d9 ; CHECK-NEXT: bl powf ; CHECK-NEXT: mov r6, r0 ; CHECK-NEXT: mov r0, r4 @@ -884,11 +884,11 @@ ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: vmov q5, q0 -; CHECK-NEXT: vmov q4, q1 -; CHECK-NEXT: vcvtb.f32.f16 s0, s20 +; CHECK-NEXT: vcvtb.f32.f16 s0, s0 ; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vcvtb.f32.f16 s0, s16 +; CHECK-NEXT: vcvtb.f32.f16 s0, s4 ; CHECK-NEXT: vmov r1, s0 +; CHECK-NEXT: vmov q4, q1 ; CHECK-NEXT: bl powf ; CHECK-NEXT: vcvtt.f32.f16 s0, s20 ; CHECK-NEXT: vmov r2, s0 @@ -957,10 +957,10 @@ ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: vmov q4, q1 +; CHECK-NEXT: vmov r2, r3, d3 ; CHECK-NEXT: vmov q5, q0 -; CHECK-NEXT: vmov r0, r1, d11 -; CHECK-NEXT: vmov r2, r3, d9 ; CHECK-NEXT: bl pow ; CHECK-NEXT: vmov lr, r12, d10 ; CHECK-NEXT: vmov r2, r3, d8 diff --git a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll --- a/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fpclamptosat_vec.ll @@ -10,8 +10,8 @@ ; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r1, d0 ; 
CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d8 ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: mov r5, r1 @@ -80,8 +80,8 @@ ; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r1, d0 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d8 ; CHECK-NEXT: bl __aeabi_d2ulz ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: mov r5, r1 @@ -118,8 +118,8 @@ ; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r1, d0 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d8 ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: mov r5, r1 @@ -341,8 +341,8 @@ ; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: mov r5, r1 @@ -415,8 +415,8 @@ ; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl __aeabi_d2ulz ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: mov r5, r1 @@ -456,8 +456,8 @@ ; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: mov r5, r1 @@ -672,8 +672,8 @@ ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: vmov r12, lr, d8 ; CHECK-NEXT: subs.w r4, r0, #-1 @@ -743,8 +743,8 @@ ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r1, d1 ; 
CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl __fixunsdfti ; CHECK-NEXT: mov r5, r1 ; CHECK-NEXT: vmov r4, r1, d8 @@ -786,8 +786,8 @@ ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: vmov r12, lr, d8 ; CHECK-NEXT: subs r4, r2, #1 @@ -1178,8 +1178,8 @@ ; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r1, d0 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d8 ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: mov r5, r1 @@ -1246,8 +1246,8 @@ ; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r1, d0 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d8 ; CHECK-NEXT: bl __aeabi_d2ulz ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: mov r5, r1 @@ -1283,8 +1283,8 @@ ; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r1, d0 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d8 ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: mov r5, r1 @@ -1494,8 +1494,8 @@ ; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: mov r5, r1 @@ -1566,8 +1566,8 @@ ; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl __aeabi_d2ulz ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: mov r5, r1 @@ -1606,8 +1606,8 @@ ; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r1, d1 ; 
CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: mov r5, r1 @@ -1814,8 +1814,8 @@ ; CHECK-NEXT: vpush {d8, d9} ; CHECK-NEXT: .pad #24 ; CHECK-NEXT: sub sp, #24 +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: mov r10, r0 ; CHECK-NEXT: movs r0, #0 @@ -1930,8 +1930,8 @@ ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl __fixunsdfti ; CHECK-NEXT: mov r8, r1 ; CHECK-NEXT: vmov r4, r1, d8 @@ -1983,8 +1983,8 @@ ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl __fixdfti ; CHECK-NEXT: subs r6, r2, #1 ; CHECK-NEXT: eor r7, r2, #1 diff --git a/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll b/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll --- a/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll @@ -32,8 +32,8 @@ ; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: .vsave {d8, d9, d10} ; CHECK-NEXT: vpush {d8, d9, d10} +; CHECK-NEXT: vmov r0, s1 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, s17 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: mov r5, r0 ; CHECK-NEXT: vmov r0, s16 @@ -647,8 +647,8 @@ ; CHECK-NEXT: sub sp, #32 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI11_0 +; CHECK-NEXT: vmov r5, r6, d2 ; CHECK-NEXT: vmov q5, q1 -; CHECK-NEXT: vmov r5, r6, d10 ; CHECK-NEXT: vmov r9, r3, d0 ; CHECK-NEXT: str r3, [sp, #24] @ 4-byte Spill ; CHECK-NEXT: mov r0, r5 @@ -1286,7 +1286,7 @@ ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vcvtt.f32.f16 s18, s16 +; CHECK-NEXT: vcvtt.f32.f16 s18, s0 ; 
CHECK-NEXT: vmov r0, s18 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: vcvtb.f32.f16 s16, s16 @@ -1849,9 +1849,9 @@ ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} -; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: vmov r0, s17 +; CHECK-NEXT: vmov r0, s1 +; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: mov r9, r0 ; CHECK-NEXT: vmov r0, s19 @@ -2008,8 +2008,8 @@ ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr} ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: vmov r0, s3 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, s19 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: mov r10, r0 ; CHECK-NEXT: vmov r0, s18 @@ -2148,12 +2148,12 @@ ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} -; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vmov r0, s18 +; CHECK-NEXT: vmov r0, s2 +; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr s20, .LCPI30_0 -; CHECK-NEXT: vmov r7, s19 -; CHECK-NEXT: vmov r5, s16 +; CHECK-NEXT: vmov r7, s3 +; CHECK-NEXT: vmov r5, s0 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: vldr s22, .LCPI30_1 ; CHECK-NEXT: mov r6, r3 @@ -2409,9 +2409,9 @@ ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} -; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vmov r0, s19 +; CHECK-NEXT: vmov r0, s3 +; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: vmov r5, s18 ; CHECK-NEXT: vldr s22, .LCPI31_0 @@ -5033,10 +5033,10 @@ ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14} ; CHECK-NEXT: .pad #16 ; CHECK-NEXT: sub sp, #16 -; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vcvtt.f32.f16 s28, s3 ; CHECK-NEXT: mov r11, r0 -; CHECK-NEXT: vcvtt.f32.f16 s28, s19 ; CHECK-NEXT: vmov r0, s28 +; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: vcvtb.f32.f16 s26, s18 ; CHECK-NEXT: mov r7, r0 @@ 
-5350,8 +5350,8 @@ ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: vcvtt.f32.f16 s20, s3 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vcvtt.f32.f16 s20, s19 ; CHECK-NEXT: vmov r0, s20 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: vcvtb.f32.f16 s22, s19 @@ -5615,10 +5615,10 @@ ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr} ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vcvtb.f32.f16 s30, s3 ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vcvtb.f32.f16 s30, s19 ; CHECK-NEXT: vmov r0, s30 +; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: bl __fixsfti ; CHECK-NEXT: vcvtb.f32.f16 s28, s18 ; CHECK-NEXT: mov r5, r3 @@ -6125,14 +6125,14 @@ ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vcvtt.f32.f16 s28, s3 ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vcvtt.f32.f16 s28, s19 -; CHECK-NEXT: vcvtb.f32.f16 s20, s16 ; CHECK-NEXT: vmov r0, s28 -; CHECK-NEXT: vcvtt.f32.f16 s24, s16 -; CHECK-NEXT: vcvtb.f32.f16 s26, s17 -; CHECK-NEXT: vcvtb.f32.f16 s19, s19 +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vcvtb.f32.f16 s20, s0 +; CHECK-NEXT: vcvtt.f32.f16 s24, s0 +; CHECK-NEXT: vcvtb.f32.f16 s26, s1 +; CHECK-NEXT: vcvtb.f32.f16 s19, s3 ; CHECK-NEXT: vldr s22, .LCPI51_2 ; CHECK-NEXT: vmov r8, s20 ; CHECK-NEXT: vmov r9, s24 diff --git a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll --- a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll @@ -32,8 +32,8 @@ ; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, s1 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, s17 ; CHECK-NEXT: 
bl __aeabi_f2ulz ; CHECK-NEXT: mov r5, r0 ; CHECK-NEXT: vmov r0, s16 @@ -559,14 +559,14 @@ ; CHECK-NEXT: sub sp, #24 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vldr d0, .LCPI11_0 +; CHECK-NEXT: vmov r4, r5, d2 ; CHECK-NEXT: vmov q5, q1 ; CHECK-NEXT: vmov r7, r9, d0 -; CHECK-NEXT: vmov r4, r5, d10 ; CHECK-NEXT: str.w r9, [sp, #4] @ 4-byte Spill -; CHECK-NEXT: mov r2, r7 -; CHECK-NEXT: mov r3, r9 ; CHECK-NEXT: mov r0, r4 ; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: mov r2, r7 +; CHECK-NEXT: mov r3, r9 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: vldr d0, .LCPI11_1 ; CHECK-NEXT: mov r1, r5 @@ -1068,7 +1068,7 @@ ; CHECK-NEXT: .vsave {d8, d9, d10} ; CHECK-NEXT: vpush {d8, d9, d10} ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vcvtt.f32.f16 s18, s16 +; CHECK-NEXT: vcvtt.f32.f16 s18, s0 ; CHECK-NEXT: vmov r0, s18 ; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: vcvtb.f32.f16 s16, s16 @@ -1522,12 +1522,12 @@ ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr} ; CHECK-NEXT: .vsave {d8, d9, d10} ; CHECK-NEXT: vpush {d8, d9, d10} -; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: vmov r0, s16 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vmov r4, s1 +; CHECK-NEXT: vmov r6, s3 ; CHECK-NEXT: vldr s20, .LCPI28_0 -; CHECK-NEXT: vmov r4, s17 -; CHECK-NEXT: vmov r6, s19 ; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: mov r7, r0 ; CHECK-NEXT: vcmp.f32 s16, #0 @@ -1648,8 +1648,8 @@ ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9, d10} ; CHECK-NEXT: vpush {d8, d9, d10} +; CHECK-NEXT: vmov r0, s3 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, s19 ; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: mov r11, r0 ; CHECK-NEXT: vmov r0, s18 @@ -1754,12 +1754,12 @@ ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9, d10} ; CHECK-NEXT: vpush {d8, d9, d10} -; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vmov r0, s18 +; CHECK-NEXT: vmov r0, s2 +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vmov r5, s0 ; CHECK-NEXT: vldr s20, .LCPI30_0 -; CHECK-NEXT: 
vmov r5, s16 -; CHECK-NEXT: vmov r7, s19 +; CHECK-NEXT: vmov r7, s3 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vcmp.f32 s18, #0 ; CHECK-NEXT: mov r6, r3 @@ -1948,9 +1948,9 @@ ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9, d10} ; CHECK-NEXT: vpush {d8, d9, d10} -; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vmov r0, s19 +; CHECK-NEXT: vmov r0, s3 +; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vmov r5, s18 ; CHECK-NEXT: vldr s20, .LCPI31_0 @@ -3909,10 +3909,10 @@ ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: .pad #16 ; CHECK-NEXT: sub sp, #16 -; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vcvtb.f32.f16 s24, s2 ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vcvtb.f32.f16 s24, s18 ; CHECK-NEXT: vmov r0, s24 +; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: vcvtt.f32.f16 s26, s19 ; CHECK-NEXT: mov r7, r0 @@ -4154,8 +4154,8 @@ ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: vcvtt.f32.f16 s20, s3 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vcvtt.f32.f16 s20, s19 ; CHECK-NEXT: vmov r0, s20 ; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: vcvtb.f32.f16 s22, s19 @@ -4352,10 +4352,10 @@ ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, lr} ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vcvtb.f32.f16 s28, s3 ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: vcvtb.f32.f16 s28, s19 ; CHECK-NEXT: vmov r0, s28 +; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: bl __fixunssfti ; CHECK-NEXT: vcvtb.f32.f16 s26, s18 ; CHECK-NEXT: mov r5, r3 @@ -4729,12 +4729,12 @@ ; CHECK-NEXT: sub sp, #4 ; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vcvtt.f32.f16 s26, s3 ; CHECK-NEXT: mov r4, r0 -; CHECK-NEXT: 
vcvtt.f32.f16 s26, s19 -; CHECK-NEXT: vcvtb.f32.f16 s22, s16 ; CHECK-NEXT: vmov r0, s26 -; CHECK-NEXT: vcvtt.f32.f16 s16, s16 +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vcvtb.f32.f16 s22, s0 +; CHECK-NEXT: vcvtt.f32.f16 s16, s0 ; CHECK-NEXT: vcvtb.f32.f16 s24, s17 ; CHECK-NEXT: vcvtb.f32.f16 s30, s19 ; CHECK-NEXT: vldr s20, .LCPI51_0 diff --git a/llvm/test/CodeGen/Thumb2/mve-frint.ll b/llvm/test/CodeGen/Thumb2/mve-frint.ll --- a/llvm/test/CodeGen/Thumb2/mve-frint.ll +++ b/llvm/test/CodeGen/Thumb2/mve-frint.ll @@ -57,8 +57,8 @@ ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl ceil ; CHECK-NEXT: vmov r2, r3, d8 ; CHECK-NEXT: vmov d9, r0, r1 @@ -129,8 +129,8 @@ ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl trunc ; CHECK-NEXT: vmov r2, r3, d8 ; CHECK-NEXT: vmov d9, r0, r1 @@ -201,8 +201,8 @@ ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl rint ; CHECK-NEXT: vmov r2, r3, d8 ; CHECK-NEXT: vmov d9, r0, r1 @@ -263,8 +263,8 @@ ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl nearbyint ; CHECK-NEXT: vmov r2, r3, d8 ; CHECK-NEXT: vmov d9, r0, r1 @@ -335,8 +335,8 @@ ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl floor ; CHECK-NEXT: vmov r2, r3, d8 ; CHECK-NEXT: vmov d9, r0, r1 @@ -407,8 +407,8 @@ ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: 
vpush {d8, d9} +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl round ; CHECK-NEXT: vmov r2, r3, d8 ; CHECK-NEXT: vmov d9, r0, r1 diff --git a/llvm/test/CodeGen/Thumb2/mve-minmax.ll b/llvm/test/CodeGen/Thumb2/mve-minmax.ll --- a/llvm/test/CodeGen/Thumb2/mve-minmax.ll +++ b/llvm/test/CodeGen/Thumb2/mve-minmax.ll @@ -305,10 +305,10 @@ ; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: vmov r0, r1, d2 ; CHECK-NEXT: vmov q4, q1 +; CHECK-NEXT: vmov r2, r3, d0 ; CHECK-NEXT: vmov q5, q0 -; CHECK-NEXT: vmov r0, r1, d8 -; CHECK-NEXT: vmov r2, r3, d10 ; CHECK-NEXT: bl __aeabi_dcmpgt ; CHECK-NEXT: vmov r12, r1, d9 ; CHECK-NEXT: cmp r0, #0 diff --git a/llvm/test/CodeGen/Thumb2/mve-nofloat.ll b/llvm/test/CodeGen/Thumb2/mve-nofloat.ll --- a/llvm/test/CodeGen/Thumb2/mve-nofloat.ll +++ b/llvm/test/CodeGen/Thumb2/mve-nofloat.ll @@ -106,10 +106,10 @@ ; CHECK-NOFP-NEXT: push {r4, r5, r7, lr} ; CHECK-NOFP-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NOFP-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NOFP-NEXT: vmov r4, r0, d1 ; CHECK-NOFP-NEXT: vmov q4, q1 +; CHECK-NOFP-NEXT: vmov r5, r1, d3 ; CHECK-NOFP-NEXT: vmov q5, q0 -; CHECK-NOFP-NEXT: vmov r4, r0, d11 -; CHECK-NOFP-NEXT: vmov r5, r1, d9 ; CHECK-NOFP-NEXT: bl __aeabi_fadd ; CHECK-NOFP-NEXT: vmov s19, r0 ; CHECK-NOFP-NEXT: mov r0, r4 diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll b/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll --- a/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll +++ b/llvm/test/CodeGen/Thumb2/mve-pred-ext.ll @@ -931,9 +931,9 @@ ; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: movs r4, #0 -; CHECK-NEXT: vmov r0, r1, d9 +; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: sbcs.w r0, r4, r1 ; CHECK-NEXT: cset r0, lt @@ -962,9 +962,9 @@ ; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: 
.vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: movs r4, #0 -; CHECK-NEXT: vmov r0, r1, d9 +; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: rsbs r0, r0, #0 ; CHECK-NEXT: sbcs.w r0, r4, r1 ; CHECK-NEXT: csetm r0, lt @@ -993,8 +993,8 @@ ; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: vmov r0, r1, d0 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d8 ; CHECK-NEXT: bl __aeabi_d2iz ; CHECK-NEXT: vmov r2, r1, d9 ; CHECK-NEXT: movs r4, #0 @@ -1031,8 +1031,8 @@ ; CHECK-NEXT: push {r4, lr} ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: vmov r0, r1, d0 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d8 ; CHECK-NEXT: bl __aeabi_d2iz ; CHECK-NEXT: vmov r2, r1, d9 ; CHECK-NEXT: movs r4, #0 diff --git a/llvm/test/CodeGen/Thumb2/mve-shuffle.ll b/llvm/test/CodeGen/Thumb2/mve-shuffle.ll --- a/llvm/test/CodeGen/Thumb2/mve-shuffle.ll +++ b/llvm/test/CodeGen/Thumb2/mve-shuffle.ll @@ -94,7 +94,7 @@ ; CHECK-LABEL: oneoff22_i32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov q0, q1 -; CHECK-NEXT: vmov.f32 s2, s0 +; CHECK-NEXT: vmov.f32 s2, s4 ; CHECK-NEXT: bx lr entry: %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> @@ -224,14 +224,14 @@ ; CHECK-LABEL: shuffle3_i16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov q1, q0 +; CHECK-NEXT: vmovx.f16 s1, s3 ; CHECK-NEXT: vmovx.f16 s2, s5 ; CHECK-NEXT: vmovx.f16 s0, s4 ; CHECK-NEXT: vins.f16 s5, s4 ; CHECK-NEXT: vins.f16 s2, s0 ; CHECK-NEXT: vmov.f32 s3, s5 -; CHECK-NEXT: vmovx.f16 s1, s7 -; CHECK-NEXT: vmov.f32 s0, s6 ; CHECK-NEXT: vins.f16 s1, s7 +; CHECK-NEXT: vmov.f32 s0, s6 ; CHECK-NEXT: bx lr entry: %out = shufflevector <8 x i16> %src, <8 x i16> undef, <8 x i32> @@ -1026,7 +1026,7 @@ ; CHECK-LABEL: oneoff22_f32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov q0, q1 -; CHECK-NEXT: vmov.f32 s2, s0 +; CHECK-NEXT: vmov.f32 s2, s4 ; 
CHECK-NEXT: bx lr entry: %out = shufflevector <4 x float> %src1, <4 x float> %src2, <4 x i32> @@ -1146,14 +1146,14 @@ ; CHECK-LABEL: shuffle3_f16: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov q1, q0 +; CHECK-NEXT: vmovx.f16 s1, s3 ; CHECK-NEXT: vmovx.f16 s2, s5 ; CHECK-NEXT: vmovx.f16 s0, s4 ; CHECK-NEXT: vins.f16 s5, s4 ; CHECK-NEXT: vins.f16 s2, s0 ; CHECK-NEXT: vmov.f32 s3, s5 -; CHECK-NEXT: vmovx.f16 s1, s7 -; CHECK-NEXT: vmov.f32 s0, s6 ; CHECK-NEXT: vins.f16 s1, s7 +; CHECK-NEXT: vmov.f32 s0, s6 ; CHECK-NEXT: bx lr entry: %out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> @@ -1470,15 +1470,15 @@ ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} ; CHECK-NEXT: vmov q5, q2 +; CHECK-NEXT: vmov.f32 s18, s8 ; CHECK-NEXT: vmov.f32 s16, s0 -; CHECK-NEXT: vmov.f32 s18, s20 ; CHECK-NEXT: vmov.f32 s20, s2 ; CHECK-NEXT: vmov.f32 s10, s12 -; CHECK-NEXT: vmov.f32 s19, s21 -; CHECK-NEXT: vmov.f32 s8, s4 ; CHECK-NEXT: vmov.f32 s17, s1 +; CHECK-NEXT: vmov.f32 s19, s9 ; CHECK-NEXT: vmov.f32 s21, s3 ; CHECK-NEXT: vmov q0, q4 +; CHECK-NEXT: vmov.f32 s8, s4 ; CHECK-NEXT: vmov.f32 s12, s6 ; CHECK-NEXT: vmov.f32 s11, s13 ; CHECK-NEXT: vmov.f32 s9, s5 @@ -1563,15 +1563,15 @@ ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} ; CHECK-NEXT: vmov q5, q2 +; CHECK-NEXT: vmov.f32 s18, s8 ; CHECK-NEXT: vmov.f32 s16, s0 -; CHECK-NEXT: vmov.f32 s18, s20 ; CHECK-NEXT: vmov.f32 s20, s2 ; CHECK-NEXT: vmov.f32 s10, s12 -; CHECK-NEXT: vmov.f32 s19, s21 -; CHECK-NEXT: vmov.f32 s8, s4 ; CHECK-NEXT: vmov.f32 s17, s1 +; CHECK-NEXT: vmov.f32 s19, s9 ; CHECK-NEXT: vmov.f32 s21, s3 ; CHECK-NEXT: vmov q0, q4 +; CHECK-NEXT: vmov.f32 s8, s4 ; CHECK-NEXT: vmov.f32 s12, s6 ; CHECK-NEXT: vmov.f32 s11, s13 ; CHECK-NEXT: vmov.f32 s9, s5 diff --git a/llvm/test/CodeGen/Thumb2/mve-simple-arith.ll b/llvm/test/CodeGen/Thumb2/mve-simple-arith.ll --- a/llvm/test/CodeGen/Thumb2/mve-simple-arith.ll +++ b/llvm/test/CodeGen/Thumb2/mve-simple-arith.ll 
@@ -112,10 +112,10 @@ ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: vmov r0, r1, d3 ; CHECK-NEXT: vmov q5, q1 +; CHECK-NEXT: vmov r2, r3, d1 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d11 -; CHECK-NEXT: vmov r2, r3, d9 ; CHECK-NEXT: bl __aeabi_dadd ; CHECK-NEXT: vmov lr, r12, d10 ; CHECK-NEXT: vmov r2, r3, d8 @@ -243,10 +243,10 @@ ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: vmov r0, r1, d3 ; CHECK-NEXT: vmov q5, q1 +; CHECK-NEXT: vmov r2, r3, d1 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d11 -; CHECK-NEXT: vmov r2, r3, d9 ; CHECK-NEXT: bl __aeabi_dsub ; CHECK-NEXT: vmov lr, r12, d10 ; CHECK-NEXT: vmov r2, r3, d8 @@ -376,10 +376,10 @@ ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: vmov r0, r1, d3 ; CHECK-NEXT: vmov q5, q1 +; CHECK-NEXT: vmov r2, r3, d1 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d11 -; CHECK-NEXT: vmov r2, r3, d9 ; CHECK-NEXT: bl __aeabi_dmul ; CHECK-NEXT: vmov lr, r12, d10 ; CHECK-NEXT: vmov r2, r3, d8 diff --git a/llvm/test/CodeGen/Thumb2/mve-vabd.ll b/llvm/test/CodeGen/Thumb2/mve-vabd.ll --- a/llvm/test/CodeGen/Thumb2/mve-vabd.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vabd.ll @@ -11,11 +11,11 @@ ; CHECK-MVE-NEXT: sub sp, #4 ; CHECK-MVE-NEXT: .vsave {d8, d9, d10, d11} ; CHECK-MVE-NEXT: vpush {d8, d9, d10, d11} +; CHECK-MVE-NEXT: mov r8, r0 +; CHECK-MVE-NEXT: vmov r0, r6, d0 +; CHECK-MVE-NEXT: vmov r1, r7, d2 ; CHECK-MVE-NEXT: vmov q4, q1 ; CHECK-MVE-NEXT: vmov q5, q0 -; CHECK-MVE-NEXT: mov r8, r0 -; CHECK-MVE-NEXT: vmov r0, r6, d10 -; CHECK-MVE-NEXT: vmov r1, r7, d8 ; CHECK-MVE-NEXT: bl __aeabi_fsub ; CHECK-MVE-NEXT: mov r9, r0 ; CHECK-MVE-NEXT: mov r0, r6 diff --git a/llvm/test/CodeGen/Thumb2/mve-vcvt.ll b/llvm/test/CodeGen/Thumb2/mve-vcvt.ll --- a/llvm/test/CodeGen/Thumb2/mve-vcvt.ll +++ 
b/llvm/test/CodeGen/Thumb2/mve-vcvt.ll @@ -263,8 +263,8 @@ ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl __aeabi_l2d ; CHECK-NEXT: vmov r2, r3, d8 ; CHECK-NEXT: vmov d9, r0, r1 @@ -287,8 +287,8 @@ ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl __aeabi_ul2d ; CHECK-NEXT: vmov r2, r3, d8 ; CHECK-NEXT: vmov d9, r0, r1 @@ -311,8 +311,8 @@ ; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl __aeabi_d2lz ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: mov r5, r1 @@ -334,8 +334,8 @@ ; CHECK-NEXT: push {r4, r5, r7, lr} ; CHECK-NEXT: .vsave {d8, d9} ; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov r0, r1, d1 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: vmov r0, r1, d9 ; CHECK-NEXT: bl __aeabi_d2ulz ; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: mov r5, r1 diff --git a/llvm/test/CodeGen/Thumb2/mve-vldst4.ll b/llvm/test/CodeGen/Thumb2/mve-vldst4.ll --- a/llvm/test/CodeGen/Thumb2/mve-vldst4.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vldst4.ll @@ -114,7 +114,7 @@ ; CHECK-NEXT: vstrh.16 q5, [r1, #48] ; CHECK-NEXT: vstrh.16 q2, [r1], #64 ; CHECK-NEXT: vmov.f32 s4, s13 -; CHECK-NEXT: vmov.f32 s5, s25 +; CHECK-NEXT: vmov.f32 s5, s1 ; CHECK-NEXT: vstrh.16 q1, [r1, #-48] ; CHECK-NEXT: le lr, .LBB0_2 ; CHECK-NEXT: .LBB0_3: @ %while.end diff --git a/llvm/test/CodeGen/Thumb2/mve-vst4.ll b/llvm/test/CodeGen/Thumb2/mve-vst4.ll --- a/llvm/test/CodeGen/Thumb2/mve-vst4.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vst4.ll @@ -1191,24 +1191,23 @@ ; CHECK-NEXT: vmovx.f16 s23, s6 ; CHECK-NEXT: vmovx.f16 s1, s22 ; CHECK-NEXT: vins.f16 s6, s22 -; CHECK-NEXT: vins.f16 s5, s21 -; CHECK-NEXT: vins.f16 
s4, s20 -; CHECK-NEXT: vins.f16 s23, s1 ; CHECK-NEXT: vmovx.f16 s22, s10 -; CHECK-NEXT: vins.f16 s10, s26 -; CHECK-NEXT: vmovx.f16 s1, s26 ; CHECK-NEXT: vins.f16 s9, s25 ; CHECK-NEXT: vins.f16 s8, s24 ; CHECK-NEXT: vins.f16 s11, s27 -; CHECK-NEXT: vmov q6, q1 -; CHECK-NEXT: vins.f16 s22, s1 -; CHECK-NEXT: vmov.f32 s1, s25 +; CHECK-NEXT: vins.f16 s10, s26 +; CHECK-NEXT: vins.f16 s5, s21 +; CHECK-NEXT: vins.f16 s4, s20 +; CHECK-NEXT: vins.f16 s23, s1 +; CHECK-NEXT: vmovx.f16 s1, s26 ; CHECK-NEXT: vmov q6, q2 +; CHECK-NEXT: vins.f16 s22, s1 ; CHECK-NEXT: vmov.f32 s3, s0 -; CHECK-NEXT: vmov.f32 s0, s9 ; CHECK-NEXT: vmov.f32 s26, s12 -; CHECK-NEXT: vstrb.8 q0, [r1, #16] +; CHECK-NEXT: vmov.f32 s0, s9 +; CHECK-NEXT: vmov.f32 s1, s5 ; CHECK-NEXT: vmov.f32 s25, s4 +; CHECK-NEXT: vstrb.8 q0, [r1, #16] ; CHECK-NEXT: vmov.f32 s27, s19 ; CHECK-NEXT: vmov.f32 s13, s7 ; CHECK-NEXT: vstrb.8 q6, [r1] diff --git a/llvm/test/CodeGen/X86/2008-05-12-tailmerge-5.ll b/llvm/test/CodeGen/X86/2008-05-12-tailmerge-5.ll --- a/llvm/test/CodeGen/X86/2008-05-12-tailmerge-5.ll +++ b/llvm/test/CodeGen/X86/2008-05-12-tailmerge-5.ll @@ -13,7 +13,7 @@ ; CHECK-NEXT: subq $40, %rsp ; CHECK-NEXT: movq %rsi, %rax ; CHECK-NEXT: movq %rdi, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movb %al, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movb %sil, {{[0-9]+}}(%rsp) ; CHECK-NEXT: movb %ah, {{[0-9]+}}(%rsp) ; CHECK-NEXT: shrq $16, %rsi ; CHECK-NEXT: movb %sil, {{[0-9]+}}(%rsp) diff --git a/llvm/test/CodeGen/X86/8bit_cmov_of_trunc_promotion.ll b/llvm/test/CodeGen/X86/8bit_cmov_of_trunc_promotion.ll --- a/llvm/test/CodeGen/X86/8bit_cmov_of_trunc_promotion.ll +++ b/llvm/test/CodeGen/X86/8bit_cmov_of_trunc_promotion.ll @@ -338,7 +338,7 @@ ; X86_64-LABEL: negative_CopyFromRegs: ; X86_64: # %bb.0: ; X86_64-NEXT: movl %esi, %eax -; X86_64-NEXT: cmpb %al, %dil +; X86_64-NEXT: cmpb %sil, %dil ; X86_64-NEXT: cmovgl %edi, %eax ; X86_64-NEXT: # kill: def $al killed $al killed $eax ; X86_64-NEXT: retq diff --git 
a/llvm/test/CodeGen/X86/AMX/amx-across-func.ll b/llvm/test/CodeGen/X86/AMX/amx-across-func.ll --- a/llvm/test/CodeGen/X86/AMX/amx-across-func.ll +++ b/llvm/test/CodeGen/X86/AMX/amx-across-func.ll @@ -36,10 +36,10 @@ ; CHECK-NEXT: movb $1, (%rsp) ; CHECK-NEXT: movw $8, {{[0-9]+}}(%rsp) ; CHECK-NEXT: movb $8, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movw %bx, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movb %bpl, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movw %bx, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movb %bpl, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movw %si, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movb %dil, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movw %si, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movb %dil, {{[0-9]+}}(%rsp) ; CHECK-NEXT: ldtilecfg (%rsp) ; CHECK-NEXT: movl $buf, %eax ; CHECK-NEXT: movl $32, %r14d diff --git a/llvm/test/CodeGen/X86/AMX/amx-ldtilecfg-insert.ll b/llvm/test/CodeGen/X86/AMX/amx-ldtilecfg-insert.ll --- a/llvm/test/CodeGen/X86/AMX/amx-ldtilecfg-insert.ll +++ b/llvm/test/CodeGen/X86/AMX/amx-ldtilecfg-insert.ll @@ -53,12 +53,12 @@ ; CHECK-NEXT: movb $8, {{[0-9]+}}(%rsp) ; CHECK-NEXT: movw $8, {{[0-9]+}}(%rsp) ; CHECK-NEXT: movb $8, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movw %bx, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movb %bpl, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movw %bx, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movb %bpl, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movw %bx, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movb %bpl, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movw %si, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movb %dil, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movw %si, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movb %dil, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movw %si, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movb %dil, {{[0-9]+}}(%rsp) ; CHECK-NEXT: vzeroupper ; CHECK-NEXT: callq foo ; CHECK-NEXT: ldtilecfg {{[0-9]+}}(%rsp) diff --git a/llvm/test/CodeGen/X86/AMX/amx-spill-merge.ll b/llvm/test/CodeGen/X86/AMX/amx-spill-merge.ll --- a/llvm/test/CodeGen/X86/AMX/amx-spill-merge.ll +++ b/llvm/test/CodeGen/X86/AMX/amx-spill-merge.ll @@ -21,15 +21,15 @@ ; CHECK-NEXT: movw $8, {{[0-9]+}}(%rsp) ; 
CHECK-NEXT: movb $8, {{[0-9]+}}(%rsp) ; CHECK-NEXT: movw $8, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movw %bx, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movb %bpl, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movw %bx, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movb %bpl, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movb %bpl, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movw %bx, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movb %bpl, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movw %bx, {{[0-9]+}}(%rsp) -; CHECK-NEXT: movb %bpl, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movw %si, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movb %dil, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movw %si, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movb %dil, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movb %dil, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movw %si, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movb %dil, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movw %si, {{[0-9]+}}(%rsp) +; CHECK-NEXT: movb %dil, {{[0-9]+}}(%rsp) ; CHECK-NEXT: ldtilecfg (%rsp) ; CHECK-NEXT: movl $32, %r14d ; CHECK-NEXT: movl $buf+2048, %r15d diff --git a/llvm/test/CodeGen/X86/abds.ll b/llvm/test/CodeGen/X86/abds.ll --- a/llvm/test/CodeGen/X86/abds.ll +++ b/llvm/test/CodeGen/X86/abds.ll @@ -320,7 +320,7 @@ ; X64-LABEL: abd_minmax_i8: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %eax -; X64-NEXT: cmpb %al, %dil +; X64-NEXT: cmpb %sil, %dil ; X64-NEXT: movl %esi, %ecx ; X64-NEXT: cmovll %edi, %ecx ; X64-NEXT: cmovgl %edi, %eax @@ -349,7 +349,7 @@ ; X64-LABEL: abd_minmax_i16: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %eax -; X64-NEXT: cmpw %ax, %di +; X64-NEXT: cmpw %si, %di ; X64-NEXT: movl %esi, %ecx ; X64-NEXT: cmovll %edi, %ecx ; X64-NEXT: cmovgl %edi, %eax diff --git a/llvm/test/CodeGen/X86/abdu.ll b/llvm/test/CodeGen/X86/abdu.ll --- a/llvm/test/CodeGen/X86/abdu.ll +++ b/llvm/test/CodeGen/X86/abdu.ll @@ -282,7 +282,7 @@ ; X64-LABEL: abd_minmax_i8: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %eax -; X64-NEXT: cmpb %al, %dil +; X64-NEXT: cmpb %sil, %dil ; X64-NEXT: movl %esi, %ecx ; X64-NEXT: cmovbl %edi, %ecx ; X64-NEXT: cmoval %edi, %eax @@ -311,7 +311,7 @@ ; X64-LABEL: abd_minmax_i16: ; X64: 
# %bb.0: ; X64-NEXT: movl %esi, %eax -; X64-NEXT: cmpw %ax, %di +; X64-NEXT: cmpw %si, %di ; X64-NEXT: movl %esi, %ecx ; X64-NEXT: cmovbl %edi, %ecx ; X64-NEXT: cmoval %edi, %eax diff --git a/llvm/test/CodeGen/X86/abs.ll b/llvm/test/CodeGen/X86/abs.ll --- a/llvm/test/CodeGen/X86/abs.ll +++ b/llvm/test/CodeGen/X86/abs.ll @@ -26,7 +26,7 @@ ; X64-LABEL: test_i8: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: movl %eax, %ecx +; X64-NEXT: movl %edi, %ecx ; X64-NEXT: sarb $7, %cl ; X64-NEXT: xorb %cl, %al ; X64-NEXT: subb %cl, %al diff --git a/llvm/test/CodeGen/X86/add-and-not.ll b/llvm/test/CodeGen/X86/add-and-not.ll --- a/llvm/test/CodeGen/X86/add-and-not.ll +++ b/llvm/test/CodeGen/X86/add-and-not.ll @@ -155,7 +155,7 @@ ; X64-NEXT: pushq %rbx ; X64-NEXT: movl %esi, %ebx ; X64-NEXT: movl %edi, %ebp -; X64-NEXT: movl %ebp, %eax +; X64-NEXT: movl %edi, %eax ; X64-NEXT: notb %al ; X64-NEXT: movzbl %al, %r14d ; X64-NEXT: movl %r14d, %edi diff --git a/llvm/test/CodeGen/X86/atomic-rm-bit-test-64.ll b/llvm/test/CodeGen/X86/atomic-rm-bit-test-64.ll --- a/llvm/test/CodeGen/X86/atomic-rm-bit-test-64.ll +++ b/llvm/test/CodeGen/X86/atomic-rm-bit-test-64.ll @@ -1035,7 +1035,7 @@ ; CHECK-LABEL: atomic_shl1_mask01_and_64_gpr_valnz: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rsi, %rcx -; CHECK-NEXT: movl %ecx, %edx +; CHECK-NEXT: movl %esi, %edx ; CHECK-NEXT: andl $63, %edx ; CHECK-NEXT: movq $-2, %rsi ; CHECK-NEXT: # kill: def $cl killed $cl killed $rcx diff --git a/llvm/test/CodeGen/X86/cgp-usubo.ll b/llvm/test/CodeGen/X86/cgp-usubo.ll --- a/llvm/test/CodeGen/X86/cgp-usubo.ll +++ b/llvm/test/CodeGen/X86/cgp-usubo.ll @@ -168,7 +168,7 @@ ; CHECK-NEXT: pushq %rbx ; CHECK-NEXT: pushq %rax ; CHECK-NEXT: movl %ecx, %ebp -; CHECK-NEXT: testb $1, %bpl +; CHECK-NEXT: testb $1, %cl ; CHECK-NEXT: je .LBB9_2 ; CHECK-NEXT: # %bb.1: # %t ; CHECK-NEXT: movq %rdx, %rbx diff --git a/llvm/test/CodeGen/X86/combine-srem.ll b/llvm/test/CodeGen/X86/combine-srem.ll --- 
a/llvm/test/CodeGen/X86/combine-srem.ll +++ b/llvm/test/CodeGen/X86/combine-srem.ll @@ -475,10 +475,10 @@ ; CHECK-LABEL: combine_i8_srem_negpow2: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: movl %edi, %ecx ; CHECK-NEXT: sarb $7, %cl ; CHECK-NEXT: shrb $2, %cl -; CHECK-NEXT: addb %al, %cl +; CHECK-NEXT: addb %dil, %cl ; CHECK-NEXT: andb $-64, %cl ; CHECK-NEXT: subb %cl, %al ; CHECK-NEXT: # kill: def $al killed $al killed $eax @@ -492,7 +492,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: leal 15(%rax), %ecx -; CHECK-NEXT: testw %ax, %ax +; CHECK-NEXT: testw %di, %di ; CHECK-NEXT: cmovnsl %edi, %ecx ; CHECK-NEXT: andl $-16, %ecx ; CHECK-NEXT: subl %ecx, %eax @@ -507,7 +507,7 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: leal 255(%rax), %ecx -; CHECK-NEXT: testw %ax, %ax +; CHECK-NEXT: testw %di, %di ; CHECK-NEXT: cmovnsl %edi, %ecx ; CHECK-NEXT: andl $-256, %ecx ; CHECK-NEXT: subl %ecx, %eax diff --git a/llvm/test/CodeGen/X86/extract-bits.ll b/llvm/test/CodeGen/X86/extract-bits.ll --- a/llvm/test/CodeGen/X86/extract-bits.ll +++ b/llvm/test/CodeGen/X86/extract-bits.ll @@ -4131,7 +4131,7 @@ ; X64-BMI2-NEXT: pushq %rax ; X64-BMI2-NEXT: movl %edx, %ebx ; X64-BMI2-NEXT: shrxl %esi, %edi, %ebp -; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: movl %edx, %eax ; X64-BMI2-NEXT: negb %al ; X64-BMI2-NEXT: movl $-1, %ecx ; X64-BMI2-NEXT: shrxl %eax, %ecx, %edi @@ -4265,7 +4265,7 @@ ; X64-BMI2-NEXT: pushq %rax ; X64-BMI2-NEXT: movl %edx, %ebx ; X64-BMI2-NEXT: shrxl %esi, %edi, %ebp -; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: movl %edx, %eax ; X64-BMI2-NEXT: negb %al ; X64-BMI2-NEXT: movl $-1, %ecx ; X64-BMI2-NEXT: shrxl %eax, %ecx, %edi @@ -4404,7 +4404,7 @@ ; X64-BMI2-NEXT: pushq %rax ; X64-BMI2-NEXT: movl %edx, %ebx ; X64-BMI2-NEXT: shrxl %esi, (%rdi), %ebp -; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: movl %edx, %eax ; X64-BMI2-NEXT: negb %al ; X64-BMI2-NEXT: movl $-1, %ecx 
; X64-BMI2-NEXT: shrxl %eax, %ecx, %edi @@ -4542,7 +4542,7 @@ ; X64-BMI2-NEXT: pushq %rax ; X64-BMI2-NEXT: movl %edx, %ebx ; X64-BMI2-NEXT: shrxl %esi, (%rdi), %ebp -; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: movl %edx, %eax ; X64-BMI2-NEXT: negb %al ; X64-BMI2-NEXT: movl $-1, %ecx ; X64-BMI2-NEXT: shrxl %eax, %ecx, %edi @@ -4679,7 +4679,7 @@ ; X64-BMI2-NEXT: pushq %rax ; X64-BMI2-NEXT: movl %edx, %ebx ; X64-BMI2-NEXT: shrxl %esi, %edi, %ebp -; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: movl %edx, %eax ; X64-BMI2-NEXT: negb %al ; X64-BMI2-NEXT: movl $-1, %ecx ; X64-BMI2-NEXT: shrxl %eax, %ecx, %edi @@ -4784,7 +4784,7 @@ ; X64-NOBMI-NEXT: pushq %rbx ; X64-NOBMI-NEXT: movl %esi, %ebx ; X64-NOBMI-NEXT: movl %edi, %ebp -; X64-NOBMI-NEXT: movl %ebx, %ecx +; X64-NOBMI-NEXT: movl %esi, %ecx ; X64-NOBMI-NEXT: shrl %cl, %ebp ; X64-NOBMI-NEXT: negb %dl ; X64-NOBMI-NEXT: movl $-1, %r14d @@ -4808,7 +4808,7 @@ ; X64-BMI1-NEXT: pushq %rbx ; X64-BMI1-NEXT: movl %esi, %ebx ; X64-BMI1-NEXT: movl %edi, %ebp -; X64-BMI1-NEXT: movl %ebx, %ecx +; X64-BMI1-NEXT: movl %esi, %ecx ; X64-BMI1-NEXT: shrl %cl, %ebp ; X64-BMI1-NEXT: negb %dl ; X64-BMI1-NEXT: movl $-1, %r14d @@ -4833,7 +4833,7 @@ ; X64-BMI2-NEXT: movl %edx, %ebx ; X64-BMI2-NEXT: movl %esi, %ebp ; X64-BMI2-NEXT: shrxl %esi, %edi, %r14d -; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: movl %edx, %eax ; X64-BMI2-NEXT: negb %al ; X64-BMI2-NEXT: movl $-1, %ecx ; X64-BMI2-NEXT: shrxl %eax, %ecx, %edi @@ -5045,7 +5045,7 @@ ; X64-BMI2-NEXT: pushq %rax ; X64-BMI2-NEXT: movq %rdx, %rbx ; X64-BMI2-NEXT: shrxq %rsi, %rdi, %r14 -; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: movl %edx, %eax ; X64-BMI2-NEXT: negb %al ; X64-BMI2-NEXT: movq $-1, %rcx ; X64-BMI2-NEXT: shrxq %rax, %rcx, %rdi @@ -5252,7 +5252,7 @@ ; X64-BMI2-NEXT: movl %edx, %ebx ; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi ; X64-BMI2-NEXT: shrxq %rsi, %rdi, %r14 -; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: movl %edx, %eax ; X64-BMI2-NEXT: 
negb %al ; X64-BMI2-NEXT: movq $-1, %rcx ; X64-BMI2-NEXT: shrxq %rax, %rcx, %rdi @@ -5463,7 +5463,7 @@ ; X64-BMI2-NEXT: pushq %rax ; X64-BMI2-NEXT: movq %rdx, %rbx ; X64-BMI2-NEXT: shrxq %rsi, (%rdi), %r14 -; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: movl %edx, %eax ; X64-BMI2-NEXT: negb %al ; X64-BMI2-NEXT: movq $-1, %rcx ; X64-BMI2-NEXT: shrxq %rax, %rcx, %rdi @@ -5674,7 +5674,7 @@ ; X64-BMI2-NEXT: movl %edx, %ebx ; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi ; X64-BMI2-NEXT: shrxq %rsi, (%rdi), %r14 -; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: movl %edx, %eax ; X64-BMI2-NEXT: negb %al ; X64-BMI2-NEXT: movq $-1, %rcx ; X64-BMI2-NEXT: shrxq %rax, %rcx, %rdi @@ -5883,7 +5883,7 @@ ; X64-BMI2-NEXT: pushq %rax ; X64-BMI2-NEXT: movq %rdx, %rbx ; X64-BMI2-NEXT: shrxq %rsi, %rdi, %r14 -; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: movl %edx, %eax ; X64-BMI2-NEXT: negb %al ; X64-BMI2-NEXT: movq $-1, %rcx ; X64-BMI2-NEXT: shrxq %rax, %rcx, %rdi @@ -6060,7 +6060,7 @@ ; X64-NOBMI-NEXT: pushq %rbx ; X64-NOBMI-NEXT: movq %rsi, %rbx ; X64-NOBMI-NEXT: movq %rdi, %r14 -; X64-NOBMI-NEXT: movl %ebx, %ecx +; X64-NOBMI-NEXT: movl %esi, %ecx ; X64-NOBMI-NEXT: shrq %cl, %r14 ; X64-NOBMI-NEXT: negb %dl ; X64-NOBMI-NEXT: movq $-1, %r15 @@ -6084,7 +6084,7 @@ ; X64-BMI1-NEXT: pushq %rbx ; X64-BMI1-NEXT: movq %rsi, %rbx ; X64-BMI1-NEXT: movq %rdi, %r14 -; X64-BMI1-NEXT: movl %ebx, %ecx +; X64-BMI1-NEXT: movl %esi, %ecx ; X64-BMI1-NEXT: shrq %cl, %r14 ; X64-BMI1-NEXT: negb %dl ; X64-BMI1-NEXT: movq $-1, %r15 @@ -6109,7 +6109,7 @@ ; X64-BMI2-NEXT: movq %rdx, %rbx ; X64-BMI2-NEXT: movq %rsi, %r14 ; X64-BMI2-NEXT: shrxq %rsi, %rdi, %r15 -; X64-BMI2-NEXT: movl %ebx, %eax +; X64-BMI2-NEXT: movl %edx, %eax ; X64-BMI2-NEXT: negb %al ; X64-BMI2-NEXT: movq $-1, %rcx ; X64-BMI2-NEXT: shrxq %rax, %rcx, %rdi diff --git a/llvm/test/CodeGen/X86/fast-isel-store.ll b/llvm/test/CodeGen/X86/fast-isel-store.ll --- a/llvm/test/CodeGen/X86/fast-isel-store.ll +++ 
b/llvm/test/CodeGen/X86/fast-isel-store.ll @@ -30,7 +30,7 @@ ; ALL32-LABEL: test_store_16: ; ALL32: # %bb.0: # %entry ; ALL32-NEXT: movl %esi, %eax -; ALL32-NEXT: movw %ax, (%rdi) +; ALL32-NEXT: movw %si, (%rdi) ; ALL32-NEXT: # kill: def $ax killed $ax killed $eax ; ALL32-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/freeze.ll b/llvm/test/CodeGen/X86/freeze.ll --- a/llvm/test/CodeGen/X86/freeze.ll +++ b/llvm/test/CodeGen/X86/freeze.ll @@ -126,7 +126,7 @@ ; X86ASM-LABEL: freeze_zext: ; X86ASM: # %bb.0: # %entry ; X86ASM-NEXT: movq %rdi, %rax -; X86ASM-NEXT: movl %eax, %ecx +; X86ASM-NEXT: movl %edi, %ecx ; X86ASM-NEXT: movl $3435973837, %edx # imm = 0xCCCCCCCD ; X86ASM-NEXT: imulq %rcx, %rdx ; X86ASM-NEXT: shrq $35, %rdx diff --git a/llvm/test/CodeGen/X86/funnel-shift.ll b/llvm/test/CodeGen/X86/funnel-shift.ll --- a/llvm/test/CodeGen/X86/funnel-shift.ll +++ b/llvm/test/CodeGen/X86/funnel-shift.ll @@ -153,7 +153,7 @@ ; X86-SSE2-NEXT: calll __umoddi3 ; X86-SSE2-NEXT: addl $16, %esp ; X86-SSE2-NEXT: movl %eax, %ecx -; X86-SSE2-NEXT: testb $32, %cl +; X86-SSE2-NEXT: testb $32, %al ; X86-SSE2-NEXT: jne .LBB3_1 ; X86-SSE2-NEXT: # %bb.2: ; X86-SSE2-NEXT: movl %edi, %ebx diff --git a/llvm/test/CodeGen/X86/iabs.ll b/llvm/test/CodeGen/X86/iabs.ll --- a/llvm/test/CodeGen/X86/iabs.ll +++ b/llvm/test/CodeGen/X86/iabs.ll @@ -22,7 +22,7 @@ ; X64-LABEL: test_i8: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: movl %eax, %ecx +; X64-NEXT: movl %edi, %ecx ; X64-NEXT: sarb $7, %cl ; X64-NEXT: xorb %cl, %al ; X64-NEXT: subb %cl, %al diff --git a/llvm/test/CodeGen/X86/midpoint-int.ll b/llvm/test/CodeGen/X86/midpoint-int.ll --- a/llvm/test/CodeGen/X86/midpoint-int.ll +++ b/llvm/test/CodeGen/X86/midpoint-int.ll @@ -963,7 +963,7 @@ ; X64-LABEL: scalar_i8_signed_reg_reg: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %eax -; X64-NEXT: cmpb %al, %dil +; X64-NEXT: cmpb %sil, %dil ; X64-NEXT: setg %cl ; X64-NEXT: movl %edi, %edx ; X64-NEXT: cmovgl %esi, %edx @@ -1013,7 +1013,7 @@ ; 
X64-LABEL: scalar_i8_unsigned_reg_reg: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %eax -; X64-NEXT: cmpb %al, %dil +; X64-NEXT: cmpb %sil, %dil ; X64-NEXT: seta %cl ; X64-NEXT: movl %edi, %edx ; X64-NEXT: cmoval %esi, %edx diff --git a/llvm/test/CodeGen/X86/peep-setb.ll b/llvm/test/CodeGen/X86/peep-setb.ll --- a/llvm/test/CodeGen/X86/peep-setb.ll +++ b/llvm/test/CodeGen/X86/peep-setb.ll @@ -8,7 +8,7 @@ ; CHECK-LABEL: test1: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %esi, %eax -; CHECK-NEXT: cmpb %al, %dil +; CHECK-NEXT: cmpb %sil, %dil ; CHECK-NEXT: adcb $0, %al ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq @@ -48,7 +48,7 @@ ; CHECK-LABEL: test4: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %esi, %eax -; CHECK-NEXT: cmpb %al, %dil +; CHECK-NEXT: cmpb %sil, %dil ; CHECK-NEXT: sbbb $0, %al ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq @@ -88,7 +88,7 @@ ; CHECK-LABEL: test7: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %esi, %eax -; CHECK-NEXT: cmpb %al, %dil +; CHECK-NEXT: cmpb %sil, %dil ; CHECK-NEXT: adcb $0, %al ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq diff --git a/llvm/test/CodeGen/X86/signbit-test.ll b/llvm/test/CodeGen/X86/signbit-test.ll --- a/llvm/test/CodeGen/X86/signbit-test.ll +++ b/llvm/test/CodeGen/X86/signbit-test.ll @@ -5,7 +5,7 @@ ; CHECK-LABEL: test_clear_mask_i64_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: testl %edi, %edi ; CHECK-NEXT: js .LBB0_2 ; CHECK-NEXT: # %bb.1: # %t ; CHECK-NEXT: movl $42, %eax @@ -26,7 +26,7 @@ ; CHECK-LABEL: test_set_mask_i64_i32: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: testl %eax, %eax +; CHECK-NEXT: testl %edi, %edi ; CHECK-NEXT: jns .LBB1_2 ; CHECK-NEXT: # %bb.1: # %t ; CHECK-NEXT: movl $42, %eax @@ -47,7 +47,7 @@ ; CHECK-LABEL: test_clear_mask_i64_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: testw %ax, %ax +; CHECK-NEXT: testw %di, 
%di ; CHECK-NEXT: js .LBB2_2 ; CHECK-NEXT: # %bb.1: # %t ; CHECK-NEXT: movl $42, %eax @@ -68,7 +68,7 @@ ; CHECK-LABEL: test_set_mask_i64_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: testw %ax, %ax +; CHECK-NEXT: testw %di, %di ; CHECK-NEXT: jns .LBB3_2 ; CHECK-NEXT: # %bb.1: # %t ; CHECK-NEXT: movl $42, %eax @@ -89,7 +89,7 @@ ; CHECK-LABEL: test_clear_mask_i64_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: testb %dil, %dil ; CHECK-NEXT: js .LBB4_2 ; CHECK-NEXT: # %bb.1: # %t ; CHECK-NEXT: movl $42, %eax @@ -110,7 +110,7 @@ ; CHECK-LABEL: test_set_mask_i64_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movq %rdi, %rax -; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: testb %dil, %dil ; CHECK-NEXT: jns .LBB5_2 ; CHECK-NEXT: # %bb.1: # %t ; CHECK-NEXT: movl $42, %eax @@ -131,7 +131,7 @@ ; CHECK-LABEL: test_clear_mask_i32_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: testw %ax, %ax +; CHECK-NEXT: testw %di, %di ; CHECK-NEXT: js .LBB6_2 ; CHECK-NEXT: # %bb.1: # %t ; CHECK-NEXT: movl $42, %eax @@ -152,7 +152,7 @@ ; CHECK-LABEL: test_set_mask_i32_i16: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: testw %ax, %ax +; CHECK-NEXT: testw %di, %di ; CHECK-NEXT: jns .LBB7_2 ; CHECK-NEXT: # %bb.1: # %t ; CHECK-NEXT: movl $42, %eax @@ -173,7 +173,7 @@ ; CHECK-LABEL: test_clear_mask_i32_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: testb %dil, %dil ; CHECK-NEXT: js .LBB8_2 ; CHECK-NEXT: # %bb.1: # %t ; CHECK-NEXT: movl $42, %eax @@ -194,7 +194,7 @@ ; CHECK-LABEL: test_set_mask_i32_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: testb %dil, %dil ; CHECK-NEXT: jns .LBB9_2 ; CHECK-NEXT: # %bb.1: # %t ; CHECK-NEXT: movl $42, %eax @@ -215,7 +215,7 @@ ; CHECK-LABEL: test_clear_mask_i16_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movl 
%edi, %eax -; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: testb %dil, %dil ; CHECK-NEXT: js .LBB10_2 ; CHECK-NEXT: # %bb.1: # %t ; CHECK-NEXT: movw $42, %ax @@ -237,7 +237,7 @@ ; CHECK-LABEL: test_set_mask_i16_i8: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: testb %al, %al +; CHECK-NEXT: testb %dil, %dil ; CHECK-NEXT: jns .LBB11_2 ; CHECK-NEXT: # %bb.1: # %t ; CHECK-NEXT: movw $42, %ax @@ -259,7 +259,7 @@ ; CHECK-LABEL: test_set_mask_i16_i7: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: testb $64, %al +; CHECK-NEXT: testb $64, %dil ; CHECK-NEXT: je .LBB12_2 ; CHECK-NEXT: # %bb.1: # %t ; CHECK-NEXT: movw $42, %ax diff --git a/llvm/test/CodeGen/X86/smax.ll b/llvm/test/CodeGen/X86/smax.ll --- a/llvm/test/CodeGen/X86/smax.ll +++ b/llvm/test/CodeGen/X86/smax.ll @@ -24,7 +24,7 @@ ; X64-LABEL: test_i8: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %eax -; X64-NEXT: cmpb %al, %dil +; X64-NEXT: cmpb %sil, %dil ; X64-NEXT: cmovgl %edi, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq @@ -45,7 +45,7 @@ ; X64-LABEL: test_i16: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %eax -; X64-NEXT: cmpw %ax, %di +; X64-NEXT: cmpw %si, %di ; X64-NEXT: cmovgl %edi, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq diff --git a/llvm/test/CodeGen/X86/smin.ll b/llvm/test/CodeGen/X86/smin.ll --- a/llvm/test/CodeGen/X86/smin.ll +++ b/llvm/test/CodeGen/X86/smin.ll @@ -24,7 +24,7 @@ ; X64-LABEL: test_i8: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %eax -; X64-NEXT: cmpb %al, %dil +; X64-NEXT: cmpb %sil, %dil ; X64-NEXT: cmovll %edi, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq @@ -45,7 +45,7 @@ ; X64-LABEL: test_i16: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %eax -; X64-NEXT: cmpw %ax, %di +; X64-NEXT: cmpw %si, %di ; X64-NEXT: cmovll %edi, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq diff --git a/llvm/test/CodeGen/X86/subvectorwise-store-of-vector-splat.ll 
b/llvm/test/CodeGen/X86/subvectorwise-store-of-vector-splat.ll --- a/llvm/test/CodeGen/X86/subvectorwise-store-of-vector-splat.ll +++ b/llvm/test/CodeGen/X86/subvectorwise-store-of-vector-splat.ll @@ -7336,7 +7336,7 @@ ; SCALAR-NEXT: movw %r8w, 4(%rdx) ; SCALAR-NEXT: movw %ax, 2(%rdx) ; SCALAR-NEXT: movl %ebx, %esi -; SCALAR-NEXT: movw %si, (%rdx) +; SCALAR-NEXT: movw %bx, (%rdx) ; SCALAR-NEXT: movw %r13w, 62(%rdx) ; SCALAR-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ebx # 4-byte Reload ; SCALAR-NEXT: movw %bx, 60(%rdx) diff --git a/llvm/test/CodeGen/X86/swift-return.ll b/llvm/test/CodeGen/X86/swift-return.ll --- a/llvm/test/CodeGen/X86/swift-return.ll +++ b/llvm/test/CodeGen/X86/swift-return.ll @@ -458,9 +458,9 @@ ; CHECK-LABEL: gen9: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: movl %eax, %edx -; CHECK-NEXT: movl %eax, %ecx -; CHECK-NEXT: movl %eax, %r8d +; CHECK-NEXT: movl %edi, %edx +; CHECK-NEXT: movl %edi, %ecx +; CHECK-NEXT: movl %edi, %r8d ; CHECK-NEXT: retq ; ; CHECK-O0-LABEL: gen9: diff --git a/llvm/test/CodeGen/X86/umax.ll b/llvm/test/CodeGen/X86/umax.ll --- a/llvm/test/CodeGen/X86/umax.ll +++ b/llvm/test/CodeGen/X86/umax.ll @@ -25,7 +25,7 @@ ; X64-LABEL: test_i8: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %eax -; X64-NEXT: cmpb %al, %dil +; X64-NEXT: cmpb %sil, %dil ; X64-NEXT: cmoval %edi, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq @@ -46,7 +46,7 @@ ; X64-LABEL: test_i8_1: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: cmpb $1, %al +; X64-NEXT: cmpb $1, %dil ; X64-NEXT: adcl $0, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq @@ -66,7 +66,7 @@ ; X64-LABEL: test_i16: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %eax -; X64-NEXT: cmpw %ax, %di +; X64-NEXT: cmpw %si, %di ; X64-NEXT: cmoval %edi, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq @@ -87,7 +87,7 @@ ; X64-LABEL: test_i16_1: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: cmpw $1, %ax +; 
X64-NEXT: cmpw $1, %di ; X64-NEXT: adcl $0, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq diff --git a/llvm/test/CodeGen/X86/umin.ll b/llvm/test/CodeGen/X86/umin.ll --- a/llvm/test/CodeGen/X86/umin.ll +++ b/llvm/test/CodeGen/X86/umin.ll @@ -24,7 +24,7 @@ ; X64-LABEL: test_i8: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %eax -; X64-NEXT: cmpb %al, %dil +; X64-NEXT: cmpb %sil, %dil ; X64-NEXT: cmovbl %edi, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq @@ -45,7 +45,7 @@ ; X64-LABEL: test_i16: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %eax -; X64-NEXT: cmpw %ax, %di +; X64-NEXT: cmpw %si, %di ; X64-NEXT: cmovbl %edi, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq diff --git a/llvm/test/CodeGen/X86/use-cr-result-of-dom-icmp-st.ll b/llvm/test/CodeGen/X86/use-cr-result-of-dom-icmp-st.ll --- a/llvm/test/CodeGen/X86/use-cr-result-of-dom-icmp-st.ll +++ b/llvm/test/CodeGen/X86/use-cr-result-of-dom-icmp-st.ll @@ -36,7 +36,7 @@ ; EQ2ICMP: # %bb.0: # %entry ; EQ2ICMP-NEXT: movq %rsi, %rax ; EQ2ICMP-NEXT: movq %rdi, %rdx -; EQ2ICMP-NEXT: movl %eax, %ecx +; EQ2ICMP-NEXT: movl %esi, %ecx ; EQ2ICMP-NEXT: shlq %cl, %rdx ; EQ2ICMP-NEXT: cmpq $-2, %rdx ; EQ2ICMP-NEXT: jg .LBB0_2 @@ -83,7 +83,7 @@ ; EQ2ICMP: # %bb.0: # %entry ; EQ2ICMP-NEXT: movq %rsi, %rax ; EQ2ICMP-NEXT: movq %rdi, %rdx -; EQ2ICMP-NEXT: movl %eax, %ecx +; EQ2ICMP-NEXT: movl %esi, %ecx ; EQ2ICMP-NEXT: shlq %cl, %rdx ; EQ2ICMP-NEXT: testq %rdx, %rdx ; EQ2ICMP-NEXT: js .LBB1_1 @@ -131,7 +131,7 @@ ; EQ2ICMP: # %bb.0: # %entry ; EQ2ICMP-NEXT: movq %rsi, %rax ; EQ2ICMP-NEXT: movq %rdi, %rdx -; EQ2ICMP-NEXT: movl %eax, %ecx +; EQ2ICMP-NEXT: movl %esi, %ecx ; EQ2ICMP-NEXT: shlq %cl, %rdx ; EQ2ICMP-NEXT: testq %rdx, %rdx ; EQ2ICMP-NEXT: jle .LBB2_1 @@ -178,7 +178,7 @@ ; EQ2ICMP: # %bb.0: # %entry ; EQ2ICMP-NEXT: movq %rsi, %rax ; EQ2ICMP-NEXT: movq %rdi, %rdx -; EQ2ICMP-NEXT: movl %eax, %ecx +; EQ2ICMP-NEXT: movl %esi, %ecx ; EQ2ICMP-NEXT: shlq %cl, %rdx ; 
EQ2ICMP-NEXT: cmpq $1, %rdx ; EQ2ICMP-NEXT: jg .LBB3_2 @@ -224,7 +224,7 @@ ; EQ2ICMP: # %bb.0: # %entry ; EQ2ICMP-NEXT: movq %rsi, %rax ; EQ2ICMP-NEXT: movq %rdi, %rdx -; EQ2ICMP-NEXT: movl %eax, %ecx +; EQ2ICMP-NEXT: movl %esi, %ecx ; EQ2ICMP-NEXT: shlq %cl, %rdx ; EQ2ICMP-NEXT: cmpq $2, %rdx ; EQ2ICMP-NEXT: jg .LBB4_2 diff --git a/llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-6.ll b/llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-6.ll --- a/llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-6.ll +++ b/llvm/test/CodeGen/X86/vector-interleaved-load-i8-stride-6.ll @@ -4335,7 +4335,7 @@ ; AVX2-ONLY-NEXT: vmovdqa {{.*#+}} xmm12 = ; AVX2-ONLY-NEXT: vpshufb %xmm12, %xmm2, %xmm3 ; AVX2-ONLY-NEXT: vmovdqa {{.*#+}} xmm9 = -; AVX2-ONLY-NEXT: vpshufb %xmm9, %xmm11, %xmm6 +; AVX2-ONLY-NEXT: vpshufb %xmm9, %xmm8, %xmm6 ; AVX2-ONLY-NEXT: vpor %xmm3, %xmm6, %xmm3 ; AVX2-ONLY-NEXT: vextracti128 $1, %ymm5, %xmm6 ; AVX2-ONLY-NEXT: vmovdqa {{.*#+}} xmm8 = <128,128,0,6,12,128,128,128,4,10,u,u,u,u,u,u>