diff --git a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp --- a/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp +++ b/llvm/lib/Target/PowerPC/PPCMIPeephole.cpp @@ -96,6 +96,8 @@ MachineBlockFrequencyInfo *MBFI; uint64_t EntryFreq; + Register getVecCopyOrSubregToReg(MachineInstr &MI, Register SrcReg, + const TargetRegisterClass *ToRC) const; // Initialize class variables. void initialize(MachineFunction &MFParm); @@ -127,7 +129,14 @@ if (skipFunction(MF.getFunction())) return false; initialize(MF); - return simplifyCode(); + bool Changed = simplifyCode(); +#ifndef NDEBUG + if (Changed) { + LLVM_DEBUG(dbgs() << "Machine function after transformations:\n"); + LLVM_DEBUG(MF.dump()); + } +#endif + return Changed; } }; @@ -260,6 +269,74 @@ TOCSaves[MI] = Keep; } +static bool isSwap(MachineInstr *MI) { + unsigned Opc = MI->getOpcode(); + if (Opc == PPC::XXPERMDIs && MI->getOperand(2).getImm() == 2) + return true; + if ((Opc == PPC::XXPERMDI || Opc == PPC::XXSLDWI) && + MI->getOperand(1).getReg() == MI->getOperand(2).getReg() && + MI->getOperand(3).getImm() == 2) + return true; + return false; +} + +// Is the vreg produced by this instruction symmetrical about the midpoint. +// This is the case for splats, some constants, etc. +static bool isSymmetrical(MachineInstr *MI) { + unsigned Opc = MI->getOpcode(); + switch (Opc) { + case PPC::VSPLTB: + case PPC::VSPLTH: + case PPC::VSPLTW: + case PPC::VSPLTBs: + case PPC::VSPLTHs: + case PPC::VSPLTISB: + case PPC::VSPLTISH: + case PPC::VSPLTISW: + case PPC::LXVDSX: + case PPC::LXVWSX: + case PPC::MTVSRWS: + case PPC::XXSPLTW: + case PPC::XXSPLTIB: + case PPC::XXLXORz: + case PPC::XXSPLTWs: + case PPC::XXLEQVOnes: + return true; + } + + // Some instructions are symmetrical with the right inputs. + if ((Opc == PPC::XXPERMDI && + MI->getOperand(1).getReg() == MI->getOperand(2).getReg()) || + Opc == PPC::XXPERMDIs) { + int64_t Imm = MI->getOperand(Opc == PPC::XXPERMDIs ? 2 : 3).getImm(); + return Imm == 0 || Imm == 3; + } + return false; +} + +Register +PPCMIPeephole::getVecCopyOrSubregToReg(MachineInstr &MI, Register SrcReg, + const TargetRegisterClass *ToRC) const { + const TargetRegisterInfo *TRI = &TII->getRegisterInfo(); + const TargetRegisterClass *FromRC = MRI->getRegClass(SrcReg); + Register DestReg = MRI->createVirtualRegister(ToRC); + MachineBasicBlock &MBB = *MI.getParent(); + MachineInstrBuilder Copy; + if (TRI->getRegSizeInBits(*FromRC) >= TRI->getRegSizeInBits(*ToRC)) + Copy = BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY), DestReg) + .addReg(SrcReg); + else + Copy = BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::SUBREG_TO_REG), + DestReg) + .addImm(1) + .addReg(SrcReg) + .addImm(PPC::sub_64); + LLVM_DEBUG(dbgs() << "Emitted copy: "); + LLVM_DEBUG(Copy->dump()); + (void)Copy; + return DestReg; +} + // Perform peephole optimizations. bool PPCMIPeephole::simplifyCode(void) { bool Simplified = false; @@ -578,6 +655,100 @@ } break; } + case PPC::VMRGLB: + case PPC::VMRGHB: + case PPC::VMRGLH: + case PPC::VMRGHH: + case PPC::VMRGLW: + case PPC::VMRGHW: + case PPC::XXMRGLW: + case PPC::XXMRGHW: { + // (mrgl (swap %a), %b) --> (mrgh %a, %b) if %b is symmetrical + // (mrgl (swap %a), (swap %b)) --> (mrgh %a, %b) + bool Swapped = false; + Register Reg1 = TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI); + Register Reg2 = TRI->lookThruCopyLike(MI.getOperand(2).getReg(), MRI); + if (!Register::isVirtualRegister(Reg1) || + !Register::isVirtualRegister(Reg2)) + break; + MachineInstr *SrcMI1 = MRI->getVRegDef(Reg1); + MachineInstr *SrcMI2 = MRI->getVRegDef(Reg2); + + // Canonicalize to the swap being in SrcMI1. + if (!isSwap(SrcMI1)) { + std::swap(SrcMI1, SrcMI2); + Swapped = true; + } + bool IsLHSSwap = isSwap(SrcMI1); + bool IsRHSSwap = isSwap(SrcMI2); + + // We can handle two cases: + // 1. Both sides come from a swap. + // 2. LHS comes from a swap, RHS is symmetrical. + if (!(IsLHSSwap && IsRHSSwap) && !(IsLHSSwap && isSymmetrical(SrcMI2))) + break; + + LLVM_DEBUG( + dbgs() << "Found a merge fed by swap/swap or swap/symmetrical:\n"); + LLVM_DEBUG(dbgs() << "Merge: "); + LLVM_DEBUG(MI.dump()); + LLVM_DEBUG(dbgs() << "Source 1: "); + LLVM_DEBUG(SrcMI1->dump()); + LLVM_DEBUG(dbgs() << "Source 2: "); + LLVM_DEBUG(SrcMI2->dump()); + + unsigned NewOpc; + switch (MI.getOpcode()) { + case PPC::VMRGLB: + NewOpc = PPC::VMRGHB; + break; + case PPC::VMRGLH: + NewOpc = PPC::VMRGHH; + break; + case PPC::VMRGLW: + NewOpc = PPC::VMRGHW; + break; + case PPC::XXMRGLW: + NewOpc = PPC::XXMRGHW; + break; + case PPC::VMRGHB: + NewOpc = PPC::VMRGLB; + break; + case PPC::VMRGHH: + NewOpc = PPC::VMRGLH; + break; + case PPC::VMRGHW: + NewOpc = PPC::VMRGLW; + break; + case PPC::XXMRGHW: + NewOpc = PPC::XXMRGLW; + break; + } + + const TargetRegisterClass *ToRC = + MRI->getRegClass(MI.getOperand(0).getReg()); + MI.setDesc(TII->get(NewOpc)); + + // We need to emit a copy for the LHS in both cases. + Register RegToCopy = + TRI->lookThruCopyLike(SrcMI1->getOperand(1).getReg(), MRI); + Register NewSrcReg = getVecCopyOrSubregToReg(MI, RegToCopy, ToRC); + + // If the RHS is also a swap, get a copy for that as well. + if (IsRHSSwap) { + Register RHSRegToCopy = + TRI->lookThruCopyLike(SrcMI2->getOperand(1).getReg(), MRI); + Register RHSNewSrcReg = + getVecCopyOrSubregToReg(MI, RHSRegToCopy, ToRC); + MI.getOperand(Swapped ? 1 : 2).setReg(RHSNewSrcReg); + } + + MI.getOperand(Swapped ? 2 : 1).setReg(NewSrcReg); + LLVM_DEBUG(dbgs() << "Converting to: "); + LLVM_DEBUG(MI.dump()); + Simplified = true; + break; + } case PPC::EXTSH: case PPC::EXTSH8: case PPC::EXTSH8_32_64: { diff --git a/llvm/test/CodeGen/PowerPC/merge_of_swap.ll b/llvm/test/CodeGen/PowerPC/merge_of_swap.ll new file mode 100644 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/merge_of_swap.ll @@ -0,0 +1,425 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-P8 +; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \ +; RUN: FileCheck %s --check-prefix=CHECK-P9 +define dso_local <4 x i32> @mrglb_self(<8 x i8>* nocapture readonly %c) local_unnamed_addr #0 { +; CHECK-P8-LABEL: mrglb_self: +; CHECK-P8: # %bb.0: # %entry +; CHECK-P8-NEXT: ld r3, 0(r3) +; CHECK-P8-NEXT: mtvsrd v2, r3 +; CHECK-P8-NEXT: vmrghb v2, v2, v2 +; CHECK-P8-NEXT: blr +; +; CHECK-P9-LABEL: mrglb_self: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9-NEXT: lxsd v2, 0(r3) +; CHECK-P9-NEXT: vmrghb v2, v2, v2 +; CHECK-P9-NEXT: blr +entry: + %0 = load <8 x i8>, <8 x i8>* %c, align 8 + %vecinit30 = shufflevector <8 x i8> %0, <8 x i8> undef, <16 x i32> + %1 = bitcast <16 x i8> %vecinit30 to <4 x i32> + ret <4 x i32> %1 +} + +define dso_local <4 x i32> @mrglb_sym(<8 x i8>* nocapture readonly %c) local_unnamed_addr #0 { +; CHECK-P8-LABEL: mrglb_sym: +; CHECK-P8: # %bb.0: # %entry +; CHECK-P8-NEXT: ld r3, 0(r3) +; CHECK-P8-NEXT: xxlxor v2, v2, v2 +; CHECK-P8-NEXT: mtvsrd v3, r3 +; CHECK-P8-NEXT: vmrghb v2, v2, v3 +; CHECK-P8-NEXT: blr +; +; CHECK-P9-LABEL: mrglb_sym: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9-NEXT: lxsd v2, 0(r3) +; CHECK-P9-NEXT: xxlxor v3, v3, v3 +; CHECK-P9-NEXT: vmrghb v2, v3, v2 +; CHECK-P9-NEXT: blr +entry: + %0 = load <8 x i8>, <8 x i8>* %c, align 8 + %vecinit30 = shufflevector <8 x i8> %0, <8 x i8> zeroinitializer, <16 x i32> + %1 = bitcast <16 x i8> %vecinit30 to <4 x i32> + ret <4 x i32> %1 +} + +define dso_local <4 x i32> @mrglb_swap(<8 x i8>* nocapture readonly %c, <8 x i8>* nocapture readonly %d) local_unnamed_addr #0 { +; CHECK-P8-LABEL: mrglb_swap: +; CHECK-P8: # %bb.0: # %entry +; CHECK-P8-NEXT: ld r3, 0(r3) +; CHECK-P8-NEXT: ld r4, 0(r4) +; CHECK-P8-NEXT: mtvsrd v2, r3 +; CHECK-P8-NEXT: mtvsrd v3, r4 +; CHECK-P8-NEXT: vmrghb v2, v3, v2 +; CHECK-P8-NEXT: blr +; +; CHECK-P9-LABEL: mrglb_swap: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9-NEXT: lxsd v2, 0(r3) +; CHECK-P9-NEXT: lxsd v3, 0(r4) +; CHECK-P9-NEXT: vmrghb v2, v3, v2 +; CHECK-P9-NEXT: blr +entry: + %0 = load <8 x i8>, <8 x i8>* %c, align 8 + %1 = load <8 x i8>, <8 x i8>* %d, align 8 + %vecinit30 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> + %2 = bitcast <16 x i8> %vecinit30 to <4 x i32> + ret <4 x i32> %2 +} + +define dso_local <4 x i32> @mrghb_self(<16 x i8>* nocapture readonly %c) local_unnamed_addr #0 { +; CHECK-P8-LABEL: mrghb_self: +; CHECK-P8: # %bb.0: # %entry +; CHECK-P8-NEXT: lxvd2x v2, 0, r3 +; CHECK-P8-NEXT: vmrglb v2, v2, v2 +; CHECK-P8-NEXT: blr +; +; CHECK-P9-LABEL: mrghb_self: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9-NEXT: lxv v2, 0(r3) +; CHECK-P9-NEXT: vmrghb v2, v2, v2 +; CHECK-P9-NEXT: blr +entry: + %0 = load <16 x i8>, <16 x i8>* %c, align 8 + %vecinit30 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> + %1 = bitcast <16 x i8> %vecinit30 to <4 x i32> + ret <4 x i32> %1 +} + +define dso_local <4 x i32> @mrghb_sym(<16 x i8>* nocapture readonly %c) local_unnamed_addr #0 { +; CHECK-P8-LABEL: mrghb_sym: +; CHECK-P8: # %bb.0: # %entry +; CHECK-P8-NEXT: lxvd2x v2, 0, r3 +; CHECK-P8-NEXT: xxlxor v3, v3, v3 +; CHECK-P8-NEXT: vmrglb v2, v3, v2 +; CHECK-P8-NEXT: blr +; +; CHECK-P9-LABEL: mrghb_sym: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9-NEXT: lxv v2, 0(r3) +; CHECK-P9-NEXT: xxlxor v3, v3, v3 +; CHECK-P9-NEXT: vmrghb v2, v3, v2 +; CHECK-P9-NEXT: blr +entry: + %0 = load <16 x i8>, <16 x i8>* %c, align 8 + %vecinit30 = shufflevector <16 x i8> %0, <16 x i8> zeroinitializer, <16 x i32> + %1 = bitcast <16 x i8> %vecinit30 to <4 x i32> + ret <4 x i32> %1 +} + +define dso_local <4 x i32> @mrghb_swap(<16 x i8>* nocapture readonly %c, <16 x i8>* nocapture readonly %d) local_unnamed_addr #0 { +; CHECK-P8-LABEL: mrghb_swap: +; CHECK-P8: # %bb.0: # %entry +; CHECK-P8-NEXT: lxvd2x v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x v3, 0, r4 +; CHECK-P8-NEXT: vmrglb v2, v3, v2 +; CHECK-P8-NEXT: blr +; +; CHECK-P9-LABEL: mrghb_swap: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9-NEXT: lxv v2, 0(r3) +; CHECK-P9-NEXT: lxv v3, 0(r4) +; CHECK-P9-NEXT: vmrghb v2, v3, v2 +; CHECK-P9-NEXT: blr +entry: + %0 = load <16 x i8>, <16 x i8>* %c, align 8 + %1 = load <16 x i8>, <16 x i8>* %d, align 8 + %vecinit30 = shufflevector <16 x i8> %0, <16 x i8> %1, <16 x i32> + %2 = bitcast <16 x i8> %vecinit30 to <4 x i32> + ret <4 x i32> %2 +} + +define dso_local <4 x i32> @mrglh_self(<8 x i8>* nocapture readonly %c) local_unnamed_addr #0 { +; CHECK-P8-LABEL: mrglh_self: +; CHECK-P8: # %bb.0: # %entry +; CHECK-P8-NEXT: ld r3, 0(r3) +; CHECK-P8-NEXT: mtvsrd v2, r3 +; CHECK-P8-NEXT: vmrghh v2, v2, v2 +; CHECK-P8-NEXT: blr +; +; CHECK-P9-LABEL: mrglh_self: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9-NEXT: lxsd v2, 0(r3) +; CHECK-P9-NEXT: vmrghh v2, v2, v2 +; CHECK-P9-NEXT: blr +entry: + %0 = load <8 x i8>, <8 x i8>* %c, align 8 + %vecinit30 = shufflevector <8 x i8> %0, <8 x i8> undef, <16 x i32> + %1 = bitcast <16 x i8> %vecinit30 to <4 x i32> + ret <4 x i32> %1 +} + +define dso_local <4 x i32> @mrglh_sym(<8 x i8>* nocapture readonly %c) local_unnamed_addr #0 { +; CHECK-P8-LABEL: mrglh_sym: +; CHECK-P8: # %bb.0: # %entry +; CHECK-P8-NEXT: ld r3, 0(r3) +; CHECK-P8-NEXT: xxlxor v2, v2, v2 +; CHECK-P8-NEXT: mtvsrd v3, r3 +; CHECK-P8-NEXT: vmrghh v2, v2, v3 +; CHECK-P8-NEXT: blr +; +; CHECK-P9-LABEL: mrglh_sym: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9-NEXT: lxsd v2, 0(r3) +; CHECK-P9-NEXT: xxlxor v3, v3, v3 +; CHECK-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-P9-NEXT: blr +entry: + %0 = load <8 x i8>, <8 x i8>* %c, align 8 + %vecinit30 = shufflevector <8 x i8> %0, <8 x i8> zeroinitializer, <16 x i32> + %1 = bitcast <16 x i8> %vecinit30 to <4 x i32> + ret <4 x i32> %1 +} + +define dso_local <4 x i32> @mrglh_swap(<8 x i8>* nocapture readonly %c, <8 x i8>* nocapture readonly %d) local_unnamed_addr #0 { +; CHECK-P8-LABEL: mrglh_swap: +; CHECK-P8: # %bb.0: # %entry +; CHECK-P8-NEXT: ld r3, 0(r3) +; CHECK-P8-NEXT: ld r4, 0(r4) +; CHECK-P8-NEXT: mtvsrd v2, r3 +; CHECK-P8-NEXT: mtvsrd v3, r4 +; CHECK-P8-NEXT: vmrghh v2, v3, v2 +; CHECK-P8-NEXT: blr +; +; CHECK-P9-LABEL: mrglh_swap: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9-NEXT: lxsd v2, 0(r3) +; CHECK-P9-NEXT: lxsd v3, 0(r4) +; CHECK-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-P9-NEXT: blr +entry: + %0 = load <8 x i8>, <8 x i8>* %c, align 8 + %1 = load <8 x i8>, <8 x i8>* %d, align 8 + %vecinit30 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> + %2 = bitcast <16 x i8> %vecinit30 to <4 x i32> + ret <4 x i32> %2 +} + +define dso_local <4 x i32> @mrghh_self(<16 x i8>* nocapture readonly %c) local_unnamed_addr #0 { +; CHECK-P8-LABEL: mrghh_self: +; CHECK-P8: # %bb.0: # %entry +; CHECK-P8-NEXT: lxvd2x v2, 0, r3 +; CHECK-P8-NEXT: vmrglh v2, v2, v2 +; CHECK-P8-NEXT: blr +; +; CHECK-P9-LABEL: mrghh_self: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9-NEXT: lxv v2, 0(r3) +; CHECK-P9-NEXT: vmrghh v2, v2, v2 +; CHECK-P9-NEXT: blr +entry: + %0 = load <16 x i8>, <16 x i8>* %c, align 8 + %vecinit30 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> + %1 = bitcast <16 x i8> %vecinit30 to <4 x i32> + ret <4 x i32> %1 +} + +define dso_local <4 x i32> @mrghh_sym(<16 x i8>* nocapture readonly %c) local_unnamed_addr #0 { +; CHECK-P8-LABEL: mrghh_sym: +; CHECK-P8: # %bb.0: # %entry +; CHECK-P8-NEXT: lxvd2x v2, 0, r3 +; CHECK-P8-NEXT: xxlxor v3, v3, v3 +; CHECK-P8-NEXT: vmrglh v2, v3, v2 +; CHECK-P8-NEXT: blr +; +; CHECK-P9-LABEL: mrghh_sym: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9-NEXT: lxv v2, 0(r3) +; CHECK-P9-NEXT: xxlxor v3, v3, v3 +; CHECK-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-P9-NEXT: blr +entry: + %0 = load <16 x i8>, <16 x i8>* %c, align 8 + %vecinit30 = shufflevector <16 x i8> %0, <16 x i8> zeroinitializer, <16 x i32> + %1 = bitcast <16 x i8> %vecinit30 to <4 x i32> + ret <4 x i32> %1 +} + +define dso_local <4 x i32> @mrghh_swap(<16 x i8>* nocapture readonly %c, <16 x i8>* nocapture readonly %d) local_unnamed_addr #0 { +; CHECK-P8-LABEL: mrghh_swap: +; CHECK-P8: # %bb.0: # %entry +; CHECK-P8-NEXT: lxvd2x v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x v3, 0, r4 +; CHECK-P8-NEXT: vmrglh v2, v3, v2 +; CHECK-P8-NEXT: blr +; +; CHECK-P9-LABEL: mrghh_swap: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9-NEXT: lxv v2, 0(r3) +; CHECK-P9-NEXT: lxv v3, 0(r4) +; CHECK-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-P9-NEXT: blr +entry: + %0 = load <16 x i8>, <16 x i8>* %c, align 8 + %1 = load <16 x i8>, <16 x i8>* %d, align 8 + %vecinit30 = shufflevector <16 x i8> %0, <16 x i8> %1, <16 x i32> + %2 = bitcast <16 x i8> %vecinit30 to <4 x i32> + ret <4 x i32> %2 +} + +define dso_local <4 x i32> @mrglw_self(<8 x i8>* nocapture readonly %c) local_unnamed_addr #0 { +; CHECK-P8-LABEL: mrglw_self: +; CHECK-P8: # %bb.0: # %entry +; CHECK-P8-NEXT: ld r3, 0(r3) +; CHECK-P8-NEXT: mtvsrd v2, r3 +; CHECK-P8-NEXT: vmrghw v2, v2, v2 +; CHECK-P8-NEXT: blr +; +; CHECK-P9-LABEL: mrglw_self: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9-NEXT: lxsd v2, 0(r3) +; CHECK-P9-NEXT: vmrghw v2, v2, v2 +; CHECK-P9-NEXT: blr +entry: + %0 = load <8 x i8>, <8 x i8>* %c, align 8 + %vecinit30 = shufflevector <8 x i8> %0, <8 x i8> undef, <16 x i32> + %1 = bitcast <16 x i8> %vecinit30 to <4 x i32> + ret <4 x i32> %1 +} + +define dso_local <4 x i32> @mrglw_sym(<8 x i8>* nocapture readonly %c) local_unnamed_addr #0 { +; CHECK-P8-LABEL: mrglw_sym: +; CHECK-P8: # %bb.0: # %entry +; CHECK-P8-NEXT: ld r3, 0(r3) +; CHECK-P8-NEXT: xxlxor v2, v2, v2 +; CHECK-P8-NEXT: mtvsrd v3, r3 +; CHECK-P8-NEXT: vmrghw v2, v2, v3 +; CHECK-P8-NEXT: blr +; +; CHECK-P9-LABEL: mrglw_sym: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9-NEXT: lxsd v2, 0(r3) +; CHECK-P9-NEXT: xxlxor v3, v3, v3 +; CHECK-P9-NEXT: vmrghw v2, v3, v2 +; CHECK-P9-NEXT: blr +entry: + %0 = load <8 x i8>, <8 x i8>* %c, align 8 + %vecinit30 = shufflevector <8 x i8> %0, <8 x i8> zeroinitializer, <16 x i32> + %1 = bitcast <16 x i8> %vecinit30 to <4 x i32> + ret <4 x i32> %1 +} + +define dso_local <4 x i32> @mrglw_swap(<8 x i8>* nocapture readonly %c, <8 x i8>* nocapture readonly %d) local_unnamed_addr #0 { +; CHECK-P8-LABEL: mrglw_swap: +; CHECK-P8: # %bb.0: # %entry +; CHECK-P8-NEXT: ld r3, 0(r3) +; CHECK-P8-NEXT: ld r4, 0(r4) +; CHECK-P8-NEXT: mtvsrd v2, r3 +; CHECK-P8-NEXT: mtvsrd v3, r4 +; CHECK-P8-NEXT: vmrghw v2, v3, v2 +; CHECK-P8-NEXT: blr +; +; CHECK-P9-LABEL: mrglw_swap: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9-NEXT: lxsd v2, 0(r3) +; CHECK-P9-NEXT: lxsd v3, 0(r4) +; CHECK-P9-NEXT: vmrghw v2, v3, v2 +; CHECK-P9-NEXT: blr +entry: + %0 = load <8 x i8>, <8 x i8>* %c, align 8 + %1 = load <8 x i8>, <8 x i8>* %d, align 8 + %vecinit30 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> + %2 = bitcast <16 x i8> %vecinit30 to <4 x i32> + ret <4 x i32> %2 +} + +define dso_local <4 x i32> @mrghw_self(<16 x i8>* nocapture readonly %c) local_unnamed_addr #0 { +; CHECK-P8-LABEL: mrghw_self: +; CHECK-P8: # %bb.0: # %entry +; CHECK-P8-NEXT: lxvd2x v2, 0, r3 +; CHECK-P8-NEXT: vmrglw v2, v2, v2 +; CHECK-P8-NEXT: blr +; +; CHECK-P9-LABEL: mrghw_self: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9-NEXT: lxv v2, 0(r3) +; CHECK-P9-NEXT: vmrghw v2, v2, v2 +; CHECK-P9-NEXT: blr +entry: + %0 = load <16 x i8>, <16 x i8>* %c, align 8 + %vecinit30 = shufflevector <16 x i8> %0, <16 x i8> undef, <16 x i32> + %1 = bitcast <16 x i8> %vecinit30 to <4 x i32> + ret <4 x i32> %1 +} + +define dso_local <4 x i32> @mrghw_sym(<16 x i8>* nocapture readonly %c) local_unnamed_addr #0 { +; CHECK-P8-LABEL: mrghw_sym: +; CHECK-P8: # %bb.0: # %entry +; CHECK-P8-NEXT: lxvd2x v2, 0, r3 +; CHECK-P8-NEXT: xxlxor v3, v3, v3 +; CHECK-P8-NEXT: vmrglw v2, v3, v2 +; CHECK-P8-NEXT: blr +; +; CHECK-P9-LABEL: mrghw_sym: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9-NEXT: lxv v2, 0(r3) +; CHECK-P9-NEXT: xxlxor v3, v3, v3 +; CHECK-P9-NEXT: vmrghw v2, v3, v2 +; CHECK-P9-NEXT: blr +entry: + %0 = load <16 x i8>, <16 x i8>* %c, align 8 + %vecinit30 = shufflevector <16 x i8> %0, <16 x i8> zeroinitializer, <16 x i32> + %1 = bitcast <16 x i8> %vecinit30 to <4 x i32> + ret <4 x i32> %1 +} + +define dso_local <4 x i32> @mrghw_swap(<16 x i8>* nocapture readonly %c, <16 x i8>* nocapture readonly %d) local_unnamed_addr #0 { +; CHECK-P8-LABEL: mrghw_swap: +; CHECK-P8: # %bb.0: # %entry +; CHECK-P8-NEXT: lxvd2x v2, 0, r3 +; CHECK-P8-NEXT: lxvd2x v3, 0, r4 +; CHECK-P8-NEXT: vmrglw v2, v3, v2 +; CHECK-P8-NEXT: blr +; +; CHECK-P9-LABEL: mrghw_swap: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9-NEXT: lxv v2, 0(r3) +; CHECK-P9-NEXT: lxv v3, 0(r4) +; CHECK-P9-NEXT: vmrghw v2, v3, v2 +; CHECK-P9-NEXT: blr +entry: + %0 = load <16 x i8>, <16 x i8>* %c, align 8 + %1 = load <16 x i8>, <16 x i8>* %d, align 8 + %vecinit30 = shufflevector <16 x i8> %0, <16 x i8> %1, <16 x i32> + %2 = bitcast <16 x i8> %vecinit30 to <4 x i32> + ret <4 x i32> %2 +} + +; FIXME: This function is semantically equivalent to @mrglw_swap but we get +; terrible codegen for it due to how we handle BUILD_VECTOR/shuffle. +define dso_local <4 x i32> @mrglw_bad(<2 x i32>* nocapture readonly %c, <2 x i32>* nocapture readonly %d) local_unnamed_addr #0 { +; CHECK-P8-LABEL: mrglw_bad: +; CHECK-P8: # %bb.0: # %entry +; CHECK-P8-NEXT: ld r3, 0(r3) +; CHECK-P8-NEXT: ld r4, 0(r4) +; CHECK-P8-NEXT: mtvsrd v2, r3 +; CHECK-P8-NEXT: mtvsrd v3, r4 +; CHECK-P8-NEXT: xxswapd v4, v2 +; CHECK-P8-NEXT: xxswapd v5, v3 +; CHECK-P8-NEXT: vmrghw v2, v3, v2 +; CHECK-P8-NEXT: vmrgew v3, v5, v4 +; CHECK-P8-NEXT: xxmrgld v2, v3, v2 +; CHECK-P8-NEXT: blr +; +; CHECK-P9-LABEL: mrglw_bad: +; CHECK-P9: # %bb.0: # %entry +; CHECK-P9-NEXT: lxsd v2, 0(r3) +; CHECK-P9-NEXT: lxsd v4, 0(r4) +; CHECK-P9-NEXT: xxswapd v3, v2 +; CHECK-P9-NEXT: xxswapd v5, v4 +; CHECK-P9-NEXT: vmrghw v2, v4, v2 +; CHECK-P9-NEXT: vmrgew v3, v5, v3 +; CHECK-P9-NEXT: xxmrgld v2, v3, v2 +; CHECK-P9-NEXT: blr +entry: + %0 = load <2 x i32>, <2 x i32>* %c, align 8 + %1 = load <2 x i32>, <2 x i32>* %d, align 8 + %vecinit30 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> + ret <4 x i32> %vecinit30 +} + +attributes #0 = { nounwind } diff --git a/llvm/test/CodeGen/PowerPC/pr25080.ll b/llvm/test/CodeGen/PowerPC/pr25080.ll --- a/llvm/test/CodeGen/PowerPC/pr25080.ll +++ b/llvm/test/CodeGen/PowerPC/pr25080.ll @@ -17,41 +17,33 @@ ; LE-NEXT: mfvsrwz 3, 34 ; LE-NEXT: xxsldwi 1, 34, 34, 1 ; LE-NEXT: mfvsrwz 4, 35 -; LE-NEXT: xxsldwi 4, 34, 34, 3 -; LE-NEXT: mtvsrd 2, 3 +; LE-NEXT: xxsldwi 2, 34, 34, 3 +; LE-NEXT: mtvsrd 36, 3 ; LE-NEXT: mfvsrwz 3, 0 ; LE-NEXT: xxswapd 0, 35 -; LE-NEXT: mtvsrd 3, 4 -; LE-NEXT: xxsldwi 5, 35, 35, 1 +; LE-NEXT: mtvsrd 37, 4 ; LE-NEXT: mfvsrwz 4, 1 -; LE-NEXT: xxsldwi 7, 35, 35, 3 -; LE-NEXT: mtvsrd 1, 3 -; LE-NEXT: xxswapd 33, 3 -; LE-NEXT: mfvsrwz 3, 4 -; LE-NEXT: mtvsrd 4, 4 -; LE-NEXT: xxswapd 34, 1 +; LE-NEXT: xxsldwi 1, 35, 35, 1 +; LE-NEXT: mtvsrd 34, 3 +; LE-NEXT: mfvsrwz 3, 2 +; LE-NEXT: mtvsrd 32, 4 ; LE-NEXT: mfvsrwz 4, 0 -; LE-NEXT: mtvsrd 0, 3 -; LE-NEXT: xxswapd 35, 4 -; LE-NEXT: mfvsrwz 3, 5 -; LE-NEXT: mtvsrd 6, 4 -; LE-NEXT: xxswapd 36, 0 -; LE-NEXT: mtvsrd 1, 3 -; LE-NEXT: mfvsrwz 3, 7 -; LE-NEXT: xxswapd 37, 6 -; LE-NEXT: vmrglh 2, 3, 2 -; LE-NEXT: xxswapd 35, 2 -; LE-NEXT: mtvsrd 2, 3 -; LE-NEXT: xxswapd 32, 1 +; LE-NEXT: xxsldwi 0, 35, 35, 3 +; LE-NEXT: mtvsrd 33, 3 +; LE-NEXT: mfvsrwz 3, 1 +; LE-NEXT: mtvsrd 38, 4 +; LE-NEXT: mtvsrd 35, 3 +; LE-NEXT: mfvsrwz 3, 0 +; LE-NEXT: vmrghh 2, 0, 2 +; LE-NEXT: mtvsrd 32, 3 ; LE-NEXT: addis 3, 2, .LCPI0_1@toc@ha +; LE-NEXT: vmrghh 4, 1, 4 ; LE-NEXT: addi 3, 3, .LCPI0_1@toc@l -; LE-NEXT: xxswapd 38, 2 -; LE-NEXT: vmrglh 3, 4, 3 -; LE-NEXT: vmrglh 4, 0, 5 -; LE-NEXT: vmrglh 5, 6, 1 -; LE-NEXT: vmrglw 2, 3, 2 -; LE-NEXT: vmrglw 3, 5, 4 +; LE-NEXT: vmrghh 3, 3, 6 +; LE-NEXT: vmrghh 5, 0, 5 +; LE-NEXT: vmrglw 2, 4, 2 ; LE-NEXT: vspltish 4, 15 +; LE-NEXT: vmrglw 3, 5, 3 ; LE-NEXT: xxmrgld 34, 35, 34 ; LE-NEXT: lvx 3, 0, 3 ; LE-NEXT: xxlor 34, 34, 35 diff --git a/llvm/test/CodeGen/PowerPC/pr38087.ll b/llvm/test/CodeGen/PowerPC/pr38087.ll --- a/llvm/test/CodeGen/PowerPC/pr38087.ll +++ b/llvm/test/CodeGen/PowerPC/pr38087.ll @@ -11,9 +11,8 @@ define void @draw_llvm_vs_variant0(<4 x float> %x) { ; CHECK-LABEL: draw_llvm_vs_variant0: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: lfd f0, 0(r3) -; CHECK-NEXT: xxswapd v3, f0 -; CHECK-NEXT: vmrglh v3, v3, v3 +; CHECK-NEXT: lxsd v3, 0(r3) +; CHECK-NEXT: vmrghh v3, v3, v3 ; CHECK-NEXT: vextsh2w v3, v3 ; CHECK-NEXT: xvcvsxwsp vs0, v3 ; CHECK-NEXT: xxspltw vs0, vs0, 2 diff --git a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll --- a/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll +++ b/llvm/test/CodeGen/PowerPC/pre-inc-disable.ll @@ -361,14 +361,13 @@ ; CHECK-NEXT: lxsihzx v2, r6, r7 ; CHECK-NEXT: lxsihzx v4, r3, r4 ; CHECK-NEXT: li r6, 0 -; CHECK-NEXT: mtvsrd f0, r6 +; CHECK-NEXT: mtvsrd v3, r6 ; CHECK-NEXT: vsplth v4, v4, 3 -; CHECK-NEXT: xxswapd v3, vs0 ; CHECK-NEXT: vsplth v2, v2, 3 ; CHECK-NEXT: addis r3, r2, .LCPI3_0@toc@ha ; CHECK-NEXT: addi r3, r3, .LCPI3_0@toc@l -; CHECK-NEXT: vmrglh v2, v3, v2 -; CHECK-NEXT: vmrglh v3, v3, v4 +; CHECK-NEXT: vmrghh v2, v3, v2 +; CHECK-NEXT: vmrghh v3, v3, v4 ; CHECK-NEXT: xxlxor v4, v4, v4 ; CHECK-NEXT: vmrglw v3, v3, v4 ; CHECK-NEXT: lxvx v4, 0, r3 @@ -446,18 +445,18 @@ ; CHECK-NEXT: add r6, r3, r4 ; CHECK-NEXT: lxsibzx v2, r3, r4 ; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: mtvsrd f0, r3 +; CHECK-NEXT: mtvsrd v3, r3 ; CHECK-NEXT: li r3, 8 ; CHECK-NEXT: lxsibzx v5, r6, r3 -; CHECK-NEXT: xxswapd v3, vs0 -; CHECK-NEXT: vspltb v4, v3, 15 +; CHECK-NEXT: xxswapd v4, v3 +; CHECK-NEXT: vspltb v4, v4, 15 ; CHECK-NEXT: vspltb v2, v2, 7 -; CHECK-NEXT: vmrglb v2, v3, v2 +; CHECK-NEXT: vmrghb v2, v3, v2 ; CHECK-NEXT: addis r3, r2, .LCPI4_0@toc@ha ; CHECK-NEXT: addi r3, r3, .LCPI4_0@toc@l ; CHECK-NEXT: vspltb v5, v5, 7 ; CHECK-NEXT: vmrglh v2, v2, v4 -; CHECK-NEXT: vmrglb v3, v3, v5 +; CHECK-NEXT: vmrghb v3, v3, v5 ; CHECK-NEXT: vmrglw v2, v2, v4 ; CHECK-NEXT: vmrglh v3, v3, v4 ; CHECK-NEXT: vmrglw v3, v4, v3 diff --git a/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll b/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll --- a/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll +++ b/llvm/test/CodeGen/PowerPC/scalar_vector_test_4.ll @@ -173,9 +173,8 @@ ; P9LE: # %bb.0: # %entry ; P9LE-NEXT: addi r3, r3, 4 ; P9LE-DAG: xxspltw v2, v2, 2 -; P9LE-DAG: lfiwzx f0, 0, r3 -; P9LE-NEXT: xxswapd v3, f0 -; P9LE-NEXT: vmrglw v2, v2, v3 +; P9LE-DAG: lxsiwzx v3, 0, r3 +; P9LE-NEXT: vmrghw v2, v2, v3 ; P9LE-NEXT: blr ; P9BE-LABEL: s2v_test_f2: @@ -190,10 +189,9 @@ ; P8LE-LABEL: s2v_test_f2: ; P8LE: # %bb.0: # %entry ; P8LE-NEXT: addi r3, r3, 4 -; P8LE-NEXT: xxspltw v2, v2, 2 -; P8LE-NEXT: lfiwzx f0, 0, r3 -; P8LE-NEXT: xxswapd v3, f0 -; P8LE-NEXT: vmrglw v2, v2, v3 +; P8LE-DAG: xxspltw v2, v2, 2 +; P8LE-DAG: lxsiwzx v3, 0, r3 +; P8LE-NEXT: vmrghw v2, v2, v3 ; P8LE-NEXT: blr ; P8BE-LABEL: s2v_test_f2: @@ -216,10 +214,9 @@ ; P9LE-LABEL: s2v_test_f3: ; P9LE: # %bb.0: # %entry ; P9LE-NEXT: sldi r4, r7, 2 -; P9LE-NEXT: lfiwzx f0, r3, r4 +; P9LE-NEXT: lxsiwzx v3, r3, r4 ; P9LE-DAG: xxspltw v2, v2, 2 -; P9LE-DAG: xxswapd v3, f0 -; P9LE-NEXT: vmrglw v2, v2, v3 +; P9LE-NEXT: vmrghw v2, v2, v3 ; P9LE-NEXT: blr ; P9BE-LABEL: s2v_test_f3: @@ -234,10 +231,9 @@ ; P8LE-LABEL: s2v_test_f3: ; P8LE: # %bb.0: # %entry ; P8LE-NEXT: sldi r4, r7, 2 -; P8LE-NEXT: xxspltw v2, v2, 2 -; P8LE-NEXT: lfiwzx f0, r3, r4 -; P8LE-NEXT: xxswapd v3, f0 -; P8LE-NEXT: vmrglw v2, v2, v3 +; P8LE-DAG: xxspltw v2, v2, 2 +; P8LE-DAG: lxsiwzx v3, r3, r4 +; P8LE-NEXT: vmrghw v2, v2, v3 ; P8LE-NEXT: blr ; P8BE-LABEL: s2v_test_f3: @@ -261,10 +257,9 @@ ; P9LE-LABEL: s2v_test_f4: ; P9LE: # %bb.0: # %entry ; P9LE-NEXT: addi r3, r3, 4 -; P9LE-NEXT: lfiwzx f0, 0, r3 +; P9LE-NEXT: lxsiwzx v3, 0, r3 ; P9LE-DAG: xxspltw v2, v2, 2 -; P9LE-DAG: xxswapd v3, f0 -; P9LE-NEXT: vmrglw v2, v2, v3 +; P9LE-NEXT: vmrghw v2, v2, v3 ; P9LE-NEXT: blr ; P9BE-LABEL: s2v_test_f4: @@ -279,10 +274,9 @@ ; P8LE-LABEL: s2v_test_f4: ; P8LE: # %bb.0: # %entry ; P8LE-NEXT: addi r3, r3, 4 -; P8LE-NEXT: xxspltw v2, v2, 2 -; P8LE-NEXT: lfiwzx f0, 0, r3 -; P8LE-NEXT: xxswapd v3, f0 -; P8LE-NEXT: vmrglw v2, v2, v3 +; P8LE-DAG: xxspltw v2, v2, 2 +; P8LE-DAG: lxsiwzx v3, 0, r3 +; P8LE-NEXT: vmrghw v2, v2, v3 ; P8LE-NEXT: blr ; P8BE-LABEL: s2v_test_f4: @@ -304,10 +298,9 @@ define <2 x float> @s2v_test_f5(<2 x float> %vec, float* nocapture readonly %ptr1) { ; P9LE-LABEL: s2v_test_f5: ; P9LE: # %bb.0: # %entry -; P9LE-NEXT: lfiwzx f0, 0, r5 +; P9LE-NEXT: lxsiwzx v3, 0, r5 ; P9LE-NEXT: xxspltw v2, v2, 2 -; P9LE-NEXT: xxswapd v3, f0 -; P9LE-NEXT: vmrglw v2, v2, v3 +; P9LE-NEXT: vmrghw v2, v2, v3 ; P9LE-NEXT: blr ; P9BE-LABEL: s2v_test_f5: @@ -320,10 +313,9 @@ ; P8LE-LABEL: s2v_test_f5: ; P8LE: # %bb.0: # %entry -; P8LE-NEXT: lfiwzx f0, 0, r5 -; P8LE-NEXT: xxspltw v2, v2, 2 -; P8LE-NEXT: xxswapd v3, f0 -; P8LE-NEXT: vmrglw v2, v2, v3 +; P8LE-DAG: lxsiwzx v3, 0, r5 +; P8LE-DAG: xxspltw v2, v2, 2 +; P8LE-NEXT: vmrghw v2, v2, v3 ; P8LE-NEXT: blr ; P8BE-LABEL: s2v_test_f5: diff --git a/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll b/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll --- a/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll +++ b/llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll @@ -26,7 +26,7 @@ ; P9LE-NEXT: lis r5, 31710 ; P9LE-NEXT: mulli r4, r4, 95 ; P9LE-NEXT: subf r3, r4, r3 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtvsrd v3, r3 ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r4, r3 @@ -41,8 +41,7 @@ ; P9LE-NEXT: lis r5, 21399 ; P9LE-NEXT: mulli r4, r4, -124 ; P9LE-NEXT: subf r3, r4, r3 -; P9LE-NEXT: xxswapd v3, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r4, r3 @@ -56,8 +55,8 @@ ; P9LE-NEXT: lis r5, -16728 ; P9LE-NEXT: mulli r4, r4, 98 ; P9LE-NEXT: subf r3, r4, r3 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: vmrghh v3, v4, v3 +; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r4, r3 @@ -70,11 +69,8 @@ ; P9LE-NEXT: add r4, r4, r5 ; P9LE-NEXT: mulli r4, r4, -1003 ; P9LE-NEXT: subf r3, r4, r3 -; P9LE-NEXT: vmrglh v3, v4, v3 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtvsrd f0, r3 -; P9LE-NEXT: xxswapd v2, vs0 -; P9LE-NEXT: vmrglh v2, v2, v4 +; P9LE-NEXT: mtvsrd v2, r3 +; P9LE-NEXT: vmrghh v2, v2, v4 ; P9LE-NEXT: vmrglw v2, v2, v3 ; P9LE-NEXT: blr ; @@ -201,18 +197,14 @@ ; P8LE-NEXT: mulli r8, r8, -124 ; P8LE-NEXT: subf r3, r4, r3 ; P8LE-NEXT: subf r4, r9, r6 -; P8LE-NEXT: mtvsrd f0, r3 +; P8LE-NEXT: mtvsrd v2, r3 ; P8LE-NEXT: subf r3, r10, r7 -; P8LE-NEXT: mtvsrd f1, r4 +; P8LE-NEXT: mtvsrd v3, r4 ; P8LE-NEXT: subf r4, r8, r5 -; P8LE-NEXT: mtvsrd f2, r3 -; P8LE-NEXT: xxswapd v2, vs0 -; P8LE-NEXT: mtvsrd f3, r4 -; P8LE-NEXT: xxswapd v3, vs1 -; P8LE-NEXT: xxswapd v4, vs2 -; P8LE-NEXT: xxswapd v5, vs3 -; P8LE-NEXT: vmrglh v2, v3, v2 -; P8LE-NEXT: vmrglh v3, v5, v4 +; P8LE-NEXT: mtvsrd v4, r3 +; P8LE-NEXT: mtvsrd v5, r4 +; P8LE-NEXT: vmrghh v2, v3, v2 +; P8LE-NEXT: vmrghh v3, v5, v4 ; P8LE-NEXT: vmrglw v2, v2, v3 ; P8LE-NEXT: blr ; @@ -302,7 +294,7 @@ ; P9LE-NEXT: add r4, r4, r6 ; P9LE-NEXT: mulli r4, r4, 95 ; P9LE-NEXT: subf r3, r4, r3 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtvsrd v3, r3 ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r4, r3 @@ -315,8 +307,7 @@ ; P9LE-NEXT: add r4, r4, r6 ; P9LE-NEXT: mulli r4, r4, 95 ; P9LE-NEXT: subf r3, r4, r3 -; P9LE-NEXT: xxswapd v3, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r4, r3 @@ -329,8 +320,8 @@ ; P9LE-NEXT: add r4, r4, r6 ; P9LE-NEXT: mulli r4, r4, 95 ; P9LE-NEXT: subf r3, r4, r3 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: vmrghh v3, v4, v3 +; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r4, r3 @@ -343,11 +334,8 @@ ; P9LE-NEXT: add r4, r4, r5 ; P9LE-NEXT: mulli r4, r4, 95 ; P9LE-NEXT: subf r3, r4, r3 -; P9LE-NEXT: vmrglh v3, v4, v3 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtvsrd f0, r3 -; P9LE-NEXT: xxswapd v2, vs0 -; P9LE-NEXT: vmrglh v2, v2, v4 +; P9LE-NEXT: mtvsrd v2, r3 +; P9LE-NEXT: vmrghh v2, v2, v4 ; P9LE-NEXT: vmrglw v2, v2, v3 ; P9LE-NEXT: blr ; @@ -466,18 +454,14 @@ ; P8LE-NEXT: mulli r4, r4, 95 ; P8LE-NEXT: subf r3, r8, r3 ; P8LE-NEXT: subf r6, r9, r6 -; P8LE-NEXT: mtvsrd f0, r3 +; P8LE-NEXT: mtvsrd v2, r3 ; P8LE-NEXT: subf r3, r10, r7 ; P8LE-NEXT: subf r4, r4, r5 -; P8LE-NEXT: mtvsrd f1, r6 -; P8LE-NEXT: mtvsrd f2, r3 -; P8LE-NEXT: xxswapd v2, vs0 -; P8LE-NEXT: mtvsrd f3, r4 -; P8LE-NEXT: xxswapd v3, vs1 -; P8LE-NEXT: xxswapd v4, vs2 -; P8LE-NEXT: xxswapd v5, vs3 -; P8LE-NEXT: vmrglh v2, v3, v2 -; P8LE-NEXT: vmrglh v3, v5, v4 +; P8LE-NEXT: mtvsrd v3, r6 +; P8LE-NEXT: mtvsrd v4, r3 +; P8LE-NEXT: mtvsrd v5, r4 +; P8LE-NEXT: vmrghh v2, v3, v2 +; P8LE-NEXT: vmrghh v3, v5, v4 ; P8LE-NEXT: vmrglw v2, v3, v2 ; P8LE-NEXT: blr ; @@ -565,7 +549,7 @@ ; P9LE-NEXT: add r4, r4, r6 ; P9LE-NEXT: mulli r6, r4, 95 ; P9LE-NEXT: subf r3, r6, r3 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtvsrd v3, r3 ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r6, r3 @@ -578,8 +562,7 @@ ; P9LE-NEXT: add r6, r6, r7 ; P9LE-NEXT: mulli r7, r6, 95 ; P9LE-NEXT: subf r3, r7, r3 -; P9LE-NEXT: xxswapd v3, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r7, r3 @@ -592,8 +575,8 @@ ; P9LE-NEXT: add r7, r7, r8 ; P9LE-NEXT: mulli r8, r7, 95 ; P9LE-NEXT: subf r3, r8, r3 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: vmrghh v3, v4, v3 +; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r8, r3 @@ -606,22 +589,15 @@ ; P9LE-NEXT: add r5, r5, r8 ; P9LE-NEXT: mulli r8, r5, 95 ; P9LE-NEXT: subf r3, r8, r3 -; P9LE-NEXT: vmrglh v3, v4, v3 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtvsrd f0, r3 -; P9LE-NEXT: xxswapd v2, vs0 -; P9LE-NEXT: mtvsrd f0, r4 -; P9LE-NEXT: vmrglh v2, v2, v4 +; P9LE-NEXT: mtvsrd v2, r3 +; P9LE-NEXT: vmrghh v2, v2, v4 +; P9LE-NEXT: mtvsrd v4, r6 ; P9LE-NEXT: vmrglw v2, v2, v3 -; P9LE-NEXT: xxswapd v3, vs0 -; P9LE-NEXT: mtvsrd f0, r6 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtvsrd f0, r7 -; P9LE-NEXT: vmrglh v3, v4, v3 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtvsrd f0, r5 -; P9LE-NEXT: xxswapd v5, vs0 -; P9LE-NEXT: vmrglh v4, v5, v4 +; P9LE-NEXT: mtvsrd v3, r4 +; P9LE-NEXT: vmrghh v3, v4, v3 +; P9LE-NEXT: mtvsrd v4, r7 +; P9LE-NEXT: mtvsrd v5, r5 +; P9LE-NEXT: vmrghh v4, v5, v4 ; P9LE-NEXT: vmrglw v3, v4, v3 ; P9LE-NEXT: vadduhm v2, v2, v3 ; P9LE-NEXT: blr @@ -722,16 +698,16 @@ ; P8LE-NEXT: extsh r11, r6 ; P8LE-NEXT: extsw r10, r10 ; P8LE-NEXT: mulld r12, r8, r5 -; P8LE-NEXT: extsw r11, r11 ; P8LE-NEXT: mulld r0, r9, r5 +; P8LE-NEXT: extsw r11, r11 ; P8LE-NEXT: mulld r30, r10, r5 ; P8LE-NEXT: mulld r5, r11, r5 ; P8LE-NEXT: rldicl r12, r12, 32, 32 ; P8LE-NEXT: rldicl r0, r0, 32, 32 ; P8LE-NEXT: rldicl r30, r30, 32, 32 ; P8LE-NEXT: add r8, r12, r8 -; P8LE-NEXT: rldicl r5, r5, 32, 32 ; P8LE-NEXT: add r9, r0, r9 +; P8LE-NEXT: rldicl r5, r5, 32, 32 ; P8LE-NEXT: add r10, r30, r10 ; P8LE-NEXT: srwi r12, r8, 31 ; P8LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload @@ -743,40 +719,32 @@ ; P8LE-NEXT: srawi r12, r10, 6 ; P8LE-NEXT: srwi r10, r10, 31 ; P8LE-NEXT: add r9, r0, r9 -; P8LE-NEXT: mulli r0, r8, 95 +; P8LE-NEXT: mulli r11, r8, 95 ; P8LE-NEXT: add r10, r12, r10 -; P8LE-NEXT: mtvsrd f0, r8 -; P8LE-NEXT: srwi r8, r5, 31 +; P8LE-NEXT: srwi r12, r5, 31 +; P8LE-NEXT: mtvsrd v2, r8 ; P8LE-NEXT: srawi r5, r5, 6 -; P8LE-NEXT: mulli r11, r9, 95 -; P8LE-NEXT: mtvsrd f1, r9 +; P8LE-NEXT: mulli r8, r9, 95 +; P8LE-NEXT: mtvsrd v3, r9 +; P8LE-NEXT: add r5, r5, r12 ; P8LE-NEXT: mulli r9, r10, 95 -; P8LE-NEXT: add r5, r5, r8 -; P8LE-NEXT: xxswapd v2, vs0 -; P8LE-NEXT: mtvsrd f2, r10 -; P8LE-NEXT: mtvsrd f3, r5 -; P8LE-NEXT: mulli r5, r5, 95 -; P8LE-NEXT: xxswapd v3, vs1 -; P8LE-NEXT: subf r3, r0, r3 -; P8LE-NEXT: xxswapd v1, vs2 -; P8LE-NEXT: mtvsrd f0, r3 -; P8LE-NEXT: subf r4, r11, r4 -; P8LE-NEXT: xxswapd v6, vs3 +; P8LE-NEXT: mtvsrd v4, r10 +; P8LE-NEXT: mulli r10, r5, 95 +; P8LE-NEXT: vmrghh v2, v3, v2 +; P8LE-NEXT: subf r3, r11, r3 +; P8LE-NEXT: subf r4, r8, r4 +; P8LE-NEXT: mtvsrd v3, r3 +; P8LE-NEXT: mtvsrd v5, r4 ; P8LE-NEXT: subf r3, r9, r7 -; P8LE-NEXT: mtvsrd f1, r4 -; P8LE-NEXT: mtvsrd f4, r3 -; P8LE-NEXT: subf r3, r5, r6 -; P8LE-NEXT: mtvsrd f5, r3 -; P8LE-NEXT: xxswapd v4, vs1 -; P8LE-NEXT: vmrglh v2, v3, v2 -; P8LE-NEXT: xxswapd v3, vs0 -; P8LE-NEXT: xxswapd v5, vs4 -; P8LE-NEXT: xxswapd v0, vs5 -; P8LE-NEXT: vmrglh v3, v4, v3 -; P8LE-NEXT: vmrglh v4, v0, v5 -; P8LE-NEXT: vmrglh v5, v6, v1 -; P8LE-NEXT: vmrglw v3, v4, v3 -; P8LE-NEXT: vmrglw v2, v5, v2 +; P8LE-NEXT: subf r4, r10, r6 +; P8LE-NEXT: mtvsrd v0, r3 +; P8LE-NEXT: mtvsrd v1, r4 +; P8LE-NEXT: vmrghh v3, v5, v3 +; P8LE-NEXT: mtvsrd v5, r5 +; P8LE-NEXT: vmrghh v0, v1, v0 +; P8LE-NEXT: vmrghh v4, v5, v4 +; P8LE-NEXT: vmrglw v3, v0, v3 +; P8LE-NEXT: vmrglw v2, v4, v2 ; P8LE-NEXT: vadduhm v2, v3, v2 ; P8LE-NEXT: blr ; @@ -870,7 +838,7 @@ ; P9LE-NEXT: addze r4, r4 ; P9LE-NEXT: slwi r4, r4, 6 ; P9LE-NEXT: subf r3, r4, r3 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtvsrd v3, r3 ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r4, r3 @@ -878,15 +846,13 @@ ; P9LE-NEXT: addze r4, r4 ; P9LE-NEXT: slwi r4, r4, 5 ; P9LE-NEXT: subf r3, r4, r3 -; P9LE-NEXT: xxswapd v3, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r4, r3 ; P9LE-NEXT: lis r5, -21386 ; P9LE-NEXT: ori r5, r5, 37253 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: vmrglh v3, v4, v3 +; P9LE-NEXT: vmrghh v3, v4, v3 ; P9LE-NEXT: extsw r4, r4 ; P9LE-NEXT: mulld r5, r4, r5 ; P9LE-NEXT: rldicl r5, r5, 32, 32 @@ -896,7 +862,7 @@ ; P9LE-NEXT: add r4, r4, r5 ; P9LE-NEXT: mulli r4, r4, 95 ; P9LE-NEXT: subf r3, r4, r3 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r4, r3 @@ -904,10 +870,8 @@ ; P9LE-NEXT: addze r4, r4 ; P9LE-NEXT: slwi r4, r4, 3 ; P9LE-NEXT: subf r3, r4, r3 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtvsrd f0, r3 -; P9LE-NEXT: xxswapd v2, vs0 -; P9LE-NEXT: vmrglh v2, v4, v2 +; P9LE-NEXT: mtvsrd v2, r3 +; P9LE-NEXT: vmrghh v2, v4, v2 ; P9LE-NEXT: vmrglw v2, v2, v3 ; P9LE-NEXT: blr ; @@ -982,30 +946,26 @@ ; P8LE-NEXT: slwi r8, r8, 6 ; P8LE-NEXT: subf r7, r8, r7 ; P8LE-NEXT: rldicl r3, r3, 32, 32 -; P8LE-NEXT: mtvsrd f0, r7 +; P8LE-NEXT: mtvsrd v2, r7 ; P8LE-NEXT: add r3, r3, r6 ; P8LE-NEXT: addze r6, r10 ; P8LE-NEXT: srwi r10, r3, 31 ; P8LE-NEXT: srawi r3, r3, 6 ; P8LE-NEXT: slwi r6, r6, 5 -; P8LE-NEXT: xxswapd v2, vs0 ; P8LE-NEXT: add r3, r3, r10 ; P8LE-NEXT: extsh r10, r4 ; P8LE-NEXT: subf r6, r6, r9 ; P8LE-NEXT: mulli r3, r3, 95 ; P8LE-NEXT: srawi r8, r10, 3 -; P8LE-NEXT: mtvsrd f1, r6 +; P8LE-NEXT: mtvsrd v3, r6 ; P8LE-NEXT: addze r7, r8 -; P8LE-NEXT: xxswapd v3, vs1 +; P8LE-NEXT: vmrghh v2, v3, v2 ; P8LE-NEXT: subf r3, r3, r5 ; P8LE-NEXT: slwi r5, r7, 3 ; P8LE-NEXT: subf r4, r5, r4 -; P8LE-NEXT: mtvsrd f2, r3 -; P8LE-NEXT: mtvsrd f3, r4 -; P8LE-NEXT: xxswapd v4, vs2 -; P8LE-NEXT: vmrglh v2, v3, v2 -; P8LE-NEXT: xxswapd v5, vs3 -; P8LE-NEXT: vmrglh v3, v4, v5 +; P8LE-NEXT: mtvsrd v4, r3 +; P8LE-NEXT: mtvsrd v5, r4 +; P8LE-NEXT: vmrghh v3, v4, v5 ; P8LE-NEXT: vmrglw v2, v3, v2 ; P8LE-NEXT: blr ; @@ -1079,7 +1039,7 @@ ; P9LE-NEXT: lis r5, -19946 ; P9LE-NEXT: mulli r4, r4, 654 ; P9LE-NEXT: subf r3, r4, r3 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtvsrd v3, r3 ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r4, r3 @@ -1094,8 +1054,8 @@ ; P9LE-NEXT: lis r5, 24749 ; P9LE-NEXT: mulli r4, r4, 23 ; P9LE-NEXT: subf r3, r4, r3 -; P9LE-NEXT: xxswapd v3, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: vmrghh v3, v3, v4 +; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r4, r3 @@ -1108,11 +1068,8 @@ ; P9LE-NEXT: add r4, r4, r5 ; P9LE-NEXT: mulli r4, r4, 5423 ; P9LE-NEXT: subf r3, r4, r3 -; P9LE-NEXT: vmrglh v3, v3, v4 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtvsrd f0, r3 -; P9LE-NEXT: xxswapd v2, vs0 -; P9LE-NEXT: vmrglh v2, v2, v4 +; P9LE-NEXT: mtvsrd v2, r3 +; P9LE-NEXT: vmrghh v2, v2, v4 ; P9LE-NEXT: vmrglw v2, v2, v3 ; P9LE-NEXT: blr ; @@ -1179,7 +1136,7 @@ ; P8LE-NEXT: lis r3, 24749 ; P8LE-NEXT: lis r8, -19946 ; P8LE-NEXT: lis r10, -14230 -; P8LE-NEXT: xxlxor v5, v5, v5 +; P8LE-NEXT: xxlxor v2, v2, v2 ; P8LE-NEXT: ori r3, r3, 47143 ; P8LE-NEXT: ori r8, r8, 17097 ; P8LE-NEXT: mfvsrd r4, f0 @@ -1214,17 +1171,14 @@ ; P8LE-NEXT: mulli r8, r8, 23 ; P8LE-NEXT: mulli r7, r7, 654 ; P8LE-NEXT: subf r3, r3, r5 -; P8LE-NEXT: mtvsrd f0, r3 -; P8LE-NEXT: subf r3, r8, r6 -; P8LE-NEXT: subf r4, r7, r4 -; P8LE-NEXT: mtvsrd f1, r3 -; P8LE-NEXT: mtvsrd f2, r4 -; P8LE-NEXT: xxswapd v2, vs0 -; P8LE-NEXT: xxswapd v3, vs1 -; P8LE-NEXT: xxswapd v4, vs2 -; P8LE-NEXT: vmrglh v2, v2, v3 -; P8LE-NEXT: vmrglh v3, v4, v5 -; P8LE-NEXT: vmrglw v2, v2, v3 +; P8LE-NEXT: subf r5, r8, r6 +; P8LE-NEXT: mtvsrd v3, r3 +; P8LE-NEXT: subf r3, r7, r4 +; P8LE-NEXT: mtvsrd v4, r5 +; P8LE-NEXT: mtvsrd v5, r3 +; P8LE-NEXT: vmrghh v3, v3, v4 +; P8LE-NEXT: vmrghh v2, v5, v2 +; P8LE-NEXT: vmrglw v2, v3, v2 ; P8LE-NEXT: blr ; ; P8BE-LABEL: dont_fold_srem_one: @@ -1304,7 +1258,7 @@ ; P9LE-NEXT: lis r5, 24749 ; P9LE-NEXT: mulli r4, r4, 23 ; P9LE-NEXT: subf r3, r4, r3 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtvsrd v3, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r4, r3 @@ -1317,8 +1271,7 @@ ; P9LE-NEXT: add r4, r4, r5 ; P9LE-NEXT: mulli r4, r4, 5423 ; P9LE-NEXT: subf r3, r4, r3 -; P9LE-NEXT: xxswapd v3, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: extsh r4, r3 @@ -1326,12 +1279,10 @@ ; P9LE-NEXT: addze r4, r4 ; P9LE-NEXT: slwi r4, r4, 15 ; P9LE-NEXT: subf r3, r4, r3 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtvsrd f0, r3 -; P9LE-NEXT: xxswapd v2, vs0 -; P9LE-NEXT: vmrglh v3, v4, v3 +; P9LE-NEXT: vmrghh v3, v4, v3 ; P9LE-NEXT: xxlxor v4, v4, v4 -; P9LE-NEXT: vmrglh v2, v2, v4 +; P9LE-NEXT: mtvsrd v2, r3 +; P9LE-NEXT: vmrghh v2, v2, v4 ; P9LE-NEXT: vmrglw v2, v3, v2 ; P9LE-NEXT: blr ; @@ -1390,7 +1341,7 @@ ; P8LE-NEXT: xxswapd vs0, v2 ; P8LE-NEXT: lis r6, 24749 ; P8LE-NEXT: lis r7, -19946 -; P8LE-NEXT: xxlxor v5, v5, v5 +; P8LE-NEXT: xxlxor v2, v2, v2 ; P8LE-NEXT: ori r6, r6, 47143 ; P8LE-NEXT: ori r7, r7, 17097 ; P8LE-NEXT: mfvsrd r3, f0 @@ -1418,18 +1369,15 @@ ; P8LE-NEXT: srawi r8, r8, 15 ; P8LE-NEXT: subf r4, r6, r4 ; P8LE-NEXT: addze r6, r8 -; P8LE-NEXT: mtvsrd f0, r4 -; P8LE-NEXT: slwi r4, r6, 15 +; P8LE-NEXT: slwi r6, r6, 15 +; P8LE-NEXT: mtvsrd v3, r4 ; P8LE-NEXT: subf r5, r7, r5 -; P8LE-NEXT: subf r3, r4, r3 -; P8LE-NEXT: mtvsrd f1, r5 -; P8LE-NEXT: xxswapd v2, vs0 -; P8LE-NEXT: mtvsrd f2, r3 -; P8LE-NEXT: xxswapd v3, vs1 -; P8LE-NEXT: xxswapd v4, vs2 -; P8LE-NEXT: vmrglh v2, v2, v3 -; P8LE-NEXT: vmrglh v3, v4, v5 -; P8LE-NEXT: vmrglw v2, v2, v3 +; P8LE-NEXT: subf r3, r6, r3 +; P8LE-NEXT: mtvsrd v4, r5 +; P8LE-NEXT: mtvsrd v5, r3 +; P8LE-NEXT: vmrghh v3, v3, v4 +; P8LE-NEXT: vmrghh v2, v5, v2 +; P8LE-NEXT: vmrglw v2, v3, v2 ; P8LE-NEXT: blr ; ; P8BE-LABEL: dont_fold_urem_i16_smax: diff --git a/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll b/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll --- a/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll +++ b/llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll @@ -22,7 +22,7 @@ ; P9LE-NEXT: rldicl r4, r4, 27, 37 ; P9LE-NEXT: mulli r4, r4, 98 ; P9LE-NEXT: subf r3, r4, r3 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtvsrd v3, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: rlwinm r4, r3, 0, 16, 31 @@ -32,8 +32,7 @@ ; P9LE-NEXT: rldicl r4, r4, 24, 40 ; P9LE-NEXT: mulli r4, r4, 1003 ; P9LE-NEXT: subf r3, r4, r3 -; P9LE-NEXT: xxswapd v3, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: rlwinm r4, r3, 30, 18, 31 @@ -41,15 +40,13 @@ ; P9LE-NEXT: rldicl r4, r4, 30, 34 ; P9LE-NEXT: mulli r4, r4, 124 ; P9LE-NEXT: subf r3, r4, r3 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: vmrghh v3, v4, v3 +; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 0 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: rlwinm r4, r3, 0, 16, 31 ; P9LE-NEXT: lis r6, 22765 ; P9LE-NEXT: ori r6, r6, 8969 -; P9LE-NEXT: vmrglh v3, v4, v3 -; P9LE-NEXT: xxswapd v4, vs0 ; P9LE-NEXT: clrldi r5, r4, 32 ; P9LE-NEXT: mulld r5, r5, r6 ; P9LE-NEXT: rldicl r5, r5, 32, 32 @@ -59,9 +56,8 @@ ; P9LE-NEXT: srwi r4, r4, 6 ; P9LE-NEXT: mulli r4, r4, 95 ; P9LE-NEXT: subf r3, r4, r3 -; P9LE-NEXT: mtvsrd f0, r3 -; P9LE-NEXT: xxswapd v2, vs0 -; P9LE-NEXT: vmrglh v2, v4, v2 +; P9LE-NEXT: mtvsrd v2, r3 +; P9LE-NEXT: vmrghh v2, v4, v2 ; P9LE-NEXT: vmrglw v2, v3, v2 ; P9LE-NEXT: blr ; @@ -163,18 +159,14 @@ ; P8LE-NEXT: mulli r8, r8, 124 ; P8LE-NEXT: subf r7, r7, r9 ; P8LE-NEXT: subf r6, r6, r10 -; P8LE-NEXT: mtvsrd f0, r7 +; P8LE-NEXT: mtvsrd v2, r7 ; P8LE-NEXT: subf r3, r3, r5 ; P8LE-NEXT: subf r4, r8, r4 -; P8LE-NEXT: mtvsrd f1, r6 -; P8LE-NEXT: mtvsrd f2, r3 -; P8LE-NEXT: xxswapd v2, vs0 -; P8LE-NEXT: mtvsrd f3, r4 -; P8LE-NEXT: xxswapd v3, vs1 -; P8LE-NEXT: xxswapd v4, vs2 -; P8LE-NEXT: xxswapd v5, vs3 -; P8LE-NEXT: vmrglh v2, v3, v2 -; P8LE-NEXT: vmrglh v3, v5, v4 +; P8LE-NEXT: mtvsrd v3, r6 +; P8LE-NEXT: mtvsrd v4, r3 +; P8LE-NEXT: mtvsrd v5, r4 +; P8LE-NEXT: vmrghh v2, v3, v2 +; P8LE-NEXT: vmrghh v3, v5, v4 ; P8LE-NEXT: vmrglw v2, v2, v3 ; P8LE-NEXT: blr ; @@ -254,7 +246,7 @@ ; P9LE-NEXT: srwi r4, r4, 6 ; P9LE-NEXT: mulli r4, r4, 95 ; P9LE-NEXT: subf r3, r4, r3 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtvsrd v3, r3 ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: rlwinm r4, r3, 0, 16, 31 @@ -267,8 +259,7 @@ ; P9LE-NEXT: srwi r4, r4, 6 ; P9LE-NEXT: mulli r4, r4, 95 ; P9LE-NEXT: subf r3, r4, r3 -; P9LE-NEXT: xxswapd v3, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: rlwinm r4, r3, 0, 16, 31 @@ -281,8 +272,8 @@ ; P9LE-NEXT: srwi r4, r4, 6 ; P9LE-NEXT: mulli r4, r4, 95 ; P9LE-NEXT: subf r3, r4, r3 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: vmrghh v3, v4, v3 +; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: rlwinm r4, r3, 0, 16, 31 @@ -295,11 +286,8 @@ ; P9LE-NEXT: srwi r4, r4, 6 ; P9LE-NEXT: mulli r4, r4, 95 ; P9LE-NEXT: subf r3, r4, r3 -; P9LE-NEXT: vmrglh v3, v4, v3 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtvsrd f0, r3 -; P9LE-NEXT: xxswapd v2, vs0 -; P9LE-NEXT: vmrglh v2, v2, v4 +; P9LE-NEXT: mtvsrd v2, r3 +; P9LE-NEXT: vmrghh v2, v2, v4 ; P9LE-NEXT: vmrglw v2, v2, v3 ; P9LE-NEXT: blr ; @@ -420,18 +408,14 @@ ; P8LE-NEXT: mulli r4, r4, 95 ; P8LE-NEXT: subf r3, r8, r3 ; P8LE-NEXT: subf r6, r9, r6 -; P8LE-NEXT: mtvsrd f0, r3 +; P8LE-NEXT: mtvsrd v2, r3 ; P8LE-NEXT: subf r3, r10, r7 ; P8LE-NEXT: subf r4, r4, r5 -; P8LE-NEXT: mtvsrd f1, r6 -; P8LE-NEXT: mtvsrd f2, r3 -; P8LE-NEXT: xxswapd v2, vs0 -; P8LE-NEXT: mtvsrd f3, r4 -; P8LE-NEXT: xxswapd v3, vs1 -; P8LE-NEXT: xxswapd v4, vs2 -; P8LE-NEXT: xxswapd v5, vs3 -; P8LE-NEXT: vmrglh v2, v3, v2 -; P8LE-NEXT: vmrglh v3, v5, v4 +; P8LE-NEXT: mtvsrd v3, r6 +; P8LE-NEXT: mtvsrd v4, r3 +; P8LE-NEXT: mtvsrd v5, r4 +; P8LE-NEXT: vmrghh v2, v3, v2 +; P8LE-NEXT: vmrghh v3, v5, v4 ; P8LE-NEXT: vmrglw v2, v3, v2 ; P8LE-NEXT: blr ; @@ -519,7 +503,7 @@ ; P9LE-NEXT: srwi r4, r4, 6 ; P9LE-NEXT: mulli r5, r4, 95 ; P9LE-NEXT: subf r3, r5, r3 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtvsrd v3, r3 ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: rlwinm r5, r3, 0, 16, 31 @@ -532,8 +516,7 @@ ; P9LE-NEXT: srwi r5, r5, 6 ; P9LE-NEXT: mulli r7, r5, 95 ; P9LE-NEXT: subf r3, r7, r3 -; P9LE-NEXT: xxswapd v3, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: rlwinm r7, r3, 0, 16, 31 @@ -546,8 +529,8 @@ ; P9LE-NEXT: srwi r7, r7, 6 ; P9LE-NEXT: mulli r8, r7, 95 ; P9LE-NEXT: subf r3, r8, r3 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: vmrghh v3, v4, v3 +; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: rlwinm r8, r3, 0, 16, 31 @@ -560,22 +543,15 @@ ; P9LE-NEXT: srwi r6, r6, 6 ; P9LE-NEXT: mulli r8, r6, 95 ; P9LE-NEXT: subf r3, r8, r3 -; P9LE-NEXT: vmrglh v3, v4, v3 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtvsrd f0, r3 -; P9LE-NEXT: xxswapd v2, vs0 -; P9LE-NEXT: mtvsrd f0, r4 -; P9LE-NEXT: vmrglh v2, v2, v4 +; P9LE-NEXT: mtvsrd v2, r3 +; P9LE-NEXT: vmrghh v2, v2, v4 +; P9LE-NEXT: mtvsrd v4, r5 ; P9LE-NEXT: vmrglw v2, v2, v3 -; P9LE-NEXT: xxswapd v3, vs0 -; P9LE-NEXT: mtvsrd f0, r5 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtvsrd f0, r7 -; P9LE-NEXT: vmrglh v3, v4, v3 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtvsrd f0, r6 -; P9LE-NEXT: xxswapd v5, vs0 -; P9LE-NEXT: vmrglh v4, v5, v4 +; P9LE-NEXT: mtvsrd v3, r4 +; P9LE-NEXT: vmrghh v3, v4, v3 +; P9LE-NEXT: mtvsrd v4, r7 +; P9LE-NEXT: mtvsrd v5, r6 +; P9LE-NEXT: vmrghh v4, v5, v4 ; P9LE-NEXT: vmrglw v3, v4, v3 ; P9LE-NEXT: vadduhm v2, v2, v3 ; P9LE-NEXT: blr @@ -661,78 +637,70 @@ ; P8LE: # %bb.0: ; P8LE-NEXT: xxswapd vs0, v2 ; P8LE-NEXT: lis r5, 22765 -; P8LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; P8LE-NEXT: std r29, -24(r1) # 8-byte Folded Spill +; P8LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill ; P8LE-NEXT: ori r5, r5, 8969 ; P8LE-NEXT: mfvsrd r6, f0 ; P8LE-NEXT: clrldi r3, r6, 48 ; P8LE-NEXT: rldicl r4, r6, 48, 48 -; P8LE-NEXT: rldicl r7, r6, 32, 48 ; P8LE-NEXT: rlwinm r8, r3, 0, 16, 31 ; P8LE-NEXT: rlwinm r9, r4, 0, 16, 31 +; P8LE-NEXT: rldicl r7, r6, 32, 48 ; P8LE-NEXT: rldicl r6, r6, 16, 48 -; P8LE-NEXT: rlwinm r10, r7, 0, 16, 31 ; P8LE-NEXT: clrldi r11, r8, 32 -; P8LE-NEXT: rlwinm r12, r6, 0, 16, 31 ; P8LE-NEXT: clrldi r0, r9, 32 -; P8LE-NEXT: clrldi r30, r10, 32 +; P8LE-NEXT: rlwinm r10, r7, 0, 16, 31 +; P8LE-NEXT: rlwinm r12, r6, 0, 16, 31 ; P8LE-NEXT: mulld r11, r11, r5 -; P8LE-NEXT: clrldi r29, r12, 32 ; P8LE-NEXT: mulld r0, r0, r5 +; P8LE-NEXT: clrldi r30, r10, 32 +; P8LE-NEXT: clrldi r29, r12, 32 ; P8LE-NEXT: mulld r30, r30, r5 ; P8LE-NEXT: mulld r5, r29, r5 ; P8LE-NEXT: ld r29, -24(r1) # 8-byte Folded Reload ; P8LE-NEXT: rldicl r11, r11, 32, 32 ; P8LE-NEXT: rldicl r0, r0, 32, 32 -; P8LE-NEXT: rldicl r30, r30, 32, 32 ; P8LE-NEXT: subf r8, r11, r8 -; P8LE-NEXT: rldicl r5, r5, 32, 32 ; P8LE-NEXT: subf r9, r0, r9 +; P8LE-NEXT: rldicl r30, r30, 32, 32 +; P8LE-NEXT: rldicl r5, r5, 32, 32 ; P8LE-NEXT: srwi r8, r8, 1 -; P8LE-NEXT: subf r10, r30, r10 -; P8LE-NEXT: add r8, r8, r11 ; P8LE-NEXT: srwi r9, r9, 1 -; P8LE-NEXT: srwi r10, r10, 1 -; P8LE-NEXT: subf r11, r5, r12 +; P8LE-NEXT: add r8, r8, r11 +; P8LE-NEXT: subf r10, r30, r10 ; P8LE-NEXT: add r9, r9, r0 +; P8LE-NEXT: subf r11, r5, r12 +; P8LE-NEXT: srwi r10, r10, 1 ; P8LE-NEXT: srwi r8, r8, 6 -; P8LE-NEXT: add r10, r10, r30 ; P8LE-NEXT: srwi r11, r11, 1 -; P8LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; P8LE-NEXT: srwi r9, r9, 6 +; P8LE-NEXT: add r10, r10, r30 ; P8LE-NEXT: mulli r12, r8, 95 -; P8LE-NEXT: srwi r10, r10, 6 ; P8LE-NEXT: add r5, r11, r5 -; P8LE-NEXT: mtvsrd f0, r8 +; P8LE-NEXT: mtvsrd v2, r8 +; P8LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload ; P8LE-NEXT: mulli r8, r9, 95 -; P8LE-NEXT: mtvsrd f1, r9 -; P8LE-NEXT: mulli r9, r10, 95 +; P8LE-NEXT: srwi r10, r10, 6 ; P8LE-NEXT: srwi r5, r5, 6 -; P8LE-NEXT: mtvsrd f3, r5 -; P8LE-NEXT: mulli r5, r5, 95 -; P8LE-NEXT: xxswapd v2, vs0 -; P8LE-NEXT: xxswapd v3, vs1 -; P8LE-NEXT: mtvsrd f2, r10 +; P8LE-NEXT: mtvsrd v3, r9 +; P8LE-NEXT: mulli r9, r10, 95 +; P8LE-NEXT: mtvsrd v4, r10 +; P8LE-NEXT: mulli r10, r5, 95 +; P8LE-NEXT: vmrghh v2, v3, v2 ; P8LE-NEXT: subf r3, r12, r3 -; P8LE-NEXT: xxswapd v6, vs3 -; P8LE-NEXT: mtvsrd f0, r3 -; P8LE-NEXT: subf r3, r9, r7 ; P8LE-NEXT: subf r4, r8, r4 -; P8LE-NEXT: xxswapd v1, vs2 -; P8LE-NEXT: mtvsrd f4, r3 -; P8LE-NEXT: subf r3, r5, r6 -; P8LE-NEXT: mtvsrd f1, r4 -; P8LE-NEXT: mtvsrd f5, r3 -; P8LE-NEXT: xxswapd v5, vs4 -; P8LE-NEXT: vmrglh v2, v3, v2 -; P8LE-NEXT: xxswapd v3, vs0 -; P8LE-NEXT: xxswapd v4, vs1 -; P8LE-NEXT: xxswapd v0, vs5 -; P8LE-NEXT: vmrglh v3, v4, v3 -; P8LE-NEXT: vmrglh v4, v0, v5 -; P8LE-NEXT: vmrglh v5, v6, v1 -; P8LE-NEXT: vmrglw v3, v4, v3 -; P8LE-NEXT: vmrglw v2, v5, v2 +; P8LE-NEXT: mtvsrd v3, r3 +; P8LE-NEXT: mtvsrd v5, r4 +; P8LE-NEXT: subf r3, r9, r7 +; P8LE-NEXT: subf r4, r10, r6 +; P8LE-NEXT: mtvsrd v0, r3 +; P8LE-NEXT: mtvsrd v1, r4 +; P8LE-NEXT: vmrghh v3, v5, v3 +; P8LE-NEXT: mtvsrd v5, r5 +; P8LE-NEXT: vmrghh v0, v1, v0 +; P8LE-NEXT: vmrghh v4, v5, v4 +; P8LE-NEXT: vmrglw v3, v0, v3 +; P8LE-NEXT: vmrglw v2, v4, v2 ; P8LE-NEXT: vadduhm v2, v3, v2 ; P8LE-NEXT: blr ; @@ -824,19 +792,17 @@ ; P9LE-NEXT: li r3, 0 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: rlwinm r3, r3, 0, 26, 31 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtvsrd v3, r3 ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: rlwinm r3, r3, 0, 27, 31 -; P9LE-NEXT: xxswapd v3, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: rlwinm r4, r3, 0, 16, 31 ; P9LE-NEXT: lis r6, 22765 ; P9LE-NEXT: ori r6, r6, 8969 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: vmrglh v3, v4, v3 +; P9LE-NEXT: vmrghh v3, v4, v3 ; P9LE-NEXT: clrldi r5, r4, 32 ; P9LE-NEXT: mulld r5, r5, r6 ; P9LE-NEXT: rldicl r5, r5, 32, 32 @@ -846,14 +812,12 @@ ; P9LE-NEXT: srwi r4, r4, 6 ; P9LE-NEXT: mulli r4, r4, 95 ; P9LE-NEXT: subf r3, r4, r3 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: rlwinm r3, r3, 0, 29, 31 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtvsrd f0, r3 -; P9LE-NEXT: xxswapd v2, vs0 -; P9LE-NEXT: vmrglh v2, v4, v2 +; P9LE-NEXT: mtvsrd v2, r3 +; P9LE-NEXT: vmrghh v2, v4, v2 ; P9LE-NEXT: vmrglw v2, v2, v3 ; P9LE-NEXT: blr ; @@ -907,9 +871,8 @@ ; P8LE-NEXT: mulld r3, r7, r3 ; P8LE-NEXT: rldicl r7, r4, 48, 48 ; P8LE-NEXT: rlwinm r7, r7, 0, 27, 31 -; P8LE-NEXT: mtvsrd f1, r7 +; P8LE-NEXT: mtvsrd v3, r7 ; P8LE-NEXT: rldicl r3, r3, 32, 32 -; P8LE-NEXT: xxswapd v3, vs1 ; P8LE-NEXT: subf r6, r3, r6 ; P8LE-NEXT: srwi r6, r6, 1 ; P8LE-NEXT: add r3, r6, r3 @@ -919,15 +882,12 @@ ; P8LE-NEXT: rlwinm r6, r6, 0, 26, 31 ; P8LE-NEXT: mulli r3, r3, 95 ; P8LE-NEXT: rlwinm r4, r4, 0, 29, 31 -; P8LE-NEXT: mtvsrd f0, r6 -; P8LE-NEXT: mtvsrd f3, r4 -; P8LE-NEXT: xxswapd v2, vs0 -; P8LE-NEXT: xxswapd v5, vs3 +; P8LE-NEXT: mtvsrd v2, r6 +; P8LE-NEXT: mtvsrd v5, r4 +; P8LE-NEXT: vmrghh v2, v3, v2 ; P8LE-NEXT: subf r3, r3, r5 -; P8LE-NEXT: mtvsrd f2, r3 -; P8LE-NEXT: vmrglh v2, v3, v2 -; P8LE-NEXT: xxswapd v4, vs2 -; P8LE-NEXT: vmrglh v3, v4, v5 +; P8LE-NEXT: mtvsrd v4, r3 +; P8LE-NEXT: vmrghh v3, v4, v5 ; P8LE-NEXT: vmrglw v2, v3, v2 ; P8LE-NEXT: blr ; @@ -987,7 +947,7 @@ ; P9LE-NEXT: rldicl r4, r4, 28, 36 ; P9LE-NEXT: mulli r4, r4, 23 ; P9LE-NEXT: subf r3, r4, r3 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtvsrd v3, r3 ; P9LE-NEXT: li r3, 6 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: rlwinm r4, r3, 0, 16, 31 @@ -995,8 +955,7 @@ ; P9LE-NEXT: rldicl r4, r4, 21, 43 ; P9LE-NEXT: mulli r4, r4, 5423 ; P9LE-NEXT: subf r3, r4, r3 -; P9LE-NEXT: xxswapd v3, vs0 -; P9LE-NEXT: mtvsrd f0, r3 +; P9LE-NEXT: mtvsrd v4, r3 ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: rlwinm r4, r3, 31, 17, 31 @@ -1004,12 +963,10 @@ ; P9LE-NEXT: rldicl r4, r4, 24, 40 ; P9LE-NEXT: mulli r4, r4, 654 ; P9LE-NEXT: subf r3, r4, r3 -; P9LE-NEXT: xxswapd v4, vs0 -; P9LE-NEXT: mtvsrd f0, r3 -; P9LE-NEXT: xxswapd v2, vs0 -; P9LE-NEXT: vmrglh v3, v4, v3 +; P9LE-NEXT: vmrghh v3, v4, v3 +; P9LE-NEXT: mtvsrd v2, r3 ; P9LE-NEXT: xxlxor v4, v4, v4 -; P9LE-NEXT: vmrglh v2, v2, v4 +; P9LE-NEXT: vmrghh v2, v2, v4 ; P9LE-NEXT: vmrglw v2, v3, v2 ; P9LE-NEXT: blr ; @@ -1065,7 +1022,7 @@ ; P8LE-NEXT: xxswapd vs0, v2 ; P8LE-NEXT: li r3, 0 ; P8LE-NEXT: lis r8, 24749 -; P8LE-NEXT: xxlxor v5, v5, v5 +; P8LE-NEXT: xxlxor v2, v2, v2 ; P8LE-NEXT: oris r5, r3, 45590 ; P8LE-NEXT: ori r8, r8, 47143 ; P8LE-NEXT: oris r3, r3, 51306 @@ -1089,16 +1046,13 @@ ; P8LE-NEXT: mulli r3, r3, 654 ; P8LE-NEXT: subf r5, r5, r6 ; P8LE-NEXT: subf r6, r8, r7 -; P8LE-NEXT: mtvsrd f0, r5 +; P8LE-NEXT: mtvsrd v3, r5 ; P8LE-NEXT: subf r3, r3, r4 -; P8LE-NEXT: mtvsrd f1, r6 -; P8LE-NEXT: mtvsrd f2, r3 -; P8LE-NEXT: xxswapd v2, vs0 -; P8LE-NEXT: xxswapd v3, vs1 -; P8LE-NEXT: xxswapd v4, vs2 -; P8LE-NEXT: vmrglh v2, v3, v2 -; P8LE-NEXT: vmrglh v3, v4, v5 -; P8LE-NEXT: vmrglw v2, v2, v3 +; P8LE-NEXT: mtvsrd v4, r6 +; P8LE-NEXT: mtvsrd v5, r3 +; P8LE-NEXT: vmrghh v3, v4, v3 +; P8LE-NEXT: vmrghh v2, v5, v2 +; P8LE-NEXT: vmrglw v2, v3, v2 ; P8LE-NEXT: blr ; ; P8BE-LABEL: dont_fold_urem_one: diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll @@ -20,12 +20,10 @@ ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: mfvsrwz r4, f0 +; CHECK-P8-NEXT: mtvsrd v3, r4 ; CHECK-P8-NEXT: mfvsrwz r3, f1 -; CHECK-P8-NEXT: mtvsrd f1, r4 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: xxswapd v3, vs1 -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: vmrglh v2, v3, v2 +; CHECK-P8-NEXT: mtvsrd v2, r3 +; CHECK-P8-NEXT: vmrghh v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mfvsrwz r3, f0 ; CHECK-P8-NEXT: blr @@ -40,13 +38,11 @@ ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 -; CHECK-P9-NEXT: xxswapd v2, vs1 -; CHECK-P9-NEXT: xxswapd v3, vs0 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: li r3, 0 +; CHECK-P9-NEXT: vmrghh v2, v3, v2 ; CHECK-P9-NEXT: vextuwrx r3, r3, v2 ; CHECK-P9-NEXT: blr ; @@ -90,20 +86,16 @@ ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: mfvsrwz r3, f1 -; CHECK-P8-NEXT: mtvsrd f1, r3 +; CHECK-P8-NEXT: mtvsrd v2, r3 ; CHECK-P8-NEXT: mfvsrwz r3, f0 ; CHECK-P8-NEXT: mfvsrwz r4, f2 -; CHECK-P8-NEXT: xxswapd v4, vs1 -; CHECK-P8-NEXT: mtvsrd f0, r3 +; CHECK-P8-NEXT: mtvsrd v3, r3 ; CHECK-P8-NEXT: mfvsrwz r3, f3 -; CHECK-P8-NEXT: mtvsrd f2, r4 -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: mtvsrd f3, r3 -; CHECK-P8-NEXT: xxswapd v3, vs2 -; CHECK-P8-NEXT: xxswapd v5, vs3 -; CHECK-P8-NEXT: vmrglh v2, v3, v2 -; CHECK-P8-NEXT: vmrglh v3, v4, v5 -; CHECK-P8-NEXT: vmrglw v2, v3, v2 +; CHECK-P8-NEXT: mtvsrd v4, r4 +; CHECK-P8-NEXT: mtvsrd v5, r3 +; CHECK-P8-NEXT: vmrghh v3, v4, v3 +; CHECK-P8-NEXT: vmrghh v2, v2, v5 +; CHECK-P8-NEXT: vmrglw v2, v2, v3 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mfvsrd r3, f0 ; CHECK-P8-NEXT: blr @@ -114,27 +106,23 @@ ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 -; CHECK-P9-NEXT: xxswapd v3, vs0 ; CHECK-P9-NEXT: xxswapd vs0, v2 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 -; CHECK-P9-NEXT: xxswapd v4, vs0 ; CHECK-P9-NEXT: xscvspdpn f0, v2 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: vmrghh v3, v4, v3 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: xxswapd v4, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 1 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 -; CHECK-P9-NEXT: xxswapd v2, vs0 -; CHECK-P9-NEXT: vmrglh v2, v4, v2 +; CHECK-P9-NEXT: mtvsrd v2, r3 +; CHECK-P9-NEXT: vmrghh v2, v4, v2 ; CHECK-P9-NEXT: vmrglw v2, v2, v3 ; CHECK-P9-NEXT: mfvsrld r3, v2 ; CHECK-P9-NEXT: blr @@ -180,59 +168,51 @@ ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: lvx v5, r3, r4 -; CHECK-P8-NEXT: xxswapd vs1, v2 +; CHECK-P8-NEXT: lvx v3, r3, r4 ; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-P8-NEXT: xxsldwi vs2, v5, v5, 3 -; CHECK-P8-NEXT: xscvspdpn f4, v5 -; CHECK-P8-NEXT: xxswapd vs3, v5 -; CHECK-P8-NEXT: xxsldwi vs5, v5, v5, 1 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 +; CHECK-P8-NEXT: xxswapd vs1, v2 +; CHECK-P8-NEXT: xscvspdpn f2, v2 +; CHECK-P8-NEXT: xxsldwi vs4, v2, v2, 1 +; CHECK-P8-NEXT: xxsldwi vs5, v3, v3, 3 +; CHECK-P8-NEXT: xscvspdpn f3, v3 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: xscvspdpn f2, vs2 -; CHECK-P8-NEXT: xscvspdpn f3, vs3 +; CHECK-P8-NEXT: xscvspdpn f1, vs1 +; CHECK-P8-NEXT: xscvspdpn f4, vs4 ; CHECK-P8-NEXT: xscvspdpn f5, vs5 -; CHECK-P8-NEXT: xscvdpsxws f4, f4 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: mfvsrwz r4, f4 -; CHECK-P8-NEXT: mfvsrwz r6, f1 -; CHECK-P8-NEXT: mfvsrwz r5, f0 -; CHECK-P8-NEXT: mtvsrd f1, r6 -; CHECK-P8-NEXT: mtvsrd f0, r5 -; CHECK-P8-NEXT: xxswapd v4, vs1 -; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 1 -; CHECK-P8-NEXT: xxswapd v3, vs0 -; CHECK-P8-NEXT: xscvspdpn f0, v2 -; CHECK-P8-NEXT: mtvsrd f4, r4 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: mfvsrwz r4, f2 -; CHECK-P8-NEXT: xxswapd v1, vs4 -; CHECK-P8-NEXT: vmrglh v2, v4, v3 -; CHECK-P8-NEXT: mtvsrd f2, r4 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: mfvsrwz r4, f5 -; CHECK-P8-NEXT: xxswapd v5, vs2 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: mfvsrwz r3, f0 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: mfvsrwz r3, f1 -; CHECK-P8-NEXT: xxswapd v3, vs0 -; CHECK-P8-NEXT: mtvsrd f1, r3 +; CHECK-P8-NEXT: xxswapd vs0, v3 +; CHECK-P8-NEXT: mfvsrwz r4, f1 +; CHECK-P8-NEXT: xxsldwi vs1, v3, v3, 1 +; CHECK-P8-NEXT: mtvsrd v2, r3 +; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: mfvsrwz r3, f2 +; CHECK-P8-NEXT: xscvdpsxws f2, f4 +; CHECK-P8-NEXT: xscvspdpn f1, vs1 +; CHECK-P8-NEXT: xscvdpsxws f4, f5 +; CHECK-P8-NEXT: mtvsrd v4, r4 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: vmrghh v2, v4, v2 +; CHECK-P8-NEXT: mfvsrwz r4, f2 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: mtvsrd v3, r3 ; CHECK-P8-NEXT: mfvsrwz r3, f3 -; CHECK-P8-NEXT: mtvsrd f3, r4 -; CHECK-P8-NEXT: xxswapd v4, vs1 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: xxswapd v6, vs3 -; CHECK-P8-NEXT: xxswapd v0, vs0 -; CHECK-P8-NEXT: vmrglh v3, v3, v4 -; CHECK-P8-NEXT: vmrglh v4, v0, v5 -; CHECK-P8-NEXT: vmrglh v5, v1, v6 +; CHECK-P8-NEXT: mtvsrd v4, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f0 +; CHECK-P8-NEXT: vmrghh v3, v3, v4 +; CHECK-P8-NEXT: mtvsrd v4, r3 +; CHECK-P8-NEXT: mfvsrwz r3, f4 +; CHECK-P8-NEXT: mtvsrd v0, r4 +; CHECK-P8-NEXT: mtvsrd v5, r3 +; CHECK-P8-NEXT: mfvsrwz r3, f1 +; CHECK-P8-NEXT: vmrghh v5, v0, v5 +; CHECK-P8-NEXT: mtvsrd v1, r3 ; CHECK-P8-NEXT: vmrglw v2, v3, v2 -; CHECK-P8-NEXT: vmrglw v3, v5, v4 +; CHECK-P8-NEXT: vmrghh v4, v4, v1 +; CHECK-P8-NEXT: vmrglw v3, v4, v5 ; CHECK-P8-NEXT: xxmrgld v2, v3, v2 ; CHECK-P8-NEXT: blr ; @@ -244,53 +224,45 @@ ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: lxv vs0, 16(r3) ; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: xxswapd v2, vs2 ; CHECK-P9-NEXT: xxswapd vs2, vs1 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: xxswapd v3, vs2 ; CHECK-P9-NEXT: xscvspdpn f2, vs1 ; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: vmrghh v2, v3, v2 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 +; CHECK-P9-NEXT: vmrghh v3, v3, v4 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: xxswapd v3, vs2 -; CHECK-P9-NEXT: vmrglh v3, v3, v4 ; CHECK-P9-NEXT: vmrglw v2, v3, v2 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: xxswapd v3, vs1 ; CHECK-P9-NEXT: xxswapd vs1, vs0 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xscvspdpn f1, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: vmrghh v3, v4, v3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: xxswapd v4, vs1 -; CHECK-P9-NEXT: xxswapd v5, vs0 -; CHECK-P9-NEXT: vmrglh v4, v4, v5 +; CHECK-P9-NEXT: mtvsrd v5, r3 +; CHECK-P9-NEXT: vmrghh v4, v4, v5 ; CHECK-P9-NEXT: vmrglw v3, v4, v3 ; CHECK-P9-NEXT: xxmrgld v2, v3, v2 ; CHECK-P9-NEXT: blr @@ -363,116 +335,100 @@ ; CHECK-P8-LABEL: test16elt: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: lvx v5, 0, r4 -; CHECK-P8-NEXT: li r6, 32 ; CHECK-P8-NEXT: li r5, 16 -; CHECK-P8-NEXT: lvx v2, r4, r6 +; CHECK-P8-NEXT: li r6, 32 ; CHECK-P8-NEXT: lvx v3, r4, r5 +; CHECK-P8-NEXT: lvx v2, r4, r6 ; CHECK-P8-NEXT: li r6, 48 -; CHECK-P8-NEXT: xscvspdpn f0, v5 -; CHECK-P8-NEXT: xxsldwi vs1, v5, v5, 3 +; CHECK-P8-NEXT: xxsldwi vs0, v5, v5, 3 +; CHECK-P8-NEXT: xscvspdpn f1, v5 ; CHECK-P8-NEXT: lvx v4, r4, r6 -; CHECK-P8-NEXT: xscvspdpn f4, v2 -; CHECK-P8-NEXT: xxsldwi vs5, v5, v5, 1 -; CHECK-P8-NEXT: xscvspdpn f2, v3 ; CHECK-P8-NEXT: xxswapd vs3, v5 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: xxswapd vs8, v3 -; CHECK-P8-NEXT: xscvspdpn f6, v4 +; CHECK-P8-NEXT: xxsldwi vs5, v5, v5, 1 ; CHECK-P8-NEXT: xxsldwi vs7, v3, v3, 3 +; CHECK-P8-NEXT: xxswapd vs8, v3 +; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: xscvspdpn f3, vs3 ; CHECK-P8-NEXT: xscvspdpn f5, vs5 -; CHECK-P8-NEXT: xxsldwi vs10, v2, v2, 3 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: xscvspdpn f7, vs7 +; CHECK-P8-NEXT: xscvspdpn f8, vs8 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: xxsldwi vs9, v3, v3, 1 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: xscvspdpn f2, v3 +; CHECK-P8-NEXT: mfvsrwz r4, f1 +; CHECK-P8-NEXT: xscvdpsxws f1, f5 +; CHECK-P8-NEXT: mtvsrd v5, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f0 +; CHECK-P8-NEXT: xxsldwi vs0, v3, v3, 1 +; CHECK-P8-NEXT: xscvspdpn f4, v2 +; CHECK-P8-NEXT: xscvdpsxws f5, f7 +; CHECK-P8-NEXT: xxsldwi vs7, v4, v4, 3 +; CHECK-P8-NEXT: mtvsrd v3, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f3 +; CHECK-P8-NEXT: xxsldwi vs3, v2, v2, 3 +; CHECK-P8-NEXT: xscvspdpn f6, v4 +; CHECK-P8-NEXT: mtvsrd v0, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f1 +; CHECK-P8-NEXT: xscvdpsxws f1, f8 +; CHECK-P8-NEXT: xxswapd vs8, v4 +; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: mtvsrd v1, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f5 +; CHECK-P8-NEXT: xxswapd vs5, v2 ; CHECK-P8-NEXT: xscvspdpn f3, vs3 -; CHECK-P8-NEXT: xxsldwi vs12, v2, v2, 1 -; CHECK-P8-NEXT: xscvspdpn f8, vs8 -; CHECK-P8-NEXT: xxswapd vs11, v2 ; CHECK-P8-NEXT: xscvdpsxws f4, f4 -; CHECK-P8-NEXT: xxswapd v2, v4 +; CHECK-P8-NEXT: vmrghh v3, v0, v3 +; CHECK-P8-NEXT: mtvsrd v0, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f1 +; CHECK-P8-NEXT: xscvdpsxws f6, f6 +; CHECK-P8-NEXT: xscvspdpn f1, vs5 +; CHECK-P8-NEXT: xxsldwi vs5, v2, v2, 1 +; CHECK-P8-NEXT: mtvsrd v6, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f2 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: vmrghh v2, v5, v1 +; CHECK-P8-NEXT: vmrghh v5, v6, v0 +; CHECK-P8-NEXT: mtvsrd v0, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f4 +; CHECK-P8-NEXT: xscvdpsxws f2, f3 +; CHECK-P8-NEXT: xscvspdpn f5, vs5 +; CHECK-P8-NEXT: mtvsrd v1, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f6 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: mtvsrd v6, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f0 ; CHECK-P8-NEXT: xscvspdpn f7, vs7 -; CHECK-P8-NEXT: xxsldwi vs13, v4, v4, 3 -; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: xxsldwi v3, v4, v4, 1 -; CHECK-P8-NEXT: xscvspdpn f10, vs10 +; CHECK-P8-NEXT: mtvsrd v7, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f2 +; CHECK-P8-NEXT: xxsldwi vs2, v4, v4, 1 +; CHECK-P8-NEXT: xscvspdpn f8, vs8 +; CHECK-P8-NEXT: xscvdpsxws f0, f5 +; CHECK-P8-NEXT: mtvsrd v4, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f1 +; CHECK-P8-NEXT: xscvspdpn f1, vs2 +; CHECK-P8-NEXT: xscvdpsxws f3, f7 +; CHECK-P8-NEXT: mtvsrd v8, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f0 +; CHECK-P8-NEXT: xscvdpsxws f0, f8 +; CHECK-P8-NEXT: mtvsrd v9, r4 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: xscvspdpn f9, vs9 -; CHECK-P8-NEXT: xscvdpsxws f6, f6 -; CHECK-P8-NEXT: xscvspdpn f12, vs12 -; CHECK-P8-NEXT: xscvdpsxws f5, f5 +; CHECK-P8-NEXT: mfvsrwz r4, f3 +; CHECK-P8-NEXT: vmrghh v0, v0, v7 +; CHECK-P8-NEXT: mtvsrd v7, r4 ; CHECK-P8-NEXT: mfvsrwz r4, f0 -; CHECK-P8-NEXT: xscvspdpn f11, vs11 -; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: xscvspdpn v2, v2 -; CHECK-P8-NEXT: xscvdpsxws f8, f8 -; CHECK-P8-NEXT: mtvsrd f0, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f4 -; CHECK-P8-NEXT: xscvdpsxws f7, f7 -; CHECK-P8-NEXT: mfvsrwz r6, f2 -; CHECK-P8-NEXT: xscvspdpn f13, vs13 -; CHECK-P8-NEXT: xscvspdpn v3, v3 -; CHECK-P8-NEXT: xscvdpsxws f10, f10 -; CHECK-P8-NEXT: mtvsrd f4, r4 +; CHECK-P8-NEXT: vmrghh v4, v8, v4 +; CHECK-P8-NEXT: mtvsrd v8, r4 ; CHECK-P8-NEXT: mfvsrwz r4, f1 -; CHECK-P8-NEXT: xscvdpsxws f9, f9 -; CHECK-P8-NEXT: mtvsrd f2, r6 -; CHECK-P8-NEXT: mfvsrwz r6, f6 -; CHECK-P8-NEXT: xscvdpsxws f12, f12 -; CHECK-P8-NEXT: mtvsrd f1, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f5 -; CHECK-P8-NEXT: xscvdpsxws f11, f11 -; CHECK-P8-NEXT: xxswapd v4, vs1 -; CHECK-P8-NEXT: mtvsrd f6, r6 -; CHECK-P8-NEXT: mfvsrwz r6, f3 -; CHECK-P8-NEXT: xscvdpsxws v2, v2 -; CHECK-P8-NEXT: xxswapd v9, vs6 -; CHECK-P8-NEXT: mtvsrd f5, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f8 -; CHECK-P8-NEXT: mtvsrd f3, r6 -; CHECK-P8-NEXT: xxswapd v0, vs5 -; CHECK-P8-NEXT: mfvsrwz r6, f7 -; CHECK-P8-NEXT: xscvdpsxws f13, f13 -; CHECK-P8-NEXT: xxswapd v5, vs3 -; CHECK-P8-NEXT: xscvdpsxws v3, v3 -; CHECK-P8-NEXT: mtvsrd f8, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f10 -; CHECK-P8-NEXT: mtvsrd f7, r6 -; CHECK-P8-NEXT: mfvsrwz r6, f9 -; CHECK-P8-NEXT: mtvsrd f10, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f12 -; CHECK-P8-NEXT: mtvsrd f9, r6 -; CHECK-P8-NEXT: xxswapd v6, vs10 -; CHECK-P8-NEXT: mfvsrwz r6, f11 -; CHECK-P8-NEXT: mtvsrd f12, r4 -; CHECK-P8-NEXT: xxswapd v1, vs9 -; CHECK-P8-NEXT: mfvsrwz r4, v2 -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: mtvsrd f11, r6 -; CHECK-P8-NEXT: mfvsrwz r6, f13 -; CHECK-P8-NEXT: mtvsrd f0, r4 -; CHECK-P8-NEXT: xxswapd v7, vs11 -; CHECK-P8-NEXT: mfvsrwz r4, v3 -; CHECK-P8-NEXT: vmrglh v3, v5, v4 -; CHECK-P8-NEXT: xxswapd v4, vs7 -; CHECK-P8-NEXT: vmrglh v2, v2, v0 -; CHECK-P8-NEXT: xxswapd v5, vs8 -; CHECK-P8-NEXT: xxswapd v0, vs2 -; CHECK-P8-NEXT: mtvsrd f13, r6 -; CHECK-P8-NEXT: mtvsrd f1, r4 -; CHECK-P8-NEXT: xxswapd v8, vs0 -; CHECK-P8-NEXT: vmrglh v4, v5, v4 -; CHECK-P8-NEXT: vmrglh v5, v0, v1 -; CHECK-P8-NEXT: xxswapd v1, vs4 -; CHECK-P8-NEXT: vmrglh v0, v7, v6 -; CHECK-P8-NEXT: xxswapd v6, vs12 -; CHECK-P8-NEXT: xxswapd v7, vs13 -; CHECK-P8-NEXT: xxswapd v10, vs1 +; CHECK-P8-NEXT: vmrghh v1, v1, v9 +; CHECK-P8-NEXT: mtvsrd v9, r4 +; CHECK-P8-NEXT: vmrghh v7, v8, v7 +; CHECK-P8-NEXT: vmrghh v6, v6, v9 ; CHECK-P8-NEXT: vmrglw v2, v2, v3 -; CHECK-P8-NEXT: vmrglh v1, v1, v6 -; CHECK-P8-NEXT: vmrglh v6, v8, v7 -; CHECK-P8-NEXT: vmrglh v7, v9, v10 -; CHECK-P8-NEXT: vmrglw v3, v5, v4 -; CHECK-P8-NEXT: vmrglw v4, v1, v0 -; CHECK-P8-NEXT: vmrglw v5, v7, v6 +; CHECK-P8-NEXT: vmrglw v3, v0, v5 +; CHECK-P8-NEXT: vmrglw v4, v1, v4 +; CHECK-P8-NEXT: vmrglw v5, v6, v7 ; CHECK-P8-NEXT: xxmrgld v2, v3, v2 ; CHECK-P8-NEXT: stvx v2, 0, r3 ; CHECK-P8-NEXT: xxmrgld v3, v5, v4 @@ -481,118 +437,102 @@ ; ; CHECK-P9-LABEL: test16elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs1, 0(r4) -; CHECK-P9-NEXT: lxv vs3, 16(r4) -; CHECK-P9-NEXT: xscvspdpn f5, vs1 -; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-P9-NEXT: xscvspdpn f8, vs3 -; CHECK-P9-NEXT: xxswapd vs4, vs1 -; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 +; CHECK-P9-NEXT: lxv vs2, 0(r4) +; CHECK-P9-NEXT: xxsldwi vs3, vs2, vs2, 3 +; CHECK-P9-NEXT: xxswapd vs4, vs2 +; CHECK-P9-NEXT: xscvspdpn f3, vs3 ; CHECK-P9-NEXT: xscvspdpn f4, vs4 -; CHECK-P9-NEXT: xscvdpsxws f5, f5 +; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 +; CHECK-P9-NEXT: xscvspdpn f5, vs2 +; CHECK-P9-NEXT: xxsldwi vs2, vs2, vs2, 1 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: xscvdpsxws f8, f8 -; CHECK-P9-NEXT: xxsldwi vs6, vs3, vs3, 3 -; CHECK-P9-NEXT: xxswapd vs7, vs3 -; CHECK-P9-NEXT: xscvspdpn f6, vs6 -; CHECK-P9-NEXT: xxsldwi vs3, vs3, vs3, 1 -; CHECK-P9-NEXT: xscvspdpn f7, vs7 -; CHECK-P9-NEXT: xscvspdpn f3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: xscvspdpn f1, vs1 +; CHECK-P9-NEXT: mfvsrwz r5, f3 +; CHECK-P9-NEXT: lxv vs1, 16(r4) +; CHECK-P9-NEXT: xxsldwi vs6, vs1, vs1, 3 +; CHECK-P9-NEXT: xxswapd vs3, vs1 +; CHECK-P9-NEXT: mtvsrd v2, r5 +; CHECK-P9-NEXT: mfvsrwz r5, f4 +; CHECK-P9-NEXT: xscvdpsxws f4, f5 +; CHECK-P9-NEXT: xscvspdpn f3, vs3 +; CHECK-P9-NEXT: mtvsrd v3, r5 +; CHECK-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-P9-NEXT: mfvsrwz r5, f4 +; CHECK-P9-NEXT: xscvspdpn f4, vs6 +; CHECK-P9-NEXT: mtvsrd v3, r5 +; CHECK-P9-NEXT: mfvsrwz r5, f2 +; CHECK-P9-NEXT: xscvspdpn f2, vs1 +; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 ; CHECK-P9-NEXT: xscvdpsxws f4, f4 -; CHECK-P9-NEXT: xscvdpsxws f6, f6 -; CHECK-P9-NEXT: mfvsrwz r5, f5 -; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: xscvdpsxws f7, f7 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: mtvsrd f5, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f8 -; CHECK-P9-NEXT: mtvsrd f8, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f2 ; CHECK-P9-NEXT: lxv vs0, 32(r4) -; CHECK-P9-NEXT: xxsldwi vs9, vs0, vs0, 3 -; CHECK-P9-NEXT: xxswapd vs10, vs0 -; CHECK-P9-NEXT: xscvspdpn f9, vs9 -; CHECK-P9-NEXT: xscvspdpn f10, vs10 -; CHECK-P9-NEXT: xscvdpsxws f9, f9 -; CHECK-P9-NEXT: xscvdpsxws f10, f10 -; CHECK-P9-NEXT: mtvsrd f2, r5 +; CHECK-P9-NEXT: mtvsrd v4, r5 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: vmrghh v3, v3, v4 +; CHECK-P9-NEXT: vmrglw v2, v3, v2 ; CHECK-P9-NEXT: mfvsrwz r5, f4 -; CHECK-P9-NEXT: mtvsrd f4, r5 +; CHECK-P9-NEXT: xscvspdpn f1, vs1 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: mtvsrd v4, r5 +; CHECK-P9-NEXT: mfvsrwz r5, f3 +; CHECK-P9-NEXT: xxsldwi vs3, vs0, vs0, 3 +; CHECK-P9-NEXT: mtvsrd v5, r5 +; CHECK-P9-NEXT: mfvsrwz r5, f2 +; CHECK-P9-NEXT: xscvspdpn f2, vs3 +; CHECK-P9-NEXT: vmrghh v4, v5, v4 +; CHECK-P9-NEXT: mtvsrd v5, r5 ; CHECK-P9-NEXT: mfvsrwz r5, f1 -; CHECK-P9-NEXT: mtvsrd f1, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f6 -; CHECK-P9-NEXT: xxswapd v2, vs2 -; CHECK-P9-NEXT: xxswapd v3, vs4 +; CHECK-P9-NEXT: xxswapd vs1, vs0 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: mtvsrd v0, r5 +; CHECK-P9-NEXT: xscvspdpn f1, vs1 +; CHECK-P9-NEXT: vmrghh v5, v5, v0 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: vmrglw v3, v5, v4 +; CHECK-P9-NEXT: mfvsrwz r5, f2 ; CHECK-P9-NEXT: xscvspdpn f2, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 +; CHECK-P9-NEXT: mtvsrd v0, r5 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mtvsrd f6, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f7 -; CHECK-P9-NEXT: xxswapd v4, vs1 +; CHECK-P9-NEXT: mfvsrwz r5, f1 ; CHECK-P9-NEXT: lxv vs1, 48(r4) -; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: xxswapd v3, vs5 -; CHECK-P9-NEXT: mtvsrd f7, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f3 -; CHECK-P9-NEXT: vmrglh v3, v3, v4 -; CHECK-P9-NEXT: xxswapd v4, vs6 -; CHECK-P9-NEXT: xxswapd v5, vs7 -; CHECK-P9-NEXT: mtvsrd f3, r5 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: xxswapd v0, vs3 -; CHECK-P9-NEXT: vmrglh v4, v5, v4 -; CHECK-P9-NEXT: xxswapd v5, vs8 -; CHECK-P9-NEXT: vmrglh v5, v5, v0 +; CHECK-P9-NEXT: mtvsrd v1, r5 +; CHECK-P9-NEXT: vmrghh v0, v1, v0 ; CHECK-P9-NEXT: mfvsrwz r4, f2 -; CHECK-P9-NEXT: mtvsrd f2, r4 -; CHECK-P9-NEXT: mfvsrwz r4, f0 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: mtvsrd f0, r4 -; CHECK-P9-NEXT: vmrglw v3, v5, v4 -; CHECK-P9-NEXT: xxswapd v4, vs2 ; CHECK-P9-NEXT: xxmrgld vs2, v3, v2 -; CHECK-P9-NEXT: xxswapd v2, vs0 +; CHECK-P9-NEXT: mtvsrd v4, r4 +; CHECK-P9-NEXT: mfvsrwz r4, f0 ; CHECK-P9-NEXT: xxsldwi vs0, vs1, vs1, 3 +; CHECK-P9-NEXT: mtvsrd v2, r4 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 +; CHECK-P9-NEXT: vmrghh v2, v4, v2 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: vmrglw v2, v2, v0 ; CHECK-P9-NEXT: mfvsrwz r4, f0 -; CHECK-P9-NEXT: mtvsrd f0, r4 -; CHECK-P9-NEXT: xxswapd v3, vs0 ; CHECK-P9-NEXT: xxswapd vs0, vs1 +; CHECK-P9-NEXT: mtvsrd v3, r4 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mfvsrwz r4, f0 -; CHECK-P9-NEXT: mtvsrd f0, r4 -; CHECK-P9-NEXT: vmrglh v2, v4, v2 -; CHECK-P9-NEXT: xxswapd v4, vs0 ; CHECK-P9-NEXT: xscvspdpn f0, vs1 +; CHECK-P9-NEXT: mtvsrd v4, r4 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: vmrghh v3, v4, v3 ; CHECK-P9-NEXT: mfvsrwz r4, f0 -; CHECK-P9-NEXT: mtvsrd f0, r4 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: xxswapd v4, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, vs1, vs1, 1 +; CHECK-P9-NEXT: mtvsrd v4, r4 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r5, f9 -; CHECK-P9-NEXT: mtvsrd f9, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f10 -; CHECK-P9-NEXT: mtvsrd f10, r5 -; CHECK-P9-NEXT: xxswapd v0, vs9 -; CHECK-P9-NEXT: xxswapd v1, vs10 -; CHECK-P9-NEXT: vmrglh v0, v1, v0 -; CHECK-P9-NEXT: vmrglw v2, v2, v0 -; CHECK-P9-NEXT: stxv vs2, 0(r3) ; CHECK-P9-NEXT: mfvsrwz r4, f0 -; CHECK-P9-NEXT: mtvsrd f0, r4 -; CHECK-P9-NEXT: xxswapd v5, vs0 -; CHECK-P9-NEXT: vmrglh v4, v4, v5 +; CHECK-P9-NEXT: mtvsrd v5, r4 +; CHECK-P9-NEXT: vmrghh v4, v4, v5 ; CHECK-P9-NEXT: vmrglw v3, v4, v3 ; CHECK-P9-NEXT: xxmrgld vs0, v3, v2 ; CHECK-P9-NEXT: stxv vs0, 16(r3) +; CHECK-P9-NEXT: stxv vs2, 0(r3) ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test16elt: @@ -728,12 +668,10 @@ ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: mfvsrwz r4, f0 +; CHECK-P8-NEXT: mtvsrd v3, r4 ; CHECK-P8-NEXT: mfvsrwz r3, f1 -; CHECK-P8-NEXT: mtvsrd f1, r4 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: xxswapd v3, vs1 -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: vmrglh v2, v3, v2 +; CHECK-P8-NEXT: mtvsrd v2, r3 +; CHECK-P8-NEXT: vmrghh v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mfvsrwz r3, f0 ; CHECK-P8-NEXT: blr @@ -748,13 +686,11 @@ ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 -; CHECK-P9-NEXT: xxswapd v2, vs1 -; CHECK-P9-NEXT: xxswapd v3, vs0 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: li r3, 0 +; CHECK-P9-NEXT: vmrghh v2, v3, v2 ; CHECK-P9-NEXT: vextuwrx r3, r3, v2 ; CHECK-P9-NEXT: blr ; @@ -798,20 +734,16 @@ ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: mfvsrwz r3, f1 -; CHECK-P8-NEXT: mtvsrd f1, r3 +; CHECK-P8-NEXT: mtvsrd v2, r3 ; CHECK-P8-NEXT: mfvsrwz r3, f0 ; CHECK-P8-NEXT: mfvsrwz r4, f2 -; CHECK-P8-NEXT: xxswapd v4, vs1 -; CHECK-P8-NEXT: mtvsrd f0, r3 +; CHECK-P8-NEXT: mtvsrd v3, r3 ; CHECK-P8-NEXT: mfvsrwz r3, f3 -; CHECK-P8-NEXT: mtvsrd f2, r4 -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: mtvsrd f3, r3 -; CHECK-P8-NEXT: xxswapd v3, vs2 -; CHECK-P8-NEXT: xxswapd v5, vs3 -; CHECK-P8-NEXT: vmrglh v2, v3, v2 -; CHECK-P8-NEXT: vmrglh v3, v4, v5 -; CHECK-P8-NEXT: vmrglw v2, v3, v2 +; CHECK-P8-NEXT: mtvsrd v4, r4 +; CHECK-P8-NEXT: mtvsrd v5, r3 +; CHECK-P8-NEXT: vmrghh v3, v4, v3 +; CHECK-P8-NEXT: vmrghh v2, v2, v5 +; CHECK-P8-NEXT: vmrglw v2, v2, v3 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mfvsrd r3, f0 ; CHECK-P8-NEXT: blr @@ -822,27 +754,23 @@ ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 -; CHECK-P9-NEXT: xxswapd v3, vs0 ; CHECK-P9-NEXT: xxswapd vs0, v2 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 -; CHECK-P9-NEXT: xxswapd v4, vs0 ; CHECK-P9-NEXT: xscvspdpn f0, v2 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: vmrghh v3, v4, v3 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: xxswapd v4, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 1 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 -; CHECK-P9-NEXT: xxswapd v2, vs0 -; CHECK-P9-NEXT: vmrglh v2, v4, v2 +; CHECK-P9-NEXT: mtvsrd v2, r3 +; CHECK-P9-NEXT: vmrghh v2, v4, v2 ; CHECK-P9-NEXT: vmrglw v2, v2, v3 ; CHECK-P9-NEXT: mfvsrld r3, v2 ; CHECK-P9-NEXT: blr @@ -888,59 +816,51 @@ ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: lvx v5, r3, r4 -; CHECK-P8-NEXT: xxswapd vs1, v2 +; CHECK-P8-NEXT: lvx v3, r3, r4 ; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-P8-NEXT: xxsldwi vs2, v5, v5, 3 -; CHECK-P8-NEXT: xscvspdpn f4, v5 -; CHECK-P8-NEXT: xxswapd vs3, v5 -; CHECK-P8-NEXT: xxsldwi vs5, v5, v5, 1 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 +; CHECK-P8-NEXT: xxswapd vs1, v2 +; CHECK-P8-NEXT: xscvspdpn f2, v2 +; CHECK-P8-NEXT: xxsldwi vs4, v2, v2, 1 +; CHECK-P8-NEXT: xxsldwi vs5, v3, v3, 3 +; CHECK-P8-NEXT: xscvspdpn f3, v3 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: xscvspdpn f2, vs2 -; CHECK-P8-NEXT: xscvspdpn f3, vs3 +; CHECK-P8-NEXT: xscvspdpn f1, vs1 +; CHECK-P8-NEXT: xscvspdpn f4, vs4 ; CHECK-P8-NEXT: xscvspdpn f5, vs5 -; CHECK-P8-NEXT: xscvdpsxws f4, f4 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: mfvsrwz r4, f4 -; CHECK-P8-NEXT: mfvsrwz r6, f1 -; CHECK-P8-NEXT: mfvsrwz r5, f0 -; CHECK-P8-NEXT: mtvsrd f1, r6 -; CHECK-P8-NEXT: mtvsrd f0, r5 -; CHECK-P8-NEXT: xxswapd v4, vs1 -; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 1 -; CHECK-P8-NEXT: xxswapd v3, vs0 -; CHECK-P8-NEXT: xscvspdpn f0, v2 -; CHECK-P8-NEXT: mtvsrd f4, r4 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: mfvsrwz r4, f2 -; CHECK-P8-NEXT: xxswapd v1, vs4 -; CHECK-P8-NEXT: vmrglh v2, v4, v3 -; CHECK-P8-NEXT: mtvsrd f2, r4 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: mfvsrwz r4, f5 -; CHECK-P8-NEXT: xxswapd v5, vs2 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: mfvsrwz r3, f0 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: mfvsrwz r3, f1 -; CHECK-P8-NEXT: xxswapd v3, vs0 -; CHECK-P8-NEXT: mtvsrd f1, r3 +; CHECK-P8-NEXT: xxswapd vs0, v3 +; CHECK-P8-NEXT: mfvsrwz r4, f1 +; CHECK-P8-NEXT: xxsldwi vs1, v3, v3, 1 +; CHECK-P8-NEXT: mtvsrd v2, r3 +; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: mfvsrwz r3, f2 +; CHECK-P8-NEXT: xscvdpsxws f2, f4 +; CHECK-P8-NEXT: xscvspdpn f1, vs1 +; CHECK-P8-NEXT: xscvdpsxws f4, f5 +; CHECK-P8-NEXT: mtvsrd v4, r4 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: vmrghh v2, v4, v2 +; CHECK-P8-NEXT: mfvsrwz r4, f2 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: mtvsrd v3, r3 ; CHECK-P8-NEXT: mfvsrwz r3, f3 -; CHECK-P8-NEXT: mtvsrd f3, r4 -; CHECK-P8-NEXT: xxswapd v4, vs1 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: xxswapd v6, vs3 -; CHECK-P8-NEXT: xxswapd v0, vs0 -; CHECK-P8-NEXT: vmrglh v3, v3, v4 -; CHECK-P8-NEXT: vmrglh v4, v0, v5 -; CHECK-P8-NEXT: vmrglh v5, v1, v6 +; CHECK-P8-NEXT: mtvsrd v4, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f0 +; CHECK-P8-NEXT: vmrghh v3, v3, v4 +; CHECK-P8-NEXT: mtvsrd v4, r3 +; CHECK-P8-NEXT: mfvsrwz r3, f4 +; CHECK-P8-NEXT: mtvsrd v0, r4 +; CHECK-P8-NEXT: mtvsrd v5, r3 +; CHECK-P8-NEXT: mfvsrwz r3, f1 +; CHECK-P8-NEXT: vmrghh v5, v0, v5 +; CHECK-P8-NEXT: mtvsrd v1, r3 ; CHECK-P8-NEXT: vmrglw v2, v3, v2 -; CHECK-P8-NEXT: vmrglw v3, v5, v4 +; CHECK-P8-NEXT: vmrghh v4, v4, v1 +; CHECK-P8-NEXT: vmrglw v3, v4, v5 ; CHECK-P8-NEXT: xxmrgld v2, v3, v2 ; CHECK-P8-NEXT: blr ; @@ -952,53 +872,45 @@ ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: lxv vs0, 16(r3) ; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: xxswapd v2, vs2 ; CHECK-P9-NEXT: xxswapd vs2, vs1 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: xxswapd v3, vs2 ; CHECK-P9-NEXT: xscvspdpn f2, vs1 ; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: vmrghh v2, v3, v2 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 +; CHECK-P9-NEXT: vmrghh v3, v3, v4 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: xxswapd v3, vs2 -; CHECK-P9-NEXT: vmrglh v3, v3, v4 ; CHECK-P9-NEXT: vmrglw v2, v3, v2 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: xxswapd v3, vs1 ; CHECK-P9-NEXT: xxswapd vs1, vs0 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xscvspdpn f1, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: vmrghh v3, v4, v3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: xxswapd v4, vs1 -; CHECK-P9-NEXT: xxswapd v5, vs0 -; CHECK-P9-NEXT: vmrglh v4, v4, v5 +; CHECK-P9-NEXT: mtvsrd v5, r3 +; CHECK-P9-NEXT: vmrghh v4, v4, v5 ; CHECK-P9-NEXT: vmrglw v3, v4, v3 ; CHECK-P9-NEXT: xxmrgld v2, v3, v2 ; CHECK-P9-NEXT: blr @@ -1071,116 +983,100 @@ ; CHECK-P8-LABEL: test16elt_signed: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: lvx v5, 0, r4 -; CHECK-P8-NEXT: li r6, 32 ; CHECK-P8-NEXT: li r5, 16 -; CHECK-P8-NEXT: lvx v2, r4, r6 +; CHECK-P8-NEXT: li r6, 32 ; CHECK-P8-NEXT: lvx v3, r4, r5 +; CHECK-P8-NEXT: lvx v2, r4, r6 ; CHECK-P8-NEXT: li r6, 48 -; CHECK-P8-NEXT: xscvspdpn f0, v5 -; CHECK-P8-NEXT: xxsldwi vs1, v5, v5, 3 +; CHECK-P8-NEXT: xxsldwi vs0, v5, v5, 3 +; CHECK-P8-NEXT: xscvspdpn f1, v5 ; CHECK-P8-NEXT: lvx v4, r4, r6 -; CHECK-P8-NEXT: xscvspdpn f4, v2 -; CHECK-P8-NEXT: xxsldwi vs5, v5, v5, 1 -; CHECK-P8-NEXT: xscvspdpn f2, v3 ; CHECK-P8-NEXT: xxswapd vs3, v5 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: xxswapd vs8, v3 -; CHECK-P8-NEXT: xscvspdpn f6, v4 +; CHECK-P8-NEXT: xxsldwi vs5, v5, v5, 1 ; CHECK-P8-NEXT: xxsldwi vs7, v3, v3, 3 +; CHECK-P8-NEXT: xxswapd vs8, v3 +; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: xscvspdpn f3, vs3 ; CHECK-P8-NEXT: xscvspdpn f5, vs5 -; CHECK-P8-NEXT: xxsldwi vs10, v2, v2, 3 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: xscvspdpn f7, vs7 +; CHECK-P8-NEXT: xscvspdpn f8, vs8 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: xxsldwi vs9, v3, v3, 1 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: xscvspdpn f2, v3 +; CHECK-P8-NEXT: mfvsrwz r4, f1 +; CHECK-P8-NEXT: xscvdpsxws f1, f5 +; CHECK-P8-NEXT: mtvsrd v5, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f0 +; CHECK-P8-NEXT: xxsldwi vs0, v3, v3, 1 +; CHECK-P8-NEXT: xscvspdpn f4, v2 +; CHECK-P8-NEXT: xscvdpsxws f5, f7 +; CHECK-P8-NEXT: xxsldwi vs7, v4, v4, 3 +; CHECK-P8-NEXT: mtvsrd v3, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f3 +; CHECK-P8-NEXT: xxsldwi vs3, v2, v2, 3 +; CHECK-P8-NEXT: xscvspdpn f6, v4 +; CHECK-P8-NEXT: mtvsrd v0, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f1 +; CHECK-P8-NEXT: xscvdpsxws f1, f8 +; CHECK-P8-NEXT: xxswapd vs8, v4 +; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: mtvsrd v1, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f5 +; CHECK-P8-NEXT: xxswapd vs5, v2 ; CHECK-P8-NEXT: xscvspdpn f3, vs3 -; CHECK-P8-NEXT: xxsldwi vs12, v2, v2, 1 -; CHECK-P8-NEXT: xscvspdpn f8, vs8 -; CHECK-P8-NEXT: xxswapd vs11, v2 ; CHECK-P8-NEXT: xscvdpsxws f4, f4 -; CHECK-P8-NEXT: xxswapd v2, v4 +; CHECK-P8-NEXT: vmrghh v3, v0, v3 +; CHECK-P8-NEXT: mtvsrd v0, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f1 +; CHECK-P8-NEXT: xscvdpsxws f6, f6 +; CHECK-P8-NEXT: xscvspdpn f1, vs5 +; CHECK-P8-NEXT: xxsldwi vs5, v2, v2, 1 +; CHECK-P8-NEXT: mtvsrd v6, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f2 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: vmrghh v2, v5, v1 +; CHECK-P8-NEXT: vmrghh v5, v6, v0 +; CHECK-P8-NEXT: mtvsrd v0, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f4 +; CHECK-P8-NEXT: xscvdpsxws f2, f3 +; CHECK-P8-NEXT: xscvspdpn f5, vs5 +; CHECK-P8-NEXT: mtvsrd v1, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f6 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: mtvsrd v6, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f0 ; CHECK-P8-NEXT: xscvspdpn f7, vs7 -; CHECK-P8-NEXT: xxsldwi vs13, v4, v4, 3 -; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: xxsldwi v3, v4, v4, 1 -; CHECK-P8-NEXT: xscvspdpn f10, vs10 +; CHECK-P8-NEXT: mtvsrd v7, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f2 +; CHECK-P8-NEXT: xxsldwi vs2, v4, v4, 1 +; CHECK-P8-NEXT: xscvspdpn f8, vs8 +; CHECK-P8-NEXT: xscvdpsxws f0, f5 +; CHECK-P8-NEXT: mtvsrd v4, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f1 +; CHECK-P8-NEXT: xscvspdpn f1, vs2 +; CHECK-P8-NEXT: xscvdpsxws f3, f7 +; CHECK-P8-NEXT: mtvsrd v8, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f0 +; CHECK-P8-NEXT: xscvdpsxws f0, f8 +; CHECK-P8-NEXT: mtvsrd v9, r4 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: xscvspdpn f9, vs9 -; CHECK-P8-NEXT: xscvdpsxws f6, f6 -; CHECK-P8-NEXT: xscvspdpn f12, vs12 -; CHECK-P8-NEXT: xscvdpsxws f5, f5 +; CHECK-P8-NEXT: mfvsrwz r4, f3 +; CHECK-P8-NEXT: vmrghh v0, v0, v7 +; CHECK-P8-NEXT: mtvsrd v7, r4 ; CHECK-P8-NEXT: mfvsrwz r4, f0 -; CHECK-P8-NEXT: xscvspdpn f11, vs11 -; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: xscvspdpn v2, v2 -; CHECK-P8-NEXT: xscvdpsxws f8, f8 -; CHECK-P8-NEXT: mtvsrd f0, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f4 -; CHECK-P8-NEXT: xscvdpsxws f7, f7 -; CHECK-P8-NEXT: mfvsrwz r6, f2 -; CHECK-P8-NEXT: xscvspdpn f13, vs13 -; CHECK-P8-NEXT: xscvspdpn v3, v3 -; CHECK-P8-NEXT: xscvdpsxws f10, f10 -; CHECK-P8-NEXT: mtvsrd f4, r4 +; CHECK-P8-NEXT: vmrghh v4, v8, v4 +; CHECK-P8-NEXT: mtvsrd v8, r4 ; CHECK-P8-NEXT: mfvsrwz r4, f1 -; CHECK-P8-NEXT: xscvdpsxws f9, f9 -; CHECK-P8-NEXT: mtvsrd f2, r6 -; CHECK-P8-NEXT: mfvsrwz r6, f6 -; CHECK-P8-NEXT: xscvdpsxws f12, f12 -; CHECK-P8-NEXT: mtvsrd f1, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f5 -; CHECK-P8-NEXT: xscvdpsxws f11, f11 -; CHECK-P8-NEXT: xxswapd v4, vs1 -; CHECK-P8-NEXT: mtvsrd f6, r6 -; CHECK-P8-NEXT: mfvsrwz r6, f3 -; CHECK-P8-NEXT: xscvdpsxws v2, v2 -; CHECK-P8-NEXT: xxswapd v9, vs6 -; CHECK-P8-NEXT: mtvsrd f5, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f8 -; CHECK-P8-NEXT: mtvsrd f3, r6 -; CHECK-P8-NEXT: xxswapd v0, vs5 -; CHECK-P8-NEXT: mfvsrwz r6, f7 -; CHECK-P8-NEXT: xscvdpsxws f13, f13 -; CHECK-P8-NEXT: xxswapd v5, vs3 -; CHECK-P8-NEXT: xscvdpsxws v3, v3 -; CHECK-P8-NEXT: mtvsrd f8, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f10 -; CHECK-P8-NEXT: mtvsrd f7, r6 -; CHECK-P8-NEXT: mfvsrwz r6, f9 -; CHECK-P8-NEXT: mtvsrd f10, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f12 -; CHECK-P8-NEXT: mtvsrd f9, r6 -; CHECK-P8-NEXT: xxswapd v6, vs10 -; CHECK-P8-NEXT: mfvsrwz r6, f11 -; CHECK-P8-NEXT: mtvsrd f12, r4 -; CHECK-P8-NEXT: xxswapd v1, vs9 -; CHECK-P8-NEXT: mfvsrwz r4, v2 -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: mtvsrd f11, r6 -; CHECK-P8-NEXT: mfvsrwz r6, f13 -; CHECK-P8-NEXT: mtvsrd f0, r4 -; CHECK-P8-NEXT: xxswapd v7, vs11 -; CHECK-P8-NEXT: mfvsrwz r4, v3 -; CHECK-P8-NEXT: vmrglh v3, v5, v4 -; CHECK-P8-NEXT: xxswapd v4, vs7 -; CHECK-P8-NEXT: vmrglh v2, v2, v0 -; CHECK-P8-NEXT: xxswapd v5, vs8 -; CHECK-P8-NEXT: xxswapd v0, vs2 -; CHECK-P8-NEXT: mtvsrd f13, r6 -; CHECK-P8-NEXT: mtvsrd f1, r4 -; CHECK-P8-NEXT: xxswapd v8, vs0 -; CHECK-P8-NEXT: vmrglh v4, v5, v4 -; CHECK-P8-NEXT: vmrglh v5, v0, v1 -; CHECK-P8-NEXT: xxswapd v1, vs4 -; CHECK-P8-NEXT: vmrglh v0, v7, v6 -; CHECK-P8-NEXT: xxswapd v6, vs12 -; CHECK-P8-NEXT: xxswapd v7, vs13 -; CHECK-P8-NEXT: xxswapd v10, vs1 +; CHECK-P8-NEXT: vmrghh v1, v1, v9 +; CHECK-P8-NEXT: mtvsrd v9, r4 +; CHECK-P8-NEXT: vmrghh v7, v8, v7 +; CHECK-P8-NEXT: vmrghh v6, v6, v9 ; CHECK-P8-NEXT: vmrglw v2, v2, v3 -; CHECK-P8-NEXT: vmrglh v1, v1, v6 -; CHECK-P8-NEXT: vmrglh v6, v8, v7 -; CHECK-P8-NEXT: vmrglh v7, v9, v10 -; CHECK-P8-NEXT: vmrglw v3, v5, v4 -; CHECK-P8-NEXT: vmrglw v4, v1, v0 -; CHECK-P8-NEXT: vmrglw v5, v7, v6 +; CHECK-P8-NEXT: vmrglw v3, v0, v5 +; CHECK-P8-NEXT: vmrglw v4, v1, v4 +; CHECK-P8-NEXT: vmrglw v5, v6, v7 ; CHECK-P8-NEXT: xxmrgld v2, v3, v2 ; CHECK-P8-NEXT: stvx v2, 0, r3 ; CHECK-P8-NEXT: xxmrgld v3, v5, v4 @@ -1189,118 +1085,102 @@ ; ; CHECK-P9-LABEL: test16elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs1, 0(r4) -; CHECK-P9-NEXT: lxv vs3, 16(r4) -; CHECK-P9-NEXT: xscvspdpn f5, vs1 -; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-P9-NEXT: xscvspdpn f8, vs3 -; CHECK-P9-NEXT: xxswapd vs4, vs1 -; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 +; CHECK-P9-NEXT: lxv vs2, 0(r4) +; CHECK-P9-NEXT: xxsldwi vs3, vs2, vs2, 3 +; CHECK-P9-NEXT: xxswapd vs4, vs2 +; CHECK-P9-NEXT: xscvspdpn f3, vs3 ; CHECK-P9-NEXT: xscvspdpn f4, vs4 -; CHECK-P9-NEXT: xscvdpsxws f5, f5 +; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 +; CHECK-P9-NEXT: xscvspdpn f5, vs2 +; CHECK-P9-NEXT: xxsldwi vs2, vs2, vs2, 1 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: xscvdpsxws f8, f8 -; CHECK-P9-NEXT: xxsldwi vs6, vs3, vs3, 3 -; CHECK-P9-NEXT: xxswapd vs7, vs3 -; CHECK-P9-NEXT: xscvspdpn f6, vs6 -; CHECK-P9-NEXT: xxsldwi vs3, vs3, vs3, 1 -; CHECK-P9-NEXT: xscvspdpn f7, vs7 -; CHECK-P9-NEXT: xscvspdpn f3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: xscvspdpn f1, vs1 +; CHECK-P9-NEXT: mfvsrwz r5, f3 +; CHECK-P9-NEXT: lxv vs1, 16(r4) +; CHECK-P9-NEXT: xxsldwi vs6, vs1, vs1, 3 +; CHECK-P9-NEXT: xxswapd vs3, vs1 +; CHECK-P9-NEXT: mtvsrd v2, r5 +; CHECK-P9-NEXT: mfvsrwz r5, f4 +; CHECK-P9-NEXT: xscvdpsxws f4, f5 +; CHECK-P9-NEXT: xscvspdpn f3, vs3 +; CHECK-P9-NEXT: mtvsrd v3, r5 +; CHECK-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-P9-NEXT: mfvsrwz r5, f4 +; CHECK-P9-NEXT: xscvspdpn f4, vs6 +; CHECK-P9-NEXT: mtvsrd v3, r5 +; CHECK-P9-NEXT: mfvsrwz r5, f2 +; CHECK-P9-NEXT: xscvspdpn f2, vs1 +; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 ; CHECK-P9-NEXT: xscvdpsxws f4, f4 -; CHECK-P9-NEXT: xscvdpsxws f6, f6 -; CHECK-P9-NEXT: mfvsrwz r5, f5 -; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: xscvdpsxws f7, f7 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: mtvsrd f5, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f8 -; CHECK-P9-NEXT: mtvsrd f8, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f2 ; CHECK-P9-NEXT: lxv vs0, 32(r4) -; CHECK-P9-NEXT: xxsldwi vs9, vs0, vs0, 3 -; CHECK-P9-NEXT: xxswapd vs10, vs0 -; CHECK-P9-NEXT: xscvspdpn f9, vs9 -; CHECK-P9-NEXT: xscvspdpn f10, vs10 -; CHECK-P9-NEXT: xscvdpsxws f9, f9 -; CHECK-P9-NEXT: xscvdpsxws f10, f10 -; CHECK-P9-NEXT: mtvsrd f2, r5 +; CHECK-P9-NEXT: mtvsrd v4, r5 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: vmrghh v3, v3, v4 +; CHECK-P9-NEXT: vmrglw v2, v3, v2 ; CHECK-P9-NEXT: mfvsrwz r5, f4 -; CHECK-P9-NEXT: mtvsrd f4, r5 +; CHECK-P9-NEXT: xscvspdpn f1, vs1 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: mtvsrd v4, r5 +; CHECK-P9-NEXT: mfvsrwz r5, f3 +; CHECK-P9-NEXT: xxsldwi vs3, vs0, vs0, 3 +; CHECK-P9-NEXT: mtvsrd v5, r5 +; CHECK-P9-NEXT: mfvsrwz r5, f2 +; CHECK-P9-NEXT: xscvspdpn f2, vs3 +; CHECK-P9-NEXT: vmrghh v4, v5, v4 +; CHECK-P9-NEXT: mtvsrd v5, r5 ; CHECK-P9-NEXT: mfvsrwz r5, f1 -; CHECK-P9-NEXT: mtvsrd f1, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f6 -; CHECK-P9-NEXT: xxswapd v2, vs2 -; CHECK-P9-NEXT: xxswapd v3, vs4 +; CHECK-P9-NEXT: xxswapd vs1, vs0 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: mtvsrd v0, r5 +; CHECK-P9-NEXT: xscvspdpn f1, vs1 +; CHECK-P9-NEXT: vmrghh v5, v5, v0 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: vmrglw v3, v5, v4 +; CHECK-P9-NEXT: mfvsrwz r5, f2 ; CHECK-P9-NEXT: xscvspdpn f2, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 +; CHECK-P9-NEXT: mtvsrd v0, r5 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mtvsrd f6, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f7 -; CHECK-P9-NEXT: xxswapd v4, vs1 +; CHECK-P9-NEXT: mfvsrwz r5, f1 ; CHECK-P9-NEXT: lxv vs1, 48(r4) -; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: xxswapd v3, vs5 -; CHECK-P9-NEXT: mtvsrd f7, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f3 -; CHECK-P9-NEXT: vmrglh v3, v3, v4 -; CHECK-P9-NEXT: xxswapd v4, vs6 -; CHECK-P9-NEXT: xxswapd v5, vs7 -; CHECK-P9-NEXT: mtvsrd f3, r5 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: xxswapd v0, vs3 -; CHECK-P9-NEXT: vmrglh v4, v5, v4 -; CHECK-P9-NEXT: xxswapd v5, vs8 -; CHECK-P9-NEXT: vmrglh v5, v5, v0 +; CHECK-P9-NEXT: mtvsrd v1, r5 +; CHECK-P9-NEXT: vmrghh v0, v1, v0 ; CHECK-P9-NEXT: mfvsrwz r4, f2 -; CHECK-P9-NEXT: mtvsrd f2, r4 -; CHECK-P9-NEXT: mfvsrwz r4, f0 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: mtvsrd f0, r4 -; CHECK-P9-NEXT: vmrglw v3, v5, v4 -; CHECK-P9-NEXT: xxswapd v4, vs2 ; CHECK-P9-NEXT: xxmrgld vs2, v3, v2 -; CHECK-P9-NEXT: xxswapd v2, vs0 +; CHECK-P9-NEXT: mtvsrd v4, r4 +; CHECK-P9-NEXT: mfvsrwz r4, f0 ; CHECK-P9-NEXT: xxsldwi vs0, vs1, vs1, 3 +; CHECK-P9-NEXT: mtvsrd v2, r4 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 +; CHECK-P9-NEXT: vmrghh v2, v4, v2 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: vmrglw v2, v2, v0 ; CHECK-P9-NEXT: mfvsrwz r4, f0 -; CHECK-P9-NEXT: mtvsrd f0, r4 -; CHECK-P9-NEXT: xxswapd v3, vs0 ; CHECK-P9-NEXT: xxswapd vs0, vs1 +; CHECK-P9-NEXT: mtvsrd v3, r4 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mfvsrwz r4, f0 -; CHECK-P9-NEXT: mtvsrd f0, r4 -; CHECK-P9-NEXT: vmrglh v2, v4, v2 -; CHECK-P9-NEXT: xxswapd v4, vs0 ; CHECK-P9-NEXT: xscvspdpn f0, vs1 +; CHECK-P9-NEXT: mtvsrd v4, r4 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: vmrghh v3, v4, v3 ; CHECK-P9-NEXT: mfvsrwz r4, f0 -; CHECK-P9-NEXT: mtvsrd f0, r4 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: xxswapd v4, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, vs1, vs1, 1 +; CHECK-P9-NEXT: mtvsrd v4, r4 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mfvsrwz r5, f9 -; CHECK-P9-NEXT: mtvsrd f9, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f10 -; CHECK-P9-NEXT: mtvsrd f10, r5 -; CHECK-P9-NEXT: xxswapd v0, vs9 -; CHECK-P9-NEXT: xxswapd v1, vs10 -; CHECK-P9-NEXT: vmrglh v0, v1, v0 -; CHECK-P9-NEXT: vmrglw v2, v2, v0 -; CHECK-P9-NEXT: stxv vs2, 0(r3) ; CHECK-P9-NEXT: mfvsrwz r4, f0 -; CHECK-P9-NEXT: mtvsrd f0, r4 -; CHECK-P9-NEXT: xxswapd v5, vs0 -; CHECK-P9-NEXT: vmrglh v4, v4, v5 +; CHECK-P9-NEXT: mtvsrd v5, r4 +; CHECK-P9-NEXT: vmrghh v4, v4, v5 ; CHECK-P9-NEXT: vmrglw v3, v4, v3 ; CHECK-P9-NEXT: xxmrgld vs0, v3, v2 ; CHECK-P9-NEXT: stxv vs0, 16(r3) +; CHECK-P9-NEXT: stxv vs2, 0(r3) ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test16elt_signed: diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i64_elts.ll @@ -13,8 +13,7 @@ ; CHECK-P8-LABEL: test2elt: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: xxmrglw vs0, v2, v2 +; CHECK-P8-NEXT: xxmrghw vs0, vs0, vs0 ; CHECK-P8-NEXT: xvcvspdp vs0, vs0 ; CHECK-P8-NEXT: xvcvdpuxds v2, vs0 ; CHECK-P8-NEXT: blr @@ -22,8 +21,7 @@ ; CHECK-P9-LABEL: test2elt: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: mtvsrd f0, r3 -; CHECK-P9-NEXT: xxswapd v2, vs0 -; CHECK-P9-NEXT: xxmrglw vs0, v2, v2 +; CHECK-P9-NEXT: xxmrghw vs0, vs0, vs0 ; CHECK-P9-NEXT: xvcvspdp vs0, vs0 ; CHECK-P9-NEXT: xvcvdpuxds v2, vs0 ; CHECK-P9-NEXT: blr @@ -312,8 +310,7 @@ ; CHECK-P8-LABEL: test2elt_signed: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: xxmrglw vs0, v2, v2 +; CHECK-P8-NEXT: xxmrghw vs0, vs0, vs0 ; CHECK-P8-NEXT: xvcvspdp vs0, vs0 ; CHECK-P8-NEXT: xvcvdpuxds v2, vs0 ; CHECK-P8-NEXT: blr @@ -321,8 +318,7 @@ ; CHECK-P9-LABEL: test2elt_signed: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: mtvsrd f0, r3 -; CHECK-P9-NEXT: xxswapd v2, vs0 -; CHECK-P9-NEXT: xxmrglw vs0, v2, v2 +; CHECK-P9-NEXT: xxmrghw vs0, vs0, vs0 ; CHECK-P9-NEXT: xvcvspdp vs0, vs0 ; CHECK-P9-NEXT: xvcvdpuxds v2, vs0 ; CHECK-P9-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll @@ -20,12 +20,10 @@ ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: mfvsrwz r4, f0 +; CHECK-P8-NEXT: mtvsrd v3, r4 ; CHECK-P8-NEXT: mfvsrwz r3, f1 -; CHECK-P8-NEXT: mtvsrd f1, r4 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: xxswapd v3, vs1 -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: vmrglb v2, v3, v2 +; CHECK-P8-NEXT: mtvsrd v2, r3 +; CHECK-P8-NEXT: vmrghb v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mfvsrd r3, f0 ; CHECK-P8-NEXT: clrldi r3, r3, 48 @@ -43,14 +41,12 @@ ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 -; CHECK-P9-NEXT: xxswapd v2, vs1 -; CHECK-P9-NEXT: xxswapd v3, vs0 -; CHECK-P9-NEXT: vmrglb v2, v3, v2 -; CHECK-P9-NEXT: vsldoi v2, v2, v2, 8 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: addi r3, r1, -2 +; CHECK-P9-NEXT: vmrghb v2, v3, v2 +; CHECK-P9-NEXT: vsldoi v2, v2, v2, 8 ; CHECK-P9-NEXT: stxsihx v2, 0, r3 ; CHECK-P9-NEXT: lhz r3, -2(r1) ; CHECK-P9-NEXT: blr @@ -97,20 +93,16 @@ ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: mfvsrwz r3, f1 -; CHECK-P8-NEXT: mtvsrd f1, r3 +; CHECK-P8-NEXT: mtvsrd v2, r3 ; CHECK-P8-NEXT: mfvsrwz r3, f0 ; CHECK-P8-NEXT: mfvsrwz r4, f2 -; CHECK-P8-NEXT: xxswapd v4, vs1 -; CHECK-P8-NEXT: mtvsrd f0, r3 +; CHECK-P8-NEXT: mtvsrd v3, r3 ; CHECK-P8-NEXT: mfvsrwz r3, f3 -; CHECK-P8-NEXT: mtvsrd f2, r4 -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: mtvsrd f3, r3 -; CHECK-P8-NEXT: xxswapd v3, vs2 -; CHECK-P8-NEXT: xxswapd v5, vs3 -; CHECK-P8-NEXT: vmrglb v2, v3, v2 -; CHECK-P8-NEXT: vmrglb v3, v4, v5 -; CHECK-P8-NEXT: vmrglh v2, v3, v2 +; CHECK-P8-NEXT: mtvsrd v4, r4 +; CHECK-P8-NEXT: mtvsrd v5, r3 +; CHECK-P8-NEXT: vmrghb v3, v4, v3 +; CHECK-P8-NEXT: vmrghb v2, v2, v5 +; CHECK-P8-NEXT: vmrglh v2, v2, v3 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mfvsrwz r3, f0 ; CHECK-P8-NEXT: blr @@ -121,28 +113,24 @@ ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 -; CHECK-P9-NEXT: xxswapd v3, vs0 ; CHECK-P9-NEXT: xxswapd vs0, v2 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 -; CHECK-P9-NEXT: xxswapd v4, vs0 ; CHECK-P9-NEXT: xscvspdpn f0, v2 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: vmrghb v3, v4, v3 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 -; CHECK-P9-NEXT: vmrglb v3, v4, v3 -; CHECK-P9-NEXT: xxswapd v4, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 1 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: li r3, 0 -; CHECK-P9-NEXT: xxswapd v2, vs0 -; CHECK-P9-NEXT: vmrglb v2, v4, v2 +; CHECK-P9-NEXT: vmrghb v2, v4, v2 ; CHECK-P9-NEXT: vmrglh v2, v2, v3 ; CHECK-P9-NEXT: vextuwrx r3, r3, v2 ; CHECK-P9-NEXT: blr @@ -189,59 +177,51 @@ ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: lvx v5, r3, r4 -; CHECK-P8-NEXT: xxswapd vs1, v2 +; CHECK-P8-NEXT: lvx v3, r3, r4 ; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-P8-NEXT: xxsldwi vs2, v5, v5, 3 -; CHECK-P8-NEXT: xscvspdpn f4, v5 -; CHECK-P8-NEXT: xxswapd vs3, v5 -; CHECK-P8-NEXT: xxsldwi vs5, v5, v5, 1 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 +; CHECK-P8-NEXT: xxswapd vs1, v2 +; CHECK-P8-NEXT: xscvspdpn f2, v2 +; CHECK-P8-NEXT: xxsldwi vs4, v2, v2, 1 +; CHECK-P8-NEXT: xxsldwi vs5, v3, v3, 3 +; CHECK-P8-NEXT: xscvspdpn f3, v3 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: xscvspdpn f2, vs2 -; CHECK-P8-NEXT: xscvspdpn f3, vs3 +; CHECK-P8-NEXT: xscvspdpn f1, vs1 +; CHECK-P8-NEXT: xscvspdpn f4, vs4 ; CHECK-P8-NEXT: xscvspdpn f5, vs5 -; CHECK-P8-NEXT: xscvdpsxws f4, f4 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: mfvsrwz r4, f4 -; CHECK-P8-NEXT: mfvsrwz r6, f1 -; CHECK-P8-NEXT: mfvsrwz r5, f0 -; CHECK-P8-NEXT: mtvsrd f1, r6 -; CHECK-P8-NEXT: mtvsrd f0, r5 -; CHECK-P8-NEXT: xxswapd v4, vs1 -; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 1 -; CHECK-P8-NEXT: xxswapd v3, vs0 -; CHECK-P8-NEXT: xscvspdpn f0, v2 -; CHECK-P8-NEXT: mtvsrd f4, r4 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: mfvsrwz r4, f2 -; CHECK-P8-NEXT: xxswapd v1, vs4 -; CHECK-P8-NEXT: vmrglb v2, v4, v3 -; CHECK-P8-NEXT: mtvsrd f2, r4 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: mfvsrwz r4, f5 -; CHECK-P8-NEXT: xxswapd v5, vs2 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: mfvsrwz r3, f0 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: mfvsrwz r3, f1 -; CHECK-P8-NEXT: xxswapd v3, vs0 -; CHECK-P8-NEXT: mtvsrd f1, r3 +; CHECK-P8-NEXT: xxswapd vs0, v3 +; CHECK-P8-NEXT: mfvsrwz r4, f1 +; CHECK-P8-NEXT: xxsldwi vs1, v3, v3, 1 +; CHECK-P8-NEXT: mtvsrd v2, r3 +; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: mfvsrwz r3, f2 +; CHECK-P8-NEXT: xscvdpsxws f2, f4 +; CHECK-P8-NEXT: xscvspdpn f1, vs1 +; CHECK-P8-NEXT: xscvdpsxws f4, f5 +; CHECK-P8-NEXT: mtvsrd v4, r4 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: vmrghb v2, v4, v2 +; CHECK-P8-NEXT: mfvsrwz r4, f2 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: mtvsrd v3, r3 ; CHECK-P8-NEXT: mfvsrwz r3, f3 -; CHECK-P8-NEXT: mtvsrd f3, r4 -; CHECK-P8-NEXT: xxswapd v4, vs1 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: xxswapd v6, vs3 -; CHECK-P8-NEXT: xxswapd v0, vs0 -; CHECK-P8-NEXT: vmrglb v3, v3, v4 -; CHECK-P8-NEXT: vmrglb v4, v0, v5 -; CHECK-P8-NEXT: vmrglb v5, v1, v6 +; CHECK-P8-NEXT: mtvsrd v4, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f0 +; CHECK-P8-NEXT: vmrghb v3, v3, v4 +; CHECK-P8-NEXT: mtvsrd v4, r3 +; CHECK-P8-NEXT: mfvsrwz r3, f4 +; CHECK-P8-NEXT: mtvsrd v0, r4 +; CHECK-P8-NEXT: mtvsrd v5, r3 +; CHECK-P8-NEXT: mfvsrwz r3, f1 +; CHECK-P8-NEXT: vmrghb v5, v0, v5 +; CHECK-P8-NEXT: mtvsrd v1, r3 ; CHECK-P8-NEXT: vmrglh v2, v3, v2 -; CHECK-P8-NEXT: vmrglh v3, v5, v4 +; CHECK-P8-NEXT: vmrghb v4, v4, v1 +; CHECK-P8-NEXT: vmrglh v3, v4, v5 ; CHECK-P8-NEXT: vmrglw v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mfvsrd r3, f0 @@ -255,53 +235,45 @@ ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: lxv vs0, 16(r3) ; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: xxswapd v2, vs2 ; CHECK-P9-NEXT: xxswapd vs2, vs1 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: xxswapd v3, vs2 ; CHECK-P9-NEXT: xscvspdpn f2, vs1 ; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: vmrghb v2, v3, v2 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 +; CHECK-P9-NEXT: vmrghb v3, v3, v4 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrglb v2, v3, v2 -; CHECK-P9-NEXT: xxswapd v3, vs2 -; CHECK-P9-NEXT: vmrglb v3, v3, v4 ; CHECK-P9-NEXT: vmrglh v2, v3, v2 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: xxswapd v3, vs1 ; CHECK-P9-NEXT: xxswapd vs1, vs0 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xscvspdpn f1, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: vmrghb v3, v4, v3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 -; CHECK-P9-NEXT: vmrglb v3, v4, v3 -; CHECK-P9-NEXT: xxswapd v4, vs1 -; CHECK-P9-NEXT: xxswapd v5, vs0 -; CHECK-P9-NEXT: vmrglb v4, v4, v5 +; CHECK-P9-NEXT: mtvsrd v5, r3 +; CHECK-P9-NEXT: vmrghb v4, v4, v5 ; CHECK-P9-NEXT: vmrglh v3, v4, v3 ; CHECK-P9-NEXT: vmrglw v2, v3, v2 ; CHECK-P9-NEXT: mfvsrld r3, v2 @@ -376,117 +348,101 @@ define <16 x i8> @test16elt(<16 x float>* nocapture readonly) local_unnamed_addr #3 { ; CHECK-P8-LABEL: test16elt: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lvx v4, 0, r3 ; CHECK-P8-NEXT: li r4, 16 +; CHECK-P8-NEXT: li r5, 32 ; CHECK-P8-NEXT: lvx v3, r3, r4 -; CHECK-P8-NEXT: li r4, 32 -; CHECK-P8-NEXT: xscvspdpn f2, v2 -; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-P8-NEXT: xscvspdpn f4, v3 -; CHECK-P8-NEXT: xxswapd vs1, v2 -; CHECK-P8-NEXT: xxsldwi vs3, v2, v2, 1 -; CHECK-P8-NEXT: xxsldwi vs5, v3, v3, 3 -; CHECK-P8-NEXT: lvx v2, r3, r4 +; CHECK-P8-NEXT: lvx v2, r3, r5 +; CHECK-P8-NEXT: xxsldwi vs0, v4, v4, 3 +; CHECK-P8-NEXT: xxswapd vs2, v4 +; CHECK-P8-NEXT: xxsldwi vs4, v4, v4, 1 +; CHECK-P8-NEXT: xscvspdpn f1, v4 +; CHECK-P8-NEXT: xscvspdpn f3, v3 +; CHECK-P8-NEXT: xxsldwi vs6, v3, v3, 3 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: xxswapd vs6, v3 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: xxsldwi vs7, v3, v3, 1 -; CHECK-P8-NEXT: xscvspdpn f3, vs3 -; CHECK-P8-NEXT: xxsldwi vs8, v2, v2, 3 -; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: xxswapd vs9, v2 -; CHECK-P8-NEXT: xscvdpsxws f4, f4 -; CHECK-P8-NEXT: xscvspdpn f5, vs5 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: xxswapd vs7, v3 +; CHECK-P8-NEXT: xscvspdpn f2, vs2 +; CHECK-P8-NEXT: xxsldwi vs8, v3, v3, 1 +; CHECK-P8-NEXT: xscvspdpn f4, vs4 +; CHECK-P8-NEXT: xxsldwi vs9, v2, v2, 3 ; CHECK-P8-NEXT: xscvspdpn f6, vs6 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mfvsrwz r4, f2 ; CHECK-P8-NEXT: xscvspdpn f7, vs7 -; CHECK-P8-NEXT: mtvsrd f2, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f4 -; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: xscvdpsxws f4, f4 ; CHECK-P8-NEXT: xscvspdpn f8, vs8 -; CHECK-P8-NEXT: mtvsrd f4, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f0 -; CHECK-P8-NEXT: xscvdpsxws f0, f5 -; CHECK-P8-NEXT: xxswapd v0, vs4 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: xscvspdpn f9, vs9 -; CHECK-P8-NEXT: mtvsrd f5, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f0 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: mfvsrwz r5, f2 +; CHECK-P8-NEXT: mtvsrd v3, r4 +; CHECK-P8-NEXT: xscvspdpn f0, vs0 ; CHECK-P8-NEXT: mfvsrwz r4, f1 +; CHECK-P8-NEXT: mtvsrd v4, r5 +; CHECK-P8-NEXT: mfvsrwz r5, f4 ; CHECK-P8-NEXT: xscvdpsxws f1, f6 -; CHECK-P8-NEXT: xxswapd v3, vs5 -; CHECK-P8-NEXT: mtvsrd f6, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f3 +; CHECK-P8-NEXT: vmrghb v3, v4, v3 +; CHECK-P8-NEXT: mtvsrd v4, r5 +; CHECK-P8-NEXT: mfvsrwz r5, f3 ; CHECK-P8-NEXT: xscvdpsxws f3, f7 -; CHECK-P8-NEXT: xxswapd v4, vs6 -; CHECK-P8-NEXT: mtvsrd f7, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f0 -; CHECK-P8-NEXT: xscvdpsxws f0, f8 -; CHECK-P8-NEXT: xxswapd v5, vs7 -; CHECK-P8-NEXT: mtvsrd f8, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f1 -; CHECK-P8-NEXT: xscvdpsxws f1, f9 -; CHECK-P8-NEXT: xxswapd v1, vs8 -; CHECK-P8-NEXT: mtvsrd f9, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f3 -; CHECK-P8-NEXT: vmrglb v3, v4, v3 -; CHECK-P8-NEXT: xxswapd v4, vs2 -; CHECK-P8-NEXT: mtvsrd f3, r4 -; CHECK-P8-NEXT: xxswapd v6, vs9 -; CHECK-P8-NEXT: mfvsrwz r4, f0 -; CHECK-P8-NEXT: xscvspdpn f0, v2 -; CHECK-P8-NEXT: xxswapd v7, vs3 -; CHECK-P8-NEXT: mtvsrd f5, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f1 -; CHECK-P8-NEXT: vmrglb v4, v4, v5 -; CHECK-P8-NEXT: xxswapd v5, vs5 -; CHECK-P8-NEXT: mtvsrd f1, r4 +; CHECK-P8-NEXT: xscvdpsxws f4, f8 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: mtvsrd v5, r4 ; CHECK-P8-NEXT: li r4, 48 -; CHECK-P8-NEXT: lvx v9, r3, r4 -; CHECK-P8-NEXT: vmrglb v1, v6, v1 -; CHECK-P8-NEXT: xxswapd v8, vs1 +; CHECK-P8-NEXT: lvx v0, r3, r4 +; CHECK-P8-NEXT: mfvsrwz r3, f1 ; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 1 -; CHECK-P8-NEXT: xxsldwi vs2, v9, v9, 3 -; CHECK-P8-NEXT: xscvspdpn f4, v9 -; CHECK-P8-NEXT: xxswapd vs3, v9 -; CHECK-P8-NEXT: xxsldwi vs5, v9, v9, 1 +; CHECK-P8-NEXT: xscvspdpn f5, v2 +; CHECK-P8-NEXT: mfvsrwz r4, f3 +; CHECK-P8-NEXT: xxsldwi vs3, v0, v0, 3 +; CHECK-P8-NEXT: mtvsrd v1, r3 +; CHECK-P8-NEXT: mfvsrwz r3, f4 +; CHECK-P8-NEXT: xxswapd vs4, v0 ; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: xscvspdpn f2, vs2 +; CHECK-P8-NEXT: mtvsrd v7, r3 +; CHECK-P8-NEXT: mfvsrwz r3, f0 +; CHECK-P8-NEXT: xxsldwi vs0, v0, v0, 1 +; CHECK-P8-NEXT: xscvspdpn f2, v0 ; CHECK-P8-NEXT: xscvspdpn f3, vs3 -; CHECK-P8-NEXT: xscvspdpn f5, vs5 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: xscvdpsxws f4, f4 +; CHECK-P8-NEXT: xscvdpsxws f6, f9 +; CHECK-P8-NEXT: xscvspdpn f4, vs4 +; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: xscvdpsxws f5, f5 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: mfvsrwz r3, f0 -; CHECK-P8-NEXT: mfvsrwz r4, f4 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: mtvsrd f4, r4 +; CHECK-P8-NEXT: mtvsrd v6, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f6 +; CHECK-P8-NEXT: xscvdpsxws f4, f4 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: vmrghb v2, v6, v1 +; CHECK-P8-NEXT: mtvsrd v1, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f5 +; CHECK-P8-NEXT: mtvsrd v6, r3 ; CHECK-P8-NEXT: mfvsrwz r3, f1 +; CHECK-P8-NEXT: vmrghb v4, v5, v4 +; CHECK-P8-NEXT: mtvsrd v5, r5 +; CHECK-P8-NEXT: vmrghb v0, v6, v1 +; CHECK-P8-NEXT: mtvsrd v1, r4 ; CHECK-P8-NEXT: mfvsrwz r4, f2 -; CHECK-P8-NEXT: xxswapd v9, vs4 -; CHECK-P8-NEXT: mtvsrd f1, r3 +; CHECK-P8-NEXT: mtvsrd v6, r3 ; CHECK-P8-NEXT: mfvsrwz r3, f3 -; CHECK-P8-NEXT: mtvsrd f2, r4 -; CHECK-P8-NEXT: xxswapd v6, vs1 -; CHECK-P8-NEXT: mfvsrwz r4, f5 -; CHECK-P8-NEXT: vmrglb v2, v0, v7 -; CHECK-P8-NEXT: xxswapd v0, vs0 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: xxswapd v7, vs2 -; CHECK-P8-NEXT: mtvsrd f3, r4 -; CHECK-P8-NEXT: vmrglb v5, v8, v5 -; CHECK-P8-NEXT: xxswapd v8, vs0 -; CHECK-P8-NEXT: xxswapd v10, vs3 -; CHECK-P8-NEXT: vmrglb v0, v0, v6 +; CHECK-P8-NEXT: vmrghb v5, v5, v7 +; CHECK-P8-NEXT: vmrghb v1, v1, v6 +; CHECK-P8-NEXT: mtvsrd v6, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f4 +; CHECK-P8-NEXT: mtvsrd v7, r3 +; CHECK-P8-NEXT: mfvsrwz r3, f0 +; CHECK-P8-NEXT: mtvsrd v8, r4 +; CHECK-P8-NEXT: mtvsrd v9, r3 +; CHECK-P8-NEXT: vmrghb v7, v8, v7 +; CHECK-P8-NEXT: vmrghb v6, v6, v9 ; CHECK-P8-NEXT: vmrglh v3, v4, v3 -; CHECK-P8-NEXT: vmrglb v6, v8, v7 -; CHECK-P8-NEXT: vmrglb v7, v9, v10 -; CHECK-P8-NEXT: vmrglh v2, v2, v1 -; CHECK-P8-NEXT: vmrglh v4, v0, v5 -; CHECK-P8-NEXT: vmrglh v5, v7, v6 +; CHECK-P8-NEXT: vmrglh v2, v5, v2 +; CHECK-P8-NEXT: vmrglh v4, v1, v0 +; CHECK-P8-NEXT: vmrglh v5, v6, v7 ; CHECK-P8-NEXT: vmrglw v2, v2, v3 ; CHECK-P8-NEXT: vmrglw v3, v5, v4 ; CHECK-P8-NEXT: xxmrgld v2, v3, v2 @@ -494,114 +450,98 @@ ; ; CHECK-P9-LABEL: test16elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs2, 0(r3) +; CHECK-P9-NEXT: lxv vs3, 0(r3) +; CHECK-P9-NEXT: xxsldwi vs4, vs3, vs3, 3 +; CHECK-P9-NEXT: xscvspdpn f4, vs4 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 +; CHECK-P9-NEXT: lxv vs0, 48(r3) +; CHECK-P9-NEXT: lxv vs1, 32(r3) +; CHECK-P9-NEXT: lxv vs2, 16(r3) +; CHECK-P9-NEXT: mfvsrwz r3, f4 +; CHECK-P9-NEXT: xxswapd vs4, vs3 +; CHECK-P9-NEXT: mtvsrd v2, r3 +; CHECK-P9-NEXT: xscvspdpn f4, vs4 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 +; CHECK-P9-NEXT: mfvsrwz r3, f4 +; CHECK-P9-NEXT: xscvspdpn f4, vs3 +; CHECK-P9-NEXT: xxsldwi vs3, vs3, vs3, 1 +; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: xscvspdpn f3, vs3 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 +; CHECK-P9-NEXT: vmrghb v2, v3, v2 +; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: mfvsrwz r3, f4 +; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mfvsrwz r3, f3 ; CHECK-P9-NEXT: xxsldwi vs3, vs2, vs2, 3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f3, vs3 +; CHECK-P9-NEXT: vmrghb v3, v3, v4 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: lxv vs0, 48(r3) -; CHECK-P9-NEXT: lxv vs1, 32(r3) -; CHECK-P9-NEXT: lxv vs4, 16(r3) +; CHECK-P9-NEXT: vmrglh v2, v3, v2 ; CHECK-P9-NEXT: mfvsrwz r3, f3 -; CHECK-P9-NEXT: mtvsrd f3, r3 -; CHECK-P9-NEXT: xxswapd v2, vs3 ; CHECK-P9-NEXT: xxswapd vs3, vs2 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 ; CHECK-P9-NEXT: mfvsrwz r3, f3 -; CHECK-P9-NEXT: mtvsrd f3, r3 -; CHECK-P9-NEXT: xxswapd v3, vs3 ; CHECK-P9-NEXT: xscvspdpn f3, vs2 ; CHECK-P9-NEXT: xxsldwi vs2, vs2, vs2, 1 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: vmrghb v3, v4, v3 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: mfvsrwz r3, f3 -; CHECK-P9-NEXT: mtvsrd f3, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: xxswapd v4, vs2 -; CHECK-P9-NEXT: xxsldwi vs2, vs4, vs4, 3 -; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrglb v2, v3, v2 -; CHECK-P9-NEXT: xxswapd v3, vs3 -; CHECK-P9-NEXT: vmrglb v3, v3, v4 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: xxswapd v3, vs2 -; CHECK-P9-NEXT: xxswapd vs2, vs4 -; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: xxswapd v4, vs2 -; CHECK-P9-NEXT: xscvspdpn f2, vs4 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: vmrglb v3, v4, v3 -; CHECK-P9-NEXT: xxswapd v4, vs2 -; CHECK-P9-NEXT: xxsldwi vs2, vs4, vs4, 1 -; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: xxswapd v5, vs2 ; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3 +; CHECK-P9-NEXT: mtvsrd v5, r3 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 +; CHECK-P9-NEXT: vmrghb v4, v4, v5 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrglb v4, v4, v5 ; CHECK-P9-NEXT: vmrglh v3, v4, v3 ; CHECK-P9-NEXT: vmrglw v2, v3, v2 ; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: xxswapd v3, vs2 ; CHECK-P9-NEXT: xxswapd vs2, vs1 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: xxswapd v4, vs2 ; CHECK-P9-NEXT: xscvspdpn f2, vs1 ; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: vmrghb v3, v4, v3 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: xxswapd v5, vs1 ; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 +; CHECK-P9-NEXT: mtvsrd v5, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 +; CHECK-P9-NEXT: vmrghb v4, v4, v5 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrglb v3, v4, v3 -; CHECK-P9-NEXT: xxswapd v4, vs2 -; CHECK-P9-NEXT: vmrglb v4, v4, v5 ; CHECK-P9-NEXT: vmrglh v3, v4, v3 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xxswapd vs1, vs0 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: xxswapd v5, vs1 ; CHECK-P9-NEXT: xscvspdpn f1, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 +; CHECK-P9-NEXT: mtvsrd v5, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: vmrghb v4, v5, v4 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mtvsrd v5, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 -; CHECK-P9-NEXT: vmrglb v4, v5, v4 -; CHECK-P9-NEXT: xxswapd v5, vs1 -; CHECK-P9-NEXT: xxswapd v0, vs0 -; CHECK-P9-NEXT: vmrglb v5, v5, v0 +; CHECK-P9-NEXT: mtvsrd v0, r3 +; CHECK-P9-NEXT: vmrghb v5, v5, v0 ; CHECK-P9-NEXT: vmrglh v4, v5, v4 ; CHECK-P9-NEXT: vmrglw v3, v4, v3 ; CHECK-P9-NEXT: xxmrgld v2, v3, v2 @@ -738,12 +678,10 @@ ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: mfvsrwz r4, f0 +; CHECK-P8-NEXT: mtvsrd v3, r4 ; CHECK-P8-NEXT: mfvsrwz r3, f1 -; CHECK-P8-NEXT: mtvsrd f1, r4 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: xxswapd v3, vs1 -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: vmrglb v2, v3, v2 +; CHECK-P8-NEXT: mtvsrd v2, r3 +; CHECK-P8-NEXT: vmrghb v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mfvsrd r3, f0 ; CHECK-P8-NEXT: clrldi r3, r3, 48 @@ -761,14 +699,12 @@ ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 -; CHECK-P9-NEXT: xxswapd v2, vs1 -; CHECK-P9-NEXT: xxswapd v3, vs0 -; CHECK-P9-NEXT: vmrglb v2, v3, v2 -; CHECK-P9-NEXT: vsldoi v2, v2, v2, 8 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: addi r3, r1, -2 +; CHECK-P9-NEXT: vmrghb v2, v3, v2 +; CHECK-P9-NEXT: vsldoi v2, v2, v2, 8 ; CHECK-P9-NEXT: stxsihx v2, 0, r3 ; CHECK-P9-NEXT: lhz r3, -2(r1) ; CHECK-P9-NEXT: blr @@ -815,20 +751,16 @@ ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: mfvsrwz r3, f1 -; CHECK-P8-NEXT: mtvsrd f1, r3 +; CHECK-P8-NEXT: mtvsrd v2, r3 ; CHECK-P8-NEXT: mfvsrwz r3, f0 ; CHECK-P8-NEXT: mfvsrwz r4, f2 -; CHECK-P8-NEXT: xxswapd v4, vs1 -; CHECK-P8-NEXT: mtvsrd f0, r3 +; CHECK-P8-NEXT: mtvsrd v3, r3 ; CHECK-P8-NEXT: mfvsrwz r3, f3 -; CHECK-P8-NEXT: mtvsrd f2, r4 -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: mtvsrd f3, r3 -; CHECK-P8-NEXT: xxswapd v3, vs2 -; CHECK-P8-NEXT: xxswapd v5, vs3 -; CHECK-P8-NEXT: vmrglb v2, v3, v2 -; CHECK-P8-NEXT: vmrglb v3, v4, v5 -; CHECK-P8-NEXT: vmrglh v2, v3, v2 +; CHECK-P8-NEXT: mtvsrd v4, r4 +; CHECK-P8-NEXT: mtvsrd v5, r3 +; CHECK-P8-NEXT: vmrghb v3, v4, v3 +; CHECK-P8-NEXT: vmrghb v2, v2, v5 +; CHECK-P8-NEXT: vmrglh v2, v2, v3 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mfvsrwz r3, f0 ; CHECK-P8-NEXT: blr @@ -839,28 +771,24 @@ ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 -; CHECK-P9-NEXT: xxswapd v3, vs0 ; CHECK-P9-NEXT: xxswapd vs0, v2 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 -; CHECK-P9-NEXT: xxswapd v4, vs0 ; CHECK-P9-NEXT: xscvspdpn f0, v2 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: vmrghb v3, v4, v3 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 -; CHECK-P9-NEXT: vmrglb v3, v4, v3 -; CHECK-P9-NEXT: xxswapd v4, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 1 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: li r3, 0 -; CHECK-P9-NEXT: xxswapd v2, vs0 -; CHECK-P9-NEXT: vmrglb v2, v4, v2 +; CHECK-P9-NEXT: vmrghb v2, v4, v2 ; CHECK-P9-NEXT: vmrglh v2, v2, v3 ; CHECK-P9-NEXT: vextuwrx r3, r3, v2 ; CHECK-P9-NEXT: blr @@ -907,59 +835,51 @@ ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: lvx v2, 0, r3 ; CHECK-P8-NEXT: li r4, 16 -; CHECK-P8-NEXT: lvx v5, r3, r4 -; CHECK-P8-NEXT: xxswapd vs1, v2 +; CHECK-P8-NEXT: lvx v3, r3, r4 ; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-P8-NEXT: xxsldwi vs2, v5, v5, 3 -; CHECK-P8-NEXT: xscvspdpn f4, v5 -; CHECK-P8-NEXT: xxswapd vs3, v5 -; CHECK-P8-NEXT: xxsldwi vs5, v5, v5, 1 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 +; CHECK-P8-NEXT: xxswapd vs1, v2 +; CHECK-P8-NEXT: xscvspdpn f2, v2 +; CHECK-P8-NEXT: xxsldwi vs4, v2, v2, 1 +; CHECK-P8-NEXT: xxsldwi vs5, v3, v3, 3 +; CHECK-P8-NEXT: xscvspdpn f3, v3 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: xscvspdpn f2, vs2 -; CHECK-P8-NEXT: xscvspdpn f3, vs3 +; CHECK-P8-NEXT: xscvspdpn f1, vs1 +; CHECK-P8-NEXT: xscvspdpn f4, vs4 ; CHECK-P8-NEXT: xscvspdpn f5, vs5 -; CHECK-P8-NEXT: xscvdpsxws f4, f4 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: mfvsrwz r4, f4 -; CHECK-P8-NEXT: mfvsrwz r6, f1 -; CHECK-P8-NEXT: mfvsrwz r5, f0 -; CHECK-P8-NEXT: mtvsrd f1, r6 -; CHECK-P8-NEXT: mtvsrd f0, r5 -; CHECK-P8-NEXT: xxswapd v4, vs1 -; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 1 -; CHECK-P8-NEXT: xxswapd v3, vs0 -; CHECK-P8-NEXT: xscvspdpn f0, v2 -; CHECK-P8-NEXT: mtvsrd f4, r4 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: mfvsrwz r4, f2 -; CHECK-P8-NEXT: xxswapd v1, vs4 -; CHECK-P8-NEXT: vmrglb v2, v4, v3 -; CHECK-P8-NEXT: mtvsrd f2, r4 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: mfvsrwz r4, f5 -; CHECK-P8-NEXT: xxswapd v5, vs2 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: mfvsrwz r3, f0 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: mfvsrwz r3, f1 -; CHECK-P8-NEXT: xxswapd v3, vs0 -; CHECK-P8-NEXT: mtvsrd f1, r3 +; CHECK-P8-NEXT: xxswapd vs0, v3 +; CHECK-P8-NEXT: mfvsrwz r4, f1 +; CHECK-P8-NEXT: xxsldwi vs1, v3, v3, 1 +; CHECK-P8-NEXT: mtvsrd v2, r3 +; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: mfvsrwz r3, f2 +; CHECK-P8-NEXT: xscvdpsxws f2, f4 +; CHECK-P8-NEXT: xscvspdpn f1, vs1 +; CHECK-P8-NEXT: xscvdpsxws f4, f5 +; CHECK-P8-NEXT: mtvsrd v4, r4 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: vmrghb v2, v4, v2 +; CHECK-P8-NEXT: mfvsrwz r4, f2 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: mtvsrd v3, r3 ; CHECK-P8-NEXT: mfvsrwz r3, f3 -; CHECK-P8-NEXT: mtvsrd f3, r4 -; CHECK-P8-NEXT: xxswapd v4, vs1 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: xxswapd v6, vs3 -; CHECK-P8-NEXT: xxswapd v0, vs0 -; CHECK-P8-NEXT: vmrglb v3, v3, v4 -; CHECK-P8-NEXT: vmrglb v4, v0, v5 -; CHECK-P8-NEXT: vmrglb v5, v1, v6 +; CHECK-P8-NEXT: mtvsrd v4, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f0 +; CHECK-P8-NEXT: vmrghb v3, v3, v4 +; CHECK-P8-NEXT: mtvsrd v4, r3 +; CHECK-P8-NEXT: mfvsrwz r3, f4 +; CHECK-P8-NEXT: mtvsrd v0, r4 +; CHECK-P8-NEXT: mtvsrd v5, r3 +; CHECK-P8-NEXT: mfvsrwz r3, f1 +; CHECK-P8-NEXT: vmrghb v5, v0, v5 +; CHECK-P8-NEXT: mtvsrd v1, r3 ; CHECK-P8-NEXT: vmrglh v2, v3, v2 -; CHECK-P8-NEXT: vmrglh v3, v5, v4 +; CHECK-P8-NEXT: vmrghb v4, v4, v1 +; CHECK-P8-NEXT: vmrglh v3, v4, v5 ; CHECK-P8-NEXT: vmrglw v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mfvsrd r3, f0 @@ -973,53 +893,45 @@ ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: lxv vs0, 16(r3) ; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: xxswapd v2, vs2 ; CHECK-P9-NEXT: xxswapd vs2, vs1 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: xxswapd v3, vs2 ; CHECK-P9-NEXT: xscvspdpn f2, vs1 ; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: vmrghb v2, v3, v2 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 +; CHECK-P9-NEXT: vmrghb v3, v3, v4 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrglb v2, v3, v2 -; CHECK-P9-NEXT: xxswapd v3, vs2 -; CHECK-P9-NEXT: vmrglb v3, v3, v4 ; CHECK-P9-NEXT: vmrglh v2, v3, v2 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: xxswapd v3, vs1 ; CHECK-P9-NEXT: xxswapd vs1, vs0 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xscvspdpn f1, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: vmrghb v3, v4, v3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 -; CHECK-P9-NEXT: vmrglb v3, v4, v3 -; CHECK-P9-NEXT: xxswapd v4, vs1 -; CHECK-P9-NEXT: xxswapd v5, vs0 -; CHECK-P9-NEXT: vmrglb v4, v4, v5 +; CHECK-P9-NEXT: mtvsrd v5, r3 +; CHECK-P9-NEXT: vmrghb v4, v4, v5 ; CHECK-P9-NEXT: vmrglh v3, v4, v3 ; CHECK-P9-NEXT: vmrglw v2, v3, v2 ; CHECK-P9-NEXT: mfvsrld r3, v2 @@ -1094,117 +1006,101 @@ define <16 x i8> @test16elt_signed(<16 x float>* nocapture readonly) local_unnamed_addr #3 { ; CHECK-P8-LABEL: test16elt_signed: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: lvx v2, 0, r3 +; CHECK-P8-NEXT: lvx v4, 0, r3 ; CHECK-P8-NEXT: li r4, 16 +; CHECK-P8-NEXT: li r5, 32 ; CHECK-P8-NEXT: lvx v3, r3, r4 -; CHECK-P8-NEXT: li r4, 32 -; CHECK-P8-NEXT: xscvspdpn f2, v2 -; CHECK-P8-NEXT: xxsldwi vs0, v2, v2, 3 -; CHECK-P8-NEXT: xscvspdpn f4, v3 -; CHECK-P8-NEXT: xxswapd vs1, v2 -; CHECK-P8-NEXT: xxsldwi vs3, v2, v2, 1 -; CHECK-P8-NEXT: xxsldwi vs5, v3, v3, 3 -; CHECK-P8-NEXT: lvx v2, r3, r4 +; CHECK-P8-NEXT: lvx v2, r3, r5 +; CHECK-P8-NEXT: xxsldwi vs0, v4, v4, 3 +; CHECK-P8-NEXT: xxswapd vs2, v4 +; CHECK-P8-NEXT: xxsldwi vs4, v4, v4, 1 +; CHECK-P8-NEXT: xscvspdpn f1, v4 +; CHECK-P8-NEXT: xscvspdpn f3, v3 +; CHECK-P8-NEXT: xxsldwi vs6, v3, v3, 3 ; CHECK-P8-NEXT: xscvspdpn f0, vs0 -; CHECK-P8-NEXT: xxswapd vs6, v3 -; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: xxsldwi vs7, v3, v3, 1 -; CHECK-P8-NEXT: xscvspdpn f3, vs3 -; CHECK-P8-NEXT: xxsldwi vs8, v2, v2, 3 -; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: xxswapd vs9, v2 -; CHECK-P8-NEXT: xscvdpsxws f4, f4 -; CHECK-P8-NEXT: xscvspdpn f5, vs5 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: xxswapd vs7, v3 +; CHECK-P8-NEXT: xscvspdpn f2, vs2 +; CHECK-P8-NEXT: xxsldwi vs8, v3, v3, 1 +; CHECK-P8-NEXT: xscvspdpn f4, vs4 +; CHECK-P8-NEXT: xxsldwi vs9, v2, v2, 3 ; CHECK-P8-NEXT: xscvspdpn f6, vs6 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mfvsrwz r4, f2 ; CHECK-P8-NEXT: xscvspdpn f7, vs7 -; CHECK-P8-NEXT: mtvsrd f2, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f4 -; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: xscvdpsxws f4, f4 ; CHECK-P8-NEXT: xscvspdpn f8, vs8 -; CHECK-P8-NEXT: mtvsrd f4, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f0 -; CHECK-P8-NEXT: xscvdpsxws f0, f5 -; CHECK-P8-NEXT: xxswapd v0, vs4 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 ; CHECK-P8-NEXT: xscvspdpn f9, vs9 -; CHECK-P8-NEXT: mtvsrd f5, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f0 +; CHECK-P8-NEXT: xxswapd vs0, v2 +; CHECK-P8-NEXT: mfvsrwz r5, f2 +; CHECK-P8-NEXT: mtvsrd v3, r4 +; CHECK-P8-NEXT: xscvspdpn f0, vs0 ; CHECK-P8-NEXT: mfvsrwz r4, f1 +; CHECK-P8-NEXT: mtvsrd v4, r5 +; CHECK-P8-NEXT: mfvsrwz r5, f4 ; CHECK-P8-NEXT: xscvdpsxws f1, f6 -; CHECK-P8-NEXT: xxswapd v3, vs5 -; CHECK-P8-NEXT: mtvsrd f6, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f3 +; CHECK-P8-NEXT: vmrghb v3, v4, v3 +; CHECK-P8-NEXT: mtvsrd v4, r5 +; CHECK-P8-NEXT: mfvsrwz r5, f3 ; CHECK-P8-NEXT: xscvdpsxws f3, f7 -; CHECK-P8-NEXT: xxswapd v4, vs6 -; CHECK-P8-NEXT: mtvsrd f7, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f0 -; CHECK-P8-NEXT: xscvdpsxws f0, f8 -; CHECK-P8-NEXT: xxswapd v5, vs7 -; CHECK-P8-NEXT: mtvsrd f8, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f1 -; CHECK-P8-NEXT: xscvdpsxws f1, f9 -; CHECK-P8-NEXT: xxswapd v1, vs8 -; CHECK-P8-NEXT: mtvsrd f9, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f3 -; CHECK-P8-NEXT: vmrglb v3, v4, v3 -; CHECK-P8-NEXT: xxswapd v4, vs2 -; CHECK-P8-NEXT: mtvsrd f3, r4 -; CHECK-P8-NEXT: xxswapd v6, vs9 -; CHECK-P8-NEXT: mfvsrwz r4, f0 -; CHECK-P8-NEXT: xscvspdpn f0, v2 -; CHECK-P8-NEXT: xxswapd v7, vs3 -; CHECK-P8-NEXT: mtvsrd f5, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f1 -; CHECK-P8-NEXT: vmrglb v4, v4, v5 -; CHECK-P8-NEXT: xxswapd v5, vs5 -; CHECK-P8-NEXT: mtvsrd f1, r4 +; CHECK-P8-NEXT: xscvdpsxws f4, f8 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: mtvsrd v5, r4 ; CHECK-P8-NEXT: li r4, 48 -; CHECK-P8-NEXT: lvx v9, r3, r4 -; CHECK-P8-NEXT: vmrglb v1, v6, v1 -; CHECK-P8-NEXT: xxswapd v8, vs1 +; CHECK-P8-NEXT: lvx v0, r3, r4 +; CHECK-P8-NEXT: mfvsrwz r3, f1 ; CHECK-P8-NEXT: xxsldwi vs1, v2, v2, 1 -; CHECK-P8-NEXT: xxsldwi vs2, v9, v9, 3 -; CHECK-P8-NEXT: xscvspdpn f4, v9 -; CHECK-P8-NEXT: xxswapd vs3, v9 -; CHECK-P8-NEXT: xxsldwi vs5, v9, v9, 1 +; CHECK-P8-NEXT: xscvspdpn f5, v2 +; CHECK-P8-NEXT: mfvsrwz r4, f3 +; CHECK-P8-NEXT: xxsldwi vs3, v0, v0, 3 +; CHECK-P8-NEXT: mtvsrd v1, r3 +; CHECK-P8-NEXT: mfvsrwz r3, f4 +; CHECK-P8-NEXT: xxswapd vs4, v0 ; CHECK-P8-NEXT: xscvspdpn f1, vs1 -; CHECK-P8-NEXT: xscvspdpn f2, vs2 +; CHECK-P8-NEXT: mtvsrd v7, r3 +; CHECK-P8-NEXT: mfvsrwz r3, f0 +; CHECK-P8-NEXT: xxsldwi vs0, v0, v0, 1 +; CHECK-P8-NEXT: xscvspdpn f2, v0 ; CHECK-P8-NEXT: xscvspdpn f3, vs3 -; CHECK-P8-NEXT: xscvspdpn f5, vs5 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: xscvdpsxws f4, f4 +; CHECK-P8-NEXT: xscvdpsxws f6, f9 +; CHECK-P8-NEXT: xscvspdpn f4, vs4 +; CHECK-P8-NEXT: xscvspdpn f0, vs0 +; CHECK-P8-NEXT: xscvdpsxws f5, f5 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: mfvsrwz r3, f0 -; CHECK-P8-NEXT: mfvsrwz r4, f4 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: mtvsrd f4, r4 +; CHECK-P8-NEXT: mtvsrd v6, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f6 +; CHECK-P8-NEXT: xscvdpsxws f4, f4 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: vmrghb v2, v6, v1 +; CHECK-P8-NEXT: mtvsrd v1, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f5 +; CHECK-P8-NEXT: mtvsrd v6, r3 ; CHECK-P8-NEXT: mfvsrwz r3, f1 +; CHECK-P8-NEXT: vmrghb v4, v5, v4 +; CHECK-P8-NEXT: mtvsrd v5, r5 +; CHECK-P8-NEXT: vmrghb v0, v6, v1 +; CHECK-P8-NEXT: mtvsrd v1, r4 ; CHECK-P8-NEXT: mfvsrwz r4, f2 -; CHECK-P8-NEXT: xxswapd v9, vs4 -; CHECK-P8-NEXT: mtvsrd f1, r3 +; CHECK-P8-NEXT: mtvsrd v6, r3 ; CHECK-P8-NEXT: mfvsrwz r3, f3 -; CHECK-P8-NEXT: mtvsrd f2, r4 -; CHECK-P8-NEXT: xxswapd v6, vs1 -; CHECK-P8-NEXT: mfvsrwz r4, f5 -; CHECK-P8-NEXT: vmrglb v2, v0, v7 -; CHECK-P8-NEXT: xxswapd v0, vs0 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: xxswapd v7, vs2 -; CHECK-P8-NEXT: mtvsrd f3, r4 -; CHECK-P8-NEXT: vmrglb v5, v8, v5 -; CHECK-P8-NEXT: xxswapd v8, vs0 -; CHECK-P8-NEXT: xxswapd v10, vs3 -; CHECK-P8-NEXT: vmrglb v0, v0, v6 +; CHECK-P8-NEXT: vmrghb v5, v5, v7 +; CHECK-P8-NEXT: vmrghb v1, v1, v6 +; CHECK-P8-NEXT: mtvsrd v6, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f4 +; CHECK-P8-NEXT: mtvsrd v7, r3 +; CHECK-P8-NEXT: mfvsrwz r3, f0 +; CHECK-P8-NEXT: mtvsrd v8, r4 +; CHECK-P8-NEXT: mtvsrd v9, r3 +; CHECK-P8-NEXT: vmrghb v7, v8, v7 +; CHECK-P8-NEXT: vmrghb v6, v6, v9 ; CHECK-P8-NEXT: vmrglh v3, v4, v3 -; CHECK-P8-NEXT: vmrglb v6, v8, v7 -; CHECK-P8-NEXT: vmrglb v7, v9, v10 -; CHECK-P8-NEXT: vmrglh v2, v2, v1 -; CHECK-P8-NEXT: vmrglh v4, v0, v5 -; CHECK-P8-NEXT: vmrglh v5, v7, v6 +; CHECK-P8-NEXT: vmrglh v2, v5, v2 +; CHECK-P8-NEXT: vmrglh v4, v1, v0 +; CHECK-P8-NEXT: vmrglh v5, v6, v7 ; CHECK-P8-NEXT: vmrglw v2, v2, v3 ; CHECK-P8-NEXT: vmrglw v3, v5, v4 ; CHECK-P8-NEXT: xxmrgld v2, v3, v2 @@ -1212,114 +1108,98 @@ ; ; CHECK-P9-LABEL: test16elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs2, 0(r3) +; CHECK-P9-NEXT: lxv vs3, 0(r3) +; CHECK-P9-NEXT: xxsldwi vs4, vs3, vs3, 3 +; CHECK-P9-NEXT: xscvspdpn f4, vs4 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 +; CHECK-P9-NEXT: lxv vs0, 48(r3) +; CHECK-P9-NEXT: lxv vs1, 32(r3) +; CHECK-P9-NEXT: lxv vs2, 16(r3) +; CHECK-P9-NEXT: mfvsrwz r3, f4 +; CHECK-P9-NEXT: xxswapd vs4, vs3 +; CHECK-P9-NEXT: mtvsrd v2, r3 +; CHECK-P9-NEXT: xscvspdpn f4, vs4 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 +; CHECK-P9-NEXT: mfvsrwz r3, f4 +; CHECK-P9-NEXT: xscvspdpn f4, vs3 +; CHECK-P9-NEXT: xxsldwi vs3, vs3, vs3, 1 +; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: xscvspdpn f3, vs3 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 +; CHECK-P9-NEXT: vmrghb v2, v3, v2 +; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: mfvsrwz r3, f4 +; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mfvsrwz r3, f3 ; CHECK-P9-NEXT: xxsldwi vs3, vs2, vs2, 3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f3, vs3 +; CHECK-P9-NEXT: vmrghb v3, v3, v4 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: lxv vs0, 48(r3) -; CHECK-P9-NEXT: lxv vs1, 32(r3) -; CHECK-P9-NEXT: lxv vs4, 16(r3) +; CHECK-P9-NEXT: vmrglh v2, v3, v2 ; CHECK-P9-NEXT: mfvsrwz r3, f3 -; CHECK-P9-NEXT: mtvsrd f3, r3 -; CHECK-P9-NEXT: xxswapd v2, vs3 ; CHECK-P9-NEXT: xxswapd vs3, vs2 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 ; CHECK-P9-NEXT: mfvsrwz r3, f3 -; CHECK-P9-NEXT: mtvsrd f3, r3 -; CHECK-P9-NEXT: xxswapd v3, vs3 ; CHECK-P9-NEXT: xscvspdpn f3, vs2 ; CHECK-P9-NEXT: xxsldwi vs2, vs2, vs2, 1 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: vmrghb v3, v4, v3 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: mfvsrwz r3, f3 -; CHECK-P9-NEXT: mtvsrd f3, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: xxswapd v4, vs2 -; CHECK-P9-NEXT: xxsldwi vs2, vs4, vs4, 3 -; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrglb v2, v3, v2 -; CHECK-P9-NEXT: xxswapd v3, vs3 -; CHECK-P9-NEXT: vmrglb v3, v3, v4 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: xxswapd v3, vs2 -; CHECK-P9-NEXT: xxswapd vs2, vs4 -; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: xxswapd v4, vs2 -; CHECK-P9-NEXT: xscvspdpn f2, vs4 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: vmrglb v3, v4, v3 -; CHECK-P9-NEXT: xxswapd v4, vs2 -; CHECK-P9-NEXT: xxsldwi vs2, vs4, vs4, 1 -; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: xxswapd v5, vs2 ; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3 +; CHECK-P9-NEXT: mtvsrd v5, r3 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 +; CHECK-P9-NEXT: vmrghb v4, v4, v5 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrglb v4, v4, v5 ; CHECK-P9-NEXT: vmrglh v3, v4, v3 ; CHECK-P9-NEXT: vmrglw v2, v3, v2 ; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: xxswapd v3, vs2 ; CHECK-P9-NEXT: xxswapd vs2, vs1 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: xxswapd v4, vs2 ; CHECK-P9-NEXT: xscvspdpn f2, vs1 ; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: vmrghb v3, v4, v3 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: xxswapd v5, vs1 ; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 +; CHECK-P9-NEXT: mtvsrd v5, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 +; CHECK-P9-NEXT: vmrghb v4, v4, v5 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrglb v3, v4, v3 -; CHECK-P9-NEXT: xxswapd v4, vs2 -; CHECK-P9-NEXT: vmrglb v4, v4, v5 ; CHECK-P9-NEXT: vmrglh v3, v4, v3 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xxswapd vs1, vs0 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: xxswapd v5, vs1 ; CHECK-P9-NEXT: xscvspdpn f1, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 +; CHECK-P9-NEXT: mtvsrd v5, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: vmrghb v4, v5, v4 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mtvsrd v5, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 -; CHECK-P9-NEXT: vmrglb v4, v5, v4 -; CHECK-P9-NEXT: xxswapd v5, vs1 -; CHECK-P9-NEXT: xxswapd v0, vs0 -; CHECK-P9-NEXT: vmrglb v5, v5, v0 +; CHECK-P9-NEXT: mtvsrd v0, r3 +; CHECK-P9-NEXT: vmrghb v5, v5, v0 ; CHECK-P9-NEXT: vmrglh v4, v5, v4 ; CHECK-P9-NEXT: vmrglw v3, v4, v3 ; CHECK-P9-NEXT: xxmrgld v2, v3, v2 diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll @@ -16,12 +16,10 @@ ; CHECK-P8-NEXT: xscvdpsxws f1, v2 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mfvsrwz r3, f1 +; CHECK-P8-NEXT: mtvsrd v2, r3 ; CHECK-P8-NEXT: mfvsrwz r4, f0 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: mtvsrd f1, r4 -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: xxswapd v3, vs1 -; CHECK-P8-NEXT: vmrglh v2, v2, v3 +; CHECK-P8-NEXT: mtvsrd v3, r4 +; CHECK-P8-NEXT: vmrghh v2, v2, v3 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mfvsrwz r3, f0 ; CHECK-P8-NEXT: blr @@ -30,15 +28,13 @@ ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: xscvdpsxws f0, v2 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 -; CHECK-P9-NEXT: xxswapd v3, vs0 ; CHECK-P9-NEXT: xxswapd vs0, v2 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: li r3, 0 -; CHECK-P9-NEXT: xxswapd v2, vs0 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 +; CHECK-P9-NEXT: vmrghh v2, v3, v2 ; CHECK-P9-NEXT: vextuwrx r3, r3, v2 ; CHECK-P9-NEXT: blr ; @@ -77,18 +73,14 @@ ; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: mfvsrwz r3, f2 ; CHECK-P8-NEXT: mfvsrwz r4, f3 -; CHECK-P8-NEXT: mtvsrd f2, r3 -; CHECK-P8-NEXT: mtvsrd f3, r4 +; CHECK-P8-NEXT: mtvsrd v2, r3 +; CHECK-P8-NEXT: mtvsrd v3, r4 ; CHECK-P8-NEXT: mfvsrwz r3, f0 -; CHECK-P8-NEXT: xxswapd v2, vs2 ; CHECK-P8-NEXT: mfvsrwz r4, f1 -; CHECK-P8-NEXT: xxswapd v4, vs3 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: mtvsrd f1, r4 -; CHECK-P8-NEXT: xxswapd v3, vs0 -; CHECK-P8-NEXT: xxswapd v5, vs1 -; CHECK-P8-NEXT: vmrglh v2, v3, v2 -; CHECK-P8-NEXT: vmrglh v3, v5, v4 +; CHECK-P8-NEXT: mtvsrd v4, r3 +; CHECK-P8-NEXT: mtvsrd v5, r4 +; CHECK-P8-NEXT: vmrghh v2, v4, v2 +; CHECK-P8-NEXT: vmrghh v3, v5, v3 ; CHECK-P8-NEXT: vmrglw v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mfvsrd r3, f0 @@ -102,22 +94,18 @@ ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: lxv vs0, 16(r3) ; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: xxswapd v2, vs2 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: xxswapd v3, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: vmrghh v2, v2, v3 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 -; CHECK-P9-NEXT: vmrglh v2, v2, v3 -; CHECK-P9-NEXT: xxswapd v3, vs1 -; CHECK-P9-NEXT: xxswapd v4, vs0 -; CHECK-P9-NEXT: vmrglh v3, v3, v4 +; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: vmrghh v3, v3, v4 ; CHECK-P9-NEXT: vmrglw v2, v3, v2 ; CHECK-P9-NEXT: mfvsrld r3, v2 ; CHECK-P9-NEXT: blr @@ -176,36 +164,28 @@ ; CHECK-P8-NEXT: xxswapd vs3, vs3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mfvsrwz r3, f4 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: mfvsrwz r3, f4 ; CHECK-P8-NEXT: mfvsrwz r4, f5 -; CHECK-P8-NEXT: mtvsrd f4, r3 +; CHECK-P8-NEXT: mtvsrd v2, r3 ; CHECK-P8-NEXT: mfvsrwz r3, f6 -; CHECK-P8-NEXT: mtvsrd f5, r4 -; CHECK-P8-NEXT: xxswapd v2, vs4 +; CHECK-P8-NEXT: mtvsrd v3, r4 ; CHECK-P8-NEXT: mfvsrwz r4, f7 -; CHECK-P8-NEXT: mtvsrd f6, r3 -; CHECK-P8-NEXT: xxswapd v3, vs5 +; CHECK-P8-NEXT: mtvsrd v4, r3 +; CHECK-P8-NEXT: mtvsrd v5, r4 ; CHECK-P8-NEXT: mfvsrwz r3, f0 -; CHECK-P8-NEXT: mtvsrd f7, r4 -; CHECK-P8-NEXT: xxswapd v4, vs6 ; CHECK-P8-NEXT: mfvsrwz r4, f1 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: xxswapd v1, vs7 +; CHECK-P8-NEXT: mtvsrd v0, r3 +; CHECK-P8-NEXT: mtvsrd v1, r4 ; CHECK-P8-NEXT: mfvsrwz r3, f2 -; CHECK-P8-NEXT: mtvsrd f1, r4 -; CHECK-P8-NEXT: xxswapd v5, vs0 ; CHECK-P8-NEXT: mfvsrwz r4, f3 -; CHECK-P8-NEXT: mtvsrd f2, r3 -; CHECK-P8-NEXT: xxswapd v0, vs1 -; CHECK-P8-NEXT: mtvsrd f0, r4 -; CHECK-P8-NEXT: xxswapd v6, vs2 -; CHECK-P8-NEXT: vmrglh v2, v5, v2 -; CHECK-P8-NEXT: xxswapd v5, vs0 -; CHECK-P8-NEXT: vmrglh v3, v0, v3 -; CHECK-P8-NEXT: vmrglh v4, v6, v4 -; CHECK-P8-NEXT: vmrglh v5, v5, v1 +; CHECK-P8-NEXT: vmrghh v2, v0, v2 +; CHECK-P8-NEXT: vmrghh v3, v1, v3 +; CHECK-P8-NEXT: mtvsrd v0, r3 +; CHECK-P8-NEXT: mtvsrd v1, r4 +; CHECK-P8-NEXT: vmrghh v4, v0, v4 +; CHECK-P8-NEXT: vmrghh v5, v1, v5 ; CHECK-P8-NEXT: vmrglw v2, v3, v2 ; CHECK-P8-NEXT: vmrglw v3, v5, v4 ; CHECK-P8-NEXT: xxmrgld v2, v3, v2 @@ -217,47 +197,39 @@ ; CHECK-P9-NEXT: xscvdpsxws f4, f3 ; CHECK-P9-NEXT: xxswapd vs3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: lxv vs2, 16(r3) ; CHECK-P9-NEXT: lxv vs0, 48(r3) ; CHECK-P9-NEXT: lxv vs1, 32(r3) -; CHECK-P9-NEXT: lxv vs2, 16(r3) ; CHECK-P9-NEXT: mfvsrwz r3, f4 -; CHECK-P9-NEXT: mtvsrd f4, r3 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f3 -; CHECK-P9-NEXT: xxswapd v2, vs4 -; CHECK-P9-NEXT: mtvsrd f3, r3 -; CHECK-P9-NEXT: xxswapd v3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs2 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: vmrghh v2, v2, v3 ; CHECK-P9-NEXT: mfvsrwz r3, f3 -; CHECK-P9-NEXT: mtvsrd f3, r3 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: xxswapd v4, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: vmrghh v3, v3, v4 ; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: vmrglh v2, v2, v3 -; CHECK-P9-NEXT: xxswapd v3, vs3 -; CHECK-P9-NEXT: vmrglh v3, v3, v4 ; CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: xxswapd v3, vs2 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: xxswapd v4, vs1 +; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mfvsrwz r3, f1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: vmrghh v3, v3, v4 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 -; CHECK-P9-NEXT: vmrglh v3, v3, v4 -; CHECK-P9-NEXT: xxswapd v4, vs1 -; CHECK-P9-NEXT: xxswapd v5, vs0 -; CHECK-P9-NEXT: vmrglh v4, v4, v5 +; CHECK-P9-NEXT: mtvsrd v5, r3 +; CHECK-P9-NEXT: vmrghh v4, v4, v5 ; CHECK-P9-NEXT: vmrglw v3, v4, v3 ; CHECK-P9-NEXT: xxmrgld v2, v3, v2 ; CHECK-P9-NEXT: blr @@ -321,209 +293,177 @@ define void @test16elt(<16 x i16>* noalias nocapture sret %agg.result, <16 x double>* nocapture readonly) local_unnamed_addr #3 { ; CHECK-P8-LABEL: test16elt: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 ; CHECK-P8-NEXT: li r5, 16 +; CHECK-P8-NEXT: lxvd2x vs2, 0, r4 ; CHECK-P8-NEXT: li r6, 32 -; CHECK-P8-NEXT: lxvd2x vs1, r4, r5 -; CHECK-P8-NEXT: lxvd2x vs2, r4, r6 -; CHECK-P8-NEXT: li r6, 48 -; CHECK-P8-NEXT: lxvd2x vs3, r4, r6 +; CHECK-P8-NEXT: li r7, 48 +; CHECK-P8-NEXT: lxvd2x vs3, r4, r5 +; CHECK-P8-NEXT: lxvd2x vs1, r4, r6 ; CHECK-P8-NEXT: li r6, 64 -; CHECK-P8-NEXT: xscvdpsxws f4, f0 +; CHECK-P8-NEXT: lxvd2x vs0, r4, r7 ; CHECK-P8-NEXT: lxvd2x vs5, r4, r6 -; CHECK-P8-NEXT: li r6, 80 -; CHECK-P8-NEXT: xxswapd vs0, vs0 -; CHECK-P8-NEXT: xscvdpsxws f6, f1 -; CHECK-P8-NEXT: lxvd2x vs7, r4, r6 +; CHECK-P8-NEXT: li r7, 80 ; CHECK-P8-NEXT: li r6, 96 -; CHECK-P8-NEXT: xxswapd vs1, vs1 -; CHECK-P8-NEXT: xscvdpsxws f8, f2 -; CHECK-P8-NEXT: lxvd2x vs9, r4, r6 +; CHECK-P8-NEXT: xscvdpsxws f4, f2 +; CHECK-P8-NEXT: lxvd2x vs7, r4, r7 +; CHECK-P8-NEXT: lxvd2x vs10, r4, r6 ; CHECK-P8-NEXT: li r6, 112 ; CHECK-P8-NEXT: xxswapd vs2, vs2 -; CHECK-P8-NEXT: xscvdpsxws f10, f3 -; CHECK-P8-NEXT: lxvd2x vs11, r4, r6 +; CHECK-P8-NEXT: xscvdpsxws f6, f3 ; CHECK-P8-NEXT: xxswapd vs3, vs3 -; CHECK-P8-NEXT: xscvdpsxws f12, f5 +; CHECK-P8-NEXT: xscvdpsxws f8, f1 +; CHECK-P8-NEXT: xxswapd vs1, vs1 +; CHECK-P8-NEXT: xscvdpsxws f9, f0 +; CHECK-P8-NEXT: xxswapd vs0, vs0 +; CHECK-P8-NEXT: xscvdpsxws f11, f5 ; CHECK-P8-NEXT: xxswapd vs5, vs5 -; CHECK-P8-NEXT: xscvdpsxws f13, f7 +; CHECK-P8-NEXT: xscvdpsxws f12, f7 ; CHECK-P8-NEXT: xxswapd vs7, vs7 -; CHECK-P8-NEXT: xscvdpsxws v2, f9 -; CHECK-P8-NEXT: xxswapd vs9, vs9 -; CHECK-P8-NEXT: mfvsrwz r4, f4 -; CHECK-P8-NEXT: xscvdpsxws v3, f11 -; CHECK-P8-NEXT: xxswapd vs11, vs11 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: mfvsrwz r6, f6 -; CHECK-P8-NEXT: mtvsrd f4, r4 +; CHECK-P8-NEXT: mfvsrwz r7, f4 +; CHECK-P8-NEXT: lxvd2x vs4, r4, r6 +; CHECK-P8-NEXT: mfvsrwz r4, f6 +; CHECK-P8-NEXT: xscvdpsxws f13, f10 +; CHECK-P8-NEXT: mtvsrd v3, r4 ; CHECK-P8-NEXT: mfvsrwz r4, f8 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: xxswapd v4, vs4 +; CHECK-P8-NEXT: xscvdpsxws f6, f4 +; CHECK-P8-NEXT: mtvsrd v4, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f9 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: mtvsrd f6, r6 -; CHECK-P8-NEXT: mfvsrwz r6, f10 -; CHECK-P8-NEXT: mtvsrd f8, r4 -; CHECK-P8-NEXT: xxswapd v5, vs6 +; CHECK-P8-NEXT: mtvsrd v5, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f11 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: mtvsrd v0, r4 ; CHECK-P8-NEXT: mfvsrwz r4, f12 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: mtvsrd v1, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f13 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: mtvsrd v6, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f6 +; CHECK-P8-NEXT: xxswapd vs6, vs10 ; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: xxswapd v0, vs8 -; CHECK-P8-NEXT: mtvsrd f10, r6 -; CHECK-P8-NEXT: mfvsrwz r6, f13 -; CHECK-P8-NEXT: mtvsrd f12, r4 -; CHECK-P8-NEXT: xxswapd v1, vs10 -; CHECK-P8-NEXT: mfvsrwz r4, v2 -; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: xxswapd v6, vs12 -; CHECK-P8-NEXT: xscvdpsxws f9, f9 -; CHECK-P8-NEXT: mtvsrd f13, r6 -; CHECK-P8-NEXT: mfvsrwz r6, v3 -; CHECK-P8-NEXT: mtvsrd v2, r4 -; CHECK-P8-NEXT: xxswapd v7, vs13 -; CHECK-P8-NEXT: mfvsrwz r4, f0 -; CHECK-P8-NEXT: xscvdpsxws f7, f7 -; CHECK-P8-NEXT: xxswapd v2, v2 -; CHECK-P8-NEXT: xscvdpsxws f11, f11 -; CHECK-P8-NEXT: mtvsrd v3, r6 -; CHECK-P8-NEXT: mfvsrwz r6, f1 -; CHECK-P8-NEXT: mtvsrd f0, r4 -; CHECK-P8-NEXT: xxswapd v3, v3 +; CHECK-P8-NEXT: mtvsrd v7, r4 ; CHECK-P8-NEXT: mfvsrwz r4, f2 -; CHECK-P8-NEXT: mtvsrd f1, r6 -; CHECK-P8-NEXT: xxswapd v8, vs0 -; CHECK-P8-NEXT: mtvsrd f2, r4 +; CHECK-P8-NEXT: xxswapd vs2, vs4 +; CHECK-P8-NEXT: mtvsrd v2, r7 +; CHECK-P8-NEXT: mtvsrd v8, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f3 +; CHECK-P8-NEXT: xscvdpsxws f7, f7 +; CHECK-P8-NEXT: mtvsrd v9, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f1 +; CHECK-P8-NEXT: xscvdpsxws f4, f6 +; CHECK-P8-NEXT: vmrghh v2, v8, v2 +; CHECK-P8-NEXT: mtvsrd v8, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f0 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: vmrghh v3, v9, v3 +; CHECK-P8-NEXT: mtvsrd v9, r4 ; CHECK-P8-NEXT: mfvsrwz r4, f5 -; CHECK-P8-NEXT: xxswapd v9, vs1 -; CHECK-P8-NEXT: mfvsrwz r6, f3 -; CHECK-P8-NEXT: xxswapd v10, vs2 -; CHECK-P8-NEXT: mtvsrd f5, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f9 -; CHECK-P8-NEXT: mtvsrd f3, r6 -; CHECK-P8-NEXT: mfvsrwz r6, f7 -; CHECK-P8-NEXT: mtvsrd f9, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f11 -; CHECK-P8-NEXT: vmrglh v4, v8, v4 -; CHECK-P8-NEXT: xxswapd v8, vs3 -; CHECK-P8-NEXT: vmrglh v5, v9, v5 -; CHECK-P8-NEXT: xxswapd v9, vs5 -; CHECK-P8-NEXT: mtvsrd f7, r6 -; CHECK-P8-NEXT: mtvsrd f0, r4 -; CHECK-P8-NEXT: vmrglh v0, v10, v0 -; CHECK-P8-NEXT: xxswapd v10, vs7 -; CHECK-P8-NEXT: vmrglh v1, v8, v1 -; CHECK-P8-NEXT: xxswapd v8, vs9 -; CHECK-P8-NEXT: vmrglh v6, v9, v6 -; CHECK-P8-NEXT: xxswapd v9, vs0 -; CHECK-P8-NEXT: vmrglh v7, v10, v7 -; CHECK-P8-NEXT: vmrglh v2, v8, v2 -; CHECK-P8-NEXT: vmrglh v3, v9, v3 -; CHECK-P8-NEXT: vmrglw v4, v5, v4 -; CHECK-P8-NEXT: vmrglw v5, v1, v0 -; CHECK-P8-NEXT: vmrglw v0, v7, v6 +; CHECK-P8-NEXT: vmrghh v4, v8, v4 +; CHECK-P8-NEXT: mtvsrd v8, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f7 +; CHECK-P8-NEXT: vmrghh v5, v9, v5 +; CHECK-P8-NEXT: mtvsrd v9, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f4 +; CHECK-P8-NEXT: vmrghh v0, v8, v0 +; CHECK-P8-NEXT: mtvsrd v8, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f2 +; CHECK-P8-NEXT: vmrghh v1, v9, v1 +; CHECK-P8-NEXT: mtvsrd v9, r4 +; CHECK-P8-NEXT: vmrghh v6, v8, v6 +; CHECK-P8-NEXT: vmrghh v7, v9, v7 ; CHECK-P8-NEXT: vmrglw v2, v3, v2 +; CHECK-P8-NEXT: vmrglw v3, v5, v4 +; CHECK-P8-NEXT: vmrglw v4, v1, v0 +; CHECK-P8-NEXT: vmrglw v5, v7, v6 +; CHECK-P8-NEXT: xxmrgld v2, v3, v2 +; CHECK-P8-NEXT: stvx v2, 0, r3 ; CHECK-P8-NEXT: xxmrgld v3, v5, v4 -; CHECK-P8-NEXT: stvx v3, 0, r3 -; CHECK-P8-NEXT: xxmrgld v2, v2, v0 -; CHECK-P8-NEXT: stvx v2, r3, r5 +; CHECK-P8-NEXT: stvx v3, r3, r5 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test16elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs4, 0(r4) -; CHECK-P9-NEXT: lxv vs3, 16(r4) -; CHECK-P9-NEXT: lxv vs2, 32(r4) -; CHECK-P9-NEXT: xscvdpsxws f5, f4 -; CHECK-P9-NEXT: lxv vs1, 48(r4) -; CHECK-P9-NEXT: xscvdpsxws f6, f3 -; CHECK-P9-NEXT: lxv vs0, 64(r4) -; CHECK-P9-NEXT: xscvdpsxws f7, f2 -; CHECK-P9-NEXT: xscvdpsxws f8, f1 -; CHECK-P9-NEXT: xxswapd vs4, vs4 -; CHECK-P9-NEXT: xscvdpsxws f4, f4 -; CHECK-P9-NEXT: mfvsrwz r5, f5 -; CHECK-P9-NEXT: xscvdpsxws f9, f0 +; CHECK-P9-NEXT: lxv vs3, 0(r4) +; CHECK-P9-NEXT: lxv vs2, 16(r4) +; CHECK-P9-NEXT: lxv vs1, 32(r4) +; CHECK-P9-NEXT: xscvdpsxws f4, f3 +; CHECK-P9-NEXT: lxv vs0, 48(r4) +; CHECK-P9-NEXT: xscvdpsxws f5, f2 +; CHECK-P9-NEXT: xscvdpsxws f6, f1 ; CHECK-P9-NEXT: xxswapd vs3, vs3 +; CHECK-P9-NEXT: xscvdpsxws f7, f0 +; CHECK-P9-NEXT: xxswapd vs0, vs0 +; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: mfvsrwz r5, f4 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: mtvsrd f5, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f6 ; CHECK-P9-NEXT: xxswapd vs2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mtvsrd f6, r5 +; CHECK-P9-NEXT: mtvsrd v2, r5 +; CHECK-P9-NEXT: mfvsrwz r5, f5 +; CHECK-P9-NEXT: mtvsrd v3, r5 +; CHECK-P9-NEXT: mfvsrwz r5, f6 +; CHECK-P9-NEXT: mtvsrd v4, r5 ; CHECK-P9-NEXT: mfvsrwz r5, f7 -; CHECK-P9-NEXT: mtvsrd f7, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f8 -; CHECK-P9-NEXT: mtvsrd f8, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f9 -; CHECK-P9-NEXT: mtvsrd f9, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f4 -; CHECK-P9-NEXT: mtvsrd f4, r5 +; CHECK-P9-NEXT: mtvsrd v5, r5 ; CHECK-P9-NEXT: mfvsrwz r5, f3 +; CHECK-P9-NEXT: lxv vs3, 64(r4) ; CHECK-P9-NEXT: xxswapd vs1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: xxswapd v2, vs5 -; CHECK-P9-NEXT: xxswapd v5, vs8 -; CHECK-P9-NEXT: xxswapd v0, vs9 -; CHECK-P9-NEXT: mtvsrd f3, r5 +; CHECK-P9-NEXT: mtvsrd v0, r5 ; CHECK-P9-NEXT: mfvsrwz r5, f2 -; CHECK-P9-NEXT: mtvsrd f2, r5 -; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: xxswapd v1, vs2 ; CHECK-P9-NEXT: lxv vs2, 80(r4) -; CHECK-P9-NEXT: xxswapd v3, vs4 -; CHECK-P9-NEXT: vmrglh v2, v2, v3 -; CHECK-P9-NEXT: xxswapd v3, vs6 -; CHECK-P9-NEXT: xxswapd v4, vs3 -; CHECK-P9-NEXT: xscvdpsxws f3, f2 -; CHECK-P9-NEXT: xxswapd vs2, vs2 +; CHECK-P9-NEXT: vmrghh v2, v2, v0 +; CHECK-P9-NEXT: mtvsrd v0, r5 ; CHECK-P9-NEXT: mfvsrwz r5, f1 -; CHECK-P9-NEXT: vmrglh v3, v3, v4 -; CHECK-P9-NEXT: xxswapd v4, vs7 -; CHECK-P9-NEXT: mtvsrd f1, r5 +; CHECK-P9-NEXT: lxv vs1, 96(r4) +; CHECK-P9-NEXT: xscvdpsxws f4, f3 +; CHECK-P9-NEXT: xxswapd vs3, vs3 +; CHECK-P9-NEXT: vmrghh v3, v3, v0 +; CHECK-P9-NEXT: mtvsrd v0, r5 ; CHECK-P9-NEXT: mfvsrwz r5, f0 -; CHECK-P9-NEXT: vmrglh v4, v4, v1 -; CHECK-P9-NEXT: xxswapd v1, vs1 -; CHECK-P9-NEXT: mtvsrd f0, r5 -; CHECK-P9-NEXT: vmrglh v5, v5, v1 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: xxswapd v1, vs0 ; CHECK-P9-NEXT: lxv vs0, 112(r4) -; CHECK-P9-NEXT: lxv vs1, 96(r4) +; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: vmrghh v4, v4, v0 +; CHECK-P9-NEXT: mtvsrd v0, r5 +; CHECK-P9-NEXT: vmrglw v2, v3, v2 +; CHECK-P9-NEXT: vmrghh v5, v5, v0 +; CHECK-P9-NEXT: mfvsrwz r4, f4 +; CHECK-P9-NEXT: vmrglw v4, v5, v4 +; CHECK-P9-NEXT: mtvsrd v3, r4 ; CHECK-P9-NEXT: mfvsrwz r4, f3 -; CHECK-P9-NEXT: mtvsrd f3, r4 +; CHECK-P9-NEXT: xscvdpsxws f3, f2 +; CHECK-P9-NEXT: xxswapd vs2, vs2 +; CHECK-P9-NEXT: xxmrgld vs4, v4, v2 +; CHECK-P9-NEXT: mtvsrd v2, r4 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-P9-NEXT: stxv vs4, 0(r3) +; CHECK-P9-NEXT: mfvsrwz r4, f3 +; CHECK-P9-NEXT: mtvsrd v3, r4 ; CHECK-P9-NEXT: mfvsrwz r4, f2 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: vmrglw v3, v5, v4 -; CHECK-P9-NEXT: xxmrgld vs4, v3, v2 -; CHECK-P9-NEXT: xxswapd v2, vs3 -; CHECK-P9-NEXT: vmrglh v0, v0, v1 -; CHECK-P9-NEXT: mtvsrd f2, r4 -; CHECK-P9-NEXT: xxswapd v3, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 +; CHECK-P9-NEXT: mtvsrd v4, r4 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: vmrghh v3, v3, v4 ; CHECK-P9-NEXT: mfvsrwz r4, f2 -; CHECK-P9-NEXT: mtvsrd f2, r4 +; CHECK-P9-NEXT: vmrglw v2, v3, v2 +; CHECK-P9-NEXT: mtvsrd v3, r4 ; CHECK-P9-NEXT: mfvsrwz r4, f1 -; CHECK-P9-NEXT: mtvsrd f1, r4 -; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 +; CHECK-P9-NEXT: mtvsrd v4, r4 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: vmrghh v3, v3, v4 ; CHECK-P9-NEXT: mfvsrwz r4, f1 -; CHECK-P9-NEXT: mtvsrd f1, r4 +; CHECK-P9-NEXT: mtvsrd v4, r4 ; CHECK-P9-NEXT: mfvsrwz r4, f0 -; CHECK-P9-NEXT: vmrglh v2, v2, v3 -; CHECK-P9-NEXT: xxswapd v3, vs2 -; CHECK-P9-NEXT: vmrglh v3, v3, v4 -; CHECK-P9-NEXT: xxswapd v4, vs1 -; CHECK-P9-NEXT: vmrglw v2, v2, v0 -; CHECK-P9-NEXT: mtvsrd f0, r4 -; CHECK-P9-NEXT: xxswapd v5, vs0 -; CHECK-P9-NEXT: vmrglh v4, v4, v5 +; CHECK-P9-NEXT: mtvsrd v5, r4 +; CHECK-P9-NEXT: vmrghh v4, v4, v5 ; CHECK-P9-NEXT: vmrglw v3, v4, v3 ; CHECK-P9-NEXT: xxmrgld vs0, v3, v2 ; CHECK-P9-NEXT: stxv vs0, 16(r3) -; CHECK-P9-NEXT: stxv vs4, 0(r3) ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test16elt: @@ -639,12 +579,10 @@ ; CHECK-P8-NEXT: xscvdpsxws f1, v2 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mfvsrwz r3, f1 +; CHECK-P8-NEXT: mtvsrd v2, r3 ; CHECK-P8-NEXT: mfvsrwz r4, f0 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: mtvsrd f1, r4 -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: xxswapd v3, vs1 -; CHECK-P8-NEXT: vmrglh v2, v2, v3 +; CHECK-P8-NEXT: mtvsrd v3, r4 +; CHECK-P8-NEXT: vmrghh v2, v2, v3 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mfvsrwz r3, f0 ; CHECK-P8-NEXT: blr @@ -653,15 +591,13 @@ ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: xscvdpsxws f0, v2 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 -; CHECK-P9-NEXT: xxswapd v3, vs0 ; CHECK-P9-NEXT: xxswapd vs0, v2 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: li r3, 0 -; CHECK-P9-NEXT: xxswapd v2, vs0 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 +; CHECK-P9-NEXT: vmrghh v2, v3, v2 ; CHECK-P9-NEXT: vextuwrx r3, r3, v2 ; CHECK-P9-NEXT: blr ; @@ -700,18 +636,14 @@ ; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: mfvsrwz r3, f2 ; CHECK-P8-NEXT: mfvsrwz r4, f3 -; CHECK-P8-NEXT: mtvsrd f2, r3 -; CHECK-P8-NEXT: mtvsrd f3, r4 +; CHECK-P8-NEXT: mtvsrd v2, r3 +; CHECK-P8-NEXT: mtvsrd v3, r4 ; CHECK-P8-NEXT: mfvsrwz r3, f0 -; CHECK-P8-NEXT: xxswapd v2, vs2 ; CHECK-P8-NEXT: mfvsrwz r4, f1 -; CHECK-P8-NEXT: xxswapd v4, vs3 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: mtvsrd f1, r4 -; CHECK-P8-NEXT: xxswapd v3, vs0 -; CHECK-P8-NEXT: xxswapd v5, vs1 -; CHECK-P8-NEXT: vmrglh v2, v3, v2 -; CHECK-P8-NEXT: vmrglh v3, v5, v4 +; CHECK-P8-NEXT: mtvsrd v4, r3 +; CHECK-P8-NEXT: mtvsrd v5, r4 +; CHECK-P8-NEXT: vmrghh v2, v4, v2 +; CHECK-P8-NEXT: vmrghh v3, v5, v3 ; CHECK-P8-NEXT: vmrglw v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mfvsrd r3, f0 @@ -725,22 +657,18 @@ ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: lxv vs0, 16(r3) ; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: xxswapd v2, vs2 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: xxswapd v3, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: vmrghh v2, v2, v3 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 -; CHECK-P9-NEXT: vmrglh v2, v2, v3 -; CHECK-P9-NEXT: xxswapd v3, vs1 -; CHECK-P9-NEXT: xxswapd v4, vs0 -; CHECK-P9-NEXT: vmrglh v3, v3, v4 +; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: vmrghh v3, v3, v4 ; CHECK-P9-NEXT: vmrglw v2, v3, v2 ; CHECK-P9-NEXT: mfvsrld r3, v2 ; CHECK-P9-NEXT: blr @@ -799,36 +727,28 @@ ; CHECK-P8-NEXT: xxswapd vs3, vs3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mfvsrwz r3, f4 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: mfvsrwz r3, f4 ; CHECK-P8-NEXT: mfvsrwz r4, f5 -; CHECK-P8-NEXT: mtvsrd f4, r3 +; CHECK-P8-NEXT: mtvsrd v2, r3 ; CHECK-P8-NEXT: mfvsrwz r3, f6 -; CHECK-P8-NEXT: mtvsrd f5, r4 -; CHECK-P8-NEXT: xxswapd v2, vs4 +; CHECK-P8-NEXT: mtvsrd v3, r4 ; CHECK-P8-NEXT: mfvsrwz r4, f7 -; CHECK-P8-NEXT: mtvsrd f6, r3 -; CHECK-P8-NEXT: xxswapd v3, vs5 +; CHECK-P8-NEXT: mtvsrd v4, r3 +; CHECK-P8-NEXT: mtvsrd v5, r4 ; CHECK-P8-NEXT: mfvsrwz r3, f0 -; CHECK-P8-NEXT: mtvsrd f7, r4 -; CHECK-P8-NEXT: xxswapd v4, vs6 ; CHECK-P8-NEXT: mfvsrwz r4, f1 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: xxswapd v1, vs7 +; CHECK-P8-NEXT: mtvsrd v0, r3 +; CHECK-P8-NEXT: mtvsrd v1, r4 ; CHECK-P8-NEXT: mfvsrwz r3, f2 -; CHECK-P8-NEXT: mtvsrd f1, r4 -; CHECK-P8-NEXT: xxswapd v5, vs0 ; CHECK-P8-NEXT: mfvsrwz r4, f3 -; CHECK-P8-NEXT: mtvsrd f2, r3 -; CHECK-P8-NEXT: xxswapd v0, vs1 -; CHECK-P8-NEXT: mtvsrd f0, r4 -; CHECK-P8-NEXT: xxswapd v6, vs2 -; CHECK-P8-NEXT: vmrglh v2, v5, v2 -; CHECK-P8-NEXT: xxswapd v5, vs0 -; CHECK-P8-NEXT: vmrglh v3, v0, v3 -; CHECK-P8-NEXT: vmrglh v4, v6, v4 -; CHECK-P8-NEXT: vmrglh v5, v5, v1 +; CHECK-P8-NEXT: vmrghh v2, v0, v2 +; CHECK-P8-NEXT: vmrghh v3, v1, v3 +; CHECK-P8-NEXT: mtvsrd v0, r3 +; CHECK-P8-NEXT: mtvsrd v1, r4 +; CHECK-P8-NEXT: vmrghh v4, v0, v4 +; CHECK-P8-NEXT: vmrghh v5, v1, v5 ; CHECK-P8-NEXT: vmrglw v2, v3, v2 ; CHECK-P8-NEXT: vmrglw v3, v5, v4 ; CHECK-P8-NEXT: xxmrgld v2, v3, v2 @@ -840,47 +760,39 @@ ; CHECK-P9-NEXT: xscvdpsxws f4, f3 ; CHECK-P9-NEXT: xxswapd vs3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: lxv vs2, 16(r3) ; CHECK-P9-NEXT: lxv vs0, 48(r3) ; CHECK-P9-NEXT: lxv vs1, 32(r3) -; CHECK-P9-NEXT: lxv vs2, 16(r3) ; CHECK-P9-NEXT: mfvsrwz r3, f4 -; CHECK-P9-NEXT: mtvsrd f4, r3 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f3 -; CHECK-P9-NEXT: xxswapd v2, vs4 -; CHECK-P9-NEXT: mtvsrd f3, r3 -; CHECK-P9-NEXT: xxswapd v3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs2 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: vmrghh v2, v2, v3 ; CHECK-P9-NEXT: mfvsrwz r3, f3 -; CHECK-P9-NEXT: mtvsrd f3, r3 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: xxswapd v4, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: vmrghh v3, v3, v4 ; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: vmrglh v2, v2, v3 -; CHECK-P9-NEXT: xxswapd v3, vs3 -; CHECK-P9-NEXT: vmrglh v3, v3, v4 ; CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: xxswapd v3, vs2 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: xxswapd v4, vs1 +; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mfvsrwz r3, f1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: vmrghh v3, v3, v4 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 -; CHECK-P9-NEXT: vmrglh v3, v3, v4 -; CHECK-P9-NEXT: xxswapd v4, vs1 -; CHECK-P9-NEXT: xxswapd v5, vs0 -; CHECK-P9-NEXT: vmrglh v4, v4, v5 +; CHECK-P9-NEXT: mtvsrd v5, r3 +; CHECK-P9-NEXT: vmrghh v4, v4, v5 ; CHECK-P9-NEXT: vmrglw v3, v4, v3 ; CHECK-P9-NEXT: xxmrgld v2, v3, v2 ; CHECK-P9-NEXT: blr @@ -944,209 +856,177 @@ define void @test16elt_signed(<16 x i16>* noalias nocapture sret %agg.result, <16 x double>* nocapture readonly) local_unnamed_addr #3 { ; CHECK-P8-LABEL: test16elt_signed: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: lxvd2x vs0, 0, r4 ; CHECK-P8-NEXT: li r5, 16 +; CHECK-P8-NEXT: lxvd2x vs2, 0, r4 ; CHECK-P8-NEXT: li r6, 32 -; CHECK-P8-NEXT: lxvd2x vs1, r4, r5 -; CHECK-P8-NEXT: lxvd2x vs2, r4, r6 -; CHECK-P8-NEXT: li r6, 48 -; CHECK-P8-NEXT: lxvd2x vs3, r4, r6 +; CHECK-P8-NEXT: li r7, 48 +; CHECK-P8-NEXT: lxvd2x vs3, r4, r5 +; CHECK-P8-NEXT: lxvd2x vs1, r4, r6 ; CHECK-P8-NEXT: li r6, 64 -; CHECK-P8-NEXT: xscvdpsxws f4, f0 +; CHECK-P8-NEXT: lxvd2x vs0, r4, r7 ; CHECK-P8-NEXT: lxvd2x vs5, r4, r6 -; CHECK-P8-NEXT: li r6, 80 -; CHECK-P8-NEXT: xxswapd vs0, vs0 -; CHECK-P8-NEXT: xscvdpsxws f6, f1 -; CHECK-P8-NEXT: lxvd2x vs7, r4, r6 +; CHECK-P8-NEXT: li r7, 80 ; CHECK-P8-NEXT: li r6, 96 -; CHECK-P8-NEXT: xxswapd vs1, vs1 -; CHECK-P8-NEXT: xscvdpsxws f8, f2 -; CHECK-P8-NEXT: lxvd2x vs9, r4, r6 +; CHECK-P8-NEXT: xscvdpsxws f4, f2 +; CHECK-P8-NEXT: lxvd2x vs7, r4, r7 +; CHECK-P8-NEXT: lxvd2x vs10, r4, r6 ; CHECK-P8-NEXT: li r6, 112 ; CHECK-P8-NEXT: xxswapd vs2, vs2 -; CHECK-P8-NEXT: xscvdpsxws f10, f3 -; CHECK-P8-NEXT: lxvd2x vs11, r4, r6 +; CHECK-P8-NEXT: xscvdpsxws f6, f3 ; CHECK-P8-NEXT: xxswapd vs3, vs3 -; CHECK-P8-NEXT: xscvdpsxws f12, f5 +; CHECK-P8-NEXT: xscvdpsxws f8, f1 +; CHECK-P8-NEXT: xxswapd vs1, vs1 +; CHECK-P8-NEXT: xscvdpsxws f9, f0 +; CHECK-P8-NEXT: xxswapd vs0, vs0 +; CHECK-P8-NEXT: xscvdpsxws f11, f5 ; CHECK-P8-NEXT: xxswapd vs5, vs5 -; CHECK-P8-NEXT: xscvdpsxws f13, f7 +; CHECK-P8-NEXT: xscvdpsxws f12, f7 ; CHECK-P8-NEXT: xxswapd vs7, vs7 -; CHECK-P8-NEXT: xscvdpsxws v2, f9 -; CHECK-P8-NEXT: xxswapd vs9, vs9 -; CHECK-P8-NEXT: mfvsrwz r4, f4 -; CHECK-P8-NEXT: xscvdpsxws v3, f11 -; CHECK-P8-NEXT: xxswapd vs11, vs11 -; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: mfvsrwz r6, f6 -; CHECK-P8-NEXT: mtvsrd f4, r4 +; CHECK-P8-NEXT: mfvsrwz r7, f4 +; CHECK-P8-NEXT: lxvd2x vs4, r4, r6 +; CHECK-P8-NEXT: mfvsrwz r4, f6 +; CHECK-P8-NEXT: xscvdpsxws f13, f10 +; CHECK-P8-NEXT: mtvsrd v3, r4 ; CHECK-P8-NEXT: mfvsrwz r4, f8 -; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: xxswapd v4, vs4 +; CHECK-P8-NEXT: xscvdpsxws f6, f4 +; CHECK-P8-NEXT: mtvsrd v4, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f9 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: mtvsrd f6, r6 -; CHECK-P8-NEXT: mfvsrwz r6, f10 -; CHECK-P8-NEXT: mtvsrd f8, r4 -; CHECK-P8-NEXT: xxswapd v5, vs6 +; CHECK-P8-NEXT: mtvsrd v5, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f11 +; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: mtvsrd v0, r4 ; CHECK-P8-NEXT: mfvsrwz r4, f12 +; CHECK-P8-NEXT: xscvdpsxws f1, f1 +; CHECK-P8-NEXT: mtvsrd v1, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f13 +; CHECK-P8-NEXT: xscvdpsxws f0, f0 +; CHECK-P8-NEXT: mtvsrd v6, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f6 +; CHECK-P8-NEXT: xxswapd vs6, vs10 ; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: xxswapd v0, vs8 -; CHECK-P8-NEXT: mtvsrd f10, r6 -; CHECK-P8-NEXT: mfvsrwz r6, f13 -; CHECK-P8-NEXT: mtvsrd f12, r4 -; CHECK-P8-NEXT: xxswapd v1, vs10 -; CHECK-P8-NEXT: mfvsrwz r4, v2 -; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: xxswapd v6, vs12 -; CHECK-P8-NEXT: xscvdpsxws f9, f9 -; CHECK-P8-NEXT: mtvsrd f13, r6 -; CHECK-P8-NEXT: mfvsrwz r6, v3 -; CHECK-P8-NEXT: mtvsrd v2, r4 -; CHECK-P8-NEXT: xxswapd v7, vs13 -; CHECK-P8-NEXT: mfvsrwz r4, f0 -; CHECK-P8-NEXT: xscvdpsxws f7, f7 -; CHECK-P8-NEXT: xxswapd v2, v2 -; CHECK-P8-NEXT: xscvdpsxws f11, f11 -; CHECK-P8-NEXT: mtvsrd v3, r6 -; CHECK-P8-NEXT: mfvsrwz r6, f1 -; CHECK-P8-NEXT: mtvsrd f0, r4 -; CHECK-P8-NEXT: xxswapd v3, v3 +; CHECK-P8-NEXT: mtvsrd v7, r4 ; CHECK-P8-NEXT: mfvsrwz r4, f2 -; CHECK-P8-NEXT: mtvsrd f1, r6 -; CHECK-P8-NEXT: xxswapd v8, vs0 -; CHECK-P8-NEXT: mtvsrd f2, r4 +; CHECK-P8-NEXT: xxswapd vs2, vs4 +; CHECK-P8-NEXT: mtvsrd v2, r7 +; CHECK-P8-NEXT: mtvsrd v8, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f3 +; CHECK-P8-NEXT: xscvdpsxws f7, f7 +; CHECK-P8-NEXT: mtvsrd v9, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f1 +; CHECK-P8-NEXT: xscvdpsxws f4, f6 +; CHECK-P8-NEXT: vmrghh v2, v8, v2 +; CHECK-P8-NEXT: mtvsrd v8, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f0 +; CHECK-P8-NEXT: xscvdpsxws f2, f2 +; CHECK-P8-NEXT: vmrghh v3, v9, v3 +; CHECK-P8-NEXT: mtvsrd v9, r4 ; CHECK-P8-NEXT: mfvsrwz r4, f5 -; CHECK-P8-NEXT: xxswapd v9, vs1 -; CHECK-P8-NEXT: mfvsrwz r6, f3 -; CHECK-P8-NEXT: xxswapd v10, vs2 -; CHECK-P8-NEXT: mtvsrd f5, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f9 -; CHECK-P8-NEXT: mtvsrd f3, r6 -; CHECK-P8-NEXT: mfvsrwz r6, f7 -; CHECK-P8-NEXT: mtvsrd f9, r4 -; CHECK-P8-NEXT: mfvsrwz r4, f11 -; CHECK-P8-NEXT: vmrglh v4, v8, v4 -; CHECK-P8-NEXT: xxswapd v8, vs3 -; CHECK-P8-NEXT: vmrglh v5, v9, v5 -; CHECK-P8-NEXT: xxswapd v9, vs5 -; CHECK-P8-NEXT: mtvsrd f7, r6 -; CHECK-P8-NEXT: mtvsrd f0, r4 -; CHECK-P8-NEXT: vmrglh v0, v10, v0 -; CHECK-P8-NEXT: xxswapd v10, vs7 -; CHECK-P8-NEXT: vmrglh v1, v8, v1 -; CHECK-P8-NEXT: xxswapd v8, vs9 -; CHECK-P8-NEXT: vmrglh v6, v9, v6 -; CHECK-P8-NEXT: xxswapd v9, vs0 -; CHECK-P8-NEXT: vmrglh v7, v10, v7 -; CHECK-P8-NEXT: vmrglh v2, v8, v2 -; CHECK-P8-NEXT: vmrglh v3, v9, v3 -; CHECK-P8-NEXT: vmrglw v4, v5, v4 -; CHECK-P8-NEXT: vmrglw v5, v1, v0 -; CHECK-P8-NEXT: vmrglw v0, v7, v6 +; CHECK-P8-NEXT: vmrghh v4, v8, v4 +; CHECK-P8-NEXT: mtvsrd v8, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f7 +; CHECK-P8-NEXT: vmrghh v5, v9, v5 +; CHECK-P8-NEXT: mtvsrd v9, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f4 +; CHECK-P8-NEXT: vmrghh v0, v8, v0 +; CHECK-P8-NEXT: mtvsrd v8, r4 +; CHECK-P8-NEXT: mfvsrwz r4, f2 +; CHECK-P8-NEXT: vmrghh v1, v9, v1 +; CHECK-P8-NEXT: mtvsrd v9, r4 +; CHECK-P8-NEXT: vmrghh v6, v8, v6 +; CHECK-P8-NEXT: vmrghh v7, v9, v7 ; CHECK-P8-NEXT: vmrglw v2, v3, v2 +; CHECK-P8-NEXT: vmrglw v3, v5, v4 +; CHECK-P8-NEXT: vmrglw v4, v1, v0 +; CHECK-P8-NEXT: vmrglw v5, v7, v6 +; CHECK-P8-NEXT: xxmrgld v2, v3, v2 +; CHECK-P8-NEXT: stvx v2, 0, r3 ; CHECK-P8-NEXT: xxmrgld v3, v5, v4 -; CHECK-P8-NEXT: stvx v3, 0, r3 -; CHECK-P8-NEXT: xxmrgld v2, v2, v0 -; CHECK-P8-NEXT: stvx v2, r3, r5 +; CHECK-P8-NEXT: stvx v3, r3, r5 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test16elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs4, 0(r4) -; CHECK-P9-NEXT: lxv vs3, 16(r4) -; CHECK-P9-NEXT: lxv vs2, 32(r4) -; CHECK-P9-NEXT: xscvdpsxws f5, f4 -; CHECK-P9-NEXT: lxv vs1, 48(r4) -; CHECK-P9-NEXT: xscvdpsxws f6, f3 -; CHECK-P9-NEXT: lxv vs0, 64(r4) -; CHECK-P9-NEXT: xscvdpsxws f7, f2 -; CHECK-P9-NEXT: xscvdpsxws f8, f1 -; CHECK-P9-NEXT: xxswapd vs4, vs4 -; CHECK-P9-NEXT: xscvdpsxws f4, f4 -; CHECK-P9-NEXT: mfvsrwz r5, f5 -; CHECK-P9-NEXT: xscvdpsxws f9, f0 +; CHECK-P9-NEXT: lxv vs3, 0(r4) +; CHECK-P9-NEXT: lxv vs2, 16(r4) +; CHECK-P9-NEXT: lxv vs1, 32(r4) +; CHECK-P9-NEXT: xscvdpsxws f4, f3 +; CHECK-P9-NEXT: lxv vs0, 48(r4) +; CHECK-P9-NEXT: xscvdpsxws f5, f2 +; CHECK-P9-NEXT: xscvdpsxws f6, f1 ; CHECK-P9-NEXT: xxswapd vs3, vs3 +; CHECK-P9-NEXT: xscvdpsxws f7, f0 +; CHECK-P9-NEXT: xxswapd vs0, vs0 +; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: mfvsrwz r5, f4 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: mtvsrd f5, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f6 ; CHECK-P9-NEXT: xxswapd vs2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mtvsrd f6, r5 +; CHECK-P9-NEXT: mtvsrd v2, r5 +; CHECK-P9-NEXT: mfvsrwz r5, f5 +; CHECK-P9-NEXT: mtvsrd v3, r5 +; CHECK-P9-NEXT: mfvsrwz r5, f6 +; CHECK-P9-NEXT: mtvsrd v4, r5 ; CHECK-P9-NEXT: mfvsrwz r5, f7 -; CHECK-P9-NEXT: mtvsrd f7, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f8 -; CHECK-P9-NEXT: mtvsrd f8, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f9 -; CHECK-P9-NEXT: mtvsrd f9, r5 -; CHECK-P9-NEXT: mfvsrwz r5, f4 -; CHECK-P9-NEXT: mtvsrd f4, r5 +; CHECK-P9-NEXT: mtvsrd v5, r5 ; CHECK-P9-NEXT: mfvsrwz r5, f3 +; CHECK-P9-NEXT: lxv vs3, 64(r4) ; CHECK-P9-NEXT: xxswapd vs1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: xxswapd v2, vs5 -; CHECK-P9-NEXT: xxswapd v5, vs8 -; CHECK-P9-NEXT: xxswapd v0, vs9 -; CHECK-P9-NEXT: mtvsrd f3, r5 +; CHECK-P9-NEXT: mtvsrd v0, r5 ; CHECK-P9-NEXT: mfvsrwz r5, f2 -; CHECK-P9-NEXT: mtvsrd f2, r5 -; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: xxswapd v1, vs2 ; CHECK-P9-NEXT: lxv vs2, 80(r4) -; CHECK-P9-NEXT: xxswapd v3, vs4 -; CHECK-P9-NEXT: vmrglh v2, v2, v3 -; CHECK-P9-NEXT: xxswapd v3, vs6 -; CHECK-P9-NEXT: xxswapd v4, vs3 -; CHECK-P9-NEXT: xscvdpsxws f3, f2 -; CHECK-P9-NEXT: xxswapd vs2, vs2 +; CHECK-P9-NEXT: vmrghh v2, v2, v0 +; CHECK-P9-NEXT: mtvsrd v0, r5 ; CHECK-P9-NEXT: mfvsrwz r5, f1 -; CHECK-P9-NEXT: vmrglh v3, v3, v4 -; CHECK-P9-NEXT: xxswapd v4, vs7 -; CHECK-P9-NEXT: mtvsrd f1, r5 +; CHECK-P9-NEXT: lxv vs1, 96(r4) +; CHECK-P9-NEXT: xscvdpsxws f4, f3 +; CHECK-P9-NEXT: xxswapd vs3, vs3 +; CHECK-P9-NEXT: vmrghh v3, v3, v0 +; CHECK-P9-NEXT: mtvsrd v0, r5 ; CHECK-P9-NEXT: mfvsrwz r5, f0 -; CHECK-P9-NEXT: vmrglh v4, v4, v1 -; CHECK-P9-NEXT: xxswapd v1, vs1 -; CHECK-P9-NEXT: mtvsrd f0, r5 -; CHECK-P9-NEXT: vmrglh v5, v5, v1 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: xxswapd v1, vs0 ; CHECK-P9-NEXT: lxv vs0, 112(r4) -; CHECK-P9-NEXT: lxv vs1, 96(r4) +; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: vmrghh v4, v4, v0 +; CHECK-P9-NEXT: mtvsrd v0, r5 +; CHECK-P9-NEXT: vmrglw v2, v3, v2 +; CHECK-P9-NEXT: vmrghh v5, v5, v0 +; CHECK-P9-NEXT: mfvsrwz r4, f4 +; CHECK-P9-NEXT: vmrglw v4, v5, v4 +; CHECK-P9-NEXT: mtvsrd v3, r4 ; CHECK-P9-NEXT: mfvsrwz r4, f3 -; CHECK-P9-NEXT: mtvsrd f3, r4 +; CHECK-P9-NEXT: xscvdpsxws f3, f2 +; CHECK-P9-NEXT: xxswapd vs2, vs2 +; CHECK-P9-NEXT: xxmrgld vs4, v4, v2 +; CHECK-P9-NEXT: mtvsrd v2, r4 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: vmrghh v2, v3, v2 +; CHECK-P9-NEXT: stxv vs4, 0(r3) +; CHECK-P9-NEXT: mfvsrwz r4, f3 +; CHECK-P9-NEXT: mtvsrd v3, r4 ; CHECK-P9-NEXT: mfvsrwz r4, f2 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: vmrglw v3, v5, v4 -; CHECK-P9-NEXT: xxmrgld vs4, v3, v2 -; CHECK-P9-NEXT: xxswapd v2, vs3 -; CHECK-P9-NEXT: vmrglh v0, v0, v1 -; CHECK-P9-NEXT: mtvsrd f2, r4 -; CHECK-P9-NEXT: xxswapd v3, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 +; CHECK-P9-NEXT: mtvsrd v4, r4 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: vmrghh v3, v3, v4 ; CHECK-P9-NEXT: mfvsrwz r4, f2 -; CHECK-P9-NEXT: mtvsrd f2, r4 +; CHECK-P9-NEXT: vmrglw v2, v3, v2 +; CHECK-P9-NEXT: mtvsrd v3, r4 ; CHECK-P9-NEXT: mfvsrwz r4, f1 -; CHECK-P9-NEXT: mtvsrd f1, r4 -; CHECK-P9-NEXT: xxswapd v4, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 +; CHECK-P9-NEXT: mtvsrd v4, r4 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: vmrghh v3, v3, v4 ; CHECK-P9-NEXT: mfvsrwz r4, f1 -; CHECK-P9-NEXT: mtvsrd f1, r4 +; CHECK-P9-NEXT: mtvsrd v4, r4 ; CHECK-P9-NEXT: mfvsrwz r4, f0 -; CHECK-P9-NEXT: vmrglh v2, v2, v3 -; CHECK-P9-NEXT: xxswapd v3, vs2 -; CHECK-P9-NEXT: vmrglh v3, v3, v4 -; CHECK-P9-NEXT: xxswapd v4, vs1 -; CHECK-P9-NEXT: vmrglw v2, v2, v0 -; CHECK-P9-NEXT: mtvsrd f0, r4 -; CHECK-P9-NEXT: xxswapd v5, vs0 -; CHECK-P9-NEXT: vmrglh v4, v4, v5 +; CHECK-P9-NEXT: mtvsrd v5, r4 +; CHECK-P9-NEXT: vmrghh v4, v4, v5 ; CHECK-P9-NEXT: vmrglw v3, v4, v3 ; CHECK-P9-NEXT: xxmrgld vs0, v3, v2 ; CHECK-P9-NEXT: stxv vs0, 16(r3) -; CHECK-P9-NEXT: stxv vs4, 0(r3) ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test16elt_signed: diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i32_elts.ll @@ -16,12 +16,10 @@ ; CHECK-P8-NEXT: xscvdpuxws f1, v2 ; CHECK-P8-NEXT: xscvdpuxws f0, f0 ; CHECK-P8-NEXT: mfvsrwz r3, f1 +; CHECK-P8-NEXT: mtvsrd v2, r3 ; CHECK-P8-NEXT: mfvsrwz r4, f0 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: mtvsrd f1, r4 -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: xxswapd v3, vs1 -; CHECK-P8-NEXT: vmrglw v2, v2, v3 +; CHECK-P8-NEXT: mtvsrd v3, r4 +; CHECK-P8-NEXT: vmrghw v2, v2, v3 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mfvsrd r3, f0 ; CHECK-P8-NEXT: blr @@ -310,12 +308,10 @@ ; CHECK-P8-NEXT: xscvdpsxws f1, v2 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mfvsrwz r3, f1 +; CHECK-P8-NEXT: mtvsrd v2, r3 ; CHECK-P8-NEXT: mfvsrwz r4, f0 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: mtvsrd f1, r4 -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: xxswapd v3, vs1 -; CHECK-P8-NEXT: vmrglw v2, v2, v3 +; CHECK-P8-NEXT: mtvsrd v3, r4 +; CHECK-P8-NEXT: vmrghw v2, v2, v3 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mfvsrd r3, f0 ; CHECK-P8-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll @@ -16,12 +16,10 @@ ; CHECK-P8-NEXT: xscvdpsxws f1, v2 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mfvsrwz r3, f1 +; CHECK-P8-NEXT: mtvsrd v2, r3 ; CHECK-P8-NEXT: mfvsrwz r4, f0 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: mtvsrd f1, r4 -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: xxswapd v3, vs1 -; CHECK-P8-NEXT: vmrglb v2, v2, v3 +; CHECK-P8-NEXT: mtvsrd v3, r4 +; CHECK-P8-NEXT: vmrghb v2, v2, v3 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mfvsrd r3, f0 ; CHECK-P8-NEXT: clrldi r3, r3, 48 @@ -33,15 +31,13 @@ ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: xscvdpsxws f0, v2 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 -; CHECK-P9-NEXT: xxswapd v3, vs0 ; CHECK-P9-NEXT: xxswapd vs0, v2 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: addi r3, r1, -2 -; CHECK-P9-NEXT: xxswapd v2, vs0 -; CHECK-P9-NEXT: vmrglb v2, v3, v2 +; CHECK-P9-NEXT: vmrghb v2, v3, v2 ; CHECK-P9-NEXT: vsldoi v2, v2, v2, 8 ; CHECK-P9-NEXT: stxsihx v2, 0, r3 ; CHECK-P9-NEXT: lhz r3, -2(r1) @@ -84,18 +80,14 @@ ; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: mfvsrwz r3, f2 ; CHECK-P8-NEXT: mfvsrwz r4, f3 -; CHECK-P8-NEXT: mtvsrd f2, r3 -; CHECK-P8-NEXT: mtvsrd f3, r4 +; CHECK-P8-NEXT: mtvsrd v2, r3 +; CHECK-P8-NEXT: mtvsrd v3, r4 ; CHECK-P8-NEXT: mfvsrwz r3, f0 -; CHECK-P8-NEXT: xxswapd v2, vs2 ; CHECK-P8-NEXT: mfvsrwz r4, f1 -; CHECK-P8-NEXT: xxswapd v4, vs3 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: mtvsrd f1, r4 -; CHECK-P8-NEXT: xxswapd v3, vs0 -; CHECK-P8-NEXT: xxswapd v5, vs1 -; CHECK-P8-NEXT: vmrglb v2, v3, v2 -; CHECK-P8-NEXT: vmrglb v3, v5, v4 +; CHECK-P8-NEXT: mtvsrd v4, r3 +; CHECK-P8-NEXT: mtvsrd v5, r4 +; CHECK-P8-NEXT: vmrghb v2, v4, v2 +; CHECK-P8-NEXT: vmrghb v3, v5, v3 ; CHECK-P8-NEXT: vmrglh v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mfvsrwz r3, f0 @@ -109,24 +101,20 @@ ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: lxv vs0, 16(r3) ; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: xxswapd v2, vs2 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: xxswapd v3, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: vmrghb v2, v2, v3 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 -; CHECK-P9-NEXT: vmrglb v2, v2, v3 -; CHECK-P9-NEXT: xxswapd v3, vs1 -; CHECK-P9-NEXT: xxswapd v4, vs0 -; CHECK-P9-NEXT: vmrglb v3, v3, v4 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: li r3, 0 +; CHECK-P9-NEXT: vmrghb v3, v3, v4 +; CHECK-P9-NEXT: vmrglh v2, v3, v2 ; CHECK-P9-NEXT: vextuwrx r3, r3, v2 ; CHECK-P9-NEXT: blr ; @@ -185,36 +173,28 @@ ; CHECK-P8-NEXT: xxswapd vs3, vs3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mfvsrwz r3, f4 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: mfvsrwz r3, f4 ; CHECK-P8-NEXT: mfvsrwz r4, f5 -; CHECK-P8-NEXT: mtvsrd f4, r3 +; CHECK-P8-NEXT: mtvsrd v2, r3 ; CHECK-P8-NEXT: mfvsrwz r3, f6 -; CHECK-P8-NEXT: mtvsrd f5, r4 -; CHECK-P8-NEXT: xxswapd v2, vs4 +; CHECK-P8-NEXT: mtvsrd v3, r4 ; CHECK-P8-NEXT: mfvsrwz r4, f7 -; CHECK-P8-NEXT: mtvsrd f6, r3 -; CHECK-P8-NEXT: xxswapd v3, vs5 +; CHECK-P8-NEXT: mtvsrd v4, r3 +; CHECK-P8-NEXT: mtvsrd v5, r4 ; CHECK-P8-NEXT: mfvsrwz r3, f0 -; CHECK-P8-NEXT: mtvsrd f7, r4 -; CHECK-P8-NEXT: xxswapd v4, vs6 ; CHECK-P8-NEXT: mfvsrwz r4, f1 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: xxswapd v1, vs7 +; CHECK-P8-NEXT: mtvsrd v0, r3 +; CHECK-P8-NEXT: mtvsrd v1, r4 ; CHECK-P8-NEXT: mfvsrwz r3, f2 -; CHECK-P8-NEXT: mtvsrd f1, r4 -; CHECK-P8-NEXT: xxswapd v5, vs0 ; CHECK-P8-NEXT: mfvsrwz r4, f3 -; CHECK-P8-NEXT: mtvsrd f2, r3 -; CHECK-P8-NEXT: xxswapd v0, vs1 -; CHECK-P8-NEXT: mtvsrd f0, r4 -; CHECK-P8-NEXT: xxswapd v6, vs2 -; CHECK-P8-NEXT: vmrglb v2, v5, v2 -; CHECK-P8-NEXT: xxswapd v5, vs0 -; CHECK-P8-NEXT: vmrglb v3, v0, v3 -; CHECK-P8-NEXT: vmrglb v4, v6, v4 -; CHECK-P8-NEXT: vmrglb v5, v5, v1 +; CHECK-P8-NEXT: vmrghb v2, v0, v2 +; CHECK-P8-NEXT: vmrghb v3, v1, v3 +; CHECK-P8-NEXT: mtvsrd v0, r3 +; CHECK-P8-NEXT: mtvsrd v1, r4 +; CHECK-P8-NEXT: vmrghb v4, v0, v4 +; CHECK-P8-NEXT: vmrghb v5, v1, v5 ; CHECK-P8-NEXT: vmrglh v2, v3, v2 ; CHECK-P8-NEXT: vmrglh v3, v5, v4 ; CHECK-P8-NEXT: vmrglw v2, v3, v2 @@ -228,47 +208,39 @@ ; CHECK-P9-NEXT: xscvdpsxws f4, f3 ; CHECK-P9-NEXT: xxswapd vs3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: lxv vs2, 16(r3) ; CHECK-P9-NEXT: lxv vs0, 48(r3) ; CHECK-P9-NEXT: lxv vs1, 32(r3) -; CHECK-P9-NEXT: lxv vs2, 16(r3) ; CHECK-P9-NEXT: mfvsrwz r3, f4 -; CHECK-P9-NEXT: mtvsrd f4, r3 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f3 -; CHECK-P9-NEXT: xxswapd v2, vs4 -; CHECK-P9-NEXT: mtvsrd f3, r3 -; CHECK-P9-NEXT: xxswapd v3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs2 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: vmrghb v2, v2, v3 ; CHECK-P9-NEXT: mfvsrwz r3, f3 -; CHECK-P9-NEXT: mtvsrd f3, r3 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: xxswapd v4, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: vmrghb v3, v3, v4 ; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: vmrglb v2, v2, v3 -; CHECK-P9-NEXT: xxswapd v3, vs3 -; CHECK-P9-NEXT: vmrglb v3, v3, v4 ; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: xxswapd v3, vs2 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: xxswapd v4, vs1 +; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mfvsrwz r3, f1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: vmrghb v3, v3, v4 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 -; CHECK-P9-NEXT: vmrglb v3, v3, v4 -; CHECK-P9-NEXT: xxswapd v4, vs1 -; CHECK-P9-NEXT: xxswapd v5, vs0 -; CHECK-P9-NEXT: vmrglb v4, v4, v5 +; CHECK-P9-NEXT: mtvsrd v5, r3 +; CHECK-P9-NEXT: vmrghb v4, v4, v5 ; CHECK-P9-NEXT: vmrglh v3, v4, v3 ; CHECK-P9-NEXT: vmrglw v2, v3, v2 ; CHECK-P9-NEXT: mfvsrld r3, v2 @@ -364,79 +336,63 @@ ; CHECK-P8-NEXT: xxswapd vs7, vs7 ; CHECK-P8-NEXT: xscvdpsxws v2, f9 ; CHECK-P8-NEXT: xxswapd vs9, vs9 -; CHECK-P8-NEXT: mfvsrwz r3, f4 ; CHECK-P8-NEXT: xscvdpsxws v3, f11 ; CHECK-P8-NEXT: xxswapd vs11, vs11 +; CHECK-P8-NEXT: mfvsrwz r3, f4 ; CHECK-P8-NEXT: mfvsrwz r4, f6 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: mtvsrd f4, r3 -; CHECK-P8-NEXT: mfvsrwz r3, f8 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: xxswapd v4, vs4 -; CHECK-P8-NEXT: mtvsrd f6, r4 +; CHECK-P8-NEXT: mtvsrd v4, r3 +; CHECK-P8-NEXT: mfvsrwz r3, f8 +; CHECK-P8-NEXT: mtvsrd v5, r4 ; CHECK-P8-NEXT: mfvsrwz r4, f10 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: xxswapd v5, vs6 -; CHECK-P8-NEXT: mtvsrd f8, r3 -; CHECK-P8-NEXT: mfvsrwz r3, f12 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: xxswapd v0, vs8 -; CHECK-P8-NEXT: mtvsrd f10, r4 +; CHECK-P8-NEXT: mtvsrd v0, r3 +; CHECK-P8-NEXT: mfvsrwz r3, f12 +; CHECK-P8-NEXT: mtvsrd v1, r4 ; CHECK-P8-NEXT: mfvsrwz r4, f13 ; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: xxswapd v1, vs10 -; CHECK-P8-NEXT: mtvsrd f12, r3 -; CHECK-P8-NEXT: mfvsrwz r3, v2 ; CHECK-P8-NEXT: xscvdpsxws f7, f7 -; CHECK-P8-NEXT: xxswapd v6, vs12 -; CHECK-P8-NEXT: mtvsrd f13, r4 +; CHECK-P8-NEXT: mtvsrd v6, r3 +; CHECK-P8-NEXT: mfvsrwz r3, v2 +; CHECK-P8-NEXT: mtvsrd v2, r4 ; CHECK-P8-NEXT: mfvsrwz r4, v3 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: xxswapd v7, vs13 -; CHECK-P8-NEXT: mfvsrwz r3, f0 ; CHECK-P8-NEXT: xscvdpsxws f9, f9 -; CHECK-P8-NEXT: xxswapd v2, v2 ; CHECK-P8-NEXT: xscvdpsxws f11, f11 -; CHECK-P8-NEXT: mtvsrd v3, r4 +; CHECK-P8-NEXT: mtvsrd v3, r3 +; CHECK-P8-NEXT: mtvsrd v7, r4 +; CHECK-P8-NEXT: mfvsrwz r3, f0 ; CHECK-P8-NEXT: mfvsrwz r4, f1 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: xxswapd v3, v3 +; CHECK-P8-NEXT: mtvsrd v8, r3 +; CHECK-P8-NEXT: mtvsrd v9, r4 ; CHECK-P8-NEXT: mfvsrwz r3, f2 -; CHECK-P8-NEXT: mtvsrd f1, r4 -; CHECK-P8-NEXT: xxswapd v8, vs0 ; CHECK-P8-NEXT: mfvsrwz r4, f3 -; CHECK-P8-NEXT: mtvsrd f2, r3 -; CHECK-P8-NEXT: xxswapd v9, vs1 +; CHECK-P8-NEXT: vmrghb v4, v8, v4 +; CHECK-P8-NEXT: vmrghb v5, v9, v5 +; CHECK-P8-NEXT: mtvsrd v8, r3 +; CHECK-P8-NEXT: mtvsrd v9, r4 ; CHECK-P8-NEXT: mfvsrwz r3, f5 -; CHECK-P8-NEXT: mtvsrd f3, r4 -; CHECK-P8-NEXT: xxswapd v10, vs2 ; CHECK-P8-NEXT: mfvsrwz r4, f7 -; CHECK-P8-NEXT: mtvsrd f5, r3 +; CHECK-P8-NEXT: vmrghb v0, v8, v0 +; CHECK-P8-NEXT: vmrghb v1, v9, v1 +; CHECK-P8-NEXT: mtvsrd v8, r3 +; CHECK-P8-NEXT: mtvsrd v9, r4 ; CHECK-P8-NEXT: mfvsrwz r3, f9 -; CHECK-P8-NEXT: mtvsrd f7, r4 ; CHECK-P8-NEXT: mfvsrwz r4, f11 -; CHECK-P8-NEXT: vmrglb v4, v8, v4 -; CHECK-P8-NEXT: xxswapd v8, vs3 -; CHECK-P8-NEXT: vmrglb v5, v9, v5 -; CHECK-P8-NEXT: xxswapd v9, vs5 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: mtvsrd f1, r4 -; CHECK-P8-NEXT: vmrglb v0, v10, v0 -; CHECK-P8-NEXT: xxswapd v10, vs7 -; CHECK-P8-NEXT: vmrglb v1, v8, v1 -; CHECK-P8-NEXT: xxswapd v8, vs0 -; CHECK-P8-NEXT: vmrglb v6, v9, v6 -; CHECK-P8-NEXT: xxswapd v9, vs1 -; CHECK-P8-NEXT: vmrglb v7, v10, v7 -; CHECK-P8-NEXT: vmrglb v2, v8, v2 -; CHECK-P8-NEXT: vmrglb v3, v9, v3 +; CHECK-P8-NEXT: vmrghb v6, v8, v6 +; CHECK-P8-NEXT: vmrghb v2, v9, v2 +; CHECK-P8-NEXT: mtvsrd v8, r3 +; CHECK-P8-NEXT: mtvsrd v9, r4 +; CHECK-P8-NEXT: vmrghb v3, v8, v3 +; CHECK-P8-NEXT: vmrghb v7, v9, v7 ; CHECK-P8-NEXT: vmrglh v4, v5, v4 ; CHECK-P8-NEXT: vmrglh v5, v1, v0 -; CHECK-P8-NEXT: vmrglh v0, v7, v6 -; CHECK-P8-NEXT: vmrglh v2, v3, v2 -; CHECK-P8-NEXT: vmrglw v3, v5, v4 -; CHECK-P8-NEXT: vmrglw v2, v2, v0 -; CHECK-P8-NEXT: xxmrgld v2, v2, v3 +; CHECK-P8-NEXT: vmrglh v2, v2, v6 +; CHECK-P8-NEXT: vmrglh v3, v7, v3 +; CHECK-P8-NEXT: vmrglw v4, v5, v4 +; CHECK-P8-NEXT: vmrglw v2, v3, v2 +; CHECK-P8-NEXT: xxmrgld v2, v2, v4 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test16elt: @@ -445,94 +401,78 @@ ; CHECK-P9-NEXT: xscvdpsxws f8, f7 ; CHECK-P9-NEXT: xxswapd vs7, vs7 ; CHECK-P9-NEXT: xscvdpsxws f7, f7 +; CHECK-P9-NEXT: lxv vs6, 16(r3) ; CHECK-P9-NEXT: lxv vs0, 112(r3) ; CHECK-P9-NEXT: lxv vs1, 96(r3) ; CHECK-P9-NEXT: lxv vs2, 80(r3) ; CHECK-P9-NEXT: lxv vs3, 64(r3) ; CHECK-P9-NEXT: lxv vs4, 48(r3) ; CHECK-P9-NEXT: lxv vs5, 32(r3) -; CHECK-P9-NEXT: lxv vs6, 16(r3) ; CHECK-P9-NEXT: mfvsrwz r3, f8 -; CHECK-P9-NEXT: mtvsrd f8, r3 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f7 -; CHECK-P9-NEXT: xxswapd v2, vs8 -; CHECK-P9-NEXT: mtvsrd f7, r3 -; CHECK-P9-NEXT: xxswapd v3, vs7 ; CHECK-P9-NEXT: xscvdpsxws f7, f6 ; CHECK-P9-NEXT: xxswapd vs6, vs6 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvdpsxws f6, f6 +; CHECK-P9-NEXT: vmrghb v2, v2, v3 ; CHECK-P9-NEXT: mfvsrwz r3, f7 -; CHECK-P9-NEXT: mtvsrd f7, r3 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f6 -; CHECK-P9-NEXT: mtvsrd f6, r3 -; CHECK-P9-NEXT: xxswapd v4, vs6 ; CHECK-P9-NEXT: xscvdpsxws f6, f5 ; CHECK-P9-NEXT: xxswapd vs5, vs5 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvdpsxws f5, f5 +; CHECK-P9-NEXT: vmrghb v3, v3, v4 ; CHECK-P9-NEXT: mfvsrwz r3, f6 -; CHECK-P9-NEXT: mtvsrd f6, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f5 -; CHECK-P9-NEXT: vmrglb v2, v2, v3 -; CHECK-P9-NEXT: xxswapd v3, vs7 -; CHECK-P9-NEXT: vmrglb v3, v3, v4 ; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: xxswapd v3, vs6 -; CHECK-P9-NEXT: mtvsrd f5, r3 -; CHECK-P9-NEXT: xxswapd v4, vs5 +; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mfvsrwz r3, f5 ; CHECK-P9-NEXT: xscvdpsxws f5, f4 ; CHECK-P9-NEXT: xxswapd vs4, vs4 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvdpsxws f4, f4 +; CHECK-P9-NEXT: vmrghb v3, v3, v4 ; CHECK-P9-NEXT: mfvsrwz r3, f5 -; CHECK-P9-NEXT: mtvsrd f5, r3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f4 -; CHECK-P9-NEXT: mtvsrd f4, r3 -; CHECK-P9-NEXT: xxswapd v5, vs4 ; CHECK-P9-NEXT: xscvdpsxws f4, f3 ; CHECK-P9-NEXT: xxswapd vs3, vs3 +; CHECK-P9-NEXT: mtvsrd v5, r3 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: vmrglb v3, v3, v4 -; CHECK-P9-NEXT: xxswapd v4, vs5 -; CHECK-P9-NEXT: vmrglb v4, v4, v5 +; CHECK-P9-NEXT: vmrghb v4, v4, v5 ; CHECK-P9-NEXT: vmrglh v3, v4, v3 ; CHECK-P9-NEXT: mfvsrwz r3, f4 -; CHECK-P9-NEXT: mtvsrd f4, r3 +; CHECK-P9-NEXT: vmrglw v2, v3, v2 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f3 -; CHECK-P9-NEXT: mtvsrd f3, r3 -; CHECK-P9-NEXT: xxswapd v4, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs2 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: vmrghb v3, v3, v4 ; CHECK-P9-NEXT: mfvsrwz r3, f3 -; CHECK-P9-NEXT: mtvsrd f3, r3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: xxswapd v5, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 +; CHECK-P9-NEXT: mtvsrd v5, r3 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: xxswapd v3, vs4 -; CHECK-P9-NEXT: vmrglb v3, v3, v4 -; CHECK-P9-NEXT: xxswapd v4, vs3 -; CHECK-P9-NEXT: vmrglb v4, v4, v5 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 +; CHECK-P9-NEXT: vmrghb v4, v4, v5 ; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 +; CHECK-P9-NEXT: vmrglh v3, v4, v3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: xxswapd v4, vs2 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: xxswapd v5, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 +; CHECK-P9-NEXT: mtvsrd v5, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: vmrghb v4, v4, v5 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mtvsrd v5, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 -; CHECK-P9-NEXT: vmrglb v4, v4, v5 -; CHECK-P9-NEXT: xxswapd v5, vs1 -; CHECK-P9-NEXT: xxswapd v0, vs0 -; CHECK-P9-NEXT: vmrglb v5, v5, v0 +; CHECK-P9-NEXT: mtvsrd v0, r3 +; CHECK-P9-NEXT: vmrghb v5, v5, v0 ; CHECK-P9-NEXT: vmrglh v4, v5, v4 ; CHECK-P9-NEXT: vmrglw v3, v4, v3 ; CHECK-P9-NEXT: xxmrgld v2, v3, v2 @@ -649,12 +589,10 @@ ; CHECK-P8-NEXT: xscvdpsxws f1, v2 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: mfvsrwz r3, f1 +; CHECK-P8-NEXT: mtvsrd v2, r3 ; CHECK-P8-NEXT: mfvsrwz r4, f0 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: mtvsrd f1, r4 -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: xxswapd v3, vs1 -; CHECK-P8-NEXT: vmrglb v2, v2, v3 +; CHECK-P8-NEXT: mtvsrd v3, r4 +; CHECK-P8-NEXT: vmrghb v2, v2, v3 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mfvsrd r3, f0 ; CHECK-P8-NEXT: clrldi r3, r3, 48 @@ -666,15 +604,13 @@ ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: xscvdpsxws f0, v2 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 -; CHECK-P9-NEXT: xxswapd v3, vs0 ; CHECK-P9-NEXT: xxswapd vs0, v2 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: addi r3, r1, -2 -; CHECK-P9-NEXT: xxswapd v2, vs0 -; CHECK-P9-NEXT: vmrglb v2, v3, v2 +; CHECK-P9-NEXT: vmrghb v2, v3, v2 ; CHECK-P9-NEXT: vsldoi v2, v2, v2, 8 ; CHECK-P9-NEXT: stxsihx v2, 0, r3 ; CHECK-P9-NEXT: lhz r3, -2(r1) @@ -717,18 +653,14 @@ ; CHECK-P8-NEXT: xscvdpsxws f1, f1 ; CHECK-P8-NEXT: mfvsrwz r3, f2 ; CHECK-P8-NEXT: mfvsrwz r4, f3 -; CHECK-P8-NEXT: mtvsrd f2, r3 -; CHECK-P8-NEXT: mtvsrd f3, r4 +; CHECK-P8-NEXT: mtvsrd v2, r3 +; CHECK-P8-NEXT: mtvsrd v3, r4 ; CHECK-P8-NEXT: mfvsrwz r3, f0 -; CHECK-P8-NEXT: xxswapd v2, vs2 ; CHECK-P8-NEXT: mfvsrwz r4, f1 -; CHECK-P8-NEXT: xxswapd v4, vs3 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: mtvsrd f1, r4 -; CHECK-P8-NEXT: xxswapd v3, vs0 -; CHECK-P8-NEXT: xxswapd v5, vs1 -; CHECK-P8-NEXT: vmrglb v2, v3, v2 -; CHECK-P8-NEXT: vmrglb v3, v5, v4 +; CHECK-P8-NEXT: mtvsrd v4, r3 +; CHECK-P8-NEXT: mtvsrd v5, r4 +; CHECK-P8-NEXT: vmrghb v2, v4, v2 +; CHECK-P8-NEXT: vmrghb v3, v5, v3 ; CHECK-P8-NEXT: vmrglh v2, v3, v2 ; CHECK-P8-NEXT: xxswapd vs0, v2 ; CHECK-P8-NEXT: mfvsrwz r3, f0 @@ -742,24 +674,20 @@ ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: lxv vs0, 16(r3) ; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: xxswapd v2, vs2 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: xxswapd v3, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: vmrghb v2, v2, v3 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 -; CHECK-P9-NEXT: vmrglb v2, v2, v3 -; CHECK-P9-NEXT: xxswapd v3, vs1 -; CHECK-P9-NEXT: xxswapd v4, vs0 -; CHECK-P9-NEXT: vmrglb v3, v3, v4 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: li r3, 0 +; CHECK-P9-NEXT: vmrghb v3, v3, v4 +; CHECK-P9-NEXT: vmrglh v2, v3, v2 ; CHECK-P9-NEXT: vextuwrx r3, r3, v2 ; CHECK-P9-NEXT: blr ; @@ -818,36 +746,28 @@ ; CHECK-P8-NEXT: xxswapd vs3, vs3 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: mfvsrwz r3, f4 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 +; CHECK-P8-NEXT: mfvsrwz r3, f4 ; CHECK-P8-NEXT: mfvsrwz r4, f5 -; CHECK-P8-NEXT: mtvsrd f4, r3 +; CHECK-P8-NEXT: mtvsrd v2, r3 ; CHECK-P8-NEXT: mfvsrwz r3, f6 -; CHECK-P8-NEXT: mtvsrd f5, r4 -; CHECK-P8-NEXT: xxswapd v2, vs4 +; CHECK-P8-NEXT: mtvsrd v3, r4 ; CHECK-P8-NEXT: mfvsrwz r4, f7 -; CHECK-P8-NEXT: mtvsrd f6, r3 -; CHECK-P8-NEXT: xxswapd v3, vs5 +; CHECK-P8-NEXT: mtvsrd v4, r3 +; CHECK-P8-NEXT: mtvsrd v5, r4 ; CHECK-P8-NEXT: mfvsrwz r3, f0 -; CHECK-P8-NEXT: mtvsrd f7, r4 -; CHECK-P8-NEXT: xxswapd v4, vs6 ; CHECK-P8-NEXT: mfvsrwz r4, f1 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: xxswapd v1, vs7 +; CHECK-P8-NEXT: mtvsrd v0, r3 +; CHECK-P8-NEXT: mtvsrd v1, r4 ; CHECK-P8-NEXT: mfvsrwz r3, f2 -; CHECK-P8-NEXT: mtvsrd f1, r4 -; CHECK-P8-NEXT: xxswapd v5, vs0 ; CHECK-P8-NEXT: mfvsrwz r4, f3 -; CHECK-P8-NEXT: mtvsrd f2, r3 -; CHECK-P8-NEXT: xxswapd v0, vs1 -; CHECK-P8-NEXT: mtvsrd f0, r4 -; CHECK-P8-NEXT: xxswapd v6, vs2 -; CHECK-P8-NEXT: vmrglb v2, v5, v2 -; CHECK-P8-NEXT: xxswapd v5, vs0 -; CHECK-P8-NEXT: vmrglb v3, v0, v3 -; CHECK-P8-NEXT: vmrglb v4, v6, v4 -; CHECK-P8-NEXT: vmrglb v5, v5, v1 +; CHECK-P8-NEXT: vmrghb v2, v0, v2 +; CHECK-P8-NEXT: vmrghb v3, v1, v3 +; CHECK-P8-NEXT: mtvsrd v0, r3 +; CHECK-P8-NEXT: mtvsrd v1, r4 +; CHECK-P8-NEXT: vmrghb v4, v0, v4 +; CHECK-P8-NEXT: vmrghb v5, v1, v5 ; CHECK-P8-NEXT: vmrglh v2, v3, v2 ; CHECK-P8-NEXT: vmrglh v3, v5, v4 ; CHECK-P8-NEXT: vmrglw v2, v3, v2 @@ -861,47 +781,39 @@ ; CHECK-P9-NEXT: xscvdpsxws f4, f3 ; CHECK-P9-NEXT: xxswapd vs3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: lxv vs2, 16(r3) ; CHECK-P9-NEXT: lxv vs0, 48(r3) ; CHECK-P9-NEXT: lxv vs1, 32(r3) -; CHECK-P9-NEXT: lxv vs2, 16(r3) ; CHECK-P9-NEXT: mfvsrwz r3, f4 -; CHECK-P9-NEXT: mtvsrd f4, r3 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f3 -; CHECK-P9-NEXT: xxswapd v2, vs4 -; CHECK-P9-NEXT: mtvsrd f3, r3 -; CHECK-P9-NEXT: xxswapd v3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs2 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: vmrghb v2, v2, v3 ; CHECK-P9-NEXT: mfvsrwz r3, f3 -; CHECK-P9-NEXT: mtvsrd f3, r3 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: xxswapd v4, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: vmrghb v3, v3, v4 ; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: vmrglb v2, v2, v3 -; CHECK-P9-NEXT: xxswapd v3, vs3 -; CHECK-P9-NEXT: vmrglb v3, v3, v4 ; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: xxswapd v3, vs2 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: xxswapd v4, vs1 +; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mfvsrwz r3, f1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: vmrghb v3, v3, v4 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 -; CHECK-P9-NEXT: vmrglb v3, v3, v4 -; CHECK-P9-NEXT: xxswapd v4, vs1 -; CHECK-P9-NEXT: xxswapd v5, vs0 -; CHECK-P9-NEXT: vmrglb v4, v4, v5 +; CHECK-P9-NEXT: mtvsrd v5, r3 +; CHECK-P9-NEXT: vmrghb v4, v4, v5 ; CHECK-P9-NEXT: vmrglh v3, v4, v3 ; CHECK-P9-NEXT: vmrglw v2, v3, v2 ; CHECK-P9-NEXT: mfvsrld r3, v2 @@ -997,79 +909,63 @@ ; CHECK-P8-NEXT: xxswapd vs7, vs7 ; CHECK-P8-NEXT: xscvdpsxws v2, f9 ; CHECK-P8-NEXT: xxswapd vs9, vs9 -; CHECK-P8-NEXT: mfvsrwz r3, f4 ; CHECK-P8-NEXT: xscvdpsxws v3, f11 ; CHECK-P8-NEXT: xxswapd vs11, vs11 +; CHECK-P8-NEXT: mfvsrwz r3, f4 ; CHECK-P8-NEXT: mfvsrwz r4, f6 ; CHECK-P8-NEXT: xscvdpsxws f0, f0 -; CHECK-P8-NEXT: mtvsrd f4, r3 -; CHECK-P8-NEXT: mfvsrwz r3, f8 ; CHECK-P8-NEXT: xscvdpsxws f1, f1 -; CHECK-P8-NEXT: xxswapd v4, vs4 -; CHECK-P8-NEXT: mtvsrd f6, r4 +; CHECK-P8-NEXT: mtvsrd v4, r3 +; CHECK-P8-NEXT: mfvsrwz r3, f8 +; CHECK-P8-NEXT: mtvsrd v5, r4 ; CHECK-P8-NEXT: mfvsrwz r4, f10 ; CHECK-P8-NEXT: xscvdpsxws f2, f2 -; CHECK-P8-NEXT: xxswapd v5, vs6 -; CHECK-P8-NEXT: mtvsrd f8, r3 -; CHECK-P8-NEXT: mfvsrwz r3, f12 ; CHECK-P8-NEXT: xscvdpsxws f3, f3 -; CHECK-P8-NEXT: xxswapd v0, vs8 -; CHECK-P8-NEXT: mtvsrd f10, r4 +; CHECK-P8-NEXT: mtvsrd v0, r3 +; CHECK-P8-NEXT: mfvsrwz r3, f12 +; CHECK-P8-NEXT: mtvsrd v1, r4 ; CHECK-P8-NEXT: mfvsrwz r4, f13 ; CHECK-P8-NEXT: xscvdpsxws f5, f5 -; CHECK-P8-NEXT: xxswapd v1, vs10 -; CHECK-P8-NEXT: mtvsrd f12, r3 -; CHECK-P8-NEXT: mfvsrwz r3, v2 ; CHECK-P8-NEXT: xscvdpsxws f7, f7 -; CHECK-P8-NEXT: xxswapd v6, vs12 -; CHECK-P8-NEXT: mtvsrd f13, r4 +; CHECK-P8-NEXT: mtvsrd v6, r3 +; CHECK-P8-NEXT: mfvsrwz r3, v2 +; CHECK-P8-NEXT: mtvsrd v2, r4 ; CHECK-P8-NEXT: mfvsrwz r4, v3 -; CHECK-P8-NEXT: mtvsrd v2, r3 -; CHECK-P8-NEXT: xxswapd v7, vs13 -; CHECK-P8-NEXT: mfvsrwz r3, f0 ; CHECK-P8-NEXT: xscvdpsxws f9, f9 -; CHECK-P8-NEXT: xxswapd v2, v2 ; CHECK-P8-NEXT: xscvdpsxws f11, f11 -; CHECK-P8-NEXT: mtvsrd v3, r4 +; CHECK-P8-NEXT: mtvsrd v3, r3 +; CHECK-P8-NEXT: mtvsrd v7, r4 +; CHECK-P8-NEXT: mfvsrwz r3, f0 ; CHECK-P8-NEXT: mfvsrwz r4, f1 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: xxswapd v3, v3 +; CHECK-P8-NEXT: mtvsrd v8, r3 +; CHECK-P8-NEXT: mtvsrd v9, r4 ; CHECK-P8-NEXT: mfvsrwz r3, f2 -; CHECK-P8-NEXT: mtvsrd f1, r4 -; CHECK-P8-NEXT: xxswapd v8, vs0 ; CHECK-P8-NEXT: mfvsrwz r4, f3 -; CHECK-P8-NEXT: mtvsrd f2, r3 -; CHECK-P8-NEXT: xxswapd v9, vs1 +; CHECK-P8-NEXT: vmrghb v4, v8, v4 +; CHECK-P8-NEXT: vmrghb v5, v9, v5 +; CHECK-P8-NEXT: mtvsrd v8, r3 +; CHECK-P8-NEXT: mtvsrd v9, r4 ; CHECK-P8-NEXT: mfvsrwz r3, f5 -; CHECK-P8-NEXT: mtvsrd f3, r4 -; CHECK-P8-NEXT: xxswapd v10, vs2 ; CHECK-P8-NEXT: mfvsrwz r4, f7 -; CHECK-P8-NEXT: mtvsrd f5, r3 +; CHECK-P8-NEXT: vmrghb v0, v8, v0 +; CHECK-P8-NEXT: vmrghb v1, v9, v1 +; CHECK-P8-NEXT: mtvsrd v8, r3 +; CHECK-P8-NEXT: mtvsrd v9, r4 ; CHECK-P8-NEXT: mfvsrwz r3, f9 -; CHECK-P8-NEXT: mtvsrd f7, r4 ; CHECK-P8-NEXT: mfvsrwz r4, f11 -; CHECK-P8-NEXT: vmrglb v4, v8, v4 -; CHECK-P8-NEXT: xxswapd v8, vs3 -; CHECK-P8-NEXT: vmrglb v5, v9, v5 -; CHECK-P8-NEXT: xxswapd v9, vs5 -; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: mtvsrd f1, r4 -; CHECK-P8-NEXT: vmrglb v0, v10, v0 -; CHECK-P8-NEXT: xxswapd v10, vs7 -; CHECK-P8-NEXT: vmrglb v1, v8, v1 -; CHECK-P8-NEXT: xxswapd v8, vs0 -; CHECK-P8-NEXT: vmrglb v6, v9, v6 -; CHECK-P8-NEXT: xxswapd v9, vs1 -; CHECK-P8-NEXT: vmrglb v7, v10, v7 -; CHECK-P8-NEXT: vmrglb v2, v8, v2 -; CHECK-P8-NEXT: vmrglb v3, v9, v3 +; CHECK-P8-NEXT: vmrghb v6, v8, v6 +; CHECK-P8-NEXT: vmrghb v2, v9, v2 +; CHECK-P8-NEXT: mtvsrd v8, r3 +; CHECK-P8-NEXT: mtvsrd v9, r4 +; CHECK-P8-NEXT: vmrghb v3, v8, v3 +; CHECK-P8-NEXT: vmrghb v7, v9, v7 ; CHECK-P8-NEXT: vmrglh v4, v5, v4 ; CHECK-P8-NEXT: vmrglh v5, v1, v0 -; CHECK-P8-NEXT: vmrglh v0, v7, v6 -; CHECK-P8-NEXT: vmrglh v2, v3, v2 -; CHECK-P8-NEXT: vmrglw v3, v5, v4 -; CHECK-P8-NEXT: vmrglw v2, v2, v0 -; CHECK-P8-NEXT: xxmrgld v2, v2, v3 +; CHECK-P8-NEXT: vmrglh v2, v2, v6 +; CHECK-P8-NEXT: vmrglh v3, v7, v3 +; CHECK-P8-NEXT: vmrglw v4, v5, v4 +; CHECK-P8-NEXT: vmrglw v2, v3, v2 +; CHECK-P8-NEXT: xxmrgld v2, v2, v4 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test16elt_signed: @@ -1078,94 +974,78 @@ ; CHECK-P9-NEXT: xscvdpsxws f8, f7 ; CHECK-P9-NEXT: xxswapd vs7, vs7 ; CHECK-P9-NEXT: xscvdpsxws f7, f7 +; CHECK-P9-NEXT: lxv vs6, 16(r3) ; CHECK-P9-NEXT: lxv vs0, 112(r3) ; CHECK-P9-NEXT: lxv vs1, 96(r3) ; CHECK-P9-NEXT: lxv vs2, 80(r3) ; CHECK-P9-NEXT: lxv vs3, 64(r3) ; CHECK-P9-NEXT: lxv vs4, 48(r3) ; CHECK-P9-NEXT: lxv vs5, 32(r3) -; CHECK-P9-NEXT: lxv vs6, 16(r3) ; CHECK-P9-NEXT: mfvsrwz r3, f8 -; CHECK-P9-NEXT: mtvsrd f8, r3 +; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f7 -; CHECK-P9-NEXT: xxswapd v2, vs8 -; CHECK-P9-NEXT: mtvsrd f7, r3 -; CHECK-P9-NEXT: xxswapd v3, vs7 ; CHECK-P9-NEXT: xscvdpsxws f7, f6 ; CHECK-P9-NEXT: xxswapd vs6, vs6 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvdpsxws f6, f6 +; CHECK-P9-NEXT: vmrghb v2, v2, v3 ; CHECK-P9-NEXT: mfvsrwz r3, f7 -; CHECK-P9-NEXT: mtvsrd f7, r3 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f6 -; CHECK-P9-NEXT: mtvsrd f6, r3 -; CHECK-P9-NEXT: xxswapd v4, vs6 ; CHECK-P9-NEXT: xscvdpsxws f6, f5 ; CHECK-P9-NEXT: xxswapd vs5, vs5 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvdpsxws f5, f5 +; CHECK-P9-NEXT: vmrghb v3, v3, v4 ; CHECK-P9-NEXT: mfvsrwz r3, f6 -; CHECK-P9-NEXT: mtvsrd f6, r3 -; CHECK-P9-NEXT: mfvsrwz r3, f5 -; CHECK-P9-NEXT: vmrglb v2, v2, v3 -; CHECK-P9-NEXT: xxswapd v3, vs7 -; CHECK-P9-NEXT: vmrglb v3, v3, v4 ; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: xxswapd v3, vs6 -; CHECK-P9-NEXT: mtvsrd f5, r3 -; CHECK-P9-NEXT: xxswapd v4, vs5 +; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mfvsrwz r3, f5 ; CHECK-P9-NEXT: xscvdpsxws f5, f4 ; CHECK-P9-NEXT: xxswapd vs4, vs4 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvdpsxws f4, f4 +; CHECK-P9-NEXT: vmrghb v3, v3, v4 ; CHECK-P9-NEXT: mfvsrwz r3, f5 -; CHECK-P9-NEXT: mtvsrd f5, r3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f4 -; CHECK-P9-NEXT: mtvsrd f4, r3 -; CHECK-P9-NEXT: xxswapd v5, vs4 ; CHECK-P9-NEXT: xscvdpsxws f4, f3 ; CHECK-P9-NEXT: xxswapd vs3, vs3 +; CHECK-P9-NEXT: mtvsrd v5, r3 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: vmrglb v3, v3, v4 -; CHECK-P9-NEXT: xxswapd v4, vs5 -; CHECK-P9-NEXT: vmrglb v4, v4, v5 +; CHECK-P9-NEXT: vmrghb v4, v4, v5 ; CHECK-P9-NEXT: vmrglh v3, v4, v3 ; CHECK-P9-NEXT: mfvsrwz r3, f4 -; CHECK-P9-NEXT: mtvsrd f4, r3 +; CHECK-P9-NEXT: vmrglw v2, v3, v2 +; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f3 -; CHECK-P9-NEXT: mtvsrd f3, r3 -; CHECK-P9-NEXT: xxswapd v4, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs2 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 +; CHECK-P9-NEXT: vmrghb v3, v3, v4 ; CHECK-P9-NEXT: mfvsrwz r3, f3 -; CHECK-P9-NEXT: mtvsrd f3, r3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 -; CHECK-P9-NEXT: xxswapd v5, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 +; CHECK-P9-NEXT: mtvsrd v5, r3 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: xxswapd v3, vs4 -; CHECK-P9-NEXT: vmrglb v3, v3, v4 -; CHECK-P9-NEXT: xxswapd v4, vs3 -; CHECK-P9-NEXT: vmrglb v4, v4, v5 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 +; CHECK-P9-NEXT: vmrghb v4, v4, v5 ; CHECK-P9-NEXT: mfvsrwz r3, f2 -; CHECK-P9-NEXT: mtvsrd f2, r3 +; CHECK-P9-NEXT: vmrglh v3, v4, v3 +; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: xxswapd v4, vs2 -; CHECK-P9-NEXT: mtvsrd f1, r3 -; CHECK-P9-NEXT: xxswapd v5, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 +; CHECK-P9-NEXT: mtvsrd v5, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: vmrghb v4, v4, v5 ; CHECK-P9-NEXT: mfvsrwz r3, f1 -; CHECK-P9-NEXT: mtvsrd f1, r3 +; CHECK-P9-NEXT: mtvsrd v5, r3 ; CHECK-P9-NEXT: mfvsrwz r3, f0 -; CHECK-P9-NEXT: mtvsrd f0, r3 -; CHECK-P9-NEXT: vmrglb v4, v4, v5 -; CHECK-P9-NEXT: xxswapd v5, vs1 -; CHECK-P9-NEXT: xxswapd v0, vs0 -; CHECK-P9-NEXT: vmrglb v5, v5, v0 +; CHECK-P9-NEXT: mtvsrd v0, r3 +; CHECK-P9-NEXT: vmrghb v5, v5, v0 ; CHECK-P9-NEXT: vmrglh v4, v5, v4 ; CHECK-P9-NEXT: vmrglw v3, v4, v3 ; CHECK-P9-NEXT: xxmrgld v2, v3, v2 diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i16_to_fp32_elts.ll @@ -332,11 +332,10 @@ define <4 x float> @test4elt_signed(i64 %a.coerce) local_unnamed_addr #1 { ; CHECK-P8-LABEL: test4elt_signed: ; CHECK-P8: # %bb.0: # %entry -; CHECK-P8-NEXT: mtvsrd f0, r3 +; CHECK-P8-NEXT: mtvsrd v2, r3 ; CHECK-P8-NEXT: vspltisw v3, 8 -; CHECK-P8-NEXT: xxswapd v2, vs0 +; CHECK-P8-NEXT: vmrghh v2, v2, v2 ; CHECK-P8-NEXT: vadduwm v3, v3, v3 -; CHECK-P8-NEXT: vmrglh v2, v2, v2 ; CHECK-P8-NEXT: vslw v2, v2, v3 ; CHECK-P8-NEXT: vsraw v2, v2, v3 ; CHECK-P8-NEXT: xvcvsxwsp v2, v2 @@ -344,9 +343,8 @@ ; ; CHECK-P9-LABEL: test4elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: mtvsrd f0, r3 -; CHECK-P9-NEXT: xxswapd v2, vs0 -; CHECK-P9-NEXT: vmrglh v2, v2, v2 +; CHECK-P9-NEXT: mtvsrd v2, r3 +; CHECK-P9-NEXT: vmrghh v2, v2, v2 ; CHECK-P9-NEXT: vextsh2w v2, v2 ; CHECK-P9-NEXT: xvcvsxwsp v2, v2 ; CHECK-P9-NEXT: blr diff --git a/llvm/test/CodeGen/PowerPC/vec_conv_i32_to_fp64_elts.ll b/llvm/test/CodeGen/PowerPC/vec_conv_i32_to_fp64_elts.ll --- a/llvm/test/CodeGen/PowerPC/vec_conv_i32_to_fp64_elts.ll +++ b/llvm/test/CodeGen/PowerPC/vec_conv_i32_to_fp64_elts.ll @@ -13,16 +13,14 @@ ; CHECK-P8-LABEL: test2elt: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: xxmrglw v2, v2, v2 +; CHECK-P8-NEXT: xxmrghw v2, vs0, vs0 ; CHECK-P8-NEXT: xvcvuxwdp v2, v2 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test2elt: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: mtvsrd f0, r3 -; CHECK-P9-NEXT: xxswapd v2, vs0 -; CHECK-P9-NEXT: xxmrglw v2, v2, v2 +; CHECK-P9-NEXT: xxmrghw v2, vs0, vs0 ; CHECK-P9-NEXT: xvcvuxwdp v2, v2 ; CHECK-P9-NEXT: blr ; @@ -267,16 +265,14 @@ ; CHECK-P8-LABEL: test2elt_signed: ; CHECK-P8: # %bb.0: # %entry ; CHECK-P8-NEXT: mtvsrd f0, r3 -; CHECK-P8-NEXT: xxswapd v2, vs0 -; CHECK-P8-NEXT: xxmrglw v2, v2, v2 +; CHECK-P8-NEXT: xxmrghw v2, vs0, vs0 ; CHECK-P8-NEXT: xvcvsxwdp v2, v2 ; CHECK-P8-NEXT: blr ; ; CHECK-P9-LABEL: test2elt_signed: ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: mtvsrd f0, r3 -; CHECK-P9-NEXT: xxswapd v2, vs0 -; CHECK-P9-NEXT: xxmrglw v2, v2, v2 +; CHECK-P9-NEXT: xxmrghw v2, vs0, vs0 ; CHECK-P9-NEXT: xvcvsxwdp v2, v2 ; CHECK-P9-NEXT: blr ;