Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -8581,6 +8581,63 @@
   return Success;
 }
 
+// Use mtvsrdd and vpku* instructions to optimize BUILD_VECTOR:
+// - v8i16: mtvsrdd*4 + vpkudum*2 + vpkuwum
+// - v16i8: mtvsrdd*8 + vpkudum*4 + vpkuwum*2 + vpkuhum
+// This also applies to v4i32 and v2i64 construction, but we already have
+// optimized codegen for them.
+//
+// Op is the BUILD_VECTOR node to lower; LittleEndian selects which half of
+// each widened element the pack shuffles keep. Returns the packed vector, or
+// a null SDValue when the type is not v8i16/v16i8 or when every operand after
+// the first two is undef.
+static SDValue tryPackBuildVector(SDValue Op, SelectionDAG &DAG,
+                                  bool LittleEndian) {
+  EVT VT = Op.getValueType();
+  SDLoc dl(Op);
+
+  // rldimi+vpkudum is better for v4i32 construction.
+  if (VT != MVT::v8i16 && VT != MVT::v16i8)
+    return SDValue();
+
+  // Vector merge is better when only the first two elements are present.
+  if (std::all_of(Op->op_begin() + 2, Op->op_end(),
+                  [](const SDValue &E) { return E.isUndef(); }))
+    return SDValue();
+
+  // Each pack keeps one half of every element of its bitcasted inputs: the
+  // even sub-elements on LE, the odd ones on BE.
+  int Mask[16];
+  for (int i = 0; i < 16; ++i)
+    Mask[i] = i * 2 + (LittleEndian ? 0 : 1);
+
+  // Construct v2i64 vectors, each holding two sources extended to i64.
+  int NumElements = VT.getVectorNumElements();
+  SmallVector<SDValue, 8> Parts(NumElements / 2);
+  for (int i = 0; i < NumElements / 2; ++i) {
+    SDValue LHS = DAG.getAnyExtOrTrunc(Op.getOperand(i * 2), dl, MVT::i64);
+    SDValue RHS = DAG.getAnyExtOrTrunc(Op.getOperand(i * 2 + 1), dl, MVT::i64);
+    Parts[i] = DAG.getBuildVector(MVT::v2i64, dl, {LHS, RHS});
+  }
+
+  // Pack pairs of v2i64 into v4i32, then pairs of v4i32 into v8i16, and so on
+  // until a single vector of type VT is left in Parts[0]. Overwriting Parts[j]
+  // in place is safe: slot j is never read after iteration j of a round.
+  int VecLen = NumElements / 4;
+  EVT DestTy[] = {MVT::v4i32, MVT::v8i16, MVT::v16i8};
+  for (int i = 0; VecLen >= 1; ++i, VecLen /= 2) {
+    for (int j = 0; j < VecLen; ++j) {
+      SDValue LHS = DAG.getBitcast(DestTy[i], Parts[j * 2]);
+      SDValue RHS = DAG.getBitcast(DestTy[i], Parts[j * 2 + 1]);
+      Parts[j] = DAG.getVectorShuffle(
+          DestTy[i], dl, LHS, RHS,
+          makeArrayRef(Mask, DestTy[i].getVectorNumElements()));
+    }
+  }
+
+  return Parts[0];
+}
+
 // If this is a case we can't handle, return null and let the default
 // expansion code take care of it. If we CAN select this case, and if it
 // selects to a single instruction, return Op. Otherwise, if we can codegen
@@ -8659,6 +8716,13 @@
         haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
                                         Subtarget.hasP8Vector()))
       return Op;
+
+    // Try to construct vector using mtvsrdd and vpku* instruction.
+    if (Subtarget.hasP9Vector() && !BVN->isConstant() &&
+        !DAG.isSplatValue(Op, true))
+      if (SDValue Res = tryPackBuildVector(Op, DAG, Subtarget.isLittleEndian()))
+        return Res;
+
     return SDValue();
   }
 
Index: llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
===================================================================
--- llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
+++ llvm/test/CodeGen/PowerPC/pre-inc-disable.ll
@@ -349,26 +349,17 @@
 ; CHECK-LABEL: test16:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: sldi r4, r4, 1
-; CHECK-NEXT: li r7, 16
-; CHECK-NEXT: add r6, r3, r4
-; CHECK-NEXT: lxsihzx v4, r3, r4
-; CHECK-NEXT: addis r3, r2, .LCPI3_0@toc@ha
-; CHECK-NEXT: lxsihzx v2, r6, r7
 ; CHECK-NEXT: li r6, 0
-; CHECK-NEXT: addi r3, r3, .LCPI3_0@toc@l
-; CHECK-NEXT: mtvsrd v3, r6
-; CHECK-NEXT: vsplth v4, v4, 3
-; CHECK-NEXT: vsplth v2, v2, 3
-; CHECK-NEXT: vmrghh v4, v3, v4
-; CHECK-NEXT: vmrghh v2, v3, v2
-; CHECK-NEXT: vsplth v3, v3, 3
-; CHECK-NEXT: vmrglw v3, v4, v3
-; CHECK-NEXT: lxvx v4, 0, r3
-; CHECK-NEXT: li r3, 0
-; CHECK-NEXT: vperm v2, v2, v3, v4
+; CHECK-NEXT: lhzux r4, r3, r4
+; CHECK-NEXT: lhz r3,
16(r3) +; CHECK-NEXT: mtvsrdd v2, 0, r4 +; CHECK-NEXT: mtvsrdd v3, 0, r3 +; CHECK-NEXT: vpkudum v2, v3, v2 +; CHECK-NEXT: xxlxor v3, v3, v3 +; CHECK-NEXT: vpkuwum v2, v2, v3 ; CHECK-NEXT: xxspltw v3, v2, 2 ; CHECK-NEXT: vadduwm v2, v2, v3 -; CHECK-NEXT: vextuwrx r3, r3, v2 +; CHECK-NEXT: vextuwrx r3, r6, v2 ; CHECK-NEXT: cmpw r3, r5 ; CHECK-NEXT: bgelr+ cr0 ; CHECK-NEXT: # %bb.1: # %if.then @@ -376,24 +367,15 @@ ; P9BE-LABEL: test16: ; P9BE: # %bb.0: # %entry ; P9BE-NEXT: sldi r4, r4, 1 -; P9BE-NEXT: li r7, 16 -; P9BE-NEXT: add r6, r3, r4 -; P9BE-NEXT: lxsihzx v4, r3, r4 -; P9BE-NEXT: addis r3, r2, .LCPI3_0@toc@ha -; P9BE-NEXT: lxsihzx v2, r6, r7 -; P9BE-NEXT: li r6, 0 -; P9BE-NEXT: addi r3, r3, .LCPI3_0@toc@l -; P9BE-NEXT: sldi r6, r6, 48 -; P9BE-NEXT: vsplth v4, v4, 3 -; P9BE-NEXT: mtvsrd v3, r6 -; P9BE-NEXT: vsplth v2, v2, 3 -; P9BE-NEXT: vmrghh v4, v3, v4 -; P9BE-NEXT: vmrghh v2, v3, v2 -; P9BE-NEXT: vsplth v3, v3, 0 -; P9BE-NEXT: vmrghw v3, v3, v4 -; P9BE-NEXT: lxvx v4, 0, r3 +; P9BE-NEXT: lhzux r4, r3, r4 +; P9BE-NEXT: lhz r3, 16(r3) +; P9BE-NEXT: mtvsrdd v2, 0, r4 +; P9BE-NEXT: mtvsrdd v3, 0, r3 ; P9BE-NEXT: li r3, 0 -; P9BE-NEXT: vperm v2, v3, v2, v4 +; P9BE-NEXT: vpkudum v2, v2, v3 +; P9BE-NEXT: xxlxor v3, v3, v3 +; P9BE-NEXT: vpkudum v3, v3, v3 +; P9BE-NEXT: vpkuwum v2, v3, v2 ; P9BE-NEXT: xxspltw v3, v2, 1 ; P9BE-NEXT: vadduwm v2, v2, v3 ; P9BE-NEXT: vextuwlx r3, r3, v2 @@ -435,58 +417,39 @@ define void @test8(i8* nocapture readonly %sums, i32 signext %delta, i32 signext %thresh) { ; CHECK-LABEL: test8: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: add r6, r3, r4 -; CHECK-NEXT: lxsibzx v2, r3, r4 -; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: mtvsrd v3, r3 -; CHECK-NEXT: li r3, 8 -; CHECK-NEXT: lxsibzx v5, r6, r3 -; CHECK-NEXT: vspltb v4, v3, 7 -; CHECK-NEXT: addis r3, r2, .LCPI4_0@toc@ha -; CHECK-NEXT: vspltb v2, v2, 7 -; CHECK-NEXT: addi r3, r3, .LCPI4_0@toc@l -; CHECK-NEXT: vmrghb v2, v3, v2 -; CHECK-NEXT: vspltb v5, v5, 7 -; CHECK-NEXT: vmrglh v2, v2, v4 -; 
CHECK-NEXT: vmrghb v3, v3, v5 -; CHECK-NEXT: vmrglw v2, v2, v4 -; CHECK-NEXT: vmrglh v3, v3, v4 -; CHECK-NEXT: vmrglw v3, v4, v3 -; CHECK-NEXT: lxvx v4, 0, r3 -; CHECK-NEXT: li r3, 0 -; CHECK-NEXT: vperm v2, v3, v2, v4 +; CHECK-NEXT: lbzux r4, r3, r4 +; CHECK-NEXT: xxlxor v3, v3, v3 +; CHECK-NEXT: li r6, 0 +; CHECK-NEXT: lbz r3, 8(r3) +; CHECK-NEXT: mtvsrdd v2, 0, r4 +; CHECK-NEXT: mtvsrdd v4, 0, r3 +; CHECK-NEXT: vpkudum v2, v3, v2 +; CHECK-NEXT: vpkudum v4, v3, v4 +; CHECK-NEXT: vpkuwum v2, v4, v2 +; CHECK-NEXT: vpkuhum v2, v2, v3 ; CHECK-NEXT: xxspltw v3, v2, 2 ; CHECK-NEXT: vadduwm v2, v2, v3 -; CHECK-NEXT: vextuwrx r3, r3, v2 +; CHECK-NEXT: vextuwrx r3, r6, v2 ; CHECK-NEXT: cmpw r3, r5 ; CHECK-NEXT: bgelr+ cr0 ; CHECK-NEXT: # %bb.1: # %if.then ; ; P9BE-LABEL: test8: ; P9BE: # %bb.0: # %entry -; P9BE-NEXT: add r6, r3, r4 -; P9BE-NEXT: li r7, 8 -; P9BE-NEXT: lxsibzx v4, r3, r4 -; P9BE-NEXT: addis r3, r2, .LCPI4_0@toc@ha -; P9BE-NEXT: lxsibzx v2, r6, r7 -; P9BE-NEXT: li r6, 0 -; P9BE-NEXT: addi r3, r3, .LCPI4_0@toc@l -; P9BE-NEXT: sldi r6, r6, 56 -; P9BE-NEXT: vspltb v4, v4, 7 -; P9BE-NEXT: mtvsrd v3, r6 -; P9BE-NEXT: vspltb v2, v2, 7 -; P9BE-NEXT: vmrghb v4, v3, v4 -; P9BE-NEXT: vmrghb v2, v3, v2 -; P9BE-NEXT: vspltb v3, v3, 0 -; P9BE-NEXT: vmrghh v4, v4, v3 -; P9BE-NEXT: xxspltw v3, v3, 0 -; P9BE-NEXT: vmrghw v2, v4, v2 -; P9BE-NEXT: lxvx v4, 0, r3 -; P9BE-NEXT: li r3, 0 -; P9BE-NEXT: vperm v2, v3, v2, v4 +; P9BE-NEXT: lbzux r4, r3, r4 +; P9BE-NEXT: xxlxor v3, v3, v3 +; P9BE-NEXT: lbz r3, 8(r3) +; P9BE-NEXT: mtvsrdd v2, 0, r4 +; P9BE-NEXT: li r4, 0 +; P9BE-NEXT: mtvsrdd v4, 0, r3 +; P9BE-NEXT: vpkudum v2, v3, v2 +; P9BE-NEXT: vpkudum v4, v3, v4 +; P9BE-NEXT: vpkuwum v3, v3, v3 +; P9BE-NEXT: vpkuwum v2, v2, v4 +; P9BE-NEXT: vpkuhum v2, v3, v2 ; P9BE-NEXT: xxspltw v3, v2, 1 ; P9BE-NEXT: vadduwm v2, v2, v3 -; P9BE-NEXT: vextuwlx r3, r3, v2 +; P9BE-NEXT: vextuwlx r3, r4, v2 ; P9BE-NEXT: cmpw r3, r5 ; P9BE-NEXT: bgelr+ cr0 ; P9BE-NEXT: # %bb.1: # %if.then Index: 
llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll =================================================================== --- llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll +++ llvm/test/CodeGen/PowerPC/srem-vector-lkk.ll @@ -21,49 +21,46 @@ ; P9LE-NEXT: srwi r5, r4, 31 ; P9LE-NEXT: srawi r4, r4, 6 ; P9LE-NEXT: add r4, r4, r5 +; P9LE-NEXT: lis r5, 31710 ; P9LE-NEXT: mulli r4, r4, 95 +; P9LE-NEXT: ori r5, r5, 63421 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: lis r4, 31710 -; P9LE-NEXT: mtvsrd v3, r3 -; P9LE-NEXT: li r3, 2 -; P9LE-NEXT: ori r4, r4, 63421 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r3, r3 -; P9LE-NEXT: mulhw r4, r3, r4 -; P9LE-NEXT: sub r4, r4, r3 -; P9LE-NEXT: srwi r5, r4, 31 -; P9LE-NEXT: srawi r4, r4, 6 -; P9LE-NEXT: add r4, r4, r5 -; P9LE-NEXT: mulli r4, r4, -124 -; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: lis r4, 21399 -; P9LE-NEXT: mtvsrd v4, r3 +; P9LE-NEXT: li r4, 2 +; P9LE-NEXT: vextuhrx r4, r4, v2 +; P9LE-NEXT: extsh r4, r4 +; P9LE-NEXT: mulhw r5, r4, r5 +; P9LE-NEXT: sub r5, r5, r4 +; P9LE-NEXT: srwi r6, r5, 31 +; P9LE-NEXT: srawi r5, r5, 6 +; P9LE-NEXT: add r5, r5, r6 +; P9LE-NEXT: mulli r5, r5, -124 +; P9LE-NEXT: sub r4, r4, r5 +; P9LE-NEXT: mtvsrdd v3, r4, r3 ; P9LE-NEXT: li r3, 4 -; P9LE-NEXT: ori r4, r4, 33437 +; P9LE-NEXT: lis r4, 21399 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: vmrghh v3, v4, v3 +; P9LE-NEXT: ori r4, r4, 33437 ; P9LE-NEXT: extsh r3, r3 ; P9LE-NEXT: mulhw r4, r3, r4 ; P9LE-NEXT: srwi r5, r4, 31 ; P9LE-NEXT: srawi r4, r4, 5 ; P9LE-NEXT: add r4, r4, r5 +; P9LE-NEXT: lis r5, -16728 ; P9LE-NEXT: mulli r4, r4, 98 +; P9LE-NEXT: ori r5, r5, 63249 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: lis r4, -16728 -; P9LE-NEXT: mtvsrd v4, r3 -; P9LE-NEXT: li r3, 6 -; P9LE-NEXT: ori r4, r4, 63249 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r3, r3 -; P9LE-NEXT: mulhw r4, r3, r4 -; P9LE-NEXT: srwi r5, r4, 31 -; P9LE-NEXT: srawi r4, r4, 8 -; P9LE-NEXT: add r4, r4, r5 -; P9LE-NEXT: mulli r4, r4, -1003 -; P9LE-NEXT: sub r3, r3, r4 
-; P9LE-NEXT: mtvsrd v2, r3 -; P9LE-NEXT: vmrghh v2, v2, v4 -; P9LE-NEXT: vmrglw v2, v2, v3 +; P9LE-NEXT: li r4, 6 +; P9LE-NEXT: vextuhrx r4, r4, v2 +; P9LE-NEXT: extsh r4, r4 +; P9LE-NEXT: mulhw r5, r4, r5 +; P9LE-NEXT: srwi r6, r5, 31 +; P9LE-NEXT: srawi r5, r5, 8 +; P9LE-NEXT: add r5, r5, r6 +; P9LE-NEXT: mulli r5, r5, -1003 +; P9LE-NEXT: sub r4, r4, r5 +; P9LE-NEXT: mtvsrdd v2, r4, r3 +; P9LE-NEXT: vpkudum v2, v2, v3 +; P9LE-NEXT: vpkuwum v2, v2, v2 ; P9LE-NEXT: blr ; ; P9BE-LABEL: fold_srem_vec_1: @@ -78,53 +75,46 @@ ; P9BE-NEXT: srwi r5, r4, 31 ; P9BE-NEXT: srawi r4, r4, 6 ; P9BE-NEXT: add r4, r4, r5 +; P9BE-NEXT: lis r5, -21386 ; P9BE-NEXT: mulli r4, r4, -124 +; P9BE-NEXT: ori r5, r5, 37253 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: lis r4, -21386 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: ori r4, r4, 37253 -; P9BE-NEXT: mtvsrd v3, r3 -; P9BE-NEXT: li r3, 0 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: mulhw r4, r3, r4 -; P9BE-NEXT: add r4, r4, r3 -; P9BE-NEXT: srwi r5, r4, 31 -; P9BE-NEXT: srawi r4, r4, 6 -; P9BE-NEXT: add r4, r4, r5 -; P9BE-NEXT: mulli r4, r4, 95 -; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: lis r4, -16728 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: ori r4, r4, 63249 -; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: li r4, 0 +; P9BE-NEXT: vextuhlx r4, r4, v2 +; P9BE-NEXT: extsh r4, r4 +; P9BE-NEXT: mulhw r5, r4, r5 +; P9BE-NEXT: add r5, r5, r4 +; P9BE-NEXT: srwi r6, r5, 31 +; P9BE-NEXT: srawi r5, r5, 6 +; P9BE-NEXT: add r5, r5, r6 +; P9BE-NEXT: mulli r5, r5, 95 +; P9BE-NEXT: sub r4, r4, r5 +; P9BE-NEXT: mtvsrdd v3, r4, r3 ; P9BE-NEXT: li r3, 6 +; P9BE-NEXT: lis r4, -16728 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: vmrghh v3, v4, v3 +; P9BE-NEXT: ori r4, r4, 63249 ; P9BE-NEXT: extsh r3, r3 ; P9BE-NEXT: mulhw r4, r3, r4 ; P9BE-NEXT: srwi r5, r4, 31 ; P9BE-NEXT: srawi r4, r4, 8 ; P9BE-NEXT: add r4, r4, r5 +; P9BE-NEXT: lis r5, 21399 ; P9BE-NEXT: mulli r4, r4, -1003 +; P9BE-NEXT: ori r5, r5, 33437 ; P9BE-NEXT: sub r3, r3, 
r4 -; P9BE-NEXT: lis r4, 21399 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: ori r4, r4, 33437 -; P9BE-NEXT: mtvsrd v4, r3 -; P9BE-NEXT: li r3, 4 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: mulhw r4, r3, r4 -; P9BE-NEXT: srwi r5, r4, 31 -; P9BE-NEXT: srawi r4, r4, 5 -; P9BE-NEXT: add r4, r4, r5 -; P9BE-NEXT: mulli r4, r4, 98 -; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v2, r3 -; P9BE-NEXT: vmrghh v2, v2, v4 -; P9BE-NEXT: vmrghw v2, v3, v2 +; P9BE-NEXT: li r4, 4 +; P9BE-NEXT: vextuhlx r4, r4, v2 +; P9BE-NEXT: extsh r4, r4 +; P9BE-NEXT: mulhw r5, r4, r5 +; P9BE-NEXT: srwi r6, r5, 31 +; P9BE-NEXT: srawi r5, r5, 5 +; P9BE-NEXT: add r5, r5, r6 +; P9BE-NEXT: mulli r5, r5, 98 +; P9BE-NEXT: sub r4, r4, r5 +; P9BE-NEXT: mtvsrdd v2, r4, r3 +; P9BE-NEXT: vpkudum v2, v3, v2 +; P9BE-NEXT: vpkuwum v2, v2, v2 ; P9BE-NEXT: blr ; ; P8LE-LABEL: fold_srem_vec_1: @@ -258,21 +248,19 @@ ; P9LE-NEXT: add r5, r5, r6 ; P9LE-NEXT: mulli r5, r5, 95 ; P9LE-NEXT: sub r3, r3, r5 -; P9LE-NEXT: mtvsrd v3, r3 -; P9LE-NEXT: li r3, 2 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r3, r3 -; P9LE-NEXT: mulhw r5, r3, r4 -; P9LE-NEXT: add r5, r5, r3 -; P9LE-NEXT: srwi r6, r5, 31 -; P9LE-NEXT: srawi r5, r5, 6 -; P9LE-NEXT: add r5, r5, r6 -; P9LE-NEXT: mulli r5, r5, 95 -; P9LE-NEXT: sub r3, r3, r5 -; P9LE-NEXT: mtvsrd v4, r3 +; P9LE-NEXT: li r5, 2 +; P9LE-NEXT: vextuhrx r5, r5, v2 +; P9LE-NEXT: extsh r5, r5 +; P9LE-NEXT: mulhw r6, r5, r4 +; P9LE-NEXT: add r6, r6, r5 +; P9LE-NEXT: srwi r7, r6, 31 +; P9LE-NEXT: srawi r6, r6, 6 +; P9LE-NEXT: add r6, r6, r7 +; P9LE-NEXT: mulli r6, r6, 95 +; P9LE-NEXT: sub r5, r5, r6 +; P9LE-NEXT: mtvsrdd v3, r5, r3 ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: vmrghh v3, v4, v3 ; P9LE-NEXT: extsh r3, r3 ; P9LE-NEXT: mulhw r5, r3, r4 ; P9LE-NEXT: add r5, r5, r3 @@ -281,20 +269,19 @@ ; P9LE-NEXT: add r5, r5, r6 ; P9LE-NEXT: mulli r5, r5, 95 ; P9LE-NEXT: sub r3, r3, r5 -; P9LE-NEXT: 
mtvsrd v4, r3 -; P9LE-NEXT: li r3, 6 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r3, r3 -; P9LE-NEXT: mulhw r4, r3, r4 -; P9LE-NEXT: add r4, r4, r3 -; P9LE-NEXT: srwi r5, r4, 31 -; P9LE-NEXT: srawi r4, r4, 6 +; P9LE-NEXT: li r5, 6 +; P9LE-NEXT: vextuhrx r5, r5, v2 +; P9LE-NEXT: extsh r5, r5 +; P9LE-NEXT: mulhw r4, r5, r4 ; P9LE-NEXT: add r4, r4, r5 +; P9LE-NEXT: srwi r6, r4, 31 +; P9LE-NEXT: srawi r4, r4, 6 +; P9LE-NEXT: add r4, r4, r6 ; P9LE-NEXT: mulli r4, r4, 95 -; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtvsrd v2, r3 -; P9LE-NEXT: vmrghh v2, v2, v4 -; P9LE-NEXT: vmrglw v2, v2, v3 +; P9LE-NEXT: sub r4, r5, r4 +; P9LE-NEXT: mtvsrdd v2, r4, r3 +; P9LE-NEXT: vpkudum v2, v2, v3 +; P9LE-NEXT: vpkuwum v2, v2, v2 ; P9LE-NEXT: blr ; ; P9BE-LABEL: fold_srem_vec_2: @@ -311,23 +298,19 @@ ; P9BE-NEXT: add r5, r5, r6 ; P9BE-NEXT: mulli r5, r5, 95 ; P9BE-NEXT: sub r3, r3, r5 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v3, r3 -; P9BE-NEXT: li r3, 4 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: mulhw r5, r3, r4 -; P9BE-NEXT: add r5, r5, r3 -; P9BE-NEXT: srwi r6, r5, 31 -; P9BE-NEXT: srawi r5, r5, 6 -; P9BE-NEXT: add r5, r5, r6 -; P9BE-NEXT: mulli r5, r5, 95 -; P9BE-NEXT: sub r3, r3, r5 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: li r5, 4 +; P9BE-NEXT: vextuhlx r5, r5, v2 +; P9BE-NEXT: extsh r5, r5 +; P9BE-NEXT: mulhw r6, r5, r4 +; P9BE-NEXT: add r6, r6, r5 +; P9BE-NEXT: srwi r7, r6, 31 +; P9BE-NEXT: srawi r6, r6, 6 +; P9BE-NEXT: add r6, r6, r7 +; P9BE-NEXT: mulli r6, r6, 95 +; P9BE-NEXT: sub r5, r5, r6 +; P9BE-NEXT: mtvsrdd v3, r5, r3 ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: vmrghh v3, v4, v3 ; P9BE-NEXT: extsh r3, r3 ; P9BE-NEXT: mulhw r5, r3, r4 ; P9BE-NEXT: add r5, r5, r3 @@ -336,22 +319,19 @@ ; P9BE-NEXT: add r5, r5, r6 ; P9BE-NEXT: mulli r5, r5, 95 ; P9BE-NEXT: sub r3, r3, r5 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v4, r3 -; P9BE-NEXT: li r3, 0 -; P9BE-NEXT: 
vextuhlx r3, r3, v2 -; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: mulhw r4, r3, r4 -; P9BE-NEXT: add r4, r4, r3 -; P9BE-NEXT: srwi r5, r4, 31 -; P9BE-NEXT: srawi r4, r4, 6 +; P9BE-NEXT: li r5, 0 +; P9BE-NEXT: vextuhlx r5, r5, v2 +; P9BE-NEXT: extsh r5, r5 +; P9BE-NEXT: mulhw r4, r5, r4 ; P9BE-NEXT: add r4, r4, r5 +; P9BE-NEXT: srwi r6, r4, 31 +; P9BE-NEXT: srawi r4, r4, 6 +; P9BE-NEXT: add r4, r4, r6 ; P9BE-NEXT: mulli r4, r4, 95 -; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v2, r3 -; P9BE-NEXT: vmrghh v2, v2, v4 -; P9BE-NEXT: vmrghw v2, v2, v3 +; P9BE-NEXT: sub r4, r5, r4 +; P9BE-NEXT: mtvsrdd v2, r4, r3 +; P9BE-NEXT: vpkudum v2, v2, v3 +; P9BE-NEXT: vpkuwum v2, v2, v2 ; P9BE-NEXT: blr ; ; P8LE-LABEL: fold_srem_vec_2: @@ -479,50 +459,44 @@ ; P9LE-NEXT: add r5, r5, r6 ; P9LE-NEXT: mulli r6, r5, 95 ; P9LE-NEXT: sub r3, r3, r6 -; P9LE-NEXT: mtvsrd v3, r3 -; P9LE-NEXT: li r3, 2 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r6, r3 -; P9LE-NEXT: mulhw r7, r6, r4 -; P9LE-NEXT: add r6, r7, r6 -; P9LE-NEXT: srwi r7, r6, 31 -; P9LE-NEXT: srawi r6, r6, 6 -; P9LE-NEXT: add r6, r6, r7 -; P9LE-NEXT: mulli r7, r6, 95 -; P9LE-NEXT: sub r3, r3, r7 -; P9LE-NEXT: mtvsrd v4, r3 -; P9LE-NEXT: li r3, 4 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: vmrghh v3, v4, v3 -; P9LE-NEXT: extsh r7, r3 +; P9LE-NEXT: li r6, 2 +; P9LE-NEXT: vextuhrx r6, r6, v2 +; P9LE-NEXT: extsh r7, r6 ; P9LE-NEXT: mulhw r8, r7, r4 ; P9LE-NEXT: add r7, r8, r7 ; P9LE-NEXT: srwi r8, r7, 31 ; P9LE-NEXT: srawi r7, r7, 6 ; P9LE-NEXT: add r7, r7, r8 ; P9LE-NEXT: mulli r8, r7, 95 -; P9LE-NEXT: sub r3, r3, r8 -; P9LE-NEXT: mtvsrd v4, r3 -; P9LE-NEXT: li r3, 6 +; P9LE-NEXT: sub r6, r6, r8 +; P9LE-NEXT: mtvsrdd v3, r6, r3 +; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r8, r3 -; P9LE-NEXT: mulhw r4, r8, r4 -; P9LE-NEXT: add r4, r4, r8 -; P9LE-NEXT: srwi r8, r4, 31 -; P9LE-NEXT: srawi r4, r4, 6 -; P9LE-NEXT: add r4, r4, r8 -; P9LE-NEXT: mulli r8, r4, 95 -; 
P9LE-NEXT: mtvsrd v5, r4 +; P9LE-NEXT: extsh r6, r3 +; P9LE-NEXT: mulhw r8, r6, r4 +; P9LE-NEXT: add r6, r8, r6 +; P9LE-NEXT: srwi r8, r6, 31 +; P9LE-NEXT: srawi r6, r6, 6 +; P9LE-NEXT: add r6, r6, r8 +; P9LE-NEXT: mulli r8, r6, 95 ; P9LE-NEXT: sub r3, r3, r8 -; P9LE-NEXT: mtvsrd v2, r3 -; P9LE-NEXT: vmrghh v2, v2, v4 -; P9LE-NEXT: mtvsrd v4, r6 -; P9LE-NEXT: vmrglw v2, v2, v3 -; P9LE-NEXT: mtvsrd v3, r5 -; P9LE-NEXT: vmrghh v3, v4, v3 -; P9LE-NEXT: mtvsrd v4, r7 -; P9LE-NEXT: vmrghh v4, v5, v4 -; P9LE-NEXT: vmrglw v3, v4, v3 +; P9LE-NEXT: li r8, 6 +; P9LE-NEXT: vextuhrx r8, r8, v2 +; P9LE-NEXT: extsh r9, r8 +; P9LE-NEXT: mulhw r4, r9, r4 +; P9LE-NEXT: add r4, r4, r9 +; P9LE-NEXT: srwi r9, r4, 31 +; P9LE-NEXT: srawi r4, r4, 6 +; P9LE-NEXT: add r4, r4, r9 +; P9LE-NEXT: mulli r9, r4, 95 +; P9LE-NEXT: mtvsrdd v4, r4, r6 +; P9LE-NEXT: sub r8, r8, r9 +; P9LE-NEXT: mtvsrdd v2, r8, r3 +; P9LE-NEXT: vpkudum v2, v2, v3 +; P9LE-NEXT: mtvsrdd v3, r7, r5 +; P9LE-NEXT: vpkudum v3, v4, v3 +; P9LE-NEXT: vpkuwum v2, v2, v2 +; P9LE-NEXT: vpkuwum v3, v3, v3 ; P9LE-NEXT: vadduhm v2, v2, v3 ; P9LE-NEXT: blr ; @@ -540,58 +514,44 @@ ; P9BE-NEXT: add r4, r4, r6 ; P9BE-NEXT: mulli r6, r4, 95 ; P9BE-NEXT: sub r3, r3, r6 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v3, r3 -; P9BE-NEXT: li r3, 4 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: extsh r6, r3 -; P9BE-NEXT: mulhw r7, r6, r5 -; P9BE-NEXT: add r6, r7, r6 -; P9BE-NEXT: srwi r7, r6, 31 -; P9BE-NEXT: srawi r6, r6, 6 -; P9BE-NEXT: add r6, r6, r7 -; P9BE-NEXT: mulli r7, r6, 95 -; P9BE-NEXT: sub r3, r3, r7 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v4, r3 -; P9BE-NEXT: li r3, 2 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: vmrghh v3, v4, v3 -; P9BE-NEXT: extsh r7, r3 +; P9BE-NEXT: li r6, 4 +; P9BE-NEXT: vextuhlx r6, r6, v2 +; P9BE-NEXT: extsh r7, r6 ; P9BE-NEXT: mulhw r8, r7, r5 ; P9BE-NEXT: add r7, r8, r7 ; P9BE-NEXT: srwi r8, r7, 31 ; P9BE-NEXT: srawi r7, r7, 6 ; P9BE-NEXT: add r7, r7, r8 ; P9BE-NEXT: mulli r8, r7, 95 
-; P9BE-NEXT: sub r3, r3, r8 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v4, r3 -; P9BE-NEXT: li r3, 0 +; P9BE-NEXT: sub r6, r6, r8 +; P9BE-NEXT: mtvsrdd v3, r6, r3 +; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: mulhw r5, r3, r5 -; P9BE-NEXT: add r5, r5, r3 -; P9BE-NEXT: srwi r8, r5, 31 -; P9BE-NEXT: srawi r5, r5, 6 -; P9BE-NEXT: add r5, r5, r8 -; P9BE-NEXT: mulli r8, r5, 95 +; P9BE-NEXT: extsh r6, r3 +; P9BE-NEXT: mulhw r8, r6, r5 +; P9BE-NEXT: add r6, r8, r6 +; P9BE-NEXT: srwi r8, r6, 31 +; P9BE-NEXT: srawi r6, r6, 6 +; P9BE-NEXT: add r6, r6, r8 +; P9BE-NEXT: mulli r8, r6, 95 ; P9BE-NEXT: sub r3, r3, r8 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v2, r3 -; P9BE-NEXT: sldi r3, r4, 48 -; P9BE-NEXT: vmrghh v2, v2, v4 -; P9BE-NEXT: vmrghw v2, v2, v3 -; P9BE-NEXT: mtvsrd v3, r3 -; P9BE-NEXT: sldi r3, r6, 48 -; P9BE-NEXT: mtvsrd v4, r3 -; P9BE-NEXT: sldi r3, r7, 48 -; P9BE-NEXT: vmrghh v3, v4, v3 -; P9BE-NEXT: mtvsrd v4, r3 -; P9BE-NEXT: sldi r3, r5, 48 -; P9BE-NEXT: mtvsrd v5, r3 -; P9BE-NEXT: vmrghh v4, v5, v4 -; P9BE-NEXT: vmrghw v3, v4, v3 +; P9BE-NEXT: li r8, 0 +; P9BE-NEXT: vextuhlx r8, r8, v2 +; P9BE-NEXT: extsh r8, r8 +; P9BE-NEXT: mulhw r5, r8, r5 +; P9BE-NEXT: add r5, r5, r8 +; P9BE-NEXT: srwi r9, r5, 31 +; P9BE-NEXT: srawi r5, r5, 6 +; P9BE-NEXT: add r5, r5, r9 +; P9BE-NEXT: mulli r9, r5, 95 +; P9BE-NEXT: mtvsrdd v4, r5, r6 +; P9BE-NEXT: sub r8, r8, r9 +; P9BE-NEXT: mtvsrdd v2, r8, r3 +; P9BE-NEXT: vpkudum v2, v2, v3 +; P9BE-NEXT: mtvsrdd v3, r7, r4 +; P9BE-NEXT: vpkudum v3, v4, v3 +; P9BE-NEXT: vpkuwum v2, v2, v2 +; P9BE-NEXT: vpkuwum v3, v3, v3 ; P9BE-NEXT: vadduhm v2, v2, v3 ; P9BE-NEXT: blr ; @@ -736,20 +696,18 @@ ; P9LE-NEXT: addze r4, r4 ; P9LE-NEXT: slwi r4, r4, 6 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtvsrd v3, r3 -; P9LE-NEXT: li r3, 2 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r3, r3 -; P9LE-NEXT: srawi r4, r3, 5 -; P9LE-NEXT: addze r4, r4 -; P9LE-NEXT: slwi r4, r4, 5 -; 
P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: lis r4, -21386 -; P9LE-NEXT: mtvsrd v4, r3 +; P9LE-NEXT: li r4, 2 +; P9LE-NEXT: vextuhrx r4, r4, v2 +; P9LE-NEXT: extsh r4, r4 +; P9LE-NEXT: srawi r5, r4, 5 +; P9LE-NEXT: addze r5, r5 +; P9LE-NEXT: slwi r5, r5, 5 +; P9LE-NEXT: sub r4, r4, r5 +; P9LE-NEXT: mtvsrdd v3, r4, r3 ; P9LE-NEXT: li r3, 6 -; P9LE-NEXT: ori r4, r4, 37253 +; P9LE-NEXT: lis r4, -21386 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: vmrghh v3, v4, v3 +; P9LE-NEXT: ori r4, r4, 37253 ; P9LE-NEXT: extsh r3, r3 ; P9LE-NEXT: mulhw r4, r3, r4 ; P9LE-NEXT: add r4, r4, r3 @@ -758,17 +716,16 @@ ; P9LE-NEXT: add r4, r4, r5 ; P9LE-NEXT: mulli r4, r4, 95 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtvsrd v4, r3 -; P9LE-NEXT: li r3, 4 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r3, r3 -; P9LE-NEXT: srawi r4, r3, 3 -; P9LE-NEXT: addze r4, r4 -; P9LE-NEXT: slwi r4, r4, 3 -; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtvsrd v2, r3 -; P9LE-NEXT: vmrghh v2, v4, v2 -; P9LE-NEXT: vmrglw v2, v2, v3 +; P9LE-NEXT: li r4, 4 +; P9LE-NEXT: vextuhrx r4, r4, v2 +; P9LE-NEXT: extsh r4, r4 +; P9LE-NEXT: srawi r5, r4, 3 +; P9LE-NEXT: addze r5, r5 +; P9LE-NEXT: slwi r5, r5, 3 +; P9LE-NEXT: sub r4, r4, r5 +; P9LE-NEXT: mtvsrdd v2, r3, r4 +; P9LE-NEXT: vpkudum v2, v2, v3 +; P9LE-NEXT: vpkuwum v2, v2, v2 ; P9LE-NEXT: blr ; ; P9BE-LABEL: dont_fold_srem_power_of_two: @@ -780,22 +737,18 @@ ; P9BE-NEXT: addze r4, r4 ; P9BE-NEXT: slwi r4, r4, 5 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v3, r3 -; P9BE-NEXT: li r3, 0 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: srawi r4, r3, 6 -; P9BE-NEXT: addze r4, r4 -; P9BE-NEXT: slwi r4, r4, 6 -; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: lis r4, -21386 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: ori r4, r4, 37253 -; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: li r4, 0 +; P9BE-NEXT: vextuhlx r4, r4, v2 +; P9BE-NEXT: extsh r4, r4 +; P9BE-NEXT: srawi r5, r4, 6 +; P9BE-NEXT: addze r5, r5 +; 
P9BE-NEXT: slwi r5, r5, 6 +; P9BE-NEXT: sub r4, r4, r5 +; P9BE-NEXT: mtvsrdd v3, r4, r3 ; P9BE-NEXT: li r3, 6 +; P9BE-NEXT: lis r4, -21386 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: vmrghh v3, v4, v3 +; P9BE-NEXT: ori r4, r4, 37253 ; P9BE-NEXT: extsh r3, r3 ; P9BE-NEXT: mulhw r4, r3, r4 ; P9BE-NEXT: add r4, r4, r3 @@ -804,19 +757,16 @@ ; P9BE-NEXT: add r4, r4, r5 ; P9BE-NEXT: mulli r4, r4, 95 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v4, r3 -; P9BE-NEXT: li r3, 4 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: srawi r4, r3, 3 -; P9BE-NEXT: addze r4, r4 -; P9BE-NEXT: slwi r4, r4, 3 -; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v2, r3 -; P9BE-NEXT: vmrghh v2, v2, v4 -; P9BE-NEXT: vmrghw v2, v3, v2 +; P9BE-NEXT: li r4, 4 +; P9BE-NEXT: vextuhlx r4, r4, v2 +; P9BE-NEXT: extsh r4, r4 +; P9BE-NEXT: srawi r5, r4, 3 +; P9BE-NEXT: addze r5, r5 +; P9BE-NEXT: slwi r5, r5, 3 +; P9BE-NEXT: sub r4, r4, r5 +; P9BE-NEXT: mtvsrdd v2, r4, r3 +; P9BE-NEXT: vpkudum v2, v3, v2 +; P9BE-NEXT: vpkuwum v2, v2, v2 ; P9BE-NEXT: blr ; ; P8LE-LABEL: dont_fold_srem_power_of_two: @@ -925,37 +875,34 @@ ; P9LE-NEXT: add r4, r4, r5 ; P9LE-NEXT: mulli r4, r4, 654 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: lis r4, -19946 -; P9LE-NEXT: mtvsrd v3, r3 -; P9LE-NEXT: li r3, 0 -; P9LE-NEXT: ori r4, r4, 17097 -; P9LE-NEXT: mtvsrd v4, r3 +; P9LE-NEXT: li r4, 0 +; P9LE-NEXT: mtvsrdd v3, r3, r4 ; P9LE-NEXT: li r3, 4 +; P9LE-NEXT: lis r4, -19946 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: vmrghh v3, v3, v4 +; P9LE-NEXT: ori r4, r4, 17097 ; P9LE-NEXT: extsh r3, r3 ; P9LE-NEXT: mulhw r4, r3, r4 ; P9LE-NEXT: add r4, r4, r3 ; P9LE-NEXT: srwi r5, r4, 31 ; P9LE-NEXT: srawi r4, r4, 4 ; P9LE-NEXT: add r4, r4, r5 +; P9LE-NEXT: lis r5, 24749 ; P9LE-NEXT: mulli r4, r4, 23 +; P9LE-NEXT: ori r5, r5, 47143 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: lis r4, 24749 -; P9LE-NEXT: mtvsrd v4, r3 -; P9LE-NEXT: li r3, 6 -; 
P9LE-NEXT: ori r4, r4, 47143 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r3, r3 -; P9LE-NEXT: mulhw r4, r3, r4 -; P9LE-NEXT: srwi r5, r4, 31 -; P9LE-NEXT: srawi r4, r4, 11 -; P9LE-NEXT: add r4, r4, r5 -; P9LE-NEXT: mulli r4, r4, 5423 -; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtvsrd v2, r3 -; P9LE-NEXT: vmrghh v2, v2, v4 -; P9LE-NEXT: vmrglw v2, v2, v3 +; P9LE-NEXT: li r4, 6 +; P9LE-NEXT: vextuhrx r4, r4, v2 +; P9LE-NEXT: extsh r4, r4 +; P9LE-NEXT: mulhw r5, r4, r5 +; P9LE-NEXT: srwi r6, r5, 31 +; P9LE-NEXT: srawi r5, r5, 11 +; P9LE-NEXT: add r5, r5, r6 +; P9LE-NEXT: mulli r5, r5, 5423 +; P9LE-NEXT: sub r4, r4, r5 +; P9LE-NEXT: mtvsrdd v2, r4, r3 +; P9LE-NEXT: vpkudum v2, v2, v3 +; P9LE-NEXT: vpkuwum v2, v2, v2 ; P9LE-NEXT: blr ; ; P9BE-LABEL: dont_fold_srem_one: @@ -970,28 +917,24 @@ ; P9BE-NEXT: srwi r5, r4, 31 ; P9BE-NEXT: srawi r4, r4, 4 ; P9BE-NEXT: add r4, r4, r5 +; P9BE-NEXT: lis r5, 24749 ; P9BE-NEXT: mulli r4, r4, 23 +; P9BE-NEXT: ori r5, r5, 47143 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: lis r4, 24749 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: ori r4, r4, 47143 -; P9BE-NEXT: mtvsrd v3, r3 -; P9BE-NEXT: li r3, 6 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: mulhw r4, r3, r4 -; P9BE-NEXT: srwi r5, r4, 31 -; P9BE-NEXT: srawi r4, r4, 11 -; P9BE-NEXT: add r4, r4, r5 -; P9BE-NEXT: mulli r4, r4, 5423 -; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: lis r4, -14230 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: ori r4, r4, 30865 -; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: li r4, 6 +; P9BE-NEXT: vextuhlx r4, r4, v2 +; P9BE-NEXT: extsh r4, r4 +; P9BE-NEXT: mulhw r5, r4, r5 +; P9BE-NEXT: srwi r6, r5, 31 +; P9BE-NEXT: srawi r5, r5, 11 +; P9BE-NEXT: add r5, r5, r6 +; P9BE-NEXT: mulli r5, r5, 5423 +; P9BE-NEXT: sub r4, r4, r5 +; P9BE-NEXT: mtvsrdd v3, r3, r4 ; P9BE-NEXT: li r3, 2 +; P9BE-NEXT: lis r4, -14230 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: vmrghh v3, v3, v4 +; P9BE-NEXT: ori r4, r4, 30865 ; P9BE-NEXT: extsh r3, r3 ; 
P9BE-NEXT: mulhw r4, r3, r4 ; P9BE-NEXT: add r4, r4, r3 @@ -1000,13 +943,9 @@ ; P9BE-NEXT: add r4, r4, r5 ; P9BE-NEXT: mulli r4, r4, 654 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v2, r3 -; P9BE-NEXT: li r3, 0 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v4, r3 -; P9BE-NEXT: vmrghh v2, v4, v2 -; P9BE-NEXT: vmrghw v2, v2, v3 +; P9BE-NEXT: mtvsrdd v2, 0, r3 +; P9BE-NEXT: vpkudum v2, v2, v3 +; P9BE-NEXT: vpkuwum v2, v2, v2 ; P9BE-NEXT: blr ; ; P8LE-LABEL: dont_fold_srem_one: @@ -1121,34 +1060,31 @@ ; P9LE-NEXT: srwi r5, r4, 31 ; P9LE-NEXT: srawi r4, r4, 4 ; P9LE-NEXT: add r4, r4, r5 +; P9LE-NEXT: lis r5, 24749 ; P9LE-NEXT: mulli r4, r4, 23 +; P9LE-NEXT: ori r5, r5, 47143 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: lis r4, 24749 -; P9LE-NEXT: mtvsrd v3, r3 -; P9LE-NEXT: li r3, 6 -; P9LE-NEXT: ori r4, r4, 47143 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: extsh r3, r3 -; P9LE-NEXT: mulhw r4, r3, r4 -; P9LE-NEXT: srwi r5, r4, 31 -; P9LE-NEXT: srawi r4, r4, 11 -; P9LE-NEXT: add r4, r4, r5 -; P9LE-NEXT: mulli r4, r4, 5423 -; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtvsrd v4, r3 +; P9LE-NEXT: li r4, 6 +; P9LE-NEXT: vextuhrx r4, r4, v2 +; P9LE-NEXT: extsh r4, r4 +; P9LE-NEXT: mulhw r5, r4, r5 +; P9LE-NEXT: srwi r6, r5, 31 +; P9LE-NEXT: srawi r5, r5, 11 +; P9LE-NEXT: add r5, r5, r6 +; P9LE-NEXT: mulli r5, r5, 5423 +; P9LE-NEXT: sub r4, r4, r5 +; P9LE-NEXT: mtvsrdd v3, r4, r3 ; P9LE-NEXT: li r3, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: vmrghh v3, v4, v3 ; P9LE-NEXT: extsh r3, r3 ; P9LE-NEXT: srawi r4, r3, 15 ; P9LE-NEXT: addze r4, r4 ; P9LE-NEXT: slwi r4, r4, 15 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtvsrd v2, r3 -; P9LE-NEXT: li r3, 0 -; P9LE-NEXT: mtvsrd v4, r3 -; P9LE-NEXT: vmrghh v2, v2, v4 -; P9LE-NEXT: vmrglw v2, v3, v2 +; P9LE-NEXT: li r4, 0 +; P9LE-NEXT: mtvsrdd v2, r3, r4 +; P9LE-NEXT: vpkudum v2, v3, v2 +; P9LE-NEXT: vpkuwum v2, v2, v2 ; P9LE-NEXT: blr ; ; P9BE-LABEL: dont_fold_urem_i16_smax: @@ -1163,38 +1099,30 
@@ ; P9BE-NEXT: srwi r5, r4, 31 ; P9BE-NEXT: srawi r4, r4, 4 ; P9BE-NEXT: add r4, r4, r5 +; P9BE-NEXT: lis r5, 24749 ; P9BE-NEXT: mulli r4, r4, 23 +; P9BE-NEXT: ori r5, r5, 47143 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: lis r4, 24749 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: ori r4, r4, 47143 -; P9BE-NEXT: mtvsrd v3, r3 -; P9BE-NEXT: li r3, 6 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: extsh r3, r3 -; P9BE-NEXT: mulhw r4, r3, r4 -; P9BE-NEXT: srwi r5, r4, 31 -; P9BE-NEXT: srawi r4, r4, 11 -; P9BE-NEXT: add r4, r4, r5 -; P9BE-NEXT: mulli r4, r4, 5423 -; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: li r4, 6 +; P9BE-NEXT: vextuhlx r4, r4, v2 +; P9BE-NEXT: extsh r4, r4 +; P9BE-NEXT: mulhw r5, r4, r5 +; P9BE-NEXT: srwi r6, r5, 31 +; P9BE-NEXT: srawi r5, r5, 11 +; P9BE-NEXT: add r5, r5, r6 +; P9BE-NEXT: mulli r5, r5, 5423 +; P9BE-NEXT: sub r4, r4, r5 +; P9BE-NEXT: mtvsrdd v3, r3, r4 ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: vmrghh v3, v3, v4 ; P9BE-NEXT: extsh r3, r3 ; P9BE-NEXT: srawi r4, r3, 15 ; P9BE-NEXT: addze r4, r4 ; P9BE-NEXT: slwi r4, r4, 15 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v2, r3 -; P9BE-NEXT: li r3, 0 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v4, r3 -; P9BE-NEXT: vmrghh v2, v4, v2 -; P9BE-NEXT: vmrghw v2, v2, v3 +; P9BE-NEXT: mtvsrdd v2, 0, r3 +; P9BE-NEXT: vpkudum v2, v2, v3 +; P9BE-NEXT: vpkuwum v2, v2, v2 ; P9BE-NEXT: blr ; ; P8LE-LABEL: dont_fold_urem_i16_smax: Index: llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll =================================================================== --- llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll +++ llvm/test/CodeGen/PowerPC/urem-vector-lkk.ll @@ -13,106 +13,96 @@ ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: lis r4, 21399 -; P9LE-NEXT: lis r5, 8456 +; P9LE-NEXT: lis r5, 16727 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: ori r4, r4, 33437 -; P9LE-NEXT: ori r5, r5, 16913 +; 
P9LE-NEXT: ori r5, r5, 2287 ; P9LE-NEXT: clrlwi r3, r3, 16 ; P9LE-NEXT: mulhwu r4, r3, r4 ; P9LE-NEXT: srwi r4, r4, 5 ; P9LE-NEXT: mulli r4, r4, 98 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: lis r4, 16727 -; P9LE-NEXT: mtvsrd v3, r3 -; P9LE-NEXT: li r3, 6 -; P9LE-NEXT: ori r4, r4, 2287 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: clrlwi r3, r3, 16 -; P9LE-NEXT: mulhwu r4, r3, r4 -; P9LE-NEXT: srwi r4, r4, 8 -; P9LE-NEXT: mulli r4, r4, 1003 -; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtvsrd v4, r3 +; P9LE-NEXT: li r4, 6 +; P9LE-NEXT: vextuhrx r4, r4, v2 +; P9LE-NEXT: clrlwi r4, r4, 16 +; P9LE-NEXT: mulhwu r5, r4, r5 +; P9LE-NEXT: srwi r5, r5, 8 +; P9LE-NEXT: mulli r5, r5, 1003 +; P9LE-NEXT: sub r4, r4, r5 +; P9LE-NEXT: lis r5, 8456 +; P9LE-NEXT: mtvsrdd v3, r4, r3 ; P9LE-NEXT: li r3, 2 +; P9LE-NEXT: ori r5, r5, 16913 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: vmrghh v3, v4, v3 ; P9LE-NEXT: clrlwi r4, r3, 16 ; P9LE-NEXT: rlwinm r3, r3, 30, 18, 31 ; P9LE-NEXT: mulhwu r3, r3, r5 +; P9LE-NEXT: lis r5, 22765 +; P9LE-NEXT: ori r5, r5, 8969 ; P9LE-NEXT: srwi r3, r3, 2 ; P9LE-NEXT: mulli r3, r3, 124 ; P9LE-NEXT: sub r3, r4, r3 -; P9LE-NEXT: lis r4, 22765 -; P9LE-NEXT: mtvsrd v4, r3 -; P9LE-NEXT: li r3, 0 -; P9LE-NEXT: ori r4, r4, 8969 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: clrlwi r3, r3, 16 -; P9LE-NEXT: mulhwu r4, r3, r4 -; P9LE-NEXT: sub r5, r3, r4 -; P9LE-NEXT: srwi r5, r5, 1 -; P9LE-NEXT: add r4, r5, r4 -; P9LE-NEXT: srwi r4, r4, 6 -; P9LE-NEXT: mulli r4, r4, 95 -; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtvsrd v2, r3 -; P9LE-NEXT: vmrghh v2, v4, v2 -; P9LE-NEXT: vmrglw v2, v3, v2 +; P9LE-NEXT: li r4, 0 +; P9LE-NEXT: vextuhrx r4, r4, v2 +; P9LE-NEXT: clrlwi r4, r4, 16 +; P9LE-NEXT: mulhwu r5, r4, r5 +; P9LE-NEXT: sub r6, r4, r5 +; P9LE-NEXT: srwi r6, r6, 1 +; P9LE-NEXT: add r5, r6, r5 +; P9LE-NEXT: srwi r5, r5, 6 +; P9LE-NEXT: mulli r5, r5, 95 +; P9LE-NEXT: sub r4, r4, r5 +; P9LE-NEXT: mtvsrdd v2, r3, r4 +; P9LE-NEXT: vpkudum v2, v3, v2 +; P9LE-NEXT: 
vpkuwum v2, v2, v2 ; P9LE-NEXT: blr ; ; P9BE-LABEL: fold_urem_vec_1: ; P9BE: # %bb.0: ; P9BE-NEXT: li r3, 6 ; P9BE-NEXT: lis r4, 16727 -; P9BE-NEXT: lis r5, 8456 +; P9BE-NEXT: lis r5, 21399 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: ori r4, r4, 2287 -; P9BE-NEXT: ori r5, r5, 16913 +; P9BE-NEXT: ori r5, r5, 33437 ; P9BE-NEXT: clrlwi r3, r3, 16 ; P9BE-NEXT: mulhwu r4, r3, r4 ; P9BE-NEXT: srwi r4, r4, 8 ; P9BE-NEXT: mulli r4, r4, 1003 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: lis r4, 21399 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: ori r4, r4, 33437 -; P9BE-NEXT: mtvsrd v3, r3 -; P9BE-NEXT: li r3, 4 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: clrlwi r3, r3, 16 -; P9BE-NEXT: mulhwu r4, r3, r4 -; P9BE-NEXT: srwi r4, r4, 5 -; P9BE-NEXT: mulli r4, r4, 98 -; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: li r4, 4 +; P9BE-NEXT: vextuhlx r4, r4, v2 +; P9BE-NEXT: clrlwi r4, r4, 16 +; P9BE-NEXT: mulhwu r5, r4, r5 +; P9BE-NEXT: srwi r5, r5, 5 +; P9BE-NEXT: mulli r5, r5, 98 +; P9BE-NEXT: sub r4, r4, r5 +; P9BE-NEXT: lis r5, 8456 +; P9BE-NEXT: mtvsrdd v3, r4, r3 ; P9BE-NEXT: li r3, 2 +; P9BE-NEXT: ori r5, r5, 16913 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: vmrghh v3, v4, v3 ; P9BE-NEXT: clrlwi r4, r3, 16 ; P9BE-NEXT: rlwinm r3, r3, 30, 18, 31 ; P9BE-NEXT: mulhwu r3, r3, r5 +; P9BE-NEXT: lis r5, 22765 +; P9BE-NEXT: ori r5, r5, 8969 ; P9BE-NEXT: srwi r3, r3, 2 ; P9BE-NEXT: mulli r3, r3, 124 ; P9BE-NEXT: sub r3, r4, r3 -; P9BE-NEXT: lis r4, 22765 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: ori r4, r4, 8969 -; P9BE-NEXT: mtvsrd v4, r3 -; P9BE-NEXT: li r3, 0 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: clrlwi r3, r3, 16 -; P9BE-NEXT: mulhwu r4, r3, r4 -; P9BE-NEXT: sub r5, r3, r4 -; P9BE-NEXT: srwi r5, r5, 1 -; P9BE-NEXT: add r4, r5, r4 -; P9BE-NEXT: srwi r4, r4, 6 -; P9BE-NEXT: mulli r4, r4, 95 -; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v2, r3 -; P9BE-NEXT: vmrghh v2, v2, v4 -; 
P9BE-NEXT: vmrghw v2, v2, v3 +; P9BE-NEXT: li r4, 0 +; P9BE-NEXT: vextuhlx r4, r4, v2 +; P9BE-NEXT: clrlwi r4, r4, 16 +; P9BE-NEXT: mulhwu r5, r4, r5 +; P9BE-NEXT: sub r6, r4, r5 +; P9BE-NEXT: srwi r6, r6, 1 +; P9BE-NEXT: add r5, r6, r5 +; P9BE-NEXT: srwi r5, r5, 6 +; P9BE-NEXT: mulli r5, r5, 95 +; P9BE-NEXT: sub r4, r4, r5 +; P9BE-NEXT: mtvsrdd v2, r4, r3 +; P9BE-NEXT: vpkudum v2, v2, v3 +; P9BE-NEXT: vpkuwum v2, v2, v2 ; P9BE-NEXT: blr ; ; P8LE-LABEL: fold_urem_vec_1: @@ -234,21 +224,19 @@ ; P9LE-NEXT: srwi r5, r5, 6 ; P9LE-NEXT: mulli r5, r5, 95 ; P9LE-NEXT: sub r3, r3, r5 -; P9LE-NEXT: mtvsrd v3, r3 -; P9LE-NEXT: li r3, 2 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: clrlwi r3, r3, 16 -; P9LE-NEXT: mulhwu r5, r3, r4 -; P9LE-NEXT: sub r6, r3, r5 -; P9LE-NEXT: srwi r6, r6, 1 -; P9LE-NEXT: add r5, r6, r5 -; P9LE-NEXT: srwi r5, r5, 6 -; P9LE-NEXT: mulli r5, r5, 95 -; P9LE-NEXT: sub r3, r3, r5 -; P9LE-NEXT: mtvsrd v4, r3 +; P9LE-NEXT: li r5, 2 +; P9LE-NEXT: vextuhrx r5, r5, v2 +; P9LE-NEXT: clrlwi r5, r5, 16 +; P9LE-NEXT: mulhwu r6, r5, r4 +; P9LE-NEXT: sub r7, r5, r6 +; P9LE-NEXT: srwi r7, r7, 1 +; P9LE-NEXT: add r6, r7, r6 +; P9LE-NEXT: srwi r6, r6, 6 +; P9LE-NEXT: mulli r6, r6, 95 +; P9LE-NEXT: sub r5, r5, r6 +; P9LE-NEXT: mtvsrdd v3, r5, r3 ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: vmrghh v3, v4, v3 ; P9LE-NEXT: clrlwi r3, r3, 16 ; P9LE-NEXT: mulhwu r5, r3, r4 ; P9LE-NEXT: sub r6, r3, r5 @@ -257,20 +245,19 @@ ; P9LE-NEXT: srwi r5, r5, 6 ; P9LE-NEXT: mulli r5, r5, 95 ; P9LE-NEXT: sub r3, r3, r5 -; P9LE-NEXT: mtvsrd v4, r3 -; P9LE-NEXT: li r3, 6 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: clrlwi r3, r3, 16 -; P9LE-NEXT: mulhwu r4, r3, r4 -; P9LE-NEXT: sub r5, r3, r4 -; P9LE-NEXT: srwi r5, r5, 1 -; P9LE-NEXT: add r4, r5, r4 +; P9LE-NEXT: li r5, 6 +; P9LE-NEXT: vextuhrx r5, r5, v2 +; P9LE-NEXT: clrlwi r5, r5, 16 +; P9LE-NEXT: mulhwu r4, r5, r4 +; P9LE-NEXT: sub r6, r5, r4 +; P9LE-NEXT: srwi r6, r6, 1 +; P9LE-NEXT: add r4, r6, r4 ; 
P9LE-NEXT: srwi r4, r4, 6 ; P9LE-NEXT: mulli r4, r4, 95 -; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtvsrd v2, r3 -; P9LE-NEXT: vmrghh v2, v2, v4 -; P9LE-NEXT: vmrglw v2, v2, v3 +; P9LE-NEXT: sub r4, r5, r4 +; P9LE-NEXT: mtvsrdd v2, r4, r3 +; P9LE-NEXT: vpkudum v2, v2, v3 +; P9LE-NEXT: vpkuwum v2, v2, v2 ; P9LE-NEXT: blr ; ; P9BE-LABEL: fold_urem_vec_2: @@ -287,23 +274,19 @@ ; P9BE-NEXT: srwi r5, r5, 6 ; P9BE-NEXT: mulli r5, r5, 95 ; P9BE-NEXT: sub r3, r3, r5 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v3, r3 -; P9BE-NEXT: li r3, 4 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: clrlwi r3, r3, 16 -; P9BE-NEXT: mulhwu r5, r3, r4 -; P9BE-NEXT: sub r6, r3, r5 -; P9BE-NEXT: srwi r6, r6, 1 -; P9BE-NEXT: add r5, r6, r5 -; P9BE-NEXT: srwi r5, r5, 6 -; P9BE-NEXT: mulli r5, r5, 95 -; P9BE-NEXT: sub r3, r3, r5 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: li r5, 4 +; P9BE-NEXT: vextuhlx r5, r5, v2 +; P9BE-NEXT: clrlwi r5, r5, 16 +; P9BE-NEXT: mulhwu r6, r5, r4 +; P9BE-NEXT: sub r7, r5, r6 +; P9BE-NEXT: srwi r7, r7, 1 +; P9BE-NEXT: add r6, r7, r6 +; P9BE-NEXT: srwi r6, r6, 6 +; P9BE-NEXT: mulli r6, r6, 95 +; P9BE-NEXT: sub r5, r5, r6 +; P9BE-NEXT: mtvsrdd v3, r5, r3 ; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: vmrghh v3, v4, v3 ; P9BE-NEXT: clrlwi r3, r3, 16 ; P9BE-NEXT: mulhwu r5, r3, r4 ; P9BE-NEXT: sub r6, r3, r5 @@ -312,22 +295,19 @@ ; P9BE-NEXT: srwi r5, r5, 6 ; P9BE-NEXT: mulli r5, r5, 95 ; P9BE-NEXT: sub r3, r3, r5 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v4, r3 -; P9BE-NEXT: li r3, 0 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: clrlwi r3, r3, 16 -; P9BE-NEXT: mulhwu r4, r3, r4 -; P9BE-NEXT: sub r5, r3, r4 -; P9BE-NEXT: srwi r5, r5, 1 -; P9BE-NEXT: add r4, r5, r4 +; P9BE-NEXT: li r5, 0 +; P9BE-NEXT: vextuhlx r5, r5, v2 +; P9BE-NEXT: clrlwi r5, r5, 16 +; P9BE-NEXT: mulhwu r4, r5, r4 +; P9BE-NEXT: sub r6, r5, r4 +; P9BE-NEXT: srwi r6, r6, 1 +; P9BE-NEXT: add r4, r6, r4 ; P9BE-NEXT: srwi r4, r4, 6 ; 
P9BE-NEXT: mulli r4, r4, 95 -; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v2, r3 -; P9BE-NEXT: vmrghh v2, v2, v4 -; P9BE-NEXT: vmrghw v2, v2, v3 +; P9BE-NEXT: sub r4, r5, r4 +; P9BE-NEXT: mtvsrdd v2, r4, r3 +; P9BE-NEXT: vpkudum v2, v2, v3 +; P9BE-NEXT: vpkuwum v2, v2, v2 ; P9BE-NEXT: blr ; ; P8LE-LABEL: fold_urem_vec_2: @@ -455,50 +435,44 @@ ; P9LE-NEXT: srwi r5, r5, 6 ; P9LE-NEXT: mulli r6, r5, 95 ; P9LE-NEXT: sub r3, r3, r6 -; P9LE-NEXT: mtvsrd v3, r3 -; P9LE-NEXT: li r3, 2 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: clrlwi r6, r3, 16 -; P9LE-NEXT: mulhwu r7, r6, r4 -; P9LE-NEXT: sub r6, r6, r7 -; P9LE-NEXT: srwi r6, r6, 1 -; P9LE-NEXT: add r6, r6, r7 -; P9LE-NEXT: srwi r6, r6, 6 -; P9LE-NEXT: mulli r7, r6, 95 -; P9LE-NEXT: sub r3, r3, r7 -; P9LE-NEXT: mtvsrd v4, r3 -; P9LE-NEXT: li r3, 4 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: vmrghh v3, v4, v3 -; P9LE-NEXT: clrlwi r7, r3, 16 +; P9LE-NEXT: li r6, 2 +; P9LE-NEXT: vextuhrx r6, r6, v2 +; P9LE-NEXT: clrlwi r7, r6, 16 ; P9LE-NEXT: mulhwu r8, r7, r4 ; P9LE-NEXT: sub r7, r7, r8 ; P9LE-NEXT: srwi r7, r7, 1 ; P9LE-NEXT: add r7, r7, r8 ; P9LE-NEXT: srwi r7, r7, 6 ; P9LE-NEXT: mulli r8, r7, 95 -; P9LE-NEXT: sub r3, r3, r8 -; P9LE-NEXT: mtvsrd v4, r3 -; P9LE-NEXT: li r3, 6 +; P9LE-NEXT: sub r6, r6, r8 +; P9LE-NEXT: mtvsrdd v3, r6, r3 +; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: clrlwi r8, r3, 16 -; P9LE-NEXT: mulhwu r4, r8, r4 -; P9LE-NEXT: sub r8, r8, r4 -; P9LE-NEXT: srwi r8, r8, 1 -; P9LE-NEXT: add r4, r8, r4 -; P9LE-NEXT: srwi r4, r4, 6 -; P9LE-NEXT: mulli r8, r4, 95 -; P9LE-NEXT: mtvsrd v5, r4 +; P9LE-NEXT: clrlwi r6, r3, 16 +; P9LE-NEXT: mulhwu r8, r6, r4 +; P9LE-NEXT: sub r6, r6, r8 +; P9LE-NEXT: srwi r6, r6, 1 +; P9LE-NEXT: add r6, r6, r8 +; P9LE-NEXT: srwi r6, r6, 6 +; P9LE-NEXT: mulli r8, r6, 95 ; P9LE-NEXT: sub r3, r3, r8 -; P9LE-NEXT: mtvsrd v2, r3 -; P9LE-NEXT: vmrghh v2, v2, v4 -; P9LE-NEXT: mtvsrd v4, r6 -; P9LE-NEXT: vmrglw v2, v2, v3 -; 
P9LE-NEXT: mtvsrd v3, r5 -; P9LE-NEXT: vmrghh v3, v4, v3 -; P9LE-NEXT: mtvsrd v4, r7 -; P9LE-NEXT: vmrghh v4, v5, v4 -; P9LE-NEXT: vmrglw v3, v4, v3 +; P9LE-NEXT: li r8, 6 +; P9LE-NEXT: vextuhrx r8, r8, v2 +; P9LE-NEXT: clrlwi r9, r8, 16 +; P9LE-NEXT: mulhwu r4, r9, r4 +; P9LE-NEXT: sub r9, r9, r4 +; P9LE-NEXT: srwi r9, r9, 1 +; P9LE-NEXT: add r4, r9, r4 +; P9LE-NEXT: srwi r4, r4, 6 +; P9LE-NEXT: mulli r9, r4, 95 +; P9LE-NEXT: mtvsrdd v4, r4, r6 +; P9LE-NEXT: sub r8, r8, r9 +; P9LE-NEXT: mtvsrdd v2, r8, r3 +; P9LE-NEXT: vpkudum v2, v2, v3 +; P9LE-NEXT: mtvsrdd v3, r7, r5 +; P9LE-NEXT: vpkudum v3, v4, v3 +; P9LE-NEXT: vpkuwum v2, v2, v2 +; P9LE-NEXT: vpkuwum v3, v3, v3 ; P9LE-NEXT: vadduhm v2, v2, v3 ; P9LE-NEXT: blr ; @@ -516,58 +490,44 @@ ; P9BE-NEXT: srwi r4, r4, 6 ; P9BE-NEXT: mulli r6, r4, 95 ; P9BE-NEXT: sub r3, r3, r6 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v3, r3 -; P9BE-NEXT: li r3, 4 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: clrlwi r6, r3, 16 -; P9BE-NEXT: mulhwu r7, r6, r5 -; P9BE-NEXT: sub r6, r6, r7 -; P9BE-NEXT: srwi r6, r6, 1 -; P9BE-NEXT: add r6, r6, r7 -; P9BE-NEXT: srwi r6, r6, 6 -; P9BE-NEXT: mulli r7, r6, 95 -; P9BE-NEXT: sub r3, r3, r7 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v4, r3 -; P9BE-NEXT: li r3, 2 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: vmrghh v3, v4, v3 -; P9BE-NEXT: clrlwi r7, r3, 16 +; P9BE-NEXT: li r6, 4 +; P9BE-NEXT: vextuhlx r6, r6, v2 +; P9BE-NEXT: clrlwi r7, r6, 16 ; P9BE-NEXT: mulhwu r8, r7, r5 ; P9BE-NEXT: sub r7, r7, r8 ; P9BE-NEXT: srwi r7, r7, 1 ; P9BE-NEXT: add r7, r7, r8 ; P9BE-NEXT: srwi r7, r7, 6 ; P9BE-NEXT: mulli r8, r7, 95 -; P9BE-NEXT: sub r3, r3, r8 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v4, r3 -; P9BE-NEXT: li r3, 0 +; P9BE-NEXT: sub r6, r6, r8 +; P9BE-NEXT: mtvsrdd v3, r6, r3 +; P9BE-NEXT: li r3, 2 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: clrlwi r3, r3, 16 -; P9BE-NEXT: mulhwu r5, r3, r5 -; P9BE-NEXT: sub r8, r3, r5 -; P9BE-NEXT: srwi r8, r8, 1 -; P9BE-NEXT: add 
r5, r8, r5 -; P9BE-NEXT: srwi r5, r5, 6 -; P9BE-NEXT: mulli r8, r5, 95 +; P9BE-NEXT: clrlwi r6, r3, 16 +; P9BE-NEXT: mulhwu r8, r6, r5 +; P9BE-NEXT: sub r6, r6, r8 +; P9BE-NEXT: srwi r6, r6, 1 +; P9BE-NEXT: add r6, r6, r8 +; P9BE-NEXT: srwi r6, r6, 6 +; P9BE-NEXT: mulli r8, r6, 95 ; P9BE-NEXT: sub r3, r3, r8 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v2, r3 -; P9BE-NEXT: sldi r3, r4, 48 -; P9BE-NEXT: vmrghh v2, v2, v4 -; P9BE-NEXT: vmrghw v2, v2, v3 -; P9BE-NEXT: mtvsrd v3, r3 -; P9BE-NEXT: sldi r3, r6, 48 -; P9BE-NEXT: mtvsrd v4, r3 -; P9BE-NEXT: sldi r3, r7, 48 -; P9BE-NEXT: vmrghh v3, v4, v3 -; P9BE-NEXT: mtvsrd v4, r3 -; P9BE-NEXT: sldi r3, r5, 48 -; P9BE-NEXT: mtvsrd v5, r3 -; P9BE-NEXT: vmrghh v4, v5, v4 -; P9BE-NEXT: vmrghw v3, v4, v3 +; P9BE-NEXT: li r8, 0 +; P9BE-NEXT: vextuhlx r8, r8, v2 +; P9BE-NEXT: clrlwi r8, r8, 16 +; P9BE-NEXT: mulhwu r5, r8, r5 +; P9BE-NEXT: sub r9, r8, r5 +; P9BE-NEXT: srwi r9, r9, 1 +; P9BE-NEXT: add r5, r9, r5 +; P9BE-NEXT: srwi r5, r5, 6 +; P9BE-NEXT: mulli r9, r5, 95 +; P9BE-NEXT: mtvsrdd v4, r5, r6 +; P9BE-NEXT: sub r8, r8, r9 +; P9BE-NEXT: mtvsrdd v2, r8, r3 +; P9BE-NEXT: vpkudum v2, v2, v3 +; P9BE-NEXT: mtvsrdd v3, r7, r4 +; P9BE-NEXT: vpkudum v3, v4, v3 +; P9BE-NEXT: vpkuwum v2, v2, v2 +; P9BE-NEXT: vpkuwum v3, v3, v3 ; P9BE-NEXT: vadduhm v2, v2, v3 ; P9BE-NEXT: blr ; @@ -708,18 +668,16 @@ ; P9LE-LABEL: dont_fold_urem_power_of_two: ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 0 -; P9LE-NEXT: lis r4, 22765 +; P9LE-NEXT: li r4, 2 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: ori r4, r4, 8969 +; P9LE-NEXT: vextuhrx r4, r4, v2 ; P9LE-NEXT: clrlwi r3, r3, 26 -; P9LE-NEXT: mtvsrd v3, r3 -; P9LE-NEXT: li r3, 2 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: clrlwi r3, r3, 27 -; P9LE-NEXT: mtvsrd v4, r3 +; P9LE-NEXT: clrlwi r4, r4, 27 +; P9LE-NEXT: mtvsrdd v3, r4, r3 ; P9LE-NEXT: li r3, 6 +; P9LE-NEXT: lis r4, 22765 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: vmrghh v3, v4, v3 +; P9LE-NEXT: ori r4, r4, 8969 ; P9LE-NEXT: clrlwi 
r3, r3, 16 ; P9LE-NEXT: mulhwu r4, r3, r4 ; P9LE-NEXT: sub r5, r3, r4 @@ -728,32 +686,27 @@ ; P9LE-NEXT: srwi r4, r4, 6 ; P9LE-NEXT: mulli r4, r4, 95 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtvsrd v4, r3 -; P9LE-NEXT: li r3, 4 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: clrlwi r3, r3, 29 -; P9LE-NEXT: mtvsrd v2, r3 -; P9LE-NEXT: vmrghh v2, v4, v2 -; P9LE-NEXT: vmrglw v2, v2, v3 +; P9LE-NEXT: li r4, 4 +; P9LE-NEXT: vextuhrx r4, r4, v2 +; P9LE-NEXT: clrlwi r4, r4, 29 +; P9LE-NEXT: mtvsrdd v2, r3, r4 +; P9LE-NEXT: vpkudum v2, v2, v3 +; P9LE-NEXT: vpkuwum v2, v2, v2 ; P9LE-NEXT: blr ; ; P9BE-LABEL: dont_fold_urem_power_of_two: ; P9BE: # %bb.0: ; P9BE-NEXT: li r3, 2 -; P9BE-NEXT: lis r4, 22765 +; P9BE-NEXT: li r4, 0 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: ori r4, r4, 8969 +; P9BE-NEXT: vextuhlx r4, r4, v2 ; P9BE-NEXT: clrlwi r3, r3, 27 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v3, r3 -; P9BE-NEXT: li r3, 0 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: clrlwi r3, r3, 26 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: clrlwi r4, r4, 26 +; P9BE-NEXT: mtvsrdd v3, r4, r3 ; P9BE-NEXT: li r3, 6 +; P9BE-NEXT: lis r4, 22765 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: vmrghh v3, v4, v3 +; P9BE-NEXT: ori r4, r4, 8969 ; P9BE-NEXT: clrlwi r3, r3, 16 ; P9BE-NEXT: mulhwu r4, r3, r4 ; P9BE-NEXT: sub r5, r3, r4 @@ -762,15 +715,12 @@ ; P9BE-NEXT: srwi r4, r4, 6 ; P9BE-NEXT: mulli r4, r4, 95 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v4, r3 -; P9BE-NEXT: li r3, 4 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: clrlwi r3, r3, 29 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v2, r3 -; P9BE-NEXT: vmrghh v2, v2, v4 -; P9BE-NEXT: vmrghw v2, v3, v2 +; P9BE-NEXT: li r4, 4 +; P9BE-NEXT: vextuhlx r4, r4, v2 +; P9BE-NEXT: clrlwi r4, r4, 29 +; P9BE-NEXT: mtvsrdd v2, r4, r3 +; P9BE-NEXT: vpkudum v2, v3, v2 +; P9BE-NEXT: vpkuwum v2, v2, v2 ; P9BE-NEXT: blr ; ; P8LE-LABEL: dont_fold_urem_power_of_two: @@ -845,84 
+795,73 @@ ; P9LE: # %bb.0: ; P9LE-NEXT: li r3, 4 ; P9LE-NEXT: lis r4, -19946 -; P9LE-NEXT: lis r5, -14230 +; P9LE-NEXT: lis r5, 24749 ; P9LE-NEXT: vextuhrx r3, r3, v2 ; P9LE-NEXT: ori r4, r4, 17097 -; P9LE-NEXT: ori r5, r5, 30865 +; P9LE-NEXT: ori r5, r5, 47143 ; P9LE-NEXT: clrlwi r3, r3, 16 ; P9LE-NEXT: mulhwu r4, r3, r4 ; P9LE-NEXT: srwi r4, r4, 4 ; P9LE-NEXT: mulli r4, r4, 23 ; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: lis r4, 24749 -; P9LE-NEXT: mtvsrd v3, r3 -; P9LE-NEXT: li r3, 6 -; P9LE-NEXT: ori r4, r4, 47143 -; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: clrlwi r3, r3, 16 -; P9LE-NEXT: mulhwu r4, r3, r4 -; P9LE-NEXT: srwi r4, r4, 11 -; P9LE-NEXT: mulli r4, r4, 5423 -; P9LE-NEXT: sub r3, r3, r4 -; P9LE-NEXT: mtvsrd v4, r3 +; P9LE-NEXT: li r4, 6 +; P9LE-NEXT: vextuhrx r4, r4, v2 +; P9LE-NEXT: clrlwi r4, r4, 16 +; P9LE-NEXT: mulhwu r5, r4, r5 +; P9LE-NEXT: srwi r5, r5, 11 +; P9LE-NEXT: mulli r5, r5, 5423 +; P9LE-NEXT: sub r4, r4, r5 +; P9LE-NEXT: lis r5, -14230 +; P9LE-NEXT: mtvsrdd v3, r4, r3 ; P9LE-NEXT: li r3, 2 +; P9LE-NEXT: ori r5, r5, 30865 ; P9LE-NEXT: vextuhrx r3, r3, v2 -; P9LE-NEXT: vmrghh v3, v4, v3 ; P9LE-NEXT: clrlwi r4, r3, 16 ; P9LE-NEXT: rlwinm r3, r3, 31, 17, 31 ; P9LE-NEXT: mulhwu r3, r3, r5 ; P9LE-NEXT: srwi r3, r3, 8 ; P9LE-NEXT: mulli r3, r3, 654 ; P9LE-NEXT: sub r3, r4, r3 -; P9LE-NEXT: mtvsrd v2, r3 -; P9LE-NEXT: li r3, 0 -; P9LE-NEXT: mtvsrd v4, r3 -; P9LE-NEXT: vmrghh v2, v2, v4 -; P9LE-NEXT: vmrglw v2, v3, v2 +; P9LE-NEXT: li r4, 0 +; P9LE-NEXT: mtvsrdd v2, r3, r4 +; P9LE-NEXT: vpkudum v2, v3, v2 +; P9LE-NEXT: vpkuwum v2, v2, v2 ; P9LE-NEXT: blr ; ; P9BE-LABEL: dont_fold_urem_one: ; P9BE: # %bb.0: ; P9BE-NEXT: li r3, 6 ; P9BE-NEXT: lis r4, 24749 -; P9BE-NEXT: lis r5, -14230 +; P9BE-NEXT: lis r5, -19946 ; P9BE-NEXT: vextuhlx r3, r3, v2 ; P9BE-NEXT: ori r4, r4, 47143 -; P9BE-NEXT: ori r5, r5, 30865 +; P9BE-NEXT: ori r5, r5, 17097 ; P9BE-NEXT: clrlwi r3, r3, 16 ; P9BE-NEXT: mulhwu r4, r3, r4 ; P9BE-NEXT: srwi r4, r4, 11 ; P9BE-NEXT: 
mulli r4, r4, 5423 ; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: lis r4, -19946 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: ori r4, r4, 17097 -; P9BE-NEXT: mtvsrd v3, r3 -; P9BE-NEXT: li r3, 4 -; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: clrlwi r3, r3, 16 -; P9BE-NEXT: mulhwu r4, r3, r4 -; P9BE-NEXT: srwi r4, r4, 4 -; P9BE-NEXT: mulli r4, r4, 23 -; P9BE-NEXT: sub r3, r3, r4 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v4, r3 +; P9BE-NEXT: li r4, 4 +; P9BE-NEXT: vextuhlx r4, r4, v2 +; P9BE-NEXT: clrlwi r4, r4, 16 +; P9BE-NEXT: mulhwu r5, r4, r5 +; P9BE-NEXT: srwi r5, r5, 4 +; P9BE-NEXT: mulli r5, r5, 23 +; P9BE-NEXT: sub r4, r4, r5 +; P9BE-NEXT: lis r5, -14230 +; P9BE-NEXT: mtvsrdd v3, r4, r3 ; P9BE-NEXT: li r3, 2 +; P9BE-NEXT: ori r5, r5, 30865 ; P9BE-NEXT: vextuhlx r3, r3, v2 -; P9BE-NEXT: vmrghh v3, v4, v3 ; P9BE-NEXT: clrlwi r4, r3, 16 ; P9BE-NEXT: rlwinm r3, r3, 31, 17, 31 ; P9BE-NEXT: mulhwu r3, r3, r5 ; P9BE-NEXT: srwi r3, r3, 8 ; P9BE-NEXT: mulli r3, r3, 654 ; P9BE-NEXT: sub r3, r4, r3 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v2, r3 -; P9BE-NEXT: li r3, 0 -; P9BE-NEXT: sldi r3, r3, 48 -; P9BE-NEXT: mtvsrd v4, r3 -; P9BE-NEXT: vmrghh v2, v4, v2 -; P9BE-NEXT: vmrghw v2, v2, v3 +; P9BE-NEXT: mtvsrdd v2, 0, r3 +; P9BE-NEXT: vpkudum v2, v2, v3 +; P9BE-NEXT: vpkuwum v2, v2, v2 ; P9BE-NEXT: blr ; ; P8LE-LABEL: dont_fold_urem_one: Index: llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll =================================================================== --- llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll +++ llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i16_elts.ll @@ -107,23 +107,20 @@ ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 ; CHECK-P9-NEXT: xxswapd vs0, v2 -; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mffprwz r4, f0 ; CHECK-P9-NEXT: xscvspdpn f0, v2 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mtvsrdd 
v3, r4, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghh v3, v4, v3 ; CHECK-P9-NEXT: mffprwz r3, f0 ; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 1 -; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: vmrghh v2, v4, v2 -; CHECK-P9-NEXT: vmrglw v2, v2, v3 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: mtvsrdd v2, r3, r4 +; CHECK-P9-NEXT: vpkudum v2, v2, v3 +; CHECK-P9-NEXT: vpkuwum v2, v2, v2 ; CHECK-P9-NEXT: mfvsrld r3, v2 ; CHECK-P9-NEXT: blr ; @@ -134,27 +131,20 @@ ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xxswapd vs0, v2 -; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f0 +; CHECK-BE-NEXT: mffprwz r4, f0 ; CHECK-BE-NEXT: xscvspdpn f0, v2 -; CHECK-BE-NEXT: sldi r3, r3, 48 +; CHECK-BE-NEXT: mtvsrdd v3, r4, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 1 -; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v2, r3 -; CHECK-BE-NEXT: vmrghh v2, v4, v2 -; CHECK-BE-NEXT: vmrghw v2, v2, v3 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: mtvsrdd v2, r3, r4 +; CHECK-BE-NEXT: vpkudum v2, v2, v3 +; CHECK-BE-NEXT: vpkuwum v2, v2, v2 ; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -218,112 +208,88 @@ ; ; CHECK-P9-LABEL: test8elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs1, 0(r3) -; CHECK-P9-NEXT: lxv vs0, 16(r3) -; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mffprwz 
r3, f2 -; CHECK-P9-NEXT: xxswapd vs2, vs1 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: xscvspdpn f2, vs1 -; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: lxv vs0, 0(r3) +; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrghh v2, v3, v2 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mffprwz r4, f1 +; CHECK-P9-NEXT: xxswapd vs1, vs0 +; CHECK-P9-NEXT: xscvspdpn f1, vs1 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: mffprwz r5, f1 +; CHECK-P9-NEXT: xscvspdpn f1, vs0 +; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 +; CHECK-P9-NEXT: mtvsrdd v2, r5, r4 +; CHECK-P9-NEXT: xscvspdpn f0, vs0 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: mffprwz r4, f1 +; CHECK-P9-NEXT: mffprwz r5, f0 +; CHECK-P9-NEXT: lxv vs0, 16(r3) +; CHECK-P9-NEXT: mtvsrdd v3, r4, r5 +; CHECK-P9-NEXT: vpkudum v2, v3, v2 ; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 ; CHECK-P9-NEXT: mffprwz r3, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs0 -; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mffprwz r4, f1 ; CHECK-P9-NEXT: xscvspdpn f1, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mtvsrdd v3, r4, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrghh v3, v4, v3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: 
mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v5, r3 -; CHECK-P9-NEXT: vmrghh v4, v4, v5 -; CHECK-P9-NEXT: vmrglw v3, v4, v3 -; CHECK-P9-NEXT: xxmrgld v2, v3, v2 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: mtvsrdd v4, r3, r4 +; CHECK-P9-NEXT: vpkudum v3, v4, v3 +; CHECK-P9-NEXT: vpkuwum v2, v3, v2 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test8elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: xxswapd vs2, vs1 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: mtvsrd v2, r3 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: xscvspdpn f2, vs1 -; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: lxv vs0, 16(r3) +; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mffprwz r4, f1 +; CHECK-BE-NEXT: xxswapd vs1, vs0 +; CHECK-BE-NEXT: xscvspdpn f1, vs1 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mffprwz r5, f1 +; CHECK-BE-NEXT: xscvspdpn f1, vs0 +; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 +; CHECK-BE-NEXT: mtvsrdd v2, r5, r4 +; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: mffprwz r4, f1 +; CHECK-BE-NEXT: mffprwz r5, f0 +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: mtvsrdd v3, r4, r5 +; CHECK-BE-NEXT: vpkudum v2, v3, v2 ; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: 
mtvsrd v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghh v3, v3, v4 -; CHECK-BE-NEXT: vmrghw v2, v3, v2 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mtvsrdd v3, r4, r3 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v5, r3 -; CHECK-BE-NEXT: vmrghh v4, v4, v5 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: xxmrghd v2, v3, v2 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: mtvsrdd v4, r3, r4 +; CHECK-BE-NEXT: vpkudum v3, v4, v3 +; CHECK-BE-NEXT: vpkuwum v2, v3, v2 ; CHECK-BE-NEXT: blr entry: %a = load <8 x float>, <8 x float>* %0, align 32 @@ -437,218 +403,170 @@ ; ; CHECK-P9-LABEL: test16elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs2, 0(r4) -; CHECK-P9-NEXT: lxv vs1, 16(r4) -; CHECK-P9-NEXT: lxv vs0, 32(r4) -; CHECK-P9-NEXT: xxsldwi vs3, vs2, vs2, 3 -; CHECK-P9-NEXT: xxswapd vs4, vs2 -; CHECK-P9-NEXT: xscvspdpn f5, vs2 -; CHECK-P9-NEXT: xxsldwi vs2, vs2, vs2, 1 -; CHECK-P9-NEXT: xxsldwi vs6, vs1, vs1, 3 +; CHECK-P9-NEXT: lxv vs1, 0(r4) +; CHECK-P9-NEXT: lxv vs0, 16(r4) +; CHECK-P9-NEXT: xxswapd vs3, vs1 +; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3 +; CHECK-P9-NEXT: xscvspdpn f4, vs1 +; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 +; CHECK-P9-NEXT: xxsldwi vs5, vs0, vs0, 3 ; CHECK-P9-NEXT: xscvspdpn f3, vs3 -; CHECK-P9-NEXT: xscvspdpn f4, vs4 ; 
CHECK-P9-NEXT: xscvspdpn f2, vs2 +; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: xscvdpsxws f4, f4 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mffprwz r5, f3 -; CHECK-P9-NEXT: xxswapd vs3, vs1 -; CHECK-P9-NEXT: mtvsrd v2, r5 -; CHECK-P9-NEXT: mffprwz r5, f4 -; CHECK-P9-NEXT: xscvdpsxws f4, f5 -; CHECK-P9-NEXT: xscvspdpn f3, vs3 -; CHECK-P9-NEXT: mtvsrd v3, r5 -; CHECK-P9-NEXT: vmrghh v2, v3, v2 -; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: mffprwz r5, f4 -; CHECK-P9-NEXT: xscvspdpn f4, vs6 -; CHECK-P9-NEXT: mtvsrd v3, r5 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: mffprwz r6, f3 +; CHECK-P9-NEXT: xscvdpsxws f3, f4 ; CHECK-P9-NEXT: mffprwz r5, f2 -; CHECK-P9-NEXT: xscvspdpn f2, vs1 -; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-P9-NEXT: xscvdpsxws f4, f4 -; CHECK-P9-NEXT: mtvsrd v4, r5 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 +; CHECK-P9-NEXT: xxswapd vs2, vs0 +; CHECK-P9-NEXT: mtvsrdd v2, r6, r5 +; CHECK-P9-NEXT: mffprwz r6, f1 +; CHECK-P9-NEXT: xscvspdpn f1, vs0 +; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 +; CHECK-P9-NEXT: xscvspdpn f2, vs2 +; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: mffprwz r5, f4 -; CHECK-P9-NEXT: mtvsrd v4, r5 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: mffprwz r5, f3 -; CHECK-P9-NEXT: xxsldwi vs3, vs0, vs0, 3 -; CHECK-P9-NEXT: mtvsrd v5, r5 -; CHECK-P9-NEXT: mffprwz r5, f2 -; CHECK-P9-NEXT: xscvspdpn f2, vs3 -; CHECK-P9-NEXT: vmrghh v4, v5, v4 -; CHECK-P9-NEXT: mtvsrd v5, r5 +; CHECK-P9-NEXT: xscvspdpn f3, vs5 +; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: mtvsrdd v3, r5, r6 +; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: vpkudum v2, v3, v2 +; CHECK-P9-NEXT: mffprwz r6, f2 +; CHECK-P9-NEXT: mffprwz r5, f3 +; CHECK-P9-NEXT: mtvsrdd v4, r6, r5 +; CHECK-P9-NEXT: mffprwz r6, f0 +; CHECK-P9-NEXT: lxv vs0, 
32(r4) ; CHECK-P9-NEXT: mffprwz r5, f1 -; CHECK-P9-NEXT: xxswapd vs1, vs0 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mtvsrd v0, r5 +; CHECK-P9-NEXT: mtvsrdd v5, r5, r6 +; CHECK-P9-NEXT: vpkudum v4, v5, v4 +; CHECK-P9-NEXT: xxswapd vs2, vs0 +; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 +; CHECK-P9-NEXT: vpkuwum v2, v4, v2 +; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: vmrghh v5, v5, v0 +; CHECK-P9-NEXT: stxv v2, 0(r3) +; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrglw v3, v5, v4 -; CHECK-P9-NEXT: mffprwz r5, f2 +; CHECK-P9-NEXT: mffprwz r6, f2 ; CHECK-P9-NEXT: xscvspdpn f2, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-P9-NEXT: mtvsrd v0, r5 -; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: mffprwz r5, f1 ; CHECK-P9-NEXT: lxv vs1, 48(r4) +; CHECK-P9-NEXT: mtvsrdd v3, r6, r5 +; CHECK-P9-NEXT: xscvspdpn f0, vs0 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mtvsrd v1, r5 -; CHECK-P9-NEXT: vmrghh v0, v1, v0 ; CHECK-P9-NEXT: mffprwz r4, f2 -; CHECK-P9-NEXT: xxmrgld vs2, v3, v2 -; CHECK-P9-NEXT: mtvsrd v4, r4 -; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: mffprwz r5, f0 ; CHECK-P9-NEXT: xxsldwi vs0, vs1, vs1, 3 -; CHECK-P9-NEXT: stxv vs2, 0(r3) -; CHECK-P9-NEXT: mtvsrd v2, r4 +; CHECK-P9-NEXT: mtvsrdd v4, r4, r5 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: vmrghh v2, v4, v2 +; CHECK-P9-NEXT: vpkudum v3, v4, v3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrglw v2, v2, v0 ; CHECK-P9-NEXT: mffprwz r4, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs1 -; CHECK-P9-NEXT: mtvsrd v3, r4 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: mffprwz r5, f0 ; CHECK-P9-NEXT: xscvspdpn f0, vs1 -; CHECK-P9-NEXT: mtvsrd v4, r4 +; CHECK-P9-NEXT: mtvsrdd v4, r5, r4 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghh v3, v4, 
v3 ; CHECK-P9-NEXT: mffprwz r4, f0 ; CHECK-P9-NEXT: xxsldwi vs0, vs1, vs1, 1 -; CHECK-P9-NEXT: mtvsrd v4, r4 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r4, f0 -; CHECK-P9-NEXT: mtvsrd v5, r4 -; CHECK-P9-NEXT: vmrghh v4, v4, v5 -; CHECK-P9-NEXT: vmrglw v3, v4, v3 -; CHECK-P9-NEXT: xxmrgld vs0, v3, v2 -; CHECK-P9-NEXT: stxv vs0, 16(r3) +; CHECK-P9-NEXT: mffprwz r5, f0 +; CHECK-P9-NEXT: mtvsrdd v5, r4, r5 +; CHECK-P9-NEXT: vpkudum v4, v5, v4 +; CHECK-P9-NEXT: vpkuwum v3, v4, v3 +; CHECK-P9-NEXT: stxv v3, 16(r3) ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test16elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs1, 16(r4) ; CHECK-BE-NEXT: lxv vs0, 0(r4) -; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 ; CHECK-BE-NEXT: xxswapd vs3, vs1 +; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 ; CHECK-BE-NEXT: xscvspdpn f4, vs1 ; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 +; CHECK-BE-NEXT: xxsldwi vs5, vs0, vs0, 3 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r5, f2 -; CHECK-BE-NEXT: xxsldwi vs2, vs0, vs0, 3 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: mtvsrd v2, r5 -; CHECK-BE-NEXT: mffprwz r5, f3 +; CHECK-BE-NEXT: mffprwz r6, f3 ; CHECK-BE-NEXT: xscvdpsxws f3, f4 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mtvsrd v3, r5 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 -; CHECK-BE-NEXT: mffprwz r5, f3 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: mtvsrd v3, r5 -; CHECK-BE-NEXT: mffprwz r5, f1 -; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v4, r5 ; CHECK-BE-NEXT: mffprwz r5, f2 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; 
CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: vmrghh v3, v3, v4 -; CHECK-BE-NEXT: mtvsrd v4, r5 -; CHECK-BE-NEXT: vmrghw v2, v3, v2 -; CHECK-BE-NEXT: mffprwz r5, f1 +; CHECK-BE-NEXT: xxswapd vs2, vs0 +; CHECK-BE-NEXT: mtvsrdd v2, r6, r5 +; CHECK-BE-NEXT: mffprwz r6, f1 ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: mtvsrd v5, r5 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mffprwz r5, f3 +; CHECK-BE-NEXT: xscvspdpn f3, vs5 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vmrghh v4, v5, v4 +; CHECK-BE-NEXT: mtvsrdd v3, r5, r6 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: vpkudum v2, v3, v2 +; CHECK-BE-NEXT: mffprwz r6, f2 +; CHECK-BE-NEXT: mffprwz r5, f3 +; CHECK-BE-NEXT: mtvsrdd v4, r6, r5 +; CHECK-BE-NEXT: mffprwz r6, f0 +; CHECK-BE-NEXT: lxv vs0, 48(r4) ; CHECK-BE-NEXT: mffprwz r5, f1 -; CHECK-BE-NEXT: lxv vs1, 48(r4) -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: mtvsrd v5, r5 -; CHECK-BE-NEXT: mffprwz r5, f0 -; CHECK-BE-NEXT: lxv vs0, 32(r4) -; CHECK-BE-NEXT: xscvspdpn f5, vs1 -; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-BE-NEXT: xxswapd vs3, vs1 -; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: xscvdpsxws f5, f5 +; CHECK-BE-NEXT: mtvsrdd v5, r5, r6 +; CHECK-BE-NEXT: vpkudum v4, v5, v4 +; CHECK-BE-NEXT: xxswapd vs2, vs0 +; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 +; CHECK-BE-NEXT: vpkuwum v2, v4, v2 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: mtvsrd v0, r5 -; CHECK-BE-NEXT: xscvspdpn f3, vs3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: vmrghh v5, v5, v0 +; CHECK-BE-NEXT: stxv v2, 0(r3) ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghw v3, v5, v4 -; CHECK-BE-NEXT: 
mffprwz r4, f5 -; CHECK-BE-NEXT: xxmrghd vs4, v3, v2 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: mtvsrd v2, r4 -; CHECK-BE-NEXT: mffprwz r4, f2 -; CHECK-BE-NEXT: stxv vs4, 0(r3) -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: mtvsrd v3, r4 -; CHECK-BE-NEXT: mffprwz r4, f3 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: mtvsrd v4, r4 -; CHECK-BE-NEXT: mffprwz r4, f1 -; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v4, r4 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghh v2, v2, v4 -; CHECK-BE-NEXT: vmrghw v2, v2, v3 -; CHECK-BE-NEXT: mffprwz r4, f1 -; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v3, r4 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r4, f1 -; CHECK-BE-NEXT: xscvspdpn f1, vs0 +; CHECK-BE-NEXT: mffprwz r6, f2 +; CHECK-BE-NEXT: xscvspdpn f2, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mffprwz r5, f1 +; CHECK-BE-NEXT: lxv vs1, 32(r4) +; CHECK-BE-NEXT: mtvsrdd v3, r6, r5 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: mtvsrd v4, r4 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: mffprwz r4, f2 +; CHECK-BE-NEXT: mffprwz r5, f0 +; CHECK-BE-NEXT: xxsldwi vs0, vs1, vs1, 3 +; CHECK-BE-NEXT: mtvsrdd v4, r4, r5 +; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: vpkudum v3, v4, v3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: mffprwz r4, f1 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: mtvsrd v4, r4 ; CHECK-BE-NEXT: mffprwz r4, f0 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: mtvsrd v5, r4 -; CHECK-BE-NEXT: vmrghh v4, v4, v5 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: xxmrghd vs0, v3, v2 -; CHECK-BE-NEXT: stxv vs0, 
16(r3) +; CHECK-BE-NEXT: xxswapd vs0, vs1 +; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: mffprwz r5, f0 +; CHECK-BE-NEXT: xscvspdpn f0, vs1 +; CHECK-BE-NEXT: mtvsrdd v4, r5, r4 +; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: xxsldwi vs0, vs1, vs1, 1 +; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: mffprwz r5, f0 +; CHECK-BE-NEXT: mtvsrdd v5, r4, r5 +; CHECK-BE-NEXT: vpkudum v4, v5, v4 +; CHECK-BE-NEXT: vpkuwum v3, v4, v3 +; CHECK-BE-NEXT: stxv v3, 16(r3) ; CHECK-BE-NEXT: blr entry: %a = load <16 x float>, <16 x float>* %0, align 64 @@ -755,23 +673,20 @@ ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 ; CHECK-P9-NEXT: xxswapd vs0, v2 -; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mffprwz r4, f0 ; CHECK-P9-NEXT: xscvspdpn f0, v2 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mtvsrdd v3, r4, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghh v3, v4, v3 ; CHECK-P9-NEXT: mffprwz r3, f0 ; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 1 -; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: vmrghh v2, v4, v2 -; CHECK-P9-NEXT: vmrglw v2, v2, v3 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: mtvsrdd v2, r3, r4 +; CHECK-P9-NEXT: vpkudum v2, v2, v3 +; CHECK-P9-NEXT: vpkuwum v2, v2, v2 ; CHECK-P9-NEXT: mfvsrld r3, v2 ; CHECK-P9-NEXT: blr ; @@ -782,27 +697,20 @@ ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xxswapd vs0, v2 -; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f0 +; CHECK-BE-NEXT: mffprwz r4, f0 ; CHECK-BE-NEXT: xscvspdpn f0, v2 -; 
CHECK-BE-NEXT: sldi r3, r3, 48 +; CHECK-BE-NEXT: mtvsrdd v3, r4, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 1 -; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v2, r3 -; CHECK-BE-NEXT: vmrghh v2, v4, v2 -; CHECK-BE-NEXT: vmrghw v2, v2, v3 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: mtvsrdd v2, r3, r4 +; CHECK-BE-NEXT: vpkudum v2, v2, v3 +; CHECK-BE-NEXT: vpkuwum v2, v2, v2 ; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -866,112 +774,88 @@ ; ; CHECK-P9-LABEL: test8elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs1, 0(r3) -; CHECK-P9-NEXT: lxv vs0, 16(r3) -; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: xxswapd vs2, vs1 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: xscvspdpn f2, vs1 -; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: lxv vs0, 0(r3) +; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrghh v2, v3, v2 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mffprwz r4, f1 +; CHECK-P9-NEXT: xxswapd vs1, vs0 +; CHECK-P9-NEXT: xscvspdpn f1, vs1 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: mffprwz r5, f1 +; CHECK-P9-NEXT: xscvspdpn f1, vs0 +; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 +; CHECK-P9-NEXT: mtvsrdd v2, r5, r4 +; CHECK-P9-NEXT: xscvspdpn f0, vs0 +; CHECK-P9-NEXT: 
xscvdpsxws f1, f1 +; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: mffprwz r4, f1 +; CHECK-P9-NEXT: mffprwz r5, f0 +; CHECK-P9-NEXT: lxv vs0, 16(r3) +; CHECK-P9-NEXT: mtvsrdd v3, r4, r5 +; CHECK-P9-NEXT: vpkudum v2, v3, v2 ; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 ; CHECK-P9-NEXT: mffprwz r3, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs0 -; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mffprwz r4, f1 ; CHECK-P9-NEXT: xscvspdpn f1, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mtvsrdd v3, r4, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrghh v3, v4, v3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v5, r3 -; CHECK-P9-NEXT: vmrghh v4, v4, v5 -; CHECK-P9-NEXT: vmrglw v3, v4, v3 -; CHECK-P9-NEXT: xxmrgld v2, v3, v2 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: mtvsrdd v4, r3, r4 +; CHECK-P9-NEXT: vpkudum v3, v4, v3 +; CHECK-P9-NEXT: vpkuwum v2, v3, v2 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test8elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs1, 16(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: xxswapd vs2, vs1 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: mtvsrd v2, r3 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: xscvspdpn f2, vs1 -; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: xscvdpsxws 
f2, f2 +; CHECK-BE-NEXT: lxv vs0, 16(r3) +; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 -; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mffprwz r4, f1 +; CHECK-BE-NEXT: xxswapd vs1, vs0 +; CHECK-BE-NEXT: xscvspdpn f1, vs1 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mffprwz r5, f1 +; CHECK-BE-NEXT: xscvspdpn f1, vs0 +; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 +; CHECK-BE-NEXT: mtvsrdd v2, r5, r4 +; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: mffprwz r4, f1 +; CHECK-BE-NEXT: mffprwz r5, f0 +; CHECK-BE-NEXT: lxv vs0, 0(r3) +; CHECK-BE-NEXT: mtvsrdd v3, r4, r5 +; CHECK-BE-NEXT: vpkudum v2, v3, v2 ; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghh v3, v3, v4 -; CHECK-BE-NEXT: vmrghw v2, v3, v2 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: sldi r3, r3, 48 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mtvsrdd v3, r4, r3 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v5, r3 -; 
CHECK-BE-NEXT: vmrghh v4, v4, v5 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: xxmrghd v2, v3, v2 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: mtvsrdd v4, r3, r4 +; CHECK-BE-NEXT: vpkudum v3, v4, v3 +; CHECK-BE-NEXT: vpkuwum v2, v3, v2 ; CHECK-BE-NEXT: blr entry: %a = load <8 x float>, <8 x float>* %0, align 32 @@ -1085,218 +969,170 @@ ; ; CHECK-P9-LABEL: test16elt_signed: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs2, 0(r4) -; CHECK-P9-NEXT: lxv vs1, 16(r4) -; CHECK-P9-NEXT: lxv vs0, 32(r4) -; CHECK-P9-NEXT: xxsldwi vs3, vs2, vs2, 3 -; CHECK-P9-NEXT: xxswapd vs4, vs2 -; CHECK-P9-NEXT: xscvspdpn f5, vs2 -; CHECK-P9-NEXT: xxsldwi vs2, vs2, vs2, 1 -; CHECK-P9-NEXT: xxsldwi vs6, vs1, vs1, 3 +; CHECK-P9-NEXT: lxv vs1, 0(r4) +; CHECK-P9-NEXT: lxv vs0, 16(r4) +; CHECK-P9-NEXT: xxswapd vs3, vs1 +; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3 +; CHECK-P9-NEXT: xscvspdpn f4, vs1 +; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 +; CHECK-P9-NEXT: xxsldwi vs5, vs0, vs0, 3 ; CHECK-P9-NEXT: xscvspdpn f3, vs3 -; CHECK-P9-NEXT: xscvspdpn f4, vs4 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 +; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: xscvdpsxws f4, f4 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mffprwz r5, f3 -; CHECK-P9-NEXT: xxswapd vs3, vs1 -; CHECK-P9-NEXT: mtvsrd v2, r5 -; CHECK-P9-NEXT: mffprwz r5, f4 -; CHECK-P9-NEXT: xscvdpsxws f4, f5 -; CHECK-P9-NEXT: xscvspdpn f3, vs3 -; CHECK-P9-NEXT: mtvsrd v3, r5 -; CHECK-P9-NEXT: vmrghh v2, v3, v2 -; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: mffprwz r5, f4 -; CHECK-P9-NEXT: xscvspdpn f4, vs6 -; CHECK-P9-NEXT: mtvsrd v3, r5 +; CHECK-P9-NEXT: xscvdpsxws f1, f1 +; CHECK-P9-NEXT: mffprwz r6, f3 +; CHECK-P9-NEXT: xscvdpsxws f3, f4 ; CHECK-P9-NEXT: mffprwz r5, f2 -; CHECK-P9-NEXT: xscvspdpn f2, vs1 -; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-P9-NEXT: xscvdpsxws f4, f4 -; CHECK-P9-NEXT: mtvsrd v4, r5 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: xscvspdpn f1, 
vs1 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 +; CHECK-P9-NEXT: xxswapd vs2, vs0 +; CHECK-P9-NEXT: mtvsrdd v2, r6, r5 +; CHECK-P9-NEXT: mffprwz r6, f1 +; CHECK-P9-NEXT: xscvspdpn f1, vs0 +; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 +; CHECK-P9-NEXT: xscvspdpn f2, vs2 +; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: mffprwz r5, f4 -; CHECK-P9-NEXT: mtvsrd v4, r5 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: mffprwz r5, f3 -; CHECK-P9-NEXT: xxsldwi vs3, vs0, vs0, 3 -; CHECK-P9-NEXT: mtvsrd v5, r5 -; CHECK-P9-NEXT: mffprwz r5, f2 -; CHECK-P9-NEXT: xscvspdpn f2, vs3 -; CHECK-P9-NEXT: vmrghh v4, v5, v4 -; CHECK-P9-NEXT: mtvsrd v5, r5 +; CHECK-P9-NEXT: xscvspdpn f3, vs5 +; CHECK-P9-NEXT: xscvdpsxws f0, f0 +; CHECK-P9-NEXT: mtvsrdd v3, r5, r6 +; CHECK-P9-NEXT: xscvdpsxws f3, f3 +; CHECK-P9-NEXT: vpkudum v2, v3, v2 +; CHECK-P9-NEXT: mffprwz r6, f2 +; CHECK-P9-NEXT: mffprwz r5, f3 +; CHECK-P9-NEXT: mtvsrdd v4, r6, r5 +; CHECK-P9-NEXT: mffprwz r6, f0 +; CHECK-P9-NEXT: lxv vs0, 32(r4) ; CHECK-P9-NEXT: mffprwz r5, f1 -; CHECK-P9-NEXT: xxswapd vs1, vs0 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mtvsrd v0, r5 +; CHECK-P9-NEXT: mtvsrdd v5, r5, r6 +; CHECK-P9-NEXT: vpkudum v4, v5, v4 +; CHECK-P9-NEXT: xxswapd vs2, vs0 +; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 +; CHECK-P9-NEXT: vpkuwum v2, v4, v2 +; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: vmrghh v5, v5, v0 +; CHECK-P9-NEXT: stxv v2, 0(r3) +; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrglw v3, v5, v4 -; CHECK-P9-NEXT: mffprwz r5, f2 +; CHECK-P9-NEXT: mffprwz r6, f2 ; CHECK-P9-NEXT: xscvspdpn f2, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-P9-NEXT: mtvsrd v0, r5 -; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: mffprwz r5, f1 ; CHECK-P9-NEXT: lxv vs1, 48(r4) +; CHECK-P9-NEXT: mtvsrdd v3, r6, r5 +; 
CHECK-P9-NEXT: xscvspdpn f0, vs0 +; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mtvsrd v1, r5 -; CHECK-P9-NEXT: vmrghh v0, v1, v0 ; CHECK-P9-NEXT: mffprwz r4, f2 -; CHECK-P9-NEXT: xxmrgld vs2, v3, v2 -; CHECK-P9-NEXT: mtvsrd v4, r4 -; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: mffprwz r5, f0 ; CHECK-P9-NEXT: xxsldwi vs0, vs1, vs1, 3 -; CHECK-P9-NEXT: stxv vs2, 0(r3) -; CHECK-P9-NEXT: mtvsrd v2, r4 +; CHECK-P9-NEXT: mtvsrdd v4, r4, r5 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 -; CHECK-P9-NEXT: vmrghh v2, v4, v2 +; CHECK-P9-NEXT: vpkudum v3, v4, v3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrglw v2, v2, v0 ; CHECK-P9-NEXT: mffprwz r4, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs1 -; CHECK-P9-NEXT: mtvsrd v3, r4 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: mffprwz r5, f0 ; CHECK-P9-NEXT: xscvspdpn f0, vs1 -; CHECK-P9-NEXT: mtvsrd v4, r4 +; CHECK-P9-NEXT: mtvsrdd v4, r5, r4 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghh v3, v4, v3 ; CHECK-P9-NEXT: mffprwz r4, f0 ; CHECK-P9-NEXT: xxsldwi vs0, vs1, vs1, 1 -; CHECK-P9-NEXT: mtvsrd v4, r4 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r4, f0 -; CHECK-P9-NEXT: mtvsrd v5, r4 -; CHECK-P9-NEXT: vmrghh v4, v4, v5 -; CHECK-P9-NEXT: vmrglw v3, v4, v3 -; CHECK-P9-NEXT: xxmrgld vs0, v3, v2 -; CHECK-P9-NEXT: stxv vs0, 16(r3) +; CHECK-P9-NEXT: mffprwz r5, f0 +; CHECK-P9-NEXT: mtvsrdd v5, r4, r5 +; CHECK-P9-NEXT: vpkudum v4, v5, v4 +; CHECK-P9-NEXT: vpkuwum v3, v4, v3 +; CHECK-P9-NEXT: stxv v3, 16(r3) ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test16elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs1, 16(r4) ; CHECK-BE-NEXT: lxv vs0, 0(r4) -; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 ; CHECK-BE-NEXT: xxswapd vs3, vs1 +; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 ; CHECK-BE-NEXT: xscvspdpn f4, vs1 ; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 -; 
CHECK-BE-NEXT: xscvspdpn f2, vs2 +; CHECK-BE-NEXT: xxsldwi vs5, vs0, vs0, 3 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r5, f2 -; CHECK-BE-NEXT: xxsldwi vs2, vs0, vs0, 3 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: mtvsrd v2, r5 -; CHECK-BE-NEXT: mffprwz r5, f3 +; CHECK-BE-NEXT: mffprwz r6, f3 ; CHECK-BE-NEXT: xscvdpsxws f3, f4 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mtvsrd v3, r5 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 -; CHECK-BE-NEXT: mffprwz r5, f3 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: mtvsrd v3, r5 -; CHECK-BE-NEXT: mffprwz r5, f1 -; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v4, r5 ; CHECK-BE-NEXT: mffprwz r5, f2 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: vmrghh v3, v3, v4 -; CHECK-BE-NEXT: mtvsrd v4, r5 -; CHECK-BE-NEXT: vmrghw v2, v3, v2 -; CHECK-BE-NEXT: mffprwz r5, f1 +; CHECK-BE-NEXT: xxswapd vs2, vs0 +; CHECK-BE-NEXT: mtvsrdd v2, r6, r5 +; CHECK-BE-NEXT: mffprwz r6, f1 ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: xscvspdpn f2, vs2 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: mtvsrd v5, r5 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mffprwz r5, f3 +; CHECK-BE-NEXT: xscvspdpn f3, vs5 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vmrghh v4, v5, v4 +; CHECK-BE-NEXT: mtvsrdd v3, r5, r6 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: vpkudum v2, v3, v2 +; CHECK-BE-NEXT: mffprwz r6, f2 +; CHECK-BE-NEXT: mffprwz r5, f3 +; 
CHECK-BE-NEXT: mtvsrdd v4, r6, r5 +; CHECK-BE-NEXT: mffprwz r6, f0 +; CHECK-BE-NEXT: lxv vs0, 48(r4) ; CHECK-BE-NEXT: mffprwz r5, f1 -; CHECK-BE-NEXT: lxv vs1, 48(r4) -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: mtvsrd v5, r5 -; CHECK-BE-NEXT: mffprwz r5, f0 -; CHECK-BE-NEXT: lxv vs0, 32(r4) -; CHECK-BE-NEXT: xscvspdpn f5, vs1 -; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-BE-NEXT: xxswapd vs3, vs1 -; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: xscvdpsxws f5, f5 +; CHECK-BE-NEXT: mtvsrdd v5, r5, r6 +; CHECK-BE-NEXT: vpkudum v4, v5, v4 +; CHECK-BE-NEXT: xxswapd vs2, vs0 +; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 +; CHECK-BE-NEXT: vpkuwum v2, v4, v2 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: mtvsrd v0, r5 -; CHECK-BE-NEXT: xscvspdpn f3, vs3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: vmrghh v5, v5, v0 +; CHECK-BE-NEXT: stxv v2, 0(r3) ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghw v3, v5, v4 -; CHECK-BE-NEXT: mffprwz r4, f5 -; CHECK-BE-NEXT: xxmrghd vs4, v3, v2 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: mtvsrd v2, r4 -; CHECK-BE-NEXT: mffprwz r4, f2 -; CHECK-BE-NEXT: stxv vs4, 0(r3) -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: mtvsrd v3, r4 -; CHECK-BE-NEXT: mffprwz r4, f3 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: mtvsrd v4, r4 -; CHECK-BE-NEXT: mffprwz r4, f1 -; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v4, r4 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghh v2, v2, v4 -; CHECK-BE-NEXT: vmrghw v2, v2, v3 -; CHECK-BE-NEXT: mffprwz r4, f1 -; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v3, r4 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r4, f1 -; 
CHECK-BE-NEXT: xscvspdpn f1, vs0 +; CHECK-BE-NEXT: mffprwz r6, f2 +; CHECK-BE-NEXT: xscvspdpn f2, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mffprwz r5, f1 +; CHECK-BE-NEXT: lxv vs1, 32(r4) +; CHECK-BE-NEXT: mtvsrdd v3, r6, r5 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: mtvsrd v4, r4 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: mffprwz r4, f2 +; CHECK-BE-NEXT: mffprwz r5, f0 +; CHECK-BE-NEXT: xxsldwi vs0, vs1, vs1, 3 +; CHECK-BE-NEXT: mtvsrdd v4, r4, r5 +; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: vpkudum v3, v4, v3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: mffprwz r4, f1 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: mtvsrd v4, r4 ; CHECK-BE-NEXT: mffprwz r4, f0 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: mtvsrd v5, r4 -; CHECK-BE-NEXT: vmrghh v4, v4, v5 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: xxmrghd vs0, v3, v2 -; CHECK-BE-NEXT: stxv vs0, 16(r3) +; CHECK-BE-NEXT: xxswapd vs0, vs1 +; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: mffprwz r5, f0 +; CHECK-BE-NEXT: xscvspdpn f0, vs1 +; CHECK-BE-NEXT: mtvsrdd v4, r5, r4 +; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: xxsldwi vs0, vs1, vs1, 1 +; CHECK-BE-NEXT: xscvspdpn f0, vs0 +; CHECK-BE-NEXT: xscvdpsxws f0, f0 +; CHECK-BE-NEXT: mffprwz r5, f0 +; CHECK-BE-NEXT: mtvsrdd v5, r4, r5 +; CHECK-BE-NEXT: vpkudum v4, v5, v4 +; CHECK-BE-NEXT: vpkuwum v3, v4, v3 +; CHECK-BE-NEXT: stxv v3, 16(r3) ; CHECK-BE-NEXT: blr entry: %a = load <16 x float>, <16 x float>* %0, align 64 Index: llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll =================================================================== --- llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll +++ llvm/test/CodeGen/PowerPC/vec_conv_fp32_to_i8_elts.ll @@ -114,24 
+114,22 @@ ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 ; CHECK-P9-NEXT: xxswapd vs0, v2 -; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mffprwz r4, f0 ; CHECK-P9-NEXT: xscvspdpn f0, v2 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mtvsrdd v3, r4, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghb v3, v4, v3 ; CHECK-P9-NEXT: mffprwz r3, f0 ; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 1 -; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v2, r3 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: mtvsrdd v2, r3, r4 ; CHECK-P9-NEXT: li r3, 0 -; CHECK-P9-NEXT: vmrghb v2, v4, v2 -; CHECK-P9-NEXT: vmrglh v2, v2, v3 +; CHECK-P9-NEXT: vpkudum v2, v2, v3 +; CHECK-P9-NEXT: vpkuwum v2, v2, v2 +; CHECK-P9-NEXT: vpkuhum v2, v2, v2 ; CHECK-P9-NEXT: vextuwrx r3, r3, v2 ; CHECK-P9-NEXT: blr ; @@ -142,28 +140,22 @@ ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xxswapd vs0, v2 -; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f0 +; CHECK-BE-NEXT: mffprwz r4, f0 ; CHECK-BE-NEXT: xscvspdpn f0, v2 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrdd v3, r4, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: vmrghb v3, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 1 -; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v2, r3 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: mtvsrdd v2, r3, r4 ; CHECK-BE-NEXT: li r3, 0 -; CHECK-BE-NEXT: vmrghb v2, v4, 
v2 -; CHECK-BE-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-NEXT: vpkudum v2, v2, v3 +; CHECK-BE-NEXT: vpkuwum v2, v2, v2 +; CHECK-BE-NEXT: vpkuhum v2, v2, v2 ; CHECK-BE-NEXT: vextuwlx r3, r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -236,46 +228,39 @@ ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: mffprwz r3, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs1 -; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mffprwz r4, f2 ; CHECK-P9-NEXT: xscvspdpn f2, vs1 ; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mtvsrdd v2, r4, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrghb v2, v3, v2 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mffprwz r4, f1 ; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mtvsrdd v3, r3, r4 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 +; CHECK-P9-NEXT: vpkudum v2, v3, v2 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 ; CHECK-P9-NEXT: mffprwz r3, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs0 -; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mffprwz r4, f1 ; CHECK-P9-NEXT: xscvspdpn f1, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mtvsrdd v3, r4, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrghb v3, v4, v3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v5, r3 -; CHECK-P9-NEXT: vmrghb v4, v4, v5 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 +; CHECK-P9-NEXT: mffprwz 
r4, f0 +; CHECK-P9-NEXT: mtvsrdd v4, r3, r4 +; CHECK-P9-NEXT: vpkudum v3, v4, v3 +; CHECK-P9-NEXT: vpkuwum v2, v3, v2 +; CHECK-P9-NEXT: vpkuhum v2, v2, v2 ; CHECK-P9-NEXT: mfvsrld r3, v2 ; CHECK-P9-NEXT: blr ; @@ -288,54 +273,39 @@ ; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs1 -; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: mtvsrd v2, r3 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r3, f2 +; CHECK-BE-NEXT: mffprwz r4, f2 ; CHECK-BE-NEXT: xscvspdpn f2, vs1 ; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mtvsrdd v2, r4, r3 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghb v2, v3, v2 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrdd v3, r3, r4 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: vpkudum v2, v3, v2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghb v3, v3, v4 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mtvsrdd v3, r4, r3 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vmrghb v3, 
v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v5, r3 -; CHECK-BE-NEXT: vmrghb v4, v4, v5 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: vmrghw v2, v3, v2 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: mtvsrdd v4, r3, r4 +; CHECK-BE-NEXT: vpkudum v3, v4, v3 +; CHECK-BE-NEXT: vpkuwum v2, v3, v2 +; CHECK-BE-NEXT: vpkuhum v2, v2, v2 ; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -459,92 +429,76 @@ ; CHECK-P9-NEXT: xscvdpsxws f4, f4 ; CHECK-P9-NEXT: mffprwz r3, f4 ; CHECK-P9-NEXT: xxswapd vs4, vs3 -; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: xscvspdpn f4, vs4 ; CHECK-P9-NEXT: xscvdpsxws f4, f4 -; CHECK-P9-NEXT: mffprwz r3, f4 +; CHECK-P9-NEXT: mffprwz r4, f4 ; CHECK-P9-NEXT: xscvspdpn f4, vs3 ; CHECK-P9-NEXT: xxsldwi vs3, vs3, vs3, 1 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mtvsrdd v2, r4, r3 ; CHECK-P9-NEXT: xscvspdpn f3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f4, f4 -; CHECK-P9-NEXT: vmrghb v2, v3, v2 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 ; CHECK-P9-NEXT: mffprwz r3, f4 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f3 +; CHECK-P9-NEXT: mffprwz r4, f3 ; CHECK-P9-NEXT: xxsldwi vs3, vs2, vs2, 3 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mtvsrdd v3, r3, r4 ; CHECK-P9-NEXT: xscvspdpn f3, vs3 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 +; CHECK-P9-NEXT: vpkudum v2, v3, v2 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 ; CHECK-P9-NEXT: mffprwz r3, f3 ; CHECK-P9-NEXT: xxswapd vs3, vs2 -; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: mffprwz r3, f3 +; CHECK-P9-NEXT: mffprwz r4, f3 ; CHECK-P9-NEXT: xscvspdpn f3, vs2 ; CHECK-P9-NEXT: xxsldwi vs2, vs2, vs2, 1 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mtvsrdd v3, r4, r3 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 
-; CHECK-P9-NEXT: vmrghb v3, v4, v3 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mffprwz r4, f2 ; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-P9-NEXT: mtvsrd v5, r3 +; CHECK-P9-NEXT: mtvsrdd v4, r3, r4 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: vmrghb v4, v4, v5 +; CHECK-P9-NEXT: vpkudum v3, v4, v3 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 +; CHECK-P9-NEXT: vpkuwum v2, v3, v2 ; CHECK-P9-NEXT: mffprwz r3, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs1 -; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mffprwz r4, f2 ; CHECK-P9-NEXT: xscvspdpn f2, vs1 ; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 +; CHECK-P9-NEXT: mtvsrdd v3, r4, r3 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrghb v3, v4, v3 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mffprwz r4, f1 ; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-P9-NEXT: mtvsrd v5, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: vmrghb v4, v4, v5 +; CHECK-P9-NEXT: mtvsrdd v4, r3, r4 +; CHECK-P9-NEXT: vpkudum v3, v4, v3 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 ; CHECK-P9-NEXT: mffprwz r3, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs0 -; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mffprwz r4, f1 ; CHECK-P9-NEXT: xscvspdpn f1, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-P9-NEXT: mtvsrd v5, r3 +; CHECK-P9-NEXT: mtvsrdd v4, r4, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrghb v4, v5, v4 ; 
CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v5, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v0, r3 -; CHECK-P9-NEXT: vmrghb v5, v5, v0 -; CHECK-P9-NEXT: vmrglh v4, v5, v4 -; CHECK-P9-NEXT: vmrglw v3, v4, v3 -; CHECK-P9-NEXT: xxmrgld v2, v3, v2 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: mtvsrdd v5, r3, r4 +; CHECK-P9-NEXT: vpkudum v4, v5, v4 +; CHECK-P9-NEXT: vpkuwum v3, v4, v3 +; CHECK-P9-NEXT: vpkuhum v2, v3, v2 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test16elt: @@ -558,108 +512,76 @@ ; CHECK-BE-NEXT: xscvdpsxws f4, f4 ; CHECK-BE-NEXT: mffprwz r3, f4 ; CHECK-BE-NEXT: xxswapd vs4, vs3 -; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvspdpn f4, vs4 -; CHECK-BE-NEXT: mtvsrd v2, r3 ; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: mffprwz r3, f4 +; CHECK-BE-NEXT: mffprwz r4, f4 ; CHECK-BE-NEXT: xscvspdpn f4, vs3 ; CHECK-BE-NEXT: xxsldwi vs3, vs3, vs3, 1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: mtvsrdd v2, r4, r3 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: xscvdpsxws f4, f4 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: vmrghb v2, v3, v2 ; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mffprwz r3, f3 +; CHECK-BE-NEXT: mffprwz r4, f3 ; CHECK-BE-NEXT: xxsldwi vs3, vs2, vs2, 3 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrdd v3, r3, r4 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: vpkudum v2, v3, v2 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: vmrghb v3, v3, v4 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 ; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs2 -; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mffprwz r3, f3 +; CHECK-BE-NEXT: mffprwz r4, f3 ; CHECK-BE-NEXT: 
xscvspdpn f3, vs2 ; CHECK-BE-NEXT: xxsldwi vs2, vs2, vs2, 1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: mtvsrdd v3, r4, r3 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vmrghb v3, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: mffprwz r3, f2 +; CHECK-BE-NEXT: mffprwz r4, f2 ; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrdd v4, r3, r4 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: mtvsrd v5, r3 +; CHECK-BE-NEXT: vpkudum v3, v4, v3 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vmrghb v4, v4, v5 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: vmrghw v2, v3, v2 +; CHECK-BE-NEXT: vpkuwum v2, v3, v2 ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs1 -; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r3, f2 +; CHECK-BE-NEXT: mffprwz r4, f2 ; CHECK-BE-NEXT: xscvspdpn f2, vs1 ; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: mtvsrdd v3, r4, r3 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghb v3, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v5, r3 +; CHECK-BE-NEXT: mtvsrdd v4, r3, r4 +; CHECK-BE-NEXT: vpkudum v3, v4, v3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghb v4, v4, v5 -; CHECK-BE-NEXT: 
vmrghh v3, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mtvsrdd v4, r4, r3 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: mtvsrd v5, r3 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vmrghb v4, v5, v4 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v5, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v0, r3 -; CHECK-BE-NEXT: vmrghb v5, v5, v0 -; CHECK-BE-NEXT: vmrghh v4, v5, v4 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: xxmrghd v2, v3, v2 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: mtvsrdd v5, r3, r4 +; CHECK-BE-NEXT: vpkudum v4, v5, v4 +; CHECK-BE-NEXT: vpkuwum v3, v4, v3 +; CHECK-BE-NEXT: vpkuhum v2, v3, v2 ; CHECK-BE-NEXT: blr entry: %a = load <16 x float>, <16 x float>* %0, align 64 @@ -772,24 +694,22 @@ ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f0 ; CHECK-P9-NEXT: xxswapd vs0, v2 -; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r3, f0 +; CHECK-P9-NEXT: mffprwz r4, f0 ; CHECK-P9-NEXT: xscvspdpn f0, v2 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mtvsrdd v3, r4, r3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghb v3, v4, v3 ; CHECK-P9-NEXT: mffprwz r3, f0 ; CHECK-P9-NEXT: xxsldwi vs0, v2, v2, 1 -; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v2, r3 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: 
mtvsrdd v2, r3, r4 ; CHECK-P9-NEXT: li r3, 0 -; CHECK-P9-NEXT: vmrghb v2, v4, v2 -; CHECK-P9-NEXT: vmrglh v2, v2, v3 +; CHECK-P9-NEXT: vpkudum v2, v2, v3 +; CHECK-P9-NEXT: vpkuwum v2, v2, v2 +; CHECK-P9-NEXT: vpkuhum v2, v2, v2 ; CHECK-P9-NEXT: vextuwrx r3, r3, v2 ; CHECK-P9-NEXT: blr ; @@ -800,28 +720,22 @@ ; CHECK-BE-NEXT: xscvdpsxws f0, f0 ; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xxswapd vs0, v2 -; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f0 +; CHECK-BE-NEXT: mffprwz r4, f0 ; CHECK-BE-NEXT: xscvspdpn f0, v2 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrdd v3, r4, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: vmrghb v3, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f0 ; CHECK-BE-NEXT: xxsldwi vs0, v2, v2, 1 -; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v2, r3 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: mtvsrdd v2, r3, r4 ; CHECK-BE-NEXT: li r3, 0 -; CHECK-BE-NEXT: vmrghb v2, v4, v2 -; CHECK-BE-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-NEXT: vpkudum v2, v2, v3 +; CHECK-BE-NEXT: vpkuwum v2, v2, v2 +; CHECK-BE-NEXT: vpkuhum v2, v2, v2 ; CHECK-BE-NEXT: vextuwlx r3, r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -894,46 +808,39 @@ ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: mffprwz r3, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs1 -; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mffprwz r4, f2 ; CHECK-P9-NEXT: xscvspdpn f2, vs1 ; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mtvsrdd v2, r4, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: 
vmrghb v2, v3, v2 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mffprwz r4, f1 ; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mtvsrdd v3, r3, r4 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 +; CHECK-P9-NEXT: vpkudum v2, v3, v2 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 ; CHECK-P9-NEXT: mffprwz r3, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs0 -; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mffprwz r4, f1 ; CHECK-P9-NEXT: xscvspdpn f1, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mtvsrdd v3, r4, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrghb v3, v4, v3 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v5, r3 -; CHECK-P9-NEXT: vmrghb v4, v4, v5 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: mtvsrdd v4, r3, r4 +; CHECK-P9-NEXT: vpkudum v3, v4, v3 +; CHECK-P9-NEXT: vpkuwum v2, v3, v2 +; CHECK-P9-NEXT: vpkuhum v2, v2, v2 ; CHECK-P9-NEXT: mfvsrld r3, v2 ; CHECK-P9-NEXT: blr ; @@ -946,54 +853,39 @@ ; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs1 -; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: mtvsrd v2, r3 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r3, f2 +; CHECK-BE-NEXT: mffprwz r4, f2 ; CHECK-BE-NEXT: xscvspdpn f2, vs1 ; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 +; CHECK-BE-NEXT: mtvsrdd v2, r4, r3 ; CHECK-BE-NEXT: 
xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghb v2, v3, v2 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrdd v3, r3, r4 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: vpkudum v2, v3, v2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghb v3, v3, v4 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mtvsrdd v3, r4, r3 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vmrghb v3, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v5, r3 -; CHECK-BE-NEXT: vmrghb v4, v4, v5 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: vmrghw v2, v3, v2 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: mtvsrdd v4, r3, r4 +; CHECK-BE-NEXT: vpkudum v3, v4, v3 +; CHECK-BE-NEXT: vpkuwum v2, v3, v2 +; CHECK-BE-NEXT: vpkuhum v2, v2, v2 ; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -1117,92 +1009,76 @@ ; CHECK-P9-NEXT: xscvdpsxws f4, f4 ; CHECK-P9-NEXT: mffprwz r3, f4 ; CHECK-P9-NEXT: xxswapd vs4, vs3 -; CHECK-P9-NEXT: mtvsrd v2, r3 ; CHECK-P9-NEXT: xscvspdpn 
f4, vs4 ; CHECK-P9-NEXT: xscvdpsxws f4, f4 -; CHECK-P9-NEXT: mffprwz r3, f4 +; CHECK-P9-NEXT: mffprwz r4, f4 ; CHECK-P9-NEXT: xscvspdpn f4, vs3 ; CHECK-P9-NEXT: xxsldwi vs3, vs3, vs3, 1 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mtvsrdd v2, r4, r3 ; CHECK-P9-NEXT: xscvspdpn f3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f4, f4 -; CHECK-P9-NEXT: vmrghb v2, v3, v2 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 ; CHECK-P9-NEXT: mffprwz r3, f4 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f3 +; CHECK-P9-NEXT: mffprwz r4, f3 ; CHECK-P9-NEXT: xxsldwi vs3, vs2, vs2, 3 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mtvsrdd v3, r3, r4 ; CHECK-P9-NEXT: xscvspdpn f3, vs3 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 +; CHECK-P9-NEXT: vpkudum v2, v3, v2 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 ; CHECK-P9-NEXT: mffprwz r3, f3 ; CHECK-P9-NEXT: xxswapd vs3, vs2 -; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: mffprwz r3, f3 +; CHECK-P9-NEXT: mffprwz r4, f3 ; CHECK-P9-NEXT: xscvspdpn f3, vs2 ; CHECK-P9-NEXT: xxsldwi vs2, vs2, vs2, 1 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mtvsrdd v3, r4, r3 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: vmrghb v3, v4, v3 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 ; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mffprwz r4, f2 ; CHECK-P9-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-P9-NEXT: mtvsrd v5, r3 +; CHECK-P9-NEXT: mtvsrdd v4, r3, r4 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 -; CHECK-P9-NEXT: vmrghb v4, v4, v5 +; CHECK-P9-NEXT: vpkudum v3, v4, v3 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 +; CHECK-P9-NEXT: vpkuwum v2, v3, v2 ; CHECK-P9-NEXT: mffprwz r3, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs1 -; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvspdpn f2, vs2 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 
-; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mffprwz r4, f2 ; CHECK-P9-NEXT: xscvspdpn f2, vs1 ; CHECK-P9-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 +; CHECK-P9-NEXT: mtvsrdd v3, r4, r3 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrghb v3, v4, v3 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mffprwz r4, f1 ; CHECK-P9-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-P9-NEXT: mtvsrd v5, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 -; CHECK-P9-NEXT: vmrghb v4, v4, v5 +; CHECK-P9-NEXT: mtvsrdd v4, r3, r4 +; CHECK-P9-NEXT: vpkudum v3, v4, v3 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 ; CHECK-P9-NEXT: mffprwz r3, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs0 -; CHECK-P9-NEXT: mtvsrd v4, r3 ; CHECK-P9-NEXT: xscvspdpn f1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mffprwz r4, f1 ; CHECK-P9-NEXT: xscvspdpn f1, vs0 ; CHECK-P9-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-P9-NEXT: mtvsrd v5, r3 +; CHECK-P9-NEXT: mtvsrdd v4, r4, r3 ; CHECK-P9-NEXT: xscvspdpn f0, vs0 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrghb v4, v5, v4 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v5, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v0, r3 -; CHECK-P9-NEXT: vmrghb v5, v5, v0 -; CHECK-P9-NEXT: vmrglh v4, v5, v4 -; CHECK-P9-NEXT: vmrglw v3, v4, v3 -; CHECK-P9-NEXT: xxmrgld v2, v3, v2 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: mtvsrdd v5, r3, r4 +; CHECK-P9-NEXT: vpkudum v4, v5, v4 +; CHECK-P9-NEXT: vpkuwum v3, v4, v3 +; CHECK-P9-NEXT: vpkuhum v2, v3, v2 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test16elt_signed: @@ -1216,108 +1092,76 @@ ; CHECK-BE-NEXT: xscvdpsxws f4, f4 ; CHECK-BE-NEXT: mffprwz r3, f4 ; CHECK-BE-NEXT: xxswapd vs4, vs3 -; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvspdpn f4, 
vs4 -; CHECK-BE-NEXT: mtvsrd v2, r3 ; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: mffprwz r3, f4 +; CHECK-BE-NEXT: mffprwz r4, f4 ; CHECK-BE-NEXT: xscvspdpn f4, vs3 ; CHECK-BE-NEXT: xxsldwi vs3, vs3, vs3, 1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: xscvdpsxws f4, f4 +; CHECK-BE-NEXT: mtvsrdd v2, r4, r3 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: mtvsrd v3, r3 +; CHECK-BE-NEXT: xscvdpsxws f4, f4 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: vmrghb v2, v3, v2 ; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mffprwz r3, f3 +; CHECK-BE-NEXT: mffprwz r4, f3 ; CHECK-BE-NEXT: xxsldwi vs3, vs2, vs2, 3 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrdd v3, r3, r4 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: vpkudum v2, v3, v2 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: vmrghb v3, v3, v4 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 ; CHECK-BE-NEXT: mffprwz r3, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs2 -; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvspdpn f3, vs3 -; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mffprwz r3, f3 +; CHECK-BE-NEXT: mffprwz r4, f3 ; CHECK-BE-NEXT: xscvspdpn f3, vs2 ; CHECK-BE-NEXT: xxsldwi vs2, vs2, vs2, 1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: mtvsrdd v3, r4, r3 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vmrghb v3, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: mffprwz r3, f2 +; CHECK-BE-NEXT: mffprwz r4, f2 ; CHECK-BE-NEXT: xxsldwi vs2, vs1, vs1, 3 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrdd v4, r3, r4 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: mtvsrd v5, r3 +; CHECK-BE-NEXT: vpkudum v3, v4, v3 ; 
CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: vmrghb v4, v4, v5 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: vmrghw v2, v3, v2 +; CHECK-BE-NEXT: vpkuwum v2, v3, v2 ; CHECK-BE-NEXT: mffprwz r3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs1 -; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvspdpn f2, vs2 -; CHECK-BE-NEXT: mtvsrd v3, r3 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r3, f2 +; CHECK-BE-NEXT: mffprwz r4, f2 ; CHECK-BE-NEXT: xscvspdpn f2, vs1 ; CHECK-BE-NEXT: xxsldwi vs1, vs1, vs1, 1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: mtvsrdd v3, r4, r3 +; CHECK-BE-NEXT: xscvdpsxws f2, f2 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghb v3, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xxsldwi vs1, vs0, vs0, 3 -; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v5, r3 +; CHECK-BE-NEXT: mtvsrdd v4, r3, r4 +; CHECK-BE-NEXT: vpkudum v3, v4, v3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: vmrghb v4, v4, v5 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs0 -; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvspdpn f1, vs1 -; CHECK-BE-NEXT: mtvsrd v4, r3 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xscvspdpn f1, vs0 ; CHECK-BE-NEXT: xxsldwi vs0, vs0, vs0, 1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 +; CHECK-BE-NEXT: mtvsrdd v4, r4, r3 ; CHECK-BE-NEXT: xscvspdpn f0, vs0 -; CHECK-BE-NEXT: mtvsrd v5, r3 +; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: vmrghb v4, v5, v4 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; 
CHECK-BE-NEXT: mtvsrd v5, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v0, r3 -; CHECK-BE-NEXT: vmrghb v5, v5, v0 -; CHECK-BE-NEXT: vmrghh v4, v5, v4 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: xxmrghd v2, v3, v2 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: mtvsrdd v5, r3, r4 +; CHECK-BE-NEXT: vpkudum v4, v5, v4 +; CHECK-BE-NEXT: vpkuwum v3, v4, v3 +; CHECK-BE-NEXT: vpkuhum v2, v3, v2 ; CHECK-BE-NEXT: blr entry: %a = load <16 x float>, <16 x float>* %0, align 64 Index: llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll =================================================================== --- llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll +++ llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i16_elts.ll @@ -94,19 +94,16 @@ ; CHECK-P9-NEXT: xxswapd vs1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mffprwz r4, f1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mtvsrdd v2, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghh v2, v2, v3 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: mtvsrdd v3, r3, r4 +; CHECK-P9-NEXT: vpkudum v2, v3, v2 +; CHECK-P9-NEXT: vpkuwum v2, v2, v2 ; CHECK-P9-NEXT: mfvsrld r3, v2 ; CHECK-P9-NEXT: blr ; @@ -118,23 +115,16 @@ ; CHECK-BE-NEXT: xxswapd vs1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v2, r3 -; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: sldi r3, r3, 48 +; CHECK-BE-NEXT: mtvsrdd v2, 
r3, r4 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: vmrghh v2, v2, v3 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: vmrghh v3, v3, v4 -; CHECK-BE-NEXT: vmrghw v2, v3, v2 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: mtvsrdd v3, r3, r4 +; CHECK-BE-NEXT: vpkudum v2, v3, v2 +; CHECK-BE-NEXT: vpkuwum v2, v2, v2 ; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -195,94 +185,70 @@ ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs3, 0(r3) ; CHECK-P9-NEXT: lxv vs2, 16(r3) -; CHECK-P9-NEXT: lxv vs0, 48(r3) ; CHECK-P9-NEXT: lxv vs1, 32(r3) +; CHECK-P9-NEXT: lxv vs0, 48(r3) ; CHECK-P9-NEXT: xscvdpsxws f4, f3 ; CHECK-P9-NEXT: xxswapd vs3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 ; CHECK-P9-NEXT: mffprwz r3, f4 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: mffprwz r3, f3 +; CHECK-P9-NEXT: mffprwz r4, f3 ; CHECK-P9-NEXT: xscvdpsxws f3, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs2 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mtvsrdd v2, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrghh v2, v2, v3 ; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mffprwz r4, f2 ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mtvsrdd v3, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 +; CHECK-P9-NEXT: vpkudum v2, v3, v2 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mffprwz r4, f1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mtvsrdd v3, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 ; 
CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v5, r3 -; CHECK-P9-NEXT: vmrghh v4, v4, v5 -; CHECK-P9-NEXT: vmrglw v3, v4, v3 -; CHECK-P9-NEXT: xxmrgld v2, v3, v2 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: mtvsrdd v4, r3, r4 +; CHECK-P9-NEXT: vpkudum v3, v4, v3 +; CHECK-P9-NEXT: vpkuwum v2, v3, v2 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test8elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs3, 48(r3) ; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: xscvdpsxws f4, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v2, r3 -; CHECK-BE-NEXT: mffprwz r3, f3 +; CHECK-BE-NEXT: mffprwz r4, f3 ; CHECK-BE-NEXT: xscvdpsxws f3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs2 -; CHECK-BE-NEXT: sldi r3, r3, 48 +; CHECK-BE-NEXT: mtvsrdd v2, r3, r4 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: vmrghh v2, v2, v3 ; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mffprwz r3, f2 +; CHECK-BE-NEXT: mffprwz r4, f2 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: sldi r3, r3, 48 +; CHECK-BE-NEXT: mtvsrdd v3, r3, r4 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: vmrghh v3, v3, v4 +; CHECK-BE-NEXT: vpkudum v2, v3, v2 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: vmrghw v2, v3, v2 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: sldi r3, r3, 48 +; CHECK-BE-NEXT: mtvsrdd v3, r3, r4 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; 
CHECK-BE-NEXT: vmrghh v3, v3, v4 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v5, r3 -; CHECK-BE-NEXT: vmrghh v4, v4, v5 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: xxmrghd v2, v3, v2 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: mtvsrdd v4, r3, r4 +; CHECK-BE-NEXT: vpkudum v3, v4, v3 +; CHECK-BE-NEXT: vpkuwum v2, v3, v2 ; CHECK-BE-NEXT: blr entry: %a = load <8 x double>, <8 x double>* %0, align 64 @@ -384,186 +350,138 @@ ; ; CHECK-P9-LABEL: test16elt: ; CHECK-P9: # %bb.0: # %entry -; CHECK-P9-NEXT: lxv vs3, 0(r4) -; CHECK-P9-NEXT: lxv vs2, 16(r4) -; CHECK-P9-NEXT: lxv vs1, 32(r4) -; CHECK-P9-NEXT: lxv vs0, 48(r4) -; CHECK-P9-NEXT: xscvdpsxws f4, f3 -; CHECK-P9-NEXT: xscvdpsxws f5, f2 -; CHECK-P9-NEXT: xscvdpsxws f6, f1 -; CHECK-P9-NEXT: xxswapd vs3, vs3 -; CHECK-P9-NEXT: xscvdpsxws f7, f0 -; CHECK-P9-NEXT: xxswapd vs2, vs2 -; CHECK-P9-NEXT: xxswapd vs1, vs1 -; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r5, f4 -; CHECK-P9-NEXT: mtvsrd v2, r5 -; CHECK-P9-NEXT: mffprwz r5, f5 -; CHECK-P9-NEXT: mtvsrd v3, r5 -; CHECK-P9-NEXT: mffprwz r5, f6 -; CHECK-P9-NEXT: mtvsrd v4, r5 -; CHECK-P9-NEXT: mffprwz r5, f7 -; CHECK-P9-NEXT: mtvsrd v5, r5 -; CHECK-P9-NEXT: mffprwz r5, f3 -; CHECK-P9-NEXT: lxv vs3, 64(r4) -; CHECK-P9-NEXT: mtvsrd v0, r5 -; CHECK-P9-NEXT: mffprwz r5, f2 -; CHECK-P9-NEXT: lxv vs2, 80(r4) -; CHECK-P9-NEXT: vmrghh v2, v2, v0 -; CHECK-P9-NEXT: mtvsrd v0, r5 -; CHECK-P9-NEXT: mffprwz r5, f1 +; CHECK-P9-NEXT: lxv vs7, 0(r4) +; CHECK-P9-NEXT: lxv vs6, 16(r4) +; CHECK-P9-NEXT: lxv vs5, 32(r4) +; CHECK-P9-NEXT: lxv vs0, 112(r4) +; CHECK-P9-NEXT: xscvdpsxws f8, f7 +; CHECK-P9-NEXT: xxswapd vs7, vs7 ; CHECK-P9-NEXT: lxv vs1, 96(r4) +; CHECK-P9-NEXT: lxv 
vs2, 80(r4) +; CHECK-P9-NEXT: lxv vs3, 64(r4) +; CHECK-P9-NEXT: lxv vs4, 48(r4) +; CHECK-P9-NEXT: xscvdpsxws f7, f7 +; CHECK-P9-NEXT: mffprwz r4, f8 +; CHECK-P9-NEXT: mffprwz r5, f7 +; CHECK-P9-NEXT: xscvdpsxws f7, f6 +; CHECK-P9-NEXT: xxswapd vs6, vs6 +; CHECK-P9-NEXT: xscvdpsxws f6, f6 +; CHECK-P9-NEXT: mtvsrdd v2, r4, r5 +; CHECK-P9-NEXT: mffprwz r4, f7 +; CHECK-P9-NEXT: mffprwz r5, f6 +; CHECK-P9-NEXT: xscvdpsxws f6, f5 +; CHECK-P9-NEXT: xxswapd vs5, vs5 +; CHECK-P9-NEXT: mtvsrdd v3, r4, r5 +; CHECK-P9-NEXT: xscvdpsxws f5, f5 +; CHECK-P9-NEXT: vpkudum v2, v3, v2 +; CHECK-P9-NEXT: mffprwz r4, f6 +; CHECK-P9-NEXT: mffprwz r5, f5 +; CHECK-P9-NEXT: xscvdpsxws f5, f4 +; CHECK-P9-NEXT: xxswapd vs4, vs4 +; CHECK-P9-NEXT: mtvsrdd v4, r4, r5 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 +; CHECK-P9-NEXT: mffprwz r4, f5 +; CHECK-P9-NEXT: mffprwz r5, f4 ; CHECK-P9-NEXT: xscvdpsxws f4, f3 ; CHECK-P9-NEXT: xxswapd vs3, vs3 -; CHECK-P9-NEXT: vmrghh v3, v3, v0 -; CHECK-P9-NEXT: mtvsrd v0, r5 -; CHECK-P9-NEXT: mffprwz r5, f0 -; CHECK-P9-NEXT: lxv vs0, 112(r4) +; CHECK-P9-NEXT: mtvsrdd v5, r4, r5 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: vmrghh v4, v4, v0 -; CHECK-P9-NEXT: mtvsrd v0, r5 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: vmrghh v5, v5, v0 +; CHECK-P9-NEXT: vpkudum v3, v5, v4 ; CHECK-P9-NEXT: mffprwz r4, f4 -; CHECK-P9-NEXT: vmrglw v4, v5, v4 -; CHECK-P9-NEXT: mtvsrd v3, r4 -; CHECK-P9-NEXT: mffprwz r4, f3 +; CHECK-P9-NEXT: vpkuwum v2, v3, v2 +; CHECK-P9-NEXT: mffprwz r5, f3 ; CHECK-P9-NEXT: xscvdpsxws f3, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs2 -; CHECK-P9-NEXT: xxmrgld vs4, v4, v2 -; CHECK-P9-NEXT: mtvsrd v2, r4 +; CHECK-P9-NEXT: stxv v2, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrghh v2, v3, v2 -; CHECK-P9-NEXT: stxv vs4, 0(r3) +; CHECK-P9-NEXT: mtvsrdd v3, r4, r5 ; CHECK-P9-NEXT: mffprwz r4, f3 -; CHECK-P9-NEXT: mtvsrd v3, r4 -; CHECK-P9-NEXT: mffprwz r4, f2 +; CHECK-P9-NEXT: mffprwz r5, f2 ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; 
CHECK-P9-NEXT: xxswapd vs1, vs1 -; CHECK-P9-NEXT: mtvsrd v4, r4 +; CHECK-P9-NEXT: mtvsrdd v4, r4, r5 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 +; CHECK-P9-NEXT: vpkudum v3, v4, v3 ; CHECK-P9-NEXT: mffprwz r4, f2 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: mtvsrd v3, r4 -; CHECK-P9-NEXT: mffprwz r4, f1 +; CHECK-P9-NEXT: mffprwz r5, f1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: mtvsrd v4, r4 +; CHECK-P9-NEXT: mtvsrdd v4, r4, r5 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 ; CHECK-P9-NEXT: mffprwz r4, f1 -; CHECK-P9-NEXT: mtvsrd v4, r4 -; CHECK-P9-NEXT: mffprwz r4, f0 -; CHECK-P9-NEXT: mtvsrd v5, r4 -; CHECK-P9-NEXT: vmrghh v4, v4, v5 -; CHECK-P9-NEXT: vmrglw v3, v4, v3 -; CHECK-P9-NEXT: xxmrgld vs0, v3, v2 -; CHECK-P9-NEXT: stxv vs0, 16(r3) +; CHECK-P9-NEXT: mffprwz r5, f0 +; CHECK-P9-NEXT: mtvsrdd v5, r4, r5 +; CHECK-P9-NEXT: vpkudum v4, v5, v4 +; CHECK-P9-NEXT: vpkuwum v3, v4, v3 +; CHECK-P9-NEXT: stxv v3, 16(r3) ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test16elt: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs4, 48(r4) -; CHECK-BE-NEXT: lxv vs3, 32(r4) -; CHECK-BE-NEXT: lxv vs2, 16(r4) -; CHECK-BE-NEXT: lxv vs1, 0(r4) +; CHECK-BE-NEXT: lxv vs7, 48(r4) +; CHECK-BE-NEXT: lxv vs6, 32(r4) +; CHECK-BE-NEXT: lxv vs5, 16(r4) +; CHECK-BE-NEXT: lxv vs0, 64(r4) +; CHECK-BE-NEXT: xscvdpsxws f8, f7 +; CHECK-BE-NEXT: xxswapd vs7, vs7 +; CHECK-BE-NEXT: lxv vs1, 80(r4) +; CHECK-BE-NEXT: lxv vs2, 96(r4) +; CHECK-BE-NEXT: lxv vs3, 112(r4) +; CHECK-BE-NEXT: lxv vs4, 0(r4) +; CHECK-BE-NEXT: xscvdpsxws f7, f7 +; CHECK-BE-NEXT: mffprwz r4, f8 +; CHECK-BE-NEXT: mffprwz r5, f7 +; CHECK-BE-NEXT: xscvdpsxws f7, f6 +; CHECK-BE-NEXT: xxswapd vs6, vs6 +; CHECK-BE-NEXT: xscvdpsxws f6, f6 +; CHECK-BE-NEXT: mtvsrdd v2, r4, r5 +; CHECK-BE-NEXT: mffprwz r4, f7 +; CHECK-BE-NEXT: mffprwz r5, f6 +; CHECK-BE-NEXT: xscvdpsxws f6, f5 +; CHECK-BE-NEXT: xxswapd vs5, vs5 +; CHECK-BE-NEXT: 
mtvsrdd v3, r4, r5 +; CHECK-BE-NEXT: xscvdpsxws f5, f5 +; CHECK-BE-NEXT: vpkudum v2, v3, v2 +; CHECK-BE-NEXT: mffprwz r4, f6 +; CHECK-BE-NEXT: mffprwz r5, f5 ; CHECK-BE-NEXT: xscvdpsxws f5, f4 ; CHECK-BE-NEXT: xxswapd vs4, vs4 -; CHECK-BE-NEXT: xscvdpsxws f6, f3 -; CHECK-BE-NEXT: xxswapd vs3, vs3 -; CHECK-BE-NEXT: xscvdpsxws f7, f2 -; CHECK-BE-NEXT: lxv vs0, 112(r4) -; CHECK-BE-NEXT: xxswapd vs2, vs2 +; CHECK-BE-NEXT: mtvsrdd v4, r4, r5 ; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r5, f5 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: mtvsrd v2, r5 -; CHECK-BE-NEXT: mffprwz r5, f4 -; CHECK-BE-NEXT: xscvdpsxws f4, f1 -; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mtvsrd v3, r5 -; CHECK-BE-NEXT: mffprwz r5, f6 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: vmrghh v2, v2, v3 -; CHECK-BE-NEXT: mtvsrd v3, r5 -; CHECK-BE-NEXT: mffprwz r5, f3 -; CHECK-BE-NEXT: xscvdpsxws f3, f0 -; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrd v4, r5 -; CHECK-BE-NEXT: mffprwz r5, f7 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: vmrghh v3, v3, v4 -; CHECK-BE-NEXT: mtvsrd v4, r5 +; CHECK-BE-NEXT: mffprwz r4, f5 ; CHECK-BE-NEXT: mffprwz r5, f4 -; CHECK-BE-NEXT: vmrghw v2, v3, v2 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: mtvsrd v5, r5 +; CHECK-BE-NEXT: xscvdpsxws f4, f3 +; CHECK-BE-NEXT: xxswapd vs3, vs3 +; CHECK-BE-NEXT: mtvsrdd v5, r4, r5 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: vpkudum v3, v5, v4 +; CHECK-BE-NEXT: mffprwz r4, f4 +; CHECK-BE-NEXT: vpkuwum v2, v3, v2 ; CHECK-BE-NEXT: mffprwz r5, f3 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: mtvsrd v0, r5 -; CHECK-BE-NEXT: mffprwz r5, f2 -; CHECK-BE-NEXT: lxv vs2, 96(r4) -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: mtvsrd v1, r5 -; CHECK-BE-NEXT: 
mffprwz r5, f1 -; CHECK-BE-NEXT: lxv vs1, 80(r4) ; CHECK-BE-NEXT: xscvdpsxws f3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs2 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: vmrghh v4, v4, v1 -; CHECK-BE-NEXT: mtvsrd v1, r5 +; CHECK-BE-NEXT: stxv v2, 0(r3) ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r5, f0 -; CHECK-BE-NEXT: lxv vs0, 64(r4) -; CHECK-BE-NEXT: vmrghh v5, v5, v1 -; CHECK-BE-NEXT: sldi r5, r5, 48 +; CHECK-BE-NEXT: mtvsrdd v3, r4, r5 ; CHECK-BE-NEXT: mffprwz r4, f3 -; CHECK-BE-NEXT: mtvsrd v1, r5 -; CHECK-BE-NEXT: vmrghw v3, v5, v4 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: vmrghh v0, v0, v1 -; CHECK-BE-NEXT: xxmrghd vs3, v3, v2 -; CHECK-BE-NEXT: mtvsrd v2, r4 -; CHECK-BE-NEXT: mffprwz r4, f2 +; CHECK-BE-NEXT: mffprwz r5, f2 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: sldi r4, r4, 48 +; CHECK-BE-NEXT: mtvsrdd v4, r4, r5 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: stxv vs3, 0(r3) -; CHECK-BE-NEXT: mtvsrd v3, r4 -; CHECK-BE-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-NEXT: vpkudum v3, v4, v3 ; CHECK-BE-NEXT: mffprwz r4, f2 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: vmrghw v2, v2, v0 -; CHECK-BE-NEXT: mtvsrd v3, r4 -; CHECK-BE-NEXT: mffprwz r4, f1 +; CHECK-BE-NEXT: mffprwz r5, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: sldi r4, r4, 48 +; CHECK-BE-NEXT: mtvsrdd v4, r4, r5 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrd v4, r4 -; CHECK-BE-NEXT: vmrghh v3, v3, v4 ; CHECK-BE-NEXT: mffprwz r4, f1 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: mtvsrd v4, r4 -; CHECK-BE-NEXT: mffprwz r4, f0 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: mtvsrd v5, r4 -; CHECK-BE-NEXT: vmrghh v4, v4, v5 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: xxmrghd vs0, v3, v2 -; CHECK-BE-NEXT: stxv vs0, 16(r3) +; CHECK-BE-NEXT: mffprwz r5, f0 +; CHECK-BE-NEXT: mtvsrdd v5, r4, r5 +; CHECK-BE-NEXT: vpkudum v4, v5, v4 +; CHECK-BE-NEXT: vpkuwum 
v3, v4, v3 +; CHECK-BE-NEXT: stxv v3, 16(r3) ; CHECK-BE-NEXT: blr entry: %a = load <16 x double>, <16 x double>* %0, align 128 @@ -657,19 +575,16 @@ ; CHECK-P9-NEXT: xxswapd vs1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mffprwz r4, f1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mtvsrdd v2, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghh v2, v2, v3 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: mtvsrdd v3, r3, r4 +; CHECK-P9-NEXT: vpkudum v2, v3, v2 +; CHECK-P9-NEXT: vpkuwum v2, v2, v2 ; CHECK-P9-NEXT: mfvsrld r3, v2 ; CHECK-P9-NEXT: blr ; @@ -681,23 +596,16 @@ ; CHECK-BE-NEXT: xxswapd vs1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v2, r3 -; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: sldi r3, r3, 48 +; CHECK-BE-NEXT: mtvsrdd v2, r3, r4 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: vmrghh v2, v2, v3 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: vmrghh v3, v3, v4 -; CHECK-BE-NEXT: vmrghw v2, v3, v2 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: mtvsrdd v3, r3, r4 +; CHECK-BE-NEXT: vpkudum v2, v3, v2 +; CHECK-BE-NEXT: vpkuwum v2, v2, v2 ; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -758,94 +666,70 @@ ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv 
vs3, 0(r3) ; CHECK-P9-NEXT: lxv vs2, 16(r3) -; CHECK-P9-NEXT: lxv vs0, 48(r3) ; CHECK-P9-NEXT: lxv vs1, 32(r3) +; CHECK-P9-NEXT: lxv vs0, 48(r3) ; CHECK-P9-NEXT: xscvdpsxws f4, f3 ; CHECK-P9-NEXT: xxswapd vs3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 ; CHECK-P9-NEXT: mffprwz r3, f4 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: mffprwz r3, f3 +; CHECK-P9-NEXT: mffprwz r4, f3 ; CHECK-P9-NEXT: xscvdpsxws f3, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs2 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mtvsrdd v2, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrghh v2, v2, v3 ; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mffprwz r4, f2 ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mtvsrdd v3, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 +; CHECK-P9-NEXT: vpkudum v2, v3, v2 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mffprwz r4, f1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mtvsrdd v3, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v5, r3 -; CHECK-P9-NEXT: vmrghh v4, v4, v5 -; CHECK-P9-NEXT: vmrglw v3, v4, v3 -; CHECK-P9-NEXT: xxmrgld v2, v3, v2 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: mtvsrdd v4, r3, r4 +; CHECK-P9-NEXT: vpkudum v3, v4, v3 +; CHECK-P9-NEXT: vpkuwum v2, v3, v2 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test8elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs3, 48(r3) ; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: 
xscvdpsxws f4, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v2, r3 -; CHECK-BE-NEXT: mffprwz r3, f3 +; CHECK-BE-NEXT: mffprwz r4, f3 ; CHECK-BE-NEXT: xscvdpsxws f3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs2 -; CHECK-BE-NEXT: sldi r3, r3, 48 +; CHECK-BE-NEXT: mtvsrdd v2, r3, r4 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: vmrghh v2, v2, v3 ; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mffprwz r3, f2 +; CHECK-BE-NEXT: mffprwz r4, f2 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: sldi r3, r3, 48 +; CHECK-BE-NEXT: mtvsrdd v3, r3, r4 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: vmrghh v3, v3, v4 +; CHECK-BE-NEXT: vpkudum v2, v3, v2 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: vmrghw v2, v3, v2 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: sldi r3, r3, 48 +; CHECK-BE-NEXT: mtvsrdd v3, r3, r4 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: vmrghh v3, v3, v4 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 48 -; CHECK-BE-NEXT: mtvsrd v5, r3 -; CHECK-BE-NEXT: vmrghh v4, v4, v5 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: xxmrghd v2, v3, v2 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: mtvsrdd v4, r3, r4 +; CHECK-BE-NEXT: vpkudum v3, v4, v3 +; CHECK-BE-NEXT: vpkuwum v2, v3, v2 ; CHECK-BE-NEXT: blr entry: %a = load <8 x double>, <8 x double>* %0, align 64 @@ -947,186 +831,138 @@ ; ; CHECK-P9-LABEL: test16elt_signed: ; CHECK-P9: # %bb.0: # 
%entry -; CHECK-P9-NEXT: lxv vs3, 0(r4) -; CHECK-P9-NEXT: lxv vs2, 16(r4) -; CHECK-P9-NEXT: lxv vs1, 32(r4) -; CHECK-P9-NEXT: lxv vs0, 48(r4) -; CHECK-P9-NEXT: xscvdpsxws f4, f3 -; CHECK-P9-NEXT: xscvdpsxws f5, f2 -; CHECK-P9-NEXT: xscvdpsxws f6, f1 -; CHECK-P9-NEXT: xxswapd vs3, vs3 -; CHECK-P9-NEXT: xscvdpsxws f7, f0 -; CHECK-P9-NEXT: xxswapd vs2, vs2 -; CHECK-P9-NEXT: xxswapd vs1, vs1 -; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: mffprwz r5, f4 -; CHECK-P9-NEXT: mtvsrd v2, r5 -; CHECK-P9-NEXT: mffprwz r5, f5 -; CHECK-P9-NEXT: mtvsrd v3, r5 -; CHECK-P9-NEXT: mffprwz r5, f6 -; CHECK-P9-NEXT: mtvsrd v4, r5 -; CHECK-P9-NEXT: mffprwz r5, f7 -; CHECK-P9-NEXT: mtvsrd v5, r5 -; CHECK-P9-NEXT: mffprwz r5, f3 -; CHECK-P9-NEXT: lxv vs3, 64(r4) -; CHECK-P9-NEXT: mtvsrd v0, r5 -; CHECK-P9-NEXT: mffprwz r5, f2 -; CHECK-P9-NEXT: lxv vs2, 80(r4) -; CHECK-P9-NEXT: vmrghh v2, v2, v0 -; CHECK-P9-NEXT: mtvsrd v0, r5 -; CHECK-P9-NEXT: mffprwz r5, f1 +; CHECK-P9-NEXT: lxv vs7, 0(r4) +; CHECK-P9-NEXT: lxv vs6, 16(r4) +; CHECK-P9-NEXT: lxv vs5, 32(r4) +; CHECK-P9-NEXT: lxv vs0, 112(r4) +; CHECK-P9-NEXT: xscvdpsxws f8, f7 +; CHECK-P9-NEXT: xxswapd vs7, vs7 ; CHECK-P9-NEXT: lxv vs1, 96(r4) +; CHECK-P9-NEXT: lxv vs2, 80(r4) +; CHECK-P9-NEXT: lxv vs3, 64(r4) +; CHECK-P9-NEXT: lxv vs4, 48(r4) +; CHECK-P9-NEXT: xscvdpsxws f7, f7 +; CHECK-P9-NEXT: mffprwz r4, f8 +; CHECK-P9-NEXT: mffprwz r5, f7 +; CHECK-P9-NEXT: xscvdpsxws f7, f6 +; CHECK-P9-NEXT: xxswapd vs6, vs6 +; CHECK-P9-NEXT: xscvdpsxws f6, f6 +; CHECK-P9-NEXT: mtvsrdd v2, r4, r5 +; CHECK-P9-NEXT: mffprwz r4, f7 +; CHECK-P9-NEXT: mffprwz r5, f6 +; CHECK-P9-NEXT: xscvdpsxws f6, f5 +; CHECK-P9-NEXT: xxswapd vs5, vs5 +; CHECK-P9-NEXT: mtvsrdd v3, r4, r5 +; CHECK-P9-NEXT: xscvdpsxws f5, f5 +; CHECK-P9-NEXT: vpkudum v2, v3, v2 +; CHECK-P9-NEXT: mffprwz r4, f6 +; CHECK-P9-NEXT: mffprwz r5, f5 
+; CHECK-P9-NEXT: xscvdpsxws f5, f4 +; CHECK-P9-NEXT: xxswapd vs4, vs4 +; CHECK-P9-NEXT: mtvsrdd v4, r4, r5 +; CHECK-P9-NEXT: xscvdpsxws f4, f4 +; CHECK-P9-NEXT: mffprwz r4, f5 +; CHECK-P9-NEXT: mffprwz r5, f4 ; CHECK-P9-NEXT: xscvdpsxws f4, f3 ; CHECK-P9-NEXT: xxswapd vs3, vs3 -; CHECK-P9-NEXT: vmrghh v3, v3, v0 -; CHECK-P9-NEXT: mtvsrd v0, r5 -; CHECK-P9-NEXT: mffprwz r5, f0 -; CHECK-P9-NEXT: lxv vs0, 112(r4) +; CHECK-P9-NEXT: mtvsrdd v5, r4, r5 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: vmrghh v4, v4, v0 -; CHECK-P9-NEXT: mtvsrd v0, r5 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: vmrghh v5, v5, v0 +; CHECK-P9-NEXT: vpkudum v3, v5, v4 ; CHECK-P9-NEXT: mffprwz r4, f4 -; CHECK-P9-NEXT: vmrglw v4, v5, v4 -; CHECK-P9-NEXT: mtvsrd v3, r4 -; CHECK-P9-NEXT: mffprwz r4, f3 +; CHECK-P9-NEXT: vpkuwum v2, v3, v2 +; CHECK-P9-NEXT: mffprwz r5, f3 ; CHECK-P9-NEXT: xscvdpsxws f3, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs2 -; CHECK-P9-NEXT: xxmrgld vs4, v4, v2 -; CHECK-P9-NEXT: mtvsrd v2, r4 +; CHECK-P9-NEXT: stxv v2, 0(r3) ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrghh v2, v3, v2 -; CHECK-P9-NEXT: stxv vs4, 0(r3) +; CHECK-P9-NEXT: mtvsrdd v3, r4, r5 ; CHECK-P9-NEXT: mffprwz r4, f3 -; CHECK-P9-NEXT: mtvsrd v3, r4 -; CHECK-P9-NEXT: mffprwz r4, f2 +; CHECK-P9-NEXT: mffprwz r5, f2 ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 -; CHECK-P9-NEXT: mtvsrd v4, r4 +; CHECK-P9-NEXT: mtvsrdd v4, r4, r5 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 +; CHECK-P9-NEXT: vpkudum v3, v4, v3 ; CHECK-P9-NEXT: mffprwz r4, f2 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: mtvsrd v3, r4 -; CHECK-P9-NEXT: mffprwz r4, f1 +; CHECK-P9-NEXT: mffprwz r5, f1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: mtvsrd v4, r4 +; CHECK-P9-NEXT: mtvsrdd v4, r4, r5 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghh v3, v3, v4 ; CHECK-P9-NEXT: mffprwz r4, f1 -; CHECK-P9-NEXT: mtvsrd v4, r4 -; 
CHECK-P9-NEXT: mffprwz r4, f0 -; CHECK-P9-NEXT: mtvsrd v5, r4 -; CHECK-P9-NEXT: vmrghh v4, v4, v5 -; CHECK-P9-NEXT: vmrglw v3, v4, v3 -; CHECK-P9-NEXT: xxmrgld vs0, v3, v2 -; CHECK-P9-NEXT: stxv vs0, 16(r3) +; CHECK-P9-NEXT: mffprwz r5, f0 +; CHECK-P9-NEXT: mtvsrdd v5, r4, r5 +; CHECK-P9-NEXT: vpkudum v4, v5, v4 +; CHECK-P9-NEXT: vpkuwum v3, v4, v3 +; CHECK-P9-NEXT: stxv v3, 16(r3) ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test16elt_signed: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: lxv vs4, 48(r4) -; CHECK-BE-NEXT: lxv vs3, 32(r4) -; CHECK-BE-NEXT: lxv vs2, 16(r4) -; CHECK-BE-NEXT: lxv vs1, 0(r4) +; CHECK-BE-NEXT: lxv vs7, 48(r4) +; CHECK-BE-NEXT: lxv vs6, 32(r4) +; CHECK-BE-NEXT: lxv vs5, 16(r4) +; CHECK-BE-NEXT: lxv vs0, 64(r4) +; CHECK-BE-NEXT: xscvdpsxws f8, f7 +; CHECK-BE-NEXT: xxswapd vs7, vs7 +; CHECK-BE-NEXT: lxv vs1, 80(r4) +; CHECK-BE-NEXT: lxv vs2, 96(r4) +; CHECK-BE-NEXT: lxv vs3, 112(r4) +; CHECK-BE-NEXT: lxv vs4, 0(r4) +; CHECK-BE-NEXT: xscvdpsxws f7, f7 +; CHECK-BE-NEXT: mffprwz r4, f8 +; CHECK-BE-NEXT: mffprwz r5, f7 +; CHECK-BE-NEXT: xscvdpsxws f7, f6 +; CHECK-BE-NEXT: xxswapd vs6, vs6 +; CHECK-BE-NEXT: xscvdpsxws f6, f6 +; CHECK-BE-NEXT: mtvsrdd v2, r4, r5 +; CHECK-BE-NEXT: mffprwz r4, f7 +; CHECK-BE-NEXT: mffprwz r5, f6 +; CHECK-BE-NEXT: xscvdpsxws f6, f5 +; CHECK-BE-NEXT: xxswapd vs5, vs5 +; CHECK-BE-NEXT: mtvsrdd v3, r4, r5 +; CHECK-BE-NEXT: xscvdpsxws f5, f5 +; CHECK-BE-NEXT: vpkudum v2, v3, v2 +; CHECK-BE-NEXT: mffprwz r4, f6 +; CHECK-BE-NEXT: mffprwz r5, f5 ; CHECK-BE-NEXT: xscvdpsxws f5, f4 ; CHECK-BE-NEXT: xxswapd vs4, vs4 -; CHECK-BE-NEXT: xscvdpsxws f6, f3 -; CHECK-BE-NEXT: xxswapd vs3, vs3 -; CHECK-BE-NEXT: xscvdpsxws f7, f2 -; CHECK-BE-NEXT: lxv vs0, 112(r4) -; CHECK-BE-NEXT: xxswapd vs2, vs2 +; CHECK-BE-NEXT: mtvsrdd v4, r4, r5 ; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r5, f5 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: mtvsrd v2, r5 
-; CHECK-BE-NEXT: mffprwz r5, f4 -; CHECK-BE-NEXT: xscvdpsxws f4, f1 -; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mtvsrd v3, r5 -; CHECK-BE-NEXT: mffprwz r5, f6 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: vmrghh v2, v2, v3 -; CHECK-BE-NEXT: mtvsrd v3, r5 -; CHECK-BE-NEXT: mffprwz r5, f3 -; CHECK-BE-NEXT: xscvdpsxws f3, f0 -; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrd v4, r5 -; CHECK-BE-NEXT: mffprwz r5, f7 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: vmrghh v3, v3, v4 -; CHECK-BE-NEXT: mtvsrd v4, r5 +; CHECK-BE-NEXT: mffprwz r4, f5 ; CHECK-BE-NEXT: mffprwz r5, f4 -; CHECK-BE-NEXT: vmrghw v2, v3, v2 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: mtvsrd v5, r5 +; CHECK-BE-NEXT: xscvdpsxws f4, f3 +; CHECK-BE-NEXT: xxswapd vs3, vs3 +; CHECK-BE-NEXT: mtvsrdd v5, r4, r5 +; CHECK-BE-NEXT: xscvdpsxws f3, f3 +; CHECK-BE-NEXT: vpkudum v3, v5, v4 +; CHECK-BE-NEXT: mffprwz r4, f4 +; CHECK-BE-NEXT: vpkuwum v2, v3, v2 ; CHECK-BE-NEXT: mffprwz r5, f3 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: mtvsrd v0, r5 -; CHECK-BE-NEXT: mffprwz r5, f2 -; CHECK-BE-NEXT: lxv vs2, 96(r4) -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: mtvsrd v1, r5 -; CHECK-BE-NEXT: mffprwz r5, f1 -; CHECK-BE-NEXT: lxv vs1, 80(r4) ; CHECK-BE-NEXT: xscvdpsxws f3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs2 -; CHECK-BE-NEXT: sldi r5, r5, 48 -; CHECK-BE-NEXT: vmrghh v4, v4, v1 -; CHECK-BE-NEXT: mtvsrd v1, r5 +; CHECK-BE-NEXT: stxv v2, 0(r3) ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mffprwz r5, f0 -; CHECK-BE-NEXT: lxv vs0, 64(r4) -; CHECK-BE-NEXT: vmrghh v5, v5, v1 -; CHECK-BE-NEXT: sldi r5, r5, 48 +; CHECK-BE-NEXT: mtvsrdd v3, r4, r5 ; CHECK-BE-NEXT: mffprwz r4, f3 -; CHECK-BE-NEXT: mtvsrd v1, r5 -; CHECK-BE-NEXT: vmrghw v3, v5, v4 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: vmrghh v0, v0, v1 -; CHECK-BE-NEXT: 
xxmrghd vs3, v3, v2 -; CHECK-BE-NEXT: mtvsrd v2, r4 -; CHECK-BE-NEXT: mffprwz r4, f2 +; CHECK-BE-NEXT: mffprwz r5, f2 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: sldi r4, r4, 48 +; CHECK-BE-NEXT: mtvsrdd v4, r4, r5 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: stxv vs3, 0(r3) -; CHECK-BE-NEXT: mtvsrd v3, r4 -; CHECK-BE-NEXT: vmrghh v2, v2, v3 +; CHECK-BE-NEXT: vpkudum v3, v4, v3 ; CHECK-BE-NEXT: mffprwz r4, f2 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: vmrghw v2, v2, v0 -; CHECK-BE-NEXT: mtvsrd v3, r4 -; CHECK-BE-NEXT: mffprwz r4, f1 +; CHECK-BE-NEXT: mffprwz r5, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: sldi r4, r4, 48 +; CHECK-BE-NEXT: mtvsrdd v4, r4, r5 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrd v4, r4 -; CHECK-BE-NEXT: vmrghh v3, v3, v4 ; CHECK-BE-NEXT: mffprwz r4, f1 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: mtvsrd v4, r4 -; CHECK-BE-NEXT: mffprwz r4, f0 -; CHECK-BE-NEXT: sldi r4, r4, 48 -; CHECK-BE-NEXT: mtvsrd v5, r4 -; CHECK-BE-NEXT: vmrghh v4, v4, v5 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: xxmrghd vs0, v3, v2 -; CHECK-BE-NEXT: stxv vs0, 16(r3) +; CHECK-BE-NEXT: mffprwz r5, f0 +; CHECK-BE-NEXT: mtvsrdd v5, r4, r5 +; CHECK-BE-NEXT: vpkudum v4, v5, v4 +; CHECK-BE-NEXT: vpkuwum v3, v4, v3 +; CHECK-BE-NEXT: stxv v3, 16(r3) ; CHECK-BE-NEXT: blr entry: %a = load <16 x double>, <16 x double>* %0, align 128 Index: llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll =================================================================== --- llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll +++ llvm/test/CodeGen/PowerPC/vec_conv_fp64_to_i8_elts.ll @@ -101,20 +101,18 @@ ; CHECK-P9-NEXT: xxswapd vs1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mffprwz r4, f1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: 
xxswapd vs0, vs0 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mtvsrdd v2, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghb v2, v2, v3 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: mtvsrdd v3, r3, r4 ; CHECK-P9-NEXT: li r3, 0 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 +; CHECK-P9-NEXT: vpkudum v2, v3, v2 +; CHECK-P9-NEXT: vpkuwum v2, v2, v2 +; CHECK-P9-NEXT: vpkuhum v2, v2, v2 ; CHECK-P9-NEXT: vextuwrx r3, r3, v2 ; CHECK-P9-NEXT: blr ; @@ -126,24 +124,18 @@ ; CHECK-BE-NEXT: xxswapd vs1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v2, r3 -; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrdd v2, r3, r4 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: vmrghb v2, v2, v3 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: mtvsrdd v3, r3, r4 ; CHECK-BE-NEXT: li r3, 0 -; CHECK-BE-NEXT: vmrghb v3, v3, v4 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-NEXT: vpkudum v2, v3, v2 +; CHECK-BE-NEXT: vpkuwum v2, v2, v2 +; CHECK-BE-NEXT: vpkuhum v2, v2, v2 ; CHECK-BE-NEXT: vextuwlx r3, r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -206,43 +198,36 @@ ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs3, 0(r3) ; CHECK-P9-NEXT: lxv vs2, 16(r3) -; CHECK-P9-NEXT: lxv vs0, 48(r3) ; CHECK-P9-NEXT: lxv vs1, 32(r3) +; CHECK-P9-NEXT: lxv vs0, 48(r3) ; CHECK-P9-NEXT: xscvdpsxws f4, f3 ; CHECK-P9-NEXT: xxswapd vs3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 ; 
CHECK-P9-NEXT: mffprwz r3, f4 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: mffprwz r3, f3 +; CHECK-P9-NEXT: mffprwz r4, f3 ; CHECK-P9-NEXT: xscvdpsxws f3, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs2 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mtvsrdd v2, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrghb v2, v2, v3 ; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mffprwz r4, f2 ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mtvsrdd v3, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 +; CHECK-P9-NEXT: vpkudum v2, v3, v2 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mffprwz r4, f1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mtvsrdd v3, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v5, r3 -; CHECK-P9-NEXT: vmrghb v4, v4, v5 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: mtvsrdd v4, r3, r4 +; CHECK-P9-NEXT: vpkudum v3, v4, v3 +; CHECK-P9-NEXT: vpkuwum v2, v3, v2 +; CHECK-P9-NEXT: vpkuhum v2, v2, v2 ; CHECK-P9-NEXT: mfvsrld r3, v2 ; CHECK-P9-NEXT: blr ; @@ -250,51 +235,36 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs3, 48(r3) ; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: xscvdpsxws f4, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v2, r3 -; CHECK-BE-NEXT: 
mffprwz r3, f3 +; CHECK-BE-NEXT: mffprwz r4, f3 ; CHECK-BE-NEXT: xscvdpsxws f3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs2 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrdd v2, r3, r4 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: vmrghb v2, v2, v3 ; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mffprwz r3, f2 +; CHECK-BE-NEXT: mffprwz r4, f2 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrdd v3, r3, r4 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: vmrghb v3, v3, v4 +; CHECK-BE-NEXT: vpkudum v2, v3, v2 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrdd v3, r3, r4 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: vmrghb v3, v3, v4 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v5, r3 -; CHECK-BE-NEXT: vmrghb v4, v4, v5 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: vmrghw v2, v3, v2 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: mtvsrdd v4, r3, r4 +; CHECK-BE-NEXT: vpkudum v3, v4, v3 +; CHECK-BE-NEXT: vpkuwum v2, v3, v2 +; CHECK-BE-NEXT: vpkuhum v2, v2, v2 ; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -399,182 +369,134 @@ ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs7, 0(r3) ; CHECK-P9-NEXT: lxv vs6, 16(r3) +; CHECK-P9-NEXT: lxv vs5, 32(r3) ; CHECK-P9-NEXT: lxv vs0, 112(r3) -; CHECK-P9-NEXT: lxv vs1, 96(r3) ; CHECK-P9-NEXT: xscvdpsxws f8, f7 ; CHECK-P9-NEXT: 
xxswapd vs7, vs7 +; CHECK-P9-NEXT: lxv vs1, 96(r3) ; CHECK-P9-NEXT: lxv vs2, 80(r3) ; CHECK-P9-NEXT: lxv vs3, 64(r3) ; CHECK-P9-NEXT: lxv vs4, 48(r3) -; CHECK-P9-NEXT: lxv vs5, 32(r3) ; CHECK-P9-NEXT: xscvdpsxws f7, f7 ; CHECK-P9-NEXT: mffprwz r3, f8 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: mffprwz r3, f7 +; CHECK-P9-NEXT: mffprwz r4, f7 ; CHECK-P9-NEXT: xscvdpsxws f7, f6 ; CHECK-P9-NEXT: xxswapd vs6, vs6 -; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvdpsxws f6, f6 -; CHECK-P9-NEXT: vmrghb v2, v2, v3 +; CHECK-P9-NEXT: mtvsrdd v2, r3, r4 ; CHECK-P9-NEXT: mffprwz r3, f7 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f6 +; CHECK-P9-NEXT: mffprwz r4, f6 ; CHECK-P9-NEXT: xscvdpsxws f6, f5 ; CHECK-P9-NEXT: xxswapd vs5, vs5 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mtvsrdd v3, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f5, f5 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 +; CHECK-P9-NEXT: vpkudum v2, v3, v2 ; CHECK-P9-NEXT: mffprwz r3, f6 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f5 +; CHECK-P9-NEXT: mffprwz r4, f5 ; CHECK-P9-NEXT: xscvdpsxws f5, f4 ; CHECK-P9-NEXT: xxswapd vs4, vs4 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mtvsrdd v3, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f4, f4 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 ; CHECK-P9-NEXT: mffprwz r3, f5 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: mffprwz r3, f4 +; CHECK-P9-NEXT: mffprwz r4, f4 ; CHECK-P9-NEXT: xscvdpsxws f4, f3 ; CHECK-P9-NEXT: xxswapd vs3, vs3 -; CHECK-P9-NEXT: mtvsrd v5, r3 +; CHECK-P9-NEXT: mtvsrdd v4, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: vmrghb v4, v4, v5 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 +; CHECK-P9-NEXT: vpkudum v3, v4, v3 ; CHECK-P9-NEXT: mffprwz r3, f4 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f3 +; CHECK-P9-NEXT: vpkuwum v2, v3, v2 +; CHECK-P9-NEXT: mffprwz r4, f3 ; CHECK-P9-NEXT: xscvdpsxws f3, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs2 -; 
CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mtvsrdd v3, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 ; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mffprwz r4, f2 ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 -; CHECK-P9-NEXT: mtvsrd v5, r3 +; CHECK-P9-NEXT: mtvsrdd v4, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrghb v4, v4, v5 +; CHECK-P9-NEXT: vpkudum v3, v4, v3 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mffprwz r4, f1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: mtvsrd v5, r3 +; CHECK-P9-NEXT: mtvsrdd v4, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghb v4, v4, v5 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v5, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v0, r3 -; CHECK-P9-NEXT: vmrghb v5, v5, v0 -; CHECK-P9-NEXT: vmrglh v4, v5, v4 -; CHECK-P9-NEXT: vmrglw v3, v4, v3 -; CHECK-P9-NEXT: xxmrgld v2, v3, v2 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: mtvsrdd v5, r3, r4 +; CHECK-P9-NEXT: vpkudum v4, v5, v4 +; CHECK-P9-NEXT: vpkuwum v3, v4, v3 +; CHECK-P9-NEXT: vpkuhum v2, v3, v2 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test16elt: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs7, 112(r3) ; CHECK-BE-NEXT: lxv vs6, 96(r3) +; CHECK-BE-NEXT: lxv vs5, 80(r3) ; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) ; CHECK-BE-NEXT: xscvdpsxws f8, f7 ; CHECK-BE-NEXT: xxswapd vs7, vs7 +; CHECK-BE-NEXT: lxv vs1, 16(r3) ; CHECK-BE-NEXT: lxv vs2, 32(r3) ; CHECK-BE-NEXT: lxv vs3, 48(r3) ; CHECK-BE-NEXT: lxv vs4, 64(r3) -; CHECK-BE-NEXT: lxv vs5, 80(r3) ; CHECK-BE-NEXT: xscvdpsxws f7, f7 ; CHECK-BE-NEXT: mffprwz r3, f8 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v2, r3 -; CHECK-BE-NEXT: mffprwz r3, f7 +; 
CHECK-BE-NEXT: mffprwz r4, f7 ; CHECK-BE-NEXT: xscvdpsxws f7, f6 ; CHECK-BE-NEXT: xxswapd vs6, vs6 -; CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvdpsxws f6, f6 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: vmrghb v2, v2, v3 +; CHECK-BE-NEXT: mtvsrdd v2, r3, r4 ; CHECK-BE-NEXT: mffprwz r3, f7 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mffprwz r3, f6 +; CHECK-BE-NEXT: mffprwz r4, f6 ; CHECK-BE-NEXT: xscvdpsxws f6, f5 ; CHECK-BE-NEXT: xxswapd vs5, vs5 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrdd v3, r3, r4 ; CHECK-BE-NEXT: xscvdpsxws f5, f5 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: vmrghb v3, v3, v4 +; CHECK-BE-NEXT: vpkudum v2, v3, v2 ; CHECK-BE-NEXT: mffprwz r3, f6 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mffprwz r3, f5 +; CHECK-BE-NEXT: mffprwz r4, f5 ; CHECK-BE-NEXT: xscvdpsxws f5, f4 ; CHECK-BE-NEXT: xxswapd vs4, vs4 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrdd v3, r3, r4 ; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: vmrghb v3, v3, v4 ; CHECK-BE-NEXT: mffprwz r3, f5 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: mffprwz r3, f4 +; CHECK-BE-NEXT: mffprwz r4, f4 ; CHECK-BE-NEXT: xscvdpsxws f4, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs3 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrdd v4, r3, r4 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mtvsrd v5, r3 -; CHECK-BE-NEXT: vmrghb v4, v4, v5 +; CHECK-BE-NEXT: vpkudum v3, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: vmrghw v2, v3, v2 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mffprwz r3, f3 +; CHECK-BE-NEXT: vpkuwum v2, v3, v2 +; CHECK-BE-NEXT: mffprwz r4, f3 ; CHECK-BE-NEXT: xscvdpsxws f3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs2 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: 
mtvsrdd v3, r3, r4 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: vmrghb v3, v3, v4 ; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: mffprwz r3, f2 +; CHECK-BE-NEXT: mffprwz r4, f2 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrdd v4, r3, r4 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mtvsrd v5, r3 -; CHECK-BE-NEXT: vmrghb v4, v4, v5 +; CHECK-BE-NEXT: vpkudum v3, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrdd v4, r3, r4 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrd v5, r3 -; CHECK-BE-NEXT: vmrghb v4, v4, v5 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v5, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v0, r3 -; CHECK-BE-NEXT: vmrghb v5, v5, v0 -; CHECK-BE-NEXT: vmrghh v4, v5, v4 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: xxmrghd v2, v3, v2 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: mtvsrdd v5, r3, r4 +; CHECK-BE-NEXT: vpkudum v4, v5, v4 +; CHECK-BE-NEXT: vpkuwum v3, v4, v3 +; CHECK-BE-NEXT: vpkuhum v2, v3, v2 ; CHECK-BE-NEXT: blr entry: %a = load <16 x double>, <16 x double>* %0, align 128 @@ -674,20 +596,18 @@ ; CHECK-P9-NEXT: xxswapd vs1, vs1 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mffprwz r4, f1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mtvsrdd v2, r3, r4 ; CHECK-P9-NEXT: 
xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghb v2, v2, v3 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: mtvsrdd v3, r3, r4 ; CHECK-P9-NEXT: li r3, 0 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 +; CHECK-P9-NEXT: vpkudum v2, v3, v2 +; CHECK-P9-NEXT: vpkuwum v2, v2, v2 +; CHECK-P9-NEXT: vpkuhum v2, v2, v2 ; CHECK-P9-NEXT: vextuwrx r3, r3, v2 ; CHECK-P9-NEXT: blr ; @@ -699,24 +619,18 @@ ; CHECK-BE-NEXT: xxswapd vs1, vs1 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v2, r3 -; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrdd v2, r3, r4 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: vmrghb v2, v2, v3 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v4, r3 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: mtvsrdd v3, r3, r4 ; CHECK-BE-NEXT: li r3, 0 -; CHECK-BE-NEXT: vmrghb v3, v3, v4 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 +; CHECK-BE-NEXT: vpkudum v2, v3, v2 +; CHECK-BE-NEXT: vpkuwum v2, v2, v2 +; CHECK-BE-NEXT: vpkuhum v2, v2, v2 ; CHECK-BE-NEXT: vextuwlx r3, r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -779,43 +693,36 @@ ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs3, 0(r3) ; CHECK-P9-NEXT: lxv vs2, 16(r3) -; CHECK-P9-NEXT: lxv vs0, 48(r3) ; CHECK-P9-NEXT: lxv vs1, 32(r3) +; CHECK-P9-NEXT: lxv vs0, 48(r3) ; CHECK-P9-NEXT: xscvdpsxws f4, f3 ; CHECK-P9-NEXT: xxswapd vs3, vs3 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 ; CHECK-P9-NEXT: mffprwz r3, f4 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: mffprwz r3, f3 +; CHECK-P9-NEXT: 
mffprwz r4, f3 ; CHECK-P9-NEXT: xscvdpsxws f3, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs2 -; CHECK-P9-NEXT: mtvsrd v3, r3 +; CHECK-P9-NEXT: mtvsrdd v2, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: vmrghb v2, v2, v3 ; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mffprwz r4, f2 ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mtvsrdd v3, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 +; CHECK-P9-NEXT: vpkudum v2, v3, v2 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mffprwz r4, f1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mtvsrdd v3, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v5, r3 -; CHECK-P9-NEXT: vmrghb v4, v4, v5 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: mtvsrdd v4, r3, r4 +; CHECK-P9-NEXT: vpkudum v3, v4, v3 +; CHECK-P9-NEXT: vpkuwum v2, v3, v2 +; CHECK-P9-NEXT: vpkuhum v2, v2, v2 ; CHECK-P9-NEXT: mfvsrld r3, v2 ; CHECK-P9-NEXT: blr ; @@ -823,51 +730,36 @@ ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs3, 48(r3) ; CHECK-BE-NEXT: lxv vs2, 32(r3) -; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: lxv vs1, 16(r3) +; CHECK-BE-NEXT: lxv vs0, 0(r3) ; CHECK-BE-NEXT: xscvdpsxws f4, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs3 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 ; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v2, r3 -; CHECK-BE-NEXT: mffprwz r3, f3 +; CHECK-BE-NEXT: mffprwz r4, f3 ; CHECK-BE-NEXT: xscvdpsxws f3, f2 ; CHECK-BE-NEXT: xxswapd vs2, 
vs2 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrdd v2, r3, r4 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: vmrghb v2, v2, v3 ; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mffprwz r3, f2 +; CHECK-BE-NEXT: mffprwz r4, f2 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrdd v3, r3, r4 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: vmrghb v3, v3, v4 +; CHECK-BE-NEXT: vpkudum v2, v3, v2 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrdd v3, r3, r4 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: vmrghb v3, v3, v4 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v5, r3 -; CHECK-BE-NEXT: vmrghb v4, v4, v5 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: vmrghw v2, v3, v2 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: mtvsrdd v4, r3, r4 +; CHECK-BE-NEXT: vpkudum v3, v4, v3 +; CHECK-BE-NEXT: vpkuwum v2, v3, v2 +; CHECK-BE-NEXT: vpkuhum v2, v2, v2 ; CHECK-BE-NEXT: mfvsrd r3, v2 ; CHECK-BE-NEXT: blr entry: @@ -972,182 +864,134 @@ ; CHECK-P9: # %bb.0: # %entry ; CHECK-P9-NEXT: lxv vs7, 0(r3) ; CHECK-P9-NEXT: lxv vs6, 16(r3) +; CHECK-P9-NEXT: lxv vs5, 32(r3) ; CHECK-P9-NEXT: lxv vs0, 112(r3) -; CHECK-P9-NEXT: lxv vs1, 96(r3) ; CHECK-P9-NEXT: xscvdpsxws f8, f7 ; CHECK-P9-NEXT: xxswapd vs7, vs7 +; CHECK-P9-NEXT: lxv vs1, 96(r3) ; CHECK-P9-NEXT: lxv vs2, 80(r3) ; CHECK-P9-NEXT: lxv vs3, 64(r3) 
; CHECK-P9-NEXT: lxv vs4, 48(r3) -; CHECK-P9-NEXT: lxv vs5, 32(r3) ; CHECK-P9-NEXT: xscvdpsxws f7, f7 ; CHECK-P9-NEXT: mffprwz r3, f8 -; CHECK-P9-NEXT: mtvsrd v2, r3 -; CHECK-P9-NEXT: mffprwz r3, f7 +; CHECK-P9-NEXT: mffprwz r4, f7 ; CHECK-P9-NEXT: xscvdpsxws f7, f6 ; CHECK-P9-NEXT: xxswapd vs6, vs6 -; CHECK-P9-NEXT: mtvsrd v3, r3 ; CHECK-P9-NEXT: xscvdpsxws f6, f6 -; CHECK-P9-NEXT: vmrghb v2, v2, v3 +; CHECK-P9-NEXT: mtvsrdd v2, r3, r4 ; CHECK-P9-NEXT: mffprwz r3, f7 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f6 +; CHECK-P9-NEXT: mffprwz r4, f6 ; CHECK-P9-NEXT: xscvdpsxws f6, f5 ; CHECK-P9-NEXT: xxswapd vs5, vs5 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mtvsrdd v3, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f5, f5 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 +; CHECK-P9-NEXT: vpkudum v2, v3, v2 ; CHECK-P9-NEXT: mffprwz r3, f6 -; CHECK-P9-NEXT: vmrglh v2, v3, v2 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f5 +; CHECK-P9-NEXT: mffprwz r4, f5 ; CHECK-P9-NEXT: xscvdpsxws f5, f4 ; CHECK-P9-NEXT: xxswapd vs4, vs4 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mtvsrdd v3, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f4, f4 -; CHECK-P9-NEXT: vmrghb v3, v3, v4 ; CHECK-P9-NEXT: mffprwz r3, f5 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: mffprwz r3, f4 +; CHECK-P9-NEXT: mffprwz r4, f4 ; CHECK-P9-NEXT: xscvdpsxws f4, f3 ; CHECK-P9-NEXT: xxswapd vs3, vs3 -; CHECK-P9-NEXT: mtvsrd v5, r3 +; CHECK-P9-NEXT: mtvsrdd v4, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f3, f3 -; CHECK-P9-NEXT: vmrghb v4, v4, v5 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 +; CHECK-P9-NEXT: vpkudum v3, v4, v3 ; CHECK-P9-NEXT: mffprwz r3, f4 -; CHECK-P9-NEXT: vmrglw v2, v3, v2 -; CHECK-P9-NEXT: mtvsrd v3, r3 -; CHECK-P9-NEXT: mffprwz r3, f3 +; CHECK-P9-NEXT: vpkuwum v2, v3, v2 +; CHECK-P9-NEXT: mffprwz r4, f3 ; CHECK-P9-NEXT: xscvdpsxws f3, f2 ; CHECK-P9-NEXT: xxswapd vs2, vs2 -; CHECK-P9-NEXT: mtvsrd v4, r3 +; CHECK-P9-NEXT: mtvsrdd v3, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f2, f2 -; CHECK-P9-NEXT: 
vmrghb v3, v3, v4 ; CHECK-P9-NEXT: mffprwz r3, f3 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: mffprwz r3, f2 +; CHECK-P9-NEXT: mffprwz r4, f2 ; CHECK-P9-NEXT: xscvdpsxws f2, f1 ; CHECK-P9-NEXT: xxswapd vs1, vs1 -; CHECK-P9-NEXT: mtvsrd v5, r3 +; CHECK-P9-NEXT: mtvsrdd v4, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f1, f1 -; CHECK-P9-NEXT: vmrghb v4, v4, v5 +; CHECK-P9-NEXT: vpkudum v3, v4, v3 ; CHECK-P9-NEXT: mffprwz r3, f2 -; CHECK-P9-NEXT: vmrglh v3, v4, v3 -; CHECK-P9-NEXT: mtvsrd v4, r3 -; CHECK-P9-NEXT: mffprwz r3, f1 +; CHECK-P9-NEXT: mffprwz r4, f1 ; CHECK-P9-NEXT: xscvdpsxws f1, f0 ; CHECK-P9-NEXT: xxswapd vs0, vs0 -; CHECK-P9-NEXT: mtvsrd v5, r3 +; CHECK-P9-NEXT: mtvsrdd v4, r3, r4 ; CHECK-P9-NEXT: xscvdpsxws f0, f0 -; CHECK-P9-NEXT: vmrghb v4, v4, v5 ; CHECK-P9-NEXT: mffprwz r3, f1 -; CHECK-P9-NEXT: mtvsrd v5, r3 -; CHECK-P9-NEXT: mffprwz r3, f0 -; CHECK-P9-NEXT: mtvsrd v0, r3 -; CHECK-P9-NEXT: vmrghb v5, v5, v0 -; CHECK-P9-NEXT: vmrglh v4, v5, v4 -; CHECK-P9-NEXT: vmrglw v3, v4, v3 -; CHECK-P9-NEXT: xxmrgld v2, v3, v2 +; CHECK-P9-NEXT: mffprwz r4, f0 +; CHECK-P9-NEXT: mtvsrdd v5, r3, r4 +; CHECK-P9-NEXT: vpkudum v4, v5, v4 +; CHECK-P9-NEXT: vpkuwum v3, v4, v3 +; CHECK-P9-NEXT: vpkuhum v2, v3, v2 ; CHECK-P9-NEXT: blr ; ; CHECK-BE-LABEL: test16elt_signed: ; CHECK-BE: # %bb.0: # %entry ; CHECK-BE-NEXT: lxv vs7, 112(r3) ; CHECK-BE-NEXT: lxv vs6, 96(r3) +; CHECK-BE-NEXT: lxv vs5, 80(r3) ; CHECK-BE-NEXT: lxv vs0, 0(r3) -; CHECK-BE-NEXT: lxv vs1, 16(r3) ; CHECK-BE-NEXT: xscvdpsxws f8, f7 ; CHECK-BE-NEXT: xxswapd vs7, vs7 +; CHECK-BE-NEXT: lxv vs1, 16(r3) ; CHECK-BE-NEXT: lxv vs2, 32(r3) ; CHECK-BE-NEXT: lxv vs3, 48(r3) ; CHECK-BE-NEXT: lxv vs4, 64(r3) -; CHECK-BE-NEXT: lxv vs5, 80(r3) ; CHECK-BE-NEXT: xscvdpsxws f7, f7 ; CHECK-BE-NEXT: mffprwz r3, f8 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v2, r3 -; CHECK-BE-NEXT: mffprwz r3, f7 +; CHECK-BE-NEXT: mffprwz r4, f7 ; CHECK-BE-NEXT: xscvdpsxws f7, f6 ; CHECK-BE-NEXT: xxswapd vs6, vs6 -; 
CHECK-BE-NEXT: sldi r3, r3, 56 ; CHECK-BE-NEXT: xscvdpsxws f6, f6 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: vmrghb v2, v2, v3 +; CHECK-BE-NEXT: mtvsrdd v2, r3, r4 ; CHECK-BE-NEXT: mffprwz r3, f7 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mffprwz r3, f6 +; CHECK-BE-NEXT: mffprwz r4, f6 ; CHECK-BE-NEXT: xscvdpsxws f6, f5 ; CHECK-BE-NEXT: xxswapd vs5, vs5 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrdd v3, r3, r4 ; CHECK-BE-NEXT: xscvdpsxws f5, f5 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: vmrghb v3, v3, v4 +; CHECK-BE-NEXT: vpkudum v2, v3, v2 ; CHECK-BE-NEXT: mffprwz r3, f6 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: vmrghh v2, v3, v2 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mffprwz r3, f5 +; CHECK-BE-NEXT: mffprwz r4, f5 ; CHECK-BE-NEXT: xscvdpsxws f5, f4 ; CHECK-BE-NEXT: xxswapd vs4, vs4 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrdd v3, r3, r4 ; CHECK-BE-NEXT: xscvdpsxws f4, f4 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: vmrghb v3, v3, v4 ; CHECK-BE-NEXT: mffprwz r3, f5 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: mffprwz r3, f4 +; CHECK-BE-NEXT: mffprwz r4, f4 ; CHECK-BE-NEXT: xscvdpsxws f4, f3 ; CHECK-BE-NEXT: xxswapd vs3, vs3 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrdd v4, r3, r4 ; CHECK-BE-NEXT: xscvdpsxws f3, f3 -; CHECK-BE-NEXT: mtvsrd v5, r3 -; CHECK-BE-NEXT: vmrghb v4, v4, v5 +; CHECK-BE-NEXT: vpkudum v3, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f4 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: vmrghw v2, v3, v2 -; CHECK-BE-NEXT: mtvsrd v3, r3 -; CHECK-BE-NEXT: mffprwz r3, f3 +; CHECK-BE-NEXT: vpkuwum v2, v3, v2 +; CHECK-BE-NEXT: mffprwz r4, f3 ; CHECK-BE-NEXT: xscvdpsxws f3, f2 ; CHECK-BE-NEXT: xxswapd vs2, vs2 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrdd v3, r3, r4 ; CHECK-BE-NEXT: xscvdpsxws f2, f2 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: 
vmrghb v3, v3, v4 ; CHECK-BE-NEXT: mffprwz r3, f3 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: mffprwz r3, f2 +; CHECK-BE-NEXT: mffprwz r4, f2 ; CHECK-BE-NEXT: xscvdpsxws f2, f1 ; CHECK-BE-NEXT: xxswapd vs1, vs1 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrdd v4, r3, r4 ; CHECK-BE-NEXT: xscvdpsxws f1, f1 -; CHECK-BE-NEXT: mtvsrd v5, r3 -; CHECK-BE-NEXT: vmrghb v4, v4, v5 +; CHECK-BE-NEXT: vpkudum v3, v4, v3 ; CHECK-BE-NEXT: mffprwz r3, f2 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: vmrghh v3, v4, v3 -; CHECK-BE-NEXT: mtvsrd v4, r3 -; CHECK-BE-NEXT: mffprwz r3, f1 +; CHECK-BE-NEXT: mffprwz r4, f1 ; CHECK-BE-NEXT: xscvdpsxws f1, f0 ; CHECK-BE-NEXT: xxswapd vs0, vs0 -; CHECK-BE-NEXT: sldi r3, r3, 56 +; CHECK-BE-NEXT: mtvsrdd v4, r3, r4 ; CHECK-BE-NEXT: xscvdpsxws f0, f0 -; CHECK-BE-NEXT: mtvsrd v5, r3 -; CHECK-BE-NEXT: vmrghb v4, v4, v5 ; CHECK-BE-NEXT: mffprwz r3, f1 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v5, r3 -; CHECK-BE-NEXT: mffprwz r3, f0 -; CHECK-BE-NEXT: sldi r3, r3, 56 -; CHECK-BE-NEXT: mtvsrd v0, r3 -; CHECK-BE-NEXT: vmrghb v5, v5, v0 -; CHECK-BE-NEXT: vmrghh v4, v5, v4 -; CHECK-BE-NEXT: vmrghw v3, v4, v3 -; CHECK-BE-NEXT: xxmrghd v2, v3, v2 +; CHECK-BE-NEXT: mffprwz r4, f0 +; CHECK-BE-NEXT: mtvsrdd v5, r3, r4 +; CHECK-BE-NEXT: vpkudum v4, v5, v4 +; CHECK-BE-NEXT: vpkuwum v3, v4, v3 +; CHECK-BE-NEXT: vpkuhum v2, v3, v2 ; CHECK-BE-NEXT: blr entry: %a = load <16 x double>, <16 x double>* %0, align 128 Index: llvm/test/CodeGen/PowerPC/vec_int_ext.ll =================================================================== --- llvm/test/CodeGen/PowerPC/vec_int_ext.ll +++ llvm/test/CodeGen/PowerPC/vec_int_ext.ll @@ -301,83 +301,59 @@ ; CHECK-LE-NEXT: li 9, 12 ; CHECK-LE-NEXT: li 10, 14 ; CHECK-LE-NEXT: extsb 3, 3 +; CHECK-LE-NEXT: vextubrx 7, 7, 2 +; CHECK-LE-NEXT: vextubrx 8, 8, 2 +; CHECK-LE-NEXT: vextubrx 9, 9, 2 +; CHECK-LE-NEXT: vextubrx 10, 10, 2 ; CHECK-LE-NEXT: extsb 4, 4 ; 
CHECK-LE-NEXT: extsb 5, 5 ; CHECK-LE-NEXT: extsb 6, 6 -; CHECK-LE-NEXT: vextubrx 7, 7, 2 -; CHECK-LE-NEXT: vextubrx 8, 8, 2 ; CHECK-LE-NEXT: extsb 7, 7 ; CHECK-LE-NEXT: extsb 8, 8 -; CHECK-LE-NEXT: mtvsrd 35, 4 -; CHECK-LE-NEXT: vextubrx 9, 9, 2 -; CHECK-LE-NEXT: vextubrx 10, 10, 2 -; CHECK-LE-NEXT: mtvsrd 34, 3 -; CHECK-LE-NEXT: mtvsrd 36, 6 ; CHECK-LE-NEXT: extsb 9, 9 ; CHECK-LE-NEXT: extsb 10, 10 -; CHECK-LE-NEXT: mtvsrd 37, 10 -; CHECK-LE-NEXT: vmrghh 2, 3, 2 -; CHECK-LE-NEXT: mtvsrd 35, 5 -; CHECK-LE-NEXT: vmrghh 3, 4, 3 -; CHECK-LE-NEXT: mtvsrd 36, 8 -; CHECK-LE-NEXT: vmrglw 2, 3, 2 -; CHECK-LE-NEXT: mtvsrd 35, 7 -; CHECK-LE-NEXT: vmrghh 3, 4, 3 -; CHECK-LE-NEXT: mtvsrd 36, 9 -; CHECK-LE-NEXT: vmrghh 4, 5, 4 -; CHECK-LE-NEXT: vmrglw 3, 4, 3 -; CHECK-LE-NEXT: xxmrgld 34, 35, 34 +; CHECK-LE-NEXT: mtvsrdd 34, 4, 3 +; CHECK-LE-NEXT: mtvsrdd 35, 6, 5 +; CHECK-LE-NEXT: mtvsrdd 36, 10, 9 +; CHECK-LE-NEXT: vpkudum 2, 3, 2 +; CHECK-LE-NEXT: mtvsrdd 35, 8, 7 +; CHECK-LE-NEXT: vpkudum 3, 4, 3 +; CHECK-LE-NEXT: vpkuwum 2, 3, 2 ; CHECK-LE-NEXT: blr ; ; CHECK-BE-LABEL: testInvalidExtend: ; CHECK-BE: # %bb.0: # %entry -; CHECK-BE-NEXT: li 9, 12 -; CHECK-BE-NEXT: li 10, 14 ; CHECK-BE-NEXT: li 7, 8 ; CHECK-BE-NEXT: li 8, 10 -; CHECK-BE-NEXT: vextublx 9, 9, 2 -; CHECK-BE-NEXT: vextublx 10, 10, 2 +; CHECK-BE-NEXT: li 9, 12 +; CHECK-BE-NEXT: li 10, 14 ; CHECK-BE-NEXT: vextublx 7, 7, 2 -; CHECK-BE-NEXT: vextublx 8, 8, 2 ; CHECK-BE-NEXT: li 3, 0 ; CHECK-BE-NEXT: li 4, 2 ; CHECK-BE-NEXT: li 5, 4 ; CHECK-BE-NEXT: li 6, 6 -; CHECK-BE-NEXT: extsb 9, 9 -; CHECK-BE-NEXT: extsb 10, 10 +; CHECK-BE-NEXT: vextublx 8, 8, 2 ; CHECK-BE-NEXT: extsb 7, 7 -; CHECK-BE-NEXT: extsb 8, 8 +; CHECK-BE-NEXT: vextublx 9, 9, 2 +; CHECK-BE-NEXT: vextublx 10, 10, 2 ; CHECK-BE-NEXT: vextublx 3, 3, 2 ; CHECK-BE-NEXT: vextublx 4, 4, 2 -; CHECK-BE-NEXT: extsb 3, 3 -; CHECK-BE-NEXT: extsb 4, 4 -; CHECK-BE-NEXT: sldi 10, 10, 48 -; CHECK-BE-NEXT: sldi 9, 9, 48 ; CHECK-BE-NEXT: vextublx 5, 5, 2 ; CHECK-BE-NEXT: 
vextublx 6, 6, 2 -; CHECK-BE-NEXT: sldi 8, 8, 48 -; CHECK-BE-NEXT: sldi 7, 7, 48 +; CHECK-BE-NEXT: extsb 8, 8 +; CHECK-BE-NEXT: extsb 9, 9 +; CHECK-BE-NEXT: extsb 10, 10 +; CHECK-BE-NEXT: extsb 3, 3 +; CHECK-BE-NEXT: extsb 4, 4 ; CHECK-BE-NEXT: extsb 5, 5 ; CHECK-BE-NEXT: extsb 6, 6 -; CHECK-BE-NEXT: sldi 6, 6, 48 -; CHECK-BE-NEXT: sldi 5, 5, 48 -; CHECK-BE-NEXT: sldi 4, 4, 48 -; CHECK-BE-NEXT: sldi 3, 3, 48 -; CHECK-BE-NEXT: mtvsrd 34, 10 -; CHECK-BE-NEXT: mtvsrd 35, 9 -; CHECK-BE-NEXT: mtvsrd 36, 7 -; CHECK-BE-NEXT: mtvsrd 37, 3 -; CHECK-BE-NEXT: vmrghh 2, 3, 2 -; CHECK-BE-NEXT: mtvsrd 35, 8 -; CHECK-BE-NEXT: vmrghh 3, 4, 3 -; CHECK-BE-NEXT: mtvsrd 36, 5 -; CHECK-BE-NEXT: vmrghw 2, 3, 2 -; CHECK-BE-NEXT: mtvsrd 35, 6 -; CHECK-BE-NEXT: vmrghh 3, 4, 3 -; CHECK-BE-NEXT: mtvsrd 36, 4 -; CHECK-BE-NEXT: vmrghh 4, 5, 4 -; CHECK-BE-NEXT: vmrghw 3, 4, 3 -; CHECK-BE-NEXT: xxmrghd 34, 35, 34 +; CHECK-BE-NEXT: mtvsrdd 34, 9, 10 +; CHECK-BE-NEXT: mtvsrdd 35, 7, 8 +; CHECK-BE-NEXT: mtvsrdd 36, 3, 4 +; CHECK-BE-NEXT: vpkudum 2, 3, 2 +; CHECK-BE-NEXT: mtvsrdd 35, 5, 6 +; CHECK-BE-NEXT: vpkudum 3, 4, 3 +; CHECK-BE-NEXT: vpkuwum 2, 3, 2 ; CHECK-BE-NEXT: blr entry: